altq_rmclass.c revision 130368
1130368Smlaier/* $FreeBSD: head/sys/contrib/altq/altq/altq_rmclass.c 130368 2004-06-12 00:57:20Z mlaier $ */ 2130365Smlaier/* $KAME: altq_rmclass.c,v 1.18 2003/11/06 06:32:53 kjc Exp $ */ 3130365Smlaier 4130365Smlaier/* 5130365Smlaier * Copyright (c) 1991-1997 Regents of the University of California. 6130365Smlaier * All rights reserved. 7130365Smlaier * 8130365Smlaier * Redistribution and use in source and binary forms, with or without 9130365Smlaier * modification, are permitted provided that the following conditions 10130365Smlaier * are met: 11130365Smlaier * 1. Redistributions of source code must retain the above copyright 12130365Smlaier * notice, this list of conditions and the following disclaimer. 13130365Smlaier * 2. Redistributions in binary form must reproduce the above copyright 14130365Smlaier * notice, this list of conditions and the following disclaimer in the 15130365Smlaier * documentation and/or other materials provided with the distribution. 16130365Smlaier * 3. All advertising materials mentioning features or use of this software 17130365Smlaier * must display the following acknowledgement: 18130365Smlaier * This product includes software developed by the Network Research 19130365Smlaier * Group at Lawrence Berkeley Laboratory. 20130365Smlaier * 4. Neither the name of the University nor of the Laboratory may be used 21130365Smlaier * to endorse or promote products derived from this software without 22130365Smlaier * specific prior written permission. 23130365Smlaier * 24130365Smlaier * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25130365Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26130365Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27130365Smlaier * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28130365Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29130365Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30130365Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31130365Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32130365Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33130365Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34130365Smlaier * SUCH DAMAGE. 35130365Smlaier * 36130365Smlaier * LBL code modified by speer@eng.sun.com, May 1977. 37130365Smlaier * For questions and/or comments, please send mail to cbq@ee.lbl.gov 38130365Smlaier */ 39130365Smlaier 40130365Smlaier#ident "@(#)rm_class.c 1.48 97/12/05 SMI" 41130365Smlaier 42130365Smlaier#if defined(__FreeBSD__) || defined(__NetBSD__) 43130365Smlaier#include "opt_altq.h" 44130365Smlaier#if (__FreeBSD__ != 2) 45130365Smlaier#include "opt_inet.h" 46130365Smlaier#ifdef __FreeBSD__ 47130365Smlaier#include "opt_inet6.h" 48130365Smlaier#endif 49130365Smlaier#endif 50130365Smlaier#endif /* __FreeBSD__ || __NetBSD__ */ 51130365Smlaier#ifdef ALTQ_CBQ /* cbq is enabled by ALTQ_CBQ option in opt_altq.h */ 52130365Smlaier 53130365Smlaier#include <sys/param.h> 54130365Smlaier#include <sys/malloc.h> 55130365Smlaier#include <sys/mbuf.h> 56130365Smlaier#include <sys/socket.h> 57130365Smlaier#include <sys/systm.h> 58130365Smlaier#include <sys/errno.h> 59130365Smlaier#include <sys/time.h> 60130365Smlaier#ifdef ALTQ3_COMPAT 61130365Smlaier#include <sys/kernel.h> 62130365Smlaier#endif 63130365Smlaier 64130365Smlaier#include <net/if.h> 65130365Smlaier#ifdef ALTQ3_COMPAT 66130365Smlaier#include <netinet/in.h> 67130365Smlaier#include <netinet/in_systm.h> 68130365Smlaier#include <netinet/ip.h> 69130365Smlaier#endif 70130365Smlaier 71130365Smlaier#include 
<altq/altq.h> 72130365Smlaier#include <altq/altq_rmclass.h> 73130365Smlaier#include <altq/altq_rmclass_debug.h> 74130365Smlaier#include <altq/altq_red.h> 75130365Smlaier#include <altq/altq_rio.h> 76130365Smlaier 77130365Smlaier/* 78130365Smlaier * Local Macros 79130365Smlaier */ 80130365Smlaier 81130365Smlaier#define reset_cutoff(ifd) { ifd->cutoff_ = RM_MAXDEPTH; } 82130365Smlaier 83130365Smlaier/* 84130365Smlaier * Local routines. 85130365Smlaier */ 86130365Smlaier 87130365Smlaierstatic int rmc_satisfied(struct rm_class *, struct timeval *); 88130365Smlaierstatic void rmc_wrr_set_weights(struct rm_ifdat *); 89130365Smlaierstatic void rmc_depth_compute(struct rm_class *); 90130365Smlaierstatic void rmc_depth_recompute(rm_class_t *); 91130365Smlaier 92130365Smlaierstatic mbuf_t *_rmc_wrr_dequeue_next(struct rm_ifdat *, int); 93130365Smlaierstatic mbuf_t *_rmc_prr_dequeue_next(struct rm_ifdat *, int); 94130365Smlaier 95130365Smlaierstatic int _rmc_addq(rm_class_t *, mbuf_t *); 96130365Smlaierstatic void _rmc_dropq(rm_class_t *); 97130365Smlaierstatic mbuf_t *_rmc_getq(rm_class_t *); 98130365Smlaierstatic mbuf_t *_rmc_pollq(rm_class_t *); 99130365Smlaier 100130365Smlaierstatic int rmc_under_limit(struct rm_class *, struct timeval *); 101130365Smlaierstatic void rmc_tl_satisfied(struct rm_ifdat *, struct timeval *); 102130365Smlaierstatic void rmc_drop_action(struct rm_class *); 103130365Smlaierstatic void rmc_restart(struct rm_class *); 104130365Smlaierstatic void rmc_root_overlimit(struct rm_class *, struct rm_class *); 105130365Smlaier 106130365Smlaier#define BORROW_OFFTIME 107130365Smlaier/* 108130365Smlaier * BORROW_OFFTIME (experimental): 109130365Smlaier * borrow the offtime of the class borrowing from. 110130365Smlaier * the reason is that when its own offtime is set, the class is unable 111130365Smlaier * to borrow much, especially when cutoff is taking effect. 
112130365Smlaier * but when the borrowed class is overloaded (advidle is close to minidle), 113130365Smlaier * use the borrowing class's offtime to avoid overload. 114130365Smlaier */ 115130365Smlaier#define ADJUST_CUTOFF 116130365Smlaier/* 117130365Smlaier * ADJUST_CUTOFF (experimental): 118130365Smlaier * if no underlimit class is found due to cutoff, increase cutoff and 119130365Smlaier * retry the scheduling loop. 120130365Smlaier * also, don't invoke delay_actions while cutoff is taking effect, 121130365Smlaier * since a sleeping class won't have a chance to be scheduled in the 122130365Smlaier * next loop. 123130365Smlaier * 124130365Smlaier * now heuristics for setting the top-level variable (cutoff_) becomes: 125130365Smlaier * 1. if a packet arrives for a not-overlimit class, set cutoff 126130365Smlaier * to the depth of the class. 127130365Smlaier * 2. if cutoff is i, and a packet arrives for an overlimit class 128130365Smlaier * with an underlimit ancestor at a lower level than i (say j), 129130365Smlaier * then set cutoff to j. 130130365Smlaier * 3. at scheduling a packet, if there is no underlimit class 131130365Smlaier * due to the current cutoff level, increase cutoff by 1 and 132130365Smlaier * then try to schedule again. 133130365Smlaier */ 134130365Smlaier 135130365Smlaier/* 136130365Smlaier * rm_class_t * 137130365Smlaier * rmc_newclass(...) - Create a new resource management class at priority 138130365Smlaier * 'pri' on the interface given by 'ifd'. 139130365Smlaier * 140130365Smlaier * nsecPerByte is the data rate of the interface in nanoseconds/byte. 141130365Smlaier * E.g., 800 for a 10Mb/s ethernet. If the class gets less 142130365Smlaier * than 100% of the bandwidth, this number should be the 143130365Smlaier * 'effective' rate for the class. Let f be the 144130365Smlaier * bandwidth fraction allocated to this class, and let 145130365Smlaier * nsPerByte be the data rate of the output link in 146130365Smlaier * nanoseconds/byte. 
Then nsecPerByte is set to 147130365Smlaier * nsPerByte / f. E.g., 1600 (= 800 / .5) 148130365Smlaier * for a class that gets 50% of an ethernet's bandwidth. 149130365Smlaier * 150130365Smlaier * action the routine to call when the class is over limit. 151130365Smlaier * 152130365Smlaier * maxq max allowable queue size for class (in packets). 153130365Smlaier * 154130365Smlaier * parent parent class pointer. 155130365Smlaier * 156130365Smlaier * borrow class to borrow from (should be either 'parent' or null). 157130365Smlaier * 158130365Smlaier * maxidle max value allowed for class 'idle' time estimate (this 159130365Smlaier * parameter determines how large an initial burst of packets 160130365Smlaier * can be before overlimit action is invoked. 161130365Smlaier * 162130365Smlaier * offtime how long 'delay' action will delay when class goes over 163130365Smlaier * limit (this parameter determines the steady-state burst 164130365Smlaier * size when a class is running over its limit). 165130365Smlaier * 166130365Smlaier * Maxidle and offtime have to be computed from the following: If the 167130365Smlaier * average packet size is s, the bandwidth fraction allocated to this 168130365Smlaier * class is f, we want to allow b packet bursts, and the gain of the 169130365Smlaier * averaging filter is g (= 1 - 2^(-RM_FILTER_GAIN)), then: 170130365Smlaier * 171130365Smlaier * ptime = s * nsPerByte * (1 - f) / f 172130365Smlaier * maxidle = ptime * (1 - g^b) / g^b 173130365Smlaier * minidle = -ptime * (1 / (f - 1)) 174130365Smlaier * offtime = ptime * (1 + 1/(1 - g) * (1 - g^(b - 1)) / g^(b - 1) 175130365Smlaier * 176130365Smlaier * Operationally, it's convenient to specify maxidle & offtime in units 177130365Smlaier * independent of the link bandwidth so the maxidle & offtime passed to 178130365Smlaier * this routine are the above values multiplied by 8*f/(1000*nsPerByte). 
179130365Smlaier * (The constant factor is a scale factor needed to make the parameters 180130365Smlaier * integers. This scaling also means that the 'unscaled' values of 181130365Smlaier * maxidle*nsecPerByte/8 and offtime*nsecPerByte/8 will be in microseconds, 182130365Smlaier * not nanoseconds.) Also note that the 'idle' filter computation keeps 183130365Smlaier * an estimate scaled upward by 2^RM_FILTER_GAIN so the passed value of 184130365Smlaier * maxidle also must be scaled upward by this value. Thus, the passed 185130365Smlaier * values for maxidle and offtime can be computed as follows: 186130365Smlaier * 187130365Smlaier * maxidle = maxidle * 2^RM_FILTER_GAIN * 8 / (1000 * nsecPerByte) 188130365Smlaier * offtime = offtime * 8 / (1000 * nsecPerByte) 189130365Smlaier * 190130365Smlaier * When USE_HRTIME is employed, then maxidle and offtime become: 191130365Smlaier * maxidle = maxilde * (8.0 / nsecPerByte); 192130365Smlaier * offtime = offtime * (8.0 / nsecPerByte); 193130365Smlaier */ 194130365Smlaierstruct rm_class * 195130365Smlaierrmc_newclass(int pri, struct rm_ifdat *ifd, u_int nsecPerByte, 196130365Smlaier void (*action)(rm_class_t *, rm_class_t *), int maxq, 197130365Smlaier struct rm_class *parent, struct rm_class *borrow, u_int maxidle, 198130365Smlaier int minidle, u_int offtime, int pktsize, int flags) 199130365Smlaier{ 200130365Smlaier struct rm_class *cl; 201130365Smlaier struct rm_class *peer; 202130365Smlaier int s; 203130365Smlaier 204130365Smlaier if (pri >= RM_MAXPRIO) 205130365Smlaier return (NULL); 206130365Smlaier#ifndef ALTQ_RED 207130365Smlaier if (flags & RMCF_RED) { 208130365Smlaier#ifdef ALTQ_DEBUG 209130365Smlaier printf("rmc_newclass: RED not configured for CBQ!\n"); 210130365Smlaier#endif 211130365Smlaier return (NULL); 212130365Smlaier } 213130365Smlaier#endif 214130365Smlaier#ifndef ALTQ_RIO 215130365Smlaier if (flags & RMCF_RIO) { 216130365Smlaier#ifdef ALTQ_DEBUG 217130365Smlaier printf("rmc_newclass: RIO not configured for 
CBQ!\n"); 218130365Smlaier#endif 219130365Smlaier return (NULL); 220130365Smlaier } 221130365Smlaier#endif 222130365Smlaier 223130365Smlaier MALLOC(cl, struct rm_class *, sizeof(struct rm_class), 224130365Smlaier M_DEVBUF, M_WAITOK); 225130365Smlaier if (cl == NULL) 226130365Smlaier return (NULL); 227130365Smlaier bzero(cl, sizeof(struct rm_class)); 228130365Smlaier CALLOUT_INIT(&cl->callout_); 229130365Smlaier MALLOC(cl->q_, class_queue_t *, sizeof(class_queue_t), 230130365Smlaier M_DEVBUF, M_WAITOK); 231130365Smlaier if (cl->q_ == NULL) { 232130365Smlaier FREE(cl, M_DEVBUF); 233130365Smlaier return (NULL); 234130365Smlaier } 235130365Smlaier bzero(cl->q_, sizeof(class_queue_t)); 236130365Smlaier 237130365Smlaier /* 238130365Smlaier * Class initialization. 239130365Smlaier */ 240130365Smlaier cl->children_ = NULL; 241130365Smlaier cl->parent_ = parent; 242130365Smlaier cl->borrow_ = borrow; 243130365Smlaier cl->leaf_ = 1; 244130365Smlaier cl->ifdat_ = ifd; 245130365Smlaier cl->pri_ = pri; 246130365Smlaier cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 247130365Smlaier cl->depth_ = 0; 248130365Smlaier cl->qthresh_ = 0; 249130365Smlaier cl->ns_per_byte_ = nsecPerByte; 250130365Smlaier 251130365Smlaier qlimit(cl->q_) = maxq; 252130365Smlaier qtype(cl->q_) = Q_DROPHEAD; 253130365Smlaier qlen(cl->q_) = 0; 254130365Smlaier cl->flags_ = flags; 255130365Smlaier 256130365Smlaier#if 1 /* minidle is also scaled in ALTQ */ 257130365Smlaier cl->minidle_ = (minidle * (int)nsecPerByte) / 8; 258130365Smlaier if (cl->minidle_ > 0) 259130365Smlaier cl->minidle_ = 0; 260130365Smlaier#else 261130365Smlaier cl->minidle_ = minidle; 262130365Smlaier#endif 263130365Smlaier cl->maxidle_ = (maxidle * nsecPerByte) / 8; 264130365Smlaier if (cl->maxidle_ == 0) 265130365Smlaier cl->maxidle_ = 1; 266130365Smlaier#if 1 /* offtime is also scaled in ALTQ */ 267130365Smlaier cl->avgidle_ = cl->maxidle_; 268130365Smlaier cl->offtime_ = ((offtime * nsecPerByte) / 8) >> 
RM_FILTER_GAIN; 269130365Smlaier if (cl->offtime_ == 0) 270130365Smlaier cl->offtime_ = 1; 271130365Smlaier#else 272130365Smlaier cl->avgidle_ = 0; 273130365Smlaier cl->offtime_ = (offtime * nsecPerByte) / 8; 274130365Smlaier#endif 275130365Smlaier cl->overlimit = action; 276130365Smlaier 277130365Smlaier#ifdef ALTQ_RED 278130365Smlaier if (flags & (RMCF_RED|RMCF_RIO)) { 279130365Smlaier int red_flags, red_pkttime; 280130365Smlaier 281130365Smlaier red_flags = 0; 282130365Smlaier if (flags & RMCF_ECN) 283130365Smlaier red_flags |= REDF_ECN; 284130365Smlaier if (flags & RMCF_FLOWVALVE) 285130365Smlaier red_flags |= REDF_FLOWVALVE; 286130365Smlaier#ifdef ALTQ_RIO 287130365Smlaier if (flags & RMCF_CLEARDSCP) 288130365Smlaier red_flags |= RIOF_CLEARDSCP; 289130365Smlaier#endif 290130365Smlaier red_pkttime = nsecPerByte * pktsize / 1000; 291130365Smlaier 292130365Smlaier if (flags & RMCF_RED) { 293130365Smlaier cl->red_ = red_alloc(0, 0, 294130365Smlaier qlimit(cl->q_) * 10/100, 295130365Smlaier qlimit(cl->q_) * 30/100, 296130365Smlaier red_flags, red_pkttime); 297130365Smlaier if (cl->red_ != NULL) 298130365Smlaier qtype(cl->q_) = Q_RED; 299130365Smlaier } 300130365Smlaier#ifdef ALTQ_RIO 301130365Smlaier else { 302130365Smlaier cl->red_ = (red_t *)rio_alloc(0, NULL, 303130365Smlaier red_flags, red_pkttime); 304130365Smlaier if (cl->red_ != NULL) 305130365Smlaier qtype(cl->q_) = Q_RIO; 306130365Smlaier } 307130365Smlaier#endif 308130365Smlaier } 309130365Smlaier#endif /* ALTQ_RED */ 310130365Smlaier 311130365Smlaier /* 312130365Smlaier * put the class into the class tree 313130365Smlaier */ 314130365Smlaier#ifdef __NetBSD__ 315130365Smlaier s = splnet(); 316130365Smlaier#else 317130365Smlaier s = splimp(); 318130365Smlaier#endif 319130368Smlaier IFQ_LOCK(ifd->ifq_); 320130365Smlaier if ((peer = ifd->active_[pri]) != NULL) { 321130365Smlaier /* find the last class at this pri */ 322130365Smlaier cl->peer_ = peer; 323130365Smlaier while (peer->peer_ != ifd->active_[pri]) 
324130365Smlaier peer = peer->peer_; 325130365Smlaier peer->peer_ = cl; 326130365Smlaier } else { 327130365Smlaier ifd->active_[pri] = cl; 328130365Smlaier cl->peer_ = cl; 329130365Smlaier } 330130365Smlaier 331130365Smlaier if (cl->parent_) { 332130365Smlaier cl->next_ = parent->children_; 333130365Smlaier parent->children_ = cl; 334130365Smlaier parent->leaf_ = 0; 335130365Smlaier } 336130365Smlaier 337130365Smlaier /* 338130365Smlaier * Compute the depth of this class and its ancestors in the class 339130365Smlaier * hierarchy. 340130365Smlaier */ 341130365Smlaier rmc_depth_compute(cl); 342130365Smlaier 343130365Smlaier /* 344130365Smlaier * If CBQ's WRR is enabled, then initialize the class WRR state. 345130365Smlaier */ 346130365Smlaier if (ifd->wrr_) { 347130365Smlaier ifd->num_[pri]++; 348130365Smlaier ifd->alloc_[pri] += cl->allotment_; 349130365Smlaier rmc_wrr_set_weights(ifd); 350130365Smlaier } 351130368Smlaier IFQ_UNLOCK(ifd->ifq_); 352130365Smlaier splx(s); 353130365Smlaier return (cl); 354130365Smlaier} 355130365Smlaier 356130365Smlaierint 357130365Smlaierrmc_modclass(struct rm_class *cl, u_int nsecPerByte, int maxq, u_int maxidle, 358130365Smlaier int minidle, u_int offtime, int pktsize) 359130365Smlaier{ 360130365Smlaier struct rm_ifdat *ifd; 361130365Smlaier u_int old_allotment; 362130365Smlaier int s; 363130365Smlaier 364130365Smlaier ifd = cl->ifdat_; 365130365Smlaier old_allotment = cl->allotment_; 366130365Smlaier 367130365Smlaier#ifdef __NetBSD__ 368130365Smlaier s = splnet(); 369130365Smlaier#else 370130365Smlaier s = splimp(); 371130365Smlaier#endif 372130368Smlaier IFQ_LOCK(ifd->ifq_); 373130365Smlaier cl->allotment_ = RM_NS_PER_SEC / nsecPerByte; /* Bytes per sec */ 374130365Smlaier cl->qthresh_ = 0; 375130365Smlaier cl->ns_per_byte_ = nsecPerByte; 376130365Smlaier 377130365Smlaier qlimit(cl->q_) = maxq; 378130365Smlaier 379130365Smlaier#if 1 /* minidle is also scaled in ALTQ */ 380130365Smlaier cl->minidle_ = (minidle * nsecPerByte) / 8; 
381130365Smlaier if (cl->minidle_ > 0) 382130365Smlaier cl->minidle_ = 0; 383130365Smlaier#else 384130365Smlaier cl->minidle_ = minidle; 385130365Smlaier#endif 386130365Smlaier cl->maxidle_ = (maxidle * nsecPerByte) / 8; 387130365Smlaier if (cl->maxidle_ == 0) 388130365Smlaier cl->maxidle_ = 1; 389130365Smlaier#if 1 /* offtime is also scaled in ALTQ */ 390130365Smlaier cl->avgidle_ = cl->maxidle_; 391130365Smlaier cl->offtime_ = ((offtime * nsecPerByte) / 8) >> RM_FILTER_GAIN; 392130365Smlaier if (cl->offtime_ == 0) 393130365Smlaier cl->offtime_ = 1; 394130365Smlaier#else 395130365Smlaier cl->avgidle_ = 0; 396130365Smlaier cl->offtime_ = (offtime * nsecPerByte) / 8; 397130365Smlaier#endif 398130365Smlaier 399130365Smlaier /* 400130365Smlaier * If CBQ's WRR is enabled, then initialize the class WRR state. 401130365Smlaier */ 402130365Smlaier if (ifd->wrr_) { 403130365Smlaier ifd->alloc_[cl->pri_] += cl->allotment_ - old_allotment; 404130365Smlaier rmc_wrr_set_weights(ifd); 405130365Smlaier } 406130368Smlaier IFQ_UNLOCK(ifd->ifq_); 407130365Smlaier splx(s); 408130365Smlaier return (0); 409130365Smlaier} 410130365Smlaier 411130365Smlaier/* 412130365Smlaier * static void 413130365Smlaier * rmc_wrr_set_weights(struct rm_ifdat *ifdat) - This function computes 414130365Smlaier * the appropriate run robin weights for the CBQ weighted round robin 415130365Smlaier * algorithm. 416130365Smlaier * 417130365Smlaier * Returns: NONE 418130365Smlaier */ 419130365Smlaier 420130365Smlaierstatic void 421130365Smlaierrmc_wrr_set_weights(struct rm_ifdat *ifd) 422130365Smlaier{ 423130365Smlaier int i; 424130365Smlaier struct rm_class *cl, *clh; 425130365Smlaier 426130365Smlaier for (i = 0; i < RM_MAXPRIO; i++) { 427130365Smlaier /* 428130365Smlaier * This is inverted from that of the simulator to 429130365Smlaier * maintain precision. 
430130365Smlaier */ 431130365Smlaier if (ifd->num_[i] == 0) 432130365Smlaier ifd->M_[i] = 0; 433130365Smlaier else 434130365Smlaier ifd->M_[i] = ifd->alloc_[i] / 435130365Smlaier (ifd->num_[i] * ifd->maxpkt_); 436130365Smlaier /* 437130365Smlaier * Compute the weighted allotment for each class. 438130365Smlaier * This takes the expensive div instruction out 439130365Smlaier * of the main loop for the wrr scheduling path. 440130365Smlaier * These only get recomputed when a class comes or 441130365Smlaier * goes. 442130365Smlaier */ 443130365Smlaier if (ifd->active_[i] != NULL) { 444130365Smlaier clh = cl = ifd->active_[i]; 445130365Smlaier do { 446130365Smlaier /* safe-guard for slow link or alloc_ == 0 */ 447130365Smlaier if (ifd->M_[i] == 0) 448130365Smlaier cl->w_allotment_ = 0; 449130365Smlaier else 450130365Smlaier cl->w_allotment_ = cl->allotment_ / 451130365Smlaier ifd->M_[i]; 452130365Smlaier cl = cl->peer_; 453130365Smlaier } while ((cl != NULL) && (cl != clh)); 454130365Smlaier } 455130365Smlaier } 456130365Smlaier} 457130365Smlaier 458130365Smlaierint 459130365Smlaierrmc_get_weight(struct rm_ifdat *ifd, int pri) 460130365Smlaier{ 461130365Smlaier if ((pri >= 0) && (pri < RM_MAXPRIO)) 462130365Smlaier return (ifd->M_[pri]); 463130365Smlaier else 464130365Smlaier return (0); 465130365Smlaier} 466130365Smlaier 467130365Smlaier/* 468130365Smlaier * static void 469130365Smlaier * rmc_depth_compute(struct rm_class *cl) - This function computes the 470130365Smlaier * appropriate depth of class 'cl' and its ancestors. 471130365Smlaier * 472130365Smlaier * Returns: NONE 473130365Smlaier */ 474130365Smlaier 475130365Smlaierstatic void 476130365Smlaierrmc_depth_compute(struct rm_class *cl) 477130365Smlaier{ 478130365Smlaier rm_class_t *t = cl, *p; 479130365Smlaier 480130365Smlaier /* 481130365Smlaier * Recompute the depth for the branch of the tree. 
482130365Smlaier */ 483130365Smlaier while (t != NULL) { 484130365Smlaier p = t->parent_; 485130365Smlaier if (p && (t->depth_ >= p->depth_)) { 486130365Smlaier p->depth_ = t->depth_ + 1; 487130365Smlaier t = p; 488130365Smlaier } else 489130365Smlaier t = NULL; 490130365Smlaier } 491130365Smlaier} 492130365Smlaier 493130365Smlaier/* 494130365Smlaier * static void 495130365Smlaier * rmc_depth_recompute(struct rm_class *cl) - This function re-computes 496130365Smlaier * the depth of the tree after a class has been deleted. 497130365Smlaier * 498130365Smlaier * Returns: NONE 499130365Smlaier */ 500130365Smlaier 501130365Smlaierstatic void 502130365Smlaierrmc_depth_recompute(rm_class_t *cl) 503130365Smlaier{ 504130365Smlaier#if 1 /* ALTQ */ 505130365Smlaier rm_class_t *p, *t; 506130365Smlaier 507130365Smlaier p = cl; 508130365Smlaier while (p != NULL) { 509130365Smlaier if ((t = p->children_) == NULL) { 510130365Smlaier p->depth_ = 0; 511130365Smlaier } else { 512130365Smlaier int cdepth = 0; 513130365Smlaier 514130365Smlaier while (t != NULL) { 515130365Smlaier if (t->depth_ > cdepth) 516130365Smlaier cdepth = t->depth_; 517130365Smlaier t = t->next_; 518130365Smlaier } 519130365Smlaier 520130365Smlaier if (p->depth_ == cdepth + 1) 521130365Smlaier /* no change to this parent */ 522130365Smlaier return; 523130365Smlaier 524130365Smlaier p->depth_ = cdepth + 1; 525130365Smlaier } 526130365Smlaier 527130365Smlaier p = p->parent_; 528130365Smlaier } 529130365Smlaier#else 530130365Smlaier rm_class_t *t; 531130365Smlaier 532130365Smlaier if (cl->depth_ >= 1) { 533130365Smlaier if (cl->children_ == NULL) { 534130365Smlaier cl->depth_ = 0; 535130365Smlaier } else if ((t = cl->children_) != NULL) { 536130365Smlaier while (t != NULL) { 537130365Smlaier if (t->children_ != NULL) 538130365Smlaier rmc_depth_recompute(t); 539130365Smlaier t = t->next_; 540130365Smlaier } 541130365Smlaier } else 542130365Smlaier rmc_depth_compute(cl); 543130365Smlaier } 544130365Smlaier#endif 
545130365Smlaier} 546130365Smlaier 547130365Smlaier/* 548130365Smlaier * void 549130365Smlaier * rmc_delete_class(struct rm_ifdat *ifdat, struct rm_class *cl) - This 550130365Smlaier * function deletes a class from the link-sharing structure and frees 551130365Smlaier * all resources associated with the class. 552130365Smlaier * 553130365Smlaier * Returns: NONE 554130365Smlaier */ 555130365Smlaier 556130365Smlaiervoid 557130365Smlaierrmc_delete_class(struct rm_ifdat *ifd, struct rm_class *cl) 558130365Smlaier{ 559130365Smlaier struct rm_class *p, *head, *previous; 560130365Smlaier int s; 561130365Smlaier 562130365Smlaier ASSERT(cl->children_ == NULL); 563130365Smlaier 564130365Smlaier if (cl->sleeping_) 565130365Smlaier CALLOUT_STOP(&cl->callout_); 566130365Smlaier 567130365Smlaier#ifdef __NetBSD__ 568130365Smlaier s = splnet(); 569130365Smlaier#else 570130365Smlaier s = splimp(); 571130365Smlaier#endif 572130368Smlaier IFQ_LOCK(ifd->ifq_); 573130365Smlaier /* 574130365Smlaier * Free packets in the packet queue. 575130365Smlaier * XXX - this may not be a desired behavior. Packets should be 576130365Smlaier * re-queued. 577130365Smlaier */ 578130365Smlaier rmc_dropall(cl); 579130365Smlaier 580130365Smlaier /* 581130365Smlaier * If the class has a parent, then remove the class from the 582130365Smlaier * class from the parent's children chain. 
583130365Smlaier */ 584130365Smlaier if (cl->parent_ != NULL) { 585130365Smlaier head = cl->parent_->children_; 586130365Smlaier p = previous = head; 587130365Smlaier if (head->next_ == NULL) { 588130365Smlaier ASSERT(head == cl); 589130365Smlaier cl->parent_->children_ = NULL; 590130365Smlaier cl->parent_->leaf_ = 1; 591130365Smlaier } else while (p != NULL) { 592130365Smlaier if (p == cl) { 593130365Smlaier if (cl == head) 594130365Smlaier cl->parent_->children_ = cl->next_; 595130365Smlaier else 596130365Smlaier previous->next_ = cl->next_; 597130365Smlaier cl->next_ = NULL; 598130365Smlaier p = NULL; 599130365Smlaier } else { 600130365Smlaier previous = p; 601130365Smlaier p = p->next_; 602130365Smlaier } 603130365Smlaier } 604130365Smlaier } 605130365Smlaier 606130365Smlaier /* 607130365Smlaier * Delete class from class priority peer list. 608130365Smlaier */ 609130365Smlaier if ((p = ifd->active_[cl->pri_]) != NULL) { 610130365Smlaier /* 611130365Smlaier * If there is more than one member of this priority 612130365Smlaier * level, then look for class(cl) in the priority level. 613130365Smlaier */ 614130365Smlaier if (p != p->peer_) { 615130365Smlaier while (p->peer_ != cl) 616130365Smlaier p = p->peer_; 617130365Smlaier p->peer_ = cl->peer_; 618130365Smlaier 619130365Smlaier if (ifd->active_[cl->pri_] == cl) 620130365Smlaier ifd->active_[cl->pri_] = cl->peer_; 621130365Smlaier } else { 622130365Smlaier ASSERT(p == cl); 623130365Smlaier ifd->active_[cl->pri_] = NULL; 624130365Smlaier } 625130365Smlaier } 626130365Smlaier 627130365Smlaier /* 628130365Smlaier * Recompute the WRR weights. 629130365Smlaier */ 630130365Smlaier if (ifd->wrr_) { 631130365Smlaier ifd->alloc_[cl->pri_] -= cl->allotment_; 632130365Smlaier ifd->num_[cl->pri_]--; 633130365Smlaier rmc_wrr_set_weights(ifd); 634130365Smlaier } 635130365Smlaier 636130365Smlaier /* 637130365Smlaier * Re-compute the depth of the tree. 
638130365Smlaier */ 639130365Smlaier#if 1 /* ALTQ */ 640130365Smlaier rmc_depth_recompute(cl->parent_); 641130365Smlaier#else 642130365Smlaier rmc_depth_recompute(ifd->root_); 643130365Smlaier#endif 644130365Smlaier 645130368Smlaier IFQ_UNLOCK(ifd->ifq_); 646130365Smlaier splx(s); 647130365Smlaier 648130365Smlaier /* 649130365Smlaier * Free the class structure. 650130365Smlaier */ 651130365Smlaier if (cl->red_ != NULL) { 652130365Smlaier#ifdef ALTQ_RIO 653130365Smlaier if (q_is_rio(cl->q_)) 654130365Smlaier rio_destroy((rio_t *)cl->red_); 655130365Smlaier#endif 656130365Smlaier#ifdef ALTQ_RED 657130365Smlaier if (q_is_red(cl->q_)) 658130365Smlaier red_destroy(cl->red_); 659130365Smlaier#endif 660130365Smlaier } 661130365Smlaier FREE(cl->q_, M_DEVBUF); 662130365Smlaier FREE(cl, M_DEVBUF); 663130365Smlaier} 664130365Smlaier 665130365Smlaier 666130365Smlaier/* 667130365Smlaier * void 668130365Smlaier * rmc_init(...) - Initialize the resource management data structures 669130365Smlaier * associated with the output portion of interface 'ifp'. 'ifd' is 670130365Smlaier * where the structures will be built (for backwards compatibility, the 671130365Smlaier * structures aren't kept in the ifnet struct). 'nsecPerByte' 672130365Smlaier * gives the link speed (inverse of bandwidth) in nanoseconds/byte. 673130365Smlaier * 'restart' is the driver-specific routine that the generic 'delay 674130365Smlaier * until under limit' action will call to restart output. `maxq' 675130365Smlaier * is the queue size of the 'link' & 'default' classes. 'maxqueued' 676130365Smlaier * is the maximum number of packets that the resource management 677130365Smlaier * code will allow to be queued 'downstream' (this is typically 1). 
678130365Smlaier * 679130365Smlaier * Returns: NONE 680130365Smlaier */ 681130365Smlaier 682130365Smlaiervoid 683130365Smlaierrmc_init(struct ifaltq *ifq, struct rm_ifdat *ifd, u_int nsecPerByte, 684130365Smlaier void (*restart)(struct ifaltq *), int maxq, int maxqueued, u_int maxidle, 685130365Smlaier int minidle, u_int offtime, int flags) 686130365Smlaier{ 687130365Smlaier int i, mtu; 688130365Smlaier 689130365Smlaier /* 690130365Smlaier * Initialize the CBQ tracing/debug facility. 691130365Smlaier */ 692130365Smlaier CBQTRACEINIT(); 693130365Smlaier 694130365Smlaier bzero((char *)ifd, sizeof (*ifd)); 695130365Smlaier mtu = ifq->altq_ifp->if_mtu; 696130365Smlaier ifd->ifq_ = ifq; 697130365Smlaier ifd->restart = restart; 698130365Smlaier ifd->maxqueued_ = maxqueued; 699130365Smlaier ifd->ns_per_byte_ = nsecPerByte; 700130365Smlaier ifd->maxpkt_ = mtu; 701130365Smlaier ifd->wrr_ = (flags & RMCF_WRR) ? 1 : 0; 702130365Smlaier ifd->efficient_ = (flags & RMCF_EFFICIENT) ? 1 : 0; 703130365Smlaier#if 1 704130365Smlaier ifd->maxiftime_ = mtu * nsecPerByte / 1000 * 16; 705130365Smlaier if (mtu * nsecPerByte > 10 * 1000000) 706130365Smlaier ifd->maxiftime_ /= 4; 707130365Smlaier#endif 708130365Smlaier 709130365Smlaier reset_cutoff(ifd); 710130365Smlaier CBQTRACE(rmc_init, 'INIT', ifd->cutoff_); 711130365Smlaier 712130365Smlaier /* 713130365Smlaier * Initialize the CBQ's WRR state. 714130365Smlaier */ 715130365Smlaier for (i = 0; i < RM_MAXPRIO; i++) { 716130365Smlaier ifd->alloc_[i] = 0; 717130365Smlaier ifd->M_[i] = 0; 718130365Smlaier ifd->num_[i] = 0; 719130365Smlaier ifd->na_[i] = 0; 720130365Smlaier ifd->active_[i] = NULL; 721130365Smlaier } 722130365Smlaier 723130365Smlaier /* 724130365Smlaier * Initialize current packet state. 
725130365Smlaier */ 726130365Smlaier ifd->qi_ = 0; 727130365Smlaier ifd->qo_ = 0; 728130365Smlaier for (i = 0; i < RM_MAXQUEUED; i++) { 729130365Smlaier ifd->class_[i] = NULL; 730130365Smlaier ifd->curlen_[i] = 0; 731130365Smlaier ifd->borrowed_[i] = NULL; 732130365Smlaier } 733130365Smlaier 734130365Smlaier /* 735130365Smlaier * Create the root class of the link-sharing structure. 736130365Smlaier */ 737130365Smlaier if ((ifd->root_ = rmc_newclass(0, ifd, 738130365Smlaier nsecPerByte, 739130365Smlaier rmc_root_overlimit, maxq, 0, 0, 740130365Smlaier maxidle, minidle, offtime, 741130365Smlaier 0, 0)) == NULL) { 742130365Smlaier printf("rmc_init: root class not allocated\n"); 743130365Smlaier return ; 744130365Smlaier } 745130365Smlaier ifd->root_->depth_ = 0; 746130365Smlaier} 747130365Smlaier 748130365Smlaier/* 749130365Smlaier * void 750130365Smlaier * rmc_queue_packet(struct rm_class *cl, mbuf_t *m) - Add packet given by 751130365Smlaier * mbuf 'm' to queue for resource class 'cl'. This routine is called 752130365Smlaier * by a driver's if_output routine. This routine must be called with 753130365Smlaier * output packet completion interrupts locked out (to avoid racing with 754130365Smlaier * rmc_dequeue_next). 
 *
 *	Returns:	0 on successful queueing
 *			-1 when packet drop occurs
 */
int
rmc_queue_packet(struct rm_class *cl, mbuf_t *m)
{
	struct timeval	 now;
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 cpri = cl->pri_;
	int		 is_empty = qempty(cl->q_);	/* sampled before the add */

	RM_GETTIME(now);
	if (ifd->cutoff_ > 0) {
		if (TV_LT(&cl->undertime_, &now)) {
			/*
			 * the class is underlimit: lower the interface
			 * cutoff to this class's depth so it may send.
			 */
			if (ifd->cutoff_ > cl->depth_)
				ifd->cutoff_ = cl->depth_;
			CBQTRACE(rmc_queue_packet, 'ffoc', cl->depth_);
		}
#if 1 /* ALTQ */
		else {
			/*
			 * the class is overlimit. if the class has
			 * underlimit ancestors, set cutoff to the lowest
			 * depth among them.
			 */
			struct rm_class *borrow = cl->borrow_;

			while (borrow != NULL &&
			       borrow->depth_ < ifd->cutoff_) {
				if (TV_LT(&borrow->undertime_, &now)) {
					ifd->cutoff_ = borrow->depth_;
					CBQTRACE(rmc_queue_packet, 'ffob', ifd->cutoff_);
					break;
				}
				borrow = borrow->borrow_;
			}
		}
#else /* !ALTQ */
		else if ((ifd->cutoff_ > 1) && cl->borrow_) {
			if (TV_LT(&cl->borrow_->undertime_, &now)) {
				ifd->cutoff_ = cl->borrow_->depth_;
				CBQTRACE(rmc_queue_packet, 'ffob',
					 cl->borrow_->depth_);
			}
		}
#endif /* !ALTQ */
	}

	if (_rmc_addq(cl, m) < 0)
		/* failed */
		return (-1);

	if (is_empty) {
		/*
		 * the queue just went non-empty: one more class is
		 * active at this priority level.
		 */
		CBQTRACE(rmc_queue_packet, 'ytpe', cl->stats_.handle);
		ifd->na_[cpri]++;
	}

	if (qlen(cl->q_) > qlimit(cl->q_)) {
		/* note: qlimit can be set to 0 or 1 */
		rmc_drop_action(cl);
		return (-1);
	}
	return (0);
}

/*
 * void
 * rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now) - Check all
 *	active classes, highest priority first, to see if they are
 *	satisfied.  On the first unsatisfied class found, lower the
 *	interface cutoff to that class's depth and return; if every
 *	class is satisfied, reset the cutoff.
 */

static void
rmc_tl_satisfied(struct rm_ifdat *ifd, struct timeval *now)
{
	int		 i;
	rm_class_t	*p, *bp;

	for (i = RM_MAXPRIO - 1; i >= 0; i--) {
		if ((bp = ifd->active_[i]) != NULL) {
			p = bp;
			/* walk the circular peer list at this priority */
			do {
				if (!rmc_satisfied(p, now)) {
					ifd->cutoff_ = p->depth_;
					return;
				}
				p = p->peer_;
			} while (p != bp);
		}
	}

	reset_cutoff(ifd);
}

/*
 * rmc_satisfied - Return 1 if the class is satisfied, 0 otherwise.
 */

/*
 * A class is satisfied when it is underlimit (undertime_ in the
 * future), or it is a leaf (depth_ == 0) that is either sleeping or
 * has a short enough queue, or it is an interior class all of whose
 * children are (recursively) satisfied.
 */
static int
rmc_satisfied(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p;

	if (cl == NULL)
		return (1);
	if (TV_LT(now, &cl->undertime_))
		return (1);
	if (cl->depth_ == 0) {
		/* leaf class: satisfied unless awake with a backlog
		   beyond its threshold */
		if (!cl->sleeping_ && (qlen(cl->q_) > cl->qthresh_))
			return (0);
		else
			return (1);
	}
	if (cl->children_ != NULL) {
		p = cl->children_;
		while (p != NULL) {
			if (!rmc_satisfied(p, now))
				return (0);
			p = p->next_;
		}
	}

	return (1);
}

/*
 * Return 1 if class 'cl' is under limit or can borrow from a parent,
 * 0 if overlimit.  As a side-effect, this routine will invoke the
 * class overlimit action if the class is overlimit.
 */

static int
rmc_under_limit(struct rm_class *cl, struct timeval *now)
{
	rm_class_t	*p = cl;	/* remember the original class */
	rm_class_t	*top;
	struct rm_ifdat	*ifd = cl->ifdat_;

	ifd->borrowed_[ifd->qi_] = NULL;
	/*
	 * If cl is the root class, then always return that it is
	 * underlimit.  Otherwise, check to see if the class is underlimit.
	 */
	if (cl->parent_ == NULL)
		return (1);

	if (cl->sleeping_) {
		if (TV_LT(now, &cl->undertime_))
			return (0);

		/* the suspend period has expired: wake the class up */
		CALLOUT_STOP(&cl->callout_);
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;
		return (1);
	}

	/*
	 * Walk up the borrow chain until an underlimit ancestor is
	 * found.  'cl' is advanced along the chain; 'p' keeps the
	 * original class for the overlimit action below.
	 */
	top = NULL;
	while (cl->undertime_.tv_sec && TV_LT(now, &cl->undertime_)) {
		if (((cl = cl->borrow_) == NULL) ||
		    (cl->depth_ > ifd->cutoff_)) {
#ifdef ADJUST_CUTOFF
			if (cl != NULL)
				/* cutoff is taking effect, just
				   return false without calling
				   the delay action. */
				return (0);
#endif
#ifdef BORROW_OFFTIME
			/*
			 * check if the class can borrow offtime too.
			 * borrow offtime from the top of the borrow
			 * chain if the top class is not overloaded.
			 */
			if (cl != NULL) {
				/* cutoff is taking effect, use this
				   class as top. */
				top = cl;
				CBQTRACE(rmc_under_limit, 'ffou', ifd->cutoff_);
			}
			if (top != NULL && top->avgidle_ == top->minidle_)
				top = NULL;
			p->overtime_ = *now;
			(p->overlimit)(p, top);
#else
			p->overtime_ = *now;
			(p->overlimit)(p, NULL);
#endif
			return (0);
		}
		top = cl;
	}

	if (cl != p)
		/* record which ancestor this transmission borrows from */
		ifd->borrowed_[ifd->qi_] = cl;
	return (1);
}

/*
 * _rmc_wrr_dequeue_next() - This is scheduler for WRR as opposed to
 *	Packet-by-packet round robin.
 *
 * The heart of the weighted round-robin scheduler, which decides which
 * class next gets to send a packet.  Highest priority first, then
 * weighted round-robin within priorities.
 *
 * Each able-to-send class gets to send until its byte allocation is
 * exhausted.  Thus, the active pointer is only changed after a class has
 * exhausted its allocation.
 *
 * If the scheduler finds no class that is underlimit or able to borrow,
 * then the first class found that had a nonzero queue and is allowed to
 * borrow gets to send.
 */

static mbuf_t *
_rmc_wrr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	struct rm_class	*cl = NULL, *first = NULL;
	u_int		 deficit;
	int		 cpri;
	mbuf_t		*m;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		if (ifd->efficient_) {
			/* check if this class is overlimit */
			if (cl->undertime_.tv_sec != 0 &&
			    rmc_under_limit(cl, &now) == 0)
				first = cl;
		}
		ifd->pollcache_ = NULL;
		goto _wrr_out;
	}
	else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		deficit = 0;
		/*
		 * Loop through twice for a priority level, if some class
		 * was unable to send a packet the first round because
		 * of the weighted round-robin mechanism.
		 * During the second loop at this level, deficit==2.
		 * (This second loop is not needed if for every class,
		 * "M[cl->pri_])" times "cl->allotment" is greater than
		 * the byte size for the largest packet in the class.)
		 */
 _wrr_loop:
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			/* replenish the byte allocation on the first pass */
			if ((deficit < 2) && (cl->bytes_alloc_ <= 0))
				cl->bytes_alloc_ += cl->w_allotment_;
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now)) {
					if (cl->bytes_alloc_ > 0 || deficit > 1)
						goto _wrr_out;

					/* underlimit but no alloc */
					deficit = 1;
#if 1
					ifd->borrowed_[ifd->qi_] = NULL;
#endif
				}
				else if (first == NULL && cl->borrow_ != NULL)
					first = cl; /* borrowing candidate */
			}

			cl->bytes_alloc_ = 0;
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);

		if (deficit == 1) {
			/* first loop found an underlimit class with deficit */
			/* Loop on same priority level, with new deficit.  */
			deficit = 2;
			goto _wrr_loop;
		}
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect,
	 * increase cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		CBQTRACE(_rmc_wrr_dequeue_next, 'ojda', ifd->cutoff_);
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	CBQTRACE(_rmc_wrr_dequeue_next, 'otsr', ifd->cutoff_);

	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	/* 'first' was only set when cl->borrow_ != NULL, so the
	   dereference below is safe on this path */
	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the book keeping...
	 */
 _wrr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_wrr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		/*
		 * Update class statistics and link data.
		 */
		if (cl->bytes_alloc_ > 0)
			cl->bytes_alloc_ -= m_pktlen(m);

		/* advance the round-robin pointer only when the class
		   exhausted its allocation (or was the forced 'first') */
		if ((cl->bytes_alloc_ <= 0) || first == cl)
			ifd->active_[cl->pri_] = cl->peer_;
		else
			ifd->active_[cl->pri_] = cl;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_PPOLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * Dequeue & return next packet from the highest priority class that
 * has a packet to send & has enough allocation to send it.  This
 * routine is called by a driver whenever it needs a new packet to
 * output.
 */
static mbuf_t *
_rmc_prr_dequeue_next(struct rm_ifdat *ifd, int op)
{
	mbuf_t		*m;
	int		 cpri;
	struct rm_class	*cl, *first = NULL;
	struct timeval	 now;

	RM_GETTIME(now);

	/*
	 * if the driver polls the top of the queue and then removes
	 * the polled packet, we must return the same packet.
	 */
	if (op == ALTDQ_REMOVE && ifd->pollcache_) {
		cl = ifd->pollcache_;
		cpri = cl->pri_;
		ifd->pollcache_ = NULL;
		goto _prr_out;
	} else {
		/* mode == ALTDQ_POLL || pollcache == NULL */
		ifd->pollcache_ = NULL;
		ifd->borrowed_[ifd->qi_] = NULL;
	}
#ifdef ADJUST_CUTOFF
 _again:
#endif
	for (cpri = RM_MAXPRIO - 1; cpri >= 0; cpri--) {
		if (ifd->na_[cpri] == 0)
			continue;
		cl = ifd->active_[cpri];
		ASSERT(cl != NULL);
		do {
			if (!qempty(cl->q_)) {
				if ((cl->undertime_.tv_sec == 0) ||
				    rmc_under_limit(cl, &now))
					goto _prr_out;
				if (first == NULL && cl->borrow_ != NULL)
					first = cl;
			}
			cl = cl->peer_;
		} while (cl != ifd->active_[cpri]);
	}

#ifdef ADJUST_CUTOFF
	/*
	 * no underlimit class found.  if cutoff is taking effect, increase
	 * cutoff and try again.
	 */
	if (first != NULL && ifd->cutoff_ < ifd->root_->depth_) {
		ifd->cutoff_++;
		goto _again;
	}
#endif /* ADJUST_CUTOFF */
	/*
	 * If LINK_EFFICIENCY is turned on, then the first overlimit
	 * class we encounter will send a packet if all the classes
	 * of the link-sharing structure are overlimit.
	 */
	reset_cutoff(ifd);
	if (!ifd->efficient_ || first == NULL)
		return (NULL);

	/* 'first' was only set when cl->borrow_ != NULL, so the
	   dereference below is safe on this path */
	cl = first;
	cpri = cl->pri_;
#if 0	/* too time-consuming for nothing */
	if (cl->sleeping_)
		CALLOUT_STOP(&cl->callout_);
	cl->sleeping_ = 0;
	cl->undertime_.tv_sec = 0;
#endif
	ifd->borrowed_[ifd->qi_] = cl->borrow_;
	ifd->cutoff_ = cl->borrow_->depth_;

	/*
	 * Dequeue the packet and do the book keeping...
	 */
 _prr_out:
	if (op == ALTDQ_REMOVE) {
		m = _rmc_getq(cl);
		if (m == NULL)
			panic("_rmc_prr_dequeue_next");
		if (qempty(cl->q_))
			ifd->na_[cpri]--;

		ifd->active_[cpri] = cl->peer_;

		ifd->class_[ifd->qi_] = cl;
		ifd->curlen_[ifd->qi_] = m_pktlen(m);
		ifd->now_[ifd->qi_] = now;
		ifd->qi_ = (ifd->qi_ + 1) % ifd->maxqueued_;
		ifd->queued_++;
	} else {
		/* mode == ALTDQ_POLL */
		m = _rmc_pollq(cl);
		ifd->pollcache_ = cl;
	}
	return (m);
}

/*
 * mbuf_t *
 * rmc_dequeue_next(struct rm_ifdat *ifd, struct timeval *now) - this function
 *	is invoked by the packet driver to get the next packet to be
 *	dequeued and output on the link.  If WRR is enabled, then the
 *	WRR dequeue next routine will determine the next packet to be sent.
 *	Otherwise, packet-by-packet round robin is invoked.
 *
 *	Returns:	NULL, if a packet is not available or if all
 *			classes are overlimit.
 *
 *			Otherwise, Pointer to the next packet.
 */

mbuf_t *
rmc_dequeue_next(struct rm_ifdat *ifd, int mode)
{
	/* refuse to hand out more packets than the driver can have
	   outstanding at once */
	if (ifd->queued_ >= ifd->maxqueued_)
		return (NULL);
	else if (ifd->wrr_)
		return (_rmc_wrr_dequeue_next(ifd, mode));
	else
		return (_rmc_prr_dequeue_next(ifd, mode));
}

/*
 * Update the utilization estimate for the packet that just completed.
 * The packet's class & the parent(s) of that class all get their
 * estimators updated.  This routine is called by the driver's output-
 * packet-completion interrupt service routine.
 */

/*
 * a macro to approximate "divide by 1000" that gives 0.000999,
 * if a value has enough effective digits.
 * (on pentium, mul takes 9 cycles but div takes 46!)
 */
#define	NSEC_TO_USEC(t)	(((t) >> 10) + ((t) >> 16) + ((t) >> 17))
void
rmc_update_class_util(struct rm_ifdat *ifd)
{
	int		 idle, avgidle, pktlen;
	int		 pkt_time, tidle;
	rm_class_t	*cl, *borrowed;
	rm_class_t	*borrows;
	struct timeval	*nowp;

	/*
	 * Get the most recent completed class.
	 */
	if ((cl = ifd->class_[ifd->qo_]) == NULL)
		return;

	pktlen = ifd->curlen_[ifd->qo_];
	borrowed = ifd->borrowed_[ifd->qo_];
	borrows = borrowed;

	PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);

	/*
	 * Run estimator on class and its ancestors.
	 */
	/*
	 * rm_update_class_util is designed to be called when the
	 * transfer is completed from a xmit complete interrupt,
	 * but most drivers don't implement an upcall for that.
	 * so, just use estimated completion time.
	 * as a result, ifd->qi_ and ifd->qo_ are always synced.
	 */
	nowp = &ifd->now_[ifd->qo_];
	/* get pkt_time (for link) in usec */
#if 1  /* use approximation */
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_;
	pkt_time = NSEC_TO_USEC(pkt_time);
#else
	pkt_time = ifd->curlen_[ifd->qo_] * ifd->ns_per_byte_ / 1000;
#endif
#if 1 /* ALTQ4PPP */
	if (TV_LT(nowp, &ifd->ifnow_)) {
		int iftime;

		/*
		 * make sure the estimated completion time does not go
		 * too far.  it can happen when the link layer supports
		 * data compression or the interface speed is set to
		 * a much lower value.
		 */
		TV_DELTA(&ifd->ifnow_, nowp, iftime);
		if (iftime+pkt_time < ifd->maxiftime_) {
			TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
		} else {
			TV_ADD_DELTA(nowp, ifd->maxiftime_, &ifd->ifnow_);
		}
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#else
	if (TV_LT(nowp, &ifd->ifnow_)) {
		TV_ADD_DELTA(&ifd->ifnow_, pkt_time, &ifd->ifnow_);
	} else {
		TV_ADD_DELTA(nowp, pkt_time, &ifd->ifnow_);
	}
#endif

	/* walk from the completed class up to the root, updating the
	   exponentially weighted avgidle_ estimator of each class */
	while (cl != NULL) {
		TV_DELTA(&ifd->ifnow_, &cl->last_, idle);
		if (idle >= 2000000)
			/*
			 * this class is idle enough, reset avgidle.
			 * (TV_DELTA returns 2000000 us when delta is large.)
			 */
			cl->avgidle_ = cl->maxidle_;

		/* get pkt_time (for class) in usec */
#if 1  /* use approximation */
		pkt_time = pktlen * cl->ns_per_byte_;
		pkt_time = NSEC_TO_USEC(pkt_time);
#else
		pkt_time = pktlen * cl->ns_per_byte_ / 1000;
#endif
		idle -= pkt_time;

		/* EWMA: avgidle += (idle - avgidle) / 2^RM_FILTER_GAIN */
		avgidle = cl->avgidle_;
		avgidle += idle - (avgidle >> RM_FILTER_GAIN);
		cl->avgidle_ = avgidle;

		/* Are we overlimit ? */
		if (avgidle <= 0) {
			CBQTRACE(rmc_update_class_util, 'milo', cl->stats_.handle);
#if 1 /* ALTQ */
			/*
			 * need some lower bound for avgidle, otherwise
			 * a borrowing class gets unbounded penalty.
			 */
			if (avgidle < cl->minidle_)
				avgidle = cl->avgidle_ = cl->minidle_;
#endif
			/* set next idle to make avgidle 0 */
			tidle = pkt_time +
				(((1 - RM_POWER) * avgidle) >> RM_FILTER_GAIN);
			TV_ADD_DELTA(nowp, tidle, &cl->undertime_);
			++cl->stats_.over;
		} else {
			/* underlimit: clamp avgidle and wake the class
			   if it was sleeping */
			cl->avgidle_ =
			    (avgidle > cl->maxidle_) ? cl->maxidle_ : avgidle;
			cl->undertime_.tv_sec = 0;
			if (cl->sleeping_) {
				CALLOUT_STOP(&cl->callout_);
				cl->sleeping_ = 0;
			}
		}

		if (borrows != NULL) {
			if (borrows != cl)
				++cl->stats_.borrows;
			else
				borrows = NULL;
		}
		cl->last_ = ifd->ifnow_;
		cl->last_pkttime_ = pkt_time;

#if 1
		if (cl->parent_ == NULL) {
			/* take stats of root class */
			PKTCNTR_ADD(&cl->stats_.xmit_cnt, pktlen);
		}
#endif

		cl = cl->parent_;
	}

	/*
	 * Check to see if cutoff needs to set to a new level.
	 */
	cl = ifd->class_[ifd->qo_];
	if (borrowed && (ifd->cutoff_ >= borrowed->depth_)) {
#if 1 /* ALTQ */
		if ((qlen(cl->q_) <= 0) || TV_LT(nowp, &borrowed->undertime_)) {
			rmc_tl_satisfied(ifd, nowp);
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#else /* !ALTQ */
		/* NOTE(review): this dead branch references '&now', which
		   is not declared in this function; it would not compile
		   if the #if above were flipped. */
		if ((qlen(cl->q_) <= 1) || TV_LT(&now, &borrowed->undertime_)) {
			reset_cutoff(ifd);
#ifdef notdef
			rmc_tl_satisfied(ifd, &now);
#endif
			CBQTRACE(rmc_update_class_util, 'broe', ifd->cutoff_);
		} else {
			ifd->cutoff_ = borrowed->depth_;
			CBQTRACE(rmc_update_class_util, 'ffob', borrowed->depth_);
		}
#endif /* !ALTQ */
	}

	/*
	 * Release class slot
	 */
	ifd->borrowed_[ifd->qo_] = NULL;
	ifd->class_[ifd->qo_] = NULL;
	ifd->qo_ = (ifd->qo_ + 1) % ifd->maxqueued_;
	ifd->queued_--;
}

/*
 * void
 * rmc_drop_action(struct rm_class *cl) - Generic (not protocol-specific)
 *	over-limit action routines.  These get invoked by rmc_under_limit()
 *	if a class with packets to send if over its bandwidth limit & can't
 *	borrow from a parent class.
 *
 *	Returns: NONE
 */

static void
rmc_drop_action(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	ASSERT(qlen(cl->q_) > 0);
	_rmc_dropq(cl);
	/* if the queue drained, one fewer class is active at this
	   priority level */
	if (qempty(cl->q_))
		ifd->na_[cl->pri_]--;
}

/* Flush every packet queued on class 'cl' and update the active-class
 * count for its priority level. */
void rmc_dropall(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;

	if (!qempty(cl->q_)) {
		_flushq(cl->q_);

		ifd->na_[cl->pri_]--;
	}
}

#if (__FreeBSD_version > 300000)
/* hzto() is removed from FreeBSD-3.0 */
static int hzto(struct timeval *);

/*
 * Compatibility shim: convert an absolute timeval 'tv' into the number
 * of clock ticks from now, via tvtohz() on the difference.
 */
static int
hzto(tv)
	struct timeval *tv;
{
	struct timeval t2;

	getmicrotime(&t2);
	t2.tv_sec = tv->tv_sec - t2.tv_sec;
	t2.tv_usec = tv->tv_usec - t2.tv_usec;
	return (tvtohz(&t2));
}
#endif /* __FreeBSD_version > 300000 */

/*
 * void
 * rmc_delay_action(struct rm_class *cl) - This function is the generic CBQ
 *	delay action routine.  It is invoked via rmc_under_limit when the
 *	packet is discovered to be overlimit.
 *
 *	If the delay action is result of borrow class being overlimit, then
 *	delay for the offtime of the borrowing class that is overlimit.
 *
 *	Returns: NONE
 */

void
rmc_delay_action(struct rm_class *cl, struct rm_class *borrow)
{
	int	delay, t, extradelay;

	cl->stats_.overactions++;
	/* base delay: how far undertime_ lies beyond the overlimit time */
	TV_DELTA(&cl->undertime_, &cl->overtime_, delay);
#ifndef BORROW_OFFTIME
	delay += cl->offtime_;
#endif

	if (!cl->sleeping_) {
		CBQTRACE(rmc_delay_action, 'yled', cl->stats_.handle);
#ifdef BORROW_OFFTIME
		if (borrow != NULL)
			extradelay = borrow->offtime_;
		else
#endif
			extradelay = cl->offtime_;

#ifdef ALTQ
		/*
		 * XXX recalculate suspend time:
		 * current undertime is (tidle + pkt_time) calculated
		 * from the last transmission.
		 *	tidle: time required to bring avgidle back to 0
		 *	pkt_time: target waiting time for this class
		 * we need to replace pkt_time by offtime
		 */
		extradelay -= cl->last_pkttime_;
#endif
		if (extradelay > 0) {
			TV_ADD_DELTA(&cl->undertime_, extradelay, &cl->undertime_);
			delay += extradelay;
		}

		cl->sleeping_ = 1;
		cl->stats_.delays++;

		/*
		 * Since packets are phased randomly with respect to the
		 * clock, 1 tick (the next clock tick) can be an arbitrarily
		 * short time so we have to wait for at least two ticks.
		 * NOTE:  If there's no other traffic, we need the timer as
		 * a 'backstop' to restart this class.
		 */
		if (delay > tick * 2) {
#ifdef __FreeBSD__
			/* FreeBSD rounds up the tick */
			t = hzto(&cl->undertime_);
#else
			/* other BSDs round down the tick */
			t = hzto(&cl->undertime_) + 1;
#endif
		} else
			t = 2;
		CALLOUT_RESET(&cl->callout_, t,
			      (timeout_t *)rmc_restart, (caddr_t)cl);
	}
}

/*
 * void
 * rmc_restart() - is just a helper routine for rmc_delay_action -- it is
 *	called by the system timer code & is responsible for checking if the
 *	class is still sleeping (it might have been restarted as a side
 *	effect of the queue scan on a packet arrival) and, if so, restarting
 *	output for the class.  Inspecting the class state & restarting output
 *	require locking the class structure.  In general the driver is
 *	responsible for locking but this is the only routine that is not
 *	called directly or indirectly from the interface driver so it has to
 *	know about system locking conventions.  Under bsd, locking is done
 *	by raising IPL to splimp so that's what's implemented here.  On a
 *	different system this would probably need to be changed.
 *
 *	Returns:	NONE
 */

static void
rmc_restart(struct rm_class *cl)
{
	struct rm_ifdat	*ifd = cl->ifdat_;
	int		 s;

#ifdef __NetBSD__
	s = splnet();
#else
	s = splimp();
#endif
	IFQ_LOCK(ifd->ifq_);
	if (cl->sleeping_) {
		/* wake the class and kick the driver if it has room
		   for another packet and registered a restart hook */
		cl->sleeping_ = 0;
		cl->undertime_.tv_sec = 0;

		if (ifd->queued_ < ifd->maxqueued_ && ifd->restart != NULL) {
			CBQTRACE(rmc_restart, 'trts', cl->stats_.handle);
			(ifd->restart)(ifd->ifq_);
		}
	}
	IFQ_UNLOCK(ifd->ifq_);
	splx(s);
}

/*
 * void
 * rmc_root_overlimit(struct rm_class *cl) - This is the generic overlimit
 *	handling routine for the root class of the link sharing structure.
 *	The root class must never be overlimit; reaching this routine
 *	indicates a link-sharing configuration or accounting bug, hence
 *	the panic.
 *
 *	Returns: NONE
 */

static void
rmc_root_overlimit(struct rm_class *cl, struct rm_class *borrow)
{
	panic("rmc_root_overlimit");
}

/*
 * Packet Queue handling routines.  Eventually, this is to localize the
 *	effects on the code whether queues are red queues or droptail
 *	queues.
1613130365Smlaier */ 1614130365Smlaier 1615130365Smlaierstatic int 1616130365Smlaier_rmc_addq(rm_class_t *cl, mbuf_t *m) 1617130365Smlaier{ 1618130365Smlaier#ifdef ALTQ_RIO 1619130365Smlaier if (q_is_rio(cl->q_)) 1620130365Smlaier return rio_addq((rio_t *)cl->red_, cl->q_, m, cl->pktattr_); 1621130365Smlaier#endif 1622130365Smlaier#ifdef ALTQ_RED 1623130365Smlaier if (q_is_red(cl->q_)) 1624130365Smlaier return red_addq(cl->red_, cl->q_, m, cl->pktattr_); 1625130365Smlaier#endif /* ALTQ_RED */ 1626130365Smlaier 1627130365Smlaier if (cl->flags_ & RMCF_CLEARDSCP) 1628130365Smlaier write_dsfield(m, cl->pktattr_, 0); 1629130365Smlaier 1630130365Smlaier _addq(cl->q_, m); 1631130365Smlaier return (0); 1632130365Smlaier} 1633130365Smlaier 1634130365Smlaier/* note: _rmc_dropq is not called for red */ 1635130365Smlaierstatic void 1636130365Smlaier_rmc_dropq(rm_class_t *cl) 1637130365Smlaier{ 1638130365Smlaier mbuf_t *m; 1639130365Smlaier 1640130365Smlaier if ((m = _getq(cl->q_)) != NULL) 1641130365Smlaier m_freem(m); 1642130365Smlaier} 1643130365Smlaier 1644130365Smlaierstatic mbuf_t * 1645130365Smlaier_rmc_getq(rm_class_t *cl) 1646130365Smlaier{ 1647130365Smlaier#ifdef ALTQ_RIO 1648130365Smlaier if (q_is_rio(cl->q_)) 1649130365Smlaier return rio_getq((rio_t *)cl->red_, cl->q_); 1650130365Smlaier#endif 1651130365Smlaier#ifdef ALTQ_RED 1652130365Smlaier if (q_is_red(cl->q_)) 1653130365Smlaier return red_getq(cl->red_, cl->q_); 1654130365Smlaier#endif 1655130365Smlaier return _getq(cl->q_); 1656130365Smlaier} 1657130365Smlaier 1658130365Smlaierstatic mbuf_t * 1659130365Smlaier_rmc_pollq(rm_class_t *cl) 1660130365Smlaier{ 1661130365Smlaier return qhead(cl->q_); 1662130365Smlaier} 1663130365Smlaier 1664130365Smlaier#ifdef CBQ_TRACE 1665130365Smlaier 1666130365Smlaierstruct cbqtrace cbqtrace_buffer[NCBQTRACE+1]; 1667130365Smlaierstruct cbqtrace *cbqtrace_ptr = NULL; 1668130365Smlaierint cbqtrace_count; 1669130365Smlaier 1670130365Smlaier/* 1671130365Smlaier * DDB hook to trace 
cbq events: 1672130365Smlaier * the last 1024 events are held in a circular buffer. 1673130365Smlaier * use "call cbqtrace_dump(N)" to display 20 events from Nth event. 1674130365Smlaier */ 1675130365Smlaiervoid cbqtrace_dump(int); 1676130365Smlaierstatic char *rmc_funcname(void *); 1677130365Smlaier 1678130365Smlaierstatic struct rmc_funcs { 1679130365Smlaier void *func; 1680130365Smlaier char *name; 1681130365Smlaier} rmc_funcs[] = 1682130365Smlaier{ 1683130365Smlaier rmc_init, "rmc_init", 1684130365Smlaier rmc_queue_packet, "rmc_queue_packet", 1685130365Smlaier rmc_under_limit, "rmc_under_limit", 1686130365Smlaier rmc_update_class_util, "rmc_update_class_util", 1687130365Smlaier rmc_delay_action, "rmc_delay_action", 1688130365Smlaier rmc_restart, "rmc_restart", 1689130365Smlaier _rmc_wrr_dequeue_next, "_rmc_wrr_dequeue_next", 1690130365Smlaier NULL, NULL 1691130365Smlaier}; 1692130365Smlaier 1693130365Smlaierstatic char *rmc_funcname(void *func) 1694130365Smlaier{ 1695130365Smlaier struct rmc_funcs *fp; 1696130365Smlaier 1697130365Smlaier for (fp = rmc_funcs; fp->func != NULL; fp++) 1698130365Smlaier if (fp->func == func) 1699130365Smlaier return (fp->name); 1700130365Smlaier return ("unknown"); 1701130365Smlaier} 1702130365Smlaier 1703130365Smlaiervoid cbqtrace_dump(int counter) 1704130365Smlaier{ 1705130365Smlaier int i, *p; 1706130365Smlaier char *cp; 1707130365Smlaier 1708130365Smlaier counter = counter % NCBQTRACE; 1709130365Smlaier p = (int *)&cbqtrace_buffer[counter]; 1710130365Smlaier 1711130365Smlaier for (i=0; i<20; i++) { 1712130365Smlaier printf("[0x%x] ", *p++); 1713130365Smlaier printf("%s: ", rmc_funcname((void *)*p++)); 1714130365Smlaier cp = (char *)p++; 1715130365Smlaier printf("%c%c%c%c: ", cp[0], cp[1], cp[2], cp[3]); 1716130365Smlaier printf("%d\n",*p++); 1717130365Smlaier 1718130365Smlaier if (p >= (int *)&cbqtrace_buffer[NCBQTRACE]) 1719130365Smlaier p = (int *)cbqtrace_buffer; 1720130365Smlaier } 1721130365Smlaier} 1722130365Smlaier#endif 
/* CBQ_TRACE */
#endif /* ALTQ_CBQ */

#if defined(ALTQ_CBQ) || defined(ALTQ_RED) || defined(ALTQ_RIO) || defined(ALTQ_HFSC) || defined(ALTQ_PRIQ)
#if !defined(__GNUC__) || defined(ALTQ_DEBUG)

/*
 * Generic packet queue primitives shared by the ALTQ disciplines.
 * A class_queue_t holds a circular singly-linked list of mbufs:
 * qtail(q) points at the last packet and qtail(q)->m_nextpkt is the
 * head; an empty queue has qtail(q) == NULL.
 */

/* Append mbuf m at the tail of queue q. */
void
_addq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0;

	if ((m0 = qtail(q)) != NULL)
		m->m_nextpkt = m0->m_nextpkt;	/* new tail points at head */
	else
		m0 = m;		/* empty queue: m becomes the sole element */
	m0->m_nextpkt = m;
	qtail(q) = m;
	qlen(q)++;
}

/* Remove and return the packet at the head of queue q (NULL if empty). */
mbuf_t *
_getq(class_queue_t *q)
{
	mbuf_t	*m, *m0;

	if ((m = qtail(q)) == NULL)
		return (NULL);
	if ((m0 = m->m_nextpkt) != m)
		m->m_nextpkt = m0->m_nextpkt;	/* unlink head from the ring */
	else {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;	/* last packet: queue goes empty */
	}
	qlen(q)--;
	m0->m_nextpkt = NULL;
	return (m0);
}

/* drop a packet at the tail of the queue */
mbuf_t *
_getq_tail(class_queue_t *q)
{
	mbuf_t	*m, *m0, *prev;

	if ((m = m0 = qtail(q)) == NULL)
		return NULL;
	/* walk the ring to find the element preceding the tail */
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;	/* tail was the only packet */
	} else
		qtail(q) = prev;
	qlen(q)--;
	m->m_nextpkt = NULL;

	return (m);
}

/* randomly select a packet in the queue */
mbuf_t *
_getq_random(class_queue_t *q)
{
	struct mbuf	*m;
	int		 i, n;

	if ((m = qtail(q)) == NULL)
		return NULL;
	if (m->m_nextpkt == m) {
		ASSERT(qlen(q) == 1);
		qtail(q) = NULL;	/* only one packet: take it */
	} else {
		struct mbuf *prev = NULL;

		/* advance 1..qlen steps from the tail (note: '%' has a
		 * slight modulo bias unless qlen divides the random range) */
		n = arc4random() % qlen(q) + 1;
		for (i = 0; i < n; i++) {
			prev = m;
			m = m->m_nextpkt;
		}
		prev->m_nextpkt = m->m_nextpkt;
		if (m == qtail(q))
			qtail(q) = prev;	/* removed the tail itself */
	}
	qlen(q)--;
	m->m_nextpkt = NULL;
	return (m);
}

/*
 * Unlink mbuf m from queue q.  The caller must guarantee that m is
 * actually on the queue; otherwise the ring walk will not terminate.
 */
void
_removeq(class_queue_t *q, mbuf_t *m)
{
	mbuf_t	*m0, *prev;

	m0 = qtail(q);
	do {
		prev = m0;
		m0 = m0->m_nextpkt;
	} while (m0 != m);
	prev->m_nextpkt = m->m_nextpkt;
	if (prev == m)
		qtail(q) = NULL;	/* m was the only packet */
	else if (qtail(q) == m)
		qtail(q) = prev;	/* removing the tail itself */
	qlen(q)--;
}

/* Dequeue and free every packet on queue q. */
void
_flushq(class_queue_t *q)
{
	mbuf_t	*m;

	while ((m = _getq(q)) != NULL)
		m_freem(m);
	ASSERT(qlen(q) == 0);
}

#endif /* !__GNUC__ || ALTQ_DEBUG */
#endif /* ALTQ_CBQ || ALTQ_RED || ALTQ_RIO ||
ALTQ_HFSC || ALTQ_PRIQ */ 1842