1130368Smlaier/* $FreeBSD$ */ 2130365Smlaier/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */ 3130365Smlaier 4130365Smlaier/* 5130365Smlaier * Copyright (C) 1998-2003 6130365Smlaier * Sony Computer Science Laboratories Inc. All rights reserved. 7130365Smlaier * 8130365Smlaier * Redistribution and use in source and binary forms, with or without 9130365Smlaier * modification, are permitted provided that the following conditions 10130365Smlaier * are met: 11130365Smlaier * 1. Redistributions of source code must retain the above copyright 12130365Smlaier * notice, this list of conditions and the following disclaimer. 13130365Smlaier * 2. Redistributions in binary form must reproduce the above copyright 14130365Smlaier * notice, this list of conditions and the following disclaimer in the 15130365Smlaier * documentation and/or other materials provided with the distribution. 16130365Smlaier * 17130365Smlaier * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18130365Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19130365Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20130365Smlaier * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21130365Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22130365Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23130365Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24130365Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25130365Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26130365Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27130365Smlaier * SUCH DAMAGE. 28130365Smlaier */ 29130365Smlaier/* 30130365Smlaier * Copyright (c) 1990-1994 Regents of the University of California. 31130365Smlaier * All rights reserved. 32130365Smlaier * 33130365Smlaier * Redistribution and use in source and binary forms, with or without 34130365Smlaier * modification, are permitted provided that the following conditions 35130365Smlaier * are met: 36130365Smlaier * 1. Redistributions of source code must retain the above copyright 37130365Smlaier * notice, this list of conditions and the following disclaimer. 38130365Smlaier * 2. Redistributions in binary form must reproduce the above copyright 39130365Smlaier * notice, this list of conditions and the following disclaimer in the 40130365Smlaier * documentation and/or other materials provided with the distribution. 41130365Smlaier * 3. All advertising materials mentioning features or use of this software 42130365Smlaier * must display the following acknowledgement: 43130365Smlaier * This product includes software developed by the Computer Systems 44130365Smlaier * Engineering Group at Lawrence Berkeley Laboratory. 45130365Smlaier * 4. Neither the name of the University nor of the Laboratory may be used 46130365Smlaier * to endorse or promote products derived from this software without 47130365Smlaier * specific prior written permission. 48130365Smlaier * 49130365Smlaier * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50130365Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51130365Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52130365Smlaier * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53130365Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54130365Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55130365Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56130365Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57130365Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58130365Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59130365Smlaier * SUCH DAMAGE. 60130365Smlaier */ 61130365Smlaier 62130365Smlaier#if defined(__FreeBSD__) || defined(__NetBSD__) 63130365Smlaier#include "opt_altq.h" 64130365Smlaier#include "opt_inet.h" 65130365Smlaier#ifdef __FreeBSD__ 66130365Smlaier#include "opt_inet6.h" 67130365Smlaier#endif 68130365Smlaier#endif /* __FreeBSD__ || __NetBSD__ */ 69130365Smlaier#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ 70130365Smlaier 71130365Smlaier#include <sys/param.h> 72130365Smlaier#include <sys/malloc.h> 73130365Smlaier#include <sys/mbuf.h> 74130365Smlaier#include <sys/socket.h> 75130365Smlaier#include <sys/systm.h> 76130365Smlaier#include <sys/errno.h> 77130365Smlaier#if 1 /* ALTQ3_COMPAT */ 78130365Smlaier#include <sys/proc.h> 79130365Smlaier#include <sys/sockio.h> 80130365Smlaier#include <sys/kernel.h> 81130365Smlaier#endif 82130365Smlaier 83130365Smlaier#include <net/if.h> 84130365Smlaier 85130365Smlaier#include <netinet/in.h> 86130365Smlaier#include <netinet/in_systm.h> 87130365Smlaier#include <netinet/ip.h> 88130365Smlaier#ifdef INET6 89130365Smlaier#include <netinet/ip6.h> 90130365Smlaier#endif 91130365Smlaier 92130365Smlaier#include <net/pfvar.h> 93130365Smlaier#include <altq/altq.h> 94130365Smlaier#include <altq/altq_cdnr.h> 95130365Smlaier#include <altq/altq_red.h> 96130365Smlaier#include <altq/altq_rio.h> 97130365Smlaier#ifdef ALTQ3_COMPAT 98130365Smlaier#include <altq/altq_conf.h> 99130365Smlaier#endif 100130365Smlaier 101130365Smlaier/* 102130365Smlaier * RIO: RED with IN/OUT bit 103130365Smlaier * described in 104130365Smlaier * "Explicit Allocation of Best Effort Packet Delivery Service" 105130365Smlaier * David D. Clark and Wenjia Fang, MIT Lab for Computer Science 106130365Smlaier * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} 107130365Smlaier * 108130365Smlaier * this implementation is extended to support more than 2 drop precedence 109130365Smlaier * values as described in RFC2597 (Assured Forwarding PHB Group). 110130365Smlaier * 111130365Smlaier */ 112130365Smlaier/* 113130365Smlaier * AF DS (differentiated service) codepoints. 114130365Smlaier * (classes can be mapped to CBQ or H-FSC classes.) 115130365Smlaier * 116130365Smlaier * 0 1 2 3 4 5 6 7 117130365Smlaier * +---+---+---+---+---+---+---+---+ 118130365Smlaier * | CLASS |DropPre| 0 | CU | 119130365Smlaier * +---+---+---+---+---+---+---+---+ 120130365Smlaier * 121130365Smlaier * class 1: 001 122130365Smlaier * class 2: 010 123130365Smlaier * class 3: 011 124130365Smlaier * class 4: 100 125130365Smlaier * 126130365Smlaier * low drop prec: 01 127130365Smlaier * medium drop prec: 10 128130365Smlaier * high drop prec: 01 129130365Smlaier */ 130130365Smlaier 131130365Smlaier/* normal red parameters */ 132130365Smlaier#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 133130365Smlaier /* q_weight = 0.00195 */ 134130365Smlaier 135130365Smlaier/* red parameters for a slow link */ 136130365Smlaier#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 137130365Smlaier /* q_weight = 0.0078125 */ 138130365Smlaier 139130365Smlaier/* red parameters for a very slow link (e.g., dialup) */ 140130365Smlaier#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 141130365Smlaier /* q_weight = 0.015625 */ 142130365Smlaier 143130365Smlaier/* fixed-point uses 12-bit decimal places */ 144130365Smlaier#define FP_SHIFT 12 /* fixed-point shift */ 145130365Smlaier 146130365Smlaier/* red parameters for drop probability */ 147130365Smlaier#define INV_P_MAX 10 /* inverse of max drop probability */ 148130365Smlaier#define TH_MIN 5 /* min threshold */ 149130365Smlaier#define TH_MAX 15 /* max threshold */ 150130365Smlaier 151130365Smlaier#define RIO_LIMIT 60 /* default max queue lenght */ 152130365Smlaier#define RIO_STATS /* collect statistics */ 153130365Smlaier 154130365Smlaier#define TV_DELTA(a, b, delta) { \ 155130365Smlaier register int xxs; \ 156130365Smlaier \ 157130365Smlaier delta = (a)->tv_usec - (b)->tv_usec; \ 158130365Smlaier if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \ 159130365Smlaier if (xxs < 0) { \ 160130365Smlaier delta = 60000000; \ 161130365Smlaier } else if (xxs > 4) { \ 162130365Smlaier if (xxs > 60) \ 163130365Smlaier delta = 60000000; \ 164130365Smlaier else \ 165130365Smlaier delta += xxs * 1000000; \ 166130365Smlaier } else while (xxs > 0) { \ 167130365Smlaier delta += 1000000; \ 168130365Smlaier xxs--; \ 169130365Smlaier } \ 170130365Smlaier } \ 171130365Smlaier} 172130365Smlaier 173130365Smlaier#ifdef ALTQ3_COMPAT 174130365Smlaier/* rio_list keeps all rio_queue_t's allocated. */ 175130365Smlaierstatic rio_queue_t *rio_list = NULL; 176130365Smlaier#endif 177130365Smlaier/* default rio parameter values */ 178130365Smlaierstatic struct redparams default_rio_params[RIO_NDROPPREC] = { 179130365Smlaier /* th_min, th_max, inv_pmax */ 180130365Smlaier { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */ 181130365Smlaier { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */ 182130365Smlaier { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */ 183130365Smlaier}; 184130365Smlaier 185130365Smlaier/* internal function prototypes */ 186130365Smlaierstatic int dscp2index(u_int8_t); 187130365Smlaier#ifdef ALTQ3_COMPAT 188130365Smlaierstatic int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 189130365Smlaierstatic struct mbuf *rio_dequeue(struct ifaltq *, int); 190130365Smlaierstatic int rio_request(struct ifaltq *, int, void *); 191130365Smlaierstatic int rio_detach(rio_queue_t *); 192130365Smlaier 193130365Smlaier/* 194130365Smlaier * rio device interface 195130365Smlaier */ 196130365Smlaieraltqdev_decl(rio); 197130365Smlaier 198130365Smlaier#endif /* ALTQ3_COMPAT */ 199130365Smlaier 200130365Smlaierrio_t * 201130365Smlaierrio_alloc(int weight, struct redparams *params, int flags, int pkttime) 202130365Smlaier{ 203130365Smlaier rio_t *rp; 204130365Smlaier int w, i; 205130365Smlaier int npkts_per_sec; 206130365Smlaier 207184205Sdes rp = malloc(sizeof(rio_t), M_DEVBUF, M_WAITOK); 208130365Smlaier if (rp == NULL) 209130365Smlaier return (NULL); 210130365Smlaier bzero(rp, sizeof(rio_t)); 211130365Smlaier 212130365Smlaier rp->rio_flags = flags; 213130365Smlaier if (pkttime == 0) 214130365Smlaier /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 215130365Smlaier rp->rio_pkttime = 800; 216130365Smlaier else 217130365Smlaier rp->rio_pkttime = pkttime; 218130365Smlaier 219130365Smlaier if (weight != 0) 220130365Smlaier rp->rio_weight = weight; 221130365Smlaier else { 222130365Smlaier /* use default */ 223130365Smlaier rp->rio_weight = W_WEIGHT; 224130365Smlaier 225130365Smlaier /* when the link is very slow, adjust red parameters */ 226130365Smlaier npkts_per_sec = 1000000 / rp->rio_pkttime; 227130365Smlaier if (npkts_per_sec < 50) { 228130365Smlaier /* up to about 400Kbps */ 229130365Smlaier rp->rio_weight = W_WEIGHT_2; 230130365Smlaier } else if (npkts_per_sec < 300) { 231130365Smlaier /* up to about 2.4Mbps */ 232130365Smlaier rp->rio_weight = W_WEIGHT_1; 233130365Smlaier } 234130365Smlaier } 235130365Smlaier 236130365Smlaier /* calculate wshift. weight must be power of 2 */ 237130365Smlaier w = rp->rio_weight; 238130365Smlaier for (i = 0; w > 1; i++) 239130365Smlaier w = w >> 1; 240130365Smlaier rp->rio_wshift = i; 241130365Smlaier w = 1 << rp->rio_wshift; 242130365Smlaier if (w != rp->rio_weight) { 243130365Smlaier printf("invalid weight value %d for red! use %d\n", 244130365Smlaier rp->rio_weight, w); 245130365Smlaier rp->rio_weight = w; 246130365Smlaier } 247130365Smlaier 248130365Smlaier /* allocate weight table */ 249130365Smlaier rp->rio_wtab = wtab_alloc(rp->rio_weight); 250130365Smlaier 251130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) { 252130365Smlaier struct dropprec_state *prec = &rp->rio_precstate[i]; 253130365Smlaier 254130365Smlaier prec->avg = 0; 255130365Smlaier prec->idle = 1; 256130365Smlaier 257130365Smlaier if (params == NULL || params[i].inv_pmax == 0) 258130365Smlaier prec->inv_pmax = default_rio_params[i].inv_pmax; 259130365Smlaier else 260130365Smlaier prec->inv_pmax = params[i].inv_pmax; 261130365Smlaier if (params == NULL || params[i].th_min == 0) 262130365Smlaier prec->th_min = default_rio_params[i].th_min; 263130365Smlaier else 264130365Smlaier prec->th_min = params[i].th_min; 265130365Smlaier if (params == NULL || params[i].th_max == 0) 266130365Smlaier prec->th_max = default_rio_params[i].th_max; 267130365Smlaier else 268130365Smlaier prec->th_max = params[i].th_max; 269130365Smlaier 270130365Smlaier /* 271130365Smlaier * th_min_s and th_max_s are scaled versions of th_min 272130365Smlaier * and th_max to be compared with avg. 273130365Smlaier */ 274130365Smlaier prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT); 275130365Smlaier prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT); 276130365Smlaier 277130365Smlaier /* 278130365Smlaier * precompute probability denominator 279130365Smlaier * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 280130365Smlaier */ 281130365Smlaier prec->probd = (2 * (prec->th_max - prec->th_min) 282130365Smlaier * prec->inv_pmax) << FP_SHIFT; 283130365Smlaier 284130365Smlaier microtime(&prec->last); 285130365Smlaier } 286130365Smlaier 287130365Smlaier return (rp); 288130365Smlaier} 289130365Smlaier 290130365Smlaiervoid 291130365Smlaierrio_destroy(rio_t *rp) 292130365Smlaier{ 293130365Smlaier wtab_destroy(rp->rio_wtab); 294184205Sdes free(rp, M_DEVBUF); 295130365Smlaier} 296130365Smlaier 297130365Smlaiervoid 298130365Smlaierrio_getstats(rio_t *rp, struct redstats *sp) 299130365Smlaier{ 300130365Smlaier int i; 301130365Smlaier 302130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) { 303130365Smlaier bcopy(&rp->q_stats[i], sp, sizeof(struct redstats)); 304130365Smlaier sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; 305130365Smlaier sp++; 306130365Smlaier } 307130365Smlaier} 308130365Smlaier 309130365Smlaier#if (RIO_NDROPPREC == 3) 310130365Smlaier/* 311130365Smlaier * internally, a drop precedence value is converted to an index 312130365Smlaier * starting from 0. 313130365Smlaier */ 314130365Smlaierstatic int 315130365Smlaierdscp2index(u_int8_t dscp) 316130365Smlaier{ 317130365Smlaier int dpindex = dscp & AF_DROPPRECMASK; 318130365Smlaier 319130365Smlaier if (dpindex == 0) 320130365Smlaier return (0); 321130365Smlaier return ((dpindex >> 3) - 1); 322130365Smlaier} 323130365Smlaier#endif 324130365Smlaier 325130365Smlaier#if 1 326130365Smlaier/* 327130365Smlaier * kludge: when a packet is dequeued, we need to know its drop precedence 328130365Smlaier * in order to keep the queue length of each drop precedence. 329130365Smlaier * use m_pkthdr.rcvif to pass this info. 330130365Smlaier */ 331130365Smlaier#define RIOM_SET_PRECINDEX(m, idx) \ 332147256Sbrooks do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0) 333130365Smlaier#define RIOM_GET_PRECINDEX(m) \ 334130365Smlaier ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \ 335130365Smlaier (m)->m_pkthdr.rcvif = NULL; idx; }) 336130365Smlaier#endif 337130365Smlaier 338130365Smlaierint 339130365Smlaierrio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, 340130365Smlaier struct altq_pktattr *pktattr) 341130365Smlaier{ 342130365Smlaier int avg, droptype; 343130365Smlaier u_int8_t dsfield, odsfield; 344130365Smlaier int dpindex, i, n, t; 345130365Smlaier struct timeval now; 346130365Smlaier struct dropprec_state *prec; 347130365Smlaier 348130365Smlaier dsfield = odsfield = read_dsfield(m, pktattr); 349130365Smlaier dpindex = dscp2index(dsfield); 350130365Smlaier 351130365Smlaier /* 352130365Smlaier * update avg of the precedence states whose drop precedence 353130365Smlaier * is larger than or equal to the drop precedence of the packet 354130365Smlaier */ 355130365Smlaier now.tv_sec = 0; 356130365Smlaier for (i = dpindex; i < RIO_NDROPPREC; i++) { 357130365Smlaier prec = &rp->rio_precstate[i]; 358130365Smlaier avg = prec->avg; 359130365Smlaier if (prec->idle) { 360130365Smlaier prec->idle = 0; 361130365Smlaier if (now.tv_sec == 0) 362130365Smlaier microtime(&now); 363130365Smlaier t = (now.tv_sec - prec->last.tv_sec); 364130365Smlaier if (t > 60) 365130365Smlaier avg = 0; 366130365Smlaier else { 367130365Smlaier t = t * 1000000 + 368130365Smlaier (now.tv_usec - prec->last.tv_usec); 369130365Smlaier n = t / rp->rio_pkttime; 370130365Smlaier /* calculate (avg = (1 - Wq)^n * avg) */ 371130365Smlaier if (n > 0) 372130365Smlaier avg = (avg >> FP_SHIFT) * 373130365Smlaier pow_w(rp->rio_wtab, n); 374130365Smlaier } 375130365Smlaier } 376130365Smlaier 377130365Smlaier /* run estimator. (avg is scaled by WEIGHT in fixed-point) */ 378130365Smlaier avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift); 379130365Smlaier prec->avg = avg; /* save the new value */ 380130365Smlaier /* 381130365Smlaier * count keeps a tally of arriving traffic that has not 382130365Smlaier * been dropped. 383130365Smlaier */ 384130365Smlaier prec->count++; 385130365Smlaier } 386130365Smlaier 387130365Smlaier prec = &rp->rio_precstate[dpindex]; 388130365Smlaier avg = prec->avg; 389130365Smlaier 390130365Smlaier /* see if we drop early */ 391130365Smlaier droptype = DTYPE_NODROP; 392130365Smlaier if (avg >= prec->th_min_s && prec->qlen > 1) { 393130365Smlaier if (avg >= prec->th_max_s) { 394130365Smlaier /* avg >= th_max: forced drop */ 395130365Smlaier droptype = DTYPE_FORCED; 396130365Smlaier } else if (prec->old == 0) { 397130365Smlaier /* first exceeds th_min */ 398130365Smlaier prec->count = 1; 399130365Smlaier prec->old = 1; 400130365Smlaier } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift, 401130365Smlaier prec->probd, prec->count)) { 402130365Smlaier /* unforced drop by red */ 403130365Smlaier droptype = DTYPE_EARLY; 404130365Smlaier } 405130365Smlaier } else { 406130365Smlaier /* avg < th_min */ 407130365Smlaier prec->old = 0; 408130365Smlaier } 409130365Smlaier 410130365Smlaier /* 411130365Smlaier * if the queue length hits the hard limit, it's a forced drop. 412130365Smlaier */ 413130365Smlaier if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 414130365Smlaier droptype = DTYPE_FORCED; 415130365Smlaier 416130365Smlaier if (droptype != DTYPE_NODROP) { 417130365Smlaier /* always drop incoming packet (as opposed to randomdrop) */ 418130365Smlaier for (i = dpindex; i < RIO_NDROPPREC; i++) 419130365Smlaier rp->rio_precstate[i].count = 0; 420130365Smlaier#ifdef RIO_STATS 421130365Smlaier if (droptype == DTYPE_EARLY) 422130365Smlaier rp->q_stats[dpindex].drop_unforced++; 423130365Smlaier else 424130365Smlaier rp->q_stats[dpindex].drop_forced++; 425130365Smlaier PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m)); 426130365Smlaier#endif 427130365Smlaier m_freem(m); 428130365Smlaier return (-1); 429130365Smlaier } 430130365Smlaier 431130365Smlaier for (i = dpindex; i < RIO_NDROPPREC; i++) 432130365Smlaier rp->rio_precstate[i].qlen++; 433130365Smlaier 434130365Smlaier /* save drop precedence index in mbuf hdr */ 435130365Smlaier RIOM_SET_PRECINDEX(m, dpindex); 436130365Smlaier 437130365Smlaier if (rp->rio_flags & RIOF_CLEARDSCP) 438130365Smlaier dsfield &= ~DSCP_MASK; 439130365Smlaier 440130365Smlaier if (dsfield != odsfield) 441130365Smlaier write_dsfield(m, pktattr, dsfield); 442130365Smlaier 443130365Smlaier _addq(q, m); 444130365Smlaier 445130365Smlaier#ifdef RIO_STATS 446130365Smlaier PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m)); 447130365Smlaier#endif 448130365Smlaier return (0); 449130365Smlaier} 450130365Smlaier 451130365Smlaierstruct mbuf * 452130365Smlaierrio_getq(rio_t *rp, class_queue_t *q) 453130365Smlaier{ 454130365Smlaier struct mbuf *m; 455130365Smlaier int dpindex, i; 456130365Smlaier 457130365Smlaier if ((m = _getq(q)) == NULL) 458130365Smlaier return NULL; 459130365Smlaier 460130365Smlaier dpindex = RIOM_GET_PRECINDEX(m); 461130365Smlaier for (i = dpindex; i < RIO_NDROPPREC; i++) { 462130365Smlaier if (--rp->rio_precstate[i].qlen == 0) { 463130365Smlaier if (rp->rio_precstate[i].idle == 0) { 464130365Smlaier rp->rio_precstate[i].idle = 1; 465130365Smlaier microtime(&rp->rio_precstate[i].last); 466130365Smlaier } 467130365Smlaier } 468130365Smlaier } 469130365Smlaier return (m); 470130365Smlaier} 471130365Smlaier 472130365Smlaier#ifdef ALTQ3_COMPAT 473130365Smlaierint 474130365Smlaierrioopen(dev, flag, fmt, p) 475130365Smlaier dev_t dev; 476130365Smlaier int flag, fmt; 477130365Smlaier#if (__FreeBSD_version > 500000) 478130365Smlaier struct thread *p; 479130365Smlaier#else 480130365Smlaier struct proc *p; 481130365Smlaier#endif 482130365Smlaier{ 483130365Smlaier /* everything will be done when the queueing scheme is attached. */ 484130365Smlaier return 0; 485130365Smlaier} 486130365Smlaier 487130365Smlaierint 488130365Smlaierrioclose(dev, flag, fmt, p) 489130365Smlaier dev_t dev; 490130365Smlaier int flag, fmt; 491130365Smlaier#if (__FreeBSD_version > 500000) 492130365Smlaier struct thread *p; 493130365Smlaier#else 494130365Smlaier struct proc *p; 495130365Smlaier#endif 496130365Smlaier{ 497130365Smlaier rio_queue_t *rqp; 498130365Smlaier int err, error = 0; 499130365Smlaier 500130365Smlaier while ((rqp = rio_list) != NULL) { 501130365Smlaier /* destroy all */ 502130365Smlaier err = rio_detach(rqp); 503130365Smlaier if (err != 0 && error == 0) 504130365Smlaier error = err; 505130365Smlaier } 506130365Smlaier 507130365Smlaier return error; 508130365Smlaier} 509130365Smlaier 510130365Smlaierint 511130365Smlaierrioioctl(dev, cmd, addr, flag, p) 512130365Smlaier dev_t dev; 513130365Smlaier ioctlcmd_t cmd; 514130365Smlaier caddr_t addr; 515130365Smlaier int flag; 516130365Smlaier#if (__FreeBSD_version > 500000) 517130365Smlaier struct thread *p; 518130365Smlaier#else 519130365Smlaier struct proc *p; 520130365Smlaier#endif 521130365Smlaier{ 522130365Smlaier rio_queue_t *rqp; 523130365Smlaier struct rio_interface *ifacep; 524130365Smlaier struct ifnet *ifp; 525130365Smlaier int error = 0; 526130365Smlaier 527130365Smlaier /* check super-user privilege */ 528130365Smlaier switch (cmd) { 529130365Smlaier case RIO_GETSTATS: 530130365Smlaier break; 531130365Smlaier default: 532164033Srwatson#if (__FreeBSD_version > 700000) 533164033Srwatson if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) 534164033Srwatson return (error); 535164033Srwatson#elsif (__FreeBSD_version > 400000) 536130365Smlaier if ((error = suser(p)) != 0) 537130365Smlaier return (error); 538130365Smlaier#else 539130365Smlaier if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 540130365Smlaier return (error); 541130365Smlaier#endif 542130365Smlaier break; 543130365Smlaier } 544130365Smlaier 545130365Smlaier switch (cmd) { 546130365Smlaier 547130365Smlaier case RIO_ENABLE: 548130365Smlaier ifacep = (struct rio_interface *)addr; 549130365Smlaier if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 550130365Smlaier error = EBADF; 551130365Smlaier break; 552130365Smlaier } 553130365Smlaier error = altq_enable(rqp->rq_ifq); 554130365Smlaier break; 555130365Smlaier 556130365Smlaier case RIO_DISABLE: 557130365Smlaier ifacep = (struct rio_interface *)addr; 558130365Smlaier if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 559130365Smlaier error = EBADF; 560130365Smlaier break; 561130365Smlaier } 562130365Smlaier error = altq_disable(rqp->rq_ifq); 563130365Smlaier break; 564130365Smlaier 565130365Smlaier case RIO_IF_ATTACH: 566130365Smlaier ifp = ifunit(((struct rio_interface *)addr)->rio_ifname); 567130365Smlaier if (ifp == NULL) { 568130365Smlaier error = ENXIO; 569130365Smlaier break; 570130365Smlaier } 571130365Smlaier 572130365Smlaier /* allocate and initialize rio_queue_t */ 573184205Sdes rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK); 574130365Smlaier if (rqp == NULL) { 575130365Smlaier error = ENOMEM; 576130365Smlaier break; 577130365Smlaier } 578130365Smlaier bzero(rqp, sizeof(rio_queue_t)); 579130365Smlaier 580184205Sdes rqp->rq_q = malloc(sizeof(class_queue_t), 581130365Smlaier M_DEVBUF, M_WAITOK); 582130365Smlaier if (rqp->rq_q == NULL) { 583184205Sdes free(rqp, M_DEVBUF); 584130365Smlaier error = ENOMEM; 585130365Smlaier break; 586130365Smlaier } 587130365Smlaier bzero(rqp->rq_q, sizeof(class_queue_t)); 588130365Smlaier 589130365Smlaier rqp->rq_rio = rio_alloc(0, NULL, 0, 0); 590130365Smlaier if (rqp->rq_rio == NULL) { 591184205Sdes free(rqp->rq_q, M_DEVBUF); 592184205Sdes free(rqp, M_DEVBUF); 593130365Smlaier error = ENOMEM; 594130365Smlaier break; 595130365Smlaier } 596130365Smlaier 597130365Smlaier rqp->rq_ifq = &ifp->if_snd; 598130365Smlaier qtail(rqp->rq_q) = NULL; 599130365Smlaier qlen(rqp->rq_q) = 0; 600130365Smlaier qlimit(rqp->rq_q) = RIO_LIMIT; 601130365Smlaier qtype(rqp->rq_q) = Q_RIO; 602130365Smlaier 603130365Smlaier /* 604130365Smlaier * set RIO to this ifnet structure. 605130365Smlaier */ 606130365Smlaier error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp, 607130365Smlaier rio_enqueue, rio_dequeue, rio_request, 608130365Smlaier NULL, NULL); 609130365Smlaier if (error) { 610130365Smlaier rio_destroy(rqp->rq_rio); 611184205Sdes free(rqp->rq_q, M_DEVBUF); 612184205Sdes free(rqp, M_DEVBUF); 613130365Smlaier break; 614130365Smlaier } 615130365Smlaier 616130365Smlaier /* add this state to the rio list */ 617130365Smlaier rqp->rq_next = rio_list; 618130365Smlaier rio_list = rqp; 619130365Smlaier break; 620130365Smlaier 621130365Smlaier case RIO_IF_DETACH: 622130365Smlaier ifacep = (struct rio_interface *)addr; 623130365Smlaier if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 624130365Smlaier error = EBADF; 625130365Smlaier break; 626130365Smlaier } 627130365Smlaier error = rio_detach(rqp); 628130365Smlaier break; 629130365Smlaier 630130365Smlaier case RIO_GETSTATS: 631130365Smlaier do { 632130365Smlaier struct rio_stats *q_stats; 633130365Smlaier rio_t *rp; 634130365Smlaier int i; 635130365Smlaier 636130365Smlaier q_stats = (struct rio_stats *)addr; 637130365Smlaier if ((rqp = altq_lookup(q_stats->iface.rio_ifname, 638130365Smlaier ALTQT_RIO)) == NULL) { 639130365Smlaier error = EBADF; 640130365Smlaier break; 641130365Smlaier } 642130365Smlaier 643130365Smlaier rp = rqp->rq_rio; 644130365Smlaier 645130365Smlaier q_stats->q_limit = qlimit(rqp->rq_q); 646130365Smlaier q_stats->weight = rp->rio_weight; 647130365Smlaier q_stats->flags = rp->rio_flags; 648130365Smlaier 649130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) { 650130365Smlaier q_stats->q_len[i] = rp->rio_precstate[i].qlen; 651130365Smlaier bcopy(&rp->q_stats[i], &q_stats->q_stats[i], 652130365Smlaier sizeof(struct redstats)); 653130365Smlaier q_stats->q_stats[i].q_avg = 654130365Smlaier rp->rio_precstate[i].avg >> rp->rio_wshift; 655130365Smlaier 656130365Smlaier q_stats->q_params[i].inv_pmax 657130365Smlaier = rp->rio_precstate[i].inv_pmax; 658130365Smlaier q_stats->q_params[i].th_min 659130365Smlaier = rp->rio_precstate[i].th_min; 660130365Smlaier q_stats->q_params[i].th_max 661130365Smlaier = rp->rio_precstate[i].th_max; 662130365Smlaier } 663130365Smlaier } while (/*CONSTCOND*/ 0); 664130365Smlaier break; 665130365Smlaier 666130365Smlaier case RIO_CONFIG: 667130365Smlaier do { 668130365Smlaier struct rio_conf *fc; 669130365Smlaier rio_t *new; 670130365Smlaier int s, limit, i; 671130365Smlaier 672130365Smlaier fc = (struct rio_conf *)addr; 673130365Smlaier if ((rqp = altq_lookup(fc->iface.rio_ifname, 674130365Smlaier ALTQT_RIO)) == NULL) { 675130365Smlaier error = EBADF; 676130365Smlaier break; 677130365Smlaier } 678130365Smlaier 679130365Smlaier new = rio_alloc(fc->rio_weight, &fc->q_params[0], 680130365Smlaier fc->rio_flags, fc->rio_pkttime); 681130365Smlaier if (new == NULL) { 682130365Smlaier error = ENOMEM; 683130365Smlaier break; 684130365Smlaier } 685130365Smlaier 686130365Smlaier#ifdef __NetBSD__ 687130365Smlaier s = splnet(); 688130365Smlaier#else 689130365Smlaier s = splimp(); 690130365Smlaier#endif 691130365Smlaier _flushq(rqp->rq_q); 692130365Smlaier limit = fc->rio_limit; 693130365Smlaier if (limit < fc->q_params[RIO_NDROPPREC-1].th_max) 694130365Smlaier limit = fc->q_params[RIO_NDROPPREC-1].th_max; 695130365Smlaier qlimit(rqp->rq_q) = limit; 696130365Smlaier 697130365Smlaier rio_destroy(rqp->rq_rio); 698130365Smlaier rqp->rq_rio = new; 699130365Smlaier 700130365Smlaier splx(s); 701130365Smlaier 702130365Smlaier /* write back new values */ 703130365Smlaier fc->rio_limit = limit; 704130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) { 705130365Smlaier fc->q_params[i].inv_pmax = 706130365Smlaier rqp->rq_rio->rio_precstate[i].inv_pmax; 707130365Smlaier fc->q_params[i].th_min = 708130365Smlaier rqp->rq_rio->rio_precstate[i].th_min; 709130365Smlaier fc->q_params[i].th_max = 710130365Smlaier rqp->rq_rio->rio_precstate[i].th_max; 711130365Smlaier } 712130365Smlaier } while (/*CONSTCOND*/ 0); 713130365Smlaier break; 714130365Smlaier 715130365Smlaier case RIO_SETDEFAULTS: 716130365Smlaier do { 717130365Smlaier struct redparams *rp; 718130365Smlaier int i; 719130365Smlaier 720130365Smlaier rp = (struct redparams *)addr; 721130365Smlaier for (i = 0; i < RIO_NDROPPREC; i++) 722130365Smlaier default_rio_params[i] = rp[i]; 723130365Smlaier } while (/*CONSTCOND*/ 0); 724130365Smlaier break; 725130365Smlaier 726130365Smlaier default: 727130365Smlaier error = EINVAL; 728130365Smlaier break; 729130365Smlaier } 730130365Smlaier 731130365Smlaier return error; 732130365Smlaier} 733130365Smlaier 734130365Smlaierstatic int 735130365Smlaierrio_detach(rqp) 736130365Smlaier rio_queue_t *rqp; 737130365Smlaier{ 738130365Smlaier rio_queue_t *tmp; 739130365Smlaier int error = 0; 740130365Smlaier 741130365Smlaier if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 742130365Smlaier altq_disable(rqp->rq_ifq); 743130365Smlaier 744130365Smlaier if ((error = altq_detach(rqp->rq_ifq))) 745130365Smlaier return (error); 746130365Smlaier 747130365Smlaier if (rio_list == rqp) 748130365Smlaier rio_list = rqp->rq_next; 749130365Smlaier else { 750130365Smlaier for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next) 751130365Smlaier if (tmp->rq_next == rqp) { 752130365Smlaier tmp->rq_next = rqp->rq_next; 753130365Smlaier break; 754130365Smlaier } 755130365Smlaier if (tmp == NULL) 756130365Smlaier printf("rio_detach: no state found in rio_list!\n"); 757130365Smlaier } 758130365Smlaier 759130365Smlaier rio_destroy(rqp->rq_rio); 760184205Sdes free(rqp->rq_q, M_DEVBUF); 761184205Sdes free(rqp, M_DEVBUF); 762130365Smlaier return (error); 763130365Smlaier} 764130365Smlaier 765130365Smlaier/* 766130365Smlaier * rio support routines 767130365Smlaier */ 768130365Smlaierstatic int 769130365Smlaierrio_request(ifq, req, arg) 770130365Smlaier struct ifaltq *ifq; 771130365Smlaier int req; 772130365Smlaier void *arg; 773130365Smlaier{ 774130365Smlaier rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 775130365Smlaier 776130368Smlaier IFQ_LOCK_ASSERT(ifq); 777130368Smlaier 778130365Smlaier switch (req) { 779130365Smlaier case ALTRQ_PURGE: 780130365Smlaier _flushq(rqp->rq_q); 781130365Smlaier if (ALTQ_IS_ENABLED(ifq)) 782130365Smlaier ifq->ifq_len = 0; 783130365Smlaier break; 784130365Smlaier } 785130365Smlaier return (0); 786130365Smlaier} 787130365Smlaier 788130365Smlaier/* 789130365Smlaier * enqueue routine: 790130365Smlaier * 791130365Smlaier * returns: 0 when successfully queued. 792130365Smlaier * ENOBUFS when drop occurs. 793130365Smlaier */ 794130365Smlaierstatic int 795130365Smlaierrio_enqueue(ifq, m, pktattr) 796130365Smlaier struct ifaltq *ifq; 797130365Smlaier struct mbuf *m; 798130365Smlaier struct altq_pktattr *pktattr; 799130365Smlaier{ 800130365Smlaier rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 801130365Smlaier int error = 0; 802130365Smlaier 803130368Smlaier IFQ_LOCK_ASSERT(ifq); 804130368Smlaier 805130365Smlaier if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0) 806130365Smlaier ifq->ifq_len++; 807130365Smlaier else 808130365Smlaier error = ENOBUFS; 809130365Smlaier return error; 810130365Smlaier} 811130365Smlaier 812130365Smlaier/* 813130365Smlaier * dequeue routine: 814130365Smlaier * must be called in splimp. 815130365Smlaier * 816130365Smlaier * returns: mbuf dequeued. 817130365Smlaier * NULL when no packet is available in the queue. 818130365Smlaier */ 819130365Smlaier 820130365Smlaierstatic struct mbuf * 821130365Smlaierrio_dequeue(ifq, op) 822130365Smlaier struct ifaltq *ifq; 823130365Smlaier int op; 824130365Smlaier{ 825130365Smlaier rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 826130365Smlaier struct mbuf *m = NULL; 827130365Smlaier 828130368Smlaier IFQ_LOCK_ASSERT(ifq); 829130368Smlaier 830130365Smlaier if (op == ALTDQ_POLL) 831130365Smlaier return qhead(rqp->rq_q); 832130365Smlaier 833130365Smlaier m = rio_getq(rqp->rq_rio, rqp->rq_q); 834130365Smlaier if (m != NULL) 835130365Smlaier ifq->ifq_len--; 836130365Smlaier return m; 837130365Smlaier} 838130365Smlaier 839130365Smlaier#ifdef KLD_MODULE 840130365Smlaier 841130365Smlaierstatic struct altqsw rio_sw = 842130365Smlaier {"rio", rioopen, rioclose, rioioctl}; 843130365Smlaier 844130365SmlaierALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw); 845130365SmlaierMODULE_VERSION(altq_rio, 1); 846130365SmlaierMODULE_DEPEND(altq_rio, altq_red, 1, 1, 1); 847130365Smlaier 848130365Smlaier#endif /* KLD_MODULE */ 849130365Smlaier#endif /* ALTQ3_COMPAT */ 850130365Smlaier 851130365Smlaier#endif /* ALTQ_RIO */ 852