1254721Semaste/* $FreeBSD$ */ 2254721Semaste/* $KAME: altq_rio.c,v 1.17 2003/07/10 12:07:49 kjc Exp $ */ 3254721Semaste 4254721Semaste/* 5254721Semaste * Copyright (C) 1998-2003 6254721Semaste * Sony Computer Science Laboratories Inc. All rights reserved. 7254721Semaste * 8254721Semaste * Redistribution and use in source and binary forms, with or without 9254721Semaste * modification, are permitted provided that the following conditions 10254721Semaste * are met: 11254721Semaste * 1. Redistributions of source code must retain the above copyright 12254721Semaste * notice, this list of conditions and the following disclaimer. 13254721Semaste * 2. Redistributions in binary form must reproduce the above copyright 14254721Semaste * notice, this list of conditions and the following disclaimer in the 15254721Semaste * documentation and/or other materials provided with the distribution. 16254721Semaste * 17254721Semaste * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18254721Semaste * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19254721Semaste * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20254721Semaste * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21254721Semaste * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22254721Semaste * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23254721Semaste * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24254721Semaste * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25254721Semaste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26254721Semaste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27254721Semaste * SUCH DAMAGE. 28254721Semaste */ 29254721Semaste/* 30254721Semaste * Copyright (c) 1990-1994 Regents of the University of California. 31254721Semaste * All rights reserved. 32254721Semaste * 33254721Semaste * Redistribution and use in source and binary forms, with or without 34254721Semaste * modification, are permitted provided that the following conditions 35254721Semaste * are met: 36254721Semaste * 1. Redistributions of source code must retain the above copyright 37254721Semaste * notice, this list of conditions and the following disclaimer. 38254721Semaste * 2. Redistributions in binary form must reproduce the above copyright 39254721Semaste * notice, this list of conditions and the following disclaimer in the 40254721Semaste * documentation and/or other materials provided with the distribution. 41254721Semaste * 3. All advertising materials mentioning features or use of this software 42254721Semaste * must display the following acknowledgement: 43254721Semaste * This product includes software developed by the Computer Systems 44254721Semaste * Engineering Group at Lawrence Berkeley Laboratory. 45254721Semaste * 4. Neither the name of the University nor of the Laboratory may be used 46254721Semaste * to endorse or promote products derived from this software without 47254721Semaste * specific prior written permission. 48254721Semaste * 49254721Semaste * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50254721Semaste * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51254721Semaste * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52254721Semaste * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53254721Semaste * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54254721Semaste * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55254721Semaste * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56254721Semaste * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57254721Semaste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58254721Semaste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59254721Semaste * SUCH DAMAGE. 60254721Semaste */ 61254721Semaste 62254721Semaste#if defined(__FreeBSD__) || defined(__NetBSD__) 63254721Semaste#include "opt_altq.h" 64254721Semaste#include "opt_inet.h" 65254721Semaste#ifdef __FreeBSD__ 66254721Semaste#include "opt_inet6.h" 67254721Semaste#endif 68254721Semaste#endif /* __FreeBSD__ || __NetBSD__ */ 69254721Semaste#ifdef ALTQ_RIO /* rio is enabled by ALTQ_RIO option in opt_altq.h */ 70254721Semaste 71254721Semaste#include <sys/param.h> 72254721Semaste#include <sys/malloc.h> 73254721Semaste#include <sys/mbuf.h> 74254721Semaste#include <sys/socket.h> 75254721Semaste#include <sys/systm.h> 76254721Semaste#include <sys/errno.h> 77254721Semaste#if 1 /* ALTQ3_COMPAT */ 78254721Semaste#include <sys/proc.h> 79254721Semaste#include <sys/sockio.h> 80254721Semaste#include <sys/kernel.h> 81254721Semaste#endif 82254721Semaste 83254721Semaste#include <net/if.h> 84254721Semaste#include <net/if_var.h> 85254721Semaste 86254721Semaste#include <netinet/in.h> 87254721Semaste#include <netinet/in_systm.h> 88254721Semaste#include <netinet/ip.h> 89254721Semaste#ifdef INET6 90254721Semaste#include <netinet/ip6.h> 91254721Semaste#endif 92254721Semaste 93254721Semaste#include <netpfil/pf/pf.h> 94254721Semaste#include <netpfil/pf/pf_altq.h> 95254721Semaste#include <altq/altq.h> 96254721Semaste#include <altq/altq_cdnr.h> 97254721Semaste#include <altq/altq_red.h> 98254721Semaste#include <altq/altq_rio.h> 99254721Semaste#ifdef ALTQ3_COMPAT 100254721Semaste#include <altq/altq_conf.h> 101254721Semaste#endif 102254721Semaste 103254721Semaste/* 104254721Semaste * RIO: RED with IN/OUT bit 105254721Semaste * described in 106254721Semaste * "Explicit Allocation of Best Effort Packet Delivery Service" 107254721Semaste * David D. Clark and Wenjia Fang, MIT Lab for Computer Science 108254721Semaste * http://diffserv.lcs.mit.edu/Papers/exp-alloc-ddc-wf.{ps,pdf} 109254721Semaste * 110254721Semaste * this implementation is extended to support more than 2 drop precedence 111254721Semaste * values as described in RFC2597 (Assured Forwarding PHB Group). 112254721Semaste * 113254721Semaste */ 114254721Semaste/* 115254721Semaste * AF DS (differentiated service) codepoints. 116254721Semaste * (classes can be mapped to CBQ or H-FSC classes.) 117254721Semaste * 118254721Semaste * 0 1 2 3 4 5 6 7 119254721Semaste * +---+---+---+---+---+---+---+---+ 120254721Semaste * | CLASS |DropPre| 0 | CU | 121254721Semaste * +---+---+---+---+---+---+---+---+ 122254721Semaste * 123254721Semaste * class 1: 001 124254721Semaste * class 2: 010 125254721Semaste * class 3: 011 126254721Semaste * class 4: 100 127254721Semaste * 128254721Semaste * low drop prec: 01 129254721Semaste * medium drop prec: 10 130254721Semaste * high drop prec: 01 131254721Semaste */ 132254721Semaste 133254721Semaste/* normal red parameters */ 134254721Semaste#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 135254721Semaste /* q_weight = 0.00195 */ 136254721Semaste 137254721Semaste/* red parameters for a slow link */ 138254721Semaste#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 139254721Semaste /* q_weight = 0.0078125 */ 140254721Semaste 141254721Semaste/* red parameters for a very slow link (e.g., dialup) */ 142254721Semaste#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 143254721Semaste /* q_weight = 0.015625 */ 144254721Semaste 145254721Semaste/* fixed-point uses 12-bit decimal places */ 146254721Semaste#define FP_SHIFT 12 /* fixed-point shift */ 147254721Semaste 148254721Semaste/* red parameters for drop probability */ 149254721Semaste#define INV_P_MAX 10 /* inverse of max drop probability */ 150254721Semaste#define TH_MIN 5 /* min threshold */ 151254721Semaste#define TH_MAX 15 /* max threshold */ 152254721Semaste 153254721Semaste#define RIO_LIMIT 60 /* default max queue lenght */ 154254721Semaste#define RIO_STATS /* collect statistics */ 155254721Semaste 156254721Semaste#define TV_DELTA(a, b, delta) { \ 157254721Semaste register int xxs; \ 158254721Semaste \ 159254721Semaste delta = (a)->tv_usec - (b)->tv_usec; \ 160254721Semaste if ((xxs = (a)->tv_sec - (b)->tv_sec) != 0) { \ 161254721Semaste if (xxs < 0) { \ 162254721Semaste delta = 60000000; \ 163254721Semaste } else if (xxs > 4) { \ 164254721Semaste if (xxs > 60) \ 165254721Semaste delta = 60000000; \ 166254721Semaste else \ 167254721Semaste delta += xxs * 1000000; \ 168254721Semaste } else while (xxs > 0) { \ 169254721Semaste delta += 1000000; \ 170254721Semaste xxs--; \ 171254721Semaste } \ 172254721Semaste } \ 173254721Semaste} 174254721Semaste 175254721Semaste#ifdef ALTQ3_COMPAT 176254721Semaste/* rio_list keeps all rio_queue_t's allocated. */ 177254721Semastestatic rio_queue_t *rio_list = NULL; 178254721Semaste#endif 179254721Semaste/* default rio parameter values */ 180254721Semastestatic struct redparams default_rio_params[RIO_NDROPPREC] = { 181254721Semaste /* th_min, th_max, inv_pmax */ 182254721Semaste { TH_MAX * 2 + TH_MIN, TH_MAX * 3, INV_P_MAX }, /* low drop precedence */ 183254721Semaste { TH_MAX + TH_MIN, TH_MAX * 2, INV_P_MAX }, /* medium drop precedence */ 184254721Semaste { TH_MIN, TH_MAX, INV_P_MAX } /* high drop precedence */ 185254721Semaste}; 186254721Semaste 187254721Semaste/* internal function prototypes */ 188254721Semastestatic int dscp2index(u_int8_t); 189254721Semaste#ifdef ALTQ3_COMPAT 190254721Semastestatic int rio_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 191254721Semastestatic struct mbuf *rio_dequeue(struct ifaltq *, int); 192254721Semastestatic int rio_request(struct ifaltq *, int, void *); 193254721Semastestatic int rio_detach(rio_queue_t *); 194254721Semaste 195254721Semaste/* 196254721Semaste * rio device interface 197254721Semaste */ 198254721Semastealtqdev_decl(rio); 199254721Semaste 200254721Semaste#endif /* ALTQ3_COMPAT */ 201254721Semaste 202254721Semasterio_t * 203254721Semasterio_alloc(int weight, struct redparams *params, int flags, int pkttime) 204254721Semaste{ 205254721Semaste rio_t *rp; 206254721Semaste int w, i; 207254721Semaste int npkts_per_sec; 208254721Semaste 209254721Semaste rp = malloc(sizeof(rio_t), M_DEVBUF, M_NOWAIT | M_ZERO); 210254721Semaste if (rp == NULL) 211254721Semaste return (NULL); 212254721Semaste 213254721Semaste rp->rio_flags = flags; 214254721Semaste if (pkttime == 0) 215254721Semaste /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 216254721Semaste rp->rio_pkttime = 800; 217254721Semaste else 218254721Semaste rp->rio_pkttime = pkttime; 219254721Semaste 220254721Semaste if (weight != 0) 221254721Semaste rp->rio_weight = weight; 222254721Semaste else { 223254721Semaste /* use default */ 224254721Semaste rp->rio_weight = W_WEIGHT; 225254721Semaste 226254721Semaste /* when the link is very slow, adjust red parameters */ 227254721Semaste npkts_per_sec = 1000000 / rp->rio_pkttime; 228254721Semaste if (npkts_per_sec < 50) { 229254721Semaste /* up to about 400Kbps */ 230254721Semaste rp->rio_weight = W_WEIGHT_2; 231254721Semaste } else if (npkts_per_sec < 300) { 232254721Semaste /* up to about 2.4Mbps */ 233254721Semaste rp->rio_weight = W_WEIGHT_1; 234254721Semaste } 235254721Semaste } 236254721Semaste 237254721Semaste /* calculate wshift. weight must be power of 2 */ 238254721Semaste w = rp->rio_weight; 239254721Semaste for (i = 0; w > 1; i++) 240254721Semaste w = w >> 1; 241254721Semaste rp->rio_wshift = i; 242254721Semaste w = 1 << rp->rio_wshift; 243254721Semaste if (w != rp->rio_weight) { 244254721Semaste printf("invalid weight value %d for red! use %d\n", 245254721Semaste rp->rio_weight, w); 246254721Semaste rp->rio_weight = w; 247254721Semaste } 248254721Semaste 249254721Semaste /* allocate weight table */ 250254721Semaste rp->rio_wtab = wtab_alloc(rp->rio_weight); 251254721Semaste 252254721Semaste for (i = 0; i < RIO_NDROPPREC; i++) { 253254721Semaste struct dropprec_state *prec = &rp->rio_precstate[i]; 254254721Semaste 255254721Semaste prec->avg = 0; 256254721Semaste prec->idle = 1; 257254721Semaste 258254721Semaste if (params == NULL || params[i].inv_pmax == 0) 259254721Semaste prec->inv_pmax = default_rio_params[i].inv_pmax; 260254721Semaste else 261254721Semaste prec->inv_pmax = params[i].inv_pmax; 262254721Semaste if (params == NULL || params[i].th_min == 0) 263254721Semaste prec->th_min = default_rio_params[i].th_min; 264254721Semaste else 265254721Semaste prec->th_min = params[i].th_min; 266254721Semaste if (params == NULL || params[i].th_max == 0) 267254721Semaste prec->th_max = default_rio_params[i].th_max; 268254721Semaste else 269254721Semaste prec->th_max = params[i].th_max; 270254721Semaste 271254721Semaste /* 272254721Semaste * th_min_s and th_max_s are scaled versions of th_min 273254721Semaste * and th_max to be compared with avg. 274254721Semaste */ 275254721Semaste prec->th_min_s = prec->th_min << (rp->rio_wshift + FP_SHIFT); 276254721Semaste prec->th_max_s = prec->th_max << (rp->rio_wshift + FP_SHIFT); 277254721Semaste 278254721Semaste /* 279254721Semaste * precompute probability denominator 280254721Semaste * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 281254721Semaste */ 282254721Semaste prec->probd = (2 * (prec->th_max - prec->th_min) 283254721Semaste * prec->inv_pmax) << FP_SHIFT; 284254721Semaste 285254721Semaste microtime(&prec->last); 286254721Semaste } 287254721Semaste 288254721Semaste return (rp); 289254721Semaste} 290254721Semaste 291254721Semastevoid 292254721Semasterio_destroy(rio_t *rp) 293254721Semaste{ 294254721Semaste wtab_destroy(rp->rio_wtab); 295254721Semaste free(rp, M_DEVBUF); 296254721Semaste} 297254721Semaste 298254721Semastevoid 299254721Semasterio_getstats(rio_t *rp, struct redstats *sp) 300254721Semaste{ 301254721Semaste int i; 302254721Semaste 303254721Semaste for (i = 0; i < RIO_NDROPPREC; i++) { 304254721Semaste bcopy(&rp->q_stats[i], sp, sizeof(struct redstats)); 305254721Semaste sp->q_avg = rp->rio_precstate[i].avg >> rp->rio_wshift; 306254721Semaste sp++; 307254721Semaste } 308254721Semaste} 309254721Semaste 310254721Semaste#if (RIO_NDROPPREC == 3) 311254721Semaste/* 312254721Semaste * internally, a drop precedence value is converted to an index 313254721Semaste * starting from 0. 314254721Semaste */ 315254721Semastestatic int 316254721Semastedscp2index(u_int8_t dscp) 317254721Semaste{ 318254721Semaste int dpindex = dscp & AF_DROPPRECMASK; 319254721Semaste 320254721Semaste if (dpindex == 0) 321254721Semaste return (0); 322254721Semaste return ((dpindex >> 3) - 1); 323254721Semaste} 324254721Semaste#endif 325254721Semaste 326254721Semaste#if 1 327254721Semaste/* 328254721Semaste * kludge: when a packet is dequeued, we need to know its drop precedence 329254721Semaste * in order to keep the queue length of each drop precedence. 330254721Semaste * use m_pkthdr.rcvif to pass this info. 331254721Semaste */ 332254721Semaste#define RIOM_SET_PRECINDEX(m, idx) \ 333254721Semaste do { (m)->m_pkthdr.rcvif = (void *)((long)(idx)); } while (0) 334254721Semaste#define RIOM_GET_PRECINDEX(m) \ 335254721Semaste ({ long idx; idx = (long)((m)->m_pkthdr.rcvif); \ 336254721Semaste (m)->m_pkthdr.rcvif = NULL; idx; }) 337254721Semaste#endif 338254721Semaste 339254721Semasteint 340254721Semasterio_addq(rio_t *rp, class_queue_t *q, struct mbuf *m, 341254721Semaste struct altq_pktattr *pktattr) 342254721Semaste{ 343254721Semaste int avg, droptype; 344254721Semaste u_int8_t dsfield, odsfield; 345254721Semaste int dpindex, i, n, t; 346254721Semaste struct timeval now; 347254721Semaste struct dropprec_state *prec; 348254721Semaste 349254721Semaste dsfield = odsfield = read_dsfield(m, pktattr); 350254721Semaste dpindex = dscp2index(dsfield); 351254721Semaste 352254721Semaste /* 353254721Semaste * update avg of the precedence states whose drop precedence 354254721Semaste * is larger than or equal to the drop precedence of the packet 355254721Semaste */ 356254721Semaste now.tv_sec = 0; 357254721Semaste for (i = dpindex; i < RIO_NDROPPREC; i++) { 358254721Semaste prec = &rp->rio_precstate[i]; 359254721Semaste avg = prec->avg; 360254721Semaste if (prec->idle) { 361254721Semaste prec->idle = 0; 362254721Semaste if (now.tv_sec == 0) 363254721Semaste microtime(&now); 364254721Semaste t = (now.tv_sec - prec->last.tv_sec); 365254721Semaste if (t > 60) 366254721Semaste avg = 0; 367254721Semaste else { 368254721Semaste t = t * 1000000 + 369254721Semaste (now.tv_usec - prec->last.tv_usec); 370254721Semaste n = t / rp->rio_pkttime; 371254721Semaste /* calculate (avg = (1 - Wq)^n * avg) */ 372254721Semaste if (n > 0) 373254721Semaste avg = (avg >> FP_SHIFT) * 374254721Semaste pow_w(rp->rio_wtab, n); 375254721Semaste } 376254721Semaste } 377254721Semaste 378254721Semaste /* run estimator. (avg is scaled by WEIGHT in fixed-point) */ 379254721Semaste avg += (prec->qlen << FP_SHIFT) - (avg >> rp->rio_wshift); 380254721Semaste prec->avg = avg; /* save the new value */ 381254721Semaste /* 382254721Semaste * count keeps a tally of arriving traffic that has not 383254721Semaste * been dropped. 384254721Semaste */ 385254721Semaste prec->count++; 386254721Semaste } 387254721Semaste 388254721Semaste prec = &rp->rio_precstate[dpindex]; 389254721Semaste avg = prec->avg; 390254721Semaste 391254721Semaste /* see if we drop early */ 392254721Semaste droptype = DTYPE_NODROP; 393254721Semaste if (avg >= prec->th_min_s && prec->qlen > 1) { 394254721Semaste if (avg >= prec->th_max_s) { 395254721Semaste /* avg >= th_max: forced drop */ 396254721Semaste droptype = DTYPE_FORCED; 397254721Semaste } else if (prec->old == 0) { 398254721Semaste /* first exceeds th_min */ 399254721Semaste prec->count = 1; 400254721Semaste prec->old = 1; 401254721Semaste } else if (drop_early((avg - prec->th_min_s) >> rp->rio_wshift, 402254721Semaste prec->probd, prec->count)) { 403254721Semaste /* unforced drop by red */ 404254721Semaste droptype = DTYPE_EARLY; 405254721Semaste } 406254721Semaste } else { 407254721Semaste /* avg < th_min */ 408254721Semaste prec->old = 0; 409254721Semaste } 410254721Semaste 411254721Semaste /* 412254721Semaste * if the queue length hits the hard limit, it's a forced drop. 413254721Semaste */ 414254721Semaste if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 415254721Semaste droptype = DTYPE_FORCED; 416254721Semaste 417254721Semaste if (droptype != DTYPE_NODROP) { 418254721Semaste /* always drop incoming packet (as opposed to randomdrop) */ 419254721Semaste for (i = dpindex; i < RIO_NDROPPREC; i++) 420254721Semaste rp->rio_precstate[i].count = 0; 421254721Semaste#ifdef RIO_STATS 422254721Semaste if (droptype == DTYPE_EARLY) 423254721Semaste rp->q_stats[dpindex].drop_unforced++; 424254721Semaste else 425254721Semaste rp->q_stats[dpindex].drop_forced++; 426254721Semaste PKTCNTR_ADD(&rp->q_stats[dpindex].drop_cnt, m_pktlen(m)); 427254721Semaste#endif 428254721Semaste m_freem(m); 429254721Semaste return (-1); 430254721Semaste } 431254721Semaste 432254721Semaste for (i = dpindex; i < RIO_NDROPPREC; i++) 433254721Semaste rp->rio_precstate[i].qlen++; 434254721Semaste 435254721Semaste /* save drop precedence index in mbuf hdr */ 436254721Semaste RIOM_SET_PRECINDEX(m, dpindex); 437254721Semaste 438254721Semaste if (rp->rio_flags & RIOF_CLEARDSCP) 439254721Semaste dsfield &= ~DSCP_MASK; 440254721Semaste 441254721Semaste if (dsfield != odsfield) 442254721Semaste write_dsfield(m, pktattr, dsfield); 443254721Semaste 444254721Semaste _addq(q, m); 445254721Semaste 446254721Semaste#ifdef RIO_STATS 447254721Semaste PKTCNTR_ADD(&rp->q_stats[dpindex].xmit_cnt, m_pktlen(m)); 448254721Semaste#endif 449254721Semaste return (0); 450254721Semaste} 451254721Semaste 452254721Semastestruct mbuf * 453254721Semasterio_getq(rio_t *rp, class_queue_t *q) 454254721Semaste{ 455254721Semaste struct mbuf *m; 456254721Semaste int dpindex, i; 457254721Semaste 458254721Semaste if ((m = _getq(q)) == NULL) 459254721Semaste return NULL; 460254721Semaste 461254721Semaste dpindex = RIOM_GET_PRECINDEX(m); 462254721Semaste for (i = dpindex; i < RIO_NDROPPREC; i++) { 463254721Semaste if (--rp->rio_precstate[i].qlen == 0) { 464254721Semaste if (rp->rio_precstate[i].idle == 0) { 465254721Semaste rp->rio_precstate[i].idle = 1; 466254721Semaste microtime(&rp->rio_precstate[i].last); 467254721Semaste } 468254721Semaste } 469254721Semaste } 470254721Semaste return (m); 471254721Semaste} 472254721Semaste 473254721Semaste#ifdef ALTQ3_COMPAT 474254721Semasteint 475254721Semasterioopen(dev, flag, fmt, p) 476254721Semaste dev_t dev; 477254721Semaste int flag, fmt; 478254721Semaste#if (__FreeBSD_version > 500000) 479254721Semaste struct thread *p; 480254721Semaste#else 481254721Semaste struct proc *p; 482254721Semaste#endif 483254721Semaste{ 484254721Semaste /* everything will be done when the queueing scheme is attached. */ 485254721Semaste return 0; 486254721Semaste} 487254721Semaste 488254721Semasteint 489254721Semasterioclose(dev, flag, fmt, p) 490254721Semaste dev_t dev; 491254721Semaste int flag, fmt; 492254721Semaste#if (__FreeBSD_version > 500000) 493254721Semaste struct thread *p; 494254721Semaste#else 495254721Semaste struct proc *p; 496254721Semaste#endif 497254721Semaste{ 498254721Semaste rio_queue_t *rqp; 499254721Semaste int err, error = 0; 500254721Semaste 501254721Semaste while ((rqp = rio_list) != NULL) { 502254721Semaste /* destroy all */ 503254721Semaste err = rio_detach(rqp); 504254721Semaste if (err != 0 && error == 0) 505254721Semaste error = err; 506254721Semaste } 507254721Semaste 508254721Semaste return error; 509254721Semaste} 510254721Semaste 511254721Semasteint 512254721Semasterioioctl(dev, cmd, addr, flag, p) 513254721Semaste dev_t dev; 514254721Semaste ioctlcmd_t cmd; 515254721Semaste caddr_t addr; 516254721Semaste int flag; 517254721Semaste#if (__FreeBSD_version > 500000) 518254721Semaste struct thread *p; 519254721Semaste#else 520254721Semaste struct proc *p; 521254721Semaste#endif 522254721Semaste{ 523254721Semaste rio_queue_t *rqp; 524254721Semaste struct rio_interface *ifacep; 525254721Semaste struct ifnet *ifp; 526254721Semaste int error = 0; 527254721Semaste 528254721Semaste /* check super-user privilege */ 529254721Semaste switch (cmd) { 530254721Semaste case RIO_GETSTATS: 531254721Semaste break; 532254721Semaste default: 533254721Semaste#if (__FreeBSD_version > 700000) 534254721Semaste if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) 535254721Semaste return (error); 536254721Semaste#elsif (__FreeBSD_version > 400000) 537254721Semaste if ((error = suser(p)) != 0) 538254721Semaste return (error); 539254721Semaste#else 540254721Semaste if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 541254721Semaste return (error); 542254721Semaste#endif 543254721Semaste break; 544254721Semaste } 545254721Semaste 546254721Semaste switch (cmd) { 547254721Semaste 548254721Semaste case RIO_ENABLE: 549254721Semaste ifacep = (struct rio_interface *)addr; 550254721Semaste if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 551254721Semaste error = EBADF; 552254721Semaste break; 553254721Semaste } 554254721Semaste error = altq_enable(rqp->rq_ifq); 555254721Semaste break; 556254721Semaste 557254721Semaste case RIO_DISABLE: 558254721Semaste ifacep = (struct rio_interface *)addr; 559254721Semaste if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 560254721Semaste error = EBADF; 561254721Semaste break; 562254721Semaste } 563254721Semaste error = altq_disable(rqp->rq_ifq); 564254721Semaste break; 565254721Semaste 566254721Semaste case RIO_IF_ATTACH: 567254721Semaste ifp = ifunit(((struct rio_interface *)addr)->rio_ifname); 568254721Semaste if (ifp == NULL) { 569254721Semaste error = ENXIO; 570254721Semaste break; 571254721Semaste } 572254721Semaste 573254721Semaste /* allocate and initialize rio_queue_t */ 574254721Semaste rqp = malloc(sizeof(rio_queue_t), M_DEVBUF, M_WAITOK); 575254721Semaste if (rqp == NULL) { 576254721Semaste error = ENOMEM; 577254721Semaste break; 578254721Semaste } 579254721Semaste bzero(rqp, sizeof(rio_queue_t)); 580254721Semaste 581254721Semaste rqp->rq_q = malloc(sizeof(class_queue_t), 582254721Semaste M_DEVBUF, M_WAITOK); 583254721Semaste if (rqp->rq_q == NULL) { 584254721Semaste free(rqp, M_DEVBUF); 585254721Semaste error = ENOMEM; 586254721Semaste break; 587254721Semaste } 588254721Semaste bzero(rqp->rq_q, sizeof(class_queue_t)); 589254721Semaste 590254721Semaste rqp->rq_rio = rio_alloc(0, NULL, 0, 0); 591254721Semaste if (rqp->rq_rio == NULL) { 592254721Semaste free(rqp->rq_q, M_DEVBUF); 593254721Semaste free(rqp, M_DEVBUF); 594254721Semaste error = ENOMEM; 595254721Semaste break; 596254721Semaste } 597254721Semaste 598254721Semaste rqp->rq_ifq = &ifp->if_snd; 599254721Semaste qtail(rqp->rq_q) = NULL; 600254721Semaste qlen(rqp->rq_q) = 0; 601254721Semaste qlimit(rqp->rq_q) = RIO_LIMIT; 602254721Semaste qtype(rqp->rq_q) = Q_RIO; 603254721Semaste 604254721Semaste /* 605254721Semaste * set RIO to this ifnet structure. 606254721Semaste */ 607254721Semaste error = altq_attach(rqp->rq_ifq, ALTQT_RIO, rqp, 608254721Semaste rio_enqueue, rio_dequeue, rio_request, 609254721Semaste NULL, NULL); 610254721Semaste if (error) { 611254721Semaste rio_destroy(rqp->rq_rio); 612254721Semaste free(rqp->rq_q, M_DEVBUF); 613254721Semaste free(rqp, M_DEVBUF); 614254721Semaste break; 615254721Semaste } 616254721Semaste 617254721Semaste /* add this state to the rio list */ 618254721Semaste rqp->rq_next = rio_list; 619254721Semaste rio_list = rqp; 620254721Semaste break; 621254721Semaste 622254721Semaste case RIO_IF_DETACH: 623254721Semaste ifacep = (struct rio_interface *)addr; 624254721Semaste if ((rqp = altq_lookup(ifacep->rio_ifname, ALTQT_RIO)) == NULL) { 625254721Semaste error = EBADF; 626254721Semaste break; 627254721Semaste } 628254721Semaste error = rio_detach(rqp); 629254721Semaste break; 630254721Semaste 631254721Semaste case RIO_GETSTATS: 632254721Semaste do { 633254721Semaste struct rio_stats *q_stats; 634254721Semaste rio_t *rp; 635254721Semaste int i; 636254721Semaste 637254721Semaste q_stats = (struct rio_stats *)addr; 638254721Semaste if ((rqp = altq_lookup(q_stats->iface.rio_ifname, 639254721Semaste ALTQT_RIO)) == NULL) { 640254721Semaste error = EBADF; 641254721Semaste break; 642254721Semaste } 643254721Semaste 644254721Semaste rp = rqp->rq_rio; 645254721Semaste 646254721Semaste q_stats->q_limit = qlimit(rqp->rq_q); 647254721Semaste q_stats->weight = rp->rio_weight; 648254721Semaste q_stats->flags = rp->rio_flags; 649254721Semaste 650254721Semaste for (i = 0; i < RIO_NDROPPREC; i++) { 651254721Semaste q_stats->q_len[i] = rp->rio_precstate[i].qlen; 652254721Semaste bcopy(&rp->q_stats[i], &q_stats->q_stats[i], 653254721Semaste sizeof(struct redstats)); 654263363Semaste q_stats->q_stats[i].q_avg = 655254721Semaste rp->rio_precstate[i].avg >> rp->rio_wshift; 656254721Semaste 657254721Semaste q_stats->q_params[i].inv_pmax 658254721Semaste = rp->rio_precstate[i].inv_pmax; 659254721Semaste q_stats->q_params[i].th_min 660254721Semaste = rp->rio_precstate[i].th_min; 661254721Semaste q_stats->q_params[i].th_max 662254721Semaste = rp->rio_precstate[i].th_max; 663254721Semaste } 664254721Semaste } while (/*CONSTCOND*/ 0); 665254721Semaste break; 666254721Semaste 667254721Semaste case RIO_CONFIG: 668254721Semaste do { 669254721Semaste struct rio_conf *fc; 670254721Semaste rio_t *new; 671254721Semaste int s, limit, i; 672254721Semaste 673254721Semaste fc = (struct rio_conf *)addr; 674254721Semaste if ((rqp = altq_lookup(fc->iface.rio_ifname, 675254721Semaste ALTQT_RIO)) == NULL) { 676254721Semaste error = EBADF; 677254721Semaste break; 678254721Semaste } 679254721Semaste 680254721Semaste new = rio_alloc(fc->rio_weight, &fc->q_params[0], 681254721Semaste fc->rio_flags, fc->rio_pkttime); 682254721Semaste if (new == NULL) { 683254721Semaste error = ENOMEM; 684254721Semaste break; 685254721Semaste } 686254721Semaste 687254721Semaste#ifdef __NetBSD__ 688254721Semaste s = splnet(); 689254721Semaste#else 690254721Semaste s = splimp(); 691254721Semaste#endif 692254721Semaste _flushq(rqp->rq_q); 693254721Semaste limit = fc->rio_limit; 694254721Semaste if (limit < fc->q_params[RIO_NDROPPREC-1].th_max) 695254721Semaste limit = fc->q_params[RIO_NDROPPREC-1].th_max; 696254721Semaste qlimit(rqp->rq_q) = limit; 697254721Semaste 698254721Semaste rio_destroy(rqp->rq_rio); 699254721Semaste rqp->rq_rio = new; 700254721Semaste 701254721Semaste splx(s); 702254721Semaste 703254721Semaste /* write back new values */ 704254721Semaste fc->rio_limit = limit; 705254721Semaste for (i = 0; i < RIO_NDROPPREC; i++) { 706254721Semaste fc->q_params[i].inv_pmax = 707254721Semaste rqp->rq_rio->rio_precstate[i].inv_pmax; 708254721Semaste fc->q_params[i].th_min = 709254721Semaste rqp->rq_rio->rio_precstate[i].th_min; 710254721Semaste fc->q_params[i].th_max = 711254721Semaste rqp->rq_rio->rio_precstate[i].th_max; 712254721Semaste } 713254721Semaste } while (/*CONSTCOND*/ 0); 714254721Semaste break; 715254721Semaste 716254721Semaste case RIO_SETDEFAULTS: 717254721Semaste do { 718254721Semaste struct redparams *rp; 719254721Semaste int i; 720254721Semaste 721254721Semaste rp = (struct redparams *)addr; 722254721Semaste for (i = 0; i < RIO_NDROPPREC; i++) 723254721Semaste default_rio_params[i] = rp[i]; 724254721Semaste } while (/*CONSTCOND*/ 0); 725254721Semaste break; 726254721Semaste 727254721Semaste default: 728254721Semaste error = EINVAL; 729254721Semaste break; 730254721Semaste } 731254721Semaste 732254721Semaste return error; 733254721Semaste} 734254721Semaste 735254721Semastestatic int 736254721Semasterio_detach(rqp) 737254721Semaste rio_queue_t *rqp; 738254721Semaste{ 739254721Semaste rio_queue_t *tmp; 740254721Semaste int error = 0; 741254721Semaste 742254721Semaste if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 743254721Semaste altq_disable(rqp->rq_ifq); 744254721Semaste 745254721Semaste if ((error = altq_detach(rqp->rq_ifq))) 746254721Semaste return (error); 747254721Semaste 748254721Semaste if (rio_list == rqp) 749254721Semaste rio_list = rqp->rq_next; 750254721Semaste else { 751254721Semaste for (tmp = rio_list; tmp != NULL; tmp = tmp->rq_next) 752254721Semaste if (tmp->rq_next == rqp) { 753254721Semaste tmp->rq_next = rqp->rq_next; 754254721Semaste break; 755254721Semaste } 756254721Semaste if (tmp == NULL) 757254721Semaste printf("rio_detach: no state found in rio_list!\n"); 758254721Semaste } 759254721Semaste 760254721Semaste rio_destroy(rqp->rq_rio); 761254721Semaste free(rqp->rq_q, M_DEVBUF); 762254721Semaste free(rqp, M_DEVBUF); 763254721Semaste return (error); 764} 765 766/* 767 * rio support routines 768 */ 769static int 770rio_request(ifq, req, arg) 771 struct ifaltq *ifq; 772 int req; 773 void *arg; 774{ 775 rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 776 777 IFQ_LOCK_ASSERT(ifq); 778 779 switch (req) { 780 case ALTRQ_PURGE: 781 _flushq(rqp->rq_q); 782 if (ALTQ_IS_ENABLED(ifq)) 783 ifq->ifq_len = 0; 784 break; 785 } 786 return (0); 787} 788 789/* 790 * enqueue routine: 791 * 792 * returns: 0 when successfully queued. 793 * ENOBUFS when drop occurs. 794 */ 795static int 796rio_enqueue(ifq, m, pktattr) 797 struct ifaltq *ifq; 798 struct mbuf *m; 799 struct altq_pktattr *pktattr; 800{ 801 rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 802 int error = 0; 803 804 IFQ_LOCK_ASSERT(ifq); 805 806 if (rio_addq(rqp->rq_rio, rqp->rq_q, m, pktattr) == 0) 807 ifq->ifq_len++; 808 else 809 error = ENOBUFS; 810 return error; 811} 812 813/* 814 * dequeue routine: 815 * must be called in splimp. 816 * 817 * returns: mbuf dequeued. 818 * NULL when no packet is available in the queue. 819 */ 820 821static struct mbuf * 822rio_dequeue(ifq, op) 823 struct ifaltq *ifq; 824 int op; 825{ 826 rio_queue_t *rqp = (rio_queue_t *)ifq->altq_disc; 827 struct mbuf *m = NULL; 828 829 IFQ_LOCK_ASSERT(ifq); 830 831 if (op == ALTDQ_POLL) 832 return qhead(rqp->rq_q); 833 834 m = rio_getq(rqp->rq_rio, rqp->rq_q); 835 if (m != NULL) 836 ifq->ifq_len--; 837 return m; 838} 839 840#ifdef KLD_MODULE 841 842static struct altqsw rio_sw = 843 {"rio", rioopen, rioclose, rioioctl}; 844 845ALTQ_MODULE(altq_rio, ALTQT_RIO, &rio_sw); 846MODULE_VERSION(altq_rio, 1); 847MODULE_DEPEND(altq_rio, altq_red, 1, 1, 1); 848 849#endif /* KLD_MODULE */ 850#endif /* ALTQ3_COMPAT */ 851 852#endif /* ALTQ_RIO */ 853