altq_red.c revision 184205
1/* $FreeBSD: head/sys/contrib/altq/altq/altq_red.c 184205 2008-10-23 15:53:51Z des $ */ 2/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */ 3 4/* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30/* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. 
Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 */ 62 63#if defined(__FreeBSD__) || defined(__NetBSD__) 64#include "opt_altq.h" 65#if (__FreeBSD__ != 2) 66#include "opt_inet.h" 67#ifdef __FreeBSD__ 68#include "opt_inet6.h" 69#endif 70#endif 71#endif /* __FreeBSD__ || __NetBSD__ */ 72#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 73 74#include <sys/param.h> 75#include <sys/malloc.h> 76#include <sys/mbuf.h> 77#include <sys/socket.h> 78#include <sys/systm.h> 79#include <sys/errno.h> 80#if 1 /* ALTQ3_COMPAT */ 81#include <sys/sockio.h> 82#include <sys/proc.h> 83#include <sys/kernel.h> 84#ifdef ALTQ_FLOWVALVE 85#include <sys/queue.h> 86#include <sys/time.h> 87#endif 88#endif /* ALTQ3_COMPAT */ 89 90#include <net/if.h> 91 92#include <netinet/in.h> 93#include <netinet/in_systm.h> 94#include <netinet/ip.h> 95#ifdef INET6 96#include <netinet/ip6.h> 97#endif 98 99#include <net/pfvar.h> 100#include <altq/altq.h> 101#include <altq/altq_red.h> 102#ifdef ALTQ3_COMPAT 103#include <altq/altq_conf.h> 104#ifdef ALTQ_FLOWVALVE 105#include <altq/altq_flowvalve.h> 106#endif 107#endif 108 109/* 110 * ALTQ/RED (Random Early Detection) implementation using 32-bit 111 * fixed-point calculation. 112 * 113 * written by kjc using the ns code as a reference. 114 * you can learn more about red and ns from Sally's home page at 115 * http://www-nrg.ee.lbl.gov/floyd/ 116 * 117 * most of the red parameter values are fixed in this implementation 118 * to prevent fixed-point overflow/underflow. 119 * if you change the parameters, watch out for overflow/underflow! 120 * 121 * the parameters used are recommended values by Sally. 122 * the corresponding ns config looks: 123 * q_weight=0.00195 124 * minthresh=5 maxthresh=15 queue-size=60 125 * linterm=30 126 * dropmech=drop-tail 127 * bytes=false (can't be handled by 32-bit fixed-point) 128 * doubleq=false dqthresh=false 129 * wait=true 130 */ 131/* 132 * alternative red parameters for a slow link. 
133 * 134 * assume the queue length becomes from zero to L and keeps L, it takes 135 * N packets for q_avg to reach 63% of L. 136 * when q_weight is 0.002, N is about 500 packets. 137 * for a slow link like dial-up, 500 packets takes more than 1 minute! 138 * when q_weight is 0.008, N is about 127 packets. 139 * when q_weight is 0.016, N is about 63 packets. 140 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 141 * are allowed for 0.016. 142 * see Sally's paper for more details. 143 */ 144/* normal red parameters */ 145#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 146 /* q_weight = 0.00195 */ 147 148/* red parameters for a slow link */ 149#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 150 /* q_weight = 0.0078125 */ 151 152/* red parameters for a very slow link (e.g., dialup) */ 153#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 154 /* q_weight = 0.015625 */ 155 156/* fixed-point uses 12-bit decimal places */ 157#define FP_SHIFT 12 /* fixed-point shift */ 158 159/* red parameters for drop probability */ 160#define INV_P_MAX 10 /* inverse of max drop probability */ 161#define TH_MIN 5 /* min threshold */ 162#define TH_MAX 15 /* max threshold */ 163 164#define RED_LIMIT 60 /* default max queue lenght */ 165#define RED_STATS /* collect statistics */ 166 167/* 168 * our default policy for forced-drop is drop-tail. 169 * (in altq-1.1.2 or earlier, the default was random-drop. 170 * but it makes more sense to punish the cause of the surge.) 171 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 172 */ 173 174#ifdef ALTQ3_COMPAT 175#ifdef ALTQ_FLOWVALVE 176/* 177 * flow-valve is an extention to protect red from unresponsive flows 178 * and to promote end-to-end congestion control. 179 * flow-valve observes the average drop rates of the flows that have 180 * experienced packet drops in the recent past. 
181 * when the average drop rate exceeds the threshold, the flow is 182 * blocked by the flow-valve. the trapped flow should back off 183 * exponentially to escape from the flow-valve. 184 */ 185#ifdef RED_RANDOM_DROP 186#error "random-drop can't be used with flow-valve!" 187#endif 188#endif /* ALTQ_FLOWVALVE */ 189 190/* red_list keeps all red_queue_t's allocated. */ 191static red_queue_t *red_list = NULL; 192 193#endif /* ALTQ3_COMPAT */ 194 195/* default red parameter values */ 196static int default_th_min = TH_MIN; 197static int default_th_max = TH_MAX; 198static int default_inv_pmax = INV_P_MAX; 199 200#ifdef ALTQ3_COMPAT 201/* internal function prototypes */ 202static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 203static struct mbuf *red_dequeue(struct ifaltq *, int); 204static int red_request(struct ifaltq *, int, void *); 205static void red_purgeq(red_queue_t *); 206static int red_detach(red_queue_t *); 207#ifdef ALTQ_FLOWVALVE 208static __inline struct fve *flowlist_lookup(struct flowvalve *, 209 struct altq_pktattr *, struct timeval *); 210static __inline struct fve *flowlist_reclaim(struct flowvalve *, 211 struct altq_pktattr *); 212static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *); 213static __inline int fv_p2f(struct flowvalve *, int); 214#if 0 /* XXX: make the compiler happy (fv_alloc unused) */ 215static struct flowvalve *fv_alloc(struct red *); 216#endif 217static void fv_destroy(struct flowvalve *); 218static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, 219 struct fve **); 220static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, 221 struct fve *); 222#endif 223#endif /* ALTQ3_COMPAT */ 224 225/* 226 * red support routines 227 */ 228red_t * 229red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, 230 int pkttime) 231{ 232 red_t *rp; 233 int w, i; 234 int npkts_per_sec; 235 236 rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK); 237 if (rp == NULL) 238 
return (NULL); 239 bzero(rp, sizeof(red_t)); 240 241 rp->red_avg = 0; 242 rp->red_idle = 1; 243 244 if (weight == 0) 245 rp->red_weight = W_WEIGHT; 246 else 247 rp->red_weight = weight; 248 if (inv_pmax == 0) 249 rp->red_inv_pmax = default_inv_pmax; 250 else 251 rp->red_inv_pmax = inv_pmax; 252 if (th_min == 0) 253 rp->red_thmin = default_th_min; 254 else 255 rp->red_thmin = th_min; 256 if (th_max == 0) 257 rp->red_thmax = default_th_max; 258 else 259 rp->red_thmax = th_max; 260 261 rp->red_flags = flags; 262 263 if (pkttime == 0) 264 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 265 rp->red_pkttime = 800; 266 else 267 rp->red_pkttime = pkttime; 268 269 if (weight == 0) { 270 /* when the link is very slow, adjust red parameters */ 271 npkts_per_sec = 1000000 / rp->red_pkttime; 272 if (npkts_per_sec < 50) { 273 /* up to about 400Kbps */ 274 rp->red_weight = W_WEIGHT_2; 275 } else if (npkts_per_sec < 300) { 276 /* up to about 2.4Mbps */ 277 rp->red_weight = W_WEIGHT_1; 278 } 279 } 280 281 /* calculate wshift. weight must be power of 2 */ 282 w = rp->red_weight; 283 for (i = 0; w > 1; i++) 284 w = w >> 1; 285 rp->red_wshift = i; 286 w = 1 << rp->red_wshift; 287 if (w != rp->red_weight) { 288 printf("invalid weight value %d for red! use %d\n", 289 rp->red_weight, w); 290 rp->red_weight = w; 291 } 292 293 /* 294 * thmin_s and thmax_s are scaled versions of th_min and th_max 295 * to be compared with avg. 
296 */ 297 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 298 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 299 300 /* 301 * precompute probability denominator 302 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 303 */ 304 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 305 * rp->red_inv_pmax) << FP_SHIFT; 306 307 /* allocate weight table */ 308 rp->red_wtab = wtab_alloc(rp->red_weight); 309 310 microtime(&rp->red_last); 311 return (rp); 312} 313 314void 315red_destroy(red_t *rp) 316{ 317#ifdef ALTQ3_COMPAT 318#ifdef ALTQ_FLOWVALVE 319 if (rp->red_flowvalve != NULL) 320 fv_destroy(rp->red_flowvalve); 321#endif 322#endif /* ALTQ3_COMPAT */ 323 wtab_destroy(rp->red_wtab); 324 free(rp, M_DEVBUF); 325} 326 327void 328red_getstats(red_t *rp, struct redstats *sp) 329{ 330 sp->q_avg = rp->red_avg >> rp->red_wshift; 331 sp->xmit_cnt = rp->red_stats.xmit_cnt; 332 sp->drop_cnt = rp->red_stats.drop_cnt; 333 sp->drop_forced = rp->red_stats.drop_forced; 334 sp->drop_unforced = rp->red_stats.drop_unforced; 335 sp->marked_packets = rp->red_stats.marked_packets; 336} 337 338int 339red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, 340 struct altq_pktattr *pktattr) 341{ 342 int avg, droptype; 343 int n; 344#ifdef ALTQ3_COMPAT 345#ifdef ALTQ_FLOWVALVE 346 struct fve *fve = NULL; 347 348 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) 349 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 350 m_freem(m); 351 return (-1); 352 } 353#endif 354#endif /* ALTQ3_COMPAT */ 355 356 avg = rp->red_avg; 357 358 /* 359 * if we were idle, we pretend that n packets arrived during 360 * the idle period. 361 */ 362 if (rp->red_idle) { 363 struct timeval now; 364 int t; 365 366 rp->red_idle = 0; 367 microtime(&now); 368 t = (now.tv_sec - rp->red_last.tv_sec); 369 if (t > 60) { 370 /* 371 * being idle for more than 1 minute, set avg to zero. 372 * this prevents t from overflow. 
373 */ 374 avg = 0; 375 } else { 376 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 377 n = t / rp->red_pkttime - 1; 378 379 /* the following line does (avg = (1 - Wq)^n * avg) */ 380 if (n > 0) 381 avg = (avg >> FP_SHIFT) * 382 pow_w(rp->red_wtab, n); 383 } 384 } 385 386 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 387 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 388 rp->red_avg = avg; /* save the new value */ 389 390 /* 391 * red_count keeps a tally of arriving traffic that has not 392 * been dropped. 393 */ 394 rp->red_count++; 395 396 /* see if we drop early */ 397 droptype = DTYPE_NODROP; 398 if (avg >= rp->red_thmin_s && qlen(q) > 1) { 399 if (avg >= rp->red_thmax_s) { 400 /* avg >= th_max: forced drop */ 401 droptype = DTYPE_FORCED; 402 } else if (rp->red_old == 0) { 403 /* first exceeds th_min */ 404 rp->red_count = 1; 405 rp->red_old = 1; 406 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 407 rp->red_probd, rp->red_count)) { 408 /* mark or drop by red */ 409 if ((rp->red_flags & REDF_ECN) && 410 mark_ecn(m, pktattr, rp->red_flags)) { 411 /* successfully marked. do not drop. */ 412 rp->red_count = 0; 413#ifdef RED_STATS 414 rp->red_stats.marked_packets++; 415#endif 416 } else { 417 /* unforced drop by red */ 418 droptype = DTYPE_EARLY; 419 } 420 } 421 } else { 422 /* avg < th_min */ 423 rp->red_old = 0; 424 } 425 426 /* 427 * if the queue length hits the hard limit, it's a forced drop. 428 */ 429 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 430 droptype = DTYPE_FORCED; 431 432#ifdef RED_RANDOM_DROP 433 /* if successful or forced drop, enqueue this packet. */ 434 if (droptype != DTYPE_EARLY) 435 _addq(q, m); 436#else 437 /* if successful, enqueue this packet. 
*/ 438 if (droptype == DTYPE_NODROP) 439 _addq(q, m); 440#endif 441 if (droptype != DTYPE_NODROP) { 442 if (droptype == DTYPE_EARLY) { 443 /* drop the incoming packet */ 444#ifdef RED_STATS 445 rp->red_stats.drop_unforced++; 446#endif 447 } else { 448 /* forced drop, select a victim packet in the queue. */ 449#ifdef RED_RANDOM_DROP 450 m = _getq_random(q); 451#endif 452#ifdef RED_STATS 453 rp->red_stats.drop_forced++; 454#endif 455 } 456#ifdef RED_STATS 457 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 458#endif 459 rp->red_count = 0; 460#ifdef ALTQ3_COMPAT 461#ifdef ALTQ_FLOWVALVE 462 if (rp->red_flowvalve != NULL) 463 fv_dropbyred(rp->red_flowvalve, pktattr, fve); 464#endif 465#endif /* ALTQ3_COMPAT */ 466 m_freem(m); 467 return (-1); 468 } 469 /* successfully queued */ 470#ifdef RED_STATS 471 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 472#endif 473 return (0); 474} 475 476/* 477 * early-drop probability is calculated as follows: 478 * prob = p_max * (avg - th_min) / (th_max - th_min) 479 * prob_a = prob / (2 - count*prob) 480 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 481 * here prob_a increases as successive undrop count increases. 482 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 483 * becomes 1 when (count >= (2 / prob))). 484 */ 485int 486drop_early(int fp_len, int fp_probd, int count) 487{ 488 int d; /* denominator of drop-probability */ 489 490 d = fp_probd - count * fp_len; 491 if (d <= 0) 492 /* count exceeds the hard limit: drop or mark */ 493 return (1); 494 495 /* 496 * now the range of d is [1..600] in fixed-point. (when 497 * th_max-th_min=10 and p_max=1/30) 498 * drop probability = (avg - TH_MIN) / d 499 */ 500 501 if ((arc4random() % d) < fp_len) { 502 /* drop or mark */ 503 return (1); 504 } 505 /* no drop/mark */ 506 return (0); 507} 508 509/* 510 * try to mark CE bit to the packet. 511 * returns 1 if successfully marked, 0 otherwise. 
512 */ 513int 514mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 515{ 516 struct mbuf *m0; 517 struct pf_mtag *at; 518 void *hdr; 519 int af; 520 521 at = pf_find_mtag(m); 522 if (at != NULL) { 523 af = at->af; 524 hdr = at->hdr; 525#ifdef ALTQ3_COMPAT 526 } else if (pktattr != NULL) { 527 af = pktattr->pattr_af; 528 hdr = pktattr->pattr_hdr; 529#endif /* ALTQ3_COMPAT */ 530 } else 531 return (0); 532 533 if (af != AF_INET && af != AF_INET6) 534 return (0); 535 536 /* verify that pattr_hdr is within the mbuf data */ 537 for (m0 = m; m0 != NULL; m0 = m0->m_next) 538 if (((caddr_t)hdr >= m0->m_data) && 539 ((caddr_t)hdr < m0->m_data + m0->m_len)) 540 break; 541 if (m0 == NULL) { 542 /* ick, tag info is stale */ 543 return (0); 544 } 545 546 switch (af) { 547 case AF_INET: 548 if (flags & REDF_ECN4) { 549 struct ip *ip = hdr; 550 u_int8_t otos; 551 int sum; 552 553 if (ip->ip_v != 4) 554 return (0); /* version mismatch! */ 555 556 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 557 return (0); /* not-ECT */ 558 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 559 return (1); /* already marked */ 560 561 /* 562 * ecn-capable but not marked, 563 * mark CE and update checksum 564 */ 565 otos = ip->ip_tos; 566 ip->ip_tos |= IPTOS_ECN_CE; 567 /* 568 * update checksum (from RFC1624) 569 * HC' = ~(~HC + ~m + m') 570 */ 571 sum = ~ntohs(ip->ip_sum) & 0xffff; 572 sum += (~otos & 0xffff) + ip->ip_tos; 573 sum = (sum >> 16) + (sum & 0xffff); 574 sum += (sum >> 16); /* add carry */ 575 ip->ip_sum = htons(~sum & 0xffff); 576 return (1); 577 } 578 break; 579#ifdef INET6 580 case AF_INET6: 581 if (flags & REDF_ECN6) { 582 struct ip6_hdr *ip6 = hdr; 583 u_int32_t flowlabel; 584 585 flowlabel = ntohl(ip6->ip6_flow); 586 if ((flowlabel >> 28) != 6) 587 return (0); /* version mismatch! 
*/ 588 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 589 (IPTOS_ECN_NOTECT << 20)) 590 return (0); /* not-ECT */ 591 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 592 (IPTOS_ECN_CE << 20)) 593 return (1); /* already marked */ 594 /* 595 * ecn-capable but not marked, mark CE 596 */ 597 flowlabel |= (IPTOS_ECN_CE << 20); 598 ip6->ip6_flow = htonl(flowlabel); 599 return (1); 600 } 601 break; 602#endif /* INET6 */ 603 } 604 605 /* not marked */ 606 return (0); 607} 608 609struct mbuf * 610red_getq(rp, q) 611 red_t *rp; 612 class_queue_t *q; 613{ 614 struct mbuf *m; 615 616 if ((m = _getq(q)) == NULL) { 617 if (rp->red_idle == 0) { 618 rp->red_idle = 1; 619 microtime(&rp->red_last); 620 } 621 return NULL; 622 } 623 624 rp->red_idle = 0; 625 return (m); 626} 627 628/* 629 * helper routine to calibrate avg during idle. 630 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 631 * here Wq = 1/weight and the code assumes Wq is close to zero. 632 * 633 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 
634 */ 635static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 636 637struct wtab * 638wtab_alloc(int weight) 639{ 640 struct wtab *w; 641 int i; 642 643 for (w = wtab_list; w != NULL; w = w->w_next) 644 if (w->w_weight == weight) { 645 w->w_refcount++; 646 return (w); 647 } 648 649 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK); 650 if (w == NULL) 651 panic("wtab_alloc: malloc failed!"); 652 bzero(w, sizeof(struct wtab)); 653 w->w_weight = weight; 654 w->w_refcount = 1; 655 w->w_next = wtab_list; 656 wtab_list = w; 657 658 /* initialize the weight table */ 659 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 660 for (i = 1; i < 32; i++) { 661 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 662 if (w->w_tab[i] == 0 && w->w_param_max == 0) 663 w->w_param_max = 1 << i; 664 } 665 666 return (w); 667} 668 669int 670wtab_destroy(struct wtab *w) 671{ 672 struct wtab *prev; 673 674 if (--w->w_refcount > 0) 675 return (0); 676 677 if (wtab_list == w) 678 wtab_list = w->w_next; 679 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 680 if (prev->w_next == w) { 681 prev->w_next = w->w_next; 682 break; 683 } 684 685 free(w, M_DEVBUF); 686 return (0); 687} 688 689int32_t 690pow_w(struct wtab *w, int n) 691{ 692 int i, bit; 693 int32_t val; 694 695 if (n >= w->w_param_max) 696 return (0); 697 698 val = 1 << FP_SHIFT; 699 if (n <= 0) 700 return (val); 701 702 bit = 1; 703 i = 0; 704 while (n) { 705 if (n & bit) { 706 val = (val * w->w_tab[i]) >> FP_SHIFT; 707 n &= ~bit; 708 } 709 i++; 710 bit <<= 1; 711 } 712 return (val); 713} 714 715#ifdef ALTQ3_COMPAT 716/* 717 * red device interface 718 */ 719altqdev_decl(red); 720 721int 722redopen(dev, flag, fmt, p) 723 dev_t dev; 724 int flag, fmt; 725#if (__FreeBSD_version > 500000) 726 struct thread *p; 727#else 728 struct proc *p; 729#endif 730{ 731 /* everything will be done when the queueing scheme is attached. 
*/ 732 return 0; 733} 734 735int 736redclose(dev, flag, fmt, p) 737 dev_t dev; 738 int flag, fmt; 739#if (__FreeBSD_version > 500000) 740 struct thread *p; 741#else 742 struct proc *p; 743#endif 744{ 745 red_queue_t *rqp; 746 int err, error = 0; 747 748 while ((rqp = red_list) != NULL) { 749 /* destroy all */ 750 err = red_detach(rqp); 751 if (err != 0 && error == 0) 752 error = err; 753 } 754 755 return error; 756} 757 758int 759redioctl(dev, cmd, addr, flag, p) 760 dev_t dev; 761 ioctlcmd_t cmd; 762 caddr_t addr; 763 int flag; 764#if (__FreeBSD_version > 500000) 765 struct thread *p; 766#else 767 struct proc *p; 768#endif 769{ 770 red_queue_t *rqp; 771 struct red_interface *ifacep; 772 struct ifnet *ifp; 773 int error = 0; 774 775 /* check super-user privilege */ 776 switch (cmd) { 777 case RED_GETSTATS: 778 break; 779 default: 780#if (__FreeBSD_version > 700000) 781 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) 782#elsif (__FreeBSD_version > 400000) 783 if ((error = suser(p)) != 0) 784#else 785 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 786#endif 787 return (error); 788 break; 789 } 790 791 switch (cmd) { 792 793 case RED_ENABLE: 794 ifacep = (struct red_interface *)addr; 795 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 796 error = EBADF; 797 break; 798 } 799 error = altq_enable(rqp->rq_ifq); 800 break; 801 802 case RED_DISABLE: 803 ifacep = (struct red_interface *)addr; 804 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 805 error = EBADF; 806 break; 807 } 808 error = altq_disable(rqp->rq_ifq); 809 break; 810 811 case RED_IF_ATTACH: 812 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 813 if (ifp == NULL) { 814 error = ENXIO; 815 break; 816 } 817 818 /* allocate and initialize red_queue_t */ 819 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); 820 if (rqp == NULL) { 821 error = ENOMEM; 822 break; 823 } 824 bzero(rqp, sizeof(red_queue_t)); 825 826 rqp->rq_q = malloc(sizeof(class_queue_t), 827 
M_DEVBUF, M_WAITOK); 828 if (rqp->rq_q == NULL) { 829 free(rqp, M_DEVBUF); 830 error = ENOMEM; 831 break; 832 } 833 bzero(rqp->rq_q, sizeof(class_queue_t)); 834 835 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 836 if (rqp->rq_red == NULL) { 837 free(rqp->rq_q, M_DEVBUF); 838 free(rqp, M_DEVBUF); 839 error = ENOMEM; 840 break; 841 } 842 843 rqp->rq_ifq = &ifp->if_snd; 844 qtail(rqp->rq_q) = NULL; 845 qlen(rqp->rq_q) = 0; 846 qlimit(rqp->rq_q) = RED_LIMIT; 847 qtype(rqp->rq_q) = Q_RED; 848 849 /* 850 * set RED to this ifnet structure. 851 */ 852 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 853 red_enqueue, red_dequeue, red_request, 854 NULL, NULL); 855 if (error) { 856 red_destroy(rqp->rq_red); 857 free(rqp->rq_q, M_DEVBUF); 858 free(rqp, M_DEVBUF); 859 break; 860 } 861 862 /* add this state to the red list */ 863 rqp->rq_next = red_list; 864 red_list = rqp; 865 break; 866 867 case RED_IF_DETACH: 868 ifacep = (struct red_interface *)addr; 869 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 870 error = EBADF; 871 break; 872 } 873 error = red_detach(rqp); 874 break; 875 876 case RED_GETSTATS: 877 do { 878 struct red_stats *q_stats; 879 red_t *rp; 880 881 q_stats = (struct red_stats *)addr; 882 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 883 ALTQT_RED)) == NULL) { 884 error = EBADF; 885 break; 886 } 887 888 q_stats->q_len = qlen(rqp->rq_q); 889 q_stats->q_limit = qlimit(rqp->rq_q); 890 891 rp = rqp->rq_red; 892 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 893 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 894 q_stats->drop_cnt = rp->red_stats.drop_cnt; 895 q_stats->drop_forced = rp->red_stats.drop_forced; 896 q_stats->drop_unforced = rp->red_stats.drop_unforced; 897 q_stats->marked_packets = rp->red_stats.marked_packets; 898 899 q_stats->weight = rp->red_weight; 900 q_stats->inv_pmax = rp->red_inv_pmax; 901 q_stats->th_min = rp->red_thmin; 902 q_stats->th_max = rp->red_thmax; 903 904#ifdef ALTQ_FLOWVALVE 905 if (rp->red_flowvalve != NULL) { 906 
struct flowvalve *fv = rp->red_flowvalve; 907 q_stats->fv_flows = fv->fv_flows; 908 q_stats->fv_pass = fv->fv_stats.pass; 909 q_stats->fv_predrop = fv->fv_stats.predrop; 910 q_stats->fv_alloc = fv->fv_stats.alloc; 911 q_stats->fv_escape = fv->fv_stats.escape; 912 } else { 913#endif /* ALTQ_FLOWVALVE */ 914 q_stats->fv_flows = 0; 915 q_stats->fv_pass = 0; 916 q_stats->fv_predrop = 0; 917 q_stats->fv_alloc = 0; 918 q_stats->fv_escape = 0; 919#ifdef ALTQ_FLOWVALVE 920 } 921#endif /* ALTQ_FLOWVALVE */ 922 } while (/*CONSTCOND*/ 0); 923 break; 924 925 case RED_CONFIG: 926 do { 927 struct red_conf *fc; 928 red_t *new; 929 int s, limit; 930 931 fc = (struct red_conf *)addr; 932 if ((rqp = altq_lookup(fc->iface.red_ifname, 933 ALTQT_RED)) == NULL) { 934 error = EBADF; 935 break; 936 } 937 new = red_alloc(fc->red_weight, 938 fc->red_inv_pmax, 939 fc->red_thmin, 940 fc->red_thmax, 941 fc->red_flags, 942 fc->red_pkttime); 943 if (new == NULL) { 944 error = ENOMEM; 945 break; 946 } 947 948#ifdef __NetBSD__ 949 s = splnet(); 950#else 951 s = splimp(); 952#endif 953 red_purgeq(rqp); 954 limit = fc->red_limit; 955 if (limit < fc->red_thmax) 956 limit = fc->red_thmax; 957 qlimit(rqp->rq_q) = limit; 958 fc->red_limit = limit; /* write back the new value */ 959 960 red_destroy(rqp->rq_red); 961 rqp->rq_red = new; 962 963 splx(s); 964 965 /* write back new values */ 966 fc->red_limit = limit; 967 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 968 fc->red_thmin = rqp->rq_red->red_thmin; 969 fc->red_thmax = rqp->rq_red->red_thmax; 970 971 } while (/*CONSTCOND*/ 0); 972 break; 973 974 case RED_SETDEFAULTS: 975 do { 976 struct redparams *rp; 977 978 rp = (struct redparams *)addr; 979 980 default_th_min = rp->th_min; 981 default_th_max = rp->th_max; 982 default_inv_pmax = rp->inv_pmax; 983 } while (/*CONSTCOND*/ 0); 984 break; 985 986 default: 987 error = EINVAL; 988 break; 989 } 990 return error; 991} 992 993static int 994red_detach(rqp) 995 red_queue_t *rqp; 996{ 997 red_queue_t *tmp; 
998 int error = 0; 999 1000 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 1001 altq_disable(rqp->rq_ifq); 1002 1003 if ((error = altq_detach(rqp->rq_ifq))) 1004 return (error); 1005 1006 if (red_list == rqp) 1007 red_list = rqp->rq_next; 1008 else { 1009 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) 1010 if (tmp->rq_next == rqp) { 1011 tmp->rq_next = rqp->rq_next; 1012 break; 1013 } 1014 if (tmp == NULL) 1015 printf("red_detach: no state found in red_list!\n"); 1016 } 1017 1018 red_destroy(rqp->rq_red); 1019 free(rqp->rq_q, M_DEVBUF); 1020 free(rqp, M_DEVBUF); 1021 return (error); 1022} 1023 1024/* 1025 * enqueue routine: 1026 * 1027 * returns: 0 when successfully queued. 1028 * ENOBUFS when drop occurs. 1029 */ 1030static int 1031red_enqueue(ifq, m, pktattr) 1032 struct ifaltq *ifq; 1033 struct mbuf *m; 1034 struct altq_pktattr *pktattr; 1035{ 1036 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1037 1038 IFQ_LOCK_ASSERT(ifq); 1039 1040 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) 1041 return ENOBUFS; 1042 ifq->ifq_len++; 1043 return 0; 1044} 1045 1046/* 1047 * dequeue routine: 1048 * must be called in splimp. 1049 * 1050 * returns: mbuf dequeued. 1051 * NULL when no packet is available in the queue. 
1052 */ 1053 1054static struct mbuf * 1055red_dequeue(ifq, op) 1056 struct ifaltq *ifq; 1057 int op; 1058{ 1059 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1060 struct mbuf *m; 1061 1062 IFQ_LOCK_ASSERT(ifq); 1063 1064 if (op == ALTDQ_POLL) 1065 return qhead(rqp->rq_q); 1066 1067 /* op == ALTDQ_REMOVE */ 1068 m = red_getq(rqp->rq_red, rqp->rq_q); 1069 if (m != NULL) 1070 ifq->ifq_len--; 1071 return (m); 1072} 1073 1074static int 1075red_request(ifq, req, arg) 1076 struct ifaltq *ifq; 1077 int req; 1078 void *arg; 1079{ 1080 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1081 1082 IFQ_LOCK_ASSERT(ifq); 1083 1084 switch (req) { 1085 case ALTRQ_PURGE: 1086 red_purgeq(rqp); 1087 break; 1088 } 1089 return (0); 1090} 1091 1092static void 1093red_purgeq(rqp) 1094 red_queue_t *rqp; 1095{ 1096 _flushq(rqp->rq_q); 1097 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 1098 rqp->rq_ifq->ifq_len = 0; 1099} 1100 1101#ifdef ALTQ_FLOWVALVE 1102 1103#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ 1104#define FV_PSCALE(x) ((x) << FV_PSHIFT) 1105#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) 1106#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ 1107#define FV_FSCALE(x) ((x) << FV_FSHIFT) 1108#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) 1109 1110#define FV_TIMER (3 * hz) /* timer value for garbage collector */ 1111#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ 1112 1113#define FV_N 10 /* update fve_f every FV_N packets */ 1114 1115#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ 1116#define FV_TTHRESH 3 /* time threshold to delete fve */ 1117#define FV_ALPHA 5 /* extra packet count */ 1118 1119#define FV_STATS 1120 1121#if (__FreeBSD_version > 300000) 1122#define FV_TIMESTAMP(tp) getmicrotime(tp) 1123#else 1124#define FV_TIMESTAMP(tp) { (*(tp)) = time; } 1125#endif 1126 1127/* 1128 * Brtt table: 127 entry table to convert drop rate (p) to 1129 * the corresponding bandwidth fraction (f) 1130 * the following equation is implemented to use 
 scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 * f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

/* precomputed Brtt(p) values indexed by the scaled drop rate (see above) */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};

/*
 * flowlist_lookup: find the flow entry (fve) whose src/dst addresses
 * match the packet described by pktattr, searching the LRU-ordered
 * flow list.  The scan stops early at the first never-used entry
 * (fve_lastdrop.tv_sec == 0) or at the first entry whose last drop is
 * older than FV_TTHRESH seconds (that entry is invalidated in place).
 * On a miss, fv_flows is updated with the number of entries scanned
 * and NULL is returned.  pktattr == NULL or an unknown address family
 * also yields NULL.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

/*
 * flowlist_reclaim: recycle the least-recently-used flow entry (the
 * tail of the LRU list) for the flow described by pktattr, resetting
 * its valve state to Green and its statistics.
 * NOTE(review): fve_p and fve_f are cleared with 0.0 although they are
 * used as fixed-point integers elsewhere -- harmless (converts to 0)
 * but stylistically inconsistent; left as-is.
 */
static __inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	fve->fve_p = 0.0;
	fve->fve_f = 0.0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}

/*
 * flowlist_move_to_head: promote fve to the front of the LRU flow
 * list (no-op if it is already the head).
 */
static __inline void
flowlist_move_to_head(fv, fve)
	struct flowvalve *fv;
	struct fve *fve;
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
/*
 * allocate flowvalve structure
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	fv->fv_fves = malloc(sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
#endif

/*
 * fv_destroy: release the p2f table, the flow entry array, and the
 * flowvalve structure itself.
 */
static void fv_destroy(fv)
	struct flowvalve *fv;
{
	free(fv->fv_p2ftab, M_DEVBUF);
	free(fv->fv_fves, M_DEVBUF);
	free(fv, M_DEVBUF);
}

/*
 * fv_p2f: convert a scaled drop rate p to the corresponding bandwidth
 * fraction threshold via the precomputed p2f table; p values at or
 * above BRTT_PMAX clamp to the last entry, values below the mask
 * resolution fall back to entry 1.
 */
static __inline int
fv_p2f(fv, p)
	struct flowvalve *fv;
	int p;
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return (f);
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splimp.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splimp.
 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no entry for this flow yet: recycle the LRU tail entry */
		fve = flowlist_reclaim(fv, pktattr);

	/* this flow just dropped a packet: promote it to MRU position */
	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 * in fv_checkflow() and calculate
	 *	p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	/* record drop time; used for entry aging and the backoff test */
	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

/* loadable-module glue: register the RED discipline with ALTQ */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
#endif /* ALTQ3_COMPAT */

#endif /* ALTQ_RED */