1/* $NetBSD: altq_red.c,v 1.28 2008/06/18 09:06:27 yamt Exp $ */ 2/* $KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $ */ 3 4/* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30/* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. 
Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 
61 */ 62 63#include <sys/cdefs.h> 64__KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.28 2008/06/18 09:06:27 yamt Exp $"); 65 66#ifdef _KERNEL_OPT 67#include "opt_altq.h" 68#include "opt_inet.h" 69#include "pf.h" 70#endif 71 72#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 73 74#include <sys/param.h> 75#include <sys/malloc.h> 76#include <sys/mbuf.h> 77#include <sys/socket.h> 78#include <sys/systm.h> 79#include <sys/errno.h> 80#include <sys/kauth.h> 81#if 1 /* ALTQ3_COMPAT */ 82#include <sys/sockio.h> 83#include <sys/proc.h> 84#include <sys/kernel.h> 85#ifdef ALTQ_FLOWVALVE 86#include <sys/queue.h> 87#include <sys/time.h> 88#endif 89#endif /* ALTQ3_COMPAT */ 90#include <sys/cprng.h> 91 92#include <net/if.h> 93 94#include <netinet/in.h> 95#include <netinet/in_systm.h> 96#include <netinet/ip.h> 97#ifdef INET6 98#include <netinet/ip6.h> 99#endif 100 101#if NPF > 0 102#include <net/pfvar.h> 103#endif 104#include <altq/altq.h> 105#include <altq/altq_red.h> 106#ifdef ALTQ3_COMPAT 107#include <altq/altq_conf.h> 108#ifdef ALTQ_FLOWVALVE 109#include <altq/altq_flowvalve.h> 110#endif 111#endif 112 113/* 114 * ALTQ/RED (Random Early Detection) implementation using 32-bit 115 * fixed-point calculation. 116 * 117 * written by kjc using the ns code as a reference. 118 * you can learn more about red and ns from Sally's home page at 119 * http://www-nrg.ee.lbl.gov/floyd/ 120 * 121 * most of the red parameter values are fixed in this implementation 122 * to prevent fixed-point overflow/underflow. 123 * if you change the parameters, watch out for overflow/underflow! 124 * 125 * the parameters used are recommended values by Sally. 126 * the corresponding ns config looks: 127 * q_weight=0.00195 128 * minthresh=5 maxthresh=15 queue-size=60 129 * linterm=30 130 * dropmech=drop-tail 131 * bytes=false (can't be handled by 32-bit fixed-point) 132 * doubleq=false dqthresh=false 133 * wait=true 134 */ 135/* 136 * alternative red parameters for a slow link. 
 *
 * assume the queue length becomes from zero to L and keeps L, it takes
 * N packets for q_avg to reach 63% of L.
 * when q_weight is 0.002, N is about 500 packets.
 * for a slow link like dial-up, 500 packets takes more than 1 minute!
 * when q_weight is 0.008, N is about 127 packets.
 * when q_weight is 0.016, N is about 63 packets.
 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
 * are allowed for 0.016.
 * see Sally's paper for more details.
 */
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */
#define	RED_STATS		/* collect statistics */

/*
 * our default policy for forced-drop is drop-tail.
 * (in altq-1.1.2 or earlier, the default was random-drop.
 * but it makes more sense to punish the cause of the surge.)
 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
 */

#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

#endif /* ALTQ3_COMPAT */

/* default red parameter values */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;

#ifdef ALTQ3_COMPAT
/* internal function prototypes */
static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
static struct mbuf *red_dequeue(struct ifaltq *, int);
static int red_request(struct ifaltq *, int, void *);
static void red_purgeq(red_queue_t *);
static int red_detach(red_queue_t *);
#ifdef ALTQ_FLOWVALVE
static inline struct fve *flowlist_lookup(struct flowvalve *,
    struct altq_pktattr *, struct timeval *);
static inline struct fve *flowlist_reclaim(struct flowvalve *,
    struct altq_pktattr *);
static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
static inline int fv_p2f(struct flowvalve *, int);
static struct flowvalve *fv_alloc(struct red *);
static void fv_destroy(struct flowvalve *);
static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
    struct fve **);
static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
    struct fve *);
#endif
#endif /* ALTQ3_COMPAT */

/*
 * red support routines
 */

/*
 * red_alloc: allocate and initialize a RED state block.
 *
 * Any argument passed as 0 selects the built-in default for that
 * parameter (weight, inv_pmax, th_min, th_max, pkttime).  When weight
 * is defaulted, it is further adapted to the link speed derived from
 * pkttime (slower links get a larger EWMA weight so that avg converges
 * within a reasonable number of packets; see the comment block above).
 *
 * Returns the new red_t, or NULL on allocation failure.
 * NOTE(review): with M_WAITOK the NULL check is defensive only —
 * malloc(9) does not return NULL for M_WAITOK; kept as-is.
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
    int pkttime)
{
	red_t	*rp;
	int	 w, i;
	int	 npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
	if (rp == NULL)
		return (NULL);

	/* start in the idle state with an empty average */
	rp->red_avg = 0;
	rp->red_idle = 1;

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;
	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* round a non-power-of-2 weight down and warn */
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	/* allocate weight table (shared/refcounted per weight value) */
	rp->red_wtab = wtab_alloc(rp->red_weight);

	microtime(&rp->red_last);
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (flags & REDF_FLOWVALVE)
		rp->red_flowvalve = fv_alloc(rp);
	/* if fv_alloc fails, flowvalve is just disabled */
#endif
#endif /* ALTQ3_COMPAT */
	return (rp);
}

/*
 * red_destroy: release a RED state block allocated by red_alloc(),
 * dropping the reference on the shared weight table and the optional
 * flow-valve state.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}

/*
 * red_getstats: copy the current counters into *sp.
 * q_avg is unscaled from its (wshift + FP_SHIFT) fixed-point form
 * back to FP_SHIFT fixed-point.
 */
void
red_getstats(red_t *rp, struct redstats *sp)
{
	sp->q_avg = rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt = rp->red_stats.xmit_cnt;
	sp->drop_cnt = rp->red_stats.drop_cnt;
	sp->drop_forced = rp->red_stats.drop_forced;
	sp->drop_unforced = rp->red_stats.drop_unforced;
	sp->marked_packets = rp->red_stats.marked_packets;
}

/*
 * red_addq: RED enqueue decision for one arriving packet.
 *
 * Updates the EWMA queue-length estimate, then either enqueues m on q,
 * ECN-marks it, or drops it (early/unforced, or forced at th_max or at
 * the hard queue limit).  On drop the mbuf is freed here and -1 is
 * returned; 0 means the packet was queued.
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* pre-drop packets of flows currently trapped by the flow-valve */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}

/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *          = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 *
 * returns 1 to drop/mark, 0 to pass the packet through.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int	d;		/* denominator of drop-probability */

	d = fp_probd - count * fp_len;
	if (d <= 0)
		/* count exceeds the hard limit: drop or mark */
		return (1);

	/*
	 * now the range of d is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / d
	 */

	if ((cprng_fast32() % d) < fp_len) {
		/* drop or mark */
		return (1);
	}
	/* no drop/mark */
	return (0);
}

/*
 * try to mark CE bit to the packet.
 * returns 1 if successfully marked, 0 otherwise.
 *
 * the IP header pointer is recovered from the mbuf's ALTQ tag (or, in
 * ALTQ3 compat mode, from pktattr) and is validated to still point
 * inside the mbuf chain before being written.  for IPv4 the checksum
 * is updated incrementally per RFC 1624.
 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf	*m0;
	struct m_tag	*t;
	struct altq_tag	*at;
	void		*hdr;
	int		 af;

	t = m_tag_find(m, PACKET_TAG_ALTQ_QID, NULL);
	if (t != NULL) {
		at = (struct altq_tag *)(t + 1);
		/*
		 * NOTE(review): (t + 1) can never be NULL when t != NULL,
		 * so this check is dead code; kept as-is.
		 */
		if (at == NULL)
			return (0);
		af = at->af;
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return (0);

	if (af != AF_INET && af != AF_INET6)
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)hdr >= m0->m_data) &&
		    ((char *)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return (0);
	}

	switch (af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			/* ECN bits live in bits 20-21 of the flow word */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}

/*
 * red_getq: dequeue one packet from q.
 * when the queue goes empty, record the time so that red_addq() can
 * later decay avg over the idle period.  returns NULL on empty queue.
 */
struct mbuf *
red_getq(red_t *rp, class_queue_t *q)
{
	struct mbuf *m;

	if ((m = _getq(q)) == NULL) {
		if (rp->red_idle == 0) {
			rp->red_idle = 1;
			microtime(&rp->red_last);
		}
		return NULL;
	}

	rp->red_idle = 0;
	return (m);
}

/*
 * helper routine to calibrate avg during idle.
 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
 * here Wq = 1/weight and the code assumes Wq is close to zero.
 *
 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
 */
static struct wtab *wtab_list = NULL;	/* pointer to wtab list */

/*
 * wtab_alloc: get a weight table for the given EWMA weight.
 * tables are shared: an existing table for the same weight is
 * refcounted and reused; otherwise a new one is built by repeated
 * squaring of (1 - 1/weight) in FP_SHIFT fixed-point.
 * w_param_max records the first n at which (1-Wq)^n underflows to 0.
 */
struct wtab *
wtab_alloc(int weight)
{
	struct wtab	*w;
	int		 i;

	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
	if (w == NULL)
		panic("wtab_alloc: malloc failed!");
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}

/*
 * wtab_destroy: drop a reference on w; the table is unlinked from
 * wtab_list and freed when the last reference goes away.
 */
int
wtab_destroy(struct wtab *w)
{
	struct wtab	*prev;

	if (--w->w_refcount > 0)
		return (0);

	if (wtab_list == w)
		wtab_list = w->w_next;
	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
		if (prev->w_next == w) {
			prev->w_next = w->w_next;
			break;
		}

	free(w, M_DEVBUF);
	return (0);
}

/*
 * pow_w: compute (1 - Wq)^n in FP_SHIFT fixed-point by binary
 * exponentiation over the precomputed w_tab[] squares.
 * returns 0 once n reaches w_param_max (the result has underflowed),
 * and 1.0 (in fixed-point) for n <= 0.
 */
int32_t
pow_w(struct wtab *w, int n)
{
	int	i, bit;
	int32_t	val;

	if (n >= w->w_param_max)
		return (0);

	val = 1 << FP_SHIFT;
	if (n <= 0)
		return (val);

	bit = 1;
	i = 0;
	while (n) {
		if (n & bit) {
			val = (val * w->w_tab[i]) >> FP_SHIFT;
			n &= ~bit;
		}
		i++;
		bit <<= 1;
	}
	return (val);
}

#ifdef ALTQ3_COMPAT
/*
 * red device interface (legacy /dev/red ioctl interface)
 */
altqdev_decl(red);

/*
 * redopen: device open; nothing to do here.
 */
int
redopen(dev_t dev, int flag, int fmt,
    struct lwp *l)
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}

/*
 * redclose: device close; detach every attached RED state.
 * the first error from red_detach() is preserved and returned.
 */
int
redclose(dev_t dev, int flag, int fmt,
    struct lwp *l)
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}

/*
 * redioctl: legacy control interface.
 * RED_GETSTATS is unprivileged; all other commands require
 * KAUTH_NETWORK_ALTQ authorization.  commands attach/detach RED to an
 * interface, enable/disable it, read statistics, reconfigure
 * parameters, and set the module-wide defaults.
 */
int
redioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    struct lwp *l)
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	struct proc *p = l->l_proc;
	int	error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = kauth_authorize_network(p->p_cred,
		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL,
		    NULL, NULL)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		/* all-default RED parameters at attach time */
		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
		    red_enqueue, red_dequeue, red_request,
		    NULL, NULL);
		if (error) {
			red_destroy(rqp->rq_red);
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
			     ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len	   = qlen(rqp->rq_q);
			q_stats->q_limit   = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			q_stats->q_avg	   = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
			q_stats->drop_forced   = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight		= rp->red_weight;
			q_stats->inv_pmax	= rp->red_inv_pmax;
			q_stats->th_min		= rp->red_thmin;
			q_stats->th_max		= rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows    = fv->fv_flows;
				q_stats->fv_pass     = fv->fv_stats.pass;
				q_stats->fv_predrop  = fv->fv_stats.predrop;
				q_stats->fv_alloc    = fv->fv_stats.alloc;
				q_stats->fv_escape   = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows    = 0;
				q_stats->fv_pass     = 0;
				q_stats->fv_predrop  = 0;
				q_stats->fv_alloc    = 0;
				q_stats->fv_escape   = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (/*CONSTCOND*/ 0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
			     ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			/* build the replacement state before locking */
			new = red_alloc(fc->red_weight,
					fc->red_inv_pmax,
					fc->red_thmin,
					fc->red_thmax,
					fc->red_flags,
					fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			/* queue limit must cover the max threshold */
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			fc->red_limit = limit;	/* write back the new value */

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/*
			 * write back new values
			 * NOTE(review): red_limit was already written back
			 * above; this second assignment is redundant but
			 * harmless.
			 */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (/*CONSTCOND*/ 0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (/*CONSTCOND*/ 0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}

/*
 * red_detach: disable and detach rqp from its interface, unlink it
 * from red_list, and free all associated state.
 */
static int
red_detach(red_queue_t *rqp)
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	free(rqp->rq_q, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}

/*
 * enqueue routine:
 *
 *	returns: 0 when successfully queued.
 *		 ENOBUFS when drop occurs.
 */
static int
red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	/* red_addq frees the mbuf itself on drop */
	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
		return ENOBUFS;
	ifq->ifq_len++;
	return 0;
}

/*
 * dequeue routine:
 *	must be called in splnet.
 *
 *	returns: mbuf dequeued.
 *		 NULL when no packet is available in the queue.
 */
static struct mbuf *
red_dequeue(struct ifaltq *ifq, int op)
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
	struct mbuf *m;

	if (op == ALTDQ_POLL)
		return qhead(rqp->rq_q);

	/* op == ALTDQ_REMOVE */
	m = red_getq(rqp->rq_red, rqp->rq_q);
	if (m != NULL)
		ifq->ifq_len--;
	return (m);
}

/*
 * red_request: generic altq request hook; only ALTRQ_PURGE is handled.
 */
static int
red_request(struct ifaltq *ifq, int req, void *arg)
{
	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;

	switch (req) {
	case ALTRQ_PURGE:
		red_purgeq(rqp);
		break;
	}
	return (0);
}

/*
 * red_purgeq: discard every queued packet and reset the interface
 * queue length counter.
 */
static void
red_purgeq(red_queue_t *rqp)
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}

#ifdef ALTQ_FLOWVALVE

/* fixed-point scaling for the per-flow average drop rate (p) */
#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
/* fixed-point scaling for the per-flow average fraction (f) */
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage
collector */
#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */

#define	FV_N			10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
#define	FV_TTHRESH		3  /* time threshold to delete fve */
#define	FV_ALPHA		5  /* extra packet count */

#define	FV_STATS

#define	FV_TIMESTAMP(tp)	getmicrotime(tp)

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};

/*
 * flowlist_lookup: search the LRU flow list for an entry matching the
 * packet's address pair (IPv4 src/dst or IPv6 src/dst).
 * entries older than FV_TTHRESH are expired on the way (lastdrop
 * zeroed), and the scan stops at the first empty/expired slot since
 * the list is kept in LRU order.  as a side effect fv->fv_flows is
 * refreshed with the number of live entries scanned when no match is
 * found.  returns the matching fve or NULL.
 */
static inline struct fve *
flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct timeval *now)
{
	struct fve	*fve;
	int		 flows;
	struct ip	*ip;
#ifdef INET6
	struct ip6_hdr	*ip6;
#endif
	struct timeval	 tthresh;

	if (pktattr == NULL)
		return (NULL);

	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}

/*
 * flowlist_reclaim: recycle the least-recently-used entry (tail of the
 * LRU list) for the flow described by pktattr, resetting its state to
 * Green with zeroed averages.
 * NOTE(review): fve_p and fve_f are assigned the floating constant 0.0
 * although they appear to be fixed-point fields elsewhere — kept as in
 * the original; confirm field types in altq_flowvalve.h.
 */
static inline struct fve *
flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
{
	struct fve	*fve;
	struct ip	*ip;
#ifdef INET6
	struct ip6_hdr	*ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	fve->fve_state = Green;
	fve->fve_p = 0.0;
	fve->fve_f = 0.0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}

/*
 * flowlist_move_to_head: mark fve most-recently-used by moving it to
 * the head of the LRU list (no-op if it is already there).
 */
static inline void
flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
{
	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
	}
}

/*
 * allocate flowvalve structure
 *
 * builds the empty LRU flow list, the scaled drop-rate threshold
 * derived from rp->red_inv_pmax, and the p->f conversion table scaled
 * from brtt_tab[] by (red_thmax + FV_ALPHA).  returns NULL on
 * allocation failure (the caller then simply runs without flow-valve).
 */
static struct flowvalve *
fv_alloc(struct red *rp)
{
	struct flowvalve *fv;
	struct fve	*fve;
	int		 i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
	if (fv == NULL)
		return (NULL);

	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
	    M_WAITOK|M_ZERO);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;	/* tv_sec == 0 means unused */
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}

/*
 * fv_destroy: free everything allocated by fv_alloc().
 */
static void
fv_destroy(struct flowvalve *fv)
{
	free(fv->fv_p2ftab, M_DEVBUF);
	free(fv->fv_fves, M_DEVBUF);
	free(fv, M_DEVBUF);
}

/*
 * fv_p2f: convert a scaled drop rate p to the corresponding bandwidth
 * fraction f by table lookup; p is clamped to the table's last entry,
 * and values below the table resolution use the first non-zero entry.
 */
static inline int
fv_p2f(struct flowvalve *fv, int p)
{
	int val, f;

	if (p >= BRTT_PMAX)
		f = fv->fv_p2ftab[BRTT_SIZE-1];
	else if ((val = (p & BRTT_MASK)))
		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
	else
		f = fv->fv_p2ftab[1];
	return (f);
}

/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splnet.
 */
static int
fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct fve **fcache)
{
	struct fve *fve;
	struct timeval now;

	/* count this arrival in the interface-wide packet sequence */
	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* cache the entry so that fv_dropbyred() can skip the lookup */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (scaled fixed-point EWMA of this flow's share of the
		 * packets seen since its last update; FV_FUNSCALE(f) is
		 * the Wf * f term being decayed away)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose drop-rate estimate p
	 * exceeds the threshold is checked against the bandwidth
	 * fraction Brtt(p); using more than that share turns it Red.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay the drop-rate estimate; fv_dropbyred() compensates for
	 * this decay when the packet ends up being dropped by RED)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}

/*
 * called from red_addq when a packet is dropped by red.
 * should be called in splnet.
 */
static void
fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct fve *fcache)
{
	struct fve *fve;
	struct timeval now;

	/* without packet attributes the flow cannot be identified */
	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* unseen (or expired) flow: recycle the LRU entry for it */
		fve = flowlist_reclaim(fv, pktattr);

	/* this flow was just dropped; make it the most-recently-used entry */
	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 * in fv_checkflow() and calculate
	 *   p = Wp + (1 - Wp) * p
	 * (adding 1 << FP_SHIFT is Wp in the scale fve_p is kept in)
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}

#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

/* character-device entry points for the loadable-module interface */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
#endif /* ALTQ3_COMPAT */

#endif /* ALTQ_RED */