1/* $FreeBSD$ */ 2/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */ 3 4/* 5 * Copyright (C) 1997-2003 6 * Sony Computer Science Laboratories Inc. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30/* 31 * Copyright (c) 1990-1994 Regents of the University of California. 32 * All rights reserved. 33 * 34 * Redistribution and use in source and binary forms, with or without 35 * modification, are permitted provided that the following conditions 36 * are met: 37 * 1. Redistributions of source code must retain the above copyright 38 * notice, this list of conditions and the following disclaimer. 39 * 2. Redistributions in binary form must reproduce the above copyright 40 * notice, this list of conditions and the following disclaimer in the 41 * documentation and/or other materials provided with the distribution. 42 * 3. All advertising materials mentioning features or use of this software 43 * must display the following acknowledgement: 44 * This product includes software developed by the Computer Systems 45 * Engineering Group at Lawrence Berkeley Laboratory. 46 * 4. Neither the name of the University nor of the Laboratory may be used 47 * to endorse or promote products derived from this software without 48 * specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 */ 62 63#if defined(__FreeBSD__) || defined(__NetBSD__) 64#include "opt_altq.h" 65#include "opt_inet.h" 66#ifdef __FreeBSD__ 67#include "opt_inet6.h" 68#endif 69#endif /* __FreeBSD__ || __NetBSD__ */ 70#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */ 71 72#include <sys/param.h> 73#include <sys/malloc.h> 74#include <sys/mbuf.h> 75#include <sys/socket.h> 76#include <sys/systm.h> 77#include <sys/errno.h> 78#if 1 /* ALTQ3_COMPAT */ 79#include <sys/sockio.h> 80#include <sys/proc.h> 81#include <sys/kernel.h> 82#ifdef ALTQ_FLOWVALVE 83#include <sys/queue.h> 84#include <sys/time.h> 85#endif 86#endif /* ALTQ3_COMPAT */ 87 88#include <net/if.h> 89 90#include <netinet/in.h> 91#include <netinet/in_systm.h> 92#include <netinet/ip.h> 93#ifdef INET6 94#include <netinet/ip6.h> 95#endif 96 97#include <net/pfvar.h> 98#include <altq/altq.h> 99#include <altq/altq_red.h> 100#ifdef ALTQ3_COMPAT 101#include <altq/altq_conf.h> 102#ifdef ALTQ_FLOWVALVE 103#include <altq/altq_flowvalve.h> 104#endif 105#endif 106 107/* 108 * ALTQ/RED (Random Early Detection) implementation using 32-bit 109 * fixed-point calculation. 110 * 111 * written by kjc using the ns code as a reference. 112 * you can learn more about red and ns from Sally's home page at 113 * http://www-nrg.ee.lbl.gov/floyd/ 114 * 115 * most of the red parameter values are fixed in this implementation 116 * to prevent fixed-point overflow/underflow. 117 * if you change the parameters, watch out for overflow/underflow! 118 * 119 * the parameters used are recommended values by Sally. 120 * the corresponding ns config looks: 121 * q_weight=0.00195 122 * minthresh=5 maxthresh=15 queue-size=60 123 * linterm=30 124 * dropmech=drop-tail 125 * bytes=false (can't be handled by 32-bit fixed-point) 126 * doubleq=false dqthresh=false 127 * wait=true 128 */ 129/* 130 * alternative red parameters for a slow link. 131 * 132 * assume the queue length becomes from zero to L and keeps L, it takes 133 * N packets for q_avg to reach 63% of L. 134 * when q_weight is 0.002, N is about 500 packets. 135 * for a slow link like dial-up, 500 packets takes more than 1 minute! 136 * when q_weight is 0.008, N is about 127 packets. 137 * when q_weight is 0.016, N is about 63 packets. 138 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets 139 * are allowed for 0.016. 140 * see Sally's paper for more details. 141 */ 142/* normal red parameters */ 143#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */ 144 /* q_weight = 0.00195 */ 145 146/* red parameters for a slow link */ 147#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */ 148 /* q_weight = 0.0078125 */ 149 150/* red parameters for a very slow link (e.g., dialup) */ 151#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */ 152 /* q_weight = 0.015625 */ 153 154/* fixed-point uses 12-bit decimal places */ 155#define FP_SHIFT 12 /* fixed-point shift */ 156 157/* red parameters for drop probability */ 158#define INV_P_MAX 10 /* inverse of max drop probability */ 159#define TH_MIN 5 /* min threshold */ 160#define TH_MAX 15 /* max threshold */ 161 162#define RED_LIMIT 60 /* default max queue lenght */ 163#define RED_STATS /* collect statistics */ 164 165/* 166 * our default policy for forced-drop is drop-tail. 167 * (in altq-1.1.2 or earlier, the default was random-drop. 168 * but it makes more sense to punish the cause of the surge.) 169 * to switch to the random-drop policy, define "RED_RANDOM_DROP". 170 */ 171 172#ifdef ALTQ3_COMPAT 173#ifdef ALTQ_FLOWVALVE 174/* 175 * flow-valve is an extention to protect red from unresponsive flows 176 * and to promote end-to-end congestion control. 177 * flow-valve observes the average drop rates of the flows that have 178 * experienced packet drops in the recent past. 179 * when the average drop rate exceeds the threshold, the flow is 180 * blocked by the flow-valve. the trapped flow should back off 181 * exponentially to escape from the flow-valve. 182 */ 183#ifdef RED_RANDOM_DROP 184#error "random-drop can't be used with flow-valve!" 185#endif 186#endif /* ALTQ_FLOWVALVE */ 187 188/* red_list keeps all red_queue_t's allocated. */ 189static red_queue_t *red_list = NULL; 190 191#endif /* ALTQ3_COMPAT */ 192 193/* default red parameter values */ 194static int default_th_min = TH_MIN; 195static int default_th_max = TH_MAX; 196static int default_inv_pmax = INV_P_MAX; 197 198#ifdef ALTQ3_COMPAT 199/* internal function prototypes */ 200static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 201static struct mbuf *red_dequeue(struct ifaltq *, int); 202static int red_request(struct ifaltq *, int, void *); 203static void red_purgeq(red_queue_t *); 204static int red_detach(red_queue_t *); 205#ifdef ALTQ_FLOWVALVE 206static __inline struct fve *flowlist_lookup(struct flowvalve *, 207 struct altq_pktattr *, struct timeval *); 208static __inline struct fve *flowlist_reclaim(struct flowvalve *, 209 struct altq_pktattr *); 210static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *); 211static __inline int fv_p2f(struct flowvalve *, int); 212#if 0 /* XXX: make the compiler happy (fv_alloc unused) */ 213static struct flowvalve *fv_alloc(struct red *); 214#endif 215static void fv_destroy(struct flowvalve *); 216static int fv_checkflow(struct flowvalve *, struct altq_pktattr *, 217 struct fve **); 218static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *, 219 struct fve *); 220#endif 221#endif /* ALTQ3_COMPAT */ 222 223/* 224 * red support routines 225 */ 226red_t * 227red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags, 228 int pkttime) 229{ 230 red_t *rp; 231 int w, i; 232 int npkts_per_sec; 233 234 rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK); 235 if (rp == NULL) 236 return (NULL); 237 bzero(rp, sizeof(red_t)); 238 239 rp->red_avg = 0; 240 rp->red_idle = 1; 241 242 if (weight == 0) 243 rp->red_weight = W_WEIGHT; 244 else 245 rp->red_weight = weight; 246 if (inv_pmax == 0) 247 rp->red_inv_pmax = default_inv_pmax; 248 else 249 rp->red_inv_pmax = inv_pmax; 250 if (th_min == 0) 251 rp->red_thmin = default_th_min; 252 else 253 rp->red_thmin = th_min; 254 if (th_max == 0) 255 rp->red_thmax = default_th_max; 256 else 257 rp->red_thmax = th_max; 258 259 rp->red_flags = flags; 260 261 if (pkttime == 0) 262 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */ 263 rp->red_pkttime = 800; 264 else 265 rp->red_pkttime = pkttime; 266 267 if (weight == 0) { 268 /* when the link is very slow, adjust red parameters */ 269 npkts_per_sec = 1000000 / rp->red_pkttime; 270 if (npkts_per_sec < 50) { 271 /* up to about 400Kbps */ 272 rp->red_weight = W_WEIGHT_2; 273 } else if (npkts_per_sec < 300) { 274 /* up to about 2.4Mbps */ 275 rp->red_weight = W_WEIGHT_1; 276 } 277 } 278 279 /* calculate wshift. weight must be power of 2 */ 280 w = rp->red_weight; 281 for (i = 0; w > 1; i++) 282 w = w >> 1; 283 rp->red_wshift = i; 284 w = 1 << rp->red_wshift; 285 if (w != rp->red_weight) { 286 printf("invalid weight value %d for red! use %d\n", 287 rp->red_weight, w); 288 rp->red_weight = w; 289 } 290 291 /* 292 * thmin_s and thmax_s are scaled versions of th_min and th_max 293 * to be compared with avg. 294 */ 295 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT); 296 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT); 297 298 /* 299 * precompute probability denominator 300 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point 301 */ 302 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin) 303 * rp->red_inv_pmax) << FP_SHIFT; 304 305 /* allocate weight table */ 306 rp->red_wtab = wtab_alloc(rp->red_weight); 307 308 microtime(&rp->red_last); 309 return (rp); 310} 311 312void 313red_destroy(red_t *rp) 314{ 315#ifdef ALTQ3_COMPAT 316#ifdef ALTQ_FLOWVALVE 317 if (rp->red_flowvalve != NULL) 318 fv_destroy(rp->red_flowvalve); 319#endif 320#endif /* ALTQ3_COMPAT */ 321 wtab_destroy(rp->red_wtab); 322 free(rp, M_DEVBUF); 323} 324 325void 326red_getstats(red_t *rp, struct redstats *sp) 327{ 328 sp->q_avg = rp->red_avg >> rp->red_wshift; 329 sp->xmit_cnt = rp->red_stats.xmit_cnt; 330 sp->drop_cnt = rp->red_stats.drop_cnt; 331 sp->drop_forced = rp->red_stats.drop_forced; 332 sp->drop_unforced = rp->red_stats.drop_unforced; 333 sp->marked_packets = rp->red_stats.marked_packets; 334} 335 336int 337red_addq(red_t *rp, class_queue_t *q, struct mbuf *m, 338 struct altq_pktattr *pktattr) 339{ 340 int avg, droptype; 341 int n; 342#ifdef ALTQ3_COMPAT 343#ifdef ALTQ_FLOWVALVE 344 struct fve *fve = NULL; 345 346 if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0) 347 if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) { 348 m_freem(m); 349 return (-1); 350 } 351#endif 352#endif /* ALTQ3_COMPAT */ 353 354 avg = rp->red_avg; 355 356 /* 357 * if we were idle, we pretend that n packets arrived during 358 * the idle period. 359 */ 360 if (rp->red_idle) { 361 struct timeval now; 362 int t; 363 364 rp->red_idle = 0; 365 microtime(&now); 366 t = (now.tv_sec - rp->red_last.tv_sec); 367 if (t > 60) { 368 /* 369 * being idle for more than 1 minute, set avg to zero. 370 * this prevents t from overflow. 371 */ 372 avg = 0; 373 } else { 374 t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec); 375 n = t / rp->red_pkttime - 1; 376 377 /* the following line does (avg = (1 - Wq)^n * avg) */ 378 if (n > 0) 379 avg = (avg >> FP_SHIFT) * 380 pow_w(rp->red_wtab, n); 381 } 382 } 383 384 /* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */ 385 avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift); 386 rp->red_avg = avg; /* save the new value */ 387 388 /* 389 * red_count keeps a tally of arriving traffic that has not 390 * been dropped. 391 */ 392 rp->red_count++; 393 394 /* see if we drop early */ 395 droptype = DTYPE_NODROP; 396 if (avg >= rp->red_thmin_s && qlen(q) > 1) { 397 if (avg >= rp->red_thmax_s) { 398 /* avg >= th_max: forced drop */ 399 droptype = DTYPE_FORCED; 400 } else if (rp->red_old == 0) { 401 /* first exceeds th_min */ 402 rp->red_count = 1; 403 rp->red_old = 1; 404 } else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift, 405 rp->red_probd, rp->red_count)) { 406 /* mark or drop by red */ 407 if ((rp->red_flags & REDF_ECN) && 408 mark_ecn(m, pktattr, rp->red_flags)) { 409 /* successfully marked. do not drop. */ 410 rp->red_count = 0; 411#ifdef RED_STATS 412 rp->red_stats.marked_packets++; 413#endif 414 } else { 415 /* unforced drop by red */ 416 droptype = DTYPE_EARLY; 417 } 418 } 419 } else { 420 /* avg < th_min */ 421 rp->red_old = 0; 422 } 423 424 /* 425 * if the queue length hits the hard limit, it's a forced drop. 426 */ 427 if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q)) 428 droptype = DTYPE_FORCED; 429 430#ifdef RED_RANDOM_DROP 431 /* if successful or forced drop, enqueue this packet. */ 432 if (droptype != DTYPE_EARLY) 433 _addq(q, m); 434#else 435 /* if successful, enqueue this packet. */ 436 if (droptype == DTYPE_NODROP) 437 _addq(q, m); 438#endif 439 if (droptype != DTYPE_NODROP) { 440 if (droptype == DTYPE_EARLY) { 441 /* drop the incoming packet */ 442#ifdef RED_STATS 443 rp->red_stats.drop_unforced++; 444#endif 445 } else { 446 /* forced drop, select a victim packet in the queue. */ 447#ifdef RED_RANDOM_DROP 448 m = _getq_random(q); 449#endif 450#ifdef RED_STATS 451 rp->red_stats.drop_forced++; 452#endif 453 } 454#ifdef RED_STATS 455 PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m)); 456#endif 457 rp->red_count = 0; 458#ifdef ALTQ3_COMPAT 459#ifdef ALTQ_FLOWVALVE 460 if (rp->red_flowvalve != NULL) 461 fv_dropbyred(rp->red_flowvalve, pktattr, fve); 462#endif 463#endif /* ALTQ3_COMPAT */ 464 m_freem(m); 465 return (-1); 466 } 467 /* successfully queued */ 468#ifdef RED_STATS 469 PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m)); 470#endif 471 return (0); 472} 473 474/* 475 * early-drop probability is calculated as follows: 476 * prob = p_max * (avg - th_min) / (th_max - th_min) 477 * prob_a = prob / (2 - count*prob) 478 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min)) 479 * here prob_a increases as successive undrop count increases. 480 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)), 481 * becomes 1 when (count >= (2 / prob))). 482 */ 483int 484drop_early(int fp_len, int fp_probd, int count) 485{ 486 int d; /* denominator of drop-probability */ 487 488 d = fp_probd - count * fp_len; 489 if (d <= 0) 490 /* count exceeds the hard limit: drop or mark */ 491 return (1); 492 493 /* 494 * now the range of d is [1..600] in fixed-point. (when 495 * th_max-th_min=10 and p_max=1/30) 496 * drop probability = (avg - TH_MIN) / d 497 */ 498 499 if ((arc4random() % d) < fp_len) { 500 /* drop or mark */ 501 return (1); 502 } 503 /* no drop/mark */ 504 return (0); 505} 506 507/* 508 * try to mark CE bit to the packet. 509 * returns 1 if successfully marked, 0 otherwise. 510 */ 511int 512mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags) 513{ 514 struct mbuf *m0; 515 struct pf_mtag *at; 516 void *hdr; 517 518 at = pf_find_mtag(m); 519 if (at != NULL) { 520 hdr = at->hdr; 521#ifdef ALTQ3_COMPAT 522 } else if (pktattr != NULL) { 523 af = pktattr->pattr_af; 524 hdr = pktattr->pattr_hdr; 525#endif /* ALTQ3_COMPAT */ 526 } else 527 return (0); 528 529 /* verify that pattr_hdr is within the mbuf data */ 530 for (m0 = m; m0 != NULL; m0 = m0->m_next) 531 if (((caddr_t)hdr >= m0->m_data) && 532 ((caddr_t)hdr < m0->m_data + m0->m_len)) 533 break; 534 if (m0 == NULL) { 535 /* ick, tag info is stale */ 536 return (0); 537 } 538 539 switch (((struct ip *)hdr)->ip_v) { 540 case IPVERSION: 541 if (flags & REDF_ECN4) { 542 struct ip *ip = hdr; 543 u_int8_t otos; 544 int sum; 545 546 if (ip->ip_v != 4) 547 return (0); /* version mismatch! */ 548 549 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT) 550 return (0); /* not-ECT */ 551 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE) 552 return (1); /* already marked */ 553 554 /* 555 * ecn-capable but not marked, 556 * mark CE and update checksum 557 */ 558 otos = ip->ip_tos; 559 ip->ip_tos |= IPTOS_ECN_CE; 560 /* 561 * update checksum (from RFC1624) 562 * HC' = ~(~HC + ~m + m') 563 */ 564 sum = ~ntohs(ip->ip_sum) & 0xffff; 565 sum += (~otos & 0xffff) + ip->ip_tos; 566 sum = (sum >> 16) + (sum & 0xffff); 567 sum += (sum >> 16); /* add carry */ 568 ip->ip_sum = htons(~sum & 0xffff); 569 return (1); 570 } 571 break; 572#ifdef INET6 573 case (IPV6_VERSION >> 4): 574 if (flags & REDF_ECN6) { 575 struct ip6_hdr *ip6 = hdr; 576 u_int32_t flowlabel; 577 578 flowlabel = ntohl(ip6->ip6_flow); 579 if ((flowlabel >> 28) != 6) 580 return (0); /* version mismatch! */ 581 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 582 (IPTOS_ECN_NOTECT << 20)) 583 return (0); /* not-ECT */ 584 if ((flowlabel & (IPTOS_ECN_MASK << 20)) == 585 (IPTOS_ECN_CE << 20)) 586 return (1); /* already marked */ 587 /* 588 * ecn-capable but not marked, mark CE 589 */ 590 flowlabel |= (IPTOS_ECN_CE << 20); 591 ip6->ip6_flow = htonl(flowlabel); 592 return (1); 593 } 594 break; 595#endif /* INET6 */ 596 } 597 598 /* not marked */ 599 return (0); 600} 601 602struct mbuf * 603red_getq(rp, q) 604 red_t *rp; 605 class_queue_t *q; 606{ 607 struct mbuf *m; 608 609 if ((m = _getq(q)) == NULL) { 610 if (rp->red_idle == 0) { 611 rp->red_idle = 1; 612 microtime(&rp->red_last); 613 } 614 return NULL; 615 } 616 617 rp->red_idle = 0; 618 return (m); 619} 620 621/* 622 * helper routine to calibrate avg during idle. 623 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point 624 * here Wq = 1/weight and the code assumes Wq is close to zero. 625 * 626 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point. 627 */ 628static struct wtab *wtab_list = NULL; /* pointer to wtab list */ 629 630struct wtab * 631wtab_alloc(int weight) 632{ 633 struct wtab *w; 634 int i; 635 636 for (w = wtab_list; w != NULL; w = w->w_next) 637 if (w->w_weight == weight) { 638 w->w_refcount++; 639 return (w); 640 } 641 642 w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK); 643 if (w == NULL) 644 panic("wtab_alloc: malloc failed!"); 645 bzero(w, sizeof(struct wtab)); 646 w->w_weight = weight; 647 w->w_refcount = 1; 648 w->w_next = wtab_list; 649 wtab_list = w; 650 651 /* initialize the weight table */ 652 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight; 653 for (i = 1; i < 32; i++) { 654 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT; 655 if (w->w_tab[i] == 0 && w->w_param_max == 0) 656 w->w_param_max = 1 << i; 657 } 658 659 return (w); 660} 661 662int 663wtab_destroy(struct wtab *w) 664{ 665 struct wtab *prev; 666 667 if (--w->w_refcount > 0) 668 return (0); 669 670 if (wtab_list == w) 671 wtab_list = w->w_next; 672 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next) 673 if (prev->w_next == w) { 674 prev->w_next = w->w_next; 675 break; 676 } 677 678 free(w, M_DEVBUF); 679 return (0); 680} 681 682int32_t 683pow_w(struct wtab *w, int n) 684{ 685 int i, bit; 686 int32_t val; 687 688 if (n >= w->w_param_max) 689 return (0); 690 691 val = 1 << FP_SHIFT; 692 if (n <= 0) 693 return (val); 694 695 bit = 1; 696 i = 0; 697 while (n) { 698 if (n & bit) { 699 val = (val * w->w_tab[i]) >> FP_SHIFT; 700 n &= ~bit; 701 } 702 i++; 703 bit <<= 1; 704 } 705 return (val); 706} 707 708#ifdef ALTQ3_COMPAT 709/* 710 * red device interface 711 */ 712altqdev_decl(red); 713 714int 715redopen(dev, flag, fmt, p) 716 dev_t dev; 717 int flag, fmt; 718#if (__FreeBSD_version > 500000) 719 struct thread *p; 720#else 721 struct proc *p; 722#endif 723{ 724 /* everything will be done when the queueing scheme is attached. */ 725 return 0; 726} 727 728int 729redclose(dev, flag, fmt, p) 730 dev_t dev; 731 int flag, fmt; 732#if (__FreeBSD_version > 500000) 733 struct thread *p; 734#else 735 struct proc *p; 736#endif 737{ 738 red_queue_t *rqp; 739 int err, error = 0; 740 741 while ((rqp = red_list) != NULL) { 742 /* destroy all */ 743 err = red_detach(rqp); 744 if (err != 0 && error == 0) 745 error = err; 746 } 747 748 return error; 749} 750 751int 752redioctl(dev, cmd, addr, flag, p) 753 dev_t dev; 754 ioctlcmd_t cmd; 755 caddr_t addr; 756 int flag; 757#if (__FreeBSD_version > 500000) 758 struct thread *p; 759#else 760 struct proc *p; 761#endif 762{ 763 red_queue_t *rqp; 764 struct red_interface *ifacep; 765 struct ifnet *ifp; 766 int error = 0; 767 768 /* check super-user privilege */ 769 switch (cmd) { 770 case RED_GETSTATS: 771 break; 772 default: 773#if (__FreeBSD_version > 700000) 774 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0) 775#elsif (__FreeBSD_version > 400000) 776 if ((error = suser(p)) != 0) 777#else 778 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0) 779#endif 780 return (error); 781 break; 782 } 783 784 switch (cmd) { 785 786 case RED_ENABLE: 787 ifacep = (struct red_interface *)addr; 788 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 789 error = EBADF; 790 break; 791 } 792 error = altq_enable(rqp->rq_ifq); 793 break; 794 795 case RED_DISABLE: 796 ifacep = (struct red_interface *)addr; 797 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 798 error = EBADF; 799 break; 800 } 801 error = altq_disable(rqp->rq_ifq); 802 break; 803 804 case RED_IF_ATTACH: 805 ifp = ifunit(((struct red_interface *)addr)->red_ifname); 806 if (ifp == NULL) { 807 error = ENXIO; 808 break; 809 } 810 811 /* allocate and initialize red_queue_t */ 812 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK); 813 if (rqp == NULL) { 814 error = ENOMEM; 815 break; 816 } 817 bzero(rqp, sizeof(red_queue_t)); 818 819 rqp->rq_q = malloc(sizeof(class_queue_t), 820 M_DEVBUF, M_WAITOK); 821 if (rqp->rq_q == NULL) { 822 free(rqp, M_DEVBUF); 823 error = ENOMEM; 824 break; 825 } 826 bzero(rqp->rq_q, sizeof(class_queue_t)); 827 828 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0); 829 if (rqp->rq_red == NULL) { 830 free(rqp->rq_q, M_DEVBUF); 831 free(rqp, M_DEVBUF); 832 error = ENOMEM; 833 break; 834 } 835 836 rqp->rq_ifq = &ifp->if_snd; 837 qtail(rqp->rq_q) = NULL; 838 qlen(rqp->rq_q) = 0; 839 qlimit(rqp->rq_q) = RED_LIMIT; 840 qtype(rqp->rq_q) = Q_RED; 841 842 /* 843 * set RED to this ifnet structure. 844 */ 845 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp, 846 red_enqueue, red_dequeue, red_request, 847 NULL, NULL); 848 if (error) { 849 red_destroy(rqp->rq_red); 850 free(rqp->rq_q, M_DEVBUF); 851 free(rqp, M_DEVBUF); 852 break; 853 } 854 855 /* add this state to the red list */ 856 rqp->rq_next = red_list; 857 red_list = rqp; 858 break; 859 860 case RED_IF_DETACH: 861 ifacep = (struct red_interface *)addr; 862 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) { 863 error = EBADF; 864 break; 865 } 866 error = red_detach(rqp); 867 break; 868 869 case RED_GETSTATS: 870 do { 871 struct red_stats *q_stats; 872 red_t *rp; 873 874 q_stats = (struct red_stats *)addr; 875 if ((rqp = altq_lookup(q_stats->iface.red_ifname, 876 ALTQT_RED)) == NULL) { 877 error = EBADF; 878 break; 879 } 880 881 q_stats->q_len = qlen(rqp->rq_q); 882 q_stats->q_limit = qlimit(rqp->rq_q); 883 884 rp = rqp->rq_red; 885 q_stats->q_avg = rp->red_avg >> rp->red_wshift; 886 q_stats->xmit_cnt = rp->red_stats.xmit_cnt; 887 q_stats->drop_cnt = rp->red_stats.drop_cnt; 888 q_stats->drop_forced = rp->red_stats.drop_forced; 889 q_stats->drop_unforced = rp->red_stats.drop_unforced; 890 q_stats->marked_packets = rp->red_stats.marked_packets; 891 892 q_stats->weight = rp->red_weight; 893 q_stats->inv_pmax = rp->red_inv_pmax; 894 q_stats->th_min = rp->red_thmin; 895 q_stats->th_max = rp->red_thmax; 896 897#ifdef ALTQ_FLOWVALVE 898 if (rp->red_flowvalve != NULL) { 899 struct flowvalve *fv = rp->red_flowvalve; 900 q_stats->fv_flows = fv->fv_flows; 901 q_stats->fv_pass = fv->fv_stats.pass; 902 q_stats->fv_predrop = fv->fv_stats.predrop; 903 q_stats->fv_alloc = fv->fv_stats.alloc; 904 q_stats->fv_escape = fv->fv_stats.escape; 905 } else { 906#endif /* ALTQ_FLOWVALVE */ 907 q_stats->fv_flows = 0; 908 q_stats->fv_pass = 0; 909 q_stats->fv_predrop = 0; 910 q_stats->fv_alloc = 0; 911 q_stats->fv_escape = 0; 912#ifdef ALTQ_FLOWVALVE 913 } 914#endif /* ALTQ_FLOWVALVE */ 915 } while (/*CONSTCOND*/ 0); 916 break; 917 918 case RED_CONFIG: 919 do { 920 struct red_conf *fc; 921 red_t *new; 922 int s, limit; 923 924 fc = (struct red_conf *)addr; 925 if ((rqp = altq_lookup(fc->iface.red_ifname, 926 ALTQT_RED)) == NULL) { 927 error = EBADF; 928 break; 929 } 930 new = red_alloc(fc->red_weight, 931 fc->red_inv_pmax, 932 fc->red_thmin, 933 fc->red_thmax, 934 fc->red_flags, 935 fc->red_pkttime); 936 if (new == NULL) { 937 error = ENOMEM; 938 break; 939 } 940 941#ifdef __NetBSD__ 942 s = splnet(); 943#else 944 s = splimp(); 945#endif 946 red_purgeq(rqp); 947 limit = fc->red_limit; 948 if (limit < fc->red_thmax) 949 limit = fc->red_thmax; 950 qlimit(rqp->rq_q) = limit; 951 fc->red_limit = limit; /* write back the new value */ 952 953 red_destroy(rqp->rq_red); 954 rqp->rq_red = new; 955 956 splx(s); 957 958 /* write back new values */ 959 fc->red_limit = limit; 960 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax; 961 fc->red_thmin = rqp->rq_red->red_thmin; 962 fc->red_thmax = rqp->rq_red->red_thmax; 963 964 } while (/*CONSTCOND*/ 0); 965 break; 966 967 case RED_SETDEFAULTS: 968 do { 969 struct redparams *rp; 970 971 rp = (struct redparams *)addr; 972 973 default_th_min = rp->th_min; 974 default_th_max = rp->th_max; 975 default_inv_pmax = rp->inv_pmax; 976 } while (/*CONSTCOND*/ 0); 977 break; 978 979 default: 980 error = EINVAL; 981 break; 982 } 983 return error; 984} 985 986static int 987red_detach(rqp) 988 red_queue_t *rqp; 989{ 990 red_queue_t *tmp; 991 int error = 0; 992 993 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 994 altq_disable(rqp->rq_ifq); 995 996 if ((error = altq_detach(rqp->rq_ifq))) 997 return (error); 998 999 if (red_list == rqp) 1000 red_list = rqp->rq_next; 1001 else { 1002 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next) 1003 if (tmp->rq_next == rqp) { 1004 tmp->rq_next = rqp->rq_next; 1005 break; 1006 } 1007 if (tmp == NULL) 1008 printf("red_detach: no state found in red_list!\n"); 1009 } 1010 1011 red_destroy(rqp->rq_red); 1012 free(rqp->rq_q, M_DEVBUF); 1013 free(rqp, M_DEVBUF); 1014 return (error); 1015} 1016 1017/* 1018 * enqueue routine: 1019 * 1020 * returns: 0 when successfully queued. 1021 * ENOBUFS when drop occurs. 1022 */ 1023static int 1024red_enqueue(ifq, m, pktattr) 1025 struct ifaltq *ifq; 1026 struct mbuf *m; 1027 struct altq_pktattr *pktattr; 1028{ 1029 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1030 1031 IFQ_LOCK_ASSERT(ifq); 1032 1033 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0) 1034 return ENOBUFS; 1035 ifq->ifq_len++; 1036 return 0; 1037} 1038 1039/* 1040 * dequeue routine: 1041 * must be called in splimp. 1042 * 1043 * returns: mbuf dequeued. 1044 * NULL when no packet is available in the queue. 1045 */ 1046 1047static struct mbuf * 1048red_dequeue(ifq, op) 1049 struct ifaltq *ifq; 1050 int op; 1051{ 1052 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1053 struct mbuf *m; 1054 1055 IFQ_LOCK_ASSERT(ifq); 1056 1057 if (op == ALTDQ_POLL) 1058 return qhead(rqp->rq_q); 1059 1060 /* op == ALTDQ_REMOVE */ 1061 m = red_getq(rqp->rq_red, rqp->rq_q); 1062 if (m != NULL) 1063 ifq->ifq_len--; 1064 return (m); 1065} 1066 1067static int 1068red_request(ifq, req, arg) 1069 struct ifaltq *ifq; 1070 int req; 1071 void *arg; 1072{ 1073 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc; 1074 1075 IFQ_LOCK_ASSERT(ifq); 1076 1077 switch (req) { 1078 case ALTRQ_PURGE: 1079 red_purgeq(rqp); 1080 break; 1081 } 1082 return (0); 1083} 1084 1085static void 1086red_purgeq(rqp) 1087 red_queue_t *rqp; 1088{ 1089 _flushq(rqp->rq_q); 1090 if (ALTQ_IS_ENABLED(rqp->rq_ifq)) 1091 rqp->rq_ifq->ifq_len = 0; 1092} 1093 1094#ifdef ALTQ_FLOWVALVE 1095 1096#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */ 1097#define FV_PSCALE(x) ((x) << FV_PSHIFT) 1098#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT) 1099#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */ 1100#define FV_FSCALE(x) ((x) << FV_FSHIFT) 1101#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT) 1102 1103#define FV_TIMER (3 * hz) /* timer value for garbage collector */ 1104#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */ 1105 1106#define FV_N 10 /* update fve_f every FV_N packets */ 1107 1108#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */ 1109#define FV_TTHRESH 3 /* time threshold to delete fve */ 1110#define FV_ALPHA 5 /* extra packet count */ 1111 1112#define FV_STATS 1113 1114#if (__FreeBSD_version > 300000) 1115#define FV_TIMESTAMP(tp) getmicrotime(tp) 1116#else 1117#define FV_TIMESTAMP(tp) { (*(tp)) = time; } 1118#endif 1119 1120/* 1121 * Brtt table: 127 entry table to convert drop rate (p) to 1122 * the corresponding bandwidth fraction (f) 1123 * the following equation is implemented to use scaled values, 1124 * fve_p and fve_f, in the fixed point format. 1125 * 1126 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p)) 1127 * f = Brtt(p) / (max_th + alpha) 1128 */ 1129#define BRTT_SIZE 128 1130#define BRTT_SHIFT 12 1131#define BRTT_MASK 0x0007f000 1132#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT)) 1133 1134const int brtt_tab[BRTT_SIZE] = { 1135 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728, 1136 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361, 1137 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333, 1138 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612, 1139 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957, 1140 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440, 1141 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184, 1142 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611, 1143 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062, 1144 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487, 1145 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222, 1146 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844, 1147 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079, 1148 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746, 1149 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722, 1150 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924 1151}; 1152 1153static __inline struct fve * 1154flowlist_lookup(fv, pktattr, now) 1155 struct flowvalve *fv; 1156 struct altq_pktattr *pktattr; 1157 struct timeval *now; 1158{ 1159 struct fve *fve; 1160 int flows; 1161 struct ip *ip; 1162#ifdef INET6 1163 struct ip6_hdr *ip6; 1164#endif 1165 struct timeval tthresh; 1166 1167 if (pktattr == NULL) 1168 return (NULL); 1169 1170 tthresh.tv_sec = now->tv_sec - FV_TTHRESH; 1171 flows = 0; 1172 /* 1173 * search the flow list 1174 */ 1175 switch (pktattr->pattr_af) { 1176 case AF_INET: 1177 ip = (struct ip *)pktattr->pattr_hdr; 1178 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1179 if (fve->fve_lastdrop.tv_sec == 0) 1180 break; 1181 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1182 fve->fve_lastdrop.tv_sec = 0; 1183 break; 1184 } 1185 if (fve->fve_flow.flow_af == AF_INET && 1186 fve->fve_flow.flow_ip.ip_src.s_addr == 1187 ip->ip_src.s_addr && 1188 fve->fve_flow.flow_ip.ip_dst.s_addr == 1189 ip->ip_dst.s_addr) 1190 return (fve); 1191 flows++; 1192 } 1193 break; 1194#ifdef INET6 1195 case AF_INET6: 1196 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1197 TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){ 1198 if (fve->fve_lastdrop.tv_sec == 0) 1199 break; 1200 if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) { 1201 fve->fve_lastdrop.tv_sec = 0; 1202 break; 1203 } 1204 if (fve->fve_flow.flow_af == AF_INET6 && 1205 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src, 1206 &ip6->ip6_src) && 1207 IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst, 1208 &ip6->ip6_dst)) 1209 return (fve); 1210 flows++; 1211 } 1212 break; 1213#endif /* INET6 */ 1214 1215 default: 1216 /* unknown protocol. no drop. */ 1217 return (NULL); 1218 } 1219 fv->fv_flows = flows; /* save the number of active fve's */ 1220 return (NULL); 1221} 1222 1223static __inline struct fve * 1224flowlist_reclaim(fv, pktattr) 1225 struct flowvalve *fv; 1226 struct altq_pktattr *pktattr; 1227{ 1228 struct fve *fve; 1229 struct ip *ip; 1230#ifdef INET6 1231 struct ip6_hdr *ip6; 1232#endif 1233 1234 /* 1235 * get an entry from the tail of the LRU list. 1236 */ 1237 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead); 1238 1239 switch (pktattr->pattr_af) { 1240 case AF_INET: 1241 ip = (struct ip *)pktattr->pattr_hdr; 1242 fve->fve_flow.flow_af = AF_INET; 1243 fve->fve_flow.flow_ip.ip_src = ip->ip_src; 1244 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst; 1245 break; 1246#ifdef INET6 1247 case AF_INET6: 1248 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr; 1249 fve->fve_flow.flow_af = AF_INET6; 1250 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src; 1251 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst; 1252 break; 1253#endif 1254 } 1255 1256 fve->fve_state = Green; 1257 fve->fve_p = 0.0; 1258 fve->fve_f = 0.0; 1259 fve->fve_ifseq = fv->fv_ifseq - 1; 1260 fve->fve_count = 0; 1261 1262 fv->fv_flows++; 1263#ifdef FV_STATS 1264 fv->fv_stats.alloc++; 1265#endif 1266 return (fve); 1267} 1268 1269static __inline void 1270flowlist_move_to_head(fv, fve) 1271 struct flowvalve *fv; 1272 struct fve *fve; 1273{ 1274 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) { 1275 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru); 1276 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru); 1277 } 1278} 1279 1280#if 0 /* XXX: make the compiler happy (fv_alloc unused) */ 1281/* 1282 * allocate flowvalve structure 1283 */ 1284static struct flowvalve * 1285fv_alloc(rp) 1286 struct red *rp; 1287{ 1288 struct flowvalve *fv; 1289 struct fve *fve; 1290 int i, num; 1291 1292 num = FV_FLOWLISTSIZE; 1293 fv = malloc(sizeof(struct flowvalve), 1294 M_DEVBUF, M_WAITOK); 1295 if (fv == NULL) 1296 return (NULL); 1297 bzero(fv, sizeof(struct flowvalve)); 1298 1299 fv->fv_fves = malloc(sizeof(struct fve) * num, 1300 M_DEVBUF, M_WAITOK); 1301 if (fv->fv_fves == NULL) { 1302 free(fv, M_DEVBUF); 1303 return (NULL); 1304 } 1305 bzero(fv->fv_fves, sizeof(struct fve) * num); 1306 1307 fv->fv_flows = 0; 1308 TAILQ_INIT(&fv->fv_flowlist); 1309 for (i = 0; i < num; i++) { 1310 fve = &fv->fv_fves[i]; 1311 fve->fve_lastdrop.tv_sec = 0; 1312 TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru); 1313 } 1314 1315 /* initialize drop rate threshold in scaled fixed-point */ 1316 fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax; 1317 1318 /* initialize drop rate to fraction table */ 1319 fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, 1320 M_DEVBUF, M_WAITOK); 1321 if (fv->fv_p2ftab == NULL) { 1322 free(fv->fv_fves, M_DEVBUF); 1323 free(fv, M_DEVBUF); 1324 return (NULL); 1325 } 1326 /* 1327 * create the p2f table. 1328 * (shift is used to keep the precision) 1329 */ 1330 for (i = 1; i < BRTT_SIZE; i++) { 1331 int f; 1332 1333 f = brtt_tab[i] << 8; 1334 fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8; 1335 } 1336 1337 return (fv); 1338} 1339#endif 1340 1341static void fv_destroy(fv) 1342 struct flowvalve *fv; 1343{ 1344 free(fv->fv_p2ftab, M_DEVBUF); 1345 free(fv->fv_fves, M_DEVBUF); 1346 free(fv, M_DEVBUF); 1347} 1348 1349static __inline int 1350fv_p2f(fv, p) 1351 struct flowvalve *fv; 1352 int p; 1353{ 1354 int val, f; 1355 1356 if (p >= BRTT_PMAX) 1357 f = fv->fv_p2ftab[BRTT_SIZE-1]; 1358 else if ((val = (p & BRTT_MASK))) 1359 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)]; 1360 else 1361 f = fv->fv_p2ftab[1]; 1362 return (f); 1363} 1364 1365/* 1366 * check if an arriving packet should be pre-dropped. 1367 * called from red_addq() when a packet arrives. 1368 * returns 1 when the packet should be pre-dropped. 1369 * should be called in splimp. 1370 */ 1371static int 1372fv_checkflow(fv, pktattr, fcache) 1373 struct flowvalve *fv; 1374 struct altq_pktattr *pktattr; 1375 struct fve **fcache; 1376{ 1377 struct fve *fve; 1378 struct timeval now; 1379 1380 fv->fv_ifseq++; 1381 FV_TIMESTAMP(&now); 1382 1383 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1384 /* no matching entry in the flowlist */ 1385 return (0); 1386 1387 *fcache = fve; 1388 1389 /* update fraction f for every FV_N packets */ 1390 if (++fve->fve_count == FV_N) { 1391 /* 1392 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f 1393 */ 1394 fve->fve_f = 1395 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq) 1396 + fve->fve_f - FV_FUNSCALE(fve->fve_f); 1397 fve->fve_ifseq = fv->fv_ifseq; 1398 fve->fve_count = 0; 1399 } 1400 1401 /* 1402 * overpumping test 1403 */ 1404 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) { 1405 int fthresh; 1406 1407 /* calculate a threshold */ 1408 fthresh = fv_p2f(fv, fve->fve_p); 1409 if (fve->fve_f > fthresh) 1410 fve->fve_state = Red; 1411 } 1412 1413 if (fve->fve_state == Red) { 1414 /* 1415 * backoff test 1416 */ 1417 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) { 1418 /* no drop for at least FV_BACKOFFTHRESH sec */ 1419 fve->fve_p = 0; 1420 fve->fve_state = Green; 1421#ifdef FV_STATS 1422 fv->fv_stats.escape++; 1423#endif 1424 } else { 1425 /* block this flow */ 1426 flowlist_move_to_head(fv, fve); 1427 fve->fve_lastdrop = now; 1428#ifdef FV_STATS 1429 fv->fv_stats.predrop++; 1430#endif 1431 return (1); 1432 } 1433 } 1434 1435 /* 1436 * p = (1 - Wp) * p 1437 */ 1438 fve->fve_p -= FV_PUNSCALE(fve->fve_p); 1439 if (fve->fve_p < 0) 1440 fve->fve_p = 0; 1441#ifdef FV_STATS 1442 fv->fv_stats.pass++; 1443#endif 1444 return (0); 1445} 1446 1447/* 1448 * called from red_addq when a packet is dropped by red. 1449 * should be called in splimp. 1450 */ 1451static void fv_dropbyred(fv, pktattr, fcache) 1452 struct flowvalve *fv; 1453 struct altq_pktattr *pktattr; 1454 struct fve *fcache; 1455{ 1456 struct fve *fve; 1457 struct timeval now; 1458 1459 if (pktattr == NULL) 1460 return; 1461 FV_TIMESTAMP(&now); 1462 1463 if (fcache != NULL) 1464 /* the fve of this packet is already cached */ 1465 fve = fcache; 1466 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL) 1467 fve = flowlist_reclaim(fv, pktattr); 1468 1469 flowlist_move_to_head(fv, fve); 1470 1471 /* 1472 * update p: the following line cancels the update 1473 * in fv_checkflow() and calculate 1474 * p = Wp + (1 - Wp) * p 1475 */ 1476 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p; 1477 1478 fve->fve_lastdrop = now; 1479} 1480 1481#endif /* ALTQ_FLOWVALVE */ 1482 1483#ifdef KLD_MODULE 1484 1485static struct altqsw red_sw = 1486 {"red", redopen, redclose, redioctl}; 1487 1488ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw); 1489MODULE_VERSION(altq_red, 1); 1490 1491#endif /* KLD_MODULE */ 1492#endif /* ALTQ3_COMPAT */ 1493 1494#endif /* ALTQ_RED */ 1495