Deleted Added
sdiff udiff text old ( 130368 ) new ( 164033 )
full compact
1/* $FreeBSD: head/sys/contrib/altq/altq/altq_red.c 164033 2006-11-06 13:42:10Z rwatson $ */
2/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */
3
4/*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#if defined(__FreeBSD__) || defined(__NetBSD__)
64#include "opt_altq.h"
65#if (__FreeBSD__ != 2)
66#include "opt_inet.h"
67#ifdef __FreeBSD__
68#include "opt_inet6.h"
69#endif
70#endif
71#endif /* __FreeBSD__ || __NetBSD__ */
72#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
73
74#include <sys/param.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/socket.h>
78#include <sys/systm.h>
79#include <sys/errno.h>
80#if 1 /* ALTQ3_COMPAT */
81#include <sys/sockio.h>
82#include <sys/proc.h>
83#include <sys/kernel.h>
84#ifdef ALTQ_FLOWVALVE
85#include <sys/queue.h>
86#include <sys/time.h>
87#endif
88#endif /* ALTQ3_COMPAT */
89
90#include <net/if.h>
91
92#include <netinet/in.h>
93#include <netinet/in_systm.h>
94#include <netinet/ip.h>
95#ifdef INET6
96#include <netinet/ip6.h>
97#endif
98
99#include <net/pfvar.h>
100#include <altq/altq.h>
101#include <altq/altq_red.h>
102#ifdef ALTQ3_COMPAT
103#include <altq/altq_conf.h>
104#ifdef ALTQ_FLOWVALVE
105#include <altq/altq_flowvalve.h>
106#endif
107#endif
108
109/*
110 * ALTQ/RED (Random Early Detection) implementation using 32-bit
111 * fixed-point calculation.
112 *
113 * written by kjc using the ns code as a reference.
114 * you can learn more about red and ns from Sally's home page at
115 * http://www-nrg.ee.lbl.gov/floyd/
116 *
117 * most of the red parameter values are fixed in this implementation
118 * to prevent fixed-point overflow/underflow.
119 * if you change the parameters, watch out for overflow/underflow!
120 *
121 * the parameters used are recommended values by Sally.
122 * the corresponding ns config looks:
123 * q_weight=0.00195
124 * minthresh=5 maxthresh=15 queue-size=60
125 * linterm=30
126 * dropmech=drop-tail
127 * bytes=false (can't be handled by 32-bit fixed-point)
128 * doubleq=false dqthresh=false
129 * wait=true
130 */
131/*
132 * alternative red parameters for a slow link.
133 *
134 * assume the queue length becomes from zero to L and keeps L, it takes
135 * N packets for q_avg to reach 63% of L.
136 * when q_weight is 0.002, N is about 500 packets.
137 * for a slow link like dial-up, 500 packets takes more than 1 minute!
138 * when q_weight is 0.008, N is about 127 packets.
139 * when q_weight is 0.016, N is about 63 packets.
140 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
141 * are allowed for 0.016.
142 * see Sally's paper for more details.
143 */
144/* normal red parameters */
145#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
146 /* q_weight = 0.00195 */
147
148/* red parameters for a slow link */
149#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
150 /* q_weight = 0.0078125 */
151
152/* red parameters for a very slow link (e.g., dialup) */
153#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
154 /* q_weight = 0.015625 */
155
156/* fixed-point uses 12-bit decimal places */
157#define FP_SHIFT 12 /* fixed-point shift */
158
159/* red parameters for drop probability */
160#define INV_P_MAX 10 /* inverse of max drop probability */
161#define TH_MIN 5 /* min threshold */
162#define TH_MAX 15 /* max threshold */
163
#define RED_LIMIT	60	/* default max queue length */
165#define RED_STATS /* collect statistics */
166
167/*
168 * our default policy for forced-drop is drop-tail.
169 * (in altq-1.1.2 or earlier, the default was random-drop.
170 * but it makes more sense to punish the cause of the surge.)
171 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
172 */
173
174#ifdef ALTQ3_COMPAT
175#ifdef ALTQ_FLOWVALVE
176/*
 * flow-valve is an extension to protect red from unresponsive flows
178 * and to promote end-to-end congestion control.
179 * flow-valve observes the average drop rates of the flows that have
180 * experienced packet drops in the recent past.
181 * when the average drop rate exceeds the threshold, the flow is
182 * blocked by the flow-valve. the trapped flow should back off
183 * exponentially to escape from the flow-valve.
184 */
185#ifdef RED_RANDOM_DROP
186#error "random-drop can't be used with flow-valve!"
187#endif
188#endif /* ALTQ_FLOWVALVE */
189
190/* red_list keeps all red_queue_t's allocated. */
191static red_queue_t *red_list = NULL;
192
193#endif /* ALTQ3_COMPAT */
194
195/* default red parameter values */
196static int default_th_min = TH_MIN;
197static int default_th_max = TH_MAX;
198static int default_inv_pmax = INV_P_MAX;
199
200#ifdef ALTQ3_COMPAT
201/* internal function prototypes */
202static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
203static struct mbuf *red_dequeue(struct ifaltq *, int);
204static int red_request(struct ifaltq *, int, void *);
205static void red_purgeq(red_queue_t *);
206static int red_detach(red_queue_t *);
207#ifdef ALTQ_FLOWVALVE
208static __inline struct fve *flowlist_lookup(struct flowvalve *,
209 struct altq_pktattr *, struct timeval *);
210static __inline struct fve *flowlist_reclaim(struct flowvalve *,
211 struct altq_pktattr *);
212static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
213static __inline int fv_p2f(struct flowvalve *, int);
214#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
215static struct flowvalve *fv_alloc(struct red *);
216#endif
217static void fv_destroy(struct flowvalve *);
218static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
219 struct fve **);
220static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
221 struct fve *);
222#endif
223#endif /* ALTQ3_COMPAT */
224
225/*
226 * red support routines
227 */
228red_t *
229red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
230 int pkttime)
231{
232 red_t *rp;
233 int w, i;
234 int npkts_per_sec;
235
236 MALLOC(rp, red_t *, sizeof(red_t), M_DEVBUF, M_WAITOK);
237 if (rp == NULL)
238 return (NULL);
239 bzero(rp, sizeof(red_t));
240
241 rp->red_avg = 0;
242 rp->red_idle = 1;
243
244 if (weight == 0)
245 rp->red_weight = W_WEIGHT;
246 else
247 rp->red_weight = weight;
248 if (inv_pmax == 0)
249 rp->red_inv_pmax = default_inv_pmax;
250 else
251 rp->red_inv_pmax = inv_pmax;
252 if (th_min == 0)
253 rp->red_thmin = default_th_min;
254 else
255 rp->red_thmin = th_min;
256 if (th_max == 0)
257 rp->red_thmax = default_th_max;
258 else
259 rp->red_thmax = th_max;
260
261 rp->red_flags = flags;
262
263 if (pkttime == 0)
264 /* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
265 rp->red_pkttime = 800;
266 else
267 rp->red_pkttime = pkttime;
268
269 if (weight == 0) {
270 /* when the link is very slow, adjust red parameters */
271 npkts_per_sec = 1000000 / rp->red_pkttime;
272 if (npkts_per_sec < 50) {
273 /* up to about 400Kbps */
274 rp->red_weight = W_WEIGHT_2;
275 } else if (npkts_per_sec < 300) {
276 /* up to about 2.4Mbps */
277 rp->red_weight = W_WEIGHT_1;
278 }
279 }
280
281 /* calculate wshift. weight must be power of 2 */
282 w = rp->red_weight;
283 for (i = 0; w > 1; i++)
284 w = w >> 1;
285 rp->red_wshift = i;
286 w = 1 << rp->red_wshift;
287 if (w != rp->red_weight) {
288 printf("invalid weight value %d for red! use %d\n",
289 rp->red_weight, w);
290 rp->red_weight = w;
291 }
292
293 /*
294 * thmin_s and thmax_s are scaled versions of th_min and th_max
295 * to be compared with avg.
296 */
297 rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
298 rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
299
300 /*
301 * precompute probability denominator
302 * probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
303 */
304 rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
305 * rp->red_inv_pmax) << FP_SHIFT;
306
307 /* allocate weight table */
308 rp->red_wtab = wtab_alloc(rp->red_weight);
309
310 microtime(&rp->red_last);
311 return (rp);
312}
313
/*
 * Free a RED state block allocated by red_alloc().
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	/* tear down the optional flow-valve state first */
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	/* drop our reference on the (possibly shared) weight table */
	wtab_destroy(rp->red_wtab);
	FREE(rp, M_DEVBUF);
}
326
327void
328red_getstats(red_t *rp, struct redstats *sp)
329{
330 sp->q_avg = rp->red_avg >> rp->red_wshift;
331 sp->xmit_cnt = rp->red_stats.xmit_cnt;
332 sp->drop_cnt = rp->red_stats.drop_cnt;
333 sp->drop_forced = rp->red_stats.drop_forced;
334 sp->drop_unforced = rp->red_stats.drop_unforced;
335 sp->marked_packets = rp->red_stats.marked_packets;
336}
337
/*
 * Enqueue a packet subject to the RED algorithm.
 *
 * Updates the EWMA of the queue length, then decides among:
 *   DTYPE_NODROP - enqueue the packet,
 *   DTYPE_EARLY  - probabilistic (unforced) drop or ECN mark,
 *   DTYPE_FORCED - drop because avg >= th_max or the queue is full.
 *
 * Returns 0 when the packet was queued, -1 when it was dropped
 * (the mbuf is freed in that case).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* a flow trapped by the flow-valve is dropped unconditionally */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first time avg exceeds th_min: start a new round */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked. do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
475
476/*
477 * early-drop probability is calculated as follows:
478 * prob = p_max * (avg - th_min) / (th_max - th_min)
479 * prob_a = prob / (2 - count*prob)
480 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
481 * here prob_a increases as successive undrop count increases.
482 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
483 * becomes 1 when (count >= (2 / prob))).
484 */
/*
 * Decide whether to drop/mark a packet early.
 *
 * fp_len   = (avg - th_min) in fixed-point
 * fp_probd = precomputed probability denominator (red_probd)
 * count    = packets accepted since the last drop/mark
 *
 * Returns 1 to drop (or mark), 0 to accept.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int denom;	/* denominator of the drop probability */

	denom = fp_probd - count * fp_len;
	if (denom <= 0) {
		/* count exceeds the hard limit: drop or mark */
		return (1);
	}

	/*
	 * now denom lies in [1..600] in fixed-point (when
	 * th_max-th_min=10 and p_max=1/30); the drop probability
	 * is (avg - TH_MIN) / denom.  draw uniformly in [0..denom)
	 * and drop when the draw falls below fp_len.
	 */
	return ((arc4random() % denom) < fp_len) ? 1 : 0;
}
508
509/*
510 * try to mark CE bit to the packet.
511 * returns 1 if successfully marked, 0 otherwise.
512 */
/*
 * try to mark CE bit to the packet.
 * returns 1 if successfully marked, 0 otherwise.
 *
 * The IP header is located via the mbuf's pf qid tag (or, in
 * ALTQ3_COMPAT mode, via the packet attributes); only packets that
 * advertise ECN capability are marked, and the IPv4 checksum is
 * updated incrementally per RFC 1624.
 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf *m0;
	struct m_tag *t;
	struct altq_tag *at;
	void *hdr;
	int af;

	t = m_tag_find(m, PACKET_TAG_PF_QID, NULL);
	if (t != NULL) {
		at = (struct altq_tag *)(t + 1);
		/* NOTE(review): (t + 1) can never be NULL here, so this
		 * check looks unreachable — confirm before removing. */
		if (at == NULL)
			return (0);
		af = at->af;
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return (0);

	if (af != AF_INET && af != AF_INET6)
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)hdr >= m0->m_data) &&
		    ((caddr_t)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return (0);
	}

	switch (af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			/* the ECN bits live in the traffic-class field
			 * inside the (32-bit) flow word */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked, mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
612
613struct mbuf *
614red_getq(rp, q)
615 red_t *rp;
616 class_queue_t *q;
617{
618 struct mbuf *m;
619
620 if ((m = _getq(q)) == NULL) {
621 if (rp->red_idle == 0) {
622 rp->red_idle = 1;
623 microtime(&rp->red_last);
624 }
625 return NULL;
626 }
627
628 rp->red_idle = 0;
629 return (m);
630}
631
632/*
633 * helper routine to calibrate avg during idle.
634 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
635 * here Wq = 1/weight and the code assumes Wq is close to zero.
636 *
637 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
638 */
639static struct wtab *wtab_list = NULL; /* pointer to wtab list */
640
641struct wtab *
642wtab_alloc(int weight)
643{
644 struct wtab *w;
645 int i;
646
647 for (w = wtab_list; w != NULL; w = w->w_next)
648 if (w->w_weight == weight) {
649 w->w_refcount++;
650 return (w);
651 }
652
653 MALLOC(w, struct wtab *, sizeof(struct wtab), M_DEVBUF, M_WAITOK);
654 if (w == NULL)
655 panic("wtab_alloc: malloc failed!");
656 bzero(w, sizeof(struct wtab));
657 w->w_weight = weight;
658 w->w_refcount = 1;
659 w->w_next = wtab_list;
660 wtab_list = w;
661
662 /* initialize the weight table */
663 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
664 for (i = 1; i < 32; i++) {
665 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
666 if (w->w_tab[i] == 0 && w->w_param_max == 0)
667 w->w_param_max = 1 << i;
668 }
669
670 return (w);
671}
672
673int
674wtab_destroy(struct wtab *w)
675{
676 struct wtab *prev;
677
678 if (--w->w_refcount > 0)
679 return (0);
680
681 if (wtab_list == w)
682 wtab_list = w->w_next;
683 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
684 if (prev->w_next == w) {
685 prev->w_next = w->w_next;
686 break;
687 }
688
689 FREE(w, M_DEVBUF);
690 return (0);
691}
692
693int32_t
694pow_w(struct wtab *w, int n)
695{
696 int i, bit;
697 int32_t val;
698
699 if (n >= w->w_param_max)
700 return (0);
701
702 val = 1 << FP_SHIFT;
703 if (n <= 0)
704 return (val);
705
706 bit = 1;
707 i = 0;
708 while (n) {
709 if (n & bit) {
710 val = (val * w->w_tab[i]) >> FP_SHIFT;
711 n &= ~bit;
712 }
713 i++;
714 bit <<= 1;
715 }
716 return (val);
717}
718
719#ifdef ALTQ3_COMPAT
720/*
721 * red device interface
722 */
723altqdev_decl(red);
724
/*
 * Open the red control device.  Nothing to do here: all state is
 * created later via the RED_IF_ATTACH ioctl.
 */
int
redopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
738
/*
 * Close the red control device: detach and destroy every RED state
 * still on red_list.  Returns the first error encountered (if any),
 * but keeps going so all states are torn down.
 */
int
redclose(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}
761
762int
763redioctl(dev, cmd, addr, flag, p)
764 dev_t dev;
765 ioctlcmd_t cmd;
766 caddr_t addr;
767 int flag;
768#if (__FreeBSD_version > 500000)
769 struct thread *p;
770#else
771 struct proc *p;
772#endif
773{
774 red_queue_t *rqp;
775 struct red_interface *ifacep;
776 struct ifnet *ifp;
777 int error = 0;
778
779 /* check super-user privilege */
780 switch (cmd) {
781 case RED_GETSTATS:
782 break;
783 default:
784#if (__FreeBSD_version > 700000)
785 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
786#elsif (__FreeBSD_version > 400000)
787 if ((error = suser(p)) != 0)
788#else
789 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
790#endif
791 return (error);
792 break;
793 }
794
795 switch (cmd) {
796
797 case RED_ENABLE:
798 ifacep = (struct red_interface *)addr;
799 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
800 error = EBADF;
801 break;
802 }
803 error = altq_enable(rqp->rq_ifq);
804 break;
805
806 case RED_DISABLE:
807 ifacep = (struct red_interface *)addr;
808 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
809 error = EBADF;
810 break;
811 }
812 error = altq_disable(rqp->rq_ifq);
813 break;
814
815 case RED_IF_ATTACH:
816 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
817 if (ifp == NULL) {
818 error = ENXIO;
819 break;
820 }
821
822 /* allocate and initialize red_queue_t */
823 MALLOC(rqp, red_queue_t *, sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
824 if (rqp == NULL) {
825 error = ENOMEM;
826 break;
827 }
828 bzero(rqp, sizeof(red_queue_t));
829
830 MALLOC(rqp->rq_q, class_queue_t *, sizeof(class_queue_t),
831 M_DEVBUF, M_WAITOK);
832 if (rqp->rq_q == NULL) {
833 FREE(rqp, M_DEVBUF);
834 error = ENOMEM;
835 break;
836 }
837 bzero(rqp->rq_q, sizeof(class_queue_t));
838
839 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
840 if (rqp->rq_red == NULL) {
841 FREE(rqp->rq_q, M_DEVBUF);
842 FREE(rqp, M_DEVBUF);
843 error = ENOMEM;
844 break;
845 }
846
847 rqp->rq_ifq = &ifp->if_snd;
848 qtail(rqp->rq_q) = NULL;
849 qlen(rqp->rq_q) = 0;
850 qlimit(rqp->rq_q) = RED_LIMIT;
851 qtype(rqp->rq_q) = Q_RED;
852
853 /*
854 * set RED to this ifnet structure.
855 */
856 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
857 red_enqueue, red_dequeue, red_request,
858 NULL, NULL);
859 if (error) {
860 red_destroy(rqp->rq_red);
861 FREE(rqp->rq_q, M_DEVBUF);
862 FREE(rqp, M_DEVBUF);
863 break;
864 }
865
866 /* add this state to the red list */
867 rqp->rq_next = red_list;
868 red_list = rqp;
869 break;
870
871 case RED_IF_DETACH:
872 ifacep = (struct red_interface *)addr;
873 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
874 error = EBADF;
875 break;
876 }
877 error = red_detach(rqp);
878 break;
879
880 case RED_GETSTATS:
881 do {
882 struct red_stats *q_stats;
883 red_t *rp;
884
885 q_stats = (struct red_stats *)addr;
886 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
887 ALTQT_RED)) == NULL) {
888 error = EBADF;
889 break;
890 }
891
892 q_stats->q_len = qlen(rqp->rq_q);
893 q_stats->q_limit = qlimit(rqp->rq_q);
894
895 rp = rqp->rq_red;
896 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
897 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
898 q_stats->drop_cnt = rp->red_stats.drop_cnt;
899 q_stats->drop_forced = rp->red_stats.drop_forced;
900 q_stats->drop_unforced = rp->red_stats.drop_unforced;
901 q_stats->marked_packets = rp->red_stats.marked_packets;
902
903 q_stats->weight = rp->red_weight;
904 q_stats->inv_pmax = rp->red_inv_pmax;
905 q_stats->th_min = rp->red_thmin;
906 q_stats->th_max = rp->red_thmax;
907
908#ifdef ALTQ_FLOWVALVE
909 if (rp->red_flowvalve != NULL) {
910 struct flowvalve *fv = rp->red_flowvalve;
911 q_stats->fv_flows = fv->fv_flows;
912 q_stats->fv_pass = fv->fv_stats.pass;
913 q_stats->fv_predrop = fv->fv_stats.predrop;
914 q_stats->fv_alloc = fv->fv_stats.alloc;
915 q_stats->fv_escape = fv->fv_stats.escape;
916 } else {
917#endif /* ALTQ_FLOWVALVE */
918 q_stats->fv_flows = 0;
919 q_stats->fv_pass = 0;
920 q_stats->fv_predrop = 0;
921 q_stats->fv_alloc = 0;
922 q_stats->fv_escape = 0;
923#ifdef ALTQ_FLOWVALVE
924 }
925#endif /* ALTQ_FLOWVALVE */
926 } while (/*CONSTCOND*/ 0);
927 break;
928
929 case RED_CONFIG:
930 do {
931 struct red_conf *fc;
932 red_t *new;
933 int s, limit;
934
935 fc = (struct red_conf *)addr;
936 if ((rqp = altq_lookup(fc->iface.red_ifname,
937 ALTQT_RED)) == NULL) {
938 error = EBADF;
939 break;
940 }
941 new = red_alloc(fc->red_weight,
942 fc->red_inv_pmax,
943 fc->red_thmin,
944 fc->red_thmax,
945 fc->red_flags,
946 fc->red_pkttime);
947 if (new == NULL) {
948 error = ENOMEM;
949 break;
950 }
951
952#ifdef __NetBSD__
953 s = splnet();
954#else
955 s = splimp();
956#endif
957 red_purgeq(rqp);
958 limit = fc->red_limit;
959 if (limit < fc->red_thmax)
960 limit = fc->red_thmax;
961 qlimit(rqp->rq_q) = limit;
962 fc->red_limit = limit; /* write back the new value */
963
964 red_destroy(rqp->rq_red);
965 rqp->rq_red = new;
966
967 splx(s);
968
969 /* write back new values */
970 fc->red_limit = limit;
971 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
972 fc->red_thmin = rqp->rq_red->red_thmin;
973 fc->red_thmax = rqp->rq_red->red_thmax;
974
975 } while (/*CONSTCOND*/ 0);
976 break;
977
978 case RED_SETDEFAULTS:
979 do {
980 struct redparams *rp;
981
982 rp = (struct redparams *)addr;
983
984 default_th_min = rp->th_min;
985 default_th_max = rp->th_max;
986 default_inv_pmax = rp->inv_pmax;
987 } while (/*CONSTCOND*/ 0);
988 break;
989
990 default:
991 error = EINVAL;
992 break;
993 }
994 return error;
995}
996
997static int
998red_detach(rqp)
999 red_queue_t *rqp;
1000{
1001 red_queue_t *tmp;
1002 int error = 0;
1003
1004 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1005 altq_disable(rqp->rq_ifq);
1006
1007 if ((error = altq_detach(rqp->rq_ifq)))
1008 return (error);
1009
1010 if (red_list == rqp)
1011 red_list = rqp->rq_next;
1012 else {
1013 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1014 if (tmp->rq_next == rqp) {
1015 tmp->rq_next = rqp->rq_next;
1016 break;
1017 }
1018 if (tmp == NULL)
1019 printf("red_detach: no state found in red_list!\n");
1020 }
1021
1022 red_destroy(rqp->rq_red);
1023 FREE(rqp->rq_q, M_DEVBUF);
1024 FREE(rqp, M_DEVBUF);
1025 return (error);
1026}
1027
1028/*
1029 * enqueue routine:
1030 *
1031 * returns: 0 when successfully queued.
1032 * ENOBUFS when drop occurs.
1033 */
1034static int
1035red_enqueue(ifq, m, pktattr)
1036 struct ifaltq *ifq;
1037 struct mbuf *m;
1038 struct altq_pktattr *pktattr;
1039{
1040 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1041
1042 IFQ_LOCK_ASSERT(ifq);
1043
1044 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1045 return ENOBUFS;
1046 ifq->ifq_len++;
1047 return 0;
1048}
1049
1050/*
1051 * dequeue routine:
1052 * must be called in splimp.
1053 *
1054 * returns: mbuf dequeued.
1055 * NULL when no packet is available in the queue.
1056 */
1057
1058static struct mbuf *
1059red_dequeue(ifq, op)
1060 struct ifaltq *ifq;
1061 int op;
1062{
1063 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1064 struct mbuf *m;
1065
1066 IFQ_LOCK_ASSERT(ifq);
1067
1068 if (op == ALTDQ_POLL)
1069 return qhead(rqp->rq_q);
1070
1071 /* op == ALTDQ_REMOVE */
1072 m = red_getq(rqp->rq_red, rqp->rq_q);
1073 if (m != NULL)
1074 ifq->ifq_len--;
1075 return (m);
1076}
1077
1078static int
1079red_request(ifq, req, arg)
1080 struct ifaltq *ifq;
1081 int req;
1082 void *arg;
1083{
1084 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1085
1086 IFQ_LOCK_ASSERT(ifq);
1087
1088 switch (req) {
1089 case ALTRQ_PURGE:
1090 red_purgeq(rqp);
1091 break;
1092 }
1093 return (0);
1094}
1095
1096static void
1097red_purgeq(rqp)
1098 red_queue_t *rqp;
1099{
1100 _flushq(rqp->rq_q);
1101 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1102 rqp->rq_ifq->ifq_len = 0;
1103}
1104
1105#ifdef ALTQ_FLOWVALVE
1106
1107#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1108#define FV_PSCALE(x) ((x) << FV_PSHIFT)
1109#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1110#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1111#define FV_FSCALE(x) ((x) << FV_FSHIFT)
1112#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1113
1114#define FV_TIMER (3 * hz) /* timer value for garbage collector */
1115#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1116
1117#define FV_N 10 /* update fve_f every FV_N packets */
1118
1119#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1120#define FV_TTHRESH 3 /* time threshold to delete fve */
1121#define FV_ALPHA 5 /* extra packet count */
1122
1123#define FV_STATS
1124
1125#if (__FreeBSD_version > 300000)
1126#define FV_TIMESTAMP(tp) getmicrotime(tp)
1127#else
1128#define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1129#endif
1130
1131/*
1132 * Brtt table: 127 entry table to convert drop rate (p) to
1133 * the corresponding bandwidth fraction (f)
1134 * the following equation is implemented to use scaled values,
1135 * fve_p and fve_f, in the fixed point format.
1136 *
1137 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1138 * f = Brtt(p) / (max_th + alpha)
1139 */
1140#define BRTT_SIZE 128
1141#define BRTT_SHIFT 12
1142#define BRTT_MASK 0x0007f000
1143#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1144
1145const int brtt_tab[BRTT_SIZE] = {
1146 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1147 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1148 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1149 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1150 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1151 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1152 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1153 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1154 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1155 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1156 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1157 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1158 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1159 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1160 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1161 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1162};
1163
/*
 * Look up the flow-valve entry matching the packet's src/dst address
 * pair.  The flow list is kept in LRU order; entries older than
 * FV_TTHRESH are lazily expired during the walk.  As a side effect,
 * fv->fv_flows is refreshed with the count of active entries when no
 * match is found.  Returns the matching fve, or NULL.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries whose last drop predates this are considered stale */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			/* tv_sec == 0 marks an unused entry; the list is
			 * LRU-ordered, so nothing beyond it can match */
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* stale: recycle it and stop searching */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1233
1234static __inline struct fve *
1235flowlist_reclaim(fv, pktattr)
1236 struct flowvalve *fv;
1237 struct altq_pktattr *pktattr;
1238{
1239 struct fve *fve;
1240 struct ip *ip;
1241#ifdef INET6
1242 struct ip6_hdr *ip6;
1243#endif
1244
1245 /*
1246 * get an entry from the tail of the LRU list.
1247 */
1248 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1249
1250 switch (pktattr->pattr_af) {
1251 case AF_INET:
1252 ip = (struct ip *)pktattr->pattr_hdr;
1253 fve->fve_flow.flow_af = AF_INET;
1254 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1255 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1256 break;
1257#ifdef INET6
1258 case AF_INET6:
1259 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1260 fve->fve_flow.flow_af = AF_INET6;
1261 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1262 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1263 break;
1264#endif
1265 }
1266
1267 fve->fve_state = Green;
1268 fve->fve_p = 0.0;
1269 fve->fve_f = 0.0;
1270 fve->fve_ifseq = fv->fv_ifseq - 1;
1271 fve->fve_count = 0;
1272
1273 fv->fv_flows++;
1274#ifdef FV_STATS
1275 fv->fv_stats.alloc++;
1276#endif
1277 return (fve);
1278}
1279
1280static __inline void
1281flowlist_move_to_head(fv, fve)
1282 struct flowvalve *fv;
1283 struct fve *fve;
1284{
1285 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1286 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1287 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1288 }
1289}
1290
#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
/*
 * allocate flowvalve structure
 *
 * Allocates the flowvalve container, its FV_FLOWLISTSIZE fve array
 * (all entries start invalid, fve_lastdrop.tv_sec == 0, and are
 * chained onto the LRU list), and the drop-rate-to-fraction table
 * derived from brtt_tab and the red parameters.  Returns NULL if any
 * allocation fails (earlier allocations are released).
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	MALLOC(fv, struct flowvalve *, sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	MALLOC(fv->fv_fves, struct fve *, sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;	/* mark entry unused */
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	MALLOC(fv->fv_p2ftab, int *, sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		FREE(fv->fv_fves, M_DEVBUF);
		FREE(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
#endif
1351
1352static void fv_destroy(fv)
1353 struct flowvalve *fv;
1354{
1355 FREE(fv->fv_p2ftab, M_DEVBUF);
1356 FREE(fv->fv_fves, M_DEVBUF);
1357 FREE(fv, M_DEVBUF);
1358}
1359
1360static __inline int
1361fv_p2f(fv, p)
1362 struct flowvalve *fv;
1363 int p;
1364{
1365 int val, f;
1366
1367 if (p >= BRTT_PMAX)
1368 f = fv->fv_p2ftab[BRTT_SIZE-1];
1369 else if ((val = (p & BRTT_MASK)))
1370 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1371 else
1372 f = fv->fv_p2ftab[1];
1373 return (f);
1374}
1375
/*
 * check if an arriving packet should be pre-dropped.
 * called from red_addq() when a packet arrives.
 * returns 1 when the packet should be pre-dropped.
 * should be called in splimp.
 *
 * On a flowlist hit the matching fve is stored in *fcache so that
 * fv_dropbyred() can skip a second lookup; *fcache is left untouched
 * on a miss.  fve_p (drop rate) and fve_f (bandwidth fraction) are
 * scaled fixed-point integers updated as EWMAs.
 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	fv->fv_ifseq++;		/* per-interface packet sequence number */
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (the sequence-number gap gives this flow's share of
		 * the interface's packets since the last update)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow with a high drop rate whose
	 * bandwidth fraction exceeds its fair threshold turns Red.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p	(decay the drop rate on every pass)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1457
1458/*
1459 * called from red_addq when a packet is dropped by red.
1460 * should be called in splimp.
1461 */
1462static void fv_dropbyred(fv, pktattr, fcache)
1463 struct flowvalve *fv;
1464 struct altq_pktattr *pktattr;
1465 struct fve *fcache;
1466{
1467 struct fve *fve;
1468 struct timeval now;
1469
1470 if (pktattr == NULL)
1471 return;
1472 FV_TIMESTAMP(&now);
1473
1474 if (fcache != NULL)
1475 /* the fve of this packet is already cached */
1476 fve = fcache;
1477 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1478 fve = flowlist_reclaim(fv, pktattr);
1479
1480 flowlist_move_to_head(fv, fve);
1481
1482 /*
1483 * update p: the following line cancels the update
1484 * in fv_checkflow() and calculate
1485 * p = Wp + (1 - Wp) * p
1486 */
1487 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1488
1489 fve->fve_lastdrop = now;
1490}
1491
#endif /* ALTQ_FLOWVALVE */

#ifdef KLD_MODULE

/* ALTQ3 device switch for red: name plus open/close/ioctl entry points */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
#endif /* ALTQ3_COMPAT */

#endif /* ALTQ_RED */