1/*	$NetBSD: altq_red.c,v 1.28 2008/06/18 09:06:27 yamt Exp $	*/
2/*	$KAME: altq_red.c,v 1.20 2005/04/13 03:44:25 suz Exp $	*/
3
4/*
5 * Copyright (C) 1997-2003
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the Computer Systems
45 *	Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 *    to endorse or promote products derived from this software without
48 *    specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#include <sys/cdefs.h>
64__KERNEL_RCSID(0, "$NetBSD: altq_red.c,v 1.28 2008/06/18 09:06:27 yamt Exp $");
65
66#ifdef _KERNEL_OPT
67#include "opt_altq.h"
68#include "opt_inet.h"
69#include "pf.h"
70#endif
71
72#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
73
74#include <sys/param.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/socket.h>
78#include <sys/systm.h>
79#include <sys/errno.h>
80#include <sys/kauth.h>
81#if 1 /* ALTQ3_COMPAT */
82#include <sys/sockio.h>
83#include <sys/proc.h>
84#include <sys/kernel.h>
85#ifdef ALTQ_FLOWVALVE
86#include <sys/queue.h>
87#include <sys/time.h>
88#endif
89#endif /* ALTQ3_COMPAT */
90#include <sys/cprng.h>
91
92#include <net/if.h>
93
94#include <netinet/in.h>
95#include <netinet/in_systm.h>
96#include <netinet/ip.h>
97#ifdef INET6
98#include <netinet/ip6.h>
99#endif
100
101#if NPF > 0
102#include <net/pfvar.h>
103#endif
104#include <altq/altq.h>
105#include <altq/altq_red.h>
106#ifdef ALTQ3_COMPAT
107#include <altq/altq_conf.h>
108#ifdef ALTQ_FLOWVALVE
109#include <altq/altq_flowvalve.h>
110#endif
111#endif
112
113/*
114 * ALTQ/RED (Random Early Detection) implementation using 32-bit
115 * fixed-point calculation.
116 *
117 * written by kjc using the ns code as a reference.
118 * you can learn more about red and ns from Sally's home page at
119 * http://www-nrg.ee.lbl.gov/floyd/
120 *
121 * most of the red parameter values are fixed in this implementation
122 * to prevent fixed-point overflow/underflow.
123 * if you change the parameters, watch out for overflow/underflow!
124 *
125 * the parameters used are recommended values by Sally.
 * the corresponding ns config looks like:
127 *	q_weight=0.00195
128 *	minthresh=5 maxthresh=15 queue-size=60
129 *	linterm=30
130 *	dropmech=drop-tail
131 *	bytes=false (can't be handled by 32-bit fixed-point)
132 *	doubleq=false dqthresh=false
133 *	wait=true
134 */
135/*
136 * alternative red parameters for a slow link.
137 *
 * assume the queue length grows from zero to L and stays at L; it takes
 * N packets for q_avg to reach 63% of L.
140 * when q_weight is 0.002, N is about 500 packets.
141 * for a slow link like dial-up, 500 packets takes more than 1 minute!
142 * when q_weight is 0.008, N is about 127 packets.
143 * when q_weight is 0.016, N is about 63 packets.
144 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
145 * are allowed for 0.016.
146 * see Sally's paper for more details.
147 */
/* normal red parameters */
#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
				/* q_weight = 0.00195 */

/* red parameters for a slow link */
#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
				/* q_weight = 0.0078125 */

/* red parameters for a very slow link (e.g., dialup) */
#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
				/* q_weight = 0.015625 */

/* fixed-point uses 12-bit decimal places */
#define	FP_SHIFT	12	/* fixed-point shift */

/* red parameters for drop probability */
#define	INV_P_MAX	10	/* inverse of max drop probability */
#define	TH_MIN		5	/* min threshold */
#define	TH_MAX		15	/* max threshold */

#define	RED_LIMIT	60	/* default max queue length */
#define	RED_STATS		/* collect statistics */

/*
 * our default policy for forced-drop is drop-tail.
 * (in altq-1.1.2 or earlier, the default was random-drop.
 * but it makes more sense to punish the cause of the surge.)
 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
 */
177
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
/*
 * flow-valve is an extension to protect red from unresponsive flows
 * and to promote end-to-end congestion control.
 * flow-valve observes the average drop rates of the flows that have
 * experienced packet drops in the recent past.
 * when the average drop rate exceeds the threshold, the flow is
 * blocked by the flow-valve.  the trapped flow should back off
 * exponentially to escape from the flow-valve.
 */
#ifdef RED_RANDOM_DROP
#error "random-drop can't be used with flow-valve!"
#endif
#endif /* ALTQ_FLOWVALVE */

/* red_list keeps all red_queue_t's allocated. */
static red_queue_t *red_list = NULL;

#endif /* ALTQ3_COMPAT */

/* default red parameter values (can be changed via RED_SETDEFAULTS) */
static int default_th_min = TH_MIN;
static int default_th_max = TH_MAX;
static int default_inv_pmax = INV_P_MAX;
203
204#ifdef ALTQ3_COMPAT
205/* internal function prototypes */
206static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
207static struct mbuf *red_dequeue(struct ifaltq *, int);
208static int red_request(struct ifaltq *, int, void *);
209static void red_purgeq(red_queue_t *);
210static int red_detach(red_queue_t *);
211#ifdef ALTQ_FLOWVALVE
212static inline struct fve *flowlist_lookup(struct flowvalve *,
213			 struct altq_pktattr *, struct timeval *);
214static inline struct fve *flowlist_reclaim(struct flowvalve *,
215					     struct altq_pktattr *);
216static inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
217static inline int fv_p2f(struct flowvalve *, int);
218static struct flowvalve *fv_alloc(struct red *);
219static void fv_destroy(struct flowvalve *);
220static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
221			struct fve **);
222static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
223			 struct fve *);
224#endif
225#endif /* ALTQ3_COMPAT */
226
227/*
228 * red support routines
229 */
230red_t *
231red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
232   int pkttime)
233{
234	red_t	*rp;
235	int	 w, i;
236	int	 npkts_per_sec;
237
238	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK|M_ZERO);
239	if (rp == NULL)
240		return (NULL);
241
242	rp->red_avg = 0;
243	rp->red_idle = 1;
244
245	if (weight == 0)
246		rp->red_weight = W_WEIGHT;
247	else
248		rp->red_weight = weight;
249	if (inv_pmax == 0)
250		rp->red_inv_pmax = default_inv_pmax;
251	else
252		rp->red_inv_pmax = inv_pmax;
253	if (th_min == 0)
254		rp->red_thmin = default_th_min;
255	else
256		rp->red_thmin = th_min;
257	if (th_max == 0)
258		rp->red_thmax = default_th_max;
259	else
260		rp->red_thmax = th_max;
261
262	rp->red_flags = flags;
263
264	if (pkttime == 0)
265		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
266		rp->red_pkttime = 800;
267	else
268		rp->red_pkttime = pkttime;
269
270	if (weight == 0) {
271		/* when the link is very slow, adjust red parameters */
272		npkts_per_sec = 1000000 / rp->red_pkttime;
273		if (npkts_per_sec < 50) {
274			/* up to about 400Kbps */
275			rp->red_weight = W_WEIGHT_2;
276		} else if (npkts_per_sec < 300) {
277			/* up to about 2.4Mbps */
278			rp->red_weight = W_WEIGHT_1;
279		}
280	}
281
282	/* calculate wshift.  weight must be power of 2 */
283	w = rp->red_weight;
284	for (i = 0; w > 1; i++)
285		w = w >> 1;
286	rp->red_wshift = i;
287	w = 1 << rp->red_wshift;
288	if (w != rp->red_weight) {
289		printf("invalid weight value %d for red! use %d\n",
290		       rp->red_weight, w);
291		rp->red_weight = w;
292	}
293
294	/*
295	 * thmin_s and thmax_s are scaled versions of th_min and th_max
296	 * to be compared with avg.
297	 */
298	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
299	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
300
301	/*
302	 * precompute probability denominator
303	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
304	 */
305	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
306			 * rp->red_inv_pmax) << FP_SHIFT;
307
308	/* allocate weight table */
309	rp->red_wtab = wtab_alloc(rp->red_weight);
310
311	microtime(&rp->red_last);
312#ifdef ALTQ3_COMPAT
313#ifdef ALTQ_FLOWVALVE
314	if (flags & REDF_FLOWVALVE)
315		rp->red_flowvalve = fv_alloc(rp);
316	/* if fv_alloc failes, flowvalve is just disabled */
317#endif
318#endif /* ALTQ3_COMPAT */
319	return (rp);
320}
321
/*
 * red_destroy: release a red state block allocated by red_alloc.
 * drops this state's reference on the shared weight table and, when
 * flow-valve support is compiled in, destroys the attached flowvalve.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
334
335void
336red_getstats(red_t *rp, struct redstats *sp)
337{
338	sp->q_avg		= rp->red_avg >> rp->red_wshift;
339	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
340	sp->drop_cnt		= rp->red_stats.drop_cnt;
341	sp->drop_forced		= rp->red_stats.drop_forced;
342	sp->drop_unforced	= rp->red_stats.drop_unforced;
343	sp->marked_packets	= rp->red_stats.marked_packets;
344}
345
/*
 * red_addq: enqueue mbuf m on queue q according to the red algorithm.
 * updates the ewma queue-length estimate (red_avg) and then either
 * enqueues the packet, ecn-marks it, or drops it.
 * returns 0 when the packet was queued, -1 when it was dropped
 * (the mbuf is freed in that case).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* let the flow-valve pre-drop packets from penalized flows */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
483
/*
 * early-drop probability is calculated as follows:
 *   prob = p_max * (avg - th_min) / (th_max - th_min)
 *   prob_a = prob / (2 - count*prob)
 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
 * here prob_a increases as successive undrop count increases.
 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
 * becomes 1 when (count >= (2 / prob))).
 *
 * drop_early(fp_len, fp_probd, count):
 *   fp_len   = (avg - th_min), fixed-point
 *   fp_probd = probability denominator, fixed-point
 *   count    = packets accepted since the last drop/mark
 * returns 1 to drop or mark the packet, 0 to let it pass.
 */
int
drop_early(int fp_len, int fp_probd, int count)
{
	int	denom;		/* denominator of drop-probability */

	denom = fp_probd - count * fp_len;
	if (denom <= 0) {
		/* count exceeds the hard limit: drop or mark */
		return (1);
	}

	/*
	 * now the range of denom is [1..600] in fixed-point. (when
	 * th_max-th_min=10 and p_max=1/30)
	 * drop probability = (avg - TH_MIN) / denom
	 */
	return ((cprng_fast32() % denom) < fp_len) ? 1 : 0;
}
516
/*
 * mark_ecn: try to set the CE (congestion experienced) codepoint on
 * the packet instead of dropping it.
 * the ip header location is taken from the mbuf's altq tag (or, with
 * ALTQ3_COMPAT, from pktattr) and is validated to lie within the mbuf
 * chain before it is written.
 *    returns 1 if the packet is (or already was) marked, 0 otherwise.
 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf	*m0;
	struct m_tag	*t;
	struct altq_tag	*at;
	void		*hdr;
	int		 af;

	/* prefer the classification info carried in the mbuf tag */
	t = m_tag_find(m, PACKET_TAG_ALTQ_QID, NULL);
	if (t != NULL) {
		at = (struct altq_tag *)(t + 1);
		if (at == NULL)
			return (0);
		af = at->af;
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return (0);

	if (af != AF_INET && af != AF_INET6)
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((char *)hdr >= m0->m_data) &&
		    ((char *)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return (0);
	}

	switch (af) {
	case AF_INET:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * incrementally update checksum (from RFC1624)
			 *	   HC' = ~(~HC + ~m + m')
			 * where m = old tos, m' = new tos
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			/* the ECN field occupies bits 20-21 of ip6_flow */
			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked,  mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
620
621struct mbuf *
622red_getq(red_t *rp, class_queue_t *q)
623{
624	struct mbuf *m;
625
626	if ((m = _getq(q)) == NULL) {
627		if (rp->red_idle == 0) {
628			rp->red_idle = 1;
629			microtime(&rp->red_last);
630		}
631		return NULL;
632	}
633
634	rp->red_idle = 0;
635	return (m);
636}
637
638/*
639 * helper routine to calibrate avg during idle.
640 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
641 * here Wq = 1/weight and the code assumes Wq is close to zero.
642 *
643 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
644 */
645static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
646
647struct wtab *
648wtab_alloc(int weight)
649{
650	struct wtab	*w;
651	int		 i;
652
653	for (w = wtab_list; w != NULL; w = w->w_next)
654		if (w->w_weight == weight) {
655			w->w_refcount++;
656			return (w);
657		}
658
659	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK|M_ZERO);
660	if (w == NULL)
661		panic("wtab_alloc: malloc failed!");
662	w->w_weight = weight;
663	w->w_refcount = 1;
664	w->w_next = wtab_list;
665	wtab_list = w;
666
667	/* initialize the weight table */
668	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
669	for (i = 1; i < 32; i++) {
670		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
671		if (w->w_tab[i] == 0 && w->w_param_max == 0)
672			w->w_param_max = 1 << i;
673	}
674
675	return (w);
676}
677
678int
679wtab_destroy(struct wtab *w)
680{
681	struct wtab	*prev;
682
683	if (--w->w_refcount > 0)
684		return (0);
685
686	if (wtab_list == w)
687		wtab_list = w->w_next;
688	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
689		if (prev->w_next == w) {
690			prev->w_next = w->w_next;
691			break;
692		}
693
694	free(w, M_DEVBUF);
695	return (0);
696}
697
698int32_t
699pow_w(struct wtab *w, int n)
700{
701	int	i, bit;
702	int32_t	val;
703
704	if (n >= w->w_param_max)
705		return (0);
706
707	val = 1 << FP_SHIFT;
708	if (n <= 0)
709		return (val);
710
711	bit = 1;
712	i = 0;
713	while (n) {
714		if (n & bit) {
715			val = (val * w->w_tab[i]) >> FP_SHIFT;
716			n &= ~bit;
717		}
718		i++;
719		bit <<=  1;
720	}
721	return (val);
722}
723
724#ifdef ALTQ3_COMPAT
725/*
726 * red device interface
727 */
728altqdev_decl(red);
729
/*
 * redopen: character-device open; nothing to set up here.
 */
int
redopen(dev_t dev, int flag, int fmt,
    struct lwp *l)
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
737
738int
739redclose(dev_t dev, int flag, int fmt,
740    struct lwp *l)
741{
742	red_queue_t *rqp;
743	int err, error = 0;
744
745	while ((rqp = red_list) != NULL) {
746		/* destroy all */
747		err = red_detach(rqp);
748		if (err != 0 && error == 0)
749			error = err;
750	}
751
752	return error;
753}
754
/*
 * redioctl: character-device ioctl entry for configuring red.
 * RED_GETSTATS may be issued by any caller; all other commands
 * require KAUTH_NETWORK_ALTQ / KAUTH_REQ_NETWORK_ALTQ_RED
 * authorization.
 */
int
redioctl(dev_t dev, ioctlcmd_t cmd, void *addr, int flag,
    struct lwp *l)
{
	red_queue_t *rqp;
	struct red_interface *ifacep;
	struct ifnet *ifp;
	struct proc *p = l->l_proc;
	int	error = 0;

	/* check super-user privilege */
	switch (cmd) {
	case RED_GETSTATS:
		break;
	default:
#if (__FreeBSD_version > 400000)
		if ((error = suser(p)) != 0)
#else
		if ((error = kauth_authorize_network(p->p_cred,
		    KAUTH_NETWORK_ALTQ, KAUTH_REQ_NETWORK_ALTQ_RED, NULL,
		    NULL, NULL)) != 0)
#endif
			return (error);
		break;
	}

	switch (cmd) {

	case RED_ENABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_enable(rqp->rq_ifq);
		break;

	case RED_DISABLE:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = altq_disable(rqp->rq_ifq);
		break;

	case RED_IF_ATTACH:
		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
		if (ifp == NULL) {
			error = ENXIO;
			break;
		}

		/* allocate and initialize red_queue_t */
		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK|M_ZERO);
		if (rqp == NULL) {
			error = ENOMEM;
			break;
		}

		rqp->rq_q = malloc(sizeof(class_queue_t), M_DEVBUF,
		    M_WAITOK|M_ZERO);
		if (rqp->rq_q == NULL) {
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		/* all-zero arguments select the built-in red defaults */
		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
		if (rqp->rq_red == NULL) {
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			error = ENOMEM;
			break;
		}

		rqp->rq_ifq = &ifp->if_snd;
		qtail(rqp->rq_q) = NULL;
		qlen(rqp->rq_q) = 0;
		qlimit(rqp->rq_q) = RED_LIMIT;
		qtype(rqp->rq_q) = Q_RED;

		/*
		 * set RED to this ifnet structure.
		 */
		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
				    red_enqueue, red_dequeue, red_request,
				    NULL, NULL);
		if (error) {
			red_destroy(rqp->rq_red);
			free(rqp->rq_q, M_DEVBUF);
			free(rqp, M_DEVBUF);
			break;
		}

		/* add this state to the red list */
		rqp->rq_next = red_list;
		red_list = rqp;
		break;

	case RED_IF_DETACH:
		ifacep = (struct red_interface *)addr;
		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
			error = EBADF;
			break;
		}
		error = red_detach(rqp);
		break;

	case RED_GETSTATS:
		do {
			struct red_stats *q_stats;
			red_t *rp;

			q_stats = (struct red_stats *)addr;
			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
					     ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}

			q_stats->q_len 	   = qlen(rqp->rq_q);
			q_stats->q_limit   = qlimit(rqp->rq_q);

			rp = rqp->rq_red;
			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
			q_stats->drop_forced   = rp->red_stats.drop_forced;
			q_stats->drop_unforced = rp->red_stats.drop_unforced;
			q_stats->marked_packets = rp->red_stats.marked_packets;

			q_stats->weight		= rp->red_weight;
			q_stats->inv_pmax	= rp->red_inv_pmax;
			q_stats->th_min		= rp->red_thmin;
			q_stats->th_max		= rp->red_thmax;

#ifdef ALTQ_FLOWVALVE
			if (rp->red_flowvalve != NULL) {
				struct flowvalve *fv = rp->red_flowvalve;
				q_stats->fv_flows    = fv->fv_flows;
				q_stats->fv_pass     = fv->fv_stats.pass;
				q_stats->fv_predrop  = fv->fv_stats.predrop;
				q_stats->fv_alloc    = fv->fv_stats.alloc;
				q_stats->fv_escape   = fv->fv_stats.escape;
			} else {
#endif /* ALTQ_FLOWVALVE */
				q_stats->fv_flows    = 0;
				q_stats->fv_pass     = 0;
				q_stats->fv_predrop  = 0;
				q_stats->fv_alloc    = 0;
				q_stats->fv_escape   = 0;
#ifdef ALTQ_FLOWVALVE
			}
#endif /* ALTQ_FLOWVALVE */
		} while (/*CONSTCOND*/ 0);
		break;

	case RED_CONFIG:
		do {
			struct red_conf *fc;
			red_t *new;
			int s, limit;

			fc = (struct red_conf *)addr;
			if ((rqp = altq_lookup(fc->iface.red_ifname,
					       ALTQT_RED)) == NULL) {
				error = EBADF;
				break;
			}
			new = red_alloc(fc->red_weight,
					fc->red_inv_pmax,
					fc->red_thmin,
					fc->red_thmax,
					fc->red_flags,
					fc->red_pkttime);
			if (new == NULL) {
				error = ENOMEM;
				break;
			}

			/* swap in the new red state with the queue purged */
			s = splnet();
			red_purgeq(rqp);
			limit = fc->red_limit;
			if (limit < fc->red_thmax)
				limit = fc->red_thmax;
			qlimit(rqp->rq_q) = limit;
			fc->red_limit = limit;	/* write back the new value */

			red_destroy(rqp->rq_red);
			rqp->rq_red = new;

			splx(s);

			/* write back new values (red_limit a second time; harmless) */
			fc->red_limit = limit;
			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
			fc->red_thmin = rqp->rq_red->red_thmin;
			fc->red_thmax = rqp->rq_red->red_thmax;

		} while (/*CONSTCOND*/ 0);
		break;

	case RED_SETDEFAULTS:
		do {
			struct redparams *rp;

			rp = (struct redparams *)addr;

			/* these defaults apply to queues attached afterwards */
			default_th_min = rp->th_min;
			default_th_max = rp->th_max;
			default_inv_pmax = rp->inv_pmax;
		} while (/*CONSTCOND*/ 0);
		break;

	default:
		error = EINVAL;
		break;
	}
	return error;
}
976
977static int
978red_detach(red_queue_t *rqp)
979{
980	red_queue_t *tmp;
981	int error = 0;
982
983	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
984		altq_disable(rqp->rq_ifq);
985
986	if ((error = altq_detach(rqp->rq_ifq)))
987		return (error);
988
989	if (red_list == rqp)
990		red_list = rqp->rq_next;
991	else {
992		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
993			if (tmp->rq_next == rqp) {
994				tmp->rq_next = rqp->rq_next;
995				break;
996			}
997		if (tmp == NULL)
998			printf("red_detach: no state found in red_list!\n");
999	}
1000
1001	red_destroy(rqp->rq_red);
1002	free(rqp->rq_q, M_DEVBUF);
1003	free(rqp, M_DEVBUF);
1004	return (error);
1005}
1006
1007/*
1008 * enqueue routine:
1009 *
1010 *	returns: 0 when successfully queued.
1011 *		 ENOBUFS when drop occurs.
1012 */
1013static int
1014red_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
1015{
1016	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1017
1018	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1019		return ENOBUFS;
1020	ifq->ifq_len++;
1021	return 0;
1022}
1023
1024/*
1025 * dequeue routine:
1026 *	must be called in splnet.
1027 *
1028 *	returns: mbuf dequeued.
1029 *		 NULL when no packet is available in the queue.
1030 */
1031
1032static struct mbuf *
1033red_dequeue(struct ifaltq *ifq, int op)
1034{
1035	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1036	struct mbuf *m;
1037
1038	if (op == ALTDQ_POLL)
1039		return qhead(rqp->rq_q);
1040
1041	/* op == ALTDQ_REMOVE */
1042	m =  red_getq(rqp->rq_red, rqp->rq_q);
1043	if (m != NULL)
1044		ifq->ifq_len--;
1045	return (m);
1046}
1047
1048static int
1049red_request(struct ifaltq *ifq, int req, void *arg)
1050{
1051	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1052
1053	switch (req) {
1054	case ALTRQ_PURGE:
1055		red_purgeq(rqp);
1056		break;
1057	}
1058	return (0);
1059}
1060
/*
 * red_purgeq: discard all packets held in the class queue, and reset
 * the interface queue length while altq is enabled (ifq_len is only
 * maintained by the red hooks in that case).
 */
static void
red_purgeq(red_queue_t *rqp)
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}
1068
1069#ifdef ALTQ_FLOWVALVE
1070
/* flow-valve tuning parameters */
#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */

#define	FV_N			10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
#define	FV_TTHRESH		3  /* time threshold to delete fve */
#define	FV_ALPHA		5  /* extra packet count */

#define	FV_STATS

#define	FV_TIMESTAMP(tp)	getmicrotime(tp)

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
1104
/* precomputed Brtt(p) values in fixed-point (see table comment above) */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1123
/*
 * Look up the flowvalve entry matching the flow (address pair) of the
 * packet described by "pktattr".  The flowlist is kept in LRU order
 * and only entries that saw a drop within the last FV_TTHRESH seconds
 * are live, so the scan stops at the first unused or expired entry.
 * As a side effect, the number of live entries scanned is saved in
 * fv->fv_flows (only when no match is found, since a match returns
 * early).  Returns the matching fve, or NULL if there is none.
 */
static inline struct fve *
flowlist_lookup(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct timeval *now)
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries whose last drop predates this are considered expired */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				/* unused entry; LRU order implies the rest are too */
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate it and stop the scan */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				/* unused entry; LRU order implies the rest are too */
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				/* expired: invalidate it and stop the scan */
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1191
1192static inline struct fve *
1193flowlist_reclaim(struct flowvalve *fv, struct altq_pktattr *pktattr)
1194{
1195	struct fve *fve;
1196	struct ip *ip;
1197#ifdef INET6
1198	struct ip6_hdr *ip6;
1199#endif
1200
1201	/*
1202	 * get an entry from the tail of the LRU list.
1203	 */
1204	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1205
1206	switch (pktattr->pattr_af) {
1207	case AF_INET:
1208		ip = (struct ip *)pktattr->pattr_hdr;
1209		fve->fve_flow.flow_af = AF_INET;
1210		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1211		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1212		break;
1213#ifdef INET6
1214	case AF_INET6:
1215		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1216		fve->fve_flow.flow_af = AF_INET6;
1217		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1218		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1219		break;
1220#endif
1221	}
1222
1223	fve->fve_state = Green;
1224	fve->fve_p = 0.0;
1225	fve->fve_f = 0.0;
1226	fve->fve_ifseq = fv->fv_ifseq - 1;
1227	fve->fve_count = 0;
1228
1229	fv->fv_flows++;
1230#ifdef FV_STATS
1231	fv->fv_stats.alloc++;
1232#endif
1233	return (fve);
1234}
1235
1236static inline void
1237flowlist_move_to_head(struct flowvalve *fv, struct fve *fve)
1238{
1239	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1240		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1241		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1242	}
1243}
1244
1245/*
1246 * allocate flowvalve structure
1247 */
/*
 * Allocate and initialize a flowvalve structure for the red state "rp":
 * the fixed-size fve array (threaded onto an LRU list), the drop-rate
 * threshold, and the p-to-f conversion table.  Returns NULL on
 * allocation failure (defensive: M_WAITOK allocations do not normally
 * fail in this kernel).
 */
static struct flowvalve *
fv_alloc(struct red *rp)
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve), M_DEVBUF, M_WAITOK|M_ZERO);
	if (fv == NULL)
		return (NULL);

	fv->fv_fves = malloc(sizeof(struct fve) * num, M_DEVBUF,
	    M_WAITOK|M_ZERO);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		/* tv_sec == 0 marks the entry as unused */
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE, M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * Entry 0 is intentionally left out: fv_p2f() never reads it.
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
1298
1299static void
1300fv_destroy(struct flowvalve *fv)
1301{
1302	free(fv->fv_p2ftab, M_DEVBUF);
1303	free(fv->fv_fves, M_DEVBUF);
1304	free(fv, M_DEVBUF);
1305}
1306
1307static inline int
1308fv_p2f(struct flowvalve *fv, int p)
1309{
1310	int val, f;
1311
1312	if (p >= BRTT_PMAX)
1313		f = fv->fv_p2ftab[BRTT_SIZE-1];
1314	else if ((val = (p & BRTT_MASK)))
1315		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1316	else
1317		f = fv->fv_p2ftab[1];
1318	return (f);
1319}
1320
1321/*
1322 * check if an arriving packet should be pre-dropped.
1323 * called from red_addq() when a packet arrives.
1324 * returns 1 when the packet should be pre-dropped.
1325 * should be called in splnet.
1326 */
static int
fv_checkflow(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct fve **fcache)
{
	struct fve *fve;
	struct timeval now;

	/* count every arriving packet on the interface */
	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* remember the entry so fv_dropbyred() can skip the lookup */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (EWMA of the flow's share of arrivals, in scaled
		 * fixed point; Wf is encoded by FV_FUNSCALE's shift)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose drop rate p exceeds the
	 * threshold and whose bandwidth share f exceeds the fraction
	 * Brtt(p) allows is marked Red (unresponsive).
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay the drop-rate estimate; fv_dropbyred() cancels this
	 * decay and adds Wp when the packet is actually dropped)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1400
1401/*
1402 * called from red_addq when a packet is dropped by red.
1403 * should be called in splnet.
1404 */
static void
fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *pktattr,
    struct fve *fcache)
{
	struct fve *fve;
	struct timeval now;

	/* nothing to account without packet attributes */
	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* not tracked yet: recycle the LRU tail entry */
		fve = flowlist_reclaim(fv, pktattr);

	/* a dropping flow is hot: keep it at the front of the LRU list */
	flowlist_move_to_head(fv, fve);

	/*
	 * update p:  the following line cancels the update
	 *	      in fv_checkflow() and calculate
	 *	p = Wp + (1 - Wp) * p
	 * (fv_checkflow() already applied the (1 - Wp) decay for this
	 * packet; adding the scaled 1 here yields the drop case.)
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}
1433
1434#endif /* ALTQ_FLOWVALVE */
1435
#ifdef KLD_MODULE

/* device switch used when RED is built as a loadable kernel module */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
1445#endif /* ALTQ3_COMPAT */
1446
1447#endif /* ALTQ_RED */
1448