1/*	$FreeBSD$	*/
2/*	$KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1997-2003
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the Computer Systems
45 *	Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 *    to endorse or promote products derived from this software without
48 *    specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#if defined(__FreeBSD__) || defined(__NetBSD__)
64#include "opt_altq.h"
65#include "opt_inet.h"
66#ifdef __FreeBSD__
67#include "opt_inet6.h"
68#endif
69#endif /* __FreeBSD__ || __NetBSD__ */
70#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
71
72#include <sys/param.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/socket.h>
76#include <sys/systm.h>
77#include <sys/errno.h>
78#if 1 /* ALTQ3_COMPAT */
79#include <sys/sockio.h>
80#include <sys/proc.h>
81#include <sys/kernel.h>
82#ifdef ALTQ_FLOWVALVE
83#include <sys/queue.h>
84#include <sys/time.h>
85#endif
86#endif /* ALTQ3_COMPAT */
87
88#include <net/if.h>
89
90#include <netinet/in.h>
91#include <netinet/in_systm.h>
92#include <netinet/ip.h>
93#ifdef INET6
94#include <netinet/ip6.h>
95#endif
96
97#include <net/pfvar.h>
98#include <altq/altq.h>
99#include <altq/altq_red.h>
100#ifdef ALTQ3_COMPAT
101#include <altq/altq_conf.h>
102#ifdef ALTQ_FLOWVALVE
103#include <altq/altq_flowvalve.h>
104#endif
105#endif
106
107/*
108 * ALTQ/RED (Random Early Detection) implementation using 32-bit
109 * fixed-point calculation.
110 *
111 * written by kjc using the ns code as a reference.
112 * you can learn more about red and ns from Sally's home page at
113 * http://www-nrg.ee.lbl.gov/floyd/
114 *
115 * most of the red parameter values are fixed in this implementation
116 * to prevent fixed-point overflow/underflow.
117 * if you change the parameters, watch out for overflow/underflow!
118 *
119 * the parameters used are recommended values by Sally.
120 * the corresponding ns config looks:
121 *	q_weight=0.00195
122 *	minthresh=5 maxthresh=15 queue-size=60
123 *	linterm=30
124 *	dropmech=drop-tail
125 *	bytes=false (can't be handled by 32-bit fixed-point)
126 *	doubleq=false dqthresh=false
127 *	wait=true
128 */
129/*
130 * alternative red parameters for a slow link.
131 *
132 * assume the queue length becomes from zero to L and keeps L, it takes
133 * N packets for q_avg to reach 63% of L.
134 * when q_weight is 0.002, N is about 500 packets.
135 * for a slow link like dial-up, 500 packets takes more than 1 minute!
136 * when q_weight is 0.008, N is about 127 packets.
137 * when q_weight is 0.016, N is about 63 packets.
138 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
139 * are allowed for 0.016.
140 * see Sally's paper for more details.
141 */
142/* normal red parameters */
143#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
144				/* q_weight = 0.00195 */
145
146/* red parameters for a slow link */
147#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
148				/* q_weight = 0.0078125 */
149
150/* red parameters for a very slow link (e.g., dialup) */
151#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
152				/* q_weight = 0.015625 */
153
154/* fixed-point uses 12-bit decimal places */
155#define	FP_SHIFT	12	/* fixed-point shift */
156
157/* red parameters for drop probability */
158#define	INV_P_MAX	10	/* inverse of max drop probability */
159#define	TH_MIN		5	/* min threshold */
160#define	TH_MAX		15	/* max threshold */
161
#define	RED_LIMIT	60	/* default max queue length */
163#define	RED_STATS		/* collect statistics */
164
165/*
166 * our default policy for forced-drop is drop-tail.
167 * (in altq-1.1.2 or earlier, the default was random-drop.
168 * but it makes more sense to punish the cause of the surge.)
169 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
170 */
171
172#ifdef ALTQ3_COMPAT
173#ifdef ALTQ_FLOWVALVE
174/*
 * flow-valve is an extension to protect red from unresponsive flows
176 * and to promote end-to-end congestion control.
177 * flow-valve observes the average drop rates of the flows that have
178 * experienced packet drops in the recent past.
179 * when the average drop rate exceeds the threshold, the flow is
180 * blocked by the flow-valve.  the trapped flow should back off
181 * exponentially to escape from the flow-valve.
182 */
183#ifdef RED_RANDOM_DROP
184#error "random-drop can't be used with flow-valve!"
185#endif
186#endif /* ALTQ_FLOWVALVE */
187
188/* red_list keeps all red_queue_t's allocated. */
189static red_queue_t *red_list = NULL;
190
191#endif /* ALTQ3_COMPAT */
192
193/* default red parameter values */
194static int default_th_min = TH_MIN;
195static int default_th_max = TH_MAX;
196static int default_inv_pmax = INV_P_MAX;
197
198#ifdef ALTQ3_COMPAT
199/* internal function prototypes */
200static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
201static struct mbuf *red_dequeue(struct ifaltq *, int);
202static int red_request(struct ifaltq *, int, void *);
203static void red_purgeq(red_queue_t *);
204static int red_detach(red_queue_t *);
205#ifdef ALTQ_FLOWVALVE
206static __inline struct fve *flowlist_lookup(struct flowvalve *,
207			 struct altq_pktattr *, struct timeval *);
208static __inline struct fve *flowlist_reclaim(struct flowvalve *,
209					     struct altq_pktattr *);
210static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
211static __inline int fv_p2f(struct flowvalve *, int);
212#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
213static struct flowvalve *fv_alloc(struct red *);
214#endif
215static void fv_destroy(struct flowvalve *);
216static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
217			struct fve **);
218static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
219			 struct fve *);
220#endif
221#endif /* ALTQ3_COMPAT */
222
223/*
224 * red support routines
225 */
226red_t *
227red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
228   int pkttime)
229{
230	red_t	*rp;
231	int	 w, i;
232	int	 npkts_per_sec;
233
234	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK);
235	if (rp == NULL)
236		return (NULL);
237	bzero(rp, sizeof(red_t));
238
239	rp->red_avg = 0;
240	rp->red_idle = 1;
241
242	if (weight == 0)
243		rp->red_weight = W_WEIGHT;
244	else
245		rp->red_weight = weight;
246	if (inv_pmax == 0)
247		rp->red_inv_pmax = default_inv_pmax;
248	else
249		rp->red_inv_pmax = inv_pmax;
250	if (th_min == 0)
251		rp->red_thmin = default_th_min;
252	else
253		rp->red_thmin = th_min;
254	if (th_max == 0)
255		rp->red_thmax = default_th_max;
256	else
257		rp->red_thmax = th_max;
258
259	rp->red_flags = flags;
260
261	if (pkttime == 0)
262		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
263		rp->red_pkttime = 800;
264	else
265		rp->red_pkttime = pkttime;
266
267	if (weight == 0) {
268		/* when the link is very slow, adjust red parameters */
269		npkts_per_sec = 1000000 / rp->red_pkttime;
270		if (npkts_per_sec < 50) {
271			/* up to about 400Kbps */
272			rp->red_weight = W_WEIGHT_2;
273		} else if (npkts_per_sec < 300) {
274			/* up to about 2.4Mbps */
275			rp->red_weight = W_WEIGHT_1;
276		}
277	}
278
279	/* calculate wshift.  weight must be power of 2 */
280	w = rp->red_weight;
281	for (i = 0; w > 1; i++)
282		w = w >> 1;
283	rp->red_wshift = i;
284	w = 1 << rp->red_wshift;
285	if (w != rp->red_weight) {
286		printf("invalid weight value %d for red! use %d\n",
287		       rp->red_weight, w);
288		rp->red_weight = w;
289	}
290
291	/*
292	 * thmin_s and thmax_s are scaled versions of th_min and th_max
293	 * to be compared with avg.
294	 */
295	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
296	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
297
298	/*
299	 * precompute probability denominator
300	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
301	 */
302	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
303			 * rp->red_inv_pmax) << FP_SHIFT;
304
305	/* allocate weight table */
306	rp->red_wtab = wtab_alloc(rp->red_weight);
307
308	microtime(&rp->red_last);
309	return (rp);
310}
311
/*
 * Free a red state previously allocated by red_alloc().
 * Releases the reference on the shared weight table (and the
 * flow-valve state when ALTQ3_COMPAT/ALTQ_FLOWVALVE are configured).
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
324
325void
326red_getstats(red_t *rp, struct redstats *sp)
327{
328	sp->q_avg		= rp->red_avg >> rp->red_wshift;
329	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
330	sp->drop_cnt		= rp->red_stats.drop_cnt;
331	sp->drop_forced		= rp->red_stats.drop_forced;
332	sp->drop_unforced	= rp->red_stats.drop_unforced;
333	sp->marked_packets	= rp->red_stats.marked_packets;
334}
335
/*
 * Enqueue a packet on a RED-managed queue.
 *
 * Updates the EWMA queue-length estimate, then decides to enqueue the
 * packet, ECN-mark it, or drop it (early/unforced drop when between the
 * thresholds, forced drop when above th_max or at the hard queue limit).
 *
 * Returns 0 when the packet was queued, -1 when it was dropped; the
 * mbuf is freed here on drop.
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* ask the flow-valve first; a trapped flow is dropped outright */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		/* report the drop so the flow-valve can track this flow */
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
473
474/*
475 * early-drop probability is calculated as follows:
476 *   prob = p_max * (avg - th_min) / (th_max - th_min)
477 *   prob_a = prob / (2 - count*prob)
478 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
479 * here prob_a increases as successive undrop count increases.
480 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
481 * becomes 1 when (count >= (2 / prob))).
482 */
483int
484drop_early(int fp_len, int fp_probd, int count)
485{
486	int	d;		/* denominator of drop-probability */
487
488	d = fp_probd - count * fp_len;
489	if (d <= 0)
490		/* count exceeds the hard limit: drop or mark */
491		return (1);
492
493	/*
494	 * now the range of d is [1..600] in fixed-point. (when
495	 * th_max-th_min=10 and p_max=1/30)
496	 * drop probability = (avg - TH_MIN) / d
497	 */
498
499	if ((arc4random() % d) < fp_len) {
500		/* drop or mark */
501		return (1);
502	}
503	/* no drop/mark */
504	return (0);
505}
506
507/*
508 * try to mark CE bit to the packet.
509 *    returns 1 if successfully marked, 0 otherwise.
510 */
511int
512mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
513{
514	struct mbuf	*m0;
515	struct pf_mtag	*at;
516	void		*hdr;
517
518	at = pf_find_mtag(m);
519	if (at != NULL) {
520		hdr = at->hdr;
521#ifdef ALTQ3_COMPAT
522	} else if (pktattr != NULL) {
523		af = pktattr->pattr_af;
524		hdr = pktattr->pattr_hdr;
525#endif /* ALTQ3_COMPAT */
526	} else
527		return (0);
528
529	/* verify that pattr_hdr is within the mbuf data */
530	for (m0 = m; m0 != NULL; m0 = m0->m_next)
531		if (((caddr_t)hdr >= m0->m_data) &&
532		    ((caddr_t)hdr < m0->m_data + m0->m_len))
533			break;
534	if (m0 == NULL) {
535		/* ick, tag info is stale */
536		return (0);
537	}
538
539	switch (((struct ip *)hdr)->ip_v) {
540	case IPVERSION:
541		if (flags & REDF_ECN4) {
542			struct ip *ip = hdr;
543			u_int8_t otos;
544			int sum;
545
546			if (ip->ip_v != 4)
547				return (0);	/* version mismatch! */
548
549			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
550				return (0);	/* not-ECT */
551			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
552				return (1);	/* already marked */
553
554			/*
555			 * ecn-capable but not marked,
556			 * mark CE and update checksum
557			 */
558			otos = ip->ip_tos;
559			ip->ip_tos |= IPTOS_ECN_CE;
560			/*
561			 * update checksum (from RFC1624)
562			 *	   HC' = ~(~HC + ~m + m')
563			 */
564			sum = ~ntohs(ip->ip_sum) & 0xffff;
565			sum += (~otos & 0xffff) + ip->ip_tos;
566			sum = (sum >> 16) + (sum & 0xffff);
567			sum += (sum >> 16);  /* add carry */
568			ip->ip_sum = htons(~sum & 0xffff);
569			return (1);
570		}
571		break;
572#ifdef INET6
573	case (IPV6_VERSION >> 4):
574		if (flags & REDF_ECN6) {
575			struct ip6_hdr *ip6 = hdr;
576			u_int32_t flowlabel;
577
578			flowlabel = ntohl(ip6->ip6_flow);
579			if ((flowlabel >> 28) != 6)
580				return (0);	/* version mismatch! */
581			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
582			    (IPTOS_ECN_NOTECT << 20))
583				return (0);	/* not-ECT */
584			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
585			    (IPTOS_ECN_CE << 20))
586				return (1);	/* already marked */
587			/*
588			 * ecn-capable but not marked,  mark CE
589			 */
590			flowlabel |= (IPTOS_ECN_CE << 20);
591			ip6->ip6_flow = htonl(flowlabel);
592			return (1);
593		}
594		break;
595#endif  /* INET6 */
596	}
597
598	/* not marked */
599	return (0);
600}
601
602struct mbuf *
603red_getq(rp, q)
604	red_t *rp;
605	class_queue_t *q;
606{
607	struct mbuf *m;
608
609	if ((m = _getq(q)) == NULL) {
610		if (rp->red_idle == 0) {
611			rp->red_idle = 1;
612			microtime(&rp->red_last);
613		}
614		return NULL;
615	}
616
617	rp->red_idle = 0;
618	return (m);
619}
620
621/*
622 * helper routine to calibrate avg during idle.
623 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
624 * here Wq = 1/weight and the code assumes Wq is close to zero.
625 *
626 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
627 */
628static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
629
630struct wtab *
631wtab_alloc(int weight)
632{
633	struct wtab	*w;
634	int		 i;
635
636	for (w = wtab_list; w != NULL; w = w->w_next)
637		if (w->w_weight == weight) {
638			w->w_refcount++;
639			return (w);
640		}
641
642	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK);
643	if (w == NULL)
644		panic("wtab_alloc: malloc failed!");
645	bzero(w, sizeof(struct wtab));
646	w->w_weight = weight;
647	w->w_refcount = 1;
648	w->w_next = wtab_list;
649	wtab_list = w;
650
651	/* initialize the weight table */
652	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
653	for (i = 1; i < 32; i++) {
654		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
655		if (w->w_tab[i] == 0 && w->w_param_max == 0)
656			w->w_param_max = 1 << i;
657	}
658
659	return (w);
660}
661
662int
663wtab_destroy(struct wtab *w)
664{
665	struct wtab	*prev;
666
667	if (--w->w_refcount > 0)
668		return (0);
669
670	if (wtab_list == w)
671		wtab_list = w->w_next;
672	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
673		if (prev->w_next == w) {
674			prev->w_next = w->w_next;
675			break;
676		}
677
678	free(w, M_DEVBUF);
679	return (0);
680}
681
682int32_t
683pow_w(struct wtab *w, int n)
684{
685	int	i, bit;
686	int32_t	val;
687
688	if (n >= w->w_param_max)
689		return (0);
690
691	val = 1 << FP_SHIFT;
692	if (n <= 0)
693		return (val);
694
695	bit = 1;
696	i = 0;
697	while (n) {
698		if (n & bit) {
699			val = (val * w->w_tab[i]) >> FP_SHIFT;
700			n &= ~bit;
701		}
702		i++;
703		bit <<=  1;
704	}
705	return (val);
706}
707
708#ifdef ALTQ3_COMPAT
709/*
710 * red device interface
711 */
712altqdev_decl(red);
713
/*
 * Device open handler for the red character device.
 * No per-open state is required: all setup happens when a queueing
 * scheme is attached via the RED_IF_ATTACH ioctl.  Always succeeds.
 */
int
redopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
727
728int
729redclose(dev, flag, fmt, p)
730	dev_t dev;
731	int flag, fmt;
732#if (__FreeBSD_version > 500000)
733	struct thread *p;
734#else
735	struct proc *p;
736#endif
737{
738	red_queue_t *rqp;
739	int err, error = 0;
740
741	while ((rqp = red_list) != NULL) {
742		/* destroy all */
743		err = red_detach(rqp);
744		if (err != 0 && error == 0)
745			error = err;
746	}
747
748	return error;
749}
750
751int
752redioctl(dev, cmd, addr, flag, p)
753	dev_t dev;
754	ioctlcmd_t cmd;
755	caddr_t addr;
756	int flag;
757#if (__FreeBSD_version > 500000)
758	struct thread *p;
759#else
760	struct proc *p;
761#endif
762{
763	red_queue_t *rqp;
764	struct red_interface *ifacep;
765	struct ifnet *ifp;
766	int	error = 0;
767
768	/* check super-user privilege */
769	switch (cmd) {
770	case RED_GETSTATS:
771		break;
772	default:
773#if (__FreeBSD_version > 700000)
774		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
775#elsif (__FreeBSD_version > 400000)
776		if ((error = suser(p)) != 0)
777#else
778		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
779#endif
780			return (error);
781		break;
782	}
783
784	switch (cmd) {
785
786	case RED_ENABLE:
787		ifacep = (struct red_interface *)addr;
788		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
789			error = EBADF;
790			break;
791		}
792		error = altq_enable(rqp->rq_ifq);
793		break;
794
795	case RED_DISABLE:
796		ifacep = (struct red_interface *)addr;
797		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
798			error = EBADF;
799			break;
800		}
801		error = altq_disable(rqp->rq_ifq);
802		break;
803
804	case RED_IF_ATTACH:
805		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
806		if (ifp == NULL) {
807			error = ENXIO;
808			break;
809		}
810
811		/* allocate and initialize red_queue_t */
812		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
813		if (rqp == NULL) {
814			error = ENOMEM;
815			break;
816		}
817		bzero(rqp, sizeof(red_queue_t));
818
819		rqp->rq_q = malloc(sizeof(class_queue_t),
820		       M_DEVBUF, M_WAITOK);
821		if (rqp->rq_q == NULL) {
822			free(rqp, M_DEVBUF);
823			error = ENOMEM;
824			break;
825		}
826		bzero(rqp->rq_q, sizeof(class_queue_t));
827
828		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
829		if (rqp->rq_red == NULL) {
830			free(rqp->rq_q, M_DEVBUF);
831			free(rqp, M_DEVBUF);
832			error = ENOMEM;
833			break;
834		}
835
836		rqp->rq_ifq = &ifp->if_snd;
837		qtail(rqp->rq_q) = NULL;
838		qlen(rqp->rq_q) = 0;
839		qlimit(rqp->rq_q) = RED_LIMIT;
840		qtype(rqp->rq_q) = Q_RED;
841
842		/*
843		 * set RED to this ifnet structure.
844		 */
845		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
846				    red_enqueue, red_dequeue, red_request,
847				    NULL, NULL);
848		if (error) {
849			red_destroy(rqp->rq_red);
850			free(rqp->rq_q, M_DEVBUF);
851			free(rqp, M_DEVBUF);
852			break;
853		}
854
855		/* add this state to the red list */
856		rqp->rq_next = red_list;
857		red_list = rqp;
858		break;
859
860	case RED_IF_DETACH:
861		ifacep = (struct red_interface *)addr;
862		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
863			error = EBADF;
864			break;
865		}
866		error = red_detach(rqp);
867		break;
868
869	case RED_GETSTATS:
870		do {
871			struct red_stats *q_stats;
872			red_t *rp;
873
874			q_stats = (struct red_stats *)addr;
875			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
876					     ALTQT_RED)) == NULL) {
877				error = EBADF;
878				break;
879			}
880
881			q_stats->q_len 	   = qlen(rqp->rq_q);
882			q_stats->q_limit   = qlimit(rqp->rq_q);
883
884			rp = rqp->rq_red;
885			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
886			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
887			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
888			q_stats->drop_forced   = rp->red_stats.drop_forced;
889			q_stats->drop_unforced = rp->red_stats.drop_unforced;
890			q_stats->marked_packets = rp->red_stats.marked_packets;
891
892			q_stats->weight		= rp->red_weight;
893			q_stats->inv_pmax	= rp->red_inv_pmax;
894			q_stats->th_min		= rp->red_thmin;
895			q_stats->th_max		= rp->red_thmax;
896
897#ifdef ALTQ_FLOWVALVE
898			if (rp->red_flowvalve != NULL) {
899				struct flowvalve *fv = rp->red_flowvalve;
900				q_stats->fv_flows    = fv->fv_flows;
901				q_stats->fv_pass     = fv->fv_stats.pass;
902				q_stats->fv_predrop  = fv->fv_stats.predrop;
903				q_stats->fv_alloc    = fv->fv_stats.alloc;
904				q_stats->fv_escape   = fv->fv_stats.escape;
905			} else {
906#endif /* ALTQ_FLOWVALVE */
907				q_stats->fv_flows    = 0;
908				q_stats->fv_pass     = 0;
909				q_stats->fv_predrop  = 0;
910				q_stats->fv_alloc    = 0;
911				q_stats->fv_escape   = 0;
912#ifdef ALTQ_FLOWVALVE
913			}
914#endif /* ALTQ_FLOWVALVE */
915		} while (/*CONSTCOND*/ 0);
916		break;
917
918	case RED_CONFIG:
919		do {
920			struct red_conf *fc;
921			red_t *new;
922			int s, limit;
923
924			fc = (struct red_conf *)addr;
925			if ((rqp = altq_lookup(fc->iface.red_ifname,
926					       ALTQT_RED)) == NULL) {
927				error = EBADF;
928				break;
929			}
930			new = red_alloc(fc->red_weight,
931					fc->red_inv_pmax,
932					fc->red_thmin,
933					fc->red_thmax,
934					fc->red_flags,
935					fc->red_pkttime);
936			if (new == NULL) {
937				error = ENOMEM;
938				break;
939			}
940
941#ifdef __NetBSD__
942			s = splnet();
943#else
944			s = splimp();
945#endif
946			red_purgeq(rqp);
947			limit = fc->red_limit;
948			if (limit < fc->red_thmax)
949				limit = fc->red_thmax;
950			qlimit(rqp->rq_q) = limit;
951			fc->red_limit = limit;	/* write back the new value */
952
953			red_destroy(rqp->rq_red);
954			rqp->rq_red = new;
955
956			splx(s);
957
958			/* write back new values */
959			fc->red_limit = limit;
960			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
961			fc->red_thmin = rqp->rq_red->red_thmin;
962			fc->red_thmax = rqp->rq_red->red_thmax;
963
964		} while (/*CONSTCOND*/ 0);
965		break;
966
967	case RED_SETDEFAULTS:
968		do {
969			struct redparams *rp;
970
971			rp = (struct redparams *)addr;
972
973			default_th_min = rp->th_min;
974			default_th_max = rp->th_max;
975			default_inv_pmax = rp->inv_pmax;
976		} while (/*CONSTCOND*/ 0);
977		break;
978
979	default:
980		error = EINVAL;
981		break;
982	}
983	return error;
984}
985
986static int
987red_detach(rqp)
988	red_queue_t *rqp;
989{
990	red_queue_t *tmp;
991	int error = 0;
992
993	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
994		altq_disable(rqp->rq_ifq);
995
996	if ((error = altq_detach(rqp->rq_ifq)))
997		return (error);
998
999	if (red_list == rqp)
1000		red_list = rqp->rq_next;
1001	else {
1002		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1003			if (tmp->rq_next == rqp) {
1004				tmp->rq_next = rqp->rq_next;
1005				break;
1006			}
1007		if (tmp == NULL)
1008			printf("red_detach: no state found in red_list!\n");
1009	}
1010
1011	red_destroy(rqp->rq_red);
1012	free(rqp->rq_q, M_DEVBUF);
1013	free(rqp, M_DEVBUF);
1014	return (error);
1015}
1016
1017/*
1018 * enqueue routine:
1019 *
1020 *	returns: 0 when successfully queued.
1021 *		 ENOBUFS when drop occurs.
1022 */
1023static int
1024red_enqueue(ifq, m, pktattr)
1025	struct ifaltq *ifq;
1026	struct mbuf *m;
1027	struct altq_pktattr *pktattr;
1028{
1029	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1030
1031	IFQ_LOCK_ASSERT(ifq);
1032
1033	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1034		return ENOBUFS;
1035	ifq->ifq_len++;
1036	return 0;
1037}
1038
1039/*
1040 * dequeue routine:
1041 *	must be called in splimp.
1042 *
1043 *	returns: mbuf dequeued.
1044 *		 NULL when no packet is available in the queue.
1045 */
1046
1047static struct mbuf *
1048red_dequeue(ifq, op)
1049	struct ifaltq *ifq;
1050	int op;
1051{
1052	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1053	struct mbuf *m;
1054
1055	IFQ_LOCK_ASSERT(ifq);
1056
1057	if (op == ALTDQ_POLL)
1058		return qhead(rqp->rq_q);
1059
1060	/* op == ALTDQ_REMOVE */
1061	m =  red_getq(rqp->rq_red, rqp->rq_q);
1062	if (m != NULL)
1063		ifq->ifq_len--;
1064	return (m);
1065}
1066
1067static int
1068red_request(ifq, req, arg)
1069	struct ifaltq *ifq;
1070	int req;
1071	void *arg;
1072{
1073	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1074
1075	IFQ_LOCK_ASSERT(ifq);
1076
1077	switch (req) {
1078	case ALTRQ_PURGE:
1079		red_purgeq(rqp);
1080		break;
1081	}
1082	return (0);
1083}
1084
/*
 * Discard every packet on the queue; when ALTQ is enabled on the
 * interface, also reset the interface queue length counter.
 */
static void
red_purgeq(rqp)
	red_queue_t *rqp;
{
	_flushq(rqp->rq_q);
	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		rqp->rq_ifq->ifq_len = 0;
}
1093
1094#ifdef ALTQ_FLOWVALVE
1095
1096#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
/* scale/unscale the drop probability p (fixed point, FV_PSHIFT fraction bits) */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
/* scale/unscale the bandwidth fraction f (fixed point, FV_FSHIFT fraction bits) */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */

#define	FV_N			10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
#define	FV_TTHRESH		3  /* time threshold to delete fve */
#define	FV_ALPHA		5  /* extra packet count */

#define	FV_STATS

#if (__FreeBSD_version > 300000)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128
#define	BRTT_SHIFT	12
#define	BRTT_MASK	0x0007f000
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))

/*
 * precomputed Brtt(p) values in fixed point.  entry 0 is a placeholder
 * (p == 0 is mapped to index 1 by fv_p2f()); indices 1..127 hold the
 * 127 usable entries referred to in the comment above.
 */
const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1152
1153static __inline struct fve *
1154flowlist_lookup(fv, pktattr, now)
1155	struct flowvalve *fv;
1156	struct altq_pktattr *pktattr;
1157	struct timeval *now;
1158{
1159	struct fve *fve;
1160	int flows;
1161	struct ip *ip;
1162#ifdef INET6
1163	struct ip6_hdr *ip6;
1164#endif
1165	struct timeval tthresh;
1166
1167	if (pktattr == NULL)
1168		return (NULL);
1169
1170	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
1171	flows = 0;
1172	/*
1173	 * search the flow list
1174	 */
1175	switch (pktattr->pattr_af) {
1176	case AF_INET:
1177		ip = (struct ip *)pktattr->pattr_hdr;
1178		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1179			if (fve->fve_lastdrop.tv_sec == 0)
1180				break;
1181			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1182				fve->fve_lastdrop.tv_sec = 0;
1183				break;
1184			}
1185			if (fve->fve_flow.flow_af == AF_INET &&
1186			    fve->fve_flow.flow_ip.ip_src.s_addr ==
1187			    ip->ip_src.s_addr &&
1188			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
1189			    ip->ip_dst.s_addr)
1190				return (fve);
1191			flows++;
1192		}
1193		break;
1194#ifdef INET6
1195	case AF_INET6:
1196		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1197		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1198			if (fve->fve_lastdrop.tv_sec == 0)
1199				break;
1200			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1201				fve->fve_lastdrop.tv_sec = 0;
1202				break;
1203			}
1204			if (fve->fve_flow.flow_af == AF_INET6 &&
1205			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
1206					       &ip6->ip6_src) &&
1207			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
1208					       &ip6->ip6_dst))
1209				return (fve);
1210			flows++;
1211		}
1212		break;
1213#endif /* INET6 */
1214
1215	default:
1216		/* unknown protocol.  no drop. */
1217		return (NULL);
1218	}
1219	fv->fv_flows = flows;	/* save the number of active fve's */
1220	return (NULL);
1221}
1222
1223static __inline struct fve *
1224flowlist_reclaim(fv, pktattr)
1225	struct flowvalve *fv;
1226	struct altq_pktattr *pktattr;
1227{
1228	struct fve *fve;
1229	struct ip *ip;
1230#ifdef INET6
1231	struct ip6_hdr *ip6;
1232#endif
1233
1234	/*
1235	 * get an entry from the tail of the LRU list.
1236	 */
1237	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1238
1239	switch (pktattr->pattr_af) {
1240	case AF_INET:
1241		ip = (struct ip *)pktattr->pattr_hdr;
1242		fve->fve_flow.flow_af = AF_INET;
1243		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1244		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1245		break;
1246#ifdef INET6
1247	case AF_INET6:
1248		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1249		fve->fve_flow.flow_af = AF_INET6;
1250		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1251		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1252		break;
1253#endif
1254	}
1255
1256	fve->fve_state = Green;
1257	fve->fve_p = 0.0;
1258	fve->fve_f = 0.0;
1259	fve->fve_ifseq = fv->fv_ifseq - 1;
1260	fve->fve_count = 0;
1261
1262	fv->fv_flows++;
1263#ifdef FV_STATS
1264	fv->fv_stats.alloc++;
1265#endif
1266	return (fve);
1267}
1268
1269static __inline void
1270flowlist_move_to_head(fv, fve)
1271	struct flowvalve *fv;
1272	struct fve *fve;
1273{
1274	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1275		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1276		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1277	}
1278}
1279
1280#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
1281/*
1282 * allocate flowvalve structure
1283 */
1284static struct flowvalve *
1285fv_alloc(rp)
1286	struct red *rp;
1287{
1288	struct flowvalve *fv;
1289	struct fve *fve;
1290	int i, num;
1291
1292	num = FV_FLOWLISTSIZE;
1293	fv = malloc(sizeof(struct flowvalve),
1294	       M_DEVBUF, M_WAITOK);
1295	if (fv == NULL)
1296		return (NULL);
1297	bzero(fv, sizeof(struct flowvalve));
1298
1299	fv->fv_fves = malloc(sizeof(struct fve) * num,
1300	       M_DEVBUF, M_WAITOK);
1301	if (fv->fv_fves == NULL) {
1302		free(fv, M_DEVBUF);
1303		return (NULL);
1304	}
1305	bzero(fv->fv_fves, sizeof(struct fve) * num);
1306
1307	fv->fv_flows = 0;
1308	TAILQ_INIT(&fv->fv_flowlist);
1309	for (i = 0; i < num; i++) {
1310		fve = &fv->fv_fves[i];
1311		fve->fve_lastdrop.tv_sec = 0;
1312		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1313	}
1314
1315	/* initialize drop rate threshold in scaled fixed-point */
1316	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1317
1318	/* initialize drop rate to fraction table */
1319	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
1320	       M_DEVBUF, M_WAITOK);
1321	if (fv->fv_p2ftab == NULL) {
1322		free(fv->fv_fves, M_DEVBUF);
1323		free(fv, M_DEVBUF);
1324		return (NULL);
1325	}
1326	/*
1327	 * create the p2f table.
1328	 * (shift is used to keep the precision)
1329	 */
1330	for (i = 1; i < BRTT_SIZE; i++) {
1331		int f;
1332
1333		f = brtt_tab[i] << 8;
1334		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1335	}
1336
1337	return (fv);
1338}
1339#endif
1340
1341static void fv_destroy(fv)
1342	struct flowvalve *fv;
1343{
1344	free(fv->fv_p2ftab, M_DEVBUF);
1345	free(fv->fv_fves, M_DEVBUF);
1346	free(fv, M_DEVBUF);
1347}
1348
1349static __inline int
1350fv_p2f(fv, p)
1351	struct flowvalve	*fv;
1352	int	p;
1353{
1354	int val, f;
1355
1356	if (p >= BRTT_PMAX)
1357		f = fv->fv_p2ftab[BRTT_SIZE-1];
1358	else if ((val = (p & BRTT_MASK)))
1359		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1360	else
1361		f = fv->fv_p2ftab[1];
1362	return (f);
1363}
1364
1365/*
1366 * check if an arriving packet should be pre-dropped.
1367 * called from red_addq() when a packet arrives.
1368 * returns 1 when the packet should be pre-dropped.
1369 * should be called in splimp.
1370 */
1371static int
1372fv_checkflow(fv, pktattr, fcache)
1373	struct flowvalve *fv;
1374	struct altq_pktattr *pktattr;
1375	struct fve **fcache;
1376{
1377	struct fve *fve;
1378	struct timeval now;
1379
1380	fv->fv_ifseq++;
1381	FV_TIMESTAMP(&now);
1382
1383	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1384		/* no matching entry in the flowlist */
1385		return (0);
1386
1387	*fcache = fve;
1388
1389	/* update fraction f for every FV_N packets */
1390	if (++fve->fve_count == FV_N) {
1391		/*
1392		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1393		 */
1394		fve->fve_f =
1395			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1396			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
1397		fve->fve_ifseq = fv->fv_ifseq;
1398		fve->fve_count = 0;
1399	}
1400
1401	/*
1402	 * overpumping test
1403	 */
1404	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1405		int fthresh;
1406
1407		/* calculate a threshold */
1408		fthresh = fv_p2f(fv, fve->fve_p);
1409		if (fve->fve_f > fthresh)
1410			fve->fve_state = Red;
1411	}
1412
1413	if (fve->fve_state == Red) {
1414		/*
1415		 * backoff test
1416		 */
1417		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1418			/* no drop for at least FV_BACKOFFTHRESH sec */
1419			fve->fve_p = 0;
1420			fve->fve_state = Green;
1421#ifdef FV_STATS
1422			fv->fv_stats.escape++;
1423#endif
1424		} else {
1425			/* block this flow */
1426			flowlist_move_to_head(fv, fve);
1427			fve->fve_lastdrop = now;
1428#ifdef FV_STATS
1429			fv->fv_stats.predrop++;
1430#endif
1431			return (1);
1432		}
1433	}
1434
1435	/*
1436	 * p = (1 - Wp) * p
1437	 */
1438	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1439	if (fve->fve_p < 0)
1440		fve->fve_p = 0;
1441#ifdef FV_STATS
1442	fv->fv_stats.pass++;
1443#endif
1444	return (0);
1445}
1446
1447/*
1448 * called from red_addq when a packet is dropped by red.
1449 * should be called in splimp.
1450 */
1451static void fv_dropbyred(fv, pktattr, fcache)
1452	struct flowvalve *fv;
1453	struct altq_pktattr *pktattr;
1454	struct fve *fcache;
1455{
1456	struct fve *fve;
1457	struct timeval now;
1458
1459	if (pktattr == NULL)
1460		return;
1461	FV_TIMESTAMP(&now);
1462
1463	if (fcache != NULL)
1464		/* the fve of this packet is already cached */
1465		fve = fcache;
1466	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1467		fve = flowlist_reclaim(fv, pktattr);
1468
1469	flowlist_move_to_head(fv, fve);
1470
1471	/*
1472	 * update p:  the following line cancels the update
1473	 *	      in fv_checkflow() and calculate
1474	 *	p = Wp + (1 - Wp) * p
1475	 */
1476	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1477
1478	fve->fve_lastdrop = now;
1479}
1480
1481#endif /* ALTQ_FLOWVALVE */
1482
#ifdef KLD_MODULE

/* ALTQ3 character-device switch entry for the RED discipline */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
1492#endif /* ALTQ3_COMPAT */
1493
1494#endif /* ALTQ_RED */
1495