altq_red.c revision 184205
1/*	$FreeBSD: head/sys/contrib/altq/altq/altq_red.c 184205 2008-10-23 15:53:51Z des $	*/
2/*	$KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $	*/
3
4/*
5 * Copyright (C) 1997-2003
6 *	Sony Computer Science Laboratories Inc.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 *    notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 *    notice, this list of conditions and the following disclaimer in the
41 *    documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 *    must display the following acknowledgement:
44 *	This product includes software developed by the Computer Systems
45 *	Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 *    to endorse or promote products derived from this software without
48 *    specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#if defined(__FreeBSD__) || defined(__NetBSD__)
64#include "opt_altq.h"
65#if (__FreeBSD__ != 2)
66#include "opt_inet.h"
67#ifdef __FreeBSD__
68#include "opt_inet6.h"
69#endif
70#endif
71#endif /* __FreeBSD__ || __NetBSD__ */
72#ifdef ALTQ_RED	/* red is enabled by ALTQ_RED option in opt_altq.h */
73
74#include <sys/param.h>
75#include <sys/malloc.h>
76#include <sys/mbuf.h>
77#include <sys/socket.h>
78#include <sys/systm.h>
79#include <sys/errno.h>
80#if 1 /* ALTQ3_COMPAT */
81#include <sys/sockio.h>
82#include <sys/proc.h>
83#include <sys/kernel.h>
84#ifdef ALTQ_FLOWVALVE
85#include <sys/queue.h>
86#include <sys/time.h>
87#endif
88#endif /* ALTQ3_COMPAT */
89
90#include <net/if.h>
91
92#include <netinet/in.h>
93#include <netinet/in_systm.h>
94#include <netinet/ip.h>
95#ifdef INET6
96#include <netinet/ip6.h>
97#endif
98
99#include <net/pfvar.h>
100#include <altq/altq.h>
101#include <altq/altq_red.h>
102#ifdef ALTQ3_COMPAT
103#include <altq/altq_conf.h>
104#ifdef ALTQ_FLOWVALVE
105#include <altq/altq_flowvalve.h>
106#endif
107#endif
108
109/*
110 * ALTQ/RED (Random Early Detection) implementation using 32-bit
111 * fixed-point calculation.
112 *
113 * written by kjc using the ns code as a reference.
114 * you can learn more about red and ns from Sally's home page at
115 * http://www-nrg.ee.lbl.gov/floyd/
116 *
117 * most of the red parameter values are fixed in this implementation
118 * to prevent fixed-point overflow/underflow.
119 * if you change the parameters, watch out for overflow/underflow!
120 *
121 * the parameters used are recommended values by Sally.
122 * the corresponding ns config looks:
123 *	q_weight=0.00195
124 *	minthresh=5 maxthresh=15 queue-size=60
125 *	linterm=30
126 *	dropmech=drop-tail
127 *	bytes=false (can't be handled by 32-bit fixed-point)
128 *	doubleq=false dqthresh=false
129 *	wait=true
130 */
131/*
132 * alternative red parameters for a slow link.
133 *
134 * assume the queue length becomes from zero to L and keeps L, it takes
135 * N packets for q_avg to reach 63% of L.
136 * when q_weight is 0.002, N is about 500 packets.
137 * for a slow link like dial-up, 500 packets takes more than 1 minute!
138 * when q_weight is 0.008, N is about 127 packets.
139 * when q_weight is 0.016, N is about 63 packets.
140 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
141 * are allowed for 0.016.
142 * see Sally's paper for more details.
143 */
144/* normal red parameters */
145#define	W_WEIGHT	512	/* inverse of weight of EWMA (511/512) */
146				/* q_weight = 0.00195 */
147
148/* red parameters for a slow link */
149#define	W_WEIGHT_1	128	/* inverse of weight of EWMA (127/128) */
150				/* q_weight = 0.0078125 */
151
152/* red parameters for a very slow link (e.g., dialup) */
153#define	W_WEIGHT_2	64	/* inverse of weight of EWMA (63/64) */
154				/* q_weight = 0.015625 */
155
156/* fixed-point uses 12-bit decimal places */
157#define	FP_SHIFT	12	/* fixed-point shift */
158
159/* red parameters for drop probability */
160#define	INV_P_MAX	10	/* inverse of max drop probability */
161#define	TH_MIN		5	/* min threshold */
162#define	TH_MAX		15	/* max threshold */
163
164#define	RED_LIMIT	60	/* default max queue lenght */
165#define	RED_STATS		/* collect statistics */
166
167/*
168 * our default policy for forced-drop is drop-tail.
169 * (in altq-1.1.2 or earlier, the default was random-drop.
170 * but it makes more sense to punish the cause of the surge.)
171 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
172 */
173
174#ifdef ALTQ3_COMPAT
175#ifdef ALTQ_FLOWVALVE
176/*
177 * flow-valve is an extention to protect red from unresponsive flows
178 * and to promote end-to-end congestion control.
179 * flow-valve observes the average drop rates of the flows that have
180 * experienced packet drops in the recent past.
181 * when the average drop rate exceeds the threshold, the flow is
182 * blocked by the flow-valve.  the trapped flow should back off
183 * exponentially to escape from the flow-valve.
184 */
185#ifdef RED_RANDOM_DROP
186#error "random-drop can't be used with flow-valve!"
187#endif
188#endif /* ALTQ_FLOWVALVE */
189
190/* red_list keeps all red_queue_t's allocated. */
191static red_queue_t *red_list = NULL;
192
193#endif /* ALTQ3_COMPAT */
194
195/* default red parameter values */
196static int default_th_min = TH_MIN;
197static int default_th_max = TH_MAX;
198static int default_inv_pmax = INV_P_MAX;
199
200#ifdef ALTQ3_COMPAT
201/* internal function prototypes */
202static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
203static struct mbuf *red_dequeue(struct ifaltq *, int);
204static int red_request(struct ifaltq *, int, void *);
205static void red_purgeq(red_queue_t *);
206static int red_detach(red_queue_t *);
207#ifdef ALTQ_FLOWVALVE
208static __inline struct fve *flowlist_lookup(struct flowvalve *,
209			 struct altq_pktattr *, struct timeval *);
210static __inline struct fve *flowlist_reclaim(struct flowvalve *,
211					     struct altq_pktattr *);
212static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
213static __inline int fv_p2f(struct flowvalve *, int);
214#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
215static struct flowvalve *fv_alloc(struct red *);
216#endif
217static void fv_destroy(struct flowvalve *);
218static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
219			struct fve **);
220static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
221			 struct fve *);
222#endif
223#endif /* ALTQ3_COMPAT */
224
225/*
226 * red support routines
227 */
228red_t *
229red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
230   int pkttime)
231{
232	red_t	*rp;
233	int	 w, i;
234	int	 npkts_per_sec;
235
236	rp = malloc(sizeof(red_t), M_DEVBUF, M_WAITOK);
237	if (rp == NULL)
238		return (NULL);
239	bzero(rp, sizeof(red_t));
240
241	rp->red_avg = 0;
242	rp->red_idle = 1;
243
244	if (weight == 0)
245		rp->red_weight = W_WEIGHT;
246	else
247		rp->red_weight = weight;
248	if (inv_pmax == 0)
249		rp->red_inv_pmax = default_inv_pmax;
250	else
251		rp->red_inv_pmax = inv_pmax;
252	if (th_min == 0)
253		rp->red_thmin = default_th_min;
254	else
255		rp->red_thmin = th_min;
256	if (th_max == 0)
257		rp->red_thmax = default_th_max;
258	else
259		rp->red_thmax = th_max;
260
261	rp->red_flags = flags;
262
263	if (pkttime == 0)
264		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
265		rp->red_pkttime = 800;
266	else
267		rp->red_pkttime = pkttime;
268
269	if (weight == 0) {
270		/* when the link is very slow, adjust red parameters */
271		npkts_per_sec = 1000000 / rp->red_pkttime;
272		if (npkts_per_sec < 50) {
273			/* up to about 400Kbps */
274			rp->red_weight = W_WEIGHT_2;
275		} else if (npkts_per_sec < 300) {
276			/* up to about 2.4Mbps */
277			rp->red_weight = W_WEIGHT_1;
278		}
279	}
280
281	/* calculate wshift.  weight must be power of 2 */
282	w = rp->red_weight;
283	for (i = 0; w > 1; i++)
284		w = w >> 1;
285	rp->red_wshift = i;
286	w = 1 << rp->red_wshift;
287	if (w != rp->red_weight) {
288		printf("invalid weight value %d for red! use %d\n",
289		       rp->red_weight, w);
290		rp->red_weight = w;
291	}
292
293	/*
294	 * thmin_s and thmax_s are scaled versions of th_min and th_max
295	 * to be compared with avg.
296	 */
297	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
298	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);
299
300	/*
301	 * precompute probability denominator
302	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
303	 */
304	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
305			 * rp->red_inv_pmax) << FP_SHIFT;
306
307	/* allocate weight table */
308	rp->red_wtab = wtab_alloc(rp->red_weight);
309
310	microtime(&rp->red_last);
311	return (rp);
312}
313
314void
315red_destroy(red_t *rp)
316{
317#ifdef ALTQ3_COMPAT
318#ifdef ALTQ_FLOWVALVE
319	if (rp->red_flowvalve != NULL)
320		fv_destroy(rp->red_flowvalve);
321#endif
322#endif /* ALTQ3_COMPAT */
323	wtab_destroy(rp->red_wtab);
324	free(rp, M_DEVBUF);
325}
326
327void
328red_getstats(red_t *rp, struct redstats *sp)
329{
330	sp->q_avg		= rp->red_avg >> rp->red_wshift;
331	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
332	sp->drop_cnt		= rp->red_stats.drop_cnt;
333	sp->drop_forced		= rp->red_stats.drop_forced;
334	sp->drop_unforced	= rp->red_stats.drop_unforced;
335	sp->marked_packets	= rp->red_stats.marked_packets;
336}
337
338int
339red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
340    struct altq_pktattr *pktattr)
341{
342	int avg, droptype;
343	int n;
344#ifdef ALTQ3_COMPAT
345#ifdef ALTQ_FLOWVALVE
346	struct fve *fve = NULL;
347
348	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
349		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
350			m_freem(m);
351			return (-1);
352		}
353#endif
354#endif /* ALTQ3_COMPAT */
355
356	avg = rp->red_avg;
357
358	/*
359	 * if we were idle, we pretend that n packets arrived during
360	 * the idle period.
361	 */
362	if (rp->red_idle) {
363		struct timeval now;
364		int t;
365
366		rp->red_idle = 0;
367		microtime(&now);
368		t = (now.tv_sec - rp->red_last.tv_sec);
369		if (t > 60) {
370			/*
371			 * being idle for more than 1 minute, set avg to zero.
372			 * this prevents t from overflow.
373			 */
374			avg = 0;
375		} else {
376			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
377			n = t / rp->red_pkttime - 1;
378
379			/* the following line does (avg = (1 - Wq)^n * avg) */
380			if (n > 0)
381				avg = (avg >> FP_SHIFT) *
382				    pow_w(rp->red_wtab, n);
383		}
384	}
385
386	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
387	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
388	rp->red_avg = avg;		/* save the new value */
389
390	/*
391	 * red_count keeps a tally of arriving traffic that has not
392	 * been dropped.
393	 */
394	rp->red_count++;
395
396	/* see if we drop early */
397	droptype = DTYPE_NODROP;
398	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
399		if (avg >= rp->red_thmax_s) {
400			/* avg >= th_max: forced drop */
401			droptype = DTYPE_FORCED;
402		} else if (rp->red_old == 0) {
403			/* first exceeds th_min */
404			rp->red_count = 1;
405			rp->red_old = 1;
406		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
407				      rp->red_probd, rp->red_count)) {
408			/* mark or drop by red */
409			if ((rp->red_flags & REDF_ECN) &&
410			    mark_ecn(m, pktattr, rp->red_flags)) {
411				/* successfully marked.  do not drop. */
412				rp->red_count = 0;
413#ifdef RED_STATS
414				rp->red_stats.marked_packets++;
415#endif
416			} else {
417				/* unforced drop by red */
418				droptype = DTYPE_EARLY;
419			}
420		}
421	} else {
422		/* avg < th_min */
423		rp->red_old = 0;
424	}
425
426	/*
427	 * if the queue length hits the hard limit, it's a forced drop.
428	 */
429	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
430		droptype = DTYPE_FORCED;
431
432#ifdef RED_RANDOM_DROP
433	/* if successful or forced drop, enqueue this packet. */
434	if (droptype != DTYPE_EARLY)
435		_addq(q, m);
436#else
437	/* if successful, enqueue this packet. */
438	if (droptype == DTYPE_NODROP)
439		_addq(q, m);
440#endif
441	if (droptype != DTYPE_NODROP) {
442		if (droptype == DTYPE_EARLY) {
443			/* drop the incoming packet */
444#ifdef RED_STATS
445			rp->red_stats.drop_unforced++;
446#endif
447		} else {
448			/* forced drop, select a victim packet in the queue. */
449#ifdef RED_RANDOM_DROP
450			m = _getq_random(q);
451#endif
452#ifdef RED_STATS
453			rp->red_stats.drop_forced++;
454#endif
455		}
456#ifdef RED_STATS
457		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
458#endif
459		rp->red_count = 0;
460#ifdef ALTQ3_COMPAT
461#ifdef ALTQ_FLOWVALVE
462		if (rp->red_flowvalve != NULL)
463			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
464#endif
465#endif /* ALTQ3_COMPAT */
466		m_freem(m);
467		return (-1);
468	}
469	/* successfully queued */
470#ifdef RED_STATS
471	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
472#endif
473	return (0);
474}
475
476/*
477 * early-drop probability is calculated as follows:
478 *   prob = p_max * (avg - th_min) / (th_max - th_min)
479 *   prob_a = prob / (2 - count*prob)
480 *	    = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
481 * here prob_a increases as successive undrop count increases.
482 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
483 * becomes 1 when (count >= (2 / prob))).
484 */
485int
486drop_early(int fp_len, int fp_probd, int count)
487{
488	int	d;		/* denominator of drop-probability */
489
490	d = fp_probd - count * fp_len;
491	if (d <= 0)
492		/* count exceeds the hard limit: drop or mark */
493		return (1);
494
495	/*
496	 * now the range of d is [1..600] in fixed-point. (when
497	 * th_max-th_min=10 and p_max=1/30)
498	 * drop probability = (avg - TH_MIN) / d
499	 */
500
501	if ((arc4random() % d) < fp_len) {
502		/* drop or mark */
503		return (1);
504	}
505	/* no drop/mark */
506	return (0);
507}
508
509/*
510 * try to mark CE bit to the packet.
511 *    returns 1 if successfully marked, 0 otherwise.
512 */
513int
514mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
515{
516	struct mbuf	*m0;
517	struct pf_mtag	*at;
518	void		*hdr;
519	int		 af;
520
521	at = pf_find_mtag(m);
522	if (at != NULL) {
523		af = at->af;
524		hdr = at->hdr;
525#ifdef ALTQ3_COMPAT
526	} else if (pktattr != NULL) {
527		af = pktattr->pattr_af;
528		hdr = pktattr->pattr_hdr;
529#endif /* ALTQ3_COMPAT */
530	} else
531		return (0);
532
533	if (af != AF_INET && af != AF_INET6)
534		return (0);
535
536	/* verify that pattr_hdr is within the mbuf data */
537	for (m0 = m; m0 != NULL; m0 = m0->m_next)
538		if (((caddr_t)hdr >= m0->m_data) &&
539		    ((caddr_t)hdr < m0->m_data + m0->m_len))
540			break;
541	if (m0 == NULL) {
542		/* ick, tag info is stale */
543		return (0);
544	}
545
546	switch (af) {
547	case AF_INET:
548		if (flags & REDF_ECN4) {
549			struct ip *ip = hdr;
550			u_int8_t otos;
551			int sum;
552
553			if (ip->ip_v != 4)
554				return (0);	/* version mismatch! */
555
556			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
557				return (0);	/* not-ECT */
558			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
559				return (1);	/* already marked */
560
561			/*
562			 * ecn-capable but not marked,
563			 * mark CE and update checksum
564			 */
565			otos = ip->ip_tos;
566			ip->ip_tos |= IPTOS_ECN_CE;
567			/*
568			 * update checksum (from RFC1624)
569			 *	   HC' = ~(~HC + ~m + m')
570			 */
571			sum = ~ntohs(ip->ip_sum) & 0xffff;
572			sum += (~otos & 0xffff) + ip->ip_tos;
573			sum = (sum >> 16) + (sum & 0xffff);
574			sum += (sum >> 16);  /* add carry */
575			ip->ip_sum = htons(~sum & 0xffff);
576			return (1);
577		}
578		break;
579#ifdef INET6
580	case AF_INET6:
581		if (flags & REDF_ECN6) {
582			struct ip6_hdr *ip6 = hdr;
583			u_int32_t flowlabel;
584
585			flowlabel = ntohl(ip6->ip6_flow);
586			if ((flowlabel >> 28) != 6)
587				return (0);	/* version mismatch! */
588			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
589			    (IPTOS_ECN_NOTECT << 20))
590				return (0);	/* not-ECT */
591			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
592			    (IPTOS_ECN_CE << 20))
593				return (1);	/* already marked */
594			/*
595			 * ecn-capable but not marked,  mark CE
596			 */
597			flowlabel |= (IPTOS_ECN_CE << 20);
598			ip6->ip6_flow = htonl(flowlabel);
599			return (1);
600		}
601		break;
602#endif  /* INET6 */
603	}
604
605	/* not marked */
606	return (0);
607}
608
609struct mbuf *
610red_getq(rp, q)
611	red_t *rp;
612	class_queue_t *q;
613{
614	struct mbuf *m;
615
616	if ((m = _getq(q)) == NULL) {
617		if (rp->red_idle == 0) {
618			rp->red_idle = 1;
619			microtime(&rp->red_last);
620		}
621		return NULL;
622	}
623
624	rp->red_idle = 0;
625	return (m);
626}
627
628/*
629 * helper routine to calibrate avg during idle.
630 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
631 * here Wq = 1/weight and the code assumes Wq is close to zero.
632 *
633 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
634 */
635static struct wtab *wtab_list = NULL;	/* pointer to wtab list */
636
637struct wtab *
638wtab_alloc(int weight)
639{
640	struct wtab	*w;
641	int		 i;
642
643	for (w = wtab_list; w != NULL; w = w->w_next)
644		if (w->w_weight == weight) {
645			w->w_refcount++;
646			return (w);
647		}
648
649	w = malloc(sizeof(struct wtab), M_DEVBUF, M_WAITOK);
650	if (w == NULL)
651		panic("wtab_alloc: malloc failed!");
652	bzero(w, sizeof(struct wtab));
653	w->w_weight = weight;
654	w->w_refcount = 1;
655	w->w_next = wtab_list;
656	wtab_list = w;
657
658	/* initialize the weight table */
659	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
660	for (i = 1; i < 32; i++) {
661		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
662		if (w->w_tab[i] == 0 && w->w_param_max == 0)
663			w->w_param_max = 1 << i;
664	}
665
666	return (w);
667}
668
669int
670wtab_destroy(struct wtab *w)
671{
672	struct wtab	*prev;
673
674	if (--w->w_refcount > 0)
675		return (0);
676
677	if (wtab_list == w)
678		wtab_list = w->w_next;
679	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
680		if (prev->w_next == w) {
681			prev->w_next = w->w_next;
682			break;
683		}
684
685	free(w, M_DEVBUF);
686	return (0);
687}
688
689int32_t
690pow_w(struct wtab *w, int n)
691{
692	int	i, bit;
693	int32_t	val;
694
695	if (n >= w->w_param_max)
696		return (0);
697
698	val = 1 << FP_SHIFT;
699	if (n <= 0)
700		return (val);
701
702	bit = 1;
703	i = 0;
704	while (n) {
705		if (n & bit) {
706			val = (val * w->w_tab[i]) >> FP_SHIFT;
707			n &= ~bit;
708		}
709		i++;
710		bit <<=  1;
711	}
712	return (val);
713}
714
715#ifdef ALTQ3_COMPAT
716/*
717 * red device interface
718 */
719altqdev_decl(red);
720
721int
722redopen(dev, flag, fmt, p)
723	dev_t dev;
724	int flag, fmt;
725#if (__FreeBSD_version > 500000)
726	struct thread *p;
727#else
728	struct proc *p;
729#endif
730{
731	/* everything will be done when the queueing scheme is attached. */
732	return 0;
733}
734
735int
736redclose(dev, flag, fmt, p)
737	dev_t dev;
738	int flag, fmt;
739#if (__FreeBSD_version > 500000)
740	struct thread *p;
741#else
742	struct proc *p;
743#endif
744{
745	red_queue_t *rqp;
746	int err, error = 0;
747
748	while ((rqp = red_list) != NULL) {
749		/* destroy all */
750		err = red_detach(rqp);
751		if (err != 0 && error == 0)
752			error = err;
753	}
754
755	return error;
756}
757
758int
759redioctl(dev, cmd, addr, flag, p)
760	dev_t dev;
761	ioctlcmd_t cmd;
762	caddr_t addr;
763	int flag;
764#if (__FreeBSD_version > 500000)
765	struct thread *p;
766#else
767	struct proc *p;
768#endif
769{
770	red_queue_t *rqp;
771	struct red_interface *ifacep;
772	struct ifnet *ifp;
773	int	error = 0;
774
775	/* check super-user privilege */
776	switch (cmd) {
777	case RED_GETSTATS:
778		break;
779	default:
780#if (__FreeBSD_version > 700000)
781		if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
782#elsif (__FreeBSD_version > 400000)
783		if ((error = suser(p)) != 0)
784#else
785		if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
786#endif
787			return (error);
788		break;
789	}
790
791	switch (cmd) {
792
793	case RED_ENABLE:
794		ifacep = (struct red_interface *)addr;
795		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
796			error = EBADF;
797			break;
798		}
799		error = altq_enable(rqp->rq_ifq);
800		break;
801
802	case RED_DISABLE:
803		ifacep = (struct red_interface *)addr;
804		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
805			error = EBADF;
806			break;
807		}
808		error = altq_disable(rqp->rq_ifq);
809		break;
810
811	case RED_IF_ATTACH:
812		ifp = ifunit(((struct red_interface *)addr)->red_ifname);
813		if (ifp == NULL) {
814			error = ENXIO;
815			break;
816		}
817
818		/* allocate and initialize red_queue_t */
819		rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
820		if (rqp == NULL) {
821			error = ENOMEM;
822			break;
823		}
824		bzero(rqp, sizeof(red_queue_t));
825
826		rqp->rq_q = malloc(sizeof(class_queue_t),
827		       M_DEVBUF, M_WAITOK);
828		if (rqp->rq_q == NULL) {
829			free(rqp, M_DEVBUF);
830			error = ENOMEM;
831			break;
832		}
833		bzero(rqp->rq_q, sizeof(class_queue_t));
834
835		rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
836		if (rqp->rq_red == NULL) {
837			free(rqp->rq_q, M_DEVBUF);
838			free(rqp, M_DEVBUF);
839			error = ENOMEM;
840			break;
841		}
842
843		rqp->rq_ifq = &ifp->if_snd;
844		qtail(rqp->rq_q) = NULL;
845		qlen(rqp->rq_q) = 0;
846		qlimit(rqp->rq_q) = RED_LIMIT;
847		qtype(rqp->rq_q) = Q_RED;
848
849		/*
850		 * set RED to this ifnet structure.
851		 */
852		error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
853				    red_enqueue, red_dequeue, red_request,
854				    NULL, NULL);
855		if (error) {
856			red_destroy(rqp->rq_red);
857			free(rqp->rq_q, M_DEVBUF);
858			free(rqp, M_DEVBUF);
859			break;
860		}
861
862		/* add this state to the red list */
863		rqp->rq_next = red_list;
864		red_list = rqp;
865		break;
866
867	case RED_IF_DETACH:
868		ifacep = (struct red_interface *)addr;
869		if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
870			error = EBADF;
871			break;
872		}
873		error = red_detach(rqp);
874		break;
875
876	case RED_GETSTATS:
877		do {
878			struct red_stats *q_stats;
879			red_t *rp;
880
881			q_stats = (struct red_stats *)addr;
882			if ((rqp = altq_lookup(q_stats->iface.red_ifname,
883					     ALTQT_RED)) == NULL) {
884				error = EBADF;
885				break;
886			}
887
888			q_stats->q_len 	   = qlen(rqp->rq_q);
889			q_stats->q_limit   = qlimit(rqp->rq_q);
890
891			rp = rqp->rq_red;
892			q_stats->q_avg 	   = rp->red_avg >> rp->red_wshift;
893			q_stats->xmit_cnt  = rp->red_stats.xmit_cnt;
894			q_stats->drop_cnt  = rp->red_stats.drop_cnt;
895			q_stats->drop_forced   = rp->red_stats.drop_forced;
896			q_stats->drop_unforced = rp->red_stats.drop_unforced;
897			q_stats->marked_packets = rp->red_stats.marked_packets;
898
899			q_stats->weight		= rp->red_weight;
900			q_stats->inv_pmax	= rp->red_inv_pmax;
901			q_stats->th_min		= rp->red_thmin;
902			q_stats->th_max		= rp->red_thmax;
903
904#ifdef ALTQ_FLOWVALVE
905			if (rp->red_flowvalve != NULL) {
906				struct flowvalve *fv = rp->red_flowvalve;
907				q_stats->fv_flows    = fv->fv_flows;
908				q_stats->fv_pass     = fv->fv_stats.pass;
909				q_stats->fv_predrop  = fv->fv_stats.predrop;
910				q_stats->fv_alloc    = fv->fv_stats.alloc;
911				q_stats->fv_escape   = fv->fv_stats.escape;
912			} else {
913#endif /* ALTQ_FLOWVALVE */
914				q_stats->fv_flows    = 0;
915				q_stats->fv_pass     = 0;
916				q_stats->fv_predrop  = 0;
917				q_stats->fv_alloc    = 0;
918				q_stats->fv_escape   = 0;
919#ifdef ALTQ_FLOWVALVE
920			}
921#endif /* ALTQ_FLOWVALVE */
922		} while (/*CONSTCOND*/ 0);
923		break;
924
925	case RED_CONFIG:
926		do {
927			struct red_conf *fc;
928			red_t *new;
929			int s, limit;
930
931			fc = (struct red_conf *)addr;
932			if ((rqp = altq_lookup(fc->iface.red_ifname,
933					       ALTQT_RED)) == NULL) {
934				error = EBADF;
935				break;
936			}
937			new = red_alloc(fc->red_weight,
938					fc->red_inv_pmax,
939					fc->red_thmin,
940					fc->red_thmax,
941					fc->red_flags,
942					fc->red_pkttime);
943			if (new == NULL) {
944				error = ENOMEM;
945				break;
946			}
947
948#ifdef __NetBSD__
949			s = splnet();
950#else
951			s = splimp();
952#endif
953			red_purgeq(rqp);
954			limit = fc->red_limit;
955			if (limit < fc->red_thmax)
956				limit = fc->red_thmax;
957			qlimit(rqp->rq_q) = limit;
958			fc->red_limit = limit;	/* write back the new value */
959
960			red_destroy(rqp->rq_red);
961			rqp->rq_red = new;
962
963			splx(s);
964
965			/* write back new values */
966			fc->red_limit = limit;
967			fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
968			fc->red_thmin = rqp->rq_red->red_thmin;
969			fc->red_thmax = rqp->rq_red->red_thmax;
970
971		} while (/*CONSTCOND*/ 0);
972		break;
973
974	case RED_SETDEFAULTS:
975		do {
976			struct redparams *rp;
977
978			rp = (struct redparams *)addr;
979
980			default_th_min = rp->th_min;
981			default_th_max = rp->th_max;
982			default_inv_pmax = rp->inv_pmax;
983		} while (/*CONSTCOND*/ 0);
984		break;
985
986	default:
987		error = EINVAL;
988		break;
989	}
990	return error;
991}
992
993static int
994red_detach(rqp)
995	red_queue_t *rqp;
996{
997	red_queue_t *tmp;
998	int error = 0;
999
1000	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1001		altq_disable(rqp->rq_ifq);
1002
1003	if ((error = altq_detach(rqp->rq_ifq)))
1004		return (error);
1005
1006	if (red_list == rqp)
1007		red_list = rqp->rq_next;
1008	else {
1009		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1010			if (tmp->rq_next == rqp) {
1011				tmp->rq_next = rqp->rq_next;
1012				break;
1013			}
1014		if (tmp == NULL)
1015			printf("red_detach: no state found in red_list!\n");
1016	}
1017
1018	red_destroy(rqp->rq_red);
1019	free(rqp->rq_q, M_DEVBUF);
1020	free(rqp, M_DEVBUF);
1021	return (error);
1022}
1023
1024/*
1025 * enqueue routine:
1026 *
1027 *	returns: 0 when successfully queued.
1028 *		 ENOBUFS when drop occurs.
1029 */
1030static int
1031red_enqueue(ifq, m, pktattr)
1032	struct ifaltq *ifq;
1033	struct mbuf *m;
1034	struct altq_pktattr *pktattr;
1035{
1036	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1037
1038	IFQ_LOCK_ASSERT(ifq);
1039
1040	if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1041		return ENOBUFS;
1042	ifq->ifq_len++;
1043	return 0;
1044}
1045
1046/*
1047 * dequeue routine:
1048 *	must be called in splimp.
1049 *
1050 *	returns: mbuf dequeued.
1051 *		 NULL when no packet is available in the queue.
1052 */
1053
1054static struct mbuf *
1055red_dequeue(ifq, op)
1056	struct ifaltq *ifq;
1057	int op;
1058{
1059	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1060	struct mbuf *m;
1061
1062	IFQ_LOCK_ASSERT(ifq);
1063
1064	if (op == ALTDQ_POLL)
1065		return qhead(rqp->rq_q);
1066
1067	/* op == ALTDQ_REMOVE */
1068	m =  red_getq(rqp->rq_red, rqp->rq_q);
1069	if (m != NULL)
1070		ifq->ifq_len--;
1071	return (m);
1072}
1073
1074static int
1075red_request(ifq, req, arg)
1076	struct ifaltq *ifq;
1077	int req;
1078	void *arg;
1079{
1080	red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1081
1082	IFQ_LOCK_ASSERT(ifq);
1083
1084	switch (req) {
1085	case ALTRQ_PURGE:
1086		red_purgeq(rqp);
1087		break;
1088	}
1089	return (0);
1090}
1091
1092static void
1093red_purgeq(rqp)
1094	red_queue_t *rqp;
1095{
1096	_flushq(rqp->rq_q);
1097	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1098		rqp->rq_ifq->ifq_len = 0;
1099}
1100
1101#ifdef ALTQ_FLOWVALVE
1102
1103#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
1104#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
1105#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
1106#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
1107#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
1108#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)
1109
1110#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
1111#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */
1112
1113#define	FV_N			10	/* update fve_f every FV_N packets */
1114
1115#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
1116#define	FV_TTHRESH		3  /* time threshold to delete fve */
1117#define	FV_ALPHA		5  /* extra packet count */
1118
1119#define	FV_STATS
1120
1121#if (__FreeBSD_version > 300000)
1122#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
1123#else
1124#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
1125#endif
1126
1127/*
1128 * Brtt table: 127 entry table to convert drop rate (p) to
1129 * the corresponding bandwidth fraction (f)
1130 * the following equation is implemented to use scaled values,
1131 * fve_p and fve_f, in the fixed point format.
1132 *
1133 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1134 *   f = Brtt(p) / (max_th + alpha)
1135 */
1136#define	BRTT_SIZE	128
1137#define	BRTT_SHIFT	12
1138#define	BRTT_MASK	0x0007f000
1139#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))
1140
1141const int brtt_tab[BRTT_SIZE] = {
1142	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1143	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1144	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1145	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1146	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1147	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1148	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1149	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1150	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1151	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1152	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1153	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1154	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1155	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1156	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1157	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1158};
1159
1160static __inline struct fve *
1161flowlist_lookup(fv, pktattr, now)
1162	struct flowvalve *fv;
1163	struct altq_pktattr *pktattr;
1164	struct timeval *now;
1165{
1166	struct fve *fve;
1167	int flows;
1168	struct ip *ip;
1169#ifdef INET6
1170	struct ip6_hdr *ip6;
1171#endif
1172	struct timeval tthresh;
1173
1174	if (pktattr == NULL)
1175		return (NULL);
1176
1177	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
1178	flows = 0;
1179	/*
1180	 * search the flow list
1181	 */
1182	switch (pktattr->pattr_af) {
1183	case AF_INET:
1184		ip = (struct ip *)pktattr->pattr_hdr;
1185		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1186			if (fve->fve_lastdrop.tv_sec == 0)
1187				break;
1188			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1189				fve->fve_lastdrop.tv_sec = 0;
1190				break;
1191			}
1192			if (fve->fve_flow.flow_af == AF_INET &&
1193			    fve->fve_flow.flow_ip.ip_src.s_addr ==
1194			    ip->ip_src.s_addr &&
1195			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
1196			    ip->ip_dst.s_addr)
1197				return (fve);
1198			flows++;
1199		}
1200		break;
1201#ifdef INET6
1202	case AF_INET6:
1203		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1204		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
1205			if (fve->fve_lastdrop.tv_sec == 0)
1206				break;
1207			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
1208				fve->fve_lastdrop.tv_sec = 0;
1209				break;
1210			}
1211			if (fve->fve_flow.flow_af == AF_INET6 &&
1212			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
1213					       &ip6->ip6_src) &&
1214			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
1215					       &ip6->ip6_dst))
1216				return (fve);
1217			flows++;
1218		}
1219		break;
1220#endif /* INET6 */
1221
1222	default:
1223		/* unknown protocol.  no drop. */
1224		return (NULL);
1225	}
1226	fv->fv_flows = flows;	/* save the number of active fve's */
1227	return (NULL);
1228}
1229
1230static __inline struct fve *
1231flowlist_reclaim(fv, pktattr)
1232	struct flowvalve *fv;
1233	struct altq_pktattr *pktattr;
1234{
1235	struct fve *fve;
1236	struct ip *ip;
1237#ifdef INET6
1238	struct ip6_hdr *ip6;
1239#endif
1240
1241	/*
1242	 * get an entry from the tail of the LRU list.
1243	 */
1244	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1245
1246	switch (pktattr->pattr_af) {
1247	case AF_INET:
1248		ip = (struct ip *)pktattr->pattr_hdr;
1249		fve->fve_flow.flow_af = AF_INET;
1250		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1251		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1252		break;
1253#ifdef INET6
1254	case AF_INET6:
1255		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1256		fve->fve_flow.flow_af = AF_INET6;
1257		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1258		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1259		break;
1260#endif
1261	}
1262
1263	fve->fve_state = Green;
1264	fve->fve_p = 0.0;
1265	fve->fve_f = 0.0;
1266	fve->fve_ifseq = fv->fv_ifseq - 1;
1267	fve->fve_count = 0;
1268
1269	fv->fv_flows++;
1270#ifdef FV_STATS
1271	fv->fv_stats.alloc++;
1272#endif
1273	return (fve);
1274}
1275
1276static __inline void
1277flowlist_move_to_head(fv, fve)
1278	struct flowvalve *fv;
1279	struct fve *fve;
1280{
1281	if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1282		TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1283		TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1284	}
1285}
1286
1287#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
1288/*
1289 * allocate flowvalve structure
1290 */
1291static struct flowvalve *
1292fv_alloc(rp)
1293	struct red *rp;
1294{
1295	struct flowvalve *fv;
1296	struct fve *fve;
1297	int i, num;
1298
1299	num = FV_FLOWLISTSIZE;
1300	fv = malloc(sizeof(struct flowvalve),
1301	       M_DEVBUF, M_WAITOK);
1302	if (fv == NULL)
1303		return (NULL);
1304	bzero(fv, sizeof(struct flowvalve));
1305
1306	fv->fv_fves = malloc(sizeof(struct fve) * num,
1307	       M_DEVBUF, M_WAITOK);
1308	if (fv->fv_fves == NULL) {
1309		free(fv, M_DEVBUF);
1310		return (NULL);
1311	}
1312	bzero(fv->fv_fves, sizeof(struct fve) * num);
1313
1314	fv->fv_flows = 0;
1315	TAILQ_INIT(&fv->fv_flowlist);
1316	for (i = 0; i < num; i++) {
1317		fve = &fv->fv_fves[i];
1318		fve->fve_lastdrop.tv_sec = 0;
1319		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
1320	}
1321
1322	/* initialize drop rate threshold in scaled fixed-point */
1323	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;
1324
1325	/* initialize drop rate to fraction table */
1326	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
1327	       M_DEVBUF, M_WAITOK);
1328	if (fv->fv_p2ftab == NULL) {
1329		free(fv->fv_fves, M_DEVBUF);
1330		free(fv, M_DEVBUF);
1331		return (NULL);
1332	}
1333	/*
1334	 * create the p2f table.
1335	 * (shift is used to keep the precision)
1336	 */
1337	for (i = 1; i < BRTT_SIZE; i++) {
1338		int f;
1339
1340		f = brtt_tab[i] << 8;
1341		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
1342	}
1343
1344	return (fv);
1345}
1346#endif
1347
1348static void fv_destroy(fv)
1349	struct flowvalve *fv;
1350{
1351	free(fv->fv_p2ftab, M_DEVBUF);
1352	free(fv->fv_fves, M_DEVBUF);
1353	free(fv, M_DEVBUF);
1354}
1355
1356static __inline int
1357fv_p2f(fv, p)
1358	struct flowvalve	*fv;
1359	int	p;
1360{
1361	int val, f;
1362
1363	if (p >= BRTT_PMAX)
1364		f = fv->fv_p2ftab[BRTT_SIZE-1];
1365	else if ((val = (p & BRTT_MASK)))
1366		f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1367	else
1368		f = fv->fv_p2ftab[1];
1369	return (f);
1370}
1371
1372/*
1373 * check if an arriving packet should be pre-dropped.
1374 * called from red_addq() when a packet arrives.
1375 * returns 1 when the packet should be pre-dropped.
1376 * should be called in splimp.
1377 */
1378static int
1379fv_checkflow(fv, pktattr, fcache)
1380	struct flowvalve *fv;
1381	struct altq_pktattr *pktattr;
1382	struct fve **fcache;
1383{
1384	struct fve *fve;
1385	struct timeval now;
1386
1387	fv->fv_ifseq++;
1388	FV_TIMESTAMP(&now);
1389
1390	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1391		/* no matching entry in the flowlist */
1392		return (0);
1393
1394	*fcache = fve;
1395
1396	/* update fraction f for every FV_N packets */
1397	if (++fve->fve_count == FV_N) {
1398		/*
1399		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1400		 */
1401		fve->fve_f =
1402			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1403			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
1404		fve->fve_ifseq = fv->fv_ifseq;
1405		fve->fve_count = 0;
1406	}
1407
1408	/*
1409	 * overpumping test
1410	 */
1411	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1412		int fthresh;
1413
1414		/* calculate a threshold */
1415		fthresh = fv_p2f(fv, fve->fve_p);
1416		if (fve->fve_f > fthresh)
1417			fve->fve_state = Red;
1418	}
1419
1420	if (fve->fve_state == Red) {
1421		/*
1422		 * backoff test
1423		 */
1424		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1425			/* no drop for at least FV_BACKOFFTHRESH sec */
1426			fve->fve_p = 0;
1427			fve->fve_state = Green;
1428#ifdef FV_STATS
1429			fv->fv_stats.escape++;
1430#endif
1431		} else {
1432			/* block this flow */
1433			flowlist_move_to_head(fv, fve);
1434			fve->fve_lastdrop = now;
1435#ifdef FV_STATS
1436			fv->fv_stats.predrop++;
1437#endif
1438			return (1);
1439		}
1440	}
1441
1442	/*
1443	 * p = (1 - Wp) * p
1444	 */
1445	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1446	if (fve->fve_p < 0)
1447		fve->fve_p = 0;
1448#ifdef FV_STATS
1449	fv->fv_stats.pass++;
1450#endif
1451	return (0);
1452}
1453
1454/*
1455 * called from red_addq when a packet is dropped by red.
1456 * should be called in splimp.
1457 */
1458static void fv_dropbyred(fv, pktattr, fcache)
1459	struct flowvalve *fv;
1460	struct altq_pktattr *pktattr;
1461	struct fve *fcache;
1462{
1463	struct fve *fve;
1464	struct timeval now;
1465
1466	if (pktattr == NULL)
1467		return;
1468	FV_TIMESTAMP(&now);
1469
1470	if (fcache != NULL)
1471		/* the fve of this packet is already cached */
1472		fve = fcache;
1473	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1474		fve = flowlist_reclaim(fv, pktattr);
1475
1476	flowlist_move_to_head(fv, fve);
1477
1478	/*
1479	 * update p:  the following line cancels the update
1480	 *	      in fv_checkflow() and calculate
1481	 *	p = Wp + (1 - Wp) * p
1482	 */
1483	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1484
1485	fve->fve_lastdrop = now;
1486}
1487
1488#endif /* ALTQ_FLOWVALVE */
1489
1490#ifdef KLD_MODULE
1491
1492static struct altqsw red_sw =
1493	{"red", redopen, redclose, redioctl};
1494
1495ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
1496MODULE_VERSION(altq_red, 1);
1497
1498#endif /* KLD_MODULE */
1499#endif /* ALTQ3_COMPAT */
1500
1501#endif /* ALTQ_RED */
1502