Deleted Added
full compact
altq_red.c (257215) altq_red.c (281613)
1/* $FreeBSD: head/sys/contrib/altq/altq/altq_red.c 257215 2013-10-27 16:25:57Z glebius $ */
1/* $FreeBSD: head/sys/net/altq/altq_red.c 281613 2015-04-16 20:22:40Z glebius $ */
2/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */
3
4/*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#if defined(__FreeBSD__) || defined(__NetBSD__)
64#include "opt_altq.h"
65#include "opt_inet.h"
66#ifdef __FreeBSD__
67#include "opt_inet6.h"
68#endif
69#endif /* __FreeBSD__ || __NetBSD__ */
70#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
71
72#include <sys/param.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/socket.h>
76#include <sys/systm.h>
77#include <sys/errno.h>
78#if 1 /* ALTQ3_COMPAT */
79#include <sys/sockio.h>
80#include <sys/proc.h>
81#include <sys/kernel.h>
82#ifdef ALTQ_FLOWVALVE
83#include <sys/queue.h>
84#include <sys/time.h>
85#endif
86#endif /* ALTQ3_COMPAT */
87
88#include <net/if.h>
89#include <net/if_var.h>
90
91#include <netinet/in.h>
92#include <netinet/in_systm.h>
93#include <netinet/ip.h>
94#ifdef INET6
95#include <netinet/ip6.h>
96#endif
97
98#include <netpfil/pf/pf.h>
99#include <netpfil/pf/pf_altq.h>
100#include <netpfil/pf/pf_mtag.h>
2/* $KAME: altq_red.c,v 1.18 2003/09/05 22:40:36 itojun Exp $ */
3
4/*
5 * Copyright (C) 1997-2003
6 * Sony Computer Science Laboratories Inc. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY SONY CSL AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL SONY CSL OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 */
30/*
31 * Copyright (c) 1990-1994 Regents of the University of California.
32 * All rights reserved.
33 *
34 * Redistribution and use in source and binary forms, with or without
35 * modification, are permitted provided that the following conditions
36 * are met:
37 * 1. Redistributions of source code must retain the above copyright
38 * notice, this list of conditions and the following disclaimer.
39 * 2. Redistributions in binary form must reproduce the above copyright
40 * notice, this list of conditions and the following disclaimer in the
41 * documentation and/or other materials provided with the distribution.
42 * 3. All advertising materials mentioning features or use of this software
43 * must display the following acknowledgement:
44 * This product includes software developed by the Computer Systems
45 * Engineering Group at Lawrence Berkeley Laboratory.
46 * 4. Neither the name of the University nor of the Laboratory may be used
47 * to endorse or promote products derived from this software without
48 * specific prior written permission.
49 *
50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
60 * SUCH DAMAGE.
61 */
62
63#if defined(__FreeBSD__) || defined(__NetBSD__)
64#include "opt_altq.h"
65#include "opt_inet.h"
66#ifdef __FreeBSD__
67#include "opt_inet6.h"
68#endif
69#endif /* __FreeBSD__ || __NetBSD__ */
70#ifdef ALTQ_RED /* red is enabled by ALTQ_RED option in opt_altq.h */
71
72#include <sys/param.h>
73#include <sys/malloc.h>
74#include <sys/mbuf.h>
75#include <sys/socket.h>
76#include <sys/systm.h>
77#include <sys/errno.h>
78#if 1 /* ALTQ3_COMPAT */
79#include <sys/sockio.h>
80#include <sys/proc.h>
81#include <sys/kernel.h>
82#ifdef ALTQ_FLOWVALVE
83#include <sys/queue.h>
84#include <sys/time.h>
85#endif
86#endif /* ALTQ3_COMPAT */
87
88#include <net/if.h>
89#include <net/if_var.h>
90
91#include <netinet/in.h>
92#include <netinet/in_systm.h>
93#include <netinet/ip.h>
94#ifdef INET6
95#include <netinet/ip6.h>
96#endif
97
98#include <netpfil/pf/pf.h>
99#include <netpfil/pf/pf_altq.h>
100#include <netpfil/pf/pf_mtag.h>
#include <altq/altq.h>
#include <altq/altq_red.h>
101#include <net/altq/altq.h>
102#include <net/altq/altq_red.h>
103#ifdef ALTQ3_COMPAT
103#ifdef ALTQ3_COMPAT
#include <altq/altq_conf.h>
104#include <net/altq/altq_conf.h>
105#ifdef ALTQ_FLOWVALVE
105#ifdef ALTQ_FLOWVALVE
#include <altq/altq_flowvalve.h>
106#include <net/altq/altq_flowvalve.h>
107#endif
108#endif
109
110/*
111 * ALTQ/RED (Random Early Detection) implementation using 32-bit
112 * fixed-point calculation.
113 *
114 * written by kjc using the ns code as a reference.
115 * you can learn more about red and ns from Sally's home page at
116 * http://www-nrg.ee.lbl.gov/floyd/
117 *
118 * most of the red parameter values are fixed in this implementation
119 * to prevent fixed-point overflow/underflow.
120 * if you change the parameters, watch out for overflow/underflow!
121 *
122 * the parameters used are recommended values by Sally.
123 * the corresponding ns config looks:
124 * q_weight=0.00195
125 * minthresh=5 maxthresh=15 queue-size=60
126 * linterm=30
127 * dropmech=drop-tail
128 * bytes=false (can't be handled by 32-bit fixed-point)
129 * doubleq=false dqthresh=false
130 * wait=true
131 */
132/*
133 * alternative red parameters for a slow link.
134 *
135 * assume the queue length becomes from zero to L and keeps L, it takes
136 * N packets for q_avg to reach 63% of L.
137 * when q_weight is 0.002, N is about 500 packets.
138 * for a slow link like dial-up, 500 packets takes more than 1 minute!
139 * when q_weight is 0.008, N is about 127 packets.
140 * when q_weight is 0.016, N is about 63 packets.
141 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
142 * are allowed for 0.016.
143 * see Sally's paper for more details.
144 */
145/* normal red parameters */
146#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
147 /* q_weight = 0.00195 */
148
149/* red parameters for a slow link */
150#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
151 /* q_weight = 0.0078125 */
152
153/* red parameters for a very slow link (e.g., dialup) */
154#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
155 /* q_weight = 0.015625 */
156
157/* fixed-point uses 12-bit decimal places */
158#define FP_SHIFT 12 /* fixed-point shift */
159
160/* red parameters for drop probability */
161#define INV_P_MAX 10 /* inverse of max drop probability */
162#define TH_MIN 5 /* min threshold */
163#define TH_MAX 15 /* max threshold */
164
#define RED_LIMIT	60	/* default max queue length */
166#define RED_STATS /* collect statistics */
167
168/*
169 * our default policy for forced-drop is drop-tail.
170 * (in altq-1.1.2 or earlier, the default was random-drop.
171 * but it makes more sense to punish the cause of the surge.)
172 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
173 */
174
175#ifdef ALTQ3_COMPAT
176#ifdef ALTQ_FLOWVALVE
177/*
178 * flow-valve is an extention to protect red from unresponsive flows
179 * and to promote end-to-end congestion control.
180 * flow-valve observes the average drop rates of the flows that have
181 * experienced packet drops in the recent past.
182 * when the average drop rate exceeds the threshold, the flow is
183 * blocked by the flow-valve. the trapped flow should back off
184 * exponentially to escape from the flow-valve.
185 */
186#ifdef RED_RANDOM_DROP
187#error "random-drop can't be used with flow-valve!"
188#endif
189#endif /* ALTQ_FLOWVALVE */
190
191/* red_list keeps all red_queue_t's allocated. */
192static red_queue_t *red_list = NULL;
193
194#endif /* ALTQ3_COMPAT */
195
196/* default red parameter values */
197static int default_th_min = TH_MIN;
198static int default_th_max = TH_MAX;
199static int default_inv_pmax = INV_P_MAX;
200
201#ifdef ALTQ3_COMPAT
202/* internal function prototypes */
203static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
204static struct mbuf *red_dequeue(struct ifaltq *, int);
205static int red_request(struct ifaltq *, int, void *);
206static void red_purgeq(red_queue_t *);
207static int red_detach(red_queue_t *);
208#ifdef ALTQ_FLOWVALVE
209static __inline struct fve *flowlist_lookup(struct flowvalve *,
210 struct altq_pktattr *, struct timeval *);
211static __inline struct fve *flowlist_reclaim(struct flowvalve *,
212 struct altq_pktattr *);
213static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
214static __inline int fv_p2f(struct flowvalve *, int);
215#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
216static struct flowvalve *fv_alloc(struct red *);
217#endif
218static void fv_destroy(struct flowvalve *);
219static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
220 struct fve **);
221static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
222 struct fve *);
223#endif
224#endif /* ALTQ3_COMPAT */
225
226/*
227 * red support routines
228 */
/*
 * Allocate and initialize a RED state block.
 *
 * weight:   inverse of the EWMA weight; 0 selects a default that is
 *           further adjusted below for slow links (based on pkttime).
 * inv_pmax: inverse of the maximum drop probability (0 = default).
 * th_min, th_max: min/max average-queue thresholds in packets
 *           (0 = default).
 * flags:    REDF_* flags (e.g. ECN marking).
 * pkttime:  average packet transmission time in usec (0 = default).
 *
 * Returns the new red_t, or NULL on allocation failure.
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
   int pkttime)
{
	red_t	*rp;
	int	 w, i;
	int	 npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (rp == NULL)
		return (NULL);

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;

	/* allocate weight table (shared, refcounted across red instances) */
	rp->red_wtab = wtab_alloc(rp->red_weight);
	if (rp->red_wtab == NULL) {
		free(rp, M_DEVBUF);
		return (NULL);
	}

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift. weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* non-power-of-2 weight: round down to 2^wshift and warn */
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	microtime(&rp->red_last);
	return (rp);
}
318
/*
 * Release a RED state block allocated by red_alloc(): tear down the
 * optional flow-valve, drop the reference on the shared weight table,
 * and free the block itself.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
331
/*
 * Copy the current RED statistics into *sp.  The average queue size is
 * shifted down by the weight shift so it is reported in the same
 * fixed-point scale as the thresholds.
 */
void
red_getstats(red_t *rp, struct redstats *sp)
{
	sp->q_avg		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}
342
/*
 * Enqueue one packet through the RED algorithm.
 *
 * Updates the EWMA of the queue length (pretending packets arrived
 * during any idle period so the average decays), then decides between
 * no drop, an unforced (early/probabilistic) drop, and a forced drop
 * (avg over th_max or queue at its hard limit).  ECN-capable packets
 * may be CE-marked instead of early-dropped when REDF_ECN is set.
 *
 * Returns 0 if the packet was queued, -1 if a packet was dropped
 * (the dropped mbuf is freed here).
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max: forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
		    rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			/* under random-drop, m was enqueued above; the
			 * victim pulled here may be a different packet */
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
480
481/*
482 * early-drop probability is calculated as follows:
483 * prob = p_max * (avg - th_min) / (th_max - th_min)
484 * prob_a = prob / (2 - count*prob)
485 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
486 * here prob_a increases as successive undrop count increases.
487 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
488 * becomes 1 when (count >= (2 / prob))).
489 */
490int
491drop_early(int fp_len, int fp_probd, int count)
492{
493 int d; /* denominator of drop-probability */
494
495 d = fp_probd - count * fp_len;
496 if (d <= 0)
497 /* count exceeds the hard limit: drop or mark */
498 return (1);
499
500 /*
501 * now the range of d is [1..600] in fixed-point. (when
502 * th_max-th_min=10 and p_max=1/30)
503 * drop probability = (avg - TH_MIN) / d
504 */
505
506 if ((arc4random() % d) < fp_len) {
507 /* drop or mark */
508 return (1);
509 }
510 /* no drop/mark */
511 return (0);
512}
513
/*
 * try to mark CE bit to the packet.
 * returns 1 if successfully marked, 0 otherwise.
 *
 * The IP header location comes from the pf mbuf tag (or, under
 * ALTQ3_COMPAT, from the packet attributes); the pointer is validated
 * against the mbuf chain before any access.  For IPv4 the TOS byte is
 * modified and the header checksum incrementally updated; for IPv6 the
 * traffic-class bits inside the flow word are modified (no checksum).
 */
int
mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
{
	struct mbuf	*m0;
	struct pf_mtag	*at;
	void		*hdr;

	at = pf_find_mtag(m);
	if (at != NULL) {
		hdr = at->hdr;
#ifdef ALTQ3_COMPAT
	} else if (pktattr != NULL) {
		/*
		 * NOTE(review): 'af' has no declaration in this function,
		 * so this ALTQ3_COMPAT branch looks like it cannot compile
		 * as-is -- confirm before enabling ALTQ3_COMPAT.
		 */
		af = pktattr->pattr_af;
		hdr = pktattr->pattr_hdr;
#endif /* ALTQ3_COMPAT */
	} else
		return (0);

	/* verify that pattr_hdr is within the mbuf data */
	for (m0 = m; m0 != NULL; m0 = m0->m_next)
		if (((caddr_t)hdr >= m0->m_data) &&
		    ((caddr_t)hdr < m0->m_data + m0->m_len))
			break;
	if (m0 == NULL) {
		/* ick, tag info is stale */
		return (0);
	}

	switch (((struct ip *)hdr)->ip_v) {
	case IPVERSION:
		if (flags & REDF_ECN4) {
			struct ip *ip = hdr;
			u_int8_t otos;
			int sum;

			if (ip->ip_v != 4)
				return (0);	/* version mismatch! */

			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
				return (0);	/* not-ECT */
			if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
				return (1);	/* already marked */

			/*
			 * ecn-capable but not marked,
			 * mark CE and update checksum
			 */
			otos = ip->ip_tos;
			ip->ip_tos |= IPTOS_ECN_CE;
			/*
			 * update checksum (from RFC1624)
			 *	    HC' = ~(~HC + ~m + m')
			 */
			sum = ~ntohs(ip->ip_sum) & 0xffff;
			sum += (~otos & 0xffff) + ip->ip_tos;
			sum = (sum >> 16) + (sum & 0xffff);
			sum += (sum >> 16);  /* add carry */
			ip->ip_sum = htons(~sum & 0xffff);
			return (1);
		}
		break;
#ifdef INET6
	case (IPV6_VERSION >> 4):
		if (flags & REDF_ECN6) {
			struct ip6_hdr *ip6 = hdr;
			u_int32_t flowlabel;

			flowlabel = ntohl(ip6->ip6_flow);
			if ((flowlabel >> 28) != 6)
				return (0);	/* version mismatch! */
			/* ECN bits sit at bits 20-21 of the flow word */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_NOTECT << 20))
				return (0);	/* not-ECT */
			if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
			    (IPTOS_ECN_CE << 20))
				return (1);	/* already marked */
			/*
			 * ecn-capable but not marked,  mark CE
			 */
			flowlabel |= (IPTOS_ECN_CE << 20);
			ip6->ip6_flow = htonl(flowlabel);
			return (1);
		}
		break;
#endif  /* INET6 */
	}

	/* not marked */
	return (0);
}
608
609struct mbuf *
610red_getq(rp, q)
611 red_t *rp;
612 class_queue_t *q;
613{
614 struct mbuf *m;
615
616 if ((m = _getq(q)) == NULL) {
617 if (rp->red_idle == 0) {
618 rp->red_idle = 1;
619 microtime(&rp->red_last);
620 }
621 return NULL;
622 }
623
624 rp->red_idle = 0;
625 return (m);
626}
627
628/*
629 * helper routine to calibrate avg during idle.
630 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
631 * here Wq = 1/weight and the code assumes Wq is close to zero.
632 *
633 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
634 */
635static struct wtab *wtab_list = NULL; /* pointer to wtab list */
636
637struct wtab *
638wtab_alloc(int weight)
639{
640 struct wtab *w;
641 int i;
642
643 for (w = wtab_list; w != NULL; w = w->w_next)
644 if (w->w_weight == weight) {
645 w->w_refcount++;
646 return (w);
647 }
648
649 w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
650 if (w == NULL)
651 return (NULL);
652 w->w_weight = weight;
653 w->w_refcount = 1;
654 w->w_next = wtab_list;
655 wtab_list = w;
656
657 /* initialize the weight table */
658 w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
659 for (i = 1; i < 32; i++) {
660 w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
661 if (w->w_tab[i] == 0 && w->w_param_max == 0)
662 w->w_param_max = 1 << i;
663 }
664
665 return (w);
666}
667
668int
669wtab_destroy(struct wtab *w)
670{
671 struct wtab *prev;
672
673 if (--w->w_refcount > 0)
674 return (0);
675
676 if (wtab_list == w)
677 wtab_list = w->w_next;
678 else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
679 if (prev->w_next == w) {
680 prev->w_next = w->w_next;
681 break;
682 }
683
684 free(w, M_DEVBUF);
685 return (0);
686}
687
688int32_t
689pow_w(struct wtab *w, int n)
690{
691 int i, bit;
692 int32_t val;
693
694 if (n >= w->w_param_max)
695 return (0);
696
697 val = 1 << FP_SHIFT;
698 if (n <= 0)
699 return (val);
700
701 bit = 1;
702 i = 0;
703 while (n) {
704 if (n & bit) {
705 val = (val * w->w_tab[i]) >> FP_SHIFT;
706 n &= ~bit;
707 }
708 i++;
709 bit <<= 1;
710 }
711 return (val);
712}
713
714#ifdef ALTQ3_COMPAT
715/*
716 * red device interface
717 */
718altqdev_decl(red);
719
/*
 * Device open for the ALTQ3-compat RED control device.  Nothing to set
 * up here; all state is created later by the RED_IF_ATTACH ioctl.
 * Always succeeds.
 */
int
redopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
733
/*
 * Device close for the ALTQ3-compat RED control device: detach and
 * destroy every RED state still on red_list.  Returns the first
 * red_detach() error encountered (0 if all succeed).
 */
int
redclose(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	red_queue_t *rqp;
	int err, error = 0;

	/* red_detach unlinks rqp from red_list, so this loop terminates */
	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}
756
757int
758redioctl(dev, cmd, addr, flag, p)
759 dev_t dev;
760 ioctlcmd_t cmd;
761 caddr_t addr;
762 int flag;
763#if (__FreeBSD_version > 500000)
764 struct thread *p;
765#else
766 struct proc *p;
767#endif
768{
769 red_queue_t *rqp;
770 struct red_interface *ifacep;
771 struct ifnet *ifp;
772 int error = 0;
773
774 /* check super-user privilege */
775 switch (cmd) {
776 case RED_GETSTATS:
777 break;
778 default:
779#if (__FreeBSD_version > 700000)
780 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
781#elsif (__FreeBSD_version > 400000)
782 if ((error = suser(p)) != 0)
783#else
784 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
785#endif
786 return (error);
787 break;
788 }
789
790 switch (cmd) {
791
792 case RED_ENABLE:
793 ifacep = (struct red_interface *)addr;
794 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
795 error = EBADF;
796 break;
797 }
798 error = altq_enable(rqp->rq_ifq);
799 break;
800
801 case RED_DISABLE:
802 ifacep = (struct red_interface *)addr;
803 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
804 error = EBADF;
805 break;
806 }
807 error = altq_disable(rqp->rq_ifq);
808 break;
809
810 case RED_IF_ATTACH:
811 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
812 if (ifp == NULL) {
813 error = ENXIO;
814 break;
815 }
816
817 /* allocate and initialize red_queue_t */
818 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
819 if (rqp == NULL) {
820 error = ENOMEM;
821 break;
822 }
823 bzero(rqp, sizeof(red_queue_t));
824
825 rqp->rq_q = malloc(sizeof(class_queue_t),
826 M_DEVBUF, M_WAITOK);
827 if (rqp->rq_q == NULL) {
828 free(rqp, M_DEVBUF);
829 error = ENOMEM;
830 break;
831 }
832 bzero(rqp->rq_q, sizeof(class_queue_t));
833
834 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
835 if (rqp->rq_red == NULL) {
836 free(rqp->rq_q, M_DEVBUF);
837 free(rqp, M_DEVBUF);
838 error = ENOMEM;
839 break;
840 }
841
842 rqp->rq_ifq = &ifp->if_snd;
843 qtail(rqp->rq_q) = NULL;
844 qlen(rqp->rq_q) = 0;
845 qlimit(rqp->rq_q) = RED_LIMIT;
846 qtype(rqp->rq_q) = Q_RED;
847
848 /*
849 * set RED to this ifnet structure.
850 */
851 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
852 red_enqueue, red_dequeue, red_request,
853 NULL, NULL);
854 if (error) {
855 red_destroy(rqp->rq_red);
856 free(rqp->rq_q, M_DEVBUF);
857 free(rqp, M_DEVBUF);
858 break;
859 }
860
861 /* add this state to the red list */
862 rqp->rq_next = red_list;
863 red_list = rqp;
864 break;
865
866 case RED_IF_DETACH:
867 ifacep = (struct red_interface *)addr;
868 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
869 error = EBADF;
870 break;
871 }
872 error = red_detach(rqp);
873 break;
874
875 case RED_GETSTATS:
876 do {
877 struct red_stats *q_stats;
878 red_t *rp;
879
880 q_stats = (struct red_stats *)addr;
881 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
882 ALTQT_RED)) == NULL) {
883 error = EBADF;
884 break;
885 }
886
887 q_stats->q_len = qlen(rqp->rq_q);
888 q_stats->q_limit = qlimit(rqp->rq_q);
889
890 rp = rqp->rq_red;
891 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
892 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
893 q_stats->drop_cnt = rp->red_stats.drop_cnt;
894 q_stats->drop_forced = rp->red_stats.drop_forced;
895 q_stats->drop_unforced = rp->red_stats.drop_unforced;
896 q_stats->marked_packets = rp->red_stats.marked_packets;
897
898 q_stats->weight = rp->red_weight;
899 q_stats->inv_pmax = rp->red_inv_pmax;
900 q_stats->th_min = rp->red_thmin;
901 q_stats->th_max = rp->red_thmax;
902
903#ifdef ALTQ_FLOWVALVE
904 if (rp->red_flowvalve != NULL) {
905 struct flowvalve *fv = rp->red_flowvalve;
906 q_stats->fv_flows = fv->fv_flows;
907 q_stats->fv_pass = fv->fv_stats.pass;
908 q_stats->fv_predrop = fv->fv_stats.predrop;
909 q_stats->fv_alloc = fv->fv_stats.alloc;
910 q_stats->fv_escape = fv->fv_stats.escape;
911 } else {
912#endif /* ALTQ_FLOWVALVE */
913 q_stats->fv_flows = 0;
914 q_stats->fv_pass = 0;
915 q_stats->fv_predrop = 0;
916 q_stats->fv_alloc = 0;
917 q_stats->fv_escape = 0;
918#ifdef ALTQ_FLOWVALVE
919 }
920#endif /* ALTQ_FLOWVALVE */
921 } while (/*CONSTCOND*/ 0);
922 break;
923
924 case RED_CONFIG:
925 do {
926 struct red_conf *fc;
927 red_t *new;
928 int s, limit;
929
930 fc = (struct red_conf *)addr;
931 if ((rqp = altq_lookup(fc->iface.red_ifname,
932 ALTQT_RED)) == NULL) {
933 error = EBADF;
934 break;
935 }
936 new = red_alloc(fc->red_weight,
937 fc->red_inv_pmax,
938 fc->red_thmin,
939 fc->red_thmax,
940 fc->red_flags,
941 fc->red_pkttime);
942 if (new == NULL) {
943 error = ENOMEM;
944 break;
945 }
946
947#ifdef __NetBSD__
948 s = splnet();
949#else
950 s = splimp();
951#endif
952 red_purgeq(rqp);
953 limit = fc->red_limit;
954 if (limit < fc->red_thmax)
955 limit = fc->red_thmax;
956 qlimit(rqp->rq_q) = limit;
957 fc->red_limit = limit; /* write back the new value */
958
959 red_destroy(rqp->rq_red);
960 rqp->rq_red = new;
961
962 splx(s);
963
964 /* write back new values */
965 fc->red_limit = limit;
966 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
967 fc->red_thmin = rqp->rq_red->red_thmin;
968 fc->red_thmax = rqp->rq_red->red_thmax;
969
970 } while (/*CONSTCOND*/ 0);
971 break;
972
973 case RED_SETDEFAULTS:
974 do {
975 struct redparams *rp;
976
977 rp = (struct redparams *)addr;
978
979 default_th_min = rp->th_min;
980 default_th_max = rp->th_max;
981 default_inv_pmax = rp->inv_pmax;
982 } while (/*CONSTCOND*/ 0);
983 break;
984
985 default:
986 error = EINVAL;
987 break;
988 }
989 return error;
990}
991
/*
 * Detach a RED state from its interface: disable and detach the altq
 * discipline, unlink the state from the global red_list, then free the
 * RED parameters, the class queue, and the state block itself.
 * Returns 0 on success or the altq_detach() error.
 */
static int
red_detach(rqp)
	red_queue_t *rqp;
{
	red_queue_t *tmp;
	int error = 0;

	if (ALTQ_IS_ENABLED(rqp->rq_ifq))
		altq_disable(rqp->rq_ifq);

	if ((error = altq_detach(rqp->rq_ifq)))
		return (error);

	if (red_list == rqp)
		red_list = rqp->rq_next;
	else {
		for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
			if (tmp->rq_next == rqp) {
				tmp->rq_next = rqp->rq_next;
				break;
			}
		if (tmp == NULL)
			printf("red_detach: no state found in red_list!\n");
	}

	red_destroy(rqp->rq_red);
	free(rqp->rq_q, M_DEVBUF);
	free(rqp, M_DEVBUF);
	return (error);
}
1022
1023/*
1024 * enqueue routine:
1025 *
1026 * returns: 0 when successfully queued.
1027 * ENOBUFS when drop occurs.
1028 */
1029static int
1030red_enqueue(ifq, m, pktattr)
1031 struct ifaltq *ifq;
1032 struct mbuf *m;
1033 struct altq_pktattr *pktattr;
1034{
1035 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1036
1037 IFQ_LOCK_ASSERT(ifq);
1038
1039 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1040 return ENOBUFS;
1041 ifq->ifq_len++;
1042 return 0;
1043}
1044
1045/*
1046 * dequeue routine:
1047 * must be called in splimp.
1048 *
1049 * returns: mbuf dequeued.
1050 * NULL when no packet is available in the queue.
1051 */
1052
1053static struct mbuf *
1054red_dequeue(ifq, op)
1055 struct ifaltq *ifq;
1056 int op;
1057{
1058 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1059 struct mbuf *m;
1060
1061 IFQ_LOCK_ASSERT(ifq);
1062
1063 if (op == ALTDQ_POLL)
1064 return qhead(rqp->rq_q);
1065
1066 /* op == ALTDQ_REMOVE */
1067 m = red_getq(rqp->rq_red, rqp->rq_q);
1068 if (m != NULL)
1069 ifq->ifq_len--;
1070 return (m);
1071}
1072
1073static int
1074red_request(ifq, req, arg)
1075 struct ifaltq *ifq;
1076 int req;
1077 void *arg;
1078{
1079 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1080
1081 IFQ_LOCK_ASSERT(ifq);
1082
1083 switch (req) {
1084 case ALTRQ_PURGE:
1085 red_purgeq(rqp);
1086 break;
1087 }
1088 return (0);
1089}
1090
1091static void
1092red_purgeq(rqp)
1093 red_queue_t *rqp;
1094{
1095 _flushq(rqp->rq_q);
1096 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1097 rqp->rq_ifq->ifq_len = 0;
1098}
1099
1100#ifdef ALTQ_FLOWVALVE
1101
1102#define FV_PSHIFT 7 /* weight of average drop rate -- 1/128 */
1103#define FV_PSCALE(x) ((x) << FV_PSHIFT)
1104#define FV_PUNSCALE(x) ((x) >> FV_PSHIFT)
1105#define FV_FSHIFT 5 /* weight of average fraction -- 1/32 */
1106#define FV_FSCALE(x) ((x) << FV_FSHIFT)
1107#define FV_FUNSCALE(x) ((x) >> FV_FSHIFT)
1108
1109#define FV_TIMER (3 * hz) /* timer value for garbage collector */
1110#define FV_FLOWLISTSIZE 64 /* how many flows in flowlist */
1111
1112#define FV_N 10 /* update fve_f every FV_N packets */
1113
1114#define FV_BACKOFFTHRESH 1 /* backoff threshold interval in second */
1115#define FV_TTHRESH 3 /* time threshold to delete fve */
1116#define FV_ALPHA 5 /* extra packet count */
1117
1118#define FV_STATS
1119
1120#if (__FreeBSD_version > 300000)
1121#define FV_TIMESTAMP(tp) getmicrotime(tp)
1122#else
1123#define FV_TIMESTAMP(tp) { (*(tp)) = time; }
1124#endif
1125
1126/*
1127 * Brtt table: 127 entry table to convert drop rate (p) to
1128 * the corresponding bandwidth fraction (f)
1129 * the following equation is implemented to use scaled values,
1130 * fve_p and fve_f, in the fixed point format.
1131 *
1132 * Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
1133 * f = Brtt(p) / (max_th + alpha)
1134 */
1135#define BRTT_SIZE 128
1136#define BRTT_SHIFT 12
1137#define BRTT_MASK 0x0007f000
1138#define BRTT_PMAX (1 << (FV_PSHIFT + FP_SHIFT))
1139
1140const int brtt_tab[BRTT_SIZE] = {
1141 0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
1142 392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
1143 225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
1144 145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
1145 98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
1146 67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
1147 47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
1148 33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
1149 24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
1150 18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
1151 14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
1152 10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
1153 8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
1154 6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
1155 5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
1156 4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
1157};
1158
/*
 * Look up the flow entry (fve) matching the packet described by
 * pktattr, scanning the LRU-ordered flow list.
 *
 * The scan stops early at the first free entry (fve_lastdrop.tv_sec
 * == 0) or at the first entry whose last drop is older than
 * FV_TTHRESH seconds; entries behind it on the LRU list are at least
 * as old, so the stale entry is recycled (tv_sec reset to 0) and the
 * rest skipped.  On a miss, the number of live entries scanned is
 * recorded in fv->fv_flows.  Returns the matching fve, or NULL when
 * there is no match or the protocol is not IPv4/IPv6.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries that last dropped before tthresh are considered dead */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			/* match on the (src, dst) address pair */
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;
				break;
			}
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1228
1229static __inline struct fve *
1230flowlist_reclaim(fv, pktattr)
1231 struct flowvalve *fv;
1232 struct altq_pktattr *pktattr;
1233{
1234 struct fve *fve;
1235 struct ip *ip;
1236#ifdef INET6
1237 struct ip6_hdr *ip6;
1238#endif
1239
1240 /*
1241 * get an entry from the tail of the LRU list.
1242 */
1243 fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);
1244
1245 switch (pktattr->pattr_af) {
1246 case AF_INET:
1247 ip = (struct ip *)pktattr->pattr_hdr;
1248 fve->fve_flow.flow_af = AF_INET;
1249 fve->fve_flow.flow_ip.ip_src = ip->ip_src;
1250 fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
1251 break;
1252#ifdef INET6
1253 case AF_INET6:
1254 ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
1255 fve->fve_flow.flow_af = AF_INET6;
1256 fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
1257 fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
1258 break;
1259#endif
1260 }
1261
1262 fve->fve_state = Green;
1263 fve->fve_p = 0.0;
1264 fve->fve_f = 0.0;
1265 fve->fve_ifseq = fv->fv_ifseq - 1;
1266 fve->fve_count = 0;
1267
1268 fv->fv_flows++;
1269#ifdef FV_STATS
1270 fv->fv_stats.alloc++;
1271#endif
1272 return (fve);
1273}
1274
1275static __inline void
1276flowlist_move_to_head(fv, fve)
1277 struct flowvalve *fv;
1278 struct fve *fve;
1279{
1280 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1281 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1282 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1283 }
1284}
1285
1286#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
1287/*
1288 * allocate flowvalve structure
1289 */
/*
 * Allocate and initialize a flowvalve: the fve array threaded onto an
 * LRU list, the scaled drop-rate threshold, and the drop-rate to
 * bandwidth-fraction (p2f) lookup table derived from brtt_tab[].
 * Returns NULL on allocation failure.
 *
 * NOTE(review): this function is compiled out (#if 0 above) and kept
 * only for reference; nothing in the visible code calls it.
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	fv->fv_fves = malloc(sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		/* tv_sec == 0 marks the entry as free */
		fve->fve_lastdrop.tv_sec = 0;
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
1345#endif
1346
1347static void fv_destroy(fv)
1348 struct flowvalve *fv;
1349{
1350 free(fv->fv_p2ftab, M_DEVBUF);
1351 free(fv->fv_fves, M_DEVBUF);
1352 free(fv, M_DEVBUF);
1353}
1354
1355static __inline int
1356fv_p2f(fv, p)
1357 struct flowvalve *fv;
1358 int p;
1359{
1360 int val, f;
1361
1362 if (p >= BRTT_PMAX)
1363 f = fv->fv_p2ftab[BRTT_SIZE-1];
1364 else if ((val = (p & BRTT_MASK)))
1365 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1366 else
1367 f = fv->fv_p2ftab[1];
1368 return (f);
1369}
1370
1371/*
1372 * check if an arriving packet should be pre-dropped.
1373 * called from red_addq() when a packet arrives.
1374 * returns 1 when the packet should be pre-dropped.
1375 * should be called in splimp.
1376 */
static int
fv_checkflow(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve **fcache;
{
	struct fve *fve;
	struct timeval now;

	/* count every arriving packet; fve_f is derived from this sequence */
	fv->fv_ifseq++;
	FV_TIMESTAMP(&now);

	if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* no matching entry in the flowlist */
		return (0);

	/* cache the entry so fv_dropbyred() can skip a second lookup */
	*fcache = fve;

	/* update fraction f for every FV_N packets */
	if (++fve->fve_count == FV_N) {
		/*
		 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
		 * (EWMA of this flow's share of arrivals, in fixed-point)
		 */
		fve->fve_f =
			(FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
			+ fve->fve_f - FV_FUNSCALE(fve->fve_f);
		fve->fve_ifseq = fv->fv_ifseq;
		fve->fve_count = 0;
	}

	/*
	 * overpumping test: a Green flow whose average drop rate exceeds
	 * the threshold, and whose bandwidth share exceeds what a
	 * conformant (TCP-friendly) flow would get at that drop rate,
	 * is marked Red.
	 */
	if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
		int fthresh;

		/* calculate a threshold */
		fthresh = fv_p2f(fv, fve->fve_p);
		if (fve->fve_f > fthresh)
			fve->fve_state = Red;
	}

	if (fve->fve_state == Red) {
		/*
		 * backoff test
		 */
		if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
			/* no drop for at least FV_BACKOFFTHRESH sec */
			fve->fve_p = 0;
			fve->fve_state = Green;
#ifdef FV_STATS
			fv->fv_stats.escape++;
#endif
		} else {
			/* block this flow */
			flowlist_move_to_head(fv, fve);
			fve->fve_lastdrop = now;
#ifdef FV_STATS
			fv->fv_stats.predrop++;
#endif
			return (1);
		}
	}

	/*
	 * p = (1 - Wp) * p
	 * (decay the average drop rate; fv_dropbyred() re-adds Wp on drop)
	 */
	fve->fve_p -= FV_PUNSCALE(fve->fve_p);
	if (fve->fve_p < 0)
		fve->fve_p = 0;
#ifdef FV_STATS
	fv->fv_stats.pass++;
#endif
	return (0);
}
1452
1453/*
1454 * called from red_addq when a packet is dropped by red.
1455 * should be called in splimp.
1456 */
static void fv_dropbyred(fv, pktattr, fcache)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct fve *fcache;
{
	struct fve *fve;
	struct timeval now;

	/* nothing to account if the packet's headers are not available */
	if (pktattr == NULL)
		return;
	FV_TIMESTAMP(&now);

	if (fcache != NULL)
		/* the fve of this packet is already cached */
		fve = fcache;
	else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
		/* first drop for this flow: recycle the LRU-tail entry */
		fve = flowlist_reclaim(fv, pktattr);

	flowlist_move_to_head(fv, fve);

	/*
	 * update p: the following line cancels the update
	 * in fv_checkflow() and calculate
	 *   p = Wp + (1 - Wp) * p
	 */
	fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;

	fve->fve_lastdrop = now;
}
1486
1487#endif /* ALTQ_FLOWVALVE */
1488
1489#ifdef KLD_MODULE
1490
1491static struct altqsw red_sw =
1492 {"red", redopen, redclose, redioctl};
1493
1494ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
1495MODULE_VERSION(altq_red, 1);
1496
1497#endif /* KLD_MODULE */
1498#endif /* ALTQ3_COMPAT */
1499
1500#endif /* ALTQ_RED */
107#endif
108#endif
109
110/*
111 * ALTQ/RED (Random Early Detection) implementation using 32-bit
112 * fixed-point calculation.
113 *
114 * written by kjc using the ns code as a reference.
115 * you can learn more about red and ns from Sally's home page at
116 * http://www-nrg.ee.lbl.gov/floyd/
117 *
118 * most of the red parameter values are fixed in this implementation
119 * to prevent fixed-point overflow/underflow.
120 * if you change the parameters, watch out for overflow/underflow!
121 *
122 * the parameters used are recommended values by Sally.
123 * the corresponding ns config looks:
124 * q_weight=0.00195
125 * minthresh=5 maxthresh=15 queue-size=60
126 * linterm=30
127 * dropmech=drop-tail
128 * bytes=false (can't be handled by 32-bit fixed-point)
129 * doubleq=false dqthresh=false
130 * wait=true
131 */
132/*
133 * alternative red parameters for a slow link.
134 *
135 * assume the queue length becomes from zero to L and keeps L, it takes
136 * N packets for q_avg to reach 63% of L.
137 * when q_weight is 0.002, N is about 500 packets.
138 * for a slow link like dial-up, 500 packets takes more than 1 minute!
139 * when q_weight is 0.008, N is about 127 packets.
140 * when q_weight is 0.016, N is about 63 packets.
141 * bursts of 50 packets are allowed for 0.002, bursts of 25 packets
142 * are allowed for 0.016.
143 * see Sally's paper for more details.
144 */
145/* normal red parameters */
146#define W_WEIGHT 512 /* inverse of weight of EWMA (511/512) */
147 /* q_weight = 0.00195 */
148
149/* red parameters for a slow link */
150#define W_WEIGHT_1 128 /* inverse of weight of EWMA (127/128) */
151 /* q_weight = 0.0078125 */
152
153/* red parameters for a very slow link (e.g., dialup) */
154#define W_WEIGHT_2 64 /* inverse of weight of EWMA (63/64) */
155 /* q_weight = 0.015625 */
156
157/* fixed-point uses 12-bit decimal places */
158#define FP_SHIFT 12 /* fixed-point shift */
159
160/* red parameters for drop probability */
161#define INV_P_MAX 10 /* inverse of max drop probability */
162#define TH_MIN 5 /* min threshold */
163#define TH_MAX 15 /* max threshold */
164
#define	RED_LIMIT	60	/* default max queue length */
166#define RED_STATS /* collect statistics */
167
168/*
169 * our default policy for forced-drop is drop-tail.
170 * (in altq-1.1.2 or earlier, the default was random-drop.
171 * but it makes more sense to punish the cause of the surge.)
172 * to switch to the random-drop policy, define "RED_RANDOM_DROP".
173 */
174
175#ifdef ALTQ3_COMPAT
176#ifdef ALTQ_FLOWVALVE
177/*
 * flow-valve is an extension to protect red from unresponsive flows
179 * and to promote end-to-end congestion control.
180 * flow-valve observes the average drop rates of the flows that have
181 * experienced packet drops in the recent past.
182 * when the average drop rate exceeds the threshold, the flow is
183 * blocked by the flow-valve. the trapped flow should back off
184 * exponentially to escape from the flow-valve.
185 */
186#ifdef RED_RANDOM_DROP
187#error "random-drop can't be used with flow-valve!"
188#endif
189#endif /* ALTQ_FLOWVALVE */
190
191/* red_list keeps all red_queue_t's allocated. */
192static red_queue_t *red_list = NULL;
193
194#endif /* ALTQ3_COMPAT */
195
196/* default red parameter values */
197static int default_th_min = TH_MIN;
198static int default_th_max = TH_MAX;
199static int default_inv_pmax = INV_P_MAX;
200
201#ifdef ALTQ3_COMPAT
202/* internal function prototypes */
203static int red_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
204static struct mbuf *red_dequeue(struct ifaltq *, int);
205static int red_request(struct ifaltq *, int, void *);
206static void red_purgeq(red_queue_t *);
207static int red_detach(red_queue_t *);
208#ifdef ALTQ_FLOWVALVE
209static __inline struct fve *flowlist_lookup(struct flowvalve *,
210 struct altq_pktattr *, struct timeval *);
211static __inline struct fve *flowlist_reclaim(struct flowvalve *,
212 struct altq_pktattr *);
213static __inline void flowlist_move_to_head(struct flowvalve *, struct fve *);
214static __inline int fv_p2f(struct flowvalve *, int);
215#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
216static struct flowvalve *fv_alloc(struct red *);
217#endif
218static void fv_destroy(struct flowvalve *);
219static int fv_checkflow(struct flowvalve *, struct altq_pktattr *,
220 struct fve **);
221static void fv_dropbyred(struct flowvalve *fv, struct altq_pktattr *,
222 struct fve *);
223#endif
224#endif /* ALTQ3_COMPAT */
225
226/*
227 * red support routines
228 */
/*
 * Allocate and initialize a RED state.
 *
 * Any parameter passed as 0 selects a default: W_WEIGHT (possibly
 * lowered for slow links based on pkttime), default_inv_pmax /
 * default_th_min / default_th_max, and an 800-usec packet time.
 * The weight is forced to a power of two so the EWMA can use shifts.
 * Returns NULL if memory cannot be allocated.
 */
red_t *
red_alloc(int weight, int inv_pmax, int th_min, int th_max, int flags,
   int pkttime)
{
	red_t	*rp;
	int	 w, i;
	int	 npkts_per_sec;

	rp = malloc(sizeof(red_t), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (rp == NULL)
		return (NULL);

	if (weight == 0)
		rp->red_weight = W_WEIGHT;
	else
		rp->red_weight = weight;

	/* allocate weight table */
	rp->red_wtab = wtab_alloc(rp->red_weight);
	if (rp->red_wtab == NULL) {
		free(rp, M_DEVBUF);
		return (NULL);
	}

	rp->red_avg = 0;
	rp->red_idle = 1;

	if (inv_pmax == 0)
		rp->red_inv_pmax = default_inv_pmax;
	else
		rp->red_inv_pmax = inv_pmax;
	if (th_min == 0)
		rp->red_thmin = default_th_min;
	else
		rp->red_thmin = th_min;
	if (th_max == 0)
		rp->red_thmax = default_th_max;
	else
		rp->red_thmax = th_max;

	rp->red_flags = flags;

	if (pkttime == 0)
		/* default packet time: 1000 bytes / 10Mbps * 8 * 1000000 */
		rp->red_pkttime = 800;
	else
		rp->red_pkttime = pkttime;

	if (weight == 0) {
		/* when the link is very slow, adjust red parameters */
		npkts_per_sec = 1000000 / rp->red_pkttime;
		if (npkts_per_sec < 50) {
			/* up to about 400Kbps */
			rp->red_weight = W_WEIGHT_2;
		} else if (npkts_per_sec < 300) {
			/* up to about 2.4Mbps */
			rp->red_weight = W_WEIGHT_1;
		}
	}

	/* calculate wshift.  weight must be power of 2 */
	w = rp->red_weight;
	for (i = 0; w > 1; i++)
		w = w >> 1;
	rp->red_wshift = i;
	w = 1 << rp->red_wshift;
	if (w != rp->red_weight) {
		/* round a non-power-of-two weight down and warn */
		printf("invalid weight value %d for red! use %d\n",
		    rp->red_weight, w);
		rp->red_weight = w;
	}

	/*
	 * thmin_s and thmax_s are scaled versions of th_min and th_max
	 * to be compared with avg.
	 */
	rp->red_thmin_s = rp->red_thmin << (rp->red_wshift + FP_SHIFT);
	rp->red_thmax_s = rp->red_thmax << (rp->red_wshift + FP_SHIFT);

	/*
	 * precompute probability denominator
	 *  probd = (2 * (TH_MAX-TH_MIN) / pmax) in fixed-point
	 */
	rp->red_probd = (2 * (rp->red_thmax - rp->red_thmin)
	    * rp->red_inv_pmax) << FP_SHIFT;

	microtime(&rp->red_last);
	return (rp);
}
318
/*
 * Destroy a RED state: release the (optional) flowvalve, drop the
 * reference on the shared weight table, and free the state itself.
 */
void
red_destroy(red_t *rp)
{
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	if (rp->red_flowvalve != NULL)
		fv_destroy(rp->red_flowvalve);
#endif
#endif /* ALTQ3_COMPAT */
	wtab_destroy(rp->red_wtab);
	free(rp, M_DEVBUF);
}
331
/*
 * Copy a snapshot of the RED statistics into *sp.  The average queue
 * length is unscaled by the weight shift only, so q_avg is still in
 * FP_SHIFT fixed-point form (see the scaling in red_addq()).
 */
void
red_getstats(red_t *rp, struct redstats *sp)
{
	sp->q_avg		= rp->red_avg >> rp->red_wshift;
	sp->xmit_cnt		= rp->red_stats.xmit_cnt;
	sp->drop_cnt		= rp->red_stats.drop_cnt;
	sp->drop_forced		= rp->red_stats.drop_forced;
	sp->drop_unforced	= rp->red_stats.drop_unforced;
	sp->marked_packets	= rp->red_stats.marked_packets;
}
342
/*
 * RED enqueue: update the average queue length estimate and decide
 * whether to queue, ECN-mark, or drop the packet.
 *
 * Returns 0 when the packet was queued (possibly CE-marked); -1 when
 * it was dropped, in which case the mbuf has been freed here.  With
 * RED_RANDOM_DROP a forced drop evicts a random victim from the queue
 * instead of the arriving packet.
 */
int
red_addq(red_t *rp, class_queue_t *q, struct mbuf *m,
    struct altq_pktattr *pktattr)
{
	int avg, droptype;
	int n;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
	struct fve *fve = NULL;

	/* let the flowvalve pre-drop packets of misbehaving flows */
	if (rp->red_flowvalve != NULL && rp->red_flowvalve->fv_flows > 0)
		if (fv_checkflow(rp->red_flowvalve, pktattr, &fve)) {
			m_freem(m);
			return (-1);
		}
#endif
#endif /* ALTQ3_COMPAT */

	avg = rp->red_avg;

	/*
	 * if we were idle, we pretend that n packets arrived during
	 * the idle period.
	 */
	if (rp->red_idle) {
		struct timeval now;
		int t;

		rp->red_idle = 0;
		microtime(&now);
		t = (now.tv_sec - rp->red_last.tv_sec);
		if (t > 60) {
			/*
			 * being idle for more than 1 minute, set avg to zero.
			 * this prevents t from overflow.
			 */
			avg = 0;
		} else {
			t = t * 1000000 + (now.tv_usec - rp->red_last.tv_usec);
			n = t / rp->red_pkttime - 1;

			/* the following line does (avg = (1 - Wq)^n * avg) */
			if (n > 0)
				avg = (avg >> FP_SHIFT) *
				    pow_w(rp->red_wtab, n);
		}
	}

	/* run estimator. (note: avg is scaled by WEIGHT in fixed-point) */
	avg += (qlen(q) << FP_SHIFT) - (avg >> rp->red_wshift);
	rp->red_avg = avg;		/* save the new value */

	/*
	 * red_count keeps a tally of arriving traffic that has not
	 * been dropped.
	 */
	rp->red_count++;

	/* see if we drop early */
	droptype = DTYPE_NODROP;
	if (avg >= rp->red_thmin_s && qlen(q) > 1) {
		if (avg >= rp->red_thmax_s) {
			/* avg >= th_max:  forced drop */
			droptype = DTYPE_FORCED;
		} else if (rp->red_old == 0) {
			/* first exceeds th_min */
			rp->red_count = 1;
			rp->red_old = 1;
		} else if (drop_early((avg - rp->red_thmin_s) >> rp->red_wshift,
				      rp->red_probd, rp->red_count)) {
			/* mark or drop by red */
			if ((rp->red_flags & REDF_ECN) &&
			    mark_ecn(m, pktattr, rp->red_flags)) {
				/* successfully marked.  do not drop. */
				rp->red_count = 0;
#ifdef RED_STATS
				rp->red_stats.marked_packets++;
#endif
			} else {
				/* unforced drop by red */
				droptype = DTYPE_EARLY;
			}
		}
	} else {
		/* avg < th_min */
		rp->red_old = 0;
	}

	/*
	 * if the queue length hits the hard limit, it's a forced drop.
	 */
	if (droptype == DTYPE_NODROP && qlen(q) >= qlimit(q))
		droptype = DTYPE_FORCED;

#ifdef RED_RANDOM_DROP
	/* if successful or forced drop, enqueue this packet. */
	if (droptype != DTYPE_EARLY)
		_addq(q, m);
#else
	/* if successful, enqueue this packet. */
	if (droptype == DTYPE_NODROP)
		_addq(q, m);
#endif
	if (droptype != DTYPE_NODROP) {
		if (droptype == DTYPE_EARLY) {
			/* drop the incoming packet */
#ifdef RED_STATS
			rp->red_stats.drop_unforced++;
#endif
		} else {
			/* forced drop, select a victim packet in the queue. */
#ifdef RED_RANDOM_DROP
			m = _getq_random(q);
#endif
#ifdef RED_STATS
			rp->red_stats.drop_forced++;
#endif
		}
#ifdef RED_STATS
		PKTCNTR_ADD(&rp->red_stats.drop_cnt, m_pktlen(m));
#endif
		rp->red_count = 0;
#ifdef ALTQ3_COMPAT
#ifdef ALTQ_FLOWVALVE
		/* tell the flowvalve about the drop so it can raise fve_p */
		if (rp->red_flowvalve != NULL)
			fv_dropbyred(rp->red_flowvalve, pktattr, fve);
#endif
#endif /* ALTQ3_COMPAT */
		m_freem(m);
		return (-1);
	}
	/* successfully queued */
#ifdef RED_STATS
	PKTCNTR_ADD(&rp->red_stats.xmit_cnt, m_pktlen(m));
#endif
	return (0);
}
480
481/*
482 * early-drop probability is calculated as follows:
483 * prob = p_max * (avg - th_min) / (th_max - th_min)
484 * prob_a = prob / (2 - count*prob)
485 * = (avg-th_min) / (2*(th_max-th_min)*inv_p_max - count*(avg-th_min))
486 * here prob_a increases as successive undrop count increases.
487 * (prob_a starts from prob/2, becomes prob when (count == (1 / prob)),
488 * becomes 1 when (count >= (2 / prob))).
489 */
490int
491drop_early(int fp_len, int fp_probd, int count)
492{
493 int d; /* denominator of drop-probability */
494
495 d = fp_probd - count * fp_len;
496 if (d <= 0)
497 /* count exceeds the hard limit: drop or mark */
498 return (1);
499
500 /*
501 * now the range of d is [1..600] in fixed-point. (when
502 * th_max-th_min=10 and p_max=1/30)
503 * drop probability = (avg - TH_MIN) / d
504 */
505
506 if ((arc4random() % d) < fp_len) {
507 /* drop or mark */
508 return (1);
509 }
510 /* no drop/mark */
511 return (0);
512}
513
514/*
515 * try to mark CE bit to the packet.
516 * returns 1 if successfully marked, 0 otherwise.
517 */
518int
519mark_ecn(struct mbuf *m, struct altq_pktattr *pktattr, int flags)
520{
521 struct mbuf *m0;
522 struct pf_mtag *at;
523 void *hdr;
524
525 at = pf_find_mtag(m);
526 if (at != NULL) {
527 hdr = at->hdr;
528#ifdef ALTQ3_COMPAT
529 } else if (pktattr != NULL) {
530 af = pktattr->pattr_af;
531 hdr = pktattr->pattr_hdr;
532#endif /* ALTQ3_COMPAT */
533 } else
534 return (0);
535
536 /* verify that pattr_hdr is within the mbuf data */
537 for (m0 = m; m0 != NULL; m0 = m0->m_next)
538 if (((caddr_t)hdr >= m0->m_data) &&
539 ((caddr_t)hdr < m0->m_data + m0->m_len))
540 break;
541 if (m0 == NULL) {
542 /* ick, tag info is stale */
543 return (0);
544 }
545
546 switch (((struct ip *)hdr)->ip_v) {
547 case IPVERSION:
548 if (flags & REDF_ECN4) {
549 struct ip *ip = hdr;
550 u_int8_t otos;
551 int sum;
552
553 if (ip->ip_v != 4)
554 return (0); /* version mismatch! */
555
556 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_NOTECT)
557 return (0); /* not-ECT */
558 if ((ip->ip_tos & IPTOS_ECN_MASK) == IPTOS_ECN_CE)
559 return (1); /* already marked */
560
561 /*
562 * ecn-capable but not marked,
563 * mark CE and update checksum
564 */
565 otos = ip->ip_tos;
566 ip->ip_tos |= IPTOS_ECN_CE;
567 /*
568 * update checksum (from RFC1624)
569 * HC' = ~(~HC + ~m + m')
570 */
571 sum = ~ntohs(ip->ip_sum) & 0xffff;
572 sum += (~otos & 0xffff) + ip->ip_tos;
573 sum = (sum >> 16) + (sum & 0xffff);
574 sum += (sum >> 16); /* add carry */
575 ip->ip_sum = htons(~sum & 0xffff);
576 return (1);
577 }
578 break;
579#ifdef INET6
580 case (IPV6_VERSION >> 4):
581 if (flags & REDF_ECN6) {
582 struct ip6_hdr *ip6 = hdr;
583 u_int32_t flowlabel;
584
585 flowlabel = ntohl(ip6->ip6_flow);
586 if ((flowlabel >> 28) != 6)
587 return (0); /* version mismatch! */
588 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
589 (IPTOS_ECN_NOTECT << 20))
590 return (0); /* not-ECT */
591 if ((flowlabel & (IPTOS_ECN_MASK << 20)) ==
592 (IPTOS_ECN_CE << 20))
593 return (1); /* already marked */
594 /*
595 * ecn-capable but not marked, mark CE
596 */
597 flowlabel |= (IPTOS_ECN_CE << 20);
598 ip6->ip6_flow = htonl(flowlabel);
599 return (1);
600 }
601 break;
602#endif /* INET6 */
603 }
604
605 /* not marked */
606 return (0);
607}
608
609struct mbuf *
610red_getq(rp, q)
611 red_t *rp;
612 class_queue_t *q;
613{
614 struct mbuf *m;
615
616 if ((m = _getq(q)) == NULL) {
617 if (rp->red_idle == 0) {
618 rp->red_idle = 1;
619 microtime(&rp->red_last);
620 }
621 return NULL;
622 }
623
624 rp->red_idle = 0;
625 return (m);
626}
627
628/*
629 * helper routine to calibrate avg during idle.
630 * pow_w(wtab, n) returns (1 - Wq)^n in fixed-point
631 * here Wq = 1/weight and the code assumes Wq is close to zero.
632 *
633 * w_tab[n] holds ((1 - Wq)^(2^n)) in fixed-point.
634 */
635static struct wtab *wtab_list = NULL; /* pointer to wtab list */
636
/*
 * Return a reference-counted weight table for the given weight,
 * reusing an existing table from wtab_list when one matches.
 *
 * w_tab[i] holds (1 - 1/weight)^(2^i) in FP_SHIFT fixed-point;
 * w_param_max is the first power of two whose entry underflows to
 * zero, used by pow_w() as a short-circuit bound (relies on the
 * M_ZERO allocation leaving w_param_max initially 0).
 * Returns NULL if memory cannot be allocated.
 */
struct wtab *
wtab_alloc(int weight)
{
	struct wtab	*w;
	int		 i;

	/* share an existing table for this weight, if any */
	for (w = wtab_list; w != NULL; w = w->w_next)
		if (w->w_weight == weight) {
			w->w_refcount++;
			return (w);
		}

	w = malloc(sizeof(struct wtab), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (w == NULL)
		return (NULL);
	w->w_weight = weight;
	w->w_refcount = 1;
	w->w_next = wtab_list;
	wtab_list = w;

	/* initialize the weight table */
	w->w_tab[0] = ((weight - 1) << FP_SHIFT) / weight;
	for (i = 1; i < 32; i++) {
		/* repeated squaring: w_tab[i] = w_tab[i-1]^2 */
		w->w_tab[i] = (w->w_tab[i-1] * w->w_tab[i-1]) >> FP_SHIFT;
		if (w->w_tab[i] == 0 && w->w_param_max == 0)
			w->w_param_max = 1 << i;
	}

	return (w);
}
667
/*
 * Drop a reference on a weight table; when the last reference goes
 * away, unlink the table from wtab_list and free it.  Always returns
 * 0.
 */
int
wtab_destroy(struct wtab *w)
{
	struct wtab	*prev;

	if (--w->w_refcount > 0)
		return (0);

	/* unlink from the singly-linked list of weight tables */
	if (wtab_list == w)
		wtab_list = w->w_next;
	else for (prev = wtab_list; prev->w_next != NULL; prev = prev->w_next)
		if (prev->w_next == w) {
			prev->w_next = w->w_next;
			break;
		}

	free(w, M_DEVBUF);
	return (0);
}
687
688int32_t
689pow_w(struct wtab *w, int n)
690{
691 int i, bit;
692 int32_t val;
693
694 if (n >= w->w_param_max)
695 return (0);
696
697 val = 1 << FP_SHIFT;
698 if (n <= 0)
699 return (val);
700
701 bit = 1;
702 i = 0;
703 while (n) {
704 if (n & bit) {
705 val = (val * w->w_tab[i]) >> FP_SHIFT;
706 n &= ~bit;
707 }
708 i++;
709 bit <<= 1;
710 }
711 return (val);
712}
713
714#ifdef ALTQ3_COMPAT
715/*
716 * red device interface
717 */
718altqdev_decl(red);
719
/*
 * Open handler for the RED control device.  Nothing to set up here;
 * all state is created when a queueing scheme is attached via ioctl.
 * Always succeeds.
 */
int
redopen(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	/* everything will be done when the queueing scheme is attached. */
	return 0;
}
733
/*
 * Close handler for the RED control device: tear down every RED state
 * still on red_list.  Returns 0, or the first error reported by
 * red_detach() while continuing to destroy the remaining states.
 */
int
redclose(dev, flag, fmt, p)
	dev_t dev;
	int flag, fmt;
#if (__FreeBSD_version > 500000)
	struct thread *p;
#else
	struct proc *p;
#endif
{
	red_queue_t *rqp;
	int err, error = 0;

	while ((rqp = red_list) != NULL) {
		/* destroy all */
		err = red_detach(rqp);
		if (err != 0 && error == 0)
			error = err;
	}

	return error;
}
756
757int
758redioctl(dev, cmd, addr, flag, p)
759 dev_t dev;
760 ioctlcmd_t cmd;
761 caddr_t addr;
762 int flag;
763#if (__FreeBSD_version > 500000)
764 struct thread *p;
765#else
766 struct proc *p;
767#endif
768{
769 red_queue_t *rqp;
770 struct red_interface *ifacep;
771 struct ifnet *ifp;
772 int error = 0;
773
774 /* check super-user privilege */
775 switch (cmd) {
776 case RED_GETSTATS:
777 break;
778 default:
779#if (__FreeBSD_version > 700000)
780 if ((error = priv_check(p, PRIV_ALTQ_MANAGE)) != 0)
781#elsif (__FreeBSD_version > 400000)
782 if ((error = suser(p)) != 0)
783#else
784 if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
785#endif
786 return (error);
787 break;
788 }
789
790 switch (cmd) {
791
792 case RED_ENABLE:
793 ifacep = (struct red_interface *)addr;
794 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
795 error = EBADF;
796 break;
797 }
798 error = altq_enable(rqp->rq_ifq);
799 break;
800
801 case RED_DISABLE:
802 ifacep = (struct red_interface *)addr;
803 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
804 error = EBADF;
805 break;
806 }
807 error = altq_disable(rqp->rq_ifq);
808 break;
809
810 case RED_IF_ATTACH:
811 ifp = ifunit(((struct red_interface *)addr)->red_ifname);
812 if (ifp == NULL) {
813 error = ENXIO;
814 break;
815 }
816
817 /* allocate and initialize red_queue_t */
818 rqp = malloc(sizeof(red_queue_t), M_DEVBUF, M_WAITOK);
819 if (rqp == NULL) {
820 error = ENOMEM;
821 break;
822 }
823 bzero(rqp, sizeof(red_queue_t));
824
825 rqp->rq_q = malloc(sizeof(class_queue_t),
826 M_DEVBUF, M_WAITOK);
827 if (rqp->rq_q == NULL) {
828 free(rqp, M_DEVBUF);
829 error = ENOMEM;
830 break;
831 }
832 bzero(rqp->rq_q, sizeof(class_queue_t));
833
834 rqp->rq_red = red_alloc(0, 0, 0, 0, 0, 0);
835 if (rqp->rq_red == NULL) {
836 free(rqp->rq_q, M_DEVBUF);
837 free(rqp, M_DEVBUF);
838 error = ENOMEM;
839 break;
840 }
841
842 rqp->rq_ifq = &ifp->if_snd;
843 qtail(rqp->rq_q) = NULL;
844 qlen(rqp->rq_q) = 0;
845 qlimit(rqp->rq_q) = RED_LIMIT;
846 qtype(rqp->rq_q) = Q_RED;
847
848 /*
849 * set RED to this ifnet structure.
850 */
851 error = altq_attach(rqp->rq_ifq, ALTQT_RED, rqp,
852 red_enqueue, red_dequeue, red_request,
853 NULL, NULL);
854 if (error) {
855 red_destroy(rqp->rq_red);
856 free(rqp->rq_q, M_DEVBUF);
857 free(rqp, M_DEVBUF);
858 break;
859 }
860
861 /* add this state to the red list */
862 rqp->rq_next = red_list;
863 red_list = rqp;
864 break;
865
866 case RED_IF_DETACH:
867 ifacep = (struct red_interface *)addr;
868 if ((rqp = altq_lookup(ifacep->red_ifname, ALTQT_RED)) == NULL) {
869 error = EBADF;
870 break;
871 }
872 error = red_detach(rqp);
873 break;
874
875 case RED_GETSTATS:
876 do {
877 struct red_stats *q_stats;
878 red_t *rp;
879
880 q_stats = (struct red_stats *)addr;
881 if ((rqp = altq_lookup(q_stats->iface.red_ifname,
882 ALTQT_RED)) == NULL) {
883 error = EBADF;
884 break;
885 }
886
887 q_stats->q_len = qlen(rqp->rq_q);
888 q_stats->q_limit = qlimit(rqp->rq_q);
889
890 rp = rqp->rq_red;
891 q_stats->q_avg = rp->red_avg >> rp->red_wshift;
892 q_stats->xmit_cnt = rp->red_stats.xmit_cnt;
893 q_stats->drop_cnt = rp->red_stats.drop_cnt;
894 q_stats->drop_forced = rp->red_stats.drop_forced;
895 q_stats->drop_unforced = rp->red_stats.drop_unforced;
896 q_stats->marked_packets = rp->red_stats.marked_packets;
897
898 q_stats->weight = rp->red_weight;
899 q_stats->inv_pmax = rp->red_inv_pmax;
900 q_stats->th_min = rp->red_thmin;
901 q_stats->th_max = rp->red_thmax;
902
903#ifdef ALTQ_FLOWVALVE
904 if (rp->red_flowvalve != NULL) {
905 struct flowvalve *fv = rp->red_flowvalve;
906 q_stats->fv_flows = fv->fv_flows;
907 q_stats->fv_pass = fv->fv_stats.pass;
908 q_stats->fv_predrop = fv->fv_stats.predrop;
909 q_stats->fv_alloc = fv->fv_stats.alloc;
910 q_stats->fv_escape = fv->fv_stats.escape;
911 } else {
912#endif /* ALTQ_FLOWVALVE */
913 q_stats->fv_flows = 0;
914 q_stats->fv_pass = 0;
915 q_stats->fv_predrop = 0;
916 q_stats->fv_alloc = 0;
917 q_stats->fv_escape = 0;
918#ifdef ALTQ_FLOWVALVE
919 }
920#endif /* ALTQ_FLOWVALVE */
921 } while (/*CONSTCOND*/ 0);
922 break;
923
924 case RED_CONFIG:
925 do {
926 struct red_conf *fc;
927 red_t *new;
928 int s, limit;
929
930 fc = (struct red_conf *)addr;
931 if ((rqp = altq_lookup(fc->iface.red_ifname,
932 ALTQT_RED)) == NULL) {
933 error = EBADF;
934 break;
935 }
936 new = red_alloc(fc->red_weight,
937 fc->red_inv_pmax,
938 fc->red_thmin,
939 fc->red_thmax,
940 fc->red_flags,
941 fc->red_pkttime);
942 if (new == NULL) {
943 error = ENOMEM;
944 break;
945 }
946
947#ifdef __NetBSD__
948 s = splnet();
949#else
950 s = splimp();
951#endif
952 red_purgeq(rqp);
953 limit = fc->red_limit;
954 if (limit < fc->red_thmax)
955 limit = fc->red_thmax;
956 qlimit(rqp->rq_q) = limit;
957 fc->red_limit = limit; /* write back the new value */
958
959 red_destroy(rqp->rq_red);
960 rqp->rq_red = new;
961
962 splx(s);
963
964 /* write back new values */
965 fc->red_limit = limit;
966 fc->red_inv_pmax = rqp->rq_red->red_inv_pmax;
967 fc->red_thmin = rqp->rq_red->red_thmin;
968 fc->red_thmax = rqp->rq_red->red_thmax;
969
970 } while (/*CONSTCOND*/ 0);
971 break;
972
973 case RED_SETDEFAULTS:
974 do {
975 struct redparams *rp;
976
977 rp = (struct redparams *)addr;
978
979 default_th_min = rp->th_min;
980 default_th_max = rp->th_max;
981 default_inv_pmax = rp->inv_pmax;
982 } while (/*CONSTCOND*/ 0);
983 break;
984
985 default:
986 error = EINVAL;
987 break;
988 }
989 return error;
990}
991
992static int
993red_detach(rqp)
994 red_queue_t *rqp;
995{
996 red_queue_t *tmp;
997 int error = 0;
998
999 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1000 altq_disable(rqp->rq_ifq);
1001
1002 if ((error = altq_detach(rqp->rq_ifq)))
1003 return (error);
1004
1005 if (red_list == rqp)
1006 red_list = rqp->rq_next;
1007 else {
1008 for (tmp = red_list; tmp != NULL; tmp = tmp->rq_next)
1009 if (tmp->rq_next == rqp) {
1010 tmp->rq_next = rqp->rq_next;
1011 break;
1012 }
1013 if (tmp == NULL)
1014 printf("red_detach: no state found in red_list!\n");
1015 }
1016
1017 red_destroy(rqp->rq_red);
1018 free(rqp->rq_q, M_DEVBUF);
1019 free(rqp, M_DEVBUF);
1020 return (error);
1021}
1022
1023/*
1024 * enqueue routine:
1025 *
1026 * returns: 0 when successfully queued.
1027 * ENOBUFS when drop occurs.
1028 */
1029static int
1030red_enqueue(ifq, m, pktattr)
1031 struct ifaltq *ifq;
1032 struct mbuf *m;
1033 struct altq_pktattr *pktattr;
1034{
1035 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1036
1037 IFQ_LOCK_ASSERT(ifq);
1038
1039 if (red_addq(rqp->rq_red, rqp->rq_q, m, pktattr) < 0)
1040 return ENOBUFS;
1041 ifq->ifq_len++;
1042 return 0;
1043}
1044
1045/*
1046 * dequeue routine:
1047 * must be called in splimp.
1048 *
1049 * returns: mbuf dequeued.
1050 * NULL when no packet is available in the queue.
1051 */
1052
1053static struct mbuf *
1054red_dequeue(ifq, op)
1055 struct ifaltq *ifq;
1056 int op;
1057{
1058 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1059 struct mbuf *m;
1060
1061 IFQ_LOCK_ASSERT(ifq);
1062
1063 if (op == ALTDQ_POLL)
1064 return qhead(rqp->rq_q);
1065
1066 /* op == ALTDQ_REMOVE */
1067 m = red_getq(rqp->rq_red, rqp->rq_q);
1068 if (m != NULL)
1069 ifq->ifq_len--;
1070 return (m);
1071}
1072
1073static int
1074red_request(ifq, req, arg)
1075 struct ifaltq *ifq;
1076 int req;
1077 void *arg;
1078{
1079 red_queue_t *rqp = (red_queue_t *)ifq->altq_disc;
1080
1081 IFQ_LOCK_ASSERT(ifq);
1082
1083 switch (req) {
1084 case ALTRQ_PURGE:
1085 red_purgeq(rqp);
1086 break;
1087 }
1088 return (0);
1089}
1090
1091static void
1092red_purgeq(rqp)
1093 red_queue_t *rqp;
1094{
1095 _flushq(rqp->rq_q);
1096 if (ALTQ_IS_ENABLED(rqp->rq_ifq))
1097 rqp->rq_ifq->ifq_len = 0;
1098}
1099
#ifdef ALTQ_FLOWVALVE

/*
 * Flowvalve: detect flows that keep sending despite RED drops and
 * pre-drop their packets before they reach the queue.  Per-flow
 * average drop rate (p) and bandwidth fraction (f) are kept as
 * scaled fixed-point integers using the shift macros below.
 */
#define	FV_PSHIFT	7	/* weight of average drop rate -- 1/128 */
#define	FV_PSCALE(x)	((x) << FV_PSHIFT)
#define	FV_PUNSCALE(x)	((x) >> FV_PSHIFT)
#define	FV_FSHIFT	5	/* weight of average fraction -- 1/32 */
#define	FV_FSCALE(x)	((x) << FV_FSHIFT)
#define	FV_FUNSCALE(x)	((x) >> FV_FSHIFT)

#define	FV_TIMER	(3 * hz)	/* timer value for garbage collector */
#define	FV_FLOWLISTSIZE		64	/* how many flows in flowlist */

#define	FV_N			10	/* update fve_f every FV_N packets */

#define	FV_BACKOFFTHRESH	1  /* backoff threshold interval in second */
#define	FV_TTHRESH		3  /* time threshold to delete fve */
#define	FV_ALPHA		5  /* extra packet count */

/* keep pass/predrop/alloc/escape counters in fv_stats */
#define	FV_STATS

#if (__FreeBSD_version > 300000)
#define	FV_TIMESTAMP(tp)	getmicrotime(tp)
#else
#define	FV_TIMESTAMP(tp)	{ (*(tp)) = time; }
#endif

/*
 * Brtt table: 127 entry table to convert drop rate (p) to
 * the corresponding bandwidth fraction (f)
 * the following equation is implemented to use scaled values,
 * fve_p and fve_f, in the fixed point format.
 *
 *   Brtt(p) = 1 /(sqrt(4*p/3) + min(1,3*sqrt(p*6/8)) * p * (1+32 * p*p))
 *   f = Brtt(p) / (max_th + alpha)
 */
#define	BRTT_SIZE	128		/* table size (entry 0 unused) */
#define	BRTT_SHIFT	12		/* low-order p bits dropped for lookup */
#define	BRTT_MASK	0x0007f000	/* 7 index bits of a scaled p */
#define	BRTT_PMAX	(1 << (FV_PSHIFT + FP_SHIFT))	/* lookup upper bound */

const int brtt_tab[BRTT_SIZE] = {
	0, 1262010, 877019, 703694, 598706, 525854, 471107, 427728,
	392026, 361788, 335598, 312506, 291850, 273158, 256081, 240361,
	225800, 212247, 199585, 187788, 178388, 169544, 161207, 153333,
	145888, 138841, 132165, 125836, 119834, 114141, 108739, 103612,
	98747, 94129, 89746, 85585, 81637, 77889, 74333, 70957,
	67752, 64711, 61824, 59084, 56482, 54013, 51667, 49440,
	47325, 45315, 43406, 41591, 39866, 38227, 36667, 35184,
	33773, 32430, 31151, 29933, 28774, 27668, 26615, 25611,
	24653, 23740, 22868, 22035, 21240, 20481, 19755, 19062,
	18399, 17764, 17157, 16576, 16020, 15487, 14976, 14487,
	14017, 13567, 13136, 12721, 12323, 11941, 11574, 11222,
	10883, 10557, 10243, 9942, 9652, 9372, 9103, 8844,
	8594, 8354, 8122, 7898, 7682, 7474, 7273, 7079,
	6892, 6711, 6536, 6367, 6204, 6046, 5893, 5746,
	5603, 5464, 5330, 5201, 5075, 4954, 4836, 4722,
	4611, 4504, 4400, 4299, 4201, 4106, 4014, 3924
};
1158
/*
 * Look up the flow entry (fve) matching the arriving packet.
 *
 * The flow list is kept in LRU order and expired lazily: an entry
 * with fve_lastdrop.tv_sec == 0 is free, and one whose last drop is
 * older than FV_TTHRESH seconds is recycled on the spot.  Since the
 * list is LRU-ordered, the scan stops at the first free/expired
 * entry -- everything behind it is at least as old.
 *
 * Returns the matching entry, or NULL when the flow is not listed
 * or the address family is not handled.  Side effect: fv->fv_flows
 * is updated with the number of active entries scanned.
 */
static __inline struct fve *
flowlist_lookup(fv, pktattr, now)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
	struct timeval *now;
{
	struct fve *fve;
	int flows;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct timeval tthresh;

	if (pktattr == NULL)
		return (NULL);

	/* entries with no drop since this time are considered stale */
	tthresh.tv_sec = now->tv_sec - FV_TTHRESH;
	flows = 0;
	/*
	 * search the flow list
	 */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* free entry: end of active list */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;	/* expire */
				break;
			}
			/* match on the IPv4 src/dst address pair */
			if (fve->fve_flow.flow_af == AF_INET &&
			    fve->fve_flow.flow_ip.ip_src.s_addr ==
			    ip->ip_src.s_addr &&
			    fve->fve_flow.flow_ip.ip_dst.s_addr ==
			    ip->ip_dst.s_addr)
				return (fve);
			flows++;
		}
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		TAILQ_FOREACH(fve, &fv->fv_flowlist, fve_lru){
			if (fve->fve_lastdrop.tv_sec == 0)
				break;	/* free entry: end of active list */
			if (fve->fve_lastdrop.tv_sec < tthresh.tv_sec) {
				fve->fve_lastdrop.tv_sec = 0;	/* expire */
				break;
			}
			/* match on the IPv6 src/dst address pair */
			if (fve->fve_flow.flow_af == AF_INET6 &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_src,
					       &ip6->ip6_src) &&
			    IN6_ARE_ADDR_EQUAL(&fve->fve_flow.flow_ip6.ip6_dst,
					       &ip6->ip6_dst))
				return (fve);
			flows++;
		}
		break;
#endif /* INET6 */

	default:
		/* unknown protocol.  no drop. */
		return (NULL);
	}
	fv->fv_flows = flows;	/* save the number of active fve's */
	return (NULL);
}
1228
/*
 * Recycle the least-recently-used flow entry (the tail of the LRU
 * list) for the flow of the packet just dropped, and reset its
 * flowvalve state.  Always returns a non-NULL entry; the caller is
 * expected to move it to the head of the list.
 *
 * NOTE(review): for an address family other than AF_INET/AF_INET6
 * the switch falls through and the entry keeps its previous flow
 * identity -- presumably harmless because flowlist_lookup() rejects
 * unknown families, but verify against fv_dropbyred() callers.
 */
static __inline struct fve *
flowlist_reclaim(fv, pktattr)
	struct flowvalve *fv;
	struct altq_pktattr *pktattr;
{
	struct fve *fve;
	struct ip *ip;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif

	/*
	 * get an entry from the tail of the LRU list.
	 */
	fve = TAILQ_LAST(&fv->fv_flowlist, fv_flowhead);

	/* record the flow identity (address pair) of the new owner */
	switch (pktattr->pattr_af) {
	case AF_INET:
		ip = (struct ip *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET;
		fve->fve_flow.flow_ip.ip_src = ip->ip_src;
		fve->fve_flow.flow_ip.ip_dst = ip->ip_dst;
		break;
#ifdef INET6
	case AF_INET6:
		ip6 = (struct ip6_hdr *)pktattr->pattr_hdr;
		fve->fve_flow.flow_af = AF_INET6;
		fve->fve_flow.flow_ip6.ip6_src = ip6->ip6_src;
		fve->fve_flow.flow_ip6.ip6_dst = ip6->ip6_dst;
		break;
#endif
	}

	/*
	 * reset the valve state.  fve_p/fve_f are fixed-point fields
	 * (the FV_*SCALE shift macros imply an integer type), so the
	 * 0.0 literals below simply store zero.
	 */
	fve->fve_state = Green;
	fve->fve_p = 0.0;
	fve->fve_f = 0.0;
	fve->fve_ifseq = fv->fv_ifseq - 1;
	fve->fve_count = 0;

	fv->fv_flows++;
#ifdef FV_STATS
	fv->fv_stats.alloc++;
#endif
	return (fve);
}
1274
1275static __inline void
1276flowlist_move_to_head(fv, fve)
1277 struct flowvalve *fv;
1278 struct fve *fve;
1279{
1280 if (TAILQ_FIRST(&fv->fv_flowlist) != fve) {
1281 TAILQ_REMOVE(&fv->fv_flowlist, fve, fve_lru);
1282 TAILQ_INSERT_HEAD(&fv->fv_flowlist, fve, fve_lru);
1283 }
1284}
1285
#if 0 /* XXX: make the compiler happy (fv_alloc unused) */
/*
 * allocate flowvalve structure
 *
 * Builds the flowvalve state: an array of FV_FLOWLISTSIZE entries
 * threaded onto an LRU list (lastdrop.tv_sec == 0 marks a free
 * entry), the scaled drop-rate threshold fv_pthresh derived from
 * the RED inverse pmax, and the p->f lookup table fv_p2ftab scaled
 * by (th_max + FV_ALPHA).  Returns NULL when allocation fails.
 */
static struct flowvalve *
fv_alloc(rp)
	struct red *rp;
{
	struct flowvalve *fv;
	struct fve *fve;
	int i, num;

	num = FV_FLOWLISTSIZE;
	fv = malloc(sizeof(struct flowvalve),
	       M_DEVBUF, M_WAITOK);
	if (fv == NULL)
		return (NULL);
	bzero(fv, sizeof(struct flowvalve));

	fv->fv_fves = malloc(sizeof(struct fve) * num,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_fves == NULL) {
		free(fv, M_DEVBUF);
		return (NULL);
	}
	bzero(fv->fv_fves, sizeof(struct fve) * num);

	fv->fv_flows = 0;
	TAILQ_INIT(&fv->fv_flowlist);
	for (i = 0; i < num; i++) {
		fve = &fv->fv_fves[i];
		fve->fve_lastdrop.tv_sec = 0;	/* mark the entry free */
		TAILQ_INSERT_TAIL(&fv->fv_flowlist, fve, fve_lru);
	}

	/* initialize drop rate threshold in scaled fixed-point */
	fv->fv_pthresh = (FV_PSCALE(1) << FP_SHIFT) / rp->red_inv_pmax;

	/* initialize drop rate to fraction table */
	fv->fv_p2ftab = malloc(sizeof(int) * BRTT_SIZE,
	       M_DEVBUF, M_WAITOK);
	if (fv->fv_p2ftab == NULL) {
		free(fv->fv_fves, M_DEVBUF);
		free(fv, M_DEVBUF);
		return (NULL);
	}
	/*
	 * create the p2f table.
	 * (shift is used to keep the precision)
	 * note: entry 0 is left uninitialized; fv_p2f() never reads it.
	 */
	for (i = 1; i < BRTT_SIZE; i++) {
		int f;

		f = brtt_tab[i] << 8;
		fv->fv_p2ftab[i] = (f / (rp->red_thmax + FV_ALPHA)) >> 8;
	}

	return (fv);
}
#endif
1346
1347static void fv_destroy(fv)
1348 struct flowvalve *fv;
1349{
1350 free(fv->fv_p2ftab, M_DEVBUF);
1351 free(fv->fv_fves, M_DEVBUF);
1352 free(fv, M_DEVBUF);
1353}
1354
1355static __inline int
1356fv_p2f(fv, p)
1357 struct flowvalve *fv;
1358 int p;
1359{
1360 int val, f;
1361
1362 if (p >= BRTT_PMAX)
1363 f = fv->fv_p2ftab[BRTT_SIZE-1];
1364 else if ((val = (p & BRTT_MASK)))
1365 f = fv->fv_p2ftab[(val >> BRTT_SHIFT)];
1366 else
1367 f = fv->fv_p2ftab[1];
1368 return (f);
1369}
1370
1371/*
1372 * check if an arriving packet should be pre-dropped.
1373 * called from red_addq() when a packet arrives.
1374 * returns 1 when the packet should be pre-dropped.
1375 * should be called in splimp.
1376 */
1377static int
1378fv_checkflow(fv, pktattr, fcache)
1379 struct flowvalve *fv;
1380 struct altq_pktattr *pktattr;
1381 struct fve **fcache;
1382{
1383 struct fve *fve;
1384 struct timeval now;
1385
1386 fv->fv_ifseq++;
1387 FV_TIMESTAMP(&now);
1388
1389 if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1390 /* no matching entry in the flowlist */
1391 return (0);
1392
1393 *fcache = fve;
1394
1395 /* update fraction f for every FV_N packets */
1396 if (++fve->fve_count == FV_N) {
1397 /*
1398 * f = Wf * N / (fv_ifseq - fve_ifseq) + (1 - Wf) * f
1399 */
1400 fve->fve_f =
1401 (FV_N << FP_SHIFT) / (fv->fv_ifseq - fve->fve_ifseq)
1402 + fve->fve_f - FV_FUNSCALE(fve->fve_f);
1403 fve->fve_ifseq = fv->fv_ifseq;
1404 fve->fve_count = 0;
1405 }
1406
1407 /*
1408 * overpumping test
1409 */
1410 if (fve->fve_state == Green && fve->fve_p > fv->fv_pthresh) {
1411 int fthresh;
1412
1413 /* calculate a threshold */
1414 fthresh = fv_p2f(fv, fve->fve_p);
1415 if (fve->fve_f > fthresh)
1416 fve->fve_state = Red;
1417 }
1418
1419 if (fve->fve_state == Red) {
1420 /*
1421 * backoff test
1422 */
1423 if (now.tv_sec - fve->fve_lastdrop.tv_sec > FV_BACKOFFTHRESH) {
1424 /* no drop for at least FV_BACKOFFTHRESH sec */
1425 fve->fve_p = 0;
1426 fve->fve_state = Green;
1427#ifdef FV_STATS
1428 fv->fv_stats.escape++;
1429#endif
1430 } else {
1431 /* block this flow */
1432 flowlist_move_to_head(fv, fve);
1433 fve->fve_lastdrop = now;
1434#ifdef FV_STATS
1435 fv->fv_stats.predrop++;
1436#endif
1437 return (1);
1438 }
1439 }
1440
1441 /*
1442 * p = (1 - Wp) * p
1443 */
1444 fve->fve_p -= FV_PUNSCALE(fve->fve_p);
1445 if (fve->fve_p < 0)
1446 fve->fve_p = 0;
1447#ifdef FV_STATS
1448 fv->fv_stats.pass++;
1449#endif
1450 return (0);
1451}
1452
1453/*
1454 * called from red_addq when a packet is dropped by red.
1455 * should be called in splimp.
1456 */
1457static void fv_dropbyred(fv, pktattr, fcache)
1458 struct flowvalve *fv;
1459 struct altq_pktattr *pktattr;
1460 struct fve *fcache;
1461{
1462 struct fve *fve;
1463 struct timeval now;
1464
1465 if (pktattr == NULL)
1466 return;
1467 FV_TIMESTAMP(&now);
1468
1469 if (fcache != NULL)
1470 /* the fve of this packet is already cached */
1471 fve = fcache;
1472 else if ((fve = flowlist_lookup(fv, pktattr, &now)) == NULL)
1473 fve = flowlist_reclaim(fv, pktattr);
1474
1475 flowlist_move_to_head(fv, fve);
1476
1477 /*
1478 * update p: the following line cancels the update
1479 * in fv_checkflow() and calculate
1480 * p = Wp + (1 - Wp) * p
1481 */
1482 fve->fve_p = (1 << FP_SHIFT) + fve->fve_p;
1483
1484 fve->fve_lastdrop = now;
1485}
1486
1487#endif /* ALTQ_FLOWVALVE */
1488
#ifdef KLD_MODULE

/* ALTQ3 character-device switch entry for the RED discipline */
static struct altqsw red_sw =
	{"red", redopen, redclose, redioctl};

/* register as a loadable ALTQ discipline module */
ALTQ_MODULE(altq_red, ALTQT_RED, &red_sw);
MODULE_VERSION(altq_red, 1);

#endif /* KLD_MODULE */
1498#endif /* ALTQ3_COMPAT */
1499
1500#endif /* ALTQ_RED */