1287009Sloos/*
2287009Sloos * CoDel - The Controlled-Delay Active Queue Management algorithm
3287009Sloos *
4287120Sloos *  Copyright (C) 2013 Ermal Lu��i <eri@FreeBSD.org>
5287009Sloos *  Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
6287009Sloos *  Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
7287009Sloos *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
8287009Sloos *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
9287009Sloos *
10287009Sloos * Redistribution and use in source and binary forms, with or without
11287009Sloos * modification, are permitted provided that the following conditions
12287009Sloos * are met:
13287009Sloos * 1. Redistributions of source code must retain the above copyright
14287009Sloos *    notice, this list of conditions, and the following disclaimer,
15287009Sloos *    without modification.
16287009Sloos * 2. Redistributions in binary form must reproduce the above copyright
17287009Sloos *    notice, this list of conditions and the following disclaimer in the
18287009Sloos *    documentation and/or other materials provided with the distribution.
19287009Sloos * 3. The names of the authors may not be used to endorse or promote products
20287009Sloos *    derived from this software without specific prior written permission.
21287009Sloos *
22287009Sloos * Alternatively, provided that this notice is retained in full, this
23287009Sloos * software may be distributed under the terms of the GNU General
24287009Sloos * Public License ("GPL") version 2, in which case the provisions of the
25287009Sloos * GPL apply INSTEAD OF those given above.
26287009Sloos *
27287009Sloos * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28287009Sloos * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29287009Sloos * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
30287009Sloos * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
31287009Sloos * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
32287009Sloos * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
33287009Sloos * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
34287009Sloos * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
35287009Sloos * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
36287009Sloos * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
37287009Sloos * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
38287009Sloos * DAMAGE.
39287009Sloos *
40287009Sloos * $FreeBSD$
41287009Sloos */
42287009Sloos#include "opt_altq.h"
43287009Sloos#include "opt_inet.h"
44287009Sloos#include "opt_inet6.h"
45287009Sloos
46287009Sloos#ifdef ALTQ_CODEL  /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */
47287009Sloos
48287009Sloos#include <sys/param.h>
49287009Sloos#include <sys/malloc.h>
50287009Sloos#include <sys/mbuf.h>
51287009Sloos#include <sys/socket.h>
52287009Sloos#include <sys/systm.h>
53287009Sloos
54287009Sloos#include <net/if.h>
55287009Sloos#include <net/if_var.h>
56287009Sloos#include <netinet/in.h>
57287009Sloos
58287009Sloos#include <netpfil/pf/pf.h>
59287009Sloos#include <netpfil/pf/pf_altq.h>
60287009Sloos#include <net/altq/if_altq.h>
61287009Sloos#include <net/altq/altq.h>
62287009Sloos#include <net/altq/altq_codel.h>
63287009Sloos
64287009Sloosstatic int		 codel_should_drop(struct codel *, class_queue_t *,
65287009Sloos			    struct mbuf *, u_int64_t);
66287009Sloosstatic void		 codel_Newton_step(struct codel_vars *);
67287009Sloosstatic u_int64_t	 codel_control_law(u_int64_t t, u_int64_t, u_int32_t);
68287009Sloos
69287009Sloos#define	codel_time_after(a, b)		((int64_t)(a) - (int64_t)(b) > 0)
70287009Sloos#define	codel_time_after_eq(a, b)	((int64_t)(a) - (int64_t)(b) >= 0)
71287009Sloos#define	codel_time_before(a, b)		((int64_t)(a) - (int64_t)(b) < 0)
72287009Sloos#define	codel_time_before_eq(a, b)	((int64_t)(a) - (int64_t)(b) <= 0)
73287009Sloos
74287009Sloosstatic int codel_request(struct ifaltq *, int, void *);
75287009Sloos
76287009Sloosstatic int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *);
77287009Sloosstatic struct mbuf *codel_dequeue(struct ifaltq *, int);
78287009Sloos
79287009Sloosint
80287009Slooscodel_pfattach(struct pf_altq *a)
81287009Sloos{
82287009Sloos	struct ifnet *ifp;
83287009Sloos
84287009Sloos	if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL)
85287009Sloos		return (EINVAL);
86287009Sloos
87287009Sloos	return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc,
88287009Sloos	    codel_enqueue, codel_dequeue, codel_request, NULL, NULL));
89287009Sloos}
90287009Sloos
91287009Sloosint
92287009Slooscodel_add_altq(struct pf_altq *a)
93287009Sloos{
94287009Sloos	struct codel_if	*cif;
95287009Sloos	struct ifnet	*ifp;
96287009Sloos	struct codel_opts	*opts;
97287009Sloos
98287009Sloos	if ((ifp = ifunit(a->ifname)) == NULL)
99287009Sloos		return (EINVAL);
100287009Sloos	if (!ALTQ_IS_READY(&ifp->if_snd))
101287009Sloos		return (ENODEV);
102287009Sloos
103287009Sloos	opts = &a->pq_u.codel_opts;
104287009Sloos
105287009Sloos	cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO);
106287009Sloos	if (cif == NULL)
107287009Sloos		return (ENOMEM);
108287009Sloos	cif->cif_bandwidth = a->ifbandwidth;
109287009Sloos	cif->cif_ifq = &ifp->if_snd;
110287009Sloos
111287009Sloos	cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO);
112287009Sloos	if (cif->cl_q == NULL) {
113287009Sloos		free(cif, M_DEVBUF);
114287009Sloos		return (ENOMEM);
115287009Sloos	}
116287009Sloos
117287009Sloos	if (a->qlimit == 0)
118287009Sloos		a->qlimit = 50;	/* use default. */
119287009Sloos	qlimit(cif->cl_q) = a->qlimit;
120287009Sloos	qtype(cif->cl_q) = Q_CODEL;
121287009Sloos	qlen(cif->cl_q) = 0;
122287009Sloos	qsize(cif->cl_q) = 0;
123287009Sloos
124287009Sloos	if (opts->target == 0)
125287009Sloos		opts->target = 5;
126287009Sloos	if (opts->interval == 0)
127287009Sloos		opts->interval = 100;
128287009Sloos	cif->codel.params.target = machclk_freq * opts->target / 1000;
129287009Sloos	cif->codel.params.interval = machclk_freq * opts->interval / 1000;
130287009Sloos	cif->codel.params.ecn = opts->ecn;
131287009Sloos	cif->codel.stats.maxpacket = 256;
132287009Sloos
133287009Sloos	cif->cl_stats.qlength = qlen(cif->cl_q);
134287009Sloos	cif->cl_stats.qlimit = qlimit(cif->cl_q);
135287009Sloos
136287009Sloos	/* keep the state in pf_altq */
137287009Sloos	a->altq_disc = cif;
138287009Sloos
139287009Sloos	return (0);
140287009Sloos}
141287009Sloos
142287009Sloosint
143287009Slooscodel_remove_altq(struct pf_altq *a)
144287009Sloos{
145287009Sloos	struct codel_if *cif;
146287009Sloos
147287009Sloos	if ((cif = a->altq_disc) == NULL)
148287009Sloos		return (EINVAL);
149287009Sloos	a->altq_disc = NULL;
150287009Sloos
151287009Sloos	if (cif->cl_q)
152287009Sloos		free(cif->cl_q, M_DEVBUF);
153287009Sloos	free(cif, M_DEVBUF);
154287009Sloos
155287009Sloos	return (0);
156287009Sloos}
157287009Sloos
158287009Sloosint
159287009Slooscodel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes)
160287009Sloos{
161287009Sloos	struct codel_if *cif;
162287009Sloos	struct codel_ifstats stats;
163287009Sloos	int error = 0;
164287009Sloos
165287009Sloos	if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL)
166287009Sloos		return (EBADF);
167287009Sloos
168287009Sloos	if (*nbytes < sizeof(stats))
169287009Sloos		return (EINVAL);
170287009Sloos
171287009Sloos	stats = cif->cl_stats;
172287009Sloos	stats.stats = cif->codel.stats;
173287009Sloos
174287009Sloos	if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
175287009Sloos		return (error);
176287009Sloos	*nbytes = sizeof(stats);
177287009Sloos
178287009Sloos	return (0);
179287009Sloos}
180287009Sloos
181287009Sloosstatic int
182287009Slooscodel_request(struct ifaltq *ifq, int req, void *arg)
183287009Sloos{
184287009Sloos	struct codel_if	*cif = (struct codel_if *)ifq->altq_disc;
185287009Sloos	struct mbuf *m;
186287009Sloos
187287009Sloos	IFQ_LOCK_ASSERT(ifq);
188287009Sloos
189287009Sloos	switch (req) {
190287009Sloos	case ALTRQ_PURGE:
191287009Sloos		if (!ALTQ_IS_ENABLED(cif->cif_ifq))
192287009Sloos			break;
193287009Sloos
194287009Sloos		if (qempty(cif->cl_q))
195287009Sloos			break;
196287009Sloos
197287009Sloos		while ((m = _getq(cif->cl_q)) != NULL) {
198287009Sloos			PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
199287009Sloos			m_freem(m);
200287009Sloos			IFQ_DEC_LEN(cif->cif_ifq);
201287009Sloos		}
202287009Sloos		cif->cif_ifq->ifq_len = 0;
203287009Sloos		break;
204287009Sloos	}
205287009Sloos
206287009Sloos	return (0);
207287009Sloos}
208287009Sloos
209287009Sloosstatic int
210287009Slooscodel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr)
211287009Sloos{
212287009Sloos
213287009Sloos	struct codel_if *cif = (struct codel_if *) ifq->altq_disc;
214287009Sloos
215287009Sloos	IFQ_LOCK_ASSERT(ifq);
216287009Sloos
217287009Sloos	/* grab class set by classifier */
218287009Sloos	if ((m->m_flags & M_PKTHDR) == 0) {
219287009Sloos		/* should not happen */
220287009Sloos		printf("altq: packet for %s does not have pkthdr\n",
221287009Sloos		   ifq->altq_ifp->if_xname);
222287009Sloos		m_freem(m);
223287009Sloos		PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
224287009Sloos		return (ENOBUFS);
225287009Sloos	}
226287009Sloos
227287009Sloos	if (codel_addq(&cif->codel, cif->cl_q, m)) {
228287009Sloos		PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m));
229287009Sloos		return (ENOBUFS);
230287009Sloos	}
231287009Sloos	IFQ_INC_LEN(ifq);
232287009Sloos
233287009Sloos	return (0);
234287009Sloos}
235287009Sloos
236287009Sloosstatic struct mbuf *
237287009Slooscodel_dequeue(struct ifaltq *ifq, int op)
238287009Sloos{
239287009Sloos	struct codel_if *cif = (struct codel_if *)ifq->altq_disc;
240287009Sloos	struct mbuf *m;
241287009Sloos
242287009Sloos	IFQ_LOCK_ASSERT(ifq);
243287009Sloos
244287009Sloos	if (IFQ_IS_EMPTY(ifq))
245287009Sloos		return (NULL);
246287009Sloos
247287009Sloos	if (op == ALTDQ_POLL)
248287009Sloos		return (qhead(cif->cl_q));
249287009Sloos
250287009Sloos
251287009Sloos	m = codel_getq(&cif->codel, cif->cl_q);
252287009Sloos	if (m != NULL) {
253287009Sloos		IFQ_DEC_LEN(ifq);
254287009Sloos		PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m));
255287009Sloos		return (m);
256287009Sloos	}
257287009Sloos
258287009Sloos	return (NULL);
259287009Sloos}
260287009Sloos
261287009Sloosstruct codel *
262287009Slooscodel_alloc(int target, int interval, int ecn)
263287009Sloos{
264287009Sloos	struct codel *c;
265287009Sloos
266287009Sloos	c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO);
267287009Sloos	if (c != NULL) {
268287009Sloos		c->params.target = machclk_freq * target / 1000;
269287009Sloos		c->params.interval = machclk_freq * interval / 1000;
270287009Sloos		c->params.ecn = ecn;
271287009Sloos		c->stats.maxpacket = 256;
272287009Sloos	}
273287009Sloos
274287009Sloos	return (c);
275287009Sloos}
276287009Sloos
277287009Sloosvoid
278287009Slooscodel_destroy(struct codel *c)
279287009Sloos{
280287009Sloos
281287009Sloos	free(c, M_DEVBUF);
282287009Sloos}
283287009Sloos
284287009Sloos#define	MTAG_CODEL	1438031249
285287009Sloosint
286287009Slooscodel_addq(struct codel *c, class_queue_t *q, struct mbuf *m)
287287009Sloos{
288287009Sloos	struct m_tag *mtag;
289287009Sloos	uint64_t *enqueue_time;
290287009Sloos
291287009Sloos	if (qlen(q) < qlimit(q)) {
292287009Sloos		mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
293287009Sloos		if (mtag == NULL)
294287009Sloos			mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t),
295287009Sloos			    M_NOWAIT);
296287009Sloos		if (mtag == NULL) {
297287009Sloos			m_freem(m);
298287009Sloos			return (-1);
299287009Sloos		}
300287009Sloos		enqueue_time = (uint64_t *)(mtag + 1);
301287009Sloos		*enqueue_time = read_machclk();
302287009Sloos		m_tag_prepend(m, mtag);
303287009Sloos		_addq(q, m);
304287009Sloos		return (0);
305287009Sloos	}
306287009Sloos	c->drop_overlimit++;
307287009Sloos	m_freem(m);
308287009Sloos
309287009Sloos	return (-1);
310287009Sloos}
311287009Sloos
312287009Sloosstatic int
313287009Slooscodel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m,
314287009Sloos    u_int64_t now)
315287009Sloos{
316287009Sloos	struct m_tag *mtag;
317287009Sloos	uint64_t *enqueue_time;
318287009Sloos
319287009Sloos	if (m == NULL) {
320287009Sloos		c->vars.first_above_time = 0;
321287009Sloos		return (0);
322287009Sloos	}
323287009Sloos
324287009Sloos	mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL);
325287009Sloos	if (mtag == NULL) {
326287009Sloos		/* Only one warning per second. */
327287009Sloos		if (ppsratecheck(&c->last_log, &c->last_pps, 1))
328287009Sloos			printf("%s: could not found the packet mtag!\n",
329287009Sloos			    __func__);
330287009Sloos		c->vars.first_above_time = 0;
331287009Sloos		return (0);
332287009Sloos	}
333287009Sloos	enqueue_time = (uint64_t *)(mtag + 1);
334287009Sloos	c->vars.ldelay = now - *enqueue_time;
335287009Sloos	c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m));
336287009Sloos
337287009Sloos	if (codel_time_before(c->vars.ldelay, c->params.target) ||
338287009Sloos	    qsize(q) <= c->stats.maxpacket) {
339287009Sloos		/* went below - stay below for at least interval */
340287009Sloos		c->vars.first_above_time = 0;
341287009Sloos		return (0);
342287009Sloos	}
343287009Sloos	if (c->vars.first_above_time == 0) {
344287009Sloos		/* just went above from below. If we stay above
345287009Sloos		 * for at least interval we'll say it's ok to drop
346287009Sloos		 */
347287009Sloos		c->vars.first_above_time = now + c->params.interval;
348287009Sloos		return (0);
349287009Sloos	}
350287009Sloos	if (codel_time_after(now, c->vars.first_above_time))
351287009Sloos		return (1);
352287009Sloos
353287009Sloos	return (0);
354287009Sloos}
355287009Sloos
356287009Sloos/*
357287009Sloos * Run a Newton method step:
358287009Sloos * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
359287009Sloos *
360287009Sloos * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
361287009Sloos */
362287009Sloosstatic void
363287009Slooscodel_Newton_step(struct codel_vars *vars)
364287009Sloos{
365287009Sloos	uint32_t invsqrt, invsqrt2;
366287009Sloos	uint64_t val;
367287009Sloos
368287009Sloos/* sizeof_in_bits(rec_inv_sqrt) */
369287009Sloos#define	REC_INV_SQRT_BITS (8 * sizeof(u_int16_t))
370287009Sloos/* needed shift to get a Q0.32 number from rec_inv_sqrt */
371287009Sloos#define	REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS)
372287009Sloos
373287009Sloos	invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT;
374287009Sloos	invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32;
375287009Sloos	val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2);
376287009Sloos	val >>= 2; /* avoid overflow in following multiply */
377287009Sloos	val = (val * invsqrt) >> (32 - 2 + 1);
378287009Sloos
379287009Sloos	vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT;
380287009Sloos}
381287009Sloos
382287009Sloosstatic u_int64_t
383287009Slooscodel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt)
384287009Sloos{
385287009Sloos
386287009Sloos	return (t + (u_int32_t)(((u_int64_t)interval *
387287009Sloos	    (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32));
388287009Sloos}
389287009Sloos
390287009Sloosstruct mbuf *
391287009Slooscodel_getq(struct codel *c, class_queue_t *q)
392287009Sloos{
393287009Sloos	struct mbuf	*m;
394287009Sloos	u_int64_t	 now;
395287009Sloos	int		 drop;
396287009Sloos
397287009Sloos	if ((m = _getq(q)) == NULL) {
398287009Sloos		c->vars.dropping = 0;
399287009Sloos		return (m);
400287009Sloos	}
401287009Sloos
402287009Sloos	now = read_machclk();
403287009Sloos	drop = codel_should_drop(c, q, m, now);
404287009Sloos	if (c->vars.dropping) {
405287009Sloos		if (!drop) {
406287009Sloos			/* sojourn time below target - leave dropping state */
407287009Sloos			c->vars.dropping = 0;
408287009Sloos		} else if (codel_time_after_eq(now, c->vars.drop_next)) {
409287009Sloos			/* It's time for the next drop. Drop the current
410287009Sloos			 * packet and dequeue the next. The dequeue might
411287009Sloos			 * take us out of dropping state.
412287009Sloos			 * If not, schedule the next drop.
413287009Sloos			 * A large backlog might result in drop rates so high
414287009Sloos			 * that the next drop should happen now,
415287009Sloos			 * hence the while loop.
416287009Sloos			 */
417287009Sloos			while (c->vars.dropping &&
418287009Sloos			    codel_time_after_eq(now, c->vars.drop_next)) {
419287009Sloos				c->vars.count++; /* don't care of possible wrap
420287009Sloos						  * since there is no more
421287009Sloos						  * divide */
422287009Sloos				codel_Newton_step(&c->vars);
423287009Sloos				/* TODO ECN */
424287009Sloos				PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
425287009Sloos				m_freem(m);
426287009Sloos				m = _getq(q);
427287009Sloos				if (!codel_should_drop(c, q, m, now))
428287009Sloos					/* leave dropping state */
429287009Sloos					c->vars.dropping = 0;
430287009Sloos				else
431287009Sloos					/* and schedule the next drop */
432287009Sloos					c->vars.drop_next =
433287009Sloos					    codel_control_law(c->vars.drop_next,
434287009Sloos						c->params.interval,
435287009Sloos						c->vars.rec_inv_sqrt);
436287009Sloos			}
437287009Sloos		}
438287009Sloos	} else if (drop) {
439287009Sloos		/* TODO ECN */
440287009Sloos		PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m));
441287009Sloos		m_freem(m);
442287009Sloos
443287009Sloos		m = _getq(q);
444287009Sloos		drop = codel_should_drop(c, q, m, now);
445287009Sloos
446287009Sloos		c->vars.dropping = 1;
447287009Sloos		/* if min went above target close to when we last went below it
448287009Sloos		 * assume that the drop rate that controlled the queue on the
449287009Sloos		 * last cycle is a good starting point to control it now.
450287009Sloos		 */
451287009Sloos		if (codel_time_before(now - c->vars.drop_next,
452287009Sloos		    16 * c->params.interval)) {
453287009Sloos			c->vars.count = (c->vars.count - c->vars.lastcount) | 1;
454287009Sloos			/* we dont care if rec_inv_sqrt approximation
455287009Sloos			 * is not very precise :
456287009Sloos			 * Next Newton steps will correct it quadratically.
457287009Sloos			 */
458287009Sloos			codel_Newton_step(&c->vars);
459287009Sloos		} else {
460287009Sloos			c->vars.count = 1;
461287009Sloos			c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
462287009Sloos		}
463287009Sloos		c->vars.lastcount = c->vars.count;
464287009Sloos		c->vars.drop_next = codel_control_law(now, c->params.interval,
465287009Sloos		    c->vars.rec_inv_sqrt);
466287009Sloos	}
467287009Sloos
468287009Sloos	return (m);
469287009Sloos}
470287009Sloos
471287009Sloosvoid
472287009Slooscodel_getstats(struct codel *c, struct codel_stats *s)
473287009Sloos{
474287009Sloos	*s = c->stats;
475287009Sloos}
476287009Sloos
477287009Sloos#endif /* ALTQ_CODEL */
478