1287009Sloos/* 2287009Sloos * CoDel - The Controlled-Delay Active Queue Management algorithm 3287009Sloos * 4287120Sloos * Copyright (C) 2013 Ermal Lu��i <eri@FreeBSD.org> 5287009Sloos * Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com> 6287009Sloos * Copyright (C) 2011-2012 Van Jacobson <van@pollere.net> 7287009Sloos * Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net> 8287009Sloos * Copyright (C) 2012 Eric Dumazet <edumazet@google.com> 9287009Sloos * 10287009Sloos * Redistribution and use in source and binary forms, with or without 11287009Sloos * modification, are permitted provided that the following conditions 12287009Sloos * are met: 13287009Sloos * 1. Redistributions of source code must retain the above copyright 14287009Sloos * notice, this list of conditions, and the following disclaimer, 15287009Sloos * without modification. 16287009Sloos * 2. Redistributions in binary form must reproduce the above copyright 17287009Sloos * notice, this list of conditions and the following disclaimer in the 18287009Sloos * documentation and/or other materials provided with the distribution. 19287009Sloos * 3. The names of the authors may not be used to endorse or promote products 20287009Sloos * derived from this software without specific prior written permission. 21287009Sloos * 22287009Sloos * Alternatively, provided that this notice is retained in full, this 23287009Sloos * software may be distributed under the terms of the GNU General 24287009Sloos * Public License ("GPL") version 2, in which case the provisions of the 25287009Sloos * GPL apply INSTEAD OF those given above. 26287009Sloos * 27287009Sloos * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 28287009Sloos * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 29287009Sloos * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 30287009Sloos * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 31287009Sloos * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 32287009Sloos * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 33287009Sloos * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 34287009Sloos * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 35287009Sloos * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 36287009Sloos * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 37287009Sloos * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 38287009Sloos * DAMAGE. 39287009Sloos * 40287009Sloos * $FreeBSD$ 41287009Sloos */ 42287009Sloos#include "opt_altq.h" 43287009Sloos#include "opt_inet.h" 44287009Sloos#include "opt_inet6.h" 45287009Sloos 46287009Sloos#ifdef ALTQ_CODEL /* CoDel is enabled by ALTQ_CODEL option in opt_altq.h */ 47287009Sloos 48287009Sloos#include <sys/param.h> 49287009Sloos#include <sys/malloc.h> 50287009Sloos#include <sys/mbuf.h> 51287009Sloos#include <sys/socket.h> 52287009Sloos#include <sys/systm.h> 53287009Sloos 54287009Sloos#include <net/if.h> 55287009Sloos#include <net/if_var.h> 56287009Sloos#include <netinet/in.h> 57287009Sloos 58287009Sloos#include <netpfil/pf/pf.h> 59287009Sloos#include <netpfil/pf/pf_altq.h> 60287009Sloos#include <net/altq/if_altq.h> 61287009Sloos#include <net/altq/altq.h> 62287009Sloos#include <net/altq/altq_codel.h> 63287009Sloos 64287009Sloosstatic int codel_should_drop(struct codel *, class_queue_t *, 65287009Sloos struct mbuf *, u_int64_t); 66287009Sloosstatic void codel_Newton_step(struct codel_vars *); 67287009Sloosstatic u_int64_t codel_control_law(u_int64_t t, u_int64_t, u_int32_t); 68287009Sloos 69287009Sloos#define codel_time_after(a, b) ((int64_t)(a) - (int64_t)(b) > 0) 70287009Sloos#define codel_time_after_eq(a, b) ((int64_t)(a) - (int64_t)(b) >= 0) 71287009Sloos#define codel_time_before(a, b) ((int64_t)(a) - (int64_t)(b) < 0) 72287009Sloos#define codel_time_before_eq(a, b) ((int64_t)(a) - (int64_t)(b) <= 0) 73287009Sloos 74287009Sloosstatic int codel_request(struct ifaltq *, int, void *); 75287009Sloos 76287009Sloosstatic int codel_enqueue(struct ifaltq *, struct mbuf *, struct altq_pktattr *); 77287009Sloosstatic struct mbuf *codel_dequeue(struct ifaltq *, int); 78287009Sloos 79287009Sloosint 80287009Slooscodel_pfattach(struct pf_altq *a) 81287009Sloos{ 82287009Sloos struct ifnet *ifp; 83287009Sloos 84287009Sloos if ((ifp = ifunit(a->ifname)) == NULL || a->altq_disc == NULL) 85287009Sloos return (EINVAL); 86287009Sloos 87287009Sloos return (altq_attach(&ifp->if_snd, ALTQT_CODEL, a->altq_disc, 88287009Sloos codel_enqueue, codel_dequeue, codel_request, NULL, NULL)); 89287009Sloos} 90287009Sloos 91287009Sloosint 92287009Slooscodel_add_altq(struct pf_altq *a) 93287009Sloos{ 94287009Sloos struct codel_if *cif; 95287009Sloos struct ifnet *ifp; 96287009Sloos struct codel_opts *opts; 97287009Sloos 98287009Sloos if ((ifp = ifunit(a->ifname)) == NULL) 99287009Sloos return (EINVAL); 100287009Sloos if (!ALTQ_IS_READY(&ifp->if_snd)) 101287009Sloos return (ENODEV); 102287009Sloos 103287009Sloos opts = &a->pq_u.codel_opts; 104287009Sloos 105287009Sloos cif = malloc(sizeof(struct codel_if), M_DEVBUF, M_NOWAIT | M_ZERO); 106287009Sloos if (cif == NULL) 107287009Sloos return (ENOMEM); 108287009Sloos cif->cif_bandwidth = a->ifbandwidth; 109287009Sloos cif->cif_ifq = &ifp->if_snd; 110287009Sloos 111287009Sloos cif->cl_q = malloc(sizeof(class_queue_t), M_DEVBUF, M_NOWAIT | M_ZERO); 112287009Sloos if (cif->cl_q == NULL) { 113287009Sloos free(cif, M_DEVBUF); 114287009Sloos return (ENOMEM); 115287009Sloos } 116287009Sloos 117287009Sloos if (a->qlimit == 0) 118287009Sloos a->qlimit = 50; /* use default. */ 119287009Sloos qlimit(cif->cl_q) = a->qlimit; 120287009Sloos qtype(cif->cl_q) = Q_CODEL; 121287009Sloos qlen(cif->cl_q) = 0; 122287009Sloos qsize(cif->cl_q) = 0; 123287009Sloos 124287009Sloos if (opts->target == 0) 125287009Sloos opts->target = 5; 126287009Sloos if (opts->interval == 0) 127287009Sloos opts->interval = 100; 128287009Sloos cif->codel.params.target = machclk_freq * opts->target / 1000; 129287009Sloos cif->codel.params.interval = machclk_freq * opts->interval / 1000; 130287009Sloos cif->codel.params.ecn = opts->ecn; 131287009Sloos cif->codel.stats.maxpacket = 256; 132287009Sloos 133287009Sloos cif->cl_stats.qlength = qlen(cif->cl_q); 134287009Sloos cif->cl_stats.qlimit = qlimit(cif->cl_q); 135287009Sloos 136287009Sloos /* keep the state in pf_altq */ 137287009Sloos a->altq_disc = cif; 138287009Sloos 139287009Sloos return (0); 140287009Sloos} 141287009Sloos 142287009Sloosint 143287009Slooscodel_remove_altq(struct pf_altq *a) 144287009Sloos{ 145287009Sloos struct codel_if *cif; 146287009Sloos 147287009Sloos if ((cif = a->altq_disc) == NULL) 148287009Sloos return (EINVAL); 149287009Sloos a->altq_disc = NULL; 150287009Sloos 151287009Sloos if (cif->cl_q) 152287009Sloos free(cif->cl_q, M_DEVBUF); 153287009Sloos free(cif, M_DEVBUF); 154287009Sloos 155287009Sloos return (0); 156287009Sloos} 157287009Sloos 158287009Sloosint 159287009Slooscodel_getqstats(struct pf_altq *a, void *ubuf, int *nbytes) 160287009Sloos{ 161287009Sloos struct codel_if *cif; 162287009Sloos struct codel_ifstats stats; 163287009Sloos int error = 0; 164287009Sloos 165287009Sloos if ((cif = altq_lookup(a->ifname, ALTQT_CODEL)) == NULL) 166287009Sloos return (EBADF); 167287009Sloos 168287009Sloos if (*nbytes < sizeof(stats)) 169287009Sloos return (EINVAL); 170287009Sloos 171287009Sloos stats = cif->cl_stats; 172287009Sloos stats.stats = cif->codel.stats; 173287009Sloos 174287009Sloos if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0) 175287009Sloos return (error); 176287009Sloos *nbytes = sizeof(stats); 177287009Sloos 178287009Sloos return (0); 179287009Sloos} 180287009Sloos 181287009Sloosstatic int 182287009Slooscodel_request(struct ifaltq *ifq, int req, void *arg) 183287009Sloos{ 184287009Sloos struct codel_if *cif = (struct codel_if *)ifq->altq_disc; 185287009Sloos struct mbuf *m; 186287009Sloos 187287009Sloos IFQ_LOCK_ASSERT(ifq); 188287009Sloos 189287009Sloos switch (req) { 190287009Sloos case ALTRQ_PURGE: 191287009Sloos if (!ALTQ_IS_ENABLED(cif->cif_ifq)) 192287009Sloos break; 193287009Sloos 194287009Sloos if (qempty(cif->cl_q)) 195287009Sloos break; 196287009Sloos 197287009Sloos while ((m = _getq(cif->cl_q)) != NULL) { 198287009Sloos PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); 199287009Sloos m_freem(m); 200287009Sloos IFQ_DEC_LEN(cif->cif_ifq); 201287009Sloos } 202287009Sloos cif->cif_ifq->ifq_len = 0; 203287009Sloos break; 204287009Sloos } 205287009Sloos 206287009Sloos return (0); 207287009Sloos} 208287009Sloos 209287009Sloosstatic int 210287009Slooscodel_enqueue(struct ifaltq *ifq, struct mbuf *m, struct altq_pktattr *pktattr) 211287009Sloos{ 212287009Sloos 213287009Sloos struct codel_if *cif = (struct codel_if *) ifq->altq_disc; 214287009Sloos 215287009Sloos IFQ_LOCK_ASSERT(ifq); 216287009Sloos 217287009Sloos /* grab class set by classifier */ 218287009Sloos if ((m->m_flags & M_PKTHDR) == 0) { 219287009Sloos /* should not happen */ 220287009Sloos printf("altq: packet for %s does not have pkthdr\n", 221287009Sloos ifq->altq_ifp->if_xname); 222287009Sloos m_freem(m); 223287009Sloos PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); 224287009Sloos return (ENOBUFS); 225287009Sloos } 226287009Sloos 227287009Sloos if (codel_addq(&cif->codel, cif->cl_q, m)) { 228287009Sloos PKTCNTR_ADD(&cif->cl_stats.cl_dropcnt, m_pktlen(m)); 229287009Sloos return (ENOBUFS); 230287009Sloos } 231287009Sloos IFQ_INC_LEN(ifq); 232287009Sloos 233287009Sloos return (0); 234287009Sloos} 235287009Sloos 236287009Sloosstatic struct mbuf * 237287009Slooscodel_dequeue(struct ifaltq *ifq, int op) 238287009Sloos{ 239287009Sloos struct codel_if *cif = (struct codel_if *)ifq->altq_disc; 240287009Sloos struct mbuf *m; 241287009Sloos 242287009Sloos IFQ_LOCK_ASSERT(ifq); 243287009Sloos 244287009Sloos if (IFQ_IS_EMPTY(ifq)) 245287009Sloos return (NULL); 246287009Sloos 247287009Sloos if (op == ALTDQ_POLL) 248287009Sloos return (qhead(cif->cl_q)); 249287009Sloos 250287009Sloos 251287009Sloos m = codel_getq(&cif->codel, cif->cl_q); 252287009Sloos if (m != NULL) { 253287009Sloos IFQ_DEC_LEN(ifq); 254287009Sloos PKTCNTR_ADD(&cif->cl_stats.cl_xmitcnt, m_pktlen(m)); 255287009Sloos return (m); 256287009Sloos } 257287009Sloos 258287009Sloos return (NULL); 259287009Sloos} 260287009Sloos 261287009Sloosstruct codel * 262287009Slooscodel_alloc(int target, int interval, int ecn) 263287009Sloos{ 264287009Sloos struct codel *c; 265287009Sloos 266287009Sloos c = malloc(sizeof(*c), M_DEVBUF, M_NOWAIT | M_ZERO); 267287009Sloos if (c != NULL) { 268287009Sloos c->params.target = machclk_freq * target / 1000; 269287009Sloos c->params.interval = machclk_freq * interval / 1000; 270287009Sloos c->params.ecn = ecn; 271287009Sloos c->stats.maxpacket = 256; 272287009Sloos } 273287009Sloos 274287009Sloos return (c); 275287009Sloos} 276287009Sloos 277287009Sloosvoid 278287009Slooscodel_destroy(struct codel *c) 279287009Sloos{ 280287009Sloos 281287009Sloos free(c, M_DEVBUF); 282287009Sloos} 283287009Sloos 284287009Sloos#define MTAG_CODEL 1438031249 285287009Sloosint 286287009Slooscodel_addq(struct codel *c, class_queue_t *q, struct mbuf *m) 287287009Sloos{ 288287009Sloos struct m_tag *mtag; 289287009Sloos uint64_t *enqueue_time; 290287009Sloos 291287009Sloos if (qlen(q) < qlimit(q)) { 292287009Sloos mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); 293287009Sloos if (mtag == NULL) 294287009Sloos mtag = m_tag_alloc(MTAG_CODEL, 0, sizeof(uint64_t), 295287009Sloos M_NOWAIT); 296287009Sloos if (mtag == NULL) { 297287009Sloos m_freem(m); 298287009Sloos return (-1); 299287009Sloos } 300287009Sloos enqueue_time = (uint64_t *)(mtag + 1); 301287009Sloos *enqueue_time = read_machclk(); 302287009Sloos m_tag_prepend(m, mtag); 303287009Sloos _addq(q, m); 304287009Sloos return (0); 305287009Sloos } 306287009Sloos c->drop_overlimit++; 307287009Sloos m_freem(m); 308287009Sloos 309287009Sloos return (-1); 310287009Sloos} 311287009Sloos 312287009Sloosstatic int 313287009Slooscodel_should_drop(struct codel *c, class_queue_t *q, struct mbuf *m, 314287009Sloos u_int64_t now) 315287009Sloos{ 316287009Sloos struct m_tag *mtag; 317287009Sloos uint64_t *enqueue_time; 318287009Sloos 319287009Sloos if (m == NULL) { 320287009Sloos c->vars.first_above_time = 0; 321287009Sloos return (0); 322287009Sloos } 323287009Sloos 324287009Sloos mtag = m_tag_locate(m, MTAG_CODEL, 0, NULL); 325287009Sloos if (mtag == NULL) { 326287009Sloos /* Only one warning per second. */ 327287009Sloos if (ppsratecheck(&c->last_log, &c->last_pps, 1)) 328287009Sloos printf("%s: could not found the packet mtag!\n", 329287009Sloos __func__); 330287009Sloos c->vars.first_above_time = 0; 331287009Sloos return (0); 332287009Sloos } 333287009Sloos enqueue_time = (uint64_t *)(mtag + 1); 334287009Sloos c->vars.ldelay = now - *enqueue_time; 335287009Sloos c->stats.maxpacket = MAX(c->stats.maxpacket, m_pktlen(m)); 336287009Sloos 337287009Sloos if (codel_time_before(c->vars.ldelay, c->params.target) || 338287009Sloos qsize(q) <= c->stats.maxpacket) { 339287009Sloos /* went below - stay below for at least interval */ 340287009Sloos c->vars.first_above_time = 0; 341287009Sloos return (0); 342287009Sloos } 343287009Sloos if (c->vars.first_above_time == 0) { 344287009Sloos /* just went above from below. If we stay above 345287009Sloos * for at least interval we'll say it's ok to drop 346287009Sloos */ 347287009Sloos c->vars.first_above_time = now + c->params.interval; 348287009Sloos return (0); 349287009Sloos } 350287009Sloos if (codel_time_after(now, c->vars.first_above_time)) 351287009Sloos return (1); 352287009Sloos 353287009Sloos return (0); 354287009Sloos} 355287009Sloos 356287009Sloos/* 357287009Sloos * Run a Newton method step: 358287009Sloos * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2) 359287009Sloos * 360287009Sloos * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32 361287009Sloos */ 362287009Sloosstatic void 363287009Slooscodel_Newton_step(struct codel_vars *vars) 364287009Sloos{ 365287009Sloos uint32_t invsqrt, invsqrt2; 366287009Sloos uint64_t val; 367287009Sloos 368287009Sloos/* sizeof_in_bits(rec_inv_sqrt) */ 369287009Sloos#define REC_INV_SQRT_BITS (8 * sizeof(u_int16_t)) 370287009Sloos/* needed shift to get a Q0.32 number from rec_inv_sqrt */ 371287009Sloos#define REC_INV_SQRT_SHIFT (32 - REC_INV_SQRT_BITS) 372287009Sloos 373287009Sloos invsqrt = ((u_int32_t)vars->rec_inv_sqrt) << REC_INV_SQRT_SHIFT; 374287009Sloos invsqrt2 = ((u_int64_t)invsqrt * invsqrt) >> 32; 375287009Sloos val = (3LL << 32) - ((u_int64_t)vars->count * invsqrt2); 376287009Sloos val >>= 2; /* avoid overflow in following multiply */ 377287009Sloos val = (val * invsqrt) >> (32 - 2 + 1); 378287009Sloos 379287009Sloos vars->rec_inv_sqrt = val >> REC_INV_SQRT_SHIFT; 380287009Sloos} 381287009Sloos 382287009Sloosstatic u_int64_t 383287009Slooscodel_control_law(u_int64_t t, u_int64_t interval, u_int32_t rec_inv_sqrt) 384287009Sloos{ 385287009Sloos 386287009Sloos return (t + (u_int32_t)(((u_int64_t)interval * 387287009Sloos (rec_inv_sqrt << REC_INV_SQRT_SHIFT)) >> 32)); 388287009Sloos} 389287009Sloos 390287009Sloosstruct mbuf * 391287009Slooscodel_getq(struct codel *c, class_queue_t *q) 392287009Sloos{ 393287009Sloos struct mbuf *m; 394287009Sloos u_int64_t now; 395287009Sloos int drop; 396287009Sloos 397287009Sloos if ((m = _getq(q)) == NULL) { 398287009Sloos c->vars.dropping = 0; 399287009Sloos return (m); 400287009Sloos } 401287009Sloos 402287009Sloos now = read_machclk(); 403287009Sloos drop = codel_should_drop(c, q, m, now); 404287009Sloos if (c->vars.dropping) { 405287009Sloos if (!drop) { 406287009Sloos /* sojourn time below target - leave dropping state */ 407287009Sloos c->vars.dropping = 0; 408287009Sloos } else if (codel_time_after_eq(now, c->vars.drop_next)) { 409287009Sloos /* It's time for the next drop. Drop the current 410287009Sloos * packet and dequeue the next. The dequeue might 411287009Sloos * take us out of dropping state. 412287009Sloos * If not, schedule the next drop. 413287009Sloos * A large backlog might result in drop rates so high 414287009Sloos * that the next drop should happen now, 415287009Sloos * hence the while loop. 416287009Sloos */ 417287009Sloos while (c->vars.dropping && 418287009Sloos codel_time_after_eq(now, c->vars.drop_next)) { 419287009Sloos c->vars.count++; /* don't care of possible wrap 420287009Sloos * since there is no more 421287009Sloos * divide */ 422287009Sloos codel_Newton_step(&c->vars); 423287009Sloos /* TODO ECN */ 424287009Sloos PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); 425287009Sloos m_freem(m); 426287009Sloos m = _getq(q); 427287009Sloos if (!codel_should_drop(c, q, m, now)) 428287009Sloos /* leave dropping state */ 429287009Sloos c->vars.dropping = 0; 430287009Sloos else 431287009Sloos /* and schedule the next drop */ 432287009Sloos c->vars.drop_next = 433287009Sloos codel_control_law(c->vars.drop_next, 434287009Sloos c->params.interval, 435287009Sloos c->vars.rec_inv_sqrt); 436287009Sloos } 437287009Sloos } 438287009Sloos } else if (drop) { 439287009Sloos /* TODO ECN */ 440287009Sloos PKTCNTR_ADD(&c->stats.drop_cnt, m_pktlen(m)); 441287009Sloos m_freem(m); 442287009Sloos 443287009Sloos m = _getq(q); 444287009Sloos drop = codel_should_drop(c, q, m, now); 445287009Sloos 446287009Sloos c->vars.dropping = 1; 447287009Sloos /* if min went above target close to when we last went below it 448287009Sloos * assume that the drop rate that controlled the queue on the 449287009Sloos * last cycle is a good starting point to control it now. 450287009Sloos */ 451287009Sloos if (codel_time_before(now - c->vars.drop_next, 452287009Sloos 16 * c->params.interval)) { 453287009Sloos c->vars.count = (c->vars.count - c->vars.lastcount) | 1; 454287009Sloos /* we dont care if rec_inv_sqrt approximation 455287009Sloos * is not very precise : 456287009Sloos * Next Newton steps will correct it quadratically. 457287009Sloos */ 458287009Sloos codel_Newton_step(&c->vars); 459287009Sloos } else { 460287009Sloos c->vars.count = 1; 461287009Sloos c->vars.rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; 462287009Sloos } 463287009Sloos c->vars.lastcount = c->vars.count; 464287009Sloos c->vars.drop_next = codel_control_law(now, c->params.interval, 465287009Sloos c->vars.rec_inv_sqrt); 466287009Sloos } 467287009Sloos 468287009Sloos return (m); 469287009Sloos} 470287009Sloos 471287009Sloosvoid 472287009Slooscodel_getstats(struct codel *c, struct codel_stats *s) 473287009Sloos{ 474287009Sloos *s = c->stats; 475287009Sloos} 476287009Sloos 477287009Sloos#endif /* ALTQ_CODEL */ 478