1204591Sluigi/* 2204591Sluigi * Copyright (c) 2010 Riccardo Panicucci, Universita` di Pisa 3204591Sluigi * Copyright (c) 2000-2002 Luigi Rizzo, Universita` di Pisa 4204591Sluigi * All rights reserved 5204591Sluigi * 6204591Sluigi * Redistribution and use in source and binary forms, with or without 7204591Sluigi * modification, are permitted provided that the following conditions 8204591Sluigi * are met: 9204591Sluigi * 1. Redistributions of source code must retain the above copyright 10204591Sluigi * notice, this list of conditions and the following disclaimer. 11204591Sluigi * 2. Redistributions in binary form must reproduce the above copyright 12204591Sluigi * notice, this list of conditions and the following disclaimer in the 13204591Sluigi * documentation and/or other materials provided with the distribution. 14204591Sluigi * 15204591Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16204591Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17204591Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18204591Sluigi * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19204591Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20204591Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21204591Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22204591Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23204591Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24204591Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25204591Sluigi * SUCH DAMAGE. 26204591Sluigi */ 27204591Sluigi 28204591Sluigi/* 29204591Sluigi * $FreeBSD: stable/11/sys/netpfil/ipfw/dn_sched_wf2q.c 325730 2017-11-12 01:26:43Z truckman $ 30204591Sluigi */ 31204591Sluigi 32204591Sluigi#ifdef _KERNEL 33204591Sluigi#include <sys/malloc.h> 34204591Sluigi#include <sys/socket.h> 35204591Sluigi#include <sys/socketvar.h> 36204591Sluigi#include <sys/kernel.h> 37325730Struckman#include <sys/lock.h> 38204591Sluigi#include <sys/mbuf.h> 39204591Sluigi#include <sys/module.h> 40325730Struckman#include <sys/rwlock.h> 41204591Sluigi#include <net/if.h> /* IFNAMSIZ */ 42204591Sluigi#include <netinet/in.h> 43204591Sluigi#include <netinet/ip_var.h> /* ipfw_rule_ref */ 44204591Sluigi#include <netinet/ip_fw.h> /* flow_id */ 45204591Sluigi#include <netinet/ip_dummynet.h> 46325730Struckman#include <netpfil/ipfw/ip_fw_private.h> 47240494Sglebius#include <netpfil/ipfw/dn_heap.h> 48240494Sglebius#include <netpfil/ipfw/ip_dn_private.h> 49300779Struckman#ifdef NEW_AQM 50300779Struckman#include <netpfil/ipfw/dn_aqm.h> 51300779Struckman#endif 52240494Sglebius#include <netpfil/ipfw/dn_sched.h> 53204591Sluigi#else 54204591Sluigi#include <dn_test.h> 55204591Sluigi#endif 56204591Sluigi 57204591Sluigi#ifndef MAX64 58204591Sluigi#define MAX64(x,y) (( (int64_t) ( (y)-(x) )) > 0 ) ? (y) : (x) 59204591Sluigi#endif 60204591Sluigi 61204591Sluigi/* 62204591Sluigi * timestamps are computed on 64 bit using fixed point arithmetic. 63204591Sluigi * LMAX_BITS, WMAX_BITS are the max number of bits for the packet len 64204591Sluigi * and sum of weights, respectively. FRAC_BITS is the number of 65204591Sluigi * fractional bits. We want FRAC_BITS >> WMAX_BITS to avoid too large 66204591Sluigi * errors when computing the inverse, FRAC_BITS < 32 so we can do 1/w 67204591Sluigi * using an unsigned 32-bit division, and to avoid wraparounds we need 68204591Sluigi * LMAX_BITS + WMAX_BITS + FRAC_BITS << 64 69204591Sluigi * As an example 70204591Sluigi * FRAC_BITS = 26, LMAX_BITS=14, WMAX_BITS = 19 71204591Sluigi */ 72204591Sluigi#ifndef FRAC_BITS 73204591Sluigi#define FRAC_BITS 28 /* shift for fixed point arithmetic */ 74204591Sluigi#define ONE_FP (1UL << FRAC_BITS) 75204591Sluigi#endif 76204591Sluigi 77204591Sluigi/* 78204591Sluigi * Private information for the scheduler instance: 79204591Sluigi * sch_heap (key is Finish time) returns the next queue to serve 80204591Sluigi * ne_heap (key is Start time) stores not-eligible queues 81204591Sluigi * idle_heap (key=start/finish time) stores idle flows. It must 82204591Sluigi * support extract-from-middle. 83204591Sluigi * A flow is only in 1 of the three heaps. 84204591Sluigi * XXX todo: use a more efficient data structure, e.g. a tree sorted 85204591Sluigi * by F with min_subtree(S) in each node 86204591Sluigi */ 87204591Sluigistruct wf2qp_si { 88204591Sluigi struct dn_heap sch_heap; /* top extract - key Finish time */ 89204591Sluigi struct dn_heap ne_heap; /* top extract - key Start time */ 90204591Sluigi struct dn_heap idle_heap; /* random extract - key Start=Finish time */ 91204591Sluigi uint64_t V; /* virtual time */ 92204591Sluigi uint32_t inv_wsum; /* inverse of sum of weights */ 93204591Sluigi uint32_t wsum; /* sum of weights */ 94204591Sluigi}; 95204591Sluigi 96204591Sluigistruct wf2qp_queue { 97204591Sluigi struct dn_queue _q; 98204591Sluigi uint64_t S, F; /* start time, finish time */ 99204591Sluigi uint32_t inv_w; /* ONE_FP / weight */ 100204591Sluigi int32_t heap_pos; /* position (index) of struct in heap */ 101204591Sluigi}; 102204591Sluigi 103204591Sluigi/* 104204591Sluigi * This file implements a WF2Q+ scheduler as it has been in dummynet 105204591Sluigi * since 2000. 106204591Sluigi * The scheduler supports per-flow queues and has O(log N) complexity. 107204591Sluigi * 108204591Sluigi * WF2Q+ needs to drain entries from the idle heap so that we 109204591Sluigi * can keep the sum of weights up to date. We can do it whenever 110204591Sluigi * we get a chance, or periodically, or following some other 111204591Sluigi * strategy. The function idle_check() drains at most N elements 112204591Sluigi * from the idle heap. 113204591Sluigi */ 114204591Sluigistatic void 115204591Sluigiidle_check(struct wf2qp_si *si, int n, int force) 116204591Sluigi{ 117204591Sluigi struct dn_heap *h = &si->idle_heap; 118204591Sluigi while (n-- > 0 && h->elements > 0 && 119204591Sluigi (force || DN_KEY_LT(HEAP_TOP(h)->key, si->V))) { 120204591Sluigi struct dn_queue *q = HEAP_TOP(h)->object; 121204591Sluigi struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q; 122204591Sluigi 123204591Sluigi heap_extract(h, NULL); 124204591Sluigi /* XXX to let the flowset delete the queue we should 125204591Sluigi * mark it as 'unused' by the scheduler. 126204591Sluigi */ 127204591Sluigi alg_fq->S = alg_fq->F + 1; /* Mark timestamp as invalid. */ 128204591Sluigi si->wsum -= q->fs->fs.par[0]; /* adjust sum of weights */ 129204591Sluigi if (si->wsum > 0) 130204591Sluigi si->inv_wsum = ONE_FP/si->wsum; 131204591Sluigi } 132204591Sluigi} 133204591Sluigi 134206845Sluigistatic int 135204591Sluigiwf2qp_enqueue(struct dn_sch_inst *_si, struct dn_queue *q, struct mbuf *m) 136204591Sluigi{ 137204591Sluigi struct dn_fsk *fs = q->fs; 138204591Sluigi struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); 139204591Sluigi struct wf2qp_queue *alg_fq; 140204591Sluigi uint64_t len = m->m_pkthdr.len; 141204591Sluigi 142204591Sluigi if (m != q->mq.head) { 143204591Sluigi if (dn_enqueue(q, m, 0)) /* packet was dropped */ 144204591Sluigi return 1; 145204591Sluigi if (m != q->mq.head) /* queue was already busy */ 146204591Sluigi return 0; 147204591Sluigi } 148204591Sluigi 149206845Sluigi /* If reach this point, queue q was idle */ 150204591Sluigi alg_fq = (struct wf2qp_queue *)q; 151204591Sluigi 152204591Sluigi if (DN_KEY_LT(alg_fq->F, alg_fq->S)) { 153204591Sluigi /* F<S means timestamps are invalid ->brand new queue. */ 154204591Sluigi alg_fq->S = si->V; /* init start time */ 155204591Sluigi si->wsum += fs->fs.par[0]; /* add weight of new queue. */ 156204591Sluigi si->inv_wsum = ONE_FP/si->wsum; 157204591Sluigi } else { /* if it was idle then it was in the idle heap */ 158204591Sluigi heap_extract(&si->idle_heap, q); 159204591Sluigi alg_fq->S = MAX64(alg_fq->F, si->V); /* compute new S */ 160204591Sluigi } 161204591Sluigi alg_fq->F = alg_fq->S + len * alg_fq->inv_w; 162204591Sluigi 163204591Sluigi /* if nothing is backlogged, make sure this flow is eligible */ 164204591Sluigi if (si->ne_heap.elements == 0 && si->sch_heap.elements == 0) 165204591Sluigi si->V = MAX64(alg_fq->S, si->V); 166204591Sluigi 167204591Sluigi /* 168204591Sluigi * Look at eligibility. A flow is not eligibile if S>V (when 169204591Sluigi * this happens, it means that there is some other flow already 170204591Sluigi * scheduled for the same pipe, so the sch_heap cannot be 171204591Sluigi * empty). If the flow is not eligible we just store it in the 172204591Sluigi * ne_heap. Otherwise, we store in the sch_heap. 173204591Sluigi * Note that for all flows in sch_heap (SCH), S_i <= V, 174204591Sluigi * and for all flows in ne_heap (NEH), S_i > V. 175204591Sluigi * So when we need to compute max(V, min(S_i)) forall i in 176204591Sluigi * SCH+NEH, we only need to look into NEH. 177204591Sluigi */ 178204591Sluigi if (DN_KEY_LT(si->V, alg_fq->S)) { 179204591Sluigi /* S>V means flow Not eligible. */ 180204591Sluigi if (si->sch_heap.elements == 0) 181204591Sluigi D("++ ouch! not eligible but empty scheduler!"); 182204591Sluigi heap_insert(&si->ne_heap, alg_fq->S, q); 183204591Sluigi } else { 184204591Sluigi heap_insert(&si->sch_heap, alg_fq->F, q); 185204591Sluigi } 186204591Sluigi return 0; 187204591Sluigi} 188204591Sluigi 189204591Sluigi/* XXX invariant: sch > 0 || V >= min(S in neh) */ 190204591Sluigistatic struct mbuf * 191204591Sluigiwf2qp_dequeue(struct dn_sch_inst *_si) 192204591Sluigi{ 193204591Sluigi /* Access scheduler instance private data */ 194204591Sluigi struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); 195204591Sluigi struct mbuf *m; 196204591Sluigi struct dn_queue *q; 197204591Sluigi struct dn_heap *sch = &si->sch_heap; 198204591Sluigi struct dn_heap *neh = &si->ne_heap; 199204591Sluigi struct wf2qp_queue *alg_fq; 200204591Sluigi 201204591Sluigi if (sch->elements == 0 && neh->elements == 0) { 202204591Sluigi /* we have nothing to do. We could kill the idle heap 203204591Sluigi * altogether and reset V 204204591Sluigi */ 205204591Sluigi idle_check(si, 0x7fffffff, 1); 206204591Sluigi si->V = 0; 207204591Sluigi si->wsum = 0; /* should be set already */ 208204591Sluigi return NULL; /* quick return if nothing to do */ 209204591Sluigi } 210204591Sluigi idle_check(si, 1, 0); /* drain something from the idle heap */ 211204591Sluigi 212204591Sluigi /* make sure at least one element is eligible, bumping V 213204591Sluigi * and moving entries that have become eligible. 214204591Sluigi * We need to repeat the first part twice, before and 215204591Sluigi * after extracting the candidate, or enqueue() will 216204591Sluigi * find the data structure in a wrong state. 217204591Sluigi */ 218204591Sluigi m = NULL; 219204591Sluigi for(;;) { 220204591Sluigi /* 221204591Sluigi * Compute V = max(V, min(S_i)). Remember that all elements 222204591Sluigi * in sch have by definition S_i <= V so if sch is not empty, 223204591Sluigi * V is surely the max and we must not update it. Conversely, 224204591Sluigi * if sch is empty we only need to look at neh. 225204591Sluigi * We don't need to move the queues, as it will be done at the 226204591Sluigi * next enqueue 227204591Sluigi */ 228204591Sluigi if (sch->elements == 0 && neh->elements > 0) { 229204591Sluigi si->V = MAX64(si->V, HEAP_TOP(neh)->key); 230204591Sluigi } 231204591Sluigi while (neh->elements > 0 && 232204591Sluigi DN_KEY_LEQ(HEAP_TOP(neh)->key, si->V)) { 233204591Sluigi q = HEAP_TOP(neh)->object; 234204591Sluigi alg_fq = (struct wf2qp_queue *)q; 235204591Sluigi heap_extract(neh, NULL); 236204591Sluigi heap_insert(sch, alg_fq->F, q); 237204591Sluigi } 238204591Sluigi if (m) /* pkt found in previous iteration */ 239204591Sluigi break; 240204591Sluigi /* ok we have at least one eligible pkt */ 241204591Sluigi q = HEAP_TOP(sch)->object; 242204591Sluigi alg_fq = (struct wf2qp_queue *)q; 243204591Sluigi m = dn_dequeue(q); 244204591Sluigi heap_extract(sch, NULL); /* Remove queue from heap. */ 245204591Sluigi si->V += (uint64_t)(m->m_pkthdr.len) * si->inv_wsum; 246204591Sluigi alg_fq->S = alg_fq->F; /* Update start time. */ 247204591Sluigi if (q->mq.head == 0) { /* not backlogged any more. */ 248204591Sluigi heap_insert(&si->idle_heap, alg_fq->F, q); 249204591Sluigi } else { /* Still backlogged. */ 250204591Sluigi /* Update F, store in neh or sch */ 251204591Sluigi uint64_t len = q->mq.head->m_pkthdr.len; 252204591Sluigi alg_fq->F += len * alg_fq->inv_w; 253204591Sluigi if (DN_KEY_LEQ(alg_fq->S, si->V)) { 254204591Sluigi heap_insert(sch, alg_fq->F, q); 255204591Sluigi } else { 256204591Sluigi heap_insert(neh, alg_fq->S, q); 257204591Sluigi } 258204591Sluigi } 259204591Sluigi } 260204591Sluigi return m; 261204591Sluigi} 262204591Sluigi 263204591Sluigistatic int 264204591Sluigiwf2qp_new_sched(struct dn_sch_inst *_si) 265204591Sluigi{ 266204591Sluigi struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); 267204591Sluigi int ofs = offsetof(struct wf2qp_queue, heap_pos); 268204591Sluigi 269204591Sluigi /* all heaps support extract from middle */ 270204591Sluigi if (heap_init(&si->idle_heap, 16, ofs) || 271204591Sluigi heap_init(&si->sch_heap, 16, ofs) || 272204591Sluigi heap_init(&si->ne_heap, 16, ofs)) { 273204591Sluigi heap_free(&si->ne_heap); 274204591Sluigi heap_free(&si->sch_heap); 275204591Sluigi heap_free(&si->idle_heap); 276204591Sluigi return ENOMEM; 277204591Sluigi } 278204591Sluigi return 0; 279204591Sluigi} 280204591Sluigi 281204591Sluigistatic int 282204591Sluigiwf2qp_free_sched(struct dn_sch_inst *_si) 283204591Sluigi{ 284204591Sluigi struct wf2qp_si *si = (struct wf2qp_si *)(_si + 1); 285204591Sluigi 286204591Sluigi heap_free(&si->sch_heap); 287204591Sluigi heap_free(&si->ne_heap); 288204591Sluigi heap_free(&si->idle_heap); 289204591Sluigi 290204591Sluigi return 0; 291204591Sluigi} 292204591Sluigi 293204591Sluigistatic int 294204591Sluigiwf2qp_new_fsk(struct dn_fsk *fs) 295204591Sluigi{ 296204591Sluigi ipdn_bound_var(&fs->fs.par[0], 1, 297204591Sluigi 1, 100, "WF2Q+ weight"); 298204591Sluigi return 0; 299204591Sluigi} 300204591Sluigi 301204591Sluigistatic int 302204591Sluigiwf2qp_new_queue(struct dn_queue *_q) 303204591Sluigi{ 304204591Sluigi struct wf2qp_queue *q = (struct wf2qp_queue *)_q; 305204591Sluigi 306204591Sluigi _q->ni.oid.subtype = DN_SCHED_WF2QP; 307204591Sluigi q->F = 0; /* not strictly necessary */ 308204591Sluigi q->S = q->F + 1; /* mark timestamp as invalid. */ 309204591Sluigi q->inv_w = ONE_FP / _q->fs->fs.par[0]; 310204591Sluigi if (_q->mq.head != NULL) { 311204591Sluigi wf2qp_enqueue(_q->_si, _q, _q->mq.head); 312204591Sluigi } 313204591Sluigi return 0; 314204591Sluigi} 315204591Sluigi 316204591Sluigi/* 317204591Sluigi * Called when the infrastructure removes a queue (e.g. flowset 318204591Sluigi * is reconfigured). Nothing to do if we did not 'own' the queue, 319204591Sluigi * otherwise remove it from the right heap and adjust the sum 320204591Sluigi * of weights. 321204591Sluigi */ 322204591Sluigistatic int 323204591Sluigiwf2qp_free_queue(struct dn_queue *q) 324204591Sluigi{ 325204591Sluigi struct wf2qp_queue *alg_fq = (struct wf2qp_queue *)q; 326204591Sluigi struct wf2qp_si *si = (struct wf2qp_si *)(q->_si + 1); 327213267Sluigi 328204591Sluigi if (alg_fq->S >= alg_fq->F + 1) 329204591Sluigi return 0; /* nothing to do, not in any heap */ 330204591Sluigi si->wsum -= q->fs->fs.par[0]; 331204591Sluigi if (si->wsum > 0) 332204591Sluigi si->inv_wsum = ONE_FP/si->wsum; 333204591Sluigi 334204591Sluigi /* extract from the heap. XXX TODO we may need to adjust V 335204591Sluigi * to make sure the invariants hold. 336204591Sluigi */ 337204591Sluigi if (q->mq.head == NULL) { 338204591Sluigi heap_extract(&si->idle_heap, q); 339204591Sluigi } else if (DN_KEY_LT(si->V, alg_fq->S)) { 340204591Sluigi heap_extract(&si->ne_heap, q); 341204591Sluigi } else { 342204591Sluigi heap_extract(&si->sch_heap, q); 343204591Sluigi } 344204591Sluigi return 0; 345204591Sluigi} 346204591Sluigi 347204591Sluigi/* 348204591Sluigi * WF2Q+ scheduler descriptor 349204591Sluigi * contains the type of the scheduler, the name, the size of the 350204591Sluigi * structures and function pointers. 351204591Sluigi */ 352204591Sluigistatic struct dn_alg wf2qp_desc = { 353204591Sluigi _SI( .type = ) DN_SCHED_WF2QP, 354204591Sluigi _SI( .name = ) "WF2Q+", 355204591Sluigi _SI( .flags = ) DN_MULTIQUEUE, 356204591Sluigi 357204591Sluigi /* we need extra space in the si and the queue */ 358204591Sluigi _SI( .schk_datalen = ) 0, 359204591Sluigi _SI( .si_datalen = ) sizeof(struct wf2qp_si), 360204591Sluigi _SI( .q_datalen = ) sizeof(struct wf2qp_queue) - 361204591Sluigi sizeof(struct dn_queue), 362204591Sluigi 363204591Sluigi _SI( .enqueue = ) wf2qp_enqueue, 364204591Sluigi _SI( .dequeue = ) wf2qp_dequeue, 365204591Sluigi 366204591Sluigi _SI( .config = ) NULL, 367204591Sluigi _SI( .destroy = ) NULL, 368204591Sluigi _SI( .new_sched = ) wf2qp_new_sched, 369204591Sluigi _SI( .free_sched = ) wf2qp_free_sched, 370206845Sluigi 371204591Sluigi _SI( .new_fsk = ) wf2qp_new_fsk, 372204591Sluigi _SI( .free_fsk = ) NULL, 373204591Sluigi 374204591Sluigi _SI( .new_queue = ) wf2qp_new_queue, 375204591Sluigi _SI( .free_queue = ) wf2qp_free_queue, 376300779Struckman#ifdef NEW_AQM 377300779Struckman _SI( .getconfig = ) NULL, 378300779Struckman#endif 379300779Struckman 380204591Sluigi}; 381204591Sluigi 382204591Sluigi 383204591SluigiDECLARE_DNSCHED_MODULE(dn_wf2qp, &wf2qp_desc); 384