1300779Struckman/* 2300779Struckman * PIE - Proportional Integral controller Enhanced AQM algorithm. 3300779Struckman * 4300779Struckman * $FreeBSD: stable/11/sys/netpfil/ipfw/dn_aqm_pie.c 318904 2017-05-25 22:39:48Z truckman $ 5300779Struckman * 6300779Struckman * Copyright (C) 2016 Centre for Advanced Internet Architectures, 7300779Struckman * Swinburne University of Technology, Melbourne, Australia. 8300779Struckman * Portions of this code were made possible in part by a gift from 9300779Struckman * The Comcast Innovation Fund. 10300779Struckman * Implemented by Rasool Al-Saadi <ralsaadi@swin.edu.au> 11300779Struckman * 12300779Struckman * Redistribution and use in source and binary forms, with or without 13300779Struckman * modification, are permitted provided that the following conditions 14300779Struckman * are met: 15300779Struckman * 1. Redistributions of source code must retain the above copyright 16300779Struckman * notice, this list of conditions and the following disclaimer. 17300779Struckman * 2. Redistributions in binary form must reproduce the above copyright 18300779Struckman * notice, this list of conditions and the following disclaimer in the 19300779Struckman * documentation and/or other materials provided with the distribution. 20300779Struckman * 21300779Struckman * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 22300779Struckman * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23300779Struckman * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24300779Struckman * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 25300779Struckman * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26300779Struckman * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27300779Struckman * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28300779Struckman * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29300779Struckman * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30300779Struckman * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31300779Struckman * SUCH DAMAGE. 32300779Struckman */ 33300779Struckman 34300779Struckman#include <sys/cdefs.h> 35300779Struckman#include "opt_inet6.h" 36300779Struckman 37300779Struckman#include <sys/param.h> 38300779Struckman#include <sys/systm.h> 39300779Struckman#include <sys/malloc.h> 40300779Struckman#include <sys/mbuf.h> 41300779Struckman#include <sys/kernel.h> 42300779Struckman#include <sys/lock.h> 43300779Struckman#include <sys/module.h> 44300779Struckman#include <sys/mutex.h> 45300779Struckman#include <sys/priv.h> 46300779Struckman#include <sys/proc.h> 47300779Struckman#include <sys/rwlock.h> 48300779Struckman#include <sys/socket.h> 49300779Struckman#include <sys/time.h> 50300779Struckman#include <sys/sysctl.h> 51300779Struckman 52300779Struckman#include <net/if.h> /* IFNAMSIZ, struct ifaddr, ifq head, lock.h mutex.h */ 53300779Struckman#include <net/netisr.h> 54300779Struckman#include <net/vnet.h> 55300779Struckman 56300779Struckman#include <netinet/in.h> 57300779Struckman#include <netinet/ip.h> /* ip_len, ip_off */ 58300779Struckman#include <netinet/ip_var.h> /* ip_output(), IP_FORWARDING */ 59300779Struckman#include <netinet/ip_fw.h> 60300779Struckman#include <netinet/ip_dummynet.h> 61300779Struckman#include <netinet/if_ether.h> /* various ether_* routines */ 62300779Struckman#include <netinet/ip6.h> /* for ip6_input, ip6_output prototypes */ 63300779Struckman#include <netinet6/ip6_var.h> 64300779Struckman#include <netpfil/ipfw/dn_heap.h> 65300779Struckman 66300779Struckman#ifdef NEW_AQM 67300779Struckman#include <netpfil/ipfw/ip_fw_private.h> 68300779Struckman#include <netpfil/ipfw/ip_dn_private.h> 69300779Struckman#include <netpfil/ipfw/dn_aqm.h> 70300779Struckman#include <netpfil/ipfw/dn_aqm_pie.h> 71300779Struckman#include <netpfil/ipfw/dn_sched.h> 72300779Struckman 73300779Struckman/* for debugging */ 74300779Struckman#include <sys/syslog.h> 75300779Struckman 76300779Struckmanstatic struct dn_aqm pie_desc; 77300779Struckman 78300779Struckman/* PIE defaults 79300779Struckman * target=15ms, tupdate=15ms, max_burst=150ms, 80300779Struckman * max_ecnth=0.1, alpha=0.125, beta=1.25, 81300779Struckman */ 82300779Struckmanstruct dn_aqm_pie_parms pie_sysctl = 83300779Struckman { 15 * AQM_TIME_1MS, 15 * AQM_TIME_1MS, 150 * AQM_TIME_1MS, 84300779Struckman PIE_SCALE/10 , PIE_SCALE * 0.125, PIE_SCALE * 1.25 , 85300779Struckman PIE_CAPDROP_ENABLED | PIE_DEPRATEEST_ENABLED | PIE_DERAND_ENABLED }; 86300779Struckman 87300779Struckmanstatic int 88300779Struckmanpie_sysctl_alpha_beta_handler(SYSCTL_HANDLER_ARGS) 89300779Struckman{ 90300779Struckman int error; 91300779Struckman long value; 92300779Struckman 93300779Struckman if (!strcmp(oidp->oid_name,"alpha")) 94300779Struckman value = pie_sysctl.alpha; 95300779Struckman else 96300779Struckman value = pie_sysctl.beta; 97300779Struckman 98300779Struckman value = value * 1000 / PIE_SCALE; 99300779Struckman error = sysctl_handle_long(oidp, &value, 0, req); 100300779Struckman if (error != 0 || req->newptr == NULL) 101300779Struckman return (error); 102300779Struckman if (value < 1 || value > 7 * PIE_SCALE) 103300779Struckman return (EINVAL); 104300779Struckman value = (value * PIE_SCALE) / 1000; 105300779Struckman if (!strcmp(oidp->oid_name,"alpha")) 106300779Struckman pie_sysctl.alpha = value; 107300779Struckman else 108300779Struckman pie_sysctl.beta = value; 109300779Struckman return (0); 110300779Struckman} 111300779Struckman 112300779Struckmanstatic int 113300779Struckmanpie_sysctl_target_tupdate_maxb_handler(SYSCTL_HANDLER_ARGS) 114300779Struckman{ 115300779Struckman int error; 116300779Struckman long value; 117300779Struckman 118300779Struckman if (!strcmp(oidp->oid_name,"target")) 119300779Struckman value = pie_sysctl.qdelay_ref; 120300779Struckman else if (!strcmp(oidp->oid_name,"tupdate")) 121300779Struckman value = pie_sysctl.tupdate; 122300779Struckman else 123300779Struckman value = pie_sysctl.max_burst; 124300779Struckman 125300779Struckman value = value / AQM_TIME_1US; 126300779Struckman error = sysctl_handle_long(oidp, &value, 0, req); 127300779Struckman if (error != 0 || req->newptr == NULL) 128300779Struckman return (error); 129300779Struckman if (value < 1 || value > 10 * AQM_TIME_1S) 130300779Struckman return (EINVAL); 131300779Struckman value = value * AQM_TIME_1US; 132300779Struckman 133300779Struckman if (!strcmp(oidp->oid_name,"target")) 134300779Struckman pie_sysctl.qdelay_ref = value; 135300779Struckman else if (!strcmp(oidp->oid_name,"tupdate")) 136300779Struckman pie_sysctl.tupdate = value; 137300779Struckman else 138300779Struckman pie_sysctl.max_burst = value; 139300779Struckman return (0); 140300779Struckman} 141300779Struckman 142300779Struckmanstatic int 143300779Struckmanpie_sysctl_max_ecnth_handler(SYSCTL_HANDLER_ARGS) 144300779Struckman{ 145300779Struckman int error; 146300779Struckman long value; 147300779Struckman 148300779Struckman value = pie_sysctl.max_ecnth; 149300779Struckman value = value * 1000 / PIE_SCALE; 150300779Struckman error = sysctl_handle_long(oidp, &value, 0, req); 151300779Struckman if (error != 0 || req->newptr == NULL) 152300779Struckman return (error); 153300779Struckman if (value < 1 || value > PIE_SCALE) 154300779Struckman return (EINVAL); 155300779Struckman value = (value * PIE_SCALE) / 1000; 156300779Struckman pie_sysctl.max_ecnth = value; 157300779Struckman return (0); 158300779Struckman} 159300779Struckman 160300779Struckman/* define PIE sysctl variables */ 161300779StruckmanSYSBEGIN(f4) 162300779StruckmanSYSCTL_DECL(_net_inet); 163300779StruckmanSYSCTL_DECL(_net_inet_ip); 164300779StruckmanSYSCTL_DECL(_net_inet_ip_dummynet); 165300779Struckmanstatic SYSCTL_NODE(_net_inet_ip_dummynet, OID_AUTO, 166300779Struckman pie, CTLFLAG_RW, 0, "PIE"); 167300779Struckman 168300779Struckman#ifdef SYSCTL_NODE 169300779StruckmanSYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, target, 170300779Struckman CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, 171300779Struckman pie_sysctl_target_tupdate_maxb_handler, "L", 172300779Struckman "queue target in microsecond"); 173300779StruckmanSYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, tupdate, 174300779Struckman CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, 175300779Struckman pie_sysctl_target_tupdate_maxb_handler, "L", 176300779Struckman "the frequency of drop probability calculation in microsecond"); 177300779StruckmanSYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_burst, 178300779Struckman CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, 179300779Struckman pie_sysctl_target_tupdate_maxb_handler, "L", 180300779Struckman "Burst allowance interval in microsecond"); 181300779Struckman 182300779StruckmanSYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, max_ecnth, 183300779Struckman CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, 184300779Struckman pie_sysctl_max_ecnth_handler, "L", 185300779Struckman "ECN safeguard threshold scaled by 1000"); 186300779Struckman 187300779StruckmanSYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, alpha, 188300779Struckman CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, 189300779Struckman pie_sysctl_alpha_beta_handler, "L", 190300779Struckman "PIE alpha scaled by 1000"); 191300779StruckmanSYSCTL_PROC(_net_inet_ip_dummynet_pie, OID_AUTO, beta, 192300779Struckman CTLTYPE_LONG | CTLFLAG_RW, NULL, 0, 193300779Struckman pie_sysctl_alpha_beta_handler, "L", 194300779Struckman "beta scaled by 1000"); 195300779Struckman#endif 196300779Struckman 197300779Struckman 198300779Struckman/* 199300779Struckman * Callout function for drop probability calculation 200300779Struckman * This function is called over tupdate ms and takes pointer of PIE 201300779Struckman * status variables as an argument 202300779Struckman */ 203300779Struckmanstatic void 204300779Struckmancalculate_drop_prob(void *x) 205300779Struckman{ 206300779Struckman int64_t p, prob, oldprob; 207300779Struckman struct dn_aqm_pie_parms *pprms; 208300779Struckman struct pie_status *pst = (struct pie_status *) x; 209318885Struckman int p_isneg; 210300779Struckman 211300779Struckman pprms = pst->parms; 212300779Struckman prob = pst->drop_prob; 213300779Struckman 214318904Struckman /* calculate current qdelay using DRE method. 215318904Struckman * If TS is used and no data in the queue, reset current_qdelay 216318904Struckman * as it stays at last value during dequeue process. 217318904Struckman */ 218318904Struckman if (pprms->flags & PIE_DEPRATEEST_ENABLED) 219300779Struckman pst->current_qdelay = ((uint64_t)pst->pq->ni.len_bytes * 220300779Struckman pst->avg_dq_time) >> PIE_DQ_THRESHOLD_BITS; 221318904Struckman else 222318904Struckman if (!pst->pq->ni.len_bytes) 223318904Struckman pst->current_qdelay = 0; 224300779Struckman 225300779Struckman /* calculate drop probability */ 226300779Struckman p = (int64_t)pprms->alpha * 227300779Struckman ((int64_t)pst->current_qdelay - (int64_t)pprms->qdelay_ref); 228300779Struckman p +=(int64_t) pprms->beta * 229300779Struckman ((int64_t)pst->current_qdelay - (int64_t)pst->qdelay_old); 230318885Struckman 231318885Struckman /* take absolute value so right shift result is well defined */ 232318885Struckman p_isneg = p < 0; 233318885Struckman if (p_isneg) { 234318885Struckman p = -p; 235318885Struckman } 236300779Struckman 237300779Struckman /* We PIE_MAX_PROB shift by 12-bits to increase the division precision */ 238300779Struckman p *= (PIE_MAX_PROB << 12) / AQM_TIME_1S; 239300779Struckman 240300779Struckman /* auto-tune drop probability */ 241301162Struckman if (prob < (PIE_MAX_PROB / 1000000)) /* 0.000001 */ 242301162Struckman p >>= 11 + PIE_FIX_POINT_BITS + 12; 243301162Struckman else if (prob < (PIE_MAX_PROB / 100000)) /* 0.00001 */ 244301162Struckman p >>= 9 + PIE_FIX_POINT_BITS + 12; 245301162Struckman else if (prob < (PIE_MAX_PROB / 10000)) /* 0.0001 */ 246301162Struckman p >>= 7 + PIE_FIX_POINT_BITS + 12; 247301162Struckman else if (prob < (PIE_MAX_PROB / 1000)) /* 0.001 */ 248301162Struckman p >>= 5 + PIE_FIX_POINT_BITS + 12; 249301162Struckman else if (prob < (PIE_MAX_PROB / 100)) /* 0.01 */ 250301162Struckman p >>= 3 + PIE_FIX_POINT_BITS + 12; 251301162Struckman else if (prob < (PIE_MAX_PROB / 10)) /* 0.1 */ 252301162Struckman p >>= 1 + PIE_FIX_POINT_BITS + 12; 253300779Struckman else 254301162Struckman p >>= PIE_FIX_POINT_BITS + 12; 255300779Struckman 256300779Struckman oldprob = prob; 257300779Struckman 258318885Struckman if (p_isneg) { 259318885Struckman prob = prob - p; 260318885Struckman 261318885Struckman /* check for multiplication underflow */ 262318885Struckman if (prob > oldprob) { 263318885Struckman prob= 0; 264318885Struckman D("underflow"); 265318885Struckman } 266318885Struckman } else { 267318885Struckman /* Cap Drop adjustment */ 268318885Struckman if ((pprms->flags & PIE_CAPDROP_ENABLED) && 269318885Struckman prob >= PIE_MAX_PROB / 10 && 270318885Struckman p > PIE_MAX_PROB / 50 ) { 271300779Struckman p = PIE_MAX_PROB / 50; 272318885Struckman } 273300779Struckman 274318885Struckman prob = prob + p; 275300779Struckman 276318885Struckman /* check for multiplication overflow */ 277300779Struckman if (prob<oldprob) { 278300779Struckman D("overflow"); 279300779Struckman prob= PIE_MAX_PROB; 280300779Struckman } 281300779Struckman } 282300779Struckman 283318885Struckman /* 284318885Struckman * decay the drop probability exponentially 285318885Struckman * and restrict it to range 0 to PIE_MAX_PROB 286318885Struckman */ 287318885Struckman if (prob < 0) { 288300779Struckman prob = 0; 289318885Struckman } else { 290318885Struckman if (pst->current_qdelay == 0 && pst->qdelay_old == 0) { 291318885Struckman /* 0.98 ~= 1- 1/64 */ 292318885Struckman prob = prob - (prob >> 6); 293318885Struckman } 294300779Struckman 295318885Struckman if (prob > PIE_MAX_PROB) { 296318885Struckman prob = PIE_MAX_PROB; 297318885Struckman } 298318885Struckman } 299318885Struckman 300300779Struckman pst->drop_prob = prob; 301300779Struckman 302300779Struckman /* store current queue delay value in old queue delay*/ 303300779Struckman pst->qdelay_old = pst->current_qdelay; 304300779Struckman 305300779Struckman /* update burst allowance */ 306300779Struckman if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance>0) { 307300779Struckman 308300779Struckman if (pst->burst_allowance > pprms->tupdate ) 309300779Struckman pst->burst_allowance -= pprms->tupdate; 310300779Struckman else 311300779Struckman pst->burst_allowance = 0; 312300779Struckman } 313300779Struckman 314300779Struckman /* reschedule calculate_drop_prob function */ 315300779Struckman if (pst->sflags & PIE_ACTIVE) 316300779Struckman callout_reset_sbt(&pst->aqm_pie_callout, 317300779Struckman (uint64_t)pprms->tupdate * SBT_1US, 0, calculate_drop_prob, pst, 0); 318300779Struckman 319300779Struckman mtx_unlock(&pst->lock_mtx); 320300779Struckman} 321300779Struckman 322300779Struckman/* 323300779Struckman * Extract a packet from the head of queue 'q' 324300779Struckman * Return a packet or NULL if the queue is empty. 325300779Struckman * If getts is set, also extract packet's timestamp from mtag. 326300779Struckman */ 327300779Struckmanstatic struct mbuf * 328300779Struckmanpie_extract_head(struct dn_queue *q, aqm_time_t *pkt_ts, int getts) 329300779Struckman{ 330300779Struckman struct m_tag *mtag; 331300779Struckman struct mbuf *m = q->mq.head; 332300779Struckman 333300779Struckman if (m == NULL) 334300779Struckman return m; 335300779Struckman q->mq.head = m->m_nextpkt; 336300779Struckman 337300779Struckman /* Update stats */ 338300779Struckman update_stats(q, -m->m_pkthdr.len, 0); 339300779Struckman 340300779Struckman if (q->ni.length == 0) /* queue is now idle */ 341300779Struckman q->q_time = dn_cfg.curr_time; 342300779Struckman 343300779Struckman if (getts) { 344300779Struckman /* extract packet TS*/ 345300779Struckman mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL); 346300779Struckman if (mtag == NULL) { 347300779Struckman D("PIE timestamp mtag not found!"); 348300779Struckman *pkt_ts = 0; 349300779Struckman } else { 350300779Struckman *pkt_ts = *(aqm_time_t *)(mtag + 1); 351300779Struckman m_tag_delete(m,mtag); 352300779Struckman } 353300779Struckman } 354300779Struckman return m; 355300779Struckman} 356300779Struckman 357300779Struckman/* 358300779Struckman * Initiate PIE variable and optionally activate it 359300779Struckman */ 360300779Struckman__inline static void 361300779Struckmaninit_activate_pie(struct pie_status *pst, int resettimer) 362300779Struckman{ 363300779Struckman struct dn_aqm_pie_parms *pprms; 364300779Struckman 365300779Struckman mtx_lock(&pst->lock_mtx); 366300779Struckman pprms = pst->parms; 367300779Struckman pst->drop_prob = 0; 368300779Struckman pst->qdelay_old = 0; 369300779Struckman pst->burst_allowance = pprms->max_burst; 370300779Struckman pst->accu_prob = 0; 371300779Struckman pst->dq_count = 0; 372300779Struckman pst->avg_dq_time = 0; 373300779Struckman pst->sflags = PIE_INMEASUREMENT; 374300779Struckman pst->measurement_start = AQM_UNOW; 375300779Struckman 376300779Struckman if (resettimer) { 377300779Struckman pst->sflags |= PIE_ACTIVE; 378300779Struckman callout_reset_sbt(&pst->aqm_pie_callout, 379300779Struckman (uint64_t)pprms->tupdate * SBT_1US, 380300779Struckman 0, calculate_drop_prob, pst, 0); 381300779Struckman } 382300779Struckman //DX(2, "PIE Activated"); 383300779Struckman mtx_unlock(&pst->lock_mtx); 384300779Struckman} 385300779Struckman 386300779Struckman/* 387300779Struckman * Deactivate PIE and stop probe update callout 388300779Struckman */ 389300779Struckman__inline static void 390300779Struckmandeactivate_pie(struct pie_status *pst) 391300779Struckman{ 392300779Struckman mtx_lock(&pst->lock_mtx); 393300779Struckman pst->sflags &= ~(PIE_ACTIVE | PIE_INMEASUREMENT); 394300779Struckman callout_stop(&pst->aqm_pie_callout); 395300779Struckman //D("PIE Deactivated"); 396300779Struckman mtx_unlock(&pst->lock_mtx); 397300779Struckman} 398300779Struckman 399300779Struckman/* 400300779Struckman * Dequeue and return a pcaket from queue 'q' or NULL if 'q' is empty. 401300779Struckman * Also, caculate depature time or queue delay using timestamp 402300779Struckman */ 403300779Struckmanstatic struct mbuf * 404300779Struckmanaqm_pie_dequeue(struct dn_queue *q) 405300779Struckman{ 406300779Struckman struct mbuf *m; 407300779Struckman struct dn_flow *ni; /* stats for scheduler instance */ 408300779Struckman struct dn_aqm_pie_parms *pprms; 409300779Struckman struct pie_status *pst; 410300779Struckman aqm_time_t now; 411300779Struckman aqm_time_t pkt_ts, dq_time; 412300779Struckman int32_t w; 413300779Struckman 414300779Struckman pst = q->aqm_status; 415300779Struckman pprms = pst->parms; 416300779Struckman ni = &q->_si->ni; 417300779Struckman 418300779Struckman /*we extarct packet ts only when Departure Rate Estimation dis not used*/ 419300779Struckman m = pie_extract_head(q, &pkt_ts, !(pprms->flags & PIE_DEPRATEEST_ENABLED)); 420300779Struckman 421300779Struckman if (!m || !(pst->sflags & PIE_ACTIVE)) 422300779Struckman return m; 423300779Struckman 424300779Struckman now = AQM_UNOW; 425300779Struckman if (pprms->flags & PIE_DEPRATEEST_ENABLED) { 426300779Struckman /* calculate average depature time */ 427300779Struckman if(pst->sflags & PIE_INMEASUREMENT) { 428300779Struckman pst->dq_count += m->m_pkthdr.len; 429300779Struckman 430300779Struckman if (pst->dq_count >= PIE_DQ_THRESHOLD) { 431300779Struckman dq_time = now - pst->measurement_start; 432300779Struckman 433300779Struckman /* 434300779Struckman * if we don't have old avg dq_time i.e PIE is (re)initialized, 435300779Struckman * don't use weight to calculate new avg_dq_time 436300779Struckman */ 437300779Struckman if(pst->avg_dq_time == 0) 438300779Struckman pst->avg_dq_time = dq_time; 439300779Struckman else { 440300779Struckman /* 441300779Struckman * weight = PIE_DQ_THRESHOLD/2^6, but we scaled 442300779Struckman * weight by 2^8. Thus, scaled 443300779Struckman * weight = PIE_DQ_THRESHOLD /2^8 444300779Struckman * */ 445300779Struckman w = PIE_DQ_THRESHOLD >> 8; 446300779Struckman pst->avg_dq_time = (dq_time* w 447300779Struckman + (pst->avg_dq_time * ((1L << 8) - w))) >> 8; 448300779Struckman pst->sflags &= ~PIE_INMEASUREMENT; 449300779Struckman } 450300779Struckman } 451300779Struckman } 452300779Struckman 453300779Struckman /* 454300779Struckman * Start new measurment cycle when the queue has 455300779Struckman * PIE_DQ_THRESHOLD worth of bytes. 456300779Struckman */ 457300779Struckman if(!(pst->sflags & PIE_INMEASUREMENT) && 458300779Struckman q->ni.len_bytes >= PIE_DQ_THRESHOLD) { 459300779Struckman pst->sflags |= PIE_INMEASUREMENT; 460300779Struckman pst->measurement_start = now; 461300779Struckman pst->dq_count = 0; 462300779Struckman } 463300779Struckman } 464300779Struckman /* Optionally, use packet timestamp to estimate queue delay */ 465300779Struckman else 466300779Struckman pst->current_qdelay = now - pkt_ts; 467300779Struckman 468300779Struckman return m; 469300779Struckman} 470300779Struckman 471300779Struckman/* 472300779Struckman * Enqueue a packet in q, subject to space and PIE queue management policy 473300779Struckman * (whose parameters are in q->fs). 474300779Struckman * Update stats for the queue and the scheduler. 475300779Struckman * Return 0 on success, 1 on drop. The packet is consumed anyways. 476300779Struckman */ 477300779Struckmanstatic int 478300779Struckmanaqm_pie_enqueue(struct dn_queue *q, struct mbuf* m) 479300779Struckman{ 480300779Struckman struct dn_fs *f; 481300779Struckman uint64_t len; 482300779Struckman uint32_t qlen; 483300779Struckman struct pie_status *pst; 484300779Struckman struct dn_aqm_pie_parms *pprms; 485300779Struckman int t; 486300779Struckman 487300779Struckman len = m->m_pkthdr.len; 488300779Struckman pst = q->aqm_status; 489300779Struckman if(!pst) { 490300779Struckman DX(2, "PIE queue is not initialized\n"); 491300779Struckman update_stats(q, 0, 1); 492300779Struckman FREE_PKT(m); 493300779Struckman return 1; 494300779Struckman } 495300779Struckman 496300779Struckman f = &(q->fs->fs); 497300779Struckman pprms = pst->parms; 498300779Struckman t = ENQUE; 499300779Struckman 500300779Struckman /* get current queue length in bytes or packets*/ 501300779Struckman qlen = (f->flags & DN_QSIZE_BYTES) ? 502300779Struckman q->ni.len_bytes : q->ni.length; 503300779Struckman 504300779Struckman /* check for queue size and drop the tail if exceed queue limit*/ 505300779Struckman if (qlen >= f->qsize) 506300779Struckman t = DROP; 507300779Struckman /* drop/mark the packet when PIE is active and burst time elapsed */ 508300779Struckman else if ((pst->sflags & PIE_ACTIVE) && pst->burst_allowance==0 509300779Struckman && drop_early(pst, q->ni.len_bytes) == DROP) { 510300779Struckman /* 511300779Struckman * if drop_prob over ECN threshold, drop the packet 512300779Struckman * otherwise mark and enqueue it. 513300779Struckman */ 514300779Struckman if ((pprms->flags & PIE_ECN_ENABLED) && pst->drop_prob < 515300779Struckman (pprms->max_ecnth << (PIE_PROB_BITS - PIE_FIX_POINT_BITS)) 516300779Struckman && ecn_mark(m)) 517300779Struckman t = ENQUE; 518300779Struckman else 519300779Struckman t = DROP; 520300779Struckman } 521300779Struckman 522300779Struckman /* Turn PIE on when 1/3 of the queue is full */ 523300779Struckman if (!(pst->sflags & PIE_ACTIVE) && qlen >= pst->one_third_q_size) { 524300779Struckman init_activate_pie(pst, 1); 525300779Struckman } 526300779Struckman 527300779Struckman /* Reset burst tolerance and optinally turn PIE off*/ 528300779Struckman if ((pst->sflags & PIE_ACTIVE) && pst->drop_prob == 0 && 529300779Struckman pst->current_qdelay < (pprms->qdelay_ref >> 1) && 530300779Struckman pst->qdelay_old < (pprms->qdelay_ref >> 1)) { 531300779Struckman 532300779Struckman pst->burst_allowance = pprms->max_burst; 533300779Struckman if ((pprms->flags & PIE_ON_OFF_MODE_ENABLED) && qlen<=0) 534300779Struckman deactivate_pie(pst); 535300779Struckman } 536300779Struckman 537300779Struckman /* Timestamp the packet if Departure Rate Estimation is disabled */ 538300779Struckman if (t != DROP && !(pprms->flags & PIE_DEPRATEEST_ENABLED)) { 539300779Struckman /* Add TS to mbuf as a TAG */ 540300779Struckman struct m_tag *mtag; 541300779Struckman mtag = m_tag_locate(m, MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, NULL); 542300779Struckman if (mtag == NULL) 543300779Struckman mtag = m_tag_alloc(MTAG_ABI_COMPAT, DN_AQM_MTAG_TS, 544300779Struckman sizeof(aqm_time_t), M_NOWAIT); 545300779Struckman if (mtag == NULL) { 546300779Struckman m_freem(m); 547300779Struckman t = DROP; 548300779Struckman } 549300779Struckman *(aqm_time_t *)(mtag + 1) = AQM_UNOW; 550300779Struckman m_tag_prepend(m, mtag); 551300779Struckman } 552300779Struckman 553300779Struckman if (t != DROP) { 554300779Struckman mq_append(&q->mq, m); 555300779Struckman update_stats(q, len, 0); 556300779Struckman return (0); 557300779Struckman } else { 558300779Struckman update_stats(q, 0, 1); 559300779Struckman 560300779Struckman /* reset accu_prob after packet drop */ 561300779Struckman pst->accu_prob = 0; 562300779Struckman FREE_PKT(m); 563300779Struckman return 1; 564300779Struckman } 565300779Struckman return 0; 566300779Struckman} 567300779Struckman 568300779Struckman/* 569300779Struckman * initialize PIE for queue 'q' 570300779Struckman * First allocate memory for PIE status. 571300779Struckman */ 572300779Struckmanstatic int 573300779Struckmanaqm_pie_init(struct dn_queue *q) 574300779Struckman{ 575300779Struckman struct pie_status *pst; 576300779Struckman struct dn_aqm_pie_parms *pprms; 577300779Struckman int err = 0; 578300779Struckman 579300779Struckman pprms = q->fs->aqmcfg; 580300779Struckman 581300779Struckman do { /* exit with break when error occurs*/ 582300779Struckman if (!pprms){ 583302338Struckman DX(2, "AQM_PIE is not configured"); 584300779Struckman err = EINVAL; 585300779Struckman break; 586300779Struckman } 587300779Struckman 588300779Struckman q->aqm_status = malloc(sizeof(struct pie_status), 589300779Struckman M_DUMMYNET, M_NOWAIT | M_ZERO); 590300779Struckman if (q->aqm_status == NULL) { 591300779Struckman D("cannot allocate PIE private data"); 592300779Struckman err = ENOMEM ; 593300779Struckman break; 594300779Struckman } 595300779Struckman 596300779Struckman pst = q->aqm_status; 597300779Struckman /* increase reference count for PIE module */ 598300779Struckman pie_desc.ref_count++; 599300779Struckman 600300779Struckman pst->pq = q; 601300779Struckman pst->parms = pprms; 602300779Struckman 603300779Struckman /* For speed optimization, we caculate 1/3 queue size once here */ 604300779Struckman // we can use x/3 = (x >>2) + (x >>4) + (x >>7) 605300779Struckman pst->one_third_q_size = q->fs->fs.qsize/3; 606300779Struckman 607300779Struckman mtx_init(&pst->lock_mtx, "mtx_pie", NULL, MTX_DEF); 608300779Struckman callout_init_mtx(&pst->aqm_pie_callout, &pst->lock_mtx, 609300779Struckman CALLOUT_RETURNUNLOCKED); 610300779Struckman 611300779Struckman pst->current_qdelay = 0; 612300779Struckman init_activate_pie(pst, !(pprms->flags & PIE_ON_OFF_MODE_ENABLED)); 613300779Struckman 614300779Struckman //DX(2, "aqm_PIE_init"); 615300779Struckman 616300779Struckman } while(0); 617300779Struckman 618300779Struckman return err; 619300779Struckman} 620300779Struckman 621300779Struckman/* 622302338Struckman * Callout function to destroy pie mtx and free PIE status memory 623302338Struckman */ 624302338Struckmanstatic void 625302338Struckmanpie_callout_cleanup(void *x) 626302338Struckman{ 627302338Struckman struct pie_status *pst = (struct pie_status *) x; 628302338Struckman 629302338Struckman mtx_unlock(&pst->lock_mtx); 630302338Struckman mtx_destroy(&pst->lock_mtx); 631302338Struckman free(x, M_DUMMYNET); 632302338Struckman DN_BH_WLOCK(); 633302338Struckman pie_desc.ref_count--; 634302338Struckman DN_BH_WUNLOCK(); 635302338Struckman} 636302338Struckman 637302338Struckman/* 638300779Struckman * Clean up PIE status for queue 'q' 639300779Struckman * Destroy memory allocated for PIE status. 640300779Struckman */ 641300779Struckmanstatic int 642300779Struckmanaqm_pie_cleanup(struct dn_queue *q) 643300779Struckman{ 644300779Struckman 645300779Struckman if(!q) { 646300779Struckman D("q is null"); 647300779Struckman return 0; 648300779Struckman } 649300779Struckman struct pie_status *pst = q->aqm_status; 650300779Struckman if(!pst) { 651300779Struckman //D("queue is already cleaned up"); 652300779Struckman return 0; 653300779Struckman } 654300779Struckman if(!q->fs || !q->fs->aqmcfg) { 655300779Struckman D("fs is null or no cfg"); 656300779Struckman return 1; 657300779Struckman } 658300779Struckman if (q->fs->aqmfp && q->fs->aqmfp->type !=DN_AQM_PIE) { 659300779Struckman D("Not PIE fs (%d)", q->fs->fs.fs_nr); 660300779Struckman return 1; 661300779Struckman } 662300779Struckman 663302338Struckman /* 664302338Struckman * Free PIE status allocated memory using pie_callout_cleanup() callout 665302338Struckman * function to avoid any potential race. 666302338Struckman * We reset aqm_pie_callout to call pie_callout_cleanup() in next 1um. This 667302338Struckman * stops the scheduled calculate_drop_prob() callout and call pie_callout_cleanup() 668302338Struckman * which does memory freeing. 669302338Struckman */ 670300779Struckman mtx_lock(&pst->lock_mtx); 671302338Struckman callout_reset_sbt(&pst->aqm_pie_callout, 672302338Struckman SBT_1US, 0, pie_callout_cleanup, pst, 0); 673302338Struckman q->aqm_status = NULL; 674302338Struckman mtx_unlock(&pst->lock_mtx); 675300779Struckman 676300779Struckman return 0; 677300779Struckman} 678300779Struckman 679300779Struckman/* 680300779Struckman * Config PIE parameters 681300779Struckman * also allocate memory for PIE configurations 682300779Struckman */ 683300779Struckmanstatic int 684300779Struckmanaqm_pie_config(struct dn_fsk* fs, struct dn_extra_parms *ep, int len) 685300779Struckman{ 686300779Struckman struct dn_aqm_pie_parms *pcfg; 687300779Struckman 688300779Struckman int l = sizeof(struct dn_extra_parms); 689300779Struckman if (len < l) { 690300779Struckman D("invalid sched parms length got %d need %d", len, l); 691300779Struckman return EINVAL; 692300779Struckman } 693300779Struckman /* we free the old cfg because maybe the orignal allocation 694300779Struckman * was used for diffirent AQM type. 695300779Struckman */ 696300779Struckman if (fs->aqmcfg) { 697300779Struckman free(fs->aqmcfg, M_DUMMYNET); 698300779Struckman fs->aqmcfg = NULL; 699300779Struckman } 700300779Struckman 701300779Struckman fs->aqmcfg = malloc(sizeof(struct dn_aqm_pie_parms), 702300779Struckman M_DUMMYNET, M_NOWAIT | M_ZERO); 703300779Struckman if (fs->aqmcfg== NULL) { 704300779Struckman D("cannot allocate PIE configuration parameters"); 705300779Struckman return ENOMEM; 706300779Struckman } 707300779Struckman 708300779Struckman /* par array contains pie configuration as follow 709300779Struckman * 0- qdelay_ref,1- tupdate, 2- max_burst 710300779Struckman * 3- max_ecnth, 4- alpha, 5- beta, 6- flags 711300779Struckman */ 712300779Struckman 713300779Struckman /* configure PIE parameters */ 714300779Struckman pcfg = fs->aqmcfg; 715300779Struckman 716300779Struckman if (ep->par[0] < 0) 717300779Struckman pcfg->qdelay_ref = pie_sysctl.qdelay_ref * AQM_TIME_1US; 718300779Struckman else 719300779Struckman pcfg->qdelay_ref = ep->par[0]; 720300779Struckman if (ep->par[1] < 0) 721300779Struckman pcfg->tupdate = pie_sysctl.tupdate * AQM_TIME_1US; 722300779Struckman else 723300779Struckman pcfg->tupdate = ep->par[1]; 724300779Struckman if (ep->par[2] < 0) 725300779Struckman pcfg->max_burst = pie_sysctl.max_burst * AQM_TIME_1US; 726300779Struckman else 727300779Struckman pcfg->max_burst = ep->par[2]; 728300779Struckman if (ep->par[3] < 0) 729300779Struckman pcfg->max_ecnth = pie_sysctl.max_ecnth; 730300779Struckman else 731300779Struckman pcfg->max_ecnth = ep->par[3]; 732300779Struckman if (ep->par[4] < 0) 733300779Struckman pcfg->alpha = pie_sysctl.alpha; 734300779Struckman else 735300779Struckman pcfg->alpha = ep->par[4]; 736300779Struckman if (ep->par[5] < 0) 737300779Struckman pcfg->beta = pie_sysctl.beta; 738300779Struckman else 739300779Struckman pcfg->beta = ep->par[5]; 740300779Struckman if (ep->par[6] < 0) 741300779Struckman pcfg->flags = pie_sysctl.flags; 742300779Struckman else 743300779Struckman pcfg->flags = ep->par[6]; 744300779Struckman 745300779Struckman /* bound PIE configurations */ 746300779Struckman pcfg->qdelay_ref = BOUND_VAR(pcfg->qdelay_ref, 1, 10 * AQM_TIME_1S); 747300779Struckman pcfg->tupdate = BOUND_VAR(pcfg->tupdate, 1, 10 * AQM_TIME_1S); 748300779Struckman pcfg->max_burst = BOUND_VAR(pcfg->max_burst, 0, 10 * AQM_TIME_1S); 749300779Struckman pcfg->max_ecnth = BOUND_VAR(pcfg->max_ecnth, 0, PIE_SCALE); 750300779Struckman pcfg->alpha = BOUND_VAR(pcfg->alpha, 0, 7 * PIE_SCALE); 751300779Struckman pcfg->beta = BOUND_VAR(pcfg->beta, 0 , 7 * PIE_SCALE); 752300779Struckman 753300779Struckman pie_desc.cfg_ref_count++; 754300779Struckman //D("pie cfg_ref_count=%d", pie_desc.cfg_ref_count); 755300779Struckman return 0; 756300779Struckman} 757300779Struckman 758300779Struckman/* 759300779Struckman * Deconfigure PIE and free memory allocation 760300779Struckman */ 761300779Struckmanstatic int 762300779Struckmanaqm_pie_deconfig(struct dn_fsk* fs) 763300779Struckman{ 764300779Struckman if (fs && fs->aqmcfg) { 765300779Struckman free(fs->aqmcfg, M_DUMMYNET); 766300779Struckman fs->aqmcfg = NULL; 767300779Struckman pie_desc.cfg_ref_count--; 768300779Struckman } 769300779Struckman return 0; 770300779Struckman} 771300779Struckman 772300779Struckman/* 773300779Struckman * Retrieve PIE configuration parameters. 774300779Struckman */ 775300779Struckmanstatic int 776300779Struckmanaqm_pie_getconfig (struct dn_fsk *fs, struct dn_extra_parms * ep) 777300779Struckman{ 778300779Struckman struct dn_aqm_pie_parms *pcfg; 779300779Struckman if (fs->aqmcfg) { 780317488Struckman strlcpy(ep->name, pie_desc.name, sizeof(ep->name)); 781300779Struckman pcfg = fs->aqmcfg; 782300779Struckman ep->par[0] = pcfg->qdelay_ref / AQM_TIME_1US; 783300779Struckman ep->par[1] = pcfg->tupdate / AQM_TIME_1US; 784300779Struckman ep->par[2] = pcfg->max_burst / AQM_TIME_1US; 785300779Struckman ep->par[3] = pcfg->max_ecnth; 786300779Struckman ep->par[4] = pcfg->alpha; 787300779Struckman ep->par[5] = pcfg->beta; 788300779Struckman ep->par[6] = pcfg->flags; 789300779Struckman 790300779Struckman return 0; 791300779Struckman } 792300779Struckman return 1; 793300779Struckman} 794300779Struckman 795300779Struckmanstatic struct dn_aqm pie_desc = { 796300779Struckman _SI( .type = ) DN_AQM_PIE, 797300779Struckman _SI( .name = ) "PIE", 798300779Struckman _SI( .ref_count = ) 0, 799300779Struckman _SI( .cfg_ref_count = ) 0, 800300779Struckman _SI( .enqueue = ) aqm_pie_enqueue, 801300779Struckman _SI( .dequeue = ) aqm_pie_dequeue, 802300779Struckman _SI( .config = ) aqm_pie_config, 803300779Struckman _SI( .deconfig = ) aqm_pie_deconfig, 804300779Struckman _SI( .getconfig = ) aqm_pie_getconfig, 805300779Struckman _SI( .init = ) aqm_pie_init, 806300779Struckman _SI( .cleanup = ) aqm_pie_cleanup, 807300779Struckman}; 808300779Struckman 809300779StruckmanDECLARE_DNAQM_MODULE(dn_aqm_pie, &pie_desc); 810300779Struckman#endif 811