1252504Slstewart/*- 2252504Slstewart * Copyright (c) 2009-2013 3252504Slstewart * Swinburne University of Technology, Melbourne, Australia 4252504Slstewart * All rights reserved. 5252504Slstewart * 6252504Slstewart * This software was developed at the Centre for Advanced Internet 7252504Slstewart * Architectures, Swinburne University of Technology, by David Hayes, made 8252504Slstewart * possible in part by a gift from The Cisco University Research Program Fund, 9252504Slstewart * a corporate advised fund of Silicon Valley Community Foundation. Development 10252504Slstewart * and testing were further assisted by a grant from the FreeBSD Foundation. 11252504Slstewart * 12252504Slstewart * Redistribution and use in source and binary forms, with or without 13252504Slstewart * modification, are permitted provided that the following conditions 14252504Slstewart * are met: 15252504Slstewart * 1. Redistributions of source code must retain the above copyright 16252504Slstewart * notice, this list of conditions and the following disclaimer. 17252504Slstewart * 2. Redistributions in binary form must reproduce the above copyright 18252504Slstewart * notice, this list of conditions and the following disclaimer in the 19252504Slstewart * documentation and/or other materials provided with the distribution. 20252504Slstewart * 21252504Slstewart * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 22252504Slstewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23252504Slstewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24252504Slstewart * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 25252504Slstewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26252504Slstewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27252504Slstewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28252504Slstewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29252504Slstewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30252504Slstewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31252504Slstewart * SUCH DAMAGE. 32252504Slstewart */ 33252504Slstewart 34252504Slstewart/* 35252504Slstewart * CAIA Delay-Gradient (CDG) congestion control algorithm 36252504Slstewart * 37252504Slstewart * An implemention of the delay-gradient congestion control algorithm proposed 38252504Slstewart * in the following paper: 39252504Slstewart * 40252504Slstewart * D. A. Hayes and G. Armitage, "Revisiting TCP Congestion Control using Delay 41252504Slstewart * Gradients", in IFIP Networking, Valencia, Spain, 9-13 May 2011. 42252504Slstewart * 43252504Slstewart * Developed as part of the NewTCP research project at Swinburne University of 44252504Slstewart * Technology's Centre for Advanced Internet Architectures, Melbourne, 45252504Slstewart * Australia. More details are available at: 46252504Slstewart * http://caia.swin.edu.au/urp/newtcp/ 47252504Slstewart */ 48252504Slstewart 49252504Slstewart#include <sys/cdefs.h> 50252504Slstewart__FBSDID("$FreeBSD$"); 51252504Slstewart 52252504Slstewart#include <sys/param.h> 53252504Slstewart#include <sys/hhook.h> 54252504Slstewart#include <sys/kernel.h> 55252504Slstewart#include <sys/khelp.h> 56252504Slstewart#include <sys/limits.h> 57252504Slstewart#include <sys/lock.h> 58252504Slstewart#include <sys/malloc.h> 59252504Slstewart#include <sys/module.h> 60252504Slstewart#include <sys/queue.h> 61252504Slstewart#include <sys/socket.h> 62252504Slstewart#include <sys/socketvar.h> 63252504Slstewart#include <sys/sysctl.h> 64252504Slstewart#include <sys/systm.h> 65252504Slstewart 66252504Slstewart#include <net/if.h> 67252504Slstewart#include <net/vnet.h> 68252504Slstewart 69252504Slstewart#include <netinet/cc.h> 70252504Slstewart#include <netinet/tcp_seq.h> 71252504Slstewart#include <netinet/tcp_timer.h> 72252504Slstewart#include <netinet/tcp_var.h> 73252504Slstewart 74252504Slstewart#include <netinet/cc/cc_module.h> 75252504Slstewart 76252504Slstewart#include <netinet/khelp/h_ertt.h> 77252504Slstewart 78252504Slstewart#include <vm/uma.h> 79252504Slstewart 80252504Slstewart#define CDG_VERSION "0.1" 81252504Slstewart 82252504Slstewart#define CAST_PTR_INT(X) (*((int*)(X))) 83252504Slstewart 84252504Slstewart#ifndef VIMAGE 85252504Slstewart#define vnet_sysctl_handle_uint(oidp, arg1, arg2, req) \ 86252504Slstewart sysctl_handle_int(oidp, arg1, arg2, req) 87252504Slstewart#endif 88252504Slstewart 89252504Slstewart/* Private delay-gradient induced congestion control signal. */ 90252504Slstewart#define CC_CDG_DELAY 0x01000000 91252504Slstewart 92252504Slstewart/* NewReno window deflation factor on loss (as a percentage). */ 93252504Slstewart#define RENO_BETA 50 94252504Slstewart 95252504Slstewart/* Queue states. */ 96252504Slstewart#define CDG_Q_EMPTY 1 97252504Slstewart#define CDG_Q_RISING 2 98252504Slstewart#define CDG_Q_FALLING 3 99252504Slstewart#define CDG_Q_FULL 4 100252504Slstewart#define CDG_Q_UNKNOWN 9999 101252504Slstewart 102252504Slstewart/* Number of bit shifts used in probexp lookup table. */ 103252504Slstewart#define EXP_PREC 15 104252504Slstewart 105252504Slstewart/* Largest gradient represented in probexp lookup table. */ 106252504Slstewart#define MAXGRAD 5 107252504Slstewart 108252504Slstewart/* 109252504Slstewart * Delay Precision Enhance - number of bit shifts used for qtrend related 110252504Slstewart * integer arithmetic precision. 111252504Slstewart */ 112252504Slstewart#define D_P_E 7 113252504Slstewart 114252504Slstewartstruct qdiff_sample { 115252504Slstewart long qdiff; 116252504Slstewart STAILQ_ENTRY(qdiff_sample) qdiff_lnk; 117252504Slstewart}; 118252504Slstewart 119252504Slstewartstruct cdg { 120252504Slstewart long max_qtrend; 121252504Slstewart long min_qtrend; 122252504Slstewart STAILQ_HEAD(minrtts_head, qdiff_sample) qdiffmin_q; 123252504Slstewart STAILQ_HEAD(maxrtts_head, qdiff_sample) qdiffmax_q; 124252504Slstewart long window_incr; 125252504Slstewart /* rttcount for window increase when in congestion avoidance */ 126252504Slstewart long rtt_count; 127252504Slstewart /* maximum measured rtt within an rtt period */ 128252504Slstewart int maxrtt_in_rtt; 129252504Slstewart /* maximum measured rtt within prev rtt period */ 130252504Slstewart int maxrtt_in_prevrtt; 131252504Slstewart /* minimum measured rtt within an rtt period */ 132252504Slstewart int minrtt_in_rtt; 133252504Slstewart /* minimum measured rtt within prev rtt period */ 134252504Slstewart int minrtt_in_prevrtt; 135252504Slstewart /* consecutive congestion episode counter */ 136252504Slstewart uint32_t consec_cong_cnt; 137252504Slstewart /* when tracking a new reno type loss window */ 138252504Slstewart uint32_t shadow_w; 139252504Slstewart /* maximum number of samples in the moving average queue */ 140252504Slstewart int sample_q_size; 141252504Slstewart /* number of samples in the moving average queue */ 142252504Slstewart int num_samples; 143252504Slstewart /* estimate of the queue state of the path */ 144252504Slstewart int queue_state; 145252504Slstewart}; 146252504Slstewart 147252504Slstewart/* 148252504Slstewart * Lookup table for: 149252504Slstewart * (1 - exp(-x)) << EXP_PREC, where x = [0,MAXGRAD] in 2^-7 increments 150252504Slstewart * 151252504Slstewart * Note: probexp[0] is set to 10 (not 0) as a safety for very low increase 152252504Slstewart * gradients. 153252504Slstewart */ 154252504Slstewartstatic const int probexp[641] = { 155252504Slstewart 10,255,508,759,1008,1255,1501,1744,1985,2225,2463,2698,2932,3165,3395,3624, 156252504Slstewart 3850,4075,4299,4520,4740,4958,5175,5389,5602,5814,6024,6232,6438,6643,6846, 157252504Slstewart 7048,7248,7447,7644,7839,8033,8226,8417,8606,8794,8981,9166,9350,9532,9713, 158252504Slstewart 9892,10070,10247,10422,10596,10769,10940,11110,11278,11445,11611,11776,11939, 159252504Slstewart 12101,12262,12422,12580,12737,12893,13048,13201,13354,13505,13655,13803,13951, 160252504Slstewart 14097,14243,14387,14530,14672,14813,14952,15091,15229,15365,15500,15635,15768, 161252504Slstewart 15900,16032,16162,16291,16419,16547,16673,16798,16922,17046,17168,17289,17410, 162252504Slstewart 17529,17648,17766,17882,17998,18113,18227,18340,18453,18564,18675,18784,18893, 163252504Slstewart 19001,19108,19215,19320,19425,19529,19632,19734,19835,19936,20036,20135,20233, 164252504Slstewart 20331,20427,20523,20619,20713,20807,20900,20993,21084,21175,21265,21355,21444, 165252504Slstewart 21532,21619,21706,21792,21878,21962,22046,22130,22213,22295,22376,22457,22537, 166252504Slstewart 22617,22696,22774,22852,22929,23006,23082,23157,23232,23306,23380,23453,23525, 167252504Slstewart 23597,23669,23739,23810,23879,23949,24017,24085,24153,24220,24286,24352,24418, 168252504Slstewart 24483,24547,24611,24675,24738,24800,24862,24924,24985,25045,25106,25165,25224, 169252504Slstewart 25283,25341,25399,25456,25513,25570,25626,25681,25737,25791,25846,25899,25953, 170252504Slstewart 26006,26059,26111,26163,26214,26265,26316,26366,26416,26465,26514,26563,26611, 171252504Slstewart 26659,26707,26754,26801,26847,26893,26939,26984,27029,27074,27118,27162,27206, 172252504Slstewart 27249,27292,27335,27377,27419,27460,27502,27543,27583,27624,27664,27703,27743, 173252504Slstewart 27782,27821,27859,27897,27935,27973,28010,28047,28084,28121,28157,28193,28228, 174252504Slstewart 28263,28299,28333,28368,28402,28436,28470,28503,28536,28569,28602,28634,28667, 175252504Slstewart 28699,28730,28762,28793,28824,28854,28885,28915,28945,28975,29004,29034,29063, 176252504Slstewart 29092,29120,29149,29177,29205,29232,29260,29287,29314,29341,29368,29394,29421, 177252504Slstewart 29447,29472,29498,29524,29549,29574,29599,29623,29648,29672,29696,29720,29744, 178252504Slstewart 29767,29791,29814,29837,29860,29882,29905,29927,29949,29971,29993,30014,30036, 179252504Slstewart 30057,30078,30099,30120,30141,30161,30181,30201,30221,30241,30261,30280,30300, 180252504Slstewart 30319,30338,30357,30376,30394,30413,30431,30449,30467,30485,30503,30521,30538, 181252504Slstewart 30555,30573,30590,30607,30624,30640,30657,30673,30690,30706,30722,30738,30753, 182252504Slstewart 30769,30785,30800,30815,30831,30846,30861,30876,30890,30905,30919,30934,30948, 183252504Slstewart 30962,30976,30990,31004,31018,31031,31045,31058,31072,31085,31098,31111,31124, 184252504Slstewart 31137,31149,31162,31174,31187,31199,31211,31223,31235,31247,31259,31271,31283, 185252504Slstewart 31294,31306,31317,31328,31339,31351,31362,31373,31383,31394,31405,31416,31426, 186252504Slstewart 31436,31447,31457,31467,31477,31487,31497,31507,31517,31527,31537,31546,31556, 187252504Slstewart 31565,31574,31584,31593,31602,31611,31620,31629,31638,31647,31655,31664,31673, 188252504Slstewart 31681,31690,31698,31706,31715,31723,31731,31739,31747,31755,31763,31771,31778, 189252504Slstewart 31786,31794,31801,31809,31816,31824,31831,31838,31846,31853,31860,31867,31874, 190252504Slstewart 31881,31888,31895,31902,31908,31915,31922,31928,31935,31941,31948,31954,31960, 191252504Slstewart 31967,31973,31979,31985,31991,31997,32003,32009,32015,32021,32027,32033,32038, 192252504Slstewart 32044,32050,32055,32061,32066,32072,32077,32083,32088,32093,32098,32104,32109, 193252504Slstewart 32114,32119,32124,32129,32134,32139,32144,32149,32154,32158,32163,32168,32173, 194252504Slstewart 32177,32182,32186,32191,32195,32200,32204,32209,32213,32217,32222,32226,32230, 195252504Slstewart 32234,32238,32242,32247,32251,32255,32259,32263,32267,32270,32274,32278,32282, 196252504Slstewart 32286,32290,32293,32297,32301,32304,32308,32311,32315,32318,32322,32325,32329, 197252504Slstewart 32332,32336,32339,32342,32346,32349,32352,32356,32359,32362,32365,32368,32371, 198252504Slstewart 32374,32377,32381,32384,32387,32389,32392,32395,32398,32401,32404,32407,32410, 199252504Slstewart 32412,32415,32418,32421,32423,32426,32429,32431,32434,32437,32439,32442,32444, 200252504Slstewart 32447,32449,32452,32454,32457,32459,32461,32464,32466,32469,32471,32473,32476, 201252504Slstewart 32478,32480,32482,32485,32487,32489,32491,32493,32495,32497,32500,32502,32504, 202252504Slstewart 32506,32508,32510,32512,32514,32516,32518,32520,32522,32524,32526,32527,32529, 203252504Slstewart 32531,32533,32535,32537,32538,32540,32542,32544,32545,32547}; 204252504Slstewart 205252504Slstewartstatic uma_zone_t qdiffsample_zone; 206252504Slstewart 207252504Slstewartstatic MALLOC_DEFINE(M_CDG, "cdg data", 208252504Slstewart "Per connection data required for the CDG congestion control algorithm"); 209252504Slstewart 210252504Slstewartstatic int ertt_id; 211252504Slstewart 212252504Slstewartstatic VNET_DEFINE(uint32_t, cdg_alpha_inc); 213252504Slstewartstatic VNET_DEFINE(uint32_t, cdg_beta_delay); 214252504Slstewartstatic VNET_DEFINE(uint32_t, cdg_beta_loss); 215252504Slstewartstatic VNET_DEFINE(uint32_t, cdg_smoothing_factor); 216252504Slstewartstatic VNET_DEFINE(uint32_t, cdg_exp_backoff_scale); 217252504Slstewartstatic VNET_DEFINE(uint32_t, cdg_consec_cong); 218252504Slstewartstatic VNET_DEFINE(uint32_t, cdg_hold_backoff); 219252504Slstewart#define V_cdg_alpha_inc VNET(cdg_alpha_inc) 220252504Slstewart#define V_cdg_beta_delay VNET(cdg_beta_delay) 221252504Slstewart#define V_cdg_beta_loss VNET(cdg_beta_loss) 222252504Slstewart#define V_cdg_smoothing_factor VNET(cdg_smoothing_factor) 223252504Slstewart#define V_cdg_exp_backoff_scale VNET(cdg_exp_backoff_scale) 224252504Slstewart#define V_cdg_consec_cong VNET(cdg_consec_cong) 225252504Slstewart#define V_cdg_hold_backoff VNET(cdg_hold_backoff) 226252504Slstewart 227252504Slstewart/* Function prototypes. */ 228252504Slstewartstatic int cdg_mod_init(void); 229252504Slstewartstatic void cdg_conn_init(struct cc_var *ccv); 230252504Slstewartstatic int cdg_cb_init(struct cc_var *ccv); 231252504Slstewartstatic void cdg_cb_destroy(struct cc_var *ccv); 232252504Slstewartstatic void cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type); 233252504Slstewartstatic void cdg_ack_received(struct cc_var *ccv, uint16_t ack_type); 234252504Slstewart 235252504Slstewartstruct cc_algo cdg_cc_algo = { 236252504Slstewart .name = "cdg", 237252504Slstewart .mod_init = cdg_mod_init, 238252504Slstewart .ack_received = cdg_ack_received, 239252504Slstewart .cb_destroy = cdg_cb_destroy, 240252504Slstewart .cb_init = cdg_cb_init, 241252504Slstewart .conn_init = cdg_conn_init, 242252504Slstewart .cong_signal = cdg_cong_signal 243252504Slstewart}; 244252504Slstewart 245252504Slstewart/* Vnet created and being initialised. */ 246252504Slstewartstatic void 247252504Slstewartcdg_init_vnet(const void *unused __unused) 248252504Slstewart{ 249252504Slstewart 250252504Slstewart V_cdg_alpha_inc = 0; 251252504Slstewart V_cdg_beta_delay = 70; 252252504Slstewart V_cdg_beta_loss = 50; 253252504Slstewart V_cdg_smoothing_factor = 8; 254252504Slstewart V_cdg_exp_backoff_scale = 3; 255252504Slstewart V_cdg_consec_cong = 5; 256252504Slstewart V_cdg_hold_backoff = 5; 257252504Slstewart} 258252504Slstewart 259252504Slstewartstatic int 260252504Slstewartcdg_mod_init(void) 261252504Slstewart{ 262252504Slstewart VNET_ITERATOR_DECL(v); 263252504Slstewart 264252504Slstewart ertt_id = khelp_get_id("ertt"); 265252504Slstewart if (ertt_id <= 0) 266252504Slstewart return (EINVAL); 267252504Slstewart 268252504Slstewart qdiffsample_zone = uma_zcreate("cdg_qdiffsample", 269252504Slstewart sizeof(struct qdiff_sample), NULL, NULL, NULL, NULL, 0, 0); 270252504Slstewart 271252504Slstewart VNET_LIST_RLOCK(); 272252504Slstewart VNET_FOREACH(v) { 273252504Slstewart CURVNET_SET(v); 274252504Slstewart cdg_init_vnet(NULL); 275252504Slstewart CURVNET_RESTORE(); 276252504Slstewart } 277252504Slstewart VNET_LIST_RUNLOCK(); 278252504Slstewart 279252504Slstewart cdg_cc_algo.post_recovery = newreno_cc_algo.post_recovery; 280252504Slstewart cdg_cc_algo.after_idle = newreno_cc_algo.after_idle; 281252504Slstewart 282252504Slstewart return (0); 283252504Slstewart} 284252504Slstewart 285252504Slstewartstatic int 286252504Slstewartcdg_cb_init(struct cc_var *ccv) 287252504Slstewart{ 288252504Slstewart struct cdg *cdg_data; 289252504Slstewart 290252504Slstewart cdg_data = malloc(sizeof(struct cdg), M_CDG, M_NOWAIT); 291252504Slstewart if (cdg_data == NULL) 292252504Slstewart return (ENOMEM); 293252504Slstewart 294252504Slstewart cdg_data->shadow_w = 0; 295252504Slstewart cdg_data->max_qtrend = 0; 296252504Slstewart cdg_data->min_qtrend = 0; 297252504Slstewart cdg_data->queue_state = CDG_Q_UNKNOWN; 298252504Slstewart cdg_data->maxrtt_in_rtt = 0; 299252504Slstewart cdg_data->maxrtt_in_prevrtt = 0; 300252504Slstewart cdg_data->minrtt_in_rtt = INT_MAX; 301252504Slstewart cdg_data->minrtt_in_prevrtt = 0; 302252504Slstewart cdg_data->window_incr = 0; 303252504Slstewart cdg_data->rtt_count = 0; 304252504Slstewart cdg_data->consec_cong_cnt = 0; 305252504Slstewart cdg_data->sample_q_size = V_cdg_smoothing_factor; 306252504Slstewart cdg_data->num_samples = 0; 307252504Slstewart STAILQ_INIT(&cdg_data->qdiffmin_q); 308252504Slstewart STAILQ_INIT(&cdg_data->qdiffmax_q); 309252504Slstewart 310252504Slstewart ccv->cc_data = cdg_data; 311252504Slstewart 312252504Slstewart return (0); 313252504Slstewart} 314252504Slstewart 315252504Slstewartstatic void 316252504Slstewartcdg_conn_init(struct cc_var *ccv) 317252504Slstewart{ 318252504Slstewart struct cdg *cdg_data = ccv->cc_data; 319252504Slstewart 320252504Slstewart /* 321252504Slstewart * Initialise the shadow_cwnd in case we are competing with loss based 322252504Slstewart * flows from the start 323252504Slstewart */ 324252504Slstewart cdg_data->shadow_w = CCV(ccv, snd_cwnd); 325252504Slstewart} 326252504Slstewart 327252504Slstewartstatic void 328252504Slstewartcdg_cb_destroy(struct cc_var *ccv) 329252504Slstewart{ 330252504Slstewart struct cdg *cdg_data; 331252504Slstewart struct qdiff_sample *qds, *qds_n; 332252504Slstewart 333252504Slstewart cdg_data = ccv->cc_data; 334252504Slstewart 335252504Slstewart qds = STAILQ_FIRST(&cdg_data->qdiffmin_q); 336252504Slstewart while (qds != NULL) { 337252504Slstewart qds_n = STAILQ_NEXT(qds, qdiff_lnk); 338252504Slstewart uma_zfree(qdiffsample_zone,qds); 339252504Slstewart qds = qds_n; 340252504Slstewart } 341252504Slstewart 342252504Slstewart qds = STAILQ_FIRST(&cdg_data->qdiffmax_q); 343252504Slstewart while (qds != NULL) { 344252504Slstewart qds_n = STAILQ_NEXT(qds, qdiff_lnk); 345252504Slstewart uma_zfree(qdiffsample_zone,qds); 346252504Slstewart qds = qds_n; 347252504Slstewart } 348252504Slstewart 349252504Slstewart free(ccv->cc_data, M_CDG); 350252504Slstewart} 351252504Slstewart 352252504Slstewartstatic int 353252504Slstewartcdg_beta_handler(SYSCTL_HANDLER_ARGS) 354252504Slstewart{ 355252504Slstewart 356252504Slstewart if (req->newptr != NULL && 357252504Slstewart (CAST_PTR_INT(req->newptr) == 0 || CAST_PTR_INT(req->newptr) > 100)) 358252504Slstewart return (EINVAL); 359252504Slstewart 360252504Slstewart return (vnet_sysctl_handle_uint(oidp, arg1, arg2, req)); 361252504Slstewart} 362252504Slstewart 363252504Slstewartstatic int 364252504Slstewartcdg_exp_backoff_scale_handler(SYSCTL_HANDLER_ARGS) 365252504Slstewart{ 366252504Slstewart 367252504Slstewart if (req->newptr != NULL && CAST_PTR_INT(req->newptr) < 1) 368252504Slstewart return (EINVAL); 369252504Slstewart 370252504Slstewart return (vnet_sysctl_handle_uint(oidp, arg1, arg2, req)); 371252504Slstewart} 372252504Slstewart 373252504Slstewartstatic inline unsigned long 374252504Slstewartcdg_window_decrease(struct cc_var *ccv, unsigned long owin, unsigned int beta) 375252504Slstewart{ 376252504Slstewart 377252504Slstewart return ((ulmin(CCV(ccv, snd_wnd), owin) * beta) / 100); 378252504Slstewart} 379252504Slstewart 380252504Slstewart/* 381252504Slstewart * Window increase function 382252504Slstewart * This window increase function is independent of the initial window size 383252504Slstewart * to ensure small window flows are not discriminated against (i.e. fairness). 384252504Slstewart * It increases at 1pkt/rtt like Reno for alpha_inc rtts, and then 2pkts/rtt for 385252504Slstewart * the next alpha_inc rtts, etc. 386252504Slstewart */ 387252504Slstewartstatic void 388252504Slstewartcdg_window_increase(struct cc_var *ccv, int new_measurement) 389252504Slstewart{ 390252504Slstewart struct cdg *cdg_data; 391252504Slstewart int incr, s_w_incr; 392252504Slstewart 393252504Slstewart cdg_data = ccv->cc_data; 394252504Slstewart incr = s_w_incr = 0; 395252504Slstewart 396252504Slstewart if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)) { 397252504Slstewart /* Slow start. */ 398252504Slstewart incr = CCV(ccv, t_maxseg); 399252504Slstewart s_w_incr = incr; 400252504Slstewart cdg_data->window_incr = cdg_data->rtt_count = 0; 401252504Slstewart } else { 402252504Slstewart /* Congestion avoidance. */ 403252504Slstewart if (new_measurement) { 404252504Slstewart s_w_incr = CCV(ccv, t_maxseg); 405252504Slstewart if (V_cdg_alpha_inc == 0) { 406252504Slstewart incr = CCV(ccv, t_maxseg); 407252504Slstewart } else { 408252504Slstewart if (++cdg_data->rtt_count >= V_cdg_alpha_inc) { 409252504Slstewart cdg_data->window_incr++; 410252504Slstewart cdg_data->rtt_count = 0; 411252504Slstewart } 412252504Slstewart incr = CCV(ccv, t_maxseg) * 413252504Slstewart cdg_data->window_incr; 414252504Slstewart } 415252504Slstewart } 416252504Slstewart } 417252504Slstewart 418252504Slstewart if (cdg_data->shadow_w > 0) 419252504Slstewart cdg_data->shadow_w = ulmin(cdg_data->shadow_w + s_w_incr, 420252504Slstewart TCP_MAXWIN << CCV(ccv, snd_scale)); 421252504Slstewart 422252504Slstewart CCV(ccv, snd_cwnd) = ulmin(CCV(ccv, snd_cwnd) + incr, 423252504Slstewart TCP_MAXWIN << CCV(ccv, snd_scale)); 424252504Slstewart} 425252504Slstewart 426252504Slstewartstatic void 427252504Slstewartcdg_cong_signal(struct cc_var *ccv, uint32_t signal_type) 428252504Slstewart{ 429252504Slstewart struct cdg *cdg_data = ccv->cc_data; 430252504Slstewart 431252504Slstewart switch(signal_type) { 432252504Slstewart case CC_CDG_DELAY: 433252504Slstewart CCV(ccv, snd_ssthresh) = cdg_window_decrease(ccv, 434252504Slstewart CCV(ccv, snd_cwnd), V_cdg_beta_delay); 435252504Slstewart CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); 436252504Slstewart CCV(ccv, snd_recover) = CCV(ccv, snd_max); 437252504Slstewart cdg_data->window_incr = cdg_data->rtt_count = 0; 438252504Slstewart ENTER_CONGRECOVERY(CCV(ccv, t_flags)); 439252504Slstewart break; 440252504Slstewart case CC_NDUPACK: 441252504Slstewart /* 442252504Slstewart * If already responding to congestion OR we have guessed no 443252504Slstewart * queue in the path is full. 444252504Slstewart */ 445252504Slstewart if (IN_CONGRECOVERY(CCV(ccv, t_flags)) || 446252504Slstewart cdg_data->queue_state < CDG_Q_FULL) { 447252504Slstewart CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); 448252504Slstewart CCV(ccv, snd_recover) = CCV(ccv, snd_max); 449252504Slstewart } else { 450252504Slstewart /* 451252504Slstewart * Loss is likely to be congestion related. We have 452252504Slstewart * inferred a queue full state, so have shadow window 453252504Slstewart * react to loss as NewReno would. 454252504Slstewart */ 455252504Slstewart if (cdg_data->shadow_w > 0) 456252504Slstewart cdg_data->shadow_w = cdg_window_decrease(ccv, 457252504Slstewart cdg_data->shadow_w, RENO_BETA); 458252504Slstewart 459252504Slstewart CCV(ccv, snd_ssthresh) = ulmax(cdg_data->shadow_w, 460252504Slstewart cdg_window_decrease(ccv, CCV(ccv, snd_cwnd), 461252504Slstewart V_cdg_beta_loss)); 462252504Slstewart 463252504Slstewart cdg_data->window_incr = cdg_data->rtt_count = 0; 464252504Slstewart } 465252504Slstewart ENTER_RECOVERY(CCV(ccv, t_flags)); 466252504Slstewart break; 467252504Slstewart default: 468252504Slstewart newreno_cc_algo.cong_signal(ccv, signal_type); 469252504Slstewart break; 470252504Slstewart } 471252504Slstewart} 472252504Slstewart 473252504Slstewart/* 474252504Slstewart * Using a negative exponential probabilistic backoff so that sources with 475252504Slstewart * varying RTTs which share the same link will, on average, have the same 476252504Slstewart * probability of backoff over time. 477252504Slstewart * 478252504Slstewart * Prob_backoff = 1 - exp(-qtrend / V_cdg_exp_backoff_scale), where 479252504Slstewart * V_cdg_exp_backoff_scale is the average qtrend for the exponential backoff. 480252504Slstewart */ 481252504Slstewartstatic inline int 482252504Slstewartprob_backoff(long qtrend) 483252504Slstewart{ 484252504Slstewart int backoff, idx, p; 485252504Slstewart 486252504Slstewart backoff = (qtrend > ((MAXGRAD * V_cdg_exp_backoff_scale) << D_P_E)); 487252504Slstewart 488252504Slstewart if (!backoff) { 489252504Slstewart if (V_cdg_exp_backoff_scale > 1) 490252504Slstewart idx = (qtrend + V_cdg_exp_backoff_scale / 2) / 491252504Slstewart V_cdg_exp_backoff_scale; 492252504Slstewart else 493252504Slstewart idx = qtrend; 494252504Slstewart 495252504Slstewart /* Backoff probability proportional to rate of queue growth. */ 496252504Slstewart p = (INT_MAX / (1 << EXP_PREC)) * probexp[idx]; 497252504Slstewart backoff = (random() < p); 498252504Slstewart } 499252504Slstewart 500252504Slstewart return (backoff); 501252504Slstewart} 502252504Slstewart 503252504Slstewartstatic inline void 504252504Slstewartcalc_moving_average(struct cdg *cdg_data, long qdiff_max, long qdiff_min) 505252504Slstewart{ 506252504Slstewart struct qdiff_sample *qds; 507252504Slstewart 508252504Slstewart ++cdg_data->num_samples; 509252504Slstewart if (cdg_data->num_samples > cdg_data->sample_q_size) { 510252504Slstewart /* Minimum RTT. */ 511252504Slstewart qds = STAILQ_FIRST(&cdg_data->qdiffmin_q); 512252504Slstewart cdg_data->min_qtrend = cdg_data->min_qtrend + 513252504Slstewart (qdiff_min - qds->qdiff) / cdg_data->sample_q_size; 514252504Slstewart STAILQ_REMOVE_HEAD(&cdg_data->qdiffmin_q, qdiff_lnk); 515252504Slstewart qds->qdiff = qdiff_min; 516252504Slstewart STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds, qdiff_lnk); 517252504Slstewart 518252504Slstewart /* Maximum RTT. */ 519252504Slstewart qds = STAILQ_FIRST(&cdg_data->qdiffmax_q); 520252504Slstewart cdg_data->max_qtrend = cdg_data->max_qtrend + 521252504Slstewart (qdiff_max - qds->qdiff) / cdg_data->sample_q_size; 522252504Slstewart STAILQ_REMOVE_HEAD(&cdg_data->qdiffmax_q, qdiff_lnk); 523252504Slstewart qds->qdiff = qdiff_max; 524252504Slstewart STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds, qdiff_lnk); 525252504Slstewart --cdg_data->num_samples; 526252504Slstewart } else { 527252504Slstewart qds = uma_zalloc(qdiffsample_zone, M_NOWAIT); 528252504Slstewart if (qds != NULL) { 529252504Slstewart cdg_data->min_qtrend = cdg_data->min_qtrend + 530252504Slstewart qdiff_min / cdg_data->sample_q_size; 531252504Slstewart qds->qdiff = qdiff_min; 532252504Slstewart STAILQ_INSERT_TAIL(&cdg_data->qdiffmin_q, qds, 533252504Slstewart qdiff_lnk); 534252504Slstewart } 535252504Slstewart 536252504Slstewart qds = uma_zalloc(qdiffsample_zone, M_NOWAIT); 537252504Slstewart if (qds) { 538252504Slstewart cdg_data->max_qtrend = cdg_data->max_qtrend + 539252504Slstewart qdiff_max / cdg_data->sample_q_size; 540252504Slstewart qds->qdiff = qdiff_max; 541252504Slstewart STAILQ_INSERT_TAIL(&cdg_data->qdiffmax_q, qds, 542252504Slstewart qdiff_lnk); 543252504Slstewart } 544252504Slstewart } 545252504Slstewart} 546252504Slstewart 547252504Slstewartstatic void 548252504Slstewartcdg_ack_received(struct cc_var *ccv, uint16_t ack_type) 549252504Slstewart{ 550252504Slstewart struct cdg *cdg_data; 551252504Slstewart struct ertt *e_t; 552252504Slstewart long qdiff_max, qdiff_min; 553252504Slstewart int congestion, new_measurement, slowstart; 554252504Slstewart 555252504Slstewart cdg_data = ccv->cc_data; 556252504Slstewart e_t = (struct ertt *)khelp_get_osd(CCV(ccv, osd), ertt_id); 557252504Slstewart new_measurement = e_t->flags & ERTT_NEW_MEASUREMENT; 558252504Slstewart congestion = 0; 559252504Slstewart cdg_data->maxrtt_in_rtt = imax(e_t->rtt, cdg_data->maxrtt_in_rtt); 560252504Slstewart cdg_data->minrtt_in_rtt = imin(e_t->rtt, cdg_data->minrtt_in_rtt); 561252504Slstewart 562252504Slstewart if (new_measurement) { 563252504Slstewart slowstart = (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh)); 564252504Slstewart /* 565252504Slstewart * Update smoothed gradient measurements. Since we are only 566252504Slstewart * using one measurement per RTT, use max or min rtt_in_rtt. 567252504Slstewart * This is also less noisy than a sample RTT measurement. Max 568252504Slstewart * RTT measurements can have trouble due to OS issues. 569252504Slstewart */ 570252504Slstewart if (cdg_data->maxrtt_in_prevrtt) { 571252504Slstewart qdiff_max = ((long)(cdg_data->maxrtt_in_rtt - 572252504Slstewart cdg_data->maxrtt_in_prevrtt) << D_P_E ); 573252504Slstewart qdiff_min = ((long)(cdg_data->minrtt_in_rtt - 574252504Slstewart cdg_data->minrtt_in_prevrtt) << D_P_E ); 575252504Slstewart 576252504Slstewart calc_moving_average(cdg_data, qdiff_max, qdiff_min); 577252504Slstewart 578252504Slstewart /* Probabilistic backoff with respect to gradient. */ 579252504Slstewart if (slowstart && qdiff_min > 0) 580252504Slstewart congestion = prob_backoff(qdiff_min); 581252504Slstewart else if (cdg_data->min_qtrend > 0) 582252504Slstewart congestion = prob_backoff(cdg_data->min_qtrend); 583252504Slstewart else if (slowstart && qdiff_max > 0) 584252504Slstewart congestion = prob_backoff(qdiff_max); 585252504Slstewart else if (cdg_data->max_qtrend > 0) 586252504Slstewart congestion = prob_backoff(cdg_data->max_qtrend); 587252504Slstewart 588252504Slstewart /* Update estimate of queue state. */ 589252504Slstewart if (cdg_data->min_qtrend > 0 && 590252504Slstewart cdg_data->max_qtrend <= 0) { 591252504Slstewart cdg_data->queue_state = CDG_Q_FULL; 592252504Slstewart } else if (cdg_data->min_qtrend >= 0 && 593252504Slstewart cdg_data->max_qtrend < 0) { 594252504Slstewart cdg_data->queue_state = CDG_Q_EMPTY; 595252504Slstewart cdg_data->shadow_w = 0; 596252504Slstewart } else if (cdg_data->min_qtrend > 0 && 597252504Slstewart cdg_data->max_qtrend > 0) { 598252504Slstewart cdg_data->queue_state = CDG_Q_RISING; 599252504Slstewart } else if (cdg_data->min_qtrend < 0 && 600252504Slstewart cdg_data->max_qtrend < 0) { 601252504Slstewart cdg_data->queue_state = CDG_Q_FALLING; 602252504Slstewart } 603252504Slstewart 604252504Slstewart if (cdg_data->min_qtrend < 0 || 605252504Slstewart cdg_data->max_qtrend < 0) 606252504Slstewart cdg_data->consec_cong_cnt = 0; 607252504Slstewart } 608252504Slstewart 609252504Slstewart cdg_data->minrtt_in_prevrtt = cdg_data->minrtt_in_rtt; 610252504Slstewart cdg_data->minrtt_in_rtt = INT_MAX; 611252504Slstewart cdg_data->maxrtt_in_prevrtt = cdg_data->maxrtt_in_rtt; 612252504Slstewart cdg_data->maxrtt_in_rtt = 0; 613252504Slstewart e_t->flags &= ~ERTT_NEW_MEASUREMENT; 614252504Slstewart } 615252504Slstewart 616252504Slstewart if (congestion) { 617252504Slstewart cdg_data->consec_cong_cnt++; 618252504Slstewart if (!IN_RECOVERY(CCV(ccv, t_flags))) { 619252504Slstewart if (cdg_data->consec_cong_cnt <= V_cdg_consec_cong) 620252504Slstewart cdg_cong_signal(ccv, CC_CDG_DELAY); 621252504Slstewart else 622252504Slstewart /* 623252504Slstewart * We have been backing off but the queue is not 624252504Slstewart * falling. Assume we are competing with 625252504Slstewart * loss-based flows and don't back off for the 626252504Slstewart * next V_cdg_hold_backoff RTT periods. 627252504Slstewart */ 628252504Slstewart if (cdg_data->consec_cong_cnt >= 629252504Slstewart V_cdg_consec_cong + V_cdg_hold_backoff) 630252504Slstewart cdg_data->consec_cong_cnt = 0; 631252504Slstewart 632252504Slstewart /* Won't see effect until 2nd RTT. */ 633252504Slstewart cdg_data->maxrtt_in_prevrtt = 0; 634252504Slstewart /* 635252504Slstewart * Resync shadow window in case we are competing with a 636252504Slstewart * loss based flow 637252504Slstewart */ 638252504Slstewart cdg_data->shadow_w = ulmax(CCV(ccv, snd_cwnd), 639252504Slstewart cdg_data->shadow_w); 640252504Slstewart } 641252504Slstewart } else if (ack_type == CC_ACK) 642252504Slstewart cdg_window_increase(ccv, new_measurement); 643252504Slstewart} 644252504Slstewart 645252504Slstewart/* When a vnet is created and being initialised, init the per-stack CDG vars. */ 646252504SlstewartVNET_SYSINIT(cdg_init_vnet, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, 647252504Slstewart cdg_init_vnet, NULL); 648252504Slstewart 649252504SlstewartSYSCTL_DECL(_net_inet_tcp_cc_cdg); 650252504SlstewartSYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, cdg, CTLFLAG_RW, NULL, 651252504Slstewart "CAIA delay-gradient congestion control related settings"); 652252504Slstewart 653252504SlstewartSYSCTL_STRING(_net_inet_tcp_cc_cdg, OID_AUTO, version, 654252504Slstewart CTLFLAG_RD, CDG_VERSION, sizeof(CDG_VERSION) - 1, 655252504Slstewart "Current algorithm/implementation version number"); 656252504Slstewart 657252504SlstewartSYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, alpha_inc, 658252504Slstewart CTLFLAG_RW, &VNET_NAME(cdg_alpha_inc), 0, 659252504Slstewart "Increment the window increase factor alpha by 1 MSS segment every " 660252504Slstewart "alpha_inc RTTs during congestion avoidance mode."); 661252504Slstewart 662252504SlstewartSYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_delay, 663252504Slstewart CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_beta_delay), 70, 664252504Slstewart &cdg_beta_handler, "IU", 665252504Slstewart "Delay-based window decrease factor as a percentage " 666252504Slstewart "(on delay-based backoff, w = w * beta_delay / 100)"); 667252504Slstewart 668252504SlstewartSYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, beta_loss, 669252504Slstewart CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_beta_loss), 50, 670252504Slstewart &cdg_beta_handler, "IU", 671252504Slstewart "Loss-based window decrease factor as a percentage " 672252504Slstewart "(on loss-based backoff, w = w * beta_loss / 100)"); 673252504Slstewart 674252504SlstewartSYSCTL_VNET_PROC(_net_inet_tcp_cc_cdg, OID_AUTO, exp_backoff_scale, 675252504Slstewart CTLTYPE_UINT|CTLFLAG_RW, &VNET_NAME(cdg_exp_backoff_scale), 2, 676252504Slstewart &cdg_exp_backoff_scale_handler, "IU", 677252504Slstewart "Scaling parameter for the probabilistic exponential backoff"); 678252504Slstewart 679252504SlstewartSYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, smoothing_factor, 680252504Slstewart CTLFLAG_RW, &VNET_NAME(cdg_smoothing_factor), 8, 681252504Slstewart "Number of samples used for moving average smoothing (0 = no smoothing)"); 682252504Slstewart 683252504SlstewartSYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_consec_cong, 684252504Slstewart CTLFLAG_RW, &VNET_NAME(cdg_consec_cong), 5, 685252504Slstewart "Number of consecutive delay-gradient based congestion episodes which will " 686252504Slstewart "trigger loss based CC compatibility"); 687252504Slstewart 688252504SlstewartSYSCTL_VNET_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compete_hold_backoff, 689252504Slstewart CTLFLAG_RW, &VNET_NAME(cdg_hold_backoff), 5, 690252504Slstewart "Number of consecutive delay-gradient based congestion episodes to hold " 691252504Slstewart "the window backoff for loss based CC compatibility"); 692252504Slstewart 693252504SlstewartDECLARE_CC_MODULE(cdg, &cdg_cc_algo); 694252504Slstewart 695252504SlstewartMODULE_DEPEND(cdg, ertt, 1, 1, 1); 696