1218152Slstewart/*- 2218152Slstewart * Copyright (c) 2009-2010 3218152Slstewart * Swinburne University of Technology, Melbourne, Australia 4218152Slstewart * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> 5218152Slstewart * Copyright (c) 2010-2011 The FreeBSD Foundation 6218152Slstewart * All rights reserved. 7218152Slstewart * 8218152Slstewart * This software was developed at the Centre for Advanced Internet 9220560Slstewart * Architectures, Swinburne University of Technology, by David Hayes and 10220560Slstewart * Lawrence Stewart, made possible in part by a grant from the Cisco University 11220560Slstewart * Research Program Fund at Community Foundation Silicon Valley. 12218152Slstewart * 13218152Slstewart * Portions of this software were developed at the Centre for Advanced Internet 14218152Slstewart * Architectures, Swinburne University of Technology, Melbourne, Australia by 15218152Slstewart * David Hayes under sponsorship from the FreeBSD Foundation. 16218152Slstewart * 17218152Slstewart * Redistribution and use in source and binary forms, with or without 18218152Slstewart * modification, are permitted provided that the following conditions 19218152Slstewart * are met: 20218152Slstewart * 1. Redistributions of source code must retain the above copyright 21218152Slstewart * notice, this list of conditions and the following disclaimer. 22218152Slstewart * 2. Redistributions in binary form must reproduce the above copyright 23218152Slstewart * notice, this list of conditions and the following disclaimer in the 24218152Slstewart * documentation and/or other materials provided with the distribution. 25218152Slstewart * 26218152Slstewart * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27218152Slstewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28218152Slstewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29218152Slstewart * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30218152Slstewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31218152Slstewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32218152Slstewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33218152Slstewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34218152Slstewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35218152Slstewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36218152Slstewart * SUCH DAMAGE. 37218152Slstewart */ 38218152Slstewart 39218152Slstewart/* 40218152Slstewart * An implementation of the Vegas congestion control algorithm for FreeBSD, 41218152Slstewart * based on L. S. Brakmo and L. L. Peterson, "TCP Vegas: end to end congestion 42218152Slstewart * avoidance on a global internet", IEEE J. Sel. Areas Commun., vol. 13, no. 8, 43218152Slstewart * pp. 1465-1480, Oct. 1995. The original Vegas duplicate ack policy has not 44218156Slstewart * been implemented, since clock ticks are not as coarse as they were (i.e. 45218152Slstewart * 500ms) when Vegas was designed. Also, packets are timed once per RTT as in 46218152Slstewart * the original paper. 47218152Slstewart * 48218152Slstewart * Originally released as part of the NewTCP research project at Swinburne 49220560Slstewart * University of Technology's Centre for Advanced Internet Architectures, 50220560Slstewart * Melbourne, Australia, which was made possible in part by a grant from the 51220560Slstewart * Cisco University Research Program Fund at Community Foundation Silicon 52220560Slstewart * Valley. More details are available at: 53218152Slstewart * http://caia.swin.edu.au/urp/newtcp/ 54218152Slstewart */ 55218152Slstewart 56218152Slstewart#include <sys/cdefs.h> 57218152Slstewart__FBSDID("$FreeBSD: stable/11/sys/netinet/cc/cc_vegas.c 342189 2018-12-18 09:16:04Z brooks $"); 58218152Slstewart 59218152Slstewart#include <sys/param.h> 60218152Slstewart#include <sys/kernel.h> 61218152Slstewart#include <sys/khelp.h> 62218152Slstewart#include <sys/malloc.h> 63218152Slstewart#include <sys/module.h> 64218152Slstewart#include <sys/queue.h> 65218152Slstewart#include <sys/socket.h> 66218152Slstewart#include <sys/socketvar.h> 67218152Slstewart#include <sys/sysctl.h> 68218152Slstewart#include <sys/systm.h> 69218152Slstewart 70218152Slstewart#include <net/vnet.h> 71218152Slstewart 72294535Sglebius#include <netinet/tcp.h> 73218152Slstewart#include <netinet/tcp_timer.h> 74218152Slstewart#include <netinet/tcp_var.h> 75294931Sglebius#include <netinet/cc/cc.h> 76218152Slstewart#include <netinet/cc/cc_module.h> 77218152Slstewart 78218152Slstewart#include <netinet/khelp/h_ertt.h> 79218152Slstewart 80218152Slstewart/* 81218152Slstewart * Private signal type for rate based congestion signal. 82218152Slstewart * See <netinet/cc.h> for appropriate bit-range to use for private signals. 83218152Slstewart */ 84218152Slstewart#define CC_VEGAS_RATE 0x01000000 85218152Slstewart 86218152Slstewartstatic void vegas_ack_received(struct cc_var *ccv, uint16_t ack_type); 87218152Slstewartstatic void vegas_cb_destroy(struct cc_var *ccv); 88218152Slstewartstatic int vegas_cb_init(struct cc_var *ccv); 89218152Slstewartstatic void vegas_cong_signal(struct cc_var *ccv, uint32_t signal_type); 90218152Slstewartstatic void vegas_conn_init(struct cc_var *ccv); 91218152Slstewartstatic int vegas_mod_init(void); 92218152Slstewart 93218152Slstewartstruct vegas { 94218152Slstewart int slow_start_toggle; 95218152Slstewart}; 96218152Slstewart 97218152Slstewartstatic int32_t ertt_id; 98218152Slstewart 99218152Slstewartstatic VNET_DEFINE(uint32_t, vegas_alpha) = 1; 100218152Slstewartstatic VNET_DEFINE(uint32_t, vegas_beta) = 3; 101218152Slstewart#define V_vegas_alpha VNET(vegas_alpha) 102218152Slstewart#define V_vegas_beta VNET(vegas_beta) 103218152Slstewart 104220592Spluknetstatic MALLOC_DEFINE(M_VEGAS, "vegas data", 105218152Slstewart "Per connection data required for the Vegas congestion control algorithm"); 106218152Slstewart 107218152Slstewartstruct cc_algo vegas_cc_algo = { 108218152Slstewart .name = "vegas", 109218152Slstewart .ack_received = vegas_ack_received, 110218152Slstewart .cb_destroy = vegas_cb_destroy, 111218152Slstewart .cb_init = vegas_cb_init, 112218152Slstewart .cong_signal = vegas_cong_signal, 113218152Slstewart .conn_init = vegas_conn_init, 114218152Slstewart .mod_init = vegas_mod_init 115218152Slstewart}; 116218152Slstewart 117218152Slstewart/* 118218152Slstewart * The vegas window adjustment is done once every RTT, as indicated by the 119218152Slstewart * ERTT_NEW_MEASUREMENT flag. This flag is reset once the new measurment data 120218152Slstewart * has been used. 121218152Slstewart */ 122218152Slstewartstatic void 123218152Slstewartvegas_ack_received(struct cc_var *ccv, uint16_t ack_type) 124218152Slstewart{ 125218152Slstewart struct ertt *e_t; 126218152Slstewart struct vegas *vegas_data; 127218152Slstewart long actual_tx_rate, expected_tx_rate, ndiff; 128218152Slstewart 129218152Slstewart e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); 130218152Slstewart vegas_data = ccv->cc_data; 131218152Slstewart 132218152Slstewart if (e_t->flags & ERTT_NEW_MEASUREMENT) { /* Once per RTT. */ 133218152Slstewart if (e_t->minrtt && e_t->markedpkt_rtt) { 134218152Slstewart expected_tx_rate = e_t->marked_snd_cwnd / e_t->minrtt; 135218152Slstewart actual_tx_rate = e_t->bytes_tx_in_marked_rtt / 136218152Slstewart e_t->markedpkt_rtt; 137218152Slstewart ndiff = (expected_tx_rate - actual_tx_rate) * 138218152Slstewart e_t->minrtt / CCV(ccv, t_maxseg); 139218152Slstewart 140218152Slstewart if (ndiff < V_vegas_alpha) { 141218152Slstewart if (CCV(ccv, snd_cwnd) <= 142218152Slstewart CCV(ccv, snd_ssthresh)) { 143218152Slstewart vegas_data->slow_start_toggle = 144218152Slstewart vegas_data->slow_start_toggle ? 145218152Slstewart 0 : 1; 146218152Slstewart } else { 147218152Slstewart vegas_data->slow_start_toggle = 0; 148218152Slstewart CCV(ccv, snd_cwnd) = 149218152Slstewart min(CCV(ccv, snd_cwnd) + 150218152Slstewart CCV(ccv, t_maxseg), 151218152Slstewart TCP_MAXWIN << CCV(ccv, snd_scale)); 152218152Slstewart } 153218152Slstewart } else if (ndiff > V_vegas_beta) { 154218152Slstewart /* Rate-based congestion. */ 155218152Slstewart vegas_cong_signal(ccv, CC_VEGAS_RATE); 156218152Slstewart vegas_data->slow_start_toggle = 0; 157218152Slstewart } 158218152Slstewart } 159218152Slstewart e_t->flags &= ~ERTT_NEW_MEASUREMENT; 160218152Slstewart } 161218152Slstewart 162218152Slstewart if (vegas_data->slow_start_toggle) 163218152Slstewart newreno_cc_algo.ack_received(ccv, ack_type); 164218152Slstewart} 165218152Slstewart 166218152Slstewartstatic void 167218152Slstewartvegas_cb_destroy(struct cc_var *ccv) 168218152Slstewart{ 169218152Slstewart 170218152Slstewart if (ccv->cc_data != NULL) 171218152Slstewart free(ccv->cc_data, M_VEGAS); 172218152Slstewart} 173218152Slstewart 174218152Slstewartstatic int 175218152Slstewartvegas_cb_init(struct cc_var *ccv) 176218152Slstewart{ 177218152Slstewart struct vegas *vegas_data; 178218152Slstewart 179218152Slstewart vegas_data = malloc(sizeof(struct vegas), M_VEGAS, M_NOWAIT); 180218152Slstewart 181218152Slstewart if (vegas_data == NULL) 182218152Slstewart return (ENOMEM); 183218152Slstewart 184218152Slstewart vegas_data->slow_start_toggle = 1; 185218152Slstewart ccv->cc_data = vegas_data; 186218152Slstewart 187218152Slstewart return (0); 188218152Slstewart} 189218152Slstewart 190218152Slstewart/* 191218152Slstewart * If congestion has been triggered triggered by the Vegas measured rates, it is 192218152Slstewart * handled here, otherwise it falls back to newreno's congestion handling. 193218152Slstewart */ 194218152Slstewartstatic void 195218152Slstewartvegas_cong_signal(struct cc_var *ccv, uint32_t signal_type) 196218152Slstewart{ 197218152Slstewart struct vegas *vegas_data; 198218152Slstewart int presignalrecov; 199218152Slstewart 200218152Slstewart vegas_data = ccv->cc_data; 201218152Slstewart 202218152Slstewart if (IN_RECOVERY(CCV(ccv, t_flags))) 203218152Slstewart presignalrecov = 1; 204218152Slstewart else 205218152Slstewart presignalrecov = 0; 206218152Slstewart 207218152Slstewart switch(signal_type) { 208218152Slstewart case CC_VEGAS_RATE: 209218152Slstewart if (!IN_RECOVERY(CCV(ccv, t_flags))) { 210218152Slstewart CCV(ccv, snd_cwnd) = max(2 * CCV(ccv, t_maxseg), 211218152Slstewart CCV(ccv, snd_cwnd) - CCV(ccv, t_maxseg)); 212218152Slstewart if (CCV(ccv, snd_cwnd) < CCV(ccv, snd_ssthresh)) 213218152Slstewart /* Exit slow start. */ 214218152Slstewart CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); 215218152Slstewart } 216218152Slstewart break; 217218152Slstewart 218218152Slstewart default: 219218152Slstewart newreno_cc_algo.cong_signal(ccv, signal_type); 220218152Slstewart } 221218152Slstewart 222218152Slstewart if (IN_RECOVERY(CCV(ccv, t_flags)) && !presignalrecov) 223218152Slstewart vegas_data->slow_start_toggle = 224218152Slstewart (CCV(ccv, snd_cwnd) < CCV(ccv, snd_ssthresh)) ? 1 : 0; 225218152Slstewart} 226218152Slstewart 227218152Slstewartstatic void 228218152Slstewartvegas_conn_init(struct cc_var *ccv) 229218152Slstewart{ 230218152Slstewart struct vegas *vegas_data; 231218152Slstewart 232218152Slstewart vegas_data = ccv->cc_data; 233218152Slstewart vegas_data->slow_start_toggle = 1; 234218152Slstewart} 235218152Slstewart 236218152Slstewartstatic int 237218152Slstewartvegas_mod_init(void) 238218152Slstewart{ 239218152Slstewart 240218152Slstewart ertt_id = khelp_get_id("ertt"); 241218152Slstewart if (ertt_id <= 0) { 242218152Slstewart printf("%s: h_ertt module not found\n", __func__); 243218152Slstewart return (ENOENT); 244218152Slstewart } 245218152Slstewart 246218152Slstewart vegas_cc_algo.after_idle = newreno_cc_algo.after_idle; 247218152Slstewart vegas_cc_algo.post_recovery = newreno_cc_algo.post_recovery; 248218152Slstewart 249218152Slstewart return (0); 250218152Slstewart} 251218152Slstewart 252218152Slstewartstatic int 253218152Slstewartvegas_alpha_handler(SYSCTL_HANDLER_ARGS) 254218152Slstewart{ 255218152Slstewart int error; 256218152Slstewart uint32_t new; 257218152Slstewart 258218152Slstewart new = V_vegas_alpha; 259218152Slstewart error = sysctl_handle_int(oidp, &new, 0, req); 260218152Slstewart if (error == 0 && req->newptr != NULL) { 261342189Sbrooks if (new == 0 || new > V_vegas_beta) 262218152Slstewart error = EINVAL; 263218152Slstewart else 264218152Slstewart V_vegas_alpha = new; 265218152Slstewart } 266218152Slstewart 267218152Slstewart return (error); 268218152Slstewart} 269218152Slstewart 270218152Slstewartstatic int 271218152Slstewartvegas_beta_handler(SYSCTL_HANDLER_ARGS) 272218152Slstewart{ 273218152Slstewart int error; 274218152Slstewart uint32_t new; 275218152Slstewart 276218152Slstewart new = V_vegas_beta; 277218152Slstewart error = sysctl_handle_int(oidp, &new, 0, req); 278218152Slstewart if (error == 0 && req->newptr != NULL) { 279342189Sbrooks if (new == 0 || new < V_vegas_alpha) 280218152Slstewart error = EINVAL; 281218152Slstewart else 282218152Slstewart V_vegas_beta = new; 283218152Slstewart } 284218152Slstewart 285218152Slstewart return (error); 286218152Slstewart} 287218152Slstewart 288218152SlstewartSYSCTL_DECL(_net_inet_tcp_cc_vegas); 289218152SlstewartSYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, vegas, CTLFLAG_RW, NULL, 290218152Slstewart "Vegas related settings"); 291218152Slstewart 292274225SglebiusSYSCTL_PROC(_net_inet_tcp_cc_vegas, OID_AUTO, alpha, 293274225Sglebius CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 294274225Sglebius &VNET_NAME(vegas_alpha), 1, &vegas_alpha_handler, "IU", 295274225Sglebius "vegas alpha, specified as number of \"buffers\" (0 < alpha < beta)"); 296218152Slstewart 297274225SglebiusSYSCTL_PROC(_net_inet_tcp_cc_vegas, OID_AUTO, beta, 298274225Sglebius CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, 299274225Sglebius &VNET_NAME(vegas_beta), 3, &vegas_beta_handler, "IU", 300274225Sglebius "vegas beta, specified as number of \"buffers\" (0 < alpha < beta)"); 301218152Slstewart 302218152SlstewartDECLARE_CC_MODULE(vegas, &vegas_cc_algo); 303218152SlstewartMODULE_DEPEND(vegas, ertt, 1, 1, 1); 304