1215166Slstewart/*- 2215166Slstewart * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994, 1995 3215166Slstewart * The Regents of the University of California. 4215166Slstewart * Copyright (c) 2007-2008,2010 5215166Slstewart * Swinburne University of Technology, Melbourne, Australia. 6215166Slstewart * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 7215166Slstewart * Copyright (c) 2010 The FreeBSD Foundation 8215166Slstewart * All rights reserved. 9215166Slstewart * 10215166Slstewart * This software was developed at the Centre for Advanced Internet 11220560Slstewart * Architectures, Swinburne University of Technology, by Lawrence Stewart, James 12220560Slstewart * Healy and David Hayes, made possible in part by a grant from the Cisco 13220560Slstewart * University Research Program Fund at Community Foundation Silicon Valley. 14215166Slstewart * 15215166Slstewart * Portions of this software were developed at the Centre for Advanced 16215166Slstewart * Internet Architectures, Swinburne University of Technology, Melbourne, 17215166Slstewart * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 18215166Slstewart * 19215166Slstewart * Redistribution and use in source and binary forms, with or without 20215166Slstewart * modification, are permitted provided that the following conditions 21215166Slstewart * are met: 22215166Slstewart * 1. Redistributions of source code must retain the above copyright 23215166Slstewart * notice, this list of conditions and the following disclaimer. 24215166Slstewart * 2. Redistributions in binary form must reproduce the above copyright 25215166Slstewart * notice, this list of conditions and the following disclaimer in the 26215166Slstewart * documentation and/or other materials provided with the distribution. 27215166Slstewart * 28215166Slstewart * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 29215166Slstewart * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30215166Slstewart * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31215166Slstewart * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 32215166Slstewart * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33215166Slstewart * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34215166Slstewart * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35215166Slstewart * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36215166Slstewart * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37215166Slstewart * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38215166Slstewart * SUCH DAMAGE. 39215166Slstewart */ 40215166Slstewart 41215166Slstewart/* 42215166Slstewart * This software was first released in 2007 by James Healy and Lawrence Stewart 43220560Slstewart * whilst working on the NewTCP research project at Swinburne University of 44220560Slstewart * Technology's Centre for Advanced Internet Architectures, Melbourne, 45220560Slstewart * Australia, which was made possible in part by a grant from the Cisco 46220560Slstewart * University Research Program Fund at Community Foundation Silicon Valley. 47220560Slstewart * More details are available at: 48215166Slstewart * http://caia.swin.edu.au/urp/newtcp/ 49215166Slstewart */ 50215166Slstewart 51215166Slstewart#include <sys/cdefs.h> 52215166Slstewart__FBSDID("$FreeBSD$"); 53215166Slstewart 54215166Slstewart#include <sys/param.h> 55215166Slstewart#include <sys/kernel.h> 56216107Slstewart#include <sys/malloc.h> 57215166Slstewart#include <sys/module.h> 58215166Slstewart#include <sys/socket.h> 59215166Slstewart#include <sys/socketvar.h> 60215166Slstewart#include <sys/sysctl.h> 61216107Slstewart#include <sys/systm.h> 62215166Slstewart 63216107Slstewart#include <net/vnet.h> 64215166Slstewart 65215166Slstewart#include <netinet/cc.h> 66215166Slstewart#include <netinet/tcp_seq.h> 67215166Slstewart#include <netinet/tcp_var.h> 68215166Slstewart 69215166Slstewart#include <netinet/cc/cc_module.h> 70215166Slstewart 71216107Slstewartstatic void newreno_ack_received(struct cc_var *ccv, uint16_t type); 72216107Slstewartstatic void newreno_after_idle(struct cc_var *ccv); 73216107Slstewartstatic void newreno_cong_signal(struct cc_var *ccv, uint32_t type); 74216107Slstewartstatic void newreno_post_recovery(struct cc_var *ccv); 75215166Slstewart 76215166Slstewartstruct cc_algo newreno_cc_algo = { 77215166Slstewart .name = "newreno", 78215166Slstewart .ack_received = newreno_ack_received, 79216107Slstewart .after_idle = newreno_after_idle, 80215166Slstewart .cong_signal = newreno_cong_signal, 81215166Slstewart .post_recovery = newreno_post_recovery, 82215166Slstewart}; 83215166Slstewart 84216107Slstewartstatic void 85215166Slstewartnewreno_ack_received(struct cc_var *ccv, uint16_t type) 86215166Slstewart{ 87215166Slstewart if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && 88215166Slstewart (ccv->flags & CCF_CWND_LIMITED)) { 89215166Slstewart u_int cw = CCV(ccv, snd_cwnd); 90215166Slstewart u_int incr = CCV(ccv, t_maxseg); 91215166Slstewart 92215166Slstewart /* 93215166Slstewart * Regular in-order ACK, open the congestion window. 94215166Slstewart * Method depends on which congestion control state we're 95215166Slstewart * in (slow start or cong avoid) and if ABC (RFC 3465) is 96215166Slstewart * enabled. 97215166Slstewart * 98215166Slstewart * slow start: cwnd <= ssthresh 99215166Slstewart * cong avoid: cwnd > ssthresh 100215166Slstewart * 101215166Slstewart * slow start and ABC (RFC 3465): 102215166Slstewart * Grow cwnd exponentially by the amount of data 103215166Slstewart * ACKed capping the max increment per ACK to 104215166Slstewart * (abc_l_var * maxseg) bytes. 105215166Slstewart * 106215166Slstewart * slow start without ABC (RFC 5681): 107215166Slstewart * Grow cwnd exponentially by maxseg per ACK. 108215166Slstewart * 109215166Slstewart * cong avoid and ABC (RFC 3465): 110215166Slstewart * Grow cwnd linearly by maxseg per RTT for each 111215166Slstewart * cwnd worth of ACKed data. 112215166Slstewart * 113215166Slstewart * cong avoid without ABC (RFC 5681): 114215166Slstewart * Grow cwnd linearly by approximately maxseg per RTT using 115215166Slstewart * maxseg^2 / cwnd per ACK as the increment. 116215166Slstewart * If cwnd > maxseg^2, fix the cwnd increment at 1 byte to 117215166Slstewart * avoid capping cwnd. 118215166Slstewart */ 119215166Slstewart if (cw > CCV(ccv, snd_ssthresh)) { 120215166Slstewart if (V_tcp_do_rfc3465) { 121215166Slstewart if (ccv->flags & CCF_ABC_SENTAWND) 122215166Slstewart ccv->flags &= ~CCF_ABC_SENTAWND; 123215166Slstewart else 124215166Slstewart incr = 0; 125215166Slstewart } else 126215166Slstewart incr = max((incr * incr / cw), 1); 127215166Slstewart } else if (V_tcp_do_rfc3465) { 128215166Slstewart /* 129215166Slstewart * In slow-start with ABC enabled and no RTO in sight? 130215166Slstewart * (Must not use abc_l_var > 1 if slow starting after 131215166Slstewart * an RTO. On RTO, snd_nxt = snd_una, so the 132215166Slstewart * snd_nxt == snd_max check is sufficient to 133215166Slstewart * handle this). 134215166Slstewart * 135215166Slstewart * XXXLAS: Find a way to signal SS after RTO that 136215166Slstewart * doesn't rely on tcpcb vars. 137215166Slstewart */ 138215166Slstewart if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max)) 139215166Slstewart incr = min(ccv->bytes_this_ack, 140215166Slstewart V_tcp_abc_l_var * CCV(ccv, t_maxseg)); 141215166Slstewart else 142215166Slstewart incr = min(ccv->bytes_this_ack, CCV(ccv, t_maxseg)); 143215166Slstewart } 144215166Slstewart /* ABC is on by default, so incr equals 0 frequently. */ 145215166Slstewart if (incr > 0) 146215166Slstewart CCV(ccv, snd_cwnd) = min(cw + incr, 147215166Slstewart TCP_MAXWIN << CCV(ccv, snd_scale)); 148215166Slstewart } 149215166Slstewart} 150215166Slstewart 151216107Slstewartstatic void 152216107Slstewartnewreno_after_idle(struct cc_var *ccv) 153216107Slstewart{ 154216107Slstewart int rw; 155216107Slstewart 156216107Slstewart /* 157216107Slstewart * If we've been idle for more than one retransmit timeout the old 158216107Slstewart * congestion window is no longer current and we have to reduce it to 159216107Slstewart * the restart window before we can transmit again. 160216107Slstewart * 161216107Slstewart * The restart window is the initial window or the last CWND, whichever 162216107Slstewart * is smaller. 163216107Slstewart * 164216107Slstewart * This is done to prevent us from flooding the path with a full CWND at 165216107Slstewart * wirespeed, overloading router and switch buffers along the way. 166216107Slstewart * 167216107Slstewart * See RFC5681 Section 4.1. "Restarting Idle Connections". 168216107Slstewart */ 169216107Slstewart if (V_tcp_do_rfc3390) 170216107Slstewart rw = min(4 * CCV(ccv, t_maxseg), 171216107Slstewart max(2 * CCV(ccv, t_maxseg), 4380)); 172216107Slstewart else 173216107Slstewart rw = CCV(ccv, t_maxseg) * 2; 174216107Slstewart 175216107Slstewart CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd)); 176216107Slstewart} 177216107Slstewart 178215166Slstewart/* 179216107Slstewart * Perform any necessary tasks before we enter congestion recovery. 180215166Slstewart */ 181216107Slstewartstatic void 182215166Slstewartnewreno_cong_signal(struct cc_var *ccv, uint32_t type) 183215166Slstewart{ 184215166Slstewart u_int win; 185215166Slstewart 186218167Slstewart /* Catch algos which mistakenly leak private signal types. */ 187218167Slstewart KASSERT((type & CC_SIGPRIVMASK) == 0, 188218167Slstewart ("%s: congestion signal type 0x%08x is private\n", __func__, type)); 189218167Slstewart 190215166Slstewart win = max(CCV(ccv, snd_cwnd) / 2 / CCV(ccv, t_maxseg), 2) * 191215166Slstewart CCV(ccv, t_maxseg); 192215166Slstewart 193215166Slstewart switch (type) { 194215166Slstewart case CC_NDUPACK: 195215166Slstewart if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { 196215166Slstewart if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) 197215166Slstewart CCV(ccv, snd_ssthresh) = win; 198215166Slstewart ENTER_RECOVERY(CCV(ccv, t_flags)); 199215166Slstewart } 200215166Slstewart break; 201215166Slstewart case CC_ECN: 202215166Slstewart if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { 203215166Slstewart CCV(ccv, snd_ssthresh) = win; 204215166Slstewart CCV(ccv, snd_cwnd) = win; 205215166Slstewart ENTER_CONGRECOVERY(CCV(ccv, t_flags)); 206215166Slstewart } 207215166Slstewart break; 208215166Slstewart } 209215166Slstewart} 210215166Slstewart 211215166Slstewart/* 212216107Slstewart * Perform any necessary tasks before we exit congestion recovery. 213215166Slstewart */ 214216107Slstewartstatic void 215215166Slstewartnewreno_post_recovery(struct cc_var *ccv) 216215166Slstewart{ 217293711Shiren int pipe; 218293711Shiren pipe = 0; 219293711Shiren 220215166Slstewart if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { 221215166Slstewart /* 222215166Slstewart * Fast recovery will conclude after returning from this 223215166Slstewart * function. Window inflation should have left us with 224215166Slstewart * approximately snd_ssthresh outstanding data. But in case we 225215166Slstewart * would be inclined to send a burst, better to do it via the 226215166Slstewart * slow start mechanism. 227215166Slstewart * 228215166Slstewart * XXXLAS: Find a way to do this without needing curack 229215166Slstewart */ 230293711Shiren if (V_tcp_do_rfc6675_pipe) 231293711Shiren pipe = tcp_compute_pipe(ccv->ccvc.tcp); 232215166Slstewart else 233293711Shiren pipe = CCV(ccv, snd_max) - ccv->curack; 234293711Shiren 235293711Shiren if (pipe < CCV(ccv, snd_ssthresh)) 236293711Shiren CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg); 237293711Shiren else 238215166Slstewart CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); 239215166Slstewart } 240215166Slstewart} 241215166Slstewart 242216105Slstewart 243215166SlstewartDECLARE_CC_MODULE(newreno, &newreno_cc_algo); 244