cc_htcp.c revision 220560
/*-
 * Copyright (c) 2007-2008
 * 	Swinburne University of Technology, Melbourne, Australia
 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
 * Copyright (c) 2010 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed at the Centre for Advanced Internet
 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
 * James Healy, made possible in part by a grant from the Cisco University
 * Research Program Fund at Community Foundation Silicon Valley.
 *
 * Portions of this software were developed at the Centre for Advanced
 * Internet Architectures, Swinburne University of Technology, Melbourne,
 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
38112158Sdas
/*
 * An implementation of the H-TCP congestion control algorithm for FreeBSD,
 * based on the Internet Draft "draft-leith-tcp-htcp-06.txt" by Leith and
 * Shorten. Originally released as part of the NewTCP research project at
 * Swinburne University of Technology's Centre for Advanced Internet
 * Architectures, Melbourne, Australia, which was made possible in part by a
 * grant from the Cisco University Research Program Fund at Community Foundation
 * Silicon Valley. More details are available at:
 *   http://caia.swin.edu.au/urp/newtcp/
 */
49112158Sdas
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/netinet/cc/cc_htcp.c 220560 2011-04-12 08:13:18Z lstewart $");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/vnet.h>

#include <netinet/cc.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>

#include <netinet/cc/cc_module.h>
71112158Sdas
/* Shift amounts used by the fixed point arithmetic in this file. */
#define	HTCP_SHIFT		8
#define	HTCP_ALPHA_INC_SHIFT	4

/* Starting value of alpha for a new connection. */
#define	HTCP_INIT_ALPHA		1
/* Delta_L: one second, expressed in ticks. */
#define	HTCP_DELTA_L		hz
/* Bounds on beta, scaled up by HTCP_SHIFT. */
#define	HTCP_MINBETA		128	/* 0.5 << HTCP_SHIFT. */
#define	HTCP_MAXBETA		204	/* ~0.8 << HTCP_SHIFT. */
/* Bounds on the RTT scaling ratio, scaled up by HTCP_SHIFT. */
#define	HTCP_MINROWE		26	/* ~0.1 << HTCP_SHIFT. */
#define	HTCP_MAXROWE		512	/* 2 << HTCP_SHIFT. */

/* RTT_ref in ms; feeds the alpha calculation when RTT scaling is enabled. */
#define	HTCP_RTT_REF		100

/* Number of RTT samples required before SRTT is considered trustworthy. */
#define	HTCP_MIN_RTT_SAMPLES	8
88112158Sdas
/*
 * HTCP_CALC_ALPHA evaluates, in fixed point, the alpha function given in the
 * HTCP spec:
 *
 *   1 + 10(delta - delta_l) + ((delta - delta_l) / 2) ^ 2
 *
 * The macro argument "diff" is "delta - delta_l" and must be in units of
 * ticks.
 *
 * The joyousness of fixed point maths means the implementation looks a little
 * funky:
 *
 * - Every term is scaled up by HTCP_ALPHA_INC_SHIFT so that the integer
 *   divisions do not truncate the result too badly; the sum is shifted back
 *   down by HTCP_ALPHA_INC_SHIFT at the end to return it to the correct range.
 * - "16" is the "1" term shifted up by HTCP_ALPHA_INC_SHIFT.
 * - "160" is the "10" multiplier multiplied by 2^HTCP_ALPHA_INC_SHIFT.
 *   Writing both as constants reduces the computation required.
 * - The "hz" terms convert from ticks, because kernels can be configured with
 *   different tick timers while the function was originally defined in terms
 *   of seconds.
 *
 * diff must also be constrained so the calculation does not overflow. The
 * middle term, (160 * diff) / hz, is the limiting factor, so diff must stay
 * below the max size of an int divided by 160, i.e. diff < INT_MAX / 160.
 *
 * Note that "diff" is expanded more than once, so it must not have side
 * effects.
 *
 * NB: Changing HTCP_ALPHA_INC_SHIFT will require you to MANUALLY update the
 * constants used in this function!
 */
#define	HTCP_CALC_ALPHA(diff) \
	((16 + \
	    ((160 * (diff)) / hz) + \
	    (((diff) / hz) * \
	    (((diff) << HTCP_ALPHA_INC_SHIFT) / (4 * hz)))) >> \
	    HTCP_ALPHA_INC_SHIFT)
135112158Sdas
136112158Sdasstatic void	htcp_ack_received(struct cc_var *ccv, uint16_t type);
137112158Sdasstatic void	htcp_cb_destroy(struct cc_var *ccv);
138112158Sdasstatic int	htcp_cb_init(struct cc_var *ccv);
139112158Sdasstatic void	htcp_cong_signal(struct cc_var *ccv, uint32_t type);
140112158Sdasstatic int	htcp_mod_init(void);
141112158Sdasstatic void	htcp_post_recovery(struct cc_var *ccv);
142112158Sdasstatic void	htcp_recalc_alpha(struct cc_var *ccv);
143112158Sdasstatic void	htcp_recalc_beta(struct cc_var *ccv);
144112158Sdasstatic void	htcp_record_rtt(struct cc_var *ccv);
145112158Sdasstatic void	htcp_ssthresh_update(struct cc_var *ccv);
146112158Sdas
/* Per-connection H-TCP state, hung off cc_var's cc_data pointer. */
struct htcp {
	unsigned long	prev_cwnd;	/* cwnd before entering cong recovery. */
	int		alpha;		/* cwnd additive increase parameter. */
	int		beta;		/* cwnd multiplicative decrease parameter. */
	int		maxrtt;		/* Largest rtt seen for the flow. */
	int		minrtt;		/* Shortest rtt seen for the flow. */
	int		t_last_cong;	/* Time of last congestion event in ticks. */
};
161112158Sdas
162112158Sdasstatic int htcp_rtt_ref;
163112158Sdas/*
164112158Sdas * The maximum number of ticks the value of diff can reach in
165112158Sdas * htcp_recalc_alpha() before alpha will stop increasing due to overflow.
166112158Sdas * See comment above HTCP_CALC_ALPHA for more info.
167112158Sdas */
168112158Sdasstatic int htcp_max_diff = INT_MAX / ((1 << HTCP_ALPHA_INC_SHIFT) * 10);
169112158Sdas
170112158Sdas/* Per-netstack vars. */
171112158Sdasstatic VNET_DEFINE(u_int, htcp_adaptive_backoff) = 0;
172112158Sdasstatic VNET_DEFINE(u_int, htcp_rtt_scaling) = 0;
173112158Sdas#define	V_htcp_adaptive_backoff    VNET(htcp_adaptive_backoff)
174112158Sdas#define	V_htcp_rtt_scaling    VNET(htcp_rtt_scaling)
175112158Sdas
176112158SdasMALLOC_DECLARE(M_HTCP);
177112158SdasMALLOC_DEFINE(M_HTCP, "htcp data",
178112158Sdas    "Per connection data required for the HTCP congestion control algorithm");
179112158Sdas
180112158Sdasstruct cc_algo htcp_cc_algo = {
181112158Sdas	.name = "htcp",
182112158Sdas	.ack_received = htcp_ack_received,
183112158Sdas	.cb_destroy = htcp_cb_destroy,
184112158Sdas	.cb_init = htcp_cb_init,
185112158Sdas	.cong_signal = htcp_cong_signal,
186112158Sdas	.mod_init = htcp_mod_init,
187112158Sdas	.post_recovery = htcp_post_recovery,
188112158Sdas};
189112158Sdas
190112158Sdasstatic void
191112158Sdashtcp_ack_received(struct cc_var *ccv, uint16_t type)
192112158Sdas{
193112158Sdas	struct htcp *htcp_data;
194112158Sdas
195112158Sdas	htcp_data = ccv->cc_data;
196112158Sdas	htcp_record_rtt(ccv);
197112158Sdas
198112158Sdas	/*
199112158Sdas	 * Regular ACK and we're not in cong/fast recovery and we're cwnd
200112158Sdas	 * limited and we're either not doing ABC or are slow starting or are
201112158Sdas	 * doing ABC and we've sent a cwnd's worth of bytes.
202112158Sdas	 */
203112158Sdas	if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) &&
204112158Sdas	    (ccv->flags & CCF_CWND_LIMITED) && (!V_tcp_do_rfc3465 ||
205112158Sdas	    CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
206112158Sdas	    (V_tcp_do_rfc3465 && ccv->flags & CCF_ABC_SENTAWND))) {
207112158Sdas		htcp_recalc_beta(ccv);
208112158Sdas		htcp_recalc_alpha(ccv);
209112158Sdas		/*
210112158Sdas		 * Use the logic in NewReno ack_received() for slow start and
211112158Sdas		 * for the first HTCP_DELTA_L ticks after either the flow starts
212112158Sdas		 * or a congestion event (when alpha equals 1).
213112158Sdas		 */
214112158Sdas		if (htcp_data->alpha == 1 ||
215112158Sdas		    CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh))
216112158Sdas			newreno_cc_algo.ack_received(ccv, type);
217112158Sdas		else {
218112158Sdas			if (V_tcp_do_rfc3465) {
219112158Sdas				/* Increment cwnd by alpha segments. */
220112158Sdas				CCV(ccv, snd_cwnd) += htcp_data->alpha *
221112158Sdas				    CCV(ccv, t_maxseg);
222112158Sdas				ccv->flags &= ~CCF_ABC_SENTAWND;
223112158Sdas			} else
224112158Sdas				/*
225112158Sdas				 * Increment cwnd by alpha/cwnd segments to
226112158Sdas				 * approximate an increase of alpha segments
227112158Sdas				 * per RTT.
228112158Sdas				 */
229112158Sdas				CCV(ccv, snd_cwnd) += (((htcp_data->alpha <<
230112158Sdas				    HTCP_SHIFT) / (CCV(ccv, snd_cwnd) /
231112158Sdas				    CCV(ccv, t_maxseg))) * CCV(ccv, t_maxseg))
232112158Sdas				    >> HTCP_SHIFT;
233112158Sdas		}
234112158Sdas	}
235112158Sdas}
236112158Sdas
237112158Sdasstatic void
238112158Sdashtcp_cb_destroy(struct cc_var *ccv)
239112158Sdas{
240112158Sdas
241112158Sdas	if (ccv->cc_data != NULL)
242112158Sdas		free(ccv->cc_data, M_HTCP);
243112158Sdas}
244112158Sdas
245112158Sdasstatic int
246112158Sdashtcp_cb_init(struct cc_var *ccv)
247112158Sdas{
248112158Sdas	struct htcp *htcp_data;
249112158Sdas
250112158Sdas	htcp_data = malloc(sizeof(struct htcp), M_HTCP, M_NOWAIT);
251112158Sdas
252112158Sdas	if (htcp_data == NULL)
253112158Sdas		return (ENOMEM);
254112158Sdas
255112158Sdas	/* Init some key variables with sensible defaults. */
256112158Sdas	htcp_data->alpha = HTCP_INIT_ALPHA;
257112158Sdas	htcp_data->beta = HTCP_MINBETA;
258112158Sdas	htcp_data->maxrtt = TCPTV_SRTTBASE;
259112158Sdas	htcp_data->minrtt = TCPTV_SRTTBASE;
260112158Sdas	htcp_data->prev_cwnd = 0;
261112158Sdas	htcp_data->t_last_cong = ticks;
262112158Sdas
263112158Sdas	ccv->cc_data = htcp_data;
264112158Sdas
265112158Sdas	return (0);
266112158Sdas}
267112158Sdas
268112158Sdas/*
269112158Sdas * Perform any necessary tasks before we enter congestion recovery.
270112158Sdas */
271112158Sdasstatic void
272112158Sdashtcp_cong_signal(struct cc_var *ccv, uint32_t type)
273112158Sdas{
274112158Sdas	struct htcp *htcp_data;
275112158Sdas
276112158Sdas	htcp_data = ccv->cc_data;
277112158Sdas
278112158Sdas	switch (type) {
279112158Sdas	case CC_NDUPACK:
280112158Sdas		if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) {
281112158Sdas			if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
282112158Sdas				/*
283112158Sdas				 * Apply hysteresis to maxrtt to ensure
284112158Sdas				 * reductions in the RTT are reflected in our
285112158Sdas				 * measurements.
286112158Sdas				 */
287112158Sdas				htcp_data->maxrtt = (htcp_data->minrtt +
288112158Sdas				    (htcp_data->maxrtt - htcp_data->minrtt) *
289112158Sdas				    95) / 100;
290112158Sdas				htcp_ssthresh_update(ccv);
291112158Sdas				htcp_data->t_last_cong = ticks;
292112158Sdas				htcp_data->prev_cwnd = CCV(ccv, snd_cwnd);
293112158Sdas			}
294112158Sdas			ENTER_RECOVERY(CCV(ccv, t_flags));
295112158Sdas		}
296112158Sdas		break;
297112158Sdas
298112158Sdas	case CC_ECN:
299112158Sdas		if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) {
300112158Sdas			/*
301112158Sdas			 * Apply hysteresis to maxrtt to ensure reductions in
302112158Sdas			 * the RTT are reflected in our measurements.
303112158Sdas			 */
304112158Sdas			htcp_data->maxrtt = (htcp_data->minrtt + (htcp_data->maxrtt -
305112158Sdas			    htcp_data->minrtt) * 95) / 100;
306112158Sdas			htcp_ssthresh_update(ccv);
307112158Sdas			CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
308112158Sdas			htcp_data->t_last_cong = ticks;
309112158Sdas			htcp_data->prev_cwnd = CCV(ccv, snd_cwnd);
310112158Sdas			ENTER_CONGRECOVERY(CCV(ccv, t_flags));
311112158Sdas		}
312112158Sdas		break;
313112158Sdas
314112158Sdas	case CC_RTO:
315112158Sdas		/*
316112158Sdas		 * Grab the current time and record it so we know when the
317112158Sdas		 * most recent congestion event was. Only record it when the
318112158Sdas		 * timeout has fired more than once, as there is a reasonable
319112158Sdas		 * chance the first one is a false alarm and may not indicate
320112158Sdas		 * congestion.
321112158Sdas		 */
322112158Sdas		if (CCV(ccv, t_rxtshift) >= 2)
323112158Sdas			htcp_data->t_last_cong = ticks;
324112158Sdas		break;
325112158Sdas	}
326112158Sdas}
327112158Sdas
328112158Sdasstatic int
329112158Sdashtcp_mod_init(void)
330112158Sdas{
331112158Sdas
332112158Sdas	htcp_cc_algo.after_idle = newreno_cc_algo.after_idle;
333112158Sdas
334112158Sdas	/*
335112158Sdas	 * HTCP_RTT_REF is defined in ms, and t_srtt in the tcpcb is stored in
336112158Sdas	 * units of TCP_RTT_SCALE*hz. Scale HTCP_RTT_REF to be in the same units
337112158Sdas	 * as t_srtt.
338112158Sdas	 */
339112158Sdas	htcp_rtt_ref = (HTCP_RTT_REF * TCP_RTT_SCALE * hz) / 1000;
340112158Sdas
341112158Sdas	return (0);
342112158Sdas}
343112158Sdas
344112158Sdas/*
345112158Sdas * Perform any necessary tasks before we exit congestion recovery.
346112158Sdas */
347112158Sdasstatic void
348112158Sdashtcp_post_recovery(struct cc_var *ccv)
349112158Sdas{
350112158Sdas	struct htcp *htcp_data;
351112158Sdas
352112158Sdas	htcp_data = ccv->cc_data;
353112158Sdas
354112158Sdas	if (IN_FASTRECOVERY(CCV(ccv, t_flags))) {
355112158Sdas		/*
356112158Sdas		 * If inflight data is less than ssthresh, set cwnd
357112158Sdas		 * conservatively to avoid a burst of data, as suggested in the
358112158Sdas		 * NewReno RFC. Otherwise, use the HTCP method.
359112158Sdas		 *
360112158Sdas		 * XXXLAS: Find a way to do this without needing curack
361112158Sdas		 */
362112158Sdas		if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh),
363112158Sdas		    CCV(ccv, snd_max)))
364112158Sdas			CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) - ccv->curack +
365112158Sdas			    CCV(ccv, t_maxseg);
366112158Sdas		else
367112158Sdas			CCV(ccv, snd_cwnd) = max(1, ((htcp_data->beta *
368112158Sdas			    htcp_data->prev_cwnd / CCV(ccv, t_maxseg))
369112158Sdas			    >> HTCP_SHIFT)) * CCV(ccv, t_maxseg);
370112158Sdas	}
371112158Sdas}
372112158Sdas
373112158Sdasstatic void
374112158Sdashtcp_recalc_alpha(struct cc_var *ccv)
375112158Sdas{
376112158Sdas	struct htcp *htcp_data;
377112158Sdas	int alpha, diff, now;
378112158Sdas
379112158Sdas	htcp_data = ccv->cc_data;
380112158Sdas	now = ticks;
381112158Sdas
382112158Sdas	/*
383112158Sdas	 * If ticks has wrapped around (will happen approximately once every 49
384112158Sdas	 * days on a machine with the default kern.hz=1000) and a flow straddles
385112158Sdas	 * the wrap point, our alpha calcs will be completely wrong. We cut our
386112158Sdas	 * losses and restart alpha from scratch by setting t_last_cong = now -
387112158Sdas	 * HTCP_DELTA_L.
388112158Sdas	 *
389112158Sdas	 * This does not deflate our cwnd at all. It simply slows the rate cwnd
390112158Sdas	 * is growing by until alpha regains the value it held prior to taking
391112158Sdas	 * this drastic measure.
392112158Sdas	 */
393112158Sdas	if (now < htcp_data->t_last_cong)
394112158Sdas		htcp_data->t_last_cong = now - HTCP_DELTA_L;
395112158Sdas
396112158Sdas	diff = now - htcp_data->t_last_cong - HTCP_DELTA_L;
397112158Sdas
398112158Sdas	/* Cap alpha if the value of diff would overflow HTCP_CALC_ALPHA(). */
399112158Sdas	if (diff < htcp_max_diff) {
400112158Sdas		/*
401112158Sdas		 * If it has been more than HTCP_DELTA_L ticks since congestion,
402112158Sdas		 * increase alpha according to the function defined in the spec.
403112158Sdas		 */
404112158Sdas		if (diff > 0) {
405112158Sdas			alpha = HTCP_CALC_ALPHA(diff);
406112158Sdas
407112158Sdas			/*
408112158Sdas			 * Adaptive backoff fairness adjustment:
409112158Sdas			 * 2 * (1 - beta) * alpha_raw
410112158Sdas			 */
411112158Sdas			if (V_htcp_adaptive_backoff)
412112158Sdas				alpha = max(1, (2 * ((1 << HTCP_SHIFT) -
413112158Sdas				    htcp_data->beta) * alpha) >> HTCP_SHIFT);
414112158Sdas
415112158Sdas			/*
416112158Sdas			 * RTT scaling: (RTT / RTT_ref) * alpha
417112158Sdas			 * alpha will be the raw value from HTCP_CALC_ALPHA() if
418112158Sdas			 * adaptive backoff is off, or the adjusted value if
419112158Sdas			 * adaptive backoff is on.
420112158Sdas			 */
421112158Sdas			if (V_htcp_rtt_scaling)
422112158Sdas				alpha = max(1, (min(max(HTCP_MINROWE,
423112158Sdas				    (CCV(ccv, t_srtt) << HTCP_SHIFT) /
424112158Sdas				    htcp_rtt_ref), HTCP_MAXROWE) * alpha)
425112158Sdas				    >> HTCP_SHIFT);
426112158Sdas
427112158Sdas		} else
428112158Sdas			alpha = 1;
429112158Sdas
430112158Sdas		htcp_data->alpha = alpha;
431112158Sdas	}
432112158Sdas}
433112158Sdas
434112158Sdasstatic void
435112158Sdashtcp_recalc_beta(struct cc_var *ccv)
436112158Sdas{
437112158Sdas	struct htcp *htcp_data;
438112158Sdas
439112158Sdas	htcp_data = ccv->cc_data;
440112158Sdas
441112158Sdas	/*
442112158Sdas	 * TCPTV_SRTTBASE is the initialised value of each connection's SRTT, so
443112158Sdas	 * we only calc beta if the connection's SRTT has been changed from its
444112158Sdas	 * inital value. beta is bounded to ensure it is always between
445112158Sdas	 * HTCP_MINBETA and HTCP_MAXBETA.
446112158Sdas	 */
447112158Sdas	if (V_htcp_adaptive_backoff && htcp_data->minrtt != TCPTV_SRTTBASE &&
448112158Sdas	    htcp_data->maxrtt != TCPTV_SRTTBASE)
449112158Sdas		htcp_data->beta = min(max(HTCP_MINBETA,
450112158Sdas		    (htcp_data->minrtt << HTCP_SHIFT) / htcp_data->maxrtt),
451112158Sdas		    HTCP_MAXBETA);
452112158Sdas	else
453112158Sdas		htcp_data->beta = HTCP_MINBETA;
454112158Sdas}
455112158Sdas
456112158Sdas/*
457112158Sdas * Record the minimum and maximum RTT seen for the connection. These are used in
458112158Sdas * the calculation of beta if adaptive backoff is enabled.
459112158Sdas */
460112158Sdasstatic void
461112158Sdashtcp_record_rtt(struct cc_var *ccv)
462112158Sdas{
463112158Sdas	struct htcp *htcp_data;
464112158Sdas
465112158Sdas	htcp_data = ccv->cc_data;
466112158Sdas
467112158Sdas	/* XXXLAS: Should there be some hysteresis for minrtt? */
468112158Sdas
469112158Sdas	/*
470112158Sdas	 * Record the current SRTT as our minrtt if it's the smallest we've seen
471112158Sdas	 * or minrtt is currently equal to its initialised value. Ignore SRTT
472112158Sdas	 * until a min number of samples have been taken.
473112158Sdas	 */
474112158Sdas	if ((CCV(ccv, t_srtt) < htcp_data->minrtt ||
475112158Sdas	    htcp_data->minrtt == TCPTV_SRTTBASE) &&
476112158Sdas	    (CCV(ccv, t_rttupdated) >= HTCP_MIN_RTT_SAMPLES))
477112158Sdas		htcp_data->minrtt = CCV(ccv, t_srtt);
478112158Sdas
479112158Sdas	/*
480112158Sdas	 * Record the current SRTT as our maxrtt if it's the largest we've
481112158Sdas	 * seen. Ignore SRTT until a min number of samples have been taken.
482112158Sdas	 */
483112158Sdas	if (CCV(ccv, t_srtt) > htcp_data->maxrtt
484112158Sdas	    && CCV(ccv, t_rttupdated) >= HTCP_MIN_RTT_SAMPLES)
485112158Sdas		htcp_data->maxrtt = CCV(ccv, t_srtt);
486112158Sdas}
487112158Sdas
488112158Sdas/*
489112158Sdas * Update the ssthresh in the event of congestion.
490112158Sdas */
491112158Sdasstatic void
492112158Sdashtcp_ssthresh_update(struct cc_var *ccv)
493112158Sdas{
494112158Sdas	struct htcp *htcp_data;
495112158Sdas
496112158Sdas	htcp_data = ccv->cc_data;
497112158Sdas
498112158Sdas	/*
499112158Sdas	 * On the first congestion event, set ssthresh to cwnd * 0.5, on
500112158Sdas	 * subsequent congestion events, set it to cwnd * beta.
501112158Sdas	 */
502112158Sdas	if (CCV(ccv, snd_ssthresh) == TCP_MAXWIN << TCP_MAX_WINSHIFT)
503112158Sdas		CCV(ccv, snd_ssthresh) = (CCV(ccv, snd_cwnd) * HTCP_MINBETA)
504112158Sdas		    >> HTCP_SHIFT;
505112158Sdas	else {
506112158Sdas		htcp_recalc_beta(ccv);
507112158Sdas		CCV(ccv, snd_ssthresh) = (CCV(ccv, snd_cwnd) * htcp_data->beta)
508112158Sdas		    >> HTCP_SHIFT;
509112158Sdas	}
510112158Sdas}
511112158Sdas
512112158Sdas
513112158SdasSYSCTL_DECL(_net_inet_tcp_cc_htcp);
514112158SdasSYSCTL_NODE(_net_inet_tcp_cc, OID_AUTO, htcp, CTLFLAG_RW,
515112158Sdas    NULL, "H-TCP related settings");
516112158SdasSYSCTL_VNET_UINT(_net_inet_tcp_cc_htcp, OID_AUTO, adaptive_backoff, CTLFLAG_RW,
517112158Sdas    &VNET_NAME(htcp_adaptive_backoff), 0, "enable H-TCP adaptive backoff");
518112158SdasSYSCTL_VNET_UINT(_net_inet_tcp_cc_htcp, OID_AUTO, rtt_scaling, CTLFLAG_RW,
519112158Sdas    &VNET_NAME(htcp_rtt_scaling), 0, "enable H-TCP RTT scaling");
520112158Sdas
521112158SdasDECLARE_CC_MODULE(htcp, &htcp_cc_algo);
522112158Sdas