/* sctp_cc_functions.c, revision 219397 */
1/*-
2 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
3 * Copyright (c) 2008-2011, by Randall Stewart. All rights reserved.
4 * Copyright (c) 2008-2011, by Michael Tuexen. All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions are met:
8 *
9 * a) Redistributions of source code must retain the above copyright notice,
10 *   this list of conditions and the following disclaimer.
11 *
12 * b) Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in
14 *   the documentation and/or other materials provided with the distribution.
15 *
16 * c) Neither the name of Cisco Systems, Inc. nor the names of its
17 *    contributors may be used to endorse or promote products derived
18 *    from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
22 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <netinet/sctp_os.h>
34#include <netinet/sctp_var.h>
35#include <netinet/sctp_sysctl.h>
36#include <netinet/sctp_pcb.h>
37#include <netinet/sctp_header.h>
38#include <netinet/sctputil.h>
39#include <netinet/sctp_output.h>
40#include <netinet/sctp_input.h>
41#include <netinet/sctp_indata.h>
42#include <netinet/sctp_uio.h>
43#include <netinet/sctp_timer.h>
44#include <netinet/sctp_auth.h>
45#include <netinet/sctp_asconf.h>
46#include <netinet/sctp_dtrace_declare.h>
47#include <sys/cdefs.h>
48__FBSDID("$FreeBSD: head/sys/netinet/sctp_cc_functions.c 219397 2011-03-08 11:58:25Z rrs $");
49
50static void
51sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
52{
53	struct sctp_association *assoc;
54	uint32_t cwnd_in_mtu;
55
56	assoc = &stcb->asoc;
57	cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
58	if (cwnd_in_mtu == 0) {
59		/* Using 0 means that the value of RFC 4960 is used. */
60		net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
61	} else {
62		/*
63		 * We take the minimum of the burst limit and the initial
64		 * congestion window.
65		 */
66		if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
67			cwnd_in_mtu = assoc->max_burst;
68		net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
69	}
70	if (stcb->asoc.sctp_cmt_on_off == 2) {
71		/* In case of resource pooling initialize appropriately */
72		net->cwnd /= assoc->numnets;
73		if (net->cwnd < (net->mtu - sizeof(struct sctphdr))) {
74			net->cwnd = net->mtu - sizeof(struct sctphdr);
75		}
76	}
77	net->ssthresh = assoc->peers_rwnd;
78
79	SDT_PROBE(sctp, cwnd, net, init,
80	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
81	    0, net->cwnd);
82	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
83	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
84		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
85	}
86}
87
/*
 * Reduce cwnd/ssthresh after a fast retransmit, per RFC 4960 section
 * 7.2.3: every destination that had a chunk fast-retransmitted to it
 * (net->net_ack > 0) is halved (or proportionally reduced under CMT
 * resource pooling), and a fast-recovery window is opened so further
 * reductions are suppressed until it ends (RFC 2582 style).
 */
static void
sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;
	uint32_t t_ssthresh, t_cwnd;

	/* MT FIXME: Don't compute this over and over again */
	t_ssthresh = 0;
	t_cwnd = 0;
	if (asoc->sctp_cmt_on_off == 2) {
		/* Resource pooling needs totals across all destinations. */
		TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
			t_ssthresh += net->ssthresh;
			t_cwnd += net->cwnd;
		}
	}
	/*-
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				if (asoc->sctp_cmt_on_off == 2) {
					/*
					 * Resource pooling: reduce in
					 * proportion to this path's share
					 * of the total ssthresh, with a
					 * floor of one MTU.
					 */
					net->ssthresh = (uint32_t) (((uint64_t) 4 *
					    (uint64_t) net->mtu *
					    (uint64_t) net->ssthresh) /
					    (uint64_t) t_ssthresh);
					if ((net->cwnd > t_cwnd / 2) &&
					    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
						net->ssthresh = net->cwnd - t_cwnd / 2;
					}
					if (net->ssthresh < net->mtu) {
						net->ssthresh = net->mtu;
					}
				} else {
					/* RFC 4960: halve, floor of 2 MTUs. */
					net->ssthresh = net->cwnd / 2;
					if (net->ssthresh < (net->mtu * 2)) {
						net->ssthresh = 2 * net->mtu;
					}
				}
				net->cwnd = net->ssthresh;
				SDT_PROBE(sctp, cwnd, net, fr,
				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
				    old_cwnd, net->cwnd);
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/* Restart the T3 timer for the reduced window. */
				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}
187
/*
 * Classification of the instantaneous bandwidth sample relative to the
 * long-term measurement, consumed by the cc_bw_* decision functions.
 * (Note: "LOOSING" is a historical misspelling of "losing" preserved
 * in the identifier.)
 */
#define SCTP_INST_LOOSING 1	/* Losing bandwidth to other flows */
#define SCTP_INST_NEUTRAL 2	/* Neutral, no indication */
#define SCTP_INST_GAINING 3	/* Gaining, step down possible */
192
193
/*
 * RTCC decision when the measured bandwidth stayed (within tolerance)
 * the same since the last sample.  Returns 1 to freeze cwnd (skip the
 * regular CC update), 0 to allow the normal update to proceed.  When
 * "steady_step" probing is enabled, periodically tries a voluntary
 * one-MTU step-down of cwnd to see if the bandwidth holds.
 */
static int
cc_bw_same(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
    uint64_t rtt_offset, uint64_t vtag, uint8_t inst_ind)
{
	uint64_t oth, probepoint;

	probepoint = (((uint64_t) net->cwnd) << 32);
	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
		/*
		 * rtt increased we don't update bw.. so we don't update the
		 * rtt either.
		 */
		/* Probe point 5 */
		probepoint |= ((5 << 16) | 1);
		SDT_PROBE(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
			/* Count consecutive visits to state 5. */
			if (net->cc_mod.rtcc.last_step_state == 5)
				net->cc_mod.rtcc.step_cnt++;
			else
				net->cc_mod.rtcc.step_cnt = 1;
			net->cc_mod.rtcc.last_step_state = 5;
			if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
			    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
			    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
				/* Try a step down */
				/* Pack vol_reduce/step_cnt/state into one probe arg. */
				oth = net->cc_mod.rtcc.vol_reduce;
				oth <<= 16;
				oth |= net->cc_mod.rtcc.step_cnt;
				oth <<= 16;
				oth |= net->cc_mod.rtcc.last_step_state;
				SDT_PROBE(sctp, cwnd, net, rttstep,
				    vtag,
				    ((net->cc_mod.rtcc.lbw << 32) | nbw),
				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
				    oth,
				    probepoint);
				if (net->cwnd > (4 * net->mtu)) {
					/* Give up one MTU voluntarily. */
					net->cwnd -= net->mtu;
					net->cc_mod.rtcc.vol_reduce++;
				} else {
					net->cc_mod.rtcc.step_cnt = 0;
				}
			}
		}
		return (1);
	}
	if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
		/*
		 * rtt decreased, there could be more room. we update both
		 * the bw and the rtt here to lock this in as a good step
		 * down.
		 */
		/* Probe point 6 */
		probepoint |= ((6 << 16) | 0);
		SDT_PROBE(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			if ((net->cc_mod.rtcc.last_step_state == 5) &&
			    (net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step)) {
				/* Step down worked */
				net->cc_mod.rtcc.step_cnt = 0;
				return (1);
			} else {
				net->cc_mod.rtcc.last_step_state = 6;
				net->cc_mod.rtcc.step_cnt = 0;
			}
		}
		/* Lock in the new measurement as the reference point. */
		net->cc_mod.rtcc.lbw = nbw;
		net->cc_mod.rtcc.lbw_rtt = net->rtt;
		net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
		if (inst_ind == SCTP_INST_GAINING)
			return (1);
		else if (inst_ind == SCTP_INST_NEUTRAL)
			return (1);
		else
			return (0);
	}
	/*
	 * Ok bw and rtt remained the same .. no update to any
	 */
	/* Probe point 7 */
	probepoint |= ((7 << 16) | net->cc_mod.rtcc.ret_from_eq);
	SDT_PROBE(sctp, cwnd, net, rttvar,
	    vtag,
	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
	    net->flight_size,
	    probepoint);

	if ((net->cc_mod.rtcc.steady_step) && (inst_ind != SCTP_INST_LOOSING)) {
		if (net->cc_mod.rtcc.last_step_state == 5)
			net->cc_mod.rtcc.step_cnt++;
		else
			net->cc_mod.rtcc.step_cnt = 1;
		net->cc_mod.rtcc.last_step_state = 5;
		if ((net->cc_mod.rtcc.step_cnt == net->cc_mod.rtcc.steady_step) ||
		    ((net->cc_mod.rtcc.step_cnt > net->cc_mod.rtcc.steady_step) &&
		    ((net->cc_mod.rtcc.step_cnt % net->cc_mod.rtcc.steady_step) == 0))) {
			/* Try a step down */
			if (net->cwnd > (4 * net->mtu)) {
				net->cwnd -= net->mtu;
				net->cc_mod.rtcc.vol_reduce++;
				return (1);
			} else {
				net->cc_mod.rtcc.step_cnt = 0;
			}
		}
	}
	if (inst_ind == SCTP_INST_GAINING)
		return (1);
	else if (inst_ind == SCTP_INST_NEUTRAL)
		return (1);
	else
		/* Policy sysctl decides the bw-same/rtt-same case. */
		return ((int)net->cc_mod.rtcc.ret_from_eq);
}
329
/*
 * RTCC decision when the measured bandwidth decreased.  If rtt also
 * rose and we had grown cwnd since the last measurement, we likely
 * caused the drop ourselves and back off (probe point 1).  Otherwise
 * another flow is presumably taking bandwidth, so we may reclaim one
 * voluntarily surrendered MTU and record the new measurement.
 * Returns 1 to suppress the regular CC update, 0 to allow it.
 */
static int
cc_bw_decrease(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw, uint64_t rtt_offset,
    uint64_t vtag, uint8_t inst_ind)
{
	uint64_t oth, probepoint;

	/* Bandwidth decreased. */
	probepoint = (((uint64_t) net->cwnd) << 32);
	if (net->rtt > net->cc_mod.rtcc.lbw_rtt + rtt_offset) {
		/* rtt increased */
		/* Did we add more */
		if ((net->cwnd > net->cc_mod.rtcc.cwnd_at_bw_set) &&
		    (inst_ind != SCTP_INST_LOOSING)) {
			/* We caused it maybe.. back off? */
			/* PROBE POINT 1 */
			probepoint |= ((1 << 16) | 1);
			SDT_PROBE(sctp, cwnd, net, rttvar,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    net->flight_size,
			    probepoint);

			if (net->cc_mod.rtcc.ret_from_eq) {
				/*
				 * Switch over to CA if we are less
				 * aggressive
				 */
				net->ssthresh = net->cwnd - 1;
				net->partial_bytes_acked = 0;
			}
			/* Hold cwnd; do not record the degraded bw. */
			return (1);
		}
		/* Probe point 2 */
		probepoint |= ((2 << 16) | 0);
		SDT_PROBE(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);

		/* Someone else - fight for more? */
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			/*
			 * Did we voluntarily give up some? if so take one
			 * back please
			 */
			if ((net->cc_mod.rtcc.vol_reduce) &&
			    (inst_ind != SCTP_INST_GAINING)) {
				net->cwnd += net->mtu;
				net->cc_mod.rtcc.vol_reduce--;
			}
			net->cc_mod.rtcc.last_step_state = 2;
			net->cc_mod.rtcc.step_cnt = 0;
		}
		goto out_decision;
	} else if (net->rtt < net->cc_mod.rtcc.lbw_rtt - rtt_offset) {
		/* bw & rtt decreased */
		/* Probe point 3 */
		probepoint |= ((3 << 16) | 0);
		SDT_PROBE(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		if (net->cc_mod.rtcc.steady_step) {
			oth = net->cc_mod.rtcc.vol_reduce;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.step_cnt;
			oth <<= 16;
			oth |= net->cc_mod.rtcc.last_step_state;
			SDT_PROBE(sctp, cwnd, net, rttstep,
			    vtag,
			    ((net->cc_mod.rtcc.lbw << 32) | nbw),
			    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
			    oth,
			    probepoint);
			if ((net->cc_mod.rtcc.vol_reduce) &&
			    (inst_ind != SCTP_INST_GAINING)) {
				net->cwnd += net->mtu;
				net->cc_mod.rtcc.vol_reduce--;
			}
			net->cc_mod.rtcc.last_step_state = 3;
			net->cc_mod.rtcc.step_cnt = 0;
		}
		goto out_decision;
	}
	/* The bw decreased but rtt stayed the same */
	/* Probe point 4 */
	probepoint |= ((4 << 16) | 0);
	SDT_PROBE(sctp, cwnd, net, rttvar,
	    vtag,
	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
	    net->flight_size,
	    probepoint);
	if (net->cc_mod.rtcc.steady_step) {
		oth = net->cc_mod.rtcc.vol_reduce;
		oth <<= 16;
		oth |= net->cc_mod.rtcc.step_cnt;
		oth <<= 16;
		oth |= net->cc_mod.rtcc.last_step_state;
		SDT_PROBE(sctp, cwnd, net, rttstep,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    oth,
		    probepoint);
		if ((net->cc_mod.rtcc.vol_reduce) &&
		    (inst_ind != SCTP_INST_GAINING)) {
			net->cwnd += net->mtu;
			net->cc_mod.rtcc.vol_reduce--;
		}
		net->cc_mod.rtcc.last_step_state = 4;
		net->cc_mod.rtcc.step_cnt = 0;
	}
out_decision:
	/* Record the new measurement as the reference point. */
	net->cc_mod.rtcc.lbw = nbw;
	net->cc_mod.rtcc.lbw_rtt = net->rtt;
	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
	if (inst_ind == SCTP_INST_GAINING) {
		return (1);
	} else {
		return (0);
	}
}
469
470static int
471cc_bw_increase(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw,
472    uint64_t vtag, uint8_t inst_ind)
473{
474	uint64_t oth, probepoint;
475
476	/*
477	 * BW increased, so update and return 0, since all actions in our
478	 * table say to do the normal CC update. Note that we pay no
479	 * attention to the inst_ind since our overall sum is increasing.
480	 */
481	/* PROBE POINT 0 */
482	probepoint = (((uint64_t) net->cwnd) << 32);
483	SDT_PROBE(sctp, cwnd, net, rttvar,
484	    vtag,
485	    ((net->cc_mod.rtcc.lbw << 32) | nbw),
486	    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
487	    net->flight_size,
488	    probepoint);
489	if (net->cc_mod.rtcc.steady_step) {
490		oth = net->cc_mod.rtcc.vol_reduce;
491		oth <<= 16;
492		oth |= net->cc_mod.rtcc.step_cnt;
493		oth <<= 16;
494		oth |= net->cc_mod.rtcc.last_step_state;
495		SDT_PROBE(sctp, cwnd, net, rttstep,
496		    vtag,
497		    ((net->cc_mod.rtcc.lbw << 32) | nbw),
498		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
499		    oth,
500		    probepoint);
501		net->cc_mod.rtcc.last_step_state = 0;
502		net->cc_mod.rtcc.step_cnt = 0;
503		net->cc_mod.rtcc.vol_reduce = 0;
504	}
505	net->cc_mod.rtcc.lbw = nbw;
506	net->cc_mod.rtcc.lbw_rtt = net->rtt;
507	net->cc_mod.rtcc.cwnd_at_bw_set = net->cwnd;
508	return (0);
509}
510
/* RTCC Algorithm to limit growth of cwnd, return
 * true if you want to NOT allow cwnd growth
 */
static int
cc_bw_limit(struct sctp_tcb *stcb, struct sctp_nets *net, uint64_t nbw)
{
	uint64_t bw_offset, rtt_offset, rtt, vtag, probepoint;
	uint64_t bytes_for_this_rtt, inst_bw;
	uint64_t div, inst_off;
	int bw_shift;
	uint8_t inst_ind;
	int ret;

	/*-
	 * Here we need to see if we want
	 * to limit cwnd growth due to increase
	 * in overall rtt but no increase in bw.
	 * We use the following table to figure
	 * out what we should do. When we return
	 * 0, cc update goes on as planned. If we
	 * return 1, then no cc update happens and cwnd
	 * stays where it is at.
	 * ----------------------------------
	 *   BW    |    RTT   | Action
	 * *********************************
	 *   INC   |    INC   | return 0
	 * ----------------------------------
	 *   INC   |    SAME  | return 0
	 * ----------------------------------
	 *   INC   |    DECR  | return 0
	 * ----------------------------------
	 *   SAME  |    INC   | return 1
	 * ----------------------------------
	 *   SAME  |    SAME  | return 1
	 * ----------------------------------
	 *   SAME  |    DECR  | return 0
	 * ----------------------------------
	 *   DECR  |    INC   | return 0 or 1 based on if we caused.
	 * ----------------------------------
	 *   DECR  |    SAME  | return 0
	 * ----------------------------------
	 *   DECR  |    DECR  | return 0
	 * ----------------------------------
	 *
	 * We are a bit fuzz on what an increase or
	 * decrease is. For BW it is the same if
	 * it did not change within 1/64th. For
	 * RTT it stayed the same if it did not
	 * change within 1/32nd
	 */
	bw_shift = SCTP_BASE_SYSCTL(sctp_rttvar_bw);
	/* 'rtt' briefly holds my_vtag just to widen it to 64 bits. */
	rtt = stcb->asoc.my_vtag;
	vtag = (rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) | (stcb->rport);
	probepoint = (((uint64_t) net->cwnd) << 32);
	rtt = net->rtt;
	if (net->cc_mod.rtcc.rtt_set_this_sack) {
		/* Fresh rtt sample: derive an instantaneous bw estimate. */
		net->cc_mod.rtcc.rtt_set_this_sack = 0;
		bytes_for_this_rtt = net->cc_mod.rtcc.bw_bytes - net->cc_mod.rtcc.bw_bytes_at_last_rttc;
		net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
		if (net->rtt) {
			div = net->rtt / 1000;
			if (div) {
				inst_bw = bytes_for_this_rtt / div;
				inst_off = inst_bw >> bw_shift;
				if (inst_bw > nbw)
					inst_ind = SCTP_INST_GAINING;
				else if ((inst_bw + inst_off) < nbw)
					inst_ind = SCTP_INST_LOOSING;
				else
					inst_ind = SCTP_INST_NEUTRAL;
				probepoint |= ((0xb << 16) | inst_ind);
			} else {
				/* rtt too small to scale; computed only for the probe. */
				inst_bw = bytes_for_this_rtt / (uint64_t) (net->rtt);
				/* Can't determine do not change */
				inst_ind = net->cc_mod.rtcc.last_inst_ind;
				probepoint |= ((0xc << 16) | inst_ind);
			}
		} else {
			inst_bw = bytes_for_this_rtt;
			/* Can't determine do not change */
			inst_ind = net->cc_mod.rtcc.last_inst_ind;
			probepoint |= ((0xd << 16) | inst_ind);
		}
		SDT_PROBE(sctp, cwnd, net, rttvar,
		    vtag,
		    ((nbw << 32) | inst_bw),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | rtt),
		    net->flight_size,
		    probepoint);
	} else {
		/* No rtt measurement, use last one */
		inst_ind = net->cc_mod.rtcc.last_inst_ind;
	}
	/* Tolerance band: bw "same" if within lbw >> bw_shift of lbw. */
	bw_offset = net->cc_mod.rtcc.lbw >> bw_shift;
	if (nbw > net->cc_mod.rtcc.lbw + bw_offset) {
		ret = cc_bw_increase(stcb, net, nbw, vtag, inst_ind);
		goto out;
	}
	rtt_offset = net->cc_mod.rtcc.lbw_rtt >> SCTP_BASE_SYSCTL(sctp_rttvar_rtt);
	if (nbw < net->cc_mod.rtcc.lbw - bw_offset) {
		ret = cc_bw_decrease(stcb, net, nbw, rtt_offset, vtag, inst_ind);
		goto out;
	}
	/*
	 * If we reach here then we are in a situation where the bw stayed
	 * the same.
	 */
	ret = cc_bw_same(stcb, net, nbw, rtt_offset, vtag, inst_ind);
out:
	net->cc_mod.rtcc.last_inst_ind = inst_ind;
	return (ret);
}
623
/*
 * Core cwnd update performed for every incoming SACK.  Per destination:
 * manage the Early-FR timer, clear error counts under Karn's rule,
 * handle CMT-PF state transitions, optionally apply the RTCC bandwidth
 * limiter (use_rtcc != 0), and then grow cwnd in slow start or
 * congestion avoidance.  Finally restore any backed-off RTO when Karn's
 * rule allows it.
 */
static void
sctp_cwnd_update_after_sack_common(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit, int use_rtcc)
{
	struct sctp_nets *net;
	int old_cwnd;
	uint32_t t_ssthresh, t_cwnd, incr;

	/* MT FIXME: Don't compute this over and over again */
	t_ssthresh = 0;
	t_cwnd = 0;
	if (stcb->asoc.sctp_cmt_on_off == 2) {
		/* Resource pooling: totals across all destinations. */
		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
			t_ssthresh += net->ssthresh;
			t_cwnd += net->cwnd;
		}
	}
	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all do we need to have a Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * yes, so in this case stop it if its
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked to be sent, causing us
				 * to illicit a sack with gaps to force out
				 * the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if its running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing error count, this
			 * is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* now was it the primary? if so restore */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTU's based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off > 0) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				old_cwnd = net->cwnd;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SDT_PROBE(sctp, cwnd, net, ack,
				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
				    old_cwnd, net->cwnd);
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			goto skip_cwnd_update;
		}
		/*
		 * Did any measurements go on for this network?
		 */
		if (use_rtcc && (net->cc_mod.rtcc.tls_needs_set > 0)) {
			uint64_t nbw;

			/*
			 * At this point our bw_bytes has been updated by
			 * incoming sack information.
			 *
			 * But our bw may not yet be set.
			 *
			 */
			if ((net->cc_mod.rtcc.new_tot_time / 1000) > 0) {
				nbw = net->cc_mod.rtcc.bw_bytes / (net->cc_mod.rtcc.new_tot_time / 1000);
			} else {
				nbw = net->cc_mod.rtcc.bw_bytes;
			}
			if (net->cc_mod.rtcc.lbw) {
				if (cc_bw_limit(stcb, net, nbw)) {
					/* Hold here, no update */
					goto skip_cwnd_update;
				}
			} else {
				/* First measurement: just record it. */
				uint64_t vtag, probepoint;

				probepoint = (((uint64_t) net->cwnd) << 32);
				probepoint |= ((0xa << 16) | 0);
				vtag = (net->rtt << 32) |
				    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
				    (stcb->rport);

				SDT_PROBE(sctp, cwnd, net, rttvar,
				    vtag,
				    nbw,
				    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
				    net->flight_size,
				    probepoint);
				net->cc_mod.rtcc.lbw = nbw;
				net->cc_mod.rtcc.lbw_rtt = net->rtt;
				if (net->cc_mod.rtcc.rtt_set_this_sack) {
					net->cc_mod.rtcc.rtt_set_this_sack = 0;
					net->cc_mod.rtcc.bw_bytes_at_last_rttc = net->cc_mod.rtcc.bw_bytes;
				}
			}
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {
					old_cwnd = net->cwnd;
					if (stcb->asoc.sctp_cmt_on_off == 2) {
						/*
						 * Resource pooling: grow by
						 * this path's share of the
						 * total ssthresh, limited by
						 * L (abc_variable) MTUs.
						 */
						uint32_t limit;

						limit = (uint32_t) (((uint64_t) net->mtu *
						    (uint64_t) SCTP_BASE_SYSCTL(sctp_L2_abc_variable) *
						    (uint64_t) net->ssthresh) /
						    (uint64_t) t_ssthresh);
						incr = (uint32_t) (((uint64_t) net->net_ack *
						    (uint64_t) net->ssthresh) /
						    (uint64_t) t_ssthresh);
						if (incr > limit) {
							incr = limit;
						}
						if (incr == 0) {
							incr = 1;
						}
					} else {
						/* ABC: at most L MTUs per SACK. */
						incr = net->net_ack;
						if (incr > net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable)) {
							incr = net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable);
						}
					}
					net->cwnd += incr;
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, incr,
						    SCTP_CWND_LOG_FROM_SS);
					}
					SDT_PROBE(sctp, cwnd, net, ack,
					    stcb->asoc.my_vtag,
					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
					    net,
					    old_cwnd, net->cwnd);
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				/*
				 * Add to pba
				 */
				net->partial_bytes_acked += net->net_ack;

				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					old_cwnd = net->cwnd;
					if (asoc->sctp_cmt_on_off == 2) {
						incr = (uint32_t) (((uint64_t) net->mtu *
						    (uint64_t) net->ssthresh) /
						    (uint64_t) t_ssthresh);
						if (incr == 0) {
							incr = 1;
						}
					} else {
						/* One MTU per window per RTT. */
						incr = net->mtu;
					}
					net->cwnd += incr;
					SDT_PROBE(sctp, cwnd, net, ack,
					    stcb->asoc.my_vtag,
					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
					    net,
					    old_cwnd, net->cwnd);
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule do we need to restore the
		 * RTO timer back? Check our net_ack2. If not set then we
		 * have a ambiguity.. i.e. all data ack'd was sent to more
		 * than one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}
923
/*
 * T3-rtx timeout reaction: recompute ssthresh and collapse cwnd to one
 * MTU (RFC 4960, sec. 7.2.3).  When sctp_cmt_on_off == 2 the ssthresh
 * is instead scaled by this path's share of the association-wide
 * ssthresh/cwnd totals.
 */
static void
sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;
	uint32_t t_ssthresh, t_cwnd;

	/* MT FIXME: Don't compute this over and over again */
	t_ssthresh = 0;
	t_cwnd = 0;
	if (stcb->asoc.sctp_cmt_on_off == 2) {
		struct sctp_nets *lnet;

		/* Sum ssthresh/cwnd over every destination. */
		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
			t_ssthresh += lnet->ssthresh;
			t_cwnd += lnet->cwnd;
		}
		/* 64-bit arithmetic to avoid overflow in the product. */
		net->ssthresh = (uint32_t) (((uint64_t) 4 *
		    (uint64_t) net->mtu *
		    (uint64_t) net->ssthresh) /
		    (uint64_t) t_ssthresh);
		if ((net->cwnd > t_cwnd / 2) &&
		    (net->ssthresh < net->cwnd - t_cwnd / 2)) {
			net->ssthresh = net->cwnd - t_cwnd / 2;
		}
		if (net->ssthresh < net->mtu) {
			net->ssthresh = net->mtu;
		}
	} else {
		/* RFC 4960: ssthresh = max(cwnd/2, 4*MTU) */
		net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
	}
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
	SDT_PROBE(sctp, cwnd, net, to,
	    stcb->asoc.my_vtag,
	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
	    net,
	    old_cwnd, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}
965
/*
 * Common ECN-echo reaction.  With use_rtcc set, a local-LAN path and
 * DCCC enabled, a finer per-lost-packet reduction is applied; otherwise
 * the classic "halve once per window" rule runs (in_window != 0 means
 * we already reacted within this window and do nothing in that mode).
 */
static void
sctp_cwnd_update_after_ecn_echo_common(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost, int use_rtcc)
{
	int old_cwnd = net->cwnd;

	if ((use_rtcc) && (net->lan_type == SCTP_LAN_LOCAL) && (net->cc_mod.rtcc.use_dccc_ecn)) {
		/* Data center Congestion Control */
		if (in_window == 0) {
			/*
			 * Go to CA with the cwnd at the point we sent the
			 * TSN that was marked with a CE.
			 */
			if (net->ecn_prev_cwnd < net->cwnd) {
				/* Restore to prev cwnd */
				net->cwnd = net->ecn_prev_cwnd - (net->mtu * num_pkt_lost);
			} else {
				/* Just cut in 1/2 */
				net->cwnd /= 2;
			}
			/* Drop to CA */
			net->ssthresh = net->cwnd - (num_pkt_lost * net->mtu);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		} else {
			/*
			 * Further tuning down required over the drastic
			 * original cut
			 */
			net->ssthresh -= (net->mtu * num_pkt_lost);
			net->cwnd -= (net->mtu * num_pkt_lost);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		}
		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
	} else {
		if (in_window == 0) {
			SCTP_STAT_INCR(sctps_ecnereducedcwnd);
			net->ssthresh = net->cwnd / 2;
			if (net->ssthresh < net->mtu) {
				net->ssthresh = net->mtu;
				/*
				 * here back off the timer as well, to slow
				 * us down
				 */
				net->RTO <<= 1;
			}
			net->cwnd = net->ssthresh;
			SDT_PROBE(sctp, cwnd, net, ecn,
			    stcb->asoc.my_vtag,
			    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
			    net,
			    old_cwnd, net->cwnd);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
			}
		}
	}

}
1028
1029static void
1030sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
1031    struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
1032    uint32_t * bottle_bw, uint32_t * on_queue)
1033{
1034	uint32_t bw_avail;
1035	int rtt;
1036	unsigned int incr;
1037	int old_cwnd = net->cwnd;
1038
1039	/* need real RTT in msd for this calc */
1040	rtt = net->rtt / 1000;
1041	/* get bottle neck bw */
1042	*bottle_bw = ntohl(cp->bottle_bw);
1043	/* and whats on queue */
1044	*on_queue = ntohl(cp->current_onq);
1045	/*
1046	 * adjust the on-queue if our flight is more it could be that the
1047	 * router has not yet gotten data "in-flight" to it
1048	 */
1049	if (*on_queue < net->flight_size)
1050		*on_queue = net->flight_size;
1051	/* calculate the available space */
1052	bw_avail = (*bottle_bw * rtt) / 1000;
1053	if (bw_avail > *bottle_bw) {
1054		/*
1055		 * Cap the growth to no more than the bottle neck. This can
1056		 * happen as RTT slides up due to queues. It also means if
1057		 * you have more than a 1 second RTT with a empty queue you
1058		 * will be limited to the bottle_bw per second no matter if
1059		 * other points have 1/2 the RTT and you could get more
1060		 * out...
1061		 */
1062		bw_avail = *bottle_bw;
1063	}
1064	if (*on_queue > bw_avail) {
1065		/*
1066		 * No room for anything else don't allow anything else to be
1067		 * "added to the fire".
1068		 */
1069		int seg_inflight, seg_onqueue, my_portion;
1070
1071		net->partial_bytes_acked = 0;
1072
1073		/* how much are we over queue size? */
1074		incr = *on_queue - bw_avail;
1075		if (stcb->asoc.seen_a_sack_this_pkt) {
1076			/*
1077			 * undo any cwnd adjustment that the sack might have
1078			 * made
1079			 */
1080			net->cwnd = net->prev_cwnd;
1081		}
1082		/* Now how much of that is mine? */
1083		seg_inflight = net->flight_size / net->mtu;
1084		seg_onqueue = *on_queue / net->mtu;
1085		my_portion = (incr * seg_inflight) / seg_onqueue;
1086
1087		/* Have I made an adjustment already */
1088		if (net->cwnd > net->flight_size) {
1089			/*
1090			 * for this flight I made an adjustment we need to
1091			 * decrease the portion by a share our previous
1092			 * adjustment.
1093			 */
1094			int diff_adj;
1095
1096			diff_adj = net->cwnd - net->flight_size;
1097			if (diff_adj > my_portion)
1098				my_portion = 0;
1099			else
1100				my_portion -= diff_adj;
1101		}
1102		/*
1103		 * back down to the previous cwnd (assume we have had a sack
1104		 * before this packet). minus what ever portion of the
1105		 * overage is my fault.
1106		 */
1107		net->cwnd -= my_portion;
1108
1109		/* we will NOT back down more than 1 MTU */
1110		if (net->cwnd <= net->mtu) {
1111			net->cwnd = net->mtu;
1112		}
1113		/* force into CA */
1114		net->ssthresh = net->cwnd - 1;
1115	} else {
1116		/*
1117		 * Take 1/4 of the space left or max burst up .. whichever
1118		 * is less.
1119		 */
1120		incr = (bw_avail - *on_queue) >> 2;
1121		if ((stcb->asoc.max_burst > 0) &&
1122		    (stcb->asoc.max_burst * net->mtu < incr)) {
1123			incr = stcb->asoc.max_burst * net->mtu;
1124		}
1125		net->cwnd += incr;
1126	}
1127	if (net->cwnd > bw_avail) {
1128		/* We can't exceed the pipe size */
1129		net->cwnd = bw_avail;
1130	}
1131	if (net->cwnd < net->mtu) {
1132		/* We always have 1 MTU */
1133		net->cwnd = net->mtu;
1134	}
1135	if (net->cwnd - old_cwnd != 0) {
1136		/* log only changes */
1137		SDT_PROBE(sctp, cwnd, net, pd,
1138		    stcb->asoc.my_vtag,
1139		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
1140		    net,
1141		    old_cwnd, net->cwnd);
1142		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1143			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
1144			    SCTP_CWND_LOG_FROM_SAT);
1145		}
1146	}
1147}
1148
1149static void
1150sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
1151    struct sctp_nets *net, int burst_limit)
1152{
1153	int old_cwnd = net->cwnd;
1154
1155	if (net->ssthresh < net->cwnd)
1156		net->ssthresh = net->cwnd;
1157	if (burst_limit) {
1158		net->cwnd = (net->flight_size + (burst_limit * net->mtu));
1159		SDT_PROBE(sctp, cwnd, net, bl,
1160		    stcb->asoc.my_vtag,
1161		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
1162		    net,
1163		    old_cwnd, net->cwnd);
1164		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1165			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
1166		}
1167	}
1168}
1169
/*
 * Early fast-retransmit timer fired: push out pending data, then drop
 * cwnd by one MTU and force the path into congestion avoidance.
 */
static void
sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	/*
	 * make a small adjustment to cwnd and force to CA.
	 */
	if (net->cwnd > net->mtu)
		/* drop down one MTU after sending */
		net->cwnd -= net->mtu;
	if (net->cwnd < net->ssthresh)
		/* still in SS move to CA */
		net->ssthresh = net->cwnd - 1;
	SDT_PROBE(sctp, cwnd, net, fr,
	    stcb->asoc.my_vtag,
	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
	    net,
	    old_cwnd, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		/* note: logged as old - new, i.e. the amount removed */
		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}
1195
/* Plain RFC 2581 SACK handler: common code with RTCC disabled. */
static void
sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	/* Passing a zero as the last argument disables the rtcc algorithm */
	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 0);
}
1204
/* Plain RFC 2581 ECN-echo handler: common code with RTCC disabled. */
static void
sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
	/* Passing a zero as the last argument disables the rtcc algorithm */
	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 0);
}
1212
/* Here starts the RTCCVAR type of congestion control invented by RRS,
 * a slight modification of RFC 2581.  We reuse a common routine or
 * two, since these algorithms are so close and the shared parts need
 * to remain identical.
 */
/* RTCC ECN-echo handler: common code with the rtcc extension enabled. */
static void
sctp_cwnd_update_rtcc_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net,
    int in_window, int num_pkt_lost)
{
	/* Passing a one as the last argument enables the rtcc algorithm */
	sctp_cwnd_update_after_ecn_echo_common(stcb, net, in_window, num_pkt_lost, 1);
}
1224
1225
/* Credit an acknowledged chunk's bytes to the RTCC bandwidth sample. */
static
void
sctp_cwnd_update_rtcc_tsn_acknowledged(struct sctp_nets *net,
    struct sctp_tmit_chunk *tp1)
{
	net->cc_mod.rtcc.bw_bytes += tp1->send_size;
}
1233
/*
 * Before SACK processing: if a bandwidth measurement interval is in
 * progress, record its elapsed time so the SACK code can use it.
 */
static void
sctp_cwnd_prepare_rtcc_net_for_sack(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	if (net->cc_mod.rtcc.tls_needs_set > 0) {
		/* We had a bw measurement going on */
		struct timeval ltls;

		SCTP_GETPTIME_TIMEVAL(&ltls);
		timevalsub(&ltls, &net->cc_mod.rtcc.tls);
		/* elapsed interval, folded to microseconds */
		net->cc_mod.rtcc.new_tot_time = (ltls.tv_sec * 1000000) + ltls.tv_usec;
	}
}
1247
/*
 * Called when transmission starts with nothing in flight: the old
 * bandwidth estimate is stale, so fire the rttvar probe (probe point
 * 8), clear all RTCC measurement state and, when ret_from_eq is set,
 * also pull cwnd back toward the initial window.
 */
static void
sctp_cwnd_new_rtcc_transmission_begins(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	uint64_t vtag, probepoint;

	if (net->cc_mod.rtcc.lbw) {
		/* Clear the old bw.. we went to 0 in-flight */
		vtag = (net->rtt << 32) | (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
		    (stcb->rport);
		probepoint = (((uint64_t) net->cwnd) << 32);
		/* Probe point 8 */
		probepoint |= ((8 << 16) | 0);
		SDT_PROBE(sctp, cwnd, net, rttvar,
		    vtag,
		    ((net->cc_mod.rtcc.lbw << 32) | 0),
		    ((net->cc_mod.rtcc.lbw_rtt << 32) | net->rtt),
		    net->flight_size,
		    probepoint);
		net->cc_mod.rtcc.lbw_rtt = 0;
		net->cc_mod.rtcc.cwnd_at_bw_set = 0;
		net->cc_mod.rtcc.lbw = 0;
		net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
		net->cc_mod.rtcc.vol_reduce = 0;
		net->cc_mod.rtcc.bw_tot_time = 0;
		net->cc_mod.rtcc.bw_bytes = 0;
		net->cc_mod.rtcc.tls_needs_set = 0;
		if (net->cc_mod.rtcc.steady_step) {
			net->cc_mod.rtcc.vol_reduce = 0;
			net->cc_mod.rtcc.step_cnt = 0;
			net->cc_mod.rtcc.last_step_state = 0;
		}
		if (net->cc_mod.rtcc.ret_from_eq) {
			/* less aggressive one - reset cwnd too */
			uint32_t cwnd_in_mtu, cwnd;

			cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
			if (cwnd_in_mtu == 0) {
				/*
				 * Using 0 means that the value of RFC 4960
				 * is used.
				 */
				cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
			} else {
				/*
				 * We take the minimum of the burst limit
				 * and the initial congestion window.
				 */
				if ((stcb->asoc.max_burst > 0) && (cwnd_in_mtu > stcb->asoc.max_burst))
					cwnd_in_mtu = stcb->asoc.max_burst;
				cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
			}
			if (net->cwnd > cwnd) {
				/*
				 * Only set if we are not a timeout (i.e.
				 * down to 1 mtu)
				 */
				net->cwnd = cwnd;
			}
		}
	}
}
1310
1311static void
1312sctp_set_rtcc_initial_cc_param(struct sctp_tcb *stcb,
1313    struct sctp_nets *net)
1314{
1315	uint64_t vtag, probepoint;
1316
1317	sctp_set_initial_cc_param(stcb, net);
1318	stcb->asoc.use_precise_time = 1;
1319	probepoint = (((uint64_t) net->cwnd) << 32);
1320	probepoint |= ((9 << 16) | 0);
1321	vtag = (net->rtt << 32) |
1322	    (((uint32_t) (stcb->sctp_ep->sctp_lport)) << 16) |
1323	    (stcb->rport);
1324	SDT_PROBE(sctp, cwnd, net, rttvar,
1325	    vtag,
1326	    0,
1327	    0,
1328	    0,
1329	    probepoint);
1330	net->cc_mod.rtcc.lbw_rtt = 0;
1331	net->cc_mod.rtcc.cwnd_at_bw_set = 0;
1332	net->cc_mod.rtcc.vol_reduce = 0;
1333	net->cc_mod.rtcc.lbw = 0;
1334	net->cc_mod.rtcc.vol_reduce = 0;
1335	net->cc_mod.rtcc.bw_bytes_at_last_rttc = 0;
1336	net->cc_mod.rtcc.bw_tot_time = 0;
1337	net->cc_mod.rtcc.bw_bytes = 0;
1338	net->cc_mod.rtcc.tls_needs_set = 0;
1339	net->cc_mod.rtcc.ret_from_eq = SCTP_BASE_SYSCTL(sctp_rttvar_eqret);
1340	net->cc_mod.rtcc.steady_step = SCTP_BASE_SYSCTL(sctp_steady_step);
1341	net->cc_mod.rtcc.use_dccc_ecn = SCTP_BASE_SYSCTL(sctp_use_dccc_ecn);
1342	net->cc_mod.rtcc.step_cnt = 0;
1343	net->cc_mod.rtcc.last_step_state = 0;
1344
1345
1346}
1347
1348static int
1349sctp_cwnd_rtcc_socket_option(struct sctp_tcb *stcb, int setorget,
1350    struct sctp_cc_option *cc_opt)
1351{
1352	struct sctp_nets *net;
1353
1354	if (setorget == 1) {
1355		/* a set */
1356		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
1357			if ((cc_opt->aid_value.assoc_value != 0) &&
1358			    (cc_opt->aid_value.assoc_value != 1)) {
1359				return (EINVAL);
1360			}
1361			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1362				net->cc_mod.rtcc.ret_from_eq = cc_opt->aid_value.assoc_value;
1363			}
1364		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
1365			if ((cc_opt->aid_value.assoc_value != 0) &&
1366			    (cc_opt->aid_value.assoc_value != 1)) {
1367				return (EINVAL);
1368			}
1369			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1370				net->cc_mod.rtcc.use_dccc_ecn = cc_opt->aid_value.assoc_value;
1371			}
1372		} else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
1373			TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1374				net->cc_mod.rtcc.steady_step = cc_opt->aid_value.assoc_value;
1375			}
1376		} else {
1377			return (EINVAL);
1378		}
1379	} else {
1380		/* a get */
1381		if (cc_opt->option == SCTP_CC_OPT_RTCC_SETMODE) {
1382			net = TAILQ_FIRST(&stcb->asoc.nets);
1383			if (net == NULL) {
1384				return (EFAULT);
1385			}
1386			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.ret_from_eq;
1387		} else if (cc_opt->option == SCTP_CC_OPT_USE_DCCC_ECN) {
1388			net = TAILQ_FIRST(&stcb->asoc.nets);
1389			if (net == NULL) {
1390				return (EFAULT);
1391			}
1392			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.use_dccc_ecn;
1393		} else if (cc_opt->option == SCTP_CC_OPT_STEADY_STEP) {
1394			net = TAILQ_FIRST(&stcb->asoc.nets);
1395			if (net == NULL) {
1396				return (EFAULT);
1397			}
1398			cc_opt->aid_value.assoc_value = net->cc_mod.rtcc.steady_step;
1399		} else {
1400			return (EINVAL);
1401		}
1402	}
1403	return (0);
1404}
1405
/*
 * On packet transmission: stamp the start time for a bandwidth
 * measurement interval, unless one is already in progress.
 */
static void
sctp_cwnd_update_rtcc_packet_transmitted(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	if (net->cc_mod.rtcc.tls_needs_set == 0) {
		SCTP_GETPTIME_TIMEVAL(&net->cc_mod.rtcc.tls);
		net->cc_mod.rtcc.tls_needs_set = 2;
	}
}
1415
/* RTCC SACK handler: common code with the rtcc extension enabled. */
static void
sctp_cwnd_update_rtcc_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	/* Passing a one as the last argument enables the rtcc algorithm */
	sctp_cwnd_update_after_sack_common(stcb, asoc, accum_moved, reneged_all, will_exit, 1);
}
1424
/* Flag that a fresh RTT sample was taken during this SACK cycle. */
static void
sctp_rtt_rtcc_calculated(struct sctp_tcb *stcb,
    struct sctp_nets *net, struct timeval *now)
{
	net->cc_mod.rtcc.rtt_set_this_sack = 1;
}
1431
/* Here starts Sally Floyd's HS-TCP */
1433
/*
 * One row of the HS-TCP response table: once cwnd (measured in 1K
 * units, i.e. cwnd >> 10) reaches 'cwnd', grow by 'increase' (also in
 * 1K units) per adjustment and back off by 'drop_percent' percent of
 * cwnd on loss.
 */
struct sctp_hs_raise_drop {
	int32_t cwnd;		/* cwnd threshold, in 1K units */
	int32_t increase;	/* additive increase, in 1K units */
	int32_t drop_percent;	/* multiplicative decrease, percent */
};
1439
1440#define SCTP_HS_TABLE_SIZE 73
1441
/*
 * HS-TCP response table (RFC 3649 style), indexed via last_hs_used and
 * searched against cwnd >> 10.  Rows are sorted by ascending cwnd
 * threshold; the search code relies on that ordering.
 */
struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
	{38, 1, 50},		/* 0   */
	{118, 2, 44},		/* 1   */
	{221, 3, 41},		/* 2   */
	{347, 4, 38},		/* 3   */
	{495, 5, 37},		/* 4   */
	{663, 6, 35},		/* 5   */
	{851, 7, 34},		/* 6   */
	{1058, 8, 33},		/* 7   */
	{1284, 9, 32},		/* 8   */
	{1529, 10, 31},		/* 9   */
	{1793, 11, 30},		/* 10  */
	{2076, 12, 29},		/* 11  */
	{2378, 13, 28},		/* 12  */
	{2699, 14, 28},		/* 13  */
	{3039, 15, 27},		/* 14  */
	{3399, 16, 27},		/* 15  */
	{3778, 17, 26},		/* 16  */
	{4177, 18, 26},		/* 17  */
	{4596, 19, 25},		/* 18  */
	{5036, 20, 25},		/* 19  */
	{5497, 21, 24},		/* 20  */
	{5979, 22, 24},		/* 21  */
	{6483, 23, 23},		/* 22  */
	{7009, 24, 23},		/* 23  */
	{7558, 25, 22},		/* 24  */
	{8130, 26, 22},		/* 25  */
	{8726, 27, 22},		/* 26  */
	{9346, 28, 21},		/* 27  */
	{9991, 29, 21},		/* 28  */
	{10661, 30, 21},	/* 29  */
	{11358, 31, 20},	/* 30  */
	{12082, 32, 20},	/* 31  */
	{12834, 33, 20},	/* 32  */
	{13614, 34, 19},	/* 33  */
	{14424, 35, 19},	/* 34  */
	{15265, 36, 19},	/* 35  */
	{16137, 37, 19},	/* 36  */
	{17042, 38, 18},	/* 37  */
	{17981, 39, 18},	/* 38  */
	{18955, 40, 18},	/* 39  */
	{19965, 41, 17},	/* 40  */
	{21013, 42, 17},	/* 41  */
	{22101, 43, 17},	/* 42  */
	{23230, 44, 17},	/* 43  */
	{24402, 45, 16},	/* 44  */
	{25618, 46, 16},	/* 45  */
	{26881, 47, 16},	/* 46  */
	{28193, 48, 16},	/* 47  */
	{29557, 49, 15},	/* 48  */
	{30975, 50, 15},	/* 49  */
	{32450, 51, 15},	/* 50  */
	{33986, 52, 15},	/* 51  */
	{35586, 53, 14},	/* 52  */
	{37253, 54, 14},	/* 53  */
	{38992, 55, 14},	/* 54  */
	{40808, 56, 14},	/* 55  */
	{42707, 57, 13},	/* 56  */
	{44694, 58, 13},	/* 57  */
	{46776, 59, 13},	/* 58  */
	{48961, 60, 13},	/* 59  */
	{51258, 61, 13},	/* 60  */
	{53677, 62, 12},	/* 61  */
	{56230, 63, 12},	/* 62  */
	{58932, 64, 12},	/* 63  */
	{61799, 65, 12},	/* 64  */
	{64851, 66, 11},	/* 65  */
	{68113, 67, 11},	/* 66  */
	{71617, 68, 11},	/* 67  */
	{75401, 69, 10},	/* 68  */
	{79517, 70, 10},	/* 69  */
	{84035, 71, 10},	/* 70  */
	{89053, 72, 10},	/* 71  */
	{94717, 73, 9}		/* 72  */
};
1517
1518static void
1519sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
1520{
1521	int cur_val, i, indx, incr;
1522
1523	cur_val = net->cwnd >> 10;
1524	indx = SCTP_HS_TABLE_SIZE - 1;
1525#ifdef SCTP_DEBUG
1526	printf("HS CC CAlled.\n");
1527#endif
1528	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1529		/* normal mode */
1530		if (net->net_ack > net->mtu) {
1531			net->cwnd += net->mtu;
1532			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1533				sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
1534			}
1535		} else {
1536			net->cwnd += net->net_ack;
1537			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1538				sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
1539			}
1540		}
1541	} else {
1542		for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
1543			if (cur_val < sctp_cwnd_adjust[i].cwnd) {
1544				indx = i;
1545				break;
1546			}
1547		}
1548		net->last_hs_used = indx;
1549		incr = ((sctp_cwnd_adjust[indx].increase) << 10);
1550		net->cwnd += incr;
1551		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1552			sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
1553		}
1554	}
1555}
1556
1557static void
1558sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
1559{
1560	int cur_val, i, indx;
1561	int old_cwnd = net->cwnd;
1562
1563	cur_val = net->cwnd >> 10;
1564	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1565		/* normal mode */
1566		net->ssthresh = net->cwnd / 2;
1567		if (net->ssthresh < (net->mtu * 2)) {
1568			net->ssthresh = 2 * net->mtu;
1569		}
1570		net->cwnd = net->ssthresh;
1571	} else {
1572		/* drop by the proper amount */
1573		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
1574		    sctp_cwnd_adjust[net->last_hs_used].drop_percent);
1575		net->cwnd = net->ssthresh;
1576		/* now where are we */
1577		indx = net->last_hs_used;
1578		cur_val = net->cwnd >> 10;
1579		/* reset where we are in the table */
1580		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
1581			/* feel out of hs */
1582			net->last_hs_used = 0;
1583		} else {
1584			for (i = indx; i >= 1; i--) {
1585				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
1586					break;
1587				}
1588			}
1589			net->last_hs_used = indx;
1590		}
1591	}
1592	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
1593		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
1594	}
1595}
1596
/*
 * HS-TCP fast-retransmit reaction: for every destination with newly
 * acked data, shrink cwnd/ssthresh via the HS table, open a fast
 * recovery window (association-wide and per destination) and restart
 * the T3 timer.  Inside an existing RFC 2582 window (non-CMT) we only
 * count the event.
 */
static void
sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;

				sctp_hs_cwnd_decrease(stcb, net);

				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/* Restart the retransmission timer. */
				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}
1661
/*
 * HS-TCP SACK processing: per destination, manage the Early FR timer,
 * clear error state on an unambiguous ack (Karn), handle CMT-PF
 * reactivation, and grow cwnd (HS table in slow start, linear in CA)
 * unless we are inside a fast recovery window.  Finally restore any
 * RTO that was doubled by timeouts.
 */
static void
sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all do we need to have a Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * yes, so in this case stop it if its
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked to be sent, causing us
				 * to illicit a sack with gaps to force out
				 * the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if its running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing error count, this
			 * is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* now was it the primary? if so restore */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTU's based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off > 0) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {

					/* HS-TCP table-driven increase */
					sctp_hs_cwnd_increase(stcb, net);

				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				net->partial_bytes_acked += net->net_ack;
				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					net->cwnd += net->mtu;
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule do we need to restore the
		 * RTO timer back? Check our net_ack2. If not set then we
		 * have a ambiguity.. i.e. all data ack'd was sent to more
		 * than one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}
1850
1851
1852/*
1853 * H-TCP congestion control. The algorithm is detailed in:
1854 * R.N.Shorten, D.J.Leith:
1855 *   "H-TCP: TCP for high-speed and long-distance networks"
1856 *   Proc. PFLDnet, Argonne, 2004.
1857 * http://www.hamilton.ie/net/htcp3.pdf
1858 */
1859
1860
/* H-TCP feature switches, consulted by the routines below. */
static int use_rtt_scaling = 1;
static int use_bandwidth_switch = 1;
1863
/*
 * Sequence-space containment test: true iff seq1 lies within the
 * closed window [seq2, seq3], computed with unsigned wraparound
 * arithmetic (mod 2^32), as in TCP.
 */
static inline int
between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	uint32_t window = seq3 - seq2;
	uint32_t offset = seq1 - seq2;

	return (window >= offset);
}
1869
/* Ticks elapsed since the last congestion event was stamped. */
static inline uint32_t
htcp_cong_time(struct htcp *ca)
{
	return sctp_get_tick_count() - ca->last_cong;
}
1875
/*
 * Number of minRTT intervals since the last congestion event.
 * NOTE(review): divides by ca->minRTT; callers appear to rely on a
 * nonzero minRTT having been sampled first (see measure_rtt) — confirm
 * before calling on a fresh path.
 */
static inline uint32_t
htcp_ccount(struct htcp *ca)
{
	return htcp_cong_time(ca) / ca->minRTT;
}
1881
/*
 * Start a new congestion epoch: save the current state for a possible
 * undo, then stamp the event time.
 */
static inline void
htcp_reset(struct htcp *ca)
{
	ca->undo_last_cong = ca->last_cong;
	ca->undo_maxRTT = ca->maxRTT;
	ca->undo_old_maxB = ca->old_maxB;
	ca->last_cong = sctp_get_tick_count();
}
1890
1891#ifdef SCTP_NOT_USED
1892
/*
 * Roll back the last congestion reaction and return the cwnd to
 * restore (compiled out via SCTP_NOT_USED).
 */
static uint32_t
htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	net->cc_mod.htcp_ca.last_cong = net->cc_mod.htcp_ca.undo_last_cong;
	net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.undo_maxRTT;
	net->cc_mod.htcp_ca.old_maxB = net->cc_mod.htcp_ca.undo_old_maxB;
	return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->cc_mod.htcp_ca.beta) * net->mtu);
}
1901
1902#endif
1903
/*
 * Track the smallest and (when the sample is trustworthy) largest
 * smoothed RTT seen on this path for the H-TCP calculations.
 */
static inline void
measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t srtt = net->lastsa >> SCTP_RTT_SHIFT;

	/* keep track of minimum RTT seen so far, minRTT is zero at first */
	if (net->cc_mod.htcp_ca.minRTT > srtt || !net->cc_mod.htcp_ca.minRTT)
		net->cc_mod.htcp_ca.minRTT = srtt;

	/* max RTT */
	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->cc_mod.htcp_ca) > 3) {
		if (net->cc_mod.htcp_ca.maxRTT < net->cc_mod.htcp_ca.minRTT)
			net->cc_mod.htcp_ca.maxRTT = net->cc_mod.htcp_ca.minRTT;
		/* only let maxRTT creep up by at most 20ms per sample */
		if (net->cc_mod.htcp_ca.maxRTT < srtt && srtt <= net->cc_mod.htcp_ca.maxRTT + MSEC_TO_TICKS(20))
			net->cc_mod.htcp_ca.maxRTT = srtt;
	}
}
1921
1922static void
1923measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
1924{
1925	uint32_t now = sctp_get_tick_count();
1926
1927	if (net->fast_retran_ip == 0)
1928		net->cc_mod.htcp_ca.bytes_acked = net->net_ack;
1929
1930	if (!use_bandwidth_switch)
1931		return;
1932
1933	/* achieved throughput calculations */
1934	/* JRS - not 100% sure of this statement */
1935	if (net->fast_retran_ip == 1) {
1936		net->cc_mod.htcp_ca.bytecount = 0;
1937		net->cc_mod.htcp_ca.lasttime = now;
1938		return;
1939	}
1940	net->cc_mod.htcp_ca.bytecount += net->net_ack;
1941
1942	if (net->cc_mod.htcp_ca.bytecount >= net->cwnd - ((net->cc_mod.htcp_ca.alpha >> 7 ? : 1) * net->mtu)
1943	    && now - net->cc_mod.htcp_ca.lasttime >= net->cc_mod.htcp_ca.minRTT
1944	    && net->cc_mod.htcp_ca.minRTT > 0) {
1945		uint32_t cur_Bi = net->cc_mod.htcp_ca.bytecount / net->mtu * hz / (now - net->cc_mod.htcp_ca.lasttime);
1946
1947		if (htcp_ccount(&net->cc_mod.htcp_ca) <= 3) {
1948			/* just after backoff */
1949			net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi = cur_Bi;
1950		} else {
1951			net->cc_mod.htcp_ca.Bi = (3 * net->cc_mod.htcp_ca.Bi + cur_Bi) / 4;
1952			if (net->cc_mod.htcp_ca.Bi > net->cc_mod.htcp_ca.maxB)
1953				net->cc_mod.htcp_ca.maxB = net->cc_mod.htcp_ca.Bi;
1954			if (net->cc_mod.htcp_ca.minB > net->cc_mod.htcp_ca.maxB)
1955				net->cc_mod.htcp_ca.minB = net->cc_mod.htcp_ca.maxB;
1956		}
1957		net->cc_mod.htcp_ca.bytecount = 0;
1958		net->cc_mod.htcp_ca.lasttime = now;
1959	}
1960}
1961
1962static inline void
1963htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
1964{
1965	if (use_bandwidth_switch) {
1966		uint32_t maxB = ca->maxB;
1967		uint32_t old_maxB = ca->old_maxB;
1968
1969		ca->old_maxB = ca->maxB;
1970
1971		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
1972			ca->beta = BETA_MIN;
1973			ca->modeswitch = 0;
1974			return;
1975		}
1976	}
1977	if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
1978		ca->beta = (minRTT << 7) / maxRTT;
1979		if (ca->beta < BETA_MIN)
1980			ca->beta = BETA_MIN;
1981		else if (ca->beta > BETA_MAX)
1982			ca->beta = BETA_MAX;
1983	} else {
1984		ca->beta = BETA_MIN;
1985		ca->modeswitch = 1;
1986	}
1987}
1988
1989static inline void
1990htcp_alpha_update(struct htcp *ca)
1991{
1992	uint32_t minRTT = ca->minRTT;
1993	uint32_t factor = 1;
1994	uint32_t diff = htcp_cong_time(ca);
1995
1996	if (diff > (uint32_t) hz) {
1997		diff -= hz;
1998		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
1999	}
2000	if (use_rtt_scaling && minRTT) {
2001		uint32_t scale = (hz << 3) / (10 * minRTT);
2002
2003		scale = min(max(scale, 1U << 2), 10U << 3);	/* clamping ratio to
2004								 * interval [0.5,10]<<3 */
2005		factor = (factor << 3) / scale;
2006		if (!factor)
2007			factor = 1;
2008	}
2009	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
2010	if (!ca->alpha)
2011		ca->alpha = ALPHA_BASE;
2012}
2013
2014/* After we have the rtt data to calculate beta, we'd still prefer to wait one
2015 * rtt before we adjust our beta to ensure we are working from a consistent
2016 * data.
2017 *
2018 * This function should be called when we hit a congestion event since only at
2019 * that point do we really have a real sense of maxRTT (the queues en route
2020 * were getting just too full now).
2021 */
2022static void
2023htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
2024{
2025	uint32_t minRTT = net->cc_mod.htcp_ca.minRTT;
2026	uint32_t maxRTT = net->cc_mod.htcp_ca.maxRTT;
2027
2028	htcp_beta_update(&net->cc_mod.htcp_ca, minRTT, maxRTT);
2029	htcp_alpha_update(&net->cc_mod.htcp_ca);
2030
2031	/*
2032	 * add slowly fading memory for maxRTT to accommodate routing
2033	 * changes etc
2034	 */
2035	if (minRTT > 0 && maxRTT > minRTT)
2036		net->cc_mod.htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
2037}
2038
2039static uint32_t
2040htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
2041{
2042	htcp_param_update(stcb, net);
2043	return max(((net->cwnd / net->mtu * net->cc_mod.htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu);
2044}
2045
/*
 * Per-SACK cwnd growth: standard slow start below ssthresh, H-TCP
 * additive increase (alpha, scaled by 2^7, MTUs per cwnd of acked
 * data) above it.
 */
static void
htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*-
	 * How to handle these functions?
         *	if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
	 *		return;
	 */
	if (net->cwnd <= net->ssthresh) {
		/* We are in slow start */
		if (net->flight_size + net->net_ack >= net->cwnd) {
			/* cwnd was fully used: grow it, bounded by the ABC limit. */
			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->mtu,
					    SCTP_CWND_LOG_FROM_SS);
				}
			} else {
				net->cwnd += net->net_ack;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->net_ack,
					    SCTP_CWND_LOG_FROM_SS);
				}
			}
		} else {
			/* Not cwnd-limited: no growth, just log the non-advance. */
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_SS);
			}
		}
	} else {
		/* Congestion avoidance: refresh the RTT estimates first. */
		measure_rtt(stcb, net);

		/*
		 * In dangerous area, increase slowly. In theory this is
		 * net->cwnd += alpha / net->cwnd
		 */
		/* What is snd_cwnd_cnt?? */
		if (((net->partial_bytes_acked / net->mtu * net->cc_mod.htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
			/*-
			 * Does SCTP have a cwnd clamp?
			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
			 */
			net->cwnd += net->mtu;
			net->partial_bytes_acked = 0;
			/* alpha grows with time since backoff: recompute it. */
			htcp_alpha_update(&net->cc_mod.htcp_ca);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_FROM_CA);
			}
		} else {
			/* Accumulate acked bytes until a cwnd increment is due. */
			net->partial_bytes_acked += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_CA);
			}
		}

		net->cc_mod.htcp_ca.bytes_acked = net->mtu;
	}
}
2107
#ifdef SCTP_NOT_USED
/* Lower bound on congestion window.  Currently compiled out. */
static uint32_t
htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	return net->ssthresh;
}

#endif
2117
2118static void
2119htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
2120{
2121	memset(&net->cc_mod.htcp_ca, 0, sizeof(struct htcp));
2122	net->cc_mod.htcp_ca.alpha = ALPHA_BASE;
2123	net->cc_mod.htcp_ca.beta = BETA_MIN;
2124	net->cc_mod.htcp_ca.bytes_acked = net->mtu;
2125	net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count();
2126}
2127
2128static void
2129sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
2130{
2131	/*
2132	 * We take the max of the burst limit times a MTU or the
2133	 * INITIAL_CWND. We then limit this to 4 MTU's of sending.
2134	 */
2135	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
2136	net->ssthresh = stcb->asoc.peers_rwnd;
2137	htcp_init(stcb, net);
2138
2139	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
2140		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
2141	}
2142}
2143
/*
 * Per-SACK update pass for the H-TCP module: walks every destination,
 * manages the Early FR timer, clears error counts per Karn's rule,
 * handles CMT-PF transitions, grows cwnd via htcp_cong_avoid() when
 * the (pseudo-)cumack moved, and restores any backed-off RTO.
 */
static void
sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Need to debug.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (SCTP_TSN_GE(asoc->last_acked_seq, net->fast_recovery_tsn) ||
			    SCTP_TSN_GE(net->pseudo_cumack, net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all do we need to have a Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * yes, so in this case stop it if its
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked to be sent, causing us
				 * to illicit a sack with gaps to force out
				 * the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if its running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing error count, this
			 * is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* addr came good */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* now was it the primary? if so restore */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTU's based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off > 0) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off > 0 && net->fast_retran_loss_recovery
		 * && net->will_exit_fast_recovery == 0) { @@@ Do something
		 * } else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    will_exit == 0 &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off > 0) && net->new_pseudo_cumack)) {
			/* H-TCP growth, plus a throughput sample for beta. */
			htcp_cong_avoid(stcb, net);
			measure_achieved_throughput(stcb, net);
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule do we need to restore the
		 * RTO timer back? Check our net_ack2. If not set then we
		 * have a ambiguity.. i.e. all data ack'd was sent to more
		 * than one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = (net->lastsa >> SCTP_RTT_SHIFT) + net->lastsv;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}
2303
/*
 * Fast-retransmit handling for the H-TCP module: for every destination
 * that had chunks fast-retransmitted, recompute ssthresh via H-TCP's
 * beta, collapse cwnd to it, open the RFC 2582-style fast-recovery
 * window (association-wide and per destination), and restart the T3
 * send timer.  An already-open recovery window suppresses the cut
 * unless CMT is enabled.
 */
static void
sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Need to debug. ((sctp_cmt_on_off > 0) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off > 0)) {
			/* out of a RFC2582 Fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * per section 7.2.3, are there any
				 * destinations that had a fast retransmit
				 * to them. If so what we need to do is
				 * adjust ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				/* JRS - reset as if state were changed */
				htcp_reset(&net->cc_mod.htcp_ca);
				net->ssthresh = htcp_recalc_ssthresh(stcb, net);
				net->cwnd = net->ssthresh;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/* Restart the T3 send timer for this destination. */
				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Mark a peg that we WOULD have done a cwnd
			 * reduction but RFC2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}
2375
/*
 * T3 retransmission timeout: recompute ssthresh via H-TCP's beta and
 * collapse cwnd to a single MTU, clearing the partial-ack counter.
 */
static void
sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	/* JRS - reset as if the state were being changed to timeout */
	htcp_reset(&net->cc_mod.htcp_ca);
	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}
2391
/*
 * Early FR timer expiry: push out pending data, restart the H-TCP
 * congestion-epoch clock, then shave one MTU off cwnd and - if still
 * in slow start - pull ssthresh just below cwnd to force
 * congestion-avoidance mode.
 */
static void
sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	/* Restart the congestion-epoch clock used by alpha/ccount. */
	net->cc_mod.htcp_ca.last_cong = sctp_get_tick_count();
	/*
	 * make a small adjustment to cwnd and force to CA.
	 */
	if (net->cwnd > net->mtu)
		/* drop down one MTU after sending */
		net->cwnd -= net->mtu;
	if (net->cwnd < net->ssthresh)
		/* still in SS move to CA */
		net->ssthresh = net->cwnd - 1;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}
2415
2416static void
2417sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
2418    struct sctp_nets *net, int in_window, int num_pkt_lost)
2419{
2420	int old_cwnd;
2421
2422	old_cwnd = net->cwnd;
2423
2424	/* JRS - reset hctp as if state changed */
2425	if (in_window == 0) {
2426		htcp_reset(&net->cc_mod.htcp_ca);
2427		SCTP_STAT_INCR(sctps_ecnereducedcwnd);
2428		net->ssthresh = htcp_recalc_ssthresh(stcb, net);
2429		if (net->ssthresh < net->mtu) {
2430			net->ssthresh = net->mtu;
2431			/* here back off the timer as well, to slow us down */
2432			net->RTO <<= 1;
2433		}
2434		net->cwnd = net->ssthresh;
2435		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
2436			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
2437		}
2438	}
2439}
2440
/*
 * Table of pluggable congestion-control modules.  Judging by the
 * handlers wired into each entry: [0] the default CC functions,
 * [1] HighSpeed-TCP variants (sctp_hs_*), [2] H-TCP (sctp_htcp_*),
 * [3] the RTCC module (sctp_*_rtcc_*).
 * NOTE(review): presumably indexed by the pluggable-CC socket
 * option/sysctl value - confirm against the SCTP_CC_* constants.
 */
struct sctp_cc_functions sctp_cc_functions[] = {
	{
		/* [0] Default congestion control. */
		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_cwnd_update_after_sack,
		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_after_fr_timer = sctp_cwnd_update_after_fr_timer
	},
	{
		/* [1] HighSpeed TCP: only sack/fr handling differs. */
		.sctp_set_initial_cc_param = sctp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_hs_cwnd_update_after_sack,
		.sctp_cwnd_update_after_fr = sctp_hs_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_after_fr_timer = sctp_cwnd_update_after_fr_timer
	},
	{
		/* [2] H-TCP (the module implemented above). */
		.sctp_set_initial_cc_param = sctp_htcp_set_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_htcp_cwnd_update_after_sack,
		.sctp_cwnd_update_after_fr = sctp_htcp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_htcp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_htcp_cwnd_update_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_after_fr_timer = sctp_htcp_cwnd_update_after_fr_timer
	},
	{
		/* [3] RTCC: adds the extra transmission/RTT hooks below. */
		.sctp_set_initial_cc_param = sctp_set_rtcc_initial_cc_param,
		.sctp_cwnd_update_after_sack = sctp_cwnd_update_rtcc_after_sack,
		.sctp_cwnd_update_after_fr = sctp_cwnd_update_after_fr,
		.sctp_cwnd_update_after_timeout = sctp_cwnd_update_after_timeout,
		.sctp_cwnd_update_after_ecn_echo = sctp_cwnd_update_rtcc_after_ecn_echo,
		.sctp_cwnd_update_after_packet_dropped = sctp_cwnd_update_after_packet_dropped,
		.sctp_cwnd_update_after_output = sctp_cwnd_update_after_output,
		.sctp_cwnd_update_after_fr_timer = sctp_cwnd_update_after_fr_timer,
		.sctp_cwnd_update_packet_transmitted = sctp_cwnd_update_rtcc_packet_transmitted,
		.sctp_cwnd_update_tsn_acknowledged = sctp_cwnd_update_rtcc_tsn_acknowledged,
		.sctp_cwnd_new_transmission_begins = sctp_cwnd_new_rtcc_transmission_begins,
		.sctp_cwnd_prepare_net_for_sack = sctp_cwnd_prepare_rtcc_net_for_sack,
		.sctp_cwnd_socket_option = sctp_cwnd_rtcc_socket_option,
		.sctp_rtt_calculated = sctp_rtt_rtcc_calculated
	}
};
2489