sctp_cc_functions.c revision 215817
/*-
 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * a) Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * b) Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the distribution.
 *
 * c) Neither the name of Cisco Systems, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <netinet/sctp_os.h>
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_pcb.h>
#include <netinet/sctp_header.h>
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp_indata.h>
#include <netinet/sctp_uio.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctp_auth.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_cc_functions.h>
#include <netinet/sctp_dtrace_declare.h>
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/netinet/sctp_cc_functions.c 215817 2010-11-25 13:39:55Z rrs $");

void
sctp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	struct sctp_association *assoc;
	uint32_t cwnd_in_mtu;

	assoc = &stcb->asoc;
	/*
	 * We take the minimum of the burst limit and the initial congestion
	 * window. The initial congestion window is at least two times the
	 * MTU.
	 */
	cwnd_in_mtu = SCTP_BASE_SYSCTL(sctp_initial_cwnd);
	if ((assoc->max_burst > 0) && (cwnd_in_mtu > assoc->max_burst))
		cwnd_in_mtu = assoc->max_burst;
	net->cwnd = (net->mtu - sizeof(struct sctphdr)) * cwnd_in_mtu;
	net->ssthresh = assoc->peers_rwnd;
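	/*
	 * Worked example (illustrative values only): with a 1500-byte MTU,
	 * the 12-byte SCTP common header and sctp_initial_cwnd = 3, the
	 * initial cwnd is 3 * (1500 - 12) = 4464 bytes, while ssthresh
	 * starts at the peer's advertised receive window.
	 */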

	SDT_PROBE(sctp, cwnd, net, init,
	    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
	    0, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) &
	    (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}

void
sctp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*-
	 * CMT fast recovery code. Needs debugging: ((sctp_cmt_on_off == 1) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off == 1)) {
			/* out of an RFC 2582 fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * Per section 7.2.3, were there any
				 * destinations that had a fast retransmit
				 * sent to them? If so, we need to adjust
				 * ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				net->ssthresh = net->cwnd / 2;
				if (net->ssthresh < (net->mtu * 2)) {
					net->ssthresh = 2 * net->mtu;
				}
				net->cwnd = net->ssthresh;
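				/*
				 * Classic multiplicative decrease on a fast
				 * retransmit (RFC 2960 section 7.2.3 style):
				 * ssthresh = max(cwnd / 2, 2 * MTU), and
				 * cwnd restarts from ssthresh.
				 */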
				SDT_PROBE(sctp, cwnd, net, fr,
				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
				    old_cwnd, net->cwnd);
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * Disable Nonce Sum Checking and store the
				 * resync tsn
				 */
				asoc->nonce_sum_check = 0;
				asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Bump a counter noting that we WOULD have done a
			 * cwnd reduction but RFC 2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

void
sctp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;
	int old_cwnd;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Needs debugging.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (compare_with_wrap(asoc->last_acked_seq,
			    net->fast_recovery_tsn, MAX_TSN) ||
			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all, do we need to have an Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * Yes, so in this case stop it if it's
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked chunks to be sent,
				 * causing us to elicit a SACK with gaps to
				 * force out the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if it's running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing the error count;
			 * this is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* address came good again */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* now, was it the primary? if so, restore it */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTUs based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off == 1) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				old_cwnd = net->cwnd;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SDT_PROBE(sctp, cwnd, net, ack,
				    stcb->asoc.my_vtag, ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)), net,
				    old_cwnd, net->cwnd);
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off == 1 &&
		 * net->fast_retran_loss_recovery &&
		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
		 * else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
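				/*
				 * Slow start uses Appropriate Byte Counting
				 * (RFC 3465): grow cwnd by the bytes newly
				 * acked, capped at L * MTU per SACK, where
				 * L is the sctp_L2_abc_variable sysctl. The
				 * branches below implement
				 * cwnd += min(net_ack, L * MTU), and only
				 * when cwnd was actually being used
				 * (flight_size + net_ack >= cwnd).
				 */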
				if (net->flight_size + net->net_ack >= net->cwnd) {
					if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
						old_cwnd = net->cwnd;
						net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
						SDT_PROBE(sctp, cwnd, net, ack,
						    stcb->asoc.my_vtag,
						    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
						    net,
						    old_cwnd, net->cwnd);
						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
							sctp_log_cwnd(stcb, net, net->mtu,
							    SCTP_CWND_LOG_FROM_SS);
						}
					} else {
						old_cwnd = net->cwnd;
						net->cwnd += net->net_ack;
						SDT_PROBE(sctp, cwnd, net, ack,
						    stcb->asoc.my_vtag,
						    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
						    net,
						    old_cwnd, net->cwnd);

						if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
							sctp_log_cwnd(stcb, net, net->net_ack,
							    SCTP_CWND_LOG_FROM_SS);
						}
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				/*
				 * Add to pba
				 */
				net->partial_bytes_acked += net->net_ack;
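				/*
				 * Congestion avoidance per RFC 4960 section
				 * 7.2.2: accumulate acked bytes in
				 * partial_bytes_acked (pba). Once a full
				 * cwnd's worth has been acked while cwnd
				 * was in use, grow cwnd by one MTU and
				 * charge pba, giving roughly one MTU of
				 * growth per round trip.
				 */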

				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					old_cwnd = net->cwnd;
					net->cwnd += net->mtu;
					SDT_PROBE(sctp, cwnd, net, ack,
					    stcb->asoc.my_vtag,
					    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
					    net,
					    old_cwnd, net->cwnd);
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule, do we need to restore the
		 * RTO timer? Check our net_ack2. If it is not set then we
		 * have an ambiguity: all data acked was sent to more than
		 * one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
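			/*
			 * Recompute the RTO from the smoothed RTT estimate
			 * (lastsa) and RTT variance (lastsv), both kept in
			 * scaled fixed point, undoing any exponential
			 * backoff applied by retransmissions; the result
			 * is then clamped to [minrto, maxrto] below. (The
			 * exact shift counts are an observation of this
			 * stack's internal scaling, not a normative
			 * formula.)
			 */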
			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}

void
sctp_cwnd_update_after_timeout(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	net->ssthresh = max(net->cwnd / 2, 4 * net->mtu);
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
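	/*
	 * T3-rtx expiry per RFC 4960 section 7.2.3: ssthresh is set to
	 * max(cwnd / 2, 4 * MTU) and cwnd collapses to a single MTU, so
	 * the sender re-enters slow start toward the new ssthresh.
	 */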
	SDT_PROBE(sctp, cwnd, net, to,
	    stcb->asoc.my_vtag,
	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
	    net,
	    old_cwnd, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}

void
sctp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	SCTP_STAT_INCR(sctps_ecnereducedcwnd);
	net->ssthresh = net->cwnd / 2;
	if (net->ssthresh < net->mtu) {
		net->ssthresh = net->mtu;
		/* here back off the timer as well, to slow us down */
		net->RTO <<= 1;
	}
	net->cwnd = net->ssthresh;
	SDT_PROBE(sctp, cwnd, net, ecn,
	    stcb->asoc.my_vtag,
	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
	    net,
	    old_cwnd, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
	}
}

void
sctp_cwnd_update_after_packet_dropped(struct sctp_tcb *stcb,
    struct sctp_nets *net, struct sctp_pktdrop_chunk *cp,
    uint32_t * bottle_bw, uint32_t * on_queue)
{
	uint32_t bw_avail;
	int rtt, incr;
	int old_cwnd = net->cwnd;

	/* need the real RTT for this calculation */
	rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
	/* get the bottleneck bandwidth */
	*bottle_bw = ntohl(cp->bottle_bw);
	/* and what's on the queue */
	*on_queue = ntohl(cp->current_onq);
	/*
	 * Adjust the on-queue count if our flight size is larger; it could
	 * be that the router has not yet seen the data "in flight" to it.
	 */
	if (*on_queue < net->flight_size)
		*on_queue = net->flight_size;
	/* calculate the available space */
	bw_avail = (*bottle_bw * rtt) / 1000;
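	/*
	 * bw_avail estimates the bytes the pipe can hold, i.e. the
	 * bandwidth-delay product. Illustrative example, assuming
	 * bottle_bw is in bytes per second and rtt in milliseconds: a
	 * 1,000,000 byte/s bottleneck with a 100 ms RTT gives a pipe of
	 * about 100,000 bytes.
	 */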
	if (bw_avail > *bottle_bw) {
		/*
		 * Cap the growth to no more than the bottleneck. This can
		 * happen as RTT slides up due to queues. It also means
		 * that with more than a one second RTT and an empty queue
		 * you will be limited to bottle_bw per second, no matter
		 * that other points have half the RTT and you could get
		 * more out...
		 */
		bw_avail = *bottle_bw;
	}
	if (*on_queue > bw_avail) {
		/*
		 * No room for anything else, so don't allow anything else
		 * to be "added to the fire".
		 */
		int seg_inflight, seg_onqueue, my_portion;

		net->partial_bytes_acked = 0;

		/* how much are we over the queue size? */
		incr = *on_queue - bw_avail;
		if (stcb->asoc.seen_a_sack_this_pkt) {
			/*
			 * undo any cwnd adjustment that the SACK might
			 * have made
			 */
			net->cwnd = net->prev_cwnd;
		}
		/* Now, how much of that is mine? */
		seg_inflight = net->flight_size / net->mtu;
		seg_onqueue = *on_queue / net->mtu;
		my_portion = (incr * seg_inflight) / seg_onqueue;
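
		/*
		 * my_portion is the share of the overage charged to this
		 * sender, proportional to how many of the queued segments
		 * are ours. Illustrative example (made-up numbers): with
		 * 15000 bytes of overage and 4 of our segments in flight
		 * out of 20 on the queue, we are charged
		 * 15000 * 4 / 20 = 3000 bytes.
		 */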

		/* Have I made an adjustment already? */
		if (net->cwnd > net->flight_size) {
			/*
			 * We already made an adjustment for this flight,
			 * so decrease my portion by a share of the
			 * previous adjustment.
			 */
			int diff_adj;

			diff_adj = net->cwnd - net->flight_size;
			if (diff_adj > my_portion)
				my_portion = 0;
			else
				my_portion -= diff_adj;
		}
		/*
		 * Back down from the previous cwnd (assuming we have had
		 * a SACK before this packet), minus whatever portion of
		 * the overage is my fault.
		 */
		net->cwnd -= my_portion;

		/* we will NOT go below 1 MTU */
		if (net->cwnd <= net->mtu) {
			net->cwnd = net->mtu;
		}
		/* force into CA */
		net->ssthresh = net->cwnd - 1;
	} else {
		/*
		 * Take 1/4 of the space left or the max burst, whichever
		 * is less.
		 */
		incr = min((bw_avail - *on_queue) >> 2,
		    stcb->asoc.max_burst * net->mtu);
		net->cwnd += incr;
	}
	if (net->cwnd > bw_avail) {
		/* We can't exceed the pipe size */
		net->cwnd = bw_avail;
	}
	if (net->cwnd < net->mtu) {
		/* We always have 1 MTU */
		net->cwnd = net->mtu;
	}
	if (net->cwnd - old_cwnd != 0) {
		/* log only changes */
		SDT_PROBE(sctp, cwnd, net, pd,
		    stcb->asoc.my_vtag,
		    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
		    net,
		    old_cwnd, net->cwnd);
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
			    SCTP_CWND_LOG_FROM_SAT);
		}
	}
}

void
sctp_cwnd_update_after_output(struct sctp_tcb *stcb,
    struct sctp_nets *net, int burst_limit)
{
	int old_cwnd = net->cwnd;

	if (net->ssthresh < net->cwnd)
		net->ssthresh = net->cwnd;
	net->cwnd = (net->flight_size + (burst_limit * net->mtu));
	SDT_PROBE(sctp, cwnd, net, bl,
	    stcb->asoc.my_vtag,
	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
	    net,
	    old_cwnd, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_BRST);
	}
}

void
sctp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	/*
	 * make a small adjustment to cwnd and force to CA.
	 */
	if (net->cwnd > net->mtu)
		/* drop down one MTU after sending */
		net->cwnd -= net->mtu;
	if (net->cwnd < net->ssthresh)
		/* still in SS, move to CA */
		net->ssthresh = net->cwnd - 1;
	SDT_PROBE(sctp, cwnd, net, fr,
	    stcb->asoc.my_vtag,
	    ((stcb->sctp_ep->sctp_lport << 16) | (stcb->rport)),
	    net,
	    old_cwnd, net->cwnd);
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}

struct sctp_hs_raise_drop {
	int32_t cwnd;
	int32_t increase;
	int32_t drop_percent;
};

#define SCTP_HS_TABLE_SIZE 73

struct sctp_hs_raise_drop sctp_cwnd_adjust[SCTP_HS_TABLE_SIZE] = {
	{38, 1, 50},		/* 0   */
	{118, 2, 44},		/* 1   */
	{221, 3, 41},		/* 2   */
	{347, 4, 38},		/* 3   */
	{495, 5, 37},		/* 4   */
	{663, 6, 35},		/* 5   */
	{851, 7, 34},		/* 6   */
	{1058, 8, 33},		/* 7   */
	{1284, 9, 32},		/* 8   */
	{1529, 10, 31},		/* 9   */
	{1793, 11, 30},		/* 10  */
	{2076, 12, 29},		/* 11  */
	{2378, 13, 28},		/* 12  */
	{2699, 14, 28},		/* 13  */
	{3039, 15, 27},		/* 14  */
	{3399, 16, 27},		/* 15  */
	{3778, 17, 26},		/* 16  */
	{4177, 18, 26},		/* 17  */
	{4596, 19, 25},		/* 18  */
	{5036, 20, 25},		/* 19  */
	{5497, 21, 24},		/* 20  */
	{5979, 22, 24},		/* 21  */
	{6483, 23, 23},		/* 22  */
	{7009, 24, 23},		/* 23  */
	{7558, 25, 22},		/* 24  */
	{8130, 26, 22},		/* 25  */
	{8726, 27, 22},		/* 26  */
	{9346, 28, 21},		/* 27  */
	{9991, 29, 21},		/* 28  */
	{10661, 30, 21},	/* 29  */
	{11358, 31, 20},	/* 30  */
	{12082, 32, 20},	/* 31  */
	{12834, 33, 20},	/* 32  */
	{13614, 34, 19},	/* 33  */
	{14424, 35, 19},	/* 34  */
	{15265, 36, 19},	/* 35  */
	{16137, 37, 19},	/* 36  */
	{17042, 38, 18},	/* 37  */
	{17981, 39, 18},	/* 38  */
	{18955, 40, 18},	/* 39  */
	{19965, 41, 17},	/* 40  */
	{21013, 42, 17},	/* 41  */
	{22101, 43, 17},	/* 42  */
	{23230, 44, 17},	/* 43  */
	{24402, 45, 16},	/* 44  */
	{25618, 46, 16},	/* 45  */
	{26881, 47, 16},	/* 46  */
	{28193, 48, 16},	/* 47  */
	{29557, 49, 15},	/* 48  */
	{30975, 50, 15},	/* 49  */
	{32450, 51, 15},	/* 50  */
	{33986, 52, 15},	/* 51  */
	{35586, 53, 14},	/* 52  */
	{37253, 54, 14},	/* 53  */
	{38992, 55, 14},	/* 54  */
	{40808, 56, 14},	/* 55  */
	{42707, 57, 13},	/* 56  */
	{44694, 58, 13},	/* 57  */
	{46776, 59, 13},	/* 58  */
	{48961, 60, 13},	/* 59  */
	{51258, 61, 13},	/* 60  */
	{53677, 62, 12},	/* 61  */
	{56230, 63, 12},	/* 62  */
	{58932, 64, 12},	/* 63  */
	{61799, 65, 12},	/* 64  */
	{64851, 66, 11},	/* 65  */
	{68113, 67, 11},	/* 66  */
	{71617, 68, 11},	/* 67  */
	{75401, 69, 10},	/* 68  */
	{79517, 70, 10},	/* 69  */
	{84035, 71, 10},	/* 70  */
	{89053, 72, 10},	/* 71  */
	{94717, 73, 9}		/* 72  */
};
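
/*
 * The table above is modeled on the HighSpeed TCP response function
 * (RFC 3649, S. Floyd): the first column is the cwnd threshold in units
 * of 1024 bytes (the code compares it against cwnd >> 10), the second is
 * the additive increase applied per adjustment (shifted left by 10 back
 * into bytes), and the third is the percentage by which cwnd drops on
 * loss. Below the first threshold the sender behaves like standard AIMD
 * (grow by one MTU, halve on loss); larger windows grow faster and back
 * off less.
 */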

static void
sctp_hs_cwnd_increase(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int cur_val, i, indx, incr;

	cur_val = net->cwnd >> 10;
	indx = SCTP_HS_TABLE_SIZE - 1;
#ifdef SCTP_DEBUG
	printf("HS CC called.\n");
#endif
	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
		/* normal mode */
		if (net->net_ack > net->mtu) {
			net->cwnd += net->mtu;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu, SCTP_CWND_LOG_FROM_SS);
			}
		} else {
			net->cwnd += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack, SCTP_CWND_LOG_FROM_SS);
			}
		}
	} else {
		for (i = net->last_hs_used; i < SCTP_HS_TABLE_SIZE; i++) {
			if (cur_val < sctp_cwnd_adjust[i].cwnd) {
				indx = i;
				break;
			}
		}
		net->last_hs_used = indx;
		incr = ((sctp_cwnd_adjust[indx].increase) << 10);
		net->cwnd += incr;
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
			sctp_log_cwnd(stcb, net, incr, SCTP_CWND_LOG_FROM_SS);
		}
	}
}

static void
sctp_hs_cwnd_decrease(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int cur_val, i, indx;
	int old_cwnd = net->cwnd;

	cur_val = net->cwnd >> 10;
	if (cur_val < sctp_cwnd_adjust[0].cwnd) {
		/* normal mode */
		net->ssthresh = net->cwnd / 2;
		if (net->ssthresh < (net->mtu * 2)) {
			net->ssthresh = 2 * net->mtu;
		}
		net->cwnd = net->ssthresh;
	} else {
		/* drop by the proper amount */
		net->ssthresh = net->cwnd - (int)((net->cwnd / 100) *
		    sctp_cwnd_adjust[net->last_hs_used].drop_percent);
		net->cwnd = net->ssthresh;
		/* now, where are we? */
		indx = net->last_hs_used;
		cur_val = net->cwnd >> 10;
		/* reset where we are in the table */
		if (cur_val < sctp_cwnd_adjust[0].cwnd) {
			/* fell out of HS */
			net->last_hs_used = 0;
		} else {
			for (i = indx; i >= 1; i--) {
				if (cur_val > sctp_cwnd_adjust[i - 1].cwnd) {
					break;
				}
			}
			/* record the new table position found by the loop */
			net->last_hs_used = i;
		}
	}
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}

void
sctp_hs_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Needs debugging: ((sctp_cmt_on_off == 1) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off == 1)) {
			/* out of an RFC 2582 fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * Per section 7.2.3, were there any
				 * destinations that had a fast retransmit
				 * sent to them? If so, we need to adjust
				 * ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;

				sctp_hs_cwnd_decrease(stcb, net);

				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * Disable Nonce Sum Checking and store the
				 * resync tsn
				 */
				asoc->nonce_sum_check = 0;
				asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Bump a counter noting that we WOULD have done a
			 * cwnd reduction but RFC 2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

void
sctp_hs_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Needs debugging.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (compare_with_wrap(asoc->last_acked_seq,
			    net->fast_recovery_tsn, MAX_TSN) ||
			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all, do we need to have an Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * Yes, so in this case stop it if it's
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked chunks to be sent,
				 * causing us to elicit a SACK with gaps to
				 * force out the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if it's running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing the error count;
			 * this is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* address came good again */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* now, was it the primary? if so, restore it */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTUs based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off == 1) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off == 1 &&
		 * net->fast_retran_loss_recovery &&
		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
		 * else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    (will_exit == 0) &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
			/* If the cumulative ack moved we can proceed */
			if (net->cwnd <= net->ssthresh) {
				/* We are in slow start */
				if (net->flight_size + net->net_ack >= net->cwnd) {

					sctp_hs_cwnd_increase(stcb, net);

				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_SS);
					}
				}
			} else {
				/* We are in congestion avoidance */
				net->partial_bytes_acked += net->net_ack;
				if ((net->flight_size + net->net_ack >= net->cwnd) &&
				    (net->partial_bytes_acked >= net->cwnd)) {
					net->partial_bytes_acked -= net->cwnd;
					net->cwnd += net->mtu;
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
						sctp_log_cwnd(stcb, net, net->mtu,
						    SCTP_CWND_LOG_FROM_CA);
					}
				} else {
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
						sctp_log_cwnd(stcb, net, net->net_ack,
						    SCTP_CWND_LOG_NOADV_CA);
					}
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule, do we need to restore the
		 * RTO timer? Check our net_ack2. If it is not set then we
		 * have an ambiguity: all data acked was sent to more than
		 * one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}


/*
 * H-TCP congestion control. The algorithm is detailed in:
 * R.N. Shorten, D.J. Leith:
 *   "H-TCP: TCP for high-speed and long-distance networks"
 *   Proc. PFLDnet, Argonne, 2004.
 * http://www.hamilton.ie/net/htcp3.pdf
 */


static int use_rtt_scaling = 1;
static int use_bandwidth_switch = 1;

static inline int
between(uint32_t seq1, uint32_t seq2, uint32_t seq3)
{
	return seq3 - seq2 >= seq1 - seq2;
}
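
/*
 * between() answers "does seq1 lie in the circular interval [seq2, seq3]?"
 * using unsigned wraparound arithmetic: if it does, seq1 - seq2 cannot
 * exceed seq3 - seq2 modulo 2^32. It is used below to test whether the
 * measured bandwidth stayed within a window around its previous value.
 */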

static inline uint32_t
htcp_cong_time(struct htcp *ca)
{
	return sctp_get_tick_count() - ca->last_cong;
}

static inline uint32_t
htcp_ccount(struct htcp *ca)
{
	return htcp_cong_time(ca) / ca->minRTT;
}

static inline void
htcp_reset(struct htcp *ca)
{
	ca->undo_last_cong = ca->last_cong;
	ca->undo_maxRTT = ca->maxRTT;
	ca->undo_old_maxB = ca->old_maxB;
	ca->last_cong = sctp_get_tick_count();
}

#ifdef SCTP_NOT_USED

static uint32_t
htcp_cwnd_undo(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	net->htcp_ca.last_cong = net->htcp_ca.undo_last_cong;
	net->htcp_ca.maxRTT = net->htcp_ca.undo_maxRTT;
	net->htcp_ca.old_maxB = net->htcp_ca.undo_old_maxB;
	return max(net->cwnd, ((net->ssthresh / net->mtu << 7) / net->htcp_ca.beta) * net->mtu);
}

#endif

static inline void
measure_rtt(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t srtt = net->lastsa >> 3;

	/* keep track of the minimum RTT seen so far; minRTT is zero at first */
	if (net->htcp_ca.minRTT > srtt || !net->htcp_ca.minRTT)
		net->htcp_ca.minRTT = srtt;

	/* max RTT */
	if (net->fast_retran_ip == 0 && net->ssthresh < 0xFFFF && htcp_ccount(&net->htcp_ca) > 3) {
		if (net->htcp_ca.maxRTT < net->htcp_ca.minRTT)
			net->htcp_ca.maxRTT = net->htcp_ca.minRTT;
		if (net->htcp_ca.maxRTT < srtt && srtt <= net->htcp_ca.maxRTT + MSEC_TO_TICKS(20))
			net->htcp_ca.maxRTT = srtt;
	}
}

static void
measure_achieved_throughput(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t now = sctp_get_tick_count();

	if (net->fast_retran_ip == 0)
		net->htcp_ca.bytes_acked = net->net_ack;

	if (!use_bandwidth_switch)
		return;

	/* achieved throughput calculations */
	/* JRS - not 100% sure of this statement */
	if (net->fast_retran_ip == 1) {
		net->htcp_ca.bytecount = 0;
		net->htcp_ca.lasttime = now;
		return;
	}
	net->htcp_ca.bytecount += net->net_ack;

	if (net->htcp_ca.bytecount >= net->cwnd - ((net->htcp_ca.alpha >> 7 ? : 1) * net->mtu)
	    && now - net->htcp_ca.lasttime >= net->htcp_ca.minRTT
	    && net->htcp_ca.minRTT > 0) {
		uint32_t cur_Bi = net->htcp_ca.bytecount / net->mtu * hz / (now - net->htcp_ca.lasttime);
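
		/*
		 * cur_Bi is the achieved throughput in segments per second:
		 * bytecount / mtu gives segments acked since the last
		 * sample, and multiplying by hz / (elapsed ticks) converts
		 * to a per-second rate. The sample is only taken once
		 * roughly a cwnd's worth of data (less alpha segments, at
		 * least one, via the GCC "?:" extension above) has been
		 * acked and at least one minRTT has elapsed.
		 */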

		if (htcp_ccount(&net->htcp_ca) <= 3) {
			/* just after backoff */
			net->htcp_ca.minB = net->htcp_ca.maxB = net->htcp_ca.Bi = cur_Bi;
		} else {
			net->htcp_ca.Bi = (3 * net->htcp_ca.Bi + cur_Bi) / 4;
			if (net->htcp_ca.Bi > net->htcp_ca.maxB)
				net->htcp_ca.maxB = net->htcp_ca.Bi;
			if (net->htcp_ca.minB > net->htcp_ca.maxB)
				net->htcp_ca.minB = net->htcp_ca.maxB;
		}
		net->htcp_ca.bytecount = 0;
		net->htcp_ca.lasttime = now;
	}
}

static inline void
htcp_beta_update(struct htcp *ca, uint32_t minRTT, uint32_t maxRTT)
{
	if (use_bandwidth_switch) {
		uint32_t maxB = ca->maxB;
		uint32_t old_maxB = ca->old_maxB;

		ca->old_maxB = ca->maxB;

		if (!between(5 * maxB, 4 * old_maxB, 6 * old_maxB)) {
			ca->beta = BETA_MIN;
			ca->modeswitch = 0;
			return;
		}
	}
	if (ca->modeswitch && minRTT > (uint32_t) MSEC_TO_TICKS(10) && maxRTT) {
		ca->beta = (minRTT << 7) / maxRTT;
		if (ca->beta < BETA_MIN)
			ca->beta = BETA_MIN;
		else if (ca->beta > BETA_MAX)
			ca->beta = BETA_MAX;
	} else {
		ca->beta = BETA_MIN;
		ca->modeswitch = 1;
	}
}
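
/*
 * beta is the multiplicative-decrease factor kept in <<7 fixed point
 * (128 == 1.0). With adaptive backoff it is set to minRTT / maxRTT and
 * clamped to [BETA_MIN, BETA_MAX], which sizes the backoff so the queue
 * just drains. If the measured bandwidth moved by more than about 20%
 * (the between() test above: 5 * maxB falling outside [4, 6] * old_maxB),
 * the path is treated as changed and beta falls back to BETA_MIN.
 */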

static inline void
htcp_alpha_update(struct htcp *ca)
{
	uint32_t minRTT = ca->minRTT;
	uint32_t factor = 1;
	uint32_t diff = htcp_cong_time(ca);

	if (diff > (uint32_t) hz) {
		diff -= hz;
		factor = 1 + (10 * diff + ((diff / 2) * (diff / 2) / hz)) / hz;
	}
	if (use_rtt_scaling && minRTT) {
		uint32_t scale = (hz << 3) / (10 * minRTT);

		scale = min(max(scale, 1U << 2), 10U << 3);	/* clamping ratio to
								 * interval [0.5,10]<<3 */
		factor = (factor << 3) / scale;
		if (!factor)
			factor = 1;
	}
	ca->alpha = 2 * factor * ((1 << 7) - ca->beta);
	if (!ca->alpha)
		ca->alpha = ALPHA_BASE;
}
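
/*
 * This implements H-TCP's increase function. With delta the time since
 * the last congestion event in seconds beyond the first second, factor
 * computes 1 + 10 * delta + (delta / 2)^2 (the quadratic from the H-TCP
 * paper) in tick units, optionally scaled by the RTT so that flows with
 * different RTTs grow comparably. alpha then becomes
 * 2 * factor * (1 - beta) in <<7 fixed point, which keeps throughput
 * roughly constant across the backoff/growth cycle.
 */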

/* After we have the rtt data to calculate beta, we'd still prefer to wait one
 * rtt before we adjust our beta to ensure we are working from consistent
 * data.
 *
 * This function should be called when we hit a congestion event since only at
 * that point do we really have a real sense of maxRTT (the queues en route
 * were getting just too full now).
 */
static void
htcp_param_update(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	uint32_t minRTT = net->htcp_ca.minRTT;
	uint32_t maxRTT = net->htcp_ca.maxRTT;

	htcp_beta_update(&net->htcp_ca, minRTT, maxRTT);
	htcp_alpha_update(&net->htcp_ca);

	/*
	 * add slowly fading memory for maxRTT to accommodate routing
	 * changes, etc.
	 */
	if (minRTT > 0 && maxRTT > minRTT)
		net->htcp_ca.maxRTT = minRTT + ((maxRTT - minRTT) * 95) / 100;
}

static uint32_t
htcp_recalc_ssthresh(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	htcp_param_update(stcb, net);
	return max(((net->cwnd / net->mtu * net->htcp_ca.beta) >> 7) * net->mtu, 2U * net->mtu);
}
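
/*
 * The new ssthresh is cwnd * beta (beta being <<7 fixed point, so the
 * >> 7 rescales), computed in whole MTUs and floored at 2 MTUs. Assuming
 * the usual H-TCP constants (BETA_MIN of 0.5 and BETA_MAX of about 0.8
 * in <<7 fixed point, as in the Linux implementation this code follows),
 * the decrease ranges from the standard halving down to roughly a 20%
 * cut.
 */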

static void
htcp_cong_avoid(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*-
	 * How to handle these functions?
	 *	if (!tcp_is_cwnd_limited(sk, in_flight)) RRS - good question.
	 *		return;
	 */
	if (net->cwnd <= net->ssthresh) {
		/* We are in slow start */
		if (net->flight_size + net->net_ack >= net->cwnd) {
			if (net->net_ack > (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable))) {
				net->cwnd += (net->mtu * SCTP_BASE_SYSCTL(sctp_L2_abc_variable));
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->mtu,
					    SCTP_CWND_LOG_FROM_SS);
				}
			} else {
				net->cwnd += net->net_ack;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, net->net_ack,
					    SCTP_CWND_LOG_FROM_SS);
				}
			}
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_SS);
			}
		}
	} else {
		measure_rtt(stcb, net);

		/*
		 * In dangerous area, increase slowly. In theory this is
		 * net->cwnd += alpha / net->cwnd
		 */
		/* What is snd_cwnd_cnt?? */
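		/*
		 * Fixed-point check: pba / mtu counts acked segments, and
		 * the alpha multiply plus >> 7 rescales them, so the
		 * condition is roughly pba >= cwnd / alpha bytes. Each
		 * time that much is acked, cwnd grows by one MTU, i.e.
		 * about alpha MTUs per round trip.
		 */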
		if (((net->partial_bytes_acked / net->mtu * net->htcp_ca.alpha) >> 7) * net->mtu >= net->cwnd) {
			/*-
			 * Does SCTP have a cwnd clamp?
			 * if (net->snd_cwnd < net->snd_cwnd_clamp) - Nope (RRS).
			 */
			net->cwnd += net->mtu;
			net->partial_bytes_acked = 0;
			htcp_alpha_update(&net->htcp_ca);
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_FROM_CA);
			}
		} else {
			net->partial_bytes_acked += net->net_ack;
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->net_ack,
				    SCTP_CWND_LOG_NOADV_CA);
			}
		}

		net->htcp_ca.bytes_acked = net->mtu;
	}
}

#ifdef SCTP_NOT_USED
/* Lower bound on congestion window. */
static uint32_t
htcp_min_cwnd(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	return net->ssthresh;
}

#endif

static void
htcp_init(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	memset(&net->htcp_ca, 0, sizeof(struct htcp));
	net->htcp_ca.alpha = ALPHA_BASE;
	net->htcp_ca.beta = BETA_MIN;
	net->htcp_ca.bytes_acked = net->mtu;
	net->htcp_ca.last_cong = sctp_get_tick_count();
}

void
sctp_htcp_set_initial_cc_param(struct sctp_tcb *stcb, struct sctp_nets *net)
{
	/*
	 * Start with the larger of two MTUs or SCTP_INITIAL_CWND, and
	 * then cap that at four MTUs of sending.
	 */
	net->cwnd = min((net->mtu * 4), max((2 * net->mtu), SCTP_INITIAL_CWND));
	net->ssthresh = stcb->asoc.peers_rwnd;
	htcp_init(stcb, net);

	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_CWND_MONITOR_ENABLE | SCTP_CWND_LOGGING_ENABLE)) {
		sctp_log_cwnd(stcb, net, 0, SCTP_CWND_INITIALIZATION);
	}
}

void
sctp_htcp_cwnd_update_after_sack(struct sctp_tcb *stcb,
    struct sctp_association *asoc,
    int accum_moved, int reneged_all, int will_exit)
{
	struct sctp_nets *net;

	/******************************/
	/* update cwnd and Early FR   */
	/******************************/
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {

#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code. Needs debugging.
		 */
		if (net->fast_retran_loss_recovery && net->new_pseudo_cumack) {
			if (compare_with_wrap(asoc->last_acked_seq,
			    net->fast_recovery_tsn, MAX_TSN) ||
			    (asoc->last_acked_seq == net->fast_recovery_tsn) ||
			    compare_with_wrap(net->pseudo_cumack, net->fast_recovery_tsn, MAX_TSN) ||
			    (net->pseudo_cumack == net->fast_recovery_tsn)) {
				net->will_exit_fast_recovery = 1;
			}
		}
#endif
		if (SCTP_BASE_SYSCTL(sctp_early_fr)) {
			/*
			 * So, first of all, do we need to have an Early FR
			 * timer running?
			 */
			if ((!TAILQ_EMPTY(&asoc->sent_queue) &&
			    (net->ref_count > 1) &&
			    (net->flight_size < net->cwnd)) ||
			    (reneged_all)) {
				/*
				 * Yes, so in this case stop it if it's
				 * running, and then restart it. Reneging
				 * all is a special case where we want to
				 * run the Early FR timer and then force the
				 * last few unacked chunks to be sent,
				 * causing us to elicit a SACK with gaps to
				 * force out the others.
				 */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck2);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_20);
				}
				SCTP_STAT_INCR(sctps_earlyfrstrid);
				sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
			} else {
				/* No, stop it if it's running */
				if (SCTP_OS_TIMER_PENDING(&net->fr_timer.timer)) {
					SCTP_STAT_INCR(sctps_earlyfrstpidsck3);
					sctp_timer_stop(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net,
					    SCTP_FROM_SCTP_INDATA + SCTP_LOC_21);
				}
			}
		}
		/* if nothing was acked on this destination skip it */
		if (net->net_ack == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, 0, SCTP_CWND_LOG_FROM_SACK);
			}
			continue;
		}
		if (net->net_ack2 > 0) {
			/*
			 * Karn's rule applies to clearing the error count;
			 * this is optional.
			 */
			net->error_count = 0;
			if ((net->dest_state & SCTP_ADDR_NOT_REACHABLE) ==
			    SCTP_ADDR_NOT_REACHABLE) {
				/* address came good again */
				net->dest_state &= ~SCTP_ADDR_NOT_REACHABLE;
				net->dest_state |= SCTP_ADDR_REACHABLE;
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_UP, stcb,
				    SCTP_RECEIVED_SACK, (void *)net, SCTP_SO_NOT_LOCKED);
				/* now, was it the primary? if so, restore it */
				if (net->dest_state & SCTP_ADDR_WAS_PRIMARY) {
					(void)sctp_set_primary_addr(stcb, (struct sockaddr *)NULL, net);
				}
			}
			/*
			 * JRS 5/14/07 - If CMT PF is on and the destination
			 * is in PF state, set the destination to active
			 * state and set the cwnd to one or two MTUs based
			 * on whether PF1 or PF2 is being used.
			 *
			 * Should we stop any running T3 timer here?
			 */
			if ((asoc->sctp_cmt_on_off == 1) &&
			    (asoc->sctp_cmt_pf > 0) &&
			    ((net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF)) {
				net->dest_state &= ~SCTP_ADDR_PF;
				net->cwnd = net->mtu * asoc->sctp_cmt_pf;
				SCTPDBG(SCTP_DEBUG_INDATA1, "Destination %p moved from PF to reachable with cwnd %d.\n",
				    net, net->cwnd);
				/*
				 * Since the cwnd value is explicitly set,
				 * skip the code that updates the cwnd
				 * value.
				 */
				goto skip_cwnd_update;
			}
		}
#ifdef JANA_CMT_FAST_RECOVERY
		/*
		 * CMT fast recovery code
		 */
		/*
		 * if (sctp_cmt_on_off == 1 &&
		 * net->fast_retran_loss_recovery &&
		 * net->will_exit_fast_recovery == 0) { @@@ Do something }
		 * else if (sctp_cmt_on_off == 0 &&
		 * asoc->fast_retran_loss_recovery && will_exit == 0) {
		 */
#endif

		if (asoc->fast_retran_loss_recovery &&
		    will_exit == 0 &&
		    (asoc->sctp_cmt_on_off == 0)) {
			/*
			 * If we are in loss recovery we skip any cwnd
			 * update
			 */
			goto skip_cwnd_update;
		}
		/*
		 * CMT: CUC algorithm. Update cwnd if pseudo-cumack has
		 * moved.
		 */
		if (accum_moved ||
		    ((asoc->sctp_cmt_on_off == 1) && net->new_pseudo_cumack)) {
			htcp_cong_avoid(stcb, net);
			measure_achieved_throughput(stcb, net);
		} else {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
				sctp_log_cwnd(stcb, net, net->mtu,
				    SCTP_CWND_LOG_NO_CUMACK);
			}
		}
skip_cwnd_update:
		/*
		 * NOW, according to Karn's rule, do we need to restore the
		 * RTO timer? Check our net_ack2. If it is not set then we
		 * have an ambiguity: all data acked was sent to more than
		 * one place.
		 */
		if (net->net_ack2) {
			/* restore any doubled timers */
			net->RTO = ((net->lastsa >> 2) + net->lastsv) >> 1;
			if (net->RTO < stcb->asoc.minrto) {
				net->RTO = stcb->asoc.minrto;
			}
			if (net->RTO > stcb->asoc.maxrto) {
				net->RTO = stcb->asoc.maxrto;
			}
		}
	}
}

void
sctp_htcp_cwnd_update_after_fr(struct sctp_tcb *stcb,
    struct sctp_association *asoc)
{
	struct sctp_nets *net;

	/*
	 * CMT fast recovery code. Needs debugging: ((sctp_cmt_on_off == 1) &&
	 * (net->fast_retran_loss_recovery == 0)))
	 */
	TAILQ_FOREACH(net, &asoc->nets, sctp_next) {
		if ((asoc->fast_retran_loss_recovery == 0) ||
		    (asoc->sctp_cmt_on_off == 1)) {
			/* out of an RFC 2582 fast recovery window? */
			if (net->net_ack > 0) {
				/*
				 * Per section 7.2.3, were there any
				 * destinations that had a fast retransmit
				 * sent to them? If so, we need to adjust
				 * ssthresh and cwnd.
				 */
				struct sctp_tmit_chunk *lchk;
				int old_cwnd = net->cwnd;

				/* JRS - reset as if state were changed */
				htcp_reset(&net->htcp_ca);
				net->ssthresh = htcp_recalc_ssthresh(stcb, net);
				net->cwnd = net->ssthresh;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
					sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd),
					    SCTP_CWND_LOG_FROM_FR);
				}
				lchk = TAILQ_FIRST(&asoc->send_queue);

				net->partial_bytes_acked = 0;
				/* Turn on fast recovery window */
				asoc->fast_retran_loss_recovery = 1;
				if (lchk == NULL) {
					/* Mark end of the window */
					asoc->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					asoc->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * CMT fast recovery -- per destination
				 * recovery variable.
				 */
				net->fast_retran_loss_recovery = 1;

				if (lchk == NULL) {
					/* Mark end of the window */
					net->fast_recovery_tsn = asoc->sending_seq - 1;
				} else {
					net->fast_recovery_tsn = lchk->rec.data.TSN_seq - 1;
				}

				/*
				 * Disable Nonce Sum Checking and store the
				 * resync tsn
				 */
				asoc->nonce_sum_check = 0;
				asoc->nonce_resync_tsn = asoc->fast_recovery_tsn + 1;

				sctp_timer_stop(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net, SCTP_FROM_SCTP_INDATA + SCTP_LOC_32);
				sctp_timer_start(SCTP_TIMER_TYPE_SEND,
				    stcb->sctp_ep, stcb, net);
			}
		} else if (net->net_ack > 0) {
			/*
			 * Bump a counter noting that we WOULD have done a
			 * cwnd reduction but RFC 2582 prevented this action.
			 */
			SCTP_STAT_INCR(sctps_fastretransinrtt);
		}
	}
}

void
sctp_htcp_cwnd_update_after_timeout(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	int old_cwnd = net->cwnd;

	/* JRS - reset as if the state were being changed to timeout */
	htcp_reset(&net->htcp_ca);
	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
	net->cwnd = net->mtu;
	net->partial_bytes_acked = 0;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd - old_cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
}

void
sctp_htcp_cwnd_update_after_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb, struct sctp_nets *net)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	net->htcp_ca.last_cong = sctp_get_tick_count();
	/*
	 * make a small adjustment to cwnd and force to CA.
	 */
	if (net->cwnd > net->mtu)
		/* drop down one MTU after sending */
		net->cwnd -= net->mtu;
	if (net->cwnd < net->ssthresh)
		/* still in SS, move to CA */
		net->ssthresh = net->cwnd - 1;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (old_cwnd - net->cwnd), SCTP_CWND_LOG_FROM_FR);
	}
}

void
sctp_htcp_cwnd_update_after_ecn_echo(struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	int old_cwnd;

	old_cwnd = net->cwnd;

	/* JRS - reset htcp as if state changed */
	htcp_reset(&net->htcp_ca);
	SCTP_STAT_INCR(sctps_ecnereducedcwnd);
	net->ssthresh = htcp_recalc_ssthresh(stcb, net);
	if (net->ssthresh < net->mtu) {
		net->ssthresh = net->mtu;
		/* here back off the timer as well, to slow us down */
		net->RTO <<= 1;
	}
	net->cwnd = net->ssthresh;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, (net->cwnd - old_cwnd), SCTP_CWND_LOG_FROM_SAT);
	}
}
