sctp_timer.c revision 171440
/*-
 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * a) Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * b) Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the distribution.
 *
 * c) Neither the name of Cisco Systems, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* $KAME: sctp_timer.c,v 1.29 2005/03/06 16:04:18 itojun Exp $	 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/netinet/sctp_timer.c 171440 2007-07-14 09:36:28Z rrs $");

#define _IP_VHL
#include <netinet/sctp_os.h>
#include <netinet/sctp_pcb.h>
#ifdef INET6
#include <netinet6/sctp6_var.h>
#endif
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_header.h>
#include <netinet/sctp_indata.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>


5699501Scharniervoid
5799501Scharniersctp_early_fr_timer(struct sctp_inpcb *inp,
58102231Strhodes    struct sctp_tcb *stcb,
591558Srgrimes    struct sctp_nets *net)
6057668Ssheldonh{
6157668Ssheldonh	struct sctp_tmit_chunk *chk, *tp2;
621558Srgrimes	struct timeval now, min_wait, tv;
631558Srgrimes	unsigned int cur_rtt, cnt = 0, cnt_resend = 0;
6423672Speter
651558Srgrimes	/* an early FR is occuring. */
661558Srgrimes	(void)SCTP_GETTIME_TIMEVAL(&now);
671558Srgrimes	/* get cur rto in micro-seconds */
681558Srgrimes	if (net->lastsa == 0) {
69111287Sru		/* Hmm no rtt estimate yet? */
70111287Sru		cur_rtt = stcb->asoc.initial_rto >> 2;
7122192Sjoerg	} else {
7223892Speter
7322192Sjoerg		cur_rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
7422192Sjoerg	}
751558Srgrimes	if (cur_rtt < sctp_early_fr_msec) {
761558Srgrimes		cur_rtt = sctp_early_fr_msec;
771558Srgrimes	}
781558Srgrimes	cur_rtt *= 1000;
7994911Strhodes	tv.tv_sec = cur_rtt / 1000000;
8094911Strhodes	tv.tv_usec = cur_rtt % 1000000;
811558Srgrimes	min_wait = now;
821558Srgrimes	timevalsub(&min_wait, &tv);
831558Srgrimes	if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
84102231Strhodes		/*
85102464Strhodes		 * if we hit here, we don't have enough seconds on the clock
8694911Strhodes		 * to account for the RTO. We just let the lower seconds be
8797474Sru		 * the bounds and don't worry about it. This may mean we
8897474Sru		 * will mark a lot more than we should.
8994911Strhodes		 */
90235837Sjoel		min_wait.tv_sec = min_wait.tv_usec = 0;
91235837Sjoel	}
92235837Sjoel	chk = TAILQ_LAST(&stcb->asoc.sent_queue, sctpchunk_listhead);
93235837Sjoel	for (; chk != NULL; chk = tp2) {
94235837Sjoel		tp2 = TAILQ_PREV(chk, sctpchunk_listhead, sctp_next);
95235837Sjoel		if (chk->whoTo != net) {
96235837Sjoel			continue;
97235837Sjoel		}
981558Srgrimes		if (chk->sent == SCTP_DATAGRAM_RESEND)
9968960Sru			cnt_resend++;
10023672Speter		else if ((chk->sent > SCTP_DATAGRAM_UNSENT) &&
101111287Sru		    (chk->sent < SCTP_DATAGRAM_RESEND)) {
1021558Srgrimes			/* pending, may need retran */
1031558Srgrimes			if (chk->sent_rcv_time.tv_sec > min_wait.tv_sec) {
104102231Strhodes				/*
1051558Srgrimes				 * we have reached a chunk that was sent
10623672Speter				 * some seconds past our min.. forget it we
1071558Srgrimes				 * will find no more to send.
1081558Srgrimes				 */
1091558Srgrimes				continue;
1101558Srgrimes			} else if (chk->sent_rcv_time.tv_sec == min_wait.tv_sec) {
1111558Srgrimes				/*
11230568Sjoerg				 * we must look at the micro seconds to
11332219Ssteve				 * know.
11423672Speter				 */
11522192Sjoerg				if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
11622192Sjoerg					/*
117111287Sru					 * ok it was sent after our boundary
118111287Sru					 * time.
119111287Sru					 */
12022192Sjoerg					continue;
12122192Sjoerg				}
12222192Sjoerg			}
12379762Sdd			if (sctp_logging_level & SCTP_EARLYFR_LOGGING_ENABLE) {
12494911Strhodes				sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
12594911Strhodes				    4, SCTP_FR_MARKED_EARLY);
12694911Strhodes			}
12779762Sdd			SCTP_STAT_INCR(sctps_earlyfrmrkretrans);
12879762Sdd			chk->sent = SCTP_DATAGRAM_RESEND;
12923672Speter			sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
130117533Sgrog			/* double book size since we are doing an early FR */
13194911Strhodes			chk->book_size_scale++;
132111287Sru			cnt += chk->send_size;
133111287Sru			if ((cnt + net->flight_size) > net->cwnd) {
134111287Sru				/* Mark all we could possibly resend */
135111287Sru				break;
136111287Sru			}
137111287Sru		}
138111287Sru	}
139111287Sru	if (cnt) {
140111287Sru		/*
141111287Sru		 * JRS - Use the congestion control given in the congestion
142111287Sru		 * control module
143111287Sru		 */
144111287Sru		stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer(inp, stcb, net);
14523672Speter	} else if (cnt_resend) {
14622192Sjoerg		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR);
14722192Sjoerg	}
14879442Sdillon	/* Restart it? */
14979520Sru	if (net->flight_size < net->cwnd) {
15079520Sru		SCTP_STAT_INCR(sctps_earlyfrstrtmr);
15179520Sru		sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
15279442Sdillon	}
15379442Sdillon}
15423672Speter
15523672Spetervoid
15623672Spetersctp_audit_retranmission_queue(struct sctp_association *asoc)
15723672Speter{
15823672Speter	struct sctp_tmit_chunk *chk;
1591558Srgrimes
1601558Srgrimes	SCTPDBG(SCTP_DEBUG_TIMER4, "Audit invoked on send queue cnt:%d onqueue:%d\n",
1611558Srgrimes	    asoc->sent_queue_retran_cnt,
1621558Srgrimes	    asoc->sent_queue_cnt);
1631558Srgrimes	asoc->sent_queue_retran_cnt = 0;
16474028Sobrien	asoc->sent_queue_cnt = 0;
1651558Srgrimes	TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
16660132Skris		if (chk->sent == SCTP_DATAGRAM_RESEND) {
16712377Sjoerg			sctp_ucount_incr(asoc->sent_queue_retran_cnt);
1681558Srgrimes		}
1691558Srgrimes		asoc->sent_queue_cnt++;
170108221Sru	}
1711558Srgrimes	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
1721558Srgrimes		if (chk->sent == SCTP_DATAGRAM_RESEND) {
1731558Srgrimes			sctp_ucount_incr(asoc->sent_queue_retran_cnt);
1741558Srgrimes		}
1751558Srgrimes	}
1761558Srgrimes	SCTPDBG(SCTP_DEBUG_TIMER4, "Audit completes retran:%d onqueue:%d\n",
1771558Srgrimes	    asoc->sent_queue_retran_cnt,
1781558Srgrimes	    asoc->sent_queue_cnt);
1791558Srgrimes}
1801558Srgrimes
18136997Scharnierint
1821558Srgrimessctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
1831558Srgrimes    struct sctp_nets *net, uint16_t threshold)
18412377Sjoerg{
18512377Sjoerg	if (net) {
18612377Sjoerg		net->error_count++;
18721021Sobrien		SCTPDBG(SCTP_DEBUG_TIMER4, "Error count for %p now %d thresh:%d\n",
18821021Sobrien		    net, net->error_count,
18912377Sjoerg		    net->failure_threshold);
19012377Sjoerg		if (net->error_count > net->failure_threshold) {
191128175Sgreen			/* We had a threshold failure */
192128175Sgreen			if (net->dest_state & SCTP_ADDR_REACHABLE) {
193128175Sgreen				net->dest_state &= ~SCTP_ADDR_REACHABLE;
194128175Sgreen				net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
195128175Sgreen				net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
196128175Sgreen				if (net == stcb->asoc.primary_destination) {
197128175Sgreen					net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
198128175Sgreen				}
199128175Sgreen				/*
200128175Sgreen				 * JRS 5/14/07 - If a destination is
201129327Sru				 * unreachable, the PF bit is turned off.
202128175Sgreen				 * This allows an unambiguous use of the PF
203128175Sgreen				 * bit for destinations that are reachable
204128175Sgreen				 * but potentially failed. If the
205128175Sgreen				 * destination is set to the unreachable
206128175Sgreen				 * state, also set the destination to the PF
207128175Sgreen				 * state.
208128175Sgreen				 */
209128175Sgreen				/*
210128175Sgreen				 * Add debug message here if destination is
211128175Sgreen				 * not in PF state.
212128175Sgreen				 */
21379762Sdd				/* Stop any running T3 timers here? */
21479762Sdd				if (sctp_cmt_pf) {
21579762Sdd					net->dest_state &= ~SCTP_ADDR_PF;
21679762Sdd					SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
21779762Sdd					    net);
21879762Sdd				}
21979762Sdd				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
22079762Sdd				    stcb,
22179762Sdd				    SCTP_FAILED_THRESHOLD,
22279762Sdd				    (void *)net);
223107559Smckusick			}
224107559Smckusick		}
225107559Smckusick		/*********HOLD THIS COMMENT FOR PATCH OF ALTERNATE
226107788Sru		 *********ROUTING CODE
227107559Smckusick		 */
228107559Smckusick		/*********HOLD THIS COMMENT FOR END OF PATCH OF ALTERNATE
229122874Smckusick		 *********ROUTING CODE
230122874Smckusick		 */
231129327Sru	}
232107559Smckusick	if (stcb == NULL)
233161362Sthomas		return (0);
234161362Sthomas
235129327Sru	if (net) {
236122874Smckusick		if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) {
237122874Smckusick			stcb->asoc.overall_error_count++;
238129327Sru		}
239140602Swes	} else {
240129327Sru		stcb->asoc.overall_error_count++;
241140602Swes	}
242122874Smckusick	SCTPDBG(SCTP_DEBUG_TIMER4, "Overall error count for %p now %d thresh:%u state:%x\n",
243122874Smckusick	    &stcb->asoc, stcb->asoc.overall_error_count,
244129327Sru	    (uint32_t) threshold,
245129327Sru	    ((net == NULL) ? (uint32_t) 0 : (uint32_t) net->dest_state));
246129327Sru	/*
247129327Sru	 * We specifically do not do >= to give the assoc one more change
248129327Sru	 * before we fail it.
249129327Sru	 */
250129327Sru	if (stcb->asoc.overall_error_count > threshold) {
25123672Speter		/* Abort notification sends a ULP notify */
2521558Srgrimes		struct mbuf *oper;
25336997Scharnier
2541558Srgrimes		oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
2551558Srgrimes		    0, M_DONTWAIT, 1, MT_DATA);
2561558Srgrimes		if (oper) {
2571558Srgrimes			struct sctp_paramhdr *ph;
2581558Srgrimes			uint32_t *ippp;
259179267Smckusick
260179267Smckusick			SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
261179267Smckusick			    sizeof(uint32_t);
262179267Smckusick			ph = mtod(oper, struct sctp_paramhdr *);
263179267Smckusick			ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
264179267Smckusick			ph->param_length = htons(SCTP_BUF_LEN(oper));
265179267Smckusick			ippp = (uint32_t *) (ph + 1);
266179267Smckusick			*ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
267179267Smckusick		}
268179267Smckusick		inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
269179267Smckusick		sctp_abort_an_association(inp, stcb, SCTP_FAILED_THRESHOLD, oper);
270179267Smckusick		return (1);
271179267Smckusick	}
272179267Smckusick	return (0);
273179267Smckusick}
274179267Smckusick
275179267Smckusickstruct sctp_nets *
276179267Smckusicksctp_find_alternate_net(struct sctp_tcb *stcb,
277179267Smckusick    struct sctp_nets *net,
278179267Smckusick    int mode)
279179267Smckusick{
280111287Sru	/* Find and return an alternate network if possible */
281111287Sru	struct sctp_nets *alt, *mnet, *min_errors_net = NULL, *max_cwnd_net = NULL;
282111287Sru	int once;
28323672Speter
2841558Srgrimes	/* JRS 5/14/07 - Initialize min_errors to an impossible value. */
2851558Srgrimes	int min_errors = -1;
2861558Srgrimes	uint32_t max_cwnd = 0;
28736997Scharnier
2881558Srgrimes	if (stcb->asoc.numnets == 1) {
2891558Srgrimes		/* No others but net */
2901558Srgrimes		return (TAILQ_FIRST(&stcb->asoc.nets));
29123672Speter	}
29223672Speter	/*
29323672Speter	 * JRS 5/14/07 - If mode is set to 2, use the CMT PF find alternate
294155981Skeramida	 * net algorithm. This algorithm chooses the active destination (not
29579520Sru	 * in PF state) with the largest cwnd value. If all destinations are
29679520Sru	 * in PF state, unreachable, or unconfirmed, choose the desination
29723672Speter	 * that is in PF state with the lowest error count. In case of a
29823672Speter	 * tie, choose the destination that was most recently active.
29923672Speter	 */
30023672Speter	if (mode == 2) {
30123672Speter		TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
30223672Speter			/*
30323672Speter			 * JRS 5/14/07 - If the destination is unreachable
30423672Speter			 * or unconfirmed, skip it.
30523672Speter			 */
30623672Speter			if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
30779520Sru			    (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)) {
30879520Sru				continue;
30979520Sru			}
3101558Srgrimes			/*
3111558Srgrimes			 * JRS 5/14/07 -  If the destination is reachable
31279520Sru			 * but in PF state, compare the error count of the
31379520Sru			 * destination to the minimum error count seen thus
31479520Sru			 * far. Store the destination with the lower error
3151558Srgrimes			 * count.  If the error counts are equal, store the
3161558Srgrimes			 * destination that was most recently active.
317102231Strhodes			 */
3181558Srgrimes			if (mnet->dest_state & SCTP_ADDR_PF) {
3191558Srgrimes				/*
3201558Srgrimes				 * JRS 5/14/07 - If the destination under
32179754Sdd				 * consideration is the current destination,
322102231Strhodes				 * work as if the error count is one higher.
32379520Sru				 * The actual error count will not be
32479520Sru				 * incremented until later in the t3
32579520Sru				 * handler.
3261558Srgrimes				 */
3271558Srgrimes				if (mnet == net) {
32879520Sru					if (min_errors == -1) {
32979520Sru						min_errors = mnet->error_count + 1;
33079520Sru						min_errors_net = mnet;
33179442Sdillon					} else if (mnet->error_count + 1 < min_errors) {
33279442Sdillon						min_errors = mnet->error_count + 1;
33379442Sdillon						min_errors_net = mnet;
33479442Sdillon					} else if (mnet->error_count + 1 == min_errors
33523672Speter					    && mnet->last_active > min_errors_net->last_active) {
336102231Strhodes						min_errors_net = mnet;
3371558Srgrimes						min_errors = mnet->error_count + 1;
33879520Sru					}
3391558Srgrimes					continue;
3401558Srgrimes				} else {
3411558Srgrimes					if (min_errors == -1) {
34223672Speter						min_errors = mnet->error_count;
3431558Srgrimes						min_errors_net = mnet;
34436997Scharnier					} else if (mnet->error_count < min_errors) {
345102231Strhodes						min_errors = mnet->error_count;
34679520Sru						min_errors_net = mnet;
34779520Sru					} else if (mnet->error_count == min_errors
34879520Sru					    && mnet->last_active > min_errors_net->last_active) {
3491558Srgrimes						min_errors_net = mnet;
350102231Strhodes						min_errors = mnet->error_count;
3511558Srgrimes					}
35223672Speter					continue;
3531558Srgrimes				}
35436997Scharnier			}
3551558Srgrimes			/*
35623672Speter			 * JRS 5/14/07 - If the destination is reachable and
357111287Sru			 * not in PF state, compare the cwnd of the
358111287Sru			 * destination to the highest cwnd seen thus far.
359111287Sru			 * Store the destination with the higher cwnd value.
3601558Srgrimes			 * If the cwnd values are equal, randomly choose one
3611558Srgrimes			 * of the two destinations.
36290492Sdd			 */
36373375Sobrien			if (max_cwnd < mnet->cwnd) {
36473375Sobrien				max_cwnd_net = mnet;
36573375Sobrien				max_cwnd = mnet->cwnd;
36690492Sdd			} else if (max_cwnd == mnet->cwnd) {
36790492Sdd				uint32_t rndval;
36890492Sdd				uint8_t this_random;
36990492Sdd
37073375Sobrien				if (stcb->asoc.hb_random_idx > 3) {
37199501Scharnier					rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
37299501Scharnier					memcpy(stcb->asoc.hb_random_values, &rndval, sizeof(stcb->asoc.hb_random_values));
37399501Scharnier					this_random = stcb->asoc.hb_random_values[0];
3741558Srgrimes					stcb->asoc.hb_random_idx++;
3751558Srgrimes					stcb->asoc.hb_ect_randombit = 0;
3761558Srgrimes				} else {
3771558Srgrimes					this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
3781558Srgrimes					stcb->asoc.hb_random_idx++;
3791558Srgrimes					stcb->asoc.hb_ect_randombit = 0;
38023672Speter				}
3811558Srgrimes				if (this_random % 2 == 1) {
38236997Scharnier					max_cwnd_net = mnet;
3831558Srgrimes					max_cwnd = mnet->cwnd;
3841558Srgrimes					//Useless ?
3851558Srgrimes				}
38636997Scharnier			}
3871558Srgrimes		}
3881558Srgrimes		/*
3891558Srgrimes		 * JRS 5/14/07 - After all destination have been considered
39036997Scharnier		 * as alternates, check to see if there was some active
3911558Srgrimes		 * destination (not in PF state).  If not, check to see if
3921558Srgrimes		 * there was some PF destination with the minimum number of
3931558Srgrimes		 * errors.  If not, return the original destination.  If
3941558Srgrimes		 * there is a min_errors_net, remove the PF flag from that
3951558Srgrimes		 * destination, set the cwnd to one or two MTUs, and return
3961558Srgrimes		 * the destination as an alt. If there was some active
3971558Srgrimes		 * destination with a highest cwnd, return the destination
3981558Srgrimes		 * as an alt.
3991558Srgrimes		 */
40036997Scharnier		if (max_cwnd_net == NULL) {
4011558Srgrimes			if (min_errors_net == NULL) {
4021558Srgrimes				return (net);
40336997Scharnier			}
4041558Srgrimes			min_errors_net->dest_state &= ~SCTP_ADDR_PF;
4051558Srgrimes			min_errors_net->cwnd = min_errors_net->mtu * sctp_cmt_pf;
4061558Srgrimes			if (SCTP_OS_TIMER_PENDING(&min_errors_net->rxt_timer.timer)) {
4071558Srgrimes				sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
4081558Srgrimes				    stcb, min_errors_net,
4091558Srgrimes				    SCTP_FROM_SCTP_TIMER + SCTP_LOC_2);
41099501Scharnier			}
41199501Scharnier			SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to active with %d errors.\n",
41299501Scharnier			    min_errors_net, min_errors_net->error_count);
41392334Sru			return (min_errors_net);
41492334Sru		} else {
4151558Srgrimes			return (max_cwnd_net);
4161558Srgrimes		}
4171558Srgrimes	}
4181558Srgrimes	/*
4191558Srgrimes	 * JRS 5/14/07 - If mode is set to 1, use the CMT policy for
4201558Srgrimes	 * choosing an alternate net.
42136997Scharnier	 */
4221558Srgrimes	else if (mode == 1) {
4231558Srgrimes		TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
4241558Srgrimes			if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
4251558Srgrimes			    (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)
4261558Srgrimes			    ) {
4271558Srgrimes				/*
4281558Srgrimes				 * will skip ones that are not-reachable or
4291558Srgrimes				 * unconfirmed
4301558Srgrimes				 */
4311558Srgrimes				continue;
4321558Srgrimes			}
4331558Srgrimes			if (max_cwnd < mnet->cwnd) {
43474028Sobrien				max_cwnd_net = mnet;
4351558Srgrimes				max_cwnd = mnet->cwnd;
4361558Srgrimes			} else if (max_cwnd == mnet->cwnd) {
4371558Srgrimes				uint32_t rndval;
4381558Srgrimes				uint8_t this_random;
4391558Srgrimes
440122140Skensmith				if (stcb->asoc.hb_random_idx > 3) {
441122140Skensmith					rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
442122140Skensmith					memcpy(stcb->asoc.hb_random_values, &rndval,
4431558Srgrimes					    sizeof(stcb->asoc.hb_random_values));
4441558Srgrimes					this_random = stcb->asoc.hb_random_values[0];
4451558Srgrimes					stcb->asoc.hb_random_idx = 0;
4461558Srgrimes					stcb->asoc.hb_ect_randombit = 0;
4471558Srgrimes				} else {
4481558Srgrimes					this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
4491558Srgrimes					stcb->asoc.hb_random_idx++;
4501558Srgrimes					stcb->asoc.hb_ect_randombit = 0;
4511558Srgrimes				}
4521558Srgrimes				if (this_random % 2) {
453102231Strhodes					max_cwnd_net = mnet;
4541558Srgrimes					max_cwnd = mnet->cwnd;
4551558Srgrimes				}
4561558Srgrimes			}
4571558Srgrimes		}
4581558Srgrimes		if (max_cwnd_net) {
45912377Sjoerg			return (max_cwnd_net);
460109067Ssheldonh		}
461109067Ssheldonh	}
462148600Sceri	mnet = net;
463148600Sceri	once = 0;
464148600Sceri
465148600Sceri	if (mnet == NULL) {
466148600Sceri		mnet = TAILQ_FIRST(&stcb->asoc.nets);
467109067Ssheldonh	}
468109067Ssheldonh	do {
46912377Sjoerg		alt = TAILQ_NEXT(mnet, sctp_next);
47012377Sjoerg		if (alt == NULL) {
471124551Salex			once++;
472129327Sru			if (once > 1) {
473124551Salex				break;
474109067Ssheldonh			}
4751558Srgrimes			alt = TAILQ_FIRST(&stcb->asoc.nets);
4761558Srgrimes		}
47774028Sobrien		if (alt->ro.ro_rt == NULL) {
4781558Srgrimes			if (alt->ro._s_addr) {
4791558Srgrimes				sctp_free_ifa(alt->ro._s_addr);
4801558Srgrimes				alt->ro._s_addr = NULL;
48179442Sdillon
48279442Sdillon			}
48379442Sdillon			alt->src_addr_selected = 0;
48479442Sdillon		}
4851558Srgrimes		if (
486102231Strhodes		    ((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
4871558Srgrimes		    (alt->ro.ro_rt != NULL) &&
4881558Srgrimes		/* sa_ignore NO_NULL_CHK */
4891558Srgrimes		    (!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))
4901558Srgrimes		    ) {
491140415Sru			/* Found a reachable address */
492140415Sru			break;
493140415Sru		}
494140415Sru		mnet = alt;
495162395Sru	} while (alt != NULL);
496162395Sru
497162395Sru	if (alt == NULL) {
498162395Sru		/* Case where NO insv network exists (dormant state) */
499162395Sru		/* we rotate destinations */
500162395Sru		once = 0;
501162395Sru		mnet = net;
502162395Sru		do {
503162395Sru			alt = TAILQ_NEXT(mnet, sctp_next);
504162395Sru			if (alt == NULL) {
505162395Sru				once++;
506140415Sru				if (once > 1) {
507140415Sru					break;
5081558Srgrimes				}
50990492Sdd				alt = TAILQ_FIRST(&stcb->asoc.nets);
51021635Swosch			}
5111558Srgrimes			/* sa_ignore NO_NULL_CHK */
51221635Swosch			if ((!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
513140415Sru			    (alt != net)) {
514140415Sru				/* Found an alternate address */
515140415Sru				break;
516140415Sru			}
517306980Ssevan			mnet = alt;
5181558Srgrimes		} while (alt != NULL);
519122140Skensmith	}
520122140Skensmith	if (alt == NULL) {
521122140Skensmith		return (net);
522129327Sru	}
523122140Skensmith	return (alt);
524129327Sru}
525129327Sru
526122140Skensmith
527122140Skensmith
528129327Srustatic void
529129327Srusctp_backoff_on_timeout(struct sctp_tcb *stcb,
530129327Sru    struct sctp_nets *net,
531129327Sru    int win_probe,
532129327Sru    int num_marked)
533129327Sru{
5341558Srgrimes	if (net->RTO == 0) {
5351558Srgrimes		net->RTO = stcb->asoc.minrto;
5361558Srgrimes	}
5371558Srgrimes	net->RTO <<= 1;
5381558Srgrimes	if (net->RTO > stcb->asoc.maxrto) {
53999501Scharnier		net->RTO = stcb->asoc.maxrto;
54099501Scharnier	}
54199501Scharnier	if ((win_probe == 0) && num_marked) {
54223672Speter		/* We don't apply penalty to window probe scenarios */
5431558Srgrimes		/* JRS - Use the congestion control given in the CC module */
54423672Speter		stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout(stcb, net);
545102231Strhodes	}
54679520Sru}
54779520Sru
54879520Srustatic int
5491558Srgrimessctp_mark_all_for_resend(struct sctp_tcb *stcb,
5501558Srgrimes    struct sctp_nets *net,
5511558Srgrimes    struct sctp_nets *alt,
5521558Srgrimes    int window_probe,
55336997Scharnier    int *num_marked)
5541558Srgrimes{
5551558Srgrimes
5561558Srgrimes	/*
5571558Srgrimes	 * Mark all chunks (well not all) that were sent to *net for
5581558Srgrimes	 * retransmission. Move them to alt for there destination as well...
559116035Scharnier	 * We only mark chunks that have been outstanding long enough to
56022505Seivind	 * have received feed-back.
56199501Scharnier	 */
56299501Scharnier	struct sctp_tmit_chunk *chk, *tp2, *could_be_sent = NULL;
56399501Scharnier	struct sctp_nets *lnets;
564111287Sru	struct timeval now, min_wait, tv;
565111287Sru	int cur_rtt;
56656407Smpp	int audit_tf, num_mk, fir;
56735911Ssteve	unsigned int cnt_mk;
56822505Seivind	uint32_t orig_flight, orig_tf;
569	uint32_t tsnlast, tsnfirst;
570
571
572	/* none in flight now */
573	audit_tf = 0;
574	fir = 0;
575	/*
576	 * figure out how long a data chunk must be pending before we can
577	 * mark it ..
578	 */
579	(void)SCTP_GETTIME_TIMEVAL(&now);
580	/* get cur rto in micro-seconds */
581	cur_rtt = (((net->lastsa >> 2) + net->lastsv) >> 1);
582	cur_rtt *= 1000;
583	if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
584		sctp_log_fr(cur_rtt,
585		    stcb->asoc.peers_rwnd,
586		    window_probe,
587		    SCTP_FR_T3_MARK_TIME);
588		sctp_log_fr(net->flight_size,
589		    SCTP_OS_TIMER_PENDING(&net->fr_timer.timer),
590		    SCTP_OS_TIMER_ACTIVE(&net->fr_timer.timer),
591		    SCTP_FR_CWND_REPORT);
592		sctp_log_fr(net->flight_size, net->cwnd, stcb->asoc.total_flight, SCTP_FR_CWND_REPORT);
593	}
594	tv.tv_sec = cur_rtt / 1000000;
595	tv.tv_usec = cur_rtt % 1000000;
596	min_wait = now;
597	timevalsub(&min_wait, &tv);
598	if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
599		/*
600		 * if we hit here, we don't have enough seconds on the clock
601		 * to account for the RTO. We just let the lower seconds be
602		 * the bounds and don't worry about it. This may mean we
603		 * will mark a lot more than we should.
604		 */
605		min_wait.tv_sec = min_wait.tv_usec = 0;
606	}
607	if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
608		sctp_log_fr(cur_rtt, now.tv_sec, now.tv_usec, SCTP_FR_T3_MARK_TIME);
609		sctp_log_fr(0, min_wait.tv_sec, min_wait.tv_usec, SCTP_FR_T3_MARK_TIME);
610	}
611	/*
612	 * Our rwnd will be incorrect here since we are not adding back the
613	 * cnt * mbuf but we will fix that down below.
614	 */
615	orig_flight = net->flight_size;
616	orig_tf = stcb->asoc.total_flight;
617
618	net->fast_retran_ip = 0;
619	/* Now on to each chunk */
620	num_mk = cnt_mk = 0;
621	tsnfirst = tsnlast = 0;
622	chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
623	for (; chk != NULL; chk = tp2) {
624		tp2 = TAILQ_NEXT(chk, sctp_next);
625		if ((compare_with_wrap(stcb->asoc.last_acked_seq,
626		    chk->rec.data.TSN_seq,
627		    MAX_TSN)) ||
628		    (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) {
629			/* Strange case our list got out of order? */
630			SCTP_PRINTF("Our list is out of order?\n");
631			panic("Out of order list");
632		}
633		if ((chk->whoTo == net) && (chk->sent < SCTP_DATAGRAM_ACKED)) {
634			/*
635			 * found one to mark: If it is less than
636			 * DATAGRAM_ACKED it MUST not be a skipped or marked
637			 * TSN but instead one that is either already set
638			 * for retransmission OR one that needs
639			 * retransmission.
640			 */
641
642			/* validate its been outstanding long enough */
643			if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
644				sctp_log_fr(chk->rec.data.TSN_seq,
645				    chk->sent_rcv_time.tv_sec,
646				    chk->sent_rcv_time.tv_usec,
647				    SCTP_FR_T3_MARK_TIME);
648			}
649			if ((chk->sent_rcv_time.tv_sec > min_wait.tv_sec) && (window_probe == 0)) {
650				/*
651				 * we have reached a chunk that was sent
652				 * some seconds past our min.. forget it we
653				 * will find no more to send.
654				 */
655				if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
656					sctp_log_fr(0,
657					    chk->sent_rcv_time.tv_sec,
658					    chk->sent_rcv_time.tv_usec,
659					    SCTP_FR_T3_STOPPED);
660				}
661				continue;
662			} else if ((chk->sent_rcv_time.tv_sec == min_wait.tv_sec) &&
663			    (window_probe == 0)) {
664				/*
665				 * we must look at the micro seconds to
666				 * know.
667				 */
668				if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
669					/*
670					 * ok it was sent after our boundary
671					 * time.
672					 */
673					if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
674						sctp_log_fr(0,
675						    chk->sent_rcv_time.tv_sec,
676						    chk->sent_rcv_time.tv_usec,
677						    SCTP_FR_T3_STOPPED);
678					}
679					continue;
680				}
681			}
682			if (PR_SCTP_TTL_ENABLED(chk->flags)) {
683				/* Is it expired? */
684				if ((now.tv_sec > chk->rec.data.timetodrop.tv_sec) ||
685				    ((chk->rec.data.timetodrop.tv_sec == now.tv_sec) &&
686				    (now.tv_usec > chk->rec.data.timetodrop.tv_usec))) {
687					/* Yes so drop it */
688					if (chk->data) {
689						(void)sctp_release_pr_sctp_chunk(stcb,
690						    chk,
691						    (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
692						    &stcb->asoc.sent_queue);
693					}
694				}
695				continue;
696			}
697			if (PR_SCTP_RTX_ENABLED(chk->flags)) {
698				/* Has it been retransmitted tv_sec times? */
699				if (chk->snd_count > chk->rec.data.timetodrop.tv_sec) {
700					if (chk->data) {
701						(void)sctp_release_pr_sctp_chunk(stcb,
702						    chk,
703						    (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
704						    &stcb->asoc.sent_queue);
705					}
706				}
707				continue;
708			}
709			if (chk->sent < SCTP_DATAGRAM_RESEND) {
710				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
711				num_mk++;
712				if (fir == 0) {
713					fir = 1;
714					tsnfirst = chk->rec.data.TSN_seq;
715				}
716				tsnlast = chk->rec.data.TSN_seq;
717				if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
718					sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
719					    0, SCTP_FR_T3_MARKED);
720				}
721				if (chk->rec.data.chunk_was_revoked) {
722					/* deflate the cwnd */
723					chk->whoTo->cwnd -= chk->book_size;
724					chk->rec.data.chunk_was_revoked = 0;
725				}
726				net->marked_retrans++;
727				stcb->asoc.marked_retrans++;
728				if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
729					sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND_TO,
730					    chk->whoTo->flight_size,
731					    chk->book_size,
732					    (uintptr_t) chk->whoTo,
733					    chk->rec.data.TSN_seq);
734				}
735				sctp_flight_size_decrease(chk);
736				sctp_total_flight_decrease(stcb, chk);
737				stcb->asoc.peers_rwnd += chk->send_size;
738				stcb->asoc.peers_rwnd += sctp_peer_chunk_oh;
739			}
740			chk->sent = SCTP_DATAGRAM_RESEND;
741			SCTP_STAT_INCR(sctps_markedretrans);
742
743			/* reset the TSN for striking and other FR stuff */
744			chk->window_probe = 0;
745			chk->rec.data.doing_fast_retransmit = 0;
746			/* Clear any time so NO RTT is being done */
747			chk->do_rtt = 0;
748			if (alt != net) {
749				sctp_free_remote_addr(chk->whoTo);
750				chk->no_fr_allowed = 1;
751				chk->whoTo = alt;
752				atomic_add_int(&alt->ref_count, 1);
753			} else {
754				chk->no_fr_allowed = 0;
755				if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
756					chk->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
757				} else {
758					chk->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.TSN_seq;
759				}
760			}
761			/*
762			 * CMT: Do not allow FRs on retransmitted TSNs.
763			 */
764			if (sctp_cmt_on_off == 1) {
765				chk->no_fr_allowed = 1;
766			}
767		} else if (chk->sent == SCTP_DATAGRAM_ACKED) {
768			/* remember highest acked one */
769			could_be_sent = chk;
770		}
771		if (chk->sent == SCTP_DATAGRAM_RESEND) {
772			cnt_mk++;
773		}
774	}
775	if ((orig_flight - net->flight_size) != (orig_tf - stcb->asoc.total_flight)) {
776		/* we did not subtract the same things? */
777		audit_tf = 1;
778	}
779	if (sctp_logging_level & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
780		sctp_log_fr(tsnfirst, tsnlast, num_mk, SCTP_FR_T3_TIMEOUT);
781	}
782#ifdef SCTP_DEBUG
783	if (num_mk) {
784		SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
785		    tsnlast);
786		SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%ld\n",
787		    num_mk, (u_long)stcb->asoc.peers_rwnd);
788		SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
789		    tsnlast);
790		SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%d\n",
791		    num_mk,
792		    (int)stcb->asoc.peers_rwnd);
793	}
794#endif
795	*num_marked = num_mk;
796	if ((stcb->asoc.sent_queue_retran_cnt == 0) && (could_be_sent)) {
797		/* fix it so we retransmit the highest acked anyway */
798		sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
799		cnt_mk++;
800		could_be_sent->sent = SCTP_DATAGRAM_RESEND;
801	}
802	if (stcb->asoc.sent_queue_retran_cnt != cnt_mk) {
803#ifdef INVARIANTS
804		SCTP_PRINTF("Local Audit says there are %d for retran asoc cnt:%d\n",
805		    cnt_mk, stcb->asoc.sent_queue_retran_cnt);
806#endif
807#ifndef SCTP_AUDITING_ENABLED
808		stcb->asoc.sent_queue_retran_cnt = cnt_mk;
809#endif
810	}
811	/* Now check for a ECN Echo that may be stranded */
812	TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
813		if ((chk->whoTo == net) &&
814		    (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
815			sctp_free_remote_addr(chk->whoTo);
816			chk->whoTo = alt;
817			if (chk->sent != SCTP_DATAGRAM_RESEND) {
818				chk->sent = SCTP_DATAGRAM_RESEND;
819				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
820			}
821			atomic_add_int(&alt->ref_count, 1);
822		}
823	}
824	if (audit_tf) {
825		SCTPDBG(SCTP_DEBUG_TIMER4,
826		    "Audit total flight due to negative value net:%p\n",
827		    net);
828		stcb->asoc.total_flight = 0;
829		stcb->asoc.total_flight_count = 0;
830		/* Clear all networks flight size */
831		TAILQ_FOREACH(lnets, &stcb->asoc.nets, sctp_next) {
832			lnets->flight_size = 0;
833			SCTPDBG(SCTP_DEBUG_TIMER4,
834			    "Net:%p c-f cwnd:%d ssthresh:%d\n",
835			    lnets, lnets->cwnd, lnets->ssthresh);
836		}
837		TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
838			if (chk->sent < SCTP_DATAGRAM_RESEND) {
839				if (sctp_logging_level & SCTP_FLIGHT_LOGGING_ENABLE) {
840					sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
841					    chk->whoTo->flight_size,
842					    chk->book_size,
843					    (uintptr_t) chk->whoTo,
844					    chk->rec.data.TSN_seq);
845				}
846				sctp_flight_size_increase(chk);
847				sctp_total_flight_increase(stcb, chk);
848			}
849		}
850	}
851	/*
852	 * Setup the ecn nonce re-sync point. We do this since
853	 * retranmissions are NOT setup for ECN. This means that do to
854	 * Karn's rule, we don't know the total of the peers ecn bits.
855	 */
856	chk = TAILQ_FIRST(&stcb->asoc.send_queue);
857	if (chk == NULL) {
858		stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq;
859	} else {
860		stcb->asoc.nonce_resync_tsn = chk->rec.data.TSN_seq;
861	}
862	stcb->asoc.nonce_wait_for_ecne = 0;
863	stcb->asoc.nonce_sum_check = 0;
864	/* We return 1 if we only have a window probe outstanding */
865	return (0);
866}
867
868static void
869sctp_move_all_chunks_to_alt(struct sctp_tcb *stcb,
870    struct sctp_nets *net,
871    struct sctp_nets *alt)
872{
873	struct sctp_association *asoc;
874	struct sctp_stream_out *outs;
875	struct sctp_tmit_chunk *chk;
876	struct sctp_stream_queue_pending *sp;
877
878	if (net == alt)
879		/* nothing to do */
880		return;
881
882	asoc = &stcb->asoc;
883
884	/*
885	 * now through all the streams checking for chunks sent to our bad
886	 * network.
887	 */
888	TAILQ_FOREACH(outs, &asoc->out_wheel, next_spoke) {
889		/* now clean up any chunks here */
890		TAILQ_FOREACH(sp, &outs->outqueue, next) {
891			if (sp->net == net) {
892				sctp_free_remote_addr(sp->net);
893				sp->net = alt;
894				atomic_add_int(&alt->ref_count, 1);
895			}
896		}
897	}
898	/* Now check the pending queue */
899	TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
900		if (chk->whoTo == net) {
901			sctp_free_remote_addr(chk->whoTo);
902			chk->whoTo = alt;
903			atomic_add_int(&alt->ref_count, 1);
904		}
905	}
906
907}
908
/*
 * T3-rxt (data retransmission) timer expired for destination "net".
 * Marks outstanding data for retransmission, backs off the RTO/cwnd, and
 * when the destination is unreachable (or, with CMT-PF, in PF state)
 * steers traffic to an alternate destination.
 *
 * Returns 1 when threshold management destroyed the association (caller
 * must not touch stcb afterwards), 0 otherwise.
 */
int
sctp_t3rxt_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;
	int win_probe, num_mk;

	if (sctp_logging_level & SCTP_FR_LOGGING_ENABLE) {
		sctp_log_fr(0, 0, 0, SCTP_FR_T3_TIMEOUT);
	}
	if (sctp_logging_level & SCTP_CWND_LOGGING_ENABLE) {
		struct sctp_nets *lnet;

		/* Log cwnd of every net; flag value 1 marks the timed-out one. */
		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
			if (net == lnet) {
				sctp_log_cwnd(stcb, lnet, 1, SCTP_CWND_LOG_FROM_T3);
			} else {
				sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_LOG_FROM_T3);
			}
		}
	}
	/* Find an alternate and mark those for retransmission */
	if ((stcb->asoc.peers_rwnd == 0) &&
	    (stcb->asoc.total_flight < net->mtu)) {
		/*
		 * Peer's rwnd is closed and less than one MTU is in
		 * flight: this timeout was for a window probe, not for
		 * lost data.
		 */
		SCTP_STAT_INCR(sctps_timowindowprobe);
		win_probe = 1;
	} else {
		win_probe = 0;
	}

	/*
	 * JRS 5/14/07 - If CMT PF is on and the destination if not already
	 * in PF state, set the destination to PF state and store the
	 * current time as the time that the destination was last active. In
	 * addition, find an alternate destination with PF-based
	 * find_alt_net().
	 */
	if (sctp_cmt_pf) {
		if ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF) {
			net->dest_state |= SCTP_ADDR_PF;
			net->last_active = ticks;
			SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from active to PF.\n",
			    net);
		}
		alt = sctp_find_alternate_net(stcb, net, 2);
	} else if (sctp_cmt_on_off) {
		/*
		 * CMT: Using RTX_SSTHRESH policy for CMT. If CMT is being
		 * used, then pick dest with largest ssthresh for any
		 * retransmission.
		 */
		alt = net;
		alt = sctp_find_alternate_net(stcb, alt, 1);
		/*
		 * CUCv2: If a different dest is picked for the
		 * retransmission, then new (rtx-)pseudo_cumack needs to be
		 * tracked for orig dest. Let CUCv2 track new (rtx-)
		 * pseudo-cumack always.
		 */
		net->find_pseudo_cumack = 1;
		net->find_rtx_pseudo_cumack = 1;

	} else {		/* CMT is OFF */
		alt = sctp_find_alternate_net(stcb, net, 0);
	}

	(void)sctp_mark_all_for_resend(stcb, net, alt, win_probe, &num_mk);
	/* FR Loss recovery just ended with the T3. */
	stcb->asoc.fast_retran_loss_recovery = 0;

	/* CMT FR loss recovery ended with the T3 */
	net->fast_retran_loss_recovery = 0;

	/*
	 * setup the sat loss recovery that prevents satellite cwnd advance.
	 */
	stcb->asoc.sat_t3_loss_recovery = 1;
	stcb->asoc.sat_t3_recovery_tsn = stcb->asoc.sending_seq;

	/* Backoff the timer and cwnd */
	sctp_backoff_on_timeout(stcb, net, win_probe, num_mk);
	if (win_probe == 0) {
		/* We don't do normal threshold management on window probes */
		if (sctp_threshold_management(inp, stcb, net,
		    stcb->asoc.max_send_times)) {
			/* Association was destroyed */
			return (1);
		} else {
			if (net != stcb->asoc.primary_destination) {
				/* send a immediate HB if our RTO is stale */
				struct timeval now;
				unsigned int ms_goneby;

				(void)SCTP_GETTIME_TIMEVAL(&now);
				if (net->last_sent_time.tv_sec) {
					ms_goneby = (now.tv_sec - net->last_sent_time.tv_sec) * 1000;
				} else {
					ms_goneby = 0;
				}
				if ((ms_goneby > net->RTO) || (net->RTO == 0)) {
					/*
					 * no recent feed back in an RTO or
					 * more, request a RTT update
					 */
					if (sctp_send_hb(stcb, 1, net) < 0)
						return 1;
				}
			}
		}
	} else {
		/*
		 * For a window probe we don't penalize the net's but only
		 * the association. This may fail it if SACKs are not coming
		 * back. If sack's are coming with rwnd locked at 0, we will
		 * continue to hold things waiting for rwnd to raise
		 */
		if (sctp_threshold_management(inp, stcb, NULL,
		    stcb->asoc.max_send_times)) {
			/* Association was destroyed */
			return (1);
		}
	}
	if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
		/* Move all pending over too */
		sctp_move_all_chunks_to_alt(stcb, net, alt);

		/*
		 * Get the address that failed, to force a new src address
		 * selecton and a route allocation.
		 */
		if (net->ro._s_addr) {
			sctp_free_ifa(net->ro._s_addr);
			net->ro._s_addr = NULL;
		}
		net->src_addr_selected = 0;

		/* Force a route allocation too */
		if (net->ro.ro_rt) {
			RTFREE(net->ro.ro_rt);
			net->ro.ro_rt = NULL;
		}
		/* Was it our primary? */
		if ((stcb->asoc.primary_destination == net) && (alt != net)) {
			/*
			 * Yes, note it as such and find an alternate note:
			 * this means HB code must use this to resent the
			 * primary if it goes active AND if someone does a
			 * change-primary then this flag must be cleared
			 * from any net structures.
			 */
			if (sctp_set_primary_addr(stcb,
			    (struct sockaddr *)NULL,
			    alt) == 0) {
				net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
			}
		}
	} else if (sctp_cmt_pf && (net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF) {
		/*
		 * JRS 5/14/07 - If the destination hasn't failed completely
		 * but is in PF state, a PF-heartbeat needs to be sent
		 * manually.
		 */
		if (sctp_send_hb(stcb, 1, net) < 0)
			return 1;
	}
	/*
	 * Special case for cookie-echo'ed case, we don't do output but must
	 * await the COOKIE-ACK before retransmission
	 */
	if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
		/*
		 * Here we just reset the timer and start again since we
		 * have not established the asoc
		 */
		sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
		return (0);
	}
	if (stcb->asoc.peer_supports_prsctp) {
		struct sctp_tmit_chunk *lchk;

		lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc);
		/* C3. See if we need to send a Fwd-TSN */
		if (compare_with_wrap(stcb->asoc.advanced_peer_ack_point,
		    stcb->asoc.last_acked_seq, MAX_TSN)) {
			/*
			 * ISSUE with ECN, see FWD-TSN processing for notes
			 * on issues that will occur when the ECN NONCE
			 * stuff is put into SCTP for cross checking.
			 */
			send_forward_tsn(stcb, &stcb->asoc);
			if (lchk) {
				/* Assure a timer is up */
				sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, lchk->whoTo);
			}
		}
	}
	if (sctp_logging_level & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
	return (0);
}
1111
1112int
1113sctp_t1init_timer(struct sctp_inpcb *inp,
1114    struct sctp_tcb *stcb,
1115    struct sctp_nets *net)
1116{
1117	/* bump the thresholds */
1118	if (stcb->asoc.delayed_connection) {
1119		/*
1120		 * special hook for delayed connection. The library did NOT
1121		 * complete the rest of its sends.
1122		 */
1123		stcb->asoc.delayed_connection = 0;
1124		sctp_send_initiate(inp, stcb);
1125		return (0);
1126	}
1127	if (SCTP_GET_STATE((&stcb->asoc)) != SCTP_STATE_COOKIE_WAIT) {
1128		return (0);
1129	}
1130	if (sctp_threshold_management(inp, stcb, net,
1131	    stcb->asoc.max_init_times)) {
1132		/* Association was destroyed */
1133		return (1);
1134	}
1135	stcb->asoc.dropped_special_cnt = 0;
1136	sctp_backoff_on_timeout(stcb, stcb->asoc.primary_destination, 1, 0);
1137	if (stcb->asoc.initial_init_rto_max < net->RTO) {
1138		net->RTO = stcb->asoc.initial_init_rto_max;
1139	}
1140	if (stcb->asoc.numnets > 1) {
1141		/* If we have more than one addr use it */
1142		struct sctp_nets *alt;
1143
1144		alt = sctp_find_alternate_net(stcb, stcb->asoc.primary_destination, 0);
1145		if ((alt != NULL) && (alt != stcb->asoc.primary_destination)) {
1146			sctp_move_all_chunks_to_alt(stcb, stcb->asoc.primary_destination, alt);
1147			stcb->asoc.primary_destination = alt;
1148		}
1149	}
1150	/* Send out a new init */
1151	sctp_send_initiate(inp, stcb);
1152	return (0);
1153}
1154
1155/*
1156 * For cookie and asconf we actually need to find and mark for resend, then
1157 * increment the resend counter (after all the threshold management stuff of
1158 * course).
1159 */
/*
 * COOKIE-ECHO retransmission timer.  Locates the COOKIE-ECHO chunk on the
 * control queue, runs threshold management, backs off the address, moves
 * the chunk to an alternate destination if one exists, and marks it for
 * resend.  If no cookie is found while the association claims to be in
 * COOKIE-ECHOED state the association is aborted (protocol violation).
 * Returns 1 when the association was destroyed, 0 otherwise.
 */
int
sctp_cookie_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;
	struct sctp_tmit_chunk *cookie;

	/* first before all else we must find the cookie */
	TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue, sctp_next) {
		if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
			break;
		}
	}
	/* cookie is NULL here iff the loop ran to completion without a hit */
	if (cookie == NULL) {
		if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
			/* FOOBAR! */
			struct mbuf *oper;

			/*
			 * Build an operational-error cause (protocol
			 * violation + location code) to ship with the
			 * ABORT; a NULL oper is tolerated downstream.
			 */
			oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
			    0, M_DONTWAIT, 1, MT_DATA);
			if (oper) {
				struct sctp_paramhdr *ph;
				uint32_t *ippp;

				SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
				    sizeof(uint32_t);
				ph = mtod(oper, struct sctp_paramhdr *);
				ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
				ph->param_length = htons(SCTP_BUF_LEN(oper));
				ippp = (uint32_t *) (ph + 1);
				*ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
			}
			inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
			sctp_abort_an_association(inp, stcb, SCTP_INTERNAL_ERROR,
			    oper);
		} else {
#ifdef INVARIANTS
			panic("Cookie timer expires in wrong state?");
#else
			SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(&stcb->asoc));
			return (0);
#endif
		}
		return (0);
	}
	/* Ok we found the cookie, threshold management next */
	if (sctp_threshold_management(inp, stcb, cookie->whoTo,
	    stcb->asoc.max_init_times)) {
		/* Assoc is over */
		return (1);
	}
	/*
	 * cleared theshold management now lets backoff the address & select
	 * an alternate
	 */
	stcb->asoc.dropped_special_cnt = 0;
	sctp_backoff_on_timeout(stcb, cookie->whoTo, 1, 0);
	alt = sctp_find_alternate_net(stcb, cookie->whoTo, 0);
	if (alt != cookie->whoTo) {
		/* drop the old net ref, take one on the alternate */
		sctp_free_remote_addr(cookie->whoTo);
		cookie->whoTo = alt;
		atomic_add_int(&alt->ref_count, 1);
	}
	/* Now mark the retran info */
	if (cookie->sent != SCTP_DATAGRAM_RESEND) {
		/* only bump the retran count on the first marking */
		sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
	}
	cookie->sent = SCTP_DATAGRAM_RESEND;
	/*
	 * Now call the output routine to kick out the cookie again, Note we
	 * don't mark any chunks for retran so that FR will need to kick in
	 * to move these (or a send timer).
	 */
	return (0);
}
1236
/*
 * Stream-reset retransmission timer.  Finds the outstanding STREAM-RESET
 * request by its sequence number, runs threshold management, moves it (and
 * any ECN-ECHO chunk stranded on the same net) to an alternate destination,
 * marks it for resend and restarts the timer.  Returns 1 when the
 * association was destroyed, 0 otherwise.
 */
int
sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;
	struct sctp_tmit_chunk *strrst = NULL, *chk = NULL;

	if (stcb->asoc.stream_reset_outstanding == 0) {
		/* nothing pending - spurious timer */
		return (0);
	}
	/* find the existing STRRESET, we use the seq number we sent out on */
	(void)sctp_find_stream_reset(stcb, stcb->asoc.str_reset_seq_out, &strrst);
	if (strrst == NULL) {
		return (0);
	}
	/* do threshold management */
	if (sctp_threshold_management(inp, stcb, strrst->whoTo,
	    stcb->asoc.max_send_times)) {
		/* Assoc is over */
		return (1);
	}
	/*
	 * cleared theshold management now lets backoff the address & select
	 * an alternate
	 */
	sctp_backoff_on_timeout(stcb, strrst->whoTo, 1, 0);
	alt = sctp_find_alternate_net(stcb, strrst->whoTo, 0);
	/* swap the request's destination ref over to the alternate */
	sctp_free_remote_addr(strrst->whoTo);
	strrst->whoTo = alt;
	atomic_add_int(&alt->ref_count, 1);

	/* See if a ECN Echo is also stranded */
	TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
		if ((chk->whoTo == net) &&
		    (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
			sctp_free_remote_addr(chk->whoTo);
			if (chk->sent != SCTP_DATAGRAM_RESEND) {
				chk->sent = SCTP_DATAGRAM_RESEND;
				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
			}
			chk->whoTo = alt;
			atomic_add_int(&alt->ref_count, 1);
		}
	}
	if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
		/*
		 * If the address went un-reachable, we need to move to
		 * alternates for ALL chk's in queue
		 */
		sctp_move_all_chunks_to_alt(stcb, net, alt);
	}
	/* mark the retran info */
	if (strrst->sent != SCTP_DATAGRAM_RESEND)
		sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
	strrst->sent = SCTP_DATAGRAM_RESEND;

	/* restart the timer */
	sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, inp, stcb, strrst->whoTo);
	return (0);
}
1297
/*
 * ASCONF timer.  On the first expiry (nothing sent yet) a new ASCONF is
 * composed and sent; on later expiries the outstanding ASCONF is located
 * on the control queue, threshold-managed, moved to an alternate
 * destination and marked for resend.  A peer that ignores too many
 * ASCONFs is declared ASCONF-incapable and cleaned up.  Returns 1 when
 * the association was destroyed, 0 otherwise.
 */
int
sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;
	struct sctp_tmit_chunk *asconf, *chk;

	/* is this the first send, or a retransmission? */
	if (stcb->asoc.asconf_sent == 0) {
		/* compose a new ASCONF chunk and send it */
		sctp_send_asconf(stcb, net);
	} else {
		/* Retransmission of the existing ASCONF needed... */

		/* find the existing ASCONF */
		TAILQ_FOREACH(asconf, &stcb->asoc.control_send_queue,
		    sctp_next) {
			if (asconf->rec.chunk_id.id == SCTP_ASCONF) {
				break;
			}
		}
		if (asconf == NULL) {
			/* nothing queued - spurious timer */
			return (0);
		}
		/* do threshold management */
		if (sctp_threshold_management(inp, stcb, asconf->whoTo,
		    stcb->asoc.max_send_times)) {
			/* Assoc is over */
			return (1);
		}
		/*
		 * PETER? FIX? How will the following code ever run? If the
		 * max_send_times is hit, threshold managment will blow away
		 * the association?
		 */
		if (asconf->snd_count > stcb->asoc.max_send_times) {
			/*
			 * Something is rotten, peer is not responding to
			 * ASCONFs but maybe is to data etc.  e.g. it is not
			 * properly handling the chunk type upper bits Mark
			 * this peer as ASCONF incapable and cleanup
			 */
			SCTPDBG(SCTP_DEBUG_TIMER1, "asconf_timer: Peer has not responded to our repeated ASCONFs\n");
			sctp_asconf_cleanup(stcb, net);
			return (0);
		}
		/*
		 * cleared theshold management now lets backoff the address
		 * & select an alternate
		 */
		sctp_backoff_on_timeout(stcb, asconf->whoTo, 1, 0);
		alt = sctp_find_alternate_net(stcb, asconf->whoTo, 0);
		/* swap the ASCONF's destination ref over to the alternate */
		sctp_free_remote_addr(asconf->whoTo);
		asconf->whoTo = alt;
		atomic_add_int(&alt->ref_count, 1);

		/* See if a ECN Echo is also stranded */
		TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
			if ((chk->whoTo == net) &&
			    (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
				sctp_free_remote_addr(chk->whoTo);
				chk->whoTo = alt;
				if (chk->sent != SCTP_DATAGRAM_RESEND) {
					chk->sent = SCTP_DATAGRAM_RESEND;
					sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
				}
				atomic_add_int(&alt->ref_count, 1);
			}
		}
		if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
			/*
			 * If the address went un-reachable, we need to move
			 * to alternates for ALL chk's in queue
			 */
			sctp_move_all_chunks_to_alt(stcb, net, alt);
		}
		/* mark the retran info */
		if (asconf->sent != SCTP_DATAGRAM_RESEND)
			sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
		asconf->sent = SCTP_DATAGRAM_RESEND;
	}
	return (0);
}
1381
1382/*
1383 * For the shutdown and shutdown-ack, we do not keep one around on the
1384 * control queue. This means we must generate a new one and call the general
1385 * chunk output routine, AFTER having done threshold management.
1386 */
1387int
1388sctp_shutdown_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
1389    struct sctp_nets *net)
1390{
1391	struct sctp_nets *alt;
1392
1393	/* first threshold managment */
1394	if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
1395		/* Assoc is over */
1396		return (1);
1397	}
1398	/* second select an alternative */
1399	alt = sctp_find_alternate_net(stcb, net, 0);
1400
1401	/* third generate a shutdown into the queue for out net */
1402	if (alt) {
1403		sctp_send_shutdown(stcb, alt);
1404	} else {
1405		/*
1406		 * if alt is NULL, there is no dest to send to??
1407		 */
1408		return (0);
1409	}
1410	/* fourth restart timer */
1411	sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, inp, stcb, alt);
1412	return (0);
1413}
1414
1415int
1416sctp_shutdownack_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
1417    struct sctp_nets *net)
1418{
1419	struct sctp_nets *alt;
1420
1421	/* first threshold managment */
1422	if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
1423		/* Assoc is over */
1424		return (1);
1425	}
1426	/* second select an alternative */
1427	alt = sctp_find_alternate_net(stcb, net, 0);
1428
1429	/* third generate a shutdown into the queue for out net */
1430	sctp_send_shutdown_ack(stcb, alt);
1431
1432	/* fourth restart timer */
1433	sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, inp, stcb, alt);
1434	return (0);
1435}
1436
/*
 * Sanity audit run when total_output_queue_size is non-zero but the
 * send/sent queues are empty.  Repairs streams that fell off the output
 * wheel, zeroes a stale retran count, and either kicks the output path
 * (data really is queued) or clears the bogus byte count.
 *
 * This function is ONLY called when the send/sent queues are empty.
 */
static void
sctp_audit_stream_queues_for_size(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb)
{
	struct sctp_stream_out *outs;
	struct sctp_stream_queue_pending *sp;
	unsigned int chks_in_queue = 0;
	int being_filled = 0;

	/*
	 * This function is ONLY called when the send/sent queues are empty.
	 */
	if ((stcb == NULL) || (inp == NULL))
		return;

	if (stcb->asoc.sent_queue_retran_cnt) {
		/* sent queue is empty, so any retran count is stale */
		SCTP_PRINTF("Hmm, sent_queue_retran_cnt is non-zero %d\n",
		    stcb->asoc.sent_queue_retran_cnt);
		stcb->asoc.sent_queue_retran_cnt = 0;
	}
	SCTP_TCB_SEND_LOCK(stcb);
	if (TAILQ_EMPTY(&stcb->asoc.out_wheel)) {
		int i, cnt = 0;

		/* Check to see if a spoke fell off the wheel */
		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
			if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
				sctp_insert_on_wheel(stcb, &stcb->asoc, &stcb->asoc.strmout[i], 1);
				cnt++;
			}
		}
		if (cnt) {
			/* yep, we lost a spoke or two */
			SCTP_PRINTF("Found an additional %d streams NOT on outwheel, corrected\n", cnt);
		} else {
			/* no spokes lost, */
			stcb->asoc.total_output_queue_size = 0;
		}
		/* both branches above fall through to the unlock+return */
		SCTP_TCB_SEND_UNLOCK(stcb);
		return;
	}
	SCTP_TCB_SEND_UNLOCK(stcb);
	/* Check to see if some data queued, if so report it */
	TAILQ_FOREACH(outs, &stcb->asoc.out_wheel, next_spoke) {
		if (!TAILQ_EMPTY(&outs->outqueue)) {
			TAILQ_FOREACH(sp, &outs->outqueue, next) {
				if (sp->msg_is_complete)
					being_filled++;
				chks_in_queue++;
			}
		}
	}
	if (chks_in_queue != stcb->asoc.stream_queue_cnt) {
		/* bookkeeping mismatch - report but do not repair here */
		SCTP_PRINTF("Hmm, stream queue cnt at %d I counted %d in stream out wheel\n",
		    stcb->asoc.stream_queue_cnt, chks_in_queue);
	}
	if (chks_in_queue) {
		/* call the output queue function */
		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3);
		if ((TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
		    (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
			/*
			 * Probably should go in and make it go back through
			 * and add fragments allowed
			 */
			if (being_filled == 0) {
				SCTP_PRINTF("Still nothing moved %d chunks are stuck\n",
				    chks_in_queue);
			}
		}
	} else {
		/* nothing queued anywhere; the byte count was bogus */
		SCTP_PRINTF("Found no chunks on any queue tot:%lu\n",
		    (u_long)stcb->asoc.total_output_queue_size);
		stcb->asoc.total_output_queue_size = 0;
	}
}
1513
/*
 * Heartbeat timer.  When "net" missed its last HB, its source address is
 * invalidated and its RTO backed off.  Then either one HB is sent to a
 * destination chosen by sctp_send_hb() (no unconfirmed addresses), or up
 * to sctp_hb_maxburst HBs are sent to unconfirmed-but-reachable
 * addresses.  Returns 1 when sending a HB destroyed the association
 * (sctp_send_hb() < 0), 0 otherwise.
 */
int
sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net, int cnt_of_unconf)
{
	int ret;

	if (net) {
		if (net->hb_responded == 0) {
			if (net->ro._s_addr) {
				/*
				 * Invalidate the src address if we did not
				 * get a response last time.
				 */
				sctp_free_ifa(net->ro._s_addr);
				net->ro._s_addr = NULL;
				net->src_addr_selected = 0;
			}
			sctp_backoff_on_timeout(stcb, net, 1, 0);
		}
		/* Zero PBA, if it needs it */
		if (net->partial_bytes_acked) {
			net->partial_bytes_acked = 0;
		}
	}
	if ((stcb->asoc.total_output_queue_size > 0) &&
	    (TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
	    (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
		/* queue size claims data but both queues are empty - audit */
		sctp_audit_stream_queues_for_size(inp, stcb);
	}
	/* Send a new HB, this will do threshold managment, pick a new dest */
	if (cnt_of_unconf == 0) {
		if (sctp_send_hb(stcb, 0, NULL) < 0) {
			return (1);
		}
	} else {
		/*
		 * this will send out extra hb's up to maxburst if there are
		 * any unconfirmed addresses.
		 */
		uint32_t cnt_sent = 0;

		/* NOTE: "net" is reused as the loop cursor from here on */
		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
			if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
			    (net->dest_state & SCTP_ADDR_REACHABLE)) {
				cnt_sent++;
				if (net->hb_responded == 0) {
					/* Did we respond last time? */
					if (net->ro._s_addr) {
						sctp_free_ifa(net->ro._s_addr);
						net->ro._s_addr = NULL;
						net->src_addr_selected = 0;
					}
				}
				ret = sctp_send_hb(stcb, 1, net);
				if (ret < 0)
					return 1;
				else if (ret == 0) {
					break;
				}
				if (cnt_sent >= sctp_hb_maxburst)
					break;
			}
		}
	}
	return (0);
}
1580
1581int
1582sctp_is_hb_timer_running(struct sctp_tcb *stcb)
1583{
1584	if (SCTP_OS_TIMER_PENDING(&stcb->asoc.hb_timer.timer)) {
1585		/* its running */
1586		return (1);
1587	} else {
1588		/* nope */
1589		return (0);
1590	}
1591}
1592
1593int
1594sctp_is_sack_timer_running(struct sctp_tcb *stcb)
1595{
1596	if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
1597		/* its running */
1598		return (1);
1599	} else {
1600		/* nope */
1601		return (0);
1602	}
1603}
1604
/*
 * Table of common link MTU values, ascending; sctp_getnext_mtu() probes
 * the path MTU upward one entry at a time.  The entry count is derived
 * from the table itself so the two can never fall out of sync (the old
 * hand-maintained "18" would silently break if an entry were added).
 */
static uint32_t mtu_sizes[] = {
	68,
	296,
	508,
	512,
	544,
	576,
	1006,
	1492,
	1500,
	1536,
	2002,
	2048,
	4352,
	4464,
	8166,
	17914,
	32000,
	65535
};

#define SCTP_NUMBER_OF_MTU_SIZES (sizeof(mtu_sizes) / sizeof(mtu_sizes[0]))
1626
1627
1628static uint32_t
1629sctp_getnext_mtu(struct sctp_inpcb *inp, uint32_t cur_mtu)
1630{
1631	/* select another MTU that is just bigger than this one */
1632	int i;
1633
1634	for (i = 0; i < SCTP_NUMBER_OF_MTU_SIZES; i++) {
1635		if (cur_mtu < mtu_sizes[i]) {
1636			/* no max_mtu is bigger than this one */
1637			return (mtu_sizes[i]);
1638		}
1639	}
1640	/* here return the highest allowable */
1641	return (cur_mtu);
1642}
1643
1644
/*
 * Path-MTU raise timer.  Picks the next larger table MTU; if the route's
 * gathered MTU exceeds it, the net's MTU is raised one step.  The source
 * address is (re)selected first when missing or being deleted.  The timer
 * is restarted unless no larger MTU exists.
 */
void
sctp_pathmtu_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	uint32_t next_mtu;

	/* restart the timer in any case */
	next_mtu = sctp_getnext_mtu(inp, net->mtu);
	if (next_mtu <= net->mtu) {
		/* nothing to do */
		return;
	} {
		/* NOTE(review): bare block after return - likely a leftover
		 * "else"; harmless but worth tidying upstream. */
		uint32_t mtu;

		if ((net->src_addr_selected == 0) ||
		    (net->ro._s_addr == NULL) ||
		    (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
			if ((net->ro._s_addr != NULL) && (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
				/* old ifa is going away - drop it */
				sctp_free_ifa(net->ro._s_addr);
				net->ro._s_addr = NULL;
				net->src_addr_selected = 0;
			} else if (net->ro._s_addr == NULL) {
				net->ro._s_addr = sctp_source_address_selection(inp,
				    stcb,
				    (sctp_route_t *) & net->ro,
				    net, 0, stcb->asoc.vrf_id);
			}
			if (net->ro._s_addr)
				net->src_addr_selected = 1;
		}
		if (net->ro._s_addr) {
			/*
			 * NOTE(review): "&net->ro._s_addr.sa" looks wrong
			 * (_s_addr is a pointer); it only compiles because
			 * the macro does not expand that argument - confirm
			 * against SCTP_GATHER_MTU_FROM_ROUTE's definition.
			 */
			mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_rt);
			if (mtu > next_mtu) {
				net->mtu = next_mtu;
			}
		}
	}
	/* restart the timer */
	sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
}
1686
/*
 * Auto-close timer.  When the association has been idle (no send or
 * receive) for at least sctp_autoclose_ticks, flush any residual output
 * and, if both data queues are empty, initiate SHUTDOWN.  Otherwise the
 * timer is re-armed for the remaining idle time.
 */
void
sctp_autoclose_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct timeval tn, *tim_touse;
	struct sctp_association *asoc;
	int ticks_gone_by;

	(void)SCTP_GETTIME_TIMEVAL(&tn);
	if (stcb->asoc.sctp_autoclose_ticks &&
	    sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
		/* Auto close is on */
		asoc = &stcb->asoc;
		/* pick the time to use */
		if (asoc->time_last_rcvd.tv_sec >
		    asoc->time_last_sent.tv_sec) {
			tim_touse = &asoc->time_last_rcvd;
		} else {
			tim_touse = &asoc->time_last_sent;
		}
		/* Now has long enough transpired to autoclose? */
		ticks_gone_by = SEC_TO_TICKS(tn.tv_sec - tim_touse->tv_sec);
		if ((ticks_gone_by > 0) &&
		    (ticks_gone_by >= (int)asoc->sctp_autoclose_ticks)) {
			/*
			 * autoclose time has hit, call the output routine,
			 * which should do nothing just to be SURE we don't
			 * have hanging data. We can then safely check the
			 * queues and know that we are clear to send
			 * shutdown
			 */
			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR);
			/* Are we clean? */
			if (TAILQ_EMPTY(&asoc->send_queue) &&
			    TAILQ_EMPTY(&asoc->sent_queue)) {
				/*
				 * there is nothing queued to send, so I'm
				 * done...
				 */
				if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
					/* only send SHUTDOWN 1st time thru */
					sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
					if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
					    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
						/* leaving established - drop the gauge */
						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
					}
					asoc->state = SCTP_STATE_SHUTDOWN_SENT;
					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
					    stcb->sctp_ep, stcb,
					    asoc->primary_destination);
					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
					    stcb->sctp_ep, stcb,
					    asoc->primary_destination);
				}
			}
		} else {
			/*
			 * No auto close at this time, reset t-o to check
			 * later
			 */
			int tmp;

			/* fool the timer startup to use the time left */
			tmp = asoc->sctp_autoclose_ticks;
			asoc->sctp_autoclose_ticks -= ticks_gone_by;
			sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
			    net);
			/* restore the real tick value */
			asoc->sctp_autoclose_ticks = tmp;
		}
	}
}
1760
/*
 * Timer-driven continuation of an endpoint/association iterator.
 *
 * Walks every endpoint (inp) and, within each, every association (stcb),
 * invoking the caller-supplied callbacks stored in 'it':
 *   - function_inp      : once per endpoint, before its associations
 *   - function_assoc    : once per matching association
 *   - function_inp_end  : once per endpoint, after its last association
 *   - function_atend    : once, when the whole iteration completes
 * The walk is bounded per invocation (SCTP_ITERATOR_MAX_AT_ONCE); when the
 * budget is exhausted, the iterator re-arms its timer and resumes from the
 * saved position ('it->inp' / 'it->stcb') on the next firing.  When the
 * iterator finishes, 'it' is removed from the global list and freed.
 */
void
sctp_iterator_timer(struct sctp_iterator *it)
{
	int iteration_count = 0;
	int inp_skip = 0;

	/*
	 * only one iterator can run at a time. This is the only way we can
	 * cleanly pull ep's from underneath all the running iterators when
	 * a ep is freed.
	 */
	SCTP_ITERATOR_LOCK();
	if (it->inp == NULL) {
		/* iterator is complete */
done_with_iterator:
		/*
		 * Tear-down path, also entered by goto from below once the
		 * endpoint list is exhausted: unlink the iterator from the
		 * global iterator list, run the completion callback, and
		 * free it.  After SCTP_FREE, 'it' must not be touched.
		 */
		SCTP_ITERATOR_UNLOCK();
		SCTP_INP_INFO_WLOCK();
		TAILQ_REMOVE(&sctppcbinfo.iteratorhead, it, sctp_nxt_itr);
		/* stopping the callout is not needed, in theory */
		SCTP_INP_INFO_WUNLOCK();
		(void)SCTP_OS_TIMER_STOP(&it->tmr.timer);
		if (it->function_atend != NULL) {
			(*it->function_atend) (it->pointer, it->val);
		}
		SCTP_FREE(it, SCTP_M_ITER);
		return;
	}
select_a_new_ep:
	SCTP_INP_WLOCK(it->inp);
	/*
	 * Skip endpoints whose flags/features don't match the iterator's
	 * selection criteria (when a mask is set, all of its bits must be
	 * present on the endpoint).
	 */
	while (((it->pcb_flags) &&
	    ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
	    ((it->pcb_features) &&
	    ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
		/* endpoint flags or features don't match, so keep looking */
		if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
			/* single-endpoint mode: a mismatch means we're done */
			SCTP_INP_WUNLOCK(it->inp);
			goto done_with_iterator;
		}
		SCTP_INP_WUNLOCK(it->inp);
		it->inp = LIST_NEXT(it->inp, sctp_list);
		if (it->inp == NULL) {
			goto done_with_iterator;
		}
		SCTP_INP_WLOCK(it->inp);
	}
	/*
	 * If another iterator has already claimed this endpoint, back off
	 * and retry later via the restart-timer path.
	 */
	if ((it->inp->inp_starting_point_for_iterator != NULL) &&
	    (it->inp->inp_starting_point_for_iterator != it)) {
		SCTP_PRINTF("Iterator collision, waiting for one at %p\n",
		    it->inp);
		SCTP_INP_WUNLOCK(it->inp);
		goto start_timer_return;
	}
	/* mark the current iterator on the endpoint */
	it->inp->inp_starting_point_for_iterator = it;
	SCTP_INP_WUNLOCK(it->inp);
	SCTP_INP_RLOCK(it->inp);
	/* now go through each assoc which is in the desired state */
	if (it->done_current_ep == 0) {
		/* per-endpoint callback runs once; its return may skip assocs */
		if (it->function_inp != NULL)
			inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
		it->done_current_ep = 1;
	}
	if (it->stcb == NULL) {
		/* fresh endpoint: start from its first association */
		it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
	}
	SCTP_INP_RUNLOCK(it->inp);
	if ((inp_skip) || it->stcb == NULL) {
		/* callback asked to skip, or no assocs: close out this endpoint */
		if (it->function_inp_end != NULL) {
			inp_skip = (*it->function_inp_end) (it->inp,
			    it->pointer,
			    it->val);
		}
		goto no_stcb;
	}
	/* clear our resume marker if we are picking up where we left off */
	if ((it->stcb) &&
	    (it->stcb->asoc.stcb_starting_point_for_iterator == it)) {
		it->stcb->asoc.stcb_starting_point_for_iterator = NULL;
	}
	while (it->stcb) {
		SCTP_TCB_LOCK(it->stcb);
		if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
			/* not in the right state... keep looking */
			SCTP_TCB_UNLOCK(it->stcb);
			goto next_assoc;
		}
		/* mark the current iterator on the assoc */
		it->stcb->asoc.stcb_starting_point_for_iterator = it;
		/* see if we have limited out the iterator loop */
		iteration_count++;
		if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
	start_timer_return:
			/*
			 * Budget exhausted (or endpoint collision above):
			 * re-arm the iterator timer and resume later from
			 * the saved it->inp/it->stcb position.  NOTE: also
			 * entered by goto with it->stcb possibly NULL and
			 * no TCB lock held, hence the guarded unlock.
			 */
			/* set a timer to continue this later */
			if (it->stcb)
				SCTP_TCB_UNLOCK(it->stcb);
			sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR,
			    (struct sctp_inpcb *)it, NULL, NULL);
			SCTP_ITERATOR_UNLOCK();
			return;
		}
		/* run function on this one */
		(*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);

		/*
		 * we lie here, it really needs to have its own type but
		 * first I must verify that this won't effect things :-0
		 */
		if (it->no_chunk_output == 0)
			sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3);

		SCTP_TCB_UNLOCK(it->stcb);
next_assoc:
		it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
		if (it->stcb == NULL) {
			/* last assoc on this endpoint: run the end callback */
			if (it->function_inp_end != NULL) {
				inp_skip = (*it->function_inp_end) (it->inp,
				    it->pointer,
				    it->val);
			}
		}
	}
no_stcb:
	/* done with all assocs on this endpoint, move on to next endpoint */
	it->done_current_ep = 0;
	SCTP_INP_WLOCK(it->inp);
	/* release our claim on the endpoint */
	it->inp->inp_starting_point_for_iterator = NULL;
	SCTP_INP_WUNLOCK(it->inp);
	if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
		it->inp = NULL;
	} else {
		SCTP_INP_INFO_RLOCK();
		it->inp = LIST_NEXT(it->inp, sctp_list);
		SCTP_INP_INFO_RUNLOCK();
	}
	if (it->inp == NULL) {
		goto done_with_iterator;
	}
	goto select_a_new_ep;
}
1900