/*-
 * Copyright (c) 2001-2007, by Cisco Systems, Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * a) Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * b) Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the distribution.
 *
 * c) Neither the name of Cisco Systems, Inc. nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/* $KAME: sctp_timer.c,v 1.29 2005/03/06 16:04:18 itojun Exp $	 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/netinet/sctp_timer.c 189790 2009-03-14 13:42:13Z rrs $");

#define _IP_VHL
#include <netinet/sctp_os.h>
#include <netinet/sctp_pcb.h>
#ifdef INET6
#endif
#include <netinet/sctp_var.h>
#include <netinet/sctp_sysctl.h>
#include <netinet/sctp_timer.h>
#include <netinet/sctputil.h>
#include <netinet/sctp_output.h>
#include <netinet/sctp_header.h>
#include <netinet/sctp_indata.h>
#include <netinet/sctp_asconf.h>
#include <netinet/sctp_input.h>
#include <netinet/sctp.h>
#include <netinet/sctp_uio.h>
#include <netinet/udp.h>


void
sctp_early_fr_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_tmit_chunk *chk, *tp2;
	struct timeval now, min_wait, tv;
	unsigned int cur_rtt, cnt = 0, cnt_resend = 0;

	/* an early FR is occurring. */
	(void)SCTP_GETTIME_TIMEVAL(&now);
	/* get cur rto in microseconds */
	if (net->lastsa == 0) {
		/* Hmm, no RTT estimate yet? */
		cur_rtt = stcb->asoc.initial_rto >> 2;
	} else {
		cur_rtt = ((net->lastsa >> 2) + net->lastsv) >> 1;
	}
	if (cur_rtt < SCTP_BASE_SYSCTL(sctp_early_fr_msec)) {
		cur_rtt = SCTP_BASE_SYSCTL(sctp_early_fr_msec);
	}
	cur_rtt *= 1000;
	tv.tv_sec = cur_rtt / 1000000;
	tv.tv_usec = cur_rtt % 1000000;
	min_wait = now;
	timevalsub(&min_wait, &tv);
	if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
		/*
		 * if we hit here, we don't have enough seconds on the clock
		 * to account for the RTO. We just let the lower seconds be
		 * the bounds and don't worry about it. This may mean we
		 * will mark a lot more than we should.
		 */
		min_wait.tv_sec = min_wait.tv_usec = 0;
	}
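	/*
	 * min_wait is the newest send time still eligible for early-FR
	 * marking.  A worked example, assuming cur_rtt is clamped to a
	 * sctp_early_fr_msec floor of 250 msec: with now = {100, 100000},
	 * tv becomes {0, 250000} and min_wait = {99, 850000}.  Only chunks
	 * sent before that boundary, i.e. outstanding for at least one
	 * (clamped) RTT, are marked for retransmission below.
	 */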
	chk = TAILQ_LAST(&stcb->asoc.sent_queue, sctpchunk_listhead);
	for (; chk != NULL; chk = tp2) {
		tp2 = TAILQ_PREV(chk, sctpchunk_listhead, sctp_next);
		if (chk->whoTo != net) {
			continue;
		}
		if (chk->sent == SCTP_DATAGRAM_RESEND)
			cnt_resend++;
		else if ((chk->sent > SCTP_DATAGRAM_UNSENT) &&
		    (chk->sent < SCTP_DATAGRAM_RESEND)) {
			/* pending, may need retran */
			if (chk->sent_rcv_time.tv_sec > min_wait.tv_sec) {
				/*
				 * we have reached a chunk that was sent
				 * some seconds past our min; forget it, we
				 * will find no more to send.
				 */
				continue;
			} else if (chk->sent_rcv_time.tv_sec == min_wait.tv_sec) {
				/*
				 * we must look at the microseconds to
				 * know.
				 */
				if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
					/*
					 * ok it was sent after our boundary
					 * time.
					 */
					continue;
				}
			}
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_EARLYFR_LOGGING_ENABLE) {
				sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
				    4, SCTP_FR_MARKED_EARLY);
			}
			SCTP_STAT_INCR(sctps_earlyfrmrkretrans);
			chk->sent = SCTP_DATAGRAM_RESEND;
			sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
			/* double book size since we are doing an early FR */
			chk->book_size_scale++;
			cnt += chk->send_size;
			if ((cnt + net->flight_size) > net->cwnd) {
				/* Mark all we could possibly resend */
				break;
			}
		}
	}
	if (cnt) {
		/*
		 * JRS - Use the congestion control given in the congestion
		 * control module
		 */
		stcb->asoc.cc_functions.sctp_cwnd_update_after_fr_timer(inp, stcb, net);
	} else if (cnt_resend) {
		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_EARLY_FR_TMR, SCTP_SO_NOT_LOCKED);
	}
	/* Restart it? */
	if (net->flight_size < net->cwnd) {
		SCTP_STAT_INCR(sctps_earlyfrstrtmr);
		sctp_timer_start(SCTP_TIMER_TYPE_EARLYFR, stcb->sctp_ep, stcb, net);
	}
}

void
sctp_audit_retranmission_queue(struct sctp_association *asoc)
{
	struct sctp_tmit_chunk *chk;

	SCTPDBG(SCTP_DEBUG_TIMER4, "Audit invoked on send queue cnt:%d onqueue:%d\n",
	    asoc->sent_queue_retran_cnt,
	    asoc->sent_queue_cnt);
	asoc->sent_queue_retran_cnt = 0;
	asoc->sent_queue_cnt = 0;
	TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) {
		if (chk->sent == SCTP_DATAGRAM_RESEND) {
			sctp_ucount_incr(asoc->sent_queue_retran_cnt);
		}
		asoc->sent_queue_cnt++;
	}
	TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) {
		if (chk->sent == SCTP_DATAGRAM_RESEND) {
			sctp_ucount_incr(asoc->sent_queue_retran_cnt);
		}
	}
	TAILQ_FOREACH(chk, &asoc->asconf_send_queue, sctp_next) {
		if (chk->sent == SCTP_DATAGRAM_RESEND) {
			sctp_ucount_incr(asoc->sent_queue_retran_cnt);
		}
	}
	SCTPDBG(SCTP_DEBUG_TIMER4, "Audit completes retran:%d onqueue:%d\n",
	    asoc->sent_queue_retran_cnt,
	    asoc->sent_queue_cnt);
}

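/*
 * Threshold management: bump the error counter for this destination (if
 * one is given) and for the association as a whole, mark the destination
 * unreachable once its own threshold is crossed, and abort the association
 * once the overall count exceeds 'threshold'.  Returns 1 if the
 * association was aborted, 0 otherwise.
 */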
int
sctp_threshold_management(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net, uint16_t threshold)
{
	if (net) {
		net->error_count++;
		SCTPDBG(SCTP_DEBUG_TIMER4, "Error count for %p now %d thresh:%d\n",
		    net, net->error_count,
		    net->failure_threshold);
		if (net->error_count > net->failure_threshold) {
			/* We had a threshold failure */
			if (net->dest_state & SCTP_ADDR_REACHABLE) {
				net->dest_state &= ~SCTP_ADDR_REACHABLE;
				net->dest_state |= SCTP_ADDR_NOT_REACHABLE;
				net->dest_state &= ~SCTP_ADDR_REQ_PRIMARY;
				if (net == stcb->asoc.primary_destination) {
					net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
				}
				/*
				 * JRS 5/14/07 - If a destination is
				 * unreachable, the PF bit is turned off.
				 * This allows an unambiguous use of the PF
				 * bit for destinations that are reachable
				 * but potentially failed. If the
				 * destination is set to the unreachable
				 * state, also set the destination to the PF
				 * state.
				 */
				/*
				 * Add debug message here if destination is
				 * not in PF state.
				 */
				/* Stop any running T3 timers here? */
				if (SCTP_BASE_SYSCTL(sctp_cmt_on_off) && SCTP_BASE_SYSCTL(sctp_cmt_pf)) {
					net->dest_state &= ~SCTP_ADDR_PF;
					SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to unreachable.\n",
					    net);
				}
				sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN,
				    stcb,
				    SCTP_FAILED_THRESHOLD,
				    (void *)net, SCTP_SO_NOT_LOCKED);
			}
		}
		/*********HOLD THIS COMMENT FOR PATCH OF ALTERNATE
		 *********ROUTING CODE
		 */
		/*********HOLD THIS COMMENT FOR END OF PATCH OF ALTERNATE
		 *********ROUTING CODE
		 */
	}
	if (stcb == NULL)
		return (0);

	if (net) {
		if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0) {
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
				sctp_misc_ints(SCTP_THRESHOLD_INCR,
				    stcb->asoc.overall_error_count,
				    (stcb->asoc.overall_error_count + 1),
				    SCTP_FROM_SCTP_TIMER,
				    __LINE__);
			}
			stcb->asoc.overall_error_count++;
		}
	} else {
		if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_THRESHOLD_LOGGING) {
			sctp_misc_ints(SCTP_THRESHOLD_INCR,
			    stcb->asoc.overall_error_count,
			    (stcb->asoc.overall_error_count + 1),
			    SCTP_FROM_SCTP_TIMER,
			    __LINE__);
		}
		stcb->asoc.overall_error_count++;
	}
	SCTPDBG(SCTP_DEBUG_TIMER4, "Overall error count for %p now %d thresh:%u state:%x\n",
	    &stcb->asoc, stcb->asoc.overall_error_count,
	    (uint32_t) threshold,
	    ((net == NULL) ? (uint32_t) 0 : (uint32_t) net->dest_state));
	/*
	 * We specifically do not do >= to give the assoc one more chance
	 * before we fail it.
	 */
	if (stcb->asoc.overall_error_count > threshold) {
		/* Abort notification sends a ULP notify */
		struct mbuf *oper;

		oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
		    0, M_DONTWAIT, 1, MT_DATA);
		if (oper) {
			struct sctp_paramhdr *ph;
			uint32_t *ippp;

			SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
			    sizeof(uint32_t);
			ph = mtod(oper, struct sctp_paramhdr *);
			ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
			ph->param_length = htons(SCTP_BUF_LEN(oper));
			ippp = (uint32_t *) (ph + 1);
			*ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_1);
		}
		inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_1;
		sctp_abort_an_association(inp, stcb, SCTP_FAILED_THRESHOLD, oper, SCTP_SO_NOT_LOCKED);
		return (1);
	}
	return (0);
}

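/*
 * A summary of the 'mode' argument below, inferred from the code paths in
 * this function: mode 2 is the CMT-PF policy (prefer the largest-cwnd
 * active destination, falling back to the PF destination with the fewest
 * errors), mode 1 is the plain CMT policy (largest cwnd among reachable,
 * confirmed destinations), and any other value falls through to the
 * classic round-robin walk over the net list.
 */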
struct sctp_nets *
sctp_find_alternate_net(struct sctp_tcb *stcb,
    struct sctp_nets *net,
    int mode)
{
	/* Find and return an alternate network if possible */
	struct sctp_nets *alt, *mnet, *min_errors_net = NULL, *max_cwnd_net = NULL;
	int once;

	/* JRS 5/14/07 - Initialize min_errors to an impossible value. */
	int min_errors = -1;
	uint32_t max_cwnd = 0;

	if (stcb->asoc.numnets == 1) {
		/* No others but net */
		return (TAILQ_FIRST(&stcb->asoc.nets));
	}
	/*
	 * JRS 5/14/07 - If mode is set to 2, use the CMT PF find alternate
	 * net algorithm. This algorithm chooses the active destination (not
	 * in PF state) with the largest cwnd value. If all destinations are
	 * in PF state, unreachable, or unconfirmed, choose the destination
	 * that is in PF state with the lowest error count. In case of a
	 * tie, choose the destination that was most recently active.
	 */
	if (mode == 2) {
		TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
			/*
			 * JRS 5/14/07 - If the destination is unreachable
			 * or unconfirmed, skip it.
			 */
			if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
			    (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)) {
				continue;
			}
			/*
			 * JRS 5/14/07 -  If the destination is reachable
			 * but in PF state, compare the error count of the
			 * destination to the minimum error count seen thus
			 * far. Store the destination with the lower error
			 * count.  If the error counts are equal, store the
			 * destination that was most recently active.
			 */
			if (mnet->dest_state & SCTP_ADDR_PF) {
				/*
				 * JRS 5/14/07 - If the destination under
				 * consideration is the current destination,
				 * work as if the error count is one higher.
				 * The actual error count will not be
				 * incremented until later in the t3
				 * handler.
				 */
				if (mnet == net) {
					if (min_errors == -1) {
						min_errors = mnet->error_count + 1;
						min_errors_net = mnet;
					} else if (mnet->error_count + 1 < min_errors) {
						min_errors = mnet->error_count + 1;
						min_errors_net = mnet;
					} else if (mnet->error_count + 1 == min_errors
					    && mnet->last_active > min_errors_net->last_active) {
						min_errors_net = mnet;
						min_errors = mnet->error_count + 1;
					}
					continue;
				} else {
					if (min_errors == -1) {
						min_errors = mnet->error_count;
						min_errors_net = mnet;
					} else if (mnet->error_count < min_errors) {
						min_errors = mnet->error_count;
						min_errors_net = mnet;
					} else if (mnet->error_count == min_errors
					    && mnet->last_active > min_errors_net->last_active) {
						min_errors_net = mnet;
						min_errors = mnet->error_count;
					}
					continue;
				}
			}
			/*
			 * JRS 5/14/07 - If the destination is reachable and
			 * not in PF state, compare the cwnd of the
			 * destination to the highest cwnd seen thus far.
			 * Store the destination with the higher cwnd value.
			 * If the cwnd values are equal, randomly choose one
			 * of the two destinations.
			 */
			if (max_cwnd < mnet->cwnd) {
				max_cwnd_net = mnet;
				max_cwnd = mnet->cwnd;
			} else if (max_cwnd == mnet->cwnd) {
				uint32_t rndval;
				uint8_t this_random;

				if (stcb->asoc.hb_random_idx > 3) {
					rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
					memcpy(stcb->asoc.hb_random_values, &rndval, sizeof(stcb->asoc.hb_random_values));
					this_random = stcb->asoc.hb_random_values[0];
					stcb->asoc.hb_random_idx++;
					stcb->asoc.hb_ect_randombit = 0;
				} else {
					this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
					stcb->asoc.hb_random_idx++;
					stcb->asoc.hb_ect_randombit = 0;
				}
				if (this_random % 2 == 1) {
					max_cwnd_net = mnet;
					max_cwnd = mnet->cwnd;	/* Useless? */
				}
			}
		}
		/*
		 * JRS 5/14/07 - After all destinations have been considered
		 * as alternates, check to see if there was some active
		 * destination (not in PF state).  If not, check to see if
		 * there was some PF destination with the minimum number of
		 * errors.  If not, return the original destination.  If
		 * there is a min_errors_net, remove the PF flag from that
		 * destination, set the cwnd to one or two MTUs, and return
		 * the destination as an alt. If there was some active
		 * destination with a highest cwnd, return the destination
		 * as an alt.
		 */
		if (max_cwnd_net == NULL) {
			if (min_errors_net == NULL) {
				return (net);
			}
			min_errors_net->dest_state &= ~SCTP_ADDR_PF;
			min_errors_net->cwnd = min_errors_net->mtu * SCTP_BASE_SYSCTL(sctp_cmt_pf);
			if (SCTP_OS_TIMER_PENDING(&min_errors_net->rxt_timer.timer)) {
				sctp_timer_stop(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep,
				    stcb, min_errors_net,
				    SCTP_FROM_SCTP_TIMER + SCTP_LOC_2);
			}
			SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from PF to active with %d errors.\n",
			    min_errors_net, min_errors_net->error_count);
			return (min_errors_net);
		} else {
			return (max_cwnd_net);
		}
	}
	/*
	 * JRS 5/14/07 - If mode is set to 1, use the CMT policy for
	 * choosing an alternate net.
	 */
	else if (mode == 1) {
		TAILQ_FOREACH(mnet, &stcb->asoc.nets, sctp_next) {
			if (((mnet->dest_state & SCTP_ADDR_REACHABLE) != SCTP_ADDR_REACHABLE) ||
			    (mnet->dest_state & SCTP_ADDR_UNCONFIRMED)
			    ) {
				/*
				 * will skip ones that are not-reachable or
				 * unconfirmed
				 */
				continue;
			}
			if (max_cwnd < mnet->cwnd) {
				max_cwnd_net = mnet;
				max_cwnd = mnet->cwnd;
			} else if (max_cwnd == mnet->cwnd) {
				uint32_t rndval;
				uint8_t this_random;

				if (stcb->asoc.hb_random_idx > 3) {
					rndval = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep);
					memcpy(stcb->asoc.hb_random_values, &rndval,
					    sizeof(stcb->asoc.hb_random_values));
					this_random = stcb->asoc.hb_random_values[0];
					stcb->asoc.hb_random_idx = 0;
					stcb->asoc.hb_ect_randombit = 0;
				} else {
					this_random = stcb->asoc.hb_random_values[stcb->asoc.hb_random_idx];
					stcb->asoc.hb_random_idx++;
					stcb->asoc.hb_ect_randombit = 0;
				}
				if (this_random % 2) {
					max_cwnd_net = mnet;
					max_cwnd = mnet->cwnd;
				}
			}
		}
		if (max_cwnd_net) {
			return (max_cwnd_net);
		}
	}
	mnet = net;
	once = 0;

	if (mnet == NULL) {
		mnet = TAILQ_FIRST(&stcb->asoc.nets);
	}
	do {
		alt = TAILQ_NEXT(mnet, sctp_next);
		if (alt == NULL) {
			once++;
			if (once > 1) {
				break;
			}
			alt = TAILQ_FIRST(&stcb->asoc.nets);
		}
		if (alt->ro.ro_rt == NULL) {
			if (alt->ro._s_addr) {
				sctp_free_ifa(alt->ro._s_addr);
				alt->ro._s_addr = NULL;
			}
			alt->src_addr_selected = 0;
		}
		if (
		    ((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
		    (alt->ro.ro_rt != NULL) &&
		/* sa_ignore NO_NULL_CHK */
		    (!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))
		    ) {
			/* Found a reachable address */
			break;
		}
		mnet = alt;
	} while (alt != NULL);

	if (alt == NULL) {
		/* Case where NO in-service network exists (dormant state) */
		/* we rotate destinations */
		once = 0;
		mnet = net;
		do {
			alt = TAILQ_NEXT(mnet, sctp_next);
			if (alt == NULL) {
				once++;
				if (once > 1) {
					break;
				}
				alt = TAILQ_FIRST(&stcb->asoc.nets);
			}
			/* sa_ignore NO_NULL_CHK */
			if ((!(alt->dest_state & SCTP_ADDR_UNCONFIRMED)) &&
			    (alt != net)) {
				/* Found an alternate address */
				break;
			}
			mnet = alt;
		} while (alt != NULL);
	}
	if (alt == NULL) {
		return (net);
	}
	return (alt);
}


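/*
 * Exponential backoff per RFC 4960 (rule E2 of section 6.3.3): each
 * timeout doubles the destination's RTO, clamped to RTO.Max.  For example,
 * with hypothetical bounds minrto = 1000 msec and maxrto = 60000 msec,
 * successive timeouts yield 1s, 2s, 4s, ..., 32s, 60s, 60s.  The cwnd
 * penalty is delegated to the congestion-control module and skipped for
 * window probes.
 */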
static void
sctp_backoff_on_timeout(struct sctp_tcb *stcb,
    struct sctp_nets *net,
    int win_probe,
    int num_marked)
{
	if (net->RTO == 0) {
		net->RTO = stcb->asoc.minrto;
	}
	net->RTO <<= 1;
	if (net->RTO > stcb->asoc.maxrto) {
		net->RTO = stcb->asoc.maxrto;
	}
	if ((win_probe == 0) && num_marked) {
		/* We don't apply penalty to window probe scenarios */
		/* JRS - Use the congestion control given in the CC module */
		stcb->asoc.cc_functions.sctp_cwnd_update_after_timeout(stcb, net);
	}
}

#ifndef INVARIANTS
static void
sctp_recover_sent_list(struct sctp_tcb *stcb)
{
	struct sctp_tmit_chunk *chk, *tp2;
	struct sctp_association *asoc;

	asoc = &stcb->asoc;
	chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
	for (; chk != NULL; chk = tp2) {
		tp2 = TAILQ_NEXT(chk, sctp_next);
		if ((compare_with_wrap(stcb->asoc.last_acked_seq,
		    chk->rec.data.TSN_seq,
		    MAX_TSN)) ||
		    (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) {
			SCTP_PRINTF("Found chk:%p tsn:%x <= last_acked_seq:%x\n",
			    chk, chk->rec.data.TSN_seq, stcb->asoc.last_acked_seq);
			TAILQ_REMOVE(&asoc->sent_queue, chk, sctp_next);
			if (chk->pr_sctp_on) {
				if (asoc->pr_sctp_cnt != 0)
					asoc->pr_sctp_cnt--;
			}
			if (chk->data) {
				/* sa_ignore NO_NULL_CHK */
				sctp_free_bufspace(stcb, asoc, chk, 1);
				sctp_m_freem(chk->data);
				if (PR_SCTP_BUF_ENABLED(chk->flags)) {
					asoc->sent_queue_cnt_removeable--;
				}
			}
			chk->data = NULL;
			asoc->sent_queue_cnt--;
			sctp_free_a_chunk(stcb, chk);
		}
	}
	SCTP_PRINTF("after recover order is as follows\n");
	chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
	for (; chk != NULL; chk = tp2) {
		tp2 = TAILQ_NEXT(chk, sctp_next);
		SCTP_PRINTF("chk:%p TSN:%x\n", chk, chk->rec.data.TSN_seq);
	}
}

#endif

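/*
 * Walk the sent queue for 'net' and mark every chunk that has been
 * outstanding for roughly an RTO (the min_wait boundary computed below) as
 * SCTP_DATAGRAM_RESEND, rehoming it to 'alt' when that differs from 'net'.
 * PR-SCTP chunks whose lifetime or retransmit limit has expired are
 * dropped instead of marked.  The number of newly marked chunks is
 * returned through *num_marked.
 */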
static int
sctp_mark_all_for_resend(struct sctp_tcb *stcb,
    struct sctp_nets *net,
    struct sctp_nets *alt,
    int window_probe,
    int *num_marked)
{

	/*
	 * Mark all chunks (well not all) that were sent to *net for
	 * retransmission. Move them to alt as their destination as well...
	 * We only mark chunks that have been outstanding long enough to
	 * have received feedback.
	 */
	struct sctp_tmit_chunk *chk, *tp2, *could_be_sent = NULL;
	struct sctp_nets *lnets;
	struct timeval now, min_wait, tv;
	int cur_rtt;
	int audit_tf, num_mk, fir;
	unsigned int cnt_mk;
	uint32_t orig_flight, orig_tf;
	uint32_t tsnlast, tsnfirst;
	int recovery_cnt = 0;


	/* none in flight now */
	audit_tf = 0;
	fir = 0;
	/*
	 * figure out how long a data chunk must be pending before we can
	 * mark it ..
	 */
	(void)SCTP_GETTIME_TIMEVAL(&now);
	/* get cur rto in microseconds */
	cur_rtt = (((net->lastsa >> 2) + net->lastsv) >> 1);
	cur_rtt *= 1000;
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
		sctp_log_fr(cur_rtt,
		    stcb->asoc.peers_rwnd,
		    window_probe,
		    SCTP_FR_T3_MARK_TIME);
		sctp_log_fr(net->flight_size,
		    SCTP_OS_TIMER_PENDING(&net->fr_timer.timer),
		    SCTP_OS_TIMER_ACTIVE(&net->fr_timer.timer),
		    SCTP_FR_CWND_REPORT);
		sctp_log_fr(net->flight_size, net->cwnd, stcb->asoc.total_flight, SCTP_FR_CWND_REPORT);
	}
	tv.tv_sec = cur_rtt / 1000000;
	tv.tv_usec = cur_rtt % 1000000;
	min_wait = now;
	timevalsub(&min_wait, &tv);
	if (min_wait.tv_sec < 0 || min_wait.tv_usec < 0) {
		/*
		 * if we hit here, we don't have enough seconds on the clock
		 * to account for the RTO. We just let the lower seconds be
		 * the bounds and don't worry about it. This may mean we
		 * will mark a lot more than we should.
		 */
		min_wait.tv_sec = min_wait.tv_usec = 0;
	}
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
		sctp_log_fr(cur_rtt, now.tv_sec, now.tv_usec, SCTP_FR_T3_MARK_TIME);
		sctp_log_fr(0, min_wait.tv_sec, min_wait.tv_usec, SCTP_FR_T3_MARK_TIME);
	}
	/*
	 * Our rwnd will be incorrect here since we are not adding back the
	 * cnt * mbuf but we will fix that down below.
	 */
	orig_flight = net->flight_size;
	orig_tf = stcb->asoc.total_flight;

	net->fast_retran_ip = 0;
	/* Now on to each chunk */
	num_mk = cnt_mk = 0;
	tsnfirst = tsnlast = 0;
#ifndef INVARIANTS
start_again:
#endif
	chk = TAILQ_FIRST(&stcb->asoc.sent_queue);
	for (; chk != NULL; chk = tp2) {
		tp2 = TAILQ_NEXT(chk, sctp_next);
		if ((compare_with_wrap(stcb->asoc.last_acked_seq,
		    chk->rec.data.TSN_seq,
		    MAX_TSN)) ||
		    (stcb->asoc.last_acked_seq == chk->rec.data.TSN_seq)) {
			/* Strange case our list got out of order? */
			SCTP_PRINTF("Our list is out of order? last_acked:%x chk:%x\n",
			    (unsigned int)stcb->asoc.last_acked_seq, (unsigned int)chk->rec.data.TSN_seq);
			recovery_cnt++;
#ifdef INVARIANTS
			panic("last acked >= chk on sent-Q");
#else
			SCTP_PRINTF("Recover attempts a restart cnt:%d\n", recovery_cnt);
			sctp_recover_sent_list(stcb);
			if (recovery_cnt < 10) {
				goto start_again;
			} else {
				SCTP_PRINTF("Recovery fails %d times??\n", recovery_cnt);
			}
#endif
		}
		if ((chk->whoTo == net) && (chk->sent < SCTP_DATAGRAM_ACKED)) {
			/*
			 * found one to mark: If it is less than
			 * DATAGRAM_ACKED it MUST not be a skipped or marked
			 * TSN but instead one that is either already set
			 * for retransmission OR one that needs
			 * retransmission.
			 */

			/* validate it's been outstanding long enough */
			if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
				sctp_log_fr(chk->rec.data.TSN_seq,
				    chk->sent_rcv_time.tv_sec,
				    chk->sent_rcv_time.tv_usec,
				    SCTP_FR_T3_MARK_TIME);
			}
			if ((chk->sent_rcv_time.tv_sec > min_wait.tv_sec) && (window_probe == 0)) {
				/*
				 * we have reached a chunk that was sent
				 * some seconds past our min; forget it, we
				 * will find no more to send.
				 */
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
					sctp_log_fr(0,
					    chk->sent_rcv_time.tv_sec,
					    chk->sent_rcv_time.tv_usec,
					    SCTP_FR_T3_STOPPED);
				}
				continue;
			} else if ((chk->sent_rcv_time.tv_sec == min_wait.tv_sec) &&
			    (window_probe == 0)) {
				/*
				 * we must look at the microseconds to
				 * know.
				 */
				if (chk->sent_rcv_time.tv_usec >= min_wait.tv_usec) {
					/*
					 * ok it was sent after our boundary
					 * time.
					 */
					if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
						sctp_log_fr(0,
						    chk->sent_rcv_time.tv_sec,
						    chk->sent_rcv_time.tv_usec,
						    SCTP_FR_T3_STOPPED);
					}
					continue;
				}
			}
			if (PR_SCTP_TTL_ENABLED(chk->flags)) {
				/* Is it expired? */
				if ((now.tv_sec > chk->rec.data.timetodrop.tv_sec) ||
				    ((chk->rec.data.timetodrop.tv_sec == now.tv_sec) &&
				    (now.tv_usec > chk->rec.data.timetodrop.tv_usec))) {
					/* Yes so drop it */
					if (chk->data) {
						(void)sctp_release_pr_sctp_chunk(stcb,
						    chk,
						    (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
						    SCTP_SO_NOT_LOCKED);
					}
					continue;
				}
			}
			if (PR_SCTP_RTX_ENABLED(chk->flags)) {
				/* Has it been retransmitted tv_sec times? */
				if (chk->snd_count > chk->rec.data.timetodrop.tv_sec) {
					if (chk->data) {
						(void)sctp_release_pr_sctp_chunk(stcb,
						    chk,
						    (SCTP_RESPONSE_TO_USER_REQ | SCTP_NOTIFY_DATAGRAM_SENT),
						    SCTP_SO_NOT_LOCKED);
					}
					continue;
				}
			}
			if (chk->sent < SCTP_DATAGRAM_RESEND) {
				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
				num_mk++;
				if (fir == 0) {
					fir = 1;
					tsnfirst = chk->rec.data.TSN_seq;
				}
				tsnlast = chk->rec.data.TSN_seq;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
					sctp_log_fr(chk->rec.data.TSN_seq, chk->snd_count,
					    0, SCTP_FR_T3_MARKED);
				}
				if (chk->rec.data.chunk_was_revoked) {
					/* deflate the cwnd */
					chk->whoTo->cwnd -= chk->book_size;
					chk->rec.data.chunk_was_revoked = 0;
				}
				net->marked_retrans++;
				stcb->asoc.marked_retrans++;
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
					sctp_misc_ints(SCTP_FLIGHT_LOG_DOWN_RSND_TO,
					    chk->whoTo->flight_size,
					    chk->book_size,
					    (uintptr_t) chk->whoTo,
					    chk->rec.data.TSN_seq);
				}
				sctp_flight_size_decrease(chk);
				sctp_total_flight_decrease(stcb, chk);
				stcb->asoc.peers_rwnd += chk->send_size;
				stcb->asoc.peers_rwnd += SCTP_BASE_SYSCTL(sctp_peer_chunk_oh);
			}
			chk->sent = SCTP_DATAGRAM_RESEND;
			SCTP_STAT_INCR(sctps_markedretrans);

			/* reset the TSN for striking and other FR stuff */
			chk->rec.data.doing_fast_retransmit = 0;
			/* Clear any time so NO RTT is being done */
			chk->do_rtt = 0;
			if (alt != net) {
				sctp_free_remote_addr(chk->whoTo);
				chk->no_fr_allowed = 1;
				chk->whoTo = alt;
				atomic_add_int(&alt->ref_count, 1);
			} else {
				chk->no_fr_allowed = 0;
				if (TAILQ_EMPTY(&stcb->asoc.send_queue)) {
					chk->rec.data.fast_retran_tsn = stcb->asoc.sending_seq;
				} else {
					chk->rec.data.fast_retran_tsn = (TAILQ_FIRST(&stcb->asoc.send_queue))->rec.data.TSN_seq;
				}
			}
			/*
			 * CMT: Do not allow FRs on retransmitted TSNs.
			 */
			if (SCTP_BASE_SYSCTL(sctp_cmt_on_off) == 1) {
				chk->no_fr_allowed = 1;
			}
		} else if (chk->sent == SCTP_DATAGRAM_ACKED) {
			/* remember highest acked one */
			could_be_sent = chk;
		}
		if (chk->sent == SCTP_DATAGRAM_RESEND) {
			cnt_mk++;
		}
	}
	if ((orig_flight - net->flight_size) != (orig_tf - stcb->asoc.total_flight)) {
		/* we did not subtract the same things? */
		audit_tf = 1;
	}
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & (SCTP_EARLYFR_LOGGING_ENABLE | SCTP_FR_LOGGING_ENABLE)) {
		sctp_log_fr(tsnfirst, tsnlast, num_mk, SCTP_FR_T3_TIMEOUT);
	}
#ifdef SCTP_DEBUG
	if (num_mk) {
		SCTPDBG(SCTP_DEBUG_TIMER1, "LAST TSN marked was %x\n",
		    tsnlast);
		SCTPDBG(SCTP_DEBUG_TIMER1, "Num marked for retransmission was %d peer-rwd:%ld\n",
		    num_mk, (u_long)stcb->asoc.peers_rwnd);
	}
#endif
	*num_marked = num_mk;
	if ((stcb->asoc.sent_queue_retran_cnt == 0) && (could_be_sent)) {
		/* fix it so we retransmit the highest acked anyway */
		sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
		cnt_mk++;
		could_be_sent->sent = SCTP_DATAGRAM_RESEND;
	}
	if (stcb->asoc.sent_queue_retran_cnt != cnt_mk) {
#ifdef INVARIANTS
		SCTP_PRINTF("Local Audit says there are %d for retran asoc cnt:%d we marked:%d this time\n",
		    cnt_mk, stcb->asoc.sent_queue_retran_cnt, num_mk);
#endif
#ifndef SCTP_AUDITING_ENABLED
		stcb->asoc.sent_queue_retran_cnt = cnt_mk;
#endif
	}
	/* Now check for an ECN Echo that may be stranded */
	TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
		if ((chk->whoTo == net) &&
		    (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
			sctp_free_remote_addr(chk->whoTo);
			chk->whoTo = alt;
			if (chk->sent != SCTP_DATAGRAM_RESEND) {
				chk->sent = SCTP_DATAGRAM_RESEND;
				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
			}
			atomic_add_int(&alt->ref_count, 1);
		}
	}
	if (audit_tf) {
		SCTPDBG(SCTP_DEBUG_TIMER4,
		    "Audit total flight due to negative value net:%p\n",
		    net);
		stcb->asoc.total_flight = 0;
		stcb->asoc.total_flight_count = 0;
		/* Clear all networks flight size */
		TAILQ_FOREACH(lnets, &stcb->asoc.nets, sctp_next) {
			lnets->flight_size = 0;
			SCTPDBG(SCTP_DEBUG_TIMER4,
			    "Net:%p c-f cwnd:%d ssthresh:%d\n",
			    lnets, lnets->cwnd, lnets->ssthresh);
		}
		TAILQ_FOREACH(chk, &stcb->asoc.sent_queue, sctp_next) {
			if (chk->sent < SCTP_DATAGRAM_RESEND) {
				if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) {
					sctp_misc_ints(SCTP_FLIGHT_LOG_UP,
					    chk->whoTo->flight_size,
					    chk->book_size,
					    (uintptr_t) chk->whoTo,
					    chk->rec.data.TSN_seq);
				}
				sctp_flight_size_increase(chk);
				sctp_total_flight_increase(stcb, chk);
			}
		}
	}
	/*
	 * Setup the ECN nonce re-sync point. We do this since
	 * retransmissions are NOT setup for ECN. This means that due to
	 * Karn's rule, we don't know the total of the peer's ECN bits.
	 */
	chk = TAILQ_FIRST(&stcb->asoc.send_queue);
	if (chk == NULL) {
		stcb->asoc.nonce_resync_tsn = stcb->asoc.sending_seq;
	} else {
		stcb->asoc.nonce_resync_tsn = chk->rec.data.TSN_seq;
	}
	stcb->asoc.nonce_wait_for_ecne = 0;
	stcb->asoc.nonce_sum_check = 0;
	/*
	 * We always return 0 here; the window-probe case is detected by the
	 * caller before it calls us.
	 */
	return (0);
}

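/*
 * Rehome every queued item (stream-pending messages and chunks on the send
 * queue) whose destination is 'net' to 'alt', adjusting the remote-address
 * reference counts as we go.
 */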
static void
sctp_move_all_chunks_to_alt(struct sctp_tcb *stcb,
    struct sctp_nets *net,
    struct sctp_nets *alt)
{
	struct sctp_association *asoc;
	struct sctp_stream_out *outs;
	struct sctp_tmit_chunk *chk;
	struct sctp_stream_queue_pending *sp;

	if (net == alt)
		/* nothing to do */
		return;

	asoc = &stcb->asoc;

	/*
	 * now go through all the streams checking for chunks sent to our
	 * bad network.
	 */
	TAILQ_FOREACH(outs, &asoc->out_wheel, next_spoke) {
		/* now clean up any chunks here */
		TAILQ_FOREACH(sp, &outs->outqueue, next) {
			if (sp->net == net) {
				sctp_free_remote_addr(sp->net);
				sp->net = alt;
				atomic_add_int(&alt->ref_count, 1);
			}
		}
	}
	/* Now check the pending queue */
	TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) {
		if (chk->whoTo == net) {
			sctp_free_remote_addr(chk->whoTo);
			chk->whoTo = alt;
			atomic_add_int(&alt->ref_count, 1);
		}
	}

}

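/*
 * T3-rtx timeout handling, in outline: pick an alternate destination
 * (policy depends on the CMT/CMT-PF sysctls), mark eligible chunks for
 * retransmission, back off the RTO and cwnd, then run threshold
 * management, which may abort the association (in which case we return 1).
 */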
int
sctp_t3rxt_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;
	int win_probe, num_mk;

	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) {
		sctp_log_fr(0, 0, 0, SCTP_FR_T3_TIMEOUT);
	}
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) {
		struct sctp_nets *lnet;

		TAILQ_FOREACH(lnet, &stcb->asoc.nets, sctp_next) {
			if (net == lnet) {
				sctp_log_cwnd(stcb, lnet, 1, SCTP_CWND_LOG_FROM_T3);
			} else {
				sctp_log_cwnd(stcb, lnet, 0, SCTP_CWND_LOG_FROM_T3);
			}
		}
	}
	/* Find an alternate and mark those for retransmission */
	if ((stcb->asoc.peers_rwnd == 0) &&
	    (stcb->asoc.total_flight < net->mtu)) {
		SCTP_STAT_INCR(sctps_timowindowprobe);
		win_probe = 1;
	} else {
		win_probe = 0;
	}

	/*
	 * JRS 5/14/07 - If CMT PF is on and the destination is not already
	 * in PF state, set the destination to PF state and store the
	 * current time as the time that the destination was last active. In
	 * addition, find an alternate destination with PF-based
	 * find_alt_net().
	 */
	if (SCTP_BASE_SYSCTL(sctp_cmt_on_off) && SCTP_BASE_SYSCTL(sctp_cmt_pf)) {
		if ((net->dest_state & SCTP_ADDR_PF) != SCTP_ADDR_PF) {
			net->dest_state |= SCTP_ADDR_PF;
			net->last_active = sctp_get_tick_count();
			SCTPDBG(SCTP_DEBUG_TIMER4, "Destination %p moved from active to PF.\n",
			    net);
		}
		alt = sctp_find_alternate_net(stcb, net, 2);
	} else if (SCTP_BASE_SYSCTL(sctp_cmt_on_off)) {
		/*
		 * CMT: Using RTX_SSTHRESH policy for CMT. If CMT is being
		 * used, then pick dest with largest ssthresh for any
		 * retransmission.
		 */
		alt = net;
		alt = sctp_find_alternate_net(stcb, alt, 1);
		/*
		 * CUCv2: If a different dest is picked for the
		 * retransmission, then new (rtx-)pseudo_cumack needs to be
		 * tracked for orig dest. Let CUCv2 track new (rtx-)
		 * pseudo-cumack always.
		 */
		net->find_pseudo_cumack = 1;
		net->find_rtx_pseudo_cumack = 1;
	} else {		/* CMT is OFF */
		alt = sctp_find_alternate_net(stcb, net, 0);
	}

	(void)sctp_mark_all_for_resend(stcb, net, alt, win_probe, &num_mk);
	/* FR Loss recovery just ended with the T3. */
	stcb->asoc.fast_retran_loss_recovery = 0;

	/* CMT FR loss recovery ended with the T3 */
	net->fast_retran_loss_recovery = 0;

	/*
	 * setup the sat loss recovery that prevents satellite cwnd advance.
	 */
	stcb->asoc.sat_t3_loss_recovery = 1;
	stcb->asoc.sat_t3_recovery_tsn = stcb->asoc.sending_seq;

	/* Backoff the timer and cwnd */
	sctp_backoff_on_timeout(stcb, net, win_probe, num_mk);
	if (win_probe == 0) {
		/* We don't do normal threshold management on window probes */
		if (sctp_threshold_management(inp, stcb, net,
		    stcb->asoc.max_send_times)) {
			/* Association was destroyed */
			return (1);
		} else {
			if (net != stcb->asoc.primary_destination) {
				/* send an immediate HB if our RTO is stale */
				struct timeval now;
				unsigned int ms_goneby;

				(void)SCTP_GETTIME_TIMEVAL(&now);
				if (net->last_sent_time.tv_sec) {
					ms_goneby = (now.tv_sec - net->last_sent_time.tv_sec) * 1000;
				} else {
					ms_goneby = 0;
				}
				if ((ms_goneby > net->RTO) || (net->RTO == 0)) {
					/*
					 * no recent feedback in an RTO or
					 * more, request an RTT update
					 */
					if (sctp_send_hb(stcb, 1, net) < 0)
						/*
						 * Less than 0 means we lost
						 * the assoc
						 */
						return (1);
				}
			}
		}
	} else {
		/*
		 * For a window probe we don't penalize the net's but only
		 * the association. This may fail it if SACKs are not coming
		 * back. If SACKs are coming with rwnd locked at 0, we will
		 * continue to hold things waiting for the rwnd to rise.
		 */
		if (sctp_threshold_management(inp, stcb, NULL,
		    stcb->asoc.max_send_times)) {
			/* Association was destroyed */
			return (1);
		}
	}
	if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
		/* Move all pending over too */
		sctp_move_all_chunks_to_alt(stcb, net, alt);

		/*
		 * Get the address that failed, to force a new src address
		 * selection and a route allocation.
		 */
		if (net->ro._s_addr) {
			sctp_free_ifa(net->ro._s_addr);
			net->ro._s_addr = NULL;
		}
		net->src_addr_selected = 0;

		/* Force a route allocation too */
		if (net->ro.ro_rt) {
			RTFREE(net->ro.ro_rt);
			net->ro.ro_rt = NULL;
		}
		/* Was it our primary? */
		if ((stcb->asoc.primary_destination == net) && (alt != net)) {
			/*
			 * Yes, note it as such and find an alternate. Note:
			 * this means the HB code must use this to re-set
			 * the primary if it goes active AND if someone does
			 * a change-primary then this flag must be cleared
			 * from any net structures.
			 */
			if (sctp_set_primary_addr(stcb,
			    (struct sockaddr *)NULL,
			    alt) == 0) {
				net->dest_state |= SCTP_ADDR_WAS_PRIMARY;
			}
		}
	} else if (SCTP_BASE_SYSCTL(sctp_cmt_on_off) && SCTP_BASE_SYSCTL(sctp_cmt_pf) && (net->dest_state & SCTP_ADDR_PF) == SCTP_ADDR_PF) {
		/*
		 * JRS 5/14/07 - If the destination hasn't failed completely
		 * but is in PF state, a PF-heartbeat needs to be sent
		 * manually.
		 */
		if (sctp_send_hb(stcb, 1, net) < 0)
			/* Return less than 0 means we lost the association */
			return (1);
	}
	/*
	 * Special case for cookie-echo'ed case, we don't do output but must
	 * await the COOKIE-ACK before retransmission
	 */
	if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
		/*
		 * Here we just reset the timer and start again since we
		 * have not established the asoc
		 */
		sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net);
		return (0);
	}
	if (stcb->asoc.peer_supports_prsctp) {
		struct sctp_tmit_chunk *lchk;

		lchk = sctp_try_advance_peer_ack_point(stcb, &stcb->asoc);
		/* C3. See if we need to send a Fwd-TSN */
		if (compare_with_wrap(stcb->asoc.advanced_peer_ack_point,
		    stcb->asoc.last_acked_seq, MAX_TSN)) {
			/*
			 * ISSUE with ECN, see FWD-TSN processing for notes
			 * on issues that will occur when the ECN NONCE
			 * stuff is put into SCTP for cross checking.
			 */
			send_forward_tsn(stcb, &stcb->asoc);
			if (lchk) {
				/* Assure a timer is up */
				sctp_timer_start(SCTP_TIMER_TYPE_SEND, stcb->sctp_ep, stcb, lchk->whoTo);
			}
		}
	}
	if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_MONITOR_ENABLE) {
		sctp_log_cwnd(stcb, net, net->cwnd, SCTP_CWND_LOG_FROM_RTX);
	}
	return (0);
}

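/*
 * T1-init timeout: resend the INIT (after threshold management and RTO
 * backoff), switching the primary destination to an alternate first when
 * the association is multihomed.
 */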
int
sctp_t1init_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	/* bump the thresholds */
	if (stcb->asoc.delayed_connection) {
		/*
		 * special hook for delayed connection. The library did NOT
		 * complete the rest of its sends.
		 */
		stcb->asoc.delayed_connection = 0;
		sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
		return (0);
	}
	if (SCTP_GET_STATE((&stcb->asoc)) != SCTP_STATE_COOKIE_WAIT) {
		return (0);
	}
	if (sctp_threshold_management(inp, stcb, net,
	    stcb->asoc.max_init_times)) {
		/* Association was destroyed */
		return (1);
	}
	stcb->asoc.dropped_special_cnt = 0;
	sctp_backoff_on_timeout(stcb, stcb->asoc.primary_destination, 1, 0);
	if (stcb->asoc.initial_init_rto_max < net->RTO) {
		net->RTO = stcb->asoc.initial_init_rto_max;
	}
	if (stcb->asoc.numnets > 1) {
		/* If we have more than one addr use it */
		struct sctp_nets *alt;

		alt = sctp_find_alternate_net(stcb, stcb->asoc.primary_destination, 0);
		if ((alt != NULL) && (alt != stcb->asoc.primary_destination)) {
			sctp_move_all_chunks_to_alt(stcb, stcb->asoc.primary_destination, alt);
			stcb->asoc.primary_destination = alt;
		}
	}
	/* Send out a new init */
	sctp_send_initiate(inp, stcb, SCTP_SO_NOT_LOCKED);
	return (0);
}

/*
 * For cookie and asconf we actually need to find and mark for resend, then
 * increment the resend counter (after all the threshold management stuff of
 * course).
 */
int
sctp_cookie_timer(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;
	struct sctp_tmit_chunk *cookie;

	/* first before all else we must find the cookie */
	TAILQ_FOREACH(cookie, &stcb->asoc.control_send_queue, sctp_next) {
		if (cookie->rec.chunk_id.id == SCTP_COOKIE_ECHO) {
			break;
		}
	}
	if (cookie == NULL) {
		if (SCTP_GET_STATE(&stcb->asoc) == SCTP_STATE_COOKIE_ECHOED) {
			/* FOOBAR! */
			struct mbuf *oper;

			oper = sctp_get_mbuf_for_msg((sizeof(struct sctp_paramhdr) + sizeof(uint32_t)),
			    0, M_DONTWAIT, 1, MT_DATA);
			if (oper) {
				struct sctp_paramhdr *ph;
				uint32_t *ippp;

				SCTP_BUF_LEN(oper) = sizeof(struct sctp_paramhdr) +
				    sizeof(uint32_t);
				ph = mtod(oper, struct sctp_paramhdr *);
				ph->param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION);
				ph->param_length = htons(SCTP_BUF_LEN(oper));
				ippp = (uint32_t *) (ph + 1);
				*ippp = htonl(SCTP_FROM_SCTP_TIMER + SCTP_LOC_3);
			}
			inp->last_abort_code = SCTP_FROM_SCTP_TIMER + SCTP_LOC_4;
			sctp_abort_an_association(inp, stcb, SCTP_INTERNAL_ERROR,
			    oper, SCTP_SO_NOT_LOCKED);
		} else {
#ifdef INVARIANTS
			panic("Cookie timer expires in wrong state?");
#else
			SCTP_PRINTF("Strange in state %d not cookie-echoed yet c-e timer expires?\n", SCTP_GET_STATE(&stcb->asoc));
			return (0);
#endif
		}
		return (0);
	}
	/* Ok we found the cookie, threshold management next */
	if (sctp_threshold_management(inp, stcb, cookie->whoTo,
	    stcb->asoc.max_init_times)) {
		/* Assoc is over */
		return (1);
	}
	/*
	 * cleared threshold management, now let's back off the address &
	 * select an alternate
	 */
	stcb->asoc.dropped_special_cnt = 0;
	sctp_backoff_on_timeout(stcb, cookie->whoTo, 1, 0);
	alt = sctp_find_alternate_net(stcb, cookie->whoTo, 0);
	if (alt != cookie->whoTo) {
		sctp_free_remote_addr(cookie->whoTo);
		cookie->whoTo = alt;
		atomic_add_int(&alt->ref_count, 1);
	}
	/* Now mark the retran info */
	if (cookie->sent != SCTP_DATAGRAM_RESEND) {
		sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
	}
	cookie->sent = SCTP_DATAGRAM_RESEND;
	/*
	 * Now call the output routine to kick out the cookie again. Note we
	 * don't mark any chunks for retran so that FR will need to kick in
	 * to move these (or a send timer).
	 */
	return (0);
}

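/*
 * Stream-reset (STRRESET) timeout: re-mark the outstanding stream-reset
 * request for retransmission on an alternate destination and restart the
 * timer; this also rescues any ECN Echo stranded on the failed net.
 */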
int
sctp_strreset_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;
	struct sctp_tmit_chunk *strrst = NULL, *chk = NULL;

	if (stcb->asoc.stream_reset_outstanding == 0) {
		return (0);
	}
	/* find the existing STRRESET, we use the seq number we sent out on */
	(void)sctp_find_stream_reset(stcb, stcb->asoc.str_reset_seq_out, &strrst);
	if (strrst == NULL) {
		return (0);
	}
	/* do threshold management */
	if (sctp_threshold_management(inp, stcb, strrst->whoTo,
	    stcb->asoc.max_send_times)) {
		/* Assoc is over */
		return (1);
	}
	/*
	 * cleared threshold management, now let's back off the address &
	 * select an alternate
	 */
	sctp_backoff_on_timeout(stcb, strrst->whoTo, 1, 0);
	alt = sctp_find_alternate_net(stcb, strrst->whoTo, 0);
	sctp_free_remote_addr(strrst->whoTo);
	strrst->whoTo = alt;
	atomic_add_int(&alt->ref_count, 1);

	/* See if an ECN Echo is also stranded */
	TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
		if ((chk->whoTo == net) &&
		    (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
			sctp_free_remote_addr(chk->whoTo);
			if (chk->sent != SCTP_DATAGRAM_RESEND) {
				chk->sent = SCTP_DATAGRAM_RESEND;
				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
			}
			chk->whoTo = alt;
			atomic_add_int(&alt->ref_count, 1);
		}
	}
	if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
		/*
		 * If the address went un-reachable, we need to move to
		 * alternates for ALL chk's in queue
		 */
		sctp_move_all_chunks_to_alt(stcb, net, alt);
	}
	/* mark the retran info */
	if (strrst->sent != SCTP_DATAGRAM_RESEND)
		sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
	strrst->sent = SCTP_DATAGRAM_RESEND;

	/* restart the timer */
	sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, inp, stcb, strrst->whoTo);
	return (0);
}

int
sctp_asconf_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;
	struct sctp_tmit_chunk *asconf, *chk, *nchk;

	/* is this a first send, or a retransmission? */
	if (TAILQ_EMPTY(&stcb->asoc.asconf_send_queue)) {
		/* compose a new ASCONF chunk and send it */
		sctp_send_asconf(stcb, net, SCTP_ADDR_NOT_LOCKED);
	} else {
		/*
		 * Retransmission of the existing ASCONF is needed
		 */

		/* find the existing ASCONF */
		asconf = TAILQ_FIRST(&stcb->asoc.asconf_send_queue);
		if (asconf == NULL) {
			return (0);
		}
		/* do threshold management */
		if (sctp_threshold_management(inp, stcb, asconf->whoTo,
		    stcb->asoc.max_send_times)) {
			/* Assoc is over */
			return (1);
		}
		if (asconf->snd_count > stcb->asoc.max_send_times) {
			/*
			 * Something is rotten: our peer is not responding
			 * to ASCONFs but apparently is to other chunks.
			 * i.e. it is not properly handling the chunk type
			 * upper bits. Mark this peer as ASCONF incapable
			 * and cleanup.
			 */
			SCTPDBG(SCTP_DEBUG_TIMER1, "asconf_timer: Peer has not responded to our repeated ASCONFs\n");
			sctp_asconf_cleanup(stcb, net);
			return (0);
		}
		/*
		 * cleared threshold management, so now backoff the net and
		 * select an alternate
		 */
		sctp_backoff_on_timeout(stcb, asconf->whoTo, 1, 0);
		alt = sctp_find_alternate_net(stcb, asconf->whoTo, 0);
		if (asconf->whoTo != alt) {
			sctp_free_remote_addr(asconf->whoTo);
			asconf->whoTo = alt;
			atomic_add_int(&alt->ref_count, 1);
		}
		/* See if an ECN Echo is also stranded */
		TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) {
			if ((chk->whoTo == net) &&
			    (chk->rec.chunk_id.id == SCTP_ECN_ECHO)) {
				sctp_free_remote_addr(chk->whoTo);
				chk->whoTo = alt;
				if (chk->sent != SCTP_DATAGRAM_RESEND) {
					chk->sent = SCTP_DATAGRAM_RESEND;
					sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
				}
				atomic_add_int(&alt->ref_count, 1);
			}
		}
		for (chk = asconf; chk; chk = nchk) {
			nchk = TAILQ_NEXT(chk, sctp_next);
			if (chk->whoTo != alt) {
				sctp_free_remote_addr(chk->whoTo);
				chk->whoTo = alt;
				atomic_add_int(&alt->ref_count, 1);
			}
			if (asconf->sent != SCTP_DATAGRAM_RESEND && chk->sent != SCTP_DATAGRAM_UNSENT)
				sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
			chk->sent = SCTP_DATAGRAM_RESEND;
		}
		if (net->dest_state & SCTP_ADDR_NOT_REACHABLE) {
			/*
			 * If the address went un-reachable, we need to move
			 * to the alternate for ALL chunks in queue
			 */
			sctp_move_all_chunks_to_alt(stcb, net, alt);
			net = alt;
		}
		/* mark the retran info */
		if (asconf->sent != SCTP_DATAGRAM_RESEND)
			sctp_ucount_incr(stcb->asoc.sent_queue_retran_cnt);
		asconf->sent = SCTP_DATAGRAM_RESEND;

		/* send another ASCONF if we have any more queued and are able to */
		sctp_send_asconf(stcb, alt, SCTP_ADDR_NOT_LOCKED);
	}
	return (0);
}

/* Mobility adaptation */
void
sctp_delete_prim_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	if (stcb->asoc.deleted_primary == NULL) {
		SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: deleted_primary is not stored...\n");
		sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
		return;
	}
	SCTPDBG(SCTP_DEBUG_ASCONF1, "delete_prim_timer: done keeping deleted primary ");
	SCTPDBG_ADDR(SCTP_DEBUG_ASCONF1, &stcb->asoc.deleted_primary->ro._l_addr.sa);
	sctp_free_remote_addr(stcb->asoc.deleted_primary);
	stcb->asoc.deleted_primary = NULL;
	sctp_mobility_feature_off(inp, SCTP_MOBILITY_PRIM_DELETED);
	return;
}

/*
 * For the shutdown and shutdown-ack, we do not keep one around on the
 * control queue. This means we must generate a new one and call the general
 * chunk output routine, AFTER having done threshold management.
 */
int
sctp_shutdown_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;

	/* first threshold management */
	if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
		/* Assoc is over */
		return (1);
	}
	/* second select an alternative */
	alt = sctp_find_alternate_net(stcb, net, 0);

	/* third generate a shutdown into the queue for our net */
	if (alt) {
		sctp_send_shutdown(stcb, alt);
	} else {
		/*
		 * if alt is NULL, there is no destination to send to
		 */
		return (0);
	}
	/* fourth restart timer */
	sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, inp, stcb, alt);
	return (0);
}

int
sctp_shutdownack_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
    struct sctp_nets *net)
{
	struct sctp_nets *alt;

	/* first threshold management */
	if (sctp_threshold_management(inp, stcb, net, stcb->asoc.max_send_times)) {
		/* Assoc is over */
		return (1);
	}
	/* second select an alternative */
	alt = sctp_find_alternate_net(stcb, net, 0);

	/* third generate a shutdown into the queue for our net */
	sctp_send_shutdown_ack(stcb, alt);

	/* fourth restart timer */
	sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNACK, inp, stcb, alt);
	return (0);
}

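/*
 * The association keeps its streams with queued data on a "wheel"
 * (asoc.out_wheel); each stream on the wheel is a "spoke".  This audit,
 * called only when the send and sent queues are empty, re-attaches any
 * stream that has data but fell off the wheel and reconciles the
 * queued-byte and chunk counters against what is actually queued.
 */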
static void
sctp_audit_stream_queues_for_size(struct sctp_inpcb *inp,
    struct sctp_tcb *stcb)
{
	struct sctp_stream_out *outs;
	struct sctp_stream_queue_pending *sp;
	unsigned int chks_in_queue = 0;
	int being_filled = 0;

	/*
	 * This function is ONLY called when the send/sent queues are empty.
	 */
	if ((stcb == NULL) || (inp == NULL))
		return;

	if (stcb->asoc.sent_queue_retran_cnt) {
		SCTP_PRINTF("Hmm, sent_queue_retran_cnt is non-zero %d\n",
		    stcb->asoc.sent_queue_retran_cnt);
		stcb->asoc.sent_queue_retran_cnt = 0;
	}
	SCTP_TCB_SEND_LOCK(stcb);
	if (TAILQ_EMPTY(&stcb->asoc.out_wheel)) {
		int i, cnt = 0;

		/* Check to see if a spoke fell off the wheel */
		for (i = 0; i < stcb->asoc.streamoutcnt; i++) {
			if (!TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) {
				sctp_insert_on_wheel(stcb, &stcb->asoc, &stcb->asoc.strmout[i], 1);
				cnt++;
			}
		}
		if (cnt) {
			/* yep, we lost a spoke or two */
			SCTP_PRINTF("Found an additional %d streams NOT on outwheel, corrected\n", cnt);
		} else {
			/* no spokes lost, */
			stcb->asoc.total_output_queue_size = 0;
		}
		SCTP_TCB_SEND_UNLOCK(stcb);
		return;
	}
	SCTP_TCB_SEND_UNLOCK(stcb);
	/* Check to see if some data is queued; if so, report it */
	TAILQ_FOREACH(outs, &stcb->asoc.out_wheel, next_spoke) {
		if (!TAILQ_EMPTY(&outs->outqueue)) {
			TAILQ_FOREACH(sp, &outs->outqueue, next) {
				if (sp->msg_is_complete)
					being_filled++;
				chks_in_queue++;
			}
		}
	}
	if (chks_in_queue != stcb->asoc.stream_queue_cnt) {
		SCTP_PRINTF("Hmm, stream queue cnt at %d I counted %d in stream out wheel\n",
		    stcb->asoc.stream_queue_cnt, chks_in_queue);
	}
	if (chks_in_queue) {
		/* call the output queue function */
		sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
		if ((TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
		    (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
			/*
			 * Probably should go in and make it go back through
			 * and add fragments allowed
			 */
			if (being_filled == 0) {
				SCTP_PRINTF("Still nothing moved %d chunks are stuck\n",
				    chks_in_queue);
			}
		}
	} else {
		SCTP_PRINTF("Found no chunks on any queue tot:%lu\n",
		    (u_long)stcb->asoc.total_output_queue_size);
		stcb->asoc.total_output_queue_size = 0;
	}
}

1622int
1623sctp_heartbeat_timer(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
1624    struct sctp_nets *net, int cnt_of_unconf)
1625{
1626	int ret;
1627
1628	if (net) {
1629		if (net->hb_responded == 0) {
1630			if (net->ro._s_addr) {
1631				/*
1632				 * Invalidate the src address if we did not
1633				 * get a response last time.
1634				 */
1635				sctp_free_ifa(net->ro._s_addr);
1636				net->ro._s_addr = NULL;
1637				net->src_addr_selected = 0;
1638			}
1639			sctp_backoff_on_timeout(stcb, net, 1, 0);
1640		}
1641		/* Zero PBA, if it needs it */
1642		if (net->partial_bytes_acked) {
1643			net->partial_bytes_acked = 0;
1644		}
1645	}
1646	if ((stcb->asoc.total_output_queue_size > 0) &&
1647	    (TAILQ_EMPTY(&stcb->asoc.send_queue)) &&
1648	    (TAILQ_EMPTY(&stcb->asoc.sent_queue))) {
1649		sctp_audit_stream_queues_for_size(inp, stcb);
1650	}
1651	/* Send a new HB, this will do threshold managment, pick a new dest */
1652	if (cnt_of_unconf == 0) {
1653		if (sctp_send_hb(stcb, 0, NULL) < 0) {
1654			return (1);
1655		}
1656	} else {
1657		/*
1658		 * this will send out extra hb's up to maxburst if there are
1659		 * any unconfirmed addresses.
1660		 */
1661		uint32_t cnt_sent = 0;
1662
1663		TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
1664			if ((net->dest_state & SCTP_ADDR_UNCONFIRMED) &&
1665			    (net->dest_state & SCTP_ADDR_REACHABLE)) {
1666				cnt_sent++;
1667				if (net->hb_responded == 0) {
1668					/* Did we respond last time? */
1669					if (net->ro._s_addr) {
1670						sctp_free_ifa(net->ro._s_addr);
1671						net->ro._s_addr = NULL;
1672						net->src_addr_selected = 0;
1673					}
1674				}
1675				ret = sctp_send_hb(stcb, 1, net);
1676				if (ret < 0)
1677				if (ret < 0) {
1678					return (1);
1679				} else if (ret == 0) {
1680				}
1681				if (cnt_sent >= SCTP_BASE_SYSCTL(sctp_hb_maxburst))
1682					break;
1683			}
1684		}
1685	}
1686	return (0);
1687}
1688
1689int
1690sctp_is_hb_timer_running(struct sctp_tcb *stcb)
1691{
1692	if (SCTP_OS_TIMER_PENDING(&stcb->asoc.hb_timer.timer)) {
1693		/* it's running */
1694		return (1);
1695	} else {
1696		/* nope */
1697		return (0);
1698	}
1699}
1700
1701int
1702sctp_is_sack_timer_running(struct sctp_tcb *stcb)
1703{
1704	if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) {
1705		/* it's running */
1706		return (1);
1707	} else {
1708		/* nope */
1709		return (0);
1710	}
1711}
1712
1713#define SCTP_NUMBER_OF_MTU_SIZES 18
1714static uint32_t mtu_sizes[] = {
1715	68,
1716	296,
1717	508,
1718	512,
1719	544,
1720	576,
1721	1006,
1722	1492,
1723	1500,
1724	1536,
1725	2002,
1726	2048,
1727	4352,
1728	4464,
1729	8166,
1730	17914,
1731	32000,
1732	65535
1733};
1734
1735
1736static uint32_t
1737sctp_getnext_mtu(struct sctp_inpcb *inp, uint32_t cur_mtu)
1738{
1739	/* select another MTU that is just bigger than this one */
1740	int i;
1741
1742	for (i = 0; i < SCTP_NUMBER_OF_MTU_SIZES; i++) {
1743		if (cur_mtu < mtu_sizes[i]) {
1744			/* this is the first size bigger than cur_mtu */
1745			return (mtu_sizes[i]);
1746		}
1747	}
1748	/* nothing bigger in the table; return the current MTU */
1749	return (cur_mtu);
1750}
1751
1752
1753void
1754sctp_pathmtu_timer(struct sctp_inpcb *inp,
1755    struct sctp_tcb *stcb,
1756    struct sctp_nets *net)
1757{
1758	uint32_t next_mtu, mtu;
1759
1760	next_mtu = sctp_getnext_mtu(inp, net->mtu);
1761
1762	if ((next_mtu > net->mtu) && (net->port == 0)) {
1763		if ((net->src_addr_selected == 0) ||
1764		    (net->ro._s_addr == NULL) ||
1765		    (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
1766			if ((net->ro._s_addr != NULL) && (net->ro._s_addr->localifa_flags & SCTP_BEING_DELETED)) {
1767				sctp_free_ifa(net->ro._s_addr);
1768				net->ro._s_addr = NULL;
1769				net->src_addr_selected = 0;
1770			} else if (net->ro._s_addr == NULL) {
1771#if defined(INET6) && defined(SCTP_EMBEDDED_V6_SCOPE)
1772				if (net->ro._l_addr.sa.sa_family == AF_INET6) {
1773					struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
1774
1775					/* KAME hack: embed scopeid */
1776					(void)sa6_embedscope(sin6, MODULE_GLOBAL(MOD_INET6, ip6_use_defzone));
1777				}
1778#endif
1779
1780				net->ro._s_addr = sctp_source_address_selection(inp,
1781				    stcb,
1782				    (sctp_route_t *) & net->ro,
1783				    net, 0, stcb->asoc.vrf_id);
1784#if defined(INET6) && defined(SCTP_EMBEDDED_V6_SCOPE)
1785				if (net->ro._l_addr.sa.sa_family == AF_INET6) {
1786					struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
1787
1788					(void)sa6_recoverscope(sin6);
1789				}
1790#endif				/* INET6 */
1791			}
1792			if (net->ro._s_addr)
1793				net->src_addr_selected = 1;
1794		}
1795		if (net->ro._s_addr) {
1796			mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt);
1797			if (net->port) {
1798				mtu -= sizeof(struct udphdr);
1799			}
1800			if (mtu > next_mtu) {
1801				net->mtu = next_mtu;
1802			}
1803		}
1804	}
1805	/* restart the timer */
1806	sctp_timer_start(SCTP_TIMER_TYPE_PATHMTURAISE, inp, stcb, net);
1807}
1808
1809void
1810sctp_autoclose_timer(struct sctp_inpcb *inp,
1811    struct sctp_tcb *stcb,
1812    struct sctp_nets *net)
1813{
1814	struct timeval tn, *tim_touse;
1815	struct sctp_association *asoc;
1816	int ticks_gone_by;
1817
1818	(void)SCTP_GETTIME_TIMEVAL(&tn);
1819	if (stcb->asoc.sctp_autoclose_ticks &&
1820	    sctp_is_feature_on(inp, SCTP_PCB_FLAGS_AUTOCLOSE)) {
1821		/* Auto close is on */
1822		asoc = &stcb->asoc;
1823		/* pick the time to use */
1824		if (asoc->time_last_rcvd.tv_sec >
1825		    asoc->time_last_sent.tv_sec) {
1826			tim_touse = &asoc->time_last_rcvd;
1827		} else {
1828			tim_touse = &asoc->time_last_sent;
1829		}
1830		/* Has enough time now transpired to autoclose? */
1831		ticks_gone_by = SEC_TO_TICKS(tn.tv_sec - tim_touse->tv_sec);
1832		if ((ticks_gone_by > 0) &&
1833		    (ticks_gone_by >= (int)asoc->sctp_autoclose_ticks)) {
1834			/*
1835			 * autoclose time has hit. Call the output routine,
1836			 * which should do nothing, just to be SURE we don't
1837			 * have hanging data. We can then safely check the
1838			 * queues and know that we are clear to send the
1839			 * shutdown.
1840			 */
1841			sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_AUTOCLOSE_TMR, SCTP_SO_NOT_LOCKED);
1842			/* Are we clean? */
1843			if (TAILQ_EMPTY(&asoc->send_queue) &&
1844			    TAILQ_EMPTY(&asoc->sent_queue)) {
1845				/*
1846				 * there is nothing queued to send, so I'm
1847				 * done...
1848				 */
1849				if (SCTP_GET_STATE(asoc) != SCTP_STATE_SHUTDOWN_SENT) {
1850					/* only send SHUTDOWN 1st time thru */
1851					sctp_send_shutdown(stcb, stcb->asoc.primary_destination);
1852					if ((SCTP_GET_STATE(asoc) == SCTP_STATE_OPEN) ||
1853					    (SCTP_GET_STATE(asoc) == SCTP_STATE_SHUTDOWN_RECEIVED)) {
1854						SCTP_STAT_DECR_GAUGE32(sctps_currestab);
1855					}
1856					SCTP_SET_STATE(asoc, SCTP_STATE_SHUTDOWN_SENT);
1857					SCTP_CLEAR_SUBSTATE(asoc, SCTP_STATE_SHUTDOWN_PENDING);
1858					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN,
1859					    stcb->sctp_ep, stcb,
1860					    asoc->primary_destination);
1861					sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD,
1862					    stcb->sctp_ep, stcb,
1863					    asoc->primary_destination);
1864				}
1865			}
1866		} else {
1867			/*
1868			 * No auto close at this time, reset t-o to check
1869			 * Not time to auto-close yet; reset the timeout to
1870			 * check again later
1871			int tmp;
1872
1873			/* fool the timer startup to use the time left */
1874			/* fool the timer startup into using the time left */
1875			asoc->sctp_autoclose_ticks -= ticks_gone_by;
1876			sctp_timer_start(SCTP_TIMER_TYPE_AUTOCLOSE, inp, stcb,
1877			    net);
1878			/* restore the real tick value */
1879			asoc->sctp_autoclose_ticks = tmp;
1880		}
1881	}
1882}
1883
1884void
1885sctp_iterator_timer(struct sctp_iterator *it)
1886{
1887	int iteration_count = 0;
1888	int inp_skip = 0;
1889
1890	/*
1891	 * Only one iterator can run at a time. This is the only way we can
1892	 * cleanly pull ep's from underneath all the running iterators when
1893	 * an ep is freed.
1894	 */
1895	SCTP_ITERATOR_LOCK();
1896	if (it->inp == NULL) {
1897		/* iterator is complete */
1898done_with_iterator:
1899		SCTP_ITERATOR_UNLOCK();
1900		SCTP_INP_INFO_WLOCK();
1901		TAILQ_REMOVE(&SCTP_BASE_INFO(iteratorhead), it, sctp_nxt_itr);
1902		/* stopping the callout is not needed, in theory */
1903		SCTP_INP_INFO_WUNLOCK();
1904		(void)SCTP_OS_TIMER_STOP(&it->tmr.timer);
1905		if (it->function_atend != NULL) {
1906			(*it->function_atend) (it->pointer, it->val);
1907		}
1908		SCTP_FREE(it, SCTP_M_ITER);
1909		return;
1910	}
1911select_a_new_ep:
1912	SCTP_INP_WLOCK(it->inp);
1913	while (((it->pcb_flags) &&
1914	    ((it->inp->sctp_flags & it->pcb_flags) != it->pcb_flags)) ||
1915	    ((it->pcb_features) &&
1916	    ((it->inp->sctp_features & it->pcb_features) != it->pcb_features))) {
1917		/* endpoint flags or features don't match, so keep looking */
1918		if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
1919			SCTP_INP_WUNLOCK(it->inp);
1920			goto done_with_iterator;
1921		}
1922		SCTP_INP_WUNLOCK(it->inp);
1923		it->inp = LIST_NEXT(it->inp, sctp_list);
1924		if (it->inp == NULL) {
1925			goto done_with_iterator;
1926		}
1927		SCTP_INP_WLOCK(it->inp);
1928	}
1929	if ((it->inp->inp_starting_point_for_iterator != NULL) &&
1930	    (it->inp->inp_starting_point_for_iterator != it)) {
1931		SCTP_PRINTF("Iterator collision, waiting for one at %p\n",
1932		    it->inp);
1933		SCTP_INP_WUNLOCK(it->inp);
1934		goto start_timer_return;
1935	}
1936	/* mark the current iterator on the endpoint */
1937	it->inp->inp_starting_point_for_iterator = it;
1938	SCTP_INP_WUNLOCK(it->inp);
1939	SCTP_INP_RLOCK(it->inp);
1940	/* now go through each assoc which is in the desired state */
1941	if (it->done_current_ep == 0) {
1942		if (it->function_inp != NULL)
1943			inp_skip = (*it->function_inp) (it->inp, it->pointer, it->val);
1944		it->done_current_ep = 1;
1945	}
1946	if (it->stcb == NULL) {
1947		/* start with the first assoc on this endpoint */
1948		it->stcb = LIST_FIRST(&it->inp->sctp_asoc_list);
1949	}
1950	SCTP_INP_RUNLOCK(it->inp);
1951	if ((inp_skip) || (it->stcb == NULL)) {
1952		if (it->function_inp_end != NULL) {
1953			inp_skip = (*it->function_inp_end) (it->inp,
1954			    it->pointer,
1955			    it->val);
1956		}
1957		goto no_stcb;
1958	}
1959	if ((it->stcb) &&
1960	    (it->stcb->asoc.stcb_starting_point_for_iterator == it)) {
1961		it->stcb->asoc.stcb_starting_point_for_iterator = NULL;
1962	}
1963	while (it->stcb) {
1964		SCTP_TCB_LOCK(it->stcb);
1965		if (it->asoc_state && ((it->stcb->asoc.state & it->asoc_state) != it->asoc_state)) {
1966			/* not in the right state... keep looking */
1967			SCTP_TCB_UNLOCK(it->stcb);
1968			goto next_assoc;
1969		}
1970		/* mark the current iterator on the assoc */
1971		it->stcb->asoc.stcb_starting_point_for_iterator = it;
1972		/* see if we have hit the iterator's per-pass limit */
1973		iteration_count++;
1974		if (iteration_count > SCTP_ITERATOR_MAX_AT_ONCE) {
1975	start_timer_return:
1976			/* set a timer to continue this later */
1977			if (it->stcb)
1978				SCTP_TCB_UNLOCK(it->stcb);
1979			sctp_timer_start(SCTP_TIMER_TYPE_ITERATOR,
1980			    (struct sctp_inpcb *)it, NULL, NULL);
1981			SCTP_ITERATOR_UNLOCK();
1982			return;
1983		}
1984		/* run function on this one */
1985		(*it->function_assoc) (it->inp, it->stcb, it->pointer, it->val);
1986
1987		/*
1988		 * we lie here; it really needs to have its own type, but
1989		 * first I must verify that this won't affect things :-0
1990		 */
1991		if (it->no_chunk_output == 0)
1992			sctp_chunk_output(it->inp, it->stcb, SCTP_OUTPUT_FROM_T3, SCTP_SO_NOT_LOCKED);
1993
1994		SCTP_TCB_UNLOCK(it->stcb);
1995next_assoc:
1996		it->stcb = LIST_NEXT(it->stcb, sctp_tcblist);
1997		if (it->stcb == NULL) {
1998			if (it->function_inp_end != NULL) {
1999				inp_skip = (*it->function_inp_end) (it->inp,
2000				    it->pointer,
2001				    it->val);
2002			}
2003		}
2004	}
2005no_stcb:
2006	/* done with all assocs on this endpoint, move on to next endpoint */
2007	it->done_current_ep = 0;
2008	SCTP_INP_WLOCK(it->inp);
2009	it->inp->inp_starting_point_for_iterator = NULL;
2010	SCTP_INP_WUNLOCK(it->inp);
2011	if (it->iterator_flags & SCTP_ITERATOR_DO_SINGLE_INP) {
2012		it->inp = NULL;
2013	} else {
2014		SCTP_INP_INFO_RLOCK();
2015		it->inp = LIST_NEXT(it->inp, sctp_list);
2016		SCTP_INP_INFO_RUNLOCK();
2017	}
2018	if (it->inp == NULL) {
2019		goto done_with_iterator;
2020	}
2021	goto select_a_new_ep;
2022}
2023