1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26#include <sys/types.h>
27#include <sys/systm.h>
28#include <sys/stream.h>
29#include <sys/cmn_err.h>
30#include <sys/kmem.h>
31#define	_SUN_TPI_VERSION 2
32#include <sys/tihdr.h>
33#include <sys/socket.h>
34#include <sys/strsun.h>
35#include <sys/strsubr.h>
36
37#include <netinet/in.h>
38#include <netinet/ip6.h>
39#include <netinet/tcp_seq.h>
40#include <netinet/sctp.h>
41
42#include <inet/common.h>
43#include <inet/ip.h>
44#include <inet/ip_if.h>
45#include <inet/ip6.h>
46#include <inet/mib2.h>
47#include <inet/ipclassifier.h>
48#include <inet/ipp_common.h>
49#include <inet/ipsec_impl.h>
50#include <inet/sctp_ip.h>
51
52#include "sctp_impl.h"
53#include "sctp_asconf.h"
54#include "sctp_addr.h"
55
56static struct kmem_cache *sctp_kmem_set_cache;
57
58/*
59 * PR-SCTP comments.
60 *
61 * When we get a valid Forward TSN chunk, we check the fragment list for this
62 * SSN and preceeding SSNs free all them. Further, if this Forward TSN causes
63 * the next expected SSN to be present in the stream queue, we deliver any
64 * such stranded messages upstream. We also update the SACK info. appropriately.
65 * When checking for advancing the cumulative ack (in sctp_cumack()) we must
66 * check for abandoned chunks and messages. While traversing the tramsmit
67 * list if we come across an abandoned chunk, we can skip the message (i.e.
68 * take it out of the (re)transmit list) since this message, and hence this
69 * chunk, has been marked abandoned by sctp_rexmit(). If we come across an
70 * unsent chunk for a message this now abandoned we need to check if a
71 * Forward TSN needs to be sent, this could be a case where we deferred sending
72 * a Forward TSN in sctp_get_msg_to_send(). Further, after processing a
73 * SACK we check if the Advanced peer ack point can be moved ahead, i.e.
74 * if we can send a Forward TSN via sctp_check_abandoned_data().
75 */
76void
77sctp_free_set(sctp_set_t *s)
78{
79	sctp_set_t *p;
80
81	while (s) {
82		p = s->next;
83		kmem_cache_free(sctp_kmem_set_cache, s);
84		s = p;
85	}
86}
87
/*
 * Record TSN 'tsn' in the sorted, doubly-linked list of SACK gap sets
 * headed at *head, coalescing with an adjacent set where possible.
 * '*num' tracks the number of sets on the list and is kept in sync.
 * Allocations use KM_NOSLEEP; on failure the TSN is simply not
 * recorded (the peer will retransmit and we will try again).
 */
static void
sctp_ack_add(sctp_set_t **head, uint32_t tsn, int *num)
{
	sctp_set_t *p, *t;

	if (head == NULL || num == NULL)
		return;

	ASSERT(*num >= 0);
	ASSERT((*num == 0 && *head == NULL) || (*num > 0 && *head != NULL));

	/* Empty list: create the first set covering just this TSN. */
	if (*head == NULL) {
		*head = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
		if (*head == NULL)
			return;
		(*head)->prev = (*head)->next = NULL;
		(*head)->begin = tsn;
		(*head)->end = tsn;
		*num = 1;
		return;
	}

	ASSERT((*head)->prev == NULL);

	/*
	 * Handle this special case here so we don't have to check
	 * for it each time in the loop.
	 */
	if (SEQ_LT(tsn + 1, (*head)->begin)) {
		/* add a new set, and move the head pointer */
		t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
		if (t == NULL)
			return;
		t->next = *head;
		t->prev = NULL;
		(*head)->prev = t;
		t->begin = tsn;
		t->end = tsn;
		(*num)++;
		*head = t;
		return;
	}

	/*
	 * We need to handle the following cases, where p points to
	 * the current set (as we walk through the loop):
	 *
	 * 1. tsn is entirely less than p; create a new set before p.
	 * 2. tsn borders p from less; coalesce p with tsn.
	 * 3. tsn is within p; do nothing.
	 * 4. tsn borders p from greater; coalesce p with tsn.
	 * 4a. p may now border p->next from less; if so, coalesce those
	 *    two sets.
	 * 5. tsn is entirely greater than all sets; add a new set at
	 *    the end.
	 */
	for (p = *head; ; p = p->next) {
		if (SEQ_LT(tsn + 1, p->begin)) {
			/* 1: add a new set before p. */
			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
			if (t == NULL)
				return;
			t->next = p;
			t->prev = NULL;
			t->begin = tsn;
			t->end = tsn;
			if (p->prev) {
				t->prev = p->prev;
				p->prev->next = t;
			}
			p->prev = t;
			(*num)++;
			return;
		}

		if ((tsn + 1) == p->begin) {
			/* 2: adjust p->begin to absorb tsn from below */
			p->begin = tsn;
			return;
		}

		if (SEQ_GEQ(tsn, p->begin) && SEQ_LEQ(tsn, p->end)) {
			/* 3; tsn already covered, do nothing */
			return;
		}

		if ((p->end + 1) == tsn) {
			/* 4; adjust p->end to absorb tsn from above */
			p->end = tsn;

			if (p->next != NULL && (tsn + 1) == p->next->begin) {
				/* 4a: coalesce p and p->next */
				t = p->next;
				p->end = t->end;
				p->next = t->next;
				if (t->next != NULL)
					t->next->prev = p;
				kmem_cache_free(sctp_kmem_set_cache, t);
				(*num)--;
			}
			return;
		}

		if (p->next == NULL) {
			/* 5: add new set at the end */
			t = kmem_cache_alloc(sctp_kmem_set_cache, KM_NOSLEEP);
			if (t == NULL)
				return;
			t->next = NULL;
			t->prev = p;
			t->begin = tsn;
			t->end = tsn;
			p->next = t;
			(*num)++;
			return;
		}

		/* tsn lies beyond p; move on to the next set */
		if (SEQ_GT(tsn, p->end + 1))
			continue;
	}
}
209
/*
 * Remove from the gap-set list at *head everything covering TSNs up to
 * and including 'end' (e.g. because the cumulative ack has advanced).
 * A set that straddles 'end' is trimmed rather than freed.  '*num' is
 * recomputed to reflect the number of sets remaining.
 */
static void
sctp_ack_rem(sctp_set_t **head, uint32_t end, int *num)
{
	sctp_set_t *p, *t;

	if (head == NULL || *head == NULL || num == NULL)
		return;

	/* Nothing to remove */
	if (SEQ_LT(end, (*head)->begin))
		return;

	/* Find out where to start removing sets */
	for (p = *head; p->next; p = p->next) {
		if (SEQ_LEQ(end, p->end))
			break;
	}

	if (SEQ_LT(end, p->end) && SEQ_GEQ(end, p->begin)) {
		/* adjust p: 'end' falls inside this set, keep the tail */
		p->begin = end + 1;
		/* all done */
		if (p == *head)
			return;
	} else if (SEQ_GEQ(end, p->end)) {
		/* remove this set too */
		p = p->next;
	}

	/* unlink everything before this set */
	t = *head;
	*head = p;
	if (p != NULL && p->prev != NULL) {
		p->prev->next = NULL;
		p->prev = NULL;
	}

	sctp_free_set(t);

	/* recount the number of sets */
	*num = 0;

	for (p = *head; p != NULL; p = p->next)
		(*num)++;
}
255
/*
 * Create the kmem cache backing sctp_set_t allocations.  No
 * constructor/destructor/reclaim callbacks are needed; sets are fully
 * initialized by their allocators.
 */
void
sctp_sets_init()
{
	sctp_kmem_set_cache = kmem_cache_create("sctp_set_cache",
	    sizeof (sctp_set_t), 0, NULL, NULL, NULL, NULL,
	    NULL, 0);
}
263
/*
 * Destroy the sctp_set_t kmem cache created by sctp_sets_init().
 */
void
sctp_sets_fini()
{
	kmem_cache_destroy(sctp_kmem_set_cache);
}
269
270sctp_chunk_hdr_t *
271sctp_first_chunk(uchar_t *rptr, ssize_t remaining)
272{
273	sctp_chunk_hdr_t *ch;
274	uint16_t ch_len;
275
276	if (remaining < sizeof (*ch)) {
277		return (NULL);
278	}
279
280	ch = (sctp_chunk_hdr_t *)rptr;
281	ch_len = ntohs(ch->sch_len);
282
283	if (ch_len < sizeof (*ch) || remaining < ch_len) {
284		return (NULL);
285	}
286
287	return (ch);
288}
289
290sctp_chunk_hdr_t *
291sctp_next_chunk(sctp_chunk_hdr_t *ch, ssize_t *remaining)
292{
293	int pad;
294	uint16_t ch_len;
295
296	if (!ch) {
297		return (NULL);
298	}
299
300	ch_len = ntohs(ch->sch_len);
301
302	if ((pad = ch_len & (SCTP_ALIGN - 1)) != 0) {
303		pad = SCTP_ALIGN - pad;
304	}
305
306	*remaining -= (ch_len + pad);
307	ch = (sctp_chunk_hdr_t *)((char *)ch + ch_len + pad);
308
309	return (sctp_first_chunk((uchar_t *)ch, *remaining));
310}
311
312/*
313 * Attach ancillary data to a received SCTP segments.
314 * If the source address (fp) is not the primary, send up a
315 * unitdata_ind so recvfrom() can populate the msg_name field.
316 * If ancillary data is also requested, we append it to the
317 * unitdata_req. Otherwise, we just send up an optdata_ind.
318 */
319static int
320sctp_input_add_ancillary(sctp_t *sctp, mblk_t **mp, sctp_data_hdr_t *dcp,
321    sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
322{
323	struct T_unitdata_ind	*tudi;
324	int			optlen;
325	int			hdrlen;
326	uchar_t			*optptr;
327	struct cmsghdr		*cmsg;
328	mblk_t			*mp1;
329	struct sockaddr_in6	sin_buf[1];
330	struct sockaddr_in6	*sin6;
331	struct sockaddr_in	*sin4;
332	crb_t			 addflag;	/* Which pieces to add */
333	conn_t			*connp = sctp->sctp_connp;
334
335	sin4 = NULL;
336	sin6 = NULL;
337
338	optlen = hdrlen = 0;
339	addflag.crb_all = 0;
340
341	/* Figure out address size */
342	if (connp->conn_family == AF_INET) {
343		sin4 = (struct sockaddr_in *)sin_buf;
344		sin4->sin_family = AF_INET;
345		sin4->sin_port = connp->conn_fport;
346		IN6_V4MAPPED_TO_IPADDR(&fp->sf_faddr, sin4->sin_addr.s_addr);
347		hdrlen = sizeof (*tudi) + sizeof (*sin4);
348	} else {
349		sin6 = sin_buf;
350		sin6->sin6_family = AF_INET6;
351		sin6->sin6_port = connp->conn_fport;
352		sin6->sin6_addr = fp->sf_faddr;
353		hdrlen = sizeof (*tudi) + sizeof (*sin6);
354	}
355	/* If app asked to receive send / recv info */
356	if (sctp->sctp_recvsndrcvinfo)
357		optlen += sizeof (*cmsg) + sizeof (struct sctp_sndrcvinfo);
358
359	if (connp->conn_recv_ancillary.crb_all == 0)
360		goto noancillary;
361
362	if (connp->conn_recv_ancillary.crb_ip_recvpktinfo &&
363	    ira->ira_ruifindex != sctp->sctp_recvifindex) {
364		optlen += sizeof (*cmsg) + sizeof (struct in6_pktinfo);
365		if (hdrlen == 0)
366			hdrlen = sizeof (struct T_unitdata_ind);
367		addflag.crb_ip_recvpktinfo = 1;
368	}
369	/* If app asked for hoplimit and it has changed ... */
370	if (connp->conn_recv_ancillary.crb_ipv6_recvhoplimit &&
371	    ipp->ipp_hoplimit != sctp->sctp_recvhops) {
372		optlen += sizeof (*cmsg) + sizeof (uint_t);
373		if (hdrlen == 0)
374			hdrlen = sizeof (struct T_unitdata_ind);
375		addflag.crb_ipv6_recvhoplimit = 1;
376	}
377	/* If app asked for tclass and it has changed ... */
378	if (connp->conn_recv_ancillary.crb_ipv6_recvtclass &&
379	    ipp->ipp_tclass != sctp->sctp_recvtclass) {
380		optlen += sizeof (struct T_opthdr) + sizeof (uint_t);
381		if (hdrlen == 0)
382			hdrlen = sizeof (struct T_unitdata_ind);
383		addflag.crb_ipv6_recvtclass = 1;
384	}
385	/* If app asked for hopbyhop headers and it has changed ... */
386	if (connp->conn_recv_ancillary.crb_ipv6_recvhopopts &&
387	    ip_cmpbuf(sctp->sctp_hopopts, sctp->sctp_hopoptslen,
388	    (ipp->ipp_fields & IPPF_HOPOPTS),
389	    ipp->ipp_hopopts, ipp->ipp_hopoptslen)) {
390		optlen += sizeof (*cmsg) + ipp->ipp_hopoptslen -
391		    sctp->sctp_v6label_len;
392		if (hdrlen == 0)
393			hdrlen = sizeof (struct T_unitdata_ind);
394		addflag.crb_ipv6_recvhopopts = 1;
395		if (!ip_allocbuf((void **)&sctp->sctp_hopopts,
396		    &sctp->sctp_hopoptslen,
397		    (ipp->ipp_fields & IPPF_HOPOPTS),
398		    ipp->ipp_hopopts, ipp->ipp_hopoptslen))
399			return (-1);
400	}
401	/* If app asked for dst headers before routing headers ... */
402	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdrdstopts &&
403	    ip_cmpbuf(sctp->sctp_rthdrdstopts, sctp->sctp_rthdrdstoptslen,
404	    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
405	    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen)) {
406		optlen += sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
407		if (hdrlen == 0)
408			hdrlen = sizeof (struct T_unitdata_ind);
409		addflag.crb_ipv6_recvrthdrdstopts = 1;
410		if (!ip_allocbuf((void **)&sctp->sctp_rthdrdstopts,
411		    &sctp->sctp_rthdrdstoptslen,
412		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
413		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen))
414			return (-1);
415	}
416	/* If app asked for routing headers and it has changed ... */
417	if (connp->conn_recv_ancillary.crb_ipv6_recvrthdr &&
418	    ip_cmpbuf(sctp->sctp_rthdr, sctp->sctp_rthdrlen,
419	    (ipp->ipp_fields & IPPF_RTHDR),
420	    ipp->ipp_rthdr, ipp->ipp_rthdrlen)) {
421		optlen += sizeof (*cmsg) + ipp->ipp_rthdrlen;
422		if (hdrlen == 0)
423			hdrlen = sizeof (struct T_unitdata_ind);
424		addflag.crb_ipv6_recvrthdr = 1;
425		if (!ip_allocbuf((void **)&sctp->sctp_rthdr,
426		    &sctp->sctp_rthdrlen,
427		    (ipp->ipp_fields & IPPF_RTHDR),
428		    ipp->ipp_rthdr, ipp->ipp_rthdrlen))
429			return (-1);
430	}
431	/* If app asked for dest headers and it has changed ... */
432	if (connp->conn_recv_ancillary.crb_ipv6_recvdstopts &&
433	    ip_cmpbuf(sctp->sctp_dstopts, sctp->sctp_dstoptslen,
434	    (ipp->ipp_fields & IPPF_DSTOPTS),
435	    ipp->ipp_dstopts, ipp->ipp_dstoptslen)) {
436		optlen += sizeof (*cmsg) + ipp->ipp_dstoptslen;
437		if (hdrlen == 0)
438			hdrlen = sizeof (struct T_unitdata_ind);
439		addflag.crb_ipv6_recvdstopts = 1;
440		if (!ip_allocbuf((void **)&sctp->sctp_dstopts,
441		    &sctp->sctp_dstoptslen,
442		    (ipp->ipp_fields & IPPF_DSTOPTS),
443		    ipp->ipp_dstopts, ipp->ipp_dstoptslen))
444			return (-1);
445	}
446noancillary:
447	/* Nothing to add */
448	if (hdrlen == 0)
449		return (-1);
450
451	mp1 = allocb(hdrlen + optlen + sizeof (void *), BPRI_MED);
452	if (mp1 == NULL)
453		return (-1);
454	mp1->b_cont = *mp;
455	*mp = mp1;
456	mp1->b_rptr += sizeof (void *);  /* pointer worth of padding */
457	mp1->b_wptr = mp1->b_rptr + hdrlen + optlen;
458	DB_TYPE(mp1) = M_PROTO;
459	tudi = (struct T_unitdata_ind *)mp1->b_rptr;
460	tudi->PRIM_type = T_UNITDATA_IND;
461	tudi->SRC_length = sin4 ? sizeof (*sin4) : sizeof (*sin6);
462	tudi->SRC_offset = sizeof (*tudi);
463	tudi->OPT_offset = sizeof (*tudi) + tudi->SRC_length;
464	tudi->OPT_length = optlen;
465	if (sin4) {
466		bcopy(sin4, tudi + 1, sizeof (*sin4));
467	} else {
468		bcopy(sin6, tudi + 1, sizeof (*sin6));
469	}
470	optptr = (uchar_t *)tudi + tudi->OPT_offset;
471
472	if (sctp->sctp_recvsndrcvinfo) {
473		/* XXX need backout method if memory allocation fails. */
474		struct sctp_sndrcvinfo *sri;
475
476		cmsg = (struct cmsghdr *)optptr;
477		cmsg->cmsg_level = IPPROTO_SCTP;
478		cmsg->cmsg_type = SCTP_SNDRCV;
479		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*sri);
480		optptr += sizeof (*cmsg);
481
482		sri = (struct sctp_sndrcvinfo *)(cmsg + 1);
483		ASSERT(OK_32PTR(sri));
484		sri->sinfo_stream = ntohs(dcp->sdh_sid);
485		sri->sinfo_ssn = ntohs(dcp->sdh_ssn);
486		if (SCTP_DATA_GET_UBIT(dcp)) {
487			sri->sinfo_flags = MSG_UNORDERED;
488		} else {
489			sri->sinfo_flags = 0;
490		}
491		sri->sinfo_ppid = dcp->sdh_payload_id;
492		sri->sinfo_context = 0;
493		sri->sinfo_timetolive = 0;
494		sri->sinfo_tsn = ntohl(dcp->sdh_tsn);
495		sri->sinfo_cumtsn = sctp->sctp_ftsn;
496		sri->sinfo_assoc_id = 0;
497
498		optptr += sizeof (*sri);
499	}
500
501	/*
502	 * If app asked for pktinfo and the index has changed ...
503	 * Note that the local address never changes for the connection.
504	 */
505	if (addflag.crb_ip_recvpktinfo) {
506		struct in6_pktinfo *pkti;
507		uint_t ifindex;
508
509		ifindex = ira->ira_ruifindex;
510		cmsg = (struct cmsghdr *)optptr;
511		cmsg->cmsg_level = IPPROTO_IPV6;
512		cmsg->cmsg_type = IPV6_PKTINFO;
513		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (*pkti);
514		optptr += sizeof (*cmsg);
515
516		pkti = (struct in6_pktinfo *)optptr;
517		if (connp->conn_family == AF_INET6)
518			pkti->ipi6_addr = sctp->sctp_ip6h->ip6_src;
519		else
520			IN6_IPADDR_TO_V4MAPPED(sctp->sctp_ipha->ipha_src,
521			    &pkti->ipi6_addr);
522
523		pkti->ipi6_ifindex = ifindex;
524		optptr += sizeof (*pkti);
525		ASSERT(OK_32PTR(optptr));
526		/* Save as "last" value */
527		sctp->sctp_recvifindex = ifindex;
528	}
529	/* If app asked for hoplimit and it has changed ... */
530	if (addflag.crb_ipv6_recvhoplimit) {
531		cmsg = (struct cmsghdr *)optptr;
532		cmsg->cmsg_level = IPPROTO_IPV6;
533		cmsg->cmsg_type = IPV6_HOPLIMIT;
534		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
535		optptr += sizeof (*cmsg);
536
537		*(uint_t *)optptr = ipp->ipp_hoplimit;
538		optptr += sizeof (uint_t);
539		ASSERT(OK_32PTR(optptr));
540		/* Save as "last" value */
541		sctp->sctp_recvhops = ipp->ipp_hoplimit;
542	}
543	/* If app asked for tclass and it has changed ... */
544	if (addflag.crb_ipv6_recvtclass) {
545		cmsg = (struct cmsghdr *)optptr;
546		cmsg->cmsg_level = IPPROTO_IPV6;
547		cmsg->cmsg_type = IPV6_TCLASS;
548		cmsg->cmsg_len = sizeof (*cmsg) + sizeof (uint_t);
549		optptr += sizeof (*cmsg);
550
551		*(uint_t *)optptr = ipp->ipp_tclass;
552		optptr += sizeof (uint_t);
553		ASSERT(OK_32PTR(optptr));
554		/* Save as "last" value */
555		sctp->sctp_recvtclass = ipp->ipp_tclass;
556	}
557	if (addflag.crb_ipv6_recvhopopts) {
558		cmsg = (struct cmsghdr *)optptr;
559		cmsg->cmsg_level = IPPROTO_IPV6;
560		cmsg->cmsg_type = IPV6_HOPOPTS;
561		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_hopoptslen;
562		optptr += sizeof (*cmsg);
563
564		bcopy(ipp->ipp_hopopts, optptr, ipp->ipp_hopoptslen);
565		optptr += ipp->ipp_hopoptslen;
566		ASSERT(OK_32PTR(optptr));
567		/* Save as last value */
568		ip_savebuf((void **)&sctp->sctp_hopopts,
569		    &sctp->sctp_hopoptslen,
570		    (ipp->ipp_fields & IPPF_HOPOPTS),
571		    ipp->ipp_hopopts, ipp->ipp_hopoptslen);
572	}
573	if (addflag.crb_ipv6_recvrthdrdstopts) {
574		cmsg = (struct cmsghdr *)optptr;
575		cmsg->cmsg_level = IPPROTO_IPV6;
576		cmsg->cmsg_type = IPV6_RTHDRDSTOPTS;
577		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrdstoptslen;
578		optptr += sizeof (*cmsg);
579
580		bcopy(ipp->ipp_rthdrdstopts, optptr, ipp->ipp_rthdrdstoptslen);
581		optptr += ipp->ipp_rthdrdstoptslen;
582		ASSERT(OK_32PTR(optptr));
583		/* Save as last value */
584		ip_savebuf((void **)&sctp->sctp_rthdrdstopts,
585		    &sctp->sctp_rthdrdstoptslen,
586		    (ipp->ipp_fields & IPPF_RTHDRDSTOPTS),
587		    ipp->ipp_rthdrdstopts, ipp->ipp_rthdrdstoptslen);
588	}
589	if (addflag.crb_ipv6_recvrthdr) {
590		cmsg = (struct cmsghdr *)optptr;
591		cmsg->cmsg_level = IPPROTO_IPV6;
592		cmsg->cmsg_type = IPV6_RTHDR;
593		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_rthdrlen;
594		optptr += sizeof (*cmsg);
595
596		bcopy(ipp->ipp_rthdr, optptr, ipp->ipp_rthdrlen);
597		optptr += ipp->ipp_rthdrlen;
598		ASSERT(OK_32PTR(optptr));
599		/* Save as last value */
600		ip_savebuf((void **)&sctp->sctp_rthdr,
601		    &sctp->sctp_rthdrlen,
602		    (ipp->ipp_fields & IPPF_RTHDR),
603		    ipp->ipp_rthdr, ipp->ipp_rthdrlen);
604	}
605	if (addflag.crb_ipv6_recvdstopts) {
606		cmsg = (struct cmsghdr *)optptr;
607		cmsg->cmsg_level = IPPROTO_IPV6;
608		cmsg->cmsg_type = IPV6_DSTOPTS;
609		cmsg->cmsg_len = sizeof (*cmsg) + ipp->ipp_dstoptslen;
610		optptr += sizeof (*cmsg);
611
612		bcopy(ipp->ipp_dstopts, optptr, ipp->ipp_dstoptslen);
613		optptr += ipp->ipp_dstoptslen;
614		ASSERT(OK_32PTR(optptr));
615		/* Save as last value */
616		ip_savebuf((void **)&sctp->sctp_dstopts,
617		    &sctp->sctp_dstoptslen,
618		    (ipp->ipp_fields & IPPF_DSTOPTS),
619		    ipp->ipp_dstopts, ipp->ipp_dstoptslen);
620	}
621
622	ASSERT(optptr == mp1->b_wptr);
623
624	return (0);
625}
626
627void
628sctp_free_reass(sctp_instr_t *sip)
629{
630	mblk_t *mp, *mpnext, *mctl;
631#ifdef	DEBUG
632	sctp_reass_t	*srp;
633#endif
634
635	for (mp = sip->istr_reass; mp != NULL; mp = mpnext) {
636		mpnext = mp->b_next;
637		mp->b_next = NULL;
638		mp->b_prev = NULL;
639		if (DB_TYPE(mp) == M_CTL) {
640			mctl = mp;
641#ifdef	DEBUG
642			srp = (sctp_reass_t *)DB_BASE(mctl);
643			/* Partial delivery can leave empty srp */
644			ASSERT(mp->b_cont != NULL || srp->sr_got == 0);
645#endif
646			mp = mp->b_cont;
647			mctl->b_cont = NULL;
648			freeb(mctl);
649		}
650		freemsg(mp);
651	}
652	sip->istr_reass = NULL;
653}
654
655/*
656 * If the series of data fragments of which dmp is a part is successfully
657 * reassembled, the first mblk in the series is returned. dc is adjusted
658 * to point at the data chunk in the lead mblk, and b_rptr also points to
659 * the data chunk; the following mblk's b_rptr's point at the actual payload.
660 *
661 * If the series is not yet reassembled, NULL is returned. dc is not changed.
662 * XXX should probably move this up into the state machine.
663 */
664
665/* Fragment list for un-ordered messages. Partial delivery is not supported */
static mblk_t *
sctp_uodata_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc)
{
	mblk_t		*hmp;
	mblk_t		*begin = NULL;	/* fragment carrying the B bit */
	mblk_t		*end = NULL;	/* fragment carrying the E bit */
	sctp_data_hdr_t	*qdc;
	uint32_t	ntsn;
	uint32_t	tsn = ntohl((*dc)->sdh_tsn);
#ifdef	DEBUG
	mblk_t		*mp1;
#endif

	/* First frag. */
	if (sctp->sctp_uo_frags == NULL) {
		sctp->sctp_uo_frags = dmp;
		return (NULL);
	}
	hmp = sctp->sctp_uo_frags;
	/*
	 * Insert the segment according to the TSN, fragmented unordered
	 * chunks are sequenced by TSN.
	 */
	while (hmp != NULL) {
		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
		ntsn = ntohl(qdc->sdh_tsn);
		if (SEQ_GT(ntsn, tsn)) {
			/* Insert dmp just before hmp (possibly new head). */
			if (hmp->b_prev == NULL) {
				dmp->b_next = hmp;
				hmp->b_prev = dmp;
				sctp->sctp_uo_frags = dmp;
			} else {
				dmp->b_next = hmp;
				dmp->b_prev = hmp->b_prev;
				hmp->b_prev->b_next = dmp;
				hmp->b_prev = dmp;
			}
			break;
		}
		if (hmp->b_next == NULL) {
			/* Highest TSN so far; append at the tail. */
			hmp->b_next = dmp;
			dmp->b_prev = hmp;
			break;
		}
		hmp = hmp->b_next;
	}
	/* check if we completed a msg */
	if (SCTP_DATA_GET_BBIT(*dc)) {
		begin = dmp;
	} else if (SCTP_DATA_GET_EBIT(*dc)) {
		end = dmp;
	}
	/*
	 * We walk consecutive TSNs backwards till we get a seg. with
	 * the B bit
	 */
	if (begin == NULL) {
		for (hmp = dmp->b_prev; hmp != NULL; hmp = hmp->b_prev) {
			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
			ntsn = ntohl(qdc->sdh_tsn);
			/* A gap in the TSN sequence: message incomplete. */
			if ((int32_t)(tsn - ntsn) > 1) {
				return (NULL);
			}
			if (SCTP_DATA_GET_BBIT(qdc)) {
				begin = hmp;
				break;
			}
			tsn = ntsn;
		}
	}
	tsn = ntohl((*dc)->sdh_tsn);
	/*
	 * We walk consecutive TSNs till we get a seg. with the E bit
	 */
	if (end == NULL) {
		for (hmp = dmp->b_next; hmp != NULL; hmp = hmp->b_next) {
			qdc = (sctp_data_hdr_t *)hmp->b_rptr;
			ntsn = ntohl(qdc->sdh_tsn);
			/* A gap in the TSN sequence: message incomplete. */
			if ((int32_t)(ntsn - tsn) > 1) {
				return (NULL);
			}
			if (SCTP_DATA_GET_EBIT(qdc)) {
				end = hmp;
				break;
			}
			tsn = ntsn;
		}
	}
	if (begin == NULL || end == NULL) {
		return (NULL);
	}
	/* Got one!, Remove the msg from the list */
	if (sctp->sctp_uo_frags == begin) {
		ASSERT(begin->b_prev == NULL);
		sctp->sctp_uo_frags = end->b_next;
		if (end->b_next != NULL)
			end->b_next->b_prev = NULL;
	} else {
		begin->b_prev->b_next = end->b_next;
		if (end->b_next != NULL)
			end->b_next->b_prev = begin->b_prev;
	}
	begin->b_prev = NULL;
	end->b_next = NULL;

	/*
	 * Null out b_next and b_prev and chain using b_cont.
	 */
	dmp = end = begin;
	hmp = begin->b_next;
	*dc = (sctp_data_hdr_t *)begin->b_rptr;
	begin->b_next = NULL;
	while (hmp != NULL) {
		qdc = (sctp_data_hdr_t *)hmp->b_rptr;
		/* Trailing frags: advance rptr past the data chunk header. */
		hmp->b_rptr = (uchar_t *)(qdc + 1);
		end = hmp->b_next;
		dmp->b_cont = hmp;
		dmp = hmp;

		if (end != NULL)
			hmp->b_next = NULL;
		hmp->b_prev = NULL;
		hmp = end;
	}
	BUMP_LOCAL(sctp->sctp_reassmsgs);
#ifdef	DEBUG
	mp1 = begin;
	while (mp1 != NULL) {
		ASSERT(mp1->b_next == NULL);
		ASSERT(mp1->b_prev == NULL);
		mp1 = mp1->b_cont;
	}
#endif
	return (begin);
}
801
802/*
803 * Try partial delivery.
804 */
/*
 * Deliver the leading run of consecutive-TSN fragments of a message
 * upstream before the whole message has been reassembled.  hmp is the
 * M_CTL header of the per-message reassembly queue and srp its
 * bookkeeping; *dc is updated to the lead fragment's data chunk header.
 * Returns the chopped-off chain of fragments (b_cont linked), with the
 * remaining fragments (if any) left on the queue.
 */
static mblk_t *
sctp_try_partial_delivery(sctp_t *sctp, mblk_t *hmp, sctp_reass_t *srp,
    sctp_data_hdr_t **dc)
{
	mblk_t		*mp;
	mblk_t		*dmp;
	mblk_t		*qmp;
	mblk_t		*prev;
	sctp_data_hdr_t	*qdc;
	uint32_t	tsn;

	ASSERT(DB_TYPE(hmp) == M_CTL);

	dprint(4, ("trypartial: got=%d, needed=%d\n",
	    (int)(srp->sr_got), (int)(srp->sr_needed)));

	/* The first queued fragment must be the beginning of the message. */
	mp = hmp->b_cont;
	qdc = (sctp_data_hdr_t *)mp->b_rptr;

	ASSERT(SCTP_DATA_GET_BBIT(qdc) && srp->sr_hasBchunk);

	tsn = ntohl(qdc->sdh_tsn) + 1;

	/*
	 * This loop has two exit conditions: the
	 * end of received chunks has been reached, or
	 * there is a break in the sequence. We want
	 * to chop the reassembly list as follows (the
	 * numbers are TSNs):
	 *   10 -> 11 -> 	(end of chunks)
	 *   10 -> 11 -> | 13   (break in sequence)
	 */
	prev = mp;
	mp = mp->b_cont;
	while (mp != NULL) {
		qdc = (sctp_data_hdr_t *)mp->b_rptr;
		if (ntohl(qdc->sdh_tsn) != tsn)
			break;
		prev = mp;
		mp = mp->b_cont;
		tsn++;
	}
	/*
	 * We are sending all the fragments upstream, we have to retain
	 * the srp info for further fragments.
	 */
	if (mp == NULL) {
		dmp = hmp->b_cont;
		hmp->b_cont = NULL;
		srp->sr_nexttsn = tsn;
		srp->sr_msglen = 0;
		srp->sr_needed = 0;
		srp->sr_got = 0;
		srp->sr_tail = NULL;
	} else {
		/*
		 * There is a gap then some ordered frags which are not
		 * the next deliverable tsn. When the next deliverable
		 * frag arrives it will be set as the new list head in
		 * sctp_data_frag() by setting the B bit.
		 */
		dmp = hmp->b_cont;
		hmp->b_cont = mp;
	}
	srp->sr_hasBchunk = B_FALSE;
	/*
	 * mp now points at the last chunk in the sequence,
	 * and prev points to mp's previous in the list.
	 * We chop the list at prev. Subsequent fragment
	 * deliveries will follow the normal reassembly
	 * path unless they too exceed the sctp_pd_point.
	 */
	prev->b_cont = NULL;
	srp->sr_partial_delivered = B_TRUE;

	dprint(4, ("trypartial: got some, got=%d, needed=%d\n",
	    (int)(srp->sr_got), (int)(srp->sr_needed)));

	/*
	 * Adjust all mblk's except the lead so their rptr's point to the
	 * payload. sctp_data_chunk() will need to process the lead's
	 * data chunk section, so leave it's rptr pointing at the data chunk.
	 */
	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
	if (srp->sr_tail != NULL) {
		/* Account for the lead fragment being delivered. */
		srp->sr_got--;
		ASSERT(srp->sr_got != 0);
		if (srp->sr_needed != 0) {
			srp->sr_needed--;
			ASSERT(srp->sr_needed != 0);
		}
		srp->sr_msglen -= ntohs((*dc)->sdh_len);
	}
	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
		qmp->b_rptr = (uchar_t *)(qdc + 1);

		/*
		 * Deduct the balance from got and needed here, now that
		 * we know we are actually delivering these data.
		 */
		if (srp->sr_tail != NULL) {
			srp->sr_got--;
			ASSERT(srp->sr_got != 0);
			if (srp->sr_needed != 0) {
				srp->sr_needed--;
				ASSERT(srp->sr_needed != 0);
			}
			srp->sr_msglen -= ntohs(qdc->sdh_len);
		}
	}
	ASSERT(srp->sr_msglen == 0);
	BUMP_LOCAL(sctp->sctp_reassmsgs);

	return (dmp);
}
921
922/*
923 * Handle received fragments for ordered delivery to upper layer protocol.
924 * Manage the per message reassembly queue and if this fragment completes
925 * reassembly of the message, or qualifies the already reassembled data
926 * for partial delivery, prepare the message for delivery upstream.
927 *
928 * tpfinished in the caller remains set only when the incoming fragment
929 * has completed the reassembly of the message associated with its ssn.
930 */
931static mblk_t *
932sctp_data_frag(sctp_t *sctp, mblk_t *dmp, sctp_data_hdr_t **dc, int *error,
933    sctp_instr_t *sip, boolean_t *tpfinished)
934{
935	mblk_t		*reassq_curr, *reassq_next, *reassq_prev;
936	mblk_t		*new_reassq;
937	mblk_t		*qmp;
938	mblk_t		*first_mp;
939	sctp_reass_t	*srp;
940	sctp_data_hdr_t	*qdc;
941	sctp_data_hdr_t	*bdc;
942	sctp_data_hdr_t	*edc;
943	uint32_t	tsn;
944	uint16_t	fraglen = 0;
945
946	*error = 0;
947
948	/*
949	 * Find the reassembly queue for this data chunk, if none
950	 * yet exists, a new per message queue will be created and
951	 * appended to the end of the list of per message queues.
952	 *
953	 * sip points on sctp_instr_t representing instream messages
954	 * as yet undelivered for this stream (sid) of the association.
955	 */
956	reassq_next = reassq_prev = sip->istr_reass;
957	for (; reassq_next != NULL; reassq_next = reassq_next->b_next) {
958		srp = (sctp_reass_t *)DB_BASE(reassq_next);
959		if (ntohs((*dc)->sdh_ssn) == srp->sr_ssn) {
960			reassq_curr = reassq_next;
961			goto foundit;
962		} else if (SSN_GT(srp->sr_ssn, ntohs((*dc)->sdh_ssn)))
963			break;
964		reassq_prev = reassq_next;
965	}
966
967	/*
968	 * First fragment of this message received, allocate a M_CTL that
969	 * will head the reassembly queue for this message. The message
970	 * and all its fragments are identified by having the same ssn.
971	 *
972	 * Arriving fragments will be inserted in tsn order on the
973	 * reassembly queue for this message (ssn), linked by b_cont.
974	 */
975	if ((new_reassq = allocb(sizeof (*srp), BPRI_MED)) == NULL) {
976		*error = ENOMEM;
977		return (NULL);
978	}
979	DB_TYPE(new_reassq) = M_CTL;
980	srp = (sctp_reass_t *)DB_BASE(new_reassq);
981	new_reassq->b_cont = dmp;
982
983	/*
984	 * All per ssn reassembly queues, (one for each message) on
985	 * this stream are doubly linked by b_next/b_prev back to the
986	 * instr_reass of the instream structure associated with this
987	 * stream id, (sip is initialized as sctp->sctp_instr[sid]).
988	 * Insert the new reassembly queue in the correct (ssn) order.
989	 */
990	if (reassq_next != NULL) {
991		if (sip->istr_reass == reassq_next) {
992			/* head insertion */
993			sip->istr_reass = new_reassq;
994			new_reassq->b_next = reassq_next;
995			new_reassq->b_prev = NULL;
996			reassq_next->b_prev = new_reassq;
997		} else {
998			/* mid queue insertion */
999			reassq_prev->b_next = new_reassq;
1000			new_reassq->b_prev = reassq_prev;
1001			new_reassq->b_next = reassq_next;
1002			reassq_next->b_prev = new_reassq;
1003		}
1004	} else {
1005		/* place new reassembly queue at the end */
1006		if (sip->istr_reass == NULL) {
1007			sip->istr_reass = new_reassq;
1008			new_reassq->b_prev = NULL;
1009		} else {
1010			reassq_prev->b_next = new_reassq;
1011			new_reassq->b_prev = reassq_prev;
1012		}
1013		new_reassq->b_next = NULL;
1014	}
1015	srp->sr_partial_delivered = B_FALSE;
1016	srp->sr_ssn = ntohs((*dc)->sdh_ssn);
1017	srp->sr_hasBchunk = B_FALSE;
1018empty_srp:
1019	srp->sr_needed = 0;
1020	srp->sr_got = 1;
1021	/* tail always the highest tsn on the reassembly queue for this ssn */
1022	srp->sr_tail = dmp;
1023	if (SCTP_DATA_GET_BBIT(*dc)) {
1024		/* Incoming frag is flagged as the beginning of message */
1025		srp->sr_msglen = ntohs((*dc)->sdh_len);
1026		srp->sr_nexttsn = ntohl((*dc)->sdh_tsn) + 1;
1027		srp->sr_hasBchunk = B_TRUE;
1028	} else if (srp->sr_partial_delivered &&
1029	    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1030		/*
1031		 * The real beginning fragment of the message was already
1032		 * delivered upward, so this is the earliest frag expected.
1033		 * Fake the B-bit then see if this frag also completes the
1034		 * message.
1035		 */
1036		SCTP_DATA_SET_BBIT(*dc);
1037		srp->sr_hasBchunk = B_TRUE;
1038		srp->sr_msglen = ntohs((*dc)->sdh_len);
1039		if (SCTP_DATA_GET_EBIT(*dc)) {
1040			/* This frag is marked as the end of message */
1041			srp->sr_needed = 1;
1042			/* Got all fragments of this message now */
1043			goto frag_done;
1044		}
1045		srp->sr_nexttsn++;
1046	}
1047
1048	/* The only fragment of this message currently queued */
1049	*tpfinished = B_FALSE;
1050	return (NULL);
1051foundit:
1052	/*
1053	 * This message already has a reassembly queue. Insert the new frag
1054	 * in the reassembly queue. Try the tail first, on the assumption
1055	 * that the fragments are arriving in order.
1056	 */
1057	qmp = srp->sr_tail;
1058
1059	/*
1060	 * A NULL tail means all existing fragments of the message have
1061	 * been entirely consumed during a partially delivery.
1062	 */
1063	if (qmp == NULL) {
1064		ASSERT(srp->sr_got == 0 && srp->sr_needed == 0 &&
1065		    srp->sr_partial_delivered);
1066		ASSERT(reassq_curr->b_cont == NULL);
1067		reassq_curr->b_cont = dmp;
1068		goto empty_srp;
1069	} else {
1070		/*
1071		 * If partial delivery did take place but the next arriving
1072		 * fragment was not the next to be delivered, or partial
1073		 * delivery broke off due to a gap, fragments remain on the
1074		 * tail. The next fragment due to be delivered still has to
1075		 * be set as the new head of list upon arrival. Fake B-bit
1076		 * on that frag then see if it also completes the message.
1077		 */
1078		if (srp->sr_partial_delivered &&
1079		    srp->sr_nexttsn == ntohl((*dc)->sdh_tsn)) {
1080			SCTP_DATA_SET_BBIT(*dc);
1081			srp->sr_hasBchunk = B_TRUE;
1082			if (SCTP_DATA_GET_EBIT(*dc)) {
1083				/* Got all fragments of this message now */
1084				goto frag_done;
1085			}
1086		}
1087	}
1088
1089	/* grab the frag header of already queued tail frag for comparison */
1090	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1091	ASSERT(qmp->b_cont == NULL);
1092
1093	/* check if the frag goes on the tail in order */
1094	if (SEQ_GT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1095		qmp->b_cont = dmp;
1096		srp->sr_tail = dmp;
1097		dmp->b_cont = NULL;
1098		if (srp->sr_hasBchunk && srp->sr_nexttsn ==
1099		    ntohl((*dc)->sdh_tsn)) {
1100			srp->sr_msglen += ntohs((*dc)->sdh_len);
1101			srp->sr_nexttsn++;
1102		}
1103		goto inserted;
1104	}
1105
1106	/* Next check if we should insert this frag at the beginning */
1107	qmp = reassq_curr->b_cont;
1108	qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1109	if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1110		dmp->b_cont = qmp;
1111		reassq_curr->b_cont = dmp;
1112		if (SCTP_DATA_GET_BBIT(*dc)) {
1113			srp->sr_hasBchunk = B_TRUE;
1114			srp->sr_nexttsn = ntohl((*dc)->sdh_tsn);
1115		}
1116		goto preinserted;
1117	}
1118
	/* Insert this frag in its correct order in the middle */
1120	for (;;) {
1121		/* Tail check above should have caught this */
1122		ASSERT(qmp->b_cont != NULL);
1123
1124		qdc = (sctp_data_hdr_t *)qmp->b_cont->b_rptr;
1125		if (SEQ_LT(ntohl((*dc)->sdh_tsn), ntohl(qdc->sdh_tsn))) {
1126			/* insert here */
1127			dmp->b_cont = qmp->b_cont;
1128			qmp->b_cont = dmp;
1129			break;
1130		}
1131		qmp = qmp->b_cont;
1132	}
1133preinserted:
1134	/*
1135	 * Need head of message and to be due to deliver, otherwise skip
1136	 * the recalculation of the message length below.
1137	 */
1138	if (!srp->sr_hasBchunk || ntohl((*dc)->sdh_tsn) != srp->sr_nexttsn)
1139		goto inserted;
1140	/*
1141	 * fraglen contains the length of consecutive chunks of fragments.
1142	 * starting from the chunk we just inserted.
1143	 */
1144	tsn = srp->sr_nexttsn;
1145	for (qmp = dmp; qmp != NULL; qmp = qmp->b_cont) {
1146		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1147		if (tsn != ntohl(qdc->sdh_tsn))
1148			break;
1149		fraglen += ntohs(qdc->sdh_len);
1150		tsn++;
1151	}
1152	srp->sr_nexttsn = tsn;
1153	srp->sr_msglen += fraglen;
1154inserted:
1155	srp->sr_got++;
1156	first_mp = reassq_curr->b_cont;
1157	/* Prior to this frag either the beginning or end frag was missing */
1158	if (srp->sr_needed == 0) {
1159		/* used to check if we have the first and last fragments */
1160		bdc = (sctp_data_hdr_t *)first_mp->b_rptr;
1161		edc = (sctp_data_hdr_t *)srp->sr_tail->b_rptr;
1162
1163		/*
1164		 * If we now have both the beginning and the end of the message,
1165		 * calculate how many fragments in the complete message.
1166		 */
1167		if (SCTP_DATA_GET_BBIT(bdc) && SCTP_DATA_GET_EBIT(edc)) {
1168			srp->sr_needed = ntohl(edc->sdh_tsn) -
1169			    ntohl(bdc->sdh_tsn) + 1;
1170		}
1171	}
1172
1173	/*
1174	 * Try partial delivery if the message length has exceeded the
1175	 * partial delivery point. Only do this if we can immediately
1176	 * deliver the partially assembled message, and only partially
1177	 * deliver one message at a time (i.e. messages cannot be
1178	 * intermixed arriving at the upper layer).
1179	 * sctp_try_partial_delivery() will return a message consisting
1180	 * of only consecutive fragments.
1181	 */
1182	if (srp->sr_needed != srp->sr_got) {
1183		/* we don't have the full message yet */
1184		dmp = NULL;
1185		if (ntohl((*dc)->sdh_tsn) <= sctp->sctp_ftsn &&
1186		    srp->sr_msglen >= sctp->sctp_pd_point &&
1187		    srp->sr_ssn == sip->nextseq) {
1188			dmp = sctp_try_partial_delivery(sctp, reassq_curr,
1189			    srp, dc);
1190		}
1191		*tpfinished = B_FALSE;
1192		/*
1193		 * NULL unless a segment of the message now qualified for
1194		 * partial_delivery and has been prepared for delivery by
1195		 * sctp_try_partial_delivery().
1196		 */
1197		return (dmp);
1198	}
1199frag_done:
1200	/*
1201	 * Reassembly complete for this message, prepare the data for delivery.
1202	 * First unlink the reassembly queue for this ssn from the list of
1203	 * messages in reassembly.
1204	 */
1205	if (sip->istr_reass == reassq_curr) {
1206		sip->istr_reass = reassq_curr->b_next;
1207		if (reassq_curr->b_next)
1208			reassq_curr->b_next->b_prev = NULL;
1209	} else {
1210		ASSERT(reassq_curr->b_prev != NULL);
1211		reassq_curr->b_prev->b_next = reassq_curr->b_next;
1212		if (reassq_curr->b_next)
1213			reassq_curr->b_next->b_prev = reassq_curr->b_prev;
1214	}
1215
1216	/*
1217	 * Need to clean up b_prev and b_next as freeb() will
1218	 * ASSERT that they are unused.
1219	 */
1220	reassq_curr->b_next = NULL;
1221	reassq_curr->b_prev = NULL;
1222
1223	dmp = reassq_curr;
1224	/* point to the head of the reassembled data message */
1225	dmp = dmp->b_cont;
1226	reassq_curr->b_cont = NULL;
1227	freeb(reassq_curr);
1228	/* Tell our caller that we are returning a complete message. */
1229	*tpfinished = B_TRUE;
1230
1231	/*
1232	 * Adjust all mblk's except the lead so their rptr's point to the
1233	 * payload. sctp_data_chunk() will need to process the lead's data
1234	 * data chunk section, so leave its rptr pointing at the data chunk
1235	 * header.
1236	 */
1237	*dc = (sctp_data_hdr_t *)dmp->b_rptr;
1238	for (qmp = dmp->b_cont; qmp != NULL; qmp = qmp->b_cont) {
1239		qdc = (sctp_data_hdr_t *)qmp->b_rptr;
1240		qmp->b_rptr = (uchar_t *)(qdc + 1);
1241	}
1242	BUMP_LOCAL(sctp->sctp_reassmsgs);
1243
1244	return (dmp);
1245}
1246
1247static void
1248sctp_add_dup(uint32_t tsn, mblk_t **dups)
1249{
1250	mblk_t *mp;
1251	size_t bsize = SCTP_DUP_MBLK_SZ * sizeof (tsn);
1252
1253	if (dups == NULL) {
1254		return;
1255	}
1256
1257	/* first time? */
1258	if (*dups == NULL) {
1259		*dups = allocb(bsize, BPRI_MED);
1260		if (*dups == NULL) {
1261			return;
1262		}
1263	}
1264
1265	mp = *dups;
1266	if ((mp->b_wptr - mp->b_rptr) >= bsize) {
1267		/* maximum reached */
1268		return;
1269	}
1270
1271	/* add the duplicate tsn */
1272	bcopy(&tsn, mp->b_wptr, sizeof (tsn));
1273	mp->b_wptr += sizeof (tsn);
1274	ASSERT((mp->b_wptr - mp->b_rptr) <= bsize);
1275}
1276
1277/*
1278 * All incoming sctp data, complete messages and fragments are handled by
1279 * this function. Unless the U-bit is set in the data chunk it will be
1280 * delivered in order or queued until an in-order delivery can be made.
1281 */
1282static void
1283sctp_data_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, mblk_t *mp, mblk_t **dups,
1284    sctp_faddr_t *fp, ip_pkt_t *ipp, ip_recv_attr_t *ira)
1285{
1286	sctp_data_hdr_t *dc;
1287	mblk_t *dmp, *pmp;
1288	sctp_instr_t *instr;
1289	int ubit;
1290	int sid;
1291	int isfrag;
1292	uint16_t ssn;
1293	uint32_t oftsn;
1294	boolean_t can_deliver = B_TRUE;
1295	uint32_t tsn;
1296	int dlen;
1297	boolean_t tpfinished = B_TRUE;
1298	sctp_stack_t	*sctps = sctp->sctp_sctps;
1299	int	error;
1300
1301	/* The following are used multiple times, so we inline them */
1302#define	SCTP_ACK_IT(sctp, tsn)						\
1303	if (tsn == sctp->sctp_ftsn) {					\
1304		dprint(2, ("data_chunk: acking next %x\n", tsn));	\
1305		(sctp)->sctp_ftsn++;					\
1306		if ((sctp)->sctp_sack_gaps > 0)				\
1307			(sctp)->sctp_force_sack = 1;			\
1308	} else if (SEQ_GT(tsn, sctp->sctp_ftsn)) {			\
1309		/* Got a gap; record it */				\
1310		BUMP_LOCAL(sctp->sctp_outseqtsns);			\
1311		dprint(2, ("data_chunk: acking gap %x\n", tsn));	\
1312		sctp_ack_add(&sctp->sctp_sack_info, tsn,		\
1313		    &sctp->sctp_sack_gaps);				\
1314		sctp->sctp_force_sack = 1;				\
1315	}
1316
1317	dmp = NULL;
1318
1319	dc = (sctp_data_hdr_t *)ch;
1320	tsn = ntohl(dc->sdh_tsn);
1321
1322	dprint(3, ("sctp_data_chunk: mp=%p tsn=%x\n", (void *)mp, tsn));
1323
1324	/* Check for duplicates */
1325	if (SEQ_LT(tsn, sctp->sctp_ftsn)) {
1326		dprint(4, ("sctp_data_chunk: dropping duplicate\n"));
1327		BUMP_LOCAL(sctp->sctp_idupchunks);
1328		sctp->sctp_force_sack = 1;
1329		sctp_add_dup(dc->sdh_tsn, dups);
1330		return;
1331	}
1332
1333	/* Check for dups of sack'ed data */
1334	if (sctp->sctp_sack_info != NULL) {
1335		sctp_set_t *sp;
1336
1337		for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1338			if (SEQ_GEQ(tsn, sp->begin) && SEQ_LEQ(tsn, sp->end)) {
1339				dprint(4,
1340				    ("sctp_data_chunk: dropping dup > "
1341				    "cumtsn\n"));
1342				BUMP_LOCAL(sctp->sctp_idupchunks);
1343				sctp->sctp_force_sack = 1;
1344				sctp_add_dup(dc->sdh_tsn, dups);
1345				return;
1346			}
1347		}
1348	}
1349
1350	/* We can no longer deliver anything up, but still need to handle it. */
1351	if (SCTP_IS_DETACHED(sctp)) {
1352		SCTPS_BUMP_MIB(sctps, sctpInClosed);
1353		can_deliver = B_FALSE;
1354	}
1355
1356	dlen = ntohs(dc->sdh_len) - sizeof (*dc);
1357
1358	/*
1359	 * Check for buffer space. Note if this is the next expected TSN
1360	 * we have to take it to avoid deadlock because we cannot deliver
1361	 * later queued TSNs and thus clear buffer space without it.
1362	 * We drop anything that is purely zero window probe data here.
1363	 */
1364	if ((sctp->sctp_rwnd - sctp->sctp_rxqueued < dlen) &&
1365	    (tsn != sctp->sctp_ftsn || sctp->sctp_rwnd == 0)) {
1366		/* Drop and SACK, but don't advance the cumulative TSN. */
1367		sctp->sctp_force_sack = 1;
1368		dprint(0, ("sctp_data_chunk: exceed rwnd %d rxqueued %d "
1369		    "dlen %d ssn %d tsn %x\n", sctp->sctp_rwnd,
1370		    sctp->sctp_rxqueued, dlen, ntohs(dc->sdh_ssn),
1371		    ntohl(dc->sdh_tsn)));
1372		return;
1373	}
1374
1375	sid = ntohs(dc->sdh_sid);
1376
1377	/* Data received for a stream not negotiated for this association */
1378	if (sid >= sctp->sctp_num_istr) {
1379		sctp_bsc_t	inval_parm;
1380
1381		/* Will populate the CAUSE block in the ERROR chunk. */
1382		inval_parm.bsc_sid = dc->sdh_sid;
1383		/* RESERVED, ignored at the receiving end */
1384		inval_parm.bsc_pad = 0;
1385
1386		/* ack and drop it */
1387		sctp_add_err(sctp, SCTP_ERR_BAD_SID, (void *)&inval_parm,
1388		    sizeof (sctp_bsc_t), fp);
1389		SCTP_ACK_IT(sctp, tsn);
1390		return;
1391	}
1392
1393	/* unordered delivery OK for this data if ubit set */
1394	ubit = SCTP_DATA_GET_UBIT(dc);
1395	ASSERT(sctp->sctp_instr != NULL);
1396
1397	/* select per stream structure for this stream from the array */
1398	instr = &sctp->sctp_instr[sid];
1399	/* Initialize the stream, if not yet used */
1400	if (instr->sctp == NULL)
1401		instr->sctp = sctp;
1402
1403	/* Begin and End bit set would mean a complete message */
1404	isfrag = !(SCTP_DATA_GET_BBIT(dc) && SCTP_DATA_GET_EBIT(dc));
1405
1406	/* The ssn of this sctp message and of any fragments in it */
1407	ssn = ntohs(dc->sdh_ssn);
1408
1409	dmp = dupb(mp);
1410	if (dmp == NULL) {
1411		/* drop it and don't ack, let the peer retransmit */
1412		return;
1413	}
1414	/*
1415	 * Past header and payload, note: the underlying buffer may
1416	 * contain further chunks from the same incoming IP packet,
1417	 * if so db_ref will be greater than one.
1418	 */
1419	dmp->b_wptr = (uchar_t *)ch + ntohs(ch->sch_len);
1420
1421	sctp->sctp_rxqueued += dlen;
1422
1423	oftsn = sctp->sctp_ftsn;
1424
1425	if (isfrag) {
1426
1427		error = 0;
1428		/* fragmented data chunk */
1429		dmp->b_rptr = (uchar_t *)dc;
1430		if (ubit) {
1431			/* prepare data for unordered delivery */
1432			dmp = sctp_uodata_frag(sctp, dmp, &dc);
1433#if	DEBUG
1434			if (dmp != NULL) {
1435				ASSERT(instr ==
1436				    &sctp->sctp_instr[sid]);
1437			}
1438#endif
1439		} else {
1440			/*
1441			 * Assemble fragments and queue for ordered delivery,
1442			 * dmp returned is NULL or the head of a complete or
1443			 * "partial delivery" message. Any returned message
1444			 * and all its fragments will have the same ssn as the
1445			 * input fragment currently being handled.
1446			 */
1447			dmp = sctp_data_frag(sctp, dmp, &dc, &error, instr,
1448			    &tpfinished);
1449		}
1450		if (error == ENOMEM) {
1451			/* back out the adjustment made earlier */
1452			sctp->sctp_rxqueued -= dlen;
1453			/*
1454			 * Don't ack the segment,
1455			 * the peer will retransmit.
1456			 */
1457			return;
1458		}
1459
1460		if (dmp == NULL) {
1461			/*
1462			 * The frag has been queued for later in-order delivery,
1463			 * but the cumulative TSN may need to advance, so also
1464			 * need to perform the gap ack checks at the done label.
1465			 */
1466			SCTP_ACK_IT(sctp, tsn);
1467			DTRACE_PROBE4(sctp_data_frag_queued, sctp_t *, sctp,
1468			    int, sid, int, tsn, uint16_t, ssn);
1469			goto done;
1470		}
1471	}
1472
1473	/*
1474	 * Unless message is the next for delivery to the ulp, queue complete
1475	 * message in the correct order for ordered delivery.
1476	 * Note: tpfinished is true when the incoming chunk contains a complete
1477	 * message or is the final missing fragment which completed a message.
1478	 */
1479	if (!ubit && tpfinished && ssn != instr->nextseq) {
1480		/* Adjust rptr to point at the data chunk for compares */
1481		dmp->b_rptr = (uchar_t *)dc;
1482
1483		dprint(2,
1484		    ("data_chunk: inserted %x in pq (ssn %d expected %d)\n",
1485		    ntohl(dc->sdh_tsn), (int)(ssn), (int)(instr->nextseq)));
1486
1487		if (instr->istr_msgs == NULL) {
1488			instr->istr_msgs = dmp;
1489			ASSERT(dmp->b_prev == NULL && dmp->b_next == NULL);
1490		} else {
1491			mblk_t			*imblk = instr->istr_msgs;
1492			sctp_data_hdr_t		*idc;
1493
1494			/*
1495			 * XXXNeed to take sequence wraps into account,
1496			 * ... and a more efficient insertion algo.
1497			 */
1498			for (;;) {
1499				idc = (sctp_data_hdr_t *)imblk->b_rptr;
1500				if (SSN_GT(ntohs(idc->sdh_ssn),
1501				    ntohs(dc->sdh_ssn))) {
1502					if (instr->istr_msgs == imblk) {
1503						instr->istr_msgs = dmp;
1504						dmp->b_next = imblk;
1505						imblk->b_prev = dmp;
1506					} else {
1507						ASSERT(imblk->b_prev != NULL);
1508						imblk->b_prev->b_next = dmp;
1509						dmp->b_prev = imblk->b_prev;
1510						imblk->b_prev = dmp;
1511						dmp->b_next = imblk;
1512					}
1513					break;
1514				}
1515				if (imblk->b_next == NULL) {
1516					imblk->b_next = dmp;
1517					dmp->b_prev = imblk;
1518					break;
1519				}
1520				imblk = imblk->b_next;
1521			}
1522		}
1523		(instr->istr_nmsgs)++;
1524		(sctp->sctp_istr_nmsgs)++;
1525		SCTP_ACK_IT(sctp, tsn);
1526		DTRACE_PROBE4(sctp_pqueue_completemsg, sctp_t *, sctp,
1527		    int, sid, int, tsn, uint16_t, ssn);
1528		return;
1529	}
1530
1531	/*
1532	 * Deliver the data directly. Recalculate dlen now since
1533	 * we may have just reassembled this data.
1534	 */
1535	dlen = dmp->b_wptr - (uchar_t *)dc - sizeof (*dc);
1536	for (pmp = dmp->b_cont; pmp != NULL; pmp = pmp->b_cont)
1537		dlen += MBLKL(pmp);
1538	ASSERT(sctp->sctp_rxqueued >= dlen);
1539
1540	/* Deliver the message. */
1541	sctp->sctp_rxqueued -= dlen;
1542
1543	if (can_deliver) {
1544		/* step past header to the payload */
1545		dmp->b_rptr = (uchar_t *)(dc + 1);
1546		if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1547		    ipp, ira) == 0) {
1548			dprint(1, ("sctp_data_chunk: delivering %lu bytes\n",
1549			    msgdsize(dmp)));
1550			/*
1551			 * We overload the meaning of b_flag for SCTP sockfs
1552			 * internal use, to advise sockfs of partial delivery
1553			 * semantics.
1554			 */
1555			dmp->b_flag = tpfinished ? 0 : SCTP_PARTIAL_DATA;
1556			if (sctp->sctp_flowctrld) {
1557				sctp->sctp_rwnd -= dlen;
1558				if (sctp->sctp_rwnd < 0)
1559					sctp->sctp_rwnd = 0;
1560			}
1561			if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1562			    msgdsize(dmp), 0, &error, NULL) <= 0) {
1563				sctp->sctp_flowctrld = B_TRUE;
1564			}
1565			SCTP_ACK_IT(sctp, tsn);
1566		} else {
1567			/* No memory don't ack, the peer will retransmit. */
1568			freemsg(dmp);
1569			return;
1570		}
1571	} else {
1572		/* Closed above, ack to peer and free the data */
1573		freemsg(dmp);
1574		SCTP_ACK_IT(sctp, tsn);
1575	}
1576
1577	/*
1578	 * Data now enqueued, may already have been processed and free'd
1579	 * by the ULP (or we may have just freed it above, if we could not
1580	 * deliver), so we must not reference it (this is why we saved the
1581	 * ssn and ubit earlier).
1582	 */
1583	if (ubit != 0) {
1584		BUMP_LOCAL(sctp->sctp_iudchunks);
1585		goto done;
1586	}
1587	BUMP_LOCAL(sctp->sctp_idchunks);
1588
1589	/*
1590	 * There was a partial delivery and it has not finished,
1591	 * don't pull anything from the pqueues or increment the
1592	 * nextseq. This msg must complete before starting on
1593	 * the next ssn and the partial message must have the
1594	 * same ssn as the next expected message..
1595	 */
1596	if (!tpfinished) {
1597		DTRACE_PROBE4(sctp_partial_delivery, sctp_t *, sctp,
1598		    int, sid, int, tsn, uint16_t, ssn);
1599		/*
1600		 * Verify the partial delivery is part of the
1601		 * message expected for ordered delivery.
1602		 */
1603		if (ssn != instr->nextseq) {
1604			DTRACE_PROBE4(sctp_partial_delivery_error,
1605			    sctp_t *, sctp, int, sid, int, tsn,
1606			    uint16_t, ssn);
1607			cmn_err(CE_WARN, "sctp partial"
1608			    " delivery error, sctp 0x%p"
1609			    " sid = 0x%x ssn != nextseq"
1610			    " tsn 0x%x ftsn 0x%x"
1611			    " ssn 0x%x nextseq 0x%x",
1612			    (void *)sctp, sid,
1613			    tsn, sctp->sctp_ftsn, ssn,
1614			    instr->nextseq);
1615		}
1616
1617		ASSERT(ssn == instr->nextseq);
1618		goto done;
1619	}
1620
1621	if (ssn != instr->nextseq) {
1622		DTRACE_PROBE4(sctp_inorder_delivery_error,
1623		    sctp_t *, sctp, int, sid, int, tsn,
1624		    uint16_t, ssn);
1625		cmn_err(CE_WARN, "sctp in-order delivery error, sctp 0x%p "
1626		    "sid = 0x%x ssn != nextseq ssn 0x%x nextseq 0x%x",
1627		    (void *)sctp, sid, ssn, instr->nextseq);
1628	}
1629
1630	ASSERT(ssn == instr->nextseq);
1631
1632	DTRACE_PROBE4(sctp_deliver_completemsg, sctp_t *, sctp, int, sid,
1633	    int, tsn, uint16_t, ssn);
1634
1635	instr->nextseq = ssn + 1;
1636
1637	/*
1638	 * Deliver any successive data chunks waiting in the instr pqueue
1639	 * for the data just sent up.
1640	 */
1641	while (instr->istr_nmsgs > 0) {
1642		dmp = (mblk_t *)instr->istr_msgs;
1643		dc = (sctp_data_hdr_t *)dmp->b_rptr;
1644		ssn = ntohs(dc->sdh_ssn);
1645		tsn = ntohl(dc->sdh_tsn);
1646		/* Stop at the first gap in the sequence */
1647		if (ssn != instr->nextseq)
1648			break;
1649
1650		DTRACE_PROBE4(sctp_deliver_pqueuedmsg, sctp_t *, sctp,
1651		    int, sid, int, tsn, uint16_t, ssn);
1652		/*
1653		 * Ready to deliver all data before the gap
1654		 * to the upper layer.
1655		 */
1656		(instr->istr_nmsgs)--;
1657		(instr->nextseq)++;
1658		(sctp->sctp_istr_nmsgs)--;
1659
1660		instr->istr_msgs = instr->istr_msgs->b_next;
1661		if (instr->istr_msgs != NULL)
1662			instr->istr_msgs->b_prev = NULL;
1663		dmp->b_next = dmp->b_prev = NULL;
1664
1665		dprint(2, ("data_chunk: pulling %x from pq (ssn %d)\n",
1666		    ntohl(dc->sdh_tsn), (int)ssn));
1667
1668		/*
1669		 * Composite messages indicate this chunk was reassembled,
1670		 * each b_cont represents another TSN; Follow the chain to
1671		 * reach the frag with the last tsn in order to advance ftsn
1672		 * shortly by calling SCTP_ACK_IT().
1673		 */
1674		dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
1675		for (pmp = dmp->b_cont; pmp; pmp = pmp->b_cont)
1676			dlen += MBLKL(pmp);
1677
1678		ASSERT(sctp->sctp_rxqueued >= dlen);
1679
1680		sctp->sctp_rxqueued -= dlen;
1681		if (can_deliver) {
1682			dmp->b_rptr = (uchar_t *)(dc + 1);
1683			if (sctp_input_add_ancillary(sctp, &dmp, dc, fp,
1684			    ipp, ira) == 0) {
1685				dprint(1, ("sctp_data_chunk: delivering %lu "
1686				    "bytes\n", msgdsize(dmp)));
1687				/*
1688				 * Meaning of b_flag overloaded for SCTP sockfs
1689				 * internal use, advise sockfs of partial
1690				 * delivery semantics.
1691				 */
1692				dmp->b_flag = tpfinished ?
1693				    0 : SCTP_PARTIAL_DATA;
1694				if (sctp->sctp_flowctrld) {
1695					sctp->sctp_rwnd -= dlen;
1696					if (sctp->sctp_rwnd < 0)
1697						sctp->sctp_rwnd = 0;
1698				}
1699				if (sctp->sctp_ulp_recv(sctp->sctp_ulpd, dmp,
1700				    msgdsize(dmp), 0, &error, NULL) <= 0) {
1701					sctp->sctp_flowctrld = B_TRUE;
1702				}
1703				SCTP_ACK_IT(sctp, tsn);
1704			} else {
1705				/* don't ack, the peer will retransmit */
1706				freemsg(dmp);
1707				return;
1708			}
1709		} else {
1710			/* Closed above, ack and free the data */
1711			freemsg(dmp);
1712			SCTP_ACK_IT(sctp, tsn);
1713		}
1714	}
1715
1716done:
1717
1718	/*
1719	 * If there are gap reports pending, check if advancing
1720	 * the ftsn here closes a gap. If so, we can advance
1721	 * ftsn to the end of the set.
1722	 */
1723	if (sctp->sctp_sack_info != NULL &&
1724	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
1725		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
1726	}
1727	/*
1728	 * If ftsn has moved forward, maybe we can remove gap reports.
1729	 * NB: dmp may now be NULL, so don't dereference it here.
1730	 */
1731	if (oftsn != sctp->sctp_ftsn && sctp->sctp_sack_info != NULL) {
1732		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
1733		    &sctp->sctp_sack_gaps);
1734		dprint(2, ("data_chunk: removed acks before %x (num=%d)\n",
1735		    sctp->sctp_ftsn - 1, sctp->sctp_sack_gaps));
1736	}
1737
1738#ifdef	DEBUG
1739	if (sctp->sctp_sack_info != NULL) {
1740		ASSERT(sctp->sctp_ftsn != sctp->sctp_sack_info->begin);
1741	}
1742#endif
1743
1744#undef	SCTP_ACK_IT
1745}
1746
1747void
1748sctp_fill_sack(sctp_t *sctp, unsigned char *dst, int sacklen)
1749{
1750	sctp_chunk_hdr_t *sch;
1751	sctp_sack_chunk_t *sc;
1752	sctp_sack_frag_t *sf;
1753	uint16_t num_gaps = sctp->sctp_sack_gaps;
1754	sctp_set_t *sp;
1755
1756	/* Chunk hdr */
1757	sch = (sctp_chunk_hdr_t *)dst;
1758	sch->sch_id = CHUNK_SACK;
1759	sch->sch_flags = 0;
1760	sch->sch_len = htons(sacklen);
1761
1762	/* SACK chunk */
1763	sctp->sctp_lastacked = sctp->sctp_ftsn - 1;
1764
1765	sc = (sctp_sack_chunk_t *)(sch + 1);
1766	sc->ssc_cumtsn = htonl(sctp->sctp_lastacked);
1767	if (sctp->sctp_rxqueued < sctp->sctp_rwnd) {
1768		sc->ssc_a_rwnd = htonl(sctp->sctp_rwnd - sctp->sctp_rxqueued);
1769	} else {
1770		sc->ssc_a_rwnd = 0;
1771	}
1772	/* Remember the last window sent to peer. */
1773	sctp->sctp_arwnd = sc->ssc_a_rwnd;
1774	sc->ssc_numfrags = htons(num_gaps);
1775	sc->ssc_numdups = 0;
1776
1777	/* lay in gap reports */
1778	sf = (sctp_sack_frag_t *)(sc + 1);
1779	for (sp = sctp->sctp_sack_info; sp; sp = sp->next) {
1780		uint16_t offset;
1781
1782		/* start */
1783		if (sp->begin > sctp->sctp_lastacked) {
1784			offset = (uint16_t)(sp->begin - sctp->sctp_lastacked);
1785		} else {
1786			/* sequence number wrap */
1787			offset = (uint16_t)(UINT32_MAX - sctp->sctp_lastacked +
1788			    sp->begin);
1789		}
1790		sf->ssf_start = htons(offset);
1791
1792		/* end */
1793		if (sp->end >= sp->begin) {
1794			offset += (uint16_t)(sp->end - sp->begin);
1795		} else {
1796			/* sequence number wrap */
1797			offset += (uint16_t)(UINT32_MAX - sp->begin + sp->end);
1798		}
1799		sf->ssf_end = htons(offset);
1800
1801		sf++;
1802		/* This is just for debugging (a la the following assertion) */
1803		num_gaps--;
1804	}
1805
1806	ASSERT(num_gaps == 0);
1807
1808	/* If the SACK timer is running, stop it */
1809	if (sctp->sctp_ack_timer_running) {
1810		sctp_timer_stop(sctp->sctp_ack_mp);
1811		sctp->sctp_ack_timer_running = B_FALSE;
1812	}
1813
1814	BUMP_LOCAL(sctp->sctp_obchunks);
1815	BUMP_LOCAL(sctp->sctp_osacks);
1816}
1817
1818mblk_t *
1819sctp_make_sack(sctp_t *sctp, sctp_faddr_t *sendto, mblk_t *dups)
1820{
1821	mblk_t *smp;
1822	size_t slen;
1823	sctp_chunk_hdr_t *sch;
1824	sctp_sack_chunk_t *sc;
1825	int32_t acks_max;
1826	sctp_stack_t	*sctps = sctp->sctp_sctps;
1827	uint32_t	dups_len;
1828	sctp_faddr_t	*fp;
1829
1830	ASSERT(sendto != NULL);
1831
1832	if (sctp->sctp_force_sack) {
1833		sctp->sctp_force_sack = 0;
1834		goto checks_done;
1835	}
1836
1837	acks_max = sctps->sctps_deferred_acks_max;
1838	if (sctp->sctp_state == SCTPS_ESTABLISHED) {
1839		if (sctp->sctp_sack_toggle < acks_max) {
1840			/* no need to SACK right now */
1841			dprint(2, ("sctp_make_sack: %p no sack (toggle)\n",
1842			    (void *)sctp));
1843			return (NULL);
1844		} else if (sctp->sctp_sack_toggle >= acks_max) {
1845			sctp->sctp_sack_toggle = 0;
1846		}
1847	}
1848
1849	if (sctp->sctp_ftsn == sctp->sctp_lastacked + 1) {
1850		dprint(2, ("sctp_make_sack: %p no sack (already)\n",
1851		    (void *)sctp));
1852		return (NULL);
1853	}
1854
1855checks_done:
1856	dprint(2, ("sctp_make_sack: acking %x\n", sctp->sctp_ftsn - 1));
1857
1858	if (dups != NULL)
1859		dups_len = MBLKL(dups);
1860	else
1861		dups_len = 0;
1862	slen = sizeof (*sch) + sizeof (*sc) +
1863	    (sizeof (sctp_sack_frag_t) * sctp->sctp_sack_gaps);
1864
1865	/*
1866	 * If there are error chunks, check and see if we can send the
1867	 * SACK chunk and error chunks together in one packet.  If not,
1868	 * send the error chunks out now.
1869	 */
1870	if (sctp->sctp_err_chunks != NULL) {
1871		fp = SCTP_CHUNK_DEST(sctp->sctp_err_chunks);
1872		if (sctp->sctp_err_len + slen + dups_len > fp->sf_pmss) {
1873			if ((smp = sctp_make_mp(sctp, fp, 0)) == NULL) {
1874				SCTP_KSTAT(sctps, sctp_send_err_failed);
1875				SCTP_KSTAT(sctps, sctp_send_sack_failed);
1876				freemsg(sctp->sctp_err_chunks);
1877				sctp->sctp_err_chunks = NULL;
1878				sctp->sctp_err_len = 0;
1879				return (NULL);
1880			}
1881			smp->b_cont = sctp->sctp_err_chunks;
1882			sctp_set_iplen(sctp, smp, fp->sf_ixa);
1883			(void) conn_ip_output(smp, fp->sf_ixa);
1884			BUMP_LOCAL(sctp->sctp_opkts);
1885			sctp->sctp_err_chunks = NULL;
1886			sctp->sctp_err_len = 0;
1887		}
1888	}
1889	smp = sctp_make_mp(sctp, sendto, slen);
1890	if (smp == NULL) {
1891		SCTP_KSTAT(sctps, sctp_send_sack_failed);
1892		return (NULL);
1893	}
1894	sch = (sctp_chunk_hdr_t *)smp->b_wptr;
1895
1896	sctp_fill_sack(sctp, smp->b_wptr, slen);
1897	smp->b_wptr += slen;
1898	if (dups != NULL) {
1899		sc = (sctp_sack_chunk_t *)(sch + 1);
1900		sc->ssc_numdups = htons(MBLKL(dups) / sizeof (uint32_t));
1901		sch->sch_len = htons(slen + dups_len);
1902		smp->b_cont = dups;
1903	}
1904
1905	if (sctp->sctp_err_chunks != NULL) {
1906		linkb(smp, sctp->sctp_err_chunks);
1907		sctp->sctp_err_chunks = NULL;
1908		sctp->sctp_err_len = 0;
1909	}
1910	return (smp);
1911}
1912
1913/*
1914 * Check and see if we need to send a SACK chunk.  If it is needed,
1915 * send it out.  Return true if a SACK chunk is sent, false otherwise.
1916 */
1917boolean_t
1918sctp_sack(sctp_t *sctp, mblk_t *dups)
1919{
1920	mblk_t *smp;
1921	sctp_stack_t	*sctps = sctp->sctp_sctps;
1922
1923	/* If we are shutting down, let send_shutdown() bundle the SACK */
1924	if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
1925		sctp_send_shutdown(sctp, 0);
1926	}
1927
1928	ASSERT(sctp->sctp_lastdata != NULL);
1929
1930	if ((smp = sctp_make_sack(sctp, sctp->sctp_lastdata, dups)) == NULL) {
1931		/* The caller of sctp_sack() will not free the dups mblk. */
1932		if (dups != NULL)
1933			freeb(dups);
1934		return (B_FALSE);
1935	}
1936	dprint(2, ("sctp_sack: sending to %p %x:%x:%x:%x\n",
1937	    (void *)sctp->sctp_lastdata,
1938	    SCTP_PRINTADDR(sctp->sctp_lastdata->sf_faddr)));
1939
1940	sctp->sctp_active = LBOLT_FASTPATH64;
1941
1942	SCTPS_BUMP_MIB(sctps, sctpOutAck);
1943
1944	sctp_set_iplen(sctp, smp, sctp->sctp_lastdata->sf_ixa);
1945	(void) conn_ip_output(smp, sctp->sctp_lastdata->sf_ixa);
1946	BUMP_LOCAL(sctp->sctp_opkts);
1947	return (B_TRUE);
1948}
1949
1950/*
1951 * This is called if we have a message that was partially sent and is
1952 * abandoned. The cum TSN will be the last chunk sent for this message,
1953 * subsequent chunks will be marked ABANDONED. We send a Forward TSN
1954 * chunk in this case with the TSN of the last sent chunk so that the
1955 * peer can clean up its fragment list for this message. This message
1956 * will be removed from the transmit list when the peer sends a SACK
1957 * back.
1958 */
int
sctp_check_abandoned_msg(sctp_t *sctp, mblk_t *meta)
{
	sctp_data_hdr_t	*dh;
	mblk_t		*nmp;
	mblk_t		*head;
	int32_t		unsent = 0;
	mblk_t		*mp1 = meta->b_cont;	/* first data chunk of msg */
	uint32_t	adv_pap = sctp->sctp_adv_pap;	/* saved for rollback */
	sctp_faddr_t	*fp = sctp->sctp_current;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/*
	 * Only send a Forward TSN if the first chunk of this message has
	 * already been cumulatively acked by the peer; otherwise report -1
	 * so the caller knows nothing was done.
	 */
	dh = (sctp_data_hdr_t *)mp1->b_rptr;
	if (SEQ_GEQ(sctp->sctp_lastack_rxd, ntohl(dh->sdh_tsn))) {
		sctp_ftsn_set_t	*sets = NULL;
		uint_t		nsets = 0;
		uint32_t	seglen = sizeof (uint32_t);
		boolean_t	ubit = SCTP_DATA_GET_UBIT(dh);

		/* Advance to the last chunk of this message already sent. */
		while (mp1->b_next != NULL && SCTP_CHUNK_ISSENT(mp1->b_next))
			mp1 = mp1->b_next;
		dh = (sctp_data_hdr_t *)mp1->b_rptr;
		/* Advertised peer ack point becomes the last sent TSN. */
		sctp->sctp_adv_pap = ntohl(dh->sdh_tsn);
		if (!ubit &&
		    !sctp_add_ftsn_set(&sets, fp, meta, &nsets, &seglen)) {
			/* allocation failed: roll back the ack point */
			sctp->sctp_adv_pap = adv_pap;
			return (ENOMEM);
		}
		nmp = sctp_make_ftsn_chunk(sctp, fp, sets, nsets, seglen);
		sctp_free_ftsn_set(sets);
		if (nmp == NULL) {
			/* allocation failed: roll back the ack point */
			sctp->sctp_adv_pap = adv_pap;
			return (ENOMEM);
		}
		head = sctp_add_proto_hdr(sctp, fp, nmp, 0, NULL);
		if (head == NULL) {
			/* roll back and drop the built Forward TSN chunk */
			sctp->sctp_adv_pap = adv_pap;
			freemsg(nmp);
			SCTP_KSTAT(sctps, sctp_send_ftsn_failed);
			return (ENOMEM);
		}
		SCTP_MSG_SET_ABANDONED(meta);
		sctp_set_iplen(sctp, head, fp->sf_ixa);
		(void) conn_ip_output(head, fp->sf_ixa);
		BUMP_LOCAL(sctp->sctp_opkts);
		if (!fp->sf_timer_running)
			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
		/*
		 * Mark every not-yet-sent chunk of this message abandoned
		 * and subtract its payload from the unsent byte count.
		 */
		mp1 = mp1->b_next;
		while (mp1 != NULL) {
			ASSERT(!SCTP_CHUNK_ISSENT(mp1));
			ASSERT(!SCTP_CHUNK_ABANDONED(mp1));
			SCTP_ABANDON_CHUNK(mp1);
			dh = (sctp_data_hdr_t *)mp1->b_rptr;
			unsent += ntohs(dh->sdh_len) - sizeof (*dh);
			mp1 = mp1->b_next;
		}
		ASSERT(sctp->sctp_unsent >= unsent);
		sctp->sctp_unsent -= unsent;
		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);
		return (0);
	}
	return (-1);
}
2027
/*
 * Process the cumulative TSN ('tsn') reported in an incoming SACK.
 * Walk the transmit list (sctp_xmit_head), mark every sent chunk with
 * TSN <= tsn as acked, and free any message whose chunks are now all
 * acked (abandoned messages instead generate a send-failure event).
 * Per-peer in-flight counts (sf_suna), RTT measurements and rexmit
 * timers are updated along the way.
 *
 * On return, *first_unacked points at the first chunk not covered by
 * the cumulative ack (NULL if everything visited was acked/abandoned).
 * Returns the number of bytes newly acked by this SACK; 0 means this
 * was a duplicate ack.
 */
uint32_t
sctp_cumack(sctp_t *sctp, uint32_t tsn, mblk_t **first_unacked)
{
	mblk_t *ump, *nump, *mp = NULL;
	uint16_t chunklen;
	uint32_t xtsn;
	sctp_faddr_t *fp;
	sctp_data_hdr_t *sdc;
	uint32_t cumack_forward = 0;
	sctp_msg_hdr_t	*mhdr;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	ump = sctp->sctp_xmit_head;

	/*
	 * Free messages only when they're completely acked.
	 */
	while (ump != NULL) {
		mhdr = (sctp_msg_hdr_t *)ump->b_rptr;
		for (mp = ump->b_cont; mp != NULL; mp = mp->b_next) {
			if (SCTP_CHUNK_ABANDONED(mp)) {
				ASSERT(SCTP_IS_MSG_ABANDONED(ump));
				mp = NULL;
				break;
			}
			/*
			 * We check for abandoned message if we are PR-SCTP
			 * aware, if this is not the first chunk in the
			 * message (b_cont) and if the message is marked
			 * abandoned.
			 */
			if (!SCTP_CHUNK_ISSENT(mp)) {
				if (sctp->sctp_prsctp_aware &&
				    mp != ump->b_cont &&
				    (SCTP_IS_MSG_ABANDONED(ump) ||
				    SCTP_MSG_TO_BE_ABANDONED(ump, mhdr,
				    sctp))) {
					(void) sctp_check_abandoned_msg(sctp,
					    ump);
				}
				goto cum_ack_done;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
			/* Already covered by a previous cumulative ack. */
			if (SEQ_GEQ(sctp->sctp_lastack_rxd, xtsn))
				continue;
			if (SEQ_GEQ(tsn, xtsn)) {
				fp = SCTP_CHUNK_DEST(mp);
				chunklen = ntohs(sdc->sdh_len);

				if (sctp->sctp_out_time != 0 &&
				    xtsn == sctp->sctp_rtt_tsn) {
					/* Got a new RTT measurement */
					sctp_update_rtt(sctp, fp,
					    ddi_get_lbolt64() -
					    sctp->sctp_out_time);
					sctp->sctp_out_time = 0;
				}
				/* Chunk may already be acked via a gap ack. */
				if (SCTP_CHUNK_ISACKED(mp))
					continue;
				SCTP_CHUNK_SET_SACKCNT(mp, 0);
				SCTP_CHUNK_ACKED(mp);
				ASSERT(fp->sf_suna >= chunklen);
				fp->sf_suna -= chunklen;
				fp->sf_acked += chunklen;
				cumack_forward += chunklen;
				ASSERT(sctp->sctp_unacked >=
				    (chunklen - sizeof (*sdc)));
				sctp->sctp_unacked -=
				    (chunklen - sizeof (*sdc));
				if (fp->sf_suna == 0) {
					/* all outstanding data acked */
					fp->sf_pba = 0;
					SCTP_FADDR_TIMER_STOP(fp);
				} else {
					SCTP_FADDR_TIMER_RESTART(sctp, fp,
					    fp->sf_rto);
				}
			} else {
				goto cum_ack_done;
			}
		}
		/* Whole message acked (or abandoned): unlink and dispose. */
		nump = ump->b_next;
		if (nump != NULL)
			nump->b_prev = NULL;
		if (ump == sctp->sctp_xmit_tail)
			sctp->sctp_xmit_tail = nump;
		if (SCTP_IS_MSG_ABANDONED(ump)) {
			BUMP_LOCAL(sctp->sctp_prsctpdrop);
			ump->b_next = NULL;
			sctp_sendfail_event(sctp, ump, 0, B_TRUE);
		} else {
			sctp_free_msg(ump);
		}
		sctp->sctp_xmit_head = ump = nump;
	}
cum_ack_done:
	*first_unacked = mp;
	if (cumack_forward > 0) {
		SCTPS_BUMP_MIB(sctps, sctpInAck);
		if (SEQ_GT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn)) {
			sctp->sctp_recovery_tsn = sctp->sctp_lastack_rxd;
		}

		/*
		 * Update ULP the amount of queued data, which is
		 * sent-unack'ed + unsent.
		 */
		if (!SCTP_IS_DETACHED(sctp))
			SCTP_TXQ_UPDATE(sctp);

		/* Time to send a shutdown? */
		if (sctp->sctp_state == SCTPS_SHUTDOWN_PENDING) {
			sctp_send_shutdown(sctp, 0);
		}
		sctp->sctp_xmit_unacked = mp;
	} else {
		/* dup ack */
		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
	}
	sctp->sctp_lastack_rxd = tsn;
	/* The advanced peer ack point can never lag the cumulative ack. */
	if (SEQ_LT(sctp->sctp_adv_pap, sctp->sctp_lastack_rxd))
		sctp->sctp_adv_pap = sctp->sctp_lastack_rxd;
	ASSERT(sctp->sctp_xmit_head || sctp->sctp_unacked == 0);

	return (cumack_forward);
}
2155
2156static int
2157sctp_set_frwnd(sctp_t *sctp, uint32_t frwnd)
2158{
2159	uint32_t orwnd;
2160
2161	if (sctp->sctp_unacked > frwnd) {
2162		sctp->sctp_frwnd = 0;
2163		return (0);
2164	}
2165	orwnd = sctp->sctp_frwnd;
2166	sctp->sctp_frwnd = frwnd - sctp->sctp_unacked;
2167	if (orwnd < sctp->sctp_frwnd) {
2168		return (1);
2169	} else {
2170		return (0);
2171	}
2172}
2173
2174/*
2175 * For un-ordered messages.
2176 * Walk the sctp->sctp_uo_frag list and remove any fragments with TSN
2177 * less than/equal to ftsn. Fragments for un-ordered messages are
2178 * strictly in sequence (w.r.t TSN).
2179 */
2180static int
2181sctp_ftsn_check_uo_frag(sctp_t *sctp, uint32_t ftsn)
2182{
2183	mblk_t		*hmp;
2184	mblk_t		*hmp_next;
2185	sctp_data_hdr_t	*dc;
2186	int		dlen = 0;
2187
2188	hmp = sctp->sctp_uo_frags;
2189	while (hmp != NULL) {
2190		hmp_next = hmp->b_next;
2191		dc = (sctp_data_hdr_t *)hmp->b_rptr;
2192		if (SEQ_GT(ntohl(dc->sdh_tsn), ftsn))
2193			return (dlen);
2194		sctp->sctp_uo_frags = hmp_next;
2195		if (hmp_next != NULL)
2196			hmp_next->b_prev = NULL;
2197		hmp->b_next = NULL;
2198		dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2199		freeb(hmp);
2200		hmp = hmp_next;
2201	}
2202	return (dlen);
2203}
2204
2205/*
2206 * For ordered messages.
2207 * Check for existing fragments for an sid-ssn pair reported as abandoned,
2208 * hence will not receive, in the Forward TSN. If there are fragments, then
2209 * we just nuke them. If and when Partial Delivery API is supported, we
2210 * would need to send a notification to the upper layer about this.
2211 */
2212static int
2213sctp_ftsn_check_frag(sctp_t *sctp, uint16_t ssn, sctp_instr_t *sip)
2214{
2215	sctp_reass_t	*srp;
2216	mblk_t		*hmp;
2217	mblk_t		*dmp;
2218	mblk_t		*hmp_next;
2219	sctp_data_hdr_t	*dc;
2220	int		dlen = 0;
2221
2222	hmp = sip->istr_reass;
2223	while (hmp != NULL) {
2224		hmp_next = hmp->b_next;
2225		srp = (sctp_reass_t *)DB_BASE(hmp);
2226		if (SSN_GT(srp->sr_ssn, ssn))
2227			return (dlen);
2228		/*
2229		 * If we had sent part of this message up, send a partial
2230		 * delivery event. Since this is ordered delivery, we should
2231		 * have sent partial message only for the next in sequence,
2232		 * hence the ASSERT. See comments in sctp_data_chunk() for
2233		 * trypartial.
2234		 */
2235		if (srp->sr_partial_delivered) {
2236			if (srp->sr_ssn != sip->nextseq)
2237				cmn_err(CE_WARN, "sctp partial"
2238				    " delivery notify, sctp 0x%p"
2239				    " sip = 0x%p ssn != nextseq"
2240				    " ssn 0x%x nextseq 0x%x",
2241				    (void *)sctp, (void *)sip,
2242				    srp->sr_ssn, sip->nextseq);
2243			ASSERT(sip->nextseq == srp->sr_ssn);
2244			sctp_partial_delivery_event(sctp);
2245		}
2246		/* Take it out of the reass queue */
2247		sip->istr_reass = hmp_next;
2248		if (hmp_next != NULL)
2249			hmp_next->b_prev = NULL;
2250		hmp->b_next = NULL;
2251		ASSERT(hmp->b_prev == NULL);
2252		dmp = hmp;
2253		ASSERT(DB_TYPE(hmp) == M_CTL);
2254		dmp = hmp->b_cont;
2255		hmp->b_cont = NULL;
2256		freeb(hmp);
2257		hmp = dmp;
2258		while (dmp != NULL) {
2259			dc = (sctp_data_hdr_t *)dmp->b_rptr;
2260			dlen += ntohs(dc->sdh_len) - sizeof (*dc);
2261			dmp = dmp->b_cont;
2262		}
2263		freemsg(hmp);
2264		hmp = hmp_next;
2265	}
2266	return (dlen);
2267}
2268
2269/*
2270 * Update sctp_ftsn to the cumulative TSN from the Forward TSN chunk. Remove
2271 * any SACK gaps less than the newly updated sctp_ftsn. Walk through the
2272 * sid-ssn pair in the Forward TSN and for each, clean the fragment list
2273 * for this pair, if needed, and check if we can deliver subsequent
2274 * messages, if any, from the instream queue (that were waiting for this
2275 * sid-ssn message to show up). Once we are done try to update the SACK
2276 * info. We could get a duplicate Forward TSN, in which case just send
2277 * a SACK. If any of the sid values in the Forward TSN is invalid,
2278 * send back an "Invalid Stream Identifier" error and continue processing
2279 * the rest.
2280 */
static void
sctp_process_forward_tsn(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp,
    ip_pkt_t *ipp, ip_recv_attr_t *ira)
{
	uint32_t	*ftsn = (uint32_t *)(ch + 1);
	ftsn_entry_t	*ftsn_entry;
	sctp_instr_t	*instr;
	boolean_t	can_deliver = B_TRUE;
	size_t		dlen;
	int		flen;
	mblk_t		*dmp;
	mblk_t		*pmp;
	sctp_data_hdr_t	*dc;
	ssize_t		remaining;
	sctp_stack_t	*sctps = sctp->sctp_sctps;

	/*
	 * NOTE: the cumulative FTSN and the sid/ssn entries below are
	 * byte-swapped in place, modifying the received chunk itself.
	 */
	*ftsn = ntohl(*ftsn);
	/* Bytes left in the chunk after the fixed header + cum FTSN. */
	remaining =  ntohs(ch->sch_len) - sizeof (*ch) - sizeof (*ftsn);

	if (SCTP_IS_DETACHED(sctp)) {
		/* No ULP to deliver to; drop instead of delivering below. */
		SCTPS_BUMP_MIB(sctps, sctpInClosed);
		can_deliver = B_FALSE;
	}
	/*
	 * un-ordered messages don't have SID-SSN pair entries, we check
	 * for any fragments (for un-ordered message) to be discarded using
	 * the cumulative FTSN.
	 */
	flen = sctp_ftsn_check_uo_frag(sctp, *ftsn);
	if (flen > 0) {
		ASSERT(sctp->sctp_rxqueued >= flen);
		sctp->sctp_rxqueued -= flen;
	}
	/* Walk the sid/ssn pairs that follow the cumulative FTSN. */
	ftsn_entry = (ftsn_entry_t *)(ftsn + 1);
	while (remaining >= sizeof (*ftsn_entry)) {
		ftsn_entry->ftsn_sid = ntohs(ftsn_entry->ftsn_sid);
		ftsn_entry->ftsn_ssn = ntohs(ftsn_entry->ftsn_ssn);
		if (ftsn_entry->ftsn_sid >= sctp->sctp_num_istr) {
			sctp_bsc_t	inval_parm;

			/* Will populate the CAUSE block in the ERROR chunk. */
			inval_parm.bsc_sid = htons(ftsn_entry->ftsn_sid);
			/* RESERVED, ignored at the receiving end */
			inval_parm.bsc_pad = 0;

			sctp_add_err(sctp, SCTP_ERR_BAD_SID,
			    (void *)&inval_parm, sizeof (sctp_bsc_t), fp);
			/* Continue with the remaining entries. */
			ftsn_entry++;
			remaining -= sizeof (*ftsn_entry);
			continue;
		}
		instr = &sctp->sctp_instr[ftsn_entry->ftsn_sid];
		flen = sctp_ftsn_check_frag(sctp, ftsn_entry->ftsn_ssn, instr);
		/* Indicates frags were nuked, update rxqueued */
		if (flen > 0) {
			ASSERT(sctp->sctp_rxqueued >= flen);
			sctp->sctp_rxqueued -= flen;
		}
		/*
		 * It is possible to receive an FTSN chunk with SSN smaller
		 * than then nextseq if this chunk is a retransmission because
		 * of incomplete processing when it was first processed.
		 */
		if (SSN_GE(ftsn_entry->ftsn_ssn, instr->nextseq))
			instr->nextseq = ftsn_entry->ftsn_ssn + 1;
		/*
		 * Deliver any now-unblocked in-sequence messages queued
		 * on this stream.
		 */
		while (instr->istr_nmsgs > 0) {
			mblk_t	*next;

			dmp = (mblk_t *)instr->istr_msgs;
			dc = (sctp_data_hdr_t *)dmp->b_rptr;
			if (ntohs(dc->sdh_ssn) != instr->nextseq)
				break;

			next = dmp->b_next;
			dlen = dmp->b_wptr - dmp->b_rptr - sizeof (*dc);
			for (pmp = dmp->b_cont; pmp != NULL;
			    pmp = pmp->b_cont) {
				dlen += MBLKL(pmp);
			}
			if (can_deliver) {
				int error;

				dmp->b_rptr = (uchar_t *)(dc + 1);
				dmp->b_next = NULL;
				ASSERT(dmp->b_prev == NULL);
				if (sctp_input_add_ancillary(sctp,
				    &dmp, dc, fp, ipp, ira) == 0) {
					sctp->sctp_rxqueued -= dlen;
					/*
					 * Override b_flag for SCTP sockfs
					 * internal use
					 */

					dmp->b_flag = 0;
					if (sctp->sctp_flowctrld) {
						sctp->sctp_rwnd -= dlen;
						if (sctp->sctp_rwnd < 0)
							sctp->sctp_rwnd = 0;
					}
					if (sctp->sctp_ulp_recv(
					    sctp->sctp_ulpd, dmp, msgdsize(dmp),
					    0, &error, NULL) <= 0) {
						sctp->sctp_flowctrld = B_TRUE;
					}
				} else {
					/*
					 * We will resume processing when
					 * the FTSN chunk is re-xmitted.
					 * Restore rptr/b_next so the message
					 * stays intact on the queue.
					 */
					dmp->b_rptr = (uchar_t *)dc;
					dmp->b_next = next;
					dprint(0,
					    ("FTSN dequeuing %u failed\n",
					    ntohs(dc->sdh_ssn)));
					return;
				}
			} else {
				/* Detached endpoint: just drop the message. */
				sctp->sctp_rxqueued -= dlen;
				ASSERT(dmp->b_prev == NULL);
				dmp->b_next = NULL;
				freemsg(dmp);
			}
			instr->istr_nmsgs--;
			instr->nextseq++;
			sctp->sctp_istr_nmsgs--;
			if (next != NULL)
				next->b_prev = NULL;
			instr->istr_msgs = next;
		}
		ftsn_entry++;
		remaining -= sizeof (*ftsn_entry);
	}
	/* Duplicate FTSN */
	if (*ftsn <= (sctp->sctp_ftsn - 1)) {
		/* Force a SACK so the peer learns our current cum TSN. */
		sctp->sctp_force_sack = 1;
		return;
	}
	/* Advance cum TSN to that reported in the Forward TSN chunk */
	sctp->sctp_ftsn = *ftsn + 1;

	/* Remove all the SACK gaps before the new cum TSN */
	if (sctp->sctp_sack_info != NULL) {
		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
		    &sctp->sctp_sack_gaps);
	}
	/*
	 * If there are gap reports pending, check if advancing
	 * the ftsn here closes a gap. If so, we can advance
	 * ftsn to the end of the set.
	 * If ftsn has moved forward, maybe we can remove gap reports.
	 */
	if (sctp->sctp_sack_info != NULL &&
	    sctp->sctp_ftsn == sctp->sctp_sack_info->begin) {
		sctp->sctp_ftsn = sctp->sctp_sack_info->end + 1;
		sctp_ack_rem(&sctp->sctp_sack_info, sctp->sctp_ftsn - 1,
		    &sctp->sctp_sack_gaps);
	}
}
2439
2440/*
2441 * When we have processed a SACK we check to see if we can advance the
2442 * cumulative TSN if there are abandoned chunks immediately following
2443 * the updated cumulative TSN. If there are, we attempt to send a
2444 * Forward TSN chunk.
2445 */
static void
sctp_check_abandoned_data(sctp_t *sctp, sctp_faddr_t *fp)
{
	mblk_t		*meta = sctp->sctp_xmit_head;
	mblk_t		*mp;
	mblk_t		*nmp;
	uint32_t	seglen;
	/* Remember the current advanced peer ack point for rollback/compare. */
	uint32_t	adv_pap = sctp->sctp_adv_pap;

	/*
	 * We only check in the first meta since otherwise we can't
	 * advance the cumulative ack point. We just look for chunks
	 * marked for retransmission, else we might prematurely
	 * send an FTSN for a sent, but unacked, chunk.
	 */
	for (mp = meta->b_cont; mp != NULL; mp = mp->b_next) {
		if (!SCTP_CHUNK_ISSENT(mp))
			return;
		if (SCTP_CHUNK_WANT_REXMIT(mp))
			break;
	}
	if (mp == NULL)
		return;
	/* May advance sctp_adv_pap past abandoned chunks. */
	sctp_check_adv_ack_pt(sctp, meta, mp);
	if (SEQ_GT(sctp->sctp_adv_pap, adv_pap)) {
		/* The ack point advanced: build and send a Forward TSN. */
		sctp_make_ftsns(sctp, meta, mp, &nmp, fp, &seglen);
		if (nmp == NULL) {
			/*
			 * Could not build the FTSN chunk (presumably an
			 * allocation failure — TODO confirm); roll back
			 * sctp_adv_pap and let the rexmit timer retry.
			 */
			sctp->sctp_adv_pap = adv_pap;
			if (!fp->sf_timer_running)
				SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
			return;
		}
		sctp_set_iplen(sctp, nmp, fp->sf_ixa);
		(void) conn_ip_output(nmp, fp->sf_ixa);
		BUMP_LOCAL(sctp->sctp_opkts);
		if (!fp->sf_timer_running)
			SCTP_FADDR_TIMER_RESTART(sctp, fp, fp->sf_rto);
	}
}
2485
2486/*
2487 * The processing here follows the same logic in sctp_got_sack(), the reason
2488 * we do this separately is because, usually, gap blocks are ordered and
2489 * we can process it in sctp_got_sack(). However if they aren't we would
2490 * need to do some additional non-optimal stuff when we start processing the
2491 * unordered gaps. To that effect sctp_got_sack() does the processing in the
2492 * simple case and this does the same in the more involved case.
2493 */
/*
 * Parameters: 'ctsn' is the SACK's cumulative TSN; 'ssf' points at the
 * remaining 'num_gaps' gap blocks; 'umphead'/'mphead' are where the
 * caller's ordered walk stood when the first unordered block was hit;
 * 'fr_xtsn' is the TSN up to which sctp_got_sack() already incremented
 * SACK counts.  *trysend is set to -1 on a bogus SACK (caller aborts),
 * or 1 when a fast retransmit is triggered.  Returns the number of
 * bytes newly acked by these gap blocks.
 */
static uint32_t
sctp_process_uo_gaps(sctp_t *sctp, uint32_t ctsn, sctp_sack_frag_t *ssf,
    int num_gaps, mblk_t *umphead, mblk_t *mphead, int *trysend,
    boolean_t *fast_recovery, uint32_t fr_xtsn)
{
	uint32_t		xtsn;
	uint32_t		gapstart = 0;
	uint32_t		gapend = 0;
	int			gapcnt;
	uint16_t		chunklen;
	sctp_data_hdr_t		*sdc;
	int			gstart;
	mblk_t			*ump = umphead;
	mblk_t			*mp = mphead;
	sctp_faddr_t		*fp;
	uint32_t		acked = 0;
	sctp_stack_t		*sctps = sctp->sctp_sctps;

	/*
	 * gstart tracks the last (in the order of TSN) gapstart that
	 * we process in this SACK gaps walk.
	 */
	gstart = ctsn;

	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	xtsn = ntohl(sdc->sdh_tsn);
	for (gapcnt = 0; gapcnt < num_gaps; gapcnt++, ssf++) {
		if (gapstart != 0) {
			/*
			 * If we have reached the end of the transmit list or
			 * hit an unsent chunk or encountered an unordered gap
			 * block start from the ctsn again.
			 */
			if (ump == NULL || !SCTP_CHUNK_ISSENT(mp) ||
			    SEQ_LT(ctsn + ntohs(ssf->ssf_start), xtsn)) {
				ump = umphead;
				mp = mphead;
				sdc = (sctp_data_hdr_t *)mp->b_rptr;
				xtsn = ntohl(sdc->sdh_tsn);
			}
		}

		/* Gap block offsets are relative to the cumulative TSN. */
		gapstart = ctsn + ntohs(ssf->ssf_start);
		gapend = ctsn + ntohs(ssf->ssf_end);

		/*
		 * Sanity checks:
		 *
		 * 1. SACK for TSN we have not sent - ABORT
		 * 2. Invalid or spurious gaps, ignore all gaps
		 */
		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
			*trysend = -1;
			return (acked);
		} else if (SEQ_LT(gapend, gapstart) ||
		    SEQ_LEQ(gapstart, ctsn)) {
			break;
		}
		/*
		 * The xtsn can be the TSN processed for the last gap
		 * (gapend) or it could be the cumulative TSN. We continue
		 * with the last xtsn as long as the gaps are ordered, when
		 * we hit an unordered gap, we re-start from the cumulative
		 * TSN. For the first gap it is always the cumulative TSN.
		 */
		while (xtsn != gapstart) {
			/*
			 * We can't reliably check for reneged chunks
			 * when walking the unordered list, so we don't.
			 * In case the peer reneges then we will end up
			 * sending the reneged chunk via timeout.
			 */
			mp = mp->b_next;
			if (mp == NULL) {
				ump = ump->b_next;
				/*
				 * ump can't be NULL because of the sanity
				 * check above.
				 */
				ASSERT(ump != NULL);
				mp = ump->b_cont;
			}
			/*
			 * mp can't be unsent because of the sanity check
			 * above.
			 */
			ASSERT(SCTP_CHUNK_ISSENT(mp));
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
		}
		/*
		 * Now that we have found the chunk with TSN == 'gapstart',
		 * let's walk till we hit the chunk with TSN == 'gapend'.
		 * All intermediate chunks will be marked ACKED, if they
		 * haven't already been.
		 */
		while (SEQ_LEQ(xtsn, gapend)) {
			/*
			 * SACKed
			 */
			SCTP_CHUNK_SET_SACKCNT(mp, 0);
			if (!SCTP_CHUNK_ISACKED(mp)) {
				SCTP_CHUNK_ACKED(mp);

				fp = SCTP_CHUNK_DEST(mp);
				chunklen = ntohs(sdc->sdh_len);
				ASSERT(fp->sf_suna >= chunklen);
				fp->sf_suna -= chunklen;
				if (fp->sf_suna == 0) {
					/* All outstanding data acked. */
					fp->sf_pba = 0;
					SCTP_FADDR_TIMER_STOP(fp);
				}
				fp->sf_acked += chunklen;
				acked += chunklen;
				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
				ASSERT(sctp->sctp_unacked >= 0);
			}
			/*
			 * Move to the next message in the transmit list
			 * if we are done with all the chunks from the current
			 * message. Note, it is possible to hit the end of the
			 * transmit list here, i.e. if we have already completed
			 * processing the gap block.
			 */
			mp = mp->b_next;
			if (mp == NULL) {
				ump = ump->b_next;
				if (ump == NULL) {
					ASSERT(xtsn == gapend);
					break;
				}
				mp = ump->b_cont;
			}
			/*
			 * Likewise, we can hit an unsent chunk once we have
			 * completed processing the gap block.
			 */
			if (!SCTP_CHUNK_ISSENT(mp)) {
				ASSERT(xtsn == gapend);
				break;
			}
			sdc = (sctp_data_hdr_t *)mp->b_rptr;
			xtsn = ntohl(sdc->sdh_tsn);
		}
		/*
		 * We keep track of the last gap we successfully processed
		 * so that we can terminate the walk below for incrementing
		 * the SACK count.
		 */
		if (SEQ_LT(gstart, gapstart))
			gstart = gapstart;
	}
	/*
	 * Check if have incremented the SACK count for all unacked TSNs in
	 * sctp_got_sack(), if so we are done.
	 */
	if (SEQ_LEQ(gstart, fr_xtsn))
		return (acked);

	/* Second pass: bump SACK counts for still-unacked chunks. */
	ump = umphead;
	mp = mphead;
	sdc = (sctp_data_hdr_t *)mp->b_rptr;
	xtsn = ntohl(sdc->sdh_tsn);
	while (SEQ_LT(xtsn, gstart)) {
		/*
		 * We have incremented SACK count for TSNs less than fr_tsn
		 * in sctp_got_sack(), so don't increment them again here.
		 */
		if (SEQ_GT(xtsn, fr_xtsn) && !SCTP_CHUNK_ISACKED(mp)) {
			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
			if (SCTP_CHUNK_SACKCNT(mp) ==
			    sctps->sctps_fast_rxt_thresh) {
				/* Threshold reached: fast retransmit. */
				SCTP_CHUNK_REXMIT(sctp, mp);
				sctp->sctp_chk_fast_rexmit = B_TRUE;
				*trysend = 1;
				if (!*fast_recovery) {
					/*
					 * Entering fast recovery.
					 */
					fp = SCTP_CHUNK_DEST(mp);
					fp->sf_ssthresh = fp->sf_cwnd / 2;
					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
						fp->sf_ssthresh =
						    2 * fp->sf_pmss;
					}
					fp->sf_cwnd = fp->sf_ssthresh;
					fp->sf_pba = 0;
					sctp->sctp_recovery_tsn =
					    sctp->sctp_ltsn - 1;
					*fast_recovery = B_TRUE;
				}
			}
		}
		mp = mp->b_next;
		if (mp == NULL) {
			ump = ump->b_next;
			/* We can't get to the end of the transmit list here */
			ASSERT(ump != NULL);
			mp = ump->b_cont;
		}
		/* We can't hit an unsent chunk here */
		ASSERT(SCTP_CHUNK_ISSENT(mp));
		sdc = (sctp_data_hdr_t *)mp->b_rptr;
		xtsn = ntohl(sdc->sdh_tsn);
	}
	return (acked);
}
2704
2705static int
2706sctp_got_sack(sctp_t *sctp, sctp_chunk_hdr_t *sch)
2707{
2708	sctp_sack_chunk_t	*sc;
2709	sctp_data_hdr_t		*sdc;
2710	sctp_sack_frag_t	*ssf;
2711	mblk_t			*ump;
2712	mblk_t			*mp;
2713	mblk_t			*mp1;
2714	uint32_t		cumtsn;
2715	uint32_t		xtsn;
2716	uint32_t		gapstart = 0;
2717	uint32_t		gapend = 0;
2718	uint32_t		acked = 0;
2719	uint16_t		chunklen;
2720	sctp_faddr_t		*fp;
2721	int			num_gaps;
2722	int			trysend = 0;
2723	int			i;
2724	boolean_t		fast_recovery = B_FALSE;
2725	boolean_t		cumack_forward = B_FALSE;
2726	boolean_t		fwd_tsn = B_FALSE;
2727	sctp_stack_t		*sctps = sctp->sctp_sctps;
2728
2729	BUMP_LOCAL(sctp->sctp_ibchunks);
2730	BUMP_LOCAL(sctp->sctp_isacks);
2731	chunklen = ntohs(sch->sch_len);
2732	if (chunklen < (sizeof (*sch) + sizeof (*sc)))
2733		return (0);
2734
2735	sc = (sctp_sack_chunk_t *)(sch + 1);
2736	cumtsn = ntohl(sc->ssc_cumtsn);
2737
2738	dprint(2, ("got sack cumtsn %x -> %x\n", sctp->sctp_lastack_rxd,
2739	    cumtsn));
2740
2741	/* out of order */
2742	if (SEQ_LT(cumtsn, sctp->sctp_lastack_rxd))
2743		return (0);
2744
2745	if (SEQ_GT(cumtsn, sctp->sctp_ltsn - 1)) {
2746		SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2747		/* Send an ABORT */
2748		return (-1);
2749	}
2750
2751	/*
2752	 * Cwnd only done when not in fast recovery mode.
2753	 */
2754	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_recovery_tsn))
2755		fast_recovery = B_TRUE;
2756
2757	/*
2758	 * .. and if the cum TSN is not moving ahead on account Forward TSN
2759	 */
2760	if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_adv_pap))
2761		fwd_tsn = B_TRUE;
2762
2763	if (cumtsn == sctp->sctp_lastack_rxd &&
2764	    (sctp->sctp_xmit_unacked == NULL ||
2765	    !SCTP_CHUNK_ABANDONED(sctp->sctp_xmit_unacked))) {
2766		if (sctp->sctp_xmit_unacked != NULL)
2767			mp = sctp->sctp_xmit_unacked;
2768		else if (sctp->sctp_xmit_head != NULL)
2769			mp = sctp->sctp_xmit_head->b_cont;
2770		else
2771			mp = NULL;
2772		SCTPS_BUMP_MIB(sctps, sctpInDupAck);
2773		/*
2774		 * If we were doing a zero win probe and the win
2775		 * has now opened to at least MSS, re-transmit the
2776		 * zero win probe via sctp_rexmit_packet().
2777		 */
2778		if (mp != NULL && sctp->sctp_zero_win_probe &&
2779		    ntohl(sc->ssc_a_rwnd) >= sctp->sctp_current->sf_pmss) {
2780			mblk_t	*pkt;
2781			uint_t	pkt_len;
2782			mblk_t	*mp1 = mp;
2783			mblk_t	*meta = sctp->sctp_xmit_head;
2784
2785			/*
2786			 * Reset the RTO since we have been backing-off
2787			 * to send the ZWP.
2788			 */
2789			fp = sctp->sctp_current;
2790			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2791			SCTP_MAX_RTO(sctp, fp);
2792			/* Resend the ZWP */
2793			pkt = sctp_rexmit_packet(sctp, &meta, &mp1, fp,
2794			    &pkt_len);
2795			if (pkt == NULL) {
2796				SCTP_KSTAT(sctps, sctp_ss_rexmit_failed);
2797				return (0);
2798			}
2799			ASSERT(pkt_len <= fp->sf_pmss);
2800			sctp->sctp_zero_win_probe = B_FALSE;
2801			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2802			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2803			sctp_set_iplen(sctp, pkt, fp->sf_ixa);
2804			(void) conn_ip_output(pkt, fp->sf_ixa);
2805			BUMP_LOCAL(sctp->sctp_opkts);
2806		}
2807	} else {
2808		if (sctp->sctp_zero_win_probe) {
2809			/*
2810			 * Reset the RTO since we have been backing-off
2811			 * to send the ZWP.
2812			 */
2813			fp = sctp->sctp_current;
2814			fp->sf_rto = fp->sf_srtt + 4 * fp->sf_rttvar;
2815			SCTP_MAX_RTO(sctp, fp);
2816			sctp->sctp_zero_win_probe = B_FALSE;
2817			/* This is probably not required */
2818			if (!sctp->sctp_rexmitting) {
2819				sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
2820				sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
2821			}
2822		}
2823		acked = sctp_cumack(sctp, cumtsn, &mp);
2824		sctp->sctp_xmit_unacked = mp;
2825		if (acked > 0) {
2826			trysend = 1;
2827			cumack_forward = B_TRUE;
2828			if (fwd_tsn && SEQ_GEQ(sctp->sctp_lastack_rxd,
2829			    sctp->sctp_adv_pap)) {
2830				cumack_forward = B_FALSE;
2831			}
2832		}
2833	}
2834	num_gaps = ntohs(sc->ssc_numfrags);
2835	UPDATE_LOCAL(sctp->sctp_gapcnt, num_gaps);
2836	if (num_gaps == 0 || mp == NULL || !SCTP_CHUNK_ISSENT(mp) ||
2837	    chunklen < (sizeof (*sch) + sizeof (*sc) +
2838	    num_gaps * sizeof (*ssf))) {
2839		goto ret;
2840	}
2841#ifdef	DEBUG
2842	/*
2843	 * Since we delete any message that has been acked completely,
2844	 * the unacked chunk must belong to sctp_xmit_head (as
2845	 * we don't have a back pointer from the mp to the meta data
2846	 * we do this).
2847	 */
2848	{
2849		mblk_t	*mp2 = sctp->sctp_xmit_head->b_cont;
2850
2851		while (mp2 != NULL) {
2852			if (mp2 == mp)
2853				break;
2854			mp2 = mp2->b_next;
2855		}
2856		ASSERT(mp2 != NULL);
2857	}
2858#endif
2859	ump = sctp->sctp_xmit_head;
2860
2861	/*
2862	 * Just remember where we started from, in case we need to call
2863	 * sctp_process_uo_gaps() if the gap blocks are unordered.
2864	 */
2865	mp1 = mp;
2866
2867	sdc = (sctp_data_hdr_t *)mp->b_rptr;
2868	xtsn = ntohl(sdc->sdh_tsn);
2869	ASSERT(xtsn == cumtsn + 1);
2870
2871	/*
2872	 * Go through SACK gaps. They are ordered based on start TSN.
2873	 */
2874	ssf = (sctp_sack_frag_t *)(sc + 1);
2875	for (i = 0; i < num_gaps; i++, ssf++) {
2876		if (gapstart != 0) {
2877			/* check for unordered gap */
2878			if (SEQ_LEQ(cumtsn + ntohs(ssf->ssf_start), gapstart)) {
2879				acked += sctp_process_uo_gaps(sctp,
2880				    cumtsn, ssf, num_gaps - i,
2881				    sctp->sctp_xmit_head, mp1,
2882				    &trysend, &fast_recovery, gapstart);
2883				if (trysend < 0) {
2884					SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2885					return (-1);
2886				}
2887				break;
2888			}
2889		}
2890		gapstart = cumtsn + ntohs(ssf->ssf_start);
2891		gapend = cumtsn + ntohs(ssf->ssf_end);
2892
2893		/*
2894		 * Sanity checks:
2895		 *
2896		 * 1. SACK for TSN we have not sent - ABORT
2897		 * 2. Invalid or spurious gaps, ignore all gaps
2898		 */
2899		if (SEQ_GT(gapstart, sctp->sctp_ltsn - 1) ||
2900		    SEQ_GT(gapend, sctp->sctp_ltsn - 1)) {
2901			SCTPS_BUMP_MIB(sctps, sctpInAckUnsent);
2902			return (-1);
2903		} else if (SEQ_LT(gapend, gapstart) ||
2904		    SEQ_LEQ(gapstart, cumtsn)) {
2905			break;
2906		}
2907		/*
2908		 * Let's start at the current TSN (for the 1st gap we start
2909		 * from the cumulative TSN, for subsequent ones we start from
2910		 * where the previous gapend was found - second while loop
2911		 * below) and walk the transmit list till we find the TSN
2912		 * corresponding to gapstart. All the unacked chunks till we
2913		 * get to the chunk with TSN == gapstart will have their
2914		 * SACKCNT incremented by 1. Note since the gap blocks are
2915		 * ordered, we won't be incrementing the SACKCNT for an
2916		 * unacked chunk by more than one while processing the gap
2917		 * blocks. If the SACKCNT for any unacked chunk exceeds
2918		 * the fast retransmit threshold, we will fast retransmit
2919		 * after processing all the gap blocks.
2920		 */
2921		ASSERT(SEQ_LEQ(xtsn, gapstart));
2922		while (xtsn != gapstart) {
2923			SCTP_CHUNK_SET_SACKCNT(mp, SCTP_CHUNK_SACKCNT(mp) + 1);
2924			if (SCTP_CHUNK_SACKCNT(mp) ==
2925			    sctps->sctps_fast_rxt_thresh) {
2926				SCTP_CHUNK_REXMIT(sctp, mp);
2927				sctp->sctp_chk_fast_rexmit = B_TRUE;
2928				trysend = 1;
2929				if (!fast_recovery) {
2930					/*
2931					 * Entering fast recovery.
2932					 */
2933					fp = SCTP_CHUNK_DEST(mp);
2934					fp->sf_ssthresh = fp->sf_cwnd / 2;
2935					if (fp->sf_ssthresh < 2 * fp->sf_pmss) {
2936						fp->sf_ssthresh =
2937						    2 * fp->sf_pmss;
2938					}
2939					fp->sf_cwnd = fp->sf_ssthresh;
2940					fp->sf_pba = 0;
2941					sctp->sctp_recovery_tsn =
2942					    sctp->sctp_ltsn - 1;
2943					fast_recovery = B_TRUE;
2944				}
2945			}
2946
2947			/*
2948			 * Peer may have reneged on this chunk, so un-sack
2949			 * it now. If the peer did renege, we need to
2950			 * readjust unacked.
2951			 */
2952			if (SCTP_CHUNK_ISACKED(mp)) {
2953				chunklen = ntohs(sdc->sdh_len);
2954				fp = SCTP_CHUNK_DEST(mp);
2955				fp->sf_suna += chunklen;
2956				sctp->sctp_unacked += chunklen - sizeof (*sdc);
2957				SCTP_CHUNK_CLEAR_ACKED(sctp, mp);
2958				if (!fp->sf_timer_running) {
2959					SCTP_FADDR_TIMER_RESTART(sctp, fp,
2960					    fp->sf_rto);
2961				}
2962			}
2963
2964			mp = mp->b_next;
2965			if (mp == NULL) {
2966				ump = ump->b_next;
2967				/*
2968				 * ump can't be NULL given the sanity check
2969				 * above.  But if it is NULL, it means that
2970				 * there is a data corruption.  We'd better
2971				 * panic.
2972				 */
2973				if (ump == NULL) {
2974					panic("Memory corruption detected: gap "
2975					    "start TSN 0x%x missing from the "
2976					    "xmit list: %p", gapstart,
2977					    (void *)sctp);
2978				}
2979				mp = ump->b_cont;
2980			}
2981			/*
2982			 * mp can't be unsent given the sanity check above.
2983			 */
2984			ASSERT(SCTP_CHUNK_ISSENT(mp));
2985			sdc = (sctp_data_hdr_t *)mp->b_rptr;
2986			xtsn = ntohl(sdc->sdh_tsn);
2987		}
2988		/*
2989		 * Now that we have found the chunk with TSN == 'gapstart',
2990		 * let's walk till we hit the chunk with TSN == 'gapend'.
2991		 * All intermediate chunks will be marked ACKED, if they
2992		 * haven't already been.
2993		 */
2994		while (SEQ_LEQ(xtsn, gapend)) {
2995			/*
2996			 * SACKed
2997			 */
2998			SCTP_CHUNK_SET_SACKCNT(mp, 0);
2999			if (!SCTP_CHUNK_ISACKED(mp)) {
3000				SCTP_CHUNK_ACKED(mp);
3001
3002				fp = SCTP_CHUNK_DEST(mp);
3003				chunklen = ntohs(sdc->sdh_len);
3004				ASSERT(fp->sf_suna >= chunklen);
3005				fp->sf_suna -= chunklen;
3006				if (fp->sf_suna == 0) {
3007					/* All outstanding data acked. */
3008					fp->sf_pba = 0;
3009					SCTP_FADDR_TIMER_STOP(fp);
3010				}
3011				fp->sf_acked += chunklen;
3012				acked += chunklen;
3013				sctp->sctp_unacked -= chunklen - sizeof (*sdc);
3014				ASSERT(sctp->sctp_unacked >= 0);
3015			}
3016			/* Go to the next chunk of the current message */
3017			mp = mp->b_next;
3018			/*
3019			 * Move to the next message in the transmit list
3020			 * if we are done with all the chunks from the current
3021			 * message. Note, it is possible to hit the end of the
3022			 * transmit list here, i.e. if we have already completed
3023			 * processing the gap block.  But the TSN must be equal
3024			 * to the gapend because of the above sanity check.
3025			 * If it is not equal, it means that some data is
3026			 * missing.
3027			 * Also, note that we break here, which means we
3028			 * continue processing gap blocks, if any. In case of
3029			 * ordered gap blocks there can't be any following
3030			 * this (if there is it will fail the sanity check
3031			 * above). In case of un-ordered gap blocks we will
3032			 * switch to sctp_process_uo_gaps().  In either case
3033			 * it should be fine to continue with NULL ump/mp,
3034			 * but we just reset it to xmit_head.
3035			 */
3036			if (mp == NULL) {
3037				ump = ump->b_next;
3038				if (ump == NULL) {
3039					if (xtsn != gapend) {
3040						panic("Memory corruption "
3041						    "detected: gap end TSN "
3042						    "0x%x missing from the "
3043						    "xmit list: %p", gapend,
3044						    (void *)sctp);
3045					}
3046					ump = sctp->sctp_xmit_head;
3047					mp = mp1;
3048					sdc = (sctp_data_hdr_t *)mp->b_rptr;
3049					xtsn = ntohl(sdc->sdh_tsn);
3050					break;
3051				}
3052				mp = ump->b_cont;
3053			}
3054			/*
3055			 * Likewise, we could hit an unsent chunk once we have
3056			 * completed processing the gap block. Again, it is
3057			 * fine to continue processing gap blocks with mp
3058			 * pointing to the unsent chunk, because if there
3059			 * are more ordered gap blocks, they will fail the
3060			 * sanity check, and if there are un-ordered gap blocks,
3061			 * we will continue processing in sctp_process_uo_gaps()
3062			 * We just reset the mp to the one we started with.
3063			 */
3064			if (!SCTP_CHUNK_ISSENT(mp)) {
3065				ASSERT(xtsn == gapend);
3066				ump = sctp->sctp_xmit_head;
3067				mp = mp1;
3068				sdc = (sctp_data_hdr_t *)mp->b_rptr;
3069				xtsn = ntohl(sdc->sdh_tsn);
3070				break;
3071			}
3072			sdc = (sctp_data_hdr_t *)mp->b_rptr;
3073			xtsn = ntohl(sdc->sdh_tsn);
3074		}
3075	}
3076	if (sctp->sctp_prsctp_aware)
3077		sctp_check_abandoned_data(sctp, sctp->sctp_current);
3078	if (sctp->sctp_chk_fast_rexmit)
3079		sctp_fast_rexmit(sctp);
3080ret:
3081	trysend += sctp_set_frwnd(sctp, ntohl(sc->ssc_a_rwnd));
3082
3083	/*
3084	 * If receive window is closed while there is unsent data,
3085	 * set a timer for doing zero window probes.
3086	 */
3087	if (sctp->sctp_frwnd == 0 && sctp->sctp_unacked == 0 &&
3088	    sctp->sctp_unsent != 0) {
3089		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
3090		    sctp->sctp_current->sf_rto);
3091	}
3092
3093	/*
3094	 * Set cwnd for all destinations.
3095	 * Congestion window gets increased only when cumulative
3096	 * TSN moves forward, we're not in fast recovery, and
3097	 * cwnd has been fully utilized (almost fully, need to allow
3098	 * some leeway due to non-MSS sized messages).
3099	 */
3100	if (sctp->sctp_current->sf_acked == acked) {
3101		/*
3102		 * Fast-path, only data sent to sctp_current got acked.
3103		 */
3104		fp = sctp->sctp_current;
3105		if (cumack_forward && !fast_recovery &&
3106		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
3107			if (fp->sf_cwnd < fp->sf_ssthresh) {
3108				/*
3109				 * Slow start
3110				 */
3111				if (fp->sf_acked > fp->sf_pmss) {
3112					fp->sf_cwnd += fp->sf_pmss;
3113				} else {
3114					fp->sf_cwnd += fp->sf_acked;
3115				}
3116				fp->sf_cwnd = MIN(fp->sf_cwnd,
3117				    sctp->sctp_cwnd_max);
3118			} else {
3119				/*
3120				 * Congestion avoidance
3121				 */
3122				fp->sf_pba += fp->sf_acked;
3123				if (fp->sf_pba >= fp->sf_cwnd) {
3124					fp->sf_pba -= fp->sf_cwnd;
3125					fp->sf_cwnd += fp->sf_pmss;
3126					fp->sf_cwnd = MIN(fp->sf_cwnd,
3127					    sctp->sctp_cwnd_max);
3128				}
3129			}
3130		}
3131		/*
3132		 * Limit the burst of transmitted data segments.
3133		 */
3134		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
3135		    fp->sf_cwnd) {
3136			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
3137			    fp->sf_pmss;
3138		}
3139		fp->sf_acked = 0;
3140		goto check_ss_rxmit;
3141	}
3142	for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
3143		if (cumack_forward && fp->sf_acked && !fast_recovery &&
3144		    (fp->sf_acked + fp->sf_suna > fp->sf_cwnd - fp->sf_pmss)) {
3145			if (fp->sf_cwnd < fp->sf_ssthresh) {
3146				if (fp->sf_acked > fp->sf_pmss) {
3147					fp->sf_cwnd += fp->sf_pmss;
3148				} else {
3149					fp->sf_cwnd += fp->sf_acked;
3150				}
3151				fp->sf_cwnd = MIN(fp->sf_cwnd,
3152				    sctp->sctp_cwnd_max);
3153			} else {
3154				fp->sf_pba += fp->sf_acked;
3155				if (fp->sf_pba >= fp->sf_cwnd) {
3156					fp->sf_pba -= fp->sf_cwnd;
3157					fp->sf_cwnd += fp->sf_pmss;
3158					fp->sf_cwnd = MIN(fp->sf_cwnd,
3159					    sctp->sctp_cwnd_max);
3160				}
3161			}
3162		}
3163		if (fp->sf_suna + sctps->sctps_maxburst * fp->sf_pmss <
3164		    fp->sf_cwnd) {
3165			fp->sf_cwnd = fp->sf_suna + sctps->sctps_maxburst *
3166			    fp->sf_pmss;
3167		}
3168		fp->sf_acked = 0;
3169	}
3170	fp = sctp->sctp_current;
3171check_ss_rxmit:
3172	/*
3173	 * If this is a SACK following a timeout, check if there are
3174	 * still unacked chunks (sent before the timeout) that we can
3175	 * send.
3176	 */
3177	if (sctp->sctp_rexmitting) {
3178		if (SEQ_LT(sctp->sctp_lastack_rxd, sctp->sctp_rxt_maxtsn)) {
3179			/*
3180			 * As we are in retransmission phase, we may get a
3181			 * SACK which indicates some new chunks are received
3182			 * but cum_tsn does not advance.  During this
3183			 * phase, the other side advances cum_tsn only because
3184			 * it receives our retransmitted chunks.  Only
3185			 * this signals that some chunks are still
3186			 * missing.
3187			 */
3188			if (cumack_forward) {
3189				fp->sf_rxt_unacked -= acked;
3190				sctp_ss_rexmit(sctp);
3191			}
3192		} else {
3193			sctp->sctp_rexmitting = B_FALSE;
3194			sctp->sctp_rxt_nxttsn = sctp->sctp_ltsn;
3195			sctp->sctp_rxt_maxtsn = sctp->sctp_ltsn;
3196			fp->sf_rxt_unacked = 0;
3197		}
3198	}
3199	return (trysend);
3200}
3201
3202/*
3203 * Returns 0 if the caller should stop processing any more chunks,
3204 * 1 if the caller should skip this chunk and continue processing.
3205 */
3206static int
3207sctp_strange_chunk(sctp_t *sctp, sctp_chunk_hdr_t *ch, sctp_faddr_t *fp)
3208{
3209	size_t len;
3210
3211	BUMP_LOCAL(sctp->sctp_ibchunks);
3212	/* check top two bits for action required */
3213	if (ch->sch_id & 0x40) {	/* also matches 0xc0 */
3214		len = ntohs(ch->sch_len);
3215		sctp_add_err(sctp, SCTP_ERR_UNREC_CHUNK, ch, len, fp);
3216
3217		if ((ch->sch_id & 0xc0) == 0xc0) {
3218			/* skip and continue */
3219			return (1);
3220		} else {
3221			/* stop processing */
3222			return (0);
3223		}
3224	}
3225	if (ch->sch_id & 0x80) {
3226		/* skip and continue, no error */
3227		return (1);
3228	}
3229	/* top two bits are clear; stop processing and no error */
3230	return (0);
3231}
3232
3233/*
3234 * Basic sanity checks on all input chunks and parameters: they must
3235 * be of legitimate size for their purported type, and must follow
3236 * ordering conventions as defined in rfc2960.
3237 *
3238 * Returns 1 if the chunk and all encloded params are legitimate,
3239 * 0 otherwise.
3240 */
/*ARGSUSED*/
static int
sctp_check_input(sctp_t *sctp, sctp_chunk_hdr_t *ch, ssize_t len, int first)
{
	sctp_parm_hdr_t	*ph;
	void		*p = NULL;	/* start of params needing validation */
	ssize_t		clen;		/* bytes of params following the chunk */
	uint16_t	ch_len;

	/* A chunk may never claim to be longer than the remaining packet. */
	ch_len = ntohs(ch->sch_len);
	if (ch_len > len) {
		return (0);
	}

	switch (ch->sch_id) {
	case CHUNK_DATA:
		/* Must at least hold the fixed DATA header. */
		if (ch_len < sizeof (sctp_data_hdr_t)) {
			return (0);
		}
		return (1);
	case CHUNK_INIT:
	case CHUNK_INIT_ACK:
		{
			ssize_t	remlen = len;

			/*
			 * INIT and INIT-ACK chunks must not be bundled with
			 * any other.
			 */
			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
			    (ch_len < (sizeof (*ch) +
			    sizeof (sctp_init_chunk_t)))) {
				return (0);
			}
			/* may have params that need checking */
			p = (char *)(ch + 1) + sizeof (sctp_init_chunk_t);
			clen = ch_len - (sizeof (*ch) +
			    sizeof (sctp_init_chunk_t));
		}
		break;
	case CHUNK_SACK:
		if (ch_len < (sizeof (*ch) + sizeof (sctp_sack_chunk_t))) {
			return (0);
		}
		/* dup and gap reports checked by got_sack() */
		return (1);
	case CHUNK_SHUTDOWN:
		/* Fixed part carries the 32-bit cumulative TSN ack. */
		if (ch_len < (sizeof (*ch) + sizeof (uint32_t))) {
			return (0);
		}
		return (1);
	case CHUNK_ABORT:
	case CHUNK_ERROR:
		if (ch_len < sizeof (*ch)) {
			return (0);
		}
		/* may have params that need checking */
		p = ch + 1;
		clen = ch_len - sizeof (*ch);
		break;
	case CHUNK_ECNE:
	case CHUNK_CWR:
	case CHUNK_HEARTBEAT:
	case CHUNK_HEARTBEAT_ACK:
	/* Full ASCONF chunk and parameter checks are in asconf.c */
	case CHUNK_ASCONF:
	case CHUNK_ASCONF_ACK:
		if (ch_len < sizeof (*ch)) {
			return (0);
		}
		/* heartbeat data checked by process_heartbeat() */
		return (1);
	case CHUNK_SHUTDOWN_COMPLETE:
		{
			ssize_t remlen = len;

			/*
			 * SHUTDOWN-COMPLETE chunk must not be bundled with any
			 * other
			 */
			if (!first || sctp_next_chunk(ch, &remlen) != NULL ||
			    ch_len < sizeof (*ch)) {
				return (0);
			}
		}
		return (1);
	case CHUNK_COOKIE:
	case CHUNK_COOKIE_ACK:
	case CHUNK_SHUTDOWN_ACK:
		/* These must be the first chunk in the packet. */
		if (ch_len < sizeof (*ch) || !first) {
			return (0);
		}
		return (1);
	case CHUNK_FORWARD_TSN:
		if (ch_len < (sizeof (*ch) + sizeof (uint32_t)))
			return (0);
		return (1);
	default:
		return (1);	/* handled by strange_chunk() */
	}

	/* check and byteorder parameters */
	if (clen <= 0) {
		return (1);
	}
	ASSERT(p != NULL);

	ph = p;
	while (ph != NULL && clen > 0) {
		/*
		 * NOTE(review): this bounds each param length against the
		 * whole remaining packet (len) rather than the remaining
		 * param bytes (clen); presumably sctp_next_parm() enforces
		 * the tighter clen bound — confirm before tightening.
		 */
		ch_len = ntohs(ph->sph_len);
		if (ch_len > len || ch_len < sizeof (*ph)) {
			return (0);
		}
		ph = sctp_next_parm(ph, &clen);
	}

	/* All OK */
	return (1);
}
3360
3361static mblk_t *
3362sctp_check_in_policy(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
3363{
3364	boolean_t policy_present;
3365	ipha_t *ipha;
3366	ip6_t *ip6h;
3367	netstack_t	*ns = ipst->ips_netstack;
3368	ipsec_stack_t	*ipss = ns->netstack_ipsec;
3369
3370	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
3371		policy_present = ipss->ipsec_inbound_v4_policy_present;
3372		ipha = (ipha_t *)mp->b_rptr;
3373		ip6h = NULL;
3374	} else {
3375		policy_present = ipss->ipsec_inbound_v6_policy_present;
3376		ipha = NULL;
3377		ip6h = (ip6_t *)mp->b_rptr;
3378	}
3379
3380	if (policy_present) {
3381		/*
3382		 * The conn_t parameter is NULL because we already know
3383		 * nobody's home.
3384		 */
3385		mp = ipsec_check_global_policy(mp, (conn_t *)NULL,
3386		    ipha, ip6h, ira, ns);
3387		if (mp == NULL)
3388			return (NULL);
3389	}
3390	return (mp);
3391}
3392
/*
 * Handle out-of-the-blue packets: SCTP packets for which no association
 * could be found.  Depending on the first chunk type this either sends an
 * ABORT / SHUTDOWN-COMPLETE back, re-dispatches to an association found by
 * address-list lookup (changed-source INIT-ACK case), or silently drops.
 * Consumes mp on every path.
 */
void
sctp_ootb_input(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
{
	sctp_t			*sctp;
	sctp_chunk_hdr_t	*ch;
	sctp_hdr_t		*sctph;
	in6_addr_t		src, dst;
	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
	ssize_t			mlen;
	sctp_stack_t		*sctps;
	boolean_t		secure;
	zoneid_t		zoneid = ira->ira_zoneid;
	uchar_t			*rptr;

	ASSERT(ira->ira_ill == NULL);

	/* Whether the packet arrived IPsec-protected. */
	secure = ira->ira_flags & IRAF_IPSEC_SECURE;

	sctps = ipst->ips_netstack->netstack_sctp;

	SCTPS_BUMP_MIB(sctps, sctpOutOfBlue);
	SCTPS_BUMP_MIB(sctps, sctpInSCTPPkts);

	if (mp->b_cont != NULL) {
		/*
		 * All subsequent code is vastly simplified if it can
		 * assume a single contiguous chunk of data.
		 */
		if (pullupmsg(mp, -1) == 0) {
			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
			freemsg(mp);
			return;
		}
	}

	/* Extract src/dst as v4-mapped or native IPv6 addresses. */
	rptr = mp->b_rptr;
	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
	if (ira->ira_flags & IRAF_IS_IPV4) {
		ipha_t *ipha;

		ipha = (ipha_t *)rptr;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
	} else {
		ip6_t *ip6h;

		ip6h = (ip6_t *)rptr;
		src = ip6h->ip6_src;
		dst = ip6h->ip6_dst;
	}

	/* mlen is the chunk payload length past the common SCTP header. */
	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
	if ((ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
		dprint(3, ("sctp_ootb_input: invalid packet\n"));
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
		freemsg(mp);
		return;
	}

	switch (ch->sch_id) {
	case CHUNK_INIT:
		/* no listener; send abort  */
		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
			return;
		sctp_ootb_send_abort(sctp_init2vtag(ch), 0,
		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
		break;
	case CHUNK_INIT_ACK:
		/* check for changed src addr */
		sctp = sctp_addrlist2sctp(mp, sctph, ch, zoneid, sctps);
		if (sctp != NULL) {
			/* success; proceed to normal path */
			mutex_enter(&sctp->sctp_lock);
			if (sctp->sctp_running) {
				/* busy; defer to the association's recvq */
				sctp_add_recvq(sctp, mp, B_FALSE, ira);
				mutex_exit(&sctp->sctp_lock);
			} else {
				/*
				 * If the source address is changed, we
				 * don't need to worry too much about
				 * out of order processing.  So we don't
				 * check if the recvq is empty or not here.
				 */
				sctp->sctp_running = B_TRUE;
				mutex_exit(&sctp->sctp_lock);
				sctp_input_data(sctp, mp, ira);
				WAKE_SCTP(sctp);
			}
			SCTP_REFRELE(sctp);
			return;
		}
		/* else bogus init ack; drop it */
		break;
	case CHUNK_SHUTDOWN_ACK:
		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
			return;
		/* sctp_ootb_shutdown_ack() consumes mp */
		sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
		return;
	case CHUNK_ERROR:
	case CHUNK_ABORT:
	case CHUNK_COOKIE_ACK:
	case CHUNK_SHUTDOWN_COMPLETE:
		/* silently drop; nothing sensible to reply with */
		break;
	default:
		if (secure && sctp_check_in_policy(mp, ira, ipst) == NULL)
			return;
		sctp_ootb_send_abort(sctph->sh_verf, 0,
		    NULL, 0, mp, 0, B_TRUE, ira, ipst);
		break;
	}
	freemsg(mp);
}
3508
3509/*
3510 * Handle sctp packets.
3511 * Note that we rele the sctp_t (the caller got a reference on it).
3512 */
3513void
3514sctp_input(conn_t *connp, ipha_t *ipha, ip6_t *ip6h, mblk_t *mp,
3515    ip_recv_attr_t *ira)
3516{
3517	sctp_t		*sctp = CONN2SCTP(connp);
3518	boolean_t	secure;
3519	ill_t		*ill = ira->ira_ill;
3520	ip_stack_t	*ipst = ill->ill_ipst;
3521	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
3522	iaflags_t	iraflags = ira->ira_flags;
3523	ill_t		*rill = ira->ira_rill;
3524
3525	secure = iraflags & IRAF_IPSEC_SECURE;
3526
3527	/*
3528	 * We check some fields in conn_t without holding a lock.
3529	 * This should be fine.
3530	 */
3531	if (((iraflags & IRAF_IS_IPV4) ?
3532	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
3533	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
3534	    secure) {
3535		mp = ipsec_check_inbound_policy(mp, connp, ipha,
3536		    ip6h, ira);
3537		if (mp == NULL) {
3538			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
3539			/* Note that mp is NULL */
3540			ip_drop_input("ipIfStatsInDiscards", mp, ill);
3541			SCTP_REFRELE(sctp);
3542			return;
3543		}
3544	}
3545
3546	ira->ira_ill = ira->ira_rill = NULL;
3547
3548	mutex_enter(&sctp->sctp_lock);
3549	if (sctp->sctp_running) {
3550		sctp_add_recvq(sctp, mp, B_FALSE, ira);
3551		mutex_exit(&sctp->sctp_lock);
3552		goto done;
3553	} else {
3554		sctp->sctp_running = B_TRUE;
3555		mutex_exit(&sctp->sctp_lock);
3556
3557		mutex_enter(&sctp->sctp_recvq_lock);
3558		if (sctp->sctp_recvq != NULL) {
3559			sctp_add_recvq(sctp, mp, B_TRUE, ira);
3560			mutex_exit(&sctp->sctp_recvq_lock);
3561			WAKE_SCTP(sctp);
3562			goto done;
3563		}
3564	}
3565	mutex_exit(&sctp->sctp_recvq_lock);
3566	if (ira->ira_flags & IRAF_ICMP_ERROR)
3567		sctp_icmp_error(sctp, mp);
3568	else
3569		sctp_input_data(sctp, mp, ira);
3570	WAKE_SCTP(sctp);
3571
3572done:
3573	SCTP_REFRELE(sctp);
3574	ira->ira_ill = ill;
3575	ira->ira_rill = rill;
3576}
3577
3578static void
3579sctp_process_abort(sctp_t *sctp, sctp_chunk_hdr_t *ch, int err)
3580{
3581	sctp_stack_t	*sctps = sctp->sctp_sctps;
3582
3583	SCTPS_BUMP_MIB(sctps, sctpAborted);
3584	BUMP_LOCAL(sctp->sctp_ibchunks);
3585
3586	/*
3587	 * SCTP_COMM_LOST is only sent up if the association is
3588	 * established (sctp_state >= SCTPS_ESTABLISHED).
3589	 */
3590	if (sctp->sctp_state >= SCTPS_ESTABLISHED) {
3591		sctp_assoc_event(sctp, SCTP_COMM_LOST,
3592		    ntohs(((sctp_parm_hdr_t *)(ch + 1))->sph_type), ch);
3593	}
3594
3595	sctp_clean_death(sctp, err);
3596}
3597
3598void
3599sctp_input_data(sctp_t *sctp, mblk_t *mp, ip_recv_attr_t *ira)
3600{
3601	sctp_chunk_hdr_t	*ch;
3602	ssize_t			mlen;
3603	int			gotdata;
3604	int			trysend;
3605	sctp_faddr_t		*fp;
3606	sctp_init_chunk_t	*iack;
3607	uint32_t		tsn;
3608	sctp_data_hdr_t		*sdc;
3609	ip_pkt_t		ipp;
3610	in6_addr_t		src;
3611	in6_addr_t		dst;
3612	uint_t			ifindex;
3613	sctp_hdr_t		*sctph;
3614	uint_t			ip_hdr_len = ira->ira_ip_hdr_length;
3615	mblk_t			*dups = NULL;
3616	int			recv_adaptation;
3617	boolean_t		wake_eager = B_FALSE;
3618	in6_addr_t		peer_src;
3619	int64_t			now;
3620	sctp_stack_t		*sctps = sctp->sctp_sctps;
3621	ip_stack_t		*ipst = sctps->sctps_netstack->netstack_ip;
3622	boolean_t		hb_already = B_FALSE;
3623	cred_t			*cr;
3624	pid_t			cpid;
3625	uchar_t			*rptr;
3626	conn_t			*connp = sctp->sctp_connp;
3627	boolean_t		shutdown_ack_needed = B_FALSE;
3628
3629	ASSERT(DB_TYPE(mp) == M_DATA);
3630	ASSERT(ira->ira_ill == NULL);
3631
3632	if (mp->b_cont != NULL) {
3633		/*
3634		 * All subsequent code is vastly simplified if it can
3635		 * assume a single contiguous chunk of data.
3636		 */
3637		if (pullupmsg(mp, -1) == 0) {
3638			BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3639			ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3640			freemsg(mp);
3641			return;
3642		}
3643	}
3644
3645	BUMP_LOCAL(sctp->sctp_ipkts);
3646	ifindex = ira->ira_ruifindex;
3647
3648	rptr = mp->b_rptr;
3649
3650	ipp.ipp_fields = 0;
3651	if (connp->conn_recv_ancillary.crb_all != 0) {
3652		/*
3653		 * Record packet information in the ip_pkt_t
3654		 */
3655		if (ira->ira_flags & IRAF_IS_IPV4) {
3656			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipp,
3657			    B_FALSE);
3658		} else {
3659			uint8_t nexthdrp;
3660
3661			/*
3662			 * IPv6 packets can only be received by applications
3663			 * that are prepared to receive IPv6 addresses.
3664			 * The IP fanout must ensure this.
3665			 */
3666			ASSERT(connp->conn_family == AF_INET6);
3667
3668			(void) ip_find_hdr_v6(mp, (ip6_t *)rptr, B_TRUE, &ipp,
3669			    &nexthdrp);
3670			ASSERT(nexthdrp == IPPROTO_SCTP);
3671
3672			/* Could have caused a pullup? */
3673			rptr = mp->b_rptr;
3674		}
3675	}
3676
3677	sctph = ((sctp_hdr_t *)&rptr[ip_hdr_len]);
3678
3679	if (ira->ira_flags & IRAF_IS_IPV4) {
3680		ipha_t *ipha;
3681
3682		ipha = (ipha_t *)rptr;
3683		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &src);
3684		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &dst);
3685	} else {
3686		ip6_t *ip6h;
3687
3688		ip6h = (ip6_t *)rptr;
3689		src = ip6h->ip6_src;
3690		dst = ip6h->ip6_dst;
3691	}
3692
3693	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
3694	ch = sctp_first_chunk((uchar_t *)(sctph + 1), mlen);
3695	if (ch == NULL) {
3696		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3697		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3698		freemsg(mp);
3699		return;
3700	}
3701
3702	if (!sctp_check_input(sctp, ch, mlen, 1)) {
3703		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsInDiscards);
3704		ip_drop_input("ipIfStatsInDiscards", mp, NULL);
3705		goto done;
3706	}
3707	/*
3708	 * Check verfication tag (special handling for INIT,
3709	 * COOKIE, SHUTDOWN_COMPLETE and SHUTDOWN_ACK chunks).
3710	 * ABORTs are handled in the chunk processing loop, since
3711	 * may not appear first. All other checked chunks must
3712	 * appear first, or will have been dropped by check_input().
3713	 */
3714	switch (ch->sch_id) {
3715	case CHUNK_INIT:
3716		if (sctph->sh_verf != 0) {
3717			/* drop it */
3718			goto done;
3719		}
3720		break;
3721	case CHUNK_SHUTDOWN_COMPLETE:
3722		if (sctph->sh_verf == sctp->sctp_lvtag)
3723			break;
3724		if (sctph->sh_verf == sctp->sctp_fvtag &&
3725		    SCTP_GET_TBIT(ch)) {
3726			break;
3727		}
3728		/* else drop it */
3729		goto done;
3730	case CHUNK_ABORT:
3731	case CHUNK_COOKIE:
3732		/* handled below */
3733		break;
3734	case CHUNK_SHUTDOWN_ACK:
3735		if (sctp->sctp_state > SCTPS_BOUND &&
3736		    sctp->sctp_state < SCTPS_ESTABLISHED) {
3737			/* treat as OOTB */
3738			sctp_ootb_shutdown_ack(mp, ip_hdr_len, ira, ipst);
3739			return;
3740		}
3741		/* else fallthru */
3742	default:
3743		/*
3744		 * All other packets must have a valid
3745		 * verification tag, however if this is a
3746		 * listener, we use a refined version of
3747		 * out-of-the-blue logic.
3748		 */
3749		if (sctph->sh_verf != sctp->sctp_lvtag &&
3750		    sctp->sctp_state != SCTPS_LISTEN) {
3751			/* drop it */
3752			goto done;
3753		}
3754		break;
3755	}
3756
3757	/* Have a valid sctp for this packet */
3758	fp = sctp_lookup_faddr(sctp, &src);
3759	dprint(2, ("sctp_dispatch_rput: mp=%p fp=%p sctp=%p\n", (void *)mp,
3760	    (void *)fp, (void *)sctp));
3761
3762	gotdata = 0;
3763	trysend = 0;
3764
3765	now = LBOLT_FASTPATH64;
3766	/* Process the chunks */
3767	do {
3768		dprint(3, ("sctp_dispatch_rput: state=%d, chunk id=%d\n",
3769		    sctp->sctp_state, (int)(ch->sch_id)));
3770
3771		if (ch->sch_id == CHUNK_ABORT) {
3772			if (sctph->sh_verf != sctp->sctp_lvtag &&
3773			    sctph->sh_verf != sctp->sctp_fvtag) {
3774				/* drop it */
3775				goto done;
3776			}
3777		}
3778
3779		switch (sctp->sctp_state) {
3780
3781		case SCTPS_ESTABLISHED:
3782		case SCTPS_SHUTDOWN_PENDING:
3783		case SCTPS_SHUTDOWN_SENT:
3784			switch (ch->sch_id) {
3785			case CHUNK_DATA:
3786				/* 0-length data chunks are not allowed */
3787				if (ntohs(ch->sch_len) == sizeof (*sdc)) {
3788					sdc = (sctp_data_hdr_t *)ch;
3789					tsn = sdc->sdh_tsn;
3790					sctp_send_abort(sctp, sctp->sctp_fvtag,
3791					    SCTP_ERR_NO_USR_DATA, (char *)&tsn,
3792					    sizeof (tsn), mp, 0, B_FALSE, ira);
3793					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3794					    0, NULL);
3795					sctp_clean_death(sctp, ECONNABORTED);
3796					goto done;
3797				}
3798
3799				ASSERT(fp != NULL);
3800				sctp->sctp_lastdata = fp;
3801				sctp_data_chunk(sctp, ch, mp, &dups, fp,
3802				    &ipp, ira);
3803				gotdata = 1;
3804				/* Restart shutdown timer if shutting down */
3805				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3806					/*
3807					 * If we have exceeded our max
3808					 * wait bound for waiting for a
3809					 * shutdown ack from the peer,
3810					 * abort the association.
3811					 */
3812					if (sctps->sctps_shutack_wait_bound !=
3813					    0 &&
3814					    TICK_TO_MSEC(now -
3815					    sctp->sctp_out_time) >
3816					    sctps->sctps_shutack_wait_bound) {
3817						sctp_send_abort(sctp,
3818						    sctp->sctp_fvtag, 0, NULL,
3819						    0, mp, 0, B_FALSE, ira);
3820						sctp_assoc_event(sctp,
3821						    SCTP_COMM_LOST, 0, NULL);
3822						sctp_clean_death(sctp,
3823						    ECONNABORTED);
3824						goto done;
3825					}
3826					SCTP_FADDR_TIMER_RESTART(sctp, fp,
3827					    fp->sf_rto);
3828				}
3829				break;
3830			case CHUNK_SACK:
3831				ASSERT(fp != NULL);
3832				/*
3833				 * Peer is real and alive if it can ack our
3834				 * data.
3835				 */
3836				sctp_faddr_alive(sctp, fp);
3837				trysend = sctp_got_sack(sctp, ch);
3838				if (trysend < 0) {
3839					sctp_send_abort(sctp, sctph->sh_verf,
3840					    0, NULL, 0, mp, 0, B_FALSE, ira);
3841					sctp_assoc_event(sctp,
3842					    SCTP_COMM_LOST, 0, NULL);
3843					sctp_clean_death(sctp,
3844					    ECONNABORTED);
3845					goto done;
3846				}
3847				break;
3848			case CHUNK_HEARTBEAT:
3849				if (!hb_already) {
3850					/*
3851					 * In any one packet, there should
3852					 * only be one heartbeat chunk.  So
3853					 * we should not process more than
3854					 * once.
3855					 */
3856					sctp_return_heartbeat(sctp, ch, mp);
3857					hb_already = B_TRUE;
3858				}
3859				break;
3860			case CHUNK_HEARTBEAT_ACK:
3861				sctp_process_heartbeat(sctp, ch);
3862				break;
3863			case CHUNK_SHUTDOWN:
3864				sctp_shutdown_event(sctp);
3865				trysend = sctp_shutdown_received(sctp, ch,
3866				    B_FALSE, B_FALSE, fp);
3867				BUMP_LOCAL(sctp->sctp_ibchunks);
3868				break;
3869			case CHUNK_SHUTDOWN_ACK:
3870				BUMP_LOCAL(sctp->sctp_ibchunks);
3871				if (sctp->sctp_state == SCTPS_SHUTDOWN_SENT) {
3872					sctp_shutdown_complete(sctp);
3873					SCTPS_BUMP_MIB(sctps, sctpShutdowns);
3874					sctp_assoc_event(sctp,
3875					    SCTP_SHUTDOWN_COMP, 0, NULL);
3876					sctp_clean_death(sctp, 0);
3877					goto done;
3878				}
3879				break;
3880			case CHUNK_ABORT: {
3881				sctp_saddr_ipif_t *sp;
3882
3883				/* Ignore if delete pending */
3884				sp = sctp_saddr_lookup(sctp, &dst, 0);
3885				ASSERT(sp != NULL);
3886				if (sp->saddr_ipif_delete_pending) {
3887					BUMP_LOCAL(sctp->sctp_ibchunks);
3888					break;
3889				}
3890
3891				sctp_process_abort(sctp, ch, ECONNRESET);
3892				goto done;
3893			}
3894			case CHUNK_INIT:
3895				sctp_send_initack(sctp, sctph, ch, mp, ira);
3896				break;
3897			case CHUNK_COOKIE:
3898				if (sctp_process_cookie(sctp, ch, mp, &iack,
3899				    sctph, &recv_adaptation, NULL, ira) != -1) {
3900					sctp_send_cookie_ack(sctp);
3901					sctp_assoc_event(sctp, SCTP_RESTART,
3902					    0, NULL);
3903					if (recv_adaptation) {
3904						sctp->sctp_recv_adaptation = 1;
3905						sctp_adaptation_event(sctp);
3906					}
3907				} else {
3908					SCTPS_BUMP_MIB(sctps,
3909					    sctpInInvalidCookie);
3910				}
3911				break;
3912			case CHUNK_ERROR: {
3913				int error;
3914
3915				BUMP_LOCAL(sctp->sctp_ibchunks);
3916				error = sctp_handle_error(sctp, sctph, ch, mp,
3917				    ira);
3918				if (error != 0) {
3919					sctp_assoc_event(sctp, SCTP_COMM_LOST,
3920					    0, NULL);
3921					sctp_clean_death(sctp, error);
3922					goto done;
3923				}
3924				break;
3925			}
3926			case CHUNK_ASCONF:
3927				ASSERT(fp != NULL);
3928				sctp_input_asconf(sctp, ch, fp);
3929				BUMP_LOCAL(sctp->sctp_ibchunks);
3930				break;
3931			case CHUNK_ASCONF_ACK:
3932				ASSERT(fp != NULL);
3933				sctp_faddr_alive(sctp, fp);
3934				sctp_input_asconf_ack(sctp, ch, fp);
3935				BUMP_LOCAL(sctp->sctp_ibchunks);
3936				break;
3937			case CHUNK_FORWARD_TSN:
3938				ASSERT(fp != NULL);
3939				sctp->sctp_lastdata = fp;
3940				sctp_process_forward_tsn(sctp, ch, fp,
3941				    &ipp, ira);
3942				gotdata = 1;
3943				BUMP_LOCAL(sctp->sctp_ibchunks);
3944				break;
3945			default:
3946				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
3947					goto nomorechunks;
3948				} /* else skip and continue processing */
3949				break;
3950			}
3951			break;
3952
3953		case SCTPS_LISTEN:
3954			switch (ch->sch_id) {
3955			case CHUNK_INIT:
3956				sctp_send_initack(sctp, sctph, ch, mp, ira);
3957				break;
3958			case CHUNK_COOKIE: {
3959				sctp_t *eager;
3960
3961				if (sctp_process_cookie(sctp, ch, mp, &iack,
3962				    sctph, &recv_adaptation, &peer_src,
3963				    ira) == -1) {
3964					SCTPS_BUMP_MIB(sctps,
3965					    sctpInInvalidCookie);
3966					goto done;
3967				}
3968
3969				/*
3970				 * The cookie is good; ensure that
3971				 * the peer used the verification
3972				 * tag from the init ack in the header.
3973				 */
3974				if (iack->sic_inittag != sctph->sh_verf)
3975					goto done;
3976
3977				eager = sctp_conn_request(sctp, mp, ifindex,
3978				    ip_hdr_len, iack, ira);
3979				if (eager == NULL) {
3980					sctp_send_abort(sctp, sctph->sh_verf,
3981					    SCTP_ERR_NO_RESOURCES, NULL, 0, mp,
3982					    0, B_FALSE, ira);
3983					goto done;
3984				}
3985
3986				/*
3987				 * If there were extra chunks
3988				 * bundled with the cookie,
3989				 * they must be processed
3990				 * on the eager's queue. We
3991				 * accomplish this by refeeding
3992				 * the whole packet into the
3993				 * state machine on the right
3994				 * q. The packet (mp) gets
3995				 * there via the eager's
3996				 * cookie_mp field (overloaded
3997				 * with the active open role).
3998				 * This is picked up when
3999				 * processing the null bind
4000				 * request put on the eager's
4001				 * q by sctp_accept(). We must
4002				 * first revert the cookie
4003				 * chunk's length field to network
4004				 * byteorder so it can be
4005				 * properly reprocessed on the
4006				 * eager's queue.
4007				 */
4008				SCTPS_BUMP_MIB(sctps, sctpPassiveEstab);
4009				if (mlen > ntohs(ch->sch_len)) {
4010					eager->sctp_cookie_mp = dupb(mp);
4011					/*
4012					 * If no mem, just let
4013					 * the peer retransmit.
4014					 */
4015				}
4016				sctp_assoc_event(eager, SCTP_COMM_UP, 0, NULL);
4017				if (recv_adaptation) {
4018					eager->sctp_recv_adaptation = 1;
4019					eager->sctp_rx_adaptation_code =
4020					    sctp->sctp_rx_adaptation_code;
4021					sctp_adaptation_event(eager);
4022				}
4023
4024				eager->sctp_active = now;
4025				sctp_send_cookie_ack(eager);
4026
4027				wake_eager = B_TRUE;
4028
4029				/*
4030				 * Process rest of the chunks with eager.
4031				 */
4032				sctp = eager;
4033				fp = sctp_lookup_faddr(sctp, &peer_src);
4034				/*
4035				 * Confirm peer's original source.  fp can
4036				 * only be NULL if peer does not use the
4037				 * original source as one of its addresses...
4038				 */
4039				if (fp == NULL)
4040					fp = sctp_lookup_faddr(sctp, &src);
4041				else
4042					sctp_faddr_alive(sctp, fp);
4043
4044				/*
4045				 * Validate the peer addresses.  It also starts
4046				 * the heartbeat timer.
4047				 */
4048				sctp_validate_peer(sctp);
4049				break;
4050			}
4051			/* Anything else is considered out-of-the-blue */
4052			case CHUNK_ERROR:
4053			case CHUNK_ABORT:
4054			case CHUNK_COOKIE_ACK:
4055			case CHUNK_SHUTDOWN_COMPLETE:
4056				BUMP_LOCAL(sctp->sctp_ibchunks);
4057				goto done;
4058			default:
4059				BUMP_LOCAL(sctp->sctp_ibchunks);
4060				sctp_send_abort(sctp, sctph->sh_verf, 0, NULL,
4061				    0, mp, 0, B_TRUE, ira);
4062				goto done;
4063			}
4064			break;
4065
4066		case SCTPS_COOKIE_WAIT:
4067			switch (ch->sch_id) {
4068			case CHUNK_INIT_ACK:
4069				sctp_stop_faddr_timers(sctp);
4070				sctp_faddr_alive(sctp, sctp->sctp_current);
4071				sctp_send_cookie_echo(sctp, ch, mp, ira);
4072				BUMP_LOCAL(sctp->sctp_ibchunks);
4073				break;
4074			case CHUNK_ABORT:
4075				sctp_process_abort(sctp, ch, ECONNREFUSED);
4076				goto done;
4077			case CHUNK_INIT:
4078				sctp_send_initack(sctp, sctph, ch, mp, ira);
4079				break;
4080			case CHUNK_COOKIE:
4081				cr = ira->ira_cred;
4082				cpid = ira->ira_cpid;
4083
4084				if (sctp_process_cookie(sctp, ch, mp, &iack,
4085				    sctph, &recv_adaptation, NULL, ira) == -1) {
4086					SCTPS_BUMP_MIB(sctps,
4087					    sctpInInvalidCookie);
4088					break;
4089				}
4090				sctp_send_cookie_ack(sctp);
4091				sctp_stop_faddr_timers(sctp);
4092				if (!SCTP_IS_DETACHED(sctp)) {
4093					sctp->sctp_ulp_connected(
4094					    sctp->sctp_ulpd, 0, cr, cpid);
4095					sctp_set_ulp_prop(sctp);
4096
4097				}
4098				SCTP_ASSOC_EST(sctps, sctp);
4099				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4100				if (sctp->sctp_cookie_mp) {
4101					freemsg(sctp->sctp_cookie_mp);
4102					sctp->sctp_cookie_mp = NULL;
4103				}
4104
4105				/* Validate the peer addresses. */
4106				sctp->sctp_active = now;
4107				sctp_validate_peer(sctp);
4108
4109				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4110				if (recv_adaptation) {
4111					sctp->sctp_recv_adaptation = 1;
4112					sctp_adaptation_event(sctp);
4113				}
4114				/* Try sending queued data, or ASCONFs */
4115				trysend = 1;
4116				break;
4117			default:
4118				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4119					goto nomorechunks;
4120				} /* else skip and continue processing */
4121				break;
4122			}
4123			break;
4124
4125		case SCTPS_COOKIE_ECHOED:
4126			switch (ch->sch_id) {
4127			case CHUNK_COOKIE_ACK:
4128				cr = ira->ira_cred;
4129				cpid = ira->ira_cpid;
4130
4131				if (!SCTP_IS_DETACHED(sctp)) {
4132					sctp->sctp_ulp_connected(
4133					    sctp->sctp_ulpd, 0, cr, cpid);
4134					sctp_set_ulp_prop(sctp);
4135				}
4136				if (sctp->sctp_unacked == 0)
4137					sctp_stop_faddr_timers(sctp);
4138				SCTP_ASSOC_EST(sctps, sctp);
4139				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4140				BUMP_LOCAL(sctp->sctp_ibchunks);
4141				if (sctp->sctp_cookie_mp) {
4142					freemsg(sctp->sctp_cookie_mp);
4143					sctp->sctp_cookie_mp = NULL;
4144				}
4145				sctp_faddr_alive(sctp, fp);
4146				/* Validate the peer addresses. */
4147				sctp->sctp_active = now;
4148				sctp_validate_peer(sctp);
4149
4150				/* Try sending queued data, or ASCONFs */
4151				trysend = 1;
4152				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4153				sctp_adaptation_event(sctp);
4154				break;
4155			case CHUNK_ABORT:
4156				sctp_process_abort(sctp, ch, ECONNREFUSED);
4157				goto done;
4158			case CHUNK_COOKIE:
4159				cr = ira->ira_cred;
4160				cpid = ira->ira_cpid;
4161
4162				if (sctp_process_cookie(sctp, ch, mp, &iack,
4163				    sctph, &recv_adaptation, NULL, ira) == -1) {
4164					SCTPS_BUMP_MIB(sctps,
4165					    sctpInInvalidCookie);
4166					break;
4167				}
4168				sctp_send_cookie_ack(sctp);
4169
4170				if (!SCTP_IS_DETACHED(sctp)) {
4171					sctp->sctp_ulp_connected(
4172					    sctp->sctp_ulpd, 0, cr, cpid);
4173					sctp_set_ulp_prop(sctp);
4174
4175				}
4176				if (sctp->sctp_unacked == 0)
4177					sctp_stop_faddr_timers(sctp);
4178				SCTP_ASSOC_EST(sctps, sctp);
4179				SCTPS_BUMP_MIB(sctps, sctpActiveEstab);
4180				if (sctp->sctp_cookie_mp) {
4181					freemsg(sctp->sctp_cookie_mp);
4182					sctp->sctp_cookie_mp = NULL;
4183				}
4184				/* Validate the peer addresses. */
4185				sctp->sctp_active = now;
4186				sctp_validate_peer(sctp);
4187
4188				sctp_assoc_event(sctp, SCTP_COMM_UP, 0, NULL);
4189				if (recv_adaptation) {
4190					sctp->sctp_recv_adaptation = 1;
4191					sctp_adaptation_event(sctp);
4192				}
4193				/* Try sending queued data, or ASCONFs */
4194				trysend = 1;
4195				break;
4196			case CHUNK_INIT:
4197				sctp_send_initack(sctp, sctph, ch, mp, ira);
4198				break;
4199			case CHUNK_ERROR: {
4200				sctp_parm_hdr_t *p;
4201
4202				BUMP_LOCAL(sctp->sctp_ibchunks);
4203				/* check for a stale cookie */
4204				if (ntohs(ch->sch_len) >=
4205				    (sizeof (*p) + sizeof (*ch)) +
4206				    sizeof (uint32_t)) {
4207
4208					p = (sctp_parm_hdr_t *)(ch + 1);
4209					if (p->sph_type ==
4210					    htons(SCTP_ERR_STALE_COOKIE)) {
4211						SCTPS_BUMP_MIB(sctps,
4212						    sctpAborted);
4213						sctp_error_event(sctp,
4214						    ch, B_FALSE);
4215						sctp_assoc_event(sctp,
4216						    SCTP_COMM_LOST, 0, NULL);
4217						sctp_clean_death(sctp,
4218						    ECONNREFUSED);
4219						goto done;
4220					}
4221				}
4222				break;
4223			}
4224			case CHUNK_HEARTBEAT:
4225				if (!hb_already) {
4226					sctp_return_heartbeat(sctp, ch, mp);
4227					hb_already = B_TRUE;
4228				}
4229				break;
4230			default:
4231				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4232					goto nomorechunks;
4233				} /* else skip and continue processing */
4234			} /* switch (ch->sch_id) */
4235			break;
4236
4237		case SCTPS_SHUTDOWN_ACK_SENT:
4238			switch (ch->sch_id) {
4239			case CHUNK_ABORT:
4240				/* Pass gathered wisdom to IP for keeping */
4241				sctp_update_dce(sctp);
4242				sctp_process_abort(sctp, ch, 0);
4243				goto done;
4244			case CHUNK_SHUTDOWN_COMPLETE:
4245				BUMP_LOCAL(sctp->sctp_ibchunks);
4246				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
4247				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4248				    NULL);
4249
4250				/* Pass gathered wisdom to IP for keeping */
4251				sctp_update_dce(sctp);
4252				sctp_clean_death(sctp, 0);
4253				goto done;
4254			case CHUNK_SHUTDOWN_ACK:
4255				sctp_shutdown_complete(sctp);
4256				BUMP_LOCAL(sctp->sctp_ibchunks);
4257				SCTPS_BUMP_MIB(sctps, sctpShutdowns);
4258				sctp_assoc_event(sctp, SCTP_SHUTDOWN_COMP, 0,
4259				    NULL);
4260				sctp_clean_death(sctp, 0);
4261				goto done;
4262			case CHUNK_COOKIE:
4263				(void) sctp_shutdown_received(sctp, NULL,
4264				    B_TRUE, B_FALSE, fp);
4265				BUMP_LOCAL(sctp->sctp_ibchunks);
4266				break;
4267			case CHUNK_HEARTBEAT:
4268				if (!hb_already) {
4269					sctp_return_heartbeat(sctp, ch, mp);
4270					hb_already = B_TRUE;
4271				}
4272				break;
4273			default:
4274				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4275					goto nomorechunks;
4276				} /* else skip and continue processing */
4277				break;
4278			}
4279			break;
4280
4281		case SCTPS_SHUTDOWN_RECEIVED:
4282			switch (ch->sch_id) {
4283			case CHUNK_SHUTDOWN:
4284				trysend = sctp_shutdown_received(sctp, ch,
4285				    B_FALSE, B_FALSE, fp);
4286				/*
4287				 * shutdown_ack_needed may have been set as
4288				 * mentioned in the case CHUNK_SACK below.
4289				 * If sctp_shutdown_received() above found
4290				 * the xmit queue empty the SHUTDOWN ACK chunk
4291				 * has already been sent (or scheduled to be
4292				 * sent on the timer) and the SCTP state
4293				 * changed, so reset shutdown_ack_needed.
4294				 */
4295				if (shutdown_ack_needed && (sctp->sctp_state ==
4296				    SCTPS_SHUTDOWN_ACK_SENT))
4297					shutdown_ack_needed = B_FALSE;
4298				break;
4299			case CHUNK_SACK:
4300				trysend = sctp_got_sack(sctp, ch);
4301				if (trysend < 0) {
4302					sctp_send_abort(sctp, sctph->sh_verf,
4303					    0, NULL, 0, mp, 0, B_FALSE, ira);
4304					sctp_assoc_event(sctp,
4305					    SCTP_COMM_LOST, 0, NULL);
4306					sctp_clean_death(sctp,
4307					    ECONNABORTED);
4308					goto done;
4309				}
4310
4311				/*
4312				 * All data acknowledgement after a shutdown
4313				 * should be done with SHUTDOWN chunk.
4314				 * However some peer SCTP do not conform with
4315				 * this and can unexpectedly send a SACK chunk.
4316				 * If all data are acknowledged, set
4317				 * shutdown_ack_needed here indicating that
4318				 * SHUTDOWN ACK needs to be sent later by
4319				 * sctp_send_shutdown_ack().
4320				 */
4321				if ((sctp->sctp_xmit_head == NULL) &&
4322				    (sctp->sctp_xmit_unsent == NULL))
4323					shutdown_ack_needed = B_TRUE;
4324				break;
4325			case CHUNK_ABORT:
4326				sctp_process_abort(sctp, ch, ECONNRESET);
4327				goto done;
4328			case CHUNK_HEARTBEAT:
4329				if (!hb_already) {
4330					sctp_return_heartbeat(sctp, ch, mp);
4331					hb_already = B_TRUE;
4332				}
4333				break;
4334			default:
4335				if (sctp_strange_chunk(sctp, ch, fp) == 0) {
4336					goto nomorechunks;
4337				} /* else skip and continue processing */
4338				break;
4339			}
4340			break;
4341
4342		default:
4343			/*
4344			 * The only remaining states are SCTPS_IDLE and
4345			 * SCTPS_BOUND, and we should not be getting here
4346			 * for these.
4347			 */
4348			ASSERT(0);
4349		} /* switch (sctp->sctp_state) */
4350
4351		ch = sctp_next_chunk(ch, &mlen);
4352		if (ch != NULL && !sctp_check_input(sctp, ch, mlen, 0))
4353			goto done;
4354	} while (ch != NULL);
4355
4356	/* Finished processing all chunks in packet */
4357
4358nomorechunks:
4359
4360	if (shutdown_ack_needed)
4361		sctp_send_shutdown_ack(sctp, fp, B_FALSE);
4362
4363	/* SACK if necessary */
4364	if (gotdata) {
4365		boolean_t sack_sent;
4366
4367		(sctp->sctp_sack_toggle)++;
4368		sack_sent = sctp_sack(sctp, dups);
4369		dups = NULL;
4370
4371		/* If a SACK is sent, no need to restart the timer. */
4372		if (!sack_sent && !sctp->sctp_ack_timer_running) {
4373			sctp->sctp_ack_timer_running = B_TRUE;
4374			sctp_timer(sctp, sctp->sctp_ack_mp,
4375			    MSEC_TO_TICK(sctps->sctps_deferred_ack_interval));
4376		}
4377	}
4378
4379	if (trysend) {
4380		sctp_output(sctp, UINT_MAX);
4381		if (sctp->sctp_cxmit_list != NULL)
4382			sctp_wput_asconf(sctp, NULL);
4383	}
4384	/*
4385	 * If there is unsent data, make sure a timer is running, check
4386	 * timer_mp, if sctp_closei_local() ran the timers may be free.
4387	 */
4388	if (sctp->sctp_unsent > 0 && !sctp->sctp_current->sf_timer_running &&
4389	    sctp->sctp_current->sf_timer_mp != NULL) {
4390		SCTP_FADDR_TIMER_RESTART(sctp, sctp->sctp_current,
4391		    sctp->sctp_current->sf_rto);
4392	}
4393
4394done:
4395	if (dups != NULL)
4396		freeb(dups);
4397	freemsg(mp);
4398
4399	if (sctp->sctp_err_chunks != NULL)
4400		sctp_process_err(sctp);
4401
4402	if (wake_eager) {
4403		/*
4404		 * sctp points to newly created control block, need to
4405		 * release it before exiting.
4406		 */
4407		WAKE_SCTP(sctp);
4408	}
4409}
4410
4411/*
4412 * Some amount of data got removed from ULP's receive queue and we can
4413 * push messages up if we are flow controlled before.  Reset the receive
4414 * window to full capacity (conn_rcvbuf) and check if we should send a
4415 * window update.
4416 */
4417void
4418sctp_recvd(sctp_t *sctp, int len)
4419{
4420	sctp_stack_t	*sctps = sctp->sctp_sctps;
4421	conn_t		*connp = sctp->sctp_connp;
4422	boolean_t	send_sack = B_FALSE;
4423
4424	ASSERT(sctp != NULL);
4425	RUN_SCTP(sctp);
4426
4427	sctp->sctp_flowctrld = B_FALSE;
4428	/* This is the amount of data queued in ULP. */
4429	sctp->sctp_ulp_rxqueued = connp->conn_rcvbuf - len;
4430
4431	if (connp->conn_rcvbuf - sctp->sctp_arwnd >= sctp->sctp_mss)
4432		send_sack = B_TRUE;
4433	sctp->sctp_rwnd = connp->conn_rcvbuf;
4434
4435	if (sctp->sctp_state >= SCTPS_ESTABLISHED && send_sack) {
4436		sctp->sctp_force_sack = 1;
4437		SCTPS_BUMP_MIB(sctps, sctpOutWinUpdate);
4438		(void) sctp_sack(sctp, NULL);
4439	}
4440	WAKE_SCTP(sctp);
4441}
4442