socksctpsubr.c revision 8348:4137e18bfaf0
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/t_lock.h>
29#include <sys/param.h>
30#include <sys/systm.h>
31#include <sys/debug.h>
32#include <sys/errno.h>
33#include <sys/strsubr.h>
34#include <sys/cmn_err.h>
35#include <sys/sysmacros.h>
36
37#include <sys/socket.h>
38#include <sys/socketvar.h>
39#include <sys/strsun.h>
40#include <sys/signal.h>
41
42#include <netinet/sctp.h>
43#include <inet/sctp_itf.h>
44#include <fs/sockfs/sockcommon.h>
45#include "socksctp.h"
46
47extern kmem_cache_t *sosctp_assoccache;
48/*
49 * Find a free association id. See os/fio.c file descriptor allocator
50 * for description of the algorithm.
51 */
52sctp_assoc_t
53sosctp_aid_get(struct sctp_sonode *ss)
54{
55	sctp_assoc_t id, size, ralloc;
56	struct sctp_sa_id *assocs = ss->ss_assocs;
57
58	ASSERT((ss->ss_maxassoc & (ss->ss_maxassoc + 1)) == 0);
59
60	for (id = 1; (uint32_t)id < ss->ss_maxassoc; id |= id + 1) {
61		size = id + 1;
62		if (assocs[id].ssi_alloc == size)
63			continue;
64		for (ralloc = 0, size >>= 1; size != 0; size >>= 1) {
65			ralloc += assocs[id + size].ssi_alloc;
66			if (assocs[id].ssi_alloc == ralloc + size) {
67				id += size;
68				ralloc = 0;
69			}
70		}
71		return (id);
72	}
73	return (-1);
74}
75
76/*
77 * Allocate or free ID, depending on whether incr is 1 or -1
78 */
79void
80sosctp_aid_reserve(struct sctp_sonode *ss, sctp_assoc_t id, int incr)
81{
82	struct sctp_sa_id *assocs = ss->ss_assocs;
83	sctp_assoc_t pid;
84
85	ASSERT((assocs[id].ssi_assoc == NULL && incr == 1) ||
86	    (assocs[id].ssi_assoc != NULL && incr == -1));
87
88	for (pid = id; pid >= 0; pid = (pid & (pid + 1)) - 1) {
89		assocs[pid].ssi_alloc += incr;
90	}
91}
92
93/*
94 * Increase size of the ss_assocs array to accommodate at least maxid.
95 * We keep the size of the form 2^n - 1 for benefit of sosctp_aid_get().
96 */
97int
98sosctp_aid_grow(struct sctp_sonode *ss, sctp_assoc_t maxid, int kmflags)
99{
100	sctp_assoc_t newcnt, oldcnt;
101	struct sctp_sa_id *newlist, *oldlist;
102
103	ASSERT(MUTEX_HELD(&ss->ss_so.so_lock));
104	for (newcnt = 1; newcnt <= maxid; newcnt = (newcnt << 1) | 1) {
105		continue;
106	}
107
108	mutex_exit(&ss->ss_so.so_lock);
109	newlist = kmem_alloc(newcnt * sizeof (struct sctp_sa_id), kmflags);
110	mutex_enter(&ss->ss_so.so_lock);
111	if (newlist == NULL) {
112		return (-1);
113	}
114	oldcnt = ss->ss_maxassoc;
115	if (newcnt <= oldcnt) {
116		kmem_free(newlist, newcnt * sizeof (struct sctp_sa_id));
117		return (0);
118	}
119	ASSERT((newcnt & (newcnt + 1)) == 0);
120	oldlist = ss->ss_assocs;
121	ss->ss_assocs = newlist;
122	ss->ss_maxassoc = newcnt;
123
124	bcopy(oldlist, newlist, oldcnt * sizeof (struct sctp_sa_id));
125	bzero(newlist + oldcnt,
126	    (newcnt - oldcnt) * sizeof (struct sctp_sa_id));
127	if (oldlist != NULL) {
128		kmem_free(oldlist, oldcnt * sizeof (struct sctp_sa_id));
129	}
130	return (0);
131}
132
133/*
134 * Convert a id into a pointer to sctp_sockassoc structure.
135 * Increments refcnt.
136 */
137int
138sosctp_assoc(struct sctp_sonode *ss, sctp_assoc_t id, struct sctp_soassoc **ssa)
139{
140	ASSERT(ssa != NULL);
141	ASSERT(MUTEX_HELD(&ss->ss_so.so_lock));
142	if ((uint32_t)id >= ss->ss_maxassoc) {
143		*ssa = NULL;
144		return (EINVAL);
145	}
146
147	if ((*ssa = ss->ss_assocs[id].ssi_assoc) == NULL) {
148		return (EINVAL);
149	}
150	if (((*ssa)->ssa_state & (SS_CANTSENDMORE|SS_CANTRCVMORE)) ==
151	    (SS_CANTSENDMORE|SS_CANTRCVMORE)) {
152		/*
153		 * Disconnected connection, shouldn't be found anymore
154		 */
155		*ssa = NULL;
156		return (ESHUTDOWN);
157	}
158	SSA_REFHOLD(*ssa)
159
160	return (0);
161}
162
163/*
164 * Can be called from upcall, or through system call.
165 */
166struct sctp_soassoc *
167sosctp_assoc_create(struct sctp_sonode *ss, int kmflag)
168{
169	struct sctp_soassoc *ssa;
170
171	ssa = kmem_cache_alloc(sosctp_assoccache, kmflag);
172	if (ssa != NULL) {
173		ssa->ssa_type = SOSCTP_ASSOC;
174		ssa->ssa_refcnt = 1;
175		ssa->ssa_sonode = ss;
176		ssa->ssa_state = 0;
177		ssa->ssa_error = 0;
178#if 0
179		ssa->ssa_txqueued = 0;
180#endif
181		ssa->ssa_snd_qfull = 0;
182	}
183	dprint(2, ("sosctp_assoc_create %p %p\n", (void *)ss, (void *)ssa));
184	return (ssa);
185}
186
187void
188sosctp_assoc_free(struct sctp_sonode *ss, struct sctp_soassoc *ssa)
189{
190	struct sonode *so = &ss->ss_so;
191
192	dprint(2, ("sosctp_assoc_free %p %p (%d)\n", (void *)ss, (void *)ssa,
193	    ssa->ssa_id));
194	ASSERT(MUTEX_HELD(&so->so_lock));
195	if (ssa->ssa_conn != NULL) {
196		mutex_exit(&so->so_lock);
197
198		sctp_recvd(ssa->ssa_conn, so->so_rcvbuf);
199		(void) sctp_disconnect(ssa->ssa_conn);
200		sctp_close(ssa->ssa_conn);
201
202		mutex_enter(&so->so_lock);
203		ssa->ssa_conn = NULL;
204	}
205	sosctp_aid_reserve(ss, ssa->ssa_id, -1);
206	ss->ss_assocs[ssa->ssa_id].ssi_assoc = NULL;
207	--ss->ss_assoccnt;
208	kmem_cache_free(sosctp_assoccache, ssa);
209}
210
211/*
212 * Pack the ancillary stuff taking care of alignment issues.
213 * sctp_input_add_ancillary() packs the information as:
214 * struct cmsghdr -> ancillary data + struct cmsghdr -> ancillary data + ...
215 * In the next version of SCTP, sctp_input_add_ancillary() should
216 * pack the information taking alignment into account, then we would
217 * not need this routine.
218 */
219void
220sosctp_pack_cmsg(const uchar_t *opt, struct nmsghdr *msg, int len)
221{
222	struct cmsghdr	*ocmsg;
223	struct cmsghdr	*cmsg;
224	int		optlen = 0;
225	char		*cend;
226	boolean_t	isaligned = B_TRUE;
227
228	ocmsg = (struct cmsghdr *)opt;
229	cend = (char *)opt + len;
230	/* Figure out the length incl. alignment et. al. */
231	for (;;) {
232		if ((char *)(ocmsg + 1) > cend ||
233		    ((char *)ocmsg + ocmsg->cmsg_len) > cend) {
234			break;
235		}
236		if (isaligned && !ISALIGNED_cmsghdr(ocmsg))
237			isaligned = B_FALSE;
238		optlen += ROUNDUP_cmsglen(ocmsg->cmsg_len);
239		if (ocmsg->cmsg_len > 0) {
240			ocmsg = (struct cmsghdr *)
241			    ((uchar_t *)ocmsg + ocmsg->cmsg_len);
242		} else {
243			break;
244		}
245	}
246	/* Now allocate and copy */
247	msg->msg_control = kmem_zalloc(optlen, KM_SLEEP);
248	msg->msg_controllen = optlen;
249	if (isaligned) {
250		ASSERT(optlen == len);
251		bcopy(opt, msg->msg_control, len);
252		return;
253	}
254	cmsg = (struct cmsghdr *)msg->msg_control;
255	ASSERT(ISALIGNED_cmsghdr(cmsg));
256	ocmsg = (struct cmsghdr *)opt;
257	cend = (char *)opt + len;
258	for (;;) {
259		if ((char *)(ocmsg + 1) > cend ||
260		    ((char *)ocmsg + ocmsg->cmsg_len) > cend) {
261			break;
262		}
263		bcopy(ocmsg, cmsg, ocmsg->cmsg_len);
264		if (ocmsg->cmsg_len > 0) {
265			cmsg = (struct cmsghdr *)((uchar_t *)cmsg +
266			    ROUNDUP_cmsglen(ocmsg->cmsg_len));
267			ASSERT(ISALIGNED_cmsghdr(cmsg));
268			ocmsg = (struct cmsghdr *)
269			    ((uchar_t *)ocmsg + ocmsg->cmsg_len);
270		} else {
271			break;
272		}
273	}
274}
275
276/*
277 * Find cmsghdr of specified type
278 */
279struct cmsghdr *
280sosctp_find_cmsg(const uchar_t *control, socklen_t clen, int type)
281{
282	struct cmsghdr *cmsg;
283	char *cend;
284
285	cmsg = (struct cmsghdr *)control;
286	cend = (char *)control + clen;
287
288	for (;;) {
289		if ((char *)(cmsg + 1) > cend ||
290		    ((char *)cmsg + cmsg->cmsg_len) > cend) {
291			break;
292		}
293		if ((cmsg->cmsg_level == IPPROTO_SCTP) &&
294		    (cmsg->cmsg_type == type)) {
295			return (cmsg);
296		}
297		if (cmsg->cmsg_len > 0) {
298			cmsg = CMSG_NEXT(cmsg);
299		} else {
300			break;
301		}
302	}
303	return (NULL);
304}
305
306/*
307 * Wait until the association is connected or there is an error.
308 * fmode should contain any nonblocking flags.
309 */
310static int
311sosctp_assoc_waitconnected(struct sctp_soassoc *ssa, int fmode)
312{
313	struct sonode *so = &ssa->ssa_sonode->ss_so;
314	int error = 0;
315
316	ASSERT((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ||
317	    ssa->ssa_error != 0);
318
319	while ((ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
320	    SS_ISCONNECTING && ssa->ssa_error == 0) {
321
322		dprint(3, ("waiting for SS_ISCONNECTED on %p\n", (void *)so));
323		if (fmode & (FNDELAY|FNONBLOCK))
324			return (EINPROGRESS);
325
326		if (so->so_state & SS_CLOSING)
327			return (EINTR);
328		if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
329			/*
330			 * Return EINTR and let the application use
331			 * nonblocking techniques for detecting when
332			 * the connection has been established.
333			 */
334			return (EINTR);
335		}
336		dprint(3, ("awoken on %p\n", (void *)so));
337	}
338	if (ssa->ssa_error != 0) {
339		error = ssa->ssa_error;
340		ssa->ssa_error = 0;
341		dprint(3, ("sosctp_assoc_waitconnected: error %d\n", error));
342		return (error);
343	}
344
345	if (!(ssa->ssa_state & SS_ISCONNECTED)) {
346		/*
347		 * Another thread could have consumed so_error
348		 * e.g. by calling read. - take from sowaitconnected()
349		 */
350		error = ECONNREFUSED;
351		dprint(3, ("sosctp_waitconnected: error %d\n", error));
352		return (error);
353	}
354	return (0);
355}
356
357/*
358 * Called from connect(), sendmsg() when we need to create a new association.
359 */
360int
361sosctp_assoc_createconn(struct sctp_sonode *ss, const struct sockaddr *name,
362    socklen_t namelen, const uchar_t *control, socklen_t controllen, int fflag,
363    struct cred *cr, struct sctp_soassoc **ssap)
364{
365	struct sonode *so = &ss->ss_so;
366	struct sctp_soassoc *ssa;
367	struct sockaddr_storage laddr;
368	sctp_sockbuf_limits_t sbl;
369	sctp_assoc_t id;
370	int error;
371	struct cmsghdr *cmsg;
372
373	ASSERT(MUTEX_HELD(&so->so_lock));
374
375	/*
376	 * System needs to pick local endpoint
377	 */
378	if (!(so->so_state & SS_ISBOUND)) {
379		bzero(&laddr, sizeof (laddr));
380		laddr.ss_family = so->so_family;
381
382		error = SOP_BIND(so, (struct sockaddr *)&laddr,
383		    sizeof (laddr), _SOBIND_LOCK_HELD, cr);
384		if (error) {
385			*ssap = NULL;
386			return (error);
387		}
388	}
389
390	/*
391	 * Create a new association, and call connect on that.
392	 */
393	for (;;) {
394		id = sosctp_aid_get(ss);
395		if (id != -1) {
396			break;
397		}
398		/*
399		 * Array not large enough; increase size.
400		 */
401		(void) sosctp_aid_grow(ss, ss->ss_maxassoc, KM_SLEEP);
402	}
403	++ss->ss_assoccnt;
404	sosctp_aid_reserve(ss, id, 1);
405
406	mutex_exit(&so->so_lock);
407
408	ssa = sosctp_assoc_create(ss, KM_SLEEP);
409	ssa->ssa_wroff = ss->ss_wroff;
410	ssa->ssa_wrsize = ss->ss_wrsize;
411	ssa->ssa_conn = sctp_create(ssa, (struct sctp_s *)so->so_proto_handle,
412	    so->so_family, SCTP_CAN_BLOCK, &sosctp_assoc_upcalls, &sbl, cr);
413
414	mutex_enter(&so->so_lock);
415	ss->ss_assocs[id].ssi_assoc = ssa;
416	ssa->ssa_id = id;
417	if (ssa->ssa_conn == NULL) {
418		ASSERT(ssa->ssa_refcnt == 1);
419		sosctp_assoc_free(ss, ssa);
420		*ssap = NULL;
421		return (ENOMEM);
422	}
423	ssa->ssa_state |= SS_ISBOUND;
424
425	sosctp_assoc_isconnecting(ssa);
426	SSA_REFHOLD(ssa);
427	mutex_exit(&so->so_lock);
428
429	/*
430	 * Can specify special init params
431	 */
432	cmsg = sosctp_find_cmsg(control, controllen, SCTP_INIT);
433	if (cmsg != NULL) {
434		error = sctp_set_opt(ssa->ssa_conn, IPPROTO_SCTP, SCTP_INITMSG,
435		    cmsg + 1, cmsg->cmsg_len - sizeof (*cmsg));
436		if (error != 0)
437			goto ret_err;
438	}
439
440	if ((error = sctp_connect(ssa->ssa_conn, name, namelen)) != 0)
441		goto ret_err;
442
443	mutex_enter(&so->so_lock);
444	/*
445	 * Allow other threads to access the socket
446	 */
447	error = sosctp_assoc_waitconnected(ssa, fflag);
448
449	switch (error) {
450	case 0:
451	case EINPROGRESS:
452	case EALREADY:
453	case EINTR:
454		/* Non-fatal errors */
455		break;
456	default:
457		/*
458		 * Fatal errors.  It means that sctp_assoc_disconnected()
459		 * must have been called.  So we only need to do a
460		 * SSA_REFRELE() here to release our hold done above.
461		 */
462		ASSERT(ssa->ssa_state & (SS_CANTSENDMORE | SS_CANTRCVMORE));
463		SSA_REFRELE(ss, ssa);
464		ssa = NULL;
465		break;
466	}
467
468	*ssap = ssa;
469	return (error);
470
471ret_err:
472	mutex_enter(&so->so_lock);
473	/*
474	 * There should not be any upcall done by SCTP.  So normally the
475	 * ssa_refcnt should be 2.  And we can call sosctp_assoc_free()
476	 * directly.  But since the ssa is inserted to the ss_soassocs
477	 * array above, some thread can actually put a hold on it.  In
478	 * this special case, we "manually" decrease the ssa_refcnt by 2.
479	 */
480	if (ssa->ssa_refcnt > 2)
481		ssa->ssa_refcnt -= 2;
482	else
483		sosctp_assoc_free(ss, ssa);
484	*ssap = NULL;
485	return (error);
486}
487
488/*
489 * Inherit socket properties
490 */
491void
492sosctp_so_inherit(struct sctp_sonode *lss, struct sctp_sonode *nss)
493{
494	struct sonode *nso = &nss->ss_so;
495	struct sonode *lso = &lss->ss_so;
496
497	nso->so_options = lso->so_options & (SO_DEBUG|SO_REUSEADDR|
498	    SO_KEEPALIVE|SO_DONTROUTE|SO_BROADCAST|SO_USELOOPBACK|
499	    SO_OOBINLINE|SO_DGRAM_ERRIND|SO_LINGER);
500	nso->so_sndbuf = lso->so_sndbuf;
501	nso->so_rcvbuf = lso->so_rcvbuf;
502	nso->so_pgrp = lso->so_pgrp;
503
504	nso->so_rcvlowat = lso->so_rcvlowat;
505	nso->so_sndlowat = lso->so_sndlowat;
506}
507
508/*
509 * Branching association to it's own socket. Inherit properties from
510 * the parent, and move data from RX queue to TX.
511 */
512void
513sosctp_assoc_move(struct sctp_sonode *ss, struct sctp_sonode *nss,
514    struct sctp_soassoc *ssa)
515{
516	mblk_t *mp, **nmp, *last_mp;
517	struct sctp_soassoc *tmp;
518
519	sosctp_so_inherit(ss, nss);
520
521	nss->ss_so.so_state |= (ss->ss_so.so_state & (SS_NDELAY|SS_NONBLOCK));
522	nss->ss_so.so_state |=
523	    (ssa->ssa_state & (SS_ISCONNECTED|SS_ISCONNECTING|
524	    SS_ISDISCONNECTING|SS_CANTSENDMORE|SS_CANTRCVMORE|SS_ISBOUND));
525	nss->ss_so.so_error = ssa->ssa_error;
526#if 0
527	nss->ss_so.so_txqueued = ssa->ssa_txqueued;
528#endif
529	nss->ss_so.so_snd_qfull = ssa->ssa_snd_qfull;
530	nss->ss_wroff = ssa->ssa_wroff;
531	nss->ss_wrsize = ssa->ssa_wrsize;
532	nss->ss_so.so_rcv_queued = ssa->ssa_rcv_queued;
533	nss->ss_so.so_proto_handle = (sock_lower_handle_t)ssa->ssa_conn;
534
535	if (nss->ss_so.so_rcv_queued > 0) {
536		nmp = &ss->ss_so.so_rcv_q_head;
537		last_mp = NULL;
538		while ((mp = *nmp) != NULL) {
539			tmp = *(struct sctp_soassoc **)DB_BASE(mp);
540			if (tmp == ssa) {
541				*nmp = mp->b_next;
542				ASSERT(DB_TYPE(mp) != M_DATA);
543				if (nss->ss_so.so_rcv_q_last_head == NULL) {
544					nss->ss_so.so_rcv_q_head = mp;
545				} else {
546					nss->ss_so.so_rcv_q_last_head->b_next =
547					    mp;
548				}
549				nss->ss_so.so_rcv_q_last_head = mp;
550				nss->ss_so.so_rcv_q_last_head->b_prev = last_mp;
551				mp->b_next = NULL;
552			} else {
553				nmp = &mp->b_next;
554				last_mp = mp;
555			}
556		}
557		ss->ss_so.so_rcv_q_last_head = last_mp;
558		ss->ss_so.so_rcv_q_last_head->b_prev = last_mp;
559	}
560}
561
562void
563sosctp_assoc_isconnecting(struct sctp_soassoc *ssa)
564{
565	struct sonode *so = &ssa->ssa_sonode->ss_so;
566
567	ASSERT(MUTEX_HELD(&so->so_lock));
568
569	ssa->ssa_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
570	ssa->ssa_state |= SS_ISCONNECTING;
571	cv_broadcast(&so->so_state_cv);
572}
573
574void
575sosctp_assoc_isconnected(struct sctp_soassoc *ssa)
576{
577	struct sonode *so = &ssa->ssa_sonode->ss_so;
578
579	ASSERT(MUTEX_HELD(&so->so_lock));
580
581	ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
582	ssa->ssa_state |= SS_ISCONNECTED;
583	cv_broadcast(&so->so_state_cv);
584}
585
586void
587sosctp_assoc_isdisconnecting(struct sctp_soassoc *ssa)
588{
589	struct sonode *so = &ssa->ssa_sonode->ss_so;
590
591	ASSERT(MUTEX_HELD(&so->so_lock));
592
593	ssa->ssa_state &= ~SS_ISCONNECTING;
594	ssa->ssa_state |= SS_CANTSENDMORE;
595	cv_broadcast(&so->so_state_cv);
596}
597
598void
599sosctp_assoc_isdisconnected(struct sctp_soassoc *ssa, int error)
600{
601	struct sonode *so = &ssa->ssa_sonode->ss_so;
602
603	ASSERT(MUTEX_HELD(&so->so_lock));
604
605	ssa->ssa_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
606	ssa->ssa_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
607	if (error != 0)
608		ssa->ssa_error = (ushort_t)error;
609	cv_broadcast(&so->so_state_cv);
610}
611