uipc_usrreq.c revision 24131
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
34 *	$Id: uipc_usrreq.c,v 1.21 1997/03/21 16:12:32 wpaul Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/queue.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/domain.h>
42#include <sys/fcntl.h>
43#include <sys/file.h>
44#include <sys/filedesc.h>
45#include <sys/mbuf.h>
46#include <sys/namei.h>
47#include <sys/proc.h>
48#include <sys/protosw.h>
49#include <sys/socket.h>
50#include <sys/socketvar.h>
51#include <sys/stat.h>
52#include <sys/sysctl.h>
53#include <sys/un.h>
54#include <sys/unpcb.h>
55#include <sys/vnode.h>
56
57/*
58 * Unix communications domain.
59 *
60 * TODO:
61 *	SEQPACKET, RDM
62 *	rethink name space problems
63 *	need a proper out-of-band
64 */
65static struct	sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
66static ino_t	unp_ino;		/* prototype for fake inode numbers */
67
68static int     unp_attach __P((struct socket *));
69static void    unp_detach __P((struct unpcb *));
70static int     unp_bind __P((struct unpcb *,struct mbuf *, struct proc *));
71static int     unp_connect __P((struct socket *,struct mbuf *, struct proc *));
72static void    unp_disconnect __P((struct unpcb *));
73static void    unp_shutdown __P((struct unpcb *));
74static void    unp_drop __P((struct unpcb *, int));
75static void    unp_gc __P((void));
76static void    unp_scan __P((struct mbuf *, void (*)(struct file *)));
77static void    unp_mark __P((struct file *));
78static void    unp_discard __P((struct file *));
79static int     unp_internalize __P((struct mbuf *, struct proc *));
80
81
82/*ARGSUSED*/
83int
84uipc_usrreq(so, req, m, nam, control)
85	struct socket *so;
86	int req;
87	struct mbuf *m, *nam, *control;
88{
89	struct unpcb *unp = sotounpcb(so);
90	register struct socket *so2;
91	register int error = 0;
92	struct proc *p = curproc;	/* XXX */
93
94	if (req == PRU_CONTROL)
95		return (EOPNOTSUPP);
96	if (req != PRU_SEND && control && control->m_len) {
97		error = EOPNOTSUPP;
98		goto release;
99	}
100	if (unp == 0 && req != PRU_ATTACH) {
101		error = EINVAL;
102		goto release;
103	}
104	switch (req) {
105
106	case PRU_ATTACH:
107		if (unp) {
108			error = EISCONN;
109			break;
110		}
111		error = unp_attach(so);
112		break;
113
114	case PRU_DETACH:
115		unp_detach(unp);
116		break;
117
118	case PRU_BIND:
119		error = unp_bind(unp, nam, p);
120		break;
121
122	case PRU_LISTEN:
123		if (unp->unp_vnode == 0)
124			error = EINVAL;
125		break;
126
127	case PRU_CONNECT:
128		error = unp_connect(so, nam, p);
129		break;
130
131	case PRU_CONNECT2:
132		error = unp_connect2(so, (struct socket *)nam);
133		break;
134
135	case PRU_DISCONNECT:
136		unp_disconnect(unp);
137		break;
138
139	case PRU_ACCEPT:
140		/*
141		 * Pass back name of connected socket,
142		 * if it was bound and we are still connected
143		 * (our peer may have closed already!).
144		 */
145		if (unp->unp_conn && unp->unp_conn->unp_addr) {
146			nam->m_len = unp->unp_conn->unp_addr->m_len;
147			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
148			    mtod(nam, caddr_t), (unsigned)nam->m_len);
149		} else {
150			nam->m_len = sizeof(sun_noname);
151			*(mtod(nam, struct sockaddr *)) = sun_noname;
152		}
153		break;
154
155	case PRU_SHUTDOWN:
156		socantsendmore(so);
157		unp_shutdown(unp);
158		break;
159
160	case PRU_RCVD:
161		switch (so->so_type) {
162
163		case SOCK_DGRAM:
164			panic("uipc 1");
165			/*NOTREACHED*/
166
167		case SOCK_STREAM:
168#define	rcv (&so->so_rcv)
169#define snd (&so2->so_snd)
170			if (unp->unp_conn == 0)
171				break;
172			so2 = unp->unp_conn->unp_socket;
173			/*
174			 * Adjust backpressure on sender
175			 * and wakeup any waiting to write.
176			 */
177			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
178			unp->unp_mbcnt = rcv->sb_mbcnt;
179			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
180			unp->unp_cc = rcv->sb_cc;
181			sowwakeup(so2);
182#undef snd
183#undef rcv
184			break;
185
186		default:
187			panic("uipc 2");
188		}
189		break;
190
191	case PRU_SEND:
192	case PRU_SEND_EOF:
193		if (control && (error = unp_internalize(control, p)))
194			break;
195		switch (so->so_type) {
196
197		case SOCK_DGRAM: {
198			struct sockaddr *from;
199
200			if (nam) {
201				if (unp->unp_conn) {
202					error = EISCONN;
203					break;
204				}
205				error = unp_connect(so, nam, p);
206				if (error)
207					break;
208			} else {
209				if (unp->unp_conn == 0) {
210					error = ENOTCONN;
211					break;
212				}
213			}
214			so2 = unp->unp_conn->unp_socket;
215			if (unp->unp_addr)
216				from = mtod(unp->unp_addr, struct sockaddr *);
217			else
218				from = &sun_noname;
219			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
220				sorwakeup(so2);
221				m = 0;
222				control = 0;
223			} else
224				error = ENOBUFS;
225			if (nam)
226				unp_disconnect(unp);
227			break;
228		}
229
230		case SOCK_STREAM:
231#define	rcv (&so2->so_rcv)
232#define	snd (&so->so_snd)
233			/* Connect if not connected yet. */
234			/*
235			 * Note: A better implementation would complain
236			 * if not equal to the peer's address.
237			 */
238			if ((so->so_state & SS_ISCONNECTED) == 0) {
239				if (nam) {
240		    			error = unp_connect(so, nam, p);
241					if (error)
242						break;	/* XXX */
243				} else {
244					error = ENOTCONN;
245					break;
246				}
247			}
248
249			if (so->so_state & SS_CANTSENDMORE) {
250				error = EPIPE;
251				break;
252			}
253			if (unp->unp_conn == 0)
254				panic("uipc 3");
255			so2 = unp->unp_conn->unp_socket;
256			/*
257			 * Send to paired receive port, and then reduce
258			 * send buffer hiwater marks to maintain backpressure.
259			 * Wake up readers.
260			 */
261			if (control) {
262				if (sbappendcontrol(rcv, m, control))
263					control = 0;
264			} else
265				sbappend(rcv, m);
266			snd->sb_mbmax -=
267			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
268			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
269			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
270			unp->unp_conn->unp_cc = rcv->sb_cc;
271			sorwakeup(so2);
272			m = 0;
273#undef snd
274#undef rcv
275			break;
276
277		default:
278			panic("uipc 4");
279		}
280		/*
281		 * SEND_EOF is equivalent to a SEND followed by
282		 * a SHUTDOWN.
283		 */
284		if (req == PRU_SEND_EOF) {
285			socantsendmore(so);
286			unp_shutdown(unp);
287		}
288		break;
289
290	case PRU_ABORT:
291		unp_drop(unp, ECONNABORTED);
292		break;
293
294	case PRU_SENSE:
295		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
296		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
297			so2 = unp->unp_conn->unp_socket;
298			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
299		}
300		((struct stat *) m)->st_dev = NODEV;
301		if (unp->unp_ino == 0)
302			unp->unp_ino = unp_ino++;
303		((struct stat *) m)->st_ino = unp->unp_ino;
304		return (0);
305
306	case PRU_RCVOOB:
307		return (EOPNOTSUPP);
308
309	case PRU_SENDOOB:
310		error = EOPNOTSUPP;
311		break;
312
313	case PRU_SOCKADDR:
314		if (unp->unp_addr) {
315			nam->m_len = unp->unp_addr->m_len;
316			bcopy(mtod(unp->unp_addr, caddr_t),
317			    mtod(nam, caddr_t), (unsigned)nam->m_len);
318		} else
319			nam->m_len = 0;
320		break;
321
322	case PRU_PEERADDR:
323		if (unp->unp_conn && unp->unp_conn->unp_addr) {
324			nam->m_len = unp->unp_conn->unp_addr->m_len;
325			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
326			    mtod(nam, caddr_t), (unsigned)nam->m_len);
327		} else
328			nam->m_len = 0;
329		break;
330
331	case PRU_SLOWTIMO:
332		break;
333
334	default:
335		panic("piusrreq");
336	}
337release:
338	if (control)
339		m_freem(control);
340	if (m)
341		m_freem(m);
342	return (error);
343}
344
345/*
346 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
347 * for stream sockets, although the total for sender and receiver is
348 * actually only PIPSIZ.
349 * Datagram sockets really use the sendspace as the maximum datagram size,
350 * and don't really want to reserve the sendspace.  Their recvspace should
351 * be large enough for at least one max-size datagram plus address.
352 */
353#ifndef PIPSIZ
354#define	PIPSIZ	8192
355#endif
356static u_long	unpst_sendspace = PIPSIZ;
357static u_long	unpst_recvspace = PIPSIZ;
358static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
359static u_long	unpdg_recvspace = 4*1024;
360
361static int	unp_rights;			/* file descriptors in flight */
362
363SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
364	   &unpst_sendspace, 0, "");
365SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
366	   &unpst_recvspace, 0, "");
367SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
368	   &unpdg_sendspace, 0, "");
369SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
370	   &unpdg_recvspace, 0, "");
371SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
372
373static int
374unp_attach(so)
375	struct socket *so;
376{
377	register struct mbuf *m;
378	register struct unpcb *unp;
379	int error;
380
381	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
382		switch (so->so_type) {
383
384		case SOCK_STREAM:
385			error = soreserve(so, unpst_sendspace, unpst_recvspace);
386			break;
387
388		case SOCK_DGRAM:
389			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
390			break;
391
392		default:
393			panic("unp_attach");
394		}
395		if (error)
396			return (error);
397	}
398	m = m_getclr(M_DONTWAIT, MT_PCB);
399	if (m == NULL)
400		return (ENOBUFS);
401	unp = mtod(m, struct unpcb *);
402	so->so_pcb = (caddr_t)unp;
403	unp->unp_socket = so;
404	return (0);
405}
406
407static void
408unp_detach(unp)
409	register struct unpcb *unp;
410{
411
412	if (unp->unp_vnode) {
413		unp->unp_vnode->v_socket = 0;
414		vrele(unp->unp_vnode);
415		unp->unp_vnode = 0;
416	}
417	if (unp->unp_conn)
418		unp_disconnect(unp);
419	while (unp->unp_refs)
420		unp_drop(unp->unp_refs, ECONNRESET);
421	soisdisconnected(unp->unp_socket);
422	unp->unp_socket->so_pcb = 0;
423	if (unp_rights) {
424		/*
425		 * Normally the receive buffer is flushed later,
426		 * in sofree, but if our receive buffer holds references
427		 * to descriptors that are now garbage, we will dispose
428		 * of those descriptor references after the garbage collector
429		 * gets them (resulting in a "panic: closef: count < 0").
430		 */
431		sorflush(unp->unp_socket);
432		unp_gc();
433	}
434	m_freem(unp->unp_addr);
435	(void) m_free(dtom(unp));
436}
437
438static int
439unp_bind(unp, nam, p)
440	struct unpcb *unp;
441	struct mbuf *nam;
442	struct proc *p;
443{
444	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
445	register struct vnode *vp;
446	struct vattr vattr;
447	int error;
448	struct nameidata nd;
449
450	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
451	    soun->sun_path, p);
452	if (unp->unp_vnode != NULL)
453		return (EINVAL);
454	if (nam->m_len == MLEN) {
455		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
456			return (EINVAL);
457	} else
458		*(mtod(nam, caddr_t) + nam->m_len) = 0;
459/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
460	error = namei(&nd);
461	if (error)
462		return (error);
463	vp = nd.ni_vp;
464	if (vp != NULL) {
465		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
466		if (nd.ni_dvp == vp)
467			vrele(nd.ni_dvp);
468		else
469			vput(nd.ni_dvp);
470		vrele(vp);
471		return (EADDRINUSE);
472	}
473	VATTR_NULL(&vattr);
474	vattr.va_type = VSOCK;
475	vattr.va_mode = ACCESSPERMS;
476	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
477	if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
478		return (error);
479	vp = nd.ni_vp;
480	vp->v_socket = unp->unp_socket;
481	unp->unp_vnode = vp;
482	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
483	VOP_UNLOCK(vp, 0, p);
484	return (0);
485}
486
/*
 * Connect `so' to the socket bound at the pathname in `nam'.
 *
 * The path must name a VSOCK vnode writable by the caller, with a
 * socket of the same type attached.  For protocols that require a
 * connection, the target must be accepting connections and a fresh
 * socket is spawned from it with sonewconn(); that new socket
 * inherits a copy of the listener's address and becomes the peer.
 * The looked-up vnode is released with vput() on every path after
 * the lookup succeeds.
 */
static int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	caddr_t name = mtod(nam, caddr_t);
	struct vnode *vp;
	struct socket *peer, *child;
	struct unpcb *listenpcb, *childpcb;
	struct nameidata nd;
	int error;

	/* Make sure the path is NUL-terminated before looking it up. */
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (name[nam->m_len - 1] != 0)
			return (EMSGSIZE);
	} else
		name[nam->m_len] = 0;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	error = namei(&nd);
	if (error)
		return (error);

	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto out;
	peer = vp->v_socket;
	if (peer == 0) {
		error = ECONNREFUSED;
		goto out;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto out;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		child = sonewconn(peer, 0);
		if (child == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		listenpcb = sotounpcb(peer);
		childpcb = sotounpcb(child);
		if (listenpcb->unp_addr)
			childpcb->unp_addr =
			    m_copy(listenpcb->unp_addr, 0, (int)M_COPYALL);
		peer = child;
	}
	error = unp_connect2(so, peer);
out:
	vput(vp);
	return (error);
}
544
545int
546unp_connect2(so, so2)
547	register struct socket *so;
548	register struct socket *so2;
549{
550	register struct unpcb *unp = sotounpcb(so);
551	register struct unpcb *unp2;
552
553	if (so2->so_type != so->so_type)
554		return (EPROTOTYPE);
555	unp2 = sotounpcb(so2);
556	unp->unp_conn = unp2;
557	switch (so->so_type) {
558
559	case SOCK_DGRAM:
560		unp->unp_nextref = unp2->unp_refs;
561		unp2->unp_refs = unp;
562		soisconnected(so);
563		break;
564
565	case SOCK_STREAM:
566		unp2->unp_conn = unp;
567		soisconnected(so);
568		soisconnected(so2);
569		break;
570
571	default:
572		panic("unp_connect2");
573	}
574	return (0);
575}
576
577static void
578unp_disconnect(unp)
579	struct unpcb *unp;
580{
581	register struct unpcb *unp2 = unp->unp_conn;
582
583	if (unp2 == 0)
584		return;
585	unp->unp_conn = 0;
586	switch (unp->unp_socket->so_type) {
587
588	case SOCK_DGRAM:
589		if (unp2->unp_refs == unp)
590			unp2->unp_refs = unp->unp_nextref;
591		else {
592			unp2 = unp2->unp_refs;
593			for (;;) {
594				if (unp2 == 0)
595					panic("unp_disconnect");
596				if (unp2->unp_nextref == unp)
597					break;
598				unp2 = unp2->unp_nextref;
599			}
600			unp2->unp_nextref = unp->unp_nextref;
601		}
602		unp->unp_nextref = 0;
603		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
604		break;
605
606	case SOCK_STREAM:
607		soisdisconnected(unp->unp_socket);
608		unp2->unp_conn = 0;
609		soisdisconnected(unp2->unp_socket);
610		break;
611	}
612}
613
#ifdef notdef
/* Compiled out: aborting would simply detach the control block. */
void
unp_abort(unp)
	struct unpcb *unp;
{
	unp_detach(unp);
}
#endif
623
624static void
625unp_shutdown(unp)
626	struct unpcb *unp;
627{
628	struct socket *so;
629
630	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
631	    (so = unp->unp_conn->unp_socket))
632		socantrcvmore(so);
633}
634
635static void
636unp_drop(unp, errno)
637	struct unpcb *unp;
638	int errno;
639{
640	struct socket *so = unp->unp_socket;
641
642	so->so_error = errno;
643	unp_disconnect(unp);
644	if (so->so_head) {
645		so->so_pcb = (caddr_t) 0;
646		m_freem(unp->unp_addr);
647		(void) m_free(dtom(unp));
648		sofree(so);
649	}
650}
651
#ifdef notdef
/* Compiled out: placeholder drain routine with nothing to do. */
void
unp_drain()
{
}
#endif
659
660int
661unp_externalize(rights)
662	struct mbuf *rights;
663{
664	struct proc *p = curproc;		/* XXX */
665	register int i;
666	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
667	register struct file **rp = (struct file **)(cm + 1);
668	register struct file *fp;
669	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
670	int f;
671
672	/*
673	 * if the new FD's will not fit, then we free them all
674	 */
675	if (!fdavail(p, newfds)) {
676		for (i = 0; i < newfds; i++) {
677			fp = *rp;
678			unp_discard(fp);
679			*rp++ = 0;
680		}
681		return (EMSGSIZE);
682	}
683	/*
684	 * now change each pointer to an fd in the global table to
685	 * an integer that is the index to the local fd table entry
686	 * that we set up to point to the global one we are transferring.
687	 * XXX this assumes a pointer and int are the same size...!
688	 */
689	for (i = 0; i < newfds; i++) {
690		if (fdalloc(p, 0, &f))
691			panic("unp_externalize");
692		fp = *rp;
693		p->p_fd->fd_ofiles[f] = fp;
694		fp->f_msgcount--;
695		unp_rights--;
696		*(int *)rp++ = f;
697	}
698	return (0);
699}
700
701#ifndef MIN
702#define	MIN(a,b) (((a)<(b))?(a):(b))
703#endif
704
705static int
706unp_internalize(control, p)
707	struct mbuf *control;
708	struct proc *p;
709{
710	struct filedesc *fdp = p->p_fd;
711	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
712	register struct file **rp;
713	register struct file *fp;
714	register int i, fd;
715	register struct cmsgcred *cmcred;
716	int oldfds;
717
718	if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
719	    cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len)
720		return (EINVAL);
721
722	/*
723	 * Fill in credential information.
724	 */
725	if (cm->cmsg_type == SCM_CREDS) {
726		cmcred = (struct cmsgcred *)(cm + 1);
727		cmcred->cmcred_pid = p->p_pid;
728		cmcred->cmcred_uid = p->p_cred->p_ruid;
729		cmcred->cmcred_gid = p->p_cred->p_rgid;
730		cmcred->cmcred_euid = p->p_ucred->cr_uid;
731		cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
732							CMGROUP_MAX);
733		for (i = 0; i < cmcred->cmcred_ngroups; i++)
734			cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
735		return(0);
736	}
737
738	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
739	/*
740	 * check that all the FDs passed in refer to legal OPEN files
741	 * If not, reject the entire operation.
742	 */
743	rp = (struct file **)(cm + 1);
744	for (i = 0; i < oldfds; i++) {
745		fd = *(int *)rp++;
746		if ((unsigned)fd >= fdp->fd_nfiles ||
747		    fdp->fd_ofiles[fd] == NULL)
748			return (EBADF);
749	}
750	/*
751	 * Now replace the integer FDs with pointers to
752	 * the associated global file table entry..
753	 * XXX this assumes a pointer and an int are the same size!
754	 */
755	rp = (struct file **)(cm + 1);
756	for (i = 0; i < oldfds; i++) {
757		fp = fdp->fd_ofiles[*(int *)rp];
758		*rp++ = fp;
759		fp->f_count++;
760		fp->f_msgcount++;
761		unp_rights++;
762	}
763	return (0);
764}
765
766static int	unp_defer, unp_gcing;
767
768static void
769unp_gc()
770{
771	register struct file *fp, *nextfp;
772	register struct socket *so;
773	struct file **extra_ref, **fpp;
774	int nunref, i;
775
776	if (unp_gcing)
777		return;
778	unp_gcing = 1;
779	unp_defer = 0;
780	/*
781	 * before going through all this, set all FDs to
782	 * be NOT defered and NOT externally accessible
783	 */
784	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
785		fp->f_flag &= ~(FMARK|FDEFER);
786	do {
787		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
788			/*
789			 * If the file is not open, skip it
790			 */
791			if (fp->f_count == 0)
792				continue;
793			/*
794			 * If we already marked it as 'defer'  in a
795			 * previous pass, then try process it this time
796			 * and un-mark it
797			 */
798			if (fp->f_flag & FDEFER) {
799				fp->f_flag &= ~FDEFER;
800				unp_defer--;
801			} else {
802				/*
803				 * if it's not defered, then check if it's
804				 * already marked.. if so skip it
805				 */
806				if (fp->f_flag & FMARK)
807					continue;
808				/*
809				 * If all references are from messages
810				 * in transit, then skip it. it's not
811				 * externally accessible.
812				 */
813				if (fp->f_count == fp->f_msgcount)
814					continue;
815				/*
816				 * If it got this far then it must be
817				 * externally accessible.
818				 */
819				fp->f_flag |= FMARK;
820			}
821			/*
822			 * either it was defered, or it is externally
823			 * accessible and not already marked so.
824			 * Now check if it is possibly one of OUR sockets.
825			 */
826			if (fp->f_type != DTYPE_SOCKET ||
827			    (so = (struct socket *)fp->f_data) == 0)
828				continue;
829			if (so->so_proto->pr_domain != &localdomain ||
830			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
831				continue;
832#ifdef notdef
833			if (so->so_rcv.sb_flags & SB_LOCK) {
834				/*
835				 * This is problematical; it's not clear
836				 * we need to wait for the sockbuf to be
837				 * unlocked (on a uniprocessor, at least),
838				 * and it's also not clear what to do
839				 * if sbwait returns an error due to receipt
840				 * of a signal.  If sbwait does return
841				 * an error, we'll go into an infinite
842				 * loop.  Delete all of this for now.
843				 */
844				(void) sbwait(&so->so_rcv);
845				goto restart;
846			}
847#endif
848			/*
849			 * So, Ok, it's one of our sockets and it IS externally
850			 * accessible (or was defered). Now we look
851			 * to see if we hold any file descriptors in it's
852			 * message buffers. Follow those links and mark them
853			 * as accessible too.
854			 */
855			unp_scan(so->so_rcv.sb_mb, unp_mark);
856		}
857	} while (unp_defer);
858	/*
859	 * We grab an extra reference to each of the file table entries
860	 * that are not otherwise accessible and then free the rights
861	 * that are stored in messages on them.
862	 *
863	 * The bug in the orginal code is a little tricky, so I'll describe
864	 * what's wrong with it here.
865	 *
866	 * It is incorrect to simply unp_discard each entry for f_msgcount
867	 * times -- consider the case of sockets A and B that contain
868	 * references to each other.  On a last close of some other socket,
869	 * we trigger a gc since the number of outstanding rights (unp_rights)
870	 * is non-zero.  If during the sweep phase the gc code un_discards,
871	 * we end up doing a (full) closef on the descriptor.  A closef on A
872	 * results in the following chain.  Closef calls soo_close, which
873	 * calls soclose.   Soclose calls first (through the switch
874	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
875	 * returns because the previous instance had set unp_gcing, and
876	 * we return all the way back to soclose, which marks the socket
877	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
878	 * to free up the rights that are queued in messages on the socket A,
879	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
880	 * switch unp_dispose, which unp_scans with unp_discard.  This second
881	 * instance of unp_discard just calls closef on B.
882	 *
883	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
884	 * which results in another closef on A.  Unfortunately, A is already
885	 * being closed, and the descriptor has already been marked with
886	 * SS_NOFDREF, and soclose panics at this point.
887	 *
888	 * Here, we first take an extra reference to each inaccessible
889	 * descriptor.  Then, we call sorflush ourself, since we know
890	 * it is a Unix domain socket anyhow.  After we destroy all the
891	 * rights carried in messages, we do a last closef to get rid
892	 * of our extra reference.  This is the last close, and the
893	 * unp_detach etc will shut down the socket.
894	 *
895	 * 91/09/19, bsy@cs.cmu.edu
896	 */
897	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
898	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
899	    fp = nextfp) {
900		nextfp = fp->f_list.le_next;
901		/*
902		 * If it's not open, skip it
903		 */
904		if (fp->f_count == 0)
905			continue;
906		/*
907		 * If all refs are from msgs, and it's not marked accessible
908		 * then it must be referenced from some unreachable cycle
909		 * of (shut-down) FDs, so include it in our
910		 * list of FDs to remove
911		 */
912		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
913			*fpp++ = fp;
914			nunref++;
915			fp->f_count++;
916		}
917	}
918	/*
919	 * for each FD on our hit list, do the following two things
920	 */
921	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
922		sorflush((struct socket *)(*fpp)->f_data);
923	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
924		closef(*fpp, (struct proc *) NULL);
925	free((caddr_t)extra_ref, M_FILE);
926	unp_gcing = 0;
927}
928
929void
930unp_dispose(m)
931	struct mbuf *m;
932{
933
934	if (m)
935		unp_scan(m, unp_discard);
936}
937
938static void
939unp_scan(m0, op)
940	register struct mbuf *m0;
941	void (*op) __P((struct file *));
942{
943	register struct mbuf *m;
944	register struct file **rp;
945	register struct cmsghdr *cm;
946	register int i;
947	int qfds;
948
949	while (m0) {
950		for (m = m0; m; m = m->m_next)
951			if (m->m_type == MT_CONTROL &&
952			    m->m_len >= sizeof(*cm)) {
953				cm = mtod(m, struct cmsghdr *);
954				if (cm->cmsg_level != SOL_SOCKET ||
955				    cm->cmsg_type != SCM_RIGHTS)
956					continue;
957				qfds = (cm->cmsg_len - sizeof *cm)
958						/ sizeof (struct file *);
959				rp = (struct file **)(cm + 1);
960				for (i = 0; i < qfds; i++)
961					(*op)(*rp++);
962				break;		/* XXX, but saves time */
963			}
964		m0 = m0->m_act;
965	}
966}
967
968static void
969unp_mark(fp)
970	struct file *fp;
971{
972
973	if (fp->f_flag & FMARK)
974		return;
975	unp_defer++;
976	fp->f_flag |= (FMARK|FDEFER);
977}
978
979static void
980unp_discard(fp)
981	struct file *fp;
982{
983
984	fp->f_msgcount--;
985	unp_rights--;
986	(void) closef(fp, (struct proc *)NULL);
987}
988