uipc_usrreq.c revision 24083
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
34 *	$Id: uipc_usrreq.c,v 1.20 1997/02/24 20:30:58 wollman Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/queue.h>
39#include <sys/systm.h>
40#include <sys/kernel.h>
41#include <sys/domain.h>
42#include <sys/file.h>
43#include <sys/filedesc.h>
44#include <sys/mbuf.h>
45#include <sys/namei.h>
46#include <sys/proc.h>
47#include <sys/protosw.h>
48#include <sys/socket.h>
49#include <sys/socketvar.h>
50#include <sys/stat.h>
51#include <sys/sysctl.h>
52#include <sys/un.h>
53#include <sys/unpcb.h>
54#include <sys/vnode.h>
55
56/*
57 * Unix communications domain.
58 *
59 * TODO:
60 *	SEQPACKET, RDM
61 *	rethink name space problems
62 *	need a proper out-of-band
63 */
/*
 * Placeholder address used when a socket has no bound name: it is handed
 * back by PRU_ACCEPT and used as the datagram source address.
 */
static struct	sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
static ino_t	unp_ino;		/* prototype for fake inode numbers */

/* Forward declarations for the file-local helpers defined below. */
static int     unp_attach __P((struct socket *));
static void    unp_detach __P((struct unpcb *));
static int     unp_bind __P((struct unpcb *,struct mbuf *, struct proc *));
static int     unp_connect __P((struct socket *,struct mbuf *, struct proc *));
static void    unp_disconnect __P((struct unpcb *));
static void    unp_shutdown __P((struct unpcb *));
static void    unp_drop __P((struct unpcb *, int));
static void    unp_gc __P((void));
static void    unp_scan __P((struct mbuf *, void (*)(struct file *)));
static void    unp_mark __P((struct file *));
static void    unp_discard __P((struct file *));
static int     unp_internalize __P((struct mbuf *, struct proc *));
79
80
81/*ARGSUSED*/
82int
83uipc_usrreq(so, req, m, nam, control)
84	struct socket *so;
85	int req;
86	struct mbuf *m, *nam, *control;
87{
88	struct unpcb *unp = sotounpcb(so);
89	register struct socket *so2;
90	register int error = 0;
91	struct proc *p = curproc;	/* XXX */
92
93	if (req == PRU_CONTROL)
94		return (EOPNOTSUPP);
95	if (req != PRU_SEND && control && control->m_len) {
96		error = EOPNOTSUPP;
97		goto release;
98	}
99	if (unp == 0 && req != PRU_ATTACH) {
100		error = EINVAL;
101		goto release;
102	}
103	switch (req) {
104
105	case PRU_ATTACH:
106		if (unp) {
107			error = EISCONN;
108			break;
109		}
110		error = unp_attach(so);
111		break;
112
113	case PRU_DETACH:
114		unp_detach(unp);
115		break;
116
117	case PRU_BIND:
118		error = unp_bind(unp, nam, p);
119		break;
120
121	case PRU_LISTEN:
122		if (unp->unp_vnode == 0)
123			error = EINVAL;
124		break;
125
126	case PRU_CONNECT:
127		error = unp_connect(so, nam, p);
128		break;
129
130	case PRU_CONNECT2:
131		error = unp_connect2(so, (struct socket *)nam);
132		break;
133
134	case PRU_DISCONNECT:
135		unp_disconnect(unp);
136		break;
137
138	case PRU_ACCEPT:
139		/*
140		 * Pass back name of connected socket,
141		 * if it was bound and we are still connected
142		 * (our peer may have closed already!).
143		 */
144		if (unp->unp_conn && unp->unp_conn->unp_addr) {
145			nam->m_len = unp->unp_conn->unp_addr->m_len;
146			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
147			    mtod(nam, caddr_t), (unsigned)nam->m_len);
148		} else {
149			nam->m_len = sizeof(sun_noname);
150			*(mtod(nam, struct sockaddr *)) = sun_noname;
151		}
152		break;
153
154	case PRU_SHUTDOWN:
155		socantsendmore(so);
156		unp_shutdown(unp);
157		break;
158
159	case PRU_RCVD:
160		switch (so->so_type) {
161
162		case SOCK_DGRAM:
163			panic("uipc 1");
164			/*NOTREACHED*/
165
166		case SOCK_STREAM:
167#define	rcv (&so->so_rcv)
168#define snd (&so2->so_snd)
169			if (unp->unp_conn == 0)
170				break;
171			so2 = unp->unp_conn->unp_socket;
172			/*
173			 * Adjust backpressure on sender
174			 * and wakeup any waiting to write.
175			 */
176			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
177			unp->unp_mbcnt = rcv->sb_mbcnt;
178			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
179			unp->unp_cc = rcv->sb_cc;
180			sowwakeup(so2);
181#undef snd
182#undef rcv
183			break;
184
185		default:
186			panic("uipc 2");
187		}
188		break;
189
190	case PRU_SEND:
191	case PRU_SEND_EOF:
192		if (control && (error = unp_internalize(control, p)))
193			break;
194		switch (so->so_type) {
195
196		case SOCK_DGRAM: {
197			struct sockaddr *from;
198
199			if (nam) {
200				if (unp->unp_conn) {
201					error = EISCONN;
202					break;
203				}
204				error = unp_connect(so, nam, p);
205				if (error)
206					break;
207			} else {
208				if (unp->unp_conn == 0) {
209					error = ENOTCONN;
210					break;
211				}
212			}
213			so2 = unp->unp_conn->unp_socket;
214			if (unp->unp_addr)
215				from = mtod(unp->unp_addr, struct sockaddr *);
216			else
217				from = &sun_noname;
218			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
219				sorwakeup(so2);
220				m = 0;
221				control = 0;
222			} else
223				error = ENOBUFS;
224			if (nam)
225				unp_disconnect(unp);
226			break;
227		}
228
229		case SOCK_STREAM:
230#define	rcv (&so2->so_rcv)
231#define	snd (&so->so_snd)
232			/* Connect if not connected yet. */
233			/*
234			 * Note: A better implementation would complain
235			 * if not equal to the peer's address.
236			 */
237			if ((so->so_state & SS_ISCONNECTED) == 0) {
238				if (nam) {
239		    			error = unp_connect(so, nam, p);
240					if (error)
241						break;	/* XXX */
242				} else {
243					error = ENOTCONN;
244					break;
245				}
246			}
247
248			if (so->so_state & SS_CANTSENDMORE) {
249				error = EPIPE;
250				break;
251			}
252			if (unp->unp_conn == 0)
253				panic("uipc 3");
254			so2 = unp->unp_conn->unp_socket;
255			/*
256			 * Send to paired receive port, and then reduce
257			 * send buffer hiwater marks to maintain backpressure.
258			 * Wake up readers.
259			 */
260			if (control) {
261				if (sbappendcontrol(rcv, m, control))
262					control = 0;
263			} else
264				sbappend(rcv, m);
265			snd->sb_mbmax -=
266			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
267			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
268			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
269			unp->unp_conn->unp_cc = rcv->sb_cc;
270			sorwakeup(so2);
271			m = 0;
272#undef snd
273#undef rcv
274			break;
275
276		default:
277			panic("uipc 4");
278		}
279		/*
280		 * SEND_EOF is equivalent to a SEND followed by
281		 * a SHUTDOWN.
282		 */
283		if (req == PRU_SEND_EOF) {
284			socantsendmore(so);
285			unp_shutdown(unp);
286		}
287		break;
288
289	case PRU_ABORT:
290		unp_drop(unp, ECONNABORTED);
291		break;
292
293	case PRU_SENSE:
294		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
295		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
296			so2 = unp->unp_conn->unp_socket;
297			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
298		}
299		((struct stat *) m)->st_dev = NODEV;
300		if (unp->unp_ino == 0)
301			unp->unp_ino = unp_ino++;
302		((struct stat *) m)->st_ino = unp->unp_ino;
303		return (0);
304
305	case PRU_RCVOOB:
306		return (EOPNOTSUPP);
307
308	case PRU_SENDOOB:
309		error = EOPNOTSUPP;
310		break;
311
312	case PRU_SOCKADDR:
313		if (unp->unp_addr) {
314			nam->m_len = unp->unp_addr->m_len;
315			bcopy(mtod(unp->unp_addr, caddr_t),
316			    mtod(nam, caddr_t), (unsigned)nam->m_len);
317		} else
318			nam->m_len = 0;
319		break;
320
321	case PRU_PEERADDR:
322		if (unp->unp_conn && unp->unp_conn->unp_addr) {
323			nam->m_len = unp->unp_conn->unp_addr->m_len;
324			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
325			    mtod(nam, caddr_t), (unsigned)nam->m_len);
326		} else
327			nam->m_len = 0;
328		break;
329
330	case PRU_SLOWTIMO:
331		break;
332
333	default:
334		panic("piusrreq");
335	}
336release:
337	if (control)
338		m_freem(control);
339	if (m)
340		m_freem(m);
341	return (error);
342}
343
344/*
345 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
346 * for stream sockets, although the total for sender and receiver is
347 * actually only PIPSIZ.
348 * Datagram sockets really use the sendspace as the maximum datagram size,
349 * and don't really want to reserve the sendspace.  Their recvspace should
350 * be large enough for at least one max-size datagram plus address.
351 */
#ifndef PIPSIZ
#define	PIPSIZ	8192
#endif
/* Default buffer reservations that unp_attach() passes to soreserve(). */
static u_long	unpst_sendspace = PIPSIZ;
static u_long	unpst_recvspace = PIPSIZ;
static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long	unpdg_recvspace = 4*1024;

static int	unp_rights;			/* file descriptors in flight */

/*
 * Register the values above as sysctl variables: the four space limits
 * are read-write (CTLFLAG_RW), the in-flight count is read-only
 * (CTLFLAG_RD).
 * NOTE(review): SYSCTL_INT is applied to u_long variables here -- confirm
 * that int and long have the same size on every supported platform.
 */
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
	   &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpst_recvspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
	   &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpdg_recvspace, 0, "");
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
371
372static int
373unp_attach(so)
374	struct socket *so;
375{
376	register struct mbuf *m;
377	register struct unpcb *unp;
378	int error;
379
380	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
381		switch (so->so_type) {
382
383		case SOCK_STREAM:
384			error = soreserve(so, unpst_sendspace, unpst_recvspace);
385			break;
386
387		case SOCK_DGRAM:
388			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
389			break;
390
391		default:
392			panic("unp_attach");
393		}
394		if (error)
395			return (error);
396	}
397	m = m_getclr(M_DONTWAIT, MT_PCB);
398	if (m == NULL)
399		return (ENOBUFS);
400	unp = mtod(m, struct unpcb *);
401	so->so_pcb = (caddr_t)unp;
402	unp->unp_socket = so;
403	return (0);
404}
405
406static void
407unp_detach(unp)
408	register struct unpcb *unp;
409{
410
411	if (unp->unp_vnode) {
412		unp->unp_vnode->v_socket = 0;
413		vrele(unp->unp_vnode);
414		unp->unp_vnode = 0;
415	}
416	if (unp->unp_conn)
417		unp_disconnect(unp);
418	while (unp->unp_refs)
419		unp_drop(unp->unp_refs, ECONNRESET);
420	soisdisconnected(unp->unp_socket);
421	unp->unp_socket->so_pcb = 0;
422	if (unp_rights) {
423		/*
424		 * Normally the receive buffer is flushed later,
425		 * in sofree, but if our receive buffer holds references
426		 * to descriptors that are now garbage, we will dispose
427		 * of those descriptor references after the garbage collector
428		 * gets them (resulting in a "panic: closef: count < 0").
429		 */
430		sorflush(unp->unp_socket);
431		unp_gc();
432	}
433	m_freem(unp->unp_addr);
434	(void) m_free(dtom(unp));
435}
436
437static int
438unp_bind(unp, nam, p)
439	struct unpcb *unp;
440	struct mbuf *nam;
441	struct proc *p;
442{
443	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
444	register struct vnode *vp;
445	struct vattr vattr;
446	int error;
447	struct nameidata nd;
448
449	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
450	    soun->sun_path, p);
451	if (unp->unp_vnode != NULL)
452		return (EINVAL);
453	if (nam->m_len == MLEN) {
454		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
455			return (EINVAL);
456	} else
457		*(mtod(nam, caddr_t) + nam->m_len) = 0;
458/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
459	error = namei(&nd);
460	if (error)
461		return (error);
462	vp = nd.ni_vp;
463	if (vp != NULL) {
464		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
465		if (nd.ni_dvp == vp)
466			vrele(nd.ni_dvp);
467		else
468			vput(nd.ni_dvp);
469		vrele(vp);
470		return (EADDRINUSE);
471	}
472	VATTR_NULL(&vattr);
473	vattr.va_type = VSOCK;
474	vattr.va_mode = ACCESSPERMS;
475	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
476	if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
477		return (error);
478	vp = nd.ni_vp;
479	vp->v_socket = unp->unp_socket;
480	unp->unp_vnode = vp;
481	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
482	VOP_UNLOCK(vp, 0, p);
483	return (0);
484}
485
/*
 * Connect socket so to the socket bound to the path name in nam.
 *
 * The name is looked up, checked to be a writable VSOCK node with a
 * socket of the same type behind it, and, for connection-oriented
 * protocols, a new socket is spawned from the listener before the two
 * ends are joined with unp_connect2().  Returns 0 or an errno value.
 */
static int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	struct sockaddr_un *addr = mtod(nam, struct sockaddr_un *);
	struct vnode *vp;
	struct socket *peer, *newso;
	struct unpcb *listenpcb, *newpcb;
	caddr_t endp;
	int error;
	struct nameidata nd;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, addr->sun_path, p);

	/* NUL-terminate the path, or insist it already is if there is no room. */
	endp = mtod(nam, caddr_t) + nam->m_len;
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (endp[-1] != 0)
			return (EMSGSIZE);
	} else
		*endp = 0;

	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;
	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto out;
	peer = vp->v_socket;
	if (peer == 0) {
		error = ECONNREFUSED;
		goto out;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto out;
	}
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		/* The target must be listening and able to spawn a socket. */
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		newso = sonewconn(peer, 0);
		if (newso == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		/* The spawned socket inherits the listener's bound name. */
		listenpcb = sotounpcb(peer);
		newpcb = sotounpcb(newso);
		if (listenpcb->unp_addr)
			newpcb->unp_addr =
			    m_copy(listenpcb->unp_addr, 0, (int)M_COPYALL);
		peer = newso;
	}
	error = unp_connect2(so, peer);
out:
	vput(vp);
	return (error);
}
543
544int
545unp_connect2(so, so2)
546	register struct socket *so;
547	register struct socket *so2;
548{
549	register struct unpcb *unp = sotounpcb(so);
550	register struct unpcb *unp2;
551
552	if (so2->so_type != so->so_type)
553		return (EPROTOTYPE);
554	unp2 = sotounpcb(so2);
555	unp->unp_conn = unp2;
556	switch (so->so_type) {
557
558	case SOCK_DGRAM:
559		unp->unp_nextref = unp2->unp_refs;
560		unp2->unp_refs = unp;
561		soisconnected(so);
562		break;
563
564	case SOCK_STREAM:
565		unp2->unp_conn = unp;
566		soisconnected(so);
567		soisconnected(so2);
568		break;
569
570	default:
571		panic("unp_connect2");
572	}
573	return (0);
574}
575
576static void
577unp_disconnect(unp)
578	struct unpcb *unp;
579{
580	register struct unpcb *unp2 = unp->unp_conn;
581
582	if (unp2 == 0)
583		return;
584	unp->unp_conn = 0;
585	switch (unp->unp_socket->so_type) {
586
587	case SOCK_DGRAM:
588		if (unp2->unp_refs == unp)
589			unp2->unp_refs = unp->unp_nextref;
590		else {
591			unp2 = unp2->unp_refs;
592			for (;;) {
593				if (unp2 == 0)
594					panic("unp_disconnect");
595				if (unp2->unp_nextref == unp)
596					break;
597				unp2 = unp2->unp_nextref;
598			}
599			unp2->unp_nextref = unp->unp_nextref;
600		}
601		unp->unp_nextref = 0;
602		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
603		break;
604
605	case SOCK_STREAM:
606		soisdisconnected(unp->unp_socket);
607		unp2->unp_conn = 0;
608		soisdisconnected(unp2->unp_socket);
609		break;
610	}
611}
612
#ifdef notdef
/*
 * Disabled code: only compiled if notdef is defined.
 * Abort a socket by simply detaching its pcb.
 */
void
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
622
623static void
624unp_shutdown(unp)
625	struct unpcb *unp;
626{
627	struct socket *so;
628
629	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
630	    (so = unp->unp_conn->unp_socket))
631		socantrcvmore(so);
632}
633
634static void
635unp_drop(unp, errno)
636	struct unpcb *unp;
637	int errno;
638{
639	struct socket *so = unp->unp_socket;
640
641	so->so_error = errno;
642	unp_disconnect(unp);
643	if (so->so_head) {
644		so->so_pcb = (caddr_t) 0;
645		m_freem(unp->unp_addr);
646		(void) m_free(dtom(unp));
647		sofree(so);
648	}
649}
650
#ifdef notdef
/*
 * Disabled code: only compiled if notdef is defined.
 * Empty drain hook; it does nothing.
 */
void
unp_drain()
{

}
#endif
658
659int
660unp_externalize(rights)
661	struct mbuf *rights;
662{
663	struct proc *p = curproc;		/* XXX */
664	register int i;
665	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
666	register struct file **rp = (struct file **)(cm + 1);
667	register struct file *fp;
668	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
669	int f;
670
671	/*
672	 * if the new FD's will not fit, then we free them all
673	 */
674	if (!fdavail(p, newfds)) {
675		for (i = 0; i < newfds; i++) {
676			fp = *rp;
677			unp_discard(fp);
678			*rp++ = 0;
679		}
680		return (EMSGSIZE);
681	}
682	/*
683	 * now change each pointer to an fd in the global table to
684	 * an integer that is the index to the local fd table entry
685	 * that we set up to point to the global one we are transferring.
686	 * XXX this assumes a pointer and int are the same size...!
687	 */
688	for (i = 0; i < newfds; i++) {
689		if (fdalloc(p, 0, &f))
690			panic("unp_externalize");
691		fp = *rp;
692		p->p_fd->fd_ofiles[f] = fp;
693		fp->f_msgcount--;
694		unp_rights--;
695		*(int *)rp++ = f;
696	}
697	return (0);
698}
699
700#ifndef MIN
701#define	MIN(a,b) (((a)<(b))?(a):(b))
702#endif
703
704static int
705unp_internalize(control, p)
706	struct mbuf *control;
707	struct proc *p;
708{
709	struct filedesc *fdp = p->p_fd;
710	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
711	register struct file **rp;
712	register struct file *fp;
713	register int i, fd;
714	register struct cmsgcred *cmcred;
715	int oldfds;
716
717	if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
718	    cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len)
719		return (EINVAL);
720
721	/*
722	 * Fill in credential information.
723	 */
724	if (cm->cmsg_type == SCM_CREDS) {
725		cmcred = (struct cmsgcred *)(cm + 1);
726		cmcred->cmcred_pid = p->p_pid;
727		cmcred->cmcred_uid = p->p_cred->p_ruid;
728		cmcred->cmcred_gid = p->p_cred->p_rgid;
729		cmcred->cmcred_euid = p->p_ucred->cr_uid;
730		cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
731							CMGROUP_MAX);
732		for (i = 0; i < cmcred->cmcred_ngroups; i++)
733			cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
734		return(0);
735	}
736
737	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
738	/*
739	 * check that all the FDs passed in refer to legal OPEN files
740	 * If not, reject the entire operation.
741	 */
742	rp = (struct file **)(cm + 1);
743	for (i = 0; i < oldfds; i++) {
744		fd = *(int *)rp++;
745		if ((unsigned)fd >= fdp->fd_nfiles ||
746		    fdp->fd_ofiles[fd] == NULL)
747			return (EBADF);
748	}
749	/*
750	 * Now replace the integer FDs with pointers to
751	 * the associated global file table entry..
752	 * XXX this assumes a pointer and an int are the same size!
753	 */
754	rp = (struct file **)(cm + 1);
755	for (i = 0; i < oldfds; i++) {
756		fp = fdp->fd_ofiles[*(int *)rp];
757		*rp++ = fp;
758		fp->f_count++;
759		fp->f_msgcount++;
760		unp_rights++;
761	}
762	return (0);
763}
764
765static int	unp_defer, unp_gcing;
766
767static void
768unp_gc()
769{
770	register struct file *fp, *nextfp;
771	register struct socket *so;
772	struct file **extra_ref, **fpp;
773	int nunref, i;
774
775	if (unp_gcing)
776		return;
777	unp_gcing = 1;
778	unp_defer = 0;
779	/*
780	 * before going through all this, set all FDs to
781	 * be NOT defered and NOT externally accessible
782	 */
783	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
784		fp->f_flag &= ~(FMARK|FDEFER);
785	do {
786		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
787			/*
788			 * If the file is not open, skip it
789			 */
790			if (fp->f_count == 0)
791				continue;
792			/*
793			 * If we already marked it as 'defer'  in a
794			 * previous pass, then try process it this time
795			 * and un-mark it
796			 */
797			if (fp->f_flag & FDEFER) {
798				fp->f_flag &= ~FDEFER;
799				unp_defer--;
800			} else {
801				/*
802				 * if it's not defered, then check if it's
803				 * already marked.. if so skip it
804				 */
805				if (fp->f_flag & FMARK)
806					continue;
807				/*
808				 * If all references are from messages
809				 * in transit, then skip it. it's not
810				 * externally accessible.
811				 */
812				if (fp->f_count == fp->f_msgcount)
813					continue;
814				/*
815				 * If it got this far then it must be
816				 * externally accessible.
817				 */
818				fp->f_flag |= FMARK;
819			}
820			/*
821			 * either it was defered, or it is externally
822			 * accessible and not already marked so.
823			 * Now check if it is possibly one of OUR sockets.
824			 */
825			if (fp->f_type != DTYPE_SOCKET ||
826			    (so = (struct socket *)fp->f_data) == 0)
827				continue;
828			if (so->so_proto->pr_domain != &localdomain ||
829			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
830				continue;
831#ifdef notdef
832			if (so->so_rcv.sb_flags & SB_LOCK) {
833				/*
834				 * This is problematical; it's not clear
835				 * we need to wait for the sockbuf to be
836				 * unlocked (on a uniprocessor, at least),
837				 * and it's also not clear what to do
838				 * if sbwait returns an error due to receipt
839				 * of a signal.  If sbwait does return
840				 * an error, we'll go into an infinite
841				 * loop.  Delete all of this for now.
842				 */
843				(void) sbwait(&so->so_rcv);
844				goto restart;
845			}
846#endif
847			/*
848			 * So, Ok, it's one of our sockets and it IS externally
849			 * accessible (or was defered). Now we look
850			 * to see if we hold any file descriptors in it's
851			 * message buffers. Follow those links and mark them
852			 * as accessible too.
853			 */
854			unp_scan(so->so_rcv.sb_mb, unp_mark);
855		}
856	} while (unp_defer);
857	/*
858	 * We grab an extra reference to each of the file table entries
859	 * that are not otherwise accessible and then free the rights
860	 * that are stored in messages on them.
861	 *
862	 * The bug in the orginal code is a little tricky, so I'll describe
863	 * what's wrong with it here.
864	 *
865	 * It is incorrect to simply unp_discard each entry for f_msgcount
866	 * times -- consider the case of sockets A and B that contain
867	 * references to each other.  On a last close of some other socket,
868	 * we trigger a gc since the number of outstanding rights (unp_rights)
869	 * is non-zero.  If during the sweep phase the gc code un_discards,
870	 * we end up doing a (full) closef on the descriptor.  A closef on A
871	 * results in the following chain.  Closef calls soo_close, which
872	 * calls soclose.   Soclose calls first (through the switch
873	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
874	 * returns because the previous instance had set unp_gcing, and
875	 * we return all the way back to soclose, which marks the socket
876	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
877	 * to free up the rights that are queued in messages on the socket A,
878	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
879	 * switch unp_dispose, which unp_scans with unp_discard.  This second
880	 * instance of unp_discard just calls closef on B.
881	 *
882	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
883	 * which results in another closef on A.  Unfortunately, A is already
884	 * being closed, and the descriptor has already been marked with
885	 * SS_NOFDREF, and soclose panics at this point.
886	 *
887	 * Here, we first take an extra reference to each inaccessible
888	 * descriptor.  Then, we call sorflush ourself, since we know
889	 * it is a Unix domain socket anyhow.  After we destroy all the
890	 * rights carried in messages, we do a last closef to get rid
891	 * of our extra reference.  This is the last close, and the
892	 * unp_detach etc will shut down the socket.
893	 *
894	 * 91/09/19, bsy@cs.cmu.edu
895	 */
896	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
897	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
898	    fp = nextfp) {
899		nextfp = fp->f_list.le_next;
900		/*
901		 * If it's not open, skip it
902		 */
903		if (fp->f_count == 0)
904			continue;
905		/*
906		 * If all refs are from msgs, and it's not marked accessible
907		 * then it must be referenced from some unreachable cycle
908		 * of (shut-down) FDs, so include it in our
909		 * list of FDs to remove
910		 */
911		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
912			*fpp++ = fp;
913			nunref++;
914			fp->f_count++;
915		}
916	}
917	/*
918	 * for each FD on our hit list, do the following two things
919	 */
920	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
921		sorflush((struct socket *)(*fpp)->f_data);
922	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
923		closef(*fpp, (struct proc *) NULL);
924	free((caddr_t)extra_ref, M_FILE);
925	unp_gcing = 0;
926}
927
928void
929unp_dispose(m)
930	struct mbuf *m;
931{
932
933	if (m)
934		unp_scan(m, unp_discard);
935}
936
937static void
938unp_scan(m0, op)
939	register struct mbuf *m0;
940	void (*op) __P((struct file *));
941{
942	register struct mbuf *m;
943	register struct file **rp;
944	register struct cmsghdr *cm;
945	register int i;
946	int qfds;
947
948	while (m0) {
949		for (m = m0; m; m = m->m_next)
950			if (m->m_type == MT_CONTROL &&
951			    m->m_len >= sizeof(*cm)) {
952				cm = mtod(m, struct cmsghdr *);
953				if (cm->cmsg_level != SOL_SOCKET ||
954				    cm->cmsg_type != SCM_RIGHTS)
955					continue;
956				qfds = (cm->cmsg_len - sizeof *cm)
957						/ sizeof (struct file *);
958				rp = (struct file **)(cm + 1);
959				for (i = 0; i < qfds; i++)
960					(*op)(*rp++);
961				break;		/* XXX, but saves time */
962			}
963		m0 = m0->m_act;
964	}
965}
966
967static void
968unp_mark(fp)
969	struct file *fp;
970{
971
972	if (fp->f_flag & FMARK)
973		return;
974	unp_defer++;
975	fp->f_flag |= (FMARK|FDEFER);
976}
977
978static void
979unp_discard(fp)
980	struct file *fp;
981{
982
983	fp->f_msgcount--;
984	unp_rights--;
985	(void) closef(fp, (struct proc *)NULL);
986}
987