uipc_usrreq.c revision 22975
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
34 *	$Id$
35 */
36
37#include <sys/param.h>
38#include <sys/queue.h>
39#include <sys/systm.h>
40#include <sys/proc.h>
41#include <sys/filedesc.h>
42#include <sys/domain.h>
43#include <sys/protosw.h>
44#include <sys/stat.h>
45#include <sys/socket.h>
46#include <sys/socketvar.h>
47#include <sys/unpcb.h>
48#include <sys/un.h>
49#include <sys/namei.h>
50#include <sys/vnode.h>
51#include <sys/file.h>
52#include <sys/stat.h>
53#include <sys/mbuf.h>
54
55/*
56 * Unix communications domain.
57 *
58 * TODO:
59 *	SEQPACKET, RDM
60 *	rethink name space problems
61 *	need a proper out-of-band
62 */
63static struct	sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
64static ino_t	unp_ino;		/* prototype for fake inode numbers */
65
66static int     unp_attach __P((struct socket *));
67static void    unp_detach __P((struct unpcb *));
68static int     unp_bind __P((struct unpcb *,struct mbuf *, struct proc *));
69static int     unp_connect __P((struct socket *,struct mbuf *, struct proc *));
70static void    unp_disconnect __P((struct unpcb *));
71static void    unp_shutdown __P((struct unpcb *));
72static void    unp_drop __P((struct unpcb *, int));
73static void    unp_gc __P((void));
74static void    unp_scan __P((struct mbuf *, void (*)(struct file *)));
75static void    unp_mark __P((struct file *));
76static void    unp_discard __P((struct file *));
77static int     unp_internalize __P((struct mbuf *, struct proc *));
78
79
80/*ARGSUSED*/
81int
82uipc_usrreq(so, req, m, nam, control)
83	struct socket *so;
84	int req;
85	struct mbuf *m, *nam, *control;
86{
87	struct unpcb *unp = sotounpcb(so);
88	register struct socket *so2;
89	register int error = 0;
90	struct proc *p = curproc;	/* XXX */
91
92	if (req == PRU_CONTROL)
93		return (EOPNOTSUPP);
94	if (req != PRU_SEND && control && control->m_len) {
95		error = EOPNOTSUPP;
96		goto release;
97	}
98	if (unp == 0 && req != PRU_ATTACH) {
99		error = EINVAL;
100		goto release;
101	}
102	switch (req) {
103
104	case PRU_ATTACH:
105		if (unp) {
106			error = EISCONN;
107			break;
108		}
109		error = unp_attach(so);
110		break;
111
112	case PRU_DETACH:
113		unp_detach(unp);
114		break;
115
116	case PRU_BIND:
117		error = unp_bind(unp, nam, p);
118		break;
119
120	case PRU_LISTEN:
121		if (unp->unp_vnode == 0)
122			error = EINVAL;
123		break;
124
125	case PRU_CONNECT:
126		error = unp_connect(so, nam, p);
127		break;
128
129	case PRU_CONNECT2:
130		error = unp_connect2(so, (struct socket *)nam);
131		break;
132
133	case PRU_DISCONNECT:
134		unp_disconnect(unp);
135		break;
136
137	case PRU_ACCEPT:
138		/*
139		 * Pass back name of connected socket,
140		 * if it was bound and we are still connected
141		 * (our peer may have closed already!).
142		 */
143		if (unp->unp_conn && unp->unp_conn->unp_addr) {
144			nam->m_len = unp->unp_conn->unp_addr->m_len;
145			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
146			    mtod(nam, caddr_t), (unsigned)nam->m_len);
147		} else {
148			nam->m_len = sizeof(sun_noname);
149			*(mtod(nam, struct sockaddr *)) = sun_noname;
150		}
151		break;
152
153	case PRU_SHUTDOWN:
154		socantsendmore(so);
155		unp_shutdown(unp);
156		break;
157
158	case PRU_RCVD:
159		switch (so->so_type) {
160
161		case SOCK_DGRAM:
162			panic("uipc 1");
163			/*NOTREACHED*/
164
165		case SOCK_STREAM:
166#define	rcv (&so->so_rcv)
167#define snd (&so2->so_snd)
168			if (unp->unp_conn == 0)
169				break;
170			so2 = unp->unp_conn->unp_socket;
171			/*
172			 * Adjust backpressure on sender
173			 * and wakeup any waiting to write.
174			 */
175			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
176			unp->unp_mbcnt = rcv->sb_mbcnt;
177			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
178			unp->unp_cc = rcv->sb_cc;
179			sowwakeup(so2);
180#undef snd
181#undef rcv
182			break;
183
184		default:
185			panic("uipc 2");
186		}
187		break;
188
189	case PRU_SEND:
190	case PRU_SEND_EOF:
191		if (control && (error = unp_internalize(control, p)))
192			break;
193		switch (so->so_type) {
194
195		case SOCK_DGRAM: {
196			struct sockaddr *from;
197
198			if (nam) {
199				if (unp->unp_conn) {
200					error = EISCONN;
201					break;
202				}
203				error = unp_connect(so, nam, p);
204				if (error)
205					break;
206			} else {
207				if (unp->unp_conn == 0) {
208					error = ENOTCONN;
209					break;
210				}
211			}
212			so2 = unp->unp_conn->unp_socket;
213			if (unp->unp_addr)
214				from = mtod(unp->unp_addr, struct sockaddr *);
215			else
216				from = &sun_noname;
217			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
218				sorwakeup(so2);
219				m = 0;
220				control = 0;
221			} else
222				error = ENOBUFS;
223			if (nam)
224				unp_disconnect(unp);
225			break;
226		}
227
228		case SOCK_STREAM:
229#define	rcv (&so2->so_rcv)
230#define	snd (&so->so_snd)
231			/* Connect if not connected yet. */
232			/*
233			 * Note: A better implementation would complain
234			 * if not equal to the peer's address.
235			 */
236			if ((so->so_state & SS_ISCONNECTED) == 0) {
237				if (nam) {
238		    			error = unp_connect(so, nam, p);
239					if (error)
240						break;	/* XXX */
241				} else {
242					error = ENOTCONN;
243					break;
244				}
245			}
246
247			if (so->so_state & SS_CANTSENDMORE) {
248				error = EPIPE;
249				break;
250			}
251			if (unp->unp_conn == 0)
252				panic("uipc 3");
253			so2 = unp->unp_conn->unp_socket;
254			/*
255			 * Send to paired receive port, and then reduce
256			 * send buffer hiwater marks to maintain backpressure.
257			 * Wake up readers.
258			 */
259			if (control) {
260				if (sbappendcontrol(rcv, m, control))
261					control = 0;
262			} else
263				sbappend(rcv, m);
264			snd->sb_mbmax -=
265			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
266			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
267			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
268			unp->unp_conn->unp_cc = rcv->sb_cc;
269			sorwakeup(so2);
270			m = 0;
271#undef snd
272#undef rcv
273			break;
274
275		default:
276			panic("uipc 4");
277		}
278		/*
279		 * SEND_EOF is equivalent to a SEND followed by
280		 * a SHUTDOWN.
281		 */
282		if (req == PRU_SEND_EOF) {
283			socantsendmore(so);
284			unp_shutdown(unp);
285		}
286		break;
287
288	case PRU_ABORT:
289		unp_drop(unp, ECONNABORTED);
290		break;
291
292	case PRU_SENSE:
293		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
294		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
295			so2 = unp->unp_conn->unp_socket;
296			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
297		}
298		((struct stat *) m)->st_dev = NODEV;
299		if (unp->unp_ino == 0)
300			unp->unp_ino = unp_ino++;
301		((struct stat *) m)->st_ino = unp->unp_ino;
302		return (0);
303
304	case PRU_RCVOOB:
305		return (EOPNOTSUPP);
306
307	case PRU_SENDOOB:
308		error = EOPNOTSUPP;
309		break;
310
311	case PRU_SOCKADDR:
312		if (unp->unp_addr) {
313			nam->m_len = unp->unp_addr->m_len;
314			bcopy(mtod(unp->unp_addr, caddr_t),
315			    mtod(nam, caddr_t), (unsigned)nam->m_len);
316		} else
317			nam->m_len = 0;
318		break;
319
320	case PRU_PEERADDR:
321		if (unp->unp_conn && unp->unp_conn->unp_addr) {
322			nam->m_len = unp->unp_conn->unp_addr->m_len;
323			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
324			    mtod(nam, caddr_t), (unsigned)nam->m_len);
325		} else
326			nam->m_len = 0;
327		break;
328
329	case PRU_SLOWTIMO:
330		break;
331
332	default:
333		panic("piusrreq");
334	}
335release:
336	if (control)
337		m_freem(control);
338	if (m)
339		m_freem(m);
340	return (error);
341}
342
343/*
344 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
345 * for stream sockets, although the total for sender and receiver is
346 * actually only PIPSIZ.
347 * Datagram sockets really use the sendspace as the maximum datagram size,
348 * and don't really want to reserve the sendspace.  Their recvspace should
349 * be large enough for at least one max-size datagram plus address.
350 */
351#ifndef PIPSIZ
352#define	PIPSIZ	8192
353#endif
354static u_long	unpst_sendspace = PIPSIZ;
355static u_long	unpst_recvspace = PIPSIZ;
356static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
357static u_long	unpdg_recvspace = 4*1024;
358
359static int	unp_rights;			/* file descriptors in flight */
360
361static int
362unp_attach(so)
363	struct socket *so;
364{
365	register struct mbuf *m;
366	register struct unpcb *unp;
367	int error;
368
369	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
370		switch (so->so_type) {
371
372		case SOCK_STREAM:
373			error = soreserve(so, unpst_sendspace, unpst_recvspace);
374			break;
375
376		case SOCK_DGRAM:
377			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
378			break;
379
380		default:
381			panic("unp_attach");
382		}
383		if (error)
384			return (error);
385	}
386	m = m_getclr(M_DONTWAIT, MT_PCB);
387	if (m == NULL)
388		return (ENOBUFS);
389	unp = mtod(m, struct unpcb *);
390	so->so_pcb = (caddr_t)unp;
391	unp->unp_socket = so;
392	return (0);
393}
394
395static void
396unp_detach(unp)
397	register struct unpcb *unp;
398{
399
400	if (unp->unp_vnode) {
401		unp->unp_vnode->v_socket = 0;
402		vrele(unp->unp_vnode);
403		unp->unp_vnode = 0;
404	}
405	if (unp->unp_conn)
406		unp_disconnect(unp);
407	while (unp->unp_refs)
408		unp_drop(unp->unp_refs, ECONNRESET);
409	soisdisconnected(unp->unp_socket);
410	unp->unp_socket->so_pcb = 0;
411	if (unp_rights) {
412		/*
413		 * Normally the receive buffer is flushed later,
414		 * in sofree, but if our receive buffer holds references
415		 * to descriptors that are now garbage, we will dispose
416		 * of those descriptor references after the garbage collector
417		 * gets them (resulting in a "panic: closef: count < 0").
418		 */
419		sorflush(unp->unp_socket);
420		unp_gc();
421	}
422	m_freem(unp->unp_addr);
423	(void) m_free(dtom(unp));
424}
425
426static int
427unp_bind(unp, nam, p)
428	struct unpcb *unp;
429	struct mbuf *nam;
430	struct proc *p;
431{
432	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
433	register struct vnode *vp;
434	struct vattr vattr;
435	int error;
436	struct nameidata nd;
437
438	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
439	    soun->sun_path, p);
440	if (unp->unp_vnode != NULL)
441		return (EINVAL);
442	if (nam->m_len == MLEN) {
443		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
444			return (EINVAL);
445	} else
446		*(mtod(nam, caddr_t) + nam->m_len) = 0;
447/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
448	error = namei(&nd);
449	if (error)
450		return (error);
451	vp = nd.ni_vp;
452	if (vp != NULL) {
453		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
454		if (nd.ni_dvp == vp)
455			vrele(nd.ni_dvp);
456		else
457			vput(nd.ni_dvp);
458		vrele(vp);
459		return (EADDRINUSE);
460	}
461	VATTR_NULL(&vattr);
462	vattr.va_type = VSOCK;
463	vattr.va_mode = ACCESSPERMS;
464	VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
465	if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr))
466		return (error);
467	vp = nd.ni_vp;
468	vp->v_socket = unp->unp_socket;
469	unp->unp_vnode = vp;
470	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
471	VOP_UNLOCK(vp, 0, p);
472	return (0);
473}
474
/*
 * unp_connect --
 *	Connect so to the socket bound at the pathname held in nam.
 *	For protocols with PR_CONNREQUIRED the target must be accepting
 *	connections; a new socket is spawned from it with sonewconn(),
 *	inherits a copy of the target's address, and becomes the peer.
 *
 * Returns 0 or an errno: EMSGSIZE (path fills the mbuf without a NUL),
 * a namei()/VOP_ACCESS() error, ENOTSOCK, ECONNREFUSED, EPROTOTYPE, or
 * whatever unp_connect2() returns.
 */
static int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *addr = mtod(nam, struct sockaddr_un *);
	register struct vnode *vp;
	register struct socket *target, *child;
	struct unpcb *target_pcb, *child_pcb;
	struct nameidata nd;
	caddr_t bytes = mtod(nam, caddr_t);
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, addr->sun_path, p);

	/* NUL-terminate the path unless it already fills the mbuf. */
	if (nam->m_data + nam->m_len == &nam->m_dat[MLEN]) {	/* XXX */
		if (bytes[nam->m_len - 1] != 0)
			return (EMSGSIZE);
	} else
		bytes[nam->m_len] = 0;

	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0)
		goto out;

	target = vp->v_socket;
	if (target == 0) {
		/* The name exists but no socket is attached to it. */
		error = ECONNREFUSED;
		goto out;
	}
	if (so->so_type != target->so_type) {
		error = EPROTOTYPE;
		goto out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((target->so_options & SO_ACCEPTCONN) == 0 ||
		    (child = sonewconn(target, 0)) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		target_pcb = sotounpcb(target);
		child_pcb = sotounpcb(child);
		if (target_pcb->unp_addr)
			child_pcb->unp_addr =
			    m_copy(target_pcb->unp_addr, 0, (int)M_COPYALL);
		target = child;
	}
	error = unp_connect2(so, target);

out:
	/* Drop the vnode obtained (locked, via LOCKLEAF) from namei(). */
	vput(vp);
	return (error);
}
532
533int
534unp_connect2(so, so2)
535	register struct socket *so;
536	register struct socket *so2;
537{
538	register struct unpcb *unp = sotounpcb(so);
539	register struct unpcb *unp2;
540
541	if (so2->so_type != so->so_type)
542		return (EPROTOTYPE);
543	unp2 = sotounpcb(so2);
544	unp->unp_conn = unp2;
545	switch (so->so_type) {
546
547	case SOCK_DGRAM:
548		unp->unp_nextref = unp2->unp_refs;
549		unp2->unp_refs = unp;
550		soisconnected(so);
551		break;
552
553	case SOCK_STREAM:
554		unp2->unp_conn = unp;
555		soisconnected(so);
556		soisconnected(so2);
557		break;
558
559	default:
560		panic("unp_connect2");
561	}
562	return (0);
563}
564
565static void
566unp_disconnect(unp)
567	struct unpcb *unp;
568{
569	register struct unpcb *unp2 = unp->unp_conn;
570
571	if (unp2 == 0)
572		return;
573	unp->unp_conn = 0;
574	switch (unp->unp_socket->so_type) {
575
576	case SOCK_DGRAM:
577		if (unp2->unp_refs == unp)
578			unp2->unp_refs = unp->unp_nextref;
579		else {
580			unp2 = unp2->unp_refs;
581			for (;;) {
582				if (unp2 == 0)
583					panic("unp_disconnect");
584				if (unp2->unp_nextref == unp)
585					break;
586				unp2 = unp2->unp_nextref;
587			}
588			unp2->unp_nextref = unp->unp_nextref;
589		}
590		unp->unp_nextref = 0;
591		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
592		break;
593
594	case SOCK_STREAM:
595		soisdisconnected(unp->unp_socket);
596		unp2->unp_conn = 0;
597		soisdisconnected(unp2->unp_socket);
598		break;
599	}
600}
601
#ifdef notdef
/*
 * unp_abort --
 *	Compiled out.  Would abort a connection by detaching the PCB.
 */
void
unp_abort(unp)
	struct unpcb *unp;
{
	unp_detach(unp);
}
#endif /* notdef */
611
612static void
613unp_shutdown(unp)
614	struct unpcb *unp;
615{
616	struct socket *so;
617
618	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
619	    (so = unp->unp_conn->unp_socket))
620		socantrcvmore(so);
621}
622
623static void
624unp_drop(unp, errno)
625	struct unpcb *unp;
626	int errno;
627{
628	struct socket *so = unp->unp_socket;
629
630	so->so_error = errno;
631	unp_disconnect(unp);
632	if (so->so_head) {
633		so->so_pcb = (caddr_t) 0;
634		m_freem(unp->unp_addr);
635		(void) m_free(dtom(unp));
636		sofree(so);
637	}
638}
639
#ifdef notdef
/*
 * unp_drain --
 *	Compiled out; empty placeholder.
 */
void
unp_drain()
{
}
#endif /* notdef */
647
648int
649unp_externalize(rights)
650	struct mbuf *rights;
651{
652	struct proc *p = curproc;		/* XXX */
653	register int i;
654	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
655	register struct file **rp = (struct file **)(cm + 1);
656	register struct file *fp;
657	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
658	int f;
659
660	/*
661	 * if the new FD's will not fit, then we free them all
662	 */
663	if (!fdavail(p, newfds)) {
664		for (i = 0; i < newfds; i++) {
665			fp = *rp;
666			unp_discard(fp);
667			*rp++ = 0;
668		}
669		return (EMSGSIZE);
670	}
671	/*
672	 * now change each pointer to an fd in the global table to
673	 * an integer that is the index to the local fd table entry
674	 * that we set up to point to the global one we are transferring.
675	 * XXX this assumes a pointer and int are the same size...!
676	 */
677	for (i = 0; i < newfds; i++) {
678		if (fdalloc(p, 0, &f))
679			panic("unp_externalize");
680		fp = *rp;
681		p->p_fd->fd_ofiles[f] = fp;
682		fp->f_msgcount--;
683		unp_rights--;
684		*(int *)rp++ = f;
685	}
686	return (0);
687}
688
689static int
690unp_internalize(control, p)
691	struct mbuf *control;
692	struct proc *p;
693{
694	struct filedesc *fdp = p->p_fd;
695	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
696	register struct file **rp;
697	register struct file *fp;
698	register int i, fd;
699	int oldfds;
700
701	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
702	    cm->cmsg_len != control->m_len)
703		return (EINVAL);
704	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
705	/*
706	 * check that all the FDs passed in refer to legal OPEN files
707	 * If not, reject the entire operation.
708	 */
709	rp = (struct file **)(cm + 1);
710	for (i = 0; i < oldfds; i++) {
711		fd = *(int *)rp++;
712		if ((unsigned)fd >= fdp->fd_nfiles ||
713		    fdp->fd_ofiles[fd] == NULL)
714			return (EBADF);
715	}
716	/*
717	 * Now replace the integer FDs with pointers to
718	 * the associated global file table entry..
719	 * XXX this assumes a pointer and an int are the same size!
720	 */
721	rp = (struct file **)(cm + 1);
722	for (i = 0; i < oldfds; i++) {
723		fp = fdp->fd_ofiles[*(int *)rp];
724		*rp++ = fp;
725		fp->f_count++;
726		fp->f_msgcount++;
727		unp_rights++;
728	}
729	return (0);
730}
731
732static int	unp_defer, unp_gcing;
733
734static void
735unp_gc()
736{
737	register struct file *fp, *nextfp;
738	register struct socket *so;
739	struct file **extra_ref, **fpp;
740	int nunref, i;
741
742	if (unp_gcing)
743		return;
744	unp_gcing = 1;
745	unp_defer = 0;
746	/*
747	 * before going through all this, set all FDs to
748	 * be NOT defered and NOT externally accessible
749	 */
750	for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
751		fp->f_flag &= ~(FMARK|FDEFER);
752	do {
753		for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
754			/*
755			 * If the file is not open, skip it
756			 */
757			if (fp->f_count == 0)
758				continue;
759			/*
760			 * If we already marked it as 'defer'  in a
761			 * previous pass, then try process it this time
762			 * and un-mark it
763			 */
764			if (fp->f_flag & FDEFER) {
765				fp->f_flag &= ~FDEFER;
766				unp_defer--;
767			} else {
768				/*
769				 * if it's not defered, then check if it's
770				 * already marked.. if so skip it
771				 */
772				if (fp->f_flag & FMARK)
773					continue;
774				/*
775				 * If all references are from messages
776				 * in transit, then skip it. it's not
777				 * externally accessible.
778				 */
779				if (fp->f_count == fp->f_msgcount)
780					continue;
781				/*
782				 * If it got this far then it must be
783				 * externally accessible.
784				 */
785				fp->f_flag |= FMARK;
786			}
787			/*
788			 * either it was defered, or it is externally
789			 * accessible and not already marked so.
790			 * Now check if it is possibly one of OUR sockets.
791			 */
792			if (fp->f_type != DTYPE_SOCKET ||
793			    (so = (struct socket *)fp->f_data) == 0)
794				continue;
795			if (so->so_proto->pr_domain != &localdomain ||
796			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
797				continue;
798#ifdef notdef
799			if (so->so_rcv.sb_flags & SB_LOCK) {
800				/*
801				 * This is problematical; it's not clear
802				 * we need to wait for the sockbuf to be
803				 * unlocked (on a uniprocessor, at least),
804				 * and it's also not clear what to do
805				 * if sbwait returns an error due to receipt
806				 * of a signal.  If sbwait does return
807				 * an error, we'll go into an infinite
808				 * loop.  Delete all of this for now.
809				 */
810				(void) sbwait(&so->so_rcv);
811				goto restart;
812			}
813#endif
814			/*
815			 * So, Ok, it's one of our sockets and it IS externally
816			 * accessible (or was defered). Now we look
817			 * to see if we hold any file descriptors in it's
818			 * message buffers. Follow those links and mark them
819			 * as accessible too.
820			 */
821			unp_scan(so->so_rcv.sb_mb, unp_mark);
822		}
823	} while (unp_defer);
824	/*
825	 * We grab an extra reference to each of the file table entries
826	 * that are not otherwise accessible and then free the rights
827	 * that are stored in messages on them.
828	 *
829	 * The bug in the orginal code is a little tricky, so I'll describe
830	 * what's wrong with it here.
831	 *
832	 * It is incorrect to simply unp_discard each entry for f_msgcount
833	 * times -- consider the case of sockets A and B that contain
834	 * references to each other.  On a last close of some other socket,
835	 * we trigger a gc since the number of outstanding rights (unp_rights)
836	 * is non-zero.  If during the sweep phase the gc code un_discards,
837	 * we end up doing a (full) closef on the descriptor.  A closef on A
838	 * results in the following chain.  Closef calls soo_close, which
839	 * calls soclose.   Soclose calls first (through the switch
840	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
841	 * returns because the previous instance had set unp_gcing, and
842	 * we return all the way back to soclose, which marks the socket
843	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
844	 * to free up the rights that are queued in messages on the socket A,
845	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
846	 * switch unp_dispose, which unp_scans with unp_discard.  This second
847	 * instance of unp_discard just calls closef on B.
848	 *
849	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
850	 * which results in another closef on A.  Unfortunately, A is already
851	 * being closed, and the descriptor has already been marked with
852	 * SS_NOFDREF, and soclose panics at this point.
853	 *
854	 * Here, we first take an extra reference to each inaccessible
855	 * descriptor.  Then, we call sorflush ourself, since we know
856	 * it is a Unix domain socket anyhow.  After we destroy all the
857	 * rights carried in messages, we do a last closef to get rid
858	 * of our extra reference.  This is the last close, and the
859	 * unp_detach etc will shut down the socket.
860	 *
861	 * 91/09/19, bsy@cs.cmu.edu
862	 */
863	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
864	for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
865	    fp = nextfp) {
866		nextfp = fp->f_list.le_next;
867		/*
868		 * If it's not open, skip it
869		 */
870		if (fp->f_count == 0)
871			continue;
872		/*
873		 * If all refs are from msgs, and it's not marked accessible
874		 * then it must be referenced from some unreachable cycle
875		 * of (shut-down) FDs, so include it in our
876		 * list of FDs to remove
877		 */
878		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
879			*fpp++ = fp;
880			nunref++;
881			fp->f_count++;
882		}
883	}
884	/*
885	 * for each FD on our hit list, do the following two things
886	 */
887	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
888		sorflush((struct socket *)(*fpp)->f_data);
889	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
890		closef(*fpp, (struct proc *) NULL);
891	free((caddr_t)extra_ref, M_FILE);
892	unp_gcing = 0;
893}
894
895void
896unp_dispose(m)
897	struct mbuf *m;
898{
899
900	if (m)
901		unp_scan(m, unp_discard);
902}
903
904static void
905unp_scan(m0, op)
906	register struct mbuf *m0;
907	void (*op) __P((struct file *));
908{
909	register struct mbuf *m;
910	register struct file **rp;
911	register struct cmsghdr *cm;
912	register int i;
913	int qfds;
914
915	while (m0) {
916		for (m = m0; m; m = m->m_next)
917			if (m->m_type == MT_CONTROL &&
918			    m->m_len >= sizeof(*cm)) {
919				cm = mtod(m, struct cmsghdr *);
920				if (cm->cmsg_level != SOL_SOCKET ||
921				    cm->cmsg_type != SCM_RIGHTS)
922					continue;
923				qfds = (cm->cmsg_len - sizeof *cm)
924						/ sizeof (struct file *);
925				rp = (struct file **)(cm + 1);
926				for (i = 0; i < qfds; i++)
927					(*op)(*rp++);
928				break;		/* XXX, but saves time */
929			}
930		m0 = m0->m_act;
931	}
932}
933
934static void
935unp_mark(fp)
936	struct file *fp;
937{
938
939	if (fp->f_flag & FMARK)
940		return;
941	unp_defer++;
942	fp->f_flag |= (FMARK|FDEFER);
943}
944
945static void
946unp_discard(fp)
947	struct file *fp;
948{
949
950	fp->f_msgcount--;
951	unp_rights--;
952	(void) closef(fp, (struct proc *)NULL);
953}
954