uipc_usrreq.c revision 8876
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	From: @(#)uipc_usrreq.c	8.3 (Berkeley) 1/4/94
34 *	$Id: uipc_usrreq.c,v 1.8 1995/05/11 00:13:06 wollman Exp $
35 */
36
37#include <sys/param.h>
38#include <sys/systm.h>
39#include <sys/proc.h>
40#include <sys/filedesc.h>
41#include <sys/domain.h>
42#include <sys/protosw.h>
43#include <sys/stat.h>
44#include <sys/socket.h>
45#include <sys/socketvar.h>
46#include <sys/unpcb.h>
47#include <sys/un.h>
48#include <sys/namei.h>
49#include <sys/vnode.h>
50#include <sys/file.h>
51#include <sys/stat.h>
52#include <sys/mbuf.h>
53
54/*
55 * Unix communications domain.
56 *
57 * TODO:
58 *	SEQPACKET, RDM
59 *	rethink name space problems
60 *	need a proper out-of-band
61 */
62struct	sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
63ino_t	unp_ino;			/* prototype for fake inode numbers */
64
65/*ARGSUSED*/
66int
67uipc_usrreq(so, req, m, nam, control)
68	struct socket *so;
69	int req;
70	struct mbuf *m, *nam, *control;
71{
72	struct unpcb *unp = sotounpcb(so);
73	register struct socket *so2;
74	register int error = 0;
75	struct proc *p = curproc;	/* XXX */
76
77	if (req == PRU_CONTROL)
78		return (EOPNOTSUPP);
79	if (req != PRU_SEND && control && control->m_len) {
80		error = EOPNOTSUPP;
81		goto release;
82	}
83	if (unp == 0 && req != PRU_ATTACH) {
84		error = EINVAL;
85		goto release;
86	}
87	switch (req) {
88
89	case PRU_ATTACH:
90		if (unp) {
91			error = EISCONN;
92			break;
93		}
94		error = unp_attach(so);
95		break;
96
97	case PRU_DETACH:
98		unp_detach(unp);
99		break;
100
101	case PRU_BIND:
102		error = unp_bind(unp, nam, p);
103		break;
104
105	case PRU_LISTEN:
106		if (unp->unp_vnode == 0)
107			error = EINVAL;
108		break;
109
110	case PRU_CONNECT:
111		error = unp_connect(so, nam, p);
112		break;
113
114	case PRU_CONNECT2:
115		error = unp_connect2(so, (struct socket *)nam);
116		break;
117
118	case PRU_DISCONNECT:
119		unp_disconnect(unp);
120		break;
121
122	case PRU_ACCEPT:
123		/*
124		 * Pass back name of connected socket,
125		 * if it was bound and we are still connected
126		 * (our peer may have closed already!).
127		 */
128		if (unp->unp_conn && unp->unp_conn->unp_addr) {
129			nam->m_len = unp->unp_conn->unp_addr->m_len;
130			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
131			    mtod(nam, caddr_t), (unsigned)nam->m_len);
132		} else {
133			nam->m_len = sizeof(sun_noname);
134			*(mtod(nam, struct sockaddr *)) = sun_noname;
135		}
136		break;
137
138	case PRU_SHUTDOWN:
139		socantsendmore(so);
140		unp_shutdown(unp);
141		break;
142
143	case PRU_RCVD:
144		switch (so->so_type) {
145
146		case SOCK_DGRAM:
147			panic("uipc 1");
148			/*NOTREACHED*/
149
150		case SOCK_STREAM:
151#define	rcv (&so->so_rcv)
152#define snd (&so2->so_snd)
153			if (unp->unp_conn == 0)
154				break;
155			so2 = unp->unp_conn->unp_socket;
156			/*
157			 * Adjust backpressure on sender
158			 * and wakeup any waiting to write.
159			 */
160			snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
161			unp->unp_mbcnt = rcv->sb_mbcnt;
162			snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
163			unp->unp_cc = rcv->sb_cc;
164			sowwakeup(so2);
165#undef snd
166#undef rcv
167			break;
168
169		default:
170			panic("uipc 2");
171		}
172		break;
173
174	case PRU_SEND:
175	case PRU_SEND_EOF:
176		if (control && (error = unp_internalize(control, p)))
177			break;
178		switch (so->so_type) {
179
180		case SOCK_DGRAM: {
181			struct sockaddr *from;
182
183			if (nam) {
184				if (unp->unp_conn) {
185					error = EISCONN;
186					break;
187				}
188				error = unp_connect(so, nam, p);
189				if (error)
190					break;
191			} else {
192				if (unp->unp_conn == 0) {
193					error = ENOTCONN;
194					break;
195				}
196			}
197			so2 = unp->unp_conn->unp_socket;
198			if (unp->unp_addr)
199				from = mtod(unp->unp_addr, struct sockaddr *);
200			else
201				from = &sun_noname;
202			if (sbappendaddr(&so2->so_rcv, from, m, control)) {
203				sorwakeup(so2);
204				m = 0;
205				control = 0;
206			} else
207				error = ENOBUFS;
208			if (nam)
209				unp_disconnect(unp);
210			break;
211		}
212
213		case SOCK_STREAM:
214#define	rcv (&so2->so_rcv)
215#define	snd (&so->so_snd)
216			/* Connect if not connected yet. */
217			/*
218			 * Note: A better implementation would complain
219			 * if not equal to the peer's address.
220			 */
221			if ((so->so_state & SS_ISCONNECTED) == 0) {
222				if (nam) {
223		    			error = unp_connect(so, nam, p);
224					if (error)
225						break;	/* XXX */
226				} else {
227					error = ENOTCONN;
228					break;
229				}
230			}
231
232			if (so->so_state & SS_CANTSENDMORE) {
233				error = EPIPE;
234				break;
235			}
236			if (unp->unp_conn == 0)
237				panic("uipc 3");
238			so2 = unp->unp_conn->unp_socket;
239			/*
240			 * Send to paired receive port, and then reduce
241			 * send buffer hiwater marks to maintain backpressure.
242			 * Wake up readers.
243			 */
244			if (control) {
245				if (sbappendcontrol(rcv, m, control))
246					control = 0;
247			} else
248				sbappend(rcv, m);
249			snd->sb_mbmax -=
250			    rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
251			unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
252			snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
253			unp->unp_conn->unp_cc = rcv->sb_cc;
254			sorwakeup(so2);
255			m = 0;
256#undef snd
257#undef rcv
258			break;
259
260		default:
261			panic("uipc 4");
262		}
263		/*
264		 * SEND_EOF is equivalent to a SEND followed by
265		 * a SHUTDOWN.
266		 */
267		if (req == PRU_SEND_EOF) {
268			socantsendmore(so);
269			unp_shutdown(unp);
270		}
271		break;
272
273	case PRU_ABORT:
274		unp_drop(unp, ECONNABORTED);
275		break;
276
277	case PRU_SENSE:
278		((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
279		if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
280			so2 = unp->unp_conn->unp_socket;
281			((struct stat *) m)->st_blksize += so2->so_rcv.sb_cc;
282		}
283		((struct stat *) m)->st_dev = NODEV;
284		if (unp->unp_ino == 0)
285			unp->unp_ino = unp_ino++;
286		((struct stat *) m)->st_ino = unp->unp_ino;
287		return (0);
288
289	case PRU_RCVOOB:
290		return (EOPNOTSUPP);
291
292	case PRU_SENDOOB:
293		error = EOPNOTSUPP;
294		break;
295
296	case PRU_SOCKADDR:
297		if (unp->unp_addr) {
298			nam->m_len = unp->unp_addr->m_len;
299			bcopy(mtod(unp->unp_addr, caddr_t),
300			    mtod(nam, caddr_t), (unsigned)nam->m_len);
301		} else
302			nam->m_len = 0;
303		break;
304
305	case PRU_PEERADDR:
306		if (unp->unp_conn && unp->unp_conn->unp_addr) {
307			nam->m_len = unp->unp_conn->unp_addr->m_len;
308			bcopy(mtod(unp->unp_conn->unp_addr, caddr_t),
309			    mtod(nam, caddr_t), (unsigned)nam->m_len);
310		} else
311			nam->m_len = 0;
312		break;
313
314	case PRU_SLOWTIMO:
315		break;
316
317	default:
318		panic("piusrreq");
319	}
320release:
321	if (control)
322		m_freem(control);
323	if (m)
324		m_freem(m);
325	return (error);
326}
327
328/*
329 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
330 * for stream sockets, although the total for sender and receiver is
331 * actually only PIPSIZ.
332 * Datagram sockets really use the sendspace as the maximum datagram size,
333 * and don't really want to reserve the sendspace.  Their recvspace should
334 * be large enough for at least one max-size datagram plus address.
335 */
336#define	PIPSIZ	4096
337u_long	unpst_sendspace = PIPSIZ;
338u_long	unpst_recvspace = PIPSIZ;
339u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
340u_long	unpdg_recvspace = 4*1024;
341
342int	unp_rights;			/* file descriptors in flight */
343
344int
345unp_attach(so)
346	struct socket *so;
347{
348	register struct mbuf *m;
349	register struct unpcb *unp;
350	int error;
351
352	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
353		switch (so->so_type) {
354
355		case SOCK_STREAM:
356			error = soreserve(so, unpst_sendspace, unpst_recvspace);
357			break;
358
359		case SOCK_DGRAM:
360			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
361			break;
362
363		default:
364			panic("unp_attach");
365		}
366		if (error)
367			return (error);
368	}
369	m = m_getclr(M_DONTWAIT, MT_PCB);
370	if (m == NULL)
371		return (ENOBUFS);
372	unp = mtod(m, struct unpcb *);
373	so->so_pcb = (caddr_t)unp;
374	unp->unp_socket = so;
375	return (0);
376}
377
378void
379unp_detach(unp)
380	register struct unpcb *unp;
381{
382
383	if (unp->unp_vnode) {
384		unp->unp_vnode->v_socket = 0;
385		vrele(unp->unp_vnode);
386		unp->unp_vnode = 0;
387	}
388	if (unp->unp_conn)
389		unp_disconnect(unp);
390	while (unp->unp_refs)
391		unp_drop(unp->unp_refs, ECONNRESET);
392	soisdisconnected(unp->unp_socket);
393	unp->unp_socket->so_pcb = 0;
394	m_freem(unp->unp_addr);
395	(void) m_free(dtom(unp));
396	if (unp_rights) {
397		/*
398		 * Normally the receive buffer is flushed later,
399		 * in sofree, but if our receive buffer holds references
400		 * to descriptors that are now garbage, we will dispose
401		 * of those descriptor references after the garbage collector
402		 * gets them (resulting in a "panic: closef: count < 0").
403		 */
404		sorflush(unp->unp_socket);
405		unp_gc();
406	}
407}
408
409int
410unp_bind(unp, nam, p)
411	struct unpcb *unp;
412	struct mbuf *nam;
413	struct proc *p;
414{
415	struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
416	register struct vnode *vp;
417	struct vattr vattr;
418	int error;
419	struct nameidata nd;
420
421	NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
422		soun->sun_path, p);
423	if (unp->unp_vnode != NULL)
424		return (EINVAL);
425	if (nam->m_len == MLEN) {
426		if (*(mtod(nam, caddr_t) + nam->m_len - 1) != 0)
427			return (EINVAL);
428	} else
429		*(mtod(nam, caddr_t) + nam->m_len) = 0;
430/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
431	error = namei(&nd);
432	if (error)
433		return (error);
434	vp = nd.ni_vp;
435	if (vp != NULL) {
436		VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
437		if (nd.ni_dvp == vp)
438			vrele(nd.ni_dvp);
439		else
440			vput(nd.ni_dvp);
441		vrele(vp);
442		return (EADDRINUSE);
443	}
444	VATTR_NULL(&vattr);
445	vattr.va_type = VSOCK;
446	vattr.va_mode = ACCESSPERMS;
447	LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
448	error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
449	if (error)
450		return (error);
451	vp = nd.ni_vp;
452	vp->v_socket = unp->unp_socket;
453	unp->unp_vnode = vp;
454	unp->unp_addr = m_copy(nam, 0, (int)M_COPYALL);
455	VOP_UNLOCK(vp);
456	return (0);
457}
458
/*
 * unp_connect: connect so to the socket bound at the path in nam.
 *
 * The path is NUL-terminated in place (EMSGSIZE if nam runs to the end
 * of the mbuf data area without a NUL).  The name must resolve to a
 * VSOCK vnode that p may write, with a socket of the same type
 * attached.  For protocols flagged PR_CONNREQUIRED the target must be
 * accepting connections; a new socket is spawned from it with
 * sonewconn(), inherits a copy of the listener's address, and becomes
 * the actual peer.
 *
 * Returns 0 or an errno value.
 */
int
unp_connect(so, nam, p)
	struct socket *so;
	struct mbuf *nam;
	struct proc *p;
{
	register struct sockaddr_un *soun = mtod(nam, struct sockaddr_un *);
	caddr_t name = mtod(nam, caddr_t);
	register struct vnode *vp;
	register struct socket *peer, *spawned;
	struct unpcb *listenpcb, *spawnedpcb;
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, soun->sun_path, p);
	if (nam->m_data + nam->m_len != &nam->m_dat[MLEN])	/* XXX */
		name[nam->m_len] = 0;
	else if (name[nam->m_len - 1] != 0)
		return (EMSGSIZE);

	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto out;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto out;

	peer = vp->v_socket;
	if (peer == 0) {
		/* Name exists but no socket is attached to it. */
		error = ECONNREFUSED;
		goto out;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto out;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		spawned = sonewconn(peer, 0);
		if (spawned == 0) {
			error = ECONNREFUSED;
			goto out;
		}
		listenpcb = sotounpcb(peer);
		spawnedpcb = sotounpcb(spawned);
		if (listenpcb->unp_addr)
			spawnedpcb->unp_addr =
			    m_copy(listenpcb->unp_addr, 0, (int)M_COPYALL);
		peer = spawned;
	}
	error = unp_connect2(so, peer);
out:
	/* Drop the lock and reference taken on the leaf by the lookup. */
	vput(vp);
	return (error);
}
516
517int
518unp_connect2(so, so2)
519	register struct socket *so;
520	register struct socket *so2;
521{
522	register struct unpcb *unp = sotounpcb(so);
523	register struct unpcb *unp2;
524
525	if (so2->so_type != so->so_type)
526		return (EPROTOTYPE);
527	unp2 = sotounpcb(so2);
528	unp->unp_conn = unp2;
529	switch (so->so_type) {
530
531	case SOCK_DGRAM:
532		unp->unp_nextref = unp2->unp_refs;
533		unp2->unp_refs = unp;
534		soisconnected(so);
535		break;
536
537	case SOCK_STREAM:
538		unp2->unp_conn = unp;
539		soisconnected(so);
540		soisconnected(so2);
541		break;
542
543	default:
544		panic("unp_connect2");
545	}
546	return (0);
547}
548
549void
550unp_disconnect(unp)
551	struct unpcb *unp;
552{
553	register struct unpcb *unp2 = unp->unp_conn;
554
555	if (unp2 == 0)
556		return;
557	unp->unp_conn = 0;
558	switch (unp->unp_socket->so_type) {
559
560	case SOCK_DGRAM:
561		if (unp2->unp_refs == unp)
562			unp2->unp_refs = unp->unp_nextref;
563		else {
564			unp2 = unp2->unp_refs;
565			for (;;) {
566				if (unp2 == 0)
567					panic("unp_disconnect");
568				if (unp2->unp_nextref == unp)
569					break;
570				unp2 = unp2->unp_nextref;
571			}
572			unp2->unp_nextref = unp->unp_nextref;
573		}
574		unp->unp_nextref = 0;
575		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
576		break;
577
578	case SOCK_STREAM:
579		soisdisconnected(unp->unp_socket);
580		unp2->unp_conn = 0;
581		soisdisconnected(unp2->unp_socket);
582		break;
583	}
584}
585
#ifdef notdef
/*
 * unp_abort: compiled out; would simply detach the pcb.
 */
void
unp_abort(unp)
	struct unpcb *unp;
{
	unp_detach(unp);
}
#endif
595
596void
597unp_shutdown(unp)
598	struct unpcb *unp;
599{
600	struct socket *so;
601
602	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
603	    (so = unp->unp_conn->unp_socket))
604		socantrcvmore(so);
605}
606
607void
608unp_drop(unp, errno)
609	struct unpcb *unp;
610	int errno;
611{
612	struct socket *so = unp->unp_socket;
613
614	so->so_error = errno;
615	unp_disconnect(unp);
616	if (so->so_head) {
617		so->so_pcb = (caddr_t) 0;
618		m_freem(unp->unp_addr);
619		(void) m_free(dtom(unp));
620		sofree(so);
621	}
622}
623
#ifdef notdef
/*
 * unp_drain: compiled out; intentionally empty.
 */
void
unp_drain()
{
}
#endif
631
632int
633unp_externalize(rights)
634	struct mbuf *rights;
635{
636	struct proc *p = curproc;		/* XXX */
637	register int i;
638	register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
639	register struct file **rp = (struct file **)(cm + 1);
640	register struct file *fp;
641	int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
642	int f;
643
644	if (!fdavail(p, newfds)) {
645		for (i = 0; i < newfds; i++) {
646			fp = *rp;
647			unp_discard(fp);
648			*rp++ = 0;
649		}
650		return (EMSGSIZE);
651	}
652	for (i = 0; i < newfds; i++) {
653		if (fdalloc(p, 0, &f))
654			panic("unp_externalize");
655		fp = *rp;
656		p->p_fd->fd_ofiles[f] = fp;
657		fp->f_msgcount--;
658		unp_rights--;
659		*(int *)rp++ = f;
660	}
661	return (0);
662}
663
664int
665unp_internalize(control, p)
666	struct mbuf *control;
667	struct proc *p;
668{
669	struct filedesc *fdp = p->p_fd;
670	register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
671	register struct file **rp;
672	register struct file *fp;
673	register int i, fd;
674	int oldfds;
675
676	if (cm->cmsg_type != SCM_RIGHTS || cm->cmsg_level != SOL_SOCKET ||
677	    cm->cmsg_len != control->m_len)
678		return (EINVAL);
679	oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
680	rp = (struct file **)(cm + 1);
681	for (i = 0; i < oldfds; i++) {
682		fd = *(int *)rp++;
683		if ((unsigned)fd >= fdp->fd_nfiles ||
684		    fdp->fd_ofiles[fd] == NULL)
685			return (EBADF);
686	}
687	rp = (struct file **)(cm + 1);
688	for (i = 0; i < oldfds; i++) {
689		fp = fdp->fd_ofiles[*(int *)rp];
690		*rp++ = fp;
691		fp->f_count++;
692		fp->f_msgcount++;
693		unp_rights++;
694	}
695	return (0);
696}
697
/* Number of files flagged FDEFER and awaiting a scan by unp_gc(). */
int	unp_defer;
/* Non-zero while unp_gc() is running; makes re-entry a no-op. */
int	unp_gcing;

extern	struct domain localdomain;
700
701void
702unp_gc()
703{
704	register struct file *fp, *nextfp;
705	register struct socket *so;
706	struct file **extra_ref, **fpp;
707	int nunref, i;
708
709	if (unp_gcing)
710		return;
711	unp_gcing = 1;
712	unp_defer = 0;
713	for (fp = filehead; fp; fp = fp->f_filef)
714		fp->f_flag &= ~(FMARK|FDEFER);
715	do {
716		for (fp = filehead; fp; fp = fp->f_filef) {
717			if (fp->f_count == 0)
718				continue;
719			if (fp->f_flag & FDEFER) {
720				fp->f_flag &= ~FDEFER;
721				unp_defer--;
722			} else {
723				if (fp->f_flag & FMARK)
724					continue;
725				if (fp->f_count == fp->f_msgcount)
726					continue;
727				fp->f_flag |= FMARK;
728			}
729			if (fp->f_type != DTYPE_SOCKET ||
730			    (so = (struct socket *)fp->f_data) == 0)
731				continue;
732			if (so->so_proto->pr_domain != &localdomain ||
733			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
734				continue;
735#ifdef notdef
736			if (so->so_rcv.sb_flags & SB_LOCK) {
737				/*
738				 * This is problematical; it's not clear
739				 * we need to wait for the sockbuf to be
740				 * unlocked (on a uniprocessor, at least),
741				 * and it's also not clear what to do
742				 * if sbwait returns an error due to receipt
743				 * of a signal.  If sbwait does return
744				 * an error, we'll go into an infinite
745				 * loop.  Delete all of this for now.
746				 */
747				(void) sbwait(&so->so_rcv);
748				goto restart;
749			}
750#endif
751			unp_scan(so->so_rcv.sb_mb, unp_mark);
752		}
753	} while (unp_defer);
754	/*
755	 * We grab an extra reference to each of the file table entries
756	 * that are not otherwise accessible and then free the rights
757	 * that are stored in messages on them.
758	 *
759	 * The bug in the orginal code is a little tricky, so I'll describe
760	 * what's wrong with it here.
761	 *
762	 * It is incorrect to simply unp_discard each entry for f_msgcount
763	 * times -- consider the case of sockets A and B that contain
764	 * references to each other.  On a last close of some other socket,
765	 * we trigger a gc since the number of outstanding rights (unp_rights)
766	 * is non-zero.  If during the sweep phase the gc code un_discards,
767	 * we end up doing a (full) closef on the descriptor.  A closef on A
768	 * results in the following chain.  Closef calls soo_close, which
769	 * calls soclose.   Soclose calls first (through the switch
770	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
771	 * returns because the previous instance had set unp_gcing, and
772	 * we return all the way back to soclose, which marks the socket
773	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
774	 * to free up the rights that are queued in messages on the socket A,
775	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
776	 * switch unp_dispose, which unp_scans with unp_discard.  This second
777	 * instance of unp_discard just calls closef on B.
778	 *
779	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
780	 * which results in another closef on A.  Unfortunately, A is already
781	 * being closed, and the descriptor has already been marked with
782	 * SS_NOFDREF, and soclose panics at this point.
783	 *
784	 * Here, we first take an extra reference to each inaccessible
785	 * descriptor.  Then, we call sorflush ourself, since we know
786	 * it is a Unix domain socket anyhow.  After we destroy all the
787	 * rights carried in messages, we do a last closef to get rid
788	 * of our extra reference.  This is the last close, and the
789	 * unp_detach etc will shut down the socket.
790	 *
791	 * 91/09/19, bsy@cs.cmu.edu
792	 */
793	extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
794	for (nunref = 0, fp = filehead, fpp = extra_ref; fp; fp = nextfp) {
795		nextfp = fp->f_filef;
796		if (fp->f_count == 0)
797			continue;
798		if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
799			*fpp++ = fp;
800			nunref++;
801			fp->f_count++;
802		}
803	}
804	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
805		sorflush((struct socket *)(*fpp)->f_data);
806	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
807		closef(*fpp,(struct proc*) NULL);
808	free((caddr_t)extra_ref, M_FILE);
809	unp_gcing = 0;
810}
811
812void
813unp_dispose(m)
814	struct mbuf *m;
815{
816	if (m)
817		unp_scan(m, unp_discard);
818}
819
820void
821unp_scan(m0, op)
822	register struct mbuf *m0;
823	void (*op)(struct file *);
824{
825	register struct mbuf *m;
826	register struct file **rp;
827	register struct cmsghdr *cm;
828	register int i;
829	int qfds;
830
831	while (m0) {
832		for (m = m0; m; m = m->m_next)
833			if (m->m_type == MT_CONTROL &&
834			    m->m_len >= sizeof(*cm)) {
835				cm = mtod(m, struct cmsghdr *);
836				if (cm->cmsg_level != SOL_SOCKET ||
837				    cm->cmsg_type != SCM_RIGHTS)
838					continue;
839				qfds = (cm->cmsg_len - sizeof *cm)
840						/ sizeof (struct file *);
841				rp = (struct file **)(cm + 1);
842				for (i = 0; i < qfds; i++)
843					(*op)(*rp++);
844				break;		/* XXX, but saves time */
845			}
846		m0 = m0->m_act;
847	}
848}
849
850void
851unp_mark(fp)
852	struct file *fp;
853{
854
855	if (fp->f_flag & FMARK)
856		return;
857	unp_defer++;
858	fp->f_flag |= (FMARK|FDEFER);
859}
860
861void
862unp_discard(fp)
863	struct file *fp;
864{
865
866	fp->f_msgcount--;
867	unp_rights--;
868	(void) closef(fp, (struct proc *)NULL);
869}
870