Deleted Added
full compact
uipc_usrreq.c (130398) uipc_usrreq.c (130480)
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
30 */
31
32#include <sys/cdefs.h>
1/*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 4. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: head/sys/kern/uipc_usrreq.c 130398 2004-06-13 02:50:07Z rwatson $");
33__FBSDID("$FreeBSD: head/sys/kern/uipc_usrreq.c 130480 2004-06-14 18:16:22Z rwatson $");
34
35#include "opt_mac.h"
36
37#include <sys/param.h>
38#include <sys/domain.h>
39#include <sys/fcntl.h>
40#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
41#include <sys/file.h>
42#include <sys/filedesc.h>
43#include <sys/jail.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/mac.h>
47#include <sys/mbuf.h>
48#include <sys/mutex.h>
49#include <sys/namei.h>
50#include <sys/proc.h>
51#include <sys/protosw.h>
52#include <sys/resourcevar.h>
53#include <sys/socket.h>
54#include <sys/socketvar.h>
55#include <sys/signalvar.h>
56#include <sys/stat.h>
57#include <sys/sx.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60#include <sys/un.h>
61#include <sys/unpcb.h>
62#include <sys/vnode.h>
63
64#include <vm/uma.h>
65
66static uma_zone_t unp_zone;
67static unp_gen_t unp_gencnt;
68static u_int unp_count;
69
70static struct unp_head unp_shead, unp_dhead;
71
72/*
73 * Unix communications domain.
74 *
75 * TODO:
76 * SEQPACKET, RDM
77 * rethink name space problems
78 * need a proper out-of-band
79 * lock pushdown
80 */
81static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
82static ino_t unp_ino; /* prototype for fake inode numbers */
83
84static struct mtx unp_mtx;
85#define UNP_LOCK_INIT() \
86 mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
87#define UNP_LOCK() mtx_lock(&unp_mtx)
88#define UNP_UNLOCK() mtx_unlock(&unp_mtx)
89#define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED)
90
91static int unp_attach(struct socket *);
92static void unp_detach(struct unpcb *);
93static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
94static int unp_connect(struct socket *,struct sockaddr *, struct thread *);
95static int unp_connect2(struct socket *so, struct socket *so2);
96static void unp_disconnect(struct unpcb *);
97static void unp_shutdown(struct unpcb *);
98static void unp_drop(struct unpcb *, int);
99static void unp_gc(void);
100static void unp_scan(struct mbuf *, void (*)(struct file *));
101static void unp_mark(struct file *);
102static void unp_discard(struct file *);
103static void unp_freerights(struct file **, int);
104static int unp_internalize(struct mbuf **, struct thread *);
105static int unp_listen(struct unpcb *, struct thread *);
106
107static int
108uipc_abort(struct socket *so)
109{
110 struct unpcb *unp = sotounpcb(so);
111
112 if (unp == NULL)
113 return (EINVAL);
114 UNP_LOCK();
115 unp_drop(unp, ECONNABORTED);
116 unp_detach(unp); /* NB: unlocks */
117 SOCK_LOCK(so);
118 sotryfree(so);
119 return (0);
120}
121
122static int
123uipc_accept(struct socket *so, struct sockaddr **nam)
124{
125 struct unpcb *unp = sotounpcb(so);
126 const struct sockaddr *sa;
127
128 if (unp == NULL)
129 return (EINVAL);
130
131 /*
132 * Pass back name of connected socket,
133 * if it was bound and we are still connected
134 * (our peer may have closed already!).
135 */
136 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
137 UNP_LOCK();
138 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
139 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
140 else
141 sa = &sun_noname;
142 bcopy(sa, *nam, sa->sa_len);
143 UNP_UNLOCK();
144 return (0);
145}
146
147static int
148uipc_attach(struct socket *so, int proto, struct thread *td)
149{
150 struct unpcb *unp = sotounpcb(so);
151
152 if (unp != NULL)
153 return (EISCONN);
154 return (unp_attach(so));
155}
156
157static int
158uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
159{
160 struct unpcb *unp = sotounpcb(so);
161
162 if (unp == NULL)
163 return (EINVAL);
164
165 return (unp_bind(unp, nam, td));
166}
167
168static int
169uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
170{
171 struct unpcb *unp = sotounpcb(so);
172 int error;
173
174 if (unp == NULL)
175 return (EINVAL);
176 UNP_LOCK();
177 error = unp_connect(so, nam, curthread);
178 UNP_UNLOCK();
179 return (error);
180}
181
182int
183uipc_connect2(struct socket *so1, struct socket *so2)
184{
185 struct unpcb *unp = sotounpcb(so1);
186 int error;
187
188 if (unp == NULL)
189 return (EINVAL);
190
191 UNP_LOCK();
192 error = unp_connect2(so1, so2);
193 UNP_UNLOCK();
194 return (error);
195}
196
197/* control is EOPNOTSUPP */
198
199static int
200uipc_detach(struct socket *so)
201{
202 struct unpcb *unp = sotounpcb(so);
203
204 if (unp == NULL)
205 return (EINVAL);
206
207 UNP_LOCK();
208 unp_detach(unp); /* NB: unlocks unp */
209 return (0);
210}
211
212static int
213uipc_disconnect(struct socket *so)
214{
215 struct unpcb *unp = sotounpcb(so);
216
217 if (unp == NULL)
218 return (EINVAL);
219 UNP_LOCK();
220 unp_disconnect(unp);
221 UNP_UNLOCK();
222 return (0);
223}
224
225static int
226uipc_listen(struct socket *so, struct thread *td)
227{
228 struct unpcb *unp = sotounpcb(so);
229 int error;
230
231 if (unp == NULL || unp->unp_vnode == NULL)
232 return (EINVAL);
233 UNP_LOCK();
234 error = unp_listen(unp, td);
235 UNP_UNLOCK();
236 return (error);
237}
238
239static int
240uipc_peeraddr(struct socket *so, struct sockaddr **nam)
241{
242 struct unpcb *unp = sotounpcb(so);
243 const struct sockaddr *sa;
244
245 if (unp == NULL)
246 return (EINVAL);
247 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
248 UNP_LOCK();
249 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
250 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
251 else {
252 /*
253 * XXX: It seems that this test always fails even when
254 * connection is established. So, this else clause is
255 * added as workaround to return PF_LOCAL sockaddr.
256 */
257 sa = &sun_noname;
258 }
259 bcopy(sa, *nam, sa->sa_len);
260 UNP_UNLOCK();
261 return (0);
262}
263
264static int
265uipc_rcvd(struct socket *so, int flags)
266{
267 struct unpcb *unp = sotounpcb(so);
268 struct socket *so2;
269 u_long newhiwat;
270
271 if (unp == NULL)
272 return (EINVAL);
273 UNP_LOCK();
274 switch (so->so_type) {
275 case SOCK_DGRAM:
276 panic("uipc_rcvd DGRAM?");
277 /*NOTREACHED*/
278
279 case SOCK_STREAM:
280 if (unp->unp_conn == NULL)
281 break;
282 so2 = unp->unp_conn->unp_socket;
283 /*
284 * Adjust backpressure on sender
285 * and wakeup any waiting to write.
286 */
287 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
288 unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
289 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
290 so->so_rcv.sb_cc;
291 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
292 newhiwat, RLIM_INFINITY);
293 unp->unp_cc = so->so_rcv.sb_cc;
294 sowwakeup(so2);
295 break;
296
297 default:
298 panic("uipc_rcvd unknown socktype");
299 }
300 UNP_UNLOCK();
301 return (0);
302}
303
304/* pru_rcvoob is EOPNOTSUPP */
305
306static int
307uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
308 struct mbuf *control, struct thread *td)
309{
310 int error = 0;
311 struct unpcb *unp = sotounpcb(so);
312 struct socket *so2;
313 u_long newhiwat;
314
315 if (unp == NULL) {
316 error = EINVAL;
317 goto release;
318 }
319 if (flags & PRUS_OOB) {
320 error = EOPNOTSUPP;
321 goto release;
322 }
323
324 if (control != NULL && (error = unp_internalize(&control, td)))
325 goto release;
326
327 UNP_LOCK();
328 switch (so->so_type) {
329 case SOCK_DGRAM:
330 {
331 const struct sockaddr *from;
332
333 if (nam != NULL) {
334 if (unp->unp_conn != NULL) {
335 error = EISCONN;
336 break;
337 }
338 error = unp_connect(so, nam, td);
339 if (error)
340 break;
341 } else {
342 if (unp->unp_conn == NULL) {
343 error = ENOTCONN;
344 break;
345 }
346 }
347 so2 = unp->unp_conn->unp_socket;
348 if (unp->unp_addr != NULL)
349 from = (struct sockaddr *)unp->unp_addr;
350 else
351 from = &sun_noname;
352 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
353 sorwakeup(so2);
354 m = NULL;
355 control = NULL;
356 } else {
357 error = ENOBUFS;
358 }
359 if (nam != NULL)
360 unp_disconnect(unp);
361 break;
362 }
363
364 case SOCK_STREAM:
365 /* Connect if not connected yet. */
366 /*
367 * Note: A better implementation would complain
368 * if not equal to the peer's address.
369 */
370 if ((so->so_state & SS_ISCONNECTED) == 0) {
371 if (nam != NULL) {
372 error = unp_connect(so, nam, td);
373 if (error)
374 break; /* XXX */
375 } else {
376 error = ENOTCONN;
377 break;
378 }
379 }
380
34
35#include "opt_mac.h"
36
37#include <sys/param.h>
38#include <sys/domain.h>
39#include <sys/fcntl.h>
40#include <sys/malloc.h> /* XXX must be before <sys/file.h> */
41#include <sys/file.h>
42#include <sys/filedesc.h>
43#include <sys/jail.h>
44#include <sys/kernel.h>
45#include <sys/lock.h>
46#include <sys/mac.h>
47#include <sys/mbuf.h>
48#include <sys/mutex.h>
49#include <sys/namei.h>
50#include <sys/proc.h>
51#include <sys/protosw.h>
52#include <sys/resourcevar.h>
53#include <sys/socket.h>
54#include <sys/socketvar.h>
55#include <sys/signalvar.h>
56#include <sys/stat.h>
57#include <sys/sx.h>
58#include <sys/sysctl.h>
59#include <sys/systm.h>
60#include <sys/un.h>
61#include <sys/unpcb.h>
62#include <sys/vnode.h>
63
64#include <vm/uma.h>
65
66static uma_zone_t unp_zone;
67static unp_gen_t unp_gencnt;
68static u_int unp_count;
69
70static struct unp_head unp_shead, unp_dhead;
71
72/*
73 * Unix communications domain.
74 *
75 * TODO:
76 * SEQPACKET, RDM
77 * rethink name space problems
78 * need a proper out-of-band
79 * lock pushdown
80 */
81static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
82static ino_t unp_ino; /* prototype for fake inode numbers */
83
84static struct mtx unp_mtx;
85#define UNP_LOCK_INIT() \
86 mtx_init(&unp_mtx, "unp", NULL, MTX_DEF)
87#define UNP_LOCK() mtx_lock(&unp_mtx)
88#define UNP_UNLOCK() mtx_unlock(&unp_mtx)
89#define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED)
90
91static int unp_attach(struct socket *);
92static void unp_detach(struct unpcb *);
93static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *);
94static int unp_connect(struct socket *,struct sockaddr *, struct thread *);
95static int unp_connect2(struct socket *so, struct socket *so2);
96static void unp_disconnect(struct unpcb *);
97static void unp_shutdown(struct unpcb *);
98static void unp_drop(struct unpcb *, int);
99static void unp_gc(void);
100static void unp_scan(struct mbuf *, void (*)(struct file *));
101static void unp_mark(struct file *);
102static void unp_discard(struct file *);
103static void unp_freerights(struct file **, int);
104static int unp_internalize(struct mbuf **, struct thread *);
105static int unp_listen(struct unpcb *, struct thread *);
106
107static int
108uipc_abort(struct socket *so)
109{
110 struct unpcb *unp = sotounpcb(so);
111
112 if (unp == NULL)
113 return (EINVAL);
114 UNP_LOCK();
115 unp_drop(unp, ECONNABORTED);
116 unp_detach(unp); /* NB: unlocks */
117 SOCK_LOCK(so);
118 sotryfree(so);
119 return (0);
120}
121
122static int
123uipc_accept(struct socket *so, struct sockaddr **nam)
124{
125 struct unpcb *unp = sotounpcb(so);
126 const struct sockaddr *sa;
127
128 if (unp == NULL)
129 return (EINVAL);
130
131 /*
132 * Pass back name of connected socket,
133 * if it was bound and we are still connected
134 * (our peer may have closed already!).
135 */
136 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
137 UNP_LOCK();
138 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL)
139 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
140 else
141 sa = &sun_noname;
142 bcopy(sa, *nam, sa->sa_len);
143 UNP_UNLOCK();
144 return (0);
145}
146
147static int
148uipc_attach(struct socket *so, int proto, struct thread *td)
149{
150 struct unpcb *unp = sotounpcb(so);
151
152 if (unp != NULL)
153 return (EISCONN);
154 return (unp_attach(so));
155}
156
157static int
158uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
159{
160 struct unpcb *unp = sotounpcb(so);
161
162 if (unp == NULL)
163 return (EINVAL);
164
165 return (unp_bind(unp, nam, td));
166}
167
168static int
169uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
170{
171 struct unpcb *unp = sotounpcb(so);
172 int error;
173
174 if (unp == NULL)
175 return (EINVAL);
176 UNP_LOCK();
177 error = unp_connect(so, nam, curthread);
178 UNP_UNLOCK();
179 return (error);
180}
181
182int
183uipc_connect2(struct socket *so1, struct socket *so2)
184{
185 struct unpcb *unp = sotounpcb(so1);
186 int error;
187
188 if (unp == NULL)
189 return (EINVAL);
190
191 UNP_LOCK();
192 error = unp_connect2(so1, so2);
193 UNP_UNLOCK();
194 return (error);
195}
196
197/* control is EOPNOTSUPP */
198
199static int
200uipc_detach(struct socket *so)
201{
202 struct unpcb *unp = sotounpcb(so);
203
204 if (unp == NULL)
205 return (EINVAL);
206
207 UNP_LOCK();
208 unp_detach(unp); /* NB: unlocks unp */
209 return (0);
210}
211
212static int
213uipc_disconnect(struct socket *so)
214{
215 struct unpcb *unp = sotounpcb(so);
216
217 if (unp == NULL)
218 return (EINVAL);
219 UNP_LOCK();
220 unp_disconnect(unp);
221 UNP_UNLOCK();
222 return (0);
223}
224
225static int
226uipc_listen(struct socket *so, struct thread *td)
227{
228 struct unpcb *unp = sotounpcb(so);
229 int error;
230
231 if (unp == NULL || unp->unp_vnode == NULL)
232 return (EINVAL);
233 UNP_LOCK();
234 error = unp_listen(unp, td);
235 UNP_UNLOCK();
236 return (error);
237}
238
239static int
240uipc_peeraddr(struct socket *so, struct sockaddr **nam)
241{
242 struct unpcb *unp = sotounpcb(so);
243 const struct sockaddr *sa;
244
245 if (unp == NULL)
246 return (EINVAL);
247 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
248 UNP_LOCK();
249 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL)
250 sa = (struct sockaddr *) unp->unp_conn->unp_addr;
251 else {
252 /*
253 * XXX: It seems that this test always fails even when
254 * connection is established. So, this else clause is
255 * added as workaround to return PF_LOCAL sockaddr.
256 */
257 sa = &sun_noname;
258 }
259 bcopy(sa, *nam, sa->sa_len);
260 UNP_UNLOCK();
261 return (0);
262}
263
264static int
265uipc_rcvd(struct socket *so, int flags)
266{
267 struct unpcb *unp = sotounpcb(so);
268 struct socket *so2;
269 u_long newhiwat;
270
271 if (unp == NULL)
272 return (EINVAL);
273 UNP_LOCK();
274 switch (so->so_type) {
275 case SOCK_DGRAM:
276 panic("uipc_rcvd DGRAM?");
277 /*NOTREACHED*/
278
279 case SOCK_STREAM:
280 if (unp->unp_conn == NULL)
281 break;
282 so2 = unp->unp_conn->unp_socket;
283 /*
284 * Adjust backpressure on sender
285 * and wakeup any waiting to write.
286 */
287 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt;
288 unp->unp_mbcnt = so->so_rcv.sb_mbcnt;
289 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc -
290 so->so_rcv.sb_cc;
291 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat,
292 newhiwat, RLIM_INFINITY);
293 unp->unp_cc = so->so_rcv.sb_cc;
294 sowwakeup(so2);
295 break;
296
297 default:
298 panic("uipc_rcvd unknown socktype");
299 }
300 UNP_UNLOCK();
301 return (0);
302}
303
304/* pru_rcvoob is EOPNOTSUPP */
305
306static int
307uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
308 struct mbuf *control, struct thread *td)
309{
310 int error = 0;
311 struct unpcb *unp = sotounpcb(so);
312 struct socket *so2;
313 u_long newhiwat;
314
315 if (unp == NULL) {
316 error = EINVAL;
317 goto release;
318 }
319 if (flags & PRUS_OOB) {
320 error = EOPNOTSUPP;
321 goto release;
322 }
323
324 if (control != NULL && (error = unp_internalize(&control, td)))
325 goto release;
326
327 UNP_LOCK();
328 switch (so->so_type) {
329 case SOCK_DGRAM:
330 {
331 const struct sockaddr *from;
332
333 if (nam != NULL) {
334 if (unp->unp_conn != NULL) {
335 error = EISCONN;
336 break;
337 }
338 error = unp_connect(so, nam, td);
339 if (error)
340 break;
341 } else {
342 if (unp->unp_conn == NULL) {
343 error = ENOTCONN;
344 break;
345 }
346 }
347 so2 = unp->unp_conn->unp_socket;
348 if (unp->unp_addr != NULL)
349 from = (struct sockaddr *)unp->unp_addr;
350 else
351 from = &sun_noname;
352 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
353 sorwakeup(so2);
354 m = NULL;
355 control = NULL;
356 } else {
357 error = ENOBUFS;
358 }
359 if (nam != NULL)
360 unp_disconnect(unp);
361 break;
362 }
363
364 case SOCK_STREAM:
365 /* Connect if not connected yet. */
366 /*
367 * Note: A better implementation would complain
368 * if not equal to the peer's address.
369 */
370 if ((so->so_state & SS_ISCONNECTED) == 0) {
371 if (nam != NULL) {
372 error = unp_connect(so, nam, td);
373 if (error)
374 break; /* XXX */
375 } else {
376 error = ENOTCONN;
377 break;
378 }
379 }
380
381 if (so->so_state & SS_CANTSENDMORE) {
381 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
382 error = EPIPE;
383 break;
384 }
385 if (unp->unp_conn == NULL)
386 panic("uipc_send connected but no connection?");
387 so2 = unp->unp_conn->unp_socket;
388 /*
389 * Send to paired receive port, and then reduce
390 * send buffer hiwater marks to maintain backpressure.
391 * Wake up readers.
392 */
393 if (control != NULL) {
394 if (sbappendcontrol(&so2->so_rcv, m, control))
395 control = NULL;
396 } else {
397 sbappend(&so2->so_rcv, m);
398 }
399 so->so_snd.sb_mbmax -=
400 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
401 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
402 newhiwat = so->so_snd.sb_hiwat -
403 (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
404 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
405 newhiwat, RLIM_INFINITY);
406 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
407 sorwakeup(so2);
408 m = NULL;
409 break;
410
411 default:
412 panic("uipc_send unknown socktype");
413 }
414
415 /*
416 * SEND_EOF is equivalent to a SEND followed by
417 * a SHUTDOWN.
418 */
419 if (flags & PRUS_EOF) {
420 socantsendmore(so);
421 unp_shutdown(unp);
422 }
423 UNP_UNLOCK();
424
425 if (control != NULL && error != 0)
426 unp_dispose(control);
427
428release:
429 if (control != NULL)
430 m_freem(control);
431 if (m != NULL)
432 m_freem(m);
433 return (error);
434}
435
436static int
437uipc_sense(struct socket *so, struct stat *sb)
438{
439 struct unpcb *unp = sotounpcb(so);
440 struct socket *so2;
441
442 if (unp == NULL)
443 return (EINVAL);
444 UNP_LOCK();
445 sb->st_blksize = so->so_snd.sb_hiwat;
446 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
447 so2 = unp->unp_conn->unp_socket;
448 sb->st_blksize += so2->so_rcv.sb_cc;
449 }
450 sb->st_dev = NOUDEV;
451 if (unp->unp_ino == 0)
452 unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
453 sb->st_ino = unp->unp_ino;
454 UNP_UNLOCK();
455 return (0);
456}
457
458static int
459uipc_shutdown(struct socket *so)
460{
461 struct unpcb *unp = sotounpcb(so);
462
463 if (unp == NULL)
464 return (EINVAL);
465 UNP_LOCK();
466 socantsendmore(so);
467 unp_shutdown(unp);
468 UNP_UNLOCK();
469 return (0);
470}
471
472static int
473uipc_sockaddr(struct socket *so, struct sockaddr **nam)
474{
475 struct unpcb *unp = sotounpcb(so);
476 const struct sockaddr *sa;
477
478 if (unp == NULL)
479 return (EINVAL);
480 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
481 UNP_LOCK();
482 if (unp->unp_addr != NULL)
483 sa = (struct sockaddr *) unp->unp_addr;
484 else
485 sa = &sun_noname;
486 bcopy(sa, *nam, sa->sa_len);
487 UNP_UNLOCK();
488 return (0);
489}
490
491struct pr_usrreqs uipc_usrreqs = {
492 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
493 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
494 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
495 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
496 sosend, soreceive, sopoll, pru_sosetlabel_null
497};
498
499int
500uipc_ctloutput(so, sopt)
501 struct socket *so;
502 struct sockopt *sopt;
503{
504 struct unpcb *unp = sotounpcb(so);
505 struct xucred xu;
506 int error;
507
508 switch (sopt->sopt_dir) {
509 case SOPT_GET:
510 switch (sopt->sopt_name) {
511 case LOCAL_PEERCRED:
512 error = 0;
513 UNP_LOCK();
514 if (unp->unp_flags & UNP_HAVEPC)
515 xu = unp->unp_peercred;
516 else {
517 if (so->so_type == SOCK_STREAM)
518 error = ENOTCONN;
519 else
520 error = EINVAL;
521 }
522 UNP_UNLOCK();
523 if (error == 0)
524 error = sooptcopyout(sopt, &xu, sizeof(xu));
525 break;
526 default:
527 error = EOPNOTSUPP;
528 break;
529 }
530 break;
531 case SOPT_SET:
532 default:
533 error = EOPNOTSUPP;
534 break;
535 }
536 return (error);
537}
538
539/*
540 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
541 * for stream sockets, although the total for sender and receiver is
542 * actually only PIPSIZ.
543 * Datagram sockets really use the sendspace as the maximum datagram size,
544 * and don't really want to reserve the sendspace. Their recvspace should
545 * be large enough for at least one max-size datagram plus address.
546 */
547#ifndef PIPSIZ
548#define PIPSIZ 8192
549#endif
550static u_long unpst_sendspace = PIPSIZ;
551static u_long unpst_recvspace = PIPSIZ;
552static u_long unpdg_sendspace = 2*1024; /* really max datagram size */
553static u_long unpdg_recvspace = 4*1024;
554
555static int unp_rights; /* file descriptors in flight */
556
557SYSCTL_DECL(_net_local_stream);
558SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
559 &unpst_sendspace, 0, "");
560SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
561 &unpst_recvspace, 0, "");
562SYSCTL_DECL(_net_local_dgram);
563SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
564 &unpdg_sendspace, 0, "");
565SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
566 &unpdg_recvspace, 0, "");
567SYSCTL_DECL(_net_local);
568SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
569
570static int
571unp_attach(so)
572 struct socket *so;
573{
574 register struct unpcb *unp;
575 int error;
576
577 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
578 switch (so->so_type) {
579
580 case SOCK_STREAM:
581 error = soreserve(so, unpst_sendspace, unpst_recvspace);
582 break;
583
584 case SOCK_DGRAM:
585 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
586 break;
587
588 default:
589 panic("unp_attach");
590 }
591 if (error)
592 return (error);
593 }
594 unp = uma_zalloc(unp_zone, M_WAITOK);
595 if (unp == NULL)
596 return (ENOBUFS);
597 bzero(unp, sizeof *unp);
598 LIST_INIT(&unp->unp_refs);
599 unp->unp_socket = so;
600
601 UNP_LOCK();
602 unp->unp_gencnt = ++unp_gencnt;
603 unp_count++;
604 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
605 : &unp_shead, unp, unp_link);
606 UNP_UNLOCK();
607
608 so->so_pcb = unp;
609 return (0);
610}
611
612static void
613unp_detach(unp)
614 register struct unpcb *unp;
615{
616 struct vnode *vp;
617
618 UNP_LOCK_ASSERT();
619
620 LIST_REMOVE(unp, unp_link);
621 unp->unp_gencnt = ++unp_gencnt;
622 --unp_count;
623 if ((vp = unp->unp_vnode) != NULL) {
624 /*
625 * XXXRW: should v_socket be frobbed only while holding
626 * Giant?
627 */
628 unp->unp_vnode->v_socket = NULL;
629 unp->unp_vnode = NULL;
630 }
631 if (unp->unp_conn != NULL)
632 unp_disconnect(unp);
633 while (!LIST_EMPTY(&unp->unp_refs)) {
634 struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
635 unp_drop(ref, ECONNRESET);
636 }
637 soisdisconnected(unp->unp_socket);
638 unp->unp_socket->so_pcb = NULL;
639 if (unp_rights) {
640 /*
641 * Normally the receive buffer is flushed later,
642 * in sofree, but if our receive buffer holds references
643 * to descriptors that are now garbage, we will dispose
644 * of those descriptor references after the garbage collector
645 * gets them (resulting in a "panic: closef: count < 0").
646 */
647 sorflush(unp->unp_socket);
648 unp_gc();
649 }
650 if (unp->unp_addr != NULL)
651 FREE(unp->unp_addr, M_SONAME);
652 UNP_UNLOCK();
653 uma_zfree(unp_zone, unp);
654 if (vp) {
655 mtx_lock(&Giant);
656 vrele(vp);
657 mtx_unlock(&Giant);
658 }
659}
660
661static int
662unp_bind(unp, nam, td)
663 struct unpcb *unp;
664 struct sockaddr *nam;
665 struct thread *td;
666{
667 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
668 struct vnode *vp;
669 struct mount *mp;
670 struct vattr vattr;
671 int error, namelen;
672 struct nameidata nd;
673 char *buf;
674
675 /*
676 * XXXRW: This test-and-set of unp_vnode is non-atomic; the
677 * unlocked read here is fine, but the value of unp_vnode needs
678 * to be tested again after we do all the lookups to see if the
679 * pcb is still unbound?
680 */
681 if (unp->unp_vnode != NULL)
682 return (EINVAL);
683
684 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
685 if (namelen <= 0)
686 return (EINVAL);
687
688 buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
689 strlcpy(buf, soun->sun_path, namelen + 1);
690
691 mtx_lock(&Giant);
692restart:
693 mtx_assert(&Giant, MA_OWNED);
694 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
695 buf, td);
696/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
697 error = namei(&nd);
698 if (error)
699 goto done;
700 vp = nd.ni_vp;
701 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
702 NDFREE(&nd, NDF_ONLY_PNBUF);
703 if (nd.ni_dvp == vp)
704 vrele(nd.ni_dvp);
705 else
706 vput(nd.ni_dvp);
707 if (vp != NULL) {
708 vrele(vp);
709 error = EADDRINUSE;
710 goto done;
711 }
712 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
713 if (error)
714 goto done;
715 goto restart;
716 }
717 VATTR_NULL(&vattr);
718 vattr.va_type = VSOCK;
719 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
720#ifdef MAC
721 error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
722 &vattr);
723#endif
724 if (error == 0) {
725 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
726 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
727 }
728 NDFREE(&nd, NDF_ONLY_PNBUF);
729 vput(nd.ni_dvp);
730 if (error)
731 goto done;
732 vp = nd.ni_vp;
733 ASSERT_VOP_LOCKED(vp, "unp_bind");
734 soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
735 UNP_LOCK();
736 vp->v_socket = unp->unp_socket;
737 unp->unp_vnode = vp;
738 unp->unp_addr = soun;
739 UNP_UNLOCK();
740 VOP_UNLOCK(vp, 0, td);
741 vn_finished_write(mp);
742done:
743 mtx_unlock(&Giant);
744 free(buf, M_TEMP);
745 return (error);
746}
747
748static int
749unp_connect(so, nam, td)
750 struct socket *so;
751 struct sockaddr *nam;
752 struct thread *td;
753{
754 register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
755 register struct vnode *vp;
756 register struct socket *so2, *so3;
757 struct unpcb *unp = sotounpcb(so);
758 struct unpcb *unp2, *unp3;
759 int error, len;
760 struct nameidata nd;
761 char buf[SOCK_MAXADDRLEN];
762 struct sockaddr *sa;
763
764 UNP_LOCK_ASSERT();
765
766 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
767 if (len <= 0)
768 return (EINVAL);
769 strlcpy(buf, soun->sun_path, len + 1);
770 UNP_UNLOCK();
771 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
772 mtx_lock(&Giant);
773 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
774 error = namei(&nd);
775 if (error)
776 vp = NULL;
777 else
778 vp = nd.ni_vp;
779 ASSERT_VOP_LOCKED(vp, "unp_connect");
780 NDFREE(&nd, NDF_ONLY_PNBUF);
781 if (error)
782 goto bad;
783
784 if (vp->v_type != VSOCK) {
785 error = ENOTSOCK;
786 goto bad;
787 }
788 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
789 if (error)
790 goto bad;
791 so2 = vp->v_socket;
792 if (so2 == NULL) {
793 error = ECONNREFUSED;
794 goto bad;
795 }
796 if (so->so_type != so2->so_type) {
797 error = EPROTOTYPE;
798 goto bad;
799 }
800 mtx_unlock(&Giant);
801 UNP_LOCK();
802 if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
803 if (so2->so_options & SO_ACCEPTCONN) {
804 /*
805 * NB: drop locks here so unp_attach is entered
806 * w/o locks; this avoids a recursive lock
807 * of the head and holding sleep locks across
808 * a (potentially) blocking malloc.
809 */
810 UNP_UNLOCK();
811 so3 = sonewconn(so2, 0);
812 UNP_LOCK();
813 } else
814 so3 = NULL;
815 if (so3 == NULL) {
816 error = ECONNREFUSED;
817 goto bad2;
818 }
819 unp = sotounpcb(so);
820 unp2 = sotounpcb(so2);
821 unp3 = sotounpcb(so3);
822 if (unp2->unp_addr != NULL) {
823 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
824 unp3->unp_addr = (struct sockaddr_un *) sa;
825 sa = NULL;
826 }
827 /*
828 * unp_peercred management:
829 *
830 * The connecter's (client's) credentials are copied
831 * from its process structure at the time of connect()
832 * (which is now).
833 */
834 cru2x(td->td_ucred, &unp3->unp_peercred);
835 unp3->unp_flags |= UNP_HAVEPC;
836 /*
837 * The receiver's (server's) credentials are copied
838 * from the unp_peercred member of socket on which the
839 * former called listen(); unp_listen() cached that
840 * process's credentials at that time so we can use
841 * them now.
842 */
843 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
844 ("unp_connect: listener without cached peercred"));
845 memcpy(&unp->unp_peercred, &unp2->unp_peercred,
846 sizeof(unp->unp_peercred));
847 unp->unp_flags |= UNP_HAVEPC;
848#ifdef MAC
849 SOCK_LOCK(so);
850 mac_set_socket_peer_from_socket(so, so3);
851 mac_set_socket_peer_from_socket(so3, so);
852 SOCK_UNLOCK(so);
853#endif
854
855 so2 = so3;
856 }
857 error = unp_connect2(so, so2);
858bad2:
859 UNP_UNLOCK();
860 mtx_lock(&Giant);
861bad:
862 mtx_assert(&Giant, MA_OWNED);
863 if (vp != NULL)
864 vput(vp);
865 mtx_unlock(&Giant);
866 free(sa, M_SONAME);
867 UNP_LOCK();
868 return (error);
869}
870
871static int
872unp_connect2(so, so2)
873 register struct socket *so;
874 register struct socket *so2;
875{
876 register struct unpcb *unp = sotounpcb(so);
877 register struct unpcb *unp2;
878
879 UNP_LOCK_ASSERT();
880
881 if (so2->so_type != so->so_type)
882 return (EPROTOTYPE);
883 unp2 = sotounpcb(so2);
884 unp->unp_conn = unp2;
885 switch (so->so_type) {
886
887 case SOCK_DGRAM:
888 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
889 soisconnected(so);
890 break;
891
892 case SOCK_STREAM:
893 unp2->unp_conn = unp;
894 soisconnected(so);
895 soisconnected(so2);
896 break;
897
898 default:
899 panic("unp_connect2");
900 }
901 return (0);
902}
903
904static void
905unp_disconnect(unp)
906 struct unpcb *unp;
907{
908 register struct unpcb *unp2 = unp->unp_conn;
909
910 UNP_LOCK_ASSERT();
911
912 if (unp2 == NULL)
913 return;
914 unp->unp_conn = NULL;
915 switch (unp->unp_socket->so_type) {
916
917 case SOCK_DGRAM:
918 LIST_REMOVE(unp, unp_reflink);
919 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
920 break;
921
922 case SOCK_STREAM:
923 soisdisconnected(unp->unp_socket);
924 unp2->unp_conn = NULL;
925 soisdisconnected(unp2->unp_socket);
926 break;
927 }
928}
929
930#ifdef notdef
/*
 * unp_abort(): abort handler that simply tears down the pcb.
 * Currently compiled out (see the enclosing #ifdef notdef).
 */
931 void
932 unp_abort(unp)
933 	struct unpcb *unp;
934 {
935 
936 	unp_detach(unp);
937 }
938#endif
939
940/*
941 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed
942 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers
943 * are safe to reference. It first scans the list of struct unpcb's to
944 * generate a pointer list, then it rescans its list one entry at a time to
945 * externalize and copyout. It checks the generation number to see if a
946 * struct unpcb has been reused, and will skip it if so.
947 */
948 static int
949 unp_pcblist(SYSCTL_HANDLER_ARGS)
950 {
951 	int error, i, n;
952 	struct unpcb *unp, **unp_list;
953 	unp_gen_t gencnt;
954 	struct xunpgen *xug;
955 	struct unp_head *head;
956 	struct xunpcb *xu;
957 
	/* arg1 selects which pcb list to walk: datagram or stream. */
958 	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
959 
960 	/*
961 	 * The process of preparing the PCB list is too time-consuming and
962 	 * resource-intensive to repeat twice on every request.
963 	 */
964 	if (req->oldptr == NULL) {
		/*
		 * Size-probe only: report an over-estimate (12.5% slop)
		 * of the space needed.  unp_count is read without the
		 * UNP lock here; the value is advisory only.
		 */
965 		n = unp_count;
966 		req->oldidx = 2 * (sizeof *xug)
967 		    + (n + n/8) * sizeof(struct xunpcb);
968 		return (0);
969 	}
970 
971 	if (req->newptr != NULL)
972 		return (EPERM);
973 
974 	/*
975 	 * OK, now we're committed to doing something.
976 	 */
977 	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
978 	UNP_LOCK();
	/* Snapshot generation and count under the lock. */
979 	gencnt = unp_gencnt;
980 	n = unp_count;
981 	UNP_UNLOCK();
982 
983 	xug->xug_len = sizeof *xug;
984 	xug->xug_count = n;
985 	xug->xug_gen = gencnt;
986 	xug->xug_sogen = so_gencnt;
987 	error = SYSCTL_OUT(req, xug, sizeof *xug);
988 	if (error) {
989 		free(xug, M_TEMP);
990 		return (error);
991 	}
992 
993 	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
994 
995 	UNP_LOCK();
	/*
	 * First pass: collect raw pcb pointers, skipping pcbs created
	 * after the snapshot and pcbs the caller may not see.
	 */
996 	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
997 	     unp = LIST_NEXT(unp, unp_link)) {
998 		if (unp->unp_gencnt <= gencnt) {
999 			if (cr_cansee(req->td->td_ucred,
1000 			    unp->unp_socket->so_cred))
1001 				continue;
1002 			unp_list[i++] = unp;
1003 		}
1004 	}
1005 	UNP_UNLOCK();
1006 	n = i;			/* in case we lost some during malloc */
1007 
1008 	error = 0;
1009 	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK);
	/*
	 * Second pass: externalize each still-valid pcb.  Reused pcbs
	 * (generation advanced past the snapshot) are skipped.
	 *
	 * NOTE(review): a SYSCTL_OUT failure does not break out of this
	 * loop; remaining iterations keep formatting and attempting
	 * copyout.  Presumably harmless but wasteful — confirm.
	 */
1010 	for (i = 0; i < n; i++) {
1011 		unp = unp_list[i];
1012 		if (unp->unp_gencnt <= gencnt) {
1013 			xu->xu_len = sizeof *xu;
1014 			xu->xu_unpp = unp;
1015 			/*
1016 			 * XXX - need more locking here to protect against
1017 			 * connect/disconnect races for SMP.
1018 			 */
1019 			if (unp->unp_addr != NULL)
1020 				bcopy(unp->unp_addr, &xu->xu_addr,
1021 				      unp->unp_addr->sun_len);
1022 			if (unp->unp_conn != NULL &&
1023 			    unp->unp_conn->unp_addr != NULL)
1024 				bcopy(unp->unp_conn->unp_addr,
1025 				      &xu->xu_caddr,
1026 				      unp->unp_conn->unp_addr->sun_len);
1027 			bcopy(unp, &xu->xu_unp, sizeof *unp);
1028 			sotoxsocket(unp->unp_socket, &xu->xu_socket);
1029 			error = SYSCTL_OUT(req, xu, sizeof *xu);
1030 		}
1031 	}
1032 	free(xu, M_TEMP);
1033 	if (!error) {
1034 		/*
1035 		 * Give the user an updated idea of our state.
1036 		 * If the generation differs from what we told
1037 		 * her before, she knows that something happened
1038 		 * while we were processing this request, and it
1039 		 * might be necessary to retry.
1040 		 */
1041 		xug->xug_gen = unp_gencnt;
1042 		xug->xug_sogen = so_gencnt;
1043 		xug->xug_count = unp_count;
1044 		error = SYSCTL_OUT(req, xug, sizeof *xug);
1045 	}
1046 	free(unp_list, M_TEMP);
1047 	free(xug, M_TEMP);
1048 	return (error);
1049 }
1050
/*
 * Export the datagram and stream pcb lists read-only via sysctl;
 * arg1 (the SOCK_* constant) tells unp_pcblist() which list to walk.
 */
1051 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
1052 	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
1053 	    "List of active local datagram sockets");
1054 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
1055 	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
1056 	    "List of active local stream sockets");
1057
1058static void
1059unp_shutdown(unp)
1060 struct unpcb *unp;
1061{
1062 struct socket *so;
1063
1064 UNP_LOCK_ASSERT();
1065
1066 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
1067 (so = unp->unp_conn->unp_socket))
1068 socantrcvmore(so);
1069}
1070
1071static void
1072unp_drop(unp, errno)
1073 struct unpcb *unp;
1074 int errno;
1075{
1076 struct socket *so = unp->unp_socket;
1077
1078 UNP_LOCK_ASSERT();
1079
1080 so->so_error = errno;
1081 unp_disconnect(unp);
1082}
1083
1084#ifdef notdef
/* unp_drain(): empty drain hook; compiled out. */
1085 void
1086 unp_drain()
1087 {
1088 
1089 }
1090#endif
1091
static void
unp_freerights(rp, fdcount)
	struct file **rp;
	int fdcount;
{
	int i;
	struct file *fp;

	/*
	 * Discard an array of fdcount in-flight file references.  Each
	 * slot is cleared before unp_discard() runs, since the discard
	 * may end up in unp_gc(), which must not see stale pointers.
	 */
	for (i = 0; i < fdcount; i++) {
		fp = rp[i];
		rp[i] = 0;
		unp_discard(fp);
	}
}
1111
/*
 * unp_externalize(): walk the control mbuf's cmsg chain and convert
 * any SCM_RIGHTS payloads from in-kernel struct file pointers into
 * integer descriptors installed in the receiving thread's fd table.
 * Other control message types are copied through unchanged.  On
 * failure the rights in the offending cmsg are released; cmsgs already
 * externalized remain linked at *controlp.  'control' is always
 * consumed (m_freem at the end).  A NULL controlp means "free the
 * control messages without delivering them".
 */
1112 int
1113 unp_externalize(control, controlp)
1114 	struct mbuf *control, **controlp;
1115 {
1116 	struct thread *td = curthread;		/* XXX */
1117 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1118 	int i;
1119 	int *fdp;
1120 	struct file **rp;
1121 	struct file *fp;
1122 	void *data;
1123 	socklen_t clen = control->m_len, datalen;
1124 	int error, newfds;
1125 	int f;
1126 	u_int newlen;
1127 
1128 	error = 0;
1129 	if (controlp != NULL) /* controlp == NULL => free control messages */
1130 		*controlp = NULL;
1131 
1132 	while (cm != NULL) {
		/* Reject truncated or overlong cmsg headers. */
1133 		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
1134 			error = EINVAL;
1135 			break;
1136 		}
1137 
1138 		data = CMSG_DATA(cm);
1139 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1140 
1141 		if (cm->cmsg_level == SOL_SOCKET
1142 		    && cm->cmsg_type == SCM_RIGHTS) {
1143 			newfds = datalen / sizeof(struct file *);
1144 			rp = data;
1145 
1146 			/* If we're not outputting the descriptors free them. */
1147 			if (error || controlp == NULL) {
1148 				unp_freerights(rp, newfds);
1149 				goto next;
1150 			}
1151 			FILEDESC_LOCK(td->td_proc->p_fd);
1152 			/* if the new FD's will not fit free them.  */
1153 			if (!fdavail(td, newfds)) {
1154 				FILEDESC_UNLOCK(td->td_proc->p_fd);
1155 				error = EMSGSIZE;
1156 				unp_freerights(rp, newfds);
1157 				goto next;
1158 			}
1159 			/*
1160 			 * now change each pointer to an fd in the global
1161 			 * table to an integer that is the index to the
1162 			 * local fd table entry that we set up to point
1163 			 * to the global one we are transferring.
1164 			 */
1165 			newlen = newfds * sizeof(int);
1166 			*controlp = sbcreatecontrol(NULL, newlen,
1167 			    SCM_RIGHTS, SOL_SOCKET);
1168 			if (*controlp == NULL) {
1169 				FILEDESC_UNLOCK(td->td_proc->p_fd);
1170 				error = E2BIG;
1171 				unp_freerights(rp, newfds);
1172 				goto next;
1173 			}
1174 
1175 			fdp = (int *)
1176 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
1177 			for (i = 0; i < newfds; i++) {
1178 				if (fdalloc(td, 0, &f))
1179 					panic("unp_externalize fdalloc failed");
				/*
				 * The fd table entry takes over the
				 * reference that was counted in
				 * f_msgcount while in flight.
				 */
1180 				fp = *rp++;
1181 				td->td_proc->p_fd->fd_ofiles[f] = fp;
1182 				FILE_LOCK(fp);
1183 				fp->f_msgcount--;
1184 				FILE_UNLOCK(fp);
1185 				unp_rights--;
1186 				*fdp++ = f;
1187 			}
1188 			FILEDESC_UNLOCK(td->td_proc->p_fd);
1189 		} else { /* We can just copy anything else across */
1190 			if (error || controlp == NULL)
1191 				goto next;
1192 			*controlp = sbcreatecontrol(NULL, datalen,
1193 			    cm->cmsg_type, cm->cmsg_level);
1194 			if (*controlp == NULL) {
1195 				error = ENOBUFS;
1196 				goto next;
1197 			}
1198 			bcopy(data,
1199 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
1200 			    datalen);
1201 		}
1202 
1203 		controlp = &(*controlp)->m_next;
1204 
1205 next:
		/* Advance to the next cmsg, or stop at the chain's end. */
1206 		if (CMSG_SPACE(datalen) < clen) {
1207 			clen -= CMSG_SPACE(datalen);
1208 			cm = (struct cmsghdr *)
1209 			    ((caddr_t)cm + CMSG_SPACE(datalen));
1210 		} else {
1211 			clen = 0;
1212 			cm = NULL;
1213 		}
1214 	}
1215 
1216 	m_freem(control);
1217 
1218 	return (error);
1219 }
1220
/*
 * unp_init(): one-time initialization of the UNIX-domain socket layer:
 * create the unpcb zone (NOFREE so unp_pcblist() may follow stale
 * pointers safely), cap it at nmbclusters, and set up the global pcb
 * lists and lock.
 */
1221 void
1222 unp_init(void)
1223 {
1224 	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
1225 	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
1226 	if (unp_zone == NULL)
1227 		panic("unp_init");
1228 	uma_zone_set_max(unp_zone, nmbclusters);
1229 	LIST_INIT(&unp_dhead);
1230 	LIST_INIT(&unp_shead);
1231 
1232 	UNP_LOCK_INIT();
1233 }
1234
/*
 * unp_internalize(): rewrite the sender's control mbuf chain into its
 * in-kernel form before queueing it on a peer socket:
 *   SCM_CREDS     - filled in with the sending process's credentials;
 *   SCM_RIGHTS    - integer fds replaced by referenced struct file
 *                   pointers (f_count and f_msgcount bumped);
 *   SCM_TIMESTAMP - filled in with the current time.
 * Any other type, or a non-SOL_SOCKET level, fails with EINVAL.  The
 * input chain is always consumed; on success *controlp points at the
 * newly built chain.
 */
1235 static int
1236 unp_internalize(controlp, td)
1237 	struct mbuf **controlp;
1238 	struct thread *td;
1239 {
1240 	struct mbuf *control = *controlp;
1241 	struct proc *p = td->td_proc;
1242 	struct filedesc *fdescp = p->p_fd;
1243 	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
1244 	struct cmsgcred *cmcred;
1245 	struct file **rp;
1246 	struct file *fp;
1247 	struct timeval *tv;
1248 	int i, fd, *fdp;
1249 	void *data;
1250 	socklen_t clen = control->m_len, datalen;
1251 	int error, oldfds;
1252 	u_int newlen;
1253 
1254 	error = 0;
1255 	*controlp = NULL;
1256 
1257 	while (cm != NULL) {
1258 		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
1259 		    || cm->cmsg_len > clen) {
1260 			error = EINVAL;
1261 			goto out;
1262 		}
1263 
1264 		data = CMSG_DATA(cm);
1265 		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;
1266 
1267 		switch (cm->cmsg_type) {
1268 		/*
1269 		 * Fill in credential information.
1270 		 */
1271 		case SCM_CREDS:
1272 			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
1273 			    SCM_CREDS, SOL_SOCKET);
1274 			if (*controlp == NULL) {
1275 				error = ENOBUFS;
1276 				goto out;
1277 			}
1278 
1279 			cmcred = (struct cmsgcred *)
1280 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
1281 			cmcred->cmcred_pid = p->p_pid;
1282 			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
1283 			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
1284 			cmcred->cmcred_euid = td->td_ucred->cr_uid;
			/* Group list is clamped to what cmsgcred can hold. */
1285 			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
1286 							CMGROUP_MAX);
1287 			for (i = 0; i < cmcred->cmcred_ngroups; i++)
1288 				cmcred->cmcred_groups[i] =
1289 				    td->td_ucred->cr_groups[i];
1290 			break;
1291 
1292 		case SCM_RIGHTS:
1293 			oldfds = datalen / sizeof (int);
1294 			/*
1295 			 * check that all the FDs passed in refer to legal files
1296 			 * If not, reject the entire operation.
1297 			 */
1298 			fdp = data;
1299 			FILEDESC_LOCK(fdescp);
1300 			for (i = 0; i < oldfds; i++) {
1301 				fd = *fdp++;
1302 				if ((unsigned)fd >= fdescp->fd_nfiles ||
1303 				    fdescp->fd_ofiles[fd] == NULL) {
1304 					FILEDESC_UNLOCK(fdescp);
1305 					error = EBADF;
1306 					goto out;
1307 				}
1308 				fp = fdescp->fd_ofiles[fd];
				/* Some file types (e.g. kqueues) may not be passed. */
1309 				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
1310 					FILEDESC_UNLOCK(fdescp);
1311 					error = EOPNOTSUPP;
1312 					goto out;
1313 				}
1314 
1315 			}
1316 			/*
1317 			 * Now replace the integer FDs with pointers to
1318 			 * the associated global file table entry..
1319 			 */
1320 			newlen = oldfds * sizeof(struct file *);
1321 			*controlp = sbcreatecontrol(NULL, newlen,
1322 			    SCM_RIGHTS, SOL_SOCKET);
1323 			if (*controlp == NULL) {
1324 				FILEDESC_UNLOCK(fdescp);
1325 				error = E2BIG;
1326 				goto out;
1327 			}
1328 
1329 			fdp = data;
1330 			rp = (struct file **)
1331 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
1332 			for (i = 0; i < oldfds; i++) {
1333 				fp = fdescp->fd_ofiles[*fdp++];
1334 				*rp++ = fp;
1335 				FILE_LOCK(fp);
1336 				fp->f_count++;
1337 				fp->f_msgcount++;
1338 				FILE_UNLOCK(fp);
1339 				unp_rights++;
1340 			}
1341 			FILEDESC_UNLOCK(fdescp);
1342 			break;
1343 
1344 		case SCM_TIMESTAMP:
1345 			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
1346 			    SCM_TIMESTAMP, SOL_SOCKET);
1347 			if (*controlp == NULL) {
1348 				error = ENOBUFS;
1349 				goto out;
1350 			}
1351 			tv = (struct timeval *)
1352 			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
1353 			microtime(tv);
1354 			break;
1355 
1356 		default:
1357 			error = EINVAL;
1358 			goto out;
1359 		}
1360 
1361 		controlp = &(*controlp)->m_next;
1362 
		/* Step to the next cmsg in the buffer, if any remains. */
1363 		if (CMSG_SPACE(datalen) < clen) {
1364 			clen -= CMSG_SPACE(datalen);
1365 			cm = (struct cmsghdr *)
1366 			    ((caddr_t)cm + CMSG_SPACE(datalen));
1367 		} else {
1368 			clen = 0;
1369 			cm = NULL;
1370 		}
1371 	}
1372 
1373 out:
1374 	m_freem(control);
1375 
1376 	return (error);
1377 }
1378
/*
 * unp_defer: count of files flagged FDEFER awaiting another mark pass.
 * unp_gcing: re-entrancy guard; a nested unp_gc() call returns at once.
 */
1379 static int unp_defer, unp_gcing;
1380 
/*
 * unp_gc(): mark-and-sweep garbage collector for file descriptors
 * stranded in-flight inside SCM_RIGHTS messages.  The mark phase flags
 * every externally reachable file (FMARK) and follows rights queued in
 * local-domain receive buffers; the sweep phase takes an extra
 * reference on each unreachable file, flushes the rights its socket
 * holds, and closes the extra reference.  See the long comment below
 * for why the two-step sweep is required.
 */
1381 static void
1382 unp_gc()
1383 {
1384 	register struct file *fp, *nextfp;
1385 	register struct socket *so;
1386 	struct file **extra_ref, **fpp;
1387 	int nunref, i;
1388 
1389 	UNP_LOCK_ASSERT();
1390 
1391 	if (unp_gcing)
1392 		return;
1393 	unp_gcing = 1;
1394 	unp_defer = 0;
1395 	/*
1396 	 * before going through all this, set all FDs to
1397 	 * be NOT defered and NOT externally accessible
1398 	 */
1399 	/*
1400 	 * XXXRW: Acquiring a sleep lock while holding UNP
1401 	 * mutex cannot be a good thing.
1402 	 */
1403 	sx_slock(&filelist_lock);
1404 	LIST_FOREACH(fp, &filehead, f_list)
1405 		fp->f_gcflag &= ~(FMARK|FDEFER);
	/* Mark phase: iterate until no file remains deferred. */
1406 	do {
1407 		LIST_FOREACH(fp, &filehead, f_list) {
1408 			FILE_LOCK(fp);
1409 			/*
1410 			 * If the file is not open, skip it
1411 			 */
1412 			if (fp->f_count == 0) {
1413 				FILE_UNLOCK(fp);
1414 				continue;
1415 			}
1416 			/*
1417 			 * If we already marked it as 'defer'  in a
1418 			 * previous pass, then try process it this time
1419 			 * and un-mark it
1420 			 */
1421 			if (fp->f_gcflag & FDEFER) {
1422 				fp->f_gcflag &= ~FDEFER;
1423 				unp_defer--;
1424 			} else {
1425 				/*
1426 				 * if it's not defered, then check if it's
1427 				 * already marked.. if so skip it
1428 				 */
1429 				if (fp->f_gcflag & FMARK) {
1430 					FILE_UNLOCK(fp);
1431 					continue;
1432 				}
1433 				/*
1434 				 * If all references are from messages
1435 				 * in transit, then skip it. it's not
1436 				 * externally accessible.
1437 				 */
1438 				if (fp->f_count == fp->f_msgcount) {
1439 					FILE_UNLOCK(fp);
1440 					continue;
1441 				}
1442 				/*
1443 				 * If it got this far then it must be
1444 				 * externally accessible.
1445 				 */
1446 				fp->f_gcflag |= FMARK;
1447 			}
1448 			/*
1449 			 * either it was defered, or it is externally
1450 			 * accessible and not already marked so.
1451 			 * Now check if it is possibly one of OUR sockets.
1452 			 */
1453 			if (fp->f_type != DTYPE_SOCKET ||
1454 			    (so = fp->f_data) == NULL) {
1455 				FILE_UNLOCK(fp);
1456 				continue;
1457 			}
1458 			FILE_UNLOCK(fp);
1459 			if (so->so_proto->pr_domain != &localdomain ||
1460 			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
1461 				continue;
1462#ifdef notdef
1463 			if (so->so_rcv.sb_flags & SB_LOCK) {
1464 				/*
1465 				 * This is problematical; it's not clear
1466 				 * we need to wait for the sockbuf to be
1467 				 * unlocked (on a uniprocessor, at least),
1468 				 * and it's also not clear what to do
1469 				 * if sbwait returns an error due to receipt
1470 				 * of a signal.  If sbwait does return
1471 				 * an error, we'll go into an infinite
1472 				 * loop.  Delete all of this for now.
1473 				 */
1474 				(void) sbwait(&so->so_rcv);
1475 				goto restart;
1476 			}
1477#endif
1478 			/*
1479 			 * So, Ok, it's one of our sockets and it IS externally
1480 			 * accessible (or was defered). Now we look
1481 			 * to see if we hold any file descriptors in its
1482 			 * message buffers. Follow those links and mark them
1483 			 * as accessible too.
1484 			 */
1485 			unp_scan(so->so_rcv.sb_mb, unp_mark);
1486 		}
1487 	} while (unp_defer);
1488 	sx_sunlock(&filelist_lock);
1489 	/*
1490 	 * We grab an extra reference to each of the file table entries
1491 	 * that are not otherwise accessible and then free the rights
1492 	 * that are stored in messages on them.
1493 	 *
1494 	 * The bug in the orginal code is a little tricky, so I'll describe
1495 	 * what's wrong with it here.
1496 	 *
1497 	 * It is incorrect to simply unp_discard each entry for f_msgcount
1498 	 * times -- consider the case of sockets A and B that contain
1499 	 * references to each other.  On a last close of some other socket,
1500 	 * we trigger a gc since the number of outstanding rights (unp_rights)
1501 	 * is non-zero.  If during the sweep phase the gc code un_discards,
1502 	 * we end up doing a (full) closef on the descriptor.  A closef on A
1503 	 * results in the following chain.  Closef calls soo_close, which
1504 	 * calls soclose.   Soclose calls first (through the switch
1505 	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
1506 	 * returns because the previous instance had set unp_gcing, and
1507 	 * we return all the way back to soclose, which marks the socket
1508 	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
1509 	 * to free up the rights that are queued in messages on the socket A,
1510 	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
1511 	 * switch unp_dispose, which unp_scans with unp_discard.  This second
1512 	 * instance of unp_discard just calls closef on B.
1513 	 *
1514 	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
1515 	 * which results in another closef on A.  Unfortunately, A is already
1516 	 * being closed, and the descriptor has already been marked with
1517 	 * SS_NOFDREF, and soclose panics at this point.
1518 	 *
1519 	 * Here, we first take an extra reference to each inaccessible
1520 	 * descriptor.  Then, we call sorflush ourself, since we know
1521 	 * it is a Unix domain socket anyhow.  After we destroy all the
1522 	 * rights carried in messages, we do a last closef to get rid
1523 	 * of our extra reference.  This is the last close, and the
1524 	 * unp_detach etc will shut down the socket.
1525 	 *
1526 	 * 91/09/19, bsy@cs.cmu.edu
1527 	 */
1528 	extra_ref = malloc(nfiles * sizeof(struct file *), M_TEMP, M_WAITOK);
1529 	sx_slock(&filelist_lock);
	/* Sweep phase: collect unreachable files, pinning each one. */
1530 	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
1531 	    fp != NULL; fp = nextfp) {
1532 		nextfp = LIST_NEXT(fp, f_list);
1533 		FILE_LOCK(fp);
1534 		/*
1535 		 * If it's not open, skip it
1536 		 */
1537 		if (fp->f_count == 0) {
1538 			FILE_UNLOCK(fp);
1539 			continue;
1540 		}
1541 		/*
1542 		 * If all refs are from msgs, and it's not marked accessible
1543 		 * then it must be referenced from some unreachable cycle
1544 		 * of (shut-down) FDs, so include it in our
1545 		 * list of FDs to remove
1546 		 */
1547 		if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
1548 			*fpp++ = fp;
1549 			nunref++;
1550 			fp->f_count++;
1551 		}
1552 		FILE_UNLOCK(fp);
1553 	}
1554 	sx_sunlock(&filelist_lock);
1555 	/*
1556 	 * for each FD on our hit list, do the following two things
1557 	 */
1558 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1559 		struct file *tfp = *fpp;
1560 		FILE_LOCK(tfp);
1561 		if (tfp->f_type == DTYPE_SOCKET &&
1562 		    tfp->f_data != NULL) {
1563 			FILE_UNLOCK(tfp);
1564 			sorflush(tfp->f_data);
1565 		} else {
1566 			FILE_UNLOCK(tfp);
1567 		}
1568 	}
	/* Drop the pinning references; this is each file's last close. */
1569 	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
1570 		closef(*fpp, (struct thread *) NULL);
1571 	free(extra_ref, M_TEMP);
1572 	unp_gcing = 0;
1573 }
1574
1575void
1576unp_dispose(m)
1577 struct mbuf *m;
1578{
1579
1580 if (m)
1581 unp_scan(m, unp_discard);
1582}
1583
1584static int
1585unp_listen(unp, td)
1586 struct unpcb *unp;
1587 struct thread *td;
1588{
1589 UNP_LOCK_ASSERT();
1590
1591 /*
1592 * XXXRW: Why populate the local peer cred with our own credential?
1593 */
1594 cru2x(td->td_ucred, &unp->unp_peercred);
1595 unp->unp_flags |= UNP_HAVEPCCACHED;
1596 return (0);
1597}
1598
/*
 * unp_scan(): walk a chain of records (linked via m_act) and, within
 * each record, every MT_CONTROL mbuf's cmsg chain; apply 'op' (e.g.
 * unp_mark or unp_discard) to every struct file pointer found in
 * SCM_RIGHTS payloads.  Malformed cmsg headers terminate the scan of
 * that mbuf only.
 */
1599 static void
1600 unp_scan(m0, op)
1601 	register struct mbuf *m0;
1602 	void (*op)(struct file *);
1603 {
1604 	struct mbuf *m;
1605 	struct file **rp;
1606 	struct cmsghdr *cm;
1607 	void *data;
1608 	int i;
1609 	socklen_t clen, datalen;
1610 	int qfds;
1611 
1612 	while (m0 != NULL) {
1613 		for (m = m0; m; m = m->m_next) {
1614 			if (m->m_type != MT_CONTROL)
1615 				continue;
1616 
1617 			cm = mtod(m, struct cmsghdr *);
1618 			clen = m->m_len;
1619 
1620 			while (cm != NULL) {
1621 				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
1622 					break;
1623 
1624 				data = CMSG_DATA(cm);
1625 				datalen = (caddr_t)cm + cm->cmsg_len
1626 				    - (caddr_t)data;
1627 
1628 				if (cm->cmsg_level == SOL_SOCKET &&
1629 				    cm->cmsg_type == SCM_RIGHTS) {
1630 					qfds = datalen / sizeof (struct file *);
1631 					rp = data;
1632 					for (i = 0; i < qfds; i++)
1633 						(*op)(*rp++);
1634 				}
1635 
				/* Advance to the next cmsg in this mbuf. */
1636 				if (CMSG_SPACE(datalen) < clen) {
1637 					clen -= CMSG_SPACE(datalen);
1638 					cm = (struct cmsghdr *)
1639 					    ((caddr_t)cm + CMSG_SPACE(datalen));
1640 				} else {
1641 					clen = 0;
1642 					cm = NULL;
1643 				}
1644 			}
1645 		}
1646 		m0 = m0->m_act;
1647 	}
1648 }
1649
1650static void
1651unp_mark(fp)
1652 struct file *fp;
1653{
1654 if (fp->f_gcflag & FMARK)
1655 return;
1656 unp_defer++;
1657 fp->f_gcflag |= (FMARK|FDEFER);
1658}
1659
/*
 * unp_discard(): release one in-flight reference to a passed file:
 * account for it leaving a message queue (f_msgcount, unp_rights)
 * under the file lock, then drop the hold via closef().
 */
1660 static void
1661 unp_discard(fp)
1662 	struct file *fp;
1663 {
1664 	FILE_LOCK(fp);
1665 	fp->f_msgcount--;
1666 	unp_rights--;
1667 	FILE_UNLOCK(fp);
1668 	(void) closef(fp, (struct thread *)NULL);
1669 }
382 error = EPIPE;
383 break;
384 }
385 if (unp->unp_conn == NULL)
386 panic("uipc_send connected but no connection?");
387 so2 = unp->unp_conn->unp_socket;
388 /*
389 * Send to paired receive port, and then reduce
390 * send buffer hiwater marks to maintain backpressure.
391 * Wake up readers.
392 */
393 if (control != NULL) {
394 if (sbappendcontrol(&so2->so_rcv, m, control))
395 control = NULL;
396 } else {
397 sbappend(&so2->so_rcv, m);
398 }
399 so->so_snd.sb_mbmax -=
400 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
401 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
402 newhiwat = so->so_snd.sb_hiwat -
403 (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
404 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
405 newhiwat, RLIM_INFINITY);
406 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
407 sorwakeup(so2);
408 m = NULL;
409 break;
410
411 default:
412 panic("uipc_send unknown socktype");
413 }
414
415 /*
416 * SEND_EOF is equivalent to a SEND followed by
417 * a SHUTDOWN.
418 */
419 if (flags & PRUS_EOF) {
420 socantsendmore(so);
421 unp_shutdown(unp);
422 }
423 UNP_UNLOCK();
424
425 if (control != NULL && error != 0)
426 unp_dispose(control);
427
428release:
429 if (control != NULL)
430 m_freem(control);
431 if (m != NULL)
432 m_freem(m);
433 return (error);
434}
435
/*
 * uipc_sense(): fstat() handler for local sockets.  Reports the send
 * high-water mark (plus the peer's queued receive data for connected
 * streams) as st_blksize, and a lazily assigned fake inode number so
 * each socket stats uniquely.
 */
436 static int
437 uipc_sense(struct socket *so, struct stat *sb)
438 {
439 	struct unpcb *unp = sotounpcb(so);
440 	struct socket *so2;
441 
442 	if (unp == NULL)
443 		return (EINVAL);
444 	UNP_LOCK();
445 	sb->st_blksize = so->so_snd.sb_hiwat;
446 	if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
447 		so2 = unp->unp_conn->unp_socket;
448 		sb->st_blksize += so2->so_rcv.sb_cc;
449 	}
450 	sb->st_dev = NOUDEV;
	/* Assign an inode number on first use; skip 0 (means "unset"). */
451 	if (unp->unp_ino == 0)
452 		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
453 	sb->st_ino = unp->unp_ino;
454 	UNP_UNLOCK();
455 	return (0);
456 }
457
/*
 * uipc_shutdown(): shutdown(2) handler — stop sends on this socket and
 * tell a connected stream peer it will receive no more data.
 */
458 static int
459 uipc_shutdown(struct socket *so)
460 {
461 	struct unpcb *unp = sotounpcb(so);
462 
463 	if (unp == NULL)
464 		return (EINVAL);
465 	UNP_LOCK();
466 	socantsendmore(so);
467 	unp_shutdown(unp);
468 	UNP_UNLOCK();
469 	return (0);
470 }
471
/*
 * uipc_sockaddr(): getsockname(2) handler — return a malloc'd copy of
 * the socket's bound address, or the empty sun_noname if unbound.
 * Caller frees *nam (M_SONAME).
 */
472 static int
473 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
474 {
475 	struct unpcb *unp = sotounpcb(so);
476 	const struct sockaddr *sa;
477 
478 	if (unp == NULL)
479 		return (EINVAL);
	/* Allocate before taking the lock; copy under it. */
480 	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
481 	UNP_LOCK();
482 	if (unp->unp_addr != NULL)
483 		sa = (struct sockaddr *) unp->unp_addr;
484 	else
485 		sa = &sun_noname;
486 	bcopy(sa, *nam, sa->sa_len);
487 	UNP_UNLOCK();
488 	return (0);
489 }
490
/*
 * Protocol user-request switch for PF_LOCAL sockets.  Positional
 * initializer: the order must match the member layout of
 * struct pr_usrreqs.  Unsupported operations use the pru_*_notsupp
 * stubs; generic sosend/soreceive/sopoll are used directly.
 */
491 struct pr_usrreqs uipc_usrreqs = {
492 	uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
493 	uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
494 	uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
495 	uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
496 	sosend, soreceive, sopoll, pru_sosetlabel_null
497 };
498
/*
 * uipc_ctloutput(): [gs]etsockopt(2) handler for local sockets.  Only
 * the read-only LOCAL_PEERCRED option is supported: it returns the
 * peer credentials captured at connect/listen time (UNP_HAVEPC).
 *
 * NOTE(review): unlike the other uipc_* entry points in this file,
 * unp is dereferenced without a NULL check — presumably callers
 * guarantee the pcb is attached; confirm.
 */
499 int
500 uipc_ctloutput(so, sopt)
501 	struct socket *so;
502 	struct sockopt *sopt;
503 {
504 	struct unpcb *unp = sotounpcb(so);
505 	struct xucred xu;
506 	int error;
507 
508 	switch (sopt->sopt_dir) {
509 	case SOPT_GET:
510 		switch (sopt->sopt_name) {
511 		case LOCAL_PEERCRED:
512 			error = 0;
513 			UNP_LOCK();
514 			if (unp->unp_flags & UNP_HAVEPC)
515 				xu = unp->unp_peercred;
516 			else {
517 				if (so->so_type == SOCK_STREAM)
518 					error = ENOTCONN;
519 				else
520 					error = EINVAL;
521 			}
522 			UNP_UNLOCK();
			/* Copy out after dropping the lock (may sleep). */
523 			if (error == 0)
524 				error = sooptcopyout(sopt, &xu, sizeof(xu));
525 			break;
526 		default:
527 			error = EOPNOTSUPP;
528 			break;
529 		}
530 		break;
531 	case SOPT_SET:
532 	default:
533 		error = EOPNOTSUPP;
534 		break;
535 	}
536 	return (error);
537 }
538
539/*
540 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
541 * for stream sockets, although the total for sender and receiver is
542 * actually only PIPSIZ.
543 * Datagram sockets really use the sendspace as the maximum datagram size,
544 * and don't really want to reserve the sendspace. Their recvspace should
545 * be large enough for at least one max-size datagram plus address.
546 */
/* Default per-direction buffering for stream sockets (see above). */
547#ifndef PIPSIZ
548#define	PIPSIZ	8192
549#endif
550 static u_long	unpst_sendspace = PIPSIZ;
551 static u_long	unpst_recvspace = PIPSIZ;
552 static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
553 static u_long	unpdg_recvspace = 4*1024;
554 
555 static int	unp_rights;			/* file descriptors in flight */
556 
/*
 * Runtime tunables for the buffer sizes above, plus a read-only count
 * of descriptors in flight.
 *
 * NOTE(review): the u_long variables are exported via SYSCTL_INT; on
 * an LP64 platform that exposes only part of the object — presumably
 * SYSCTL_ULONG is intended; confirm before changing the oid types.
 */
557 SYSCTL_DECL(_net_local_stream);
558 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
559 	   &unpst_sendspace, 0, "");
560 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
561 	   &unpst_recvspace, 0, "");
562 SYSCTL_DECL(_net_local_dgram);
563 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
564 	   &unpdg_sendspace, 0, "");
565 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
566 	   &unpdg_recvspace, 0, "");
567 SYSCTL_DECL(_net_local);
568 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
569
/*
 * unp_attach(): allocate and initialize a unpcb for a newly created
 * local socket, reserving default buffer space if the caller has not
 * already done so, and link it onto the global datagram or stream pcb
 * list under the UNP lock.
 */
570 static int
571 unp_attach(so)
572 	struct socket *so;
573 {
574 	register struct unpcb *unp;
575 	int error;
576 
577 	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
578 		switch (so->so_type) {
579 
580 		case SOCK_STREAM:
581 			error = soreserve(so, unpst_sendspace, unpst_recvspace);
582 			break;
583 
584 		case SOCK_DGRAM:
585 			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
586 			break;
587 
588 		default:
589 			panic("unp_attach");
590 		}
591 		if (error)
592 			return (error);
593 	}
	/* M_WAITOK sleeps rather than fail; NULL check is belt-and-braces. */
594 	unp = uma_zalloc(unp_zone, M_WAITOK);
595 	if (unp == NULL)
596 		return (ENOBUFS);
597 	bzero(unp, sizeof *unp);
598 	LIST_INIT(&unp->unp_refs);
599 	unp->unp_socket = so;
600 
601 	UNP_LOCK();
602 	unp->unp_gencnt = ++unp_gencnt;
603 	unp_count++;
604 	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
605 			 : &unp_shead, unp, unp_link);
606 	UNP_UNLOCK();
607 
608 	so->so_pcb = unp;
609 	return (0);
610 }
611
/*
 * unp_detach(): tear down a unpcb on final socket close: unlink it
 * from the global list, break the vnode binding, disconnect the peer
 * and any datagram senders referencing us, flush in-flight rights if
 * any exist system-wide, then free the pcb.  Called with the UNP lock
 * held; drops it before freeing.
 */
612 static void
613 unp_detach(unp)
614 	register struct unpcb *unp;
615 {
616 	struct vnode *vp;
617 
618 	UNP_LOCK_ASSERT();
619 
620 	LIST_REMOVE(unp, unp_link);
621 	unp->unp_gencnt = ++unp_gencnt;
622 	--unp_count;
623 	if ((vp = unp->unp_vnode) != NULL) {
624 		/*
625 		 * XXXRW: should v_socket be frobbed only while holding
626 		 * Giant?
627 		 */
628 		unp->unp_vnode->v_socket = NULL;
629 		unp->unp_vnode = NULL;
630 	}
631 	if (unp->unp_conn != NULL)
632 		unp_disconnect(unp);
	/* Reset every datagram socket still connected to us. */
633 	while (!LIST_EMPTY(&unp->unp_refs)) {
634 		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
635 		unp_drop(ref, ECONNRESET);
636 	}
637 	soisdisconnected(unp->unp_socket);
638 	unp->unp_socket->so_pcb = NULL;
639 	if (unp_rights) {
640 		/*
641 		 * Normally the receive buffer is flushed later,
642 		 * in sofree, but if our receive buffer holds references
643 		 * to descriptors that are now garbage, we will dispose
644 		 * of those descriptor references after the garbage collector
645 		 * gets them (resulting in a "panic: closef: count < 0").
646 		 */
647 		sorflush(unp->unp_socket);
648 		unp_gc();
649 	}
650 	if (unp->unp_addr != NULL)
651 		FREE(unp->unp_addr, M_SONAME);
652 	UNP_UNLOCK();
653 	uma_zfree(unp_zone, unp);
	/* Drop the vnode reference under Giant (VFS requirement). */
654 	if (vp) {
655 		mtx_lock(&Giant);
656 		vrele(vp);
657 		mtx_unlock(&Giant);
658 	}
659 }
660
/*
 * unp_bind(): bind(2) handler — create a VSOCK vnode at the filesystem
 * path carried in 'nam' and attach the socket to it.  An existing file
 * at the path yields EADDRINUSE (no address reuse/adoption).  Runs the
 * whole namei/create dance under Giant; takes the UNP lock only for
 * the final pcb/vnode linkage.
 */
661 static int
662 unp_bind(unp, nam, td)
663 	struct unpcb *unp;
664 	struct sockaddr *nam;
665 	struct thread *td;
666 {
667 	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
668 	struct vnode *vp;
669 	struct mount *mp;
670 	struct vattr vattr;
671 	int error, namelen;
672 	struct nameidata nd;
673 	char *buf;
674 
675 	/*
676 	 * XXXRW: This test-and-set of unp_vnode is non-atomic; the
677 	 * unlocked read here is fine, but the value of unp_vnode needs
678 	 * to be tested again after we do all the lookups to see if the
679 	 * pcb is still unbound?
680 	 */
681 	if (unp->unp_vnode != NULL)
682 		return (EINVAL);
683 
684 	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
685 	if (namelen <= 0)
686 		return (EINVAL);
687 
	/* NUL-terminate the path for namei; sun_path need not be. */
688 	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
689 	strlcpy(buf, soun->sun_path, namelen + 1);
690 
691 	mtx_lock(&Giant);
692 restart:
693 	mtx_assert(&Giant, MA_OWNED);
694 	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
695 	    buf, td);
696 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
697 	error = namei(&nd);
698 	if (error)
699 		goto done;
700 	vp = nd.ni_vp;
	/*
	 * Path already exists, or the filesystem is suspending writes:
	 * clean up the lookup, fail with EADDRINUSE in the former case,
	 * otherwise wait for the suspension to end and retry.
	 */
701 	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
702 		NDFREE(&nd, NDF_ONLY_PNBUF);
703 		if (nd.ni_dvp == vp)
704 			vrele(nd.ni_dvp);
705 		else
706 			vput(nd.ni_dvp);
707 		if (vp != NULL) {
708 			vrele(vp);
709 			error = EADDRINUSE;
710 			goto done;
711 		}
712 		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
713 		if (error)
714 			goto done;
715 		goto restart;
716 	}
717 	VATTR_NULL(&vattr);
718 	vattr.va_type = VSOCK;
719 	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
720#ifdef MAC
721 	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
722 	    &vattr);
723#endif
724 	if (error == 0) {
725 		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
726 		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
727 	}
728 	NDFREE(&nd, NDF_ONLY_PNBUF);
729 	vput(nd.ni_dvp);
730 	if (error)
731 		goto done;
732 	vp = nd.ni_vp;
733 	ASSERT_VOP_LOCKED(vp, "unp_bind");
734 	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
	/* Link socket and vnode both ways under the UNP lock. */
735 	UNP_LOCK();
736 	vp->v_socket = unp->unp_socket;
737 	unp->unp_vnode = vp;
738 	unp->unp_addr = soun;
739 	UNP_UNLOCK();
740 	VOP_UNLOCK(vp, 0, td);
741 	vn_finished_write(mp);
742 done:
743 	mtx_unlock(&Giant);
744 	free(buf, M_TEMP);
745 	return (error);
746 }
747
/*
 * Connect socket "so" to the AF_LOCAL address in "nam": translate the
 * pathname to a vnode via namei(), find the bound socket hanging off
 * that vnode, and for connection-oriented protocols create a child
 * server socket with sonewconn() and exchange peer credentials.
 *
 * Entered with the UNP lock held; the lock is dropped around the
 * blocking malloc and the Giant-protected VFS lookup, and is held
 * again on return.  Returns 0 or an errno value.
 */
static int
unp_connect(so, nam, td)
	struct socket *so;
	struct sockaddr *nam;
	struct thread *td;
{
	register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	register struct vnode *vp;
	register struct socket *so2, *so3;
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2, *unp3;
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];
	struct sockaddr *sa;

	UNP_LOCK_ASSERT();

	/* Copy the pathname out of the sockaddr, NUL-terminating it. */
	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0)
		return (EINVAL);
	strlcpy(buf, soun->sun_path, len + 1);
	/* Drop the UNP lock across the blocking malloc and namei(). */
	UNP_UNLOCK();
	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	mtx_lock(&Giant);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
	error = namei(&nd);
	if (error)
		vp = NULL;
	else
		vp = nd.ni_vp;
	ASSERT_VOP_LOCKED(vp, "unp_connect");
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error)
		goto bad;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	/* Connecting requires write permission on the socket node. */
	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
	if (error)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		/* No socket is currently bound to this vnode. */
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	mtx_unlock(&Giant);
	UNP_LOCK();
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if (so2->so_options & SO_ACCEPTCONN) {
			/*
			 * NB: drop locks here so unp_attach is entered
			 * w/o locks; this avoids a recursive lock
			 * of the head and holding sleep locks across
			 * a (potentially) blocking malloc.
			 */
			UNP_UNLOCK();
			so3 = sonewconn(so2, 0);
			UNP_LOCK();
		} else
			so3 = NULL;
		if (so3 == NULL) {
			error = ECONNREFUSED;
			goto bad2;
		}
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		/* Give the child socket a copy of the listener's address. */
		if (unp2->unp_addr != NULL) {
			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
			unp3->unp_addr = (struct sockaddr_un *) sa;
			sa = NULL;
		}
		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(td->td_ucred, &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;
#ifdef MAC
		SOCK_LOCK(so);
		mac_set_socket_peer_from_socket(so, so3);
		mac_set_socket_peer_from_socket(so3, so);
		SOCK_UNLOCK(so);
#endif

		/* Complete the handshake against the new child socket. */
		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad2:
	UNP_UNLOCK();
	mtx_lock(&Giant);
bad:
	mtx_assert(&Giant, MA_OWNED);
	if (vp != NULL)
		vput(vp);
	mtx_unlock(&Giant);
	free(sa, M_SONAME);
	UNP_LOCK();
	return (error);
}
870
871static int
872unp_connect2(so, so2)
873 register struct socket *so;
874 register struct socket *so2;
875{
876 register struct unpcb *unp = sotounpcb(so);
877 register struct unpcb *unp2;
878
879 UNP_LOCK_ASSERT();
880
881 if (so2->so_type != so->so_type)
882 return (EPROTOTYPE);
883 unp2 = sotounpcb(so2);
884 unp->unp_conn = unp2;
885 switch (so->so_type) {
886
887 case SOCK_DGRAM:
888 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
889 soisconnected(so);
890 break;
891
892 case SOCK_STREAM:
893 unp2->unp_conn = unp;
894 soisconnected(so);
895 soisconnected(so2);
896 break;
897
898 default:
899 panic("unp_connect2");
900 }
901 return (0);
902}
903
904static void
905unp_disconnect(unp)
906 struct unpcb *unp;
907{
908 register struct unpcb *unp2 = unp->unp_conn;
909
910 UNP_LOCK_ASSERT();
911
912 if (unp2 == NULL)
913 return;
914 unp->unp_conn = NULL;
915 switch (unp->unp_socket->so_type) {
916
917 case SOCK_DGRAM:
918 LIST_REMOVE(unp, unp_reflink);
919 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
920 break;
921
922 case SOCK_STREAM:
923 soisdisconnected(unp->unp_socket);
924 unp2->unp_conn = NULL;
925 soisdisconnected(unp2->unp_socket);
926 break;
927 }
928}
929
#ifdef notdef
/*
 * Abort a UNIX-domain socket by detaching its PCB.  Compiled out
 * (notdef); retained for reference only.
 */
void
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif
939
/*
 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed
 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers
 * are safe to reference.  It first scans the list of struct unpcb's to
 * generate a pointer list, then it rescans its list one entry at a time to
 * externalize and copyout.  It checks the generation number to see if a
 * struct unpcb has been reused, and will skip it if so.
 */
static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen *xug;
	struct unp_head *head;
	struct xunpcb *xu;

	/* arg1 selects the list to export: datagram or stream PCBs. */
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		/* Size probe only: report an estimate padded for growth. */
		n = unp_count;
		req->oldidx = 2 * (sizeof *xug)
			+ (n + n/8) * sizeof(struct xunpcb);
		return (0);
	}

	/* This sysctl is read-only; reject attempts to set it. */
	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
	UNP_LOCK();
	/* Snapshot generation and count; the list may change afterwards. */
	gencnt = unp_gencnt;
	n = unp_count;
	UNP_UNLOCK();

	xug->xug_len = sizeof *xug;
	xug->xug_count = n;
	xug->xug_gen = gencnt;
	xug->xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, xug, sizeof *xug);
	if (error) {
		free(xug, M_TEMP);
		return (error);
	}

	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);

	UNP_LOCK();
	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
	     unp = LIST_NEXT(unp, unp_link)) {
		if (unp->unp_gencnt <= gencnt) {
			/* Hide sockets this credential may not see. */
			if (cr_cansee(req->td->td_ucred,
			    unp->unp_socket->so_cred))
				continue;
			unp_list[i++] = unp;
		}
	}
	UNP_UNLOCK();
	n = i;			/* in case we lost some during malloc */

	error = 0;
	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK);
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		/* Skip entries recycled since our snapshot (gencnt bumped). */
		if (unp->unp_gencnt <= gencnt) {
			xu->xu_len = sizeof *xu;
			xu->xu_unpp = unp;
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr != NULL)
				bcopy(unp->unp_addr, &xu->xu_addr,
				      unp->unp_addr->sun_len);
			if (unp->unp_conn != NULL &&
			    unp->unp_conn->unp_addr != NULL)
				bcopy(unp->unp_conn->unp_addr,
				      &xu->xu_caddr,
				      unp->unp_conn->unp_addr->sun_len);
			bcopy(unp, &xu->xu_unp, sizeof *unp);
			sotoxsocket(unp->unp_socket, &xu->xu_socket);
			error = SYSCTL_OUT(req, xu, sizeof *xu);
		}
	}
	free(xu, M_TEMP);
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		xug->xug_gen = unp_gencnt;
		xug->xug_sogen = so_gencnt;
		xug->xug_count = unp_count;
		error = SYSCTL_OUT(req, xug, sizeof *xug);
	}
	free(unp_list, M_TEMP);
	free(xug, M_TEMP);
	return (error);
}
1050
/*
 * Export the active local datagram and stream PCB lists via sysctl;
 * both are served by unp_pcblist(), with the socket type in arg1.
 */
SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local stream sockets");
1057
1058static void
1059unp_shutdown(unp)
1060 struct unpcb *unp;
1061{
1062 struct socket *so;
1063
1064 UNP_LOCK_ASSERT();
1065
1066 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
1067 (so = unp->unp_conn->unp_socket))
1068 socantrcvmore(so);
1069}
1070
1071static void
1072unp_drop(unp, errno)
1073 struct unpcb *unp;
1074 int errno;
1075{
1076 struct socket *so = unp->unp_socket;
1077
1078 UNP_LOCK_ASSERT();
1079
1080 so->so_error = errno;
1081 unp_disconnect(unp);
1082}
1083
#ifdef notdef
/*
 * Empty drain hook; compiled out (notdef) and retained for reference.
 */
void
unp_drain()
{

}
#endif
1091
/*
 * Release an array of "fdcount" in-flight file references carried in
 * an SCM_RIGHTS message, discarding each one.
 */
static void
unp_freerights(rp, fdcount)
	struct file **rp;
	int fdcount;
{
	struct file *fp;
	int n;

	for (n = 0; n < fdcount; n++) {
		fp = rp[n];
		/*
		 * zero the pointer before calling
		 * unp_discard since it may end up
		 * in unp_gc()..
		 */
		rp[n] = 0;
		unp_discard(fp);
	}
}
1111
/*
 * Convert in-kernel control messages to their user-visible form on
 * receive.  SCM_RIGHTS messages, carried internally as arrays of
 * struct file pointers, are turned into newly allocated descriptors
 * in the receiving process's file table; all other control messages
 * are copied through unchanged.  If controlp is NULL the carried
 * rights are simply released.  Always consumes "control"; returns 0
 * or an errno value.
 */
int
unp_externalize(control, controlp)
	struct mbuf *control, **controlp;
{
	struct thread *td = curthread;		/* XXX */
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int i;
	int *fdp;
	struct file **rp;
	struct file *fp;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, newfds;
	int f;
	u_int newlen;

	error = 0;
	if (controlp != NULL) /* controlp == NULL => free control messages */
		*controlp = NULL;

	while (cm != NULL) {
		/* Reject a truncated or over-long cmsg header. */
		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
			error = EINVAL;
			break;
		}

		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

		if (cm->cmsg_level == SOL_SOCKET
		    && cm->cmsg_type == SCM_RIGHTS) {
			newfds = datalen / sizeof(struct file *);
			rp = data;

			/* If we're not outputting the descriptors free them. */
			if (error || controlp == NULL) {
				unp_freerights(rp, newfds);
				goto next;
			}
			FILEDESC_LOCK(td->td_proc->p_fd);
			/* if the new FD's will not fit free them.  */
			if (!fdavail(td, newfds)) {
				FILEDESC_UNLOCK(td->td_proc->p_fd);
				error = EMSGSIZE;
				unp_freerights(rp, newfds);
				goto next;
			}
			/*
			 * now change each pointer to an fd in the global
			 * table to an integer that is the index to the
			 * local fd table entry that we set up to point
			 * to the global one we are transferring.
			 */
			newlen = newfds * sizeof(int);
			*controlp = sbcreatecontrol(NULL, newlen,
			    SCM_RIGHTS, SOL_SOCKET);
			if (*controlp == NULL) {
				FILEDESC_UNLOCK(td->td_proc->p_fd);
				error = E2BIG;
				unp_freerights(rp, newfds);
				goto next;
			}

			fdp = (int *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			for (i = 0; i < newfds; i++) {
				if (fdalloc(td, 0, &f))
					panic("unp_externalize fdalloc failed");
				fp = *rp++;
				td->td_proc->p_fd->fd_ofiles[f] = fp;
				/* The fd table ref replaces the message ref. */
				FILE_LOCK(fp);
				fp->f_msgcount--;
				FILE_UNLOCK(fp);
				unp_rights--;
				*fdp++ = f;
			}
			FILEDESC_UNLOCK(td->td_proc->p_fd);
		} else { /* We can just copy anything else across */
			if (error || controlp == NULL)
				goto next;
			*controlp = sbcreatecontrol(NULL, datalen,
			    cm->cmsg_type, cm->cmsg_level);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto next;
			}
			bcopy(data,
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *)),
			    datalen);
		}

		controlp = &(*controlp)->m_next;

next:
		/* Advance to the next cmsg in the buffer, if any remains. */
		if (CMSG_SPACE(datalen) < clen) {
			clen -= CMSG_SPACE(datalen);
			cm = (struct cmsghdr *)
			    ((caddr_t)cm + CMSG_SPACE(datalen));
		} else {
			clen = 0;
			cm = NULL;
		}
	}

	m_freem(control);

	return (error);
}
1220
/*
 * Module initialization: create the UMA zone for unpcb allocation,
 * initialize the global datagram/stream PCB lists, and set up the
 * UNP linkage lock.  The zone is created UMA_ZONE_NOFREE so that
 * unp_pcblist() may safely dereference potentially stale pointers.
 */
void
unp_init(void)
{
	unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
	if (unp_zone == NULL)
		panic("unp_init");
	/* Cap the number of unpcbs at the mbuf-cluster limit. */
	uma_zone_set_max(unp_zone, nmbclusters);
	LIST_INIT(&unp_dhead);
	LIST_INIT(&unp_shead);

	UNP_LOCK_INIT();
}
1234
/*
 * Convert user-supplied control messages to their in-kernel form on
 * send.  SCM_CREDS is filled in with the sending thread's credentials,
 * SCM_RIGHTS descriptor numbers are replaced by referenced struct
 * file pointers, and SCM_TIMESTAMP is stamped with the current time.
 * Consumes *controlp and replaces it with the internalized chain;
 * returns 0 or an errno value.
 */
static int
unp_internalize(controlp, td)
	struct mbuf **controlp;
	struct thread *td;
{
	struct mbuf *control = *controlp;
	struct proc *p = td->td_proc;
	struct filedesc *fdescp = p->p_fd;
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	struct cmsgcred *cmcred;
	struct file **rp;
	struct file *fp;
	struct timeval *tv;
	int i, fd, *fdp;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, oldfds;
	u_int newlen;

	error = 0;
	*controlp = NULL;

	while (cm != NULL) {
		/* Only SOL_SOCKET control messages are accepted here. */
		if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET
		    || cm->cmsg_len > clen) {
			error = EINVAL;
			goto out;
		}

		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

		switch (cm->cmsg_type) {
		/*
		 * Fill in credential information.
		 */
		case SCM_CREDS:
			*controlp = sbcreatecontrol(NULL, sizeof(*cmcred),
			    SCM_CREDS, SOL_SOCKET);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto out;
			}

			cmcred = (struct cmsgcred *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			cmcred->cmcred_pid = p->p_pid;
			cmcred->cmcred_uid = td->td_ucred->cr_ruid;
			cmcred->cmcred_gid = td->td_ucred->cr_rgid;
			cmcred->cmcred_euid = td->td_ucred->cr_uid;
			cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups,
							CMGROUP_MAX);
			for (i = 0; i < cmcred->cmcred_ngroups; i++)
				cmcred->cmcred_groups[i] =
				    td->td_ucred->cr_groups[i];
			break;

		case SCM_RIGHTS:
			oldfds = datalen / sizeof (int);
			/*
			 * check that all the FDs passed in refer to legal files
			 * If not, reject the entire operation.
			 */
			fdp = data;
			FILEDESC_LOCK(fdescp);
			for (i = 0; i < oldfds; i++) {
				fd = *fdp++;
				if ((unsigned)fd >= fdescp->fd_nfiles ||
				    fdescp->fd_ofiles[fd] == NULL) {
					FILEDESC_UNLOCK(fdescp);
					error = EBADF;
					goto out;
				}
				fp = fdescp->fd_ofiles[fd];
				/* e.g. kqueues may not be passed over a socket. */
				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
					FILEDESC_UNLOCK(fdescp);
					error = EOPNOTSUPP;
					goto out;
				}

			}
			/*
			 * Now replace the integer FDs with pointers to
			 * the associated global file table entry..
			 */
			newlen = oldfds * sizeof(struct file *);
			*controlp = sbcreatecontrol(NULL, newlen,
			    SCM_RIGHTS, SOL_SOCKET);
			if (*controlp == NULL) {
				FILEDESC_UNLOCK(fdescp);
				error = E2BIG;
				goto out;
			}

			fdp = data;
			rp = (struct file **)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			for (i = 0; i < oldfds; i++) {
				fp = fdescp->fd_ofiles[*fdp++];
				*rp++ = fp;
				/* Hold both a file ref and a message ref. */
				FILE_LOCK(fp);
				fp->f_count++;
				fp->f_msgcount++;
				FILE_UNLOCK(fp);
				unp_rights++;
			}
			FILEDESC_UNLOCK(fdescp);
			break;

		case SCM_TIMESTAMP:
			*controlp = sbcreatecontrol(NULL, sizeof(*tv),
			    SCM_TIMESTAMP, SOL_SOCKET);
			if (*controlp == NULL) {
				error = ENOBUFS;
				goto out;
			}
			tv = (struct timeval *)
			    CMSG_DATA(mtod(*controlp, struct cmsghdr *));
			microtime(tv);
			break;

		default:
			error = EINVAL;
			goto out;
		}

		controlp = &(*controlp)->m_next;

		/* Advance to the next cmsg in the buffer, if any remains. */
		if (CMSG_SPACE(datalen) < clen) {
			clen -= CMSG_SPACE(datalen);
			cm = (struct cmsghdr *)
			    ((caddr_t)cm + CMSG_SPACE(datalen));
		} else {
			clen = 0;
			cm = NULL;
		}
	}

out:
	m_freem(control);

	return (error);
}
1378
/*
 * unp_defer: count of files marked FDEFER, forcing another mark pass.
 * unp_gcing: set while a collection is in progress to bar recursion.
 */
static int unp_defer, unp_gcing;

/*
 * Mark-and-sweep garbage collector for file descriptors trapped in
 * in-flight SCM_RIGHTS messages.  The mark phase repeatedly walks the
 * global file list, marking files reachable from outside (f_count >
 * f_msgcount) and, via unp_scan()/unp_mark(), any files reachable
 * through the receive buffers of marked sockets; it loops until no
 * FDEFER marks remain.  The sweep phase then collects every unmarked
 * file whose only references are message references and closes it.
 */
static void
unp_gc()
{
	register struct file *fp, *nextfp;
	register struct socket *so;
	struct file **extra_ref, **fpp;
	int nunref, i;

	UNP_LOCK_ASSERT();

	/* A GC pass already in progress will cover our work too. */
	if (unp_gcing)
		return;
	unp_gcing = 1;
	unp_defer = 0;
	/*
	 * before going through all this, set all FDs to
	 * be NOT defered and NOT externally accessible
	 */
	/*
	 * XXXRW: Acquiring a sleep lock while holding UNP
	 * mutex cannot be a good thing.
	 */
	sx_slock(&filelist_lock);
	LIST_FOREACH(fp, &filehead, f_list)
		fp->f_gcflag &= ~(FMARK|FDEFER);
	do {
		LIST_FOREACH(fp, &filehead, f_list) {
			FILE_LOCK(fp);
			/*
			 * If the file is not open, skip it
			 */
			if (fp->f_count == 0) {
				FILE_UNLOCK(fp);
				continue;
			}
			/*
			 * If we already marked it as 'defer'  in a
			 * previous pass, then try process it this time
			 * and un-mark it
			 */
			if (fp->f_gcflag & FDEFER) {
				fp->f_gcflag &= ~FDEFER;
				unp_defer--;
			} else {
				/*
				 * if it's not defered, then check if it's
				 * already marked.. if so skip it
				 */
				if (fp->f_gcflag & FMARK) {
					FILE_UNLOCK(fp);
					continue;
				}
				/*
				 * If all references are from messages
				 * in transit, then skip it. it's not
				 * externally accessible.
				 */
				if (fp->f_count == fp->f_msgcount) {
					FILE_UNLOCK(fp);
					continue;
				}
				/*
				 * If it got this far then it must be
				 * externally accessible.
				 */
				fp->f_gcflag |= FMARK;
			}
			/*
			 * either it was defered, or it is externally
			 * accessible and not already marked so.
			 * Now check if it is possibly one of OUR sockets.
			 */
			if (fp->f_type != DTYPE_SOCKET ||
			    (so = fp->f_data) == NULL) {
				FILE_UNLOCK(fp);
				continue;
			}
			FILE_UNLOCK(fp);
			if (so->so_proto->pr_domain != &localdomain ||
			    (so->so_proto->pr_flags&PR_RIGHTS) == 0)
				continue;
#ifdef notdef
			if (so->so_rcv.sb_flags & SB_LOCK) {
				/*
				 * This is problematical; it's not clear
				 * we need to wait for the sockbuf to be
				 * unlocked (on a uniprocessor, at least),
				 * and it's also not clear what to do
				 * if sbwait returns an error due to receipt
				 * of a signal.  If sbwait does return
				 * an error, we'll go into an infinite
				 * loop.  Delete all of this for now.
				 */
				(void) sbwait(&so->so_rcv);
				goto restart;
			}
#endif
			/*
			 * So, Ok, it's one of our sockets and it IS externally
			 * accessible (or was defered). Now we look
			 * to see if we hold any file descriptors in its
			 * message buffers. Follow those links and mark them
			 * as accessible too.
			 */
			unp_scan(so->so_rcv.sb_mb, unp_mark);
		}
	} while (unp_defer);
	sx_sunlock(&filelist_lock);
	/*
	 * We grab an extra reference to each of the file table entries
	 * that are not otherwise accessible and then free the rights
	 * that are stored in messages on them.
	 *
	 * The bug in the orginal code is a little tricky, so I'll describe
	 * what's wrong with it here.
	 *
	 * It is incorrect to simply unp_discard each entry for f_msgcount
	 * times -- consider the case of sockets A and B that contain
	 * references to each other.  On a last close of some other socket,
	 * we trigger a gc since the number of outstanding rights (unp_rights)
	 * is non-zero.  If during the sweep phase the gc code un_discards,
	 * we end up doing a (full) closef on the descriptor.  A closef on A
	 * results in the following chain.  Closef calls soo_close, which
	 * calls soclose.   Soclose calls first (through the switch
	 * uipc_usrreq) unp_detach, which re-invokes unp_gc.  Unp_gc simply
	 * returns because the previous instance had set unp_gcing, and
	 * we return all the way back to soclose, which marks the socket
	 * with SS_NOFDREF, and then calls sofree.  Sofree calls sorflush
	 * to free up the rights that are queued in messages on the socket A,
	 * i.e., the reference on B.  The sorflush calls via the dom_dispose
	 * switch unp_dispose, which unp_scans with unp_discard.  This second
	 * instance of unp_discard just calls closef on B.
	 *
	 * Well, a similar chain occurs on B, resulting in a sorflush on B,
	 * which results in another closef on A.  Unfortunately, A is already
	 * being closed, and the descriptor has already been marked with
	 * SS_NOFDREF, and soclose panics at this point.
	 *
	 * Here, we first take an extra reference to each inaccessible
	 * descriptor.  Then, we call sorflush ourself, since we know
	 * it is a Unix domain socket anyhow.  After we destroy all the
	 * rights carried in messages, we do a last closef to get rid
	 * of our extra reference.  This is the last close, and the
	 * unp_detach etc will shut down the socket.
	 *
	 * 91/09/19, bsy@cs.cmu.edu
	 */
	extra_ref = malloc(nfiles * sizeof(struct file *), M_TEMP, M_WAITOK);
	sx_slock(&filelist_lock);
	for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref;
	    fp != NULL; fp = nextfp) {
		nextfp = LIST_NEXT(fp, f_list);
		FILE_LOCK(fp);
		/*
		 * If it's not open, skip it
		 */
		if (fp->f_count == 0) {
			FILE_UNLOCK(fp);
			continue;
		}
		/*
		 * If all refs are from msgs, and it's not marked accessible
		 * then it must be referenced from some unreachable cycle
		 * of (shut-down) FDs, so include it in our
		 * list of FDs to remove
		 */
		if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) {
			*fpp++ = fp;
			nunref++;
			fp->f_count++;
		}
		FILE_UNLOCK(fp);
	}
	sx_sunlock(&filelist_lock);
	/*
	 * for each FD on our hit list, do the following two things
	 */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
		struct file *tfp = *fpp;
		FILE_LOCK(tfp);
		if (tfp->f_type == DTYPE_SOCKET &&
		    tfp->f_data != NULL) {
			FILE_UNLOCK(tfp);
			/* Flush queued rights, breaking reference cycles. */
			sorflush(tfp->f_data);
		} else {
			FILE_UNLOCK(tfp);
		}
	}
	/* Drop the extra references taken above; this is the last close. */
	for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
		closef(*fpp, (struct thread *) NULL);
	free(extra_ref, M_TEMP);
	unp_gcing = 0;
}
1574
1575void
1576unp_dispose(m)
1577 struct mbuf *m;
1578{
1579
1580 if (m)
1581 unp_scan(m, unp_discard);
1582}
1583
1584static int
1585unp_listen(unp, td)
1586 struct unpcb *unp;
1587 struct thread *td;
1588{
1589 UNP_LOCK_ASSERT();
1590
1591 /*
1592 * XXXRW: Why populate the local peer cred with our own credential?
1593 */
1594 cru2x(td->td_ucred, &unp->unp_peercred);
1595 unp->unp_flags |= UNP_HAVEPCCACHED;
1596 return (0);
1597}
1598
/*
 * Walk a chain of records (linked through m_act) looking for
 * MT_CONTROL mbufs; for every SCM_RIGHTS control message found,
 * apply "op" to each struct file pointer it carries.  Used by
 * unp_gc() (op == unp_mark) and unp_dispose() (op == unp_discard).
 */
static void
unp_scan(m0, op)
	register struct mbuf *m0;
	void (*op)(struct file *);
{
	struct mbuf *m;
	struct file **rp;
	struct cmsghdr *cm;
	void *data;
	int i;
	socklen_t clen, datalen;
	int qfds;

	while (m0 != NULL) {
		for (m = m0; m; m = m->m_next) {
			if (m->m_type != MT_CONTROL)
				continue;

			cm = mtod(m, struct cmsghdr *);
			clen = m->m_len;

			while (cm != NULL) {
				/* Stop on a truncated or malformed cmsg. */
				if (sizeof(*cm) > clen || cm->cmsg_len > clen)
					break;

				data = CMSG_DATA(cm);
				datalen = (caddr_t)cm + cm->cmsg_len
				    - (caddr_t)data;

				if (cm->cmsg_level == SOL_SOCKET &&
				    cm->cmsg_type == SCM_RIGHTS) {
					qfds = datalen / sizeof (struct file *);
					rp = data;
					for (i = 0; i < qfds; i++)
						(*op)(*rp++);
				}

				/* Advance to the next cmsg, if any remains. */
				if (CMSG_SPACE(datalen) < clen) {
					clen -= CMSG_SPACE(datalen);
					cm = (struct cmsghdr *)
					    ((caddr_t)cm + CMSG_SPACE(datalen));
				} else {
					clen = 0;
					cm = NULL;
				}
			}
		}
		m0 = m0->m_act;
	}
}
1649
1650static void
1651unp_mark(fp)
1652 struct file *fp;
1653{
1654 if (fp->f_gcflag & FMARK)
1655 return;
1656 unp_defer++;
1657 fp->f_gcflag |= (FMARK|FDEFER);
1658}
1659
1660static void
1661unp_discard(fp)
1662 struct file *fp;
1663{
1664 FILE_LOCK(fp);
1665 fp->f_msgcount--;
1666 unp_rights--;
1667 FILE_UNLOCK(fp);
1668 (void) closef(fp, (struct thread *)NULL);
1669}