34 35#include "opt_mac.h" 36 37#include <sys/param.h> 38#include <sys/domain.h> 39#include <sys/fcntl.h> 40#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 41#include <sys/file.h> 42#include <sys/filedesc.h> 43#include <sys/jail.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/mac.h> 47#include <sys/mbuf.h> 48#include <sys/mutex.h> 49#include <sys/namei.h> 50#include <sys/proc.h> 51#include <sys/protosw.h> 52#include <sys/resourcevar.h> 53#include <sys/socket.h> 54#include <sys/socketvar.h> 55#include <sys/signalvar.h> 56#include <sys/stat.h> 57#include <sys/sx.h> 58#include <sys/sysctl.h> 59#include <sys/systm.h> 60#include <sys/un.h> 61#include <sys/unpcb.h> 62#include <sys/vnode.h> 63 64#include <vm/uma.h> 65 66static uma_zone_t unp_zone; 67static unp_gen_t unp_gencnt; 68static u_int unp_count; 69 70static struct unp_head unp_shead, unp_dhead; 71 72/* 73 * Unix communications domain. 74 * 75 * TODO: 76 * SEQPACKET, RDM 77 * rethink name space problems 78 * need a proper out-of-band 79 * lock pushdown 80 */ 81static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 82static ino_t unp_ino; /* prototype for fake inode numbers */ 83 84static struct mtx unp_mtx; 85#define UNP_LOCK_INIT() \ 86 mtx_init(&unp_mtx, "unp", NULL, MTX_DEF) 87#define UNP_LOCK() mtx_lock(&unp_mtx) 88#define UNP_UNLOCK() mtx_unlock(&unp_mtx) 89#define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) 90 91static int unp_attach(struct socket *); 92static void unp_detach(struct unpcb *); 93static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); 94static int unp_connect(struct socket *,struct sockaddr *, struct thread *); 95static int unp_connect2(struct socket *so, struct socket *so2); 96static void unp_disconnect(struct unpcb *); 97static void unp_shutdown(struct unpcb *); 98static void unp_drop(struct unpcb *, int); 99static void unp_gc(void); 100static void unp_scan(struct mbuf *, void (*)(struct file *)); 101static void 
unp_mark(struct file *); 102static void unp_discard(struct file *); 103static void unp_freerights(struct file **, int); 104static int unp_internalize(struct mbuf **, struct thread *); 105static int unp_listen(struct unpcb *, struct thread *); 106 107static int 108uipc_abort(struct socket *so) 109{ 110 struct unpcb *unp = sotounpcb(so); 111 112 if (unp == NULL) 113 return (EINVAL); 114 UNP_LOCK(); 115 unp_drop(unp, ECONNABORTED); 116 unp_detach(unp); /* NB: unlocks */ 117 SOCK_LOCK(so); 118 sotryfree(so); 119 return (0); 120} 121 122static int 123uipc_accept(struct socket *so, struct sockaddr **nam) 124{ 125 struct unpcb *unp = sotounpcb(so); 126 const struct sockaddr *sa; 127 128 if (unp == NULL) 129 return (EINVAL); 130 131 /* 132 * Pass back name of connected socket, 133 * if it was bound and we are still connected 134 * (our peer may have closed already!). 135 */ 136 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 137 UNP_LOCK(); 138 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) 139 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 140 else 141 sa = &sun_noname; 142 bcopy(sa, *nam, sa->sa_len); 143 UNP_UNLOCK(); 144 return (0); 145} 146 147static int 148uipc_attach(struct socket *so, int proto, struct thread *td) 149{ 150 struct unpcb *unp = sotounpcb(so); 151 152 if (unp != NULL) 153 return (EISCONN); 154 return (unp_attach(so)); 155} 156 157static int 158uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 159{ 160 struct unpcb *unp = sotounpcb(so); 161 162 if (unp == NULL) 163 return (EINVAL); 164 165 return (unp_bind(unp, nam, td)); 166} 167 168static int 169uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 170{ 171 struct unpcb *unp = sotounpcb(so); 172 int error; 173 174 if (unp == NULL) 175 return (EINVAL); 176 UNP_LOCK(); 177 error = unp_connect(so, nam, curthread); 178 UNP_UNLOCK(); 179 return (error); 180} 181 182int 183uipc_connect2(struct socket *so1, struct socket *so2) 184{ 185 
struct unpcb *unp = sotounpcb(so1); 186 int error; 187 188 if (unp == NULL) 189 return (EINVAL); 190 191 UNP_LOCK(); 192 error = unp_connect2(so1, so2); 193 UNP_UNLOCK(); 194 return (error); 195} 196 197/* control is EOPNOTSUPP */ 198 199static int 200uipc_detach(struct socket *so) 201{ 202 struct unpcb *unp = sotounpcb(so); 203 204 if (unp == NULL) 205 return (EINVAL); 206 207 UNP_LOCK(); 208 unp_detach(unp); /* NB: unlocks unp */ 209 return (0); 210} 211 212static int 213uipc_disconnect(struct socket *so) 214{ 215 struct unpcb *unp = sotounpcb(so); 216 217 if (unp == NULL) 218 return (EINVAL); 219 UNP_LOCK(); 220 unp_disconnect(unp); 221 UNP_UNLOCK(); 222 return (0); 223} 224 225static int 226uipc_listen(struct socket *so, struct thread *td) 227{ 228 struct unpcb *unp = sotounpcb(so); 229 int error; 230 231 if (unp == NULL || unp->unp_vnode == NULL) 232 return (EINVAL); 233 UNP_LOCK(); 234 error = unp_listen(unp, td); 235 UNP_UNLOCK(); 236 return (error); 237} 238 239static int 240uipc_peeraddr(struct socket *so, struct sockaddr **nam) 241{ 242 struct unpcb *unp = sotounpcb(so); 243 const struct sockaddr *sa; 244 245 if (unp == NULL) 246 return (EINVAL); 247 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 248 UNP_LOCK(); 249 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL) 250 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 251 else { 252 /* 253 * XXX: It seems that this test always fails even when 254 * connection is established. So, this else clause is 255 * added as workaround to return PF_LOCAL sockaddr. 
256 */ 257 sa = &sun_noname; 258 } 259 bcopy(sa, *nam, sa->sa_len); 260 UNP_UNLOCK(); 261 return (0); 262} 263 264static int 265uipc_rcvd(struct socket *so, int flags) 266{ 267 struct unpcb *unp = sotounpcb(so); 268 struct socket *so2; 269 u_long newhiwat; 270 271 if (unp == NULL) 272 return (EINVAL); 273 UNP_LOCK(); 274 switch (so->so_type) { 275 case SOCK_DGRAM: 276 panic("uipc_rcvd DGRAM?"); 277 /*NOTREACHED*/ 278 279 case SOCK_STREAM: 280 if (unp->unp_conn == NULL) 281 break; 282 so2 = unp->unp_conn->unp_socket; 283 /* 284 * Adjust backpressure on sender 285 * and wakeup any waiting to write. 286 */ 287 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; 288 unp->unp_mbcnt = so->so_rcv.sb_mbcnt; 289 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - 290 so->so_rcv.sb_cc; 291 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 292 newhiwat, RLIM_INFINITY); 293 unp->unp_cc = so->so_rcv.sb_cc; 294 sowwakeup(so2); 295 break; 296 297 default: 298 panic("uipc_rcvd unknown socktype"); 299 } 300 UNP_UNLOCK(); 301 return (0); 302} 303 304/* pru_rcvoob is EOPNOTSUPP */ 305 306static int 307uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 308 struct mbuf *control, struct thread *td) 309{ 310 int error = 0; 311 struct unpcb *unp = sotounpcb(so); 312 struct socket *so2; 313 u_long newhiwat; 314 315 if (unp == NULL) { 316 error = EINVAL; 317 goto release; 318 } 319 if (flags & PRUS_OOB) { 320 error = EOPNOTSUPP; 321 goto release; 322 } 323 324 if (control != NULL && (error = unp_internalize(&control, td))) 325 goto release; 326 327 UNP_LOCK(); 328 switch (so->so_type) { 329 case SOCK_DGRAM: 330 { 331 const struct sockaddr *from; 332 333 if (nam != NULL) { 334 if (unp->unp_conn != NULL) { 335 error = EISCONN; 336 break; 337 } 338 error = unp_connect(so, nam, td); 339 if (error) 340 break; 341 } else { 342 if (unp->unp_conn == NULL) { 343 error = ENOTCONN; 344 break; 345 } 346 } 347 so2 = unp->unp_conn->unp_socket; 348 if 
(unp->unp_addr != NULL) 349 from = (struct sockaddr *)unp->unp_addr; 350 else 351 from = &sun_noname; 352 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 353 sorwakeup(so2); 354 m = NULL; 355 control = NULL; 356 } else { 357 error = ENOBUFS; 358 } 359 if (nam != NULL) 360 unp_disconnect(unp); 361 break; 362 } 363 364 case SOCK_STREAM: 365 /* Connect if not connected yet. */ 366 /* 367 * Note: A better implementation would complain 368 * if not equal to the peer's address. 369 */ 370 if ((so->so_state & SS_ISCONNECTED) == 0) { 371 if (nam != NULL) { 372 error = unp_connect(so, nam, td); 373 if (error) 374 break; /* XXX */ 375 } else { 376 error = ENOTCONN; 377 break; 378 } 379 } 380
| 34 35#include "opt_mac.h" 36 37#include <sys/param.h> 38#include <sys/domain.h> 39#include <sys/fcntl.h> 40#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 41#include <sys/file.h> 42#include <sys/filedesc.h> 43#include <sys/jail.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/mac.h> 47#include <sys/mbuf.h> 48#include <sys/mutex.h> 49#include <sys/namei.h> 50#include <sys/proc.h> 51#include <sys/protosw.h> 52#include <sys/resourcevar.h> 53#include <sys/socket.h> 54#include <sys/socketvar.h> 55#include <sys/signalvar.h> 56#include <sys/stat.h> 57#include <sys/sx.h> 58#include <sys/sysctl.h> 59#include <sys/systm.h> 60#include <sys/un.h> 61#include <sys/unpcb.h> 62#include <sys/vnode.h> 63 64#include <vm/uma.h> 65 66static uma_zone_t unp_zone; 67static unp_gen_t unp_gencnt; 68static u_int unp_count; 69 70static struct unp_head unp_shead, unp_dhead; 71 72/* 73 * Unix communications domain. 74 * 75 * TODO: 76 * SEQPACKET, RDM 77 * rethink name space problems 78 * need a proper out-of-band 79 * lock pushdown 80 */ 81static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 82static ino_t unp_ino; /* prototype for fake inode numbers */ 83 84static struct mtx unp_mtx; 85#define UNP_LOCK_INIT() \ 86 mtx_init(&unp_mtx, "unp", NULL, MTX_DEF) 87#define UNP_LOCK() mtx_lock(&unp_mtx) 88#define UNP_UNLOCK() mtx_unlock(&unp_mtx) 89#define UNP_LOCK_ASSERT() mtx_assert(&unp_mtx, MA_OWNED) 90 91static int unp_attach(struct socket *); 92static void unp_detach(struct unpcb *); 93static int unp_bind(struct unpcb *,struct sockaddr *, struct thread *); 94static int unp_connect(struct socket *,struct sockaddr *, struct thread *); 95static int unp_connect2(struct socket *so, struct socket *so2); 96static void unp_disconnect(struct unpcb *); 97static void unp_shutdown(struct unpcb *); 98static void unp_drop(struct unpcb *, int); 99static void unp_gc(void); 100static void unp_scan(struct mbuf *, void (*)(struct file *)); 101static void 
unp_mark(struct file *); 102static void unp_discard(struct file *); 103static void unp_freerights(struct file **, int); 104static int unp_internalize(struct mbuf **, struct thread *); 105static int unp_listen(struct unpcb *, struct thread *); 106 107static int 108uipc_abort(struct socket *so) 109{ 110 struct unpcb *unp = sotounpcb(so); 111 112 if (unp == NULL) 113 return (EINVAL); 114 UNP_LOCK(); 115 unp_drop(unp, ECONNABORTED); 116 unp_detach(unp); /* NB: unlocks */ 117 SOCK_LOCK(so); 118 sotryfree(so); 119 return (0); 120} 121 122static int 123uipc_accept(struct socket *so, struct sockaddr **nam) 124{ 125 struct unpcb *unp = sotounpcb(so); 126 const struct sockaddr *sa; 127 128 if (unp == NULL) 129 return (EINVAL); 130 131 /* 132 * Pass back name of connected socket, 133 * if it was bound and we are still connected 134 * (our peer may have closed already!). 135 */ 136 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 137 UNP_LOCK(); 138 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr != NULL) 139 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 140 else 141 sa = &sun_noname; 142 bcopy(sa, *nam, sa->sa_len); 143 UNP_UNLOCK(); 144 return (0); 145} 146 147static int 148uipc_attach(struct socket *so, int proto, struct thread *td) 149{ 150 struct unpcb *unp = sotounpcb(so); 151 152 if (unp != NULL) 153 return (EISCONN); 154 return (unp_attach(so)); 155} 156 157static int 158uipc_bind(struct socket *so, struct sockaddr *nam, struct thread *td) 159{ 160 struct unpcb *unp = sotounpcb(so); 161 162 if (unp == NULL) 163 return (EINVAL); 164 165 return (unp_bind(unp, nam, td)); 166} 167 168static int 169uipc_connect(struct socket *so, struct sockaddr *nam, struct thread *td) 170{ 171 struct unpcb *unp = sotounpcb(so); 172 int error; 173 174 if (unp == NULL) 175 return (EINVAL); 176 UNP_LOCK(); 177 error = unp_connect(so, nam, curthread); 178 UNP_UNLOCK(); 179 return (error); 180} 181 182int 183uipc_connect2(struct socket *so1, struct socket *so2) 184{ 185 
struct unpcb *unp = sotounpcb(so1); 186 int error; 187 188 if (unp == NULL) 189 return (EINVAL); 190 191 UNP_LOCK(); 192 error = unp_connect2(so1, so2); 193 UNP_UNLOCK(); 194 return (error); 195} 196 197/* control is EOPNOTSUPP */ 198 199static int 200uipc_detach(struct socket *so) 201{ 202 struct unpcb *unp = sotounpcb(so); 203 204 if (unp == NULL) 205 return (EINVAL); 206 207 UNP_LOCK(); 208 unp_detach(unp); /* NB: unlocks unp */ 209 return (0); 210} 211 212static int 213uipc_disconnect(struct socket *so) 214{ 215 struct unpcb *unp = sotounpcb(so); 216 217 if (unp == NULL) 218 return (EINVAL); 219 UNP_LOCK(); 220 unp_disconnect(unp); 221 UNP_UNLOCK(); 222 return (0); 223} 224 225static int 226uipc_listen(struct socket *so, struct thread *td) 227{ 228 struct unpcb *unp = sotounpcb(so); 229 int error; 230 231 if (unp == NULL || unp->unp_vnode == NULL) 232 return (EINVAL); 233 UNP_LOCK(); 234 error = unp_listen(unp, td); 235 UNP_UNLOCK(); 236 return (error); 237} 238 239static int 240uipc_peeraddr(struct socket *so, struct sockaddr **nam) 241{ 242 struct unpcb *unp = sotounpcb(so); 243 const struct sockaddr *sa; 244 245 if (unp == NULL) 246 return (EINVAL); 247 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 248 UNP_LOCK(); 249 if (unp->unp_conn != NULL && unp->unp_conn->unp_addr!= NULL) 250 sa = (struct sockaddr *) unp->unp_conn->unp_addr; 251 else { 252 /* 253 * XXX: It seems that this test always fails even when 254 * connection is established. So, this else clause is 255 * added as workaround to return PF_LOCAL sockaddr. 
256 */ 257 sa = &sun_noname; 258 } 259 bcopy(sa, *nam, sa->sa_len); 260 UNP_UNLOCK(); 261 return (0); 262} 263 264static int 265uipc_rcvd(struct socket *so, int flags) 266{ 267 struct unpcb *unp = sotounpcb(so); 268 struct socket *so2; 269 u_long newhiwat; 270 271 if (unp == NULL) 272 return (EINVAL); 273 UNP_LOCK(); 274 switch (so->so_type) { 275 case SOCK_DGRAM: 276 panic("uipc_rcvd DGRAM?"); 277 /*NOTREACHED*/ 278 279 case SOCK_STREAM: 280 if (unp->unp_conn == NULL) 281 break; 282 so2 = unp->unp_conn->unp_socket; 283 /* 284 * Adjust backpressure on sender 285 * and wakeup any waiting to write. 286 */ 287 so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; 288 unp->unp_mbcnt = so->so_rcv.sb_mbcnt; 289 newhiwat = so2->so_snd.sb_hiwat + unp->unp_cc - 290 so->so_rcv.sb_cc; 291 (void)chgsbsize(so2->so_cred->cr_uidinfo, &so2->so_snd.sb_hiwat, 292 newhiwat, RLIM_INFINITY); 293 unp->unp_cc = so->so_rcv.sb_cc; 294 sowwakeup(so2); 295 break; 296 297 default: 298 panic("uipc_rcvd unknown socktype"); 299 } 300 UNP_UNLOCK(); 301 return (0); 302} 303 304/* pru_rcvoob is EOPNOTSUPP */ 305 306static int 307uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 308 struct mbuf *control, struct thread *td) 309{ 310 int error = 0; 311 struct unpcb *unp = sotounpcb(so); 312 struct socket *so2; 313 u_long newhiwat; 314 315 if (unp == NULL) { 316 error = EINVAL; 317 goto release; 318 } 319 if (flags & PRUS_OOB) { 320 error = EOPNOTSUPP; 321 goto release; 322 } 323 324 if (control != NULL && (error = unp_internalize(&control, td))) 325 goto release; 326 327 UNP_LOCK(); 328 switch (so->so_type) { 329 case SOCK_DGRAM: 330 { 331 const struct sockaddr *from; 332 333 if (nam != NULL) { 334 if (unp->unp_conn != NULL) { 335 error = EISCONN; 336 break; 337 } 338 error = unp_connect(so, nam, td); 339 if (error) 340 break; 341 } else { 342 if (unp->unp_conn == NULL) { 343 error = ENOTCONN; 344 break; 345 } 346 } 347 so2 = unp->unp_conn->unp_socket; 348 if 
(unp->unp_addr != NULL) 349 from = (struct sockaddr *)unp->unp_addr; 350 else 351 from = &sun_noname; 352 if (sbappendaddr(&so2->so_rcv, from, m, control)) { 353 sorwakeup(so2); 354 m = NULL; 355 control = NULL; 356 } else { 357 error = ENOBUFS; 358 } 359 if (nam != NULL) 360 unp_disconnect(unp); 361 break; 362 } 363 364 case SOCK_STREAM: 365 /* Connect if not connected yet. */ 366 /* 367 * Note: A better implementation would complain 368 * if not equal to the peer's address. 369 */ 370 if ((so->so_state & SS_ISCONNECTED) == 0) { 371 if (nam != NULL) { 372 error = unp_connect(so, nam, td); 373 if (error) 374 break; /* XXX */ 375 } else { 376 error = ENOTCONN; 377 break; 378 } 379 } 380
|
			error = EPIPE;
			break;
		}
		if (unp->unp_conn == NULL)
			panic("uipc_send connected but no connection?");
		so2 = unp->unp_conn->unp_socket;
		/*
		 * Send to paired receive port, and then reduce
		 * send buffer hiwater marks to maintain backpressure.
		 * Wake up readers.
		 */
		if (control != NULL) {
			if (sbappendcontrol(&so2->so_rcv, m, control))
				control = NULL;
		} else {
			sbappend(&so2->so_rcv, m);
		}
		/* Charge the sender for what now sits in the peer's buffer. */
		so->so_snd.sb_mbmax -=
		    so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt;
		unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt;
		newhiwat = so->so_snd.sb_hiwat -
		    (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc);
		(void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat,
		    newhiwat, RLIM_INFINITY);
		unp->unp_conn->unp_cc = so2->so_rcv.sb_cc;
		sorwakeup(so2);
		/* Receive buffer took ownership of m; don't free it below. */
		m = NULL;
		break;

	default:
		panic("uipc_send unknown socktype");
	}

	/*
	 * SEND_EOF is equivalent to a SEND followed by
	 * a SHUTDOWN.
	 */
	if (flags & PRUS_EOF) {
		socantsendmore(so);
		unp_shutdown(unp);
	}
	UNP_UNLOCK();

	/* Failed send: discard any internalized file-descriptor rights. */
	if (control != NULL && error != 0)
		unp_dispose(control);

release:
	if (control != NULL)
		m_freem(control);
	if (m != NULL)
		m_freem(m);
	return (error);
}

/*
 * pru_sense (fstat on a socket): report the send buffer size as the
 * block size and synthesize a stable fake inode number per pcb.
 */
static int
uipc_sense(struct socket *so, struct stat *sb)
{
	struct unpcb *unp = sotounpcb(so);
	struct socket *so2;

	if (unp == NULL)
		return (EINVAL);
	UNP_LOCK();
	sb->st_blksize = so->so_snd.sb_hiwat;
	if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) {
		so2 = unp->unp_conn->unp_socket;
		sb->st_blksize += so2->so_rcv.sb_cc;
	}
	sb->st_dev = NOUDEV;
	/* Lazily assign a fake inode number, skipping the 0 value. */
	if (unp->unp_ino == 0)
		unp->unp_ino = (++unp_ino == 0) ? ++unp_ino : unp_ino;
	sb->st_ino = unp->unp_ino;
	UNP_UNLOCK();
	return (0);
}

/* pru_shutdown: stop sending and tell the peer no more data will arrive. */
static int
uipc_shutdown(struct socket *so)
{
	struct unpcb *unp = sotounpcb(so);

	if (unp == NULL)
		return (EINVAL);
	UNP_LOCK();
	socantsendmore(so);
	unp_shutdown(unp);
	UNP_UNLOCK();
	return (0);
}

/* pru_sockaddr: return this socket's own bound name; caller frees. */
static int
uipc_sockaddr(struct socket *so, struct sockaddr **nam)
{
	struct unpcb *unp = sotounpcb(so);
	const struct sockaddr *sa;

	if (unp == NULL)
		return (EINVAL);
	*nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	UNP_LOCK();
	if (unp->unp_addr != NULL)
		sa = (struct sockaddr *)unp->unp_addr;
	else
		sa = &sun_noname;
	bcopy(sa, *nam, sa->sa_len);
	UNP_UNLOCK();
	return (0);
}

/* Protocol user-request dispatch table for the local (UNIX) domain. */
struct pr_usrreqs uipc_usrreqs = {
	uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
	uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
	uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
	uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
	sosend, soreceive, sopoll, pru_sosetlabel_null
};

/*
 * Socket-option handler: only getsockopt(LOCAL_PEERCRED) is supported,
 * returning the peer credentials cached at connect/listen time.
 */
int
uipc_ctloutput(so, sopt)
	struct socket *so;
	struct sockopt *sopt;
{
	struct unpcb *unp = sotounpcb(so);
	struct xucred xu;
	int error;

	switch (sopt->sopt_dir) {
	case SOPT_GET:
		switch (sopt->sopt_name) {
		case LOCAL_PEERCRED:
			error = 0;
			UNP_LOCK();
			if (unp->unp_flags & UNP_HAVEPC)
				xu = unp->unp_peercred;
			else {
				if (so->so_type == SOCK_STREAM)
					error = ENOTCONN;
				else
					error = EINVAL;
			}
			UNP_UNLOCK();
			/* Copy out after dropping the lock. */
			if (error == 0)
				error = sooptcopyout(sopt, &xu, sizeof(xu));
			break;
		default:
			error = EOPNOTSUPP;
			break;
		}
		break;
	case SOPT_SET:
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

/*
 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
 * for stream sockets, although the total for sender and receiver is
 * actually only PIPSIZ.
 * Datagram sockets really use the sendspace as the maximum datagram size,
 * and don't really want to reserve the sendspace.  Their recvspace should
 * be large enough for at least one max-size datagram plus address.
 */
#ifndef PIPSIZ
#define	PIPSIZ	8192
#endif
static u_long	unpst_sendspace = PIPSIZ;
static u_long	unpst_recvspace = PIPSIZ;
static u_long	unpdg_sendspace = 2*1024;	/* really max datagram size */
static u_long	unpdg_recvspace = 4*1024;

static int	unp_rights;			/* file descriptors in flight */

SYSCTL_DECL(_net_local_stream);
SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
	   &unpst_sendspace, 0, "");
SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpst_recvspace, 0, "");
SYSCTL_DECL(_net_local_dgram);
SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
	   &unpdg_sendspace, 0, "");
SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
	   &unpdg_recvspace, 0, "");
SYSCTL_DECL(_net_local);
SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");

/*
 * Allocate a new unpcb for so, reserve socket buffer space if the caller
 * did not, and insert the pcb on the global stream/datagram list.
 */
static int
unp_attach(so)
	struct socket *so;
{
	register struct unpcb *unp;
	int error;

	if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
		switch (so->so_type) {

		case SOCK_STREAM:
			error = soreserve(so, unpst_sendspace, unpst_recvspace);
			break;

		case SOCK_DGRAM:
			error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
			break;

		default:
			panic("unp_attach");
		}
		if (error)
			return (error);
	}
	unp = uma_zalloc(unp_zone, M_WAITOK);
	if (unp == NULL)
		return (ENOBUFS);
	bzero(unp, sizeof *unp);
	LIST_INIT(&unp->unp_refs);
	unp->unp_socket = so;

	UNP_LOCK();
	unp->unp_gencnt = ++unp_gencnt;
	unp_count++;
	LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
			 : &unp_shead, unp, unp_link);
	UNP_UNLOCK();

	so->so_pcb = unp;
	return (0);
}

/*
 * Tear down a unpcb: unlink it, break the vnode binding, disconnect
 * peers, and free it.  Called with the global lock held; RELEASES the
 * lock before returning.
 */
static void
unp_detach(unp)
	register struct unpcb *unp;
{
	struct vnode *vp;

	UNP_LOCK_ASSERT();

	LIST_REMOVE(unp, unp_link);
	unp->unp_gencnt = ++unp_gencnt;
	--unp_count;
	if ((vp = unp->unp_vnode) != NULL) {
		/*
		 * XXXRW: should v_socket be frobbed only while holding
		 * Giant?
		 */
		unp->unp_vnode->v_socket = NULL;
		unp->unp_vnode = NULL;
	}
	if (unp->unp_conn != NULL)
		unp_disconnect(unp);
	/* Reset every datagram sender still pointing at us. */
	while (!LIST_EMPTY(&unp->unp_refs)) {
		struct unpcb *ref = LIST_FIRST(&unp->unp_refs);
		unp_drop(ref, ECONNRESET);
	}
	soisdisconnected(unp->unp_socket);
	unp->unp_socket->so_pcb = NULL;
	if (unp_rights) {
		/*
		 * Normally the receive buffer is flushed later,
		 * in sofree, but if our receive buffer holds references
		 * to descriptors that are now garbage, we will dispose
		 * of those descriptor references after the garbage collector
		 * gets them (resulting in a "panic: closef: count < 0").
		 */
		sorflush(unp->unp_socket);
		unp_gc();
	}
	if (unp->unp_addr != NULL)
		FREE(unp->unp_addr, M_SONAME);
	UNP_UNLOCK();
	uma_zfree(unp_zone, unp);
	/* vrele may block; take Giant for the vnode layer. */
	if (vp) {
		mtx_lock(&Giant);
		vrele(vp);
		mtx_unlock(&Giant);
	}
}

/*
 * Bind the pcb to a pathname by creating a VSOCK vnode for it.
 * Runs unlocked (takes Giant for the namei/VOP calls), then takes the
 * global lock only to publish vp/addr into the pcb.
 */
static int
unp_bind(unp, nam, td)
	struct unpcb *unp;
	struct sockaddr *nam;
	struct thread *td;
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	int error, namelen;
	struct nameidata nd;
	char *buf;

	/*
	 * XXXRW: This test-and-set of unp_vnode is non-atomic; the
	 * unlocked read here is fine, but the value of unp_vnode needs
	 * to be tested again after we do all the lookups to see if the
	 * pcb is still unbound?
	 */
	if (unp->unp_vnode != NULL)
		return (EINVAL);

	namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
	if (namelen <= 0)
		return (EINVAL);

	buf = malloc(namelen + 1, M_TEMP, M_WAITOK);
	strlcpy(buf, soun->sun_path, namelen + 1);

	mtx_lock(&Giant);
restart:
	mtx_assert(&Giant, MA_OWNED);
	NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE,
	    buf, td);
/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
	error = namei(&nd);
	if (error)
		goto done;
	vp = nd.ni_vp;
	if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/* Name exists already, or writes are suspended: clean up. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_dvp == vp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		if (vp != NULL) {
			vrele(vp);
			error = EADDRINUSE;
			goto done;
		}
		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
		if (error)
			goto done;
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VSOCK;
	vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask);
#ifdef MAC
	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
#endif
	if (error == 0) {
		VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
		error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	if (error)
		goto done;
	vp = nd.ni_vp;
	ASSERT_VOP_LOCKED(vp, "unp_bind");
	soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK);
	UNP_LOCK();
	/* Publish the binding: vnode <-> socket cross-links plus the name. */
	vp->v_socket = unp->unp_socket;
	unp->unp_vnode = vp;
	unp->unp_addr = soun;
	UNP_UNLOCK();
	VOP_UNLOCK(vp, 0, td);
	vn_finished_write(mp);
done:
	mtx_unlock(&Giant);
	free(buf, M_TEMP);
	return (error);
}

/*
 * Look up the vnode named by nam, find the socket bound to it, and
 * connect to it (spawning a server-side socket via sonewconn() for
 * connection-oriented types).  Entered and exited with the global lock
 * held, but drops it around the filesystem lookup and sonewconn().
 */
static int
unp_connect(so, nam, td)
	struct socket *so;
	struct sockaddr *nam;
	struct thread *td;
{
	register struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	register struct vnode *vp;
	register struct socket *so2, *so3;
	struct unpcb *unp = sotounpcb(so);
	struct unpcb *unp2, *unp3;
	int error, len;
	struct nameidata nd;
	char buf[SOCK_MAXADDRLEN];
	struct sockaddr *sa;

	UNP_LOCK_ASSERT();

	len = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (len <= 0)
		return (EINVAL);
	strlcpy(buf, soun->sun_path, len + 1);
	UNP_UNLOCK();
	sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK);
	mtx_lock(&Giant);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td);
	error = namei(&nd);
	if (error)
		vp = NULL;
	else
		vp = nd.ni_vp;
	ASSERT_VOP_LOCKED(vp, "unp_connect");
	NDFREE(&nd, NDF_ONLY_PNBUF);
	if (error)
		goto bad;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	/* Connecting requires write access to the socket node. */
	error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td);
	if (error)
		goto bad;
	so2 = vp->v_socket;
	if (so2 == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != so2->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}
	mtx_unlock(&Giant);
	UNP_LOCK();
	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if (so2->so_options & SO_ACCEPTCONN) {
			/*
			 * NB: drop locks here so unp_attach is entered
			 *     w/o locks; this avoids a recursive lock
			 *     of the head and holding sleep locks across
			 *     a (potentially) blocking malloc.
			 */
			UNP_UNLOCK();
			so3 = sonewconn(so2, 0);
			UNP_LOCK();
		} else
			so3 = NULL;
		if (so3 == NULL) {
			error = ECONNREFUSED;
			goto bad2;
		}
		/* Re-fetch pcbs: the lock was dropped above. */
		unp = sotounpcb(so);
		unp2 = sotounpcb(so2);
		unp3 = sotounpcb(so3);
		if (unp2->unp_addr != NULL) {
			bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len);
			unp3->unp_addr = (struct sockaddr_un *)sa;
			sa = NULL;
		}
		/*
		 * unp_peercred management:
		 *
		 * The connecter's (client's) credentials are copied
		 * from its process structure at the time of connect()
		 * (which is now).
		 */
		cru2x(td->td_ucred, &unp3->unp_peercred);
		unp3->unp_flags |= UNP_HAVEPC;
		/*
		 * The receiver's (server's) credentials are copied
		 * from the unp_peercred member of socket on which the
		 * former called listen(); unp_listen() cached that
		 * process's credentials at that time so we can use
		 * them now.
		 */
		KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED,
		    ("unp_connect: listener without cached peercred"));
		memcpy(&unp->unp_peercred, &unp2->unp_peercred,
		    sizeof(unp->unp_peercred));
		unp->unp_flags |= UNP_HAVEPC;
#ifdef MAC
		SOCK_LOCK(so);
		mac_set_socket_peer_from_socket(so, so3);
		mac_set_socket_peer_from_socket(so3, so);
		SOCK_UNLOCK(so);
#endif

		/* Complete the connection against the new server socket. */
		so2 = so3;
	}
	error = unp_connect2(so, so2);
bad2:
	UNP_UNLOCK();
	mtx_lock(&Giant);
bad:
	mtx_assert(&Giant, MA_OWNED);
	if (vp != NULL)
		vput(vp);
	mtx_unlock(&Giant);
	free(sa, M_SONAME);
	UNP_LOCK();
	return (error);
}

/*
 * Wire two sockets together.  Datagram connections are one-way (so joins
 * so2's reference list); stream connections are symmetric.
 */
static int
unp_connect2(so, so2)
	register struct socket *so;
	register struct socket *so2;
{
	register struct unpcb *unp = sotounpcb(so);
	register struct unpcb *unp2;

	UNP_LOCK_ASSERT();

	if (so2->so_type != so->so_type)
		return (EPROTOTYPE);
	unp2 = sotounpcb(so2);
	unp->unp_conn = unp2;
	switch (so->so_type) {

	case SOCK_DGRAM:
		LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
		soisconnected(so);
		break;

	case SOCK_STREAM:
		unp2->unp_conn = unp;
		soisconnected(so);
		soisconnected(so2);
		break;

	default:
		panic("unp_connect2");
	}
	return (0);
}

/* Undo unp_connect2() for this pcb (and its stream peer, if any). */
static void
unp_disconnect(unp)
	struct unpcb *unp;
{
	register struct unpcb *unp2 = unp->unp_conn;

	UNP_LOCK_ASSERT();

	if (unp2 == NULL)
		return;
	unp->unp_conn = NULL;
	switch (unp->unp_socket->so_type) {

	case SOCK_DGRAM:
		LIST_REMOVE(unp, unp_reflink);
		unp->unp_socket->so_state &= ~SS_ISCONNECTED;
		break;

	case SOCK_STREAM:
		soisdisconnected(unp->unp_socket);
		unp2->unp_conn = NULL;
		soisdisconnected(unp2->unp_socket);
		break;
	}
}

#ifdef notdef
void
unp_abort(unp)
	struct unpcb *unp;
{

	unp_detach(unp);
}
#endif

/*
 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed
 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers
 * are safe to reference.  It first scans the list of struct unpcb's to
 * generate a pointer list, then it rescans its list one entry at a time to
 * externalize and copyout.  It checks the generation number to see if a
 * struct unpcb has been reused, and will skip it if so.
 */
static int
unp_pcblist(SYSCTL_HANDLER_ARGS)
{
	int error, i, n;
	struct unpcb *unp, **unp_list;
	unp_gen_t gencnt;
	struct xunpgen *xug;
	struct unp_head *head;
	struct xunpcb *xu;

	/* arg1 selects the datagram or stream list (set at SYSCTL_PROC). */
	head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);

	/*
	 * The process of preparing the PCB list is too time-consuming and
	 * resource-intensive to repeat twice on every request.
	 */
	if (req->oldptr == NULL) {
		/* Size-probe only: report a padded estimate. */
		n = unp_count;
		req->oldidx = 2 * (sizeof *xug)
			+ (n + n/8) * sizeof(struct xunpcb);
		return (0);
	}

	if (req->newptr != NULL)
		return (EPERM);

	/*
	 * OK, now we're committed to doing something.
	 */
	xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK);
	UNP_LOCK();
	gencnt = unp_gencnt;
	n = unp_count;
	UNP_UNLOCK();

	xug->xug_len = sizeof *xug;
	xug->xug_count = n;
	xug->xug_gen = gencnt;
	xug->xug_sogen = so_gencnt;
	error = SYSCTL_OUT(req, xug, sizeof *xug);
	if (error) {
		free(xug, M_TEMP);
		return (error);
	}

	unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);

	UNP_LOCK();
	for (unp = LIST_FIRST(head), i = 0; unp && i < n;
	     unp = LIST_NEXT(unp, unp_link)) {
		if (unp->unp_gencnt <= gencnt) {
			/* Skip sockets the requester may not see (jails). */
			if (cr_cansee(req->td->td_ucred,
			    unp->unp_socket->so_cred))
				continue;
			unp_list[i++] = unp;
		}
	}
	UNP_UNLOCK();
	n = i;			/* in case we lost some during malloc */

	error = 0;
	xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK);
	for (i = 0; i < n; i++) {
		unp = unp_list[i];
		if (unp->unp_gencnt <= gencnt) {
			xu->xu_len = sizeof *xu;
			xu->xu_unpp = unp;
			/*
			 * XXX - need more locking here to protect against
			 * connect/disconnect races for SMP.
			 */
			if (unp->unp_addr != NULL)
				bcopy(unp->unp_addr, &xu->xu_addr,
				      unp->unp_addr->sun_len);
			if (unp->unp_conn != NULL &&
			    unp->unp_conn->unp_addr != NULL)
				bcopy(unp->unp_conn->unp_addr,
				      &xu->xu_caddr,
				      unp->unp_conn->unp_addr->sun_len);
			bcopy(unp, &xu->xu_unp, sizeof *unp);
			sotoxsocket(unp->unp_socket, &xu->xu_socket);
			error = SYSCTL_OUT(req, xu, sizeof *xu);
		}
	}
	free(xu, M_TEMP);
	if (!error) {
		/*
		 * Give the user an updated idea of our state.
		 * If the generation differs from what we told
		 * her before, she knows that something happened
		 * while we were processing this request, and it
		 * might be necessary to retry.
		 */
		xug->xug_gen = unp_gencnt;
		xug->xug_sogen = so_gencnt;
		xug->xug_count = unp_count;
		error = SYSCTL_OUT(req, xug, sizeof *xug);
	}
	free(unp_list, M_TEMP);
	free(xug, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local datagram sockets");
SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
	    (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
	    "List of active local stream sockets");

/* Propagate a stream shutdown to the peer's receive side. */
static void
unp_shutdown(unp)
	struct unpcb *unp;
{
	struct socket *so;

	UNP_LOCK_ASSERT();

	if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
	    (so = unp->unp_conn->unp_socket))
		socantrcvmore(so);
}

/* Record errno on the socket and disconnect it. */
static void
unp_drop(unp, errno)
	struct unpcb *unp;
	int errno;
{
	struct socket *so = unp->unp_socket;

	UNP_LOCK_ASSERT();

	so->so_error = errno;
	unp_disconnect(unp);
}

#ifdef notdef
void
unp_drain()
{

}
#endif

/* Release a vector of in-flight file references via unp_discard(). */
static void
unp_freerights(rp, fdcount)
	struct file **rp;
	int fdcount;
{
	int i;
	struct file *fp;

	for (i = 0; i < fdcount; i++) {
		fp = *rp;
		/*
		 * zero the pointer before calling
		 * unp_discard since it may end up
		 * in unp_gc()..
		 */
		*rp++ = 0;
		unp_discard(fp);
	}
}

/*
 * Convert SCM_RIGHTS control data from in-kernel struct file pointers
 * back into file descriptors in the receiving process.  A NULL controlp
 * means the caller just wants the rights released.
 */
int
unp_externalize(control, controlp)
	struct mbuf *control, **controlp;
{
	struct thread *td = curthread;		/* XXX */
	struct cmsghdr *cm = mtod(control, struct cmsghdr *);
	int i;
	int *fdp;
	struct file **rp;
	struct file *fp;
	void *data;
	socklen_t clen = control->m_len, datalen;
	int error, newfds;
	int f;
	u_int newlen;

	error = 0;
	if (controlp != NULL) /* controlp == NULL => free control messages */
		*controlp = NULL;

	while (cm != NULL) {
		if (sizeof(*cm) > clen || cm->cmsg_len > clen) {
			error = EINVAL;
			break;
		}

		data = CMSG_DATA(cm);
		datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data;

		if (cm->cmsg_level == SOL_SOCKET
		    && cm->cmsg_type == SCM_RIGHTS) {
			newfds = datalen / sizeof(struct file *);
			rp = data;

			/* If we're not outputting the descriptors free them. */
			if (error || controlp == NULL) {
				unp_freerights(rp, newfds);
				goto next;
			}
			FILEDESC_LOCK(td->td_proc->p_fd);
			/* if the new FD's will not fit free them.  */
			if (!fdavail(td, newfds)) {
				FILEDESC_UNLOCK(td->td_proc->p_fd);
				error = EMSGSIZE;
				unp_freerights(rp, newfds);
				goto next;
			}
			/*
			 * now change each pointer to an fd in the global
			 * table to an integer that is the index to the
			 * local fd table entry that we set up to point
			 * to the global one we are transferring.
1106 */ 1107 *rp++ = 0; 1108 unp_discard(fp); 1109 } 1110} 1111 1112int 1113unp_externalize(control, controlp) 1114 struct mbuf *control, **controlp; 1115{ 1116 struct thread *td = curthread; /* XXX */ 1117 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1118 int i; 1119 int *fdp; 1120 struct file **rp; 1121 struct file *fp; 1122 void *data; 1123 socklen_t clen = control->m_len, datalen; 1124 int error, newfds; 1125 int f; 1126 u_int newlen; 1127 1128 error = 0; 1129 if (controlp != NULL) /* controlp == NULL => free control messages */ 1130 *controlp = NULL; 1131 1132 while (cm != NULL) { 1133 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1134 error = EINVAL; 1135 break; 1136 } 1137 1138 data = CMSG_DATA(cm); 1139 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1140 1141 if (cm->cmsg_level == SOL_SOCKET 1142 && cm->cmsg_type == SCM_RIGHTS) { 1143 newfds = datalen / sizeof(struct file *); 1144 rp = data; 1145 1146 /* If we're not outputting the descriptors free them. */ 1147 if (error || controlp == NULL) { 1148 unp_freerights(rp, newfds); 1149 goto next; 1150 } 1151 FILEDESC_LOCK(td->td_proc->p_fd); 1152 /* if the new FD's will not fit free them. */ 1153 if (!fdavail(td, newfds)) { 1154 FILEDESC_UNLOCK(td->td_proc->p_fd); 1155 error = EMSGSIZE; 1156 unp_freerights(rp, newfds); 1157 goto next; 1158 } 1159 /* 1160 * now change each pointer to an fd in the global 1161 * table to an integer that is the index to the 1162 * local fd table entry that we set up to point 1163 * to the global one we are transferring. 
1164 */ 1165 newlen = newfds * sizeof(int); 1166 *controlp = sbcreatecontrol(NULL, newlen, 1167 SCM_RIGHTS, SOL_SOCKET); 1168 if (*controlp == NULL) { 1169 FILEDESC_UNLOCK(td->td_proc->p_fd); 1170 error = E2BIG; 1171 unp_freerights(rp, newfds); 1172 goto next; 1173 } 1174 1175 fdp = (int *) 1176 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1177 for (i = 0; i < newfds; i++) { 1178 if (fdalloc(td, 0, &f)) 1179 panic("unp_externalize fdalloc failed"); 1180 fp = *rp++; 1181 td->td_proc->p_fd->fd_ofiles[f] = fp; 1182 FILE_LOCK(fp); 1183 fp->f_msgcount--; 1184 FILE_UNLOCK(fp); 1185 unp_rights--; 1186 *fdp++ = f; 1187 } 1188 FILEDESC_UNLOCK(td->td_proc->p_fd); 1189 } else { /* We can just copy anything else across */ 1190 if (error || controlp == NULL) 1191 goto next; 1192 *controlp = sbcreatecontrol(NULL, datalen, 1193 cm->cmsg_type, cm->cmsg_level); 1194 if (*controlp == NULL) { 1195 error = ENOBUFS; 1196 goto next; 1197 } 1198 bcopy(data, 1199 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1200 datalen); 1201 } 1202 1203 controlp = &(*controlp)->m_next; 1204 1205next: 1206 if (CMSG_SPACE(datalen) < clen) { 1207 clen -= CMSG_SPACE(datalen); 1208 cm = (struct cmsghdr *) 1209 ((caddr_t)cm + CMSG_SPACE(datalen)); 1210 } else { 1211 clen = 0; 1212 cm = NULL; 1213 } 1214 } 1215 1216 m_freem(control); 1217 1218 return (error); 1219} 1220 1221void 1222unp_init(void) 1223{ 1224 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1225 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1226 if (unp_zone == NULL) 1227 panic("unp_init"); 1228 uma_zone_set_max(unp_zone, nmbclusters); 1229 LIST_INIT(&unp_dhead); 1230 LIST_INIT(&unp_shead); 1231 1232 UNP_LOCK_INIT(); 1233} 1234 1235static int 1236unp_internalize(controlp, td) 1237 struct mbuf **controlp; 1238 struct thread *td; 1239{ 1240 struct mbuf *control = *controlp; 1241 struct proc *p = td->td_proc; 1242 struct filedesc *fdescp = p->p_fd; 1243 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1244 struct cmsgcred 
*cmcred; 1245 struct file **rp; 1246 struct file *fp; 1247 struct timeval *tv; 1248 int i, fd, *fdp; 1249 void *data; 1250 socklen_t clen = control->m_len, datalen; 1251 int error, oldfds; 1252 u_int newlen; 1253 1254 error = 0; 1255 *controlp = NULL; 1256 1257 while (cm != NULL) { 1258 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1259 || cm->cmsg_len > clen) { 1260 error = EINVAL; 1261 goto out; 1262 } 1263 1264 data = CMSG_DATA(cm); 1265 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1266 1267 switch (cm->cmsg_type) { 1268 /* 1269 * Fill in credential information. 1270 */ 1271 case SCM_CREDS: 1272 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1273 SCM_CREDS, SOL_SOCKET); 1274 if (*controlp == NULL) { 1275 error = ENOBUFS; 1276 goto out; 1277 } 1278 1279 cmcred = (struct cmsgcred *) 1280 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1281 cmcred->cmcred_pid = p->p_pid; 1282 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1283 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1284 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1285 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1286 CMGROUP_MAX); 1287 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1288 cmcred->cmcred_groups[i] = 1289 td->td_ucred->cr_groups[i]; 1290 break; 1291 1292 case SCM_RIGHTS: 1293 oldfds = datalen / sizeof (int); 1294 /* 1295 * check that all the FDs passed in refer to legal files 1296 * If not, reject the entire operation. 1297 */ 1298 fdp = data; 1299 FILEDESC_LOCK(fdescp); 1300 for (i = 0; i < oldfds; i++) { 1301 fd = *fdp++; 1302 if ((unsigned)fd >= fdescp->fd_nfiles || 1303 fdescp->fd_ofiles[fd] == NULL) { 1304 FILEDESC_UNLOCK(fdescp); 1305 error = EBADF; 1306 goto out; 1307 } 1308 fp = fdescp->fd_ofiles[fd]; 1309 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1310 FILEDESC_UNLOCK(fdescp); 1311 error = EOPNOTSUPP; 1312 goto out; 1313 } 1314 1315 } 1316 /* 1317 * Now replace the integer FDs with pointers to 1318 * the associated global file table entry.. 
1319 */ 1320 newlen = oldfds * sizeof(struct file *); 1321 *controlp = sbcreatecontrol(NULL, newlen, 1322 SCM_RIGHTS, SOL_SOCKET); 1323 if (*controlp == NULL) { 1324 FILEDESC_UNLOCK(fdescp); 1325 error = E2BIG; 1326 goto out; 1327 } 1328 1329 fdp = data; 1330 rp = (struct file **) 1331 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1332 for (i = 0; i < oldfds; i++) { 1333 fp = fdescp->fd_ofiles[*fdp++]; 1334 *rp++ = fp; 1335 FILE_LOCK(fp); 1336 fp->f_count++; 1337 fp->f_msgcount++; 1338 FILE_UNLOCK(fp); 1339 unp_rights++; 1340 } 1341 FILEDESC_UNLOCK(fdescp); 1342 break; 1343 1344 case SCM_TIMESTAMP: 1345 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1346 SCM_TIMESTAMP, SOL_SOCKET); 1347 if (*controlp == NULL) { 1348 error = ENOBUFS; 1349 goto out; 1350 } 1351 tv = (struct timeval *) 1352 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1353 microtime(tv); 1354 break; 1355 1356 default: 1357 error = EINVAL; 1358 goto out; 1359 } 1360 1361 controlp = &(*controlp)->m_next; 1362 1363 if (CMSG_SPACE(datalen) < clen) { 1364 clen -= CMSG_SPACE(datalen); 1365 cm = (struct cmsghdr *) 1366 ((caddr_t)cm + CMSG_SPACE(datalen)); 1367 } else { 1368 clen = 0; 1369 cm = NULL; 1370 } 1371 } 1372 1373out: 1374 m_freem(control); 1375 1376 return (error); 1377} 1378 1379static int unp_defer, unp_gcing; 1380 1381static void 1382unp_gc() 1383{ 1384 register struct file *fp, *nextfp; 1385 register struct socket *so; 1386 struct file **extra_ref, **fpp; 1387 int nunref, i; 1388 1389 UNP_LOCK_ASSERT(); 1390 1391 if (unp_gcing) 1392 return; 1393 unp_gcing = 1; 1394 unp_defer = 0; 1395 /* 1396 * before going through all this, set all FDs to 1397 * be NOT defered and NOT externally accessible 1398 */ 1399 /* 1400 * XXXRW: Acquiring a sleep lock while holding UNP 1401 * mutex cannot be a good thing. 
1402 */ 1403 sx_slock(&filelist_lock); 1404 LIST_FOREACH(fp, &filehead, f_list) 1405 fp->f_gcflag &= ~(FMARK|FDEFER); 1406 do { 1407 LIST_FOREACH(fp, &filehead, f_list) { 1408 FILE_LOCK(fp); 1409 /* 1410 * If the file is not open, skip it 1411 */ 1412 if (fp->f_count == 0) { 1413 FILE_UNLOCK(fp); 1414 continue; 1415 } 1416 /* 1417 * If we already marked it as 'defer' in a 1418 * previous pass, then try process it this time 1419 * and un-mark it 1420 */ 1421 if (fp->f_gcflag & FDEFER) { 1422 fp->f_gcflag &= ~FDEFER; 1423 unp_defer--; 1424 } else { 1425 /* 1426 * if it's not defered, then check if it's 1427 * already marked.. if so skip it 1428 */ 1429 if (fp->f_gcflag & FMARK) { 1430 FILE_UNLOCK(fp); 1431 continue; 1432 } 1433 /* 1434 * If all references are from messages 1435 * in transit, then skip it. it's not 1436 * externally accessible. 1437 */ 1438 if (fp->f_count == fp->f_msgcount) { 1439 FILE_UNLOCK(fp); 1440 continue; 1441 } 1442 /* 1443 * If it got this far then it must be 1444 * externally accessible. 1445 */ 1446 fp->f_gcflag |= FMARK; 1447 } 1448 /* 1449 * either it was defered, or it is externally 1450 * accessible and not already marked so. 1451 * Now check if it is possibly one of OUR sockets. 1452 */ 1453 if (fp->f_type != DTYPE_SOCKET || 1454 (so = fp->f_data) == NULL) { 1455 FILE_UNLOCK(fp); 1456 continue; 1457 } 1458 FILE_UNLOCK(fp); 1459 if (so->so_proto->pr_domain != &localdomain || 1460 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1461 continue; 1462#ifdef notdef 1463 if (so->so_rcv.sb_flags & SB_LOCK) { 1464 /* 1465 * This is problematical; it's not clear 1466 * we need to wait for the sockbuf to be 1467 * unlocked (on a uniprocessor, at least), 1468 * and it's also not clear what to do 1469 * if sbwait returns an error due to receipt 1470 * of a signal. If sbwait does return 1471 * an error, we'll go into an infinite 1472 * loop. Delete all of this for now. 
1473 */ 1474 (void) sbwait(&so->so_rcv); 1475 goto restart; 1476 } 1477#endif 1478 /* 1479 * So, Ok, it's one of our sockets and it IS externally 1480 * accessible (or was defered). Now we look 1481 * to see if we hold any file descriptors in its 1482 * message buffers. Follow those links and mark them 1483 * as accessible too. 1484 */ 1485 unp_scan(so->so_rcv.sb_mb, unp_mark); 1486 } 1487 } while (unp_defer); 1488 sx_sunlock(&filelist_lock); 1489 /* 1490 * We grab an extra reference to each of the file table entries 1491 * that are not otherwise accessible and then free the rights 1492 * that are stored in messages on them. 1493 * 1494 * The bug in the orginal code is a little tricky, so I'll describe 1495 * what's wrong with it here. 1496 * 1497 * It is incorrect to simply unp_discard each entry for f_msgcount 1498 * times -- consider the case of sockets A and B that contain 1499 * references to each other. On a last close of some other socket, 1500 * we trigger a gc since the number of outstanding rights (unp_rights) 1501 * is non-zero. If during the sweep phase the gc code un_discards, 1502 * we end up doing a (full) closef on the descriptor. A closef on A 1503 * results in the following chain. Closef calls soo_close, which 1504 * calls soclose. Soclose calls first (through the switch 1505 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1506 * returns because the previous instance had set unp_gcing, and 1507 * we return all the way back to soclose, which marks the socket 1508 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1509 * to free up the rights that are queued in messages on the socket A, 1510 * i.e., the reference on B. The sorflush calls via the dom_dispose 1511 * switch unp_dispose, which unp_scans with unp_discard. This second 1512 * instance of unp_discard just calls closef on B. 1513 * 1514 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1515 * which results in another closef on A. 
Unfortunately, A is already 1516 * being closed, and the descriptor has already been marked with 1517 * SS_NOFDREF, and soclose panics at this point. 1518 * 1519 * Here, we first take an extra reference to each inaccessible 1520 * descriptor. Then, we call sorflush ourself, since we know 1521 * it is a Unix domain socket anyhow. After we destroy all the 1522 * rights carried in messages, we do a last closef to get rid 1523 * of our extra reference. This is the last close, and the 1524 * unp_detach etc will shut down the socket. 1525 * 1526 * 91/09/19, bsy@cs.cmu.edu 1527 */ 1528 extra_ref = malloc(nfiles * sizeof(struct file *), M_TEMP, M_WAITOK); 1529 sx_slock(&filelist_lock); 1530 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 1531 fp != NULL; fp = nextfp) { 1532 nextfp = LIST_NEXT(fp, f_list); 1533 FILE_LOCK(fp); 1534 /* 1535 * If it's not open, skip it 1536 */ 1537 if (fp->f_count == 0) { 1538 FILE_UNLOCK(fp); 1539 continue; 1540 } 1541 /* 1542 * If all refs are from msgs, and it's not marked accessible 1543 * then it must be referenced from some unreachable cycle 1544 * of (shut-down) FDs, so include it in our 1545 * list of FDs to remove 1546 */ 1547 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { 1548 *fpp++ = fp; 1549 nunref++; 1550 fp->f_count++; 1551 } 1552 FILE_UNLOCK(fp); 1553 } 1554 sx_sunlock(&filelist_lock); 1555 /* 1556 * for each FD on our hit list, do the following two things 1557 */ 1558 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1559 struct file *tfp = *fpp; 1560 FILE_LOCK(tfp); 1561 if (tfp->f_type == DTYPE_SOCKET && 1562 tfp->f_data != NULL) { 1563 FILE_UNLOCK(tfp); 1564 sorflush(tfp->f_data); 1565 } else { 1566 FILE_UNLOCK(tfp); 1567 } 1568 } 1569 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1570 closef(*fpp, (struct thread *) NULL); 1571 free(extra_ref, M_TEMP); 1572 unp_gcing = 0; 1573} 1574 1575void 1576unp_dispose(m) 1577 struct mbuf *m; 1578{ 1579 1580 if (m) 1581 unp_scan(m, unp_discard); 
1582} 1583 1584static int 1585unp_listen(unp, td) 1586 struct unpcb *unp; 1587 struct thread *td; 1588{ 1589 UNP_LOCK_ASSERT(); 1590 1591 /* 1592 * XXXRW: Why populate the local peer cred with our own credential? 1593 */ 1594 cru2x(td->td_ucred, &unp->unp_peercred); 1595 unp->unp_flags |= UNP_HAVEPCCACHED; 1596 return (0); 1597} 1598 1599static void 1600unp_scan(m0, op) 1601 register struct mbuf *m0; 1602 void (*op)(struct file *); 1603{ 1604 struct mbuf *m; 1605 struct file **rp; 1606 struct cmsghdr *cm; 1607 void *data; 1608 int i; 1609 socklen_t clen, datalen; 1610 int qfds; 1611 1612 while (m0 != NULL) { 1613 for (m = m0; m; m = m->m_next) { 1614 if (m->m_type != MT_CONTROL) 1615 continue; 1616 1617 cm = mtod(m, struct cmsghdr *); 1618 clen = m->m_len; 1619 1620 while (cm != NULL) { 1621 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 1622 break; 1623 1624 data = CMSG_DATA(cm); 1625 datalen = (caddr_t)cm + cm->cmsg_len 1626 - (caddr_t)data; 1627 1628 if (cm->cmsg_level == SOL_SOCKET && 1629 cm->cmsg_type == SCM_RIGHTS) { 1630 qfds = datalen / sizeof (struct file *); 1631 rp = data; 1632 for (i = 0; i < qfds; i++) 1633 (*op)(*rp++); 1634 } 1635 1636 if (CMSG_SPACE(datalen) < clen) { 1637 clen -= CMSG_SPACE(datalen); 1638 cm = (struct cmsghdr *) 1639 ((caddr_t)cm + CMSG_SPACE(datalen)); 1640 } else { 1641 clen = 0; 1642 cm = NULL; 1643 } 1644 } 1645 } 1646 m0 = m0->m_act; 1647 } 1648} 1649 1650static void 1651unp_mark(fp) 1652 struct file *fp; 1653{ 1654 if (fp->f_gcflag & FMARK) 1655 return; 1656 unp_defer++; 1657 fp->f_gcflag |= (FMARK|FDEFER); 1658} 1659 1660static void 1661unp_discard(fp) 1662 struct file *fp; 1663{ 1664 FILE_LOCK(fp); 1665 fp->f_msgcount--; 1666 unp_rights--; 1667 FILE_UNLOCK(fp); 1668 (void) closef(fp, (struct thread *)NULL); 1669}
| 382 error = EPIPE; 383 break; 384 } 385 if (unp->unp_conn == NULL) 386 panic("uipc_send connected but no connection?"); 387 so2 = unp->unp_conn->unp_socket; 388 /* 389 * Send to paired receive port, and then reduce 390 * send buffer hiwater marks to maintain backpressure. 391 * Wake up readers. 392 */ 393 if (control != NULL) { 394 if (sbappendcontrol(&so2->so_rcv, m, control)) 395 control = NULL; 396 } else { 397 sbappend(&so2->so_rcv, m); 398 } 399 so->so_snd.sb_mbmax -= 400 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; 401 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; 402 newhiwat = so->so_snd.sb_hiwat - 403 (so2->so_rcv.sb_cc - unp->unp_conn->unp_cc); 404 (void)chgsbsize(so->so_cred->cr_uidinfo, &so->so_snd.sb_hiwat, 405 newhiwat, RLIM_INFINITY); 406 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; 407 sorwakeup(so2); 408 m = NULL; 409 break; 410 411 default: 412 panic("uipc_send unknown socktype"); 413 } 414 415 /* 416 * SEND_EOF is equivalent to a SEND followed by 417 * a SHUTDOWN. 418 */ 419 if (flags & PRUS_EOF) { 420 socantsendmore(so); 421 unp_shutdown(unp); 422 } 423 UNP_UNLOCK(); 424 425 if (control != NULL && error != 0) 426 unp_dispose(control); 427 428release: 429 if (control != NULL) 430 m_freem(control); 431 if (m != NULL) 432 m_freem(m); 433 return (error); 434} 435 436static int 437uipc_sense(struct socket *so, struct stat *sb) 438{ 439 struct unpcb *unp = sotounpcb(so); 440 struct socket *so2; 441 442 if (unp == NULL) 443 return (EINVAL); 444 UNP_LOCK(); 445 sb->st_blksize = so->so_snd.sb_hiwat; 446 if (so->so_type == SOCK_STREAM && unp->unp_conn != NULL) { 447 so2 = unp->unp_conn->unp_socket; 448 sb->st_blksize += so2->so_rcv.sb_cc; 449 } 450 sb->st_dev = NOUDEV; 451 if (unp->unp_ino == 0) 452 unp->unp_ino = (++unp_ino == 0) ? 
++unp_ino : unp_ino; 453 sb->st_ino = unp->unp_ino; 454 UNP_UNLOCK(); 455 return (0); 456} 457 458static int 459uipc_shutdown(struct socket *so) 460{ 461 struct unpcb *unp = sotounpcb(so); 462 463 if (unp == NULL) 464 return (EINVAL); 465 UNP_LOCK(); 466 socantsendmore(so); 467 unp_shutdown(unp); 468 UNP_UNLOCK(); 469 return (0); 470} 471 472static int 473uipc_sockaddr(struct socket *so, struct sockaddr **nam) 474{ 475 struct unpcb *unp = sotounpcb(so); 476 const struct sockaddr *sa; 477 478 if (unp == NULL) 479 return (EINVAL); 480 *nam = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 481 UNP_LOCK(); 482 if (unp->unp_addr != NULL) 483 sa = (struct sockaddr *) unp->unp_addr; 484 else 485 sa = &sun_noname; 486 bcopy(sa, *nam, sa->sa_len); 487 UNP_UNLOCK(); 488 return (0); 489} 490 491struct pr_usrreqs uipc_usrreqs = { 492 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 493 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 494 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 495 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 496 sosend, soreceive, sopoll, pru_sosetlabel_null 497}; 498 499int 500uipc_ctloutput(so, sopt) 501 struct socket *so; 502 struct sockopt *sopt; 503{ 504 struct unpcb *unp = sotounpcb(so); 505 struct xucred xu; 506 int error; 507 508 switch (sopt->sopt_dir) { 509 case SOPT_GET: 510 switch (sopt->sopt_name) { 511 case LOCAL_PEERCRED: 512 error = 0; 513 UNP_LOCK(); 514 if (unp->unp_flags & UNP_HAVEPC) 515 xu = unp->unp_peercred; 516 else { 517 if (so->so_type == SOCK_STREAM) 518 error = ENOTCONN; 519 else 520 error = EINVAL; 521 } 522 UNP_UNLOCK(); 523 if (error == 0) 524 error = sooptcopyout(sopt, &xu, sizeof(xu)); 525 break; 526 default: 527 error = EOPNOTSUPP; 528 break; 529 } 530 break; 531 case SOPT_SET: 532 default: 533 error = EOPNOTSUPP; 534 break; 535 } 536 return (error); 537} 538 539/* 540 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 541 * for stream 
sockets, although the total for sender and receiver is 542 * actually only PIPSIZ. 543 * Datagram sockets really use the sendspace as the maximum datagram size, 544 * and don't really want to reserve the sendspace. Their recvspace should 545 * be large enough for at least one max-size datagram plus address. 546 */ 547#ifndef PIPSIZ 548#define PIPSIZ 8192 549#endif 550static u_long unpst_sendspace = PIPSIZ; 551static u_long unpst_recvspace = PIPSIZ; 552static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 553static u_long unpdg_recvspace = 4*1024; 554 555static int unp_rights; /* file descriptors in flight */ 556 557SYSCTL_DECL(_net_local_stream); 558SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 559 &unpst_sendspace, 0, ""); 560SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 561 &unpst_recvspace, 0, ""); 562SYSCTL_DECL(_net_local_dgram); 563SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 564 &unpdg_sendspace, 0, ""); 565SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 566 &unpdg_recvspace, 0, ""); 567SYSCTL_DECL(_net_local); 568SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 569 570static int 571unp_attach(so) 572 struct socket *so; 573{ 574 register struct unpcb *unp; 575 int error; 576 577 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 578 switch (so->so_type) { 579 580 case SOCK_STREAM: 581 error = soreserve(so, unpst_sendspace, unpst_recvspace); 582 break; 583 584 case SOCK_DGRAM: 585 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 586 break; 587 588 default: 589 panic("unp_attach"); 590 } 591 if (error) 592 return (error); 593 } 594 unp = uma_zalloc(unp_zone, M_WAITOK); 595 if (unp == NULL) 596 return (ENOBUFS); 597 bzero(unp, sizeof *unp); 598 LIST_INIT(&unp->unp_refs); 599 unp->unp_socket = so; 600 601 UNP_LOCK(); 602 unp->unp_gencnt = ++unp_gencnt; 603 unp_count++; 604 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? 
&unp_dhead 605 : &unp_shead, unp, unp_link); 606 UNP_UNLOCK(); 607 608 so->so_pcb = unp; 609 return (0); 610} 611 612static void 613unp_detach(unp) 614 register struct unpcb *unp; 615{ 616 struct vnode *vp; 617 618 UNP_LOCK_ASSERT(); 619 620 LIST_REMOVE(unp, unp_link); 621 unp->unp_gencnt = ++unp_gencnt; 622 --unp_count; 623 if ((vp = unp->unp_vnode) != NULL) { 624 /* 625 * XXXRW: should v_socket be frobbed only while holding 626 * Giant? 627 */ 628 unp->unp_vnode->v_socket = NULL; 629 unp->unp_vnode = NULL; 630 } 631 if (unp->unp_conn != NULL) 632 unp_disconnect(unp); 633 while (!LIST_EMPTY(&unp->unp_refs)) { 634 struct unpcb *ref = LIST_FIRST(&unp->unp_refs); 635 unp_drop(ref, ECONNRESET); 636 } 637 soisdisconnected(unp->unp_socket); 638 unp->unp_socket->so_pcb = NULL; 639 if (unp_rights) { 640 /* 641 * Normally the receive buffer is flushed later, 642 * in sofree, but if our receive buffer holds references 643 * to descriptors that are now garbage, we will dispose 644 * of those descriptor references after the garbage collector 645 * gets them (resulting in a "panic: closef: count < 0"). 646 */ 647 sorflush(unp->unp_socket); 648 unp_gc(); 649 } 650 if (unp->unp_addr != NULL) 651 FREE(unp->unp_addr, M_SONAME); 652 UNP_UNLOCK(); 653 uma_zfree(unp_zone, unp); 654 if (vp) { 655 mtx_lock(&Giant); 656 vrele(vp); 657 mtx_unlock(&Giant); 658 } 659} 660 661static int 662unp_bind(unp, nam, td) 663 struct unpcb *unp; 664 struct sockaddr *nam; 665 struct thread *td; 666{ 667 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 668 struct vnode *vp; 669 struct mount *mp; 670 struct vattr vattr; 671 int error, namelen; 672 struct nameidata nd; 673 char *buf; 674 675 /* 676 * XXXRW: This test-and-set of unp_vnode is non-atomic; the 677 * unlocked read here is fine, but the value of unp_vnode needs 678 * to be tested again after we do all the lookups to see if the 679 * pcb is still unbound? 
680 */ 681 if (unp->unp_vnode != NULL) 682 return (EINVAL); 683 684 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 685 if (namelen <= 0) 686 return (EINVAL); 687 688 buf = malloc(namelen + 1, M_TEMP, M_WAITOK); 689 strlcpy(buf, soun->sun_path, namelen + 1); 690 691 mtx_lock(&Giant); 692restart: 693 mtx_assert(&Giant, MA_OWNED); 694 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT | SAVENAME, UIO_SYSSPACE, 695 buf, td); 696/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 697 error = namei(&nd); 698 if (error) 699 goto done; 700 vp = nd.ni_vp; 701 if (vp != NULL || vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 702 NDFREE(&nd, NDF_ONLY_PNBUF); 703 if (nd.ni_dvp == vp) 704 vrele(nd.ni_dvp); 705 else 706 vput(nd.ni_dvp); 707 if (vp != NULL) { 708 vrele(vp); 709 error = EADDRINUSE; 710 goto done; 711 } 712 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 713 if (error) 714 goto done; 715 goto restart; 716 } 717 VATTR_NULL(&vattr); 718 vattr.va_type = VSOCK; 719 vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); 720#ifdef MAC 721 error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 722 &vattr); 723#endif 724 if (error == 0) { 725 VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); 726 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 727 } 728 NDFREE(&nd, NDF_ONLY_PNBUF); 729 vput(nd.ni_dvp); 730 if (error) 731 goto done; 732 vp = nd.ni_vp; 733 ASSERT_VOP_LOCKED(vp, "unp_bind"); 734 soun = (struct sockaddr_un *)sodupsockaddr(nam, M_WAITOK); 735 UNP_LOCK(); 736 vp->v_socket = unp->unp_socket; 737 unp->unp_vnode = vp; 738 unp->unp_addr = soun; 739 UNP_UNLOCK(); 740 VOP_UNLOCK(vp, 0, td); 741 vn_finished_write(mp); 742done: 743 mtx_unlock(&Giant); 744 free(buf, M_TEMP); 745 return (error); 746} 747 748static int 749unp_connect(so, nam, td) 750 struct socket *so; 751 struct sockaddr *nam; 752 struct thread *td; 753{ 754 register struct sockaddr_un *soun = (struct sockaddr_un *)nam; 755 register struct 
vnode *vp; 756 register struct socket *so2, *so3; 757 struct unpcb *unp = sotounpcb(so); 758 struct unpcb *unp2, *unp3; 759 int error, len; 760 struct nameidata nd; 761 char buf[SOCK_MAXADDRLEN]; 762 struct sockaddr *sa; 763 764 UNP_LOCK_ASSERT(); 765 766 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 767 if (len <= 0) 768 return (EINVAL); 769 strlcpy(buf, soun->sun_path, len + 1); 770 UNP_UNLOCK(); 771 sa = malloc(sizeof(struct sockaddr_un), M_SONAME, M_WAITOK); 772 mtx_lock(&Giant); 773 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, td); 774 error = namei(&nd); 775 if (error) 776 vp = NULL; 777 else 778 vp = nd.ni_vp; 779 ASSERT_VOP_LOCKED(vp, "unp_connect"); 780 NDFREE(&nd, NDF_ONLY_PNBUF); 781 if (error) 782 goto bad; 783 784 if (vp->v_type != VSOCK) { 785 error = ENOTSOCK; 786 goto bad; 787 } 788 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 789 if (error) 790 goto bad; 791 so2 = vp->v_socket; 792 if (so2 == NULL) { 793 error = ECONNREFUSED; 794 goto bad; 795 } 796 if (so->so_type != so2->so_type) { 797 error = EPROTOTYPE; 798 goto bad; 799 } 800 mtx_unlock(&Giant); 801 UNP_LOCK(); 802 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 803 if (so2->so_options & SO_ACCEPTCONN) { 804 /* 805 * NB: drop locks here so unp_attach is entered 806 * w/o locks; this avoids a recursive lock 807 * of the head and holding sleep locks across 808 * a (potentially) blocking malloc. 
809 */ 810 UNP_UNLOCK(); 811 so3 = sonewconn(so2, 0); 812 UNP_LOCK(); 813 } else 814 so3 = NULL; 815 if (so3 == NULL) { 816 error = ECONNREFUSED; 817 goto bad2; 818 } 819 unp = sotounpcb(so); 820 unp2 = sotounpcb(so2); 821 unp3 = sotounpcb(so3); 822 if (unp2->unp_addr != NULL) { 823 bcopy(unp2->unp_addr, sa, unp2->unp_addr->sun_len); 824 unp3->unp_addr = (struct sockaddr_un *) sa; 825 sa = NULL; 826 } 827 /* 828 * unp_peercred management: 829 * 830 * The connecter's (client's) credentials are copied 831 * from its process structure at the time of connect() 832 * (which is now). 833 */ 834 cru2x(td->td_ucred, &unp3->unp_peercred); 835 unp3->unp_flags |= UNP_HAVEPC; 836 /* 837 * The receiver's (server's) credentials are copied 838 * from the unp_peercred member of socket on which the 839 * former called listen(); unp_listen() cached that 840 * process's credentials at that time so we can use 841 * them now. 842 */ 843 KASSERT(unp2->unp_flags & UNP_HAVEPCCACHED, 844 ("unp_connect: listener without cached peercred")); 845 memcpy(&unp->unp_peercred, &unp2->unp_peercred, 846 sizeof(unp->unp_peercred)); 847 unp->unp_flags |= UNP_HAVEPC; 848#ifdef MAC 849 SOCK_LOCK(so); 850 mac_set_socket_peer_from_socket(so, so3); 851 mac_set_socket_peer_from_socket(so3, so); 852 SOCK_UNLOCK(so); 853#endif 854 855 so2 = so3; 856 } 857 error = unp_connect2(so, so2); 858bad2: 859 UNP_UNLOCK(); 860 mtx_lock(&Giant); 861bad: 862 mtx_assert(&Giant, MA_OWNED); 863 if (vp != NULL) 864 vput(vp); 865 mtx_unlock(&Giant); 866 free(sa, M_SONAME); 867 UNP_LOCK(); 868 return (error); 869} 870 871static int 872unp_connect2(so, so2) 873 register struct socket *so; 874 register struct socket *so2; 875{ 876 register struct unpcb *unp = sotounpcb(so); 877 register struct unpcb *unp2; 878 879 UNP_LOCK_ASSERT(); 880 881 if (so2->so_type != so->so_type) 882 return (EPROTOTYPE); 883 unp2 = sotounpcb(so2); 884 unp->unp_conn = unp2; 885 switch (so->so_type) { 886 887 case SOCK_DGRAM: 888 
LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 889 soisconnected(so); 890 break; 891 892 case SOCK_STREAM: 893 unp2->unp_conn = unp; 894 soisconnected(so); 895 soisconnected(so2); 896 break; 897 898 default: 899 panic("unp_connect2"); 900 } 901 return (0); 902} 903 904static void 905unp_disconnect(unp) 906 struct unpcb *unp; 907{ 908 register struct unpcb *unp2 = unp->unp_conn; 909 910 UNP_LOCK_ASSERT(); 911 912 if (unp2 == NULL) 913 return; 914 unp->unp_conn = NULL; 915 switch (unp->unp_socket->so_type) { 916 917 case SOCK_DGRAM: 918 LIST_REMOVE(unp, unp_reflink); 919 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 920 break; 921 922 case SOCK_STREAM: 923 soisdisconnected(unp->unp_socket); 924 unp2->unp_conn = NULL; 925 soisdisconnected(unp2->unp_socket); 926 break; 927 } 928} 929 930#ifdef notdef 931void 932unp_abort(unp) 933 struct unpcb *unp; 934{ 935 936 unp_detach(unp); 937} 938#endif 939 940/* 941 * unp_pcblist() assumes that UNIX domain socket memory is never reclaimed 942 * by the zone (UMA_ZONE_NOFREE), and as such potentially stale pointers 943 * are safe to reference. It first scans the list of struct unpcb's to 944 * generate a pointer list, then it rescans its list one entry at a time to 945 * externalize and copyout. It checks the generation number to see if a 946 * struct unpcb has been reused, and will skip it if so. 947 */ 948static int 949unp_pcblist(SYSCTL_HANDLER_ARGS) 950{ 951 int error, i, n; 952 struct unpcb *unp, **unp_list; 953 unp_gen_t gencnt; 954 struct xunpgen *xug; 955 struct unp_head *head; 956 struct xunpcb *xu; 957 958 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 959 960 /* 961 * The process of preparing the PCB list is too time-consuming and 962 * resource-intensive to repeat twice on every request. 
963 */ 964 if (req->oldptr == NULL) { 965 n = unp_count; 966 req->oldidx = 2 * (sizeof *xug) 967 + (n + n/8) * sizeof(struct xunpcb); 968 return (0); 969 } 970 971 if (req->newptr != NULL) 972 return (EPERM); 973 974 /* 975 * OK, now we're committed to doing something. 976 */ 977 xug = malloc(sizeof(*xug), M_TEMP, M_WAITOK); 978 UNP_LOCK(); 979 gencnt = unp_gencnt; 980 n = unp_count; 981 UNP_UNLOCK(); 982 983 xug->xug_len = sizeof *xug; 984 xug->xug_count = n; 985 xug->xug_gen = gencnt; 986 xug->xug_sogen = so_gencnt; 987 error = SYSCTL_OUT(req, xug, sizeof *xug); 988 if (error) { 989 free(xug, M_TEMP); 990 return (error); 991 } 992 993 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 994 995 UNP_LOCK(); 996 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 997 unp = LIST_NEXT(unp, unp_link)) { 998 if (unp->unp_gencnt <= gencnt) { 999 if (cr_cansee(req->td->td_ucred, 1000 unp->unp_socket->so_cred)) 1001 continue; 1002 unp_list[i++] = unp; 1003 } 1004 } 1005 UNP_UNLOCK(); 1006 n = i; /* in case we lost some during malloc */ 1007 1008 error = 0; 1009 xu = malloc(sizeof(*xu), M_TEMP, M_WAITOK); 1010 for (i = 0; i < n; i++) { 1011 unp = unp_list[i]; 1012 if (unp->unp_gencnt <= gencnt) { 1013 xu->xu_len = sizeof *xu; 1014 xu->xu_unpp = unp; 1015 /* 1016 * XXX - need more locking here to protect against 1017 * connect/disconnect races for SMP. 1018 */ 1019 if (unp->unp_addr != NULL) 1020 bcopy(unp->unp_addr, &xu->xu_addr, 1021 unp->unp_addr->sun_len); 1022 if (unp->unp_conn != NULL && 1023 unp->unp_conn->unp_addr != NULL) 1024 bcopy(unp->unp_conn->unp_addr, 1025 &xu->xu_caddr, 1026 unp->unp_conn->unp_addr->sun_len); 1027 bcopy(unp, &xu->xu_unp, sizeof *unp); 1028 sotoxsocket(unp->unp_socket, &xu->xu_socket); 1029 error = SYSCTL_OUT(req, xu, sizeof *xu); 1030 } 1031 } 1032 free(xu, M_TEMP); 1033 if (!error) { 1034 /* 1035 * Give the user an updated idea of our state. 
1036 * If the generation differs from what we told 1037 * her before, she knows that something happened 1038 * while we were processing this request, and it 1039 * might be necessary to retry. 1040 */ 1041 xug->xug_gen = unp_gencnt; 1042 xug->xug_sogen = so_gencnt; 1043 xug->xug_count = unp_count; 1044 error = SYSCTL_OUT(req, xug, sizeof *xug); 1045 } 1046 free(unp_list, M_TEMP); 1047 free(xug, M_TEMP); 1048 return (error); 1049} 1050 1051SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 1052 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 1053 "List of active local datagram sockets"); 1054SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 1055 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 1056 "List of active local stream sockets"); 1057 1058static void 1059unp_shutdown(unp) 1060 struct unpcb *unp; 1061{ 1062 struct socket *so; 1063 1064 UNP_LOCK_ASSERT(); 1065 1066 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 1067 (so = unp->unp_conn->unp_socket)) 1068 socantrcvmore(so); 1069} 1070 1071static void 1072unp_drop(unp, errno) 1073 struct unpcb *unp; 1074 int errno; 1075{ 1076 struct socket *so = unp->unp_socket; 1077 1078 UNP_LOCK_ASSERT(); 1079 1080 so->so_error = errno; 1081 unp_disconnect(unp); 1082} 1083 1084#ifdef notdef 1085void 1086unp_drain() 1087{ 1088 1089} 1090#endif 1091 1092static void 1093unp_freerights(rp, fdcount) 1094 struct file **rp; 1095 int fdcount; 1096{ 1097 int i; 1098 struct file *fp; 1099 1100 for (i = 0; i < fdcount; i++) { 1101 fp = *rp; 1102 /* 1103 * zero the pointer before calling 1104 * unp_discard since it may end up 1105 * in unp_gc().. 
1106 */ 1107 *rp++ = 0; 1108 unp_discard(fp); 1109 } 1110} 1111 1112int 1113unp_externalize(control, controlp) 1114 struct mbuf *control, **controlp; 1115{ 1116 struct thread *td = curthread; /* XXX */ 1117 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1118 int i; 1119 int *fdp; 1120 struct file **rp; 1121 struct file *fp; 1122 void *data; 1123 socklen_t clen = control->m_len, datalen; 1124 int error, newfds; 1125 int f; 1126 u_int newlen; 1127 1128 error = 0; 1129 if (controlp != NULL) /* controlp == NULL => free control messages */ 1130 *controlp = NULL; 1131 1132 while (cm != NULL) { 1133 if (sizeof(*cm) > clen || cm->cmsg_len > clen) { 1134 error = EINVAL; 1135 break; 1136 } 1137 1138 data = CMSG_DATA(cm); 1139 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1140 1141 if (cm->cmsg_level == SOL_SOCKET 1142 && cm->cmsg_type == SCM_RIGHTS) { 1143 newfds = datalen / sizeof(struct file *); 1144 rp = data; 1145 1146 /* If we're not outputting the descriptors free them. */ 1147 if (error || controlp == NULL) { 1148 unp_freerights(rp, newfds); 1149 goto next; 1150 } 1151 FILEDESC_LOCK(td->td_proc->p_fd); 1152 /* if the new FD's will not fit free them. */ 1153 if (!fdavail(td, newfds)) { 1154 FILEDESC_UNLOCK(td->td_proc->p_fd); 1155 error = EMSGSIZE; 1156 unp_freerights(rp, newfds); 1157 goto next; 1158 } 1159 /* 1160 * now change each pointer to an fd in the global 1161 * table to an integer that is the index to the 1162 * local fd table entry that we set up to point 1163 * to the global one we are transferring. 
1164 */ 1165 newlen = newfds * sizeof(int); 1166 *controlp = sbcreatecontrol(NULL, newlen, 1167 SCM_RIGHTS, SOL_SOCKET); 1168 if (*controlp == NULL) { 1169 FILEDESC_UNLOCK(td->td_proc->p_fd); 1170 error = E2BIG; 1171 unp_freerights(rp, newfds); 1172 goto next; 1173 } 1174 1175 fdp = (int *) 1176 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1177 for (i = 0; i < newfds; i++) { 1178 if (fdalloc(td, 0, &f)) 1179 panic("unp_externalize fdalloc failed"); 1180 fp = *rp++; 1181 td->td_proc->p_fd->fd_ofiles[f] = fp; 1182 FILE_LOCK(fp); 1183 fp->f_msgcount--; 1184 FILE_UNLOCK(fp); 1185 unp_rights--; 1186 *fdp++ = f; 1187 } 1188 FILEDESC_UNLOCK(td->td_proc->p_fd); 1189 } else { /* We can just copy anything else across */ 1190 if (error || controlp == NULL) 1191 goto next; 1192 *controlp = sbcreatecontrol(NULL, datalen, 1193 cm->cmsg_type, cm->cmsg_level); 1194 if (*controlp == NULL) { 1195 error = ENOBUFS; 1196 goto next; 1197 } 1198 bcopy(data, 1199 CMSG_DATA(mtod(*controlp, struct cmsghdr *)), 1200 datalen); 1201 } 1202 1203 controlp = &(*controlp)->m_next; 1204 1205next: 1206 if (CMSG_SPACE(datalen) < clen) { 1207 clen -= CMSG_SPACE(datalen); 1208 cm = (struct cmsghdr *) 1209 ((caddr_t)cm + CMSG_SPACE(datalen)); 1210 } else { 1211 clen = 0; 1212 cm = NULL; 1213 } 1214 } 1215 1216 m_freem(control); 1217 1218 return (error); 1219} 1220 1221void 1222unp_init(void) 1223{ 1224 unp_zone = uma_zcreate("unpcb", sizeof(struct unpcb), NULL, NULL, 1225 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); 1226 if (unp_zone == NULL) 1227 panic("unp_init"); 1228 uma_zone_set_max(unp_zone, nmbclusters); 1229 LIST_INIT(&unp_dhead); 1230 LIST_INIT(&unp_shead); 1231 1232 UNP_LOCK_INIT(); 1233} 1234 1235static int 1236unp_internalize(controlp, td) 1237 struct mbuf **controlp; 1238 struct thread *td; 1239{ 1240 struct mbuf *control = *controlp; 1241 struct proc *p = td->td_proc; 1242 struct filedesc *fdescp = p->p_fd; 1243 struct cmsghdr *cm = mtod(control, struct cmsghdr *); 1244 struct cmsgcred 
*cmcred; 1245 struct file **rp; 1246 struct file *fp; 1247 struct timeval *tv; 1248 int i, fd, *fdp; 1249 void *data; 1250 socklen_t clen = control->m_len, datalen; 1251 int error, oldfds; 1252 u_int newlen; 1253 1254 error = 0; 1255 *controlp = NULL; 1256 1257 while (cm != NULL) { 1258 if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET 1259 || cm->cmsg_len > clen) { 1260 error = EINVAL; 1261 goto out; 1262 } 1263 1264 data = CMSG_DATA(cm); 1265 datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; 1266 1267 switch (cm->cmsg_type) { 1268 /* 1269 * Fill in credential information. 1270 */ 1271 case SCM_CREDS: 1272 *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), 1273 SCM_CREDS, SOL_SOCKET); 1274 if (*controlp == NULL) { 1275 error = ENOBUFS; 1276 goto out; 1277 } 1278 1279 cmcred = (struct cmsgcred *) 1280 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1281 cmcred->cmcred_pid = p->p_pid; 1282 cmcred->cmcred_uid = td->td_ucred->cr_ruid; 1283 cmcred->cmcred_gid = td->td_ucred->cr_rgid; 1284 cmcred->cmcred_euid = td->td_ucred->cr_uid; 1285 cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, 1286 CMGROUP_MAX); 1287 for (i = 0; i < cmcred->cmcred_ngroups; i++) 1288 cmcred->cmcred_groups[i] = 1289 td->td_ucred->cr_groups[i]; 1290 break; 1291 1292 case SCM_RIGHTS: 1293 oldfds = datalen / sizeof (int); 1294 /* 1295 * check that all the FDs passed in refer to legal files 1296 * If not, reject the entire operation. 1297 */ 1298 fdp = data; 1299 FILEDESC_LOCK(fdescp); 1300 for (i = 0; i < oldfds; i++) { 1301 fd = *fdp++; 1302 if ((unsigned)fd >= fdescp->fd_nfiles || 1303 fdescp->fd_ofiles[fd] == NULL) { 1304 FILEDESC_UNLOCK(fdescp); 1305 error = EBADF; 1306 goto out; 1307 } 1308 fp = fdescp->fd_ofiles[fd]; 1309 if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { 1310 FILEDESC_UNLOCK(fdescp); 1311 error = EOPNOTSUPP; 1312 goto out; 1313 } 1314 1315 } 1316 /* 1317 * Now replace the integer FDs with pointers to 1318 * the associated global file table entry.. 
1319 */ 1320 newlen = oldfds * sizeof(struct file *); 1321 *controlp = sbcreatecontrol(NULL, newlen, 1322 SCM_RIGHTS, SOL_SOCKET); 1323 if (*controlp == NULL) { 1324 FILEDESC_UNLOCK(fdescp); 1325 error = E2BIG; 1326 goto out; 1327 } 1328 1329 fdp = data; 1330 rp = (struct file **) 1331 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1332 for (i = 0; i < oldfds; i++) { 1333 fp = fdescp->fd_ofiles[*fdp++]; 1334 *rp++ = fp; 1335 FILE_LOCK(fp); 1336 fp->f_count++; 1337 fp->f_msgcount++; 1338 FILE_UNLOCK(fp); 1339 unp_rights++; 1340 } 1341 FILEDESC_UNLOCK(fdescp); 1342 break; 1343 1344 case SCM_TIMESTAMP: 1345 *controlp = sbcreatecontrol(NULL, sizeof(*tv), 1346 SCM_TIMESTAMP, SOL_SOCKET); 1347 if (*controlp == NULL) { 1348 error = ENOBUFS; 1349 goto out; 1350 } 1351 tv = (struct timeval *) 1352 CMSG_DATA(mtod(*controlp, struct cmsghdr *)); 1353 microtime(tv); 1354 break; 1355 1356 default: 1357 error = EINVAL; 1358 goto out; 1359 } 1360 1361 controlp = &(*controlp)->m_next; 1362 1363 if (CMSG_SPACE(datalen) < clen) { 1364 clen -= CMSG_SPACE(datalen); 1365 cm = (struct cmsghdr *) 1366 ((caddr_t)cm + CMSG_SPACE(datalen)); 1367 } else { 1368 clen = 0; 1369 cm = NULL; 1370 } 1371 } 1372 1373out: 1374 m_freem(control); 1375 1376 return (error); 1377} 1378 1379static int unp_defer, unp_gcing; 1380 1381static void 1382unp_gc() 1383{ 1384 register struct file *fp, *nextfp; 1385 register struct socket *so; 1386 struct file **extra_ref, **fpp; 1387 int nunref, i; 1388 1389 UNP_LOCK_ASSERT(); 1390 1391 if (unp_gcing) 1392 return; 1393 unp_gcing = 1; 1394 unp_defer = 0; 1395 /* 1396 * before going through all this, set all FDs to 1397 * be NOT defered and NOT externally accessible 1398 */ 1399 /* 1400 * XXXRW: Acquiring a sleep lock while holding UNP 1401 * mutex cannot be a good thing. 
1402 */ 1403 sx_slock(&filelist_lock); 1404 LIST_FOREACH(fp, &filehead, f_list) 1405 fp->f_gcflag &= ~(FMARK|FDEFER); 1406 do { 1407 LIST_FOREACH(fp, &filehead, f_list) { 1408 FILE_LOCK(fp); 1409 /* 1410 * If the file is not open, skip it 1411 */ 1412 if (fp->f_count == 0) { 1413 FILE_UNLOCK(fp); 1414 continue; 1415 } 1416 /* 1417 * If we already marked it as 'defer' in a 1418 * previous pass, then try process it this time 1419 * and un-mark it 1420 */ 1421 if (fp->f_gcflag & FDEFER) { 1422 fp->f_gcflag &= ~FDEFER; 1423 unp_defer--; 1424 } else { 1425 /* 1426 * if it's not defered, then check if it's 1427 * already marked.. if so skip it 1428 */ 1429 if (fp->f_gcflag & FMARK) { 1430 FILE_UNLOCK(fp); 1431 continue; 1432 } 1433 /* 1434 * If all references are from messages 1435 * in transit, then skip it. it's not 1436 * externally accessible. 1437 */ 1438 if (fp->f_count == fp->f_msgcount) { 1439 FILE_UNLOCK(fp); 1440 continue; 1441 } 1442 /* 1443 * If it got this far then it must be 1444 * externally accessible. 1445 */ 1446 fp->f_gcflag |= FMARK; 1447 } 1448 /* 1449 * either it was defered, or it is externally 1450 * accessible and not already marked so. 1451 * Now check if it is possibly one of OUR sockets. 1452 */ 1453 if (fp->f_type != DTYPE_SOCKET || 1454 (so = fp->f_data) == NULL) { 1455 FILE_UNLOCK(fp); 1456 continue; 1457 } 1458 FILE_UNLOCK(fp); 1459 if (so->so_proto->pr_domain != &localdomain || 1460 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1461 continue; 1462#ifdef notdef 1463 if (so->so_rcv.sb_flags & SB_LOCK) { 1464 /* 1465 * This is problematical; it's not clear 1466 * we need to wait for the sockbuf to be 1467 * unlocked (on a uniprocessor, at least), 1468 * and it's also not clear what to do 1469 * if sbwait returns an error due to receipt 1470 * of a signal. If sbwait does return 1471 * an error, we'll go into an infinite 1472 * loop. Delete all of this for now. 
1473 */ 1474 (void) sbwait(&so->so_rcv); 1475 goto restart; 1476 } 1477#endif 1478 /* 1479 * So, Ok, it's one of our sockets and it IS externally 1480 * accessible (or was defered). Now we look 1481 * to see if we hold any file descriptors in its 1482 * message buffers. Follow those links and mark them 1483 * as accessible too. 1484 */ 1485 unp_scan(so->so_rcv.sb_mb, unp_mark); 1486 } 1487 } while (unp_defer); 1488 sx_sunlock(&filelist_lock); 1489 /* 1490 * We grab an extra reference to each of the file table entries 1491 * that are not otherwise accessible and then free the rights 1492 * that are stored in messages on them. 1493 * 1494 * The bug in the orginal code is a little tricky, so I'll describe 1495 * what's wrong with it here. 1496 * 1497 * It is incorrect to simply unp_discard each entry for f_msgcount 1498 * times -- consider the case of sockets A and B that contain 1499 * references to each other. On a last close of some other socket, 1500 * we trigger a gc since the number of outstanding rights (unp_rights) 1501 * is non-zero. If during the sweep phase the gc code un_discards, 1502 * we end up doing a (full) closef on the descriptor. A closef on A 1503 * results in the following chain. Closef calls soo_close, which 1504 * calls soclose. Soclose calls first (through the switch 1505 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1506 * returns because the previous instance had set unp_gcing, and 1507 * we return all the way back to soclose, which marks the socket 1508 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1509 * to free up the rights that are queued in messages on the socket A, 1510 * i.e., the reference on B. The sorflush calls via the dom_dispose 1511 * switch unp_dispose, which unp_scans with unp_discard. This second 1512 * instance of unp_discard just calls closef on B. 1513 * 1514 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1515 * which results in another closef on A. 
Unfortunately, A is already 1516 * being closed, and the descriptor has already been marked with 1517 * SS_NOFDREF, and soclose panics at this point. 1518 * 1519 * Here, we first take an extra reference to each inaccessible 1520 * descriptor. Then, we call sorflush ourself, since we know 1521 * it is a Unix domain socket anyhow. After we destroy all the 1522 * rights carried in messages, we do a last closef to get rid 1523 * of our extra reference. This is the last close, and the 1524 * unp_detach etc will shut down the socket. 1525 * 1526 * 91/09/19, bsy@cs.cmu.edu 1527 */ 1528 extra_ref = malloc(nfiles * sizeof(struct file *), M_TEMP, M_WAITOK); 1529 sx_slock(&filelist_lock); 1530 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; 1531 fp != NULL; fp = nextfp) { 1532 nextfp = LIST_NEXT(fp, f_list); 1533 FILE_LOCK(fp); 1534 /* 1535 * If it's not open, skip it 1536 */ 1537 if (fp->f_count == 0) { 1538 FILE_UNLOCK(fp); 1539 continue; 1540 } 1541 /* 1542 * If all refs are from msgs, and it's not marked accessible 1543 * then it must be referenced from some unreachable cycle 1544 * of (shut-down) FDs, so include it in our 1545 * list of FDs to remove 1546 */ 1547 if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { 1548 *fpp++ = fp; 1549 nunref++; 1550 fp->f_count++; 1551 } 1552 FILE_UNLOCK(fp); 1553 } 1554 sx_sunlock(&filelist_lock); 1555 /* 1556 * for each FD on our hit list, do the following two things 1557 */ 1558 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1559 struct file *tfp = *fpp; 1560 FILE_LOCK(tfp); 1561 if (tfp->f_type == DTYPE_SOCKET && 1562 tfp->f_data != NULL) { 1563 FILE_UNLOCK(tfp); 1564 sorflush(tfp->f_data); 1565 } else { 1566 FILE_UNLOCK(tfp); 1567 } 1568 } 1569 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1570 closef(*fpp, (struct thread *) NULL); 1571 free(extra_ref, M_TEMP); 1572 unp_gcing = 0; 1573} 1574 1575void 1576unp_dispose(m) 1577 struct mbuf *m; 1578{ 1579 1580 if (m) 1581 unp_scan(m, unp_discard); 
1582} 1583 1584static int 1585unp_listen(unp, td) 1586 struct unpcb *unp; 1587 struct thread *td; 1588{ 1589 UNP_LOCK_ASSERT(); 1590 1591 /* 1592 * XXXRW: Why populate the local peer cred with our own credential? 1593 */ 1594 cru2x(td->td_ucred, &unp->unp_peercred); 1595 unp->unp_flags |= UNP_HAVEPCCACHED; 1596 return (0); 1597} 1598 1599static void 1600unp_scan(m0, op) 1601 register struct mbuf *m0; 1602 void (*op)(struct file *); 1603{ 1604 struct mbuf *m; 1605 struct file **rp; 1606 struct cmsghdr *cm; 1607 void *data; 1608 int i; 1609 socklen_t clen, datalen; 1610 int qfds; 1611 1612 while (m0 != NULL) { 1613 for (m = m0; m; m = m->m_next) { 1614 if (m->m_type != MT_CONTROL) 1615 continue; 1616 1617 cm = mtod(m, struct cmsghdr *); 1618 clen = m->m_len; 1619 1620 while (cm != NULL) { 1621 if (sizeof(*cm) > clen || cm->cmsg_len > clen) 1622 break; 1623 1624 data = CMSG_DATA(cm); 1625 datalen = (caddr_t)cm + cm->cmsg_len 1626 - (caddr_t)data; 1627 1628 if (cm->cmsg_level == SOL_SOCKET && 1629 cm->cmsg_type == SCM_RIGHTS) { 1630 qfds = datalen / sizeof (struct file *); 1631 rp = data; 1632 for (i = 0; i < qfds; i++) 1633 (*op)(*rp++); 1634 } 1635 1636 if (CMSG_SPACE(datalen) < clen) { 1637 clen -= CMSG_SPACE(datalen); 1638 cm = (struct cmsghdr *) 1639 ((caddr_t)cm + CMSG_SPACE(datalen)); 1640 } else { 1641 clen = 0; 1642 cm = NULL; 1643 } 1644 } 1645 } 1646 m0 = m0->m_act; 1647 } 1648} 1649 1650static void 1651unp_mark(fp) 1652 struct file *fp; 1653{ 1654 if (fp->f_gcflag & FMARK) 1655 return; 1656 unp_defer++; 1657 fp->f_gcflag |= (FMARK|FDEFER); 1658} 1659 1660static void 1661unp_discard(fp) 1662 struct file *fp; 1663{ 1664 FILE_LOCK(fp); 1665 fp->f_msgcount--; 1666 unp_rights--; 1667 FILE_UNLOCK(fp); 1668 (void) closef(fp, (struct thread *)NULL); 1669}
|