uipc_usrreq.c revision 61976
150276Speter/* 2178866Srafan * Copyright (c) 1982, 1986, 1989, 1991, 1993 350276Speter * The Regents of the University of California. All rights reserved. 450276Speter * 550276Speter * Redistribution and use in source and binary forms, with or without 650276Speter * modification, are permitted provided that the following conditions 750276Speter * are met: 850276Speter * 1. Redistributions of source code must retain the above copyright 950276Speter * notice, this list of conditions and the following disclaimer. 1050276Speter * 2. Redistributions in binary form must reproduce the above copyright 1150276Speter * notice, this list of conditions and the following disclaimer in the 1250276Speter * documentation and/or other materials provided with the distribution. 1350276Speter * 3. All advertising materials mentioning features or use of this software 1450276Speter * must display the following acknowledgement: 1550276Speter * This product includes software developed by the University of 1650276Speter * California, Berkeley and its contributors. 1750276Speter * 4. Neither the name of the University nor the names of its contributors 1850276Speter * may be used to endorse or promote products derived from this software 1950276Speter * without specific prior written permission. 2050276Speter * 2150276Speter * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2250276Speter * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2350276Speter * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2450276Speter * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2550276Speter * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2650276Speter * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2750276Speter * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2850276Speter * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2950276Speter * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3050276Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3150276Speter * SUCH DAMAGE. 32166124Srafan * 3350276Speter * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94 3450276Speter * $FreeBSD: head/sys/kern/uipc_usrreq.c 61976 2000-06-22 22:27:16Z alfred $ 3550276Speter */ 3650276Speter 3750276Speter#include <sys/param.h> 3850276Speter#include <sys/systm.h> 3950276Speter#include <sys/kernel.h> 4050276Speter#include <sys/domain.h> 4150276Speter#include <sys/fcntl.h> 4250276Speter#include <sys/malloc.h> /* XXX must be before <sys/file.h> */ 4350276Speter#include <sys/file.h> 4462449Speter#include <sys/filedesc.h> 4550276Speter#include <sys/mbuf.h> 46184989Srafan#include <sys/namei.h> 4750276Speter#include <sys/proc.h> 48184989Srafan#include <sys/protosw.h> 49184989Srafan#include <sys/socket.h> 50184989Srafan#include <sys/socketvar.h> 5176726Speter#include <sys/resourcevar.h> 5262449Speter#include <sys/stat.h> 5350276Speter#include <sys/sysctl.h> 5462449Speter#include <sys/un.h> 5550276Speter#include <sys/unpcb.h> 5662449Speter#include <sys/vnode.h> 5762449Speter 5862449Speter#include <vm/vm_zone.h> 5962449Speter 6062449Speterstatic struct vm_zone *unp_zone; 6150276Speterstatic unp_gen_t unp_gencnt; 6250276Speterstatic u_int unp_count; 6376726Speter 6462449Speterstatic struct unp_head unp_shead, unp_dhead; 6550276Speter 6662449Speter/* 6750276Speter * Unix communications domain. 6862449Speter * 6962449Speter * TODO: 7050276Speter * SEQPACKET, RDM 7162449Speter * rethink name space problems 7250276Speter * need a proper out-of-band 7350276Speter * lock pushdown 7476726Speter */ 7562449Speterstatic struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; 7650276Speterstatic ino_t unp_ino; /* prototype for fake inode numbers */ 7762449Speter 7850276Speterstatic int unp_attach __P((struct socket *)); 79166124Srafanstatic void unp_detach __P((struct unpcb *)); 8062449Speterstatic int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *)); 8150276Speterstatic int unp_connect __P((struct socket *,struct sockaddr *, 8262449Speter struct proc *)); 8362449Speterstatic void unp_disconnect __P((struct unpcb *)); 8462449Speterstatic void unp_shutdown __P((struct unpcb *)); 8550276Speterstatic void unp_drop __P((struct unpcb *, int)); 8650276Speterstatic void unp_gc __P((void)); 8776726Speterstatic void unp_scan __P((struct mbuf *, void (*)(struct file *))); 8862449Speterstatic void unp_mark __P((struct file *)); 8950276Speterstatic void unp_discard __P((struct file *)); 9062449Speterstatic int unp_internalize __P((struct mbuf *, struct proc *)); 9150276Speter 9262449Speterstatic int 9362449Speteruipc_abort(struct socket *so) 9450276Speter{ 9550276Speter struct unpcb *unp = sotounpcb(so); 9662449Speter 9762449Speter if (unp == 0) 9862449Speter return EINVAL; 9962449Speter unp_drop(unp, ECONNABORTED); 10050276Speter return 0; 10150276Speter} 10276726Speter 10362449Speterstatic int 10450276Speteruipc_accept(struct socket *so, struct sockaddr **nam) 105166124Srafan{ 10650276Speter struct unpcb *unp = sotounpcb(so); 10762449Speter 10862449Speter if (unp == 0) 10962449Speter return EINVAL; 11062449Speter 11162449Speter /* 11250276Speter * Pass back name of connected socket, 11350276Speter * if it was bound and we are still connected 11476726Speter * (our peer may have closed already!). 11562449Speter */ 11650276Speter if (unp->unp_conn && unp->unp_conn->unp_addr) { 11762449Speter *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 11850276Speter 1); 11962449Speter } else { 12062449Speter *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1); 12162449Speter } 12297049Speter return 0; 12350276Speter} 12450276Speter 12576726Speterstatic int 12662449Speteruipc_attach(struct socket *so, int proto, struct proc *p) 12750276Speter{ 12862449Speter struct unpcb *unp = sotounpcb(so); 12950276Speter 13062449Speter if (unp != 0) 13162449Speter return EISCONN; 132178866Srafan return unp_attach(so); 13362449Speter} 13462449Speter 13550276Speterstatic int 13650276Speteruipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p) 13776726Speter{ 13862449Speter struct unpcb *unp = sotounpcb(so); 13950276Speter 140184989Srafan if (unp == 0) 141166124Srafan return EINVAL; 14262449Speter 14362449Speter return unp_bind(unp, nam, p); 144184989Srafan} 145166124Srafan 14650276Speterstatic int 14750276Speteruipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p) 14850276Speter{ 14950276Speter struct unpcb *unp = sotounpcb(so); 15076726Speter 15162449Speter if (unp == 0) 15250276Speter return EINVAL; 153184989Srafan return unp_connect(so, nam, curproc); 15450276Speter} 15562449Speter 156184989Srafanstatic int 157166124Srafanuipc_connect2(struct socket *so1, struct socket *so2) 15850276Speter{ 15950276Speter struct unpcb *unp = sotounpcb(so1); 16076726Speter 16162449Speter if (unp == 0) 16250276Speter return EINVAL; 16362449Speter 164166124Srafan return unp_connect2(so1, so2); 165166124Srafan} 166166124Srafan 167166124Srafan/* control is EOPNOTSUPP */ 168166124Srafan 169166124Srafanstatic int 17050276Speteruipc_detach(struct socket *so) 17150276Speter{ 17250276Speter struct unpcb *unp = sotounpcb(so); 17350276Speter 17450276Speter if (unp == 0) 17550276Speter return EINVAL; 17650276Speter 17750276Speter unp_detach(unp); 17850276Speter return 0; 17966963Speter} 18062449Speter 181178866Srafanstatic int 18250276Speteruipc_disconnect(struct socket *so) 18350276Speter{ 18462449Speter struct unpcb *unp = sotounpcb(so); 18550276Speter 18662449Speter if (unp == 0) 18750276Speter return EINVAL; 18862449Speter unp_disconnect(unp); 18976726Speter return 0; 19050276Speter} 19150276Speter 19276726Speterstatic int 19362449Speteruipc_listen(struct socket *so, struct proc *p) 19450276Speter{ 19550276Speter struct unpcb *unp = sotounpcb(so); 196166124Srafan 19750276Speter if (unp == 0 || unp->unp_vnode == 0) 19850276Speter return EINVAL; 19950276Speter return 0; 200184989Srafan} 201184989Srafan 202184989Srafanstatic int 203184989Srafanuipc_peeraddr(struct socket *so, struct sockaddr **nam) 204184989Srafan{ 205184989Srafan struct unpcb *unp = sotounpcb(so); 206184989Srafan 207184989Srafan if (unp == 0) 208184989Srafan return EINVAL; 209184989Srafan if (unp->unp_conn && unp->unp_conn->unp_addr) 210184989Srafan *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr, 211184989Srafan 1); 212184989Srafan return 0; 213184989Srafan} 214184989Srafan 215184989Srafanstatic int 216184989Srafanuipc_rcvd(struct socket *so, int flags) 217184989Srafan{ 218184989Srafan struct unpcb *unp = sotounpcb(so); 219184989Srafan struct socket *so2; 220184989Srafan 221184989Srafan if (unp == 0) 222184989Srafan return EINVAL; 223184989Srafan switch (so->so_type) { 224184989Srafan case SOCK_DGRAM: 225184989Srafan panic("uipc_rcvd DGRAM?"); 226184989Srafan /*NOTREACHED*/ 227184989Srafan 228184989Srafan case SOCK_STREAM: 22950276Speter if (unp->unp_conn == 0) 23050276Speter break; 23150276Speter so2 = unp->unp_conn->unp_socket; 23250276Speter /* 23350276Speter * Adjust backpressure on sender 23450276Speter * and wakeup any waiting to write. 23550276Speter */ 23676726Speter so2->so_snd.sb_mbmax += unp->unp_mbcnt - so->so_rcv.sb_mbcnt; 237178866Srafan unp->unp_mbcnt = so->so_rcv.sb_mbcnt; 23850276Speter so2->so_snd.sb_hiwat += unp->unp_cc - so->so_rcv.sb_cc; 239184989Srafan (void)chgsbsize(so2->so_cred->cr_uid, 24050276Speter (rlim_t)unp->unp_cc - so->so_rcv.sb_cc, RLIM_INFINITY); 241178866Srafan unp->unp_cc = so->so_rcv.sb_cc; 242184989Srafan sowwakeup(so2); 243184989Srafan break; 244184989Srafan 245184989Srafan default: 246184989Srafan panic("uipc_rcvd unknown socktype"); 247184989Srafan } 248184989Srafan return 0; 249184989Srafan} 250184989Srafan 251184989Srafan/* pru_rcvoob is EOPNOTSUPP */ 252184989Srafan 253184989Srafanstatic int 254184989Srafanuipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, 255184989Srafan struct mbuf *control, struct proc *p) 256184989Srafan{ 257184989Srafan int error = 0; 258184989Srafan struct unpcb *unp = sotounpcb(so); 259184989Srafan struct socket *so2; 260184989Srafan 261184989Srafan if (unp == 0) { 262184989Srafan error = EINVAL; 263184989Srafan goto release; 264184989Srafan } 265184989Srafan if (flags & PRUS_OOB) { 266184989Srafan error = EOPNOTSUPP; 267184989Srafan goto release; 268184989Srafan } 269184989Srafan 270184989Srafan if (control && (error = unp_internalize(control, p))) 271184989Srafan goto release; 272184989Srafan 273166124Srafan switch (so->so_type) { 27462449Speter case SOCK_DGRAM: 275184989Srafan { 27650276Speter struct sockaddr *from; 277184989Srafan 278184989Srafan if (nam) { 279184989Srafan if (unp->unp_conn) { 280184989Srafan error = EISCONN; 281184989Srafan break; 282184989Srafan } 283184989Srafan error = unp_connect(so, nam, p); 284184989Srafan if (error) 285184989Srafan break; 286184989Srafan } else { 287184989Srafan if (unp->unp_conn == 0) { 288184989Srafan error = ENOTCONN; 289184989Srafan break; 290184989Srafan } 291184989Srafan } 292184989Srafan so2 = unp->unp_conn->unp_socket; 293184989Srafan if (unp->unp_addr) 294184989Srafan from = (struct sockaddr *)unp->unp_addr; 295184989Srafan else 296184989Srafan from = &sun_noname; 297184989Srafan if (sbappendaddr(&so2->so_rcv, from, m, control)) { 298184989Srafan sorwakeup(so2); 299184989Srafan m = 0; 300184989Srafan control = 0; 301184989Srafan } else 302184989Srafan error = ENOBUFS; 303184989Srafan if (nam) 304184989Srafan unp_disconnect(unp); 305184989Srafan break; 306184989Srafan } 307184989Srafan 308184989Srafan case SOCK_STREAM: 309184989Srafan /* Connect if not connected yet. */ 310184989Srafan /* 311184989Srafan * Note: A better implementation would complain 312184989Srafan * if not equal to the peer's address. 313184989Srafan */ 314184989Srafan if ((so->so_state & SS_ISCONNECTED) == 0) { 315184989Srafan if (nam) { 316184989Srafan error = unp_connect(so, nam, p); 317184989Srafan if (error) 318184989Srafan break; /* XXX */ 319184989Srafan } else { 320184989Srafan error = ENOTCONN; 321184989Srafan break; 322184989Srafan } 323184989Srafan } 324184989Srafan 325184989Srafan if (so->so_state & SS_CANTSENDMORE) { 326184989Srafan error = EPIPE; 327184989Srafan break; 328 } 329 if (unp->unp_conn == 0) 330 panic("uipc_send connected but no connection?"); 331 so2 = unp->unp_conn->unp_socket; 332 /* 333 * Send to paired receive port, and then reduce 334 * send buffer hiwater marks to maintain backpressure. 335 * Wake up readers. 336 */ 337 if (control) { 338 if (sbappendcontrol(&so2->so_rcv, m, control)) 339 control = 0; 340 } else 341 sbappend(&so2->so_rcv, m); 342 so->so_snd.sb_mbmax -= 343 so2->so_rcv.sb_mbcnt - unp->unp_conn->unp_mbcnt; 344 unp->unp_conn->unp_mbcnt = so2->so_rcv.sb_mbcnt; 345 so->so_snd.sb_hiwat -= 346 so2->so_rcv.sb_cc - unp->unp_conn->unp_cc; 347 (void)chgsbsize(so->so_cred->cr_uid, 348 (rlim_t)unp->unp_conn->unp_cc - so2->so_rcv.sb_cc, RLIM_INFINITY); 349 unp->unp_conn->unp_cc = so2->so_rcv.sb_cc; 350 sorwakeup(so2); 351 m = 0; 352 break; 353 354 default: 355 panic("uipc_send unknown socktype"); 356 } 357 358 /* 359 * SEND_EOF is equivalent to a SEND followed by 360 * a SHUTDOWN. 361 */ 362 if (flags & PRUS_EOF) { 363 socantsendmore(so); 364 unp_shutdown(unp); 365 } 366 367 if (control && error != 0) 368 unp_dispose(control); 369 370release: 371 if (control) 372 m_freem(control); 373 if (m) 374 m_freem(m); 375 return error; 376} 377 378static int 379uipc_sense(struct socket *so, struct stat *sb) 380{ 381 struct unpcb *unp = sotounpcb(so); 382 struct socket *so2; 383 384 if (unp == 0) 385 return EINVAL; 386 sb->st_blksize = so->so_snd.sb_hiwat; 387 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) { 388 so2 = unp->unp_conn->unp_socket; 389 sb->st_blksize += so2->so_rcv.sb_cc; 390 } 391 sb->st_dev = NOUDEV; 392 if (unp->unp_ino == 0) 393 unp->unp_ino = unp_ino++; 394 sb->st_ino = unp->unp_ino; 395 return (0); 396} 397 398static int 399uipc_shutdown(struct socket *so) 400{ 401 struct unpcb *unp = sotounpcb(so); 402 403 if (unp == 0) 404 return EINVAL; 405 socantsendmore(so); 406 unp_shutdown(unp); 407 return 0; 408} 409 410static int 411uipc_sockaddr(struct socket *so, struct sockaddr **nam) 412{ 413 struct unpcb *unp = sotounpcb(so); 414 415 if (unp == 0) 416 return EINVAL; 417 if (unp->unp_addr) 418 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1); 419 return 0; 420} 421 422struct pr_usrreqs uipc_usrreqs = { 423 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect, 424 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect, 425 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp, 426 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr, 427 sosend, soreceive, sopoll 428}; 429 430/* 431 * Both send and receive buffers are allocated PIPSIZ bytes of buffering 432 * for stream sockets, although the total for sender and receiver is 433 * actually only PIPSIZ. 434 * Datagram sockets really use the sendspace as the maximum datagram size, 435 * and don't really want to reserve the sendspace. Their recvspace should 436 * be large enough for at least one max-size datagram plus address. 437 */ 438#ifndef PIPSIZ 439#define PIPSIZ 8192 440#endif 441static u_long unpst_sendspace = PIPSIZ; 442static u_long unpst_recvspace = PIPSIZ; 443static u_long unpdg_sendspace = 2*1024; /* really max datagram size */ 444static u_long unpdg_recvspace = 4*1024; 445 446static int unp_rights; /* file descriptors in flight */ 447 448SYSCTL_DECL(_net_local_stream); 449SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW, 450 &unpst_sendspace, 0, ""); 451SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW, 452 &unpst_recvspace, 0, ""); 453SYSCTL_DECL(_net_local_dgram); 454SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW, 455 &unpdg_sendspace, 0, ""); 456SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW, 457 &unpdg_recvspace, 0, ""); 458SYSCTL_DECL(_net_local); 459SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, ""); 460 461static int 462unp_attach(so) 463 struct socket *so; 464{ 465 register struct unpcb *unp; 466 int error; 467 468 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { 469 switch (so->so_type) { 470 471 case SOCK_STREAM: 472 error = soreserve(so, unpst_sendspace, unpst_recvspace); 473 break; 474 475 case SOCK_DGRAM: 476 error = soreserve(so, unpdg_sendspace, unpdg_recvspace); 477 break; 478 479 default: 480 panic("unp_attach"); 481 } 482 if (error) 483 return (error); 484 } 485 unp = zalloc(unp_zone); 486 if (unp == NULL) 487 return (ENOBUFS); 488 bzero(unp, sizeof *unp); 489 unp->unp_gencnt = ++unp_gencnt; 490 unp_count++; 491 LIST_INIT(&unp->unp_refs); 492 unp->unp_socket = so; 493 unp->unp_rvnode = curproc->p_fd->fd_rdir; 494 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead 495 : &unp_shead, unp, unp_link); 496 so->so_pcb = (caddr_t)unp; 497 return (0); 498} 499 500static void 501unp_detach(unp) 502 register struct unpcb *unp; 503{ 504 LIST_REMOVE(unp, unp_link); 505 unp->unp_gencnt = ++unp_gencnt; 506 --unp_count; 507 if (unp->unp_vnode) { 508 unp->unp_vnode->v_socket = 0; 509 vrele(unp->unp_vnode); 510 unp->unp_vnode = 0; 511 } 512 if (unp->unp_conn) 513 unp_disconnect(unp); 514 while (!LIST_EMPTY(&unp->unp_refs)) 515 unp_drop(LIST_FIRST(&unp->unp_refs), ECONNRESET); 516 soisdisconnected(unp->unp_socket); 517 unp->unp_socket->so_pcb = 0; 518 if (unp_rights) { 519 /* 520 * Normally the receive buffer is flushed later, 521 * in sofree, but if our receive buffer holds references 522 * to descriptors that are now garbage, we will dispose 523 * of those descriptor references after the garbage collector 524 * gets them (resulting in a "panic: closef: count < 0"). 525 */ 526 sorflush(unp->unp_socket); 527 unp_gc(); 528 } 529 if (unp->unp_addr) 530 FREE(unp->unp_addr, M_SONAME); 531 zfree(unp_zone, unp); 532} 533 534static int 535unp_bind(unp, nam, p) 536 struct unpcb *unp; 537 struct sockaddr *nam; 538 struct proc *p; 539{ 540 struct sockaddr_un *soun = (struct sockaddr_un *)nam; 541 register struct vnode *vp; 542 struct vattr vattr; 543 int error, namelen; 544 struct nameidata nd; 545 char buf[SOCK_MAXADDRLEN]; 546 547 if (unp->unp_vnode != NULL) 548 return (EINVAL); 549#define offsetof(s, e) ((char *)&((s *)0)->e - (char *)((s *)0)) 550 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path); 551 if (namelen <= 0) 552 return EINVAL; 553 strncpy(buf, soun->sun_path, namelen); 554 buf[namelen] = 0; /* null-terminate the string */ 555 NDINIT(&nd, CREATE, NOFOLLOW | LOCKPARENT, UIO_SYSSPACE, 556 buf, p); 557/* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */ 558 error = namei(&nd); 559 if (error) 560 return (error); 561 vp = nd.ni_vp; 562 if (vp != NULL) { 563 NDFREE(&nd, NDF_ONLY_PNBUF); 564 if (nd.ni_dvp == vp) 565 vrele(nd.ni_dvp); 566 else 567 vput(nd.ni_dvp); 568 vrele(vp); 569 return (EADDRINUSE); 570 } 571 VATTR_NULL(&vattr); 572 vattr.va_type = VSOCK; 573 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask); 574 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); 575 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 576 NDFREE(&nd, NDF_ONLY_PNBUF); 577 vput(nd.ni_dvp); 578 if (error) 579 return (error); 580 vp = nd.ni_vp; 581 vp->v_socket = unp->unp_socket; 582 unp->unp_vnode = vp; 583 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1); 584 VOP_UNLOCK(vp, 0, p); 585 return (0); 586} 587 588static int 589unp_connect(so, nam, p) 590 struct socket *so; 591 struct sockaddr *nam; 592 struct proc *p; 593{ 594 register struct sockaddr_un *soun = (struct sockaddr_un *)nam; 595 register struct vnode *vp; 596 register struct socket *so2, *so3; 597 struct unpcb *unp2, *unp3; 598 int error, len; 599 struct nameidata nd; 600 char buf[SOCK_MAXADDRLEN]; 601 602 len = nam->sa_len - offsetof(struct sockaddr_un, sun_path); 603 if (len <= 0) 604 return EINVAL; 605 strncpy(buf, soun->sun_path, len); 606 buf[len] = 0; 607 608 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, buf, p); 609 error = namei(&nd); 610 if (error) 611 return (error); 612 vp = nd.ni_vp; 613 NDFREE(&nd, NDF_ONLY_PNBUF); 614 if (vp->v_type != VSOCK) { 615 error = ENOTSOCK; 616 goto bad; 617 } 618 error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p); 619 if (error) 620 goto bad; 621 so2 = vp->v_socket; 622 if (so2 == 0) { 623 error = ECONNREFUSED; 624 goto bad; 625 } 626 if (so->so_type != so2->so_type) { 627 error = EPROTOTYPE; 628 goto bad; 629 } 630 if (so->so_proto->pr_flags & PR_CONNREQUIRED) { 631 if ((so2->so_options & SO_ACCEPTCONN) == 0 || 632 (so3 = sonewconn3(so2, 0, p)) == 0) { 633 error = ECONNREFUSED; 634 goto bad; 635 } 636 unp2 = sotounpcb(so2); 637 unp3 = sotounpcb(so3); 638 if (unp2->unp_addr) 639 unp3->unp_addr = (struct sockaddr_un *) 640 dup_sockaddr((struct sockaddr *) 641 unp2->unp_addr, 1); 642 so2 = so3; 643 } 644 error = unp_connect2(so, so2); 645bad: 646 vput(vp); 647 return (error); 648} 649 650int 651unp_connect2(so, so2) 652 register struct socket *so; 653 register struct socket *so2; 654{ 655 register struct unpcb *unp = sotounpcb(so); 656 register struct unpcb *unp2; 657 658 if (so2->so_type != so->so_type) 659 return (EPROTOTYPE); 660 unp2 = sotounpcb(so2); 661 unp->unp_conn = unp2; 662 switch (so->so_type) { 663 664 case SOCK_DGRAM: 665 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink); 666 soisconnected(so); 667 break; 668 669 case SOCK_STREAM: 670 unp2->unp_conn = unp; 671 soisconnected(so); 672 soisconnected(so2); 673 break; 674 675 default: 676 panic("unp_connect2"); 677 } 678 return (0); 679} 680 681static void 682unp_disconnect(unp) 683 struct unpcb *unp; 684{ 685 register struct unpcb *unp2 = unp->unp_conn; 686 687 if (unp2 == 0) 688 return; 689 unp->unp_conn = 0; 690 switch (unp->unp_socket->so_type) { 691 692 case SOCK_DGRAM: 693 LIST_REMOVE(unp, unp_reflink); 694 unp->unp_socket->so_state &= ~SS_ISCONNECTED; 695 break; 696 697 case SOCK_STREAM: 698 soisdisconnected(unp->unp_socket); 699 unp2->unp_conn = 0; 700 soisdisconnected(unp2->unp_socket); 701 break; 702 } 703} 704 705#ifdef notdef 706void 707unp_abort(unp) 708 struct unpcb *unp; 709{ 710 711 unp_detach(unp); 712} 713#endif 714 715static int 716prison_unpcb(struct proc *p, struct unpcb *unp) 717{ 718 if (!p->p_prison) 719 return (0); 720 if (p->p_fd->fd_rdir == unp->unp_rvnode) 721 return (0); 722 return (1); 723} 724 725static int 726unp_pcblist SYSCTL_HANDLER_ARGS 727{ 728 int error, i, n; 729 struct unpcb *unp, **unp_list; 730 unp_gen_t gencnt; 731 struct xunpgen xug; 732 struct unp_head *head; 733 734 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead); 735 736 /* 737 * The process of preparing the PCB list is too time-consuming and 738 * resource-intensive to repeat twice on every request. 739 */ 740 if (req->oldptr == 0) { 741 n = unp_count; 742 req->oldidx = 2 * (sizeof xug) 743 + (n + n/8) * sizeof(struct xunpcb); 744 return 0; 745 } 746 747 if (req->newptr != 0) 748 return EPERM; 749 750 /* 751 * OK, now we're committed to doing something. 752 */ 753 gencnt = unp_gencnt; 754 n = unp_count; 755 756 xug.xug_len = sizeof xug; 757 xug.xug_count = n; 758 xug.xug_gen = gencnt; 759 xug.xug_sogen = so_gencnt; 760 error = SYSCTL_OUT(req, &xug, sizeof xug); 761 if (error) 762 return error; 763 764 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK); 765 if (unp_list == 0) 766 return ENOMEM; 767 768 for (unp = LIST_FIRST(head), i = 0; unp && i < n; 769 unp = LIST_NEXT(unp, unp_link)) { 770 if (unp->unp_gencnt <= gencnt && !prison_unpcb(req->p, unp)) 771 unp_list[i++] = unp; 772 } 773 n = i; /* in case we lost some during malloc */ 774 775 error = 0; 776 for (i = 0; i < n; i++) { 777 unp = unp_list[i]; 778 if (unp->unp_gencnt <= gencnt) { 779 struct xunpcb xu; 780 xu.xu_len = sizeof xu; 781 xu.xu_unpp = unp; 782 /* 783 * XXX - need more locking here to protect against 784 * connect/disconnect races for SMP. 785 */ 786 if (unp->unp_addr) 787 bcopy(unp->unp_addr, &xu.xu_addr, 788 unp->unp_addr->sun_len); 789 if (unp->unp_conn && unp->unp_conn->unp_addr) 790 bcopy(unp->unp_conn->unp_addr, 791 &xu.xu_caddr, 792 unp->unp_conn->unp_addr->sun_len); 793 bcopy(unp, &xu.xu_unp, sizeof *unp); 794 sotoxsocket(unp->unp_socket, &xu.xu_socket); 795 error = SYSCTL_OUT(req, &xu, sizeof xu); 796 } 797 } 798 if (!error) { 799 /* 800 * Give the user an updated idea of our state. 801 * If the generation differs from what we told 802 * her before, she knows that something happened 803 * while we were processing this request, and it 804 * might be necessary to retry. 805 */ 806 xug.xug_gen = unp_gencnt; 807 xug.xug_sogen = so_gencnt; 808 xug.xug_count = unp_count; 809 error = SYSCTL_OUT(req, &xug, sizeof xug); 810 } 811 free(unp_list, M_TEMP); 812 return error; 813} 814 815SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD, 816 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb", 817 "List of active local datagram sockets"); 818SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD, 819 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb", 820 "List of active local stream sockets"); 821 822static void 823unp_shutdown(unp) 824 struct unpcb *unp; 825{ 826 struct socket *so; 827 828 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn && 829 (so = unp->unp_conn->unp_socket)) 830 socantrcvmore(so); 831} 832 833static void 834unp_drop(unp, errno) 835 struct unpcb *unp; 836 int errno; 837{ 838 struct socket *so = unp->unp_socket; 839 840 so->so_error = errno; 841 unp_disconnect(unp); 842 if (so->so_head) { 843 LIST_REMOVE(unp, unp_link); 844 unp->unp_gencnt = ++unp_gencnt; 845 unp_count--; 846 so->so_pcb = (caddr_t) 0; 847 if (unp->unp_addr) 848 FREE(unp->unp_addr, M_SONAME); 849 zfree(unp_zone, unp); 850 sofree(so); 851 } 852} 853 854#ifdef notdef 855void 856unp_drain() 857{ 858 859} 860#endif 861 862int 863unp_externalize(rights) 864 struct mbuf *rights; 865{ 866 struct proc *p = curproc; /* XXX */ 867 register int i; 868 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *); 869 register int *fdp; 870 register struct file **rp; 871 register struct file *fp; 872 int newfds = (cm->cmsg_len - (CMSG_DATA(cm) - (u_char *)cm)) 873 / sizeof (struct file *); 874 int f; 875 876 /* 877 * if the new FD's will not fit, then we free them all 878 */ 879 if (!fdavail(p, newfds)) { 880 rp = (struct file **)CMSG_DATA(cm); 881 for (i = 0; i < newfds; i++) { 882 fp = *rp; 883 /* 884 * zero the pointer before calling unp_discard, 885 * since it may end up in unp_gc().. 886 */ 887 *rp++ = 0; 888 unp_discard(fp); 889 } 890 return (EMSGSIZE); 891 } 892 /* 893 * now change each pointer to an fd in the global table to 894 * an integer that is the index to the local fd table entry 895 * that we set up to point to the global one we are transferring. 896 * If sizeof (struct file *) is bigger than or equal to sizeof int, 897 * then do it in forward order. In that case, an integer will 898 * always come in the same place or before its corresponding 899 * struct file pointer. 900 * If sizeof (struct file *) is smaller than sizeof int, then 901 * do it in reverse order. 902 */ 903 if (sizeof (struct file *) >= sizeof (int)) { 904 fdp = (int *)(cm + 1); 905 rp = (struct file **)CMSG_DATA(cm); 906 for (i = 0; i < newfds; i++) { 907 if (fdalloc(p, 0, &f)) 908 panic("unp_externalize"); 909 fp = *rp++; 910 p->p_fd->fd_ofiles[f] = fp; 911 fp->f_msgcount--; 912 unp_rights--; 913 *fdp++ = f; 914 } 915 } else { 916 fdp = (int *)(cm + 1) + newfds - 1; 917 rp = (struct file **)CMSG_DATA(cm) + newfds - 1; 918 for (i = 0; i < newfds; i++) { 919 if (fdalloc(p, 0, &f)) 920 panic("unp_externalize"); 921 fp = *rp--; 922 p->p_fd->fd_ofiles[f] = fp; 923 fp->f_msgcount--; 924 unp_rights--; 925 *fdp-- = f; 926 } 927 } 928 929 /* 930 * Adjust length, in case sizeof(struct file *) and sizeof(int) 931 * differs. 932 */ 933 cm->cmsg_len = CMSG_LEN(newfds * sizeof(int)); 934 rights->m_len = cm->cmsg_len; 935 return (0); 936} 937 938void 939unp_init(void) 940{ 941 unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0); 942 if (unp_zone == 0) 943 panic("unp_init"); 944 LIST_INIT(&unp_dhead); 945 LIST_INIT(&unp_shead); 946} 947 948#ifndef MIN 949#define MIN(a,b) (((a)<(b))?(a):(b)) 950#endif 951 952static int 953unp_internalize(control, p) 954 struct mbuf *control; 955 struct proc *p; 956{ 957 struct filedesc *fdescp = p->p_fd; 958 register struct cmsghdr *cm = mtod(control, struct cmsghdr *); 959 register struct file **rp; 960 register struct file *fp; 961 register int i, fd, *fdp; 962 register struct cmsgcred *cmcred; 963 int oldfds; 964 u_int newlen; 965 966 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) || 967 cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len) 968 return (EINVAL); 969 970 /* 971 * Fill in credential information. 972 */ 973 if (cm->cmsg_type == SCM_CREDS) { 974 cmcred = (struct cmsgcred *)(cm + 1); 975 cmcred->cmcred_pid = p->p_pid; 976 cmcred->cmcred_uid = p->p_cred->p_ruid; 977 cmcred->cmcred_gid = p->p_cred->p_rgid; 978 cmcred->cmcred_euid = p->p_ucred->cr_uid; 979 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups, 980 CMGROUP_MAX); 981 for (i = 0; i < cmcred->cmcred_ngroups; i++) 982 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i]; 983 return(0); 984 } 985 986 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int); 987 /* 988 * check that all the FDs passed in refer to legal OPEN files 989 * If not, reject the entire operation. 990 */ 991 fdp = (int *)(cm + 1); 992 for (i = 0; i < oldfds; i++) { 993 fd = *fdp++; 994 if ((unsigned)fd >= fdescp->fd_nfiles || 995 fdescp->fd_ofiles[fd] == NULL) 996 return (EBADF); 997 } 998 /* 999 * Now replace the integer FDs with pointers to 1000 * the associated global file table entry.. 1001 * Allocate a bigger buffer as necessary. But if an cluster is not 1002 * enough, return E2BIG. 1003 */ 1004 newlen = CMSG_LEN(oldfds * sizeof(struct file *)); 1005 if (newlen > MCLBYTES) 1006 return (E2BIG); 1007 if (newlen - control->m_len > M_TRAILINGSPACE(control)) { 1008 if (control->m_flags & M_EXT) 1009 return (E2BIG); 1010 MCLGET(control, M_WAIT); 1011 if ((control->m_flags & M_EXT) == 0) 1012 return (ENOBUFS); 1013 1014 /* copy the data to the cluster */ 1015 memcpy(mtod(control, char *), cm, cm->cmsg_len); 1016 cm = mtod(control, struct cmsghdr *); 1017 } 1018 1019 /* 1020 * Adjust length, in case sizeof(struct file *) and sizeof(int) 1021 * differs. 1022 */ 1023 control->m_len = cm->cmsg_len = newlen; 1024 1025 /* 1026 * Transform the file descriptors into struct file pointers. 1027 * If sizeof (struct file *) is bigger than or equal to sizeof int, 1028 * then do it in reverse order so that the int won't get until 1029 * we're done. 1030 * If sizeof (struct file *) is smaller than sizeof int, then 1031 * do it in forward order. 1032 */ 1033 if (sizeof (struct file *) >= sizeof (int)) { 1034 fdp = (int *)(cm + 1) + oldfds - 1; 1035 rp = (struct file **)CMSG_DATA(cm) + oldfds - 1; 1036 for (i = 0; i < oldfds; i++) { 1037 fp = fdescp->fd_ofiles[*fdp--]; 1038 *rp-- = fp; 1039 fp->f_count++; 1040 fp->f_msgcount++; 1041 unp_rights++; 1042 } 1043 } else { 1044 fdp = (int *)(cm + 1); 1045 rp = (struct file **)CMSG_DATA(cm); 1046 for (i = 0; i < oldfds; i++) { 1047 fp = fdescp->fd_ofiles[*fdp++]; 1048 *rp++ = fp; 1049 fp->f_count++; 1050 fp->f_msgcount++; 1051 unp_rights++; 1052 } 1053 } 1054 return (0); 1055} 1056 1057static int unp_defer, unp_gcing; 1058 1059static void 1060unp_gc() 1061{ 1062 register struct file *fp, *nextfp; 1063 register struct socket *so; 1064 struct file **extra_ref, **fpp; 1065 int nunref, i; 1066 1067 if (unp_gcing) 1068 return; 1069 unp_gcing = 1; 1070 unp_defer = 0; 1071 /* 1072 * before going through all this, set all FDs to 1073 * be NOT defered and NOT externally accessible 1074 */ 1075 LIST_FOREACH(fp, &filehead, f_list) 1076 fp->f_flag &= ~(FMARK|FDEFER); 1077 do { 1078 LIST_FOREACH(fp, &filehead, f_list) { 1079 /* 1080 * If the file is not open, skip it 1081 */ 1082 if (fp->f_count == 0) 1083 continue; 1084 /* 1085 * If we already marked it as 'defer' in a 1086 * previous pass, then try process it this time 1087 * and un-mark it 1088 */ 1089 if (fp->f_flag & FDEFER) { 1090 fp->f_flag &= ~FDEFER; 1091 unp_defer--; 1092 } else { 1093 /* 1094 * if it's not defered, then check if it's 1095 * already marked.. if so skip it 1096 */ 1097 if (fp->f_flag & FMARK) 1098 continue; 1099 /* 1100 * If all references are from messages 1101 * in transit, then skip it. it's not 1102 * externally accessible. 1103 */ 1104 if (fp->f_count == fp->f_msgcount) 1105 continue; 1106 /* 1107 * If it got this far then it must be 1108 * externally accessible. 1109 */ 1110 fp->f_flag |= FMARK; 1111 } 1112 /* 1113 * either it was defered, or it is externally 1114 * accessible and not already marked so. 1115 * Now check if it is possibly one of OUR sockets. 1116 */ 1117 if (fp->f_type != DTYPE_SOCKET || 1118 (so = (struct socket *)fp->f_data) == 0) 1119 continue; 1120 if (so->so_proto->pr_domain != &localdomain || 1121 (so->so_proto->pr_flags&PR_RIGHTS) == 0) 1122 continue; 1123#ifdef notdef 1124 if (so->so_rcv.sb_flags & SB_LOCK) { 1125 /* 1126 * This is problematical; it's not clear 1127 * we need to wait for the sockbuf to be 1128 * unlocked (on a uniprocessor, at least), 1129 * and it's also not clear what to do 1130 * if sbwait returns an error due to receipt 1131 * of a signal. If sbwait does return 1132 * an error, we'll go into an infinite 1133 * loop. Delete all of this for now. 1134 */ 1135 (void) sbwait(&so->so_rcv); 1136 goto restart; 1137 } 1138#endif 1139 /* 1140 * So, Ok, it's one of our sockets and it IS externally 1141 * accessible (or was defered). Now we look 1142 * to see if we hold any file descriptors in its 1143 * message buffers. Follow those links and mark them 1144 * as accessible too. 1145 */ 1146 unp_scan(so->so_rcv.sb_mb, unp_mark); 1147 } 1148 } while (unp_defer); 1149 /* 1150 * We grab an extra reference to each of the file table entries 1151 * that are not otherwise accessible and then free the rights 1152 * that are stored in messages on them. 1153 * 1154 * The bug in the orginal code is a little tricky, so I'll describe 1155 * what's wrong with it here. 1156 * 1157 * It is incorrect to simply unp_discard each entry for f_msgcount 1158 * times -- consider the case of sockets A and B that contain 1159 * references to each other. On a last close of some other socket, 1160 * we trigger a gc since the number of outstanding rights (unp_rights) 1161 * is non-zero. If during the sweep phase the gc code un_discards, 1162 * we end up doing a (full) closef on the descriptor. A closef on A 1163 * results in the following chain. Closef calls soo_close, which 1164 * calls soclose. Soclose calls first (through the switch 1165 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply 1166 * returns because the previous instance had set unp_gcing, and 1167 * we return all the way back to soclose, which marks the socket 1168 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush 1169 * to free up the rights that are queued in messages on the socket A, 1170 * i.e., the reference on B. The sorflush calls via the dom_dispose 1171 * switch unp_dispose, which unp_scans with unp_discard. This second 1172 * instance of unp_discard just calls closef on B. 1173 * 1174 * Well, a similar chain occurs on B, resulting in a sorflush on B, 1175 * which results in another closef on A. Unfortunately, A is already 1176 * being closed, and the descriptor has already been marked with 1177 * SS_NOFDREF, and soclose panics at this point. 1178 * 1179 * Here, we first take an extra reference to each inaccessible 1180 * descriptor. Then, we call sorflush ourself, since we know 1181 * it is a Unix domain socket anyhow. After we destroy all the 1182 * rights carried in messages, we do a last closef to get rid 1183 * of our extra reference. This is the last close, and the 1184 * unp_detach etc will shut down the socket. 1185 * 1186 * 91/09/19, bsy@cs.cmu.edu 1187 */ 1188 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); 1189 for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; 1190 fp = nextfp) { 1191 nextfp = LIST_NEXT(fp, f_list); 1192 /* 1193 * If it's not open, skip it 1194 */ 1195 if (fp->f_count == 0) 1196 continue; 1197 /* 1198 * If all refs are from msgs, and it's not marked accessible 1199 * then it must be referenced from some unreachable cycle 1200 * of (shut-down) FDs, so include it in our 1201 * list of FDs to remove 1202 */ 1203 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { 1204 *fpp++ = fp; 1205 nunref++; 1206 fp->f_count++; 1207 } 1208 } 1209 /* 1210 * for each FD on our hit list, do the following two things 1211 */ 1212 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { 1213 struct file *tfp = *fpp; 1214 if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) 1215 sorflush((struct socket *)(tfp->f_data)); 1216 } 1217 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) 1218 closef(*fpp, (struct proc *) NULL); 1219 free((caddr_t)extra_ref, M_FILE); 1220 unp_gcing = 0; 1221} 1222 1223void 1224unp_dispose(m) 1225 struct mbuf *m; 1226{ 1227 1228 if (m) 1229 unp_scan(m, unp_discard); 1230} 1231 1232static void 1233unp_scan(m0, op) 1234 register struct mbuf *m0; 1235 void (*op) __P((struct file *)); 1236{ 1237 register struct mbuf *m; 1238 register struct file **rp; 1239 register struct cmsghdr *cm; 1240 register int i; 1241 int qfds; 1242 1243 while (m0) { 1244 for (m = m0; m; m = m->m_next) 1245 if (m->m_type == MT_CONTROL && 1246 m->m_len >= sizeof(*cm)) { 1247 cm = mtod(m, struct cmsghdr *); 1248 if (cm->cmsg_level != SOL_SOCKET || 1249 cm->cmsg_type != SCM_RIGHTS) 1250 continue; 1251 qfds = (cm->cmsg_len - 1252 (CMSG_DATA(cm) - (u_char *)cm)) 1253 / sizeof (struct file *); 1254 rp = (struct file **)CMSG_DATA(cm); 1255 for (i = 0; i < qfds; i++) 1256 (*op)(*rp++); 1257 break; /* XXX, but saves time */ 1258 } 1259 m0 = m0->m_act; 1260 } 1261} 1262 1263static void 1264unp_mark(fp) 1265 struct file *fp; 1266{ 1267 1268 if (fp->f_flag & FMARK) 1269 return; 1270 unp_defer++; 1271 fp->f_flag |= (FMARK|FDEFER); 1272} 1273 1274static void 1275unp_discard(fp) 1276 struct file *fp; 1277{ 1278 1279 fp->f_msgcount--; 1280 unp_rights--; 1281 (void) closef(fp, (struct proc *)NULL); 1282} 1283