uipc_syscalls.c revision 147784
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 147784 2005-07-05 22:49:10Z rwatson $"); 37 38#include "opt_compat.h" 39#include "opt_ktrace.h" 40#include "opt_mac.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/mac.h> 47#include <sys/mutex.h> 48#include <sys/sysproto.h> 49#include <sys/malloc.h> 50#include <sys/filedesc.h> 51#include <sys/event.h> 52#include <sys/proc.h> 53#include <sys/fcntl.h> 54#include <sys/file.h> 55#include <sys/filio.h> 56#include <sys/mount.h> 57#include <sys/mbuf.h> 58#include <sys/protosw.h> 59#include <sys/sf_buf.h> 60#include <sys/socket.h> 61#include <sys/socketvar.h> 62#include <sys/signalvar.h> 63#include <sys/syscallsubr.h> 64#include <sys/sysctl.h> 65#include <sys/uio.h> 66#include <sys/vnode.h> 67#ifdef KTRACE 68#include <sys/ktrace.h> 69#endif 70 71#include <vm/vm.h> 72#include <vm/vm_object.h> 73#include <vm/vm_page.h> 74#include <vm/vm_pageout.h> 75#include <vm/vm_kern.h> 76#include <vm/vm_extern.h> 77 78static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81static int accept1(struct thread *td, struct accept_args *uap, int compat); 82static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88/* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91int nsfbufs; 92int nsfbufspeak; 93int nsfbufsused; 94 95SYSCTL_DECL(_kern_ipc); 96SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 97 "Maximum number of sendfile(2) sf_bufs available"); 98SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 99 "Number of sendfile(2) sf_bufs at peak usage"); 100SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 101 "Number of sendfile(2) sf_bufs in use"); 102 103/* 104 * Convert a user file descriptor to a kernel file entry. A reference on the 105 * file entry is held upon returning. This is lighter weight than 106 * fgetsock(), which bumps the socket reference drops the file reference 107 * count instead, as this approach avoids several additional mutex operations 108 * associated with the additional reference count. 109 */ 110static int 111getsock(struct filedesc *fdp, int fd, struct file **fpp) 112{ 113 struct file *fp; 114 int error; 115 116 fp = NULL; 117 if (fdp == NULL) 118 error = EBADF; 119 else { 120 FILEDESC_LOCK_FAST(fdp); 121 fp = fget_locked(fdp, fd); 122 if (fp == NULL) 123 error = EBADF; 124 else if (fp->f_type != DTYPE_SOCKET) { 125 fp = NULL; 126 error = ENOTSOCK; 127 } else { 128 fhold(fp); 129 error = 0; 130 } 131 FILEDESC_UNLOCK_FAST(fdp); 132 } 133 *fpp = fp; 134 return (error); 135} 136 137/* 138 * System call interface to the socket abstraction. 139 */ 140#if defined(COMPAT_43) 141#define COMPAT_OLDSOCK 142#endif 143 144/* 145 * MPSAFE 146 */ 147int 148socket(td, uap) 149 struct thread *td; 150 register struct socket_args /* { 151 int domain; 152 int type; 153 int protocol; 154 } */ *uap; 155{ 156 struct filedesc *fdp; 157 struct socket *so; 158 struct file *fp; 159 int fd, error; 160 161#ifdef MAC 162 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 163 uap->protocol); 164 if (error) 165 return (error); 166#endif 167 fdp = td->td_proc->p_fd; 168 error = falloc(td, &fp, &fd); 169 if (error) 170 return (error); 171 /* An extra reference on `fp' has been held for us by falloc(). */ 172 NET_LOCK_GIANT(); 173 error = socreate(uap->domain, &so, uap->type, uap->protocol, 174 td->td_ucred, td); 175 NET_UNLOCK_GIANT(); 176 if (error) { 177 fdclose(fdp, fp, fd, td); 178 } else { 179 FILEDESC_LOCK_FAST(fdp); 180 fp->f_data = so; /* already has ref count */ 181 fp->f_flag = FREAD|FWRITE; 182 fp->f_ops = &socketops; 183 fp->f_type = DTYPE_SOCKET; 184 FILEDESC_UNLOCK_FAST(fdp); 185 td->td_retval[0] = fd; 186 } 187 fdrop(fp, td); 188 return (error); 189} 190 191/* 192 * MPSAFE 193 */ 194/* ARGSUSED */ 195int 196bind(td, uap) 197 struct thread *td; 198 register struct bind_args /* { 199 int s; 200 caddr_t name; 201 int namelen; 202 } */ *uap; 203{ 204 struct sockaddr *sa; 205 int error; 206 207 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 208 return (error); 209 210 return (kern_bind(td, uap->s, sa)); 211} 212 213int 214kern_bind(td, fd, sa) 215 struct thread *td; 216 int fd; 217 struct sockaddr *sa; 218{ 219 struct socket *so; 220 struct file *fp; 221 int error; 222 223 NET_LOCK_GIANT(); 224 error = getsock(td->td_proc->p_fd, fd, &fp); 225 if (error) 226 goto done2; 227 so = fp->f_data; 228#ifdef MAC 229 SOCK_LOCK(so); 230 error = mac_check_socket_bind(td->td_ucred, so, sa); 231 SOCK_UNLOCK(so); 232 if (error) 233 goto done1; 234#endif 235 error = sobind(so, sa, td); 236#ifdef MAC 237done1: 238#endif 239 fdrop(fp, td); 240done2: 241 NET_UNLOCK_GIANT(); 242 FREE(sa, M_SONAME); 243 return (error); 244} 245 246/* 247 * MPSAFE 248 */ 249/* ARGSUSED */ 250int 251listen(td, uap) 252 struct thread *td; 253 register struct listen_args /* { 254 int s; 255 int backlog; 256 } */ *uap; 257{ 258 struct socket *so; 259 struct file *fp; 260 int error; 261 262 NET_LOCK_GIANT(); 263 error = getsock(td->td_proc->p_fd, uap->s, &fp); 264 if (error == 0) { 265 so = fp->f_data; 266#ifdef MAC 267 SOCK_LOCK(so); 268 error = mac_check_socket_listen(td->td_ucred, so); 269 SOCK_UNLOCK(so); 270 if (error) 271 goto done; 272#endif 273 error = solisten(so, uap->backlog, td); 274#ifdef MAC 275done: 276#endif 277 fdrop(fp, td); 278 } 279 NET_UNLOCK_GIANT(); 280 return(error); 281} 282 283/* 284 * accept1() 285 * MPSAFE 286 */ 287static int 288accept1(td, uap, compat) 289 struct thread *td; 290 register struct accept_args /* { 291 int s; 292 struct sockaddr * __restrict name; 293 socklen_t * __restrict anamelen; 294 } */ *uap; 295 int compat; 296{ 297 struct filedesc *fdp; 298 struct file *nfp = NULL; 299 struct sockaddr *sa = NULL; 300 socklen_t namelen; 301 int error; 302 struct socket *head, *so; 303 int fd; 304 u_int fflag; 305 pid_t pgid; 306 int tmp; 307 308 fdp = td->td_proc->p_fd; 309 if (uap->name) { 310 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 311 if(error) 312 return (error); 313 if (namelen < 0) 314 return (EINVAL); 315 } 316 NET_LOCK_GIANT(); 317 error = fgetsock(td, uap->s, &head, &fflag); 318 if (error) 319 goto done2; 320 if ((head->so_options & SO_ACCEPTCONN) == 0) { 321 error = EINVAL; 322 goto done; 323 } 324#ifdef MAC 325 SOCK_LOCK(head); 326 error = mac_check_socket_accept(td->td_ucred, head); 327 SOCK_UNLOCK(head); 328 if (error != 0) 329 goto done; 330#endif 331 error = falloc(td, &nfp, &fd); 332 if (error) 333 goto done; 334 ACCEPT_LOCK(); 335 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 336 ACCEPT_UNLOCK(); 337 error = EWOULDBLOCK; 338 goto noconnection; 339 } 340 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 341 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 342 head->so_error = ECONNABORTED; 343 break; 344 } 345 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 346 "accept", 0); 347 if (error) { 348 ACCEPT_UNLOCK(); 349 goto noconnection; 350 } 351 } 352 if (head->so_error) { 353 error = head->so_error; 354 head->so_error = 0; 355 ACCEPT_UNLOCK(); 356 goto noconnection; 357 } 358 so = TAILQ_FIRST(&head->so_comp); 359 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 360 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 361 362 /* 363 * Before changing the flags on the socket, we have to bump the 364 * reference count. Otherwise, if the protocol calls sofree(), 365 * the socket will be released due to a zero refcount. 366 */ 367 SOCK_LOCK(so); /* soref() and so_state update */ 368 soref(so); /* file descriptor reference */ 369 370 TAILQ_REMOVE(&head->so_comp, so, so_list); 371 head->so_qlen--; 372 so->so_state |= (head->so_state & SS_NBIO); 373 so->so_qstate &= ~SQ_COMP; 374 so->so_head = NULL; 375 376 SOCK_UNLOCK(so); 377 ACCEPT_UNLOCK(); 378 379 /* An extra reference on `nfp' has been held for us by falloc(). */ 380 td->td_retval[0] = fd; 381 382 /* connection has been removed from the listen queue */ 383 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 384 385 pgid = fgetown(&head->so_sigio); 386 if (pgid != 0) 387 fsetown(pgid, &so->so_sigio); 388 389 FILE_LOCK(nfp); 390 nfp->f_data = so; /* nfp has ref count from falloc */ 391 nfp->f_flag = fflag; 392 nfp->f_ops = &socketops; 393 nfp->f_type = DTYPE_SOCKET; 394 FILE_UNLOCK(nfp); 395 /* Sync socket nonblocking/async state with file flags */ 396 tmp = fflag & FNONBLOCK; 397 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 398 tmp = fflag & FASYNC; 399 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 400 sa = 0; 401 error = soaccept(so, &sa); 402 if (error) { 403 /* 404 * return a namelen of zero for older code which might 405 * ignore the return value from accept. 406 */ 407 if (uap->name != NULL) { 408 namelen = 0; 409 (void) copyout(&namelen, 410 uap->anamelen, sizeof(*uap->anamelen)); 411 } 412 goto noconnection; 413 } 414 if (sa == NULL) { 415 namelen = 0; 416 if (uap->name) 417 goto gotnoname; 418 error = 0; 419 goto done; 420 } 421 if (uap->name) { 422 /* check sa_len before it is destroyed */ 423 if (namelen > sa->sa_len) 424 namelen = sa->sa_len; 425#ifdef COMPAT_OLDSOCK 426 if (compat) 427 ((struct osockaddr *)sa)->sa_family = 428 sa->sa_family; 429#endif 430 error = copyout(sa, uap->name, (u_int)namelen); 431 if (!error) 432gotnoname: 433 error = copyout(&namelen, 434 uap->anamelen, sizeof (*uap->anamelen)); 435 } 436noconnection: 437 if (sa) 438 FREE(sa, M_SONAME); 439 440 /* 441 * close the new descriptor, assuming someone hasn't ripped it 442 * out from under us. 443 */ 444 if (error) 445 fdclose(fdp, nfp, fd, td); 446 447 /* 448 * Release explicitly held references before returning. 449 */ 450done: 451 if (nfp != NULL) 452 fdrop(nfp, td); 453 fputsock(head); 454done2: 455 NET_UNLOCK_GIANT(); 456 return (error); 457} 458 459/* 460 * MPSAFE (accept1() is MPSAFE) 461 */ 462int 463accept(td, uap) 464 struct thread *td; 465 struct accept_args *uap; 466{ 467 468 return (accept1(td, uap, 0)); 469} 470 471#ifdef COMPAT_OLDSOCK 472/* 473 * MPSAFE (accept1() is MPSAFE) 474 */ 475int 476oaccept(td, uap) 477 struct thread *td; 478 struct accept_args *uap; 479{ 480 481 return (accept1(td, uap, 1)); 482} 483#endif /* COMPAT_OLDSOCK */ 484 485/* 486 * MPSAFE 487 */ 488/* ARGSUSED */ 489int 490connect(td, uap) 491 struct thread *td; 492 register struct connect_args /* { 493 int s; 494 caddr_t name; 495 int namelen; 496 } */ *uap; 497{ 498 struct sockaddr *sa; 499 int error; 500 501 error = getsockaddr(&sa, uap->name, uap->namelen); 502 if (error) 503 return (error); 504 505 return (kern_connect(td, uap->s, sa)); 506} 507 508 509int 510kern_connect(td, fd, sa) 511 struct thread *td; 512 int fd; 513 struct sockaddr *sa; 514{ 515 struct socket *so; 516 struct file *fp; 517 int error; 518 int interrupted = 0; 519 520 NET_LOCK_GIANT(); 521 error = getsock(td->td_proc->p_fd, fd, &fp); 522 if (error) 523 goto done2; 524 so = fp->f_data; 525 if (so->so_state & SS_ISCONNECTING) { 526 error = EALREADY; 527 goto done1; 528 } 529#ifdef MAC 530 SOCK_LOCK(so); 531 error = mac_check_socket_connect(td->td_ucred, so, sa); 532 SOCK_UNLOCK(so); 533 if (error) 534 goto bad; 535#endif 536 error = soconnect(so, sa, td); 537 if (error) 538 goto bad; 539 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 540 error = EINPROGRESS; 541 goto done1; 542 } 543 SOCK_LOCK(so); 544 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 545 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 546 "connec", 0); 547 if (error) { 548 if (error == EINTR || error == ERESTART) 549 interrupted = 1; 550 break; 551 } 552 } 553 if (error == 0) { 554 error = so->so_error; 555 so->so_error = 0; 556 } 557 SOCK_UNLOCK(so); 558bad: 559 if (!interrupted) 560 so->so_state &= ~SS_ISCONNECTING; 561 if (error == ERESTART) 562 error = EINTR; 563done1: 564 fdrop(fp, td); 565done2: 566 NET_UNLOCK_GIANT(); 567 FREE(sa, M_SONAME); 568 return (error); 569} 570 571/* 572 * MPSAFE 573 */ 574int 575socketpair(td, uap) 576 struct thread *td; 577 register struct socketpair_args /* { 578 int domain; 579 int type; 580 int protocol; 581 int *rsv; 582 } */ *uap; 583{ 584 register struct filedesc *fdp = td->td_proc->p_fd; 585 struct file *fp1, *fp2; 586 struct socket *so1, *so2; 587 int fd, error, sv[2]; 588 589#ifdef MAC 590 /* We might want to have a separate check for socket pairs. */ 591 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 592 uap->protocol); 593 if (error) 594 return (error); 595#endif 596 597 NET_LOCK_GIANT(); 598 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 599 td->td_ucred, td); 600 if (error) 601 goto done2; 602 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 603 td->td_ucred, td); 604 if (error) 605 goto free1; 606 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 607 error = falloc(td, &fp1, &fd); 608 if (error) 609 goto free2; 610 sv[0] = fd; 611 fp1->f_data = so1; /* so1 already has ref count */ 612 error = falloc(td, &fp2, &fd); 613 if (error) 614 goto free3; 615 fp2->f_data = so2; /* so2 already has ref count */ 616 sv[1] = fd; 617 error = soconnect2(so1, so2); 618 if (error) 619 goto free4; 620 if (uap->type == SOCK_DGRAM) { 621 /* 622 * Datagram socket connection is asymmetric. 623 */ 624 error = soconnect2(so2, so1); 625 if (error) 626 goto free4; 627 } 628 FILE_LOCK(fp1); 629 fp1->f_flag = FREAD|FWRITE; 630 fp1->f_ops = &socketops; 631 fp1->f_type = DTYPE_SOCKET; 632 FILE_UNLOCK(fp1); 633 FILE_LOCK(fp2); 634 fp2->f_flag = FREAD|FWRITE; 635 fp2->f_ops = &socketops; 636 fp2->f_type = DTYPE_SOCKET; 637 FILE_UNLOCK(fp2); 638 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 639 fdrop(fp1, td); 640 fdrop(fp2, td); 641 goto done2; 642free4: 643 fdclose(fdp, fp2, sv[1], td); 644 fdrop(fp2, td); 645free3: 646 fdclose(fdp, fp1, sv[0], td); 647 fdrop(fp1, td); 648free2: 649 (void)soclose(so2); 650free1: 651 (void)soclose(so1); 652done2: 653 NET_UNLOCK_GIANT(); 654 return (error); 655} 656 657static int 658sendit(td, s, mp, flags) 659 register struct thread *td; 660 int s; 661 register struct msghdr *mp; 662 int flags; 663{ 664 struct mbuf *control; 665 struct sockaddr *to; 666 int error; 667 668 if (mp->msg_name != NULL) { 669 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 670 if (error) { 671 to = NULL; 672 goto bad; 673 } 674 mp->msg_name = to; 675 } else { 676 to = NULL; 677 } 678 679 if (mp->msg_control) { 680 if (mp->msg_controllen < sizeof(struct cmsghdr) 681#ifdef COMPAT_OLDSOCK 682 && mp->msg_flags != MSG_COMPAT 683#endif 684 ) { 685 error = EINVAL; 686 goto bad; 687 } 688 error = sockargs(&control, mp->msg_control, 689 mp->msg_controllen, MT_CONTROL); 690 if (error) 691 goto bad; 692#ifdef COMPAT_OLDSOCK 693 if (mp->msg_flags == MSG_COMPAT) { 694 register struct cmsghdr *cm; 695 696 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 697 if (control == 0) { 698 error = ENOBUFS; 699 goto bad; 700 } else { 701 cm = mtod(control, struct cmsghdr *); 702 cm->cmsg_len = control->m_len; 703 cm->cmsg_level = SOL_SOCKET; 704 cm->cmsg_type = SCM_RIGHTS; 705 } 706 } 707#endif 708 } else { 709 control = NULL; 710 } 711 712 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 713 714bad: 715 if (to) 716 FREE(to, M_SONAME); 717 return (error); 718} 719 720int 721kern_sendit(td, s, mp, flags, control, segflg) 722 struct thread *td; 723 int s; 724 struct msghdr *mp; 725 int flags; 726 struct mbuf *control; 727 enum uio_seg segflg; 728{ 729 struct file *fp; 730 struct uio auio; 731 struct iovec *iov; 732 struct socket *so; 733 int i; 734 int len, error; 735#ifdef KTRACE 736 struct uio *ktruio = NULL; 737#endif 738 739 NET_LOCK_GIANT(); 740 error = getsock(td->td_proc->p_fd, s, &fp); 741 if (error) 742 goto bad2; 743 so = (struct socket *)fp->f_data; 744 745#ifdef MAC 746 SOCK_LOCK(so); 747 error = mac_check_socket_send(td->td_ucred, so); 748 SOCK_UNLOCK(so); 749 if (error) 750 goto bad; 751#endif 752 753 auio.uio_iov = mp->msg_iov; 754 auio.uio_iovcnt = mp->msg_iovlen; 755 auio.uio_segflg = segflg; 756 auio.uio_rw = UIO_WRITE; 757 auio.uio_td = td; 758 auio.uio_offset = 0; /* XXX */ 759 auio.uio_resid = 0; 760 iov = mp->msg_iov; 761 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 762 if ((auio.uio_resid += iov->iov_len) < 0) { 763 error = EINVAL; 764 goto bad; 765 } 766 } 767#ifdef KTRACE 768 if (KTRPOINT(td, KTR_GENIO)) 769 ktruio = cloneuio(&auio); 770#endif 771 len = auio.uio_resid; 772 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 773 0, control, flags, td); 774 if (error) { 775 if (auio.uio_resid != len && (error == ERESTART || 776 error == EINTR || error == EWOULDBLOCK)) 777 error = 0; 778 /* Generation of SIGPIPE can be controlled per socket */ 779 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 780 !(flags & MSG_NOSIGNAL)) { 781 PROC_LOCK(td->td_proc); 782 psignal(td->td_proc, SIGPIPE); 783 PROC_UNLOCK(td->td_proc); 784 } 785 } 786 if (error == 0) 787 td->td_retval[0] = len - auio.uio_resid; 788#ifdef KTRACE 789 if (ktruio != NULL) { 790 ktruio->uio_resid = td->td_retval[0]; 791 ktrgenio(s, UIO_WRITE, ktruio, error); 792 } 793#endif 794bad: 795 fdrop(fp, td); 796bad2: 797 NET_UNLOCK_GIANT(); 798 return (error); 799} 800 801/* 802 * MPSAFE 803 */ 804int 805sendto(td, uap) 806 struct thread *td; 807 register struct sendto_args /* { 808 int s; 809 caddr_t buf; 810 size_t len; 811 int flags; 812 caddr_t to; 813 int tolen; 814 } */ *uap; 815{ 816 struct msghdr msg; 817 struct iovec aiov; 818 int error; 819 820 msg.msg_name = uap->to; 821 msg.msg_namelen = uap->tolen; 822 msg.msg_iov = &aiov; 823 msg.msg_iovlen = 1; 824 msg.msg_control = 0; 825#ifdef COMPAT_OLDSOCK 826 msg.msg_flags = 0; 827#endif 828 aiov.iov_base = uap->buf; 829 aiov.iov_len = uap->len; 830 error = sendit(td, uap->s, &msg, uap->flags); 831 return (error); 832} 833 834#ifdef COMPAT_OLDSOCK 835/* 836 * MPSAFE 837 */ 838int 839osend(td, uap) 840 struct thread *td; 841 register struct osend_args /* { 842 int s; 843 caddr_t buf; 844 int len; 845 int flags; 846 } */ *uap; 847{ 848 struct msghdr msg; 849 struct iovec aiov; 850 int error; 851 852 msg.msg_name = 0; 853 msg.msg_namelen = 0; 854 msg.msg_iov = &aiov; 855 msg.msg_iovlen = 1; 856 aiov.iov_base = uap->buf; 857 aiov.iov_len = uap->len; 858 msg.msg_control = 0; 859 msg.msg_flags = 0; 860 error = sendit(td, uap->s, &msg, uap->flags); 861 return (error); 862} 863 864/* 865 * MPSAFE 866 */ 867int 868osendmsg(td, uap) 869 struct thread *td; 870 struct osendmsg_args /* { 871 int s; 872 caddr_t msg; 873 int flags; 874 } */ *uap; 875{ 876 struct msghdr msg; 877 struct iovec *iov; 878 int error; 879 880 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 881 if (error) 882 return (error); 883 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 884 if (error) 885 return (error); 886 msg.msg_iov = iov; 887 msg.msg_flags = MSG_COMPAT; 888 error = sendit(td, uap->s, &msg, uap->flags); 889 free(iov, M_IOV); 890 return (error); 891} 892#endif 893 894/* 895 * MPSAFE 896 */ 897int 898sendmsg(td, uap) 899 struct thread *td; 900 struct sendmsg_args /* { 901 int s; 902 caddr_t msg; 903 int flags; 904 } */ *uap; 905{ 906 struct msghdr msg; 907 struct iovec *iov; 908 int error; 909 910 error = copyin(uap->msg, &msg, sizeof (msg)); 911 if (error) 912 return (error); 913 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 914 if (error) 915 return (error); 916 msg.msg_iov = iov; 917#ifdef COMPAT_OLDSOCK 918 msg.msg_flags = 0; 919#endif 920 error = sendit(td, uap->s, &msg, uap->flags); 921 free(iov, M_IOV); 922 return (error); 923} 924 925static int 926recvit(td, s, mp, namelenp) 927 struct thread *td; 928 int s; 929 struct msghdr *mp; 930 void *namelenp; 931{ 932 struct uio auio; 933 struct iovec *iov; 934 int i; 935 socklen_t len; 936 int error; 937 struct mbuf *m, *control = 0; 938 caddr_t ctlbuf; 939 struct file *fp; 940 struct socket *so; 941 struct sockaddr *fromsa = 0; 942#ifdef KTRACE 943 struct uio *ktruio = NULL; 944#endif 945 946 NET_LOCK_GIANT(); 947 error = getsock(td->td_proc->p_fd, s, &fp); 948 if (error) { 949 NET_UNLOCK_GIANT(); 950 return (error); 951 } 952 so = fp->f_data; 953 954#ifdef MAC 955 SOCK_LOCK(so); 956 error = mac_check_socket_receive(td->td_ucred, so); 957 SOCK_UNLOCK(so); 958 if (error) { 959 fdrop(fp, td); 960 NET_UNLOCK_GIANT(); 961 return (error); 962 } 963#endif 964 965 auio.uio_iov = mp->msg_iov; 966 auio.uio_iovcnt = mp->msg_iovlen; 967 auio.uio_segflg = UIO_USERSPACE; 968 auio.uio_rw = UIO_READ; 969 auio.uio_td = td; 970 auio.uio_offset = 0; /* XXX */ 971 auio.uio_resid = 0; 972 iov = mp->msg_iov; 973 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 974 if ((auio.uio_resid += iov->iov_len) < 0) { 975 fdrop(fp, td); 976 NET_UNLOCK_GIANT(); 977 return (EINVAL); 978 } 979 } 980#ifdef KTRACE 981 if (KTRPOINT(td, KTR_GENIO)) 982 ktruio = cloneuio(&auio); 983#endif 984 len = auio.uio_resid; 985 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 986 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 987 &mp->msg_flags); 988 if (error) { 989 if (auio.uio_resid != (int)len && (error == ERESTART || 990 error == EINTR || error == EWOULDBLOCK)) 991 error = 0; 992 } 993#ifdef KTRACE 994 if (ktruio != NULL) { 995 ktruio->uio_resid = (int)len - auio.uio_resid; 996 ktrgenio(s, UIO_READ, ktruio, error); 997 } 998#endif 999 if (error) 1000 goto out; 1001 td->td_retval[0] = (int)len - auio.uio_resid; 1002 if (mp->msg_name) { 1003 len = mp->msg_namelen; 1004 if (len <= 0 || fromsa == 0) 1005 len = 0; 1006 else { 1007 /* save sa_len before it is destroyed by MSG_COMPAT */ 1008 len = MIN(len, fromsa->sa_len); 1009#ifdef COMPAT_OLDSOCK 1010 if (mp->msg_flags & MSG_COMPAT) 1011 ((struct osockaddr *)fromsa)->sa_family = 1012 fromsa->sa_family; 1013#endif 1014 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1015 if (error) 1016 goto out; 1017 } 1018 mp->msg_namelen = len; 1019 if (namelenp && 1020 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1021#ifdef COMPAT_OLDSOCK 1022 if (mp->msg_flags & MSG_COMPAT) 1023 error = 0; /* old recvfrom didn't check */ 1024 else 1025#endif 1026 goto out; 1027 } 1028 } 1029 if (mp->msg_control) { 1030#ifdef COMPAT_OLDSOCK 1031 /* 1032 * We assume that old recvmsg calls won't receive access 1033 * rights and other control info, esp. as control info 1034 * is always optional and those options didn't exist in 4.3. 1035 * If we receive rights, trim the cmsghdr; anything else 1036 * is tossed. 1037 */ 1038 if (control && mp->msg_flags & MSG_COMPAT) { 1039 if (mtod(control, struct cmsghdr *)->cmsg_level != 1040 SOL_SOCKET || 1041 mtod(control, struct cmsghdr *)->cmsg_type != 1042 SCM_RIGHTS) { 1043 mp->msg_controllen = 0; 1044 goto out; 1045 } 1046 control->m_len -= sizeof (struct cmsghdr); 1047 control->m_data += sizeof (struct cmsghdr); 1048 } 1049#endif 1050 len = mp->msg_controllen; 1051 m = control; 1052 mp->msg_controllen = 0; 1053 ctlbuf = mp->msg_control; 1054 1055 while (m && len > 0) { 1056 unsigned int tocopy; 1057 1058 if (len >= m->m_len) 1059 tocopy = m->m_len; 1060 else { 1061 mp->msg_flags |= MSG_CTRUNC; 1062 tocopy = len; 1063 } 1064 1065 if ((error = copyout(mtod(m, caddr_t), 1066 ctlbuf, tocopy)) != 0) 1067 goto out; 1068 1069 ctlbuf += tocopy; 1070 len -= tocopy; 1071 m = m->m_next; 1072 } 1073 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1074 } 1075out: 1076 fdrop(fp, td); 1077 NET_UNLOCK_GIANT(); 1078 if (fromsa) 1079 FREE(fromsa, M_SONAME); 1080 if (control) 1081 m_freem(control); 1082 return (error); 1083} 1084 1085/* 1086 * MPSAFE 1087 */ 1088int 1089recvfrom(td, uap) 1090 struct thread *td; 1091 register struct recvfrom_args /* { 1092 int s; 1093 caddr_t buf; 1094 size_t len; 1095 int flags; 1096 struct sockaddr * __restrict from; 1097 socklen_t * __restrict fromlenaddr; 1098 } */ *uap; 1099{ 1100 struct msghdr msg; 1101 struct iovec aiov; 1102 int error; 1103 1104 if (uap->fromlenaddr) { 1105 error = copyin(uap->fromlenaddr, 1106 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1107 if (error) 1108 goto done2; 1109 } else { 1110 msg.msg_namelen = 0; 1111 } 1112 msg.msg_name = uap->from; 1113 msg.msg_iov = &aiov; 1114 msg.msg_iovlen = 1; 1115 aiov.iov_base = uap->buf; 1116 aiov.iov_len = uap->len; 1117 msg.msg_control = 0; 1118 msg.msg_flags = uap->flags; 1119 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1120done2: 1121 return(error); 1122} 1123 1124#ifdef COMPAT_OLDSOCK 1125/* 1126 * MPSAFE 1127 */ 1128int 1129orecvfrom(td, uap) 1130 struct thread *td; 1131 struct recvfrom_args *uap; 1132{ 1133 1134 uap->flags |= MSG_COMPAT; 1135 return (recvfrom(td, uap)); 1136} 1137#endif 1138 1139 1140#ifdef COMPAT_OLDSOCK 1141/* 1142 * MPSAFE 1143 */ 1144int 1145orecv(td, uap) 1146 struct thread *td; 1147 register struct orecv_args /* { 1148 int s; 1149 caddr_t buf; 1150 int len; 1151 int flags; 1152 } */ *uap; 1153{ 1154 struct msghdr msg; 1155 struct iovec aiov; 1156 int error; 1157 1158 msg.msg_name = 0; 1159 msg.msg_namelen = 0; 1160 msg.msg_iov = &aiov; 1161 msg.msg_iovlen = 1; 1162 aiov.iov_base = uap->buf; 1163 aiov.iov_len = uap->len; 1164 msg.msg_control = 0; 1165 msg.msg_flags = uap->flags; 1166 error = recvit(td, uap->s, &msg, NULL); 1167 return (error); 1168} 1169 1170/* 1171 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1172 * overlays the new one, missing only the flags, and with the (old) access 1173 * rights where the control fields are now. 1174 * 1175 * MPSAFE 1176 */ 1177int 1178orecvmsg(td, uap) 1179 struct thread *td; 1180 struct orecvmsg_args /* { 1181 int s; 1182 struct omsghdr *msg; 1183 int flags; 1184 } */ *uap; 1185{ 1186 struct msghdr msg; 1187 struct iovec *iov; 1188 int error; 1189 1190 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1191 if (error) 1192 return (error); 1193 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1194 if (error) 1195 return (error); 1196 msg.msg_flags = uap->flags | MSG_COMPAT; 1197 msg.msg_iov = iov; 1198 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1199 if (msg.msg_controllen && error == 0) 1200 error = copyout(&msg.msg_controllen, 1201 &uap->msg->msg_accrightslen, sizeof (int)); 1202 free(iov, M_IOV); 1203 return (error); 1204} 1205#endif 1206 1207/* 1208 * MPSAFE 1209 */ 1210int 1211recvmsg(td, uap) 1212 struct thread *td; 1213 struct recvmsg_args /* { 1214 int s; 1215 struct msghdr *msg; 1216 int flags; 1217 } */ *uap; 1218{ 1219 struct msghdr msg; 1220 struct iovec *uiov, *iov; 1221 int error; 1222 1223 error = copyin(uap->msg, &msg, sizeof (msg)); 1224 if (error) 1225 return (error); 1226 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1227 if (error) 1228 return (error); 1229 msg.msg_flags = uap->flags; 1230#ifdef COMPAT_OLDSOCK 1231 msg.msg_flags &= ~MSG_COMPAT; 1232#endif 1233 uiov = msg.msg_iov; 1234 msg.msg_iov = iov; 1235 error = recvit(td, uap->s, &msg, NULL); 1236 if (error == 0) { 1237 msg.msg_iov = uiov; 1238 error = copyout(&msg, uap->msg, sizeof(msg)); 1239 } 1240 free(iov, M_IOV); 1241 return (error); 1242} 1243 1244/* 1245 * MPSAFE 1246 */ 1247/* ARGSUSED */ 1248int 1249shutdown(td, uap) 1250 struct thread *td; 1251 register struct shutdown_args /* { 1252 int s; 1253 int how; 1254 } */ *uap; 1255{ 1256 struct socket *so; 1257 struct file *fp; 1258 int error; 1259 1260 NET_LOCK_GIANT(); 1261 error = getsock(td->td_proc->p_fd, uap->s, &fp); 1262 if (error == 0) { 1263 so = fp->f_data; 1264 error = soshutdown(so, uap->how); 1265 fdrop(fp, td); 1266 } 1267 NET_UNLOCK_GIANT(); 1268 return (error); 1269} 1270 1271/* 1272 * MPSAFE 1273 */ 1274/* ARGSUSED */ 1275int 1276setsockopt(td, uap) 1277 struct thread *td; 1278 register struct setsockopt_args /* { 1279 int s; 1280 int level; 1281 int name; 1282 caddr_t val; 1283 int valsize; 1284 } */ *uap; 1285{ 1286 1287 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1288 uap->val, UIO_USERSPACE, uap->valsize)); 1289} 1290 1291int 1292kern_setsockopt(td, s, level, name, val, valseg, valsize) 1293 struct thread *td; 1294 int s; 1295 int level; 1296 int name; 1297 void *val; 1298 enum uio_seg valseg; 1299 socklen_t valsize; 1300{ 1301 int error; 1302 struct socket *so; 1303 struct file *fp; 1304 struct sockopt sopt; 1305 1306 if (val == NULL && valsize != 0) 1307 return (EFAULT); 1308 if (valsize < 0) 1309 return (EINVAL); 1310 1311 sopt.sopt_dir = SOPT_SET; 1312 sopt.sopt_level = level; 1313 sopt.sopt_name = name; 1314 sopt.sopt_val = val; 1315 sopt.sopt_valsize = valsize; 1316 switch (valseg) { 1317 case UIO_USERSPACE: 1318 sopt.sopt_td = td; 1319 break; 1320 case UIO_SYSSPACE: 1321 sopt.sopt_td = NULL; 1322 break; 1323 default: 1324 panic("kern_setsockopt called with bad valseg"); 1325 } 1326 1327 NET_LOCK_GIANT(); 1328 error = getsock(td->td_proc->p_fd, s, &fp); 1329 if (error == 0) { 1330 so = fp->f_data; 1331 error = sosetopt(so, &sopt); 1332 fdrop(fp, td); 1333 } 1334 NET_UNLOCK_GIANT(); 1335 return(error); 1336} 1337 1338/* 1339 * MPSAFE 1340 */ 1341/* ARGSUSED */ 1342int 1343getsockopt(td, uap) 1344 struct thread *td; 1345 register struct getsockopt_args /* { 1346 int s; 1347 int level; 1348 int name; 1349 void * __restrict val; 1350 socklen_t * __restrict avalsize; 1351 } */ *uap; 1352{ 1353 socklen_t valsize; 1354 int error; 1355 1356 if (uap->val) { 1357 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1358 if (error) 1359 return (error); 1360 } 1361 1362 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1363 uap->val, UIO_USERSPACE, &valsize); 1364 1365 if (error == 0) 1366 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1367 return (error); 1368} 1369 1370/* 1371 * Kernel version of getsockopt. 1372 * optval can be a userland or userspace. optlen is always a kernel pointer. 1373 */ 1374int 1375kern_getsockopt(td, s, level, name, val, valseg, valsize) 1376 struct thread *td; 1377 int s; 1378 int level; 1379 int name; 1380 void *val; 1381 enum uio_seg valseg; 1382 socklen_t *valsize; 1383{ 1384 int error; 1385 struct socket *so; 1386 struct file *fp; 1387 struct sockopt sopt; 1388 1389 if (val == NULL) 1390 *valsize = 0; 1391 if (*valsize < 0) 1392 return (EINVAL); 1393 1394 sopt.sopt_dir = SOPT_GET; 1395 sopt.sopt_level = level; 1396 sopt.sopt_name = name; 1397 sopt.sopt_val = val; 1398 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1399 switch (valseg) { 1400 case UIO_USERSPACE: 1401 sopt.sopt_td = td; 1402 break; 1403 case UIO_SYSSPACE: 1404 sopt.sopt_td = NULL; 1405 break; 1406 default: 1407 panic("kern_getsockopt called with bad valseg"); 1408 } 1409 1410 NET_LOCK_GIANT(); 1411 error = getsock(td->td_proc->p_fd, s, &fp); 1412 if (error == 0) { 1413 so = fp->f_data; 1414 error = sogetopt(so, &sopt); 1415 *valsize = sopt.sopt_valsize; 1416 fdrop(fp, td); 1417 } 1418 NET_UNLOCK_GIANT(); 1419 return (error); 1420} 1421 1422/* 1423 * getsockname1() - Get socket name. 1424 * 1425 * MPSAFE 1426 */ 1427/* ARGSUSED */ 1428static int 1429getsockname1(td, uap, compat) 1430 struct thread *td; 1431 register struct getsockname_args /* { 1432 int fdes; 1433 struct sockaddr * __restrict asa; 1434 socklen_t * __restrict alen; 1435 } */ *uap; 1436 int compat; 1437{ 1438 struct socket *so; 1439 struct sockaddr *sa; 1440 struct file *fp; 1441 socklen_t len; 1442 int error; 1443 1444 NET_LOCK_GIANT(); 1445 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1446 if (error) 1447 goto done2; 1448 so = fp->f_data; 1449 error = copyin(uap->alen, &len, sizeof (len)); 1450 if (error) 1451 goto done1; 1452 if (len < 0) { 1453 error = EINVAL; 1454 goto done1; 1455 } 1456 sa = 0; 1457 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1458 if (error) 1459 goto bad; 1460 if (sa == 0) { 1461 len = 0; 1462 goto gotnothing; 1463 } 1464 1465 len = MIN(len, sa->sa_len); 1466#ifdef COMPAT_OLDSOCK 1467 if (compat) 1468 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1469#endif 1470 error = copyout(sa, uap->asa, (u_int)len); 1471 if (error == 0) 1472gotnothing: 1473 error = copyout(&len, uap->alen, sizeof (len)); 1474bad: 1475 if (sa) 1476 FREE(sa, M_SONAME); 1477done1: 1478 fdrop(fp, td); 1479done2: 1480 NET_UNLOCK_GIANT(); 1481 return (error); 1482} 1483 1484/* 1485 * MPSAFE 1486 */ 1487int 1488getsockname(td, uap) 1489 struct thread *td; 1490 struct getsockname_args *uap; 1491{ 1492 1493 return (getsockname1(td, uap, 0)); 1494} 1495 1496#ifdef COMPAT_OLDSOCK 1497/* 1498 * MPSAFE 1499 */ 1500int 1501ogetsockname(td, uap) 1502 struct thread *td; 1503 struct getsockname_args *uap; 1504{ 1505 1506 return (getsockname1(td, uap, 1)); 1507} 1508#endif /* COMPAT_OLDSOCK */ 1509 1510/* 1511 * getpeername1() - Get name of peer for connected socket. 1512 * 1513 * MPSAFE 1514 */ 1515/* ARGSUSED */ 1516static int 1517getpeername1(td, uap, compat) 1518 struct thread *td; 1519 register struct getpeername_args /* { 1520 int fdes; 1521 struct sockaddr * __restrict asa; 1522 socklen_t * __restrict alen; 1523 } */ *uap; 1524 int compat; 1525{ 1526 struct socket *so; 1527 struct sockaddr *sa; 1528 struct file *fp; 1529 socklen_t len; 1530 int error; 1531 1532 NET_LOCK_GIANT(); 1533 error = getsock(td->td_proc->p_fd, uap->fdes, &fp); 1534 if (error) 1535 goto done2; 1536 so = fp->f_data; 1537 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1538 error = ENOTCONN; 1539 goto done1; 1540 } 1541 error = copyin(uap->alen, &len, sizeof (len)); 1542 if (error) 1543 goto done1; 1544 if (len < 0) { 1545 error = EINVAL; 1546 goto done1; 1547 } 1548 sa = 0; 1549 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1550 if (error) 1551 goto bad; 1552 if (sa == 0) { 1553 len = 0; 1554 goto gotnothing; 1555 } 1556 len = MIN(len, sa->sa_len); 1557#ifdef COMPAT_OLDSOCK 1558 if (compat) 1559 ((struct osockaddr *)sa)->sa_family = 1560 sa->sa_family; 1561#endif 1562 error = copyout(sa, uap->asa, (u_int)len); 1563 if (error) 1564 goto bad; 1565gotnothing: 1566 error = copyout(&len, uap->alen, sizeof (len)); 1567bad: 1568 if (sa) 1569 FREE(sa, M_SONAME); 1570done1: 1571 fdrop(fp, td); 1572done2: 1573 NET_UNLOCK_GIANT(); 1574 return (error); 1575} 1576 1577/* 1578 * MPSAFE 1579 */ 1580int 1581getpeername(td, uap) 1582 struct thread *td; 1583 struct getpeername_args *uap; 1584{ 1585 1586 return (getpeername1(td, uap, 0)); 1587} 1588 1589#ifdef COMPAT_OLDSOCK 1590/* 1591 * MPSAFE 1592 */ 1593int 1594ogetpeername(td, uap) 1595 struct thread *td; 1596 struct ogetpeername_args *uap; 1597{ 1598 1599 /* XXX uap should have type `getpeername_args *' to begin with. */ 1600 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1601} 1602#endif /* COMPAT_OLDSOCK */ 1603 1604int 1605sockargs(mp, buf, buflen, type) 1606 struct mbuf **mp; 1607 caddr_t buf; 1608 int buflen, type; 1609{ 1610 register struct sockaddr *sa; 1611 register struct mbuf *m; 1612 int error; 1613 1614 if ((u_int)buflen > MLEN) { 1615#ifdef COMPAT_OLDSOCK 1616 if (type == MT_SONAME && (u_int)buflen <= 112) 1617 buflen = MLEN; /* unix domain compat. hack */ 1618 else 1619#endif 1620 if ((u_int)buflen > MCLBYTES) 1621 return (EINVAL); 1622 } 1623 m = m_get(M_TRYWAIT, type); 1624 if (m == NULL) 1625 return (ENOBUFS); 1626 if ((u_int)buflen > MLEN) { 1627 MCLGET(m, M_TRYWAIT); 1628 if ((m->m_flags & M_EXT) == 0) { 1629 m_free(m); 1630 return (ENOBUFS); 1631 } 1632 } 1633 m->m_len = buflen; 1634 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1635 if (error) 1636 (void) m_free(m); 1637 else { 1638 *mp = m; 1639 if (type == MT_SONAME) { 1640 sa = mtod(m, struct sockaddr *); 1641 1642#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1643 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1644 sa->sa_family = sa->sa_len; 1645#endif 1646 sa->sa_len = buflen; 1647 } 1648 } 1649 return (error); 1650} 1651 1652int 1653getsockaddr(namp, uaddr, len) 1654 struct sockaddr **namp; 1655 caddr_t uaddr; 1656 size_t len; 1657{ 1658 struct sockaddr *sa; 1659 int error; 1660 1661 if (len > SOCK_MAXADDRLEN) 1662 return (ENAMETOOLONG); 1663 if (len < offsetof(struct sockaddr, sa_data[0])) 1664 return (EINVAL); 1665 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1666 error = copyin(uaddr, sa, len); 1667 if (error) { 1668 FREE(sa, M_SONAME); 1669 } else { 1670#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1671 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1672 sa->sa_family = sa->sa_len; 1673#endif 1674 sa->sa_len = len; 1675 *namp = sa; 1676 } 1677 return (error); 1678} 1679 1680/* 1681 * Detach mapped page and release resources back to the system. 1682 */ 1683void 1684sf_buf_mext(void *addr, void *args) 1685{ 1686 vm_page_t m; 1687 1688 m = sf_buf_page(args); 1689 sf_buf_free(args); 1690 vm_page_lock_queues(); 1691 vm_page_unwire(m, 0); 1692 /* 1693 * Check for the object going away on us. This can 1694 * happen since we don't hold a reference to it. 1695 * If so, we're responsible for freeing the page. 1696 */ 1697 if (m->wire_count == 0 && m->object == NULL) 1698 vm_page_free(m); 1699 vm_page_unlock_queues(); 1700} 1701 1702/* 1703 * sendfile(2) 1704 * 1705 * MPSAFE 1706 * 1707 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1708 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1709 * 1710 * Send a file specified by 'fd' and starting at 'offset' to a socket 1711 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1712 * nbytes == 0. Optionally add a header and/or trailer to the socket 1713 * output. If specified, write the total number of bytes sent into *sbytes. 1714 * 1715 */ 1716int 1717sendfile(struct thread *td, struct sendfile_args *uap) 1718{ 1719 1720 return (do_sendfile(td, uap, 0)); 1721} 1722 1723#ifdef COMPAT_FREEBSD4 1724int 1725freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1726{ 1727 struct sendfile_args args; 1728 1729 args.fd = uap->fd; 1730 args.s = uap->s; 1731 args.offset = uap->offset; 1732 args.nbytes = uap->nbytes; 1733 args.hdtr = uap->hdtr; 1734 args.sbytes = uap->sbytes; 1735 args.flags = uap->flags; 1736 1737 return (do_sendfile(td, &args, 1)); 1738} 1739#endif /* COMPAT_FREEBSD4 */ 1740 1741static int 1742do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1743{ 1744 struct vnode *vp; 1745 struct vm_object *obj; 1746 struct socket *so = NULL; 1747 struct mbuf *m, *m_header = NULL; 1748 struct sf_buf *sf; 1749 struct vm_page *pg; 1750 struct writev_args nuap; 1751 struct sf_hdtr hdtr; 1752 struct uio *hdr_uio = NULL; 1753 off_t off, xfsize, hdtr_size, sbytes = 0; 1754 int error, headersize = 0, headersent = 0; 1755 1756 mtx_lock(&Giant); 1757 1758 hdtr_size = 0; 1759 1760 /* 1761 * The descriptor must be a regular file and have a backing VM object. 1762 */ 1763 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1764 goto done; 1765 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1766 obj = vp->v_object; 1767 VOP_UNLOCK(vp, 0, td); 1768 if (obj == NULL) { 1769 error = EINVAL; 1770 goto done; 1771 } 1772 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1773 goto done; 1774 if (so->so_type != SOCK_STREAM) { 1775 error = EINVAL; 1776 goto done; 1777 } 1778 if ((so->so_state & SS_ISCONNECTED) == 0) { 1779 error = ENOTCONN; 1780 goto done; 1781 } 1782 if (uap->offset < 0) { 1783 error = EINVAL; 1784 goto done; 1785 } 1786 1787#ifdef MAC 1788 SOCK_LOCK(so); 1789 error = mac_check_socket_send(td->td_ucred, so); 1790 SOCK_UNLOCK(so); 1791 if (error) 1792 goto done; 1793#endif 1794 1795 /* 1796 * If specified, get the pointer to the sf_hdtr struct for 1797 * any headers/trailers. 1798 */ 1799 if (uap->hdtr != NULL) { 1800 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1801 if (error) 1802 goto done; 1803 /* 1804 * Send any headers. 1805 */ 1806 if (hdtr.headers != NULL) { 1807 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1808 if (error) 1809 goto done; 1810 hdr_uio->uio_td = td; 1811 hdr_uio->uio_rw = UIO_WRITE; 1812 if (hdr_uio->uio_resid > 0) { 1813 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0); 1814 if (m_header == NULL) 1815 goto done; 1816 headersize = m_header->m_pkthdr.len; 1817 if (compat) 1818 sbytes += headersize; 1819 } 1820 } 1821 } 1822 1823 /* 1824 * Protect against multiple writers to the socket. 1825 */ 1826 SOCKBUF_LOCK(&so->so_snd); 1827 (void) sblock(&so->so_snd, M_WAITOK); 1828 SOCKBUF_UNLOCK(&so->so_snd); 1829 1830 /* 1831 * Loop through the pages in the file, starting with the requested 1832 * offset. Get a file page (do I/O if necessary), map the file page 1833 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1834 * it on the socket. 1835 */ 1836 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1837 vm_pindex_t pindex; 1838 vm_offset_t pgoff; 1839 1840 pindex = OFF_TO_IDX(off); 1841 VM_OBJECT_LOCK(obj); 1842retry_lookup: 1843 /* 1844 * Calculate the amount to transfer. Not to exceed a page, 1845 * the EOF, or the passed in nbytes. 1846 */ 1847 xfsize = obj->un_pager.vnp.vnp_size - off; 1848 VM_OBJECT_UNLOCK(obj); 1849 if (xfsize > PAGE_SIZE) 1850 xfsize = PAGE_SIZE; 1851 pgoff = (vm_offset_t)(off & PAGE_MASK); 1852 if (PAGE_SIZE - pgoff < xfsize) 1853 xfsize = PAGE_SIZE - pgoff; 1854 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1855 xfsize = uap->nbytes - sbytes; 1856 if (xfsize <= 0) { 1857 if (m_header != NULL) { 1858 m = m_header; 1859 m_header = NULL; 1860 SOCKBUF_LOCK(&so->so_snd); 1861 goto retry_space; 1862 } else 1863 break; 1864 } 1865 /* 1866 * Optimize the non-blocking case by looking at the socket space 1867 * before going to the extra work of constituting the sf_buf. 1868 */ 1869 SOCKBUF_LOCK(&so->so_snd); 1870 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1871 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1872 error = EPIPE; 1873 else 1874 error = EAGAIN; 1875 sbunlock(&so->so_snd); 1876 SOCKBUF_UNLOCK(&so->so_snd); 1877 goto done; 1878 } 1879 SOCKBUF_UNLOCK(&so->so_snd); 1880 VM_OBJECT_LOCK(obj); 1881 /* 1882 * Attempt to look up the page. 1883 * 1884 * Allocate if not found 1885 * 1886 * Wait and loop if busy. 1887 */ 1888 pg = vm_page_lookup(obj, pindex); 1889 1890 if (pg == NULL) { 1891 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | 1892 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1893 if (pg == NULL) { 1894 VM_OBJECT_UNLOCK(obj); 1895 VM_WAIT; 1896 VM_OBJECT_LOCK(obj); 1897 goto retry_lookup; 1898 } 1899 vm_page_lock_queues(); 1900 } else { 1901 vm_page_lock_queues(); 1902 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1903 goto retry_lookup; 1904 /* 1905 * Wire the page so it does not get ripped out from 1906 * under us. 1907 */ 1908 vm_page_wire(pg); 1909 } 1910 1911 /* 1912 * If page is not valid for what we need, initiate I/O 1913 */ 1914 1915 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1916 VM_OBJECT_UNLOCK(obj); 1917 } else if (uap->flags & SF_NODISKIO) { 1918 error = EBUSY; 1919 } else { 1920 int bsize, resid; 1921 1922 /* 1923 * Ensure that our page is still around when the I/O 1924 * completes. 1925 */ 1926 vm_page_io_start(pg); 1927 vm_page_unlock_queues(); 1928 VM_OBJECT_UNLOCK(obj); 1929 1930 /* 1931 * Get the page from backing store. 1932 */ 1933 bsize = vp->v_mount->mnt_stat.f_iosize; 1934 vn_lock(vp, LK_SHARED | LK_RETRY, td); 1935 /* 1936 * XXXMAC: Because we don't have fp->f_cred here, 1937 * we pass in NOCRED. This is probably wrong, but 1938 * is consistent with our original implementation. 1939 */ 1940 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1941 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1942 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1943 td->td_ucred, NOCRED, &resid, td); 1944 VOP_UNLOCK(vp, 0, td); 1945 VM_OBJECT_LOCK(obj); 1946 vm_page_lock_queues(); 1947 vm_page_io_finish(pg); 1948 if (!error) 1949 VM_OBJECT_UNLOCK(obj); 1950 mbstat.sf_iocnt++; 1951 } 1952 1953 if (error) { 1954 vm_page_unwire(pg, 0); 1955 /* 1956 * See if anyone else might know about this page. 1957 * If not and it is not valid, then free it. 1958 */ 1959 if (pg->wire_count == 0 && pg->valid == 0 && 1960 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1961 pg->hold_count == 0) { 1962 vm_page_free(pg); 1963 } 1964 vm_page_unlock_queues(); 1965 VM_OBJECT_UNLOCK(obj); 1966 SOCKBUF_LOCK(&so->so_snd); 1967 sbunlock(&so->so_snd); 1968 SOCKBUF_UNLOCK(&so->so_snd); 1969 goto done; 1970 } 1971 vm_page_unlock_queues(); 1972 1973 /* 1974 * Get a sendfile buf. We usually wait as long as necessary, 1975 * but this wait can be interrupted. 1976 */ 1977 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { 1978 mbstat.sf_allocfail++; 1979 vm_page_lock_queues(); 1980 vm_page_unwire(pg, 0); 1981 if (pg->wire_count == 0 && pg->object == NULL) 1982 vm_page_free(pg); 1983 vm_page_unlock_queues(); 1984 SOCKBUF_LOCK(&so->so_snd); 1985 sbunlock(&so->so_snd); 1986 SOCKBUF_UNLOCK(&so->so_snd); 1987 error = EINTR; 1988 goto done; 1989 } 1990 1991 /* 1992 * Get an mbuf header and set it up as having external storage. 1993 */ 1994 if (m_header) 1995 MGET(m, M_TRYWAIT, MT_DATA); 1996 else 1997 MGETHDR(m, M_TRYWAIT, MT_DATA); 1998 if (m == NULL) { 1999 error = ENOBUFS; 2000 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2001 SOCKBUF_LOCK(&so->so_snd); 2002 sbunlock(&so->so_snd); 2003 SOCKBUF_UNLOCK(&so->so_snd); 2004 goto done; 2005 } 2006 /* 2007 * Setup external storage for mbuf. 2008 */ 2009 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 2010 EXT_SFBUF); 2011 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 2012 m->m_pkthdr.len = m->m_len = xfsize; 2013 2014 if (m_header) { 2015 m_cat(m_header, m); 2016 m = m_header; 2017 m_header = NULL; 2018 m_fixhdr(m); 2019 } 2020 2021 /* 2022 * Add the buffer to the socket buffer chain. 2023 */ 2024 SOCKBUF_LOCK(&so->so_snd); 2025retry_space: 2026 /* 2027 * Make sure that the socket is still able to take more data. 2028 * CANTSENDMORE being true usually means that the connection 2029 * was closed. so_error is true when an error was sensed after 2030 * a previous send. 2031 * The state is checked after the page mapping and buffer 2032 * allocation above since those operations may block and make 2033 * any socket checks stale. From this point forward, nothing 2034 * blocks before the pru_send (or more accurately, any blocking 2035 * results in a loop back to here to re-check). 2036 */ 2037 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2038 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 2039 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2040 error = EPIPE; 2041 } else { 2042 error = so->so_error; 2043 so->so_error = 0; 2044 } 2045 m_freem(m); 2046 sbunlock(&so->so_snd); 2047 SOCKBUF_UNLOCK(&so->so_snd); 2048 goto done; 2049 } 2050 /* 2051 * Wait for socket space to become available. We do this just 2052 * after checking the connection state above in order to avoid 2053 * a race condition with sbwait(). 2054 */ 2055 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2056 if (so->so_state & SS_NBIO) { 2057 m_freem(m); 2058 sbunlock(&so->so_snd); 2059 SOCKBUF_UNLOCK(&so->so_snd); 2060 error = EAGAIN; 2061 goto done; 2062 } 2063 error = sbwait(&so->so_snd); 2064 /* 2065 * An error from sbwait usually indicates that we've 2066 * been interrupted by a signal. If we've sent anything 2067 * then return bytes sent, otherwise return the error. 2068 */ 2069 if (error) { 2070 m_freem(m); 2071 sbunlock(&so->so_snd); 2072 SOCKBUF_UNLOCK(&so->so_snd); 2073 goto done; 2074 } 2075 goto retry_space; 2076 } 2077 SOCKBUF_UNLOCK(&so->so_snd); 2078 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2079 if (error) { 2080 SOCKBUF_LOCK(&so->so_snd); 2081 sbunlock(&so->so_snd); 2082 SOCKBUF_UNLOCK(&so->so_snd); 2083 goto done; 2084 } 2085 headersent = 1; 2086 } 2087 SOCKBUF_LOCK(&so->so_snd); 2088 sbunlock(&so->so_snd); 2089 SOCKBUF_UNLOCK(&so->so_snd); 2090 2091 /* 2092 * Send trailers. Wimp out and use writev(2). 2093 */ 2094 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2095 nuap.fd = uap->s; 2096 nuap.iovp = hdtr.trailers; 2097 nuap.iovcnt = hdtr.trl_cnt; 2098 error = writev(td, &nuap); 2099 if (error) 2100 goto done; 2101 if (compat) 2102 sbytes += td->td_retval[0]; 2103 else 2104 hdtr_size += td->td_retval[0]; 2105 } 2106 2107done: 2108 if (headersent) { 2109 if (!compat) 2110 hdtr_size += headersize; 2111 } else { 2112 if (compat) 2113 sbytes -= headersize; 2114 } 2115 /* 2116 * If there was no error we have to clear td->td_retval[0] 2117 * because it may have been set by writev. 2118 */ 2119 if (error == 0) { 2120 td->td_retval[0] = 0; 2121 } 2122 if (uap->sbytes != NULL) { 2123 if (!compat) 2124 sbytes += hdtr_size; 2125 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2126 } 2127 if (vp) 2128 vrele(vp); 2129 if (so) 2130 fputsock(so); 2131 if (hdr_uio != NULL) 2132 free(hdr_uio, M_IOV); 2133 if (m_header) 2134 m_freem(m_header); 2135 2136 mtx_unlock(&Giant); 2137 2138 if (error == ERESTART) 2139 error = EINTR; 2140 2141 return (error); 2142} 2143