kern_sendfile.c revision 110294
1/* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 * $FreeBSD: head/sys/kern/uipc_syscalls.c 110294 2003-02-03 17:36:52Z ume $ 38 */ 39 40#include "opt_compat.h" 41#include "opt_ktrace.h" 42#include "opt_mac.h" 43 44#include <sys/param.h> 45#include <sys/systm.h> 46#include <sys/kernel.h> 47#include <sys/lock.h> 48#include <sys/mac.h> 49#include <sys/mutex.h> 50#include <sys/sysproto.h> 51#include <sys/malloc.h> 52#include <sys/filedesc.h> 53#include <sys/event.h> 54#include <sys/proc.h> 55#include <sys/fcntl.h> 56#include <sys/file.h> 57#include <sys/lock.h> 58#include <sys/mount.h> 59#include <sys/mbuf.h> 60#include <sys/protosw.h> 61#include <sys/socket.h> 62#include <sys/socketvar.h> 63#include <sys/signalvar.h> 64#include <sys/syscallsubr.h> 65#include <sys/uio.h> 66#include <sys/vnode.h> 67#ifdef KTRACE 68#include <sys/ktrace.h> 69#endif 70 71#include <vm/vm.h> 72#include <vm/vm_object.h> 73#include <vm/vm_page.h> 74#include <vm/vm_pageout.h> 75#include <vm/vm_kern.h> 76#include <vm/vm_extern.h> 77 78static void sf_buf_init(void *arg); 79SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 80 81static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 82static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 83 84static int accept1(struct thread *td, struct accept_args *uap, int compat); 85static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 86static int getsockname1(struct thread *td, struct getsockname_args *uap, 87 int compat); 88static int getpeername1(struct thread *td, struct getpeername_args *uap, 89 int compat); 90 91/* 92 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 93 * sf_freelist head with the sf_lock mutex. 94 */ 95static struct { 96 SLIST_HEAD(, sf_buf) sf_head; 97 struct mtx sf_lock; 98} sf_freelist; 99 100vm_offset_t sf_base; 101struct sf_buf *sf_bufs; 102u_int sf_buf_alloc_want; 103 104/* 105 * System call interface to the socket abstraction. 106 */ 107#if defined(COMPAT_43) || defined(COMPAT_SUNOS) 108#define COMPAT_OLDSOCK 109#endif 110 111/* 112 * MPSAFE 113 */ 114int 115socket(td, uap) 116 struct thread *td; 117 register struct socket_args /* { 118 int domain; 119 int type; 120 int protocol; 121 } */ *uap; 122{ 123 struct filedesc *fdp; 124 struct socket *so; 125 struct file *fp; 126 int fd, error; 127 128 mtx_lock(&Giant); 129 fdp = td->td_proc->p_fd; 130 error = falloc(td, &fp, &fd); 131 if (error) 132 goto done2; 133 fhold(fp); 134 error = socreate(uap->domain, &so, uap->type, uap->protocol, 135 td->td_ucred, td); 136 FILEDESC_LOCK(fdp); 137 if (error) { 138 if (fdp->fd_ofiles[fd] == fp) { 139 fdp->fd_ofiles[fd] = NULL; 140 FILEDESC_UNLOCK(fdp); 141 fdrop(fp, td); 142 } else 143 FILEDESC_UNLOCK(fdp); 144 } else { 145 fp->f_data = so; /* already has ref count */ 146 fp->f_flag = FREAD|FWRITE; 147 fp->f_ops = &socketops; 148 fp->f_type = DTYPE_SOCKET; 149 FILEDESC_UNLOCK(fdp); 150 td->td_retval[0] = fd; 151 } 152 fdrop(fp, td); 153done2: 154 mtx_unlock(&Giant); 155 return (error); 156} 157 158/* 159 * MPSAFE 160 */ 161/* ARGSUSED */ 162int 163bind(td, uap) 164 struct thread *td; 165 register struct bind_args /* { 166 int s; 167 caddr_t name; 168 int namelen; 169 } */ *uap; 170{ 171 struct sockaddr *sa; 172 int error; 173 174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 175 return (error); 176 177 return (kern_bind(td, uap->s, sa)); 178} 179 180int 181kern_bind(td, fd, sa) 182 struct thread *td; 183 int fd; 184 struct sockaddr *sa; 185{ 186 struct socket *so; 187 int error; 188 189 mtx_lock(&Giant); 190 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 191 goto done2; 192#ifdef MAC 193 error = mac_check_socket_bind(td->td_ucred, so, sa); 194 if (error) 195 goto done1; 196#endif 197 error = sobind(so, sa, td); 198#ifdef MAC 199done1: 200#endif 201 fputsock(so); 202done2: 203 mtx_unlock(&Giant); 204 FREE(sa, M_SONAME); 205 return (error); 206} 207 208/* 209 * MPSAFE 210 */ 211/* ARGSUSED */ 212int 213listen(td, uap) 214 struct thread *td; 215 register struct listen_args /* { 216 int s; 217 int backlog; 218 } */ *uap; 219{ 220 struct socket *so; 221 int error; 222 223 mtx_lock(&Giant); 224 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 225#ifdef MAC 226 error = mac_check_socket_listen(td->td_ucred, so); 227 if (error) 228 goto done; 229#endif 230 error = solisten(so, uap->backlog, td); 231#ifdef MAC 232done: 233#endif 234 fputsock(so); 235 } 236 mtx_unlock(&Giant); 237 return(error); 238} 239 240/* 241 * accept1() 242 * MPSAFE 243 */ 244static int 245accept1(td, uap, compat) 246 struct thread *td; 247 register struct accept_args /* { 248 int s; 249 caddr_t name; 250 int *anamelen; 251 } */ *uap; 252 int compat; 253{ 254 struct filedesc *fdp; 255 struct file *nfp = NULL; 256 struct sockaddr *sa; 257 int namelen, error, s; 258 struct socket *head, *so; 259 int fd; 260 u_int fflag; 261 pid_t pgid; 262 263 mtx_lock(&Giant); 264 fdp = td->td_proc->p_fd; 265 if (uap->name) { 266 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 267 if(error) 268 goto done2; 269 if (namelen < 0) { 270 error = EINVAL; 271 goto done2; 272 } 273 } 274 error = fgetsock(td, uap->s, &head, &fflag); 275 if (error) 276 goto done2; 277 s = splnet(); 278 if ((head->so_options & SO_ACCEPTCONN) == 0) { 279 splx(s); 280 error = EINVAL; 281 goto done; 282 } 283 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 284 if (head->so_state & SS_CANTRCVMORE) { 285 head->so_error = ECONNABORTED; 286 break; 287 } 288 if ((head->so_state & SS_NBIO) != 0) { 289 head->so_error = EWOULDBLOCK; 290 break; 291 } 292 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 293 "accept", 0); 294 if (error) { 295 splx(s); 296 goto done; 297 } 298 } 299 if (head->so_error) { 300 error = head->so_error; 301 head->so_error = 0; 302 splx(s); 303 goto done; 304 } 305 306 /* 307 * At this point we know that there is at least one connection 308 * ready to be accepted. Remove it from the queue prior to 309 * allocating the file descriptor for it since falloc() may 310 * block allowing another process to accept the connection 311 * instead. 312 */ 313 so = TAILQ_FIRST(&head->so_comp); 314 TAILQ_REMOVE(&head->so_comp, so, so_list); 315 head->so_qlen--; 316 317 error = falloc(td, &nfp, &fd); 318 if (error) { 319 /* 320 * Probably ran out of file descriptors. Put the 321 * unaccepted connection back onto the queue and 322 * do another wakeup so some other process might 323 * have a chance at it. 324 */ 325 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 326 head->so_qlen++; 327 wakeup_one(&head->so_timeo); 328 splx(s); 329 goto done; 330 } 331 fhold(nfp); 332 td->td_retval[0] = fd; 333 334 /* connection has been removed from the listen queue */ 335 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 336 337 so->so_state &= ~SS_COMP; 338 so->so_head = NULL; 339 pgid = fgetown(&head->so_sigio); 340 if (pgid != 0) 341 fsetown(pgid, &so->so_sigio); 342 343 FILE_LOCK(nfp); 344 soref(so); /* file descriptor reference */ 345 nfp->f_data = so; /* nfp has ref count from falloc */ 346 nfp->f_flag = fflag; 347 nfp->f_ops = &socketops; 348 nfp->f_type = DTYPE_SOCKET; 349 FILE_UNLOCK(nfp); 350 sa = 0; 351 error = soaccept(so, &sa); 352 if (error) { 353 /* 354 * return a namelen of zero for older code which might 355 * ignore the return value from accept. 356 */ 357 if (uap->name != NULL) { 358 namelen = 0; 359 (void) copyout(&namelen, 360 uap->anamelen, sizeof(*uap->anamelen)); 361 } 362 goto noconnection; 363 } 364 if (sa == NULL) { 365 namelen = 0; 366 if (uap->name) 367 goto gotnoname; 368 splx(s); 369 error = 0; 370 goto done; 371 } 372 if (uap->name) { 373 /* check sa_len before it is destroyed */ 374 if (namelen > sa->sa_len) 375 namelen = sa->sa_len; 376#ifdef COMPAT_OLDSOCK 377 if (compat) 378 ((struct osockaddr *)sa)->sa_family = 379 sa->sa_family; 380#endif 381 error = copyout(sa, uap->name, (u_int)namelen); 382 if (!error) 383gotnoname: 384 error = copyout(&namelen, 385 uap->anamelen, sizeof (*uap->anamelen)); 386 } 387noconnection: 388 if (sa) 389 FREE(sa, M_SONAME); 390 391 /* 392 * close the new descriptor, assuming someone hasn't ripped it 393 * out from under us. 394 */ 395 if (error) { 396 FILEDESC_LOCK(fdp); 397 if (fdp->fd_ofiles[fd] == nfp) { 398 fdp->fd_ofiles[fd] = NULL; 399 FILEDESC_UNLOCK(fdp); 400 fdrop(nfp, td); 401 } else { 402 FILEDESC_UNLOCK(fdp); 403 } 404 } 405 splx(s); 406 407 /* 408 * Release explicitly held references before returning. 409 */ 410done: 411 if (nfp != NULL) 412 fdrop(nfp, td); 413 fputsock(head); 414done2: 415 mtx_unlock(&Giant); 416 return (error); 417} 418 419/* 420 * MPSAFE (accept1() is MPSAFE) 421 */ 422int 423accept(td, uap) 424 struct thread *td; 425 struct accept_args *uap; 426{ 427 428 return (accept1(td, uap, 0)); 429} 430 431#ifdef COMPAT_OLDSOCK 432/* 433 * MPSAFE (accept1() is MPSAFE) 434 */ 435int 436oaccept(td, uap) 437 struct thread *td; 438 struct accept_args *uap; 439{ 440 441 return (accept1(td, uap, 1)); 442} 443#endif /* COMPAT_OLDSOCK */ 444 445/* 446 * MPSAFE 447 */ 448/* ARGSUSED */ 449int 450connect(td, uap) 451 struct thread *td; 452 register struct connect_args /* { 453 int s; 454 caddr_t name; 455 int namelen; 456 } */ *uap; 457{ 458 struct sockaddr *sa; 459 int error; 460 461 error = getsockaddr(&sa, uap->name, uap->namelen); 462 if (error) 463 return error; 464 465 return (kern_connect(td, uap->s, sa)); 466} 467 468 469int 470kern_connect(td, fd, sa) 471 struct thread *td; 472 int fd; 473 struct sockaddr *sa; 474{ 475 struct socket *so; 476 int error, s; 477 478 mtx_lock(&Giant); 479 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 480 goto done2; 481 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 482 error = EALREADY; 483 goto done1; 484 } 485#ifdef MAC 486 error = mac_check_socket_connect(td->td_ucred, so, sa); 487 if (error) 488 goto bad; 489#endif 490 error = soconnect(so, sa, td); 491 if (error) 492 goto bad; 493 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 494 error = EINPROGRESS; 495 goto done1; 496 } 497 s = splnet(); 498 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 499 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 500 if (error) 501 break; 502 } 503 if (error == 0) { 504 error = so->so_error; 505 so->so_error = 0; 506 } 507 splx(s); 508bad: 509 so->so_state &= ~SS_ISCONNECTING; 510 if (error == ERESTART) 511 error = EINTR; 512done1: 513 fputsock(so); 514done2: 515 mtx_unlock(&Giant); 516 FREE(sa, M_SONAME); 517 return (error); 518} 519 520/* 521 * MPSAFE 522 */ 523int 524socketpair(td, uap) 525 struct thread *td; 526 register struct socketpair_args /* { 527 int domain; 528 int type; 529 int protocol; 530 int *rsv; 531 } */ *uap; 532{ 533 register struct filedesc *fdp = td->td_proc->p_fd; 534 struct file *fp1, *fp2; 535 struct socket *so1, *so2; 536 int fd, error, sv[2]; 537 538 mtx_lock(&Giant); 539 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 540 td->td_ucred, td); 541 if (error) 542 goto done2; 543 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 544 td->td_ucred, td); 545 if (error) 546 goto free1; 547 error = falloc(td, &fp1, &fd); 548 if (error) 549 goto free2; 550 fhold(fp1); 551 sv[0] = fd; 552 fp1->f_data = so1; /* so1 already has ref count */ 553 error = falloc(td, &fp2, &fd); 554 if (error) 555 goto free3; 556 fhold(fp2); 557 fp2->f_data = so2; /* so2 already has ref count */ 558 sv[1] = fd; 559 error = soconnect2(so1, so2); 560 if (error) 561 goto free4; 562 if (uap->type == SOCK_DGRAM) { 563 /* 564 * Datagram socket connection is asymmetric. 565 */ 566 error = soconnect2(so2, so1); 567 if (error) 568 goto free4; 569 } 570 FILE_LOCK(fp1); 571 fp1->f_flag = FREAD|FWRITE; 572 fp1->f_ops = &socketops; 573 fp1->f_type = DTYPE_SOCKET; 574 FILE_UNLOCK(fp1); 575 FILE_LOCK(fp2); 576 fp2->f_flag = FREAD|FWRITE; 577 fp2->f_ops = &socketops; 578 fp2->f_type = DTYPE_SOCKET; 579 FILE_UNLOCK(fp2); 580 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 581 fdrop(fp1, td); 582 fdrop(fp2, td); 583 goto done2; 584free4: 585 FILEDESC_LOCK(fdp); 586 if (fdp->fd_ofiles[sv[1]] == fp2) { 587 fdp->fd_ofiles[sv[1]] = NULL; 588 FILEDESC_UNLOCK(fdp); 589 fdrop(fp2, td); 590 } else 591 FILEDESC_UNLOCK(fdp); 592 fdrop(fp2, td); 593free3: 594 FILEDESC_LOCK(fdp); 595 if (fdp->fd_ofiles[sv[0]] == fp1) { 596 fdp->fd_ofiles[sv[0]] = NULL; 597 FILEDESC_UNLOCK(fdp); 598 fdrop(fp1, td); 599 } else 600 FILEDESC_UNLOCK(fdp); 601 fdrop(fp1, td); 602free2: 603 (void)soclose(so2); 604free1: 605 (void)soclose(so1); 606done2: 607 mtx_unlock(&Giant); 608 return (error); 609} 610 611static int 612sendit(td, s, mp, flags) 613 register struct thread *td; 614 int s; 615 register struct msghdr *mp; 616 int flags; 617{ 618 struct uio auio; 619 register struct iovec *iov; 620 register int i; 621 struct mbuf *control; 622 struct sockaddr *to = NULL; 623 int len, error; 624 struct socket *so; 625#ifdef KTRACE 626 struct iovec *ktriov = NULL; 627 struct uio ktruio; 628 int iovlen; 629#endif 630 631 if ((error = fgetsock(td, s, &so, NULL)) != 0) 632 return (error); 633 634#ifdef MAC 635 error = mac_check_socket_send(td->td_ucred, so); 636 if (error) 637 goto bad; 638#endif 639 640 auio.uio_iov = mp->msg_iov; 641 auio.uio_iovcnt = mp->msg_iovlen; 642 auio.uio_segflg = UIO_USERSPACE; 643 auio.uio_rw = UIO_WRITE; 644 auio.uio_td = td; 645 auio.uio_offset = 0; /* XXX */ 646 auio.uio_resid = 0; 647 iov = mp->msg_iov; 648 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 649 if ((auio.uio_resid += iov->iov_len) < 0) { 650 error = EINVAL; 651 goto bad; 652 } 653 } 654 if (mp->msg_name) { 655 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 656 if (error) 657 goto bad; 658 } 659 if (mp->msg_control) { 660 if (mp->msg_controllen < sizeof(struct cmsghdr) 661#ifdef COMPAT_OLDSOCK 662 && mp->msg_flags != MSG_COMPAT 663#endif 664 ) { 665 error = EINVAL; 666 goto bad; 667 } 668 error = sockargs(&control, mp->msg_control, 669 mp->msg_controllen, MT_CONTROL); 670 if (error) 671 goto bad; 672#ifdef COMPAT_OLDSOCK 673 if (mp->msg_flags == MSG_COMPAT) { 674 register struct cmsghdr *cm; 675 676 M_PREPEND(control, sizeof(*cm), 0); 677 if (control == 0) { 678 error = ENOBUFS; 679 goto bad; 680 } else { 681 cm = mtod(control, struct cmsghdr *); 682 cm->cmsg_len = control->m_len; 683 cm->cmsg_level = SOL_SOCKET; 684 cm->cmsg_type = SCM_RIGHTS; 685 } 686 } 687#endif 688 } else { 689 control = 0; 690 } 691#ifdef KTRACE 692 if (KTRPOINT(td, KTR_GENIO)) { 693 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 694 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, 0); 695 bcopy(auio.uio_iov, ktriov, iovlen); 696 ktruio = auio; 697 } 698#endif 699 len = auio.uio_resid; 700 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, 701 flags, td); 702 if (error) { 703 if (auio.uio_resid != len && (error == ERESTART || 704 error == EINTR || error == EWOULDBLOCK)) 705 error = 0; 706 /* Generation of SIGPIPE can be controlled per socket */ 707 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 708 PROC_LOCK(td->td_proc); 709 psignal(td->td_proc, SIGPIPE); 710 PROC_UNLOCK(td->td_proc); 711 } 712 } 713 if (error == 0) 714 td->td_retval[0] = len - auio.uio_resid; 715#ifdef KTRACE 716 if (ktriov != NULL) { 717 if (error == 0) { 718 ktruio.uio_iov = ktriov; 719 ktruio.uio_resid = td->td_retval[0]; 720 ktrgenio(s, UIO_WRITE, &ktruio, error); 721 } 722 FREE(ktriov, M_TEMP); 723 } 724#endif 725bad: 726 fputsock(so); 727 if (to) 728 FREE(to, M_SONAME); 729 return (error); 730} 731 732/* 733 * MPSAFE 734 */ 735int 736sendto(td, uap) 737 struct thread *td; 738 register struct sendto_args /* { 739 int s; 740 caddr_t buf; 741 size_t len; 742 int flags; 743 caddr_t to; 744 int tolen; 745 } */ *uap; 746{ 747 struct msghdr msg; 748 struct iovec aiov; 749 int error; 750 751 msg.msg_name = uap->to; 752 msg.msg_namelen = uap->tolen; 753 msg.msg_iov = &aiov; 754 msg.msg_iovlen = 1; 755 msg.msg_control = 0; 756#ifdef COMPAT_OLDSOCK 757 msg.msg_flags = 0; 758#endif 759 aiov.iov_base = uap->buf; 760 aiov.iov_len = uap->len; 761 mtx_lock(&Giant); 762 error = sendit(td, uap->s, &msg, uap->flags); 763 mtx_unlock(&Giant); 764 return (error); 765} 766 767#ifdef COMPAT_OLDSOCK 768/* 769 * MPSAFE 770 */ 771int 772osend(td, uap) 773 struct thread *td; 774 register struct osend_args /* { 775 int s; 776 caddr_t buf; 777 int len; 778 int flags; 779 } */ *uap; 780{ 781 struct msghdr msg; 782 struct iovec aiov; 783 int error; 784 785 msg.msg_name = 0; 786 msg.msg_namelen = 0; 787 msg.msg_iov = &aiov; 788 msg.msg_iovlen = 1; 789 aiov.iov_base = uap->buf; 790 aiov.iov_len = uap->len; 791 msg.msg_control = 0; 792 msg.msg_flags = 0; 793 mtx_lock(&Giant); 794 error = sendit(td, uap->s, &msg, uap->flags); 795 mtx_unlock(&Giant); 796 return (error); 797} 798 799/* 800 * MPSAFE 801 */ 802int 803osendmsg(td, uap) 804 struct thread *td; 805 register struct osendmsg_args /* { 806 int s; 807 caddr_t msg; 808 int flags; 809 } */ *uap; 810{ 811 struct msghdr msg; 812 struct iovec aiov[UIO_SMALLIOV], *iov; 813 int error; 814 815 mtx_lock(&Giant); 816 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 817 if (error) 818 goto done2; 819 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 820 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 821 error = EMSGSIZE; 822 goto done2; 823 } 824 MALLOC(iov, struct iovec *, 825 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 826 0); 827 } else { 828 iov = aiov; 829 } 830 error = copyin(msg.msg_iov, iov, 831 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 832 if (error) 833 goto done; 834 msg.msg_flags = MSG_COMPAT; 835 msg.msg_iov = iov; 836 error = sendit(td, uap->s, &msg, uap->flags); 837done: 838 if (iov != aiov) 839 FREE(iov, M_IOV); 840done2: 841 mtx_unlock(&Giant); 842 return (error); 843} 844#endif 845 846/* 847 * MPSAFE 848 */ 849int 850sendmsg(td, uap) 851 struct thread *td; 852 register struct sendmsg_args /* { 853 int s; 854 caddr_t msg; 855 int flags; 856 } */ *uap; 857{ 858 struct msghdr msg; 859 struct iovec aiov[UIO_SMALLIOV], *iov; 860 int error; 861 862 mtx_lock(&Giant); 863 error = copyin(uap->msg, &msg, sizeof (msg)); 864 if (error) 865 goto done2; 866 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 867 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 868 error = EMSGSIZE; 869 goto done2; 870 } 871 MALLOC(iov, struct iovec *, 872 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 873 0); 874 } else { 875 iov = aiov; 876 } 877 if (msg.msg_iovlen && 878 (error = copyin(msg.msg_iov, iov, 879 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 880 goto done; 881 msg.msg_iov = iov; 882#ifdef COMPAT_OLDSOCK 883 msg.msg_flags = 0; 884#endif 885 error = sendit(td, uap->s, &msg, uap->flags); 886done: 887 if (iov != aiov) 888 FREE(iov, M_IOV); 889done2: 890 mtx_unlock(&Giant); 891 return (error); 892} 893 894static int 895recvit(td, s, mp, namelenp) 896 register struct thread *td; 897 int s; 898 register struct msghdr *mp; 899 void *namelenp; 900{ 901 struct uio auio; 902 register struct iovec *iov; 903 register int i; 904 int len, error; 905 struct mbuf *m, *control = 0; 906 caddr_t ctlbuf; 907 struct socket *so; 908 struct sockaddr *fromsa = 0; 909#ifdef KTRACE 910 struct iovec *ktriov = NULL; 911 struct uio ktruio; 912 int iovlen; 913#endif 914 915 if ((error = fgetsock(td, s, &so, NULL)) != 0) 916 return (error); 917 918#ifdef MAC 919 error = mac_check_socket_receive(td->td_ucred, so); 920 if (error) { 921 fputsock(so); 922 return (error); 923 } 924#endif 925 926 auio.uio_iov = mp->msg_iov; 927 auio.uio_iovcnt = mp->msg_iovlen; 928 auio.uio_segflg = UIO_USERSPACE; 929 auio.uio_rw = UIO_READ; 930 auio.uio_td = td; 931 auio.uio_offset = 0; /* XXX */ 932 auio.uio_resid = 0; 933 iov = mp->msg_iov; 934 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 935 if ((auio.uio_resid += iov->iov_len) < 0) { 936 fputsock(so); 937 return (EINVAL); 938 } 939 } 940#ifdef KTRACE 941 if (KTRPOINT(td, KTR_GENIO)) { 942 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 943 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, 0); 944 bcopy(auio.uio_iov, ktriov, iovlen); 945 ktruio = auio; 946 } 947#endif 948 len = auio.uio_resid; 949 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 950 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 951 &mp->msg_flags); 952 if (error) { 953 if (auio.uio_resid != len && (error == ERESTART || 954 error == EINTR || error == EWOULDBLOCK)) 955 error = 0; 956 } 957#ifdef KTRACE 958 if (ktriov != NULL) { 959 if (error == 0) { 960 ktruio.uio_iov = ktriov; 961 ktruio.uio_resid = len - auio.uio_resid; 962 ktrgenio(s, UIO_READ, &ktruio, error); 963 } 964 FREE(ktriov, M_TEMP); 965 } 966#endif 967 if (error) 968 goto out; 969 td->td_retval[0] = len - auio.uio_resid; 970 if (mp->msg_name) { 971 len = mp->msg_namelen; 972 if (len <= 0 || fromsa == 0) 973 len = 0; 974 else { 975 /* save sa_len before it is destroyed by MSG_COMPAT */ 976 len = MIN(len, fromsa->sa_len); 977#ifdef COMPAT_OLDSOCK 978 if (mp->msg_flags & MSG_COMPAT) 979 ((struct osockaddr *)fromsa)->sa_family = 980 fromsa->sa_family; 981#endif 982 error = copyout(fromsa, mp->msg_name, (unsigned)len); 983 if (error) 984 goto out; 985 } 986 mp->msg_namelen = len; 987 if (namelenp && 988 (error = copyout(&len, namelenp, sizeof (int)))) { 989#ifdef COMPAT_OLDSOCK 990 if (mp->msg_flags & MSG_COMPAT) 991 error = 0; /* old recvfrom didn't check */ 992 else 993#endif 994 goto out; 995 } 996 } 997 if (mp->msg_control) { 998#ifdef COMPAT_OLDSOCK 999 /* 1000 * We assume that old recvmsg calls won't receive access 1001 * rights and other control info, esp. as control info 1002 * is always optional and those options didn't exist in 4.3. 1003 * If we receive rights, trim the cmsghdr; anything else 1004 * is tossed. 1005 */ 1006 if (control && mp->msg_flags & MSG_COMPAT) { 1007 if (mtod(control, struct cmsghdr *)->cmsg_level != 1008 SOL_SOCKET || 1009 mtod(control, struct cmsghdr *)->cmsg_type != 1010 SCM_RIGHTS) { 1011 mp->msg_controllen = 0; 1012 goto out; 1013 } 1014 control->m_len -= sizeof (struct cmsghdr); 1015 control->m_data += sizeof (struct cmsghdr); 1016 } 1017#endif 1018 len = mp->msg_controllen; 1019 m = control; 1020 mp->msg_controllen = 0; 1021 ctlbuf = mp->msg_control; 1022 1023 while (m && len > 0) { 1024 unsigned int tocopy; 1025 1026 if (len >= m->m_len) 1027 tocopy = m->m_len; 1028 else { 1029 mp->msg_flags |= MSG_CTRUNC; 1030 tocopy = len; 1031 } 1032 1033 if ((error = copyout(mtod(m, caddr_t), 1034 ctlbuf, tocopy)) != 0) 1035 goto out; 1036 1037 ctlbuf += tocopy; 1038 len -= tocopy; 1039 m = m->m_next; 1040 } 1041 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1042 } 1043out: 1044 fputsock(so); 1045 if (fromsa) 1046 FREE(fromsa, M_SONAME); 1047 if (control) 1048 m_freem(control); 1049 return (error); 1050} 1051 1052/* 1053 * MPSAFE 1054 */ 1055int 1056recvfrom(td, uap) 1057 struct thread *td; 1058 register struct recvfrom_args /* { 1059 int s; 1060 caddr_t buf; 1061 size_t len; 1062 int flags; 1063 caddr_t from; 1064 int *fromlenaddr; 1065 } */ *uap; 1066{ 1067 struct msghdr msg; 1068 struct iovec aiov; 1069 int error; 1070 1071 mtx_lock(&Giant); 1072 if (uap->fromlenaddr) { 1073 error = copyin(uap->fromlenaddr, 1074 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1075 if (error) 1076 goto done2; 1077 } else { 1078 msg.msg_namelen = 0; 1079 } 1080 msg.msg_name = uap->from; 1081 msg.msg_iov = &aiov; 1082 msg.msg_iovlen = 1; 1083 aiov.iov_base = uap->buf; 1084 aiov.iov_len = uap->len; 1085 msg.msg_control = 0; 1086 msg.msg_flags = uap->flags; 1087 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1088done2: 1089 mtx_unlock(&Giant); 1090 return(error); 1091} 1092 1093#ifdef COMPAT_OLDSOCK 1094/* 1095 * MPSAFE 1096 */ 1097int 1098orecvfrom(td, uap) 1099 struct thread *td; 1100 struct recvfrom_args *uap; 1101{ 1102 1103 uap->flags |= MSG_COMPAT; 1104 return (recvfrom(td, uap)); 1105} 1106#endif 1107 1108 1109#ifdef COMPAT_OLDSOCK 1110/* 1111 * MPSAFE 1112 */ 1113int 1114orecv(td, uap) 1115 struct thread *td; 1116 register struct orecv_args /* { 1117 int s; 1118 caddr_t buf; 1119 int len; 1120 int flags; 1121 } */ *uap; 1122{ 1123 struct msghdr msg; 1124 struct iovec aiov; 1125 int error; 1126 1127 mtx_lock(&Giant); 1128 msg.msg_name = 0; 1129 msg.msg_namelen = 0; 1130 msg.msg_iov = &aiov; 1131 msg.msg_iovlen = 1; 1132 aiov.iov_base = uap->buf; 1133 aiov.iov_len = uap->len; 1134 msg.msg_control = 0; 1135 msg.msg_flags = uap->flags; 1136 error = recvit(td, uap->s, &msg, NULL); 1137 mtx_unlock(&Giant); 1138 return (error); 1139} 1140 1141/* 1142 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1143 * overlays the new one, missing only the flags, and with the (old) access 1144 * rights where the control fields are now. 1145 * 1146 * MPSAFE 1147 */ 1148int 1149orecvmsg(td, uap) 1150 struct thread *td; 1151 register struct orecvmsg_args /* { 1152 int s; 1153 struct omsghdr *msg; 1154 int flags; 1155 } */ *uap; 1156{ 1157 struct msghdr msg; 1158 struct iovec aiov[UIO_SMALLIOV], *iov; 1159 int error; 1160 1161 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1162 if (error) 1163 return (error); 1164 1165 mtx_lock(&Giant); 1166 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1167 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1168 error = EMSGSIZE; 1169 goto done2; 1170 } 1171 MALLOC(iov, struct iovec *, 1172 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1173 0); 1174 } else { 1175 iov = aiov; 1176 } 1177 msg.msg_flags = uap->flags | MSG_COMPAT; 1178 error = copyin(msg.msg_iov, iov, 1179 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1180 if (error) 1181 goto done; 1182 msg.msg_iov = iov; 1183 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1184 1185 if (msg.msg_controllen && error == 0) 1186 error = copyout(&msg.msg_controllen, 1187 &uap->msg->msg_accrightslen, sizeof (int)); 1188done: 1189 if (iov != aiov) 1190 FREE(iov, M_IOV); 1191done2: 1192 mtx_unlock(&Giant); 1193 return (error); 1194} 1195#endif 1196 1197/* 1198 * MPSAFE 1199 */ 1200int 1201recvmsg(td, uap) 1202 struct thread *td; 1203 register struct recvmsg_args /* { 1204 int s; 1205 struct msghdr *msg; 1206 int flags; 1207 } */ *uap; 1208{ 1209 struct msghdr msg; 1210 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1211 register int error; 1212 1213 mtx_lock(&Giant); 1214 error = copyin(uap->msg, &msg, sizeof (msg)); 1215 if (error) 1216 goto done2; 1217 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1218 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1219 error = EMSGSIZE; 1220 goto done2; 1221 } 1222 MALLOC(iov, struct iovec *, 1223 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1224 0); 1225 } else { 1226 iov = aiov; 1227 } 1228#ifdef COMPAT_OLDSOCK 1229 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1230#else 1231 msg.msg_flags = uap->flags; 1232#endif 1233 uiov = msg.msg_iov; 1234 msg.msg_iov = iov; 1235 error = copyin(uiov, iov, 1236 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1237 if (error) 1238 goto done; 1239 error = recvit(td, uap->s, &msg, NULL); 1240 if (!error) { 1241 msg.msg_iov = uiov; 1242 error = copyout(&msg, uap->msg, sizeof(msg)); 1243 } 1244done: 1245 if (iov != aiov) 1246 FREE(iov, M_IOV); 1247done2: 1248 mtx_unlock(&Giant); 1249 return (error); 1250} 1251 1252/* 1253 * MPSAFE 1254 */ 1255/* ARGSUSED */ 1256int 1257shutdown(td, uap) 1258 struct thread *td; 1259 register struct shutdown_args /* { 1260 int s; 1261 int how; 1262 } */ *uap; 1263{ 1264 struct socket *so; 1265 int error; 1266 1267 mtx_lock(&Giant); 1268 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1269 error = soshutdown(so, uap->how); 1270 fputsock(so); 1271 } 1272 mtx_unlock(&Giant); 1273 return(error); 1274} 1275 1276/* 1277 * MPSAFE 1278 */ 1279/* ARGSUSED */ 1280int 1281setsockopt(td, uap) 1282 struct thread *td; 1283 register struct setsockopt_args /* { 1284 int s; 1285 int level; 1286 int name; 1287 caddr_t val; 1288 int valsize; 1289 } */ *uap; 1290{ 1291 struct socket *so; 1292 struct sockopt sopt; 1293 int error; 1294 1295 if (uap->val == 0 && uap->valsize != 0) 1296 return (EFAULT); 1297 if (uap->valsize < 0) 1298 return (EINVAL); 1299 1300 mtx_lock(&Giant); 1301 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1302 sopt.sopt_dir = SOPT_SET; 1303 sopt.sopt_level = uap->level; 1304 sopt.sopt_name = uap->name; 1305 sopt.sopt_val = uap->val; 1306 sopt.sopt_valsize = uap->valsize; 1307 sopt.sopt_td = td; 1308 error = sosetopt(so, &sopt); 1309 fputsock(so); 1310 } 1311 mtx_unlock(&Giant); 1312 return(error); 1313} 1314 1315/* 1316 * MPSAFE 1317 */ 1318/* ARGSUSED */ 1319int 1320getsockopt(td, uap) 1321 struct thread *td; 1322 register struct getsockopt_args /* { 1323 int s; 1324 int level; 1325 int name; 1326 caddr_t val; 1327 int *avalsize; 1328 } */ *uap; 1329{ 1330 int valsize, error; 1331 struct socket *so; 1332 struct sockopt sopt; 1333 1334 mtx_lock(&Giant); 1335 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1336 goto done2; 1337 if (uap->val) { 1338 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1339 if (error) 1340 goto done1; 1341 if (valsize < 0) { 1342 error = EINVAL; 1343 goto done1; 1344 } 1345 } else { 1346 valsize = 0; 1347 } 1348 1349 sopt.sopt_dir = SOPT_GET; 1350 sopt.sopt_level = uap->level; 1351 sopt.sopt_name = uap->name; 1352 sopt.sopt_val = uap->val; 1353 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1354 sopt.sopt_td = td; 1355 1356 error = sogetopt(so, &sopt); 1357 if (error == 0) { 1358 valsize = sopt.sopt_valsize; 1359 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1360 } 1361done1: 1362 fputsock(so); 1363done2: 1364 mtx_unlock(&Giant); 1365 return (error); 1366} 1367 1368/* 1369 * getsockname1() - Get socket name. 1370 * 1371 * MPSAFE 1372 */ 1373/* ARGSUSED */ 1374static int 1375getsockname1(td, uap, compat) 1376 struct thread *td; 1377 register struct getsockname_args /* { 1378 int fdes; 1379 caddr_t asa; 1380 int *alen; 1381 } */ *uap; 1382 int compat; 1383{ 1384 struct socket *so; 1385 struct sockaddr *sa; 1386 int len, error; 1387 1388 mtx_lock(&Giant); 1389 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1390 goto done2; 1391 error = copyin(uap->alen, &len, sizeof (len)); 1392 if (error) 1393 goto done1; 1394 if (len < 0) { 1395 error = EINVAL; 1396 goto done1; 1397 } 1398 sa = 0; 1399 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1400 if (error) 1401 goto bad; 1402 if (sa == 0) { 1403 len = 0; 1404 goto gotnothing; 1405 } 1406 1407 len = MIN(len, sa->sa_len); 1408#ifdef COMPAT_OLDSOCK 1409 if (compat) 1410 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1411#endif 1412 error = copyout(sa, uap->asa, (u_int)len); 1413 if (error == 0) 1414gotnothing: 1415 error = copyout(&len, uap->alen, sizeof (len)); 1416bad: 1417 if (sa) 1418 FREE(sa, M_SONAME); 1419done1: 1420 fputsock(so); 1421done2: 1422 mtx_unlock(&Giant); 1423 return (error); 1424} 1425 1426/* 1427 * MPSAFE 1428 */ 1429int 1430getsockname(td, uap) 1431 struct thread *td; 1432 struct getsockname_args *uap; 1433{ 1434 1435 return (getsockname1(td, uap, 0)); 1436} 1437 1438#ifdef COMPAT_OLDSOCK 1439/* 1440 * MPSAFE 1441 */ 1442int 1443ogetsockname(td, uap) 1444 struct thread *td; 1445 struct getsockname_args *uap; 1446{ 1447 1448 return (getsockname1(td, uap, 1)); 1449} 1450#endif /* COMPAT_OLDSOCK */ 1451 1452/* 1453 * getpeername1() - Get name of peer for connected socket. 1454 * 1455 * MPSAFE 1456 */ 1457/* ARGSUSED */ 1458static int 1459getpeername1(td, uap, compat) 1460 struct thread *td; 1461 register struct getpeername_args /* { 1462 int fdes; 1463 caddr_t asa; 1464 int *alen; 1465 } */ *uap; 1466 int compat; 1467{ 1468 struct socket *so; 1469 struct sockaddr *sa; 1470 int len, error; 1471 1472 mtx_lock(&Giant); 1473 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1474 goto done2; 1475 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1476 error = ENOTCONN; 1477 goto done1; 1478 } 1479 error = copyin(uap->alen, &len, sizeof (len)); 1480 if (error) 1481 goto done1; 1482 if (len < 0) { 1483 error = EINVAL; 1484 goto done1; 1485 } 1486 sa = 0; 1487 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1488 if (error) 1489 goto bad; 1490 if (sa == 0) { 1491 len = 0; 1492 goto gotnothing; 1493 } 1494 len = MIN(len, sa->sa_len); 1495#ifdef COMPAT_OLDSOCK 1496 if (compat) 1497 ((struct osockaddr *)sa)->sa_family = 1498 sa->sa_family; 1499#endif 1500 error = copyout(sa, uap->asa, (u_int)len); 1501 if (error) 1502 goto bad; 1503gotnothing: 1504 error = copyout(&len, uap->alen, sizeof (len)); 1505bad: 1506 if (sa) 1507 FREE(sa, M_SONAME); 1508done1: 1509 fputsock(so); 1510done2: 1511 mtx_unlock(&Giant); 1512 return (error); 1513} 1514 1515/* 1516 * MPSAFE 1517 */ 1518int 1519getpeername(td, uap) 1520 struct thread *td; 1521 struct getpeername_args *uap; 1522{ 1523 1524 return (getpeername1(td, uap, 0)); 1525} 1526 1527#ifdef COMPAT_OLDSOCK 1528/* 1529 * MPSAFE 1530 */ 1531int 1532ogetpeername(td, uap) 1533 struct thread *td; 1534 struct ogetpeername_args *uap; 1535{ 1536 1537 /* XXX uap should have type `getpeername_args *' to begin with. */ 1538 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1539} 1540#endif /* COMPAT_OLDSOCK */ 1541 1542int 1543sockargs(mp, buf, buflen, type) 1544 struct mbuf **mp; 1545 caddr_t buf; 1546 int buflen, type; 1547{ 1548 register struct sockaddr *sa; 1549 register struct mbuf *m; 1550 int error; 1551 1552 if ((u_int)buflen > MLEN) { 1553#ifdef COMPAT_OLDSOCK 1554 if (type == MT_SONAME && (u_int)buflen <= 112) 1555 buflen = MLEN; /* unix domain compat. hack */ 1556 else 1557#endif 1558 return (EINVAL); 1559 } 1560 m = m_get(0, type); 1561 if (m == NULL) 1562 return (ENOBUFS); 1563 m->m_len = buflen; 1564 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1565 if (error) 1566 (void) m_free(m); 1567 else { 1568 *mp = m; 1569 if (type == MT_SONAME) { 1570 sa = mtod(m, struct sockaddr *); 1571 1572#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1573 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1574 sa->sa_family = sa->sa_len; 1575#endif 1576 sa->sa_len = buflen; 1577 } 1578 } 1579 return (error); 1580} 1581 1582int 1583getsockaddr(namp, uaddr, len) 1584 struct sockaddr **namp; 1585 caddr_t uaddr; 1586 size_t len; 1587{ 1588 struct sockaddr *sa; 1589 int error; 1590 1591 if (len > SOCK_MAXADDRLEN) 1592 return ENAMETOOLONG; 1593 MALLOC(sa, struct sockaddr *, len, M_SONAME, 0); 1594 error = copyin(uaddr, sa, len); 1595 if (error) { 1596 FREE(sa, M_SONAME); 1597 } else { 1598#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1599 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1600 sa->sa_family = sa->sa_len; 1601#endif 1602 sa->sa_len = len; 1603 *namp = sa; 1604 } 1605 return error; 1606} 1607 1608/* 1609 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1610 */ 1611static void 1612sf_buf_init(void *arg) 1613{ 1614 int i; 1615 1616 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1617 mtx_lock(&sf_freelist.sf_lock); 1618 SLIST_INIT(&sf_freelist.sf_head); 1619 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE); 1620 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1621 M_NOWAIT | M_ZERO); 1622 for (i = 0; i < nsfbufs; i++) { 1623 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1624 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1625 } 1626 sf_buf_alloc_want = 0; 1627 mtx_unlock(&sf_freelist.sf_lock); 1628} 1629 1630/* 1631 * Get an sf_buf from the freelist. Will block if none are available. 1632 */ 1633struct sf_buf * 1634sf_buf_alloc() 1635{ 1636 struct sf_buf *sf; 1637 int error; 1638 1639 mtx_lock(&sf_freelist.sf_lock); 1640 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1641 sf_buf_alloc_want++; 1642 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1643 "sfbufa", 0); 1644 sf_buf_alloc_want--; 1645 1646 /* 1647 * If we got a signal, don't risk going back to sleep. 1648 */ 1649 if (error) 1650 break; 1651 } 1652 if (sf != NULL) 1653 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1654 mtx_unlock(&sf_freelist.sf_lock); 1655 return (sf); 1656} 1657 1658#define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT]) 1659 1660/* 1661 * Detatch mapped page and release resources back to the system. 1662 */ 1663void 1664sf_buf_free(void *addr, void *args) 1665{ 1666 struct sf_buf *sf; 1667 struct vm_page *m; 1668 1669 GIANT_REQUIRED; 1670 1671 sf = dtosf(addr); 1672 pmap_qremove((vm_offset_t)addr, 1); 1673 m = sf->m; 1674 vm_page_lock_queues(); 1675 vm_page_unwire(m, 0); 1676 /* 1677 * Check for the object going away on us. This can 1678 * happen since we don't hold a reference to it. 1679 * If so, we're responsible for freeing the page. 1680 */ 1681 if (m->wire_count == 0 && m->object == NULL) 1682 vm_page_free(m); 1683 vm_page_unlock_queues(); 1684 sf->m = NULL; 1685 mtx_lock(&sf_freelist.sf_lock); 1686 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1687 if (sf_buf_alloc_want > 0) 1688 wakeup_one(&sf_freelist); 1689 mtx_unlock(&sf_freelist.sf_lock); 1690} 1691 1692/* 1693 * sendfile(2) 1694 * 1695 * MPSAFE 1696 * 1697 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1698 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1699 * 1700 * Send a file specified by 'fd' and starting at 'offset' to a socket 1701 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1702 * nbytes == 0. Optionally add a header and/or trailer to the socket 1703 * output. If specified, write the total number of bytes sent into *sbytes. 1704 * 1705 */ 1706int 1707sendfile(struct thread *td, struct sendfile_args *uap) 1708{ 1709 1710 return (do_sendfile(td, uap, 0)); 1711} 1712 1713#ifdef COMPAT_FREEBSD4 1714int 1715freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1716{ 1717 struct sendfile_args args; 1718 1719 args.fd = uap->fd; 1720 args.s = uap->s; 1721 args.offset = uap->offset; 1722 args.nbytes = uap->nbytes; 1723 args.hdtr = uap->hdtr; 1724 args.sbytes = uap->sbytes; 1725 args.flags = uap->flags; 1726 1727 return (do_sendfile(td, &args, 1)); 1728} 1729#endif /* COMPAT_FREEBSD4 */ 1730 1731static int 1732do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1733{ 1734 struct vnode *vp; 1735 struct vm_object *obj; 1736 struct socket *so = NULL; 1737 struct mbuf *m; 1738 struct sf_buf *sf; 1739 struct vm_page *pg; 1740 struct writev_args nuap; 1741 struct sf_hdtr hdtr; 1742 off_t off, xfsize, hdtr_size, sbytes = 0; 1743 int error, s; 1744 1745 mtx_lock(&Giant); 1746 1747 hdtr_size = 0; 1748 1749 /* 1750 * The descriptor must be a regular file and have a backing VM object. 1751 */ 1752 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1753 goto done; 1754 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1755 error = EINVAL; 1756 goto done; 1757 } 1758 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1759 goto done; 1760 if (so->so_type != SOCK_STREAM) { 1761 error = EINVAL; 1762 goto done; 1763 } 1764 if ((so->so_state & SS_ISCONNECTED) == 0) { 1765 error = ENOTCONN; 1766 goto done; 1767 } 1768 if (uap->offset < 0) { 1769 error = EINVAL; 1770 goto done; 1771 } 1772 1773#ifdef MAC 1774 error = mac_check_socket_send(td->td_ucred, so); 1775 if (error) 1776 goto done; 1777#endif 1778 1779 /* 1780 * If specified, get the pointer to the sf_hdtr struct for 1781 * any headers/trailers. 1782 */ 1783 if (uap->hdtr != NULL) { 1784 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1785 if (error) 1786 goto done; 1787 /* 1788 * Send any headers. Wimp out and use writev(2). 1789 */ 1790 if (hdtr.headers != NULL) { 1791 nuap.fd = uap->s; 1792 nuap.iovp = hdtr.headers; 1793 nuap.iovcnt = hdtr.hdr_cnt; 1794 error = writev(td, &nuap); 1795 if (error) 1796 goto done; 1797 if (compat) 1798 sbytes += td->td_retval[0]; 1799 else 1800 hdtr_size += td->td_retval[0]; 1801 } 1802 } 1803 1804 /* 1805 * Protect against multiple writers to the socket. 1806 */ 1807 (void) sblock(&so->so_snd, 0); 1808 1809 /* 1810 * Loop through the pages in the file, starting with the requested 1811 * offset. Get a file page (do I/O if necessary), map the file page 1812 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1813 * it on the socket. 1814 */ 1815 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1816 vm_pindex_t pindex; 1817 vm_offset_t pgoff; 1818 1819 pindex = OFF_TO_IDX(off); 1820retry_lookup: 1821 /* 1822 * Calculate the amount to transfer. Not to exceed a page, 1823 * the EOF, or the passed in nbytes. 1824 */ 1825 xfsize = obj->un_pager.vnp.vnp_size - off; 1826 if (xfsize > PAGE_SIZE) 1827 xfsize = PAGE_SIZE; 1828 pgoff = (vm_offset_t)(off & PAGE_MASK); 1829 if (PAGE_SIZE - pgoff < xfsize) 1830 xfsize = PAGE_SIZE - pgoff; 1831 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1832 xfsize = uap->nbytes - sbytes; 1833 if (xfsize <= 0) 1834 break; 1835 /* 1836 * Optimize the non-blocking case by looking at the socket space 1837 * before going to the extra work of constituting the sf_buf. 1838 */ 1839 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1840 if (so->so_state & SS_CANTSENDMORE) 1841 error = EPIPE; 1842 else 1843 error = EAGAIN; 1844 sbunlock(&so->so_snd); 1845 goto done; 1846 } 1847 /* 1848 * Attempt to look up the page. 1849 * 1850 * Allocate if not found 1851 * 1852 * Wait and loop if busy. 1853 */ 1854 pg = vm_page_lookup(obj, pindex); 1855 1856 if (pg == NULL) { 1857 pg = vm_page_alloc(obj, pindex, 1858 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1859 if (pg == NULL) { 1860 VM_WAIT; 1861 goto retry_lookup; 1862 } 1863 vm_page_lock_queues(); 1864 vm_page_wakeup(pg); 1865 } else { 1866 vm_page_lock_queues(); 1867 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1868 goto retry_lookup; 1869 /* 1870 * Wire the page so it does not get ripped out from 1871 * under us. 1872 */ 1873 vm_page_wire(pg); 1874 } 1875 1876 /* 1877 * If page is not valid for what we need, initiate I/O 1878 */ 1879 1880 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1881 int bsize, resid; 1882 1883 /* 1884 * Ensure that our page is still around when the I/O 1885 * completes. 1886 */ 1887 vm_page_io_start(pg); 1888 vm_page_unlock_queues(); 1889 1890 /* 1891 * Get the page from backing store. 1892 */ 1893 bsize = vp->v_mount->mnt_stat.f_iosize; 1894 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1895 /* 1896 * XXXMAC: Because we don't have fp->f_cred here, 1897 * we pass in NOCRED. This is probably wrong, but 1898 * is consistent with our original implementation. 1899 */ 1900 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1901 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1902 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1903 td->td_ucred, NOCRED, &resid, td); 1904 VOP_UNLOCK(vp, 0, td); 1905 vm_page_lock_queues(); 1906 vm_page_flag_clear(pg, PG_ZERO); 1907 vm_page_io_finish(pg); 1908 if (error) { 1909 vm_page_unwire(pg, 0); 1910 /* 1911 * See if anyone else might know about this page. 1912 * If not and it is not valid, then free it. 1913 */ 1914 if (pg->wire_count == 0 && pg->valid == 0 && 1915 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1916 pg->hold_count == 0) { 1917 vm_page_busy(pg); 1918 vm_page_free(pg); 1919 } 1920 vm_page_unlock_queues(); 1921 sbunlock(&so->so_snd); 1922 goto done; 1923 } 1924 } 1925 vm_page_unlock_queues(); 1926 1927 /* 1928 * Get a sendfile buf. We usually wait as long as necessary, 1929 * but this wait can be interrupted. 1930 */ 1931 if ((sf = sf_buf_alloc()) == NULL) { 1932 vm_page_lock_queues(); 1933 vm_page_unwire(pg, 0); 1934 if (pg->wire_count == 0 && pg->object == NULL) 1935 vm_page_free(pg); 1936 vm_page_unlock_queues(); 1937 sbunlock(&so->so_snd); 1938 error = EINTR; 1939 goto done; 1940 } 1941 1942 /* 1943 * Allocate a kernel virtual page and insert the physical page 1944 * into it. 1945 */ 1946 sf->m = pg; 1947 pmap_qenter(sf->kva, &pg, 1); 1948 /* 1949 * Get an mbuf header and set it up as having external storage. 1950 */ 1951 MGETHDR(m, 0, MT_DATA); 1952 if (m == NULL) { 1953 error = ENOBUFS; 1954 sf_buf_free((void *)sf->kva, NULL); 1955 sbunlock(&so->so_snd); 1956 goto done; 1957 } 1958 /* 1959 * Setup external storage for mbuf. 1960 */ 1961 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, NULL, M_RDONLY, 1962 EXT_SFBUF); 1963 m->m_data = (char *) sf->kva + pgoff; 1964 m->m_pkthdr.len = m->m_len = xfsize; 1965 /* 1966 * Add the buffer to the socket buffer chain. 1967 */ 1968 s = splnet(); 1969retry_space: 1970 /* 1971 * Make sure that the socket is still able to take more data. 1972 * CANTSENDMORE being true usually means that the connection 1973 * was closed. so_error is true when an error was sensed after 1974 * a previous send. 1975 * The state is checked after the page mapping and buffer 1976 * allocation above since those operations may block and make 1977 * any socket checks stale. From this point forward, nothing 1978 * blocks before the pru_send (or more accurately, any blocking 1979 * results in a loop back to here to re-check). 1980 */ 1981 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1982 if (so->so_state & SS_CANTSENDMORE) { 1983 error = EPIPE; 1984 } else { 1985 error = so->so_error; 1986 so->so_error = 0; 1987 } 1988 m_freem(m); 1989 sbunlock(&so->so_snd); 1990 splx(s); 1991 goto done; 1992 } 1993 /* 1994 * Wait for socket space to become available. We do this just 1995 * after checking the connection state above in order to avoid 1996 * a race condition with sbwait(). 1997 */ 1998 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1999 if (so->so_state & SS_NBIO) { 2000 m_freem(m); 2001 sbunlock(&so->so_snd); 2002 splx(s); 2003 error = EAGAIN; 2004 goto done; 2005 } 2006 error = sbwait(&so->so_snd); 2007 /* 2008 * An error from sbwait usually indicates that we've 2009 * been interrupted by a signal. If we've sent anything 2010 * then return bytes sent, otherwise return the error. 2011 */ 2012 if (error) { 2013 m_freem(m); 2014 sbunlock(&so->so_snd); 2015 splx(s); 2016 goto done; 2017 } 2018 goto retry_space; 2019 } 2020 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2021 splx(s); 2022 if (error) { 2023 sbunlock(&so->so_snd); 2024 goto done; 2025 } 2026 } 2027 sbunlock(&so->so_snd); 2028 2029 /* 2030 * Send trailers. Wimp out and use writev(2). 2031 */ 2032 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2033 nuap.fd = uap->s; 2034 nuap.iovp = hdtr.trailers; 2035 nuap.iovcnt = hdtr.trl_cnt; 2036 error = writev(td, &nuap); 2037 if (error) 2038 goto done; 2039 if (compat) 2040 sbytes += td->td_retval[0]; 2041 else 2042 hdtr_size += td->td_retval[0]; 2043 } 2044 2045done: 2046 /* 2047 * If there was no error we have to clear td->td_retval[0] 2048 * because it may have been set by writev. 2049 */ 2050 if (error == 0) { 2051 td->td_retval[0] = 0; 2052 } 2053 if (uap->sbytes != NULL) { 2054 if (!compat) 2055 sbytes += hdtr_size; 2056 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2057 } 2058 if (vp) 2059 vrele(vp); 2060 if (so) 2061 fputsock(so); 2062 mtx_unlock(&Giant); 2063 return (error); 2064} 2065