kern_sendfile.c revision 118448
1/* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 118448 2003-08-04 21:28:57Z dwmalone $"); 41 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44#include "opt_mac.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/lock.h> 50#include <sys/mac.h> 51#include <sys/mutex.h> 52#include <sys/sysproto.h> 53#include <sys/malloc.h> 54#include <sys/filedesc.h> 55#include <sys/event.h> 56#include <sys/proc.h> 57#include <sys/fcntl.h> 58#include <sys/file.h> 59#include <sys/filio.h> 60#include <sys/mount.h> 61#include <sys/mbuf.h> 62#include <sys/protosw.h> 63#include <sys/socket.h> 64#include <sys/socketvar.h> 65#include <sys/signalvar.h> 66#include <sys/syscallsubr.h> 67#include <sys/uio.h> 68#include <sys/vnode.h> 69#ifdef KTRACE 70#include <sys/ktrace.h> 71#endif 72 73#include <vm/vm.h> 74#include <vm/vm_object.h> 75#include <vm/vm_page.h> 76#include <vm/vm_pageout.h> 77#include <vm/vm_kern.h> 78#include <vm/vm_extern.h> 79 80static void sf_buf_init(void *arg); 81SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL) 82 83static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 84static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 85 86static int accept1(struct thread *td, struct accept_args *uap, int compat); 87static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 88static int getsockname1(struct thread *td, struct getsockname_args *uap, 89 int compat); 90static int getpeername1(struct thread *td, struct getpeername_args *uap, 91 int compat); 92 93/* 94 * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the 95 * sf_freelist head with the sf_lock mutex. 96 */ 97static struct { 98 SLIST_HEAD(, sf_buf) sf_head; 99 struct mtx sf_lock; 100} sf_freelist; 101 102static u_int sf_buf_alloc_want; 103 104/* 105 * System call interface to the socket abstraction. 106 */ 107#if defined(COMPAT_43) || defined(COMPAT_SUNOS) 108#define COMPAT_OLDSOCK 109#endif 110 111/* 112 * MPSAFE 113 */ 114int 115socket(td, uap) 116 struct thread *td; 117 register struct socket_args /* { 118 int domain; 119 int type; 120 int protocol; 121 } */ *uap; 122{ 123 struct filedesc *fdp; 124 struct socket *so; 125 struct file *fp; 126 int fd, error; 127 128 mtx_lock(&Giant); 129 fdp = td->td_proc->p_fd; 130 error = falloc(td, &fp, &fd); 131 if (error) 132 goto done2; 133 fhold(fp); 134 error = socreate(uap->domain, &so, uap->type, uap->protocol, 135 td->td_ucred, td); 136 FILEDESC_LOCK(fdp); 137 if (error) { 138 if (fdp->fd_ofiles[fd] == fp) { 139 fdp->fd_ofiles[fd] = NULL; 140 FILEDESC_UNLOCK(fdp); 141 fdrop(fp, td); 142 } else 143 FILEDESC_UNLOCK(fdp); 144 } else { 145 fp->f_data = so; /* already has ref count */ 146 fp->f_flag = FREAD|FWRITE; 147 fp->f_ops = &socketops; 148 fp->f_type = DTYPE_SOCKET; 149 FILEDESC_UNLOCK(fdp); 150 td->td_retval[0] = fd; 151 } 152 fdrop(fp, td); 153done2: 154 mtx_unlock(&Giant); 155 return (error); 156} 157 158/* 159 * MPSAFE 160 */ 161/* ARGSUSED */ 162int 163bind(td, uap) 164 struct thread *td; 165 register struct bind_args /* { 166 int s; 167 caddr_t name; 168 int namelen; 169 } */ *uap; 170{ 171 struct sockaddr *sa; 172 int error; 173 174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 175 return (error); 176 177 return (kern_bind(td, uap->s, sa)); 178} 179 180int 181kern_bind(td, fd, sa) 182 struct thread *td; 183 int fd; 184 struct sockaddr *sa; 185{ 186 struct socket *so; 187 int error; 188 189 mtx_lock(&Giant); 190 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 191 goto done2; 192#ifdef MAC 193 error = mac_check_socket_bind(td->td_ucred, so, sa); 194 if (error) 195 goto done1; 196#endif 197 error = sobind(so, sa, td); 198#ifdef MAC 199done1: 200#endif 201 fputsock(so); 202done2: 203 mtx_unlock(&Giant); 204 FREE(sa, M_SONAME); 205 return (error); 206} 207 208/* 209 * MPSAFE 210 */ 211/* ARGSUSED */ 212int 213listen(td, uap) 214 struct thread *td; 215 register struct listen_args /* { 216 int s; 217 int backlog; 218 } */ *uap; 219{ 220 struct socket *so; 221 int error; 222 223 mtx_lock(&Giant); 224 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 225#ifdef MAC 226 error = mac_check_socket_listen(td->td_ucred, so); 227 if (error) 228 goto done; 229#endif 230 error = solisten(so, uap->backlog, td); 231#ifdef MAC 232done: 233#endif 234 fputsock(so); 235 } 236 mtx_unlock(&Giant); 237 return(error); 238} 239 240/* 241 * accept1() 242 * MPSAFE 243 */ 244static int 245accept1(td, uap, compat) 246 struct thread *td; 247 register struct accept_args /* { 248 int s; 249 caddr_t name; 250 int *anamelen; 251 } */ *uap; 252 int compat; 253{ 254 struct filedesc *fdp; 255 struct file *nfp = NULL; 256 struct sockaddr *sa; 257 int namelen, error, s; 258 struct socket *head, *so; 259 int fd; 260 u_int fflag; 261 pid_t pgid; 262 int tmp; 263 264 fdp = td->td_proc->p_fd; 265 if (uap->name) { 266 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 267 if(error) 268 goto done3; 269 if (namelen < 0) { 270 error = EINVAL; 271 goto done3; 272 } 273 } 274 mtx_lock(&Giant); 275 error = fgetsock(td, uap->s, &head, &fflag); 276 if (error) 277 goto done2; 278 s = splnet(); 279 if ((head->so_options & SO_ACCEPTCONN) == 0) { 280 splx(s); 281 error = EINVAL; 282 goto done; 283 } 284 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 285 if (head->so_state & SS_CANTRCVMORE) { 286 head->so_error = ECONNABORTED; 287 break; 288 } 289 if ((head->so_state & SS_NBIO) != 0) { 290 head->so_error = EWOULDBLOCK; 291 break; 292 } 293 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 294 "accept", 0); 295 if (error) { 296 splx(s); 297 goto done; 298 } 299 } 300 if (head->so_error) { 301 error = head->so_error; 302 head->so_error = 0; 303 splx(s); 304 goto done; 305 } 306 307 /* 308 * At this point we know that there is at least one connection 309 * ready to be accepted. Remove it from the queue prior to 310 * allocating the file descriptor for it since falloc() may 311 * block allowing another process to accept the connection 312 * instead. 313 */ 314 so = TAILQ_FIRST(&head->so_comp); 315 TAILQ_REMOVE(&head->so_comp, so, so_list); 316 head->so_qlen--; 317 318 error = falloc(td, &nfp, &fd); 319 if (error) { 320 /* 321 * Probably ran out of file descriptors. Put the 322 * unaccepted connection back onto the queue and 323 * do another wakeup so some other process might 324 * have a chance at it. 325 */ 326 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 327 head->so_qlen++; 328 wakeup_one(&head->so_timeo); 329 splx(s); 330 goto done; 331 } 332 fhold(nfp); 333 td->td_retval[0] = fd; 334 335 /* connection has been removed from the listen queue */ 336 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 337 338 so->so_state &= ~SS_COMP; 339 so->so_head = NULL; 340 pgid = fgetown(&head->so_sigio); 341 if (pgid != 0) 342 fsetown(pgid, &so->so_sigio); 343 344 FILE_LOCK(nfp); 345 soref(so); /* file descriptor reference */ 346 nfp->f_data = so; /* nfp has ref count from falloc */ 347 nfp->f_flag = fflag; 348 nfp->f_ops = &socketops; 349 nfp->f_type = DTYPE_SOCKET; 350 FILE_UNLOCK(nfp); 351 /* Sync socket nonblocking/async state with file flags */ 352 tmp = fflag & FNONBLOCK; 353 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 354 tmp = fflag & FASYNC; 355 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 356 sa = 0; 357 error = soaccept(so, &sa); 358 if (error) { 359 /* 360 * return a namelen of zero for older code which might 361 * ignore the return value from accept. 362 */ 363 if (uap->name != NULL) { 364 namelen = 0; 365 (void) copyout(&namelen, 366 uap->anamelen, sizeof(*uap->anamelen)); 367 } 368 goto noconnection; 369 } 370 if (sa == NULL) { 371 namelen = 0; 372 if (uap->name) 373 goto gotnoname; 374 splx(s); 375 error = 0; 376 goto done; 377 } 378 if (uap->name) { 379 /* check sa_len before it is destroyed */ 380 if (namelen > sa->sa_len) 381 namelen = sa->sa_len; 382#ifdef COMPAT_OLDSOCK 383 if (compat) 384 ((struct osockaddr *)sa)->sa_family = 385 sa->sa_family; 386#endif 387 error = copyout(sa, uap->name, (u_int)namelen); 388 if (!error) 389gotnoname: 390 error = copyout(&namelen, 391 uap->anamelen, sizeof (*uap->anamelen)); 392 } 393noconnection: 394 if (sa) 395 FREE(sa, M_SONAME); 396 397 /* 398 * close the new descriptor, assuming someone hasn't ripped it 399 * out from under us. 400 */ 401 if (error) { 402 FILEDESC_LOCK(fdp); 403 if (fdp->fd_ofiles[fd] == nfp) { 404 fdp->fd_ofiles[fd] = NULL; 405 FILEDESC_UNLOCK(fdp); 406 fdrop(nfp, td); 407 } else { 408 FILEDESC_UNLOCK(fdp); 409 } 410 } 411 splx(s); 412 413 /* 414 * Release explicitly held references before returning. 415 */ 416done: 417 if (nfp != NULL) 418 fdrop(nfp, td); 419 fputsock(head); 420done2: 421 mtx_unlock(&Giant); 422done3: 423 return (error); 424} 425 426/* 427 * MPSAFE (accept1() is MPSAFE) 428 */ 429int 430accept(td, uap) 431 struct thread *td; 432 struct accept_args *uap; 433{ 434 435 return (accept1(td, uap, 0)); 436} 437 438#ifdef COMPAT_OLDSOCK 439/* 440 * MPSAFE (accept1() is MPSAFE) 441 */ 442int 443oaccept(td, uap) 444 struct thread *td; 445 struct accept_args *uap; 446{ 447 448 return (accept1(td, uap, 1)); 449} 450#endif /* COMPAT_OLDSOCK */ 451 452/* 453 * MPSAFE 454 */ 455/* ARGSUSED */ 456int 457connect(td, uap) 458 struct thread *td; 459 register struct connect_args /* { 460 int s; 461 caddr_t name; 462 int namelen; 463 } */ *uap; 464{ 465 struct sockaddr *sa; 466 int error; 467 468 error = getsockaddr(&sa, uap->name, uap->namelen); 469 if (error) 470 return error; 471 472 return (kern_connect(td, uap->s, sa)); 473} 474 475 476int 477kern_connect(td, fd, sa) 478 struct thread *td; 479 int fd; 480 struct sockaddr *sa; 481{ 482 struct socket *so; 483 int error, s; 484 485 mtx_lock(&Giant); 486 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 487 goto done2; 488 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 489 error = EALREADY; 490 goto done1; 491 } 492#ifdef MAC 493 error = mac_check_socket_connect(td->td_ucred, so, sa); 494 if (error) 495 goto bad; 496#endif 497 error = soconnect(so, sa, td); 498 if (error) 499 goto bad; 500 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 501 error = EINPROGRESS; 502 goto done1; 503 } 504 s = splnet(); 505 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 506 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 507 if (error) 508 break; 509 } 510 if (error == 0) { 511 error = so->so_error; 512 so->so_error = 0; 513 } 514 splx(s); 515bad: 516 so->so_state &= ~SS_ISCONNECTING; 517 if (error == ERESTART) 518 error = EINTR; 519done1: 520 fputsock(so); 521done2: 522 mtx_unlock(&Giant); 523 FREE(sa, M_SONAME); 524 return (error); 525} 526 527/* 528 * MPSAFE 529 */ 530int 531socketpair(td, uap) 532 struct thread *td; 533 register struct socketpair_args /* { 534 int domain; 535 int type; 536 int protocol; 537 int *rsv; 538 } */ *uap; 539{ 540 register struct filedesc *fdp = td->td_proc->p_fd; 541 struct file *fp1, *fp2; 542 struct socket *so1, *so2; 543 int fd, error, sv[2]; 544 545 mtx_lock(&Giant); 546 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 547 td->td_ucred, td); 548 if (error) 549 goto done2; 550 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 551 td->td_ucred, td); 552 if (error) 553 goto free1; 554 error = falloc(td, &fp1, &fd); 555 if (error) 556 goto free2; 557 fhold(fp1); 558 sv[0] = fd; 559 fp1->f_data = so1; /* so1 already has ref count */ 560 error = falloc(td, &fp2, &fd); 561 if (error) 562 goto free3; 563 fhold(fp2); 564 fp2->f_data = so2; /* so2 already has ref count */ 565 sv[1] = fd; 566 error = soconnect2(so1, so2); 567 if (error) 568 goto free4; 569 if (uap->type == SOCK_DGRAM) { 570 /* 571 * Datagram socket connection is asymmetric. 572 */ 573 error = soconnect2(so2, so1); 574 if (error) 575 goto free4; 576 } 577 FILE_LOCK(fp1); 578 fp1->f_flag = FREAD|FWRITE; 579 fp1->f_ops = &socketops; 580 fp1->f_type = DTYPE_SOCKET; 581 FILE_UNLOCK(fp1); 582 FILE_LOCK(fp2); 583 fp2->f_flag = FREAD|FWRITE; 584 fp2->f_ops = &socketops; 585 fp2->f_type = DTYPE_SOCKET; 586 FILE_UNLOCK(fp2); 587 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 588 fdrop(fp1, td); 589 fdrop(fp2, td); 590 goto done2; 591free4: 592 FILEDESC_LOCK(fdp); 593 if (fdp->fd_ofiles[sv[1]] == fp2) { 594 fdp->fd_ofiles[sv[1]] = NULL; 595 FILEDESC_UNLOCK(fdp); 596 fdrop(fp2, td); 597 } else 598 FILEDESC_UNLOCK(fdp); 599 fdrop(fp2, td); 600free3: 601 FILEDESC_LOCK(fdp); 602 if (fdp->fd_ofiles[sv[0]] == fp1) { 603 fdp->fd_ofiles[sv[0]] = NULL; 604 FILEDESC_UNLOCK(fdp); 605 fdrop(fp1, td); 606 } else 607 FILEDESC_UNLOCK(fdp); 608 fdrop(fp1, td); 609free2: 610 (void)soclose(so2); 611free1: 612 (void)soclose(so1); 613done2: 614 mtx_unlock(&Giant); 615 return (error); 616} 617 618static int 619sendit(td, s, mp, flags) 620 register struct thread *td; 621 int s; 622 register struct msghdr *mp; 623 int flags; 624{ 625 struct mbuf *control; 626 struct sockaddr *to; 627 int error; 628 629 if (mp->msg_name != NULL) { 630 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 631 if (error) { 632 to = NULL; 633 goto bad; 634 } 635 mp->msg_name = to; 636 } else 637 to = NULL; 638 639 if (mp->msg_control) { 640 if (mp->msg_controllen < sizeof(struct cmsghdr) 641#ifdef COMPAT_OLDSOCK 642 && mp->msg_flags != MSG_COMPAT 643#endif 644 ) { 645 error = EINVAL; 646 goto bad; 647 } 648 error = sockargs(&control, mp->msg_control, 649 mp->msg_controllen, MT_CONTROL); 650 if (error) 651 goto bad; 652#ifdef COMPAT_OLDSOCK 653 if (mp->msg_flags == MSG_COMPAT) { 654 register struct cmsghdr *cm; 655 656 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 657 if (control == 0) { 658 error = ENOBUFS; 659 goto bad; 660 } else { 661 cm = mtod(control, struct cmsghdr *); 662 cm->cmsg_len = control->m_len; 663 cm->cmsg_level = SOL_SOCKET; 664 cm->cmsg_type = SCM_RIGHTS; 665 } 666 } 667#endif 668 } else { 669 control = NULL; 670 } 671 672 error = kern_sendit(td, s, mp, flags, control); 673 674bad: 675 if (to) 676 FREE(to, M_SONAME); 677 return (error); 678} 679 680int 681kern_sendit(td, s, mp, flags, control) 682 struct thread *td; 683 int s; 684 struct msghdr *mp; 685 int flags; 686 struct mbuf *control; 687{ 688 struct uio auio; 689 struct iovec *iov; 690 struct socket *so; 691 int i; 692 int len, error; 693#ifdef KTRACE 694 struct iovec *ktriov = NULL; 695 struct uio ktruio; 696 int iovlen; 697#endif 698 699 mtx_lock(&Giant); 700 if ((error = fgetsock(td, s, &so, NULL)) != 0) 701 goto bad2; 702 703#ifdef MAC 704 error = mac_check_socket_send(td->td_ucred, so); 705 if (error) 706 goto bad; 707#endif 708 709 auio.uio_iov = mp->msg_iov; 710 auio.uio_iovcnt = mp->msg_iovlen; 711 auio.uio_segflg = UIO_USERSPACE; 712 auio.uio_rw = UIO_WRITE; 713 auio.uio_td = td; 714 auio.uio_offset = 0; /* XXX */ 715 auio.uio_resid = 0; 716 iov = mp->msg_iov; 717 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 718 if ((auio.uio_resid += iov->iov_len) < 0) { 719 error = EINVAL; 720 goto bad; 721 } 722 } 723#ifdef KTRACE 724 if (KTRPOINT(td, KTR_GENIO)) { 725 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 726 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 727 bcopy(auio.uio_iov, ktriov, iovlen); 728 ktruio = auio; 729 } 730#endif 731 len = auio.uio_resid; 732 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 733 0, control, flags, td); 734 if (error) { 735 if (auio.uio_resid != len && (error == ERESTART || 736 error == EINTR || error == EWOULDBLOCK)) 737 error = 0; 738 /* Generation of SIGPIPE can be controlled per socket */ 739 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 740 PROC_LOCK(td->td_proc); 741 psignal(td->td_proc, SIGPIPE); 742 PROC_UNLOCK(td->td_proc); 743 } 744 } 745 if (error == 0) 746 td->td_retval[0] = len - auio.uio_resid; 747#ifdef KTRACE 748 if (ktriov != NULL) { 749 if (error == 0) { 750 ktruio.uio_iov = ktriov; 751 ktruio.uio_resid = td->td_retval[0]; 752 ktrgenio(s, UIO_WRITE, &ktruio, error); 753 } 754 FREE(ktriov, M_TEMP); 755 } 756#endif 757bad: 758 fputsock(so); 759bad2: 760 mtx_unlock(&Giant); 761 return (error); 762} 763 764/* 765 * MPSAFE 766 */ 767int 768sendto(td, uap) 769 struct thread *td; 770 register struct sendto_args /* { 771 int s; 772 caddr_t buf; 773 size_t len; 774 int flags; 775 caddr_t to; 776 int tolen; 777 } */ *uap; 778{ 779 struct msghdr msg; 780 struct iovec aiov; 781 int error; 782 783 msg.msg_name = uap->to; 784 msg.msg_namelen = uap->tolen; 785 msg.msg_iov = &aiov; 786 msg.msg_iovlen = 1; 787 msg.msg_control = 0; 788#ifdef COMPAT_OLDSOCK 789 msg.msg_flags = 0; 790#endif 791 aiov.iov_base = uap->buf; 792 aiov.iov_len = uap->len; 793 error = sendit(td, uap->s, &msg, uap->flags); 794 return (error); 795} 796 797#ifdef COMPAT_OLDSOCK 798/* 799 * MPSAFE 800 */ 801int 802osend(td, uap) 803 struct thread *td; 804 register struct osend_args /* { 805 int s; 806 caddr_t buf; 807 int len; 808 int flags; 809 } */ *uap; 810{ 811 struct msghdr msg; 812 struct iovec aiov; 813 int error; 814 815 msg.msg_name = 0; 816 msg.msg_namelen = 0; 817 msg.msg_iov = &aiov; 818 msg.msg_iovlen = 1; 819 aiov.iov_base = uap->buf; 820 aiov.iov_len = uap->len; 821 msg.msg_control = 0; 822 msg.msg_flags = 0; 823 error = sendit(td, uap->s, &msg, uap->flags); 824 return (error); 825} 826 827/* 828 * MPSAFE 829 */ 830int 831osendmsg(td, uap) 832 struct thread *td; 833 register struct osendmsg_args /* { 834 int s; 835 caddr_t msg; 836 int flags; 837 } */ *uap; 838{ 839 struct msghdr msg; 840 struct iovec aiov[UIO_SMALLIOV], *iov; 841 int error; 842 843 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 844 if (error) 845 goto done2; 846 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 847 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 848 error = EMSGSIZE; 849 goto done2; 850 } 851 MALLOC(iov, struct iovec *, 852 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 853 M_WAITOK); 854 } else { 855 iov = aiov; 856 } 857 error = copyin(msg.msg_iov, iov, 858 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 859 if (error) 860 goto done; 861 msg.msg_flags = MSG_COMPAT; 862 msg.msg_iov = iov; 863 error = sendit(td, uap->s, &msg, uap->flags); 864done: 865 if (iov != aiov) 866 FREE(iov, M_IOV); 867done2: 868 return (error); 869} 870#endif 871 872/* 873 * MPSAFE 874 */ 875int 876sendmsg(td, uap) 877 struct thread *td; 878 register struct sendmsg_args /* { 879 int s; 880 caddr_t msg; 881 int flags; 882 } */ *uap; 883{ 884 struct msghdr msg; 885 struct iovec aiov[UIO_SMALLIOV], *iov; 886 int error; 887 888 error = copyin(uap->msg, &msg, sizeof (msg)); 889 if (error) 890 goto done2; 891 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 892 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 893 error = EMSGSIZE; 894 goto done2; 895 } 896 MALLOC(iov, struct iovec *, 897 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 898 M_WAITOK); 899 } else { 900 iov = aiov; 901 } 902 if (msg.msg_iovlen && 903 (error = copyin(msg.msg_iov, iov, 904 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 905 goto done; 906 msg.msg_iov = iov; 907#ifdef COMPAT_OLDSOCK 908 msg.msg_flags = 0; 909#endif 910 error = sendit(td, uap->s, &msg, uap->flags); 911done: 912 if (iov != aiov) 913 FREE(iov, M_IOV); 914done2: 915 return (error); 916} 917 918static int 919recvit(td, s, mp, namelenp) 920 register struct thread *td; 921 int s; 922 register struct msghdr *mp; 923 void *namelenp; 924{ 925 struct uio auio; 926 register struct iovec *iov; 927 register int i; 928 int len, error; 929 struct mbuf *m, *control = 0; 930 caddr_t ctlbuf; 931 struct socket *so; 932 struct sockaddr *fromsa = 0; 933#ifdef KTRACE 934 struct iovec *ktriov = NULL; 935 struct uio ktruio; 936 int iovlen; 937#endif 938 939 mtx_lock(&Giant); 940 if ((error = fgetsock(td, s, &so, NULL)) != 0) 941 return (error); 942 943#ifdef MAC 944 error = mac_check_socket_receive(td->td_ucred, so); 945 if (error) { 946 fputsock(so); 947 return (error); 948 } 949#endif 950 951 auio.uio_iov = mp->msg_iov; 952 auio.uio_iovcnt = mp->msg_iovlen; 953 auio.uio_segflg = UIO_USERSPACE; 954 auio.uio_rw = UIO_READ; 955 auio.uio_td = td; 956 auio.uio_offset = 0; /* XXX */ 957 auio.uio_resid = 0; 958 iov = mp->msg_iov; 959 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 960 if ((auio.uio_resid += iov->iov_len) < 0) { 961 fputsock(so); 962 return (EINVAL); 963 } 964 } 965#ifdef KTRACE 966 if (KTRPOINT(td, KTR_GENIO)) { 967 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 968 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 969 bcopy(auio.uio_iov, ktriov, iovlen); 970 ktruio = auio; 971 } 972#endif 973 len = auio.uio_resid; 974 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 975 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 976 &mp->msg_flags); 977 if (error) { 978 if (auio.uio_resid != len && (error == ERESTART || 979 error == EINTR || error == EWOULDBLOCK)) 980 error = 0; 981 } 982#ifdef KTRACE 983 if (ktriov != NULL) { 984 if (error == 0) { 985 ktruio.uio_iov = ktriov; 986 ktruio.uio_resid = len - auio.uio_resid; 987 ktrgenio(s, UIO_READ, &ktruio, error); 988 } 989 FREE(ktriov, M_TEMP); 990 } 991#endif 992 if (error) 993 goto out; 994 td->td_retval[0] = len - auio.uio_resid; 995 if (mp->msg_name) { 996 len = mp->msg_namelen; 997 if (len <= 0 || fromsa == 0) 998 len = 0; 999 else { 1000 /* save sa_len before it is destroyed by MSG_COMPAT */ 1001 len = MIN(len, fromsa->sa_len); 1002#ifdef COMPAT_OLDSOCK 1003 if (mp->msg_flags & MSG_COMPAT) 1004 ((struct osockaddr *)fromsa)->sa_family = 1005 fromsa->sa_family; 1006#endif 1007 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1008 if (error) 1009 goto out; 1010 } 1011 mp->msg_namelen = len; 1012 if (namelenp && 1013 (error = copyout(&len, namelenp, sizeof (int)))) { 1014#ifdef COMPAT_OLDSOCK 1015 if (mp->msg_flags & MSG_COMPAT) 1016 error = 0; /* old recvfrom didn't check */ 1017 else 1018#endif 1019 goto out; 1020 } 1021 } 1022 if (mp->msg_control) { 1023#ifdef COMPAT_OLDSOCK 1024 /* 1025 * We assume that old recvmsg calls won't receive access 1026 * rights and other control info, esp. as control info 1027 * is always optional and those options didn't exist in 4.3. 1028 * If we receive rights, trim the cmsghdr; anything else 1029 * is tossed. 1030 */ 1031 if (control && mp->msg_flags & MSG_COMPAT) { 1032 if (mtod(control, struct cmsghdr *)->cmsg_level != 1033 SOL_SOCKET || 1034 mtod(control, struct cmsghdr *)->cmsg_type != 1035 SCM_RIGHTS) { 1036 mp->msg_controllen = 0; 1037 goto out; 1038 } 1039 control->m_len -= sizeof (struct cmsghdr); 1040 control->m_data += sizeof (struct cmsghdr); 1041 } 1042#endif 1043 len = mp->msg_controllen; 1044 m = control; 1045 mp->msg_controllen = 0; 1046 ctlbuf = mp->msg_control; 1047 1048 while (m && len > 0) { 1049 unsigned int tocopy; 1050 1051 if (len >= m->m_len) 1052 tocopy = m->m_len; 1053 else { 1054 mp->msg_flags |= MSG_CTRUNC; 1055 tocopy = len; 1056 } 1057 1058 if ((error = copyout(mtod(m, caddr_t), 1059 ctlbuf, tocopy)) != 0) 1060 goto out; 1061 1062 ctlbuf += tocopy; 1063 len -= tocopy; 1064 m = m->m_next; 1065 } 1066 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1067 } 1068out: 1069 fputsock(so); 1070 mtx_unlock(&Giant); 1071 if (fromsa) 1072 FREE(fromsa, M_SONAME); 1073 if (control) 1074 m_freem(control); 1075 return (error); 1076} 1077 1078/* 1079 * MPSAFE 1080 */ 1081int 1082recvfrom(td, uap) 1083 struct thread *td; 1084 register struct recvfrom_args /* { 1085 int s; 1086 caddr_t buf; 1087 size_t len; 1088 int flags; 1089 caddr_t from; 1090 int *fromlenaddr; 1091 } */ *uap; 1092{ 1093 struct msghdr msg; 1094 struct iovec aiov; 1095 int error; 1096 1097 if (uap->fromlenaddr) { 1098 error = copyin(uap->fromlenaddr, 1099 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1100 if (error) 1101 goto done2; 1102 } else { 1103 msg.msg_namelen = 0; 1104 } 1105 msg.msg_name = uap->from; 1106 msg.msg_iov = &aiov; 1107 msg.msg_iovlen = 1; 1108 aiov.iov_base = uap->buf; 1109 aiov.iov_len = uap->len; 1110 msg.msg_control = 0; 1111 msg.msg_flags = uap->flags; 1112 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1113done2: 1114 return(error); 1115} 1116 1117#ifdef COMPAT_OLDSOCK 1118/* 1119 * MPSAFE 1120 */ 1121int 1122orecvfrom(td, uap) 1123 struct thread *td; 1124 struct recvfrom_args *uap; 1125{ 1126 1127 uap->flags |= MSG_COMPAT; 1128 return (recvfrom(td, uap)); 1129} 1130#endif 1131 1132 1133#ifdef COMPAT_OLDSOCK 1134/* 1135 * MPSAFE 1136 */ 1137int 1138orecv(td, uap) 1139 struct thread *td; 1140 register struct orecv_args /* { 1141 int s; 1142 caddr_t buf; 1143 int len; 1144 int flags; 1145 } */ *uap; 1146{ 1147 struct msghdr msg; 1148 struct iovec aiov; 1149 int error; 1150 1151 msg.msg_name = 0; 1152 msg.msg_namelen = 0; 1153 msg.msg_iov = &aiov; 1154 msg.msg_iovlen = 1; 1155 aiov.iov_base = uap->buf; 1156 aiov.iov_len = uap->len; 1157 msg.msg_control = 0; 1158 msg.msg_flags = uap->flags; 1159 error = recvit(td, uap->s, &msg, NULL); 1160 return (error); 1161} 1162 1163/* 1164 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1165 * overlays the new one, missing only the flags, and with the (old) access 1166 * rights where the control fields are now. 1167 * 1168 * MPSAFE 1169 */ 1170int 1171orecvmsg(td, uap) 1172 struct thread *td; 1173 register struct orecvmsg_args /* { 1174 int s; 1175 struct omsghdr *msg; 1176 int flags; 1177 } */ *uap; 1178{ 1179 struct msghdr msg; 1180 struct iovec aiov[UIO_SMALLIOV], *iov; 1181 int error; 1182 1183 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1184 if (error) 1185 return (error); 1186 1187 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1188 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1189 error = EMSGSIZE; 1190 goto done2; 1191 } 1192 MALLOC(iov, struct iovec *, 1193 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1194 M_WAITOK); 1195 } else { 1196 iov = aiov; 1197 } 1198 msg.msg_flags = uap->flags | MSG_COMPAT; 1199 error = copyin(msg.msg_iov, iov, 1200 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1201 if (error) 1202 goto done; 1203 msg.msg_iov = iov; 1204 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1205 1206 if (msg.msg_controllen && error == 0) 1207 error = copyout(&msg.msg_controllen, 1208 &uap->msg->msg_accrightslen, sizeof (int)); 1209done: 1210 if (iov != aiov) 1211 FREE(iov, M_IOV); 1212done2: 1213 return (error); 1214} 1215#endif 1216 1217/* 1218 * MPSAFE 1219 */ 1220int 1221recvmsg(td, uap) 1222 struct thread *td; 1223 register struct recvmsg_args /* { 1224 int s; 1225 struct msghdr *msg; 1226 int flags; 1227 } */ *uap; 1228{ 1229 struct msghdr msg; 1230 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1231 register int error; 1232 1233 error = copyin(uap->msg, &msg, sizeof (msg)); 1234 if (error) 1235 goto done2; 1236 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1237 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1238 error = EMSGSIZE; 1239 goto done2; 1240 } 1241 MALLOC(iov, struct iovec *, 1242 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1243 M_WAITOK); 1244 } else { 1245 iov = aiov; 1246 } 1247#ifdef COMPAT_OLDSOCK 1248 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1249#else 1250 msg.msg_flags = uap->flags; 1251#endif 1252 uiov = msg.msg_iov; 1253 msg.msg_iov = iov; 1254 error = copyin(uiov, iov, 1255 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1256 if (error) 1257 goto done; 1258 error = recvit(td, uap->s, &msg, NULL); 1259 if (!error) { 1260 msg.msg_iov = uiov; 1261 error = copyout(&msg, uap->msg, sizeof(msg)); 1262 } 1263done: 1264 if (iov != aiov) 1265 FREE(iov, M_IOV); 1266done2: 1267 return (error); 1268} 1269 1270/* 1271 * MPSAFE 1272 */ 1273/* ARGSUSED */ 1274int 1275shutdown(td, uap) 1276 struct thread *td; 1277 register struct shutdown_args /* { 1278 int s; 1279 int how; 1280 } */ *uap; 1281{ 1282 struct socket *so; 1283 int error; 1284 1285 mtx_lock(&Giant); 1286 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1287 error = soshutdown(so, uap->how); 1288 fputsock(so); 1289 } 1290 mtx_unlock(&Giant); 1291 return(error); 1292} 1293 1294/* 1295 * MPSAFE 1296 */ 1297/* ARGSUSED */ 1298int 1299setsockopt(td, uap) 1300 struct thread *td; 1301 register struct setsockopt_args /* { 1302 int s; 1303 int level; 1304 int name; 1305 caddr_t val; 1306 int valsize; 1307 } */ *uap; 1308{ 1309 struct socket *so; 1310 struct sockopt sopt; 1311 int error; 1312 1313 if (uap->val == 0 && uap->valsize != 0) 1314 return (EFAULT); 1315 if (uap->valsize < 0) 1316 return (EINVAL); 1317 1318 mtx_lock(&Giant); 1319 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1320 sopt.sopt_dir = SOPT_SET; 1321 sopt.sopt_level = uap->level; 1322 sopt.sopt_name = uap->name; 1323 sopt.sopt_val = uap->val; 1324 sopt.sopt_valsize = uap->valsize; 1325 sopt.sopt_td = td; 1326 error = sosetopt(so, &sopt); 1327 fputsock(so); 1328 } 1329 mtx_unlock(&Giant); 1330 return(error); 1331} 1332 1333/* 1334 * MPSAFE 1335 */ 1336/* ARGSUSED */ 1337int 1338getsockopt(td, uap) 1339 struct thread *td; 1340 register struct getsockopt_args /* { 1341 int s; 1342 int level; 1343 int name; 1344 caddr_t val; 1345 int *avalsize; 1346 } */ *uap; 1347{ 1348 int valsize, error; 1349 struct socket *so; 1350 struct sockopt sopt; 1351 1352 mtx_lock(&Giant); 1353 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1354 goto done2; 1355 if (uap->val) { 1356 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1357 if (error) 1358 goto done1; 1359 if (valsize < 0) { 1360 error = EINVAL; 1361 goto done1; 1362 } 1363 } else { 1364 valsize = 0; 1365 } 1366 1367 sopt.sopt_dir = SOPT_GET; 1368 sopt.sopt_level = uap->level; 1369 sopt.sopt_name = uap->name; 1370 sopt.sopt_val = uap->val; 1371 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1372 sopt.sopt_td = td; 1373 1374 error = sogetopt(so, &sopt); 1375 if (error == 0) { 1376 valsize = sopt.sopt_valsize; 1377 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1378 } 1379done1: 1380 fputsock(so); 1381done2: 1382 mtx_unlock(&Giant); 1383 return (error); 1384} 1385 1386/* 1387 * getsockname1() - Get socket name. 1388 * 1389 * MPSAFE 1390 */ 1391/* ARGSUSED */ 1392static int 1393getsockname1(td, uap, compat) 1394 struct thread *td; 1395 register struct getsockname_args /* { 1396 int fdes; 1397 caddr_t asa; 1398 int *alen; 1399 } */ *uap; 1400 int compat; 1401{ 1402 struct socket *so; 1403 struct sockaddr *sa; 1404 int len, error; 1405 1406 mtx_lock(&Giant); 1407 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1408 goto done2; 1409 error = copyin(uap->alen, &len, sizeof (len)); 1410 if (error) 1411 goto done1; 1412 if (len < 0) { 1413 error = EINVAL; 1414 goto done1; 1415 } 1416 sa = 0; 1417 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1418 if (error) 1419 goto bad; 1420 if (sa == 0) { 1421 len = 0; 1422 goto gotnothing; 1423 } 1424 1425 len = MIN(len, sa->sa_len); 1426#ifdef COMPAT_OLDSOCK 1427 if (compat) 1428 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1429#endif 1430 error = copyout(sa, uap->asa, (u_int)len); 1431 if (error == 0) 1432gotnothing: 1433 error = copyout(&len, uap->alen, sizeof (len)); 1434bad: 1435 if (sa) 1436 FREE(sa, M_SONAME); 1437done1: 1438 fputsock(so); 1439done2: 1440 mtx_unlock(&Giant); 1441 return (error); 1442} 1443 1444/* 1445 * MPSAFE 1446 */ 1447int 1448getsockname(td, uap) 1449 struct thread *td; 1450 struct getsockname_args *uap; 1451{ 1452 1453 return (getsockname1(td, uap, 0)); 1454} 1455 1456#ifdef COMPAT_OLDSOCK 1457/* 1458 * MPSAFE 1459 */ 1460int 1461ogetsockname(td, uap) 1462 struct thread *td; 1463 struct getsockname_args *uap; 1464{ 1465 1466 return (getsockname1(td, uap, 1)); 1467} 1468#endif /* COMPAT_OLDSOCK */ 1469 1470/* 1471 * getpeername1() - Get name of peer for connected socket. 1472 * 1473 * MPSAFE 1474 */ 1475/* ARGSUSED */ 1476static int 1477getpeername1(td, uap, compat) 1478 struct thread *td; 1479 register struct getpeername_args /* { 1480 int fdes; 1481 caddr_t asa; 1482 int *alen; 1483 } */ *uap; 1484 int compat; 1485{ 1486 struct socket *so; 1487 struct sockaddr *sa; 1488 int len, error; 1489 1490 mtx_lock(&Giant); 1491 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1492 goto done2; 1493 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1494 error = ENOTCONN; 1495 goto done1; 1496 } 1497 error = copyin(uap->alen, &len, sizeof (len)); 1498 if (error) 1499 goto done1; 1500 if (len < 0) { 1501 error = EINVAL; 1502 goto done1; 1503 } 1504 sa = 0; 1505 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1506 if (error) 1507 goto bad; 1508 if (sa == 0) { 1509 len = 0; 1510 goto gotnothing; 1511 } 1512 len = MIN(len, sa->sa_len); 1513#ifdef COMPAT_OLDSOCK 1514 if (compat) 1515 ((struct osockaddr *)sa)->sa_family = 1516 sa->sa_family; 1517#endif 1518 error = copyout(sa, uap->asa, (u_int)len); 1519 if (error) 1520 goto bad; 1521gotnothing: 1522 error = copyout(&len, uap->alen, sizeof (len)); 1523bad: 1524 if (sa) 1525 FREE(sa, M_SONAME); 1526done1: 1527 fputsock(so); 1528done2: 1529 mtx_unlock(&Giant); 1530 return (error); 1531} 1532 1533/* 1534 * MPSAFE 1535 */ 1536int 1537getpeername(td, uap) 1538 struct thread *td; 1539 struct getpeername_args *uap; 1540{ 1541 1542 return (getpeername1(td, uap, 0)); 1543} 1544 1545#ifdef COMPAT_OLDSOCK 1546/* 1547 * MPSAFE 1548 */ 1549int 1550ogetpeername(td, uap) 1551 struct thread *td; 1552 struct ogetpeername_args *uap; 1553{ 1554 1555 /* XXX uap should have type `getpeername_args *' to begin with. */ 1556 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1557} 1558#endif /* COMPAT_OLDSOCK */ 1559 1560int 1561sockargs(mp, buf, buflen, type) 1562 struct mbuf **mp; 1563 caddr_t buf; 1564 int buflen, type; 1565{ 1566 register struct sockaddr *sa; 1567 register struct mbuf *m; 1568 int error; 1569 1570 if ((u_int)buflen > MLEN) { 1571#ifdef COMPAT_OLDSOCK 1572 if (type == MT_SONAME && (u_int)buflen <= 112) 1573 buflen = MLEN; /* unix domain compat. hack */ 1574 else 1575#endif 1576 return (EINVAL); 1577 } 1578 m = m_get(M_TRYWAIT, type); 1579 if (m == NULL) 1580 return (ENOBUFS); 1581 m->m_len = buflen; 1582 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1583 if (error) 1584 (void) m_free(m); 1585 else { 1586 *mp = m; 1587 if (type == MT_SONAME) { 1588 sa = mtod(m, struct sockaddr *); 1589 1590#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1591 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1592 sa->sa_family = sa->sa_len; 1593#endif 1594 sa->sa_len = buflen; 1595 } 1596 } 1597 return (error); 1598} 1599 1600int 1601getsockaddr(namp, uaddr, len) 1602 struct sockaddr **namp; 1603 caddr_t uaddr; 1604 size_t len; 1605{ 1606 struct sockaddr *sa; 1607 int error; 1608 1609 if (len > SOCK_MAXADDRLEN) 1610 return ENAMETOOLONG; 1611 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1612 error = copyin(uaddr, sa, len); 1613 if (error) { 1614 FREE(sa, M_SONAME); 1615 } else { 1616#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1617 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1618 sa->sa_family = sa->sa_len; 1619#endif 1620 sa->sa_len = len; 1621 *namp = sa; 1622 } 1623 return error; 1624} 1625 1626/* 1627 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) 1628 */ 1629static void 1630sf_buf_init(void *arg) 1631{ 1632 struct sf_buf *sf_bufs; 1633 vm_offset_t sf_base; 1634 int i; 1635 1636 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); 1637 mtx_lock(&sf_freelist.sf_lock); 1638 SLIST_INIT(&sf_freelist.sf_head); 1639 sf_base = kmem_alloc_nofault(kernel_map, nsfbufs * PAGE_SIZE); 1640 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, 1641 M_NOWAIT | M_ZERO); 1642 for (i = 0; i < nsfbufs; i++) { 1643 sf_bufs[i].kva = sf_base + i * PAGE_SIZE; 1644 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); 1645 } 1646 sf_buf_alloc_want = 0; 1647 mtx_unlock(&sf_freelist.sf_lock); 1648} 1649 1650/* 1651 * Get an sf_buf from the freelist. Will block if none are available. 1652 */ 1653struct sf_buf * 1654sf_buf_alloc(struct vm_page *m) 1655{ 1656 struct sf_buf *sf; 1657 int error; 1658 1659 mtx_lock(&sf_freelist.sf_lock); 1660 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { 1661 sf_buf_alloc_want++; 1662 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH, 1663 "sfbufa", 0); 1664 sf_buf_alloc_want--; 1665 1666 /* 1667 * If we got a signal, don't risk going back to sleep. 1668 */ 1669 if (error) 1670 break; 1671 } 1672 if (sf != NULL) { 1673 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); 1674 sf->m = m; 1675 pmap_qenter(sf->kva, &sf->m, 1); 1676 } 1677 mtx_unlock(&sf_freelist.sf_lock); 1678 return (sf); 1679} 1680 1681/* 1682 * Detatch mapped page and release resources back to the system. 1683 */ 1684void 1685sf_buf_free(void *addr, void *args) 1686{ 1687 struct sf_buf *sf; 1688 struct vm_page *m; 1689 1690 sf = args; 1691 pmap_qremove((vm_offset_t)addr, 1); 1692 m = sf->m; 1693 vm_page_lock_queues(); 1694 vm_page_unwire(m, 0); 1695 /* 1696 * Check for the object going away on us. This can 1697 * happen since we don't hold a reference to it. 1698 * If so, we're responsible for freeing the page. 1699 */ 1700 if (m->wire_count == 0 && m->object == NULL) 1701 vm_page_free(m); 1702 vm_page_unlock_queues(); 1703 sf->m = NULL; 1704 mtx_lock(&sf_freelist.sf_lock); 1705 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); 1706 if (sf_buf_alloc_want > 0) 1707 wakeup_one(&sf_freelist); 1708 mtx_unlock(&sf_freelist.sf_lock); 1709} 1710 1711/* 1712 * sendfile(2) 1713 * 1714 * MPSAFE 1715 * 1716 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1717 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1718 * 1719 * Send a file specified by 'fd' and starting at 'offset' to a socket 1720 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1721 * nbytes == 0. Optionally add a header and/or trailer to the socket 1722 * output. If specified, write the total number of bytes sent into *sbytes. 1723 * 1724 */ 1725int 1726sendfile(struct thread *td, struct sendfile_args *uap) 1727{ 1728 1729 return (do_sendfile(td, uap, 0)); 1730} 1731 1732#ifdef COMPAT_FREEBSD4 1733int 1734freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1735{ 1736 struct sendfile_args args; 1737 1738 args.fd = uap->fd; 1739 args.s = uap->s; 1740 args.offset = uap->offset; 1741 args.nbytes = uap->nbytes; 1742 args.hdtr = uap->hdtr; 1743 args.sbytes = uap->sbytes; 1744 args.flags = uap->flags; 1745 1746 return (do_sendfile(td, &args, 1)); 1747} 1748#endif /* COMPAT_FREEBSD4 */ 1749 1750static int 1751do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1752{ 1753 struct vnode *vp; 1754 struct vm_object *obj; 1755 struct socket *so = NULL; 1756 struct mbuf *m; 1757 struct sf_buf *sf; 1758 struct vm_page *pg; 1759 struct writev_args nuap; 1760 struct sf_hdtr hdtr; 1761 off_t off, xfsize, hdtr_size, sbytes = 0; 1762 int error, s; 1763 1764 mtx_lock(&Giant); 1765 1766 hdtr_size = 0; 1767 1768 /* 1769 * The descriptor must be a regular file and have a backing VM object. 1770 */ 1771 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1772 goto done; 1773 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1774 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1775 error = EINVAL; 1776 VOP_UNLOCK(vp, 0, td); 1777 goto done; 1778 } 1779 VOP_UNLOCK(vp, 0, td); 1780 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1781 goto done; 1782 if (so->so_type != SOCK_STREAM) { 1783 error = EINVAL; 1784 goto done; 1785 } 1786 if ((so->so_state & SS_ISCONNECTED) == 0) { 1787 error = ENOTCONN; 1788 goto done; 1789 } 1790 if (uap->offset < 0) { 1791 error = EINVAL; 1792 goto done; 1793 } 1794 1795#ifdef MAC 1796 error = mac_check_socket_send(td->td_ucred, so); 1797 if (error) 1798 goto done; 1799#endif 1800 1801 /* 1802 * If specified, get the pointer to the sf_hdtr struct for 1803 * any headers/trailers. 1804 */ 1805 if (uap->hdtr != NULL) { 1806 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1807 if (error) 1808 goto done; 1809 /* 1810 * Send any headers. Wimp out and use writev(2). 1811 */ 1812 if (hdtr.headers != NULL) { 1813 nuap.fd = uap->s; 1814 nuap.iovp = hdtr.headers; 1815 nuap.iovcnt = hdtr.hdr_cnt; 1816 error = writev(td, &nuap); 1817 if (error) 1818 goto done; 1819 if (compat) 1820 sbytes += td->td_retval[0]; 1821 else 1822 hdtr_size += td->td_retval[0]; 1823 } 1824 } 1825 1826 /* 1827 * Protect against multiple writers to the socket. 1828 */ 1829 (void) sblock(&so->so_snd, M_WAITOK); 1830 1831 /* 1832 * Loop through the pages in the file, starting with the requested 1833 * offset. Get a file page (do I/O if necessary), map the file page 1834 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1835 * it on the socket. 1836 */ 1837 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1838 vm_pindex_t pindex; 1839 vm_offset_t pgoff; 1840 1841 pindex = OFF_TO_IDX(off); 1842 VM_OBJECT_LOCK(obj); 1843retry_lookup: 1844 /* 1845 * Calculate the amount to transfer. Not to exceed a page, 1846 * the EOF, or the passed in nbytes. 1847 */ 1848 xfsize = obj->un_pager.vnp.vnp_size - off; 1849 VM_OBJECT_UNLOCK(obj); 1850 if (xfsize > PAGE_SIZE) 1851 xfsize = PAGE_SIZE; 1852 pgoff = (vm_offset_t)(off & PAGE_MASK); 1853 if (PAGE_SIZE - pgoff < xfsize) 1854 xfsize = PAGE_SIZE - pgoff; 1855 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1856 xfsize = uap->nbytes - sbytes; 1857 if (xfsize <= 0) 1858 break; 1859 /* 1860 * Optimize the non-blocking case by looking at the socket space 1861 * before going to the extra work of constituting the sf_buf. 1862 */ 1863 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1864 if (so->so_state & SS_CANTSENDMORE) 1865 error = EPIPE; 1866 else 1867 error = EAGAIN; 1868 sbunlock(&so->so_snd); 1869 goto done; 1870 } 1871 VM_OBJECT_LOCK(obj); 1872 /* 1873 * Attempt to look up the page. 1874 * 1875 * Allocate if not found 1876 * 1877 * Wait and loop if busy. 1878 */ 1879 pg = vm_page_lookup(obj, pindex); 1880 1881 if (pg == NULL) { 1882 pg = vm_page_alloc(obj, pindex, 1883 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1884 if (pg == NULL) { 1885 VM_OBJECT_UNLOCK(obj); 1886 VM_WAIT; 1887 VM_OBJECT_LOCK(obj); 1888 goto retry_lookup; 1889 } 1890 vm_page_lock_queues(); 1891 vm_page_wakeup(pg); 1892 } else { 1893 vm_page_lock_queues(); 1894 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1895 goto retry_lookup; 1896 /* 1897 * Wire the page so it does not get ripped out from 1898 * under us. 1899 */ 1900 vm_page_wire(pg); 1901 } 1902 1903 /* 1904 * If page is not valid for what we need, initiate I/O 1905 */ 1906 1907 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1908 int bsize, resid; 1909 1910 /* 1911 * Ensure that our page is still around when the I/O 1912 * completes. 1913 */ 1914 vm_page_io_start(pg); 1915 vm_page_unlock_queues(); 1916 VM_OBJECT_UNLOCK(obj); 1917 1918 /* 1919 * Get the page from backing store. 1920 */ 1921 bsize = vp->v_mount->mnt_stat.f_iosize; 1922 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1923 /* 1924 * XXXMAC: Because we don't have fp->f_cred here, 1925 * we pass in NOCRED. This is probably wrong, but 1926 * is consistent with our original implementation. 1927 */ 1928 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1929 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1930 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1931 td->td_ucred, NOCRED, &resid, td); 1932 VOP_UNLOCK(vp, 0, td); 1933 if (error) 1934 VM_OBJECT_LOCK(obj); 1935 vm_page_lock_queues(); 1936 vm_page_flag_clear(pg, PG_ZERO); 1937 vm_page_io_finish(pg); 1938 if (error) { 1939 vm_page_unwire(pg, 0); 1940 /* 1941 * See if anyone else might know about this page. 1942 * If not and it is not valid, then free it. 1943 */ 1944 if (pg->wire_count == 0 && pg->valid == 0 && 1945 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1946 pg->hold_count == 0) { 1947 vm_page_busy(pg); 1948 vm_page_free(pg); 1949 } 1950 vm_page_unlock_queues(); 1951 VM_OBJECT_UNLOCK(obj); 1952 sbunlock(&so->so_snd); 1953 goto done; 1954 } 1955 } else 1956 VM_OBJECT_UNLOCK(obj); 1957 vm_page_unlock_queues(); 1958 1959 /* 1960 * Get a sendfile buf. We usually wait as long as necessary, 1961 * but this wait can be interrupted. 1962 */ 1963 if ((sf = sf_buf_alloc(pg)) == NULL) { 1964 vm_page_lock_queues(); 1965 vm_page_unwire(pg, 0); 1966 if (pg->wire_count == 0 && pg->object == NULL) 1967 vm_page_free(pg); 1968 vm_page_unlock_queues(); 1969 sbunlock(&so->so_snd); 1970 error = EINTR; 1971 goto done; 1972 } 1973 1974 /* 1975 * Get an mbuf header and set it up as having external storage. 1976 */ 1977 MGETHDR(m, M_TRYWAIT, MT_DATA); 1978 if (m == NULL) { 1979 error = ENOBUFS; 1980 sf_buf_free((void *)sf->kva, sf); 1981 sbunlock(&so->so_snd); 1982 goto done; 1983 } 1984 /* 1985 * Setup external storage for mbuf. 1986 */ 1987 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY, 1988 EXT_SFBUF); 1989 m->m_data = (char *) sf->kva + pgoff; 1990 m->m_pkthdr.len = m->m_len = xfsize; 1991 /* 1992 * Add the buffer to the socket buffer chain. 1993 */ 1994 s = splnet(); 1995retry_space: 1996 /* 1997 * Make sure that the socket is still able to take more data. 1998 * CANTSENDMORE being true usually means that the connection 1999 * was closed. so_error is true when an error was sensed after 2000 * a previous send. 2001 * The state is checked after the page mapping and buffer 2002 * allocation above since those operations may block and make 2003 * any socket checks stale. From this point forward, nothing 2004 * blocks before the pru_send (or more accurately, any blocking 2005 * results in a loop back to here to re-check). 2006 */ 2007 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 2008 if (so->so_state & SS_CANTSENDMORE) { 2009 error = EPIPE; 2010 } else { 2011 error = so->so_error; 2012 so->so_error = 0; 2013 } 2014 m_freem(m); 2015 sbunlock(&so->so_snd); 2016 splx(s); 2017 goto done; 2018 } 2019 /* 2020 * Wait for socket space to become available. We do this just 2021 * after checking the connection state above in order to avoid 2022 * a race condition with sbwait(). 2023 */ 2024 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2025 if (so->so_state & SS_NBIO) { 2026 m_freem(m); 2027 sbunlock(&so->so_snd); 2028 splx(s); 2029 error = EAGAIN; 2030 goto done; 2031 } 2032 error = sbwait(&so->so_snd); 2033 /* 2034 * An error from sbwait usually indicates that we've 2035 * been interrupted by a signal. If we've sent anything 2036 * then return bytes sent, otherwise return the error. 2037 */ 2038 if (error) { 2039 m_freem(m); 2040 sbunlock(&so->so_snd); 2041 splx(s); 2042 goto done; 2043 } 2044 goto retry_space; 2045 } 2046 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2047 splx(s); 2048 if (error) { 2049 sbunlock(&so->so_snd); 2050 goto done; 2051 } 2052 } 2053 sbunlock(&so->so_snd); 2054 2055 /* 2056 * Send trailers. Wimp out and use writev(2). 2057 */ 2058 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2059 nuap.fd = uap->s; 2060 nuap.iovp = hdtr.trailers; 2061 nuap.iovcnt = hdtr.trl_cnt; 2062 error = writev(td, &nuap); 2063 if (error) 2064 goto done; 2065 if (compat) 2066 sbytes += td->td_retval[0]; 2067 else 2068 hdtr_size += td->td_retval[0]; 2069 } 2070 2071done: 2072 /* 2073 * If there was no error we have to clear td->td_retval[0] 2074 * because it may have been set by writev. 2075 */ 2076 if (error == 0) { 2077 td->td_retval[0] = 0; 2078 } 2079 if (uap->sbytes != NULL) { 2080 if (!compat) 2081 sbytes += hdtr_size; 2082 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2083 } 2084 if (vp) 2085 vrele(vp); 2086 if (so) 2087 fputsock(so); 2088 mtx_unlock(&Giant); 2089 return (error); 2090} 2091