kern_sendfile.c revision 123844
1/* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 123844 2003-12-25 23:44:38Z dwmalone $"); 41 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44#include "opt_mac.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/lock.h> 50#include <sys/mac.h> 51#include <sys/mutex.h> 52#include <sys/sysproto.h> 53#include <sys/malloc.h> 54#include <sys/filedesc.h> 55#include <sys/event.h> 56#include <sys/proc.h> 57#include <sys/fcntl.h> 58#include <sys/file.h> 59#include <sys/filio.h> 60#include <sys/mount.h> 61#include <sys/mbuf.h> 62#include <sys/protosw.h> 63#include <sys/sf_buf.h> 64#include <sys/socket.h> 65#include <sys/socketvar.h> 66#include <sys/signalvar.h> 67#include <sys/syscallsubr.h> 68#include <sys/uio.h> 69#include <sys/vnode.h> 70#ifdef KTRACE 71#include <sys/ktrace.h> 72#endif 73 74#include <vm/vm.h> 75#include <vm/vm_object.h> 76#include <vm/vm_page.h> 77#include <vm/vm_pageout.h> 78#include <vm/vm_kern.h> 79#include <vm/vm_extern.h> 80 81static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 82static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 83 84static int accept1(struct thread *td, struct accept_args *uap, int compat); 85static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 86static int getsockname1(struct thread *td, struct getsockname_args *uap, 87 int compat); 88static int getpeername1(struct thread *td, struct getpeername_args *uap, 89 int compat); 90 91/* 92 * System call interface to the socket abstraction. 93 */ 94#if defined(COMPAT_43) || defined(COMPAT_SUNOS) 95#define COMPAT_OLDSOCK 96#endif 97 98/* 99 * MPSAFE 100 */ 101int 102socket(td, uap) 103 struct thread *td; 104 register struct socket_args /* { 105 int domain; 106 int type; 107 int protocol; 108 } */ *uap; 109{ 110 struct filedesc *fdp; 111 struct socket *so; 112 struct file *fp; 113 int fd, error; 114 115 fdp = td->td_proc->p_fd; 116 error = falloc(td, &fp, &fd); 117 if (error) 118 goto done2; 119 /* An extra reference on `fp' has been held for us by falloc(). */ 120 mtx_lock(&Giant); 121 error = socreate(uap->domain, &so, uap->type, uap->protocol, 122 td->td_ucred, td); 123 mtx_unlock(&Giant); 124 FILEDESC_LOCK(fdp); 125 if (error) { 126 if (fdp->fd_ofiles[fd] == fp) { 127 fdp->fd_ofiles[fd] = NULL; 128 FILEDESC_UNLOCK(fdp); 129 fdrop(fp, td); 130 } else 131 FILEDESC_UNLOCK(fdp); 132 } else { 133 fp->f_data = so; /* already has ref count */ 134 fp->f_flag = FREAD|FWRITE; 135 fp->f_ops = &socketops; 136 fp->f_type = DTYPE_SOCKET; 137 FILEDESC_UNLOCK(fdp); 138 td->td_retval[0] = fd; 139 } 140 fdrop(fp, td); 141done2: 142 return (error); 143} 144 145/* 146 * MPSAFE 147 */ 148/* ARGSUSED */ 149int 150bind(td, uap) 151 struct thread *td; 152 register struct bind_args /* { 153 int s; 154 caddr_t name; 155 int namelen; 156 } */ *uap; 157{ 158 struct sockaddr *sa; 159 int error; 160 161 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 162 return (error); 163 164 return (kern_bind(td, uap->s, sa)); 165} 166 167int 168kern_bind(td, fd, sa) 169 struct thread *td; 170 int fd; 171 struct sockaddr *sa; 172{ 173 struct socket *so; 174 int error; 175 176 mtx_lock(&Giant); 177 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 178 goto done2; 179#ifdef MAC 180 error = mac_check_socket_bind(td->td_ucred, so, sa); 181 if (error) 182 goto done1; 183#endif 184 error = sobind(so, sa, td); 185#ifdef MAC 186done1: 187#endif 188 fputsock(so); 189done2: 190 mtx_unlock(&Giant); 191 FREE(sa, M_SONAME); 192 return (error); 193} 194 195/* 196 * MPSAFE 197 */ 198/* ARGSUSED */ 199int 200listen(td, uap) 201 struct thread *td; 202 register struct listen_args /* { 203 int s; 204 int backlog; 205 } */ *uap; 206{ 207 struct socket *so; 208 int error; 209 210 mtx_lock(&Giant); 211 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 212#ifdef MAC 213 error = mac_check_socket_listen(td->td_ucred, so); 214 if (error) 215 goto done; 216#endif 217 error = solisten(so, uap->backlog, td); 218#ifdef MAC 219done: 220#endif 221 fputsock(so); 222 } 223 mtx_unlock(&Giant); 224 return(error); 225} 226 227/* 228 * accept1() 229 * MPSAFE 230 */ 231static int 232accept1(td, uap, compat) 233 struct thread *td; 234 register struct accept_args /* { 235 int s; 236 struct sockaddr * __restrict name; 237 socklen_t * __restrict anamelen; 238 } */ *uap; 239 int compat; 240{ 241 struct filedesc *fdp; 242 struct file *nfp = NULL; 243 struct sockaddr *sa; 244 socklen_t namelen; 245 int error, s; 246 struct socket *head, *so; 247 int fd; 248 u_int fflag; 249 pid_t pgid; 250 int tmp; 251 252 fdp = td->td_proc->p_fd; 253 if (uap->name) { 254 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 255 if(error) 256 goto done3; 257 if (namelen < 0) { 258 error = EINVAL; 259 goto done3; 260 } 261 } 262 mtx_lock(&Giant); 263 error = fgetsock(td, uap->s, &head, &fflag); 264 if (error) 265 goto done2; 266 s = splnet(); 267 if ((head->so_options & SO_ACCEPTCONN) == 0) { 268 splx(s); 269 error = EINVAL; 270 goto done; 271 } 272 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 273 if (head->so_state & SS_CANTRCVMORE) { 274 head->so_error = ECONNABORTED; 275 break; 276 } 277 if ((head->so_state & SS_NBIO) != 0) { 278 head->so_error = EWOULDBLOCK; 279 break; 280 } 281 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 282 "accept", 0); 283 if (error) { 284 splx(s); 285 goto done; 286 } 287 } 288 if (head->so_error) { 289 error = head->so_error; 290 head->so_error = 0; 291 splx(s); 292 goto done; 293 } 294 295 /* 296 * At this point we know that there is at least one connection 297 * ready to be accepted. Remove it from the queue prior to 298 * allocating the file descriptor for it since falloc() may 299 * block allowing another process to accept the connection 300 * instead. 301 */ 302 so = TAILQ_FIRST(&head->so_comp); 303 TAILQ_REMOVE(&head->so_comp, so, so_list); 304 head->so_qlen--; 305 306 error = falloc(td, &nfp, &fd); 307 if (error) { 308 /* 309 * Probably ran out of file descriptors. Put the 310 * unaccepted connection back onto the queue and 311 * do another wakeup so some other process might 312 * have a chance at it. 313 */ 314 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 315 head->so_qlen++; 316 wakeup_one(&head->so_timeo); 317 splx(s); 318 goto done; 319 } 320 /* An extra reference on `nfp' has been held for us by falloc(). */ 321 td->td_retval[0] = fd; 322 323 /* connection has been removed from the listen queue */ 324 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 325 326 so->so_state &= ~SS_COMP; 327 so->so_head = NULL; 328 pgid = fgetown(&head->so_sigio); 329 if (pgid != 0) 330 fsetown(pgid, &so->so_sigio); 331 332 FILE_LOCK(nfp); 333 soref(so); /* file descriptor reference */ 334 nfp->f_data = so; /* nfp has ref count from falloc */ 335 nfp->f_flag = fflag; 336 nfp->f_ops = &socketops; 337 nfp->f_type = DTYPE_SOCKET; 338 FILE_UNLOCK(nfp); 339 /* Sync socket nonblocking/async state with file flags */ 340 tmp = fflag & FNONBLOCK; 341 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 342 tmp = fflag & FASYNC; 343 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 344 sa = 0; 345 error = soaccept(so, &sa); 346 if (error) { 347 /* 348 * return a namelen of zero for older code which might 349 * ignore the return value from accept. 350 */ 351 if (uap->name != NULL) { 352 namelen = 0; 353 (void) copyout(&namelen, 354 uap->anamelen, sizeof(*uap->anamelen)); 355 } 356 goto noconnection; 357 } 358 if (sa == NULL) { 359 namelen = 0; 360 if (uap->name) 361 goto gotnoname; 362 splx(s); 363 error = 0; 364 goto done; 365 } 366 if (uap->name) { 367 /* check sa_len before it is destroyed */ 368 if (namelen > sa->sa_len) 369 namelen = sa->sa_len; 370#ifdef COMPAT_OLDSOCK 371 if (compat) 372 ((struct osockaddr *)sa)->sa_family = 373 sa->sa_family; 374#endif 375 error = copyout(sa, uap->name, (u_int)namelen); 376 if (!error) 377gotnoname: 378 error = copyout(&namelen, 379 uap->anamelen, sizeof (*uap->anamelen)); 380 } 381noconnection: 382 if (sa) 383 FREE(sa, M_SONAME); 384 385 /* 386 * close the new descriptor, assuming someone hasn't ripped it 387 * out from under us. 388 */ 389 if (error) { 390 FILEDESC_LOCK(fdp); 391 if (fdp->fd_ofiles[fd] == nfp) { 392 fdp->fd_ofiles[fd] = NULL; 393 FILEDESC_UNLOCK(fdp); 394 fdrop(nfp, td); 395 } else { 396 FILEDESC_UNLOCK(fdp); 397 } 398 } 399 splx(s); 400 401 /* 402 * Release explicitly held references before returning. 403 */ 404done: 405 if (nfp != NULL) 406 fdrop(nfp, td); 407 fputsock(head); 408done2: 409 mtx_unlock(&Giant); 410done3: 411 return (error); 412} 413 414/* 415 * MPSAFE (accept1() is MPSAFE) 416 */ 417int 418accept(td, uap) 419 struct thread *td; 420 struct accept_args *uap; 421{ 422 423 return (accept1(td, uap, 0)); 424} 425 426#ifdef COMPAT_OLDSOCK 427/* 428 * MPSAFE (accept1() is MPSAFE) 429 */ 430int 431oaccept(td, uap) 432 struct thread *td; 433 struct accept_args *uap; 434{ 435 436 return (accept1(td, uap, 1)); 437} 438#endif /* COMPAT_OLDSOCK */ 439 440/* 441 * MPSAFE 442 */ 443/* ARGSUSED */ 444int 445connect(td, uap) 446 struct thread *td; 447 register struct connect_args /* { 448 int s; 449 caddr_t name; 450 int namelen; 451 } */ *uap; 452{ 453 struct sockaddr *sa; 454 int error; 455 456 error = getsockaddr(&sa, uap->name, uap->namelen); 457 if (error) 458 return error; 459 460 return (kern_connect(td, uap->s, sa)); 461} 462 463 464int 465kern_connect(td, fd, sa) 466 struct thread *td; 467 int fd; 468 struct sockaddr *sa; 469{ 470 struct socket *so; 471 int error, s; 472 int interrupted = 0; 473 474 mtx_lock(&Giant); 475 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 476 goto done2; 477 if (so->so_state & SS_ISCONNECTING) { 478 error = EALREADY; 479 goto done1; 480 } 481#ifdef MAC 482 error = mac_check_socket_connect(td->td_ucred, so, sa); 483 if (error) 484 goto bad; 485#endif 486 error = soconnect(so, sa, td); 487 if (error) 488 goto bad; 489 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 490 error = EINPROGRESS; 491 goto done1; 492 } 493 s = splnet(); 494 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 495 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 496 if (error) { 497 if (error == EINTR || error == ERESTART) 498 interrupted = 1; 499 break; 500 } 501 } 502 if (error == 0) { 503 error = so->so_error; 504 so->so_error = 0; 505 } 506 splx(s); 507bad: 508 if (!interrupted) 509 so->so_state &= ~SS_ISCONNECTING; 510 if (error == ERESTART) 511 error = EINTR; 512done1: 513 fputsock(so); 514done2: 515 mtx_unlock(&Giant); 516 FREE(sa, M_SONAME); 517 return (error); 518} 519 520/* 521 * MPSAFE 522 */ 523int 524socketpair(td, uap) 525 struct thread *td; 526 register struct socketpair_args /* { 527 int domain; 528 int type; 529 int protocol; 530 int *rsv; 531 } */ *uap; 532{ 533 register struct filedesc *fdp = td->td_proc->p_fd; 534 struct file *fp1, *fp2; 535 struct socket *so1, *so2; 536 int fd, error, sv[2]; 537 538 mtx_lock(&Giant); 539 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 540 td->td_ucred, td); 541 if (error) 542 goto done2; 543 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 544 td->td_ucred, td); 545 if (error) 546 goto free1; 547 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 548 error = falloc(td, &fp1, &fd); 549 if (error) 550 goto free2; 551 sv[0] = fd; 552 fp1->f_data = so1; /* so1 already has ref count */ 553 error = falloc(td, &fp2, &fd); 554 if (error) 555 goto free3; 556 fp2->f_data = so2; /* so2 already has ref count */ 557 sv[1] = fd; 558 error = soconnect2(so1, so2); 559 if (error) 560 goto free4; 561 if (uap->type == SOCK_DGRAM) { 562 /* 563 * Datagram socket connection is asymmetric. 564 */ 565 error = soconnect2(so2, so1); 566 if (error) 567 goto free4; 568 } 569 FILE_LOCK(fp1); 570 fp1->f_flag = FREAD|FWRITE; 571 fp1->f_ops = &socketops; 572 fp1->f_type = DTYPE_SOCKET; 573 FILE_UNLOCK(fp1); 574 FILE_LOCK(fp2); 575 fp2->f_flag = FREAD|FWRITE; 576 fp2->f_ops = &socketops; 577 fp2->f_type = DTYPE_SOCKET; 578 FILE_UNLOCK(fp2); 579 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 580 fdrop(fp1, td); 581 fdrop(fp2, td); 582 goto done2; 583free4: 584 FILEDESC_LOCK(fdp); 585 if (fdp->fd_ofiles[sv[1]] == fp2) { 586 fdp->fd_ofiles[sv[1]] = NULL; 587 FILEDESC_UNLOCK(fdp); 588 fdrop(fp2, td); 589 } else 590 FILEDESC_UNLOCK(fdp); 591 fdrop(fp2, td); 592free3: 593 FILEDESC_LOCK(fdp); 594 if (fdp->fd_ofiles[sv[0]] == fp1) { 595 fdp->fd_ofiles[sv[0]] = NULL; 596 FILEDESC_UNLOCK(fdp); 597 fdrop(fp1, td); 598 } else 599 FILEDESC_UNLOCK(fdp); 600 fdrop(fp1, td); 601free2: 602 (void)soclose(so2); 603free1: 604 (void)soclose(so1); 605done2: 606 mtx_unlock(&Giant); 607 return (error); 608} 609 610static int 611sendit(td, s, mp, flags) 612 register struct thread *td; 613 int s; 614 register struct msghdr *mp; 615 int flags; 616{ 617 struct mbuf *control; 618 struct sockaddr *to; 619 int error; 620 621 if (mp->msg_name != NULL) { 622 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 623 if (error) { 624 to = NULL; 625 goto bad; 626 } 627 mp->msg_name = to; 628 } else 629 to = NULL; 630 631 if (mp->msg_control) { 632 if (mp->msg_controllen < sizeof(struct cmsghdr) 633#ifdef COMPAT_OLDSOCK 634 && mp->msg_flags != MSG_COMPAT 635#endif 636 ) { 637 error = EINVAL; 638 goto bad; 639 } 640 error = sockargs(&control, mp->msg_control, 641 mp->msg_controllen, MT_CONTROL); 642 if (error) 643 goto bad; 644#ifdef COMPAT_OLDSOCK 645 if (mp->msg_flags == MSG_COMPAT) { 646 register struct cmsghdr *cm; 647 648 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 649 if (control == 0) { 650 error = ENOBUFS; 651 goto bad; 652 } else { 653 cm = mtod(control, struct cmsghdr *); 654 cm->cmsg_len = control->m_len; 655 cm->cmsg_level = SOL_SOCKET; 656 cm->cmsg_type = SCM_RIGHTS; 657 } 658 } 659#endif 660 } else { 661 control = NULL; 662 } 663 664 error = kern_sendit(td, s, mp, flags, control); 665 666bad: 667 if (to) 668 FREE(to, M_SONAME); 669 return (error); 670} 671 672int 673kern_sendit(td, s, mp, flags, control) 674 struct thread *td; 675 int s; 676 struct msghdr *mp; 677 int flags; 678 struct mbuf *control; 679{ 680 struct uio auio; 681 struct iovec *iov; 682 struct socket *so; 683 int i; 684 int len, error; 685#ifdef KTRACE 686 struct iovec *ktriov = NULL; 687 struct uio ktruio; 688 int iovlen; 689#endif 690 691 mtx_lock(&Giant); 692 if ((error = fgetsock(td, s, &so, NULL)) != 0) 693 goto bad2; 694 695#ifdef MAC 696 error = mac_check_socket_send(td->td_ucred, so); 697 if (error) 698 goto bad; 699#endif 700 701 auio.uio_iov = mp->msg_iov; 702 auio.uio_iovcnt = mp->msg_iovlen; 703 auio.uio_segflg = UIO_USERSPACE; 704 auio.uio_rw = UIO_WRITE; 705 auio.uio_td = td; 706 auio.uio_offset = 0; /* XXX */ 707 auio.uio_resid = 0; 708 iov = mp->msg_iov; 709 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 710 if ((auio.uio_resid += iov->iov_len) < 0) { 711 error = EINVAL; 712 goto bad; 713 } 714 } 715#ifdef KTRACE 716 if (KTRPOINT(td, KTR_GENIO)) { 717 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 718 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 719 bcopy(auio.uio_iov, ktriov, iovlen); 720 ktruio = auio; 721 } 722#endif 723 len = auio.uio_resid; 724 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 725 0, control, flags, td); 726 if (error) { 727 if (auio.uio_resid != len && (error == ERESTART || 728 error == EINTR || error == EWOULDBLOCK)) 729 error = 0; 730 /* Generation of SIGPIPE can be controlled per socket */ 731 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 732 PROC_LOCK(td->td_proc); 733 psignal(td->td_proc, SIGPIPE); 734 PROC_UNLOCK(td->td_proc); 735 } 736 } 737 if (error == 0) 738 td->td_retval[0] = len - auio.uio_resid; 739#ifdef KTRACE 740 if (ktriov != NULL) { 741 if (error == 0) { 742 ktruio.uio_iov = ktriov; 743 ktruio.uio_resid = td->td_retval[0]; 744 ktrgenio(s, UIO_WRITE, &ktruio, error); 745 } 746 FREE(ktriov, M_TEMP); 747 } 748#endif 749bad: 750 fputsock(so); 751bad2: 752 mtx_unlock(&Giant); 753 return (error); 754} 755 756/* 757 * MPSAFE 758 */ 759int 760sendto(td, uap) 761 struct thread *td; 762 register struct sendto_args /* { 763 int s; 764 caddr_t buf; 765 size_t len; 766 int flags; 767 caddr_t to; 768 int tolen; 769 } */ *uap; 770{ 771 struct msghdr msg; 772 struct iovec aiov; 773 int error; 774 775 msg.msg_name = uap->to; 776 msg.msg_namelen = uap->tolen; 777 msg.msg_iov = &aiov; 778 msg.msg_iovlen = 1; 779 msg.msg_control = 0; 780#ifdef COMPAT_OLDSOCK 781 msg.msg_flags = 0; 782#endif 783 aiov.iov_base = uap->buf; 784 aiov.iov_len = uap->len; 785 error = sendit(td, uap->s, &msg, uap->flags); 786 return (error); 787} 788 789#ifdef COMPAT_OLDSOCK 790/* 791 * MPSAFE 792 */ 793int 794osend(td, uap) 795 struct thread *td; 796 register struct osend_args /* { 797 int s; 798 caddr_t buf; 799 int len; 800 int flags; 801 } */ *uap; 802{ 803 struct msghdr msg; 804 struct iovec aiov; 805 int error; 806 807 msg.msg_name = 0; 808 msg.msg_namelen = 0; 809 msg.msg_iov = &aiov; 810 msg.msg_iovlen = 1; 811 aiov.iov_base = uap->buf; 812 aiov.iov_len = uap->len; 813 msg.msg_control = 0; 814 msg.msg_flags = 0; 815 error = sendit(td, uap->s, &msg, uap->flags); 816 return (error); 817} 818 819/* 820 * MPSAFE 821 */ 822int 823osendmsg(td, uap) 824 struct thread *td; 825 register struct osendmsg_args /* { 826 int s; 827 caddr_t msg; 828 int flags; 829 } */ *uap; 830{ 831 struct msghdr msg; 832 struct iovec aiov[UIO_SMALLIOV], *iov; 833 int error; 834 835 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 836 if (error) 837 goto done2; 838 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 839 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 840 error = EMSGSIZE; 841 goto done2; 842 } 843 MALLOC(iov, struct iovec *, 844 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 845 M_WAITOK); 846 } else { 847 iov = aiov; 848 } 849 error = copyin(msg.msg_iov, iov, 850 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 851 if (error) 852 goto done; 853 msg.msg_flags = MSG_COMPAT; 854 msg.msg_iov = iov; 855 error = sendit(td, uap->s, &msg, uap->flags); 856done: 857 if (iov != aiov) 858 FREE(iov, M_IOV); 859done2: 860 return (error); 861} 862#endif 863 864/* 865 * MPSAFE 866 */ 867int 868sendmsg(td, uap) 869 struct thread *td; 870 register struct sendmsg_args /* { 871 int s; 872 caddr_t msg; 873 int flags; 874 } */ *uap; 875{ 876 struct msghdr msg; 877 struct iovec aiov[UIO_SMALLIOV], *iov; 878 int error; 879 880 error = copyin(uap->msg, &msg, sizeof (msg)); 881 if (error) 882 goto done2; 883 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 884 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 885 error = EMSGSIZE; 886 goto done2; 887 } 888 MALLOC(iov, struct iovec *, 889 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 890 M_WAITOK); 891 } else { 892 iov = aiov; 893 } 894 if (msg.msg_iovlen && 895 (error = copyin(msg.msg_iov, iov, 896 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 897 goto done; 898 msg.msg_iov = iov; 899#ifdef COMPAT_OLDSOCK 900 msg.msg_flags = 0; 901#endif 902 error = sendit(td, uap->s, &msg, uap->flags); 903done: 904 if (iov != aiov) 905 FREE(iov, M_IOV); 906done2: 907 return (error); 908} 909 910static int 911recvit(td, s, mp, namelenp) 912 register struct thread *td; 913 int s; 914 register struct msghdr *mp; 915 void *namelenp; 916{ 917 struct uio auio; 918 register struct iovec *iov; 919 register int i; 920 socklen_t len; 921 int error; 922 struct mbuf *m, *control = 0; 923 caddr_t ctlbuf; 924 struct socket *so; 925 struct sockaddr *fromsa = 0; 926#ifdef KTRACE 927 struct iovec *ktriov = NULL; 928 struct uio ktruio; 929 int iovlen; 930#endif 931 932 mtx_lock(&Giant); 933 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 934 mtx_unlock(&Giant); 935 return (error); 936 } 937 938#ifdef MAC 939 error = mac_check_socket_receive(td->td_ucred, so); 940 if (error) { 941 fputsock(so); 942 mtx_unlock(&Giant); 943 return (error); 944 } 945#endif 946 947 auio.uio_iov = mp->msg_iov; 948 auio.uio_iovcnt = mp->msg_iovlen; 949 auio.uio_segflg = UIO_USERSPACE; 950 auio.uio_rw = UIO_READ; 951 auio.uio_td = td; 952 auio.uio_offset = 0; /* XXX */ 953 auio.uio_resid = 0; 954 iov = mp->msg_iov; 955 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 956 if ((auio.uio_resid += iov->iov_len) < 0) { 957 fputsock(so); 958 return (EINVAL); 959 } 960 } 961#ifdef KTRACE 962 if (KTRPOINT(td, KTR_GENIO)) { 963 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 964 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 965 bcopy(auio.uio_iov, ktriov, iovlen); 966 ktruio = auio; 967 } 968#endif 969 len = auio.uio_resid; 970 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 971 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 972 &mp->msg_flags); 973 if (error) { 974 if (auio.uio_resid != (int)len && (error == ERESTART || 975 error == EINTR || error == EWOULDBLOCK)) 976 error = 0; 977 } 978#ifdef KTRACE 979 if (ktriov != NULL) { 980 if (error == 0) { 981 ktruio.uio_iov = ktriov; 982 ktruio.uio_resid = (int)len - auio.uio_resid; 983 ktrgenio(s, UIO_READ, &ktruio, error); 984 } 985 FREE(ktriov, M_TEMP); 986 } 987#endif 988 if (error) 989 goto out; 990 td->td_retval[0] = (int)len - auio.uio_resid; 991 if (mp->msg_name) { 992 len = mp->msg_namelen; 993 if (len <= 0 || fromsa == 0) 994 len = 0; 995 else { 996 /* save sa_len before it is destroyed by MSG_COMPAT */ 997 len = MIN(len, fromsa->sa_len); 998#ifdef COMPAT_OLDSOCK 999 if (mp->msg_flags & MSG_COMPAT) 1000 ((struct osockaddr *)fromsa)->sa_family = 1001 fromsa->sa_family; 1002#endif 1003 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1004 if (error) 1005 goto out; 1006 } 1007 mp->msg_namelen = len; 1008 if (namelenp && 1009 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1010#ifdef COMPAT_OLDSOCK 1011 if (mp->msg_flags & MSG_COMPAT) 1012 error = 0; /* old recvfrom didn't check */ 1013 else 1014#endif 1015 goto out; 1016 } 1017 } 1018 if (mp->msg_control) { 1019#ifdef COMPAT_OLDSOCK 1020 /* 1021 * We assume that old recvmsg calls won't receive access 1022 * rights and other control info, esp. as control info 1023 * is always optional and those options didn't exist in 4.3. 1024 * If we receive rights, trim the cmsghdr; anything else 1025 * is tossed. 1026 */ 1027 if (control && mp->msg_flags & MSG_COMPAT) { 1028 if (mtod(control, struct cmsghdr *)->cmsg_level != 1029 SOL_SOCKET || 1030 mtod(control, struct cmsghdr *)->cmsg_type != 1031 SCM_RIGHTS) { 1032 mp->msg_controllen = 0; 1033 goto out; 1034 } 1035 control->m_len -= sizeof (struct cmsghdr); 1036 control->m_data += sizeof (struct cmsghdr); 1037 } 1038#endif 1039 len = mp->msg_controllen; 1040 m = control; 1041 mp->msg_controllen = 0; 1042 ctlbuf = mp->msg_control; 1043 1044 while (m && len > 0) { 1045 unsigned int tocopy; 1046 1047 if (len >= m->m_len) 1048 tocopy = m->m_len; 1049 else { 1050 mp->msg_flags |= MSG_CTRUNC; 1051 tocopy = len; 1052 } 1053 1054 if ((error = copyout(mtod(m, caddr_t), 1055 ctlbuf, tocopy)) != 0) 1056 goto out; 1057 1058 ctlbuf += tocopy; 1059 len -= tocopy; 1060 m = m->m_next; 1061 } 1062 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1063 } 1064out: 1065 fputsock(so); 1066 mtx_unlock(&Giant); 1067 if (fromsa) 1068 FREE(fromsa, M_SONAME); 1069 if (control) 1070 m_freem(control); 1071 return (error); 1072} 1073 1074/* 1075 * MPSAFE 1076 */ 1077int 1078recvfrom(td, uap) 1079 struct thread *td; 1080 register struct recvfrom_args /* { 1081 int s; 1082 caddr_t buf; 1083 size_t len; 1084 int flags; 1085 struct sockaddr * __restrict from; 1086 socklen_t * __restrict fromlenaddr; 1087 } */ *uap; 1088{ 1089 struct msghdr msg; 1090 struct iovec aiov; 1091 int error; 1092 1093 if (uap->fromlenaddr) { 1094 error = copyin(uap->fromlenaddr, 1095 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1096 if (error) 1097 goto done2; 1098 } else { 1099 msg.msg_namelen = 0; 1100 } 1101 msg.msg_name = uap->from; 1102 msg.msg_iov = &aiov; 1103 msg.msg_iovlen = 1; 1104 aiov.iov_base = uap->buf; 1105 aiov.iov_len = uap->len; 1106 msg.msg_control = 0; 1107 msg.msg_flags = uap->flags; 1108 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1109done2: 1110 return(error); 1111} 1112 1113#ifdef COMPAT_OLDSOCK 1114/* 1115 * MPSAFE 1116 */ 1117int 1118orecvfrom(td, uap) 1119 struct thread *td; 1120 struct recvfrom_args *uap; 1121{ 1122 1123 uap->flags |= MSG_COMPAT; 1124 return (recvfrom(td, uap)); 1125} 1126#endif 1127 1128 1129#ifdef COMPAT_OLDSOCK 1130/* 1131 * MPSAFE 1132 */ 1133int 1134orecv(td, uap) 1135 struct thread *td; 1136 register struct orecv_args /* { 1137 int s; 1138 caddr_t buf; 1139 int len; 1140 int flags; 1141 } */ *uap; 1142{ 1143 struct msghdr msg; 1144 struct iovec aiov; 1145 int error; 1146 1147 msg.msg_name = 0; 1148 msg.msg_namelen = 0; 1149 msg.msg_iov = &aiov; 1150 msg.msg_iovlen = 1; 1151 aiov.iov_base = uap->buf; 1152 aiov.iov_len = uap->len; 1153 msg.msg_control = 0; 1154 msg.msg_flags = uap->flags; 1155 error = recvit(td, uap->s, &msg, NULL); 1156 return (error); 1157} 1158 1159/* 1160 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1161 * overlays the new one, missing only the flags, and with the (old) access 1162 * rights where the control fields are now. 1163 * 1164 * MPSAFE 1165 */ 1166int 1167orecvmsg(td, uap) 1168 struct thread *td; 1169 register struct orecvmsg_args /* { 1170 int s; 1171 struct omsghdr *msg; 1172 int flags; 1173 } */ *uap; 1174{ 1175 struct msghdr msg; 1176 struct iovec aiov[UIO_SMALLIOV], *iov; 1177 int error; 1178 1179 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1180 if (error) 1181 return (error); 1182 1183 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1184 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1185 error = EMSGSIZE; 1186 goto done2; 1187 } 1188 MALLOC(iov, struct iovec *, 1189 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1190 M_WAITOK); 1191 } else { 1192 iov = aiov; 1193 } 1194 msg.msg_flags = uap->flags | MSG_COMPAT; 1195 error = copyin(msg.msg_iov, iov, 1196 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1197 if (error) 1198 goto done; 1199 msg.msg_iov = iov; 1200 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1201 1202 if (msg.msg_controllen && error == 0) 1203 error = copyout(&msg.msg_controllen, 1204 &uap->msg->msg_accrightslen, sizeof (int)); 1205done: 1206 if (iov != aiov) 1207 FREE(iov, M_IOV); 1208done2: 1209 return (error); 1210} 1211#endif 1212 1213/* 1214 * MPSAFE 1215 */ 1216int 1217recvmsg(td, uap) 1218 struct thread *td; 1219 register struct recvmsg_args /* { 1220 int s; 1221 struct msghdr *msg; 1222 int flags; 1223 } */ *uap; 1224{ 1225 struct msghdr msg; 1226 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1227 register int error; 1228 1229 error = copyin(uap->msg, &msg, sizeof (msg)); 1230 if (error) 1231 goto done2; 1232 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1233 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1234 error = EMSGSIZE; 1235 goto done2; 1236 } 1237 MALLOC(iov, struct iovec *, 1238 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1239 M_WAITOK); 1240 } else { 1241 iov = aiov; 1242 } 1243#ifdef COMPAT_OLDSOCK 1244 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1245#else 1246 msg.msg_flags = uap->flags; 1247#endif 1248 uiov = msg.msg_iov; 1249 msg.msg_iov = iov; 1250 error = copyin(uiov, iov, 1251 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1252 if (error) 1253 goto done; 1254 error = recvit(td, uap->s, &msg, NULL); 1255 if (!error) { 1256 msg.msg_iov = uiov; 1257 error = copyout(&msg, uap->msg, sizeof(msg)); 1258 } 1259done: 1260 if (iov != aiov) 1261 FREE(iov, M_IOV); 1262done2: 1263 return (error); 1264} 1265 1266/* 1267 * MPSAFE 1268 */ 1269/* ARGSUSED */ 1270int 1271shutdown(td, uap) 1272 struct thread *td; 1273 register struct shutdown_args /* { 1274 int s; 1275 int how; 1276 } */ *uap; 1277{ 1278 struct socket *so; 1279 int error; 1280 1281 mtx_lock(&Giant); 1282 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1283 error = soshutdown(so, uap->how); 1284 fputsock(so); 1285 } 1286 mtx_unlock(&Giant); 1287 return(error); 1288} 1289 1290/* 1291 * MPSAFE 1292 */ 1293/* ARGSUSED */ 1294int 1295setsockopt(td, uap) 1296 struct thread *td; 1297 register struct setsockopt_args /* { 1298 int s; 1299 int level; 1300 int name; 1301 caddr_t val; 1302 int valsize; 1303 } */ *uap; 1304{ 1305 struct socket *so; 1306 struct sockopt sopt; 1307 int error; 1308 1309 if (uap->val == 0 && uap->valsize != 0) 1310 return (EFAULT); 1311 if (uap->valsize < 0) 1312 return (EINVAL); 1313 1314 mtx_lock(&Giant); 1315 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1316 sopt.sopt_dir = SOPT_SET; 1317 sopt.sopt_level = uap->level; 1318 sopt.sopt_name = uap->name; 1319 sopt.sopt_val = uap->val; 1320 sopt.sopt_valsize = uap->valsize; 1321 sopt.sopt_td = td; 1322 error = sosetopt(so, &sopt); 1323 fputsock(so); 1324 } 1325 mtx_unlock(&Giant); 1326 return(error); 1327} 1328 1329/* 1330 * MPSAFE 1331 */ 1332/* ARGSUSED */ 1333int 1334getsockopt(td, uap) 1335 struct thread *td; 1336 register struct getsockopt_args /* { 1337 int s; 1338 int level; 1339 int name; 1340 void * __restrict val; 1341 socklen_t * __restrict avalsize; 1342 } */ *uap; 1343{ 1344 socklen_t valsize; 1345 int error; 1346 struct socket *so; 1347 struct sockopt sopt; 1348 1349 mtx_lock(&Giant); 1350 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1351 goto done2; 1352 if (uap->val) { 1353 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1354 if (error) 1355 goto done1; 1356 if (valsize < 0) { 1357 error = EINVAL; 1358 goto done1; 1359 } 1360 } else { 1361 valsize = 0; 1362 } 1363 1364 sopt.sopt_dir = SOPT_GET; 1365 sopt.sopt_level = uap->level; 1366 sopt.sopt_name = uap->name; 1367 sopt.sopt_val = uap->val; 1368 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1369 sopt.sopt_td = td; 1370 1371 error = sogetopt(so, &sopt); 1372 if (error == 0) { 1373 valsize = sopt.sopt_valsize; 1374 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1375 } 1376done1: 1377 fputsock(so); 1378done2: 1379 mtx_unlock(&Giant); 1380 return (error); 1381} 1382 1383/* 1384 * getsockname1() - Get socket name. 1385 * 1386 * MPSAFE 1387 */ 1388/* ARGSUSED */ 1389static int 1390getsockname1(td, uap, compat) 1391 struct thread *td; 1392 register struct getsockname_args /* { 1393 int fdes; 1394 struct sockaddr * __restrict asa; 1395 socklen_t * __restrict alen; 1396 } */ *uap; 1397 int compat; 1398{ 1399 struct socket *so; 1400 struct sockaddr *sa; 1401 socklen_t len; 1402 int error; 1403 1404 mtx_lock(&Giant); 1405 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1406 goto done2; 1407 error = copyin(uap->alen, &len, sizeof (len)); 1408 if (error) 1409 goto done1; 1410 if (len < 0) { 1411 error = EINVAL; 1412 goto done1; 1413 } 1414 sa = 0; 1415 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1416 if (error) 1417 goto bad; 1418 if (sa == 0) { 1419 len = 0; 1420 goto gotnothing; 1421 } 1422 1423 len = MIN(len, sa->sa_len); 1424#ifdef COMPAT_OLDSOCK 1425 if (compat) 1426 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1427#endif 1428 error = copyout(sa, uap->asa, (u_int)len); 1429 if (error == 0) 1430gotnothing: 1431 error = copyout(&len, uap->alen, sizeof (len)); 1432bad: 1433 if (sa) 1434 FREE(sa, M_SONAME); 1435done1: 1436 fputsock(so); 1437done2: 1438 mtx_unlock(&Giant); 1439 return (error); 1440} 1441 1442/* 1443 * MPSAFE 1444 */ 1445int 1446getsockname(td, uap) 1447 struct thread *td; 1448 struct getsockname_args *uap; 1449{ 1450 1451 return (getsockname1(td, uap, 0)); 1452} 1453 1454#ifdef COMPAT_OLDSOCK 1455/* 1456 * MPSAFE 1457 */ 1458int 1459ogetsockname(td, uap) 1460 struct thread *td; 1461 struct getsockname_args *uap; 1462{ 1463 1464 return (getsockname1(td, uap, 1)); 1465} 1466#endif /* COMPAT_OLDSOCK */ 1467 1468/* 1469 * getpeername1() - Get name of peer for connected socket. 1470 * 1471 * MPSAFE 1472 */ 1473/* ARGSUSED */ 1474static int 1475getpeername1(td, uap, compat) 1476 struct thread *td; 1477 register struct getpeername_args /* { 1478 int fdes; 1479 struct sockaddr * __restrict asa; 1480 socklen_t * __restrict alen; 1481 } */ *uap; 1482 int compat; 1483{ 1484 struct socket *so; 1485 struct sockaddr *sa; 1486 socklen_t len; 1487 int error; 1488 1489 mtx_lock(&Giant); 1490 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1491 goto done2; 1492 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1493 error = ENOTCONN; 1494 goto done1; 1495 } 1496 error = copyin(uap->alen, &len, sizeof (len)); 1497 if (error) 1498 goto done1; 1499 if (len < 0) { 1500 error = EINVAL; 1501 goto done1; 1502 } 1503 sa = 0; 1504 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1505 if (error) 1506 goto bad; 1507 if (sa == 0) { 1508 len = 0; 1509 goto gotnothing; 1510 } 1511 len = MIN(len, sa->sa_len); 1512#ifdef COMPAT_OLDSOCK 1513 if (compat) 1514 ((struct osockaddr *)sa)->sa_family = 1515 sa->sa_family; 1516#endif 1517 error = copyout(sa, uap->asa, (u_int)len); 1518 if (error) 1519 goto bad; 1520gotnothing: 1521 error = copyout(&len, uap->alen, sizeof (len)); 1522bad: 1523 if (sa) 1524 FREE(sa, M_SONAME); 1525done1: 1526 fputsock(so); 1527done2: 1528 mtx_unlock(&Giant); 1529 return (error); 1530} 1531 1532/* 1533 * MPSAFE 1534 */ 1535int 1536getpeername(td, uap) 1537 struct thread *td; 1538 struct getpeername_args *uap; 1539{ 1540 1541 return (getpeername1(td, uap, 0)); 1542} 1543 1544#ifdef COMPAT_OLDSOCK 1545/* 1546 * MPSAFE 1547 */ 1548int 1549ogetpeername(td, uap) 1550 struct thread *td; 1551 struct ogetpeername_args *uap; 1552{ 1553 1554 /* XXX uap should have type `getpeername_args *' to begin with. */ 1555 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1556} 1557#endif /* COMPAT_OLDSOCK */ 1558 1559int 1560sockargs(mp, buf, buflen, type) 1561 struct mbuf **mp; 1562 caddr_t buf; 1563 int buflen, type; 1564{ 1565 register struct sockaddr *sa; 1566 register struct mbuf *m; 1567 int error; 1568 1569 if ((u_int)buflen > MLEN) { 1570#ifdef COMPAT_OLDSOCK 1571 if (type == MT_SONAME && (u_int)buflen <= 112) 1572 buflen = MLEN; /* unix domain compat. hack */ 1573 else 1574#endif 1575 return (EINVAL); 1576 } 1577 m = m_get(M_TRYWAIT, type); 1578 if (m == NULL) 1579 return (ENOBUFS); 1580 m->m_len = buflen; 1581 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1582 if (error) 1583 (void) m_free(m); 1584 else { 1585 *mp = m; 1586 if (type == MT_SONAME) { 1587 sa = mtod(m, struct sockaddr *); 1588 1589#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1590 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1591 sa->sa_family = sa->sa_len; 1592#endif 1593 sa->sa_len = buflen; 1594 } 1595 } 1596 return (error); 1597} 1598 1599int 1600getsockaddr(namp, uaddr, len) 1601 struct sockaddr **namp; 1602 caddr_t uaddr; 1603 size_t len; 1604{ 1605 struct sockaddr *sa; 1606 int error; 1607 1608 if (len > SOCK_MAXADDRLEN) 1609 return ENAMETOOLONG; 1610 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1611 error = copyin(uaddr, sa, len); 1612 if (error) { 1613 FREE(sa, M_SONAME); 1614 } else { 1615#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1616 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1617 sa->sa_family = sa->sa_len; 1618#endif 1619 sa->sa_len = len; 1620 *namp = sa; 1621 } 1622 return error; 1623} 1624 1625/* 1626 * sendfile(2) 1627 * 1628 * MPSAFE 1629 * 1630 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1631 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1632 * 1633 * Send a file specified by 'fd' and starting at 'offset' to a socket 1634 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1635 * nbytes == 0. Optionally add a header and/or trailer to the socket 1636 * output. If specified, write the total number of bytes sent into *sbytes. 1637 * 1638 */ 1639int 1640sendfile(struct thread *td, struct sendfile_args *uap) 1641{ 1642 1643 return (do_sendfile(td, uap, 0)); 1644} 1645 1646#ifdef COMPAT_FREEBSD4 1647int 1648freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1649{ 1650 struct sendfile_args args; 1651 1652 args.fd = uap->fd; 1653 args.s = uap->s; 1654 args.offset = uap->offset; 1655 args.nbytes = uap->nbytes; 1656 args.hdtr = uap->hdtr; 1657 args.sbytes = uap->sbytes; 1658 args.flags = uap->flags; 1659 1660 return (do_sendfile(td, &args, 1)); 1661} 1662#endif /* COMPAT_FREEBSD4 */ 1663 1664static int 1665do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1666{ 1667 struct vnode *vp; 1668 struct vm_object *obj; 1669 struct socket *so = NULL; 1670 struct mbuf *m; 1671 struct sf_buf *sf; 1672 struct vm_page *pg; 1673 struct writev_args nuap; 1674 struct sf_hdtr hdtr; 1675 off_t off, xfsize, hdtr_size, sbytes = 0; 1676 int error, s; 1677 1678 mtx_lock(&Giant); 1679 1680 hdtr_size = 0; 1681 1682 /* 1683 * The descriptor must be a regular file and have a backing VM object. 1684 */ 1685 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1686 goto done; 1687 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1688 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1689 error = EINVAL; 1690 VOP_UNLOCK(vp, 0, td); 1691 goto done; 1692 } 1693 VOP_UNLOCK(vp, 0, td); 1694 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1695 goto done; 1696 if (so->so_type != SOCK_STREAM) { 1697 error = EINVAL; 1698 goto done; 1699 } 1700 if ((so->so_state & SS_ISCONNECTED) == 0) { 1701 error = ENOTCONN; 1702 goto done; 1703 } 1704 if (uap->offset < 0) { 1705 error = EINVAL; 1706 goto done; 1707 } 1708 1709#ifdef MAC 1710 error = mac_check_socket_send(td->td_ucred, so); 1711 if (error) 1712 goto done; 1713#endif 1714 1715 /* 1716 * If specified, get the pointer to the sf_hdtr struct for 1717 * any headers/trailers. 1718 */ 1719 if (uap->hdtr != NULL) { 1720 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1721 if (error) 1722 goto done; 1723 /* 1724 * Send any headers. Wimp out and use writev(2). 1725 */ 1726 if (hdtr.headers != NULL) { 1727 nuap.fd = uap->s; 1728 nuap.iovp = hdtr.headers; 1729 nuap.iovcnt = hdtr.hdr_cnt; 1730 error = writev(td, &nuap); 1731 if (error) 1732 goto done; 1733 if (compat) 1734 sbytes += td->td_retval[0]; 1735 else 1736 hdtr_size += td->td_retval[0]; 1737 } 1738 } 1739 1740 /* 1741 * Protect against multiple writers to the socket. 1742 */ 1743 (void) sblock(&so->so_snd, M_WAITOK); 1744 1745 /* 1746 * Loop through the pages in the file, starting with the requested 1747 * offset. Get a file page (do I/O if necessary), map the file page 1748 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1749 * it on the socket. 1750 */ 1751 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1752 vm_pindex_t pindex; 1753 vm_offset_t pgoff; 1754 1755 pindex = OFF_TO_IDX(off); 1756 VM_OBJECT_LOCK(obj); 1757retry_lookup: 1758 /* 1759 * Calculate the amount to transfer. Not to exceed a page, 1760 * the EOF, or the passed in nbytes. 1761 */ 1762 xfsize = obj->un_pager.vnp.vnp_size - off; 1763 VM_OBJECT_UNLOCK(obj); 1764 if (xfsize > PAGE_SIZE) 1765 xfsize = PAGE_SIZE; 1766 pgoff = (vm_offset_t)(off & PAGE_MASK); 1767 if (PAGE_SIZE - pgoff < xfsize) 1768 xfsize = PAGE_SIZE - pgoff; 1769 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1770 xfsize = uap->nbytes - sbytes; 1771 if (xfsize <= 0) 1772 break; 1773 /* 1774 * Optimize the non-blocking case by looking at the socket space 1775 * before going to the extra work of constituting the sf_buf. 1776 */ 1777 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1778 if (so->so_state & SS_CANTSENDMORE) 1779 error = EPIPE; 1780 else 1781 error = EAGAIN; 1782 sbunlock(&so->so_snd); 1783 goto done; 1784 } 1785 VM_OBJECT_LOCK(obj); 1786 /* 1787 * Attempt to look up the page. 1788 * 1789 * Allocate if not found 1790 * 1791 * Wait and loop if busy. 1792 */ 1793 pg = vm_page_lookup(obj, pindex); 1794 1795 if (pg == NULL) { 1796 pg = vm_page_alloc(obj, pindex, 1797 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1798 if (pg == NULL) { 1799 VM_OBJECT_UNLOCK(obj); 1800 VM_WAIT; 1801 VM_OBJECT_LOCK(obj); 1802 goto retry_lookup; 1803 } 1804 vm_page_lock_queues(); 1805 vm_page_wakeup(pg); 1806 } else { 1807 vm_page_lock_queues(); 1808 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1809 goto retry_lookup; 1810 /* 1811 * Wire the page so it does not get ripped out from 1812 * under us. 1813 */ 1814 vm_page_wire(pg); 1815 } 1816 1817 /* 1818 * If page is not valid for what we need, initiate I/O 1819 */ 1820 1821 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1822 int bsize, resid; 1823 1824 /* 1825 * Ensure that our page is still around when the I/O 1826 * completes. 1827 */ 1828 vm_page_io_start(pg); 1829 vm_page_unlock_queues(); 1830 VM_OBJECT_UNLOCK(obj); 1831 1832 /* 1833 * Get the page from backing store. 1834 */ 1835 bsize = vp->v_mount->mnt_stat.f_iosize; 1836 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1837 /* 1838 * XXXMAC: Because we don't have fp->f_cred here, 1839 * we pass in NOCRED. This is probably wrong, but 1840 * is consistent with our original implementation. 1841 */ 1842 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1843 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1844 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1845 td->td_ucred, NOCRED, &resid, td); 1846 VOP_UNLOCK(vp, 0, td); 1847 if (error) 1848 VM_OBJECT_LOCK(obj); 1849 vm_page_lock_queues(); 1850 vm_page_flag_clear(pg, PG_ZERO); 1851 vm_page_io_finish(pg); 1852 if (error) { 1853 vm_page_unwire(pg, 0); 1854 /* 1855 * See if anyone else might know about this page. 1856 * If not and it is not valid, then free it. 1857 */ 1858 if (pg->wire_count == 0 && pg->valid == 0 && 1859 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1860 pg->hold_count == 0) { 1861 vm_page_busy(pg); 1862 vm_page_free(pg); 1863 } 1864 vm_page_unlock_queues(); 1865 VM_OBJECT_UNLOCK(obj); 1866 sbunlock(&so->so_snd); 1867 goto done; 1868 } 1869 } else 1870 VM_OBJECT_UNLOCK(obj); 1871 vm_page_unlock_queues(); 1872 1873 /* 1874 * Get a sendfile buf. We usually wait as long as necessary, 1875 * but this wait can be interrupted. 1876 */ 1877 if ((sf = sf_buf_alloc(pg)) == NULL) { 1878 vm_page_lock_queues(); 1879 vm_page_unwire(pg, 0); 1880 if (pg->wire_count == 0 && pg->object == NULL) 1881 vm_page_free(pg); 1882 vm_page_unlock_queues(); 1883 sbunlock(&so->so_snd); 1884 error = EINTR; 1885 goto done; 1886 } 1887 1888 /* 1889 * Get an mbuf header and set it up as having external storage. 1890 */ 1891 MGETHDR(m, M_TRYWAIT, MT_DATA); 1892 if (m == NULL) { 1893 error = ENOBUFS; 1894 sf_buf_free((void *)sf_buf_kva(sf), sf); 1895 sbunlock(&so->so_snd); 1896 goto done; 1897 } 1898 /* 1899 * Setup external storage for mbuf. 1900 */ 1901 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_free, sf, M_RDONLY, 1902 EXT_SFBUF); 1903 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1904 m->m_pkthdr.len = m->m_len = xfsize; 1905 /* 1906 * Add the buffer to the socket buffer chain. 1907 */ 1908 s = splnet(); 1909retry_space: 1910 /* 1911 * Make sure that the socket is still able to take more data. 1912 * CANTSENDMORE being true usually means that the connection 1913 * was closed. so_error is true when an error was sensed after 1914 * a previous send. 1915 * The state is checked after the page mapping and buffer 1916 * allocation above since those operations may block and make 1917 * any socket checks stale. From this point forward, nothing 1918 * blocks before the pru_send (or more accurately, any blocking 1919 * results in a loop back to here to re-check). 1920 */ 1921 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1922 if (so->so_state & SS_CANTSENDMORE) { 1923 error = EPIPE; 1924 } else { 1925 error = so->so_error; 1926 so->so_error = 0; 1927 } 1928 m_freem(m); 1929 sbunlock(&so->so_snd); 1930 splx(s); 1931 goto done; 1932 } 1933 /* 1934 * Wait for socket space to become available. We do this just 1935 * after checking the connection state above in order to avoid 1936 * a race condition with sbwait(). 1937 */ 1938 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1939 if (so->so_state & SS_NBIO) { 1940 m_freem(m); 1941 sbunlock(&so->so_snd); 1942 splx(s); 1943 error = EAGAIN; 1944 goto done; 1945 } 1946 error = sbwait(&so->so_snd); 1947 /* 1948 * An error from sbwait usually indicates that we've 1949 * been interrupted by a signal. If we've sent anything 1950 * then return bytes sent, otherwise return the error. 1951 */ 1952 if (error) { 1953 m_freem(m); 1954 sbunlock(&so->so_snd); 1955 splx(s); 1956 goto done; 1957 } 1958 goto retry_space; 1959 } 1960 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 1961 splx(s); 1962 if (error) { 1963 sbunlock(&so->so_snd); 1964 goto done; 1965 } 1966 } 1967 sbunlock(&so->so_snd); 1968 1969 /* 1970 * Send trailers. Wimp out and use writev(2). 1971 */ 1972 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1973 nuap.fd = uap->s; 1974 nuap.iovp = hdtr.trailers; 1975 nuap.iovcnt = hdtr.trl_cnt; 1976 error = writev(td, &nuap); 1977 if (error) 1978 goto done; 1979 if (compat) 1980 sbytes += td->td_retval[0]; 1981 else 1982 hdtr_size += td->td_retval[0]; 1983 } 1984 1985done: 1986 /* 1987 * If there was no error we have to clear td->td_retval[0] 1988 * because it may have been set by writev. 1989 */ 1990 if (error == 0) { 1991 td->td_retval[0] = 0; 1992 } 1993 if (uap->sbytes != NULL) { 1994 if (!compat) 1995 sbytes += hdtr_size; 1996 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 1997 } 1998 if (vp) 1999 vrele(vp); 2000 if (so) 2001 fputsock(so); 2002 2003 mtx_unlock(&Giant); 2004 2005 if (error == ERESTART) 2006 error = EINTR; 2007 2008 return (error); 2009} 2010