kern_sendfile.c revision 124396
1/* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 124396 2004-01-11 19:56:42Z des $"); 41 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44#include "opt_mac.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/lock.h> 50#include <sys/mac.h> 51#include <sys/mutex.h> 52#include <sys/sysproto.h> 53#include <sys/malloc.h> 54#include <sys/filedesc.h> 55#include <sys/event.h> 56#include <sys/proc.h> 57#include <sys/fcntl.h> 58#include <sys/file.h> 59#include <sys/filio.h> 60#include <sys/mount.h> 61#include <sys/mbuf.h> 62#include <sys/protosw.h> 63#include <sys/sf_buf.h> 64#include <sys/socket.h> 65#include <sys/socketvar.h> 66#include <sys/signalvar.h> 67#include <sys/syscallsubr.h> 68#include <sys/uio.h> 69#include <sys/vnode.h> 70#ifdef KTRACE 71#include <sys/ktrace.h> 72#endif 73 74#include <vm/vm.h> 75#include <vm/vm_object.h> 76#include <vm/vm_page.h> 77#include <vm/vm_pageout.h> 78#include <vm/vm_kern.h> 79#include <vm/vm_extern.h> 80 81static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 82static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 83 84static int accept1(struct thread *td, struct accept_args *uap, int compat); 85static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 86static int getsockname1(struct thread *td, struct getsockname_args *uap, 87 int compat); 88static int getpeername1(struct thread *td, struct getpeername_args *uap, 89 int compat); 90 91/* 92 * System call interface to the socket abstraction. 93 */ 94#if defined(COMPAT_43) || defined(COMPAT_SUNOS) 95#define COMPAT_OLDSOCK 96#endif 97 98/* 99 * MPSAFE 100 */ 101int 102socket(td, uap) 103 struct thread *td; 104 register struct socket_args /* { 105 int domain; 106 int type; 107 int protocol; 108 } */ *uap; 109{ 110 struct filedesc *fdp; 111 struct socket *so; 112 struct file *fp; 113 int fd, error; 114 115 fdp = td->td_proc->p_fd; 116 error = falloc(td, &fp, &fd); 117 if (error) 118 goto done2; 119 /* An extra reference on `fp' has been held for us by falloc(). */ 120 mtx_lock(&Giant); 121 error = socreate(uap->domain, &so, uap->type, uap->protocol, 122 td->td_ucred, td); 123 mtx_unlock(&Giant); 124 FILEDESC_LOCK(fdp); 125 if (error) { 126 if (fdp->fd_ofiles[fd] == fp) { 127 fdp->fd_ofiles[fd] = NULL; 128 fdunused(fdp, fd); 129 FILEDESC_UNLOCK(fdp); 130 fdrop(fp, td); 131 } else { 132 FILEDESC_UNLOCK(fdp); 133 } 134 } else { 135 fp->f_data = so; /* already has ref count */ 136 fp->f_flag = FREAD|FWRITE; 137 fp->f_ops = &socketops; 138 fp->f_type = DTYPE_SOCKET; 139 FILEDESC_UNLOCK(fdp); 140 td->td_retval[0] = fd; 141 } 142 fdrop(fp, td); 143done2: 144 return (error); 145} 146 147/* 148 * MPSAFE 149 */ 150/* ARGSUSED */ 151int 152bind(td, uap) 153 struct thread *td; 154 register struct bind_args /* { 155 int s; 156 caddr_t name; 157 int namelen; 158 } */ *uap; 159{ 160 struct sockaddr *sa; 161 int error; 162 163 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 164 return (error); 165 166 return (kern_bind(td, uap->s, sa)); 167} 168 169int 170kern_bind(td, fd, sa) 171 struct thread *td; 172 int fd; 173 struct sockaddr *sa; 174{ 175 struct socket *so; 176 int error; 177 178 mtx_lock(&Giant); 179 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 180 goto done2; 181#ifdef MAC 182 error = mac_check_socket_bind(td->td_ucred, so, sa); 183 if (error) 184 goto done1; 185#endif 186 error = sobind(so, sa, td); 187#ifdef MAC 188done1: 189#endif 190 fputsock(so); 191done2: 192 mtx_unlock(&Giant); 193 FREE(sa, M_SONAME); 194 return (error); 195} 196 197/* 198 * MPSAFE 199 */ 200/* ARGSUSED */ 201int 202listen(td, uap) 203 struct thread *td; 204 register struct listen_args /* { 205 int s; 206 int backlog; 207 } */ *uap; 208{ 209 struct socket *so; 210 int error; 211 212 mtx_lock(&Giant); 213 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 214#ifdef MAC 215 error = mac_check_socket_listen(td->td_ucred, so); 216 if (error) 217 goto done; 218#endif 219 error = solisten(so, uap->backlog, td); 220#ifdef MAC 221done: 222#endif 223 fputsock(so); 224 } 225 mtx_unlock(&Giant); 226 return(error); 227} 228 229/* 230 * accept1() 231 * MPSAFE 232 */ 233static int 234accept1(td, uap, compat) 235 struct thread *td; 236 register struct accept_args /* { 237 int s; 238 struct sockaddr * __restrict name; 239 socklen_t * __restrict anamelen; 240 } */ *uap; 241 int compat; 242{ 243 struct filedesc *fdp; 244 struct file *nfp = NULL; 245 struct sockaddr *sa; 246 socklen_t namelen; 247 int error, s; 248 struct socket *head, *so; 249 int fd; 250 u_int fflag; 251 pid_t pgid; 252 int tmp; 253 254 fdp = td->td_proc->p_fd; 255 if (uap->name) { 256 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 257 if(error) 258 goto done3; 259 if (namelen < 0) { 260 error = EINVAL; 261 goto done3; 262 } 263 } 264 mtx_lock(&Giant); 265 error = fgetsock(td, uap->s, &head, &fflag); 266 if (error) 267 goto done2; 268 s = splnet(); 269 if ((head->so_options & SO_ACCEPTCONN) == 0) { 270 splx(s); 271 error = EINVAL; 272 goto done; 273 } 274 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 275 if (head->so_state & SS_CANTRCVMORE) { 276 head->so_error = ECONNABORTED; 277 break; 278 } 279 if ((head->so_state & SS_NBIO) != 0) { 280 head->so_error = EWOULDBLOCK; 281 break; 282 } 283 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 284 "accept", 0); 285 if (error) { 286 splx(s); 287 goto done; 288 } 289 } 290 if (head->so_error) { 291 error = head->so_error; 292 head->so_error = 0; 293 splx(s); 294 goto done; 295 } 296 297 /* 298 * At this point we know that there is at least one connection 299 * ready to be accepted. Remove it from the queue prior to 300 * allocating the file descriptor for it since falloc() may 301 * block allowing another process to accept the connection 302 * instead. 303 */ 304 so = TAILQ_FIRST(&head->so_comp); 305 TAILQ_REMOVE(&head->so_comp, so, so_list); 306 head->so_qlen--; 307 308 error = falloc(td, &nfp, &fd); 309 if (error) { 310 /* 311 * Probably ran out of file descriptors. Put the 312 * unaccepted connection back onto the queue and 313 * do another wakeup so some other process might 314 * have a chance at it. 315 */ 316 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 317 head->so_qlen++; 318 wakeup_one(&head->so_timeo); 319 splx(s); 320 goto done; 321 } 322 /* An extra reference on `nfp' has been held for us by falloc(). */ 323 td->td_retval[0] = fd; 324 325 /* connection has been removed from the listen queue */ 326 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 327 328 so->so_state &= ~SS_COMP; 329 so->so_head = NULL; 330 pgid = fgetown(&head->so_sigio); 331 if (pgid != 0) 332 fsetown(pgid, &so->so_sigio); 333 334 FILE_LOCK(nfp); 335 soref(so); /* file descriptor reference */ 336 nfp->f_data = so; /* nfp has ref count from falloc */ 337 nfp->f_flag = fflag; 338 nfp->f_ops = &socketops; 339 nfp->f_type = DTYPE_SOCKET; 340 FILE_UNLOCK(nfp); 341 /* Sync socket nonblocking/async state with file flags */ 342 tmp = fflag & FNONBLOCK; 343 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 344 tmp = fflag & FASYNC; 345 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 346 sa = 0; 347 error = soaccept(so, &sa); 348 if (error) { 349 /* 350 * return a namelen of zero for older code which might 351 * ignore the return value from accept. 352 */ 353 if (uap->name != NULL) { 354 namelen = 0; 355 (void) copyout(&namelen, 356 uap->anamelen, sizeof(*uap->anamelen)); 357 } 358 goto noconnection; 359 } 360 if (sa == NULL) { 361 namelen = 0; 362 if (uap->name) 363 goto gotnoname; 364 splx(s); 365 error = 0; 366 goto done; 367 } 368 if (uap->name) { 369 /* check sa_len before it is destroyed */ 370 if (namelen > sa->sa_len) 371 namelen = sa->sa_len; 372#ifdef COMPAT_OLDSOCK 373 if (compat) 374 ((struct osockaddr *)sa)->sa_family = 375 sa->sa_family; 376#endif 377 error = copyout(sa, uap->name, (u_int)namelen); 378 if (!error) 379gotnoname: 380 error = copyout(&namelen, 381 uap->anamelen, sizeof (*uap->anamelen)); 382 } 383noconnection: 384 if (sa) 385 FREE(sa, M_SONAME); 386 387 /* 388 * close the new descriptor, assuming someone hasn't ripped it 389 * out from under us. 390 */ 391 if (error) { 392 FILEDESC_LOCK(fdp); 393 if (fdp->fd_ofiles[fd] == nfp) { 394 fdp->fd_ofiles[fd] = NULL; 395 fdunused(fdp, fd); 396 FILEDESC_UNLOCK(fdp); 397 fdrop(nfp, td); 398 } else { 399 FILEDESC_UNLOCK(fdp); 400 } 401 } 402 splx(s); 403 404 /* 405 * Release explicitly held references before returning. 406 */ 407done: 408 if (nfp != NULL) 409 fdrop(nfp, td); 410 fputsock(head); 411done2: 412 mtx_unlock(&Giant); 413done3: 414 return (error); 415} 416 417/* 418 * MPSAFE (accept1() is MPSAFE) 419 */ 420int 421accept(td, uap) 422 struct thread *td; 423 struct accept_args *uap; 424{ 425 426 return (accept1(td, uap, 0)); 427} 428 429#ifdef COMPAT_OLDSOCK 430/* 431 * MPSAFE (accept1() is MPSAFE) 432 */ 433int 434oaccept(td, uap) 435 struct thread *td; 436 struct accept_args *uap; 437{ 438 439 return (accept1(td, uap, 1)); 440} 441#endif /* COMPAT_OLDSOCK */ 442 443/* 444 * MPSAFE 445 */ 446/* ARGSUSED */ 447int 448connect(td, uap) 449 struct thread *td; 450 register struct connect_args /* { 451 int s; 452 caddr_t name; 453 int namelen; 454 } */ *uap; 455{ 456 struct sockaddr *sa; 457 int error; 458 459 error = getsockaddr(&sa, uap->name, uap->namelen); 460 if (error) 461 return (error); 462 463 return (kern_connect(td, uap->s, sa)); 464} 465 466 467int 468kern_connect(td, fd, sa) 469 struct thread *td; 470 int fd; 471 struct sockaddr *sa; 472{ 473 struct socket *so; 474 int error, s; 475 int interrupted = 0; 476 477 mtx_lock(&Giant); 478 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 479 goto done2; 480 if (so->so_state & SS_ISCONNECTING) { 481 error = EALREADY; 482 goto done1; 483 } 484#ifdef MAC 485 error = mac_check_socket_connect(td->td_ucred, so, sa); 486 if (error) 487 goto bad; 488#endif 489 error = soconnect(so, sa, td); 490 if (error) 491 goto bad; 492 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 493 error = EINPROGRESS; 494 goto done1; 495 } 496 s = splnet(); 497 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 498 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 499 if (error) { 500 if (error == EINTR || error == ERESTART) 501 interrupted = 1; 502 break; 503 } 504 } 505 if (error == 0) { 506 error = so->so_error; 507 so->so_error = 0; 508 } 509 splx(s); 510bad: 511 if (!interrupted) 512 so->so_state &= ~SS_ISCONNECTING; 513 if (error == ERESTART) 514 error = EINTR; 515done1: 516 fputsock(so); 517done2: 518 mtx_unlock(&Giant); 519 FREE(sa, M_SONAME); 520 return (error); 521} 522 523/* 524 * MPSAFE 525 */ 526int 527socketpair(td, uap) 528 struct thread *td; 529 register struct socketpair_args /* { 530 int domain; 531 int type; 532 int protocol; 533 int *rsv; 534 } */ *uap; 535{ 536 register struct filedesc *fdp = td->td_proc->p_fd; 537 struct file *fp1, *fp2; 538 struct socket *so1, *so2; 539 int fd, error, sv[2]; 540 541 mtx_lock(&Giant); 542 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 543 td->td_ucred, td); 544 if (error) 545 goto done2; 546 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 547 td->td_ucred, td); 548 if (error) 549 goto free1; 550 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 551 error = falloc(td, &fp1, &fd); 552 if (error) 553 goto free2; 554 sv[0] = fd; 555 fp1->f_data = so1; /* so1 already has ref count */ 556 error = falloc(td, &fp2, &fd); 557 if (error) 558 goto free3; 559 fp2->f_data = so2; /* so2 already has ref count */ 560 sv[1] = fd; 561 error = soconnect2(so1, so2); 562 if (error) 563 goto free4; 564 if (uap->type == SOCK_DGRAM) { 565 /* 566 * Datagram socket connection is asymmetric. 567 */ 568 error = soconnect2(so2, so1); 569 if (error) 570 goto free4; 571 } 572 FILE_LOCK(fp1); 573 fp1->f_flag = FREAD|FWRITE; 574 fp1->f_ops = &socketops; 575 fp1->f_type = DTYPE_SOCKET; 576 FILE_UNLOCK(fp1); 577 FILE_LOCK(fp2); 578 fp2->f_flag = FREAD|FWRITE; 579 fp2->f_ops = &socketops; 580 fp2->f_type = DTYPE_SOCKET; 581 FILE_UNLOCK(fp2); 582 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 583 fdrop(fp1, td); 584 fdrop(fp2, td); 585 goto done2; 586free4: 587 FILEDESC_LOCK(fdp); 588 if (fdp->fd_ofiles[sv[1]] == fp2) { 589 fdp->fd_ofiles[sv[1]] = NULL; 590 fdunused(fdp, sv[1]); 591 FILEDESC_UNLOCK(fdp); 592 fdrop(fp2, td); 593 } else { 594 FILEDESC_UNLOCK(fdp); 595 } 596 fdrop(fp2, td); 597free3: 598 FILEDESC_LOCK(fdp); 599 if (fdp->fd_ofiles[sv[0]] == fp1) { 600 fdp->fd_ofiles[sv[0]] = NULL; 601 fdunused(fdp, sv[0]); 602 FILEDESC_UNLOCK(fdp); 603 fdrop(fp1, td); 604 } else { 605 FILEDESC_UNLOCK(fdp); 606 } 607 fdrop(fp1, td); 608free2: 609 (void)soclose(so2); 610free1: 611 (void)soclose(so1); 612done2: 613 mtx_unlock(&Giant); 614 return (error); 615} 616 617static int 618sendit(td, s, mp, flags) 619 register struct thread *td; 620 int s; 621 register struct msghdr *mp; 622 int flags; 623{ 624 struct mbuf *control; 625 struct sockaddr *to; 626 int error; 627 628 if (mp->msg_name != NULL) { 629 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 630 if (error) { 631 to = NULL; 632 goto bad; 633 } 634 mp->msg_name = to; 635 } else { 636 to = NULL; 637 } 638 639 if (mp->msg_control) { 640 if (mp->msg_controllen < sizeof(struct cmsghdr) 641#ifdef COMPAT_OLDSOCK 642 && mp->msg_flags != MSG_COMPAT 643#endif 644 ) { 645 error = EINVAL; 646 goto bad; 647 } 648 error = sockargs(&control, mp->msg_control, 649 mp->msg_controllen, MT_CONTROL); 650 if (error) 651 goto bad; 652#ifdef COMPAT_OLDSOCK 653 if (mp->msg_flags == MSG_COMPAT) { 654 register struct cmsghdr *cm; 655 656 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 657 if (control == 0) { 658 error = ENOBUFS; 659 goto bad; 660 } else { 661 cm = mtod(control, struct cmsghdr *); 662 cm->cmsg_len = control->m_len; 663 cm->cmsg_level = SOL_SOCKET; 664 cm->cmsg_type = SCM_RIGHTS; 665 } 666 } 667#endif 668 } else { 669 control = NULL; 670 } 671 672 error = kern_sendit(td, s, mp, flags, control); 673 674bad: 675 if (to) 676 FREE(to, M_SONAME); 677 return (error); 678} 679 680int 681kern_sendit(td, s, mp, flags, control) 682 struct thread *td; 683 int s; 684 struct msghdr *mp; 685 int flags; 686 struct mbuf *control; 687{ 688 struct uio auio; 689 struct iovec *iov; 690 struct socket *so; 691 int i; 692 int len, error; 693#ifdef KTRACE 694 struct iovec *ktriov = NULL; 695 struct uio ktruio; 696 int iovlen; 697#endif 698 699 mtx_lock(&Giant); 700 if ((error = fgetsock(td, s, &so, NULL)) != 0) 701 goto bad2; 702 703#ifdef MAC 704 error = mac_check_socket_send(td->td_ucred, so); 705 if (error) 706 goto bad; 707#endif 708 709 auio.uio_iov = mp->msg_iov; 710 auio.uio_iovcnt = mp->msg_iovlen; 711 auio.uio_segflg = UIO_USERSPACE; 712 auio.uio_rw = UIO_WRITE; 713 auio.uio_td = td; 714 auio.uio_offset = 0; /* XXX */ 715 auio.uio_resid = 0; 716 iov = mp->msg_iov; 717 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 718 if ((auio.uio_resid += iov->iov_len) < 0) { 719 error = EINVAL; 720 goto bad; 721 } 722 } 723#ifdef KTRACE 724 if (KTRPOINT(td, KTR_GENIO)) { 725 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 726 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 727 bcopy(auio.uio_iov, ktriov, iovlen); 728 ktruio = auio; 729 } 730#endif 731 len = auio.uio_resid; 732 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 733 0, control, flags, td); 734 if (error) { 735 if (auio.uio_resid != len && (error == ERESTART || 736 error == EINTR || error == EWOULDBLOCK)) 737 error = 0; 738 /* Generation of SIGPIPE can be controlled per socket */ 739 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 740 PROC_LOCK(td->td_proc); 741 psignal(td->td_proc, SIGPIPE); 742 PROC_UNLOCK(td->td_proc); 743 } 744 } 745 if (error == 0) 746 td->td_retval[0] = len - auio.uio_resid; 747#ifdef KTRACE 748 if (ktriov != NULL) { 749 if (error == 0) { 750 ktruio.uio_iov = ktriov; 751 ktruio.uio_resid = td->td_retval[0]; 752 ktrgenio(s, UIO_WRITE, &ktruio, error); 753 } 754 FREE(ktriov, M_TEMP); 755 } 756#endif 757bad: 758 fputsock(so); 759bad2: 760 mtx_unlock(&Giant); 761 return (error); 762} 763 764/* 765 * MPSAFE 766 */ 767int 768sendto(td, uap) 769 struct thread *td; 770 register struct sendto_args /* { 771 int s; 772 caddr_t buf; 773 size_t len; 774 int flags; 775 caddr_t to; 776 int tolen; 777 } */ *uap; 778{ 779 struct msghdr msg; 780 struct iovec aiov; 781 int error; 782 783 msg.msg_name = uap->to; 784 msg.msg_namelen = uap->tolen; 785 msg.msg_iov = &aiov; 786 msg.msg_iovlen = 1; 787 msg.msg_control = 0; 788#ifdef COMPAT_OLDSOCK 789 msg.msg_flags = 0; 790#endif 791 aiov.iov_base = uap->buf; 792 aiov.iov_len = uap->len; 793 error = sendit(td, uap->s, &msg, uap->flags); 794 return (error); 795} 796 797#ifdef COMPAT_OLDSOCK 798/* 799 * MPSAFE 800 */ 801int 802osend(td, uap) 803 struct thread *td; 804 register struct osend_args /* { 805 int s; 806 caddr_t buf; 807 int len; 808 int flags; 809 } */ *uap; 810{ 811 struct msghdr msg; 812 struct iovec aiov; 813 int error; 814 815 msg.msg_name = 0; 816 msg.msg_namelen = 0; 817 msg.msg_iov = &aiov; 818 msg.msg_iovlen = 1; 819 aiov.iov_base = uap->buf; 820 aiov.iov_len = uap->len; 821 msg.msg_control = 0; 822 msg.msg_flags = 0; 823 error = sendit(td, uap->s, &msg, uap->flags); 824 return (error); 825} 826 827/* 828 * MPSAFE 829 */ 830int 831osendmsg(td, uap) 832 struct thread *td; 833 register struct osendmsg_args /* { 834 int s; 835 caddr_t msg; 836 int flags; 837 } */ *uap; 838{ 839 struct msghdr msg; 840 struct iovec aiov[UIO_SMALLIOV], *iov; 841 int error; 842 843 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 844 if (error) 845 goto done2; 846 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 847 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 848 error = EMSGSIZE; 849 goto done2; 850 } 851 MALLOC(iov, struct iovec *, 852 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 853 M_WAITOK); 854 } else { 855 iov = aiov; 856 } 857 error = copyin(msg.msg_iov, iov, 858 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 859 if (error) 860 goto done; 861 msg.msg_flags = MSG_COMPAT; 862 msg.msg_iov = iov; 863 error = sendit(td, uap->s, &msg, uap->flags); 864done: 865 if (iov != aiov) 866 FREE(iov, M_IOV); 867done2: 868 return (error); 869} 870#endif 871 872/* 873 * MPSAFE 874 */ 875int 876sendmsg(td, uap) 877 struct thread *td; 878 register struct sendmsg_args /* { 879 int s; 880 caddr_t msg; 881 int flags; 882 } */ *uap; 883{ 884 struct msghdr msg; 885 struct iovec aiov[UIO_SMALLIOV], *iov; 886 int error; 887 888 error = copyin(uap->msg, &msg, sizeof (msg)); 889 if (error) 890 goto done2; 891 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 892 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 893 error = EMSGSIZE; 894 goto done2; 895 } 896 MALLOC(iov, struct iovec *, 897 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 898 M_WAITOK); 899 } else { 900 iov = aiov; 901 } 902 if (msg.msg_iovlen && 903 (error = copyin(msg.msg_iov, iov, 904 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 905 goto done; 906 msg.msg_iov = iov; 907#ifdef COMPAT_OLDSOCK 908 msg.msg_flags = 0; 909#endif 910 error = sendit(td, uap->s, &msg, uap->flags); 911done: 912 if (iov != aiov) 913 FREE(iov, M_IOV); 914done2: 915 return (error); 916} 917 918static int 919recvit(td, s, mp, namelenp) 920 register struct thread *td; 921 int s; 922 register struct msghdr *mp; 923 void *namelenp; 924{ 925 struct uio auio; 926 register struct iovec *iov; 927 register int i; 928 socklen_t len; 929 int error; 930 struct mbuf *m, *control = 0; 931 caddr_t ctlbuf; 932 struct socket *so; 933 struct sockaddr *fromsa = 0; 934#ifdef KTRACE 935 struct iovec *ktriov = NULL; 936 struct uio ktruio; 937 int iovlen; 938#endif 939 940 mtx_lock(&Giant); 941 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 942 mtx_unlock(&Giant); 943 return (error); 944 } 945 946#ifdef MAC 947 error = mac_check_socket_receive(td->td_ucred, so); 948 if (error) { 949 fputsock(so); 950 mtx_unlock(&Giant); 951 return (error); 952 } 953#endif 954 955 auio.uio_iov = mp->msg_iov; 956 auio.uio_iovcnt = mp->msg_iovlen; 957 auio.uio_segflg = UIO_USERSPACE; 958 auio.uio_rw = UIO_READ; 959 auio.uio_td = td; 960 auio.uio_offset = 0; /* XXX */ 961 auio.uio_resid = 0; 962 iov = mp->msg_iov; 963 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 964 if ((auio.uio_resid += iov->iov_len) < 0) { 965 fputsock(so); 966 return (EINVAL); 967 } 968 } 969#ifdef KTRACE 970 if (KTRPOINT(td, KTR_GENIO)) { 971 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 972 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 973 bcopy(auio.uio_iov, ktriov, iovlen); 974 ktruio = auio; 975 } 976#endif 977 len = auio.uio_resid; 978 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 979 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 980 &mp->msg_flags); 981 if (error) { 982 if (auio.uio_resid != (int)len && (error == ERESTART || 983 error == EINTR || error == EWOULDBLOCK)) 984 error = 0; 985 } 986#ifdef KTRACE 987 if (ktriov != NULL) { 988 if (error == 0) { 989 ktruio.uio_iov = ktriov; 990 ktruio.uio_resid = (int)len - auio.uio_resid; 991 ktrgenio(s, UIO_READ, &ktruio, error); 992 } 993 FREE(ktriov, M_TEMP); 994 } 995#endif 996 if (error) 997 goto out; 998 td->td_retval[0] = (int)len - auio.uio_resid; 999 if (mp->msg_name) { 1000 len = mp->msg_namelen; 1001 if (len <= 0 || fromsa == 0) 1002 len = 0; 1003 else { 1004 /* save sa_len before it is destroyed by MSG_COMPAT */ 1005 len = MIN(len, fromsa->sa_len); 1006#ifdef COMPAT_OLDSOCK 1007 if (mp->msg_flags & MSG_COMPAT) 1008 ((struct osockaddr *)fromsa)->sa_family = 1009 fromsa->sa_family; 1010#endif 1011 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1012 if (error) 1013 goto out; 1014 } 1015 mp->msg_namelen = len; 1016 if (namelenp && 1017 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1018#ifdef COMPAT_OLDSOCK 1019 if (mp->msg_flags & MSG_COMPAT) 1020 error = 0; /* old recvfrom didn't check */ 1021 else 1022#endif 1023 goto out; 1024 } 1025 } 1026 if (mp->msg_control) { 1027#ifdef COMPAT_OLDSOCK 1028 /* 1029 * We assume that old recvmsg calls won't receive access 1030 * rights and other control info, esp. as control info 1031 * is always optional and those options didn't exist in 4.3. 1032 * If we receive rights, trim the cmsghdr; anything else 1033 * is tossed. 1034 */ 1035 if (control && mp->msg_flags & MSG_COMPAT) { 1036 if (mtod(control, struct cmsghdr *)->cmsg_level != 1037 SOL_SOCKET || 1038 mtod(control, struct cmsghdr *)->cmsg_type != 1039 SCM_RIGHTS) { 1040 mp->msg_controllen = 0; 1041 goto out; 1042 } 1043 control->m_len -= sizeof (struct cmsghdr); 1044 control->m_data += sizeof (struct cmsghdr); 1045 } 1046#endif 1047 len = mp->msg_controllen; 1048 m = control; 1049 mp->msg_controllen = 0; 1050 ctlbuf = mp->msg_control; 1051 1052 while (m && len > 0) { 1053 unsigned int tocopy; 1054 1055 if (len >= m->m_len) 1056 tocopy = m->m_len; 1057 else { 1058 mp->msg_flags |= MSG_CTRUNC; 1059 tocopy = len; 1060 } 1061 1062 if ((error = copyout(mtod(m, caddr_t), 1063 ctlbuf, tocopy)) != 0) 1064 goto out; 1065 1066 ctlbuf += tocopy; 1067 len -= tocopy; 1068 m = m->m_next; 1069 } 1070 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1071 } 1072out: 1073 fputsock(so); 1074 mtx_unlock(&Giant); 1075 if (fromsa) 1076 FREE(fromsa, M_SONAME); 1077 if (control) 1078 m_freem(control); 1079 return (error); 1080} 1081 1082/* 1083 * MPSAFE 1084 */ 1085int 1086recvfrom(td, uap) 1087 struct thread *td; 1088 register struct recvfrom_args /* { 1089 int s; 1090 caddr_t buf; 1091 size_t len; 1092 int flags; 1093 struct sockaddr * __restrict from; 1094 socklen_t * __restrict fromlenaddr; 1095 } */ *uap; 1096{ 1097 struct msghdr msg; 1098 struct iovec aiov; 1099 int error; 1100 1101 if (uap->fromlenaddr) { 1102 error = copyin(uap->fromlenaddr, 1103 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1104 if (error) 1105 goto done2; 1106 } else { 1107 msg.msg_namelen = 0; 1108 } 1109 msg.msg_name = uap->from; 1110 msg.msg_iov = &aiov; 1111 msg.msg_iovlen = 1; 1112 aiov.iov_base = uap->buf; 1113 aiov.iov_len = uap->len; 1114 msg.msg_control = 0; 1115 msg.msg_flags = uap->flags; 1116 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1117done2: 1118 return(error); 1119} 1120 1121#ifdef COMPAT_OLDSOCK 1122/* 1123 * MPSAFE 1124 */ 1125int 1126orecvfrom(td, uap) 1127 struct thread *td; 1128 struct recvfrom_args *uap; 1129{ 1130 1131 uap->flags |= MSG_COMPAT; 1132 return (recvfrom(td, uap)); 1133} 1134#endif 1135 1136 1137#ifdef COMPAT_OLDSOCK 1138/* 1139 * MPSAFE 1140 */ 1141int 1142orecv(td, uap) 1143 struct thread *td; 1144 register struct orecv_args /* { 1145 int s; 1146 caddr_t buf; 1147 int len; 1148 int flags; 1149 } */ *uap; 1150{ 1151 struct msghdr msg; 1152 struct iovec aiov; 1153 int error; 1154 1155 msg.msg_name = 0; 1156 msg.msg_namelen = 0; 1157 msg.msg_iov = &aiov; 1158 msg.msg_iovlen = 1; 1159 aiov.iov_base = uap->buf; 1160 aiov.iov_len = uap->len; 1161 msg.msg_control = 0; 1162 msg.msg_flags = uap->flags; 1163 error = recvit(td, uap->s, &msg, NULL); 1164 return (error); 1165} 1166 1167/* 1168 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1169 * overlays the new one, missing only the flags, and with the (old) access 1170 * rights where the control fields are now. 1171 * 1172 * MPSAFE 1173 */ 1174int 1175orecvmsg(td, uap) 1176 struct thread *td; 1177 register struct orecvmsg_args /* { 1178 int s; 1179 struct omsghdr *msg; 1180 int flags; 1181 } */ *uap; 1182{ 1183 struct msghdr msg; 1184 struct iovec aiov[UIO_SMALLIOV], *iov; 1185 int error; 1186 1187 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1188 if (error) 1189 return (error); 1190 1191 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1192 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1193 error = EMSGSIZE; 1194 goto done2; 1195 } 1196 MALLOC(iov, struct iovec *, 1197 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1198 M_WAITOK); 1199 } else { 1200 iov = aiov; 1201 } 1202 msg.msg_flags = uap->flags | MSG_COMPAT; 1203 error = copyin(msg.msg_iov, iov, 1204 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1205 if (error) 1206 goto done; 1207 msg.msg_iov = iov; 1208 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1209 1210 if (msg.msg_controllen && error == 0) 1211 error = copyout(&msg.msg_controllen, 1212 &uap->msg->msg_accrightslen, sizeof (int)); 1213done: 1214 if (iov != aiov) 1215 FREE(iov, M_IOV); 1216done2: 1217 return (error); 1218} 1219#endif 1220 1221/* 1222 * MPSAFE 1223 */ 1224int 1225recvmsg(td, uap) 1226 struct thread *td; 1227 register struct recvmsg_args /* { 1228 int s; 1229 struct msghdr *msg; 1230 int flags; 1231 } */ *uap; 1232{ 1233 struct msghdr msg; 1234 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1235 register int error; 1236 1237 error = copyin(uap->msg, &msg, sizeof (msg)); 1238 if (error) 1239 goto done2; 1240 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1241 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1242 error = EMSGSIZE; 1243 goto done2; 1244 } 1245 MALLOC(iov, struct iovec *, 1246 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1247 M_WAITOK); 1248 } else { 1249 iov = aiov; 1250 } 1251#ifdef COMPAT_OLDSOCK 1252 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1253#else 1254 msg.msg_flags = uap->flags; 1255#endif 1256 uiov = msg.msg_iov; 1257 msg.msg_iov = iov; 1258 error = copyin(uiov, iov, 1259 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1260 if (error) 1261 goto done; 1262 error = recvit(td, uap->s, &msg, NULL); 1263 if (!error) { 1264 msg.msg_iov = uiov; 1265 error = copyout(&msg, uap->msg, sizeof(msg)); 1266 } 1267done: 1268 if (iov != aiov) 1269 FREE(iov, M_IOV); 1270done2: 1271 return (error); 1272} 1273 1274/* 1275 * MPSAFE 1276 */ 1277/* ARGSUSED */ 1278int 1279shutdown(td, uap) 1280 struct thread *td; 1281 register struct shutdown_args /* { 1282 int s; 1283 int how; 1284 } */ *uap; 1285{ 1286 struct socket *so; 1287 int error; 1288 1289 mtx_lock(&Giant); 1290 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1291 error = soshutdown(so, uap->how); 1292 fputsock(so); 1293 } 1294 mtx_unlock(&Giant); 1295 return(error); 1296} 1297 1298/* 1299 * MPSAFE 1300 */ 1301/* ARGSUSED */ 1302int 1303setsockopt(td, uap) 1304 struct thread *td; 1305 register struct setsockopt_args /* { 1306 int s; 1307 int level; 1308 int name; 1309 caddr_t val; 1310 int valsize; 1311 } */ *uap; 1312{ 1313 struct socket *so; 1314 struct sockopt sopt; 1315 int error; 1316 1317 if (uap->val == 0 && uap->valsize != 0) 1318 return (EFAULT); 1319 if (uap->valsize < 0) 1320 return (EINVAL); 1321 1322 mtx_lock(&Giant); 1323 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1324 sopt.sopt_dir = SOPT_SET; 1325 sopt.sopt_level = uap->level; 1326 sopt.sopt_name = uap->name; 1327 sopt.sopt_val = uap->val; 1328 sopt.sopt_valsize = uap->valsize; 1329 sopt.sopt_td = td; 1330 error = sosetopt(so, &sopt); 1331 fputsock(so); 1332 } 1333 mtx_unlock(&Giant); 1334 return(error); 1335} 1336 1337/* 1338 * MPSAFE 1339 */ 1340/* ARGSUSED */ 1341int 1342getsockopt(td, uap) 1343 struct thread *td; 1344 register struct getsockopt_args /* { 1345 int s; 1346 int level; 1347 int name; 1348 void * __restrict val; 1349 socklen_t * __restrict avalsize; 1350 } */ *uap; 1351{ 1352 socklen_t valsize; 1353 int error; 1354 struct socket *so; 1355 struct sockopt sopt; 1356 1357 mtx_lock(&Giant); 1358 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1359 goto done2; 1360 if (uap->val) { 1361 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1362 if (error) 1363 goto done1; 1364 if (valsize < 0) { 1365 error = EINVAL; 1366 goto done1; 1367 } 1368 } else { 1369 valsize = 0; 1370 } 1371 1372 sopt.sopt_dir = SOPT_GET; 1373 sopt.sopt_level = uap->level; 1374 sopt.sopt_name = uap->name; 1375 sopt.sopt_val = uap->val; 1376 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1377 sopt.sopt_td = td; 1378 1379 error = sogetopt(so, &sopt); 1380 if (error == 0) { 1381 valsize = sopt.sopt_valsize; 1382 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1383 } 1384done1: 1385 fputsock(so); 1386done2: 1387 mtx_unlock(&Giant); 1388 return (error); 1389} 1390 1391/* 1392 * getsockname1() - Get socket name. 1393 * 1394 * MPSAFE 1395 */ 1396/* ARGSUSED */ 1397static int 1398getsockname1(td, uap, compat) 1399 struct thread *td; 1400 register struct getsockname_args /* { 1401 int fdes; 1402 struct sockaddr * __restrict asa; 1403 socklen_t * __restrict alen; 1404 } */ *uap; 1405 int compat; 1406{ 1407 struct socket *so; 1408 struct sockaddr *sa; 1409 socklen_t len; 1410 int error; 1411 1412 mtx_lock(&Giant); 1413 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1414 goto done2; 1415 error = copyin(uap->alen, &len, sizeof (len)); 1416 if (error) 1417 goto done1; 1418 if (len < 0) { 1419 error = EINVAL; 1420 goto done1; 1421 } 1422 sa = 0; 1423 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1424 if (error) 1425 goto bad; 1426 if (sa == 0) { 1427 len = 0; 1428 goto gotnothing; 1429 } 1430 1431 len = MIN(len, sa->sa_len); 1432#ifdef COMPAT_OLDSOCK 1433 if (compat) 1434 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1435#endif 1436 error = copyout(sa, uap->asa, (u_int)len); 1437 if (error == 0) 1438gotnothing: 1439 error = copyout(&len, uap->alen, sizeof (len)); 1440bad: 1441 if (sa) 1442 FREE(sa, M_SONAME); 1443done1: 1444 fputsock(so); 1445done2: 1446 mtx_unlock(&Giant); 1447 return (error); 1448} 1449 1450/* 1451 * MPSAFE 1452 */ 1453int 1454getsockname(td, uap) 1455 struct thread *td; 1456 struct getsockname_args *uap; 1457{ 1458 1459 return (getsockname1(td, uap, 0)); 1460} 1461 1462#ifdef COMPAT_OLDSOCK 1463/* 1464 * MPSAFE 1465 */ 1466int 1467ogetsockname(td, uap) 1468 struct thread *td; 1469 struct getsockname_args *uap; 1470{ 1471 1472 return (getsockname1(td, uap, 1)); 1473} 1474#endif /* COMPAT_OLDSOCK */ 1475 1476/* 1477 * getpeername1() - Get name of peer for connected socket. 1478 * 1479 * MPSAFE 1480 */ 1481/* ARGSUSED */ 1482static int 1483getpeername1(td, uap, compat) 1484 struct thread *td; 1485 register struct getpeername_args /* { 1486 int fdes; 1487 struct sockaddr * __restrict asa; 1488 socklen_t * __restrict alen; 1489 } */ *uap; 1490 int compat; 1491{ 1492 struct socket *so; 1493 struct sockaddr *sa; 1494 socklen_t len; 1495 int error; 1496 1497 mtx_lock(&Giant); 1498 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1499 goto done2; 1500 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1501 error = ENOTCONN; 1502 goto done1; 1503 } 1504 error = copyin(uap->alen, &len, sizeof (len)); 1505 if (error) 1506 goto done1; 1507 if (len < 0) { 1508 error = EINVAL; 1509 goto done1; 1510 } 1511 sa = 0; 1512 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1513 if (error) 1514 goto bad; 1515 if (sa == 0) { 1516 len = 0; 1517 goto gotnothing; 1518 } 1519 len = MIN(len, sa->sa_len); 1520#ifdef COMPAT_OLDSOCK 1521 if (compat) 1522 ((struct osockaddr *)sa)->sa_family = 1523 sa->sa_family; 1524#endif 1525 error = copyout(sa, uap->asa, (u_int)len); 1526 if (error) 1527 goto bad; 1528gotnothing: 1529 error = copyout(&len, uap->alen, sizeof (len)); 1530bad: 1531 if (sa) 1532 FREE(sa, M_SONAME); 1533done1: 1534 fputsock(so); 1535done2: 1536 mtx_unlock(&Giant); 1537 return (error); 1538} 1539 1540/* 1541 * MPSAFE 1542 */ 1543int 1544getpeername(td, uap) 1545 struct thread *td; 1546 struct getpeername_args *uap; 1547{ 1548 1549 return (getpeername1(td, uap, 0)); 1550} 1551 1552#ifdef COMPAT_OLDSOCK 1553/* 1554 * MPSAFE 1555 */ 1556int 1557ogetpeername(td, uap) 1558 struct thread *td; 1559 struct ogetpeername_args *uap; 1560{ 1561 1562 /* XXX uap should have type `getpeername_args *' to begin with. */ 1563 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1564} 1565#endif /* COMPAT_OLDSOCK */ 1566 1567int 1568sockargs(mp, buf, buflen, type) 1569 struct mbuf **mp; 1570 caddr_t buf; 1571 int buflen, type; 1572{ 1573 register struct sockaddr *sa; 1574 register struct mbuf *m; 1575 int error; 1576 1577 if ((u_int)buflen > MLEN) { 1578#ifdef COMPAT_OLDSOCK 1579 if (type == MT_SONAME && (u_int)buflen <= 112) 1580 buflen = MLEN; /* unix domain compat. hack */ 1581 else 1582#endif 1583 return (EINVAL); 1584 } 1585 m = m_get(M_TRYWAIT, type); 1586 if (m == NULL) 1587 return (ENOBUFS); 1588 m->m_len = buflen; 1589 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1590 if (error) 1591 (void) m_free(m); 1592 else { 1593 *mp = m; 1594 if (type == MT_SONAME) { 1595 sa = mtod(m, struct sockaddr *); 1596 1597#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1598 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1599 sa->sa_family = sa->sa_len; 1600#endif 1601 sa->sa_len = buflen; 1602 } 1603 } 1604 return (error); 1605} 1606 1607int 1608getsockaddr(namp, uaddr, len) 1609 struct sockaddr **namp; 1610 caddr_t uaddr; 1611 size_t len; 1612{ 1613 struct sockaddr *sa; 1614 int error; 1615 1616 if (len > SOCK_MAXADDRLEN) 1617 return (ENAMETOOLONG); 1618 if (len < offsetof(struct sockaddr, sa_data[0])) 1619 return (EINVAL); 1620 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1621 error = copyin(uaddr, sa, len); 1622 if (error) { 1623 FREE(sa, M_SONAME); 1624 } else { 1625#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1626 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1627 sa->sa_family = sa->sa_len; 1628#endif 1629 sa->sa_len = len; 1630 *namp = sa; 1631 } 1632 return (error); 1633} 1634 1635/* 1636 * sendfile(2) 1637 * 1638 * MPSAFE 1639 * 1640 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1641 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1642 * 1643 * Send a file specified by 'fd' and starting at 'offset' to a socket 1644 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1645 * nbytes == 0. Optionally add a header and/or trailer to the socket 1646 * output. If specified, write the total number of bytes sent into *sbytes. 1647 * 1648 */ 1649int 1650sendfile(struct thread *td, struct sendfile_args *uap) 1651{ 1652 1653 return (do_sendfile(td, uap, 0)); 1654} 1655 1656#ifdef COMPAT_FREEBSD4 1657int 1658freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1659{ 1660 struct sendfile_args args; 1661 1662 args.fd = uap->fd; 1663 args.s = uap->s; 1664 args.offset = uap->offset; 1665 args.nbytes = uap->nbytes; 1666 args.hdtr = uap->hdtr; 1667 args.sbytes = uap->sbytes; 1668 args.flags = uap->flags; 1669 1670 return (do_sendfile(td, &args, 1)); 1671} 1672#endif /* COMPAT_FREEBSD4 */ 1673 1674static int 1675do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1676{ 1677 struct vnode *vp; 1678 struct vm_object *obj; 1679 struct socket *so = NULL; 1680 struct mbuf *m; 1681 struct sf_buf *sf; 1682 struct vm_page *pg; 1683 struct writev_args nuap; 1684 struct sf_hdtr hdtr; 1685 off_t off, xfsize, hdtr_size, sbytes = 0; 1686 int error, s; 1687 1688 mtx_lock(&Giant); 1689 1690 hdtr_size = 0; 1691 1692 /* 1693 * The descriptor must be a regular file and have a backing VM object. 1694 */ 1695 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1696 goto done; 1697 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1698 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1699 error = EINVAL; 1700 VOP_UNLOCK(vp, 0, td); 1701 goto done; 1702 } 1703 VOP_UNLOCK(vp, 0, td); 1704 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1705 goto done; 1706 if (so->so_type != SOCK_STREAM) { 1707 error = EINVAL; 1708 goto done; 1709 } 1710 if ((so->so_state & SS_ISCONNECTED) == 0) { 1711 error = ENOTCONN; 1712 goto done; 1713 } 1714 if (uap->offset < 0) { 1715 error = EINVAL; 1716 goto done; 1717 } 1718 1719#ifdef MAC 1720 error = mac_check_socket_send(td->td_ucred, so); 1721 if (error) 1722 goto done; 1723#endif 1724 1725 /* 1726 * If specified, get the pointer to the sf_hdtr struct for 1727 * any headers/trailers. 1728 */ 1729 if (uap->hdtr != NULL) { 1730 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1731 if (error) 1732 goto done; 1733 /* 1734 * Send any headers. Wimp out and use writev(2). 1735 */ 1736 if (hdtr.headers != NULL) { 1737 nuap.fd = uap->s; 1738 nuap.iovp = hdtr.headers; 1739 nuap.iovcnt = hdtr.hdr_cnt; 1740 error = writev(td, &nuap); 1741 if (error) 1742 goto done; 1743 if (compat) 1744 sbytes += td->td_retval[0]; 1745 else 1746 hdtr_size += td->td_retval[0]; 1747 } 1748 } 1749 1750 /* 1751 * Protect against multiple writers to the socket. 1752 */ 1753 (void) sblock(&so->so_snd, M_WAITOK); 1754 1755 /* 1756 * Loop through the pages in the file, starting with the requested 1757 * offset. Get a file page (do I/O if necessary), map the file page 1758 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1759 * it on the socket. 1760 */ 1761 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1762 vm_pindex_t pindex; 1763 vm_offset_t pgoff; 1764 1765 pindex = OFF_TO_IDX(off); 1766 VM_OBJECT_LOCK(obj); 1767retry_lookup: 1768 /* 1769 * Calculate the amount to transfer. Not to exceed a page, 1770 * the EOF, or the passed in nbytes. 1771 */ 1772 xfsize = obj->un_pager.vnp.vnp_size - off; 1773 VM_OBJECT_UNLOCK(obj); 1774 if (xfsize > PAGE_SIZE) 1775 xfsize = PAGE_SIZE; 1776 pgoff = (vm_offset_t)(off & PAGE_MASK); 1777 if (PAGE_SIZE - pgoff < xfsize) 1778 xfsize = PAGE_SIZE - pgoff; 1779 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1780 xfsize = uap->nbytes - sbytes; 1781 if (xfsize <= 0) 1782 break; 1783 /* 1784 * Optimize the non-blocking case by looking at the socket space 1785 * before going to the extra work of constituting the sf_buf. 1786 */ 1787 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1788 if (so->so_state & SS_CANTSENDMORE) 1789 error = EPIPE; 1790 else 1791 error = EAGAIN; 1792 sbunlock(&so->so_snd); 1793 goto done; 1794 } 1795 VM_OBJECT_LOCK(obj); 1796 /* 1797 * Attempt to look up the page. 1798 * 1799 * Allocate if not found 1800 * 1801 * Wait and loop if busy. 1802 */ 1803 pg = vm_page_lookup(obj, pindex); 1804 1805 if (pg == NULL) { 1806 pg = vm_page_alloc(obj, pindex, 1807 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1808 if (pg == NULL) { 1809 VM_OBJECT_UNLOCK(obj); 1810 VM_WAIT; 1811 VM_OBJECT_LOCK(obj); 1812 goto retry_lookup; 1813 } 1814 vm_page_lock_queues(); 1815 vm_page_wakeup(pg); 1816 } else { 1817 vm_page_lock_queues(); 1818 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1819 goto retry_lookup; 1820 /* 1821 * Wire the page so it does not get ripped out from 1822 * under us. 1823 */ 1824 vm_page_wire(pg); 1825 } 1826 1827 /* 1828 * If page is not valid for what we need, initiate I/O 1829 */ 1830 1831 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) { 1832 int bsize, resid; 1833 1834 /* 1835 * Ensure that our page is still around when the I/O 1836 * completes. 1837 */ 1838 vm_page_io_start(pg); 1839 vm_page_unlock_queues(); 1840 VM_OBJECT_UNLOCK(obj); 1841 1842 /* 1843 * Get the page from backing store. 1844 */ 1845 bsize = vp->v_mount->mnt_stat.f_iosize; 1846 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1847 /* 1848 * XXXMAC: Because we don't have fp->f_cred here, 1849 * we pass in NOCRED. This is probably wrong, but 1850 * is consistent with our original implementation. 1851 */ 1852 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1853 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1854 IO_VMIO | ((MAXBSIZE / bsize) << 16), 1855 td->td_ucred, NOCRED, &resid, td); 1856 VOP_UNLOCK(vp, 0, td); 1857 if (error) 1858 VM_OBJECT_LOCK(obj); 1859 vm_page_lock_queues(); 1860 vm_page_flag_clear(pg, PG_ZERO); 1861 vm_page_io_finish(pg); 1862 if (error) { 1863 vm_page_unwire(pg, 0); 1864 /* 1865 * See if anyone else might know about this page. 1866 * If not and it is not valid, then free it. 1867 */ 1868 if (pg->wire_count == 0 && pg->valid == 0 && 1869 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1870 pg->hold_count == 0) { 1871 vm_page_busy(pg); 1872 vm_page_free(pg); 1873 } 1874 vm_page_unlock_queues(); 1875 VM_OBJECT_UNLOCK(obj); 1876 sbunlock(&so->so_snd); 1877 goto done; 1878 } 1879 mbstat.sf_iocnt++; 1880 } else { 1881 VM_OBJECT_UNLOCK(obj); 1882 } 1883 vm_page_unlock_queues(); 1884 1885 /* 1886 * Get a sendfile buf. We usually wait as long as necessary, 1887 * but this wait can be interrupted. 1888 */ 1889 if ((sf = sf_buf_alloc(pg)) == NULL) { 1890 mbstat.sf_allocfail++; 1891 vm_page_lock_queues(); 1892 vm_page_unwire(pg, 0); 1893 if (pg->wire_count == 0 && pg->object == NULL) 1894 vm_page_free(pg); 1895 vm_page_unlock_queues(); 1896 sbunlock(&so->so_snd); 1897 error = EINTR; 1898 goto done; 1899 } 1900 1901 /* 1902 * Get an mbuf header and set it up as having external storage. 1903 */ 1904 MGETHDR(m, M_TRYWAIT, MT_DATA); 1905 if (m == NULL) { 1906 error = ENOBUFS; 1907 sf_buf_free((void *)sf_buf_kva(sf), sf); 1908 sbunlock(&so->so_snd); 1909 goto done; 1910 } 1911 /* 1912 * Setup external storage for mbuf. 1913 */ 1914 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_free, sf, M_RDONLY, 1915 EXT_SFBUF); 1916 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1917 m->m_pkthdr.len = m->m_len = xfsize; 1918 /* 1919 * Add the buffer to the socket buffer chain. 1920 */ 1921 s = splnet(); 1922retry_space: 1923 /* 1924 * Make sure that the socket is still able to take more data. 1925 * CANTSENDMORE being true usually means that the connection 1926 * was closed. so_error is true when an error was sensed after 1927 * a previous send. 1928 * The state is checked after the page mapping and buffer 1929 * allocation above since those operations may block and make 1930 * any socket checks stale. From this point forward, nothing 1931 * blocks before the pru_send (or more accurately, any blocking 1932 * results in a loop back to here to re-check). 1933 */ 1934 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1935 if (so->so_state & SS_CANTSENDMORE) { 1936 error = EPIPE; 1937 } else { 1938 error = so->so_error; 1939 so->so_error = 0; 1940 } 1941 m_freem(m); 1942 sbunlock(&so->so_snd); 1943 splx(s); 1944 goto done; 1945 } 1946 /* 1947 * Wait for socket space to become available. We do this just 1948 * after checking the connection state above in order to avoid 1949 * a race condition with sbwait(). 1950 */ 1951 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1952 if (so->so_state & SS_NBIO) { 1953 m_freem(m); 1954 sbunlock(&so->so_snd); 1955 splx(s); 1956 error = EAGAIN; 1957 goto done; 1958 } 1959 error = sbwait(&so->so_snd); 1960 /* 1961 * An error from sbwait usually indicates that we've 1962 * been interrupted by a signal. If we've sent anything 1963 * then return bytes sent, otherwise return the error. 1964 */ 1965 if (error) { 1966 m_freem(m); 1967 sbunlock(&so->so_snd); 1968 splx(s); 1969 goto done; 1970 } 1971 goto retry_space; 1972 } 1973 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 1974 splx(s); 1975 if (error) { 1976 sbunlock(&so->so_snd); 1977 goto done; 1978 } 1979 } 1980 sbunlock(&so->so_snd); 1981 1982 /* 1983 * Send trailers. Wimp out and use writev(2). 1984 */ 1985 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 1986 nuap.fd = uap->s; 1987 nuap.iovp = hdtr.trailers; 1988 nuap.iovcnt = hdtr.trl_cnt; 1989 error = writev(td, &nuap); 1990 if (error) 1991 goto done; 1992 if (compat) 1993 sbytes += td->td_retval[0]; 1994 else 1995 hdtr_size += td->td_retval[0]; 1996 } 1997 1998done: 1999 /* 2000 * If there was no error we have to clear td->td_retval[0] 2001 * because it may have been set by writev. 2002 */ 2003 if (error == 0) { 2004 td->td_retval[0] = 0; 2005 } 2006 if (uap->sbytes != NULL) { 2007 if (!compat) 2008 sbytes += hdtr_size; 2009 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2010 } 2011 if (vp) 2012 vrele(vp); 2013 if (so) 2014 fputsock(so); 2015 2016 mtx_unlock(&Giant); 2017 2018 if (error == ERESTART) 2019 error = EINTR; 2020 2021 return (error); 2022} 2023