/* kern_sendfile.c revision 136047 */
1/* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 136047 2004-10-02 05:37:47Z alc $"); 37 38#include "opt_compat.h" 39#include "opt_ktrace.h" 40#include "opt_mac.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/mac.h> 47#include <sys/mutex.h> 48#include <sys/sysproto.h> 49#include <sys/malloc.h> 50#include <sys/filedesc.h> 51#include <sys/event.h> 52#include <sys/proc.h> 53#include <sys/fcntl.h> 54#include <sys/file.h> 55#include <sys/filio.h> 56#include <sys/mount.h> 57#include <sys/mbuf.h> 58#include <sys/protosw.h> 59#include <sys/sf_buf.h> 60#include <sys/socket.h> 61#include <sys/socketvar.h> 62#include <sys/signalvar.h> 63#include <sys/syscallsubr.h> 64#include <sys/sysctl.h> 65#include <sys/uio.h> 66#include <sys/vnode.h> 67#ifdef KTRACE 68#include <sys/ktrace.h> 69#endif 70 71#include <vm/vm.h> 72#include <vm/vm_object.h> 73#include <vm/vm_page.h> 74#include <vm/vm_pageout.h> 75#include <vm/vm_kern.h> 76#include <vm/vm_extern.h> 77 78static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81static int accept1(struct thread *td, struct accept_args *uap, int compat); 82static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88/* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91int nsfbufs; 92int nsfbufspeak; 93int nsfbufsused; 94 95SYSCTL_DECL(_kern_ipc); 96SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 97 "Maximum number of sendfile(2) sf_bufs available"); 98SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 99 "Number of sendfile(2) 
sf_bufs at peak usage"); 100SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 101 "Number of sendfile(2) sf_bufs in use"); 102 103/* 104 * System call interface to the socket abstraction. 105 */ 106#if defined(COMPAT_43) 107#define COMPAT_OLDSOCK 108#endif 109 110/* 111 * MPSAFE 112 */ 113int 114socket(td, uap) 115 struct thread *td; 116 register struct socket_args /* { 117 int domain; 118 int type; 119 int protocol; 120 } */ *uap; 121{ 122 struct filedesc *fdp; 123 struct socket *so; 124 struct file *fp; 125 int fd, error; 126 127 fdp = td->td_proc->p_fd; 128 error = falloc(td, &fp, &fd); 129 if (error) 130 return (error); 131 /* An extra reference on `fp' has been held for us by falloc(). */ 132 NET_LOCK_GIANT(); 133 error = socreate(uap->domain, &so, uap->type, uap->protocol, 134 td->td_ucred, td); 135 NET_UNLOCK_GIANT(); 136 FILEDESC_LOCK(fdp); 137 if (error) { 138 if (fdp->fd_ofiles[fd] == fp) { 139 fdp->fd_ofiles[fd] = NULL; 140 fdunused(fdp, fd); 141 FILEDESC_UNLOCK(fdp); 142 fdrop(fp, td); 143 } else { 144 FILEDESC_UNLOCK(fdp); 145 } 146 } else { 147 fp->f_data = so; /* already has ref count */ 148 fp->f_flag = FREAD|FWRITE; 149 fp->f_ops = &socketops; 150 fp->f_type = DTYPE_SOCKET; 151 FILEDESC_UNLOCK(fdp); 152 td->td_retval[0] = fd; 153 } 154 fdrop(fp, td); 155 return (error); 156} 157 158/* 159 * MPSAFE 160 */ 161/* ARGSUSED */ 162int 163bind(td, uap) 164 struct thread *td; 165 register struct bind_args /* { 166 int s; 167 caddr_t name; 168 int namelen; 169 } */ *uap; 170{ 171 struct sockaddr *sa; 172 int error; 173 174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 175 return (error); 176 177 return (kern_bind(td, uap->s, sa)); 178} 179 180int 181kern_bind(td, fd, sa) 182 struct thread *td; 183 int fd; 184 struct sockaddr *sa; 185{ 186 struct socket *so; 187 int error; 188 189 NET_LOCK_GIANT(); 190 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 191 goto done2; 192#ifdef MAC 193 SOCK_LOCK(so); 194 error = 
mac_check_socket_bind(td->td_ucred, so, sa); 195 SOCK_UNLOCK(so); 196 if (error) 197 goto done1; 198#endif 199 error = sobind(so, sa, td); 200#ifdef MAC 201done1: 202#endif 203 fputsock(so); 204done2: 205 NET_UNLOCK_GIANT(); 206 FREE(sa, M_SONAME); 207 return (error); 208} 209 210/* 211 * MPSAFE 212 */ 213/* ARGSUSED */ 214int 215listen(td, uap) 216 struct thread *td; 217 register struct listen_args /* { 218 int s; 219 int backlog; 220 } */ *uap; 221{ 222 struct socket *so; 223 int error; 224 225 NET_LOCK_GIANT(); 226 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 227#ifdef MAC 228 SOCK_LOCK(so); 229 error = mac_check_socket_listen(td->td_ucred, so); 230 SOCK_UNLOCK(so); 231 if (error) 232 goto done; 233#endif 234 error = solisten(so, uap->backlog, td); 235#ifdef MAC 236done: 237#endif 238 fputsock(so); 239 } 240 NET_UNLOCK_GIANT(); 241 return(error); 242} 243 244/* 245 * accept1() 246 * MPSAFE 247 */ 248static int 249accept1(td, uap, compat) 250 struct thread *td; 251 register struct accept_args /* { 252 int s; 253 struct sockaddr * __restrict name; 254 socklen_t * __restrict anamelen; 255 } */ *uap; 256 int compat; 257{ 258 struct filedesc *fdp; 259 struct file *nfp = NULL; 260 struct sockaddr *sa = NULL; 261 socklen_t namelen; 262 int error; 263 struct socket *head, *so; 264 int fd; 265 u_int fflag; 266 pid_t pgid; 267 int tmp; 268 269 fdp = td->td_proc->p_fd; 270 if (uap->name) { 271 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 272 if(error) 273 return (error); 274 if (namelen < 0) 275 return (EINVAL); 276 } 277 NET_LOCK_GIANT(); 278 error = fgetsock(td, uap->s, &head, &fflag); 279 if (error) 280 goto done2; 281 if ((head->so_options & SO_ACCEPTCONN) == 0) { 282 error = EINVAL; 283 goto done; 284 } 285 error = falloc(td, &nfp, &fd); 286 if (error) 287 goto done; 288 ACCEPT_LOCK(); 289 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 290 ACCEPT_UNLOCK(); 291 error = EWOULDBLOCK; 292 goto noconnection; 293 } 294 while 
(TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 295 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 296 head->so_error = ECONNABORTED; 297 break; 298 } 299 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 300 "accept", 0); 301 if (error) { 302 ACCEPT_UNLOCK(); 303 goto noconnection; 304 } 305 } 306 if (head->so_error) { 307 error = head->so_error; 308 head->so_error = 0; 309 ACCEPT_UNLOCK(); 310 goto noconnection; 311 } 312 so = TAILQ_FIRST(&head->so_comp); 313 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 314 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 315 316 /* 317 * Before changing the flags on the socket, we have to bump the 318 * reference count. Otherwise, if the protocol calls sofree(), 319 * the socket will be released due to a zero refcount. 320 */ 321 SOCK_LOCK(so); 322 soref(so); /* file descriptor reference */ 323 SOCK_UNLOCK(so); 324 325 TAILQ_REMOVE(&head->so_comp, so, so_list); 326 head->so_qlen--; 327 so->so_state |= (head->so_state & SS_NBIO); 328 so->so_qstate &= ~SQ_COMP; 329 so->so_head = NULL; 330 331 ACCEPT_UNLOCK(); 332 333 /* An extra reference on `nfp' has been held for us by falloc(). 
*/ 334 td->td_retval[0] = fd; 335 336 /* connection has been removed from the listen queue */ 337 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 338 339 pgid = fgetown(&head->so_sigio); 340 if (pgid != 0) 341 fsetown(pgid, &so->so_sigio); 342 343 FILE_LOCK(nfp); 344 nfp->f_data = so; /* nfp has ref count from falloc */ 345 nfp->f_flag = fflag; 346 nfp->f_ops = &socketops; 347 nfp->f_type = DTYPE_SOCKET; 348 FILE_UNLOCK(nfp); 349 /* Sync socket nonblocking/async state with file flags */ 350 tmp = fflag & FNONBLOCK; 351 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 352 tmp = fflag & FASYNC; 353 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 354 sa = 0; 355 error = soaccept(so, &sa); 356 if (error) { 357 /* 358 * return a namelen of zero for older code which might 359 * ignore the return value from accept. 360 */ 361 if (uap->name != NULL) { 362 namelen = 0; 363 (void) copyout(&namelen, 364 uap->anamelen, sizeof(*uap->anamelen)); 365 } 366 goto noconnection; 367 } 368 if (sa == NULL) { 369 namelen = 0; 370 if (uap->name) 371 goto gotnoname; 372 error = 0; 373 goto done; 374 } 375 if (uap->name) { 376 /* check sa_len before it is destroyed */ 377 if (namelen > sa->sa_len) 378 namelen = sa->sa_len; 379#ifdef COMPAT_OLDSOCK 380 if (compat) 381 ((struct osockaddr *)sa)->sa_family = 382 sa->sa_family; 383#endif 384 error = copyout(sa, uap->name, (u_int)namelen); 385 if (!error) 386gotnoname: 387 error = copyout(&namelen, 388 uap->anamelen, sizeof (*uap->anamelen)); 389 } 390noconnection: 391 if (sa) 392 FREE(sa, M_SONAME); 393 394 /* 395 * close the new descriptor, assuming someone hasn't ripped it 396 * out from under us. 397 */ 398 if (error) { 399 FILEDESC_LOCK(fdp); 400 if (fdp->fd_ofiles[fd] == nfp) { 401 fdp->fd_ofiles[fd] = NULL; 402 fdunused(fdp, fd); 403 FILEDESC_UNLOCK(fdp); 404 fdrop(nfp, td); 405 } else { 406 FILEDESC_UNLOCK(fdp); 407 } 408 } 409 410 /* 411 * Release explicitly held references before returning. 
412 */ 413done: 414 if (nfp != NULL) 415 fdrop(nfp, td); 416 fputsock(head); 417done2: 418 NET_UNLOCK_GIANT(); 419 return (error); 420} 421 422/* 423 * MPSAFE (accept1() is MPSAFE) 424 */ 425int 426accept(td, uap) 427 struct thread *td; 428 struct accept_args *uap; 429{ 430 431 return (accept1(td, uap, 0)); 432} 433 434#ifdef COMPAT_OLDSOCK 435/* 436 * MPSAFE (accept1() is MPSAFE) 437 */ 438int 439oaccept(td, uap) 440 struct thread *td; 441 struct accept_args *uap; 442{ 443 444 return (accept1(td, uap, 1)); 445} 446#endif /* COMPAT_OLDSOCK */ 447 448/* 449 * MPSAFE 450 */ 451/* ARGSUSED */ 452int 453connect(td, uap) 454 struct thread *td; 455 register struct connect_args /* { 456 int s; 457 caddr_t name; 458 int namelen; 459 } */ *uap; 460{ 461 struct sockaddr *sa; 462 int error; 463 464 error = getsockaddr(&sa, uap->name, uap->namelen); 465 if (error) 466 return (error); 467 468 return (kern_connect(td, uap->s, sa)); 469} 470 471 472int 473kern_connect(td, fd, sa) 474 struct thread *td; 475 int fd; 476 struct sockaddr *sa; 477{ 478 struct socket *so; 479 int error, s; 480 int interrupted = 0; 481 482 NET_LOCK_GIANT(); 483 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 484 goto done2; 485 if (so->so_state & SS_ISCONNECTING) { 486 error = EALREADY; 487 goto done1; 488 } 489#ifdef MAC 490 SOCK_LOCK(so); 491 error = mac_check_socket_connect(td->td_ucred, so, sa); 492 SOCK_UNLOCK(so); 493 if (error) 494 goto bad; 495#endif 496 error = soconnect(so, sa, td); 497 if (error) 498 goto bad; 499 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 500 error = EINPROGRESS; 501 goto done1; 502 } 503 s = splnet(); 504 SOCK_LOCK(so); 505 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 506 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 507 "connec", 0); 508 if (error) { 509 if (error == EINTR || error == ERESTART) 510 interrupted = 1; 511 break; 512 } 513 } 514 if (error == 0) { 515 error = so->so_error; 516 so->so_error = 0; 517 } 518 
SOCK_UNLOCK(so); 519 splx(s); 520bad: 521 if (!interrupted) 522 so->so_state &= ~SS_ISCONNECTING; 523 if (error == ERESTART) 524 error = EINTR; 525done1: 526 fputsock(so); 527done2: 528 NET_UNLOCK_GIANT(); 529 FREE(sa, M_SONAME); 530 return (error); 531} 532 533/* 534 * MPSAFE 535 */ 536int 537socketpair(td, uap) 538 struct thread *td; 539 register struct socketpair_args /* { 540 int domain; 541 int type; 542 int protocol; 543 int *rsv; 544 } */ *uap; 545{ 546 register struct filedesc *fdp = td->td_proc->p_fd; 547 struct file *fp1, *fp2; 548 struct socket *so1, *so2; 549 int fd, error, sv[2]; 550 551 NET_LOCK_GIANT(); 552 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 553 td->td_ucred, td); 554 if (error) 555 goto done2; 556 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 557 td->td_ucred, td); 558 if (error) 559 goto free1; 560 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 561 error = falloc(td, &fp1, &fd); 562 if (error) 563 goto free2; 564 sv[0] = fd; 565 fp1->f_data = so1; /* so1 already has ref count */ 566 error = falloc(td, &fp2, &fd); 567 if (error) 568 goto free3; 569 fp2->f_data = so2; /* so2 already has ref count */ 570 sv[1] = fd; 571 error = soconnect2(so1, so2); 572 if (error) 573 goto free4; 574 if (uap->type == SOCK_DGRAM) { 575 /* 576 * Datagram socket connection is asymmetric. 
577 */ 578 error = soconnect2(so2, so1); 579 if (error) 580 goto free4; 581 } 582 FILE_LOCK(fp1); 583 fp1->f_flag = FREAD|FWRITE; 584 fp1->f_ops = &socketops; 585 fp1->f_type = DTYPE_SOCKET; 586 FILE_UNLOCK(fp1); 587 FILE_LOCK(fp2); 588 fp2->f_flag = FREAD|FWRITE; 589 fp2->f_ops = &socketops; 590 fp2->f_type = DTYPE_SOCKET; 591 FILE_UNLOCK(fp2); 592 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 593 fdrop(fp1, td); 594 fdrop(fp2, td); 595 goto done2; 596free4: 597 FILEDESC_LOCK(fdp); 598 if (fdp->fd_ofiles[sv[1]] == fp2) { 599 fdp->fd_ofiles[sv[1]] = NULL; 600 fdunused(fdp, sv[1]); 601 FILEDESC_UNLOCK(fdp); 602 fdrop(fp2, td); 603 } else { 604 FILEDESC_UNLOCK(fdp); 605 } 606 fdrop(fp2, td); 607free3: 608 FILEDESC_LOCK(fdp); 609 if (fdp->fd_ofiles[sv[0]] == fp1) { 610 fdp->fd_ofiles[sv[0]] = NULL; 611 fdunused(fdp, sv[0]); 612 FILEDESC_UNLOCK(fdp); 613 fdrop(fp1, td); 614 } else { 615 FILEDESC_UNLOCK(fdp); 616 } 617 fdrop(fp1, td); 618free2: 619 (void)soclose(so2); 620free1: 621 (void)soclose(so1); 622done2: 623 NET_UNLOCK_GIANT(); 624 return (error); 625} 626 627static int 628sendit(td, s, mp, flags) 629 register struct thread *td; 630 int s; 631 register struct msghdr *mp; 632 int flags; 633{ 634 struct mbuf *control; 635 struct sockaddr *to; 636 int error; 637 638 if (mp->msg_name != NULL) { 639 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 640 if (error) { 641 to = NULL; 642 goto bad; 643 } 644 mp->msg_name = to; 645 } else { 646 to = NULL; 647 } 648 649 if (mp->msg_control) { 650 if (mp->msg_controllen < sizeof(struct cmsghdr) 651#ifdef COMPAT_OLDSOCK 652 && mp->msg_flags != MSG_COMPAT 653#endif 654 ) { 655 error = EINVAL; 656 goto bad; 657 } 658 error = sockargs(&control, mp->msg_control, 659 mp->msg_controllen, MT_CONTROL); 660 if (error) 661 goto bad; 662#ifdef COMPAT_OLDSOCK 663 if (mp->msg_flags == MSG_COMPAT) { 664 register struct cmsghdr *cm; 665 666 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 667 if (control == 0) { 668 error = ENOBUFS; 669 
goto bad; 670 } else { 671 cm = mtod(control, struct cmsghdr *); 672 cm->cmsg_len = control->m_len; 673 cm->cmsg_level = SOL_SOCKET; 674 cm->cmsg_type = SCM_RIGHTS; 675 } 676 } 677#endif 678 } else { 679 control = NULL; 680 } 681 682 error = kern_sendit(td, s, mp, flags, control); 683 684bad: 685 if (to) 686 FREE(to, M_SONAME); 687 return (error); 688} 689 690int 691kern_sendit(td, s, mp, flags, control) 692 struct thread *td; 693 int s; 694 struct msghdr *mp; 695 int flags; 696 struct mbuf *control; 697{ 698 struct uio auio; 699 struct iovec *iov; 700 struct socket *so; 701 int i; 702 int len, error; 703#ifdef KTRACE 704 struct uio *ktruio = NULL; 705#endif 706 707 NET_LOCK_GIANT(); 708 if ((error = fgetsock(td, s, &so, NULL)) != 0) 709 goto bad2; 710 711#ifdef MAC 712 SOCK_LOCK(so); 713 error = mac_check_socket_send(td->td_ucred, so); 714 SOCK_UNLOCK(so); 715 if (error) 716 goto bad; 717#endif 718 719 auio.uio_iov = mp->msg_iov; 720 auio.uio_iovcnt = mp->msg_iovlen; 721 auio.uio_segflg = UIO_USERSPACE; 722 auio.uio_rw = UIO_WRITE; 723 auio.uio_td = td; 724 auio.uio_offset = 0; /* XXX */ 725 auio.uio_resid = 0; 726 iov = mp->msg_iov; 727 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 728 if ((auio.uio_resid += iov->iov_len) < 0) { 729 error = EINVAL; 730 goto bad; 731 } 732 } 733#ifdef KTRACE 734 if (KTRPOINT(td, KTR_GENIO)) 735 ktruio = cloneuio(&auio); 736#endif 737 len = auio.uio_resid; 738 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 739 0, control, flags, td); 740 if (error) { 741 if (auio.uio_resid != len && (error == ERESTART || 742 error == EINTR || error == EWOULDBLOCK)) 743 error = 0; 744 /* Generation of SIGPIPE can be controlled per socket */ 745 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 746 PROC_LOCK(td->td_proc); 747 psignal(td->td_proc, SIGPIPE); 748 PROC_UNLOCK(td->td_proc); 749 } 750 } 751 if (error == 0) 752 td->td_retval[0] = len - auio.uio_resid; 753#ifdef KTRACE 754 if (ktruio != NULL) { 755 
ktruio->uio_resid = td->td_retval[0]; 756 ktrgenio(s, UIO_WRITE, ktruio, error); 757 } 758#endif 759bad: 760 fputsock(so); 761bad2: 762 NET_UNLOCK_GIANT(); 763 return (error); 764} 765 766/* 767 * MPSAFE 768 */ 769int 770sendto(td, uap) 771 struct thread *td; 772 register struct sendto_args /* { 773 int s; 774 caddr_t buf; 775 size_t len; 776 int flags; 777 caddr_t to; 778 int tolen; 779 } */ *uap; 780{ 781 struct msghdr msg; 782 struct iovec aiov; 783 int error; 784 785 msg.msg_name = uap->to; 786 msg.msg_namelen = uap->tolen; 787 msg.msg_iov = &aiov; 788 msg.msg_iovlen = 1; 789 msg.msg_control = 0; 790#ifdef COMPAT_OLDSOCK 791 msg.msg_flags = 0; 792#endif 793 aiov.iov_base = uap->buf; 794 aiov.iov_len = uap->len; 795 error = sendit(td, uap->s, &msg, uap->flags); 796 return (error); 797} 798 799#ifdef COMPAT_OLDSOCK 800/* 801 * MPSAFE 802 */ 803int 804osend(td, uap) 805 struct thread *td; 806 register struct osend_args /* { 807 int s; 808 caddr_t buf; 809 int len; 810 int flags; 811 } */ *uap; 812{ 813 struct msghdr msg; 814 struct iovec aiov; 815 int error; 816 817 msg.msg_name = 0; 818 msg.msg_namelen = 0; 819 msg.msg_iov = &aiov; 820 msg.msg_iovlen = 1; 821 aiov.iov_base = uap->buf; 822 aiov.iov_len = uap->len; 823 msg.msg_control = 0; 824 msg.msg_flags = 0; 825 error = sendit(td, uap->s, &msg, uap->flags); 826 return (error); 827} 828 829/* 830 * MPSAFE 831 */ 832int 833osendmsg(td, uap) 834 struct thread *td; 835 struct osendmsg_args /* { 836 int s; 837 caddr_t msg; 838 int flags; 839 } */ *uap; 840{ 841 struct msghdr msg; 842 struct iovec *iov; 843 int error; 844 845 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 846 if (error) 847 return (error); 848 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 849 if (error) 850 return (error); 851 msg.msg_iov = iov; 852 msg.msg_flags = MSG_COMPAT; 853 error = sendit(td, uap->s, &msg, uap->flags); 854 free(iov, M_IOV); 855 return (error); 856} 857#endif 858 859/* 860 * MPSAFE 861 */ 862int 
863sendmsg(td, uap) 864 struct thread *td; 865 struct sendmsg_args /* { 866 int s; 867 caddr_t msg; 868 int flags; 869 } */ *uap; 870{ 871 struct msghdr msg; 872 struct iovec *iov; 873 int error; 874 875 error = copyin(uap->msg, &msg, sizeof (msg)); 876 if (error) 877 return (error); 878 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 879 if (error) 880 return (error); 881 msg.msg_iov = iov; 882#ifdef COMPAT_OLDSOCK 883 msg.msg_flags = 0; 884#endif 885 error = sendit(td, uap->s, &msg, uap->flags); 886 free(iov, M_IOV); 887 return (error); 888} 889 890static int 891recvit(td, s, mp, namelenp) 892 struct thread *td; 893 int s; 894 struct msghdr *mp; 895 void *namelenp; 896{ 897 struct uio auio; 898 struct iovec *iov; 899 int i; 900 socklen_t len; 901 int error; 902 struct mbuf *m, *control = 0; 903 caddr_t ctlbuf; 904 struct socket *so; 905 struct sockaddr *fromsa = 0; 906#ifdef KTRACE 907 struct uio *ktruio = NULL; 908#endif 909 910 NET_LOCK_GIANT(); 911 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 912 NET_UNLOCK_GIANT(); 913 return (error); 914 } 915 916#ifdef MAC 917 SOCK_LOCK(so); 918 error = mac_check_socket_receive(td->td_ucred, so); 919 SOCK_UNLOCK(so); 920 if (error) { 921 fputsock(so); 922 NET_UNLOCK_GIANT(); 923 return (error); 924 } 925#endif 926 927 auio.uio_iov = mp->msg_iov; 928 auio.uio_iovcnt = mp->msg_iovlen; 929 auio.uio_segflg = UIO_USERSPACE; 930 auio.uio_rw = UIO_READ; 931 auio.uio_td = td; 932 auio.uio_offset = 0; /* XXX */ 933 auio.uio_resid = 0; 934 iov = mp->msg_iov; 935 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 936 if ((auio.uio_resid += iov->iov_len) < 0) { 937 fputsock(so); 938 NET_UNLOCK_GIANT(); 939 return (EINVAL); 940 } 941 } 942#ifdef KTRACE 943 if (KTRPOINT(td, KTR_GENIO)) 944 ktruio = cloneuio(&auio); 945#endif 946 len = auio.uio_resid; 947 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 948 (struct mbuf **)0, mp->msg_control ? 
&control : (struct mbuf **)0, 949 &mp->msg_flags); 950 if (error) { 951 if (auio.uio_resid != (int)len && (error == ERESTART || 952 error == EINTR || error == EWOULDBLOCK)) 953 error = 0; 954 } 955#ifdef KTRACE 956 if (ktruio != NULL) { 957 ktruio->uio_resid = (int)len - auio.uio_resid; 958 ktrgenio(s, UIO_READ, ktruio, error); 959 } 960#endif 961 if (error) 962 goto out; 963 td->td_retval[0] = (int)len - auio.uio_resid; 964 if (mp->msg_name) { 965 len = mp->msg_namelen; 966 if (len <= 0 || fromsa == 0) 967 len = 0; 968 else { 969 /* save sa_len before it is destroyed by MSG_COMPAT */ 970 len = MIN(len, fromsa->sa_len); 971#ifdef COMPAT_OLDSOCK 972 if (mp->msg_flags & MSG_COMPAT) 973 ((struct osockaddr *)fromsa)->sa_family = 974 fromsa->sa_family; 975#endif 976 error = copyout(fromsa, mp->msg_name, (unsigned)len); 977 if (error) 978 goto out; 979 } 980 mp->msg_namelen = len; 981 if (namelenp && 982 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 983#ifdef COMPAT_OLDSOCK 984 if (mp->msg_flags & MSG_COMPAT) 985 error = 0; /* old recvfrom didn't check */ 986 else 987#endif 988 goto out; 989 } 990 } 991 if (mp->msg_control) { 992#ifdef COMPAT_OLDSOCK 993 /* 994 * We assume that old recvmsg calls won't receive access 995 * rights and other control info, esp. as control info 996 * is always optional and those options didn't exist in 4.3. 997 * If we receive rights, trim the cmsghdr; anything else 998 * is tossed. 
999 */ 1000 if (control && mp->msg_flags & MSG_COMPAT) { 1001 if (mtod(control, struct cmsghdr *)->cmsg_level != 1002 SOL_SOCKET || 1003 mtod(control, struct cmsghdr *)->cmsg_type != 1004 SCM_RIGHTS) { 1005 mp->msg_controllen = 0; 1006 goto out; 1007 } 1008 control->m_len -= sizeof (struct cmsghdr); 1009 control->m_data += sizeof (struct cmsghdr); 1010 } 1011#endif 1012 len = mp->msg_controllen; 1013 m = control; 1014 mp->msg_controllen = 0; 1015 ctlbuf = mp->msg_control; 1016 1017 while (m && len > 0) { 1018 unsigned int tocopy; 1019 1020 if (len >= m->m_len) 1021 tocopy = m->m_len; 1022 else { 1023 mp->msg_flags |= MSG_CTRUNC; 1024 tocopy = len; 1025 } 1026 1027 if ((error = copyout(mtod(m, caddr_t), 1028 ctlbuf, tocopy)) != 0) 1029 goto out; 1030 1031 ctlbuf += tocopy; 1032 len -= tocopy; 1033 m = m->m_next; 1034 } 1035 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1036 } 1037out: 1038 fputsock(so); 1039 NET_UNLOCK_GIANT(); 1040 if (fromsa) 1041 FREE(fromsa, M_SONAME); 1042 if (control) 1043 m_freem(control); 1044 return (error); 1045} 1046 1047/* 1048 * MPSAFE 1049 */ 1050int 1051recvfrom(td, uap) 1052 struct thread *td; 1053 register struct recvfrom_args /* { 1054 int s; 1055 caddr_t buf; 1056 size_t len; 1057 int flags; 1058 struct sockaddr * __restrict from; 1059 socklen_t * __restrict fromlenaddr; 1060 } */ *uap; 1061{ 1062 struct msghdr msg; 1063 struct iovec aiov; 1064 int error; 1065 1066 if (uap->fromlenaddr) { 1067 error = copyin(uap->fromlenaddr, 1068 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1069 if (error) 1070 goto done2; 1071 } else { 1072 msg.msg_namelen = 0; 1073 } 1074 msg.msg_name = uap->from; 1075 msg.msg_iov = &aiov; 1076 msg.msg_iovlen = 1; 1077 aiov.iov_base = uap->buf; 1078 aiov.iov_len = uap->len; 1079 msg.msg_control = 0; 1080 msg.msg_flags = uap->flags; 1081 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1082done2: 1083 return(error); 1084} 1085 1086#ifdef COMPAT_OLDSOCK 1087/* 1088 * MPSAFE 1089 */ 1090int 
1091orecvfrom(td, uap) 1092 struct thread *td; 1093 struct recvfrom_args *uap; 1094{ 1095 1096 uap->flags |= MSG_COMPAT; 1097 return (recvfrom(td, uap)); 1098} 1099#endif 1100 1101 1102#ifdef COMPAT_OLDSOCK 1103/* 1104 * MPSAFE 1105 */ 1106int 1107orecv(td, uap) 1108 struct thread *td; 1109 register struct orecv_args /* { 1110 int s; 1111 caddr_t buf; 1112 int len; 1113 int flags; 1114 } */ *uap; 1115{ 1116 struct msghdr msg; 1117 struct iovec aiov; 1118 int error; 1119 1120 msg.msg_name = 0; 1121 msg.msg_namelen = 0; 1122 msg.msg_iov = &aiov; 1123 msg.msg_iovlen = 1; 1124 aiov.iov_base = uap->buf; 1125 aiov.iov_len = uap->len; 1126 msg.msg_control = 0; 1127 msg.msg_flags = uap->flags; 1128 error = recvit(td, uap->s, &msg, NULL); 1129 return (error); 1130} 1131 1132/* 1133 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1134 * overlays the new one, missing only the flags, and with the (old) access 1135 * rights where the control fields are now. 1136 * 1137 * MPSAFE 1138 */ 1139int 1140orecvmsg(td, uap) 1141 struct thread *td; 1142 struct orecvmsg_args /* { 1143 int s; 1144 struct omsghdr *msg; 1145 int flags; 1146 } */ *uap; 1147{ 1148 struct msghdr msg; 1149 struct iovec *iov; 1150 int error; 1151 1152 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1153 if (error) 1154 return (error); 1155 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1156 if (error) 1157 return (error); 1158 msg.msg_flags = uap->flags | MSG_COMPAT; 1159 msg.msg_iov = iov; 1160 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1161 if (msg.msg_controllen && error == 0) 1162 error = copyout(&msg.msg_controllen, 1163 &uap->msg->msg_accrightslen, sizeof (int)); 1164 free(iov, M_IOV); 1165 return (error); 1166} 1167#endif 1168 1169/* 1170 * MPSAFE 1171 */ 1172int 1173recvmsg(td, uap) 1174 struct thread *td; 1175 struct recvmsg_args /* { 1176 int s; 1177 struct msghdr *msg; 1178 int flags; 1179 } */ *uap; 1180{ 1181 struct msghdr msg; 1182 
struct iovec *uiov, *iov; 1183 int error; 1184 1185 error = copyin(uap->msg, &msg, sizeof (msg)); 1186 if (error) 1187 return (error); 1188 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1189 if (error) 1190 return (error); 1191 msg.msg_flags = uap->flags; 1192#ifdef COMPAT_OLDSOCK 1193 msg.msg_flags &= ~MSG_COMPAT; 1194#endif 1195 uiov = msg.msg_iov; 1196 msg.msg_iov = iov; 1197 error = recvit(td, uap->s, &msg, NULL); 1198 if (error == 0) { 1199 msg.msg_iov = uiov; 1200 error = copyout(&msg, uap->msg, sizeof(msg)); 1201 } 1202 free(iov, M_IOV); 1203 return (error); 1204} 1205 1206/* 1207 * MPSAFE 1208 */ 1209/* ARGSUSED */ 1210int 1211shutdown(td, uap) 1212 struct thread *td; 1213 register struct shutdown_args /* { 1214 int s; 1215 int how; 1216 } */ *uap; 1217{ 1218 struct socket *so; 1219 int error; 1220 1221 NET_LOCK_GIANT(); 1222 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1223 error = soshutdown(so, uap->how); 1224 fputsock(so); 1225 } 1226 NET_UNLOCK_GIANT(); 1227 return(error); 1228} 1229 1230/* 1231 * MPSAFE 1232 */ 1233/* ARGSUSED */ 1234int 1235setsockopt(td, uap) 1236 struct thread *td; 1237 register struct setsockopt_args /* { 1238 int s; 1239 int level; 1240 int name; 1241 caddr_t val; 1242 int valsize; 1243 } */ *uap; 1244{ 1245 1246 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1247 uap->val, UIO_USERSPACE, uap->valsize)); 1248} 1249 1250int 1251kern_setsockopt(td, s, level, name, val, valseg, valsize) 1252 struct thread *td; 1253 int s; 1254 int level; 1255 int name; 1256 void *val; 1257 enum uio_seg valseg; 1258 socklen_t valsize; 1259{ 1260 int error; 1261 struct socket *so; 1262 struct sockopt sopt; 1263 1264 if (val == NULL && valsize != 0) 1265 return (EFAULT); 1266 if (valsize < 0) 1267 return (EINVAL); 1268 1269 sopt.sopt_dir = SOPT_SET; 1270 sopt.sopt_level = level; 1271 sopt.sopt_name = name; 1272 sopt.sopt_val = val; 1273 sopt.sopt_valsize = valsize; 1274 switch (valseg) { 1275 case UIO_USERSPACE: 
1276 sopt.sopt_td = td; 1277 break; 1278 case UIO_SYSSPACE: 1279 sopt.sopt_td = NULL; 1280 break; 1281 default: 1282 panic("kern_setsockopt called with bad valseg"); 1283 } 1284 1285 NET_LOCK_GIANT(); 1286 if ((error = fgetsock(td, s, &so, NULL)) == 0) { 1287 error = sosetopt(so, &sopt); 1288 fputsock(so); 1289 } 1290 NET_UNLOCK_GIANT(); 1291 return(error); 1292} 1293 1294/* 1295 * MPSAFE 1296 */ 1297/* ARGSUSED */ 1298int 1299getsockopt(td, uap) 1300 struct thread *td; 1301 register struct getsockopt_args /* { 1302 int s; 1303 int level; 1304 int name; 1305 void * __restrict val; 1306 socklen_t * __restrict avalsize; 1307 } */ *uap; 1308{ 1309 socklen_t valsize; 1310 int error; 1311 1312 if (uap->val) { 1313 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1314 if (error) 1315 return (error); 1316 } 1317 1318 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1319 uap->val, UIO_USERSPACE, &valsize); 1320 1321 if (error == 0) 1322 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1323 return (error); 1324} 1325 1326/* 1327 * Kernel version of getsockopt. 1328 * optval can be a userland or userspace. optlen is always a kernel pointer. 
1329 */ 1330int 1331kern_getsockopt(td, s, level, name, val, valseg, valsize) 1332 struct thread *td; 1333 int s; 1334 int level; 1335 int name; 1336 void *val; 1337 enum uio_seg valseg; 1338 socklen_t *valsize; 1339{ 1340 int error; 1341 struct socket *so; 1342 struct sockopt sopt; 1343 1344 if (val == NULL) 1345 *valsize = 0; 1346 if (*valsize < 0) 1347 return (EINVAL); 1348 1349 sopt.sopt_dir = SOPT_GET; 1350 sopt.sopt_level = level; 1351 sopt.sopt_name = name; 1352 sopt.sopt_val = val; 1353 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1354 switch (valseg) { 1355 case UIO_USERSPACE: 1356 sopt.sopt_td = td; 1357 break; 1358 case UIO_SYSSPACE: 1359 sopt.sopt_td = NULL; 1360 break; 1361 default: 1362 panic("kern_getsockopt called with bad valseg"); 1363 } 1364 1365 NET_LOCK_GIANT(); 1366 if ((error = fgetsock(td, s, &so, NULL)) == 0) { 1367 error = sogetopt(so, &sopt); 1368 *valsize = sopt.sopt_valsize; 1369 fputsock(so); 1370 } 1371 NET_UNLOCK_GIANT(); 1372 return (error); 1373} 1374 1375/* 1376 * getsockname1() - Get socket name. 
1377 * 1378 * MPSAFE 1379 */ 1380/* ARGSUSED */ 1381static int 1382getsockname1(td, uap, compat) 1383 struct thread *td; 1384 register struct getsockname_args /* { 1385 int fdes; 1386 struct sockaddr * __restrict asa; 1387 socklen_t * __restrict alen; 1388 } */ *uap; 1389 int compat; 1390{ 1391 struct socket *so; 1392 struct sockaddr *sa; 1393 socklen_t len; 1394 int error; 1395 1396 NET_LOCK_GIANT(); 1397 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1398 goto done2; 1399 error = copyin(uap->alen, &len, sizeof (len)); 1400 if (error) 1401 goto done1; 1402 if (len < 0) { 1403 error = EINVAL; 1404 goto done1; 1405 } 1406 sa = 0; 1407 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1408 if (error) 1409 goto bad; 1410 if (sa == 0) { 1411 len = 0; 1412 goto gotnothing; 1413 } 1414 1415 len = MIN(len, sa->sa_len); 1416#ifdef COMPAT_OLDSOCK 1417 if (compat) 1418 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1419#endif 1420 error = copyout(sa, uap->asa, (u_int)len); 1421 if (error == 0) 1422gotnothing: 1423 error = copyout(&len, uap->alen, sizeof (len)); 1424bad: 1425 if (sa) 1426 FREE(sa, M_SONAME); 1427done1: 1428 fputsock(so); 1429done2: 1430 NET_UNLOCK_GIANT(); 1431 return (error); 1432} 1433 1434/* 1435 * MPSAFE 1436 */ 1437int 1438getsockname(td, uap) 1439 struct thread *td; 1440 struct getsockname_args *uap; 1441{ 1442 1443 return (getsockname1(td, uap, 0)); 1444} 1445 1446#ifdef COMPAT_OLDSOCK 1447/* 1448 * MPSAFE 1449 */ 1450int 1451ogetsockname(td, uap) 1452 struct thread *td; 1453 struct getsockname_args *uap; 1454{ 1455 1456 return (getsockname1(td, uap, 1)); 1457} 1458#endif /* COMPAT_OLDSOCK */ 1459 1460/* 1461 * getpeername1() - Get name of peer for connected socket. 
1462 * 1463 * MPSAFE 1464 */ 1465/* ARGSUSED */ 1466static int 1467getpeername1(td, uap, compat) 1468 struct thread *td; 1469 register struct getpeername_args /* { 1470 int fdes; 1471 struct sockaddr * __restrict asa; 1472 socklen_t * __restrict alen; 1473 } */ *uap; 1474 int compat; 1475{ 1476 struct socket *so; 1477 struct sockaddr *sa; 1478 socklen_t len; 1479 int error; 1480 1481 NET_LOCK_GIANT(); 1482 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1483 goto done2; 1484 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1485 error = ENOTCONN; 1486 goto done1; 1487 } 1488 error = copyin(uap->alen, &len, sizeof (len)); 1489 if (error) 1490 goto done1; 1491 if (len < 0) { 1492 error = EINVAL; 1493 goto done1; 1494 } 1495 sa = 0; 1496 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1497 if (error) 1498 goto bad; 1499 if (sa == 0) { 1500 len = 0; 1501 goto gotnothing; 1502 } 1503 len = MIN(len, sa->sa_len); 1504#ifdef COMPAT_OLDSOCK 1505 if (compat) 1506 ((struct osockaddr *)sa)->sa_family = 1507 sa->sa_family; 1508#endif 1509 error = copyout(sa, uap->asa, (u_int)len); 1510 if (error) 1511 goto bad; 1512gotnothing: 1513 error = copyout(&len, uap->alen, sizeof (len)); 1514bad: 1515 if (sa) 1516 FREE(sa, M_SONAME); 1517done1: 1518 fputsock(so); 1519done2: 1520 NET_UNLOCK_GIANT(); 1521 return (error); 1522} 1523 1524/* 1525 * MPSAFE 1526 */ 1527int 1528getpeername(td, uap) 1529 struct thread *td; 1530 struct getpeername_args *uap; 1531{ 1532 1533 return (getpeername1(td, uap, 0)); 1534} 1535 1536#ifdef COMPAT_OLDSOCK 1537/* 1538 * MPSAFE 1539 */ 1540int 1541ogetpeername(td, uap) 1542 struct thread *td; 1543 struct ogetpeername_args *uap; 1544{ 1545 1546 /* XXX uap should have type `getpeername_args *' to begin with. 
*/ 1547 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1548} 1549#endif /* COMPAT_OLDSOCK */ 1550 1551int 1552sockargs(mp, buf, buflen, type) 1553 struct mbuf **mp; 1554 caddr_t buf; 1555 int buflen, type; 1556{ 1557 register struct sockaddr *sa; 1558 register struct mbuf *m; 1559 int error; 1560 1561 if ((u_int)buflen > MLEN) { 1562#ifdef COMPAT_OLDSOCK 1563 if (type == MT_SONAME && (u_int)buflen <= 112) 1564 buflen = MLEN; /* unix domain compat. hack */ 1565 else 1566#endif 1567 if ((u_int)buflen > MCLBYTES) 1568 return (EINVAL); 1569 } 1570 m = m_get(M_TRYWAIT, type); 1571 if (m == NULL) 1572 return (ENOBUFS); 1573 if ((u_int)buflen > MLEN) { 1574 MCLGET(m, M_TRYWAIT); 1575 if ((m->m_flags & M_EXT) == 0) { 1576 m_free(m); 1577 return (ENOBUFS); 1578 } 1579 } 1580 m->m_len = buflen; 1581 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1582 if (error) 1583 (void) m_free(m); 1584 else { 1585 *mp = m; 1586 if (type == MT_SONAME) { 1587 sa = mtod(m, struct sockaddr *); 1588 1589#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1590 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1591 sa->sa_family = sa->sa_len; 1592#endif 1593 sa->sa_len = buflen; 1594 } 1595 } 1596 return (error); 1597} 1598 1599int 1600getsockaddr(namp, uaddr, len) 1601 struct sockaddr **namp; 1602 caddr_t uaddr; 1603 size_t len; 1604{ 1605 struct sockaddr *sa; 1606 int error; 1607 1608 if (len > SOCK_MAXADDRLEN) 1609 return (ENAMETOOLONG); 1610 if (len < offsetof(struct sockaddr, sa_data[0])) 1611 return (EINVAL); 1612 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1613 error = copyin(uaddr, sa, len); 1614 if (error) { 1615 FREE(sa, M_SONAME); 1616 } else { 1617#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1618 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1619 sa->sa_family = sa->sa_len; 1620#endif 1621 sa->sa_len = len; 1622 *namp = sa; 1623 } 1624 return (error); 1625} 1626 1627/* 1628 * Detach mapped page and release resources back to the system. 
1629 */ 1630void 1631sf_buf_mext(void *addr, void *args) 1632{ 1633 vm_page_t m; 1634 1635 m = sf_buf_page(args); 1636 sf_buf_free(args); 1637 vm_page_lock_queues(); 1638 vm_page_unwire(m, 0); 1639 /* 1640 * Check for the object going away on us. This can 1641 * happen since we don't hold a reference to it. 1642 * If so, we're responsible for freeing the page. 1643 */ 1644 if (m->wire_count == 0 && m->object == NULL) 1645 vm_page_free(m); 1646 vm_page_unlock_queues(); 1647} 1648 1649/* 1650 * sendfile(2) 1651 * 1652 * MPSAFE 1653 * 1654 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1655 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1656 * 1657 * Send a file specified by 'fd' and starting at 'offset' to a socket 1658 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1659 * nbytes == 0. Optionally add a header and/or trailer to the socket 1660 * output. If specified, write the total number of bytes sent into *sbytes. 1661 * 1662 */ 1663int 1664sendfile(struct thread *td, struct sendfile_args *uap) 1665{ 1666 1667 return (do_sendfile(td, uap, 0)); 1668} 1669 1670#ifdef COMPAT_FREEBSD4 1671int 1672freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1673{ 1674 struct sendfile_args args; 1675 1676 args.fd = uap->fd; 1677 args.s = uap->s; 1678 args.offset = uap->offset; 1679 args.nbytes = uap->nbytes; 1680 args.hdtr = uap->hdtr; 1681 args.sbytes = uap->sbytes; 1682 args.flags = uap->flags; 1683 1684 return (do_sendfile(td, &args, 1)); 1685} 1686#endif /* COMPAT_FREEBSD4 */ 1687 1688static int 1689do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1690{ 1691 struct vnode *vp; 1692 struct vm_object *obj; 1693 struct socket *so = NULL; 1694 struct mbuf *m, *m_header = NULL; 1695 struct sf_buf *sf; 1696 struct vm_page *pg; 1697 struct writev_args nuap; 1698 struct sf_hdtr hdtr; 1699 struct uio *hdr_uio = NULL; 1700 off_t off, xfsize, hdtr_size, sbytes = 0; 1701 int error, headersize = 0, headersent = 0; 
1702 1703 mtx_lock(&Giant); 1704 1705 hdtr_size = 0; 1706 1707 /* 1708 * The descriptor must be a regular file and have a backing VM object. 1709 */ 1710 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1711 goto done; 1712 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1713 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1714 error = EINVAL; 1715 VOP_UNLOCK(vp, 0, td); 1716 goto done; 1717 } 1718 VOP_UNLOCK(vp, 0, td); 1719 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1720 goto done; 1721 if (so->so_type != SOCK_STREAM) { 1722 error = EINVAL; 1723 goto done; 1724 } 1725 if ((so->so_state & SS_ISCONNECTED) == 0) { 1726 error = ENOTCONN; 1727 goto done; 1728 } 1729 if (uap->offset < 0) { 1730 error = EINVAL; 1731 goto done; 1732 } 1733 1734#ifdef MAC 1735 SOCK_LOCK(so); 1736 error = mac_check_socket_send(td->td_ucred, so); 1737 SOCK_UNLOCK(so); 1738 if (error) 1739 goto done; 1740#endif 1741 1742 /* 1743 * If specified, get the pointer to the sf_hdtr struct for 1744 * any headers/trailers. 1745 */ 1746 if (uap->hdtr != NULL) { 1747 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1748 if (error) 1749 goto done; 1750 /* 1751 * Send any headers. 1752 */ 1753 if (hdtr.headers != NULL) { 1754 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1755 if (error) 1756 goto done; 1757 hdr_uio->uio_td = td; 1758 hdr_uio->uio_rw = UIO_WRITE; 1759 if (hdr_uio->uio_resid > 0) { 1760 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0); 1761 if (m_header == NULL) 1762 goto done; 1763 headersize = m_header->m_pkthdr.len; 1764 if (compat) 1765 sbytes += headersize; 1766 } 1767 } 1768 } 1769 1770 /* 1771 * Protect against multiple writers to the socket. 1772 */ 1773 SOCKBUF_LOCK(&so->so_snd); 1774 (void) sblock(&so->so_snd, M_WAITOK); 1775 SOCKBUF_UNLOCK(&so->so_snd); 1776 1777 /* 1778 * Loop through the pages in the file, starting with the requested 1779 * offset. 
Get a file page (do I/O if necessary), map the file page 1780 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1781 * it on the socket. 1782 */ 1783 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1784 vm_pindex_t pindex; 1785 vm_offset_t pgoff; 1786 1787 pindex = OFF_TO_IDX(off); 1788 VM_OBJECT_LOCK(obj); 1789retry_lookup: 1790 /* 1791 * Calculate the amount to transfer. Not to exceed a page, 1792 * the EOF, or the passed in nbytes. 1793 */ 1794 xfsize = obj->un_pager.vnp.vnp_size - off; 1795 VM_OBJECT_UNLOCK(obj); 1796 if (xfsize > PAGE_SIZE) 1797 xfsize = PAGE_SIZE; 1798 pgoff = (vm_offset_t)(off & PAGE_MASK); 1799 if (PAGE_SIZE - pgoff < xfsize) 1800 xfsize = PAGE_SIZE - pgoff; 1801 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1802 xfsize = uap->nbytes - sbytes; 1803 if (xfsize <= 0) { 1804 if (m_header != NULL) { 1805 m = m_header; 1806 m_header = NULL; 1807 SOCKBUF_LOCK(&so->so_snd); 1808 goto retry_space; 1809 } else 1810 break; 1811 } 1812 /* 1813 * Optimize the non-blocking case by looking at the socket space 1814 * before going to the extra work of constituting the sf_buf. 1815 */ 1816 SOCKBUF_LOCK(&so->so_snd); 1817 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1818 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1819 error = EPIPE; 1820 else 1821 error = EAGAIN; 1822 sbunlock(&so->so_snd); 1823 SOCKBUF_UNLOCK(&so->so_snd); 1824 goto done; 1825 } 1826 SOCKBUF_UNLOCK(&so->so_snd); 1827 VM_OBJECT_LOCK(obj); 1828 /* 1829 * Attempt to look up the page. 1830 * 1831 * Allocate if not found 1832 * 1833 * Wait and loop if busy. 
1834 */ 1835 pg = vm_page_lookup(obj, pindex); 1836 1837 if (pg == NULL) { 1838 pg = vm_page_alloc(obj, pindex, 1839 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1840 if (pg == NULL) { 1841 VM_OBJECT_UNLOCK(obj); 1842 VM_WAIT; 1843 VM_OBJECT_LOCK(obj); 1844 goto retry_lookup; 1845 } 1846 vm_page_lock_queues(); 1847 vm_page_wakeup(pg); 1848 } else { 1849 vm_page_lock_queues(); 1850 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1851 goto retry_lookup; 1852 /* 1853 * Wire the page so it does not get ripped out from 1854 * under us. 1855 */ 1856 vm_page_wire(pg); 1857 } 1858 1859 /* 1860 * If page is not valid for what we need, initiate I/O 1861 */ 1862 1863 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1864 VM_OBJECT_UNLOCK(obj); 1865 } else if (uap->flags & SF_NODISKIO) { 1866 error = EBUSY; 1867 } else { 1868 int bsize, resid; 1869 1870 /* 1871 * Ensure that our page is still around when the I/O 1872 * completes. 1873 */ 1874 vm_page_io_start(pg); 1875 vm_page_unlock_queues(); 1876 VM_OBJECT_UNLOCK(obj); 1877 1878 /* 1879 * Get the page from backing store. 1880 */ 1881 bsize = vp->v_mount->mnt_stat.f_iosize; 1882 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1883 /* 1884 * XXXMAC: Because we don't have fp->f_cred here, 1885 * we pass in NOCRED. This is probably wrong, but 1886 * is consistent with our original implementation. 1887 */ 1888 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1889 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1890 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1891 td->td_ucred, NOCRED, &resid, td); 1892 VOP_UNLOCK(vp, 0, td); 1893 if (error) 1894 VM_OBJECT_LOCK(obj); 1895 vm_page_lock_queues(); 1896 vm_page_io_finish(pg); 1897 mbstat.sf_iocnt++; 1898 } 1899 1900 if (error) { 1901 vm_page_unwire(pg, 0); 1902 /* 1903 * See if anyone else might know about this page. 1904 * If not and it is not valid, then free it. 
1905 */ 1906 if (pg->wire_count == 0 && pg->valid == 0 && 1907 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1908 pg->hold_count == 0) { 1909 vm_page_busy(pg); 1910 vm_page_free(pg); 1911 } 1912 vm_page_unlock_queues(); 1913 VM_OBJECT_UNLOCK(obj); 1914 SOCKBUF_LOCK(&so->so_snd); 1915 sbunlock(&so->so_snd); 1916 SOCKBUF_UNLOCK(&so->so_snd); 1917 goto done; 1918 } 1919 vm_page_unlock_queues(); 1920 1921 /* 1922 * Get a sendfile buf. We usually wait as long as necessary, 1923 * but this wait can be interrupted. 1924 */ 1925 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) { 1926 mbstat.sf_allocfail++; 1927 vm_page_lock_queues(); 1928 vm_page_unwire(pg, 0); 1929 if (pg->wire_count == 0 && pg->object == NULL) 1930 vm_page_free(pg); 1931 vm_page_unlock_queues(); 1932 SOCKBUF_LOCK(&so->so_snd); 1933 sbunlock(&so->so_snd); 1934 SOCKBUF_UNLOCK(&so->so_snd); 1935 error = EINTR; 1936 goto done; 1937 } 1938 1939 /* 1940 * Get an mbuf header and set it up as having external storage. 1941 */ 1942 if (m_header) 1943 MGET(m, M_TRYWAIT, MT_DATA); 1944 else 1945 MGETHDR(m, M_TRYWAIT, MT_DATA); 1946 if (m == NULL) { 1947 error = ENOBUFS; 1948 sf_buf_mext((void *)sf_buf_kva(sf), sf); 1949 SOCKBUF_LOCK(&so->so_snd); 1950 sbunlock(&so->so_snd); 1951 SOCKBUF_UNLOCK(&so->so_snd); 1952 goto done; 1953 } 1954 /* 1955 * Setup external storage for mbuf. 1956 */ 1957 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 1958 EXT_SFBUF); 1959 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1960 m->m_pkthdr.len = m->m_len = xfsize; 1961 1962 if (m_header) { 1963 m_cat(m_header, m); 1964 m = m_header; 1965 m_header = NULL; 1966 m_fixhdr(m); 1967 } 1968 1969 /* 1970 * Add the buffer to the socket buffer chain. 1971 */ 1972 SOCKBUF_LOCK(&so->so_snd); 1973retry_space: 1974 /* 1975 * Make sure that the socket is still able to take more data. 1976 * CANTSENDMORE being true usually means that the connection 1977 * was closed. so_error is true when an error was sensed after 1978 * a previous send. 
1979 * The state is checked after the page mapping and buffer 1980 * allocation above since those operations may block and make 1981 * any socket checks stale. From this point forward, nothing 1982 * blocks before the pru_send (or more accurately, any blocking 1983 * results in a loop back to here to re-check). 1984 */ 1985 SOCKBUF_LOCK_ASSERT(&so->so_snd); 1986 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 1987 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1988 error = EPIPE; 1989 } else { 1990 error = so->so_error; 1991 so->so_error = 0; 1992 } 1993 m_freem(m); 1994 sbunlock(&so->so_snd); 1995 SOCKBUF_UNLOCK(&so->so_snd); 1996 goto done; 1997 } 1998 /* 1999 * Wait for socket space to become available. We do this just 2000 * after checking the connection state above in order to avoid 2001 * a race condition with sbwait(). 2002 */ 2003 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2004 if (so->so_state & SS_NBIO) { 2005 m_freem(m); 2006 sbunlock(&so->so_snd); 2007 SOCKBUF_UNLOCK(&so->so_snd); 2008 error = EAGAIN; 2009 goto done; 2010 } 2011 error = sbwait(&so->so_snd); 2012 /* 2013 * An error from sbwait usually indicates that we've 2014 * been interrupted by a signal. If we've sent anything 2015 * then return bytes sent, otherwise return the error. 2016 */ 2017 if (error) { 2018 m_freem(m); 2019 sbunlock(&so->so_snd); 2020 SOCKBUF_UNLOCK(&so->so_snd); 2021 goto done; 2022 } 2023 goto retry_space; 2024 } 2025 SOCKBUF_UNLOCK(&so->so_snd); 2026 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2027 if (error) { 2028 SOCKBUF_LOCK(&so->so_snd); 2029 sbunlock(&so->so_snd); 2030 SOCKBUF_UNLOCK(&so->so_snd); 2031 goto done; 2032 } 2033 headersent = 1; 2034 } 2035 SOCKBUF_LOCK(&so->so_snd); 2036 sbunlock(&so->so_snd); 2037 SOCKBUF_UNLOCK(&so->so_snd); 2038 2039 /* 2040 * Send trailers. Wimp out and use writev(2). 
2041 */ 2042 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2043 nuap.fd = uap->s; 2044 nuap.iovp = hdtr.trailers; 2045 nuap.iovcnt = hdtr.trl_cnt; 2046 error = writev(td, &nuap); 2047 if (error) 2048 goto done; 2049 if (compat) 2050 sbytes += td->td_retval[0]; 2051 else 2052 hdtr_size += td->td_retval[0]; 2053 } 2054 2055done: 2056 if (headersent) { 2057 if (!compat) 2058 hdtr_size += headersize; 2059 } else { 2060 if (compat) 2061 sbytes -= headersize; 2062 } 2063 /* 2064 * If there was no error we have to clear td->td_retval[0] 2065 * because it may have been set by writev. 2066 */ 2067 if (error == 0) { 2068 td->td_retval[0] = 0; 2069 } 2070 if (uap->sbytes != NULL) { 2071 if (!compat) 2072 sbytes += hdtr_size; 2073 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2074 } 2075 if (vp) 2076 vrele(vp); 2077 if (so) 2078 fputsock(so); 2079 if (hdr_uio != NULL) 2080 free(hdr_uio, M_IOV); 2081 if (m_header) 2082 m_freem(m_header); 2083 2084 mtx_unlock(&Giant); 2085 2086 if (error == ERESTART) 2087 error = EINTR; 2088 2089 return (error); 2090} 2091