uipc_syscalls.c revision 161125
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 
31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 161125 2006-08-09 17:43:27Z alc $"); 37 38#include "opt_compat.h" 39#include "opt_ktrace.h" 40#include "opt_mac.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/mac.h> 47#include <sys/mutex.h> 48#include <sys/sysproto.h> 49#include <sys/malloc.h> 50#include <sys/filedesc.h> 51#include <sys/event.h> 52#include <sys/proc.h> 53#include <sys/fcntl.h> 54#include <sys/file.h> 55#include <sys/filio.h> 56#include <sys/mount.h> 57#include <sys/mbuf.h> 58#include <sys/protosw.h> 59#include <sys/sf_buf.h> 60#include <sys/socket.h> 61#include <sys/socketvar.h> 62#include <sys/signalvar.h> 63#include <sys/syscallsubr.h> 64#include <sys/sysctl.h> 65#include <sys/uio.h> 66#include <sys/vnode.h> 67#ifdef KTRACE 68#include <sys/ktrace.h> 69#endif 70 71#include <vm/vm.h> 72#include <vm/vm_object.h> 73#include <vm/vm_page.h> 74#include <vm/vm_pageout.h> 75#include <vm/vm_kern.h> 76#include <vm/vm_extern.h> 77 78static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81static int accept1(struct thread *td, struct accept_args *uap, int compat); 82static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88/* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91int nsfbufs; 92int nsfbufspeak; 93int nsfbufsused; 94 95SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 96 "Maximum number of sendfile(2) sf_bufs available"); 97SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 98 "Number of sendfile(2) sf_bufs at peak usage"); 
99SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 100 "Number of sendfile(2) sf_bufs in use"); 101 102/* 103 * Convert a user file descriptor to a kernel file entry. A reference on the 104 * file entry is held upon returning. This is lighter weight than 105 * fgetsock(), which bumps the socket reference drops the file reference 106 * count instead, as this approach avoids several additional mutex operations 107 * associated with the additional reference count. If requested, return the 108 * open file flags. 109 */ 110static int 111getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 112{ 113 struct file *fp; 114 int error; 115 116 fp = NULL; 117 if (fdp == NULL) 118 error = EBADF; 119 else { 120 FILEDESC_LOCK_FAST(fdp); 121 fp = fget_locked(fdp, fd); 122 if (fp == NULL) 123 error = EBADF; 124 else if (fp->f_type != DTYPE_SOCKET) { 125 fp = NULL; 126 error = ENOTSOCK; 127 } else { 128 fhold(fp); 129 if (fflagp != NULL) 130 *fflagp = fp->f_flag; 131 error = 0; 132 } 133 FILEDESC_UNLOCK_FAST(fdp); 134 } 135 *fpp = fp; 136 return (error); 137} 138 139/* 140 * System call interface to the socket abstraction. 141 */ 142#if defined(COMPAT_43) 143#define COMPAT_OLDSOCK 144#endif 145 146/* 147 * MPSAFE 148 */ 149int 150socket(td, uap) 151 struct thread *td; 152 register struct socket_args /* { 153 int domain; 154 int type; 155 int protocol; 156 } */ *uap; 157{ 158 struct filedesc *fdp; 159 struct socket *so; 160 struct file *fp; 161 int fd, error; 162 163#ifdef MAC 164 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 165 uap->protocol); 166 if (error) 167 return (error); 168#endif 169 fdp = td->td_proc->p_fd; 170 error = falloc(td, &fp, &fd); 171 if (error) 172 return (error); 173 /* An extra reference on `fp' has been held for us by falloc(). 
*/ 174 NET_LOCK_GIANT(); 175 error = socreate(uap->domain, &so, uap->type, uap->protocol, 176 td->td_ucred, td); 177 NET_UNLOCK_GIANT(); 178 if (error) { 179 fdclose(fdp, fp, fd, td); 180 } else { 181 FILEDESC_LOCK_FAST(fdp); 182 fp->f_data = so; /* already has ref count */ 183 fp->f_flag = FREAD|FWRITE; 184 fp->f_ops = &socketops; 185 fp->f_type = DTYPE_SOCKET; 186 FILEDESC_UNLOCK_FAST(fdp); 187 td->td_retval[0] = fd; 188 } 189 fdrop(fp, td); 190 return (error); 191} 192 193/* 194 * MPSAFE 195 */ 196/* ARGSUSED */ 197int 198bind(td, uap) 199 struct thread *td; 200 register struct bind_args /* { 201 int s; 202 caddr_t name; 203 int namelen; 204 } */ *uap; 205{ 206 struct sockaddr *sa; 207 int error; 208 209 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 210 return (error); 211 212 error = kern_bind(td, uap->s, sa); 213 free(sa, M_SONAME); 214 return (error); 215} 216 217int 218kern_bind(td, fd, sa) 219 struct thread *td; 220 int fd; 221 struct sockaddr *sa; 222{ 223 struct socket *so; 224 struct file *fp; 225 int error; 226 227 NET_LOCK_GIANT(); 228 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 229 if (error) 230 goto done2; 231 so = fp->f_data; 232#ifdef MAC 233 SOCK_LOCK(so); 234 error = mac_check_socket_bind(td->td_ucred, so, sa); 235 SOCK_UNLOCK(so); 236 if (error) 237 goto done1; 238#endif 239 error = sobind(so, sa, td); 240#ifdef MAC 241done1: 242#endif 243 fdrop(fp, td); 244done2: 245 NET_UNLOCK_GIANT(); 246 return (error); 247} 248 249/* 250 * MPSAFE 251 */ 252/* ARGSUSED */ 253int 254listen(td, uap) 255 struct thread *td; 256 register struct listen_args /* { 257 int s; 258 int backlog; 259 } */ *uap; 260{ 261 struct socket *so; 262 struct file *fp; 263 int error; 264 265 NET_LOCK_GIANT(); 266 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 267 if (error == 0) { 268 so = fp->f_data; 269#ifdef MAC 270 SOCK_LOCK(so); 271 error = mac_check_socket_listen(td->td_ucred, so); 272 SOCK_UNLOCK(so); 273 if (error) 274 goto done; 275#endif 
276 error = solisten(so, uap->backlog, td); 277#ifdef MAC 278done: 279#endif 280 fdrop(fp, td); 281 } 282 NET_UNLOCK_GIANT(); 283 return(error); 284} 285 286/* 287 * accept1() 288 * MPSAFE 289 */ 290static int 291accept1(td, uap, compat) 292 struct thread *td; 293 register struct accept_args /* { 294 int s; 295 struct sockaddr * __restrict name; 296 socklen_t * __restrict anamelen; 297 } */ *uap; 298 int compat; 299{ 300 struct sockaddr *name; 301 socklen_t namelen; 302 struct file *fp; 303 int error; 304 305 if (uap->name == NULL) 306 return (kern_accept(td, uap->s, NULL, NULL, NULL)); 307 308 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 309 if (error) 310 return (error); 311 312 error = kern_accept(td, uap->s, &name, &namelen, &fp); 313 314 /* 315 * return a namelen of zero for older code which might 316 * ignore the return value from accept. 317 */ 318 if (error) { 319 (void) copyout(&namelen, 320 uap->anamelen, sizeof(*uap->anamelen)); 321 return (error); 322 } 323 324 if (error == 0 && name != NULL) { 325#ifdef COMPAT_OLDSOCK 326 if (compat) 327 ((struct osockaddr *)name)->sa_family = 328 name->sa_family; 329#endif 330 error = copyout(name, uap->name, namelen); 331 } 332 if (error == 0) 333 error = copyout(&namelen, uap->anamelen, 334 sizeof(namelen)); 335 if (error) 336 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 337 fdrop(fp, td); 338 free(name, M_SONAME); 339 return (error); 340} 341 342int 343kern_accept(struct thread *td, int s, struct sockaddr **name, 344 socklen_t *namelen, struct file **fp) 345{ 346 struct filedesc *fdp; 347 struct file *headfp, *nfp = NULL; 348 struct sockaddr *sa = NULL; 349 int error; 350 struct socket *head, *so; 351 int fd; 352 u_int fflag; 353 pid_t pgid; 354 int tmp; 355 356 if (name) { 357 *name = NULL; 358 if (*namelen < 0) 359 return (EINVAL); 360 } 361 362 fdp = td->td_proc->p_fd; 363 NET_LOCK_GIANT(); 364 error = getsock(fdp, s, &headfp, &fflag); 365 if (error) 366 goto done2; 367 head = 
headfp->f_data; 368 if ((head->so_options & SO_ACCEPTCONN) == 0) { 369 error = EINVAL; 370 goto done; 371 } 372#ifdef MAC 373 SOCK_LOCK(head); 374 error = mac_check_socket_accept(td->td_ucred, head); 375 SOCK_UNLOCK(head); 376 if (error != 0) 377 goto done; 378#endif 379 error = falloc(td, &nfp, &fd); 380 if (error) 381 goto done; 382 ACCEPT_LOCK(); 383 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 384 ACCEPT_UNLOCK(); 385 error = EWOULDBLOCK; 386 goto noconnection; 387 } 388 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 389 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 390 head->so_error = ECONNABORTED; 391 break; 392 } 393 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 394 "accept", 0); 395 if (error) { 396 ACCEPT_UNLOCK(); 397 goto noconnection; 398 } 399 } 400 if (head->so_error) { 401 error = head->so_error; 402 head->so_error = 0; 403 ACCEPT_UNLOCK(); 404 goto noconnection; 405 } 406 so = TAILQ_FIRST(&head->so_comp); 407 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 408 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 409 410 /* 411 * Before changing the flags on the socket, we have to bump the 412 * reference count. Otherwise, if the protocol calls sofree(), 413 * the socket will be released due to a zero refcount. 414 */ 415 SOCK_LOCK(so); /* soref() and so_state update */ 416 soref(so); /* file descriptor reference */ 417 418 TAILQ_REMOVE(&head->so_comp, so, so_list); 419 head->so_qlen--; 420 so->so_state |= (head->so_state & SS_NBIO); 421 so->so_qstate &= ~SQ_COMP; 422 so->so_head = NULL; 423 424 SOCK_UNLOCK(so); 425 ACCEPT_UNLOCK(); 426 427 /* An extra reference on `nfp' has been held for us by falloc(). 
*/ 428 td->td_retval[0] = fd; 429 430 /* connection has been removed from the listen queue */ 431 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 432 433 pgid = fgetown(&head->so_sigio); 434 if (pgid != 0) 435 fsetown(pgid, &so->so_sigio); 436 437 FILE_LOCK(nfp); 438 nfp->f_data = so; /* nfp has ref count from falloc */ 439 nfp->f_flag = fflag; 440 nfp->f_ops = &socketops; 441 nfp->f_type = DTYPE_SOCKET; 442 FILE_UNLOCK(nfp); 443 /* Sync socket nonblocking/async state with file flags */ 444 tmp = fflag & FNONBLOCK; 445 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 446 tmp = fflag & FASYNC; 447 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 448 sa = 0; 449 error = soaccept(so, &sa); 450 if (error) { 451 /* 452 * return a namelen of zero for older code which might 453 * ignore the return value from accept. 454 */ 455 if (name) 456 *namelen = 0; 457 goto noconnection; 458 } 459 if (sa == NULL) { 460 if (name) 461 *namelen = 0; 462 goto done; 463 } 464 if (name) { 465 /* check sa_len before it is destroyed */ 466 if (*namelen > sa->sa_len) 467 *namelen = sa->sa_len; 468 *name = sa; 469 sa = NULL; 470 } 471noconnection: 472 if (sa) 473 FREE(sa, M_SONAME); 474 475 /* 476 * close the new descriptor, assuming someone hasn't ripped it 477 * out from under us. 478 */ 479 if (error) 480 fdclose(fdp, nfp, fd, td); 481 482 /* 483 * Release explicitly held references before returning. We return 484 * a reference on nfp to the caller on success if they request it. 
485 */ 486done: 487 if (fp != NULL) { 488 if (error == 0) { 489 *fp = nfp; 490 nfp = NULL; 491 } else 492 *fp = NULL; 493 } 494 if (nfp != NULL) 495 fdrop(nfp, td); 496 fdrop(headfp, td); 497done2: 498 NET_UNLOCK_GIANT(); 499 return (error); 500} 501 502/* 503 * MPSAFE (accept1() is MPSAFE) 504 */ 505int 506accept(td, uap) 507 struct thread *td; 508 struct accept_args *uap; 509{ 510 511 return (accept1(td, uap, 0)); 512} 513 514#ifdef COMPAT_OLDSOCK 515/* 516 * MPSAFE (accept1() is MPSAFE) 517 */ 518int 519oaccept(td, uap) 520 struct thread *td; 521 struct accept_args *uap; 522{ 523 524 return (accept1(td, uap, 1)); 525} 526#endif /* COMPAT_OLDSOCK */ 527 528/* 529 * MPSAFE 530 */ 531/* ARGSUSED */ 532int 533connect(td, uap) 534 struct thread *td; 535 register struct connect_args /* { 536 int s; 537 caddr_t name; 538 int namelen; 539 } */ *uap; 540{ 541 struct sockaddr *sa; 542 int error; 543 544 error = getsockaddr(&sa, uap->name, uap->namelen); 545 if (error) 546 return (error); 547 548 error = kern_connect(td, uap->s, sa); 549 free(sa, M_SONAME); 550 return (error); 551} 552 553 554int 555kern_connect(td, fd, sa) 556 struct thread *td; 557 int fd; 558 struct sockaddr *sa; 559{ 560 struct socket *so; 561 struct file *fp; 562 int error; 563 int interrupted = 0; 564 565 NET_LOCK_GIANT(); 566 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 567 if (error) 568 goto done2; 569 so = fp->f_data; 570 if (so->so_state & SS_ISCONNECTING) { 571 error = EALREADY; 572 goto done1; 573 } 574#ifdef MAC 575 SOCK_LOCK(so); 576 error = mac_check_socket_connect(td->td_ucred, so, sa); 577 SOCK_UNLOCK(so); 578 if (error) 579 goto bad; 580#endif 581 error = soconnect(so, sa, td); 582 if (error) 583 goto bad; 584 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 585 error = EINPROGRESS; 586 goto done1; 587 } 588 SOCK_LOCK(so); 589 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 590 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 591 
"connec", 0); 592 if (error) { 593 if (error == EINTR || error == ERESTART) 594 interrupted = 1; 595 break; 596 } 597 } 598 if (error == 0) { 599 error = so->so_error; 600 so->so_error = 0; 601 } 602 SOCK_UNLOCK(so); 603bad: 604 if (!interrupted) 605 so->so_state &= ~SS_ISCONNECTING; 606 if (error == ERESTART) 607 error = EINTR; 608done1: 609 fdrop(fp, td); 610done2: 611 NET_UNLOCK_GIANT(); 612 return (error); 613} 614 615/* 616 * MPSAFE 617 */ 618int 619socketpair(td, uap) 620 struct thread *td; 621 register struct socketpair_args /* { 622 int domain; 623 int type; 624 int protocol; 625 int *rsv; 626 } */ *uap; 627{ 628 register struct filedesc *fdp = td->td_proc->p_fd; 629 struct file *fp1, *fp2; 630 struct socket *so1, *so2; 631 int fd, error, sv[2]; 632 633#ifdef MAC 634 /* We might want to have a separate check for socket pairs. */ 635 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 636 uap->protocol); 637 if (error) 638 return (error); 639#endif 640 641 NET_LOCK_GIANT(); 642 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 643 td->td_ucred, td); 644 if (error) 645 goto done2; 646 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 647 td->td_ucred, td); 648 if (error) 649 goto free1; 650 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 651 error = falloc(td, &fp1, &fd); 652 if (error) 653 goto free2; 654 sv[0] = fd; 655 fp1->f_data = so1; /* so1 already has ref count */ 656 error = falloc(td, &fp2, &fd); 657 if (error) 658 goto free3; 659 fp2->f_data = so2; /* so2 already has ref count */ 660 sv[1] = fd; 661 error = soconnect2(so1, so2); 662 if (error) 663 goto free4; 664 if (uap->type == SOCK_DGRAM) { 665 /* 666 * Datagram socket connection is asymmetric. 
667 */ 668 error = soconnect2(so2, so1); 669 if (error) 670 goto free4; 671 } 672 FILE_LOCK(fp1); 673 fp1->f_flag = FREAD|FWRITE; 674 fp1->f_ops = &socketops; 675 fp1->f_type = DTYPE_SOCKET; 676 FILE_UNLOCK(fp1); 677 FILE_LOCK(fp2); 678 fp2->f_flag = FREAD|FWRITE; 679 fp2->f_ops = &socketops; 680 fp2->f_type = DTYPE_SOCKET; 681 FILE_UNLOCK(fp2); 682 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 683 fdrop(fp1, td); 684 fdrop(fp2, td); 685 goto done2; 686free4: 687 fdclose(fdp, fp2, sv[1], td); 688 fdrop(fp2, td); 689free3: 690 fdclose(fdp, fp1, sv[0], td); 691 fdrop(fp1, td); 692free2: 693 (void)soclose(so2); 694free1: 695 (void)soclose(so1); 696done2: 697 NET_UNLOCK_GIANT(); 698 return (error); 699} 700 701static int 702sendit(td, s, mp, flags) 703 register struct thread *td; 704 int s; 705 register struct msghdr *mp; 706 int flags; 707{ 708 struct mbuf *control; 709 struct sockaddr *to; 710 int error; 711 712 if (mp->msg_name != NULL) { 713 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 714 if (error) { 715 to = NULL; 716 goto bad; 717 } 718 mp->msg_name = to; 719 } else { 720 to = NULL; 721 } 722 723 if (mp->msg_control) { 724 if (mp->msg_controllen < sizeof(struct cmsghdr) 725#ifdef COMPAT_OLDSOCK 726 && mp->msg_flags != MSG_COMPAT 727#endif 728 ) { 729 error = EINVAL; 730 goto bad; 731 } 732 error = sockargs(&control, mp->msg_control, 733 mp->msg_controllen, MT_CONTROL); 734 if (error) 735 goto bad; 736#ifdef COMPAT_OLDSOCK 737 if (mp->msg_flags == MSG_COMPAT) { 738 register struct cmsghdr *cm; 739 740 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 741 if (control == 0) { 742 error = ENOBUFS; 743 goto bad; 744 } else { 745 cm = mtod(control, struct cmsghdr *); 746 cm->cmsg_len = control->m_len; 747 cm->cmsg_level = SOL_SOCKET; 748 cm->cmsg_type = SCM_RIGHTS; 749 } 750 } 751#endif 752 } else { 753 control = NULL; 754 } 755 756 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 757 758bad: 759 if (to) 760 FREE(to, M_SONAME); 761 return 
(error); 762} 763 764int 765kern_sendit(td, s, mp, flags, control, segflg) 766 struct thread *td; 767 int s; 768 struct msghdr *mp; 769 int flags; 770 struct mbuf *control; 771 enum uio_seg segflg; 772{ 773 struct file *fp; 774 struct uio auio; 775 struct iovec *iov; 776 struct socket *so; 777 int i; 778 int len, error; 779#ifdef KTRACE 780 struct uio *ktruio = NULL; 781#endif 782 783 NET_LOCK_GIANT(); 784 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 785 if (error) 786 goto bad2; 787 so = (struct socket *)fp->f_data; 788 789#ifdef MAC 790 SOCK_LOCK(so); 791 error = mac_check_socket_send(td->td_ucred, so); 792 SOCK_UNLOCK(so); 793 if (error) 794 goto bad; 795#endif 796 797 auio.uio_iov = mp->msg_iov; 798 auio.uio_iovcnt = mp->msg_iovlen; 799 auio.uio_segflg = segflg; 800 auio.uio_rw = UIO_WRITE; 801 auio.uio_td = td; 802 auio.uio_offset = 0; /* XXX */ 803 auio.uio_resid = 0; 804 iov = mp->msg_iov; 805 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 806 if ((auio.uio_resid += iov->iov_len) < 0) { 807 error = EINVAL; 808 goto bad; 809 } 810 } 811#ifdef KTRACE 812 if (KTRPOINT(td, KTR_GENIO)) 813 ktruio = cloneuio(&auio); 814#endif 815 len = auio.uio_resid; 816 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 817 if (error) { 818 if (auio.uio_resid != len && (error == ERESTART || 819 error == EINTR || error == EWOULDBLOCK)) 820 error = 0; 821 /* Generation of SIGPIPE can be controlled per socket */ 822 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 823 !(flags & MSG_NOSIGNAL)) { 824 PROC_LOCK(td->td_proc); 825 psignal(td->td_proc, SIGPIPE); 826 PROC_UNLOCK(td->td_proc); 827 } 828 } 829 if (error == 0) 830 td->td_retval[0] = len - auio.uio_resid; 831#ifdef KTRACE 832 if (ktruio != NULL) { 833 ktruio->uio_resid = td->td_retval[0]; 834 ktrgenio(s, UIO_WRITE, ktruio, error); 835 } 836#endif 837bad: 838 fdrop(fp, td); 839bad2: 840 NET_UNLOCK_GIANT(); 841 return (error); 842} 843 844/* 845 * MPSAFE 846 */ 847int 848sendto(td, uap) 849 struct 
thread *td; 850 register struct sendto_args /* { 851 int s; 852 caddr_t buf; 853 size_t len; 854 int flags; 855 caddr_t to; 856 int tolen; 857 } */ *uap; 858{ 859 struct msghdr msg; 860 struct iovec aiov; 861 int error; 862 863 msg.msg_name = uap->to; 864 msg.msg_namelen = uap->tolen; 865 msg.msg_iov = &aiov; 866 msg.msg_iovlen = 1; 867 msg.msg_control = 0; 868#ifdef COMPAT_OLDSOCK 869 msg.msg_flags = 0; 870#endif 871 aiov.iov_base = uap->buf; 872 aiov.iov_len = uap->len; 873 error = sendit(td, uap->s, &msg, uap->flags); 874 return (error); 875} 876 877#ifdef COMPAT_OLDSOCK 878/* 879 * MPSAFE 880 */ 881int 882osend(td, uap) 883 struct thread *td; 884 register struct osend_args /* { 885 int s; 886 caddr_t buf; 887 int len; 888 int flags; 889 } */ *uap; 890{ 891 struct msghdr msg; 892 struct iovec aiov; 893 int error; 894 895 msg.msg_name = 0; 896 msg.msg_namelen = 0; 897 msg.msg_iov = &aiov; 898 msg.msg_iovlen = 1; 899 aiov.iov_base = uap->buf; 900 aiov.iov_len = uap->len; 901 msg.msg_control = 0; 902 msg.msg_flags = 0; 903 error = sendit(td, uap->s, &msg, uap->flags); 904 return (error); 905} 906 907/* 908 * MPSAFE 909 */ 910int 911osendmsg(td, uap) 912 struct thread *td; 913 struct osendmsg_args /* { 914 int s; 915 caddr_t msg; 916 int flags; 917 } */ *uap; 918{ 919 struct msghdr msg; 920 struct iovec *iov; 921 int error; 922 923 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 924 if (error) 925 return (error); 926 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 927 if (error) 928 return (error); 929 msg.msg_iov = iov; 930 msg.msg_flags = MSG_COMPAT; 931 error = sendit(td, uap->s, &msg, uap->flags); 932 free(iov, M_IOV); 933 return (error); 934} 935#endif 936 937/* 938 * MPSAFE 939 */ 940int 941sendmsg(td, uap) 942 struct thread *td; 943 struct sendmsg_args /* { 944 int s; 945 caddr_t msg; 946 int flags; 947 } */ *uap; 948{ 949 struct msghdr msg; 950 struct iovec *iov; 951 int error; 952 953 error = copyin(uap->msg, &msg, sizeof (msg)); 
954 if (error) 955 return (error); 956 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 957 if (error) 958 return (error); 959 msg.msg_iov = iov; 960#ifdef COMPAT_OLDSOCK 961 msg.msg_flags = 0; 962#endif 963 error = sendit(td, uap->s, &msg, uap->flags); 964 free(iov, M_IOV); 965 return (error); 966} 967 968int 969kern_recvit(td, s, mp, fromseg, controlp) 970 struct thread *td; 971 int s; 972 struct msghdr *mp; 973 enum uio_seg fromseg; 974 struct mbuf **controlp; 975{ 976 struct uio auio; 977 struct iovec *iov; 978 int i; 979 socklen_t len; 980 int error; 981 struct mbuf *m, *control = 0; 982 caddr_t ctlbuf; 983 struct file *fp; 984 struct socket *so; 985 struct sockaddr *fromsa = 0; 986#ifdef KTRACE 987 struct uio *ktruio = NULL; 988#endif 989 990 if(controlp != NULL) 991 *controlp = 0; 992 993 NET_LOCK_GIANT(); 994 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 995 if (error) { 996 NET_UNLOCK_GIANT(); 997 return (error); 998 } 999 so = fp->f_data; 1000 1001#ifdef MAC 1002 SOCK_LOCK(so); 1003 error = mac_check_socket_receive(td->td_ucred, so); 1004 SOCK_UNLOCK(so); 1005 if (error) { 1006 fdrop(fp, td); 1007 NET_UNLOCK_GIANT(); 1008 return (error); 1009 } 1010#endif 1011 1012 auio.uio_iov = mp->msg_iov; 1013 auio.uio_iovcnt = mp->msg_iovlen; 1014 auio.uio_segflg = UIO_USERSPACE; 1015 auio.uio_rw = UIO_READ; 1016 auio.uio_td = td; 1017 auio.uio_offset = 0; /* XXX */ 1018 auio.uio_resid = 0; 1019 iov = mp->msg_iov; 1020 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 1021 if ((auio.uio_resid += iov->iov_len) < 0) { 1022 fdrop(fp, td); 1023 NET_UNLOCK_GIANT(); 1024 return (EINVAL); 1025 } 1026 } 1027#ifdef KTRACE 1028 if (KTRPOINT(td, KTR_GENIO)) 1029 ktruio = cloneuio(&auio); 1030#endif 1031 len = auio.uio_resid; 1032 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0, 1033 (mp->msg_control || controlp) ? 
&control : (struct mbuf **)0, &mp->msg_flags);
	if (error) {
		/*
		 * If some data was received before the interruption/restart,
		 * report a short read instead of the error.
		 */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	/* Number of bytes actually received. */
	td->td_retval[0] = (int)len - auio.uio_resid;
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			/* Copy the source address out to the caller. */
			if (fromseg == UIO_USERSPACE) {
				error = copyout(fromsa, mp->msg_name,
				    (unsigned)len);
				if (error)
					goto out;
			} else
				bcopy(fromsa, mp->msg_name, len);
		}
		mp->msg_namelen = len;
	}
	if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		/*
		 * Copy the control mbuf chain out to the user buffer,
		 * truncating (and flagging MSG_CTRUNC) if it does not fit.
		 */
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
			    ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	NET_UNLOCK_GIANT();
	if (fromsa)
		FREE(fromsa, M_SONAME);

	/* Hand the control chain to the caller, or free it. */
	if (error == 0 && controlp != NULL)
		*controlp = control;
	else if (control)
		m_freem(control);

	return (error);
}

/*
 * Common receive helper: receive into *mp from userspace and, if
 * namelenp is non-NULL, copy the resulting msg_namelen back out to it.
 */
static int
recvit(td, s, mp, namelenp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	void *namelenp;
{
	int error;

	error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
	if (error)
		return (error);
	if (namelenp) {
		error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
#ifdef COMPAT_OLDSOCK
		if (mp->msg_flags & MSG_COMPAT)
			error = 0;	/* old recvfrom didn't check */
#endif
	}
	return (error);
}

/*
 * recvfrom(2): build a single-iovec msghdr from the arguments and hand
 * it to recvit().
 *
 * MPSAFE
 */
int
recvfrom(td, uap)
	struct thread *td;
	register struct recvfrom_args /* {
		int	s;
		caddr_t	buf;
		size_t	len;
		int	flags;
		struct sockaddr * __restrict	from;
		socklen_t * __restrict	fromlenaddr;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov;
	int error;

	if (uap->fromlenaddr) {
		error = copyin(uap->fromlenaddr,
		    &msg.msg_namelen, sizeof (msg.msg_namelen));
		if (error)
			goto done2;
	} else {
		msg.msg_namelen = 0;
	}
	msg.msg_name = uap->from;
	msg.msg_iov = &aiov;
	msg.msg_iovlen = 1;
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->len;
	msg.msg_control = 0;
	msg.msg_flags = uap->flags;
	error = recvit(td, uap->s, &msg, uap->fromlenaddr);
done2:
	return(error);
}

#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom(2): same as recvfrom() with MSG_COMPAT set.
 *
 * MPSAFE
 */
int
orecvfrom(td, uap)
	struct thread *td;
	struct recvfrom_args *uap;
{

	uap->flags |= MSG_COMPAT;
	return (recvfrom(td, uap));
}
#endif


#ifdef COMPAT_OLDSOCK
/*
 * Old recv(2): recvfrom(2) without a source-address return.
 *
 * MPSAFE
 */
int
orecv(td, uap)
	struct thread *td;
	register struct orecv_args /* {
		int	s;
		caddr_t	buf;
		int	len;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec aiov;
	int error;

	msg.msg_name = 0;
	msg.msg_namelen = 0;
	msg.msg_iov = &aiov;
	msg.msg_iovlen = 1;
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->len;
	msg.msg_control = 0;
	msg.msg_flags = uap->flags;
	error = recvit(td, uap->s, &msg, NULL);
	return (error);
}

/*
 * Old recvmsg.  This code takes advantage of the fact that the old msghdr
 * overlays the new one, missing only the flags, and with the (old) access
 * rights where the control fields are now.
1242 * 1243 * MPSAFE 1244 */ 1245int 1246orecvmsg(td, uap) 1247 struct thread *td; 1248 struct orecvmsg_args /* { 1249 int s; 1250 struct omsghdr *msg; 1251 int flags; 1252 } */ *uap; 1253{ 1254 struct msghdr msg; 1255 struct iovec *iov; 1256 int error; 1257 1258 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1259 if (error) 1260 return (error); 1261 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1262 if (error) 1263 return (error); 1264 msg.msg_flags = uap->flags | MSG_COMPAT; 1265 msg.msg_iov = iov; 1266 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1267 if (msg.msg_controllen && error == 0) 1268 error = copyout(&msg.msg_controllen, 1269 &uap->msg->msg_accrightslen, sizeof (int)); 1270 free(iov, M_IOV); 1271 return (error); 1272} 1273#endif 1274 1275/* 1276 * MPSAFE 1277 */ 1278int 1279recvmsg(td, uap) 1280 struct thread *td; 1281 struct recvmsg_args /* { 1282 int s; 1283 struct msghdr *msg; 1284 int flags; 1285 } */ *uap; 1286{ 1287 struct msghdr msg; 1288 struct iovec *uiov, *iov; 1289 int error; 1290 1291 error = copyin(uap->msg, &msg, sizeof (msg)); 1292 if (error) 1293 return (error); 1294 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1295 if (error) 1296 return (error); 1297 msg.msg_flags = uap->flags; 1298#ifdef COMPAT_OLDSOCK 1299 msg.msg_flags &= ~MSG_COMPAT; 1300#endif 1301 uiov = msg.msg_iov; 1302 msg.msg_iov = iov; 1303 error = recvit(td, uap->s, &msg, NULL); 1304 if (error == 0) { 1305 msg.msg_iov = uiov; 1306 error = copyout(&msg, uap->msg, sizeof(msg)); 1307 } 1308 free(iov, M_IOV); 1309 return (error); 1310} 1311 1312/* 1313 * MPSAFE 1314 */ 1315/* ARGSUSED */ 1316int 1317shutdown(td, uap) 1318 struct thread *td; 1319 register struct shutdown_args /* { 1320 int s; 1321 int how; 1322 } */ *uap; 1323{ 1324 struct socket *so; 1325 struct file *fp; 1326 int error; 1327 1328 NET_LOCK_GIANT(); 1329 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 1330 if (error == 0) { 1331 so = fp->f_data; 
1332 error = soshutdown(so, uap->how); 1333 fdrop(fp, td); 1334 } 1335 NET_UNLOCK_GIANT(); 1336 return (error); 1337} 1338 1339/* 1340 * MPSAFE 1341 */ 1342/* ARGSUSED */ 1343int 1344setsockopt(td, uap) 1345 struct thread *td; 1346 register struct setsockopt_args /* { 1347 int s; 1348 int level; 1349 int name; 1350 caddr_t val; 1351 int valsize; 1352 } */ *uap; 1353{ 1354 1355 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1356 uap->val, UIO_USERSPACE, uap->valsize)); 1357} 1358 1359int 1360kern_setsockopt(td, s, level, name, val, valseg, valsize) 1361 struct thread *td; 1362 int s; 1363 int level; 1364 int name; 1365 void *val; 1366 enum uio_seg valseg; 1367 socklen_t valsize; 1368{ 1369 int error; 1370 struct socket *so; 1371 struct file *fp; 1372 struct sockopt sopt; 1373 1374 if (val == NULL && valsize != 0) 1375 return (EFAULT); 1376 if ((int)valsize < 0) 1377 return (EINVAL); 1378 1379 sopt.sopt_dir = SOPT_SET; 1380 sopt.sopt_level = level; 1381 sopt.sopt_name = name; 1382 sopt.sopt_val = val; 1383 sopt.sopt_valsize = valsize; 1384 switch (valseg) { 1385 case UIO_USERSPACE: 1386 sopt.sopt_td = td; 1387 break; 1388 case UIO_SYSSPACE: 1389 sopt.sopt_td = NULL; 1390 break; 1391 default: 1392 panic("kern_setsockopt called with bad valseg"); 1393 } 1394 1395 NET_LOCK_GIANT(); 1396 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1397 if (error == 0) { 1398 so = fp->f_data; 1399 error = sosetopt(so, &sopt); 1400 fdrop(fp, td); 1401 } 1402 NET_UNLOCK_GIANT(); 1403 return(error); 1404} 1405 1406/* 1407 * MPSAFE 1408 */ 1409/* ARGSUSED */ 1410int 1411getsockopt(td, uap) 1412 struct thread *td; 1413 register struct getsockopt_args /* { 1414 int s; 1415 int level; 1416 int name; 1417 void * __restrict val; 1418 socklen_t * __restrict avalsize; 1419 } */ *uap; 1420{ 1421 socklen_t valsize; 1422 int error; 1423 1424 if (uap->val) { 1425 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1426 if (error) 1427 return (error); 1428 } 1429 1430 error = 
kern_getsockopt(td, uap->s, uap->level, uap->name, 1431 uap->val, UIO_USERSPACE, &valsize); 1432 1433 if (error == 0) 1434 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1435 return (error); 1436} 1437 1438/* 1439 * Kernel version of getsockopt. 1440 * optval can be a userland or userspace. optlen is always a kernel pointer. 1441 */ 1442int 1443kern_getsockopt(td, s, level, name, val, valseg, valsize) 1444 struct thread *td; 1445 int s; 1446 int level; 1447 int name; 1448 void *val; 1449 enum uio_seg valseg; 1450 socklen_t *valsize; 1451{ 1452 int error; 1453 struct socket *so; 1454 struct file *fp; 1455 struct sockopt sopt; 1456 1457 if (val == NULL) 1458 *valsize = 0; 1459 if ((int)*valsize < 0) 1460 return (EINVAL); 1461 1462 sopt.sopt_dir = SOPT_GET; 1463 sopt.sopt_level = level; 1464 sopt.sopt_name = name; 1465 sopt.sopt_val = val; 1466 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1467 switch (valseg) { 1468 case UIO_USERSPACE: 1469 sopt.sopt_td = td; 1470 break; 1471 case UIO_SYSSPACE: 1472 sopt.sopt_td = NULL; 1473 break; 1474 default: 1475 panic("kern_getsockopt called with bad valseg"); 1476 } 1477 1478 NET_LOCK_GIANT(); 1479 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1480 if (error == 0) { 1481 so = fp->f_data; 1482 error = sogetopt(so, &sopt); 1483 *valsize = sopt.sopt_valsize; 1484 fdrop(fp, td); 1485 } 1486 NET_UNLOCK_GIANT(); 1487 return (error); 1488} 1489 1490/* 1491 * getsockname1() - Get socket name. 
1492 * 1493 * MPSAFE 1494 */ 1495/* ARGSUSED */ 1496static int 1497getsockname1(td, uap, compat) 1498 struct thread *td; 1499 register struct getsockname_args /* { 1500 int fdes; 1501 struct sockaddr * __restrict asa; 1502 socklen_t * __restrict alen; 1503 } */ *uap; 1504 int compat; 1505{ 1506 struct sockaddr *sa; 1507 socklen_t len; 1508 int error; 1509 1510 error = copyin(uap->alen, &len, sizeof(len)); 1511 if (error) 1512 return (error); 1513 1514 error = kern_getsockname(td, uap->fdes, &sa, &len); 1515 if (error) 1516 return (error); 1517 1518 if (len != 0) { 1519#ifdef COMPAT_OLDSOCK 1520 if (compat) 1521 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1522#endif 1523 error = copyout(sa, uap->asa, (u_int)len); 1524 } 1525 free(sa, M_SONAME); 1526 if (error == 0) 1527 error = copyout(&len, uap->alen, sizeof(len)); 1528 return (error); 1529} 1530 1531int 1532kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1533 socklen_t *alen) 1534{ 1535 struct socket *so; 1536 struct file *fp; 1537 socklen_t len; 1538 int error; 1539 1540 if (*alen < 0) 1541 return (EINVAL); 1542 1543 NET_LOCK_GIANT(); 1544 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1545 if (error) 1546 goto done; 1547 so = fp->f_data; 1548 *sa = NULL; 1549 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1550 if (error) 1551 goto bad; 1552 if (*sa == NULL) 1553 len = 0; 1554 else 1555 len = MIN(*alen, (*sa)->sa_len); 1556 *alen = len; 1557bad: 1558 fdrop(fp, td); 1559 if (error && *sa) { 1560 free(*sa, M_SONAME); 1561 *sa = NULL; 1562 } 1563done: 1564 NET_UNLOCK_GIANT(); 1565 return (error); 1566} 1567 1568/* 1569 * MPSAFE 1570 */ 1571int 1572getsockname(td, uap) 1573 struct thread *td; 1574 struct getsockname_args *uap; 1575{ 1576 1577 return (getsockname1(td, uap, 0)); 1578} 1579 1580#ifdef COMPAT_OLDSOCK 1581/* 1582 * MPSAFE 1583 */ 1584int 1585ogetsockname(td, uap) 1586 struct thread *td; 1587 struct getsockname_args *uap; 1588{ 1589 1590 return (getsockname1(td, 
uap, 1)); 1591} 1592#endif /* COMPAT_OLDSOCK */ 1593 1594/* 1595 * getpeername1() - Get name of peer for connected socket. 1596 * 1597 * MPSAFE 1598 */ 1599/* ARGSUSED */ 1600static int 1601getpeername1(td, uap, compat) 1602 struct thread *td; 1603 register struct getpeername_args /* { 1604 int fdes; 1605 struct sockaddr * __restrict asa; 1606 socklen_t * __restrict alen; 1607 } */ *uap; 1608 int compat; 1609{ 1610 struct sockaddr *sa; 1611 socklen_t len; 1612 int error; 1613 1614 error = copyin(uap->alen, &len, sizeof (len)); 1615 if (error) 1616 return (error); 1617 1618 error = kern_getpeername(td, uap->fdes, &sa, &len); 1619 if (error) 1620 return (error); 1621 1622 if (len != 0) { 1623#ifdef COMPAT_OLDSOCK 1624 if (compat) 1625 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1626#endif 1627 error = copyout(sa, uap->asa, (u_int)len); 1628 } 1629 free(sa, M_SONAME); 1630 if (error == 0) 1631 error = copyout(&len, uap->alen, sizeof(len)); 1632 return (error); 1633} 1634 1635int 1636kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1637 socklen_t *alen) 1638{ 1639 struct socket *so; 1640 struct file *fp; 1641 socklen_t len; 1642 int error; 1643 1644 if (*alen < 0) 1645 return (EINVAL); 1646 1647 NET_LOCK_GIANT(); 1648 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 1649 if (error) 1650 goto done2; 1651 so = fp->f_data; 1652 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1653 error = ENOTCONN; 1654 goto done1; 1655 } 1656 *sa = NULL; 1657 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1658 if (error) 1659 goto bad; 1660 if (*sa == NULL) 1661 len = 0; 1662 else 1663 len = MIN(*alen, (*sa)->sa_len); 1664 *alen = len; 1665bad: 1666 if (error && *sa) { 1667 free(*sa, M_SONAME); 1668 *sa = NULL; 1669 } 1670done1: 1671 fdrop(fp, td); 1672done2: 1673 NET_UNLOCK_GIANT(); 1674 return (error); 1675} 1676 1677/* 1678 * MPSAFE 1679 */ 1680int 1681getpeername(td, uap) 1682 struct thread *td; 1683 struct getpeername_args *uap; 
1684{ 1685 1686 return (getpeername1(td, uap, 0)); 1687} 1688 1689#ifdef COMPAT_OLDSOCK 1690/* 1691 * MPSAFE 1692 */ 1693int 1694ogetpeername(td, uap) 1695 struct thread *td; 1696 struct ogetpeername_args *uap; 1697{ 1698 1699 /* XXX uap should have type `getpeername_args *' to begin with. */ 1700 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1701} 1702#endif /* COMPAT_OLDSOCK */ 1703 1704int 1705sockargs(mp, buf, buflen, type) 1706 struct mbuf **mp; 1707 caddr_t buf; 1708 int buflen, type; 1709{ 1710 register struct sockaddr *sa; 1711 register struct mbuf *m; 1712 int error; 1713 1714 if ((u_int)buflen > MLEN) { 1715#ifdef COMPAT_OLDSOCK 1716 if (type == MT_SONAME && (u_int)buflen <= 112) 1717 buflen = MLEN; /* unix domain compat. hack */ 1718 else 1719#endif 1720 if ((u_int)buflen > MCLBYTES) 1721 return (EINVAL); 1722 } 1723 m = m_get(M_TRYWAIT, type); 1724 if (m == NULL) 1725 return (ENOBUFS); 1726 if ((u_int)buflen > MLEN) { 1727 MCLGET(m, M_TRYWAIT); 1728 if ((m->m_flags & M_EXT) == 0) { 1729 m_free(m); 1730 return (ENOBUFS); 1731 } 1732 } 1733 m->m_len = buflen; 1734 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1735 if (error) 1736 (void) m_free(m); 1737 else { 1738 *mp = m; 1739 if (type == MT_SONAME) { 1740 sa = mtod(m, struct sockaddr *); 1741 1742#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1743 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1744 sa->sa_family = sa->sa_len; 1745#endif 1746 sa->sa_len = buflen; 1747 } 1748 } 1749 return (error); 1750} 1751 1752int 1753getsockaddr(namp, uaddr, len) 1754 struct sockaddr **namp; 1755 caddr_t uaddr; 1756 size_t len; 1757{ 1758 struct sockaddr *sa; 1759 int error; 1760 1761 if (len > SOCK_MAXADDRLEN) 1762 return (ENAMETOOLONG); 1763 if (len < offsetof(struct sockaddr, sa_data[0])) 1764 return (EINVAL); 1765 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1766 error = copyin(uaddr, sa, len); 1767 if (error) { 1768 FREE(sa, M_SONAME); 1769 } else { 1770#if 
defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1771 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1772 sa->sa_family = sa->sa_len; 1773#endif 1774 sa->sa_len = len; 1775 *namp = sa; 1776 } 1777 return (error); 1778} 1779 1780/* 1781 * Detach mapped page and release resources back to the system. 1782 */ 1783void 1784sf_buf_mext(void *addr, void *args) 1785{ 1786 vm_page_t m; 1787 1788 m = sf_buf_page(args); 1789 sf_buf_free(args); 1790 vm_page_lock_queues(); 1791 vm_page_unwire(m, 0); 1792 /* 1793 * Check for the object going away on us. This can 1794 * happen since we don't hold a reference to it. 1795 * If so, we're responsible for freeing the page. 1796 */ 1797 if (m->wire_count == 0 && m->object == NULL) 1798 vm_page_free(m); 1799 vm_page_unlock_queues(); 1800} 1801 1802/* 1803 * sendfile(2) 1804 * 1805 * MPSAFE 1806 * 1807 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1808 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1809 * 1810 * Send a file specified by 'fd' and starting at 'offset' to a socket 1811 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1812 * nbytes == 0. Optionally add a header and/or trailer to the socket 1813 * output. If specified, write the total number of bytes sent into *sbytes. 
1814 * 1815 */ 1816int 1817sendfile(struct thread *td, struct sendfile_args *uap) 1818{ 1819 1820 return (do_sendfile(td, uap, 0)); 1821} 1822 1823static int 1824do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1825{ 1826 struct sf_hdtr hdtr; 1827 struct uio *hdr_uio, *trl_uio; 1828 int error; 1829 1830 hdr_uio = trl_uio = NULL; 1831 1832 if (uap->hdtr != NULL) { 1833 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1834 if (error) 1835 goto out; 1836 if (hdtr.headers != NULL) { 1837 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1838 if (error) 1839 goto out; 1840 } 1841 if (hdtr.trailers != NULL) { 1842 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1843 if (error) 1844 goto out; 1845 1846 } 1847 } 1848 1849 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1850out: 1851 if (hdr_uio) 1852 free(hdr_uio, M_IOV); 1853 if (trl_uio) 1854 free(trl_uio, M_IOV); 1855 return (error); 1856} 1857 1858#ifdef COMPAT_FREEBSD4 1859int 1860freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1861{ 1862 struct sendfile_args args; 1863 1864 args.fd = uap->fd; 1865 args.s = uap->s; 1866 args.offset = uap->offset; 1867 args.nbytes = uap->nbytes; 1868 args.hdtr = uap->hdtr; 1869 args.sbytes = uap->sbytes; 1870 args.flags = uap->flags; 1871 1872 return (do_sendfile(td, &args, 1)); 1873} 1874#endif /* COMPAT_FREEBSD4 */ 1875 1876int 1877kern_sendfile(struct thread *td, struct sendfile_args *uap, 1878 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1879{ 1880 struct file *sock_fp; 1881 struct vnode *vp; 1882 struct vm_object *obj = NULL; 1883 struct socket *so = NULL; 1884 struct mbuf *m, *m_header = NULL; 1885 struct sf_buf *sf; 1886 struct vm_page *pg; 1887 off_t off, xfsize, hdtr_size, sbytes = 0; 1888 int error, headersize = 0, headersent = 0; 1889 int vfslocked; 1890 1891 NET_LOCK_GIANT(); 1892 1893 hdtr_size = 0; 1894 1895 /* 1896 * The descriptor must be a regular file and have a backing VM object. 
1897 */ 1898 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1899 goto done; 1900 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1901 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1902 obj = vp->v_object; 1903 if (obj != NULL) { 1904 /* 1905 * Temporarily increase the backing VM object's reference 1906 * count so that a forced reclamation of its vnode does not 1907 * immediately destroy it. 1908 */ 1909 VM_OBJECT_LOCK(obj); 1910 if ((obj->flags & OBJ_DEAD) == 0) { 1911 vm_object_reference_locked(obj); 1912 VM_OBJECT_UNLOCK(obj); 1913 } else { 1914 VM_OBJECT_UNLOCK(obj); 1915 obj = NULL; 1916 } 1917 } 1918 VOP_UNLOCK(vp, 0, td); 1919 VFS_UNLOCK_GIANT(vfslocked); 1920 if (obj == NULL) { 1921 error = EINVAL; 1922 goto done; 1923 } 1924 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, NULL)) != 0) 1925 goto done; 1926 so = sock_fp->f_data; 1927 if (so->so_type != SOCK_STREAM) { 1928 error = EINVAL; 1929 goto done; 1930 } 1931 if ((so->so_state & SS_ISCONNECTED) == 0) { 1932 error = ENOTCONN; 1933 goto done; 1934 } 1935 if (uap->offset < 0) { 1936 error = EINVAL; 1937 goto done; 1938 } 1939 1940#ifdef MAC 1941 SOCK_LOCK(so); 1942 error = mac_check_socket_send(td->td_ucred, so); 1943 SOCK_UNLOCK(so); 1944 if (error) 1945 goto done; 1946#endif 1947 1948 /* 1949 * If specified, get the pointer to the sf_hdtr struct for 1950 * any headers/trailers. 1951 */ 1952 if (hdr_uio != NULL) { 1953 hdr_uio->uio_td = td; 1954 hdr_uio->uio_rw = UIO_WRITE; 1955 if (hdr_uio->uio_resid > 0) { 1956 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0); 1957 if (m_header == NULL) 1958 goto done; 1959 headersize = m_header->m_pkthdr.len; 1960 if (compat) 1961 sbytes += headersize; 1962 } 1963 } 1964 1965 /* 1966 * Protect against multiple writers to the socket. 1967 */ 1968 SOCKBUF_LOCK(&so->so_snd); 1969 (void) sblock(&so->so_snd, M_WAITOK); 1970 SOCKBUF_UNLOCK(&so->so_snd); 1971 1972 /* 1973 * Loop through the pages in the file, starting with the requested 1974 * offset. 
Get a file page (do I/O if necessary), map the file page 1975 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1976 * it on the socket. 1977 */ 1978 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1979 vm_pindex_t pindex; 1980 vm_offset_t pgoff; 1981 1982 pindex = OFF_TO_IDX(off); 1983 VM_OBJECT_LOCK(obj); 1984retry_lookup: 1985 /* 1986 * Calculate the amount to transfer. Not to exceed a page, 1987 * the EOF, or the passed in nbytes. 1988 */ 1989 xfsize = obj->un_pager.vnp.vnp_size - off; 1990 VM_OBJECT_UNLOCK(obj); 1991 if (xfsize > PAGE_SIZE) 1992 xfsize = PAGE_SIZE; 1993 pgoff = (vm_offset_t)(off & PAGE_MASK); 1994 if (PAGE_SIZE - pgoff < xfsize) 1995 xfsize = PAGE_SIZE - pgoff; 1996 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1997 xfsize = uap->nbytes - sbytes; 1998 if (xfsize <= 0) { 1999 if (m_header != NULL) { 2000 m = m_header; 2001 m_header = NULL; 2002 SOCKBUF_LOCK(&so->so_snd); 2003 goto retry_space; 2004 } else 2005 break; 2006 } 2007 /* 2008 * Optimize the non-blocking case by looking at the socket space 2009 * before going to the extra work of constituting the sf_buf. 2010 */ 2011 SOCKBUF_LOCK(&so->so_snd); 2012 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 2013 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 2014 error = EPIPE; 2015 else 2016 error = EAGAIN; 2017 sbunlock(&so->so_snd); 2018 SOCKBUF_UNLOCK(&so->so_snd); 2019 goto done; 2020 } 2021 SOCKBUF_UNLOCK(&so->so_snd); 2022 VM_OBJECT_LOCK(obj); 2023 /* 2024 * Attempt to look up the page. 2025 * 2026 * Allocate if not found 2027 * 2028 * Wait and loop if busy. 
2029 */ 2030 pg = vm_page_lookup(obj, pindex); 2031 2032 if (pg == NULL) { 2033 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | 2034 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 2035 if (pg == NULL) { 2036 VM_OBJECT_UNLOCK(obj); 2037 VM_WAIT; 2038 VM_OBJECT_LOCK(obj); 2039 goto retry_lookup; 2040 } 2041 } else if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 2042 goto retry_lookup; 2043 else { 2044 /* 2045 * Wire the page so it does not get ripped out from 2046 * under us. 2047 */ 2048 vm_page_lock_queues(); 2049 vm_page_wire(pg); 2050 vm_page_unlock_queues(); 2051 } 2052 2053 /* 2054 * If page is not valid for what we need, initiate I/O 2055 */ 2056 2057 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 2058 VM_OBJECT_UNLOCK(obj); 2059 } else if (uap->flags & SF_NODISKIO) { 2060 error = EBUSY; 2061 } else { 2062 int bsize, resid; 2063 2064 /* 2065 * Ensure that our page is still around when the I/O 2066 * completes. 2067 */ 2068 vm_page_io_start(pg); 2069 VM_OBJECT_UNLOCK(obj); 2070 2071 /* 2072 * Get the page from backing store. 2073 */ 2074 bsize = vp->v_mount->mnt_stat.f_iosize; 2075 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2076 vn_lock(vp, LK_SHARED | LK_RETRY, td); 2077 /* 2078 * XXXMAC: Because we don't have fp->f_cred here, 2079 * we pass in NOCRED. This is probably wrong, but 2080 * is consistent with our original implementation. 2081 */ 2082 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2083 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2084 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2085 td->td_ucred, NOCRED, &resid, td); 2086 VOP_UNLOCK(vp, 0, td); 2087 VFS_UNLOCK_GIANT(vfslocked); 2088 VM_OBJECT_LOCK(obj); 2089 vm_page_io_finish(pg); 2090 if (!error) 2091 VM_OBJECT_UNLOCK(obj); 2092 mbstat.sf_iocnt++; 2093 } 2094 2095 if (error) { 2096 vm_page_lock_queues(); 2097 vm_page_unwire(pg, 0); 2098 /* 2099 * See if anyone else might know about this page. 2100 * If not and it is not valid, then free it. 
2101 */ 2102 if (pg->wire_count == 0 && pg->valid == 0 && 2103 pg->busy == 0 && !(pg->flags & PG_BUSY) && 2104 pg->hold_count == 0) { 2105 vm_page_free(pg); 2106 } 2107 vm_page_unlock_queues(); 2108 VM_OBJECT_UNLOCK(obj); 2109 SOCKBUF_LOCK(&so->so_snd); 2110 sbunlock(&so->so_snd); 2111 SOCKBUF_UNLOCK(&so->so_snd); 2112 goto done; 2113 } 2114 2115 /* 2116 * Get a sendfile buf. We usually wait as long as necessary, 2117 * but this wait can be interrupted. 2118 */ 2119 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { 2120 mbstat.sf_allocfail++; 2121 vm_page_lock_queues(); 2122 vm_page_unwire(pg, 0); 2123 if (pg->wire_count == 0 && pg->object == NULL) 2124 vm_page_free(pg); 2125 vm_page_unlock_queues(); 2126 SOCKBUF_LOCK(&so->so_snd); 2127 sbunlock(&so->so_snd); 2128 SOCKBUF_UNLOCK(&so->so_snd); 2129 error = EINTR; 2130 goto done; 2131 } 2132 2133 /* 2134 * Get an mbuf header and set it up as having external storage. 2135 */ 2136 if (m_header) 2137 MGET(m, M_TRYWAIT, MT_DATA); 2138 else 2139 MGETHDR(m, M_TRYWAIT, MT_DATA); 2140 if (m == NULL) { 2141 error = ENOBUFS; 2142 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2143 SOCKBUF_LOCK(&so->so_snd); 2144 sbunlock(&so->so_snd); 2145 SOCKBUF_UNLOCK(&so->so_snd); 2146 goto done; 2147 } 2148 /* 2149 * Setup external storage for mbuf. 2150 */ 2151 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 2152 EXT_SFBUF); 2153 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 2154 m->m_pkthdr.len = m->m_len = xfsize; 2155 2156 if (m_header) { 2157 m_cat(m_header, m); 2158 m = m_header; 2159 m_header = NULL; 2160 m_fixhdr(m); 2161 } 2162 2163 /* 2164 * Add the buffer to the socket buffer chain. 2165 */ 2166 SOCKBUF_LOCK(&so->so_snd); 2167retry_space: 2168 /* 2169 * Make sure that the socket is still able to take more data. 2170 * CANTSENDMORE being true usually means that the connection 2171 * was closed. so_error is true when an error was sensed after 2172 * a previous send. 
2173 * The state is checked after the page mapping and buffer 2174 * allocation above since those operations may block and make 2175 * any socket checks stale. From this point forward, nothing 2176 * blocks before the pru_send (or more accurately, any blocking 2177 * results in a loop back to here to re-check). 2178 */ 2179 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2180 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 2181 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2182 error = EPIPE; 2183 } else { 2184 error = so->so_error; 2185 so->so_error = 0; 2186 } 2187 m_freem(m); 2188 sbunlock(&so->so_snd); 2189 SOCKBUF_UNLOCK(&so->so_snd); 2190 goto done; 2191 } 2192 /* 2193 * Wait for socket space to become available. We do this just 2194 * after checking the connection state above in order to avoid 2195 * a race condition with sbwait(). 2196 */ 2197 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2198 if (so->so_state & SS_NBIO) { 2199 m_freem(m); 2200 sbunlock(&so->so_snd); 2201 SOCKBUF_UNLOCK(&so->so_snd); 2202 error = EAGAIN; 2203 goto done; 2204 } 2205 error = sbwait(&so->so_snd); 2206 /* 2207 * An error from sbwait usually indicates that we've 2208 * been interrupted by a signal. If we've sent anything 2209 * then return bytes sent, otherwise return the error. 2210 */ 2211 if (error) { 2212 m_freem(m); 2213 sbunlock(&so->so_snd); 2214 SOCKBUF_UNLOCK(&so->so_snd); 2215 goto done; 2216 } 2217 goto retry_space; 2218 } 2219 SOCKBUF_UNLOCK(&so->so_snd); 2220 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2221 if (error) { 2222 SOCKBUF_LOCK(&so->so_snd); 2223 sbunlock(&so->so_snd); 2224 SOCKBUF_UNLOCK(&so->so_snd); 2225 goto done; 2226 } 2227 headersent = 1; 2228 } 2229 SOCKBUF_LOCK(&so->so_snd); 2230 sbunlock(&so->so_snd); 2231 SOCKBUF_UNLOCK(&so->so_snd); 2232 2233 /* 2234 * Send trailers. Wimp out and use writev(2). 
2235 */ 2236 if (trl_uio != NULL) { 2237 error = kern_writev(td, uap->s, trl_uio); 2238 if (error) 2239 goto done; 2240 if (compat) 2241 sbytes += td->td_retval[0]; 2242 else 2243 hdtr_size += td->td_retval[0]; 2244 } 2245 2246done: 2247 if (headersent) { 2248 if (!compat) 2249 hdtr_size += headersize; 2250 } else { 2251 if (compat) 2252 sbytes -= headersize; 2253 } 2254 /* 2255 * If there was no error we have to clear td->td_retval[0] 2256 * because it may have been set by writev. 2257 */ 2258 if (error == 0) { 2259 td->td_retval[0] = 0; 2260 } 2261 if (uap->sbytes != NULL) { 2262 if (!compat) 2263 sbytes += hdtr_size; 2264 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2265 } 2266 if (obj != NULL) 2267 vm_object_deallocate(obj); 2268 if (vp != NULL) { 2269 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2270 vrele(vp); 2271 VFS_UNLOCK_GIANT(vfslocked); 2272 } 2273 if (so) 2274 fdrop(sock_fp, td); 2275 if (m_header) 2276 m_freem(m_header); 2277 2278 NET_UNLOCK_GIANT(); 2279 2280 if (error == ERESTART) 2281 error = EINTR; 2282 2283 return (error); 2284} 2285