kern_sendfile.c revision 127788
1/* 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 37 */ 38 39#include <sys/cdefs.h> 40__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 127788 2004-04-03 09:16:27Z alc $"); 41 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44#include "opt_mac.h" 45 46#include <sys/param.h> 47#include <sys/systm.h> 48#include <sys/kernel.h> 49#include <sys/lock.h> 50#include <sys/mac.h> 51#include <sys/mutex.h> 52#include <sys/sysproto.h> 53#include <sys/malloc.h> 54#include <sys/filedesc.h> 55#include <sys/event.h> 56#include <sys/proc.h> 57#include <sys/fcntl.h> 58#include <sys/file.h> 59#include <sys/filio.h> 60#include <sys/mount.h> 61#include <sys/mbuf.h> 62#include <sys/protosw.h> 63#include <sys/sf_buf.h> 64#include <sys/socket.h> 65#include <sys/socketvar.h> 66#include <sys/signalvar.h> 67#include <sys/syscallsubr.h> 68#include <sys/uio.h> 69#include <sys/vnode.h> 70#ifdef KTRACE 71#include <sys/ktrace.h> 72#endif 73 74#include <vm/vm.h> 75#include <vm/vm_object.h> 76#include <vm/vm_page.h> 77#include <vm/vm_pageout.h> 78#include <vm/vm_kern.h> 79#include <vm/vm_extern.h> 80 81static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 82static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 83 84static int accept1(struct thread *td, struct accept_args *uap, int compat); 85static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 86static int getsockname1(struct thread *td, struct getsockname_args *uap, 87 int compat); 88static int getpeername1(struct thread *td, struct getpeername_args *uap, 89 int compat); 90 91/* 92 * System call interface to the socket abstraction. 93 */ 94#if defined(COMPAT_43) || defined(COMPAT_SUNOS) 95#define COMPAT_OLDSOCK 96#endif 97 98/* 99 * MPSAFE 100 */ 101int 102socket(td, uap) 103 struct thread *td; 104 register struct socket_args /* { 105 int domain; 106 int type; 107 int protocol; 108 } */ *uap; 109{ 110 struct filedesc *fdp; 111 struct socket *so; 112 struct file *fp; 113 int fd, error; 114 115 fdp = td->td_proc->p_fd; 116 error = falloc(td, &fp, &fd); 117 if (error) 118 return (error); 119 /* An extra reference on `fp' has been held for us by falloc(). */ 120 NET_LOCK_GIANT(); 121 error = socreate(uap->domain, &so, uap->type, uap->protocol, 122 td->td_ucred, td); 123 NET_UNLOCK_GIANT(); 124 FILEDESC_LOCK(fdp); 125 if (error) { 126 if (fdp->fd_ofiles[fd] == fp) { 127 fdp->fd_ofiles[fd] = NULL; 128 fdunused(fdp, fd); 129 FILEDESC_UNLOCK(fdp); 130 fdrop(fp, td); 131 } else { 132 FILEDESC_UNLOCK(fdp); 133 } 134 } else { 135 fp->f_data = so; /* already has ref count */ 136 fp->f_flag = FREAD|FWRITE; 137 fp->f_ops = &socketops; 138 fp->f_type = DTYPE_SOCKET; 139 FILEDESC_UNLOCK(fdp); 140 td->td_retval[0] = fd; 141 } 142 fdrop(fp, td); 143 return (error); 144} 145 146/* 147 * MPSAFE 148 */ 149/* ARGSUSED */ 150int 151bind(td, uap) 152 struct thread *td; 153 register struct bind_args /* { 154 int s; 155 caddr_t name; 156 int namelen; 157 } */ *uap; 158{ 159 struct sockaddr *sa; 160 int error; 161 162 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 163 return (error); 164 165 return (kern_bind(td, uap->s, sa)); 166} 167 168int 169kern_bind(td, fd, sa) 170 struct thread *td; 171 int fd; 172 struct sockaddr *sa; 173{ 174 struct socket *so; 175 int error; 176 177 NET_LOCK_GIANT(); 178 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 179 goto done2; 180#ifdef MAC 181 error = mac_check_socket_bind(td->td_ucred, so, sa); 182 if (error) 183 goto done1; 184#endif 185 error = sobind(so, sa, td); 186#ifdef MAC 187done1: 188#endif 189 fputsock(so); 190done2: 191 NET_UNLOCK_GIANT(); 192 FREE(sa, M_SONAME); 193 return (error); 194} 195 196/* 197 * MPSAFE 198 */ 199/* ARGSUSED */ 200int 201listen(td, uap) 202 struct thread *td; 203 register struct listen_args /* { 204 int s; 205 int backlog; 206 } */ *uap; 207{ 208 struct socket *so; 209 int error; 210 211 NET_LOCK_GIANT(); 212 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 213#ifdef MAC 214 error = mac_check_socket_listen(td->td_ucred, so); 215 if (error) 216 goto done; 217#endif 218 error = solisten(so, uap->backlog, td); 219#ifdef MAC 220done: 221#endif 222 fputsock(so); 223 } 224 NET_UNLOCK_GIANT(); 225 return(error); 226} 227 228/* 229 * accept1() 230 * MPSAFE 231 */ 232static int 233accept1(td, uap, compat) 234 struct thread *td; 235 register struct accept_args /* { 236 int s; 237 struct sockaddr * __restrict name; 238 socklen_t * __restrict anamelen; 239 } */ *uap; 240 int compat; 241{ 242 struct filedesc *fdp; 243 struct file *nfp = NULL; 244 struct sockaddr *sa; 245 socklen_t namelen; 246 int error, s; 247 struct socket *head, *so; 248 int fd; 249 u_int fflag; 250 pid_t pgid; 251 int tmp; 252 253 fdp = td->td_proc->p_fd; 254 if (uap->name) { 255 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 256 if(error) 257 goto done3; 258 if (namelen < 0) { 259 error = EINVAL; 260 goto done3; 261 } 262 } 263 NET_LOCK_GIANT(); 264 error = fgetsock(td, uap->s, &head, &fflag); 265 if (error) 266 goto done2; 267 s = splnet(); 268 if ((head->so_options & SO_ACCEPTCONN) == 0) { 269 splx(s); 270 error = EINVAL; 271 goto done; 272 } 273 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 274 if (head->so_state & SS_CANTRCVMORE) { 275 head->so_error = ECONNABORTED; 276 break; 277 } 278 if ((head->so_state & SS_NBIO) != 0) { 279 head->so_error = EWOULDBLOCK; 280 break; 281 } 282 error = tsleep(&head->so_timeo, PSOCK | PCATCH, 283 "accept", 0); 284 if (error) { 285 splx(s); 286 goto done; 287 } 288 } 289 if (head->so_error) { 290 error = head->so_error; 291 head->so_error = 0; 292 splx(s); 293 goto done; 294 } 295 296 /* 297 * At this point we know that there is at least one connection 298 * ready to be accepted. Remove it from the queue prior to 299 * allocating the file descriptor for it since falloc() may 300 * block allowing another process to accept the connection 301 * instead. 302 */ 303 so = TAILQ_FIRST(&head->so_comp); 304 TAILQ_REMOVE(&head->so_comp, so, so_list); 305 head->so_qlen--; 306 307 error = falloc(td, &nfp, &fd); 308 if (error) { 309 /* 310 * Probably ran out of file descriptors. Put the 311 * unaccepted connection back onto the queue and 312 * do another wakeup so some other process might 313 * have a chance at it. 314 */ 315 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); 316 head->so_qlen++; 317 wakeup_one(&head->so_timeo); 318 splx(s); 319 goto done; 320 } 321 /* An extra reference on `nfp' has been held for us by falloc(). */ 322 td->td_retval[0] = fd; 323 324 /* connection has been removed from the listen queue */ 325 KNOTE(&head->so_rcv.sb_sel.si_note, 0); 326 327 so->so_state &= ~SS_COMP; 328 so->so_head = NULL; 329 pgid = fgetown(&head->so_sigio); 330 if (pgid != 0) 331 fsetown(pgid, &so->so_sigio); 332 333 FILE_LOCK(nfp); 334 soref(so); /* file descriptor reference */ 335 nfp->f_data = so; /* nfp has ref count from falloc */ 336 nfp->f_flag = fflag; 337 nfp->f_ops = &socketops; 338 nfp->f_type = DTYPE_SOCKET; 339 FILE_UNLOCK(nfp); 340 /* Sync socket nonblocking/async state with file flags */ 341 tmp = fflag & FNONBLOCK; 342 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 343 tmp = fflag & FASYNC; 344 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 345 sa = 0; 346 error = soaccept(so, &sa); 347 if (error) { 348 /* 349 * return a namelen of zero for older code which might 350 * ignore the return value from accept. 351 */ 352 if (uap->name != NULL) { 353 namelen = 0; 354 (void) copyout(&namelen, 355 uap->anamelen, sizeof(*uap->anamelen)); 356 } 357 goto noconnection; 358 } 359 if (sa == NULL) { 360 namelen = 0; 361 if (uap->name) 362 goto gotnoname; 363 splx(s); 364 error = 0; 365 goto done; 366 } 367 if (uap->name) { 368 /* check sa_len before it is destroyed */ 369 if (namelen > sa->sa_len) 370 namelen = sa->sa_len; 371#ifdef COMPAT_OLDSOCK 372 if (compat) 373 ((struct osockaddr *)sa)->sa_family = 374 sa->sa_family; 375#endif 376 error = copyout(sa, uap->name, (u_int)namelen); 377 if (!error) 378gotnoname: 379 error = copyout(&namelen, 380 uap->anamelen, sizeof (*uap->anamelen)); 381 } 382noconnection: 383 if (sa) 384 FREE(sa, M_SONAME); 385 386 /* 387 * close the new descriptor, assuming someone hasn't ripped it 388 * out from under us. 389 */ 390 if (error) { 391 FILEDESC_LOCK(fdp); 392 if (fdp->fd_ofiles[fd] == nfp) { 393 fdp->fd_ofiles[fd] = NULL; 394 fdunused(fdp, fd); 395 FILEDESC_UNLOCK(fdp); 396 fdrop(nfp, td); 397 } else { 398 FILEDESC_UNLOCK(fdp); 399 } 400 } 401 splx(s); 402 403 /* 404 * Release explicitly held references before returning. 405 */ 406done: 407 if (nfp != NULL) 408 fdrop(nfp, td); 409 fputsock(head); 410done2: 411 NET_UNLOCK_GIANT(); 412done3: 413 return (error); 414} 415 416/* 417 * MPSAFE (accept1() is MPSAFE) 418 */ 419int 420accept(td, uap) 421 struct thread *td; 422 struct accept_args *uap; 423{ 424 425 return (accept1(td, uap, 0)); 426} 427 428#ifdef COMPAT_OLDSOCK 429/* 430 * MPSAFE (accept1() is MPSAFE) 431 */ 432int 433oaccept(td, uap) 434 struct thread *td; 435 struct accept_args *uap; 436{ 437 438 return (accept1(td, uap, 1)); 439} 440#endif /* COMPAT_OLDSOCK */ 441 442/* 443 * MPSAFE 444 */ 445/* ARGSUSED */ 446int 447connect(td, uap) 448 struct thread *td; 449 register struct connect_args /* { 450 int s; 451 caddr_t name; 452 int namelen; 453 } */ *uap; 454{ 455 struct sockaddr *sa; 456 int error; 457 458 error = getsockaddr(&sa, uap->name, uap->namelen); 459 if (error) 460 return (error); 461 462 return (kern_connect(td, uap->s, sa)); 463} 464 465 466int 467kern_connect(td, fd, sa) 468 struct thread *td; 469 int fd; 470 struct sockaddr *sa; 471{ 472 struct socket *so; 473 int error, s; 474 int interrupted = 0; 475 476 NET_LOCK_GIANT(); 477 if ((error = fgetsock(td, fd, &so, NULL)) != 0) 478 goto done2; 479 if (so->so_state & SS_ISCONNECTING) { 480 error = EALREADY; 481 goto done1; 482 } 483#ifdef MAC 484 error = mac_check_socket_connect(td->td_ucred, so, sa); 485 if (error) 486 goto bad; 487#endif 488 error = soconnect(so, sa, td); 489 if (error) 490 goto bad; 491 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 492 error = EINPROGRESS; 493 goto done1; 494 } 495 s = splnet(); 496 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 497 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0); 498 if (error) { 499 if (error == EINTR || error == ERESTART) 500 interrupted = 1; 501 break; 502 } 503 } 504 if (error == 0) { 505 error = so->so_error; 506 so->so_error = 0; 507 } 508 splx(s); 509bad: 510 if (!interrupted) 511 so->so_state &= ~SS_ISCONNECTING; 512 if (error == ERESTART) 513 error = EINTR; 514done1: 515 fputsock(so); 516done2: 517 NET_UNLOCK_GIANT(); 518 FREE(sa, M_SONAME); 519 return (error); 520} 521 522/* 523 * MPSAFE 524 */ 525int 526socketpair(td, uap) 527 struct thread *td; 528 register struct socketpair_args /* { 529 int domain; 530 int type; 531 int protocol; 532 int *rsv; 533 } */ *uap; 534{ 535 register struct filedesc *fdp = td->td_proc->p_fd; 536 struct file *fp1, *fp2; 537 struct socket *so1, *so2; 538 int fd, error, sv[2]; 539 540 NET_LOCK_GIANT(); 541 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 542 td->td_ucred, td); 543 if (error) 544 goto done2; 545 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 546 td->td_ucred, td); 547 if (error) 548 goto free1; 549 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 550 error = falloc(td, &fp1, &fd); 551 if (error) 552 goto free2; 553 sv[0] = fd; 554 fp1->f_data = so1; /* so1 already has ref count */ 555 error = falloc(td, &fp2, &fd); 556 if (error) 557 goto free3; 558 fp2->f_data = so2; /* so2 already has ref count */ 559 sv[1] = fd; 560 error = soconnect2(so1, so2); 561 if (error) 562 goto free4; 563 if (uap->type == SOCK_DGRAM) { 564 /* 565 * Datagram socket connection is asymmetric. 566 */ 567 error = soconnect2(so2, so1); 568 if (error) 569 goto free4; 570 } 571 FILE_LOCK(fp1); 572 fp1->f_flag = FREAD|FWRITE; 573 fp1->f_ops = &socketops; 574 fp1->f_type = DTYPE_SOCKET; 575 FILE_UNLOCK(fp1); 576 FILE_LOCK(fp2); 577 fp2->f_flag = FREAD|FWRITE; 578 fp2->f_ops = &socketops; 579 fp2->f_type = DTYPE_SOCKET; 580 FILE_UNLOCK(fp2); 581 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 582 fdrop(fp1, td); 583 fdrop(fp2, td); 584 goto done2; 585free4: 586 FILEDESC_LOCK(fdp); 587 if (fdp->fd_ofiles[sv[1]] == fp2) { 588 fdp->fd_ofiles[sv[1]] = NULL; 589 fdunused(fdp, sv[1]); 590 FILEDESC_UNLOCK(fdp); 591 fdrop(fp2, td); 592 } else { 593 FILEDESC_UNLOCK(fdp); 594 } 595 fdrop(fp2, td); 596free3: 597 FILEDESC_LOCK(fdp); 598 if (fdp->fd_ofiles[sv[0]] == fp1) { 599 fdp->fd_ofiles[sv[0]] = NULL; 600 fdunused(fdp, sv[0]); 601 FILEDESC_UNLOCK(fdp); 602 fdrop(fp1, td); 603 } else { 604 FILEDESC_UNLOCK(fdp); 605 } 606 fdrop(fp1, td); 607free2: 608 (void)soclose(so2); 609free1: 610 (void)soclose(so1); 611done2: 612 NET_UNLOCK_GIANT(); 613 return (error); 614} 615 616static int 617sendit(td, s, mp, flags) 618 register struct thread *td; 619 int s; 620 register struct msghdr *mp; 621 int flags; 622{ 623 struct mbuf *control; 624 struct sockaddr *to; 625 int error; 626 627 if (mp->msg_name != NULL) { 628 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 629 if (error) { 630 to = NULL; 631 goto bad; 632 } 633 mp->msg_name = to; 634 } else { 635 to = NULL; 636 } 637 638 if (mp->msg_control) { 639 if (mp->msg_controllen < sizeof(struct cmsghdr) 640#ifdef COMPAT_OLDSOCK 641 && mp->msg_flags != MSG_COMPAT 642#endif 643 ) { 644 error = EINVAL; 645 goto bad; 646 } 647 error = sockargs(&control, mp->msg_control, 648 mp->msg_controllen, MT_CONTROL); 649 if (error) 650 goto bad; 651#ifdef COMPAT_OLDSOCK 652 if (mp->msg_flags == MSG_COMPAT) { 653 register struct cmsghdr *cm; 654 655 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 656 if (control == 0) { 657 error = ENOBUFS; 658 goto bad; 659 } else { 660 cm = mtod(control, struct cmsghdr *); 661 cm->cmsg_len = control->m_len; 662 cm->cmsg_level = SOL_SOCKET; 663 cm->cmsg_type = SCM_RIGHTS; 664 } 665 } 666#endif 667 } else { 668 control = NULL; 669 } 670 671 error = kern_sendit(td, s, mp, flags, control); 672 673bad: 674 if (to) 675 FREE(to, M_SONAME); 676 return (error); 677} 678 679int 680kern_sendit(td, s, mp, flags, control) 681 struct thread *td; 682 int s; 683 struct msghdr *mp; 684 int flags; 685 struct mbuf *control; 686{ 687 struct uio auio; 688 struct iovec *iov; 689 struct socket *so; 690 int i; 691 int len, error; 692#ifdef KTRACE 693 struct iovec *ktriov = NULL; 694 struct uio ktruio; 695 int iovlen; 696#endif 697 698 NET_LOCK_GIANT(); 699 if ((error = fgetsock(td, s, &so, NULL)) != 0) 700 goto bad2; 701 702#ifdef MAC 703 error = mac_check_socket_send(td->td_ucred, so); 704 if (error) 705 goto bad; 706#endif 707 708 auio.uio_iov = mp->msg_iov; 709 auio.uio_iovcnt = mp->msg_iovlen; 710 auio.uio_segflg = UIO_USERSPACE; 711 auio.uio_rw = UIO_WRITE; 712 auio.uio_td = td; 713 auio.uio_offset = 0; /* XXX */ 714 auio.uio_resid = 0; 715 iov = mp->msg_iov; 716 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 717 if ((auio.uio_resid += iov->iov_len) < 0) { 718 error = EINVAL; 719 goto bad; 720 } 721 } 722#ifdef KTRACE 723 if (KTRPOINT(td, KTR_GENIO)) { 724 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 725 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 726 bcopy(auio.uio_iov, ktriov, iovlen); 727 ktruio = auio; 728 } 729#endif 730 len = auio.uio_resid; 731 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 732 0, control, flags, td); 733 if (error) { 734 if (auio.uio_resid != len && (error == ERESTART || 735 error == EINTR || error == EWOULDBLOCK)) 736 error = 0; 737 /* Generation of SIGPIPE can be controlled per socket */ 738 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) { 739 PROC_LOCK(td->td_proc); 740 psignal(td->td_proc, SIGPIPE); 741 PROC_UNLOCK(td->td_proc); 742 } 743 } 744 if (error == 0) 745 td->td_retval[0] = len - auio.uio_resid; 746#ifdef KTRACE 747 if (ktriov != NULL) { 748 if (error == 0) { 749 ktruio.uio_iov = ktriov; 750 ktruio.uio_resid = td->td_retval[0]; 751 ktrgenio(s, UIO_WRITE, &ktruio, error); 752 } 753 FREE(ktriov, M_TEMP); 754 } 755#endif 756bad: 757 fputsock(so); 758bad2: 759 NET_UNLOCK_GIANT(); 760 return (error); 761} 762 763/* 764 * MPSAFE 765 */ 766int 767sendto(td, uap) 768 struct thread *td; 769 register struct sendto_args /* { 770 int s; 771 caddr_t buf; 772 size_t len; 773 int flags; 774 caddr_t to; 775 int tolen; 776 } */ *uap; 777{ 778 struct msghdr msg; 779 struct iovec aiov; 780 int error; 781 782 msg.msg_name = uap->to; 783 msg.msg_namelen = uap->tolen; 784 msg.msg_iov = &aiov; 785 msg.msg_iovlen = 1; 786 msg.msg_control = 0; 787#ifdef COMPAT_OLDSOCK 788 msg.msg_flags = 0; 789#endif 790 aiov.iov_base = uap->buf; 791 aiov.iov_len = uap->len; 792 error = sendit(td, uap->s, &msg, uap->flags); 793 return (error); 794} 795 796#ifdef COMPAT_OLDSOCK 797/* 798 * MPSAFE 799 */ 800int 801osend(td, uap) 802 struct thread *td; 803 register struct osend_args /* { 804 int s; 805 caddr_t buf; 806 int len; 807 int flags; 808 } */ *uap; 809{ 810 struct msghdr msg; 811 struct iovec aiov; 812 int error; 813 814 msg.msg_name = 0; 815 msg.msg_namelen = 0; 816 msg.msg_iov = &aiov; 817 msg.msg_iovlen = 1; 818 aiov.iov_base = uap->buf; 819 aiov.iov_len = uap->len; 820 msg.msg_control = 0; 821 msg.msg_flags = 0; 822 error = sendit(td, uap->s, &msg, uap->flags); 823 return (error); 824} 825 826/* 827 * MPSAFE 828 */ 829int 830osendmsg(td, uap) 831 struct thread *td; 832 register struct osendmsg_args /* { 833 int s; 834 caddr_t msg; 835 int flags; 836 } */ *uap; 837{ 838 struct msghdr msg; 839 struct iovec aiov[UIO_SMALLIOV], *iov; 840 int error; 841 842 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 843 if (error) 844 goto done2; 845 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 846 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 847 error = EMSGSIZE; 848 goto done2; 849 } 850 MALLOC(iov, struct iovec *, 851 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 852 M_WAITOK); 853 } else { 854 iov = aiov; 855 } 856 error = copyin(msg.msg_iov, iov, 857 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 858 if (error) 859 goto done; 860 msg.msg_flags = MSG_COMPAT; 861 msg.msg_iov = iov; 862 error = sendit(td, uap->s, &msg, uap->flags); 863done: 864 if (iov != aiov) 865 FREE(iov, M_IOV); 866done2: 867 return (error); 868} 869#endif 870 871/* 872 * MPSAFE 873 */ 874int 875sendmsg(td, uap) 876 struct thread *td; 877 register struct sendmsg_args /* { 878 int s; 879 caddr_t msg; 880 int flags; 881 } */ *uap; 882{ 883 struct msghdr msg; 884 struct iovec aiov[UIO_SMALLIOV], *iov; 885 int error; 886 887 error = copyin(uap->msg, &msg, sizeof (msg)); 888 if (error) 889 goto done2; 890 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 891 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 892 error = EMSGSIZE; 893 goto done2; 894 } 895 MALLOC(iov, struct iovec *, 896 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 897 M_WAITOK); 898 } else { 899 iov = aiov; 900 } 901 if (msg.msg_iovlen && 902 (error = copyin(msg.msg_iov, iov, 903 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) 904 goto done; 905 msg.msg_iov = iov; 906#ifdef COMPAT_OLDSOCK 907 msg.msg_flags = 0; 908#endif 909 error = sendit(td, uap->s, &msg, uap->flags); 910done: 911 if (iov != aiov) 912 FREE(iov, M_IOV); 913done2: 914 return (error); 915} 916 917static int 918recvit(td, s, mp, namelenp) 919 register struct thread *td; 920 int s; 921 register struct msghdr *mp; 922 void *namelenp; 923{ 924 struct uio auio; 925 register struct iovec *iov; 926 register int i; 927 socklen_t len; 928 int error; 929 struct mbuf *m, *control = 0; 930 caddr_t ctlbuf; 931 struct socket *so; 932 struct sockaddr *fromsa = 0; 933#ifdef KTRACE 934 struct iovec *ktriov = NULL; 935 struct uio ktruio; 936 int iovlen; 937#endif 938 939 NET_LOCK_GIANT(); 940 if ((error = fgetsock(td, s, &so, NULL)) != 0) { 941 NET_UNLOCK_GIANT(); 942 return (error); 943 } 944 945#ifdef MAC 946 error = mac_check_socket_receive(td->td_ucred, so); 947 if (error) { 948 fputsock(so); 949 NET_UNLOCK_GIANT(); 950 return (error); 951 } 952#endif 953 954 auio.uio_iov = mp->msg_iov; 955 auio.uio_iovcnt = mp->msg_iovlen; 956 auio.uio_segflg = UIO_USERSPACE; 957 auio.uio_rw = UIO_READ; 958 auio.uio_td = td; 959 auio.uio_offset = 0; /* XXX */ 960 auio.uio_resid = 0; 961 iov = mp->msg_iov; 962 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 963 if ((auio.uio_resid += iov->iov_len) < 0) { 964 fputsock(so); 965 NET_UNLOCK_GIANT(); 966 return (EINVAL); 967 } 968 } 969#ifdef KTRACE 970 if (KTRPOINT(td, KTR_GENIO)) { 971 iovlen = auio.uio_iovcnt * sizeof (struct iovec); 972 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 973 bcopy(auio.uio_iov, ktriov, iovlen); 974 ktruio = auio; 975 } 976#endif 977 len = auio.uio_resid; 978 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 979 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 980 &mp->msg_flags); 981 if (error) { 982 if (auio.uio_resid != (int)len && (error == ERESTART || 983 error == EINTR || error == EWOULDBLOCK)) 984 error = 0; 985 } 986#ifdef KTRACE 987 if (ktriov != NULL) { 988 if (error == 0) { 989 ktruio.uio_iov = ktriov; 990 ktruio.uio_resid = (int)len - auio.uio_resid; 991 ktrgenio(s, UIO_READ, &ktruio, error); 992 } 993 FREE(ktriov, M_TEMP); 994 } 995#endif 996 if (error) 997 goto out; 998 td->td_retval[0] = (int)len - auio.uio_resid; 999 if (mp->msg_name) { 1000 len = mp->msg_namelen; 1001 if (len <= 0 || fromsa == 0) 1002 len = 0; 1003 else { 1004 /* save sa_len before it is destroyed by MSG_COMPAT */ 1005 len = MIN(len, fromsa->sa_len); 1006#ifdef COMPAT_OLDSOCK 1007 if (mp->msg_flags & MSG_COMPAT) 1008 ((struct osockaddr *)fromsa)->sa_family = 1009 fromsa->sa_family; 1010#endif 1011 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1012 if (error) 1013 goto out; 1014 } 1015 mp->msg_namelen = len; 1016 if (namelenp && 1017 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1018#ifdef COMPAT_OLDSOCK 1019 if (mp->msg_flags & MSG_COMPAT) 1020 error = 0; /* old recvfrom didn't check */ 1021 else 1022#endif 1023 goto out; 1024 } 1025 } 1026 if (mp->msg_control) { 1027#ifdef COMPAT_OLDSOCK 1028 /* 1029 * We assume that old recvmsg calls won't receive access 1030 * rights and other control info, esp. as control info 1031 * is always optional and those options didn't exist in 4.3. 1032 * If we receive rights, trim the cmsghdr; anything else 1033 * is tossed. 1034 */ 1035 if (control && mp->msg_flags & MSG_COMPAT) { 1036 if (mtod(control, struct cmsghdr *)->cmsg_level != 1037 SOL_SOCKET || 1038 mtod(control, struct cmsghdr *)->cmsg_type != 1039 SCM_RIGHTS) { 1040 mp->msg_controllen = 0; 1041 goto out; 1042 } 1043 control->m_len -= sizeof (struct cmsghdr); 1044 control->m_data += sizeof (struct cmsghdr); 1045 } 1046#endif 1047 len = mp->msg_controllen; 1048 m = control; 1049 mp->msg_controllen = 0; 1050 ctlbuf = mp->msg_control; 1051 1052 while (m && len > 0) { 1053 unsigned int tocopy; 1054 1055 if (len >= m->m_len) 1056 tocopy = m->m_len; 1057 else { 1058 mp->msg_flags |= MSG_CTRUNC; 1059 tocopy = len; 1060 } 1061 1062 if ((error = copyout(mtod(m, caddr_t), 1063 ctlbuf, tocopy)) != 0) 1064 goto out; 1065 1066 ctlbuf += tocopy; 1067 len -= tocopy; 1068 m = m->m_next; 1069 } 1070 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1071 } 1072out: 1073 fputsock(so); 1074 NET_UNLOCK_GIANT(); 1075 if (fromsa) 1076 FREE(fromsa, M_SONAME); 1077 if (control) 1078 m_freem(control); 1079 return (error); 1080} 1081 1082/* 1083 * MPSAFE 1084 */ 1085int 1086recvfrom(td, uap) 1087 struct thread *td; 1088 register struct recvfrom_args /* { 1089 int s; 1090 caddr_t buf; 1091 size_t len; 1092 int flags; 1093 struct sockaddr * __restrict from; 1094 socklen_t * __restrict fromlenaddr; 1095 } */ *uap; 1096{ 1097 struct msghdr msg; 1098 struct iovec aiov; 1099 int error; 1100 1101 if (uap->fromlenaddr) { 1102 error = copyin(uap->fromlenaddr, 1103 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1104 if (error) 1105 goto done2; 1106 } else { 1107 msg.msg_namelen = 0; 1108 } 1109 msg.msg_name = uap->from; 1110 msg.msg_iov = &aiov; 1111 msg.msg_iovlen = 1; 1112 aiov.iov_base = uap->buf; 1113 aiov.iov_len = uap->len; 1114 msg.msg_control = 0; 1115 msg.msg_flags = uap->flags; 1116 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1117done2: 1118 return(error); 1119} 1120 1121#ifdef COMPAT_OLDSOCK 1122/* 1123 * MPSAFE 1124 */ 1125int 1126orecvfrom(td, uap) 1127 struct thread *td; 1128 struct recvfrom_args *uap; 1129{ 1130 1131 uap->flags |= MSG_COMPAT; 1132 return (recvfrom(td, uap)); 1133} 1134#endif 1135 1136 1137#ifdef COMPAT_OLDSOCK 1138/* 1139 * MPSAFE 1140 */ 1141int 1142orecv(td, uap) 1143 struct thread *td; 1144 register struct orecv_args /* { 1145 int s; 1146 caddr_t buf; 1147 int len; 1148 int flags; 1149 } */ *uap; 1150{ 1151 struct msghdr msg; 1152 struct iovec aiov; 1153 int error; 1154 1155 msg.msg_name = 0; 1156 msg.msg_namelen = 0; 1157 msg.msg_iov = &aiov; 1158 msg.msg_iovlen = 1; 1159 aiov.iov_base = uap->buf; 1160 aiov.iov_len = uap->len; 1161 msg.msg_control = 0; 1162 msg.msg_flags = uap->flags; 1163 error = recvit(td, uap->s, &msg, NULL); 1164 return (error); 1165} 1166 1167/* 1168 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1169 * overlays the new one, missing only the flags, and with the (old) access 1170 * rights where the control fields are now. 1171 * 1172 * MPSAFE 1173 */ 1174int 1175orecvmsg(td, uap) 1176 struct thread *td; 1177 register struct orecvmsg_args /* { 1178 int s; 1179 struct omsghdr *msg; 1180 int flags; 1181 } */ *uap; 1182{ 1183 struct msghdr msg; 1184 struct iovec aiov[UIO_SMALLIOV], *iov; 1185 int error; 1186 1187 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1188 if (error) 1189 return (error); 1190 1191 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1192 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1193 error = EMSGSIZE; 1194 goto done2; 1195 } 1196 MALLOC(iov, struct iovec *, 1197 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1198 M_WAITOK); 1199 } else { 1200 iov = aiov; 1201 } 1202 msg.msg_flags = uap->flags | MSG_COMPAT; 1203 error = copyin(msg.msg_iov, iov, 1204 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1205 if (error) 1206 goto done; 1207 msg.msg_iov = iov; 1208 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1209 1210 if (msg.msg_controllen && error == 0) 1211 error = copyout(&msg.msg_controllen, 1212 &uap->msg->msg_accrightslen, sizeof (int)); 1213done: 1214 if (iov != aiov) 1215 FREE(iov, M_IOV); 1216done2: 1217 return (error); 1218} 1219#endif 1220 1221/* 1222 * MPSAFE 1223 */ 1224int 1225recvmsg(td, uap) 1226 struct thread *td; 1227 register struct recvmsg_args /* { 1228 int s; 1229 struct msghdr *msg; 1230 int flags; 1231 } */ *uap; 1232{ 1233 struct msghdr msg; 1234 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; 1235 register int error; 1236 1237 error = copyin(uap->msg, &msg, sizeof (msg)); 1238 if (error) 1239 goto done2; 1240 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { 1241 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) { 1242 error = EMSGSIZE; 1243 goto done2; 1244 } 1245 MALLOC(iov, struct iovec *, 1246 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, 1247 M_WAITOK); 1248 } else { 1249 iov = aiov; 1250 } 1251#ifdef COMPAT_OLDSOCK 1252 msg.msg_flags = uap->flags &~ MSG_COMPAT; 1253#else 1254 msg.msg_flags = uap->flags; 1255#endif 1256 uiov = msg.msg_iov; 1257 msg.msg_iov = iov; 1258 error = copyin(uiov, iov, 1259 (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); 1260 if (error) 1261 goto done; 1262 error = recvit(td, uap->s, &msg, NULL); 1263 if (!error) { 1264 msg.msg_iov = uiov; 1265 error = copyout(&msg, uap->msg, sizeof(msg)); 1266 } 1267done: 1268 if (iov != aiov) 1269 FREE(iov, M_IOV); 1270done2: 1271 return (error); 1272} 1273 1274/* 1275 * MPSAFE 1276 */ 1277/* ARGSUSED */ 1278int 1279shutdown(td, uap) 1280 struct thread *td; 1281 register struct shutdown_args /* { 1282 int s; 1283 int how; 1284 } */ *uap; 1285{ 1286 struct socket *so; 1287 int error; 1288 1289 NET_LOCK_GIANT(); 1290 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1291 error = soshutdown(so, uap->how); 1292 fputsock(so); 1293 } 1294 NET_UNLOCK_GIANT(); 1295 return(error); 1296} 1297 1298/* 1299 * MPSAFE 1300 */ 1301/* ARGSUSED */ 1302int 1303setsockopt(td, uap) 1304 struct thread *td; 1305 register struct setsockopt_args /* { 1306 int s; 1307 int level; 1308 int name; 1309 caddr_t val; 1310 int valsize; 1311 } */ *uap; 1312{ 1313 struct socket *so; 1314 struct sockopt sopt; 1315 int error; 1316 1317 if (uap->val == 0 && uap->valsize != 0) 1318 return (EFAULT); 1319 if (uap->valsize < 0) 1320 return (EINVAL); 1321 1322 NET_LOCK_GIANT(); 1323 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) { 1324 sopt.sopt_dir = SOPT_SET; 1325 sopt.sopt_level = uap->level; 1326 sopt.sopt_name = uap->name; 1327 sopt.sopt_val = uap->val; 1328 sopt.sopt_valsize = uap->valsize; 1329 sopt.sopt_td = td; 1330 error = sosetopt(so, &sopt); 1331 fputsock(so); 1332 } 1333 NET_UNLOCK_GIANT(); 1334 return(error); 1335} 1336 1337/* 1338 * MPSAFE 1339 */ 1340/* ARGSUSED */ 1341int 1342getsockopt(td, uap) 1343 struct thread *td; 1344 register struct getsockopt_args /* { 1345 int s; 1346 int level; 1347 int name; 1348 void * __restrict val; 1349 socklen_t * __restrict avalsize; 1350 } */ *uap; 1351{ 1352 socklen_t valsize; 1353 int error; 1354 struct socket *so; 1355 struct sockopt sopt; 1356 1357 NET_LOCK_GIANT(); 1358 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1359 goto done2; 1360 if (uap->val) { 1361 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1362 if (error) 1363 goto done1; 1364 if (valsize < 0) { 1365 error = EINVAL; 1366 goto done1; 1367 } 1368 } else { 1369 valsize = 0; 1370 } 1371 1372 sopt.sopt_dir = SOPT_GET; 1373 sopt.sopt_level = uap->level; 1374 sopt.sopt_name = uap->name; 1375 sopt.sopt_val = uap->val; 1376 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1377 sopt.sopt_td = td; 1378 1379 error = sogetopt(so, &sopt); 1380 if (error == 0) { 1381 valsize = sopt.sopt_valsize; 1382 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1383 } 1384done1: 1385 fputsock(so); 1386done2: 1387 NET_UNLOCK_GIANT(); 1388 return (error); 1389} 1390 1391/* 1392 * getsockname1() - Get socket name. 1393 * 1394 * MPSAFE 1395 */ 1396/* ARGSUSED */ 1397static int 1398getsockname1(td, uap, compat) 1399 struct thread *td; 1400 register struct getsockname_args /* { 1401 int fdes; 1402 struct sockaddr * __restrict asa; 1403 socklen_t * __restrict alen; 1404 } */ *uap; 1405 int compat; 1406{ 1407 struct socket *so; 1408 struct sockaddr *sa; 1409 socklen_t len; 1410 int error; 1411 1412 NET_LOCK_GIANT(); 1413 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1414 goto done2; 1415 error = copyin(uap->alen, &len, sizeof (len)); 1416 if (error) 1417 goto done1; 1418 if (len < 0) { 1419 error = EINVAL; 1420 goto done1; 1421 } 1422 sa = 0; 1423 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1424 if (error) 1425 goto bad; 1426 if (sa == 0) { 1427 len = 0; 1428 goto gotnothing; 1429 } 1430 1431 len = MIN(len, sa->sa_len); 1432#ifdef COMPAT_OLDSOCK 1433 if (compat) 1434 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1435#endif 1436 error = copyout(sa, uap->asa, (u_int)len); 1437 if (error == 0) 1438gotnothing: 1439 error = copyout(&len, uap->alen, sizeof (len)); 1440bad: 1441 if (sa) 1442 FREE(sa, M_SONAME); 1443done1: 1444 fputsock(so); 1445done2: 1446 NET_UNLOCK_GIANT(); 1447 return (error); 1448} 1449 1450/* 1451 * MPSAFE 1452 */ 1453int 1454getsockname(td, uap) 1455 struct thread *td; 1456 struct getsockname_args *uap; 1457{ 1458 1459 return (getsockname1(td, uap, 0)); 1460} 1461 1462#ifdef COMPAT_OLDSOCK 1463/* 1464 * MPSAFE 1465 */ 1466int 1467ogetsockname(td, uap) 1468 struct thread *td; 1469 struct getsockname_args *uap; 1470{ 1471 1472 return (getsockname1(td, uap, 1)); 1473} 1474#endif /* COMPAT_OLDSOCK */ 1475 1476/* 1477 * getpeername1() - Get name of peer for connected socket. 1478 * 1479 * MPSAFE 1480 */ 1481/* ARGSUSED */ 1482static int 1483getpeername1(td, uap, compat) 1484 struct thread *td; 1485 register struct getpeername_args /* { 1486 int fdes; 1487 struct sockaddr * __restrict asa; 1488 socklen_t * __restrict alen; 1489 } */ *uap; 1490 int compat; 1491{ 1492 struct socket *so; 1493 struct sockaddr *sa; 1494 socklen_t len; 1495 int error; 1496 1497 NET_LOCK_GIANT(); 1498 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0) 1499 goto done2; 1500 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1501 error = ENOTCONN; 1502 goto done1; 1503 } 1504 error = copyin(uap->alen, &len, sizeof (len)); 1505 if (error) 1506 goto done1; 1507 if (len < 0) { 1508 error = EINVAL; 1509 goto done1; 1510 } 1511 sa = 0; 1512 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1513 if (error) 1514 goto bad; 1515 if (sa == 0) { 1516 len = 0; 1517 goto gotnothing; 1518 } 1519 len = MIN(len, sa->sa_len); 1520#ifdef COMPAT_OLDSOCK 1521 if (compat) 1522 ((struct osockaddr *)sa)->sa_family = 1523 sa->sa_family; 1524#endif 1525 error = copyout(sa, uap->asa, (u_int)len); 1526 if (error) 1527 goto bad; 1528gotnothing: 1529 error = copyout(&len, uap->alen, sizeof (len)); 1530bad: 1531 if (sa) 1532 FREE(sa, M_SONAME); 1533done1: 1534 fputsock(so); 1535done2: 1536 NET_UNLOCK_GIANT(); 1537 return (error); 1538} 1539 1540/* 1541 * MPSAFE 1542 */ 1543int 1544getpeername(td, uap) 1545 struct thread *td; 1546 struct getpeername_args *uap; 1547{ 1548 1549 return (getpeername1(td, uap, 0)); 1550} 1551 1552#ifdef COMPAT_OLDSOCK 1553/* 1554 * MPSAFE 1555 */ 1556int 1557ogetpeername(td, uap) 1558 struct thread *td; 1559 struct ogetpeername_args *uap; 1560{ 1561 1562 /* XXX uap should have type `getpeername_args *' to begin with. */ 1563 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1564} 1565#endif /* COMPAT_OLDSOCK */ 1566 1567int 1568sockargs(mp, buf, buflen, type) 1569 struct mbuf **mp; 1570 caddr_t buf; 1571 int buflen, type; 1572{ 1573 register struct sockaddr *sa; 1574 register struct mbuf *m; 1575 int error; 1576 1577 if ((u_int)buflen > MLEN) { 1578#ifdef COMPAT_OLDSOCK 1579 if (type == MT_SONAME && (u_int)buflen <= 112) 1580 buflen = MLEN; /* unix domain compat. hack */ 1581 else 1582#endif 1583 return (EINVAL); 1584 } 1585 m = m_get(M_TRYWAIT, type); 1586 if (m == NULL) 1587 return (ENOBUFS); 1588 m->m_len = buflen; 1589 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1590 if (error) 1591 (void) m_free(m); 1592 else { 1593 *mp = m; 1594 if (type == MT_SONAME) { 1595 sa = mtod(m, struct sockaddr *); 1596 1597#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1598 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1599 sa->sa_family = sa->sa_len; 1600#endif 1601 sa->sa_len = buflen; 1602 } 1603 } 1604 return (error); 1605} 1606 1607int 1608getsockaddr(namp, uaddr, len) 1609 struct sockaddr **namp; 1610 caddr_t uaddr; 1611 size_t len; 1612{ 1613 struct sockaddr *sa; 1614 int error; 1615 1616 if (len > SOCK_MAXADDRLEN) 1617 return (ENAMETOOLONG); 1618 if (len < offsetof(struct sockaddr, sa_data[0])) 1619 return (EINVAL); 1620 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1621 error = copyin(uaddr, sa, len); 1622 if (error) { 1623 FREE(sa, M_SONAME); 1624 } else { 1625#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1626 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1627 sa->sa_family = sa->sa_len; 1628#endif 1629 sa->sa_len = len; 1630 *namp = sa; 1631 } 1632 return (error); 1633} 1634 1635/* 1636 * Detatch mapped page and release resources back to the system. 1637 */ 1638void 1639sf_buf_mext(void *addr, void *args) 1640{ 1641 vm_page_t m; 1642 1643 m = sf_buf_page(args); 1644 sf_buf_free(args); 1645 vm_page_lock_queues(); 1646 vm_page_unwire(m, 0); 1647 /* 1648 * Check for the object going away on us. This can 1649 * happen since we don't hold a reference to it. 1650 * If so, we're responsible for freeing the page. 1651 */ 1652 if (m->wire_count == 0 && m->object == NULL) 1653 vm_page_free(m); 1654 vm_page_unlock_queues(); 1655} 1656 1657/* 1658 * sendfile(2) 1659 * 1660 * MPSAFE 1661 * 1662 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1663 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1664 * 1665 * Send a file specified by 'fd' and starting at 'offset' to a socket 1666 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1667 * nbytes == 0. Optionally add a header and/or trailer to the socket 1668 * output. If specified, write the total number of bytes sent into *sbytes. 1669 * 1670 */ 1671int 1672sendfile(struct thread *td, struct sendfile_args *uap) 1673{ 1674 1675 return (do_sendfile(td, uap, 0)); 1676} 1677 1678#ifdef COMPAT_FREEBSD4 1679int 1680freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1681{ 1682 struct sendfile_args args; 1683 1684 args.fd = uap->fd; 1685 args.s = uap->s; 1686 args.offset = uap->offset; 1687 args.nbytes = uap->nbytes; 1688 args.hdtr = uap->hdtr; 1689 args.sbytes = uap->sbytes; 1690 args.flags = uap->flags; 1691 1692 return (do_sendfile(td, &args, 1)); 1693} 1694#endif /* COMPAT_FREEBSD4 */ 1695 1696static int 1697do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1698{ 1699 struct vnode *vp; 1700 struct vm_object *obj; 1701 struct socket *so = NULL; 1702 struct mbuf *m, *m_header = NULL; 1703 struct sf_buf *sf; 1704 struct vm_page *pg; 1705 struct writev_args nuap; 1706 struct sf_hdtr hdtr; 1707 struct uio hdr_uio; 1708 off_t off, xfsize, hdtr_size, sbytes = 0; 1709 int error, s, headersize = 0, headersent = 0; 1710 struct iovec *hdr_iov = NULL; 1711 1712 NET_LOCK_GIANT(); 1713 1714 hdtr_size = 0; 1715 1716 /* 1717 * The descriptor must be a regular file and have a backing VM object. 1718 */ 1719 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1720 goto done; 1721 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1722 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) { 1723 error = EINVAL; 1724 VOP_UNLOCK(vp, 0, td); 1725 goto done; 1726 } 1727 VOP_UNLOCK(vp, 0, td); 1728 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0) 1729 goto done; 1730 if (so->so_type != SOCK_STREAM) { 1731 error = EINVAL; 1732 goto done; 1733 } 1734 if ((so->so_state & SS_ISCONNECTED) == 0) { 1735 error = ENOTCONN; 1736 goto done; 1737 } 1738 if (uap->offset < 0) { 1739 error = EINVAL; 1740 goto done; 1741 } 1742 1743#ifdef MAC 1744 error = mac_check_socket_send(td->td_ucred, so); 1745 if (error) 1746 goto done; 1747#endif 1748 1749 /* 1750 * If specified, get the pointer to the sf_hdtr struct for 1751 * any headers/trailers. 1752 */ 1753 if (uap->hdtr != NULL) { 1754 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1755 if (error) 1756 goto done; 1757 /* 1758 * Send any headers. 1759 */ 1760 if (hdtr.headers != NULL) { 1761 hdr_uio.uio_td = td; 1762 hdr_uio.uio_rw = UIO_WRITE; 1763 error = uiofromiov(hdtr.headers, hdtr.hdr_cnt, 1764 &hdr_uio); 1765 if (error) 1766 goto done; 1767 /* Cache hdr_iov, m_uiotombuf may change it. */ 1768 hdr_iov = hdr_uio.uio_iov; 1769 if (hdr_uio.uio_resid > 0) { 1770 m_header = m_uiotombuf(&hdr_uio, M_DONTWAIT, 0); 1771 if (m_header == NULL) 1772 goto done; 1773 headersize = m_header->m_pkthdr.len; 1774 if (compat) 1775 sbytes += headersize; 1776 } 1777 } 1778 } 1779 1780 /* 1781 * Protect against multiple writers to the socket. 1782 */ 1783 (void) sblock(&so->so_snd, M_WAITOK); 1784 1785 /* 1786 * Loop through the pages in the file, starting with the requested 1787 * offset. Get a file page (do I/O if necessary), map the file page 1788 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1789 * it on the socket. 1790 */ 1791 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1792 vm_pindex_t pindex; 1793 vm_offset_t pgoff; 1794 1795 pindex = OFF_TO_IDX(off); 1796 VM_OBJECT_LOCK(obj); 1797retry_lookup: 1798 /* 1799 * Calculate the amount to transfer. Not to exceed a page, 1800 * the EOF, or the passed in nbytes. 1801 */ 1802 xfsize = obj->un_pager.vnp.vnp_size - off; 1803 VM_OBJECT_UNLOCK(obj); 1804 if (xfsize > PAGE_SIZE) 1805 xfsize = PAGE_SIZE; 1806 pgoff = (vm_offset_t)(off & PAGE_MASK); 1807 if (PAGE_SIZE - pgoff < xfsize) 1808 xfsize = PAGE_SIZE - pgoff; 1809 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1810 xfsize = uap->nbytes - sbytes; 1811 if (xfsize <= 0) 1812 break; 1813 /* 1814 * Optimize the non-blocking case by looking at the socket space 1815 * before going to the extra work of constituting the sf_buf. 1816 */ 1817 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1818 if (so->so_state & SS_CANTSENDMORE) 1819 error = EPIPE; 1820 else 1821 error = EAGAIN; 1822 sbunlock(&so->so_snd); 1823 goto done; 1824 } 1825 VM_OBJECT_LOCK(obj); 1826 /* 1827 * Attempt to look up the page. 1828 * 1829 * Allocate if not found 1830 * 1831 * Wait and loop if busy. 1832 */ 1833 pg = vm_page_lookup(obj, pindex); 1834 1835 if (pg == NULL) { 1836 pg = vm_page_alloc(obj, pindex, 1837 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1838 if (pg == NULL) { 1839 VM_OBJECT_UNLOCK(obj); 1840 VM_WAIT; 1841 VM_OBJECT_LOCK(obj); 1842 goto retry_lookup; 1843 } 1844 vm_page_lock_queues(); 1845 vm_page_wakeup(pg); 1846 } else { 1847 vm_page_lock_queues(); 1848 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1849 goto retry_lookup; 1850 /* 1851 * Wire the page so it does not get ripped out from 1852 * under us. 1853 */ 1854 vm_page_wire(pg); 1855 } 1856 1857 /* 1858 * If page is not valid for what we need, initiate I/O 1859 */ 1860 1861 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1862 VM_OBJECT_UNLOCK(obj); 1863 } else if (uap->flags & SF_NODISKIO) { 1864 error = EBUSY; 1865 } else { 1866 int bsize, resid; 1867 1868 /* 1869 * Ensure that our page is still around when the I/O 1870 * completes. 1871 */ 1872 vm_page_io_start(pg); 1873 vm_page_unlock_queues(); 1874 VM_OBJECT_UNLOCK(obj); 1875 1876 /* 1877 * Get the page from backing store. 1878 */ 1879 bsize = vp->v_mount->mnt_stat.f_iosize; 1880 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td); 1881 /* 1882 * XXXMAC: Because we don't have fp->f_cred here, 1883 * we pass in NOCRED. This is probably wrong, but 1884 * is consistent with our original implementation. 1885 */ 1886 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 1887 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 1888 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 1889 td->td_ucred, NOCRED, &resid, td); 1890 VOP_UNLOCK(vp, 0, td); 1891 if (error) 1892 VM_OBJECT_LOCK(obj); 1893 vm_page_lock_queues(); 1894 vm_page_flag_clear(pg, PG_ZERO); 1895 vm_page_io_finish(pg); 1896 mbstat.sf_iocnt++; 1897 } 1898 1899 if (error) { 1900 vm_page_unwire(pg, 0); 1901 /* 1902 * See if anyone else might know about this page. 1903 * If not and it is not valid, then free it. 1904 */ 1905 if (pg->wire_count == 0 && pg->valid == 0 && 1906 pg->busy == 0 && !(pg->flags & PG_BUSY) && 1907 pg->hold_count == 0) { 1908 vm_page_busy(pg); 1909 vm_page_free(pg); 1910 } 1911 vm_page_unlock_queues(); 1912 VM_OBJECT_UNLOCK(obj); 1913 sbunlock(&so->so_snd); 1914 goto done; 1915 } 1916 vm_page_unlock_queues(); 1917 1918 /* 1919 * Get a sendfile buf. We usually wait as long as necessary, 1920 * but this wait can be interrupted. 1921 */ 1922 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) { 1923 mbstat.sf_allocfail++; 1924 vm_page_lock_queues(); 1925 vm_page_unwire(pg, 0); 1926 if (pg->wire_count == 0 && pg->object == NULL) 1927 vm_page_free(pg); 1928 vm_page_unlock_queues(); 1929 sbunlock(&so->so_snd); 1930 error = EINTR; 1931 goto done; 1932 } 1933 1934 /* 1935 * Get an mbuf header and set it up as having external storage. 1936 */ 1937 if (m_header) 1938 MGET(m, M_TRYWAIT, MT_DATA); 1939 else 1940 MGETHDR(m, M_TRYWAIT, MT_DATA); 1941 if (m == NULL) { 1942 error = ENOBUFS; 1943 sf_buf_mext((void *)sf_buf_kva(sf), sf); 1944 sbunlock(&so->so_snd); 1945 goto done; 1946 } 1947 /* 1948 * Setup external storage for mbuf. 1949 */ 1950 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 1951 EXT_SFBUF); 1952 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 1953 m->m_pkthdr.len = m->m_len = xfsize; 1954 1955 if (m_header) { 1956 m_cat(m_header, m); 1957 m = m_header; 1958 m_header = NULL; 1959 m_fixhdr(m); 1960 } 1961 1962 /* 1963 * Add the buffer to the socket buffer chain. 1964 */ 1965 s = splnet(); 1966retry_space: 1967 /* 1968 * Make sure that the socket is still able to take more data. 1969 * CANTSENDMORE being true usually means that the connection 1970 * was closed. so_error is true when an error was sensed after 1971 * a previous send. 1972 * The state is checked after the page mapping and buffer 1973 * allocation above since those operations may block and make 1974 * any socket checks stale. From this point forward, nothing 1975 * blocks before the pru_send (or more accurately, any blocking 1976 * results in a loop back to here to re-check). 1977 */ 1978 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 1979 if (so->so_state & SS_CANTSENDMORE) { 1980 error = EPIPE; 1981 } else { 1982 error = so->so_error; 1983 so->so_error = 0; 1984 } 1985 m_freem(m); 1986 sbunlock(&so->so_snd); 1987 splx(s); 1988 goto done; 1989 } 1990 /* 1991 * Wait for socket space to become available. We do this just 1992 * after checking the connection state above in order to avoid 1993 * a race condition with sbwait(). 1994 */ 1995 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 1996 if (so->so_state & SS_NBIO) { 1997 m_freem(m); 1998 sbunlock(&so->so_snd); 1999 splx(s); 2000 error = EAGAIN; 2001 goto done; 2002 } 2003 error = sbwait(&so->so_snd); 2004 /* 2005 * An error from sbwait usually indicates that we've 2006 * been interrupted by a signal. If we've sent anything 2007 * then return bytes sent, otherwise return the error. 2008 */ 2009 if (error) { 2010 m_freem(m); 2011 sbunlock(&so->so_snd); 2012 splx(s); 2013 goto done; 2014 } 2015 goto retry_space; 2016 } 2017 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2018 splx(s); 2019 if (error) { 2020 sbunlock(&so->so_snd); 2021 goto done; 2022 } 2023 headersent = 1; 2024 } 2025 sbunlock(&so->so_snd); 2026 2027 /* 2028 * Send trailers. Wimp out and use writev(2). 2029 */ 2030 if (uap->hdtr != NULL && hdtr.trailers != NULL) { 2031 nuap.fd = uap->s; 2032 nuap.iovp = hdtr.trailers; 2033 nuap.iovcnt = hdtr.trl_cnt; 2034 error = writev(td, &nuap); 2035 if (error) 2036 goto done; 2037 if (compat) 2038 sbytes += td->td_retval[0]; 2039 else 2040 hdtr_size += td->td_retval[0]; 2041 } 2042 2043done: 2044 if (headersent) { 2045 if (!compat) 2046 hdtr_size += headersize; 2047 } else { 2048 if (compat) 2049 sbytes -= headersize; 2050 } 2051 /* 2052 * If there was no error we have to clear td->td_retval[0] 2053 * because it may have been set by writev. 2054 */ 2055 if (error == 0) { 2056 td->td_retval[0] = 0; 2057 } 2058 if (uap->sbytes != NULL) { 2059 if (!compat) 2060 sbytes += hdtr_size; 2061 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2062 } 2063 if (vp) 2064 vrele(vp); 2065 if (so) 2066 fputsock(so); 2067 if (hdr_iov) 2068 FREE(hdr_iov, M_IOV); 2069 if (m_header) 2070 m_freem(m_header); 2071 2072 NET_UNLOCK_GIANT(); 2073 2074 if (error == ERESTART) 2075 error = EINTR; 2076 2077 return (error); 2078} 2079