uipc_syscalls.c revision 159789
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 159789 2006-06-20 12:36:40Z gnn $"); 37 38#include "opt_compat.h" 39#include "opt_ktrace.h" 40#include "opt_mac.h" 41 42#include <sys/param.h> 43#include <sys/systm.h> 44#include <sys/kernel.h> 45#include <sys/lock.h> 46#include <sys/mac.h> 47#include <sys/mutex.h> 48#include <sys/sysproto.h> 49#include <sys/malloc.h> 50#include <sys/filedesc.h> 51#include <sys/event.h> 52#include <sys/proc.h> 53#include <sys/fcntl.h> 54#include <sys/file.h> 55#include <sys/filio.h> 56#include <sys/mount.h> 57#include <sys/mbuf.h> 58#include <sys/protosw.h> 59#include <sys/sf_buf.h> 60#include <sys/socket.h> 61#include <sys/socketvar.h> 62#include <sys/signalvar.h> 63#include <sys/syscallsubr.h> 64#include <sys/sysctl.h> 65#include <sys/uio.h> 66#include <sys/vnode.h> 67#ifdef KTRACE 68#include <sys/ktrace.h> 69#endif 70 71#include <vm/vm.h> 72#include <vm/vm_object.h> 73#include <vm/vm_page.h> 74#include <vm/vm_pageout.h> 75#include <vm/vm_kern.h> 76#include <vm/vm_extern.h> 77 78static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 79static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 80 81static int accept1(struct thread *td, struct accept_args *uap, int compat); 82static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 83static int getsockname1(struct thread *td, struct getsockname_args *uap, 84 int compat); 85static int getpeername1(struct thread *td, struct getpeername_args *uap, 86 int compat); 87 88/* 89 * NSFBUFS-related variables and associated sysctls 90 */ 91int nsfbufs; 92int nsfbufspeak; 93int nsfbufsused; 94 95SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 96 "Maximum number of sendfile(2) sf_bufs available"); 97SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 98 "Number of sendfile(2) sf_bufs at peak usage"); 99SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 100 "Number of sendfile(2) sf_bufs in use"); 101 102/* 103 * Convert a user file descriptor to a kernel file entry. A reference on the 104 * file entry is held upon returning. This is lighter weight than 105 * fgetsock(), which bumps the socket reference drops the file reference 106 * count instead, as this approach avoids several additional mutex operations 107 * associated with the additional reference count. If requested, return the 108 * open file flags. 109 */ 110static int 111getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) 112{ 113 struct file *fp; 114 int error; 115 116 fp = NULL; 117 if (fdp == NULL) 118 error = EBADF; 119 else { 120 FILEDESC_LOCK_FAST(fdp); 121 fp = fget_locked(fdp, fd); 122 if (fp == NULL) 123 error = EBADF; 124 else if (fp->f_type != DTYPE_SOCKET) { 125 fp = NULL; 126 error = ENOTSOCK; 127 } else { 128 fhold(fp); 129 if (fflagp != NULL) 130 *fflagp = fp->f_flag; 131 error = 0; 132 } 133 FILEDESC_UNLOCK_FAST(fdp); 134 } 135 *fpp = fp; 136 return (error); 137} 138 139/* 140 * System call interface to the socket abstraction. 141 */ 142#if defined(COMPAT_43) 143#define COMPAT_OLDSOCK 144#endif 145 146/* 147 * MPSAFE 148 */ 149int 150socket(td, uap) 151 struct thread *td; 152 register struct socket_args /* { 153 int domain; 154 int type; 155 int protocol; 156 } */ *uap; 157{ 158 struct filedesc *fdp; 159 struct socket *so; 160 struct file *fp; 161 int fd, error; 162 163#ifdef MAC 164 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 165 uap->protocol); 166 if (error) 167 return (error); 168#endif 169 fdp = td->td_proc->p_fd; 170 error = falloc(td, &fp, &fd); 171 if (error) 172 return (error); 173 /* An extra reference on `fp' has been held for us by falloc(). */ 174 NET_LOCK_GIANT(); 175 error = socreate(uap->domain, &so, uap->type, uap->protocol, 176 td->td_ucred, td); 177 NET_UNLOCK_GIANT(); 178 if (error) { 179 fdclose(fdp, fp, fd, td); 180 } else { 181 FILEDESC_LOCK_FAST(fdp); 182 fp->f_data = so; /* already has ref count */ 183 fp->f_flag = FREAD|FWRITE; 184 fp->f_ops = &socketops; 185 fp->f_type = DTYPE_SOCKET; 186 FILEDESC_UNLOCK_FAST(fdp); 187 td->td_retval[0] = fd; 188 } 189 fdrop(fp, td); 190 return (error); 191} 192 193/* 194 * MPSAFE 195 */ 196/* ARGSUSED */ 197int 198bind(td, uap) 199 struct thread *td; 200 register struct bind_args /* { 201 int s; 202 caddr_t name; 203 int namelen; 204 } */ *uap; 205{ 206 struct sockaddr *sa; 207 int error; 208 209 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 210 return (error); 211 212 return (kern_bind(td, uap->s, sa)); 213} 214 215int 216kern_bind(td, fd, sa) 217 struct thread *td; 218 int fd; 219 struct sockaddr *sa; 220{ 221 struct socket *so; 222 struct file *fp; 223 int error; 224 225 NET_LOCK_GIANT(); 226 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 227 if (error) 228 goto done2; 229 so = fp->f_data; 230#ifdef MAC 231 SOCK_LOCK(so); 232 error = mac_check_socket_bind(td->td_ucred, so, sa); 233 SOCK_UNLOCK(so); 234 if (error) 235 goto done1; 236#endif 237 error = sobind(so, sa, td); 238#ifdef MAC 239done1: 240#endif 241 fdrop(fp, td); 242done2: 243 NET_UNLOCK_GIANT(); 244 FREE(sa, M_SONAME); 245 return (error); 246} 247 248/* 249 * MPSAFE 250 */ 251/* ARGSUSED */ 252int 253listen(td, uap) 254 struct thread *td; 255 register struct listen_args /* { 256 int s; 257 int backlog; 258 } */ *uap; 259{ 260 struct socket *so; 261 struct file *fp; 262 int error; 263 264 NET_LOCK_GIANT(); 265 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 266 if (error == 0) { 267 so = fp->f_data; 268#ifdef MAC 269 SOCK_LOCK(so); 270 error = mac_check_socket_listen(td->td_ucred, so); 271 SOCK_UNLOCK(so); 272 if (error) 273 goto done; 274#endif 275 error = solisten(so, uap->backlog, td); 276#ifdef MAC 277done: 278#endif 279 fdrop(fp, td); 280 } 281 NET_UNLOCK_GIANT(); 282 return(error); 283} 284 285/* 286 * accept1() 287 * MPSAFE 288 */ 289static int 290accept1(td, uap, compat) 291 struct thread *td; 292 register struct accept_args /* { 293 int s; 294 struct sockaddr * __restrict name; 295 socklen_t * __restrict anamelen; 296 } */ *uap; 297 int compat; 298{ 299 struct filedesc *fdp; 300 struct file *headfp, *nfp = NULL; 301 struct sockaddr *sa = NULL; 302 socklen_t namelen; 303 int error; 304 struct socket *head, *so; 305 int fd; 306 u_int fflag; 307 pid_t pgid; 308 int tmp; 309 310 fdp = td->td_proc->p_fd; 311 if (uap->name) { 312 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 313 if(error) 314 return (error); 315 if (namelen < 0) 316 return (EINVAL); 317 } 318 NET_LOCK_GIANT(); 319 error = getsock(fdp, uap->s, &headfp, &fflag); 320 if (error) 321 goto done2; 322 head = headfp->f_data; 323 if ((head->so_options & SO_ACCEPTCONN) == 0) { 324 error = EINVAL; 325 goto done; 326 } 327#ifdef MAC 328 SOCK_LOCK(head); 329 error = mac_check_socket_accept(td->td_ucred, head); 330 SOCK_UNLOCK(head); 331 if (error != 0) 332 goto done; 333#endif 334 error = falloc(td, &nfp, &fd); 335 if (error) 336 goto done; 337 ACCEPT_LOCK(); 338 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 339 ACCEPT_UNLOCK(); 340 error = EWOULDBLOCK; 341 goto noconnection; 342 } 343 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 344 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 345 head->so_error = ECONNABORTED; 346 break; 347 } 348 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 349 "accept", 0); 350 if (error) { 351 ACCEPT_UNLOCK(); 352 goto noconnection; 353 } 354 } 355 if (head->so_error) { 356 error = head->so_error; 357 head->so_error = 0; 358 ACCEPT_UNLOCK(); 359 goto noconnection; 360 } 361 so = TAILQ_FIRST(&head->so_comp); 362 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 363 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 364 365 /* 366 * Before changing the flags on the socket, we have to bump the 367 * reference count. Otherwise, if the protocol calls sofree(), 368 * the socket will be released due to a zero refcount. 369 */ 370 SOCK_LOCK(so); /* soref() and so_state update */ 371 soref(so); /* file descriptor reference */ 372 373 TAILQ_REMOVE(&head->so_comp, so, so_list); 374 head->so_qlen--; 375 so->so_state |= (head->so_state & SS_NBIO); 376 so->so_qstate &= ~SQ_COMP; 377 so->so_head = NULL; 378 379 SOCK_UNLOCK(so); 380 ACCEPT_UNLOCK(); 381 382 /* An extra reference on `nfp' has been held for us by falloc(). */ 383 td->td_retval[0] = fd; 384 385 /* connection has been removed from the listen queue */ 386 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 387 388 pgid = fgetown(&head->so_sigio); 389 if (pgid != 0) 390 fsetown(pgid, &so->so_sigio); 391 392 FILE_LOCK(nfp); 393 nfp->f_data = so; /* nfp has ref count from falloc */ 394 nfp->f_flag = fflag; 395 nfp->f_ops = &socketops; 396 nfp->f_type = DTYPE_SOCKET; 397 FILE_UNLOCK(nfp); 398 /* Sync socket nonblocking/async state with file flags */ 399 tmp = fflag & FNONBLOCK; 400 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 401 tmp = fflag & FASYNC; 402 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 403 sa = 0; 404 error = soaccept(so, &sa); 405 if (error) { 406 /* 407 * return a namelen of zero for older code which might 408 * ignore the return value from accept. 409 */ 410 if (uap->name != NULL) { 411 namelen = 0; 412 (void) copyout(&namelen, 413 uap->anamelen, sizeof(*uap->anamelen)); 414 } 415 goto noconnection; 416 } 417 if (sa == NULL) { 418 namelen = 0; 419 if (uap->name) 420 goto gotnoname; 421 error = 0; 422 goto done; 423 } 424 if (uap->name) { 425 /* check sa_len before it is destroyed */ 426 if (namelen > sa->sa_len) 427 namelen = sa->sa_len; 428#ifdef COMPAT_OLDSOCK 429 if (compat) 430 ((struct osockaddr *)sa)->sa_family = 431 sa->sa_family; 432#endif 433 error = copyout(sa, uap->name, (u_int)namelen); 434 if (!error) 435gotnoname: 436 error = copyout(&namelen, 437 uap->anamelen, sizeof (*uap->anamelen)); 438 } 439noconnection: 440 if (sa) 441 FREE(sa, M_SONAME); 442 443 /* 444 * close the new descriptor, assuming someone hasn't ripped it 445 * out from under us. 446 */ 447 if (error) 448 fdclose(fdp, nfp, fd, td); 449 450 /* 451 * Release explicitly held references before returning. 452 */ 453done: 454 if (nfp != NULL) 455 fdrop(nfp, td); 456 fdrop(headfp, td); 457done2: 458 NET_UNLOCK_GIANT(); 459 return (error); 460} 461 462/* 463 * MPSAFE (accept1() is MPSAFE) 464 */ 465int 466accept(td, uap) 467 struct thread *td; 468 struct accept_args *uap; 469{ 470 471 return (accept1(td, uap, 0)); 472} 473 474#ifdef COMPAT_OLDSOCK 475/* 476 * MPSAFE (accept1() is MPSAFE) 477 */ 478int 479oaccept(td, uap) 480 struct thread *td; 481 struct accept_args *uap; 482{ 483 484 return (accept1(td, uap, 1)); 485} 486#endif /* COMPAT_OLDSOCK */ 487 488/* 489 * MPSAFE 490 */ 491/* ARGSUSED */ 492int 493connect(td, uap) 494 struct thread *td; 495 register struct connect_args /* { 496 int s; 497 caddr_t name; 498 int namelen; 499 } */ *uap; 500{ 501 struct sockaddr *sa; 502 int error; 503 504 error = getsockaddr(&sa, uap->name, uap->namelen); 505 if (error) 506 return (error); 507 508 return (kern_connect(td, uap->s, sa)); 509} 510 511 512int 513kern_connect(td, fd, sa) 514 struct thread *td; 515 int fd; 516 struct sockaddr *sa; 517{ 518 struct socket *so; 519 struct file *fp; 520 int error; 521 int interrupted = 0; 522 523 NET_LOCK_GIANT(); 524 error = getsock(td->td_proc->p_fd, fd, &fp, NULL); 525 if (error) 526 goto done2; 527 so = fp->f_data; 528 if (so->so_state & SS_ISCONNECTING) { 529 error = EALREADY; 530 goto done1; 531 } 532#ifdef MAC 533 SOCK_LOCK(so); 534 error = mac_check_socket_connect(td->td_ucred, so, sa); 535 SOCK_UNLOCK(so); 536 if (error) 537 goto bad; 538#endif 539 error = soconnect(so, sa, td); 540 if (error) 541 goto bad; 542 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 543 error = EINPROGRESS; 544 goto done1; 545 } 546 SOCK_LOCK(so); 547 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 548 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 549 "connec", 0); 550 if (error) { 551 if (error == EINTR || error == ERESTART) 552 interrupted = 1; 553 break; 554 } 555 } 556 if (error == 0) { 557 error = so->so_error; 558 so->so_error = 0; 559 } 560 SOCK_UNLOCK(so); 561bad: 562 if (!interrupted) 563 so->so_state &= ~SS_ISCONNECTING; 564 if (error == ERESTART) 565 error = EINTR; 566done1: 567 fdrop(fp, td); 568done2: 569 NET_UNLOCK_GIANT(); 570 FREE(sa, M_SONAME); 571 return (error); 572} 573 574/* 575 * MPSAFE 576 */ 577int 578socketpair(td, uap) 579 struct thread *td; 580 register struct socketpair_args /* { 581 int domain; 582 int type; 583 int protocol; 584 int *rsv; 585 } */ *uap; 586{ 587 register struct filedesc *fdp = td->td_proc->p_fd; 588 struct file *fp1, *fp2; 589 struct socket *so1, *so2; 590 int fd, error, sv[2]; 591 592#ifdef MAC 593 /* We might want to have a separate check for socket pairs. */ 594 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type, 595 uap->protocol); 596 if (error) 597 return (error); 598#endif 599 600 NET_LOCK_GIANT(); 601 error = socreate(uap->domain, &so1, uap->type, uap->protocol, 602 td->td_ucred, td); 603 if (error) 604 goto done2; 605 error = socreate(uap->domain, &so2, uap->type, uap->protocol, 606 td->td_ucred, td); 607 if (error) 608 goto free1; 609 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 610 error = falloc(td, &fp1, &fd); 611 if (error) 612 goto free2; 613 sv[0] = fd; 614 fp1->f_data = so1; /* so1 already has ref count */ 615 error = falloc(td, &fp2, &fd); 616 if (error) 617 goto free3; 618 fp2->f_data = so2; /* so2 already has ref count */ 619 sv[1] = fd; 620 error = soconnect2(so1, so2); 621 if (error) 622 goto free4; 623 if (uap->type == SOCK_DGRAM) { 624 /* 625 * Datagram socket connection is asymmetric. 626 */ 627 error = soconnect2(so2, so1); 628 if (error) 629 goto free4; 630 } 631 FILE_LOCK(fp1); 632 fp1->f_flag = FREAD|FWRITE; 633 fp1->f_ops = &socketops; 634 fp1->f_type = DTYPE_SOCKET; 635 FILE_UNLOCK(fp1); 636 FILE_LOCK(fp2); 637 fp2->f_flag = FREAD|FWRITE; 638 fp2->f_ops = &socketops; 639 fp2->f_type = DTYPE_SOCKET; 640 FILE_UNLOCK(fp2); 641 error = copyout(sv, uap->rsv, 2 * sizeof (int)); 642 fdrop(fp1, td); 643 fdrop(fp2, td); 644 goto done2; 645free4: 646 fdclose(fdp, fp2, sv[1], td); 647 fdrop(fp2, td); 648free3: 649 fdclose(fdp, fp1, sv[0], td); 650 fdrop(fp1, td); 651free2: 652 (void)soclose(so2); 653free1: 654 (void)soclose(so1); 655done2: 656 NET_UNLOCK_GIANT(); 657 return (error); 658} 659 660static int 661sendit(td, s, mp, flags) 662 register struct thread *td; 663 int s; 664 register struct msghdr *mp; 665 int flags; 666{ 667 struct mbuf *control; 668 struct sockaddr *to; 669 int error; 670 671 if (mp->msg_name != NULL) { 672 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 673 if (error) { 674 to = NULL; 675 goto bad; 676 } 677 mp->msg_name = to; 678 } else { 679 to = NULL; 680 } 681 682 if (mp->msg_control) { 683 if (mp->msg_controllen < sizeof(struct cmsghdr) 684#ifdef COMPAT_OLDSOCK 685 && mp->msg_flags != MSG_COMPAT 686#endif 687 ) { 688 error = EINVAL; 689 goto bad; 690 } 691 error = sockargs(&control, mp->msg_control, 692 mp->msg_controllen, MT_CONTROL); 693 if (error) 694 goto bad; 695#ifdef COMPAT_OLDSOCK 696 if (mp->msg_flags == MSG_COMPAT) { 697 register struct cmsghdr *cm; 698 699 M_PREPEND(control, sizeof(*cm), M_TRYWAIT); 700 if (control == 0) { 701 error = ENOBUFS; 702 goto bad; 703 } else { 704 cm = mtod(control, struct cmsghdr *); 705 cm->cmsg_len = control->m_len; 706 cm->cmsg_level = SOL_SOCKET; 707 cm->cmsg_type = SCM_RIGHTS; 708 } 709 } 710#endif 711 } else { 712 control = NULL; 713 } 714 715 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 716 717bad: 718 if (to) 719 FREE(to, M_SONAME); 720 return (error); 721} 722 723int 724kern_sendit(td, s, mp, flags, control, segflg) 725 struct thread *td; 726 int s; 727 struct msghdr *mp; 728 int flags; 729 struct mbuf *control; 730 enum uio_seg segflg; 731{ 732 struct file *fp; 733 struct uio auio; 734 struct iovec *iov; 735 struct socket *so; 736 int i; 737 int len, error; 738#ifdef KTRACE 739 struct uio *ktruio = NULL; 740#endif 741 742 NET_LOCK_GIANT(); 743 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 744 if (error) 745 goto bad2; 746 so = (struct socket *)fp->f_data; 747 748#ifdef MAC 749 SOCK_LOCK(so); 750 error = mac_check_socket_send(td->td_ucred, so); 751 SOCK_UNLOCK(so); 752 if (error) 753 goto bad; 754#endif 755 756 auio.uio_iov = mp->msg_iov; 757 auio.uio_iovcnt = mp->msg_iovlen; 758 auio.uio_segflg = segflg; 759 auio.uio_rw = UIO_WRITE; 760 auio.uio_td = td; 761 auio.uio_offset = 0; /* XXX */ 762 auio.uio_resid = 0; 763 iov = mp->msg_iov; 764 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 765 if ((auio.uio_resid += iov->iov_len) < 0) { 766 error = EINVAL; 767 goto bad; 768 } 769 } 770#ifdef KTRACE 771 if (KTRPOINT(td, KTR_GENIO)) 772 ktruio = cloneuio(&auio); 773#endif 774 len = auio.uio_resid; 775 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio, 776 0, control, flags, td); 777 if (error) { 778 if (auio.uio_resid != len && (error == ERESTART || 779 error == EINTR || error == EWOULDBLOCK)) 780 error = 0; 781 /* Generation of SIGPIPE can be controlled per socket */ 782 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 783 !(flags & MSG_NOSIGNAL)) { 784 PROC_LOCK(td->td_proc); 785 psignal(td->td_proc, SIGPIPE); 786 PROC_UNLOCK(td->td_proc); 787 } 788 } 789 if (error == 0) 790 td->td_retval[0] = len - auio.uio_resid; 791#ifdef KTRACE 792 if (ktruio != NULL) { 793 ktruio->uio_resid = td->td_retval[0]; 794 ktrgenio(s, UIO_WRITE, ktruio, error); 795 } 796#endif 797bad: 798 fdrop(fp, td); 799bad2: 800 NET_UNLOCK_GIANT(); 801 return (error); 802} 803 804/* 805 * MPSAFE 806 */ 807int 808sendto(td, uap) 809 struct thread *td; 810 register struct sendto_args /* { 811 int s; 812 caddr_t buf; 813 size_t len; 814 int flags; 815 caddr_t to; 816 int tolen; 817 } */ *uap; 818{ 819 struct msghdr msg; 820 struct iovec aiov; 821 int error; 822 823 msg.msg_name = uap->to; 824 msg.msg_namelen = uap->tolen; 825 msg.msg_iov = &aiov; 826 msg.msg_iovlen = 1; 827 msg.msg_control = 0; 828#ifdef COMPAT_OLDSOCK 829 msg.msg_flags = 0; 830#endif 831 aiov.iov_base = uap->buf; 832 aiov.iov_len = uap->len; 833 error = sendit(td, uap->s, &msg, uap->flags); 834 return (error); 835} 836 837#ifdef COMPAT_OLDSOCK 838/* 839 * MPSAFE 840 */ 841int 842osend(td, uap) 843 struct thread *td; 844 register struct osend_args /* { 845 int s; 846 caddr_t buf; 847 int len; 848 int flags; 849 } */ *uap; 850{ 851 struct msghdr msg; 852 struct iovec aiov; 853 int error; 854 855 msg.msg_name = 0; 856 msg.msg_namelen = 0; 857 msg.msg_iov = &aiov; 858 msg.msg_iovlen = 1; 859 aiov.iov_base = uap->buf; 860 aiov.iov_len = uap->len; 861 msg.msg_control = 0; 862 msg.msg_flags = 0; 863 error = sendit(td, uap->s, &msg, uap->flags); 864 return (error); 865} 866 867/* 868 * MPSAFE 869 */ 870int 871osendmsg(td, uap) 872 struct thread *td; 873 struct osendmsg_args /* { 874 int s; 875 caddr_t msg; 876 int flags; 877 } */ *uap; 878{ 879 struct msghdr msg; 880 struct iovec *iov; 881 int error; 882 883 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 884 if (error) 885 return (error); 886 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 887 if (error) 888 return (error); 889 msg.msg_iov = iov; 890 msg.msg_flags = MSG_COMPAT; 891 error = sendit(td, uap->s, &msg, uap->flags); 892 free(iov, M_IOV); 893 return (error); 894} 895#endif 896 897/* 898 * MPSAFE 899 */ 900int 901sendmsg(td, uap) 902 struct thread *td; 903 struct sendmsg_args /* { 904 int s; 905 caddr_t msg; 906 int flags; 907 } */ *uap; 908{ 909 struct msghdr msg; 910 struct iovec *iov; 911 int error; 912 913 error = copyin(uap->msg, &msg, sizeof (msg)); 914 if (error) 915 return (error); 916 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 917 if (error) 918 return (error); 919 msg.msg_iov = iov; 920#ifdef COMPAT_OLDSOCK 921 msg.msg_flags = 0; 922#endif 923 error = sendit(td, uap->s, &msg, uap->flags); 924 free(iov, M_IOV); 925 return (error); 926} 927 928int 929kern_recvit(td, s, mp, namelenp, segflg, controlp) 930 struct thread *td; 931 int s; 932 struct msghdr *mp; 933 void *namelenp; 934 enum uio_seg segflg; 935 struct mbuf **controlp; 936{ 937 struct uio auio; 938 struct iovec *iov; 939 int i; 940 socklen_t len; 941 int error; 942 struct mbuf *m, *control = 0; 943 caddr_t ctlbuf; 944 struct file *fp; 945 struct socket *so; 946 struct sockaddr *fromsa = 0; 947#ifdef KTRACE 948 struct uio *ktruio = NULL; 949#endif 950 951 if(controlp != NULL) 952 *controlp = 0; 953 954 NET_LOCK_GIANT(); 955 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 956 if (error) { 957 NET_UNLOCK_GIANT(); 958 return (error); 959 } 960 so = fp->f_data; 961 962#ifdef MAC 963 SOCK_LOCK(so); 964 error = mac_check_socket_receive(td->td_ucred, so); 965 SOCK_UNLOCK(so); 966 if (error) { 967 fdrop(fp, td); 968 NET_UNLOCK_GIANT(); 969 return (error); 970 } 971#endif 972 973 auio.uio_iov = mp->msg_iov; 974 auio.uio_iovcnt = mp->msg_iovlen; 975 auio.uio_segflg = segflg; 976 auio.uio_rw = UIO_READ; 977 auio.uio_td = td; 978 auio.uio_offset = 0; /* XXX */ 979 auio.uio_resid = 0; 980 iov = mp->msg_iov; 981 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 982 if ((auio.uio_resid += iov->iov_len) < 0) { 983 fdrop(fp, td); 984 NET_UNLOCK_GIANT(); 985 return (EINVAL); 986 } 987 } 988#ifdef KTRACE 989 if (KTRPOINT(td, KTR_GENIO)) 990 ktruio = cloneuio(&auio); 991#endif 992 len = auio.uio_resid; 993 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, 994 (struct mbuf **)0, 995 (mp->msg_control || controlp) ? &control : (struct mbuf **)0, 996 &mp->msg_flags); 997 if (error) { 998 if (auio.uio_resid != (int)len && (error == ERESTART || 999 error == EINTR || error == EWOULDBLOCK)) 1000 error = 0; 1001 } 1002#ifdef KTRACE 1003 if (ktruio != NULL) { 1004 ktruio->uio_resid = (int)len - auio.uio_resid; 1005 ktrgenio(s, UIO_READ, ktruio, error); 1006 } 1007#endif 1008 if (error) 1009 goto out; 1010 td->td_retval[0] = (int)len - auio.uio_resid; 1011 if (mp->msg_name) { 1012 len = mp->msg_namelen; 1013 if (len <= 0 || fromsa == 0) 1014 len = 0; 1015 else { 1016 /* save sa_len before it is destroyed by MSG_COMPAT */ 1017 len = MIN(len, fromsa->sa_len); 1018#ifdef COMPAT_OLDSOCK 1019 if (mp->msg_flags & MSG_COMPAT) 1020 ((struct osockaddr *)fromsa)->sa_family = 1021 fromsa->sa_family; 1022#endif 1023 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1024 if (error) 1025 goto out; 1026 } 1027 mp->msg_namelen = len; 1028 if (namelenp && 1029 (error = copyout(&len, namelenp, sizeof (socklen_t)))) { 1030#ifdef COMPAT_OLDSOCK 1031 if (mp->msg_flags & MSG_COMPAT) 1032 error = 0; /* old recvfrom didn't check */ 1033 else 1034#endif 1035 goto out; 1036 } 1037 } 1038 if (mp->msg_control && controlp == NULL) { 1039#ifdef COMPAT_OLDSOCK 1040 /* 1041 * We assume that old recvmsg calls won't receive access 1042 * rights and other control info, esp. as control info 1043 * is always optional and those options didn't exist in 4.3. 1044 * If we receive rights, trim the cmsghdr; anything else 1045 * is tossed. 1046 */ 1047 if (control && mp->msg_flags & MSG_COMPAT) { 1048 if (mtod(control, struct cmsghdr *)->cmsg_level != 1049 SOL_SOCKET || 1050 mtod(control, struct cmsghdr *)->cmsg_type != 1051 SCM_RIGHTS) { 1052 mp->msg_controllen = 0; 1053 goto out; 1054 } 1055 control->m_len -= sizeof (struct cmsghdr); 1056 control->m_data += sizeof (struct cmsghdr); 1057 } 1058#endif 1059 len = mp->msg_controllen; 1060 m = control; 1061 mp->msg_controllen = 0; 1062 ctlbuf = mp->msg_control; 1063 1064 while (m && len > 0) { 1065 unsigned int tocopy; 1066 1067 if (len >= m->m_len) 1068 tocopy = m->m_len; 1069 else { 1070 mp->msg_flags |= MSG_CTRUNC; 1071 tocopy = len; 1072 } 1073 1074 if ((error = copyout(mtod(m, caddr_t), 1075 ctlbuf, tocopy)) != 0) 1076 goto out; 1077 1078 ctlbuf += tocopy; 1079 len -= tocopy; 1080 m = m->m_next; 1081 } 1082 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1083 } 1084out: 1085 fdrop(fp, td); 1086 NET_UNLOCK_GIANT(); 1087 if (fromsa) 1088 FREE(fromsa, M_SONAME); 1089 1090 if (error == 0 && controlp != NULL) 1091 *controlp = control; 1092 else if (control) 1093 m_freem(control); 1094 1095 return (error); 1096} 1097 1098static int 1099recvit(td, s, mp, namelenp) 1100 struct thread *td; 1101 int s; 1102 struct msghdr *mp; 1103 void *namelenp; 1104{ 1105 1106 return (kern_recvit(td, s, mp, namelenp, UIO_USERSPACE, NULL)); 1107} 1108 1109/* 1110 * MPSAFE 1111 */ 1112int 1113recvfrom(td, uap) 1114 struct thread *td; 1115 register struct recvfrom_args /* { 1116 int s; 1117 caddr_t buf; 1118 size_t len; 1119 int flags; 1120 struct sockaddr * __restrict from; 1121 socklen_t * __restrict fromlenaddr; 1122 } */ *uap; 1123{ 1124 struct msghdr msg; 1125 struct iovec aiov; 1126 int error; 1127 1128 if (uap->fromlenaddr) { 1129 error = copyin(uap->fromlenaddr, 1130 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1131 if (error) 1132 goto done2; 1133 } else { 1134 msg.msg_namelen = 0; 1135 } 1136 msg.msg_name = uap->from; 1137 msg.msg_iov = &aiov; 1138 msg.msg_iovlen = 1; 1139 aiov.iov_base = uap->buf; 1140 aiov.iov_len = uap->len; 1141 msg.msg_control = 0; 1142 msg.msg_flags = uap->flags; 1143 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1144done2: 1145 return(error); 1146} 1147 1148#ifdef COMPAT_OLDSOCK 1149/* 1150 * MPSAFE 1151 */ 1152int 1153orecvfrom(td, uap) 1154 struct thread *td; 1155 struct recvfrom_args *uap; 1156{ 1157 1158 uap->flags |= MSG_COMPAT; 1159 return (recvfrom(td, uap)); 1160} 1161#endif 1162 1163 1164#ifdef COMPAT_OLDSOCK 1165/* 1166 * MPSAFE 1167 */ 1168int 1169orecv(td, uap) 1170 struct thread *td; 1171 register struct orecv_args /* { 1172 int s; 1173 caddr_t buf; 1174 int len; 1175 int flags; 1176 } */ *uap; 1177{ 1178 struct msghdr msg; 1179 struct iovec aiov; 1180 int error; 1181 1182 msg.msg_name = 0; 1183 msg.msg_namelen = 0; 1184 msg.msg_iov = &aiov; 1185 msg.msg_iovlen = 1; 1186 aiov.iov_base = uap->buf; 1187 aiov.iov_len = uap->len; 1188 msg.msg_control = 0; 1189 msg.msg_flags = uap->flags; 1190 error = recvit(td, uap->s, &msg, NULL); 1191 return (error); 1192} 1193 1194/* 1195 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1196 * overlays the new one, missing only the flags, and with the (old) access 1197 * rights where the control fields are now. 1198 * 1199 * MPSAFE 1200 */ 1201int 1202orecvmsg(td, uap) 1203 struct thread *td; 1204 struct orecvmsg_args /* { 1205 int s; 1206 struct omsghdr *msg; 1207 int flags; 1208 } */ *uap; 1209{ 1210 struct msghdr msg; 1211 struct iovec *iov; 1212 int error; 1213 1214 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1215 if (error) 1216 return (error); 1217 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1218 if (error) 1219 return (error); 1220 msg.msg_flags = uap->flags | MSG_COMPAT; 1221 msg.msg_iov = iov; 1222 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1223 if (msg.msg_controllen && error == 0) 1224 error = copyout(&msg.msg_controllen, 1225 &uap->msg->msg_accrightslen, sizeof (int)); 1226 free(iov, M_IOV); 1227 return (error); 1228} 1229#endif 1230 1231/* 1232 * MPSAFE 1233 */ 1234int 1235recvmsg(td, uap) 1236 struct thread *td; 1237 struct recvmsg_args /* { 1238 int s; 1239 struct msghdr *msg; 1240 int flags; 1241 } */ *uap; 1242{ 1243 struct msghdr msg; 1244 struct iovec *uiov, *iov; 1245 int error; 1246 1247 error = copyin(uap->msg, &msg, sizeof (msg)); 1248 if (error) 1249 return (error); 1250 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1251 if (error) 1252 return (error); 1253 msg.msg_flags = uap->flags; 1254#ifdef COMPAT_OLDSOCK 1255 msg.msg_flags &= ~MSG_COMPAT; 1256#endif 1257 uiov = msg.msg_iov; 1258 msg.msg_iov = iov; 1259 error = recvit(td, uap->s, &msg, NULL); 1260 if (error == 0) { 1261 msg.msg_iov = uiov; 1262 error = copyout(&msg, uap->msg, sizeof(msg)); 1263 } 1264 free(iov, M_IOV); 1265 return (error); 1266} 1267 1268/* 1269 * MPSAFE 1270 */ 1271/* ARGSUSED */ 1272int 1273shutdown(td, uap) 1274 struct thread *td; 1275 register struct shutdown_args /* { 1276 int s; 1277 int how; 1278 } */ *uap; 1279{ 1280 struct socket *so; 1281 struct file *fp; 1282 int error; 1283 1284 NET_LOCK_GIANT(); 1285 error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); 1286 if (error == 0) { 1287 so = fp->f_data; 1288 error = soshutdown(so, uap->how); 1289 fdrop(fp, td); 1290 } 1291 NET_UNLOCK_GIANT(); 1292 return (error); 1293} 1294 1295/* 1296 * MPSAFE 1297 */ 1298/* ARGSUSED */ 1299int 1300setsockopt(td, uap) 1301 struct thread *td; 1302 register struct setsockopt_args /* { 1303 int s; 1304 int level; 1305 int name; 1306 caddr_t val; 1307 int valsize; 1308 } */ *uap; 1309{ 1310 1311 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1312 uap->val, UIO_USERSPACE, uap->valsize)); 1313} 1314 1315int 1316kern_setsockopt(td, s, level, name, val, valseg, valsize) 1317 struct thread *td; 1318 int s; 1319 int level; 1320 int name; 1321 void *val; 1322 enum uio_seg valseg; 1323 socklen_t valsize; 1324{ 1325 int error; 1326 struct socket *so; 1327 struct file *fp; 1328 struct sockopt sopt; 1329 1330 if (val == NULL && valsize != 0) 1331 return (EFAULT); 1332 if ((int)valsize < 0) 1333 return (EINVAL); 1334 1335 sopt.sopt_dir = SOPT_SET; 1336 sopt.sopt_level = level; 1337 sopt.sopt_name = name; 1338 sopt.sopt_val = val; 1339 sopt.sopt_valsize = valsize; 1340 switch (valseg) { 1341 case UIO_USERSPACE: 1342 sopt.sopt_td = td; 1343 break; 1344 case UIO_SYSSPACE: 1345 sopt.sopt_td = NULL; 1346 break; 1347 default: 1348 panic("kern_setsockopt called with bad valseg"); 1349 } 1350 1351 NET_LOCK_GIANT(); 1352 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1353 if (error == 0) { 1354 so = fp->f_data; 1355 error = sosetopt(so, &sopt); 1356 fdrop(fp, td); 1357 } 1358 NET_UNLOCK_GIANT(); 1359 return(error); 1360} 1361 1362/* 1363 * MPSAFE 1364 */ 1365/* ARGSUSED */ 1366int 1367getsockopt(td, uap) 1368 struct thread *td; 1369 register struct getsockopt_args /* { 1370 int s; 1371 int level; 1372 int name; 1373 void * __restrict val; 1374 socklen_t * __restrict avalsize; 1375 } */ *uap; 1376{ 1377 socklen_t valsize; 1378 int error; 1379 1380 if (uap->val) { 1381 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1382 if (error) 1383 return (error); 1384 } 1385 1386 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1387 uap->val, UIO_USERSPACE, &valsize); 1388 1389 if (error == 0) 1390 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1391 return (error); 1392} 1393 1394/* 1395 * Kernel version of getsockopt. 1396 * optval can be a userland or userspace. optlen is always a kernel pointer. 1397 */ 1398int 1399kern_getsockopt(td, s, level, name, val, valseg, valsize) 1400 struct thread *td; 1401 int s; 1402 int level; 1403 int name; 1404 void *val; 1405 enum uio_seg valseg; 1406 socklen_t *valsize; 1407{ 1408 int error; 1409 struct socket *so; 1410 struct file *fp; 1411 struct sockopt sopt; 1412 1413 if (val == NULL) 1414 *valsize = 0; 1415 if ((int)*valsize < 0) 1416 return (EINVAL); 1417 1418 sopt.sopt_dir = SOPT_GET; 1419 sopt.sopt_level = level; 1420 sopt.sopt_name = name; 1421 sopt.sopt_val = val; 1422 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1423 switch (valseg) { 1424 case UIO_USERSPACE: 1425 sopt.sopt_td = td; 1426 break; 1427 case UIO_SYSSPACE: 1428 sopt.sopt_td = NULL; 1429 break; 1430 default: 1431 panic("kern_getsockopt called with bad valseg"); 1432 } 1433 1434 NET_LOCK_GIANT(); 1435 error = getsock(td->td_proc->p_fd, s, &fp, NULL); 1436 if (error == 0) { 1437 so = fp->f_data; 1438 error = sogetopt(so, &sopt); 1439 *valsize = sopt.sopt_valsize; 1440 fdrop(fp, td); 1441 } 1442 NET_UNLOCK_GIANT(); 1443 return (error); 1444} 1445 1446/* 1447 * getsockname1() - Get socket name. 1448 * 1449 * MPSAFE 1450 */ 1451/* ARGSUSED */ 1452static int 1453getsockname1(td, uap, compat) 1454 struct thread *td; 1455 register struct getsockname_args /* { 1456 int fdes; 1457 struct sockaddr * __restrict asa; 1458 socklen_t * __restrict alen; 1459 } */ *uap; 1460 int compat; 1461{ 1462 struct socket *so; 1463 struct sockaddr *sa; 1464 struct file *fp; 1465 socklen_t len; 1466 int error; 1467 1468 NET_LOCK_GIANT(); 1469 error = getsock(td->td_proc->p_fd, uap->fdes, &fp, NULL); 1470 if (error) 1471 goto done2; 1472 so = fp->f_data; 1473 error = copyin(uap->alen, &len, sizeof (len)); 1474 if (error) 1475 goto done1; 1476 if (len < 0) { 1477 error = EINVAL; 1478 goto done1; 1479 } 1480 sa = 0; 1481 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1482 if (error) 1483 goto bad; 1484 if (sa == 0) { 1485 len = 0; 1486 goto gotnothing; 1487 } 1488 1489 len = MIN(len, sa->sa_len); 1490#ifdef COMPAT_OLDSOCK 1491 if (compat) 1492 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1493#endif 1494 error = copyout(sa, uap->asa, (u_int)len); 1495 if (error == 0) 1496gotnothing: 1497 error = copyout(&len, uap->alen, sizeof (len)); 1498bad: 1499 if (sa) 1500 FREE(sa, M_SONAME); 1501done1: 1502 fdrop(fp, td); 1503done2: 1504 NET_UNLOCK_GIANT(); 1505 return (error); 1506} 1507 1508/* 1509 * MPSAFE 1510 */ 1511int 1512getsockname(td, uap) 1513 struct thread *td; 1514 struct getsockname_args *uap; 1515{ 1516 1517 return (getsockname1(td, uap, 0)); 1518} 1519 1520#ifdef COMPAT_OLDSOCK 1521/* 1522 * MPSAFE 1523 */ 1524int 1525ogetsockname(td, uap) 1526 struct thread *td; 1527 struct getsockname_args *uap; 1528{ 1529 1530 return (getsockname1(td, uap, 1)); 1531} 1532#endif /* COMPAT_OLDSOCK */ 1533 1534/* 1535 * getpeername1() - Get name of peer for connected socket. 1536 * 1537 * MPSAFE 1538 */ 1539/* ARGSUSED */ 1540static int 1541getpeername1(td, uap, compat) 1542 struct thread *td; 1543 register struct getpeername_args /* { 1544 int fdes; 1545 struct sockaddr * __restrict asa; 1546 socklen_t * __restrict alen; 1547 } */ *uap; 1548 int compat; 1549{ 1550 struct socket *so; 1551 struct sockaddr *sa; 1552 struct file *fp; 1553 socklen_t len; 1554 int error; 1555 1556 NET_LOCK_GIANT(); 1557 error = getsock(td->td_proc->p_fd, uap->fdes, &fp, NULL); 1558 if (error) 1559 goto done2; 1560 so = fp->f_data; 1561 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1562 error = ENOTCONN; 1563 goto done1; 1564 } 1565 error = copyin(uap->alen, &len, sizeof (len)); 1566 if (error) 1567 goto done1; 1568 if (len < 0) { 1569 error = EINVAL; 1570 goto done1; 1571 } 1572 sa = 0; 1573 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1574 if (error) 1575 goto bad; 1576 if (sa == 0) { 1577 len = 0; 1578 goto gotnothing; 1579 } 1580 len = MIN(len, sa->sa_len); 1581#ifdef COMPAT_OLDSOCK 1582 if (compat) 1583 ((struct osockaddr *)sa)->sa_family = 1584 sa->sa_family; 1585#endif 1586 error = copyout(sa, uap->asa, (u_int)len); 1587 if (error) 1588 goto bad; 1589gotnothing: 1590 error = copyout(&len, uap->alen, sizeof (len)); 1591bad: 1592 if (sa) 1593 FREE(sa, M_SONAME); 1594done1: 1595 fdrop(fp, td); 1596done2: 1597 NET_UNLOCK_GIANT(); 1598 return (error); 1599} 1600 1601/* 1602 * MPSAFE 1603 */ 1604int 1605getpeername(td, uap) 1606 struct thread *td; 1607 struct getpeername_args *uap; 1608{ 1609 1610 return (getpeername1(td, uap, 0)); 1611} 1612 1613#ifdef COMPAT_OLDSOCK 1614/* 1615 * MPSAFE 1616 */ 1617int 1618ogetpeername(td, uap) 1619 struct thread *td; 1620 struct ogetpeername_args *uap; 1621{ 1622 1623 /* XXX uap should have type `getpeername_args *' to begin with. */ 1624 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1625} 1626#endif /* COMPAT_OLDSOCK */ 1627 1628int 1629sockargs(mp, buf, buflen, type) 1630 struct mbuf **mp; 1631 caddr_t buf; 1632 int buflen, type; 1633{ 1634 register struct sockaddr *sa; 1635 register struct mbuf *m; 1636 int error; 1637 1638 if ((u_int)buflen > MLEN) { 1639#ifdef COMPAT_OLDSOCK 1640 if (type == MT_SONAME && (u_int)buflen <= 112) 1641 buflen = MLEN; /* unix domain compat. hack */ 1642 else 1643#endif 1644 if ((u_int)buflen > MCLBYTES) 1645 return (EINVAL); 1646 } 1647 m = m_get(M_TRYWAIT, type); 1648 if (m == NULL) 1649 return (ENOBUFS); 1650 if ((u_int)buflen > MLEN) { 1651 MCLGET(m, M_TRYWAIT); 1652 if ((m->m_flags & M_EXT) == 0) { 1653 m_free(m); 1654 return (ENOBUFS); 1655 } 1656 } 1657 m->m_len = buflen; 1658 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1659 if (error) 1660 (void) m_free(m); 1661 else { 1662 *mp = m; 1663 if (type == MT_SONAME) { 1664 sa = mtod(m, struct sockaddr *); 1665 1666#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1667 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1668 sa->sa_family = sa->sa_len; 1669#endif 1670 sa->sa_len = buflen; 1671 } 1672 } 1673 return (error); 1674} 1675 1676int 1677getsockaddr(namp, uaddr, len) 1678 struct sockaddr **namp; 1679 caddr_t uaddr; 1680 size_t len; 1681{ 1682 struct sockaddr *sa; 1683 int error; 1684 1685 if (len > SOCK_MAXADDRLEN) 1686 return (ENAMETOOLONG); 1687 if (len < offsetof(struct sockaddr, sa_data[0])) 1688 return (EINVAL); 1689 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); 1690 error = copyin(uaddr, sa, len); 1691 if (error) { 1692 FREE(sa, M_SONAME); 1693 } else { 1694#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1695 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1696 sa->sa_family = sa->sa_len; 1697#endif 1698 sa->sa_len = len; 1699 *namp = sa; 1700 } 1701 return (error); 1702} 1703 1704/* 1705 * Detach mapped page and release resources back to the system. 1706 */ 1707void 1708sf_buf_mext(void *addr, void *args) 1709{ 1710 vm_page_t m; 1711 1712 m = sf_buf_page(args); 1713 sf_buf_free(args); 1714 vm_page_lock_queues(); 1715 vm_page_unwire(m, 0); 1716 /* 1717 * Check for the object going away on us. This can 1718 * happen since we don't hold a reference to it. 1719 * If so, we're responsible for freeing the page. 1720 */ 1721 if (m->wire_count == 0 && m->object == NULL) 1722 vm_page_free(m); 1723 vm_page_unlock_queues(); 1724} 1725 1726/* 1727 * sendfile(2) 1728 * 1729 * MPSAFE 1730 * 1731 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1732 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1733 * 1734 * Send a file specified by 'fd' and starting at 'offset' to a socket 1735 * specified by 's'. Send only 'nbytes' of the file or until EOF if 1736 * nbytes == 0. Optionally add a header and/or trailer to the socket 1737 * output. If specified, write the total number of bytes sent into *sbytes. 1738 * 1739 */ 1740int 1741sendfile(struct thread *td, struct sendfile_args *uap) 1742{ 1743 1744 return (do_sendfile(td, uap, 0)); 1745} 1746 1747static int 1748do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1749{ 1750 struct sf_hdtr hdtr; 1751 struct uio *hdr_uio, *trl_uio; 1752 int error; 1753 1754 hdr_uio = trl_uio = NULL; 1755 1756 if (uap->hdtr != NULL) { 1757 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1758 if (error) 1759 goto out; 1760 if (hdtr.headers != NULL) { 1761 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1762 if (error) 1763 goto out; 1764 } 1765 if (hdtr.trailers != NULL) { 1766 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1767 if (error) 1768 goto out; 1769 1770 } 1771 } 1772 1773 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1774out: 1775 if (hdr_uio) 1776 free(hdr_uio, M_IOV); 1777 if (trl_uio) 1778 free(trl_uio, M_IOV); 1779 return (error); 1780} 1781 1782#ifdef COMPAT_FREEBSD4 1783int 1784freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1785{ 1786 struct sendfile_args args; 1787 1788 args.fd = uap->fd; 1789 args.s = uap->s; 1790 args.offset = uap->offset; 1791 args.nbytes = uap->nbytes; 1792 args.hdtr = uap->hdtr; 1793 args.sbytes = uap->sbytes; 1794 args.flags = uap->flags; 1795 1796 return (do_sendfile(td, &args, 1)); 1797} 1798#endif /* COMPAT_FREEBSD4 */ 1799 1800int 1801kern_sendfile(struct thread *td, struct sendfile_args *uap, 1802 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1803{ 1804 struct file *sock_fp; 1805 struct vnode *vp; 1806 struct vm_object *obj = NULL; 1807 struct socket *so = NULL; 1808 struct mbuf *m, *m_header = NULL; 1809 struct sf_buf *sf; 1810 struct vm_page *pg; 1811 off_t off, xfsize, hdtr_size, sbytes = 0; 1812 int error, headersize = 0, headersent = 0; 1813 int vfslocked; 1814 1815 NET_LOCK_GIANT(); 1816 1817 hdtr_size = 0; 1818 1819 /* 1820 * The descriptor must be a regular file and have a backing VM object. 1821 */ 1822 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) 1823 goto done; 1824 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 1825 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1826 obj = vp->v_object; 1827 if (obj != NULL) { 1828 /* 1829 * Temporarily increase the backing VM object's reference 1830 * count so that a forced reclamation of its vnode does not 1831 * immediately destroy it. 1832 */ 1833 VM_OBJECT_LOCK(obj); 1834 if ((obj->flags & OBJ_DEAD) == 0) { 1835 vm_object_reference_locked(obj); 1836 VM_OBJECT_UNLOCK(obj); 1837 } else { 1838 VM_OBJECT_UNLOCK(obj); 1839 obj = NULL; 1840 } 1841 } 1842 VOP_UNLOCK(vp, 0, td); 1843 VFS_UNLOCK_GIANT(vfslocked); 1844 if (obj == NULL) { 1845 error = EINVAL; 1846 goto done; 1847 } 1848 if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, NULL)) != 0) 1849 goto done; 1850 so = sock_fp->f_data; 1851 if (so->so_type != SOCK_STREAM) { 1852 error = EINVAL; 1853 goto done; 1854 } 1855 if ((so->so_state & SS_ISCONNECTED) == 0) { 1856 error = ENOTCONN; 1857 goto done; 1858 } 1859 if (uap->offset < 0) { 1860 error = EINVAL; 1861 goto done; 1862 } 1863 1864#ifdef MAC 1865 SOCK_LOCK(so); 1866 error = mac_check_socket_send(td->td_ucred, so); 1867 SOCK_UNLOCK(so); 1868 if (error) 1869 goto done; 1870#endif 1871 1872 /* 1873 * If specified, get the pointer to the sf_hdtr struct for 1874 * any headers/trailers. 1875 */ 1876 if (hdr_uio != NULL) { 1877 hdr_uio->uio_td = td; 1878 hdr_uio->uio_rw = UIO_WRITE; 1879 if (hdr_uio->uio_resid > 0) { 1880 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0); 1881 if (m_header == NULL) 1882 goto done; 1883 headersize = m_header->m_pkthdr.len; 1884 if (compat) 1885 sbytes += headersize; 1886 } 1887 } 1888 1889 /* 1890 * Protect against multiple writers to the socket. 1891 */ 1892 SOCKBUF_LOCK(&so->so_snd); 1893 (void) sblock(&so->so_snd, M_WAITOK); 1894 SOCKBUF_UNLOCK(&so->so_snd); 1895 1896 /* 1897 * Loop through the pages in the file, starting with the requested 1898 * offset. Get a file page (do I/O if necessary), map the file page 1899 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1900 * it on the socket. 1901 */ 1902 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 1903 vm_pindex_t pindex; 1904 vm_offset_t pgoff; 1905 1906 pindex = OFF_TO_IDX(off); 1907 VM_OBJECT_LOCK(obj); 1908retry_lookup: 1909 /* 1910 * Calculate the amount to transfer. Not to exceed a page, 1911 * the EOF, or the passed in nbytes. 1912 */ 1913 xfsize = obj->un_pager.vnp.vnp_size - off; 1914 VM_OBJECT_UNLOCK(obj); 1915 if (xfsize > PAGE_SIZE) 1916 xfsize = PAGE_SIZE; 1917 pgoff = (vm_offset_t)(off & PAGE_MASK); 1918 if (PAGE_SIZE - pgoff < xfsize) 1919 xfsize = PAGE_SIZE - pgoff; 1920 if (uap->nbytes && xfsize > (uap->nbytes - sbytes)) 1921 xfsize = uap->nbytes - sbytes; 1922 if (xfsize <= 0) { 1923 if (m_header != NULL) { 1924 m = m_header; 1925 m_header = NULL; 1926 SOCKBUF_LOCK(&so->so_snd); 1927 goto retry_space; 1928 } else 1929 break; 1930 } 1931 /* 1932 * Optimize the non-blocking case by looking at the socket space 1933 * before going to the extra work of constituting the sf_buf. 1934 */ 1935 SOCKBUF_LOCK(&so->so_snd); 1936 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) { 1937 if (so->so_snd.sb_state & SBS_CANTSENDMORE) 1938 error = EPIPE; 1939 else 1940 error = EAGAIN; 1941 sbunlock(&so->so_snd); 1942 SOCKBUF_UNLOCK(&so->so_snd); 1943 goto done; 1944 } 1945 SOCKBUF_UNLOCK(&so->so_snd); 1946 VM_OBJECT_LOCK(obj); 1947 /* 1948 * Attempt to look up the page. 1949 * 1950 * Allocate if not found 1951 * 1952 * Wait and loop if busy. 1953 */ 1954 pg = vm_page_lookup(obj, pindex); 1955 1956 if (pg == NULL) { 1957 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY | 1958 VM_ALLOC_NORMAL | VM_ALLOC_WIRED); 1959 if (pg == NULL) { 1960 VM_OBJECT_UNLOCK(obj); 1961 VM_WAIT; 1962 VM_OBJECT_LOCK(obj); 1963 goto retry_lookup; 1964 } 1965 vm_page_lock_queues(); 1966 } else { 1967 vm_page_lock_queues(); 1968 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy")) 1969 goto retry_lookup; 1970 /* 1971 * Wire the page so it does not get ripped out from 1972 * under us. 1973 */ 1974 vm_page_wire(pg); 1975 } 1976 1977 /* 1978 * If page is not valid for what we need, initiate I/O 1979 */ 1980 1981 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) { 1982 VM_OBJECT_UNLOCK(obj); 1983 } else if (uap->flags & SF_NODISKIO) { 1984 error = EBUSY; 1985 } else { 1986 int bsize, resid; 1987 1988 /* 1989 * Ensure that our page is still around when the I/O 1990 * completes. 1991 */ 1992 vm_page_io_start(pg); 1993 vm_page_unlock_queues(); 1994 VM_OBJECT_UNLOCK(obj); 1995 1996 /* 1997 * Get the page from backing store. 1998 */ 1999 bsize = vp->v_mount->mnt_stat.f_iosize; 2000 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2001 vn_lock(vp, LK_SHARED | LK_RETRY, td); 2002 /* 2003 * XXXMAC: Because we don't have fp->f_cred here, 2004 * we pass in NOCRED. This is probably wrong, but 2005 * is consistent with our original implementation. 2006 */ 2007 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2008 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2009 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2010 td->td_ucred, NOCRED, &resid, td); 2011 VOP_UNLOCK(vp, 0, td); 2012 VFS_UNLOCK_GIANT(vfslocked); 2013 VM_OBJECT_LOCK(obj); 2014 vm_page_lock_queues(); 2015 vm_page_io_finish(pg); 2016 if (!error) 2017 VM_OBJECT_UNLOCK(obj); 2018 mbstat.sf_iocnt++; 2019 } 2020 2021 if (error) { 2022 vm_page_unwire(pg, 0); 2023 /* 2024 * See if anyone else might know about this page. 2025 * If not and it is not valid, then free it. 2026 */ 2027 if (pg->wire_count == 0 && pg->valid == 0 && 2028 pg->busy == 0 && !(pg->flags & PG_BUSY) && 2029 pg->hold_count == 0) { 2030 vm_page_free(pg); 2031 } 2032 vm_page_unlock_queues(); 2033 VM_OBJECT_UNLOCK(obj); 2034 SOCKBUF_LOCK(&so->so_snd); 2035 sbunlock(&so->so_snd); 2036 SOCKBUF_UNLOCK(&so->so_snd); 2037 goto done; 2038 } 2039 vm_page_unlock_queues(); 2040 2041 /* 2042 * Get a sendfile buf. We usually wait as long as necessary, 2043 * but this wait can be interrupted. 2044 */ 2045 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) { 2046 mbstat.sf_allocfail++; 2047 vm_page_lock_queues(); 2048 vm_page_unwire(pg, 0); 2049 if (pg->wire_count == 0 && pg->object == NULL) 2050 vm_page_free(pg); 2051 vm_page_unlock_queues(); 2052 SOCKBUF_LOCK(&so->so_snd); 2053 sbunlock(&so->so_snd); 2054 SOCKBUF_UNLOCK(&so->so_snd); 2055 error = EINTR; 2056 goto done; 2057 } 2058 2059 /* 2060 * Get an mbuf header and set it up as having external storage. 2061 */ 2062 if (m_header) 2063 MGET(m, M_TRYWAIT, MT_DATA); 2064 else 2065 MGETHDR(m, M_TRYWAIT, MT_DATA); 2066 if (m == NULL) { 2067 error = ENOBUFS; 2068 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2069 SOCKBUF_LOCK(&so->so_snd); 2070 sbunlock(&so->so_snd); 2071 SOCKBUF_UNLOCK(&so->so_snd); 2072 goto done; 2073 } 2074 /* 2075 * Setup external storage for mbuf. 2076 */ 2077 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY, 2078 EXT_SFBUF); 2079 m->m_data = (char *)sf_buf_kva(sf) + pgoff; 2080 m->m_pkthdr.len = m->m_len = xfsize; 2081 2082 if (m_header) { 2083 m_cat(m_header, m); 2084 m = m_header; 2085 m_header = NULL; 2086 m_fixhdr(m); 2087 } 2088 2089 /* 2090 * Add the buffer to the socket buffer chain. 2091 */ 2092 SOCKBUF_LOCK(&so->so_snd); 2093retry_space: 2094 /* 2095 * Make sure that the socket is still able to take more data. 2096 * CANTSENDMORE being true usually means that the connection 2097 * was closed. so_error is true when an error was sensed after 2098 * a previous send. 2099 * The state is checked after the page mapping and buffer 2100 * allocation above since those operations may block and make 2101 * any socket checks stale. From this point forward, nothing 2102 * blocks before the pru_send (or more accurately, any blocking 2103 * results in a loop back to here to re-check). 2104 */ 2105 SOCKBUF_LOCK_ASSERT(&so->so_snd); 2106 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) { 2107 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2108 error = EPIPE; 2109 } else { 2110 error = so->so_error; 2111 so->so_error = 0; 2112 } 2113 m_freem(m); 2114 sbunlock(&so->so_snd); 2115 SOCKBUF_UNLOCK(&so->so_snd); 2116 goto done; 2117 } 2118 /* 2119 * Wait for socket space to become available. We do this just 2120 * after checking the connection state above in order to avoid 2121 * a race condition with sbwait(). 2122 */ 2123 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) { 2124 if (so->so_state & SS_NBIO) { 2125 m_freem(m); 2126 sbunlock(&so->so_snd); 2127 SOCKBUF_UNLOCK(&so->so_snd); 2128 error = EAGAIN; 2129 goto done; 2130 } 2131 error = sbwait(&so->so_snd); 2132 /* 2133 * An error from sbwait usually indicates that we've 2134 * been interrupted by a signal. If we've sent anything 2135 * then return bytes sent, otherwise return the error. 2136 */ 2137 if (error) { 2138 m_freem(m); 2139 sbunlock(&so->so_snd); 2140 SOCKBUF_UNLOCK(&so->so_snd); 2141 goto done; 2142 } 2143 goto retry_space; 2144 } 2145 SOCKBUF_UNLOCK(&so->so_snd); 2146 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td); 2147 if (error) { 2148 SOCKBUF_LOCK(&so->so_snd); 2149 sbunlock(&so->so_snd); 2150 SOCKBUF_UNLOCK(&so->so_snd); 2151 goto done; 2152 } 2153 headersent = 1; 2154 } 2155 SOCKBUF_LOCK(&so->so_snd); 2156 sbunlock(&so->so_snd); 2157 SOCKBUF_UNLOCK(&so->so_snd); 2158 2159 /* 2160 * Send trailers. Wimp out and use writev(2). 2161 */ 2162 if (trl_uio != NULL) { 2163 error = kern_writev(td, uap->s, trl_uio); 2164 if (error) 2165 goto done; 2166 if (compat) 2167 sbytes += td->td_retval[0]; 2168 else 2169 hdtr_size += td->td_retval[0]; 2170 } 2171 2172done: 2173 if (headersent) { 2174 if (!compat) 2175 hdtr_size += headersize; 2176 } else { 2177 if (compat) 2178 sbytes -= headersize; 2179 } 2180 /* 2181 * If there was no error we have to clear td->td_retval[0] 2182 * because it may have been set by writev. 2183 */ 2184 if (error == 0) { 2185 td->td_retval[0] = 0; 2186 } 2187 if (uap->sbytes != NULL) { 2188 if (!compat) 2189 sbytes += hdtr_size; 2190 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2191 } 2192 if (obj != NULL) 2193 vm_object_deallocate(obj); 2194 if (vp != NULL) { 2195 vfslocked = VFS_LOCK_GIANT(vp->v_mount); 2196 vrele(vp); 2197 VFS_UNLOCK_GIANT(vfslocked); 2198 } 2199 if (so) 2200 fdrop(sock_fp, td); 2201 if (m_header) 2202 m_freem(m_header); 2203 2204 NET_UNLOCK_GIANT(); 2205 2206 if (error == ERESTART) 2207 error = EINTR; 2208 2209 return (error); 2210} 2211