kern_sendfile.c revision 247602
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 247602 2013-03-02 00:53:12Z pjd $"); 37 38#include "opt_capsicum.h" 39#include "opt_inet.h" 40#include "opt_inet6.h" 41#include "opt_sctp.h" 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/capability.h> 48#include <sys/kernel.h> 49#include <sys/lock.h> 50#include <sys/mutex.h> 51#include <sys/sysproto.h> 52#include <sys/malloc.h> 53#include <sys/filedesc.h> 54#include <sys/event.h> 55#include <sys/proc.h> 56#include <sys/fcntl.h> 57#include <sys/file.h> 58#include <sys/filio.h> 59#include <sys/jail.h> 60#include <sys/mount.h> 61#include <sys/mbuf.h> 62#include <sys/protosw.h> 63#include <sys/sf_buf.h> 64#include <sys/sysent.h> 65#include <sys/socket.h> 66#include <sys/socketvar.h> 67#include <sys/signalvar.h> 68#include <sys/syscallsubr.h> 69#include <sys/sysctl.h> 70#include <sys/uio.h> 71#include <sys/vnode.h> 72#ifdef KTRACE 73#include <sys/ktrace.h> 74#endif 75#ifdef COMPAT_FREEBSD32 76#include <compat/freebsd32/freebsd32_util.h> 77#endif 78 79#include <net/vnet.h> 80 81#include <security/audit/audit.h> 82#include <security/mac/mac_framework.h> 83 84#include <vm/vm.h> 85#include <vm/vm_param.h> 86#include <vm/vm_object.h> 87#include <vm/vm_page.h> 88#include <vm/vm_pageout.h> 89#include <vm/vm_kern.h> 90#include <vm/vm_extern.h> 91 92#if defined(INET) || defined(INET6) 93#ifdef SCTP 94#include <netinet/sctp.h> 95#include <netinet/sctp_peeloff.h> 96#endif /* SCTP */ 97#endif /* INET || INET6 */ 98 99static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 100static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 101 102static int accept1(struct thread *td, struct accept_args *uap, int compat); 103static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat); 104static int getsockname1(struct thread *td, struct getsockname_args *uap, 105 int compat); 106static int getpeername1(struct thread *td, struct getpeername_args *uap, 107 int compat); 108 109/* 110 * NSFBUFS-related variables and associated sysctls 111 */ 112int nsfbufs; 113int nsfbufspeak; 114int nsfbufsused; 115 116SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, 117 "Maximum number of sendfile(2) sf_bufs available"); 118SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, 119 "Number of sendfile(2) sf_bufs at peak usage"); 120SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, 121 "Number of sendfile(2) sf_bufs in use"); 122 123/* 124 * Convert a user file descriptor to a kernel file entry and check if required 125 * capability rights are present. 126 * A reference on the file entry is held upon returning. 127 */ 128static int 129getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights, 130 struct file **fpp, u_int *fflagp) 131{ 132 struct file *fp; 133 int error; 134 135 error = fget_unlocked(fdp, fd, rights, 0, &fp, NULL); 136 if (error != 0) 137 return (error); 138 if (fp->f_type != DTYPE_SOCKET) { 139 fdrop(fp, curthread); 140 return (ENOTSOCK); 141 } 142 if (fflagp != NULL) 143 *fflagp = fp->f_flag; 144 *fpp = fp; 145 return (0); 146} 147 148/* 149 * System call interface to the socket abstraction. 150 */ 151#if defined(COMPAT_43) 152#define COMPAT_OLDSOCK 153#endif 154 155int 156sys_socket(td, uap) 157 struct thread *td; 158 struct socket_args /* { 159 int domain; 160 int type; 161 int protocol; 162 } */ *uap; 163{ 164 struct socket *so; 165 struct file *fp; 166 int fd, error; 167 168 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 169#ifdef MAC 170 error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type, 171 uap->protocol); 172 if (error) 173 return (error); 174#endif 175 error = falloc(td, &fp, &fd, 0); 176 if (error) 177 return (error); 178 /* An extra reference on `fp' has been held for us by falloc(). */ 179 error = socreate(uap->domain, &so, uap->type, uap->protocol, 180 td->td_ucred, td); 181 if (error) { 182 fdclose(td->td_proc->p_fd, fp, fd, td); 183 } else { 184 finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); 185 td->td_retval[0] = fd; 186 } 187 fdrop(fp, td); 188 return (error); 189} 190 191/* ARGSUSED */ 192int 193sys_bind(td, uap) 194 struct thread *td; 195 struct bind_args /* { 196 int s; 197 caddr_t name; 198 int namelen; 199 } */ *uap; 200{ 201 struct sockaddr *sa; 202 int error; 203 204 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0) 205 return (error); 206 207 error = kern_bind(td, uap->s, sa); 208 free(sa, M_SONAME); 209 return (error); 210} 211 212int 213kern_bind(td, fd, sa) 214 struct thread *td; 215 int fd; 216 struct sockaddr *sa; 217{ 218 struct socket *so; 219 struct file *fp; 220 int error; 221 222 AUDIT_ARG_FD(fd); 223 AUDIT_ARG_SOCKADDR(td, sa); 224 error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL); 225 if (error) 226 return (error); 227 so = fp->f_data; 228#ifdef KTRACE 229 if (KTRPOINT(td, KTR_STRUCT)) 230 ktrsockaddr(sa); 231#endif 232#ifdef MAC 233 error = mac_socket_check_bind(td->td_ucred, so, sa); 234 if (error == 0) 235#endif 236 error = sobind(so, sa, td); 237 fdrop(fp, td); 238 return (error); 239} 240 241/* ARGSUSED */ 242int 243sys_listen(td, uap) 244 struct thread *td; 245 struct listen_args /* { 246 int s; 247 int backlog; 248 } */ *uap; 249{ 250 struct socket *so; 251 struct file *fp; 252 int error; 253 254 AUDIT_ARG_FD(uap->s); 255 error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL); 256 if (error == 0) { 257 so = fp->f_data; 258#ifdef MAC 259 error = mac_socket_check_listen(td->td_ucred, so); 260 if (error == 0) 261#endif 262 error = solisten(so, uap->backlog, td); 263 fdrop(fp, td); 264 } 265 return(error); 266} 267 268/* 269 * accept1() 270 */ 271static int 272accept1(td, uap, compat) 273 struct thread *td; 274 struct accept_args /* { 275 int s; 276 struct sockaddr * __restrict name; 277 socklen_t * __restrict anamelen; 278 } */ *uap; 279 int compat; 280{ 281 struct sockaddr *name; 282 socklen_t namelen; 283 struct file *fp; 284 int error; 285 286 if (uap->name == NULL) 287 return (kern_accept(td, uap->s, NULL, NULL, NULL)); 288 289 error = copyin(uap->anamelen, &namelen, sizeof (namelen)); 290 if (error) 291 return (error); 292 293 error = kern_accept(td, uap->s, &name, &namelen, &fp); 294 295 /* 296 * return a namelen of zero for older code which might 297 * ignore the return value from accept. 298 */ 299 if (error) { 300 (void) copyout(&namelen, 301 uap->anamelen, sizeof(*uap->anamelen)); 302 return (error); 303 } 304 305 if (error == 0 && name != NULL) { 306#ifdef COMPAT_OLDSOCK 307 if (compat) 308 ((struct osockaddr *)name)->sa_family = 309 name->sa_family; 310#endif 311 error = copyout(name, uap->name, namelen); 312 } 313 if (error == 0) 314 error = copyout(&namelen, uap->anamelen, 315 sizeof(namelen)); 316 if (error) 317 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 318 fdrop(fp, td); 319 free(name, M_SONAME); 320 return (error); 321} 322 323int 324kern_accept(struct thread *td, int s, struct sockaddr **name, 325 socklen_t *namelen, struct file **fp) 326{ 327 struct filedesc *fdp; 328 struct file *headfp, *nfp = NULL; 329 struct sockaddr *sa = NULL; 330 int error; 331 struct socket *head, *so; 332 int fd; 333 u_int fflag; 334 pid_t pgid; 335 int tmp; 336 337 if (name) { 338 *name = NULL; 339 if (*namelen < 0) 340 return (EINVAL); 341 } 342 343 AUDIT_ARG_FD(s); 344 fdp = td->td_proc->p_fd; 345 error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag); 346 if (error) 347 return (error); 348 head = headfp->f_data; 349 if ((head->so_options & SO_ACCEPTCONN) == 0) { 350 error = EINVAL; 351 goto done; 352 } 353#ifdef MAC 354 error = mac_socket_check_accept(td->td_ucred, head); 355 if (error != 0) 356 goto done; 357#endif 358 error = falloc(td, &nfp, &fd, 0); 359 if (error) 360 goto done; 361 ACCEPT_LOCK(); 362 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 363 ACCEPT_UNLOCK(); 364 error = EWOULDBLOCK; 365 goto noconnection; 366 } 367 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 368 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 369 head->so_error = ECONNABORTED; 370 break; 371 } 372 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 373 "accept", 0); 374 if (error) { 375 ACCEPT_UNLOCK(); 376 goto noconnection; 377 } 378 } 379 if (head->so_error) { 380 error = head->so_error; 381 head->so_error = 0; 382 ACCEPT_UNLOCK(); 383 goto noconnection; 384 } 385 so = TAILQ_FIRST(&head->so_comp); 386 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 387 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 388 389 /* 390 * Before changing the flags on the socket, we have to bump the 391 * reference count. Otherwise, if the protocol calls sofree(), 392 * the socket will be released due to a zero refcount. 393 */ 394 SOCK_LOCK(so); /* soref() and so_state update */ 395 soref(so); /* file descriptor reference */ 396 397 TAILQ_REMOVE(&head->so_comp, so, so_list); 398 head->so_qlen--; 399 so->so_state |= (head->so_state & SS_NBIO); 400 so->so_qstate &= ~SQ_COMP; 401 so->so_head = NULL; 402 403 SOCK_UNLOCK(so); 404 ACCEPT_UNLOCK(); 405 406 /* An extra reference on `nfp' has been held for us by falloc(). */ 407 td->td_retval[0] = fd; 408 409 /* connection has been removed from the listen queue */ 410 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 411 412 pgid = fgetown(&head->so_sigio); 413 if (pgid != 0) 414 fsetown(pgid, &so->so_sigio); 415 416 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 417 /* Sync socket nonblocking/async state with file flags */ 418 tmp = fflag & FNONBLOCK; 419 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 420 tmp = fflag & FASYNC; 421 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 422 sa = 0; 423 error = soaccept(so, &sa); 424 if (error) { 425 /* 426 * return a namelen of zero for older code which might 427 * ignore the return value from accept. 428 */ 429 if (name) 430 *namelen = 0; 431 goto noconnection; 432 } 433 if (sa == NULL) { 434 if (name) 435 *namelen = 0; 436 goto done; 437 } 438 AUDIT_ARG_SOCKADDR(td, sa); 439 if (name) { 440 /* check sa_len before it is destroyed */ 441 if (*namelen > sa->sa_len) 442 *namelen = sa->sa_len; 443#ifdef KTRACE 444 if (KTRPOINT(td, KTR_STRUCT)) 445 ktrsockaddr(sa); 446#endif 447 *name = sa; 448 sa = NULL; 449 } 450noconnection: 451 if (sa) 452 free(sa, M_SONAME); 453 454 /* 455 * close the new descriptor, assuming someone hasn't ripped it 456 * out from under us. 457 */ 458 if (error) 459 fdclose(fdp, nfp, fd, td); 460 461 /* 462 * Release explicitly held references before returning. We return 463 * a reference on nfp to the caller on success if they request it. 464 */ 465done: 466 if (fp != NULL) { 467 if (error == 0) { 468 *fp = nfp; 469 nfp = NULL; 470 } else 471 *fp = NULL; 472 } 473 if (nfp != NULL) 474 fdrop(nfp, td); 475 fdrop(headfp, td); 476 return (error); 477} 478 479int 480sys_accept(td, uap) 481 struct thread *td; 482 struct accept_args *uap; 483{ 484 485 return (accept1(td, uap, 0)); 486} 487 488#ifdef COMPAT_OLDSOCK 489int 490oaccept(td, uap) 491 struct thread *td; 492 struct accept_args *uap; 493{ 494 495 return (accept1(td, uap, 1)); 496} 497#endif /* COMPAT_OLDSOCK */ 498 499/* ARGSUSED */ 500int 501sys_connect(td, uap) 502 struct thread *td; 503 struct connect_args /* { 504 int s; 505 caddr_t name; 506 int namelen; 507 } */ *uap; 508{ 509 struct sockaddr *sa; 510 int error; 511 512 error = getsockaddr(&sa, uap->name, uap->namelen); 513 if (error) 514 return (error); 515 516 error = kern_connect(td, uap->s, sa); 517 free(sa, M_SONAME); 518 return (error); 519} 520 521 522int 523kern_connect(td, fd, sa) 524 struct thread *td; 525 int fd; 526 struct sockaddr *sa; 527{ 528 struct socket *so; 529 struct file *fp; 530 int error; 531 int interrupted = 0; 532 533 AUDIT_ARG_FD(fd); 534 AUDIT_ARG_SOCKADDR(td, sa); 535 error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL); 536 if (error) 537 return (error); 538 so = fp->f_data; 539 if (so->so_state & SS_ISCONNECTING) { 540 error = EALREADY; 541 goto done1; 542 } 543#ifdef KTRACE 544 if (KTRPOINT(td, KTR_STRUCT)) 545 ktrsockaddr(sa); 546#endif 547#ifdef MAC 548 error = mac_socket_check_connect(td->td_ucred, so, sa); 549 if (error) 550 goto bad; 551#endif 552 error = soconnect(so, sa, td); 553 if (error) 554 goto bad; 555 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 556 error = EINPROGRESS; 557 goto done1; 558 } 559 SOCK_LOCK(so); 560 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 561 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 562 "connec", 0); 563 if (error) { 564 if (error == EINTR || error == ERESTART) 565 interrupted = 1; 566 break; 567 } 568 } 569 if (error == 0) { 570 error = so->so_error; 571 so->so_error = 0; 572 } 573 SOCK_UNLOCK(so); 574bad: 575 if (!interrupted) 576 so->so_state &= ~SS_ISCONNECTING; 577 if (error == ERESTART) 578 error = EINTR; 579done1: 580 fdrop(fp, td); 581 return (error); 582} 583 584int 585kern_socketpair(struct thread *td, int domain, int type, int protocol, 586 int *rsv) 587{ 588 struct filedesc *fdp = td->td_proc->p_fd; 589 struct file *fp1, *fp2; 590 struct socket *so1, *so2; 591 int fd, error; 592 593 AUDIT_ARG_SOCKET(domain, type, protocol); 594#ifdef MAC 595 /* We might want to have a separate check for socket pairs. */ 596 error = mac_socket_check_create(td->td_ucred, domain, type, 597 protocol); 598 if (error) 599 return (error); 600#endif 601 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 602 if (error) 603 return (error); 604 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 605 if (error) 606 goto free1; 607 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 608 error = falloc(td, &fp1, &fd, 0); 609 if (error) 610 goto free2; 611 rsv[0] = fd; 612 fp1->f_data = so1; /* so1 already has ref count */ 613 error = falloc(td, &fp2, &fd, 0); 614 if (error) 615 goto free3; 616 fp2->f_data = so2; /* so2 already has ref count */ 617 rsv[1] = fd; 618 error = soconnect2(so1, so2); 619 if (error) 620 goto free4; 621 if (type == SOCK_DGRAM) { 622 /* 623 * Datagram socket connection is asymmetric. 624 */ 625 error = soconnect2(so2, so1); 626 if (error) 627 goto free4; 628 } 629 finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); 630 finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); 631 fdrop(fp1, td); 632 fdrop(fp2, td); 633 return (0); 634free4: 635 fdclose(fdp, fp2, rsv[1], td); 636 fdrop(fp2, td); 637free3: 638 fdclose(fdp, fp1, rsv[0], td); 639 fdrop(fp1, td); 640free2: 641 if (so2 != NULL) 642 (void)soclose(so2); 643free1: 644 if (so1 != NULL) 645 (void)soclose(so1); 646 return (error); 647} 648 649int 650sys_socketpair(struct thread *td, struct socketpair_args *uap) 651{ 652 int error, sv[2]; 653 654 error = kern_socketpair(td, uap->domain, uap->type, 655 uap->protocol, sv); 656 if (error) 657 return (error); 658 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 659 if (error) { 660 (void)kern_close(td, sv[0]); 661 (void)kern_close(td, sv[1]); 662 } 663 return (error); 664} 665 666static int 667sendit(td, s, mp, flags) 668 struct thread *td; 669 int s; 670 struct msghdr *mp; 671 int flags; 672{ 673 struct mbuf *control; 674 struct sockaddr *to; 675 int error; 676 677#ifdef CAPABILITY_MODE 678 if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) 679 return (ECAPMODE); 680#endif 681 682 if (mp->msg_name != NULL) { 683 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 684 if (error) { 685 to = NULL; 686 goto bad; 687 } 688 mp->msg_name = to; 689 } else { 690 to = NULL; 691 } 692 693 if (mp->msg_control) { 694 if (mp->msg_controllen < sizeof(struct cmsghdr) 695#ifdef COMPAT_OLDSOCK 696 && mp->msg_flags != MSG_COMPAT 697#endif 698 ) { 699 error = EINVAL; 700 goto bad; 701 } 702 error = sockargs(&control, mp->msg_control, 703 mp->msg_controllen, MT_CONTROL); 704 if (error) 705 goto bad; 706#ifdef COMPAT_OLDSOCK 707 if (mp->msg_flags == MSG_COMPAT) { 708 struct cmsghdr *cm; 709 710 M_PREPEND(control, sizeof(*cm), M_WAITOK); 711 cm = mtod(control, struct cmsghdr *); 712 cm->cmsg_len = control->m_len; 713 cm->cmsg_level = SOL_SOCKET; 714 cm->cmsg_type = SCM_RIGHTS; 715 } 716#endif 717 } else { 718 control = NULL; 719 } 720 721 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 722 723bad: 724 if (to) 725 free(to, M_SONAME); 726 return (error); 727} 728 729int 730kern_sendit(td, s, mp, flags, control, segflg) 731 struct thread *td; 732 int s; 733 struct msghdr *mp; 734 int flags; 735 struct mbuf *control; 736 enum uio_seg segflg; 737{ 738 struct file *fp; 739 struct uio auio; 740 struct iovec *iov; 741 struct socket *so; 742 int i, error; 743 ssize_t len; 744 cap_rights_t rights; 745#ifdef KTRACE 746 struct uio *ktruio = NULL; 747#endif 748 749 AUDIT_ARG_FD(s); 750 rights = CAP_SEND; 751 if (mp->msg_name != NULL) { 752 AUDIT_ARG_SOCKADDR(td, mp->msg_name); 753 rights |= CAP_CONNECT; 754 } 755 error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL); 756 if (error) 757 return (error); 758 so = (struct socket *)fp->f_data; 759 760#ifdef KTRACE 761 if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) 762 ktrsockaddr(mp->msg_name); 763#endif 764#ifdef MAC 765 if (mp->msg_name != NULL) { 766 error = mac_socket_check_connect(td->td_ucred, so, 767 mp->msg_name); 768 if (error) 769 goto bad; 770 } 771 error = mac_socket_check_send(td->td_ucred, so); 772 if (error) 773 goto bad; 774#endif 775 776 auio.uio_iov = mp->msg_iov; 777 auio.uio_iovcnt = mp->msg_iovlen; 778 auio.uio_segflg = segflg; 779 auio.uio_rw = UIO_WRITE; 780 auio.uio_td = td; 781 auio.uio_offset = 0; /* XXX */ 782 auio.uio_resid = 0; 783 iov = mp->msg_iov; 784 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 785 if ((auio.uio_resid += iov->iov_len) < 0) { 786 error = EINVAL; 787 goto bad; 788 } 789 } 790#ifdef KTRACE 791 if (KTRPOINT(td, KTR_GENIO)) 792 ktruio = cloneuio(&auio); 793#endif 794 len = auio.uio_resid; 795 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 796 if (error) { 797 if (auio.uio_resid != len && (error == ERESTART || 798 error == EINTR || error == EWOULDBLOCK)) 799 error = 0; 800 /* Generation of SIGPIPE can be controlled per socket */ 801 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 802 !(flags & MSG_NOSIGNAL)) { 803 PROC_LOCK(td->td_proc); 804 tdsignal(td, SIGPIPE); 805 PROC_UNLOCK(td->td_proc); 806 } 807 } 808 if (error == 0) 809 td->td_retval[0] = len - auio.uio_resid; 810#ifdef KTRACE 811 if (ktruio != NULL) { 812 ktruio->uio_resid = td->td_retval[0]; 813 ktrgenio(s, UIO_WRITE, ktruio, error); 814 } 815#endif 816bad: 817 fdrop(fp, td); 818 return (error); 819} 820 821int 822sys_sendto(td, uap) 823 struct thread *td; 824 struct sendto_args /* { 825 int s; 826 caddr_t buf; 827 size_t len; 828 int flags; 829 caddr_t to; 830 int tolen; 831 } */ *uap; 832{ 833 struct msghdr msg; 834 struct iovec aiov; 835 int error; 836 837 msg.msg_name = uap->to; 838 msg.msg_namelen = uap->tolen; 839 msg.msg_iov = &aiov; 840 msg.msg_iovlen = 1; 841 msg.msg_control = 0; 842#ifdef COMPAT_OLDSOCK 843 msg.msg_flags = 0; 844#endif 845 aiov.iov_base = uap->buf; 846 aiov.iov_len = uap->len; 847 error = sendit(td, uap->s, &msg, uap->flags); 848 return (error); 849} 850 851#ifdef COMPAT_OLDSOCK 852int 853osend(td, uap) 854 struct thread *td; 855 struct osend_args /* { 856 int s; 857 caddr_t buf; 858 int len; 859 int flags; 860 } */ *uap; 861{ 862 struct msghdr msg; 863 struct iovec aiov; 864 int error; 865 866 msg.msg_name = 0; 867 msg.msg_namelen = 0; 868 msg.msg_iov = &aiov; 869 msg.msg_iovlen = 1; 870 aiov.iov_base = uap->buf; 871 aiov.iov_len = uap->len; 872 msg.msg_control = 0; 873 msg.msg_flags = 0; 874 error = sendit(td, uap->s, &msg, uap->flags); 875 return (error); 876} 877 878int 879osendmsg(td, uap) 880 struct thread *td; 881 struct osendmsg_args /* { 882 int s; 883 caddr_t msg; 884 int flags; 885 } */ *uap; 886{ 887 struct msghdr msg; 888 struct iovec *iov; 889 int error; 890 891 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 892 if (error) 893 return (error); 894 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 895 if (error) 896 return (error); 897 msg.msg_iov = iov; 898 msg.msg_flags = MSG_COMPAT; 899 error = sendit(td, uap->s, &msg, uap->flags); 900 free(iov, M_IOV); 901 return (error); 902} 903#endif 904 905int 906sys_sendmsg(td, uap) 907 struct thread *td; 908 struct sendmsg_args /* { 909 int s; 910 caddr_t msg; 911 int flags; 912 } */ *uap; 913{ 914 struct msghdr msg; 915 struct iovec *iov; 916 int error; 917 918 error = copyin(uap->msg, &msg, sizeof (msg)); 919 if (error) 920 return (error); 921 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 922 if (error) 923 return (error); 924 msg.msg_iov = iov; 925#ifdef COMPAT_OLDSOCK 926 msg.msg_flags = 0; 927#endif 928 error = sendit(td, uap->s, &msg, uap->flags); 929 free(iov, M_IOV); 930 return (error); 931} 932 933int 934kern_recvit(td, s, mp, fromseg, controlp) 935 struct thread *td; 936 int s; 937 struct msghdr *mp; 938 enum uio_seg fromseg; 939 struct mbuf **controlp; 940{ 941 struct uio auio; 942 struct iovec *iov; 943 int i; 944 ssize_t len; 945 int error; 946 struct mbuf *m, *control = NULL; 947 caddr_t ctlbuf; 948 struct file *fp; 949 struct socket *so; 950 struct sockaddr *fromsa = NULL; 951#ifdef KTRACE 952 struct uio *ktruio = NULL; 953#endif 954 955 if (controlp != NULL) 956 *controlp = NULL; 957 958 AUDIT_ARG_FD(s); 959 error = getsock_cap(td->td_proc->p_fd, s, CAP_RECV, &fp, NULL); 960 if (error) 961 return (error); 962 so = fp->f_data; 963 964#ifdef MAC 965 error = mac_socket_check_receive(td->td_ucred, so); 966 if (error) { 967 fdrop(fp, td); 968 return (error); 969 } 970#endif 971 972 auio.uio_iov = mp->msg_iov; 973 auio.uio_iovcnt = mp->msg_iovlen; 974 auio.uio_segflg = UIO_USERSPACE; 975 auio.uio_rw = UIO_READ; 976 auio.uio_td = td; 977 auio.uio_offset = 0; /* XXX */ 978 auio.uio_resid = 0; 979 iov = mp->msg_iov; 980 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 981 if ((auio.uio_resid += iov->iov_len) < 0) { 982 fdrop(fp, td); 983 return (EINVAL); 984 } 985 } 986#ifdef KTRACE 987 if (KTRPOINT(td, KTR_GENIO)) 988 ktruio = cloneuio(&auio); 989#endif 990 len = auio.uio_resid; 991 error = soreceive(so, &fromsa, &auio, NULL, 992 (mp->msg_control || controlp) ? &control : NULL, 993 &mp->msg_flags); 994 if (error) { 995 if (auio.uio_resid != len && (error == ERESTART || 996 error == EINTR || error == EWOULDBLOCK)) 997 error = 0; 998 } 999 if (fromsa != NULL) 1000 AUDIT_ARG_SOCKADDR(td, fromsa); 1001#ifdef KTRACE 1002 if (ktruio != NULL) { 1003 ktruio->uio_resid = len - auio.uio_resid; 1004 ktrgenio(s, UIO_READ, ktruio, error); 1005 } 1006#endif 1007 if (error) 1008 goto out; 1009 td->td_retval[0] = len - auio.uio_resid; 1010 if (mp->msg_name) { 1011 len = mp->msg_namelen; 1012 if (len <= 0 || fromsa == NULL) 1013 len = 0; 1014 else { 1015 /* save sa_len before it is destroyed by MSG_COMPAT */ 1016 len = MIN(len, fromsa->sa_len); 1017#ifdef COMPAT_OLDSOCK 1018 if (mp->msg_flags & MSG_COMPAT) 1019 ((struct osockaddr *)fromsa)->sa_family = 1020 fromsa->sa_family; 1021#endif 1022 if (fromseg == UIO_USERSPACE) { 1023 error = copyout(fromsa, mp->msg_name, 1024 (unsigned)len); 1025 if (error) 1026 goto out; 1027 } else 1028 bcopy(fromsa, mp->msg_name, len); 1029 } 1030 mp->msg_namelen = len; 1031 } 1032 if (mp->msg_control && controlp == NULL) { 1033#ifdef COMPAT_OLDSOCK 1034 /* 1035 * We assume that old recvmsg calls won't receive access 1036 * rights and other control info, esp. as control info 1037 * is always optional and those options didn't exist in 4.3. 1038 * If we receive rights, trim the cmsghdr; anything else 1039 * is tossed. 1040 */ 1041 if (control && mp->msg_flags & MSG_COMPAT) { 1042 if (mtod(control, struct cmsghdr *)->cmsg_level != 1043 SOL_SOCKET || 1044 mtod(control, struct cmsghdr *)->cmsg_type != 1045 SCM_RIGHTS) { 1046 mp->msg_controllen = 0; 1047 goto out; 1048 } 1049 control->m_len -= sizeof (struct cmsghdr); 1050 control->m_data += sizeof (struct cmsghdr); 1051 } 1052#endif 1053 len = mp->msg_controllen; 1054 m = control; 1055 mp->msg_controllen = 0; 1056 ctlbuf = mp->msg_control; 1057 1058 while (m && len > 0) { 1059 unsigned int tocopy; 1060 1061 if (len >= m->m_len) 1062 tocopy = m->m_len; 1063 else { 1064 mp->msg_flags |= MSG_CTRUNC; 1065 tocopy = len; 1066 } 1067 1068 if ((error = copyout(mtod(m, caddr_t), 1069 ctlbuf, tocopy)) != 0) 1070 goto out; 1071 1072 ctlbuf += tocopy; 1073 len -= tocopy; 1074 m = m->m_next; 1075 } 1076 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1077 } 1078out: 1079 fdrop(fp, td); 1080#ifdef KTRACE 1081 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1082 ktrsockaddr(fromsa); 1083#endif 1084 if (fromsa) 1085 free(fromsa, M_SONAME); 1086 1087 if (error == 0 && controlp != NULL) 1088 *controlp = control; 1089 else if (control) 1090 m_freem(control); 1091 1092 return (error); 1093} 1094 1095static int 1096recvit(td, s, mp, namelenp) 1097 struct thread *td; 1098 int s; 1099 struct msghdr *mp; 1100 void *namelenp; 1101{ 1102 int error; 1103 1104 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1105 if (error) 1106 return (error); 1107 if (namelenp) { 1108 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1109#ifdef COMPAT_OLDSOCK 1110 if (mp->msg_flags & MSG_COMPAT) 1111 error = 0; /* old recvfrom didn't check */ 1112#endif 1113 } 1114 return (error); 1115} 1116 1117int 1118sys_recvfrom(td, uap) 1119 struct thread *td; 1120 struct recvfrom_args /* { 1121 int s; 1122 caddr_t buf; 1123 size_t len; 1124 int flags; 1125 struct sockaddr * __restrict from; 1126 socklen_t * __restrict fromlenaddr; 1127 } */ *uap; 1128{ 1129 struct msghdr msg; 1130 struct iovec aiov; 1131 int error; 1132 1133 if (uap->fromlenaddr) { 1134 error = copyin(uap->fromlenaddr, 1135 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1136 if (error) 1137 goto done2; 1138 } else { 1139 msg.msg_namelen = 0; 1140 } 1141 msg.msg_name = uap->from; 1142 msg.msg_iov = &aiov; 1143 msg.msg_iovlen = 1; 1144 aiov.iov_base = uap->buf; 1145 aiov.iov_len = uap->len; 1146 msg.msg_control = 0; 1147 msg.msg_flags = uap->flags; 1148 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1149done2: 1150 return(error); 1151} 1152 1153#ifdef COMPAT_OLDSOCK 1154int 1155orecvfrom(td, uap) 1156 struct thread *td; 1157 struct recvfrom_args *uap; 1158{ 1159 1160 uap->flags |= MSG_COMPAT; 1161 return (sys_recvfrom(td, uap)); 1162} 1163#endif 1164 1165#ifdef COMPAT_OLDSOCK 1166int 1167orecv(td, uap) 1168 struct thread *td; 1169 struct orecv_args /* { 1170 int s; 1171 caddr_t buf; 1172 int len; 1173 int flags; 1174 } */ *uap; 1175{ 1176 struct msghdr msg; 1177 struct iovec aiov; 1178 int error; 1179 1180 msg.msg_name = 0; 1181 msg.msg_namelen = 0; 1182 msg.msg_iov = &aiov; 1183 msg.msg_iovlen = 1; 1184 aiov.iov_base = uap->buf; 1185 aiov.iov_len = uap->len; 1186 msg.msg_control = 0; 1187 msg.msg_flags = uap->flags; 1188 error = recvit(td, uap->s, &msg, NULL); 1189 return (error); 1190} 1191 1192/* 1193 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1194 * overlays the new one, missing only the flags, and with the (old) access 1195 * rights where the control fields are now. 1196 */ 1197int 1198orecvmsg(td, uap) 1199 struct thread *td; 1200 struct orecvmsg_args /* { 1201 int s; 1202 struct omsghdr *msg; 1203 int flags; 1204 } */ *uap; 1205{ 1206 struct msghdr msg; 1207 struct iovec *iov; 1208 int error; 1209 1210 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1211 if (error) 1212 return (error); 1213 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1214 if (error) 1215 return (error); 1216 msg.msg_flags = uap->flags | MSG_COMPAT; 1217 msg.msg_iov = iov; 1218 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1219 if (msg.msg_controllen && error == 0) 1220 error = copyout(&msg.msg_controllen, 1221 &uap->msg->msg_accrightslen, sizeof (int)); 1222 free(iov, M_IOV); 1223 return (error); 1224} 1225#endif 1226 1227int 1228sys_recvmsg(td, uap) 1229 struct thread *td; 1230 struct recvmsg_args /* { 1231 int s; 1232 struct msghdr *msg; 1233 int flags; 1234 } */ *uap; 1235{ 1236 struct msghdr msg; 1237 struct iovec *uiov, *iov; 1238 int error; 1239 1240 error = copyin(uap->msg, &msg, sizeof (msg)); 1241 if (error) 1242 return (error); 1243 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1244 if (error) 1245 return (error); 1246 msg.msg_flags = uap->flags; 1247#ifdef COMPAT_OLDSOCK 1248 msg.msg_flags &= ~MSG_COMPAT; 1249#endif 1250 uiov = msg.msg_iov; 1251 msg.msg_iov = iov; 1252 error = recvit(td, uap->s, &msg, NULL); 1253 if (error == 0) { 1254 msg.msg_iov = uiov; 1255 error = copyout(&msg, uap->msg, sizeof(msg)); 1256 } 1257 free(iov, M_IOV); 1258 return (error); 1259} 1260 1261/* ARGSUSED */ 1262int 1263sys_shutdown(td, uap) 1264 struct thread *td; 1265 struct shutdown_args /* { 1266 int s; 1267 int how; 1268 } */ *uap; 1269{ 1270 struct socket *so; 1271 struct file *fp; 1272 int error; 1273 1274 AUDIT_ARG_FD(uap->s); 1275 error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp, 1276 NULL); 1277 if (error == 0) { 1278 so = fp->f_data; 1279 error = soshutdown(so, uap->how); 1280 fdrop(fp, td); 1281 } 1282 return (error); 1283} 1284 1285/* ARGSUSED */ 1286int 1287sys_setsockopt(td, uap) 1288 struct thread *td; 1289 struct setsockopt_args /* { 1290 int s; 1291 int level; 1292 int name; 1293 caddr_t val; 1294 int valsize; 1295 } */ *uap; 1296{ 1297 1298 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1299 uap->val, UIO_USERSPACE, uap->valsize)); 1300} 1301 1302int 1303kern_setsockopt(td, s, level, name, val, valseg, valsize) 1304 struct thread *td; 1305 int s; 1306 int level; 1307 int name; 1308 void *val; 1309 enum uio_seg valseg; 1310 socklen_t valsize; 1311{ 1312 int error; 1313 struct socket *so; 1314 struct file *fp; 1315 struct sockopt sopt; 1316 1317 if (val == NULL && valsize != 0) 1318 return (EFAULT); 1319 if ((int)valsize < 0) 1320 return (EINVAL); 1321 1322 sopt.sopt_dir = SOPT_SET; 1323 sopt.sopt_level = level; 1324 sopt.sopt_name = name; 1325 sopt.sopt_val = val; 1326 sopt.sopt_valsize = valsize; 1327 switch (valseg) { 1328 case UIO_USERSPACE: 1329 sopt.sopt_td = td; 1330 break; 1331 case UIO_SYSSPACE: 1332 sopt.sopt_td = NULL; 1333 break; 1334 default: 1335 panic("kern_setsockopt called with bad valseg"); 1336 } 1337 1338 AUDIT_ARG_FD(s); 1339 error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL); 1340 if (error == 0) { 1341 so = fp->f_data; 1342 error = sosetopt(so, &sopt); 1343 fdrop(fp, td); 1344 } 1345 return(error); 1346} 1347 1348/* ARGSUSED */ 1349int 1350sys_getsockopt(td, uap) 1351 struct thread *td; 1352 struct getsockopt_args /* { 1353 int s; 1354 int level; 1355 int name; 1356 void * __restrict val; 1357 socklen_t * __restrict avalsize; 1358 } */ *uap; 1359{ 1360 socklen_t valsize; 1361 int error; 1362 1363 if (uap->val) { 1364 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1365 if (error) 1366 return (error); 1367 } 1368 1369 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1370 uap->val, UIO_USERSPACE, &valsize); 1371 1372 if (error == 0) 1373 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1374 return (error); 1375} 1376 1377/* 1378 * Kernel version of getsockopt. 1379 * optval can be a userland or userspace. optlen is always a kernel pointer. 1380 */ 1381int 1382kern_getsockopt(td, s, level, name, val, valseg, valsize) 1383 struct thread *td; 1384 int s; 1385 int level; 1386 int name; 1387 void *val; 1388 enum uio_seg valseg; 1389 socklen_t *valsize; 1390{ 1391 int error; 1392 struct socket *so; 1393 struct file *fp; 1394 struct sockopt sopt; 1395 1396 if (val == NULL) 1397 *valsize = 0; 1398 if ((int)*valsize < 0) 1399 return (EINVAL); 1400 1401 sopt.sopt_dir = SOPT_GET; 1402 sopt.sopt_level = level; 1403 sopt.sopt_name = name; 1404 sopt.sopt_val = val; 1405 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1406 switch (valseg) { 1407 case UIO_USERSPACE: 1408 sopt.sopt_td = td; 1409 break; 1410 case UIO_SYSSPACE: 1411 sopt.sopt_td = NULL; 1412 break; 1413 default: 1414 panic("kern_getsockopt called with bad valseg"); 1415 } 1416 1417 AUDIT_ARG_FD(s); 1418 error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL); 1419 if (error == 0) { 1420 so = fp->f_data; 1421 error = sogetopt(so, &sopt); 1422 *valsize = sopt.sopt_valsize; 1423 fdrop(fp, td); 1424 } 1425 return (error); 1426} 1427 1428/* 1429 * getsockname1() - Get socket name. 1430 */ 1431/* ARGSUSED */ 1432static int 1433getsockname1(td, uap, compat) 1434 struct thread *td; 1435 struct getsockname_args /* { 1436 int fdes; 1437 struct sockaddr * __restrict asa; 1438 socklen_t * __restrict alen; 1439 } */ *uap; 1440 int compat; 1441{ 1442 struct sockaddr *sa; 1443 socklen_t len; 1444 int error; 1445 1446 error = copyin(uap->alen, &len, sizeof(len)); 1447 if (error) 1448 return (error); 1449 1450 error = kern_getsockname(td, uap->fdes, &sa, &len); 1451 if (error) 1452 return (error); 1453 1454 if (len != 0) { 1455#ifdef COMPAT_OLDSOCK 1456 if (compat) 1457 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1458#endif 1459 error = copyout(sa, uap->asa, (u_int)len); 1460 } 1461 free(sa, M_SONAME); 1462 if (error == 0) 1463 error = copyout(&len, uap->alen, sizeof(len)); 1464 return (error); 1465} 1466 1467int 1468kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1469 socklen_t *alen) 1470{ 1471 struct socket *so; 1472 struct file *fp; 1473 socklen_t len; 1474 int error; 1475 1476 if (*alen < 0) 1477 return (EINVAL); 1478 1479 AUDIT_ARG_FD(fd); 1480 error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL); 1481 if (error) 1482 return (error); 1483 so = fp->f_data; 1484 *sa = NULL; 1485 CURVNET_SET(so->so_vnet); 1486 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1487 CURVNET_RESTORE(); 1488 if (error) 1489 goto bad; 1490 if (*sa == NULL) 1491 len = 0; 1492 else 1493 len = MIN(*alen, (*sa)->sa_len); 1494 *alen = len; 1495#ifdef KTRACE 1496 if (KTRPOINT(td, KTR_STRUCT)) 1497 ktrsockaddr(*sa); 1498#endif 1499bad: 1500 fdrop(fp, td); 1501 if (error && *sa) { 1502 free(*sa, M_SONAME); 1503 *sa = NULL; 1504 } 1505 return (error); 1506} 1507 1508int 1509sys_getsockname(td, uap) 1510 struct thread *td; 1511 struct getsockname_args *uap; 1512{ 1513 1514 return (getsockname1(td, uap, 0)); 1515} 1516 1517#ifdef COMPAT_OLDSOCK 1518int 1519ogetsockname(td, uap) 1520 struct thread *td; 1521 struct getsockname_args *uap; 1522{ 1523 1524 return (getsockname1(td, uap, 1)); 1525} 1526#endif /* COMPAT_OLDSOCK */ 1527 1528/* 1529 * getpeername1() - Get name of peer for connected socket. 1530 */ 1531/* ARGSUSED */ 1532static int 1533getpeername1(td, uap, compat) 1534 struct thread *td; 1535 struct getpeername_args /* { 1536 int fdes; 1537 struct sockaddr * __restrict asa; 1538 socklen_t * __restrict alen; 1539 } */ *uap; 1540 int compat; 1541{ 1542 struct sockaddr *sa; 1543 socklen_t len; 1544 int error; 1545 1546 error = copyin(uap->alen, &len, sizeof (len)); 1547 if (error) 1548 return (error); 1549 1550 error = kern_getpeername(td, uap->fdes, &sa, &len); 1551 if (error) 1552 return (error); 1553 1554 if (len != 0) { 1555#ifdef COMPAT_OLDSOCK 1556 if (compat) 1557 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1558#endif 1559 error = copyout(sa, uap->asa, (u_int)len); 1560 } 1561 free(sa, M_SONAME); 1562 if (error == 0) 1563 error = copyout(&len, uap->alen, sizeof(len)); 1564 return (error); 1565} 1566 1567int 1568kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1569 socklen_t *alen) 1570{ 1571 struct socket *so; 1572 struct file *fp; 1573 socklen_t len; 1574 int error; 1575 1576 if (*alen < 0) 1577 return (EINVAL); 1578 1579 AUDIT_ARG_FD(fd); 1580 error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL); 1581 if (error) 1582 return (error); 1583 so = fp->f_data; 1584 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1585 error = ENOTCONN; 1586 goto done; 1587 } 1588 *sa = NULL; 1589 CURVNET_SET(so->so_vnet); 1590 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1591 CURVNET_RESTORE(); 1592 if (error) 1593 goto bad; 1594 if (*sa == NULL) 1595 len = 0; 1596 else 1597 len = MIN(*alen, (*sa)->sa_len); 1598 *alen = len; 1599#ifdef KTRACE 1600 if (KTRPOINT(td, KTR_STRUCT)) 1601 ktrsockaddr(*sa); 1602#endif 1603bad: 1604 if (error && *sa) { 1605 free(*sa, M_SONAME); 1606 *sa = NULL; 1607 } 1608done: 1609 fdrop(fp, td); 1610 return (error); 1611} 1612 1613int 1614sys_getpeername(td, uap) 1615 struct thread *td; 1616 struct getpeername_args *uap; 1617{ 1618 1619 return (getpeername1(td, uap, 0)); 1620} 1621 1622#ifdef COMPAT_OLDSOCK 1623int 1624ogetpeername(td, uap) 1625 struct thread *td; 1626 struct ogetpeername_args *uap; 1627{ 1628 1629 /* XXX uap should have type `getpeername_args *' to begin with. */ 1630 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1631} 1632#endif /* COMPAT_OLDSOCK */ 1633 1634int 1635sockargs(mp, buf, buflen, type) 1636 struct mbuf **mp; 1637 caddr_t buf; 1638 int buflen, type; 1639{ 1640 struct sockaddr *sa; 1641 struct mbuf *m; 1642 int error; 1643 1644 if ((u_int)buflen > MLEN) { 1645#ifdef COMPAT_OLDSOCK 1646 if (type == MT_SONAME && (u_int)buflen <= 112) 1647 buflen = MLEN; /* unix domain compat. hack */ 1648 else 1649#endif 1650 if ((u_int)buflen > MCLBYTES) 1651 return (EINVAL); 1652 } 1653 m = m_get(M_WAITOK, type); 1654 if ((u_int)buflen > MLEN) 1655 MCLGET(m, M_WAITOK); 1656 m->m_len = buflen; 1657 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1658 if (error) 1659 (void) m_free(m); 1660 else { 1661 *mp = m; 1662 if (type == MT_SONAME) { 1663 sa = mtod(m, struct sockaddr *); 1664 1665#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1666 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1667 sa->sa_family = sa->sa_len; 1668#endif 1669 sa->sa_len = buflen; 1670 } 1671 } 1672 return (error); 1673} 1674 1675int 1676getsockaddr(namp, uaddr, len) 1677 struct sockaddr **namp; 1678 caddr_t uaddr; 1679 size_t len; 1680{ 1681 struct sockaddr *sa; 1682 int error; 1683 1684 if (len > SOCK_MAXADDRLEN) 1685 return (ENAMETOOLONG); 1686 if (len < offsetof(struct sockaddr, sa_data[0])) 1687 return (EINVAL); 1688 sa = malloc(len, M_SONAME, M_WAITOK); 1689 error = copyin(uaddr, sa, len); 1690 if (error) { 1691 free(sa, M_SONAME); 1692 } else { 1693#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1694 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1695 sa->sa_family = sa->sa_len; 1696#endif 1697 sa->sa_len = len; 1698 *namp = sa; 1699 } 1700 return (error); 1701} 1702 1703#include <sys/condvar.h> 1704 1705struct sendfile_sync { 1706 struct mtx mtx; 1707 struct cv cv; 1708 unsigned count; 1709}; 1710 1711/* 1712 * Detach mapped page and release resources back to the system. 1713 */ 1714void 1715sf_buf_mext(void *addr, void *args) 1716{ 1717 vm_page_t m; 1718 struct sendfile_sync *sfs; 1719 1720 m = sf_buf_page(args); 1721 sf_buf_free(args); 1722 vm_page_lock(m); 1723 vm_page_unwire(m, 0); 1724 /* 1725 * Check for the object going away on us. This can 1726 * happen since we don't hold a reference to it. 1727 * If so, we're responsible for freeing the page. 1728 */ 1729 if (m->wire_count == 0 && m->object == NULL) 1730 vm_page_free(m); 1731 vm_page_unlock(m); 1732 if (addr == NULL) 1733 return; 1734 sfs = addr; 1735 mtx_lock(&sfs->mtx); 1736 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1737 if (--sfs->count == 0) 1738 cv_signal(&sfs->cv); 1739 mtx_unlock(&sfs->mtx); 1740} 1741 1742/* 1743 * sendfile(2) 1744 * 1745 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1746 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1747 * 1748 * Send a file specified by 'fd' and starting at 'offset' to a socket 1749 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1750 * 0. Optionally add a header and/or trailer to the socket output. If 1751 * specified, write the total number of bytes sent into *sbytes. 1752 */ 1753int 1754sys_sendfile(struct thread *td, struct sendfile_args *uap) 1755{ 1756 1757 return (do_sendfile(td, uap, 0)); 1758} 1759 1760static int 1761do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1762{ 1763 struct sf_hdtr hdtr; 1764 struct uio *hdr_uio, *trl_uio; 1765 int error; 1766 1767 hdr_uio = trl_uio = NULL; 1768 1769 if (uap->hdtr != NULL) { 1770 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 1771 if (error) 1772 goto out; 1773 if (hdtr.headers != NULL) { 1774 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 1775 if (error) 1776 goto out; 1777 } 1778 if (hdtr.trailers != NULL) { 1779 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 1780 if (error) 1781 goto out; 1782 1783 } 1784 } 1785 1786 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat); 1787out: 1788 if (hdr_uio) 1789 free(hdr_uio, M_IOV); 1790 if (trl_uio) 1791 free(trl_uio, M_IOV); 1792 return (error); 1793} 1794 1795#ifdef COMPAT_FREEBSD4 1796int 1797freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 1798{ 1799 struct sendfile_args args; 1800 1801 args.fd = uap->fd; 1802 args.s = uap->s; 1803 args.offset = uap->offset; 1804 args.nbytes = uap->nbytes; 1805 args.hdtr = uap->hdtr; 1806 args.sbytes = uap->sbytes; 1807 args.flags = uap->flags; 1808 1809 return (do_sendfile(td, &args, 1)); 1810} 1811#endif /* COMPAT_FREEBSD4 */ 1812 1813int 1814kern_sendfile(struct thread *td, struct sendfile_args *uap, 1815 struct uio *hdr_uio, struct uio *trl_uio, int compat) 1816{ 1817 struct file *sock_fp; 1818 struct vnode *vp; 1819 struct vm_object *obj = NULL; 1820 struct socket *so = NULL; 1821 struct mbuf *m = NULL; 1822 struct sf_buf *sf; 1823 struct vm_page *pg; 1824 off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; 1825 int error, hdrlen = 0, mnw = 0; 1826 struct sendfile_sync *sfs = NULL; 1827 1828 /* 1829 * The file descriptor must be a regular file and have a 1830 * backing VM object. 1831 * File offset must be positive. If it goes beyond EOF 1832 * we send only the header/trailer and no payload data. 1833 */ 1834 AUDIT_ARG_FD(uap->fd); 1835 /* 1836 * sendfile(2) can start at any offset within a file so we require 1837 * CAP_READ+CAP_SEEK = CAP_PREAD. 1838 */ 1839 if ((error = fgetvp_read(td, uap->fd, CAP_PREAD, &vp)) != 0) 1840 goto out; 1841 vn_lock(vp, LK_SHARED | LK_RETRY); 1842 if (vp->v_type == VREG) { 1843 obj = vp->v_object; 1844 if (obj != NULL) { 1845 /* 1846 * Temporarily increase the backing VM 1847 * object's reference count so that a forced 1848 * reclamation of its vnode does not 1849 * immediately destroy it. 1850 */ 1851 VM_OBJECT_LOCK(obj); 1852 if ((obj->flags & OBJ_DEAD) == 0) { 1853 vm_object_reference_locked(obj); 1854 VM_OBJECT_UNLOCK(obj); 1855 } else { 1856 VM_OBJECT_UNLOCK(obj); 1857 obj = NULL; 1858 } 1859 } 1860 } 1861 VOP_UNLOCK(vp, 0); 1862 if (obj == NULL) { 1863 error = EINVAL; 1864 goto out; 1865 } 1866 if (uap->offset < 0) { 1867 error = EINVAL; 1868 goto out; 1869 } 1870 1871 /* 1872 * The socket must be a stream socket and connected. 1873 * Remember if it a blocking or non-blocking socket. 1874 */ 1875 if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SEND, 1876 &sock_fp, NULL)) != 0) 1877 goto out; 1878 so = sock_fp->f_data; 1879 if (so->so_type != SOCK_STREAM) { 1880 error = EINVAL; 1881 goto out; 1882 } 1883 if ((so->so_state & SS_ISCONNECTED) == 0) { 1884 error = ENOTCONN; 1885 goto out; 1886 } 1887 /* 1888 * Do not wait on memory allocations but return ENOMEM for 1889 * caller to retry later. 1890 * XXX: Experimental. 1891 */ 1892 if (uap->flags & SF_MNOWAIT) 1893 mnw = 1; 1894 1895 if (uap->flags & SF_SYNC) { 1896 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); 1897 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 1898 cv_init(&sfs->cv, "sendfile"); 1899 } 1900 1901#ifdef MAC 1902 error = mac_socket_check_send(td->td_ucred, so); 1903 if (error) 1904 goto out; 1905#endif 1906 1907 /* If headers are specified copy them into mbufs. */ 1908 if (hdr_uio != NULL) { 1909 hdr_uio->uio_td = td; 1910 hdr_uio->uio_rw = UIO_WRITE; 1911 if (hdr_uio->uio_resid > 0) { 1912 /* 1913 * In FBSD < 5.0 the nbytes to send also included 1914 * the header. If compat is specified subtract the 1915 * header size from nbytes. 1916 */ 1917 if (compat) { 1918 if (uap->nbytes > hdr_uio->uio_resid) 1919 uap->nbytes -= hdr_uio->uio_resid; 1920 else 1921 uap->nbytes = 0; 1922 } 1923 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 1924 0, 0, 0); 1925 if (m == NULL) { 1926 error = mnw ? EAGAIN : ENOBUFS; 1927 goto out; 1928 } 1929 hdrlen = m_length(m, NULL); 1930 } 1931 } 1932 1933 /* 1934 * Protect against multiple writers to the socket. 1935 * 1936 * XXXRW: Historically this has assumed non-interruptibility, so now 1937 * we implement that, but possibly shouldn't. 1938 */ 1939 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 1940 1941 /* 1942 * Loop through the pages of the file, starting with the requested 1943 * offset. Get a file page (do I/O if necessary), map the file page 1944 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 1945 * it on the socket. 1946 * This is done in two loops. The inner loop turns as many pages 1947 * as it can, up to available socket buffer space, without blocking 1948 * into mbufs to have it bulk delivered into the socket send buffer. 1949 * The outer loop checks the state and available space of the socket 1950 * and takes care of the overall progress. 1951 */ 1952 for (off = uap->offset, rem = uap->nbytes; ; ) { 1953 struct mbuf *mtail = NULL; 1954 int loopbytes = 0; 1955 int space = 0; 1956 int done = 0; 1957 1958 /* 1959 * Check the socket state for ongoing connection, 1960 * no errors and space in socket buffer. 1961 * If space is low allow for the remainder of the 1962 * file to be processed if it fits the socket buffer. 1963 * Otherwise block in waiting for sufficient space 1964 * to proceed, or if the socket is nonblocking, return 1965 * to userland with EAGAIN while reporting how far 1966 * we've come. 1967 * We wait until the socket buffer has significant free 1968 * space to do bulk sends. This makes good use of file 1969 * system read ahead and allows packet segmentation 1970 * offloading hardware to take over lots of work. If 1971 * we were not careful here we would send off only one 1972 * sfbuf at a time. 1973 */ 1974 SOCKBUF_LOCK(&so->so_snd); 1975 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 1976 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 1977retry_space: 1978 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 1979 error = EPIPE; 1980 SOCKBUF_UNLOCK(&so->so_snd); 1981 goto done; 1982 } else if (so->so_error) { 1983 error = so->so_error; 1984 so->so_error = 0; 1985 SOCKBUF_UNLOCK(&so->so_snd); 1986 goto done; 1987 } 1988 space = sbspace(&so->so_snd); 1989 if (space < rem && 1990 (space <= 0 || 1991 space < so->so_snd.sb_lowat)) { 1992 if (so->so_state & SS_NBIO) { 1993 SOCKBUF_UNLOCK(&so->so_snd); 1994 error = EAGAIN; 1995 goto done; 1996 } 1997 /* 1998 * sbwait drops the lock while sleeping. 1999 * When we loop back to retry_space the 2000 * state may have changed and we retest 2001 * for it. 2002 */ 2003 error = sbwait(&so->so_snd); 2004 /* 2005 * An error from sbwait usually indicates that we've 2006 * been interrupted by a signal. If we've sent anything 2007 * then return bytes sent, otherwise return the error. 2008 */ 2009 if (error) { 2010 SOCKBUF_UNLOCK(&so->so_snd); 2011 goto done; 2012 } 2013 goto retry_space; 2014 } 2015 SOCKBUF_UNLOCK(&so->so_snd); 2016 2017 /* 2018 * Reduce space in the socket buffer by the size of 2019 * the header mbuf chain. 2020 * hdrlen is set to 0 after the first loop. 2021 */ 2022 space -= hdrlen; 2023 2024 /* 2025 * Loop and construct maximum sized mbuf chain to be bulk 2026 * dumped into socket buffer. 2027 */ 2028 while (space > loopbytes) { 2029 vm_pindex_t pindex; 2030 vm_offset_t pgoff; 2031 struct mbuf *m0; 2032 2033 VM_OBJECT_LOCK(obj); 2034 /* 2035 * Calculate the amount to transfer. 2036 * Not to exceed a page, the EOF, 2037 * or the passed in nbytes. 2038 */ 2039 pgoff = (vm_offset_t)(off & PAGE_MASK); 2040 xfsize = omin(PAGE_SIZE - pgoff, 2041 obj->un_pager.vnp.vnp_size - uap->offset - 2042 fsbytes - loopbytes); 2043 if (uap->nbytes) 2044 rem = (uap->nbytes - fsbytes - loopbytes); 2045 else 2046 rem = obj->un_pager.vnp.vnp_size - 2047 uap->offset - fsbytes - loopbytes; 2048 xfsize = omin(rem, xfsize); 2049 xfsize = omin(space - loopbytes, xfsize); 2050 if (xfsize <= 0) { 2051 VM_OBJECT_UNLOCK(obj); 2052 done = 1; /* all data sent */ 2053 break; 2054 } 2055 2056 /* 2057 * Attempt to look up the page. Allocate 2058 * if not found or wait and loop if busy. 2059 */ 2060 pindex = OFF_TO_IDX(off); 2061 pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | 2062 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); 2063 2064 /* 2065 * Check if page is valid for what we need, 2066 * otherwise initiate I/O. 2067 * If we already turned some pages into mbufs, 2068 * send them off before we come here again and 2069 * block. 2070 */ 2071 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) 2072 VM_OBJECT_UNLOCK(obj); 2073 else if (m != NULL) 2074 error = EAGAIN; /* send what we already got */ 2075 else if (uap->flags & SF_NODISKIO) 2076 error = EBUSY; 2077 else { 2078 int bsize; 2079 ssize_t resid; 2080 2081 /* 2082 * Ensure that our page is still around 2083 * when the I/O completes. 2084 */ 2085 vm_page_io_start(pg); 2086 VM_OBJECT_UNLOCK(obj); 2087 2088 /* 2089 * Get the page from backing store. 2090 */ 2091 error = vn_lock(vp, LK_SHARED); 2092 if (error != 0) 2093 goto after_read; 2094 bsize = vp->v_mount->mnt_stat.f_iosize; 2095 2096 /* 2097 * XXXMAC: Because we don't have fp->f_cred 2098 * here, we pass in NOCRED. This is probably 2099 * wrong, but is consistent with our original 2100 * implementation. 2101 */ 2102 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, 2103 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | 2104 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), 2105 td->td_ucred, NOCRED, &resid, td); 2106 VOP_UNLOCK(vp, 0); 2107 after_read: 2108 VM_OBJECT_LOCK(obj); 2109 vm_page_io_finish(pg); 2110 if (!error) 2111 VM_OBJECT_UNLOCK(obj); 2112 mbstat.sf_iocnt++; 2113 } 2114 if (error) { 2115 vm_page_lock(pg); 2116 vm_page_unwire(pg, 0); 2117 /* 2118 * See if anyone else might know about 2119 * this page. If not and it is not valid, 2120 * then free it. 2121 */ 2122 if (pg->wire_count == 0 && pg->valid == 0 && 2123 pg->busy == 0 && !(pg->oflags & VPO_BUSY)) 2124 vm_page_free(pg); 2125 vm_page_unlock(pg); 2126 VM_OBJECT_UNLOCK(obj); 2127 if (error == EAGAIN) 2128 error = 0; /* not a real error */ 2129 break; 2130 } 2131 2132 /* 2133 * Get a sendfile buf. When allocating the 2134 * first buffer for mbuf chain, we usually 2135 * wait as long as necessary, but this wait 2136 * can be interrupted. For consequent 2137 * buffers, do not sleep, since several 2138 * threads might exhaust the buffers and then 2139 * deadlock. 2140 */ 2141 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : 2142 SFB_CATCH); 2143 if (sf == NULL) { 2144 mbstat.sf_allocfail++; 2145 vm_page_lock(pg); 2146 vm_page_unwire(pg, 0); 2147 KASSERT(pg->object != NULL, 2148 ("kern_sendfile: object disappeared")); 2149 vm_page_unlock(pg); 2150 if (m == NULL) 2151 error = (mnw ? EAGAIN : EINTR); 2152 break; 2153 } 2154 2155 /* 2156 * Get an mbuf and set it up as having 2157 * external storage. 2158 */ 2159 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2160 if (m0 == NULL) { 2161 error = (mnw ? EAGAIN : ENOBUFS); 2162 sf_buf_mext((void *)sf_buf_kva(sf), sf); 2163 break; 2164 } 2165 MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, 2166 sfs, sf, M_RDONLY, EXT_SFBUF); 2167 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2168 m0->m_len = xfsize; 2169 2170 /* Append to mbuf chain. */ 2171 if (mtail != NULL) 2172 mtail->m_next = m0; 2173 else if (m != NULL) 2174 m_last(m)->m_next = m0; 2175 else 2176 m = m0; 2177 mtail = m0; 2178 2179 /* Keep track of bits processed. */ 2180 loopbytes += xfsize; 2181 off += xfsize; 2182 2183 if (sfs != NULL) { 2184 mtx_lock(&sfs->mtx); 2185 sfs->count++; 2186 mtx_unlock(&sfs->mtx); 2187 } 2188 } 2189 2190 /* Add the buffer chain to the socket buffer. */ 2191 if (m != NULL) { 2192 int mlen, err; 2193 2194 mlen = m_length(m, NULL); 2195 SOCKBUF_LOCK(&so->so_snd); 2196 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2197 error = EPIPE; 2198 SOCKBUF_UNLOCK(&so->so_snd); 2199 goto done; 2200 } 2201 SOCKBUF_UNLOCK(&so->so_snd); 2202 CURVNET_SET(so->so_vnet); 2203 /* Avoid error aliasing. */ 2204 err = (*so->so_proto->pr_usrreqs->pru_send) 2205 (so, 0, m, NULL, NULL, td); 2206 CURVNET_RESTORE(); 2207 if (err == 0) { 2208 /* 2209 * We need two counters to get the 2210 * file offset and nbytes to send 2211 * right: 2212 * - sbytes contains the total amount 2213 * of bytes sent, including headers. 2214 * - fsbytes contains the total amount 2215 * of bytes sent from the file. 2216 */ 2217 sbytes += mlen; 2218 fsbytes += mlen; 2219 if (hdrlen) { 2220 fsbytes -= hdrlen; 2221 hdrlen = 0; 2222 } 2223 } else if (error == 0) 2224 error = err; 2225 m = NULL; /* pru_send always consumes */ 2226 } 2227 2228 /* Quit outer loop on error or when we're done. */ 2229 if (done) 2230 break; 2231 if (error) 2232 goto done; 2233 } 2234 2235 /* 2236 * Send trailers. Wimp out and use writev(2). 2237 */ 2238 if (trl_uio != NULL) { 2239 sbunlock(&so->so_snd); 2240 error = kern_writev(td, uap->s, trl_uio); 2241 if (error == 0) 2242 sbytes += td->td_retval[0]; 2243 goto out; 2244 } 2245 2246done: 2247 sbunlock(&so->so_snd); 2248out: 2249 /* 2250 * If there was no error we have to clear td->td_retval[0] 2251 * because it may have been set by writev. 2252 */ 2253 if (error == 0) { 2254 td->td_retval[0] = 0; 2255 } 2256 if (uap->sbytes != NULL) { 2257 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2258 } 2259 if (obj != NULL) 2260 vm_object_deallocate(obj); 2261 if (vp != NULL) 2262 vrele(vp); 2263 if (so) 2264 fdrop(sock_fp, td); 2265 if (m) 2266 m_freem(m); 2267 2268 if (sfs != NULL) { 2269 mtx_lock(&sfs->mtx); 2270 if (sfs->count != 0) 2271 cv_wait(&sfs->cv, &sfs->mtx); 2272 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 2273 cv_destroy(&sfs->cv); 2274 mtx_destroy(&sfs->mtx); 2275 free(sfs, M_TEMP); 2276 } 2277 2278 if (error == ERESTART) 2279 error = EINTR; 2280 2281 return (error); 2282} 2283 2284/* 2285 * SCTP syscalls. 2286 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2287 * otherwise all return EOPNOTSUPP. 2288 * XXX: We should make this loadable one day. 2289 */ 2290int 2291sys_sctp_peeloff(td, uap) 2292 struct thread *td; 2293 struct sctp_peeloff_args /* { 2294 int sd; 2295 caddr_t name; 2296 } */ *uap; 2297{ 2298#if (defined(INET) || defined(INET6)) && defined(SCTP) 2299 struct file *nfp = NULL; 2300 int error; 2301 struct socket *head, *so; 2302 int fd; 2303 u_int fflag; 2304 2305 AUDIT_ARG_FD(uap->sd); 2306 error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag); 2307 if (error) 2308 goto done2; 2309 if (head->so_proto->pr_protocol != IPPROTO_SCTP) { 2310 error = EOPNOTSUPP; 2311 goto done; 2312 } 2313 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2314 if (error) 2315 goto done; 2316 /* 2317 * At this point we know we do have a assoc to pull 2318 * we proceed to get the fd setup. This may block 2319 * but that is ok. 2320 */ 2321 2322 error = falloc(td, &nfp, &fd, 0); 2323 if (error) 2324 goto done; 2325 td->td_retval[0] = fd; 2326 2327 CURVNET_SET(head->so_vnet); 2328 so = sonewconn(head, SS_ISCONNECTED); 2329 if (so == NULL) 2330 goto noconnection; 2331 /* 2332 * Before changing the flags on the socket, we have to bump the 2333 * reference count. Otherwise, if the protocol calls sofree(), 2334 * the socket will be released due to a zero refcount. 2335 */ 2336 SOCK_LOCK(so); 2337 soref(so); /* file descriptor reference */ 2338 SOCK_UNLOCK(so); 2339 2340 ACCEPT_LOCK(); 2341 2342 TAILQ_REMOVE(&head->so_comp, so, so_list); 2343 head->so_qlen--; 2344 so->so_state |= (head->so_state & SS_NBIO); 2345 so->so_state &= ~SS_NOFDREF; 2346 so->so_qstate &= ~SQ_COMP; 2347 so->so_head = NULL; 2348 ACCEPT_UNLOCK(); 2349 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2350 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2351 if (error) 2352 goto noconnection; 2353 if (head->so_sigio != NULL) 2354 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2355 2356noconnection: 2357 /* 2358 * close the new descriptor, assuming someone hasn't ripped it 2359 * out from under us. 2360 */ 2361 if (error) 2362 fdclose(td->td_proc->p_fd, nfp, fd, td); 2363 2364 /* 2365 * Release explicitly held references before returning. 2366 */ 2367 CURVNET_RESTORE(); 2368done: 2369 if (nfp != NULL) 2370 fdrop(nfp, td); 2371 fputsock(head); 2372done2: 2373 return (error); 2374#else /* SCTP */ 2375 return (EOPNOTSUPP); 2376#endif /* SCTP */ 2377} 2378 2379int 2380sys_sctp_generic_sendmsg (td, uap) 2381 struct thread *td; 2382 struct sctp_generic_sendmsg_args /* { 2383 int sd, 2384 caddr_t msg, 2385 int mlen, 2386 caddr_t to, 2387 __socklen_t tolen, 2388 struct sctp_sndrcvinfo *sinfo, 2389 int flags 2390 } */ *uap; 2391{ 2392#if (defined(INET) || defined(INET6)) && defined(SCTP) 2393 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2394 struct socket *so; 2395 struct file *fp = NULL; 2396 int error = 0, len; 2397 struct sockaddr *to = NULL; 2398#ifdef KTRACE 2399 struct uio *ktruio = NULL; 2400#endif 2401 struct uio auio; 2402 struct iovec iov[1]; 2403 cap_rights_t rights; 2404 2405 if (uap->sinfo) { 2406 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2407 if (error) 2408 return (error); 2409 u_sinfo = &sinfo; 2410 } 2411 2412 rights = CAP_SEND; 2413 if (uap->tolen) { 2414 error = getsockaddr(&to, uap->to, uap->tolen); 2415 if (error) { 2416 to = NULL; 2417 goto sctp_bad2; 2418 } 2419 rights |= CAP_CONNECT; 2420 } 2421 2422 AUDIT_ARG_FD(uap->sd); 2423 error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL); 2424 if (error) 2425 goto sctp_bad; 2426#ifdef KTRACE 2427 if (to && (KTRPOINT(td, KTR_STRUCT))) 2428 ktrsockaddr(to); 2429#endif 2430 2431 iov[0].iov_base = uap->msg; 2432 iov[0].iov_len = uap->mlen; 2433 2434 so = (struct socket *)fp->f_data; 2435 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2436 error = EOPNOTSUPP; 2437 goto sctp_bad; 2438 } 2439#ifdef MAC 2440 error = mac_socket_check_send(td->td_ucred, so); 2441 if (error) 2442 goto sctp_bad; 2443#endif /* MAC */ 2444 2445 auio.uio_iov = iov; 2446 auio.uio_iovcnt = 1; 2447 auio.uio_segflg = UIO_USERSPACE; 2448 auio.uio_rw = UIO_WRITE; 2449 auio.uio_td = td; 2450 auio.uio_offset = 0; /* XXX */ 2451 auio.uio_resid = 0; 2452 len = auio.uio_resid = uap->mlen; 2453 CURVNET_SET(so->so_vnet); 2454 error = sctp_lower_sosend(so, to, &auio, 2455 (struct mbuf *)NULL, (struct mbuf *)NULL, 2456 uap->flags, u_sinfo, td); 2457 CURVNET_RESTORE(); 2458 if (error) { 2459 if (auio.uio_resid != len && (error == ERESTART || 2460 error == EINTR || error == EWOULDBLOCK)) 2461 error = 0; 2462 /* Generation of SIGPIPE can be controlled per socket. */ 2463 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2464 !(uap->flags & MSG_NOSIGNAL)) { 2465 PROC_LOCK(td->td_proc); 2466 tdsignal(td, SIGPIPE); 2467 PROC_UNLOCK(td->td_proc); 2468 } 2469 } 2470 if (error == 0) 2471 td->td_retval[0] = len - auio.uio_resid; 2472#ifdef KTRACE 2473 if (ktruio != NULL) { 2474 ktruio->uio_resid = td->td_retval[0]; 2475 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2476 } 2477#endif /* KTRACE */ 2478sctp_bad: 2479 if (fp) 2480 fdrop(fp, td); 2481sctp_bad2: 2482 if (to) 2483 free(to, M_SONAME); 2484 return (error); 2485#else /* SCTP */ 2486 return (EOPNOTSUPP); 2487#endif /* SCTP */ 2488} 2489 2490int 2491sys_sctp_generic_sendmsg_iov(td, uap) 2492 struct thread *td; 2493 struct sctp_generic_sendmsg_iov_args /* { 2494 int sd, 2495 struct iovec *iov, 2496 int iovlen, 2497 caddr_t to, 2498 __socklen_t tolen, 2499 struct sctp_sndrcvinfo *sinfo, 2500 int flags 2501 } */ *uap; 2502{ 2503#if (defined(INET) || defined(INET6)) && defined(SCTP) 2504 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2505 struct socket *so; 2506 struct file *fp = NULL; 2507 int error=0, i; 2508 ssize_t len; 2509 struct sockaddr *to = NULL; 2510#ifdef KTRACE 2511 struct uio *ktruio = NULL; 2512#endif 2513 struct uio auio; 2514 struct iovec *iov, *tiov; 2515 cap_rights_t rights; 2516 2517 if (uap->sinfo) { 2518 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2519 if (error) 2520 return (error); 2521 u_sinfo = &sinfo; 2522 } 2523 rights = CAP_SEND; 2524 if (uap->tolen) { 2525 error = getsockaddr(&to, uap->to, uap->tolen); 2526 if (error) { 2527 to = NULL; 2528 goto sctp_bad2; 2529 } 2530 rights |= CAP_CONNECT; 2531 } 2532 2533 AUDIT_ARG_FD(uap->sd); 2534 error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL); 2535 if (error) 2536 goto sctp_bad1; 2537 2538#ifdef COMPAT_FREEBSD32 2539 if (SV_CURPROC_FLAG(SV_ILP32)) 2540 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2541 uap->iovlen, &iov, EMSGSIZE); 2542 else 2543#endif 2544 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2545 if (error) 2546 goto sctp_bad1; 2547#ifdef KTRACE 2548 if (to && (KTRPOINT(td, KTR_STRUCT))) 2549 ktrsockaddr(to); 2550#endif 2551 2552 so = (struct socket *)fp->f_data; 2553 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2554 error = EOPNOTSUPP; 2555 goto sctp_bad; 2556 } 2557#ifdef MAC 2558 error = mac_socket_check_send(td->td_ucred, so); 2559 if (error) 2560 goto sctp_bad; 2561#endif /* MAC */ 2562 2563 auio.uio_iov = iov; 2564 auio.uio_iovcnt = uap->iovlen; 2565 auio.uio_segflg = UIO_USERSPACE; 2566 auio.uio_rw = UIO_WRITE; 2567 auio.uio_td = td; 2568 auio.uio_offset = 0; /* XXX */ 2569 auio.uio_resid = 0; 2570 tiov = iov; 2571 for (i = 0; i <uap->iovlen; i++, tiov++) { 2572 if ((auio.uio_resid += tiov->iov_len) < 0) { 2573 error = EINVAL; 2574 goto sctp_bad; 2575 } 2576 } 2577 len = auio.uio_resid; 2578 CURVNET_SET(so->so_vnet); 2579 error = sctp_lower_sosend(so, to, &auio, 2580 (struct mbuf *)NULL, (struct mbuf *)NULL, 2581 uap->flags, u_sinfo, td); 2582 CURVNET_RESTORE(); 2583 if (error) { 2584 if (auio.uio_resid != len && (error == ERESTART || 2585 error == EINTR || error == EWOULDBLOCK)) 2586 error = 0; 2587 /* Generation of SIGPIPE can be controlled per socket */ 2588 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2589 !(uap->flags & MSG_NOSIGNAL)) { 2590 PROC_LOCK(td->td_proc); 2591 tdsignal(td, SIGPIPE); 2592 PROC_UNLOCK(td->td_proc); 2593 } 2594 } 2595 if (error == 0) 2596 td->td_retval[0] = len - auio.uio_resid; 2597#ifdef KTRACE 2598 if (ktruio != NULL) { 2599 ktruio->uio_resid = td->td_retval[0]; 2600 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2601 } 2602#endif /* KTRACE */ 2603sctp_bad: 2604 free(iov, M_IOV); 2605sctp_bad1: 2606 if (fp) 2607 fdrop(fp, td); 2608sctp_bad2: 2609 if (to) 2610 free(to, M_SONAME); 2611 return (error); 2612#else /* SCTP */ 2613 return (EOPNOTSUPP); 2614#endif /* SCTP */ 2615} 2616 2617int 2618sys_sctp_generic_recvmsg(td, uap) 2619 struct thread *td; 2620 struct sctp_generic_recvmsg_args /* { 2621 int sd, 2622 struct iovec *iov, 2623 int iovlen, 2624 struct sockaddr *from, 2625 __socklen_t *fromlenaddr, 2626 struct sctp_sndrcvinfo *sinfo, 2627 int *msg_flags 2628 } */ *uap; 2629{ 2630#if (defined(INET) || defined(INET6)) && defined(SCTP) 2631 uint8_t sockbufstore[256]; 2632 struct uio auio; 2633 struct iovec *iov, *tiov; 2634 struct sctp_sndrcvinfo sinfo; 2635 struct socket *so; 2636 struct file *fp = NULL; 2637 struct sockaddr *fromsa; 2638 int fromlen; 2639 ssize_t len; 2640 int i, msg_flags; 2641 int error = 0; 2642#ifdef KTRACE 2643 struct uio *ktruio = NULL; 2644#endif 2645 2646 AUDIT_ARG_FD(uap->sd); 2647 error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_RECV, &fp, NULL); 2648 if (error) { 2649 return (error); 2650 } 2651#ifdef COMPAT_FREEBSD32 2652 if (SV_CURPROC_FLAG(SV_ILP32)) 2653 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2654 uap->iovlen, &iov, EMSGSIZE); 2655 else 2656#endif 2657 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2658 if (error) 2659 goto out1; 2660 2661 so = fp->f_data; 2662 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2663 error = EOPNOTSUPP; 2664 goto out; 2665 } 2666#ifdef MAC 2667 error = mac_socket_check_receive(td->td_ucred, so); 2668 if (error) { 2669 goto out; 2670 } 2671#endif /* MAC */ 2672 2673 if (uap->fromlenaddr) { 2674 error = copyin(uap->fromlenaddr, 2675 &fromlen, sizeof (fromlen)); 2676 if (error) { 2677 goto out; 2678 } 2679 } else { 2680 fromlen = 0; 2681 } 2682 if (uap->msg_flags) { 2683 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 2684 if (error) { 2685 goto out; 2686 } 2687 } else { 2688 msg_flags = 0; 2689 } 2690 auio.uio_iov = iov; 2691 auio.uio_iovcnt = uap->iovlen; 2692 auio.uio_segflg = UIO_USERSPACE; 2693 auio.uio_rw = UIO_READ; 2694 auio.uio_td = td; 2695 auio.uio_offset = 0; /* XXX */ 2696 auio.uio_resid = 0; 2697 tiov = iov; 2698 for (i = 0; i <uap->iovlen; i++, tiov++) { 2699 if ((auio.uio_resid += tiov->iov_len) < 0) { 2700 error = EINVAL; 2701 goto out; 2702 } 2703 } 2704 len = auio.uio_resid; 2705 fromsa = (struct sockaddr *)sockbufstore; 2706 2707#ifdef KTRACE 2708 if (KTRPOINT(td, KTR_GENIO)) 2709 ktruio = cloneuio(&auio); 2710#endif /* KTRACE */ 2711 memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo)); 2712 CURVNET_SET(so->so_vnet); 2713 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 2714 fromsa, fromlen, &msg_flags, 2715 (struct sctp_sndrcvinfo *)&sinfo, 1); 2716 CURVNET_RESTORE(); 2717 if (error) { 2718 if (auio.uio_resid != len && (error == ERESTART || 2719 error == EINTR || error == EWOULDBLOCK)) 2720 error = 0; 2721 } else { 2722 if (uap->sinfo) 2723 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 2724 } 2725#ifdef KTRACE 2726 if (ktruio != NULL) { 2727 ktruio->uio_resid = len - auio.uio_resid; 2728 ktrgenio(uap->sd, UIO_READ, ktruio, error); 2729 } 2730#endif /* KTRACE */ 2731 if (error) 2732 goto out; 2733 td->td_retval[0] = len - auio.uio_resid; 2734 2735 if (fromlen && uap->from) { 2736 len = fromlen; 2737 if (len <= 0 || fromsa == 0) 2738 len = 0; 2739 else { 2740 len = MIN(len, fromsa->sa_len); 2741 error = copyout(fromsa, uap->from, (size_t)len); 2742 if (error) 2743 goto out; 2744 } 2745 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 2746 if (error) { 2747 goto out; 2748 } 2749 } 2750#ifdef KTRACE 2751 if (KTRPOINT(td, KTR_STRUCT)) 2752 ktrsockaddr(fromsa); 2753#endif 2754 if (uap->msg_flags) { 2755 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 2756 if (error) { 2757 goto out; 2758 } 2759 } 2760out: 2761 free(iov, M_IOV); 2762out1: 2763 if (fp) 2764 fdrop(fp, td); 2765 2766 return (error); 2767#else /* SCTP */ 2768 return (EOPNOTSUPP); 2769#endif /* SCTP */ 2770} 2771