kern_sendfile.c revision 258788
1/*- 2 * Copyright (c) 1982, 1986, 1989, 1990, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * sendfile(2) and related extensions: 6 * Copyright (c) 1998, David Greenman. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 4. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD: head/sys/kern/uipc_syscalls.c 258788 2013-12-01 03:53:21Z adrian $"); 37 38#include "opt_capsicum.h" 39#include "opt_inet.h" 40#include "opt_inet6.h" 41#include "opt_sctp.h" 42#include "opt_compat.h" 43#include "opt_ktrace.h" 44 45#include <sys/param.h> 46#include <sys/systm.h> 47#include <sys/capability.h> 48#include <sys/condvar.h> 49#include <sys/kernel.h> 50#include <sys/lock.h> 51#include <sys/mutex.h> 52#include <sys/sysproto.h> 53#include <sys/malloc.h> 54#include <sys/filedesc.h> 55#include <sys/event.h> 56#include <sys/proc.h> 57#include <sys/fcntl.h> 58#include <sys/file.h> 59#include <sys/filio.h> 60#include <sys/jail.h> 61#include <sys/mman.h> 62#include <sys/mount.h> 63#include <sys/mbuf.h> 64#include <sys/protosw.h> 65#include <sys/rwlock.h> 66#include <sys/sf_buf.h> 67#include <sys/sf_sync.h> 68#include <sys/sysent.h> 69#include <sys/socket.h> 70#include <sys/socketvar.h> 71#include <sys/signalvar.h> 72#include <sys/syscallsubr.h> 73#include <sys/sysctl.h> 74#include <sys/uio.h> 75#include <sys/vnode.h> 76#ifdef KTRACE 77#include <sys/ktrace.h> 78#endif 79#ifdef COMPAT_FREEBSD32 80#include <compat/freebsd32/freebsd32_util.h> 81#endif 82 83#include <net/vnet.h> 84 85#include <security/audit/audit.h> 86#include <security/mac/mac_framework.h> 87 88#include <vm/vm.h> 89#include <vm/vm_param.h> 90#include <vm/vm_object.h> 91#include <vm/vm_page.h> 92#include <vm/vm_pager.h> 93#include <vm/vm_kern.h> 94#include <vm/vm_extern.h> 95 96#if defined(INET) || defined(INET6) 97#ifdef SCTP 98#include <netinet/sctp.h> 99#include <netinet/sctp_peeloff.h> 100#endif /* SCTP */ 101#endif /* INET || INET6 */ 102 103/* 104 * Flags for accept1() and kern_accept4(), in addition to SOCK_CLOEXEC 105 * and SOCK_NONBLOCK. 106 */ 107#define ACCEPT4_INHERIT 0x1 108#define ACCEPT4_COMPAT 0x2 109 110static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); 111static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); 112 113static int accept1(struct thread *td, int s, struct sockaddr *uname, 114 socklen_t *anamelen, int flags); 115static int do_sendfile(struct thread *td, struct sendfile_args *uap, 116 int compat); 117static int getsockname1(struct thread *td, struct getsockname_args *uap, 118 int compat); 119static int getpeername1(struct thread *td, struct getpeername_args *uap, 120 int compat); 121 122counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; 123 124/* 125 * sendfile(2)-related variables and associated sysctls 126 */ 127static SYSCTL_NODE(_kern_ipc, OID_AUTO, sendfile, CTLFLAG_RW, 0, 128 "sendfile(2) tunables"); 129static int sfreadahead = 1; 130SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW, 131 &sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks"); 132 133 134static void 135sfstat_init(const void *unused) 136{ 137 138 COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), 139 M_WAITOK); 140} 141SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); 142 143static int 144sfstat_sysctl(SYSCTL_HANDLER_ARGS) 145{ 146 struct sfstat s; 147 148 COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); 149 if (req->newptr) 150 COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t)); 151 return (SYSCTL_OUT(req, &s, sizeof(s))); 152} 153SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW, 154 NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); 155 156/* 157 * Convert a user file descriptor to a kernel file entry and check if required 158 * capability rights are present. 159 * A reference on the file entry is held upon returning. 160 */ 161static int 162getsock_cap(struct filedesc *fdp, int fd, cap_rights_t *rightsp, 163 struct file **fpp, u_int *fflagp) 164{ 165 struct file *fp; 166 int error; 167 168 error = fget_unlocked(fdp, fd, rightsp, 0, &fp, NULL); 169 if (error != 0) 170 return (error); 171 if (fp->f_type != DTYPE_SOCKET) { 172 fdrop(fp, curthread); 173 return (ENOTSOCK); 174 } 175 if (fflagp != NULL) 176 *fflagp = fp->f_flag; 177 *fpp = fp; 178 return (0); 179} 180 181/* 182 * System call interface to the socket abstraction. 183 */ 184#if defined(COMPAT_43) 185#define COMPAT_OLDSOCK 186#endif 187 188int 189sys_socket(td, uap) 190 struct thread *td; 191 struct socket_args /* { 192 int domain; 193 int type; 194 int protocol; 195 } */ *uap; 196{ 197 struct socket *so; 198 struct file *fp; 199 int fd, error, type, oflag, fflag; 200 201 AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); 202 203 type = uap->type; 204 oflag = 0; 205 fflag = 0; 206 if ((type & SOCK_CLOEXEC) != 0) { 207 type &= ~SOCK_CLOEXEC; 208 oflag |= O_CLOEXEC; 209 } 210 if ((type & SOCK_NONBLOCK) != 0) { 211 type &= ~SOCK_NONBLOCK; 212 fflag |= FNONBLOCK; 213 } 214 215#ifdef MAC 216 error = mac_socket_check_create(td->td_ucred, uap->domain, type, 217 uap->protocol); 218 if (error != 0) 219 return (error); 220#endif 221 error = falloc(td, &fp, &fd, oflag); 222 if (error != 0) 223 return (error); 224 /* An extra reference on `fp' has been held for us by falloc(). */ 225 error = socreate(uap->domain, &so, type, uap->protocol, 226 td->td_ucred, td); 227 if (error != 0) { 228 fdclose(td->td_proc->p_fd, fp, fd, td); 229 } else { 230 finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); 231 if ((fflag & FNONBLOCK) != 0) 232 (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); 233 td->td_retval[0] = fd; 234 } 235 fdrop(fp, td); 236 return (error); 237} 238 239/* ARGSUSED */ 240int 241sys_bind(td, uap) 242 struct thread *td; 243 struct bind_args /* { 244 int s; 245 caddr_t name; 246 int namelen; 247 } */ *uap; 248{ 249 struct sockaddr *sa; 250 int error; 251 252 error = getsockaddr(&sa, uap->name, uap->namelen); 253 if (error == 0) { 254 error = kern_bind(td, uap->s, sa); 255 free(sa, M_SONAME); 256 } 257 return (error); 258} 259 260static int 261kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 262{ 263 struct socket *so; 264 struct file *fp; 265 cap_rights_t rights; 266 int error; 267 268 AUDIT_ARG_FD(fd); 269 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 270 error = getsock_cap(td->td_proc->p_fd, fd, 271 cap_rights_init(&rights, CAP_BIND), &fp, NULL); 272 if (error != 0) 273 return (error); 274 so = fp->f_data; 275#ifdef KTRACE 276 if (KTRPOINT(td, KTR_STRUCT)) 277 ktrsockaddr(sa); 278#endif 279#ifdef MAC 280 error = mac_socket_check_bind(td->td_ucred, so, sa); 281 if (error == 0) { 282#endif 283 if (dirfd == AT_FDCWD) 284 error = sobind(so, sa, td); 285 else 286 error = sobindat(dirfd, so, sa, td); 287#ifdef MAC 288 } 289#endif 290 fdrop(fp, td); 291 return (error); 292} 293 294int 295kern_bind(struct thread *td, int fd, struct sockaddr *sa) 296{ 297 298 return (kern_bindat(td, AT_FDCWD, fd, sa)); 299} 300 301/* ARGSUSED */ 302int 303sys_bindat(td, uap) 304 struct thread *td; 305 struct bindat_args /* { 306 int fd; 307 int s; 308 caddr_t name; 309 int namelen; 310 } */ *uap; 311{ 312 struct sockaddr *sa; 313 int error; 314 315 error = getsockaddr(&sa, uap->name, uap->namelen); 316 if (error == 0) { 317 error = kern_bindat(td, uap->fd, uap->s, sa); 318 free(sa, M_SONAME); 319 } 320 return (error); 321} 322 323/* ARGSUSED */ 324int 325sys_listen(td, uap) 326 struct thread *td; 327 struct listen_args /* { 328 int s; 329 int backlog; 330 } */ *uap; 331{ 332 struct socket *so; 333 struct file *fp; 334 cap_rights_t rights; 335 int error; 336 337 AUDIT_ARG_FD(uap->s); 338 error = getsock_cap(td->td_proc->p_fd, uap->s, 339 cap_rights_init(&rights, CAP_LISTEN), &fp, NULL); 340 if (error == 0) { 341 so = fp->f_data; 342#ifdef MAC 343 error = mac_socket_check_listen(td->td_ucred, so); 344 if (error == 0) 345#endif 346 error = solisten(so, uap->backlog, td); 347 fdrop(fp, td); 348 } 349 return(error); 350} 351 352/* 353 * accept1() 354 */ 355static int 356accept1(td, s, uname, anamelen, flags) 357 struct thread *td; 358 int s; 359 struct sockaddr *uname; 360 socklen_t *anamelen; 361 int flags; 362{ 363 struct sockaddr *name; 364 socklen_t namelen; 365 struct file *fp; 366 int error; 367 368 if (uname == NULL) 369 return (kern_accept4(td, s, NULL, NULL, flags, NULL)); 370 371 error = copyin(anamelen, &namelen, sizeof (namelen)); 372 if (error != 0) 373 return (error); 374 375 error = kern_accept4(td, s, &name, &namelen, flags, &fp); 376 377 /* 378 * return a namelen of zero for older code which might 379 * ignore the return value from accept. 380 */ 381 if (error != 0) { 382 (void) copyout(&namelen, anamelen, sizeof(*anamelen)); 383 return (error); 384 } 385 386 if (error == 0 && uname != NULL) { 387#ifdef COMPAT_OLDSOCK 388 if (flags & ACCEPT4_COMPAT) 389 ((struct osockaddr *)name)->sa_family = 390 name->sa_family; 391#endif 392 error = copyout(name, uname, namelen); 393 } 394 if (error == 0) 395 error = copyout(&namelen, anamelen, 396 sizeof(namelen)); 397 if (error != 0) 398 fdclose(td->td_proc->p_fd, fp, td->td_retval[0], td); 399 fdrop(fp, td); 400 free(name, M_SONAME); 401 return (error); 402} 403 404int 405kern_accept(struct thread *td, int s, struct sockaddr **name, 406 socklen_t *namelen, struct file **fp) 407{ 408 return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp)); 409} 410 411int 412kern_accept4(struct thread *td, int s, struct sockaddr **name, 413 socklen_t *namelen, int flags, struct file **fp) 414{ 415 struct filedesc *fdp; 416 struct file *headfp, *nfp = NULL; 417 struct sockaddr *sa = NULL; 418 struct socket *head, *so; 419 cap_rights_t rights; 420 u_int fflag; 421 pid_t pgid; 422 int error, fd, tmp; 423 424 if (name != NULL) 425 *name = NULL; 426 427 AUDIT_ARG_FD(s); 428 fdp = td->td_proc->p_fd; 429 error = getsock_cap(fdp, s, cap_rights_init(&rights, CAP_ACCEPT), 430 &headfp, &fflag); 431 if (error != 0) 432 return (error); 433 head = headfp->f_data; 434 if ((head->so_options & SO_ACCEPTCONN) == 0) { 435 error = EINVAL; 436 goto done; 437 } 438#ifdef MAC 439 error = mac_socket_check_accept(td->td_ucred, head); 440 if (error != 0) 441 goto done; 442#endif 443 error = falloc(td, &nfp, &fd, (flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0); 444 if (error != 0) 445 goto done; 446 ACCEPT_LOCK(); 447 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { 448 ACCEPT_UNLOCK(); 449 error = EWOULDBLOCK; 450 goto noconnection; 451 } 452 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 453 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) { 454 head->so_error = ECONNABORTED; 455 break; 456 } 457 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH, 458 "accept", 0); 459 if (error != 0) { 460 ACCEPT_UNLOCK(); 461 goto noconnection; 462 } 463 } 464 if (head->so_error) { 465 error = head->so_error; 466 head->so_error = 0; 467 ACCEPT_UNLOCK(); 468 goto noconnection; 469 } 470 so = TAILQ_FIRST(&head->so_comp); 471 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP")); 472 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP")); 473 474 /* 475 * Before changing the flags on the socket, we have to bump the 476 * reference count. Otherwise, if the protocol calls sofree(), 477 * the socket will be released due to a zero refcount. 478 */ 479 SOCK_LOCK(so); /* soref() and so_state update */ 480 soref(so); /* file descriptor reference */ 481 482 TAILQ_REMOVE(&head->so_comp, so, so_list); 483 head->so_qlen--; 484 if (flags & ACCEPT4_INHERIT) 485 so->so_state |= (head->so_state & SS_NBIO); 486 else 487 so->so_state |= (flags & SOCK_NONBLOCK) ? SS_NBIO : 0; 488 so->so_qstate &= ~SQ_COMP; 489 so->so_head = NULL; 490 491 SOCK_UNLOCK(so); 492 ACCEPT_UNLOCK(); 493 494 /* An extra reference on `nfp' has been held for us by falloc(). */ 495 td->td_retval[0] = fd; 496 497 /* connection has been removed from the listen queue */ 498 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0); 499 500 if (flags & ACCEPT4_INHERIT) { 501 pgid = fgetown(&head->so_sigio); 502 if (pgid != 0) 503 fsetown(pgid, &so->so_sigio); 504 } else { 505 fflag &= ~(FNONBLOCK | FASYNC); 506 if (flags & SOCK_NONBLOCK) 507 fflag |= FNONBLOCK; 508 } 509 510 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 511 /* Sync socket nonblocking/async state with file flags */ 512 tmp = fflag & FNONBLOCK; 513 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td); 514 tmp = fflag & FASYNC; 515 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td); 516 sa = 0; 517 error = soaccept(so, &sa); 518 if (error != 0) { 519 /* 520 * return a namelen of zero for older code which might 521 * ignore the return value from accept. 522 */ 523 if (name) 524 *namelen = 0; 525 goto noconnection; 526 } 527 if (sa == NULL) { 528 if (name) 529 *namelen = 0; 530 goto done; 531 } 532 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa); 533 if (name) { 534 /* check sa_len before it is destroyed */ 535 if (*namelen > sa->sa_len) 536 *namelen = sa->sa_len; 537#ifdef KTRACE 538 if (KTRPOINT(td, KTR_STRUCT)) 539 ktrsockaddr(sa); 540#endif 541 *name = sa; 542 sa = NULL; 543 } 544noconnection: 545 free(sa, M_SONAME); 546 547 /* 548 * close the new descriptor, assuming someone hasn't ripped it 549 * out from under us. 550 */ 551 if (error != 0) 552 fdclose(fdp, nfp, fd, td); 553 554 /* 555 * Release explicitly held references before returning. We return 556 * a reference on nfp to the caller on success if they request it. 557 */ 558done: 559 if (fp != NULL) { 560 if (error == 0) { 561 *fp = nfp; 562 nfp = NULL; 563 } else 564 *fp = NULL; 565 } 566 if (nfp != NULL) 567 fdrop(nfp, td); 568 fdrop(headfp, td); 569 return (error); 570} 571 572int 573sys_accept(td, uap) 574 struct thread *td; 575 struct accept_args *uap; 576{ 577 578 return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT)); 579} 580 581int 582sys_accept4(td, uap) 583 struct thread *td; 584 struct accept4_args *uap; 585{ 586 587 if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) 588 return (EINVAL); 589 590 return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags)); 591} 592 593#ifdef COMPAT_OLDSOCK 594int 595oaccept(td, uap) 596 struct thread *td; 597 struct accept_args *uap; 598{ 599 600 return (accept1(td, uap->s, uap->name, uap->anamelen, 601 ACCEPT4_INHERIT | ACCEPT4_COMPAT)); 602} 603#endif /* COMPAT_OLDSOCK */ 604 605/* ARGSUSED */ 606int 607sys_connect(td, uap) 608 struct thread *td; 609 struct connect_args /* { 610 int s; 611 caddr_t name; 612 int namelen; 613 } */ *uap; 614{ 615 struct sockaddr *sa; 616 int error; 617 618 error = getsockaddr(&sa, uap->name, uap->namelen); 619 if (error == 0) { 620 error = kern_connect(td, uap->s, sa); 621 free(sa, M_SONAME); 622 } 623 return (error); 624} 625 626static int 627kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa) 628{ 629 struct socket *so; 630 struct file *fp; 631 cap_rights_t rights; 632 int error, interrupted = 0; 633 634 AUDIT_ARG_FD(fd); 635 AUDIT_ARG_SOCKADDR(td, dirfd, sa); 636 error = getsock_cap(td->td_proc->p_fd, fd, 637 cap_rights_init(&rights, CAP_CONNECT), &fp, NULL); 638 if (error != 0) 639 return (error); 640 so = fp->f_data; 641 if (so->so_state & SS_ISCONNECTING) { 642 error = EALREADY; 643 goto done1; 644 } 645#ifdef KTRACE 646 if (KTRPOINT(td, KTR_STRUCT)) 647 ktrsockaddr(sa); 648#endif 649#ifdef MAC 650 error = mac_socket_check_connect(td->td_ucred, so, sa); 651 if (error != 0) 652 goto bad; 653#endif 654 if (dirfd == AT_FDCWD) 655 error = soconnect(so, sa, td); 656 else 657 error = soconnectat(dirfd, so, sa, td); 658 if (error != 0) 659 goto bad; 660 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 661 error = EINPROGRESS; 662 goto done1; 663 } 664 SOCK_LOCK(so); 665 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 666 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH, 667 "connec", 0); 668 if (error != 0) { 669 if (error == EINTR || error == ERESTART) 670 interrupted = 1; 671 break; 672 } 673 } 674 if (error == 0) { 675 error = so->so_error; 676 so->so_error = 0; 677 } 678 SOCK_UNLOCK(so); 679bad: 680 if (!interrupted) 681 so->so_state &= ~SS_ISCONNECTING; 682 if (error == ERESTART) 683 error = EINTR; 684done1: 685 fdrop(fp, td); 686 return (error); 687} 688 689int 690kern_connect(struct thread *td, int fd, struct sockaddr *sa) 691{ 692 693 return (kern_connectat(td, AT_FDCWD, fd, sa)); 694} 695 696/* ARGSUSED */ 697int 698sys_connectat(td, uap) 699 struct thread *td; 700 struct connectat_args /* { 701 int fd; 702 int s; 703 caddr_t name; 704 int namelen; 705 } */ *uap; 706{ 707 struct sockaddr *sa; 708 int error; 709 710 error = getsockaddr(&sa, uap->name, uap->namelen); 711 if (error == 0) { 712 error = kern_connectat(td, uap->fd, uap->s, sa); 713 free(sa, M_SONAME); 714 } 715 return (error); 716} 717 718int 719kern_socketpair(struct thread *td, int domain, int type, int protocol, 720 int *rsv) 721{ 722 struct filedesc *fdp = td->td_proc->p_fd; 723 struct file *fp1, *fp2; 724 struct socket *so1, *so2; 725 int fd, error, oflag, fflag; 726 727 AUDIT_ARG_SOCKET(domain, type, protocol); 728 729 oflag = 0; 730 fflag = 0; 731 if ((type & SOCK_CLOEXEC) != 0) { 732 type &= ~SOCK_CLOEXEC; 733 oflag |= O_CLOEXEC; 734 } 735 if ((type & SOCK_NONBLOCK) != 0) { 736 type &= ~SOCK_NONBLOCK; 737 fflag |= FNONBLOCK; 738 } 739#ifdef MAC 740 /* We might want to have a separate check for socket pairs. */ 741 error = mac_socket_check_create(td->td_ucred, domain, type, 742 protocol); 743 if (error != 0) 744 return (error); 745#endif 746 error = socreate(domain, &so1, type, protocol, td->td_ucred, td); 747 if (error != 0) 748 return (error); 749 error = socreate(domain, &so2, type, protocol, td->td_ucred, td); 750 if (error != 0) 751 goto free1; 752 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ 753 error = falloc(td, &fp1, &fd, oflag); 754 if (error != 0) 755 goto free2; 756 rsv[0] = fd; 757 fp1->f_data = so1; /* so1 already has ref count */ 758 error = falloc(td, &fp2, &fd, oflag); 759 if (error != 0) 760 goto free3; 761 fp2->f_data = so2; /* so2 already has ref count */ 762 rsv[1] = fd; 763 error = soconnect2(so1, so2); 764 if (error != 0) 765 goto free4; 766 if (type == SOCK_DGRAM) { 767 /* 768 * Datagram socket connection is asymmetric. 769 */ 770 error = soconnect2(so2, so1); 771 if (error != 0) 772 goto free4; 773 } 774 finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, 775 &socketops); 776 finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, 777 &socketops); 778 if ((fflag & FNONBLOCK) != 0) { 779 (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); 780 (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); 781 } 782 fdrop(fp1, td); 783 fdrop(fp2, td); 784 return (0); 785free4: 786 fdclose(fdp, fp2, rsv[1], td); 787 fdrop(fp2, td); 788free3: 789 fdclose(fdp, fp1, rsv[0], td); 790 fdrop(fp1, td); 791free2: 792 if (so2 != NULL) 793 (void)soclose(so2); 794free1: 795 if (so1 != NULL) 796 (void)soclose(so1); 797 return (error); 798} 799 800int 801sys_socketpair(struct thread *td, struct socketpair_args *uap) 802{ 803 int error, sv[2]; 804 805 error = kern_socketpair(td, uap->domain, uap->type, 806 uap->protocol, sv); 807 if (error != 0) 808 return (error); 809 error = copyout(sv, uap->rsv, 2 * sizeof(int)); 810 if (error != 0) { 811 (void)kern_close(td, sv[0]); 812 (void)kern_close(td, sv[1]); 813 } 814 return (error); 815} 816 817static int 818sendit(td, s, mp, flags) 819 struct thread *td; 820 int s; 821 struct msghdr *mp; 822 int flags; 823{ 824 struct mbuf *control; 825 struct sockaddr *to; 826 int error; 827 828#ifdef CAPABILITY_MODE 829 if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) 830 return (ECAPMODE); 831#endif 832 833 if (mp->msg_name != NULL) { 834 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); 835 if (error != 0) { 836 to = NULL; 837 goto bad; 838 } 839 mp->msg_name = to; 840 } else { 841 to = NULL; 842 } 843 844 if (mp->msg_control) { 845 if (mp->msg_controllen < sizeof(struct cmsghdr) 846#ifdef COMPAT_OLDSOCK 847 && mp->msg_flags != MSG_COMPAT 848#endif 849 ) { 850 error = EINVAL; 851 goto bad; 852 } 853 error = sockargs(&control, mp->msg_control, 854 mp->msg_controllen, MT_CONTROL); 855 if (error != 0) 856 goto bad; 857#ifdef COMPAT_OLDSOCK 858 if (mp->msg_flags == MSG_COMPAT) { 859 struct cmsghdr *cm; 860 861 M_PREPEND(control, sizeof(*cm), M_WAITOK); 862 cm = mtod(control, struct cmsghdr *); 863 cm->cmsg_len = control->m_len; 864 cm->cmsg_level = SOL_SOCKET; 865 cm->cmsg_type = SCM_RIGHTS; 866 } 867#endif 868 } else { 869 control = NULL; 870 } 871 872 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE); 873 874bad: 875 free(to, M_SONAME); 876 return (error); 877} 878 879int 880kern_sendit(td, s, mp, flags, control, segflg) 881 struct thread *td; 882 int s; 883 struct msghdr *mp; 884 int flags; 885 struct mbuf *control; 886 enum uio_seg segflg; 887{ 888 struct file *fp; 889 struct uio auio; 890 struct iovec *iov; 891 struct socket *so; 892 cap_rights_t rights; 893#ifdef KTRACE 894 struct uio *ktruio = NULL; 895#endif 896 ssize_t len; 897 int i, error; 898 899 AUDIT_ARG_FD(s); 900 cap_rights_init(&rights, CAP_SEND); 901 if (mp->msg_name != NULL) { 902 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name); 903 cap_rights_set(&rights, CAP_CONNECT); 904 } 905 error = getsock_cap(td->td_proc->p_fd, s, &rights, &fp, NULL); 906 if (error != 0) 907 return (error); 908 so = (struct socket *)fp->f_data; 909 910#ifdef KTRACE 911 if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) 912 ktrsockaddr(mp->msg_name); 913#endif 914#ifdef MAC 915 if (mp->msg_name != NULL) { 916 error = mac_socket_check_connect(td->td_ucred, so, 917 mp->msg_name); 918 if (error != 0) 919 goto bad; 920 } 921 error = mac_socket_check_send(td->td_ucred, so); 922 if (error != 0) 923 goto bad; 924#endif 925 926 auio.uio_iov = mp->msg_iov; 927 auio.uio_iovcnt = mp->msg_iovlen; 928 auio.uio_segflg = segflg; 929 auio.uio_rw = UIO_WRITE; 930 auio.uio_td = td; 931 auio.uio_offset = 0; /* XXX */ 932 auio.uio_resid = 0; 933 iov = mp->msg_iov; 934 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 935 if ((auio.uio_resid += iov->iov_len) < 0) { 936 error = EINVAL; 937 goto bad; 938 } 939 } 940#ifdef KTRACE 941 if (KTRPOINT(td, KTR_GENIO)) 942 ktruio = cloneuio(&auio); 943#endif 944 len = auio.uio_resid; 945 error = sosend(so, mp->msg_name, &auio, 0, control, flags, td); 946 if (error != 0) { 947 if (auio.uio_resid != len && (error == ERESTART || 948 error == EINTR || error == EWOULDBLOCK)) 949 error = 0; 950 /* Generation of SIGPIPE can be controlled per socket */ 951 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 952 !(flags & MSG_NOSIGNAL)) { 953 PROC_LOCK(td->td_proc); 954 tdsignal(td, SIGPIPE); 955 PROC_UNLOCK(td->td_proc); 956 } 957 } 958 if (error == 0) 959 td->td_retval[0] = len - auio.uio_resid; 960#ifdef KTRACE 961 if (ktruio != NULL) { 962 ktruio->uio_resid = td->td_retval[0]; 963 ktrgenio(s, UIO_WRITE, ktruio, error); 964 } 965#endif 966bad: 967 fdrop(fp, td); 968 return (error); 969} 970 971int 972sys_sendto(td, uap) 973 struct thread *td; 974 struct sendto_args /* { 975 int s; 976 caddr_t buf; 977 size_t len; 978 int flags; 979 caddr_t to; 980 int tolen; 981 } */ *uap; 982{ 983 struct msghdr msg; 984 struct iovec aiov; 985 986 msg.msg_name = uap->to; 987 msg.msg_namelen = uap->tolen; 988 msg.msg_iov = &aiov; 989 msg.msg_iovlen = 1; 990 msg.msg_control = 0; 991#ifdef COMPAT_OLDSOCK 992 msg.msg_flags = 0; 993#endif 994 aiov.iov_base = uap->buf; 995 aiov.iov_len = uap->len; 996 return (sendit(td, uap->s, &msg, uap->flags)); 997} 998 999#ifdef COMPAT_OLDSOCK 1000int 1001osend(td, uap) 1002 struct thread *td; 1003 struct osend_args /* { 1004 int s; 1005 caddr_t buf; 1006 int len; 1007 int flags; 1008 } */ *uap; 1009{ 1010 struct msghdr msg; 1011 struct iovec aiov; 1012 1013 msg.msg_name = 0; 1014 msg.msg_namelen = 0; 1015 msg.msg_iov = &aiov; 1016 msg.msg_iovlen = 1; 1017 aiov.iov_base = uap->buf; 1018 aiov.iov_len = uap->len; 1019 msg.msg_control = 0; 1020 msg.msg_flags = 0; 1021 return (sendit(td, uap->s, &msg, uap->flags)); 1022} 1023 1024int 1025osendmsg(td, uap) 1026 struct thread *td; 1027 struct osendmsg_args /* { 1028 int s; 1029 caddr_t msg; 1030 int flags; 1031 } */ *uap; 1032{ 1033 struct msghdr msg; 1034 struct iovec *iov; 1035 int error; 1036 1037 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1038 if (error != 0) 1039 return (error); 1040 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1041 if (error != 0) 1042 return (error); 1043 msg.msg_iov = iov; 1044 msg.msg_flags = MSG_COMPAT; 1045 error = sendit(td, uap->s, &msg, uap->flags); 1046 free(iov, M_IOV); 1047 return (error); 1048} 1049#endif 1050 1051int 1052sys_sendmsg(td, uap) 1053 struct thread *td; 1054 struct sendmsg_args /* { 1055 int s; 1056 caddr_t msg; 1057 int flags; 1058 } */ *uap; 1059{ 1060 struct msghdr msg; 1061 struct iovec *iov; 1062 int error; 1063 1064 error = copyin(uap->msg, &msg, sizeof (msg)); 1065 if (error != 0) 1066 return (error); 1067 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1068 if (error != 0) 1069 return (error); 1070 msg.msg_iov = iov; 1071#ifdef COMPAT_OLDSOCK 1072 msg.msg_flags = 0; 1073#endif 1074 error = sendit(td, uap->s, &msg, uap->flags); 1075 free(iov, M_IOV); 1076 return (error); 1077} 1078 1079int 1080kern_recvit(td, s, mp, fromseg, controlp) 1081 struct thread *td; 1082 int s; 1083 struct msghdr *mp; 1084 enum uio_seg fromseg; 1085 struct mbuf **controlp; 1086{ 1087 struct uio auio; 1088 struct iovec *iov; 1089 struct mbuf *m, *control = NULL; 1090 caddr_t ctlbuf; 1091 struct file *fp; 1092 struct socket *so; 1093 struct sockaddr *fromsa = NULL; 1094 cap_rights_t rights; 1095#ifdef KTRACE 1096 struct uio *ktruio = NULL; 1097#endif 1098 ssize_t len; 1099 int error, i; 1100 1101 if (controlp != NULL) 1102 *controlp = NULL; 1103 1104 AUDIT_ARG_FD(s); 1105 error = getsock_cap(td->td_proc->p_fd, s, 1106 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 1107 if (error != 0) 1108 return (error); 1109 so = fp->f_data; 1110 1111#ifdef MAC 1112 error = mac_socket_check_receive(td->td_ucred, so); 1113 if (error != 0) { 1114 fdrop(fp, td); 1115 return (error); 1116 } 1117#endif 1118 1119 auio.uio_iov = mp->msg_iov; 1120 auio.uio_iovcnt = mp->msg_iovlen; 1121 auio.uio_segflg = UIO_USERSPACE; 1122 auio.uio_rw = UIO_READ; 1123 auio.uio_td = td; 1124 auio.uio_offset = 0; /* XXX */ 1125 auio.uio_resid = 0; 1126 iov = mp->msg_iov; 1127 for (i = 0; i < mp->msg_iovlen; i++, iov++) { 1128 if ((auio.uio_resid += iov->iov_len) < 0) { 1129 fdrop(fp, td); 1130 return (EINVAL); 1131 } 1132 } 1133#ifdef KTRACE 1134 if (KTRPOINT(td, KTR_GENIO)) 1135 ktruio = cloneuio(&auio); 1136#endif 1137 len = auio.uio_resid; 1138 error = soreceive(so, &fromsa, &auio, NULL, 1139 (mp->msg_control || controlp) ? &control : NULL, 1140 &mp->msg_flags); 1141 if (error != 0) { 1142 if (auio.uio_resid != len && (error == ERESTART || 1143 error == EINTR || error == EWOULDBLOCK)) 1144 error = 0; 1145 } 1146 if (fromsa != NULL) 1147 AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa); 1148#ifdef KTRACE 1149 if (ktruio != NULL) { 1150 ktruio->uio_resid = len - auio.uio_resid; 1151 ktrgenio(s, UIO_READ, ktruio, error); 1152 } 1153#endif 1154 if (error != 0) 1155 goto out; 1156 td->td_retval[0] = len - auio.uio_resid; 1157 if (mp->msg_name) { 1158 len = mp->msg_namelen; 1159 if (len <= 0 || fromsa == NULL) 1160 len = 0; 1161 else { 1162 /* save sa_len before it is destroyed by MSG_COMPAT */ 1163 len = MIN(len, fromsa->sa_len); 1164#ifdef COMPAT_OLDSOCK 1165 if (mp->msg_flags & MSG_COMPAT) 1166 ((struct osockaddr *)fromsa)->sa_family = 1167 fromsa->sa_family; 1168#endif 1169 if (fromseg == UIO_USERSPACE) { 1170 error = copyout(fromsa, mp->msg_name, 1171 (unsigned)len); 1172 if (error != 0) 1173 goto out; 1174 } else 1175 bcopy(fromsa, mp->msg_name, len); 1176 } 1177 mp->msg_namelen = len; 1178 } 1179 if (mp->msg_control && controlp == NULL) { 1180#ifdef COMPAT_OLDSOCK 1181 /* 1182 * We assume that old recvmsg calls won't receive access 1183 * rights and other control info, esp. as control info 1184 * is always optional and those options didn't exist in 4.3. 1185 * If we receive rights, trim the cmsghdr; anything else 1186 * is tossed. 1187 */ 1188 if (control && mp->msg_flags & MSG_COMPAT) { 1189 if (mtod(control, struct cmsghdr *)->cmsg_level != 1190 SOL_SOCKET || 1191 mtod(control, struct cmsghdr *)->cmsg_type != 1192 SCM_RIGHTS) { 1193 mp->msg_controllen = 0; 1194 goto out; 1195 } 1196 control->m_len -= sizeof (struct cmsghdr); 1197 control->m_data += sizeof (struct cmsghdr); 1198 } 1199#endif 1200 len = mp->msg_controllen; 1201 m = control; 1202 mp->msg_controllen = 0; 1203 ctlbuf = mp->msg_control; 1204 1205 while (m && len > 0) { 1206 unsigned int tocopy; 1207 1208 if (len >= m->m_len) 1209 tocopy = m->m_len; 1210 else { 1211 mp->msg_flags |= MSG_CTRUNC; 1212 tocopy = len; 1213 } 1214 1215 if ((error = copyout(mtod(m, caddr_t), 1216 ctlbuf, tocopy)) != 0) 1217 goto out; 1218 1219 ctlbuf += tocopy; 1220 len -= tocopy; 1221 m = m->m_next; 1222 } 1223 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; 1224 } 1225out: 1226 fdrop(fp, td); 1227#ifdef KTRACE 1228 if (fromsa && KTRPOINT(td, KTR_STRUCT)) 1229 ktrsockaddr(fromsa); 1230#endif 1231 free(fromsa, M_SONAME); 1232 1233 if (error == 0 && controlp != NULL) 1234 *controlp = control; 1235 else if (control) 1236 m_freem(control); 1237 1238 return (error); 1239} 1240 1241static int 1242recvit(td, s, mp, namelenp) 1243 struct thread *td; 1244 int s; 1245 struct msghdr *mp; 1246 void *namelenp; 1247{ 1248 int error; 1249 1250 error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL); 1251 if (error != 0) 1252 return (error); 1253 if (namelenp != NULL) { 1254 error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t)); 1255#ifdef COMPAT_OLDSOCK 1256 if (mp->msg_flags & MSG_COMPAT) 1257 error = 0; /* old recvfrom didn't check */ 1258#endif 1259 } 1260 return (error); 1261} 1262 1263int 1264sys_recvfrom(td, uap) 1265 struct thread *td; 1266 struct recvfrom_args /* { 1267 int s; 1268 caddr_t buf; 1269 size_t len; 1270 int flags; 1271 struct sockaddr * __restrict from; 1272 socklen_t * __restrict fromlenaddr; 1273 } */ *uap; 1274{ 1275 struct msghdr msg; 1276 struct iovec aiov; 1277 int error; 1278 1279 if (uap->fromlenaddr) { 1280 error = copyin(uap->fromlenaddr, 1281 &msg.msg_namelen, sizeof (msg.msg_namelen)); 1282 if (error != 0) 1283 goto done2; 1284 } else { 1285 msg.msg_namelen = 0; 1286 } 1287 msg.msg_name = uap->from; 1288 msg.msg_iov = &aiov; 1289 msg.msg_iovlen = 1; 1290 aiov.iov_base = uap->buf; 1291 aiov.iov_len = uap->len; 1292 msg.msg_control = 0; 1293 msg.msg_flags = uap->flags; 1294 error = recvit(td, uap->s, &msg, uap->fromlenaddr); 1295done2: 1296 return (error); 1297} 1298 1299#ifdef COMPAT_OLDSOCK 1300int 1301orecvfrom(td, uap) 1302 struct thread *td; 1303 struct recvfrom_args *uap; 1304{ 1305 1306 uap->flags |= MSG_COMPAT; 1307 return (sys_recvfrom(td, uap)); 1308} 1309#endif 1310 1311#ifdef COMPAT_OLDSOCK 1312int 1313orecv(td, uap) 1314 struct thread *td; 1315 struct orecv_args /* { 1316 int s; 1317 caddr_t buf; 1318 int len; 1319 int flags; 1320 } */ *uap; 1321{ 1322 struct msghdr msg; 1323 struct iovec aiov; 1324 1325 msg.msg_name = 0; 1326 msg.msg_namelen = 0; 1327 msg.msg_iov = &aiov; 1328 msg.msg_iovlen = 1; 1329 aiov.iov_base = uap->buf; 1330 aiov.iov_len = uap->len; 1331 msg.msg_control = 0; 1332 msg.msg_flags = uap->flags; 1333 return (recvit(td, uap->s, &msg, NULL)); 1334} 1335 1336/* 1337 * Old recvmsg. This code takes advantage of the fact that the old msghdr 1338 * overlays the new one, missing only the flags, and with the (old) access 1339 * rights where the control fields are now. 1340 */ 1341int 1342orecvmsg(td, uap) 1343 struct thread *td; 1344 struct orecvmsg_args /* { 1345 int s; 1346 struct omsghdr *msg; 1347 int flags; 1348 } */ *uap; 1349{ 1350 struct msghdr msg; 1351 struct iovec *iov; 1352 int error; 1353 1354 error = copyin(uap->msg, &msg, sizeof (struct omsghdr)); 1355 if (error != 0) 1356 return (error); 1357 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1358 if (error != 0) 1359 return (error); 1360 msg.msg_flags = uap->flags | MSG_COMPAT; 1361 msg.msg_iov = iov; 1362 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen); 1363 if (msg.msg_controllen && error == 0) 1364 error = copyout(&msg.msg_controllen, 1365 &uap->msg->msg_accrightslen, sizeof (int)); 1366 free(iov, M_IOV); 1367 return (error); 1368} 1369#endif 1370 1371int 1372sys_recvmsg(td, uap) 1373 struct thread *td; 1374 struct recvmsg_args /* { 1375 int s; 1376 struct msghdr *msg; 1377 int flags; 1378 } */ *uap; 1379{ 1380 struct msghdr msg; 1381 struct iovec *uiov, *iov; 1382 int error; 1383 1384 error = copyin(uap->msg, &msg, sizeof (msg)); 1385 if (error != 0) 1386 return (error); 1387 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); 1388 if (error != 0) 1389 return (error); 1390 msg.msg_flags = uap->flags; 1391#ifdef COMPAT_OLDSOCK 1392 msg.msg_flags &= ~MSG_COMPAT; 1393#endif 1394 uiov = msg.msg_iov; 1395 msg.msg_iov = iov; 1396 error = recvit(td, uap->s, &msg, NULL); 1397 if (error == 0) { 1398 msg.msg_iov = uiov; 1399 error = copyout(&msg, uap->msg, sizeof(msg)); 1400 } 1401 free(iov, M_IOV); 1402 return (error); 1403} 1404 1405/* ARGSUSED */ 1406int 1407sys_shutdown(td, uap) 1408 struct thread *td; 1409 struct shutdown_args /* { 1410 int s; 1411 int how; 1412 } */ *uap; 1413{ 1414 struct socket *so; 1415 struct file *fp; 1416 cap_rights_t rights; 1417 int error; 1418 1419 AUDIT_ARG_FD(uap->s); 1420 error = getsock_cap(td->td_proc->p_fd, uap->s, 1421 cap_rights_init(&rights, CAP_SHUTDOWN), &fp, NULL); 1422 if (error == 0) { 1423 so = fp->f_data; 1424 error = soshutdown(so, uap->how); 1425 fdrop(fp, td); 1426 } 1427 return (error); 1428} 1429 1430/* ARGSUSED */ 1431int 1432sys_setsockopt(td, uap) 1433 struct thread *td; 1434 struct setsockopt_args /* { 1435 int s; 1436 int level; 1437 int name; 1438 caddr_t val; 1439 int valsize; 1440 } */ *uap; 1441{ 1442 1443 return (kern_setsockopt(td, uap->s, uap->level, uap->name, 1444 uap->val, UIO_USERSPACE, uap->valsize)); 1445} 1446 1447int 1448kern_setsockopt(td, s, level, name, val, valseg, valsize) 1449 struct thread *td; 1450 int s; 1451 int level; 1452 int name; 1453 void *val; 1454 enum uio_seg valseg; 1455 socklen_t valsize; 1456{ 1457 struct socket *so; 1458 struct file *fp; 1459 struct sockopt sopt; 1460 cap_rights_t rights; 1461 int error; 1462 1463 if (val == NULL && valsize != 0) 1464 return (EFAULT); 1465 if ((int)valsize < 0) 1466 return (EINVAL); 1467 1468 sopt.sopt_dir = SOPT_SET; 1469 sopt.sopt_level = level; 1470 sopt.sopt_name = name; 1471 sopt.sopt_val = val; 1472 sopt.sopt_valsize = valsize; 1473 switch (valseg) { 1474 case UIO_USERSPACE: 1475 sopt.sopt_td = td; 1476 break; 1477 case UIO_SYSSPACE: 1478 sopt.sopt_td = NULL; 1479 break; 1480 default: 1481 panic("kern_setsockopt called with bad valseg"); 1482 } 1483 1484 AUDIT_ARG_FD(s); 1485 error = getsock_cap(td->td_proc->p_fd, s, 1486 cap_rights_init(&rights, CAP_SETSOCKOPT), &fp, NULL); 1487 if (error == 0) { 1488 so = fp->f_data; 1489 error = sosetopt(so, &sopt); 1490 fdrop(fp, td); 1491 } 1492 return(error); 1493} 1494 1495/* ARGSUSED */ 1496int 1497sys_getsockopt(td, uap) 1498 struct thread *td; 1499 struct getsockopt_args /* { 1500 int s; 1501 int level; 1502 int name; 1503 void * __restrict val; 1504 socklen_t * __restrict avalsize; 1505 } */ *uap; 1506{ 1507 socklen_t valsize; 1508 int error; 1509 1510 if (uap->val) { 1511 error = copyin(uap->avalsize, &valsize, sizeof (valsize)); 1512 if (error != 0) 1513 return (error); 1514 } 1515 1516 error = kern_getsockopt(td, uap->s, uap->level, uap->name, 1517 uap->val, UIO_USERSPACE, &valsize); 1518 1519 if (error == 0) 1520 error = copyout(&valsize, uap->avalsize, sizeof (valsize)); 1521 return (error); 1522} 1523 1524/* 1525 * Kernel version of getsockopt. 1526 * optval can be a userland or userspace. optlen is always a kernel pointer. 1527 */ 1528int 1529kern_getsockopt(td, s, level, name, val, valseg, valsize) 1530 struct thread *td; 1531 int s; 1532 int level; 1533 int name; 1534 void *val; 1535 enum uio_seg valseg; 1536 socklen_t *valsize; 1537{ 1538 struct socket *so; 1539 struct file *fp; 1540 struct sockopt sopt; 1541 cap_rights_t rights; 1542 int error; 1543 1544 if (val == NULL) 1545 *valsize = 0; 1546 if ((int)*valsize < 0) 1547 return (EINVAL); 1548 1549 sopt.sopt_dir = SOPT_GET; 1550 sopt.sopt_level = level; 1551 sopt.sopt_name = name; 1552 sopt.sopt_val = val; 1553 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */ 1554 switch (valseg) { 1555 case UIO_USERSPACE: 1556 sopt.sopt_td = td; 1557 break; 1558 case UIO_SYSSPACE: 1559 sopt.sopt_td = NULL; 1560 break; 1561 default: 1562 panic("kern_getsockopt called with bad valseg"); 1563 } 1564 1565 AUDIT_ARG_FD(s); 1566 error = getsock_cap(td->td_proc->p_fd, s, 1567 cap_rights_init(&rights, CAP_GETSOCKOPT), &fp, NULL); 1568 if (error == 0) { 1569 so = fp->f_data; 1570 error = sogetopt(so, &sopt); 1571 *valsize = sopt.sopt_valsize; 1572 fdrop(fp, td); 1573 } 1574 return (error); 1575} 1576 1577/* 1578 * getsockname1() - Get socket name. 1579 */ 1580/* ARGSUSED */ 1581static int 1582getsockname1(td, uap, compat) 1583 struct thread *td; 1584 struct getsockname_args /* { 1585 int fdes; 1586 struct sockaddr * __restrict asa; 1587 socklen_t * __restrict alen; 1588 } */ *uap; 1589 int compat; 1590{ 1591 struct sockaddr *sa; 1592 socklen_t len; 1593 int error; 1594 1595 error = copyin(uap->alen, &len, sizeof(len)); 1596 if (error != 0) 1597 return (error); 1598 1599 error = kern_getsockname(td, uap->fdes, &sa, &len); 1600 if (error != 0) 1601 return (error); 1602 1603 if (len != 0) { 1604#ifdef COMPAT_OLDSOCK 1605 if (compat) 1606 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1607#endif 1608 error = copyout(sa, uap->asa, (u_int)len); 1609 } 1610 free(sa, M_SONAME); 1611 if (error == 0) 1612 error = copyout(&len, uap->alen, sizeof(len)); 1613 return (error); 1614} 1615 1616int 1617kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, 1618 socklen_t *alen) 1619{ 1620 struct socket *so; 1621 struct file *fp; 1622 cap_rights_t rights; 1623 socklen_t len; 1624 int error; 1625 1626 AUDIT_ARG_FD(fd); 1627 error = getsock_cap(td->td_proc->p_fd, fd, 1628 cap_rights_init(&rights, CAP_GETSOCKNAME), &fp, NULL); 1629 if (error != 0) 1630 return (error); 1631 so = fp->f_data; 1632 *sa = NULL; 1633 CURVNET_SET(so->so_vnet); 1634 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa); 1635 CURVNET_RESTORE(); 1636 if (error != 0) 1637 goto bad; 1638 if (*sa == NULL) 1639 len = 0; 1640 else 1641 len = MIN(*alen, (*sa)->sa_len); 1642 *alen = len; 1643#ifdef KTRACE 1644 if (KTRPOINT(td, KTR_STRUCT)) 1645 ktrsockaddr(*sa); 1646#endif 1647bad: 1648 fdrop(fp, td); 1649 if (error != 0 && *sa != NULL) { 1650 free(*sa, M_SONAME); 1651 *sa = NULL; 1652 } 1653 return (error); 1654} 1655 1656int 1657sys_getsockname(td, uap) 1658 struct thread *td; 1659 struct getsockname_args *uap; 1660{ 1661 1662 return (getsockname1(td, uap, 0)); 1663} 1664 1665#ifdef COMPAT_OLDSOCK 1666int 1667ogetsockname(td, uap) 1668 struct thread *td; 1669 struct getsockname_args *uap; 1670{ 1671 1672 return (getsockname1(td, uap, 1)); 1673} 1674#endif /* COMPAT_OLDSOCK */ 1675 1676/* 1677 * getpeername1() - Get name of peer for connected socket. 1678 */ 1679/* ARGSUSED */ 1680static int 1681getpeername1(td, uap, compat) 1682 struct thread *td; 1683 struct getpeername_args /* { 1684 int fdes; 1685 struct sockaddr * __restrict asa; 1686 socklen_t * __restrict alen; 1687 } */ *uap; 1688 int compat; 1689{ 1690 struct sockaddr *sa; 1691 socklen_t len; 1692 int error; 1693 1694 error = copyin(uap->alen, &len, sizeof (len)); 1695 if (error != 0) 1696 return (error); 1697 1698 error = kern_getpeername(td, uap->fdes, &sa, &len); 1699 if (error != 0) 1700 return (error); 1701 1702 if (len != 0) { 1703#ifdef COMPAT_OLDSOCK 1704 if (compat) 1705 ((struct osockaddr *)sa)->sa_family = sa->sa_family; 1706#endif 1707 error = copyout(sa, uap->asa, (u_int)len); 1708 } 1709 free(sa, M_SONAME); 1710 if (error == 0) 1711 error = copyout(&len, uap->alen, sizeof(len)); 1712 return (error); 1713} 1714 1715int 1716kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, 1717 socklen_t *alen) 1718{ 1719 struct socket *so; 1720 struct file *fp; 1721 cap_rights_t rights; 1722 socklen_t len; 1723 int error; 1724 1725 AUDIT_ARG_FD(fd); 1726 error = getsock_cap(td->td_proc->p_fd, fd, 1727 cap_rights_init(&rights, CAP_GETPEERNAME), &fp, NULL); 1728 if (error != 0) 1729 return (error); 1730 so = fp->f_data; 1731 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1732 error = ENOTCONN; 1733 goto done; 1734 } 1735 *sa = NULL; 1736 CURVNET_SET(so->so_vnet); 1737 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa); 1738 CURVNET_RESTORE(); 1739 if (error != 0) 1740 goto bad; 1741 if (*sa == NULL) 1742 len = 0; 1743 else 1744 len = MIN(*alen, (*sa)->sa_len); 1745 *alen = len; 1746#ifdef KTRACE 1747 if (KTRPOINT(td, KTR_STRUCT)) 1748 ktrsockaddr(*sa); 1749#endif 1750bad: 1751 if (error != 0 && *sa != NULL) { 1752 free(*sa, M_SONAME); 1753 *sa = NULL; 1754 } 1755done: 1756 fdrop(fp, td); 1757 return (error); 1758} 1759 1760int 1761sys_getpeername(td, uap) 1762 struct thread *td; 1763 struct getpeername_args *uap; 1764{ 1765 1766 return (getpeername1(td, uap, 0)); 1767} 1768 1769#ifdef COMPAT_OLDSOCK 1770int 1771ogetpeername(td, uap) 1772 struct thread *td; 1773 struct ogetpeername_args *uap; 1774{ 1775 1776 /* XXX uap should have type `getpeername_args *' to begin with. */ 1777 return (getpeername1(td, (struct getpeername_args *)uap, 1)); 1778} 1779#endif /* COMPAT_OLDSOCK */ 1780 1781int 1782sockargs(mp, buf, buflen, type) 1783 struct mbuf **mp; 1784 caddr_t buf; 1785 int buflen, type; 1786{ 1787 struct sockaddr *sa; 1788 struct mbuf *m; 1789 int error; 1790 1791 if (buflen > MLEN) { 1792#ifdef COMPAT_OLDSOCK 1793 if (type == MT_SONAME && buflen <= 112) 1794 buflen = MLEN; /* unix domain compat. hack */ 1795 else 1796#endif 1797 if (buflen > MCLBYTES) 1798 return (EINVAL); 1799 } 1800 m = m_get2(buflen, M_WAITOK, type, 0); 1801 m->m_len = buflen; 1802 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); 1803 if (error != 0) 1804 (void) m_free(m); 1805 else { 1806 *mp = m; 1807 if (type == MT_SONAME) { 1808 sa = mtod(m, struct sockaddr *); 1809 1810#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1811 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1812 sa->sa_family = sa->sa_len; 1813#endif 1814 sa->sa_len = buflen; 1815 } 1816 } 1817 return (error); 1818} 1819 1820int 1821getsockaddr(namp, uaddr, len) 1822 struct sockaddr **namp; 1823 caddr_t uaddr; 1824 size_t len; 1825{ 1826 struct sockaddr *sa; 1827 int error; 1828 1829 if (len > SOCK_MAXADDRLEN) 1830 return (ENAMETOOLONG); 1831 if (len < offsetof(struct sockaddr, sa_data[0])) 1832 return (EINVAL); 1833 sa = malloc(len, M_SONAME, M_WAITOK); 1834 error = copyin(uaddr, sa, len); 1835 if (error != 0) { 1836 free(sa, M_SONAME); 1837 } else { 1838#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN 1839 if (sa->sa_family == 0 && sa->sa_len < AF_MAX) 1840 sa->sa_family = sa->sa_len; 1841#endif 1842 sa->sa_len = len; 1843 *namp = sa; 1844 } 1845 return (error); 1846} 1847 1848/* 1849 * Detach mapped page and release resources back to the system. 1850 */ 1851int 1852sf_buf_mext(struct mbuf *mb, void *addr, void *args) 1853{ 1854 vm_page_t m; 1855 struct sendfile_sync *sfs; 1856 1857 m = sf_buf_page(args); 1858 sf_buf_free(args); 1859 vm_page_lock(m); 1860 vm_page_unwire(m, 0); 1861 /* 1862 * Check for the object going away on us. This can 1863 * happen since we don't hold a reference to it. 1864 * If so, we're responsible for freeing the page. 1865 */ 1866 if (m->wire_count == 0 && m->object == NULL) 1867 vm_page_free(m); 1868 vm_page_unlock(m); 1869 if (addr != NULL) { 1870 sfs = addr; 1871 sf_sync_deref(sfs); 1872 } 1873 return (EXT_FREE_OK); 1874} 1875 1876void 1877sf_sync_deref(struct sendfile_sync *sfs) 1878{ 1879 1880 if (sfs == NULL) 1881 return; 1882 1883 mtx_lock(&sfs->mtx); 1884 KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); 1885 if (--sfs->count == 0) 1886 cv_signal(&sfs->cv); 1887 mtx_unlock(&sfs->mtx); 1888} 1889 1890/* 1891 * Allocate a sendfile_sync state structure. 1892 * 1893 * For now this only knows about the "sleep" sync, but later it will 1894 * grow various other personalities. 1895 */ 1896struct sendfile_sync * 1897sf_sync_alloc(uint32_t flags) 1898{ 1899 struct sendfile_sync *sfs; 1900 1901 sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); 1902 mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); 1903 cv_init(&sfs->cv, "sendfile"); 1904 sfs->flags = flags; 1905 1906 return (sfs); 1907} 1908 1909/* 1910 * Take a reference to a sfsync instance. 1911 * 1912 * This has to map 1:1 to free calls coming in via sf_buf_mext(), 1913 * so typically this will be referenced once for each mbuf allocated. 1914 */ 1915void 1916sf_sync_ref(struct sendfile_sync *sfs) 1917{ 1918 1919 if (sfs == NULL) 1920 return; 1921 1922 mtx_lock(&sfs->mtx); 1923 sfs->count++; 1924 mtx_unlock(&sfs->mtx); 1925} 1926 1927void 1928sf_sync_syscall_wait(struct sendfile_sync *sfs) 1929{ 1930 1931 if (sfs == NULL) 1932 return; 1933 1934 mtx_lock(&sfs->mtx); 1935 if (sfs->count != 0) 1936 cv_wait(&sfs->cv, &sfs->mtx); 1937 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 1938 mtx_unlock(&sfs->mtx); 1939} 1940 1941void 1942sf_sync_free(struct sendfile_sync *sfs) 1943{ 1944 1945 if (sfs == NULL) 1946 return; 1947 1948 /* 1949 * XXX we should ensure that nothing else has this 1950 * locked before freeing. 1951 */ 1952 mtx_lock(&sfs->mtx); 1953 KASSERT(sfs->count == 0, ("sendfile sync still busy")); 1954 cv_destroy(&sfs->cv); 1955 mtx_destroy(&sfs->mtx); 1956 free(sfs, M_TEMP); 1957} 1958 1959/* 1960 * sendfile(2) 1961 * 1962 * int sendfile(int fd, int s, off_t offset, size_t nbytes, 1963 * struct sf_hdtr *hdtr, off_t *sbytes, int flags) 1964 * 1965 * Send a file specified by 'fd' and starting at 'offset' to a socket 1966 * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == 1967 * 0. Optionally add a header and/or trailer to the socket output. If 1968 * specified, write the total number of bytes sent into *sbytes. 1969 */ 1970int 1971sys_sendfile(struct thread *td, struct sendfile_args *uap) 1972{ 1973 1974 return (do_sendfile(td, uap, 0)); 1975} 1976 1977static int 1978do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) 1979{ 1980 struct sf_hdtr hdtr; 1981 struct uio *hdr_uio, *trl_uio; 1982 struct file *fp; 1983 cap_rights_t rights; 1984 int error; 1985 off_t sbytes; 1986 struct sendfile_sync *sfs; 1987 1988 /* 1989 * File offset must be positive. If it goes beyond EOF 1990 * we send only the header/trailer and no payload data. 1991 */ 1992 if (uap->offset < 0) 1993 return (EINVAL); 1994 1995 hdr_uio = trl_uio = NULL; 1996 sfs = NULL; 1997 1998 if (uap->hdtr != NULL) { 1999 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); 2000 if (error != 0) 2001 goto out; 2002 if (hdtr.headers != NULL) { 2003 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); 2004 if (error != 0) 2005 goto out; 2006 } 2007 if (hdtr.trailers != NULL) { 2008 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); 2009 if (error != 0) 2010 goto out; 2011 2012 } 2013 } 2014 2015 AUDIT_ARG_FD(uap->fd); 2016 2017 /* 2018 * sendfile(2) can start at any offset within a file so we require 2019 * CAP_READ+CAP_SEEK = CAP_PREAD. 2020 */ 2021 if ((error = fget_read(td, uap->fd, 2022 cap_rights_init(&rights, CAP_PREAD), &fp)) != 0) { 2023 goto out; 2024 } 2025 2026 /* 2027 * If we need to wait for completion, initialise the sfsync 2028 * state here. 2029 */ 2030 if (uap->flags & SF_SYNC) 2031 sfs = sf_sync_alloc(uap->flags & SF_SYNC); 2032 2033 error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, 2034 uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, sfs, td); 2035 2036 /* 2037 * If appropriate, do the wait and free here. 2038 */ 2039 if (sfs != NULL) { 2040 sf_sync_syscall_wait(sfs); 2041 sf_sync_free(sfs); 2042 } 2043 2044 /* 2045 * XXX Should we wait until the send has completed before freeing the source 2046 * file handle? It's the previous behaviour, sure, but is it required? 2047 * We've wired down the page references after all. 2048 */ 2049 fdrop(fp, td); 2050 2051 if (uap->sbytes != NULL) { 2052 copyout(&sbytes, uap->sbytes, sizeof(off_t)); 2053 } 2054out: 2055 free(hdr_uio, M_IOV); 2056 free(trl_uio, M_IOV); 2057 return (error); 2058} 2059 2060#ifdef COMPAT_FREEBSD4 2061int 2062freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) 2063{ 2064 struct sendfile_args args; 2065 2066 args.fd = uap->fd; 2067 args.s = uap->s; 2068 args.offset = uap->offset; 2069 args.nbytes = uap->nbytes; 2070 args.hdtr = uap->hdtr; 2071 args.sbytes = uap->sbytes; 2072 args.flags = uap->flags; 2073 2074 return (do_sendfile(td, &args, 1)); 2075} 2076#endif /* COMPAT_FREEBSD4 */ 2077 2078static int 2079sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd, 2080 off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res) 2081{ 2082 vm_page_t m; 2083 vm_pindex_t pindex; 2084 ssize_t resid; 2085 int error, readahead, rv; 2086 2087 pindex = OFF_TO_IDX(off); 2088 VM_OBJECT_WLOCK(obj); 2089 m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY | 2090 VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL); 2091 2092 /* 2093 * Check if page is valid for what we need, otherwise initiate I/O. 2094 * 2095 * The non-zero nd argument prevents disk I/O, instead we 2096 * return the caller what he specified in nd. In particular, 2097 * if we already turned some pages into mbufs, nd == EAGAIN 2098 * and the main function send them the pages before we come 2099 * here again and block. 2100 */ 2101 if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) { 2102 if (vp == NULL) 2103 vm_page_xunbusy(m); 2104 VM_OBJECT_WUNLOCK(obj); 2105 *res = m; 2106 return (0); 2107 } else if (nd != 0) { 2108 if (vp == NULL) 2109 vm_page_xunbusy(m); 2110 error = nd; 2111 goto free_page; 2112 } 2113 2114 /* 2115 * Get the page from backing store. 2116 */ 2117 error = 0; 2118 if (vp != NULL) { 2119 VM_OBJECT_WUNLOCK(obj); 2120 readahead = sfreadahead * MAXBSIZE; 2121 2122 /* 2123 * Use vn_rdwr() instead of the pager interface for 2124 * the vnode, to allow the read-ahead. 2125 * 2126 * XXXMAC: Because we don't have fp->f_cred here, we 2127 * pass in NOCRED. This is probably wrong, but is 2128 * consistent with our original implementation. 2129 */ 2130 error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off), 2131 UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead / 2132 bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); 2133 SFSTAT_INC(sf_iocnt); 2134 VM_OBJECT_WLOCK(obj); 2135 } else { 2136 if (vm_pager_has_page(obj, pindex, NULL, NULL)) { 2137 rv = vm_pager_get_pages(obj, &m, 1, 0); 2138 SFSTAT_INC(sf_iocnt); 2139 m = vm_page_lookup(obj, pindex); 2140 if (m == NULL) 2141 error = EIO; 2142 else if (rv != VM_PAGER_OK) { 2143 vm_page_lock(m); 2144 vm_page_free(m); 2145 vm_page_unlock(m); 2146 m = NULL; 2147 error = EIO; 2148 } 2149 } else { 2150 pmap_zero_page(m); 2151 m->valid = VM_PAGE_BITS_ALL; 2152 m->dirty = 0; 2153 } 2154 if (m != NULL) 2155 vm_page_xunbusy(m); 2156 } 2157 if (error == 0) { 2158 *res = m; 2159 } else if (m != NULL) { 2160free_page: 2161 vm_page_lock(m); 2162 vm_page_unwire(m, 0); 2163 2164 /* 2165 * See if anyone else might know about this page. If 2166 * not and it is not valid, then free it. 2167 */ 2168 if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m)) 2169 vm_page_free(m); 2170 vm_page_unlock(m); 2171 } 2172 KASSERT(error != 0 || (m->wire_count > 0 && 2173 vm_page_is_valid(m, off & PAGE_MASK, xfsize)), 2174 ("wrong page state m %p off %#jx xfsize %d", m, (uintmax_t)off, 2175 xfsize)); 2176 VM_OBJECT_WUNLOCK(obj); 2177 return (error); 2178} 2179 2180static int 2181sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res, 2182 struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size, 2183 int *bsize) 2184{ 2185 struct vattr va; 2186 vm_object_t obj; 2187 struct vnode *vp; 2188 struct shmfd *shmfd; 2189 int error; 2190 2191 vp = *vp_res = NULL; 2192 obj = NULL; 2193 shmfd = *shmfd_res = NULL; 2194 *bsize = 0; 2195 2196 /* 2197 * The file descriptor must be a regular file and have a 2198 * backing VM object. 2199 */ 2200 if (fp->f_type == DTYPE_VNODE) { 2201 vp = fp->f_vnode; 2202 vn_lock(vp, LK_SHARED | LK_RETRY); 2203 if (vp->v_type != VREG) { 2204 error = EINVAL; 2205 goto out; 2206 } 2207 *bsize = vp->v_mount->mnt_stat.f_iosize; 2208 error = VOP_GETATTR(vp, &va, td->td_ucred); 2209 if (error != 0) 2210 goto out; 2211 *obj_size = va.va_size; 2212 obj = vp->v_object; 2213 if (obj == NULL) { 2214 error = EINVAL; 2215 goto out; 2216 } 2217 } else if (fp->f_type == DTYPE_SHM) { 2218 shmfd = fp->f_data; 2219 obj = shmfd->shm_object; 2220 *obj_size = shmfd->shm_size; 2221 } else { 2222 error = EINVAL; 2223 goto out; 2224 } 2225 2226 VM_OBJECT_WLOCK(obj); 2227 if ((obj->flags & OBJ_DEAD) != 0) { 2228 VM_OBJECT_WUNLOCK(obj); 2229 error = EBADF; 2230 goto out; 2231 } 2232 2233 /* 2234 * Temporarily increase the backing VM object's reference 2235 * count so that a forced reclamation of its vnode does not 2236 * immediately destroy it. 2237 */ 2238 vm_object_reference_locked(obj); 2239 VM_OBJECT_WUNLOCK(obj); 2240 *obj_res = obj; 2241 *vp_res = vp; 2242 *shmfd_res = shmfd; 2243 2244out: 2245 if (vp != NULL) 2246 VOP_UNLOCK(vp, 0); 2247 return (error); 2248} 2249 2250static int 2251kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp, 2252 struct socket **so) 2253{ 2254 cap_rights_t rights; 2255 int error; 2256 2257 *sock_fp = NULL; 2258 *so = NULL; 2259 2260 /* 2261 * The socket must be a stream socket and connected. 2262 */ 2263 error = getsock_cap(td->td_proc->p_fd, s, cap_rights_init(&rights, 2264 CAP_SEND), sock_fp, NULL); 2265 if (error != 0) 2266 return (error); 2267 *so = (*sock_fp)->f_data; 2268 if ((*so)->so_type != SOCK_STREAM) 2269 return (EINVAL); 2270 if (((*so)->so_state & SS_ISCONNECTED) == 0) 2271 return (ENOTCONN); 2272 return (0); 2273} 2274 2275int 2276vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, 2277 struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, 2278 int kflags, struct sendfile_sync *sfs, struct thread *td) 2279{ 2280 struct file *sock_fp; 2281 struct vnode *vp; 2282 struct vm_object *obj; 2283 struct socket *so; 2284 struct mbuf *m; 2285 struct sf_buf *sf; 2286 struct vm_page *pg; 2287 struct shmfd *shmfd; 2288 struct vattr va; 2289 off_t off, xfsize, fsbytes, sbytes, rem, obj_size; 2290 int error, bsize, nd, hdrlen, mnw; 2291 bool inflight_called; 2292 2293 pg = NULL; 2294 obj = NULL; 2295 so = NULL; 2296 m = NULL; 2297 fsbytes = sbytes = 0; 2298 hdrlen = mnw = 0; 2299 rem = nbytes; 2300 obj_size = 0; 2301 inflight_called = false; 2302 2303 error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); 2304 if (error != 0) 2305 return (error); 2306 if (rem == 0) 2307 rem = obj_size; 2308 2309 error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so); 2310 if (error != 0) 2311 goto out; 2312 2313 /* 2314 * Do not wait on memory allocations but return ENOMEM for 2315 * caller to retry later. 2316 * XXX: Experimental. 2317 */ 2318 if (flags & SF_MNOWAIT) 2319 mnw = 1; 2320 2321#ifdef MAC 2322 error = mac_socket_check_send(td->td_ucred, so); 2323 if (error != 0) 2324 goto out; 2325#endif 2326 2327 /* If headers are specified copy them into mbufs. */ 2328 if (hdr_uio != NULL) { 2329 hdr_uio->uio_td = td; 2330 hdr_uio->uio_rw = UIO_WRITE; 2331 if (hdr_uio->uio_resid > 0) { 2332 /* 2333 * In FBSD < 5.0 the nbytes to send also included 2334 * the header. If compat is specified subtract the 2335 * header size from nbytes. 2336 */ 2337 if (kflags & SFK_COMPAT) { 2338 if (nbytes > hdr_uio->uio_resid) 2339 nbytes -= hdr_uio->uio_resid; 2340 else 2341 nbytes = 0; 2342 } 2343 m = m_uiotombuf(hdr_uio, (mnw ? M_NOWAIT : M_WAITOK), 2344 0, 0, 0); 2345 if (m == NULL) { 2346 error = mnw ? EAGAIN : ENOBUFS; 2347 goto out; 2348 } 2349 hdrlen = m_length(m, NULL); 2350 } 2351 } 2352 2353 /* 2354 * Protect against multiple writers to the socket. 2355 * 2356 * XXXRW: Historically this has assumed non-interruptibility, so now 2357 * we implement that, but possibly shouldn't. 2358 */ 2359 (void)sblock(&so->so_snd, SBL_WAIT | SBL_NOINTR); 2360 2361 /* 2362 * Loop through the pages of the file, starting with the requested 2363 * offset. Get a file page (do I/O if necessary), map the file page 2364 * into an sf_buf, attach an mbuf header to the sf_buf, and queue 2365 * it on the socket. 2366 * This is done in two loops. The inner loop turns as many pages 2367 * as it can, up to available socket buffer space, without blocking 2368 * into mbufs to have it bulk delivered into the socket send buffer. 2369 * The outer loop checks the state and available space of the socket 2370 * and takes care of the overall progress. 2371 */ 2372 for (off = offset; ; ) { 2373 struct mbuf *mtail; 2374 int loopbytes; 2375 int space; 2376 int done; 2377 2378 if ((nbytes != 0 && nbytes == fsbytes) || 2379 (nbytes == 0 && obj_size == fsbytes)) 2380 break; 2381 2382 mtail = NULL; 2383 loopbytes = 0; 2384 space = 0; 2385 done = 0; 2386 2387 /* 2388 * Check the socket state for ongoing connection, 2389 * no errors and space in socket buffer. 2390 * If space is low allow for the remainder of the 2391 * file to be processed if it fits the socket buffer. 2392 * Otherwise block in waiting for sufficient space 2393 * to proceed, or if the socket is nonblocking, return 2394 * to userland with EAGAIN while reporting how far 2395 * we've come. 2396 * We wait until the socket buffer has significant free 2397 * space to do bulk sends. This makes good use of file 2398 * system read ahead and allows packet segmentation 2399 * offloading hardware to take over lots of work. If 2400 * we were not careful here we would send off only one 2401 * sfbuf at a time. 2402 */ 2403 SOCKBUF_LOCK(&so->so_snd); 2404 if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) 2405 so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; 2406retry_space: 2407 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2408 error = EPIPE; 2409 SOCKBUF_UNLOCK(&so->so_snd); 2410 goto done; 2411 } else if (so->so_error) { 2412 error = so->so_error; 2413 so->so_error = 0; 2414 SOCKBUF_UNLOCK(&so->so_snd); 2415 goto done; 2416 } 2417 space = sbspace(&so->so_snd); 2418 if (space < rem && 2419 (space <= 0 || 2420 space < so->so_snd.sb_lowat)) { 2421 if (so->so_state & SS_NBIO) { 2422 SOCKBUF_UNLOCK(&so->so_snd); 2423 error = EAGAIN; 2424 goto done; 2425 } 2426 /* 2427 * sbwait drops the lock while sleeping. 2428 * When we loop back to retry_space the 2429 * state may have changed and we retest 2430 * for it. 2431 */ 2432 error = sbwait(&so->so_snd); 2433 /* 2434 * An error from sbwait usually indicates that we've 2435 * been interrupted by a signal. If we've sent anything 2436 * then return bytes sent, otherwise return the error. 2437 */ 2438 if (error != 0) { 2439 SOCKBUF_UNLOCK(&so->so_snd); 2440 goto done; 2441 } 2442 goto retry_space; 2443 } 2444 SOCKBUF_UNLOCK(&so->so_snd); 2445 2446 /* 2447 * Reduce space in the socket buffer by the size of 2448 * the header mbuf chain. 2449 * hdrlen is set to 0 after the first loop. 2450 */ 2451 space -= hdrlen; 2452 2453 if (vp != NULL) { 2454 error = vn_lock(vp, LK_SHARED); 2455 if (error != 0) 2456 goto done; 2457 error = VOP_GETATTR(vp, &va, td->td_ucred); 2458 if (error != 0 || off >= va.va_size) { 2459 VOP_UNLOCK(vp, 0); 2460 goto done; 2461 } 2462 obj_size = va.va_size; 2463 } 2464 2465 /* 2466 * Loop and construct maximum sized mbuf chain to be bulk 2467 * dumped into socket buffer. 2468 */ 2469 while (space > loopbytes) { 2470 vm_offset_t pgoff; 2471 struct mbuf *m0; 2472 2473 /* 2474 * Calculate the amount to transfer. 2475 * Not to exceed a page, the EOF, 2476 * or the passed in nbytes. 2477 */ 2478 pgoff = (vm_offset_t)(off & PAGE_MASK); 2479 rem = obj_size - offset; 2480 if (nbytes != 0) 2481 rem = omin(rem, nbytes); 2482 rem -= fsbytes + loopbytes; 2483 xfsize = omin(PAGE_SIZE - pgoff, rem); 2484 xfsize = omin(space - loopbytes, xfsize); 2485 if (xfsize <= 0) { 2486 done = 1; /* all data sent */ 2487 break; 2488 } 2489 2490 /* 2491 * Attempt to look up the page. Allocate 2492 * if not found or wait and loop if busy. 2493 */ 2494 if (m != NULL) 2495 nd = EAGAIN; /* send what we already got */ 2496 else if ((flags & SF_NODISKIO) != 0) 2497 nd = EBUSY; 2498 else 2499 nd = 0; 2500 error = sendfile_readpage(obj, vp, nd, off, 2501 xfsize, bsize, td, &pg); 2502 if (error != 0) { 2503 if (error == EAGAIN) 2504 error = 0; /* not a real error */ 2505 break; 2506 } 2507 2508 /* 2509 * Get a sendfile buf. When allocating the 2510 * first buffer for mbuf chain, we usually 2511 * wait as long as necessary, but this wait 2512 * can be interrupted. For consequent 2513 * buffers, do not sleep, since several 2514 * threads might exhaust the buffers and then 2515 * deadlock. 2516 */ 2517 sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT : 2518 SFB_CATCH); 2519 if (sf == NULL) { 2520 SFSTAT_INC(sf_allocfail); 2521 vm_page_lock(pg); 2522 vm_page_unwire(pg, 0); 2523 KASSERT(pg->object != NULL, 2524 ("%s: object disappeared", __func__)); 2525 vm_page_unlock(pg); 2526 if (m == NULL) 2527 error = (mnw ? EAGAIN : EINTR); 2528 break; 2529 } 2530 2531 /* 2532 * Get an mbuf and set it up as having 2533 * external storage. 2534 */ 2535 m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); 2536 if (m0 == NULL) { 2537 error = (mnw ? EAGAIN : ENOBUFS); 2538 (void)sf_buf_mext(NULL, NULL, sf); 2539 break; 2540 } 2541 if (m_extadd(m0, (caddr_t )sf_buf_kva(sf), PAGE_SIZE, 2542 sf_buf_mext, sfs, sf, M_RDONLY, EXT_SFBUF, 2543 (mnw ? M_NOWAIT : M_WAITOK)) != 0) { 2544 error = (mnw ? EAGAIN : ENOBUFS); 2545 (void)sf_buf_mext(NULL, NULL, sf); 2546 m_freem(m0); 2547 break; 2548 } 2549 m0->m_data = (char *)sf_buf_kva(sf) + pgoff; 2550 m0->m_len = xfsize; 2551 2552 /* Append to mbuf chain. */ 2553 if (mtail != NULL) 2554 mtail->m_next = m0; 2555 else if (m != NULL) 2556 m_last(m)->m_next = m0; 2557 else 2558 m = m0; 2559 mtail = m0; 2560 2561 /* Keep track of bits processed. */ 2562 loopbytes += xfsize; 2563 off += xfsize; 2564 2565 /* 2566 * XXX eventually this should be a sfsync 2567 * method call! 2568 */ 2569 if (sfs != NULL) 2570 sf_sync_ref(sfs); 2571 } 2572 2573 if (vp != NULL) 2574 VOP_UNLOCK(vp, 0); 2575 2576 /* Add the buffer chain to the socket buffer. */ 2577 if (m != NULL) { 2578 int mlen, err; 2579 2580 mlen = m_length(m, NULL); 2581 SOCKBUF_LOCK(&so->so_snd); 2582 if (so->so_snd.sb_state & SBS_CANTSENDMORE) { 2583 error = EPIPE; 2584 SOCKBUF_UNLOCK(&so->so_snd); 2585 goto done; 2586 } 2587 SOCKBUF_UNLOCK(&so->so_snd); 2588 CURVNET_SET(so->so_vnet); 2589 /* Avoid error aliasing. */ 2590 err = (*so->so_proto->pr_usrreqs->pru_send) 2591 (so, 0, m, NULL, NULL, td); 2592 CURVNET_RESTORE(); 2593 if (err == 0) { 2594 /* 2595 * We need two counters to get the 2596 * file offset and nbytes to send 2597 * right: 2598 * - sbytes contains the total amount 2599 * of bytes sent, including headers. 2600 * - fsbytes contains the total amount 2601 * of bytes sent from the file. 2602 */ 2603 sbytes += mlen; 2604 fsbytes += mlen; 2605 if (hdrlen) { 2606 fsbytes -= hdrlen; 2607 hdrlen = 0; 2608 } 2609 } else if (error == 0) 2610 error = err; 2611 m = NULL; /* pru_send always consumes */ 2612 } 2613 2614 /* Quit outer loop on error or when we're done. */ 2615 if (done) 2616 break; 2617 if (error != 0) 2618 goto done; 2619 } 2620 2621 /* 2622 * Send trailers. Wimp out and use writev(2). 2623 */ 2624 if (trl_uio != NULL) { 2625 sbunlock(&so->so_snd); 2626 error = kern_writev(td, sockfd, trl_uio); 2627 if (error == 0) 2628 sbytes += td->td_retval[0]; 2629 goto out; 2630 } 2631 2632done: 2633 sbunlock(&so->so_snd); 2634out: 2635 /* 2636 * If there was no error we have to clear td->td_retval[0] 2637 * because it may have been set by writev. 2638 */ 2639 if (error == 0) { 2640 td->td_retval[0] = 0; 2641 } 2642 if (sent != NULL) { 2643 (*sent) = sbytes; 2644 } 2645 if (obj != NULL) 2646 vm_object_deallocate(obj); 2647 if (so) 2648 fdrop(sock_fp, td); 2649 if (m) 2650 m_freem(m); 2651 2652 if (error == ERESTART) 2653 error = EINTR; 2654 2655 return (error); 2656} 2657 2658/* 2659 * SCTP syscalls. 2660 * Functionality only compiled in if SCTP is defined in the kernel Makefile, 2661 * otherwise all return EOPNOTSUPP. 2662 * XXX: We should make this loadable one day. 2663 */ 2664int 2665sys_sctp_peeloff(td, uap) 2666 struct thread *td; 2667 struct sctp_peeloff_args /* { 2668 int sd; 2669 caddr_t name; 2670 } */ *uap; 2671{ 2672#if (defined(INET) || defined(INET6)) && defined(SCTP) 2673 struct file *nfp = NULL; 2674 struct socket *head, *so; 2675 cap_rights_t rights; 2676 u_int fflag; 2677 int error, fd; 2678 2679 AUDIT_ARG_FD(uap->sd); 2680 error = fgetsock(td, uap->sd, cap_rights_init(&rights, CAP_PEELOFF), 2681 &head, &fflag); 2682 if (error != 0) 2683 goto done2; 2684 if (head->so_proto->pr_protocol != IPPROTO_SCTP) { 2685 error = EOPNOTSUPP; 2686 goto done; 2687 } 2688 error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); 2689 if (error != 0) 2690 goto done; 2691 /* 2692 * At this point we know we do have a assoc to pull 2693 * we proceed to get the fd setup. This may block 2694 * but that is ok. 2695 */ 2696 2697 error = falloc(td, &nfp, &fd, 0); 2698 if (error != 0) 2699 goto done; 2700 td->td_retval[0] = fd; 2701 2702 CURVNET_SET(head->so_vnet); 2703 so = sonewconn(head, SS_ISCONNECTED); 2704 if (so == NULL) { 2705 error = ENOMEM; 2706 goto noconnection; 2707 } 2708 /* 2709 * Before changing the flags on the socket, we have to bump the 2710 * reference count. Otherwise, if the protocol calls sofree(), 2711 * the socket will be released due to a zero refcount. 2712 */ 2713 SOCK_LOCK(so); 2714 soref(so); /* file descriptor reference */ 2715 SOCK_UNLOCK(so); 2716 2717 ACCEPT_LOCK(); 2718 2719 TAILQ_REMOVE(&head->so_comp, so, so_list); 2720 head->so_qlen--; 2721 so->so_state |= (head->so_state & SS_NBIO); 2722 so->so_state &= ~SS_NOFDREF; 2723 so->so_qstate &= ~SQ_COMP; 2724 so->so_head = NULL; 2725 ACCEPT_UNLOCK(); 2726 finit(nfp, fflag, DTYPE_SOCKET, so, &socketops); 2727 error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name); 2728 if (error != 0) 2729 goto noconnection; 2730 if (head->so_sigio != NULL) 2731 fsetown(fgetown(&head->so_sigio), &so->so_sigio); 2732 2733noconnection: 2734 /* 2735 * close the new descriptor, assuming someone hasn't ripped it 2736 * out from under us. 2737 */ 2738 if (error != 0) 2739 fdclose(td->td_proc->p_fd, nfp, fd, td); 2740 2741 /* 2742 * Release explicitly held references before returning. 2743 */ 2744 CURVNET_RESTORE(); 2745done: 2746 if (nfp != NULL) 2747 fdrop(nfp, td); 2748 fputsock(head); 2749done2: 2750 return (error); 2751#else /* SCTP */ 2752 return (EOPNOTSUPP); 2753#endif /* SCTP */ 2754} 2755 2756int 2757sys_sctp_generic_sendmsg (td, uap) 2758 struct thread *td; 2759 struct sctp_generic_sendmsg_args /* { 2760 int sd, 2761 caddr_t msg, 2762 int mlen, 2763 caddr_t to, 2764 __socklen_t tolen, 2765 struct sctp_sndrcvinfo *sinfo, 2766 int flags 2767 } */ *uap; 2768{ 2769#if (defined(INET) || defined(INET6)) && defined(SCTP) 2770 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2771 struct socket *so; 2772 struct file *fp = NULL; 2773 struct sockaddr *to = NULL; 2774#ifdef KTRACE 2775 struct uio *ktruio = NULL; 2776#endif 2777 struct uio auio; 2778 struct iovec iov[1]; 2779 cap_rights_t rights; 2780 int error = 0, len; 2781 2782 if (uap->sinfo != NULL) { 2783 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2784 if (error != 0) 2785 return (error); 2786 u_sinfo = &sinfo; 2787 } 2788 2789 cap_rights_init(&rights, CAP_SEND); 2790 if (uap->tolen != 0) { 2791 error = getsockaddr(&to, uap->to, uap->tolen); 2792 if (error != 0) { 2793 to = NULL; 2794 goto sctp_bad2; 2795 } 2796 cap_rights_set(&rights, CAP_CONNECT); 2797 } 2798 2799 AUDIT_ARG_FD(uap->sd); 2800 error = getsock_cap(td->td_proc->p_fd, uap->sd, &rights, &fp, NULL); 2801 if (error != 0) 2802 goto sctp_bad; 2803#ifdef KTRACE 2804 if (to && (KTRPOINT(td, KTR_STRUCT))) 2805 ktrsockaddr(to); 2806#endif 2807 2808 iov[0].iov_base = uap->msg; 2809 iov[0].iov_len = uap->mlen; 2810 2811 so = (struct socket *)fp->f_data; 2812 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2813 error = EOPNOTSUPP; 2814 goto sctp_bad; 2815 } 2816#ifdef MAC 2817 error = mac_socket_check_send(td->td_ucred, so); 2818 if (error != 0) 2819 goto sctp_bad; 2820#endif /* MAC */ 2821 2822 auio.uio_iov = iov; 2823 auio.uio_iovcnt = 1; 2824 auio.uio_segflg = UIO_USERSPACE; 2825 auio.uio_rw = UIO_WRITE; 2826 auio.uio_td = td; 2827 auio.uio_offset = 0; /* XXX */ 2828 auio.uio_resid = 0; 2829 len = auio.uio_resid = uap->mlen; 2830 CURVNET_SET(so->so_vnet); 2831 error = sctp_lower_sosend(so, to, &auio, (struct mbuf *)NULL, 2832 (struct mbuf *)NULL, uap->flags, u_sinfo, td); 2833 CURVNET_RESTORE(); 2834 if (error != 0) { 2835 if (auio.uio_resid != len && (error == ERESTART || 2836 error == EINTR || error == EWOULDBLOCK)) 2837 error = 0; 2838 /* Generation of SIGPIPE can be controlled per socket. */ 2839 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2840 !(uap->flags & MSG_NOSIGNAL)) { 2841 PROC_LOCK(td->td_proc); 2842 tdsignal(td, SIGPIPE); 2843 PROC_UNLOCK(td->td_proc); 2844 } 2845 } 2846 if (error == 0) 2847 td->td_retval[0] = len - auio.uio_resid; 2848#ifdef KTRACE 2849 if (ktruio != NULL) { 2850 ktruio->uio_resid = td->td_retval[0]; 2851 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2852 } 2853#endif /* KTRACE */ 2854sctp_bad: 2855 if (fp != NULL) 2856 fdrop(fp, td); 2857sctp_bad2: 2858 free(to, M_SONAME); 2859 return (error); 2860#else /* SCTP */ 2861 return (EOPNOTSUPP); 2862#endif /* SCTP */ 2863} 2864 2865int 2866sys_sctp_generic_sendmsg_iov(td, uap) 2867 struct thread *td; 2868 struct sctp_generic_sendmsg_iov_args /* { 2869 int sd, 2870 struct iovec *iov, 2871 int iovlen, 2872 caddr_t to, 2873 __socklen_t tolen, 2874 struct sctp_sndrcvinfo *sinfo, 2875 int flags 2876 } */ *uap; 2877{ 2878#if (defined(INET) || defined(INET6)) && defined(SCTP) 2879 struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; 2880 struct socket *so; 2881 struct file *fp = NULL; 2882 struct sockaddr *to = NULL; 2883#ifdef KTRACE 2884 struct uio *ktruio = NULL; 2885#endif 2886 struct uio auio; 2887 struct iovec *iov, *tiov; 2888 cap_rights_t rights; 2889 ssize_t len; 2890 int error, i; 2891 2892 if (uap->sinfo != NULL) { 2893 error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); 2894 if (error != 0) 2895 return (error); 2896 u_sinfo = &sinfo; 2897 } 2898 cap_rights_init(&rights, CAP_SEND); 2899 if (uap->tolen != 0) { 2900 error = getsockaddr(&to, uap->to, uap->tolen); 2901 if (error != 0) { 2902 to = NULL; 2903 goto sctp_bad2; 2904 } 2905 cap_rights_set(&rights, CAP_CONNECT); 2906 } 2907 2908 AUDIT_ARG_FD(uap->sd); 2909 error = getsock_cap(td->td_proc->p_fd, uap->sd, &rights, &fp, NULL); 2910 if (error != 0) 2911 goto sctp_bad1; 2912 2913#ifdef COMPAT_FREEBSD32 2914 if (SV_CURPROC_FLAG(SV_ILP32)) 2915 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 2916 uap->iovlen, &iov, EMSGSIZE); 2917 else 2918#endif 2919 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 2920 if (error != 0) 2921 goto sctp_bad1; 2922#ifdef KTRACE 2923 if (to && (KTRPOINT(td, KTR_STRUCT))) 2924 ktrsockaddr(to); 2925#endif 2926 2927 so = (struct socket *)fp->f_data; 2928 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 2929 error = EOPNOTSUPP; 2930 goto sctp_bad; 2931 } 2932#ifdef MAC 2933 error = mac_socket_check_send(td->td_ucred, so); 2934 if (error != 0) 2935 goto sctp_bad; 2936#endif /* MAC */ 2937 2938 auio.uio_iov = iov; 2939 auio.uio_iovcnt = uap->iovlen; 2940 auio.uio_segflg = UIO_USERSPACE; 2941 auio.uio_rw = UIO_WRITE; 2942 auio.uio_td = td; 2943 auio.uio_offset = 0; /* XXX */ 2944 auio.uio_resid = 0; 2945 tiov = iov; 2946 for (i = 0; i <uap->iovlen; i++, tiov++) { 2947 if ((auio.uio_resid += tiov->iov_len) < 0) { 2948 error = EINVAL; 2949 goto sctp_bad; 2950 } 2951 } 2952 len = auio.uio_resid; 2953 CURVNET_SET(so->so_vnet); 2954 error = sctp_lower_sosend(so, to, &auio, 2955 (struct mbuf *)NULL, (struct mbuf *)NULL, 2956 uap->flags, u_sinfo, td); 2957 CURVNET_RESTORE(); 2958 if (error != 0) { 2959 if (auio.uio_resid != len && (error == ERESTART || 2960 error == EINTR || error == EWOULDBLOCK)) 2961 error = 0; 2962 /* Generation of SIGPIPE can be controlled per socket */ 2963 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && 2964 !(uap->flags & MSG_NOSIGNAL)) { 2965 PROC_LOCK(td->td_proc); 2966 tdsignal(td, SIGPIPE); 2967 PROC_UNLOCK(td->td_proc); 2968 } 2969 } 2970 if (error == 0) 2971 td->td_retval[0] = len - auio.uio_resid; 2972#ifdef KTRACE 2973 if (ktruio != NULL) { 2974 ktruio->uio_resid = td->td_retval[0]; 2975 ktrgenio(uap->sd, UIO_WRITE, ktruio, error); 2976 } 2977#endif /* KTRACE */ 2978sctp_bad: 2979 free(iov, M_IOV); 2980sctp_bad1: 2981 if (fp != NULL) 2982 fdrop(fp, td); 2983sctp_bad2: 2984 free(to, M_SONAME); 2985 return (error); 2986#else /* SCTP */ 2987 return (EOPNOTSUPP); 2988#endif /* SCTP */ 2989} 2990 2991int 2992sys_sctp_generic_recvmsg(td, uap) 2993 struct thread *td; 2994 struct sctp_generic_recvmsg_args /* { 2995 int sd, 2996 struct iovec *iov, 2997 int iovlen, 2998 struct sockaddr *from, 2999 __socklen_t *fromlenaddr, 3000 struct sctp_sndrcvinfo *sinfo, 3001 int *msg_flags 3002 } */ *uap; 3003{ 3004#if (defined(INET) || defined(INET6)) && defined(SCTP) 3005 uint8_t sockbufstore[256]; 3006 struct uio auio; 3007 struct iovec *iov, *tiov; 3008 struct sctp_sndrcvinfo sinfo; 3009 struct socket *so; 3010 struct file *fp = NULL; 3011 struct sockaddr *fromsa; 3012 cap_rights_t rights; 3013#ifdef KTRACE 3014 struct uio *ktruio = NULL; 3015#endif 3016 ssize_t len; 3017 int error, fromlen, i, msg_flags; 3018 3019 AUDIT_ARG_FD(uap->sd); 3020 error = getsock_cap(td->td_proc->p_fd, uap->sd, 3021 cap_rights_init(&rights, CAP_RECV), &fp, NULL); 3022 if (error != 0) 3023 return (error); 3024#ifdef COMPAT_FREEBSD32 3025 if (SV_CURPROC_FLAG(SV_ILP32)) 3026 error = freebsd32_copyiniov((struct iovec32 *)uap->iov, 3027 uap->iovlen, &iov, EMSGSIZE); 3028 else 3029#endif 3030 error = copyiniov(uap->iov, uap->iovlen, &iov, EMSGSIZE); 3031 if (error != 0) 3032 goto out1; 3033 3034 so = fp->f_data; 3035 if (so->so_proto->pr_protocol != IPPROTO_SCTP) { 3036 error = EOPNOTSUPP; 3037 goto out; 3038 } 3039#ifdef MAC 3040 error = mac_socket_check_receive(td->td_ucred, so); 3041 if (error != 0) 3042 goto out; 3043#endif /* MAC */ 3044 3045 if (uap->fromlenaddr != NULL) { 3046 error = copyin(uap->fromlenaddr, &fromlen, sizeof (fromlen)); 3047 if (error != 0) 3048 goto out; 3049 } else { 3050 fromlen = 0; 3051 } 3052 if (uap->msg_flags) { 3053 error = copyin(uap->msg_flags, &msg_flags, sizeof (int)); 3054 if (error != 0) 3055 goto out; 3056 } else { 3057 msg_flags = 0; 3058 } 3059 auio.uio_iov = iov; 3060 auio.uio_iovcnt = uap->iovlen; 3061 auio.uio_segflg = UIO_USERSPACE; 3062 auio.uio_rw = UIO_READ; 3063 auio.uio_td = td; 3064 auio.uio_offset = 0; /* XXX */ 3065 auio.uio_resid = 0; 3066 tiov = iov; 3067 for (i = 0; i <uap->iovlen; i++, tiov++) { 3068 if ((auio.uio_resid += tiov->iov_len) < 0) { 3069 error = EINVAL; 3070 goto out; 3071 } 3072 } 3073 len = auio.uio_resid; 3074 fromsa = (struct sockaddr *)sockbufstore; 3075 3076#ifdef KTRACE 3077 if (KTRPOINT(td, KTR_GENIO)) 3078 ktruio = cloneuio(&auio); 3079#endif /* KTRACE */ 3080 memset(&sinfo, 0, sizeof(struct sctp_sndrcvinfo)); 3081 CURVNET_SET(so->so_vnet); 3082 error = sctp_sorecvmsg(so, &auio, (struct mbuf **)NULL, 3083 fromsa, fromlen, &msg_flags, 3084 (struct sctp_sndrcvinfo *)&sinfo, 1); 3085 CURVNET_RESTORE(); 3086 if (error != 0) { 3087 if (auio.uio_resid != len && (error == ERESTART || 3088 error == EINTR || error == EWOULDBLOCK)) 3089 error = 0; 3090 } else { 3091 if (uap->sinfo) 3092 error = copyout(&sinfo, uap->sinfo, sizeof (sinfo)); 3093 } 3094#ifdef KTRACE 3095 if (ktruio != NULL) { 3096 ktruio->uio_resid = len - auio.uio_resid; 3097 ktrgenio(uap->sd, UIO_READ, ktruio, error); 3098 } 3099#endif /* KTRACE */ 3100 if (error != 0) 3101 goto out; 3102 td->td_retval[0] = len - auio.uio_resid; 3103 3104 if (fromlen && uap->from) { 3105 len = fromlen; 3106 if (len <= 0 || fromsa == 0) 3107 len = 0; 3108 else { 3109 len = MIN(len, fromsa->sa_len); 3110 error = copyout(fromsa, uap->from, (size_t)len); 3111 if (error != 0) 3112 goto out; 3113 } 3114 error = copyout(&len, uap->fromlenaddr, sizeof (socklen_t)); 3115 if (error != 0) 3116 goto out; 3117 } 3118#ifdef KTRACE 3119 if (KTRPOINT(td, KTR_STRUCT)) 3120 ktrsockaddr(fromsa); 3121#endif 3122 if (uap->msg_flags) { 3123 error = copyout(&msg_flags, uap->msg_flags, sizeof (int)); 3124 if (error != 0) 3125 goto out; 3126 } 3127out: 3128 free(iov, M_IOV); 3129out1: 3130 if (fp != NULL) 3131 fdrop(fp, td); 3132 3133 return (error); 3134#else /* SCTP */ 3135 return (EOPNOTSUPP); 3136#endif /* SCTP */ 3137} 3138