1/* 2 * Copyright (c) 2000-2011 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1982, 1986, 1989, 1990, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * sendfile(2) and related extensions: 33 * Copyright (c) 1998, David Greenman. All rights reserved. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgement: 45 * This product includes software developed by the University of 46 * California, Berkeley and its contributors. 47 * 4. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 * 63 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 64 */ 65/* 66 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce 67 * support for mandatory and extensible security protections. This notice 68 * is included in support of clause 2.2 (b) of the Apple Public License, 69 * Version 2.0. 70 */ 71 72#include <sys/param.h> 73#include <sys/systm.h> 74#include <sys/filedesc.h> 75#include <sys/proc_internal.h> 76#include <sys/file_internal.h> 77#include <sys/vnode_internal.h> 78#include <sys/malloc.h> 79#include <sys/mbuf.h> 80#include <kern/lock.h> 81#include <sys/domain.h> 82#include <sys/protosw.h> 83#include <sys/signalvar.h> 84#include <sys/socket.h> 85#include <sys/socketvar.h> 86#include <sys/kernel.h> 87#include <sys/uio_internal.h> 88#include <sys/kauth.h> 89#include <kern/task.h> 90 91#include <security/audit/audit.h> 92 93#include <sys/kdebug.h> 94#include <sys/sysproto.h> 95#include <netinet/in.h> 96#include <net/route.h> 97#include <netinet/in_pcb.h> 98 99#if CONFIG_MACF_SOCKET_SUBSET 100#include <security/mac_framework.h> 101#endif /* MAC_SOCKET_SUBSET */ 102 103#define f_flag f_fglob->fg_flag 104#define f_type f_fglob->fg_type 105#define f_msgcount f_fglob->fg_msgcount 106#define f_cred f_fglob->fg_cred 107#define f_ops f_fglob->fg_ops 108#define f_offset f_fglob->fg_offset 109#define f_data f_fglob->fg_data 110 111 112#define DBG_LAYER_IN_BEG NETDBG_CODE(DBG_NETSOCK, 0) 113#define DBG_LAYER_IN_END NETDBG_CODE(DBG_NETSOCK, 2) 114#define DBG_LAYER_OUT_BEG NETDBG_CODE(DBG_NETSOCK, 1) 115#define DBG_LAYER_OUT_END NETDBG_CODE(DBG_NETSOCK, 3) 116#define DBG_FNC_SENDMSG NETDBG_CODE(DBG_NETSOCK, (1 << 8) | 1) 117#define DBG_FNC_SENDTO NETDBG_CODE(DBG_NETSOCK, (2 << 8) | 1) 118#define DBG_FNC_SENDIT NETDBG_CODE(DBG_NETSOCK, (3 << 8) | 1) 119#define DBG_FNC_RECVFROM NETDBG_CODE(DBG_NETSOCK, (5 << 8)) 120#define DBG_FNC_RECVMSG NETDBG_CODE(DBG_NETSOCK, (6 << 8)) 121#define DBG_FNC_RECVIT NETDBG_CODE(DBG_NETSOCK, (7 << 8)) 122#define DBG_FNC_SENDFILE NETDBG_CODE(DBG_NETSOCK, (10 << 8)) 123#define DBG_FNC_SENDFILE_WAIT NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 1)) 124#define DBG_FNC_SENDFILE_READ NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 2)) 125#define DBG_FNC_SENDFILE_SEND NETDBG_CODE(DBG_NETSOCK, ((10 << 8) | 3)) 126 127 128#define HACK_FOR_4056224 1 129#if HACK_FOR_4056224 130static pid_t last_pid_4056224 = 0; 131#endif /* HACK_FOR_4056224 */ 132 133/* TODO: should be in header file */ 134int falloc_locked(proc_t, struct fileproc **, int *, vfs_context_t, int); 135 136static int sendit(struct proc *, int, struct user_msghdr *, uio_t, int, 137 int32_t *); 138static int recvit(struct proc *, int, struct user_msghdr *, uio_t, user_addr_t, 139 int32_t *); 140static int getsockaddr(struct socket *, struct sockaddr **, user_addr_t, 141 size_t, boolean_t); 142static int getsockaddr_s(struct socket *, struct sockaddr_storage *, 143 user_addr_t, size_t, boolean_t); 144#if SENDFILE 145static void alloc_sendpkt(int, size_t, unsigned int *, struct mbuf **, 146 boolean_t); 147#endif /* SENDFILE */ 148 149/* 150 * System call interface to the socket abstraction. 151 */ 152 153extern struct fileops socketops; 154 155/* 156 * Returns: 0 Success 157 * EACCES Mandatory Access Control failure 158 * falloc:ENFILE 159 * falloc:EMFILE 160 * falloc:ENOMEM 161 * socreate:EAFNOSUPPORT 162 * socreate:EPROTOTYPE 163 * socreate:EPROTONOSUPPORT 164 * socreate:ENOBUFS 165 * socreate:ENOMEM 166 * socreate:EISCONN 167 * socreate:??? [other protocol families, IPSEC] 168 */ 169int 170socket(struct proc *p, struct socket_args *uap, int32_t *retval) 171{ 172 struct socket *so; 173 struct fileproc *fp; 174 int fd, error; 175 176 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol); 177#if CONFIG_MACF_SOCKET_SUBSET 178 if ((error = mac_socket_check_create(kauth_cred_get(), uap->domain, 179 uap->type, uap->protocol)) != 0) 180 return (error); 181#endif /* MAC_SOCKET_SUBSET */ 182 183 error = falloc(p, &fp, &fd, vfs_context_current()); 184 if (error) { 185 return (error); 186 } 187 fp->f_flag = FREAD|FWRITE; 188 fp->f_type = DTYPE_SOCKET; 189 fp->f_ops = &socketops; 190 191 error = socreate(uap->domain, &so, uap->type, uap->protocol); 192 if (error) { 193 fp_free(p, fd, fp); 194 } else { 195 thread_t thread; 196 struct uthread *ut; 197 198 thread = current_thread(); 199 ut = get_bsdthread_info(thread); 200 201 /* if this is a backgrounded thread then throttle all new sockets */ 202 if (proc_get_selfthread_isbackground() != 0) { 203 so->so_traffic_mgt_flags |= TRAFFIC_MGT_SO_BACKGROUND; 204 so->so_background_thread = thread; 205 } 206 fp->f_data = (caddr_t)so; 207 208 proc_fdlock(p); 209 procfdtbl_releasefd(p, fd, NULL); 210 211 fp_drop(p, fd, fp, 1); 212 proc_fdunlock(p); 213 214 *retval = fd; 215 } 216 return (error); 217} 218 219/* 220 * Returns: 0 Success 221 * EDESTADDRREQ Destination address required 222 * EBADF Bad file descriptor 223 * EACCES Mandatory Access Control failure 224 * file_socket:ENOTSOCK 225 * file_socket:EBADF 226 * getsockaddr:ENAMETOOLONG Filename too long 227 * getsockaddr:EINVAL Invalid argument 228 * getsockaddr:ENOMEM Not enough space 229 * getsockaddr:EFAULT Bad address 230 * sobind:??? 231 */ 232/* ARGSUSED */ 233int 234bind(__unused proc_t p, struct bind_args *uap, __unused int32_t *retval) 235{ 236 struct sockaddr_storage ss; 237 struct sockaddr *sa = NULL; 238 struct socket *so; 239 boolean_t want_free = TRUE; 240 int error; 241 242 AUDIT_ARG(fd, uap->s); 243 error = file_socket(uap->s, &so); 244 if (error != 0) 245 return (error); 246 if (so == NULL) { 247 error = EBADF; 248 goto out; 249 } 250 if (uap->name == USER_ADDR_NULL) { 251 error = EDESTADDRREQ; 252 goto out; 253 } 254 if (uap->namelen > sizeof (ss)) { 255 error = getsockaddr(so, &sa, uap->name, uap->namelen, TRUE); 256 } else { 257 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, TRUE); 258 if (error == 0) { 259 sa = (struct sockaddr *)&ss; 260 want_free = FALSE; 261 } 262 } 263 if (error != 0) 264 goto out; 265 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa); 266#if CONFIG_MACF_SOCKET_SUBSET 267 if ((error = mac_socket_check_bind(kauth_cred_get(), so, sa)) == 0) 268 error = sobind(so, sa); 269#else 270 error = sobind(so, sa); 271#endif /* MAC_SOCKET_SUBSET */ 272 if (want_free) 273 FREE(sa, M_SONAME); 274out: 275 file_drop(uap->s); 276 return (error); 277} 278 279/* 280 * Returns: 0 Success 281 * EBADF 282 * EACCES Mandatory Access Control failure 283 * file_socket:ENOTSOCK 284 * file_socket:EBADF 285 * solisten:EINVAL 286 * solisten:EOPNOTSUPP 287 * solisten:??? 288 */ 289int 290listen(__unused struct proc *p, struct listen_args *uap, 291 __unused int32_t *retval) 292{ 293 int error; 294 struct socket *so; 295 296 AUDIT_ARG(fd, uap->s); 297 error = file_socket(uap->s, &so); 298 if (error) 299 return (error); 300 if (so != NULL) 301#if CONFIG_MACF_SOCKET_SUBSET 302 { 303 error = mac_socket_check_listen(kauth_cred_get(), so); 304 if (error == 0) 305 error = solisten(so, uap->backlog); 306 } 307#else 308 error = solisten(so, uap->backlog); 309#endif /* MAC_SOCKET_SUBSET */ 310 else 311 error = EBADF; 312 313 file_drop(uap->s); 314 return (error); 315} 316 317/* 318 * Returns: fp_getfsock:EBADF Bad file descriptor 319 * fp_getfsock:EOPNOTSUPP ... 320 * xlate => :ENOTSOCK Socket operation on non-socket 321 * :EFAULT Bad address on copyin/copyout 322 * :EBADF Bad file descriptor 323 * :EOPNOTSUPP Operation not supported on socket 324 * :EINVAL Invalid argument 325 * :EWOULDBLOCK Operation would block 326 * :ECONNABORTED Connection aborted 327 * :EINTR Interrupted function 328 * :EACCES Mandatory Access Control failure 329 * falloc_locked:ENFILE Too many files open in system 330 * falloc_locked::EMFILE Too many open files 331 * falloc_locked::ENOMEM Not enough space 332 * 0 Success 333 */ 334int 335accept_nocancel(struct proc *p, struct accept_nocancel_args *uap, 336 int32_t *retval) 337{ 338 struct fileproc *fp; 339 struct sockaddr *sa = NULL; 340 socklen_t namelen; 341 int error; 342 struct socket *head, *so = NULL; 343 lck_mtx_t *mutex_held; 344 int fd = uap->s; 345 int newfd; 346 short fflag; /* type must match fp->f_flag */ 347 int dosocklock = 0; 348 349 *retval = -1; 350 351 AUDIT_ARG(fd, uap->s); 352 353 if (uap->name) { 354 error = copyin(uap->anamelen, (caddr_t)&namelen, 355 sizeof (socklen_t)); 356 if (error) 357 return (error); 358 } 359 error = fp_getfsock(p, fd, &fp, &head); 360 if (error) { 361 if (error == EOPNOTSUPP) 362 error = ENOTSOCK; 363 return (error); 364 } 365 if (head == NULL) { 366 error = EBADF; 367 goto out; 368 } 369#if CONFIG_MACF_SOCKET_SUBSET 370 if ((error = mac_socket_check_accept(kauth_cred_get(), head)) != 0) 371 goto out; 372#endif /* MAC_SOCKET_SUBSET */ 373 374 socket_lock(head, 1); 375 376 if (head->so_proto->pr_getlock != NULL) { 377 mutex_held = (*head->so_proto->pr_getlock)(head, 0); 378 dosocklock = 1; 379 } else { 380 mutex_held = head->so_proto->pr_domain->dom_mtx; 381 dosocklock = 0; 382 } 383 384 if ((head->so_options & SO_ACCEPTCONN) == 0) { 385 if ((head->so_proto->pr_flags & PR_CONNREQUIRED) == 0) { 386 error = EOPNOTSUPP; 387 } else { 388 /* POSIX: The socket is not accepting connections */ 389 error = EINVAL; 390 } 391 socket_unlock(head, 1); 392 goto out; 393 } 394 if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) { 395 socket_unlock(head, 1); 396 error = EWOULDBLOCK; 397 goto out; 398 } 399 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { 400 if (head->so_state & SS_CANTRCVMORE) { 401 head->so_error = ECONNABORTED; 402 break; 403 } 404 if (head->so_usecount < 1) 405 panic("accept: head=%p refcount=%d\n", head, 406 head->so_usecount); 407 error = msleep((caddr_t)&head->so_timeo, mutex_held, 408 PSOCK | PCATCH, "accept", 0); 409 if (head->so_usecount < 1) 410 panic("accept: 2 head=%p refcount=%d\n", head, 411 head->so_usecount); 412 if ((head->so_state & SS_DRAINING)) { 413 error = ECONNABORTED; 414 } 415 if (error) { 416 socket_unlock(head, 1); 417 goto out; 418 } 419 } 420 if (head->so_error) { 421 error = head->so_error; 422 head->so_error = 0; 423 socket_unlock(head, 1); 424 goto out; 425 } 426 427 428 /* 429 * At this point we know that there is at least one connection 430 * ready to be accepted. Remove it from the queue prior to 431 * allocating the file descriptor for it since falloc() may 432 * block allowing another process to accept the connection 433 * instead. 434 */ 435 lck_mtx_assert(mutex_held, LCK_MTX_ASSERT_OWNED); 436 so = TAILQ_FIRST(&head->so_comp); 437 TAILQ_REMOVE(&head->so_comp, so, so_list); 438 head->so_qlen--; 439 /* unlock head to avoid deadlock with select, keep a ref on head */ 440 socket_unlock(head, 0); 441 442#if CONFIG_MACF_SOCKET_SUBSET 443 /* 444 * Pass the pre-accepted socket to the MAC framework. This is 445 * cheaper than allocating a file descriptor for the socket, 446 * calling the protocol accept callback, and possibly freeing 447 * the file descriptor should the MAC check fails. 448 */ 449 if ((error = mac_socket_check_accepted(kauth_cred_get(), so)) != 0) { 450 so->so_state &= ~(SS_NOFDREF | SS_COMP); 451 so->so_head = NULL; 452 soclose(so); 453 /* Drop reference on listening socket */ 454 sodereference(head); 455 goto out; 456 } 457#endif /* MAC_SOCKET_SUBSET */ 458 459 /* 460 * Pass the pre-accepted socket to any interested socket filter(s). 461 * Upon failure, the socket would have been closed by the callee. 462 */ 463 if (so->so_filt != NULL && (error = soacceptfilter(so)) != 0) { 464 /* Drop reference on listening socket */ 465 sodereference(head); 466 /* Propagate socket filter's error code to the caller */ 467 goto out; 468 } 469 470 fflag = fp->f_flag; 471 error = falloc(p, &fp, &newfd, vfs_context_current()); 472 if (error) { 473 /* 474 * Probably ran out of file descriptors. 475 * 476 * <rdar://problem/8554930> 477 * Don't put this back on the socket like we used to, that 478 * just causes the client to spin. Drop the socket. 479 */ 480 so->so_state &= ~(SS_NOFDREF | SS_COMP); 481 so->so_head = NULL; 482 soclose(so); 483 sodereference(head); 484 goto out; 485 } 486 *retval = newfd; 487 fp->f_type = DTYPE_SOCKET; 488 fp->f_flag = fflag; 489 fp->f_ops = &socketops; 490 fp->f_data = (caddr_t)so; 491 socket_lock(head, 0); 492 if (dosocklock) 493 socket_lock(so, 1); 494 so->so_state &= ~SS_COMP; 495 so->so_head = NULL; 496 (void) soacceptlock(so, &sa, 0); 497 socket_unlock(head, 1); 498 if (sa == NULL) { 499 namelen = 0; 500 if (uap->name) 501 goto gotnoname; 502 error = 0; 503 goto releasefd; 504 } 505 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa); 506 507 if (uap->name) { 508 socklen_t sa_len; 509 510 /* save sa_len before it is destroyed */ 511 sa_len = sa->sa_len; 512 namelen = MIN(namelen, sa_len); 513 error = copyout(sa, uap->name, namelen); 514 if (!error) 515 /* return the actual, untruncated address length */ 516 namelen = sa_len; 517gotnoname: 518 error = copyout((caddr_t)&namelen, uap->anamelen, 519 sizeof (socklen_t)); 520 } 521 FREE(sa, M_SONAME); 522 523releasefd: 524 /* 525 * If the socket has been marked as inactive by sosetdefunct(), 526 * disallow further operations on it. 527 */ 528 if (so->so_flags & SOF_DEFUNCT) { 529 sodefunct(current_proc(), so, 530 SHUTDOWN_SOCKET_LEVEL_DISCONNECT_INTERNAL); 531 } 532 533 if (dosocklock) 534 socket_unlock(so, 1); 535 536 proc_fdlock(p); 537 procfdtbl_releasefd(p, newfd, NULL); 538 fp_drop(p, newfd, fp, 1); 539 proc_fdunlock(p); 540 541out: 542 file_drop(fd); 543 return (error); 544} 545 546int 547accept(struct proc *p, struct accept_args *uap, int32_t *retval) 548{ 549 __pthread_testcancel(1); 550 return(accept_nocancel(p, (struct accept_nocancel_args *)uap, retval)); 551} 552 553/* 554 * Returns: 0 Success 555 * EBADF Bad file descriptor 556 * EALREADY Connection already in progress 557 * EINPROGRESS Operation in progress 558 * ECONNABORTED Connection aborted 559 * EINTR Interrupted function 560 * EACCES Mandatory Access Control failure 561 * file_socket:ENOTSOCK 562 * file_socket:EBADF 563 * getsockaddr:ENAMETOOLONG Filename too long 564 * getsockaddr:EINVAL Invalid argument 565 * getsockaddr:ENOMEM Not enough space 566 * getsockaddr:EFAULT Bad address 567 * soconnectlock:EOPNOTSUPP 568 * soconnectlock:EISCONN 569 * soconnectlock:??? [depends on protocol, filters] 570 * msleep:EINTR 571 * 572 * Imputed: so_error error may be set from so_error, which 573 * may have been set by soconnectlock. 574 */ 575/* ARGSUSED */ 576int 577connect(struct proc *p, struct connect_args *uap, int32_t *retval) 578{ 579 __pthread_testcancel(1); 580 return(connect_nocancel(p, (struct connect_nocancel_args *)uap, retval)); 581} 582 583int 584connect_nocancel(__unused proc_t p, struct connect_nocancel_args *uap, __unused int32_t *retval) 585{ 586 struct socket *so; 587 struct sockaddr_storage ss; 588 struct sockaddr *sa = NULL; 589 lck_mtx_t *mutex_held; 590 boolean_t want_free = TRUE; 591 int error; 592 int fd = uap->s; 593 boolean_t dgram; 594 595 AUDIT_ARG(fd, uap->s); 596 error = file_socket(fd, &so); 597 if (error != 0) 598 return (error); 599 if (so == NULL) { 600 error = EBADF; 601 goto out; 602 } 603 604 /* 605 * Ask getsockaddr{_s} to not translate AF_UNSPEC to AF_INET 606 * if this is a datagram socket; translate for other types. 607 */ 608 dgram = (so->so_type == SOCK_DGRAM); 609 610 /* Get socket address now before we obtain socket lock */ 611 if (uap->namelen > sizeof (ss)) { 612 error = getsockaddr(so, &sa, uap->name, uap->namelen, !dgram); 613 } else { 614 error = getsockaddr_s(so, &ss, uap->name, uap->namelen, !dgram); 615 if (error == 0) { 616 sa = (struct sockaddr *)&ss; 617 want_free = FALSE; 618 } 619 } 620 if (error != 0) 621 goto out; 622 623 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), sa); 624#if CONFIG_MACF_SOCKET_SUBSET 625 if ((error = mac_socket_check_connect(kauth_cred_get(), so, sa)) != 0) { 626 if (want_free) 627 FREE(sa, M_SONAME); 628 goto out; 629 } 630#endif /* MAC_SOCKET_SUBSET */ 631 socket_lock(so, 1); 632 633 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 634 if (want_free) 635 FREE(sa, M_SONAME); 636 socket_unlock(so, 1); 637 error = EALREADY; 638 goto out; 639 } 640 error = soconnectlock(so, sa, 0); 641 if (error) 642 goto bad; 643 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { 644 if (want_free) 645 FREE(sa, M_SONAME); 646 socket_unlock(so, 1); 647 error = EINPROGRESS; 648 goto out; 649 } 650 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { 651 if (so->so_proto->pr_getlock != NULL) 652 mutex_held = (*so->so_proto->pr_getlock)(so, 0); 653 else 654 mutex_held = so->so_proto->pr_domain->dom_mtx; 655 error = msleep((caddr_t)&so->so_timeo, mutex_held, 656 PSOCK | PCATCH, "connect", 0); 657 if ((so->so_state & SS_DRAINING)) { 658 error = ECONNABORTED; 659 } 660 if (error) 661 break; 662 } 663 if (error == 0) { 664 error = so->so_error; 665 so->so_error = 0; 666 } 667bad: 668 so->so_state &= ~SS_ISCONNECTING; 669 socket_unlock(so, 1); 670 if (want_free) 671 FREE(sa, M_SONAME); 672 if (error == ERESTART) 673 error = EINTR; 674out: 675 file_drop(fd); 676 return (error); 677} 678 679/* 680 * Returns: 0 Success 681 * socreate:EAFNOSUPPORT 682 * socreate:EPROTOTYPE 683 * socreate:EPROTONOSUPPORT 684 * socreate:ENOBUFS 685 * socreate:ENOMEM 686 * socreate:EISCONN 687 * socreate:??? [other protocol families, IPSEC] 688 * falloc:ENFILE 689 * falloc:EMFILE 690 * falloc:ENOMEM 691 * copyout:EFAULT 692 * soconnect2:EINVAL 693 * soconnect2:EPROTOTYPE 694 * soconnect2:??? [other protocol families[ 695 */ 696int 697socketpair(struct proc *p, struct socketpair_args *uap, 698 __unused int32_t *retval) 699{ 700 struct fileproc *fp1, *fp2; 701 struct socket *so1, *so2; 702 int fd, error, sv[2]; 703 704 AUDIT_ARG(socket, uap->domain, uap->type, uap->protocol); 705 error = socreate(uap->domain, &so1, uap->type, uap->protocol); 706 if (error) 707 return (error); 708 error = socreate(uap->domain, &so2, uap->type, uap->protocol); 709 if (error) 710 goto free1; 711 712 error = falloc(p, &fp1, &fd, vfs_context_current()); 713 if (error) { 714 goto free2; 715 } 716 fp1->f_flag = FREAD|FWRITE; 717 fp1->f_type = DTYPE_SOCKET; 718 fp1->f_ops = &socketops; 719 fp1->f_data = (caddr_t)so1; 720 sv[0] = fd; 721 722 error = falloc(p, &fp2, &fd, vfs_context_current()); 723 if (error) { 724 goto free3; 725 } 726 fp2->f_flag = FREAD|FWRITE; 727 fp2->f_type = DTYPE_SOCKET; 728 fp2->f_ops = &socketops; 729 fp2->f_data = (caddr_t)so2; 730 sv[1] = fd; 731 732 error = soconnect2(so1, so2); 733 if (error) { 734 goto free4; 735 } 736 if (uap->type == SOCK_DGRAM) { 737 /* 738 * Datagram socket connection is asymmetric. 739 */ 740 error = soconnect2(so2, so1); 741 if (error) { 742 goto free4; 743 } 744 } 745 746 if ((error = copyout(sv, uap->rsv, 2 * sizeof (int))) != 0) 747 goto free4; 748 749 proc_fdlock(p); 750 procfdtbl_releasefd(p, sv[0], NULL); 751 procfdtbl_releasefd(p, sv[1], NULL); 752 fp_drop(p, sv[0], fp1, 1); 753 fp_drop(p, sv[1], fp2, 1); 754 proc_fdunlock(p); 755 756 return (0); 757free4: 758 fp_free(p, sv[1], fp2); 759free3: 760 fp_free(p, sv[0], fp1); 761free2: 762 (void) soclose(so2); 763free1: 764 (void) soclose(so1); 765 return (error); 766} 767 768/* 769 * Returns: 0 Success 770 * EINVAL 771 * ENOBUFS 772 * EBADF 773 * EPIPE 774 * EACCES Mandatory Access Control failure 775 * file_socket:ENOTSOCK 776 * file_socket:EBADF 777 * getsockaddr:ENAMETOOLONG Filename too long 778 * getsockaddr:EINVAL Invalid argument 779 * getsockaddr:ENOMEM Not enough space 780 * getsockaddr:EFAULT Bad address 781 * <pru_sosend>:EACCES[TCP] 782 * <pru_sosend>:EADDRINUSE[TCP] 783 * <pru_sosend>:EADDRNOTAVAIL[TCP] 784 * <pru_sosend>:EAFNOSUPPORT[TCP] 785 * <pru_sosend>:EAGAIN[TCP] 786 * <pru_sosend>:EBADF 787 * <pru_sosend>:ECONNRESET[TCP] 788 * <pru_sosend>:EFAULT 789 * <pru_sosend>:EHOSTUNREACH[TCP] 790 * <pru_sosend>:EINTR 791 * <pru_sosend>:EINVAL 792 * <pru_sosend>:EISCONN[AF_INET] 793 * <pru_sosend>:EMSGSIZE[TCP] 794 * <pru_sosend>:ENETDOWN[TCP] 795 * <pru_sosend>:ENETUNREACH[TCP] 796 * <pru_sosend>:ENOBUFS 797 * <pru_sosend>:ENOMEM[TCP] 798 * <pru_sosend>:ENOTCONN[AF_INET] 799 * <pru_sosend>:EOPNOTSUPP 800 * <pru_sosend>:EPERM[TCP] 801 * <pru_sosend>:EPIPE 802 * <pru_sosend>:EWOULDBLOCK 803 * <pru_sosend>:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL] 804 * <pru_sosend>:???[AF_INET] [whatever a filter author chooses] 805 * <pru_sosend>:??? [value from so_error] 806 * sockargs:??? 807 */ 808static int 809sendit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, 810 int flags, int32_t *retval) 811{ 812 struct mbuf *control = NULL; 813 struct sockaddr_storage ss; 814 struct sockaddr *to = NULL; 815 boolean_t want_free = TRUE; 816 int error; 817 struct socket *so; 818 user_ssize_t len; 819 820 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_START, 0, 0, 0, 0, 0); 821 822 error = file_socket(s, &so); 823 if (error) { 824 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0); 825 return (error); 826 } 827 if (so == NULL) { 828 error = EBADF; 829 goto out; 830 } 831 if (mp->msg_name != USER_ADDR_NULL) { 832 if (mp->msg_namelen > sizeof (ss)) { 833 error = getsockaddr(so, &to, mp->msg_name, 834 mp->msg_namelen, TRUE); 835 } else { 836 error = getsockaddr_s(so, &ss, mp->msg_name, 837 mp->msg_namelen, TRUE); 838 if (error == 0) { 839 to = (struct sockaddr *)&ss; 840 want_free = FALSE; 841 } 842 } 843 if (error != 0) 844 goto out; 845 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), to); 846 } 847 if (mp->msg_control != USER_ADDR_NULL) { 848 if (mp->msg_controllen < sizeof (struct cmsghdr)) { 849 error = EINVAL; 850 goto bad; 851 } 852 error = sockargs(&control, mp->msg_control, 853 mp->msg_controllen, MT_CONTROL); 854 if (error != 0) 855 goto bad; 856 } 857 858#if CONFIG_MACF_SOCKET_SUBSET 859 /* 860 * We check the state without holding the socket lock; 861 * if a race condition occurs, it would simply result 862 * in an extra call to the MAC check function. 863 */ 864 if ( to != NULL && 865 !(so->so_state & SS_DEFUNCT) && 866 (error = mac_socket_check_send(kauth_cred_get(), so, to)) != 0) 867 goto bad; 868#endif /* MAC_SOCKET_SUBSET */ 869 870 len = uio_resid(uiop); 871 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, uiop, 0, control, 872 flags); 873 if (error != 0) { 874 if (uio_resid(uiop) != len && (error == ERESTART || 875 error == EINTR || error == EWOULDBLOCK)) 876 error = 0; 877 /* Generation of SIGPIPE can be controlled per socket */ 878 if (error == EPIPE && !(so->so_flags & SOF_NOSIGPIPE)) 879 psignal(p, SIGPIPE); 880 } 881 if (error == 0) 882 *retval = (int)(len - uio_resid(uiop)); 883bad: 884 if (to != NULL && want_free) 885 FREE(to, M_SONAME); 886out: 887 KERNEL_DEBUG(DBG_FNC_SENDIT | DBG_FUNC_END, error, 0, 0, 0, 0); 888 file_drop(s); 889 return (error); 890} 891 892/* 893 * Returns: 0 Success 894 * ENOMEM 895 * sendit:??? [see sendit definition in this file] 896 * write:??? [4056224: applicable for pipes] 897 */ 898int 899sendto(struct proc *p, struct sendto_args *uap, int32_t *retval) 900{ 901 __pthread_testcancel(1); 902 return(sendto_nocancel(p, (struct sendto_nocancel_args *)uap, retval)); 903} 904 905int 906sendto_nocancel(struct proc *p, struct sendto_nocancel_args *uap, int32_t *retval) 907{ 908 struct user_msghdr msg; 909 int error; 910 uio_t auio = NULL; 911 912 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_START, 0, 0, 0, 0, 0); 913 AUDIT_ARG(fd, uap->s); 914 915 auio = uio_create(1, 0, 916 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 917 UIO_WRITE); 918 if (auio == NULL) { 919 return (ENOMEM); 920 } 921 uio_addiov(auio, uap->buf, uap->len); 922 923 msg.msg_name = uap->to; 924 msg.msg_namelen = uap->tolen; 925 /* no need to set up msg_iov. sendit uses uio_t we send it */ 926 msg.msg_iov = 0; 927 msg.msg_iovlen = 0; 928 msg.msg_control = 0; 929 msg.msg_flags = 0; 930 931 error = sendit(p, uap->s, &msg, auio, uap->flags, retval); 932 933 if (auio != NULL) { 934 uio_free(auio); 935 } 936 937#if HACK_FOR_4056224 938 /* 939 * Radar 4056224 940 * Temporary workaround to let send() and recv() work over 941 * a pipe for binary compatibility 942 * This will be removed in the release following Tiger 943 */ 944 if (error == ENOTSOCK) { 945 struct fileproc *fp; 946 947 if (fp_lookup(p, uap->s, &fp, 0) == 0) { 948 (void) fp_drop(p, uap->s, fp, 0); 949 950 if (fp->f_type == DTYPE_PIPE) { 951 struct write_args write_uap; 952 user_ssize_t write_retval; 953 954 if (p->p_pid > last_pid_4056224) { 955 last_pid_4056224 = p->p_pid; 956 957 printf("%s[%d] uses send/recv " 958 "on a pipe\n", p->p_comm, p->p_pid); 959 } 960 961 bzero(&write_uap, sizeof (struct write_args)); 962 write_uap.fd = uap->s; 963 write_uap.cbuf = uap->buf; 964 write_uap.nbyte = uap->len; 965 966 error = write(p, &write_uap, &write_retval); 967 *retval = (int)write_retval; 968 } 969 } 970 } 971#endif /* HACK_FOR_4056224 */ 972 973 KERNEL_DEBUG(DBG_FNC_SENDTO | DBG_FUNC_END, error, *retval, 0, 0, 0); 974 975 return (error); 976} 977 978/* 979 * Returns: 0 Success 980 * ENOBUFS 981 * copyin:EFAULT 982 * sendit:??? [see sendit definition in this file] 983 */ 984int 985sendmsg(struct proc *p, struct sendmsg_args *uap, int32_t *retval) 986{ 987 __pthread_testcancel(1); 988 return(sendmsg_nocancel(p, (struct sendmsg_nocancel_args *)uap, retval)); 989} 990 991int 992sendmsg_nocancel(struct proc *p, struct sendmsg_nocancel_args *uap, int32_t *retval) 993{ 994 struct user32_msghdr msg32; 995 struct user64_msghdr msg64; 996 struct user_msghdr user_msg; 997 caddr_t msghdrp; 998 int size_of_msghdr; 999 int error; 1000 uio_t auio = NULL; 1001 struct user_iovec *iovp; 1002 1003 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); 1004 AUDIT_ARG(fd, uap->s); 1005 if (IS_64BIT_PROCESS(p)) { 1006 msghdrp = (caddr_t)&msg64; 1007 size_of_msghdr = sizeof (msg64); 1008 } else { 1009 msghdrp = (caddr_t)&msg32; 1010 size_of_msghdr = sizeof (msg32); 1011 } 1012 error = copyin(uap->msg, msghdrp, size_of_msghdr); 1013 if (error) { 1014 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0); 1015 return (error); 1016 } 1017 1018 if (IS_64BIT_PROCESS(p)) { 1019 user_msg.msg_flags = msg64.msg_flags; 1020 user_msg.msg_controllen = msg64.msg_controllen; 1021 user_msg.msg_control = msg64.msg_control; 1022 user_msg.msg_iovlen = msg64.msg_iovlen; 1023 user_msg.msg_iov = msg64.msg_iov; 1024 user_msg.msg_namelen = msg64.msg_namelen; 1025 user_msg.msg_name = msg64.msg_name; 1026 } else { 1027 user_msg.msg_flags = msg32.msg_flags; 1028 user_msg.msg_controllen = msg32.msg_controllen; 1029 user_msg.msg_control = msg32.msg_control; 1030 user_msg.msg_iovlen = msg32.msg_iovlen; 1031 user_msg.msg_iov = msg32.msg_iov; 1032 user_msg.msg_namelen = msg32.msg_namelen; 1033 user_msg.msg_name = msg32.msg_name; 1034 } 1035 1036 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) { 1037 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, EMSGSIZE, 1038 0, 0, 0, 0); 1039 return (EMSGSIZE); 1040 } 1041 1042 /* allocate a uio large enough to hold the number of iovecs passed */ 1043 auio = uio_create(user_msg.msg_iovlen, 0, 1044 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 1045 UIO_WRITE); 1046 if (auio == NULL) { 1047 error = ENOBUFS; 1048 goto done; 1049 } 1050 1051 if (user_msg.msg_iovlen) { 1052 /* 1053 * get location of iovecs within the uio. 1054 * then copyin the iovecs from user space. 1055 */ 1056 iovp = uio_iovsaddr(auio); 1057 if (iovp == NULL) { 1058 error = ENOBUFS; 1059 goto done; 1060 } 1061 error = copyin_user_iovec_array(user_msg.msg_iov, 1062 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 1063 user_msg.msg_iovlen, iovp); 1064 if (error) 1065 goto done; 1066 user_msg.msg_iov = CAST_USER_ADDR_T(iovp); 1067 1068 /* finish setup of uio_t */ 1069 uio_calculateresid(auio); 1070 } else { 1071 user_msg.msg_iov = 0; 1072 } 1073 1074 /* msg_flags is ignored for send */ 1075 user_msg.msg_flags = 0; 1076 1077 error = sendit(p, uap->s, &user_msg, auio, uap->flags, retval); 1078done: 1079 if (auio != NULL) { 1080 uio_free(auio); 1081 } 1082 KERNEL_DEBUG(DBG_FNC_SENDMSG | DBG_FUNC_END, error, 0, 0, 0, 0); 1083 1084 return (error); 1085} 1086 1087/* 1088 * Returns: 0 Success 1089 * ENOTSOCK 1090 * EINVAL 1091 * EBADF 1092 * EACCES Mandatory Access Control failure 1093 * copyout:EFAULT 1094 * fp_lookup:EBADF 1095 * <pru_soreceive>:ENOBUFS 1096 * <pru_soreceive>:ENOTCONN 1097 * <pru_soreceive>:EWOULDBLOCK 1098 * <pru_soreceive>:EFAULT 1099 * <pru_soreceive>:EINTR 1100 * <pru_soreceive>:EBADF 1101 * <pru_soreceive>:EINVAL 1102 * <pru_soreceive>:EMSGSIZE 1103 * <pru_soreceive>:??? 1104 * 1105 * Notes: Additional return values from calls through <pru_soreceive> 1106 * depend on protocols other than TCP or AF_UNIX, which are 1107 * documented above. 1108 */ 1109static int 1110recvit(struct proc *p, int s, struct user_msghdr *mp, uio_t uiop, 1111 user_addr_t namelenp, int32_t *retval) 1112{ 1113 int len, error; 1114 struct mbuf *m, *control = 0; 1115 user_addr_t ctlbuf; 1116 struct socket *so; 1117 struct sockaddr *fromsa = 0; 1118 struct fileproc *fp; 1119 1120 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_START, 0, 0, 0, 0, 0); 1121 proc_fdlock(p); 1122 if ((error = fp_lookup(p, s, &fp, 1))) { 1123 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0); 1124 proc_fdunlock(p); 1125 return (error); 1126 } 1127 if (fp->f_type != DTYPE_SOCKET) { 1128 fp_drop(p, s, fp, 1); 1129 proc_fdunlock(p); 1130 return (ENOTSOCK); 1131 } 1132 1133 so = (struct socket *)fp->f_data; 1134 if (so == NULL) { 1135 fp_drop(p, s, fp, 1); 1136 proc_fdunlock(p); 1137 return (EBADF); 1138 } 1139 1140 proc_fdunlock(p); 1141 1142#if CONFIG_MACF_SOCKET_SUBSET 1143 /* 1144 * We check the state without holding the socket lock; 1145 * if a race condition occurs, it would simply result 1146 * in an extra call to the MAC check function. 1147 */ 1148 if (!(so->so_state & SS_DEFUNCT) && 1149 !(so->so_state & SS_ISCONNECTED) && 1150 (error = mac_socket_check_receive(kauth_cred_get(), so)) != 0) 1151 goto out1; 1152#endif /* MAC_SOCKET_SUBSET */ 1153 if (uio_resid(uiop) < 0) { 1154 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, EINVAL, 0, 0, 0, 0); 1155 error = EINVAL; 1156 goto out1; 1157 } 1158 1159 len = uio_resid(uiop); 1160 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, uiop, 1161 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, 1162 &mp->msg_flags); 1163 if (fromsa) 1164 AUDIT_ARG(sockaddr, vfs_context_cwd(vfs_context_current()), 1165 fromsa); 1166 if (error) { 1167 if (uio_resid(uiop) != len && (error == ERESTART || 1168 error == EINTR || error == EWOULDBLOCK)) 1169 error = 0; 1170 } 1171 1172 if (error) 1173 goto out; 1174 1175 *retval = len - uio_resid(uiop); 1176 if (mp->msg_name) { 1177 socklen_t sa_len = 0; 1178 1179 len = mp->msg_namelen; 1180 if (len <= 0 || fromsa == 0) { 1181 len = 0; 1182 } else { 1183#ifndef MIN 1184#define MIN(a, b) ((a) > (b) ? (b) : (a)) 1185#endif 1186 sa_len = fromsa->sa_len; 1187 len = MIN((unsigned int)len, sa_len); 1188 error = copyout(fromsa, mp->msg_name, (unsigned)len); 1189 if (error) 1190 goto out; 1191 } 1192 mp->msg_namelen = sa_len; 1193 /* return the actual, untruncated address length */ 1194 if (namelenp && 1195 (error = copyout((caddr_t)&sa_len, namelenp, 1196 sizeof (int)))) { 1197 goto out; 1198 } 1199 } 1200 if (mp->msg_control) { 1201 len = mp->msg_controllen; 1202 m = control; 1203 mp->msg_controllen = 0; 1204 ctlbuf = mp->msg_control; 1205 1206 while (m && len > 0) { 1207 unsigned int tocopy; 1208 struct cmsghdr *cp = mtod(m, struct cmsghdr *); 1209 int cp_size = CMSG_ALIGN(cp->cmsg_len); 1210 int buflen = m->m_len; 1211 1212 while (buflen > 0 && len > 0) { 1213 1214 /* 1215 SCM_TIMESTAMP hack because struct timeval has a 1216 * different size for 32 bits and 64 bits processes 1217 */ 1218 if (cp->cmsg_level == SOL_SOCKET && cp->cmsg_type == SCM_TIMESTAMP) { 1219 unsigned char tmp_buffer[CMSG_SPACE(sizeof(struct user64_timeval))]; 1220 struct cmsghdr *tmp_cp = (struct cmsghdr *)(void *)tmp_buffer; 1221 int tmp_space; 1222 struct timeval *tv = (struct timeval *)(void *)CMSG_DATA(cp); 1223 1224 tmp_cp->cmsg_level = SOL_SOCKET; 1225 tmp_cp->cmsg_type = SCM_TIMESTAMP; 1226 1227 if (proc_is64bit(p)) { 1228 struct user64_timeval *tv64 = (struct user64_timeval *)(void *)CMSG_DATA(tmp_cp); 1229 1230 tv64->tv_sec = tv->tv_sec; 1231 tv64->tv_usec = tv->tv_usec; 1232 1233 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user64_timeval)); 1234 tmp_space = CMSG_SPACE(sizeof(struct user64_timeval)); 1235 } else { 1236 struct user32_timeval *tv32 = (struct user32_timeval *)(void *)CMSG_DATA(tmp_cp); 1237 1238 tv32->tv_sec = tv->tv_sec; 1239 tv32->tv_usec = tv->tv_usec; 1240 1241 tmp_cp->cmsg_len = CMSG_LEN(sizeof(struct user32_timeval)); 1242 tmp_space = CMSG_SPACE(sizeof(struct user32_timeval)); 1243 } 1244 if (len >= tmp_space) { 1245 tocopy = tmp_space; 1246 } else { 1247 mp->msg_flags |= MSG_CTRUNC; 1248 tocopy = len; 1249 } 1250 error = copyout(tmp_buffer, ctlbuf, tocopy); 1251 if (error) 1252 goto out; 1253 1254 } else { 1255 1256 if (cp_size > buflen) { 1257 panic("cp_size > buflen, something wrong with alignment!"); 1258 } 1259 1260 if (len >= cp_size) { 1261 tocopy = cp_size; 1262 } else { 1263 mp->msg_flags |= MSG_CTRUNC; 1264 tocopy = len; 1265 } 1266 1267 error = copyout((caddr_t) cp, ctlbuf, 1268 tocopy); 1269 if (error) 1270 goto out; 1271 } 1272 1273 1274 ctlbuf += tocopy; 1275 len -= tocopy; 1276 1277 buflen -= cp_size; 1278 cp = (struct cmsghdr *)(void *)((unsigned char *) cp + cp_size); 1279 cp_size = CMSG_ALIGN(cp->cmsg_len); 1280 } 1281 1282 m = m->m_next; 1283 } 1284 mp->msg_controllen = ctlbuf - mp->msg_control; 1285 } 1286out: 1287 if (fromsa) 1288 FREE(fromsa, M_SONAME); 1289 if (control) 1290 m_freem(control); 1291 KERNEL_DEBUG(DBG_FNC_RECVIT | DBG_FUNC_END, error, 0, 0, 0, 0); 1292out1: 1293 fp_drop(p, s, fp, 0); 1294 return (error); 1295} 1296 1297/* 1298 * Returns: 0 Success 1299 * ENOMEM 1300 * copyin:EFAULT 1301 * recvit:??? 1302 * read:??? [4056224: applicable for pipes] 1303 * 1304 * Notes: The read entry point is only called as part of support for 1305 * binary backward compatability; new code should use read 1306 * instead of recv or recvfrom when attempting to read data 1307 * from pipes. 1308 * 1309 * For full documentation of the return codes from recvit, see 1310 * the block header for the recvit function. 1311 */ 1312int 1313recvfrom(struct proc *p, struct recvfrom_args *uap, int32_t *retval) 1314{ 1315 __pthread_testcancel(1); 1316 return(recvfrom_nocancel(p, (struct recvfrom_nocancel_args *)uap, retval)); 1317} 1318 1319int 1320recvfrom_nocancel(struct proc *p, struct recvfrom_nocancel_args *uap, int32_t *retval) 1321{ 1322 struct user_msghdr msg; 1323 int error; 1324 uio_t auio = NULL; 1325 1326 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_START, 0, 0, 0, 0, 0); 1327 AUDIT_ARG(fd, uap->s); 1328 1329 if (uap->fromlenaddr) { 1330 error = copyin(uap->fromlenaddr, 1331 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); 1332 if (error) 1333 return (error); 1334 } else { 1335 msg.msg_namelen = 0; 1336 } 1337 msg.msg_name = uap->from; 1338 auio = uio_create(1, 0, 1339 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 1340 UIO_READ); 1341 if (auio == NULL) { 1342 return (ENOMEM); 1343 } 1344 1345 uio_addiov(auio, uap->buf, uap->len); 1346 /* no need to set up msg_iov. recvit uses uio_t we send it */ 1347 msg.msg_iov = 0; 1348 msg.msg_iovlen = 0; 1349 msg.msg_control = 0; 1350 msg.msg_controllen = 0; 1351 msg.msg_flags = uap->flags; 1352 error = recvit(p, uap->s, &msg, auio, uap->fromlenaddr, retval); 1353 if (auio != NULL) { 1354 uio_free(auio); 1355 } 1356 1357#if HACK_FOR_4056224 1358 /* 1359 * Radar 4056224 1360 * Temporary workaround to let send() and recv() work over 1361 * a pipe for binary compatibility 1362 * This will be removed in the release following Tiger 1363 */ 1364 if (error == ENOTSOCK && proc_is64bit(p) == 0) { 1365 struct fileproc *fp; 1366 1367 if (fp_lookup(p, uap->s, &fp, 0) == 0) { 1368 (void) fp_drop(p, uap->s, fp, 0); 1369 1370 if (fp->f_type == DTYPE_PIPE) { 1371 struct read_args read_uap; 1372 user_ssize_t read_retval; 1373 1374 if (p->p_pid > last_pid_4056224) { 1375 last_pid_4056224 = p->p_pid; 1376 1377 printf("%s[%d] uses send/recv on " 1378 "a pipe\n", p->p_comm, p->p_pid); 1379 } 1380 1381 bzero(&read_uap, sizeof (struct read_args)); 1382 read_uap.fd = uap->s; 1383 read_uap.cbuf = uap->buf; 1384 read_uap.nbyte = uap->len; 1385 1386 error = read(p, &read_uap, &read_retval); 1387 *retval = (int)read_retval; 1388 } 1389 } 1390 } 1391#endif /* HACK_FOR_4056224 */ 1392 1393 KERNEL_DEBUG(DBG_FNC_RECVFROM | DBG_FUNC_END, error, 0, 0, 0, 0); 1394 1395 return (error); 1396} 1397 1398/* 1399 * Returns: 0 Success 1400 * EMSGSIZE 1401 * ENOMEM 1402 * copyin:EFAULT 1403 * copyout:EFAULT 1404 * recvit:??? 1405 * 1406 * Notes: For full documentation of the return codes from recvit, see 1407 * the block header for the recvit function. 1408 */ 1409int 1410recvmsg(struct proc *p, struct recvmsg_args *uap, int32_t *retval) 1411{ 1412 __pthread_testcancel(1); 1413 return(recvmsg_nocancel(p, (struct recvmsg_nocancel_args *)uap, retval)); 1414} 1415 1416int 1417recvmsg_nocancel(struct proc *p, struct recvmsg_nocancel_args *uap, int32_t *retval) 1418{ 1419 struct user32_msghdr msg32; 1420 struct user64_msghdr msg64; 1421 struct user_msghdr user_msg; 1422 caddr_t msghdrp; 1423 int size_of_msghdr; 1424 user_addr_t uiov; 1425 int error; 1426 uio_t auio = NULL; 1427 struct user_iovec *iovp; 1428 1429 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_START, 0, 0, 0, 0, 0); 1430 AUDIT_ARG(fd, uap->s); 1431 if (IS_64BIT_PROCESS(p)) { 1432 msghdrp = (caddr_t)&msg64; 1433 size_of_msghdr = sizeof (msg64); 1434 } else { 1435 msghdrp = (caddr_t)&msg32; 1436 size_of_msghdr = sizeof (msg32); 1437 } 1438 error = copyin(uap->msg, msghdrp, size_of_msghdr); 1439 if (error) { 1440 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0); 1441 return (error); 1442 } 1443 1444 /* only need to copy if user process is not 64-bit */ 1445 if (IS_64BIT_PROCESS(p)) { 1446 user_msg.msg_flags = msg64.msg_flags; 1447 user_msg.msg_controllen = msg64.msg_controllen; 1448 user_msg.msg_control = msg64.msg_control; 1449 user_msg.msg_iovlen = msg64.msg_iovlen; 1450 user_msg.msg_iov = msg64.msg_iov; 1451 user_msg.msg_namelen = msg64.msg_namelen; 1452 user_msg.msg_name = msg64.msg_name; 1453 } else { 1454 user_msg.msg_flags = msg32.msg_flags; 1455 user_msg.msg_controllen = msg32.msg_controllen; 1456 user_msg.msg_control = msg32.msg_control; 1457 user_msg.msg_iovlen = msg32.msg_iovlen; 1458 user_msg.msg_iov = msg32.msg_iov; 1459 user_msg.msg_namelen = msg32.msg_namelen; 1460 user_msg.msg_name = msg32.msg_name; 1461 } 1462 1463 if (user_msg.msg_iovlen <= 0 || user_msg.msg_iovlen > UIO_MAXIOV) { 1464 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, EMSGSIZE, 1465 0, 0, 0, 0); 1466 return (EMSGSIZE); 1467 } 1468 1469 user_msg.msg_flags = uap->flags; 1470 1471 /* allocate a uio large enough to hold the number of iovecs passed */ 1472 auio = uio_create(user_msg.msg_iovlen, 0, 1473 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 1474 UIO_READ); 1475 if (auio == NULL) { 1476 error = ENOMEM; 1477 goto done; 1478 } 1479 1480 /* 1481 * get location of iovecs within the uio. then copyin the iovecs from 1482 * user space. 1483 */ 1484 iovp = uio_iovsaddr(auio); 1485 if (iovp == NULL) { 1486 error = ENOMEM; 1487 goto done; 1488 } 1489 uiov = user_msg.msg_iov; 1490 user_msg.msg_iov = CAST_USER_ADDR_T(iovp); 1491 error = copyin_user_iovec_array(uiov, 1492 IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32, 1493 user_msg.msg_iovlen, iovp); 1494 if (error) 1495 goto done; 1496 1497 /* finish setup of uio_t */ 1498 uio_calculateresid(auio); 1499 1500 error = recvit(p, uap->s, &user_msg, auio, 0, retval); 1501 if (!error) { 1502 user_msg.msg_iov = uiov; 1503 if (IS_64BIT_PROCESS(p)) { 1504 msg64.msg_flags = user_msg.msg_flags; 1505 msg64.msg_controllen = user_msg.msg_controllen; 1506 msg64.msg_control = user_msg.msg_control; 1507 msg64.msg_iovlen = user_msg.msg_iovlen; 1508 msg64.msg_iov = user_msg.msg_iov; 1509 msg64.msg_namelen = user_msg.msg_namelen; 1510 msg64.msg_name = user_msg.msg_name; 1511 } else { 1512 msg32.msg_flags = user_msg.msg_flags; 1513 msg32.msg_controllen = user_msg.msg_controllen; 1514 msg32.msg_control = user_msg.msg_control; 1515 msg32.msg_iovlen = user_msg.msg_iovlen; 1516 msg32.msg_iov = user_msg.msg_iov; 1517 msg32.msg_namelen = user_msg.msg_namelen; 1518 msg32.msg_name = user_msg.msg_name; 1519 } 1520 error = copyout(msghdrp, uap->msg, size_of_msghdr); 1521 } 1522done: 1523 if (auio != NULL) { 1524 uio_free(auio); 1525 } 1526 KERNEL_DEBUG(DBG_FNC_RECVMSG | DBG_FUNC_END, error, 0, 0, 0, 0); 1527 return (error); 1528} 1529 1530/* 1531 * Returns: 0 Success 1532 * EBADF 1533 * file_socket:ENOTSOCK 1534 * file_socket:EBADF 1535 * soshutdown:EINVAL 1536 * soshutdown:ENOTCONN 1537 * soshutdown:EADDRNOTAVAIL[TCP] 1538 * soshutdown:ENOBUFS[TCP] 1539 * soshutdown:EMSGSIZE[TCP] 1540 * soshutdown:EHOSTUNREACH[TCP] 1541 * soshutdown:ENETUNREACH[TCP] 1542 * soshutdown:ENETDOWN[TCP] 1543 * soshutdown:ENOMEM[TCP] 1544 * soshutdown:EACCES[TCP] 1545 * soshutdown:EMSGSIZE[TCP] 1546 * soshutdown:ENOBUFS[TCP] 1547 * soshutdown:???[TCP] [ignorable: mostly IPSEC/firewall/DLIL] 1548 * soshutdown:??? [other protocol families] 1549 */ 1550/* ARGSUSED */ 1551int 1552shutdown(__unused struct proc *p, struct shutdown_args *uap, 1553 __unused int32_t *retval) 1554{ 1555 struct socket *so; 1556 int error; 1557 1558 AUDIT_ARG(fd, uap->s); 1559 error = file_socket(uap->s, &so); 1560 if (error) 1561 return (error); 1562 if (so == NULL) { 1563 error = EBADF; 1564 goto out; 1565 } 1566 error = soshutdown((struct socket *)so, uap->how); 1567out: 1568 file_drop(uap->s); 1569 return (error); 1570} 1571 1572/* 1573 * Returns: 0 Success 1574 * EFAULT 1575 * EINVAL 1576 * EACCES Mandatory Access Control failure 1577 * file_socket:ENOTSOCK 1578 * file_socket:EBADF 1579 * sosetopt:EINVAL 1580 * sosetopt:ENOPROTOOPT 1581 * sosetopt:ENOBUFS 1582 * sosetopt:EDOM 1583 * sosetopt:EFAULT 1584 * sosetopt:EOPNOTSUPP[AF_UNIX] 1585 * sosetopt:??? 1586 */ 1587/* ARGSUSED */ 1588int 1589setsockopt(struct proc *p, struct setsockopt_args *uap, 1590 __unused int32_t *retval) 1591{ 1592 struct socket *so; 1593 struct sockopt sopt; 1594 int error; 1595 1596 AUDIT_ARG(fd, uap->s); 1597 if (uap->val == 0 && uap->valsize != 0) 1598 return (EFAULT); 1599 /* No bounds checking on size (it's unsigned) */ 1600 1601 error = file_socket(uap->s, &so); 1602 if (error) 1603 return (error); 1604 1605 sopt.sopt_dir = SOPT_SET; 1606 sopt.sopt_level = uap->level; 1607 sopt.sopt_name = uap->name; 1608 sopt.sopt_val = uap->val; 1609 sopt.sopt_valsize = uap->valsize; 1610 sopt.sopt_p = p; 1611 1612 if (so == NULL) { 1613 error = EINVAL; 1614 goto out; 1615 } 1616#if CONFIG_MACF_SOCKET_SUBSET 1617 if ((error = mac_socket_check_setsockopt(kauth_cred_get(), so, 1618 &sopt)) != 0) 1619 goto out; 1620#endif /* MAC_SOCKET_SUBSET */ 1621 error = sosetopt(so, &sopt); 1622out: 1623 file_drop(uap->s); 1624 return (error); 1625} 1626 1627 1628 1629/* 1630 * Returns: 0 Success 1631 * EINVAL 1632 * EBADF 1633 * EACCES Mandatory Access Control failure 1634 * copyin:EFAULT 1635 * copyout:EFAULT 1636 * file_socket:ENOTSOCK 1637 * file_socket:EBADF 1638 * sogetopt:??? 1639 */ 1640int 1641getsockopt(struct proc *p, struct getsockopt_args *uap, 1642 __unused int32_t *retval) 1643{ 1644 int error; 1645 socklen_t valsize; 1646 struct sockopt sopt; 1647 struct socket *so; 1648 1649 error = file_socket(uap->s, &so); 1650 if (error) 1651 return (error); 1652 if (uap->val) { 1653 error = copyin(uap->avalsize, (caddr_t)&valsize, 1654 sizeof (valsize)); 1655 if (error) 1656 goto out; 1657 /* No bounds checking on size (it's unsigned) */ 1658 } else { 1659 valsize = 0; 1660 } 1661 sopt.sopt_dir = SOPT_GET; 1662 sopt.sopt_level = uap->level; 1663 sopt.sopt_name = uap->name; 1664 sopt.sopt_val = uap->val; 1665 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */ 1666 sopt.sopt_p = p; 1667 1668 if (so == NULL) { 1669 error = EBADF; 1670 goto out; 1671 } 1672#if CONFIG_MACF_SOCKET_SUBSET 1673 if ((error = mac_socket_check_getsockopt(kauth_cred_get(), so, 1674 &sopt)) != 0) 1675 goto out; 1676#endif /* MAC_SOCKET_SUBSET */ 1677 error = sogetopt((struct socket *)so, &sopt); 1678 if (error == 0) { 1679 valsize = sopt.sopt_valsize; 1680 error = copyout((caddr_t)&valsize, uap->avalsize, 1681 sizeof (valsize)); 1682 } 1683out: 1684 file_drop(uap->s); 1685 return (error); 1686} 1687 1688 1689/* 1690 * Get socket name. 1691 * 1692 * Returns: 0 Success 1693 * EBADF 1694 * file_socket:ENOTSOCK 1695 * file_socket:EBADF 1696 * copyin:EFAULT 1697 * copyout:EFAULT 1698 * <pru_sockaddr>:ENOBUFS[TCP] 1699 * <pru_sockaddr>:ECONNRESET[TCP] 1700 * <pru_sockaddr>:EINVAL[AF_UNIX] 1701 * <sf_getsockname>:??? 1702 */ 1703/* ARGSUSED */ 1704int 1705getsockname(__unused struct proc *p, struct getsockname_args *uap, 1706 __unused int32_t *retval) 1707{ 1708 struct socket *so; 1709 struct sockaddr *sa; 1710 socklen_t len; 1711 socklen_t sa_len; 1712 int error; 1713 1714 error = file_socket(uap->fdes, &so); 1715 if (error) 1716 return (error); 1717 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t)); 1718 if (error) 1719 goto out; 1720 if (so == NULL) { 1721 error = EBADF; 1722 goto out; 1723 } 1724 sa = 0; 1725 socket_lock(so, 1); 1726 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); 1727 if (error == 0) { 1728 error = sflt_getsockname(so, &sa); 1729 if (error == EJUSTRETURN) 1730 error = 0; 1731 } 1732 socket_unlock(so, 1); 1733 if (error) 1734 goto bad; 1735 if (sa == 0) { 1736 len = 0; 1737 goto gotnothing; 1738 } 1739 1740 sa_len = sa->sa_len; 1741 len = MIN(len, sa_len); 1742 error = copyout((caddr_t)sa, uap->asa, len); 1743 if (error) 1744 goto bad; 1745 /* return the actual, untruncated address length */ 1746 len = sa_len; 1747gotnothing: 1748 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t)); 1749bad: 1750 if (sa) 1751 FREE(sa, M_SONAME); 1752out: 1753 file_drop(uap->fdes); 1754 return (error); 1755} 1756 1757/* 1758 * Get name of peer for connected socket. 1759 * 1760 * Returns: 0 Success 1761 * EBADF 1762 * EINVAL 1763 * ENOTCONN 1764 * file_socket:ENOTSOCK 1765 * file_socket:EBADF 1766 * copyin:EFAULT 1767 * copyout:EFAULT 1768 * <pru_peeraddr>:??? 1769 * <sf_getpeername>:??? 1770 */ 1771/* ARGSUSED */ 1772int 1773getpeername(__unused struct proc *p, struct getpeername_args *uap, 1774 __unused int32_t *retval) 1775{ 1776 struct socket *so; 1777 struct sockaddr *sa; 1778 socklen_t len; 1779 socklen_t sa_len; 1780 int error; 1781 1782 error = file_socket(uap->fdes, &so); 1783 if (error) 1784 return (error); 1785 if (so == NULL) { 1786 error = EBADF; 1787 goto out; 1788 } 1789 1790 socket_lock(so, 1); 1791 1792 if ((so->so_state & (SS_CANTRCVMORE | SS_CANTSENDMORE)) == 1793 (SS_CANTRCVMORE | SS_CANTSENDMORE)) { 1794 /* the socket has been shutdown, no more getpeername's */ 1795 socket_unlock(so, 1); 1796 error = EINVAL; 1797 goto out; 1798 } 1799 1800 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { 1801 socket_unlock(so, 1); 1802 error = ENOTCONN; 1803 goto out; 1804 } 1805 error = copyin(uap->alen, (caddr_t)&len, sizeof (socklen_t)); 1806 if (error) { 1807 socket_unlock(so, 1); 1808 goto out; 1809 } 1810 sa = 0; 1811 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); 1812 if (error == 0) { 1813 error = sflt_getpeername(so, &sa); 1814 if (error == EJUSTRETURN) 1815 error = 0; 1816 } 1817 socket_unlock(so, 1); 1818 if (error) 1819 goto bad; 1820 if (sa == 0) { 1821 len = 0; 1822 goto gotnothing; 1823 } 1824 sa_len = sa->sa_len; 1825 len = MIN(len, sa_len); 1826 error = copyout(sa, uap->asa, len); 1827 if (error) 1828 goto bad; 1829 /* return the actual, untruncated address length */ 1830 len = sa_len; 1831gotnothing: 1832 error = copyout((caddr_t)&len, uap->alen, sizeof (socklen_t)); 1833bad: 1834 if (sa) FREE(sa, M_SONAME); 1835out: 1836 file_drop(uap->fdes); 1837 return (error); 1838} 1839 1840int 1841sockargs(struct mbuf **mp, user_addr_t data, int buflen, int type) 1842{ 1843 struct sockaddr *sa; 1844 struct mbuf *m; 1845 int error; 1846 1847 size_t alloc_buflen = (size_t)buflen; 1848 1849 if(alloc_buflen > INT_MAX/2) 1850 return (EINVAL); 1851#ifdef __LP64__ 1852 /* The fd's in the buffer must expand to be pointers, thus we need twice as much space */ 1853 if(type == MT_CONTROL) 1854 alloc_buflen = ((buflen - sizeof(struct cmsghdr))*2) + sizeof(struct cmsghdr); 1855#endif 1856 if (alloc_buflen > MLEN) { 1857 if (type == MT_SONAME && alloc_buflen <= 112) 1858 alloc_buflen = MLEN; /* unix domain compat. hack */ 1859 else if (alloc_buflen > MCLBYTES) 1860 return (EINVAL); 1861 } 1862 m = m_get(M_WAIT, type); 1863 if (m == NULL) 1864 return (ENOBUFS); 1865 if (alloc_buflen > MLEN) { 1866 MCLGET(m, M_WAIT); 1867 if ((m->m_flags & M_EXT) == 0) { 1868 m_free(m); 1869 return (ENOBUFS); 1870 } 1871 } 1872 /* K64: We still copyin the original buflen because it gets expanded later 1873 * and we lie about the size of the mbuf because it only affects unp_* functions 1874 */ 1875 m->m_len = buflen; 1876 error = copyin(data, mtod(m, caddr_t), (u_int)buflen); 1877 if (error) { 1878 (void) m_free(m); 1879 } else { 1880 *mp = m; 1881 if (type == MT_SONAME) { 1882 sa = mtod(m, struct sockaddr *); 1883 sa->sa_len = buflen; 1884 } 1885 } 1886 return (error); 1887} 1888 1889/* 1890 * Given a user_addr_t of length len, allocate and fill out a *sa. 1891 * 1892 * Returns: 0 Success 1893 * ENAMETOOLONG Filename too long 1894 * EINVAL Invalid argument 1895 * ENOMEM Not enough space 1896 * copyin:EFAULT Bad address 1897 */ 1898static int 1899getsockaddr(struct socket *so, struct sockaddr **namp, user_addr_t uaddr, 1900 size_t len, boolean_t translate_unspec) 1901{ 1902 struct sockaddr *sa; 1903 int error; 1904 1905 if (len > SOCK_MAXADDRLEN) 1906 return (ENAMETOOLONG); 1907 1908 if (len < offsetof(struct sockaddr, sa_data[0])) 1909 return (EINVAL); 1910 1911 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK | M_ZERO); 1912 if (sa == NULL) { 1913 return (ENOMEM); 1914 } 1915 error = copyin(uaddr, (caddr_t)sa, len); 1916 if (error) { 1917 FREE(sa, M_SONAME); 1918 } else { 1919 /* 1920 * Force sa_family to AF_INET on AF_INET sockets to handle 1921 * legacy applications that use AF_UNSPEC (0). On all other 1922 * sockets we leave it unchanged and let the lower layer 1923 * handle it. 1924 */ 1925 if (translate_unspec && sa->sa_family == AF_UNSPEC && 1926 INP_CHECK_SOCKAF(so, AF_INET) && 1927 len == sizeof (struct sockaddr_in)) 1928 sa->sa_family = AF_INET; 1929 1930 sa->sa_len = len; 1931 *namp = sa; 1932 } 1933 return (error); 1934} 1935 1936static int 1937getsockaddr_s(struct socket *so, struct sockaddr_storage *ss, 1938 user_addr_t uaddr, size_t len, boolean_t translate_unspec) 1939{ 1940 int error; 1941 1942 if (ss == NULL || uaddr == USER_ADDR_NULL || 1943 len < offsetof(struct sockaddr, sa_data[0])) 1944 return (EINVAL); 1945 1946 /* 1947 * sockaddr_storage size is less than SOCK_MAXADDRLEN, 1948 * so the check here is inclusive. 1949 */ 1950 if (len > sizeof (*ss)) 1951 return (ENAMETOOLONG); 1952 1953 bzero(ss, sizeof (*ss)); 1954 error = copyin(uaddr, (caddr_t)ss, len); 1955 if (error == 0) { 1956 /* 1957 * Force sa_family to AF_INET on AF_INET sockets to handle 1958 * legacy applications that use AF_UNSPEC (0). On all other 1959 * sockets we leave it unchanged and let the lower layer 1960 * handle it. 1961 */ 1962 if (translate_unspec && ss->ss_family == AF_UNSPEC && 1963 INP_CHECK_SOCKAF(so, AF_INET) && 1964 len == sizeof (struct sockaddr_in)) 1965 ss->ss_family = AF_INET; 1966 1967 ss->ss_len = len; 1968 } 1969 return (error); 1970} 1971 1972#if SENDFILE 1973 1974SYSCTL_DECL(_kern_ipc); 1975 1976#define SFUIOBUFS 64 1977static int sendfileuiobufs = SFUIOBUFS; 1978SYSCTL_INT(_kern_ipc, OID_AUTO, sendfileuiobufs, CTLFLAG_RW | CTLFLAG_LOCKED, &sendfileuiobufs, 1979 0, ""); 1980 1981/* Macros to compute the number of mbufs needed depending on cluster size */ 1982#define HOWMANY_16K(n) ((((unsigned int)(n) - 1) >> (PGSHIFT + 2)) + 1) 1983#define HOWMANY_4K(n) ((((unsigned int)(n) - 1) >> PGSHIFT) + 1) 1984 1985/* Upper send limit in bytes (sendfileuiobufs * PAGESIZE) */ 1986#define SENDFILE_MAX_BYTES (sendfileuiobufs << PGSHIFT) 1987 1988/* Upper send limit in the number of mbuf clusters */ 1989#define SENDFILE_MAX_16K HOWMANY_16K(SENDFILE_MAX_BYTES) 1990#define SENDFILE_MAX_4K HOWMANY_4K(SENDFILE_MAX_BYTES) 1991 1992size_t mbuf_pkt_maxlen(mbuf_t m); 1993 1994__private_extern__ size_t 1995mbuf_pkt_maxlen(mbuf_t m) 1996{ 1997 size_t maxlen = 0; 1998 1999 while (m) { 2000 maxlen += mbuf_maxlen(m); 2001 m = mbuf_next(m); 2002 } 2003 return (maxlen); 2004} 2005 2006static void 2007alloc_sendpkt(int how, size_t pktlen, unsigned int *maxchunks, 2008 struct mbuf **m, boolean_t jumbocl) 2009{ 2010 unsigned int needed; 2011 2012 if (pktlen == 0) 2013 panic("%s: pktlen (%ld) must be non-zero\n", __func__, pktlen); 2014 2015 /* 2016 * Try to allocate for the whole thing. Since we want full control 2017 * over the buffer size and be able to accept partial result, we can't 2018 * use mbuf_allocpacket(). The logic below is similar to sosend(). 2019 */ 2020 *m = NULL; 2021 if (pktlen > MBIGCLBYTES && jumbocl) { 2022 needed = MIN(SENDFILE_MAX_16K, HOWMANY_16K(pktlen)); 2023 *m = m_getpackets_internal(&needed, 1, how, 0, M16KCLBYTES); 2024 } 2025 if (*m == NULL) { 2026 needed = MIN(SENDFILE_MAX_4K, HOWMANY_4K(pktlen)); 2027 *m = m_getpackets_internal(&needed, 1, how, 0, MBIGCLBYTES); 2028 } 2029 2030 /* 2031 * Our previous attempt(s) at allocation had failed; the system 2032 * may be short on mbufs, and we want to block until they are 2033 * available. This time, ask just for 1 mbuf and don't return 2034 * until we get it. 2035 */ 2036 if (*m == NULL) { 2037 needed = 1; 2038 *m = m_getpackets_internal(&needed, 1, M_WAIT, 1, MBIGCLBYTES); 2039 } 2040 if (*m == NULL) 2041 panic("%s: blocking allocation returned NULL\n", __func__); 2042 2043 *maxchunks = needed; 2044} 2045 2046/* 2047 * sendfile(2). 2048 * int sendfile(int fd, int s, off_t offset, off_t *nbytes, 2049 * struct sf_hdtr *hdtr, int flags) 2050 * 2051 * Send a file specified by 'fd' and starting at 'offset' to a socket 2052 * specified by 's'. Send only '*nbytes' of the file or until EOF if 2053 * *nbytes == 0. Optionally add a header and/or trailer to the socket 2054 * output. If specified, write the total number of bytes sent into *nbytes. 2055 */ 2056int 2057sendfile(struct proc *p, struct sendfile_args *uap, __unused int *retval) 2058{ 2059 struct fileproc *fp; 2060 struct vnode *vp; 2061 struct socket *so; 2062 struct writev_nocancel_args nuap; 2063 user_ssize_t writev_retval; 2064 struct user_sf_hdtr user_hdtr; 2065 struct user32_sf_hdtr user32_hdtr; 2066 struct user64_sf_hdtr user64_hdtr; 2067 off_t off, xfsize; 2068 off_t nbytes = 0, sbytes = 0; 2069 int error = 0; 2070 size_t sizeof_hdtr; 2071 off_t file_size; 2072 struct vfs_context context = *vfs_context_current(); 2073#define ENXIO_10146739_DBG(err_str) { \ 2074 if (error == ENXIO) { \ 2075 printf(err_str, \ 2076 __func__, \ 2077 "File a radar related to rdar://10146739 \n"); \ 2078 } \ 2079} 2080 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_START), uap->s, 2081 0, 0, 0, 0); 2082 2083 AUDIT_ARG(fd, uap->fd); 2084 AUDIT_ARG(value32, uap->s); 2085 2086 /* 2087 * Do argument checking. Must be a regular file in, stream 2088 * type and connected socket out, positive offset. 2089 */ 2090 if ((error = fp_getfvp(p, uap->fd, &fp, &vp))) { 2091 ENXIO_10146739_DBG("%s: fp_getfvp error. %s"); 2092 goto done; 2093 } 2094 if ((fp->f_flag & FREAD) == 0) { 2095 error = EBADF; 2096 goto done1; 2097 } 2098 if (vnode_isreg(vp) == 0) { 2099 error = ENOTSUP; 2100 goto done1; 2101 } 2102 error = file_socket(uap->s, &so); 2103 if (error) { 2104 ENXIO_10146739_DBG("%s: file_socket error. %s"); 2105 goto done1; 2106 } 2107 if (so == NULL) { 2108 error = EBADF; 2109 goto done2; 2110 } 2111 if (so->so_type != SOCK_STREAM) { 2112 error = EINVAL; 2113 goto done2; 2114 } 2115 if ((so->so_state & SS_ISCONNECTED) == 0) { 2116 error = ENOTCONN; 2117 goto done2; 2118 } 2119 if (uap->offset < 0) { 2120 error = EINVAL; 2121 goto done2; 2122 } 2123 if (uap->nbytes == USER_ADDR_NULL) { 2124 error = EINVAL; 2125 goto done2; 2126 } 2127 if (uap->flags != 0) { 2128 error = EINVAL; 2129 goto done2; 2130 } 2131 2132 context.vc_ucred = fp->f_fglob->fg_cred; 2133 2134#if CONFIG_MACF_SOCKET_SUBSET 2135 /* JMM - fetch connected sockaddr? */ 2136 error = mac_socket_check_send(context.vc_ucred, so, NULL); 2137 if (error) 2138 goto done2; 2139#endif 2140 2141 /* 2142 * Get number of bytes to send 2143 * Should it applies to size of header and trailer? 2144 * JMM - error handling? 2145 */ 2146 copyin(uap->nbytes, &nbytes, sizeof (off_t)); 2147 2148 /* 2149 * If specified, get the pointer to the sf_hdtr struct for 2150 * any headers/trailers. 2151 */ 2152 if (uap->hdtr != USER_ADDR_NULL) { 2153 caddr_t hdtrp; 2154 2155 bzero(&user_hdtr, sizeof (user_hdtr)); 2156 if (IS_64BIT_PROCESS(p)) { 2157 hdtrp = (caddr_t)&user64_hdtr; 2158 sizeof_hdtr = sizeof (user64_hdtr); 2159 } else { 2160 hdtrp = (caddr_t)&user32_hdtr; 2161 sizeof_hdtr = sizeof (user32_hdtr); 2162 } 2163 error = copyin(uap->hdtr, hdtrp, sizeof_hdtr); 2164 if (error) 2165 goto done2; 2166 if (IS_64BIT_PROCESS(p)) { 2167 user_hdtr.headers = user64_hdtr.headers; 2168 user_hdtr.hdr_cnt = user64_hdtr.hdr_cnt; 2169 user_hdtr.trailers = user64_hdtr.trailers; 2170 user_hdtr.trl_cnt = user64_hdtr.trl_cnt; 2171 } else { 2172 user_hdtr.headers = user32_hdtr.headers; 2173 user_hdtr.hdr_cnt = user32_hdtr.hdr_cnt; 2174 user_hdtr.trailers = user32_hdtr.trailers; 2175 user_hdtr.trl_cnt = user32_hdtr.trl_cnt; 2176 } 2177 2178 /* 2179 * Send any headers. Wimp out and use writev(2). 2180 */ 2181 if (user_hdtr.headers != USER_ADDR_NULL) { 2182 bzero(&nuap, sizeof (struct writev_args)); 2183 nuap.fd = uap->s; 2184 nuap.iovp = user_hdtr.headers; 2185 nuap.iovcnt = user_hdtr.hdr_cnt; 2186 error = writev_nocancel(p, &nuap, &writev_retval); 2187 if (error) { 2188 ENXIO_10146739_DBG("%s: writev_nocancel error. %s"); 2189 goto done2; 2190 } 2191 sbytes += writev_retval; 2192 } 2193 } 2194 2195 /* 2196 * Get the file size for 2 reasons: 2197 * 1. We don't want to allocate more mbufs than necessary 2198 * 2. We don't want to read past the end of file 2199 */ 2200 if ((error = vnode_size(vp, &file_size, vfs_context_current())) != 0) { 2201 ENXIO_10146739_DBG("%s: vnode_size error. %s"); 2202 goto done2; 2203 } 2204 2205 /* 2206 * Simply read file data into a chain of mbufs that used with scatter 2207 * gather reads. We're not (yet?) setup to use zero copy external 2208 * mbufs that point to the file pages. 2209 */ 2210 socket_lock(so, 1); 2211 error = sblock(&so->so_snd, M_WAIT); 2212 if (error) { 2213 socket_unlock(so, 1); 2214 goto done2; 2215 } 2216 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) { 2217 mbuf_t m0 = NULL, m; 2218 unsigned int nbufs = sendfileuiobufs, i; 2219 uio_t auio; 2220 char uio_buf[UIO_SIZEOF(sendfileuiobufs)]; /* 1 KB !!! */ 2221 size_t uiolen; 2222 user_ssize_t rlen; 2223 off_t pgoff; 2224 size_t pktlen; 2225 boolean_t jumbocl; 2226 2227 /* 2228 * Calculate the amount to transfer. 2229 * Align to round number of pages. 2230 * Not to exceed send socket buffer, 2231 * the EOF, or the passed in nbytes. 2232 */ 2233 xfsize = sbspace(&so->so_snd); 2234 2235 if (xfsize <= 0) { 2236 if (so->so_state & SS_CANTSENDMORE) { 2237 error = EPIPE; 2238 goto done3; 2239 } else if ((so->so_state & SS_NBIO)) { 2240 error = EAGAIN; 2241 goto done3; 2242 } else { 2243 xfsize = PAGE_SIZE; 2244 } 2245 } 2246 2247 if (xfsize > SENDFILE_MAX_BYTES) 2248 xfsize = SENDFILE_MAX_BYTES; 2249 else if (xfsize > PAGE_SIZE) 2250 xfsize = trunc_page(xfsize); 2251 pgoff = off & PAGE_MASK_64; 2252 if (pgoff > 0 && PAGE_SIZE - pgoff < xfsize) 2253 xfsize = PAGE_SIZE_64 - pgoff; 2254 if (nbytes && xfsize > (nbytes - sbytes)) 2255 xfsize = nbytes - sbytes; 2256 if (xfsize <= 0) 2257 break; 2258 if (off + xfsize > file_size) 2259 xfsize = file_size - off; 2260 if (xfsize <= 0) 2261 break; 2262 2263 /* 2264 * Attempt to use larger than system page-size clusters for 2265 * large writes only if there is a jumbo cluster pool and 2266 * if the socket is marked accordingly. 2267 */ 2268 jumbocl = sosendjcl && njcl > 0 && 2269 ((so->so_flags & SOF_MULTIPAGES) || sosendjcl_ignore_capab); 2270 2271 socket_unlock(so, 0); 2272 alloc_sendpkt(M_WAIT, xfsize, &nbufs, &m0, jumbocl); 2273 pktlen = mbuf_pkt_maxlen(m0); 2274 if (pktlen < (size_t)xfsize) 2275 xfsize = pktlen; 2276 2277 auio = uio_createwithbuffer(nbufs, off, UIO_SYSSPACE, 2278 UIO_READ, &uio_buf[0], sizeof (uio_buf)); 2279 if (auio == NULL) { 2280 printf("sendfile failed. nbufs = %d. %s", nbufs, 2281 "File a radar related to rdar://10146739.\n"); 2282 mbuf_freem(m0); 2283 error = ENXIO; 2284 socket_lock(so, 0); 2285 goto done3; 2286 } 2287 2288 for (i = 0, m = m0, uiolen = 0; 2289 i < nbufs && m != NULL && uiolen < (size_t)xfsize; 2290 i++, m = mbuf_next(m)) { 2291 size_t mlen = mbuf_maxlen(m); 2292 2293 if (mlen + uiolen > (size_t)xfsize) 2294 mlen = xfsize - uiolen; 2295 mbuf_setlen(m, mlen); 2296 uio_addiov(auio, CAST_USER_ADDR_T(mbuf_datastart(m)), 2297 mlen); 2298 uiolen += mlen; 2299 } 2300 2301 if (xfsize != uio_resid(auio)) 2302 printf("sendfile: xfsize: %lld != uio_resid(auio): " 2303 "%lld\n", xfsize, (long long)uio_resid(auio)); 2304 2305 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_START), 2306 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff), 2307 (unsigned int)(xfsize & 0x0ffffffff), 0, 0); 2308 error = fo_read(fp, auio, FOF_OFFSET, &context); 2309 socket_lock(so, 0); 2310 if (error != 0) { 2311 if (uio_resid(auio) != xfsize && (error == ERESTART || 2312 error == EINTR || error == EWOULDBLOCK)) { 2313 error = 0; 2314 } else { 2315 ENXIO_10146739_DBG("%s: fo_read error. %s"); 2316 mbuf_freem(m0); 2317 goto done3; 2318 } 2319 } 2320 xfsize -= uio_resid(auio); 2321 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_READ | DBG_FUNC_END), 2322 uap->s, (unsigned int)((xfsize >> 32) & 0x0ffffffff), 2323 (unsigned int)(xfsize & 0x0ffffffff), 0, 0); 2324 2325 if (xfsize == 0) { 2326 //printf("sendfile: fo_read 0 bytes, EOF\n"); 2327 break; 2328 } 2329 if (xfsize + off > file_size) 2330 printf("sendfile: xfsize: %lld + off: %lld > file_size:" 2331 "%lld\n", xfsize, off, file_size); 2332 for (i = 0, m = m0, rlen = 0; 2333 i < nbufs && m != NULL && rlen < xfsize; 2334 i++, m = mbuf_next(m)) { 2335 size_t mlen = mbuf_maxlen(m); 2336 2337 if (rlen + mlen > (size_t)xfsize) 2338 mlen = xfsize - rlen; 2339 mbuf_setlen(m, mlen); 2340 2341 rlen += mlen; 2342 } 2343 mbuf_pkthdr_setlen(m0, xfsize); 2344 2345retry_space: 2346 /* 2347 * Make sure that the socket is still able to take more data. 2348 * CANTSENDMORE being true usually means that the connection 2349 * was closed. so_error is true when an error was sensed after 2350 * a previous send. 2351 * The state is checked after the page mapping and buffer 2352 * allocation above since those operations may block and make 2353 * any socket checks stale. From this point forward, nothing 2354 * blocks before the pru_send (or more accurately, any blocking 2355 * results in a loop back to here to re-check). 2356 */ 2357 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) { 2358 if (so->so_state & SS_CANTSENDMORE) { 2359 error = EPIPE; 2360 } else { 2361 error = so->so_error; 2362 so->so_error = 0; 2363 } 2364 m_freem(m0); 2365 ENXIO_10146739_DBG("%s: Unexpected socket error. %s"); 2366 goto done3; 2367 } 2368 /* 2369 * Wait for socket space to become available. We do this just 2370 * after checking the connection state above in order to avoid 2371 * a race condition with sbwait(). 2372 */ 2373 if (sbspace(&so->so_snd) < (long)so->so_snd.sb_lowat) { 2374 if (so->so_state & SS_NBIO) { 2375 m_freem(m0); 2376 error = EAGAIN; 2377 goto done3; 2378 } 2379 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT | 2380 DBG_FUNC_START), uap->s, 0, 0, 0, 0); 2381 error = sbwait(&so->so_snd); 2382 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_WAIT| 2383 DBG_FUNC_END), uap->s, 0, 0, 0, 0); 2384 /* 2385 * An error from sbwait usually indicates that we've 2386 * been interrupted by a signal. If we've sent anything 2387 * then return bytes sent, otherwise return the error. 2388 */ 2389 if (error) { 2390 m_freem(m0); 2391 goto done3; 2392 } 2393 goto retry_space; 2394 } 2395 2396 struct mbuf *control = NULL; 2397 { 2398 /* 2399 * Socket filter processing 2400 */ 2401 2402 error = sflt_data_out(so, NULL, &m0, &control, 0); 2403 if (error) { 2404 if (error == EJUSTRETURN) { 2405 error = 0; 2406 continue; 2407 } 2408 ENXIO_10146739_DBG("%s: sflt_data_out error. %s"); 2409 goto done3; 2410 } 2411 /* 2412 * End Socket filter processing 2413 */ 2414 } 2415 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START), 2416 uap->s, 0, 0, 0, 0); 2417 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m0, 2418 0, control, p); 2419 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE_SEND | DBG_FUNC_START), 2420 uap->s, 0, 0, 0, 0); 2421 if (error) { 2422 ENXIO_10146739_DBG("%s: pru_send error. %s"); 2423 goto done3; 2424 } 2425 } 2426 sbunlock(&so->so_snd, 0); /* will unlock socket */ 2427 /* 2428 * Send trailers. Wimp out and use writev(2). 2429 */ 2430 if (uap->hdtr != USER_ADDR_NULL && 2431 user_hdtr.trailers != USER_ADDR_NULL) { 2432 bzero(&nuap, sizeof (struct writev_args)); 2433 nuap.fd = uap->s; 2434 nuap.iovp = user_hdtr.trailers; 2435 nuap.iovcnt = user_hdtr.trl_cnt; 2436 error = writev_nocancel(p, &nuap, &writev_retval); 2437 if (error) { 2438 ENXIO_10146739_DBG("%s: writev_nocancel error. %s"); 2439 goto done2; 2440 } 2441 sbytes += writev_retval; 2442 } 2443done2: 2444 file_drop(uap->s); 2445done1: 2446 file_drop(uap->fd); 2447done: 2448 if (uap->nbytes != USER_ADDR_NULL) { 2449 /* XXX this appears bogus for some early failure conditions */ 2450 copyout(&sbytes, uap->nbytes, sizeof (off_t)); 2451 } 2452 KERNEL_DEBUG_CONSTANT((DBG_FNC_SENDFILE | DBG_FUNC_END), uap->s, 2453 (unsigned int)((sbytes >> 32) & 0x0ffffffff), 2454 (unsigned int)(sbytes & 0x0ffffffff), error, 0); 2455 return (error); 2456done3: 2457 sbunlock(&so->so_snd, 0); /* will unlock socket */ 2458 goto done2; 2459} 2460 2461 2462#endif /* SENDFILE */ 2463