1/* 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1982, 1986, 1989, 1993 31 * The Regents of the University of California. All rights reserved. 32 * (c) UNIX System Laboratories, Inc. 33 * All or some portions of this file are derived from material licensed 34 * to the University of California by American Telephone and Telegraph 35 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 36 * the permission of UNIX System Laboratories, Inc. 
37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95 67 */ 68/* 69 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce 70 * support for mandatory and extensible security protections. 
This notice 71 * is included in support of clause 2.2 (b) of the Apple Public License, 72 * Version 2.0. 73 */ 74 75#include <sys/param.h> 76#include <sys/systm.h> 77#include <sys/filedesc.h> 78#include <sys/ioctl.h> 79#include <sys/file_internal.h> 80#include <sys/proc_internal.h> 81#include <sys/socketvar.h> 82#include <sys/uio_internal.h> 83#include <sys/kernel.h> 84#include <sys/stat.h> 85#include <sys/malloc.h> 86#include <sys/sysproto.h> 87 88#include <sys/mount_internal.h> 89#include <sys/protosw.h> 90#include <sys/ev.h> 91#include <sys/user.h> 92#include <sys/kdebug.h> 93#include <sys/poll.h> 94#include <sys/event.h> 95#include <sys/eventvar.h> 96 97#include <mach/mach_types.h> 98#include <kern/kern_types.h> 99#include <kern/assert.h> 100#include <kern/kalloc.h> 101#include <kern/thread.h> 102#include <kern/clock.h> 103 104#include <sys/mbuf.h> 105#include <sys/socket.h> 106#include <sys/socketvar.h> 107#include <sys/errno.h> 108#include <sys/syscall.h> 109#include <sys/pipe.h> 110 111#include <bsm/audit_kernel.h> 112 113#include <net/if.h> 114#include <net/route.h> 115 116#include <netinet/in.h> 117#include <netinet/in_systm.h> 118#include <netinet/ip.h> 119#include <netinet/in_pcb.h> 120#include <netinet/ip_var.h> 121#include <netinet/ip6.h> 122#include <netinet/tcp.h> 123#include <netinet/tcp_fsm.h> 124#include <netinet/tcp_seq.h> 125#include <netinet/tcp_timer.h> 126#include <netinet/tcp_var.h> 127#include <netinet/tcpip.h> 128#include <netinet/tcp_debug.h> 129/* for wait queue based select */ 130#include <kern/wait_queue.h> 131#include <kern/kalloc.h> 132#include <sys/vnode_internal.h> 133 134/* XXX should be in a header file somewhere */ 135void evsofree(struct socket *); 136void evpipefree(struct pipe *); 137void postpipeevent(struct pipe *, int); 138void postevent(struct socket *, struct sockbuf *, int); 139extern kern_return_t IOBSDGetPlatformUUID(__darwin_uuid_t uuid, mach_timespec_t timeoutp); 140 141int rd_uio(struct proc *p, int fdes, uio_t 
uio, user_ssize_t *retval); 142int wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval); 143extern void *get_bsduthreadarg(thread_t); 144extern int *get_bsduthreadrval(thread_t); 145 146__private_extern__ int dofileread(vfs_context_t ctx, struct fileproc *fp, 147 user_addr_t bufp, user_size_t nbyte, 148 off_t offset, int flags, user_ssize_t *retval); 149__private_extern__ int dofilewrite(vfs_context_t ctx, struct fileproc *fp, 150 user_addr_t bufp, user_size_t nbyte, 151 off_t offset, int flags, user_ssize_t *retval); 152__private_extern__ int preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_vnode); 153__private_extern__ void donefileread(struct proc *p, struct fileproc *fp_ret, int fd); 154 155#if NETAT 156extern int appletalk_inited; 157#endif /* NETAT */ 158 159#define f_flag f_fglob->fg_flag 160#define f_type f_fglob->fg_type 161#define f_msgcount f_fglob->fg_msgcount 162#define f_cred f_fglob->fg_cred 163#define f_ops f_fglob->fg_ops 164#define f_offset f_fglob->fg_offset 165#define f_data f_fglob->fg_data 166 167/* 168 * Read system call. 169 * 170 * Returns: 0 Success 171 * preparefileread:EBADF 172 * preparefileread:ESPIPE 173 * preparefileread:ENXIO 174 * preparefileread:EBADF 175 * dofileread:??? 
 */
int
read(struct proc *p, struct read_args *uap, user_ssize_t *retval)
{
	/* Cancellation point: honor any pending pthread cancel, then
	 * delegate to the non-cancelable variant. */
	__pthread_testcancel(1);
	return(read_nocancel(p, (struct read_nocancel_args *)uap, retval));
}

int
read_nocancel(struct proc *p, struct read_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;

	/* take a reference on the fileproc; 0 = no pread-specific checks */
	if ( (error = preparefileread(p, &fp, fd, 0)) )
		return (error);

	/* offset of -1 with flags 0 means "use the file's current offset" */
	error = dofileread(vfs_context_current(), fp, uap->cbuf, uap->nbyte,
			(off_t)-1, 0, retval);

	donefileread(p, fp, fd);

	return (error);
}

/*
 * Pread system call
 *
 * Returns:	0			Success
 *	preparefileread:EBADF
 *	preparefileread:ESPIPE
 *	preparefileread:ENXIO
 *	preparefileread:EBADF
 *	dofileread:???
 */
int
pread(struct proc *p, struct pread_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; real work happens in pread_nocancel(). */
	__pthread_testcancel(1);
	return(pread_nocancel(p, (struct pread_nocancel_args *)uap, retval));
}

int
pread_nocancel(struct proc *p, struct pread_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp = NULL;	/* fp set by preparefileread() */
	int fd = uap->fd;
	int error;

	/* check_for_pread == 1: reject non-vnode, fifo, and tty targets */
	if ( (error = preparefileread(p, &fp, fd, 1)) )
		goto out;

	/* FOF_OFFSET: read at uap->offset without moving the file offset */
	error = dofileread(vfs_context_current(), fp, uap->buf, uap->nbyte,
			uap->offset, FOF_OFFSET, retval);

	donefileread(p, fp, fd);

	if (!error)
		KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pread) | DBG_FUNC_NONE),
			uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

out:
	return (error);
}

/*
 * Code common for read and pread
 */

/*
 * Undo preparefileread(): clear the in-character-read flag and release
 * the fileproc reference taken by fp_lookup().
 */
void
donefileread(struct proc *p, struct fileproc *fp, int fd)
{
	proc_fdlock_spin(p);

	fp->f_flags &= ~FP_INCHRREAD;

	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
}

/*
 * Common setup for read and pread: look up the descriptor, verify it is
 * open for reading and, for pread, that seeking on it makes sense.
 * On success *fp_ret holds a referenced fileproc; the caller must balance
 * with donefileread().
 *
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *
 *	fp_lookup:EBADF
 *	fo_read:???
 */
int
preparefileread(struct proc *p, struct fileproc **fp_ret, int fd, int check_for_pread)
{
	vnode_t vp;
	int error;
	struct fileproc *fp;

	proc_fdlock_spin(p);

	error = fp_lookup(p, fd, &fp, 1);

	if (error) {
		proc_fdunlock(p);
		return (error);
	}
	if ((fp->f_flag & FREAD) == 0) {
		/* not open for reading */
		error = EBADF;
		goto out;
	}
	if (check_for_pread && (fp->f_type != DTYPE_VNODE)) {
		/* pread() requires a seekable object */
		error = ESPIPE;
		goto out;
	}
	if (fp->f_type == DTYPE_VNODE) {
		vp = (struct vnode *)fp->f_fglob->fg_data;

		if (check_for_pread && (vnode_isfifo(vp))) {
			/* fifos are not seekable either */
			error = ESPIPE;
			goto out;
		}
		if (check_for_pread && (vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto out;
		}
		if (vp->v_type == VCHR)
			/* flag is cleared again in donefileread() */
			fp->f_flags |= FP_INCHRREAD;
	}

	*fp_ret = fp;

	proc_fdunlock(p);
	return (0);

out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return (error);
}


/*
 * Returns:	0			Success
 *		EINVAL
 *	fo_read:???
 */
__private_extern__ int
dofileread(vfs_context_t ctx, struct fileproc *fp,
	   user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
	   user_ssize_t *retval)
{
	uio_t auio;
	user_ssize_t bytecnt;
	long error = 0;
	char uio_buf[ UIO_SIZEOF(1) ];

	// LP64todo - do we want to raise this?
	if (nbyte > INT_MAX)
		return (EINVAL);

	/* Build a single-iovec uio on the stack, sized for the caller's ABI. */
	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_READ,
					    &uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_READ,
					    &uio_buf[0], sizeof(uio_buf));
	}
	uio_addiov(auio, bufp, nbyte);

	bytecnt = nbyte;

	if ((error = fo_read(fp, auio, flags, ctx))) {
		/* a partial transfer interrupted by a signal or blocking
		 * condition is reported as a short success, not an error */
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	bytecnt -= uio_resid(auio);

	*retval = bytecnt;

	return (error);
}

/*
 * Scatter read system call.
 *
 * Returns:	0			Success
 *		EINVAL
 *		ENOMEM
 *	copyin:EFAULT
 *	rd_uio:???
 */
int
readv(struct proc *p, struct readv_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; real work happens in readv_nocancel(). */
	__pthread_testcancel(1);
	return(readv_nocancel(p, (struct readv_nocancel_args *)uap, retval));
}

int
readv_nocancel(struct proc *p, struct readv_nocancel_args *uap, user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	int size_of_iovec;
	struct user_iovec *iovp;

	/* Verify range before calling uio_create() */
	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
		return (EINVAL);

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(uap->iovcnt, 0,
			(IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			UIO_READ);

	/* get location of iovecs within the uio.  then copyin the iovecs from
	 * user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	size_of_iovec = (IS_64BIT_PROCESS(p) ?
		sizeof(struct user_iovec) : sizeof(struct iovec));
	error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	uio_calculateresid(auio);
	error = rd_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return (error);
}

/*
 * Write system call
 *
 * Returns:	0			Success
 *		EBADF
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
write(struct proc *p, struct write_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; real work happens in write_nocancel(). */
	__pthread_testcancel(1);
	return(write_nocancel(p, (struct write_nocancel_args *)uap, retval));

}

int
write_nocancel(struct proc *p, struct write_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;

	error = fp_lookup(p,fd,&fp,0);
	if (error)
		return(error);
	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else {
		/* charge the I/O to the credential the file was opened with */
		struct vfs_context context = *(vfs_context_current());
		context.vc_ucred = fp->f_fglob->fg_cred;

		error = dofilewrite(&context, fp, uap->cbuf, uap->nbyte,
			(off_t)-1, 0, retval);
	}
	if (error == 0)
		fp_drop_written(p, fd, fp);
	else
		fp_drop(p, fd, fp, 0);
	return(error);
}

/*
 * pwrite system call
 *
 * Returns:	0			Success
 *		EBADF
 *		ESPIPE
 *		ENXIO
 *		EINVAL
 *	fp_lookup:EBADF
 *	dofilewrite:???
 */
int
pwrite(struct proc *p, struct pwrite_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; real work happens in pwrite_nocancel(). */
	__pthread_testcancel(1);
	return(pwrite_nocancel(p, (struct pwrite_nocancel_args *)uap, retval));
}

int
pwrite_nocancel(struct proc *p, struct pwrite_nocancel_args *uap, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	int fd = uap->fd;
	vnode_t vp = (vnode_t)0;

	error = fp_lookup(p,fd,&fp,0);
	if (error)
		return(error);

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
	} else {
		struct vfs_context context = *vfs_context_current();
		context.vc_ucred = fp->f_fglob->fg_cred;

		/* pwrite() only makes sense on seekable vnodes */
		if (fp->f_type != DTYPE_VNODE) {
			error = ESPIPE;
			goto errout;
		}
		vp = (vnode_t)fp->f_fglob->fg_data;
		if (vnode_isfifo(vp)) {
			error = ESPIPE;
			goto errout;
		}
		if ((vp->v_flag & VISTTY)) {
			error = ENXIO;
			goto errout;
		}
		if (uap->offset == (off_t)-1) {
			error = EINVAL;
			goto errout;
		}

		/* FOF_OFFSET: write at uap->offset without moving the offset */
		error = dofilewrite(&context, fp, uap->buf, uap->nbyte,
			uap->offset, FOF_OFFSET, retval);
	}
errout:
	if (error == 0)
		fp_drop_written(p, fd, fp);
	else
		fp_drop(p, fd, fp, 0);

	if (!error)
		KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_pwrite) | DBG_FUNC_NONE),
			uap->fd, uap->nbyte, (unsigned int)((uap->offset >> 32)), (unsigned int)(uap->offset), 0);

	return(error);
}

/*
 * Returns:	0			Success
 *		EINVAL
 *	<fo_write>:EPIPE
 *	<fo_write>:???		[indirect through struct fileops]
 */
__private_extern__ int
dofilewrite(vfs_context_t ctx, struct fileproc *fp,
	    user_addr_t bufp, user_size_t nbyte, off_t offset, int flags,
	    user_ssize_t *retval)
{
	uio_t auio;
	long error = 0;
	user_ssize_t bytecnt;
	char uio_buf[ UIO_SIZEOF(1) ];

	// LP64todo - do we want to raise this?
	if (nbyte > INT_MAX)
		return (EINVAL);

	/* Build a single-iovec uio on the stack, sized for the caller's ABI. */
	if (IS_64BIT_PROCESS(vfs_context_proc(ctx))) {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE64, UIO_WRITE,
					    &uio_buf[0], sizeof(uio_buf));
	} else {
		auio = uio_createwithbuffer(1, offset, UIO_USERSPACE32, UIO_WRITE,
					    &uio_buf[0], sizeof(uio_buf));
	}
	uio_addiov(auio, bufp, nbyte);

	bytecnt = nbyte;
	if ((error = fo_write(fp, auio, flags, ctx))) {
		/* a partial transfer interrupted by a signal or blocking
		 * condition is reported as a short success, not an error */
		if (uio_resid(auio) != bytecnt && (error == ERESTART ||
			error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
			/* XXX Raise the signal on the thread? */
			psignal(vfs_context_proc(ctx), SIGPIPE);
		}
	}
	bytecnt -= uio_resid(auio);
	*retval = bytecnt;

	return (error);
}

/*
 * Gather write system call
 */
int
writev(struct proc *p, struct writev_args *uap, user_ssize_t *retval)
{
	/* Cancellation point; real work happens in writev_nocancel(). */
	__pthread_testcancel(1);
	return(writev_nocancel(p, (struct writev_nocancel_args *)uap, retval));
}

int
writev_nocancel(struct proc *p, struct writev_nocancel_args *uap, user_ssize_t *retval)
{
	uio_t auio = NULL;
	int error;
	int size_of_iovec;
	struct user_iovec *iovp;

	/* Verify range before calling uio_create() */
	if (uap->iovcnt <= 0 || uap->iovcnt > UIO_MAXIOV)
		return (EINVAL);

	/* allocate a uio large enough to hold the number of iovecs passed */
	auio = uio_create(uap->iovcnt, 0,
			(IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32),
			UIO_WRITE);

	/* get location of iovecs within the uio.  then copyin the iovecs from
	 * user space.
	 */
	iovp = uio_iovsaddr(auio);
	if (iovp == NULL) {
		error = ENOMEM;
		goto ExitThisRoutine;
	}
	size_of_iovec = (IS_64BIT_PROCESS(p) ?
		sizeof(struct user_iovec) : sizeof(struct iovec));
	error = copyin(uap->iovp, (caddr_t)iovp, (uap->iovcnt * size_of_iovec));
	if (error) {
		goto ExitThisRoutine;
	}

	/* finalize uio_t for use and do the IO
	 */
	uio_calculateresid(auio);
	error = wr_uio(p, uap->fd, auio, retval);

ExitThisRoutine:
	if (auio != NULL) {
		uio_free(auio);
	}
	return (error);
}


/*
 * Common backend for writev: write the prepared uio to fdes, handling
 * interrupted partial transfers and SIGPIPE delivery.
 */
int
wr_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	error = fp_lookup(p,fdes,&fp,0);
	if (error)
		return(error);

	if ((fp->f_flag & FWRITE) == 0) {
		error = EBADF;
		goto out;
	}
	count = uio_resid(uio);

	context.vc_ucred = fp->f_cred;
	error = fo_write(fp, uio, 0, &context);
	if (error) {
		/* interrupted partial write counts as a short success */
		if (uio_resid(uio) != count && (error == ERESTART ||
						error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* The socket layer handles SIGPIPE */
		if (error == EPIPE && fp->f_type != DTYPE_SOCKET)
			psignal(p, SIGPIPE);
	}
	*retval = count - uio_resid(uio);

out:
	if ( (error == 0) )
		fp_drop_written(p, fdes, fp);
	else
		fp_drop(p, fdes, fp, 0);
	return(error);
}


/*
 * Common backend for readv: read from fdes into the prepared uio.
 */
int
rd_uio(struct proc *p, int fdes, uio_t uio, user_ssize_t *retval)
{
	struct fileproc *fp;
	int error;
	user_ssize_t count;
	struct vfs_context context = *vfs_context_current();

	if ( (error = preparefileread(p, &fp, fdes, 0)) )
		return (error);

	count = uio_resid(uio);

	context.vc_ucred = fp->f_cred;

	error = fo_read(fp, uio, 0, &context);

	if (error) {
		/* interrupted partial read counts as a short success */
		if (uio_resid(uio) != count && (error == ERESTART ||
						error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	*retval = count - uio_resid(uio);

	donefileread(p, fp, fdes);

	return (error);
}

/*
 * Ioctl system
call
 *
 * Returns:	0			Success
 *		EBADF
 *		ENOTTY
 *		ENOMEM
 *		ESRCH
 *	copyin:EFAULT
 *	copyout:EFAULT
 *	fp_lookup:EBADF			Bad file descriptor
 *	fo_ioctl:???
 */
int
ioctl(struct proc *p, struct ioctl_args *uap, __unused register_t *retval)
{
	struct fileproc *fp;
	u_long com;
	int error = 0;
	u_int size;
	caddr_t datap, memp;
	boolean_t is64bit;
	int tmp;
#define STK_PARAMS	128
	char stkbuf[STK_PARAMS];
	int fd = uap->fd;
	struct vfs_context context = *vfs_context_current();

	AUDIT_ARG(fd, uap->fd);
	AUDIT_ARG(cmd, CAST_DOWN(int, uap->com)); /* LP64todo: uap->com is a user-land long */
	AUDIT_ARG(addr, uap->data);

	is64bit = proc_is64bit(p);

	proc_fdlock(p);
	error = fp_lookup(p,fd,&fp,1);
	if (error) {
		proc_fdunlock(p);
		return(error);
	}

	AUDIT_ARG(file, p, fp);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		goto out;
	}

	context.vc_ucred = fp->f_fglob->fg_cred;

#if CONFIG_MACF
	error = mac_file_check_ioctl(context.vc_ucred, fp->f_fglob, uap->com);
	if (error)
		goto out;
#endif

#if NETAT
	/*
	 * ### LD 6/11/97 Hack Alert: this is to get AppleTalk to work
	 * while implementing an ATioctl system call
	 */
	{
		if (appletalk_inited && ((uap->com & 0x0000FFFF) == 0xff99)) {
			u_long fixed_command;

#ifdef APPLETALK_DEBUG
			kprintf("ioctl: special AppleTalk \n");
#endif
			datap = &stkbuf[0];
			*(user_addr_t *)datap = uap->data;
			fixed_command = _IOW(0, 0xff99, uap->data);
			error = fo_ioctl(fp, fixed_command, datap, &context);
			goto out;
		}
	}

#endif /* NETAT */


	switch (com = uap->com) {
	case FIONCLEX:
		/* clear close-on-exec; handled entirely at the fd layer */
		*fdflags(p, uap->fd) &= ~UF_EXCLOSE;
		error =0;
		goto out;
	case FIOCLEX:
		/* set close-on-exec; handled entirely at the fd layer */
		*fdflags(p, uap->fd) |= UF_EXCLOSE;
		error =0;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof (stkbuf)) {
		/* argument too large for the stack buffer: heap-allocate.
		 * The fd lock is dropped around the possibly-blocking kalloc. */
		proc_fdunlock(p);
		if ((memp = (caddr_t)kalloc(size)) == 0) {
			proc_fdlock(p);
			error = ENOMEM;
			goto out;
		}
		proc_fdlock(p);
		datap = memp;
	} else
		datap = &stkbuf[0];
	if (com&IOC_IN) {
		if (size) {
			/* fd lock dropped around the user-space copy as well */
			proc_fdunlock(p);
			error = copyin(uap->data, datap, size);
			if (error) {
				if (memp)
					kfree(memp, size);
				proc_fdlock(p);
				goto out;
			}
			proc_fdlock(p);
		} else {
			/* XXX - IOC_IN and no size? we should probably return an error here!! */
			if (is64bit) {
				*(user_addr_t *)datap = uap->data;
			}
			else {
				*(uint32_t *)datap = (uint32_t)uap->data;
			}
		}
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(datap, size);
	else if (com&IOC_VOID) {
		/* XXX - this is odd since IOC_VOID means no parameters */
		if (is64bit) {
			*(user_addr_t *)datap = uap->data;
		}
		else {
			*(uint32_t *)datap = (uint32_t)uap->data;
		}
	}

	switch (com) {

	case FIONBIO:
		/* toggle non-blocking I/O on the open file */
		if ( (tmp = *(int *)datap) )
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, &context);
		break;

	case FIOASYNC:
		/* toggle async (SIGIO) notification on the open file */
		if ( (tmp = *(int *)datap) )
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, &context);
		break;

	case FIOSETOWN:
		tmp = *(int *)datap;
		if (fp->f_type == DTYPE_SOCKET) {
			/* sockets store the owner directly */
			((struct socket *)fp->f_data)->so_pgid = tmp;
			error = 0;
			break;
		}
		if (fp->f_type == DTYPE_PIPE) {
			error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
			break;
		}
		if (tmp <= 0) {
			/* value <= 0 encodes a process group id; negate it */
			tmp = -tmp;
		} else {
			/* positive value is a pid; translate to its pgrp id */
			struct proc *p1 = proc_find(tmp);
			if (p1 == 0) {
				error = ESRCH;
				break;
			}
			tmp = p1->p_pgrpid;
			proc_rele(p1);
		}
		error = fo_ioctl(fp, (int)TIOCSPGRP, (caddr_t)&tmp, &context);
		break;

	case FIOGETOWN:
		if (fp->f_type == DTYPE_SOCKET) {
			error = 0;
			*(int *)datap = ((struct socket *)fp->f_data)->so_pgid;
			break;
		}
		error = fo_ioctl(fp, TIOCGPGRP, datap, &context);
		*(int *)datap = -*(int *)datap;
		break;

	default:
		error = fo_ioctl(fp, com, datap, &context);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(datap, uap->data, (u_int)size);
		break;
	}
	proc_fdunlock(p);
	if (memp)
		kfree(memp, size);
	proc_fdlock(p);
out:
	fp_drop(p, fd, fp, 1);
	proc_fdunlock(p);
	return(error);
}

int selwait, nselcoll;
#define SEL_FIRSTPASS 1
#define SEL_SECONDPASS 2
extern int selcontinue(int error);
extern int selprocess(int error, int sel_pass);
static int selscan(struct proc *p, struct _select * sel,
			int nfd, register_t *retval, int sel_pass, wait_queue_sub_t wqsub);
static int selcount(struct proc *p, u_int32_t *ibits, u_int32_t *obits,
			int nfd, int * count, int *kfcount);
static int seldrop(struct proc *p, u_int32_t *ibits, int nfd);
extern uint64_t tvtoabstime(struct timeval *tvp);

/*
 * Select system call.
 *
 * Returns:	0			Success
 *		EINVAL			Invalid argument
 *		EAGAIN			Nonconformant error if allocation fails
 *	selprocess:???
 */
int
select(struct proc *p, struct select_args *uap, register_t *retval)
{
	/* Cancellation point; real work happens in select_nocancel(). */
	__pthread_testcancel(1);
	return(select_nocancel(p, (struct select_nocancel_args *)uap, retval));
}

int
select_nocancel(struct proc *p, struct select_nocancel_args *uap, register_t *retval)
{
	int error = 0;
	u_int ni, nw, size;
	thread_t th_act;
	struct uthread *uth;
	struct _select *sel;
	int needzerofill = 1;
	int count = 0;
	int kfcount = 0;

	th_act = current_thread();
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;
	/* NOTE(review): retval is re-pointed at the uthread's own return
	 * value slot, discarding the caller-supplied pointer — presumably so
	 * the continuation (selprocess/selcontinue) can find it later. */
	retval = (int *)get_bsduthreadrval(th_act);
	*retval = 0;

	if (uap->nd < 0) {
		return (EINVAL);
	}

	/* select on thread of process that already called proc_exit() */
	if (p->p_fd == NULL) {
		return (EBADF);
	}

	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */

	/* bytes needed per descriptor set of nd bits */
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

	/*
	 * if the previously allocated space for the bits is smaller than
	 * what is requested or no space has yet been allocated for this
	 * thread, allocate enough space now.
	 *
	 * Note: If this process fails, select() will return EAGAIN; this
	 * is the same thing poll() returns in a no-memory situation, but
	 * it is not a POSIX compliant error code for select().
	 */
	if (sel->nbytes < (3 * ni)) {
		int nbytes = 3 * ni;

		/* Free previous allocation, if any */
		if (sel->ibits != NULL)
			FREE(sel->ibits, M_TEMP);
		if (sel->obits != NULL) {
			FREE(sel->obits, M_TEMP);
			/* NULL out; subsequent ibits allocation may fail */
			sel->obits = NULL;
		}

		MALLOC(sel->ibits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->ibits == NULL)
			return (EAGAIN);
		MALLOC(sel->obits, u_int32_t *, nbytes, M_TEMP, M_WAITOK | M_ZERO);
		if (sel->obits == NULL) {
			FREE(sel->ibits, M_TEMP);
			sel->ibits = NULL;
			return (EAGAIN);
		}
		sel->nbytes = nbytes;
		needzerofill = 0;
	}

	if (needzerofill) {
		bzero((caddr_t)sel->ibits, sel->nbytes);
		bzero((caddr_t)sel->obits, sel->nbytes);
	}

	/*
	 * get the bits from the user address space
	 */
#define	getbits(name, x) \
	do { \
		if (uap->name && (error = copyin(uap->name, \
			(caddr_t)&sel->ibits[(x) * nw], ni))) \
			goto continuation; \
	} while (0)

	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	if (uap->tv) {
		struct timeval atv;
		if (IS_64BIT_PROCESS(p)) {
			struct user_timeval atv64;
			error = copyin(uap->tv, (caddr_t)&atv64, sizeof(atv64));
			/* Loses resolution - assume timeout < 68 years */
			atv.tv_sec = atv64.tv_sec;
			atv.tv_usec = atv64.tv_usec;
		} else {
			error = copyin(uap->tv, (caddr_t)&atv, sizeof(atv));
		}
		if (error)
			goto continuation;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto continuation;
		}

		/* convert the relative timeout into an absolute deadline */
		clock_absolutetime_interval_to_deadline(
						tvtoabstime(&atv), &sel->abstime);
	}
	else
		sel->abstime = 0;

	sel->kfcount = 0;
	/* count the descriptors actually selected on (and those needing the
	 * kernel funnel), so the wait queue set can be sized */
	if ( (error = selcount(p, sel->ibits, sel->obits, uap->nd, &count, &kfcount)) ) {
		goto continuation;
	}
	sel->count = count;
	sel->kfcount = kfcount;
	size =
	    SIZEOF_WAITQUEUE_SET + (count * SIZEOF_WAITQUEUE_LINK);
	if (uth->uu_allocsize) {
		if (uth->uu_wqset == 0)
			panic("select: wql memory smashed");
		/* needed for the select now */
		if (size > uth->uu_allocsize) {
			/* grow the per-thread wait queue set allocation */
			kfree(uth->uu_wqset, uth->uu_allocsize);
			uth->uu_allocsize = size;
			uth->uu_wqset = (wait_queue_set_t)kalloc(size);
			if (uth->uu_wqset == (wait_queue_set_t)NULL)
				panic("failed to allocate memory for waitqueue\n");
		}
	} else {
		/* first select on this thread: allocate the wait queue set */
		sel->count = count;
		uth->uu_allocsize = size;
		uth->uu_wqset = (wait_queue_set_t)kalloc(uth->uu_allocsize);
		if (uth->uu_wqset == (wait_queue_set_t)NULL)
			panic("failed to allocate memory for waitqueue\n");
	}
	bzero(uth->uu_wqset, size);
	/* the wait queue links live immediately after the set itself */
	sel->wql = (char *)uth->uu_wqset + SIZEOF_WAITQUEUE_SET;
	wait_queue_set_init(uth->uu_wqset, (SYNC_POLICY_FIFO | SYNC_POLICY_PREPOST));

continuation:
	return selprocess(error, SEL_FIRSTPASS);
}

int
selcontinue(int error)
{
	/* Continuation entered after tsleep1(): run the second pass. */
	return selprocess(error, SEL_SECONDPASS);
}

/*
 * Core select loop, shared by the initial call (SEL_FIRSTPASS) and the
 * tsleep1() continuation (SEL_SECONDPASS).  All per-call state is fetched
 * back out of the uthread, since the continuation starts on a fresh stack.
 */
int
selprocess(int error, int sel_pass)
{
	int ncoll;
	u_int ni, nw;
	thread_t th_act;
	struct uthread *uth;
	struct proc *p;
	struct select_args *uap;
	int *retval;
	struct _select *sel;
	int unwind = 1;
	int prepost = 0;
	int somewakeup = 0;
	int doretry = 0;
	wait_result_t wait_result;

	p = current_proc();
	th_act = current_thread();
	uap = (struct select_args *)get_bsduthreadarg(th_act);
	retval = (int *)get_bsduthreadrval(th_act);
	uth = get_bsdthread_info(th_act);
	sel = &uth->uu_select;

	/* if it is first pass wait queue is not setup yet */
	if ((error != 0) && (sel_pass == SEL_FIRSTPASS))
		unwind = 0;
	if (sel->count == 0)
		unwind = 0;
retry:
	if (error != 0) {
		goto done;
	}

	ncoll = nselcoll;
	OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
	/* skip scans if the select is just for timeouts */
	if (sel->count) {
		if (sel_pass == SEL_FIRSTPASS)
			wait_queue_sub_clearrefs(uth->uu_wqset);

		error = selscan(p, sel, uap->nd, retval, sel_pass, (wait_queue_sub_t)uth->uu_wqset);
		if (error || *retval) {
			goto done;
		}
		if (prepost) {
			/* if the select was preposted, we may wake up and find
			 * that someone else already consumed the data; go back
			 * and select again if time permits
			 */
			prepost = 0;
			doretry = 1;
		}
		if (somewakeup) {
			somewakeup = 0;
			doretry = 1;
		}
	}

	if (uap->tv) {
		uint64_t now;

		/* deadline expired: report whatever the scan found */
		clock_get_uptime(&now);
		if (now >= sel->abstime)
			goto done;
	}

	if (doretry) {
		/* cleanup obits and try again */
		doretry = 0;
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	/*
	 * To effect a poll, the timeout argument should be
	 * non-nil, pointing to a zero-valued timeval structure.
	 */
	if (uap->tv && sel->abstime == 0) {
		goto done;
	}

	/* No spurious wakeups due to colls, no need to check for them */
	if ((sel_pass == SEL_SECONDPASS) || ((p->p_flag & P_SELECT) == 0)) {
		sel_pass = SEL_FIRSTPASS;
		goto retry;
	}

	OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);

	/* if the select is just for timeout skip check */
	if (sel->count && (sel_pass == SEL_SECONDPASS))
		panic("selprocess: 2nd pass assertwaiting");

	/* Wait Queue Subordinate has waitqueue as first element */
	wait_result = wait_queue_assert_wait((wait_queue_t)uth->uu_wqset,
					     &selwait, THREAD_ABORTSAFE, sel->abstime);
	if (wait_result != THREAD_AWAKENED) {
		/* there are no preposted events; block.  tsleep1() resumes
		 * via the selcontinue continuation, not by returning here. */
		error = tsleep1(NULL, PSOCK | PCATCH,
				"select", 0, selcontinue);
	} else {
		prepost = 1;
		error = 0;
	}

	sel_pass = SEL_SECONDPASS;
	if (error == 0) {
		if (!prepost)
			somewakeup = 1;
		goto retry;
	}
done:
	if (unwind) {
		wait_subqueue_unlink_all(uth->uu_wqset);
		seldrop(p, sel->ibits, uap->nd);
	}
	OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	nw = howmany(uap->nd, NFDBITS);
	ni = nw * sizeof(fd_mask);

#define	putbits(name, x) \
	do { \
		if (uap->name && (error2 = \
			copyout((caddr_t)&sel->obits[(x) * nw], uap->name, ni))) \
			error = error2; \
	} while (0)

	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}
	return(error);
}

static int
selscan(struct proc *p, struct _select *sel, int nfd, register_t *retval,
	int sel_pass, wait_queue_sub_t wqsub)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;
	int nc = 0;
	static int flag[3] = { FREAD, FWRITE, 0 };
	u_int32_t *iptr, *optr;
	u_int nw;
	u_int32_t *ibits, *obits;
	char * wql;
	char * wql_ptr;
	int count, kfcount;
	boolean_t funnel_state;
	vnode_t vp;
	struct vfs_context context = *vfs_context_current();

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*retval=0;
		return(EIO);
	}
	ibits = sel->ibits;
	obits = sel->obits;
	wql = sel->wql;

	nw = howmany(nfd, NFDBITS);

	count = sel->count;
	kfcount = sel->kfcount;

	if (kfcount > count)
		panic("selscan: count < kfcount");

	/* first scan: descriptors that require the kernel funnel */
	if (kfcount != 0) {
		funnel_state = thread_funnel_set(kernel_flock, TRUE);

		proc_fdlock(p);
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr =
 (u_int32_t *)&obits[msk * nw];

			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];

				/* visit each set bit in this word */
				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];

					if (fp == NULL ||
						(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						proc_fdunlock(p);
						thread_funnel_set(kernel_flock, funnel_state);
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS) {
						/* tear down the select registration */
						wql_ptr = (char *)0;
						fp->f_flags &= ~FP_INSELECT;
						fp->f_waddr = (void *)0;
					} else {
						/* hand the fileop its wait queue link slot */
						wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
						fp->f_flags |= FP_INSELECT;
						fp->f_waddr = (void *)wqsub;
					}

					context.vc_ucred = fp->f_cred;

					/* this pass: character-device vnodes only */
					if (fp->f_ops && (fp->f_type == DTYPE_VNODE)
						&& ((vp = (struct vnode *)fp->f_data) != NULLVP)
						&& (vp->v_type == VCHR)
						&& fo_select(fp, flag[msk], wql_ptr, &context)) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
		proc_fdunlock(p);
		thread_funnel_set(kernel_flock, funnel_state);
	}

	nc = 0;
	/* second pass over the vectors: everything except VCHR vnodes */
	if (kfcount != count) {
		proc_fdlock(p);
		for (msk = 0; msk < 3; msk++) {
			iptr = (u_int32_t *)&ibits[msk * nw];
			optr = (u_int32_t *)&obits[msk * nw];

			for (i = 0; i < nfd; i += NFDBITS) {
				bits = iptr[i/NFDBITS];

				while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
					bits &= ~(1 << j);
					fp = fdp->fd_ofiles[fd];

					if (fp == NULL ||
						(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						proc_fdunlock(p);
						return(EBADF);
					}
					if (sel_pass == SEL_SECONDPASS) {
						wql_ptr = (char *)0;
						fp->f_flags &= ~FP_INSELECT;
						fp->f_waddr = (void *)0;
					} else {
						wql_ptr = (wql + nc * SIZEOF_WAITQUEUE_LINK);
						fp->f_flags |= FP_INSELECT;
						fp->f_waddr = (void *)wqsub;
					}

					context.vc_ucred = fp->f_cred;

					if ((fp->f_ops &&
						((fp->f_type != DTYPE_VNODE)
						|| (((vp = (struct vnode *)fp->f_data) != NULLVP)
						&& (vp->v_type != VCHR))
						)
						&& fo_select(fp, flag[msk], wql_ptr, &context))) {
						optr[fd/NFDBITS] |= (1 << (fd % NFDBITS));
						n++;
					}
					nc++;
				}
			}
		}
		proc_fdunlock(p);
	}
	*retval = n;
	return (0);
}

int poll_callback(struct kqueue *, struct kevent *, void *);

/* per-call context threaded through kevent_scan() into poll_callback() */
struct poll_continue_args {
	user_addr_t pca_fds;	/* user address of the pollfd array */
	u_int pca_nfds;		/* number of entries */
	u_int pca_rfds;		/* count of fds with non-zero revents */
};

/*
 * poll system call (cancellation point): note the cancel test, then
 * forward to poll_nocancel().
 */
int
poll(struct proc *p, struct poll_args *uap, register_t *retval)
{
	__pthread_testcancel(1);
	return(poll_nocancel(p, (struct poll_nocancel_args *)uap, retval));
}


/*
 * poll_nocancel
 *
 * Implements poll() on top of kqueue: each pollfd is translated into
 * one or more EV_ONESHOT kevents registered on a private kqueue, then
 * kevent_scan() collects triggered events (poll_callback translates
 * them back into revents). Returns 0 with *retval = number of fds
 * with non-zero revents, or an errno.
 */
int
poll_nocancel(struct proc *p, struct poll_nocancel_args *uap, register_t *retval)
{
	struct poll_continue_args *cont;
	struct pollfd *fds;
	struct kqueue *kq;
	struct timeval atv;
	int ncoll, error = 0;
	u_int nfds = uap->nfds;
	u_int rfds = 0;
	u_int i;
	size_t ni;

	/*
	 * This is kinda bogus.  We have fd limits, but that is not
	 * really related to the size of the pollfd array.  Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the current limits.  We want to be reasonably
	 * safe, but not overly restrictive.
	 */
	if (nfds > OPEN_MAX ||
	    (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && (proc_suser(p) || nfds > FD_SETSIZE)))
		return (EINVAL);

	kq = kqueue_alloc(p);
	if (kq == NULL)
		return (EAGAIN);

	/* single allocation: continue-args header followed by the pollfd array */
	ni = nfds * sizeof(struct pollfd) + sizeof(struct poll_continue_args);
	MALLOC(cont, struct poll_continue_args *, ni, M_TEMP, M_WAITOK);
	if (NULL == cont) {
		error = EAGAIN;
		goto out;
	}

	fds = (struct pollfd *)&cont[1];
	error = copyin(uap->fds, fds, nfds * sizeof(struct pollfd));
	if (error)
		goto out;

	if (uap->timeout != -1) {
		struct timeval rtv;

		/* millisecond timeout -> absolute uptime deadline */
		atv.tv_sec = uap->timeout / 1000;
		atv.tv_usec = (uap->timeout % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto out;
		}
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		/* -1 means wait forever (zero deadline) */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}

	/* JMM - all this P_SELECT stuff is bogus */
	ncoll = nselcoll;
	OSBitOrAtomic(P_SELECT, (UInt32 *)&p->p_flag);
	for (i = 0; i < nfds; i++) {
		short events = fds[i].events;
		struct kevent kev;
		int kerror = 0;

		/* per spec, ignore fd values below zero */
		if (fds[i].fd < 0) {
			fds[i].revents = 0;
			continue;
		}

		/* convert the poll event into a kqueue kevent */
		kev.ident = fds[i].fd;
		kev.flags = EV_ADD | EV_ONESHOT | EV_POLL;
		kev.fflags = NOTE_LOWAT;
		kev.data = 1; /* efficiency be damned: any data should trigger */
		kev.udata = CAST_USER_ADDR_T(&fds[i]);

		/* Handle input events */
		if (events & ( POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND | POLLHUP )) {
			kev.filter = EVFILT_READ;
			/* only out-of-band interest: restrict to OOB data */
			if (!(events & ( POLLIN | POLLRDNORM )))
				kev.flags |= EV_OOBAND;
			kerror = kevent_register(kq, &kev, p);
		}

		/* Handle output events */
		if (kerror == 0 &&
		    events & ( POLLOUT | POLLWRNORM | POLLWRBAND )) {
			kev.filter = EVFILT_WRITE;
			kerror = kevent_register(kq, &kev, p);
		}

		/* Handle BSD extension vnode events */
		if (kerror == 0 &&
		    events & ( POLLEXTEND | POLLATTRIB | POLLNLINK | POLLWRITE )) {
			kev.filter = EVFILT_VNODE;
			kev.fflags = 0;
			if (events & POLLEXTEND)
				kev.fflags |= NOTE_EXTEND;
			if (events & POLLATTRIB)
				kev.fflags |= NOTE_ATTRIB;
			if (events & POLLNLINK)
				kev.fflags |= NOTE_LINK;
			if (events & POLLWRITE)
				kev.fflags |= NOTE_WRITE;
			kerror = kevent_register(kq, &kev, p);
		}

		if (kerror != 0) {
			/* registration failure is reported per-fd, not per-call */
			fds[i].revents = POLLNVAL;
			rfds++;
		} else
			fds[i].revents = 0;
	}

	/* Did we have any trouble registering? */
	if (rfds > 0)
		goto done;

	/* scan for, and possibly wait for, the kevents to trigger */
	cont->pca_fds = uap->fds;
	cont->pca_nfds = nfds;
	cont->pca_rfds = rfds;
	error = kevent_scan(kq, poll_callback, NULL, cont, &atv, p);
	rfds = cont->pca_rfds;

 done:
	OSBitAndAtomic(~((uint32_t)P_SELECT), (UInt32 *)&p->p_flag);
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(fds, uap->fds, nfds * sizeof(struct pollfd));
		*retval = rfds;
	}

 out:
	if (NULL != cont)
		FREE(cont, M_TEMP);

	kqueue_dealloc(kq);
	return (error);
}

/*
 * poll_callback
 *
 * kevent_scan() callback: translate one triggered kevent back into
 * revents bits on the pollfd it was registered for (kevp->udata holds
 * the in-kernel pollfd pointer), and count fds that became ready.
 */
int
poll_callback(__unused struct kqueue *kq, struct kevent *kevp, void *data)
{
	struct poll_continue_args *cont = (struct poll_continue_args *)data;
	struct pollfd *fds = CAST_DOWN(struct pollfd *, kevp->udata);
	short mask;

	/* convert the results back into revents */
	if (kevp->flags & EV_EOF)
		fds->revents |= POLLHUP;
	if (kevp->flags & EV_ERROR)
		fds->revents |= POLLERR;

	switch (kevp->filter) {

	case EVFILT_READ:
		/* on hangup, report every read-side event the caller asked for */
		if (fds->revents & POLLHUP)
			mask = (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND );
		else {
			mask = 0;
			if (kevp->data != 0)
				mask |= (POLLIN | POLLRDNORM );
			if (kevp->flags & EV_OOBAND)
				mask |= ( POLLPRI | POLLRDBAND );
		}
		fds->revents |= (fds->events & mask);
		break;

	case EVFILT_WRITE:
		if (!(fds->revents & POLLHUP))
			fds->revents |= (fds->events & ( POLLOUT | POLLWRNORM | POLLWRBAND ));
		break;

	case EVFILT_VNODE:
		if (kevp->fflags & NOTE_EXTEND)
			fds->revents |= (fds->events & POLLEXTEND);
		if (kevp->fflags & NOTE_ATTRIB)
			fds->revents |= (fds->events & POLLATTRIB);
		if (kevp->fflags & NOTE_LINK)
			fds->revents |= (fds->events & POLLNLINK);
		if (kevp->fflags & NOTE_WRITE)
			fds->revents |= (fds->events & POLLWRITE);
		break;
	}

	if (fds->revents)
		cont->pca_rfds++;

	return 0;
}

/*
 * seltrue: trivial fo_select/d_select implementation for devices that
 * are always ready.
 */
int
seltrue(__unused dev_t dev, __unused int flag, __unused struct proc *p)
{

	return (1);
}

/*
 * selcount
 *
 * Pre-scan for select(): count the descriptors marked in the three
 * input bit vectors, taking an f_iocount reference on each so it
 * cannot be reclaimed while the select is in progress. Also counts
 * character-device vnodes separately (*kfcountp) so selscan() can run
 * them under the funnel. On EBADF, already-taken iocounts are undone.
 */
static int
selcount(struct proc *p, u_int32_t *ibits, __unused u_int32_t *obits,
		int nfd, int
 *countp, int * kfcountp)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;
	u_int32_t *iptr;
	u_int nw;
	int error=0;
	int kfc = 0;
	int dropcount;
	vnode_t vp;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		*countp = 0;
		*kfcountp = 0;
		return(EIO);
	}
	nw = howmany(nfd, NFDBITS);

	proc_fdlock(p);
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL ||
					(fdp->fd_ofileflags[fd] & UF_RESERVED)) {
						*countp = 0;
						*kfcountp = 0;
						error = EBADF;
						goto bad;
				}
				/* pin the fileproc for the duration of the select */
				fp->f_iocount++;
				if ((fp->f_type == DTYPE_VNODE)
					&& ((vp = (struct vnode *)fp->f_data)  != NULLVP)
					&& (vp->v_type == VCHR) )
					kfc++;

				n++;
			}
		}
	}
	proc_fdunlock(p);

	*countp = n;
	*kfcountp = kfc;
	return (0);
bad:
	dropcount = 0;

	if (n== 0)
		goto out;
	/*
	 * undo the iocounts: re-walk the vectors in the same order and
	 * decrement exactly n references.  The dropcount guard stops us
	 * before we reach the descriptor that failed above (which never
	 * got an iocount and may be NULL).
	 */
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (dropcount >= n)
					goto out;
				fp->f_iocount--;

				/* last reference gone: wake anyone draining this fp */
				if (p->p_fpdrainwait && fp->f_iocount == 0) {
					p->p_fpdrainwait = 0;
					wakeup(&p->p_fpdrainwait);
				}
				dropcount++;
			}
		}
	}
out:
	proc_fdunlock(p);
	return(error);
}

/*
 * seldrop
 *
 * Counterpart of selcount(): walk the same input bit vectors and drop
 * the f_iocount reference taken on each selected descriptor, clearing
 * FP_INSELECT and waking any thread draining the fileproc.
 */
static int
seldrop(struct proc *p, u_int32_t *ibits, int nfd)
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, j, fd;
	u_int32_t bits;
	struct fileproc *fp;
	int n = 0;
	u_int32_t *iptr;
	u_int nw;

	/*
	 * Problems when reboot; due to MacOSX signal probs
	 * in Beaker1C ; verify that the p->p_fd is valid
	 */
	if (fdp == NULL) {
		return(EIO);
	}

	nw = howmany(nfd, NFDBITS);


	proc_fdlock(p);
	for (msk = 0; msk < 3; msk++) {
		iptr = (u_int32_t *)&ibits[msk * nw];
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = iptr[i/NFDBITS];
			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
				bits &= ~(1 << j);
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL
#if 0
					/* if you are here then it is being closed */
					|| (fdp->fd_ofileflags[fd] & UF_RESERVED)
#endif
					) {
						proc_fdunlock(p);
						return(EBADF);
				}
				n++;
				fp->f_iocount--;
				fp->f_flags &= ~FP_INSELECT;

				if (p->p_fpdrainwait && fp->f_iocount == 0) {
					p->p_fpdrainwait = 0;
					wakeup(&p->p_fpdrainwait);
				}
			}
		}
	}
	proc_fdunlock(p);
	return (0);
}

/*
 * Record a select request.
1737 */ 1738void 1739selrecord(__unused struct proc *selector, struct selinfo *sip, void * p_wql) 1740{ 1741 thread_t cur_act = current_thread(); 1742 struct uthread * ut = get_bsdthread_info(cur_act); 1743 1744 /* need to look at collisions */ 1745 1746 if ((p_wql == (void *)0) && ((sip->si_flags & SI_INITED) == 0)) { 1747 return; 1748 } 1749 1750 /*do not record if this is second pass of select */ 1751 if((p_wql == (void *)0)) { 1752 return; 1753 } 1754 1755 if ((sip->si_flags & SI_INITED) == 0) { 1756 wait_queue_init(&sip->si_wait_queue, SYNC_POLICY_FIFO); 1757 sip->si_flags |= SI_INITED; 1758 sip->si_flags &= ~SI_CLEAR; 1759 } 1760 1761 if (sip->si_flags & SI_RECORDED) { 1762 sip->si_flags |= SI_COLL; 1763 } else 1764 sip->si_flags &= ~SI_COLL; 1765 1766 sip->si_flags |= SI_RECORDED; 1767 if (!wait_queue_member(&sip->si_wait_queue, ut->uu_wqset)) 1768 wait_queue_link_noalloc(&sip->si_wait_queue, ut->uu_wqset, 1769 (wait_queue_link_t)p_wql); 1770 1771 return; 1772} 1773 1774void 1775selwakeup(struct selinfo *sip) 1776{ 1777 1778 if ((sip->si_flags & SI_INITED) == 0) { 1779 return; 1780 } 1781 1782 if (sip->si_flags & SI_COLL) { 1783 nselcoll++; 1784 sip->si_flags &= ~SI_COLL; 1785#if 0 1786 /* will not support */ 1787 //wakeup((caddr_t)&selwait); 1788#endif 1789 } 1790 1791 if (sip->si_flags & SI_RECORDED) { 1792 wait_queue_wakeup_all(&sip->si_wait_queue, &selwait, THREAD_AWAKENED); 1793 sip->si_flags &= ~SI_RECORDED; 1794 } 1795 1796} 1797 1798void 1799selthreadclear(struct selinfo *sip) 1800{ 1801 1802 if ((sip->si_flags & SI_INITED) == 0) { 1803 return; 1804 } 1805 if (sip->si_flags & SI_RECORDED) { 1806 selwakeup(sip); 1807 sip->si_flags &= ~(SI_RECORDED | SI_COLL); 1808 } 1809 sip->si_flags |= SI_CLEAR; 1810 wait_queue_unlinkall_nofree(&sip->si_wait_queue); 1811} 1812 1813 1814 1815 1816#define DBG_POST 0x10 1817#define DBG_WATCH 0x11 1818#define DBG_WAIT 0x12 1819#define DBG_MOD 0x13 1820#define DBG_EWAKEUP 0x14 1821#define DBG_ENQUEUE 0x15 1822#define 
DBG_DEQUEUE 0x16 1823 1824#define DBG_MISC_POST MISCDBG_CODE(DBG_EVENT,DBG_POST) 1825#define DBG_MISC_WATCH MISCDBG_CODE(DBG_EVENT,DBG_WATCH) 1826#define DBG_MISC_WAIT MISCDBG_CODE(DBG_EVENT,DBG_WAIT) 1827#define DBG_MISC_MOD MISCDBG_CODE(DBG_EVENT,DBG_MOD) 1828#define DBG_MISC_EWAKEUP MISCDBG_CODE(DBG_EVENT,DBG_EWAKEUP) 1829#define DBG_MISC_ENQUEUE MISCDBG_CODE(DBG_EVENT,DBG_ENQUEUE) 1830#define DBG_MISC_DEQUEUE MISCDBG_CODE(DBG_EVENT,DBG_DEQUEUE) 1831 1832 1833#define EVPROCDEQUE(p, evq) do { \ 1834 proc_lock(p); \ 1835 if (evq->ee_flags & EV_QUEUED) { \ 1836 TAILQ_REMOVE(&p->p_evlist, evq, ee_plist); \ 1837 evq->ee_flags &= ~EV_QUEUED; \ 1838 } \ 1839 proc_unlock(p); \ 1840} while (0); 1841 1842 1843/* 1844 * called upon socket close. deque and free all events for 1845 * the socket... socket must be locked by caller. 1846 */ 1847void 1848evsofree(struct socket *sp) 1849{ 1850 struct eventqelt *evq, *next; 1851 proc_t p; 1852 1853 if (sp == NULL) 1854 return; 1855 1856 for (evq = sp->so_evlist.tqh_first; evq != NULL; evq = next) { 1857 next = evq->ee_slist.tqe_next; 1858 p = evq->ee_proc; 1859 1860 if (evq->ee_flags & EV_QUEUED) { 1861 EVPROCDEQUE(p, evq); 1862 } 1863 TAILQ_REMOVE(&sp->so_evlist, evq, ee_slist); // remove from socket q 1864 FREE(evq, M_TEMP); 1865 } 1866} 1867 1868 1869/* 1870 * called upon pipe close. deque and free all events for 1871 * the pipe... pipe must be locked by caller 1872 */ 1873void 1874evpipefree(struct pipe *cpipe) 1875{ 1876 struct eventqelt *evq, *next; 1877 proc_t p; 1878 1879 for (evq = cpipe->pipe_evlist.tqh_first; evq != NULL; evq = next) { 1880 next = evq->ee_slist.tqe_next; 1881 p = evq->ee_proc; 1882 1883 EVPROCDEQUE(p, evq); 1884 1885 TAILQ_REMOVE(&cpipe->pipe_evlist, evq, ee_slist); // remove from pipe q 1886 FREE(evq, M_TEMP); 1887 } 1888} 1889 1890 1891/* 1892 * enqueue this event if it's not already queued. wakeup 1893 * the proc if we do queue this event to it... 1894 * entered with proc lock held... 
 we drop it before
 * doing the wakeup and return in that state
 */
static void
evprocenque(struct eventqelt *evq)
{
	proc_t	p;

	assert(evq);
	p = evq->ee_proc;

	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_START, (uint32_t)evq, evq->ee_flags, evq->ee_eventmask,0,0);

	proc_lock(p);

	if (evq->ee_flags & EV_QUEUED) {
		/* already pending for waitevent(); nothing to do */
		proc_unlock(p);

		KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
		return;
	}
	evq->ee_flags |= EV_QUEUED;

	TAILQ_INSERT_TAIL(&p->p_evlist, evq, ee_plist);

	proc_unlock(p);

	/* wake any waitevent() sleeper on this proc's event list */
	wakeup(&p->p_evlist);

	KERNEL_DEBUG(DBG_MISC_ENQUEUE|DBG_FUNC_END, 0,0,0,0,0);
}


/*
 * pipe lock must be taken by the caller
 * Post a pipe event: for each armed watcher on this pipe, compute the
 * event bits implied by the pipe's current state and enqueue the
 * watcher's event to its proc.
 */
void
postpipeevent(struct pipe *pipep, int event)
{
	int	mask;
	struct eventqelt *evq;

	if (pipep == NULL)
		return;
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, event,0,0,1,0);

	for (evq = pipep->pipe_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		/* disarmed watcher: event not yet consumed via waitevent() */
		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		switch (event & (EV_RWBYTES | EV_RCLOSED | EV_WCLOSED)) {

		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && pipep->pipe_buffer.cnt) {
				mask |= EV_RE;
				evq->ee_req.er_rcnt = pipep->pipe_buffer.cnt;
			}
			if ((evq->ee_eventmask & EV_WR) &&
			    (pipep->pipe_buffer.size - pipep->pipe_buffer.cnt) >= PIPE_BUF) {

				if (pipep->pipe_state & PIPE_EOF) {
					mask |= EV_WR|EV_RESET;
					break;
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = pipep->pipe_buffer.size - pipep->pipe_buffer.cnt;
			}
			break;

		case EV_WCLOSED:
		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		default:
			return;
		}
		if (mask) {
			/*
			 * disarm... postevents are nops until this event is 'read' via
			 * waitevent and then re-armed via modwatch
			 */
			evq->ee_eventmask = 0;

			/*
			 * since events are disarmed until after the waitevent
			 * the ee_req.er_xxxx fields can't change once we've
			 * inserted this event into the proc queue...
			 * therefore, the waitevent will see a 'consistent'
			 * snapshot of the event, even though it won't hold
			 * the pipe lock, and we're updating the event outside
			 * of the proc lock, which it will hold
			 */
			evq->ee_req.er_eventbits |= mask;

			KERNEL_DEBUG(DBG_MISC_POST, (uint32_t)evq, evq->ee_req.er_eventbits, mask, 1,0);

			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, 0,0,0,1,0);
}

#if SOCKETS
/*
 * given either a sockbuf or a socket run down the
 * event list and queue ready events found...
 * the socket must be locked by the caller
 */
void
postevent(struct socket *sp, struct sockbuf *sb, int event)
{
	int	mask;
	struct	eventqelt *evq;
	struct	tcpcb *tp;

	if (sb)
		sp = sb->sb_so;
	if (sp == NULL)
		return;

	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_START, (int)sp, event, 0, 0, 0);

	for (evq = sp->so_evlist.tqh_first;
	     evq != NULL; evq = evq->ee_slist.tqe_next) {

		if (evq->ee_eventmask == 0)
			continue;
		mask = 0;

		/* ready for reading:
		   - byte cnt >= receive low water mark
		   - read-half of conn closed
		   - conn pending for listening sock
		   - socket error pending

		   ready for writing
		   - byte cnt avail >= send low water mark
		   - write half of conn closed
		   - socket error pending
		   - non-blocking conn completed successfully

		   exception pending
		   - out of band data
		   - sock at out of band mark
		*/

		switch (event & EV_DMASK) {

		case EV_OOB:
			if ((evq->ee_eventmask & EV_EX)) {
				if
 (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
					mask |= EV_EX|EV_OOB;
			}
			break;

		case EV_RWBYTES|EV_OOB:
			if ((evq->ee_eventmask & EV_EX)) {
				if (sp->so_oobmark || ((sp->so_state & SS_RCVATMARK)))
					mask |= EV_EX|EV_OOB;
			}
			/*
			 * fall into the next case
			 */
		case EV_RWBYTES:
			if ((evq->ee_eventmask & EV_RE) && soreadable(sp)) {
				if (sp->so_error) {
					/*
					 * a refused/reset TCP stream whose pcb is
					 * dead or closed reports EV_RESET instead
					 * of plain readability
					 */
					if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_RE|EV_RESET;
							break;
						}
					}
				}
				mask |= EV_RE;
				evq->ee_req.er_rcnt = sp->so_rcv.sb_cc;

				if (sp->so_state & SS_CANTRCVMORE) {
					mask |= EV_FIN;
					break;
				}
			}
			if ((evq->ee_eventmask & EV_WR) && sowriteable(sp)) {
				if (sp->so_error) {
					if ((sp->so_type == SOCK_STREAM) && ((sp->so_error == ECONNREFUSED) || (sp->so_error == ECONNRESET))) {
						if ((sp->so_pcb == 0) || (((struct inpcb *)sp->so_pcb)->inp_state == INPCB_STATE_DEAD) || !(tp = sototcpcb(sp)) ||
						    (tp->t_state == TCPS_CLOSED)) {
							mask |= EV_WR|EV_RESET;
							break;
						}
					}
				}
				mask |= EV_WR;
				evq->ee_req.er_wcnt = sbspace(&sp->so_snd);
			}
			break;

		case EV_RCONN:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCONN;
				evq->ee_req.er_rcnt = sp->so_qlen + 1;  // incl this one
			}
			break;

		case EV_WCONN:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCONN;
			}
			break;

		case EV_RCLOSED:
			if ((evq->ee_eventmask & EV_RE)) {
				mask |= EV_RE|EV_RCLOSED;
			}
			break;

		case EV_WCLOSED:
			if ((evq->ee_eventmask & EV_WR)) {
				mask |= EV_WR|EV_WCLOSED;
			}
			break;

		case EV_FIN:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE|EV_FIN;
			}
			break;

		case EV_RESET:
		case EV_TIMEOUT:
			if (evq->ee_eventmask & EV_RE) {
				mask |= EV_RE | event;
			}
			if (evq->ee_eventmask & EV_WR) {
				mask |= EV_WR | event;
			}
			break;

		default:
			KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, -1, 0, 0, 0);
			return;
		} /* switch */

		KERNEL_DEBUG(DBG_MISC_POST, (int)evq, evq->ee_eventmask, evq->ee_req.er_eventbits, mask, 0);

		if (mask) {
			/*
			 * disarm... postevents are nops until this event is 'read' via
			 * waitevent and then re-armed via modwatch
			 */
			evq->ee_eventmask = 0;

			/*
			 * since events are disarmed until after the waitevent
			 * the ee_req.er_xxxx fields can't change once we've
			 * inserted this event into the proc queue...
			 * since waitevent can't see this event until we
			 * enqueue it, waitevent will see a 'consistent'
			 * snapshot of the event, even though it won't hold
			 * the socket lock, and we're updating the event outside
			 * of the proc lock, which it will hold
			 */
			evq->ee_req.er_eventbits |= mask;

			evprocenque(evq);
		}
	}
	KERNEL_DEBUG(DBG_MISC_POST|DBG_FUNC_END, (int)sp, 0, 0, 0, 0);
}
#endif /* SOCKETS */


/*
 * watchevent system call. user passes us an event to watch
 * for. we malloc an event object, initialize it, and queue
 * it to the open socket. when the event occurs, postevent()
 * will enque it back to our proc where we can retrieve it
 * via waitevent().
 *
 * should this prevent duplicate events on same socket?
 *
 * Returns:
 *	ENOMEM			No memory for operation
 *	copyin:EFAULT
 */
int
watchevent(proc_t p, struct watchevent_args *uap, __unused int *retval)
{
	struct eventqelt *evq = (struct eventqelt *)0;
	struct eventqelt *np = NULL;
	struct eventreq64 *erp;
	struct fileproc *fp = NULL;
	int error;

	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_START, 0,0,0,0,0);

	// get a qelt and fill with users req
	MALLOC(evq, struct eventqelt *, sizeof(struct eventqelt), M_TEMP, M_WAITOK);

	if (evq == NULL)
		return (ENOMEM);
	erp = &evq->ee_req;

	// get users request pkt

	if (IS_64BIT_PROCESS(p)) {
		error = copyin(uap->u_req, (caddr_t)erp, sizeof(struct eventreq64));
	} else {
		struct eventreq32 er32;

		error = copyin(uap->u_req, (caddr_t)&er32, sizeof(struct eventreq32));
		if (error == 0) {
			/*
			 * the user only passes in the
			 * er_type, er_handle and er_data...
			 * the other fields are initialized
			 * below, so don't bother to copy
			 */
			erp->er_type = er32.er_type;
			erp->er_handle = er32.er_handle;
			erp->er_data = (user_addr_t)er32.er_data;
		}
	}
	if (error) {
		FREE(evq, M_TEMP);
		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);

		return(error);
	}
	KERNEL_DEBUG(DBG_MISC_WATCH, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);

	// validate, freeing qelt if errors
	error = 0;
	proc_fdlock(p);

	/*
	 * only EV_FD (socket or pipe) watches are supported; on success
	 * we leave holding the object's lock with np pointing at the
	 * head of its existing watch list.
	 */
	if (erp->er_type != EV_FD) {
		error = EINVAL;
	} else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
		error = EBADF;
#if SOCKETS
	} else if (fp->f_type == DTYPE_SOCKET) {
		socket_lock((struct socket *)fp->f_data, 1);
		np = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
#endif /* SOCKETS */
	} else if (fp->f_type == DTYPE_PIPE) {
		PIPE_LOCK((struct pipe *)fp->f_data);
		np = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
	} else {
		fp_drop(p, erp->er_handle, fp, 1);
		error = EINVAL;
	}
	proc_fdunlock(p);

	if (error) {
		FREE(evq, M_TEMP);

		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}

	/*
	 * only allow one watch per file per proc
	 */
	for ( ; np != NULL; np = np->ee_slist.tqe_next) {
		if (np->ee_proc == p) {
#if SOCKETS
			if (fp->f_type == DTYPE_SOCKET)
				socket_unlock((struct socket *)fp->f_data, 1);
			else
#endif /* SOCKETS */
				PIPE_UNLOCK((struct pipe *)fp->f_data);
			fp_drop(p, erp->er_handle, fp, 0);
			FREE(evq, M_TEMP);

			KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
			return(EINVAL);
		}
	}
	erp->er_ecnt = erp->er_rcnt = erp->er_wcnt = erp->er_eventbits = 0;
	evq->ee_proc = p;
	evq->ee_eventmask = uap->u_eventmask & EV_MASK;
	evq->ee_flags = 0;

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		TAILQ_INSERT_TAIL(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
		postevent((struct socket *)fp->f_data, 0, EV_RWBYTES); // catch existing events

		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		TAILQ_INSERT_TAIL(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
		postpipeevent((struct pipe *)fp->f_data, EV_RWBYTES);

		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	fp_drop_event(p, erp->er_handle, fp);

	KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, 0,0,0,0,0);
	return(0);
}



/*
 * waitevent system call.
 * grabs the next waiting event for this proc and returns
 * it.
 if no events, user can request to sleep with timeout
 * or without or poll mode
 *    ((tv != NULL && interval == 0) || tv == -1)
 */
int
waitevent(proc_t p, struct waitevent_args *uap, int *retval)
{
	int error = 0;
	struct eventqelt *evq;
	struct eventreq64 *erp;
	uint64_t abstime, interval;
	boolean_t fast_poll = FALSE;
	union {
	        struct eventreq64 er64;
	        struct eventreq32 er32;
	} uer;

	interval = 0;

	if (uap->tv) {
		struct timeval atv;
		/*
		 * check for fast poll method: a tv of (userspace) -1 means
		 * "don't sleep, don't even copyin the timeval"
		 */
		if (IS_64BIT_PROCESS(p)) {
			if (uap->tv == (user_addr_t)-1)
				fast_poll = TRUE;
		} else if (uap->tv == (user_addr_t)((uint32_t)-1))
			fast_poll = TRUE;

		if (fast_poll == TRUE) {
			if (p->p_evlist.tqh_first == NULL) {
				KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_NONE, -1,0,0,0,0);
				/*
				 * poll failed
				 */
				*retval = 1;
				return (0);
			}
			proc_lock(p);
			goto retry;
		}
		error = copyin(uap->tv, (caddr_t)&atv, sizeof (atv));

		if (error)
			return(error);
		if (itimerfix(&atv)) {
			error = EINVAL;
			return(error);
		}
		interval = tvtoabstime(&atv);
	}
	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_START, 0,0,0,0,0);

	proc_lock(p);
retry:
	if ((evq = p->p_evlist.tqh_first) != NULL) {
		/*
		 * found one... make a local copy while it's still on the queue
		 * to prevent it from changing while in the midst of copying
		 * don't want to hold the proc lock across a copyout because
		 * it might block on a page fault at the target in user space
		 */
		erp = &evq->ee_req;

		if (IS_64BIT_PROCESS(p))
			bcopy((caddr_t)erp, (caddr_t)&uer.er64, sizeof (struct eventreq64));
		else {
			/* narrow the 64-bit request to the 32-bit ABI layout */
			uer.er32.er_type  = erp->er_type;
			uer.er32.er_handle  = erp->er_handle;
			uer.er32.er_data  = (uint32_t)erp->er_data;
			uer.er32.er_ecnt  = erp->er_ecnt;
			uer.er32.er_rcnt  = erp->er_rcnt;
			uer.er32.er_wcnt  = erp->er_wcnt;
			uer.er32.er_eventbits = erp->er_eventbits;
		}
		TAILQ_REMOVE(&p->p_evlist, evq, ee_plist);

		evq->ee_flags &= ~EV_QUEUED;

		proc_unlock(p);

		if (IS_64BIT_PROCESS(p))
			error = copyout((caddr_t)&uer.er64, uap->u_req, sizeof(struct eventreq64));
		else
			error = copyout((caddr_t)&uer.er32, uap->u_req, sizeof(struct eventreq32));

		KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,
			     evq->ee_req.er_handle,evq->ee_req.er_eventbits,(uint32_t)evq,0);
		return (error);
	}
	else {
		if (uap->tv && interval == 0) {
			/* zero timeout: pure poll, report no events via *retval */
			proc_unlock(p);
			*retval = 1;  // poll failed

			KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, error,0,0,0,0);
			return (error);
		}
		if (interval != 0)
			clock_absolutetime_interval_to_deadline(interval, &abstime);
		else
			abstime = 0;

		KERNEL_DEBUG(DBG_MISC_WAIT, 1,(uint32_t)&p->p_evlist,0,0,0);

		/* sleep until evprocenque() wakes the p_evlist channel */
		error = msleep1(&p->p_evlist, &p->p_mlock, (PSOCK | PCATCH), "waitevent", abstime);

		KERNEL_DEBUG(DBG_MISC_WAIT, 2,(uint32_t)&p->p_evlist,0,0,0);

		if (error == 0)
			goto retry;
		if (error == ERESTART)
			error = EINTR;
		if (error == EWOULDBLOCK) {
			/* timeout expired: report poll-style failure, not an error */
			*retval = 1;
			error = 0;
		}
	}
	proc_unlock(p);

	KERNEL_DEBUG(DBG_MISC_WAIT|DBG_FUNC_END, 0,0,0,0,0);
	return (error);
}


/*
 * modwatch system call.  The user passes in a pointer to an eventreq
 * (uap->u_req) identifying an event previously registered on a socket
 * or pipe, plus a new event mask (uap->u_eventmask).
 *
 * If u_eventmask == EV_RM the registration is dequeued and freed;
 * otherwise the event's mask is replaced and the socket/pipe is
 * re-polled so any already-satisfied conditions get re-posted.
 *
 * Returns:	0		Success
 *		EINVAL		request is not EV_FD, fd is not a socket
 *				or pipe, no matching event is registered
 *				by this process, or the mask is invalid
 *		EBADF		er_handle is not an open file descriptor
 *	copyin:EFAULT		bad user request pointer
 */
int
modwatch(proc_t p, struct modwatch_args *uap, __unused int *retval)
{
	struct eventreq64 er;
	struct eventreq64 *erp = &er;
	struct eventqelt *evq = NULL;	/* protected by error return */
	int error;
	struct fileproc *fp;
	int flag;

	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_START, 0,0,0,0,0);

	/*
	 * get user's request pkt
	 * just need the er_type and er_handle which sit above the
	 * problematic er_data (32/64 issue)... so only copy in
	 * those 2 fields
	 */
	if ((error = copyin(uap->u_req, (caddr_t)erp, sizeof(er.er_type) + sizeof(er.er_handle)))) {
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}
	proc_fdlock(p);

	/*
	 * Validate the handle and take the object lock (socket or pipe)
	 * before walking that object's event list.  Note the fp_lookup
	 * failure is reported as EBADF regardless of its actual return.
	 */
	if (erp->er_type != EV_FD) {
		error = EINVAL;
	} else if ((error = fp_lookup(p, erp->er_handle, &fp, 1)) != 0) {
		error = EBADF;
#if SOCKETS
	} else if (fp->f_type == DTYPE_SOCKET) {
		socket_lock((struct socket *)fp->f_data, 1);
		evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
#endif /* SOCKETS */
	} else if (fp->f_type == DTYPE_PIPE) {
		PIPE_LOCK((struct pipe *)fp->f_data);
		evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
	} else {
		/* not a watchable object type; release the fp reference */
		fp_drop(p, erp->er_handle, fp, 1);
		error = EINVAL;
	}

	if (error) {
		proc_fdunlock(p);
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, error,0,0,0,0);
		return(error);
	}

	/*
	 * On removal, clear the close-notification flag while still
	 * holding the fdlock so waitevent_close won't race us.
	 */
	if ((uap->u_eventmask == EV_RM) && (fp->f_flags & FP_WAITEVENT)) {
		fp->f_flags &= ~FP_WAITEVENT;
	}
	proc_fdunlock(p);

	// locate event if possible
	for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
		if (evq->ee_proc == p)
			break;
	}
	if (evq == NULL) {
		/* no event registered by this process on this object */
#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET)
			socket_unlock((struct socket *)fp->f_data, 1);
		else
#endif /* SOCKETS */
			PIPE_UNLOCK((struct pipe *)fp->f_data);
		fp_drop(p, erp->er_handle, fp, 0);
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, EINVAL,0,0,0,0);
		return(EINVAL);
	}
	KERNEL_DEBUG(DBG_MISC_MOD, erp->er_handle,uap->u_eventmask,(uint32_t)evq,0,0);

	if (uap->u_eventmask == EV_RM) {
		/*
		 * Removal: pull the event off the proc queue first,
		 * then off the object's list, then free it.
		 */
		EVPROCDEQUE(p, evq);

#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET) {
			TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
			socket_unlock((struct socket *)fp->f_data, 1);
		} else
#endif /* SOCKETS */
		{
			TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
			PIPE_UNLOCK((struct pipe *)fp->f_data);
		}
		fp_drop(p, erp->er_handle, fp, 0);
		FREE(evq, M_TEMP);
		KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, 0,0,0,0,0);
		return(0);
	}
	/*
	 * Map the user-visible mask onto the internal post flags used
	 * by postevent/postpipeevent below.
	 */
	switch (uap->u_eventmask & EV_MASK) {

	case 0:
		flag = 0;
		break;

	case EV_RE:
	case EV_WR:
	case EV_RE|EV_WR:
		flag = EV_RWBYTES;
		break;

	case EV_EX:
		flag = EV_OOB;
		break;

	case EV_EX|EV_RE:
	case EV_EX|EV_WR:
	case EV_EX|EV_RE|EV_WR:
		flag = EV_OOB|EV_RWBYTES;
		break;

	default:
#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET)
			socket_unlock((struct socket *)fp->f_data, 1);
		else
#endif /* SOCKETS */
			PIPE_UNLOCK((struct pipe *)fp->f_data);
		fp_drop(p, erp->er_handle, fp, 0);
		/*
		 * NOTE(review): this trace uses DBG_MISC_WATCH where the
		 * rest of modwatch uses DBG_MISC_MOD — presumably a
		 * copy/paste slip from watchevent; confirm before changing.
		 */
		KERNEL_DEBUG(DBG_MISC_WATCH|DBG_FUNC_END, EINVAL,0,0,0,0);
		return(EINVAL);
	}
	/*
	 * since we're holding the socket/pipe lock, the event
	 * cannot go from the unqueued state to the queued state
	 * however, it can go from the queued state to the unqueued state
	 * since that direction is protected by the proc_lock...
	 * so do a quick check for EV_QUEUED w/o holding the proc lock
	 * since by far the common case will be NOT EV_QUEUED, this saves
	 * us taking the proc_lock the majority of the time
	 */
	if (evq->ee_flags & EV_QUEUED) {
		/*
		 * EVPROCDEQUE will recheck the state after it grabs the proc_lock
		 */
		EVPROCDEQUE(p, evq);
	}
	/*
	 * while the event is off the proc queue and
	 * we're holding the socket/pipe lock
	 * it's safe to update these fields...
	 */
	evq->ee_req.er_eventbits = 0;
	evq->ee_eventmask = uap->u_eventmask & EV_MASK;

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		postevent((struct socket *)fp->f_data, 0, flag);
		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		postpipeevent((struct pipe *)fp->f_data, flag);
		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	fp_drop(p, erp->er_handle, fp, 0);
	KERNEL_DEBUG(DBG_MISC_MOD|DBG_FUNC_END, evq->ee_req.er_handle,evq->ee_eventmask,(uint32_t)fp->f_data,flag,0);
	return(0);
}

/*
 * waitevent_close
 *
 * Called from the close path of an fd with the proc_fdlock held; tears
 * down any event registered on this file by the closing process.
 *
 * Returns:	0	a matching event was found, dequeued, and freed
 *		EINVAL	fp is neither a socket nor a pipe, or no event
 *			registered by this process was found
 *
 * Notes:	proc_fdlock is dropped and retaken around the search so
 *		the socket/pipe lock can be taken; callers must not
 *		assume the lock was held continuously across the call.
 */
/* this routine is called from the close of fd with proc_fdlock held */
int
waitevent_close(struct proc *p, struct fileproc *fp)
{
	struct eventqelt *evq;


	fp->f_flags &= ~FP_WAITEVENT;

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		socket_lock((struct socket *)fp->f_data, 1);
		evq = ((struct socket *)fp->f_data)->so_evlist.tqh_first;
	} else
#endif /* SOCKETS */
	if (fp->f_type == DTYPE_PIPE) {
		PIPE_LOCK((struct pipe *)fp->f_data);
		evq = ((struct pipe *)fp->f_data)->pipe_evlist.tqh_first;
	}
	else {
		/* not a watchable type: proc_fdlock still held, nothing to undo */
		return(EINVAL);
	}
	proc_fdunlock(p);


	// locate event if possible
	for ( ; evq != NULL; evq = evq->ee_slist.tqe_next) {
		if (evq->ee_proc == p)
			break;
	}
	if (evq == NULL) {
#if SOCKETS
		if (fp->f_type == DTYPE_SOCKET)
			socket_unlock((struct socket *)fp->f_data, 1);
		else
#endif /* SOCKETS */
			PIPE_UNLOCK((struct pipe *)fp->f_data);

		/* retake the fdlock before returning to the close path */
		proc_fdlock(p);

		return(EINVAL);
	}
	/* pull the event off the proc queue, then off the object's list */
	EVPROCDEQUE(p, evq);

#if SOCKETS
	if (fp->f_type == DTYPE_SOCKET) {
		TAILQ_REMOVE(&((struct socket *)fp->f_data)->so_evlist, evq, ee_slist);
		socket_unlock((struct socket *)fp->f_data, 1);
	} else
#endif /* SOCKETS */
	{
		TAILQ_REMOVE(&((struct pipe *)fp->f_data)->pipe_evlist, evq, ee_slist);
		PIPE_UNLOCK((struct pipe *)fp->f_data);
	}
	FREE(evq, M_TEMP);

	proc_fdlock(p);

	return(0);
}


/*
 * gethostuuid
 *
 * Description:	Get the host UUID from IOKit and return it to user space.
 *
 * Parameters:	uuid_buf		Pointer to buffer to receive UUID
 *		timeout			Timespec for timeout
 *
 * Returns:	0			Success
 *		EWOULDBLOCK		Timeout is too short
 *		copyout:EFAULT		Bad user buffer
 *
 * Notes:	A timeout seems redundant, since if it's tolerable to not
 *		have a system UUID in hand, then why ask for one?
2677 */ 2678int 2679gethostuuid(struct proc *p, struct gethostuuid_args *uap, __unused register_t *retval) 2680{ 2681 kern_return_t kret; 2682 int error; 2683 mach_timespec_t mach_ts; /* for IOKit call */ 2684 __darwin_uuid_t uuid_kern; /* for IOKit call */ 2685 2686 /* Convert the 32/64 bit timespec into a mach_timespec_t */ 2687 if ( proc_is64bit(p) ) { 2688 struct user_timespec ts; 2689 error = copyin(uap->timeoutp, &ts, sizeof(ts)); 2690 if (error) 2691 return (error); 2692 mach_ts.tv_sec = ts.tv_sec; 2693 mach_ts.tv_nsec = ts.tv_nsec; 2694 } else { 2695 struct timespec ts; 2696 error = copyin(uap->timeoutp, &ts, sizeof(ts) ); 2697 if (error) 2698 return (error); 2699 mach_ts.tv_sec = ts.tv_sec; 2700 mach_ts.tv_nsec = ts.tv_nsec; 2701 } 2702 2703 /* Call IOKit with the stack buffer to get the UUID */ 2704 kret = IOBSDGetPlatformUUID(uuid_kern, mach_ts); 2705 2706 /* 2707 * If we get it, copy out the data to the user buffer; note that a 2708 * uuid_t is an array of characters, so this is size invariant for 2709 * 32 vs. 64 bit. 2710 */ 2711 if (kret == KERN_SUCCESS) { 2712 error = copyout(uuid_kern, uap->uuid_buf, sizeof(uuid_kern)); 2713 } else { 2714 error = EWOULDBLOCK; 2715 } 2716 2717 return (error); 2718} 2719