sys_generic.c revision 331722
1122205Sharti/*- 2122205Sharti * Copyright (c) 1982, 1986, 1989, 1993 3122205Sharti * The Regents of the University of California. All rights reserved. 4122205Sharti * (c) UNIX System Laboratories, Inc. 5122205Sharti * All or some portions of this file are derived from material licensed 6122205Sharti * to the University of California by American Telephone and Telegraph 7122205Sharti * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8122205Sharti * the permission of UNIX System Laboratories, Inc. 9122205Sharti * 10122205Sharti * Redistribution and use in source and binary forms, with or without 11122205Sharti * modification, are permitted provided that the following conditions 12122205Sharti * are met: 13122205Sharti * 1. Redistributions of source code must retain the above copyright 14122205Sharti * notice, this list of conditions and the following disclaimer. 15122205Sharti * 2. Redistributions in binary form must reproduce the above copyright 16122205Sharti * notice, this list of conditions and the following disclaimer in the 17122205Sharti * documentation and/or other materials provided with the distribution. 18122205Sharti * 4. Neither the name of the University nor the names of its contributors 19122205Sharti * may be used to endorse or promote products derived from this software 20122205Sharti * without specific prior written permission. 21122205Sharti * 22122205Sharti * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23122205Sharti * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24122205Sharti * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25122205Sharti * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26122205Sharti * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27122205Sharti * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28122205Sharti * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29133492Sharti * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30122205Sharti * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31122205Sharti * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32122205Sharti * SUCH DAMAGE. 33122205Sharti * 34122205Sharti * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 35122205Sharti */ 36122205Sharti 37122205Sharti#include <sys/cdefs.h> 38122205Sharti__FBSDID("$FreeBSD: stable/11/sys/kern/sys_generic.c 331722 2018-03-29 02:50:57Z eadler $"); 39122205Sharti 40122205Sharti#include "opt_capsicum.h" 41122205Sharti#include "opt_compat.h" 42122205Sharti#include "opt_ktrace.h" 43122205Sharti 44122205Sharti#include <sys/param.h> 45122205Sharti#include <sys/systm.h> 46122205Sharti#include <sys/sysproto.h> 47122205Sharti#include <sys/capsicum.h> 48122205Sharti#include <sys/filedesc.h> 49122205Sharti#include <sys/filio.h> 50122205Sharti#include <sys/fcntl.h> 51122205Sharti#include <sys/file.h> 52122205Sharti#include <sys/lock.h> 53122205Sharti#include <sys/proc.h> 54133492Sharti#include <sys/signalvar.h> 55122205Sharti#include <sys/socketvar.h> 56122205Sharti#include <sys/uio.h> 57122205Sharti#include <sys/kernel.h> 58122205Sharti#include <sys/ktr.h> 59122205Sharti#include <sys/limits.h> 60122205Sharti#include <sys/malloc.h> 61122205Sharti#include <sys/poll.h> 62122205Sharti#include <sys/resourcevar.h> 63122205Sharti#include <sys/selinfo.h> 64122205Sharti#include <sys/sleepqueue.h> 65122205Sharti#include <sys/syscallsubr.h> 66122205Sharti#include <sys/sysctl.h> 67122205Sharti#include <sys/sysent.h> 68133492Sharti#include <sys/vnode.h> 69133492Sharti#include <sys/bio.h> 70133492Sharti#include <sys/buf.h> 71133492Sharti#include <sys/condvar.h> 72133492Sharti#ifdef KTRACE 73133492Sharti#include <sys/ktrace.h> 74133492Sharti#endif 75133492Sharti 76133492Sharti#include <security/audit/audit.h> 77133492Sharti 78133492Sharti/* 79133492Sharti * The following macro defines how many bytes will be allocated from 80133492Sharti * the stack instead of memory allocated when passing the IOCTL data 81133492Sharti * structures from userspace and to the kernel. Some IOCTLs having 82133492Sharti * small data structures are used very frequently and this small 83133492Sharti * buffer on the stack gives a significant speedup improvement for 84133492Sharti * those requests. The value of this define should be greater or equal 85133492Sharti * to 64 bytes and should also be power of two. The data structure is 86133492Sharti * currently hard-aligned to a 8-byte boundary on the stack. This 87133492Sharti * should currently be sufficient for all supported platforms. 88133492Sharti */ 89122205Sharti#define SYS_IOCTL_SMALL_SIZE 128 /* bytes */ 90122205Sharti#define SYS_IOCTL_SMALL_ALIGN 8 /* bytes */ 91122205Sharti 92122205Sharti#ifdef __LP64__ 93122205Shartistatic int iosize_max_clamp = 0; 94122205ShartiSYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW, 95122205Sharti &iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX"); 96122205Shartistatic int devfs_iosize_max_clamp = 1; 97122205ShartiSYSCTL_INT(_debug, OID_AUTO, devfs_iosize_max_clamp, CTLFLAG_RW, 98122205Sharti &devfs_iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX for devices"); 99122205Sharti#endif 100122205Sharti 101122205Sharti/* 102122205Sharti * Assert that the return value of read(2) and write(2) syscalls fits 103122205Sharti * into a register. If not, an architecture will need to provide the 104122205Sharti * usermode wrappers to reconstruct the result. 105122205Sharti */ 106122205ShartiCTASSERT(sizeof(register_t) >= sizeof(size_t)); 107122205Sharti 108122205Shartistatic MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 109122205Shartistatic MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 110122205ShartiMALLOC_DEFINE(M_IOV, "iov", "large iov's"); 111122205Sharti 112122205Shartistatic int pollout(struct thread *, struct pollfd *, struct pollfd *, 113122205Sharti u_int); 114122205Shartistatic int pollscan(struct thread *, struct pollfd *, u_int); 115122205Shartistatic int pollrescan(struct thread *); 116122205Shartistatic int selscan(struct thread *, fd_mask **, fd_mask **, int); 117122205Shartistatic int selrescan(struct thread *, fd_mask **, fd_mask **); 118122205Shartistatic void selfdalloc(struct thread *, void *); 119122205Shartistatic void selfdfree(struct seltd *, struct selfd *); 120122205Shartistatic int dofileread(struct thread *, int, struct file *, struct uio *, 121122205Sharti off_t, int); 122122205Shartistatic int dofilewrite(struct thread *, int, struct file *, struct uio *, 123122205Sharti off_t, int); 124122205Shartistatic void doselwakeup(struct selinfo *, int); 125122205Shartistatic void seltdinit(struct thread *); 126122205Shartistatic int seltdwait(struct thread *, sbintime_t, sbintime_t); 127122205Shartistatic void seltdclear(struct thread *); 128122205Sharti 129122205Sharti/* 130122205Sharti * One seltd per-thread allocated on demand as needed. 131122205Sharti * 132122205Sharti * t - protected by st_mtx 133122205Sharti * k - Only accessed by curthread or read-only 134122205Sharti */ 135122205Shartistruct seltd { 136122205Sharti STAILQ_HEAD(, selfd) st_selq; /* (k) List of selfds. */ 137122205Sharti struct selfd *st_free1; /* (k) free fd for read set. */ 138122205Sharti struct selfd *st_free2; /* (k) free fd for write set. */ 139122205Sharti struct mtx st_mtx; /* Protects struct seltd */ 140122205Sharti struct cv st_wait; /* (t) Wait channel. */ 141122205Sharti int st_flags; /* (t) SELTD_ flags. */ 142122205Sharti}; 143122205Sharti 144122205Sharti#define SELTD_PENDING 0x0001 /* We have pending events. */ 145122205Sharti#define SELTD_RESCAN 0x0002 /* Doing a rescan. */ 146122205Sharti 147131826Sharti/* 148122205Sharti * One selfd allocated per-thread per-file-descriptor. 149122205Sharti * f - protected by sf_mtx 150122205Sharti */ 151122205Shartistruct selfd { 152122205Sharti STAILQ_ENTRY(selfd) sf_link; /* (k) fds owned by this td. */ 153122205Sharti TAILQ_ENTRY(selfd) sf_threads; /* (f) fds on this selinfo. */ 154122205Sharti struct selinfo *sf_si; /* (f) selinfo when linked. */ 155122205Sharti struct mtx *sf_mtx; /* Pointer to selinfo mtx. */ 156122205Sharti struct seltd *sf_td; /* (k) owning seltd. */ 157122205Sharti void *sf_cookie; /* (k) fd or pollfd. */ 158122205Sharti u_int sf_refs; 159122205Sharti}; 160122205Sharti 161122205Shartistatic uma_zone_t selfd_zone; 162122205Shartistatic struct mtx_pool *mtxpool_select; 163122205Sharti 164122205Sharti#ifdef __LP64__ 165122205Shartisize_t 166122205Shartidevfs_iosize_max(void) 167122205Sharti{ 168122205Sharti 169122205Sharti return (devfs_iosize_max_clamp || SV_CURPROC_FLAG(SV_ILP32) ? 170122205Sharti INT_MAX : SSIZE_MAX); 171122205Sharti} 172122205Sharti 173122205Shartisize_t 174122205Shartiiosize_max(void) 175122205Sharti{ 176122205Sharti 177122205Sharti return (iosize_max_clamp || SV_CURPROC_FLAG(SV_ILP32) ? 178122205Sharti INT_MAX : SSIZE_MAX); 179122205Sharti} 180122205Sharti#endif 181122205Sharti 182122205Sharti#ifndef _SYS_SYSPROTO_H_ 183122205Shartistruct read_args { 184122205Sharti int fd; 185122205Sharti void *buf; 186122205Sharti size_t nbyte; 187122205Sharti}; 188131826Sharti#endif 189122205Shartiint 190122205Shartisys_read(td, uap) 191131826Sharti struct thread *td; 192122205Sharti struct read_args *uap; 193122205Sharti{ 194122205Sharti struct uio auio; 195122205Sharti struct iovec aiov; 196122205Sharti int error; 197122205Sharti 198122205Sharti if (uap->nbyte > IOSIZE_MAX) 199122205Sharti return (EINVAL); 200122205Sharti aiov.iov_base = uap->buf; 201122205Sharti aiov.iov_len = uap->nbyte; 202122205Sharti auio.uio_iov = &aiov; 203122205Sharti auio.uio_iovcnt = 1; 204122205Sharti auio.uio_resid = uap->nbyte; 205122205Sharti auio.uio_segflg = UIO_USERSPACE; 206122205Sharti error = kern_readv(td, uap->fd, &auio); 207122205Sharti return(error); 208122205Sharti} 209122205Sharti 210122205Sharti/* 211122205Sharti * Positioned read system call 212122205Sharti */ 213122205Sharti#ifndef _SYS_SYSPROTO_H_ 214122205Shartistruct pread_args { 215122205Sharti int fd; 216122205Sharti void *buf; 217122205Sharti size_t nbyte; 218122205Sharti int pad; 219122205Sharti off_t offset; 220122205Sharti}; 221122205Sharti#endif 222122205Shartiint 223122205Shartisys_pread(struct thread *td, struct pread_args *uap) 224122205Sharti{ 225122205Sharti 226122205Sharti return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); 227122205Sharti} 228122205Sharti 229122205Shartiint 230122205Shartikern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset) 231122205Sharti{ 232122205Sharti struct uio auio; 233122205Sharti struct iovec aiov; 234122205Sharti int error; 235122205Sharti 236122205Sharti if (nbyte > IOSIZE_MAX) 237122205Sharti return (EINVAL); 238122205Sharti aiov.iov_base = buf; 239122205Sharti aiov.iov_len = nbyte; 240122205Sharti auio.uio_iov = &aiov; 241122205Sharti auio.uio_iovcnt = 1; 242122205Sharti auio.uio_resid = nbyte; 243122205Sharti auio.uio_segflg = UIO_USERSPACE; 244122205Sharti error = kern_preadv(td, fd, &auio, offset); 245122205Sharti return (error); 246122205Sharti} 247122205Sharti 248122205Sharti#if defined(COMPAT_FREEBSD6) 249122205Shartiint 250122205Shartifreebsd6_pread(struct thread *td, struct freebsd6_pread_args *uap) 251122205Sharti{ 252122205Sharti 253122205Sharti return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); 254122205Sharti} 255122205Sharti#endif 256122205Sharti 257131826Sharti/* 258122205Sharti * Scatter read system call. 259122205Sharti */ 260122205Sharti#ifndef _SYS_SYSPROTO_H_ 261122205Shartistruct readv_args { 262122205Sharti int fd; 263122205Sharti struct iovec *iovp; 264122205Sharti u_int iovcnt; 265122205Sharti}; 266122205Sharti#endif 267122205Shartiint 268122205Shartisys_readv(struct thread *td, struct readv_args *uap) 269122205Sharti{ 270122205Sharti struct uio *auio; 271122205Sharti int error; 272122205Sharti 273122205Sharti error = copyinuio(uap->iovp, uap->iovcnt, &auio); 274122205Sharti if (error) 275122205Sharti return (error); 276122205Sharti error = kern_readv(td, uap->fd, auio); 277122205Sharti free(auio, M_IOV); 278122205Sharti return (error); 279122205Sharti} 280122205Sharti 281122205Shartiint 282122205Shartikern_readv(struct thread *td, int fd, struct uio *auio) 283122205Sharti{ 284122205Sharti struct file *fp; 285122205Sharti cap_rights_t rights; 286122205Sharti int error; 287122205Sharti 288122205Sharti error = fget_read(td, fd, cap_rights_init(&rights, CAP_READ), &fp); 289122205Sharti if (error) 290122205Sharti return (error); 291122205Sharti error = dofileread(td, fd, fp, auio, (off_t)-1, 0); 292122205Sharti fdrop(fp, td); 293122205Sharti return (error); 294122205Sharti} 295122205Sharti 296122205Sharti/* 297122205Sharti * Scatter positioned read system call. 298122205Sharti */ 299122205Sharti#ifndef _SYS_SYSPROTO_H_ 300122205Shartistruct preadv_args { 301122205Sharti int fd; 302122205Sharti struct iovec *iovp; 303122205Sharti u_int iovcnt; 304122205Sharti off_t offset; 305122205Sharti}; 306122205Sharti#endif 307122205Shartiint 308122205Shartisys_preadv(struct thread *td, struct preadv_args *uap) 309122205Sharti{ 310122205Sharti struct uio *auio; 311122205Sharti int error; 312122205Sharti 313122205Sharti error = copyinuio(uap->iovp, uap->iovcnt, &auio); 314122205Sharti if (error) 315122205Sharti return (error); 316122205Sharti error = kern_preadv(td, uap->fd, auio, uap->offset); 317122205Sharti free(auio, M_IOV); 318122205Sharti return (error); 319122205Sharti} 320122205Sharti 321122205Shartiint 322122205Shartikern_preadv(td, fd, auio, offset) 323122205Sharti struct thread *td; 324122205Sharti int fd; 325122205Sharti struct uio *auio; 326122205Sharti off_t offset; 327122205Sharti{ 328122205Sharti struct file *fp; 329122205Sharti cap_rights_t rights; 330122205Sharti int error; 331122205Sharti 332122205Sharti error = fget_read(td, fd, cap_rights_init(&rights, CAP_PREAD), &fp); 333122205Sharti if (error) 334122205Sharti return (error); 335122205Sharti if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) 336122205Sharti error = ESPIPE; 337122205Sharti else if (offset < 0 && 338122205Sharti (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) 339122205Sharti error = EINVAL; 340122205Sharti else 341122205Sharti error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); 342122205Sharti fdrop(fp, td); 343122205Sharti return (error); 344122205Sharti} 345122205Sharti 346122205Sharti/* 347122205Sharti * Common code for readv and preadv that reads data in 348122205Sharti * from a file using the passed in uio, offset, and flags. 349122205Sharti */ 350122205Shartistatic int 351122205Shartidofileread(td, fd, fp, auio, offset, flags) 352122205Sharti struct thread *td; 353122205Sharti int fd; 354122205Sharti struct file *fp; 355122205Sharti struct uio *auio; 356122205Sharti off_t offset; 357122205Sharti int flags; 358122205Sharti{ 359122205Sharti ssize_t cnt; 360122205Sharti int error; 361122205Sharti#ifdef KTRACE 362122205Sharti struct uio *ktruio = NULL; 363122205Sharti#endif 364122205Sharti 365122205Sharti AUDIT_ARG_FD(fd); 366122205Sharti 367122205Sharti /* Finish zero length reads right here */ 368122205Sharti if (auio->uio_resid == 0) { 369122205Sharti td->td_retval[0] = 0; 370122205Sharti return(0); 371122205Sharti } 372122205Sharti auio->uio_rw = UIO_READ; 373122205Sharti auio->uio_offset = offset; 374122205Sharti auio->uio_td = td; 375122205Sharti#ifdef KTRACE 376122205Sharti if (KTRPOINT(td, KTR_GENIO)) 377122205Sharti ktruio = cloneuio(auio); 378122205Sharti#endif 379122205Sharti cnt = auio->uio_resid; 380122205Sharti if ((error = fo_read(fp, auio, td->td_ucred, flags, td))) { 381122205Sharti if (auio->uio_resid != cnt && (error == ERESTART || 382122205Sharti error == EINTR || error == EWOULDBLOCK)) 383122205Sharti error = 0; 384122205Sharti } 385122205Sharti cnt -= auio->uio_resid; 386122205Sharti#ifdef KTRACE 387122205Sharti if (ktruio != NULL) { 388122205Sharti ktruio->uio_resid = cnt; 389122205Sharti ktrgenio(fd, UIO_READ, ktruio, error); 390122205Sharti } 391122205Sharti#endif 392122205Sharti td->td_retval[0] = cnt; 393122205Sharti return (error); 394122205Sharti} 395122205Sharti 396122205Sharti#ifndef _SYS_SYSPROTO_H_ 397122205Shartistruct write_args { 398122205Sharti int fd; 399122205Sharti const void *buf; 400122205Sharti size_t nbyte; 401213789Srpaulo}; 402122205Sharti#endif 403122205Shartiint 404122205Shartisys_write(td, uap) 405122205Sharti struct thread *td; 406122205Sharti struct write_args *uap; 407122205Sharti{ 408122205Sharti struct uio auio; 409122205Sharti struct iovec aiov; 410122205Sharti int error; 411122205Sharti 412122205Sharti if (uap->nbyte > IOSIZE_MAX) 413122205Sharti return (EINVAL); 414122205Sharti aiov.iov_base = (void *)(uintptr_t)uap->buf; 415122205Sharti aiov.iov_len = uap->nbyte; 416122205Sharti auio.uio_iov = &aiov; 417122205Sharti auio.uio_iovcnt = 1; 418122205Sharti auio.uio_resid = uap->nbyte; 419122205Sharti auio.uio_segflg = UIO_USERSPACE; 420122205Sharti error = kern_writev(td, uap->fd, &auio); 421122205Sharti return(error); 422122205Sharti} 423122205Sharti 424122205Sharti/* 425122205Sharti * Positioned write system call. 426122205Sharti */ 427122205Sharti#ifndef _SYS_SYSPROTO_H_ 428122205Shartistruct pwrite_args { 429122205Sharti int fd; 430122205Sharti const void *buf; 431122205Sharti size_t nbyte; 432122205Sharti int pad; 433122205Sharti off_t offset; 434122205Sharti}; 435122205Sharti#endif 436122205Shartiint 437122205Shartisys_pwrite(struct thread *td, struct pwrite_args *uap) 438122205Sharti{ 439122205Sharti 440122205Sharti return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); 441122205Sharti} 442122205Sharti 443122205Shartiint 444122205Shartikern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, 445122205Sharti off_t offset) 446122205Sharti{ 447122205Sharti struct uio auio; 448122205Sharti struct iovec aiov; 449122205Sharti int error; 450122205Sharti 451122205Sharti if (nbyte > IOSIZE_MAX) 452122205Sharti return (EINVAL); 453122205Sharti aiov.iov_base = (void *)(uintptr_t)buf; 454122205Sharti aiov.iov_len = nbyte; 455122205Sharti auio.uio_iov = &aiov; 456122205Sharti auio.uio_iovcnt = 1; 457122205Sharti auio.uio_resid = nbyte; 458122205Sharti auio.uio_segflg = UIO_USERSPACE; 459122205Sharti error = kern_pwritev(td, fd, &auio, offset); 460122205Sharti return(error); 461122205Sharti} 462122205Sharti 463122205Sharti#if defined(COMPAT_FREEBSD6) 464122205Shartiint 465122205Shartifreebsd6_pwrite(struct thread *td, struct freebsd6_pwrite_args *uap) 466122205Sharti{ 467122205Sharti 468122205Sharti return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); 469131826Sharti} 470122205Sharti#endif 471122205Sharti 472122205Sharti/* 473122205Sharti * Gather write system call. 474122205Sharti */ 475122205Sharti#ifndef _SYS_SYSPROTO_H_ 476122205Shartistruct writev_args { 477122205Sharti int fd; 478122205Sharti struct iovec *iovp; 479122205Sharti u_int iovcnt; 480122205Sharti}; 481122205Sharti#endif 482122205Shartiint 483122205Shartisys_writev(struct thread *td, struct writev_args *uap) 484122205Sharti{ 485122205Sharti struct uio *auio; 486122205Sharti int error; 487122205Sharti 488122205Sharti error = copyinuio(uap->iovp, uap->iovcnt, &auio); 489122205Sharti if (error) 490122205Sharti return (error); 491122205Sharti error = kern_writev(td, uap->fd, auio); 492122205Sharti free(auio, M_IOV); 493122205Sharti return (error); 494122205Sharti} 495122205Sharti 496122205Shartiint 497122205Shartikern_writev(struct thread *td, int fd, struct uio *auio) 498122205Sharti{ 499122205Sharti struct file *fp; 500122205Sharti cap_rights_t rights; 501122205Sharti int error; 502122205Sharti 503122205Sharti error = fget_write(td, fd, cap_rights_init(&rights, CAP_WRITE), &fp); 504122205Sharti if (error) 505122205Sharti return (error); 506122205Sharti error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0); 507122205Sharti fdrop(fp, td); 508122205Sharti return (error); 509122205Sharti} 510122205Sharti 511122205Sharti/* 512122205Sharti * Gather positioned write system call. 513122205Sharti */ 514122205Sharti#ifndef _SYS_SYSPROTO_H_ 515122205Shartistruct pwritev_args { 516122205Sharti int fd; 517122205Sharti struct iovec *iovp; 518122205Sharti u_int iovcnt; 519122205Sharti off_t offset; 520122205Sharti}; 521122205Sharti#endif 522122205Shartiint 523122205Shartisys_pwritev(struct thread *td, struct pwritev_args *uap) 524122205Sharti{ 525122205Sharti struct uio *auio; 526122205Sharti int error; 527122205Sharti 528122205Sharti error = copyinuio(uap->iovp, uap->iovcnt, &auio); 529122205Sharti if (error) 530122205Sharti return (error); 531122205Sharti error = kern_pwritev(td, uap->fd, auio, uap->offset); 532122205Sharti free(auio, M_IOV); 533122205Sharti return (error); 534122205Sharti} 535122205Sharti 536122205Shartiint 537122205Shartikern_pwritev(td, fd, auio, offset) 538122205Sharti struct thread *td; 539122205Sharti struct uio *auio; 540122205Sharti int fd; 541122205Sharti off_t offset; 542122205Sharti{ 543122205Sharti struct file *fp; 544122205Sharti cap_rights_t rights; 545122205Sharti int error; 546122205Sharti 547122205Sharti error = fget_write(td, fd, cap_rights_init(&rights, CAP_PWRITE), &fp); 548122205Sharti if (error) 549122205Sharti return (error); 550122205Sharti if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) 551122205Sharti error = ESPIPE; 552122205Sharti else if (offset < 0 && 553122205Sharti (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) 554122205Sharti error = EINVAL; 555122205Sharti else 556122205Sharti error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); 557122205Sharti fdrop(fp, td); 558122205Sharti return (error); 559122205Sharti} 560122205Sharti 561122205Sharti/* 562122205Sharti * Common code for writev and pwritev that writes data to 563122205Sharti * a file using the passed in uio, offset, and flags. 564122205Sharti */ 565122205Shartistatic int 566122205Shartidofilewrite(td, fd, fp, auio, offset, flags) 567122205Sharti struct thread *td; 568122205Sharti int fd; 569122205Sharti struct file *fp; 570122205Sharti struct uio *auio; 571122205Sharti off_t offset; 572122205Sharti int flags; 573122205Sharti{ 574122205Sharti ssize_t cnt; 575122205Sharti int error; 576122205Sharti#ifdef KTRACE 577122205Sharti struct uio *ktruio = NULL; 578122205Sharti#endif 579122205Sharti 580122205Sharti AUDIT_ARG_FD(fd); 581213789Srpaulo auio->uio_rw = UIO_WRITE; 582122205Sharti auio->uio_td = td; 583122205Sharti auio->uio_offset = offset; 584122205Sharti#ifdef KTRACE 585122205Sharti if (KTRPOINT(td, KTR_GENIO)) 586122205Sharti ktruio = cloneuio(auio); 587122205Sharti#endif 588122205Sharti cnt = auio->uio_resid; 589122205Sharti if (fp->f_type == DTYPE_VNODE && 590122205Sharti (fp->f_vnread_flags & FDEVFS_VNODE) == 0) 591122205Sharti bwillwrite(); 592122205Sharti if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) { 593122205Sharti if (auio->uio_resid != cnt && (error == ERESTART || 594122205Sharti error == EINTR || error == EWOULDBLOCK)) 595122205Sharti error = 0; 596122205Sharti /* Socket layer is responsible for issuing SIGPIPE. */ 597122205Sharti if (fp->f_type != DTYPE_SOCKET && error == EPIPE) { 598122205Sharti PROC_LOCK(td->td_proc); 599122205Sharti tdsignal(td, SIGPIPE); 600122205Sharti PROC_UNLOCK(td->td_proc); 601122205Sharti } 602122205Sharti } 603122205Sharti cnt -= auio->uio_resid; 604122205Sharti#ifdef KTRACE 605122205Sharti if (ktruio != NULL) { 606122205Sharti ktruio->uio_resid = cnt; 607122205Sharti ktrgenio(fd, UIO_WRITE, ktruio, error); 608122205Sharti } 609122205Sharti#endif 610122205Sharti td->td_retval[0] = cnt; 611122205Sharti return (error); 612122205Sharti} 613122205Sharti 614122205Sharti/* 615122205Sharti * Truncate a file given a file descriptor. 616122205Sharti * 617122205Sharti * Can't use fget_write() here, since must return EINVAL and not EBADF if the 618122205Sharti * descriptor isn't writable. 619122205Sharti */ 620122205Shartiint 621122205Shartikern_ftruncate(td, fd, length) 622122205Sharti struct thread *td; 623122205Sharti int fd; 624122205Sharti off_t length; 625122205Sharti{ 626122205Sharti struct file *fp; 627122205Sharti cap_rights_t rights; 628122205Sharti int error; 629122205Sharti 630122205Sharti AUDIT_ARG_FD(fd); 631122205Sharti if (length < 0) 632122205Sharti return (EINVAL); 633122205Sharti error = fget(td, fd, cap_rights_init(&rights, CAP_FTRUNCATE), &fp); 634122205Sharti if (error) 635122205Sharti return (error); 636122205Sharti AUDIT_ARG_FILE(td->td_proc, fp); 637122205Sharti if (!(fp->f_flag & FWRITE)) { 638122205Sharti fdrop(fp, td); 639122205Sharti return (EINVAL); 640122205Sharti } 641122205Sharti error = fo_truncate(fp, length, td->td_ucred, td); 642122205Sharti fdrop(fp, td); 643122205Sharti return (error); 644122205Sharti} 645122205Sharti 646122205Sharti#ifndef _SYS_SYSPROTO_H_ 647122205Shartistruct ftruncate_args { 648122205Sharti int fd; 649122205Sharti int pad; 650122205Sharti off_t length; 651122205Sharti}; 652122205Sharti#endif 653122205Shartiint 654122205Shartisys_ftruncate(td, uap) 655122205Sharti struct thread *td; 656122205Sharti struct ftruncate_args *uap; 657122205Sharti{ 658122205Sharti 659122205Sharti return (kern_ftruncate(td, uap->fd, uap->length)); 660122205Sharti} 661122205Sharti 662122205Sharti#if defined(COMPAT_43) 663122205Sharti#ifndef _SYS_SYSPROTO_H_ 664122205Shartistruct oftruncate_args { 665122205Sharti int fd; 666122205Sharti long length; 667122205Sharti}; 668122205Sharti#endif 669122205Shartiint 670122205Shartioftruncate(td, uap) 671122205Sharti struct thread *td; 672122205Sharti struct oftruncate_args *uap; 673122205Sharti{ 674122205Sharti 675122205Sharti return (kern_ftruncate(td, uap->fd, uap->length)); 676122205Sharti} 677122205Sharti#endif /* COMPAT_43 */ 678122205Sharti 679122205Sharti#ifndef _SYS_SYSPROTO_H_ 680122205Shartistruct ioctl_args { 681122205Sharti int fd; 682122205Sharti u_long com; 683122205Sharti caddr_t data; 684122205Sharti}; 685122205Sharti#endif 686122205Sharti/* ARGSUSED */ 687122205Shartiint 688122205Shartisys_ioctl(struct thread *td, struct ioctl_args *uap) 689122205Sharti{ 690122205Sharti u_char smalldata[SYS_IOCTL_SMALL_SIZE] __aligned(SYS_IOCTL_SMALL_ALIGN); 691122205Sharti u_long com; 692122205Sharti int arg, error; 693122205Sharti u_int size; 694122205Sharti caddr_t data; 695122205Sharti 696122205Sharti if (uap->com > 0xffffffff) { 697122205Sharti printf( 698122205Sharti "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n", 699122205Sharti td->td_proc->p_pid, td->td_name, uap->com); 700122205Sharti uap->com &= 0xffffffff; 701122205Sharti } 702122205Sharti com = uap->com; 703122205Sharti 704122205Sharti /* 705122205Sharti * Interpret high order word to find amount of data to be 706122205Sharti * copied to/from the user's address space. 707122205Sharti */ 708122205Sharti size = IOCPARM_LEN(com); 709122205Sharti if ((size > IOCPARM_MAX) || 710122205Sharti ((com & (IOC_VOID | IOC_IN | IOC_OUT)) == 0) || 711122205Sharti#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43) 712122205Sharti ((com & IOC_OUT) && size == 0) || 713122205Sharti#else 714122205Sharti ((com & (IOC_IN | IOC_OUT)) && size == 0) || 715122205Sharti#endif 716122205Sharti ((com & IOC_VOID) && size > 0 && size != sizeof(int))) 717122205Sharti return (ENOTTY); 718122205Sharti 719122205Sharti if (size > 0) { 720122205Sharti if (com & IOC_VOID) { 721122205Sharti /* Integer argument. */ 722122205Sharti arg = (intptr_t)uap->data; 723122205Sharti data = (void *)&arg; 724122205Sharti size = 0; 725122205Sharti } else { 726122205Sharti if (size > SYS_IOCTL_SMALL_SIZE) 727122205Sharti data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 728122205Sharti else 729122205Sharti data = smalldata; 730122205Sharti } 731122205Sharti } else 732122205Sharti data = (void *)&uap->data; 733122205Sharti if (com & IOC_IN) { 734122205Sharti error = copyin(uap->data, data, (u_int)size); 735122205Sharti if (error != 0) 736122205Sharti goto out; 737122205Sharti } else if (com & IOC_OUT) { 738122205Sharti /* 739122205Sharti * Zero the buffer so the user always 740122205Sharti * gets back something deterministic. 741122205Sharti */ 742122205Sharti bzero(data, size); 743122205Sharti } 744122205Sharti 745122205Sharti error = kern_ioctl(td, uap->fd, com, data); 746122205Sharti 747122205Sharti if (error == 0 && (com & IOC_OUT)) 748122205Sharti error = copyout(data, uap->data, (u_int)size); 749122205Sharti 750122205Shartiout: 751122205Sharti if (size > SYS_IOCTL_SMALL_SIZE) 752122205Sharti free(data, M_IOCTLOPS); 753122205Sharti return (error); 754122205Sharti} 755122205Sharti 756122205Shartiint 757122205Shartikern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) 758122205Sharti{ 759122205Sharti struct file *fp; 760122205Sharti struct filedesc *fdp; 761122205Sharti#ifndef CAPABILITIES 762122205Sharti cap_rights_t rights; 763122205Sharti#endif 764122205Sharti int error, tmp, locked; 765122205Sharti 766213789Srpaulo AUDIT_ARG_FD(fd); 767122205Sharti AUDIT_ARG_CMD(com); 768122205Sharti 769122205Sharti fdp = td->td_proc->p_fd; 770122205Sharti 771122205Sharti switch (com) { 772122205Sharti case FIONCLEX: 773122205Sharti case FIOCLEX: 774122205Sharti FILEDESC_XLOCK(fdp); 775122205Sharti locked = LA_XLOCKED; 776122205Sharti break; 777122205Sharti default: 778122205Sharti#ifdef CAPABILITIES 779122205Sharti FILEDESC_SLOCK(fdp); 780122205Sharti locked = LA_SLOCKED; 781122205Sharti#else 782122205Sharti locked = LA_UNLOCKED; 783122205Sharti#endif 784122205Sharti break; 785122205Sharti } 786122205Sharti 787122205Sharti#ifdef CAPABILITIES 788122205Sharti if ((fp = fget_locked(fdp, fd)) == NULL) { 789122205Sharti error = EBADF; 790122205Sharti goto out; 791122205Sharti } 792122205Sharti if ((error = cap_ioctl_check(fdp, fd, com)) != 0) { 793122205Sharti fp = NULL; /* fhold() was not called yet */ 794122205Sharti goto out; 795122205Sharti } 796122205Sharti fhold(fp); 797122205Sharti if (locked == LA_SLOCKED) { 798122205Sharti FILEDESC_SUNLOCK(fdp); 799122205Sharti locked = LA_UNLOCKED; 800122205Sharti } 801122205Sharti#else 802122205Sharti error = fget(td, fd, cap_rights_init(&rights, CAP_IOCTL), &fp); 803122205Sharti if (error != 0) { 804122205Sharti fp = NULL; 805122205Sharti goto out; 806122205Sharti } 807122205Sharti#endif 808122205Sharti if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 809122205Sharti error = EBADF; 810122205Sharti goto out; 811122205Sharti } 812122205Sharti 813122205Sharti switch (com) { 814122205Sharti case FIONCLEX: 815122205Sharti fdp->fd_ofiles[fd].fde_flags &= ~UF_EXCLOSE; 816122205Sharti goto out; 817122205Sharti case FIOCLEX: 818122205Sharti fdp->fd_ofiles[fd].fde_flags |= UF_EXCLOSE; 819122205Sharti goto out; 820122205Sharti case FIONBIO: 821122205Sharti if ((tmp = *(int *)data)) 822122205Sharti atomic_set_int(&fp->f_flag, FNONBLOCK); 823122205Sharti else 824122205Sharti atomic_clear_int(&fp->f_flag, FNONBLOCK); 825122205Sharti data = (void *)&tmp; 826122205Sharti break; 827122205Sharti case FIOASYNC: 828122205Sharti if ((tmp = *(int *)data)) 829122205Sharti atomic_set_int(&fp->f_flag, FASYNC); 830122205Sharti else 831122205Sharti atomic_clear_int(&fp->f_flag, FASYNC); 832131826Sharti data = (void *)&tmp; 833122205Sharti break; 834122205Sharti } 835122205Sharti 836122205Sharti error = fo_ioctl(fp, com, data, td->td_ucred, td); 837122205Shartiout: 838122205Sharti switch (locked) { 839122205Sharti case LA_XLOCKED: 840122205Sharti FILEDESC_XUNLOCK(fdp); 841122205Sharti break; 842122205Sharti#ifdef CAPABILITIES 843122205Sharti case LA_SLOCKED: 844122205Sharti FILEDESC_SUNLOCK(fdp); 845122205Sharti break; 846122205Sharti#endif 847122205Sharti default: 848122205Sharti FILEDESC_UNLOCK_ASSERT(fdp); 849122205Sharti break; 850122205Sharti } 851122205Sharti if (fp != NULL) 852122205Sharti fdrop(fp, td); 853122205Sharti return (error); 854122205Sharti} 855122205Sharti 856122205Shartiint 857122205Shartipoll_no_poll(int events) 858122205Sharti{ 859122205Sharti /* 860122205Sharti * Return true for read/write. If the user asked for something 861122205Sharti * special, return POLLNVAL, so that clients have a way of 862122205Sharti * determining reliably whether or not the extended 863122205Sharti * functionality is present without hard-coding knowledge 864122205Sharti * of specific filesystem implementations. 865122205Sharti */ 866122205Sharti if (events & ~POLLSTANDARD) 867122205Sharti return (POLLNVAL); 868122205Sharti 869131826Sharti return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 870122205Sharti} 871122205Sharti 872122205Shartiint 873122205Shartisys_pselect(struct thread *td, struct pselect_args *uap) 874122205Sharti{ 875122205Sharti struct timespec ts; 876122205Sharti struct timeval tv, *tvp; 877122205Sharti sigset_t set, *uset; 878122205Sharti int error; 879122205Sharti 880122205Sharti if (uap->ts != NULL) { 881122205Sharti error = copyin(uap->ts, &ts, sizeof(ts)); 882122205Sharti if (error != 0) 883122205Sharti return (error); 884122205Sharti TIMESPEC_TO_TIMEVAL(&tv, &ts); 885122205Sharti tvp = &tv; 886122205Sharti } else 887122205Sharti tvp = NULL; 888122205Sharti if (uap->sm != NULL) { 889122205Sharti error = copyin(uap->sm, &set, sizeof(set)); 890122205Sharti if (error != 0) 891122205Sharti return (error); 892122205Sharti uset = &set; 893122205Sharti } else 894122205Sharti uset = NULL; 895122205Sharti return (kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, 896122205Sharti uset, NFDBITS)); 897122205Sharti} 898122205Sharti 899122205Shartiint 900122205Shartikern_pselect(struct thread *td, int nd, fd_set *in, fd_set *ou, fd_set *ex, 901122205Sharti struct timeval *tvp, sigset_t *uset, int abi_nfdbits) 902122205Sharti{ 903122205Sharti int error; 904122205Sharti 905122205Sharti if (uset != NULL) { 906122205Sharti error = kern_sigprocmask(td, SIG_SETMASK, uset, 907122205Sharti &td->td_oldsigmask, 0); 908122205Sharti if (error != 0) 909122205Sharti return (error); 910122205Sharti td->td_pflags |= TDP_OLDMASK; 911122205Sharti /* 912122205Sharti * Make sure that ast() is called on return to 913122205Sharti * usermode and TDP_OLDMASK is cleared, restoring old 914122205Sharti * sigmask. 915122205Sharti */ 916122205Sharti thread_lock(td); 917122205Sharti td->td_flags |= TDF_ASTPENDING; 918131826Sharti thread_unlock(td); 919122205Sharti } 920122205Sharti error = kern_select(td, nd, in, ou, ex, tvp, abi_nfdbits); 921122205Sharti return (error); 922122205Sharti} 923122205Sharti 924122205Sharti#ifndef _SYS_SYSPROTO_H_ 925122205Shartistruct select_args { 926122205Sharti int nd; 927122205Sharti fd_set *in, *ou, *ex; 928122205Sharti struct timeval *tv; 929122205Sharti}; 930122205Sharti#endif 931122205Shartiint 932122205Shartisys_select(struct thread *td, struct select_args *uap) 933122205Sharti{ 934122205Sharti struct timeval tv, *tvp; 935122205Sharti int error; 936122205Sharti 937122205Sharti if (uap->tv != NULL) { 938122205Sharti error = copyin(uap->tv, &tv, sizeof(tv)); 939122205Sharti if (error) 940122205Sharti return (error); 941122205Sharti tvp = &tv; 942122205Sharti } else 943122205Sharti tvp = NULL; 944122205Sharti 945122205Sharti return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, 946122205Sharti NFDBITS)); 947122205Sharti} 948122205Sharti 949122205Sharti/* 950131826Sharti * In the unlikely case when user specified n greater then the last 951122205Sharti * open file descriptor, check that no bits are set after the last 952122205Sharti * valid fd. We must return EBADF if any is set. 953122205Sharti * 954122205Sharti * There are applications that rely on the behaviour. 955122205Sharti * 956122205Sharti * nd is fd_lastfile + 1. 957122205Sharti */ 958122205Shartistatic int 959122205Shartiselect_check_badfd(fd_set *fd_in, int nd, int ndu, int abi_nfdbits) 960122205Sharti{ 961122205Sharti char *addr, *oaddr; 962122205Sharti int b, i, res; 963122205Sharti uint8_t bits; 964122205Sharti 965122205Sharti if (nd >= ndu || fd_in == NULL) 966122205Sharti return (0); 967122205Sharti 968122205Sharti oaddr = NULL; 969122205Sharti bits = 0; /* silence gcc */ 970122205Sharti for (i = nd; i < ndu; i++) { 971122205Sharti b = i / NBBY; 972122205Sharti#if BYTE_ORDER == LITTLE_ENDIAN 973122205Sharti addr = (char *)fd_in + b; 974122205Sharti#else 975122205Sharti addr = (char *)fd_in; 976122205Sharti if (abi_nfdbits == NFDBITS) { 977122205Sharti addr += rounddown(b, sizeof(fd_mask)) + 978122205Sharti sizeof(fd_mask) - 1 - b % sizeof(fd_mask); 979122205Sharti } else { 980122205Sharti addr += rounddown(b, sizeof(uint32_t)) + 981122205Sharti sizeof(uint32_t) - 1 - b % sizeof(uint32_t); 982122205Sharti } 983131826Sharti#endif 984122205Sharti if (addr != oaddr) { 985122205Sharti res = fubyte(addr); 986122205Sharti if (res == -1) 987122205Sharti return (EFAULT); 988122205Sharti oaddr = addr; 989122205Sharti bits = res; 990122205Sharti } 991122205Sharti if ((bits & (1 << (i % NBBY))) != 0) 992122205Sharti return (EBADF); 993122205Sharti } 994122205Sharti return (0); 995122205Sharti} 996122205Sharti 997122205Shartiint 998122205Shartikern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, 999122205Sharti fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits) 1000122205Sharti{ 1001122205Sharti struct filedesc *fdp; 1002122205Sharti /* 1003122205Sharti * The magic 2048 here is chosen to be just enough for FD_SETSIZE 1004122205Sharti * infds with the new FD_SETSIZE of 1024, and more than enough for 1005122205Sharti * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 1006122205Sharti * of 256. 1007122205Sharti */ 1008122205Sharti fd_mask s_selbits[howmany(2048, NFDBITS)]; 1009122205Sharti fd_mask *ibits[3], *obits[3], *selbits, *sbp; 1010122205Sharti struct timeval rtv; 1011122205Sharti sbintime_t asbt, precision, rsbt; 1012122205Sharti u_int nbufbytes, ncpbytes, ncpubytes, nfdbits; 1013122205Sharti int error, lf, ndu; 1014122205Sharti 1015122205Sharti if (nd < 0) 1016122205Sharti return (EINVAL); 1017122205Sharti fdp = td->td_proc->p_fd; 1018122205Sharti ndu = nd; 1019122205Sharti lf = fdp->fd_lastfile; 1020122205Sharti if (nd > lf + 1) 1021122205Sharti nd = lf + 1; 1022122205Sharti 1023122205Sharti error = select_check_badfd(fd_in, nd, ndu, abi_nfdbits); 1024122205Sharti if (error != 0) 1025122205Sharti return (error); 1026122205Sharti error = select_check_badfd(fd_ou, nd, ndu, abi_nfdbits); 1027122205Sharti if (error != 0) 1028122205Sharti return (error); 1029122205Sharti error = select_check_badfd(fd_ex, nd, ndu, abi_nfdbits); 1030122205Sharti if (error != 0) 1031122205Sharti return (error); 1032122205Sharti 1033122205Sharti /* 1034122205Sharti * Allocate just enough bits for the non-null fd_sets. Use the 1035122205Sharti * preallocated auto buffer if possible. 1036122205Sharti */ 1037122205Sharti nfdbits = roundup(nd, NFDBITS); 1038122205Sharti ncpbytes = nfdbits / NBBY; 1039122205Sharti ncpubytes = roundup(nd, abi_nfdbits) / NBBY; 1040122205Sharti nbufbytes = 0; 1041122205Sharti if (fd_in != NULL) 1042122205Sharti nbufbytes += 2 * ncpbytes; 1043122205Sharti if (fd_ou != NULL) 1044122205Sharti nbufbytes += 2 * ncpbytes; 1045122205Sharti if (fd_ex != NULL) 1046122205Sharti nbufbytes += 2 * ncpbytes; 1047122205Sharti if (nbufbytes <= sizeof s_selbits) 1048122205Sharti selbits = &s_selbits[0]; 1049122205Sharti else 1050122205Sharti selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 1051122205Sharti 1052122205Sharti /* 1053122205Sharti * Assign pointers into the bit buffers and fetch the input bits. 1054122205Sharti * Put the output buffers together so that they can be bzeroed 1055122205Sharti * together. 1056122205Sharti */ 1057122205Sharti sbp = selbits; 1058122205Sharti#define getbits(name, x) \ 1059122205Sharti do { \ 1060122205Sharti if (name == NULL) { \ 1061122205Sharti ibits[x] = NULL; \ 1062122205Sharti obits[x] = NULL; \ 1063122205Sharti } else { \ 1064122205Sharti ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 1065122205Sharti obits[x] = sbp; \ 1066122205Sharti sbp += ncpbytes / sizeof *sbp; \ 1067122205Sharti error = copyin(name, ibits[x], ncpubytes); \ 1068213789Srpaulo if (error != 0) \ 1069122205Sharti goto done; \ 1070122205Sharti bzero((char *)ibits[x] + ncpubytes, \ 1071122205Sharti ncpbytes - ncpubytes); \ 1072122205Sharti } \ 1073122205Sharti } while (0) 1074122205Sharti getbits(fd_in, 0); 1075122205Sharti getbits(fd_ou, 1); 1076122205Sharti getbits(fd_ex, 2); 1077122205Sharti#undef getbits 1078122205Sharti 1079122205Sharti#if BYTE_ORDER == BIG_ENDIAN && defined(__LP64__) 1080122205Sharti /* 1081122205Sharti * XXX: swizzle_fdset assumes that if abi_nfdbits != NFDBITS, 1082122205Sharti * we are running under 32-bit emulation. This should be more 1083122205Sharti * generic. 1084122205Sharti */ 1085122205Sharti#define swizzle_fdset(bits) \ 1086122205Sharti if (abi_nfdbits != NFDBITS && bits != NULL) { \ 1087122205Sharti int i; \ 1088122205Sharti for (i = 0; i < ncpbytes / sizeof *sbp; i++) \ 1089122205Sharti bits[i] = (bits[i] >> 32) | (bits[i] << 32); \ 1090122205Sharti } 1091122205Sharti#else 1092122205Sharti#define swizzle_fdset(bits) 1093122205Sharti#endif 1094122205Sharti 1095122205Sharti /* Make sure the bit order makes it through an ABI transition */ 1096122205Sharti swizzle_fdset(ibits[0]); 1097122205Sharti swizzle_fdset(ibits[1]); 1098122205Sharti swizzle_fdset(ibits[2]); 1099122205Sharti 1100122205Sharti if (nbufbytes != 0) 1101122205Sharti bzero(selbits, nbufbytes / 2); 1102122205Sharti 1103122205Sharti precision = 0; 1104122205Sharti if (tvp != NULL) { 1105122205Sharti rtv = *tvp; 1106122205Sharti if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || 1107122205Sharti rtv.tv_usec >= 1000000) { 1108122205Sharti error = EINVAL; 1109122205Sharti goto done; 1110122205Sharti } 1111122205Sharti if (!timevalisset(&rtv)) 1112122205Sharti asbt = 0; 1113122205Sharti else if (rtv.tv_sec <= INT32_MAX) { 1114122205Sharti rsbt = tvtosbt(rtv); 1115122205Sharti precision = rsbt; 1116122205Sharti precision >>= tc_precexp; 1117122205Sharti if (TIMESEL(&asbt, rsbt)) 1118122205Sharti asbt += tc_tick_sbt; 1119122205Sharti if (asbt <= SBT_MAX - rsbt) 1120122205Sharti asbt += rsbt; 1121122205Sharti else 1122122205Sharti asbt = -1; 1123122205Sharti } else 1124122205Sharti asbt = -1; 1125122205Sharti } else 1126122205Sharti asbt = -1; 1127122205Sharti seltdinit(td); 1128122205Sharti /* Iterate until the timeout expires or descriptors become ready. */ 1129122205Sharti for (;;) { 1130122205Sharti error = selscan(td, ibits, obits, nd); 1131122205Sharti if (error || td->td_retval[0] != 0) 1132122205Sharti break; 1133122205Sharti error = seltdwait(td, asbt, precision); 1134122205Sharti if (error) 1135122205Sharti break; 1136122205Sharti error = selrescan(td, ibits, obits); 1137122205Sharti if (error || td->td_retval[0] != 0) 1138122205Sharti break; 1139122205Sharti } 1140122205Sharti seltdclear(td); 1141122205Sharti 1142122205Shartidone: 1143122205Sharti /* select is not restarted after signals... */ 1144122205Sharti if (error == ERESTART) 1145122205Sharti error = EINTR; 1146122205Sharti if (error == EWOULDBLOCK) 1147122205Sharti error = 0; 1148122205Sharti 1149122205Sharti /* swizzle bit order back, if necessary */ 1150122205Sharti swizzle_fdset(obits[0]); 1151122205Sharti swizzle_fdset(obits[1]); 1152122205Sharti swizzle_fdset(obits[2]); 1153122205Sharti#undef swizzle_fdset 1154122205Sharti 1155122205Sharti#define putbits(name, x) \ 1156122205Sharti if (name && (error2 = copyout(obits[x], name, ncpubytes))) \ 1157122205Sharti error = error2; 1158122205Sharti if (error == 0) { 1159122205Sharti int error2; 1160122205Sharti 1161122205Sharti putbits(fd_in, 0); 1162122205Sharti putbits(fd_ou, 1); 1163122205Sharti putbits(fd_ex, 2); 1164122205Sharti#undef putbits 1165122205Sharti } 1166122205Sharti if (selbits != &s_selbits[0]) 1167122205Sharti free(selbits, M_SELECT); 1168122205Sharti 1169122205Sharti return (error); 1170122205Sharti} 1171122205Sharti/* 1172122205Sharti * Convert a select bit set to poll flags. 1173122205Sharti * 1174122205Sharti * The backend always returns POLLHUP/POLLERR if appropriate and we 1175122205Sharti * return this as a set bit in any set. 1176122205Sharti */ 1177122205Shartistatic int select_flags[3] = { 1178122205Sharti POLLRDNORM | POLLHUP | POLLERR, 1179122205Sharti POLLWRNORM | POLLHUP | POLLERR, 1180122205Sharti POLLRDBAND | POLLERR 1181122205Sharti}; 1182122205Sharti 1183122205Sharti/* 1184122205Sharti * Compute the fo_poll flags required for a fd given by the index and 1185122205Sharti * bit position in the fd_mask array. 1186122205Sharti */ 1187122205Shartistatic __inline int 1188122205Shartiselflags(fd_mask **ibits, int idx, fd_mask bit) 1189122205Sharti{ 1190122205Sharti int flags; 1191122205Sharti int msk; 1192122205Sharti 1193122205Sharti flags = 0; 1194122205Sharti for (msk = 0; msk < 3; msk++) { 1195122205Sharti if (ibits[msk] == NULL) 1196122205Sharti continue; 1197122205Sharti if ((ibits[msk][idx] & bit) == 0) 1198122205Sharti continue; 1199122205Sharti flags |= select_flags[msk]; 1200122205Sharti } 1201122205Sharti return (flags); 1202122205Sharti} 1203122205Sharti 1204122205Sharti/* 1205122205Sharti * Set the appropriate output bits given a mask of fired events and the 1206122205Sharti * input bits originally requested. 1207122205Sharti */ 1208122205Shartistatic __inline int 1209122205Shartiselsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events) 1210122205Sharti{ 1211122205Sharti int msk; 1212122205Sharti int n; 1213122205Sharti 1214122205Sharti n = 0; 1215122205Sharti for (msk = 0; msk < 3; msk++) { 1216122205Sharti if ((events & select_flags[msk]) == 0) 1217122205Sharti continue; 1218122205Sharti if (ibits[msk] == NULL) 1219122205Sharti continue; 1220122205Sharti if ((ibits[msk][idx] & bit) == 0) 1221122205Sharti continue; 1222122205Sharti /* 1223122205Sharti * XXX Check for a duplicate set. This can occur because a 1224122205Sharti * socket calls selrecord() twice for each poll() call 1225122205Sharti * resulting in two selfds per real fd. selrescan() will 1226122205Sharti * call selsetbits twice as a result. 1227122205Sharti */ 1228122205Sharti if ((obits[msk][idx] & bit) != 0) 1229122205Sharti continue; 1230122205Sharti obits[msk][idx] |= bit; 1231122205Sharti n++; 1232122205Sharti } 1233122205Sharti 1234122205Sharti return (n); 1235122205Sharti} 1236122205Sharti 1237122205Shartistatic __inline int 1238122205Shartigetselfd_cap(struct filedesc *fdp, int fd, struct file **fpp) 1239122205Sharti{ 1240122205Sharti cap_rights_t rights; 1241122205Sharti 1242122205Sharti cap_rights_init(&rights, CAP_EVENT); 1243122205Sharti 1244122205Sharti return (fget_unlocked(fdp, fd, &rights, fpp, NULL)); 1245122205Sharti} 1246122205Sharti 1247122205Sharti/* 1248122205Sharti * Traverse the list of fds attached to this thread's seltd and check for 1249122205Sharti * completion. 1250122205Sharti */ 1251122205Shartistatic int 1252122205Shartiselrescan(struct thread *td, fd_mask **ibits, fd_mask **obits) 1253122205Sharti{ 1254122205Sharti struct filedesc *fdp; 1255122205Sharti struct selinfo *si; 1256122205Sharti struct seltd *stp; 1257122205Sharti struct selfd *sfp; 1258122205Sharti struct selfd *sfn; 1259122205Sharti struct file *fp; 1260122205Sharti fd_mask bit; 1261122205Sharti int fd, ev, n, idx; 1262122205Sharti int error; 1263122205Sharti 1264122205Sharti fdp = td->td_proc->p_fd; 1265122205Sharti stp = td->td_sel; 1266122205Sharti n = 0; 1267122205Sharti STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) { 1268122205Sharti fd = (int)(uintptr_t)sfp->sf_cookie; 1269122205Sharti si = sfp->sf_si; 1270122205Sharti selfdfree(stp, sfp); 1271122205Sharti /* If the selinfo wasn't cleared the event didn't fire. */ 1272122205Sharti if (si != NULL) 1273122205Sharti continue; 1274122205Sharti error = getselfd_cap(fdp, fd, &fp); 1275122205Sharti if (error) 1276122205Sharti return (error); 1277122205Sharti idx = fd / NFDBITS; 1278122205Sharti bit = (fd_mask)1 << (fd % NFDBITS); 1279122205Sharti ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td); 1280122205Sharti fdrop(fp, td); 1281122205Sharti if (ev != 0) 1282122205Sharti n += selsetbits(ibits, obits, idx, bit, ev); 1283122205Sharti } 1284122205Sharti stp->st_flags = 0; 1285122205Sharti td->td_retval[0] = n; 1286122205Sharti return (0); 1287122205Sharti} 1288122205Sharti 1289122205Sharti/* 1290122205Sharti * Perform the initial filedescriptor scan and register ourselves with 1291122205Sharti * each selinfo. 1292122205Sharti */ 1293131826Shartistatic int 1294122205Shartiselscan(td, ibits, obits, nfd) 1295122205Sharti struct thread *td; 1296122205Sharti fd_mask **ibits, **obits; 1297122205Sharti int nfd; 1298122205Sharti{ 1299122205Sharti struct filedesc *fdp; 1300122205Sharti struct file *fp; 1301122205Sharti fd_mask bit; 1302122205Sharti int ev, flags, end, fd; 1303122205Sharti int n, idx; 1304122205Sharti int error; 1305122205Sharti 1306122205Sharti fdp = td->td_proc->p_fd; 1307122205Sharti n = 0; 1308122205Sharti for (idx = 0, fd = 0; fd < nfd; idx++) { 1309122205Sharti end = imin(fd + NFDBITS, nfd); 1310122205Sharti for (bit = 1; fd < end; bit <<= 1, fd++) { 1311122205Sharti /* Compute the list of events we're interested in. */ 1312122205Sharti flags = selflags(ibits, idx, bit); 1313122205Sharti if (flags == 0) 1314122205Sharti continue; 1315122205Sharti error = getselfd_cap(fdp, fd, &fp); 1316122205Sharti if (error) 1317122205Sharti return (error); 1318122205Sharti selfdalloc(td, (void *)(uintptr_t)fd); 1319122205Sharti ev = fo_poll(fp, flags, td->td_ucred, td); 1320122205Sharti fdrop(fp, td); 1321122205Sharti if (ev != 0) 1322122205Sharti n += selsetbits(ibits, obits, idx, bit, ev); 1323122205Sharti } 1324122205Sharti } 1325122205Sharti 1326122205Sharti td->td_retval[0] = n; 1327122205Sharti return (0); 1328122205Sharti} 1329122205Sharti 1330122205Shartiint 1331122205Shartisys_poll(struct thread *td, struct poll_args *uap) 1332122205Sharti{ 1333122205Sharti struct timespec ts, *tsp; 1334122205Sharti 1335122205Sharti if (uap->timeout != INFTIM) { 1336122205Sharti if (uap->timeout < 0) 1337122205Sharti return (EINVAL); 1338122205Sharti ts.tv_sec = uap->timeout / 1000; 1339122205Sharti ts.tv_nsec = (uap->timeout % 1000) * 1000000; 1340122205Sharti tsp = &ts; 1341122205Sharti } else 1342122205Sharti tsp = NULL; 1343122205Sharti 1344122205Sharti return (kern_poll(td, uap->fds, uap->nfds, tsp, NULL)); 1345122205Sharti} 1346122205Sharti 1347122205Shartiint 1348122205Shartikern_poll(struct thread *td, struct pollfd *fds, u_int nfds, 1349122205Sharti struct timespec *tsp, sigset_t *uset) 1350122205Sharti{ 1351122205Sharti struct pollfd *bits; 1352122205Sharti struct pollfd smallbits[32]; 1353122205Sharti sbintime_t sbt, precision, tmp; 1354122205Sharti time_t over; 1355122205Sharti struct timespec ts; 1356122205Sharti int error; 1357122205Sharti size_t ni; 1358122205Sharti 1359122205Sharti precision = 0; 1360122205Sharti if (tsp != NULL) { 1361122205Sharti if (tsp->tv_sec < 0) 1362122205Sharti return (EINVAL); 1363122205Sharti if (tsp->tv_nsec < 0 || tsp->tv_nsec >= 1000000000) 1364122205Sharti return (EINVAL); 1365122205Sharti if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) 1366122205Sharti sbt = 0; 1367122205Sharti else { 1368122205Sharti ts = *tsp; 1369122205Sharti if (ts.tv_sec > INT32_MAX / 2) { 1370122205Sharti over = ts.tv_sec - INT32_MAX / 2; 1371122205Sharti ts.tv_sec -= over; 1372122205Sharti } else 1373122205Sharti over = 0; 1374122205Sharti tmp = tstosbt(ts); 1375122205Sharti precision = tmp; 1376122205Sharti precision >>= tc_precexp; 1377122205Sharti if (TIMESEL(&sbt, tmp)) 1378122205Sharti sbt += tc_tick_sbt; 1379122205Sharti sbt += tmp; 1380122205Sharti } 1381122205Sharti } else 1382122205Sharti sbt = -1; 1383122205Sharti 1384122205Sharti if (nfds > maxfilesperproc && nfds > FD_SETSIZE) 1385122205Sharti return (EINVAL); 1386122205Sharti ni = nfds * sizeof(struct pollfd); 1387122205Sharti if (ni > sizeof(smallbits)) 1388122205Sharti bits = malloc(ni, M_TEMP, M_WAITOK); 1389122205Sharti else 1390122205Sharti bits = smallbits; 1391122205Sharti error = copyin(fds, bits, ni); 1392122205Sharti if (error) 1393122205Sharti goto done; 1394122205Sharti 1395122205Sharti if (uset != NULL) { 1396122205Sharti error = kern_sigprocmask(td, SIG_SETMASK, uset, 1397122205Sharti &td->td_oldsigmask, 0); 1398122205Sharti if (error) 1399122205Sharti goto done; 1400122205Sharti td->td_pflags |= TDP_OLDMASK; 1401122205Sharti /* 1402122205Sharti * Make sure that ast() is called on return to 1403122205Sharti * usermode and TDP_OLDMASK is cleared, restoring old 1404122205Sharti * sigmask. 1405122205Sharti */ 1406122205Sharti thread_lock(td); 1407122205Sharti td->td_flags |= TDF_ASTPENDING; 1408122205Sharti thread_unlock(td); 1409122205Sharti } 1410122205Sharti 1411122205Sharti seltdinit(td); 1412122205Sharti /* Iterate until the timeout expires or descriptors become ready. */ 1413122205Sharti for (;;) { 1414122205Sharti error = pollscan(td, bits, nfds); 1415122205Sharti if (error || td->td_retval[0] != 0) 1416122205Sharti break; 1417122205Sharti error = seltdwait(td, sbt, precision); 1418122205Sharti if (error) 1419122205Sharti break; 1420122205Sharti error = pollrescan(td); 1421122205Sharti if (error || td->td_retval[0] != 0) 1422122205Sharti break; 1423122205Sharti } 1424122205Sharti seltdclear(td); 1425122205Sharti 1426122205Shartidone: 1427122205Sharti /* poll is not restarted after signals... */ 1428122205Sharti if (error == ERESTART) 1429122205Sharti error = EINTR; 1430122205Sharti if (error == EWOULDBLOCK) 1431122205Sharti error = 0; 1432122205Sharti if (error == 0) { 1433122205Sharti error = pollout(td, bits, fds, nfds); 1434122205Sharti if (error) 1435122205Sharti goto out; 1436122205Sharti } 1437122205Shartiout: 1438122205Sharti if (ni > sizeof(smallbits)) 1439122205Sharti free(bits, M_TEMP); 1440122205Sharti return (error); 1441122205Sharti} 1442122205Sharti 1443122205Shartiint 1444122205Shartisys_ppoll(struct thread *td, struct ppoll_args *uap) 1445122205Sharti{ 1446122205Sharti struct timespec ts, *tsp; 1447122205Sharti sigset_t set, *ssp; 1448122205Sharti int error; 1449122205Sharti 1450122205Sharti if (uap->ts != NULL) { 1451122205Sharti error = copyin(uap->ts, &ts, sizeof(ts)); 1452122205Sharti if (error) 1453122205Sharti return (error); 1454122205Sharti tsp = &ts; 1455122205Sharti } else 1456122205Sharti tsp = NULL; 1457122205Sharti if (uap->set != NULL) { 1458122205Sharti error = copyin(uap->set, &set, sizeof(set)); 1459122205Sharti if (error) 1460122205Sharti return (error); 1461122205Sharti ssp = &set; 1462122205Sharti } else 1463122205Sharti ssp = NULL; 1464122205Sharti /* 1465122205Sharti * fds is still a pointer to user space. kern_poll() will 1466122205Sharti * take care of copyin that array to the kernel space. 1467122205Sharti */ 1468122205Sharti 1469122205Sharti return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); 1470122205Sharti} 1471122205Sharti 1472122205Shartistatic int 1473122205Shartipollrescan(struct thread *td) 1474122205Sharti{ 1475122205Sharti struct seltd *stp; 1476122205Sharti struct selfd *sfp; 1477122205Sharti struct selfd *sfn; 1478122205Sharti struct selinfo *si; 1479122205Sharti struct filedesc *fdp; 1480122205Sharti struct file *fp; 1481122205Sharti struct pollfd *fd; 1482122205Sharti#ifdef CAPABILITIES 1483122205Sharti cap_rights_t rights; 1484122205Sharti#endif 1485122205Sharti int n; 1486122205Sharti 1487122205Sharti n = 0; 1488122205Sharti fdp = td->td_proc->p_fd; 1489122205Sharti stp = td->td_sel; 1490122205Sharti FILEDESC_SLOCK(fdp); 1491122205Sharti STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) { 1492122205Sharti fd = (struct pollfd *)sfp->sf_cookie; 1493122205Sharti si = sfp->sf_si; 1494122205Sharti selfdfree(stp, sfp); 1495122205Sharti /* If the selinfo wasn't cleared the event didn't fire. */ 1496122205Sharti if (si != NULL) 1497122205Sharti continue; 1498122205Sharti fp = fdp->fd_ofiles[fd->fd].fde_file; 1499122205Sharti#ifdef CAPABILITIES 1500122205Sharti if (fp == NULL || 1501122205Sharti cap_check(cap_rights(fdp, fd->fd), 1502122205Sharti cap_rights_init(&rights, CAP_EVENT)) != 0) 1503122205Sharti#else 1504122205Sharti if (fp == NULL) 1505122205Sharti#endif 1506122205Sharti { 1507122205Sharti fd->revents = POLLNVAL; 1508122205Sharti n++; 1509122205Sharti continue; 1510122205Sharti } 1511122205Sharti 1512122205Sharti /* 1513122205Sharti * Note: backend also returns POLLHUP and 1514122205Sharti * POLLERR if appropriate. 1515122205Sharti */ 1516122205Sharti fd->revents = fo_poll(fp, fd->events, td->td_ucred, td); 1517122205Sharti if (fd->revents != 0) 1518122205Sharti n++; 1519122205Sharti } 1520122205Sharti FILEDESC_SUNLOCK(fdp); 1521122205Sharti stp->st_flags = 0; 1522122205Sharti td->td_retval[0] = n; 1523122205Sharti return (0); 1524122205Sharti} 1525122205Sharti 1526122205Sharti 1527122205Shartistatic int 1528122205Shartipollout(td, fds, ufds, nfd) 1529122205Sharti struct thread *td; 1530122205Sharti struct pollfd *fds; 1531122205Sharti struct pollfd *ufds; 1532122205Sharti u_int nfd; 1533122205Sharti{ 1534122205Sharti int error = 0; 1535122205Sharti u_int i = 0; 1536122205Sharti u_int n = 0; 1537122205Sharti 1538122205Sharti for (i = 0; i < nfd; i++) { 1539122205Sharti error = copyout(&fds->revents, &ufds->revents, 1540122205Sharti sizeof(ufds->revents)); 1541122205Sharti if (error) 1542122205Sharti return (error); 1543122205Sharti if (fds->revents != 0) 1544122205Sharti n++; 1545122205Sharti fds++; 1546122205Sharti ufds++; 1547122205Sharti } 1548131826Sharti td->td_retval[0] = n; 1549122205Sharti return (0); 1550122205Sharti} 1551122205Sharti 1552122205Shartistatic int 1553122205Shartipollscan(td, fds, nfd) 1554122205Sharti struct thread *td; 1555122205Sharti struct pollfd *fds; 1556122205Sharti u_int nfd; 1557122205Sharti{ 1558122205Sharti struct filedesc *fdp = td->td_proc->p_fd; 1559122205Sharti struct file *fp; 1560122205Sharti#ifdef CAPABILITIES 1561122205Sharti cap_rights_t rights; 1562122205Sharti#endif 1563122205Sharti int i, n = 0; 1564122205Sharti 1565122205Sharti FILEDESC_SLOCK(fdp); 1566122205Sharti for (i = 0; i < nfd; i++, fds++) { 1567122205Sharti if (fds->fd > fdp->fd_lastfile) { 1568122205Sharti fds->revents = POLLNVAL; 1569122205Sharti n++; 1570122205Sharti } else if (fds->fd < 0) { 1571122205Sharti fds->revents = 0; 1572122205Sharti } else { 1573122205Sharti fp = fdp->fd_ofiles[fds->fd].fde_file; 1574122205Sharti#ifdef CAPABILITIES 1575122205Sharti if (fp == NULL || 1576122205Sharti cap_check(cap_rights(fdp, fds->fd), 1577122205Sharti cap_rights_init(&rights, CAP_EVENT)) != 0) 1578122205Sharti#else 1579131826Sharti if (fp == NULL) 1580122205Sharti#endif 1581122205Sharti { 1582122205Sharti fds->revents = POLLNVAL; 1583122205Sharti n++; 1584122205Sharti } else { 1585122205Sharti /* 1586122205Sharti * Note: backend also returns POLLHUP and 1587122205Sharti * POLLERR if appropriate. 1588122205Sharti */ 1589122205Sharti selfdalloc(td, fds); 1590122205Sharti fds->revents = fo_poll(fp, fds->events, 1591122205Sharti td->td_ucred, td); 1592122205Sharti /* 1593122205Sharti * POSIX requires POLLOUT to be never 1594122205Sharti * set simultaneously with POLLHUP. 1595122205Sharti */ 1596122205Sharti if ((fds->revents & POLLHUP) != 0) 1597122205Sharti fds->revents &= ~POLLOUT; 1598122205Sharti 1599122205Sharti if (fds->revents != 0) 1600122205Sharti n++; 1601122205Sharti } 1602122205Sharti } 1603122205Sharti } 1604122205Sharti FILEDESC_SUNLOCK(fdp); 1605122205Sharti td->td_retval[0] = n; 1606122205Sharti return (0); 1607122205Sharti} 1608122205Sharti 1609122205Sharti/* 1610122205Sharti * OpenBSD poll system call. 1611122205Sharti * 1612122205Sharti * XXX this isn't quite a true representation.. OpenBSD uses select ops. 1613122205Sharti */ 1614122205Sharti#ifndef _SYS_SYSPROTO_H_ 1615122205Shartistruct openbsd_poll_args { 1616122205Sharti struct pollfd *fds; 1617122205Sharti u_int nfds; 1618122205Sharti int timeout; 1619122205Sharti}; 1620122205Sharti#endif 1621122205Shartiint 1622122205Shartisys_openbsd_poll(td, uap) 1623122205Sharti register struct thread *td; 1624122205Sharti register struct openbsd_poll_args *uap; 1625122205Sharti{ 1626122205Sharti return (sys_poll(td, (struct poll_args *)uap)); 1627122205Sharti} 1628122205Sharti 1629122205Sharti/* 1630122205Sharti * XXX This was created specifically to support netncp and netsmb. This 1631122205Sharti * allows the caller to specify a socket to wait for events on. It returns 1632122205Sharti * 0 if any events matched and an error otherwise. There is no way to 1633122205Sharti * determine which events fired. 1634122205Sharti */ 1635122205Shartiint 1636122205Shartiselsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) 1637122205Sharti{ 1638122205Sharti struct timeval rtv; 1639122205Sharti sbintime_t asbt, precision, rsbt; 1640122205Sharti int error; 1641122205Sharti 1642122205Sharti precision = 0; /* stupid gcc! */ 1643122205Sharti if (tvp != NULL) { 1644122205Sharti rtv = *tvp; 1645122205Sharti if (rtv.tv_sec < 0 || rtv.tv_usec < 0 || 1646122205Sharti rtv.tv_usec >= 1000000) 1647122205Sharti return (EINVAL); 1648122205Sharti if (!timevalisset(&rtv)) 1649122205Sharti asbt = 0; 1650122205Sharti else if (rtv.tv_sec <= INT32_MAX) { 1651122205Sharti rsbt = tvtosbt(rtv); 1652122205Sharti precision = rsbt; 1653122205Sharti precision >>= tc_precexp; 1654122205Sharti if (TIMESEL(&asbt, rsbt)) 1655122205Sharti asbt += tc_tick_sbt; 1656122205Sharti if (asbt <= SBT_MAX - rsbt) 1657122205Sharti asbt += rsbt; 1658122205Sharti else 1659122205Sharti asbt = -1; 1660122205Sharti } else 1661122205Sharti asbt = -1; 1662122205Sharti } else 1663122205Sharti asbt = -1; 1664122205Sharti seltdinit(td); 1665122205Sharti /* 1666122205Sharti * Iterate until the timeout expires or the socket becomes ready. 1667122205Sharti */ 1668122205Sharti for (;;) { 1669122205Sharti selfdalloc(td, NULL); 1670122205Sharti error = sopoll(so, events, NULL, td); 1671122205Sharti /* error here is actually the ready events. */ 1672122205Sharti if (error) 1673122205Sharti return (0); 1674122205Sharti error = seltdwait(td, asbt, precision); 1675122205Sharti if (error) 1676122205Sharti break; 1677122205Sharti } 1678122205Sharti seltdclear(td); 1679122205Sharti /* XXX Duplicates ncp/smb behavior. */ 1680122205Sharti if (error == ERESTART) 1681122205Sharti error = 0; 1682122205Sharti return (error); 1683122205Sharti} 1684122205Sharti 1685122205Sharti/* 1686122205Sharti * Preallocate two selfds associated with 'cookie'. Some fo_poll routines 1687122205Sharti * have two select sets, one for read and another for write. 1688122205Sharti */ 1689122205Shartistatic void 1690122205Shartiselfdalloc(struct thread *td, void *cookie) 1691122205Sharti{ 1692122205Sharti struct seltd *stp; 1693122205Sharti 1694122205Sharti stp = td->td_sel; 1695122205Sharti if (stp->st_free1 == NULL) 1696122205Sharti stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO); 1697122205Sharti stp->st_free1->sf_td = stp; 1698122205Sharti stp->st_free1->sf_cookie = cookie; 1699122205Sharti if (stp->st_free2 == NULL) 1700122205Sharti stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO); 1701122205Sharti stp->st_free2->sf_td = stp; 1702122205Sharti stp->st_free2->sf_cookie = cookie; 1703122205Sharti} 1704122205Sharti 1705122205Shartistatic void 1706122205Shartiselfdfree(struct seltd *stp, struct selfd *sfp) 1707122205Sharti{ 1708122205Sharti STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link); 1709122205Sharti if (sfp->sf_si != NULL) { 1710122205Sharti mtx_lock(sfp->sf_mtx); 1711122205Sharti if (sfp->sf_si != NULL) { 1712122205Sharti TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads); 1713122205Sharti refcount_release(&sfp->sf_refs); 1714122205Sharti } 1715122205Sharti mtx_unlock(sfp->sf_mtx); 1716122205Sharti } 1717122205Sharti if (refcount_release(&sfp->sf_refs)) 1718122205Sharti uma_zfree(selfd_zone, sfp); 1719122205Sharti} 1720122205Sharti 1721122205Sharti/* Drain the waiters tied to all the selfd belonging the specified selinfo. */ 1722122205Shartivoid 1723122205Shartiseldrain(sip) 1724122205Sharti struct selinfo *sip; 1725122205Sharti{ 1726122205Sharti 1727122205Sharti /* 1728122205Sharti * This feature is already provided by doselwakeup(), thus it is 1729122205Sharti * enough to go for it. 1730122205Sharti * Eventually, the context, should take care to avoid races 1731122205Sharti * between thread calling select()/poll() and file descriptor 1732122205Sharti * detaching, but, again, the races are just the same as 1733122205Sharti * selwakeup(). 1734122205Sharti */ 1735122205Sharti doselwakeup(sip, -1); 1736122205Sharti} 1737122205Sharti 1738122205Sharti/* 1739122205Sharti * Record a select request. 1740122205Sharti */ 1741122205Shartivoid 1742122205Shartiselrecord(selector, sip) 1743122205Sharti struct thread *selector; 1744122205Sharti struct selinfo *sip; 1745122205Sharti{ 1746122205Sharti struct selfd *sfp; 1747122205Sharti struct seltd *stp; 1748122205Sharti struct mtx *mtxp; 1749122205Sharti 1750122205Sharti stp = selector->td_sel; 1751122205Sharti /* 1752122205Sharti * Don't record when doing a rescan. 1753122205Sharti */ 1754122205Sharti if (stp->st_flags & SELTD_RESCAN) 1755122205Sharti return; 1756122205Sharti /* 1757122205Sharti * Grab one of the preallocated descriptors. 1758122205Sharti */ 1759122205Sharti sfp = NULL; 1760122205Sharti if ((sfp = stp->st_free1) != NULL) 1761122205Sharti stp->st_free1 = NULL; 1762122205Sharti else if ((sfp = stp->st_free2) != NULL) 1763122205Sharti stp->st_free2 = NULL; 1764122205Sharti else 1765122205Sharti panic("selrecord: No free selfd on selq"); 1766122205Sharti mtxp = sip->si_mtx; 1767122205Sharti if (mtxp == NULL) 1768122205Sharti mtxp = mtx_pool_find(mtxpool_select, sip); 1769122205Sharti /* 1770122205Sharti * Initialize the sfp and queue it in the thread. 1771122205Sharti */ 1772122205Sharti sfp->sf_si = sip; 1773122205Sharti sfp->sf_mtx = mtxp; 1774122205Sharti refcount_init(&sfp->sf_refs, 2); 1775122205Sharti STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link); 1776122205Sharti /* 1777122205Sharti * Now that we've locked the sip, check for initialization. 1778122205Sharti */ 1779122205Sharti mtx_lock(mtxp); 1780122205Sharti if (sip->si_mtx == NULL) { 1781122205Sharti sip->si_mtx = mtxp; 1782122205Sharti TAILQ_INIT(&sip->si_tdlist); 1783122205Sharti } 1784122205Sharti /* 1785122205Sharti * Add this thread to the list of selfds listening on this selinfo. 1786122205Sharti */ 1787122205Sharti TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads); 1788122205Sharti mtx_unlock(sip->si_mtx); 1789122205Sharti} 1790122205Sharti 1791122205Sharti/* Wake up a selecting thread. */ 1792122205Shartivoid 1793122205Shartiselwakeup(sip) 1794122205Sharti struct selinfo *sip; 1795122205Sharti{ 1796122205Sharti doselwakeup(sip, -1); 1797122205Sharti} 1798122205Sharti 1799122205Sharti/* Wake up a selecting thread, and set its priority. */ 1800122205Shartivoid 1801122205Shartiselwakeuppri(sip, pri) 1802122205Sharti struct selinfo *sip; 1803122205Sharti int pri; 1804122205Sharti{ 1805122205Sharti doselwakeup(sip, pri); 1806122205Sharti} 1807122205Sharti 1808122205Sharti/* 1809122205Sharti * Do a wakeup when a selectable event occurs. 1810122205Sharti */ 1811122205Shartistatic void 1812122205Shartidoselwakeup(sip, pri) 1813122205Sharti struct selinfo *sip; 1814122205Sharti int pri; 1815122205Sharti{ 1816122205Sharti struct selfd *sfp; 1817122205Sharti struct selfd *sfn; 1818122205Sharti struct seltd *stp; 1819122205Sharti 1820122205Sharti /* If it's not initialized there can't be any waiters. */ 1821122205Sharti if (sip->si_mtx == NULL) 1822122205Sharti return; 1823122205Sharti /* 1824122205Sharti * Locking the selinfo locks all selfds associated with it. 1825122205Sharti */ 1826122205Sharti mtx_lock(sip->si_mtx); 1827122205Sharti TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) { 1828122205Sharti /* 1829122205Sharti * Once we remove this sfp from the list and clear the 1830122205Sharti * sf_si seltdclear will know to ignore this si. 1831122205Sharti */ 1832122205Sharti TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads); 1833122205Sharti sfp->sf_si = NULL; 1834122205Sharti stp = sfp->sf_td; 1835122205Sharti mtx_lock(&stp->st_mtx); 1836122205Sharti stp->st_flags |= SELTD_PENDING; 1837122205Sharti cv_broadcastpri(&stp->st_wait, pri); 1838122205Sharti mtx_unlock(&stp->st_mtx); 1839122205Sharti if (refcount_release(&sfp->sf_refs)) 1840122205Sharti uma_zfree(selfd_zone, sfp); 1841122205Sharti } 1842122205Sharti mtx_unlock(sip->si_mtx); 1843122205Sharti} 1844122205Sharti 1845122205Shartistatic void 1846122205Shartiseltdinit(struct thread *td) 1847122205Sharti{ 1848122205Sharti struct seltd *stp; 1849122205Sharti 1850122205Sharti if ((stp = td->td_sel) != NULL) 1851122205Sharti goto out; 1852122205Sharti td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO); 1853122205Sharti mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF); 1854122205Sharti cv_init(&stp->st_wait, "select"); 1855122205Shartiout: 1856122205Sharti stp->st_flags = 0; 1857122205Sharti STAILQ_INIT(&stp->st_selq); 1858122205Sharti} 1859122205Sharti 1860122205Shartistatic int 1861122205Shartiseltdwait(struct thread *td, sbintime_t sbt, sbintime_t precision) 1862122205Sharti{ 1863122205Sharti struct seltd *stp; 1864122205Sharti int error; 1865122205Sharti 1866122205Sharti stp = td->td_sel; 1867122205Sharti /* 1868122205Sharti * An event of interest may occur while we do not hold the seltd 1869122205Sharti * locked so check the pending flag before we sleep. 1870122205Sharti */ 1871122205Sharti mtx_lock(&stp->st_mtx); 1872122205Sharti /* 1873122205Sharti * Any further calls to selrecord will be a rescan. 1874122205Sharti */ 1875122205Sharti stp->st_flags |= SELTD_RESCAN; 1876122205Sharti if (stp->st_flags & SELTD_PENDING) { 1877122205Sharti mtx_unlock(&stp->st_mtx); 1878122205Sharti return (0); 1879122205Sharti } 1880122205Sharti if (sbt == 0) 1881122205Sharti error = EWOULDBLOCK; 1882122205Sharti else if (sbt != -1) 1883122205Sharti error = cv_timedwait_sig_sbt(&stp->st_wait, &stp->st_mtx, 1884122205Sharti sbt, precision, C_ABSOLUTE); 1885122205Sharti else 1886122205Sharti error = cv_wait_sig(&stp->st_wait, &stp->st_mtx); 1887122205Sharti mtx_unlock(&stp->st_mtx); 1888122205Sharti 1889122205Sharti return (error); 1890122205Sharti} 1891122205Sharti 1892122205Shartivoid 1893122205Shartiseltdfini(struct thread *td) 1894122205Sharti{ 1895122205Sharti struct seltd *stp; 1896122205Sharti 1897122205Sharti stp = td->td_sel; 1898122205Sharti if (stp == NULL) 1899122205Sharti return; 1900122205Sharti if (stp->st_free1) 1901122205Sharti uma_zfree(selfd_zone, stp->st_free1); 1902122205Sharti if (stp->st_free2) 1903122205Sharti uma_zfree(selfd_zone, stp->st_free2); 1904122205Sharti td->td_sel = NULL; 1905122205Sharti cv_destroy(&stp->st_wait); 1906122205Sharti mtx_destroy(&stp->st_mtx); 1907122205Sharti free(stp, M_SELECT); 1908122205Sharti} 1909122205Sharti 1910122205Sharti/* 1911122205Sharti * Remove the references to the thread from all of the objects we were 1912122205Sharti * polling. 1913122205Sharti */ 1914122205Shartistatic void 1915122205Shartiseltdclear(struct thread *td) 1916122205Sharti{ 1917122205Sharti struct seltd *stp; 1918122205Sharti struct selfd *sfp; 1919122205Sharti struct selfd *sfn; 1920122205Sharti 1921122205Sharti stp = td->td_sel; 1922122205Sharti STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) 1923122205Sharti selfdfree(stp, sfp); 1924122205Sharti stp->st_flags = 0; 1925122205Sharti} 1926122205Sharti 1927122205Shartistatic void selectinit(void *); 1928122205ShartiSYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL); 1929122205Shartistatic void 1930122205Shartiselectinit(void *dummy __unused) 1931122205Sharti{ 1932122205Sharti 1933122205Sharti selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL, 1934122205Sharti NULL, NULL, UMA_ALIGN_PTR, 0); 1935122205Sharti mtxpool_select = mtx_pool_create("select mtxpool", 128, MTX_DEF); 1936122205Sharti} 1937122205Sharti 1938122205Sharti/* 1939122205Sharti * Set up a syscall return value that follows the convention specified for 1940122205Sharti * posix_* functions. 1941122205Sharti */ 1942122205Shartiint 1943122205Shartikern_posix_error(struct thread *td, int error) 1944122205Sharti{ 1945122205Sharti 1946131826Sharti if (error <= 0) 1947122205Sharti return (error); 1948122205Sharti td->td_errno = error; 1949122205Sharti td->td_pflags |= TDP_NERRNO; 1950122205Sharti td->td_retval[0] = error; 1951122205Sharti return (0); 1952122205Sharti} 1953122205Sharti