sys_generic.c revision 114216
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD: head/sys/kern/sys_generic.c 114216 2003-04-29 13:36:06Z kan $
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/resourcevar.h>
#include <sys/selinfo.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/condvar.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
#include <vm/vm.h>
#include <vm/vm_page.h>

static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");

static int	pollscan(struct thread *, struct pollfd *, u_int);
static int	selscan(struct thread *, fd_mask **, fd_mask **, int);
static int	dofileread(struct thread *, struct file *, int, void *,
		    size_t, off_t, int);
static int	dofilewrite(struct thread *, struct file *, int,
		    const void *, size_t, off_t, int);

/*
 * Read system call.
86159609Sgshapiro */ 87159609Sgshapiro#ifndef _SYS_SYSPROTO_H_ 88159609Sgshapirostruct read_args { 89159609Sgshapiro int fd; 90159609Sgshapiro void *buf; 91159609Sgshapiro size_t nbyte; 92159609Sgshapiro}; 93159609Sgshapiro#endif 94159609Sgshapiro/* 95159609Sgshapiro * MPSAFE 96159609Sgshapiro */ 97159609Sgshapiroint 98159609Sgshapiroread(td, uap) 99159609Sgshapiro struct thread *td; 100159609Sgshapiro struct read_args *uap; 101159609Sgshapiro{ 102159609Sgshapiro struct file *fp; 103159609Sgshapiro int error; 104159609Sgshapiro 105159609Sgshapiro if ((error = fget_read(td, uap->fd, &fp)) == 0) { 106159609Sgshapiro error = dofileread(td, fp, uap->fd, uap->buf, 107159609Sgshapiro uap->nbyte, (off_t)-1, 0); 108159609Sgshapiro fdrop(fp, td); 109159609Sgshapiro } 110159609Sgshapiro return(error); 111159609Sgshapiro} 112159609Sgshapiro 113159609Sgshapiro/* 114159609Sgshapiro * Pread system call 115159609Sgshapiro */ 116159609Sgshapiro#ifndef _SYS_SYSPROTO_H_ 117159609Sgshapirostruct pread_args { 118159609Sgshapiro int fd; 119159609Sgshapiro void *buf; 120159609Sgshapiro size_t nbyte; 121159609Sgshapiro int pad; 122159609Sgshapiro off_t offset; 123159609Sgshapiro}; 124159609Sgshapiro#endif 125159609Sgshapiro/* 126159609Sgshapiro * MPSAFE 127159609Sgshapiro */ 128159609Sgshapiroint 129159609Sgshapiropread(td, uap) 130159609Sgshapiro struct thread *td; 131159609Sgshapiro struct pread_args *uap; 132159609Sgshapiro{ 133159609Sgshapiro struct file *fp; 134159609Sgshapiro int error; 135159609Sgshapiro 136159609Sgshapiro if ((error = fget_read(td, uap->fd, &fp)) != 0) 137159609Sgshapiro return (error); 138159609Sgshapiro if (fp->f_type != DTYPE_VNODE) { 139159609Sgshapiro error = ESPIPE; 140159609Sgshapiro } else { 141159609Sgshapiro error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte, 14264562Sgshapiro uap->offset, FOF_OFFSET); 14364562Sgshapiro } 14464562Sgshapiro fdrop(fp, td); 145249729Sgshapiro return(error); 146249729Sgshapiro} 147249729Sgshapiro 148249729Sgshapiro/* 
149249729Sgshapiro * Code common for read and pread 150249729Sgshapiro */ 151249729Sgshapirostatic int 152249729Sgshapirodofileread(td, fp, fd, buf, nbyte, offset, flags) 153249729Sgshapiro struct thread *td; 154249729Sgshapiro struct file *fp; 155249729Sgshapiro int fd, flags; 156249729Sgshapiro void *buf; 157249729Sgshapiro size_t nbyte; 158249729Sgshapiro off_t offset; 159249729Sgshapiro{ 160249729Sgshapiro struct uio auio; 161249729Sgshapiro struct iovec aiov; 162249729Sgshapiro long cnt, error = 0; 163249729Sgshapiro#ifdef KTRACE 164249729Sgshapiro struct iovec ktriov; 165249729Sgshapiro struct uio ktruio; 166249729Sgshapiro int didktr = 0; 167249729Sgshapiro#endif 168249729Sgshapiro 169249729Sgshapiro aiov.iov_base = buf; 170249729Sgshapiro aiov.iov_len = nbyte; 171249729Sgshapiro auio.uio_iov = &aiov; 172249729Sgshapiro auio.uio_iovcnt = 1; 173249729Sgshapiro auio.uio_offset = offset; 174249729Sgshapiro if (nbyte > INT_MAX) 175249729Sgshapiro return (EINVAL); 176249729Sgshapiro auio.uio_resid = nbyte; 177249729Sgshapiro auio.uio_rw = UIO_READ; 178249729Sgshapiro auio.uio_segflg = UIO_USERSPACE; 179249729Sgshapiro auio.uio_td = td; 180249729Sgshapiro#ifdef KTRACE 181249729Sgshapiro /* 182249729Sgshapiro * if tracing, save a copy of iovec 183249729Sgshapiro */ 184249729Sgshapiro if (KTRPOINT(td, KTR_GENIO)) { 185249729Sgshapiro ktriov = aiov; 186249729Sgshapiro ktruio = auio; 187249729Sgshapiro didktr = 1; 188249729Sgshapiro } 189249729Sgshapiro#endif 190249729Sgshapiro cnt = nbyte; 191249729Sgshapiro 192249729Sgshapiro if ((error = fo_read(fp, &auio, td->td_ucred, flags, td))) { 193249729Sgshapiro if (auio.uio_resid != cnt && (error == ERESTART || 194249729Sgshapiro error == EINTR || error == EWOULDBLOCK)) 195249729Sgshapiro error = 0; 196249729Sgshapiro } 197249729Sgshapiro cnt -= auio.uio_resid; 198249729Sgshapiro#ifdef KTRACE 199249729Sgshapiro if (didktr && error == 0) { 200249729Sgshapiro ktruio.uio_iov = &ktriov; 201249729Sgshapiro ktruio.uio_resid = 
cnt; 202249729Sgshapiro ktrgenio(fd, UIO_READ, &ktruio, error); 203249729Sgshapiro } 204249729Sgshapiro#endif 205249729Sgshapiro td->td_retval[0] = cnt; 206249729Sgshapiro return (error); 207249729Sgshapiro} 208249729Sgshapiro 209249729Sgshapiro/* 210249729Sgshapiro * Scatter read system call. 211249729Sgshapiro */ 212249729Sgshapiro#ifndef _SYS_SYSPROTO_H_ 213249729Sgshapirostruct readv_args { 214249729Sgshapiro int fd; 215249729Sgshapiro struct iovec *iovp; 216249729Sgshapiro u_int iovcnt; 217249729Sgshapiro}; 218249729Sgshapiro#endif 219249729Sgshapiro/* 220249729Sgshapiro * MPSAFE 221249729Sgshapiro */ 222249729Sgshapiroint 223249729Sgshapiroreadv(td, uap) 224249729Sgshapiro struct thread *td; 225249729Sgshapiro struct readv_args *uap; 226249729Sgshapiro{ 227249729Sgshapiro struct file *fp; 228249729Sgshapiro struct uio auio; 229249729Sgshapiro struct iovec *iov; 230249729Sgshapiro struct iovec *needfree; 231249729Sgshapiro struct iovec aiov[UIO_SMALLIOV]; 232249729Sgshapiro long i, cnt; 233249729Sgshapiro int error; 234249729Sgshapiro u_int iovlen; 235249729Sgshapiro#ifdef KTRACE 236249729Sgshapiro struct iovec *ktriov = NULL; 237249729Sgshapiro struct uio ktruio; 238249729Sgshapiro#endif 239249729Sgshapiro 240249729Sgshapiro if ((error = fget_read(td, uap->fd, &fp)) != 0) 241249729Sgshapiro return (error); 242249729Sgshapiro needfree = NULL; 243249729Sgshapiro /* note: can't use iovlen until iovcnt is validated */ 244249729Sgshapiro iovlen = uap->iovcnt * sizeof (struct iovec); 245249729Sgshapiro if (uap->iovcnt > UIO_SMALLIOV) { 246249729Sgshapiro if (uap->iovcnt > UIO_MAXIOV) { 247249729Sgshapiro error = EINVAL; 248249729Sgshapiro goto done; 249249729Sgshapiro } 250249729Sgshapiro MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 251249729Sgshapiro needfree = iov; 252249729Sgshapiro } else 253249729Sgshapiro iov = aiov; 254249729Sgshapiro auio.uio_iov = iov; 255249729Sgshapiro auio.uio_iovcnt = uap->iovcnt; 256249729Sgshapiro auio.uio_rw = UIO_READ; 
257249729Sgshapiro auio.uio_segflg = UIO_USERSPACE; 258249729Sgshapiro auio.uio_td = td; 259249729Sgshapiro auio.uio_offset = -1; 260249729Sgshapiro if ((error = copyin(uap->iovp, iov, iovlen))) 261249729Sgshapiro goto done; 262249729Sgshapiro auio.uio_resid = 0; 263249729Sgshapiro for (i = 0; i < uap->iovcnt; i++) { 264249729Sgshapiro if (iov->iov_len > INT_MAX - auio.uio_resid) { 265249729Sgshapiro error = EINVAL; 266249729Sgshapiro goto done; 267249729Sgshapiro } 268249729Sgshapiro auio.uio_resid += iov->iov_len; 269249729Sgshapiro iov++; 270249729Sgshapiro } 271249729Sgshapiro#ifdef KTRACE 272249729Sgshapiro /* 273249729Sgshapiro * if tracing, save a copy of iovec 274249729Sgshapiro */ 275249729Sgshapiro if (KTRPOINT(td, KTR_GENIO)) { 276249729Sgshapiro MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 277249729Sgshapiro bcopy(auio.uio_iov, ktriov, iovlen); 278249729Sgshapiro ktruio = auio; 279249729Sgshapiro } 280249729Sgshapiro#endif 281249729Sgshapiro cnt = auio.uio_resid; 282244833Sgshapiro if ((error = fo_read(fp, &auio, td->td_ucred, 0, td))) { 283244833Sgshapiro if (auio.uio_resid != cnt && (error == ERESTART || 284244833Sgshapiro error == EINTR || error == EWOULDBLOCK)) 285244833Sgshapiro error = 0; 286244833Sgshapiro } 287244833Sgshapiro cnt -= auio.uio_resid; 288244833Sgshapiro#ifdef KTRACE 289244833Sgshapiro if (ktriov != NULL) { 290244833Sgshapiro if (error == 0) { 291244833Sgshapiro ktruio.uio_iov = ktriov; 292244833Sgshapiro ktruio.uio_resid = cnt; 293244833Sgshapiro ktrgenio(uap->fd, UIO_READ, &ktruio, error); 294244833Sgshapiro } 295244833Sgshapiro FREE(ktriov, M_TEMP); 296244833Sgshapiro } 297244833Sgshapiro#endif 298244833Sgshapiro td->td_retval[0] = cnt; 299244833Sgshapirodone: 300244833Sgshapiro fdrop(fp, td); 301244833Sgshapiro if (needfree) 302244833Sgshapiro FREE(needfree, M_IOV); 303244833Sgshapiro return (error); 304244833Sgshapiro} 305244833Sgshapiro 306244833Sgshapiro/* 307244833Sgshapiro * Write system call 308244833Sgshapiro 
*/ 309244833Sgshapiro#ifndef _SYS_SYSPROTO_H_ 310244833Sgshapirostruct write_args { 311244833Sgshapiro int fd; 312244833Sgshapiro const void *buf; 313244833Sgshapiro size_t nbyte; 314244833Sgshapiro}; 315244833Sgshapiro#endif 316244833Sgshapiro/* 317244833Sgshapiro * MPSAFE 318244833Sgshapiro */ 319244833Sgshapiroint 320244833Sgshapirowrite(td, uap) 321244833Sgshapiro struct thread *td; 322244833Sgshapiro struct write_args *uap; 323244833Sgshapiro{ 324244833Sgshapiro struct file *fp; 325244833Sgshapiro int error; 326244833Sgshapiro 327244833Sgshapiro if ((error = fget_write(td, uap->fd, &fp)) == 0) { 328244833Sgshapiro error = dofilewrite(td, fp, uap->fd, uap->buf, uap->nbyte, 329244833Sgshapiro (off_t)-1, 0); 330244833Sgshapiro fdrop(fp, td); 331244833Sgshapiro } else { 332244833Sgshapiro error = EBADF; /* XXX this can't be right */ 333244833Sgshapiro } 334244833Sgshapiro return(error); 335244833Sgshapiro} 336244833Sgshapiro 337244833Sgshapiro/* 338244833Sgshapiro * Pwrite system call 339244833Sgshapiro */ 340244833Sgshapiro#ifndef _SYS_SYSPROTO_H_ 341244833Sgshapirostruct pwrite_args { 342244833Sgshapiro int fd; 343244833Sgshapiro const void *buf; 344244833Sgshapiro size_t nbyte; 345244833Sgshapiro int pad; 346244833Sgshapiro off_t offset; 347244833Sgshapiro}; 348244833Sgshapiro#endif 349244833Sgshapiro/* 350244833Sgshapiro * MPSAFE 351244833Sgshapiro */ 352244833Sgshapiroint 353244833Sgshapiropwrite(td, uap) 354244833Sgshapiro struct thread *td; 355244833Sgshapiro struct pwrite_args *uap; 356244833Sgshapiro{ 357244833Sgshapiro struct file *fp; 358244833Sgshapiro int error; 359244833Sgshapiro 360244833Sgshapiro if ((error = fget_write(td, uap->fd, &fp)) == 0) { 361244833Sgshapiro if (fp->f_type == DTYPE_VNODE) { 362244833Sgshapiro error = dofilewrite(td, fp, uap->fd, uap->buf, 363244833Sgshapiro uap->nbyte, uap->offset, FOF_OFFSET); 364244833Sgshapiro } else { 365244833Sgshapiro error = ESPIPE; 366244833Sgshapiro } 367244833Sgshapiro fdrop(fp, td); 
368244833Sgshapiro } else { 369244833Sgshapiro error = EBADF; /* this can't be right */ 370244833Sgshapiro } 371244833Sgshapiro return(error); 372244833Sgshapiro} 373244833Sgshapiro 374244833Sgshapirostatic int 375244833Sgshapirodofilewrite(td, fp, fd, buf, nbyte, offset, flags) 376244833Sgshapiro struct thread *td; 377244833Sgshapiro struct file *fp; 378244833Sgshapiro int fd, flags; 379244833Sgshapiro const void *buf; 380244833Sgshapiro size_t nbyte; 381244833Sgshapiro off_t offset; 382244833Sgshapiro{ 383244833Sgshapiro struct uio auio; 384244833Sgshapiro struct iovec aiov; 385244833Sgshapiro long cnt, error = 0; 386244833Sgshapiro#ifdef KTRACE 387244833Sgshapiro struct iovec ktriov; 388244833Sgshapiro struct uio ktruio; 389244833Sgshapiro int didktr = 0; 390244833Sgshapiro#endif 391244833Sgshapiro 392244833Sgshapiro aiov.iov_base = (void *)(uintptr_t)buf; 393244833Sgshapiro aiov.iov_len = nbyte; 394244833Sgshapiro auio.uio_iov = &aiov; 395244833Sgshapiro auio.uio_iovcnt = 1; 396244833Sgshapiro auio.uio_offset = offset; 397244833Sgshapiro if (nbyte > INT_MAX) 398244833Sgshapiro return (EINVAL); 399244833Sgshapiro auio.uio_resid = nbyte; 400244833Sgshapiro auio.uio_rw = UIO_WRITE; 401244833Sgshapiro auio.uio_segflg = UIO_USERSPACE; 402244833Sgshapiro auio.uio_td = td; 403244833Sgshapiro#ifdef KTRACE 404244833Sgshapiro /* 405244833Sgshapiro * if tracing, save a copy of iovec and uio 406244833Sgshapiro */ 407244833Sgshapiro if (KTRPOINT(td, KTR_GENIO)) { 408244833Sgshapiro ktriov = aiov; 409244833Sgshapiro ktruio = auio; 410244833Sgshapiro didktr = 1; 411244833Sgshapiro } 412244833Sgshapiro#endif 413244833Sgshapiro cnt = nbyte; 414244833Sgshapiro if (fp->f_type == DTYPE_VNODE) 415244833Sgshapiro bwillwrite(); 416244833Sgshapiro if ((error = fo_write(fp, &auio, td->td_ucred, flags, td))) { 417244833Sgshapiro if (auio.uio_resid != cnt && (error == ERESTART || 418244833Sgshapiro error == EINTR || error == EWOULDBLOCK)) 419244833Sgshapiro error = 0; 420244833Sgshapiro 
/* Socket layer is responsible for issuing SIGPIPE. */ 421244833Sgshapiro if (error == EPIPE && fp->f_type != DTYPE_SOCKET) { 422244833Sgshapiro PROC_LOCK(td->td_proc); 423244833Sgshapiro psignal(td->td_proc, SIGPIPE); 424244833Sgshapiro PROC_UNLOCK(td->td_proc); 425223067Sgshapiro } 426223067Sgshapiro } 427223067Sgshapiro cnt -= auio.uio_resid; 428223067Sgshapiro#ifdef KTRACE 429223067Sgshapiro if (didktr && error == 0) { 430223067Sgshapiro ktruio.uio_iov = &ktriov; 431223067Sgshapiro ktruio.uio_resid = cnt; 432223067Sgshapiro ktrgenio(fd, UIO_WRITE, &ktruio, error); 433223067Sgshapiro } 434223067Sgshapiro#endif 435223067Sgshapiro td->td_retval[0] = cnt; 436223067Sgshapiro return (error); 437223067Sgshapiro} 438223067Sgshapiro 439223067Sgshapiro/* 440223067Sgshapiro * Gather write system call 441223067Sgshapiro */ 442223067Sgshapiro#ifndef _SYS_SYSPROTO_H_ 443223067Sgshapirostruct writev_args { 444223067Sgshapiro int fd; 445223067Sgshapiro struct iovec *iovp; 446223067Sgshapiro u_int iovcnt; 447223067Sgshapiro}; 448223067Sgshapiro#endif 449223067Sgshapiro/* 450223067Sgshapiro * MPSAFE 451223067Sgshapiro */ 452223067Sgshapiroint 453223067Sgshapirowritev(td, uap) 454223067Sgshapiro struct thread *td; 455223067Sgshapiro register struct writev_args *uap; 456223067Sgshapiro{ 457223067Sgshapiro struct file *fp; 458223067Sgshapiro struct uio auio; 459223067Sgshapiro register struct iovec *iov; 460223067Sgshapiro struct iovec *needfree; 461223067Sgshapiro struct iovec aiov[UIO_SMALLIOV]; 462223067Sgshapiro long i, cnt, error = 0; 463223067Sgshapiro u_int iovlen; 464223067Sgshapiro#ifdef KTRACE 465223067Sgshapiro struct iovec *ktriov = NULL; 466223067Sgshapiro struct uio ktruio; 467223067Sgshapiro#endif 468223067Sgshapiro 469223067Sgshapiro mtx_lock(&Giant); 470223067Sgshapiro if ((error = fget_write(td, uap->fd, &fp)) != 0) { 471223067Sgshapiro error = EBADF; 472223067Sgshapiro goto done2; 473223067Sgshapiro } 474223067Sgshapiro /* note: can't use iovlen until iovcnt is 
validated */ 475223067Sgshapiro iovlen = uap->iovcnt * sizeof (struct iovec); 476223067Sgshapiro if (uap->iovcnt > UIO_SMALLIOV) { 477223067Sgshapiro if (uap->iovcnt > UIO_MAXIOV) { 478223067Sgshapiro needfree = NULL; 479223067Sgshapiro error = EINVAL; 480223067Sgshapiro goto done; 481223067Sgshapiro } 482223067Sgshapiro MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 483223067Sgshapiro needfree = iov; 484223067Sgshapiro } else { 485223067Sgshapiro iov = aiov; 486223067Sgshapiro needfree = NULL; 487223067Sgshapiro } 488223067Sgshapiro auio.uio_iov = iov; 489223067Sgshapiro auio.uio_iovcnt = uap->iovcnt; 490223067Sgshapiro auio.uio_rw = UIO_WRITE; 491223067Sgshapiro auio.uio_segflg = UIO_USERSPACE; 492223067Sgshapiro auio.uio_td = td; 493223067Sgshapiro auio.uio_offset = -1; 494223067Sgshapiro if ((error = copyin(uap->iovp, iov, iovlen))) 495223067Sgshapiro goto done; 496223067Sgshapiro auio.uio_resid = 0; 497223067Sgshapiro for (i = 0; i < uap->iovcnt; i++) { 498223067Sgshapiro if (iov->iov_len > INT_MAX - auio.uio_resid) { 499223067Sgshapiro error = EINVAL; 500223067Sgshapiro goto done; 501223067Sgshapiro } 502223067Sgshapiro auio.uio_resid += iov->iov_len; 503223067Sgshapiro iov++; 504223067Sgshapiro } 505223067Sgshapiro#ifdef KTRACE 506223067Sgshapiro /* 507223067Sgshapiro * if tracing, save a copy of iovec and uio 508223067Sgshapiro */ 509223067Sgshapiro if (KTRPOINT(td, KTR_GENIO)) { 510223067Sgshapiro MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 511223067Sgshapiro bcopy(auio.uio_iov, ktriov, iovlen); 512223067Sgshapiro ktruio = auio; 513223067Sgshapiro } 514223067Sgshapiro#endif 515223067Sgshapiro cnt = auio.uio_resid; 516223067Sgshapiro if (fp->f_type == DTYPE_VNODE) 517223067Sgshapiro bwillwrite(); 518223067Sgshapiro if ((error = fo_write(fp, &auio, td->td_ucred, 0, td))) { 519223067Sgshapiro if (auio.uio_resid != cnt && (error == ERESTART || 520223067Sgshapiro error == EINTR || error == EWOULDBLOCK)) 521223067Sgshapiro error = 0; 
522223067Sgshapiro if (error == EPIPE) { 523223067Sgshapiro PROC_LOCK(td->td_proc); 524223067Sgshapiro psignal(td->td_proc, SIGPIPE); 525223067Sgshapiro PROC_UNLOCK(td->td_proc); 526223067Sgshapiro } 527223067Sgshapiro } 528223067Sgshapiro cnt -= auio.uio_resid; 529223067Sgshapiro#ifdef KTRACE 530223067Sgshapiro if (ktriov != NULL) { 531223067Sgshapiro if (error == 0) { 532223067Sgshapiro ktruio.uio_iov = ktriov; 533244833Sgshapiro ktruio.uio_resid = cnt; 534223067Sgshapiro ktrgenio(uap->fd, UIO_WRITE, &ktruio, error); 535223067Sgshapiro } 536223067Sgshapiro FREE(ktriov, M_TEMP); 537223067Sgshapiro } 538223067Sgshapiro#endif 539223067Sgshapiro td->td_retval[0] = cnt; 540223067Sgshapirodone: 541223067Sgshapiro fdrop(fp, td); 542223067Sgshapiro if (needfree) 543223067Sgshapiro FREE(needfree, M_IOV); 544223067Sgshapirodone2: 545223067Sgshapiro mtx_unlock(&Giant); 546223067Sgshapiro return (error); 547223067Sgshapiro} 548223067Sgshapiro 549223067Sgshapiro/* 550223067Sgshapiro * Ioctl system call 551223067Sgshapiro */ 552223067Sgshapiro#ifndef _SYS_SYSPROTO_H_ 553223067Sgshapirostruct ioctl_args { 554223067Sgshapiro int fd; 555223067Sgshapiro u_long com; 556223067Sgshapiro caddr_t data; 557223067Sgshapiro}; 558223067Sgshapiro#endif 559223067Sgshapiro/* 560223067Sgshapiro * MPSAFE 561223067Sgshapiro */ 562223067Sgshapiro/* ARGSUSED */ 563223067Sgshapiroint 564223067Sgshapiroioctl(td, uap) 565223067Sgshapiro struct thread *td; 566223067Sgshapiro register struct ioctl_args *uap; 567223067Sgshapiro{ 568223067Sgshapiro struct file *fp; 569223067Sgshapiro register struct filedesc *fdp; 570223067Sgshapiro register u_long com; 571223067Sgshapiro int error = 0; 572223067Sgshapiro register u_int size; 573223067Sgshapiro caddr_t data, memp; 574223067Sgshapiro int tmp; 575223067Sgshapiro#define STK_PARAMS 128 576223067Sgshapiro union { 577223067Sgshapiro char stkbuf[STK_PARAMS]; 578223067Sgshapiro long align; 579223067Sgshapiro } ubuf; 580223067Sgshapiro 581223067Sgshapiro if 
((error = fget(td, uap->fd, &fp)) != 0) 582223067Sgshapiro return (error); 583223067Sgshapiro mtx_lock(&Giant); 584223067Sgshapiro if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 585223067Sgshapiro fdrop(fp, td); 586223067Sgshapiro mtx_unlock(&Giant); 587223067Sgshapiro return (EBADF); 588223067Sgshapiro } 589223067Sgshapiro fdp = td->td_proc->p_fd; 590223067Sgshapiro switch (com = uap->com) { 591223067Sgshapiro case FIONCLEX: 592223067Sgshapiro FILEDESC_LOCK(fdp); 593223067Sgshapiro fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 594223067Sgshapiro FILEDESC_UNLOCK(fdp); 595223067Sgshapiro fdrop(fp, td); 596223067Sgshapiro mtx_unlock(&Giant); 597223067Sgshapiro return (0); 598223067Sgshapiro case FIOCLEX: 599223067Sgshapiro FILEDESC_LOCK(fdp); 600223067Sgshapiro fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 601223067Sgshapiro FILEDESC_UNLOCK(fdp); 602223067Sgshapiro fdrop(fp, td); 603223067Sgshapiro mtx_unlock(&Giant); 604223067Sgshapiro return (0); 605223067Sgshapiro } 606223067Sgshapiro 607223067Sgshapiro /* 608223067Sgshapiro * Interpret high order word to find amount of data to be 609223067Sgshapiro * copied to/from the user's address space. 
610223067Sgshapiro */ 611223067Sgshapiro size = IOCPARM_LEN(com); 612223067Sgshapiro if (size > IOCPARM_MAX) { 613223067Sgshapiro fdrop(fp, td); 614223067Sgshapiro mtx_unlock(&Giant); 615223067Sgshapiro return (ENOTTY); 616223067Sgshapiro } 617223067Sgshapiro 618223067Sgshapiro memp = NULL; 619223067Sgshapiro if (size > sizeof (ubuf.stkbuf)) { 620223067Sgshapiro memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 621223067Sgshapiro data = memp; 622223067Sgshapiro } else { 623223067Sgshapiro data = ubuf.stkbuf; 624223067Sgshapiro } 625223067Sgshapiro if (com&IOC_IN) { 626223067Sgshapiro if (size) { 627223067Sgshapiro error = copyin(uap->data, data, (u_int)size); 628223067Sgshapiro if (error) { 629223067Sgshapiro if (memp) 630223067Sgshapiro free(memp, M_IOCTLOPS); 631223067Sgshapiro fdrop(fp, td); 632223067Sgshapiro goto done; 633223067Sgshapiro } 634223067Sgshapiro } else { 635223067Sgshapiro *(caddr_t *)data = uap->data; 636223067Sgshapiro } 637223067Sgshapiro } else if ((com&IOC_OUT) && size) { 638223067Sgshapiro /* 639223067Sgshapiro * Zero the buffer so the user always 640223067Sgshapiro * gets back something deterministic. 
641203004Sgshapiro */ 642203004Sgshapiro bzero(data, size); 643203004Sgshapiro } else if (com&IOC_VOID) { 644203004Sgshapiro *(caddr_t *)data = uap->data; 645203004Sgshapiro } 646203004Sgshapiro 647203004Sgshapiro switch (com) { 648203004Sgshapiro 649203004Sgshapiro case FIONBIO: 650203004Sgshapiro FILE_LOCK(fp); 651203004Sgshapiro if ((tmp = *(int *)data)) 652203004Sgshapiro fp->f_flag |= FNONBLOCK; 653203004Sgshapiro else 654203004Sgshapiro fp->f_flag &= ~FNONBLOCK; 655203004Sgshapiro FILE_UNLOCK(fp); 656203004Sgshapiro error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td); 657203004Sgshapiro break; 658203004Sgshapiro 659203004Sgshapiro case FIOASYNC: 660203004Sgshapiro FILE_LOCK(fp); 661203004Sgshapiro if ((tmp = *(int *)data)) 662203004Sgshapiro fp->f_flag |= FASYNC; 663203004Sgshapiro else 664203004Sgshapiro fp->f_flag &= ~FASYNC; 665203004Sgshapiro FILE_UNLOCK(fp); 666203004Sgshapiro error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td); 667203004Sgshapiro break; 668203004Sgshapiro 669203004Sgshapiro default: 670203004Sgshapiro error = fo_ioctl(fp, com, data, td->td_ucred, td); 671203004Sgshapiro /* 672203004Sgshapiro * Copy any data to user, size was 673203004Sgshapiro * already set and checked above. 674203004Sgshapiro */ 675203004Sgshapiro if (error == 0 && (com&IOC_OUT) && size) 676203004Sgshapiro error = copyout(data, uap->data, (u_int)size); 677203004Sgshapiro break; 678203004Sgshapiro } 679203004Sgshapiro if (memp) 680203004Sgshapiro free(memp, M_IOCTLOPS); 681203004Sgshapiro fdrop(fp, td); 682203004Sgshapirodone: 683203004Sgshapiro mtx_unlock(&Giant); 684203004Sgshapiro return (error); 685203004Sgshapiro} 686203004Sgshapiro 687203004Sgshapiro/* 688203004Sgshapiro * sellock and selwait are initialized in selectinit() via SYSINIT. 
689203004Sgshapiro */ 690203004Sgshapirostruct mtx sellock; 691203004Sgshapirostruct cv selwait; 692203004Sgshapirou_int nselcoll; /* Select collisions since boot */ 693203004SgshapiroSYSCTL_UINT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 694203004Sgshapiro 695203004Sgshapiro/* 696203004Sgshapiro * Select system call. 697203004Sgshapiro */ 698203004Sgshapiro#ifndef _SYS_SYSPROTO_H_ 699203004Sgshapirostruct select_args { 700203004Sgshapiro int nd; 701203004Sgshapiro fd_set *in, *ou, *ex; 702203004Sgshapiro struct timeval *tv; 703203004Sgshapiro}; 704203004Sgshapiro#endif 705203004Sgshapiro/* 706203004Sgshapiro * MPSAFE 707203004Sgshapiro */ 708203004Sgshapiroint 709203004Sgshapiroselect(td, uap) 710203004Sgshapiro register struct thread *td; 711203004Sgshapiro register struct select_args *uap; 712203004Sgshapiro{ 713203004Sgshapiro struct timeval tv, *tvp; 714182352Sgshapiro int error; 715182352Sgshapiro 716182352Sgshapiro if (uap->tv != NULL) { 717182352Sgshapiro error = copyin(uap->tv, &tv, sizeof(tv)); 718182352Sgshapiro if (error) 719182352Sgshapiro return (error); 720182352Sgshapiro tvp = &tv; 721182352Sgshapiro } else 722182352Sgshapiro tvp = NULL; 723182352Sgshapiro 724182352Sgshapiro return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp)); 725182352Sgshapiro} 726182352Sgshapiro 727182352Sgshapiroint 728182352Sgshapirokern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, 729182352Sgshapiro fd_set *fd_ex, struct timeval *tvp) 730182352Sgshapiro{ 731182352Sgshapiro struct filedesc *fdp; 732182352Sgshapiro /* 733182352Sgshapiro * The magic 2048 here is chosen to be just enough for FD_SETSIZE 734182352Sgshapiro * infds with the new FD_SETSIZE of 1024, and more than enough for 735182352Sgshapiro * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 736182352Sgshapiro * of 256. 
737182352Sgshapiro */ 738182352Sgshapiro fd_mask s_selbits[howmany(2048, NFDBITS)]; 739182352Sgshapiro fd_mask *ibits[3], *obits[3], *selbits, *sbp; 740182352Sgshapiro struct timeval atv, rtv, ttv; 741182352Sgshapiro int error, timo; 742182352Sgshapiro u_int ncoll, nbufbytes, ncpbytes, nfdbits; 743182352Sgshapiro 744182352Sgshapiro if (nd < 0) 745182352Sgshapiro return (EINVAL); 746182352Sgshapiro fdp = td->td_proc->p_fd; 747182352Sgshapiro mtx_lock(&Giant); 748182352Sgshapiro FILEDESC_LOCK(fdp); 749182352Sgshapiro 750182352Sgshapiro if (nd > td->td_proc->p_fd->fd_nfiles) 751182352Sgshapiro nd = td->td_proc->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 752182352Sgshapiro FILEDESC_UNLOCK(fdp); 753182352Sgshapiro 754182352Sgshapiro /* 755182352Sgshapiro * Allocate just enough bits for the non-null fd_sets. Use the 756182352Sgshapiro * preallocated auto buffer if possible. 757182352Sgshapiro */ 758182352Sgshapiro nfdbits = roundup(nd, NFDBITS); 759182352Sgshapiro ncpbytes = nfdbits / NBBY; 760182352Sgshapiro nbufbytes = 0; 761182352Sgshapiro if (fd_in != NULL) 762182352Sgshapiro nbufbytes += 2 * ncpbytes; 763182352Sgshapiro if (fd_ou != NULL) 764182352Sgshapiro nbufbytes += 2 * ncpbytes; 765182352Sgshapiro if (fd_ex != NULL) 766182352Sgshapiro nbufbytes += 2 * ncpbytes; 767182352Sgshapiro if (nbufbytes <= sizeof s_selbits) 768182352Sgshapiro selbits = &s_selbits[0]; 769182352Sgshapiro else 770182352Sgshapiro selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 771182352Sgshapiro 772182352Sgshapiro /* 773182352Sgshapiro * Assign pointers into the bit buffers and fetch the input bits. 774182352Sgshapiro * Put the output buffers together so that they can be bzeroed 775182352Sgshapiro * together. 
776182352Sgshapiro */ 777168515Sgshapiro sbp = selbits; 778168515Sgshapiro#define getbits(name, x) \ 779168515Sgshapiro do { \ 780168515Sgshapiro if (name == NULL) \ 781168515Sgshapiro ibits[x] = NULL; \ 782168515Sgshapiro else { \ 783168515Sgshapiro ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 784168515Sgshapiro obits[x] = sbp; \ 785168515Sgshapiro sbp += ncpbytes / sizeof *sbp; \ 786168515Sgshapiro error = copyin(name, ibits[x], ncpbytes); \ 787168515Sgshapiro if (error != 0) \ 788168515Sgshapiro goto done_nosellock; \ 789168515Sgshapiro } \ 790168515Sgshapiro } while (0) 791168515Sgshapiro getbits(fd_in, 0); 792168515Sgshapiro getbits(fd_ou, 1); 793168515Sgshapiro getbits(fd_ex, 2); 794168515Sgshapiro#undef getbits 795168515Sgshapiro if (nbufbytes != 0) 796168515Sgshapiro bzero(selbits, nbufbytes / 2); 797168515Sgshapiro 798168515Sgshapiro if (tvp != NULL) { 799168515Sgshapiro atv = *tvp; 800168515Sgshapiro if (itimerfix(&atv)) { 801168515Sgshapiro error = EINVAL; 802168515Sgshapiro goto done_nosellock; 803168515Sgshapiro } 804168515Sgshapiro getmicrouptime(&rtv); 805168515Sgshapiro timevaladd(&atv, &rtv); 806168515Sgshapiro } else { 807168515Sgshapiro atv.tv_sec = 0; 808168515Sgshapiro atv.tv_usec = 0; 809168515Sgshapiro } 810168515Sgshapiro timo = 0; 811168515Sgshapiro TAILQ_INIT(&td->td_selq); 812168515Sgshapiro mtx_lock(&sellock); 813168515Sgshapiroretry: 814168515Sgshapiro ncoll = nselcoll; 815168515Sgshapiro mtx_lock_spin(&sched_lock); 816168515Sgshapiro td->td_flags |= TDF_SELECT; 817168515Sgshapiro mtx_unlock_spin(&sched_lock); 818168515Sgshapiro mtx_unlock(&sellock); 819168515Sgshapiro 820168515Sgshapiro error = selscan(td, ibits, obits, nd); 821168515Sgshapiro mtx_lock(&sellock); 822168515Sgshapiro if (error || td->td_retval[0]) 823168515Sgshapiro goto done; 824168515Sgshapiro if (atv.tv_sec || atv.tv_usec) { 825168515Sgshapiro getmicrouptime(&rtv); 826168515Sgshapiro if (timevalcmp(&rtv, &atv, >=)) 827168515Sgshapiro goto done; 828168515Sgshapiro 
ttv = atv; 829168515Sgshapiro timevalsub(&ttv, &rtv); 830168515Sgshapiro timo = ttv.tv_sec > 24 * 60 * 60 ? 831168515Sgshapiro 24 * 60 * 60 * hz : tvtohz(&ttv); 832168515Sgshapiro } 833168515Sgshapiro 834168515Sgshapiro /* 835168515Sgshapiro * An event of interest may occur while we do not hold 836168515Sgshapiro * sellock, so check TDF_SELECT and the number of 837168515Sgshapiro * collisions and rescan the file descriptors if 838168515Sgshapiro * necessary. 839168515Sgshapiro */ 840168515Sgshapiro mtx_lock_spin(&sched_lock); 841168515Sgshapiro if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { 842168515Sgshapiro mtx_unlock_spin(&sched_lock); 843168515Sgshapiro goto retry; 844168515Sgshapiro } 845168515Sgshapiro mtx_unlock_spin(&sched_lock); 846168515Sgshapiro 847168515Sgshapiro if (timo > 0) 848168515Sgshapiro error = cv_timedwait_sig(&selwait, &sellock, timo); 849168515Sgshapiro else 850168515Sgshapiro error = cv_wait_sig(&selwait, &sellock); 851168515Sgshapiro 852168515Sgshapiro if (error == 0) 853157001Sgshapiro goto retry; 854157001Sgshapiro 855157001Sgshapirodone: 856157001Sgshapiro clear_selinfo_list(td); 857157001Sgshapiro mtx_lock_spin(&sched_lock); 858159609Sgshapiro td->td_flags &= ~TDF_SELECT; 859157001Sgshapiro mtx_unlock_spin(&sched_lock); 860157001Sgshapiro mtx_unlock(&sellock); 861157001Sgshapiro 862157001Sgshapirodone_nosellock: 863157001Sgshapiro /* select is not restarted after signals... 
*/ 864157001Sgshapiro if (error == ERESTART) 865157001Sgshapiro error = EINTR; 866157001Sgshapiro if (error == EWOULDBLOCK) 867157001Sgshapiro error = 0; 868157001Sgshapiro#define putbits(name, x) \ 869157001Sgshapiro if (name && (error2 = copyout(obits[x], name, ncpbytes))) \ 870157001Sgshapiro error = error2; 871157001Sgshapiro if (error == 0) { 872157001Sgshapiro int error2; 873157001Sgshapiro 874157001Sgshapiro putbits(fd_in, 0); 875157001Sgshapiro putbits(fd_ou, 1); 876157001Sgshapiro putbits(fd_ex, 2); 877157001Sgshapiro#undef putbits 878157001Sgshapiro } 879157001Sgshapiro if (selbits != &s_selbits[0]) 880157001Sgshapiro free(selbits, M_SELECT); 881157001Sgshapiro 882157001Sgshapiro mtx_unlock(&Giant); 883157001Sgshapiro return (error); 884157001Sgshapiro} 885157001Sgshapiro 886157001Sgshapirostatic int 887157001Sgshapiroselscan(td, ibits, obits, nfd) 888157001Sgshapiro struct thread *td; 889157001Sgshapiro fd_mask **ibits, **obits; 890157001Sgshapiro int nfd; 891157001Sgshapiro{ 892157001Sgshapiro int msk, i, fd; 893157001Sgshapiro fd_mask bits; 894157001Sgshapiro struct file *fp; 895157001Sgshapiro int n = 0; 896157001Sgshapiro /* Note: backend also returns POLLHUP/POLLERR if appropriate. 
*/ 897157001Sgshapiro static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 898157001Sgshapiro struct filedesc *fdp = td->td_proc->p_fd; 899157001Sgshapiro 900157001Sgshapiro FILEDESC_LOCK(fdp); 901157001Sgshapiro for (msk = 0; msk < 3; msk++) { 902157001Sgshapiro if (ibits[msk] == NULL) 903157001Sgshapiro continue; 904157001Sgshapiro for (i = 0; i < nfd; i += NFDBITS) { 905157001Sgshapiro bits = ibits[msk][i/NFDBITS]; 906157001Sgshapiro /* ffs(int mask) not portable, fd_mask is long */ 907157001Sgshapiro for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 908157001Sgshapiro if (!(bits & 1)) 909157001Sgshapiro continue; 910157001Sgshapiro if ((fp = fget_locked(fdp, fd)) == NULL) { 911157001Sgshapiro FILEDESC_UNLOCK(fdp); 912157001Sgshapiro return (EBADF); 913157001Sgshapiro } 914157001Sgshapiro if (fo_poll(fp, flag[msk], td->td_ucred, 915157001Sgshapiro td)) { 916157001Sgshapiro obits[msk][(fd)/NFDBITS] |= 917157001Sgshapiro ((fd_mask)1 << ((fd) % NFDBITS)); 918157001Sgshapiro n++; 919157001Sgshapiro } 920157001Sgshapiro } 921157001Sgshapiro } 922157001Sgshapiro } 923157001Sgshapiro FILEDESC_UNLOCK(fdp); 924157001Sgshapiro td->td_retval[0] = n; 925157001Sgshapiro return (0); 926157001Sgshapiro} 927157001Sgshapiro 928157001Sgshapiro/* 929157001Sgshapiro * Poll system call. 
930141858Sgshapiro */ 931141858Sgshapiro#ifndef _SYS_SYSPROTO_H_ 932141858Sgshapirostruct poll_args { 933141858Sgshapiro struct pollfd *fds; 934141858Sgshapiro u_int nfds; 935159609Sgshapiro int timeout; 936141858Sgshapiro}; 937141858Sgshapiro#endif 938141858Sgshapiro/* 939141858Sgshapiro * MPSAFE 940141858Sgshapiro */ 941141858Sgshapiroint 942141858Sgshapiropoll(td, uap) 943141858Sgshapiro struct thread *td; 944141858Sgshapiro struct poll_args *uap; 945141858Sgshapiro{ 946141858Sgshapiro caddr_t bits; 947141858Sgshapiro char smallbits[32 * sizeof(struct pollfd)]; 948141858Sgshapiro struct timeval atv, rtv, ttv; 949141858Sgshapiro int error = 0, timo; 950141858Sgshapiro u_int ncoll, nfds; 951141858Sgshapiro size_t ni; 952141858Sgshapiro 953141858Sgshapiro nfds = uap->nfds; 954141858Sgshapiro 955141858Sgshapiro mtx_lock(&Giant); 956141858Sgshapiro /* 957141858Sgshapiro * This is kinda bogus. We have fd limits, but that is not 958141858Sgshapiro * really related to the size of the pollfd array. Make sure 959141858Sgshapiro * we let the process use at least FD_SETSIZE entries and at 960141858Sgshapiro * least enough for the current limits. We want to be reasonably 961141858Sgshapiro * safe, but not overly restrictive. 
962141858Sgshapiro */ 963141858Sgshapiro if ((nfds > td->td_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur) && 964141858Sgshapiro (nfds > FD_SETSIZE)) { 965141858Sgshapiro error = EINVAL; 966141858Sgshapiro goto done2; 967141858Sgshapiro } 968141858Sgshapiro ni = nfds * sizeof(struct pollfd); 969141858Sgshapiro if (ni > sizeof(smallbits)) 970141858Sgshapiro bits = malloc(ni, M_TEMP, M_WAITOK); 971141858Sgshapiro else 972141858Sgshapiro bits = smallbits; 973141858Sgshapiro error = copyin(uap->fds, bits, ni); 974141858Sgshapiro if (error) 975141858Sgshapiro goto done_nosellock; 976141858Sgshapiro if (uap->timeout != INFTIM) { 977141858Sgshapiro atv.tv_sec = uap->timeout / 1000; 978141858Sgshapiro atv.tv_usec = (uap->timeout % 1000) * 1000; 979141858Sgshapiro if (itimerfix(&atv)) { 980141858Sgshapiro error = EINVAL; 981141858Sgshapiro goto done_nosellock; 982159609Sgshapiro } 983159609Sgshapiro getmicrouptime(&rtv); 984159609Sgshapiro timevaladd(&atv, &rtv); 985159609Sgshapiro } else { 986159609Sgshapiro atv.tv_sec = 0; 987159609Sgshapiro atv.tv_usec = 0; 988159609Sgshapiro } 989159609Sgshapiro timo = 0; 990159609Sgshapiro TAILQ_INIT(&td->td_selq); 991159609Sgshapiro mtx_lock(&sellock); 992159609Sgshapiroretry: 993159609Sgshapiro ncoll = nselcoll; 994159609Sgshapiro mtx_lock_spin(&sched_lock); 995159609Sgshapiro td->td_flags |= TDF_SELECT; 996159609Sgshapiro mtx_unlock_spin(&sched_lock); 997159609Sgshapiro mtx_unlock(&sellock); 998159609Sgshapiro 999159609Sgshapiro error = pollscan(td, (struct pollfd *)bits, nfds); 1000159609Sgshapiro mtx_lock(&sellock); 1001159609Sgshapiro if (error || td->td_retval[0]) 1002159609Sgshapiro goto done; 1003159609Sgshapiro if (atv.tv_sec || atv.tv_usec) { 1004159609Sgshapiro getmicrouptime(&rtv); 1005159609Sgshapiro if (timevalcmp(&rtv, &atv, >=)) 1006159609Sgshapiro goto done; 1007159609Sgshapiro ttv = atv; 1008159609Sgshapiro timevalsub(&ttv, &rtv); 1009159609Sgshapiro timo = ttv.tv_sec > 24 * 60 * 60 ? 
1010159609Sgshapiro 24 * 60 * 60 * hz : tvtohz(&ttv); 1011159609Sgshapiro } 1012159609Sgshapiro /* 1013159609Sgshapiro * An event of interest may occur while we do not hold 1014159609Sgshapiro * sellock, so check TDF_SELECT and the number of collisions 1015159609Sgshapiro * and rescan the file descriptors if necessary. 1016159609Sgshapiro */ 1017159609Sgshapiro mtx_lock_spin(&sched_lock); 1018159609Sgshapiro if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { 1019159609Sgshapiro mtx_unlock_spin(&sched_lock); 1020159609Sgshapiro goto retry; 1021159609Sgshapiro } 1022159609Sgshapiro mtx_unlock_spin(&sched_lock); 1023141858Sgshapiro 1024141858Sgshapiro if (timo > 0) 1025141858Sgshapiro error = cv_timedwait_sig(&selwait, &sellock, timo); 1026125820Sgshapiro else 1027125820Sgshapiro error = cv_wait_sig(&selwait, &sellock); 1028125820Sgshapiro 1029125820Sgshapiro if (error == 0) 1030125820Sgshapiro goto retry; 1031159609Sgshapiro 1032125820Sgshapirodone: 1033125820Sgshapiro clear_selinfo_list(td); 1034125820Sgshapiro mtx_lock_spin(&sched_lock); 1035125820Sgshapiro td->td_flags &= ~TDF_SELECT; 1036125820Sgshapiro mtx_unlock_spin(&sched_lock); 1037125820Sgshapiro mtx_unlock(&sellock); 1038125820Sgshapiro 1039125820Sgshapirodone_nosellock: 1040125820Sgshapiro /* poll is not restarted after signals... 
*/ 1041125820Sgshapiro if (error == ERESTART) 1042125820Sgshapiro error = EINTR; 1043125820Sgshapiro if (error == EWOULDBLOCK) 1044125820Sgshapiro error = 0; 1045125820Sgshapiro if (error == 0) { 1046125820Sgshapiro error = copyout(bits, uap->fds, ni); 1047125820Sgshapiro if (error) 1048125820Sgshapiro goto out; 1049125820Sgshapiro } 1050125820Sgshapiroout: 1051125820Sgshapiro if (ni > sizeof(smallbits)) 1052125820Sgshapiro free(bits, M_TEMP); 1053125820Sgshapirodone2: 1054125820Sgshapiro mtx_unlock(&Giant); 1055125820Sgshapiro return (error); 1056125820Sgshapiro} 1057125820Sgshapiro 1058125820Sgshapirostatic int 1059125820Sgshapiropollscan(td, fds, nfd) 1060125820Sgshapiro struct thread *td; 1061125820Sgshapiro struct pollfd *fds; 1062125820Sgshapiro u_int nfd; 1063125820Sgshapiro{ 1064125820Sgshapiro register struct filedesc *fdp = td->td_proc->p_fd; 1065125820Sgshapiro int i; 1066125820Sgshapiro struct file *fp; 1067125820Sgshapiro int n = 0; 1068125820Sgshapiro 1069125820Sgshapiro FILEDESC_LOCK(fdp); 1070125820Sgshapiro for (i = 0; i < nfd; i++, fds++) { 1071125820Sgshapiro if (fds->fd >= fdp->fd_nfiles) { 1072125820Sgshapiro fds->revents = POLLNVAL; 1073125820Sgshapiro n++; 1074125820Sgshapiro } else if (fds->fd < 0) { 1075159609Sgshapiro fds->revents = 0; 1076159609Sgshapiro } else { 1077159609Sgshapiro fp = fdp->fd_ofiles[fds->fd]; 1078159609Sgshapiro if (fp == NULL) { 1079159609Sgshapiro fds->revents = POLLNVAL; 1080159609Sgshapiro n++; 1081159609Sgshapiro } else { 1082159609Sgshapiro /* 1083159609Sgshapiro * Note: backend also returns POLLHUP and 1084159609Sgshapiro * POLLERR if appropriate. 
1085159609Sgshapiro */ 1086159609Sgshapiro fds->revents = fo_poll(fp, fds->events, 1087159609Sgshapiro td->td_ucred, td); 1088159609Sgshapiro if (fds->revents != 0) 1089125820Sgshapiro n++; 1090159609Sgshapiro } 1091159609Sgshapiro } 1092159609Sgshapiro } 1093159609Sgshapiro FILEDESC_UNLOCK(fdp); 1094159609Sgshapiro td->td_retval[0] = n; 1095159609Sgshapiro return (0); 1096159609Sgshapiro} 1097159609Sgshapiro 1098159609Sgshapiro/* 1099159609Sgshapiro * OpenBSD poll system call. 1100159609Sgshapiro * XXX this isn't quite a true representation.. OpenBSD uses select ops. 1101159609Sgshapiro */ 1102159609Sgshapiro#ifndef _SYS_SYSPROTO_H_ 1103159609Sgshapirostruct openbsd_poll_args { 1104159609Sgshapiro struct pollfd *fds; 1105159609Sgshapiro u_int nfds; 1106159609Sgshapiro int timeout; 1107159609Sgshapiro}; 1108159609Sgshapiro#endif 1109159609Sgshapiro/* 1110159609Sgshapiro * MPSAFE 1111159609Sgshapiro */ 1112159609Sgshapiroint 1113159609Sgshapiroopenbsd_poll(td, uap) 1114125820Sgshapiro register struct thread *td; 1115125820Sgshapiro register struct openbsd_poll_args *uap; 1116125820Sgshapiro{ 1117111823Sgshapiro return (poll(td, (struct poll_args *)uap)); 1118111823Sgshapiro} 1119111823Sgshapiro 1120111823Sgshapiro/* 1121111823Sgshapiro * Remove the references to the thread from all of the objects 1122159609Sgshapiro * we were polling. 1123111823Sgshapiro * 1124111823Sgshapiro * This code assumes that the underlying owner of the selinfo 1125111823Sgshapiro * structure will hold sellock before it changes it, and that 1126111823Sgshapiro * it will unlink itself from our list if it goes away. 
1127111823Sgshapiro */ 1128111823Sgshapirovoid 1129111823Sgshapiroclear_selinfo_list(td) 1130111823Sgshapiro struct thread *td; 1131111823Sgshapiro{ 1132111823Sgshapiro struct selinfo *si; 1133111823Sgshapiro 1134111823Sgshapiro mtx_assert(&sellock, MA_OWNED); 1135111823Sgshapiro TAILQ_FOREACH(si, &td->td_selq, si_thrlist) 1136111823Sgshapiro si->si_thread = NULL; 1137111823Sgshapiro TAILQ_INIT(&td->td_selq); 1138111823Sgshapiro} 1139111823Sgshapiro 1140111823Sgshapiro/*ARGSUSED*/ 1141111823Sgshapiroint 1142111823Sgshapiroseltrue(dev, events, td) 1143111823Sgshapiro dev_t dev; 1144111823Sgshapiro int events; 1145111823Sgshapiro struct thread *td; 1146111823Sgshapiro{ 1147111823Sgshapiro 1148111823Sgshapiro return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1149111823Sgshapiro} 1150111823Sgshapiro 1151111823Sgshapiro/* 1152111823Sgshapiro * Record a select request. 1153111823Sgshapiro */ 1154111823Sgshapirovoid 1155111823Sgshapiroselrecord(selector, sip) 1156111823Sgshapiro struct thread *selector; 1157111823Sgshapiro struct selinfo *sip; 1158111823Sgshapiro{ 1159159609Sgshapiro 1160159609Sgshapiro mtx_lock(&sellock); 1161159609Sgshapiro /* 1162159609Sgshapiro * If the selinfo's thread pointer is NULL then take ownership of it. 1163159609Sgshapiro * 1164159609Sgshapiro * If the thread pointer is not NULL and it points to another 1165159609Sgshapiro * thread, then we have a collision. 1166159609Sgshapiro * 1167159609Sgshapiro * If the thread pointer is not NULL and points back to us then leave 1168159609Sgshapiro * it alone as we've already added pointed it at us and added it to 1169159609Sgshapiro * our list. 
1170159609Sgshapiro */ 1171159609Sgshapiro if (sip->si_thread == NULL) { 1172159609Sgshapiro sip->si_thread = selector; 1173159609Sgshapiro TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist); 1174159609Sgshapiro } else if (sip->si_thread != selector) { 1175159609Sgshapiro sip->si_flags |= SI_COLL; 1176159609Sgshapiro } 1177159609Sgshapiro 1178159609Sgshapiro mtx_unlock(&sellock); 1179159609Sgshapiro} 1180159609Sgshapiro 1181159609Sgshapiro/* 1182159609Sgshapiro * Do a wakeup when a selectable event occurs. 1183159609Sgshapiro */ 1184159609Sgshapirovoid 1185159609Sgshapiroselwakeup(sip) 1186159609Sgshapiro struct selinfo *sip; 1187159609Sgshapiro{ 1188159609Sgshapiro struct thread *td; 1189159609Sgshapiro 1190159609Sgshapiro mtx_lock(&sellock); 1191159609Sgshapiro td = sip->si_thread; 1192159609Sgshapiro if ((sip->si_flags & SI_COLL) != 0) { 1193159609Sgshapiro nselcoll++; 1194159609Sgshapiro sip->si_flags &= ~SI_COLL; 1195159609Sgshapiro cv_broadcast(&selwait); 1196159609Sgshapiro } 1197159609Sgshapiro if (td == NULL) { 1198159609Sgshapiro mtx_unlock(&sellock); 1199159609Sgshapiro return; 1200159609Sgshapiro } 1201159609Sgshapiro TAILQ_REMOVE(&td->td_selq, sip, si_thrlist); 1202159609Sgshapiro sip->si_thread = NULL; 1203159609Sgshapiro mtx_lock_spin(&sched_lock); 1204159609Sgshapiro if (td->td_wchan == &selwait) { 1205159609Sgshapiro cv_waitq_remove(td); 1206159609Sgshapiro TD_CLR_SLEEPING(td); 1207159609Sgshapiro setrunnable(td); 1208159609Sgshapiro } else 1209159609Sgshapiro td->td_flags &= ~TDF_SELECT; 1210159609Sgshapiro mtx_unlock_spin(&sched_lock); 1211159609Sgshapiro mtx_unlock(&sellock); 1212159609Sgshapiro} 1213159609Sgshapiro 1214159609Sgshapirostatic void selectinit(void *); 1215159609SgshapiroSYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL) 1216159609Sgshapiro 1217159609Sgshapiro/* ARGSUSED*/ 1218159609Sgshapirostatic void 1219159609Sgshapiroselectinit(dummy) 1220159609Sgshapiro void *dummy; 1221159609Sgshapiro{ 1222159609Sgshapiro 
cv_init(&selwait, "select"); 1223159609Sgshapiro mtx_init(&sellock, "sellck", NULL, MTX_DEF); 1224111823Sgshapiro} 1225111823Sgshapiro