sys_generic.c revision 89306
11541Srgrimes/* 21541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * (c) UNIX System Laboratories, Inc. 51541Srgrimes * All or some portions of this file are derived from material licensed 61541Srgrimes * to the University of California by American Telephone and Telegraph 71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 81541Srgrimes * the permission of UNIX System Laboratories, Inc. 91541Srgrimes * 101541Srgrimes * Redistribution and use in source and binary forms, with or without 111541Srgrimes * modification, are permitted provided that the following conditions 121541Srgrimes * are met: 131541Srgrimes * 1. Redistributions of source code must retain the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer. 151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161541Srgrimes * notice, this list of conditions and the following disclaimer in the 171541Srgrimes * documentation and/or other materials provided with the distribution. 181541Srgrimes * 3. All advertising materials mentioning features or use of this software 191541Srgrimes * must display the following acknowledgement: 201541Srgrimes * This product includes software developed by the University of 211541Srgrimes * California, Berkeley and its contributors. 221541Srgrimes * 4. Neither the name of the University nor the names of its contributors 231541Srgrimes * may be used to endorse or promote products derived from this software 241541Srgrimes * without specific prior written permission. 251541Srgrimes * 261541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 271541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 281541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 291541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 301541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 311541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 321541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 331541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 341541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 351541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 361541Srgrimes * SUCH DAMAGE. 371541Srgrimes * 381541Srgrimes * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 3950477Speter * $FreeBSD: head/sys/kern/sys_generic.c 89306 2002-01-13 11:58:06Z alfred $ 401541Srgrimes */ 411541Srgrimes 4213203Swollman#include "opt_ktrace.h" 4313203Swollman 441541Srgrimes#include <sys/param.h> 451541Srgrimes#include <sys/systm.h> 4612221Sbde#include <sys/sysproto.h> 471541Srgrimes#include <sys/filedesc.h> 4824206Sbde#include <sys/filio.h> 4924131Sbde#include <sys/fcntl.h> 501541Srgrimes#include <sys/file.h> 511541Srgrimes#include <sys/proc.h> 523308Sphk#include <sys/signalvar.h> 531541Srgrimes#include <sys/socketvar.h> 541541Srgrimes#include <sys/uio.h> 551541Srgrimes#include <sys/kernel.h> 561541Srgrimes#include <sys/malloc.h> 5729351Speter#include <sys/poll.h> 5872146Speter#include <sys/resourcevar.h> 5970834Swollman#include <sys/selinfo.h> 6055478Speter#include <sys/sysctl.h> 6129351Speter#include <sys/sysent.h> 6268883Sdillon#include <sys/bio.h> 6368883Sdillon#include <sys/buf.h> 6476564Stanimura#include <sys/condvar.h> 651541Srgrimes#ifdef KTRACE 661541Srgrimes#include <sys/ktrace.h> 671541Srgrimes#endif 6868883Sdillon#include <vm/vm.h> 6968883Sdillon#include <vm/vm_page.h> 701541Srgrimes 7138517Sdfr#include <machine/limits.h> 7238517Sdfr 7330354Sphkstatic MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 7430354Sphkstatic MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 7530354SphkMALLOC_DEFINE(M_IOV, "iov", "large iov's"); 7630309Sphk 7783366Sjulianstatic int pollscan __P((struct thread *, struct pollfd *, u_int)); 7883366Sjulianstatic int pollholddrop __P((struct thread *, struct pollfd *, u_int, int)); 7983366Sjulianstatic int selscan __P((struct thread *, fd_mask **, fd_mask **, int)); 8083366Sjulianstatic int selholddrop __P((struct thread *, fd_mask *, fd_mask *, int, int)); 8183366Sjulianstatic int dofileread __P((struct thread *, struct file *, int, void *, 8245311Sdt size_t, off_t, int)); 8383366Sjulianstatic int dofilewrite __P((struct thread *, struct file *, int, 8445311Sdt const void *, size_t, off_t, int)); 853485Sphk 8689306Salfredstruct file* 8789306Salfredholdfp(fdp, fd, flag) 8889306Salfred struct filedesc* fdp; 8989306Salfred int fd, flag; 9089306Salfred{ 9189306Salfred struct file* fp; 9289306Salfred 9389306Salfred FILEDESC_LOCK(fdp); 9489306Salfred if (((u_int)fd) >= fdp->fd_nfiles || 9589306Salfred (fp = fdp->fd_ofiles[fd]) == NULL) { 9689306Salfred FILEDESC_UNLOCK(fdp); 9789306Salfred return (NULL); 9889306Salfred } 9989306Salfred FILE_LOCK(fp); 10089306Salfred FILEDESC_UNLOCK(fdp); 10189306Salfred if ((fp->f_flag & flag) == 0) { 10289306Salfred FILE_UNLOCK(fp); 10389306Salfred return (NULL); 10489306Salfred } 10589306Salfred fp->f_count++; 10689306Salfred FILE_UNLOCK(fp); 10789306Salfred return (fp); 10889306Salfred} 10989306Salfred 1101541Srgrimes/* 1111541Srgrimes * Read system call. 1121541Srgrimes */ 11312221Sbde#ifndef _SYS_SYSPROTO_H_ 1141541Srgrimesstruct read_args { 1151541Srgrimes int fd; 11638864Sbde void *buf; 11738864Sbde size_t nbyte; 1181541Srgrimes}; 11912221Sbde#endif 12082752Sdillon/* 12182752Sdillon * MPSAFE 12282752Sdillon */ 1231549Srgrimesint 12483366Sjulianread(td, uap) 12583366Sjulian struct thread *td; 12686341Sdillon struct read_args *uap; 1271541Srgrimes{ 12886341Sdillon struct file *fp; 12968883Sdillon int error; 1301541Srgrimes 13182752Sdillon mtx_lock(&Giant); 13286341Sdillon if ((error = fget_read(td, uap->fd, &fp)) == 0) { 13383366Sjulian error = dofileread(td, fp, uap->fd, uap->buf, 13482752Sdillon uap->nbyte, (off_t)-1, 0); 13583366Sjulian fdrop(fp, td); 13682752Sdillon } 13782752Sdillon mtx_unlock(&Giant); 13868883Sdillon return(error); 1391541Srgrimes} 1401541Srgrimes 1411541Srgrimes/* 14245311Sdt * Pread system call 14345065Salc */ 14445065Salc#ifndef _SYS_SYSPROTO_H_ 14545065Salcstruct pread_args { 14645065Salc int fd; 14745065Salc void *buf; 14845065Salc size_t nbyte; 14945311Sdt int pad; 15045311Sdt off_t offset; 15145065Salc}; 15245065Salc#endif 15382752Sdillon/* 15482752Sdillon * MPSAFE 15582752Sdillon */ 15645065Salcint 15783366Sjulianpread(td, uap) 15883366Sjulian struct thread *td; 15986341Sdillon struct pread_args *uap; 16045065Salc{ 16186341Sdillon struct file *fp; 16268883Sdillon int error; 16345311Sdt 16489306Salfred fp = holdfp(td->td_proc->p_fd, uap->fd, FREAD); 16589306Salfred if (fp == NULL) 16689306Salfred return (EBADF); 16789306Salfred if (fp->f_type != DTYPE_VNODE) { 16889306Salfred error = ESPIPE; 16989306Salfred } else { 17089306Salfred mtx_lock(&Giant); 17189306Salfred error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte, 17289306Salfred uap->offset, FOF_OFFSET); 17389306Salfred mtx_unlock(&Giant); 17468883Sdillon } 17589306Salfred fdrop(fp, td); 17668883Sdillon return(error); 17745311Sdt} 17845311Sdt 17945311Sdt/* 18045311Sdt * Code common for read and pread 18145311Sdt */ 18245311Sdtint 18383366Sjuliandofileread(td, fp, fd, buf, nbyte, offset, flags) 18483366Sjulian struct thread *td; 18545311Sdt struct file *fp; 18645311Sdt int fd, flags; 18745311Sdt void *buf; 18845311Sdt size_t nbyte; 18945311Sdt off_t offset; 19045311Sdt{ 19145065Salc struct uio auio; 19245065Salc struct iovec aiov; 19345065Salc long cnt, error = 0; 19445065Salc#ifdef KTRACE 19545065Salc struct iovec ktriov; 19662378Sgreen struct uio ktruio; 19763905Sgreen int didktr = 0; 19845065Salc#endif 19945065Salc 20045311Sdt aiov.iov_base = (caddr_t)buf; 20145311Sdt aiov.iov_len = nbyte; 20245065Salc auio.uio_iov = &aiov; 20345065Salc auio.uio_iovcnt = 1; 20445311Sdt auio.uio_offset = offset; 20545311Sdt if (nbyte > INT_MAX) 20645065Salc return (EINVAL); 20745311Sdt auio.uio_resid = nbyte; 20845065Salc auio.uio_rw = UIO_READ; 20945065Salc auio.uio_segflg = UIO_USERSPACE; 21083366Sjulian auio.uio_td = td; 21145065Salc#ifdef KTRACE 21245065Salc /* 21345065Salc * if tracing, save a copy of iovec 21445065Salc */ 21583366Sjulian if (KTRPOINT(td->td_proc, KTR_GENIO)) { 21645065Salc ktriov = aiov; 21762378Sgreen ktruio = auio; 21863905Sgreen didktr = 1; 21962378Sgreen } 22045065Salc#endif 22145311Sdt cnt = nbyte; 22268883Sdillon 22383366Sjulian if ((error = fo_read(fp, &auio, fp->f_cred, flags, td))) { 22445065Salc if (auio.uio_resid != cnt && (error == ERESTART || 22545065Salc error == EINTR || error == EWOULDBLOCK)) 22645065Salc error = 0; 22768883Sdillon } 22845065Salc cnt -= auio.uio_resid; 22945065Salc#ifdef KTRACE 23063905Sgreen if (didktr && error == 0) { 23162378Sgreen ktruio.uio_iov = &ktriov; 23262378Sgreen ktruio.uio_resid = cnt; 23383366Sjulian ktrgenio(td->td_proc->p_tracep, fd, UIO_READ, &ktruio, error); 23462378Sgreen } 23545065Salc#endif 23683366Sjulian td->td_retval[0] = cnt; 23745065Salc return (error); 23845065Salc} 23945065Salc 24045065Salc/* 2411541Srgrimes * Scatter read system call. 2421541Srgrimes */ 24312221Sbde#ifndef _SYS_SYSPROTO_H_ 2441541Srgrimesstruct readv_args { 24512208Sbde int fd; 2461541Srgrimes struct iovec *iovp; 2471541Srgrimes u_int iovcnt; 2481541Srgrimes}; 24912221Sbde#endif 25082752Sdillon/* 25182752Sdillon * MPSAFE 25282752Sdillon */ 2531549Srgrimesint 25483366Sjulianreadv(td, uap) 25583366Sjulian struct thread *td; 25686341Sdillon struct readv_args *uap; 2571541Srgrimes{ 25886341Sdillon struct file *fp; 2591541Srgrimes struct uio auio; 26086341Sdillon struct iovec *iov; 2611541Srgrimes struct iovec *needfree; 2621541Srgrimes struct iovec aiov[UIO_SMALLIOV]; 2631541Srgrimes long i, cnt, error = 0; 2641541Srgrimes u_int iovlen; 2651541Srgrimes#ifdef KTRACE 2661541Srgrimes struct iovec *ktriov = NULL; 26762378Sgreen struct uio ktruio; 2681541Srgrimes#endif 26982752Sdillon mtx_lock(&Giant); 2701541Srgrimes 27186341Sdillon if ((error = fget_read(td, uap->fd, &fp)) != 0) 27282752Sdillon goto done2; 2731541Srgrimes /* note: can't use iovlen until iovcnt is validated */ 2741541Srgrimes iovlen = uap->iovcnt * sizeof (struct iovec); 2751541Srgrimes if (uap->iovcnt > UIO_SMALLIOV) { 27682752Sdillon if (uap->iovcnt > UIO_MAXIOV) { 27782752Sdillon error = EINVAL; 27882752Sdillon goto done2; 27982752Sdillon } 2801541Srgrimes MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 2811541Srgrimes needfree = iov; 2821541Srgrimes } else { 2831541Srgrimes iov = aiov; 2841541Srgrimes needfree = NULL; 2851541Srgrimes } 2861541Srgrimes auio.uio_iov = iov; 2871541Srgrimes auio.uio_iovcnt = uap->iovcnt; 2881541Srgrimes auio.uio_rw = UIO_READ; 2891541Srgrimes auio.uio_segflg = UIO_USERSPACE; 29083366Sjulian auio.uio_td = td; 29126671Sdyson auio.uio_offset = -1; 2923098Sphk if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 2931541Srgrimes goto done; 2941541Srgrimes auio.uio_resid = 0; 2951541Srgrimes for (i = 0; i < uap->iovcnt; i++) { 29638517Sdfr if (iov->iov_len > INT_MAX - auio.uio_resid) { 2971541Srgrimes error = EINVAL; 2981541Srgrimes goto done; 2991541Srgrimes } 30038517Sdfr auio.uio_resid += iov->iov_len; 3011541Srgrimes iov++; 3021541Srgrimes } 3031541Srgrimes#ifdef KTRACE 3041541Srgrimes /* 3051541Srgrimes * if tracing, save a copy of iovec 3061541Srgrimes */ 30783366Sjulian if (KTRPOINT(td->td_proc, KTR_GENIO)) { 3081541Srgrimes MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 3091541Srgrimes bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 31062378Sgreen ktruio = auio; 3111541Srgrimes } 3121541Srgrimes#endif 3131541Srgrimes cnt = auio.uio_resid; 31483366Sjulian if ((error = fo_read(fp, &auio, fp->f_cred, 0, td))) { 3151541Srgrimes if (auio.uio_resid != cnt && (error == ERESTART || 3161541Srgrimes error == EINTR || error == EWOULDBLOCK)) 3171541Srgrimes error = 0; 31868883Sdillon } 3191541Srgrimes cnt -= auio.uio_resid; 3201541Srgrimes#ifdef KTRACE 3211541Srgrimes if (ktriov != NULL) { 32262378Sgreen if (error == 0) { 32362378Sgreen ktruio.uio_iov = ktriov; 32462378Sgreen ktruio.uio_resid = cnt; 32583366Sjulian ktrgenio(td->td_proc->p_tracep, uap->fd, UIO_READ, &ktruio, 32662378Sgreen error); 32762378Sgreen } 3281541Srgrimes FREE(ktriov, M_TEMP); 3291541Srgrimes } 3301541Srgrimes#endif 33183366Sjulian td->td_retval[0] = cnt; 3321541Srgrimesdone: 33383366Sjulian fdrop(fp, td); 3341541Srgrimes if (needfree) 3351541Srgrimes FREE(needfree, M_IOV); 33682752Sdillondone2: 33782752Sdillon mtx_unlock(&Giant); 3381541Srgrimes return (error); 3391541Srgrimes} 3401541Srgrimes 3411541Srgrimes/* 3421541Srgrimes * Write system call 3431541Srgrimes */ 34412221Sbde#ifndef _SYS_SYSPROTO_H_ 3451541Srgrimesstruct write_args { 3461541Srgrimes int fd; 34738864Sbde const void *buf; 34838864Sbde size_t nbyte; 3491541Srgrimes}; 35012221Sbde#endif 35182752Sdillon/* 35282752Sdillon * MPSAFE 35382752Sdillon */ 3541549Srgrimesint 35583366Sjulianwrite(td, uap) 35683366Sjulian struct thread *td; 35786341Sdillon struct write_args *uap; 3581541Srgrimes{ 35986341Sdillon struct file *fp; 36068883Sdillon int error; 3611541Srgrimes 36282752Sdillon mtx_lock(&Giant); 36386341Sdillon if ((error = fget_write(td, uap->fd, &fp)) == 0) { 36483366Sjulian error = dofilewrite(td, fp, uap->fd, uap->buf, uap->nbyte, 36582752Sdillon (off_t)-1, 0); 36683366Sjulian fdrop(fp, td); 36782752Sdillon } else { 36886341Sdillon error = EBADF; /* XXX this can't be right */ 36982752Sdillon } 37082752Sdillon mtx_unlock(&Giant); 37168883Sdillon return(error); 3721541Srgrimes} 3731541Srgrimes 3741541Srgrimes/* 37545311Sdt * Pwrite system call 37645065Salc */ 37745065Salc#ifndef _SYS_SYSPROTO_H_ 37845065Salcstruct pwrite_args { 37945065Salc int fd; 38045065Salc const void *buf; 38145065Salc size_t nbyte; 38245311Sdt int pad; 38345311Sdt off_t offset; 38445065Salc}; 38545065Salc#endif 38682752Sdillon/* 38782752Sdillon * MPSAFE 38882752Sdillon */ 38945065Salcint 39083366Sjulianpwrite(td, uap) 39183366Sjulian struct thread *td; 39286341Sdillon struct pwrite_args *uap; 39345065Salc{ 39486341Sdillon struct file *fp; 39568883Sdillon int error; 39645311Sdt 39782752Sdillon mtx_lock(&Giant); 39886341Sdillon if ((error = fget_write(td, uap->fd, &fp)) == 0) { 39986341Sdillon if (fp->f_type == DTYPE_VNODE) { 40086341Sdillon error = dofilewrite(td, fp, uap->fd, uap->buf, 40186341Sdillon uap->nbyte, uap->offset, FOF_OFFSET); 40286341Sdillon } else { 40386341Sdillon error = ESPIPE; 40486341Sdillon } 40583366Sjulian fdrop(fp, td); 40668883Sdillon } else { 40786341Sdillon error = EBADF; /* this can't be right */ 40868883Sdillon } 40968883Sdillon return(error); 41045311Sdt} 41145311Sdt 41245311Sdtstatic int 41383366Sjuliandofilewrite(td, fp, fd, buf, nbyte, offset, flags) 41483366Sjulian struct thread *td; 41545311Sdt struct file *fp; 41645311Sdt int fd, flags; 41745311Sdt const void *buf; 41845311Sdt size_t nbyte; 41945311Sdt off_t offset; 42045311Sdt{ 42145065Salc struct uio auio; 42245065Salc struct iovec aiov; 42345065Salc long cnt, error = 0; 42445065Salc#ifdef KTRACE 42545065Salc struct iovec ktriov; 42662378Sgreen struct uio ktruio; 42763905Sgreen int didktr = 0; 42845065Salc#endif 42945065Salc 43063974Speter aiov.iov_base = (void *)(uintptr_t)buf; 43145311Sdt aiov.iov_len = nbyte; 43245065Salc auio.uio_iov = &aiov; 43345065Salc auio.uio_iovcnt = 1; 43445311Sdt auio.uio_offset = offset; 43545311Sdt if (nbyte > INT_MAX) 43645065Salc return (EINVAL); 43745311Sdt auio.uio_resid = nbyte; 43845065Salc auio.uio_rw = UIO_WRITE; 43945065Salc auio.uio_segflg = UIO_USERSPACE; 44083366Sjulian auio.uio_td = td; 44145065Salc#ifdef KTRACE 44245065Salc /* 44362378Sgreen * if tracing, save a copy of iovec and uio 44445065Salc */ 44583366Sjulian if (KTRPOINT(td->td_proc, KTR_GENIO)) { 44645065Salc ktriov = aiov; 44762378Sgreen ktruio = auio; 44863905Sgreen didktr = 1; 44962378Sgreen } 45045065Salc#endif 45145311Sdt cnt = nbyte; 45269407Salfred if (fp->f_type == DTYPE_VNODE) 45369407Salfred bwillwrite(); 45483366Sjulian if ((error = fo_write(fp, &auio, fp->f_cred, flags, td))) { 45545065Salc if (auio.uio_resid != cnt && (error == ERESTART || 45645065Salc error == EINTR || error == EWOULDBLOCK)) 45745065Salc error = 0; 45873929Sjhb if (error == EPIPE) { 45983366Sjulian PROC_LOCK(td->td_proc); 46083366Sjulian psignal(td->td_proc, SIGPIPE); 46183366Sjulian PROC_UNLOCK(td->td_proc); 46273929Sjhb } 46345065Salc } 46445065Salc cnt -= auio.uio_resid; 46545065Salc#ifdef KTRACE 46663905Sgreen if (didktr && error == 0) { 46762378Sgreen ktruio.uio_iov = &ktriov; 46862378Sgreen ktruio.uio_resid = cnt; 46983366Sjulian ktrgenio(td->td_proc->p_tracep, fd, UIO_WRITE, &ktruio, error); 47062378Sgreen } 47145065Salc#endif 47283366Sjulian td->td_retval[0] = cnt; 47345065Salc return (error); 47445065Salc} 47545065Salc 47645065Salc/* 4771541Srgrimes * Gather write system call 4781541Srgrimes */ 47912221Sbde#ifndef _SYS_SYSPROTO_H_ 4801541Srgrimesstruct writev_args { 4811541Srgrimes int fd; 4821541Srgrimes struct iovec *iovp; 4831541Srgrimes u_int iovcnt; 4841541Srgrimes}; 48512221Sbde#endif 48682752Sdillon/* 48782752Sdillon * MPSAFE 48882752Sdillon */ 4891549Srgrimesint 49083366Sjulianwritev(td, uap) 49183366Sjulian struct thread *td; 4921541Srgrimes register struct writev_args *uap; 4931541Srgrimes{ 49486341Sdillon struct file *fp; 4951541Srgrimes struct uio auio; 4961541Srgrimes register struct iovec *iov; 4971541Srgrimes struct iovec *needfree; 4981541Srgrimes struct iovec aiov[UIO_SMALLIOV]; 4991541Srgrimes long i, cnt, error = 0; 5001541Srgrimes u_int iovlen; 5011541Srgrimes#ifdef KTRACE 5021541Srgrimes struct iovec *ktriov = NULL; 50362378Sgreen struct uio ktruio; 5041541Srgrimes#endif 5051541Srgrimes 50682752Sdillon mtx_lock(&Giant); 50786341Sdillon if ((error = fget_write(td, uap->fd, &fp)) != 0) { 50882752Sdillon error = EBADF; 50982752Sdillon goto done2; 51082752Sdillon } 5111541Srgrimes /* note: can't use iovlen until iovcnt is validated */ 5121541Srgrimes iovlen = uap->iovcnt * sizeof (struct iovec); 5131541Srgrimes if (uap->iovcnt > UIO_SMALLIOV) { 51452227Sgreen if (uap->iovcnt > UIO_MAXIOV) { 51552227Sgreen needfree = NULL; 51652227Sgreen error = EINVAL; 51752227Sgreen goto done; 51852227Sgreen } 5191541Srgrimes MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 5201541Srgrimes needfree = iov; 5211541Srgrimes } else { 5221541Srgrimes iov = aiov; 5231541Srgrimes needfree = NULL; 5241541Srgrimes } 5251541Srgrimes auio.uio_iov = iov; 5261541Srgrimes auio.uio_iovcnt = uap->iovcnt; 5271541Srgrimes auio.uio_rw = UIO_WRITE; 5281541Srgrimes auio.uio_segflg = UIO_USERSPACE; 52983366Sjulian auio.uio_td = td; 53026671Sdyson auio.uio_offset = -1; 5313098Sphk if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 5321541Srgrimes goto done; 5331541Srgrimes auio.uio_resid = 0; 5341541Srgrimes for (i = 0; i < uap->iovcnt; i++) { 53538517Sdfr if (iov->iov_len > INT_MAX - auio.uio_resid) { 5361541Srgrimes error = EINVAL; 5371541Srgrimes goto done; 5381541Srgrimes } 53938517Sdfr auio.uio_resid += iov->iov_len; 5401541Srgrimes iov++; 5411541Srgrimes } 5421541Srgrimes#ifdef KTRACE 5431541Srgrimes /* 54462378Sgreen * if tracing, save a copy of iovec and uio 5451541Srgrimes */ 54683366Sjulian if (KTRPOINT(td->td_proc, KTR_GENIO)) { 5471541Srgrimes MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 5481541Srgrimes bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 54962378Sgreen ktruio = auio; 5501541Srgrimes } 5511541Srgrimes#endif 5521541Srgrimes cnt = auio.uio_resid; 55369733Sdillon if (fp->f_type == DTYPE_VNODE) 55469733Sdillon bwillwrite(); 55583366Sjulian if ((error = fo_write(fp, &auio, fp->f_cred, 0, td))) { 5561541Srgrimes if (auio.uio_resid != cnt && (error == ERESTART || 5571541Srgrimes error == EINTR || error == EWOULDBLOCK)) 5581541Srgrimes error = 0; 55973929Sjhb if (error == EPIPE) { 56083366Sjulian PROC_LOCK(td->td_proc); 56183366Sjulian psignal(td->td_proc, SIGPIPE); 56283366Sjulian PROC_UNLOCK(td->td_proc); 56373929Sjhb } 5641541Srgrimes } 5651541Srgrimes cnt -= auio.uio_resid; 5661541Srgrimes#ifdef KTRACE 5671541Srgrimes if (ktriov != NULL) { 56862378Sgreen if (error == 0) { 56962378Sgreen ktruio.uio_iov = ktriov; 57062378Sgreen ktruio.uio_resid = cnt; 57183366Sjulian ktrgenio(td->td_proc->p_tracep, uap->fd, UIO_WRITE, &ktruio, 57262378Sgreen error); 57362378Sgreen } 5741541Srgrimes FREE(ktriov, M_TEMP); 5751541Srgrimes } 5761541Srgrimes#endif 57783366Sjulian td->td_retval[0] = cnt; 5781541Srgrimesdone: 57983366Sjulian fdrop(fp, td); 5801541Srgrimes if (needfree) 5811541Srgrimes FREE(needfree, M_IOV); 58282752Sdillondone2: 58382752Sdillon mtx_unlock(&Giant); 5841541Srgrimes return (error); 5851541Srgrimes} 5861541Srgrimes 5871541Srgrimes/* 5881541Srgrimes * Ioctl system call 5891541Srgrimes */ 59012221Sbde#ifndef _SYS_SYSPROTO_H_ 5911541Srgrimesstruct ioctl_args { 5921541Srgrimes int fd; 59338517Sdfr u_long com; 5941541Srgrimes caddr_t data; 5951541Srgrimes}; 59612221Sbde#endif 59782752Sdillon/* 59882752Sdillon * MPSAFE 59982752Sdillon */ 6001541Srgrimes/* ARGSUSED */ 6011549Srgrimesint 60283366Sjulianioctl(td, uap) 60383366Sjulian struct thread *td; 6041541Srgrimes register struct ioctl_args *uap; 6051541Srgrimes{ 6061541Srgrimes register struct file *fp; 6071541Srgrimes register struct filedesc *fdp; 60836846Sdfr register u_long com; 60982752Sdillon int error = 0; 6101541Srgrimes register u_int size; 6111541Srgrimes caddr_t data, memp; 6121541Srgrimes int tmp; 6131541Srgrimes#define STK_PARAMS 128 61460269Sdillon union { 61560269Sdillon char stkbuf[STK_PARAMS]; 61660269Sdillon long align; 61760269Sdillon } ubuf; 6181541Srgrimes 61989306Salfred fp = ffind_hold(td, uap->fd); 62089306Salfred if (fp == NULL) 62189306Salfred return (EBADF); 62282752Sdillon if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 62389306Salfred fdrop(fp, td); 62489306Salfred return (EBADF); 62582752Sdillon } 62689306Salfred fdp = td->td_proc->p_fd; 6271541Srgrimes switch (com = uap->com) { 6281541Srgrimes case FIONCLEX: 62989306Salfred FILEDESC_LOCK(fdp); 6301541Srgrimes fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 63189306Salfred FILEDESC_UNLOCK(fdp); 63289306Salfred fdrop(fp, td); 63389306Salfred return (0); 6341541Srgrimes case FIOCLEX: 63589306Salfred FILEDESC_LOCK(fdp); 6361541Srgrimes fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 63789306Salfred FILEDESC_UNLOCK(fdp); 63889306Salfred fdrop(fp, td); 63989306Salfred return (0); 6401541Srgrimes } 6411541Srgrimes 6421541Srgrimes /* 6431541Srgrimes * Interpret high order word to find amount of data to be 6441541Srgrimes * copied to/from the user's address space. 6451541Srgrimes */ 6461541Srgrimes size = IOCPARM_LEN(com); 64782752Sdillon if (size > IOCPARM_MAX) { 64889306Salfred fdrop(fp, td); 64989306Salfred return (ENOTTY); 65082752Sdillon } 65168883Sdillon 65289306Salfred mtx_lock(&Giant); 6531541Srgrimes memp = NULL; 65460269Sdillon if (size > sizeof (ubuf.stkbuf)) { 6551541Srgrimes memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 6561541Srgrimes data = memp; 65768883Sdillon } else { 65860269Sdillon data = ubuf.stkbuf; 65968883Sdillon } 6601541Srgrimes if (com&IOC_IN) { 6611541Srgrimes if (size) { 6621541Srgrimes error = copyin(uap->data, data, (u_int)size); 6631541Srgrimes if (error) { 6641541Srgrimes if (memp) 6651541Srgrimes free(memp, M_IOCTLOPS); 66683366Sjulian fdrop(fp, td); 66789306Salfred goto done; 6681541Srgrimes } 66968883Sdillon } else { 6701541Srgrimes *(caddr_t *)data = uap->data; 67168883Sdillon } 67268883Sdillon } else if ((com&IOC_OUT) && size) { 6731541Srgrimes /* 6741541Srgrimes * Zero the buffer so the user always 6751541Srgrimes * gets back something deterministic. 6761541Srgrimes */ 6771541Srgrimes bzero(data, size); 67868883Sdillon } else if (com&IOC_VOID) { 6791541Srgrimes *(caddr_t *)data = uap->data; 68068883Sdillon } 6811541Srgrimes 6821541Srgrimes switch (com) { 6831541Srgrimes 6841541Srgrimes case FIONBIO: 68589306Salfred FILE_LOCK(fp); 6863098Sphk if ((tmp = *(int *)data)) 6871541Srgrimes fp->f_flag |= FNONBLOCK; 6881541Srgrimes else 6891541Srgrimes fp->f_flag &= ~FNONBLOCK; 69089306Salfred FILE_UNLOCK(fp); 69183366Sjulian error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td); 6921541Srgrimes break; 6931541Srgrimes 6941541Srgrimes case FIOASYNC: 69589306Salfred FILE_LOCK(fp); 6963098Sphk if ((tmp = *(int *)data)) 6971541Srgrimes fp->f_flag |= FASYNC; 6981541Srgrimes else 6991541Srgrimes fp->f_flag &= ~FASYNC; 70089306Salfred FILE_UNLOCK(fp); 70183366Sjulian error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td); 7021541Srgrimes break; 7031541Srgrimes 7041541Srgrimes default: 70583366Sjulian error = fo_ioctl(fp, com, data, td); 7061541Srgrimes /* 7071541Srgrimes * Copy any data to user, size was 7081541Srgrimes * already set and checked above. 7091541Srgrimes */ 7101541Srgrimes if (error == 0 && (com&IOC_OUT) && size) 7111541Srgrimes error = copyout(data, uap->data, (u_int)size); 7121541Srgrimes break; 7131541Srgrimes } 7141541Srgrimes if (memp) 7151541Srgrimes free(memp, M_IOCTLOPS); 71683366Sjulian fdrop(fp, td); 71789306Salfreddone: 71882752Sdillon mtx_unlock(&Giant); 7191541Srgrimes return (error); 7201541Srgrimes} 7211541Srgrimes 72255478Speterstatic int nselcoll; /* Select collisions since boot */ 72376564Stanimurastruct cv selwait; 72455478SpeterSYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 7251541Srgrimes 7261541Srgrimes/* 7271541Srgrimes * Select system call. 7281541Srgrimes */ 72912221Sbde#ifndef _SYS_SYSPROTO_H_ 7301541Srgrimesstruct select_args { 73117702Ssmpatel int nd; 7321541Srgrimes fd_set *in, *ou, *ex; 7331541Srgrimes struct timeval *tv; 7341541Srgrimes}; 73512221Sbde#endif 73682752Sdillon/* 73782752Sdillon * MPSAFE 73882752Sdillon */ 7391549Srgrimesint 74083366Sjulianselect(td, uap) 74183366Sjulian register struct thread *td; 7421541Srgrimes register struct select_args *uap; 7431541Srgrimes{ 74489306Salfred struct filedesc *fdp; 74522945Sbde /* 74622945Sbde * The magic 2048 here is chosen to be just enough for FD_SETSIZE 74722945Sbde * infds with the new FD_SETSIZE of 1024, and more than enough for 74822945Sbde * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 74922945Sbde * of 256. 75022945Sbde */ 75122945Sbde fd_mask s_selbits[howmany(2048, NFDBITS)]; 75276564Stanimura fd_mask s_heldbits[howmany(2048, NFDBITS)]; 75376564Stanimura fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits; 75435029Sphk struct timeval atv, rtv, ttv; 75576564Stanimura int ncoll, error, timo, i; 75622945Sbde u_int nbufbytes, ncpbytes, nfdbits; 7571541Srgrimes 75817702Ssmpatel if (uap->nd < 0) 75917713Ssmpatel return (EINVAL); 76089306Salfred fdp = td->td_proc->p_fd; 76182752Sdillon mtx_lock(&Giant); 76289306Salfred FILEDESC_LOCK(fdp); 76382752Sdillon 76483366Sjulian if (uap->nd > td->td_proc->p_fd->fd_nfiles) 76583366Sjulian uap->nd = td->td_proc->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 76689306Salfred FILEDESC_UNLOCK(fdp); 76717702Ssmpatel 76822945Sbde /* 76922945Sbde * Allocate just enough bits for the non-null fd_sets. Use the 77022945Sbde * preallocated auto buffer if possible. 77122945Sbde */ 77222945Sbde nfdbits = roundup(uap->nd, NFDBITS); 77322945Sbde ncpbytes = nfdbits / NBBY; 77422945Sbde nbufbytes = 0; 77522945Sbde if (uap->in != NULL) 77622945Sbde nbufbytes += 2 * ncpbytes; 77722945Sbde if (uap->ou != NULL) 77822945Sbde nbufbytes += 2 * ncpbytes; 77922945Sbde if (uap->ex != NULL) 78022945Sbde nbufbytes += 2 * ncpbytes; 78122945Sbde if (nbufbytes <= sizeof s_selbits) 78222945Sbde selbits = &s_selbits[0]; 78322945Sbde else 78422945Sbde selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 78576564Stanimura if (2 * ncpbytes <= sizeof s_heldbits) { 78676564Stanimura bzero(s_heldbits, sizeof(s_heldbits)); 78776564Stanimura heldbits = &s_heldbits[0]; 78876564Stanimura } else 78976564Stanimura heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO); 79017702Ssmpatel 79117702Ssmpatel /* 79222945Sbde * Assign pointers into the bit buffers and fetch the input bits. 79322945Sbde * Put the output buffers together so that they can be bzeroed 79422945Sbde * together. 79517702Ssmpatel */ 79622945Sbde sbp = selbits; 79776564Stanimura hibits = heldbits + ncpbytes / sizeof *heldbits; 79876564Stanimura hobits = heldbits; 7991541Srgrimes#define getbits(name, x) \ 80022945Sbde do { \ 80122945Sbde if (uap->name == NULL) \ 80222945Sbde ibits[x] = NULL; \ 80322945Sbde else { \ 80422945Sbde ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 80522945Sbde obits[x] = sbp; \ 80622945Sbde sbp += ncpbytes / sizeof *sbp; \ 80722945Sbde error = copyin(uap->name, ibits[x], ncpbytes); \ 80876564Stanimura if (error != 0) \ 80976564Stanimura goto done_noproclock; \ 81076564Stanimura for (i = 0; \ 81176564Stanimura i < ncpbytes / sizeof ibits[i][0]; \ 81276564Stanimura i++) \ 81376564Stanimura hibits[i] |= ibits[x][i]; \ 81422945Sbde } \ 81522945Sbde } while (0) 8161541Srgrimes getbits(in, 0); 8171541Srgrimes getbits(ou, 1); 8181541Srgrimes getbits(ex, 2); 8191541Srgrimes#undef getbits 82022945Sbde if (nbufbytes != 0) 82122945Sbde bzero(selbits, nbufbytes / 2); 8221541Srgrimes 8231541Srgrimes if (uap->tv) { 8241541Srgrimes error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 8251541Srgrimes sizeof (atv)); 82676564Stanimura if (error) 82776564Stanimura goto done_noproclock; 8281541Srgrimes if (itimerfix(&atv)) { 8291541Srgrimes error = EINVAL; 83076564Stanimura goto done_noproclock; 8311541Srgrimes } 83236119Sphk getmicrouptime(&rtv); 83335029Sphk timevaladd(&atv, &rtv); 83463057Sjhb } else { 83535029Sphk atv.tv_sec = 0; 83663057Sjhb atv.tv_usec = 0; 83763057Sjhb } 83883366Sjulian selholddrop(td, hibits, hobits, uap->nd, 1); 83935029Sphk timo = 0; 84083366Sjulian PROC_LOCK(td->td_proc); 8411541Srgrimesretry: 8421541Srgrimes ncoll = nselcoll; 84383799Sjhb mtx_lock_spin(&sched_lock); 84483366Sjulian td->td_flags |= TDF_SELECT; 84583799Sjhb mtx_unlock_spin(&sched_lock); 84683366Sjulian PROC_UNLOCK(td->td_proc); 84783366Sjulian error = selscan(td, ibits, obits, uap->nd); 84883366Sjulian PROC_LOCK(td->td_proc); 84983366Sjulian if (error || td->td_retval[0]) 8501541Srgrimes goto done; 85163049Sjhb if (atv.tv_sec || atv.tv_usec) { 85236119Sphk getmicrouptime(&rtv); 85376618Stanimura if (timevalcmp(&rtv, &atv, >=)) { 85476618Stanimura /* 85576618Stanimura * An event of our interest may occur during locking a process. 85676618Stanimura * In order to avoid missing the event that occured during locking 85783366Sjulian * the process, test TDF_SELECT and rescan file descriptors if 85876618Stanimura * necessary. 85976618Stanimura */ 86083799Sjhb mtx_lock_spin(&sched_lock); 86183366Sjulian if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { 86276618Stanimura ncoll = nselcoll; 86383366Sjulian td->td_flags |= TDF_SELECT; 86483799Sjhb mtx_unlock_spin(&sched_lock); 86583366Sjulian PROC_UNLOCK(td->td_proc); 86683366Sjulian error = selscan(td, ibits, obits, uap->nd); 86783366Sjulian PROC_LOCK(td->td_proc); 86883799Sjhb } else 86983799Sjhb mtx_unlock_spin(&sched_lock); 87035029Sphk goto done; 87176618Stanimura } 87235029Sphk ttv = atv; 87335029Sphk timevalsub(&ttv, &rtv); 87435029Sphk timo = ttv.tv_sec > 24 * 60 * 60 ? 87535029Sphk 24 * 60 * 60 * hz : tvtohz(&ttv); 87635029Sphk } 87783799Sjhb mtx_lock_spin(&sched_lock); 87883366Sjulian td->td_flags &= ~TDF_SELECT; 87983799Sjhb mtx_unlock_spin(&sched_lock); 88055943Sjasone 88176564Stanimura if (timo > 0) 88283366Sjulian error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo); 88376564Stanimura else 88483366Sjulian error = cv_wait_sig(&selwait, &td->td_proc->p_mtx); 88555943Sjasone 8861541Srgrimes if (error == 0) 8871541Srgrimes goto retry; 88876564Stanimura 8891541Srgrimesdone: 89083799Sjhb mtx_lock_spin(&sched_lock); 89183366Sjulian td->td_flags &= ~TDF_SELECT; 89283799Sjhb mtx_unlock_spin(&sched_lock); 89383366Sjulian PROC_UNLOCK(td->td_proc); 89483366Sjulian selholddrop(td, hibits, hobits, uap->nd, 0); 89576564Stanimuradone_noproclock: 8961541Srgrimes /* select is not restarted after signals... */ 8971541Srgrimes if (error == ERESTART) 8981541Srgrimes error = EINTR; 8991541Srgrimes if (error == EWOULDBLOCK) 9001541Srgrimes error = 0; 9011541Srgrimes#define putbits(name, x) \ 90222945Sbde if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 9031541Srgrimes error = error2; 9041541Srgrimes if (error == 0) { 9051541Srgrimes int error2; 9061541Srgrimes 9071541Srgrimes putbits(in, 0); 9081541Srgrimes putbits(ou, 1); 9091541Srgrimes putbits(ex, 2); 9101541Srgrimes#undef putbits 9111541Srgrimes } 91222945Sbde if (selbits != &s_selbits[0]) 91322945Sbde free(selbits, M_SELECT); 91476564Stanimura if (heldbits != &s_heldbits[0]) 91576564Stanimura free(heldbits, M_SELECT); 91682752Sdillon 91782752Sdillon mtx_unlock(&Giant); 9181541Srgrimes return (error); 9191541Srgrimes} 9201541Srgrimes 92189306Salfred/* 92289306Salfred * Used to hold then release a group of fds for select(2). 92389306Salfred * Hold (hold == 1) or release (hold == 0) a group of filedescriptors. 92489306Salfred * if holding then use ibits setting the bits in obits, otherwise use obits. 92589306Salfred */ 92612819Sphkstatic int 92783366Sjulianselholddrop(td, ibits, obits, nfd, hold) 92883366Sjulian struct thread *td; 92976564Stanimura fd_mask *ibits, *obits; 93076564Stanimura int nfd, hold; 93176564Stanimura{ 93283366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 93376564Stanimura int i, fd; 93476564Stanimura fd_mask bits; 93576564Stanimura struct file *fp; 93676564Stanimura 93789306Salfred FILEDESC_LOCK(fdp); 93876564Stanimura for (i = 0; i < nfd; i += NFDBITS) { 93976564Stanimura if (hold) 94076564Stanimura bits = ibits[i/NFDBITS]; 94176564Stanimura else 94276564Stanimura bits = obits[i/NFDBITS]; 94376564Stanimura /* ffs(int mask) not portable, fd_mask is long */ 94476564Stanimura for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 94576564Stanimura if (!(bits & 1)) 94676564Stanimura continue; 94776564Stanimura fp = fdp->fd_ofiles[fd]; 94889306Salfred if (fp == NULL) { 94989306Salfred FILEDESC_UNLOCK(fdp); 95076564Stanimura return (EBADF); 95189306Salfred } 95276564Stanimura if (hold) { 95376564Stanimura fhold(fp); 95476564Stanimura obits[(fd)/NFDBITS] |= 95576564Stanimura ((fd_mask)1 << ((fd) % NFDBITS)); 95689306Salfred } else { 95789306Salfred /* XXX: optimize by making a special 95889306Salfred * version of fdrop that only unlocks 95989306Salfred * the filedesc if needed? This would 96089306Salfred * redcuce the number of lock/unlock 96189306Salfred * pairs by quite a bit. 96289306Salfred */ 96389306Salfred FILEDESC_UNLOCK(fdp); 96483366Sjulian fdrop(fp, td); 96589306Salfred FILEDESC_LOCK(fdp); 96689306Salfred } 96776564Stanimura } 96876564Stanimura } 96989306Salfred FILEDESC_UNLOCK(fdp); 97076564Stanimura return (0); 97176564Stanimura} 97276564Stanimura 97376564Stanimurastatic int 97483366Sjulianselscan(td, ibits, obits, nfd) 97583366Sjulian struct thread *td; 97617702Ssmpatel fd_mask **ibits, **obits; 97730994Sphk int nfd; 9781541Srgrimes{ 97957357Speter int msk, i, fd; 98057357Speter fd_mask bits; 9811541Srgrimes struct file *fp; 9821541Srgrimes int n = 0; 98331364Sbde /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 98431364Sbde static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 9851541Srgrimes 9861541Srgrimes for (msk = 0; msk < 3; msk++) { 98722945Sbde if (ibits[msk] == NULL) 98822945Sbde continue; 9891541Srgrimes for (i = 0; i < nfd; i += NFDBITS) { 99017702Ssmpatel bits = ibits[msk][i/NFDBITS]; 99157357Speter /* ffs(int mask) not portable, fd_mask is long */ 99257357Speter for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 99357357Speter if (!(bits & 1)) 99457357Speter continue; 99589306Salfred fp = ffind_hold(td, fd); 9961541Srgrimes if (fp == NULL) 9971541Srgrimes return (EBADF); 99883366Sjulian if (fo_poll(fp, flag[msk], fp->f_cred, td)) { 99922945Sbde obits[msk][(fd)/NFDBITS] |= 100057357Speter ((fd_mask)1 << ((fd) % NFDBITS)); 10011541Srgrimes n++; 10021541Srgrimes } 100389306Salfred fdrop(fp, td); 10041541Srgrimes } 10051541Srgrimes } 10061541Srgrimes } 100783366Sjulian td->td_retval[0] = n; 10081541Srgrimes return (0); 10091541Srgrimes} 10101541Srgrimes 101129351Speter/* 101229351Speter * Poll system call. 101329351Speter */ 101429351Speter#ifndef _SYS_SYSPROTO_H_ 101529351Speterstruct poll_args { 101629351Speter struct pollfd *fds; 101729351Speter u_int nfds; 101829351Speter int timeout; 101929351Speter}; 102029351Speter#endif 102182752Sdillon/* 102282752Sdillon * MPSAFE 102382752Sdillon */ 102429351Speterint 102583366Sjulianpoll(td, uap) 102683366Sjulian struct thread *td; 102773159Sjlemon struct poll_args *uap; 102829351Speter{ 102929351Speter caddr_t bits; 103029351Speter char smallbits[32 * sizeof(struct pollfd)]; 103135029Sphk struct timeval atv, rtv, ttv; 103276564Stanimura int ncoll, error = 0, timo; 103373159Sjlemon u_int nfds; 103429351Speter size_t ni; 103576564Stanimura struct pollfd p_heldbits[32]; 103676564Stanimura struct pollfd *heldbits; 103729351Speter 103872146Speter nfds = SCARG(uap, nfds); 103982752Sdillon 104082752Sdillon mtx_lock(&Giant); 104172146Speter /* 104272203Speter * This is kinda bogus. We have fd limits, but that is not 104372203Speter * really related to the size of the pollfd array. Make sure 104472203Speter * we let the process use at least FD_SETSIZE entries and at 104572203Speter * least enough for the current limits. We want to be reasonably 104672203Speter * safe, but not overly restrictive. 104772146Speter */ 104883366Sjulian if ((nfds > td->td_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur) && 104983366Sjulian (nfds > FD_SETSIZE)) { 105082752Sdillon error = EINVAL; 105182752Sdillon goto done2; 105282752Sdillon } 105372146Speter ni = nfds * sizeof(struct pollfd); 105429351Speter if (ni > sizeof(smallbits)) 105529351Speter bits = malloc(ni, M_TEMP, M_WAITOK); 105629351Speter else 105729351Speter bits = smallbits; 105876564Stanimura if (ni > sizeof(p_heldbits)) 105976564Stanimura heldbits = malloc(ni, M_TEMP, M_WAITOK); 106076564Stanimura else { 106176564Stanimura bzero(p_heldbits, sizeof(p_heldbits)); 106276564Stanimura heldbits = p_heldbits; 106376564Stanimura } 106429351Speter error = copyin(SCARG(uap, fds), bits, ni); 106529351Speter if (error) 106676564Stanimura goto done_noproclock; 106776564Stanimura bcopy(bits, heldbits, ni); 106829351Speter if (SCARG(uap, timeout) != INFTIM) { 106929351Speter atv.tv_sec = SCARG(uap, timeout) / 1000; 107029351Speter atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 107129351Speter if (itimerfix(&atv)) { 107229351Speter error = EINVAL; 107376564Stanimura goto done_noproclock; 107429351Speter } 107536119Sphk getmicrouptime(&rtv); 107635029Sphk timevaladd(&atv, &rtv); 107763057Sjhb } else { 107835029Sphk atv.tv_sec = 0; 107963057Sjhb atv.tv_usec = 0; 108063057Sjhb } 108183366Sjulian pollholddrop(td, heldbits, nfds, 1); 108235029Sphk timo = 0; 108383366Sjulian PROC_LOCK(td->td_proc); 108429351Speterretry: 108529351Speter ncoll = nselcoll; 108683799Sjhb mtx_lock_spin(&sched_lock); 108783366Sjulian td->td_flags |= TDF_SELECT; 108883799Sjhb mtx_unlock_spin(&sched_lock); 108983366Sjulian PROC_UNLOCK(td->td_proc); 109083366Sjulian error = pollscan(td, (struct pollfd *)bits, nfds); 109183366Sjulian PROC_LOCK(td->td_proc); 109283366Sjulian if (error || td->td_retval[0]) 109329351Speter goto done; 109463049Sjhb if (atv.tv_sec || atv.tv_usec) { 109536119Sphk getmicrouptime(&rtv); 109676618Stanimura if (timevalcmp(&rtv, &atv, >=)) { 109776618Stanimura /* 109876618Stanimura * An event of our interest may occur during locking a process. 109976618Stanimura * In order to avoid missing the event that occured during locking 110083366Sjulian * the process, test TDF_SELECT and rescan file descriptors if 110176618Stanimura * necessary. 110276618Stanimura */ 110383799Sjhb mtx_lock_spin(&sched_lock); 110483366Sjulian if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { 110576618Stanimura ncoll = nselcoll; 110683366Sjulian td->td_flags |= TDF_SELECT; 110783799Sjhb mtx_unlock_spin(&sched_lock); 110883366Sjulian PROC_UNLOCK(td->td_proc); 110983366Sjulian error = pollscan(td, (struct pollfd *)bits, nfds); 111083366Sjulian PROC_LOCK(td->td_proc); 111183799Sjhb } else 111283799Sjhb mtx_unlock_spin(&sched_lock); 111335029Sphk goto done; 111476618Stanimura } 111535029Sphk ttv = atv; 111635029Sphk timevalsub(&ttv, &rtv); 111735029Sphk timo = ttv.tv_sec > 24 * 60 * 60 ? 111835029Sphk 24 * 60 * 60 * hz : tvtohz(&ttv); 111929351Speter } 112083799Sjhb mtx_lock_spin(&sched_lock); 112183366Sjulian td->td_flags &= ~TDF_SELECT; 112283799Sjhb mtx_unlock_spin(&sched_lock); 112376564Stanimura if (timo > 0) 112483366Sjulian error = cv_timedwait_sig(&selwait, &td->td_proc->p_mtx, timo); 112576564Stanimura else 112683366Sjulian error = cv_wait_sig(&selwait, &td->td_proc->p_mtx); 112729351Speter if (error == 0) 112829351Speter goto retry; 112976564Stanimura 113029351Speterdone: 113183799Sjhb mtx_lock_spin(&sched_lock); 113283366Sjulian td->td_flags &= ~TDF_SELECT; 113383799Sjhb mtx_unlock_spin(&sched_lock); 113483366Sjulian PROC_UNLOCK(td->td_proc); 113583366Sjulian pollholddrop(td, heldbits, nfds, 0); 113676564Stanimuradone_noproclock: 113729351Speter /* poll is not restarted after signals... */ 113829351Speter if (error == ERESTART) 113929351Speter error = EINTR; 114029351Speter if (error == EWOULDBLOCK) 114129351Speter error = 0; 114229351Speter if (error == 0) { 114329351Speter error = copyout(bits, SCARG(uap, fds), ni); 114429351Speter if (error) 114529351Speter goto out; 114629351Speter } 114729351Speterout: 114829351Speter if (ni > sizeof(smallbits)) 114929351Speter free(bits, M_TEMP); 115076564Stanimura if (ni > sizeof(p_heldbits)) 115176564Stanimura free(heldbits, M_TEMP); 115282752Sdillondone2: 115382752Sdillon mtx_unlock(&Giant); 115429351Speter return (error); 115529351Speter} 115629351Speter 115729351Speterstatic int 115883366Sjulianpollholddrop(td, fds, nfd, hold) 115983366Sjulian struct thread *td; 116076564Stanimura struct pollfd *fds; 116176564Stanimura u_int nfd; 116276564Stanimura int hold; 116376564Stanimura{ 116483366Sjulian register struct filedesc *fdp = td->td_proc->p_fd; 116576564Stanimura int i; 116676564Stanimura struct file *fp; 116776564Stanimura 116889306Salfred FILEDESC_LOCK(fdp); 116976564Stanimura for (i = 0; i < nfd; i++, fds++) { 117076564Stanimura if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) { 117176564Stanimura fp = fdp->fd_ofiles[fds->fd]; 117276564Stanimura if (hold) { 117376564Stanimura if (fp != NULL) { 117476564Stanimura fhold(fp); 117576564Stanimura fds->revents = 1; 117676564Stanimura } else 117776564Stanimura fds->revents = 0; 117889306Salfred } else if(fp != NULL && fds->revents) { 117989306Salfred FILE_LOCK(fp); 118089306Salfred FILEDESC_UNLOCK(fdp); 118189306Salfred fdrop_locked(fp, td); 118289306Salfred FILEDESC_LOCK(fdp); 118389306Salfred } 118476564Stanimura } 118576564Stanimura } 118689306Salfred FILEDESC_UNLOCK(fdp); 118776564Stanimura return (0); 118876564Stanimura} 118976564Stanimura 119076564Stanimurastatic int 119183366Sjulianpollscan(td, fds, nfd) 119283366Sjulian struct thread *td; 119329351Speter struct pollfd *fds; 119473159Sjlemon u_int nfd; 119529351Speter{ 119683366Sjulian register struct filedesc *fdp = td->td_proc->p_fd; 119729351Speter int i; 119829351Speter struct file *fp; 119929351Speter int n = 0; 120029351Speter 120129351Speter for (i = 0; i < nfd; i++, fds++) { 120289306Salfred FILEDESC_LOCK(fdp); 120341632Sjkh if (fds->fd >= fdp->fd_nfiles) { 120429351Speter fds->revents = POLLNVAL; 120529351Speter n++; 120689306Salfred FILEDESC_UNLOCK(fdp); 120741632Sjkh } else if (fds->fd < 0) { 120841632Sjkh fds->revents = 0; 120989306Salfred FILEDESC_UNLOCK(fdp); 121029351Speter } else { 121129351Speter fp = fdp->fd_ofiles[fds->fd]; 121289306Salfred FILEDESC_UNLOCK(fdp); 121368883Sdillon if (fp == NULL) { 121429351Speter fds->revents = POLLNVAL; 121529351Speter n++; 121629351Speter } else { 121731364Sbde /* 121831364Sbde * Note: backend also returns POLLHUP and 121931364Sbde * POLLERR if appropriate. 122031364Sbde */ 122151418Sgreen fds->revents = fo_poll(fp, fds->events, 122283366Sjulian fp->f_cred, td); 122329351Speter if (fds->revents != 0) 122429351Speter n++; 122529351Speter } 122629351Speter } 122729351Speter } 122883366Sjulian td->td_retval[0] = n; 122929351Speter return (0); 123029351Speter} 123129351Speter 123229351Speter/* 123329351Speter * OpenBSD poll system call. 123429351Speter * XXX this isn't quite a true representation.. OpenBSD uses select ops. 123529351Speter */ 123629351Speter#ifndef _SYS_SYSPROTO_H_ 123729351Speterstruct openbsd_poll_args { 123829351Speter struct pollfd *fds; 123929351Speter u_int nfds; 124029351Speter int timeout; 124129351Speter}; 124229351Speter#endif 124382752Sdillon/* 124482752Sdillon * MPSAFE 124582752Sdillon */ 124629351Speterint 124783366Sjulianopenbsd_poll(td, uap) 124883366Sjulian register struct thread *td; 124929351Speter register struct openbsd_poll_args *uap; 125029351Speter{ 125183366Sjulian return (poll(td, (struct poll_args *)uap)); 125229351Speter} 125329351Speter 12541541Srgrimes/*ARGSUSED*/ 12551549Srgrimesint 125683366Sjulianseltrue(dev, events, td) 12571541Srgrimes dev_t dev; 125829351Speter int events; 125983366Sjulian struct thread *td; 12601541Srgrimes{ 12611541Srgrimes 126229351Speter return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 12631541Srgrimes} 12641541Srgrimes 126583366Sjulianstatic int 126683366Sjulianfind_thread_in_proc(struct proc *p, struct thread *td) 126783366Sjulian{ 126883366Sjulian struct thread *td2; 126983366Sjulian FOREACH_THREAD_IN_PROC(p, td2) { 127083366Sjulian if (td2 == td) { 127183366Sjulian return (1); 127283366Sjulian } 127383366Sjulian } 127483366Sjulian return (0); 127583366Sjulian} 127683366Sjulian 12771541Srgrimes/* 12781541Srgrimes * Record a select request. 12791541Srgrimes */ 12801541Srgrimesvoid 12811541Srgrimesselrecord(selector, sip) 128283366Sjulian struct thread *selector; 12831541Srgrimes struct selinfo *sip; 12841541Srgrimes{ 12851541Srgrimes struct proc *p; 12861541Srgrimes pid_t mypid; 12871541Srgrimes 128883366Sjulian mypid = selector->td_proc->p_pid; 128983366Sjulian if ((sip->si_pid == mypid) && 129083366Sjulian (sip->si_thread == selector)) { /* XXXKSE should be an ID? */ 12911541Srgrimes return; 129283366Sjulian } 129383366Sjulian if (sip->si_pid && 129483366Sjulian (p = pfind(sip->si_pid)) && 129583366Sjulian (find_thread_in_proc(p, sip->si_thread))) { 129672200Sbmilekic mtx_lock_spin(&sched_lock); 129783366Sjulian if (sip->si_thread->td_wchan == (caddr_t)&selwait) { 129872200Sbmilekic mtx_unlock_spin(&sched_lock); 129975893Sjhb PROC_UNLOCK(p); 130071566Sjhb sip->si_flags |= SI_COLL; 130171566Sjhb return; 130271566Sjhb } 130372200Sbmilekic mtx_unlock_spin(&sched_lock); 130475893Sjhb PROC_UNLOCK(p); 130571566Sjhb } 130671566Sjhb sip->si_pid = mypid; 130783366Sjulian sip->si_thread = selector; 13081541Srgrimes} 13091541Srgrimes 13101541Srgrimes/* 13111541Srgrimes * Do a wakeup when a selectable event occurs. 13121541Srgrimes */ 13131541Srgrimesvoid 13141541Srgrimesselwakeup(sip) 13151541Srgrimes register struct selinfo *sip; 13161541Srgrimes{ 131783366Sjulian struct thread *td; 13181541Srgrimes register struct proc *p; 13191541Srgrimes 13201541Srgrimes if (sip->si_pid == 0) 13211541Srgrimes return; 13221541Srgrimes if (sip->si_flags & SI_COLL) { 13231541Srgrimes nselcoll++; 13241541Srgrimes sip->si_flags &= ~SI_COLL; 132576564Stanimura cv_broadcast(&selwait); 13261541Srgrimes } 13271541Srgrimes p = pfind(sip->si_pid); 13281541Srgrimes sip->si_pid = 0; 132983366Sjulian td = sip->si_thread; 13301541Srgrimes if (p != NULL) { 133183366Sjulian if (!find_thread_in_proc(p, td)) { 133283366Sjulian PROC_UNLOCK(p); /* lock is in pfind() */; 133383366Sjulian return; 133483366Sjulian } 133572200Sbmilekic mtx_lock_spin(&sched_lock); 133683366Sjulian if (td->td_wchan == (caddr_t)&selwait) { 133783366Sjulian if (td->td_proc->p_stat == SSLEEP) 133883366Sjulian setrunnable(td); 13391541Srgrimes else 134083366Sjulian cv_waitq_remove(td); 134175893Sjhb } else 134283366Sjulian td->td_flags &= ~TDF_SELECT; 134375893Sjhb mtx_unlock_spin(&sched_lock); 134483366Sjulian PROC_UNLOCK(p); /* Lock is in pfind() */ 13451541Srgrimes } 13461541Srgrimes} 134776564Stanimura 134876564Stanimurastatic void selectinit __P((void *)); 134976564StanimuraSYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL) 135076564Stanimura 135176564Stanimura/* ARGSUSED*/ 135276564Stanimurastatic void 135376564Stanimuraselectinit(dummy) 135476564Stanimura void *dummy; 135576564Stanimura{ 135676564Stanimura cv_init(&selwait, "select"); 135776564Stanimura} 1358