sys_generic.c revision 189450
1139804Simp/*- 21541Srgrimes * Copyright (c) 1982, 1986, 1989, 1993 31541Srgrimes * The Regents of the University of California. All rights reserved. 41541Srgrimes * (c) UNIX System Laboratories, Inc. 51541Srgrimes * All or some portions of this file are derived from material licensed 61541Srgrimes * to the University of California by American Telephone and Telegraph 71541Srgrimes * Co. or Unix System Laboratories, Inc. and are reproduced herein with 81541Srgrimes * the permission of UNIX System Laboratories, Inc. 91541Srgrimes * 101541Srgrimes * Redistribution and use in source and binary forms, with or without 111541Srgrimes * modification, are permitted provided that the following conditions 121541Srgrimes * are met: 131541Srgrimes * 1. Redistributions of source code must retain the above copyright 141541Srgrimes * notice, this list of conditions and the following disclaimer. 151541Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 161541Srgrimes * notice, this list of conditions and the following disclaimer in the 171541Srgrimes * documentation and/or other materials provided with the distribution. 181541Srgrimes * 4. Neither the name of the University nor the names of its contributors 191541Srgrimes * may be used to endorse or promote products derived from this software 201541Srgrimes * without specific prior written permission. 211541Srgrimes * 221541Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 231541Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 241541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 251541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 261541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 271541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 281541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 291541Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 301541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 311541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 321541Srgrimes * SUCH DAMAGE. 331541Srgrimes * 341541Srgrimes * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 351541Srgrimes */ 361541Srgrimes 37116182Sobrien#include <sys/cdefs.h> 38116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_generic.c 189450 2009-03-06 15:35:37Z kib $"); 39116182Sobrien 40147676Speter#include "opt_compat.h" 4113203Swollman#include "opt_ktrace.h" 4213203Swollman 431541Srgrimes#include <sys/param.h> 441541Srgrimes#include <sys/systm.h> 4512221Sbde#include <sys/sysproto.h> 461541Srgrimes#include <sys/filedesc.h> 4724206Sbde#include <sys/filio.h> 4824131Sbde#include <sys/fcntl.h> 491541Srgrimes#include <sys/file.h> 501541Srgrimes#include <sys/proc.h> 513308Sphk#include <sys/signalvar.h> 521541Srgrimes#include <sys/socketvar.h> 531541Srgrimes#include <sys/uio.h> 541541Srgrimes#include <sys/kernel.h> 55175140Sjhb#include <sys/ktr.h> 56114216Skan#include <sys/limits.h> 571541Srgrimes#include <sys/malloc.h> 5829351Speter#include <sys/poll.h> 5972146Speter#include <sys/resourcevar.h> 6070834Swollman#include <sys/selinfo.h> 61126326Sjhb#include <sys/sleepqueue.h> 62102779Siedowse#include <sys/syscallsubr.h> 6355478Speter#include <sys/sysctl.h> 6429351Speter#include <sys/sysent.h> 65124736Sache#include <sys/vnode.h> 6668883Sdillon#include <sys/bio.h> 6768883Sdillon#include <sys/buf.h> 6876564Stanimura#include <sys/condvar.h> 691541Srgrimes#ifdef KTRACE 701541Srgrimes#include <sys/ktrace.h> 711541Srgrimes#endif 721541Srgrimes 73175140Sjhb#include <security/audit/audit.h> 74174647Sjeff 7530354Sphkstatic MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 7630354Sphkstatic MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 7730354SphkMALLOC_DEFINE(M_IOV, "iov", "large iov's"); 7830309Sphk 7991972Salfredstatic int pollscan(struct thread *, struct pollfd *, u_int); 80174647Sjeffstatic int pollrescan(struct thread *); 8191972Salfredstatic int selscan(struct thread *, fd_mask **, fd_mask **, int); 82174647Sjeffstatic int selrescan(struct thread *, fd_mask **, fd_mask **); 83174647Sjeffstatic void selfdalloc(struct thread *, void *); 84174647Sjeffstatic void selfdfree(struct seltd *, struct selfd *); 85147813Sjhbstatic int dofileread(struct thread *, int, struct file *, struct uio *, 86147813Sjhb off_t, int); 87147813Sjhbstatic int dofilewrite(struct thread *, int, struct file *, struct uio *, 88147813Sjhb off_t, int); 89122352Stanimurastatic void doselwakeup(struct selinfo *, int); 90174647Sjeffstatic void seltdinit(struct thread *); 91174647Sjeffstatic int seltdwait(struct thread *, int); 92174647Sjeffstatic void seltdclear(struct thread *); 933485Sphk 94174647Sjeff/* 95174647Sjeff * One seltd per-thread allocated on demand as needed. 96174647Sjeff * 97174647Sjeff * t - protected by st_mtx 98174647Sjeff * k - Only accessed by curthread or read-only 99174647Sjeff */ 100174647Sjeffstruct seltd { 101174647Sjeff STAILQ_HEAD(, selfd) st_selq; /* (k) List of selfds. */ 102174647Sjeff struct selfd *st_free1; /* (k) free fd for read set. */ 103174647Sjeff struct selfd *st_free2; /* (k) free fd for write set. */ 104174647Sjeff struct mtx st_mtx; /* Protects struct seltd */ 105174647Sjeff struct cv st_wait; /* (t) Wait channel. */ 106174647Sjeff int st_flags; /* (t) SELTD_ flags. */ 107174647Sjeff}; 108174647Sjeff 109174647Sjeff#define SELTD_PENDING 0x0001 /* We have pending events. */ 110174647Sjeff#define SELTD_RESCAN 0x0002 /* Doing a rescan. */ 111174647Sjeff 112174647Sjeff/* 113174647Sjeff * One selfd allocated per-thread per-file-descriptor. 114174647Sjeff * f - protected by sf_mtx 115174647Sjeff */ 116174647Sjeffstruct selfd { 117174647Sjeff STAILQ_ENTRY(selfd) sf_link; /* (k) fds owned by this td. */ 118174647Sjeff TAILQ_ENTRY(selfd) sf_threads; /* (f) fds on this selinfo. */ 119174647Sjeff struct selinfo *sf_si; /* (f) selinfo when linked. */ 120174647Sjeff struct mtx *sf_mtx; /* Pointer to selinfo mtx. */ 121174647Sjeff struct seltd *sf_td; /* (k) owning seltd. */ 122174647Sjeff void *sf_cookie; /* (k) fd or pollfd. */ 123174647Sjeff}; 124174647Sjeff 125174647Sjeffstatic uma_zone_t selfd_zone; 126174647Sjeff 12712221Sbde#ifndef _SYS_SYSPROTO_H_ 1281541Srgrimesstruct read_args { 1291541Srgrimes int fd; 13038864Sbde void *buf; 13138864Sbde size_t nbyte; 1321541Srgrimes}; 13312221Sbde#endif 1341549Srgrimesint 13583366Sjulianread(td, uap) 13683366Sjulian struct thread *td; 13786341Sdillon struct read_args *uap; 1381541Srgrimes{ 139147813Sjhb struct uio auio; 140147813Sjhb struct iovec aiov; 14168883Sdillon int error; 1421541Srgrimes 143147813Sjhb if (uap->nbyte > INT_MAX) 144147813Sjhb return (EINVAL); 145147813Sjhb aiov.iov_base = uap->buf; 146147813Sjhb aiov.iov_len = uap->nbyte; 147147813Sjhb auio.uio_iov = &aiov; 148147813Sjhb auio.uio_iovcnt = 1; 149147813Sjhb auio.uio_resid = uap->nbyte; 150147813Sjhb auio.uio_segflg = UIO_USERSPACE; 151147813Sjhb error = kern_readv(td, uap->fd, &auio); 15268883Sdillon return(error); 1531541Srgrimes} 1541541Srgrimes 1551541Srgrimes/* 156147813Sjhb * Positioned read system call 15745065Salc */ 15845065Salc#ifndef _SYS_SYSPROTO_H_ 15945065Salcstruct pread_args { 16045065Salc int fd; 16145065Salc void *buf; 16245065Salc size_t nbyte; 16345311Sdt int pad; 16445311Sdt off_t offset; 16545065Salc}; 16645065Salc#endif 16745065Salcint 16883366Sjulianpread(td, uap) 16983366Sjulian struct thread *td; 17086341Sdillon struct pread_args *uap; 17145065Salc{ 17245065Salc struct uio auio; 17345065Salc struct iovec aiov; 174147813Sjhb int error; 17545065Salc 176147813Sjhb if (uap->nbyte > INT_MAX) 177147813Sjhb return (EINVAL); 178147813Sjhb aiov.iov_base = uap->buf; 179147813Sjhb aiov.iov_len = uap->nbyte; 18045065Salc auio.uio_iov = &aiov; 18145065Salc auio.uio_iovcnt = 1; 182147813Sjhb auio.uio_resid = uap->nbyte; 18345065Salc auio.uio_segflg = UIO_USERSPACE; 184147813Sjhb error = kern_preadv(td, uap->fd, &auio, uap->offset); 185147813Sjhb return(error); 18645065Salc} 18745065Salc 188171212Speterint 189171212Speterfreebsd6_pread(td, uap) 190171212Speter struct thread *td; 191171212Speter struct freebsd6_pread_args *uap; 192171212Speter{ 193171212Speter struct pread_args oargs; 194171212Speter 195171212Speter oargs.fd = uap->fd; 196171212Speter oargs.buf = uap->buf; 197171212Speter oargs.nbyte = uap->nbyte; 198171212Speter oargs.offset = uap->offset; 199171212Speter return (pread(td, &oargs)); 200171212Speter} 201171212Speter 20245065Salc/* 2031541Srgrimes * Scatter read system call. 2041541Srgrimes */ 20512221Sbde#ifndef _SYS_SYSPROTO_H_ 2061541Srgrimesstruct readv_args { 20712208Sbde int fd; 2081541Srgrimes struct iovec *iovp; 2091541Srgrimes u_int iovcnt; 2101541Srgrimes}; 21112221Sbde#endif 2121549Srgrimesint 213131897Sphkreadv(struct thread *td, struct readv_args *uap) 2141541Srgrimes{ 215144445Sjhb struct uio *auio; 216144445Sjhb int error; 217144445Sjhb 218144445Sjhb error = copyinuio(uap->iovp, uap->iovcnt, &auio); 219144445Sjhb if (error) 220144445Sjhb return (error); 221144445Sjhb error = kern_readv(td, uap->fd, auio); 222144445Sjhb free(auio, M_IOV); 223144445Sjhb return (error); 224144445Sjhb} 225144445Sjhb 226144445Sjhbint 227144445Sjhbkern_readv(struct thread *td, int fd, struct uio *auio) 228144445Sjhb{ 22986341Sdillon struct file *fp; 23096243Salc int error; 231147813Sjhb 232147813Sjhb error = fget_read(td, fd, &fp); 233147813Sjhb if (error) 234147813Sjhb return (error); 235147813Sjhb error = dofileread(td, fd, fp, auio, (off_t)-1, 0); 236147813Sjhb fdrop(fp, td); 237147813Sjhb return (error); 238147813Sjhb} 239147813Sjhb 240147813Sjhb/* 241147813Sjhb * Scatter positioned read system call. 242147813Sjhb */ 243147813Sjhb#ifndef _SYS_SYSPROTO_H_ 244147813Sjhbstruct preadv_args { 245147813Sjhb int fd; 246147813Sjhb struct iovec *iovp; 247147813Sjhb u_int iovcnt; 248147813Sjhb off_t offset; 249147813Sjhb}; 2501541Srgrimes#endif 251147813Sjhbint 252147813Sjhbpreadv(struct thread *td, struct preadv_args *uap) 253147813Sjhb{ 254147813Sjhb struct uio *auio; 255147813Sjhb int error; 2561541Srgrimes 257147813Sjhb error = copyinuio(uap->iovp, uap->iovcnt, &auio); 258147813Sjhb if (error) 259147813Sjhb return (error); 260147813Sjhb error = kern_preadv(td, uap->fd, auio, uap->offset); 261147813Sjhb free(auio, M_IOV); 262147813Sjhb return (error); 263147813Sjhb} 264147813Sjhb 265147813Sjhbint 266147813Sjhbkern_preadv(td, fd, auio, offset) 267147813Sjhb struct thread *td; 268147813Sjhb int fd; 269147813Sjhb struct uio *auio; 270147813Sjhb off_t offset; 271147813Sjhb{ 272147813Sjhb struct file *fp; 273147813Sjhb int error; 274147813Sjhb 275144445Sjhb error = fget_read(td, fd, &fp); 276131897Sphk if (error) 27796243Salc return (error); 278147813Sjhb if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) 279147813Sjhb error = ESPIPE; 280147813Sjhb else if (offset < 0 && fp->f_vnode->v_type != VCHR) 281147813Sjhb error = EINVAL; 282147813Sjhb else 283147813Sjhb error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); 284147813Sjhb fdrop(fp, td); 285147813Sjhb return (error); 286147813Sjhb} 287147813Sjhb 288147813Sjhb/* 289147813Sjhb * Common code for readv and preadv that reads data in 290147813Sjhb * from a file using the passed in uio, offset, and flags. 291147813Sjhb */ 292147813Sjhbstatic int 293147813Sjhbdofileread(td, fd, fp, auio, offset, flags) 294147813Sjhb struct thread *td; 295147813Sjhb int fd; 296147813Sjhb struct file *fp; 297147813Sjhb struct uio *auio; 298147813Sjhb off_t offset; 299147813Sjhb int flags; 300147813Sjhb{ 301147813Sjhb ssize_t cnt; 302147813Sjhb int error; 303147813Sjhb#ifdef KTRACE 304147813Sjhb struct uio *ktruio = NULL; 305147813Sjhb#endif 306147813Sjhb 307140800Sphk /* Finish zero length reads right here */ 308140800Sphk if (auio->uio_resid == 0) { 309140800Sphk td->td_retval[0] = 0; 310140800Sphk return(0); 311140800Sphk } 312131897Sphk auio->uio_rw = UIO_READ; 313147813Sjhb auio->uio_offset = offset; 314131897Sphk auio->uio_td = td; 3151541Srgrimes#ifdef KTRACE 316131897Sphk if (KTRPOINT(td, KTR_GENIO)) 317131897Sphk ktruio = cloneuio(auio); 3181541Srgrimes#endif 319131897Sphk cnt = auio->uio_resid; 320147813Sjhb if ((error = fo_read(fp, auio, td->td_ucred, flags, td))) { 321131897Sphk if (auio->uio_resid != cnt && (error == ERESTART || 3221541Srgrimes error == EINTR || error == EWOULDBLOCK)) 3231541Srgrimes error = 0; 32468883Sdillon } 325131897Sphk cnt -= auio->uio_resid; 3261541Srgrimes#ifdef KTRACE 327131897Sphk if (ktruio != NULL) { 328131897Sphk ktruio->uio_resid = cnt; 329144445Sjhb ktrgenio(fd, UIO_READ, ktruio, error); 3301541Srgrimes } 3311541Srgrimes#endif 33283366Sjulian td->td_retval[0] = cnt; 3331541Srgrimes return (error); 3341541Srgrimes} 3351541Srgrimes 33612221Sbde#ifndef _SYS_SYSPROTO_H_ 3371541Srgrimesstruct write_args { 3381541Srgrimes int fd; 33938864Sbde const void *buf; 34038864Sbde size_t nbyte; 3411541Srgrimes}; 34212221Sbde#endif 3431549Srgrimesint 34483366Sjulianwrite(td, uap) 34583366Sjulian struct thread *td; 34686341Sdillon struct write_args *uap; 3471541Srgrimes{ 348147813Sjhb struct uio auio; 349147813Sjhb struct iovec aiov; 35068883Sdillon int error; 3511541Srgrimes 352147813Sjhb if (uap->nbyte > INT_MAX) 353147813Sjhb return (EINVAL); 354147813Sjhb aiov.iov_base = (void *)(uintptr_t)uap->buf; 355147813Sjhb aiov.iov_len = uap->nbyte; 356147813Sjhb auio.uio_iov = &aiov; 357147813Sjhb auio.uio_iovcnt = 1; 358147813Sjhb auio.uio_resid = uap->nbyte; 359147813Sjhb auio.uio_segflg = UIO_USERSPACE; 360147813Sjhb error = kern_writev(td, uap->fd, &auio); 36168883Sdillon return(error); 3621541Srgrimes} 3631541Srgrimes 3641541Srgrimes/* 365167211Srwatson * Positioned write system call. 36645065Salc */ 36745065Salc#ifndef _SYS_SYSPROTO_H_ 36845065Salcstruct pwrite_args { 36945065Salc int fd; 37045065Salc const void *buf; 37145065Salc size_t nbyte; 37245311Sdt int pad; 37345311Sdt off_t offset; 37445065Salc}; 37545065Salc#endif 37645065Salcint 37783366Sjulianpwrite(td, uap) 37883366Sjulian struct thread *td; 37986341Sdillon struct pwrite_args *uap; 38045065Salc{ 38145065Salc struct uio auio; 38245065Salc struct iovec aiov; 383147813Sjhb int error; 38445065Salc 385147813Sjhb if (uap->nbyte > INT_MAX) 386147813Sjhb return (EINVAL); 387147813Sjhb aiov.iov_base = (void *)(uintptr_t)uap->buf; 388147813Sjhb aiov.iov_len = uap->nbyte; 38945065Salc auio.uio_iov = &aiov; 39045065Salc auio.uio_iovcnt = 1; 391147813Sjhb auio.uio_resid = uap->nbyte; 39245065Salc auio.uio_segflg = UIO_USERSPACE; 393147813Sjhb error = kern_pwritev(td, uap->fd, &auio, uap->offset); 394147813Sjhb return(error); 39545065Salc} 39645065Salc 397171212Speterint 398171212Speterfreebsd6_pwrite(td, uap) 399171212Speter struct thread *td; 400171212Speter struct freebsd6_pwrite_args *uap; 401171212Speter{ 402171212Speter struct pwrite_args oargs; 403171212Speter 404171212Speter oargs.fd = uap->fd; 405171212Speter oargs.buf = uap->buf; 406171212Speter oargs.nbyte = uap->nbyte; 407171212Speter oargs.offset = uap->offset; 408171212Speter return (pwrite(td, &oargs)); 409171212Speter} 410171212Speter 41145065Salc/* 412167211Srwatson * Gather write system call. 4131541Srgrimes */ 41412221Sbde#ifndef _SYS_SYSPROTO_H_ 4151541Srgrimesstruct writev_args { 4161541Srgrimes int fd; 4171541Srgrimes struct iovec *iovp; 4181541Srgrimes u_int iovcnt; 4191541Srgrimes}; 42012221Sbde#endif 4211549Srgrimesint 422131897Sphkwritev(struct thread *td, struct writev_args *uap) 4231541Srgrimes{ 424144445Sjhb struct uio *auio; 425144445Sjhb int error; 426144445Sjhb 427144445Sjhb error = copyinuio(uap->iovp, uap->iovcnt, &auio); 428144445Sjhb if (error) 429144445Sjhb return (error); 430144445Sjhb error = kern_writev(td, uap->fd, auio); 431144445Sjhb free(auio, M_IOV); 432144445Sjhb return (error); 433144445Sjhb} 434144445Sjhb 435144445Sjhbint 436144445Sjhbkern_writev(struct thread *td, int fd, struct uio *auio) 437144445Sjhb{ 43886341Sdillon struct file *fp; 439131897Sphk int error; 440147813Sjhb 441147813Sjhb error = fget_write(td, fd, &fp); 442147813Sjhb if (error) 443154073Sjhb return (error); 444147813Sjhb error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0); 445147813Sjhb fdrop(fp, td); 446147813Sjhb return (error); 447147813Sjhb} 448147813Sjhb 449147813Sjhb/* 450167211Srwatson * Gather positioned write system call. 451147813Sjhb */ 452147813Sjhb#ifndef _SYS_SYSPROTO_H_ 453147813Sjhbstruct pwritev_args { 454147813Sjhb int fd; 455147813Sjhb struct iovec *iovp; 456147813Sjhb u_int iovcnt; 457147813Sjhb off_t offset; 458147813Sjhb}; 459147813Sjhb#endif 460147813Sjhbint 461147813Sjhbpwritev(struct thread *td, struct pwritev_args *uap) 462147813Sjhb{ 463147813Sjhb struct uio *auio; 464147813Sjhb int error; 465147813Sjhb 466147813Sjhb error = copyinuio(uap->iovp, uap->iovcnt, &auio); 467147813Sjhb if (error) 468147813Sjhb return (error); 469147813Sjhb error = kern_pwritev(td, uap->fd, auio, uap->offset); 470147813Sjhb free(auio, M_IOV); 471147813Sjhb return (error); 472147813Sjhb} 473147813Sjhb 474147813Sjhbint 475147813Sjhbkern_pwritev(td, fd, auio, offset) 476147813Sjhb struct thread *td; 477147813Sjhb struct uio *auio; 478147813Sjhb int fd; 479147813Sjhb off_t offset; 480147813Sjhb{ 481147813Sjhb struct file *fp; 482147813Sjhb int error; 483147813Sjhb 484147813Sjhb error = fget_write(td, fd, &fp); 485147813Sjhb if (error) 486154073Sjhb return (error); 487147813Sjhb if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) 488147813Sjhb error = ESPIPE; 489147813Sjhb else if (offset < 0 && fp->f_vnode->v_type != VCHR) 490147813Sjhb error = EINVAL; 491147813Sjhb else 492147813Sjhb error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); 493147813Sjhb fdrop(fp, td); 494147813Sjhb return (error); 495147813Sjhb} 496147813Sjhb 497147813Sjhb/* 498147813Sjhb * Common code for writev and pwritev that writes data to 499147813Sjhb * a file using the passed in uio, offset, and flags. 500147813Sjhb */ 501147813Sjhbstatic int 502147813Sjhbdofilewrite(td, fd, fp, auio, offset, flags) 503147813Sjhb struct thread *td; 504147813Sjhb int fd; 505147813Sjhb struct file *fp; 506147813Sjhb struct uio *auio; 507147813Sjhb off_t offset; 508147813Sjhb int flags; 509147813Sjhb{ 510147813Sjhb ssize_t cnt; 511147813Sjhb int error; 5121541Srgrimes#ifdef KTRACE 513131897Sphk struct uio *ktruio = NULL; 5141541Srgrimes#endif 5151541Srgrimes 516131897Sphk auio->uio_rw = UIO_WRITE; 517131897Sphk auio->uio_td = td; 518147813Sjhb auio->uio_offset = offset; 5191541Srgrimes#ifdef KTRACE 520131897Sphk if (KTRPOINT(td, KTR_GENIO)) 521131897Sphk ktruio = cloneuio(auio); 5221541Srgrimes#endif 523131897Sphk cnt = auio->uio_resid; 52469733Sdillon if (fp->f_type == DTYPE_VNODE) 52569733Sdillon bwillwrite(); 526147813Sjhb if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) { 527131897Sphk if (auio->uio_resid != cnt && (error == ERESTART || 5281541Srgrimes error == EINTR || error == EWOULDBLOCK)) 5291541Srgrimes error = 0; 530147813Sjhb /* Socket layer is responsible for issuing SIGPIPE. */ 531167150Sbms if (fp->f_type != DTYPE_SOCKET && error == EPIPE) { 53283366Sjulian PROC_LOCK(td->td_proc); 53383366Sjulian psignal(td->td_proc, SIGPIPE); 53483366Sjulian PROC_UNLOCK(td->td_proc); 53573929Sjhb } 5361541Srgrimes } 537131897Sphk cnt -= auio->uio_resid; 5381541Srgrimes#ifdef KTRACE 539131897Sphk if (ktruio != NULL) { 540131897Sphk ktruio->uio_resid = cnt; 541144445Sjhb ktrgenio(fd, UIO_WRITE, ktruio, error); 5421541Srgrimes } 5431541Srgrimes#endif 54483366Sjulian td->td_retval[0] = cnt; 5451541Srgrimes return (error); 5461541Srgrimes} 5471541Srgrimes 548175140Sjhb/* 549175140Sjhb * Truncate a file given a file descriptor. 550175140Sjhb * 551175140Sjhb * Can't use fget_write() here, since must return EINVAL and not EBADF if the 552175140Sjhb * descriptor isn't writable. 553175140Sjhb */ 554175140Sjhbint 555175140Sjhbkern_ftruncate(td, fd, length) 556175140Sjhb struct thread *td; 557175140Sjhb int fd; 558175140Sjhb off_t length; 559175140Sjhb{ 560175140Sjhb struct file *fp; 561175140Sjhb int error; 562175140Sjhb 563175140Sjhb AUDIT_ARG(fd, fd); 564175140Sjhb if (length < 0) 565175140Sjhb return (EINVAL); 566175140Sjhb error = fget(td, fd, &fp); 567175140Sjhb if (error) 568175140Sjhb return (error); 569175140Sjhb AUDIT_ARG(file, td->td_proc, fp); 570175140Sjhb if (!(fp->f_flag & FWRITE)) { 571175140Sjhb fdrop(fp, td); 572175140Sjhb return (EINVAL); 573175140Sjhb } 574175140Sjhb error = fo_truncate(fp, length, td->td_ucred, td); 575175140Sjhb fdrop(fp, td); 576175140Sjhb return (error); 577175140Sjhb} 578175140Sjhb 57912221Sbde#ifndef _SYS_SYSPROTO_H_ 580175140Sjhbstruct ftruncate_args { 581175140Sjhb int fd; 582175140Sjhb int pad; 583175140Sjhb off_t length; 584175140Sjhb}; 585175140Sjhb#endif 586175140Sjhbint 587175140Sjhbftruncate(td, uap) 588175140Sjhb struct thread *td; 589175140Sjhb struct ftruncate_args *uap; 590175140Sjhb{ 591175140Sjhb 592175140Sjhb return (kern_ftruncate(td, uap->fd, uap->length)); 593175140Sjhb} 594175140Sjhb 595175140Sjhb#if defined(COMPAT_43) 596175140Sjhb#ifndef _SYS_SYSPROTO_H_ 597175140Sjhbstruct oftruncate_args { 598175140Sjhb int fd; 599175140Sjhb long length; 600175140Sjhb}; 601175140Sjhb#endif 602175140Sjhbint 603175140Sjhboftruncate(td, uap) 604175140Sjhb struct thread *td; 605175140Sjhb struct oftruncate_args *uap; 606175140Sjhb{ 607175140Sjhb 608175140Sjhb return (kern_ftruncate(td, uap->fd, uap->length)); 609175140Sjhb} 610175140Sjhb#endif /* COMPAT_43 */ 611175140Sjhb 612175140Sjhb#ifndef _SYS_SYSPROTO_H_ 6131541Srgrimesstruct ioctl_args { 6141541Srgrimes int fd; 61538517Sdfr u_long com; 6161541Srgrimes caddr_t data; 6171541Srgrimes}; 61812221Sbde#endif 6191541Srgrimes/* ARGSUSED */ 6201549Srgrimesint 621137687Sphkioctl(struct thread *td, struct ioctl_args *uap) 6221541Srgrimes{ 623137687Sphk u_long com; 624162711Sru int arg, error; 625137687Sphk u_int size; 626162711Sru caddr_t data; 6271541Srgrimes 628140406Sphk if (uap->com > 0xffffffff) { 629140406Sphk printf( 630140406Sphk "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n", 631173600Sjulian td->td_proc->p_pid, td->td_name, uap->com); 632140406Sphk uap->com &= 0xffffffff; 633140406Sphk } 634160192Sjhb com = uap->com; 6351541Srgrimes 6361541Srgrimes /* 6371541Srgrimes * Interpret high order word to find amount of data to be 6381541Srgrimes * copied to/from the user's address space. 6391541Srgrimes */ 6401541Srgrimes size = IOCPARM_LEN(com); 641137689Sphk if ((size > IOCPARM_MAX) || 642137689Sphk ((com & (IOC_VOID | IOC_IN | IOC_OUT)) == 0) || 643147676Speter#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43) 644147676Speter ((com & IOC_OUT) && size == 0) || 645147676Speter#else 646147676Speter ((com & (IOC_IN | IOC_OUT)) && size == 0) || 647147676Speter#endif 648162711Sru ((com & IOC_VOID) && size > 0 && size != sizeof(int))) 64989306Salfred return (ENOTTY); 65068883Sdillon 651137689Sphk if (size > 0) { 652183297Sobrien if (com & IOC_VOID) { 653162711Sru /* Integer argument. */ 654162711Sru arg = (intptr_t)uap->data; 655162711Sru data = (void *)&arg; 656162711Sru size = 0; 657183297Sobrien } else 658183297Sobrien data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 659162711Sru } else 660137689Sphk data = (void *)&uap->data; 661137689Sphk if (com & IOC_IN) { 662137689Sphk error = copyin(uap->data, data, (u_int)size); 663137689Sphk if (error) { 664163355Sru if (size > 0) 665163355Sru free(data, M_IOCTLOPS); 666137689Sphk return (error); 66768883Sdillon } 668137689Sphk } else if (com & IOC_OUT) { 6691541Srgrimes /* 6701541Srgrimes * Zero the buffer so the user always 6711541Srgrimes * gets back something deterministic. 6721541Srgrimes */ 6731541Srgrimes bzero(data, size); 67468883Sdillon } 6751541Srgrimes 676160192Sjhb error = kern_ioctl(td, uap->fd, com, data); 677160192Sjhb 678160192Sjhb if (error == 0 && (com & IOC_OUT)) 679160192Sjhb error = copyout(data, uap->data, (u_int)size); 680160192Sjhb 681162711Sru if (size > 0) 682162711Sru free(data, M_IOCTLOPS); 683160192Sjhb return (error); 684160192Sjhb} 685160192Sjhb 686160192Sjhbint 687160192Sjhbkern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) 688160192Sjhb{ 689160192Sjhb struct file *fp; 690160192Sjhb struct filedesc *fdp; 691160192Sjhb int error; 692160192Sjhb int tmp; 693160192Sjhb 694160192Sjhb if ((error = fget(td, fd, &fp)) != 0) 695160192Sjhb return (error); 696160192Sjhb if ((fp->f_flag & (FREAD | FWRITE)) == 0) { 697160192Sjhb fdrop(fp, td); 698160192Sjhb return (EBADF); 699160192Sjhb } 700160192Sjhb fdp = td->td_proc->p_fd; 701160192Sjhb switch (com) { 702160192Sjhb case FIONCLEX: 703168355Srwatson FILEDESC_XLOCK(fdp); 704160192Sjhb fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE; 705168355Srwatson FILEDESC_XUNLOCK(fdp); 706160192Sjhb goto out; 707160192Sjhb case FIOCLEX: 708168355Srwatson FILEDESC_XLOCK(fdp); 709160192Sjhb fdp->fd_ofileflags[fd] |= UF_EXCLOSE; 710168355Srwatson FILEDESC_XUNLOCK(fdp); 711160192Sjhb goto out; 712160192Sjhb case FIONBIO: 7133098Sphk if ((tmp = *(int *)data)) 714174988Sjeff atomic_set_int(&fp->f_flag, FNONBLOCK); 7151541Srgrimes else 716174988Sjeff atomic_clear_int(&fp->f_flag, FNONBLOCK); 717137773Sphk data = (void *)&tmp; 718160192Sjhb break; 719160192Sjhb case FIOASYNC: 7203098Sphk if ((tmp = *(int *)data)) 721174988Sjeff atomic_set_int(&fp->f_flag, FASYNC); 7221541Srgrimes else 723174988Sjeff atomic_clear_int(&fp->f_flag, FASYNC); 724137773Sphk data = (void *)&tmp; 725160192Sjhb break; 726137773Sphk } 7271541Srgrimes 728137773Sphk error = fo_ioctl(fp, com, data, td->td_ucred, td); 729160192Sjhbout: 73083366Sjulian fdrop(fp, td); 7311541Srgrimes return (error); 7321541Srgrimes} 7331541Srgrimes 734189450Skibint 735189450Skibpoll_no_poll(int events) 736189450Skib{ 737189450Skib /* 738189450Skib * Return true for read/write. If the user asked for something 739189450Skib * special, return POLLNVAL, so that clients have a way of 740189450Skib * determining reliably whether or not the extended 741189450Skib * functionality is present without hard-coding knowledge 742189450Skib * of specific filesystem implementations. 743189450Skib */ 744189450Skib if (events & ~POLLSTANDARD) 745189450Skib return (POLLNVAL); 746189450Skib 747189450Skib return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 748189450Skib} 749189450Skib 75012221Sbde#ifndef _SYS_SYSPROTO_H_ 7511541Srgrimesstruct select_args { 75217702Ssmpatel int nd; 7531541Srgrimes fd_set *in, *ou, *ex; 7541541Srgrimes struct timeval *tv; 7551541Srgrimes}; 75612221Sbde#endif 7571549Srgrimesint 75883366Sjulianselect(td, uap) 75983366Sjulian register struct thread *td; 7601541Srgrimes register struct select_args *uap; 7611541Srgrimes{ 762102779Siedowse struct timeval tv, *tvp; 763102779Siedowse int error; 764102779Siedowse 765102779Siedowse if (uap->tv != NULL) { 766102779Siedowse error = copyin(uap->tv, &tv, sizeof(tv)); 767102779Siedowse if (error) 768102779Siedowse return (error); 769102779Siedowse tvp = &tv; 770102779Siedowse } else 771102779Siedowse tvp = NULL; 772102779Siedowse 773102779Siedowse return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp)); 774102779Siedowse} 775102779Siedowse 776102779Siedowseint 777102779Siedowsekern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, 778102779Siedowse fd_set *fd_ex, struct timeval *tvp) 779102779Siedowse{ 78089306Salfred struct filedesc *fdp; 78122945Sbde /* 78222945Sbde * The magic 2048 here is chosen to be just enough for FD_SETSIZE 78322945Sbde * infds with the new FD_SETSIZE of 1024, and more than enough for 78422945Sbde * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 78522945Sbde * of 256. 78622945Sbde */ 78722945Sbde fd_mask s_selbits[howmany(2048, NFDBITS)]; 78889969Salfred fd_mask *ibits[3], *obits[3], *selbits, *sbp; 78935029Sphk struct timeval atv, rtv, ttv; 79098133Skbyanc int error, timo; 791174647Sjeff u_int nbufbytes, ncpbytes, nfdbits; 7921541Srgrimes 793102779Siedowse if (nd < 0) 79417713Ssmpatel return (EINVAL); 79589306Salfred fdp = td->td_proc->p_fd; 796137805Sphk 797168355Srwatson FILEDESC_SLOCK(fdp); 798102779Siedowse if (nd > td->td_proc->p_fd->fd_nfiles) 799102779Siedowse nd = td->td_proc->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 800168355Srwatson FILEDESC_SUNLOCK(fdp); 80117702Ssmpatel 80222945Sbde /* 80322945Sbde * Allocate just enough bits for the non-null fd_sets. Use the 80422945Sbde * preallocated auto buffer if possible. 80522945Sbde */ 806102779Siedowse nfdbits = roundup(nd, NFDBITS); 80722945Sbde ncpbytes = nfdbits / NBBY; 80822945Sbde nbufbytes = 0; 809102779Siedowse if (fd_in != NULL) 81022945Sbde nbufbytes += 2 * ncpbytes; 811102779Siedowse if (fd_ou != NULL) 81222945Sbde nbufbytes += 2 * ncpbytes; 813102779Siedowse if (fd_ex != NULL) 81422945Sbde nbufbytes += 2 * ncpbytes; 81522945Sbde if (nbufbytes <= sizeof s_selbits) 81622945Sbde selbits = &s_selbits[0]; 81722945Sbde else 818111119Simp selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 81917702Ssmpatel 82017702Ssmpatel /* 82122945Sbde * Assign pointers into the bit buffers and fetch the input bits. 82222945Sbde * Put the output buffers together so that they can be bzeroed 82322945Sbde * together. 82417702Ssmpatel */ 82522945Sbde sbp = selbits; 8261541Srgrimes#define getbits(name, x) \ 82722945Sbde do { \ 828102779Siedowse if (name == NULL) \ 82922945Sbde ibits[x] = NULL; \ 83022945Sbde else { \ 83122945Sbde ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 83222945Sbde obits[x] = sbp; \ 83322945Sbde sbp += ncpbytes / sizeof *sbp; \ 834102779Siedowse error = copyin(name, ibits[x], ncpbytes); \ 83576564Stanimura if (error != 0) \ 836174647Sjeff goto done; \ 83722945Sbde } \ 83822945Sbde } while (0) 839102779Siedowse getbits(fd_in, 0); 840102779Siedowse getbits(fd_ou, 1); 841102779Siedowse getbits(fd_ex, 2); 8421541Srgrimes#undef getbits 84322945Sbde if (nbufbytes != 0) 84422945Sbde bzero(selbits, nbufbytes / 2); 8451541Srgrimes 846102779Siedowse if (tvp != NULL) { 847102779Siedowse atv = *tvp; 8481541Srgrimes if (itimerfix(&atv)) { 8491541Srgrimes error = EINVAL; 850174647Sjeff goto done; 8511541Srgrimes } 85236119Sphk getmicrouptime(&rtv); 85335029Sphk timevaladd(&atv, &rtv); 85463057Sjhb } else { 85535029Sphk atv.tv_sec = 0; 85663057Sjhb atv.tv_usec = 0; 85763057Sjhb } 85835029Sphk timo = 0; 859174647Sjeff seltdinit(td); 860174647Sjeff /* Iterate until the timeout expires or descriptors become ready. */ 861174647Sjeff for (;;) { 862174647Sjeff error = selscan(td, ibits, obits, nd); 863174647Sjeff if (error || td->td_retval[0] != 0) 864174647Sjeff break; 865174647Sjeff if (atv.tv_sec || atv.tv_usec) { 866174647Sjeff getmicrouptime(&rtv); 867174647Sjeff if (timevalcmp(&rtv, &atv, >=)) 868174647Sjeff break; 869174647Sjeff ttv = atv; 870174647Sjeff timevalsub(&ttv, &rtv); 871174647Sjeff timo = ttv.tv_sec > 24 * 60 * 60 ? 872174647Sjeff 24 * 60 * 60 * hz : tvtohz(&ttv); 873174647Sjeff } 874174647Sjeff error = seltdwait(td, timo); 875174647Sjeff if (error) 876174647Sjeff break; 877174647Sjeff error = selrescan(td, ibits, obits); 878174647Sjeff if (error || td->td_retval[0] != 0) 879174647Sjeff break; 88035029Sphk } 881174647Sjeff seltdclear(td); 88292252Salfred 8831541Srgrimesdone: 8841541Srgrimes /* select is not restarted after signals... */ 8851541Srgrimes if (error == ERESTART) 8861541Srgrimes error = EINTR; 8871541Srgrimes if (error == EWOULDBLOCK) 8881541Srgrimes error = 0; 8891541Srgrimes#define putbits(name, x) \ 890102779Siedowse if (name && (error2 = copyout(obits[x], name, ncpbytes))) \ 8911541Srgrimes error = error2; 8921541Srgrimes if (error == 0) { 8931541Srgrimes int error2; 8941541Srgrimes 895102779Siedowse putbits(fd_in, 0); 896102779Siedowse putbits(fd_ou, 1); 897102779Siedowse putbits(fd_ex, 2); 8981541Srgrimes#undef putbits 8991541Srgrimes } 90022945Sbde if (selbits != &s_selbits[0]) 90122945Sbde free(selbits, M_SELECT); 90282752Sdillon 9031541Srgrimes return (error); 9041541Srgrimes} 905187677Sjeff/* 906187677Sjeff * Convert a select bit set to poll flags. 907187682Sjeff * 908187677Sjeff * The backend always returns POLLHUP/POLLERR if appropriate and we 909187677Sjeff * return this as a set bit in any set. 910187677Sjeff */ 911187677Sjeffstatic int select_flags[3] = { 912187677Sjeff POLLRDNORM | POLLHUP | POLLERR, 913187677Sjeff POLLWRNORM | POLLHUP | POLLERR, 914187677Sjeff POLLRDBAND | POLLHUP | POLLERR 915187677Sjeff}; 9161541Srgrimes 917174647Sjeff/* 918187677Sjeff * Compute the fo_poll flags required for a fd given by the index and 919187677Sjeff * bit position in the fd_mask array. 920187677Sjeff */ 921187677Sjeffstatic __inline int 922187996Ssepotvinselflags(fd_mask **ibits, int idx, fd_mask bit) 923187677Sjeff{ 924187677Sjeff int flags; 925187677Sjeff int msk; 926187677Sjeff 927187677Sjeff flags = 0; 928187677Sjeff for (msk = 0; msk < 3; msk++) { 929187677Sjeff if (ibits[msk] == NULL) 930187677Sjeff continue; 931187996Ssepotvin if ((ibits[msk][idx] & bit) == 0) 932187677Sjeff continue; 933187677Sjeff flags |= select_flags[msk]; 934187677Sjeff } 935187677Sjeff return (flags); 936187677Sjeff} 937187677Sjeff 938187677Sjeff/* 939187677Sjeff * Set the appropriate output bits given a mask of fired events and the 940187677Sjeff * input bits originally requested. 941187677Sjeff */ 942187677Sjeffstatic __inline int 943187677Sjeffselsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events) 944187677Sjeff{ 945187677Sjeff int msk; 946187677Sjeff int n; 947187677Sjeff 948187677Sjeff n = 0; 949187677Sjeff for (msk = 0; msk < 3; msk++) { 950187677Sjeff if ((events & select_flags[msk]) == 0) 951187677Sjeff continue; 952187677Sjeff if (ibits[msk] == NULL) 953187677Sjeff continue; 954187677Sjeff if ((ibits[msk][idx] & bit) == 0) 955187677Sjeff continue; 956187677Sjeff /* 957187677Sjeff * XXX Check for a duplicate set. This can occur because a 958187677Sjeff * socket calls selrecord() twice for each poll() call 959187677Sjeff * resulting in two selfds per real fd. selrescan() will 960187677Sjeff * call selsetbits twice as a result. 961187677Sjeff */ 962187677Sjeff if ((obits[msk][idx] & bit) != 0) 963187677Sjeff continue; 964187677Sjeff obits[msk][idx] |= bit; 965187677Sjeff n++; 966187677Sjeff } 967187677Sjeff 968187677Sjeff return (n); 969187677Sjeff} 970187677Sjeff 971187677Sjeff/* 972174647Sjeff * Traverse the list of fds attached to this thread's seltd and check for 973174647Sjeff * completion. 974174647Sjeff */ 97512819Sphkstatic int 976174647Sjeffselrescan(struct thread *td, fd_mask **ibits, fd_mask **obits) 977174647Sjeff{ 978187677Sjeff struct filedesc *fdp; 979187677Sjeff struct selinfo *si; 980174647Sjeff struct seltd *stp; 981174647Sjeff struct selfd *sfp; 982174647Sjeff struct selfd *sfn; 983174647Sjeff struct file *fp; 984187693Sjeff fd_mask bit; 985187693Sjeff int fd, ev, n, idx; 986174647Sjeff 987187677Sjeff fdp = td->td_proc->p_fd; 988174647Sjeff stp = td->td_sel; 989187677Sjeff n = 0; 990174647Sjeff FILEDESC_SLOCK(fdp); 991174647Sjeff STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) { 992174647Sjeff fd = (int)(uintptr_t)sfp->sf_cookie; 993174647Sjeff si = sfp->sf_si; 994174647Sjeff selfdfree(stp, sfp); 995174647Sjeff /* If the selinfo wasn't cleared the event didn't fire. */ 996174647Sjeff if (si != NULL) 997174647Sjeff continue; 998174647Sjeff if ((fp = fget_locked(fdp, fd)) == NULL) { 999174647Sjeff FILEDESC_SUNLOCK(fdp); 1000174647Sjeff return (EBADF); 1001174647Sjeff } 1002187677Sjeff idx = fd / NFDBITS; 1003187693Sjeff bit = (fd_mask)1 << (fd % NFDBITS); 1004187677Sjeff ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td); 1005187677Sjeff if (ev != 0) 1006187677Sjeff n += selsetbits(ibits, obits, idx, bit, ev); 1007174647Sjeff } 1008174647Sjeff FILEDESC_SUNLOCK(fdp); 1009174647Sjeff stp->st_flags = 0; 1010174647Sjeff td->td_retval[0] = n; 1011174647Sjeff return (0); 1012174647Sjeff} 1013174647Sjeff 1014174647Sjeff/* 1015174647Sjeff * Perform the initial filedescriptor scan and register ourselves with 1016174647Sjeff * each selinfo. 1017174647Sjeff */ 1018174647Sjeffstatic int 101983366Sjulianselscan(td, ibits, obits, nfd) 102083366Sjulian struct thread *td; 102117702Ssmpatel fd_mask **ibits, **obits; 102230994Sphk int nfd; 10231541Srgrimes{ 1024187677Sjeff struct filedesc *fdp; 10251541Srgrimes struct file *fp; 1026187693Sjeff fd_mask bit; 1027187677Sjeff int ev, flags, end, fd; 1028187693Sjeff int n, idx; 10291541Srgrimes 1030187677Sjeff fdp = td->td_proc->p_fd; 1031187677Sjeff n = 0; 1032168355Srwatson FILEDESC_SLOCK(fdp); 1033187693Sjeff for (idx = 0, fd = 0; fd < nfd; idx++) { 1034187677Sjeff end = imin(fd + NFDBITS, nfd); 1035187677Sjeff for (bit = 1; fd < end; bit <<= 1, fd++) { 1036187677Sjeff /* Compute the list of events we're interested in. */ 1037187677Sjeff flags = selflags(ibits, idx, bit); 1038187677Sjeff if (flags == 0) 1039187677Sjeff continue; 1040187677Sjeff if ((fp = fget_locked(fdp, fd)) == NULL) { 1041187677Sjeff FILEDESC_SUNLOCK(fdp); 1042187677Sjeff return (EBADF); 10431541Srgrimes } 1044187677Sjeff selfdalloc(td, (void *)(uintptr_t)fd); 1045187677Sjeff ev = fo_poll(fp, flags, td->td_ucred, td); 1046187677Sjeff if (ev != 0) 1047187677Sjeff n += selsetbits(ibits, obits, idx, bit, ev); 10481541Srgrimes } 10491541Srgrimes } 1050187677Sjeff 1051168355Srwatson FILEDESC_SUNLOCK(fdp); 105283366Sjulian td->td_retval[0] = n; 10531541Srgrimes return (0); 10541541Srgrimes} 10551541Srgrimes 105629351Speter#ifndef _SYS_SYSPROTO_H_ 105729351Speterstruct poll_args { 105829351Speter struct pollfd *fds; 105929351Speter u_int nfds; 106029351Speter int timeout; 106129351Speter}; 106229351Speter#endif 106329351Speterint 106483366Sjulianpoll(td, uap) 106583366Sjulian struct thread *td; 106673159Sjlemon struct poll_args *uap; 106729351Speter{ 1068134404Sandre struct pollfd *bits; 1069134404Sandre struct pollfd smallbits[32]; 107035029Sphk struct timeval atv, rtv, ttv; 107198133Skbyanc int error = 0, timo; 1072174647Sjeff u_int nfds; 107329351Speter size_t ni; 107429351Speter 1075107849Salfred nfds = uap->nfds; 1076177368Sjeff if (nfds > maxfilesperproc && nfds > FD_SETSIZE) 1077174647Sjeff return (EINVAL); 107872146Speter ni = nfds * sizeof(struct pollfd); 107929351Speter if (ni > sizeof(smallbits)) 1080111119Simp bits = malloc(ni, M_TEMP, M_WAITOK); 108129351Speter else 108229351Speter bits = smallbits; 1083107849Salfred error = copyin(uap->fds, bits, ni); 108429351Speter if (error) 1085174647Sjeff goto done; 1086107849Salfred if (uap->timeout != INFTIM) { 1087107849Salfred atv.tv_sec = uap->timeout / 1000; 1088107849Salfred atv.tv_usec = (uap->timeout % 1000) * 1000; 108929351Speter if (itimerfix(&atv)) { 109029351Speter error = EINVAL; 1091174647Sjeff goto done; 109229351Speter } 109336119Sphk getmicrouptime(&rtv); 109435029Sphk timevaladd(&atv, &rtv); 109563057Sjhb } else { 109635029Sphk atv.tv_sec = 0; 109763057Sjhb atv.tv_usec = 0; 109863057Sjhb } 109935029Sphk timo = 0; 1100174647Sjeff seltdinit(td); 1101174647Sjeff /* Iterate until the timeout expires or descriptors become ready. */ 1102174647Sjeff for (;;) { 1103174647Sjeff error = pollscan(td, bits, nfds); 1104174647Sjeff if (error || td->td_retval[0] != 0) 1105174647Sjeff break; 1106174647Sjeff if (atv.tv_sec || atv.tv_usec) { 1107174647Sjeff getmicrouptime(&rtv); 1108174647Sjeff if (timevalcmp(&rtv, &atv, >=)) 1109174647Sjeff break; 1110174647Sjeff ttv = atv; 1111174647Sjeff timevalsub(&ttv, &rtv); 1112174647Sjeff timo = ttv.tv_sec > 24 * 60 * 60 ? 1113174647Sjeff 24 * 60 * 60 * hz : tvtohz(&ttv); 1114174647Sjeff } 1115174647Sjeff error = seltdwait(td, timo); 1116174647Sjeff if (error) 1117174647Sjeff break; 1118174647Sjeff error = pollrescan(td); 1119174647Sjeff if (error || td->td_retval[0] != 0) 1120174647Sjeff break; 112129351Speter } 1122174647Sjeff seltdclear(td); 112392252Salfred 112429351Speterdone: 112529351Speter /* poll is not restarted after signals... */ 112629351Speter if (error == ERESTART) 112729351Speter error = EINTR; 112829351Speter if (error == EWOULDBLOCK) 112929351Speter error = 0; 113029351Speter if (error == 0) { 1131107849Salfred error = copyout(bits, uap->fds, ni); 113229351Speter if (error) 113329351Speter goto out; 113429351Speter } 113529351Speterout: 113629351Speter if (ni > sizeof(smallbits)) 113729351Speter free(bits, M_TEMP); 113829351Speter return (error); 113929351Speter} 114029351Speter 114129351Speterstatic int 1142174647Sjeffpollrescan(struct thread *td) 1143174647Sjeff{ 1144174647Sjeff struct seltd *stp; 1145174647Sjeff struct selfd *sfp; 1146174647Sjeff struct selfd *sfn; 1147174647Sjeff struct selinfo *si; 1148174647Sjeff struct filedesc *fdp; 1149174647Sjeff struct file *fp; 1150174647Sjeff struct pollfd *fd; 1151174647Sjeff int n; 1152174647Sjeff 1153174647Sjeff n = 0; 1154174647Sjeff fdp = td->td_proc->p_fd; 1155174647Sjeff stp = td->td_sel; 1156174647Sjeff FILEDESC_SLOCK(fdp); 1157174647Sjeff STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) { 1158174647Sjeff fd = (struct pollfd *)sfp->sf_cookie; 1159174647Sjeff si = sfp->sf_si; 1160174647Sjeff selfdfree(stp, sfp); 1161174647Sjeff /* If the selinfo wasn't cleared the event didn't fire. */ 1162174647Sjeff if (si != NULL) 1163174647Sjeff continue; 1164174647Sjeff fp = fdp->fd_ofiles[fd->fd]; 1165174647Sjeff if (fp == NULL) { 1166174647Sjeff fd->revents = POLLNVAL; 1167174647Sjeff n++; 1168174647Sjeff continue; 1169174647Sjeff } 1170174647Sjeff /* 1171174647Sjeff * Note: backend also returns POLLHUP and 1172174647Sjeff * POLLERR if appropriate. 1173174647Sjeff */ 1174174647Sjeff fd->revents = fo_poll(fp, fd->events, td->td_ucred, td); 1175174647Sjeff if (fd->revents != 0) 1176174647Sjeff n++; 1177174647Sjeff } 1178174647Sjeff FILEDESC_SUNLOCK(fdp); 1179174647Sjeff stp->st_flags = 0; 1180174647Sjeff td->td_retval[0] = n; 1181174647Sjeff return (0); 1182174647Sjeff} 1183174647Sjeff 1184174647Sjeff 1185174647Sjeffstatic int 118683366Sjulianpollscan(td, fds, nfd) 118783366Sjulian struct thread *td; 118829351Speter struct pollfd *fds; 118973159Sjlemon u_int nfd; 119029351Speter{ 1191174647Sjeff struct filedesc *fdp = td->td_proc->p_fd; 119229351Speter int i; 119329351Speter struct file *fp; 119429351Speter int n = 0; 119529351Speter 1196168355Srwatson FILEDESC_SLOCK(fdp); 119729351Speter for (i = 0; i < nfd; i++, fds++) { 119841632Sjkh if (fds->fd >= fdp->fd_nfiles) { 119929351Speter fds->revents = POLLNVAL; 120029351Speter n++; 120141632Sjkh } else if (fds->fd < 0) { 120241632Sjkh fds->revents = 0; 120329351Speter } else { 120429351Speter fp = fdp->fd_ofiles[fds->fd]; 120568883Sdillon if (fp == NULL) { 120629351Speter fds->revents = POLLNVAL; 120729351Speter n++; 120829351Speter } else { 120931364Sbde /* 121031364Sbde * Note: backend also returns POLLHUP and 121131364Sbde * POLLERR if appropriate. 121231364Sbde */ 1213174647Sjeff selfdalloc(td, fds); 121451418Sgreen fds->revents = fo_poll(fp, fds->events, 1215101983Srwatson td->td_ucred, td); 121629351Speter if (fds->revents != 0) 121729351Speter n++; 121829351Speter } 121929351Speter } 122029351Speter } 1221168355Srwatson FILEDESC_SUNLOCK(fdp); 122283366Sjulian td->td_retval[0] = n; 122329351Speter return (0); 122429351Speter} 122529351Speter 122629351Speter/* 122729351Speter * OpenBSD poll system call. 1228167211Srwatson * 122929351Speter * XXX this isn't quite a true representation.. OpenBSD uses select ops. 123029351Speter */ 123129351Speter#ifndef _SYS_SYSPROTO_H_ 123229351Speterstruct openbsd_poll_args { 123329351Speter struct pollfd *fds; 123429351Speter u_int nfds; 123529351Speter int timeout; 123629351Speter}; 123729351Speter#endif 123829351Speterint 123983366Sjulianopenbsd_poll(td, uap) 124083366Sjulian register struct thread *td; 124129351Speter register struct openbsd_poll_args *uap; 124229351Speter{ 124383366Sjulian return (poll(td, (struct poll_args *)uap)); 124429351Speter} 124529351Speter 124692252Salfred/* 1247174647Sjeff * XXX This was created specifically to support netncp and netsmb. This 1248174647Sjeff * allows the caller to specify a socket to wait for events on. It returns 1249174647Sjeff * 0 if any events matched and an error otherwise. There is no way to 1250174647Sjeff * determine which events fired. 125192252Salfred */ 1252174647Sjeffint 1253174647Sjeffselsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td) 125492252Salfred{ 1255174647Sjeff struct timeval atv, rtv, ttv; 1256174647Sjeff int error, timo; 125792252Salfred 1258174647Sjeff if (tvp != NULL) { 1259174647Sjeff atv = *tvp; 1260174647Sjeff if (itimerfix(&atv)) 1261174647Sjeff return (EINVAL); 1262174647Sjeff getmicrouptime(&rtv); 1263174647Sjeff timevaladd(&atv, &rtv); 1264174647Sjeff } else { 1265174647Sjeff atv.tv_sec = 0; 1266174647Sjeff atv.tv_usec = 0; 1267174647Sjeff } 1268174647Sjeff 1269174647Sjeff timo = 0; 1270174647Sjeff seltdinit(td); 1271174647Sjeff /* 1272174647Sjeff * Iterate until the timeout expires or the socket becomes ready. 1273174647Sjeff */ 1274174647Sjeff for (;;) { 1275174647Sjeff selfdalloc(td, NULL); 1276174647Sjeff error = sopoll(so, events, NULL, td); 1277174647Sjeff /* error here is actually the ready events. */ 1278174647Sjeff if (error) 1279174647Sjeff return (0); 1280174647Sjeff if (atv.tv_sec || atv.tv_usec) { 1281174647Sjeff getmicrouptime(&rtv); 1282174647Sjeff if (timevalcmp(&rtv, &atv, >=)) { 1283174647Sjeff seltdclear(td); 1284174647Sjeff return (EWOULDBLOCK); 1285174647Sjeff } 1286174647Sjeff ttv = atv; 1287174647Sjeff timevalsub(&ttv, &rtv); 1288174647Sjeff timo = ttv.tv_sec > 24 * 60 * 60 ? 1289174647Sjeff 24 * 60 * 60 * hz : tvtohz(&ttv); 1290174647Sjeff } 1291174647Sjeff error = seltdwait(td, timo); 1292174647Sjeff seltdclear(td); 1293174647Sjeff if (error) 1294174647Sjeff break; 1295174647Sjeff } 1296174647Sjeff /* XXX Duplicates ncp/smb behavior. */ 1297174647Sjeff if (error == ERESTART) 1298174647Sjeff error = 0; 1299174647Sjeff return (error); 130092252Salfred} 130192252Salfred 13021541Srgrimes/* 1303174647Sjeff * Preallocate two selfds associated with 'cookie'. Some fo_poll routines 1304174647Sjeff * have two select sets, one for read and another for write. 1305174647Sjeff */ 1306174647Sjeffstatic void 1307174647Sjeffselfdalloc(struct thread *td, void *cookie) 1308174647Sjeff{ 1309174647Sjeff struct seltd *stp; 1310174647Sjeff 1311174647Sjeff stp = td->td_sel; 1312174647Sjeff if (stp->st_free1 == NULL) 1313174647Sjeff stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO); 1314174647Sjeff stp->st_free1->sf_td = stp; 1315174647Sjeff stp->st_free1->sf_cookie = cookie; 1316174647Sjeff if (stp->st_free2 == NULL) 1317174647Sjeff stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO); 1318174647Sjeff stp->st_free2->sf_td = stp; 1319174647Sjeff stp->st_free2->sf_cookie = cookie; 1320174647Sjeff} 1321174647Sjeff 1322174647Sjeffstatic void 1323174647Sjeffselfdfree(struct seltd *stp, struct selfd *sfp) 1324174647Sjeff{ 1325174647Sjeff STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link); 1326174647Sjeff mtx_lock(sfp->sf_mtx); 1327174647Sjeff if (sfp->sf_si) 1328174647Sjeff TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads); 1329174647Sjeff mtx_unlock(sfp->sf_mtx); 1330174647Sjeff uma_zfree(selfd_zone, sfp); 1331174647Sjeff} 1332174647Sjeff 1333174647Sjeff/* 13341541Srgrimes * Record a select request. 13351541Srgrimes */ 13361541Srgrimesvoid 13371541Srgrimesselrecord(selector, sip) 133883366Sjulian struct thread *selector; 13391541Srgrimes struct selinfo *sip; 13401541Srgrimes{ 1341174647Sjeff struct selfd *sfp; 1342174647Sjeff struct seltd *stp; 1343174647Sjeff struct mtx *mtxp; 13441541Srgrimes 1345174647Sjeff stp = selector->td_sel; 134692252Salfred /* 1347174647Sjeff * Don't record when doing a rescan. 134892252Salfred */ 1349174647Sjeff if (stp->st_flags & SELTD_RESCAN) 1350174647Sjeff return; 1351174647Sjeff /* 1352174647Sjeff * Grab one of the preallocated descriptors. 1353174647Sjeff */ 1354174647Sjeff sfp = NULL; 1355174647Sjeff if ((sfp = stp->st_free1) != NULL) 1356174647Sjeff stp->st_free1 = NULL; 1357174647Sjeff else if ((sfp = stp->st_free2) != NULL) 1358174647Sjeff stp->st_free2 = NULL; 1359174647Sjeff else 1360174647Sjeff panic("selrecord: No free selfd on selq"); 1361174647Sjeff mtxp = mtx_pool_find(mtxpool_sleep, sip); 1362174647Sjeff /* 1363174647Sjeff * Initialize the sfp and queue it in the thread. 1364174647Sjeff */ 1365174647Sjeff sfp->sf_si = sip; 1366174647Sjeff sfp->sf_mtx = mtxp; 1367174647Sjeff STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link); 1368174647Sjeff /* 1369174647Sjeff * Now that we've locked the sip, check for initialization. 1370174647Sjeff */ 1371174647Sjeff mtx_lock(mtxp); 1372174647Sjeff if (sip->si_mtx == NULL) { 1373174647Sjeff sip->si_mtx = mtxp; 1374174647Sjeff TAILQ_INIT(&sip->si_tdlist); 137583366Sjulian } 1376174647Sjeff /* 1377174647Sjeff * Add this thread to the list of selfds listening on this selinfo. 1378174647Sjeff */ 1379174647Sjeff TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads); 1380174647Sjeff mtx_unlock(sip->si_mtx); 13811541Srgrimes} 13821541Srgrimes 1383122352Stanimura/* Wake up a selecting thread. */ 1384122352Stanimuravoid 1385122352Stanimuraselwakeup(sip) 1386122352Stanimura struct selinfo *sip; 1387122352Stanimura{ 1388122352Stanimura doselwakeup(sip, -1); 1389122352Stanimura} 1390122352Stanimura 1391122352Stanimura/* Wake up a selecting thread, and set its priority. */ 1392122352Stanimuravoid 1393122352Stanimuraselwakeuppri(sip, pri) 1394122352Stanimura struct selinfo *sip; 1395122352Stanimura int pri; 1396122352Stanimura{ 1397122352Stanimura doselwakeup(sip, pri); 1398122352Stanimura} 1399122352Stanimura 14001541Srgrimes/* 14011541Srgrimes * Do a wakeup when a selectable event occurs. 14021541Srgrimes */ 1403122352Stanimurastatic void 1404122352Stanimuradoselwakeup(sip, pri) 140592252Salfred struct selinfo *sip; 1406122352Stanimura int pri; 14071541Srgrimes{ 1408174647Sjeff struct selfd *sfp; 1409174647Sjeff struct selfd *sfn; 1410174647Sjeff struct seltd *stp; 14111541Srgrimes 1412174647Sjeff /* If it's not initialized there can't be any waiters. */ 1413174647Sjeff if (sip->si_mtx == NULL) 141492252Salfred return; 1415174647Sjeff /* 1416174647Sjeff * Locking the selinfo locks all selfds associated with it. 1417174647Sjeff */ 1418174647Sjeff mtx_lock(sip->si_mtx); 1419174647Sjeff TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) { 1420174647Sjeff /* 1421174647Sjeff * Once we remove this sfp from the list and clear the 1422174647Sjeff * sf_si seltdclear will know to ignore this si. 1423174647Sjeff */ 1424174647Sjeff TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads); 1425174647Sjeff sfp->sf_si = NULL; 1426174647Sjeff stp = sfp->sf_td; 1427174647Sjeff mtx_lock(&stp->st_mtx); 1428174647Sjeff stp->st_flags |= SELTD_PENDING; 1429174647Sjeff cv_broadcastpri(&stp->st_wait, pri); 1430174647Sjeff mtx_unlock(&stp->st_mtx); 14311541Srgrimes } 1432174647Sjeff mtx_unlock(sip->si_mtx); 14331541Srgrimes} 143476564Stanimura 1435174647Sjeffstatic void 1436174647Sjeffseltdinit(struct thread *td) 1437174647Sjeff{ 1438174647Sjeff struct seltd *stp; 143976564Stanimura 1440174647Sjeff if ((stp = td->td_sel) != NULL) 1441174647Sjeff goto out; 1442174647Sjeff td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO); 1443174647Sjeff mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF); 1444174647Sjeff cv_init(&stp->st_wait, "select"); 1445174647Sjeffout: 1446174647Sjeff stp->st_flags = 0; 1447174647Sjeff STAILQ_INIT(&stp->st_selq); 1448174647Sjeff} 1449174647Sjeff 1450174647Sjeffstatic int 1451174647Sjeffseltdwait(struct thread *td, int timo) 1452174647Sjeff{ 1453174647Sjeff struct seltd *stp; 1454174647Sjeff int error; 1455174647Sjeff 1456174647Sjeff stp = td->td_sel; 1457174647Sjeff /* 1458174647Sjeff * An event of interest may occur while we do not hold the seltd 1459174647Sjeff * locked so check the pending flag before we sleep. 1460174647Sjeff */ 1461174647Sjeff mtx_lock(&stp->st_mtx); 1462174647Sjeff /* 1463174647Sjeff * Any further calls to selrecord will be a rescan. 1464174647Sjeff */ 1465174647Sjeff stp->st_flags |= SELTD_RESCAN; 1466174647Sjeff if (stp->st_flags & SELTD_PENDING) { 1467174647Sjeff mtx_unlock(&stp->st_mtx); 1468174647Sjeff return (0); 1469174647Sjeff } 1470174647Sjeff if (timo > 0) 1471174647Sjeff error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo); 1472174647Sjeff else 1473174647Sjeff error = cv_wait_sig(&stp->st_wait, &stp->st_mtx); 1474174647Sjeff mtx_unlock(&stp->st_mtx); 1475174647Sjeff 1476174647Sjeff return (error); 1477174647Sjeff} 1478174647Sjeff 1479174647Sjeffvoid 1480174647Sjeffseltdfini(struct thread *td) 1481174647Sjeff{ 1482174647Sjeff struct seltd *stp; 1483174647Sjeff 1484174647Sjeff stp = td->td_sel; 1485174647Sjeff if (stp == NULL) 1486174647Sjeff return; 1487174647Sjeff if (stp->st_free1) 1488174647Sjeff uma_zfree(selfd_zone, stp->st_free1); 1489174647Sjeff if (stp->st_free2) 1490174647Sjeff uma_zfree(selfd_zone, stp->st_free2); 1491174647Sjeff td->td_sel = NULL; 1492174647Sjeff free(stp, M_SELECT); 1493174647Sjeff} 1494174647Sjeff 1495174647Sjeff/* 1496174647Sjeff * Remove the references to the thread from all of the objects we were 1497174647Sjeff * polling. 1498174647Sjeff */ 149976564Stanimurastatic void 1500174647Sjeffseltdclear(struct thread *td) 150176564Stanimura{ 1502174647Sjeff struct seltd *stp; 1503174647Sjeff struct selfd *sfp; 1504174647Sjeff struct selfd *sfn; 1505174647Sjeff 1506174647Sjeff stp = td->td_sel; 1507174647Sjeff STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) 1508174647Sjeff selfdfree(stp, sfp); 1509174647Sjeff stp->st_flags = 0; 151076564Stanimura} 1511174647Sjeff 1512174647Sjeffstatic void selectinit(void *); 1513174647SjeffSYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL); 1514174647Sjeffstatic void 1515174647Sjeffselectinit(void *dummy __unused) 1516174647Sjeff{ 1517174647Sjeff selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL, 1518174647Sjeff NULL, NULL, UMA_ALIGN_PTR, 0); 1519174647Sjeff} 1520