/* sys_pipe.c revision 118929 */
/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.  Whenever the amount in use
 * exceeds half of this value, all new pipes will be created with size
 * SMALL_PIPE_SIZE, rather than PIPE_SIZE.  Big pipe creation will be limited
 * as well.  This value is loader tunable only.
 *
 * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
 * be wired in order to facilitate direct copies using page flipping.
 * Whenever this value is exceeded, pipes will fall back to using regular
 * copies.  This value is sysctl controllable at all times.
 *
 * These values are autotuned in subr_param.c.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
65117325Ssilby * 6613907Sdyson */ 6713907Sdyson 68116182Sobrien#include <sys/cdefs.h> 69116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 118929 2003-08-15 04:31:01Z jmg $"); 70116182Sobrien 71101768Srwatson#include "opt_mac.h" 72101768Srwatson 7313675Sdyson#include <sys/param.h> 7413675Sdyson#include <sys/systm.h> 7524131Sbde#include <sys/fcntl.h> 7613675Sdyson#include <sys/file.h> 7713675Sdyson#include <sys/filedesc.h> 7824206Sbde#include <sys/filio.h> 7991372Salfred#include <sys/kernel.h> 8076166Smarkm#include <sys/lock.h> 81101768Srwatson#include <sys/mac.h> 8276827Salfred#include <sys/mutex.h> 8324206Sbde#include <sys/ttycom.h> 8413675Sdyson#include <sys/stat.h> 8591968Salfred#include <sys/malloc.h> 8629356Speter#include <sys/poll.h> 8770834Swollman#include <sys/selinfo.h> 8813675Sdyson#include <sys/signalvar.h> 89117325Ssilby#include <sys/sysctl.h> 9013675Sdyson#include <sys/sysproto.h> 9113675Sdyson#include <sys/pipe.h> 9276166Smarkm#include <sys/proc.h> 9355112Sbde#include <sys/vnode.h> 9434924Sbde#include <sys/uio.h> 9559288Sjlemon#include <sys/event.h> 9613675Sdyson 9713675Sdyson#include <vm/vm.h> 9813675Sdyson#include <vm/vm_param.h> 9913675Sdyson#include <vm/vm_object.h> 10013675Sdyson#include <vm/vm_kern.h> 10113675Sdyson#include <vm/vm_extern.h> 10213675Sdyson#include <vm/pmap.h> 10313675Sdyson#include <vm/vm_map.h> 10413907Sdyson#include <vm/vm_page.h> 10592751Sjeff#include <vm/uma.h> 10613675Sdyson 10714037Sdyson/* 10814037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 10914037Sdyson * approx 30% decrease in transfer rate. This could be useful for 11014037Sdyson * NetBSD or OpenBSD. 
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static fo_rdwr_t	pipe_read;
static fo_rdwr_t	pipe_write;
static fo_ioctl_t	pipe_ioctl;
static fo_poll_t	pipe_poll;
static fo_kqfilter_t	pipe_kqfilter;
static fo_stat_t	pipe_stat;
static fo_close_t	pipe_close;

static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*
 * Temporarily trade the per-pipe mutex for Giant.  The caller must
 * already hold the long-term I/O lock (PIPE_LOCKFL, see pipelock())
 * so that the pipe's state cannot change while its mutex is dropped;
 * the KASSERT enforces this.
 */
#define PIPE_GET_GIANT(pipe)						\
	do {								\
		KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0,	\
		   ("%s:%d PIPE_GET_GIANT: line pipe not locked",	\
		   __FILE__, __LINE__));				\
		PIPE_UNLOCK(pipe);					\
		mtx_lock(&Giant);					\
	} while (0)

/* Inverse of PIPE_GET_GIANT: release Giant, reacquire the pipe mutex. */
#define PIPE_DROP_GIANT(pipe)						\
	do {								\
		mtx_unlock(&Giant);					\
		PIPE_LOCK(pipe);					\
	} while (0)

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
static int nbigpipe;

/* Global accounting, manipulated with atomic ops; exported via sysctl. */
static int amountpipes;
static int amountpipekva;
static int amountpipekvawired;

SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD,
	   &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
	   &maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
	   &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
	   &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
	   &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
	   &amountpipekvawired, 0, "Pipe wired KVA usage");

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe **cpipep);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
static __inline void pipeselwakeup(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int pipespace(struct pipe *cpipe, int size);

static uma_zone_t pipe_zone;

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

/*
 * SYSINIT hook: create the UMA zone that backs struct pipe allocations.
 */
static void
pipeinit(void *dummy __unused)
{

	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

/* ARGSUSED */
int
pipe(td, uap)
	struct thread *td;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	struct mtx *pmtx;
	int fd, error;

	/*
	 * The mutex is allocated up front (M_WAITOK, zeroed) and shared
	 * by both endpoints; it is only mtx_init()ed at the very end,
	 * once both file descriptors have been set up successfully.
	 */
	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		/* pipeclose() tolerates a NULL argument on partial failure. */
		pipeclose(rpipe);
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	error = falloc(td, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (error);
	}
	fhold(rf);
	td->td_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	FILE_LOCK(rf);
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;
	FILE_UNLOCK(rf);
	error = falloc(td, &wf, &fd);
	if (error) {
		/*
		 * Back out the read-side fd, but only if it still refers
		 * to rf (another thread may have closed/replaced it).
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
			fdp->fd_ofiles[td->td_retval[0]] = NULL;
			FILEDESC_UNLOCK(fdp);
			fdrop(rf, td);
		} else
			FILEDESC_UNLOCK(fdp);
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (error);
	}
	FILE_LOCK(wf);
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;
	FILE_UNLOCK(wf);
	td->td_retval[1] = fd;
	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
#ifdef MAC
	/*
	 * struct pipe represents a pipe endpoint.  The MAC label is shared
	 * between the connected endpoints.  As a result mac_init_pipe() and
	 * mac_create_pipe() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_init_pipe(rpipe);
	mac_create_pipe(td->td_ucred, rpipe);
#endif
	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
	fdrop(rf, td);

	return (0);
}

/*
 * Allocate kva for pipe circular buffer, the space is pageable
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 *
 * Must be called without the pipe mutex held (vm_map_find() may sleep);
 * the KASSERT below enforces this.  On success the old buffer (if any)
 * is released via pipe_free_kmem() and the accounting counters updated.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;
	static int curfail = 0;
	static struct timeval lastfail;

	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
	       ("pipespace: pipe mutex locked"));

	size = round_page(size);
	npages = size / PAGE_SIZE;
	/*
	 * Create an object, I don't like the idea of paging to/from
	 * kernel_object.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	object = vm_object_allocate(OBJT_DEFAULT, npages);
	buffer = (caddr_t) vm_map_min(pipe_map);

	/*
	 * Insert the object into the kernel map, and allocate kva for it.
	 * The map entry is, by default, pageable.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	error = vm_map_find(pipe_map, object, 0,
		(vm_offset_t *) &buffer, size, 1,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	if (error != KERN_SUCCESS) {
		vm_object_deallocate(object);
		/* Rate-limited so a KVA-exhausted box doesn't spam the console. */
		if (ppsratecheck(&lastfail, &curfail, 1))
			printf("kern.maxpipekva exceeded, please see tuning(7).\n");
		return (ENOMEM);
	}

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	atomic_add_int(&amountpipes, 1);
	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
static int
pipe_create(cpipep)
	struct pipe **cpipep;
{
	struct pipe *cpipe;
	int error;

	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
	if (*cpipep == NULL)
		return (ENOMEM);

	cpipe = *cpipep;

	/*
	 * protect so pipeclose() doesn't follow a junk pointer
	 * if pipespace() fails.
	 */
	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
	cpipe->pipe_state = 0;
	cpipe->pipe_peer = NULL;
	cpipe->pipe_busy = 0;

#ifndef PIPE_NODIRECT
	/*
	 * pipe data structure initializations to support direct pipe I/O
	 */
	cpipe->pipe_map.cnt = 0;
	cpipe->pipe_map.kva = 0;
	cpipe->pipe_map.pos = 0;
	cpipe->pipe_map.npages = 0;
	/* cpipe->pipe_map.ms[] = invalid */
#endif

	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
	/*
	 * Reduce to 1/4th pipe size if we're over our global max.
	 */
	if (amountpipekva > maxpipekva / 2)
		error = pipespace(cpipe, SMALL_PIPE_SIZE);
	else
		error = pipespace(cpipe, PIPE_SIZE);
	if (error)
		return (error);

	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 *
 * This is the long-term I/O lock (PIPE_LOCKFL), held across sleeps,
 * as opposed to the short-term pipe mutex.  The pipe mutex must be
 * held on entry; msleep() drops and reacquires it while waiting.
 * If 'catch' is set the sleep is interruptible (PCATCH) and a signal
 * aborts with the msleep() error.
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe),
		    catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return (0);
}

/*
 * unlock a pipe I/O lock
 *
 * Clears PIPE_LOCKFL and wakes any thread sleeping in pipelock().
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * Notify everything waiting for readiness on this pipe endpoint:
 * select/poll sleepers, SIGIO (O_ASYNC) recipients and kqueue filters.
 */
static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/* ARGSUSED */
static int
pipe_read(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	struct pipe *rpipe = fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	/* pipe_busy defers PIPE_WANT processing in pipeclose() until we exit. */
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

#ifdef MAC
	error = mac_check_pipe_read(active_cred, rpipe);
	if (error)
		goto locked_error;
#endif

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/* Copy at most up to the buffer wrap point per pass. */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			/* uiomove() may fault/sleep: drop the pipe mutex around it. */
			PIPE_UNLOCK(rpipe);
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			caddr_t	va;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			PIPE_UNLOCK(rpipe);
			error = uiomove(va, size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				/* Direct write fully consumed; release the writer. */
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
				    PRIBIO | PCATCH,
				    "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
#ifdef MAC
locked_error:
#endif
	pipeunlock(rpipe);

	/* XXX: should probably do this before getting any locks. */
	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/* Notify select/poll/kevent writers once at least PIPE_BUF is free. */
	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending processes' buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 *
 * Called with Giant held and the pipe mutex dropped (see the asserts);
 * each user page is faulted in and held (vm_page_hold) before being
 * entered into the pipe's private KVA window with pmap_qenter().
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	u_int size;
	int i;
	vm_offset_t addr, endaddr;
	vm_paddr_t paddr;

	GIANT_REQUIRED;
	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		vm_page_t m;

		/*
		 * vm_fault_quick() can sleep.  Consequently,
		 * vm_page_lock_queue() and vm_page_unlock_queue()
		 * should not be performed outside of this loop.
		 */
		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
		    (paddr = pmap_extract(vmspace_pmap(curproc->p_vmspace),
		     addr)) == 0) {
			int j;

			/* Fault failed: drop the holds taken so far and bail. */
			vm_page_lock_queues();
			for (j = 0; j < i; j++) {
				vm_page_unhold(wpipe->pipe_map.ms[j]);
			}
			vm_page_unlock_queues();
			return (EFAULT);
		}

		m = PHYS_TO_VM_PAGE(paddr);
		vm_page_lock_queues();
		vm_page_hold(m);
		vm_page_unlock_queues();
		wpipe->pipe_map.ms[i] = m;
	}

/*
 * set up the control block
 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

/*
 * and map the buffer
 */
	if (wpipe->pipe_map.kva == 0) {
		/*
		 * We need to allocate space for an extra page because the
		 * address range might (will) span pages at times.
		 */
		wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
			wpipe->pipe_buffer.size + PAGE_SIZE);
		atomic_add_int(&amountpipekvawired,
		    wpipe->pipe_buffer.size + PAGE_SIZE);
	}
	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
		wpipe->pipe_map.npages);

/*
 * and update the uio data
 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 *
 * The KVA window itself is kept cached for reuse unless wired-KVA
 * usage has crossed half of maxpipekvawired, in which case it is
 * returned to kernel_map to conserve address space.
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	GIANT_REQUIRED;
	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);

	if (wpipe->pipe_map.kva) {
		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);

		if (amountpipekvawired > maxpipekvawired / 2) {
			/* Conserve address space */
			vm_offset_t kva = wpipe->pipe_map.kva;
			wpipe->pipe_map.kva = 0;
			kmem_free(kernel_map, kva,
				wpipe->pipe_buffer.size + PAGE_SIZE);
			atomic_subtract_int(&amountpipekvawired,
			    wpipe->pipe_buffer.size + PAGE_SIZE);
		}
	}
	/* Release the per-page holds taken by pipe_build_write_buffer(). */
	vm_page_lock_queues();
	for (i = 0; i < wpipe->pipe_map.npages; i++) {
		vm_page_unhold(wpipe->pipe_map.ms[i]);
	}
	vm_page_unlock_queues();
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 *
 * Called with the pipe mutex held and PIPE_DIRECTW still pending;
 * after the copy the direct-write mapping is torn down.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	/* bcopy from the wired mapping may fault; needs Giant, not the mutex. */
	PIPE_GET_GIANT(wpipe);
	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
	    wpipe->pipe_buffer.buffer, size);
	pipe_destroy_write_buffer(wpipe);
	PIPE_DROP_GIANT(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	/* Wait for any previous direct write to drain. */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/* Likewise wait until the ordinary buffer has been fully read. */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	pipelock(wpipe, 0);
	PIPE_GET_GIANT(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_DROP_GIANT(wpipe);
	pipeunlock(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	error = 0;
	/* Sleep until the reader has consumed the mapping (clears DIRECTW). */
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			PIPE_GET_GIANT(wpipe);
			pipe_destroy_write_buffer(wpipe);
			PIPE_DROP_GIANT(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
	}

	pipelock(wpipe,0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		PIPE_GET_GIANT(wpipe);
		pipe_destroy_write_buffer(wpipe);
		PIPE_DROP_GIANT(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif

static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	/*
	 * detect loss of pipe read side, issue SIGPIPE if
lost. 92013675Sdyson */ 92116960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 92291395Salfred PIPE_UNLOCK(rpipe); 92376760Salfred return (EPIPE); 92413675Sdyson } 925101768Srwatson#ifdef MAC 926102115Srwatson error = mac_check_pipe_write(active_cred, wpipe); 927101768Srwatson if (error) { 928101768Srwatson PIPE_UNLOCK(rpipe); 929101768Srwatson return (error); 930101768Srwatson } 931101768Srwatson#endif 93277676Sdillon ++wpipe->pipe_busy; 93313675Sdyson 93417163Sdyson /* 93517163Sdyson * If it is advantageous to resize the pipe buffer, do 93617163Sdyson * so. 93717163Sdyson */ 93817163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 939118764Ssilby (amountpipekva < maxpipekva / 2) && 94017163Sdyson (nbigpipe < LIMITBIGPIPES) && 94117163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 94217163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 94317163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 94417163Sdyson 945105009Salfred if ((error = pipelock(wpipe, 1)) == 0) { 946118799Salc PIPE_UNLOCK(wpipe); 94776364Salfred if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 948117364Ssilby atomic_add_int(&nbigpipe, 1); 949118799Salc PIPE_LOCK(wpipe); 95013907Sdyson pipeunlock(wpipe); 95113907Sdyson } 95213907Sdyson } 95377676Sdillon 95477676Sdillon /* 95577676Sdillon * If an early error occured unbusy and return, waking up any pending 95677676Sdillon * readers. 
95777676Sdillon */ 95877676Sdillon if (error) { 95977676Sdillon --wpipe->pipe_busy; 96077676Sdillon if ((wpipe->pipe_busy == 0) && 96177676Sdillon (wpipe->pipe_state & PIPE_WANT)) { 96277676Sdillon wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 96377676Sdillon wakeup(wpipe); 96477676Sdillon } 96591395Salfred PIPE_UNLOCK(rpipe); 96677676Sdillon return(error); 96777676Sdillon } 96876364Salfred 96913913Sdyson orig_resid = uio->uio_resid; 97077676Sdillon 97113675Sdyson while (uio->uio_resid) { 97213907Sdyson int space; 97376760Salfred 97414037Sdyson#ifndef PIPE_NODIRECT 97513907Sdyson /* 97613907Sdyson * If the transfer is large, we can gain performance if 97713907Sdyson * we do process-to-process copies directly. 97816416Sdyson * If the write is non-blocking, we don't use the 97916416Sdyson * direct write mechanism. 98058505Sdillon * 98158505Sdillon * The direct write mechanism will detect the reader going 98258505Sdillon * away on us. 98313907Sdyson */ 98417163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 98517163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 986118764Ssilby amountpipekvawired + uio->uio_resid < maxpipekvawired) { 987105009Salfred error = pipe_direct_write(wpipe, uio); 98876760Salfred if (error) 98913907Sdyson break; 99013907Sdyson continue; 99191362Salfred } 99214037Sdyson#endif 99313907Sdyson 99413907Sdyson /* 99513907Sdyson * Pipe buffered writes cannot be coincidental with 99613907Sdyson * direct writes. We wait until the currently executing 99713907Sdyson * direct write is completed before we start filling the 99858505Sdillon * pipe buffer. We break out if a signal occurs or the 99958505Sdillon * reader goes away. 
100013907Sdyson */ 100113907Sdyson retrywrite: 100213907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 100313992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 100413992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 100513992Sdyson wakeup(wpipe); 100613992Sdyson } 100791395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 100891362Salfred "pipbww", 0); 100958505Sdillon if (wpipe->pipe_state & PIPE_EOF) 101058505Sdillon break; 101113907Sdyson if (error) 101213907Sdyson break; 101313907Sdyson } 101458505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 101558505Sdillon error = EPIPE; 101658505Sdillon break; 101758505Sdillon } 101813907Sdyson 101913907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 102014644Sdyson 102114644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 102213913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 102313913Sdyson space = 0; 102413907Sdyson 1025118230Spb if (space > 0) { 102613907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 102754534Stegge int size; /* Transfer size */ 102854534Stegge int segsize; /* first segment to transfer */ 102976760Salfred 103013907Sdyson /* 103113907Sdyson * It is possible for a direct write to 103213907Sdyson * slip in on us... handle it here... 103313907Sdyson */ 103413907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 103513907Sdyson pipeunlock(wpipe); 103613907Sdyson goto retrywrite; 103713907Sdyson } 103854534Stegge /* 103954534Stegge * If a process blocked in uiomove, our 104054534Stegge * value for space might be bad. 104158505Sdillon * 104258505Sdillon * XXX will we be ok if the reader has gone 104358505Sdillon * away here? 104454534Stegge */ 104554534Stegge if (space > wpipe->pipe_buffer.size - 104654534Stegge wpipe->pipe_buffer.cnt) { 104754534Stegge pipeunlock(wpipe); 104854534Stegge goto retrywrite; 104954534Stegge } 105054534Stegge 105154534Stegge /* 105254534Stegge * Transfer size is minimum of uio transfer 105354534Stegge * and free space in pipe buffer. 
105454534Stegge */ 105554534Stegge if (space > uio->uio_resid) 105654534Stegge size = uio->uio_resid; 105754534Stegge else 105854534Stegge size = space; 105954534Stegge /* 106054534Stegge * First segment to transfer is minimum of 106154534Stegge * transfer size and contiguous space in 106254534Stegge * pipe buffer. If first segment to transfer 106354534Stegge * is less than the transfer size, we've got 106454534Stegge * a wraparound in the buffer. 106554534Stegge */ 106654534Stegge segsize = wpipe->pipe_buffer.size - 106754534Stegge wpipe->pipe_buffer.in; 106854534Stegge if (segsize > size) 106954534Stegge segsize = size; 107054534Stegge 107154534Stegge /* Transfer first segment */ 107254534Stegge 107391395Salfred PIPE_UNLOCK(rpipe); 107454534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 107554534Stegge segsize, uio); 107691395Salfred PIPE_LOCK(rpipe); 107754534Stegge 107854534Stegge if (error == 0 && segsize < size) { 107954534Stegge /* 108054534Stegge * Transfer remaining part now, to 108154534Stegge * support atomic writes. Wraparound 108254534Stegge * happened. 
108354534Stegge */ 108454534Stegge if (wpipe->pipe_buffer.in + segsize != 108554534Stegge wpipe->pipe_buffer.size) 1086116127Smux panic("Expected pipe buffer " 1087116127Smux "wraparound disappeared"); 108854534Stegge 108991395Salfred PIPE_UNLOCK(rpipe); 1090116127Smux error = uiomove( 1091116127Smux &wpipe->pipe_buffer.buffer[0], 1092116127Smux size - segsize, uio); 109391395Salfred PIPE_LOCK(rpipe); 109454534Stegge } 109554534Stegge if (error == 0) { 109654534Stegge wpipe->pipe_buffer.in += size; 109754534Stegge if (wpipe->pipe_buffer.in >= 109854534Stegge wpipe->pipe_buffer.size) { 1099116127Smux if (wpipe->pipe_buffer.in != 1100116127Smux size - segsize + 1101116127Smux wpipe->pipe_buffer.size) 1102116127Smux panic("Expected " 1103116127Smux "wraparound bad"); 1104116127Smux wpipe->pipe_buffer.in = size - 1105116127Smux segsize; 110654534Stegge } 110754534Stegge 110854534Stegge wpipe->pipe_buffer.cnt += size; 1109116127Smux if (wpipe->pipe_buffer.cnt > 1110116127Smux wpipe->pipe_buffer.size) 111154534Stegge panic("Pipe buffer overflow"); 111254534Stegge 111354534Stegge } 111413675Sdyson pipeunlock(wpipe); 111513675Sdyson } 111613675Sdyson if (error) 111713675Sdyson break; 111813675Sdyson 111913675Sdyson } else { 112013675Sdyson /* 112113675Sdyson * If the "read-side" has been blocked, wake it up now. 112213675Sdyson */ 112313675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 112413675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 112513675Sdyson wakeup(wpipe); 112613675Sdyson } 112714037Sdyson 112813675Sdyson /* 112913675Sdyson * don't block on non-blocking I/O 113013675Sdyson */ 113116960Sdyson if (fp->f_flag & FNONBLOCK) { 113213907Sdyson error = EAGAIN; 113313675Sdyson break; 113413675Sdyson } 113513907Sdyson 113614037Sdyson /* 113714037Sdyson * We have no more space and have something to offer, 113829356Speter * wake up select/poll. 
113914037Sdyson */ 114014037Sdyson pipeselwakeup(wpipe); 114114037Sdyson 114213675Sdyson wpipe->pipe_state |= PIPE_WANTW; 114391395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), 114491362Salfred PRIBIO | PCATCH, "pipewr", 0); 114576760Salfred if (error != 0) 114613675Sdyson break; 114713675Sdyson /* 114813675Sdyson * If read side wants to go away, we just issue a signal 114913675Sdyson * to ourselves. 115013675Sdyson */ 115113675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 115213774Sdyson error = EPIPE; 115313907Sdyson break; 115413675Sdyson } 115513675Sdyson } 115613675Sdyson } 115713675Sdyson 115814644Sdyson --wpipe->pipe_busy; 115977676Sdillon 116076760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 116176760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 116213675Sdyson wakeup(wpipe); 116313675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 116413675Sdyson /* 116513675Sdyson * If we have put any characters in the buffer, we wake up 116613675Sdyson * the reader. 116713675Sdyson */ 116813675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 116913675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 117013675Sdyson wakeup(wpipe); 117113675Sdyson } 117213675Sdyson } 117313909Sdyson 117413909Sdyson /* 117513909Sdyson * Don't return EPIPE if I/O was successful 117613909Sdyson */ 117713907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 117877676Sdillon (uio->uio_resid == 0) && 117977676Sdillon (error == EPIPE)) { 118013907Sdyson error = 0; 118177676Sdillon } 118213913Sdyson 118324101Sbde if (error == 0) 118455112Sbde vfs_timestamp(&wpipe->pipe_mtime); 118524101Sbde 118614037Sdyson /* 118714037Sdyson * We have something to offer, 118829356Speter * wake up select/poll. 
118914037Sdyson */ 119014177Sdyson if (wpipe->pipe_buffer.cnt) 119114037Sdyson pipeselwakeup(wpipe); 119213907Sdyson 119391395Salfred PIPE_UNLOCK(rpipe); 119476760Salfred return (error); 119513675Sdyson} 119613675Sdyson 119713675Sdyson/* 119813675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 119913675Sdyson */ 1200104094Sphkstatic int 1201102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td) 120213675Sdyson struct file *fp; 120336735Sdfr u_long cmd; 120499009Salfred void *data; 1205102003Srwatson struct ucred *active_cred; 120683366Sjulian struct thread *td; 120713675Sdyson{ 1208109153Sdillon struct pipe *mpipe = fp->f_data; 1209101768Srwatson#ifdef MAC 1210101768Srwatson int error; 1211104269Srwatson#endif 121213675Sdyson 1213104269Srwatson PIPE_LOCK(mpipe); 1214104269Srwatson 1215104269Srwatson#ifdef MAC 1216102003Srwatson error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data); 1217101768Srwatson if (error) 1218101768Srwatson return (error); 1219101768Srwatson#endif 1220101768Srwatson 122113675Sdyson switch (cmd) { 122213675Sdyson 122313675Sdyson case FIONBIO: 1224104269Srwatson PIPE_UNLOCK(mpipe); 122513675Sdyson return (0); 122613675Sdyson 122713675Sdyson case FIOASYNC: 122813675Sdyson if (*(int *)data) { 122913675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 123013675Sdyson } else { 123113675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 123213675Sdyson } 123391362Salfred PIPE_UNLOCK(mpipe); 123413675Sdyson return (0); 123513675Sdyson 123613675Sdyson case FIONREAD: 123714037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 123814037Sdyson *(int *)data = mpipe->pipe_map.cnt; 123914037Sdyson else 124014037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 124191362Salfred PIPE_UNLOCK(mpipe); 124213675Sdyson return (0); 124313675Sdyson 124441086Struckman case FIOSETOWN: 1245104269Srwatson PIPE_UNLOCK(mpipe); 124641086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 124741086Struckman 124841086Struckman case FIOGETOWN: 
1249104269Srwatson PIPE_UNLOCK(mpipe); 1250104393Struckman *(int *)data = fgetown(&mpipe->pipe_sigio); 125113675Sdyson return (0); 125213675Sdyson 125341086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 125441086Struckman case TIOCSPGRP: 1255104269Srwatson PIPE_UNLOCK(mpipe); 125641086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 125741086Struckman 125841086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 125918863Sdyson case TIOCGPGRP: 1260104269Srwatson PIPE_UNLOCK(mpipe); 1261104393Struckman *(int *)data = -fgetown(&mpipe->pipe_sigio); 126213675Sdyson return (0); 126313675Sdyson 126413675Sdyson } 1265104269Srwatson PIPE_UNLOCK(mpipe); 126617124Sbde return (ENOTTY); 126713675Sdyson} 126813675Sdyson 1269104094Sphkstatic int 1270101983Srwatsonpipe_poll(fp, events, active_cred, td) 127113675Sdyson struct file *fp; 127229356Speter int events; 1273101983Srwatson struct ucred *active_cred; 127483366Sjulian struct thread *td; 127513675Sdyson{ 1276109153Sdillon struct pipe *rpipe = fp->f_data; 127713675Sdyson struct pipe *wpipe; 127829356Speter int revents = 0; 1279101768Srwatson#ifdef MAC 1280101768Srwatson int error; 1281101768Srwatson#endif 128213675Sdyson 128313675Sdyson wpipe = rpipe->pipe_peer; 128491362Salfred PIPE_LOCK(rpipe); 1285101768Srwatson#ifdef MAC 1286102115Srwatson error = mac_check_pipe_poll(active_cred, rpipe); 1287101768Srwatson if (error) 1288101768Srwatson goto locked_error; 1289101768Srwatson#endif 129029356Speter if (events & (POLLIN | POLLRDNORM)) 129129356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 129229356Speter (rpipe->pipe_buffer.cnt > 0) || 129329356Speter (rpipe->pipe_state & PIPE_EOF)) 129429356Speter revents |= events & (POLLIN | POLLRDNORM); 129513675Sdyson 129629356Speter if (events & (POLLOUT | POLLWRNORM)) 129729356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 129843311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 129943311Sdillon 
(wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 130029356Speter revents |= events & (POLLOUT | POLLWRNORM); 130113675Sdyson 130229356Speter if ((rpipe->pipe_state & PIPE_EOF) || 130329356Speter (wpipe == NULL) || 130429356Speter (wpipe->pipe_state & PIPE_EOF)) 130529356Speter revents |= POLLHUP; 130629356Speter 130729356Speter if (revents == 0) { 130829356Speter if (events & (POLLIN | POLLRDNORM)) { 130983805Sjhb selrecord(td, &rpipe->pipe_sel); 131029356Speter rpipe->pipe_state |= PIPE_SEL; 131113675Sdyson } 131213675Sdyson 131329356Speter if (events & (POLLOUT | POLLWRNORM)) { 131483805Sjhb selrecord(td, &wpipe->pipe_sel); 131530164Speter wpipe->pipe_state |= PIPE_SEL; 131613907Sdyson } 131713675Sdyson } 1318101768Srwatson#ifdef MAC 1319101768Srwatsonlocked_error: 1320101768Srwatson#endif 132191362Salfred PIPE_UNLOCK(rpipe); 132229356Speter 132329356Speter return (revents); 132413675Sdyson} 132513675Sdyson 132698989Salfred/* 132798989Salfred * We shouldn't need locks here as we're doing a read and this should 132898989Salfred * be a natural race. 
 */
static int
pipe_stat(fp, ub, active_cred, td)
	struct file *fp;
	struct stat *ub;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *pipe = fp->f_data;
#ifdef MAC
	int error;

	/* The MAC check is the only part that takes the pipe mutex. */
	PIPE_LOCK(pipe);
	error = mac_check_pipe_stat(active_cred, pipe);
	PIPE_UNLOCK(pipe);
	if (error)
		return (error);
#endif
	bzero(ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	/* Round the byte count up to whole "blocks" of st_blksize. */
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/*
 * fileops close entry point: detach the struct file from the pipe,
 * clear SIGIO ownership, then tear the pipe end down.
 */
/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct pipe *cpipe = fp->f_data;

	/* Neuter the file first so no new operations reach the pipe. */
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(&cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

/*
 * Release the buffer KVA and the direct-write map of a pipe end,
 * updating the global accounting counters.  Must be called without
 * the pipe mutex held (the buffer/map frees can sleep).
 * NOTE(review): the KASSERT message says "pipespace:" — it looks
 * copied from pipespace(); the condition itself is correct here.
 */
static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
	    ("pipespace: pipe mutex locked"));

	if (cpipe->pipe_buffer.buffer != NULL) {
		/* Buffers larger than PIPE_SIZE count against nbigpipe. */
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			atomic_subtract_int(&nbigpipe, 1);
		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
		atomic_subtract_int(&amountpipes, 1);
		vm_map_remove(pipe_map,
		    (vm_offset_t)cpipe->pipe_buffer.buffer,
		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	if (cpipe->pipe_map.kva != 0) {
		/* The direct map reserves one extra guard page. */
		atomic_subtract_int(&amountpipekvawired,
		    cpipe->pipe_buffer.size + PAGE_SIZE);
		kmem_free(kernel_map,
		    cpipe->pipe_map.kva,
		    cpipe->pipe_buffer.size + PAGE_SIZE);
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.kva = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipe *ppipe;
	int hadpeer;	/* nonzero if the other end still existed */

	if (cpipe == NULL)
		return;

	hadpeer = 0;

	/* partially created pipes won't have a valid mutex. */
	if (PIPE_MTX(cpipe) != NULL)
		PIPE_LOCK(cpipe);

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
	}

#ifdef MAC
	/* Destroy the label only on the last end to go away. */
	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
		mac_destroy_pipe(cpipe);
#endif

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		hadpeer++;
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE(&ppipe->pipe_sel.si_note, 0);
		ppipe->pipe_peer = NULL;
	}
	/*
	 * free resources
	 */
	if (PIPE_MTX(cpipe) != NULL) {
		PIPE_UNLOCK(cpipe);
		/*
		 * The mutex is shared by both ends; only the second
		 * close (no peer left) may destroy and free it.
		 */
		if (!hadpeer) {
			mtx_destroy(PIPE_MTX(cpipe));
			free(PIPE_MTX(cpipe), M_TEMP);
		}
	}
	pipe_free_kmem(cpipe);
	uma_zfree(pipe_zone, cpipe);
}

/*
 * fileops kqfilter entry point: attach a read or write knote.
 * Write filters hook the peer pipe (where the data is buffered);
 * EPIPE is returned if the peer is already gone.
 */
/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	cpipe = kn->kn_fp->f_data;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		if (cpipe == NULL)
			/* other end of pipe has been closed */
			return (EPIPE);
		break;
	default:
		return (1);
	}
	/* Remember which pipe end the knote was attached to. */
	kn->kn_hook = cpipe;

	PIPE_LOCK(cpipe);
	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	PIPE_UNLOCK(cpipe);
	return (0);
}

/*
 * Remove a knote from the pipe end it was attached to (kn_hook,
 * which for write filters is the peer pipe).
 */
static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	PIPE_LOCK(cpipe);
	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
	PIPE_UNLOCK(cpipe);
}

/*
 * EVFILT_READ: report buffered byte count (or the direct-write byte
 * count while a page-flipped write is pending); EOF when either end
 * has gone away.
 */
/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	PIPE_UNLOCK(rpipe);
	return (kn->kn_data > 0);
}

/*
 * EVFILT_WRITE: report free space in the peer's buffer (zero during
 * a direct write); fires when at least PIPE_BUF bytes fit, EOF when
 * the peer is gone.
 */
/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	PIPE_UNLOCK(rpipe);
	return (kn->kn_data >= PIPE_BUF);
}