/* sys_pipe.c — FreeBSD revision 119811 (recovered from a CVS-annotated listing) */
/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.  Whenever the amount in use
 * exceeds half of this value, all new pipes will be created with size
 * SMALL_PIPE_SIZE, rather than PIPE_SIZE.  Big pipe creation will be limited
 * as well.  This value is loader tunable only.
 *
 * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
 * be wired in order to facilitate direct copies using page flipping.
 * Whenever this value is exceeded, pipes will fall back to using regular
 * copies.  This value is sysctl controllable at all times.
 *
 * These values are autotuned in subr_param.c.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
65117325Ssilby * 6613907Sdyson */ 6713907Sdyson 68116182Sobrien#include <sys/cdefs.h> 69116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 119811 2003-09-06 21:02:10Z alc $"); 70116182Sobrien 71101768Srwatson#include "opt_mac.h" 72101768Srwatson 7313675Sdyson#include <sys/param.h> 7413675Sdyson#include <sys/systm.h> 7524131Sbde#include <sys/fcntl.h> 7613675Sdyson#include <sys/file.h> 7713675Sdyson#include <sys/filedesc.h> 7824206Sbde#include <sys/filio.h> 7991372Salfred#include <sys/kernel.h> 8076166Smarkm#include <sys/lock.h> 81101768Srwatson#include <sys/mac.h> 8276827Salfred#include <sys/mutex.h> 8324206Sbde#include <sys/ttycom.h> 8413675Sdyson#include <sys/stat.h> 8591968Salfred#include <sys/malloc.h> 8629356Speter#include <sys/poll.h> 8770834Swollman#include <sys/selinfo.h> 8813675Sdyson#include <sys/signalvar.h> 89117325Ssilby#include <sys/sysctl.h> 9013675Sdyson#include <sys/sysproto.h> 9113675Sdyson#include <sys/pipe.h> 9276166Smarkm#include <sys/proc.h> 9355112Sbde#include <sys/vnode.h> 9434924Sbde#include <sys/uio.h> 9559288Sjlemon#include <sys/event.h> 9613675Sdyson 9713675Sdyson#include <vm/vm.h> 9813675Sdyson#include <vm/vm_param.h> 9913675Sdyson#include <vm/vm_object.h> 10013675Sdyson#include <vm/vm_kern.h> 10113675Sdyson#include <vm/vm_extern.h> 10213675Sdyson#include <vm/pmap.h> 10313675Sdyson#include <vm/vm_map.h> 10413907Sdyson#include <vm/vm_page.h> 10592751Sjeff#include <vm/uma.h> 10613675Sdyson 10714037Sdyson/* 10814037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 10914037Sdyson * approx 30% decrease in transfer rate. This could be useful for 11014037Sdyson * NetBSD or OpenBSD. 
11114037Sdyson */ 11214037Sdyson/* #define PIPE_NODIRECT */ 11314037Sdyson 11414037Sdyson/* 11514037Sdyson * interfaces to the outside world 11614037Sdyson */ 117108255Sphkstatic fo_rdwr_t pipe_read; 118108255Sphkstatic fo_rdwr_t pipe_write; 119108255Sphkstatic fo_ioctl_t pipe_ioctl; 120108255Sphkstatic fo_poll_t pipe_poll; 121108255Sphkstatic fo_kqfilter_t pipe_kqfilter; 122108255Sphkstatic fo_stat_t pipe_stat; 123108255Sphkstatic fo_close_t pipe_close; 12413675Sdyson 12572521Sjlemonstatic struct fileops pipeops = { 126116546Sphk .fo_read = pipe_read, 127116546Sphk .fo_write = pipe_write, 128116546Sphk .fo_ioctl = pipe_ioctl, 129116546Sphk .fo_poll = pipe_poll, 130116546Sphk .fo_kqfilter = pipe_kqfilter, 131116546Sphk .fo_stat = pipe_stat, 132116546Sphk .fo_close = pipe_close, 133116546Sphk .fo_flags = DFLAG_PASSABLE 13472521Sjlemon}; 13513675Sdyson 13659288Sjlemonstatic void filt_pipedetach(struct knote *kn); 13759288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 13859288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 13959288Sjlemon 14072521Sjlemonstatic struct filterops pipe_rfiltops = 14172521Sjlemon { 1, NULL, filt_pipedetach, filt_piperead }; 14272521Sjlemonstatic struct filterops pipe_wfiltops = 14372521Sjlemon { 1, NULL, filt_pipedetach, filt_pipewrite }; 14459288Sjlemon 14592305Salfred#define PIPE_GET_GIANT(pipe) \ 14691362Salfred do { \ 14792305Salfred KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0, \ 14892305Salfred ("%s:%d PIPE_GET_GIANT: line pipe not locked", \ 14992305Salfred __FILE__, __LINE__)); \ 15092305Salfred PIPE_UNLOCK(pipe); \ 15191362Salfred mtx_lock(&Giant); \ 15291362Salfred } while (0) 15372521Sjlemon 15491362Salfred#define PIPE_DROP_GIANT(pipe) \ 15591362Salfred do { \ 15691362Salfred mtx_unlock(&Giant); \ 15792305Salfred PIPE_LOCK(pipe); \ 15891362Salfred } while (0) 15991362Salfred 16013675Sdyson/* 16113675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 16213675Sdyson * 
space is pageable. The pipe code will try to maintain locality of 16313675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 16413675Sdyson * will not wipe the cache. 16513675Sdyson */ 16613907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 16713907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 16813675Sdyson 16913907Sdyson/* 17017163Sdyson * Limit the number of "big" pipes 17117163Sdyson */ 17217163Sdyson#define LIMITBIGPIPES 32 17333181Seivindstatic int nbigpipe; 17417163Sdyson 175117325Ssilbystatic int amountpipes; 17617124Sbdestatic int amountpipekva; 177117325Ssilbystatic int amountpipekvawired; 17813907Sdyson 179117325SsilbySYSCTL_DECL(_kern_ipc); 180117325Ssilby 181118764SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD, 182117325Ssilby &maxpipekva, 0, "Pipe KVA limit"); 183117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW, 184117325Ssilby &maxpipekvawired, 0, "Pipe KVA wired limit"); 185117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD, 186117364Ssilby &amountpipes, 0, "Current # of pipes"); 187117364SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD, 188117364Ssilby &nbigpipe, 0, "Current # of big pipes"); 189117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 190117325Ssilby &amountpipekva, 0, "Pipe KVA usage"); 191117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD, 192117325Ssilby &amountpipekvawired, 0, "Pipe wired KVA usage"); 193117325Ssilby 19491413Salfredstatic void pipeinit(void *dummy __unused); 19591413Salfredstatic void pipeclose(struct pipe *cpipe); 19691413Salfredstatic void pipe_free_kmem(struct pipe *cpipe); 19791413Salfredstatic int pipe_create(struct pipe **cpipep); 19891413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch); 19991413Salfredstatic __inline void pipeunlock(struct pipe *cpipe); 20091413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe); 20114037Sdyson#ifndef PIPE_NODIRECT 20291413Salfredstatic int 
pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 20391413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe); 20491413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio); 20591413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe); 20614037Sdyson#endif 20791413Salfredstatic int pipespace(struct pipe *cpipe, int size); 20813675Sdyson 20992751Sjeffstatic uma_zone_t pipe_zone; 21027899Sdyson 21191372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 21291372Salfred 21391372Salfredstatic void 21491372Salfredpipeinit(void *dummy __unused) 21591372Salfred{ 216118880Salc 21792654Sjeff pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL, 21892654Sjeff NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 219118880Salc KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 22091372Salfred} 22191372Salfred 22213675Sdyson/* 22313675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 22413675Sdyson */ 22513675Sdyson 22613675Sdyson/* ARGSUSED */ 22713675Sdysonint 22883366Sjulianpipe(td, uap) 22983366Sjulian struct thread *td; 23013675Sdyson struct pipe_args /* { 23113675Sdyson int dummy; 23213675Sdyson } */ *uap; 23313675Sdyson{ 23483366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 23513675Sdyson struct file *rf, *wf; 23613675Sdyson struct pipe *rpipe, *wpipe; 23791968Salfred struct mtx *pmtx; 23813675Sdyson int fd, error; 23927899Sdyson 240111119Simp pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO); 24191968Salfred 24276756Salfred rpipe = wpipe = NULL; 24376364Salfred if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 24476364Salfred pipeclose(rpipe); 24576364Salfred pipeclose(wpipe); 24691968Salfred free(pmtx, M_TEMP); 24776364Salfred return (ENFILE); 24876364Salfred } 24976364Salfred 25013907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 25113907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 25213675Sdyson 25383366Sjulian error = falloc(td, &rf, &fd); 25470915Sdwmalone if (error) { 25570915Sdwmalone 
pipeclose(rpipe); 25670915Sdwmalone pipeclose(wpipe); 25791968Salfred free(pmtx, M_TEMP); 25870915Sdwmalone return (error); 25970915Sdwmalone } 26070915Sdwmalone fhold(rf); 26183366Sjulian td->td_retval[0] = fd; 26270915Sdwmalone 26370803Sdwmalone /* 26470803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 26570803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 26670803Sdwmalone * to avoid races against processes which manage to dup() the read 26770803Sdwmalone * side while we are blocked trying to allocate the write side. 26870803Sdwmalone */ 26989306Salfred FILE_LOCK(rf); 27013675Sdyson rf->f_flag = FREAD | FWRITE; 27113675Sdyson rf->f_type = DTYPE_PIPE; 272109153Sdillon rf->f_data = rpipe; 27313675Sdyson rf->f_ops = &pipeops; 27489306Salfred FILE_UNLOCK(rf); 27583366Sjulian error = falloc(td, &wf, &fd); 27670915Sdwmalone if (error) { 27789306Salfred FILEDESC_LOCK(fdp); 27883366Sjulian if (fdp->fd_ofiles[td->td_retval[0]] == rf) { 27983366Sjulian fdp->fd_ofiles[td->td_retval[0]] = NULL; 28089306Salfred FILEDESC_UNLOCK(fdp); 28183366Sjulian fdrop(rf, td); 28289306Salfred } else 28389306Salfred FILEDESC_UNLOCK(fdp); 28483366Sjulian fdrop(rf, td); 28570915Sdwmalone /* rpipe has been closed by fdrop(). */ 28670915Sdwmalone pipeclose(wpipe); 28791968Salfred free(pmtx, M_TEMP); 28870915Sdwmalone return (error); 28970915Sdwmalone } 29089306Salfred FILE_LOCK(wf); 29113675Sdyson wf->f_flag = FREAD | FWRITE; 29213675Sdyson wf->f_type = DTYPE_PIPE; 293109153Sdillon wf->f_data = wpipe; 29413675Sdyson wf->f_ops = &pipeops; 29589306Salfred FILE_UNLOCK(wf); 29683366Sjulian td->td_retval[1] = fd; 29713675Sdyson rpipe->pipe_peer = wpipe; 29813675Sdyson wpipe->pipe_peer = rpipe; 299101768Srwatson#ifdef MAC 300101768Srwatson /* 301101768Srwatson * struct pipe represents a pipe endpoint. The MAC label is shared 302101768Srwatson * between the connected endpoints. 
As a result mac_init_pipe() and 303101768Srwatson * mac_create_pipe() should only be called on one of the endpoints 304101768Srwatson * after they have been connected. 305101768Srwatson */ 306101768Srwatson mac_init_pipe(rpipe); 307101768Srwatson mac_create_pipe(td->td_ucred, rpipe); 308101768Srwatson#endif 30993818Sjhb mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 31091968Salfred rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; 31183366Sjulian fdrop(rf, td); 31213675Sdyson 31313675Sdyson return (0); 31413675Sdyson} 31513675Sdyson 31613909Sdyson/* 31713909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 31876364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 31976364Salfred * it will retain the old buffer. 32076364Salfred * If it fails it will return ENOMEM. 32113909Sdyson */ 32276364Salfredstatic int 32376364Salfredpipespace(cpipe, size) 32413675Sdyson struct pipe *cpipe; 32576364Salfred int size; 32613675Sdyson{ 32776364Salfred struct vm_object *object; 32876364Salfred caddr_t buffer; 32913688Sdyson int npages, error; 330117325Ssilby static int curfail = 0; 331117325Ssilby static struct timeval lastfail; 33213675Sdyson 33391412Salfred KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 33491412Salfred ("pipespace: pipe mutex locked")); 33579224Sdillon 336118764Ssilby size = round_page(size); 337118764Ssilby npages = size / PAGE_SIZE; 33813675Sdyson /* 33913675Sdyson * Create an object, I don't like the idea of paging to/from 34013675Sdyson * kernel_object. 34114037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 34213675Sdyson */ 34376364Salfred object = vm_object_allocate(OBJT_DEFAULT, npages); 344118764Ssilby buffer = (caddr_t) vm_map_min(pipe_map); 34513675Sdyson 34613675Sdyson /* 34713675Sdyson * Insert the object into the kernel map, and allocate kva for it. 34813675Sdyson * The map entry is, by default, pageable. 
34914037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 35013675Sdyson */ 351118764Ssilby error = vm_map_find(pipe_map, object, 0, 35276364Salfred (vm_offset_t *) &buffer, size, 1, 35313688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 35413675Sdyson 35576364Salfred if (error != KERN_SUCCESS) { 35676364Salfred vm_object_deallocate(object); 357118764Ssilby if (ppsratecheck(&lastfail, &curfail, 1)) 358118764Ssilby printf("kern.maxpipekva exceeded, please see tuning(7).\n"); 35976364Salfred return (ENOMEM); 36076364Salfred } 36176364Salfred 36276364Salfred /* free old resources if we're resizing */ 36376364Salfred pipe_free_kmem(cpipe); 36476364Salfred cpipe->pipe_buffer.buffer = buffer; 36576364Salfred cpipe->pipe_buffer.size = size; 36676364Salfred cpipe->pipe_buffer.in = 0; 36776364Salfred cpipe->pipe_buffer.out = 0; 36876364Salfred cpipe->pipe_buffer.cnt = 0; 369117325Ssilby atomic_add_int(&amountpipes, 1); 370110816Salc atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size); 37176364Salfred return (0); 37213907Sdyson} 37313688Sdyson 37413907Sdyson/* 37513907Sdyson * initialize and allocate VM and memory for pipe 37613907Sdyson */ 37776364Salfredstatic int 37876364Salfredpipe_create(cpipep) 37976364Salfred struct pipe **cpipep; 38076364Salfred{ 38113907Sdyson struct pipe *cpipe; 38276364Salfred int error; 38313907Sdyson 384111119Simp *cpipep = uma_zalloc(pipe_zone, M_WAITOK); 38576364Salfred if (*cpipep == NULL) 38676364Salfred return (ENOMEM); 38717163Sdyson 38876364Salfred cpipe = *cpipep; 38976364Salfred 39076364Salfred /* 39176364Salfred * protect so pipeclose() doesn't follow a junk pointer 39276364Salfred * if pipespace() fails. 
39376364Salfred */ 39476754Salfred bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel)); 39513675Sdyson cpipe->pipe_state = 0; 39613675Sdyson cpipe->pipe_peer = NULL; 39713675Sdyson cpipe->pipe_busy = 0; 39813907Sdyson 39914037Sdyson#ifndef PIPE_NODIRECT 40013907Sdyson /* 40113907Sdyson * pipe data structure initializations to support direct pipe I/O 40213907Sdyson */ 40313907Sdyson cpipe->pipe_map.cnt = 0; 40413907Sdyson cpipe->pipe_map.kva = 0; 40513907Sdyson cpipe->pipe_map.pos = 0; 40613907Sdyson cpipe->pipe_map.npages = 0; 40717124Sbde /* cpipe->pipe_map.ms[] = invalid */ 40814037Sdyson#endif 40976364Salfred 41091412Salfred cpipe->pipe_mtxp = NULL; /* avoid pipespace assertion */ 411117325Ssilby /* 412117325Ssilby * Reduce to 1/4th pipe size if we're over our global max. 413117325Ssilby */ 414118764Ssilby if (amountpipekva > maxpipekva / 2) 415117325Ssilby error = pipespace(cpipe, SMALL_PIPE_SIZE); 416117325Ssilby else 417117325Ssilby error = pipespace(cpipe, PIPE_SIZE); 41876760Salfred if (error) 41976364Salfred return (error); 42076364Salfred 42176364Salfred vfs_timestamp(&cpipe->pipe_ctime); 42276364Salfred cpipe->pipe_atime = cpipe->pipe_ctime; 42376364Salfred cpipe->pipe_mtime = cpipe->pipe_ctime; 42476364Salfred 42576364Salfred return (0); 42613675Sdyson} 42713675Sdyson 42813675Sdyson 42913675Sdyson/* 43013675Sdyson * lock a pipe for I/O, blocking other access 43113675Sdyson */ 43213675Sdysonstatic __inline int 43313907Sdysonpipelock(cpipe, catch) 43413675Sdyson struct pipe *cpipe; 43513907Sdyson int catch; 43613675Sdyson{ 43713776Sdyson int error; 43876364Salfred 43991362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 44091362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 44113675Sdyson cpipe->pipe_state |= PIPE_LWANT; 44291362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 44391362Salfred catch ? 
(PRIBIO | PCATCH) : PRIBIO, 44476760Salfred "pipelk", 0); 44576760Salfred if (error != 0) 44676760Salfred return (error); 44713675Sdyson } 44891362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 44976760Salfred return (0); 45013675Sdyson} 45113675Sdyson 45213675Sdyson/* 45313675Sdyson * unlock a pipe I/O lock 45413675Sdyson */ 45513675Sdysonstatic __inline void 45613675Sdysonpipeunlock(cpipe) 45713675Sdyson struct pipe *cpipe; 45813675Sdyson{ 45976364Salfred 46091362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 46191362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 46213675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 46313675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 46414177Sdyson wakeup(cpipe); 46513675Sdyson } 46613675Sdyson} 46713675Sdyson 46814037Sdysonstatic __inline void 46914037Sdysonpipeselwakeup(cpipe) 47014037Sdyson struct pipe *cpipe; 47114037Sdyson{ 47276364Salfred 47314037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 47414037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 47514037Sdyson selwakeup(&cpipe->pipe_sel); 47614037Sdyson } 47741086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 47895883Salfred pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 47959288Sjlemon KNOTE(&cpipe->pipe_sel.si_note, 0); 48014037Sdyson} 48114037Sdyson 48213675Sdyson/* ARGSUSED */ 48313675Sdysonstatic int 484101941Srwatsonpipe_read(fp, uio, active_cred, flags, td) 48513675Sdyson struct file *fp; 48613675Sdyson struct uio *uio; 487101941Srwatson struct ucred *active_cred; 48883366Sjulian struct thread *td; 48945311Sdt int flags; 49013675Sdyson{ 491109153Sdillon struct pipe *rpipe = fp->f_data; 49247748Salc int error; 49313675Sdyson int nread = 0; 49418863Sdyson u_int size; 49513675Sdyson 49691362Salfred PIPE_LOCK(rpipe); 49713675Sdyson ++rpipe->pipe_busy; 49847748Salc error = pipelock(rpipe, 1); 49947748Salc if (error) 50047748Salc goto unlocked_error; 50147748Salc 502101768Srwatson#ifdef MAC 503102115Srwatson error = mac_check_pipe_read(active_cred, rpipe); 504101768Srwatson if (error) 
505101768Srwatson goto locked_error; 506101768Srwatson#endif 507101768Srwatson 50813675Sdyson while (uio->uio_resid) { 50913907Sdyson /* 51013907Sdyson * normal pipe buffer receive 51113907Sdyson */ 51213675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 51318863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 51413675Sdyson if (size > rpipe->pipe_buffer.cnt) 51513675Sdyson size = rpipe->pipe_buffer.cnt; 51618863Sdyson if (size > (u_int) uio->uio_resid) 51718863Sdyson size = (u_int) uio->uio_resid; 51847748Salc 51991362Salfred PIPE_UNLOCK(rpipe); 520116127Smux error = uiomove( 521116127Smux &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 522116127Smux size, uio); 52391362Salfred PIPE_LOCK(rpipe); 52476760Salfred if (error) 52513675Sdyson break; 52676760Salfred 52713675Sdyson rpipe->pipe_buffer.out += size; 52813675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 52913675Sdyson rpipe->pipe_buffer.out = 0; 53013675Sdyson 53113675Sdyson rpipe->pipe_buffer.cnt -= size; 53247748Salc 53347748Salc /* 53447748Salc * If there is no more to read in the pipe, reset 53547748Salc * its pointers to the beginning. This improves 53647748Salc * cache hit stats. 53747748Salc */ 53847748Salc if (rpipe->pipe_buffer.cnt == 0) { 53947748Salc rpipe->pipe_buffer.in = 0; 54047748Salc rpipe->pipe_buffer.out = 0; 54147748Salc } 54213675Sdyson nread += size; 54314037Sdyson#ifndef PIPE_NODIRECT 54413907Sdyson /* 54513907Sdyson * Direct copy, bypassing a kernel buffer. 
54613907Sdyson */ 54713907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 54847748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 54947748Salc caddr_t va; 55018863Sdyson if (size > (u_int) uio->uio_resid) 55118863Sdyson size = (u_int) uio->uio_resid; 55247748Salc 55376760Salfred va = (caddr_t) rpipe->pipe_map.kva + 55476760Salfred rpipe->pipe_map.pos; 55591362Salfred PIPE_UNLOCK(rpipe); 55647748Salc error = uiomove(va, size, uio); 55791362Salfred PIPE_LOCK(rpipe); 55813907Sdyson if (error) 55913907Sdyson break; 56013907Sdyson nread += size; 56113907Sdyson rpipe->pipe_map.pos += size; 56213907Sdyson rpipe->pipe_map.cnt -= size; 56313907Sdyson if (rpipe->pipe_map.cnt == 0) { 56413907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 56513907Sdyson wakeup(rpipe); 56613907Sdyson } 56714037Sdyson#endif 56813675Sdyson } else { 56913675Sdyson /* 57013675Sdyson * detect EOF condition 57176760Salfred * read returns 0 on EOF, no need to set error 57213675Sdyson */ 57376760Salfred if (rpipe->pipe_state & PIPE_EOF) 57413675Sdyson break; 57543623Sdillon 57613675Sdyson /* 57713675Sdyson * If the "write-side" has been blocked, wake it up now. 57813675Sdyson */ 57913675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 58013675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 58113675Sdyson wakeup(rpipe); 58213675Sdyson } 58343623Sdillon 58443623Sdillon /* 58547748Salc * Break if some data was read. 58643623Sdillon */ 58747748Salc if (nread > 0) 58813675Sdyson break; 58916960Sdyson 59043623Sdillon /* 591116127Smux * Unlock the pipe buffer for our remaining processing. 592116127Smux * We will either break out with an error or we will 593116127Smux * sleep and relock to loop. 59443623Sdillon */ 59547748Salc pipeunlock(rpipe); 59643623Sdillon 59713675Sdyson /* 59847748Salc * Handle non-blocking mode operation or 59947748Salc * wait for more data. 
60013675Sdyson */ 60176760Salfred if (fp->f_flag & FNONBLOCK) { 60247748Salc error = EAGAIN; 60376760Salfred } else { 60447748Salc rpipe->pipe_state |= PIPE_WANTR; 60591362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 60691362Salfred PRIBIO | PCATCH, 60777140Salfred "piperd", 0)) == 0) 60847748Salc error = pipelock(rpipe, 1); 60913675Sdyson } 61047748Salc if (error) 61147748Salc goto unlocked_error; 61213675Sdyson } 61313675Sdyson } 614101768Srwatson#ifdef MAC 615101768Srwatsonlocked_error: 616101768Srwatson#endif 61747748Salc pipeunlock(rpipe); 61813675Sdyson 61991362Salfred /* XXX: should probably do this before getting any locks. */ 62024101Sbde if (error == 0) 62155112Sbde vfs_timestamp(&rpipe->pipe_atime); 62247748Salcunlocked_error: 62347748Salc --rpipe->pipe_busy; 62413913Sdyson 62547748Salc /* 62647748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 62747748Salc */ 62813675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 62913675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 63013675Sdyson wakeup(rpipe); 63113675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 63213675Sdyson /* 63347748Salc * Handle write blocking hysteresis. 63413675Sdyson */ 63513675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 63613675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 63713675Sdyson wakeup(rpipe); 63813675Sdyson } 63913675Sdyson } 64014037Sdyson 64114802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 64214037Sdyson pipeselwakeup(rpipe); 64314037Sdyson 64491362Salfred PIPE_UNLOCK(rpipe); 64576760Salfred return (error); 64613675Sdyson} 64713675Sdyson 64814037Sdyson#ifndef PIPE_NODIRECT 64913907Sdyson/* 65013907Sdyson * Map the sending processes' buffer into kernel space and wire it. 65113907Sdyson * This is similar to a physical write operation. 
65213907Sdyson */ 65313675Sdysonstatic int 65413907Sdysonpipe_build_write_buffer(wpipe, uio) 65513907Sdyson struct pipe *wpipe; 65613675Sdyson struct uio *uio; 65713675Sdyson{ 65818863Sdyson u_int size; 65994566Stmm int i; 660112569Sjake vm_offset_t addr, endaddr; 661112569Sjake vm_paddr_t paddr; 66213907Sdyson 66379224Sdillon GIANT_REQUIRED; 66491412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 66579224Sdillon 66618863Sdyson size = (u_int) uio->uio_iov->iov_len; 66713907Sdyson if (size > wpipe->pipe_buffer.size) 66813907Sdyson size = wpipe->pipe_buffer.size; 66913907Sdyson 67040286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 67176760Salfred addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 67276760Salfred for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 67394566Stmm vm_page_t m; 67494566Stmm 67599899Salc /* 67699899Salc * vm_fault_quick() can sleep. Consequently, 67799899Salc * vm_page_lock_queue() and vm_page_unlock_queue() 67899899Salc * should not be performed outside of this loop. 
67999899Salc */ 68051474Sdillon if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 68194608Stmm (paddr = pmap_extract(vmspace_pmap(curproc->p_vmspace), 68294608Stmm addr)) == 0) { 68313907Sdyson int j; 68476760Salfred 68599899Salc vm_page_lock_queues(); 686117325Ssilby for (j = 0; j < i; j++) { 687118757Salc vm_page_unhold(wpipe->pipe_map.ms[j]); 688117325Ssilby } 68999899Salc vm_page_unlock_queues(); 69076760Salfred return (EFAULT); 69113907Sdyson } 69213907Sdyson 69394566Stmm m = PHYS_TO_VM_PAGE(paddr); 69499899Salc vm_page_lock_queues(); 695118757Salc vm_page_hold(m); 69699899Salc vm_page_unlock_queues(); 69713907Sdyson wpipe->pipe_map.ms[i] = m; 69813907Sdyson } 69913907Sdyson 70013907Sdyson/* 70113907Sdyson * set up the control block 70213907Sdyson */ 70313907Sdyson wpipe->pipe_map.npages = i; 70476760Salfred wpipe->pipe_map.pos = 70576760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 70613907Sdyson wpipe->pipe_map.cnt = size; 70713907Sdyson 70813907Sdyson/* 70913907Sdyson * and map the buffer 71013907Sdyson */ 71113907Sdyson if (wpipe->pipe_map.kva == 0) { 71213912Sdyson /* 71313912Sdyson * We need to allocate space for an extra page because the 71413912Sdyson * address range might (will) span pages at times. 
71513912Sdyson */ 716118220Salc wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map, 71713912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 718118764Ssilby atomic_add_int(&amountpipekvawired, 719110816Salc wpipe->pipe_buffer.size + PAGE_SIZE); 72013907Sdyson } 72113907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 72213907Sdyson wpipe->pipe_map.npages); 72313907Sdyson 72413907Sdyson/* 72513907Sdyson * and update the uio data 72613907Sdyson */ 72713907Sdyson 72813907Sdyson uio->uio_iov->iov_len -= size; 729104908Smike uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; 73013907Sdyson if (uio->uio_iov->iov_len == 0) 73113907Sdyson uio->uio_iov++; 73213907Sdyson uio->uio_resid -= size; 73313907Sdyson uio->uio_offset += size; 73476760Salfred return (0); 73513907Sdyson} 73613907Sdyson 73713907Sdyson/* 73813907Sdyson * unmap and unwire the process buffer 73913907Sdyson */ 74013907Sdysonstatic void 74113907Sdysonpipe_destroy_write_buffer(wpipe) 74276760Salfred struct pipe *wpipe; 74313907Sdyson{ 74413907Sdyson int i; 74576364Salfred 74691412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 74717163Sdyson if (wpipe->pipe_map.kva) { 74817163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 74913907Sdyson 750118764Ssilby if (amountpipekvawired > maxpipekvawired / 2) { 751118764Ssilby /* Conserve address space */ 75213907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 75313907Sdyson wpipe->pipe_map.kva = 0; 75413907Sdyson kmem_free(kernel_map, kva, 755119811Salc wpipe->pipe_buffer.size + PAGE_SIZE); 756118764Ssilby atomic_subtract_int(&amountpipekvawired, 757110816Salc wpipe->pipe_buffer.size + PAGE_SIZE); 75813907Sdyson } 75913907Sdyson } 76099899Salc vm_page_lock_queues(); 761117325Ssilby for (i = 0; i < wpipe->pipe_map.npages; i++) { 762118757Salc vm_page_unhold(wpipe->pipe_map.ms[i]); 763117325Ssilby } 76499899Salc vm_page_unlock_queues(); 76591653Stanimura wpipe->pipe_map.npages = 0; 76613907Sdyson} 76713907Sdyson 76813907Sdyson/* 
76913907Sdyson * In the case of a signal, the writing process might go away. This 77013907Sdyson * code copies the data into the circular buffer so that the source 77113907Sdyson * pages can be freed without loss of data. 77213907Sdyson */ 77313907Sdysonstatic void 77413907Sdysonpipe_clone_write_buffer(wpipe) 77576364Salfred struct pipe *wpipe; 77613907Sdyson{ 77713907Sdyson int size; 77813907Sdyson int pos; 77913907Sdyson 78091362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 78113907Sdyson size = wpipe->pipe_map.cnt; 78213907Sdyson pos = wpipe->pipe_map.pos; 78313907Sdyson 78413907Sdyson wpipe->pipe_buffer.in = size; 78513907Sdyson wpipe->pipe_buffer.out = 0; 78613907Sdyson wpipe->pipe_buffer.cnt = size; 78713907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 78813907Sdyson 789119811Salc PIPE_UNLOCK(wpipe); 79092959Salfred bcopy((caddr_t) wpipe->pipe_map.kva + pos, 791100527Salfred wpipe->pipe_buffer.buffer, size); 79213907Sdyson pipe_destroy_write_buffer(wpipe); 793119811Salc PIPE_LOCK(wpipe); 79413907Sdyson} 79513907Sdyson 79613907Sdyson/* 79713907Sdyson * This implements the pipe buffer write mechanism. Note that only 79813907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 79913907Sdyson * If there are any characters in the pipe buffer, the direct write will 80013907Sdyson * be deferred until the receiving process grabs all of the bytes from 80113907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 
80213907Sdyson */ 80313907Sdysonstatic int 80413907Sdysonpipe_direct_write(wpipe, uio) 80513907Sdyson struct pipe *wpipe; 80613907Sdyson struct uio *uio; 80713907Sdyson{ 80813907Sdyson int error; 80976364Salfred 81013951Sdysonretry: 81191362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 81213907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 81376760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 81413951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 81513951Sdyson wakeup(wpipe); 81613951Sdyson } 81713992Sdyson wpipe->pipe_state |= PIPE_WANTW; 81891362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 81991362Salfred PRIBIO | PCATCH, "pipdww", 0); 82014802Sdyson if (error) 82113907Sdyson goto error1; 82214802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 82314802Sdyson error = EPIPE; 82414802Sdyson goto error1; 82514802Sdyson } 82613907Sdyson } 82713907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 82813951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 82976760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 83013951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 83113951Sdyson wakeup(wpipe); 83213951Sdyson } 83313951Sdyson 83413992Sdyson wpipe->pipe_state |= PIPE_WANTW; 83591362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 83691362Salfred PRIBIO | PCATCH, "pipdwc", 0); 83714802Sdyson if (error) 83813907Sdyson goto error1; 83914802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 84014802Sdyson error = EPIPE; 84114802Sdyson goto error1; 84213907Sdyson } 84313951Sdyson goto retry; 84413907Sdyson } 84513907Sdyson 84613951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 84713951Sdyson 84892305Salfred pipelock(wpipe, 0); 84991362Salfred PIPE_GET_GIANT(wpipe); 85013907Sdyson error = pipe_build_write_buffer(wpipe, uio); 85191362Salfred PIPE_DROP_GIANT(wpipe); 85292305Salfred pipeunlock(wpipe); 85313907Sdyson if (error) { 85413907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 85513907Sdyson goto error1; 85613907Sdyson } 85713907Sdyson 85813907Sdyson error = 0; 85913907Sdyson while (!error && 
(wpipe->pipe_state & PIPE_DIRECTW)) { 86013907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 86113907Sdyson pipelock(wpipe, 0); 862119811Salc PIPE_UNLOCK(wpipe); 86313907Sdyson pipe_destroy_write_buffer(wpipe); 864119811Salc PIPE_LOCK(wpipe); 865112981Shsu pipeselwakeup(wpipe); 86613907Sdyson pipeunlock(wpipe); 86714802Sdyson error = EPIPE; 86814802Sdyson goto error1; 86913907Sdyson } 87013992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 87113992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 87213992Sdyson wakeup(wpipe); 87313992Sdyson } 87414037Sdyson pipeselwakeup(wpipe); 87591362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, 87691362Salfred "pipdwt", 0); 87713907Sdyson } 87813907Sdyson 87913907Sdyson pipelock(wpipe,0); 88013907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 88113907Sdyson /* 88213907Sdyson * this bit of trickery substitutes a kernel buffer for 88313907Sdyson * the process that might be going away. 88413907Sdyson */ 88513907Sdyson pipe_clone_write_buffer(wpipe); 88613907Sdyson } else { 887119811Salc PIPE_UNLOCK(wpipe); 88813907Sdyson pipe_destroy_write_buffer(wpipe); 889119811Salc PIPE_LOCK(wpipe); 89013907Sdyson } 89113907Sdyson pipeunlock(wpipe); 89276760Salfred return (error); 89313907Sdyson 89413907Sdysonerror1: 89513907Sdyson wakeup(wpipe); 89676760Salfred return (error); 89713907Sdyson} 89814037Sdyson#endif 89913907Sdyson 90016960Sdysonstatic int 901101941Srwatsonpipe_write(fp, uio, active_cred, flags, td) 90216960Sdyson struct file *fp; 90313907Sdyson struct uio *uio; 904101941Srwatson struct ucred *active_cred; 90583366Sjulian struct thread *td; 90645311Sdt int flags; 90713907Sdyson{ 90813675Sdyson int error = 0; 90913913Sdyson int orig_resid; 91016960Sdyson struct pipe *wpipe, *rpipe; 91116960Sdyson 912109153Sdillon rpipe = fp->f_data; 91316960Sdyson wpipe = rpipe->pipe_peer; 91416960Sdyson 91591395Salfred PIPE_LOCK(rpipe); 91613675Sdyson /* 91713675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 
91813675Sdyson */ 91916960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 92091395Salfred PIPE_UNLOCK(rpipe); 92176760Salfred return (EPIPE); 92213675Sdyson } 923101768Srwatson#ifdef MAC 924102115Srwatson error = mac_check_pipe_write(active_cred, wpipe); 925101768Srwatson if (error) { 926101768Srwatson PIPE_UNLOCK(rpipe); 927101768Srwatson return (error); 928101768Srwatson } 929101768Srwatson#endif 93077676Sdillon ++wpipe->pipe_busy; 93113675Sdyson 93217163Sdyson /* 93317163Sdyson * If it is advantageous to resize the pipe buffer, do 93417163Sdyson * so. 93517163Sdyson */ 93617163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 937118764Ssilby (amountpipekva < maxpipekva / 2) && 93817163Sdyson (nbigpipe < LIMITBIGPIPES) && 93917163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 94017163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 94117163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 94217163Sdyson 943105009Salfred if ((error = pipelock(wpipe, 1)) == 0) { 944118799Salc PIPE_UNLOCK(wpipe); 94576364Salfred if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 946117364Ssilby atomic_add_int(&nbigpipe, 1); 947118799Salc PIPE_LOCK(wpipe); 94813907Sdyson pipeunlock(wpipe); 94913907Sdyson } 95013907Sdyson } 95177676Sdillon 95277676Sdillon /* 95377676Sdillon * If an early error occured unbusy and return, waking up any pending 95477676Sdillon * readers. 
95577676Sdillon */ 95677676Sdillon if (error) { 95777676Sdillon --wpipe->pipe_busy; 95877676Sdillon if ((wpipe->pipe_busy == 0) && 95977676Sdillon (wpipe->pipe_state & PIPE_WANT)) { 96077676Sdillon wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 96177676Sdillon wakeup(wpipe); 96277676Sdillon } 96391395Salfred PIPE_UNLOCK(rpipe); 96477676Sdillon return(error); 96577676Sdillon } 96676364Salfred 96713913Sdyson orig_resid = uio->uio_resid; 96877676Sdillon 96913675Sdyson while (uio->uio_resid) { 97013907Sdyson int space; 97176760Salfred 97214037Sdyson#ifndef PIPE_NODIRECT 97313907Sdyson /* 97413907Sdyson * If the transfer is large, we can gain performance if 97513907Sdyson * we do process-to-process copies directly. 97616416Sdyson * If the write is non-blocking, we don't use the 97716416Sdyson * direct write mechanism. 97858505Sdillon * 97958505Sdillon * The direct write mechanism will detect the reader going 98058505Sdillon * away on us. 98113907Sdyson */ 98217163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 98317163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 984118764Ssilby amountpipekvawired + uio->uio_resid < maxpipekvawired) { 985105009Salfred error = pipe_direct_write(wpipe, uio); 98676760Salfred if (error) 98713907Sdyson break; 98813907Sdyson continue; 98991362Salfred } 99014037Sdyson#endif 99113907Sdyson 99213907Sdyson /* 99313907Sdyson * Pipe buffered writes cannot be coincidental with 99413907Sdyson * direct writes. We wait until the currently executing 99513907Sdyson * direct write is completed before we start filling the 99658505Sdillon * pipe buffer. We break out if a signal occurs or the 99758505Sdillon * reader goes away. 
99813907Sdyson */ 99913907Sdyson retrywrite: 100013907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 100113992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 100213992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 100313992Sdyson wakeup(wpipe); 100413992Sdyson } 100591395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 100691362Salfred "pipbww", 0); 100758505Sdillon if (wpipe->pipe_state & PIPE_EOF) 100858505Sdillon break; 100913907Sdyson if (error) 101013907Sdyson break; 101113907Sdyson } 101258505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 101358505Sdillon error = EPIPE; 101458505Sdillon break; 101558505Sdillon } 101613907Sdyson 101713907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 101814644Sdyson 101914644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 102013913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 102113913Sdyson space = 0; 102213907Sdyson 1023118230Spb if (space > 0) { 102413907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 102554534Stegge int size; /* Transfer size */ 102654534Stegge int segsize; /* first segment to transfer */ 102776760Salfred 102813907Sdyson /* 102913907Sdyson * It is possible for a direct write to 103013907Sdyson * slip in on us... handle it here... 103113907Sdyson */ 103213907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 103313907Sdyson pipeunlock(wpipe); 103413907Sdyson goto retrywrite; 103513907Sdyson } 103654534Stegge /* 103754534Stegge * If a process blocked in uiomove, our 103854534Stegge * value for space might be bad. 103958505Sdillon * 104058505Sdillon * XXX will we be ok if the reader has gone 104158505Sdillon * away here? 104254534Stegge */ 104354534Stegge if (space > wpipe->pipe_buffer.size - 104454534Stegge wpipe->pipe_buffer.cnt) { 104554534Stegge pipeunlock(wpipe); 104654534Stegge goto retrywrite; 104754534Stegge } 104854534Stegge 104954534Stegge /* 105054534Stegge * Transfer size is minimum of uio transfer 105154534Stegge * and free space in pipe buffer. 
105254534Stegge */ 105354534Stegge if (space > uio->uio_resid) 105454534Stegge size = uio->uio_resid; 105554534Stegge else 105654534Stegge size = space; 105754534Stegge /* 105854534Stegge * First segment to transfer is minimum of 105954534Stegge * transfer size and contiguous space in 106054534Stegge * pipe buffer. If first segment to transfer 106154534Stegge * is less than the transfer size, we've got 106254534Stegge * a wraparound in the buffer. 106354534Stegge */ 106454534Stegge segsize = wpipe->pipe_buffer.size - 106554534Stegge wpipe->pipe_buffer.in; 106654534Stegge if (segsize > size) 106754534Stegge segsize = size; 106854534Stegge 106954534Stegge /* Transfer first segment */ 107054534Stegge 107191395Salfred PIPE_UNLOCK(rpipe); 107254534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 107354534Stegge segsize, uio); 107491395Salfred PIPE_LOCK(rpipe); 107554534Stegge 107654534Stegge if (error == 0 && segsize < size) { 107754534Stegge /* 107854534Stegge * Transfer remaining part now, to 107954534Stegge * support atomic writes. Wraparound 108054534Stegge * happened. 
108154534Stegge */ 108254534Stegge if (wpipe->pipe_buffer.in + segsize != 108354534Stegge wpipe->pipe_buffer.size) 1084116127Smux panic("Expected pipe buffer " 1085116127Smux "wraparound disappeared"); 108654534Stegge 108791395Salfred PIPE_UNLOCK(rpipe); 1088116127Smux error = uiomove( 1089116127Smux &wpipe->pipe_buffer.buffer[0], 1090116127Smux size - segsize, uio); 109191395Salfred PIPE_LOCK(rpipe); 109254534Stegge } 109354534Stegge if (error == 0) { 109454534Stegge wpipe->pipe_buffer.in += size; 109554534Stegge if (wpipe->pipe_buffer.in >= 109654534Stegge wpipe->pipe_buffer.size) { 1097116127Smux if (wpipe->pipe_buffer.in != 1098116127Smux size - segsize + 1099116127Smux wpipe->pipe_buffer.size) 1100116127Smux panic("Expected " 1101116127Smux "wraparound bad"); 1102116127Smux wpipe->pipe_buffer.in = size - 1103116127Smux segsize; 110454534Stegge } 110554534Stegge 110654534Stegge wpipe->pipe_buffer.cnt += size; 1107116127Smux if (wpipe->pipe_buffer.cnt > 1108116127Smux wpipe->pipe_buffer.size) 110954534Stegge panic("Pipe buffer overflow"); 111054534Stegge 111154534Stegge } 111213675Sdyson pipeunlock(wpipe); 111313675Sdyson } 111413675Sdyson if (error) 111513675Sdyson break; 111613675Sdyson 111713675Sdyson } else { 111813675Sdyson /* 111913675Sdyson * If the "read-side" has been blocked, wake it up now. 112013675Sdyson */ 112113675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 112213675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 112313675Sdyson wakeup(wpipe); 112413675Sdyson } 112514037Sdyson 112613675Sdyson /* 112713675Sdyson * don't block on non-blocking I/O 112813675Sdyson */ 112916960Sdyson if (fp->f_flag & FNONBLOCK) { 113013907Sdyson error = EAGAIN; 113113675Sdyson break; 113213675Sdyson } 113313907Sdyson 113414037Sdyson /* 113514037Sdyson * We have no more space and have something to offer, 113629356Speter * wake up select/poll. 
113714037Sdyson */ 113814037Sdyson pipeselwakeup(wpipe); 113914037Sdyson 114013675Sdyson wpipe->pipe_state |= PIPE_WANTW; 114191395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), 114291362Salfred PRIBIO | PCATCH, "pipewr", 0); 114376760Salfred if (error != 0) 114413675Sdyson break; 114513675Sdyson /* 114613675Sdyson * If read side wants to go away, we just issue a signal 114713675Sdyson * to ourselves. 114813675Sdyson */ 114913675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 115013774Sdyson error = EPIPE; 115113907Sdyson break; 115213675Sdyson } 115313675Sdyson } 115413675Sdyson } 115513675Sdyson 115614644Sdyson --wpipe->pipe_busy; 115777676Sdillon 115876760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 115976760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 116013675Sdyson wakeup(wpipe); 116113675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 116213675Sdyson /* 116313675Sdyson * If we have put any characters in the buffer, we wake up 116413675Sdyson * the reader. 116513675Sdyson */ 116613675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 116713675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 116813675Sdyson wakeup(wpipe); 116913675Sdyson } 117013675Sdyson } 117113909Sdyson 117213909Sdyson /* 117313909Sdyson * Don't return EPIPE if I/O was successful 117413909Sdyson */ 117513907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 117677676Sdillon (uio->uio_resid == 0) && 117777676Sdillon (error == EPIPE)) { 117813907Sdyson error = 0; 117977676Sdillon } 118013913Sdyson 118124101Sbde if (error == 0) 118255112Sbde vfs_timestamp(&wpipe->pipe_mtime); 118324101Sbde 118414037Sdyson /* 118514037Sdyson * We have something to offer, 118629356Speter * wake up select/poll. 
118714037Sdyson */ 118814177Sdyson if (wpipe->pipe_buffer.cnt) 118914037Sdyson pipeselwakeup(wpipe); 119013907Sdyson 119191395Salfred PIPE_UNLOCK(rpipe); 119276760Salfred return (error); 119313675Sdyson} 119413675Sdyson 119513675Sdyson/* 119613675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 119713675Sdyson */ 1198104094Sphkstatic int 1199102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td) 120013675Sdyson struct file *fp; 120136735Sdfr u_long cmd; 120299009Salfred void *data; 1203102003Srwatson struct ucred *active_cred; 120483366Sjulian struct thread *td; 120513675Sdyson{ 1206109153Sdillon struct pipe *mpipe = fp->f_data; 1207101768Srwatson#ifdef MAC 1208101768Srwatson int error; 1209104269Srwatson#endif 121013675Sdyson 1211104269Srwatson PIPE_LOCK(mpipe); 1212104269Srwatson 1213104269Srwatson#ifdef MAC 1214102003Srwatson error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data); 1215101768Srwatson if (error) 1216101768Srwatson return (error); 1217101768Srwatson#endif 1218101768Srwatson 121913675Sdyson switch (cmd) { 122013675Sdyson 122113675Sdyson case FIONBIO: 1222104269Srwatson PIPE_UNLOCK(mpipe); 122313675Sdyson return (0); 122413675Sdyson 122513675Sdyson case FIOASYNC: 122613675Sdyson if (*(int *)data) { 122713675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 122813675Sdyson } else { 122913675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 123013675Sdyson } 123191362Salfred PIPE_UNLOCK(mpipe); 123213675Sdyson return (0); 123313675Sdyson 123413675Sdyson case FIONREAD: 123514037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 123614037Sdyson *(int *)data = mpipe->pipe_map.cnt; 123714037Sdyson else 123814037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 123991362Salfred PIPE_UNLOCK(mpipe); 124013675Sdyson return (0); 124113675Sdyson 124241086Struckman case FIOSETOWN: 1243104269Srwatson PIPE_UNLOCK(mpipe); 124441086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 124541086Struckman 124641086Struckman case FIOGETOWN: 
1247104269Srwatson PIPE_UNLOCK(mpipe); 1248104393Struckman *(int *)data = fgetown(&mpipe->pipe_sigio); 124913675Sdyson return (0); 125013675Sdyson 125141086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 125241086Struckman case TIOCSPGRP: 1253104269Srwatson PIPE_UNLOCK(mpipe); 125441086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 125541086Struckman 125641086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 125718863Sdyson case TIOCGPGRP: 1258104269Srwatson PIPE_UNLOCK(mpipe); 1259104393Struckman *(int *)data = -fgetown(&mpipe->pipe_sigio); 126013675Sdyson return (0); 126113675Sdyson 126213675Sdyson } 1263104269Srwatson PIPE_UNLOCK(mpipe); 126417124Sbde return (ENOTTY); 126513675Sdyson} 126613675Sdyson 1267104094Sphkstatic int 1268101983Srwatsonpipe_poll(fp, events, active_cred, td) 126913675Sdyson struct file *fp; 127029356Speter int events; 1271101983Srwatson struct ucred *active_cred; 127283366Sjulian struct thread *td; 127313675Sdyson{ 1274109153Sdillon struct pipe *rpipe = fp->f_data; 127513675Sdyson struct pipe *wpipe; 127629356Speter int revents = 0; 1277101768Srwatson#ifdef MAC 1278101768Srwatson int error; 1279101768Srwatson#endif 128013675Sdyson 128113675Sdyson wpipe = rpipe->pipe_peer; 128291362Salfred PIPE_LOCK(rpipe); 1283101768Srwatson#ifdef MAC 1284102115Srwatson error = mac_check_pipe_poll(active_cred, rpipe); 1285101768Srwatson if (error) 1286101768Srwatson goto locked_error; 1287101768Srwatson#endif 128829356Speter if (events & (POLLIN | POLLRDNORM)) 128929356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 129029356Speter (rpipe->pipe_buffer.cnt > 0) || 129129356Speter (rpipe->pipe_state & PIPE_EOF)) 129229356Speter revents |= events & (POLLIN | POLLRDNORM); 129313675Sdyson 129429356Speter if (events & (POLLOUT | POLLWRNORM)) 129529356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 129643311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 129743311Sdillon 
(wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 129829356Speter revents |= events & (POLLOUT | POLLWRNORM); 129913675Sdyson 130029356Speter if ((rpipe->pipe_state & PIPE_EOF) || 130129356Speter (wpipe == NULL) || 130229356Speter (wpipe->pipe_state & PIPE_EOF)) 130329356Speter revents |= POLLHUP; 130429356Speter 130529356Speter if (revents == 0) { 130629356Speter if (events & (POLLIN | POLLRDNORM)) { 130783805Sjhb selrecord(td, &rpipe->pipe_sel); 130829356Speter rpipe->pipe_state |= PIPE_SEL; 130913675Sdyson } 131013675Sdyson 131129356Speter if (events & (POLLOUT | POLLWRNORM)) { 131283805Sjhb selrecord(td, &wpipe->pipe_sel); 131330164Speter wpipe->pipe_state |= PIPE_SEL; 131413907Sdyson } 131513675Sdyson } 1316101768Srwatson#ifdef MAC 1317101768Srwatsonlocked_error: 1318101768Srwatson#endif 131991362Salfred PIPE_UNLOCK(rpipe); 132029356Speter 132129356Speter return (revents); 132213675Sdyson} 132313675Sdyson 132498989Salfred/* 132598989Salfred * We shouldn't need locks here as we're doing a read and this should 132698989Salfred * be a natural race. 
132798989Salfred */ 132852983Speterstatic int 1329101983Srwatsonpipe_stat(fp, ub, active_cred, td) 133052983Speter struct file *fp; 133152983Speter struct stat *ub; 1332101983Srwatson struct ucred *active_cred; 133383366Sjulian struct thread *td; 133413675Sdyson{ 1335109153Sdillon struct pipe *pipe = fp->f_data; 1336101768Srwatson#ifdef MAC 1337101768Srwatson int error; 133852983Speter 1339104269Srwatson PIPE_LOCK(pipe); 1340102115Srwatson error = mac_check_pipe_stat(active_cred, pipe); 1341104269Srwatson PIPE_UNLOCK(pipe); 1342101768Srwatson if (error) 1343101768Srwatson return (error); 1344101768Srwatson#endif 1345100527Salfred bzero(ub, sizeof(*ub)); 134617124Sbde ub->st_mode = S_IFIFO; 134713907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 134813675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 134913675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 135034901Sphk ub->st_atimespec = pipe->pipe_atime; 135134901Sphk ub->st_mtimespec = pipe->pipe_mtime; 135234901Sphk ub->st_ctimespec = pipe->pipe_ctime; 135360404Schris ub->st_uid = fp->f_cred->cr_uid; 135460404Schris ub->st_gid = fp->f_cred->cr_gid; 135517124Sbde /* 135660404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 135717124Sbde * XXX (st_dev, st_ino) should be unique. 
135817124Sbde */ 135976760Salfred return (0); 136013675Sdyson} 136113675Sdyson 136213675Sdyson/* ARGSUSED */ 136313675Sdysonstatic int 136483366Sjulianpipe_close(fp, td) 136513675Sdyson struct file *fp; 136683366Sjulian struct thread *td; 136713675Sdyson{ 1368109153Sdillon struct pipe *cpipe = fp->f_data; 136916322Sgpalmer 137049413Sgreen fp->f_ops = &badfileops; 1371109153Sdillon fp->f_data = NULL; 137296122Salfred funsetown(&cpipe->pipe_sigio); 137313675Sdyson pipeclose(cpipe); 137476760Salfred return (0); 137513675Sdyson} 137613675Sdyson 137776364Salfredstatic void 137876364Salfredpipe_free_kmem(cpipe) 137976364Salfred struct pipe *cpipe; 138076364Salfred{ 138191412Salfred 138291412Salfred KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 138391412Salfred ("pipespace: pipe mutex locked")); 138476364Salfred 138576364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 138676364Salfred if (cpipe->pipe_buffer.size > PIPE_SIZE) 1387117364Ssilby atomic_subtract_int(&nbigpipe, 1); 1388110816Salc atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size); 1389117325Ssilby atomic_subtract_int(&amountpipes, 1); 1390118764Ssilby vm_map_remove(pipe_map, 1391118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer, 1392118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 139376364Salfred cpipe->pipe_buffer.buffer = NULL; 139476364Salfred } 139576364Salfred#ifndef PIPE_NODIRECT 1396102241Sarchie if (cpipe->pipe_map.kva != 0) { 1397118764Ssilby atomic_subtract_int(&amountpipekvawired, 1398110816Salc cpipe->pipe_buffer.size + PAGE_SIZE); 139976364Salfred kmem_free(kernel_map, 140076364Salfred cpipe->pipe_map.kva, 140176364Salfred cpipe->pipe_buffer.size + PAGE_SIZE); 140276364Salfred cpipe->pipe_map.cnt = 0; 140376364Salfred cpipe->pipe_map.kva = 0; 140476364Salfred cpipe->pipe_map.pos = 0; 140576364Salfred cpipe->pipe_map.npages = 0; 140676364Salfred } 140776364Salfred#endif 140876364Salfred} 140976364Salfred 141013675Sdyson/* 
141113675Sdyson * shutdown the pipe 141213675Sdyson */ 141313675Sdysonstatic void 141413675Sdysonpipeclose(cpipe) 141513675Sdyson struct pipe *cpipe; 141613675Sdyson{ 141713907Sdyson struct pipe *ppipe; 141891968Salfred int hadpeer; 141976364Salfred 142091968Salfred if (cpipe == NULL) 142191968Salfred return; 142291968Salfred 142391968Salfred hadpeer = 0; 142491968Salfred 142591968Salfred /* partially created pipes won't have a valid mutex. */ 142691968Salfred if (PIPE_MTX(cpipe) != NULL) 142791362Salfred PIPE_LOCK(cpipe); 142813907Sdyson 142991968Salfred pipeselwakeup(cpipe); 143013907Sdyson 143191968Salfred /* 143291968Salfred * If the other side is blocked, wake it up saying that 143391968Salfred * we want to close it down. 143491968Salfred */ 143591968Salfred while (cpipe->pipe_busy) { 143691968Salfred wakeup(cpipe); 143791968Salfred cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; 143891968Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 143991968Salfred } 144013675Sdyson 1441101768Srwatson#ifdef MAC 1442101768Srwatson if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) 1443101768Srwatson mac_destroy_pipe(cpipe); 1444101768Srwatson#endif 1445101768Srwatson 144691968Salfred /* 144791968Salfred * Disconnect from peer 144891968Salfred */ 144991968Salfred if ((ppipe = cpipe->pipe_peer) != NULL) { 145091968Salfred hadpeer++; 145191968Salfred pipeselwakeup(ppipe); 145213907Sdyson 145391968Salfred ppipe->pipe_state |= PIPE_EOF; 145491968Salfred wakeup(ppipe); 145591968Salfred KNOTE(&ppipe->pipe_sel.si_note, 0); 145691968Salfred ppipe->pipe_peer = NULL; 145791968Salfred } 145891968Salfred /* 145991968Salfred * free resources 146091968Salfred */ 146191968Salfred if (PIPE_MTX(cpipe) != NULL) { 146291968Salfred PIPE_UNLOCK(cpipe); 146391968Salfred if (!hadpeer) { 146491968Salfred mtx_destroy(PIPE_MTX(cpipe)); 146591968Salfred free(PIPE_MTX(cpipe), M_TEMP); 146613675Sdyson } 146713675Sdyson } 146891968Salfred pipe_free_kmem(cpipe); 146992751Sjeff 
uma_zfree(pipe_zone, cpipe); 147013675Sdyson} 147159288Sjlemon 147272521Sjlemon/*ARGSUSED*/ 147359288Sjlemonstatic int 147472521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 147559288Sjlemon{ 147689306Salfred struct pipe *cpipe; 147759288Sjlemon 1478109153Sdillon cpipe = kn->kn_fp->f_data; 147972521Sjlemon switch (kn->kn_filter) { 148072521Sjlemon case EVFILT_READ: 148172521Sjlemon kn->kn_fop = &pipe_rfiltops; 148272521Sjlemon break; 148372521Sjlemon case EVFILT_WRITE: 148472521Sjlemon kn->kn_fop = &pipe_wfiltops; 148578292Sjlemon cpipe = cpipe->pipe_peer; 1486101382Sdes if (cpipe == NULL) 1487101382Sdes /* other end of pipe has been closed */ 1488118929Sjmg return (EPIPE); 148972521Sjlemon break; 149072521Sjlemon default: 149172521Sjlemon return (1); 149272521Sjlemon } 1493100527Salfred kn->kn_hook = cpipe; 149478292Sjlemon 149591372Salfred PIPE_LOCK(cpipe); 149678292Sjlemon SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext); 149791372Salfred PIPE_UNLOCK(cpipe); 149859288Sjlemon return (0); 149959288Sjlemon} 150059288Sjlemon 150159288Sjlemonstatic void 150259288Sjlemonfilt_pipedetach(struct knote *kn) 150359288Sjlemon{ 150478292Sjlemon struct pipe *cpipe = (struct pipe *)kn->kn_hook; 150559288Sjlemon 150691372Salfred PIPE_LOCK(cpipe); 150778292Sjlemon SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext); 150891372Salfred PIPE_UNLOCK(cpipe); 150959288Sjlemon} 151059288Sjlemon 151159288Sjlemon/*ARGSUSED*/ 151259288Sjlemonstatic int 151359288Sjlemonfilt_piperead(struct knote *kn, long hint) 151459288Sjlemon{ 1515109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 151659288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 151759288Sjlemon 151891372Salfred PIPE_LOCK(rpipe); 151959288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 152059288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 152159288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 152259288Sjlemon 152359288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 
152459288Sjlemon (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 152591372Salfred kn->kn_flags |= EV_EOF; 152691372Salfred PIPE_UNLOCK(rpipe); 152759288Sjlemon return (1); 152859288Sjlemon } 152991372Salfred PIPE_UNLOCK(rpipe); 153059288Sjlemon return (kn->kn_data > 0); 153159288Sjlemon} 153259288Sjlemon 153359288Sjlemon/*ARGSUSED*/ 153459288Sjlemonstatic int 153559288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 153659288Sjlemon{ 1537109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 153859288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 153959288Sjlemon 154091372Salfred PIPE_LOCK(rpipe); 154159288Sjlemon if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 154259288Sjlemon kn->kn_data = 0; 154359288Sjlemon kn->kn_flags |= EV_EOF; 154491372Salfred PIPE_UNLOCK(rpipe); 154559288Sjlemon return (1); 154659288Sjlemon } 154759288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 154865855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 154959288Sjlemon kn->kn_data = 0; 155059288Sjlemon 155191372Salfred PIPE_UNLOCK(rpipe); 155259288Sjlemon return (kn->kn_data >= PIPE_BUF); 155359288Sjlemon} 1554