/* sys_pipe.c — FreeBSD kernel pipe implementation, revision 121256 */
113675Sdyson/* 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson */ 1913675Sdyson 2013675Sdyson/* 2113675Sdyson * This file contains a high-performance replacement for the socket-based 2213675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2313675Sdyson * all features of sockets, but does do everything that pipes normally 2413675Sdyson * do. 2513675Sdyson */ 2613675Sdyson 2713907Sdyson/* 2813907Sdyson * This code has two modes of operation, a small write mode and a large 2913907Sdyson * write mode. The small write mode acts like conventional pipes with 3013907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3113907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 3213907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 3313907Sdyson * the receiving process can copy it directly from the pages in the sending 3413907Sdyson * process. 
3513907Sdyson * 3613907Sdyson * If the sending process receives a signal, it is possible that it will 3713913Sdyson * go away, and certainly its address space can change, because control 3813907Sdyson * is returned back to the user-mode side. In that case, the pipe code 3913907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4013907Sdyson * kernel buffer, and the receiving process will grab the data from the 4113907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4213907Sdyson * the copy operation is normally eliminated. 4313907Sdyson * 4413907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4513907Sdyson * happen for small transfers so that the system will not spend all of 46118764Ssilby * its time context switching. 47117325Ssilby * 48118764Ssilby * In order to limit the resource use of pipes, two sysctls exist: 49117325Ssilby * 50118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 51118764Ssilby * address space available to us in pipe_map. Whenever the amount in use 52118764Ssilby * exceeds half of this value, all new pipes will be created with size 53118764Ssilby * SMALL_PIPE_SIZE, rather than PIPE_SIZE. Big pipe creation will be limited 54118764Ssilby * as well. This value is loader tunable only. 55117325Ssilby * 56117325Ssilby * kern.ipc.maxpipekvawired - This value limits the amount of memory that may 57117325Ssilby * be wired in order to facilitate direct copies using page flipping. 58117325Ssilby * Whenever this value is exceeded, pipes will fall back to using regular 59118764Ssilby * copies. This value is sysctl controllable at all times. 60117325Ssilby * 61117325Ssilby * These values are autotuned in subr_param.c. 62117325Ssilby * 63117325Ssilby * Memory usage may be monitored through the sysctls 64117325Ssilby * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired. 
65117325Ssilby * 6613907Sdyson */ 6713907Sdyson 68116182Sobrien#include <sys/cdefs.h> 69116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 121256 2003-10-19 20:41:07Z dwmalone $"); 70116182Sobrien 71101768Srwatson#include "opt_mac.h" 72101768Srwatson 7313675Sdyson#include <sys/param.h> 7413675Sdyson#include <sys/systm.h> 7524131Sbde#include <sys/fcntl.h> 7613675Sdyson#include <sys/file.h> 7713675Sdyson#include <sys/filedesc.h> 7824206Sbde#include <sys/filio.h> 7991372Salfred#include <sys/kernel.h> 8076166Smarkm#include <sys/lock.h> 81101768Srwatson#include <sys/mac.h> 8276827Salfred#include <sys/mutex.h> 8324206Sbde#include <sys/ttycom.h> 8413675Sdyson#include <sys/stat.h> 8591968Salfred#include <sys/malloc.h> 8629356Speter#include <sys/poll.h> 8770834Swollman#include <sys/selinfo.h> 8813675Sdyson#include <sys/signalvar.h> 89117325Ssilby#include <sys/sysctl.h> 9013675Sdyson#include <sys/sysproto.h> 9113675Sdyson#include <sys/pipe.h> 9276166Smarkm#include <sys/proc.h> 9355112Sbde#include <sys/vnode.h> 9434924Sbde#include <sys/uio.h> 9559288Sjlemon#include <sys/event.h> 9613675Sdyson 9713675Sdyson#include <vm/vm.h> 9813675Sdyson#include <vm/vm_param.h> 9913675Sdyson#include <vm/vm_object.h> 10013675Sdyson#include <vm/vm_kern.h> 10113675Sdyson#include <vm/vm_extern.h> 10213675Sdyson#include <vm/pmap.h> 10313675Sdyson#include <vm/vm_map.h> 10413907Sdyson#include <vm/vm_page.h> 10592751Sjeff#include <vm/uma.h> 10613675Sdyson 10714037Sdyson/* 10814037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 10914037Sdyson * approx 30% decrease in transfer rate. This could be useful for 11014037Sdyson * NetBSD or OpenBSD. 
11114037Sdyson */ 11214037Sdyson/* #define PIPE_NODIRECT */ 11314037Sdyson 11414037Sdyson/* 11514037Sdyson * interfaces to the outside world 11614037Sdyson */ 117108255Sphkstatic fo_rdwr_t pipe_read; 118108255Sphkstatic fo_rdwr_t pipe_write; 119108255Sphkstatic fo_ioctl_t pipe_ioctl; 120108255Sphkstatic fo_poll_t pipe_poll; 121108255Sphkstatic fo_kqfilter_t pipe_kqfilter; 122108255Sphkstatic fo_stat_t pipe_stat; 123108255Sphkstatic fo_close_t pipe_close; 12413675Sdyson 12572521Sjlemonstatic struct fileops pipeops = { 126116546Sphk .fo_read = pipe_read, 127116546Sphk .fo_write = pipe_write, 128116546Sphk .fo_ioctl = pipe_ioctl, 129116546Sphk .fo_poll = pipe_poll, 130116546Sphk .fo_kqfilter = pipe_kqfilter, 131116546Sphk .fo_stat = pipe_stat, 132116546Sphk .fo_close = pipe_close, 133116546Sphk .fo_flags = DFLAG_PASSABLE 13472521Sjlemon}; 13513675Sdyson 13659288Sjlemonstatic void filt_pipedetach(struct knote *kn); 13759288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 13859288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 13959288Sjlemon 14072521Sjlemonstatic struct filterops pipe_rfiltops = 14172521Sjlemon { 1, NULL, filt_pipedetach, filt_piperead }; 14272521Sjlemonstatic struct filterops pipe_wfiltops = 14372521Sjlemon { 1, NULL, filt_pipedetach, filt_pipewrite }; 14459288Sjlemon 14513675Sdyson/* 14613675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 14713675Sdyson * space is pageable. The pipe code will try to maintain locality of 14813675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 14913675Sdyson * will not wipe the cache. 
15013675Sdyson */ 15113907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 15213907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 15313675Sdyson 15413907Sdyson/* 15517163Sdyson * Limit the number of "big" pipes 15617163Sdyson */ 15717163Sdyson#define LIMITBIGPIPES 32 15833181Seivindstatic int nbigpipe; 15917163Sdyson 160117325Ssilbystatic int amountpipes; 16117124Sbdestatic int amountpipekva; 162117325Ssilbystatic int amountpipekvawired; 16313907Sdyson 164117325SsilbySYSCTL_DECL(_kern_ipc); 165117325Ssilby 166118764SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD, 167117325Ssilby &maxpipekva, 0, "Pipe KVA limit"); 168117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW, 169117325Ssilby &maxpipekvawired, 0, "Pipe KVA wired limit"); 170117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD, 171117364Ssilby &amountpipes, 0, "Current # of pipes"); 172117364SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD, 173117364Ssilby &nbigpipe, 0, "Current # of big pipes"); 174117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 175117325Ssilby &amountpipekva, 0, "Pipe KVA usage"); 176117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD, 177117325Ssilby &amountpipekvawired, 0, "Pipe wired KVA usage"); 178117325Ssilby 17991413Salfredstatic void pipeinit(void *dummy __unused); 18091413Salfredstatic void pipeclose(struct pipe *cpipe); 18191413Salfredstatic void pipe_free_kmem(struct pipe *cpipe); 18291413Salfredstatic int pipe_create(struct pipe **cpipep); 18391413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch); 18491413Salfredstatic __inline void pipeunlock(struct pipe *cpipe); 18591413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe); 18614037Sdyson#ifndef PIPE_NODIRECT 18791413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 18891413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe); 18991413Salfredstatic int pipe_direct_write(struct pipe *wpipe, 
struct uio *uio); 19091413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe); 19114037Sdyson#endif 19291413Salfredstatic int pipespace(struct pipe *cpipe, int size); 19313675Sdyson 19492751Sjeffstatic uma_zone_t pipe_zone; 19527899Sdyson 19691372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 19791372Salfred 19891372Salfredstatic void 19991372Salfredpipeinit(void *dummy __unused) 20091372Salfred{ 201118880Salc 20292654Sjeff pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL, 20392654Sjeff NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 204118880Salc KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 20591372Salfred} 20691372Salfred 20713675Sdyson/* 20813675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 20913675Sdyson */ 21013675Sdyson 21113675Sdyson/* ARGSUSED */ 21213675Sdysonint 21383366Sjulianpipe(td, uap) 21483366Sjulian struct thread *td; 21513675Sdyson struct pipe_args /* { 21613675Sdyson int dummy; 21713675Sdyson } */ *uap; 21813675Sdyson{ 21983366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 22013675Sdyson struct file *rf, *wf; 22113675Sdyson struct pipe *rpipe, *wpipe; 22291968Salfred struct mtx *pmtx; 22313675Sdyson int fd, error; 22427899Sdyson 225111119Simp pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO); 22691968Salfred 22776756Salfred rpipe = wpipe = NULL; 22876364Salfred if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 22976364Salfred pipeclose(rpipe); 23076364Salfred pipeclose(wpipe); 23191968Salfred free(pmtx, M_TEMP); 23276364Salfred return (ENFILE); 23376364Salfred } 23476364Salfred 23513907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 23613907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 23713675Sdyson 23883366Sjulian error = falloc(td, &rf, &fd); 23970915Sdwmalone if (error) { 24070915Sdwmalone pipeclose(rpipe); 24170915Sdwmalone pipeclose(wpipe); 24291968Salfred free(pmtx, M_TEMP); 24370915Sdwmalone return (error); 24470915Sdwmalone } 245121256Sdwmalone /* An extra reference on `rf' has 
been held for us by falloc(). */ 24683366Sjulian td->td_retval[0] = fd; 24770915Sdwmalone 24870803Sdwmalone /* 24970803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 25070803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 25170803Sdwmalone * to avoid races against processes which manage to dup() the read 25270803Sdwmalone * side while we are blocked trying to allocate the write side. 25370803Sdwmalone */ 25489306Salfred FILE_LOCK(rf); 25513675Sdyson rf->f_flag = FREAD | FWRITE; 25613675Sdyson rf->f_type = DTYPE_PIPE; 257109153Sdillon rf->f_data = rpipe; 25813675Sdyson rf->f_ops = &pipeops; 25989306Salfred FILE_UNLOCK(rf); 26083366Sjulian error = falloc(td, &wf, &fd); 26170915Sdwmalone if (error) { 26289306Salfred FILEDESC_LOCK(fdp); 26383366Sjulian if (fdp->fd_ofiles[td->td_retval[0]] == rf) { 26483366Sjulian fdp->fd_ofiles[td->td_retval[0]] = NULL; 26589306Salfred FILEDESC_UNLOCK(fdp); 26683366Sjulian fdrop(rf, td); 26789306Salfred } else 26889306Salfred FILEDESC_UNLOCK(fdp); 26983366Sjulian fdrop(rf, td); 27070915Sdwmalone /* rpipe has been closed by fdrop(). */ 27170915Sdwmalone pipeclose(wpipe); 27291968Salfred free(pmtx, M_TEMP); 27370915Sdwmalone return (error); 27470915Sdwmalone } 275121256Sdwmalone /* An extra reference on `wf' has been held for us by falloc(). */ 27689306Salfred FILE_LOCK(wf); 27713675Sdyson wf->f_flag = FREAD | FWRITE; 27813675Sdyson wf->f_type = DTYPE_PIPE; 279109153Sdillon wf->f_data = wpipe; 28013675Sdyson wf->f_ops = &pipeops; 28189306Salfred FILE_UNLOCK(wf); 282121256Sdwmalone fdrop(wf, td); 28383366Sjulian td->td_retval[1] = fd; 28413675Sdyson rpipe->pipe_peer = wpipe; 28513675Sdyson wpipe->pipe_peer = rpipe; 286101768Srwatson#ifdef MAC 287101768Srwatson /* 288101768Srwatson * struct pipe represents a pipe endpoint. The MAC label is shared 289101768Srwatson * between the connected endpoints. 
As a result mac_init_pipe() and 290101768Srwatson * mac_create_pipe() should only be called on one of the endpoints 291101768Srwatson * after they have been connected. 292101768Srwatson */ 293101768Srwatson mac_init_pipe(rpipe); 294101768Srwatson mac_create_pipe(td->td_ucred, rpipe); 295101768Srwatson#endif 29693818Sjhb mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 29791968Salfred rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; 29883366Sjulian fdrop(rf, td); 29913675Sdyson 30013675Sdyson return (0); 30113675Sdyson} 30213675Sdyson 30313909Sdyson/* 30413909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 30576364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 30676364Salfred * it will retain the old buffer. 30776364Salfred * If it fails it will return ENOMEM. 30813909Sdyson */ 30976364Salfredstatic int 31076364Salfredpipespace(cpipe, size) 31113675Sdyson struct pipe *cpipe; 31276364Salfred int size; 31313675Sdyson{ 31476364Salfred struct vm_object *object; 31576364Salfred caddr_t buffer; 31613688Sdyson int npages, error; 317117325Ssilby static int curfail = 0; 318117325Ssilby static struct timeval lastfail; 31913675Sdyson 32091412Salfred KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 32191412Salfred ("pipespace: pipe mutex locked")); 32279224Sdillon 323118764Ssilby size = round_page(size); 324118764Ssilby npages = size / PAGE_SIZE; 32513675Sdyson /* 32613675Sdyson * Create an object, I don't like the idea of paging to/from 32713675Sdyson * kernel_object. 32814037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 32913675Sdyson */ 33076364Salfred object = vm_object_allocate(OBJT_DEFAULT, npages); 331118764Ssilby buffer = (caddr_t) vm_map_min(pipe_map); 33213675Sdyson 33313675Sdyson /* 33413675Sdyson * Insert the object into the kernel map, and allocate kva for it. 33513675Sdyson * The map entry is, by default, pageable. 
33614037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 33713675Sdyson */ 338118764Ssilby error = vm_map_find(pipe_map, object, 0, 33976364Salfred (vm_offset_t *) &buffer, size, 1, 34013688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 34113675Sdyson 34276364Salfred if (error != KERN_SUCCESS) { 34376364Salfred vm_object_deallocate(object); 344118764Ssilby if (ppsratecheck(&lastfail, &curfail, 1)) 345118764Ssilby printf("kern.maxpipekva exceeded, please see tuning(7).\n"); 34676364Salfred return (ENOMEM); 34776364Salfred } 34876364Salfred 34976364Salfred /* free old resources if we're resizing */ 35076364Salfred pipe_free_kmem(cpipe); 35176364Salfred cpipe->pipe_buffer.buffer = buffer; 35276364Salfred cpipe->pipe_buffer.size = size; 35376364Salfred cpipe->pipe_buffer.in = 0; 35476364Salfred cpipe->pipe_buffer.out = 0; 35576364Salfred cpipe->pipe_buffer.cnt = 0; 356117325Ssilby atomic_add_int(&amountpipes, 1); 357110816Salc atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size); 35876364Salfred return (0); 35913907Sdyson} 36013688Sdyson 36113907Sdyson/* 36213907Sdyson * initialize and allocate VM and memory for pipe 36313907Sdyson */ 36476364Salfredstatic int 36576364Salfredpipe_create(cpipep) 36676364Salfred struct pipe **cpipep; 36776364Salfred{ 36813907Sdyson struct pipe *cpipe; 36976364Salfred int error; 37013907Sdyson 371111119Simp *cpipep = uma_zalloc(pipe_zone, M_WAITOK); 37276364Salfred if (*cpipep == NULL) 37376364Salfred return (ENOMEM); 37417163Sdyson 37576364Salfred cpipe = *cpipep; 37676364Salfred 37776364Salfred /* 37876364Salfred * protect so pipeclose() doesn't follow a junk pointer 37976364Salfred * if pipespace() fails. 
38076364Salfred */ 38176754Salfred bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel)); 38213675Sdyson cpipe->pipe_state = 0; 38313675Sdyson cpipe->pipe_peer = NULL; 38413675Sdyson cpipe->pipe_busy = 0; 38513907Sdyson 38614037Sdyson#ifndef PIPE_NODIRECT 38713907Sdyson /* 38813907Sdyson * pipe data structure initializations to support direct pipe I/O 38913907Sdyson */ 39013907Sdyson cpipe->pipe_map.cnt = 0; 39113907Sdyson cpipe->pipe_map.kva = 0; 39213907Sdyson cpipe->pipe_map.pos = 0; 39313907Sdyson cpipe->pipe_map.npages = 0; 39417124Sbde /* cpipe->pipe_map.ms[] = invalid */ 39514037Sdyson#endif 39676364Salfred 39791412Salfred cpipe->pipe_mtxp = NULL; /* avoid pipespace assertion */ 398117325Ssilby /* 399117325Ssilby * Reduce to 1/4th pipe size if we're over our global max. 400117325Ssilby */ 401118764Ssilby if (amountpipekva > maxpipekva / 2) 402117325Ssilby error = pipespace(cpipe, SMALL_PIPE_SIZE); 403117325Ssilby else 404117325Ssilby error = pipespace(cpipe, PIPE_SIZE); 40576760Salfred if (error) 40676364Salfred return (error); 40776364Salfred 40876364Salfred vfs_timestamp(&cpipe->pipe_ctime); 40976364Salfred cpipe->pipe_atime = cpipe->pipe_ctime; 41076364Salfred cpipe->pipe_mtime = cpipe->pipe_ctime; 41176364Salfred 41276364Salfred return (0); 41313675Sdyson} 41413675Sdyson 41513675Sdyson 41613675Sdyson/* 41713675Sdyson * lock a pipe for I/O, blocking other access 41813675Sdyson */ 41913675Sdysonstatic __inline int 42013907Sdysonpipelock(cpipe, catch) 42113675Sdyson struct pipe *cpipe; 42213907Sdyson int catch; 42313675Sdyson{ 42413776Sdyson int error; 42576364Salfred 42691362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 42791362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 42813675Sdyson cpipe->pipe_state |= PIPE_LWANT; 42991362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 43091362Salfred catch ? 
(PRIBIO | PCATCH) : PRIBIO, 43176760Salfred "pipelk", 0); 43276760Salfred if (error != 0) 43376760Salfred return (error); 43413675Sdyson } 43591362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 43676760Salfred return (0); 43713675Sdyson} 43813675Sdyson 43913675Sdyson/* 44013675Sdyson * unlock a pipe I/O lock 44113675Sdyson */ 44213675Sdysonstatic __inline void 44313675Sdysonpipeunlock(cpipe) 44413675Sdyson struct pipe *cpipe; 44513675Sdyson{ 44676364Salfred 44791362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 44891362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 44913675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 45013675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 45114177Sdyson wakeup(cpipe); 45213675Sdyson } 45313675Sdyson} 45413675Sdyson 45514037Sdysonstatic __inline void 45614037Sdysonpipeselwakeup(cpipe) 45714037Sdyson struct pipe *cpipe; 45814037Sdyson{ 45976364Salfred 46014037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 46114037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 46214037Sdyson selwakeup(&cpipe->pipe_sel); 46314037Sdyson } 46441086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 46595883Salfred pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 46659288Sjlemon KNOTE(&cpipe->pipe_sel.si_note, 0); 46714037Sdyson} 46814037Sdyson 46913675Sdyson/* ARGSUSED */ 47013675Sdysonstatic int 471101941Srwatsonpipe_read(fp, uio, active_cred, flags, td) 47213675Sdyson struct file *fp; 47313675Sdyson struct uio *uio; 474101941Srwatson struct ucred *active_cred; 47583366Sjulian struct thread *td; 47645311Sdt int flags; 47713675Sdyson{ 478109153Sdillon struct pipe *rpipe = fp->f_data; 47947748Salc int error; 48013675Sdyson int nread = 0; 48118863Sdyson u_int size; 48213675Sdyson 48391362Salfred PIPE_LOCK(rpipe); 48413675Sdyson ++rpipe->pipe_busy; 48547748Salc error = pipelock(rpipe, 1); 48647748Salc if (error) 48747748Salc goto unlocked_error; 48847748Salc 489101768Srwatson#ifdef MAC 490102115Srwatson error = mac_check_pipe_read(active_cred, rpipe); 491101768Srwatson if (error) 
492101768Srwatson goto locked_error; 493101768Srwatson#endif 494101768Srwatson 49513675Sdyson while (uio->uio_resid) { 49613907Sdyson /* 49713907Sdyson * normal pipe buffer receive 49813907Sdyson */ 49913675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 50018863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 50113675Sdyson if (size > rpipe->pipe_buffer.cnt) 50213675Sdyson size = rpipe->pipe_buffer.cnt; 50318863Sdyson if (size > (u_int) uio->uio_resid) 50418863Sdyson size = (u_int) uio->uio_resid; 50547748Salc 50691362Salfred PIPE_UNLOCK(rpipe); 507116127Smux error = uiomove( 508116127Smux &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 509116127Smux size, uio); 51091362Salfred PIPE_LOCK(rpipe); 51176760Salfred if (error) 51213675Sdyson break; 51376760Salfred 51413675Sdyson rpipe->pipe_buffer.out += size; 51513675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 51613675Sdyson rpipe->pipe_buffer.out = 0; 51713675Sdyson 51813675Sdyson rpipe->pipe_buffer.cnt -= size; 51947748Salc 52047748Salc /* 52147748Salc * If there is no more to read in the pipe, reset 52247748Salc * its pointers to the beginning. This improves 52347748Salc * cache hit stats. 52447748Salc */ 52547748Salc if (rpipe->pipe_buffer.cnt == 0) { 52647748Salc rpipe->pipe_buffer.in = 0; 52747748Salc rpipe->pipe_buffer.out = 0; 52847748Salc } 52913675Sdyson nread += size; 53014037Sdyson#ifndef PIPE_NODIRECT 53113907Sdyson /* 53213907Sdyson * Direct copy, bypassing a kernel buffer. 
53313907Sdyson */ 53413907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 53547748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 53647748Salc caddr_t va; 53718863Sdyson if (size > (u_int) uio->uio_resid) 53818863Sdyson size = (u_int) uio->uio_resid; 53947748Salc 54076760Salfred va = (caddr_t) rpipe->pipe_map.kva + 54176760Salfred rpipe->pipe_map.pos; 54291362Salfred PIPE_UNLOCK(rpipe); 54347748Salc error = uiomove(va, size, uio); 54491362Salfred PIPE_LOCK(rpipe); 54513907Sdyson if (error) 54613907Sdyson break; 54713907Sdyson nread += size; 54813907Sdyson rpipe->pipe_map.pos += size; 54913907Sdyson rpipe->pipe_map.cnt -= size; 55013907Sdyson if (rpipe->pipe_map.cnt == 0) { 55113907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 55213907Sdyson wakeup(rpipe); 55313907Sdyson } 55414037Sdyson#endif 55513675Sdyson } else { 55613675Sdyson /* 55713675Sdyson * detect EOF condition 55876760Salfred * read returns 0 on EOF, no need to set error 55913675Sdyson */ 56076760Salfred if (rpipe->pipe_state & PIPE_EOF) 56113675Sdyson break; 56243623Sdillon 56313675Sdyson /* 56413675Sdyson * If the "write-side" has been blocked, wake it up now. 56513675Sdyson */ 56613675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 56713675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 56813675Sdyson wakeup(rpipe); 56913675Sdyson } 57043623Sdillon 57143623Sdillon /* 57247748Salc * Break if some data was read. 57343623Sdillon */ 57447748Salc if (nread > 0) 57513675Sdyson break; 57616960Sdyson 57743623Sdillon /* 578116127Smux * Unlock the pipe buffer for our remaining processing. 579116127Smux * We will either break out with an error or we will 580116127Smux * sleep and relock to loop. 58143623Sdillon */ 58247748Salc pipeunlock(rpipe); 58343623Sdillon 58413675Sdyson /* 58547748Salc * Handle non-blocking mode operation or 58647748Salc * wait for more data. 
58713675Sdyson */ 58876760Salfred if (fp->f_flag & FNONBLOCK) { 58947748Salc error = EAGAIN; 59076760Salfred } else { 59147748Salc rpipe->pipe_state |= PIPE_WANTR; 59291362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 59391362Salfred PRIBIO | PCATCH, 59477140Salfred "piperd", 0)) == 0) 59547748Salc error = pipelock(rpipe, 1); 59613675Sdyson } 59747748Salc if (error) 59847748Salc goto unlocked_error; 59913675Sdyson } 60013675Sdyson } 601101768Srwatson#ifdef MAC 602101768Srwatsonlocked_error: 603101768Srwatson#endif 60447748Salc pipeunlock(rpipe); 60513675Sdyson 60691362Salfred /* XXX: should probably do this before getting any locks. */ 60724101Sbde if (error == 0) 60855112Sbde vfs_timestamp(&rpipe->pipe_atime); 60947748Salcunlocked_error: 61047748Salc --rpipe->pipe_busy; 61113913Sdyson 61247748Salc /* 61347748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 61447748Salc */ 61513675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 61613675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 61713675Sdyson wakeup(rpipe); 61813675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 61913675Sdyson /* 62047748Salc * Handle write blocking hysteresis. 62113675Sdyson */ 62213675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 62313675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 62413675Sdyson wakeup(rpipe); 62513675Sdyson } 62613675Sdyson } 62714037Sdyson 62814802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 62914037Sdyson pipeselwakeup(rpipe); 63014037Sdyson 63191362Salfred PIPE_UNLOCK(rpipe); 63276760Salfred return (error); 63313675Sdyson} 63413675Sdyson 63514037Sdyson#ifndef PIPE_NODIRECT 63613907Sdyson/* 63713907Sdyson * Map the sending processes' buffer into kernel space and wire it. 63813907Sdyson * This is similar to a physical write operation. 
63913907Sdyson */ 64013675Sdysonstatic int 64113907Sdysonpipe_build_write_buffer(wpipe, uio) 64213907Sdyson struct pipe *wpipe; 64313675Sdyson struct uio *uio; 64413675Sdyson{ 645119872Salc pmap_t pmap; 64618863Sdyson u_int size; 647119872Salc int i, j; 648112569Sjake vm_offset_t addr, endaddr; 64913907Sdyson 65091412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 65179224Sdillon 65218863Sdyson size = (u_int) uio->uio_iov->iov_len; 65313907Sdyson if (size > wpipe->pipe_buffer.size) 65413907Sdyson size = wpipe->pipe_buffer.size; 65513907Sdyson 656119872Salc pmap = vmspace_pmap(curproc->p_vmspace); 65740286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 65876760Salfred addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 65976760Salfred for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 66099899Salc /* 66199899Salc * vm_fault_quick() can sleep. Consequently, 66299899Salc * vm_page_lock_queue() and vm_page_unlock_queue() 66399899Salc * should not be performed outside of this loop. 
66499899Salc */ 665119872Salc race: 666119872Salc if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { 66799899Salc vm_page_lock_queues(); 668119872Salc for (j = 0; j < i; j++) 669118757Salc vm_page_unhold(wpipe->pipe_map.ms[j]); 67099899Salc vm_page_unlock_queues(); 67176760Salfred return (EFAULT); 67213907Sdyson } 673120000Salc wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, 674120000Salc VM_PROT_READ); 675119872Salc if (wpipe->pipe_map.ms[i] == NULL) 676119872Salc goto race; 67713907Sdyson } 67813907Sdyson 67913907Sdyson/* 68013907Sdyson * set up the control block 68113907Sdyson */ 68213907Sdyson wpipe->pipe_map.npages = i; 68376760Salfred wpipe->pipe_map.pos = 68476760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 68513907Sdyson wpipe->pipe_map.cnt = size; 68613907Sdyson 68713907Sdyson/* 68813907Sdyson * and map the buffer 68913907Sdyson */ 69013907Sdyson if (wpipe->pipe_map.kva == 0) { 69113912Sdyson /* 69213912Sdyson * We need to allocate space for an extra page because the 69313912Sdyson * address range might (will) span pages at times. 
69413912Sdyson */ 695118220Salc wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map, 69613912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 697118764Ssilby atomic_add_int(&amountpipekvawired, 698110816Salc wpipe->pipe_buffer.size + PAGE_SIZE); 69913907Sdyson } 70013907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 70113907Sdyson wpipe->pipe_map.npages); 70213907Sdyson 70313907Sdyson/* 70413907Sdyson * and update the uio data 70513907Sdyson */ 70613907Sdyson 70713907Sdyson uio->uio_iov->iov_len -= size; 708104908Smike uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; 70913907Sdyson if (uio->uio_iov->iov_len == 0) 71013907Sdyson uio->uio_iov++; 71113907Sdyson uio->uio_resid -= size; 71213907Sdyson uio->uio_offset += size; 71376760Salfred return (0); 71413907Sdyson} 71513907Sdyson 71613907Sdyson/* 71713907Sdyson * unmap and unwire the process buffer 71813907Sdyson */ 71913907Sdysonstatic void 72013907Sdysonpipe_destroy_write_buffer(wpipe) 72176760Salfred struct pipe *wpipe; 72213907Sdyson{ 72313907Sdyson int i; 72476364Salfred 72591412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 72617163Sdyson if (wpipe->pipe_map.kva) { 72717163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 72813907Sdyson 729118764Ssilby if (amountpipekvawired > maxpipekvawired / 2) { 730118764Ssilby /* Conserve address space */ 73113907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 73213907Sdyson wpipe->pipe_map.kva = 0; 73313907Sdyson kmem_free(kernel_map, kva, 734119811Salc wpipe->pipe_buffer.size + PAGE_SIZE); 735118764Ssilby atomic_subtract_int(&amountpipekvawired, 736110816Salc wpipe->pipe_buffer.size + PAGE_SIZE); 73713907Sdyson } 73813907Sdyson } 73999899Salc vm_page_lock_queues(); 740117325Ssilby for (i = 0; i < wpipe->pipe_map.npages; i++) { 741118757Salc vm_page_unhold(wpipe->pipe_map.ms[i]); 742117325Ssilby } 74399899Salc vm_page_unlock_queues(); 74491653Stanimura wpipe->pipe_map.npages = 0; 74513907Sdyson} 74613907Sdyson 74713907Sdyson/* 
74813907Sdyson * In the case of a signal, the writing process might go away. This 74913907Sdyson * code copies the data into the circular buffer so that the source 75013907Sdyson * pages can be freed without loss of data. 75113907Sdyson */ 75213907Sdysonstatic void 75313907Sdysonpipe_clone_write_buffer(wpipe) 75476364Salfred struct pipe *wpipe; 75513907Sdyson{ 75613907Sdyson int size; 75713907Sdyson int pos; 75813907Sdyson 75991362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 76013907Sdyson size = wpipe->pipe_map.cnt; 76113907Sdyson pos = wpipe->pipe_map.pos; 76213907Sdyson 76313907Sdyson wpipe->pipe_buffer.in = size; 76413907Sdyson wpipe->pipe_buffer.out = 0; 76513907Sdyson wpipe->pipe_buffer.cnt = size; 76613907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 76713907Sdyson 768119811Salc PIPE_UNLOCK(wpipe); 76992959Salfred bcopy((caddr_t) wpipe->pipe_map.kva + pos, 770100527Salfred wpipe->pipe_buffer.buffer, size); 77113907Sdyson pipe_destroy_write_buffer(wpipe); 772119811Salc PIPE_LOCK(wpipe); 77313907Sdyson} 77413907Sdyson 77513907Sdyson/* 77613907Sdyson * This implements the pipe buffer write mechanism. Note that only 77713907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 77813907Sdyson * If there are any characters in the pipe buffer, the direct write will 77913907Sdyson * be deferred until the receiving process grabs all of the bytes from 78013907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 
78113907Sdyson */ 78213907Sdysonstatic int 78313907Sdysonpipe_direct_write(wpipe, uio) 78413907Sdyson struct pipe *wpipe; 78513907Sdyson struct uio *uio; 78613907Sdyson{ 78713907Sdyson int error; 78876364Salfred 78913951Sdysonretry: 79091362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 79113907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 79276760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 79313951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 79413951Sdyson wakeup(wpipe); 79513951Sdyson } 79613992Sdyson wpipe->pipe_state |= PIPE_WANTW; 79791362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 79891362Salfred PRIBIO | PCATCH, "pipdww", 0); 79914802Sdyson if (error) 80013907Sdyson goto error1; 80114802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 80214802Sdyson error = EPIPE; 80314802Sdyson goto error1; 80414802Sdyson } 80513907Sdyson } 80613907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 80713951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 80876760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 80913951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 81013951Sdyson wakeup(wpipe); 81113951Sdyson } 81213951Sdyson 81313992Sdyson wpipe->pipe_state |= PIPE_WANTW; 81491362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 81591362Salfred PRIBIO | PCATCH, "pipdwc", 0); 81614802Sdyson if (error) 81713907Sdyson goto error1; 81814802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 81914802Sdyson error = EPIPE; 82014802Sdyson goto error1; 82113907Sdyson } 82213951Sdyson goto retry; 82313907Sdyson } 82413907Sdyson 82513951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 82613951Sdyson 82792305Salfred pipelock(wpipe, 0); 828119872Salc PIPE_UNLOCK(wpipe); 82913907Sdyson error = pipe_build_write_buffer(wpipe, uio); 830119872Salc PIPE_LOCK(wpipe); 83192305Salfred pipeunlock(wpipe); 83213907Sdyson if (error) { 83313907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 83413907Sdyson goto error1; 83513907Sdyson } 83613907Sdyson 83713907Sdyson error = 0; 83813907Sdyson while (!error && (wpipe->pipe_state & 
PIPE_DIRECTW)) { 83913907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 84013907Sdyson pipelock(wpipe, 0); 841119811Salc PIPE_UNLOCK(wpipe); 84213907Sdyson pipe_destroy_write_buffer(wpipe); 843119811Salc PIPE_LOCK(wpipe); 844112981Shsu pipeselwakeup(wpipe); 84513907Sdyson pipeunlock(wpipe); 84614802Sdyson error = EPIPE; 84714802Sdyson goto error1; 84813907Sdyson } 84913992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 85013992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 85113992Sdyson wakeup(wpipe); 85213992Sdyson } 85314037Sdyson pipeselwakeup(wpipe); 85491362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, 85591362Salfred "pipdwt", 0); 85613907Sdyson } 85713907Sdyson 85813907Sdyson pipelock(wpipe,0); 85913907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 86013907Sdyson /* 86113907Sdyson * this bit of trickery substitutes a kernel buffer for 86213907Sdyson * the process that might be going away. 86313907Sdyson */ 86413907Sdyson pipe_clone_write_buffer(wpipe); 86513907Sdyson } else { 866119811Salc PIPE_UNLOCK(wpipe); 86713907Sdyson pipe_destroy_write_buffer(wpipe); 868119811Salc PIPE_LOCK(wpipe); 86913907Sdyson } 87013907Sdyson pipeunlock(wpipe); 87176760Salfred return (error); 87213907Sdyson 87313907Sdysonerror1: 87413907Sdyson wakeup(wpipe); 87576760Salfred return (error); 87613907Sdyson} 87714037Sdyson#endif 87813907Sdyson 87916960Sdysonstatic int 880101941Srwatsonpipe_write(fp, uio, active_cred, flags, td) 88116960Sdyson struct file *fp; 88213907Sdyson struct uio *uio; 883101941Srwatson struct ucred *active_cred; 88483366Sjulian struct thread *td; 88545311Sdt int flags; 88613907Sdyson{ 88713675Sdyson int error = 0; 88813913Sdyson int orig_resid; 88916960Sdyson struct pipe *wpipe, *rpipe; 89016960Sdyson 891109153Sdillon rpipe = fp->f_data; 89216960Sdyson wpipe = rpipe->pipe_peer; 89316960Sdyson 89491395Salfred PIPE_LOCK(rpipe); 89513675Sdyson /* 89613675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 
89713675Sdyson */ 89816960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 89991395Salfred PIPE_UNLOCK(rpipe); 90076760Salfred return (EPIPE); 90113675Sdyson } 902101768Srwatson#ifdef MAC 903102115Srwatson error = mac_check_pipe_write(active_cred, wpipe); 904101768Srwatson if (error) { 905101768Srwatson PIPE_UNLOCK(rpipe); 906101768Srwatson return (error); 907101768Srwatson } 908101768Srwatson#endif 90977676Sdillon ++wpipe->pipe_busy; 91013675Sdyson 91117163Sdyson /* 91217163Sdyson * If it is advantageous to resize the pipe buffer, do 91317163Sdyson * so. 91417163Sdyson */ 91517163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 916118764Ssilby (amountpipekva < maxpipekva / 2) && 91717163Sdyson (nbigpipe < LIMITBIGPIPES) && 91817163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 91917163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 92017163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 92117163Sdyson 922105009Salfred if ((error = pipelock(wpipe, 1)) == 0) { 923118799Salc PIPE_UNLOCK(wpipe); 92476364Salfred if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 925117364Ssilby atomic_add_int(&nbigpipe, 1); 926118799Salc PIPE_LOCK(wpipe); 92713907Sdyson pipeunlock(wpipe); 92813907Sdyson } 92913907Sdyson } 93077676Sdillon 93177676Sdillon /* 93277676Sdillon * If an early error occured unbusy and return, waking up any pending 93377676Sdillon * readers. 
93477676Sdillon */ 93577676Sdillon if (error) { 93677676Sdillon --wpipe->pipe_busy; 93777676Sdillon if ((wpipe->pipe_busy == 0) && 93877676Sdillon (wpipe->pipe_state & PIPE_WANT)) { 93977676Sdillon wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 94077676Sdillon wakeup(wpipe); 94177676Sdillon } 94291395Salfred PIPE_UNLOCK(rpipe); 94377676Sdillon return(error); 94477676Sdillon } 94576364Salfred 94613913Sdyson orig_resid = uio->uio_resid; 94777676Sdillon 94813675Sdyson while (uio->uio_resid) { 94913907Sdyson int space; 95076760Salfred 95114037Sdyson#ifndef PIPE_NODIRECT 95213907Sdyson /* 95313907Sdyson * If the transfer is large, we can gain performance if 95413907Sdyson * we do process-to-process copies directly. 95516416Sdyson * If the write is non-blocking, we don't use the 95616416Sdyson * direct write mechanism. 95758505Sdillon * 95858505Sdillon * The direct write mechanism will detect the reader going 95958505Sdillon * away on us. 96013907Sdyson */ 96117163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 96217163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 963118764Ssilby amountpipekvawired + uio->uio_resid < maxpipekvawired) { 964105009Salfred error = pipe_direct_write(wpipe, uio); 96576760Salfred if (error) 96613907Sdyson break; 96713907Sdyson continue; 96891362Salfred } 96914037Sdyson#endif 97013907Sdyson 97113907Sdyson /* 97213907Sdyson * Pipe buffered writes cannot be coincidental with 97313907Sdyson * direct writes. We wait until the currently executing 97413907Sdyson * direct write is completed before we start filling the 97558505Sdillon * pipe buffer. We break out if a signal occurs or the 97658505Sdillon * reader goes away. 
97713907Sdyson */ 97813907Sdyson retrywrite: 97913907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 98013992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 98113992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 98213992Sdyson wakeup(wpipe); 98313992Sdyson } 98491395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 98591362Salfred "pipbww", 0); 98658505Sdillon if (wpipe->pipe_state & PIPE_EOF) 98758505Sdillon break; 98813907Sdyson if (error) 98913907Sdyson break; 99013907Sdyson } 99158505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 99258505Sdillon error = EPIPE; 99358505Sdillon break; 99458505Sdillon } 99513907Sdyson 99613907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 99714644Sdyson 99814644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 99913913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 100013913Sdyson space = 0; 100113907Sdyson 1002118230Spb if (space > 0) { 100313907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 100454534Stegge int size; /* Transfer size */ 100554534Stegge int segsize; /* first segment to transfer */ 100676760Salfred 100713907Sdyson /* 100813907Sdyson * It is possible for a direct write to 100913907Sdyson * slip in on us... handle it here... 101013907Sdyson */ 101113907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 101213907Sdyson pipeunlock(wpipe); 101313907Sdyson goto retrywrite; 101413907Sdyson } 101554534Stegge /* 101654534Stegge * If a process blocked in uiomove, our 101754534Stegge * value for space might be bad. 101858505Sdillon * 101958505Sdillon * XXX will we be ok if the reader has gone 102058505Sdillon * away here? 102154534Stegge */ 102254534Stegge if (space > wpipe->pipe_buffer.size - 102354534Stegge wpipe->pipe_buffer.cnt) { 102454534Stegge pipeunlock(wpipe); 102554534Stegge goto retrywrite; 102654534Stegge } 102754534Stegge 102854534Stegge /* 102954534Stegge * Transfer size is minimum of uio transfer 103054534Stegge * and free space in pipe buffer. 
103154534Stegge */ 103254534Stegge if (space > uio->uio_resid) 103354534Stegge size = uio->uio_resid; 103454534Stegge else 103554534Stegge size = space; 103654534Stegge /* 103754534Stegge * First segment to transfer is minimum of 103854534Stegge * transfer size and contiguous space in 103954534Stegge * pipe buffer. If first segment to transfer 104054534Stegge * is less than the transfer size, we've got 104154534Stegge * a wraparound in the buffer. 104254534Stegge */ 104354534Stegge segsize = wpipe->pipe_buffer.size - 104454534Stegge wpipe->pipe_buffer.in; 104554534Stegge if (segsize > size) 104654534Stegge segsize = size; 104754534Stegge 104854534Stegge /* Transfer first segment */ 104954534Stegge 105091395Salfred PIPE_UNLOCK(rpipe); 105154534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 105254534Stegge segsize, uio); 105391395Salfred PIPE_LOCK(rpipe); 105454534Stegge 105554534Stegge if (error == 0 && segsize < size) { 105654534Stegge /* 105754534Stegge * Transfer remaining part now, to 105854534Stegge * support atomic writes. Wraparound 105954534Stegge * happened. 
106054534Stegge */ 106154534Stegge if (wpipe->pipe_buffer.in + segsize != 106254534Stegge wpipe->pipe_buffer.size) 1063116127Smux panic("Expected pipe buffer " 1064116127Smux "wraparound disappeared"); 106554534Stegge 106691395Salfred PIPE_UNLOCK(rpipe); 1067116127Smux error = uiomove( 1068116127Smux &wpipe->pipe_buffer.buffer[0], 1069116127Smux size - segsize, uio); 107091395Salfred PIPE_LOCK(rpipe); 107154534Stegge } 107254534Stegge if (error == 0) { 107354534Stegge wpipe->pipe_buffer.in += size; 107454534Stegge if (wpipe->pipe_buffer.in >= 107554534Stegge wpipe->pipe_buffer.size) { 1076116127Smux if (wpipe->pipe_buffer.in != 1077116127Smux size - segsize + 1078116127Smux wpipe->pipe_buffer.size) 1079116127Smux panic("Expected " 1080116127Smux "wraparound bad"); 1081116127Smux wpipe->pipe_buffer.in = size - 1082116127Smux segsize; 108354534Stegge } 108454534Stegge 108554534Stegge wpipe->pipe_buffer.cnt += size; 1086116127Smux if (wpipe->pipe_buffer.cnt > 1087116127Smux wpipe->pipe_buffer.size) 108854534Stegge panic("Pipe buffer overflow"); 108954534Stegge 109054534Stegge } 109113675Sdyson pipeunlock(wpipe); 109213675Sdyson } 109313675Sdyson if (error) 109413675Sdyson break; 109513675Sdyson 109613675Sdyson } else { 109713675Sdyson /* 109813675Sdyson * If the "read-side" has been blocked, wake it up now. 109913675Sdyson */ 110013675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 110113675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 110213675Sdyson wakeup(wpipe); 110313675Sdyson } 110414037Sdyson 110513675Sdyson /* 110613675Sdyson * don't block on non-blocking I/O 110713675Sdyson */ 110816960Sdyson if (fp->f_flag & FNONBLOCK) { 110913907Sdyson error = EAGAIN; 111013675Sdyson break; 111113675Sdyson } 111213907Sdyson 111314037Sdyson /* 111414037Sdyson * We have no more space and have something to offer, 111529356Speter * wake up select/poll. 
111614037Sdyson */ 111714037Sdyson pipeselwakeup(wpipe); 111814037Sdyson 111913675Sdyson wpipe->pipe_state |= PIPE_WANTW; 112091395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), 112191362Salfred PRIBIO | PCATCH, "pipewr", 0); 112276760Salfred if (error != 0) 112313675Sdyson break; 112413675Sdyson /* 112513675Sdyson * If read side wants to go away, we just issue a signal 112613675Sdyson * to ourselves. 112713675Sdyson */ 112813675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 112913774Sdyson error = EPIPE; 113013907Sdyson break; 113113675Sdyson } 113213675Sdyson } 113313675Sdyson } 113413675Sdyson 113514644Sdyson --wpipe->pipe_busy; 113677676Sdillon 113776760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 113876760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 113913675Sdyson wakeup(wpipe); 114013675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 114113675Sdyson /* 114213675Sdyson * If we have put any characters in the buffer, we wake up 114313675Sdyson * the reader. 114413675Sdyson */ 114513675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 114613675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 114713675Sdyson wakeup(wpipe); 114813675Sdyson } 114913675Sdyson } 115013909Sdyson 115113909Sdyson /* 115213909Sdyson * Don't return EPIPE if I/O was successful 115313909Sdyson */ 115413907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 115577676Sdillon (uio->uio_resid == 0) && 115677676Sdillon (error == EPIPE)) { 115713907Sdyson error = 0; 115877676Sdillon } 115913913Sdyson 116024101Sbde if (error == 0) 116155112Sbde vfs_timestamp(&wpipe->pipe_mtime); 116224101Sbde 116314037Sdyson /* 116414037Sdyson * We have something to offer, 116529356Speter * wake up select/poll. 
116614037Sdyson */ 116714177Sdyson if (wpipe->pipe_buffer.cnt) 116814037Sdyson pipeselwakeup(wpipe); 116913907Sdyson 117091395Salfred PIPE_UNLOCK(rpipe); 117176760Salfred return (error); 117213675Sdyson} 117313675Sdyson 117413675Sdyson/* 117513675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 117613675Sdyson */ 1177104094Sphkstatic int 1178102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td) 117913675Sdyson struct file *fp; 118036735Sdfr u_long cmd; 118199009Salfred void *data; 1182102003Srwatson struct ucred *active_cred; 118383366Sjulian struct thread *td; 118413675Sdyson{ 1185109153Sdillon struct pipe *mpipe = fp->f_data; 1186101768Srwatson#ifdef MAC 1187101768Srwatson int error; 1188104269Srwatson#endif 118913675Sdyson 1190104269Srwatson PIPE_LOCK(mpipe); 1191104269Srwatson 1192104269Srwatson#ifdef MAC 1193102003Srwatson error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data); 1194101768Srwatson if (error) 1195101768Srwatson return (error); 1196101768Srwatson#endif 1197101768Srwatson 119813675Sdyson switch (cmd) { 119913675Sdyson 120013675Sdyson case FIONBIO: 1201104269Srwatson PIPE_UNLOCK(mpipe); 120213675Sdyson return (0); 120313675Sdyson 120413675Sdyson case FIOASYNC: 120513675Sdyson if (*(int *)data) { 120613675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 120713675Sdyson } else { 120813675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 120913675Sdyson } 121091362Salfred PIPE_UNLOCK(mpipe); 121113675Sdyson return (0); 121213675Sdyson 121313675Sdyson case FIONREAD: 121414037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 121514037Sdyson *(int *)data = mpipe->pipe_map.cnt; 121614037Sdyson else 121714037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 121891362Salfred PIPE_UNLOCK(mpipe); 121913675Sdyson return (0); 122013675Sdyson 122141086Struckman case FIOSETOWN: 1222104269Srwatson PIPE_UNLOCK(mpipe); 122341086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 122441086Struckman 122541086Struckman case FIOGETOWN: 
1226104269Srwatson PIPE_UNLOCK(mpipe); 1227104393Struckman *(int *)data = fgetown(&mpipe->pipe_sigio); 122813675Sdyson return (0); 122913675Sdyson 123041086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 123141086Struckman case TIOCSPGRP: 1232104269Srwatson PIPE_UNLOCK(mpipe); 123341086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 123441086Struckman 123541086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 123618863Sdyson case TIOCGPGRP: 1237104269Srwatson PIPE_UNLOCK(mpipe); 1238104393Struckman *(int *)data = -fgetown(&mpipe->pipe_sigio); 123913675Sdyson return (0); 124013675Sdyson 124113675Sdyson } 1242104269Srwatson PIPE_UNLOCK(mpipe); 124317124Sbde return (ENOTTY); 124413675Sdyson} 124513675Sdyson 1246104094Sphkstatic int 1247101983Srwatsonpipe_poll(fp, events, active_cred, td) 124813675Sdyson struct file *fp; 124929356Speter int events; 1250101983Srwatson struct ucred *active_cred; 125183366Sjulian struct thread *td; 125213675Sdyson{ 1253109153Sdillon struct pipe *rpipe = fp->f_data; 125413675Sdyson struct pipe *wpipe; 125529356Speter int revents = 0; 1256101768Srwatson#ifdef MAC 1257101768Srwatson int error; 1258101768Srwatson#endif 125913675Sdyson 126013675Sdyson wpipe = rpipe->pipe_peer; 126191362Salfred PIPE_LOCK(rpipe); 1262101768Srwatson#ifdef MAC 1263102115Srwatson error = mac_check_pipe_poll(active_cred, rpipe); 1264101768Srwatson if (error) 1265101768Srwatson goto locked_error; 1266101768Srwatson#endif 126729356Speter if (events & (POLLIN | POLLRDNORM)) 126829356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 126929356Speter (rpipe->pipe_buffer.cnt > 0) || 127029356Speter (rpipe->pipe_state & PIPE_EOF)) 127129356Speter revents |= events & (POLLIN | POLLRDNORM); 127213675Sdyson 127329356Speter if (events & (POLLOUT | POLLWRNORM)) 127429356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 127543311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 127643311Sdillon 
(wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 127729356Speter revents |= events & (POLLOUT | POLLWRNORM); 127813675Sdyson 127929356Speter if ((rpipe->pipe_state & PIPE_EOF) || 128029356Speter (wpipe == NULL) || 128129356Speter (wpipe->pipe_state & PIPE_EOF)) 128229356Speter revents |= POLLHUP; 128329356Speter 128429356Speter if (revents == 0) { 128529356Speter if (events & (POLLIN | POLLRDNORM)) { 128683805Sjhb selrecord(td, &rpipe->pipe_sel); 128729356Speter rpipe->pipe_state |= PIPE_SEL; 128813675Sdyson } 128913675Sdyson 129029356Speter if (events & (POLLOUT | POLLWRNORM)) { 129183805Sjhb selrecord(td, &wpipe->pipe_sel); 129230164Speter wpipe->pipe_state |= PIPE_SEL; 129313907Sdyson } 129413675Sdyson } 1295101768Srwatson#ifdef MAC 1296101768Srwatsonlocked_error: 1297101768Srwatson#endif 129891362Salfred PIPE_UNLOCK(rpipe); 129929356Speter 130029356Speter return (revents); 130113675Sdyson} 130213675Sdyson 130398989Salfred/* 130498989Salfred * We shouldn't need locks here as we're doing a read and this should 130598989Salfred * be a natural race. 
130698989Salfred */ 130752983Speterstatic int 1308101983Srwatsonpipe_stat(fp, ub, active_cred, td) 130952983Speter struct file *fp; 131052983Speter struct stat *ub; 1311101983Srwatson struct ucred *active_cred; 131283366Sjulian struct thread *td; 131313675Sdyson{ 1314109153Sdillon struct pipe *pipe = fp->f_data; 1315101768Srwatson#ifdef MAC 1316101768Srwatson int error; 131752983Speter 1318104269Srwatson PIPE_LOCK(pipe); 1319102115Srwatson error = mac_check_pipe_stat(active_cred, pipe); 1320104269Srwatson PIPE_UNLOCK(pipe); 1321101768Srwatson if (error) 1322101768Srwatson return (error); 1323101768Srwatson#endif 1324100527Salfred bzero(ub, sizeof(*ub)); 132517124Sbde ub->st_mode = S_IFIFO; 132613907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 132713675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 132813675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 132934901Sphk ub->st_atimespec = pipe->pipe_atime; 133034901Sphk ub->st_mtimespec = pipe->pipe_mtime; 133134901Sphk ub->st_ctimespec = pipe->pipe_ctime; 133260404Schris ub->st_uid = fp->f_cred->cr_uid; 133360404Schris ub->st_gid = fp->f_cred->cr_gid; 133417124Sbde /* 133560404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 133617124Sbde * XXX (st_dev, st_ino) should be unique. 
133717124Sbde */ 133876760Salfred return (0); 133913675Sdyson} 134013675Sdyson 134113675Sdyson/* ARGSUSED */ 134213675Sdysonstatic int 134383366Sjulianpipe_close(fp, td) 134413675Sdyson struct file *fp; 134583366Sjulian struct thread *td; 134613675Sdyson{ 1347109153Sdillon struct pipe *cpipe = fp->f_data; 134816322Sgpalmer 134949413Sgreen fp->f_ops = &badfileops; 1350109153Sdillon fp->f_data = NULL; 135196122Salfred funsetown(&cpipe->pipe_sigio); 135213675Sdyson pipeclose(cpipe); 135376760Salfred return (0); 135413675Sdyson} 135513675Sdyson 135676364Salfredstatic void 135776364Salfredpipe_free_kmem(cpipe) 135876364Salfred struct pipe *cpipe; 135976364Salfred{ 136091412Salfred 136191412Salfred KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 136291412Salfred ("pipespace: pipe mutex locked")); 136376364Salfred 136476364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 136576364Salfred if (cpipe->pipe_buffer.size > PIPE_SIZE) 1366117364Ssilby atomic_subtract_int(&nbigpipe, 1); 1367110816Salc atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size); 1368117325Ssilby atomic_subtract_int(&amountpipes, 1); 1369118764Ssilby vm_map_remove(pipe_map, 1370118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer, 1371118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 137276364Salfred cpipe->pipe_buffer.buffer = NULL; 137376364Salfred } 137476364Salfred#ifndef PIPE_NODIRECT 1375102241Sarchie if (cpipe->pipe_map.kva != 0) { 1376118764Ssilby atomic_subtract_int(&amountpipekvawired, 1377110816Salc cpipe->pipe_buffer.size + PAGE_SIZE); 137876364Salfred kmem_free(kernel_map, 137976364Salfred cpipe->pipe_map.kva, 138076364Salfred cpipe->pipe_buffer.size + PAGE_SIZE); 138176364Salfred cpipe->pipe_map.cnt = 0; 138276364Salfred cpipe->pipe_map.kva = 0; 138376364Salfred cpipe->pipe_map.pos = 0; 138476364Salfred cpipe->pipe_map.npages = 0; 138576364Salfred } 138676364Salfred#endif 138776364Salfred} 138876364Salfred 138913675Sdyson/* 
139013675Sdyson * shutdown the pipe 139113675Sdyson */ 139213675Sdysonstatic void 139313675Sdysonpipeclose(cpipe) 139413675Sdyson struct pipe *cpipe; 139513675Sdyson{ 139613907Sdyson struct pipe *ppipe; 139791968Salfred int hadpeer; 139876364Salfred 139991968Salfred if (cpipe == NULL) 140091968Salfred return; 140191968Salfred 140291968Salfred hadpeer = 0; 140391968Salfred 140491968Salfred /* partially created pipes won't have a valid mutex. */ 140591968Salfred if (PIPE_MTX(cpipe) != NULL) 140691362Salfred PIPE_LOCK(cpipe); 140713907Sdyson 140891968Salfred pipeselwakeup(cpipe); 140913907Sdyson 141091968Salfred /* 141191968Salfred * If the other side is blocked, wake it up saying that 141291968Salfred * we want to close it down. 141391968Salfred */ 141491968Salfred while (cpipe->pipe_busy) { 141591968Salfred wakeup(cpipe); 141691968Salfred cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; 141791968Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 141891968Salfred } 141913675Sdyson 1420101768Srwatson#ifdef MAC 1421101768Srwatson if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) 1422101768Srwatson mac_destroy_pipe(cpipe); 1423101768Srwatson#endif 1424101768Srwatson 142591968Salfred /* 142691968Salfred * Disconnect from peer 142791968Salfred */ 142891968Salfred if ((ppipe = cpipe->pipe_peer) != NULL) { 142991968Salfred hadpeer++; 143091968Salfred pipeselwakeup(ppipe); 143113907Sdyson 143291968Salfred ppipe->pipe_state |= PIPE_EOF; 143391968Salfred wakeup(ppipe); 143491968Salfred KNOTE(&ppipe->pipe_sel.si_note, 0); 143591968Salfred ppipe->pipe_peer = NULL; 143691968Salfred } 143791968Salfred /* 143891968Salfred * free resources 143991968Salfred */ 144091968Salfred if (PIPE_MTX(cpipe) != NULL) { 144191968Salfred PIPE_UNLOCK(cpipe); 144291968Salfred if (!hadpeer) { 144391968Salfred mtx_destroy(PIPE_MTX(cpipe)); 144491968Salfred free(PIPE_MTX(cpipe), M_TEMP); 144513675Sdyson } 144613675Sdyson } 144791968Salfred pipe_free_kmem(cpipe); 144892751Sjeff 
uma_zfree(pipe_zone, cpipe); 144913675Sdyson} 145059288Sjlemon 145172521Sjlemon/*ARGSUSED*/ 145259288Sjlemonstatic int 145372521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 145459288Sjlemon{ 145589306Salfred struct pipe *cpipe; 145659288Sjlemon 1457109153Sdillon cpipe = kn->kn_fp->f_data; 145872521Sjlemon switch (kn->kn_filter) { 145972521Sjlemon case EVFILT_READ: 146072521Sjlemon kn->kn_fop = &pipe_rfiltops; 146172521Sjlemon break; 146272521Sjlemon case EVFILT_WRITE: 146372521Sjlemon kn->kn_fop = &pipe_wfiltops; 146478292Sjlemon cpipe = cpipe->pipe_peer; 1465101382Sdes if (cpipe == NULL) 1466101382Sdes /* other end of pipe has been closed */ 1467118929Sjmg return (EPIPE); 146872521Sjlemon break; 146972521Sjlemon default: 147072521Sjlemon return (1); 147172521Sjlemon } 147278292Sjlemon 147391372Salfred PIPE_LOCK(cpipe); 147478292Sjlemon SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext); 147591372Salfred PIPE_UNLOCK(cpipe); 147659288Sjlemon return (0); 147759288Sjlemon} 147859288Sjlemon 147959288Sjlemonstatic void 148059288Sjlemonfilt_pipedetach(struct knote *kn) 148159288Sjlemon{ 1482121018Sjmg struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 148359288Sjlemon 1484121018Sjmg if (kn->kn_filter == EVFILT_WRITE) { 1485121018Sjmg if (cpipe->pipe_peer == NULL) 1486121018Sjmg return; 1487121018Sjmg cpipe = cpipe->pipe_peer; 1488121018Sjmg } 1489121018Sjmg 149091372Salfred PIPE_LOCK(cpipe); 149178292Sjlemon SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext); 149291372Salfred PIPE_UNLOCK(cpipe); 149359288Sjlemon} 149459288Sjlemon 149559288Sjlemon/*ARGSUSED*/ 149659288Sjlemonstatic int 149759288Sjlemonfilt_piperead(struct knote *kn, long hint) 149859288Sjlemon{ 1499109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 150059288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 150159288Sjlemon 150291372Salfred PIPE_LOCK(rpipe); 150359288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 150459288Sjlemon if ((kn->kn_data == 0) && 
(rpipe->pipe_state & PIPE_DIRECTW)) 150559288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 150659288Sjlemon 150759288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 150859288Sjlemon (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 150991372Salfred kn->kn_flags |= EV_EOF; 151091372Salfred PIPE_UNLOCK(rpipe); 151159288Sjlemon return (1); 151259288Sjlemon } 151391372Salfred PIPE_UNLOCK(rpipe); 151459288Sjlemon return (kn->kn_data > 0); 151559288Sjlemon} 151659288Sjlemon 151759288Sjlemon/*ARGSUSED*/ 151859288Sjlemonstatic int 151959288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 152059288Sjlemon{ 1521109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 152259288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 152359288Sjlemon 152491372Salfred PIPE_LOCK(rpipe); 152559288Sjlemon if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 152659288Sjlemon kn->kn_data = 0; 152759288Sjlemon kn->kn_flags |= EV_EOF; 152891372Salfred PIPE_UNLOCK(rpipe); 152959288Sjlemon return (1); 153059288Sjlemon } 153159288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 153265855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 153359288Sjlemon kn->kn_data = 0; 153459288Sjlemon 153591372Salfred PIPE_UNLOCK(rpipe); 153659288Sjlemon return (kn->kn_data >= PIPE_BUF); 153759288Sjlemon} 1538