/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: head/sys/kern/sys_pipe.c 83366 2001-09-12 08:38:13Z julian $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
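
/*
 * Illustrative userland sketch (not part of this file): the classic use
 * of pipe(2) that this implementation serves.  The parent writes into
 * fildes[1] and the child reads from fildes[0]; small writes go through
 * the kernel ring buffer, while large blocking writes may use the direct
 * page-mapping path described above.
 *
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		int fildes[2];
 *		char buf[5];
 *
 *		if (pipe(fildes) == -1)
 *			return (1);
 *		if (fork() == 0) {		// child: read side
 *			close(fildes[1]);
 *			read(fildes[0], buf, sizeof(buf));
 *			_exit(0);
 *		}
 *		close(fildes[0]);		// parent: write side
 *		write(fildes[1], "data", 5);
 *		return (0);
 *	}
 */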

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_zone.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static int pipe_read __P((struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct thread *td));
static int pipe_write __P((struct file *fp, struct uio *uio,
		struct ucred *cred, int flags, struct thread *td));
static int pipe_close __P((struct file *fp, struct thread *td));
static int pipe_poll __P((struct file *fp, int events, struct ucred *cred,
		struct thread *td));
static int pipe_kqfilter __P((struct file *fp, struct knote *kn));
static int pipe_stat __P((struct file *fp, struct stat *sb, struct thread *td));
static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct thread *td));

static struct fileops pipeops = {
	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
	pipe_stat, pipe_close
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general.
 */
#define LIMITPIPEKVA (16*1024*1024)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
static int nbigpipe;

static int amountpipekva;

static void pipeclose __P((struct pipe *cpipe));
static void pipe_free_kmem __P((struct pipe *cpipe));
static int pipe_create __P((struct pipe **cpipep));
static __inline int pipelock __P((struct pipe *cpipe, int catch));
static __inline void pipeunlock __P((struct pipe *cpipe));
static __inline void pipeselwakeup __P((struct pipe *cpipe));
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio));
static void pipe_destroy_write_buffer __P((struct pipe *wpipe));
static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio));
static void pipe_clone_write_buffer __P((struct pipe *wpipe));
#endif
static int pipespace __P((struct pipe *cpipe, int size));

static vm_zone_t pipe_zone;

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

/* ARGSUSED */
int
pipe(td, uap)
	struct thread *td;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd, error;

	if (pipe_zone == NULL)
		pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	error = falloc(td, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	fhold(rf);
	td->td_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;
	error = falloc(td, &wf, &fd);
	if (error) {
		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
			fdp->fd_ofiles[td->td_retval[0]] = NULL;
			fdrop(rf, td);
		}
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;
	td->td_retval[1] = fd;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
	fdrop(rf, td);

	return (0);
}
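
/*
 * Illustrative interleaving (a sketch of the kind of race the warning
 * above guards against, assuming another process shares this file
 * descriptor table):
 *
 *	this thread: pipe()			other process
 *	--------------------------------	----------------------
 *	falloc() read side; fd becomes
 *	visible in the shared table
 *	blocks in falloc() for write side
 *						dup()s or close()s the
 *						read-side fd
 *	falloc() fails; the original slot
 *	may no longer hold rf, so we must
 *	check fd_ofiles[] before clearing
 *	it and release our own reference
 *	with fdrop(), never a direct close
 */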

/*
 * Allocate kva for pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it
 * fails, it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;

	GIANT_REQUIRED;

	npages = round_page(size)/PAGE_SIZE;
	/*
	 * Create an object, I don't like the idea of paging to/from
	 * kernel_object.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	object = vm_object_allocate(OBJT_DEFAULT, npages);
	buffer = (caddr_t) vm_map_min(kernel_map);

	/*
	 * Insert the object into the kernel map, and allocate kva for it.
	 * The map entry is, by default, pageable.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	error = vm_map_find(kernel_map, object, 0,
		(vm_offset_t *) &buffer, size, 1,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	if (error != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return (ENOMEM);
	}

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.object = object;
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	amountpipekva += cpipe->pipe_buffer.size;
	return (0);
}
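
/*
 * The buffer installed above is used as a simple ring: "in" is the
 * write index, "out" the read index, and "cnt" the number of valid
 * bytes.  cnt is tracked separately because in == out is otherwise
 * ambiguous (it means both "empty" and "full").  A minimal model of
 * the producer-side arithmetic, as an illustrative sketch only
 * (RING_SIZE and ring_put() are hypothetical names, not kernel
 * interfaces):
 *
 *	#define RING_SIZE 4096
 *	struct ring { char buf[RING_SIZE]; int in, out, cnt; };
 *
 *	static int
 *	ring_put(struct ring *r, char c)
 *	{
 *		if (r->cnt == RING_SIZE)
 *			return (-1);		// full
 *		r->buf[r->in++] = c;
 *		if (r->in >= RING_SIZE)		// wrap, as pipe_write() does
 *			r->in = 0;
 *		r->cnt++;
 *		return (0);
 *	}
 */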

/*
 * initialize and allocate VM and memory for pipe
 */
static int
pipe_create(cpipep)
	struct pipe **cpipep;
{
	struct pipe *cpipe;
	int error;

	*cpipep = zalloc(pipe_zone);
	if (*cpipep == NULL)
		return (ENOMEM);

	cpipe = *cpipep;

	/* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */
	cpipe->pipe_buffer.object = NULL;
#ifndef PIPE_NODIRECT
	cpipe->pipe_map.kva = 0;
#endif
	/*
	 * protect so pipeclose() doesn't follow a junk pointer
	 * if pipespace() fails.
	 */
	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
	cpipe->pipe_state = 0;
	cpipe->pipe_peer = NULL;
	cpipe->pipe_busy = 0;

#ifndef PIPE_NODIRECT
	/*
	 * pipe data structure initializations to support direct pipe I/O
	 */
	cpipe->pipe_map.cnt = 0;
	cpipe->pipe_map.kva = 0;
	cpipe->pipe_map.pos = 0;
	cpipe->pipe_map.npages = 0;
	/* cpipe->pipe_map.ms[] = invalid */
#endif

	error = pipespace(cpipe, PIPE_SIZE);
	if (error)
		return (error);

	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = tsleep(cpipe, catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/* ARGSUSED */
static int
pipe_read(fp, uio, cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct thread *td;
	int flags;
{
	struct pipe *rpipe = (struct pipe *) fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			caddr_t	va;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = tsleep(rpipe, PRIBIO | PCATCH,
				    "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	return (error);
}
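
/*
 * Userland view of the cases handled above (illustrative sketch; rfd is
 * assumed to be the read-side descriptor returned by pipe()): with
 * O_NONBLOCK set, an empty pipe yields EAGAIN, while a pipe whose write
 * side is gone (PIPE_EOF) yields a 0-byte read.
 *
 *	#include <errno.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	char buf[512];
 *	ssize_t n;
 *
 *	fcntl(rfd, F_SETFL, O_NONBLOCK);
 *	n = read(rfd, buf, sizeof(buf));
 *	if (n == 0)
 *		;	// EOF: write side closed
 *	else if (n == -1 && errno == EAGAIN)
 *		;	// pipe empty, would have blocked
 */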

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	u_int size;
	int i;
	vm_offset_t addr, endaddr, paddr;

	GIANT_REQUIRED;

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		vm_page_t m;

		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
		    (paddr = pmap_kextract(addr)) == 0) {
			int j;

			for (j = 0; j < i; j++)
				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
			return (EFAULT);
		}

		m = PHYS_TO_VM_PAGE(paddr);
		vm_page_wire(m);
		wpipe->pipe_map.ms[i] = m;
	}

	/*
	 * set up the control block
	 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

	/*
	 * and map the buffer
	 */
	if (wpipe->pipe_map.kva == 0) {
		/*
		 * We need to allocate space for an extra page because the
		 * address range might (will) span pages at times.
		 */
		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
			wpipe->pipe_buffer.size + PAGE_SIZE);
		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
	}
	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
		wpipe->pipe_map.npages);

	/*
	 * and update the uio data
	 */
	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base += size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	GIANT_REQUIRED;

	if (wpipe->pipe_map.kva) {
		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);

		if (amountpipekva > MAXPIPEKVA) {
			vm_offset_t kva = wpipe->pipe_map.kva;
			wpipe->pipe_map.kva = 0;
			kmem_free(kernel_map, kva,
				wpipe->pipe_buffer.size + PAGE_SIZE);
			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
		}
	}
	for (i = 0; i < wpipe->pipe_map.npages; i++)
		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int size;
	int pos;

	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;
	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
	    (caddr_t) wpipe->pipe_buffer.buffer, size);

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	error = pipe_build_write_buffer(wpipe, uio);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			pipe_destroy_write_buffer(wpipe);
			pipeunlock(wpipe);
			pipeselwakeup(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0);
	}

	pipelock(wpipe, 0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif
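
/*
 * Sequence sketch of a successful direct write (informal, derived from
 * the code above):
 *
 *	writer					reader
 *	--------------------------------	----------------------------
 *	drain pipe_buffer ("pipdwc" sleep)
 *	set PIPE_DIRECTW
 *	wire user pages, map into kva
 *	sleep "pipdwt"
 *						copy from pipe_map.kva
 *						pipe_map.cnt reaches 0:
 *						clear PIPE_DIRECTW, wakeup
 *	destroy write buffer, return
 *
 * If the writer is instead interrupted by a signal while PIPE_DIRECTW
 * is still set, pipe_clone_write_buffer() snapshots the unread bytes
 * into the ordinary ring buffer before the pages are unwired.
 */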

static int
pipe_write(fp, uio, cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		return (EPIPE);
	}
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (nbigpipe < LIMITBIGPIPES) &&
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe, 1)) == 0) {
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				nbigpipe++;
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred, unbusy and return, waking up any
	 * pending readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		return (error);
	}

	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA))) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
			if ((error = pipelock(wpipe, 1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write to
				 * slip in on us... handle it here...
				 */
				if (wpipe->pipe_state & PIPE_DIRECTW) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer wraparound disappeared");

					error = uiomove(&wpipe->pipe_buffer.buffer[0],
							size - segsize, uio);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
							panic("Expected wraparound bad");
						wpipe->pipe_buffer.in = size - segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");
				}
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	return (error);
}
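
/*
 * The "space = 0" clamp above implements the POSIX guarantee that
 * writes of at most PIPE_BUF bytes are atomic.  Illustrative userland
 * sketch (wfd is assumed to be the write-side descriptor returned by
 * pipe()):
 *
 *	#include <limits.h>
 *	#include <unistd.h>
 *
 *	char record[PIPE_BUF];		// <= PIPE_BUF, so atomic
 *
 *	if (write(wfd, record, sizeof(record)) == -1)
 *		;	// failed outright (e.g. EPIPE); no partial record
 *
 * Two processes writing such records concurrently will never have them
 * interleaved mid-record; writes larger than PIPE_BUF may be split.
 */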

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
int
pipe_ioctl(fp, cmd, data, td)
	struct file *fp;
	u_long cmd;
	caddr_t data;
	struct thread *td;
{
	struct pipe *mpipe = (struct pipe *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		return (0);

	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		return (0);

	case FIOSETOWN:
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		*(int *)data = fgetown(mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(mpipe->pipe_sigio);
		return (0);

	}
	return (ENOTTY);
}
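
/*
 * Illustrative userland use of the FIONREAD ioctl implemented above: it
 * reports bytes readable right now, whether they sit in the ring buffer
 * or in a pending direct write (rfd is assumed to be the read-side
 * descriptor returned by pipe()):
 *
 *	#include <sys/ioctl.h>
 *
 *	int nbytes;
 *
 *	if (ioctl(rfd, FIONREAD, &nbytes) == 0 && nbytes > 0)
 *		;	// at least nbytes can be read without blocking
 */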

int
pipe_poll(fp, events, cred, td)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct thread *td;
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	struct pipe *wpipe;
	int revents = 0;

	wpipe = rpipe->pipe_peer;
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(curthread, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(curthread, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}

	return (revents);
}

static int
pipe_stat(fp, ub, td)
	struct file *fp;
	struct stat *ub;
	struct thread *td;
{
	struct pipe *pipe = (struct pipe *)fp->f_data;

	bzero((caddr_t)ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct pipe *cpipe = (struct pipe *)fp->f_data;

	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{
	GIANT_REQUIRED;

	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		amountpipekva -= cpipe->pipe_buffer.size;
		kmem_free(kernel_map,
			(vm_offset_t)cpipe->pipe_buffer.buffer,
			cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	if (cpipe->pipe_map.kva != 0) {
		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
		kmem_free(kernel_map,
			cpipe->pipe_map.kva,
			cpipe->pipe_buffer.size + PAGE_SIZE);
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.kva = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipe *ppipe;

	if (cpipe) {
		pipeselwakeup(cpipe);

		/*
		 * If the other side is blocked, wake it up saying that
		 * we want to close it down.
		 */
		while (cpipe->pipe_busy) {
			wakeup(cpipe);
			cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
			tsleep(cpipe, PRIBIO, "pipecl", 0);
		}

		/*
		 * Disconnect from peer
		 */
		if ((ppipe = cpipe->pipe_peer) != NULL) {
			pipeselwakeup(ppipe);

			ppipe->pipe_state |= PIPE_EOF;
			wakeup(ppipe);
			ppipe->pipe_peer = NULL;
		}
		/*
		 * free resources
		 */
		pipe_free_kmem(cpipe);
		zfree(pipe_zone, cpipe);
	}
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		break;
	default:
		return (1);
	}
	kn->kn_hook = (caddr_t)cpipe;

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	return (kn->kn_data >= PIPE_BUF);
}
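
/*
 * Illustrative userland sketch of the kqueue filters above: register
 * EVFILT_READ on the read side; filt_piperead() then reports readable
 * bytes in kn_data and EV_EOF once the writer disappears (rfd is
 * assumed to be the read-side descriptor returned by pipe()):
 *
 *	#include <sys/event.h>
 *
 *	struct kevent kev, ev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, rfd, EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	// register
 *	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1) {
 *		// ev.data: bytes available
 *		// ev.flags & EV_EOF: write side has gone away
 *	}
 */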