sys_pipe.c revision 43623
/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $Id: sys_pipe.c,v 1.49 1999/01/28 00:57:47 dillon Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
5013907Sdyson */ 5113907Sdyson 5213675Sdyson#include <sys/param.h> 5313675Sdyson#include <sys/systm.h> 5413675Sdyson#include <sys/proc.h> 5524131Sbde#include <sys/fcntl.h> 5613675Sdyson#include <sys/file.h> 5713675Sdyson#include <sys/filedesc.h> 5824206Sbde#include <sys/filio.h> 5924206Sbde#include <sys/ttycom.h> 6013675Sdyson#include <sys/stat.h> 6129356Speter#include <sys/poll.h> 6243278Sbde#include <sys/select.h> 6313675Sdyson#include <sys/signalvar.h> 6413675Sdyson#include <sys/sysproto.h> 6513675Sdyson#include <sys/pipe.h> 6634924Sbde#include <sys/uio.h> 6713675Sdyson 6813675Sdyson#include <vm/vm.h> 6913675Sdyson#include <vm/vm_prot.h> 7013675Sdyson#include <vm/vm_param.h> 7122521Sdyson#include <sys/lock.h> 7213675Sdyson#include <vm/vm_object.h> 7313675Sdyson#include <vm/vm_kern.h> 7413675Sdyson#include <vm/vm_extern.h> 7513675Sdyson#include <vm/pmap.h> 7613675Sdyson#include <vm/vm_map.h> 7713907Sdyson#include <vm/vm_page.h> 7827899Sdyson#include <vm/vm_zone.h> 7913675Sdyson 8014037Sdyson/* 8114037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 8214037Sdyson * approx 30% decrease in transfer rate. This could be useful for 8314037Sdyson * NetBSD or OpenBSD. 
8414037Sdyson */ 8514037Sdyson/* #define PIPE_NODIRECT */ 8614037Sdyson 8714037Sdyson/* 8814037Sdyson * interfaces to the outside world 8914037Sdyson */ 9013675Sdysonstatic int pipe_read __P((struct file *fp, struct uio *uio, 9113675Sdyson struct ucred *cred)); 9213675Sdysonstatic int pipe_write __P((struct file *fp, struct uio *uio, 9313675Sdyson struct ucred *cred)); 9413675Sdysonstatic int pipe_close __P((struct file *fp, struct proc *p)); 9529356Speterstatic int pipe_poll __P((struct file *fp, int events, struct ucred *cred, 9629356Speter struct proc *p)); 9736735Sdfrstatic int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); 9813675Sdyson 9913675Sdysonstatic struct fileops pipeops = 10029356Speter { pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_close }; 10113675Sdyson 10213675Sdyson/* 10313675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 10413675Sdyson * space is pageable. The pipe code will try to maintain locality of 10513675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 10613675Sdyson * will not wipe the cache. 10713675Sdyson */ 10813907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 10913907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 11013675Sdyson 11113907Sdyson/* 11213907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 11313907Sdyson * is there so that on large systems, we don't exhaust it. 11413907Sdyson */ 11513907Sdyson#define MAXPIPEKVA (8*1024*1024) 11613907Sdyson 11713907Sdyson/* 11813907Sdyson * Limit for direct transfers, we cannot, of course limit 11913907Sdyson * the amount of kva for pipes in general though. 
12013907Sdyson */ 12113907Sdyson#define LIMITPIPEKVA (16*1024*1024) 12217163Sdyson 12317163Sdyson/* 12417163Sdyson * Limit the number of "big" pipes 12517163Sdyson */ 12617163Sdyson#define LIMITBIGPIPES 32 12733181Seivindstatic int nbigpipe; 12817163Sdyson 12917124Sbdestatic int amountpipekva; 13013907Sdyson 13113675Sdysonstatic void pipeclose __P((struct pipe *cpipe)); 13213675Sdysonstatic void pipeinit __P((struct pipe *cpipe)); 13313907Sdysonstatic __inline int pipelock __P((struct pipe *cpipe, int catch)); 13413675Sdysonstatic __inline void pipeunlock __P((struct pipe *cpipe)); 13514122Speterstatic __inline void pipeselwakeup __P((struct pipe *cpipe)); 13614037Sdyson#ifndef PIPE_NODIRECT 13713907Sdysonstatic int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 13813907Sdysonstatic void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 13913907Sdysonstatic int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 14013907Sdysonstatic void pipe_clone_write_buffer __P((struct pipe *wpipe)); 14114037Sdyson#endif 14213907Sdysonstatic void pipespace __P((struct pipe *cpipe)); 14313675Sdyson 14433181Seivindstatic vm_zone_t pipe_zone; 14527899Sdyson 14613675Sdyson/* 14713675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 14813675Sdyson */ 14913675Sdyson 15013675Sdyson/* ARGSUSED */ 15113675Sdysonint 15230994Sphkpipe(p, uap) 15313675Sdyson struct proc *p; 15413675Sdyson struct pipe_args /* { 15513675Sdyson int dummy; 15613675Sdyson } */ *uap; 15713675Sdyson{ 15813675Sdyson register struct filedesc *fdp = p->p_fd; 15913675Sdyson struct file *rf, *wf; 16013675Sdyson struct pipe *rpipe, *wpipe; 16113675Sdyson int fd, error; 16213675Sdyson 16327899Sdyson if (pipe_zone == NULL) 16427923Sdyson pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4); 16527899Sdyson 16627899Sdyson rpipe = zalloc( pipe_zone); 16713675Sdyson pipeinit(rpipe); 16813907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 16927899Sdyson wpipe = zalloc( pipe_zone); 
17013675Sdyson pipeinit(wpipe); 17113907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 17213675Sdyson 17313675Sdyson error = falloc(p, &rf, &fd); 17413675Sdyson if (error) 17513675Sdyson goto free2; 17630994Sphk p->p_retval[0] = fd; 17713675Sdyson rf->f_flag = FREAD | FWRITE; 17813675Sdyson rf->f_type = DTYPE_PIPE; 17913675Sdyson rf->f_ops = &pipeops; 18013675Sdyson rf->f_data = (caddr_t)rpipe; 18113675Sdyson error = falloc(p, &wf, &fd); 18213675Sdyson if (error) 18313675Sdyson goto free3; 18413675Sdyson wf->f_flag = FREAD | FWRITE; 18513675Sdyson wf->f_type = DTYPE_PIPE; 18613675Sdyson wf->f_ops = &pipeops; 18713675Sdyson wf->f_data = (caddr_t)wpipe; 18830994Sphk p->p_retval[1] = fd; 18913675Sdyson 19013675Sdyson rpipe->pipe_peer = wpipe; 19113675Sdyson wpipe->pipe_peer = rpipe; 19213675Sdyson 19313675Sdyson return (0); 19413675Sdysonfree3: 19513675Sdyson ffree(rf); 19630994Sphk fdp->fd_ofiles[p->p_retval[0]] = 0; 19713675Sdysonfree2: 19813675Sdyson (void)pipeclose(wpipe); 19913675Sdyson (void)pipeclose(rpipe); 20013675Sdyson return (error); 20113675Sdyson} 20213675Sdyson 20313909Sdyson/* 20413909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 20513909Sdyson */ 20613675Sdysonstatic void 20713907Sdysonpipespace(cpipe) 20813675Sdyson struct pipe *cpipe; 20913675Sdyson{ 21013688Sdyson int npages, error; 21113675Sdyson 21213907Sdyson npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE; 21313675Sdyson /* 21413675Sdyson * Create an object, I don't like the idea of paging to/from 21513675Sdyson * kernel_object. 21614037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 21713675Sdyson */ 21813675Sdyson cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages); 21913688Sdyson cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map); 22013675Sdyson 22113675Sdyson /* 22213675Sdyson * Insert the object into the kernel map, and allocate kva for it. 22313675Sdyson * The map entry is, by default, pageable. 
22414037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 22513675Sdyson */ 22613688Sdyson error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0, 22713907Sdyson (vm_offset_t *) &cpipe->pipe_buffer.buffer, 22813907Sdyson cpipe->pipe_buffer.size, 1, 22913688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 23013675Sdyson 23113688Sdyson if (error != KERN_SUCCESS) 23213688Sdyson panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error); 23313907Sdyson amountpipekva += cpipe->pipe_buffer.size; 23413907Sdyson} 23513688Sdyson 23613907Sdyson/* 23713907Sdyson * initialize and allocate VM and memory for pipe 23813907Sdyson */ 23913907Sdysonstatic void 24013907Sdysonpipeinit(cpipe) 24113907Sdyson struct pipe *cpipe; 24213907Sdyson{ 24313907Sdyson 24413675Sdyson cpipe->pipe_buffer.in = 0; 24513675Sdyson cpipe->pipe_buffer.out = 0; 24613675Sdyson cpipe->pipe_buffer.cnt = 0; 24713907Sdyson cpipe->pipe_buffer.size = PIPE_SIZE; 24817163Sdyson 24913907Sdyson /* Buffer kva gets dynamically allocated */ 25013907Sdyson cpipe->pipe_buffer.buffer = NULL; 25117124Sbde /* cpipe->pipe_buffer.object = invalid */ 25213675Sdyson 25313675Sdyson cpipe->pipe_state = 0; 25413675Sdyson cpipe->pipe_peer = NULL; 25513675Sdyson cpipe->pipe_busy = 0; 25634901Sphk getnanotime(&cpipe->pipe_ctime); 25724101Sbde cpipe->pipe_atime = cpipe->pipe_ctime; 25824101Sbde cpipe->pipe_mtime = cpipe->pipe_ctime; 25913675Sdyson bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); 26013907Sdyson 26114037Sdyson#ifndef PIPE_NODIRECT 26213907Sdyson /* 26313907Sdyson * pipe data structure initializations to support direct pipe I/O 26413907Sdyson */ 26513907Sdyson cpipe->pipe_map.cnt = 0; 26613907Sdyson cpipe->pipe_map.kva = 0; 26713907Sdyson cpipe->pipe_map.pos = 0; 26813907Sdyson cpipe->pipe_map.npages = 0; 26917124Sbde /* cpipe->pipe_map.ms[] = invalid */ 27014037Sdyson#endif 27113675Sdyson} 27213675Sdyson 27313675Sdyson 27413675Sdyson/* 27513675Sdyson * lock a pipe for I/O, blocking other 
access 27613675Sdyson */ 27713675Sdysonstatic __inline int 27813907Sdysonpipelock(cpipe, catch) 27913675Sdyson struct pipe *cpipe; 28013907Sdyson int catch; 28113675Sdyson{ 28213776Sdyson int error; 28313675Sdyson while (cpipe->pipe_state & PIPE_LOCK) { 28413675Sdyson cpipe->pipe_state |= PIPE_LWANT; 28543301Sdillon if ((error = tsleep( cpipe, 28643301Sdillon catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) != 0) { 28713776Sdyson return error; 28813675Sdyson } 28913675Sdyson } 29013675Sdyson cpipe->pipe_state |= PIPE_LOCK; 29113675Sdyson return 0; 29213675Sdyson} 29313675Sdyson 29413675Sdyson/* 29513675Sdyson * unlock a pipe I/O lock 29613675Sdyson */ 29713675Sdysonstatic __inline void 29813675Sdysonpipeunlock(cpipe) 29913675Sdyson struct pipe *cpipe; 30013675Sdyson{ 30113675Sdyson cpipe->pipe_state &= ~PIPE_LOCK; 30213675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 30313675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 30414177Sdyson wakeup(cpipe); 30513675Sdyson } 30613675Sdyson} 30713675Sdyson 30814037Sdysonstatic __inline void 30914037Sdysonpipeselwakeup(cpipe) 31014037Sdyson struct pipe *cpipe; 31114037Sdyson{ 31214037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 31314037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 31414037Sdyson selwakeup(&cpipe->pipe_sel); 31514037Sdyson } 31641086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 31741086Struckman pgsigio(cpipe->pipe_sigio, SIGIO, 0); 31814037Sdyson} 31914037Sdyson 32013675Sdyson/* ARGSUSED */ 32113675Sdysonstatic int 32213675Sdysonpipe_read(fp, uio, cred) 32313675Sdyson struct file *fp; 32413675Sdyson struct uio *uio; 32513675Sdyson struct ucred *cred; 32613675Sdyson{ 32713675Sdyson 32813675Sdyson struct pipe *rpipe = (struct pipe *) fp->f_data; 32913675Sdyson int error = 0; 33013675Sdyson int nread = 0; 33118863Sdyson u_int size; 33213675Sdyson 33313675Sdyson ++rpipe->pipe_busy; 33413675Sdyson while (uio->uio_resid) { 33513907Sdyson /* 33613907Sdyson * normal pipe buffer receive 33713907Sdyson */ 
33813675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 33918863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 34013675Sdyson if (size > rpipe->pipe_buffer.cnt) 34113675Sdyson size = rpipe->pipe_buffer.cnt; 34218863Sdyson if (size > (u_int) uio->uio_resid) 34318863Sdyson size = (u_int) uio->uio_resid; 34413907Sdyson if ((error = pipelock(rpipe,1)) == 0) { 34513675Sdyson error = uiomove( &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 34613675Sdyson size, uio); 34713675Sdyson pipeunlock(rpipe); 34813675Sdyson } 34913675Sdyson if (error) { 35013675Sdyson break; 35113675Sdyson } 35213675Sdyson rpipe->pipe_buffer.out += size; 35313675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 35413675Sdyson rpipe->pipe_buffer.out = 0; 35513675Sdyson 35613675Sdyson rpipe->pipe_buffer.cnt -= size; 35713675Sdyson nread += size; 35814037Sdyson#ifndef PIPE_NODIRECT 35913907Sdyson /* 36013907Sdyson * Direct copy, bypassing a kernel buffer. 36113907Sdyson */ 36213907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 36313907Sdyson (rpipe->pipe_state & PIPE_DIRECTW)) { 36413907Sdyson caddr_t va; 36518863Sdyson if (size > (u_int) uio->uio_resid) 36618863Sdyson size = (u_int) uio->uio_resid; 36713907Sdyson if ((error = pipelock(rpipe,1)) == 0) { 36813907Sdyson va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos; 36913907Sdyson error = uiomove(va, size, uio); 37013907Sdyson pipeunlock(rpipe); 37113907Sdyson } 37213907Sdyson if (error) 37313907Sdyson break; 37413907Sdyson nread += size; 37513907Sdyson rpipe->pipe_map.pos += size; 37613907Sdyson rpipe->pipe_map.cnt -= size; 37713907Sdyson if (rpipe->pipe_map.cnt == 0) { 37813907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 37913907Sdyson wakeup(rpipe); 38013907Sdyson } 38114037Sdyson#endif 38213675Sdyson } else { 38313675Sdyson /* 38443623Sdillon * If there is no more to read in the pipe, reset 38543623Sdillon * its pointers to the beginning. This improves 38643623Sdillon * cache hit stats. 
38743623Sdillon * 38843623Sdillon * We get this over with now because it may block 38943623Sdillon * and cause the state to change out from under us, 39043623Sdillon * rather then have to re-test the state both before 39143623Sdillon * and after this fragment. 39243623Sdillon */ 39343623Sdillon 39443623Sdillon if ((error = pipelock(rpipe,1)) == 0) { 39543623Sdillon if (rpipe->pipe_buffer.cnt == 0) { 39643623Sdillon rpipe->pipe_buffer.in = 0; 39743623Sdillon rpipe->pipe_buffer.out = 0; 39843623Sdillon } 39943623Sdillon pipeunlock(rpipe); 40043623Sdillon 40143623Sdillon /* 40243623Sdillon * If pipe filled up due to pipelock 40343623Sdillon * blocking, loop back up. 40443623Sdillon */ 40543623Sdillon if (rpipe->pipe_buffer.cnt > 0) 40643623Sdillon continue; 40743623Sdillon } 40843623Sdillon 40943623Sdillon /* 41013675Sdyson * detect EOF condition 41113675Sdyson */ 41213675Sdyson if (rpipe->pipe_state & PIPE_EOF) { 41314802Sdyson /* XXX error = ? */ 41413675Sdyson break; 41513675Sdyson } 41643623Sdillon 41713675Sdyson /* 41813675Sdyson * If the "write-side" has been blocked, wake it up now. 
41913675Sdyson */ 42013675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 42113675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 42213675Sdyson wakeup(rpipe); 42313675Sdyson } 42443623Sdillon 42543623Sdillon /* 42643623Sdillon * break if error (signal via pipelock), or if some 42743623Sdillon * data was read 42843623Sdillon */ 42943623Sdillon if (error || nread > 0) 43013675Sdyson break; 43116960Sdyson 43243623Sdillon /* 43343623Sdillon * Handle non-blocking mode operation 43443623Sdillon */ 43543623Sdillon 43616960Sdyson if (fp->f_flag & FNONBLOCK) { 43713774Sdyson error = EAGAIN; 43813774Sdyson break; 43913774Sdyson } 44013675Sdyson 44113675Sdyson /* 44243623Sdillon * Wait for more data 44313675Sdyson */ 44414177Sdyson 44513675Sdyson rpipe->pipe_state |= PIPE_WANTR; 44643301Sdillon if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) != 0) { 44713675Sdyson break; 44813675Sdyson } 44913675Sdyson } 45013675Sdyson } 45113675Sdyson 45224101Sbde if (error == 0) 45334901Sphk getnanotime(&rpipe->pipe_atime); 45413913Sdyson 45513675Sdyson --rpipe->pipe_busy; 45613675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 45713675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 45813675Sdyson wakeup(rpipe); 45913675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 46013675Sdyson /* 46113675Sdyson * If there is no more to read in the pipe, reset 46213913Sdyson * its pointers to the beginning. This improves 46313675Sdyson * cache hit stats. 46413675Sdyson */ 46517163Sdyson if (rpipe->pipe_buffer.cnt == 0) { 46617163Sdyson if ((error == 0) && (error = pipelock(rpipe,1)) == 0) { 46713675Sdyson rpipe->pipe_buffer.in = 0; 46813675Sdyson rpipe->pipe_buffer.out = 0; 46917163Sdyson pipeunlock(rpipe); 47013675Sdyson } 47113675Sdyson } 47213675Sdyson 47313675Sdyson /* 47413675Sdyson * If the "write-side" has been blocked, wake it up now. 
47513675Sdyson */ 47613675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 47713675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 47813675Sdyson wakeup(rpipe); 47913675Sdyson } 48013675Sdyson } 48114037Sdyson 48214802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 48314037Sdyson pipeselwakeup(rpipe); 48414037Sdyson 48513675Sdyson return error; 48613675Sdyson} 48713675Sdyson 48814037Sdyson#ifndef PIPE_NODIRECT 48913907Sdyson/* 49013907Sdyson * Map the sending processes' buffer into kernel space and wire it. 49113907Sdyson * This is similar to a physical write operation. 49213907Sdyson */ 49313675Sdysonstatic int 49413907Sdysonpipe_build_write_buffer(wpipe, uio) 49513907Sdyson struct pipe *wpipe; 49613675Sdyson struct uio *uio; 49713675Sdyson{ 49818863Sdyson u_int size; 49913907Sdyson int i; 50013907Sdyson vm_offset_t addr, endaddr, paddr; 50113907Sdyson 50218863Sdyson size = (u_int) uio->uio_iov->iov_len; 50313907Sdyson if (size > wpipe->pipe_buffer.size) 50413907Sdyson size = wpipe->pipe_buffer.size; 50513907Sdyson 50640286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 50740286Sdg for(i = 0, addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 50813907Sdyson addr < endaddr; 50913907Sdyson addr += PAGE_SIZE, i+=1) { 51013907Sdyson 51113907Sdyson vm_page_t m; 51213907Sdyson 51313909Sdyson vm_fault_quick( (caddr_t) addr, VM_PROT_READ); 51413907Sdyson paddr = pmap_kextract(addr); 51513907Sdyson if (!paddr) { 51613907Sdyson int j; 51713907Sdyson for(j=0;j<i;j++) 51840700Sdg vm_page_unwire(wpipe->pipe_map.ms[j], 1); 51913907Sdyson return EFAULT; 52013907Sdyson } 52113907Sdyson 52213907Sdyson m = PHYS_TO_VM_PAGE(paddr); 52313907Sdyson vm_page_wire(m); 52413907Sdyson wpipe->pipe_map.ms[i] = m; 52513907Sdyson } 52613907Sdyson 52713907Sdyson/* 52813907Sdyson * set up the control block 52913907Sdyson */ 53013907Sdyson wpipe->pipe_map.npages = i; 53113907Sdyson wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 
53213907Sdyson wpipe->pipe_map.cnt = size; 53313907Sdyson 53413907Sdyson/* 53513907Sdyson * and map the buffer 53613907Sdyson */ 53713907Sdyson if (wpipe->pipe_map.kva == 0) { 53813912Sdyson /* 53913912Sdyson * We need to allocate space for an extra page because the 54013912Sdyson * address range might (will) span pages at times. 54113912Sdyson */ 54213907Sdyson wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 54313912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 54413912Sdyson amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 54513907Sdyson } 54613907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 54713907Sdyson wpipe->pipe_map.npages); 54813907Sdyson 54913907Sdyson/* 55013907Sdyson * and update the uio data 55113907Sdyson */ 55213907Sdyson 55313907Sdyson uio->uio_iov->iov_len -= size; 55413907Sdyson uio->uio_iov->iov_base += size; 55513907Sdyson if (uio->uio_iov->iov_len == 0) 55613907Sdyson uio->uio_iov++; 55713907Sdyson uio->uio_resid -= size; 55813907Sdyson uio->uio_offset += size; 55913907Sdyson return 0; 56013907Sdyson} 56113907Sdyson 56213907Sdyson/* 56313907Sdyson * unmap and unwire the process buffer 56413907Sdyson */ 56513907Sdysonstatic void 56613907Sdysonpipe_destroy_write_buffer(wpipe) 56713907Sdysonstruct pipe *wpipe; 56813907Sdyson{ 56913907Sdyson int i; 57017163Sdyson if (wpipe->pipe_map.kva) { 57117163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 57213907Sdyson 57313907Sdyson if (amountpipekva > MAXPIPEKVA) { 57413907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 57513907Sdyson wpipe->pipe_map.kva = 0; 57613907Sdyson kmem_free(kernel_map, kva, 57713912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 57813912Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 57913907Sdyson } 58013907Sdyson } 58113907Sdyson for (i=0;i<wpipe->pipe_map.npages;i++) 58240700Sdg vm_page_unwire(wpipe->pipe_map.ms[i], 1); 58313907Sdyson} 58413907Sdyson 58513907Sdyson/* 58613907Sdyson * In the case of a signal, the writing 
process might go away. This 58713907Sdyson * code copies the data into the circular buffer so that the source 58813907Sdyson * pages can be freed without loss of data. 58913907Sdyson */ 59013907Sdysonstatic void 59113907Sdysonpipe_clone_write_buffer(wpipe) 59213907Sdysonstruct pipe *wpipe; 59313907Sdyson{ 59413907Sdyson int size; 59513907Sdyson int pos; 59613907Sdyson 59713907Sdyson size = wpipe->pipe_map.cnt; 59813907Sdyson pos = wpipe->pipe_map.pos; 59913907Sdyson bcopy((caddr_t) wpipe->pipe_map.kva+pos, 60013907Sdyson (caddr_t) wpipe->pipe_buffer.buffer, 60113907Sdyson size); 60213907Sdyson 60313907Sdyson wpipe->pipe_buffer.in = size; 60413907Sdyson wpipe->pipe_buffer.out = 0; 60513907Sdyson wpipe->pipe_buffer.cnt = size; 60613907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 60713907Sdyson 60813907Sdyson pipe_destroy_write_buffer(wpipe); 60913907Sdyson} 61013907Sdyson 61113907Sdyson/* 61213907Sdyson * This implements the pipe buffer write mechanism. Note that only 61313907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 61413907Sdyson * If there are any characters in the pipe buffer, the direct write will 61513907Sdyson * be deferred until the receiving process grabs all of the bytes from 61613907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 
61713907Sdyson */ 61813907Sdysonstatic int 61913907Sdysonpipe_direct_write(wpipe, uio) 62013907Sdyson struct pipe *wpipe; 62113907Sdyson struct uio *uio; 62213907Sdyson{ 62313907Sdyson int error; 62413951Sdysonretry: 62513907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 62613951Sdyson if ( wpipe->pipe_state & PIPE_WANTR) { 62713951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 62813951Sdyson wakeup(wpipe); 62913951Sdyson } 63013992Sdyson wpipe->pipe_state |= PIPE_WANTW; 63113907Sdyson error = tsleep(wpipe, 63213907Sdyson PRIBIO|PCATCH, "pipdww", 0); 63314802Sdyson if (error) 63413907Sdyson goto error1; 63514802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 63614802Sdyson error = EPIPE; 63714802Sdyson goto error1; 63814802Sdyson } 63913907Sdyson } 64013907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 64113951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 64213951Sdyson if ( wpipe->pipe_state & PIPE_WANTR) { 64313951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 64413951Sdyson wakeup(wpipe); 64513951Sdyson } 64613951Sdyson 64713992Sdyson wpipe->pipe_state |= PIPE_WANTW; 64813907Sdyson error = tsleep(wpipe, 64913907Sdyson PRIBIO|PCATCH, "pipdwc", 0); 65014802Sdyson if (error) 65113907Sdyson goto error1; 65214802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 65314802Sdyson error = EPIPE; 65414802Sdyson goto error1; 65513907Sdyson } 65613951Sdyson goto retry; 65713907Sdyson } 65813907Sdyson 65913951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 66013951Sdyson 66113907Sdyson error = pipe_build_write_buffer(wpipe, uio); 66213907Sdyson if (error) { 66313907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 66413907Sdyson goto error1; 66513907Sdyson } 66613907Sdyson 66713907Sdyson error = 0; 66813907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 66913907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 67013907Sdyson pipelock(wpipe, 0); 67113907Sdyson pipe_destroy_write_buffer(wpipe); 67213907Sdyson pipeunlock(wpipe); 67314037Sdyson pipeselwakeup(wpipe); 67414802Sdyson error = EPIPE; 
67514802Sdyson goto error1; 67613907Sdyson } 67713992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 67813992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 67913992Sdyson wakeup(wpipe); 68013992Sdyson } 68114037Sdyson pipeselwakeup(wpipe); 68213907Sdyson error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0); 68313907Sdyson } 68413907Sdyson 68513907Sdyson pipelock(wpipe,0); 68613907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 68713907Sdyson /* 68813907Sdyson * this bit of trickery substitutes a kernel buffer for 68913907Sdyson * the process that might be going away. 69013907Sdyson */ 69113907Sdyson pipe_clone_write_buffer(wpipe); 69213907Sdyson } else { 69313907Sdyson pipe_destroy_write_buffer(wpipe); 69413907Sdyson } 69513907Sdyson pipeunlock(wpipe); 69613907Sdyson return error; 69713907Sdyson 69813907Sdysonerror1: 69913907Sdyson wakeup(wpipe); 70013907Sdyson return error; 70113907Sdyson} 70214037Sdyson#endif 70313907Sdyson 70416960Sdysonstatic int 70516960Sdysonpipe_write(fp, uio, cred) 70616960Sdyson struct file *fp; 70713907Sdyson struct uio *uio; 70816960Sdyson struct ucred *cred; 70913907Sdyson{ 71013675Sdyson int error = 0; 71113913Sdyson int orig_resid; 71213675Sdyson 71316960Sdyson struct pipe *wpipe, *rpipe; 71416960Sdyson 71516960Sdyson rpipe = (struct pipe *) fp->f_data; 71616960Sdyson wpipe = rpipe->pipe_peer; 71716960Sdyson 71813675Sdyson /* 71913675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 72013675Sdyson */ 72116960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 72213774Sdyson return EPIPE; 72313675Sdyson } 72413675Sdyson 72517163Sdyson /* 72617163Sdyson * If it is advantageous to resize the pipe buffer, do 72717163Sdyson * so. 
72817163Sdyson */ 72917163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 73017163Sdyson (nbigpipe < LIMITBIGPIPES) && 73117163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 73217163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 73317163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 73417163Sdyson 73517163Sdyson if (wpipe->pipe_buffer.buffer) { 73617163Sdyson amountpipekva -= wpipe->pipe_buffer.size; 73717163Sdyson kmem_free(kernel_map, 73817163Sdyson (vm_offset_t)wpipe->pipe_buffer.buffer, 73917163Sdyson wpipe->pipe_buffer.size); 74017163Sdyson } 74117163Sdyson 74217163Sdyson#ifndef PIPE_NODIRECT 74317163Sdyson if (wpipe->pipe_map.kva) { 74417163Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 74517163Sdyson kmem_free(kernel_map, 74617163Sdyson wpipe->pipe_map.kva, 74717163Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 74817163Sdyson } 74917163Sdyson#endif 75017163Sdyson 75117163Sdyson wpipe->pipe_buffer.in = 0; 75217163Sdyson wpipe->pipe_buffer.out = 0; 75317163Sdyson wpipe->pipe_buffer.cnt = 0; 75417163Sdyson wpipe->pipe_buffer.size = BIG_PIPE_SIZE; 75517163Sdyson wpipe->pipe_buffer.buffer = NULL; 75617163Sdyson ++nbigpipe; 75717163Sdyson 75817163Sdyson#ifndef PIPE_NODIRECT 75917163Sdyson wpipe->pipe_map.cnt = 0; 76017163Sdyson wpipe->pipe_map.kva = 0; 76117163Sdyson wpipe->pipe_map.pos = 0; 76217163Sdyson wpipe->pipe_map.npages = 0; 76317163Sdyson#endif 76417163Sdyson 76517163Sdyson } 76617163Sdyson 76717163Sdyson 76813907Sdyson if( wpipe->pipe_buffer.buffer == NULL) { 76913907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 77013907Sdyson pipespace(wpipe); 77113907Sdyson pipeunlock(wpipe); 77213907Sdyson } else { 77313907Sdyson return error; 77413907Sdyson } 77513907Sdyson } 77613907Sdyson 77713675Sdyson ++wpipe->pipe_busy; 77813913Sdyson orig_resid = uio->uio_resid; 77913675Sdyson while (uio->uio_resid) { 78013907Sdyson int space; 78114037Sdyson#ifndef PIPE_NODIRECT 78213907Sdyson /* 78313907Sdyson * If the transfer is large, we can gain performance if 
78413907Sdyson * we do process-to-process copies directly. 78516416Sdyson * If the write is non-blocking, we don't use the 78616416Sdyson * direct write mechanism. 78713907Sdyson */ 78817163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 78917163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 79017163Sdyson (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 79113907Sdyson (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 79213907Sdyson error = pipe_direct_write( wpipe, uio); 79313907Sdyson if (error) { 79413907Sdyson break; 79513907Sdyson } 79613907Sdyson continue; 79713907Sdyson } 79814037Sdyson#endif 79913907Sdyson 80013907Sdyson /* 80113907Sdyson * Pipe buffered writes cannot be coincidental with 80213907Sdyson * direct writes. We wait until the currently executing 80313907Sdyson * direct write is completed before we start filling the 80413907Sdyson * pipe buffer. 80513907Sdyson */ 80613907Sdyson retrywrite: 80713907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 80813992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 80913992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 81013992Sdyson wakeup(wpipe); 81113992Sdyson } 81213907Sdyson error = tsleep(wpipe, 81313907Sdyson PRIBIO|PCATCH, "pipbww", 0); 81413907Sdyson if (error) 81513907Sdyson break; 81613907Sdyson } 81713907Sdyson 81813907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 81914644Sdyson 82014644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 82114644Sdyson /* XXX perhaps they need to be contiguous to be atomic? */ 82213913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 82313913Sdyson space = 0; 82413907Sdyson 82517163Sdyson if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 82617163Sdyson /* 82717163Sdyson * This set the maximum transfer as a segment of 82817163Sdyson * the buffer. 
82917163Sdyson */ 83013675Sdyson int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in; 83117163Sdyson /* 83217163Sdyson * space is the size left in the buffer 83317163Sdyson */ 83413675Sdyson if (size > space) 83513675Sdyson size = space; 83617163Sdyson /* 83717163Sdyson * now limit it to the size of the uio transfer 83817163Sdyson */ 83913675Sdyson if (size > uio->uio_resid) 84013675Sdyson size = uio->uio_resid; 84113907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 84213907Sdyson /* 84313907Sdyson * It is possible for a direct write to 84413907Sdyson * slip in on us... handle it here... 84513907Sdyson */ 84613907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 84713907Sdyson pipeunlock(wpipe); 84813907Sdyson goto retrywrite; 84913907Sdyson } 85013675Sdyson error = uiomove( &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 85113675Sdyson size, uio); 85213675Sdyson pipeunlock(wpipe); 85313675Sdyson } 85413675Sdyson if (error) 85513675Sdyson break; 85613675Sdyson 85713675Sdyson wpipe->pipe_buffer.in += size; 85813675Sdyson if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size) 85913675Sdyson wpipe->pipe_buffer.in = 0; 86013675Sdyson 86113675Sdyson wpipe->pipe_buffer.cnt += size; 86213675Sdyson } else { 86313675Sdyson /* 86413675Sdyson * If the "read-side" has been blocked, wake it up now. 86513675Sdyson */ 86613675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 86713675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 86813675Sdyson wakeup(wpipe); 86913675Sdyson } 87014037Sdyson 87113675Sdyson /* 87213675Sdyson * don't block on non-blocking I/O 87313675Sdyson */ 87416960Sdyson if (fp->f_flag & FNONBLOCK) { 87513907Sdyson error = EAGAIN; 87613675Sdyson break; 87713675Sdyson } 87813907Sdyson 87914037Sdyson /* 88014037Sdyson * We have no more space and have something to offer, 88129356Speter * wake up select/poll. 
88214037Sdyson */ 88314037Sdyson pipeselwakeup(wpipe); 88414037Sdyson 88513675Sdyson wpipe->pipe_state |= PIPE_WANTW; 88643301Sdillon if ((error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) != 0) { 88713675Sdyson break; 88813675Sdyson } 88913675Sdyson /* 89013675Sdyson * If read side wants to go away, we just issue a signal 89113675Sdyson * to ourselves. 89213675Sdyson */ 89313675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 89413774Sdyson error = EPIPE; 89513907Sdyson break; 89613675Sdyson } 89713675Sdyson } 89813675Sdyson } 89913675Sdyson 90014644Sdyson --wpipe->pipe_busy; 90113675Sdyson if ((wpipe->pipe_busy == 0) && 90213675Sdyson (wpipe->pipe_state & PIPE_WANT)) { 90313675Sdyson wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); 90413675Sdyson wakeup(wpipe); 90513675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 90613675Sdyson /* 90713675Sdyson * If we have put any characters in the buffer, we wake up 90813675Sdyson * the reader. 90913675Sdyson */ 91013675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 91113675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 91213675Sdyson wakeup(wpipe); 91313675Sdyson } 91413675Sdyson } 91513909Sdyson 91613909Sdyson /* 91713909Sdyson * Don't return EPIPE if I/O was successful 91813909Sdyson */ 91913907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 92013907Sdyson (uio->uio_resid == 0) && 92113907Sdyson (error == EPIPE)) 92213907Sdyson error = 0; 92313913Sdyson 92424101Sbde if (error == 0) 92534901Sphk getnanotime(&wpipe->pipe_mtime); 92624101Sbde 92714037Sdyson /* 92814037Sdyson * We have something to offer, 92929356Speter * wake up select/poll. 93014037Sdyson */ 93114177Sdyson if (wpipe->pipe_buffer.cnt) 93214037Sdyson pipeselwakeup(wpipe); 93313907Sdyson 93413675Sdyson return error; 93513675Sdyson} 93613675Sdyson 93713675Sdyson/* 93813675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 
93913675Sdyson */ 94013675Sdysonint 94113675Sdysonpipe_ioctl(fp, cmd, data, p) 94213675Sdyson struct file *fp; 94336735Sdfr u_long cmd; 94413675Sdyson register caddr_t data; 94513675Sdyson struct proc *p; 94613675Sdyson{ 94713675Sdyson register struct pipe *mpipe = (struct pipe *)fp->f_data; 94813675Sdyson 94913675Sdyson switch (cmd) { 95013675Sdyson 95113675Sdyson case FIONBIO: 95213675Sdyson return (0); 95313675Sdyson 95413675Sdyson case FIOASYNC: 95513675Sdyson if (*(int *)data) { 95613675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 95713675Sdyson } else { 95813675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 95913675Sdyson } 96013675Sdyson return (0); 96113675Sdyson 96213675Sdyson case FIONREAD: 96314037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 96414037Sdyson *(int *)data = mpipe->pipe_map.cnt; 96514037Sdyson else 96614037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 96713675Sdyson return (0); 96813675Sdyson 96941086Struckman case FIOSETOWN: 97041086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 97141086Struckman 97241086Struckman case FIOGETOWN: 97341086Struckman *(int *)data = fgetown(mpipe->pipe_sigio); 97413675Sdyson return (0); 97513675Sdyson 97641086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 97741086Struckman case TIOCSPGRP: 97841086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 97941086Struckman 98041086Struckman /* This is deprecated, FIOGETOWN should be used instead. 
*/ 98118863Sdyson case TIOCGPGRP: 98241086Struckman *(int *)data = -fgetown(mpipe->pipe_sigio); 98313675Sdyson return (0); 98413675Sdyson 98513675Sdyson } 98617124Sbde return (ENOTTY); 98713675Sdyson} 98813675Sdyson 98913675Sdysonint 99029356Speterpipe_poll(fp, events, cred, p) 99113675Sdyson struct file *fp; 99229356Speter int events; 99329356Speter struct ucred *cred; 99413675Sdyson struct proc *p; 99513675Sdyson{ 99613675Sdyson register struct pipe *rpipe = (struct pipe *)fp->f_data; 99713675Sdyson struct pipe *wpipe; 99829356Speter int revents = 0; 99913675Sdyson 100013675Sdyson wpipe = rpipe->pipe_peer; 100129356Speter if (events & (POLLIN | POLLRDNORM)) 100229356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 100329356Speter (rpipe->pipe_buffer.cnt > 0) || 100429356Speter (rpipe->pipe_state & PIPE_EOF)) 100529356Speter revents |= events & (POLLIN | POLLRDNORM); 100613675Sdyson 100729356Speter if (events & (POLLOUT | POLLWRNORM)) 100829356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 100943311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 101043311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 101129356Speter revents |= events & (POLLOUT | POLLWRNORM); 101213675Sdyson 101329356Speter if ((rpipe->pipe_state & PIPE_EOF) || 101429356Speter (wpipe == NULL) || 101529356Speter (wpipe->pipe_state & PIPE_EOF)) 101629356Speter revents |= POLLHUP; 101729356Speter 101829356Speter if (revents == 0) { 101929356Speter if (events & (POLLIN | POLLRDNORM)) { 102029356Speter selrecord(p, &rpipe->pipe_sel); 102129356Speter rpipe->pipe_state |= PIPE_SEL; 102213675Sdyson } 102313675Sdyson 102429356Speter if (events & (POLLOUT | POLLWRNORM)) { 102530164Speter selrecord(p, &wpipe->pipe_sel); 102630164Speter wpipe->pipe_state |= PIPE_SEL; 102713907Sdyson } 102813675Sdyson } 102929356Speter 103029356Speter return (revents); 103113675Sdyson} 103213675Sdyson 103313675Sdysonint 103413675Sdysonpipe_stat(pipe, ub) 103513675Sdyson register 
struct pipe *pipe; 103613675Sdyson register struct stat *ub; 103713675Sdyson{ 103813675Sdyson bzero((caddr_t)ub, sizeof (*ub)); 103917124Sbde ub->st_mode = S_IFIFO; 104013907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 104113675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 104213675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 104334901Sphk ub->st_atimespec = pipe->pipe_atime; 104434901Sphk ub->st_mtimespec = pipe->pipe_mtime; 104534901Sphk ub->st_ctimespec = pipe->pipe_ctime; 104617124Sbde /* 104717124Sbde * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev, 104817124Sbde * st_flags, st_gen. 104917124Sbde * XXX (st_dev, st_ino) should be unique. 105017124Sbde */ 105113675Sdyson return 0; 105213675Sdyson} 105313675Sdyson 105413675Sdyson/* ARGSUSED */ 105513675Sdysonstatic int 105613675Sdysonpipe_close(fp, p) 105713675Sdyson struct file *fp; 105813675Sdyson struct proc *p; 105913675Sdyson{ 106013675Sdyson struct pipe *cpipe = (struct pipe *)fp->f_data; 106116322Sgpalmer 106241086Struckman funsetown(cpipe->pipe_sigio); 106313675Sdyson pipeclose(cpipe); 106413675Sdyson fp->f_data = NULL; 106513675Sdyson return 0; 106613675Sdyson} 106713675Sdyson 106813675Sdyson/* 106913675Sdyson * shutdown the pipe 107013675Sdyson */ 107113675Sdysonstatic void 107213675Sdysonpipeclose(cpipe) 107313675Sdyson struct pipe *cpipe; 107413675Sdyson{ 107513907Sdyson struct pipe *ppipe; 107613675Sdyson if (cpipe) { 107713907Sdyson 107814037Sdyson pipeselwakeup(cpipe); 107913907Sdyson 108013675Sdyson /* 108113675Sdyson * If the other side is blocked, wake it up saying that 108213675Sdyson * we want to close it down. 
108313675Sdyson */ 108413675Sdyson while (cpipe->pipe_busy) { 108513675Sdyson wakeup(cpipe); 108613675Sdyson cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; 108713675Sdyson tsleep(cpipe, PRIBIO, "pipecl", 0); 108813675Sdyson } 108913675Sdyson 109013675Sdyson /* 109113675Sdyson * Disconnect from peer 109213675Sdyson */ 109343301Sdillon if ((ppipe = cpipe->pipe_peer) != NULL) { 109414037Sdyson pipeselwakeup(ppipe); 109513907Sdyson 109613907Sdyson ppipe->pipe_state |= PIPE_EOF; 109713907Sdyson wakeup(ppipe); 109813907Sdyson ppipe->pipe_peer = NULL; 109913675Sdyson } 110013675Sdyson 110113675Sdyson /* 110213675Sdyson * free resources 110313675Sdyson */ 110413907Sdyson if (cpipe->pipe_buffer.buffer) { 110517163Sdyson if (cpipe->pipe_buffer.size > PIPE_SIZE) 110617163Sdyson --nbigpipe; 110713907Sdyson amountpipekva -= cpipe->pipe_buffer.size; 110813907Sdyson kmem_free(kernel_map, 110913907Sdyson (vm_offset_t)cpipe->pipe_buffer.buffer, 111013907Sdyson cpipe->pipe_buffer.size); 111113907Sdyson } 111214037Sdyson#ifndef PIPE_NODIRECT 111313907Sdyson if (cpipe->pipe_map.kva) { 111413912Sdyson amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 111513907Sdyson kmem_free(kernel_map, 111613907Sdyson cpipe->pipe_map.kva, 111713912Sdyson cpipe->pipe_buffer.size + PAGE_SIZE); 111813907Sdyson } 111914037Sdyson#endif 112027899Sdyson zfree(pipe_zone, cpipe); 112113675Sdyson } 112213675Sdyson} 1123