sys_pipe.c revision 17163
113675Sdyson/* 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson * 1917163Sdyson * $Id: sys_pipe.c,v 1.19 1996/07/12 08:14:58 bde Exp $ 2013675Sdyson */ 2113675Sdyson 2213675Sdyson#ifndef OLD_PIPE 2313675Sdyson 2413675Sdyson/* 2513675Sdyson * This file contains a high-performance replacement for the socket-based 2613675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2713675Sdyson * all features of sockets, but does do everything that pipes normally 2813675Sdyson * do. 2913675Sdyson */ 3013675Sdyson 3113907Sdyson/* 3213907Sdyson * This code has two modes of operation, a small write mode and a large 3313907Sdyson * write mode. The small write mode acts like conventional pipes with 3413907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3513907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 3613907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 3713907Sdyson * the receiving process can copy it directly from the pages in the sending 3813907Sdyson * process. 3913907Sdyson * 4013907Sdyson * If the sending process receives a signal, it is possible that it will 4113913Sdyson * go away, and certainly its address space can change, because control 4213907Sdyson * is returned back to the user-mode side. In that case, the pipe code 4313907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4413907Sdyson * kernel buffer, and the receiving process will grab the data from the 4513907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4613907Sdyson * the copy operation is normally eliminated. 4713907Sdyson * 4813907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4913907Sdyson * happen for small transfers so that the system will not spend all of 5013913Sdyson * its time context switching. PIPE_SIZE is constrained by the 5113907Sdyson * amount of kernel virtual memory. 5213907Sdyson */ 5313907Sdyson 5413675Sdyson#include <sys/param.h> 5513675Sdyson#include <sys/systm.h> 5613675Sdyson#include <sys/proc.h> 5713675Sdyson#include <sys/file.h> 5813675Sdyson#include <sys/protosw.h> 5913675Sdyson#include <sys/stat.h> 6013675Sdyson#include <sys/filedesc.h> 6113675Sdyson#include <sys/malloc.h> 6213675Sdyson#include <sys/ioctl.h> 6313675Sdyson#include <sys/stat.h> 6413675Sdyson#include <sys/select.h> 6513675Sdyson#include <sys/signalvar.h> 6613675Sdyson#include <sys/errno.h> 6713675Sdyson#include <sys/queue.h> 6813675Sdyson#include <sys/vmmeter.h> 6913675Sdyson#include <sys/kernel.h> 7013675Sdyson#include <sys/sysproto.h> 7113675Sdyson#include <sys/pipe.h> 7213675Sdyson 7313675Sdyson#include <vm/vm.h> 7413675Sdyson#include <vm/vm_prot.h> 7513675Sdyson#include <vm/vm_param.h> 7613675Sdyson#include <vm/lock.h> 7713675Sdyson#include <vm/vm_object.h> 7813675Sdyson#include <vm/vm_kern.h> 7913675Sdyson#include <vm/vm_extern.h> 8013675Sdyson#include <vm/pmap.h> 8113675Sdyson#include <vm/vm_map.h> 8213907Sdyson#include <vm/vm_page.h> 8313675Sdyson 8414037Sdyson/* 8514037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 8614037Sdyson * approx 30% decrease in transfer rate. This could be useful for 8714037Sdyson * NetBSD or OpenBSD. 8814037Sdyson */ 8914037Sdyson/* #define PIPE_NODIRECT */ 9014037Sdyson 9114037Sdyson/* 9214037Sdyson * interfaces to the outside world 9314037Sdyson */ 9413675Sdysonstatic int pipe_read __P((struct file *fp, struct uio *uio, 9513675Sdyson struct ucred *cred)); 9613675Sdysonstatic int pipe_write __P((struct file *fp, struct uio *uio, 9713675Sdyson struct ucred *cred)); 9813675Sdysonstatic int pipe_close __P((struct file *fp, struct proc *p)); 9913675Sdysonstatic int pipe_select __P((struct file *fp, int which, struct proc *p)); 10013675Sdysonstatic int pipe_ioctl __P((struct file *fp, int cmd, caddr_t data, struct proc *p)); 10113675Sdyson 10213675Sdysonstatic struct fileops pipeops = 10313675Sdyson { pipe_read, pipe_write, pipe_ioctl, pipe_select, pipe_close }; 10413675Sdyson 10513675Sdyson/* 10613675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 10713675Sdyson * space is pageable. The pipe code will try to maintain locality of 10813675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 10913675Sdyson * will not wipe the cache. 11013675Sdyson */ 11113907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 11213907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 11313675Sdyson 11413907Sdyson/* 11513907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 11613907Sdyson * is there so that on large systems, we don't exhaust it. 11713907Sdyson */ 11813907Sdyson#define MAXPIPEKVA (8*1024*1024) 11913907Sdyson 12013907Sdyson/* 12113907Sdyson * Limit for direct transfers, we cannot, of course limit 12213907Sdyson * the amount of kva for pipes in general though. 12313907Sdyson */ 12413907Sdyson#define LIMITPIPEKVA (16*1024*1024) 12517163Sdyson 12617163Sdyson/* 12717163Sdyson * Limit the number of "big" pipes 12817163Sdyson */ 12917163Sdyson#define LIMITBIGPIPES 32 13017163Sdysonint nbigpipe; 13117163Sdyson 13217124Sbdestatic int amountpipekva; 13313907Sdyson 13413675Sdysonstatic void pipeclose __P((struct pipe *cpipe)); 13513675Sdysonstatic void pipeinit __P((struct pipe *cpipe)); 13613907Sdysonstatic __inline int pipelock __P((struct pipe *cpipe, int catch)); 13713675Sdysonstatic __inline void pipeunlock __P((struct pipe *cpipe)); 13814122Speterstatic __inline void pipeselwakeup __P((struct pipe *cpipe)); 13914037Sdyson#ifndef PIPE_NODIRECT 14013907Sdysonstatic int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 14113907Sdysonstatic void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 14213907Sdysonstatic int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 14313907Sdysonstatic void pipe_clone_write_buffer __P((struct pipe *wpipe)); 14414037Sdyson#endif 14513907Sdysonstatic void pipespace __P((struct pipe *cpipe)); 14613675Sdyson 14713675Sdyson/* 14813675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 14913675Sdyson */ 15013675Sdyson 15113675Sdyson/* ARGSUSED */ 15213675Sdysonint 15313675Sdysonpipe(p, uap, retval) 15413675Sdyson struct proc *p; 15513675Sdyson struct pipe_args /* { 15613675Sdyson int dummy; 15713675Sdyson } */ *uap; 15813675Sdyson int retval[]; 15913675Sdyson{ 16013675Sdyson register struct filedesc *fdp = p->p_fd; 16113675Sdyson struct file *rf, *wf; 16213675Sdyson struct pipe *rpipe, *wpipe; 16313675Sdyson int fd, error; 16413675Sdyson 16513675Sdyson rpipe = malloc( sizeof (*rpipe), M_TEMP, M_WAITOK); 16613675Sdyson pipeinit(rpipe); 16713907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 16813675Sdyson wpipe = malloc( sizeof (*wpipe), M_TEMP, M_WAITOK); 16913675Sdyson pipeinit(wpipe); 17013907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 17113675Sdyson 17213675Sdyson error = falloc(p, &rf, &fd); 17313675Sdyson if (error) 17413675Sdyson goto free2; 17513675Sdyson retval[0] = fd; 17613675Sdyson rf->f_flag = FREAD | FWRITE; 17713675Sdyson rf->f_type = DTYPE_PIPE; 17813675Sdyson rf->f_ops = &pipeops; 17913675Sdyson rf->f_data = (caddr_t)rpipe; 18013675Sdyson error = falloc(p, &wf, &fd); 18113675Sdyson if (error) 18213675Sdyson goto free3; 18313675Sdyson wf->f_flag = FREAD | FWRITE; 18413675Sdyson wf->f_type = DTYPE_PIPE; 18513675Sdyson wf->f_ops = &pipeops; 18613675Sdyson wf->f_data = (caddr_t)wpipe; 18713675Sdyson retval[1] = fd; 18813675Sdyson 18913675Sdyson rpipe->pipe_peer = wpipe; 19013675Sdyson wpipe->pipe_peer = rpipe; 19113675Sdyson 19213675Sdyson return (0); 19313675Sdysonfree3: 19413675Sdyson ffree(rf); 19513675Sdyson fdp->fd_ofiles[retval[0]] = 0; 19613675Sdysonfree2: 19713675Sdyson (void)pipeclose(wpipe); 19813675Sdyson (void)pipeclose(rpipe); 19913675Sdyson return (error); 20013675Sdyson} 20113675Sdyson 20213909Sdyson/* 20313909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 20413909Sdyson */ 20513675Sdysonstatic void 20613907Sdysonpipespace(cpipe) 20713675Sdyson struct pipe *cpipe; 20813675Sdyson{ 20913688Sdyson int npages, error; 21013675Sdyson 21113907Sdyson npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE; 21213675Sdyson /* 21313675Sdyson * Create an object, I don't like the idea of paging to/from 21413675Sdyson * kernel_object. 21514037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 21613675Sdyson */ 21713675Sdyson cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages); 21813688Sdyson cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map); 21913675Sdyson 22013675Sdyson /* 22113675Sdyson * Insert the object into the kernel map, and allocate kva for it. 22213675Sdyson * The map entry is, by default, pageable. 22314037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 22413675Sdyson */ 22513688Sdyson error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0, 22613907Sdyson (vm_offset_t *) &cpipe->pipe_buffer.buffer, 22713907Sdyson cpipe->pipe_buffer.size, 1, 22813688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 22913675Sdyson 23013688Sdyson if (error != KERN_SUCCESS) 23113688Sdyson panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error); 23213907Sdyson amountpipekva += cpipe->pipe_buffer.size; 23313907Sdyson} 23413688Sdyson 23513907Sdyson/* 23613907Sdyson * initialize and allocate VM and memory for pipe 23713907Sdyson */ 23813907Sdysonstatic void 23913907Sdysonpipeinit(cpipe) 24013907Sdyson struct pipe *cpipe; 24113907Sdyson{ 24213913Sdyson int s; 24313907Sdyson 24413675Sdyson cpipe->pipe_buffer.in = 0; 24513675Sdyson cpipe->pipe_buffer.out = 0; 24613675Sdyson cpipe->pipe_buffer.cnt = 0; 24713907Sdyson cpipe->pipe_buffer.size = PIPE_SIZE; 24817163Sdyson 24913907Sdyson /* Buffer kva gets dynamically allocated */ 25013907Sdyson cpipe->pipe_buffer.buffer = NULL; 25117124Sbde /* cpipe->pipe_buffer.object = invalid */ 25213675Sdyson 25313675Sdyson cpipe->pipe_state = 0; 25413675Sdyson cpipe->pipe_peer = NULL; 25513675Sdyson cpipe->pipe_busy = 0; 25613913Sdyson s = splhigh(); 25713675Sdyson cpipe->pipe_ctime = time; 25813675Sdyson cpipe->pipe_atime = time; 25913675Sdyson cpipe->pipe_mtime = time; 26013913Sdyson splx(s); 26113675Sdyson bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); 26217124Sbde cpipe->pipe_pgid = NO_PID; 26313907Sdyson 26414037Sdyson#ifndef PIPE_NODIRECT 26513907Sdyson /* 26613907Sdyson * pipe data structure initializations to support direct pipe I/O 26713907Sdyson */ 26813907Sdyson cpipe->pipe_map.cnt = 0; 26913907Sdyson cpipe->pipe_map.kva = 0; 27013907Sdyson cpipe->pipe_map.pos = 0; 27113907Sdyson cpipe->pipe_map.npages = 0; 27217124Sbde /* cpipe->pipe_map.ms[] = invalid */ 27314037Sdyson#endif 27413675Sdyson} 27513675Sdyson 27613675Sdyson 27713675Sdyson/* 27813675Sdyson * lock a pipe for I/O, blocking other access 27913675Sdyson */ 28013675Sdysonstatic __inline int 28113907Sdysonpipelock(cpipe, catch) 28213675Sdyson struct pipe *cpipe; 28313907Sdyson int catch; 28413675Sdyson{ 28513776Sdyson int error; 28613675Sdyson while (cpipe->pipe_state & PIPE_LOCK) { 28713675Sdyson cpipe->pipe_state |= PIPE_LWANT; 28814177Sdyson if (error = tsleep( cpipe, 28913907Sdyson catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) { 29013776Sdyson return error; 29113675Sdyson } 29213675Sdyson } 29313675Sdyson cpipe->pipe_state |= PIPE_LOCK; 29413675Sdyson return 0; 29513675Sdyson} 29613675Sdyson 29713675Sdyson/* 29813675Sdyson * unlock a pipe I/O lock 29913675Sdyson */ 30013675Sdysonstatic __inline void 30113675Sdysonpipeunlock(cpipe) 30213675Sdyson struct pipe *cpipe; 30313675Sdyson{ 30413675Sdyson cpipe->pipe_state &= ~PIPE_LOCK; 30513675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 30613675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 30714177Sdyson wakeup(cpipe); 30813675Sdyson } 30913675Sdyson} 31013675Sdyson 31114037Sdysonstatic __inline void 31214037Sdysonpipeselwakeup(cpipe) 31314037Sdyson struct pipe *cpipe; 31414037Sdyson{ 31514037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 31614037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 31714037Sdyson selwakeup(&cpipe->pipe_sel); 31814037Sdyson } 31914037Sdyson} 32014037Sdyson 32113675Sdyson/* ARGSUSED */ 32213675Sdysonstatic int 32313675Sdysonpipe_read(fp, uio, cred) 32413675Sdyson struct file *fp; 32513675Sdyson struct uio *uio; 32613675Sdyson struct ucred *cred; 32713675Sdyson{ 32813675Sdyson 32913675Sdyson struct pipe *rpipe = (struct pipe *) fp->f_data; 33013675Sdyson int error = 0; 33113675Sdyson int nread = 0; 33213907Sdyson int size; 33313675Sdyson 33413675Sdyson ++rpipe->pipe_busy; 33513675Sdyson while (uio->uio_resid) { 33613907Sdyson /* 33713907Sdyson * normal pipe buffer receive 33813907Sdyson */ 33913675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 34013675Sdyson int size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 34113675Sdyson if (size > rpipe->pipe_buffer.cnt) 34213675Sdyson size = rpipe->pipe_buffer.cnt; 34313675Sdyson if (size > uio->uio_resid) 34413675Sdyson size = uio->uio_resid; 34513907Sdyson if ((error = pipelock(rpipe,1)) == 0) { 34613675Sdyson error = uiomove( &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 34713675Sdyson size, uio); 34813675Sdyson pipeunlock(rpipe); 34913675Sdyson } 35013675Sdyson if (error) { 35113675Sdyson break; 35213675Sdyson } 35313675Sdyson rpipe->pipe_buffer.out += size; 35413675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 35513675Sdyson rpipe->pipe_buffer.out = 0; 35613675Sdyson 35713675Sdyson rpipe->pipe_buffer.cnt -= size; 35813675Sdyson nread += size; 35914037Sdyson#ifndef PIPE_NODIRECT 36013907Sdyson /* 36113907Sdyson * Direct copy, bypassing a kernel buffer. 36213907Sdyson */ 36313907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 36413907Sdyson (rpipe->pipe_state & PIPE_DIRECTW)) { 36513907Sdyson caddr_t va; 36613907Sdyson if (size > uio->uio_resid) 36713907Sdyson size = uio->uio_resid; 36813907Sdyson if ((error = pipelock(rpipe,1)) == 0) { 36913907Sdyson va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos; 37013907Sdyson error = uiomove(va, size, uio); 37113907Sdyson pipeunlock(rpipe); 37213907Sdyson } 37313907Sdyson if (error) 37413907Sdyson break; 37513907Sdyson nread += size; 37613907Sdyson rpipe->pipe_map.pos += size; 37713907Sdyson rpipe->pipe_map.cnt -= size; 37813907Sdyson if (rpipe->pipe_map.cnt == 0) { 37913907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 38013907Sdyson wakeup(rpipe); 38113907Sdyson } 38214037Sdyson#endif 38313675Sdyson } else { 38413675Sdyson /* 38513675Sdyson * detect EOF condition 38613675Sdyson */ 38713675Sdyson if (rpipe->pipe_state & PIPE_EOF) { 38814802Sdyson /* XXX error = ? */ 38913675Sdyson break; 39013675Sdyson } 39113675Sdyson /* 39213675Sdyson * If the "write-side" has been blocked, wake it up now. 39313675Sdyson */ 39413675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 39513675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 39613675Sdyson wakeup(rpipe); 39713675Sdyson } 39813774Sdyson if (nread > 0) 39913675Sdyson break; 40016960Sdyson 40116960Sdyson if (fp->f_flag & FNONBLOCK) { 40213774Sdyson error = EAGAIN; 40313774Sdyson break; 40413774Sdyson } 40513675Sdyson 40613675Sdyson /* 40713675Sdyson * If there is no more to read in the pipe, reset 40813913Sdyson * its pointers to the beginning. This improves 40913675Sdyson * cache hit stats. 41013675Sdyson */ 41113675Sdyson 41213907Sdyson if ((error = pipelock(rpipe,1)) == 0) { 41313675Sdyson if (rpipe->pipe_buffer.cnt == 0) { 41413675Sdyson rpipe->pipe_buffer.in = 0; 41513675Sdyson rpipe->pipe_buffer.out = 0; 41613675Sdyson } 41713675Sdyson pipeunlock(rpipe); 41813675Sdyson } else { 41913675Sdyson break; 42013675Sdyson } 42114177Sdyson 42214177Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 42314177Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 42414177Sdyson wakeup(rpipe); 42514177Sdyson } 42614177Sdyson 42713675Sdyson rpipe->pipe_state |= PIPE_WANTR; 42813776Sdyson if (error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) { 42913675Sdyson break; 43013675Sdyson } 43113675Sdyson } 43213675Sdyson } 43313675Sdyson 43413913Sdyson if (error == 0) { 43513913Sdyson int s = splhigh(); 43613913Sdyson rpipe->pipe_atime = time; 43713913Sdyson splx(s); 43813913Sdyson } 43913913Sdyson 44013675Sdyson --rpipe->pipe_busy; 44113675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 44213675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 44313675Sdyson wakeup(rpipe); 44413675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 44513675Sdyson /* 44613675Sdyson * If there is no more to read in the pipe, reset 44713913Sdyson * its pointers to the beginning. This improves 44813675Sdyson * cache hit stats. 44913675Sdyson */ 45017163Sdyson if (rpipe->pipe_buffer.cnt == 0) { 45117163Sdyson if ((error == 0) && (error = pipelock(rpipe,1)) == 0) { 45213675Sdyson rpipe->pipe_buffer.in = 0; 45313675Sdyson rpipe->pipe_buffer.out = 0; 45417163Sdyson pipeunlock(rpipe); 45513675Sdyson } 45613675Sdyson } 45713675Sdyson 45813675Sdyson /* 45913675Sdyson * If the "write-side" has been blocked, wake it up now. 46013675Sdyson */ 46113675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 46213675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 46313675Sdyson wakeup(rpipe); 46413675Sdyson } 46513675Sdyson } 46614037Sdyson 46714802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 46814037Sdyson pipeselwakeup(rpipe); 46914037Sdyson 47013675Sdyson return error; 47113675Sdyson} 47213675Sdyson 47314037Sdyson#ifndef PIPE_NODIRECT 47413907Sdyson/* 47513907Sdyson * Map the sending processes' buffer into kernel space and wire it. 47613907Sdyson * This is similar to a physical write operation. 47713907Sdyson */ 47813675Sdysonstatic int 47913907Sdysonpipe_build_write_buffer(wpipe, uio) 48013907Sdyson struct pipe *wpipe; 48113675Sdyson struct uio *uio; 48213675Sdyson{ 48313907Sdyson int size; 48413907Sdyson int i; 48513907Sdyson vm_offset_t addr, endaddr, paddr; 48613907Sdyson 48713907Sdyson size = uio->uio_iov->iov_len; 48813907Sdyson if (size > wpipe->pipe_buffer.size) 48913907Sdyson size = wpipe->pipe_buffer.size; 49013907Sdyson 49113907Sdyson endaddr = round_page(uio->uio_iov->iov_base + size); 49213907Sdyson for(i = 0, addr = trunc_page(uio->uio_iov->iov_base); 49313907Sdyson addr < endaddr; 49413907Sdyson addr += PAGE_SIZE, i+=1) { 49513907Sdyson 49613907Sdyson vm_page_t m; 49713907Sdyson 49813909Sdyson vm_fault_quick( (caddr_t) addr, VM_PROT_READ); 49913907Sdyson paddr = pmap_kextract(addr); 50013907Sdyson if (!paddr) { 50113907Sdyson int j; 50213907Sdyson for(j=0;j<i;j++) 50313907Sdyson vm_page_unwire(wpipe->pipe_map.ms[j]); 50413907Sdyson return EFAULT; 50513907Sdyson } 50613907Sdyson 50713907Sdyson m = PHYS_TO_VM_PAGE(paddr); 50813907Sdyson vm_page_wire(m); 50913907Sdyson wpipe->pipe_map.ms[i] = m; 51013907Sdyson } 51113907Sdyson 51213907Sdyson/* 51313907Sdyson * set up the control block 51413907Sdyson */ 51513907Sdyson wpipe->pipe_map.npages = i; 51613907Sdyson wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 51713907Sdyson wpipe->pipe_map.cnt = size; 51813907Sdyson 51913907Sdyson/* 52013907Sdyson * and map the buffer 52113907Sdyson */ 52213907Sdyson if (wpipe->pipe_map.kva == 0) { 52313912Sdyson /* 52413912Sdyson * We need to allocate space for an extra page because the 52513912Sdyson * address range might (will) span pages at times. 52613912Sdyson */ 52713907Sdyson wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 52813912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 52913912Sdyson amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 53013907Sdyson } 53113907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 53213907Sdyson wpipe->pipe_map.npages); 53313907Sdyson 53413907Sdyson/* 53513907Sdyson * and update the uio data 53613907Sdyson */ 53713907Sdyson 53813907Sdyson uio->uio_iov->iov_len -= size; 53913907Sdyson uio->uio_iov->iov_base += size; 54013907Sdyson if (uio->uio_iov->iov_len == 0) 54113907Sdyson uio->uio_iov++; 54213907Sdyson uio->uio_resid -= size; 54313907Sdyson uio->uio_offset += size; 54413907Sdyson return 0; 54513907Sdyson} 54613907Sdyson 54713907Sdyson/* 54813907Sdyson * unmap and unwire the process buffer 54913907Sdyson */ 55013907Sdysonstatic void 55113907Sdysonpipe_destroy_write_buffer(wpipe) 55213907Sdysonstruct pipe *wpipe; 55313907Sdyson{ 55413907Sdyson int i; 55517163Sdyson if (wpipe->pipe_map.kva) { 55617163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 55713907Sdyson 55813907Sdyson if (amountpipekva > MAXPIPEKVA) { 55913907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 56013907Sdyson wpipe->pipe_map.kva = 0; 56113907Sdyson kmem_free(kernel_map, kva, 56213912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 56313912Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 56413907Sdyson } 56513907Sdyson } 56613907Sdyson for (i=0;i<wpipe->pipe_map.npages;i++) 56713907Sdyson vm_page_unwire(wpipe->pipe_map.ms[i]); 56813907Sdyson} 56913907Sdyson 57013907Sdyson/* 57113907Sdyson * In the case of a signal, the writing process might go away. This 57213907Sdyson * code copies the data into the circular buffer so that the source 57313907Sdyson * pages can be freed without loss of data. 57413907Sdyson */ 57513907Sdysonstatic void 57613907Sdysonpipe_clone_write_buffer(wpipe) 57713907Sdysonstruct pipe *wpipe; 57813907Sdyson{ 57913907Sdyson int size; 58013907Sdyson int pos; 58113907Sdyson 58213907Sdyson size = wpipe->pipe_map.cnt; 58313907Sdyson pos = wpipe->pipe_map.pos; 58413907Sdyson bcopy((caddr_t) wpipe->pipe_map.kva+pos, 58513907Sdyson (caddr_t) wpipe->pipe_buffer.buffer, 58613907Sdyson size); 58713907Sdyson 58813907Sdyson wpipe->pipe_buffer.in = size; 58913907Sdyson wpipe->pipe_buffer.out = 0; 59013907Sdyson wpipe->pipe_buffer.cnt = size; 59113907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 59213907Sdyson 59313907Sdyson pipe_destroy_write_buffer(wpipe); 59413907Sdyson} 59513907Sdyson 59613907Sdyson/* 59713907Sdyson * This implements the pipe buffer write mechanism. Note that only 59813907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 59913907Sdyson * If there are any characters in the pipe buffer, the direct write will 60013907Sdyson * be deferred until the receiving process grabs all of the bytes from 60113907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 60213907Sdyson */ 60313907Sdysonstatic int 60413907Sdysonpipe_direct_write(wpipe, uio) 60513907Sdyson struct pipe *wpipe; 60613907Sdyson struct uio *uio; 60713907Sdyson{ 60813907Sdyson int error; 60913951Sdysonretry: 61013907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 61113951Sdyson if ( wpipe->pipe_state & PIPE_WANTR) { 61213951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 61313951Sdyson wakeup(wpipe); 61413951Sdyson } 61513992Sdyson wpipe->pipe_state |= PIPE_WANTW; 61613907Sdyson error = tsleep(wpipe, 61713907Sdyson PRIBIO|PCATCH, "pipdww", 0); 61814802Sdyson if (error) 61913907Sdyson goto error1; 62014802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 62114802Sdyson error = EPIPE; 62214802Sdyson goto error1; 62314802Sdyson } 62413907Sdyson } 62513907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 62613951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 62713951Sdyson if ( wpipe->pipe_state & PIPE_WANTR) { 62813951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 62913951Sdyson wakeup(wpipe); 63013951Sdyson } 63113951Sdyson 63213992Sdyson wpipe->pipe_state |= PIPE_WANTW; 63313907Sdyson error = tsleep(wpipe, 63413907Sdyson PRIBIO|PCATCH, "pipdwc", 0); 63514802Sdyson if (error) 63613907Sdyson goto error1; 63714802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 63814802Sdyson error = EPIPE; 63914802Sdyson goto error1; 64013907Sdyson } 64113951Sdyson goto retry; 64213907Sdyson } 64313907Sdyson 64413951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 64513951Sdyson 64613907Sdyson error = pipe_build_write_buffer(wpipe, uio); 64713907Sdyson if (error) { 64813907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 64913907Sdyson goto error1; 65013907Sdyson } 65113907Sdyson 65213907Sdyson error = 0; 65313907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 65413907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 65513907Sdyson pipelock(wpipe, 0); 65613907Sdyson pipe_destroy_write_buffer(wpipe); 65713907Sdyson pipeunlock(wpipe); 65814037Sdyson pipeselwakeup(wpipe); 65914802Sdyson error = EPIPE; 66014802Sdyson goto error1; 66113907Sdyson } 66213992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 66313992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 66413992Sdyson wakeup(wpipe); 66513992Sdyson } 66614037Sdyson pipeselwakeup(wpipe); 66713907Sdyson error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0); 66813907Sdyson } 66913907Sdyson 67013907Sdyson pipelock(wpipe,0); 67113907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 67213907Sdyson /* 67313907Sdyson * this bit of trickery substitutes a kernel buffer for 67413907Sdyson * the process that might be going away. 67513907Sdyson */ 67613907Sdyson pipe_clone_write_buffer(wpipe); 67713907Sdyson } else { 67813907Sdyson pipe_destroy_write_buffer(wpipe); 67913907Sdyson } 68013907Sdyson pipeunlock(wpipe); 68113907Sdyson return error; 68213907Sdyson 68313907Sdysonerror1: 68413907Sdyson wakeup(wpipe); 68513907Sdyson return error; 68613907Sdyson} 68714037Sdyson#endif 68813907Sdyson 68916960Sdysonstatic int 69016960Sdysonpipe_write(fp, uio, cred) 69116960Sdyson struct file *fp; 69213907Sdyson struct uio *uio; 69316960Sdyson struct ucred *cred; 69413907Sdyson{ 69513675Sdyson int error = 0; 69613913Sdyson int orig_resid; 69713675Sdyson 69816960Sdyson struct pipe *wpipe, *rpipe; 69916960Sdyson 70016960Sdyson rpipe = (struct pipe *) fp->f_data; 70116960Sdyson wpipe = rpipe->pipe_peer; 70216960Sdyson 70313675Sdyson /* 70413675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 70513675Sdyson */ 70616960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 70713774Sdyson return EPIPE; 70813675Sdyson } 70913675Sdyson 71017163Sdyson /* 71117163Sdyson * If it is advantageous to resize the pipe buffer, do 71217163Sdyson * so. 71317163Sdyson */ 71417163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 71517163Sdyson (nbigpipe < LIMITBIGPIPES) && 71617163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 71717163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 71817163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 71917163Sdyson 72017163Sdyson if (wpipe->pipe_buffer.buffer) { 72117163Sdyson amountpipekva -= wpipe->pipe_buffer.size; 72217163Sdyson kmem_free(kernel_map, 72317163Sdyson (vm_offset_t)wpipe->pipe_buffer.buffer, 72417163Sdyson wpipe->pipe_buffer.size); 72517163Sdyson } 72617163Sdyson 72717163Sdyson#ifndef PIPE_NODIRECT 72817163Sdyson if (wpipe->pipe_map.kva) { 72917163Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 73017163Sdyson kmem_free(kernel_map, 73117163Sdyson wpipe->pipe_map.kva, 73217163Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 73317163Sdyson } 73417163Sdyson#endif 73517163Sdyson 73617163Sdyson wpipe->pipe_buffer.in = 0; 73717163Sdyson wpipe->pipe_buffer.out = 0; 73817163Sdyson wpipe->pipe_buffer.cnt = 0; 73917163Sdyson wpipe->pipe_buffer.size = BIG_PIPE_SIZE; 74017163Sdyson wpipe->pipe_buffer.buffer = NULL; 74117163Sdyson ++nbigpipe; 74217163Sdyson 74317163Sdyson#ifndef PIPE_NODIRECT 74417163Sdyson wpipe->pipe_map.cnt = 0; 74517163Sdyson wpipe->pipe_map.kva = 0; 74617163Sdyson wpipe->pipe_map.pos = 0; 74717163Sdyson wpipe->pipe_map.npages = 0; 74817163Sdyson#endif 74917163Sdyson 75017163Sdyson } 75117163Sdyson 75217163Sdyson 75313907Sdyson if( wpipe->pipe_buffer.buffer == NULL) { 75413907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 75513907Sdyson pipespace(wpipe); 75613907Sdyson pipeunlock(wpipe); 75713907Sdyson } else { 75813907Sdyson return error; 75913907Sdyson } 76013907Sdyson } 76113907Sdyson 76213675Sdyson ++wpipe->pipe_busy; 76313913Sdyson orig_resid = uio->uio_resid; 76413675Sdyson while (uio->uio_resid) { 76513907Sdyson int space; 76614037Sdyson#ifndef PIPE_NODIRECT 76713907Sdyson /* 76813907Sdyson * If the transfer is large, we can gain performance if 76913907Sdyson * we do process-to-process copies directly. 77016416Sdyson * If the write is non-blocking, we don't use the 77116416Sdyson * direct write mechanism. 77213907Sdyson */ 77317163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 77417163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 77517163Sdyson (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 77613907Sdyson (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 77713907Sdyson error = pipe_direct_write( wpipe, uio); 77813907Sdyson if (error) { 77913907Sdyson break; 78013907Sdyson } 78113907Sdyson continue; 78213907Sdyson } 78314037Sdyson#endif 78413907Sdyson 78513907Sdyson /* 78613907Sdyson * Pipe buffered writes cannot be coincidental with 78713907Sdyson * direct writes. We wait until the currently executing 78813907Sdyson * direct write is completed before we start filling the 78913907Sdyson * pipe buffer. 79013907Sdyson */ 79113907Sdyson retrywrite: 79213907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 79313992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 79413992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 79513992Sdyson wakeup(wpipe); 79613992Sdyson } 79713907Sdyson error = tsleep(wpipe, 79813907Sdyson PRIBIO|PCATCH, "pipbww", 0); 79913907Sdyson if (error) 80013907Sdyson break; 80113907Sdyson } 80213907Sdyson 80313907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 80414644Sdyson 80514644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 80614644Sdyson /* XXX perhaps they need to be contiguous to be atomic? */ 80713913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 80813913Sdyson space = 0; 80913907Sdyson 81017163Sdyson if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 81117163Sdyson /* 81217163Sdyson * This set the maximum transfer as a segment of 81317163Sdyson * the buffer. 81417163Sdyson */ 81513675Sdyson int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in; 81617163Sdyson /* 81717163Sdyson * space is the size left in the buffer 81817163Sdyson */ 81913675Sdyson if (size > space) 82013675Sdyson size = space; 82117163Sdyson /* 82217163Sdyson * now limit it to the size of the uio transfer 82317163Sdyson */ 82413675Sdyson if (size > uio->uio_resid) 82513675Sdyson size = uio->uio_resid; 82613907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 82713907Sdyson /* 82813907Sdyson * It is possible for a direct write to 82913907Sdyson * slip in on us... handle it here... 83013907Sdyson */ 83113907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 83213907Sdyson pipeunlock(wpipe); 83313907Sdyson goto retrywrite; 83413907Sdyson } 83513675Sdyson error = uiomove( &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 83613675Sdyson size, uio); 83713675Sdyson pipeunlock(wpipe); 83813675Sdyson } 83913675Sdyson if (error) 84013675Sdyson break; 84113675Sdyson 84213675Sdyson wpipe->pipe_buffer.in += size; 84313675Sdyson if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size) 84413675Sdyson wpipe->pipe_buffer.in = 0; 84513675Sdyson 84613675Sdyson wpipe->pipe_buffer.cnt += size; 84713675Sdyson } else { 84813675Sdyson /* 84913675Sdyson * If the "read-side" has been blocked, wake it up now. 85013675Sdyson */ 85113675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 85213675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 85313675Sdyson wakeup(wpipe); 85413675Sdyson } 85514037Sdyson 85613675Sdyson /* 85713675Sdyson * don't block on non-blocking I/O 85813675Sdyson */ 85916960Sdyson if (fp->f_flag & FNONBLOCK) { 86013907Sdyson error = EAGAIN; 86113675Sdyson break; 86213675Sdyson } 86313907Sdyson 86414037Sdyson /* 86514037Sdyson * We have no more space and have something to offer, 86614037Sdyson * wake up selects. 86714037Sdyson */ 86814037Sdyson pipeselwakeup(wpipe); 86914037Sdyson 87013675Sdyson wpipe->pipe_state |= PIPE_WANTW; 87113776Sdyson if (error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) { 87213675Sdyson break; 87313675Sdyson } 87413675Sdyson /* 87513675Sdyson * If read side wants to go away, we just issue a signal 87613675Sdyson * to ourselves. 87713675Sdyson */ 87813675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 87913774Sdyson error = EPIPE; 88013907Sdyson break; 88113675Sdyson } 88213675Sdyson } 88313675Sdyson } 88413675Sdyson 88514644Sdyson --wpipe->pipe_busy; 88613675Sdyson if ((wpipe->pipe_busy == 0) && 88713675Sdyson (wpipe->pipe_state & PIPE_WANT)) { 88813675Sdyson wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); 88913675Sdyson wakeup(wpipe); 89013675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 89113675Sdyson /* 89213675Sdyson * If we have put any characters in the buffer, we wake up 89313675Sdyson * the reader. 89413675Sdyson */ 89513675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 89613675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 89713675Sdyson wakeup(wpipe); 89813675Sdyson } 89913675Sdyson } 90013909Sdyson 90113909Sdyson /* 90213909Sdyson * Don't return EPIPE if I/O was successful 90313909Sdyson */ 90413907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 90513907Sdyson (uio->uio_resid == 0) && 90613907Sdyson (error == EPIPE)) 90713907Sdyson error = 0; 90813913Sdyson 90914802Sdyson if (error == 0) { 91013913Sdyson int s = splhigh(); 91113913Sdyson wpipe->pipe_mtime = time; 91213913Sdyson splx(s); 91313913Sdyson } 91414037Sdyson /* 91514037Sdyson * We have something to offer, 91614037Sdyson * wake up select. 91714037Sdyson */ 91814177Sdyson if (wpipe->pipe_buffer.cnt) 91914037Sdyson pipeselwakeup(wpipe); 92013907Sdyson 92113675Sdyson return error; 92213675Sdyson} 92313675Sdyson 92413675Sdyson/* 92513675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 92613675Sdyson */ 92713675Sdysonint 92813675Sdysonpipe_ioctl(fp, cmd, data, p) 92913675Sdyson struct file *fp; 93013675Sdyson int cmd; 93113675Sdyson register caddr_t data; 93213675Sdyson struct proc *p; 93313675Sdyson{ 93413675Sdyson register struct pipe *mpipe = (struct pipe *)fp->f_data; 93513675Sdyson 93613675Sdyson switch (cmd) { 93713675Sdyson 93813675Sdyson case FIONBIO: 93913675Sdyson return (0); 94013675Sdyson 94113675Sdyson case FIOASYNC: 94213675Sdyson if (*(int *)data) { 94313675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 94413675Sdyson } else { 94513675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 94613675Sdyson } 94713675Sdyson return (0); 94813675Sdyson 94913675Sdyson case FIONREAD: 95014037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 95114037Sdyson *(int *)data = mpipe->pipe_map.cnt; 95214037Sdyson else 95314037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 95413675Sdyson return (0); 95513675Sdyson 95613675Sdyson case SIOCSPGRP: 95713675Sdyson mpipe->pipe_pgid = *(int *)data; 95813675Sdyson return (0); 95913675Sdyson 96013675Sdyson case SIOCGPGRP: 96113675Sdyson *(int *)data = mpipe->pipe_pgid; 96213675Sdyson return (0); 96313675Sdyson 96413675Sdyson } 96517124Sbde return (ENOTTY); 96613675Sdyson} 96713675Sdyson 96813675Sdysonint 96913675Sdysonpipe_select(fp, which, p) 97013675Sdyson struct file *fp; 97113675Sdyson int which; 97213675Sdyson struct proc *p; 97313675Sdyson{ 97413675Sdyson register struct pipe *rpipe = (struct pipe *)fp->f_data; 97513675Sdyson struct pipe *wpipe; 97613675Sdyson 97713675Sdyson wpipe = rpipe->pipe_peer; 97813675Sdyson switch (which) { 97913675Sdyson 98013675Sdyson case FREAD: 98114177Sdyson if ( (rpipe->pipe_state & PIPE_DIRECTW) || 98214177Sdyson (rpipe->pipe_buffer.cnt > 0) || 98313907Sdyson (rpipe->pipe_state & PIPE_EOF)) { 98413675Sdyson return (1); 98513675Sdyson } 98613675Sdyson selrecord(p, &rpipe->pipe_sel); 98713675Sdyson rpipe->pipe_state |= PIPE_SEL; 98813675Sdyson break; 98913675Sdyson 99013675Sdyson case FWRITE: 99113907Sdyson if ((wpipe == NULL) || 99213907Sdyson (wpipe->pipe_state & PIPE_EOF) || 99314177Sdyson (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 99414177Sdyson (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) { 99513675Sdyson return (1); 99613675Sdyson } 99713675Sdyson selrecord(p, &wpipe->pipe_sel); 99813675Sdyson wpipe->pipe_state |= PIPE_SEL; 99913675Sdyson break; 100013675Sdyson 100113675Sdyson case 0: 100213907Sdyson if ((rpipe->pipe_state & PIPE_EOF) || 100313907Sdyson (wpipe == NULL) || 100413907Sdyson (wpipe->pipe_state & PIPE_EOF)) { 100513907Sdyson return (1); 100613907Sdyson } 100713907Sdyson 100813675Sdyson selrecord(p, &rpipe->pipe_sel); 100913675Sdyson rpipe->pipe_state |= PIPE_SEL; 101013675Sdyson break; 101113675Sdyson } 101213675Sdyson return (0); 101313675Sdyson} 101413675Sdyson 101513675Sdysonint 101613675Sdysonpipe_stat(pipe, ub) 101713675Sdyson register struct pipe *pipe; 101813675Sdyson register struct stat *ub; 101913675Sdyson{ 102013675Sdyson bzero((caddr_t)ub, sizeof (*ub)); 102117124Sbde ub->st_mode = S_IFIFO; 102213907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 102313675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 102413675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 102513675Sdyson TIMEVAL_TO_TIMESPEC(&pipe->pipe_atime, &ub->st_atimespec); 102613675Sdyson TIMEVAL_TO_TIMESPEC(&pipe->pipe_mtime, &ub->st_mtimespec); 102713675Sdyson TIMEVAL_TO_TIMESPEC(&pipe->pipe_ctime, &ub->st_ctimespec); 102817124Sbde /* 102917124Sbde * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev, 103017124Sbde * st_flags, st_gen. 103117124Sbde * XXX (st_dev, st_ino) should be unique. 103217124Sbde */ 103313675Sdyson return 0; 103413675Sdyson} 103513675Sdyson 103613675Sdyson/* ARGSUSED */ 103713675Sdysonstatic int 103813675Sdysonpipe_close(fp, p) 103913675Sdyson struct file *fp; 104013675Sdyson struct proc *p; 104113675Sdyson{ 104213675Sdyson struct pipe *cpipe = (struct pipe *)fp->f_data; 104316322Sgpalmer 104413675Sdyson pipeclose(cpipe); 104513675Sdyson fp->f_data = NULL; 104613675Sdyson return 0; 104713675Sdyson} 104813675Sdyson 104913675Sdyson/* 105013675Sdyson * shutdown the pipe 105113675Sdyson */ 105213675Sdysonstatic void 105313675Sdysonpipeclose(cpipe) 105413675Sdyson struct pipe *cpipe; 105513675Sdyson{ 105613907Sdyson struct pipe *ppipe; 105713675Sdyson if (cpipe) { 105813907Sdyson 105914037Sdyson pipeselwakeup(cpipe); 106013907Sdyson 106113675Sdyson /* 106213675Sdyson * If the other side is blocked, wake it up saying that 106313675Sdyson * we want to close it down. 106413675Sdyson */ 106513675Sdyson while (cpipe->pipe_busy) { 106613675Sdyson wakeup(cpipe); 106713675Sdyson cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; 106813675Sdyson tsleep(cpipe, PRIBIO, "pipecl", 0); 106913675Sdyson } 107013675Sdyson 107113675Sdyson /* 107213675Sdyson * Disconnect from peer 107313675Sdyson */ 107413907Sdyson if (ppipe = cpipe->pipe_peer) { 107514037Sdyson pipeselwakeup(ppipe); 107613907Sdyson 107713907Sdyson ppipe->pipe_state |= PIPE_EOF; 107813907Sdyson wakeup(ppipe); 107913907Sdyson ppipe->pipe_peer = NULL; 108013675Sdyson } 108113675Sdyson 108213675Sdyson /* 108313675Sdyson * free resources 108413675Sdyson */ 108513907Sdyson if (cpipe->pipe_buffer.buffer) { 108617163Sdyson if (cpipe->pipe_buffer.size > PIPE_SIZE) 108717163Sdyson --nbigpipe; 108813907Sdyson amountpipekva -= cpipe->pipe_buffer.size; 108913907Sdyson kmem_free(kernel_map, 109013907Sdyson (vm_offset_t)cpipe->pipe_buffer.buffer, 109113907Sdyson cpipe->pipe_buffer.size); 109213907Sdyson } 109314037Sdyson#ifndef PIPE_NODIRECT 109413907Sdyson if (cpipe->pipe_map.kva) { 109513912Sdyson amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 109613907Sdyson kmem_free(kernel_map, 109713907Sdyson cpipe->pipe_map.kva, 109813912Sdyson cpipe->pipe_buffer.size + PAGE_SIZE); 109913907Sdyson } 110014037Sdyson#endif 110113675Sdyson free(cpipe, M_TEMP); 110213675Sdyson } 110313675Sdyson} 110413675Sdyson#endif 1105