sys_pipe.c revision 76827
113675Sdyson/* 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson * 1950477Speter * $FreeBSD: head/sys/kern/sys_pipe.c 76827 2001-05-19 01:28:09Z alfred $ 2013675Sdyson */ 2113675Sdyson 2213675Sdyson/* 2313675Sdyson * This file contains a high-performance replacement for the socket-based 2413675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2513675Sdyson * all features of sockets, but does do everything that pipes normally 2613675Sdyson * do. 2713675Sdyson */ 2813675Sdyson 2913907Sdyson/* 3013907Sdyson * This code has two modes of operation, a small write mode and a large 3113907Sdyson * write mode. The small write mode acts like conventional pipes with 3213907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3313907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 3413907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 3513907Sdyson * the receiving process can copy it directly from the pages in the sending 3613907Sdyson * process. 3713907Sdyson * 3813907Sdyson * If the sending process receives a signal, it is possible that it will 3913913Sdyson * go away, and certainly its address space can change, because control 4013907Sdyson * is returned back to the user-mode side. In that case, the pipe code 4113907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4213907Sdyson * kernel buffer, and the receiving process will grab the data from the 4313907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4413907Sdyson * the copy operation is normally eliminated. 4513907Sdyson * 4613907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4713907Sdyson * happen for small transfers so that the system will not spend all of 4813913Sdyson * its time context switching. PIPE_SIZE is constrained by the 4913907Sdyson * amount of kernel virtual memory. 5013907Sdyson */ 5113907Sdyson 5213675Sdyson#include <sys/param.h> 5313675Sdyson#include <sys/systm.h> 5424131Sbde#include <sys/fcntl.h> 5513675Sdyson#include <sys/file.h> 5613675Sdyson#include <sys/filedesc.h> 5724206Sbde#include <sys/filio.h> 5876166Smarkm#include <sys/lock.h> 5976827Salfred#include <sys/mutex.h> 6024206Sbde#include <sys/ttycom.h> 6113675Sdyson#include <sys/stat.h> 6229356Speter#include <sys/poll.h> 6370834Swollman#include <sys/selinfo.h> 6413675Sdyson#include <sys/signalvar.h> 6513675Sdyson#include <sys/sysproto.h> 6613675Sdyson#include <sys/pipe.h> 6776166Smarkm#include <sys/proc.h> 6855112Sbde#include <sys/vnode.h> 6934924Sbde#include <sys/uio.h> 7059288Sjlemon#include <sys/event.h> 7113675Sdyson 7213675Sdyson#include <vm/vm.h> 7313675Sdyson#include <vm/vm_param.h> 7413675Sdyson#include <vm/vm_object.h> 7513675Sdyson#include <vm/vm_kern.h> 7613675Sdyson#include <vm/vm_extern.h> 7713675Sdyson#include <vm/pmap.h> 7813675Sdyson#include <vm/vm_map.h> 7913907Sdyson#include <vm/vm_page.h> 8027899Sdyson#include <vm/vm_zone.h> 8113675Sdyson 8214037Sdyson/* 8314037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 8414037Sdyson * approx 30% decrease in transfer rate. This could be useful for 8514037Sdyson * NetBSD or OpenBSD. 8614037Sdyson */ 8714037Sdyson/* #define PIPE_NODIRECT */ 8814037Sdyson 8914037Sdyson/* 9014037Sdyson * interfaces to the outside world 9114037Sdyson */ 9213675Sdysonstatic int pipe_read __P((struct file *fp, struct uio *uio, 9351418Sgreen struct ucred *cred, int flags, struct proc *p)); 9413675Sdysonstatic int pipe_write __P((struct file *fp, struct uio *uio, 9551418Sgreen struct ucred *cred, int flags, struct proc *p)); 9613675Sdysonstatic int pipe_close __P((struct file *fp, struct proc *p)); 9729356Speterstatic int pipe_poll __P((struct file *fp, int events, struct ucred *cred, 9829356Speter struct proc *p)); 9972521Sjlemonstatic int pipe_kqfilter __P((struct file *fp, struct knote *kn)); 10052983Speterstatic int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p)); 10136735Sdfrstatic int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); 10213675Sdyson 10372521Sjlemonstatic struct fileops pipeops = { 10472521Sjlemon pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter, 10572521Sjlemon pipe_stat, pipe_close 10672521Sjlemon}; 10713675Sdyson 10859288Sjlemonstatic void filt_pipedetach(struct knote *kn); 10959288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 11059288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 11159288Sjlemon 11272521Sjlemonstatic struct filterops pipe_rfiltops = 11372521Sjlemon { 1, NULL, filt_pipedetach, filt_piperead }; 11472521Sjlemonstatic struct filterops pipe_wfiltops = 11572521Sjlemon { 1, NULL, filt_pipedetach, filt_pipewrite }; 11659288Sjlemon 11772521Sjlemon 11813675Sdyson/* 11913675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 12013675Sdyson * space is pageable. The pipe code will try to maintain locality of 12113675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 12213675Sdyson * will not wipe the cache. 12313675Sdyson */ 12413907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 12513907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 12613675Sdyson 12713907Sdyson/* 12813907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 12913907Sdyson * is there so that on large systems, we don't exhaust it. 13013907Sdyson */ 13113907Sdyson#define MAXPIPEKVA (8*1024*1024) 13213907Sdyson 13313907Sdyson/* 13413907Sdyson * Limit for direct transfers, we cannot, of course limit 13513907Sdyson * the amount of kva for pipes in general though. 13613907Sdyson */ 13713907Sdyson#define LIMITPIPEKVA (16*1024*1024) 13817163Sdyson 13917163Sdyson/* 14017163Sdyson * Limit the number of "big" pipes 14117163Sdyson */ 14217163Sdyson#define LIMITBIGPIPES 32 14333181Seivindstatic int nbigpipe; 14417163Sdyson 14517124Sbdestatic int amountpipekva; 14613907Sdyson 14713675Sdysonstatic void pipeclose __P((struct pipe *cpipe)); 14876364Salfredstatic void pipe_free_kmem __P((struct pipe *cpipe)); 14976364Salfredstatic int pipe_create __P((struct pipe **cpipep)); 15013907Sdysonstatic __inline int pipelock __P((struct pipe *cpipe, int catch)); 15113675Sdysonstatic __inline void pipeunlock __P((struct pipe *cpipe)); 15214122Speterstatic __inline void pipeselwakeup __P((struct pipe *cpipe)); 15314037Sdyson#ifndef PIPE_NODIRECT 15413907Sdysonstatic int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 15513907Sdysonstatic void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 15613907Sdysonstatic int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 15713907Sdysonstatic void pipe_clone_write_buffer __P((struct pipe *wpipe)); 15814037Sdyson#endif 15976364Salfredstatic int pipespace __P((struct pipe *cpipe, int size)); 16013675Sdyson 16133181Seivindstatic vm_zone_t pipe_zone; 16227899Sdyson 16313675Sdyson/* 16413675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 16513675Sdyson */ 16613675Sdyson 16713675Sdyson/* ARGSUSED */ 16813675Sdysonint 16930994Sphkpipe(p, uap) 17013675Sdyson struct proc *p; 17113675Sdyson struct pipe_args /* { 17213675Sdyson int dummy; 17313675Sdyson } */ *uap; 17413675Sdyson{ 17576364Salfred struct filedesc *fdp = p->p_fd; 17613675Sdyson struct file *rf, *wf; 17713675Sdyson struct pipe *rpipe, *wpipe; 17813675Sdyson int fd, error; 17913675Sdyson 18027899Sdyson if (pipe_zone == NULL) 18176760Salfred pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4); 18227899Sdyson 18376756Salfred rpipe = wpipe = NULL; 18476364Salfred if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 18576364Salfred pipeclose(rpipe); 18676364Salfred pipeclose(wpipe); 18776364Salfred return (ENFILE); 18876364Salfred } 18976364Salfred 19013907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 19113907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 19213675Sdyson 19370915Sdwmalone error = falloc(p, &rf, &fd); 19470915Sdwmalone if (error) { 19570915Sdwmalone pipeclose(rpipe); 19670915Sdwmalone pipeclose(wpipe); 19770915Sdwmalone return (error); 19870915Sdwmalone } 19970915Sdwmalone fhold(rf); 20070915Sdwmalone p->p_retval[0] = fd; 20170915Sdwmalone 20270803Sdwmalone /* 20370803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 20470803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 20570803Sdwmalone * to avoid races against processes which manage to dup() the read 20670803Sdwmalone * side while we are blocked trying to allocate the write side. 20770803Sdwmalone */ 20813675Sdyson rf->f_flag = FREAD | FWRITE; 20913675Sdyson rf->f_type = DTYPE_PIPE; 21049413Sgreen rf->f_data = (caddr_t)rpipe; 21113675Sdyson rf->f_ops = &pipeops; 21213675Sdyson error = falloc(p, &wf, &fd); 21370915Sdwmalone if (error) { 21470915Sdwmalone if (fdp->fd_ofiles[p->p_retval[0]] == rf) { 21570915Sdwmalone fdp->fd_ofiles[p->p_retval[0]] = NULL; 21670915Sdwmalone fdrop(rf, p); 21770915Sdwmalone } 21870915Sdwmalone fdrop(rf, p); 21970915Sdwmalone /* rpipe has been closed by fdrop(). */ 22070915Sdwmalone pipeclose(wpipe); 22170915Sdwmalone return (error); 22270915Sdwmalone } 22313675Sdyson wf->f_flag = FREAD | FWRITE; 22413675Sdyson wf->f_type = DTYPE_PIPE; 22549413Sgreen wf->f_data = (caddr_t)wpipe; 22613675Sdyson wf->f_ops = &pipeops; 22730994Sphk p->p_retval[1] = fd; 22813675Sdyson 22913675Sdyson rpipe->pipe_peer = wpipe; 23013675Sdyson wpipe->pipe_peer = rpipe; 23168883Sdillon fdrop(rf, p); 23213675Sdyson 23313675Sdyson return (0); 23413675Sdyson} 23513675Sdyson 23613909Sdyson/* 23713909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 23876364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 23976364Salfred * it will retain the old buffer. 24076364Salfred * If it fails it will return ENOMEM. 24113909Sdyson */ 24276364Salfredstatic int 24376364Salfredpipespace(cpipe, size) 24413675Sdyson struct pipe *cpipe; 24576364Salfred int size; 24613675Sdyson{ 24776364Salfred struct vm_object *object; 24876364Salfred caddr_t buffer; 24913688Sdyson int npages, error; 25013675Sdyson 25176364Salfred npages = round_page(size)/PAGE_SIZE; 25213675Sdyson /* 25313675Sdyson * Create an object, I don't like the idea of paging to/from 25413675Sdyson * kernel_object. 25514037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 25613675Sdyson */ 25776827Salfred mtx_lock(&vm_mtx); 25876364Salfred object = vm_object_allocate(OBJT_DEFAULT, npages); 25976364Salfred buffer = (caddr_t) vm_map_min(kernel_map); 26013675Sdyson 26113675Sdyson /* 26213675Sdyson * Insert the object into the kernel map, and allocate kva for it. 26313675Sdyson * The map entry is, by default, pageable. 26414037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 26513675Sdyson */ 26676364Salfred error = vm_map_find(kernel_map, object, 0, 26776364Salfred (vm_offset_t *) &buffer, size, 1, 26813688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 26976827Salfred mtx_unlock(&vm_mtx); 27013675Sdyson 27176364Salfred if (error != KERN_SUCCESS) { 27276364Salfred vm_object_deallocate(object); 27376364Salfred return (ENOMEM); 27476364Salfred } 27576364Salfred 27676364Salfred /* free old resources if we're resizing */ 27776364Salfred pipe_free_kmem(cpipe); 27876364Salfred cpipe->pipe_buffer.object = object; 27976364Salfred cpipe->pipe_buffer.buffer = buffer; 28076364Salfred cpipe->pipe_buffer.size = size; 28176364Salfred cpipe->pipe_buffer.in = 0; 28276364Salfred cpipe->pipe_buffer.out = 0; 28376364Salfred cpipe->pipe_buffer.cnt = 0; 28413907Sdyson amountpipekva += cpipe->pipe_buffer.size; 28576364Salfred return (0); 28613907Sdyson} 28713688Sdyson 28813907Sdyson/* 28913907Sdyson * initialize and allocate VM and memory for pipe 29013907Sdyson */ 29176364Salfredstatic int 29276364Salfredpipe_create(cpipep) 29376364Salfred struct pipe **cpipep; 29476364Salfred{ 29513907Sdyson struct pipe *cpipe; 29676364Salfred int error; 29713907Sdyson 29876364Salfred *cpipep = zalloc(pipe_zone); 29976364Salfred if (*cpipep == NULL) 30076364Salfred return (ENOMEM); 30117163Sdyson 30276364Salfred cpipe = *cpipep; 30376364Salfred 30476364Salfred /* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */ 30576364Salfred cpipe->pipe_buffer.object = NULL; 30676364Salfred#ifndef PIPE_NODIRECT 30776364Salfred cpipe->pipe_map.kva = NULL; 30876364Salfred#endif 30976364Salfred /* 31076364Salfred * protect so pipeclose() doesn't follow a junk pointer 31176364Salfred * if pipespace() fails. 31276364Salfred */ 31376754Salfred bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel)); 31413675Sdyson cpipe->pipe_state = 0; 31513675Sdyson cpipe->pipe_peer = NULL; 31613675Sdyson cpipe->pipe_busy = 0; 31713907Sdyson 31814037Sdyson#ifndef PIPE_NODIRECT 31913907Sdyson /* 32013907Sdyson * pipe data structure initializations to support direct pipe I/O 32113907Sdyson */ 32213907Sdyson cpipe->pipe_map.cnt = 0; 32313907Sdyson cpipe->pipe_map.kva = 0; 32413907Sdyson cpipe->pipe_map.pos = 0; 32513907Sdyson cpipe->pipe_map.npages = 0; 32617124Sbde /* cpipe->pipe_map.ms[] = invalid */ 32714037Sdyson#endif 32876364Salfred 32976364Salfred error = pipespace(cpipe, PIPE_SIZE); 33076760Salfred if (error) 33176364Salfred return (error); 33276364Salfred 33376364Salfred vfs_timestamp(&cpipe->pipe_ctime); 33476364Salfred cpipe->pipe_atime = cpipe->pipe_ctime; 33576364Salfred cpipe->pipe_mtime = cpipe->pipe_ctime; 33676364Salfred 33776364Salfred return (0); 33813675Sdyson} 33913675Sdyson 34013675Sdyson 34113675Sdyson/* 34213675Sdyson * lock a pipe for I/O, blocking other access 34313675Sdyson */ 34413675Sdysonstatic __inline int 34513907Sdysonpipelock(cpipe, catch) 34613675Sdyson struct pipe *cpipe; 34713907Sdyson int catch; 34813675Sdyson{ 34913776Sdyson int error; 35076364Salfred 35113675Sdyson while (cpipe->pipe_state & PIPE_LOCK) { 35213675Sdyson cpipe->pipe_state |= PIPE_LWANT; 35376760Salfred error = tsleep(cpipe, catch ? (PRIBIO | PCATCH) : PRIBIO, 35476760Salfred "pipelk", 0); 35576760Salfred if (error != 0) 35676760Salfred return (error); 35713675Sdyson } 35813675Sdyson cpipe->pipe_state |= PIPE_LOCK; 35976760Salfred return (0); 36013675Sdyson} 36113675Sdyson 36213675Sdyson/* 36313675Sdyson * unlock a pipe I/O lock 36413675Sdyson */ 36513675Sdysonstatic __inline void 36613675Sdysonpipeunlock(cpipe) 36713675Sdyson struct pipe *cpipe; 36813675Sdyson{ 36976364Salfred 37013675Sdyson cpipe->pipe_state &= ~PIPE_LOCK; 37113675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 37213675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 37314177Sdyson wakeup(cpipe); 37413675Sdyson } 37513675Sdyson} 37613675Sdyson 37714037Sdysonstatic __inline void 37814037Sdysonpipeselwakeup(cpipe) 37914037Sdyson struct pipe *cpipe; 38014037Sdyson{ 38176364Salfred 38214037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 38314037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 38414037Sdyson selwakeup(&cpipe->pipe_sel); 38514037Sdyson } 38641086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 38741086Struckman pgsigio(cpipe->pipe_sigio, SIGIO, 0); 38859288Sjlemon KNOTE(&cpipe->pipe_sel.si_note, 0); 38914037Sdyson} 39014037Sdyson 39113675Sdyson/* ARGSUSED */ 39213675Sdysonstatic int 39351418Sgreenpipe_read(fp, uio, cred, flags, p) 39413675Sdyson struct file *fp; 39513675Sdyson struct uio *uio; 39613675Sdyson struct ucred *cred; 39751418Sgreen struct proc *p; 39845311Sdt int flags; 39913675Sdyson{ 40013675Sdyson struct pipe *rpipe = (struct pipe *) fp->f_data; 40147748Salc int error; 40213675Sdyson int nread = 0; 40318863Sdyson u_int size; 40413675Sdyson 40513675Sdyson ++rpipe->pipe_busy; 40647748Salc error = pipelock(rpipe, 1); 40747748Salc if (error) 40847748Salc goto unlocked_error; 40947748Salc 41013675Sdyson while (uio->uio_resid) { 41113907Sdyson /* 41213907Sdyson * normal pipe buffer receive 41313907Sdyson */ 41413675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 41518863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 41613675Sdyson if (size > rpipe->pipe_buffer.cnt) 41713675Sdyson size = rpipe->pipe_buffer.cnt; 41818863Sdyson if (size > (u_int) uio->uio_resid) 41918863Sdyson size = (u_int) uio->uio_resid; 42047748Salc 42147748Salc error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 42213675Sdyson size, uio); 42376760Salfred if (error) 42413675Sdyson break; 42576760Salfred 42613675Sdyson rpipe->pipe_buffer.out += size; 42713675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 42813675Sdyson rpipe->pipe_buffer.out = 0; 42913675Sdyson 43013675Sdyson rpipe->pipe_buffer.cnt -= size; 43147748Salc 43247748Salc /* 43347748Salc * If there is no more to read in the pipe, reset 43447748Salc * its pointers to the beginning. This improves 43547748Salc * cache hit stats. 43647748Salc */ 43747748Salc if (rpipe->pipe_buffer.cnt == 0) { 43847748Salc rpipe->pipe_buffer.in = 0; 43947748Salc rpipe->pipe_buffer.out = 0; 44047748Salc } 44113675Sdyson nread += size; 44214037Sdyson#ifndef PIPE_NODIRECT 44313907Sdyson /* 44413907Sdyson * Direct copy, bypassing a kernel buffer. 44513907Sdyson */ 44613907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 44747748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 44847748Salc caddr_t va; 44918863Sdyson if (size > (u_int) uio->uio_resid) 45018863Sdyson size = (u_int) uio->uio_resid; 45147748Salc 45276760Salfred va = (caddr_t) rpipe->pipe_map.kva + 45376760Salfred rpipe->pipe_map.pos; 45447748Salc error = uiomove(va, size, uio); 45513907Sdyson if (error) 45613907Sdyson break; 45713907Sdyson nread += size; 45813907Sdyson rpipe->pipe_map.pos += size; 45913907Sdyson rpipe->pipe_map.cnt -= size; 46013907Sdyson if (rpipe->pipe_map.cnt == 0) { 46113907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 46213907Sdyson wakeup(rpipe); 46313907Sdyson } 46414037Sdyson#endif 46513675Sdyson } else { 46613675Sdyson /* 46713675Sdyson * detect EOF condition 46876760Salfred * read returns 0 on EOF, no need to set error 46913675Sdyson */ 47076760Salfred if (rpipe->pipe_state & PIPE_EOF) 47113675Sdyson break; 47243623Sdillon 47313675Sdyson /* 47413675Sdyson * If the "write-side" has been blocked, wake it up now. 47513675Sdyson */ 47613675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 47713675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 47813675Sdyson wakeup(rpipe); 47913675Sdyson } 48043623Sdillon 48143623Sdillon /* 48247748Salc * Break if some data was read. 48343623Sdillon */ 48447748Salc if (nread > 0) 48513675Sdyson break; 48616960Sdyson 48743623Sdillon /* 48847748Salc * Unlock the pipe buffer for our remaining processing. We 48947748Salc * will either break out with an error or we will sleep and 49047748Salc * relock to loop. 49143623Sdillon */ 49247748Salc pipeunlock(rpipe); 49343623Sdillon 49413675Sdyson /* 49547748Salc * Handle non-blocking mode operation or 49647748Salc * wait for more data. 49713675Sdyson */ 49876760Salfred if (fp->f_flag & FNONBLOCK) { 49947748Salc error = EAGAIN; 50076760Salfred } else { 50147748Salc rpipe->pipe_state |= PIPE_WANTR; 50247748Salc if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0) 50347748Salc error = pipelock(rpipe, 1); 50413675Sdyson } 50547748Salc if (error) 50647748Salc goto unlocked_error; 50713675Sdyson } 50813675Sdyson } 50947748Salc pipeunlock(rpipe); 51013675Sdyson 51124101Sbde if (error == 0) 51255112Sbde vfs_timestamp(&rpipe->pipe_atime); 51347748Salcunlocked_error: 51447748Salc --rpipe->pipe_busy; 51513913Sdyson 51647748Salc /* 51747748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 51847748Salc */ 51913675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 52013675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 52113675Sdyson wakeup(rpipe); 52213675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 52313675Sdyson /* 52447748Salc * Handle write blocking hysteresis. 52513675Sdyson */ 52613675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 52713675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 52813675Sdyson wakeup(rpipe); 52913675Sdyson } 53013675Sdyson } 53114037Sdyson 53214802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 53314037Sdyson pipeselwakeup(rpipe); 53414037Sdyson 53576760Salfred return (error); 53613675Sdyson} 53713675Sdyson 53814037Sdyson#ifndef PIPE_NODIRECT 53913907Sdyson/* 54013907Sdyson * Map the sending processes' buffer into kernel space and wire it. 54113907Sdyson * This is similar to a physical write operation. 54213907Sdyson */ 54313675Sdysonstatic int 54413907Sdysonpipe_build_write_buffer(wpipe, uio) 54513907Sdyson struct pipe *wpipe; 54613675Sdyson struct uio *uio; 54713675Sdyson{ 54818863Sdyson u_int size; 54913907Sdyson int i; 55013907Sdyson vm_offset_t addr, endaddr, paddr; 55113907Sdyson 55218863Sdyson size = (u_int) uio->uio_iov->iov_len; 55313907Sdyson if (size > wpipe->pipe_buffer.size) 55413907Sdyson size = wpipe->pipe_buffer.size; 55513907Sdyson 55640286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 55776827Salfred mtx_lock(&vm_mtx); 55876760Salfred addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 55976760Salfred for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 56013907Sdyson vm_page_t m; 56113907Sdyson 56251474Sdillon if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 56351474Sdillon (paddr = pmap_kextract(addr)) == 0) { 56413907Sdyson int j; 56576760Salfred 56676760Salfred for (j = 0; j < i; j++) 56740700Sdg vm_page_unwire(wpipe->pipe_map.ms[j], 1); 56876827Salfred mtx_unlock(&vm_mtx); 56976760Salfred return (EFAULT); 57013907Sdyson } 57113907Sdyson 57213907Sdyson m = PHYS_TO_VM_PAGE(paddr); 57313907Sdyson vm_page_wire(m); 57413907Sdyson wpipe->pipe_map.ms[i] = m; 57513907Sdyson } 57613907Sdyson 57713907Sdyson/* 57813907Sdyson * set up the control block 57913907Sdyson */ 58013907Sdyson wpipe->pipe_map.npages = i; 58176760Salfred wpipe->pipe_map.pos = 58276760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 58313907Sdyson wpipe->pipe_map.cnt = size; 58413907Sdyson 58513907Sdyson/* 58613907Sdyson * and map the buffer 58713907Sdyson */ 58813907Sdyson if (wpipe->pipe_map.kva == 0) { 58913912Sdyson /* 59013912Sdyson * We need to allocate space for an extra page because the 59113912Sdyson * address range might (will) span pages at times. 59213912Sdyson */ 59313907Sdyson wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 59413912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 59513912Sdyson amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 59613907Sdyson } 59713907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 59813907Sdyson wpipe->pipe_map.npages); 59913907Sdyson 60076827Salfred mtx_unlock(&vm_mtx); 60113907Sdyson/* 60213907Sdyson * and update the uio data 60313907Sdyson */ 60413907Sdyson 60513907Sdyson uio->uio_iov->iov_len -= size; 60613907Sdyson uio->uio_iov->iov_base += size; 60713907Sdyson if (uio->uio_iov->iov_len == 0) 60813907Sdyson uio->uio_iov++; 60913907Sdyson uio->uio_resid -= size; 61013907Sdyson uio->uio_offset += size; 61176760Salfred return (0); 61213907Sdyson} 61313907Sdyson 61413907Sdyson/* 61513907Sdyson * unmap and unwire the process buffer 61613907Sdyson */ 61713907Sdysonstatic void 61813907Sdysonpipe_destroy_write_buffer(wpipe) 61976760Salfred struct pipe *wpipe; 62013907Sdyson{ 62113907Sdyson int i; 62276364Salfred 62317163Sdyson if (wpipe->pipe_map.kva) { 62417163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 62513907Sdyson 62613907Sdyson if (amountpipekva > MAXPIPEKVA) { 62713907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 62813907Sdyson wpipe->pipe_map.kva = 0; 62913907Sdyson kmem_free(kernel_map, kva, 63013912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 63113912Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 63213907Sdyson } 63313907Sdyson } 63476827Salfred mtx_lock(&vm_mtx); 63576760Salfred for (i = 0; i < wpipe->pipe_map.npages; i++) 63640700Sdg vm_page_unwire(wpipe->pipe_map.ms[i], 1); 63776827Salfred mtx_unlock(&vm_mtx); 63813907Sdyson} 63913907Sdyson 64013907Sdyson/* 64113907Sdyson * In the case of a signal, the writing process might go away. This 64213907Sdyson * code copies the data into the circular buffer so that the source 64313907Sdyson * pages can be freed without loss of data. 64413907Sdyson */ 64513907Sdysonstatic void 64613907Sdysonpipe_clone_write_buffer(wpipe) 64776364Salfred struct pipe *wpipe; 64813907Sdyson{ 64913907Sdyson int size; 65013907Sdyson int pos; 65113907Sdyson 65213907Sdyson size = wpipe->pipe_map.cnt; 65313907Sdyson pos = wpipe->pipe_map.pos; 65476760Salfred bcopy((caddr_t) wpipe->pipe_map.kva + pos, 65576760Salfred (caddr_t) wpipe->pipe_buffer.buffer, size); 65613907Sdyson 65713907Sdyson wpipe->pipe_buffer.in = size; 65813907Sdyson wpipe->pipe_buffer.out = 0; 65913907Sdyson wpipe->pipe_buffer.cnt = size; 66013907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 66113907Sdyson 66213907Sdyson pipe_destroy_write_buffer(wpipe); 66313907Sdyson} 66413907Sdyson 66513907Sdyson/* 66613907Sdyson * This implements the pipe buffer write mechanism. Note that only 66713907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 66813907Sdyson * If there are any characters in the pipe buffer, the direct write will 66913907Sdyson * be deferred until the receiving process grabs all of the bytes from 67013907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 67113907Sdyson */ 67213907Sdysonstatic int 67313907Sdysonpipe_direct_write(wpipe, uio) 67413907Sdyson struct pipe *wpipe; 67513907Sdyson struct uio *uio; 67613907Sdyson{ 67713907Sdyson int error; 67876364Salfred 67913951Sdysonretry: 68013907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 68176760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 68213951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 68313951Sdyson wakeup(wpipe); 68413951Sdyson } 68513992Sdyson wpipe->pipe_state |= PIPE_WANTW; 68676760Salfred error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0); 68714802Sdyson if (error) 68813907Sdyson goto error1; 68914802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 69014802Sdyson error = EPIPE; 69114802Sdyson goto error1; 69214802Sdyson } 69313907Sdyson } 69413907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 69513951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 69676760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 69713951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 69813951Sdyson wakeup(wpipe); 69913951Sdyson } 70013951Sdyson 70113992Sdyson wpipe->pipe_state |= PIPE_WANTW; 70276760Salfred error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0); 70314802Sdyson if (error) 70413907Sdyson goto error1; 70514802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 70614802Sdyson error = EPIPE; 70714802Sdyson goto error1; 70813907Sdyson } 70913951Sdyson goto retry; 71013907Sdyson } 71113907Sdyson 71213951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 71313951Sdyson 71413907Sdyson error = pipe_build_write_buffer(wpipe, uio); 71513907Sdyson if (error) { 71613907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 71713907Sdyson goto error1; 71813907Sdyson } 71913907Sdyson 72013907Sdyson error = 0; 72113907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 72213907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 72313907Sdyson pipelock(wpipe, 0); 72413907Sdyson pipe_destroy_write_buffer(wpipe); 72513907Sdyson pipeunlock(wpipe); 72614037Sdyson pipeselwakeup(wpipe); 72714802Sdyson error = EPIPE; 72814802Sdyson goto error1; 72913907Sdyson } 73013992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 73113992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 73213992Sdyson wakeup(wpipe); 73313992Sdyson } 73414037Sdyson pipeselwakeup(wpipe); 73576760Salfred error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0); 73613907Sdyson } 73713907Sdyson 73813907Sdyson pipelock(wpipe,0); 73913907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 74013907Sdyson /* 74113907Sdyson * this bit of trickery substitutes a kernel buffer for 74213907Sdyson * the process that might be going away. 74313907Sdyson */ 74413907Sdyson pipe_clone_write_buffer(wpipe); 74513907Sdyson } else { 74613907Sdyson pipe_destroy_write_buffer(wpipe); 74713907Sdyson } 74813907Sdyson pipeunlock(wpipe); 74976760Salfred return (error); 75013907Sdyson 75113907Sdysonerror1: 75213907Sdyson wakeup(wpipe); 75376760Salfred return (error); 75413907Sdyson} 75514037Sdyson#endif 75613907Sdyson 75716960Sdysonstatic int 75851418Sgreenpipe_write(fp, uio, cred, flags, p) 75916960Sdyson struct file *fp; 76013907Sdyson struct uio *uio; 76116960Sdyson struct ucred *cred; 76251418Sgreen struct proc *p; 76345311Sdt int flags; 76413907Sdyson{ 76513675Sdyson int error = 0; 76613913Sdyson int orig_resid; 76716960Sdyson struct pipe *wpipe, *rpipe; 76816960Sdyson 76916960Sdyson rpipe = (struct pipe *) fp->f_data; 77016960Sdyson wpipe = rpipe->pipe_peer; 77116960Sdyson 77213675Sdyson /* 77313675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 77413675Sdyson */ 77516960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 77676760Salfred return (EPIPE); 77713675Sdyson } 77813675Sdyson 77917163Sdyson /* 78017163Sdyson * If it is advantageous to resize the pipe buffer, do 78117163Sdyson * so. 78217163Sdyson */ 78317163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 78417163Sdyson (nbigpipe < LIMITBIGPIPES) && 78517163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 78617163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 78717163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 78817163Sdyson 78913907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 79076364Salfred if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 79176364Salfred nbigpipe++; 79213907Sdyson pipeunlock(wpipe); 79313907Sdyson } else { 79476760Salfred return (error); 79513907Sdyson } 79613907Sdyson } 79776364Salfred 79876364Salfred KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone")); 79913907Sdyson 80013675Sdyson ++wpipe->pipe_busy; 80113913Sdyson orig_resid = uio->uio_resid; 80213675Sdyson while (uio->uio_resid) { 80313907Sdyson int space; 80476760Salfred 80514037Sdyson#ifndef PIPE_NODIRECT 80613907Sdyson /* 80713907Sdyson * If the transfer is large, we can gain performance if 80813907Sdyson * we do process-to-process copies directly. 80916416Sdyson * If the write is non-blocking, we don't use the 81016416Sdyson * direct write mechanism. 81158505Sdillon * 81258505Sdillon * The direct write mechanism will detect the reader going 81358505Sdillon * away on us. 81413907Sdyson */ 81517163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 81617163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 81717163Sdyson (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 81813907Sdyson (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 81913907Sdyson error = pipe_direct_write( wpipe, uio); 82076760Salfred if (error) 82113907Sdyson break; 82213907Sdyson continue; 82313907Sdyson } 82414037Sdyson#endif 82513907Sdyson 82613907Sdyson /* 82713907Sdyson * Pipe buffered writes cannot be coincidental with 82813907Sdyson * direct writes. We wait until the currently executing 82913907Sdyson * direct write is completed before we start filling the 83058505Sdillon * pipe buffer. We break out if a signal occurs or the 83158505Sdillon * reader goes away. 83213907Sdyson */ 83313907Sdyson retrywrite: 83413907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 83513992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 83613992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 83713992Sdyson wakeup(wpipe); 83813992Sdyson } 83976760Salfred error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0); 84058505Sdillon if (wpipe->pipe_state & PIPE_EOF) 84158505Sdillon break; 84213907Sdyson if (error) 84313907Sdyson break; 84413907Sdyson } 84558505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 84658505Sdillon error = EPIPE; 84758505Sdillon break; 84858505Sdillon } 84913907Sdyson 85013907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 85114644Sdyson 85214644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 85313913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 85413913Sdyson space = 0; 85513907Sdyson 85617163Sdyson if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 85713907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 85854534Stegge int size; /* Transfer size */ 85954534Stegge int segsize; /* first segment to transfer */ 86076760Salfred 86113907Sdyson /* 86213907Sdyson * It is possible for a direct write to 86313907Sdyson * slip in on us... handle it here... 86413907Sdyson */ 86513907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 86613907Sdyson pipeunlock(wpipe); 86713907Sdyson goto retrywrite; 86813907Sdyson } 86954534Stegge /* 87054534Stegge * If a process blocked in uiomove, our 87154534Stegge * value for space might be bad. 87258505Sdillon * 87358505Sdillon * XXX will we be ok if the reader has gone 87458505Sdillon * away here? 87554534Stegge */ 87654534Stegge if (space > wpipe->pipe_buffer.size - 87754534Stegge wpipe->pipe_buffer.cnt) { 87854534Stegge pipeunlock(wpipe); 87954534Stegge goto retrywrite; 88054534Stegge } 88154534Stegge 88254534Stegge /* 88354534Stegge * Transfer size is minimum of uio transfer 88454534Stegge * and free space in pipe buffer. 88554534Stegge */ 88654534Stegge if (space > uio->uio_resid) 88754534Stegge size = uio->uio_resid; 88854534Stegge else 88954534Stegge size = space; 89054534Stegge /* 89154534Stegge * First segment to transfer is minimum of 89254534Stegge * transfer size and contiguous space in 89354534Stegge * pipe buffer. If first segment to transfer 89454534Stegge * is less than the transfer size, we've got 89554534Stegge * a wraparound in the buffer. 89654534Stegge */ 89754534Stegge segsize = wpipe->pipe_buffer.size - 89854534Stegge wpipe->pipe_buffer.in; 89954534Stegge if (segsize > size) 90054534Stegge segsize = size; 90154534Stegge 90254534Stegge /* Transfer first segment */ 90354534Stegge 90454534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 90554534Stegge segsize, uio); 90654534Stegge 90754534Stegge if (error == 0 && segsize < size) { 90854534Stegge /* 90954534Stegge * Transfer remaining part now, to 91054534Stegge * support atomic writes. Wraparound 91154534Stegge * happened. 91254534Stegge */ 91354534Stegge if (wpipe->pipe_buffer.in + segsize != 91454534Stegge wpipe->pipe_buffer.size) 91554534Stegge panic("Expected pipe buffer wraparound disappeared"); 91654534Stegge 91754534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[0], 91854534Stegge size - segsize, uio); 91954534Stegge } 92054534Stegge if (error == 0) { 92154534Stegge wpipe->pipe_buffer.in += size; 92254534Stegge if (wpipe->pipe_buffer.in >= 92354534Stegge wpipe->pipe_buffer.size) { 92454534Stegge if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 92554534Stegge panic("Expected wraparound bad"); 92654534Stegge wpipe->pipe_buffer.in = size - segsize; 92754534Stegge } 92854534Stegge 92954534Stegge wpipe->pipe_buffer.cnt += size; 93054534Stegge if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 93154534Stegge panic("Pipe buffer overflow"); 93254534Stegge 93354534Stegge } 93413675Sdyson pipeunlock(wpipe); 93513675Sdyson } 93613675Sdyson if (error) 93713675Sdyson break; 93813675Sdyson 93913675Sdyson } else { 94013675Sdyson /* 94113675Sdyson * If the "read-side" has been blocked, wake it up now. 94213675Sdyson */ 94313675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 94413675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 94513675Sdyson wakeup(wpipe); 94613675Sdyson } 94714037Sdyson 94813675Sdyson /* 94913675Sdyson * don't block on non-blocking I/O 95013675Sdyson */ 95116960Sdyson if (fp->f_flag & FNONBLOCK) { 95213907Sdyson error = EAGAIN; 95313675Sdyson break; 95413675Sdyson } 95513907Sdyson 95614037Sdyson /* 95714037Sdyson * We have no more space and have something to offer, 95829356Speter * wake up select/poll. 95914037Sdyson */ 96014037Sdyson pipeselwakeup(wpipe); 96114037Sdyson 96213675Sdyson wpipe->pipe_state |= PIPE_WANTW; 96376760Salfred error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0); 96476760Salfred if (error != 0) 96513675Sdyson break; 96613675Sdyson /* 96713675Sdyson * If read side wants to go away, we just issue a signal 96813675Sdyson * to ourselves. 96913675Sdyson */ 97013675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 97113774Sdyson error = EPIPE; 97213907Sdyson break; 97313675Sdyson } 97413675Sdyson } 97513675Sdyson } 97613675Sdyson 97714644Sdyson --wpipe->pipe_busy; 97876760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 97976760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 98013675Sdyson wakeup(wpipe); 98113675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 98213675Sdyson /* 98313675Sdyson * If we have put any characters in the buffer, we wake up 98413675Sdyson * the reader. 98513675Sdyson */ 98613675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 98713675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 98813675Sdyson wakeup(wpipe); 98913675Sdyson } 99013675Sdyson } 99113909Sdyson 99213909Sdyson /* 99313909Sdyson * Don't return EPIPE if I/O was successful 99413909Sdyson */ 99513907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 99613907Sdyson (uio->uio_resid == 0) && 99713907Sdyson (error == EPIPE)) 99813907Sdyson error = 0; 99913913Sdyson 100024101Sbde if (error == 0) 100155112Sbde vfs_timestamp(&wpipe->pipe_mtime); 100224101Sbde 100314037Sdyson /* 100414037Sdyson * We have something to offer, 100529356Speter * wake up select/poll. 100614037Sdyson */ 100714177Sdyson if (wpipe->pipe_buffer.cnt) 100814037Sdyson pipeselwakeup(wpipe); 100913907Sdyson 101076760Salfred return (error); 101113675Sdyson} 101213675Sdyson 101313675Sdyson/* 101413675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 101513675Sdyson */ 101613675Sdysonint 101713675Sdysonpipe_ioctl(fp, cmd, data, p) 101813675Sdyson struct file *fp; 101936735Sdfr u_long cmd; 102076364Salfred caddr_t data; 102113675Sdyson struct proc *p; 102213675Sdyson{ 102376364Salfred struct pipe *mpipe = (struct pipe *)fp->f_data; 102413675Sdyson 102513675Sdyson switch (cmd) { 102613675Sdyson 102713675Sdyson case FIONBIO: 102813675Sdyson return (0); 102913675Sdyson 103013675Sdyson case FIOASYNC: 103113675Sdyson if (*(int *)data) { 103213675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 103313675Sdyson } else { 103413675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 103513675Sdyson } 103613675Sdyson return (0); 103713675Sdyson 103813675Sdyson case FIONREAD: 103914037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 104014037Sdyson *(int *)data = mpipe->pipe_map.cnt; 104114037Sdyson else 104214037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 104313675Sdyson return (0); 104413675Sdyson 104541086Struckman case FIOSETOWN: 104641086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 104741086Struckman 104841086Struckman case FIOGETOWN: 104941086Struckman *(int *)data = fgetown(mpipe->pipe_sigio); 105013675Sdyson return (0); 105113675Sdyson 105241086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 105341086Struckman case TIOCSPGRP: 105441086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 105541086Struckman 105641086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 105718863Sdyson case TIOCGPGRP: 105841086Struckman *(int *)data = -fgetown(mpipe->pipe_sigio); 105913675Sdyson return (0); 106013675Sdyson 106113675Sdyson } 106217124Sbde return (ENOTTY); 106313675Sdyson} 106413675Sdyson 106513675Sdysonint 106629356Speterpipe_poll(fp, events, cred, p) 106713675Sdyson struct file *fp; 106829356Speter int events; 106929356Speter struct ucred *cred; 107013675Sdyson struct proc *p; 107113675Sdyson{ 107276364Salfred struct pipe *rpipe = (struct pipe *)fp->f_data; 107313675Sdyson struct pipe *wpipe; 107429356Speter int revents = 0; 107513675Sdyson 107613675Sdyson wpipe = rpipe->pipe_peer; 107729356Speter if (events & (POLLIN | POLLRDNORM)) 107829356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 107929356Speter (rpipe->pipe_buffer.cnt > 0) || 108029356Speter (rpipe->pipe_state & PIPE_EOF)) 108129356Speter revents |= events & (POLLIN | POLLRDNORM); 108213675Sdyson 108329356Speter if (events & (POLLOUT | POLLWRNORM)) 108429356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 108543311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 108643311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 108729356Speter revents |= events & (POLLOUT | POLLWRNORM); 108813675Sdyson 108929356Speter if ((rpipe->pipe_state & PIPE_EOF) || 109029356Speter (wpipe == NULL) || 109129356Speter (wpipe->pipe_state & PIPE_EOF)) 109229356Speter revents |= POLLHUP; 109329356Speter 109429356Speter if (revents == 0) { 109529356Speter if (events & (POLLIN | POLLRDNORM)) { 109629356Speter selrecord(p, &rpipe->pipe_sel); 109729356Speter rpipe->pipe_state |= PIPE_SEL; 109813675Sdyson } 109913675Sdyson 110029356Speter if (events & (POLLOUT | POLLWRNORM)) { 110130164Speter selrecord(p, &wpipe->pipe_sel); 110230164Speter wpipe->pipe_state |= PIPE_SEL; 110313907Sdyson } 110413675Sdyson } 110529356Speter 110629356Speter return (revents); 110713675Sdyson} 110813675Sdyson 110952983Speterstatic int 111052983Speterpipe_stat(fp, ub, p) 111152983Speter struct file *fp; 111252983Speter struct stat *ub; 111352983Speter struct proc *p; 111413675Sdyson{ 111552983Speter struct pipe *pipe = (struct pipe *)fp->f_data; 111652983Speter 111776760Salfred bzero((caddr_t)ub, sizeof(*ub)); 111817124Sbde ub->st_mode = S_IFIFO; 111913907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 112013675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 112113675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 112234901Sphk ub->st_atimespec = pipe->pipe_atime; 112334901Sphk ub->st_mtimespec = pipe->pipe_mtime; 112434901Sphk ub->st_ctimespec = pipe->pipe_ctime; 112560404Schris ub->st_uid = fp->f_cred->cr_uid; 112660404Schris ub->st_gid = fp->f_cred->cr_gid; 112717124Sbde /* 112860404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 112917124Sbde * XXX (st_dev, st_ino) should be unique. 113017124Sbde */ 113176760Salfred return (0); 113213675Sdyson} 113313675Sdyson 113413675Sdyson/* ARGSUSED */ 113513675Sdysonstatic int 113613675Sdysonpipe_close(fp, p) 113713675Sdyson struct file *fp; 113813675Sdyson struct proc *p; 113913675Sdyson{ 114013675Sdyson struct pipe *cpipe = (struct pipe *)fp->f_data; 114116322Sgpalmer 114249413Sgreen fp->f_ops = &badfileops; 114349413Sgreen fp->f_data = NULL; 114441086Struckman funsetown(cpipe->pipe_sigio); 114513675Sdyson pipeclose(cpipe); 114676760Salfred return (0); 114713675Sdyson} 114813675Sdyson 114976364Salfredstatic void 115076364Salfredpipe_free_kmem(cpipe) 115176364Salfred struct pipe *cpipe; 115276364Salfred{ 115376364Salfred 115476364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 115576364Salfred if (cpipe->pipe_buffer.size > PIPE_SIZE) 115676364Salfred --nbigpipe; 115776364Salfred amountpipekva -= cpipe->pipe_buffer.size; 115876364Salfred kmem_free(kernel_map, 115976364Salfred (vm_offset_t)cpipe->pipe_buffer.buffer, 116076364Salfred cpipe->pipe_buffer.size); 116176364Salfred cpipe->pipe_buffer.buffer = NULL; 116276364Salfred } 116376364Salfred#ifndef PIPE_NODIRECT 116476364Salfred if (cpipe->pipe_map.kva != NULL) { 116576364Salfred amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 116676364Salfred kmem_free(kernel_map, 116776364Salfred cpipe->pipe_map.kva, 116876364Salfred cpipe->pipe_buffer.size + PAGE_SIZE); 116976364Salfred cpipe->pipe_map.cnt = 0; 117076364Salfred cpipe->pipe_map.kva = 0; 117176364Salfred cpipe->pipe_map.pos = 0; 117276364Salfred cpipe->pipe_map.npages = 0; 117376364Salfred } 117476364Salfred#endif 117576364Salfred} 117676364Salfred 117713675Sdyson/* 117813675Sdyson * shutdown the pipe 117913675Sdyson */ 118013675Sdysonstatic void 118113675Sdysonpipeclose(cpipe) 118213675Sdyson struct pipe *cpipe; 118313675Sdyson{ 118413907Sdyson struct pipe *ppipe; 118576364Salfred 118613675Sdyson if (cpipe) { 118713907Sdyson 118814037Sdyson pipeselwakeup(cpipe); 118913907Sdyson 119013675Sdyson /* 119113675Sdyson * If the other side is blocked, wake it up saying that 119213675Sdyson * we want to close it down. 119313675Sdyson */ 119413675Sdyson while (cpipe->pipe_busy) { 119513675Sdyson wakeup(cpipe); 119676760Salfred cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; 119713675Sdyson tsleep(cpipe, PRIBIO, "pipecl", 0); 119813675Sdyson } 119913675Sdyson 120013675Sdyson /* 120113675Sdyson * Disconnect from peer 120213675Sdyson */ 120343301Sdillon if ((ppipe = cpipe->pipe_peer) != NULL) { 120414037Sdyson pipeselwakeup(ppipe); 120513907Sdyson 120613907Sdyson ppipe->pipe_state |= PIPE_EOF; 120713907Sdyson wakeup(ppipe); 120813907Sdyson ppipe->pipe_peer = NULL; 120913675Sdyson } 121013675Sdyson /* 121113675Sdyson * free resources 121213675Sdyson */ 121376827Salfred mtx_lock(&vm_mtx); 121476364Salfred pipe_free_kmem(cpipe); 121527899Sdyson zfree(pipe_zone, cpipe); 121676827Salfred mtx_unlock(&vm_mtx); 121713675Sdyson } 121813675Sdyson} 121959288Sjlemon 122072521Sjlemon/*ARGSUSED*/ 122159288Sjlemonstatic int 122272521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 122359288Sjlemon{ 122459288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 122559288Sjlemon 122672521Sjlemon switch (kn->kn_filter) { 122772521Sjlemon case EVFILT_READ: 122872521Sjlemon kn->kn_fop = &pipe_rfiltops; 122972521Sjlemon break; 123072521Sjlemon case EVFILT_WRITE: 123172521Sjlemon kn->kn_fop = &pipe_wfiltops; 123272521Sjlemon break; 123372521Sjlemon default: 123472521Sjlemon return (1); 123572521Sjlemon } 123672521Sjlemon 123759288Sjlemon SLIST_INSERT_HEAD(&rpipe->pipe_sel.si_note, kn, kn_selnext); 123859288Sjlemon return (0); 123959288Sjlemon} 124059288Sjlemon 124159288Sjlemonstatic void 124259288Sjlemonfilt_pipedetach(struct knote *kn) 124359288Sjlemon{ 124459288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 124559288Sjlemon 124660938Sjake SLIST_REMOVE(&rpipe->pipe_sel.si_note, kn, knote, kn_selnext); 124759288Sjlemon} 124859288Sjlemon 124959288Sjlemon/*ARGSUSED*/ 125059288Sjlemonstatic int 125159288Sjlemonfilt_piperead(struct knote *kn, long hint) 125259288Sjlemon{ 125359288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 125459288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 125559288Sjlemon 125659288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 125759288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 125859288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 125959288Sjlemon 126059288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 126159288Sjlemon (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 126259288Sjlemon kn->kn_flags |= EV_EOF; 126359288Sjlemon return (1); 126459288Sjlemon } 126559288Sjlemon return (kn->kn_data > 0); 126659288Sjlemon} 126759288Sjlemon 126859288Sjlemon/*ARGSUSED*/ 126959288Sjlemonstatic int 127059288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 127159288Sjlemon{ 127259288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 127359288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 127459288Sjlemon 127559288Sjlemon if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 127659288Sjlemon kn->kn_data = 0; 127759288Sjlemon kn->kn_flags |= EV_EOF; 127859288Sjlemon return (1); 127959288Sjlemon } 128059288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 128165855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 128259288Sjlemon kn->kn_data = 0; 128359288Sjlemon 128459288Sjlemon return (kn->kn_data >= PIPE_BUF); 128559288Sjlemon} 1286