sys_pipe.c revision 76166
113675Sdyson/* 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson * 1950477Speter * $FreeBSD: head/sys/kern/sys_pipe.c 76166 2001-05-01 08:13:21Z markm $ 2013675Sdyson */ 2113675Sdyson 2213675Sdyson/* 2313675Sdyson * This file contains a high-performance replacement for the socket-based 2413675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2513675Sdyson * all features of sockets, but does do everything that pipes normally 2613675Sdyson * do. 2713675Sdyson */ 2813675Sdyson 2913907Sdyson/* 3013907Sdyson * This code has two modes of operation, a small write mode and a large 3113907Sdyson * write mode. The small write mode acts like conventional pipes with 3213907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3313907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 3413907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 3513907Sdyson * the receiving process can copy it directly from the pages in the sending 3613907Sdyson * process. 3713907Sdyson * 3813907Sdyson * If the sending process receives a signal, it is possible that it will 3913913Sdyson * go away, and certainly its address space can change, because control 4013907Sdyson * is returned back to the user-mode side. In that case, the pipe code 4113907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4213907Sdyson * kernel buffer, and the receiving process will grab the data from the 4313907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4413907Sdyson * the copy operation is normally eliminated. 4513907Sdyson * 4613907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4713907Sdyson * happen for small transfers so that the system will not spend all of 4813913Sdyson * its time context switching. PIPE_SIZE is constrained by the 4913907Sdyson * amount of kernel virtual memory. 5013907Sdyson */ 5113907Sdyson 5213675Sdyson#include <sys/param.h> 5313675Sdyson#include <sys/systm.h> 5424131Sbde#include <sys/fcntl.h> 5513675Sdyson#include <sys/file.h> 5613675Sdyson#include <sys/filedesc.h> 5724206Sbde#include <sys/filio.h> 5876166Smarkm#include <sys/lock.h> 5924206Sbde#include <sys/ttycom.h> 6013675Sdyson#include <sys/stat.h> 6129356Speter#include <sys/poll.h> 6270834Swollman#include <sys/selinfo.h> 6313675Sdyson#include <sys/signalvar.h> 6413675Sdyson#include <sys/sysproto.h> 6513675Sdyson#include <sys/pipe.h> 6676166Smarkm#include <sys/proc.h> 6755112Sbde#include <sys/vnode.h> 6834924Sbde#include <sys/uio.h> 6959288Sjlemon#include <sys/event.h> 7013675Sdyson 7113675Sdyson#include <vm/vm.h> 7213675Sdyson#include <vm/vm_param.h> 7313675Sdyson#include <vm/vm_object.h> 7413675Sdyson#include <vm/vm_kern.h> 7513675Sdyson#include <vm/vm_extern.h> 7613675Sdyson#include <vm/pmap.h> 7713675Sdyson#include <vm/vm_map.h> 7813907Sdyson#include <vm/vm_page.h> 7927899Sdyson#include <vm/vm_zone.h> 8013675Sdyson 8114037Sdyson/* 8214037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 8314037Sdyson * approx 30% decrease in transfer rate. This could be useful for 8414037Sdyson * NetBSD or OpenBSD. 8514037Sdyson */ 8614037Sdyson/* #define PIPE_NODIRECT */ 8714037Sdyson 8814037Sdyson/* 8914037Sdyson * interfaces to the outside world 9014037Sdyson */ 9113675Sdysonstatic int pipe_read __P((struct file *fp, struct uio *uio, 9251418Sgreen struct ucred *cred, int flags, struct proc *p)); 9313675Sdysonstatic int pipe_write __P((struct file *fp, struct uio *uio, 9451418Sgreen struct ucred *cred, int flags, struct proc *p)); 9513675Sdysonstatic int pipe_close __P((struct file *fp, struct proc *p)); 9629356Speterstatic int pipe_poll __P((struct file *fp, int events, struct ucred *cred, 9729356Speter struct proc *p)); 9872521Sjlemonstatic int pipe_kqfilter __P((struct file *fp, struct knote *kn)); 9952983Speterstatic int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p)); 10036735Sdfrstatic int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); 10113675Sdyson 10272521Sjlemonstatic struct fileops pipeops = { 10372521Sjlemon pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter, 10472521Sjlemon pipe_stat, pipe_close 10572521Sjlemon}; 10613675Sdyson 10759288Sjlemonstatic void filt_pipedetach(struct knote *kn); 10859288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 10959288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 11059288Sjlemon 11172521Sjlemonstatic struct filterops pipe_rfiltops = 11272521Sjlemon { 1, NULL, filt_pipedetach, filt_piperead }; 11372521Sjlemonstatic struct filterops pipe_wfiltops = 11472521Sjlemon { 1, NULL, filt_pipedetach, filt_pipewrite }; 11559288Sjlemon 11672521Sjlemon 11713675Sdyson/* 11813675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 11913675Sdyson * space is pageable. The pipe code will try to maintain locality of 12013675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 12113675Sdyson * will not wipe the cache. 12213675Sdyson */ 12313907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 12413907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 12513675Sdyson 12613907Sdyson/* 12713907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 12813907Sdyson * is there so that on large systems, we don't exhaust it. 12913907Sdyson */ 13013907Sdyson#define MAXPIPEKVA (8*1024*1024) 13113907Sdyson 13213907Sdyson/* 13313907Sdyson * Limit for direct transfers, we cannot, of course limit 13413907Sdyson * the amount of kva for pipes in general though. 13513907Sdyson */ 13613907Sdyson#define LIMITPIPEKVA (16*1024*1024) 13717163Sdyson 13817163Sdyson/* 13917163Sdyson * Limit the number of "big" pipes 14017163Sdyson */ 14117163Sdyson#define LIMITBIGPIPES 32 14233181Seivindstatic int nbigpipe; 14317163Sdyson 14417124Sbdestatic int amountpipekva; 14513907Sdyson 14613675Sdysonstatic void pipeclose __P((struct pipe *cpipe)); 14713675Sdysonstatic void pipeinit __P((struct pipe *cpipe)); 14813907Sdysonstatic __inline int pipelock __P((struct pipe *cpipe, int catch)); 14913675Sdysonstatic __inline void pipeunlock __P((struct pipe *cpipe)); 15014122Speterstatic __inline void pipeselwakeup __P((struct pipe *cpipe)); 15114037Sdyson#ifndef PIPE_NODIRECT 15213907Sdysonstatic int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 15313907Sdysonstatic void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 15413907Sdysonstatic int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 15513907Sdysonstatic void pipe_clone_write_buffer __P((struct pipe *wpipe)); 15614037Sdyson#endif 15713907Sdysonstatic void pipespace __P((struct pipe *cpipe)); 15813675Sdyson 15933181Seivindstatic vm_zone_t pipe_zone; 16027899Sdyson 16113675Sdyson/* 16213675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 16313675Sdyson */ 16413675Sdyson 16513675Sdyson/* ARGSUSED */ 16613675Sdysonint 16730994Sphkpipe(p, uap) 16813675Sdyson struct proc *p; 16913675Sdyson struct pipe_args /* { 17013675Sdyson int dummy; 17113675Sdyson } */ *uap; 17213675Sdyson{ 17313675Sdyson register struct filedesc *fdp = p->p_fd; 17413675Sdyson struct file *rf, *wf; 17513675Sdyson struct pipe *rpipe, *wpipe; 17613675Sdyson int fd, error; 17713675Sdyson 17827899Sdyson if (pipe_zone == NULL) 17927923Sdyson pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4); 18027899Sdyson 18127899Sdyson rpipe = zalloc( pipe_zone); 18213675Sdyson pipeinit(rpipe); 18313907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 18427899Sdyson wpipe = zalloc( pipe_zone); 18513675Sdyson pipeinit(wpipe); 18613907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 18713675Sdyson 18870915Sdwmalone error = falloc(p, &rf, &fd); 18970915Sdwmalone if (error) { 19070915Sdwmalone pipeclose(rpipe); 19170915Sdwmalone pipeclose(wpipe); 19270915Sdwmalone return (error); 19370915Sdwmalone } 19470915Sdwmalone fhold(rf); 19570915Sdwmalone p->p_retval[0] = fd; 19670915Sdwmalone 19770803Sdwmalone /* 19870803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 19970803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 20070803Sdwmalone * to avoid races against processes which manage to dup() the read 20170803Sdwmalone * side while we are blocked trying to allocate the write side. 20270803Sdwmalone */ 20313675Sdyson rf->f_flag = FREAD | FWRITE; 20413675Sdyson rf->f_type = DTYPE_PIPE; 20549413Sgreen rf->f_data = (caddr_t)rpipe; 20613675Sdyson rf->f_ops = &pipeops; 20713675Sdyson error = falloc(p, &wf, &fd); 20870915Sdwmalone if (error) { 20970915Sdwmalone if (fdp->fd_ofiles[p->p_retval[0]] == rf) { 21070915Sdwmalone fdp->fd_ofiles[p->p_retval[0]] = NULL; 21170915Sdwmalone fdrop(rf, p); 21270915Sdwmalone } 21370915Sdwmalone fdrop(rf, p); 21470915Sdwmalone /* rpipe has been closed by fdrop(). */ 21570915Sdwmalone pipeclose(wpipe); 21670915Sdwmalone return (error); 21770915Sdwmalone } 21813675Sdyson wf->f_flag = FREAD | FWRITE; 21913675Sdyson wf->f_type = DTYPE_PIPE; 22049413Sgreen wf->f_data = (caddr_t)wpipe; 22113675Sdyson wf->f_ops = &pipeops; 22230994Sphk p->p_retval[1] = fd; 22313675Sdyson 22413675Sdyson rpipe->pipe_peer = wpipe; 22513675Sdyson wpipe->pipe_peer = rpipe; 22668883Sdillon fdrop(rf, p); 22713675Sdyson 22813675Sdyson return (0); 22913675Sdyson} 23013675Sdyson 23113909Sdyson/* 23213909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 23313909Sdyson */ 23413675Sdysonstatic void 23513907Sdysonpipespace(cpipe) 23613675Sdyson struct pipe *cpipe; 23713675Sdyson{ 23813688Sdyson int npages, error; 23913675Sdyson 24013907Sdyson npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE; 24113675Sdyson /* 24213675Sdyson * Create an object, I don't like the idea of paging to/from 24313675Sdyson * kernel_object. 24414037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 24513675Sdyson */ 24613675Sdyson cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages); 24713688Sdyson cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map); 24813675Sdyson 24913675Sdyson /* 25013675Sdyson * Insert the object into the kernel map, and allocate kva for it. 25113675Sdyson * The map entry is, by default, pageable. 25214037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 25313675Sdyson */ 25413688Sdyson error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0, 25513907Sdyson (vm_offset_t *) &cpipe->pipe_buffer.buffer, 25613907Sdyson cpipe->pipe_buffer.size, 1, 25713688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 25813675Sdyson 25913688Sdyson if (error != KERN_SUCCESS) 26013688Sdyson panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error); 26113907Sdyson amountpipekva += cpipe->pipe_buffer.size; 26213907Sdyson} 26313688Sdyson 26413907Sdyson/* 26513907Sdyson * initialize and allocate VM and memory for pipe 26613907Sdyson */ 26713907Sdysonstatic void 26813907Sdysonpipeinit(cpipe) 26913907Sdyson struct pipe *cpipe; 27013907Sdyson{ 27113907Sdyson 27213675Sdyson cpipe->pipe_buffer.in = 0; 27313675Sdyson cpipe->pipe_buffer.out = 0; 27413675Sdyson cpipe->pipe_buffer.cnt = 0; 27513907Sdyson cpipe->pipe_buffer.size = PIPE_SIZE; 27617163Sdyson 27713907Sdyson /* Buffer kva gets dynamically allocated */ 27813907Sdyson cpipe->pipe_buffer.buffer = NULL; 27917124Sbde /* cpipe->pipe_buffer.object = invalid */ 28013675Sdyson 28113675Sdyson cpipe->pipe_state = 0; 28213675Sdyson cpipe->pipe_peer = NULL; 28313675Sdyson cpipe->pipe_busy = 0; 28455112Sbde vfs_timestamp(&cpipe->pipe_ctime); 28524101Sbde cpipe->pipe_atime = cpipe->pipe_ctime; 28624101Sbde cpipe->pipe_mtime = cpipe->pipe_ctime; 28713675Sdyson bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); 28813907Sdyson 28914037Sdyson#ifndef PIPE_NODIRECT 29013907Sdyson /* 29113907Sdyson * pipe data structure initializations to support direct pipe I/O 29213907Sdyson */ 29313907Sdyson cpipe->pipe_map.cnt = 0; 29413907Sdyson cpipe->pipe_map.kva = 0; 29513907Sdyson cpipe->pipe_map.pos = 0; 29613907Sdyson cpipe->pipe_map.npages = 0; 29717124Sbde /* cpipe->pipe_map.ms[] = invalid */ 29814037Sdyson#endif 29913675Sdyson} 30013675Sdyson 30113675Sdyson 30213675Sdyson/* 30313675Sdyson * lock a pipe for I/O, blocking other access 30413675Sdyson */ 30513675Sdysonstatic __inline int 30613907Sdysonpipelock(cpipe, catch) 30713675Sdyson struct pipe *cpipe; 30813907Sdyson int catch; 30913675Sdyson{ 31013776Sdyson int error; 31113675Sdyson while (cpipe->pipe_state & PIPE_LOCK) { 31213675Sdyson cpipe->pipe_state |= PIPE_LWANT; 31343301Sdillon if ((error = tsleep( cpipe, 31443301Sdillon catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) != 0) { 31513776Sdyson return error; 31613675Sdyson } 31713675Sdyson } 31813675Sdyson cpipe->pipe_state |= PIPE_LOCK; 31913675Sdyson return 0; 32013675Sdyson} 32113675Sdyson 32213675Sdyson/* 32313675Sdyson * unlock a pipe I/O lock 32413675Sdyson */ 32513675Sdysonstatic __inline void 32613675Sdysonpipeunlock(cpipe) 32713675Sdyson struct pipe *cpipe; 32813675Sdyson{ 32913675Sdyson cpipe->pipe_state &= ~PIPE_LOCK; 33013675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 33113675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 33214177Sdyson wakeup(cpipe); 33313675Sdyson } 33413675Sdyson} 33513675Sdyson 33614037Sdysonstatic __inline void 33714037Sdysonpipeselwakeup(cpipe) 33814037Sdyson struct pipe *cpipe; 33914037Sdyson{ 34014037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 34114037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 34214037Sdyson selwakeup(&cpipe->pipe_sel); 34314037Sdyson } 34441086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 34541086Struckman pgsigio(cpipe->pipe_sigio, SIGIO, 0); 34659288Sjlemon KNOTE(&cpipe->pipe_sel.si_note, 0); 34714037Sdyson} 34814037Sdyson 34913675Sdyson/* ARGSUSED */ 35013675Sdysonstatic int 35151418Sgreenpipe_read(fp, uio, cred, flags, p) 35213675Sdyson struct file *fp; 35313675Sdyson struct uio *uio; 35413675Sdyson struct ucred *cred; 35551418Sgreen struct proc *p; 35645311Sdt int flags; 35713675Sdyson{ 35813675Sdyson 35913675Sdyson struct pipe *rpipe = (struct pipe *) fp->f_data; 36047748Salc int error; 36113675Sdyson int nread = 0; 36218863Sdyson u_int size; 36313675Sdyson 36413675Sdyson ++rpipe->pipe_busy; 36547748Salc error = pipelock(rpipe, 1); 36647748Salc if (error) 36747748Salc goto unlocked_error; 36847748Salc 36913675Sdyson while (uio->uio_resid) { 37013907Sdyson /* 37113907Sdyson * normal pipe buffer receive 37213907Sdyson */ 37313675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 37418863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 37513675Sdyson if (size > rpipe->pipe_buffer.cnt) 37613675Sdyson size = rpipe->pipe_buffer.cnt; 37718863Sdyson if (size > (u_int) uio->uio_resid) 37818863Sdyson size = (u_int) uio->uio_resid; 37947748Salc 38047748Salc error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 38113675Sdyson size, uio); 38213675Sdyson if (error) { 38313675Sdyson break; 38413675Sdyson } 38513675Sdyson rpipe->pipe_buffer.out += size; 38613675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 38713675Sdyson rpipe->pipe_buffer.out = 0; 38813675Sdyson 38913675Sdyson rpipe->pipe_buffer.cnt -= size; 39047748Salc 39147748Salc /* 39247748Salc * If there is no more to read in the pipe, reset 39347748Salc * its pointers to the beginning. This improves 39447748Salc * cache hit stats. 39547748Salc */ 39647748Salc if (rpipe->pipe_buffer.cnt == 0) { 39747748Salc rpipe->pipe_buffer.in = 0; 39847748Salc rpipe->pipe_buffer.out = 0; 39947748Salc } 40013675Sdyson nread += size; 40114037Sdyson#ifndef PIPE_NODIRECT 40213907Sdyson /* 40313907Sdyson * Direct copy, bypassing a kernel buffer. 40413907Sdyson */ 40513907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 40647748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 40747748Salc caddr_t va; 40818863Sdyson if (size > (u_int) uio->uio_resid) 40918863Sdyson size = (u_int) uio->uio_resid; 41047748Salc 41147748Salc va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos; 41247748Salc error = uiomove(va, size, uio); 41313907Sdyson if (error) 41413907Sdyson break; 41513907Sdyson nread += size; 41613907Sdyson rpipe->pipe_map.pos += size; 41713907Sdyson rpipe->pipe_map.cnt -= size; 41813907Sdyson if (rpipe->pipe_map.cnt == 0) { 41913907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 42013907Sdyson wakeup(rpipe); 42113907Sdyson } 42214037Sdyson#endif 42313675Sdyson } else { 42413675Sdyson /* 42513675Sdyson * detect EOF condition 42613675Sdyson */ 42713675Sdyson if (rpipe->pipe_state & PIPE_EOF) { 42814802Sdyson /* XXX error = ? */ 42913675Sdyson break; 43013675Sdyson } 43143623Sdillon 43213675Sdyson /* 43313675Sdyson * If the "write-side" has been blocked, wake it up now. 43413675Sdyson */ 43513675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 43613675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 43713675Sdyson wakeup(rpipe); 43813675Sdyson } 43943623Sdillon 44043623Sdillon /* 44147748Salc * Break if some data was read. 44243623Sdillon */ 44347748Salc if (nread > 0) 44413675Sdyson break; 44516960Sdyson 44643623Sdillon /* 44747748Salc * Unlock the pipe buffer for our remaining processing. We 44847748Salc * will either break out with an error or we will sleep and 44947748Salc * relock to loop. 45043623Sdillon */ 45147748Salc pipeunlock(rpipe); 45243623Sdillon 45313675Sdyson /* 45447748Salc * Handle non-blocking mode operation or 45547748Salc * wait for more data. 45613675Sdyson */ 45747748Salc if (fp->f_flag & FNONBLOCK) 45847748Salc error = EAGAIN; 45947748Salc else { 46047748Salc rpipe->pipe_state |= PIPE_WANTR; 46147748Salc if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0) 46247748Salc error = pipelock(rpipe, 1); 46313675Sdyson } 46447748Salc if (error) 46547748Salc goto unlocked_error; 46613675Sdyson } 46713675Sdyson } 46847748Salc pipeunlock(rpipe); 46913675Sdyson 47024101Sbde if (error == 0) 47155112Sbde vfs_timestamp(&rpipe->pipe_atime); 47247748Salcunlocked_error: 47347748Salc --rpipe->pipe_busy; 47413913Sdyson 47547748Salc /* 47647748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 47747748Salc */ 47813675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 47913675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 48013675Sdyson wakeup(rpipe); 48113675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 48213675Sdyson /* 48347748Salc * Handle write blocking hysteresis. 48413675Sdyson */ 48513675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 48613675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 48713675Sdyson wakeup(rpipe); 48813675Sdyson } 48913675Sdyson } 49014037Sdyson 49114802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 49214037Sdyson pipeselwakeup(rpipe); 49314037Sdyson 49413675Sdyson return error; 49513675Sdyson} 49613675Sdyson 49714037Sdyson#ifndef PIPE_NODIRECT 49813907Sdyson/* 49913907Sdyson * Map the sending processes' buffer into kernel space and wire it. 50013907Sdyson * This is similar to a physical write operation. 50113907Sdyson */ 50213675Sdysonstatic int 50313907Sdysonpipe_build_write_buffer(wpipe, uio) 50413907Sdyson struct pipe *wpipe; 50513675Sdyson struct uio *uio; 50613675Sdyson{ 50718863Sdyson u_int size; 50813907Sdyson int i; 50913907Sdyson vm_offset_t addr, endaddr, paddr; 51013907Sdyson 51118863Sdyson size = (u_int) uio->uio_iov->iov_len; 51213907Sdyson if (size > wpipe->pipe_buffer.size) 51313907Sdyson size = wpipe->pipe_buffer.size; 51413907Sdyson 51540286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 51640286Sdg for(i = 0, addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 51713907Sdyson addr < endaddr; 51813907Sdyson addr += PAGE_SIZE, i+=1) { 51913907Sdyson 52013907Sdyson vm_page_t m; 52113907Sdyson 52251474Sdillon if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 52351474Sdillon (paddr = pmap_kextract(addr)) == 0) { 52413907Sdyson int j; 52513907Sdyson for(j=0;j<i;j++) 52640700Sdg vm_page_unwire(wpipe->pipe_map.ms[j], 1); 52713907Sdyson return EFAULT; 52813907Sdyson } 52913907Sdyson 53013907Sdyson m = PHYS_TO_VM_PAGE(paddr); 53113907Sdyson vm_page_wire(m); 53213907Sdyson wpipe->pipe_map.ms[i] = m; 53313907Sdyson } 53413907Sdyson 53513907Sdyson/* 53613907Sdyson * set up the control block 53713907Sdyson */ 53813907Sdyson wpipe->pipe_map.npages = i; 53913907Sdyson wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 54013907Sdyson wpipe->pipe_map.cnt = size; 54113907Sdyson 54213907Sdyson/* 54313907Sdyson * and map the buffer 54413907Sdyson */ 54513907Sdyson if (wpipe->pipe_map.kva == 0) { 54613912Sdyson /* 54713912Sdyson * We need to allocate space for an extra page because the 54813912Sdyson * address range might (will) span pages at times. 54913912Sdyson */ 55013907Sdyson wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 55113912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 55213912Sdyson amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 55313907Sdyson } 55413907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 55513907Sdyson wpipe->pipe_map.npages); 55613907Sdyson 55713907Sdyson/* 55813907Sdyson * and update the uio data 55913907Sdyson */ 56013907Sdyson 56113907Sdyson uio->uio_iov->iov_len -= size; 56213907Sdyson uio->uio_iov->iov_base += size; 56313907Sdyson if (uio->uio_iov->iov_len == 0) 56413907Sdyson uio->uio_iov++; 56513907Sdyson uio->uio_resid -= size; 56613907Sdyson uio->uio_offset += size; 56713907Sdyson return 0; 56813907Sdyson} 56913907Sdyson 57013907Sdyson/* 57113907Sdyson * unmap and unwire the process buffer 57213907Sdyson */ 57313907Sdysonstatic void 57413907Sdysonpipe_destroy_write_buffer(wpipe) 57513907Sdysonstruct pipe *wpipe; 57613907Sdyson{ 57713907Sdyson int i; 57817163Sdyson if (wpipe->pipe_map.kva) { 57917163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 58013907Sdyson 58113907Sdyson if (amountpipekva > MAXPIPEKVA) { 58213907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 58313907Sdyson wpipe->pipe_map.kva = 0; 58413907Sdyson kmem_free(kernel_map, kva, 58513912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 58613912Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 58713907Sdyson } 58813907Sdyson } 58913907Sdyson for (i=0;i<wpipe->pipe_map.npages;i++) 59040700Sdg vm_page_unwire(wpipe->pipe_map.ms[i], 1); 59113907Sdyson} 59213907Sdyson 59313907Sdyson/* 59413907Sdyson * In the case of a signal, the writing process might go away. This 59513907Sdyson * code copies the data into the circular buffer so that the source 59613907Sdyson * pages can be freed without loss of data. 59713907Sdyson */ 59813907Sdysonstatic void 59913907Sdysonpipe_clone_write_buffer(wpipe) 60013907Sdysonstruct pipe *wpipe; 60113907Sdyson{ 60213907Sdyson int size; 60313907Sdyson int pos; 60413907Sdyson 60513907Sdyson size = wpipe->pipe_map.cnt; 60613907Sdyson pos = wpipe->pipe_map.pos; 60713907Sdyson bcopy((caddr_t) wpipe->pipe_map.kva+pos, 60813907Sdyson (caddr_t) wpipe->pipe_buffer.buffer, 60913907Sdyson size); 61013907Sdyson 61113907Sdyson wpipe->pipe_buffer.in = size; 61213907Sdyson wpipe->pipe_buffer.out = 0; 61313907Sdyson wpipe->pipe_buffer.cnt = size; 61413907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 61513907Sdyson 61613907Sdyson pipe_destroy_write_buffer(wpipe); 61713907Sdyson} 61813907Sdyson 61913907Sdyson/* 62013907Sdyson * This implements the pipe buffer write mechanism. Note that only 62113907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 62213907Sdyson * If there are any characters in the pipe buffer, the direct write will 62313907Sdyson * be deferred until the receiving process grabs all of the bytes from 62413907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 62513907Sdyson */ 62613907Sdysonstatic int 62713907Sdysonpipe_direct_write(wpipe, uio) 62813907Sdyson struct pipe *wpipe; 62913907Sdyson struct uio *uio; 63013907Sdyson{ 63113907Sdyson int error; 63213951Sdysonretry: 63313907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 63413951Sdyson if ( wpipe->pipe_state & PIPE_WANTR) { 63513951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 63613951Sdyson wakeup(wpipe); 63713951Sdyson } 63813992Sdyson wpipe->pipe_state |= PIPE_WANTW; 63913907Sdyson error = tsleep(wpipe, 64013907Sdyson PRIBIO|PCATCH, "pipdww", 0); 64114802Sdyson if (error) 64213907Sdyson goto error1; 64314802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 64414802Sdyson error = EPIPE; 64514802Sdyson goto error1; 64614802Sdyson } 64713907Sdyson } 64813907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 64913951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 65013951Sdyson if ( wpipe->pipe_state & PIPE_WANTR) { 65113951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 65213951Sdyson wakeup(wpipe); 65313951Sdyson } 65413951Sdyson 65513992Sdyson wpipe->pipe_state |= PIPE_WANTW; 65613907Sdyson error = tsleep(wpipe, 65713907Sdyson PRIBIO|PCATCH, "pipdwc", 0); 65814802Sdyson if (error) 65913907Sdyson goto error1; 66014802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 66114802Sdyson error = EPIPE; 66214802Sdyson goto error1; 66313907Sdyson } 66413951Sdyson goto retry; 66513907Sdyson } 66613907Sdyson 66713951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 66813951Sdyson 66913907Sdyson error = pipe_build_write_buffer(wpipe, uio); 67013907Sdyson if (error) { 67113907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 67213907Sdyson goto error1; 67313907Sdyson } 67413907Sdyson 67513907Sdyson error = 0; 67613907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 67713907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 67813907Sdyson pipelock(wpipe, 0); 67913907Sdyson pipe_destroy_write_buffer(wpipe); 68013907Sdyson pipeunlock(wpipe); 68114037Sdyson pipeselwakeup(wpipe); 68214802Sdyson error = EPIPE; 68314802Sdyson goto error1; 68413907Sdyson } 68513992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 68613992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 68713992Sdyson wakeup(wpipe); 68813992Sdyson } 68914037Sdyson pipeselwakeup(wpipe); 69013907Sdyson error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0); 69113907Sdyson } 69213907Sdyson 69313907Sdyson pipelock(wpipe,0); 69413907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 69513907Sdyson /* 69613907Sdyson * this bit of trickery substitutes a kernel buffer for 69713907Sdyson * the process that might be going away. 69813907Sdyson */ 69913907Sdyson pipe_clone_write_buffer(wpipe); 70013907Sdyson } else { 70113907Sdyson pipe_destroy_write_buffer(wpipe); 70213907Sdyson } 70313907Sdyson pipeunlock(wpipe); 70413907Sdyson return error; 70513907Sdyson 70613907Sdysonerror1: 70713907Sdyson wakeup(wpipe); 70813907Sdyson return error; 70913907Sdyson} 71014037Sdyson#endif 71113907Sdyson 71216960Sdysonstatic int 71351418Sgreenpipe_write(fp, uio, cred, flags, p) 71416960Sdyson struct file *fp; 71513907Sdyson struct uio *uio; 71616960Sdyson struct ucred *cred; 71751418Sgreen struct proc *p; 71845311Sdt int flags; 71913907Sdyson{ 72013675Sdyson int error = 0; 72113913Sdyson int orig_resid; 72213675Sdyson 72316960Sdyson struct pipe *wpipe, *rpipe; 72416960Sdyson 72516960Sdyson rpipe = (struct pipe *) fp->f_data; 72616960Sdyson wpipe = rpipe->pipe_peer; 72716960Sdyson 72813675Sdyson /* 72913675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 73013675Sdyson */ 73116960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 73213774Sdyson return EPIPE; 73313675Sdyson } 73413675Sdyson 73517163Sdyson /* 73617163Sdyson * If it is advantageous to resize the pipe buffer, do 73717163Sdyson * so. 73817163Sdyson */ 73917163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 74017163Sdyson (nbigpipe < LIMITBIGPIPES) && 74117163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 74217163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 74317163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 74417163Sdyson 74517163Sdyson if (wpipe->pipe_buffer.buffer) { 74617163Sdyson amountpipekva -= wpipe->pipe_buffer.size; 74717163Sdyson kmem_free(kernel_map, 74817163Sdyson (vm_offset_t)wpipe->pipe_buffer.buffer, 74917163Sdyson wpipe->pipe_buffer.size); 75017163Sdyson } 75117163Sdyson 75217163Sdyson#ifndef PIPE_NODIRECT 75317163Sdyson if (wpipe->pipe_map.kva) { 75417163Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 75517163Sdyson kmem_free(kernel_map, 75617163Sdyson wpipe->pipe_map.kva, 75717163Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 75817163Sdyson } 75917163Sdyson#endif 76017163Sdyson 76117163Sdyson wpipe->pipe_buffer.in = 0; 76217163Sdyson wpipe->pipe_buffer.out = 0; 76317163Sdyson wpipe->pipe_buffer.cnt = 0; 76417163Sdyson wpipe->pipe_buffer.size = BIG_PIPE_SIZE; 76517163Sdyson wpipe->pipe_buffer.buffer = NULL; 76617163Sdyson ++nbigpipe; 76717163Sdyson 76817163Sdyson#ifndef PIPE_NODIRECT 76917163Sdyson wpipe->pipe_map.cnt = 0; 77017163Sdyson wpipe->pipe_map.kva = 0; 77117163Sdyson wpipe->pipe_map.pos = 0; 77217163Sdyson wpipe->pipe_map.npages = 0; 77317163Sdyson#endif 77417163Sdyson 77517163Sdyson } 77617163Sdyson 77717163Sdyson 77813907Sdyson if( wpipe->pipe_buffer.buffer == NULL) { 77913907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 78013907Sdyson pipespace(wpipe); 78113907Sdyson pipeunlock(wpipe); 78213907Sdyson } else { 78313907Sdyson return error; 78413907Sdyson } 78513907Sdyson } 78613907Sdyson 78713675Sdyson ++wpipe->pipe_busy; 78813913Sdyson orig_resid = uio->uio_resid; 78913675Sdyson while (uio->uio_resid) { 79013907Sdyson int space; 79114037Sdyson#ifndef PIPE_NODIRECT 79213907Sdyson /* 79313907Sdyson * If the transfer is large, we can gain performance if 79413907Sdyson * we do process-to-process copies directly. 79516416Sdyson * If the write is non-blocking, we don't use the 79616416Sdyson * direct write mechanism. 79758505Sdillon * 79858505Sdillon * The direct write mechanism will detect the reader going 79958505Sdillon * away on us. 80013907Sdyson */ 80117163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 80217163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 80317163Sdyson (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 80413907Sdyson (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 80513907Sdyson error = pipe_direct_write( wpipe, uio); 80613907Sdyson if (error) { 80713907Sdyson break; 80813907Sdyson } 80913907Sdyson continue; 81013907Sdyson } 81114037Sdyson#endif 81213907Sdyson 81313907Sdyson /* 81413907Sdyson * Pipe buffered writes cannot be coincidental with 81513907Sdyson * direct writes. We wait until the currently executing 81613907Sdyson * direct write is completed before we start filling the 81758505Sdillon * pipe buffer. We break out if a signal occurs or the 81858505Sdillon * reader goes away. 81913907Sdyson */ 82013907Sdyson retrywrite: 82113907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 82213992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 82313992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 82413992Sdyson wakeup(wpipe); 82513992Sdyson } 82658505Sdillon error = tsleep(wpipe, PRIBIO|PCATCH, "pipbww", 0); 82758505Sdillon if (wpipe->pipe_state & PIPE_EOF) 82858505Sdillon break; 82913907Sdyson if (error) 83013907Sdyson break; 83113907Sdyson } 83258505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 83358505Sdillon error = EPIPE; 83458505Sdillon break; 83558505Sdillon } 83613907Sdyson 83713907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 83814644Sdyson 83914644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 84013913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 84113913Sdyson space = 0; 84213907Sdyson 84317163Sdyson if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 84413907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 84554534Stegge int size; /* Transfer size */ 84654534Stegge int segsize; /* first segment to transfer */ 84713907Sdyson /* 84813907Sdyson * It is possible for a direct write to 84913907Sdyson * slip in on us... handle it here... 85013907Sdyson */ 85113907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 85213907Sdyson pipeunlock(wpipe); 85313907Sdyson goto retrywrite; 85413907Sdyson } 85554534Stegge /* 85654534Stegge * If a process blocked in uiomove, our 85754534Stegge * value for space might be bad. 85858505Sdillon * 85958505Sdillon * XXX will we be ok if the reader has gone 86058505Sdillon * away here? 86154534Stegge */ 86254534Stegge if (space > wpipe->pipe_buffer.size - 86354534Stegge wpipe->pipe_buffer.cnt) { 86454534Stegge pipeunlock(wpipe); 86554534Stegge goto retrywrite; 86654534Stegge } 86754534Stegge 86854534Stegge /* 86954534Stegge * Transfer size is minimum of uio transfer 87054534Stegge * and free space in pipe buffer. 87154534Stegge */ 87254534Stegge if (space > uio->uio_resid) 87354534Stegge size = uio->uio_resid; 87454534Stegge else 87554534Stegge size = space; 87654534Stegge /* 87754534Stegge * First segment to transfer is minimum of 87854534Stegge * transfer size and contiguous space in 87954534Stegge * pipe buffer. If first segment to transfer 88054534Stegge * is less than the transfer size, we've got 88154534Stegge * a wraparound in the buffer. 88254534Stegge */ 88354534Stegge segsize = wpipe->pipe_buffer.size - 88454534Stegge wpipe->pipe_buffer.in; 88554534Stegge if (segsize > size) 88654534Stegge segsize = size; 88754534Stegge 88854534Stegge /* Transfer first segment */ 88954534Stegge 89054534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 89154534Stegge segsize, uio); 89254534Stegge 89354534Stegge if (error == 0 && segsize < size) { 89454534Stegge /* 89554534Stegge * Transfer remaining part now, to 89654534Stegge * support atomic writes. Wraparound 89754534Stegge * happened. 89854534Stegge */ 89954534Stegge if (wpipe->pipe_buffer.in + segsize != 90054534Stegge wpipe->pipe_buffer.size) 90154534Stegge panic("Expected pipe buffer wraparound disappeared"); 90254534Stegge 90354534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[0], 90454534Stegge size - segsize, uio); 90554534Stegge } 90654534Stegge if (error == 0) { 90754534Stegge wpipe->pipe_buffer.in += size; 90854534Stegge if (wpipe->pipe_buffer.in >= 90954534Stegge wpipe->pipe_buffer.size) { 91054534Stegge if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 91154534Stegge panic("Expected wraparound bad"); 91254534Stegge wpipe->pipe_buffer.in = size - segsize; 91354534Stegge } 91454534Stegge 91554534Stegge wpipe->pipe_buffer.cnt += size; 91654534Stegge if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 91754534Stegge panic("Pipe buffer overflow"); 91854534Stegge 91954534Stegge } 92013675Sdyson pipeunlock(wpipe); 92113675Sdyson } 92213675Sdyson if (error) 92313675Sdyson break; 92413675Sdyson 92513675Sdyson } else { 92613675Sdyson /* 92713675Sdyson * If the "read-side" has been blocked, wake it up now. 92813675Sdyson */ 92913675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 93013675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 93113675Sdyson wakeup(wpipe); 93213675Sdyson } 93314037Sdyson 93413675Sdyson /* 93513675Sdyson * don't block on non-blocking I/O 93613675Sdyson */ 93716960Sdyson if (fp->f_flag & FNONBLOCK) { 93813907Sdyson error = EAGAIN; 93913675Sdyson break; 94013675Sdyson } 94113907Sdyson 94214037Sdyson /* 94314037Sdyson * We have no more space and have something to offer, 94429356Speter * wake up select/poll. 94514037Sdyson */ 94614037Sdyson pipeselwakeup(wpipe); 94714037Sdyson 94813675Sdyson wpipe->pipe_state |= PIPE_WANTW; 94943301Sdillon if ((error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) != 0) { 95013675Sdyson break; 95113675Sdyson } 95213675Sdyson /* 95313675Sdyson * If read side wants to go away, we just issue a signal 95413675Sdyson * to ourselves. 95513675Sdyson */ 95613675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 95713774Sdyson error = EPIPE; 95813907Sdyson break; 95913675Sdyson } 96013675Sdyson } 96113675Sdyson } 96213675Sdyson 96314644Sdyson --wpipe->pipe_busy; 96413675Sdyson if ((wpipe->pipe_busy == 0) && 96513675Sdyson (wpipe->pipe_state & PIPE_WANT)) { 96613675Sdyson wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); 96713675Sdyson wakeup(wpipe); 96813675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 96913675Sdyson /* 97013675Sdyson * If we have put any characters in the buffer, we wake up 97113675Sdyson * the reader. 97213675Sdyson */ 97313675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 97413675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 97513675Sdyson wakeup(wpipe); 97613675Sdyson } 97713675Sdyson } 97813909Sdyson 97913909Sdyson /* 98013909Sdyson * Don't return EPIPE if I/O was successful 98113909Sdyson */ 98213907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 98313907Sdyson (uio->uio_resid == 0) && 98413907Sdyson (error == EPIPE)) 98513907Sdyson error = 0; 98613913Sdyson 98724101Sbde if (error == 0) 98855112Sbde vfs_timestamp(&wpipe->pipe_mtime); 98924101Sbde 99014037Sdyson /* 99114037Sdyson * We have something to offer, 99229356Speter * wake up select/poll. 99314037Sdyson */ 99414177Sdyson if (wpipe->pipe_buffer.cnt) 99514037Sdyson pipeselwakeup(wpipe); 99613907Sdyson 99713675Sdyson return error; 99813675Sdyson} 99913675Sdyson 100013675Sdyson/* 100113675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 100213675Sdyson */ 100313675Sdysonint 100413675Sdysonpipe_ioctl(fp, cmd, data, p) 100513675Sdyson struct file *fp; 100636735Sdfr u_long cmd; 100713675Sdyson register caddr_t data; 100813675Sdyson struct proc *p; 100913675Sdyson{ 101013675Sdyson register struct pipe *mpipe = (struct pipe *)fp->f_data; 101113675Sdyson 101213675Sdyson switch (cmd) { 101313675Sdyson 101413675Sdyson case FIONBIO: 101513675Sdyson return (0); 101613675Sdyson 101713675Sdyson case FIOASYNC: 101813675Sdyson if (*(int *)data) { 101913675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 102013675Sdyson } else { 102113675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 102213675Sdyson } 102313675Sdyson return (0); 102413675Sdyson 102513675Sdyson case FIONREAD: 102614037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 102714037Sdyson *(int *)data = mpipe->pipe_map.cnt; 102814037Sdyson else 102914037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 103013675Sdyson return (0); 103113675Sdyson 103241086Struckman case FIOSETOWN: 103341086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 103441086Struckman 103541086Struckman case FIOGETOWN: 103641086Struckman *(int *)data = fgetown(mpipe->pipe_sigio); 103713675Sdyson return (0); 103813675Sdyson 103941086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 104041086Struckman case TIOCSPGRP: 104141086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 104241086Struckman 104341086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 104418863Sdyson case TIOCGPGRP: 104541086Struckman *(int *)data = -fgetown(mpipe->pipe_sigio); 104613675Sdyson return (0); 104713675Sdyson 104813675Sdyson } 104917124Sbde return (ENOTTY); 105013675Sdyson} 105113675Sdyson 105213675Sdysonint 105329356Speterpipe_poll(fp, events, cred, p) 105413675Sdyson struct file *fp; 105529356Speter int events; 105629356Speter struct ucred *cred; 105713675Sdyson struct proc *p; 105813675Sdyson{ 105913675Sdyson register struct pipe *rpipe = (struct pipe *)fp->f_data; 106013675Sdyson struct pipe *wpipe; 106129356Speter int revents = 0; 106213675Sdyson 106313675Sdyson wpipe = rpipe->pipe_peer; 106429356Speter if (events & (POLLIN | POLLRDNORM)) 106529356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 106629356Speter (rpipe->pipe_buffer.cnt > 0) || 106729356Speter (rpipe->pipe_state & PIPE_EOF)) 106829356Speter revents |= events & (POLLIN | POLLRDNORM); 106913675Sdyson 107029356Speter if (events & (POLLOUT | POLLWRNORM)) 107129356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 107243311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 107343311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 107429356Speter revents |= events & (POLLOUT | POLLWRNORM); 107513675Sdyson 107629356Speter if ((rpipe->pipe_state & PIPE_EOF) || 107729356Speter (wpipe == NULL) || 107829356Speter (wpipe->pipe_state & PIPE_EOF)) 107929356Speter revents |= POLLHUP; 108029356Speter 108129356Speter if (revents == 0) { 108229356Speter if (events & (POLLIN | POLLRDNORM)) { 108329356Speter selrecord(p, &rpipe->pipe_sel); 108429356Speter rpipe->pipe_state |= PIPE_SEL; 108513675Sdyson } 108613675Sdyson 108729356Speter if (events & (POLLOUT | POLLWRNORM)) { 108830164Speter selrecord(p, &wpipe->pipe_sel); 108930164Speter wpipe->pipe_state |= PIPE_SEL; 109013907Sdyson } 109113675Sdyson } 109229356Speter 109329356Speter return (revents); 109413675Sdyson} 109513675Sdyson 109652983Speterstatic int 109752983Speterpipe_stat(fp, ub, p) 109852983Speter struct file *fp; 109952983Speter struct stat *ub; 110052983Speter struct proc *p; 110113675Sdyson{ 110252983Speter struct pipe *pipe = (struct pipe *)fp->f_data; 110352983Speter 110413675Sdyson bzero((caddr_t)ub, sizeof (*ub)); 110517124Sbde ub->st_mode = S_IFIFO; 110613907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 110713675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 110813675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 110934901Sphk ub->st_atimespec = pipe->pipe_atime; 111034901Sphk ub->st_mtimespec = pipe->pipe_mtime; 111134901Sphk ub->st_ctimespec = pipe->pipe_ctime; 111260404Schris ub->st_uid = fp->f_cred->cr_uid; 111360404Schris ub->st_gid = fp->f_cred->cr_gid; 111417124Sbde /* 111560404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 111617124Sbde * XXX (st_dev, st_ino) should be unique. 111717124Sbde */ 111813675Sdyson return 0; 111913675Sdyson} 112013675Sdyson 112113675Sdyson/* ARGSUSED */ 112213675Sdysonstatic int 112313675Sdysonpipe_close(fp, p) 112413675Sdyson struct file *fp; 112513675Sdyson struct proc *p; 112613675Sdyson{ 112713675Sdyson struct pipe *cpipe = (struct pipe *)fp->f_data; 112816322Sgpalmer 112949413Sgreen fp->f_ops = &badfileops; 113049413Sgreen fp->f_data = NULL; 113141086Struckman funsetown(cpipe->pipe_sigio); 113213675Sdyson pipeclose(cpipe); 113313675Sdyson return 0; 113413675Sdyson} 113513675Sdyson 113613675Sdyson/* 113713675Sdyson * shutdown the pipe 113813675Sdyson */ 113913675Sdysonstatic void 114013675Sdysonpipeclose(cpipe) 114113675Sdyson struct pipe *cpipe; 114213675Sdyson{ 114313907Sdyson struct pipe *ppipe; 114413675Sdyson if (cpipe) { 114513907Sdyson 114614037Sdyson pipeselwakeup(cpipe); 114713907Sdyson 114813675Sdyson /* 114913675Sdyson * If the other side is blocked, wake it up saying that 115013675Sdyson * we want to close it down. 115113675Sdyson */ 115213675Sdyson while (cpipe->pipe_busy) { 115313675Sdyson wakeup(cpipe); 115413675Sdyson cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; 115513675Sdyson tsleep(cpipe, PRIBIO, "pipecl", 0); 115613675Sdyson } 115713675Sdyson 115813675Sdyson /* 115913675Sdyson * Disconnect from peer 116013675Sdyson */ 116143301Sdillon if ((ppipe = cpipe->pipe_peer) != NULL) { 116214037Sdyson pipeselwakeup(ppipe); 116313907Sdyson 116413907Sdyson ppipe->pipe_state |= PIPE_EOF; 116513907Sdyson wakeup(ppipe); 116613907Sdyson ppipe->pipe_peer = NULL; 116713675Sdyson } 116813675Sdyson 116913675Sdyson /* 117013675Sdyson * free resources 117113675Sdyson */ 117213907Sdyson if (cpipe->pipe_buffer.buffer) { 117317163Sdyson if (cpipe->pipe_buffer.size > PIPE_SIZE) 117417163Sdyson --nbigpipe; 117513907Sdyson amountpipekva -= cpipe->pipe_buffer.size; 117613907Sdyson kmem_free(kernel_map, 117713907Sdyson (vm_offset_t)cpipe->pipe_buffer.buffer, 117813907Sdyson cpipe->pipe_buffer.size); 117913907Sdyson } 118014037Sdyson#ifndef PIPE_NODIRECT 118113907Sdyson if (cpipe->pipe_map.kva) { 118213912Sdyson amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 118313907Sdyson kmem_free(kernel_map, 118413907Sdyson cpipe->pipe_map.kva, 118513912Sdyson cpipe->pipe_buffer.size + PAGE_SIZE); 118613907Sdyson } 118714037Sdyson#endif 118827899Sdyson zfree(pipe_zone, cpipe); 118913675Sdyson } 119013675Sdyson} 119159288Sjlemon 119272521Sjlemon/*ARGSUSED*/ 119359288Sjlemonstatic int 119472521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 119559288Sjlemon{ 119659288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 119759288Sjlemon 119872521Sjlemon switch (kn->kn_filter) { 119972521Sjlemon case EVFILT_READ: 120072521Sjlemon kn->kn_fop = &pipe_rfiltops; 120172521Sjlemon break; 120272521Sjlemon case EVFILT_WRITE: 120372521Sjlemon kn->kn_fop = &pipe_wfiltops; 120472521Sjlemon break; 120572521Sjlemon default: 120672521Sjlemon return (1); 120772521Sjlemon } 120872521Sjlemon 120959288Sjlemon SLIST_INSERT_HEAD(&rpipe->pipe_sel.si_note, kn, kn_selnext); 121059288Sjlemon return (0); 121159288Sjlemon} 121259288Sjlemon 121359288Sjlemonstatic void 121459288Sjlemonfilt_pipedetach(struct knote *kn) 121559288Sjlemon{ 121659288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 121759288Sjlemon 121860938Sjake SLIST_REMOVE(&rpipe->pipe_sel.si_note, kn, knote, kn_selnext); 121959288Sjlemon} 122059288Sjlemon 122159288Sjlemon/*ARGSUSED*/ 122259288Sjlemonstatic int 122359288Sjlemonfilt_piperead(struct knote *kn, long hint) 122459288Sjlemon{ 122559288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 122659288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 122759288Sjlemon 122859288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 122959288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 123059288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 123159288Sjlemon 123259288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 123359288Sjlemon (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 123459288Sjlemon kn->kn_flags |= EV_EOF; 123559288Sjlemon return (1); 123659288Sjlemon } 123759288Sjlemon return (kn->kn_data > 0); 123859288Sjlemon} 123959288Sjlemon 124059288Sjlemon/*ARGSUSED*/ 124159288Sjlemonstatic int 124259288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 124359288Sjlemon{ 124459288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 124559288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 124659288Sjlemon 124759288Sjlemon if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 124859288Sjlemon kn->kn_data = 0; 124959288Sjlemon kn->kn_flags |= EV_EOF; 125059288Sjlemon return (1); 125159288Sjlemon } 125259288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 125365855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 125459288Sjlemon kn->kn_data = 0; 125559288Sjlemon 125659288Sjlemon return (kn->kn_data >= PIPE_BUF); 125759288Sjlemon} 1258