sys_pipe.c revision 76364
/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: head/sys/kern/sys_pipe.c 76364 2001-05-08 09:09:18Z alfred $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode. The small write mode acts like conventional pipes with
 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side. In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer. Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching. PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
5013907Sdyson */ 5113907Sdyson 5213675Sdyson#include <sys/param.h> 5313675Sdyson#include <sys/systm.h> 5424131Sbde#include <sys/fcntl.h> 5513675Sdyson#include <sys/file.h> 5613675Sdyson#include <sys/filedesc.h> 5724206Sbde#include <sys/filio.h> 5876166Smarkm#include <sys/lock.h> 5924206Sbde#include <sys/ttycom.h> 6013675Sdyson#include <sys/stat.h> 6129356Speter#include <sys/poll.h> 6270834Swollman#include <sys/selinfo.h> 6313675Sdyson#include <sys/signalvar.h> 6413675Sdyson#include <sys/sysproto.h> 6513675Sdyson#include <sys/pipe.h> 6676166Smarkm#include <sys/proc.h> 6755112Sbde#include <sys/vnode.h> 6834924Sbde#include <sys/uio.h> 6959288Sjlemon#include <sys/event.h> 7013675Sdyson 7113675Sdyson#include <vm/vm.h> 7213675Sdyson#include <vm/vm_param.h> 7313675Sdyson#include <vm/vm_object.h> 7413675Sdyson#include <vm/vm_kern.h> 7513675Sdyson#include <vm/vm_extern.h> 7613675Sdyson#include <vm/pmap.h> 7713675Sdyson#include <vm/vm_map.h> 7813907Sdyson#include <vm/vm_page.h> 7927899Sdyson#include <vm/vm_zone.h> 8013675Sdyson 8114037Sdyson/* 8214037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 8314037Sdyson * approx 30% decrease in transfer rate. This could be useful for 8414037Sdyson * NetBSD or OpenBSD. 
8514037Sdyson */ 8614037Sdyson/* #define PIPE_NODIRECT */ 8714037Sdyson 8814037Sdyson/* 8914037Sdyson * interfaces to the outside world 9014037Sdyson */ 9113675Sdysonstatic int pipe_read __P((struct file *fp, struct uio *uio, 9251418Sgreen struct ucred *cred, int flags, struct proc *p)); 9313675Sdysonstatic int pipe_write __P((struct file *fp, struct uio *uio, 9451418Sgreen struct ucred *cred, int flags, struct proc *p)); 9513675Sdysonstatic int pipe_close __P((struct file *fp, struct proc *p)); 9629356Speterstatic int pipe_poll __P((struct file *fp, int events, struct ucred *cred, 9729356Speter struct proc *p)); 9872521Sjlemonstatic int pipe_kqfilter __P((struct file *fp, struct knote *kn)); 9952983Speterstatic int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p)); 10036735Sdfrstatic int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); 10113675Sdyson 10272521Sjlemonstatic struct fileops pipeops = { 10372521Sjlemon pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter, 10472521Sjlemon pipe_stat, pipe_close 10572521Sjlemon}; 10613675Sdyson 10759288Sjlemonstatic void filt_pipedetach(struct knote *kn); 10859288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 10959288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 11059288Sjlemon 11172521Sjlemonstatic struct filterops pipe_rfiltops = 11272521Sjlemon { 1, NULL, filt_pipedetach, filt_piperead }; 11372521Sjlemonstatic struct filterops pipe_wfiltops = 11472521Sjlemon { 1, NULL, filt_pipedetach, filt_pipewrite }; 11559288Sjlemon 11672521Sjlemon 11713675Sdyson/* 11813675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 11913675Sdyson * space is pageable. The pipe code will try to maintain locality of 12013675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 12113675Sdyson * will not wipe the cache. 
12213675Sdyson */ 12313907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 12413907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 12513675Sdyson 12613907Sdyson/* 12713907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 12813907Sdyson * is there so that on large systems, we don't exhaust it. 12913907Sdyson */ 13013907Sdyson#define MAXPIPEKVA (8*1024*1024) 13113907Sdyson 13213907Sdyson/* 13313907Sdyson * Limit for direct transfers, we cannot, of course limit 13413907Sdyson * the amount of kva for pipes in general though. 13513907Sdyson */ 13613907Sdyson#define LIMITPIPEKVA (16*1024*1024) 13717163Sdyson 13817163Sdyson/* 13917163Sdyson * Limit the number of "big" pipes 14017163Sdyson */ 14117163Sdyson#define LIMITBIGPIPES 32 14233181Seivindstatic int nbigpipe; 14317163Sdyson 14417124Sbdestatic int amountpipekva; 14513907Sdyson 14613675Sdysonstatic void pipeclose __P((struct pipe *cpipe)); 14776364Salfredstatic void pipe_free_kmem __P((struct pipe *cpipe)); 14876364Salfredstatic int pipe_create __P((struct pipe **cpipep)); 14913907Sdysonstatic __inline int pipelock __P((struct pipe *cpipe, int catch)); 15013675Sdysonstatic __inline void pipeunlock __P((struct pipe *cpipe)); 15114122Speterstatic __inline void pipeselwakeup __P((struct pipe *cpipe)); 15214037Sdyson#ifndef PIPE_NODIRECT 15313907Sdysonstatic int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 15413907Sdysonstatic void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 15513907Sdysonstatic int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 15613907Sdysonstatic void pipe_clone_write_buffer __P((struct pipe *wpipe)); 15714037Sdyson#endif 15876364Salfredstatic int pipespace __P((struct pipe *cpipe, int size)); 15913675Sdyson 16033181Seivindstatic vm_zone_t pipe_zone; 16127899Sdyson 16213675Sdyson/* 16313675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 16413675Sdyson */ 16513675Sdyson 16613675Sdyson/* ARGSUSED */ 16713675Sdysonint 
16830994Sphkpipe(p, uap) 16913675Sdyson struct proc *p; 17013675Sdyson struct pipe_args /* { 17113675Sdyson int dummy; 17213675Sdyson } */ *uap; 17313675Sdyson{ 17476364Salfred struct filedesc *fdp = p->p_fd; 17513675Sdyson struct file *rf, *wf; 17613675Sdyson struct pipe *rpipe, *wpipe; 17713675Sdyson int fd, error; 17813675Sdyson 17927899Sdyson if (pipe_zone == NULL) 18027923Sdyson pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4); 18127899Sdyson 18276364Salfred if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 18376364Salfred pipeclose(rpipe); 18476364Salfred pipeclose(wpipe); 18576364Salfred return (ENFILE); 18676364Salfred } 18776364Salfred 18813907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 18913907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 19013675Sdyson 19170915Sdwmalone error = falloc(p, &rf, &fd); 19270915Sdwmalone if (error) { 19370915Sdwmalone pipeclose(rpipe); 19470915Sdwmalone pipeclose(wpipe); 19570915Sdwmalone return (error); 19670915Sdwmalone } 19770915Sdwmalone fhold(rf); 19870915Sdwmalone p->p_retval[0] = fd; 19970915Sdwmalone 20070803Sdwmalone /* 20170803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 20270803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 20370803Sdwmalone * to avoid races against processes which manage to dup() the read 20470803Sdwmalone * side while we are blocked trying to allocate the write side. 20570803Sdwmalone */ 20613675Sdyson rf->f_flag = FREAD | FWRITE; 20713675Sdyson rf->f_type = DTYPE_PIPE; 20849413Sgreen rf->f_data = (caddr_t)rpipe; 20913675Sdyson rf->f_ops = &pipeops; 21013675Sdyson error = falloc(p, &wf, &fd); 21170915Sdwmalone if (error) { 21270915Sdwmalone if (fdp->fd_ofiles[p->p_retval[0]] == rf) { 21370915Sdwmalone fdp->fd_ofiles[p->p_retval[0]] = NULL; 21470915Sdwmalone fdrop(rf, p); 21570915Sdwmalone } 21670915Sdwmalone fdrop(rf, p); 21770915Sdwmalone /* rpipe has been closed by fdrop(). 
*/ 21870915Sdwmalone pipeclose(wpipe); 21970915Sdwmalone return (error); 22070915Sdwmalone } 22113675Sdyson wf->f_flag = FREAD | FWRITE; 22213675Sdyson wf->f_type = DTYPE_PIPE; 22349413Sgreen wf->f_data = (caddr_t)wpipe; 22413675Sdyson wf->f_ops = &pipeops; 22530994Sphk p->p_retval[1] = fd; 22613675Sdyson 22713675Sdyson rpipe->pipe_peer = wpipe; 22813675Sdyson wpipe->pipe_peer = rpipe; 22968883Sdillon fdrop(rf, p); 23013675Sdyson 23113675Sdyson return (0); 23213675Sdyson} 23313675Sdyson 23413909Sdyson/* 23513909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 23676364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 23776364Salfred * it will retain the old buffer. 23876364Salfred * If it fails it will return ENOMEM. 23913909Sdyson */ 24076364Salfredstatic int 24176364Salfredpipespace(cpipe, size) 24213675Sdyson struct pipe *cpipe; 24376364Salfred int size; 24413675Sdyson{ 24576364Salfred struct vm_object *object; 24676364Salfred caddr_t buffer; 24713688Sdyson int npages, error; 24813675Sdyson 24976364Salfred npages = round_page(size)/PAGE_SIZE; 25013675Sdyson /* 25113675Sdyson * Create an object, I don't like the idea of paging to/from 25213675Sdyson * kernel_object. 25314037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 25413675Sdyson */ 25576364Salfred object = vm_object_allocate(OBJT_DEFAULT, npages); 25676364Salfred buffer = (caddr_t) vm_map_min(kernel_map); 25713675Sdyson 25813675Sdyson /* 25913675Sdyson * Insert the object into the kernel map, and allocate kva for it. 26013675Sdyson * The map entry is, by default, pageable. 26114037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
26213675Sdyson */ 26376364Salfred error = vm_map_find(kernel_map, object, 0, 26476364Salfred (vm_offset_t *) &buffer, size, 1, 26513688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 26613675Sdyson 26776364Salfred if (error != KERN_SUCCESS) { 26876364Salfred vm_object_deallocate(object); 26976364Salfred return (ENOMEM); 27076364Salfred } 27176364Salfred 27276364Salfred /* free old resources if we're resizing */ 27376364Salfred pipe_free_kmem(cpipe); 27476364Salfred cpipe->pipe_buffer.object = object; 27576364Salfred cpipe->pipe_buffer.buffer = buffer; 27676364Salfred cpipe->pipe_buffer.size = size; 27776364Salfred cpipe->pipe_buffer.in = 0; 27876364Salfred cpipe->pipe_buffer.out = 0; 27976364Salfred cpipe->pipe_buffer.cnt = 0; 28013907Sdyson amountpipekva += cpipe->pipe_buffer.size; 28176364Salfred return (0); 28213907Sdyson} 28313688Sdyson 28413907Sdyson/* 28513907Sdyson * initialize and allocate VM and memory for pipe 28613907Sdyson */ 28776364Salfredstatic int 28876364Salfredpipe_create(cpipep) 28976364Salfred struct pipe **cpipep; 29076364Salfred{ 29113907Sdyson struct pipe *cpipe; 29276364Salfred int error; 29313907Sdyson 29476364Salfred *cpipep = zalloc(pipe_zone); 29576364Salfred if (*cpipep == NULL) 29676364Salfred return (ENOMEM); 29717163Sdyson 29876364Salfred cpipe = *cpipep; 29976364Salfred 30076364Salfred /* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */ 30176364Salfred cpipe->pipe_buffer.object = NULL; 30276364Salfred#ifndef PIPE_NODIRECT 30376364Salfred cpipe->pipe_map.kva = NULL; 30476364Salfred#endif 30576364Salfred /* 30676364Salfred * protect so pipeclose() doesn't follow a junk pointer 30776364Salfred * if pipespace() fails. 
30876364Salfred */ 30913675Sdyson cpipe->pipe_state = 0; 31013675Sdyson cpipe->pipe_peer = NULL; 31113675Sdyson cpipe->pipe_busy = 0; 31213907Sdyson 31314037Sdyson#ifndef PIPE_NODIRECT 31413907Sdyson /* 31513907Sdyson * pipe data structure initializations to support direct pipe I/O 31613907Sdyson */ 31713907Sdyson cpipe->pipe_map.cnt = 0; 31813907Sdyson cpipe->pipe_map.kva = 0; 31913907Sdyson cpipe->pipe_map.pos = 0; 32013907Sdyson cpipe->pipe_map.npages = 0; 32117124Sbde /* cpipe->pipe_map.ms[] = invalid */ 32214037Sdyson#endif 32376364Salfred 32476364Salfred error = pipespace(cpipe, PIPE_SIZE); 32576364Salfred if (error) { 32676364Salfred return (error); 32776364Salfred } 32876364Salfred 32976364Salfred vfs_timestamp(&cpipe->pipe_ctime); 33076364Salfred cpipe->pipe_atime = cpipe->pipe_ctime; 33176364Salfred cpipe->pipe_mtime = cpipe->pipe_ctime; 33276364Salfred bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); 33376364Salfred 33476364Salfred return (0); 33513675Sdyson} 33613675Sdyson 33713675Sdyson 33813675Sdyson/* 33913675Sdyson * lock a pipe for I/O, blocking other access 34013675Sdyson */ 34113675Sdysonstatic __inline int 34213907Sdysonpipelock(cpipe, catch) 34313675Sdyson struct pipe *cpipe; 34413907Sdyson int catch; 34513675Sdyson{ 34613776Sdyson int error; 34776364Salfred 34813675Sdyson while (cpipe->pipe_state & PIPE_LOCK) { 34913675Sdyson cpipe->pipe_state |= PIPE_LWANT; 35043301Sdillon if ((error = tsleep( cpipe, 35143301Sdillon catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) != 0) { 35213776Sdyson return error; 35313675Sdyson } 35413675Sdyson } 35513675Sdyson cpipe->pipe_state |= PIPE_LOCK; 35613675Sdyson return 0; 35713675Sdyson} 35813675Sdyson 35913675Sdyson/* 36013675Sdyson * unlock a pipe I/O lock 36113675Sdyson */ 36213675Sdysonstatic __inline void 36313675Sdysonpipeunlock(cpipe) 36413675Sdyson struct pipe *cpipe; 36513675Sdyson{ 36676364Salfred 36713675Sdyson cpipe->pipe_state &= ~PIPE_LOCK; 36813675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 
36913675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 37014177Sdyson wakeup(cpipe); 37113675Sdyson } 37213675Sdyson} 37313675Sdyson 37414037Sdysonstatic __inline void 37514037Sdysonpipeselwakeup(cpipe) 37614037Sdyson struct pipe *cpipe; 37714037Sdyson{ 37876364Salfred 37914037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 38014037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 38114037Sdyson selwakeup(&cpipe->pipe_sel); 38214037Sdyson } 38341086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 38441086Struckman pgsigio(cpipe->pipe_sigio, SIGIO, 0); 38559288Sjlemon KNOTE(&cpipe->pipe_sel.si_note, 0); 38614037Sdyson} 38714037Sdyson 38813675Sdyson/* ARGSUSED */ 38913675Sdysonstatic int 39051418Sgreenpipe_read(fp, uio, cred, flags, p) 39113675Sdyson struct file *fp; 39213675Sdyson struct uio *uio; 39313675Sdyson struct ucred *cred; 39451418Sgreen struct proc *p; 39545311Sdt int flags; 39613675Sdyson{ 39713675Sdyson struct pipe *rpipe = (struct pipe *) fp->f_data; 39847748Salc int error; 39913675Sdyson int nread = 0; 40018863Sdyson u_int size; 40113675Sdyson 40213675Sdyson ++rpipe->pipe_busy; 40347748Salc error = pipelock(rpipe, 1); 40447748Salc if (error) 40547748Salc goto unlocked_error; 40647748Salc 40713675Sdyson while (uio->uio_resid) { 40813907Sdyson /* 40913907Sdyson * normal pipe buffer receive 41013907Sdyson */ 41113675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 41218863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 41313675Sdyson if (size > rpipe->pipe_buffer.cnt) 41413675Sdyson size = rpipe->pipe_buffer.cnt; 41518863Sdyson if (size > (u_int) uio->uio_resid) 41618863Sdyson size = (u_int) uio->uio_resid; 41747748Salc 41847748Salc error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 41913675Sdyson size, uio); 42013675Sdyson if (error) { 42113675Sdyson break; 42213675Sdyson } 42313675Sdyson rpipe->pipe_buffer.out += size; 42413675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 42513675Sdyson rpipe->pipe_buffer.out = 0; 
42613675Sdyson 42713675Sdyson rpipe->pipe_buffer.cnt -= size; 42847748Salc 42947748Salc /* 43047748Salc * If there is no more to read in the pipe, reset 43147748Salc * its pointers to the beginning. This improves 43247748Salc * cache hit stats. 43347748Salc */ 43447748Salc if (rpipe->pipe_buffer.cnt == 0) { 43547748Salc rpipe->pipe_buffer.in = 0; 43647748Salc rpipe->pipe_buffer.out = 0; 43747748Salc } 43813675Sdyson nread += size; 43914037Sdyson#ifndef PIPE_NODIRECT 44013907Sdyson /* 44113907Sdyson * Direct copy, bypassing a kernel buffer. 44213907Sdyson */ 44313907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 44447748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 44547748Salc caddr_t va; 44618863Sdyson if (size > (u_int) uio->uio_resid) 44718863Sdyson size = (u_int) uio->uio_resid; 44847748Salc 44947748Salc va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos; 45047748Salc error = uiomove(va, size, uio); 45113907Sdyson if (error) 45213907Sdyson break; 45313907Sdyson nread += size; 45413907Sdyson rpipe->pipe_map.pos += size; 45513907Sdyson rpipe->pipe_map.cnt -= size; 45613907Sdyson if (rpipe->pipe_map.cnt == 0) { 45713907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 45813907Sdyson wakeup(rpipe); 45913907Sdyson } 46014037Sdyson#endif 46113675Sdyson } else { 46213675Sdyson /* 46313675Sdyson * detect EOF condition 46413675Sdyson */ 46513675Sdyson if (rpipe->pipe_state & PIPE_EOF) { 46614802Sdyson /* XXX error = ? */ 46713675Sdyson break; 46813675Sdyson } 46943623Sdillon 47013675Sdyson /* 47113675Sdyson * If the "write-side" has been blocked, wake it up now. 47213675Sdyson */ 47313675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 47413675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 47513675Sdyson wakeup(rpipe); 47613675Sdyson } 47743623Sdillon 47843623Sdillon /* 47947748Salc * Break if some data was read. 48043623Sdillon */ 48147748Salc if (nread > 0) 48213675Sdyson break; 48316960Sdyson 48443623Sdillon /* 48547748Salc * Unlock the pipe buffer for our remaining processing. 
We 48647748Salc * will either break out with an error or we will sleep and 48747748Salc * relock to loop. 48843623Sdillon */ 48947748Salc pipeunlock(rpipe); 49043623Sdillon 49113675Sdyson /* 49247748Salc * Handle non-blocking mode operation or 49347748Salc * wait for more data. 49413675Sdyson */ 49547748Salc if (fp->f_flag & FNONBLOCK) 49647748Salc error = EAGAIN; 49747748Salc else { 49847748Salc rpipe->pipe_state |= PIPE_WANTR; 49947748Salc if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0) 50047748Salc error = pipelock(rpipe, 1); 50113675Sdyson } 50247748Salc if (error) 50347748Salc goto unlocked_error; 50413675Sdyson } 50513675Sdyson } 50647748Salc pipeunlock(rpipe); 50713675Sdyson 50824101Sbde if (error == 0) 50955112Sbde vfs_timestamp(&rpipe->pipe_atime); 51047748Salcunlocked_error: 51147748Salc --rpipe->pipe_busy; 51213913Sdyson 51347748Salc /* 51447748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 51547748Salc */ 51613675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 51713675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 51813675Sdyson wakeup(rpipe); 51913675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 52013675Sdyson /* 52147748Salc * Handle write blocking hysteresis. 52213675Sdyson */ 52313675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 52413675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 52513675Sdyson wakeup(rpipe); 52613675Sdyson } 52713675Sdyson } 52814037Sdyson 52914802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 53014037Sdyson pipeselwakeup(rpipe); 53114037Sdyson 53213675Sdyson return error; 53313675Sdyson} 53413675Sdyson 53514037Sdyson#ifndef PIPE_NODIRECT 53613907Sdyson/* 53713907Sdyson * Map the sending processes' buffer into kernel space and wire it. 53813907Sdyson * This is similar to a physical write operation. 
53913907Sdyson */ 54013675Sdysonstatic int 54113907Sdysonpipe_build_write_buffer(wpipe, uio) 54213907Sdyson struct pipe *wpipe; 54313675Sdyson struct uio *uio; 54413675Sdyson{ 54518863Sdyson u_int size; 54613907Sdyson int i; 54713907Sdyson vm_offset_t addr, endaddr, paddr; 54813907Sdyson 54918863Sdyson size = (u_int) uio->uio_iov->iov_len; 55013907Sdyson if (size > wpipe->pipe_buffer.size) 55113907Sdyson size = wpipe->pipe_buffer.size; 55213907Sdyson 55340286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 55440286Sdg for(i = 0, addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 55513907Sdyson addr < endaddr; 55613907Sdyson addr += PAGE_SIZE, i+=1) { 55713907Sdyson 55813907Sdyson vm_page_t m; 55913907Sdyson 56051474Sdillon if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 56151474Sdillon (paddr = pmap_kextract(addr)) == 0) { 56213907Sdyson int j; 56313907Sdyson for(j=0;j<i;j++) 56440700Sdg vm_page_unwire(wpipe->pipe_map.ms[j], 1); 56513907Sdyson return EFAULT; 56613907Sdyson } 56713907Sdyson 56813907Sdyson m = PHYS_TO_VM_PAGE(paddr); 56913907Sdyson vm_page_wire(m); 57013907Sdyson wpipe->pipe_map.ms[i] = m; 57113907Sdyson } 57213907Sdyson 57313907Sdyson/* 57413907Sdyson * set up the control block 57513907Sdyson */ 57613907Sdyson wpipe->pipe_map.npages = i; 57713907Sdyson wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 57813907Sdyson wpipe->pipe_map.cnt = size; 57913907Sdyson 58013907Sdyson/* 58113907Sdyson * and map the buffer 58213907Sdyson */ 58313907Sdyson if (wpipe->pipe_map.kva == 0) { 58413912Sdyson /* 58513912Sdyson * We need to allocate space for an extra page because the 58613912Sdyson * address range might (will) span pages at times. 
58713912Sdyson */ 58813907Sdyson wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 58913912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 59013912Sdyson amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 59113907Sdyson } 59213907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 59313907Sdyson wpipe->pipe_map.npages); 59413907Sdyson 59513907Sdyson/* 59613907Sdyson * and update the uio data 59713907Sdyson */ 59813907Sdyson 59913907Sdyson uio->uio_iov->iov_len -= size; 60013907Sdyson uio->uio_iov->iov_base += size; 60113907Sdyson if (uio->uio_iov->iov_len == 0) 60213907Sdyson uio->uio_iov++; 60313907Sdyson uio->uio_resid -= size; 60413907Sdyson uio->uio_offset += size; 60513907Sdyson return 0; 60613907Sdyson} 60713907Sdyson 60813907Sdyson/* 60913907Sdyson * unmap and unwire the process buffer 61013907Sdyson */ 61113907Sdysonstatic void 61213907Sdysonpipe_destroy_write_buffer(wpipe) 61313907Sdysonstruct pipe *wpipe; 61413907Sdyson{ 61513907Sdyson int i; 61676364Salfred 61717163Sdyson if (wpipe->pipe_map.kva) { 61817163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 61913907Sdyson 62013907Sdyson if (amountpipekva > MAXPIPEKVA) { 62113907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 62213907Sdyson wpipe->pipe_map.kva = 0; 62313907Sdyson kmem_free(kernel_map, kva, 62413912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 62513912Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 62613907Sdyson } 62713907Sdyson } 62813907Sdyson for (i=0;i<wpipe->pipe_map.npages;i++) 62940700Sdg vm_page_unwire(wpipe->pipe_map.ms[i], 1); 63013907Sdyson} 63113907Sdyson 63213907Sdyson/* 63313907Sdyson * In the case of a signal, the writing process might go away. This 63413907Sdyson * code copies the data into the circular buffer so that the source 63513907Sdyson * pages can be freed without loss of data. 
63613907Sdyson */ 63713907Sdysonstatic void 63813907Sdysonpipe_clone_write_buffer(wpipe) 63976364Salfred struct pipe *wpipe; 64013907Sdyson{ 64113907Sdyson int size; 64213907Sdyson int pos; 64313907Sdyson 64413907Sdyson size = wpipe->pipe_map.cnt; 64513907Sdyson pos = wpipe->pipe_map.pos; 64613907Sdyson bcopy((caddr_t) wpipe->pipe_map.kva+pos, 64713907Sdyson (caddr_t) wpipe->pipe_buffer.buffer, 64813907Sdyson size); 64913907Sdyson 65013907Sdyson wpipe->pipe_buffer.in = size; 65113907Sdyson wpipe->pipe_buffer.out = 0; 65213907Sdyson wpipe->pipe_buffer.cnt = size; 65313907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 65413907Sdyson 65513907Sdyson pipe_destroy_write_buffer(wpipe); 65613907Sdyson} 65713907Sdyson 65813907Sdyson/* 65913907Sdyson * This implements the pipe buffer write mechanism. Note that only 66013907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 66113907Sdyson * If there are any characters in the pipe buffer, the direct write will 66213907Sdyson * be deferred until the receiving process grabs all of the bytes from 66313907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 
66413907Sdyson */ 66513907Sdysonstatic int 66613907Sdysonpipe_direct_write(wpipe, uio) 66713907Sdyson struct pipe *wpipe; 66813907Sdyson struct uio *uio; 66913907Sdyson{ 67013907Sdyson int error; 67176364Salfred 67213951Sdysonretry: 67313907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 67413951Sdyson if ( wpipe->pipe_state & PIPE_WANTR) { 67513951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 67613951Sdyson wakeup(wpipe); 67713951Sdyson } 67813992Sdyson wpipe->pipe_state |= PIPE_WANTW; 67913907Sdyson error = tsleep(wpipe, 68013907Sdyson PRIBIO|PCATCH, "pipdww", 0); 68114802Sdyson if (error) 68213907Sdyson goto error1; 68314802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 68414802Sdyson error = EPIPE; 68514802Sdyson goto error1; 68614802Sdyson } 68713907Sdyson } 68813907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 68913951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 69013951Sdyson if ( wpipe->pipe_state & PIPE_WANTR) { 69113951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 69213951Sdyson wakeup(wpipe); 69313951Sdyson } 69413951Sdyson 69513992Sdyson wpipe->pipe_state |= PIPE_WANTW; 69613907Sdyson error = tsleep(wpipe, 69713907Sdyson PRIBIO|PCATCH, "pipdwc", 0); 69814802Sdyson if (error) 69913907Sdyson goto error1; 70014802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 70114802Sdyson error = EPIPE; 70214802Sdyson goto error1; 70313907Sdyson } 70413951Sdyson goto retry; 70513907Sdyson } 70613907Sdyson 70713951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 70813951Sdyson 70913907Sdyson error = pipe_build_write_buffer(wpipe, uio); 71013907Sdyson if (error) { 71113907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 71213907Sdyson goto error1; 71313907Sdyson } 71413907Sdyson 71513907Sdyson error = 0; 71613907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 71713907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 71813907Sdyson pipelock(wpipe, 0); 71913907Sdyson pipe_destroy_write_buffer(wpipe); 72013907Sdyson pipeunlock(wpipe); 72114037Sdyson pipeselwakeup(wpipe); 72214802Sdyson 
error = EPIPE; 72314802Sdyson goto error1; 72413907Sdyson } 72513992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 72613992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 72713992Sdyson wakeup(wpipe); 72813992Sdyson } 72914037Sdyson pipeselwakeup(wpipe); 73013907Sdyson error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0); 73113907Sdyson } 73213907Sdyson 73313907Sdyson pipelock(wpipe,0); 73413907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 73513907Sdyson /* 73613907Sdyson * this bit of trickery substitutes a kernel buffer for 73713907Sdyson * the process that might be going away. 73813907Sdyson */ 73913907Sdyson pipe_clone_write_buffer(wpipe); 74013907Sdyson } else { 74113907Sdyson pipe_destroy_write_buffer(wpipe); 74213907Sdyson } 74313907Sdyson pipeunlock(wpipe); 74413907Sdyson return error; 74513907Sdyson 74613907Sdysonerror1: 74713907Sdyson wakeup(wpipe); 74813907Sdyson return error; 74913907Sdyson} 75014037Sdyson#endif 75113907Sdyson 75216960Sdysonstatic int 75351418Sgreenpipe_write(fp, uio, cred, flags, p) 75416960Sdyson struct file *fp; 75513907Sdyson struct uio *uio; 75616960Sdyson struct ucred *cred; 75751418Sgreen struct proc *p; 75845311Sdt int flags; 75913907Sdyson{ 76013675Sdyson int error = 0; 76113913Sdyson int orig_resid; 76216960Sdyson struct pipe *wpipe, *rpipe; 76316960Sdyson 76416960Sdyson rpipe = (struct pipe *) fp->f_data; 76516960Sdyson wpipe = rpipe->pipe_peer; 76616960Sdyson 76713675Sdyson /* 76813675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 76913675Sdyson */ 77016960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 77113774Sdyson return EPIPE; 77213675Sdyson } 77313675Sdyson 77417163Sdyson /* 77517163Sdyson * If it is advantageous to resize the pipe buffer, do 77617163Sdyson * so. 
77717163Sdyson */ 77817163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 77917163Sdyson (nbigpipe < LIMITBIGPIPES) && 78017163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 78117163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 78217163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 78317163Sdyson 78413907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 78576364Salfred if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 78676364Salfred nbigpipe++; 78713907Sdyson pipeunlock(wpipe); 78813907Sdyson } else { 78913907Sdyson return error; 79013907Sdyson } 79113907Sdyson } 79276364Salfred 79376364Salfred KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone")); 79413907Sdyson 79513675Sdyson ++wpipe->pipe_busy; 79613913Sdyson orig_resid = uio->uio_resid; 79713675Sdyson while (uio->uio_resid) { 79813907Sdyson int space; 79914037Sdyson#ifndef PIPE_NODIRECT 80013907Sdyson /* 80113907Sdyson * If the transfer is large, we can gain performance if 80213907Sdyson * we do process-to-process copies directly. 80316416Sdyson * If the write is non-blocking, we don't use the 80416416Sdyson * direct write mechanism. 80558505Sdillon * 80658505Sdillon * The direct write mechanism will detect the reader going 80758505Sdillon * away on us. 80813907Sdyson */ 80917163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 81017163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 81117163Sdyson (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 81213907Sdyson (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 81313907Sdyson error = pipe_direct_write( wpipe, uio); 81413907Sdyson if (error) { 81513907Sdyson break; 81613907Sdyson } 81713907Sdyson continue; 81813907Sdyson } 81914037Sdyson#endif 82013907Sdyson 82113907Sdyson /* 82213907Sdyson * Pipe buffered writes cannot be coincidental with 82313907Sdyson * direct writes. We wait until the currently executing 82413907Sdyson * direct write is completed before we start filling the 82558505Sdillon * pipe buffer. 
We break out if a signal occurs or the 82658505Sdillon * reader goes away. 82713907Sdyson */ 82813907Sdyson retrywrite: 82913907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 83013992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 83113992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 83213992Sdyson wakeup(wpipe); 83313992Sdyson } 83458505Sdillon error = tsleep(wpipe, PRIBIO|PCATCH, "pipbww", 0); 83558505Sdillon if (wpipe->pipe_state & PIPE_EOF) 83658505Sdillon break; 83713907Sdyson if (error) 83813907Sdyson break; 83913907Sdyson } 84058505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 84158505Sdillon error = EPIPE; 84258505Sdillon break; 84358505Sdillon } 84413907Sdyson 84513907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 84614644Sdyson 84714644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 84813913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 84913913Sdyson space = 0; 85013907Sdyson 85117163Sdyson if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 85213907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 85354534Stegge int size; /* Transfer size */ 85454534Stegge int segsize; /* first segment to transfer */ 85513907Sdyson /* 85613907Sdyson * It is possible for a direct write to 85713907Sdyson * slip in on us... handle it here... 85813907Sdyson */ 85913907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 86013907Sdyson pipeunlock(wpipe); 86113907Sdyson goto retrywrite; 86213907Sdyson } 86354534Stegge /* 86454534Stegge * If a process blocked in uiomove, our 86554534Stegge * value for space might be bad. 86658505Sdillon * 86758505Sdillon * XXX will we be ok if the reader has gone 86858505Sdillon * away here? 
86954534Stegge */ 87054534Stegge if (space > wpipe->pipe_buffer.size - 87154534Stegge wpipe->pipe_buffer.cnt) { 87254534Stegge pipeunlock(wpipe); 87354534Stegge goto retrywrite; 87454534Stegge } 87554534Stegge 87654534Stegge /* 87754534Stegge * Transfer size is minimum of uio transfer 87854534Stegge * and free space in pipe buffer. 87954534Stegge */ 88054534Stegge if (space > uio->uio_resid) 88154534Stegge size = uio->uio_resid; 88254534Stegge else 88354534Stegge size = space; 88454534Stegge /* 88554534Stegge * First segment to transfer is minimum of 88654534Stegge * transfer size and contiguous space in 88754534Stegge * pipe buffer. If first segment to transfer 88854534Stegge * is less than the transfer size, we've got 88954534Stegge * a wraparound in the buffer. 89054534Stegge */ 89154534Stegge segsize = wpipe->pipe_buffer.size - 89254534Stegge wpipe->pipe_buffer.in; 89354534Stegge if (segsize > size) 89454534Stegge segsize = size; 89554534Stegge 89654534Stegge /* Transfer first segment */ 89754534Stegge 89854534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 89954534Stegge segsize, uio); 90054534Stegge 90154534Stegge if (error == 0 && segsize < size) { 90254534Stegge /* 90354534Stegge * Transfer remaining part now, to 90454534Stegge * support atomic writes. Wraparound 90554534Stegge * happened. 
90654534Stegge */ 90754534Stegge if (wpipe->pipe_buffer.in + segsize != 90854534Stegge wpipe->pipe_buffer.size) 90954534Stegge panic("Expected pipe buffer wraparound disappeared"); 91054534Stegge 91154534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[0], 91254534Stegge size - segsize, uio); 91354534Stegge } 91454534Stegge if (error == 0) { 91554534Stegge wpipe->pipe_buffer.in += size; 91654534Stegge if (wpipe->pipe_buffer.in >= 91754534Stegge wpipe->pipe_buffer.size) { 91854534Stegge if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 91954534Stegge panic("Expected wraparound bad"); 92054534Stegge wpipe->pipe_buffer.in = size - segsize; 92154534Stegge } 92254534Stegge 92354534Stegge wpipe->pipe_buffer.cnt += size; 92454534Stegge if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 92554534Stegge panic("Pipe buffer overflow"); 92654534Stegge 92754534Stegge } 92813675Sdyson pipeunlock(wpipe); 92913675Sdyson } 93013675Sdyson if (error) 93113675Sdyson break; 93213675Sdyson 93313675Sdyson } else { 93413675Sdyson /* 93513675Sdyson * If the "read-side" has been blocked, wake it up now. 93613675Sdyson */ 93713675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 93813675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 93913675Sdyson wakeup(wpipe); 94013675Sdyson } 94114037Sdyson 94213675Sdyson /* 94313675Sdyson * don't block on non-blocking I/O 94413675Sdyson */ 94516960Sdyson if (fp->f_flag & FNONBLOCK) { 94613907Sdyson error = EAGAIN; 94713675Sdyson break; 94813675Sdyson } 94913907Sdyson 95014037Sdyson /* 95114037Sdyson * We have no more space and have something to offer, 95229356Speter * wake up select/poll. 
95314037Sdyson */ 95414037Sdyson pipeselwakeup(wpipe); 95514037Sdyson 95613675Sdyson wpipe->pipe_state |= PIPE_WANTW; 95743301Sdillon if ((error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) != 0) { 95813675Sdyson break; 95913675Sdyson } 96013675Sdyson /* 96113675Sdyson * If read side wants to go away, we just issue a signal 96213675Sdyson * to ourselves. 96313675Sdyson */ 96413675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 96513774Sdyson error = EPIPE; 96613907Sdyson break; 96713675Sdyson } 96813675Sdyson } 96913675Sdyson } 97013675Sdyson 97114644Sdyson --wpipe->pipe_busy; 97213675Sdyson if ((wpipe->pipe_busy == 0) && 97313675Sdyson (wpipe->pipe_state & PIPE_WANT)) { 97413675Sdyson wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); 97513675Sdyson wakeup(wpipe); 97613675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 97713675Sdyson /* 97813675Sdyson * If we have put any characters in the buffer, we wake up 97913675Sdyson * the reader. 98013675Sdyson */ 98113675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 98213675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 98313675Sdyson wakeup(wpipe); 98413675Sdyson } 98513675Sdyson } 98613909Sdyson 98713909Sdyson /* 98813909Sdyson * Don't return EPIPE if I/O was successful 98913909Sdyson */ 99013907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 99113907Sdyson (uio->uio_resid == 0) && 99213907Sdyson (error == EPIPE)) 99313907Sdyson error = 0; 99413913Sdyson 99524101Sbde if (error == 0) 99655112Sbde vfs_timestamp(&wpipe->pipe_mtime); 99724101Sbde 99814037Sdyson /* 99914037Sdyson * We have something to offer, 100029356Speter * wake up select/poll. 100114037Sdyson */ 100214177Sdyson if (wpipe->pipe_buffer.cnt) 100314037Sdyson pipeselwakeup(wpipe); 100413907Sdyson 100513675Sdyson return error; 100613675Sdyson} 100713675Sdyson 100813675Sdyson/* 100913675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 
101013675Sdyson */ 101113675Sdysonint 101213675Sdysonpipe_ioctl(fp, cmd, data, p) 101313675Sdyson struct file *fp; 101436735Sdfr u_long cmd; 101576364Salfred caddr_t data; 101613675Sdyson struct proc *p; 101713675Sdyson{ 101876364Salfred struct pipe *mpipe = (struct pipe *)fp->f_data; 101913675Sdyson 102013675Sdyson switch (cmd) { 102113675Sdyson 102213675Sdyson case FIONBIO: 102313675Sdyson return (0); 102413675Sdyson 102513675Sdyson case FIOASYNC: 102613675Sdyson if (*(int *)data) { 102713675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 102813675Sdyson } else { 102913675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 103013675Sdyson } 103113675Sdyson return (0); 103213675Sdyson 103313675Sdyson case FIONREAD: 103414037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 103514037Sdyson *(int *)data = mpipe->pipe_map.cnt; 103614037Sdyson else 103714037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 103813675Sdyson return (0); 103913675Sdyson 104041086Struckman case FIOSETOWN: 104141086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 104241086Struckman 104341086Struckman case FIOGETOWN: 104441086Struckman *(int *)data = fgetown(mpipe->pipe_sigio); 104513675Sdyson return (0); 104613675Sdyson 104741086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 104841086Struckman case TIOCSPGRP: 104941086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 105041086Struckman 105141086Struckman /* This is deprecated, FIOGETOWN should be used instead. 
*/ 105218863Sdyson case TIOCGPGRP: 105341086Struckman *(int *)data = -fgetown(mpipe->pipe_sigio); 105413675Sdyson return (0); 105513675Sdyson 105613675Sdyson } 105717124Sbde return (ENOTTY); 105813675Sdyson} 105913675Sdyson 106013675Sdysonint 106129356Speterpipe_poll(fp, events, cred, p) 106213675Sdyson struct file *fp; 106329356Speter int events; 106429356Speter struct ucred *cred; 106513675Sdyson struct proc *p; 106613675Sdyson{ 106776364Salfred struct pipe *rpipe = (struct pipe *)fp->f_data; 106813675Sdyson struct pipe *wpipe; 106929356Speter int revents = 0; 107013675Sdyson 107113675Sdyson wpipe = rpipe->pipe_peer; 107229356Speter if (events & (POLLIN | POLLRDNORM)) 107329356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 107429356Speter (rpipe->pipe_buffer.cnt > 0) || 107529356Speter (rpipe->pipe_state & PIPE_EOF)) 107629356Speter revents |= events & (POLLIN | POLLRDNORM); 107713675Sdyson 107829356Speter if (events & (POLLOUT | POLLWRNORM)) 107929356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 108043311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 108143311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 108229356Speter revents |= events & (POLLOUT | POLLWRNORM); 108313675Sdyson 108429356Speter if ((rpipe->pipe_state & PIPE_EOF) || 108529356Speter (wpipe == NULL) || 108629356Speter (wpipe->pipe_state & PIPE_EOF)) 108729356Speter revents |= POLLHUP; 108829356Speter 108929356Speter if (revents == 0) { 109029356Speter if (events & (POLLIN | POLLRDNORM)) { 109129356Speter selrecord(p, &rpipe->pipe_sel); 109229356Speter rpipe->pipe_state |= PIPE_SEL; 109313675Sdyson } 109413675Sdyson 109529356Speter if (events & (POLLOUT | POLLWRNORM)) { 109630164Speter selrecord(p, &wpipe->pipe_sel); 109730164Speter wpipe->pipe_state |= PIPE_SEL; 109813907Sdyson } 109913675Sdyson } 110029356Speter 110129356Speter return (revents); 110213675Sdyson} 110313675Sdyson 110452983Speterstatic int 110552983Speterpipe_stat(fp, ub, p) 
110652983Speter struct file *fp; 110752983Speter struct stat *ub; 110852983Speter struct proc *p; 110913675Sdyson{ 111052983Speter struct pipe *pipe = (struct pipe *)fp->f_data; 111152983Speter 111213675Sdyson bzero((caddr_t)ub, sizeof (*ub)); 111317124Sbde ub->st_mode = S_IFIFO; 111413907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 111513675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 111613675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 111734901Sphk ub->st_atimespec = pipe->pipe_atime; 111834901Sphk ub->st_mtimespec = pipe->pipe_mtime; 111934901Sphk ub->st_ctimespec = pipe->pipe_ctime; 112060404Schris ub->st_uid = fp->f_cred->cr_uid; 112160404Schris ub->st_gid = fp->f_cred->cr_gid; 112217124Sbde /* 112360404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 112417124Sbde * XXX (st_dev, st_ino) should be unique. 112517124Sbde */ 112613675Sdyson return 0; 112713675Sdyson} 112813675Sdyson 112913675Sdyson/* ARGSUSED */ 113013675Sdysonstatic int 113113675Sdysonpipe_close(fp, p) 113213675Sdyson struct file *fp; 113313675Sdyson struct proc *p; 113413675Sdyson{ 113513675Sdyson struct pipe *cpipe = (struct pipe *)fp->f_data; 113616322Sgpalmer 113749413Sgreen fp->f_ops = &badfileops; 113849413Sgreen fp->f_data = NULL; 113941086Struckman funsetown(cpipe->pipe_sigio); 114013675Sdyson pipeclose(cpipe); 114113675Sdyson return 0; 114213675Sdyson} 114313675Sdyson 114476364Salfredstatic void 114576364Salfredpipe_free_kmem(cpipe) 114676364Salfred struct pipe *cpipe; 114776364Salfred{ 114876364Salfred 114976364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 115076364Salfred if (cpipe->pipe_buffer.size > PIPE_SIZE) 115176364Salfred --nbigpipe; 115276364Salfred amountpipekva -= cpipe->pipe_buffer.size; 115376364Salfred kmem_free(kernel_map, 115476364Salfred (vm_offset_t)cpipe->pipe_buffer.buffer, 115576364Salfred cpipe->pipe_buffer.size); 115676364Salfred cpipe->pipe_buffer.buffer = NULL; 115776364Salfred } 
115876364Salfred#ifndef PIPE_NODIRECT 115976364Salfred if (cpipe->pipe_map.kva != NULL) { 116076364Salfred amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 116176364Salfred kmem_free(kernel_map, 116276364Salfred cpipe->pipe_map.kva, 116376364Salfred cpipe->pipe_buffer.size + PAGE_SIZE); 116476364Salfred cpipe->pipe_map.cnt = 0; 116576364Salfred cpipe->pipe_map.kva = 0; 116676364Salfred cpipe->pipe_map.pos = 0; 116776364Salfred cpipe->pipe_map.npages = 0; 116876364Salfred } 116976364Salfred#endif 117076364Salfred} 117176364Salfred 117213675Sdyson/* 117313675Sdyson * shutdown the pipe 117413675Sdyson */ 117513675Sdysonstatic void 117613675Sdysonpipeclose(cpipe) 117713675Sdyson struct pipe *cpipe; 117813675Sdyson{ 117913907Sdyson struct pipe *ppipe; 118076364Salfred 118113675Sdyson if (cpipe) { 118213907Sdyson 118314037Sdyson pipeselwakeup(cpipe); 118413907Sdyson 118513675Sdyson /* 118613675Sdyson * If the other side is blocked, wake it up saying that 118713675Sdyson * we want to close it down. 
118813675Sdyson */ 118913675Sdyson while (cpipe->pipe_busy) { 119013675Sdyson wakeup(cpipe); 119113675Sdyson cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; 119213675Sdyson tsleep(cpipe, PRIBIO, "pipecl", 0); 119313675Sdyson } 119413675Sdyson 119513675Sdyson /* 119613675Sdyson * Disconnect from peer 119713675Sdyson */ 119843301Sdillon if ((ppipe = cpipe->pipe_peer) != NULL) { 119914037Sdyson pipeselwakeup(ppipe); 120013907Sdyson 120113907Sdyson ppipe->pipe_state |= PIPE_EOF; 120213907Sdyson wakeup(ppipe); 120313907Sdyson ppipe->pipe_peer = NULL; 120413675Sdyson } 120513675Sdyson 120613675Sdyson /* 120713675Sdyson * free resources 120813675Sdyson */ 120976364Salfred pipe_free_kmem(cpipe); 121027899Sdyson zfree(pipe_zone, cpipe); 121113675Sdyson } 121213675Sdyson} 121359288Sjlemon 121472521Sjlemon/*ARGSUSED*/ 121559288Sjlemonstatic int 121672521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 121759288Sjlemon{ 121859288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 121959288Sjlemon 122072521Sjlemon switch (kn->kn_filter) { 122172521Sjlemon case EVFILT_READ: 122272521Sjlemon kn->kn_fop = &pipe_rfiltops; 122372521Sjlemon break; 122472521Sjlemon case EVFILT_WRITE: 122572521Sjlemon kn->kn_fop = &pipe_wfiltops; 122672521Sjlemon break; 122772521Sjlemon default: 122872521Sjlemon return (1); 122972521Sjlemon } 123072521Sjlemon 123159288Sjlemon SLIST_INSERT_HEAD(&rpipe->pipe_sel.si_note, kn, kn_selnext); 123259288Sjlemon return (0); 123359288Sjlemon} 123459288Sjlemon 123559288Sjlemonstatic void 123659288Sjlemonfilt_pipedetach(struct knote *kn) 123759288Sjlemon{ 123859288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 123959288Sjlemon 124060938Sjake SLIST_REMOVE(&rpipe->pipe_sel.si_note, kn, knote, kn_selnext); 124159288Sjlemon} 124259288Sjlemon 124359288Sjlemon/*ARGSUSED*/ 124459288Sjlemonstatic int 124559288Sjlemonfilt_piperead(struct knote *kn, long hint) 124659288Sjlemon{ 124759288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 
124859288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 124959288Sjlemon 125059288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 125159288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 125259288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 125359288Sjlemon 125459288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 125559288Sjlemon (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 125659288Sjlemon kn->kn_flags |= EV_EOF; 125759288Sjlemon return (1); 125859288Sjlemon } 125959288Sjlemon return (kn->kn_data > 0); 126059288Sjlemon} 126159288Sjlemon 126259288Sjlemon/*ARGSUSED*/ 126359288Sjlemonstatic int 126459288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 126559288Sjlemon{ 126659288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 126759288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 126859288Sjlemon 126959288Sjlemon if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 127059288Sjlemon kn->kn_data = 0; 127159288Sjlemon kn->kn_flags |= EV_EOF; 127259288Sjlemon return (1); 127359288Sjlemon } 127459288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 127565855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 127659288Sjlemon kn->kn_data = 0; 127759288Sjlemon 127859288Sjlemon return (kn->kn_data >= PIPE_BUF); 127959288Sjlemon} 1280