sys_pipe.c revision 60404
113675Sdyson/* 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson * 1950477Speter * $FreeBSD: head/sys/kern/sys_pipe.c 60404 2000-05-11 22:08:20Z chris $ 2013675Sdyson */ 2113675Sdyson 2213675Sdyson/* 2313675Sdyson * This file contains a high-performance replacement for the socket-based 2413675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2513675Sdyson * all features of sockets, but does do everything that pipes normally 2613675Sdyson * do. 2713675Sdyson */ 2813675Sdyson 2913907Sdyson/* 3013907Sdyson * This code has two modes of operation, a small write mode and a large 3113907Sdyson * write mode. The small write mode acts like conventional pipes with 3213907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3313907Sdyson * "normal" pipe buffering is done. 
If the buffer is between PIPE_MINDIRECT 3413907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 3513907Sdyson * the receiving process can copy it directly from the pages in the sending 3613907Sdyson * process. 3713907Sdyson * 3813907Sdyson * If the sending process receives a signal, it is possible that it will 3913913Sdyson * go away, and certainly its address space can change, because control 4013907Sdyson * is returned back to the user-mode side. In that case, the pipe code 4113907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4213907Sdyson * kernel buffer, and the receiving process will grab the data from the 4313907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4413907Sdyson * the copy operation is normally eliminated. 4513907Sdyson * 4613907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4713907Sdyson * happen for small transfers so that the system will not spend all of 4813913Sdyson * its time context switching. PIPE_SIZE is constrained by the 4913907Sdyson * amount of kernel virtual memory. 
5013907Sdyson */ 5113907Sdyson 5213675Sdyson#include <sys/param.h> 5313675Sdyson#include <sys/systm.h> 5413675Sdyson#include <sys/proc.h> 5524131Sbde#include <sys/fcntl.h> 5613675Sdyson#include <sys/file.h> 5713675Sdyson#include <sys/filedesc.h> 5824206Sbde#include <sys/filio.h> 5924206Sbde#include <sys/ttycom.h> 6013675Sdyson#include <sys/stat.h> 6129356Speter#include <sys/poll.h> 6243278Sbde#include <sys/select.h> 6313675Sdyson#include <sys/signalvar.h> 6413675Sdyson#include <sys/sysproto.h> 6513675Sdyson#include <sys/pipe.h> 6655112Sbde#include <sys/vnode.h> 6734924Sbde#include <sys/uio.h> 6859288Sjlemon#include <sys/event.h> 6913675Sdyson 7013675Sdyson#include <vm/vm.h> 7113675Sdyson#include <vm/vm_param.h> 7222521Sdyson#include <sys/lock.h> 7313675Sdyson#include <vm/vm_object.h> 7413675Sdyson#include <vm/vm_kern.h> 7513675Sdyson#include <vm/vm_extern.h> 7613675Sdyson#include <vm/pmap.h> 7713675Sdyson#include <vm/vm_map.h> 7813907Sdyson#include <vm/vm_page.h> 7927899Sdyson#include <vm/vm_zone.h> 8013675Sdyson 8114037Sdyson/* 8214037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 8314037Sdyson * approx 30% decrease in transfer rate. This could be useful for 8414037Sdyson * NetBSD or OpenBSD. 
*/
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 *
 * These are the per-file operations installed on both ends of a pipe
 * through the pipeops table below.
 */
static int pipe_read __P((struct file *fp, struct uio *uio, 
		struct ucred *cred, int flags, struct proc *p));
static int pipe_write __P((struct file *fp, struct uio *uio, 
		struct ucred *cred, int flags, struct proc *p));
static int pipe_close __P((struct file *fp, struct proc *p));
static int pipe_poll __P((struct file *fp, int events, struct ucred *cred,
		struct proc *p));
static int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p));
static int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p));

/*
 * File operations vector for DTYPE_PIPE files.  The initializer is
 * positional: read, write, ioctl, poll, stat, close.
 * NOTE(review): the order must match struct fileops -- confirm against
 * sys/file.h when that structure changes.
 */
static struct fileops pipeops =
    { pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_stat, pipe_close };

/*
 * kqueue filter hooks for pipes.
 */
static int	filt_pipeattach(struct knote *kn);
static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

/*
 * Filter table: entry 0 uses filt_piperead, entry 1 uses filt_pipewrite;
 * both share the same attach/detach routines.
 * NOTE(review): the leading 1 is presumably the "is a file descriptor"
 * flag of struct filterops -- confirm against sys/event.h.
 */
struct filterops pipe_rwfiltops[] = {
	{ 1, filt_pipeattach, filt_pipedetach, filt_piperead },
	{ 1, filt_pipeattach, filt_pipedetach, filt_pipewrite },
};

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
*/
/*
 * MINPIPESIZE is the low-water mark pipe_read() checks before waking a
 * writer that is blocked with PIPE_WANTW (write hysteresis).
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 * Once amountpipekva exceeds this value, pipe_destroy_write_buffer()
 * releases the direct-write kva instead of keeping it cached.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers, we cannot, of course limit
 * the amount of kva for pipes in general though.
 * pipe_write() only starts a direct write when the pipe already owns
 * mapping kva or amountpipekva is below this value.
 */
#define LIMITPIPEKVA (16*1024*1024)

/*
 * Limit the number of "big" pipes
 * (pipes whose buffer pipe_write() has grown to BIG_PIPE_SIZE).
 */
#define LIMITBIGPIPES	32
/* Number of pipes that have been resized to BIG_PIPE_SIZE. */
static int nbigpipe;

/* Running total of kernel virtual address space charged to pipes. */
static int amountpipekva;

static void pipeclose __P((struct pipe *cpipe));
static void pipeinit __P((struct pipe *cpipe));
static __inline int pipelock __P((struct pipe *cpipe, int catch));
static __inline void pipeunlock __P((struct pipe *cpipe));
static __inline void pipeselwakeup __P((struct pipe *cpipe));
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio));
static void pipe_destroy_write_buffer __P((struct pipe *wpipe));
static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio));
static void pipe_clone_write_buffer __P((struct pipe *wpipe));
#endif
static void pipespace __P((struct pipe *cpipe));

/* Allocation zone for struct pipe; created lazily by the first pipe() call. */
static vm_zone_t pipe_zone;

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 *
 * Allocates two struct pipe and two file table entries and cross-links
 * the pipes through pipe_peer.  The two descriptor numbers are handed
 * back in p->p_retval[0] and p->p_retval[1].  Both file entries are
 * opened FREAD | FWRITE.
 *
 * Returns 0 on success, otherwise the error reported by falloc(); on
 * failure both pipe structures are released through pipeclose().
 */

/* ARGSUSED */
int
pipe(p, uap)
	struct proc *p;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
{
	register struct filedesc *fdp = p->p_fd;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	int fd, error;

	/* First use: create the zone the pipe structures come from. */
	if (pipe_zone == NULL)
		pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4);

	/*
	 * NOTE(review): the zalloc() results are passed straight to
	 * pipeinit() without a NULL check -- confirm zalloc() cannot
	 * fail for this zone.
	 */
	rpipe = zalloc( pipe_zone);
	pipeinit(rpipe);
	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe = zalloc( pipe_zone);
	pipeinit(wpipe);
	wpipe->pipe_state |= PIPE_DIRECTOK;

	/* First descriptor, backed by rpipe. */
	error = falloc(p, &rf, &fd);
	if (error)
		goto free2;
	p->p_retval[0] = fd;
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = (caddr_t)rpipe;
	rf->f_ops = &pipeops;
	/* Second descriptor, backed by wpipe. */
	error = falloc(p, &wf, &fd);
	if (error)
		goto free3;
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = (caddr_t)wpipe;
	wf->f_ops = &pipeops;
	p->p_retval[1] = fd;

	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;

	return (0);
free3:
	/* Undo the first falloc(): clear its descriptor slot, free the file. */
	fdp->fd_ofiles[p->p_retval[0]] = 0;
	ffree(rf);
free2:
	(void)pipeclose(wpipe);
	(void)pipeclose(rpipe);
	return (error);
}

/*
 * Allocate kva for pipe circular buffer, the space is pageable
 *
 * Creates a VM object sized for pipe_buffer.size, maps it into
 * kernel_map, stores the resulting address in pipe_buffer.buffer and
 * charges the size to amountpipekva.  Panics if the mapping fails.
 */
static void
pipespace(cpipe)
	struct pipe *cpipe;
{
	int npages, error;

	npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE;
	/*
	 * Create an object, I don't like the idea of paging to/from
	 * kernel_object.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages);
	/* Starting hint for vm_map_find(); replaced by the chosen address. */
	cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map);

	/*
	 * Insert the object into the kernel map, and allocate kva for it.
	 * The map entry is, by default, pageable.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0,
		(vm_offset_t *) &cpipe->pipe_buffer.buffer, 
		cpipe->pipe_buffer.size, 1,
		VM_PROT_ALL, VM_PROT_ALL, 0);

	/*
	 * NOTE(review): the panic text names pipeinit although this is
	 * pipespace -- keep that in mind when reading crash reports.
	 */
	if (error != KERN_SUCCESS)
		panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error);
	amountpipekva += cpipe->pipe_buffer.size;
}

/*
 * Initialize a freshly allocated pipe structure.
 *
 * Only bookkeeping is set up here: the circular buffer is left NULL and
 * no memory is allocated by this routine (pipe_write() calls pipespace()
 * when it finds the buffer still NULL).  All three timestamps start out
 * equal to the creation time.
 */
static void
pipeinit(cpipe)
	struct pipe *cpipe;
{

	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	cpipe->pipe_buffer.size = PIPE_SIZE;

	/* Buffer kva gets dynamically allocated */
	cpipe->pipe_buffer.buffer = NULL;
	/* cpipe->pipe_buffer.object = invalid */

	cpipe->pipe_state = 0;
	cpipe->pipe_peer = NULL;
	cpipe->pipe_busy = 0;
	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;
	bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel);

#ifndef PIPE_NODIRECT
	/*
	 * pipe data structure initializations to support direct pipe I/O
	 */
	cpipe->pipe_map.cnt = 0;
	cpipe->pipe_map.kva = 0;
	cpipe->pipe_map.pos = 0;
	cpipe->pipe_map.npages = 0;
	/* cpipe->pipe_map.ms[] = invalid */
#endif
}


/*
 * lock a pipe for I/O, blocking other access
 *
 * Sleeps while PIPE_LOCK is held by someone else, flagging PIPE_LWANT so
 * that pipeunlock() issues a wakeup.  When catch is non-zero the sleep
 * uses PCATCH; a non-zero tsleep() result is returned to the caller and
 * the lock is NOT held in that case.  Returns 0 with PIPE_LOCK set.
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;
	while (cpipe->pipe_state & PIPE_LOCK) {
		cpipe->pipe_state |= PIPE_LWANT;
		if ((error = tsleep( cpipe,
			catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) != 0) {
			return error;
		}
	}
	cpipe->pipe_state |= PIPE_LOCK;
	return 0;
}

/*
 * unlock a pipe I/O lock
 *
 * Clears PIPE_LOCK and, if a pipelock() caller recorded PIPE_LWANT,
 * clears that flag and wakes the sleepers on cpipe.
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{
	cpipe->pipe_state &= ~PIPE_LOCK;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * Notify everyone waiting for a state change on the pipe: select/poll
 * sleepers (only when PIPE_SEL is set), the SIGIO recipient (only when
 * PIPE_ASYNC is set and pipe_sigio is non-NULL), and any knotes hanging
 * off pipe_sel (unconditionally).
 */
static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/*
 * Read from a pipe.
 *
 * Data is taken from the circular buffer first; when that is empty and a
 * direct write is pending (PIPE_DIRECTW with pipe_map.cnt != 0) it is
 * copied straight out of the writer's mapped pages.  With nothing to
 * read the call stops at EOF, returns what it already has, fails with
 * EAGAIN for FNONBLOCK files, or sleeps until woken.
 *
 * Returns 0 or the error from pipelock()/uiomove()/tsleep().  The pipe
 * lock is held across the copies and dropped before sleeping.
 *
 * The K&R declaration list below names p before flags; that order is
 * independent of the parameter order in the function header.
 */
/* ARGSUSED */
static int
pipe_read(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct proc *p;
	int flags;
{

	struct pipe *rpipe = (struct pipe *) fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	/* pipe_busy counts threads inside the pipe; see PIPE_WANT below. */
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * Copy at most the contiguous run from "out" to the
			 * end of the buffer; a wrapped remainder is picked
			 * up on the next loop iteration.
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
					size, uio);
			if (error) {
				break;
			}
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			caddr_t	va;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos;
			error = uiomove(va, size, uio);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			/*
			 * Everything consumed: end the direct write and wake
			 * the writer sleeping in pipe_direct_write().
			 */
			if (rpipe->pipe_map.cnt == 0) {
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 */
			if (rpipe->pipe_state & PIPE_EOF) {
				/* XXX error = ? */
				break;
			}

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.  We
			 * will either break out with an error or we will sleep and
			 * relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK)
				error = EAGAIN;
			else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			/* The lock is not held here, so skip the unlock below. */
			if (error)
				goto unlocked_error;
		}
	}
	pipeunlock(rpipe);

	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/* At least PIPE_BUF bytes are free: tell select/poll/kqueue waiters. */
	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	return error;
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
48513907Sdyson */ 48613675Sdysonstatic int 48713907Sdysonpipe_build_write_buffer(wpipe, uio) 48813907Sdyson struct pipe *wpipe; 48913675Sdyson struct uio *uio; 49013675Sdyson{ 49118863Sdyson u_int size; 49213907Sdyson int i; 49313907Sdyson vm_offset_t addr, endaddr, paddr; 49413907Sdyson 49518863Sdyson size = (u_int) uio->uio_iov->iov_len; 49613907Sdyson if (size > wpipe->pipe_buffer.size) 49713907Sdyson size = wpipe->pipe_buffer.size; 49813907Sdyson 49940286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 50040286Sdg for(i = 0, addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 50113907Sdyson addr < endaddr; 50213907Sdyson addr += PAGE_SIZE, i+=1) { 50313907Sdyson 50413907Sdyson vm_page_t m; 50513907Sdyson 50651474Sdillon if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 50751474Sdillon (paddr = pmap_kextract(addr)) == 0) { 50813907Sdyson int j; 50913907Sdyson for(j=0;j<i;j++) 51040700Sdg vm_page_unwire(wpipe->pipe_map.ms[j], 1); 51113907Sdyson return EFAULT; 51213907Sdyson } 51313907Sdyson 51413907Sdyson m = PHYS_TO_VM_PAGE(paddr); 51513907Sdyson vm_page_wire(m); 51613907Sdyson wpipe->pipe_map.ms[i] = m; 51713907Sdyson } 51813907Sdyson 51913907Sdyson/* 52013907Sdyson * set up the control block 52113907Sdyson */ 52213907Sdyson wpipe->pipe_map.npages = i; 52313907Sdyson wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 52413907Sdyson wpipe->pipe_map.cnt = size; 52513907Sdyson 52613907Sdyson/* 52713907Sdyson * and map the buffer 52813907Sdyson */ 52913907Sdyson if (wpipe->pipe_map.kva == 0) { 53013912Sdyson /* 53113912Sdyson * We need to allocate space for an extra page because the 53213912Sdyson * address range might (will) span pages at times. 
53313912Sdyson */ 53413907Sdyson wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 53513912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 53613912Sdyson amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 53713907Sdyson } 53813907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 53913907Sdyson wpipe->pipe_map.npages); 54013907Sdyson 54113907Sdyson/* 54213907Sdyson * and update the uio data 54313907Sdyson */ 54413907Sdyson 54513907Sdyson uio->uio_iov->iov_len -= size; 54613907Sdyson uio->uio_iov->iov_base += size; 54713907Sdyson if (uio->uio_iov->iov_len == 0) 54813907Sdyson uio->uio_iov++; 54913907Sdyson uio->uio_resid -= size; 55013907Sdyson uio->uio_offset += size; 55113907Sdyson return 0; 55213907Sdyson} 55313907Sdyson 55413907Sdyson/* 55513907Sdyson * unmap and unwire the process buffer 55613907Sdyson */ 55713907Sdysonstatic void 55813907Sdysonpipe_destroy_write_buffer(wpipe) 55913907Sdysonstruct pipe *wpipe; 56013907Sdyson{ 56113907Sdyson int i; 56217163Sdyson if (wpipe->pipe_map.kva) { 56317163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 56413907Sdyson 56513907Sdyson if (amountpipekva > MAXPIPEKVA) { 56613907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 56713907Sdyson wpipe->pipe_map.kva = 0; 56813907Sdyson kmem_free(kernel_map, kva, 56913912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 57013912Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 57113907Sdyson } 57213907Sdyson } 57313907Sdyson for (i=0;i<wpipe->pipe_map.npages;i++) 57440700Sdg vm_page_unwire(wpipe->pipe_map.ms[i], 1); 57513907Sdyson} 57613907Sdyson 57713907Sdyson/* 57813907Sdyson * In the case of a signal, the writing process might go away. This 57913907Sdyson * code copies the data into the circular buffer so that the source 58013907Sdyson * pages can be freed without loss of data. 
58113907Sdyson */ 58213907Sdysonstatic void 58313907Sdysonpipe_clone_write_buffer(wpipe) 58413907Sdysonstruct pipe *wpipe; 58513907Sdyson{ 58613907Sdyson int size; 58713907Sdyson int pos; 58813907Sdyson 58913907Sdyson size = wpipe->pipe_map.cnt; 59013907Sdyson pos = wpipe->pipe_map.pos; 59113907Sdyson bcopy((caddr_t) wpipe->pipe_map.kva+pos, 59213907Sdyson (caddr_t) wpipe->pipe_buffer.buffer, 59313907Sdyson size); 59413907Sdyson 59513907Sdyson wpipe->pipe_buffer.in = size; 59613907Sdyson wpipe->pipe_buffer.out = 0; 59713907Sdyson wpipe->pipe_buffer.cnt = size; 59813907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 59913907Sdyson 60013907Sdyson pipe_destroy_write_buffer(wpipe); 60113907Sdyson} 60213907Sdyson 60313907Sdyson/* 60413907Sdyson * This implements the pipe buffer write mechanism. Note that only 60513907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 60613907Sdyson * If there are any characters in the pipe buffer, the direct write will 60713907Sdyson * be deferred until the receiving process grabs all of the bytes from 60813907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 
*/
/*
 * wpipe - pipe being written into
 * uio   - user buffer; advanced by pipe_build_write_buffer() on success
 *
 * Returns 0 once the reader has consumed the mapped data, EPIPE if
 * PIPE_EOF is seen while waiting, the tsleep() error if a sleep is
 * interrupted, or the error from pipe_build_write_buffer().
 *
 * Every sleep in here is on wpipe, so each wakeup(wpipe) elsewhere makes
 * the loops re-check their conditions.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;
retry:
	/* Wait for a previously started direct write to drain. */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if ( wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe,
				PRIBIO|PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/*
	 * Buffered data must be read first; sleep, then start over because
	 * another direct write may have begun in the meantime.
	 */
	if (wpipe->pipe_buffer.cnt > 0) {
		if ( wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
			
		wpipe->pipe_state |= PIPE_WANTW;
		error = tsleep(wpipe,
				PRIBIO|PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	/* Claim the pipe for this direct write before building the mapping. */
	wpipe->pipe_state |= PIPE_DIRECTW;

	error = pipe_build_write_buffer(wpipe, uio);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	/*
	 * Wait until the reader clears PIPE_DIRECTW (all mapped bytes read)
	 * or the sleep is interrupted.
	 */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			/* pipelock(..., 0) sleeps without PCATCH. */
			pipelock(wpipe, 0);
			pipe_destroy_write_buffer(wpipe);
			pipeunlock(wpipe);
			pipeselwakeup(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0);
	}

	pipelock(wpipe,0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return error;

error1:
	/* Let other sleepers on wpipe re-evaluate the pipe state. */
	wakeup(wpipe);
	return error;
}
#endif
	
static int
pipe_write(fp, uio, cred, flags, p)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
	struct proc *p;
	int flags;
{
	int error = 0;
	int orig_resid;

	struct pipe *wpipe, *rpipe;

	rpipe = (struct pipe *) fp->f_data;
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		return EPIPE;
	}

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
72217163Sdyson */ 72317163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 72417163Sdyson (nbigpipe < LIMITBIGPIPES) && 72517163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 72617163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 72717163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 72817163Sdyson 72917163Sdyson if (wpipe->pipe_buffer.buffer) { 73017163Sdyson amountpipekva -= wpipe->pipe_buffer.size; 73117163Sdyson kmem_free(kernel_map, 73217163Sdyson (vm_offset_t)wpipe->pipe_buffer.buffer, 73317163Sdyson wpipe->pipe_buffer.size); 73417163Sdyson } 73517163Sdyson 73617163Sdyson#ifndef PIPE_NODIRECT 73717163Sdyson if (wpipe->pipe_map.kva) { 73817163Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 73917163Sdyson kmem_free(kernel_map, 74017163Sdyson wpipe->pipe_map.kva, 74117163Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 74217163Sdyson } 74317163Sdyson#endif 74417163Sdyson 74517163Sdyson wpipe->pipe_buffer.in = 0; 74617163Sdyson wpipe->pipe_buffer.out = 0; 74717163Sdyson wpipe->pipe_buffer.cnt = 0; 74817163Sdyson wpipe->pipe_buffer.size = BIG_PIPE_SIZE; 74917163Sdyson wpipe->pipe_buffer.buffer = NULL; 75017163Sdyson ++nbigpipe; 75117163Sdyson 75217163Sdyson#ifndef PIPE_NODIRECT 75317163Sdyson wpipe->pipe_map.cnt = 0; 75417163Sdyson wpipe->pipe_map.kva = 0; 75517163Sdyson wpipe->pipe_map.pos = 0; 75617163Sdyson wpipe->pipe_map.npages = 0; 75717163Sdyson#endif 75817163Sdyson 75917163Sdyson } 76017163Sdyson 76117163Sdyson 76213907Sdyson if( wpipe->pipe_buffer.buffer == NULL) { 76313907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 76413907Sdyson pipespace(wpipe); 76513907Sdyson pipeunlock(wpipe); 76613907Sdyson } else { 76713907Sdyson return error; 76813907Sdyson } 76913907Sdyson } 77013907Sdyson 77113675Sdyson ++wpipe->pipe_busy; 77213913Sdyson orig_resid = uio->uio_resid; 77313675Sdyson while (uio->uio_resid) { 77413907Sdyson int space; 77514037Sdyson#ifndef PIPE_NODIRECT 77613907Sdyson /* 77713907Sdyson * If the transfer is large, we can gain performance if 
77813907Sdyson * we do process-to-process copies directly. 77916416Sdyson * If the write is non-blocking, we don't use the 78016416Sdyson * direct write mechanism. 78158505Sdillon * 78258505Sdillon * The direct write mechanism will detect the reader going 78358505Sdillon * away on us. 78413907Sdyson */ 78517163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 78617163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 78717163Sdyson (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 78813907Sdyson (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 78913907Sdyson error = pipe_direct_write( wpipe, uio); 79013907Sdyson if (error) { 79113907Sdyson break; 79213907Sdyson } 79313907Sdyson continue; 79413907Sdyson } 79514037Sdyson#endif 79613907Sdyson 79713907Sdyson /* 79813907Sdyson * Pipe buffered writes cannot be coincidental with 79913907Sdyson * direct writes. We wait until the currently executing 80013907Sdyson * direct write is completed before we start filling the 80158505Sdillon * pipe buffer. We break out if a signal occurs or the 80258505Sdillon * reader goes away. 80313907Sdyson */ 80413907Sdyson retrywrite: 80513907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 80613992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 80713992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 80813992Sdyson wakeup(wpipe); 80913992Sdyson } 81058505Sdillon error = tsleep(wpipe, PRIBIO|PCATCH, "pipbww", 0); 81158505Sdillon if (wpipe->pipe_state & PIPE_EOF) 81258505Sdillon break; 81313907Sdyson if (error) 81413907Sdyson break; 81513907Sdyson } 81658505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 81758505Sdillon error = EPIPE; 81858505Sdillon break; 81958505Sdillon } 82013907Sdyson 82113907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 82214644Sdyson 82314644Sdyson /* Writes of size <= PIPE_BUF must be atomic. 
*/ 82413913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 82513913Sdyson space = 0; 82613907Sdyson 82717163Sdyson if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 82813907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 82954534Stegge int size; /* Transfer size */ 83054534Stegge int segsize; /* first segment to transfer */ 83113907Sdyson /* 83213907Sdyson * It is possible for a direct write to 83313907Sdyson * slip in on us... handle it here... 83413907Sdyson */ 83513907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 83613907Sdyson pipeunlock(wpipe); 83713907Sdyson goto retrywrite; 83813907Sdyson } 83954534Stegge /* 84054534Stegge * If a process blocked in uiomove, our 84154534Stegge * value for space might be bad. 84258505Sdillon * 84358505Sdillon * XXX will we be ok if the reader has gone 84458505Sdillon * away here? 84554534Stegge */ 84654534Stegge if (space > wpipe->pipe_buffer.size - 84754534Stegge wpipe->pipe_buffer.cnt) { 84854534Stegge pipeunlock(wpipe); 84954534Stegge goto retrywrite; 85054534Stegge } 85154534Stegge 85254534Stegge /* 85354534Stegge * Transfer size is minimum of uio transfer 85454534Stegge * and free space in pipe buffer. 85554534Stegge */ 85654534Stegge if (space > uio->uio_resid) 85754534Stegge size = uio->uio_resid; 85854534Stegge else 85954534Stegge size = space; 86054534Stegge /* 86154534Stegge * First segment to transfer is minimum of 86254534Stegge * transfer size and contiguous space in 86354534Stegge * pipe buffer. If first segment to transfer 86454534Stegge * is less than the transfer size, we've got 86554534Stegge * a wraparound in the buffer. 
86654534Stegge */ 86754534Stegge segsize = wpipe->pipe_buffer.size - 86854534Stegge wpipe->pipe_buffer.in; 86954534Stegge if (segsize > size) 87054534Stegge segsize = size; 87154534Stegge 87254534Stegge /* Transfer first segment */ 87354534Stegge 87454534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 87554534Stegge segsize, uio); 87654534Stegge 87754534Stegge if (error == 0 && segsize < size) { 87854534Stegge /* 87954534Stegge * Transfer remaining part now, to 88054534Stegge * support atomic writes. Wraparound 88154534Stegge * happened. 88254534Stegge */ 88354534Stegge if (wpipe->pipe_buffer.in + segsize != 88454534Stegge wpipe->pipe_buffer.size) 88554534Stegge panic("Expected pipe buffer wraparound disappeared"); 88654534Stegge 88754534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[0], 88854534Stegge size - segsize, uio); 88954534Stegge } 89054534Stegge if (error == 0) { 89154534Stegge wpipe->pipe_buffer.in += size; 89254534Stegge if (wpipe->pipe_buffer.in >= 89354534Stegge wpipe->pipe_buffer.size) { 89454534Stegge if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 89554534Stegge panic("Expected wraparound bad"); 89654534Stegge wpipe->pipe_buffer.in = size - segsize; 89754534Stegge } 89854534Stegge 89954534Stegge wpipe->pipe_buffer.cnt += size; 90054534Stegge if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 90154534Stegge panic("Pipe buffer overflow"); 90254534Stegge 90354534Stegge } 90413675Sdyson pipeunlock(wpipe); 90513675Sdyson } 90613675Sdyson if (error) 90713675Sdyson break; 90813675Sdyson 90913675Sdyson } else { 91013675Sdyson /* 91113675Sdyson * If the "read-side" has been blocked, wake it up now. 
91213675Sdyson */ 91313675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 91413675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 91513675Sdyson wakeup(wpipe); 91613675Sdyson } 91714037Sdyson 91813675Sdyson /* 91913675Sdyson * don't block on non-blocking I/O 92013675Sdyson */ 92116960Sdyson if (fp->f_flag & FNONBLOCK) { 92213907Sdyson error = EAGAIN; 92313675Sdyson break; 92413675Sdyson } 92513907Sdyson 92614037Sdyson /* 92714037Sdyson * We have no more space and have something to offer, 92829356Speter * wake up select/poll. 92914037Sdyson */ 93014037Sdyson pipeselwakeup(wpipe); 93114037Sdyson 93213675Sdyson wpipe->pipe_state |= PIPE_WANTW; 93343301Sdillon if ((error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) != 0) { 93413675Sdyson break; 93513675Sdyson } 93613675Sdyson /* 93713675Sdyson * If read side wants to go away, we just issue a signal 93813675Sdyson * to ourselves. 93913675Sdyson */ 94013675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 94113774Sdyson error = EPIPE; 94213907Sdyson break; 94313675Sdyson } 94413675Sdyson } 94513675Sdyson } 94613675Sdyson 94714644Sdyson --wpipe->pipe_busy; 94813675Sdyson if ((wpipe->pipe_busy == 0) && 94913675Sdyson (wpipe->pipe_state & PIPE_WANT)) { 95013675Sdyson wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); 95113675Sdyson wakeup(wpipe); 95213675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 95313675Sdyson /* 95413675Sdyson * If we have put any characters in the buffer, we wake up 95513675Sdyson * the reader. 
95613675Sdyson */ 95713675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 95813675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 95913675Sdyson wakeup(wpipe); 96013675Sdyson } 96113675Sdyson } 96213909Sdyson 96313909Sdyson /* 96413909Sdyson * Don't return EPIPE if I/O was successful 96513909Sdyson */ 96613907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 96713907Sdyson (uio->uio_resid == 0) && 96813907Sdyson (error == EPIPE)) 96913907Sdyson error = 0; 97013913Sdyson 97124101Sbde if (error == 0) 97255112Sbde vfs_timestamp(&wpipe->pipe_mtime); 97324101Sbde 97414037Sdyson /* 97514037Sdyson * We have something to offer, 97629356Speter * wake up select/poll. 97714037Sdyson */ 97814177Sdyson if (wpipe->pipe_buffer.cnt) 97914037Sdyson pipeselwakeup(wpipe); 98013907Sdyson 98113675Sdyson return error; 98213675Sdyson} 98313675Sdyson 98413675Sdyson/* 98513675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 98613675Sdyson */ 98713675Sdysonint 98813675Sdysonpipe_ioctl(fp, cmd, data, p) 98913675Sdyson struct file *fp; 99036735Sdfr u_long cmd; 99113675Sdyson register caddr_t data; 99213675Sdyson struct proc *p; 99313675Sdyson{ 99413675Sdyson register struct pipe *mpipe = (struct pipe *)fp->f_data; 99513675Sdyson 99613675Sdyson switch (cmd) { 99713675Sdyson 99813675Sdyson case FIONBIO: 99913675Sdyson return (0); 100013675Sdyson 100113675Sdyson case FIOASYNC: 100213675Sdyson if (*(int *)data) { 100313675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 100413675Sdyson } else { 100513675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 100613675Sdyson } 100713675Sdyson return (0); 100813675Sdyson 100913675Sdyson case FIONREAD: 101014037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 101114037Sdyson *(int *)data = mpipe->pipe_map.cnt; 101214037Sdyson else 101314037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 101413675Sdyson return (0); 101513675Sdyson 101641086Struckman case FIOSETOWN: 101741086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 101841086Struckman 
101941086Struckman case FIOGETOWN: 102041086Struckman *(int *)data = fgetown(mpipe->pipe_sigio); 102113675Sdyson return (0); 102213675Sdyson 102341086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 102441086Struckman case TIOCSPGRP: 102541086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 102641086Struckman 102741086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 102818863Sdyson case TIOCGPGRP: 102941086Struckman *(int *)data = -fgetown(mpipe->pipe_sigio); 103013675Sdyson return (0); 103113675Sdyson 103213675Sdyson } 103317124Sbde return (ENOTTY); 103413675Sdyson} 103513675Sdyson 103613675Sdysonint 103729356Speterpipe_poll(fp, events, cred, p) 103813675Sdyson struct file *fp; 103929356Speter int events; 104029356Speter struct ucred *cred; 104113675Sdyson struct proc *p; 104213675Sdyson{ 104313675Sdyson register struct pipe *rpipe = (struct pipe *)fp->f_data; 104413675Sdyson struct pipe *wpipe; 104529356Speter int revents = 0; 104613675Sdyson 104713675Sdyson wpipe = rpipe->pipe_peer; 104829356Speter if (events & (POLLIN | POLLRDNORM)) 104929356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 105029356Speter (rpipe->pipe_buffer.cnt > 0) || 105129356Speter (rpipe->pipe_state & PIPE_EOF)) 105229356Speter revents |= events & (POLLIN | POLLRDNORM); 105313675Sdyson 105429356Speter if (events & (POLLOUT | POLLWRNORM)) 105529356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 105643311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 105743311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 105829356Speter revents |= events & (POLLOUT | POLLWRNORM); 105913675Sdyson 106029356Speter if ((rpipe->pipe_state & PIPE_EOF) || 106129356Speter (wpipe == NULL) || 106229356Speter (wpipe->pipe_state & PIPE_EOF)) 106329356Speter revents |= POLLHUP; 106429356Speter 106529356Speter if (revents == 0) { 106629356Speter if (events & (POLLIN | POLLRDNORM)) { 106729356Speter selrecord(p, 
&rpipe->pipe_sel); 106829356Speter rpipe->pipe_state |= PIPE_SEL; 106913675Sdyson } 107013675Sdyson 107129356Speter if (events & (POLLOUT | POLLWRNORM)) { 107230164Speter selrecord(p, &wpipe->pipe_sel); 107330164Speter wpipe->pipe_state |= PIPE_SEL; 107413907Sdyson } 107513675Sdyson } 107629356Speter 107729356Speter return (revents); 107813675Sdyson} 107913675Sdyson 108052983Speterstatic int 108152983Speterpipe_stat(fp, ub, p) 108252983Speter struct file *fp; 108352983Speter struct stat *ub; 108452983Speter struct proc *p; 108513675Sdyson{ 108652983Speter struct pipe *pipe = (struct pipe *)fp->f_data; 108752983Speter 108813675Sdyson bzero((caddr_t)ub, sizeof (*ub)); 108917124Sbde ub->st_mode = S_IFIFO; 109013907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 109113675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 109213675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 109334901Sphk ub->st_atimespec = pipe->pipe_atime; 109434901Sphk ub->st_mtimespec = pipe->pipe_mtime; 109534901Sphk ub->st_ctimespec = pipe->pipe_ctime; 109660404Schris ub->st_uid = fp->f_cred->cr_uid; 109760404Schris ub->st_gid = fp->f_cred->cr_gid; 109817124Sbde /* 109960404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 110017124Sbde * XXX (st_dev, st_ino) should be unique. 
110117124Sbde */ 110213675Sdyson return 0; 110313675Sdyson} 110413675Sdyson 110513675Sdyson/* ARGSUSED */ 110613675Sdysonstatic int 110713675Sdysonpipe_close(fp, p) 110813675Sdyson struct file *fp; 110913675Sdyson struct proc *p; 111013675Sdyson{ 111113675Sdyson struct pipe *cpipe = (struct pipe *)fp->f_data; 111216322Sgpalmer 111349413Sgreen fp->f_ops = &badfileops; 111449413Sgreen fp->f_data = NULL; 111541086Struckman funsetown(cpipe->pipe_sigio); 111613675Sdyson pipeclose(cpipe); 111713675Sdyson return 0; 111813675Sdyson} 111913675Sdyson 112013675Sdyson/* 112113675Sdyson * shutdown the pipe 112213675Sdyson */ 112313675Sdysonstatic void 112413675Sdysonpipeclose(cpipe) 112513675Sdyson struct pipe *cpipe; 112613675Sdyson{ 112713907Sdyson struct pipe *ppipe; 112813675Sdyson if (cpipe) { 112913907Sdyson 113014037Sdyson pipeselwakeup(cpipe); 113113907Sdyson 113213675Sdyson /* 113313675Sdyson * If the other side is blocked, wake it up saying that 113413675Sdyson * we want to close it down. 113513675Sdyson */ 113613675Sdyson while (cpipe->pipe_busy) { 113713675Sdyson wakeup(cpipe); 113813675Sdyson cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; 113913675Sdyson tsleep(cpipe, PRIBIO, "pipecl", 0); 114013675Sdyson } 114113675Sdyson 114213675Sdyson /* 114313675Sdyson * Disconnect from peer 114413675Sdyson */ 114543301Sdillon if ((ppipe = cpipe->pipe_peer) != NULL) { 114614037Sdyson pipeselwakeup(ppipe); 114713907Sdyson 114813907Sdyson ppipe->pipe_state |= PIPE_EOF; 114913907Sdyson wakeup(ppipe); 115013907Sdyson ppipe->pipe_peer = NULL; 115113675Sdyson } 115213675Sdyson 115313675Sdyson /* 115413675Sdyson * free resources 115513675Sdyson */ 115613907Sdyson if (cpipe->pipe_buffer.buffer) { 115717163Sdyson if (cpipe->pipe_buffer.size > PIPE_SIZE) 115817163Sdyson --nbigpipe; 115913907Sdyson amountpipekva -= cpipe->pipe_buffer.size; 116013907Sdyson kmem_free(kernel_map, 116113907Sdyson (vm_offset_t)cpipe->pipe_buffer.buffer, 116213907Sdyson cpipe->pipe_buffer.size); 116313907Sdyson } 
116414037Sdyson#ifndef PIPE_NODIRECT 116513907Sdyson if (cpipe->pipe_map.kva) { 116613912Sdyson amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 116713907Sdyson kmem_free(kernel_map, 116813907Sdyson cpipe->pipe_map.kva, 116913912Sdyson cpipe->pipe_buffer.size + PAGE_SIZE); 117013907Sdyson } 117114037Sdyson#endif 117227899Sdyson zfree(pipe_zone, cpipe); 117313675Sdyson } 117413675Sdyson} 117559288Sjlemon 117659288Sjlemonstatic int 117759288Sjlemonfilt_pipeattach(struct knote *kn) 117859288Sjlemon{ 117959288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 118059288Sjlemon 118159288Sjlemon SLIST_INSERT_HEAD(&rpipe->pipe_sel.si_note, kn, kn_selnext); 118259288Sjlemon return (0); 118359288Sjlemon} 118459288Sjlemon 118559288Sjlemonstatic void 118659288Sjlemonfilt_pipedetach(struct knote *kn) 118759288Sjlemon{ 118859288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 118959288Sjlemon 119059288Sjlemon SLIST_REMOVE(&rpipe->pipe_sel.si_note, kn, knote, kn_selnext); 119159288Sjlemon} 119259288Sjlemon 119359288Sjlemon/*ARGSUSED*/ 119459288Sjlemonstatic int 119559288Sjlemonfilt_piperead(struct knote *kn, long hint) 119659288Sjlemon{ 119759288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 119859288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 119959288Sjlemon 120059288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 120159288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 120259288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 120359288Sjlemon 120459288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 120559288Sjlemon (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 120659288Sjlemon kn->kn_flags |= EV_EOF; 120759288Sjlemon return (1); 120859288Sjlemon } 120959288Sjlemon return (kn->kn_data > 0); 121059288Sjlemon} 121159288Sjlemon 121259288Sjlemon/*ARGSUSED*/ 121359288Sjlemonstatic int 121459288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 121559288Sjlemon{ 121659288Sjlemon struct pipe *rpipe = (struct pipe 
*)kn->kn_fp->f_data; 121759288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 121859288Sjlemon 121959288Sjlemon if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 122059288Sjlemon kn->kn_data = 0; 122159288Sjlemon kn->kn_flags |= EV_EOF; 122259288Sjlemon return (1); 122359288Sjlemon } 122459288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 122559288Sjlemon if ((wpipe->pipe_state & PIPE_DIRECTW) == 0) 122659288Sjlemon kn->kn_data = 0; 122759288Sjlemon 122859288Sjlemon return (kn->kn_data >= PIPE_BUF); 122959288Sjlemon} 1230