sys_pipe.c revision 91412
113675Sdyson/* 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson * 1950477Speter * $FreeBSD: head/sys/kern/sys_pipe.c 91412 2002-02-27 18:49:58Z alfred $ 2013675Sdyson */ 2113675Sdyson 2213675Sdyson/* 2313675Sdyson * This file contains a high-performance replacement for the socket-based 2413675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2513675Sdyson * all features of sockets, but does do everything that pipes normally 2613675Sdyson * do. 2713675Sdyson */ 2813675Sdyson 2913907Sdyson/* 3013907Sdyson * This code has two modes of operation, a small write mode and a large 3113907Sdyson * write mode. The small write mode acts like conventional pipes with 3213907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3313907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 3413907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 3513907Sdyson * the receiving process can copy it directly from the pages in the sending 3613907Sdyson * process. 3713907Sdyson * 3813907Sdyson * If the sending process receives a signal, it is possible that it will 3913913Sdyson * go away, and certainly its address space can change, because control 4013907Sdyson * is returned back to the user-mode side. In that case, the pipe code 4113907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4213907Sdyson * kernel buffer, and the receiving process will grab the data from the 4313907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4413907Sdyson * the copy operation is normally eliminated. 4513907Sdyson * 4613907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4713907Sdyson * happen for small transfers so that the system will not spend all of 4813913Sdyson * its time context switching. PIPE_SIZE is constrained by the 4913907Sdyson * amount of kernel virtual memory. 5013907Sdyson */ 5113907Sdyson 5213675Sdyson#include <sys/param.h> 5313675Sdyson#include <sys/systm.h> 5424131Sbde#include <sys/fcntl.h> 5513675Sdyson#include <sys/file.h> 5613675Sdyson#include <sys/filedesc.h> 5724206Sbde#include <sys/filio.h> 5891372Salfred#include <sys/kernel.h> 5976166Smarkm#include <sys/lock.h> 6076827Salfred#include <sys/mutex.h> 6124206Sbde#include <sys/ttycom.h> 6213675Sdyson#include <sys/stat.h> 6329356Speter#include <sys/poll.h> 6470834Swollman#include <sys/selinfo.h> 6513675Sdyson#include <sys/signalvar.h> 6613675Sdyson#include <sys/sysproto.h> 6713675Sdyson#include <sys/pipe.h> 6876166Smarkm#include <sys/proc.h> 6955112Sbde#include <sys/vnode.h> 7034924Sbde#include <sys/uio.h> 7159288Sjlemon#include <sys/event.h> 7213675Sdyson 7313675Sdyson#include <vm/vm.h> 7413675Sdyson#include <vm/vm_param.h> 7513675Sdyson#include <vm/vm_object.h> 7613675Sdyson#include <vm/vm_kern.h> 7713675Sdyson#include <vm/vm_extern.h> 7813675Sdyson#include <vm/pmap.h> 7913675Sdyson#include <vm/vm_map.h> 8013907Sdyson#include <vm/vm_page.h> 8127899Sdyson#include <vm/vm_zone.h> 8213675Sdyson 8314037Sdyson/* 8414037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 8514037Sdyson * approx 30% decrease in transfer rate. This could be useful for 8614037Sdyson * NetBSD or OpenBSD. 8714037Sdyson */ 8814037Sdyson/* #define PIPE_NODIRECT */ 8914037Sdyson 9014037Sdyson/* 9114037Sdyson * interfaces to the outside world 9214037Sdyson */ 9313675Sdysonstatic int pipe_read __P((struct file *fp, struct uio *uio, 9483366Sjulian struct ucred *cred, int flags, struct thread *td)); 9513675Sdysonstatic int pipe_write __P((struct file *fp, struct uio *uio, 9683366Sjulian struct ucred *cred, int flags, struct thread *td)); 9783366Sjulianstatic int pipe_close __P((struct file *fp, struct thread *td)); 9829356Speterstatic int pipe_poll __P((struct file *fp, int events, struct ucred *cred, 9983366Sjulian struct thread *td)); 10072521Sjlemonstatic int pipe_kqfilter __P((struct file *fp, struct knote *kn)); 10183366Sjulianstatic int pipe_stat __P((struct file *fp, struct stat *sb, struct thread *td)); 10283366Sjulianstatic int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct thread *td)); 10313675Sdyson 10472521Sjlemonstatic struct fileops pipeops = { 10572521Sjlemon pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter, 10672521Sjlemon pipe_stat, pipe_close 10772521Sjlemon}; 10813675Sdyson 10959288Sjlemonstatic void filt_pipedetach(struct knote *kn); 11059288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 11159288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 11259288Sjlemon 11372521Sjlemonstatic struct filterops pipe_rfiltops = 11472521Sjlemon { 1, NULL, filt_pipedetach, filt_piperead }; 11572521Sjlemonstatic struct filterops pipe_wfiltops = 11672521Sjlemon { 1, NULL, filt_pipedetach, filt_pipewrite }; 11759288Sjlemon 11891362Salfred#define PIPE_GET_GIANT(pipe) \ 11991362Salfred do { \ 12091362Salfred PIPE_UNLOCK(wpipe); \ 12191362Salfred mtx_lock(&Giant); \ 12291362Salfred } while (0) 12372521Sjlemon 12491362Salfred#define PIPE_DROP_GIANT(pipe) \ 12591362Salfred do { \ 12691362Salfred mtx_unlock(&Giant); \ 12791362Salfred PIPE_LOCK(wpipe); \ 12891362Salfred } while (0) 12991362Salfred 13013675Sdyson/* 13113675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 13213675Sdyson * space is pageable. The pipe code will try to maintain locality of 13313675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 13413675Sdyson * will not wipe the cache. 13513675Sdyson */ 13613907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 13713907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 13813675Sdyson 13913907Sdyson/* 14013907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 14113907Sdyson * is there so that on large systems, we don't exhaust it. 14213907Sdyson */ 14313907Sdyson#define MAXPIPEKVA (8*1024*1024) 14413907Sdyson 14513907Sdyson/* 14613907Sdyson * Limit for direct transfers, we cannot, of course limit 14713907Sdyson * the amount of kva for pipes in general though. 14813907Sdyson */ 14913907Sdyson#define LIMITPIPEKVA (16*1024*1024) 15017163Sdyson 15117163Sdyson/* 15217163Sdyson * Limit the number of "big" pipes 15317163Sdyson */ 15417163Sdyson#define LIMITBIGPIPES 32 15533181Seivindstatic int nbigpipe; 15617163Sdyson 15717124Sbdestatic int amountpipekva; 15813907Sdyson 15991372Salfredstatic void pipeinit __P((void *dummy __unused)); 16013675Sdysonstatic void pipeclose __P((struct pipe *cpipe)); 16176364Salfredstatic void pipe_free_kmem __P((struct pipe *cpipe)); 16276364Salfredstatic int pipe_create __P((struct pipe **cpipep)); 16313907Sdysonstatic __inline int pipelock __P((struct pipe *cpipe, int catch)); 16413675Sdysonstatic __inline void pipeunlock __P((struct pipe *cpipe)); 16514122Speterstatic __inline void pipeselwakeup __P((struct pipe *cpipe)); 16614037Sdyson#ifndef PIPE_NODIRECT 16713907Sdysonstatic int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 16813907Sdysonstatic void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 16913907Sdysonstatic int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 17013907Sdysonstatic void pipe_clone_write_buffer __P((struct pipe *wpipe)); 17114037Sdyson#endif 17276364Salfredstatic int pipespace __P((struct pipe *cpipe, int size)); 17313675Sdyson 17433181Seivindstatic vm_zone_t pipe_zone; 17527899Sdyson 17691372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 17791372Salfred 17891372Salfredstatic void 17991372Salfredpipeinit(void *dummy __unused) 18091372Salfred{ 18191372Salfred 18291372Salfred pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4); 18391372Salfred} 18491372Salfred 18513675Sdyson/* 18613675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 18713675Sdyson */ 18813675Sdyson 18913675Sdyson/* ARGSUSED */ 19013675Sdysonint 19183366Sjulianpipe(td, uap) 19283366Sjulian struct thread *td; 19313675Sdyson struct pipe_args /* { 19413675Sdyson int dummy; 19513675Sdyson } */ *uap; 19613675Sdyson{ 19783366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 19813675Sdyson struct file *rf, *wf; 19913675Sdyson struct pipe *rpipe, *wpipe; 20013675Sdyson int fd, error; 20191362Salfred 20291372Salfred KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 20327899Sdyson 20476756Salfred rpipe = wpipe = NULL; 20576364Salfred if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 20676364Salfred pipeclose(rpipe); 20776364Salfred pipeclose(wpipe); 20876364Salfred return (ENFILE); 20976364Salfred } 21076364Salfred 21113907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 21213907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 21313675Sdyson 21483366Sjulian error = falloc(td, &rf, &fd); 21570915Sdwmalone if (error) { 21670915Sdwmalone pipeclose(rpipe); 21770915Sdwmalone pipeclose(wpipe); 21870915Sdwmalone return (error); 21970915Sdwmalone } 22070915Sdwmalone fhold(rf); 22183366Sjulian td->td_retval[0] = fd; 22270915Sdwmalone 22370803Sdwmalone /* 22470803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 22570803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 22670803Sdwmalone * to avoid races against processes which manage to dup() the read 22770803Sdwmalone * side while we are blocked trying to allocate the write side. 22870803Sdwmalone */ 22989306Salfred FILE_LOCK(rf); 23013675Sdyson rf->f_flag = FREAD | FWRITE; 23113675Sdyson rf->f_type = DTYPE_PIPE; 23249413Sgreen rf->f_data = (caddr_t)rpipe; 23313675Sdyson rf->f_ops = &pipeops; 23489306Salfred FILE_UNLOCK(rf); 23583366Sjulian error = falloc(td, &wf, &fd); 23670915Sdwmalone if (error) { 23789306Salfred FILEDESC_LOCK(fdp); 23883366Sjulian if (fdp->fd_ofiles[td->td_retval[0]] == rf) { 23983366Sjulian fdp->fd_ofiles[td->td_retval[0]] = NULL; 24089306Salfred FILEDESC_UNLOCK(fdp); 24183366Sjulian fdrop(rf, td); 24289306Salfred } else 24389306Salfred FILEDESC_UNLOCK(fdp); 24483366Sjulian fdrop(rf, td); 24570915Sdwmalone /* rpipe has been closed by fdrop(). */ 24670915Sdwmalone pipeclose(wpipe); 24770915Sdwmalone return (error); 24870915Sdwmalone } 24989306Salfred FILE_LOCK(wf); 25013675Sdyson wf->f_flag = FREAD | FWRITE; 25113675Sdyson wf->f_type = DTYPE_PIPE; 25249413Sgreen wf->f_data = (caddr_t)wpipe; 25313675Sdyson wf->f_ops = &pipeops; 25489306Salfred FILE_UNLOCK(wf); 25583366Sjulian td->td_retval[1] = fd; 25613675Sdyson rpipe->pipe_peer = wpipe; 25713675Sdyson wpipe->pipe_peer = rpipe; 25891362Salfred rpipe->pipe_mtxp = wpipe->pipe_mtxp = mtx_pool_alloc(); 25983366Sjulian fdrop(rf, td); 26013675Sdyson 26113675Sdyson return (0); 26213675Sdyson} 26313675Sdyson 26413909Sdyson/* 26513909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 26676364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 26776364Salfred * it will retain the old buffer. 26876364Salfred * If it fails it will return ENOMEM. 26913909Sdyson */ 27076364Salfredstatic int 27176364Salfredpipespace(cpipe, size) 27213675Sdyson struct pipe *cpipe; 27376364Salfred int size; 27413675Sdyson{ 27576364Salfred struct vm_object *object; 27676364Salfred caddr_t buffer; 27713688Sdyson int npages, error; 27813675Sdyson 27979224Sdillon GIANT_REQUIRED; 28091412Salfred KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 28191412Salfred ("pipespace: pipe mutex locked")); 28279224Sdillon 28376364Salfred npages = round_page(size)/PAGE_SIZE; 28413675Sdyson /* 28513675Sdyson * Create an object, I don't like the idea of paging to/from 28613675Sdyson * kernel_object. 28714037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 28813675Sdyson */ 28976364Salfred object = vm_object_allocate(OBJT_DEFAULT, npages); 29076364Salfred buffer = (caddr_t) vm_map_min(kernel_map); 29113675Sdyson 29213675Sdyson /* 29313675Sdyson * Insert the object into the kernel map, and allocate kva for it. 29413675Sdyson * The map entry is, by default, pageable. 29514037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 29613675Sdyson */ 29776364Salfred error = vm_map_find(kernel_map, object, 0, 29876364Salfred (vm_offset_t *) &buffer, size, 1, 29913688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 30013675Sdyson 30176364Salfred if (error != KERN_SUCCESS) { 30276364Salfred vm_object_deallocate(object); 30376364Salfred return (ENOMEM); 30476364Salfred } 30576364Salfred 30676364Salfred /* free old resources if we're resizing */ 30776364Salfred pipe_free_kmem(cpipe); 30876364Salfred cpipe->pipe_buffer.object = object; 30976364Salfred cpipe->pipe_buffer.buffer = buffer; 31076364Salfred cpipe->pipe_buffer.size = size; 31176364Salfred cpipe->pipe_buffer.in = 0; 31276364Salfred cpipe->pipe_buffer.out = 0; 31376364Salfred cpipe->pipe_buffer.cnt = 0; 31413907Sdyson amountpipekva += cpipe->pipe_buffer.size; 31576364Salfred return (0); 31613907Sdyson} 31713688Sdyson 31813907Sdyson/* 31913907Sdyson * initialize and allocate VM and memory for pipe 32013907Sdyson */ 32176364Salfredstatic int 32276364Salfredpipe_create(cpipep) 32376364Salfred struct pipe **cpipep; 32476364Salfred{ 32513907Sdyson struct pipe *cpipe; 32676364Salfred int error; 32713907Sdyson 32876364Salfred *cpipep = zalloc(pipe_zone); 32976364Salfred if (*cpipep == NULL) 33076364Salfred return (ENOMEM); 33117163Sdyson 33276364Salfred cpipe = *cpipep; 33376364Salfred 33476364Salfred /* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */ 33576364Salfred cpipe->pipe_buffer.object = NULL; 33676364Salfred#ifndef PIPE_NODIRECT 33776364Salfred cpipe->pipe_map.kva = NULL; 33876364Salfred#endif 33976364Salfred /* 34076364Salfred * protect so pipeclose() doesn't follow a junk pointer 34176364Salfred * if pipespace() fails. 34276364Salfred */ 34376754Salfred bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel)); 34413675Sdyson cpipe->pipe_state = 0; 34513675Sdyson cpipe->pipe_peer = NULL; 34613675Sdyson cpipe->pipe_busy = 0; 34713907Sdyson 34814037Sdyson#ifndef PIPE_NODIRECT 34913907Sdyson /* 35013907Sdyson * pipe data structure initializations to support direct pipe I/O 35113907Sdyson */ 35213907Sdyson cpipe->pipe_map.cnt = 0; 35313907Sdyson cpipe->pipe_map.kva = 0; 35413907Sdyson cpipe->pipe_map.pos = 0; 35513907Sdyson cpipe->pipe_map.npages = 0; 35617124Sbde /* cpipe->pipe_map.ms[] = invalid */ 35714037Sdyson#endif 35876364Salfred 35991412Salfred cpipe->pipe_mtxp = NULL; /* avoid pipespace assertion */ 36076364Salfred error = pipespace(cpipe, PIPE_SIZE); 36176760Salfred if (error) 36276364Salfred return (error); 36376364Salfred 36476364Salfred vfs_timestamp(&cpipe->pipe_ctime); 36576364Salfred cpipe->pipe_atime = cpipe->pipe_ctime; 36676364Salfred cpipe->pipe_mtime = cpipe->pipe_ctime; 36776364Salfred 36876364Salfred return (0); 36913675Sdyson} 37013675Sdyson 37113675Sdyson 37213675Sdyson/* 37313675Sdyson * lock a pipe for I/O, blocking other access 37413675Sdyson */ 37513675Sdysonstatic __inline int 37613907Sdysonpipelock(cpipe, catch) 37713675Sdyson struct pipe *cpipe; 37813907Sdyson int catch; 37913675Sdyson{ 38013776Sdyson int error; 38176364Salfred 38291362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 38391362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 38413675Sdyson cpipe->pipe_state |= PIPE_LWANT; 38591362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 38691362Salfred catch ? (PRIBIO | PCATCH) : PRIBIO, 38776760Salfred "pipelk", 0); 38876760Salfred if (error != 0) 38976760Salfred return (error); 39013675Sdyson } 39191362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 39276760Salfred return (0); 39313675Sdyson} 39413675Sdyson 39513675Sdyson/* 39613675Sdyson * unlock a pipe I/O lock 39713675Sdyson */ 39813675Sdysonstatic __inline void 39913675Sdysonpipeunlock(cpipe) 40013675Sdyson struct pipe *cpipe; 40113675Sdyson{ 40276364Salfred 40391362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 40491362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 40513675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 40613675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 40714177Sdyson wakeup(cpipe); 40813675Sdyson } 40913675Sdyson} 41013675Sdyson 41114037Sdysonstatic __inline void 41214037Sdysonpipeselwakeup(cpipe) 41314037Sdyson struct pipe *cpipe; 41414037Sdyson{ 41576364Salfred 41614037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 41714037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 41814037Sdyson selwakeup(&cpipe->pipe_sel); 41914037Sdyson } 42041086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 42141086Struckman pgsigio(cpipe->pipe_sigio, SIGIO, 0); 42259288Sjlemon KNOTE(&cpipe->pipe_sel.si_note, 0); 42314037Sdyson} 42414037Sdyson 42513675Sdyson/* ARGSUSED */ 42613675Sdysonstatic int 42783366Sjulianpipe_read(fp, uio, cred, flags, td) 42813675Sdyson struct file *fp; 42913675Sdyson struct uio *uio; 43013675Sdyson struct ucred *cred; 43183366Sjulian struct thread *td; 43245311Sdt int flags; 43313675Sdyson{ 43413675Sdyson struct pipe *rpipe = (struct pipe *) fp->f_data; 43547748Salc int error; 43613675Sdyson int nread = 0; 43718863Sdyson u_int size; 43813675Sdyson 43991362Salfred PIPE_LOCK(rpipe); 44013675Sdyson ++rpipe->pipe_busy; 44147748Salc error = pipelock(rpipe, 1); 44247748Salc if (error) 44347748Salc goto unlocked_error; 44447748Salc 44513675Sdyson while (uio->uio_resid) { 44613907Sdyson /* 44713907Sdyson * normal pipe buffer receive 44813907Sdyson */ 44913675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 45018863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 45113675Sdyson if (size > rpipe->pipe_buffer.cnt) 45213675Sdyson size = rpipe->pipe_buffer.cnt; 45318863Sdyson if (size > (u_int) uio->uio_resid) 45418863Sdyson size = (u_int) uio->uio_resid; 45547748Salc 45691362Salfred PIPE_UNLOCK(rpipe); 45747748Salc error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 45813675Sdyson size, uio); 45991362Salfred PIPE_LOCK(rpipe); 46076760Salfred if (error) 46113675Sdyson break; 46276760Salfred 46313675Sdyson rpipe->pipe_buffer.out += size; 46413675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 46513675Sdyson rpipe->pipe_buffer.out = 0; 46613675Sdyson 46713675Sdyson rpipe->pipe_buffer.cnt -= size; 46847748Salc 46947748Salc /* 47047748Salc * If there is no more to read in the pipe, reset 47147748Salc * its pointers to the beginning. This improves 47247748Salc * cache hit stats. 47347748Salc */ 47447748Salc if (rpipe->pipe_buffer.cnt == 0) { 47547748Salc rpipe->pipe_buffer.in = 0; 47647748Salc rpipe->pipe_buffer.out = 0; 47747748Salc } 47813675Sdyson nread += size; 47914037Sdyson#ifndef PIPE_NODIRECT 48013907Sdyson /* 48113907Sdyson * Direct copy, bypassing a kernel buffer. 48213907Sdyson */ 48313907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 48447748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 48547748Salc caddr_t va; 48618863Sdyson if (size > (u_int) uio->uio_resid) 48718863Sdyson size = (u_int) uio->uio_resid; 48847748Salc 48976760Salfred va = (caddr_t) rpipe->pipe_map.kva + 49076760Salfred rpipe->pipe_map.pos; 49191362Salfred PIPE_UNLOCK(rpipe); 49247748Salc error = uiomove(va, size, uio); 49391362Salfred PIPE_LOCK(rpipe); 49413907Sdyson if (error) 49513907Sdyson break; 49613907Sdyson nread += size; 49713907Sdyson rpipe->pipe_map.pos += size; 49813907Sdyson rpipe->pipe_map.cnt -= size; 49913907Sdyson if (rpipe->pipe_map.cnt == 0) { 50013907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 50113907Sdyson wakeup(rpipe); 50213907Sdyson } 50314037Sdyson#endif 50413675Sdyson } else { 50513675Sdyson /* 50613675Sdyson * detect EOF condition 50776760Salfred * read returns 0 on EOF, no need to set error 50813675Sdyson */ 50976760Salfred if (rpipe->pipe_state & PIPE_EOF) 51013675Sdyson break; 51143623Sdillon 51213675Sdyson /* 51313675Sdyson * If the "write-side" has been blocked, wake it up now. 51413675Sdyson */ 51513675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 51613675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 51713675Sdyson wakeup(rpipe); 51813675Sdyson } 51943623Sdillon 52043623Sdillon /* 52147748Salc * Break if some data was read. 52243623Sdillon */ 52347748Salc if (nread > 0) 52413675Sdyson break; 52516960Sdyson 52643623Sdillon /* 52747748Salc * Unlock the pipe buffer for our remaining processing. We 52847748Salc * will either break out with an error or we will sleep and 52947748Salc * relock to loop. 53043623Sdillon */ 53147748Salc pipeunlock(rpipe); 53243623Sdillon 53313675Sdyson /* 53447748Salc * Handle non-blocking mode operation or 53547748Salc * wait for more data. 53613675Sdyson */ 53776760Salfred if (fp->f_flag & FNONBLOCK) { 53847748Salc error = EAGAIN; 53976760Salfred } else { 54047748Salc rpipe->pipe_state |= PIPE_WANTR; 54191362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 54291362Salfred PRIBIO | PCATCH, 54377140Salfred "piperd", 0)) == 0) 54447748Salc error = pipelock(rpipe, 1); 54513675Sdyson } 54647748Salc if (error) 54747748Salc goto unlocked_error; 54813675Sdyson } 54913675Sdyson } 55047748Salc pipeunlock(rpipe); 55113675Sdyson 55291362Salfred /* XXX: should probably do this before getting any locks. */ 55324101Sbde if (error == 0) 55455112Sbde vfs_timestamp(&rpipe->pipe_atime); 55547748Salcunlocked_error: 55647748Salc --rpipe->pipe_busy; 55713913Sdyson 55847748Salc /* 55947748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 56047748Salc */ 56113675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 56213675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 56313675Sdyson wakeup(rpipe); 56413675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 56513675Sdyson /* 56647748Salc * Handle write blocking hysteresis. 56713675Sdyson */ 56813675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 56913675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 57013675Sdyson wakeup(rpipe); 57113675Sdyson } 57213675Sdyson } 57314037Sdyson 57414802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 57514037Sdyson pipeselwakeup(rpipe); 57614037Sdyson 57791362Salfred PIPE_UNLOCK(rpipe); 57876760Salfred return (error); 57913675Sdyson} 58013675Sdyson 58114037Sdyson#ifndef PIPE_NODIRECT 58213907Sdyson/* 58313907Sdyson * Map the sending processes' buffer into kernel space and wire it. 58413907Sdyson * This is similar to a physical write operation. 58513907Sdyson */ 58613675Sdysonstatic int 58713907Sdysonpipe_build_write_buffer(wpipe, uio) 58813907Sdyson struct pipe *wpipe; 58913675Sdyson struct uio *uio; 59013675Sdyson{ 59118863Sdyson u_int size; 59213907Sdyson int i; 59313907Sdyson vm_offset_t addr, endaddr, paddr; 59413907Sdyson 59579224Sdillon GIANT_REQUIRED; 59691412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 59779224Sdillon 59818863Sdyson size = (u_int) uio->uio_iov->iov_len; 59913907Sdyson if (size > wpipe->pipe_buffer.size) 60013907Sdyson size = wpipe->pipe_buffer.size; 60113907Sdyson 60240286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 60376760Salfred addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 60476760Salfred for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 60513907Sdyson vm_page_t m; 60613907Sdyson 60751474Sdillon if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 60851474Sdillon (paddr = pmap_kextract(addr)) == 0) { 60913907Sdyson int j; 61076760Salfred 61176760Salfred for (j = 0; j < i; j++) 61240700Sdg vm_page_unwire(wpipe->pipe_map.ms[j], 1); 61376760Salfred return (EFAULT); 61413907Sdyson } 61513907Sdyson 61613907Sdyson m = PHYS_TO_VM_PAGE(paddr); 61713907Sdyson vm_page_wire(m); 61813907Sdyson wpipe->pipe_map.ms[i] = m; 61913907Sdyson } 62013907Sdyson 62113907Sdyson/* 62213907Sdyson * set up the control block 62313907Sdyson */ 62413907Sdyson wpipe->pipe_map.npages = i; 62576760Salfred wpipe->pipe_map.pos = 62676760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 62713907Sdyson wpipe->pipe_map.cnt = size; 62813907Sdyson 62913907Sdyson/* 63013907Sdyson * and map the buffer 63113907Sdyson */ 63213907Sdyson if (wpipe->pipe_map.kva == 0) { 63313912Sdyson /* 63413912Sdyson * We need to allocate space for an extra page because the 63513912Sdyson * address range might (will) span pages at times. 63613912Sdyson */ 63713907Sdyson wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 63813912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 63913912Sdyson amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 64013907Sdyson } 64113907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 64213907Sdyson wpipe->pipe_map.npages); 64313907Sdyson 64413907Sdyson/* 64513907Sdyson * and update the uio data 64613907Sdyson */ 64713907Sdyson 64813907Sdyson uio->uio_iov->iov_len -= size; 64913907Sdyson uio->uio_iov->iov_base += size; 65013907Sdyson if (uio->uio_iov->iov_len == 0) 65113907Sdyson uio->uio_iov++; 65213907Sdyson uio->uio_resid -= size; 65313907Sdyson uio->uio_offset += size; 65476760Salfred return (0); 65513907Sdyson} 65613907Sdyson 65713907Sdyson/* 65813907Sdyson * unmap and unwire the process buffer 65913907Sdyson */ 66013907Sdysonstatic void 66113907Sdysonpipe_destroy_write_buffer(wpipe) 66276760Salfred struct pipe *wpipe; 66313907Sdyson{ 66413907Sdyson int i; 66576364Salfred 66679224Sdillon GIANT_REQUIRED; 66791412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 66879224Sdillon 66917163Sdyson if (wpipe->pipe_map.kva) { 67017163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 67113907Sdyson 67213907Sdyson if (amountpipekva > MAXPIPEKVA) { 67313907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 67413907Sdyson wpipe->pipe_map.kva = 0; 67513907Sdyson kmem_free(kernel_map, kva, 67613912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 67713912Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 67813907Sdyson } 67913907Sdyson } 68076760Salfred for (i = 0; i < wpipe->pipe_map.npages; i++) 68140700Sdg vm_page_unwire(wpipe->pipe_map.ms[i], 1); 68213907Sdyson} 68313907Sdyson 68413907Sdyson/* 68513907Sdyson * In the case of a signal, the writing process might go away. This 68613907Sdyson * code copies the data into the circular buffer so that the source 68713907Sdyson * pages can be freed without loss of data. 68813907Sdyson */ 68913907Sdysonstatic void 69013907Sdysonpipe_clone_write_buffer(wpipe) 69176364Salfred struct pipe *wpipe; 69213907Sdyson{ 69313907Sdyson int size; 69413907Sdyson int pos; 69513907Sdyson 69691362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 69713907Sdyson size = wpipe->pipe_map.cnt; 69813907Sdyson pos = wpipe->pipe_map.pos; 69976760Salfred bcopy((caddr_t) wpipe->pipe_map.kva + pos, 70076760Salfred (caddr_t) wpipe->pipe_buffer.buffer, size); 70113907Sdyson 70213907Sdyson wpipe->pipe_buffer.in = size; 70313907Sdyson wpipe->pipe_buffer.out = 0; 70413907Sdyson wpipe->pipe_buffer.cnt = size; 70513907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 70613907Sdyson 70791412Salfred PIPE_GET_GIANT(wpipe); 70813907Sdyson pipe_destroy_write_buffer(wpipe); 70991412Salfred PIPE_DROP_GIANT(wpipe); 71013907Sdyson} 71113907Sdyson 71213907Sdyson/* 71313907Sdyson * This implements the pipe buffer write mechanism. Note that only 71413907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 71513907Sdyson * If there are any characters in the pipe buffer, the direct write will 71613907Sdyson * be deferred until the receiving process grabs all of the bytes from 71713907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 71813907Sdyson */ 71913907Sdysonstatic int 72013907Sdysonpipe_direct_write(wpipe, uio) 72113907Sdyson struct pipe *wpipe; 72213907Sdyson struct uio *uio; 72313907Sdyson{ 72413907Sdyson int error; 72576364Salfred 72613951Sdysonretry: 72791362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 72813907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 72976760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 73013951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 73113951Sdyson wakeup(wpipe); 73213951Sdyson } 73313992Sdyson wpipe->pipe_state |= PIPE_WANTW; 73491362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 73591362Salfred PRIBIO | PCATCH, "pipdww", 0); 73614802Sdyson if (error) 73713907Sdyson goto error1; 73814802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 73914802Sdyson error = EPIPE; 74014802Sdyson goto error1; 74114802Sdyson } 74213907Sdyson } 74313907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 74413951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 74576760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 74613951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 74713951Sdyson wakeup(wpipe); 74813951Sdyson } 74913951Sdyson 75013992Sdyson wpipe->pipe_state |= PIPE_WANTW; 75191362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 75291362Salfred PRIBIO | PCATCH, "pipdwc", 0); 75314802Sdyson if (error) 75413907Sdyson goto error1; 75514802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 75614802Sdyson error = EPIPE; 75714802Sdyson goto error1; 75813907Sdyson } 75913951Sdyson goto retry; 76013907Sdyson } 76113907Sdyson 76213951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 76313951Sdyson 76491362Salfred PIPE_GET_GIANT(wpipe); 76513907Sdyson error = pipe_build_write_buffer(wpipe, uio); 76691362Salfred PIPE_DROP_GIANT(wpipe); 76713907Sdyson if (error) { 76813907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 76913907Sdyson goto error1; 77013907Sdyson } 77113907Sdyson 77213907Sdyson error = 0; 77313907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 77413907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 77513907Sdyson pipelock(wpipe, 0); 77691362Salfred PIPE_GET_GIANT(wpipe); 77713907Sdyson pipe_destroy_write_buffer(wpipe); 77891362Salfred PIPE_DROP_GIANT(wpipe); 77913907Sdyson pipeunlock(wpipe); 78014037Sdyson pipeselwakeup(wpipe); 78114802Sdyson error = EPIPE; 78214802Sdyson goto error1; 78313907Sdyson } 78413992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 78513992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 78613992Sdyson wakeup(wpipe); 78713992Sdyson } 78814037Sdyson pipeselwakeup(wpipe); 78991362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, 79091362Salfred "pipdwt", 0); 79113907Sdyson } 79213907Sdyson 79313907Sdyson pipelock(wpipe,0); 79413907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 79513907Sdyson /* 79613907Sdyson * this bit of trickery substitutes a kernel buffer for 79713907Sdyson * the process that might be going away. 79813907Sdyson */ 79913907Sdyson pipe_clone_write_buffer(wpipe); 80013907Sdyson } else { 80191412Salfred PIPE_GET_GIANT(wpipe); 80213907Sdyson pipe_destroy_write_buffer(wpipe); 80391412Salfred PIPE_DROP_GIANT(wpipe); 80413907Sdyson } 80513907Sdyson pipeunlock(wpipe); 80676760Salfred return (error); 80713907Sdyson 80813907Sdysonerror1: 80913907Sdyson wakeup(wpipe); 81076760Salfred return (error); 81113907Sdyson} 81214037Sdyson#endif 81313907Sdyson 81416960Sdysonstatic int 81583366Sjulianpipe_write(fp, uio, cred, flags, td) 81616960Sdyson struct file *fp; 81713907Sdyson struct uio *uio; 81816960Sdyson struct ucred *cred; 81983366Sjulian struct thread *td; 82045311Sdt int flags; 82113907Sdyson{ 82213675Sdyson int error = 0; 82313913Sdyson int orig_resid; 82416960Sdyson struct pipe *wpipe, *rpipe; 82516960Sdyson 82616960Sdyson rpipe = (struct pipe *) fp->f_data; 82716960Sdyson wpipe = rpipe->pipe_peer; 82816960Sdyson 82991395Salfred PIPE_LOCK(rpipe); 83013675Sdyson /* 83113675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 83213675Sdyson */ 83316960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 83491395Salfred PIPE_UNLOCK(rpipe); 83576760Salfred return (EPIPE); 83613675Sdyson } 83777676Sdillon ++wpipe->pipe_busy; 83813675Sdyson 83917163Sdyson /* 84017163Sdyson * If it is advantageous to resize the pipe buffer, do 84117163Sdyson * so. 84217163Sdyson */ 84317163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 84417163Sdyson (nbigpipe < LIMITBIGPIPES) && 84517163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 84617163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 84717163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 84817163Sdyson 84913907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 85091395Salfred PIPE_GET_GIANT(rpipe); 85176364Salfred if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 85276364Salfred nbigpipe++; 85391395Salfred PIPE_DROP_GIANT(rpipe); 85413907Sdyson pipeunlock(wpipe); 85513907Sdyson } 85613907Sdyson } 85777676Sdillon 85877676Sdillon /* 85977676Sdillon * If an early error occured unbusy and return, waking up any pending 86077676Sdillon * readers. 86177676Sdillon */ 86277676Sdillon if (error) { 86377676Sdillon --wpipe->pipe_busy; 86477676Sdillon if ((wpipe->pipe_busy == 0) && 86577676Sdillon (wpipe->pipe_state & PIPE_WANT)) { 86677676Sdillon wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 86777676Sdillon wakeup(wpipe); 86877676Sdillon } 86991395Salfred PIPE_UNLOCK(rpipe); 87077676Sdillon return(error); 87177676Sdillon } 87276364Salfred 87376364Salfred KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone")); 87413907Sdyson 87513913Sdyson orig_resid = uio->uio_resid; 87677676Sdillon 87713675Sdyson while (uio->uio_resid) { 87813907Sdyson int space; 87976760Salfred 88014037Sdyson#ifndef PIPE_NODIRECT 88113907Sdyson /* 88213907Sdyson * If the transfer is large, we can gain performance if 88313907Sdyson * we do process-to-process copies directly. 88416416Sdyson * If the write is non-blocking, we don't use the 88516416Sdyson * direct write mechanism. 88658505Sdillon * 88758505Sdillon * The direct write mechanism will detect the reader going 88858505Sdillon * away on us. 88913907Sdyson */ 89017163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 89117163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 89217163Sdyson (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 89313907Sdyson (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 89413907Sdyson error = pipe_direct_write( wpipe, uio); 89576760Salfred if (error) 89613907Sdyson break; 89713907Sdyson continue; 89891362Salfred } 89914037Sdyson#endif 90013907Sdyson 90113907Sdyson /* 90213907Sdyson * Pipe buffered writes cannot be coincidental with 90313907Sdyson * direct writes. We wait until the currently executing 90413907Sdyson * direct write is completed before we start filling the 90558505Sdillon * pipe buffer. We break out if a signal occurs or the 90658505Sdillon * reader goes away. 90713907Sdyson */ 90813907Sdyson retrywrite: 90913907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 91013992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 91113992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 91213992Sdyson wakeup(wpipe); 91313992Sdyson } 91491395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 91591362Salfred "pipbww", 0); 91658505Sdillon if (wpipe->pipe_state & PIPE_EOF) 91758505Sdillon break; 91813907Sdyson if (error) 91913907Sdyson break; 92013907Sdyson } 92158505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 92258505Sdillon error = EPIPE; 92358505Sdillon break; 92458505Sdillon } 92513907Sdyson 92613907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 92714644Sdyson 92814644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 92913913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 93013913Sdyson space = 0; 93113907Sdyson 93217163Sdyson if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 93313907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 93454534Stegge int size; /* Transfer size */ 93554534Stegge int segsize; /* first segment to transfer */ 93676760Salfred 93713907Sdyson /* 93813907Sdyson * It is possible for a direct write to 93913907Sdyson * slip in on us... handle it here... 94013907Sdyson */ 94113907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 94213907Sdyson pipeunlock(wpipe); 94313907Sdyson goto retrywrite; 94413907Sdyson } 94554534Stegge /* 94654534Stegge * If a process blocked in uiomove, our 94754534Stegge * value for space might be bad. 94858505Sdillon * 94958505Sdillon * XXX will we be ok if the reader has gone 95058505Sdillon * away here? 95154534Stegge */ 95254534Stegge if (space > wpipe->pipe_buffer.size - 95354534Stegge wpipe->pipe_buffer.cnt) { 95454534Stegge pipeunlock(wpipe); 95554534Stegge goto retrywrite; 95654534Stegge } 95754534Stegge 95854534Stegge /* 95954534Stegge * Transfer size is minimum of uio transfer 96054534Stegge * and free space in pipe buffer. 96154534Stegge */ 96254534Stegge if (space > uio->uio_resid) 96354534Stegge size = uio->uio_resid; 96454534Stegge else 96554534Stegge size = space; 96654534Stegge /* 96754534Stegge * First segment to transfer is minimum of 96854534Stegge * transfer size and contiguous space in 96954534Stegge * pipe buffer. If first segment to transfer 97054534Stegge * is less than the transfer size, we've got 97154534Stegge * a wraparound in the buffer. 97254534Stegge */ 97354534Stegge segsize = wpipe->pipe_buffer.size - 97454534Stegge wpipe->pipe_buffer.in; 97554534Stegge if (segsize > size) 97654534Stegge segsize = size; 97754534Stegge 97854534Stegge /* Transfer first segment */ 97954534Stegge 98091395Salfred PIPE_UNLOCK(rpipe); 98154534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 98254534Stegge segsize, uio); 98391395Salfred PIPE_LOCK(rpipe); 98454534Stegge 98554534Stegge if (error == 0 && segsize < size) { 98654534Stegge /* 98754534Stegge * Transfer remaining part now, to 98854534Stegge * support atomic writes. Wraparound 98954534Stegge * happened. 99054534Stegge */ 99154534Stegge if (wpipe->pipe_buffer.in + segsize != 99254534Stegge wpipe->pipe_buffer.size) 99354534Stegge panic("Expected pipe buffer wraparound disappeared"); 99454534Stegge 99591395Salfred PIPE_UNLOCK(rpipe); 99654534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[0], 99754534Stegge size - segsize, uio); 99891395Salfred PIPE_LOCK(rpipe); 99954534Stegge } 100054534Stegge if (error == 0) { 100154534Stegge wpipe->pipe_buffer.in += size; 100254534Stegge if (wpipe->pipe_buffer.in >= 100354534Stegge wpipe->pipe_buffer.size) { 100454534Stegge if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 100554534Stegge panic("Expected wraparound bad"); 100654534Stegge wpipe->pipe_buffer.in = size - segsize; 100754534Stegge } 100854534Stegge 100954534Stegge wpipe->pipe_buffer.cnt += size; 101054534Stegge if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 101154534Stegge panic("Pipe buffer overflow"); 101254534Stegge 101354534Stegge } 101413675Sdyson pipeunlock(wpipe); 101513675Sdyson } 101613675Sdyson if (error) 101713675Sdyson break; 101813675Sdyson 101913675Sdyson } else { 102013675Sdyson /* 102113675Sdyson * If the "read-side" has been blocked, wake it up now. 102213675Sdyson */ 102313675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 102413675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 102513675Sdyson wakeup(wpipe); 102613675Sdyson } 102714037Sdyson 102813675Sdyson /* 102913675Sdyson * don't block on non-blocking I/O 103013675Sdyson */ 103116960Sdyson if (fp->f_flag & FNONBLOCK) { 103213907Sdyson error = EAGAIN; 103313675Sdyson break; 103413675Sdyson } 103513907Sdyson 103614037Sdyson /* 103714037Sdyson * We have no more space and have something to offer, 103829356Speter * wake up select/poll. 103914037Sdyson */ 104014037Sdyson pipeselwakeup(wpipe); 104114037Sdyson 104213675Sdyson wpipe->pipe_state |= PIPE_WANTW; 104391395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), 104491362Salfred PRIBIO | PCATCH, "pipewr", 0); 104576760Salfred if (error != 0) 104613675Sdyson break; 104713675Sdyson /* 104813675Sdyson * If read side wants to go away, we just issue a signal 104913675Sdyson * to ourselves. 105013675Sdyson */ 105113675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 105213774Sdyson error = EPIPE; 105313907Sdyson break; 105413675Sdyson } 105513675Sdyson } 105613675Sdyson } 105713675Sdyson 105814644Sdyson --wpipe->pipe_busy; 105977676Sdillon 106076760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 106176760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 106213675Sdyson wakeup(wpipe); 106313675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 106413675Sdyson /* 106513675Sdyson * If we have put any characters in the buffer, we wake up 106613675Sdyson * the reader. 106713675Sdyson */ 106813675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 106913675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 107013675Sdyson wakeup(wpipe); 107113675Sdyson } 107213675Sdyson } 107313909Sdyson 107413909Sdyson /* 107513909Sdyson * Don't return EPIPE if I/O was successful 107613909Sdyson */ 107713907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 107877676Sdillon (uio->uio_resid == 0) && 107977676Sdillon (error == EPIPE)) { 108013907Sdyson error = 0; 108177676Sdillon } 108213913Sdyson 108324101Sbde if (error == 0) 108455112Sbde vfs_timestamp(&wpipe->pipe_mtime); 108524101Sbde 108614037Sdyson /* 108714037Sdyson * We have something to offer, 108829356Speter * wake up select/poll. 108914037Sdyson */ 109014177Sdyson if (wpipe->pipe_buffer.cnt) 109114037Sdyson pipeselwakeup(wpipe); 109213907Sdyson 109391395Salfred PIPE_UNLOCK(rpipe); 109476760Salfred return (error); 109513675Sdyson} 109613675Sdyson 109713675Sdyson/* 109813675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 109913675Sdyson */ 110013675Sdysonint 110183366Sjulianpipe_ioctl(fp, cmd, data, td) 110213675Sdyson struct file *fp; 110336735Sdfr u_long cmd; 110476364Salfred caddr_t data; 110583366Sjulian struct thread *td; 110613675Sdyson{ 110776364Salfred struct pipe *mpipe = (struct pipe *)fp->f_data; 110813675Sdyson 110913675Sdyson switch (cmd) { 111013675Sdyson 111113675Sdyson case FIONBIO: 111213675Sdyson return (0); 111313675Sdyson 111413675Sdyson case FIOASYNC: 111591362Salfred PIPE_LOCK(mpipe); 111613675Sdyson if (*(int *)data) { 111713675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 111813675Sdyson } else { 111913675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 112013675Sdyson } 112191362Salfred PIPE_UNLOCK(mpipe); 112213675Sdyson return (0); 112313675Sdyson 112413675Sdyson case FIONREAD: 112591362Salfred PIPE_LOCK(mpipe); 112614037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 112714037Sdyson *(int *)data = mpipe->pipe_map.cnt; 112814037Sdyson else 112914037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 113091362Salfred PIPE_UNLOCK(mpipe); 113113675Sdyson return (0); 113213675Sdyson 113341086Struckman case FIOSETOWN: 113441086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 113541086Struckman 113641086Struckman case FIOGETOWN: 113741086Struckman *(int *)data = fgetown(mpipe->pipe_sigio); 113813675Sdyson return (0); 113913675Sdyson 114041086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 114141086Struckman case TIOCSPGRP: 114241086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 114341086Struckman 114441086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 114518863Sdyson case TIOCGPGRP: 114641086Struckman *(int *)data = -fgetown(mpipe->pipe_sigio); 114713675Sdyson return (0); 114813675Sdyson 114913675Sdyson } 115017124Sbde return (ENOTTY); 115113675Sdyson} 115213675Sdyson 115313675Sdysonint 115483366Sjulianpipe_poll(fp, events, cred, td) 115513675Sdyson struct file *fp; 115629356Speter int events; 115729356Speter struct ucred *cred; 115883366Sjulian struct thread *td; 115913675Sdyson{ 116076364Salfred struct pipe *rpipe = (struct pipe *)fp->f_data; 116113675Sdyson struct pipe *wpipe; 116229356Speter int revents = 0; 116313675Sdyson 116413675Sdyson wpipe = rpipe->pipe_peer; 116591362Salfred PIPE_LOCK(rpipe); 116629356Speter if (events & (POLLIN | POLLRDNORM)) 116729356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 116829356Speter (rpipe->pipe_buffer.cnt > 0) || 116929356Speter (rpipe->pipe_state & PIPE_EOF)) 117029356Speter revents |= events & (POLLIN | POLLRDNORM); 117113675Sdyson 117229356Speter if (events & (POLLOUT | POLLWRNORM)) 117329356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 117443311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 117543311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 117629356Speter revents |= events & (POLLOUT | POLLWRNORM); 117713675Sdyson 117829356Speter if ((rpipe->pipe_state & PIPE_EOF) || 117929356Speter (wpipe == NULL) || 118029356Speter (wpipe->pipe_state & PIPE_EOF)) 118129356Speter revents |= POLLHUP; 118229356Speter 118329356Speter if (revents == 0) { 118429356Speter if (events & (POLLIN | POLLRDNORM)) { 118583805Sjhb selrecord(td, &rpipe->pipe_sel); 118629356Speter rpipe->pipe_state |= PIPE_SEL; 118713675Sdyson } 118813675Sdyson 118929356Speter if (events & (POLLOUT | POLLWRNORM)) { 119083805Sjhb selrecord(td, &wpipe->pipe_sel); 119130164Speter wpipe->pipe_state |= PIPE_SEL; 119213907Sdyson } 119313675Sdyson } 119491362Salfred PIPE_UNLOCK(rpipe); 119529356Speter 119629356Speter return (revents); 119713675Sdyson} 119813675Sdyson 119952983Speterstatic int 120083366Sjulianpipe_stat(fp, ub, td) 120152983Speter struct file *fp; 120252983Speter struct stat *ub; 120383366Sjulian struct thread *td; 120413675Sdyson{ 120552983Speter struct pipe *pipe = (struct pipe *)fp->f_data; 120652983Speter 120776760Salfred bzero((caddr_t)ub, sizeof(*ub)); 120817124Sbde ub->st_mode = S_IFIFO; 120913907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 121013675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 121113675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 121234901Sphk ub->st_atimespec = pipe->pipe_atime; 121334901Sphk ub->st_mtimespec = pipe->pipe_mtime; 121434901Sphk ub->st_ctimespec = pipe->pipe_ctime; 121560404Schris ub->st_uid = fp->f_cred->cr_uid; 121660404Schris ub->st_gid = fp->f_cred->cr_gid; 121717124Sbde /* 121860404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 121917124Sbde * XXX (st_dev, st_ino) should be unique. 122017124Sbde */ 122176760Salfred return (0); 122213675Sdyson} 122313675Sdyson 122413675Sdyson/* ARGSUSED */ 122513675Sdysonstatic int 122683366Sjulianpipe_close(fp, td) 122713675Sdyson struct file *fp; 122883366Sjulian struct thread *td; 122913675Sdyson{ 123013675Sdyson struct pipe *cpipe = (struct pipe *)fp->f_data; 123116322Sgpalmer 123249413Sgreen fp->f_ops = &badfileops; 123349413Sgreen fp->f_data = NULL; 123441086Struckman funsetown(cpipe->pipe_sigio); 123513675Sdyson pipeclose(cpipe); 123676760Salfred return (0); 123713675Sdyson} 123813675Sdyson 123976364Salfredstatic void 124076364Salfredpipe_free_kmem(cpipe) 124176364Salfred struct pipe *cpipe; 124276364Salfred{ 124391412Salfred 124479224Sdillon GIANT_REQUIRED; 124591412Salfred KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 124691412Salfred ("pipespace: pipe mutex locked")); 124776364Salfred 124876364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 124976364Salfred if (cpipe->pipe_buffer.size > PIPE_SIZE) 125076364Salfred --nbigpipe; 125176364Salfred amountpipekva -= cpipe->pipe_buffer.size; 125276364Salfred kmem_free(kernel_map, 125376364Salfred (vm_offset_t)cpipe->pipe_buffer.buffer, 125476364Salfred cpipe->pipe_buffer.size); 125576364Salfred cpipe->pipe_buffer.buffer = NULL; 125676364Salfred } 125776364Salfred#ifndef PIPE_NODIRECT 125876364Salfred if (cpipe->pipe_map.kva != NULL) { 125976364Salfred amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 126076364Salfred kmem_free(kernel_map, 126176364Salfred cpipe->pipe_map.kva, 126276364Salfred cpipe->pipe_buffer.size + PAGE_SIZE); 126376364Salfred cpipe->pipe_map.cnt = 0; 126476364Salfred cpipe->pipe_map.kva = 0; 126576364Salfred cpipe->pipe_map.pos = 0; 126676364Salfred cpipe->pipe_map.npages = 0; 126776364Salfred } 126876364Salfred#endif 126976364Salfred} 127076364Salfred 127113675Sdyson/* 127213675Sdyson * shutdown the pipe 127313675Sdyson */ 127413675Sdysonstatic void 127513675Sdysonpipeclose(cpipe) 127613675Sdyson struct pipe *cpipe; 127713675Sdyson{ 127813907Sdyson struct pipe *ppipe; 127976364Salfred 128013675Sdyson if (cpipe) { 128191362Salfred PIPE_LOCK(cpipe); 128213907Sdyson 128314037Sdyson pipeselwakeup(cpipe); 128413907Sdyson 128513675Sdyson /* 128613675Sdyson * If the other side is blocked, wake it up saying that 128713675Sdyson * we want to close it down. 128813675Sdyson */ 128913675Sdyson while (cpipe->pipe_busy) { 129013675Sdyson wakeup(cpipe); 129176760Salfred cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; 129291362Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 129313675Sdyson } 129413675Sdyson 129513675Sdyson /* 129613675Sdyson * Disconnect from peer 129713675Sdyson */ 129843301Sdillon if ((ppipe = cpipe->pipe_peer) != NULL) { 129914037Sdyson pipeselwakeup(ppipe); 130013907Sdyson 130113907Sdyson ppipe->pipe_state |= PIPE_EOF; 130213907Sdyson wakeup(ppipe); 130386598Ssobomax KNOTE(&ppipe->pipe_sel.si_note, 0); 130413907Sdyson ppipe->pipe_peer = NULL; 130513675Sdyson } 130613675Sdyson /* 130713675Sdyson * free resources 130813675Sdyson */ 130991362Salfred PIPE_UNLOCK(cpipe); 131091362Salfred mtx_lock(&Giant); 131176364Salfred pipe_free_kmem(cpipe); 131227899Sdyson zfree(pipe_zone, cpipe); 131391362Salfred mtx_unlock(&Giant); 131413675Sdyson } 131513675Sdyson} 131659288Sjlemon 131772521Sjlemon/*ARGSUSED*/ 131859288Sjlemonstatic int 131972521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 132059288Sjlemon{ 132189306Salfred struct pipe *cpipe; 132259288Sjlemon 132389306Salfred cpipe = (struct pipe *)kn->kn_fp->f_data; 132472521Sjlemon switch (kn->kn_filter) { 132572521Sjlemon case EVFILT_READ: 132672521Sjlemon kn->kn_fop = &pipe_rfiltops; 132772521Sjlemon break; 132872521Sjlemon case EVFILT_WRITE: 132972521Sjlemon kn->kn_fop = &pipe_wfiltops; 133078292Sjlemon cpipe = cpipe->pipe_peer; 133172521Sjlemon break; 133272521Sjlemon default: 133372521Sjlemon return (1); 133472521Sjlemon } 133578292Sjlemon kn->kn_hook = (caddr_t)cpipe; 133678292Sjlemon 133791372Salfred PIPE_LOCK(cpipe); 133878292Sjlemon SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext); 133991372Salfred PIPE_UNLOCK(cpipe); 134059288Sjlemon return (0); 134159288Sjlemon} 134259288Sjlemon 134359288Sjlemonstatic void 134459288Sjlemonfilt_pipedetach(struct knote *kn) 134559288Sjlemon{ 134678292Sjlemon struct pipe *cpipe = (struct pipe *)kn->kn_hook; 134759288Sjlemon 134891372Salfred PIPE_LOCK(cpipe); 134978292Sjlemon SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext); 135091372Salfred PIPE_UNLOCK(cpipe); 135159288Sjlemon} 135259288Sjlemon 135359288Sjlemon/*ARGSUSED*/ 135459288Sjlemonstatic int 135559288Sjlemonfilt_piperead(struct knote *kn, long hint) 135659288Sjlemon{ 135759288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 135859288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 135959288Sjlemon 136091372Salfred PIPE_LOCK(rpipe); 136159288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 136259288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 136359288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 136459288Sjlemon 136559288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 136659288Sjlemon (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 136791372Salfred kn->kn_flags |= EV_EOF; 136891372Salfred PIPE_UNLOCK(rpipe); 136959288Sjlemon return (1); 137059288Sjlemon } 137191372Salfred PIPE_UNLOCK(rpipe); 137259288Sjlemon return (kn->kn_data > 0); 137359288Sjlemon} 137459288Sjlemon 137559288Sjlemon/*ARGSUSED*/ 137659288Sjlemonstatic int 137759288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 137859288Sjlemon{ 137959288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 138059288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 138159288Sjlemon 138291372Salfred PIPE_LOCK(rpipe); 138359288Sjlemon if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 138459288Sjlemon kn->kn_data = 0; 138559288Sjlemon kn->kn_flags |= EV_EOF; 138691372Salfred PIPE_UNLOCK(rpipe); 138759288Sjlemon return (1); 138859288Sjlemon } 138959288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 139065855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 139159288Sjlemon kn->kn_data = 0; 139259288Sjlemon 139391372Salfred PIPE_UNLOCK(rpipe); 139459288Sjlemon return (kn->kn_data >= PIPE_BUF); 139559288Sjlemon} 1396