sys_pipe.c revision 92751
/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: head/sys/kern/sys_pipe.c 92751 2002-03-20 04:09:59Z jeff $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
5013907Sdyson */ 5113907Sdyson 5213675Sdyson#include <sys/param.h> 5313675Sdyson#include <sys/systm.h> 5424131Sbde#include <sys/fcntl.h> 5513675Sdyson#include <sys/file.h> 5613675Sdyson#include <sys/filedesc.h> 5724206Sbde#include <sys/filio.h> 5891372Salfred#include <sys/kernel.h> 5976166Smarkm#include <sys/lock.h> 6076827Salfred#include <sys/mutex.h> 6124206Sbde#include <sys/ttycom.h> 6213675Sdyson#include <sys/stat.h> 6391968Salfred#include <sys/malloc.h> 6429356Speter#include <sys/poll.h> 6570834Swollman#include <sys/selinfo.h> 6613675Sdyson#include <sys/signalvar.h> 6713675Sdyson#include <sys/sysproto.h> 6813675Sdyson#include <sys/pipe.h> 6976166Smarkm#include <sys/proc.h> 7055112Sbde#include <sys/vnode.h> 7134924Sbde#include <sys/uio.h> 7259288Sjlemon#include <sys/event.h> 7313675Sdyson 7413675Sdyson#include <vm/vm.h> 7513675Sdyson#include <vm/vm_param.h> 7613675Sdyson#include <vm/vm_object.h> 7713675Sdyson#include <vm/vm_kern.h> 7813675Sdyson#include <vm/vm_extern.h> 7913675Sdyson#include <vm/pmap.h> 8013675Sdyson#include <vm/vm_map.h> 8113907Sdyson#include <vm/vm_page.h> 8292751Sjeff#include <vm/uma.h> 8313675Sdyson 8414037Sdyson/* 8514037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 8614037Sdyson * approx 30% decrease in transfer rate. This could be useful for 8714037Sdyson * NetBSD or OpenBSD. 
8814037Sdyson */ 8914037Sdyson/* #define PIPE_NODIRECT */ 9014037Sdyson 9114037Sdyson/* 9214037Sdyson * interfaces to the outside world 9314037Sdyson */ 9491413Salfredstatic int pipe_read(struct file *fp, struct uio *uio, 9591413Salfred struct ucred *cred, int flags, struct thread *td); 9691413Salfredstatic int pipe_write(struct file *fp, struct uio *uio, 9791413Salfred struct ucred *cred, int flags, struct thread *td); 9891413Salfredstatic int pipe_close(struct file *fp, struct thread *td); 9991413Salfredstatic int pipe_poll(struct file *fp, int events, struct ucred *cred, 10091413Salfred struct thread *td); 10191413Salfredstatic int pipe_kqfilter(struct file *fp, struct knote *kn); 10291413Salfredstatic int pipe_stat(struct file *fp, struct stat *sb, struct thread *td); 10391413Salfredstatic int pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td); 10413675Sdyson 10572521Sjlemonstatic struct fileops pipeops = { 10672521Sjlemon pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter, 10772521Sjlemon pipe_stat, pipe_close 10872521Sjlemon}; 10913675Sdyson 11059288Sjlemonstatic void filt_pipedetach(struct knote *kn); 11159288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 11259288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 11359288Sjlemon 11472521Sjlemonstatic struct filterops pipe_rfiltops = 11572521Sjlemon { 1, NULL, filt_pipedetach, filt_piperead }; 11672521Sjlemonstatic struct filterops pipe_wfiltops = 11772521Sjlemon { 1, NULL, filt_pipedetach, filt_pipewrite }; 11859288Sjlemon 11992305Salfred#define PIPE_GET_GIANT(pipe) \ 12091362Salfred do { \ 12192305Salfred KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0, \ 12292305Salfred ("%s:%d PIPE_GET_GIANT: line pipe not locked", \ 12392305Salfred __FILE__, __LINE__)); \ 12492305Salfred PIPE_UNLOCK(pipe); \ 12591362Salfred mtx_lock(&Giant); \ 12691362Salfred } while (0) 12772521Sjlemon 12891362Salfred#define PIPE_DROP_GIANT(pipe) \ 12991362Salfred do { \ 
13091362Salfred mtx_unlock(&Giant); \ 13192305Salfred PIPE_LOCK(pipe); \ 13291362Salfred } while (0) 13391362Salfred 13413675Sdyson/* 13513675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 13613675Sdyson * space is pageable. The pipe code will try to maintain locality of 13713675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 13813675Sdyson * will not wipe the cache. 13913675Sdyson */ 14013907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 14113907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 14213675Sdyson 14313907Sdyson/* 14413907Sdyson * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 14513907Sdyson * is there so that on large systems, we don't exhaust it. 14613907Sdyson */ 14713907Sdyson#define MAXPIPEKVA (8*1024*1024) 14813907Sdyson 14913907Sdyson/* 15013907Sdyson * Limit for direct transfers, we cannot, of course limit 15113907Sdyson * the amount of kva for pipes in general though. 15213907Sdyson */ 15313907Sdyson#define LIMITPIPEKVA (16*1024*1024) 15417163Sdyson 15517163Sdyson/* 15617163Sdyson * Limit the number of "big" pipes 15717163Sdyson */ 15817163Sdyson#define LIMITBIGPIPES 32 15933181Seivindstatic int nbigpipe; 16017163Sdyson 16117124Sbdestatic int amountpipekva; 16213907Sdyson 16391413Salfredstatic void pipeinit(void *dummy __unused); 16491413Salfredstatic void pipeclose(struct pipe *cpipe); 16591413Salfredstatic void pipe_free_kmem(struct pipe *cpipe); 16691413Salfredstatic int pipe_create(struct pipe **cpipep); 16791413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch); 16891413Salfredstatic __inline void pipeunlock(struct pipe *cpipe); 16991413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe); 17014037Sdyson#ifndef PIPE_NODIRECT 17191413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 17291413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe); 17391413Salfredstatic int pipe_direct_write(struct pipe 
*wpipe, struct uio *uio); 17491413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe); 17514037Sdyson#endif 17691413Salfredstatic int pipespace(struct pipe *cpipe, int size); 17713675Sdyson 17892751Sjeffstatic uma_zone_t pipe_zone; 17927899Sdyson 18091372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 18191372Salfred 18291372Salfredstatic void 18391372Salfredpipeinit(void *dummy __unused) 18491372Salfred{ 18592654Sjeff pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL, 18692654Sjeff NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 18791372Salfred} 18891372Salfred 18913675Sdyson/* 19013675Sdyson * The pipe system call for the DTYPE_PIPE type of pipes 19113675Sdyson */ 19213675Sdyson 19313675Sdyson/* ARGSUSED */ 19413675Sdysonint 19583366Sjulianpipe(td, uap) 19683366Sjulian struct thread *td; 19713675Sdyson struct pipe_args /* { 19813675Sdyson int dummy; 19913675Sdyson } */ *uap; 20013675Sdyson{ 20183366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 20213675Sdyson struct file *rf, *wf; 20313675Sdyson struct pipe *rpipe, *wpipe; 20491968Salfred struct mtx *pmtx; 20513675Sdyson int fd, error; 20691362Salfred 20791372Salfred KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 20827899Sdyson 20991968Salfred pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO); 21091968Salfred 21176756Salfred rpipe = wpipe = NULL; 21276364Salfred if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 21376364Salfred pipeclose(rpipe); 21476364Salfred pipeclose(wpipe); 21591968Salfred free(pmtx, M_TEMP); 21676364Salfred return (ENFILE); 21776364Salfred } 21876364Salfred 21913907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 22013907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 22113675Sdyson 22283366Sjulian error = falloc(td, &rf, &fd); 22370915Sdwmalone if (error) { 22470915Sdwmalone pipeclose(rpipe); 22570915Sdwmalone pipeclose(wpipe); 22691968Salfred free(pmtx, M_TEMP); 22770915Sdwmalone return (error); 22870915Sdwmalone } 22970915Sdwmalone fhold(rf); 
23083366Sjulian td->td_retval[0] = fd; 23170915Sdwmalone 23270803Sdwmalone /* 23370803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 23470803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 23570803Sdwmalone * to avoid races against processes which manage to dup() the read 23670803Sdwmalone * side while we are blocked trying to allocate the write side. 23770803Sdwmalone */ 23889306Salfred FILE_LOCK(rf); 23913675Sdyson rf->f_flag = FREAD | FWRITE; 24013675Sdyson rf->f_type = DTYPE_PIPE; 24149413Sgreen rf->f_data = (caddr_t)rpipe; 24213675Sdyson rf->f_ops = &pipeops; 24389306Salfred FILE_UNLOCK(rf); 24483366Sjulian error = falloc(td, &wf, &fd); 24570915Sdwmalone if (error) { 24689306Salfred FILEDESC_LOCK(fdp); 24783366Sjulian if (fdp->fd_ofiles[td->td_retval[0]] == rf) { 24883366Sjulian fdp->fd_ofiles[td->td_retval[0]] = NULL; 24989306Salfred FILEDESC_UNLOCK(fdp); 25083366Sjulian fdrop(rf, td); 25189306Salfred } else 25289306Salfred FILEDESC_UNLOCK(fdp); 25383366Sjulian fdrop(rf, td); 25470915Sdwmalone /* rpipe has been closed by fdrop(). 
*/ 25570915Sdwmalone pipeclose(wpipe); 25691968Salfred free(pmtx, M_TEMP); 25770915Sdwmalone return (error); 25870915Sdwmalone } 25989306Salfred FILE_LOCK(wf); 26013675Sdyson wf->f_flag = FREAD | FWRITE; 26113675Sdyson wf->f_type = DTYPE_PIPE; 26249413Sgreen wf->f_data = (caddr_t)wpipe; 26313675Sdyson wf->f_ops = &pipeops; 26489306Salfred FILE_UNLOCK(wf); 26583366Sjulian td->td_retval[1] = fd; 26613675Sdyson rpipe->pipe_peer = wpipe; 26713675Sdyson wpipe->pipe_peer = rpipe; 26891968Salfred mtx_init(pmtx, "pipe mutex", MTX_DEF); 26991968Salfred rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; 27083366Sjulian fdrop(rf, td); 27113675Sdyson 27213675Sdyson return (0); 27313675Sdyson} 27413675Sdyson 27513909Sdyson/* 27613909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 27776364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 27876364Salfred * it will retain the old buffer. 27976364Salfred * If it fails it will return ENOMEM. 28013909Sdyson */ 28176364Salfredstatic int 28276364Salfredpipespace(cpipe, size) 28313675Sdyson struct pipe *cpipe; 28476364Salfred int size; 28513675Sdyson{ 28676364Salfred struct vm_object *object; 28776364Salfred caddr_t buffer; 28813688Sdyson int npages, error; 28913675Sdyson 29079224Sdillon GIANT_REQUIRED; 29191412Salfred KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 29291412Salfred ("pipespace: pipe mutex locked")); 29379224Sdillon 29476364Salfred npages = round_page(size)/PAGE_SIZE; 29513675Sdyson /* 29613675Sdyson * Create an object, I don't like the idea of paging to/from 29713675Sdyson * kernel_object. 29814037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 29913675Sdyson */ 30076364Salfred object = vm_object_allocate(OBJT_DEFAULT, npages); 30176364Salfred buffer = (caddr_t) vm_map_min(kernel_map); 30213675Sdyson 30313675Sdyson /* 30413675Sdyson * Insert the object into the kernel map, and allocate kva for it. 
30513675Sdyson * The map entry is, by default, pageable. 30614037Sdyson * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 30713675Sdyson */ 30876364Salfred error = vm_map_find(kernel_map, object, 0, 30976364Salfred (vm_offset_t *) &buffer, size, 1, 31013688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 31113675Sdyson 31276364Salfred if (error != KERN_SUCCESS) { 31376364Salfred vm_object_deallocate(object); 31476364Salfred return (ENOMEM); 31576364Salfred } 31676364Salfred 31776364Salfred /* free old resources if we're resizing */ 31876364Salfred pipe_free_kmem(cpipe); 31976364Salfred cpipe->pipe_buffer.object = object; 32076364Salfred cpipe->pipe_buffer.buffer = buffer; 32176364Salfred cpipe->pipe_buffer.size = size; 32276364Salfred cpipe->pipe_buffer.in = 0; 32376364Salfred cpipe->pipe_buffer.out = 0; 32476364Salfred cpipe->pipe_buffer.cnt = 0; 32513907Sdyson amountpipekva += cpipe->pipe_buffer.size; 32676364Salfred return (0); 32713907Sdyson} 32813688Sdyson 32913907Sdyson/* 33013907Sdyson * initialize and allocate VM and memory for pipe 33113907Sdyson */ 33276364Salfredstatic int 33376364Salfredpipe_create(cpipep) 33476364Salfred struct pipe **cpipep; 33576364Salfred{ 33613907Sdyson struct pipe *cpipe; 33776364Salfred int error; 33813907Sdyson 33992751Sjeff *cpipep = uma_zalloc(pipe_zone, M_WAITOK); 34076364Salfred if (*cpipep == NULL) 34176364Salfred return (ENOMEM); 34217163Sdyson 34376364Salfred cpipe = *cpipep; 34476364Salfred 34576364Salfred /* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */ 34676364Salfred cpipe->pipe_buffer.object = NULL; 34776364Salfred#ifndef PIPE_NODIRECT 34876364Salfred cpipe->pipe_map.kva = NULL; 34976364Salfred#endif 35076364Salfred /* 35176364Salfred * protect so pipeclose() doesn't follow a junk pointer 35276364Salfred * if pipespace() fails. 
35376364Salfred */ 35476754Salfred bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel)); 35513675Sdyson cpipe->pipe_state = 0; 35613675Sdyson cpipe->pipe_peer = NULL; 35713675Sdyson cpipe->pipe_busy = 0; 35813907Sdyson 35914037Sdyson#ifndef PIPE_NODIRECT 36013907Sdyson /* 36113907Sdyson * pipe data structure initializations to support direct pipe I/O 36213907Sdyson */ 36313907Sdyson cpipe->pipe_map.cnt = 0; 36413907Sdyson cpipe->pipe_map.kva = 0; 36513907Sdyson cpipe->pipe_map.pos = 0; 36613907Sdyson cpipe->pipe_map.npages = 0; 36717124Sbde /* cpipe->pipe_map.ms[] = invalid */ 36814037Sdyson#endif 36976364Salfred 37091412Salfred cpipe->pipe_mtxp = NULL; /* avoid pipespace assertion */ 37176364Salfred error = pipespace(cpipe, PIPE_SIZE); 37276760Salfred if (error) 37376364Salfred return (error); 37476364Salfred 37576364Salfred vfs_timestamp(&cpipe->pipe_ctime); 37676364Salfred cpipe->pipe_atime = cpipe->pipe_ctime; 37776364Salfred cpipe->pipe_mtime = cpipe->pipe_ctime; 37876364Salfred 37976364Salfred return (0); 38013675Sdyson} 38113675Sdyson 38213675Sdyson 38313675Sdyson/* 38413675Sdyson * lock a pipe for I/O, blocking other access 38513675Sdyson */ 38613675Sdysonstatic __inline int 38713907Sdysonpipelock(cpipe, catch) 38813675Sdyson struct pipe *cpipe; 38913907Sdyson int catch; 39013675Sdyson{ 39113776Sdyson int error; 39276364Salfred 39391362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 39491362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 39513675Sdyson cpipe->pipe_state |= PIPE_LWANT; 39691362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 39791362Salfred catch ? 
(PRIBIO | PCATCH) : PRIBIO, 39876760Salfred "pipelk", 0); 39976760Salfred if (error != 0) 40076760Salfred return (error); 40113675Sdyson } 40291362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 40376760Salfred return (0); 40413675Sdyson} 40513675Sdyson 40613675Sdyson/* 40713675Sdyson * unlock a pipe I/O lock 40813675Sdyson */ 40913675Sdysonstatic __inline void 41013675Sdysonpipeunlock(cpipe) 41113675Sdyson struct pipe *cpipe; 41213675Sdyson{ 41376364Salfred 41491362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 41591362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 41613675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 41713675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 41814177Sdyson wakeup(cpipe); 41913675Sdyson } 42013675Sdyson} 42113675Sdyson 42214037Sdysonstatic __inline void 42314037Sdysonpipeselwakeup(cpipe) 42414037Sdyson struct pipe *cpipe; 42514037Sdyson{ 42676364Salfred 42714037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 42814037Sdyson cpipe->pipe_state &= ~PIPE_SEL; 42914037Sdyson selwakeup(&cpipe->pipe_sel); 43014037Sdyson } 43141086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 43241086Struckman pgsigio(cpipe->pipe_sigio, SIGIO, 0); 43359288Sjlemon KNOTE(&cpipe->pipe_sel.si_note, 0); 43414037Sdyson} 43514037Sdyson 43613675Sdyson/* ARGSUSED */ 43713675Sdysonstatic int 43883366Sjulianpipe_read(fp, uio, cred, flags, td) 43913675Sdyson struct file *fp; 44013675Sdyson struct uio *uio; 44113675Sdyson struct ucred *cred; 44283366Sjulian struct thread *td; 44345311Sdt int flags; 44413675Sdyson{ 44513675Sdyson struct pipe *rpipe = (struct pipe *) fp->f_data; 44647748Salc int error; 44713675Sdyson int nread = 0; 44818863Sdyson u_int size; 44913675Sdyson 45091362Salfred PIPE_LOCK(rpipe); 45113675Sdyson ++rpipe->pipe_busy; 45247748Salc error = pipelock(rpipe, 1); 45347748Salc if (error) 45447748Salc goto unlocked_error; 45547748Salc 45613675Sdyson while (uio->uio_resid) { 45713907Sdyson /* 45813907Sdyson * normal pipe buffer receive 45913907Sdyson */ 
46013675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 46118863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 46213675Sdyson if (size > rpipe->pipe_buffer.cnt) 46313675Sdyson size = rpipe->pipe_buffer.cnt; 46418863Sdyson if (size > (u_int) uio->uio_resid) 46518863Sdyson size = (u_int) uio->uio_resid; 46647748Salc 46791362Salfred PIPE_UNLOCK(rpipe); 46847748Salc error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 46913675Sdyson size, uio); 47091362Salfred PIPE_LOCK(rpipe); 47176760Salfred if (error) 47213675Sdyson break; 47376760Salfred 47413675Sdyson rpipe->pipe_buffer.out += size; 47513675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 47613675Sdyson rpipe->pipe_buffer.out = 0; 47713675Sdyson 47813675Sdyson rpipe->pipe_buffer.cnt -= size; 47947748Salc 48047748Salc /* 48147748Salc * If there is no more to read in the pipe, reset 48247748Salc * its pointers to the beginning. This improves 48347748Salc * cache hit stats. 48447748Salc */ 48547748Salc if (rpipe->pipe_buffer.cnt == 0) { 48647748Salc rpipe->pipe_buffer.in = 0; 48747748Salc rpipe->pipe_buffer.out = 0; 48847748Salc } 48913675Sdyson nread += size; 49014037Sdyson#ifndef PIPE_NODIRECT 49113907Sdyson /* 49213907Sdyson * Direct copy, bypassing a kernel buffer. 
49313907Sdyson */ 49413907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 49547748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 49647748Salc caddr_t va; 49718863Sdyson if (size > (u_int) uio->uio_resid) 49818863Sdyson size = (u_int) uio->uio_resid; 49947748Salc 50076760Salfred va = (caddr_t) rpipe->pipe_map.kva + 50176760Salfred rpipe->pipe_map.pos; 50291362Salfred PIPE_UNLOCK(rpipe); 50347748Salc error = uiomove(va, size, uio); 50491362Salfred PIPE_LOCK(rpipe); 50513907Sdyson if (error) 50613907Sdyson break; 50713907Sdyson nread += size; 50813907Sdyson rpipe->pipe_map.pos += size; 50913907Sdyson rpipe->pipe_map.cnt -= size; 51013907Sdyson if (rpipe->pipe_map.cnt == 0) { 51113907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 51213907Sdyson wakeup(rpipe); 51313907Sdyson } 51414037Sdyson#endif 51513675Sdyson } else { 51613675Sdyson /* 51713675Sdyson * detect EOF condition 51876760Salfred * read returns 0 on EOF, no need to set error 51913675Sdyson */ 52076760Salfred if (rpipe->pipe_state & PIPE_EOF) 52113675Sdyson break; 52243623Sdillon 52313675Sdyson /* 52413675Sdyson * If the "write-side" has been blocked, wake it up now. 52513675Sdyson */ 52613675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 52713675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 52813675Sdyson wakeup(rpipe); 52913675Sdyson } 53043623Sdillon 53143623Sdillon /* 53247748Salc * Break if some data was read. 53343623Sdillon */ 53447748Salc if (nread > 0) 53513675Sdyson break; 53616960Sdyson 53743623Sdillon /* 53847748Salc * Unlock the pipe buffer for our remaining processing. We 53947748Salc * will either break out with an error or we will sleep and 54047748Salc * relock to loop. 54143623Sdillon */ 54247748Salc pipeunlock(rpipe); 54343623Sdillon 54413675Sdyson /* 54547748Salc * Handle non-blocking mode operation or 54647748Salc * wait for more data. 
54713675Sdyson */ 54876760Salfred if (fp->f_flag & FNONBLOCK) { 54947748Salc error = EAGAIN; 55076760Salfred } else { 55147748Salc rpipe->pipe_state |= PIPE_WANTR; 55291362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 55391362Salfred PRIBIO | PCATCH, 55477140Salfred "piperd", 0)) == 0) 55547748Salc error = pipelock(rpipe, 1); 55613675Sdyson } 55747748Salc if (error) 55847748Salc goto unlocked_error; 55913675Sdyson } 56013675Sdyson } 56147748Salc pipeunlock(rpipe); 56213675Sdyson 56391362Salfred /* XXX: should probably do this before getting any locks. */ 56424101Sbde if (error == 0) 56555112Sbde vfs_timestamp(&rpipe->pipe_atime); 56647748Salcunlocked_error: 56747748Salc --rpipe->pipe_busy; 56813913Sdyson 56947748Salc /* 57047748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 57147748Salc */ 57213675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 57313675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 57413675Sdyson wakeup(rpipe); 57513675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 57613675Sdyson /* 57747748Salc * Handle write blocking hysteresis. 57813675Sdyson */ 57913675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 58013675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 58113675Sdyson wakeup(rpipe); 58213675Sdyson } 58313675Sdyson } 58414037Sdyson 58514802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 58614037Sdyson pipeselwakeup(rpipe); 58714037Sdyson 58891362Salfred PIPE_UNLOCK(rpipe); 58976760Salfred return (error); 59013675Sdyson} 59113675Sdyson 59214037Sdyson#ifndef PIPE_NODIRECT 59313907Sdyson/* 59413907Sdyson * Map the sending processes' buffer into kernel space and wire it. 59513907Sdyson * This is similar to a physical write operation. 
59613907Sdyson */ 59713675Sdysonstatic int 59813907Sdysonpipe_build_write_buffer(wpipe, uio) 59913907Sdyson struct pipe *wpipe; 60013675Sdyson struct uio *uio; 60113675Sdyson{ 60218863Sdyson u_int size; 60313907Sdyson int i; 60413907Sdyson vm_offset_t addr, endaddr, paddr; 60513907Sdyson 60679224Sdillon GIANT_REQUIRED; 60791412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 60879224Sdillon 60918863Sdyson size = (u_int) uio->uio_iov->iov_len; 61013907Sdyson if (size > wpipe->pipe_buffer.size) 61113907Sdyson size = wpipe->pipe_buffer.size; 61213907Sdyson 61340286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 61476760Salfred addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 61576760Salfred for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 61613907Sdyson vm_page_t m; 61713907Sdyson 61851474Sdillon if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 61951474Sdillon (paddr = pmap_kextract(addr)) == 0) { 62013907Sdyson int j; 62176760Salfred 62276760Salfred for (j = 0; j < i; j++) 62340700Sdg vm_page_unwire(wpipe->pipe_map.ms[j], 1); 62476760Salfred return (EFAULT); 62513907Sdyson } 62613907Sdyson 62713907Sdyson m = PHYS_TO_VM_PAGE(paddr); 62813907Sdyson vm_page_wire(m); 62913907Sdyson wpipe->pipe_map.ms[i] = m; 63013907Sdyson } 63113907Sdyson 63213907Sdyson/* 63313907Sdyson * set up the control block 63413907Sdyson */ 63513907Sdyson wpipe->pipe_map.npages = i; 63676760Salfred wpipe->pipe_map.pos = 63776760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 63813907Sdyson wpipe->pipe_map.cnt = size; 63913907Sdyson 64013907Sdyson/* 64113907Sdyson * and map the buffer 64213907Sdyson */ 64313907Sdyson if (wpipe->pipe_map.kva == 0) { 64413912Sdyson /* 64513912Sdyson * We need to allocate space for an extra page because the 64613912Sdyson * address range might (will) span pages at times. 
64713912Sdyson */ 64813907Sdyson wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 64913912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 65013912Sdyson amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 65113907Sdyson } 65213907Sdyson pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 65313907Sdyson wpipe->pipe_map.npages); 65413907Sdyson 65513907Sdyson/* 65613907Sdyson * and update the uio data 65713907Sdyson */ 65813907Sdyson 65913907Sdyson uio->uio_iov->iov_len -= size; 66013907Sdyson uio->uio_iov->iov_base += size; 66113907Sdyson if (uio->uio_iov->iov_len == 0) 66213907Sdyson uio->uio_iov++; 66313907Sdyson uio->uio_resid -= size; 66413907Sdyson uio->uio_offset += size; 66576760Salfred return (0); 66613907Sdyson} 66713907Sdyson 66813907Sdyson/* 66913907Sdyson * unmap and unwire the process buffer 67013907Sdyson */ 67113907Sdysonstatic void 67213907Sdysonpipe_destroy_write_buffer(wpipe) 67376760Salfred struct pipe *wpipe; 67413907Sdyson{ 67513907Sdyson int i; 67676364Salfred 67779224Sdillon GIANT_REQUIRED; 67891412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 67979224Sdillon 68017163Sdyson if (wpipe->pipe_map.kva) { 68117163Sdyson pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 68213907Sdyson 68313907Sdyson if (amountpipekva > MAXPIPEKVA) { 68413907Sdyson vm_offset_t kva = wpipe->pipe_map.kva; 68513907Sdyson wpipe->pipe_map.kva = 0; 68613907Sdyson kmem_free(kernel_map, kva, 68713912Sdyson wpipe->pipe_buffer.size + PAGE_SIZE); 68813912Sdyson amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 68913907Sdyson } 69013907Sdyson } 69176760Salfred for (i = 0; i < wpipe->pipe_map.npages; i++) 69240700Sdg vm_page_unwire(wpipe->pipe_map.ms[i], 1); 69391653Stanimura wpipe->pipe_map.npages = 0; 69413907Sdyson} 69513907Sdyson 69613907Sdyson/* 69713907Sdyson * In the case of a signal, the writing process might go away. 
This 69813907Sdyson * code copies the data into the circular buffer so that the source 69913907Sdyson * pages can be freed without loss of data. 70013907Sdyson */ 70113907Sdysonstatic void 70213907Sdysonpipe_clone_write_buffer(wpipe) 70376364Salfred struct pipe *wpipe; 70413907Sdyson{ 70513907Sdyson int size; 70613907Sdyson int pos; 70713907Sdyson 70891362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 70913907Sdyson size = wpipe->pipe_map.cnt; 71013907Sdyson pos = wpipe->pipe_map.pos; 71176760Salfred bcopy((caddr_t) wpipe->pipe_map.kva + pos, 71276760Salfred (caddr_t) wpipe->pipe_buffer.buffer, size); 71313907Sdyson 71413907Sdyson wpipe->pipe_buffer.in = size; 71513907Sdyson wpipe->pipe_buffer.out = 0; 71613907Sdyson wpipe->pipe_buffer.cnt = size; 71713907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 71813907Sdyson 71991412Salfred PIPE_GET_GIANT(wpipe); 72013907Sdyson pipe_destroy_write_buffer(wpipe); 72191412Salfred PIPE_DROP_GIANT(wpipe); 72213907Sdyson} 72313907Sdyson 72413907Sdyson/* 72513907Sdyson * This implements the pipe buffer write mechanism. Note that only 72613907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 72713907Sdyson * If there are any characters in the pipe buffer, the direct write will 72813907Sdyson * be deferred until the receiving process grabs all of the bytes from 72913907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 
73013907Sdyson */ 73113907Sdysonstatic int 73213907Sdysonpipe_direct_write(wpipe, uio) 73313907Sdyson struct pipe *wpipe; 73413907Sdyson struct uio *uio; 73513907Sdyson{ 73613907Sdyson int error; 73776364Salfred 73813951Sdysonretry: 73991362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 74013907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 74176760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 74213951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 74313951Sdyson wakeup(wpipe); 74413951Sdyson } 74513992Sdyson wpipe->pipe_state |= PIPE_WANTW; 74691362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 74791362Salfred PRIBIO | PCATCH, "pipdww", 0); 74814802Sdyson if (error) 74913907Sdyson goto error1; 75014802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 75114802Sdyson error = EPIPE; 75214802Sdyson goto error1; 75314802Sdyson } 75413907Sdyson } 75513907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 75613951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 75776760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 75813951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 75913951Sdyson wakeup(wpipe); 76013951Sdyson } 76113951Sdyson 76213992Sdyson wpipe->pipe_state |= PIPE_WANTW; 76391362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 76491362Salfred PRIBIO | PCATCH, "pipdwc", 0); 76514802Sdyson if (error) 76613907Sdyson goto error1; 76714802Sdyson if (wpipe->pipe_state & PIPE_EOF) { 76814802Sdyson error = EPIPE; 76914802Sdyson goto error1; 77013907Sdyson } 77113951Sdyson goto retry; 77213907Sdyson } 77313907Sdyson 77413951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 77513951Sdyson 77692305Salfred pipelock(wpipe, 0); 77791362Salfred PIPE_GET_GIANT(wpipe); 77813907Sdyson error = pipe_build_write_buffer(wpipe, uio); 77991362Salfred PIPE_DROP_GIANT(wpipe); 78092305Salfred pipeunlock(wpipe); 78113907Sdyson if (error) { 78213907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 78313907Sdyson goto error1; 78413907Sdyson } 78513907Sdyson 78613907Sdyson error = 0; 78713907Sdyson while (!error && 
(wpipe->pipe_state & PIPE_DIRECTW)) { 78813907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 78913907Sdyson pipelock(wpipe, 0); 79091362Salfred PIPE_GET_GIANT(wpipe); 79113907Sdyson pipe_destroy_write_buffer(wpipe); 79291362Salfred PIPE_DROP_GIANT(wpipe); 79313907Sdyson pipeunlock(wpipe); 79414037Sdyson pipeselwakeup(wpipe); 79514802Sdyson error = EPIPE; 79614802Sdyson goto error1; 79713907Sdyson } 79813992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 79913992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 80013992Sdyson wakeup(wpipe); 80113992Sdyson } 80214037Sdyson pipeselwakeup(wpipe); 80391362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, 80491362Salfred "pipdwt", 0); 80513907Sdyson } 80613907Sdyson 80713907Sdyson pipelock(wpipe,0); 80813907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 80913907Sdyson /* 81013907Sdyson * this bit of trickery substitutes a kernel buffer for 81113907Sdyson * the process that might be going away. 81213907Sdyson */ 81313907Sdyson pipe_clone_write_buffer(wpipe); 81413907Sdyson } else { 81591412Salfred PIPE_GET_GIANT(wpipe); 81613907Sdyson pipe_destroy_write_buffer(wpipe); 81791412Salfred PIPE_DROP_GIANT(wpipe); 81813907Sdyson } 81913907Sdyson pipeunlock(wpipe); 82076760Salfred return (error); 82113907Sdyson 82213907Sdysonerror1: 82313907Sdyson wakeup(wpipe); 82476760Salfred return (error); 82513907Sdyson} 82614037Sdyson#endif 82713907Sdyson 82816960Sdysonstatic int 82983366Sjulianpipe_write(fp, uio, cred, flags, td) 83016960Sdyson struct file *fp; 83113907Sdyson struct uio *uio; 83216960Sdyson struct ucred *cred; 83383366Sjulian struct thread *td; 83445311Sdt int flags; 83513907Sdyson{ 83613675Sdyson int error = 0; 83713913Sdyson int orig_resid; 83816960Sdyson struct pipe *wpipe, *rpipe; 83916960Sdyson 84016960Sdyson rpipe = (struct pipe *) fp->f_data; 84116960Sdyson wpipe = rpipe->pipe_peer; 84216960Sdyson 84391395Salfred PIPE_LOCK(rpipe); 84413675Sdyson /* 84513675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 
84613675Sdyson */ 84716960Sdyson if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 84891395Salfred PIPE_UNLOCK(rpipe); 84976760Salfred return (EPIPE); 85013675Sdyson } 85177676Sdillon ++wpipe->pipe_busy; 85213675Sdyson 85317163Sdyson /* 85417163Sdyson * If it is advantageous to resize the pipe buffer, do 85517163Sdyson * so. 85617163Sdyson */ 85717163Sdyson if ((uio->uio_resid > PIPE_SIZE) && 85817163Sdyson (nbigpipe < LIMITBIGPIPES) && 85917163Sdyson (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 86017163Sdyson (wpipe->pipe_buffer.size <= PIPE_SIZE) && 86117163Sdyson (wpipe->pipe_buffer.cnt == 0)) { 86217163Sdyson 86313907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 86492305Salfred PIPE_GET_GIANT(wpipe); 86576364Salfred if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 86676364Salfred nbigpipe++; 86792305Salfred PIPE_DROP_GIANT(wpipe); 86813907Sdyson pipeunlock(wpipe); 86913907Sdyson } 87013907Sdyson } 87177676Sdillon 87277676Sdillon /* 87377676Sdillon * If an early error occured unbusy and return, waking up any pending 87477676Sdillon * readers. 87577676Sdillon */ 87677676Sdillon if (error) { 87777676Sdillon --wpipe->pipe_busy; 87877676Sdillon if ((wpipe->pipe_busy == 0) && 87977676Sdillon (wpipe->pipe_state & PIPE_WANT)) { 88077676Sdillon wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 88177676Sdillon wakeup(wpipe); 88277676Sdillon } 88391395Salfred PIPE_UNLOCK(rpipe); 88477676Sdillon return(error); 88577676Sdillon } 88676364Salfred 88776364Salfred KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone")); 88813907Sdyson 88913913Sdyson orig_resid = uio->uio_resid; 89077676Sdillon 89113675Sdyson while (uio->uio_resid) { 89213907Sdyson int space; 89376760Salfred 89414037Sdyson#ifndef PIPE_NODIRECT 89513907Sdyson /* 89613907Sdyson * If the transfer is large, we can gain performance if 89713907Sdyson * we do process-to-process copies directly. 89816416Sdyson * If the write is non-blocking, we don't use the 89916416Sdyson * direct write mechanism. 
90058505Sdillon * 90158505Sdillon * The direct write mechanism will detect the reader going 90258505Sdillon * away on us. 90313907Sdyson */ 90417163Sdyson if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 90517163Sdyson (fp->f_flag & FNONBLOCK) == 0 && 90617163Sdyson (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 90713907Sdyson (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 90813907Sdyson error = pipe_direct_write( wpipe, uio); 90976760Salfred if (error) 91013907Sdyson break; 91113907Sdyson continue; 91291362Salfred } 91314037Sdyson#endif 91413907Sdyson 91513907Sdyson /* 91613907Sdyson * Pipe buffered writes cannot be coincidental with 91713907Sdyson * direct writes. We wait until the currently executing 91813907Sdyson * direct write is completed before we start filling the 91958505Sdillon * pipe buffer. We break out if a signal occurs or the 92058505Sdillon * reader goes away. 92113907Sdyson */ 92213907Sdyson retrywrite: 92313907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 92413992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 92513992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 92613992Sdyson wakeup(wpipe); 92713992Sdyson } 92891395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 92991362Salfred "pipbww", 0); 93058505Sdillon if (wpipe->pipe_state & PIPE_EOF) 93158505Sdillon break; 93213907Sdyson if (error) 93313907Sdyson break; 93413907Sdyson } 93558505Sdillon if (wpipe->pipe_state & PIPE_EOF) { 93658505Sdillon error = EPIPE; 93758505Sdillon break; 93858505Sdillon } 93913907Sdyson 94013907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 94114644Sdyson 94214644Sdyson /* Writes of size <= PIPE_BUF must be atomic. 
*/ 94313913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 94413913Sdyson space = 0; 94513907Sdyson 94617163Sdyson if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 94713907Sdyson if ((error = pipelock(wpipe,1)) == 0) { 94854534Stegge int size; /* Transfer size */ 94954534Stegge int segsize; /* first segment to transfer */ 95076760Salfred 95113907Sdyson /* 95213907Sdyson * It is possible for a direct write to 95313907Sdyson * slip in on us... handle it here... 95413907Sdyson */ 95513907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 95613907Sdyson pipeunlock(wpipe); 95713907Sdyson goto retrywrite; 95813907Sdyson } 95954534Stegge /* 96054534Stegge * If a process blocked in uiomove, our 96154534Stegge * value for space might be bad. 96258505Sdillon * 96358505Sdillon * XXX will we be ok if the reader has gone 96458505Sdillon * away here? 96554534Stegge */ 96654534Stegge if (space > wpipe->pipe_buffer.size - 96754534Stegge wpipe->pipe_buffer.cnt) { 96854534Stegge pipeunlock(wpipe); 96954534Stegge goto retrywrite; 97054534Stegge } 97154534Stegge 97254534Stegge /* 97354534Stegge * Transfer size is minimum of uio transfer 97454534Stegge * and free space in pipe buffer. 97554534Stegge */ 97654534Stegge if (space > uio->uio_resid) 97754534Stegge size = uio->uio_resid; 97854534Stegge else 97954534Stegge size = space; 98054534Stegge /* 98154534Stegge * First segment to transfer is minimum of 98254534Stegge * transfer size and contiguous space in 98354534Stegge * pipe buffer. If first segment to transfer 98454534Stegge * is less than the transfer size, we've got 98554534Stegge * a wraparound in the buffer. 
98654534Stegge */ 98754534Stegge segsize = wpipe->pipe_buffer.size - 98854534Stegge wpipe->pipe_buffer.in; 98954534Stegge if (segsize > size) 99054534Stegge segsize = size; 99154534Stegge 99254534Stegge /* Transfer first segment */ 99354534Stegge 99491395Salfred PIPE_UNLOCK(rpipe); 99554534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 99654534Stegge segsize, uio); 99791395Salfred PIPE_LOCK(rpipe); 99854534Stegge 99954534Stegge if (error == 0 && segsize < size) { 100054534Stegge /* 100154534Stegge * Transfer remaining part now, to 100254534Stegge * support atomic writes. Wraparound 100354534Stegge * happened. 100454534Stegge */ 100554534Stegge if (wpipe->pipe_buffer.in + segsize != 100654534Stegge wpipe->pipe_buffer.size) 100754534Stegge panic("Expected pipe buffer wraparound disappeared"); 100854534Stegge 100991395Salfred PIPE_UNLOCK(rpipe); 101054534Stegge error = uiomove(&wpipe->pipe_buffer.buffer[0], 101154534Stegge size - segsize, uio); 101291395Salfred PIPE_LOCK(rpipe); 101354534Stegge } 101454534Stegge if (error == 0) { 101554534Stegge wpipe->pipe_buffer.in += size; 101654534Stegge if (wpipe->pipe_buffer.in >= 101754534Stegge wpipe->pipe_buffer.size) { 101854534Stegge if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 101954534Stegge panic("Expected wraparound bad"); 102054534Stegge wpipe->pipe_buffer.in = size - segsize; 102154534Stegge } 102254534Stegge 102354534Stegge wpipe->pipe_buffer.cnt += size; 102454534Stegge if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 102554534Stegge panic("Pipe buffer overflow"); 102654534Stegge 102754534Stegge } 102813675Sdyson pipeunlock(wpipe); 102913675Sdyson } 103013675Sdyson if (error) 103113675Sdyson break; 103213675Sdyson 103313675Sdyson } else { 103413675Sdyson /* 103513675Sdyson * If the "read-side" has been blocked, wake it up now. 
103613675Sdyson */ 103713675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 103813675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 103913675Sdyson wakeup(wpipe); 104013675Sdyson } 104114037Sdyson 104213675Sdyson /* 104313675Sdyson * don't block on non-blocking I/O 104413675Sdyson */ 104516960Sdyson if (fp->f_flag & FNONBLOCK) { 104613907Sdyson error = EAGAIN; 104713675Sdyson break; 104813675Sdyson } 104913907Sdyson 105014037Sdyson /* 105114037Sdyson * We have no more space and have something to offer, 105229356Speter * wake up select/poll. 105314037Sdyson */ 105414037Sdyson pipeselwakeup(wpipe); 105514037Sdyson 105613675Sdyson wpipe->pipe_state |= PIPE_WANTW; 105791395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), 105891362Salfred PRIBIO | PCATCH, "pipewr", 0); 105976760Salfred if (error != 0) 106013675Sdyson break; 106113675Sdyson /* 106213675Sdyson * If read side wants to go away, we just issue a signal 106313675Sdyson * to ourselves. 106413675Sdyson */ 106513675Sdyson if (wpipe->pipe_state & PIPE_EOF) { 106613774Sdyson error = EPIPE; 106713907Sdyson break; 106813675Sdyson } 106913675Sdyson } 107013675Sdyson } 107113675Sdyson 107214644Sdyson --wpipe->pipe_busy; 107377676Sdillon 107476760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 107576760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 107613675Sdyson wakeup(wpipe); 107713675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 107813675Sdyson /* 107913675Sdyson * If we have put any characters in the buffer, we wake up 108013675Sdyson * the reader. 
108113675Sdyson */ 108213675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 108313675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 108413675Sdyson wakeup(wpipe); 108513675Sdyson } 108613675Sdyson } 108713909Sdyson 108813909Sdyson /* 108913909Sdyson * Don't return EPIPE if I/O was successful 109013909Sdyson */ 109113907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 109277676Sdillon (uio->uio_resid == 0) && 109377676Sdillon (error == EPIPE)) { 109413907Sdyson error = 0; 109577676Sdillon } 109613913Sdyson 109724101Sbde if (error == 0) 109855112Sbde vfs_timestamp(&wpipe->pipe_mtime); 109924101Sbde 110014037Sdyson /* 110114037Sdyson * We have something to offer, 110229356Speter * wake up select/poll. 110314037Sdyson */ 110414177Sdyson if (wpipe->pipe_buffer.cnt) 110514037Sdyson pipeselwakeup(wpipe); 110613907Sdyson 110791395Salfred PIPE_UNLOCK(rpipe); 110876760Salfred return (error); 110913675Sdyson} 111013675Sdyson 111113675Sdyson/* 111213675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 
111313675Sdyson */ 111413675Sdysonint 111583366Sjulianpipe_ioctl(fp, cmd, data, td) 111613675Sdyson struct file *fp; 111736735Sdfr u_long cmd; 111876364Salfred caddr_t data; 111983366Sjulian struct thread *td; 112013675Sdyson{ 112176364Salfred struct pipe *mpipe = (struct pipe *)fp->f_data; 112213675Sdyson 112313675Sdyson switch (cmd) { 112413675Sdyson 112513675Sdyson case FIONBIO: 112613675Sdyson return (0); 112713675Sdyson 112813675Sdyson case FIOASYNC: 112991362Salfred PIPE_LOCK(mpipe); 113013675Sdyson if (*(int *)data) { 113113675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 113213675Sdyson } else { 113313675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 113413675Sdyson } 113591362Salfred PIPE_UNLOCK(mpipe); 113613675Sdyson return (0); 113713675Sdyson 113813675Sdyson case FIONREAD: 113991362Salfred PIPE_LOCK(mpipe); 114014037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 114114037Sdyson *(int *)data = mpipe->pipe_map.cnt; 114214037Sdyson else 114314037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 114491362Salfred PIPE_UNLOCK(mpipe); 114513675Sdyson return (0); 114613675Sdyson 114741086Struckman case FIOSETOWN: 114841086Struckman return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 114941086Struckman 115041086Struckman case FIOGETOWN: 115141086Struckman *(int *)data = fgetown(mpipe->pipe_sigio); 115213675Sdyson return (0); 115313675Sdyson 115441086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 115541086Struckman case TIOCSPGRP: 115641086Struckman return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 115741086Struckman 115841086Struckman /* This is deprecated, FIOGETOWN should be used instead. 
*/ 115918863Sdyson case TIOCGPGRP: 116041086Struckman *(int *)data = -fgetown(mpipe->pipe_sigio); 116113675Sdyson return (0); 116213675Sdyson 116313675Sdyson } 116417124Sbde return (ENOTTY); 116513675Sdyson} 116613675Sdyson 116713675Sdysonint 116883366Sjulianpipe_poll(fp, events, cred, td) 116913675Sdyson struct file *fp; 117029356Speter int events; 117129356Speter struct ucred *cred; 117283366Sjulian struct thread *td; 117313675Sdyson{ 117476364Salfred struct pipe *rpipe = (struct pipe *)fp->f_data; 117513675Sdyson struct pipe *wpipe; 117629356Speter int revents = 0; 117713675Sdyson 117813675Sdyson wpipe = rpipe->pipe_peer; 117991362Salfred PIPE_LOCK(rpipe); 118029356Speter if (events & (POLLIN | POLLRDNORM)) 118129356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 118229356Speter (rpipe->pipe_buffer.cnt > 0) || 118329356Speter (rpipe->pipe_state & PIPE_EOF)) 118429356Speter revents |= events & (POLLIN | POLLRDNORM); 118513675Sdyson 118629356Speter if (events & (POLLOUT | POLLWRNORM)) 118729356Speter if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 118843311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 118943311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 119029356Speter revents |= events & (POLLOUT | POLLWRNORM); 119113675Sdyson 119229356Speter if ((rpipe->pipe_state & PIPE_EOF) || 119329356Speter (wpipe == NULL) || 119429356Speter (wpipe->pipe_state & PIPE_EOF)) 119529356Speter revents |= POLLHUP; 119629356Speter 119729356Speter if (revents == 0) { 119829356Speter if (events & (POLLIN | POLLRDNORM)) { 119983805Sjhb selrecord(td, &rpipe->pipe_sel); 120029356Speter rpipe->pipe_state |= PIPE_SEL; 120113675Sdyson } 120213675Sdyson 120329356Speter if (events & (POLLOUT | POLLWRNORM)) { 120483805Sjhb selrecord(td, &wpipe->pipe_sel); 120530164Speter wpipe->pipe_state |= PIPE_SEL; 120613907Sdyson } 120713675Sdyson } 120891362Salfred PIPE_UNLOCK(rpipe); 120929356Speter 121029356Speter return (revents); 121113675Sdyson} 
121213675Sdyson 121352983Speterstatic int 121483366Sjulianpipe_stat(fp, ub, td) 121552983Speter struct file *fp; 121652983Speter struct stat *ub; 121783366Sjulian struct thread *td; 121813675Sdyson{ 121952983Speter struct pipe *pipe = (struct pipe *)fp->f_data; 122052983Speter 122176760Salfred bzero((caddr_t)ub, sizeof(*ub)); 122217124Sbde ub->st_mode = S_IFIFO; 122313907Sdyson ub->st_blksize = pipe->pipe_buffer.size; 122413675Sdyson ub->st_size = pipe->pipe_buffer.cnt; 122513675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 122634901Sphk ub->st_atimespec = pipe->pipe_atime; 122734901Sphk ub->st_mtimespec = pipe->pipe_mtime; 122834901Sphk ub->st_ctimespec = pipe->pipe_ctime; 122960404Schris ub->st_uid = fp->f_cred->cr_uid; 123060404Schris ub->st_gid = fp->f_cred->cr_gid; 123117124Sbde /* 123260404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 123317124Sbde * XXX (st_dev, st_ino) should be unique. 123417124Sbde */ 123576760Salfred return (0); 123613675Sdyson} 123713675Sdyson 123813675Sdyson/* ARGSUSED */ 123913675Sdysonstatic int 124083366Sjulianpipe_close(fp, td) 124113675Sdyson struct file *fp; 124283366Sjulian struct thread *td; 124313675Sdyson{ 124413675Sdyson struct pipe *cpipe = (struct pipe *)fp->f_data; 124516322Sgpalmer 124649413Sgreen fp->f_ops = &badfileops; 124749413Sgreen fp->f_data = NULL; 124841086Struckman funsetown(cpipe->pipe_sigio); 124913675Sdyson pipeclose(cpipe); 125076760Salfred return (0); 125113675Sdyson} 125213675Sdyson 125376364Salfredstatic void 125476364Salfredpipe_free_kmem(cpipe) 125576364Salfred struct pipe *cpipe; 125676364Salfred{ 125791412Salfred 125879224Sdillon GIANT_REQUIRED; 125991412Salfred KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 126091412Salfred ("pipespace: pipe mutex locked")); 126176364Salfred 126276364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 126376364Salfred if (cpipe->pipe_buffer.size > PIPE_SIZE) 126476364Salfred --nbigpipe; 
126576364Salfred amountpipekva -= cpipe->pipe_buffer.size; 126676364Salfred kmem_free(kernel_map, 126776364Salfred (vm_offset_t)cpipe->pipe_buffer.buffer, 126876364Salfred cpipe->pipe_buffer.size); 126976364Salfred cpipe->pipe_buffer.buffer = NULL; 127076364Salfred } 127176364Salfred#ifndef PIPE_NODIRECT 127276364Salfred if (cpipe->pipe_map.kva != NULL) { 127376364Salfred amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 127476364Salfred kmem_free(kernel_map, 127576364Salfred cpipe->pipe_map.kva, 127676364Salfred cpipe->pipe_buffer.size + PAGE_SIZE); 127776364Salfred cpipe->pipe_map.cnt = 0; 127876364Salfred cpipe->pipe_map.kva = 0; 127976364Salfred cpipe->pipe_map.pos = 0; 128076364Salfred cpipe->pipe_map.npages = 0; 128176364Salfred } 128276364Salfred#endif 128376364Salfred} 128476364Salfred 128513675Sdyson/* 128613675Sdyson * shutdown the pipe 128713675Sdyson */ 128813675Sdysonstatic void 128913675Sdysonpipeclose(cpipe) 129013675Sdyson struct pipe *cpipe; 129113675Sdyson{ 129213907Sdyson struct pipe *ppipe; 129391968Salfred int hadpeer; 129476364Salfred 129591968Salfred if (cpipe == NULL) 129691968Salfred return; 129791968Salfred 129891968Salfred hadpeer = 0; 129991968Salfred 130091968Salfred /* partially created pipes won't have a valid mutex. */ 130191968Salfred if (PIPE_MTX(cpipe) != NULL) 130291362Salfred PIPE_LOCK(cpipe); 130313907Sdyson 130491968Salfred pipeselwakeup(cpipe); 130513907Sdyson 130691968Salfred /* 130791968Salfred * If the other side is blocked, wake it up saying that 130891968Salfred * we want to close it down. 
130991968Salfred */ 131091968Salfred while (cpipe->pipe_busy) { 131191968Salfred wakeup(cpipe); 131291968Salfred cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; 131391968Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 131491968Salfred } 131513675Sdyson 131691968Salfred /* 131791968Salfred * Disconnect from peer 131891968Salfred */ 131991968Salfred if ((ppipe = cpipe->pipe_peer) != NULL) { 132091968Salfred hadpeer++; 132191968Salfred pipeselwakeup(ppipe); 132213907Sdyson 132391968Salfred ppipe->pipe_state |= PIPE_EOF; 132491968Salfred wakeup(ppipe); 132591968Salfred KNOTE(&ppipe->pipe_sel.si_note, 0); 132691968Salfred ppipe->pipe_peer = NULL; 132791968Salfred } 132891968Salfred /* 132991968Salfred * free resources 133091968Salfred */ 133191968Salfred if (PIPE_MTX(cpipe) != NULL) { 133291968Salfred PIPE_UNLOCK(cpipe); 133391968Salfred if (!hadpeer) { 133491968Salfred mtx_destroy(PIPE_MTX(cpipe)); 133591968Salfred free(PIPE_MTX(cpipe), M_TEMP); 133613675Sdyson } 133713675Sdyson } 133891968Salfred mtx_lock(&Giant); 133991968Salfred pipe_free_kmem(cpipe); 134092751Sjeff uma_zfree(pipe_zone, cpipe); 134191968Salfred mtx_unlock(&Giant); 134213675Sdyson} 134359288Sjlemon 134472521Sjlemon/*ARGSUSED*/ 134559288Sjlemonstatic int 134672521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 134759288Sjlemon{ 134889306Salfred struct pipe *cpipe; 134959288Sjlemon 135089306Salfred cpipe = (struct pipe *)kn->kn_fp->f_data; 135172521Sjlemon switch (kn->kn_filter) { 135272521Sjlemon case EVFILT_READ: 135372521Sjlemon kn->kn_fop = &pipe_rfiltops; 135472521Sjlemon break; 135572521Sjlemon case EVFILT_WRITE: 135672521Sjlemon kn->kn_fop = &pipe_wfiltops; 135778292Sjlemon cpipe = cpipe->pipe_peer; 135872521Sjlemon break; 135972521Sjlemon default: 136072521Sjlemon return (1); 136172521Sjlemon } 136278292Sjlemon kn->kn_hook = (caddr_t)cpipe; 136378292Sjlemon 136491372Salfred PIPE_LOCK(cpipe); 136578292Sjlemon SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext); 
136691372Salfred PIPE_UNLOCK(cpipe); 136759288Sjlemon return (0); 136859288Sjlemon} 136959288Sjlemon 137059288Sjlemonstatic void 137159288Sjlemonfilt_pipedetach(struct knote *kn) 137259288Sjlemon{ 137378292Sjlemon struct pipe *cpipe = (struct pipe *)kn->kn_hook; 137459288Sjlemon 137591372Salfred PIPE_LOCK(cpipe); 137678292Sjlemon SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext); 137791372Salfred PIPE_UNLOCK(cpipe); 137859288Sjlemon} 137959288Sjlemon 138059288Sjlemon/*ARGSUSED*/ 138159288Sjlemonstatic int 138259288Sjlemonfilt_piperead(struct knote *kn, long hint) 138359288Sjlemon{ 138459288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 138559288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 138659288Sjlemon 138791372Salfred PIPE_LOCK(rpipe); 138859288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 138959288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 139059288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 139159288Sjlemon 139259288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 139359288Sjlemon (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 139491372Salfred kn->kn_flags |= EV_EOF; 139591372Salfred PIPE_UNLOCK(rpipe); 139659288Sjlemon return (1); 139759288Sjlemon } 139891372Salfred PIPE_UNLOCK(rpipe); 139959288Sjlemon return (kn->kn_data > 0); 140059288Sjlemon} 140159288Sjlemon 140259288Sjlemon/*ARGSUSED*/ 140359288Sjlemonstatic int 140459288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 140559288Sjlemon{ 140659288Sjlemon struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 140759288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 140859288Sjlemon 140991372Salfred PIPE_LOCK(rpipe); 141059288Sjlemon if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 141159288Sjlemon kn->kn_data = 0; 141259288Sjlemon kn->kn_flags |= EV_EOF; 141391372Salfred PIPE_UNLOCK(rpipe); 141459288Sjlemon return (1); 141559288Sjlemon } 141659288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 141765855Sjlemon if 
(wpipe->pipe_state & PIPE_DIRECTW) 141859288Sjlemon kn->kn_data = 0; 141959288Sjlemon 142091372Salfred PIPE_UNLOCK(rpipe); 142159288Sjlemon return (kn->kn_data >= PIPE_BUF); 142259288Sjlemon} 1423