sys_pipe.c revision 77140
/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
 * $FreeBSD: head/sys/kern/sys_pipe.c 77140 2001-05-24 18:06:22Z alfred $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
501541Srgrimes */ 5112221Sbde 5241059Speter#include <sys/param.h> 531541Srgrimes#include <sys/systm.h> 541541Srgrimes#include <sys/fcntl.h> 5531891Ssef#include <sys/file.h> 5661287Srwatson#include <sys/filedesc.h> 571541Srgrimes#include <sys/filio.h> 5830354Sphk#include <sys/lock.h> 5930354Sphk#include <sys/mutex.h> 6012221Sbde#include <sys/ttycom.h> 6111332Sswallace#include <sys/stat.h> 621541Srgrimes#include <sys/poll.h> 631541Srgrimes#include <sys/selinfo.h> 6412221Sbde#include <sys/signalvar.h> 651541Srgrimes#include <sys/sysproto.h> 6658717Sdillon#include <sys/pipe.h> 6758717Sdillon#include <sys/proc.h> 6858717Sdillon#include <sys/vnode.h> 691541Srgrimes#include <sys/uio.h> 701549Srgrimes#include <sys/event.h> 7130994Sphk 721541Srgrimes#include <vm/vm.h> 7311332Sswallace#include <vm/vm_param.h> 741541Srgrimes#include <vm/vm_object.h> 751541Srgrimes#include <vm/vm_kern.h> 7630994Sphk#include <vm/vm_extern.h> 771541Srgrimes#include <vm/pmap.h> 7830994Sphk#include <vm/vm_map.h> 791541Srgrimes#include <vm/vm_page.h> 801541Srgrimes#include <vm/vm_zone.h> 811541Srgrimes 821541Srgrimes/* 8312221Sbde * Use this define if you want to disable *fancy* VM things. Expect an 8411332Sswallace * approx 30% decrease in transfer rate. This could be useful for 8511332Sswallace * NetBSD or OpenBSD. 
8611332Sswallace */ 8712221Sbde/* #define PIPE_NODIRECT */ 881541Srgrimes 891549Srgrimes/* 9030994Sphk * interfaces to the outside world 911541Srgrimes */ 9211332Sswallacestatic int pipe_read __P((struct file *fp, struct uio *uio, 931541Srgrimes struct ucred *cred, int flags, struct proc *p)); 941541Srgrimesstatic int pipe_write __P((struct file *fp, struct uio *uio, 9530994Sphk struct ucred *cred, int flags, struct proc *p)); 961541Srgrimesstatic int pipe_close __P((struct file *fp, struct proc *p)); 971541Srgrimesstatic int pipe_poll __P((struct file *fp, int events, struct ucred *cred, 981541Srgrimes struct proc *p)); 9958717Sdillonstatic int pipe_kqfilter __P((struct file *fp, struct knote *kn)); 10058717Sdillonstatic int pipe_stat __P((struct file *fp, struct stat *sb, struct proc *p)); 10158717Sdillonstatic int pipe_ioctl __P((struct file *fp, u_long cmd, caddr_t data, struct proc *p)); 10258717Sdillon 10358717Sdillonstatic struct fileops pipeops = { 10412221Sbde pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter, 10511332Sswallace pipe_stat, pipe_close 10611332Sswallace}; 10711332Sswallace 10812221Sbdestatic void filt_pipedetach(struct knote *kn); 10911332Sswallacestatic int filt_piperead(struct knote *kn, long hint); 1101549Srgrimesstatic int filt_pipewrite(struct knote *kn, long hint); 11130994Sphk 1121541Srgrimesstatic struct filterops pipe_rfiltops = 11311332Sswallace { 1, NULL, filt_pipedetach, filt_piperead }; 1141541Srgrimesstatic struct filterops pipe_wfiltops = 1151541Srgrimes { 1, NULL, filt_pipedetach, filt_pipewrite }; 11630994Sphk 1171541Srgrimes 1181541Srgrimes/* 1191541Srgrimes * Default pipe buffer size(s), this can be kind-of large now because pipe 12028401Speter * space is pageable. The pipe code will try to maintain locality of 12112221Sbde * reference for performance reasons, so small amounts of outstanding I/O 12228401Speter * will not wipe the cache. 
12328401Speter */ 12428401Speter#define MINPIPESIZE (PIPE_SIZE/3) 12528401Speter#define MAXPIPESIZE (2*PIPE_SIZE/3) 12628401Speter 12728401Speter/* 12830994Sphk * Maximum amount of kva for pipes -- this is kind-of a soft limit, but 12928401Speter * is there so that on large systems, we don't exhaust it. 13028401Speter */ 13128401Speter#define MAXPIPEKVA (8*1024*1024) 13241726Struckman 13341726Struckman/* 13441726Struckman * Limit for direct transfers, we cannot, of course limit 13528401Speter * the amount of kva for pipes in general though. 13628401Speter */ 13728401Speter#define LIMITPIPEKVA (16*1024*1024) 13841726Struckman 13928401Speter/* 14028401Speter * Limit the number of "big" pipes 14141726Struckman */ 14228401Speter#define LIMITBIGPIPES 32 14328401Speterstatic int nbigpipe; 14428401Speter 14528401Speterstatic int amountpipekva; 14628401Speter 14728401Speterstatic void pipeclose __P((struct pipe *cpipe)); 14828401Speterstatic void pipe_free_kmem __P((struct pipe *cpipe)); 14928401Speterstatic int pipe_create __P((struct pipe **cpipep)); 15028401Speterstatic __inline int pipelock __P((struct pipe *cpipe, int catch)); 15128401Speterstatic __inline void pipeunlock __P((struct pipe *cpipe)); 15228401Speterstatic __inline void pipeselwakeup __P((struct pipe *cpipe)); 15328401Speter#ifndef PIPE_NODIRECT 15428401Speterstatic int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); 15530994Sphkstatic void pipe_destroy_write_buffer __P((struct pipe *wpipe)); 15628401Speterstatic int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); 15728401Speterstatic void pipe_clone_write_buffer __P((struct pipe *wpipe)); 15828401Speter#endif 15941726Struckmanstatic int pipespace __P((struct pipe *cpipe, int size)); 16041726Struckman 16141726Struckmanstatic vm_zone_t pipe_zone; 16228401Speter 16328401Speter/* 16428401Speter * The pipe system call for the DTYPE_PIPE type of pipes 16541726Struckman */ 16628401Speter 16728401Speter/* ARGSUSED */ 
16841726Struckmanint 16928401Speterpipe(p, uap) 17028401Speter struct proc *p; 17128401Speter struct pipe_args /* { 17228401Speter int dummy; 17358941Sdillon } */ *uap; 17458941Sdillon{ 17558941Sdillon struct filedesc *fdp = p->p_fd; 17628401Speter struct file *rf, *wf; 17711332Sswallace struct pipe *rpipe, *wpipe; 17811332Sswallace int fd, error; 17911332Sswallace 18012221Sbde if (pipe_zone == NULL) 18111332Sswallace pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4); 1821541Srgrimes 1831549Srgrimes rpipe = wpipe = NULL; 18430994Sphk if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 1851541Srgrimes pipeclose(rpipe); 18611332Sswallace pipeclose(wpipe); 1871541Srgrimes return (ENFILE); 1881541Srgrimes } 18930994Sphk 1901541Srgrimes rpipe->pipe_state |= PIPE_DIRECTOK; 19130994Sphk wpipe->pipe_state |= PIPE_DIRECTOK; 1921541Srgrimes 1931541Srgrimes error = falloc(p, &rf, &fd); 1941541Srgrimes if (error) { 1951541Srgrimes pipeclose(rpipe); 19658941Sdillon pipeclose(wpipe); 19758941Sdillon return (error); 19858941Sdillon } 19912221Sbde fhold(rf); 20011332Sswallace p->p_retval[0] = fd; 20111332Sswallace 20211332Sswallace /* 20312221Sbde * Warning: once we've gotten past allocation of the fd for the 20411332Sswallace * read-side, we can only drop the read side via fdrop() in order 2051541Srgrimes * to avoid races against processes which manage to dup() the read 2061549Srgrimes * side while we are blocked trying to allocate the write side. 20730994Sphk */ 2081541Srgrimes rf->f_flag = FREAD | FWRITE; 20911332Sswallace rf->f_type = DTYPE_PIPE; 2101541Srgrimes rf->f_data = (caddr_t)rpipe; 2111541Srgrimes rf->f_ops = &pipeops; 21230994Sphk error = falloc(p, &wf, &fd); 2131541Srgrimes if (error) { 2141541Srgrimes if (fdp->fd_ofiles[p->p_retval[0]] == rf) { 2151541Srgrimes fdp->fd_ofiles[p->p_retval[0]] = NULL; 21658941Sdillon fdrop(rf, p); 21758941Sdillon } 21858941Sdillon fdrop(rf, p); 21912221Sbde /* rpipe has been closed by fdrop(). 
*/ 22011332Sswallace pipeclose(wpipe); 22111332Sswallace return (error); 22211332Sswallace } 22312221Sbde wf->f_flag = FREAD | FWRITE; 22411332Sswallace wf->f_type = DTYPE_PIPE; 2251541Srgrimes wf->f_data = (caddr_t)wpipe; 2261549Srgrimes wf->f_ops = &pipeops; 22730994Sphk p->p_retval[1] = fd; 2281541Srgrimes 22911332Sswallace rpipe->pipe_peer = wpipe; 2301541Srgrimes wpipe->pipe_peer = rpipe; 2311541Srgrimes fdrop(rf, p); 23230994Sphk 2331541Srgrimes return (0); 23430994Sphk} 2351541Srgrimes 2361541Srgrimes/* 2371541Srgrimes * Allocate kva for pipe circular buffer, the space is pageable 2381541Srgrimes * This routine will 'realloc' the size of a pipe safely, if it fails 2391541Srgrimes * it will retain the old buffer. 2401541Srgrimes * If it fails it will return ENOMEM. 2411541Srgrimes */ 2421541Srgrimesstatic int 2431541Srgrimespipespace(cpipe, size) 24412221Sbde struct pipe *cpipe; 24511332Sswallace int size; 24611332Sswallace{ 24711332Sswallace struct vm_object *object; 24812221Sbde caddr_t buffer; 24911332Sswallace int npages, error; 2501541Srgrimes 2511549Srgrimes npages = round_page(size)/PAGE_SIZE; 25230994Sphk /* 2531541Srgrimes * Create an object, I don't like the idea of paging to/from 25411332Sswallace * kernel_object. 2551541Srgrimes * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 2561541Srgrimes */ 25730994Sphk mtx_lock(&vm_mtx); 2581541Srgrimes object = vm_object_allocate(OBJT_DEFAULT, npages); 2591541Srgrimes buffer = (caddr_t) vm_map_min(kernel_map); 2601541Srgrimes 26112221Sbde /* 2621541Srgrimes * Insert the object into the kernel map, and allocate kva for it. 2631541Srgrimes * The map entry is, by default, pageable. 2641541Srgrimes * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
2651541Srgrimes */ 26612221Sbde error = vm_map_find(kernel_map, object, 0, 2671549Srgrimes (vm_offset_t *) &buffer, size, 1, 26830994Sphk VM_PROT_ALL, VM_PROT_ALL, 0); 2691541Srgrimes 2701541Srgrimes if (error != KERN_SUCCESS) { 2711541Srgrimes vm_object_deallocate(object); 2721541Srgrimes mtx_unlock(&vm_mtx); 2731541Srgrimes return (ENOMEM); 2741541Srgrimes } 2751541Srgrimes 2761541Srgrimes /* free old resources if we're resizing */ 27730994Sphk pipe_free_kmem(cpipe); 2781541Srgrimes mtx_unlock(&vm_mtx); 2791541Srgrimes cpipe->pipe_buffer.object = object; 2801541Srgrimes cpipe->pipe_buffer.buffer = buffer; 2811541Srgrimes cpipe->pipe_buffer.size = size; 2821541Srgrimes cpipe->pipe_buffer.in = 0; 2833098Sphk cpipe->pipe_buffer.out = 0; 2843098Sphk cpipe->pipe_buffer.cnt = 0; 2851541Srgrimes amountpipekva += cpipe->pipe_buffer.size; 28630994Sphk return (0); 2871541Srgrimes} 2881541Srgrimes 2891541Srgrimes/* 29012221Sbde * initialize and allocate VM and memory for pipe 29112207Sbde */ 29211332Sswallacestatic int 29311332Sswallacepipe_create(cpipep) 29412221Sbde struct pipe **cpipep; 29511332Sswallace{ 2961541Srgrimes struct pipe *cpipe; 2971549Srgrimes int error; 29830994Sphk 2991541Srgrimes *cpipep = zalloc(pipe_zone); 30012207Sbde if (*cpipep == NULL) 3011541Srgrimes return (ENOMEM); 3021541Srgrimes 3031541Srgrimes cpipe = *cpipep; 3041541Srgrimes 3051541Srgrimes /* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */ 3061541Srgrimes cpipe->pipe_buffer.object = NULL; 30730994Sphk#ifndef PIPE_NODIRECT 3081541Srgrimes cpipe->pipe_map.kva = NULL; 3091541Srgrimes#endif 3101541Srgrimes /* 3111541Srgrimes * protect so pipeclose() doesn't follow a junk pointer 3121541Srgrimes * if pipespace() fails. 
3131541Srgrimes */ 3141541Srgrimes bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel)); 3151541Srgrimes cpipe->pipe_state = 0; 3161541Srgrimes cpipe->pipe_peer = NULL; 3171541Srgrimes cpipe->pipe_busy = 0; 3181541Srgrimes 3191541Srgrimes#ifndef PIPE_NODIRECT 3201541Srgrimes /* 3211541Srgrimes * pipe data structure initializations to support direct pipe I/O 3221541Srgrimes */ 3231541Srgrimes cpipe->pipe_map.cnt = 0; 3241541Srgrimes cpipe->pipe_map.kva = 0; 32512221Sbde cpipe->pipe_map.pos = 0; 3261541Srgrimes cpipe->pipe_map.npages = 0; 3271541Srgrimes /* cpipe->pipe_map.ms[] = invalid */ 3281541Srgrimes#endif 3291541Srgrimes 33012221Sbde error = pipespace(cpipe, PIPE_SIZE); 3311541Srgrimes if (error) 3321549Srgrimes return (error); 33330994Sphk 3341541Srgrimes vfs_timestamp(&cpipe->pipe_ctime); 3351541Srgrimes cpipe->pipe_atime = cpipe->pipe_ctime; 3361541Srgrimes cpipe->pipe_mtime = cpipe->pipe_ctime; 3371541Srgrimes 3381541Srgrimes return (0); 3391541Srgrimes} 34020677Sbde 34120677Sbde 3421541Srgrimes/* 3431541Srgrimes * lock a pipe for I/O, blocking other access 3441541Srgrimes */ 34515985Sdgstatic __inline int 3461541Srgrimespipelock(cpipe, catch) 3471541Srgrimes struct pipe *cpipe; 3481541Srgrimes int catch; 3491541Srgrimes{ 3501541Srgrimes int error; 3511541Srgrimes 3521541Srgrimes while (cpipe->pipe_state & PIPE_LOCK) { 3531541Srgrimes cpipe->pipe_state |= PIPE_LWANT; 3541541Srgrimes error = tsleep(cpipe, catch ? 
(PRIBIO | PCATCH) : PRIBIO, 3551541Srgrimes "pipelk", 0); 3561541Srgrimes if (error != 0) 3571541Srgrimes return (error); 3581541Srgrimes } 3591541Srgrimes cpipe->pipe_state |= PIPE_LOCK; 3601541Srgrimes return (0); 3611541Srgrimes} 36224448Speter 36324448Speter/* 36424448Speter * unlock a pipe I/O lock 36524448Speter */ 36624448Speterstatic __inline void 36724448Speterpipeunlock(cpipe) 36824448Speter struct pipe *cpipe; 36924448Speter{ 37024448Speter 37124448Speter cpipe->pipe_state &= ~PIPE_LOCK; 37224448Speter if (cpipe->pipe_state & PIPE_LWANT) { 37324448Speter cpipe->pipe_state &= ~PIPE_LWANT; 37412221Sbde wakeup(cpipe); 3751541Srgrimes } 3761541Srgrimes} 3771541Srgrimes 37812221Sbdestatic __inline void 3791541Srgrimespipeselwakeup(cpipe) 3801549Srgrimes struct pipe *cpipe; 38130994Sphk{ 3821541Srgrimes 3831541Srgrimes if (cpipe->pipe_state & PIPE_SEL) { 3841541Srgrimes cpipe->pipe_state &= ~PIPE_SEL; 3851541Srgrimes selwakeup(&cpipe->pipe_sel); 3861541Srgrimes } 3871541Srgrimes if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 3881541Srgrimes pgsigio(cpipe->pipe_sigio, SIGIO, 0); 38924448Speter KNOTE(&cpipe->pipe_sel.si_note, 0); 39024448Speter} 39124448Speter 39224448Speter/* ARGSUSED */ 39324448Speterstatic int 39424448Speterpipe_read(fp, uio, cred, flags, p) 39524448Speter struct file *fp; 39624448Speter struct uio *uio; 39724448Speter struct ucred *cred; 39824448Speter struct proc *p; 39924448Speter int flags; 40024448Speter{ 40124448Speter struct pipe *rpipe = (struct pipe *) fp->f_data; 40224448Speter int error; 40324448Speter int nread = 0; 40424448Speter u_int size; 40524448Speter 4061541Srgrimes ++rpipe->pipe_busy; 40724448Speter error = pipelock(rpipe, 1); 40817994Sache if (error) 40924448Speter goto unlocked_error; 41017994Sache 41124448Speter while (uio->uio_resid) { 41224448Speter /* 41324448Speter * normal pipe buffer receive 41446155Sphk */ 4151541Srgrimes if (rpipe->pipe_buffer.cnt > 0) { 41624448Speter size = rpipe->pipe_buffer.size 
- rpipe->pipe_buffer.out; 41724448Speter if (size > rpipe->pipe_buffer.cnt) 4181541Srgrimes size = rpipe->pipe_buffer.cnt; 41924448Speter if (size > (u_int) uio->uio_resid) 42024448Speter size = (u_int) uio->uio_resid; 4211541Srgrimes 42217994Sache error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 42324448Speter size, uio); 42424448Speter if (error) 42517994Sache break; 42646155Sphk 42717994Sache rpipe->pipe_buffer.out += size; 42824448Speter if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 42924448Speter rpipe->pipe_buffer.out = 0; 43024448Speter 43124448Speter rpipe->pipe_buffer.cnt -= size; 43224448Speter 43324448Speter /* 43424448Speter * If there is no more to read in the pipe, reset 43524448Speter * its pointers to the beginning. This improves 43624448Speter * cache hit stats. 43724448Speter */ 43824448Speter if (rpipe->pipe_buffer.cnt == 0) { 43924448Speter rpipe->pipe_buffer.in = 0; 44024448Speter rpipe->pipe_buffer.out = 0; 44131891Ssef } 44224448Speter nread += size; 44324448Speter#ifndef PIPE_NODIRECT 44424448Speter /* 44524448Speter * Direct copy, bypassing a kernel buffer. 
44624448Speter */ 44724448Speter } else if ((size = rpipe->pipe_map.cnt) && 44824448Speter (rpipe->pipe_state & PIPE_DIRECTW)) { 44924448Speter caddr_t va; 45024448Speter if (size > (u_int) uio->uio_resid) 45124448Speter size = (u_int) uio->uio_resid; 45231891Ssef 45324448Speter va = (caddr_t) rpipe->pipe_map.kva + 4548141Sache rpipe->pipe_map.pos; 45524448Speter error = uiomove(va, size, uio); 45624448Speter if (error) 45724448Speter break; 45824448Speter nread += size; 45924448Speter rpipe->pipe_map.pos += size; 46024448Speter rpipe->pipe_map.cnt -= size; 46124448Speter if (rpipe->pipe_map.cnt == 0) { 46224448Speter rpipe->pipe_state &= ~PIPE_DIRECTW; 46331891Ssef wakeup(rpipe); 46424448Speter } 4651541Srgrimes#endif 4661541Srgrimes } else { 4671541Srgrimes /* 46812221Sbde * detect EOF condition 4691541Srgrimes * read returns 0 on EOF, no need to set error 4701541Srgrimes */ 4711541Srgrimes if (rpipe->pipe_state & PIPE_EOF) 47212221Sbde break; 4731541Srgrimes 4741549Srgrimes /* 47530994Sphk * If the "write-side" has been blocked, wake it up now. 4761541Srgrimes */ 4771541Srgrimes if (rpipe->pipe_state & PIPE_WANTW) { 4781541Srgrimes rpipe->pipe_state &= ~PIPE_WANTW; 4791541Srgrimes wakeup(rpipe); 4801541Srgrimes } 4811541Srgrimes 4821541Srgrimes /* 4831541Srgrimes * Break if some data was read. 48424449Speter */ 48524449Speter if (nread > 0) 48646155Sphk break; 4871541Srgrimes 4881541Srgrimes /* 4891541Srgrimes * Unlock the pipe buffer for our remaining processing. We 4901541Srgrimes * will either break out with an error or we will sleep and 4911541Srgrimes * relock to loop. 49224449Speter */ 49324449Speter pipeunlock(rpipe); 49424449Speter 49531891Ssef /* 49624449Speter * Handle non-blocking mode operation or 4971541Srgrimes * wait for more data. 
4981541Srgrimes */ 4991541Srgrimes if (fp->f_flag & FNONBLOCK) { 50012221Sbde error = EAGAIN; 5011541Srgrimes } else { 5021541Srgrimes rpipe->pipe_state |= PIPE_WANTR; 5031541Srgrimes if ((error = tsleep(rpipe, PRIBIO | PCATCH, 50412221Sbde "piperd", 0)) == 0) 5051541Srgrimes error = pipelock(rpipe, 1); 5061549Srgrimes } 50730994Sphk if (error) 5081541Srgrimes goto unlocked_error; 5091541Srgrimes } 5101541Srgrimes } 5111541Srgrimes pipeunlock(rpipe); 5121541Srgrimes 5131541Srgrimes if (error == 0) 5141541Srgrimes vfs_timestamp(&rpipe->pipe_atime); 51524448Speterunlocked_error: 51624448Speter --rpipe->pipe_busy; 51724448Speter 51824448Speter /* 51924448Speter * PIPE_WANT processing only makes sense if pipe_busy is 0. 52024448Speter */ 52124448Speter if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 52224448Speter rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 52324448Speter wakeup(rpipe); 52424448Speter } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 52524448Speter /* 5261541Srgrimes * Handle write blocking hysteresis. 52724448Speter */ 52817994Sache if (rpipe->pipe_state & PIPE_WANTW) { 52924448Speter rpipe->pipe_state &= ~PIPE_WANTW; 53017994Sache wakeup(rpipe); 53124448Speter } 53224448Speter } 53324448Speter 53446155Sphk if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 5351541Srgrimes pipeselwakeup(rpipe); 53624448Speter 53717994Sache return (error); 53824448Speter} 53924448Speter 54024448Speter#ifndef PIPE_NODIRECT 54124448Speter/* 54224448Speter * Map the sending processes' buffer into kernel space and wire it. 54324448Speter * This is similar to a physical write operation. 
54424448Speter */ 54517994Sachestatic int 54646155Sphkpipe_build_write_buffer(wpipe, uio) 54724448Speter struct pipe *wpipe; 54824448Speter struct uio *uio; 54924448Speter{ 55024448Speter u_int size; 55124448Speter int i; 55224448Speter vm_offset_t addr, endaddr, paddr; 55324448Speter 55431891Ssef size = (u_int) uio->uio_iov->iov_len; 55524448Speter if (size > wpipe->pipe_buffer.size) 55624448Speter size = wpipe->pipe_buffer.size; 55724448Speter 55824448Speter endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 55924448Speter mtx_lock(&vm_mtx); 56024448Speter addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 56124448Speter for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 56224448Speter vm_page_t m; 56324448Speter 56424448Speter if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 || 56531891Ssef (paddr = pmap_kextract(addr)) == 0) { 56624448Speter int j; 5678141Sache 56824448Speter for (j = 0; j < i; j++) 56924448Speter vm_page_unwire(wpipe->pipe_map.ms[j], 1); 57024448Speter mtx_unlock(&vm_mtx); 57124448Speter return (EFAULT); 57224448Speter } 57324448Speter 57424448Speter m = PHYS_TO_VM_PAGE(paddr); 57531891Ssef vm_page_wire(m); 57624448Speter wpipe->pipe_map.ms[i] = m; 5771541Srgrimes } 5781541Srgrimes 5791541Srgrimes/* 58012221Sbde * set up the control block 5811541Srgrimes */ 5821541Srgrimes wpipe->pipe_map.npages = i; 5831541Srgrimes wpipe->pipe_map.pos = 58412221Sbde ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 5851541Srgrimes wpipe->pipe_map.cnt = size; 5861549Srgrimes 58730994Sphk/* 5881541Srgrimes * and map the buffer 5891541Srgrimes */ 5901541Srgrimes if (wpipe->pipe_map.kva == 0) { 5911541Srgrimes /* 5921541Srgrimes * We need to allocate space for an extra page because the 5931541Srgrimes * address range might (will) span pages at times. 
5941541Srgrimes */ 5951541Srgrimes wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, 59624449Speter wpipe->pipe_buffer.size + PAGE_SIZE); 59724449Speter amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; 59846155Sphk } 5991541Srgrimes pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 60024449Speter wpipe->pipe_map.npages); 60124449Speter 60224449Speter mtx_unlock(&vm_mtx); 60331891Ssef/* 60424449Speter * and update the uio data 6051541Srgrimes */ 6061541Srgrimes 6071541Srgrimes uio->uio_iov->iov_len -= size; 60812221Sbde uio->uio_iov->iov_base += size; 6091541Srgrimes if (uio->uio_iov->iov_len == 0) 6101541Srgrimes uio->uio_iov++; 6111541Srgrimes uio->uio_resid -= size; 6121541Srgrimes uio->uio_offset += size; 61312221Sbde return (0); 6141541Srgrimes} 6151549Srgrimes 61630994Sphk/* 6171541Srgrimes * unmap and unwire the process buffer 6181541Srgrimes */ 6191541Srgrimesstatic void 6201541Srgrimespipe_destroy_write_buffer(wpipe) 6211541Srgrimes struct pipe *wpipe; 6221541Srgrimes{ 6231541Srgrimes int i; 62446155Sphk 6251541Srgrimes mtx_lock(&vm_mtx); 62612063Sdg if (wpipe->pipe_map.kva) { 62724447Speter pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 6281541Srgrimes 62924447Speter if (amountpipekva > MAXPIPEKVA) { 63024447Speter vm_offset_t kva = wpipe->pipe_map.kva; 63124447Speter wpipe->pipe_map.kva = 0; 63224447Speter kmem_free(kernel_map, kva, 6331541Srgrimes wpipe->pipe_buffer.size + PAGE_SIZE); 63424447Speter amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; 63524447Speter } 63624447Speter } 63724447Speter for (i = 0; i < wpipe->pipe_map.npages; i++) 63824447Speter vm_page_unwire(wpipe->pipe_map.ms[i], 1); 63924447Speter mtx_unlock(&vm_mtx); 64024447Speter} 64124447Speter 64224447Speter/* 64324447Speter * In the case of a signal, the writing process might go away. This 64424447Speter * code copies the data into the circular buffer so that the source 64524447Speter * pages can be freed without loss of data. 
64624447Speter */ 64724447Speterstatic void 64831891Ssefpipe_clone_write_buffer(wpipe) 6491541Srgrimes struct pipe *wpipe; 6501541Srgrimes{ 6511541Srgrimes int size; 65212221Sbde int pos; 6531541Srgrimes 6549238Sache size = wpipe->pipe_map.cnt; 6559238Sache pos = wpipe->pipe_map.pos; 6561541Srgrimes bcopy((caddr_t) wpipe->pipe_map.kva + pos, 65712221Sbde (caddr_t) wpipe->pipe_buffer.buffer, size); 6581541Srgrimes 6591549Srgrimes wpipe->pipe_buffer.in = size; 66030994Sphk wpipe->pipe_buffer.out = 0; 6611541Srgrimes wpipe->pipe_buffer.cnt = size; 6621541Srgrimes wpipe->pipe_state &= ~PIPE_DIRECTW; 6631541Srgrimes 6641541Srgrimes pipe_destroy_write_buffer(wpipe); 6659238Sache} 6668135Sache 6671541Srgrimes/* 6689238Sache * This implements the pipe buffer write mechanism. Note that only 6699238Sache * a direct write OR a normal pipe write can be pending at any given time. 67043311Sdillon * If there are any characters in the pipe buffer, the direct write will 67143311Sdillon * be deferred until the receiving process grabs all of the bytes from 67243311Sdillon * the pipe buffer. Then the direct mapping write is set-up. 
67346155Sphk */ 6748135Sachestatic int 6759238Sachepipe_direct_write(wpipe, uio) 67624450Speter struct pipe *wpipe; 67724450Speter struct uio *uio; 6789238Sache{ 67931891Ssef int error; 68024450Speter 68124450Speterretry: 6829238Sache while (wpipe->pipe_state & PIPE_DIRECTW) { 6839238Sache if (wpipe->pipe_state & PIPE_WANTR) { 6849238Sache wpipe->pipe_state &= ~PIPE_WANTR; 68531891Ssef wakeup(wpipe); 6868135Sache } 68724559Speter wpipe->pipe_state |= PIPE_WANTW; 68824559Speter error = tsleep(wpipe, PRIBIO | PCATCH, "pipdww", 0); 6898111Sache if (error) 69031891Ssef goto error1; 69124450Speter if (wpipe->pipe_state & PIPE_EOF) { 6928135Sache error = EPIPE; 6931541Srgrimes goto error1; 6941541Srgrimes } 69512221Sbde } 6961541Srgrimes wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 6979238Sache if (wpipe->pipe_buffer.cnt > 0) { 6989238Sache if (wpipe->pipe_state & PIPE_WANTR) { 6991541Srgrimes wpipe->pipe_state &= ~PIPE_WANTR; 70012221Sbde wakeup(wpipe); 7011541Srgrimes } 7021549Srgrimes 70330994Sphk wpipe->pipe_state |= PIPE_WANTW; 7041541Srgrimes error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwc", 0); 7051541Srgrimes if (error) 7061541Srgrimes goto error1; 7071541Srgrimes if (wpipe->pipe_state & PIPE_EOF) { 7089238Sache error = EPIPE; 7098135Sache goto error1; 7101541Srgrimes } 7119238Sache goto retry; 7129238Sache } 71343311Sdillon 71443311Sdillon wpipe->pipe_state |= PIPE_DIRECTW; 71543311Sdillon 71646155Sphk error = pipe_build_write_buffer(wpipe, uio); 7178135Sache if (error) { 7189238Sache wpipe->pipe_state &= ~PIPE_DIRECTW; 71924450Speter goto error1; 72024450Speter } 7219238Sache 72231891Ssef error = 0; 72324450Speter while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 72424450Speter if (wpipe->pipe_state & PIPE_EOF) { 7259238Sache pipelock(wpipe, 0); 72631891Ssef pipe_destroy_write_buffer(wpipe); 72724450Speter pipeunlock(wpipe); 72824559Speter pipeselwakeup(wpipe); 72924559Speter error = EPIPE; 7308111Sache goto error1; 73131891Ssef } 73224450Speter if 
(wpipe->pipe_state & PIPE_WANTR) { 7338135Sache wpipe->pipe_state &= ~PIPE_WANTR; 7341541Srgrimes wakeup(wpipe); 7351541Srgrimes } 73656115Speter pipeselwakeup(wpipe); 73756115Speter error = tsleep(wpipe, PRIBIO | PCATCH, "pipdwt", 0); 73856115Speter } 73956115Speter 74056115Speter pipelock(wpipe,0); 74124453Speter if (wpipe->pipe_state & PIPE_DIRECTW) { 74256115Speter /* 74356115Speter * this bit of trickery substitutes a kernel buffer for 74456115Speter * the process that might be going away. 74556115Speter */ 74656115Speter pipe_clone_write_buffer(wpipe); 74756115Speter } else { 74856115Speter pipe_destroy_write_buffer(wpipe); 74956115Speter } 75056115Speter pipeunlock(wpipe); 75156115Speter return (error); 75256115Speter 75356115Spetererror1: 75456115Speter wakeup(wpipe); 75556115Speter return (error); 75656115Speter} 75756115Speter#endif 75856115Speter 75956115Speterstatic int 76056115Speterpipe_write(fp, uio, cred, flags, p) 76156115Speter struct file *fp; 76256115Speter struct uio *uio; 76356115Speter struct ucred *cred; 76456115Speter struct proc *p; 76556115Speter int flags; 76656115Speter{ 76756115Speter int error = 0; 76856115Speter int orig_resid; 76956115Speter struct pipe *wpipe, *rpipe; 77056115Speter 77156115Speter rpipe = (struct pipe *) fp->f_data; 77256115Speter wpipe = rpipe->pipe_peer; 77356115Speter 77456115Speter /* 77556115Speter * detect loss of pipe read side, issue SIGPIPE if lost. 77656115Speter */ 77756115Speter if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 77856115Speter return (EPIPE); 77956115Speter } 78056115Speter 78156115Speter /* 78256115Speter * If it is advantageous to resize the pipe buffer, do 78356115Speter * so. 
78456115Speter */ 78556115Speter if ((uio->uio_resid > PIPE_SIZE) && 78656115Speter (nbigpipe < LIMITBIGPIPES) && 78756115Speter (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 78856115Speter (wpipe->pipe_buffer.size <= PIPE_SIZE) && 78956115Speter (wpipe->pipe_buffer.cnt == 0)) { 79056115Speter 79156115Speter if ((error = pipelock(wpipe,1)) == 0) { 79256115Speter if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 79356115Speter nbigpipe++; 79456115Speter pipeunlock(wpipe); 79556115Speter } else { 79656115Speter return (error); 79756115Speter } 79856115Speter } 79956115Speter 80056115Speter KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone")); 80156115Speter 80256115Speter ++wpipe->pipe_busy; 80356115Speter orig_resid = uio->uio_resid; 80456115Speter while (uio->uio_resid) { 80556115Speter int space; 80656115Speter 80756115Speter#ifndef PIPE_NODIRECT 80856115Speter /* 80956115Speter * If the transfer is large, we can gain performance if 81056115Speter * we do process-to-process copies directly. 81156115Speter * If the write is non-blocking, we don't use the 81256115Speter * direct write mechanism. 81356115Speter * 81456115Speter * The direct write mechanism will detect the reader going 81556115Speter * away on us. 81656115Speter */ 81756115Speter if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 81856115Speter (fp->f_flag & FNONBLOCK) == 0 && 81956115Speter (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && 82056115Speter (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { 82156115Speter error = pipe_direct_write( wpipe, uio); 82256115Speter if (error) 82356115Speter break; 82456115Speter continue; 82556115Speter } 82656115Speter#endif 82756115Speter 82856115Speter /* 82956115Speter * Pipe buffered writes cannot be coincidental with 83056115Speter * direct writes. We wait until the currently executing 83156115Speter * direct write is completed before we start filling the 83256115Speter * pipe buffer. 
We break out if a signal occurs or the 83356115Speter * reader goes away. 83456115Speter */ 83556115Speter retrywrite: 83656115Speter while (wpipe->pipe_state & PIPE_DIRECTW) { 83756115Speter if (wpipe->pipe_state & PIPE_WANTR) { 83856115Speter wpipe->pipe_state &= ~PIPE_WANTR; 83956115Speter wakeup(wpipe); 84056115Speter } 84156115Speter error = tsleep(wpipe, PRIBIO | PCATCH, "pipbww", 0); 84256115Speter if (wpipe->pipe_state & PIPE_EOF) 84356115Speter break; 84456115Speter if (error) 84556115Speter break; 84656115Speter } 84756115Speter if (wpipe->pipe_state & PIPE_EOF) { 84856115Speter error = EPIPE; 84956115Speter break; 85056115Speter } 85156115Speter 85256115Speter space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 85356115Speter 85456115Speter /* Writes of size <= PIPE_BUF must be atomic. */ 85556115Speter if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 85656115Speter space = 0; 85756115Speter 85856115Speter if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { 85956115Speter if ((error = pipelock(wpipe,1)) == 0) { 86056115Speter int size; /* Transfer size */ 86156115Speter int segsize; /* first segment to transfer */ 86256115Speter 86356115Speter /* 86456115Speter * It is possible for a direct write to 86556115Speter * slip in on us... handle it here... 86656115Speter */ 86756115Speter if (wpipe->pipe_state & PIPE_DIRECTW) { 86856115Speter pipeunlock(wpipe); 86956115Speter goto retrywrite; 87056115Speter } 87156115Speter /* 87256115Speter * If a process blocked in uiomove, our 87356115Speter * value for space might be bad. 87456115Speter * 87556115Speter * XXX will we be ok if the reader has gone 87656115Speter * away here? 
87756115Speter */ 87856115Speter if (space > wpipe->pipe_buffer.size - 87956115Speter wpipe->pipe_buffer.cnt) { 88056115Speter pipeunlock(wpipe); 88156115Speter goto retrywrite; 88256115Speter } 88356115Speter 88456115Speter /* 88556115Speter * Transfer size is minimum of uio transfer 88656115Speter * and free space in pipe buffer. 88756115Speter */ 88856115Speter if (space > uio->uio_resid) 88956115Speter size = uio->uio_resid; 89056115Speter else 89156115Speter size = space; 89256115Speter /* 89356115Speter * First segment to transfer is minimum of 89456115Speter * transfer size and contiguous space in 89524453Speter * pipe buffer. If first segment to transfer 89624453Speter * is less than the transfer size, we've got 89724453Speter * a wraparound in the buffer. 89824453Speter */ 89924453Speter segsize = wpipe->pipe_buffer.size - 90024453Speter wpipe->pipe_buffer.in; 90130994Sphk if (segsize > size) 90224453Speter segsize = size; 90324453Speter 90424453Speter /* Transfer first segment */ 90524453Speter 90624453Speter error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 90724453Speter segsize, uio); 90824453Speter 90924453Speter if (error == 0 && segsize < size) { 91024453Speter /* 91124453Speter * Transfer remaining part now, to 91224453Speter * support atomic writes. Wraparound 91360216Speter * happened. 
91424453Speter */ 91524453Speter if (wpipe->pipe_buffer.in + segsize != 91624453Speter wpipe->pipe_buffer.size) 9171541Srgrimes panic("Expected pipe buffer wraparound disappeared"); 9181541Srgrimes 9191541Srgrimes error = uiomove(&wpipe->pipe_buffer.buffer[0], 9201549Srgrimes size - segsize, uio); 9211541Srgrimes } 9221541Srgrimes if (error == 0) { 9231541Srgrimes wpipe->pipe_buffer.in += size; 9241541Srgrimes if (wpipe->pipe_buffer.in >= 9251541Srgrimes wpipe->pipe_buffer.size) { 9261541Srgrimes if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size) 9271541Srgrimes panic("Expected wraparound bad"); 9281541Srgrimes wpipe->pipe_buffer.in = size - segsize; 9291541Srgrimes } 9301541Srgrimes 9311541Srgrimes wpipe->pipe_buffer.cnt += size; 9321541Srgrimes if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size) 9331541Srgrimes panic("Pipe buffer overflow"); 9341541Srgrimes 93561287Srwatson } 93661287Srwatson pipeunlock(wpipe); 93761287Srwatson } 93861287Srwatson if (error) 93961287Srwatson break; 9401541Srgrimes 9411541Srgrimes } else { 9421541Srgrimes /* 9431541Srgrimes * If the "read-side" has been blocked, wake it up now. 9441541Srgrimes */ 9451541Srgrimes if (wpipe->pipe_state & PIPE_WANTR) { 9461549Srgrimes wpipe->pipe_state &= ~PIPE_WANTR; 94746112Sphk wakeup(wpipe); 94846112Sphk } 94946112Sphk 95046155Sphk /* 95146112Sphk * don't block on non-blocking I/O 95246112Sphk */ 95346112Sphk if (fp->f_flag & FNONBLOCK) { 95446155Sphk error = EAGAIN; 9551541Srgrimes break; 95646155Sphk } 95746155Sphk 9581541Srgrimes /* 95961282Srwatson * We have no more space and have something to offer, 96061282Srwatson * wake up select/poll. 
96146155Sphk */ 96246155Sphk pipeselwakeup(wpipe); 96346155Sphk 9641541Srgrimes wpipe->pipe_state |= PIPE_WANTW; 96546155Sphk error = tsleep(wpipe, PRIBIO | PCATCH, "pipewr", 0); 96646155Sphk if (error != 0) 96746155Sphk break; 96846155Sphk /* 96946155Sphk * If read side wants to go away, we just issue a signal 97046155Sphk * to ourselves. 97146155Sphk */ 97246155Sphk if (wpipe->pipe_state & PIPE_EOF) { 97346155Sphk error = EPIPE; 9741541Srgrimes break; 9751541Srgrimes } 9761541Srgrimes } 97753518Sphk } 97853518Sphk 97953518Sphk --wpipe->pipe_busy; 98053518Sphk if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 98153518Sphk wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 98253518Sphk wakeup(wpipe); 98353518Sphk } else if (wpipe->pipe_buffer.cnt > 0) { 98453518Sphk /* 98553518Sphk * If we have put any characters in the buffer, we wake up 98653518Sphk * the reader. 98753518Sphk */ 98853518Sphk if (wpipe->pipe_state & PIPE_WANTR) { 98953518Sphk wpipe->pipe_state &= ~PIPE_WANTR; 99053518Sphk wakeup(wpipe); 99153518Sphk } 99253518Sphk } 99353518Sphk 99453518Sphk /* 99553518Sphk * Don't return EPIPE if I/O was successful 99653518Sphk */ 99753518Sphk if ((wpipe->pipe_buffer.cnt == 0) && 99853518Sphk (uio->uio_resid == 0) && 99953518Sphk (error == EPIPE)) 100053518Sphk error = 0; 100153518Sphk 10021541Srgrimes if (error == 0) 10031541Srgrimes vfs_timestamp(&wpipe->pipe_mtime); 10041541Srgrimes 10051541Srgrimes /* 10061541Srgrimes * We have something to offer, 10071541Srgrimes * wake up select/poll. 10081541Srgrimes */ 10091541Srgrimes if (wpipe->pipe_buffer.cnt) 10101541Srgrimes pipeselwakeup(wpipe); 10111541Srgrimes 10121541Srgrimes return (error); 10131541Srgrimes} 10141541Srgrimes 10151541Srgrimes/* 10161541Srgrimes * we implement a very minimal set of ioctls for compatibility with sockets. 
10171541Srgrimes */ 10181541Srgrimesint 10191549Srgrimespipe_ioctl(fp, cmd, data, p) 10201541Srgrimes struct file *fp; 10211541Srgrimes u_long cmd; 10221541Srgrimes caddr_t data; 10231541Srgrimes struct proc *p; 10241541Srgrimes{ 10251541Srgrimes struct pipe *mpipe = (struct pipe *)fp->f_data; 10261541Srgrimes 10271541Srgrimes switch (cmd) { 10281541Srgrimes 10291541Srgrimes case FIONBIO: 10301541Srgrimes return (0); 10311541Srgrimes 10321541Srgrimes case FIOASYNC: 10331541Srgrimes if (*(int *)data) { 10341541Srgrimes mpipe->pipe_state |= PIPE_ASYNC; 10351541Srgrimes } else { 10361541Srgrimes mpipe->pipe_state &= ~PIPE_ASYNC; 10371541Srgrimes } 10381541Srgrimes return (0); 10391541Srgrimes 10401541Srgrimes case FIONREAD: 10411541Srgrimes if (mpipe->pipe_state & PIPE_DIRECTW) 10421541Srgrimes *(int *)data = mpipe->pipe_map.cnt; 10431541Srgrimes else 10441541Srgrimes *(int *)data = mpipe->pipe_buffer.cnt; 10451541Srgrimes return (0); 10461541Srgrimes 10471541Srgrimes case FIOSETOWN: 10481541Srgrimes return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 10491541Srgrimes 10501541Srgrimes case FIOGETOWN: 10511541Srgrimes *(int *)data = fgetown(mpipe->pipe_sigio); 10521541Srgrimes return (0); 10531541Srgrimes 10541541Srgrimes /* This is deprecated, FIOSETOWN should be used instead. */ 10551541Srgrimes case TIOCSPGRP: 10561541Srgrimes return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 10571541Srgrimes 10581541Srgrimes /* This is deprecated, FIOGETOWN should be used instead. 
*/ 10591541Srgrimes case TIOCGPGRP: 10601541Srgrimes *(int *)data = -fgetown(mpipe->pipe_sigio); 10611541Srgrimes return (0); 10621541Srgrimes 106312221Sbde } 10641541Srgrimes return (ENOTTY); 10651541Srgrimes} 10661541Srgrimes 10671541Srgrimesint 106812221Sbdepipe_poll(fp, events, cred, p) 10691541Srgrimes struct file *fp; 10701549Srgrimes int events; 107130994Sphk struct ucred *cred; 10721541Srgrimes struct proc *p; 10731541Srgrimes{ 10741541Srgrimes struct pipe *rpipe = (struct pipe *)fp->f_data; 10751541Srgrimes struct pipe *wpipe; 107623358Sache int revents = 0; 107723359Sache 10781541Srgrimes wpipe = rpipe->pipe_peer; 10791541Srgrimes if (events & (POLLIN | POLLRDNORM)) 10801541Srgrimes if ((rpipe->pipe_state & PIPE_DIRECTW) || 10811541Srgrimes (rpipe->pipe_buffer.cnt > 0) || 10821541Srgrimes (rpipe->pipe_state & PIPE_EOF)) 10831541Srgrimes revents |= events & (POLLIN | POLLRDNORM); 10841541Srgrimes 108512221Sbde if (events & (POLLOUT | POLLWRNORM)) 10861541Srgrimes if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 10871541Srgrimes (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 10881541Srgrimes (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 108912221Sbde revents |= events & (POLLOUT | POLLWRNORM); 10901541Srgrimes 10911549Srgrimes if ((rpipe->pipe_state & PIPE_EOF) || 109230994Sphk (wpipe == NULL) || 10931541Srgrimes (wpipe->pipe_state & PIPE_EOF)) 10941541Srgrimes revents |= POLLHUP; 10951541Srgrimes 10961541Srgrimes if (revents == 0) { 109723330Sache if (events & (POLLIN | POLLRDNORM)) { 10981541Srgrimes selrecord(p, &rpipe->pipe_sel); 109946155Sphk rpipe->pipe_state |= PIPE_SEL; 11001541Srgrimes } 110122522Sdavidn 110236845Sdfr if (events & (POLLOUT | POLLWRNORM)) { 11031541Srgrimes selrecord(p, &wpipe->pipe_sel); 11041541Srgrimes wpipe->pipe_state |= PIPE_SEL; 110522522Sdavidn } 110622522Sdavidn } 110723330Sache 11081541Srgrimes return (revents); 11091541Srgrimes} 111031891Ssef 111131891Ssefstatic int 111231891Ssefpipe_stat(fp, ub, 
p) 111355338Sphk struct file *fp; 111431891Ssef struct stat *ub; 111531891Ssef struct proc *p; 111655707Ssef{ 111731891Ssef struct pipe *pipe = (struct pipe *)fp->f_data; 111831891Ssef 1119 bzero((caddr_t)ub, sizeof(*ub)); 1120 ub->st_mode = S_IFIFO; 1121 ub->st_blksize = pipe->pipe_buffer.size; 1122 ub->st_size = pipe->pipe_buffer.cnt; 1123 ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 1124 ub->st_atimespec = pipe->pipe_atime; 1125 ub->st_mtimespec = pipe->pipe_mtime; 1126 ub->st_ctimespec = pipe->pipe_ctime; 1127 ub->st_uid = fp->f_cred->cr_uid; 1128 ub->st_gid = fp->f_cred->cr_gid; 1129 /* 1130 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 1131 * XXX (st_dev, st_ino) should be unique. 1132 */ 1133 return (0); 1134} 1135 1136/* ARGSUSED */ 1137static int 1138pipe_close(fp, p) 1139 struct file *fp; 1140 struct proc *p; 1141{ 1142 struct pipe *cpipe = (struct pipe *)fp->f_data; 1143 1144 fp->f_ops = &badfileops; 1145 fp->f_data = NULL; 1146 funsetown(cpipe->pipe_sigio); 1147 pipeclose(cpipe); 1148 return (0); 1149} 1150 1151static void 1152pipe_free_kmem(cpipe) 1153 struct pipe *cpipe; 1154{ 1155 1156 mtx_assert(&vm_mtx, MA_OWNED); 1157 if (cpipe->pipe_buffer.buffer != NULL) { 1158 if (cpipe->pipe_buffer.size > PIPE_SIZE) 1159 --nbigpipe; 1160 amountpipekva -= cpipe->pipe_buffer.size; 1161 kmem_free(kernel_map, 1162 (vm_offset_t)cpipe->pipe_buffer.buffer, 1163 cpipe->pipe_buffer.size); 1164 cpipe->pipe_buffer.buffer = NULL; 1165 } 1166#ifndef PIPE_NODIRECT 1167 if (cpipe->pipe_map.kva != NULL) { 1168 amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; 1169 kmem_free(kernel_map, 1170 cpipe->pipe_map.kva, 1171 cpipe->pipe_buffer.size + PAGE_SIZE); 1172 cpipe->pipe_map.cnt = 0; 1173 cpipe->pipe_map.kva = 0; 1174 cpipe->pipe_map.pos = 0; 1175 cpipe->pipe_map.npages = 0; 1176 } 1177#endif 1178} 1179 1180/* 1181 * shutdown the pipe 1182 */ 1183static void 1184pipeclose(cpipe) 1185 struct pipe *cpipe; 1186{ 1187 struct pipe 
*ppipe; 1188 1189 if (cpipe) { 1190 1191 pipeselwakeup(cpipe); 1192 1193 /* 1194 * If the other side is blocked, wake it up saying that 1195 * we want to close it down. 1196 */ 1197 while (cpipe->pipe_busy) { 1198 wakeup(cpipe); 1199 cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; 1200 tsleep(cpipe, PRIBIO, "pipecl", 0); 1201 } 1202 1203 /* 1204 * Disconnect from peer 1205 */ 1206 if ((ppipe = cpipe->pipe_peer) != NULL) { 1207 pipeselwakeup(ppipe); 1208 1209 ppipe->pipe_state |= PIPE_EOF; 1210 wakeup(ppipe); 1211 ppipe->pipe_peer = NULL; 1212 } 1213 /* 1214 * free resources 1215 */ 1216 mtx_lock(&vm_mtx); 1217 pipe_free_kmem(cpipe); 1218 /* XXX: erm, doesn't zalloc already have its own locks and 1219 * not need the giant vm lock? 1220 */ 1221 zfree(pipe_zone, cpipe); 1222 mtx_unlock(&vm_mtx); 1223 } 1224} 1225 1226/*ARGSUSED*/ 1227static int 1228pipe_kqfilter(struct file *fp, struct knote *kn) 1229{ 1230 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1231 1232 switch (kn->kn_filter) { 1233 case EVFILT_READ: 1234 kn->kn_fop = &pipe_rfiltops; 1235 break; 1236 case EVFILT_WRITE: 1237 kn->kn_fop = &pipe_wfiltops; 1238 break; 1239 default: 1240 return (1); 1241 } 1242 1243 SLIST_INSERT_HEAD(&rpipe->pipe_sel.si_note, kn, kn_selnext); 1244 return (0); 1245} 1246 1247static void 1248filt_pipedetach(struct knote *kn) 1249{ 1250 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1251 1252 SLIST_REMOVE(&rpipe->pipe_sel.si_note, kn, knote, kn_selnext); 1253} 1254 1255/*ARGSUSED*/ 1256static int 1257filt_piperead(struct knote *kn, long hint) 1258{ 1259 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1260 struct pipe *wpipe = rpipe->pipe_peer; 1261 1262 kn->kn_data = rpipe->pipe_buffer.cnt; 1263 if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 1264 kn->kn_data = rpipe->pipe_map.cnt; 1265 1266 if ((rpipe->pipe_state & PIPE_EOF) || 1267 (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 1268 kn->kn_flags |= EV_EOF; 1269 return (1); 1270 } 1271 return 
(kn->kn_data > 0); 1272} 1273 1274/*ARGSUSED*/ 1275static int 1276filt_pipewrite(struct knote *kn, long hint) 1277{ 1278 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1279 struct pipe *wpipe = rpipe->pipe_peer; 1280 1281 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 1282 kn->kn_data = 0; 1283 kn->kn_flags |= EV_EOF; 1284 return (1); 1285 } 1286 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 1287 if (wpipe->pipe_state & PIPE_DIRECTW) 1288 kn->kn_data = 0; 1289 1290 return (kn->kn_data >= PIPE_BUF); 1291} 1292