sys_pipe.c revision 226042
1139804Simp/*- 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson */ 1913675Sdyson 2013675Sdyson/* 2113675Sdyson * This file contains a high-performance replacement for the socket-based 2213675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2313675Sdyson * all features of sockets, but does do everything that pipes normally 2413675Sdyson * do. 2513675Sdyson */ 2613675Sdyson 2713907Sdyson/* 2813907Sdyson * This code has two modes of operation, a small write mode and a large 2913907Sdyson * write mode. The small write mode acts like conventional pipes with 3013907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3113907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 32219801Salc * and PIPE_SIZE in size, the sending process pins the underlying pages in 33219801Salc * memory, and the receiving process copies directly from these pinned pages 34219801Salc * in the sending process. 
3513907Sdyson * 3613907Sdyson * If the sending process receives a signal, it is possible that it will 3713913Sdyson * go away, and certainly its address space can change, because control 3813907Sdyson * is returned back to the user-mode side. In that case, the pipe code 3913907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4013907Sdyson * kernel buffer, and the receiving process will grab the data from the 4113907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4213907Sdyson * the copy operation is normally eliminated. 4313907Sdyson * 4413907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4513907Sdyson * happen for small transfers so that the system will not spend all of 46118764Ssilby * its time context switching. 47117325Ssilby * 48118764Ssilby * In order to limit the resource use of pipes, two sysctls exist: 49117325Ssilby * 50118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 51133790Ssilby * address space available to us in pipe_map. This value is normally 52133790Ssilby * autotuned, but may also be loader tuned. 53117325Ssilby * 54133790Ssilby * kern.ipc.pipekva - This read-only sysctl tracks the current amount of 55133790Ssilby * memory in use by pipes. 56117325Ssilby * 57133790Ssilby * Based on how large pipekva is relative to maxpipekva, the following 58133790Ssilby * will happen: 59117325Ssilby * 60133790Ssilby * 0% - 50%: 61133790Ssilby * New pipes are given 16K of memory backing, pipes may dynamically 62133790Ssilby * grow to as large as 64K where needed. 63133790Ssilby * 50% - 75%: 64133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 65133790Ssilby * existing pipes may NOT grow. 66133790Ssilby * 75% - 100%: 67133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 68133790Ssilby * existing pipes will be shrunk down to 4K whenever possible. 
69133049Ssilby * 70133790Ssilby * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0. If 71133790Ssilby * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE 72133790Ssilby * resize which MUST occur for reverse-direction pipes when they are 73133790Ssilby * first used. 74133790Ssilby * 75133790Ssilby * Additional information about the current state of pipes may be obtained 76133790Ssilby * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail, 77133790Ssilby * and kern.ipc.piperesizefail. 78133790Ssilby * 79133049Ssilby * Locking rules: There are two locks present here: A mutex, used via 80133049Ssilby * PIPE_LOCK, and a flag, used via pipelock(). All locking is done via 81133049Ssilby * the flag, as mutexes can not persist over uiomove. The mutex 82133049Ssilby * exists only to guard access to the flag, and is not in itself a 83133790Ssilby * locking mechanism. Also note that there is only a single mutex for 84133790Ssilby * both directions of a pipe. 85133049Ssilby * 86133049Ssilby * As pipelock() may have to sleep before it can acquire the flag, it 87133049Ssilby * is important to reread all data after a call to pipelock(); everything 88133049Ssilby * in the structure may have changed. 
8913907Sdyson */ 9013907Sdyson 91116182Sobrien#include <sys/cdefs.h> 92116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 226042 2011-10-05 16:56:06Z kib $"); 93116182Sobrien 9413675Sdyson#include <sys/param.h> 9513675Sdyson#include <sys/systm.h> 96226042Skib#include <sys/conf.h> 9724131Sbde#include <sys/fcntl.h> 9813675Sdyson#include <sys/file.h> 9913675Sdyson#include <sys/filedesc.h> 10024206Sbde#include <sys/filio.h> 10191372Salfred#include <sys/kernel.h> 10276166Smarkm#include <sys/lock.h> 10376827Salfred#include <sys/mutex.h> 10424206Sbde#include <sys/ttycom.h> 10513675Sdyson#include <sys/stat.h> 10691968Salfred#include <sys/malloc.h> 10729356Speter#include <sys/poll.h> 10870834Swollman#include <sys/selinfo.h> 10913675Sdyson#include <sys/signalvar.h> 110184849Sed#include <sys/syscallsubr.h> 111117325Ssilby#include <sys/sysctl.h> 11213675Sdyson#include <sys/sysproto.h> 11313675Sdyson#include <sys/pipe.h> 11476166Smarkm#include <sys/proc.h> 11555112Sbde#include <sys/vnode.h> 11634924Sbde#include <sys/uio.h> 11759288Sjlemon#include <sys/event.h> 11813675Sdyson 119163606Srwatson#include <security/mac/mac_framework.h> 120163606Srwatson 12113675Sdyson#include <vm/vm.h> 12213675Sdyson#include <vm/vm_param.h> 12313675Sdyson#include <vm/vm_object.h> 12413675Sdyson#include <vm/vm_kern.h> 12513675Sdyson#include <vm/vm_extern.h> 12613675Sdyson#include <vm/pmap.h> 12713675Sdyson#include <vm/vm_map.h> 12813907Sdyson#include <vm/vm_page.h> 12992751Sjeff#include <vm/uma.h> 13013675Sdyson 13114037Sdyson/* 13214037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 13314037Sdyson * approx 30% decrease in transfer rate. This could be useful for 13414037Sdyson * NetBSD or OpenBSD. 
13514037Sdyson */ 13614037Sdyson/* #define PIPE_NODIRECT */ 13714037Sdyson 13814037Sdyson/* 13914037Sdyson * interfaces to the outside world 14014037Sdyson */ 141108255Sphkstatic fo_rdwr_t pipe_read; 142108255Sphkstatic fo_rdwr_t pipe_write; 143175140Sjhbstatic fo_truncate_t pipe_truncate; 144108255Sphkstatic fo_ioctl_t pipe_ioctl; 145108255Sphkstatic fo_poll_t pipe_poll; 146108255Sphkstatic fo_kqfilter_t pipe_kqfilter; 147108255Sphkstatic fo_stat_t pipe_stat; 148108255Sphkstatic fo_close_t pipe_close; 14913675Sdyson 15072521Sjlemonstatic struct fileops pipeops = { 151116546Sphk .fo_read = pipe_read, 152116546Sphk .fo_write = pipe_write, 153175140Sjhb .fo_truncate = pipe_truncate, 154116546Sphk .fo_ioctl = pipe_ioctl, 155116546Sphk .fo_poll = pipe_poll, 156116546Sphk .fo_kqfilter = pipe_kqfilter, 157116546Sphk .fo_stat = pipe_stat, 158116546Sphk .fo_close = pipe_close, 159224914Skib .fo_chmod = invfo_chmod, 160224914Skib .fo_chown = invfo_chown, 161116546Sphk .fo_flags = DFLAG_PASSABLE 16272521Sjlemon}; 16313675Sdyson 16459288Sjlemonstatic void filt_pipedetach(struct knote *kn); 16559288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 16659288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 16759288Sjlemon 168197134Srwatsonstatic struct filterops pipe_rfiltops = { 169197134Srwatson .f_isfd = 1, 170197134Srwatson .f_detach = filt_pipedetach, 171197134Srwatson .f_event = filt_piperead 172197134Srwatson}; 173197134Srwatsonstatic struct filterops pipe_wfiltops = { 174197134Srwatson .f_isfd = 1, 175197134Srwatson .f_detach = filt_pipedetach, 176197134Srwatson .f_event = filt_pipewrite 177197134Srwatson}; 17859288Sjlemon 17913675Sdyson/* 18013675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 18113675Sdyson * space is pageable. The pipe code will try to maintain locality of 18213675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 18313675Sdyson * will not wipe the cache. 
18413675Sdyson */ 18513907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 18613907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 18713675Sdyson 188189649Sjhbstatic long amountpipekva; 189133790Ssilbystatic int pipefragretry; 190133790Ssilbystatic int pipeallocfail; 191133790Ssilbystatic int piperesizefail; 192133790Ssilbystatic int piperesizeallowed = 1; 19313907Sdyson 194189649SjhbSYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN, 195117325Ssilby &maxpipekva, 0, "Pipe KVA limit"); 196189649SjhbSYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 197117325Ssilby &amountpipekva, 0, "Pipe KVA usage"); 198133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD, 199133790Ssilby &pipefragretry, 0, "Pipe allocation retries due to fragmentation"); 200133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD, 201133790Ssilby &pipeallocfail, 0, "Pipe allocation failures"); 202133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, 203133790Ssilby &piperesizefail, 0, "Pipe resize failures"); 204133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, 205133790Ssilby &piperesizeallowed, 0, "Pipe resizing allowed"); 206117325Ssilby 20791413Salfredstatic void pipeinit(void *dummy __unused); 20891413Salfredstatic void pipeclose(struct pipe *cpipe); 20991413Salfredstatic void pipe_free_kmem(struct pipe *cpipe); 210133790Ssilbystatic int pipe_create(struct pipe *pipe, int backing); 21191413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch); 21291413Salfredstatic __inline void pipeunlock(struct pipe *cpipe); 21391413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe); 21414037Sdyson#ifndef PIPE_NODIRECT 21591413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 21691413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe); 21791413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio); 21891413Salfredstatic void pipe_clone_write_buffer(struct 
pipe *wpipe); 21914037Sdyson#endif 22091413Salfredstatic int pipespace(struct pipe *cpipe, int size); 221132579Srwatsonstatic int pipespace_new(struct pipe *cpipe, int size); 22213675Sdyson 223132987Sgreenstatic int pipe_zone_ctor(void *mem, int size, void *arg, int flags); 224132987Sgreenstatic int pipe_zone_init(void *mem, int size, int flags); 225125293Srwatsonstatic void pipe_zone_fini(void *mem, int size); 226125293Srwatson 22792751Sjeffstatic uma_zone_t pipe_zone; 228226042Skibstatic struct unrhdr *pipeino_unr; 229226042Skibstatic dev_t pipedev_ino; 23027899Sdyson 23191372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 23291372Salfred 23391372Salfredstatic void 23491372Salfredpipeinit(void *dummy __unused) 23591372Salfred{ 236118880Salc 237170022Srwatson pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair), 238170022Srwatson pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, 239125293Srwatson UMA_ALIGN_PTR, 0); 240118880Salc KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 241226042Skib pipeino_unr = new_unrhdr(1, INT32_MAX, NULL); 242226042Skib KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized")); 243226042Skib pipedev_ino = devfs_alloc_cdp_inode(); 244226042Skib KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized")); 24591372Salfred} 24691372Salfred 247132987Sgreenstatic int 248132987Sgreenpipe_zone_ctor(void *mem, int size, void *arg, int flags) 249125293Srwatson{ 250125293Srwatson struct pipepair *pp; 251125293Srwatson struct pipe *rpipe, *wpipe; 252125293Srwatson 253125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size")); 254125293Srwatson 255125293Srwatson pp = (struct pipepair *)mem; 256125293Srwatson 257125293Srwatson /* 258125293Srwatson * We zero both pipe endpoints to make sure all the kmem pointers 259125293Srwatson * are NULL, flag fields are zero'd, etc. We timestamp both 260125293Srwatson * endpoints with the same time. 
261125293Srwatson */ 262125293Srwatson rpipe = &pp->pp_rpipe; 263125293Srwatson bzero(rpipe, sizeof(*rpipe)); 264125293Srwatson vfs_timestamp(&rpipe->pipe_ctime); 265125293Srwatson rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; 266125293Srwatson 267125293Srwatson wpipe = &pp->pp_wpipe; 268125293Srwatson bzero(wpipe, sizeof(*wpipe)); 269125293Srwatson wpipe->pipe_ctime = rpipe->pipe_ctime; 270125293Srwatson wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; 271125293Srwatson 272125293Srwatson rpipe->pipe_peer = wpipe; 273125293Srwatson rpipe->pipe_pair = pp; 274125293Srwatson wpipe->pipe_peer = rpipe; 275125293Srwatson wpipe->pipe_pair = pp; 276125293Srwatson 277125293Srwatson /* 278125293Srwatson * Mark both endpoints as present; they will later get free'd 279125293Srwatson * one at a time. When both are free'd, then the whole pair 280125293Srwatson * is released. 281125293Srwatson */ 282179243Skib rpipe->pipe_present = PIPE_ACTIVE; 283179243Skib wpipe->pipe_present = PIPE_ACTIVE; 284125293Srwatson 285125293Srwatson /* 286125293Srwatson * Eventually, the MAC Framework may initialize the label 287125293Srwatson * in ctor or init, but for now we do it elswhere to avoid 288125293Srwatson * blocking in ctor or init. 
289125293Srwatson */ 290125293Srwatson pp->pp_label = NULL; 291125293Srwatson 292132987Sgreen return (0); 293125293Srwatson} 294125293Srwatson 295132987Sgreenstatic int 296132987Sgreenpipe_zone_init(void *mem, int size, int flags) 297125293Srwatson{ 298125293Srwatson struct pipepair *pp; 299125293Srwatson 300125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size")); 301125293Srwatson 302125293Srwatson pp = (struct pipepair *)mem; 303125293Srwatson 304125293Srwatson mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 305132987Sgreen return (0); 306125293Srwatson} 307125293Srwatson 308125293Srwatsonstatic void 309125293Srwatsonpipe_zone_fini(void *mem, int size) 310125293Srwatson{ 311125293Srwatson struct pipepair *pp; 312125293Srwatson 313125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size")); 314125293Srwatson 315125293Srwatson pp = (struct pipepair *)mem; 316125293Srwatson 317125293Srwatson mtx_destroy(&pp->pp_mtx); 318125293Srwatson} 319125293Srwatson 32013675Sdyson/* 321167232Srwatson * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let 322167232Srwatson * the zone pick up the pieces via pipeclose(). 32313675Sdyson */ 32413675Sdysonint 325184849Sedkern_pipe(struct thread *td, int fildes[2]) 32613675Sdyson{ 32783366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 32813675Sdyson struct file *rf, *wf; 329125293Srwatson struct pipepair *pp; 33013675Sdyson struct pipe *rpipe, *wpipe; 33113675Sdyson int fd, error; 33227899Sdyson 333125293Srwatson pp = uma_zalloc(pipe_zone, M_WAITOK); 334125293Srwatson#ifdef MAC 335125293Srwatson /* 336126249Srwatson * The MAC label is shared between the connected endpoints. As a 337172930Srwatson * result mac_pipe_init() and mac_pipe_create() are called once 338126249Srwatson * for the pair, and not on the endpoints. 
339125293Srwatson */ 340172930Srwatson mac_pipe_init(pp); 341172930Srwatson mac_pipe_create(td->td_ucred, pp); 342125293Srwatson#endif 343125293Srwatson rpipe = &pp->pp_rpipe; 344125293Srwatson wpipe = &pp->pp_wpipe; 345125293Srwatson 346193951Skib knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); 347193951Skib knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); 348140369Ssilby 349133790Ssilby /* Only the forward direction pipe is backed by default */ 350155035Sglebius if ((error = pipe_create(rpipe, 1)) != 0 || 351155035Sglebius (error = pipe_create(wpipe, 0)) != 0) { 352124394Sdes pipeclose(rpipe); 353124394Sdes pipeclose(wpipe); 354155035Sglebius return (error); 35576364Salfred } 356124394Sdes 35713907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 35813907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 35913675Sdyson 360220245Skib error = falloc(td, &rf, &fd, 0); 36170915Sdwmalone if (error) { 36270915Sdwmalone pipeclose(rpipe); 36370915Sdwmalone pipeclose(wpipe); 36470915Sdwmalone return (error); 36570915Sdwmalone } 366121256Sdwmalone /* An extra reference on `rf' has been held for us by falloc(). */ 367184849Sed fildes[0] = fd; 36870915Sdwmalone 36970803Sdwmalone /* 37070803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 37170803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 37270803Sdwmalone * to avoid races against processes which manage to dup() the read 37370803Sdwmalone * side while we are blocked trying to allocate the write side. 37470803Sdwmalone */ 375174988Sjeff finit(rf, FREAD | FWRITE, DTYPE_PIPE, rpipe, &pipeops); 376220245Skib error = falloc(td, &wf, &fd, 0); 37770915Sdwmalone if (error) { 378184849Sed fdclose(fdp, rf, fildes[0], td); 37983366Sjulian fdrop(rf, td); 38070915Sdwmalone /* rpipe has been closed by fdrop(). 
*/ 38170915Sdwmalone pipeclose(wpipe); 38270915Sdwmalone return (error); 38370915Sdwmalone } 384121256Sdwmalone /* An extra reference on `wf' has been held for us by falloc(). */ 385174988Sjeff finit(wf, FREAD | FWRITE, DTYPE_PIPE, wpipe, &pipeops); 386121256Sdwmalone fdrop(wf, td); 387184849Sed fildes[1] = fd; 38883366Sjulian fdrop(rf, td); 38913675Sdyson 39013675Sdyson return (0); 39113675Sdyson} 39213675Sdyson 393184849Sed/* ARGSUSED */ 394184849Sedint 395225617Skmacysys_pipe(struct thread *td, struct pipe_args *uap) 396184849Sed{ 397184849Sed int error; 398184849Sed int fildes[2]; 399184849Sed 400184849Sed error = kern_pipe(td, fildes); 401184849Sed if (error) 402184849Sed return (error); 403184849Sed 404184849Sed td->td_retval[0] = fildes[0]; 405184849Sed td->td_retval[1] = fildes[1]; 406184849Sed 407184849Sed return (0); 408184849Sed} 409184849Sed 41013909Sdyson/* 41113909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 41276364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 41376364Salfred * it will retain the old buffer. 41476364Salfred * If it fails it will return ENOMEM. 
41513909Sdyson */ 41676364Salfredstatic int 417132579Srwatsonpipespace_new(cpipe, size) 41813675Sdyson struct pipe *cpipe; 41976364Salfred int size; 42013675Sdyson{ 42176364Salfred caddr_t buffer; 422133790Ssilby int error, cnt, firstseg; 423117325Ssilby static int curfail = 0; 424117325Ssilby static struct timeval lastfail; 42513675Sdyson 426125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked")); 427133790Ssilby KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW), 428133790Ssilby ("pipespace: resize of direct writes not allowed")); 429133790Ssilbyretry: 430133790Ssilby cnt = cpipe->pipe_buffer.cnt; 431133790Ssilby if (cnt > size) 432133790Ssilby size = cnt; 43379224Sdillon 434118764Ssilby size = round_page(size); 435118764Ssilby buffer = (caddr_t) vm_map_min(pipe_map); 43613675Sdyson 437122163Salc error = vm_map_find(pipe_map, NULL, 0, 43876364Salfred (vm_offset_t *) &buffer, size, 1, 43913688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 44076364Salfred if (error != KERN_SUCCESS) { 441133790Ssilby if ((cpipe->pipe_buffer.buffer == NULL) && 442133790Ssilby (size > SMALL_PIPE_SIZE)) { 443133790Ssilby size = SMALL_PIPE_SIZE; 444133790Ssilby pipefragretry++; 445133790Ssilby goto retry; 446133790Ssilby } 447133790Ssilby if (cpipe->pipe_buffer.buffer == NULL) { 448133790Ssilby pipeallocfail++; 449133790Ssilby if (ppsratecheck(&lastfail, &curfail, 1)) 450133790Ssilby printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n"); 451133790Ssilby } else { 452133790Ssilby piperesizefail++; 453133790Ssilby } 45476364Salfred return (ENOMEM); 45576364Salfred } 45676364Salfred 457133790Ssilby /* copy data, then free old resources if we're resizing */ 458133790Ssilby if (cnt > 0) { 459133790Ssilby if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) { 460133790Ssilby firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out; 461133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 462133790Ssilby buffer, firstseg); 463133790Ssilby if ((cnt - firstseg) 
> 0) 464133790Ssilby bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg], 465133790Ssilby cpipe->pipe_buffer.in); 466133790Ssilby } else { 467133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 468133790Ssilby buffer, cnt); 469133790Ssilby } 470133790Ssilby } 47176364Salfred pipe_free_kmem(cpipe); 47276364Salfred cpipe->pipe_buffer.buffer = buffer; 47376364Salfred cpipe->pipe_buffer.size = size; 474133790Ssilby cpipe->pipe_buffer.in = cnt; 47576364Salfred cpipe->pipe_buffer.out = 0; 476133790Ssilby cpipe->pipe_buffer.cnt = cnt; 477189649Sjhb atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size); 47876364Salfred return (0); 47913907Sdyson} 48013688Sdyson 48113907Sdyson/* 482132579Srwatson * Wrapper for pipespace_new() that performs locking assertions. 483132579Srwatson */ 484132579Srwatsonstatic int 485132579Srwatsonpipespace(cpipe, size) 486132579Srwatson struct pipe *cpipe; 487132579Srwatson int size; 488132579Srwatson{ 489132579Srwatson 490133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 491133049Ssilby ("Unlocked pipe passed to pipespace")); 492132579Srwatson return (pipespace_new(cpipe, size)); 493132579Srwatson} 494132579Srwatson 495132579Srwatson/* 49613675Sdyson * lock a pipe for I/O, blocking other access 49713675Sdyson */ 49813675Sdysonstatic __inline int 49913907Sdysonpipelock(cpipe, catch) 50013675Sdyson struct pipe *cpipe; 50113907Sdyson int catch; 50213675Sdyson{ 50313776Sdyson int error; 50476364Salfred 50591362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 50691362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 50713675Sdyson cpipe->pipe_state |= PIPE_LWANT; 50891362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 50991362Salfred catch ? 
(PRIBIO | PCATCH) : PRIBIO, 51076760Salfred "pipelk", 0); 511124394Sdes if (error != 0) 51276760Salfred return (error); 51313675Sdyson } 51491362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 51576760Salfred return (0); 51613675Sdyson} 51713675Sdyson 51813675Sdyson/* 51913675Sdyson * unlock a pipe I/O lock 52013675Sdyson */ 52113675Sdysonstatic __inline void 52213675Sdysonpipeunlock(cpipe) 52313675Sdyson struct pipe *cpipe; 52413675Sdyson{ 52576364Salfred 52691362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 527133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 528133049Ssilby ("Unlocked pipe passed to pipeunlock")); 52991362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 53013675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 53113675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 53214177Sdyson wakeup(cpipe); 53313675Sdyson } 53413675Sdyson} 53513675Sdyson 53614037Sdysonstatic __inline void 53714037Sdysonpipeselwakeup(cpipe) 53814037Sdyson struct pipe *cpipe; 53914037Sdyson{ 54076364Salfred 541126252Srwatson PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 54214037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 543122352Stanimura selwakeuppri(&cpipe->pipe_sel, PSOCK); 544174647Sjeff if (!SEL_WAITING(&cpipe->pipe_sel)) 545174647Sjeff cpipe->pipe_state &= ~PIPE_SEL; 54614037Sdyson } 54741086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 54895883Salfred pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 549133741Sjmg KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); 55014037Sdyson} 55114037Sdyson 552126131Sgreen/* 553126131Sgreen * Initialize and allocate VM and memory for pipe. The structure 554126131Sgreen * will start out zero'd from the ctor, so we just manage the kmem. 
555126131Sgreen */ 556126131Sgreenstatic int 557133790Ssilbypipe_create(pipe, backing) 558126131Sgreen struct pipe *pipe; 559133790Ssilby int backing; 560126131Sgreen{ 561126131Sgreen int error; 562126131Sgreen 563133790Ssilby if (backing) { 564133790Ssilby if (amountpipekva > maxpipekva / 2) 565133790Ssilby error = pipespace_new(pipe, SMALL_PIPE_SIZE); 566133790Ssilby else 567133790Ssilby error = pipespace_new(pipe, PIPE_SIZE); 568133790Ssilby } else { 569133790Ssilby /* If we're not backing this pipe, no need to do anything. */ 570133790Ssilby error = 0; 571133790Ssilby } 572226042Skib if (error == 0) { 573226042Skib pipe->pipe_ino = alloc_unr(pipeino_unr); 574226042Skib if (pipe->pipe_ino == -1) 575226042Skib /* pipeclose will clear allocated kva */ 576226042Skib error = ENOMEM; 577226042Skib } 578132579Srwatson return (error); 579126131Sgreen} 580126131Sgreen 58113675Sdyson/* ARGSUSED */ 58213675Sdysonstatic int 583101941Srwatsonpipe_read(fp, uio, active_cred, flags, td) 58413675Sdyson struct file *fp; 58513675Sdyson struct uio *uio; 586101941Srwatson struct ucred *active_cred; 58783366Sjulian struct thread *td; 58845311Sdt int flags; 58913675Sdyson{ 590109153Sdillon struct pipe *rpipe = fp->f_data; 59147748Salc int error; 59213675Sdyson int nread = 0; 59318863Sdyson u_int size; 59413675Sdyson 59591362Salfred PIPE_LOCK(rpipe); 59613675Sdyson ++rpipe->pipe_busy; 59747748Salc error = pipelock(rpipe, 1); 59847748Salc if (error) 59947748Salc goto unlocked_error; 60047748Salc 601101768Srwatson#ifdef MAC 602172930Srwatson error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); 603101768Srwatson if (error) 604101768Srwatson goto locked_error; 605101768Srwatson#endif 606133790Ssilby if (amountpipekva > (3 * maxpipekva) / 4) { 607133790Ssilby if (!(rpipe->pipe_state & PIPE_DIRECTW) && 608133790Ssilby (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 609133790Ssilby (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 610133790Ssilby (piperesizeallowed == 1)) { 
611133790Ssilby PIPE_UNLOCK(rpipe); 612133790Ssilby pipespace(rpipe, SMALL_PIPE_SIZE); 613133790Ssilby PIPE_LOCK(rpipe); 614133790Ssilby } 615133790Ssilby } 616101768Srwatson 61713675Sdyson while (uio->uio_resid) { 61813907Sdyson /* 61913907Sdyson * normal pipe buffer receive 62013907Sdyson */ 62113675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 62218863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 62313675Sdyson if (size > rpipe->pipe_buffer.cnt) 62413675Sdyson size = rpipe->pipe_buffer.cnt; 62518863Sdyson if (size > (u_int) uio->uio_resid) 62618863Sdyson size = (u_int) uio->uio_resid; 62747748Salc 62891362Salfred PIPE_UNLOCK(rpipe); 629116127Smux error = uiomove( 630116127Smux &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 631116127Smux size, uio); 63291362Salfred PIPE_LOCK(rpipe); 63376760Salfred if (error) 63413675Sdyson break; 63576760Salfred 63613675Sdyson rpipe->pipe_buffer.out += size; 63713675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 63813675Sdyson rpipe->pipe_buffer.out = 0; 63913675Sdyson 64013675Sdyson rpipe->pipe_buffer.cnt -= size; 64147748Salc 64247748Salc /* 64347748Salc * If there is no more to read in the pipe, reset 64447748Salc * its pointers to the beginning. This improves 64547748Salc * cache hit stats. 64647748Salc */ 64747748Salc if (rpipe->pipe_buffer.cnt == 0) { 64847748Salc rpipe->pipe_buffer.in = 0; 64947748Salc rpipe->pipe_buffer.out = 0; 65047748Salc } 65113675Sdyson nread += size; 65214037Sdyson#ifndef PIPE_NODIRECT 65313907Sdyson /* 65413907Sdyson * Direct copy, bypassing a kernel buffer. 
65513907Sdyson */ 65613907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 65747748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 65818863Sdyson if (size > (u_int) uio->uio_resid) 65918863Sdyson size = (u_int) uio->uio_resid; 66047748Salc 66191362Salfred PIPE_UNLOCK(rpipe); 662127501Salc error = uiomove_fromphys(rpipe->pipe_map.ms, 663127501Salc rpipe->pipe_map.pos, size, uio); 66491362Salfred PIPE_LOCK(rpipe); 66513907Sdyson if (error) 66613907Sdyson break; 66713907Sdyson nread += size; 66813907Sdyson rpipe->pipe_map.pos += size; 66913907Sdyson rpipe->pipe_map.cnt -= size; 67013907Sdyson if (rpipe->pipe_map.cnt == 0) { 67113907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 67213907Sdyson wakeup(rpipe); 67313907Sdyson } 67414037Sdyson#endif 67513675Sdyson } else { 67613675Sdyson /* 67713675Sdyson * detect EOF condition 67876760Salfred * read returns 0 on EOF, no need to set error 67913675Sdyson */ 68076760Salfred if (rpipe->pipe_state & PIPE_EOF) 68113675Sdyson break; 68243623Sdillon 68313675Sdyson /* 68413675Sdyson * If the "write-side" has been blocked, wake it up now. 68513675Sdyson */ 68613675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 68713675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 68813675Sdyson wakeup(rpipe); 68913675Sdyson } 69043623Sdillon 69143623Sdillon /* 69247748Salc * Break if some data was read. 69343623Sdillon */ 69447748Salc if (nread > 0) 69513675Sdyson break; 69616960Sdyson 69743623Sdillon /* 698124394Sdes * Unlock the pipe buffer for our remaining processing. 699116127Smux * We will either break out with an error or we will 700116127Smux * sleep and relock to loop. 70143623Sdillon */ 70247748Salc pipeunlock(rpipe); 70343623Sdillon 70413675Sdyson /* 70547748Salc * Handle non-blocking mode operation or 70647748Salc * wait for more data. 
70713675Sdyson */ 70876760Salfred if (fp->f_flag & FNONBLOCK) { 70947748Salc error = EAGAIN; 71076760Salfred } else { 71147748Salc rpipe->pipe_state |= PIPE_WANTR; 71291362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 71391362Salfred PRIBIO | PCATCH, 71477140Salfred "piperd", 0)) == 0) 71547748Salc error = pipelock(rpipe, 1); 71613675Sdyson } 71747748Salc if (error) 71847748Salc goto unlocked_error; 71913675Sdyson } 72013675Sdyson } 721101768Srwatson#ifdef MAC 722101768Srwatsonlocked_error: 723101768Srwatson#endif 72447748Salc pipeunlock(rpipe); 72513675Sdyson 72691362Salfred /* XXX: should probably do this before getting any locks. */ 72724101Sbde if (error == 0) 72855112Sbde vfs_timestamp(&rpipe->pipe_atime); 72947748Salcunlocked_error: 73047748Salc --rpipe->pipe_busy; 73113913Sdyson 73247748Salc /* 73347748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 73447748Salc */ 73513675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 73613675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 73713675Sdyson wakeup(rpipe); 73813675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 73913675Sdyson /* 74047748Salc * Handle write blocking hysteresis. 74113675Sdyson */ 74213675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 74313675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 74413675Sdyson wakeup(rpipe); 74513675Sdyson } 74613675Sdyson } 74714037Sdyson 74814802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 74914037Sdyson pipeselwakeup(rpipe); 75014037Sdyson 75191362Salfred PIPE_UNLOCK(rpipe); 75276760Salfred return (error); 75313675Sdyson} 75413675Sdyson 75514037Sdyson#ifndef PIPE_NODIRECT 75613907Sdyson/* 75713907Sdyson * Map the sending processes' buffer into kernel space and wire it. 75813907Sdyson * This is similar to a physical write operation. 
75913907Sdyson */ 76013675Sdysonstatic int 76113907Sdysonpipe_build_write_buffer(wpipe, uio) 76213907Sdyson struct pipe *wpipe; 76313675Sdyson struct uio *uio; 76413675Sdyson{ 76518863Sdyson u_int size; 766216511Salc int i; 76713907Sdyson 76891412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 769133790Ssilby KASSERT(wpipe->pipe_state & PIPE_DIRECTW, 770133790Ssilby ("Clone attempt on non-direct write pipe!")); 77179224Sdillon 77218863Sdyson size = (u_int) uio->uio_iov->iov_len; 77313907Sdyson if (size > wpipe->pipe_buffer.size) 77413907Sdyson size = wpipe->pipe_buffer.size; 77513907Sdyson 776216699Salc if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, 777216699Salc (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ, 778216699Salc wpipe->pipe_map.ms, PIPENPAGES)) < 0) 779193893Scperciva return (EFAULT); 78013907Sdyson 78113907Sdyson/* 78213907Sdyson * set up the control block 78313907Sdyson */ 78413907Sdyson wpipe->pipe_map.npages = i; 78576760Salfred wpipe->pipe_map.pos = 78676760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 78713907Sdyson wpipe->pipe_map.cnt = size; 78813907Sdyson 78913907Sdyson/* 79013907Sdyson * and update the uio data 79113907Sdyson */ 79213907Sdyson 79313907Sdyson uio->uio_iov->iov_len -= size; 794104908Smike uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; 79513907Sdyson if (uio->uio_iov->iov_len == 0) 79613907Sdyson uio->uio_iov++; 79713907Sdyson uio->uio_resid -= size; 79813907Sdyson uio->uio_offset += size; 79976760Salfred return (0); 80013907Sdyson} 80113907Sdyson 80213907Sdyson/* 80313907Sdyson * unmap and unwire the process buffer 80413907Sdyson */ 80513907Sdysonstatic void 80613907Sdysonpipe_destroy_write_buffer(wpipe) 80776760Salfred struct pipe *wpipe; 80813907Sdyson{ 80976364Salfred 810127501Salc PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 811216511Salc vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages); 81291653Stanimura wpipe->pipe_map.npages = 0; 81313907Sdyson} 81413907Sdyson 
/*
 * In the case of a signal, the writing process might go away. This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;	/* write side with a pending direct write */
{
	struct uio uio;
	struct iovec iov;
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	/*
	 * Pretend the pending direct-write bytes were written into the
	 * kernel ring buffer starting at offset 0, and leave direct-write
	 * mode before actually copying.
	 */
	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	/*
	 * Drop the pipe mutex for the copy: uiomove_fromphys() copies from
	 * the held physical pages and may sleep.
	 */
	PIPE_UNLOCK(wpipe);
	iov.iov_base = wpipe->pipe_buffer.buffer;
	iov.iov_len = size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = size;
	uio.uio_segflg = UIO_SYSSPACE;
	/* UIO_READ: data flows from the held pages into pipe_buffer. */
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;
	uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
	PIPE_LOCK(wpipe);
	/* The source pages are no longer needed; release the holds. */
	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism. Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer. Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;	/* write-side endpoint, pipe mutex held */
	struct uio *uio;	/* writer's request */
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	error = pipelock(wpipe, 1);
	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (error) {
		pipeunlock(wpipe);
		goto error1;
	}
	/*
	 * Wait for any previous direct write to drain.  After every sleep
	 * we restart from 'retry' because the pipe state may have changed
	 * arbitrarily while unlocked.
	 */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/*
	 * A direct write may not coexist with buffered data; wait for the
	 * reader to drain the kernel buffer first.
	 */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	/* Wiring the user pages may fault/sleep; drop the pipe mutex. */
	PIPE_UNLOCK(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_LOCK(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		pipeunlock(wpipe);
		goto error1;
	}

	/*
	 * Sleep until the reader has consumed the mapped pages (it clears
	 * PIPE_DIRECTW) or the pipe is torn down.
	 */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipe_destroy_write_buffer(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
		pipelock(wpipe, 0);
	}

	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 * (We were interrupted with the transfer still pending;
		 * copy the data so the user pages can be released.)
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	/* Wake anyone sleeping on this pipe before reporting the error. */
	wakeup(wpipe);
	return (error);
}
#endif

/*
 * Write to a pipe.  Handles buffer auto-resizing, the optional direct
 * (page-mapped) write path, and PIPE_BUF atomicity for small writes.
 *
 * NOTE(review): rpipe and wpipe appear to share a single pipe-pair
 * mutex -- PIPE_LOCK(rpipe) and PIPE_UNLOCK(wpipe) are used
 * interchangeably below; confirm against the PIPE_MTX() definition.
 */
static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int desiredsize, orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	error = pipelock(wpipe, 1);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if (wpipe->pipe_present != PIPE_ACTIVE ||
	    (wpipe->pipe_state & PIPE_EOF)) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#ifdef MAC
	error = mac_pipe_check_write(active_cred, wpipe->pipe_pair);
	if (error) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	/* Keep the endpoint alive while we sleep inside the write loop. */
	++wpipe->pipe_busy;

	/* Choose a larger size if it's advantageous */
	desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size);
	while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) {
		if (piperesizeallowed != 1)
			break;
		if (amountpipekva > maxpipekva / 2)
			break;
		if (desiredsize == BIG_PIPE_SIZE)
			break;
		desiredsize = desiredsize * 2;
	}

	/* Choose a smaller size if we're in a OOM situation */
	if ((amountpipekva > (3 * maxpipekva) / 4) &&
	    (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
	    (piperesizeallowed == 1))
		desiredsize = SMALL_PIPE_SIZE;

	/* Resize if the above determined that a new size was necessary */
	if ((desiredsize != wpipe->pipe_buffer.size) &&
	    ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) {
		/* pipespace() allocates kva and may sleep; drop the mutex. */
		PIPE_UNLOCK(wpipe);
		pipespace(wpipe, desiredsize);
		PIPE_LOCK(wpipe);
	}
	if (wpipe->pipe_buffer.size == 0) {
		/*
		 * This can only happen for reverse direction use of pipes
		 * in a complete OOM situation.
		 */
		error = ENOMEM;
		--wpipe->pipe_busy;
		pipeunlock(wpipe);
		PIPE_UNLOCK(wpipe);
		return (error);
	}

	pipeunlock(wpipe);

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

		pipelock(wpipe, 0);
		if (wpipe->pipe_state & PIPE_EOF) {
			pipeunlock(wpipe);
			error = EPIPE;
			break;
		}
#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if (uio->uio_segflg == UIO_USERSPACE &&
		    uio->uio_iov->iov_len >= PIPE_MINDIRECT &&
		    wpipe->pipe_buffer.size >= PIPE_MINDIRECT &&
		    (fp->f_flag & FNONBLOCK) == 0) {
			pipeunlock(wpipe);
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes. We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer. We break out if a signal occurs or the
		 * reader goes away.
		 */
		if (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			pipeselwakeup(wpipe);
			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
			    "pipbww", 0);
			if (error)
				break;
			else
				continue;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			int size;	/* Transfer size */
			int segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer. If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			/* uiomove() may fault on user memory: drop the lock. */
			PIPE_UNLOCK(rpipe);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			PIPE_LOCK(rpipe);

			if (error == 0 && segsize < size) {
				KASSERT(wpipe->pipe_buffer.in + segsize ==
				    wpipe->pipe_buffer.size,
				    ("Pipe buffer wraparound disappeared"));
				/*
				 * Transfer remaining part now, to
				 * support atomic writes. Wraparound
				 * happened.
				 */

				PIPE_UNLOCK(rpipe);
				error = uiomove(
				    &wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				PIPE_LOCK(rpipe);
			}
			if (error == 0) {
				/* Advance 'in', wrapping at buffer end. */
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
					KASSERT(wpipe->pipe_buffer.in ==
					    size - segsize +
					    wpipe->pipe_buffer.size,
					    ("Expected wraparound bad"));
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
				KASSERT(wpipe->pipe_buffer.cnt <=
				    wpipe->pipe_buffer.size,
				    ("Pipe buffer overflow"));
			}
			pipeunlock(wpipe);
			if (error != 0)
				break;
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				pipeunlock(wpipe);
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
		}
	}

	pipelock(wpipe, 0);
	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		/* A closer is waiting in pipeclose(); let it proceed. */
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	pipeunlock(wpipe);
	PIPE_UNLOCK(rpipe);
	return (error);
}

/*
 * Pipes do not support truncation.
 */
/* ARGSUSED */
static int
pipe_truncate(fp, length, active_cred, td)
	struct file *fp;
	off_t length;
	struct ucred *active_cred;
	struct thread *td;
{

	return (EINVAL);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(fp, cmd, data, active_cred, td)
	struct file *fp;
	u_long cmd;	/* ioctl command (FIONBIO, FIOASYNC, FIONREAD, ...) */
	void *data;	/* in/out argument, interpreted per command */
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *mpipe = fp->f_data;
	int error;

	PIPE_LOCK(mpipe);

#ifdef MAC
	error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
	if (error) {
		PIPE_UNLOCK(mpipe);
		return (error);
	}
#endif

	error = 0;
	switch (cmd) {

	case FIONBIO:
		/* Non-blocking mode is tracked in fp->f_flag; nothing here. */
		break;

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		break;

	case FIONREAD:
		/* Bytes pending: direct-write mapping or the ring buffer. */
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		break;

	case FIOSETOWN:
		/* fsetown() may sleep; must drop the pipe mutex first. */
		PIPE_UNLOCK(mpipe);
		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
		goto out_unlocked;

	case FIOGETOWN:
		*(int *)data = fgetown(&mpipe->pipe_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		PIPE_UNLOCK(mpipe);
		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
		goto out_unlocked;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&mpipe->pipe_sigio);
		break;

	default:
		error = ENOTTY;
		break;
	}
	PIPE_UNLOCK(mpipe);
out_unlocked:
	return (error);
}

/*
 * Poll support: report readability/writability and hangup, recording
 * ourselves with selrecord() when nothing is ready yet.
 */
static int
pipe_poll(fp, events, active_cred, td)
	struct file *fp;
	int events;	/* POLL* event mask requested by the caller */
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *rpipe = fp->f_data;
	struct pipe *wpipe;
	int revents = 0;
#ifdef MAC
	int error;
#endif

	wpipe = rpipe->pipe_peer;
	PIPE_LOCK(rpipe);
#ifdef MAC
	error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif
	/* Readable: a pending direct write or buffered data. */
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0))
			revents |= events & (POLLIN | POLLRDNORM);

	/*
	 * Writable: peer gone/EOF (write would fail immediately), or at
	 * least PIPE_BUF of free space with no direct write in progress.
	 */
	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe->pipe_present != PIPE_ACTIVE ||
		    (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		    (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	/* EOF on the read side also means "readable" (read returns 0). */
	if ((events & POLLINIGNEOF) == 0) {
		if (rpipe->pipe_state & PIPE_EOF) {
			revents |= (events & (POLLIN | POLLRDNORM));
			if (wpipe->pipe_present != PIPE_ACTIVE ||
			    (wpipe->pipe_state & PIPE_EOF))
				revents |= POLLHUP;
		}
	}

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			if (SEL_WAITING(&rpipe->pipe_sel))
				rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			if (SEL_WAITING(&wpipe->pipe_sel))
				wpipe->pipe_state |= PIPE_SEL;
		}
	}
#ifdef MAC
locked_error:
#endif
	PIPE_UNLOCK(rpipe);

	return (revents);
}

/*
 * We shouldn't need locks here as we're doing a read and this should
 * be a natural race.
139398989Salfred */ 139452983Speterstatic int 1395101983Srwatsonpipe_stat(fp, ub, active_cred, td) 139652983Speter struct file *fp; 139752983Speter struct stat *ub; 1398101983Srwatson struct ucred *active_cred; 139983366Sjulian struct thread *td; 140013675Sdyson{ 1401109153Sdillon struct pipe *pipe = fp->f_data; 1402101768Srwatson#ifdef MAC 1403101768Srwatson int error; 140452983Speter 1405104269Srwatson PIPE_LOCK(pipe); 1406172930Srwatson error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); 1407104269Srwatson PIPE_UNLOCK(pipe); 1408101768Srwatson if (error) 1409101768Srwatson return (error); 1410101768Srwatson#endif 1411100527Salfred bzero(ub, sizeof(*ub)); 141217124Sbde ub->st_mode = S_IFIFO; 1413133790Ssilby ub->st_blksize = PAGE_SIZE; 1414132436Ssilby if (pipe->pipe_state & PIPE_DIRECTW) 1415132436Ssilby ub->st_size = pipe->pipe_map.cnt; 1416132436Ssilby else 1417132436Ssilby ub->st_size = pipe->pipe_buffer.cnt; 141813675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 1419205792Sed ub->st_atim = pipe->pipe_atime; 1420205792Sed ub->st_mtim = pipe->pipe_mtime; 1421205792Sed ub->st_ctim = pipe->pipe_ctime; 142260404Schris ub->st_uid = fp->f_cred->cr_uid; 142360404Schris ub->st_gid = fp->f_cred->cr_gid; 1424226042Skib ub->st_dev = pipedev_ino; 1425226042Skib ub->st_ino = pipe->pipe_ino; 142617124Sbde /* 1427226042Skib * Left as 0: st_nlink, st_rdev, st_flags, st_gen. 
142817124Sbde */ 142976760Salfred return (0); 143013675Sdyson} 143113675Sdyson 143213675Sdyson/* ARGSUSED */ 143313675Sdysonstatic int 143483366Sjulianpipe_close(fp, td) 143513675Sdyson struct file *fp; 143683366Sjulian struct thread *td; 143713675Sdyson{ 1438109153Sdillon struct pipe *cpipe = fp->f_data; 143916322Sgpalmer 144049413Sgreen fp->f_ops = &badfileops; 1441109153Sdillon fp->f_data = NULL; 144296122Salfred funsetown(&cpipe->pipe_sigio); 144313675Sdyson pipeclose(cpipe); 144476760Salfred return (0); 144513675Sdyson} 144613675Sdyson 144776364Salfredstatic void 144876364Salfredpipe_free_kmem(cpipe) 144976364Salfred struct pipe *cpipe; 145076364Salfred{ 145191412Salfred 1452125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), 1453125293Srwatson ("pipe_free_kmem: pipe mutex locked")); 145476364Salfred 145576364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 1456189649Sjhb atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size); 1457118764Ssilby vm_map_remove(pipe_map, 1458118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer, 1459118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 146076364Salfred cpipe->pipe_buffer.buffer = NULL; 146176364Salfred } 146276364Salfred#ifndef PIPE_NODIRECT 1463127501Salc { 146476364Salfred cpipe->pipe_map.cnt = 0; 146576364Salfred cpipe->pipe_map.pos = 0; 146676364Salfred cpipe->pipe_map.npages = 0; 146776364Salfred } 146876364Salfred#endif 146976364Salfred} 147076364Salfred 147113675Sdyson/* 147213675Sdyson * shutdown the pipe 147313675Sdyson */ 147413675Sdysonstatic void 147513675Sdysonpipeclose(cpipe) 147613675Sdyson struct pipe *cpipe; 147713675Sdyson{ 1478125293Srwatson struct pipepair *pp; 147913907Sdyson struct pipe *ppipe; 1480226042Skib ino_t ino; 148176364Salfred 1482125293Srwatson KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL")); 148391968Salfred 1484125293Srwatson PIPE_LOCK(cpipe); 1485133049Ssilby pipelock(cpipe, 0); 1486125293Srwatson pp = cpipe->pipe_pair; 148791968Salfred 
148891968Salfred pipeselwakeup(cpipe); 148913907Sdyson 149091968Salfred /* 149191968Salfred * If the other side is blocked, wake it up saying that 149291968Salfred * we want to close it down. 149391968Salfred */ 1494126131Sgreen cpipe->pipe_state |= PIPE_EOF; 149591968Salfred while (cpipe->pipe_busy) { 149691968Salfred wakeup(cpipe); 1497126131Sgreen cpipe->pipe_state |= PIPE_WANT; 1498133049Ssilby pipeunlock(cpipe); 149991968Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1500133049Ssilby pipelock(cpipe, 0); 150191968Salfred } 150213675Sdyson 1503101768Srwatson 150491968Salfred /* 1505125293Srwatson * Disconnect from peer, if any. 150691968Salfred */ 1507125293Srwatson ppipe = cpipe->pipe_peer; 1508179243Skib if (ppipe->pipe_present == PIPE_ACTIVE) { 150991968Salfred pipeselwakeup(ppipe); 151013907Sdyson 151191968Salfred ppipe->pipe_state |= PIPE_EOF; 151291968Salfred wakeup(ppipe); 1513133741Sjmg KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0); 151491968Salfred } 1515125293Srwatson 151691968Salfred /* 1517125293Srwatson * Mark this endpoint as free. Release kmem resources. We 1518125293Srwatson * don't mark this endpoint as unused until we've finished 1519125293Srwatson * doing that, or the pipe might disappear out from under 1520125293Srwatson * us. 152191968Salfred */ 1522125293Srwatson PIPE_UNLOCK(cpipe); 1523125293Srwatson pipe_free_kmem(cpipe); 1524125293Srwatson PIPE_LOCK(cpipe); 1525179243Skib cpipe->pipe_present = PIPE_CLOSING; 1526126131Sgreen pipeunlock(cpipe); 1527179243Skib 1528179243Skib /* 1529179243Skib * knlist_clear() may sleep dropping the PIPE_MTX. Set the 1530179243Skib * PIPE_FINALIZED, that allows other end to free the 1531179243Skib * pipe_pair, only after the knotes are completely dismantled. 
1532179243Skib */ 1533133741Sjmg knlist_clear(&cpipe->pipe_sel.si_note, 1); 1534179243Skib cpipe->pipe_present = PIPE_FINALIZED; 1535225177Sattilio seldrain(&cpipe->pipe_sel); 1536133741Sjmg knlist_destroy(&cpipe->pipe_sel.si_note); 1537125293Srwatson 1538125293Srwatson /* 1539226042Skib * Postpone the destroy of the fake inode number allocated for 1540226042Skib * our end, until pipe mtx is unlocked. 1541226042Skib */ 1542226042Skib ino = cpipe->pipe_ino; 1543226042Skib 1544226042Skib /* 1545125293Srwatson * If both endpoints are now closed, release the memory for the 1546125293Srwatson * pipe pair. If not, unlock. 1547125293Srwatson */ 1548179243Skib if (ppipe->pipe_present == PIPE_FINALIZED) { 154991968Salfred PIPE_UNLOCK(cpipe); 1550125293Srwatson#ifdef MAC 1551172930Srwatson mac_pipe_destroy(pp); 1552125293Srwatson#endif 1553125293Srwatson uma_zfree(pipe_zone, cpipe->pipe_pair); 1554125293Srwatson } else 1555125293Srwatson PIPE_UNLOCK(cpipe); 1556226042Skib 1557226042Skib if (ino > 0) 1558226042Skib free_unr(pipeino_unr, cpipe->pipe_ino); 155913675Sdyson} 156059288Sjlemon 156172521Sjlemon/*ARGSUSED*/ 156259288Sjlemonstatic int 156372521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 156459288Sjlemon{ 156589306Salfred struct pipe *cpipe; 156659288Sjlemon 1567109153Sdillon cpipe = kn->kn_fp->f_data; 1568126131Sgreen PIPE_LOCK(cpipe); 156972521Sjlemon switch (kn->kn_filter) { 157072521Sjlemon case EVFILT_READ: 157172521Sjlemon kn->kn_fop = &pipe_rfiltops; 157272521Sjlemon break; 157372521Sjlemon case EVFILT_WRITE: 157472521Sjlemon kn->kn_fop = &pipe_wfiltops; 1575179243Skib if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) { 1576101382Sdes /* other end of pipe has been closed */ 1577126131Sgreen PIPE_UNLOCK(cpipe); 1578118929Sjmg return (EPIPE); 1579126131Sgreen } 1580126131Sgreen cpipe = cpipe->pipe_peer; 158172521Sjlemon break; 158272521Sjlemon default: 1583126131Sgreen PIPE_UNLOCK(cpipe); 1584133741Sjmg return (EINVAL); 158572521Sjlemon } 
158678292Sjlemon 1587133741Sjmg knlist_add(&cpipe->pipe_sel.si_note, kn, 1); 158891372Salfred PIPE_UNLOCK(cpipe); 158959288Sjlemon return (0); 159059288Sjlemon} 159159288Sjlemon 159259288Sjlemonstatic void 159359288Sjlemonfilt_pipedetach(struct knote *kn) 159459288Sjlemon{ 1595121018Sjmg struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 159659288Sjlemon 1597126131Sgreen PIPE_LOCK(cpipe); 1598179242Skib if (kn->kn_filter == EVFILT_WRITE) 1599121018Sjmg cpipe = cpipe->pipe_peer; 1600133741Sjmg knlist_remove(&cpipe->pipe_sel.si_note, kn, 1); 160191372Salfred PIPE_UNLOCK(cpipe); 160259288Sjlemon} 160359288Sjlemon 160459288Sjlemon/*ARGSUSED*/ 160559288Sjlemonstatic int 160659288Sjlemonfilt_piperead(struct knote *kn, long hint) 160759288Sjlemon{ 1608109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 160959288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 1610133741Sjmg int ret; 161159288Sjlemon 161291372Salfred PIPE_LOCK(rpipe); 161359288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 161459288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 161559288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 161659288Sjlemon 161759288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 1618179243Skib wpipe->pipe_present != PIPE_ACTIVE || 1619179243Skib (wpipe->pipe_state & PIPE_EOF)) { 162091372Salfred kn->kn_flags |= EV_EOF; 162191372Salfred PIPE_UNLOCK(rpipe); 162259288Sjlemon return (1); 162359288Sjlemon } 1624133741Sjmg ret = kn->kn_data > 0; 162591372Salfred PIPE_UNLOCK(rpipe); 1626133741Sjmg return ret; 162759288Sjlemon} 162859288Sjlemon 162959288Sjlemon/*ARGSUSED*/ 163059288Sjlemonstatic int 163159288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 163259288Sjlemon{ 1633109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 163459288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 163559288Sjlemon 163691372Salfred PIPE_LOCK(rpipe); 1637179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1638179243Skib (wpipe->pipe_state & PIPE_EOF)) { 163959288Sjlemon 
kn->kn_data = 0; 1640124394Sdes kn->kn_flags |= EV_EOF; 164191372Salfred PIPE_UNLOCK(rpipe); 164259288Sjlemon return (1); 164359288Sjlemon } 164459288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 164565855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 164659288Sjlemon kn->kn_data = 0; 164759288Sjlemon 164891372Salfred PIPE_UNLOCK(rpipe); 164959288Sjlemon return (kn->kn_data >= PIPE_BUF); 165059288Sjlemon} 1651