1139804Simp/*- 213675Sdyson * Copyright (c) 1996 John S. Dyson 3232055Skmacy * Copyright (c) 2012 Giovanni Trematerra 413675Sdyson * All rights reserved. 513675Sdyson * 613675Sdyson * Redistribution and use in source and binary forms, with or without 713675Sdyson * modification, are permitted provided that the following conditions 813675Sdyson * are met: 913675Sdyson * 1. Redistributions of source code must retain the above copyright 1013675Sdyson * notice immediately at the beginning of the file, without modification, 1113675Sdyson * this list of conditions, and the following disclaimer. 1213675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1313675Sdyson * notice, this list of conditions and the following disclaimer in the 1413675Sdyson * documentation and/or other materials provided with the distribution. 1513675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1613675Sdyson * John S. Dyson. 1714037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1813675Sdyson * are met. 1913675Sdyson */ 2013675Sdyson 2113675Sdyson/* 2213675Sdyson * This file contains a high-performance replacement for the socket-based 2313675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2413675Sdyson * all features of sockets, but does do everything that pipes normally 2513675Sdyson * do. 2613675Sdyson */ 2713675Sdyson 2813907Sdyson/* 2913907Sdyson * This code has two modes of operation, a small write mode and a large 3013907Sdyson * write mode. The small write mode acts like conventional pipes with 3113907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3213907Sdyson * "normal" pipe buffering is done. 
If the buffer is between PIPE_MINDIRECT 33219801Salc * and PIPE_SIZE in size, the sending process pins the underlying pages in 34219801Salc * memory, and the receiving process copies directly from these pinned pages 35219801Salc * in the sending process. 3613907Sdyson * 3713907Sdyson * If the sending process receives a signal, it is possible that it will 3813913Sdyson * go away, and certainly its address space can change, because control 3913907Sdyson * is returned back to the user-mode side. In that case, the pipe code 4013907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4113907Sdyson * kernel buffer, and the receiving process will grab the data from the 4213907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4313907Sdyson * the copy operation is normally eliminated. 4413907Sdyson * 4513907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4613907Sdyson * happen for small transfers so that the system will not spend all of 47118764Ssilby * its time context switching. 48117325Ssilby * 49118764Ssilby * In order to limit the resource use of pipes, two sysctls exist: 50117325Ssilby * 51118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 52133790Ssilby * address space available to us in pipe_map. This value is normally 53133790Ssilby * autotuned, but may also be loader tuned. 54117325Ssilby * 55133790Ssilby * kern.ipc.pipekva - This read-only sysctl tracks the current amount of 56133790Ssilby * memory in use by pipes. 57117325Ssilby * 58133790Ssilby * Based on how large pipekva is relative to maxpipekva, the following 59133790Ssilby * will happen: 60117325Ssilby * 61133790Ssilby * 0% - 50%: 62133790Ssilby * New pipes are given 16K of memory backing, pipes may dynamically 63133790Ssilby * grow to as large as 64K where needed. 
64133790Ssilby * 50% - 75%: 65133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 66133790Ssilby * existing pipes may NOT grow. 67133790Ssilby * 75% - 100%: 68133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 69133790Ssilby * existing pipes will be shrunk down to 4K whenever possible. 70133049Ssilby * 71133790Ssilby * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0. If 72133790Ssilby * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE 73133790Ssilby * resize which MUST occur for reverse-direction pipes when they are 74133790Ssilby * first used. 75133790Ssilby * 76133790Ssilby * Additional information about the current state of pipes may be obtained 77133790Ssilby * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail, 78133790Ssilby * and kern.ipc.piperesizefail. 79133790Ssilby * 80133049Ssilby * Locking rules: There are two locks present here: A mutex, used via 81133049Ssilby * PIPE_LOCK, and a flag, used via pipelock(). All locking is done via 82133049Ssilby * the flag, as mutexes can not persist over uiomove. The mutex 83133049Ssilby * exists only to guard access to the flag, and is not in itself a 84133790Ssilby * locking mechanism. Also note that there is only a single mutex for 85133790Ssilby * both directions of a pipe. 86133049Ssilby * 87133049Ssilby * As pipelock() may have to sleep before it can acquire the flag, it 88133049Ssilby * is important to reread all data after a call to pipelock(); everything 89133049Ssilby * in the structure may have changed. 
9013907Sdyson */ 9113907Sdyson 92116182Sobrien#include <sys/cdefs.h> 93116182Sobrien__FBSDID("$FreeBSD: releng/10.3/sys/kern/sys_pipe.c 285971 2015-07-28 18:37:23Z cem $"); 94116182Sobrien 9513675Sdyson#include <sys/param.h> 9613675Sdyson#include <sys/systm.h> 97226042Skib#include <sys/conf.h> 9824131Sbde#include <sys/fcntl.h> 9913675Sdyson#include <sys/file.h> 10013675Sdyson#include <sys/filedesc.h> 10124206Sbde#include <sys/filio.h> 10291372Salfred#include <sys/kernel.h> 10376166Smarkm#include <sys/lock.h> 10476827Salfred#include <sys/mutex.h> 10524206Sbde#include <sys/ttycom.h> 10613675Sdyson#include <sys/stat.h> 10791968Salfred#include <sys/malloc.h> 10829356Speter#include <sys/poll.h> 10970834Swollman#include <sys/selinfo.h> 11013675Sdyson#include <sys/signalvar.h> 111184849Sed#include <sys/syscallsubr.h> 112117325Ssilby#include <sys/sysctl.h> 11313675Sdyson#include <sys/sysproto.h> 11413675Sdyson#include <sys/pipe.h> 11576166Smarkm#include <sys/proc.h> 11655112Sbde#include <sys/vnode.h> 11734924Sbde#include <sys/uio.h> 11859288Sjlemon#include <sys/event.h> 11913675Sdyson 120163606Srwatson#include <security/mac/mac_framework.h> 121163606Srwatson 12213675Sdyson#include <vm/vm.h> 12313675Sdyson#include <vm/vm_param.h> 12413675Sdyson#include <vm/vm_object.h> 12513675Sdyson#include <vm/vm_kern.h> 12613675Sdyson#include <vm/vm_extern.h> 12713675Sdyson#include <vm/pmap.h> 12813675Sdyson#include <vm/vm_map.h> 12913907Sdyson#include <vm/vm_page.h> 13092751Sjeff#include <vm/uma.h> 13113675Sdyson 13214037Sdyson/* 13314037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 13414037Sdyson * approx 30% decrease in transfer rate. This could be useful for 13514037Sdyson * NetBSD or OpenBSD. 13614037Sdyson */ 13714037Sdyson/* #define PIPE_NODIRECT */ 13814037Sdyson 139232055Skmacy#define PIPE_PEER(pipe) \ 140232055Skmacy (((pipe)->pipe_state & PIPE_NAMED) ? 
	(pipe) : ((pipe)->pipe_peer))

/*
 * interfaces to the outside world
 */
static fo_rdwr_t	pipe_read;
static fo_rdwr_t	pipe_write;
static fo_truncate_t	pipe_truncate;
static fo_ioctl_t	pipe_ioctl;
static fo_poll_t	pipe_poll;
static fo_kqfilter_t	pipe_kqfilter;
static fo_stat_t	pipe_stat;
static fo_close_t	pipe_close;
static fo_chmod_t	pipe_chmod;
static fo_chown_t	pipe_chown;

/* File-descriptor operation vector used by every DTYPE_PIPE file. */
struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_truncate = pipe_truncate,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_chmod = pipe_chmod,
	.fo_chown = pipe_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_pipedetach(struct knote *kn);
static void	filt_pipedetach_notsup(struct knote *kn);
static int	filt_pipenotsup(struct knote *kn, long hint);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

/*
 * Filter ops for the direction of a named-pipe endpoint that is not
 * open in the corresponding mode; events are never reported on it.
 */
static struct filterops pipe_nfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach_notsup,
	.f_event = filt_pipenotsup
};
/* Filter ops for EVFILT_READ on a pipe. */
static struct filterops pipe_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_piperead
};
/* Filter ops for EVFILT_WRITE on a pipe. */
static struct filterops pipe_wfiltops = {
	.f_isfd = 1,
	.f_detach = filt_pipedetach,
	.f_event = filt_pipewrite
};

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

static long amountpipekva;		/* current KVA used by pipe buffers */
static int pipefragretry;		/* allocs retried due to fragmentation */
static int pipeallocfail;		/* buffer allocation failures */
static int piperesizefail;		/* buffer resize failures */
static int piperesizeallowed = 1;	/* sysctl: permit dynamic resizing */

SYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
	   &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
	   &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD,
	  &pipefragretry, 0, "Pipe allocation retries due to fragmentation");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD,
	  &pipeallocfail, 0, "Pipe allocation failures");
SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD,
	  &piperesizefail, 0, "Pipe resize failures");
SYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW,
	  &piperesizeallowed, 0, "Pipe resizing allowed");

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static void pipe_create(struct pipe *pipe, int backing);
static void pipe_paircreate(struct thread *td, struct pipepair **p_pp);
static
__inline int	pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int	pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void	pipe_destroy_write_buffer(struct pipe *wpipe);
static int	pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void	pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int	pipespace(struct pipe *cpipe, int size);
static int	pipespace_new(struct pipe *cpipe, int size);

static int	pipe_zone_ctor(void *mem, int size, void *arg, int flags);
static int	pipe_zone_init(void *mem, int size, int flags);
static void	pipe_zone_fini(void *mem, int size);

static uma_zone_t pipe_zone;		/* UMA zone of struct pipepair */
static struct unrhdr *pipeino_unr;	/* allocator of fake inode numbers */
static dev_t pipedev_ino;		/* fake device inode for pipes */

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

/*
 * System initialization: create the pipepair UMA zone and the unit-number
 * allocator used to hand out unique fake inode numbers for pipes.
 */
static void
pipeinit(void *dummy __unused)
{

	pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair),
	    pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini,
	    UMA_ALIGN_PTR, 0);
	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
	pipeino_unr = new_unrhdr(1, INT32_MAX, NULL);
	KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized"));
	pipedev_ino = devfs_alloc_cdp_inode();
	KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized"));
}

/*
 * UMA constructor: runs every time a pipepair is handed out by the zone.
 * Zeroes both endpoints, gives them identical timestamps, and cross-links
 * each endpoint with its peer and with the containing pair.
 */
static int
pipe_zone_ctor(void *mem, int size, void *arg, int flags)
{
	struct pipepair *pp;
	struct pipe *rpipe, *wpipe;

	KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size"));

	pp = (struct pipepair *)mem;

	/*
	 * We zero both pipe endpoints to make sure all the kmem pointers
	 * are NULL, flag fields are zero'd, etc.  We timestamp both
	 * endpoints with the same time.
	 */
	rpipe = &pp->pp_rpipe;
	bzero(rpipe, sizeof(*rpipe));
	vfs_timestamp(&rpipe->pipe_ctime);
	rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime;

	wpipe = &pp->pp_wpipe;
	bzero(wpipe, sizeof(*wpipe));
	wpipe->pipe_ctime = rpipe->pipe_ctime;
	wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime;

	rpipe->pipe_peer = wpipe;
	rpipe->pipe_pair = pp;
	wpipe->pipe_peer = rpipe;
	wpipe->pipe_pair = pp;

	/*
	 * Mark both endpoints as present; they will later get free'd
	 * one at a time.  When both are free'd, then the whole pair
	 * is released.
	 */
	rpipe->pipe_present = PIPE_ACTIVE;
	wpipe->pipe_present = PIPE_ACTIVE;

	/*
	 * Eventually, the MAC Framework may initialize the label
	 * in ctor or init, but for now we do it elswhere to avoid
	 * blocking in ctor or init.
	 */
	pp->pp_label = NULL;

	return (0);
}

/*
 * UMA init: runs once when backing memory for a pipepair is first
 * allocated.  The pair mutex initialized here survives ctor/dtor
 * cycles and is destroyed only in pipe_zone_fini().
 */
static int
pipe_zone_init(void *mem, int size, int flags)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size"));

	pp = (struct pipepair *)mem;

	mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF);
	return (0);
}

/*
 * UMA fini: tear down the pair mutex when the backing memory is
 * returned to the VM.
 */
static void
pipe_zone_fini(void *mem, int size)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size"));

	pp = (struct pipepair *)mem;

	mtx_destroy(&pp->pp_mtx);
}

/*
 * Allocate and set up a new pipepair.  Only the forward (read) endpoint
 * receives buffer backing here; the reverse endpoint stays unbacked
 * until it is first used.
 */
static void
pipe_paircreate(struct thread *td, struct pipepair **p_pp)
{
	struct pipepair *pp;
	struct pipe *rpipe, *wpipe;

	*p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK);
#ifdef MAC
	/*
	 * The MAC label is shared between the connected endpoints.  As a
	 * result mac_pipe_init() and mac_pipe_create() are called once
	 * for the pair, and not on the endpoints.
	 */
	mac_pipe_init(pp);
	mac_pipe_create(td->td_ucred, pp);
#endif
	rpipe = &pp->pp_rpipe;
	wpipe = &pp->pp_wpipe;

	knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe));
	knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe));

	/* Only the forward direction pipe is backed by default */
	pipe_create(rpipe, 1);
	pipe_create(wpipe, 0);

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;
}

/*
 * Create the pair backing a named pipe (FIFO).  The caller receives the
 * read endpoint, which is flagged PIPE_NAMED so that PIPE_PEER() and
 * pipe_dtor() treat the pair as a single named-pipe object.
 */
void
pipe_named_ctor(struct pipe **ppipe, struct thread *td)
{
	struct pipepair *pp;

	pipe_paircreate(td, &pp);
	pp->pp_rpipe.pipe_state |= PIPE_NAMED;
	*ppipe = &pp->pp_rpipe;
}

/*
 * Destroy a pipe endpoint.  For a named pipe both endpoints are torn
 * down together, and the fake inode number (if one was assigned) is
 * returned to the allocator.
 */
void
pipe_dtor(struct pipe *dpipe)
{
	struct pipe *peer;
	ino_t ino;

	ino = dpipe->pipe_ino;
	peer = (dpipe->pipe_state & PIPE_NAMED) != 0 ? dpipe->pipe_peer : NULL;
	funsetown(&dpipe->pipe_sigio);
	pipeclose(dpipe);
	if (peer != NULL) {
		funsetown(&peer->pipe_sigio);
		pipeclose(peer);
	}
	if (ino != 0 && ino != (ino_t)-1)
		free_unr(pipeino_unr, ino);
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes.  If we fail, let
 * the zone pick up the pieces via pipeclose().
395232055Skmacy */ 396232055Skmacyint 397232055Skmacykern_pipe(struct thread *td, int fildes[2]) 398232055Skmacy{ 399234352Sjkim 400248951Sjilles return (kern_pipe2(td, fildes, 0)); 401234352Sjkim} 402234352Sjkim 403234352Sjkimint 404248951Sjilleskern_pipe2(struct thread *td, int fildes[2], int flags) 405234352Sjkim{ 406232055Skmacy struct filedesc *fdp; 407232055Skmacy struct file *rf, *wf; 408232055Skmacy struct pipe *rpipe, *wpipe; 409232055Skmacy struct pipepair *pp; 410234352Sjkim int fd, fflags, error; 411232055Skmacy 412232055Skmacy fdp = td->td_proc->p_fd; 413268335Smjg pipe_paircreate(td, &pp); 414232055Skmacy rpipe = &pp->pp_rpipe; 415232055Skmacy wpipe = &pp->pp_wpipe; 416234352Sjkim error = falloc(td, &rf, &fd, flags); 41770915Sdwmalone if (error) { 41870915Sdwmalone pipeclose(rpipe); 41970915Sdwmalone pipeclose(wpipe); 42070915Sdwmalone return (error); 42170915Sdwmalone } 422121256Sdwmalone /* An extra reference on `rf' has been held for us by falloc(). */ 423184849Sed fildes[0] = fd; 42470915Sdwmalone 425234352Sjkim fflags = FREAD | FWRITE; 426234352Sjkim if ((flags & O_NONBLOCK) != 0) 427234352Sjkim fflags |= FNONBLOCK; 428234352Sjkim 42970803Sdwmalone /* 43070803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 43170803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 43270803Sdwmalone * to avoid races against processes which manage to dup() the read 43370803Sdwmalone * side while we are blocked trying to allocate the write side. 43470803Sdwmalone */ 435234352Sjkim finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops); 436234352Sjkim error = falloc(td, &wf, &fd, flags); 43770915Sdwmalone if (error) { 438184849Sed fdclose(fdp, rf, fildes[0], td); 43983366Sjulian fdrop(rf, td); 44070915Sdwmalone /* rpipe has been closed by fdrop(). */ 44170915Sdwmalone pipeclose(wpipe); 44270915Sdwmalone return (error); 44370915Sdwmalone } 444121256Sdwmalone /* An extra reference on `wf' has been held for us by falloc(). 
*/ 445234352Sjkim finit(wf, fflags, DTYPE_PIPE, wpipe, &pipeops); 446121256Sdwmalone fdrop(wf, td); 447184849Sed fildes[1] = fd; 44883366Sjulian fdrop(rf, td); 44913675Sdyson 45013675Sdyson return (0); 45113675Sdyson} 45213675Sdyson 453184849Sed/* ARGSUSED */ 454184849Sedint 455225617Skmacysys_pipe(struct thread *td, struct pipe_args *uap) 456184849Sed{ 457184849Sed int error; 458184849Sed int fildes[2]; 459184849Sed 460184849Sed error = kern_pipe(td, fildes); 461184849Sed if (error) 462184849Sed return (error); 463246907Spjd 464184849Sed td->td_retval[0] = fildes[0]; 465184849Sed td->td_retval[1] = fildes[1]; 466184849Sed 467184849Sed return (0); 468184849Sed} 469184849Sed 470250159Sjillesint 471250159Sjillessys_pipe2(struct thread *td, struct pipe2_args *uap) 472250159Sjilles{ 473250159Sjilles int error, fildes[2]; 474250159Sjilles 475250159Sjilles if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK)) 476250159Sjilles return (EINVAL); 477250159Sjilles error = kern_pipe2(td, fildes, uap->flags); 478250159Sjilles if (error) 479250159Sjilles return (error); 480250159Sjilles error = copyout(fildes, uap->fildes, 2 * sizeof(int)); 481250159Sjilles if (error) { 482250159Sjilles (void)kern_close(td, fildes[0]); 483250159Sjilles (void)kern_close(td, fildes[1]); 484250159Sjilles } 485250159Sjilles return (error); 486250159Sjilles} 487250159Sjilles 48813909Sdyson/* 48913909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 49076364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 49176364Salfred * it will retain the old buffer. 49276364Salfred * If it fails it will return ENOMEM. 
49313909Sdyson */ 49476364Salfredstatic int 495132579Srwatsonpipespace_new(cpipe, size) 49613675Sdyson struct pipe *cpipe; 49776364Salfred int size; 49813675Sdyson{ 49976364Salfred caddr_t buffer; 500133790Ssilby int error, cnt, firstseg; 501117325Ssilby static int curfail = 0; 502117325Ssilby static struct timeval lastfail; 50313675Sdyson 504125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked")); 505133790Ssilby KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW), 506133790Ssilby ("pipespace: resize of direct writes not allowed")); 507133790Ssilbyretry: 508133790Ssilby cnt = cpipe->pipe_buffer.cnt; 509133790Ssilby if (cnt > size) 510133790Ssilby size = cnt; 51179224Sdillon 512118764Ssilby size = round_page(size); 513118764Ssilby buffer = (caddr_t) vm_map_min(pipe_map); 51413675Sdyson 515122163Salc error = vm_map_find(pipe_map, NULL, 0, 516255426Sjhb (vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE, 51713688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 51876364Salfred if (error != KERN_SUCCESS) { 519133790Ssilby if ((cpipe->pipe_buffer.buffer == NULL) && 520133790Ssilby (size > SMALL_PIPE_SIZE)) { 521133790Ssilby size = SMALL_PIPE_SIZE; 522133790Ssilby pipefragretry++; 523133790Ssilby goto retry; 524133790Ssilby } 525133790Ssilby if (cpipe->pipe_buffer.buffer == NULL) { 526133790Ssilby pipeallocfail++; 527133790Ssilby if (ppsratecheck(&lastfail, &curfail, 1)) 528133790Ssilby printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n"); 529133790Ssilby } else { 530133790Ssilby piperesizefail++; 531133790Ssilby } 53276364Salfred return (ENOMEM); 53376364Salfred } 53476364Salfred 535133790Ssilby /* copy data, then free old resources if we're resizing */ 536133790Ssilby if (cnt > 0) { 537133790Ssilby if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) { 538133790Ssilby firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out; 539133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 540133790Ssilby buffer, firstseg); 541133790Ssilby if 
((cnt - firstseg) > 0) 542133790Ssilby bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg], 543133790Ssilby cpipe->pipe_buffer.in); 544133790Ssilby } else { 545133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 546133790Ssilby buffer, cnt); 547133790Ssilby } 548133790Ssilby } 54976364Salfred pipe_free_kmem(cpipe); 55076364Salfred cpipe->pipe_buffer.buffer = buffer; 55176364Salfred cpipe->pipe_buffer.size = size; 552133790Ssilby cpipe->pipe_buffer.in = cnt; 55376364Salfred cpipe->pipe_buffer.out = 0; 554133790Ssilby cpipe->pipe_buffer.cnt = cnt; 555189649Sjhb atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size); 55676364Salfred return (0); 55713907Sdyson} 55813688Sdyson 55913907Sdyson/* 560132579Srwatson * Wrapper for pipespace_new() that performs locking assertions. 561132579Srwatson */ 562132579Srwatsonstatic int 563132579Srwatsonpipespace(cpipe, size) 564132579Srwatson struct pipe *cpipe; 565132579Srwatson int size; 566132579Srwatson{ 567132579Srwatson 568133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 569133049Ssilby ("Unlocked pipe passed to pipespace")); 570132579Srwatson return (pipespace_new(cpipe, size)); 571132579Srwatson} 572132579Srwatson 573132579Srwatson/* 57413675Sdyson * lock a pipe for I/O, blocking other access 57513675Sdyson */ 57613675Sdysonstatic __inline int 57713907Sdysonpipelock(cpipe, catch) 57813675Sdyson struct pipe *cpipe; 57913907Sdyson int catch; 58013675Sdyson{ 58113776Sdyson int error; 58276364Salfred 58391362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 58491362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 58513675Sdyson cpipe->pipe_state |= PIPE_LWANT; 58691362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 58791362Salfred catch ? 
(PRIBIO | PCATCH) : PRIBIO, 58876760Salfred "pipelk", 0); 589124394Sdes if (error != 0) 59076760Salfred return (error); 59113675Sdyson } 59291362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 59376760Salfred return (0); 59413675Sdyson} 59513675Sdyson 59613675Sdyson/* 59713675Sdyson * unlock a pipe I/O lock 59813675Sdyson */ 59913675Sdysonstatic __inline void 60013675Sdysonpipeunlock(cpipe) 60113675Sdyson struct pipe *cpipe; 60213675Sdyson{ 60376364Salfred 60491362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 605133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 606133049Ssilby ("Unlocked pipe passed to pipeunlock")); 60791362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 60813675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 60913675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 61014177Sdyson wakeup(cpipe); 61113675Sdyson } 61213675Sdyson} 61313675Sdyson 614238928Sdavidxuvoid 61514037Sdysonpipeselwakeup(cpipe) 61614037Sdyson struct pipe *cpipe; 61714037Sdyson{ 61876364Salfred 619126252Srwatson PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 62014037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 621122352Stanimura selwakeuppri(&cpipe->pipe_sel, PSOCK); 622174647Sjeff if (!SEL_WAITING(&cpipe->pipe_sel)) 623174647Sjeff cpipe->pipe_state &= ~PIPE_SEL; 62414037Sdyson } 62541086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 62695883Salfred pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 627133741Sjmg KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); 62814037Sdyson} 62914037Sdyson 630126131Sgreen/* 631126131Sgreen * Initialize and allocate VM and memory for pipe. The structure 632126131Sgreen * will start out zero'd from the ctor, so we just manage the kmem. 
633126131Sgreen */ 634268335Smjgstatic void 635133790Ssilbypipe_create(pipe, backing) 636126131Sgreen struct pipe *pipe; 637133790Ssilby int backing; 638126131Sgreen{ 639126131Sgreen 640133790Ssilby if (backing) { 641268335Smjg /* 642268335Smjg * Note that these functions can fail if pipe map is exhausted 643268335Smjg * (as a result of too many pipes created), but we ignore the 644268335Smjg * error as it is not fatal and could be provoked by 645268335Smjg * unprivileged users. The only consequence is worse performance 646268335Smjg * with given pipe. 647268335Smjg */ 648133790Ssilby if (amountpipekva > maxpipekva / 2) 649268335Smjg (void)pipespace_new(pipe, SMALL_PIPE_SIZE); 650133790Ssilby else 651268335Smjg (void)pipespace_new(pipe, PIPE_SIZE); 652133790Ssilby } 653268335Smjg 654228306Skib pipe->pipe_ino = -1; 655126131Sgreen} 656126131Sgreen 65713675Sdyson/* ARGSUSED */ 65813675Sdysonstatic int 659101941Srwatsonpipe_read(fp, uio, active_cred, flags, td) 66013675Sdyson struct file *fp; 66113675Sdyson struct uio *uio; 662101941Srwatson struct ucred *active_cred; 66383366Sjulian struct thread *td; 66445311Sdt int flags; 66513675Sdyson{ 666232055Skmacy struct pipe *rpipe; 66747748Salc int error; 66813675Sdyson int nread = 0; 669232495Skib int size; 67013675Sdyson 671232055Skmacy rpipe = fp->f_data; 67291362Salfred PIPE_LOCK(rpipe); 67313675Sdyson ++rpipe->pipe_busy; 67447748Salc error = pipelock(rpipe, 1); 67547748Salc if (error) 67647748Salc goto unlocked_error; 67747748Salc 678101768Srwatson#ifdef MAC 679172930Srwatson error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); 680101768Srwatson if (error) 681101768Srwatson goto locked_error; 682101768Srwatson#endif 683133790Ssilby if (amountpipekva > (3 * maxpipekva) / 4) { 684133790Ssilby if (!(rpipe->pipe_state & PIPE_DIRECTW) && 685133790Ssilby (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 686133790Ssilby (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 687133790Ssilby (piperesizeallowed == 1)) { 
688133790Ssilby PIPE_UNLOCK(rpipe); 689133790Ssilby pipespace(rpipe, SMALL_PIPE_SIZE); 690133790Ssilby PIPE_LOCK(rpipe); 691133790Ssilby } 692133790Ssilby } 693101768Srwatson 69413675Sdyson while (uio->uio_resid) { 69513907Sdyson /* 69613907Sdyson * normal pipe buffer receive 69713907Sdyson */ 69813675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 69918863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 70013675Sdyson if (size > rpipe->pipe_buffer.cnt) 70113675Sdyson size = rpipe->pipe_buffer.cnt; 702231949Skib if (size > uio->uio_resid) 703232495Skib size = uio->uio_resid; 70447748Salc 70591362Salfred PIPE_UNLOCK(rpipe); 706116127Smux error = uiomove( 707116127Smux &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 708116127Smux size, uio); 70991362Salfred PIPE_LOCK(rpipe); 71076760Salfred if (error) 71113675Sdyson break; 71276760Salfred 71313675Sdyson rpipe->pipe_buffer.out += size; 71413675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 71513675Sdyson rpipe->pipe_buffer.out = 0; 71613675Sdyson 71713675Sdyson rpipe->pipe_buffer.cnt -= size; 71847748Salc 71947748Salc /* 72047748Salc * If there is no more to read in the pipe, reset 72147748Salc * its pointers to the beginning. This improves 72247748Salc * cache hit stats. 72347748Salc */ 72447748Salc if (rpipe->pipe_buffer.cnt == 0) { 72547748Salc rpipe->pipe_buffer.in = 0; 72647748Salc rpipe->pipe_buffer.out = 0; 72747748Salc } 72813675Sdyson nread += size; 72914037Sdyson#ifndef PIPE_NODIRECT 73013907Sdyson /* 73113907Sdyson * Direct copy, bypassing a kernel buffer. 
73213907Sdyson */ 73313907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 73447748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 735231949Skib if (size > uio->uio_resid) 73618863Sdyson size = (u_int) uio->uio_resid; 73747748Salc 73891362Salfred PIPE_UNLOCK(rpipe); 739127501Salc error = uiomove_fromphys(rpipe->pipe_map.ms, 740127501Salc rpipe->pipe_map.pos, size, uio); 74191362Salfred PIPE_LOCK(rpipe); 74213907Sdyson if (error) 74313907Sdyson break; 74413907Sdyson nread += size; 74513907Sdyson rpipe->pipe_map.pos += size; 74613907Sdyson rpipe->pipe_map.cnt -= size; 74713907Sdyson if (rpipe->pipe_map.cnt == 0) { 748238928Sdavidxu rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW); 74913907Sdyson wakeup(rpipe); 75013907Sdyson } 75114037Sdyson#endif 75213675Sdyson } else { 75313675Sdyson /* 75413675Sdyson * detect EOF condition 75576760Salfred * read returns 0 on EOF, no need to set error 75613675Sdyson */ 75776760Salfred if (rpipe->pipe_state & PIPE_EOF) 75813675Sdyson break; 75943623Sdillon 76013675Sdyson /* 76113675Sdyson * If the "write-side" has been blocked, wake it up now. 76213675Sdyson */ 76313675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 76413675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 76513675Sdyson wakeup(rpipe); 76613675Sdyson } 76743623Sdillon 76843623Sdillon /* 76947748Salc * Break if some data was read. 77043623Sdillon */ 77147748Salc if (nread > 0) 77213675Sdyson break; 77316960Sdyson 77443623Sdillon /* 775124394Sdes * Unlock the pipe buffer for our remaining processing. 776116127Smux * We will either break out with an error or we will 777116127Smux * sleep and relock to loop. 77843623Sdillon */ 77947748Salc pipeunlock(rpipe); 78043623Sdillon 78113675Sdyson /* 78247748Salc * Handle non-blocking mode operation or 78347748Salc * wait for more data. 
78413675Sdyson */ 78576760Salfred if (fp->f_flag & FNONBLOCK) { 78647748Salc error = EAGAIN; 78776760Salfred } else { 78847748Salc rpipe->pipe_state |= PIPE_WANTR; 78991362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 79091362Salfred PRIBIO | PCATCH, 79177140Salfred "piperd", 0)) == 0) 79247748Salc error = pipelock(rpipe, 1); 79313675Sdyson } 79447748Salc if (error) 79547748Salc goto unlocked_error; 79613675Sdyson } 79713675Sdyson } 798101768Srwatson#ifdef MAC 799101768Srwatsonlocked_error: 800101768Srwatson#endif 80147748Salc pipeunlock(rpipe); 80213675Sdyson 80391362Salfred /* XXX: should probably do this before getting any locks. */ 80424101Sbde if (error == 0) 80555112Sbde vfs_timestamp(&rpipe->pipe_atime); 80647748Salcunlocked_error: 80747748Salc --rpipe->pipe_busy; 80813913Sdyson 80947748Salc /* 81047748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 81147748Salc */ 81213675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 81313675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 81413675Sdyson wakeup(rpipe); 81513675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 81613675Sdyson /* 81747748Salc * Handle write blocking hysteresis. 81813675Sdyson */ 81913675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 82013675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 82113675Sdyson wakeup(rpipe); 82213675Sdyson } 82313675Sdyson } 82414037Sdyson 82514802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 82614037Sdyson pipeselwakeup(rpipe); 82714037Sdyson 82891362Salfred PIPE_UNLOCK(rpipe); 82976760Salfred return (error); 83013675Sdyson} 83113675Sdyson 83214037Sdyson#ifndef PIPE_NODIRECT 83313907Sdyson/* 83413907Sdyson * Map the sending processes' buffer into kernel space and wire it. 83513907Sdyson * This is similar to a physical write operation. 
 83613907Sdyson */
 83713675Sdysonstatic int
 83813907Sdysonpipe_build_write_buffer(wpipe, uio)
 83913907Sdyson	struct pipe *wpipe;
 84013675Sdyson	struct uio *uio;
 84113675Sdyson{
 84218863Sdyson	u_int size;
 843216511Salc	int i;
 84413907Sdyson
 84591412Salfred	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
 846133790Ssilby	KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
 847133790Ssilby	    ("Clone attempt on non-direct write pipe!"));
 84879224Sdillon
	/* Clamp the transfer to at most one pipe buffer's worth. */
 849231949Skib	if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size)
 850231949Skib		size = wpipe->pipe_buffer.size;
 851231949Skib	else
 852231949Skib		size = uio->uio_iov->iov_len;
 85313907Sdyson
	/*
	 * Fault in and hold the sender's pages for reading; on failure
	 * (bad user address) report EFAULT to the caller.
	 */
 854216699Salc	if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
 855216699Salc	    (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
 856216699Salc	    wpipe->pipe_map.ms, PIPENPAGES)) < 0)
 857193893Scperciva		return (EFAULT);
 85813907Sdyson
 85913907Sdyson/*
 86013907Sdyson * set up the control block
 86113907Sdyson */
 86213907Sdyson	wpipe->pipe_map.npages = i;
 86376760Salfred	wpipe->pipe_map.pos =
 86476760Salfred	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
 86513907Sdyson	wpipe->pipe_map.cnt = size;
 86613907Sdyson
 86713907Sdyson/*
 86813907Sdyson * and update the uio data
 86913907Sdyson */
 87013907Sdyson
 87113907Sdyson	uio->uio_iov->iov_len -= size;
 872104908Smike	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
 87313907Sdyson	if (uio->uio_iov->iov_len == 0)
 87413907Sdyson		uio->uio_iov++;
 87513907Sdyson	uio->uio_resid -= size;
 87613907Sdyson	uio->uio_offset += size;
 87776760Salfred	return (0);
 87813907Sdyson}
 87913907Sdyson
 88013907Sdyson/*
 88113907Sdyson * unmap and unwire the process buffer
 88213907Sdyson */
 88313907Sdysonstatic void
 88413907Sdysonpipe_destroy_write_buffer(wpipe)
 88576760Salfred	struct pipe *wpipe;
 88613907Sdyson{
 88776364Salfred
 888127501Salc	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	/* Release the pages held by pipe_build_write_buffer(). */
 889216511Salc	vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages);
 89091653Stanimura	wpipe->pipe_map.npages = 0;
 89113907Sdyson}
 89213907Sdyson
 89313907Sdyson/*
 89413907Sdyson * In the case of a signal, the writing process might go away.  This
 89513907Sdyson * code copies the data into the circular buffer so that the source
 89613907Sdyson * pages can be freed without loss of data.
 89713907Sdyson */
 89813907Sdysonstatic void
 89913907Sdysonpipe_clone_write_buffer(wpipe)
 90076364Salfred	struct pipe *wpipe;
 90113907Sdyson{
 902127501Salc	struct uio uio;
 903127501Salc	struct iovec iov;
 90413907Sdyson	int size;
 90513907Sdyson	int pos;
 90613907Sdyson
 90791362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
 90813907Sdyson	size = wpipe->pipe_map.cnt;
 90913907Sdyson	pos = wpipe->pipe_map.pos;
 91013907Sdyson
	/* Switch the pipe back to buffered mode before copying. */
 91113907Sdyson	wpipe->pipe_buffer.in = size;
 91213907Sdyson	wpipe->pipe_buffer.out = 0;
 91313907Sdyson	wpipe->pipe_buffer.cnt = size;
 91413907Sdyson	wpipe->pipe_state &= ~PIPE_DIRECTW;
 91513907Sdyson
	/*
	 * Drop the pipe mutex while copying from the held physical
	 * pages into the kernel pipe buffer, then release the pages.
	 */
 916119811Salc	PIPE_UNLOCK(wpipe);
 917127501Salc	iov.iov_base = wpipe->pipe_buffer.buffer;
 918127501Salc	iov.iov_len = size;
 919127501Salc	uio.uio_iov = &iov;
 920127501Salc	uio.uio_iovcnt = 1;
 921127501Salc	uio.uio_offset = 0;
 922127501Salc	uio.uio_resid = size;
 923127501Salc	uio.uio_segflg = UIO_SYSSPACE;
 924127501Salc	uio.uio_rw = UIO_READ;
 925127501Salc	uio.uio_td = curthread;
 926127501Salc	uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
 927127501Salc	PIPE_LOCK(wpipe);
 92813907Sdyson	pipe_destroy_write_buffer(wpipe);
 92913907Sdyson}
 93013907Sdyson
 93113907Sdyson/*
 93213907Sdyson * This implements the pipe buffer write mechanism.  Note that only
 93313907Sdyson * a direct write OR a normal pipe write can be pending at any given time.
 93413907Sdyson * If there are any characters in the pipe buffer, the direct write will
 93513907Sdyson * be deferred until the receiving process grabs all of the bytes from
 93613907Sdyson * the pipe buffer.  Then the direct mapping write is set-up.
 93713907Sdyson */
 93813907Sdysonstatic int
 93913907Sdysonpipe_direct_write(wpipe, uio)
 94013907Sdyson	struct pipe *wpipe;
 94113907Sdyson	struct uio *uio;
 94213907Sdyson{
 94313907Sdyson	int error;
 94476364Salfred
 94513951Sdysonretry:
 94691362Salfred	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
 947133049Ssilby	error = pipelock(wpipe, 1);
 948285971Scem	if (error != 0)
 949285971Scem		goto error1;
 950285971Scem	if ((wpipe->pipe_state & PIPE_EOF) != 0) {
 951133049Ssilby		error = EPIPE;
 952133049Ssilby		pipeunlock(wpipe);
 953133049Ssilby		goto error1;
 954133049Ssilby	}
	/* Only one direct write may be pending; wait for the previous one. */
 95513907Sdyson	while (wpipe->pipe_state & PIPE_DIRECTW) {
 95676760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
 95713951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
 95813951Sdyson			wakeup(wpipe);
 95913951Sdyson		}
 960173750Sdumbbell		pipeselwakeup(wpipe);
 96113992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
 962133049Ssilby		pipeunlock(wpipe);
 96391362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
 96491362Salfred		    PRIBIO | PCATCH, "pipdww", 0);
 96514802Sdyson		if (error)
 96613907Sdyson			goto error1;
 967133049Ssilby		else
 968133049Ssilby			goto retry;
 96913907Sdyson	}
 97013907Sdyson	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/* Defer the direct write until buffered data has been consumed. */
 97113951Sdyson	if (wpipe->pipe_buffer.cnt > 0) {
 97276760Salfred		if (wpipe->pipe_state & PIPE_WANTR) {
 97313951Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
 97413951Sdyson			wakeup(wpipe);
 97513951Sdyson		}
 976173750Sdumbbell		pipeselwakeup(wpipe);
 97713992Sdyson		wpipe->pipe_state |= PIPE_WANTW;
 978133049Ssilby		pipeunlock(wpipe);
 97991362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe),
 98091362Salfred		    PRIBIO | PCATCH, "pipdwc", 0);
 98114802Sdyson		if (error)
 98213907Sdyson			goto error1;
 983133049Ssilby		else
 984133049Ssilby			goto retry;
 98513907Sdyson	}
 98613907Sdyson
 98713951Sdyson	wpipe->pipe_state |= PIPE_DIRECTW;
 98813951Sdyson
	/* Drop the mutex while holding the sender's pages (may fault). */
 989119872Salc	PIPE_UNLOCK(wpipe);
 99013907Sdyson	error = pipe_build_write_buffer(wpipe, uio);
 991119872Salc	PIPE_LOCK(wpipe);
 99213907Sdyson	if (error) {
 99313907Sdyson		wpipe->pipe_state &=
~PIPE_DIRECTW;
 994133049Ssilby		pipeunlock(wpipe);
 99513907Sdyson		goto error1;
 99613907Sdyson	}
 99713907Sdyson
	/* Wait for the reader to consume the held pages, or for EOF. */
 99813907Sdyson	error = 0;
 99913907Sdyson	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
100013907Sdyson		if (wpipe->pipe_state & PIPE_EOF) {
100113907Sdyson			pipe_destroy_write_buffer(wpipe);
1002112981Shsu			pipeselwakeup(wpipe);
100313907Sdyson			pipeunlock(wpipe);
100414802Sdyson			error = EPIPE;
100514802Sdyson			goto error1;
100613907Sdyson		}
100713992Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
100813992Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
100913992Sdyson			wakeup(wpipe);
101013992Sdyson		}
101114037Sdyson		pipeselwakeup(wpipe);
1012238928Sdavidxu		wpipe->pipe_state |= PIPE_WANTW;
1013133049Ssilby		pipeunlock(wpipe);
101491362Salfred		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
101591362Salfred		    "pipdwt", 0);
1016133049Ssilby		pipelock(wpipe, 0);
101713907Sdyson	}
101813907Sdyson
1019126131Sgreen	if (wpipe->pipe_state & PIPE_EOF)
1020126131Sgreen		error = EPIPE;
102113907Sdyson	if (wpipe->pipe_state & PIPE_DIRECTW) {
102213907Sdyson		/*
102313907Sdyson		 * this bit of trickery substitutes a kernel buffer for
102413907Sdyson		 * the process that might be going away.
102513907Sdyson		 */
102613907Sdyson		pipe_clone_write_buffer(wpipe);
102713907Sdyson	} else {
102813907Sdyson		pipe_destroy_write_buffer(wpipe);
102913907Sdyson	}
103013907Sdyson	pipeunlock(wpipe);
103176760Salfred	return (error);
103213907Sdyson
103313907Sdysonerror1:
103413907Sdyson	wakeup(wpipe);
103576760Salfred	return (error);
103613907Sdyson}
103714037Sdyson#endif
1038124394Sdes
/*
 * Buffered pipe write.  When the direct mechanism is compiled in, large
 * user-space writes (>= PIPE_MINDIRECT) on blocking descriptors are
 * diverted through pipe_direct_write() instead.
 */
103916960Sdysonstatic int
1040101941Srwatsonpipe_write(fp, uio, active_cred, flags, td)
104116960Sdyson	struct file *fp;
104213907Sdyson	struct uio *uio;
1043101941Srwatson	struct ucred *active_cred;
104483366Sjulian	struct thread *td;
104545311Sdt	int flags;
104613907Sdyson{
1047232495Skib	int error = 0;
1048232495Skib	int desiredsize;
1049232495Skib	ssize_t orig_resid;
105016960Sdyson	struct pipe *wpipe, *rpipe;
105116960Sdyson
1052109153Sdillon	rpipe = fp->f_data;
1053232055Skmacy	wpipe = PIPE_PEER(rpipe);
105491395Salfred	PIPE_LOCK(rpipe);
1055133049Ssilby	error = pipelock(wpipe, 1);
1056133049Ssilby	if (error) {
1057133049Ssilby		PIPE_UNLOCK(rpipe);
1058133049Ssilby		return (error);
1059133049Ssilby	}
106013675Sdyson	/*
106113675Sdyson	 * detect loss of pipe read side, issue SIGPIPE if lost.
106213675Sdyson	 */
1063179243Skib	if (wpipe->pipe_present != PIPE_ACTIVE ||
1064179243Skib	    (wpipe->pipe_state & PIPE_EOF)) {
1065133049Ssilby		pipeunlock(wpipe);
106691395Salfred		PIPE_UNLOCK(rpipe);
106776760Salfred		return (EPIPE);
106813675Sdyson	}
1069101768Srwatson#ifdef MAC
1070172930Srwatson	error = mac_pipe_check_write(active_cred, wpipe->pipe_pair);
1071101768Srwatson	if (error) {
1072133049Ssilby		pipeunlock(wpipe);
1073101768Srwatson		PIPE_UNLOCK(rpipe);
1074101768Srwatson		return (error);
1075101768Srwatson	}
1076101768Srwatson#endif
107777676Sdillon	++wpipe->pipe_busy;
107813675Sdyson
1079133790Ssilby	/* Choose a larger size if it's advantageous */
1080133790Ssilby	desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size);
1081133790Ssilby	while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) {
1082133790Ssilby		if (piperesizeallowed != 1)
1083133790Ssilby			break;
1084133790Ssilby		if (amountpipekva > maxpipekva / 2)
1085133790Ssilby			break;
1086133790Ssilby		if (desiredsize == BIG_PIPE_SIZE)
1087133790Ssilby			break;
1088133790Ssilby		desiredsize = desiredsize * 2;
1089133790Ssilby	}
109017163Sdyson
1091133790Ssilby	/* Choose a smaller size if we're in a OOM situation */
1092133790Ssilby	if ((amountpipekva > (3 * maxpipekva) / 4) &&
1093133790Ssilby	    (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
1094133790Ssilby	    (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
1095133790Ssilby	    (piperesizeallowed == 1))
1096133790Ssilby		desiredsize = SMALL_PIPE_SIZE;
1097133790Ssilby
1098133790Ssilby	/* Resize if the above determined that a new size was necessary */
1099133790Ssilby	if ((desiredsize != wpipe->pipe_buffer.size) &&
1100133790Ssilby	    ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) {
1101133049Ssilby		PIPE_UNLOCK(wpipe);
1102133790Ssilby		pipespace(wpipe, desiredsize);
1103133049Ssilby		PIPE_LOCK(wpipe);
110413907Sdyson	}
1105133790Ssilby	if (wpipe->pipe_buffer.size == 0) {
1106133790Ssilby		/*
1107133790Ssilby		 * This can only happen for reverse direction use of pipes
1108133790Ssilby		 * in a complete OOM situation.
1109133790Ssilby		 */
1110133790Ssilby		error = ENOMEM;
1111133790Ssilby		--wpipe->pipe_busy;
1112133790Ssilby		pipeunlock(wpipe);
1113133790Ssilby		PIPE_UNLOCK(wpipe);
1114133790Ssilby		return (error);
1115133790Ssilby	}
111677676Sdillon
1117133049Ssilby	pipeunlock(wpipe);
1118124394Sdes
111913913Sdyson	orig_resid = uio->uio_resid;
112077676Sdillon
112113675Sdyson	while (uio->uio_resid) {
112213907Sdyson		int space;
112376760Salfred
1124133049Ssilby		pipelock(wpipe, 0);
1125133049Ssilby		if (wpipe->pipe_state & PIPE_EOF) {
1126133049Ssilby			pipeunlock(wpipe);
1127133049Ssilby			error = EPIPE;
1128133049Ssilby			break;
1129133049Ssilby		}
113014037Sdyson#ifndef PIPE_NODIRECT
113113907Sdyson		/*
113213907Sdyson		 * If the transfer is large, we can gain performance if
113313907Sdyson		 * we do process-to-process copies directly.
113416416Sdyson		 * If the write is non-blocking, we don't use the
113516416Sdyson		 * direct write mechanism.
113658505Sdillon		 *
113758505Sdillon		 * The direct write mechanism will detect the reader going
113858505Sdillon		 * away on us.
113913907Sdyson		 */
1140165347Spjd		if (uio->uio_segflg == UIO_USERSPACE &&
1141165347Spjd		    uio->uio_iov->iov_len >= PIPE_MINDIRECT &&
1142165347Spjd		    wpipe->pipe_buffer.size >= PIPE_MINDIRECT &&
1143127501Salc		    (fp->f_flag & FNONBLOCK) == 0) {
1144133049Ssilby			pipeunlock(wpipe);
1145105009Salfred			error = pipe_direct_write(wpipe, uio);
114676760Salfred			if (error)
114713907Sdyson				break;
114813907Sdyson			continue;
114991362Salfred		}
115014037Sdyson#endif
115113907Sdyson
115213907Sdyson		/*
115313907Sdyson		 * Pipe buffered writes cannot be coincidental with
115413907Sdyson		 * direct writes.  We wait until the currently executing
115513907Sdyson		 * direct write is completed before we start filling the
115613907Sdyson		 * pipe buffer.  We break out if a signal occurs or the
115758505Sdillon		 * reader goes away.
115813907Sdyson		 */
1159133049Ssilby		if (wpipe->pipe_state & PIPE_DIRECTW) {
116013992Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
116113992Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
116213992Sdyson				wakeup(wpipe);
116313992Sdyson			}
1164173750Sdumbbell			pipeselwakeup(wpipe);
1165173750Sdumbbell			wpipe->pipe_state |= PIPE_WANTW;
1166133049Ssilby			pipeunlock(wpipe);
116791395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
116891362Salfred			    "pipbww", 0);
116913907Sdyson			if (error)
117013907Sdyson				break;
1171133049Ssilby			else
1172133049Ssilby				continue;
117313907Sdyson		}
117413907Sdyson
117513907Sdyson		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
117614644Sdyson
117714644Sdyson		/* Writes of size <= PIPE_BUF must be atomic. */
117813913Sdyson		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
117913913Sdyson			space = 0;
118013907Sdyson
1181118230Spb		if (space > 0) {
1182133049Ssilby			int size;	/* Transfer size */
1183133049Ssilby			int segsize;	/* first segment to transfer */
118476760Salfred
1185133049Ssilby			/*
1186133049Ssilby			 * Transfer size is minimum of uio transfer
1187133049Ssilby			 * and free space in pipe buffer.
1188133049Ssilby			 */
1189133049Ssilby			if (space > uio->uio_resid)
1190133049Ssilby				size = uio->uio_resid;
1191133049Ssilby			else
1192133049Ssilby				size = space;
1193133049Ssilby			/*
1194133049Ssilby			 * First segment to transfer is minimum of
1195133049Ssilby			 * transfer size and contiguous space in
1196133049Ssilby			 * pipe buffer.  If first segment to transfer
1197133049Ssilby			 * is less than the transfer size, we've got
1198133049Ssilby			 * a wraparound in the buffer.
1199133049Ssilby			 */
1200133049Ssilby			segsize = wpipe->pipe_buffer.size -
1201133049Ssilby				wpipe->pipe_buffer.in;
1202133049Ssilby			if (segsize > size)
1203133049Ssilby				segsize = size;
120454534Stegge
1205133049Ssilby			/* Transfer first segment */
1206133049Ssilby
1207133049Ssilby			PIPE_UNLOCK(rpipe);
1208133049Ssilby			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
1209133049Ssilby					segsize, uio);
1210133049Ssilby			PIPE_LOCK(rpipe);
1211133049Ssilby
1212133049Ssilby			if (error == 0 && segsize < size) {
1213133049Ssilby				KASSERT(wpipe->pipe_buffer.in + segsize ==
1214133049Ssilby					wpipe->pipe_buffer.size,
1215133049Ssilby					("Pipe buffer wraparound disappeared"));
121654534Stegge				/*
1217133049Ssilby				 * Transfer remaining part now, to
1218133049Ssilby				 * support atomic writes.  Wraparound
1219133049Ssilby				 * happened.
122054534Stegge				 */
1221124394Sdes
122291395Salfred				PIPE_UNLOCK(rpipe);
1223133049Ssilby				error = uiomove(
1224133049Ssilby				    &wpipe->pipe_buffer.buffer[0],
1225133049Ssilby				    size - segsize, uio);
122691395Salfred				PIPE_LOCK(rpipe);
1227133049Ssilby			}
1228133049Ssilby			if (error == 0) {
1229133049Ssilby				wpipe->pipe_buffer.in += size;
1230133049Ssilby				if (wpipe->pipe_buffer.in >=
1231133049Ssilby				    wpipe->pipe_buffer.size) {
1232133049Ssilby					KASSERT(wpipe->pipe_buffer.in ==
1233133049Ssilby						size - segsize +
1234133049Ssilby						wpipe->pipe_buffer.size,
1235133049Ssilby						("Expected wraparound bad"));
1236133049Ssilby					wpipe->pipe_buffer.in = size - segsize;
123754534Stegge				}
1238124394Sdes
1239133049Ssilby				wpipe->pipe_buffer.cnt += size;
1240133049Ssilby				KASSERT(wpipe->pipe_buffer.cnt <=
1241133049Ssilby					wpipe->pipe_buffer.size,
1242133049Ssilby					("Pipe buffer overflow"));
124313675Sdyson			}
1244133049Ssilby			pipeunlock(wpipe);
1245153484Sdelphij			if (error != 0)
1246153484Sdelphij				break;
124713675Sdyson		} else {
124813675Sdyson			/*
124913675Sdyson			 * If the "read-side" has been blocked, wake it up now.
125013675Sdyson			 */
125113675Sdyson			if (wpipe->pipe_state & PIPE_WANTR) {
125213675Sdyson				wpipe->pipe_state &= ~PIPE_WANTR;
125313675Sdyson				wakeup(wpipe);
125413675Sdyson			}
125514037Sdyson
125613675Sdyson			/*
125713675Sdyson			 * don't block on non-blocking I/O
125813675Sdyson			 */
125916960Sdyson			if (fp->f_flag & FNONBLOCK) {
126013907Sdyson				error = EAGAIN;
1261133049Ssilby				pipeunlock(wpipe);
126213675Sdyson				break;
126313675Sdyson			}
126413907Sdyson
126514037Sdyson			/*
126614037Sdyson			 * We have no more space and have something to offer,
126729356Speter			 * wake up select/poll.
126814037Sdyson			 */
126914037Sdyson			pipeselwakeup(wpipe);
127014037Sdyson
127113675Sdyson			wpipe->pipe_state |= PIPE_WANTW;
1272133049Ssilby			pipeunlock(wpipe);
127391395Salfred			error = msleep(wpipe, PIPE_MTX(rpipe),
127491362Salfred			    PRIBIO | PCATCH, "pipewr", 0);
127576760Salfred			if (error != 0)
127613675Sdyson				break;
127713675Sdyson		}
127813675Sdyson	}
127913675Sdyson
1280133049Ssilby	pipelock(wpipe, 0);
128114644Sdyson	--wpipe->pipe_busy;
128277676Sdillon
128376760Salfred	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
128476760Salfred		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
128513675Sdyson		wakeup(wpipe);
128613675Sdyson	} else if (wpipe->pipe_buffer.cnt > 0) {
128713675Sdyson		/*
128813675Sdyson		 * If we have put any characters in the buffer, we wake up
128913675Sdyson		 * the reader.
129013675Sdyson		 */
129113675Sdyson		if (wpipe->pipe_state & PIPE_WANTR) {
129213675Sdyson			wpipe->pipe_state &= ~PIPE_WANTR;
129313675Sdyson			wakeup(wpipe);
129413675Sdyson		}
129513675Sdyson	}
129613909Sdyson
129713909Sdyson	/*
1298274609Skib	 * Don't return EPIPE if any byte was written.
1299274609Skib	 * EINTR and other interrupts are handled by generic I/O layer.
1300274609Skib	 * Do not pretend that I/O succeeded for obvious user error
1301274609Skib	 * like EFAULT.
130213909Sdyson	 */
1303274609Skib	if (uio->uio_resid != orig_resid && error == EPIPE)
130413907Sdyson		error = 0;
130513913Sdyson
130624101Sbde	if (error == 0)
130755112Sbde		vfs_timestamp(&wpipe->pipe_mtime);
130824101Sbde
130914037Sdyson	/*
131014037Sdyson	 * We have something to offer,
131129356Speter	 * wake up select/poll.
131214037Sdyson	 */
131314177Sdyson	if (wpipe->pipe_buffer.cnt)
131414037Sdyson		pipeselwakeup(wpipe);
131513907Sdyson
1316133049Ssilby	pipeunlock(wpipe);
131791395Salfred	PIPE_UNLOCK(rpipe);
131876760Salfred	return (error);
131913675Sdyson}
132013675Sdyson
1321175140Sjhb/* ARGSUSED */
/*
 * fo_truncate handler: anonymous pipes cannot be truncated (EINVAL);
 * named pipes delegate to the vnode file operations.
 */
1322175140Sjhbstatic int
1323175140Sjhbpipe_truncate(fp, length, active_cred, td)
1324175140Sjhb	struct file *fp;
1325175140Sjhb	off_t length;
1326175140Sjhb	struct ucred *active_cred;
1327175140Sjhb	struct thread *td;
1328175140Sjhb{
1329175140Sjhb
1330232055Skmacy	/* For named pipes call the vnode operation. */
1331232055Skmacy	if (fp->f_vnode != NULL)
1332232055Skmacy		return (vnops.fo_truncate(fp, length, active_cred, td));
1333175140Sjhb	return (EINVAL);
1334175140Sjhb}
1335175140Sjhb
133613675Sdyson/*
133713675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets.
133813675Sdyson */
1339104094Sphkstatic int
1340102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td)
134113675Sdyson	struct file *fp;
134236735Sdfr	u_long cmd;
134399009Salfred	void *data;
1344102003Srwatson	struct ucred *active_cred;
134583366Sjulian	struct thread *td;
134613675Sdyson{
1347109153Sdillon	struct pipe *mpipe = fp->f_data;
1348101768Srwatson	int error;
134913675Sdyson
1350104269Srwatson	PIPE_LOCK(mpipe);
1351104269Srwatson
1352104269Srwatson#ifdef MAC
1353172930Srwatson	error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
1354121970Srwatson	if (error) {
1355121970Srwatson		PIPE_UNLOCK(mpipe);
1356101768Srwatson		return (error);
1357121970Srwatson	}
1358101768Srwatson#endif
1359101768Srwatson
1360137752Sphk	error = 0;
136113675Sdyson	switch (cmd) {
136213675Sdyson
136313675Sdyson	case FIONBIO:
1364137752Sphk		break;
136513675Sdyson
136613675Sdyson	case FIOASYNC:
136713675Sdyson		if (*(int *)data) {
136813675Sdyson			mpipe->pipe_state |= PIPE_ASYNC;
136913675Sdyson		} else {
137013675Sdyson			mpipe->pipe_state &= ~PIPE_ASYNC;
137113675Sdyson		}
1372137752Sphk		break;
137313675Sdyson
137413675Sdyson	case FIONREAD:
1375232055Skmacy		if (!(fp->f_flag & FREAD)) {
1376232055Skmacy			*(int *)data = 0;
1377232055Skmacy			PIPE_UNLOCK(mpipe);
1378232055Skmacy			return (0);
1379232055Skmacy		}
		/* During a direct write the data lives in pipe_map. */
138014037Sdyson		if (mpipe->pipe_state & PIPE_DIRECTW)
138114037Sdyson			*(int *)data = mpipe->pipe_map.cnt;
138214037Sdyson		else
138314037Sdyson			*(int *)data = mpipe->pipe_buffer.cnt;
1384137752Sphk		break;
138513675Sdyson
138641086Struckman	case FIOSETOWN:
		/* fsetown() may sleep; drop the pipe mutex first. */
1387138032Srwatson		PIPE_UNLOCK(mpipe);
1388137752Sphk		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
1389138032Srwatson		goto out_unlocked;
139041086Struckman
139141086Struckman	case FIOGETOWN:
1392104393Struckman		*(int *)data = fgetown(&mpipe->pipe_sigio);
1393137752Sphk		break;
139413675Sdyson
139541086Struckman	/* This is deprecated, FIOSETOWN should be used instead.
 */
139641086Struckman	case TIOCSPGRP:
1397138032Srwatson		PIPE_UNLOCK(mpipe);
1398137752Sphk		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
1399138032Srwatson		goto out_unlocked;
140041086Struckman
140141086Struckman	/* This is deprecated, FIOGETOWN should be used instead. */
140218863Sdyson	case TIOCGPGRP:
1403104393Struckman		*(int *)data = -fgetown(&mpipe->pipe_sigio);
1404137752Sphk		break;
140513675Sdyson
1406137752Sphk	default:
1407137752Sphk		error = ENOTTY;
1408137764Sphk		break;
140913675Sdyson	}
1410104269Srwatson	PIPE_UNLOCK(mpipe);
1411138032Srwatsonout_unlocked:
1412137752Sphk	return (error);
141313675Sdyson}
141413675Sdyson
/*
 * Poll handler: reports readability/writability without blocking and
 * registers with selrecord() when no requested events are ready.
 */
1415104094Sphkstatic int
1416101983Srwatsonpipe_poll(fp, events, active_cred, td)
141713675Sdyson	struct file *fp;
141829356Speter	int events;
1419101983Srwatson	struct ucred *active_cred;
142083366Sjulian	struct thread *td;
142113675Sdyson{
1422232055Skmacy	struct pipe *rpipe;
142313675Sdyson	struct pipe *wpipe;
1424232055Skmacy	int levents, revents;
1425101768Srwatson#ifdef MAC
1426101768Srwatson	int error;
1427101768Srwatson#endif
142813675Sdyson
1429232055Skmacy	revents = 0;
1430232055Skmacy	rpipe = fp->f_data;
1431232055Skmacy	wpipe = PIPE_PEER(rpipe);
143291362Salfred	PIPE_LOCK(rpipe);
1433101768Srwatson#ifdef MAC
1434172930Srwatson	error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair);
1435101768Srwatson	if (error)
1436101768Srwatson		goto locked_error;
1437101768Srwatson#endif
1438232055Skmacy	if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM))
143929356Speter		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1440195423Skib		    (rpipe->pipe_buffer.cnt > 0))
144129356Speter			revents |= events & (POLLIN | POLLRDNORM);
144213675Sdyson
1443232055Skmacy	if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM))
1444179243Skib		if (wpipe->pipe_present != PIPE_ACTIVE ||
1445179243Skib		    (wpipe->pipe_state & PIPE_EOF) ||
144643311Sdillon		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
1447228510Sjilles		     ((wpipe->pipe_buffer.size -
wpipe->pipe_buffer.cnt) >= PIPE_BUF ||
1448228510Sjilles		      wpipe->pipe_buffer.size == 0)))
144929356Speter			revents |= events & (POLLOUT | POLLWRNORM);
145013675Sdyson
1451232055Skmacy	levents = events &
1452232055Skmacy	    (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
1453232055Skmacy	if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents &&
1454238936Sdavidxu	    fp->f_seqcount == rpipe->pipe_wgen)
1455232055Skmacy		events |= POLLINIGNEOF;
1456232055Skmacy
1457195423Skib	if ((events & POLLINIGNEOF) == 0) {
1458195423Skib		if (rpipe->pipe_state & PIPE_EOF) {
1459195423Skib			revents |= (events & (POLLIN | POLLRDNORM));
1460195423Skib			if (wpipe->pipe_present != PIPE_ACTIVE ||
1461195423Skib			    (wpipe->pipe_state & PIPE_EOF))
1462195423Skib				revents |= POLLHUP;
1463195423Skib		}
1464195423Skib	}
146529356Speter
146629356Speter	if (revents == 0) {
1467232055Skmacy		if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) {
146883805Sjhb			selrecord(td, &rpipe->pipe_sel);
1469174647Sjeff			if (SEL_WAITING(&rpipe->pipe_sel))
1470174647Sjeff				rpipe->pipe_state |= PIPE_SEL;
147113675Sdyson		}
147213675Sdyson
1473232055Skmacy		if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) {
147483805Sjhb			selrecord(td, &wpipe->pipe_sel);
1475174647Sjeff			if (SEL_WAITING(&wpipe->pipe_sel))
1476174647Sjeff				wpipe->pipe_state |= PIPE_SEL;
147713907Sdyson		}
147813675Sdyson	}
1479101768Srwatson#ifdef MAC
1480101768Srwatsonlocked_error:
1481101768Srwatson#endif
148291362Salfred	PIPE_UNLOCK(rpipe);
148329356Speter
148429356Speter	return (revents);
148513675Sdyson}
148613675Sdyson
148798989Salfred/*
148898989Salfred * We shouldn't need locks here as we're doing a read and this should
148998989Salfred * be a natural race.
149098989Salfred */ 149152983Speterstatic int 1492101983Srwatsonpipe_stat(fp, ub, active_cred, td) 149352983Speter struct file *fp; 149452983Speter struct stat *ub; 1495101983Srwatson struct ucred *active_cred; 149683366Sjulian struct thread *td; 149713675Sdyson{ 1498228306Skib struct pipe *pipe; 1499228306Skib int new_unr; 1500101768Srwatson#ifdef MAC 1501101768Srwatson int error; 1502228306Skib#endif 150352983Speter 1504228306Skib pipe = fp->f_data; 1505104269Srwatson PIPE_LOCK(pipe); 1506228306Skib#ifdef MAC 1507172930Srwatson error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); 1508228306Skib if (error) { 1509228306Skib PIPE_UNLOCK(pipe); 1510101768Srwatson return (error); 1511228306Skib } 1512101768Srwatson#endif 1513232055Skmacy 1514232055Skmacy /* For named pipes ask the underlying filesystem. */ 1515232055Skmacy if (pipe->pipe_state & PIPE_NAMED) { 1516232055Skmacy PIPE_UNLOCK(pipe); 1517232055Skmacy return (vnops.fo_stat(fp, ub, active_cred, td)); 1518232055Skmacy } 1519232055Skmacy 1520228306Skib /* 1521228306Skib * Lazily allocate an inode number for the pipe. Most pipe 1522228306Skib * users do not call fstat(2) on the pipe, which means that 1523228306Skib * postponing the inode allocation until it is must be 1524228306Skib * returned to userland is useful. If alloc_unr failed, 1525228306Skib * assign st_ino zero instead of returning an error. 1526228306Skib * Special pipe_ino values: 1527228306Skib * -1 - not yet initialized; 1528228306Skib * 0 - alloc_unr failed, return 0 as st_ino forever. 
1529228306Skib */ 1530228306Skib if (pipe->pipe_ino == (ino_t)-1) { 1531228306Skib new_unr = alloc_unr(pipeino_unr); 1532228306Skib if (new_unr != -1) 1533228306Skib pipe->pipe_ino = new_unr; 1534228306Skib else 1535228306Skib pipe->pipe_ino = 0; 1536228306Skib } 1537228306Skib PIPE_UNLOCK(pipe); 1538228306Skib 1539100527Salfred bzero(ub, sizeof(*ub)); 154017124Sbde ub->st_mode = S_IFIFO; 1541133790Ssilby ub->st_blksize = PAGE_SIZE; 1542132436Ssilby if (pipe->pipe_state & PIPE_DIRECTW) 1543132436Ssilby ub->st_size = pipe->pipe_map.cnt; 1544132436Ssilby else 1545132436Ssilby ub->st_size = pipe->pipe_buffer.cnt; 154613675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 1547205792Sed ub->st_atim = pipe->pipe_atime; 1548205792Sed ub->st_mtim = pipe->pipe_mtime; 1549205792Sed ub->st_ctim = pipe->pipe_ctime; 155060404Schris ub->st_uid = fp->f_cred->cr_uid; 155160404Schris ub->st_gid = fp->f_cred->cr_gid; 1552226042Skib ub->st_dev = pipedev_ino; 1553226042Skib ub->st_ino = pipe->pipe_ino; 155417124Sbde /* 1555226042Skib * Left as 0: st_nlink, st_rdev, st_flags, st_gen. 
155617124Sbde */ 155776760Salfred return (0); 155813675Sdyson} 155913675Sdyson 156013675Sdyson/* ARGSUSED */ 156113675Sdysonstatic int 156283366Sjulianpipe_close(fp, td) 156313675Sdyson struct file *fp; 156483366Sjulian struct thread *td; 156513675Sdyson{ 156616322Sgpalmer 1567232055Skmacy if (fp->f_vnode != NULL) 1568232055Skmacy return vnops.fo_close(fp, td); 156949413Sgreen fp->f_ops = &badfileops; 1570232055Skmacy pipe_dtor(fp->f_data); 1571109153Sdillon fp->f_data = NULL; 157276760Salfred return (0); 157313675Sdyson} 157413675Sdyson 1575232183Sjillesstatic int 1576232271Sdimpipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) 1577232183Sjilles{ 1578232183Sjilles struct pipe *cpipe; 1579232183Sjilles int error; 1580232183Sjilles 1581232183Sjilles cpipe = fp->f_data; 1582232183Sjilles if (cpipe->pipe_state & PIPE_NAMED) 1583232183Sjilles error = vn_chmod(fp, mode, active_cred, td); 1584232183Sjilles else 1585232183Sjilles error = invfo_chmod(fp, mode, active_cred, td); 1586232183Sjilles return (error); 1587232183Sjilles} 1588232183Sjilles 1589232183Sjillesstatic int 1590232183Sjillespipe_chown(fp, uid, gid, active_cred, td) 1591232183Sjilles struct file *fp; 1592232183Sjilles uid_t uid; 1593232183Sjilles gid_t gid; 1594232183Sjilles struct ucred *active_cred; 1595232183Sjilles struct thread *td; 1596232183Sjilles{ 1597232183Sjilles struct pipe *cpipe; 1598232183Sjilles int error; 1599232183Sjilles 1600232183Sjilles cpipe = fp->f_data; 1601232183Sjilles if (cpipe->pipe_state & PIPE_NAMED) 1602232183Sjilles error = vn_chown(fp, uid, gid, active_cred, td); 1603232183Sjilles else 1604232183Sjilles error = invfo_chown(fp, uid, gid, active_cred, td); 1605232183Sjilles return (error); 1606232183Sjilles} 1607232183Sjilles 160876364Salfredstatic void 160976364Salfredpipe_free_kmem(cpipe) 161076364Salfred struct pipe *cpipe; 161176364Salfred{ 161291412Salfred 1613125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), 1614125293Srwatson 
("pipe_free_kmem: pipe mutex locked")); 161576364Salfred 161676364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 1617189649Sjhb atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size); 1618118764Ssilby vm_map_remove(pipe_map, 1619118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer, 1620118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 162176364Salfred cpipe->pipe_buffer.buffer = NULL; 162276364Salfred } 162376364Salfred#ifndef PIPE_NODIRECT 1624127501Salc { 162576364Salfred cpipe->pipe_map.cnt = 0; 162676364Salfred cpipe->pipe_map.pos = 0; 162776364Salfred cpipe->pipe_map.npages = 0; 162876364Salfred } 162976364Salfred#endif 163076364Salfred} 163176364Salfred 163213675Sdyson/* 163313675Sdyson * shutdown the pipe 163413675Sdyson */ 163513675Sdysonstatic void 163613675Sdysonpipeclose(cpipe) 163713675Sdyson struct pipe *cpipe; 163813675Sdyson{ 1639125293Srwatson struct pipepair *pp; 164013907Sdyson struct pipe *ppipe; 164176364Salfred 1642125293Srwatson KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL")); 164391968Salfred 1644125293Srwatson PIPE_LOCK(cpipe); 1645133049Ssilby pipelock(cpipe, 0); 1646125293Srwatson pp = cpipe->pipe_pair; 164791968Salfred 164891968Salfred pipeselwakeup(cpipe); 164913907Sdyson 165091968Salfred /* 165191968Salfred * If the other side is blocked, wake it up saying that 165291968Salfred * we want to close it down. 165391968Salfred */ 1654126131Sgreen cpipe->pipe_state |= PIPE_EOF; 165591968Salfred while (cpipe->pipe_busy) { 165691968Salfred wakeup(cpipe); 1657126131Sgreen cpipe->pipe_state |= PIPE_WANT; 1658133049Ssilby pipeunlock(cpipe); 165991968Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1660133049Ssilby pipelock(cpipe, 0); 166191968Salfred } 166213675Sdyson 1663101768Srwatson 166491968Salfred /* 1665125293Srwatson * Disconnect from peer, if any. 
166691968Salfred */ 1667125293Srwatson ppipe = cpipe->pipe_peer; 1668179243Skib if (ppipe->pipe_present == PIPE_ACTIVE) { 166991968Salfred pipeselwakeup(ppipe); 167013907Sdyson 167191968Salfred ppipe->pipe_state |= PIPE_EOF; 167291968Salfred wakeup(ppipe); 1673133741Sjmg KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0); 167491968Salfred } 1675125293Srwatson 167691968Salfred /* 1677125293Srwatson * Mark this endpoint as free. Release kmem resources. We 1678125293Srwatson * don't mark this endpoint as unused until we've finished 1679125293Srwatson * doing that, or the pipe might disappear out from under 1680125293Srwatson * us. 168191968Salfred */ 1682125293Srwatson PIPE_UNLOCK(cpipe); 1683125293Srwatson pipe_free_kmem(cpipe); 1684125293Srwatson PIPE_LOCK(cpipe); 1685179243Skib cpipe->pipe_present = PIPE_CLOSING; 1686126131Sgreen pipeunlock(cpipe); 1687179243Skib 1688179243Skib /* 1689179243Skib * knlist_clear() may sleep dropping the PIPE_MTX. Set the 1690179243Skib * PIPE_FINALIZED, that allows other end to free the 1691179243Skib * pipe_pair, only after the knotes are completely dismantled. 1692179243Skib */ 1693133741Sjmg knlist_clear(&cpipe->pipe_sel.si_note, 1); 1694179243Skib cpipe->pipe_present = PIPE_FINALIZED; 1695225177Sattilio seldrain(&cpipe->pipe_sel); 1696133741Sjmg knlist_destroy(&cpipe->pipe_sel.si_note); 1697125293Srwatson 1698125293Srwatson /* 1699125293Srwatson * If both endpoints are now closed, release the memory for the 1700125293Srwatson * pipe pair. If not, unlock. 
1701125293Srwatson */ 1702179243Skib if (ppipe->pipe_present == PIPE_FINALIZED) { 170391968Salfred PIPE_UNLOCK(cpipe); 1704125293Srwatson#ifdef MAC 1705172930Srwatson mac_pipe_destroy(pp); 1706125293Srwatson#endif 1707125293Srwatson uma_zfree(pipe_zone, cpipe->pipe_pair); 1708125293Srwatson } else 1709125293Srwatson PIPE_UNLOCK(cpipe); 171013675Sdyson} 171159288Sjlemon 171272521Sjlemon/*ARGSUSED*/ 171359288Sjlemonstatic int 171472521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 171559288Sjlemon{ 171689306Salfred struct pipe *cpipe; 171759288Sjlemon 1718232055Skmacy /* 1719232055Skmacy * If a filter is requested that is not supported by this file 1720232055Skmacy * descriptor, don't return an error, but also don't ever generate an 1721232055Skmacy * event. 1722232055Skmacy */ 1723232055Skmacy if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) { 1724232055Skmacy kn->kn_fop = &pipe_nfiltops; 1725232055Skmacy return (0); 1726232055Skmacy } 1727232055Skmacy if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) { 1728232055Skmacy kn->kn_fop = &pipe_nfiltops; 1729232055Skmacy return (0); 1730232055Skmacy } 1731232055Skmacy cpipe = fp->f_data; 1732126131Sgreen PIPE_LOCK(cpipe); 173372521Sjlemon switch (kn->kn_filter) { 173472521Sjlemon case EVFILT_READ: 173572521Sjlemon kn->kn_fop = &pipe_rfiltops; 173672521Sjlemon break; 173772521Sjlemon case EVFILT_WRITE: 173872521Sjlemon kn->kn_fop = &pipe_wfiltops; 1739179243Skib if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) { 1740101382Sdes /* other end of pipe has been closed */ 1741126131Sgreen PIPE_UNLOCK(cpipe); 1742118929Sjmg return (EPIPE); 1743126131Sgreen } 1744232055Skmacy cpipe = PIPE_PEER(cpipe); 174572521Sjlemon break; 174672521Sjlemon default: 1747126131Sgreen PIPE_UNLOCK(cpipe); 1748133741Sjmg return (EINVAL); 174972521Sjlemon } 175078292Sjlemon 1751232055Skmacy kn->kn_hook = cpipe; 1752133741Sjmg knlist_add(&cpipe->pipe_sel.si_note, kn, 1); 175391372Salfred PIPE_UNLOCK(cpipe); 
175459288Sjlemon return (0); 175559288Sjlemon} 175659288Sjlemon 175759288Sjlemonstatic void 175859288Sjlemonfilt_pipedetach(struct knote *kn) 175959288Sjlemon{ 1760232055Skmacy struct pipe *cpipe = kn->kn_hook; 176159288Sjlemon 1762126131Sgreen PIPE_LOCK(cpipe); 1763133741Sjmg knlist_remove(&cpipe->pipe_sel.si_note, kn, 1); 176491372Salfred PIPE_UNLOCK(cpipe); 176559288Sjlemon} 176659288Sjlemon 176759288Sjlemon/*ARGSUSED*/ 176859288Sjlemonstatic int 176959288Sjlemonfilt_piperead(struct knote *kn, long hint) 177059288Sjlemon{ 1771232055Skmacy struct pipe *rpipe = kn->kn_hook; 177259288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 1773133741Sjmg int ret; 177459288Sjlemon 1775276353Skib PIPE_LOCK_ASSERT(rpipe, MA_OWNED); 177659288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 177759288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 177859288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 177959288Sjlemon 178059288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 1781179243Skib wpipe->pipe_present != PIPE_ACTIVE || 1782179243Skib (wpipe->pipe_state & PIPE_EOF)) { 178391372Salfred kn->kn_flags |= EV_EOF; 178459288Sjlemon return (1); 178559288Sjlemon } 1786133741Sjmg ret = kn->kn_data > 0; 1787133741Sjmg return ret; 178859288Sjlemon} 178959288Sjlemon 179059288Sjlemon/*ARGSUSED*/ 179159288Sjlemonstatic int 179259288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 179359288Sjlemon{ 1794232055Skmacy struct pipe *wpipe; 1795232055Skmacy 1796232055Skmacy wpipe = kn->kn_hook; 1797276353Skib PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 1798179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1799179243Skib (wpipe->pipe_state & PIPE_EOF)) { 180059288Sjlemon kn->kn_data = 0; 1801124394Sdes kn->kn_flags |= EV_EOF; 180259288Sjlemon return (1); 180359288Sjlemon } 1804228510Sjilles kn->kn_data = (wpipe->pipe_buffer.size > 0) ? 
1805228510Sjilles (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) : PIPE_BUF; 180665855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 180759288Sjlemon kn->kn_data = 0; 180859288Sjlemon 180959288Sjlemon return (kn->kn_data >= PIPE_BUF); 181059288Sjlemon} 1811232055Skmacy 1812232055Skmacystatic void 1813232055Skmacyfilt_pipedetach_notsup(struct knote *kn) 1814232055Skmacy{ 1815232055Skmacy 1816232055Skmacy} 1817232055Skmacy 1818232055Skmacystatic int 1819232055Skmacyfilt_pipenotsup(struct knote *kn, long hint) 1820232055Skmacy{ 1821232055Skmacy 1822232055Skmacy return (0); 1823232055Skmacy} 1824