1139804Simp/*- 213675Sdyson * Copyright (c) 1996 John S. Dyson 3232055Skmacy * Copyright (c) 2012 Giovanni Trematerra 413675Sdyson * All rights reserved. 513675Sdyson * 613675Sdyson * Redistribution and use in source and binary forms, with or without 713675Sdyson * modification, are permitted provided that the following conditions 813675Sdyson * are met: 913675Sdyson * 1. Redistributions of source code must retain the above copyright 1013675Sdyson * notice immediately at the beginning of the file, without modification, 1113675Sdyson * this list of conditions, and the following disclaimer. 1213675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1313675Sdyson * notice, this list of conditions and the following disclaimer in the 1413675Sdyson * documentation and/or other materials provided with the distribution. 1513675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1613675Sdyson * John S. Dyson. 1714037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1813675Sdyson * are met. 1913675Sdyson */ 2013675Sdyson 2113675Sdyson/* 2213675Sdyson * This file contains a high-performance replacement for the socket-based 2313675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2413675Sdyson * all features of sockets, but does do everything that pipes normally 2513675Sdyson * do. 2613675Sdyson */ 2713675Sdyson 2813907Sdyson/* 2913907Sdyson * This code has two modes of operation, a small write mode and a large 3013907Sdyson * write mode. The small write mode acts like conventional pipes with 3113907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3213907Sdyson * "normal" pipe buffering is done. 
If the buffer is between PIPE_MINDIRECT 33219801Salc * and PIPE_SIZE in size, the sending process pins the underlying pages in 34219801Salc * memory, and the receiving process copies directly from these pinned pages 35219801Salc * in the sending process. 3613907Sdyson * 3713907Sdyson * If the sending process receives a signal, it is possible that it will 3813913Sdyson * go away, and certainly its address space can change, because control 3913907Sdyson * is returned back to the user-mode side. In that case, the pipe code 4013907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4113907Sdyson * kernel buffer, and the receiving process will grab the data from the 4213907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4313907Sdyson * the copy operation is normally eliminated. 4413907Sdyson * 4513907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4613907Sdyson * happen for small transfers so that the system will not spend all of 47118764Ssilby * its time context switching. 48117325Ssilby * 49118764Ssilby * In order to limit the resource use of pipes, two sysctls exist: 50117325Ssilby * 51118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 52133790Ssilby * address space available to us in pipe_map. This value is normally 53133790Ssilby * autotuned, but may also be loader tuned. 54117325Ssilby * 55133790Ssilby * kern.ipc.pipekva - This read-only sysctl tracks the current amount of 56133790Ssilby * memory in use by pipes. 57117325Ssilby * 58133790Ssilby * Based on how large pipekva is relative to maxpipekva, the following 59133790Ssilby * will happen: 60117325Ssilby * 61133790Ssilby * 0% - 50%: 62133790Ssilby * New pipes are given 16K of memory backing, pipes may dynamically 63133790Ssilby * grow to as large as 64K where needed. 
64133790Ssilby * 50% - 75%: 65133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 66133790Ssilby * existing pipes may NOT grow. 67133790Ssilby * 75% - 100%: 68133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 69133790Ssilby * existing pipes will be shrunk down to 4K whenever possible. 70133049Ssilby * 71133790Ssilby * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0. If 72133790Ssilby * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE 73133790Ssilby * resize which MUST occur for reverse-direction pipes when they are 74133790Ssilby * first used. 75133790Ssilby * 76133790Ssilby * Additional information about the current state of pipes may be obtained 77133790Ssilby * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail, 78133790Ssilby * and kern.ipc.piperesizefail. 79133790Ssilby * 80133049Ssilby * Locking rules: There are two locks present here: A mutex, used via 81133049Ssilby * PIPE_LOCK, and a flag, used via pipelock(). All locking is done via 82133049Ssilby * the flag, as mutexes can not persist over uiomove. The mutex 83133049Ssilby * exists only to guard access to the flag, and is not in itself a 84133790Ssilby * locking mechanism. Also note that there is only a single mutex for 85133790Ssilby * both directions of a pipe. 86133049Ssilby * 87133049Ssilby * As pipelock() may have to sleep before it can acquire the flag, it 88133049Ssilby * is important to reread all data after a call to pipelock(); everything 89133049Ssilby * in the structure may have changed. 
9013907Sdyson */ 9113907Sdyson 92116182Sobrien#include <sys/cdefs.h> 93116182Sobrien__FBSDID("$FreeBSD$"); 94116182Sobrien 9513675Sdyson#include <sys/param.h> 9613675Sdyson#include <sys/systm.h> 97226042Skib#include <sys/conf.h> 9824131Sbde#include <sys/fcntl.h> 9913675Sdyson#include <sys/file.h> 10013675Sdyson#include <sys/filedesc.h> 10124206Sbde#include <sys/filio.h> 10291372Salfred#include <sys/kernel.h> 10376166Smarkm#include <sys/lock.h> 10476827Salfred#include <sys/mutex.h> 10524206Sbde#include <sys/ttycom.h> 10613675Sdyson#include <sys/stat.h> 10791968Salfred#include <sys/malloc.h> 10829356Speter#include <sys/poll.h> 10970834Swollman#include <sys/selinfo.h> 11013675Sdyson#include <sys/signalvar.h> 111184849Sed#include <sys/syscallsubr.h> 112117325Ssilby#include <sys/sysctl.h> 11313675Sdyson#include <sys/sysproto.h> 11413675Sdyson#include <sys/pipe.h> 11576166Smarkm#include <sys/proc.h> 11655112Sbde#include <sys/vnode.h> 11734924Sbde#include <sys/uio.h> 11859288Sjlemon#include <sys/event.h> 11913675Sdyson 120163606Srwatson#include <security/mac/mac_framework.h> 121163606Srwatson 12213675Sdyson#include <vm/vm.h> 12313675Sdyson#include <vm/vm_param.h> 12413675Sdyson#include <vm/vm_object.h> 12513675Sdyson#include <vm/vm_kern.h> 12613675Sdyson#include <vm/vm_extern.h> 12713675Sdyson#include <vm/pmap.h> 12813675Sdyson#include <vm/vm_map.h> 12913907Sdyson#include <vm/vm_page.h> 13092751Sjeff#include <vm/uma.h> 13113675Sdyson 13214037Sdyson/* 13314037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 13414037Sdyson * approx 30% decrease in transfer rate. This could be useful for 13514037Sdyson * NetBSD or OpenBSD. 13614037Sdyson */ 13714037Sdyson/* #define PIPE_NODIRECT */ 13814037Sdyson 139232055Skmacy#define PIPE_PEER(pipe) \ 140232055Skmacy (((pipe)->pipe_state & PIPE_NAMED) ? 
(pipe) : ((pipe)->pipe_peer)) 141232055Skmacy 14214037Sdyson/* 14314037Sdyson * interfaces to the outside world 14414037Sdyson */ 145108255Sphkstatic fo_rdwr_t pipe_read; 146108255Sphkstatic fo_rdwr_t pipe_write; 147175140Sjhbstatic fo_truncate_t pipe_truncate; 148108255Sphkstatic fo_ioctl_t pipe_ioctl; 149108255Sphkstatic fo_poll_t pipe_poll; 150108255Sphkstatic fo_kqfilter_t pipe_kqfilter; 151108255Sphkstatic fo_stat_t pipe_stat; 152108255Sphkstatic fo_close_t pipe_close; 153232183Sjillesstatic fo_chmod_t pipe_chmod; 154232183Sjillesstatic fo_chown_t pipe_chown; 15513675Sdyson 156232055Skmacystruct fileops pipeops = { 157116546Sphk .fo_read = pipe_read, 158116546Sphk .fo_write = pipe_write, 159175140Sjhb .fo_truncate = pipe_truncate, 160116546Sphk .fo_ioctl = pipe_ioctl, 161116546Sphk .fo_poll = pipe_poll, 162116546Sphk .fo_kqfilter = pipe_kqfilter, 163116546Sphk .fo_stat = pipe_stat, 164116546Sphk .fo_close = pipe_close, 165232183Sjilles .fo_chmod = pipe_chmod, 166232183Sjilles .fo_chown = pipe_chown, 167254356Sglebius .fo_sendfile = invfo_sendfile, 168116546Sphk .fo_flags = DFLAG_PASSABLE 16972521Sjlemon}; 17013675Sdyson 17159288Sjlemonstatic void filt_pipedetach(struct knote *kn); 172232055Skmacystatic void filt_pipedetach_notsup(struct knote *kn); 173232055Skmacystatic int filt_pipenotsup(struct knote *kn, long hint); 17459288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 17559288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 17659288Sjlemon 177232055Skmacystatic struct filterops pipe_nfiltops = { 178232055Skmacy .f_isfd = 1, 179232055Skmacy .f_detach = filt_pipedetach_notsup, 180232055Skmacy .f_event = filt_pipenotsup 181232055Skmacy}; 182197134Srwatsonstatic struct filterops pipe_rfiltops = { 183197134Srwatson .f_isfd = 1, 184197134Srwatson .f_detach = filt_pipedetach, 185197134Srwatson .f_event = filt_piperead 186197134Srwatson}; 187197134Srwatsonstatic struct filterops pipe_wfiltops = { 188197134Srwatson .f_isfd = 1, 
189197134Srwatson .f_detach = filt_pipedetach, 190197134Srwatson .f_event = filt_pipewrite 191197134Srwatson}; 19259288Sjlemon 19313675Sdyson/* 19413675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 19513675Sdyson * space is pageable. The pipe code will try to maintain locality of 19613675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 19713675Sdyson * will not wipe the cache. 19813675Sdyson */ 19913907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 20013907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 20113675Sdyson 202189649Sjhbstatic long amountpipekva; 203133790Ssilbystatic int pipefragretry; 204133790Ssilbystatic int pipeallocfail; 205133790Ssilbystatic int piperesizefail; 206133790Ssilbystatic int piperesizeallowed = 1; 20713907Sdyson 208189649SjhbSYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN, 209117325Ssilby &maxpipekva, 0, "Pipe KVA limit"); 210189649SjhbSYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 211117325Ssilby &amountpipekva, 0, "Pipe KVA usage"); 212133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD, 213133790Ssilby &pipefragretry, 0, "Pipe allocation retries due to fragmentation"); 214133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD, 215133790Ssilby &pipeallocfail, 0, "Pipe allocation failures"); 216133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, 217133790Ssilby &piperesizefail, 0, "Pipe resize failures"); 218133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, 219133790Ssilby &piperesizeallowed, 0, "Pipe resizing allowed"); 220117325Ssilby 22191413Salfredstatic void pipeinit(void *dummy __unused); 22291413Salfredstatic void pipeclose(struct pipe *cpipe); 22391413Salfredstatic void pipe_free_kmem(struct pipe *cpipe); 224133790Ssilbystatic int pipe_create(struct pipe *pipe, int backing); 225232055Skmacystatic int pipe_paircreate(struct thread *td, struct pipepair **p_pp); 22691413Salfredstatic 
__inline int pipelock(struct pipe *cpipe, int catch); 22791413Salfredstatic __inline void pipeunlock(struct pipe *cpipe); 22814037Sdyson#ifndef PIPE_NODIRECT 22991413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 23091413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe); 23191413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio); 23291413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe); 23314037Sdyson#endif 23491413Salfredstatic int pipespace(struct pipe *cpipe, int size); 235132579Srwatsonstatic int pipespace_new(struct pipe *cpipe, int size); 23613675Sdyson 237132987Sgreenstatic int pipe_zone_ctor(void *mem, int size, void *arg, int flags); 238132987Sgreenstatic int pipe_zone_init(void *mem, int size, int flags); 239125293Srwatsonstatic void pipe_zone_fini(void *mem, int size); 240125293Srwatson 24192751Sjeffstatic uma_zone_t pipe_zone; 242226042Skibstatic struct unrhdr *pipeino_unr; 243226042Skibstatic dev_t pipedev_ino; 24427899Sdyson 24591372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 24691372Salfred 24791372Salfredstatic void 24891372Salfredpipeinit(void *dummy __unused) 24991372Salfred{ 250118880Salc 251170022Srwatson pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair), 252170022Srwatson pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, 253125293Srwatson UMA_ALIGN_PTR, 0); 254118880Salc KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 255226042Skib pipeino_unr = new_unrhdr(1, INT32_MAX, NULL); 256226042Skib KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized")); 257226042Skib pipedev_ino = devfs_alloc_cdp_inode(); 258226042Skib KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized")); 25991372Salfred} 26091372Salfred 261132987Sgreenstatic int 262132987Sgreenpipe_zone_ctor(void *mem, int size, void *arg, int flags) 263125293Srwatson{ 264125293Srwatson struct pipepair *pp; 265125293Srwatson struct pipe *rpipe, *wpipe; 
266125293Srwatson 267125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size")); 268125293Srwatson 269125293Srwatson pp = (struct pipepair *)mem; 270125293Srwatson 271125293Srwatson /* 272125293Srwatson * We zero both pipe endpoints to make sure all the kmem pointers 273125293Srwatson * are NULL, flag fields are zero'd, etc. We timestamp both 274125293Srwatson * endpoints with the same time. 275125293Srwatson */ 276125293Srwatson rpipe = &pp->pp_rpipe; 277125293Srwatson bzero(rpipe, sizeof(*rpipe)); 278125293Srwatson vfs_timestamp(&rpipe->pipe_ctime); 279125293Srwatson rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; 280125293Srwatson 281125293Srwatson wpipe = &pp->pp_wpipe; 282125293Srwatson bzero(wpipe, sizeof(*wpipe)); 283125293Srwatson wpipe->pipe_ctime = rpipe->pipe_ctime; 284125293Srwatson wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; 285125293Srwatson 286125293Srwatson rpipe->pipe_peer = wpipe; 287125293Srwatson rpipe->pipe_pair = pp; 288125293Srwatson wpipe->pipe_peer = rpipe; 289125293Srwatson wpipe->pipe_pair = pp; 290125293Srwatson 291125293Srwatson /* 292125293Srwatson * Mark both endpoints as present; they will later get free'd 293125293Srwatson * one at a time. When both are free'd, then the whole pair 294125293Srwatson * is released. 295125293Srwatson */ 296179243Skib rpipe->pipe_present = PIPE_ACTIVE; 297179243Skib wpipe->pipe_present = PIPE_ACTIVE; 298125293Srwatson 299125293Srwatson /* 300125293Srwatson * Eventually, the MAC Framework may initialize the label 301125293Srwatson * in ctor or init, but for now we do it elswhere to avoid 302125293Srwatson * blocking in ctor or init. 
303125293Srwatson */ 304125293Srwatson pp->pp_label = NULL; 305125293Srwatson 306132987Sgreen return (0); 307125293Srwatson} 308125293Srwatson 309132987Sgreenstatic int 310132987Sgreenpipe_zone_init(void *mem, int size, int flags) 311125293Srwatson{ 312125293Srwatson struct pipepair *pp; 313125293Srwatson 314125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size")); 315125293Srwatson 316125293Srwatson pp = (struct pipepair *)mem; 317125293Srwatson 318125293Srwatson mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 319132987Sgreen return (0); 320125293Srwatson} 321125293Srwatson 322125293Srwatsonstatic void 323125293Srwatsonpipe_zone_fini(void *mem, int size) 324125293Srwatson{ 325125293Srwatson struct pipepair *pp; 326125293Srwatson 327125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size")); 328125293Srwatson 329125293Srwatson pp = (struct pipepair *)mem; 330125293Srwatson 331125293Srwatson mtx_destroy(&pp->pp_mtx); 332125293Srwatson} 333125293Srwatson 334232055Skmacystatic int 335232055Skmacypipe_paircreate(struct thread *td, struct pipepair **p_pp) 33613675Sdyson{ 337125293Srwatson struct pipepair *pp; 33813675Sdyson struct pipe *rpipe, *wpipe; 339232055Skmacy int error; 34027899Sdyson 341232055Skmacy *p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK); 342125293Srwatson#ifdef MAC 343125293Srwatson /* 344126249Srwatson * The MAC label is shared between the connected endpoints. As a 345172930Srwatson * result mac_pipe_init() and mac_pipe_create() are called once 346126249Srwatson * for the pair, and not on the endpoints. 
347125293Srwatson */ 348172930Srwatson mac_pipe_init(pp); 349172930Srwatson mac_pipe_create(td->td_ucred, pp); 350125293Srwatson#endif 351125293Srwatson rpipe = &pp->pp_rpipe; 352125293Srwatson wpipe = &pp->pp_wpipe; 353125293Srwatson 354193951Skib knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); 355193951Skib knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); 356140369Ssilby 357133790Ssilby /* Only the forward direction pipe is backed by default */ 358155035Sglebius if ((error = pipe_create(rpipe, 1)) != 0 || 359155035Sglebius (error = pipe_create(wpipe, 0)) != 0) { 360124394Sdes pipeclose(rpipe); 361124394Sdes pipeclose(wpipe); 362155035Sglebius return (error); 36376364Salfred } 364124394Sdes 36513907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 36613907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 367232055Skmacy return (0); 368232055Skmacy} 36913675Sdyson 370232055Skmacyint 371232055Skmacypipe_named_ctor(struct pipe **ppipe, struct thread *td) 372232055Skmacy{ 373232055Skmacy struct pipepair *pp; 374232055Skmacy int error; 375232055Skmacy 376232055Skmacy error = pipe_paircreate(td, &pp); 377232055Skmacy if (error != 0) 378232055Skmacy return (error); 379232055Skmacy pp->pp_rpipe.pipe_state |= PIPE_NAMED; 380232055Skmacy *ppipe = &pp->pp_rpipe; 381232055Skmacy return (0); 382232055Skmacy} 383232055Skmacy 384232055Skmacyvoid 385232055Skmacypipe_dtor(struct pipe *dpipe) 386232055Skmacy{ 387232055Skmacy ino_t ino; 388232055Skmacy 389232055Skmacy ino = dpipe->pipe_ino; 390232055Skmacy funsetown(&dpipe->pipe_sigio); 391232055Skmacy pipeclose(dpipe); 392232055Skmacy if (dpipe->pipe_state & PIPE_NAMED) { 393232055Skmacy dpipe = dpipe->pipe_peer; 394232055Skmacy funsetown(&dpipe->pipe_sigio); 395232055Skmacy pipeclose(dpipe); 396232055Skmacy } 397232055Skmacy if (ino != 0 && ino != (ino_t)-1) 398232055Skmacy free_unr(pipeino_unr, ino); 399232055Skmacy} 400232055Skmacy 401232055Skmacy/* 402232055Skmacy * The pipe system call for the DTYPE_PIPE type of 
pipes. If we fail, let 403232055Skmacy * the zone pick up the pieces via pipeclose(). 404232055Skmacy */ 405232055Skmacyint 406232055Skmacykern_pipe(struct thread *td, int fildes[2]) 407232055Skmacy{ 408234352Sjkim 409248951Sjilles return (kern_pipe2(td, fildes, 0)); 410234352Sjkim} 411234352Sjkim 412234352Sjkimint 413248951Sjilleskern_pipe2(struct thread *td, int fildes[2], int flags) 414234352Sjkim{ 415232055Skmacy struct filedesc *fdp; 416232055Skmacy struct file *rf, *wf; 417232055Skmacy struct pipe *rpipe, *wpipe; 418232055Skmacy struct pipepair *pp; 419234352Sjkim int fd, fflags, error; 420232055Skmacy 421232055Skmacy fdp = td->td_proc->p_fd; 422232055Skmacy error = pipe_paircreate(td, &pp); 423232055Skmacy if (error != 0) 424232055Skmacy return (error); 425232055Skmacy rpipe = &pp->pp_rpipe; 426232055Skmacy wpipe = &pp->pp_wpipe; 427234352Sjkim error = falloc(td, &rf, &fd, flags); 42870915Sdwmalone if (error) { 42970915Sdwmalone pipeclose(rpipe); 43070915Sdwmalone pipeclose(wpipe); 43170915Sdwmalone return (error); 43270915Sdwmalone } 433121256Sdwmalone /* An extra reference on `rf' has been held for us by falloc(). */ 434184849Sed fildes[0] = fd; 43570915Sdwmalone 436234352Sjkim fflags = FREAD | FWRITE; 437234352Sjkim if ((flags & O_NONBLOCK) != 0) 438234352Sjkim fflags |= FNONBLOCK; 439234352Sjkim 44070803Sdwmalone /* 44170803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 44270803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 44370803Sdwmalone * to avoid races against processes which manage to dup() the read 44470803Sdwmalone * side while we are blocked trying to allocate the write side. 44570803Sdwmalone */ 446234352Sjkim finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops); 447234352Sjkim error = falloc(td, &wf, &fd, flags); 44870915Sdwmalone if (error) { 449184849Sed fdclose(fdp, rf, fildes[0], td); 45083366Sjulian fdrop(rf, td); 45170915Sdwmalone /* rpipe has been closed by fdrop(). 
*/ 45270915Sdwmalone pipeclose(wpipe); 45370915Sdwmalone return (error); 45470915Sdwmalone } 455121256Sdwmalone /* An extra reference on `wf' has been held for us by falloc(). */ 456234352Sjkim finit(wf, fflags, DTYPE_PIPE, wpipe, &pipeops); 457121256Sdwmalone fdrop(wf, td); 458184849Sed fildes[1] = fd; 45983366Sjulian fdrop(rf, td); 46013675Sdyson 46113675Sdyson return (0); 46213675Sdyson} 46313675Sdyson 464184849Sed/* ARGSUSED */ 465184849Sedint 466225617Skmacysys_pipe(struct thread *td, struct pipe_args *uap) 467184849Sed{ 468184849Sed int error; 469184849Sed int fildes[2]; 470184849Sed 471184849Sed error = kern_pipe(td, fildes); 472184849Sed if (error) 473184849Sed return (error); 474246907Spjd 475184849Sed td->td_retval[0] = fildes[0]; 476184849Sed td->td_retval[1] = fildes[1]; 477184849Sed 478184849Sed return (0); 479184849Sed} 480184849Sed 481250159Sjillesint 482250159Sjillessys_pipe2(struct thread *td, struct pipe2_args *uap) 483250159Sjilles{ 484250159Sjilles int error, fildes[2]; 485250159Sjilles 486250159Sjilles if (uap->flags & ~(O_CLOEXEC | O_NONBLOCK)) 487250159Sjilles return (EINVAL); 488250159Sjilles error = kern_pipe2(td, fildes, uap->flags); 489250159Sjilles if (error) 490250159Sjilles return (error); 491250159Sjilles error = copyout(fildes, uap->fildes, 2 * sizeof(int)); 492250159Sjilles if (error) { 493250159Sjilles (void)kern_close(td, fildes[0]); 494250159Sjilles (void)kern_close(td, fildes[1]); 495250159Sjilles } 496250159Sjilles return (error); 497250159Sjilles} 498250159Sjilles 49913909Sdyson/* 50013909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 50176364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 50276364Salfred * it will retain the old buffer. 50376364Salfred * If it fails it will return ENOMEM. 
50413909Sdyson */ 50576364Salfredstatic int 506132579Srwatsonpipespace_new(cpipe, size) 50713675Sdyson struct pipe *cpipe; 50876364Salfred int size; 50913675Sdyson{ 51076364Salfred caddr_t buffer; 511133790Ssilby int error, cnt, firstseg; 512117325Ssilby static int curfail = 0; 513117325Ssilby static struct timeval lastfail; 51413675Sdyson 515125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked")); 516133790Ssilby KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW), 517133790Ssilby ("pipespace: resize of direct writes not allowed")); 518133790Ssilbyretry: 519133790Ssilby cnt = cpipe->pipe_buffer.cnt; 520133790Ssilby if (cnt > size) 521133790Ssilby size = cnt; 52279224Sdillon 523118764Ssilby size = round_page(size); 524118764Ssilby buffer = (caddr_t) vm_map_min(pipe_map); 52513675Sdyson 526122163Salc error = vm_map_find(pipe_map, NULL, 0, 527255426Sjhb (vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE, 52813688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 52976364Salfred if (error != KERN_SUCCESS) { 530133790Ssilby if ((cpipe->pipe_buffer.buffer == NULL) && 531133790Ssilby (size > SMALL_PIPE_SIZE)) { 532133790Ssilby size = SMALL_PIPE_SIZE; 533133790Ssilby pipefragretry++; 534133790Ssilby goto retry; 535133790Ssilby } 536133790Ssilby if (cpipe->pipe_buffer.buffer == NULL) { 537133790Ssilby pipeallocfail++; 538133790Ssilby if (ppsratecheck(&lastfail, &curfail, 1)) 539133790Ssilby printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n"); 540133790Ssilby } else { 541133790Ssilby piperesizefail++; 542133790Ssilby } 54376364Salfred return (ENOMEM); 54476364Salfred } 54576364Salfred 546133790Ssilby /* copy data, then free old resources if we're resizing */ 547133790Ssilby if (cnt > 0) { 548133790Ssilby if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) { 549133790Ssilby firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out; 550133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 551133790Ssilby buffer, firstseg); 552133790Ssilby if 
((cnt - firstseg) > 0) 553133790Ssilby bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg], 554133790Ssilby cpipe->pipe_buffer.in); 555133790Ssilby } else { 556133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 557133790Ssilby buffer, cnt); 558133790Ssilby } 559133790Ssilby } 56076364Salfred pipe_free_kmem(cpipe); 56176364Salfred cpipe->pipe_buffer.buffer = buffer; 56276364Salfred cpipe->pipe_buffer.size = size; 563133790Ssilby cpipe->pipe_buffer.in = cnt; 56476364Salfred cpipe->pipe_buffer.out = 0; 565133790Ssilby cpipe->pipe_buffer.cnt = cnt; 566189649Sjhb atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size); 56776364Salfred return (0); 56813907Sdyson} 56913688Sdyson 57013907Sdyson/* 571132579Srwatson * Wrapper for pipespace_new() that performs locking assertions. 572132579Srwatson */ 573132579Srwatsonstatic int 574132579Srwatsonpipespace(cpipe, size) 575132579Srwatson struct pipe *cpipe; 576132579Srwatson int size; 577132579Srwatson{ 578132579Srwatson 579133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 580133049Ssilby ("Unlocked pipe passed to pipespace")); 581132579Srwatson return (pipespace_new(cpipe, size)); 582132579Srwatson} 583132579Srwatson 584132579Srwatson/* 58513675Sdyson * lock a pipe for I/O, blocking other access 58613675Sdyson */ 58713675Sdysonstatic __inline int 58813907Sdysonpipelock(cpipe, catch) 58913675Sdyson struct pipe *cpipe; 59013907Sdyson int catch; 59113675Sdyson{ 59213776Sdyson int error; 59376364Salfred 59491362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 59591362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 59613675Sdyson cpipe->pipe_state |= PIPE_LWANT; 59791362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 59891362Salfred catch ? 
(PRIBIO | PCATCH) : PRIBIO, 59976760Salfred "pipelk", 0); 600124394Sdes if (error != 0) 60176760Salfred return (error); 60213675Sdyson } 60391362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 60476760Salfred return (0); 60513675Sdyson} 60613675Sdyson 60713675Sdyson/* 60813675Sdyson * unlock a pipe I/O lock 60913675Sdyson */ 61013675Sdysonstatic __inline void 61113675Sdysonpipeunlock(cpipe) 61213675Sdyson struct pipe *cpipe; 61313675Sdyson{ 61476364Salfred 61591362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 616133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 617133049Ssilby ("Unlocked pipe passed to pipeunlock")); 61891362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 61913675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 62013675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 62114177Sdyson wakeup(cpipe); 62213675Sdyson } 62313675Sdyson} 62413675Sdyson 625238928Sdavidxuvoid 62614037Sdysonpipeselwakeup(cpipe) 62714037Sdyson struct pipe *cpipe; 62814037Sdyson{ 62976364Salfred 630126252Srwatson PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 63114037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 632122352Stanimura selwakeuppri(&cpipe->pipe_sel, PSOCK); 633174647Sjeff if (!SEL_WAITING(&cpipe->pipe_sel)) 634174647Sjeff cpipe->pipe_state &= ~PIPE_SEL; 63514037Sdyson } 63641086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 63795883Salfred pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 638133741Sjmg KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); 63914037Sdyson} 64014037Sdyson 641126131Sgreen/* 642126131Sgreen * Initialize and allocate VM and memory for pipe. The structure 643126131Sgreen * will start out zero'd from the ctor, so we just manage the kmem. 
644126131Sgreen */ 645126131Sgreenstatic int 646133790Ssilbypipe_create(pipe, backing) 647126131Sgreen struct pipe *pipe; 648133790Ssilby int backing; 649126131Sgreen{ 650126131Sgreen int error; 651126131Sgreen 652133790Ssilby if (backing) { 653133790Ssilby if (amountpipekva > maxpipekva / 2) 654133790Ssilby error = pipespace_new(pipe, SMALL_PIPE_SIZE); 655133790Ssilby else 656133790Ssilby error = pipespace_new(pipe, PIPE_SIZE); 657133790Ssilby } else { 658133790Ssilby /* If we're not backing this pipe, no need to do anything. */ 659133790Ssilby error = 0; 660133790Ssilby } 661228306Skib pipe->pipe_ino = -1; 662132579Srwatson return (error); 663126131Sgreen} 664126131Sgreen 66513675Sdyson/* ARGSUSED */ 66613675Sdysonstatic int 667101941Srwatsonpipe_read(fp, uio, active_cred, flags, td) 66813675Sdyson struct file *fp; 66913675Sdyson struct uio *uio; 670101941Srwatson struct ucred *active_cred; 67183366Sjulian struct thread *td; 67245311Sdt int flags; 67313675Sdyson{ 674232055Skmacy struct pipe *rpipe; 67547748Salc int error; 67613675Sdyson int nread = 0; 677232495Skib int size; 67813675Sdyson 679232055Skmacy rpipe = fp->f_data; 68091362Salfred PIPE_LOCK(rpipe); 68113675Sdyson ++rpipe->pipe_busy; 68247748Salc error = pipelock(rpipe, 1); 68347748Salc if (error) 68447748Salc goto unlocked_error; 68547748Salc 686101768Srwatson#ifdef MAC 687172930Srwatson error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); 688101768Srwatson if (error) 689101768Srwatson goto locked_error; 690101768Srwatson#endif 691133790Ssilby if (amountpipekva > (3 * maxpipekva) / 4) { 692133790Ssilby if (!(rpipe->pipe_state & PIPE_DIRECTW) && 693133790Ssilby (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 694133790Ssilby (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 695133790Ssilby (piperesizeallowed == 1)) { 696133790Ssilby PIPE_UNLOCK(rpipe); 697133790Ssilby pipespace(rpipe, SMALL_PIPE_SIZE); 698133790Ssilby PIPE_LOCK(rpipe); 699133790Ssilby } 700133790Ssilby } 701101768Srwatson 
70213675Sdyson while (uio->uio_resid) { 70313907Sdyson /* 70413907Sdyson * normal pipe buffer receive 70513907Sdyson */ 70613675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 70718863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 70813675Sdyson if (size > rpipe->pipe_buffer.cnt) 70913675Sdyson size = rpipe->pipe_buffer.cnt; 710231949Skib if (size > uio->uio_resid) 711232495Skib size = uio->uio_resid; 71247748Salc 71391362Salfred PIPE_UNLOCK(rpipe); 714116127Smux error = uiomove( 715116127Smux &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 716116127Smux size, uio); 71791362Salfred PIPE_LOCK(rpipe); 71876760Salfred if (error) 71913675Sdyson break; 72076760Salfred 72113675Sdyson rpipe->pipe_buffer.out += size; 72213675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 72313675Sdyson rpipe->pipe_buffer.out = 0; 72413675Sdyson 72513675Sdyson rpipe->pipe_buffer.cnt -= size; 72647748Salc 72747748Salc /* 72847748Salc * If there is no more to read in the pipe, reset 72947748Salc * its pointers to the beginning. This improves 73047748Salc * cache hit stats. 73147748Salc */ 73247748Salc if (rpipe->pipe_buffer.cnt == 0) { 73347748Salc rpipe->pipe_buffer.in = 0; 73447748Salc rpipe->pipe_buffer.out = 0; 73547748Salc } 73613675Sdyson nread += size; 73714037Sdyson#ifndef PIPE_NODIRECT 73813907Sdyson /* 73913907Sdyson * Direct copy, bypassing a kernel buffer. 
74013907Sdyson */ 74113907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 74247748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 743231949Skib if (size > uio->uio_resid) 74418863Sdyson size = (u_int) uio->uio_resid; 74547748Salc 74691362Salfred PIPE_UNLOCK(rpipe); 747127501Salc error = uiomove_fromphys(rpipe->pipe_map.ms, 748127501Salc rpipe->pipe_map.pos, size, uio); 74991362Salfred PIPE_LOCK(rpipe); 75013907Sdyson if (error) 75113907Sdyson break; 75213907Sdyson nread += size; 75313907Sdyson rpipe->pipe_map.pos += size; 75413907Sdyson rpipe->pipe_map.cnt -= size; 75513907Sdyson if (rpipe->pipe_map.cnt == 0) { 756238928Sdavidxu rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW); 75713907Sdyson wakeup(rpipe); 75813907Sdyson } 75914037Sdyson#endif 76013675Sdyson } else { 76113675Sdyson /* 76213675Sdyson * detect EOF condition 76376760Salfred * read returns 0 on EOF, no need to set error 76413675Sdyson */ 76576760Salfred if (rpipe->pipe_state & PIPE_EOF) 76613675Sdyson break; 76743623Sdillon 76813675Sdyson /* 76913675Sdyson * If the "write-side" has been blocked, wake it up now. 77013675Sdyson */ 77113675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 77213675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 77313675Sdyson wakeup(rpipe); 77413675Sdyson } 77543623Sdillon 77643623Sdillon /* 77747748Salc * Break if some data was read. 77843623Sdillon */ 77947748Salc if (nread > 0) 78013675Sdyson break; 78116960Sdyson 78243623Sdillon /* 783124394Sdes * Unlock the pipe buffer for our remaining processing. 784116127Smux * We will either break out with an error or we will 785116127Smux * sleep and relock to loop. 78643623Sdillon */ 78747748Salc pipeunlock(rpipe); 78843623Sdillon 78913675Sdyson /* 79047748Salc * Handle non-blocking mode operation or 79147748Salc * wait for more data. 
79213675Sdyson */ 79376760Salfred if (fp->f_flag & FNONBLOCK) { 79447748Salc error = EAGAIN; 79576760Salfred } else { 79647748Salc rpipe->pipe_state |= PIPE_WANTR; 79791362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 79891362Salfred PRIBIO | PCATCH, 79977140Salfred "piperd", 0)) == 0) 80047748Salc error = pipelock(rpipe, 1); 80113675Sdyson } 80247748Salc if (error) 80347748Salc goto unlocked_error; 80413675Sdyson } 80513675Sdyson } 806101768Srwatson#ifdef MAC 807101768Srwatsonlocked_error: 808101768Srwatson#endif 80947748Salc pipeunlock(rpipe); 81013675Sdyson 81191362Salfred /* XXX: should probably do this before getting any locks. */ 81224101Sbde if (error == 0) 81355112Sbde vfs_timestamp(&rpipe->pipe_atime); 81447748Salcunlocked_error: 81547748Salc --rpipe->pipe_busy; 81613913Sdyson 81747748Salc /* 81847748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 81947748Salc */ 82013675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 82113675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 82213675Sdyson wakeup(rpipe); 82313675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 82413675Sdyson /* 82547748Salc * Handle write blocking hysteresis. 82613675Sdyson */ 82713675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 82813675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 82913675Sdyson wakeup(rpipe); 83013675Sdyson } 83113675Sdyson } 83214037Sdyson 83314802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 83414037Sdyson pipeselwakeup(rpipe); 83514037Sdyson 83691362Salfred PIPE_UNLOCK(rpipe); 83776760Salfred return (error); 83813675Sdyson} 83913675Sdyson 84014037Sdyson#ifndef PIPE_NODIRECT 84113907Sdyson/* 84213907Sdyson * Map the sending processes' buffer into kernel space and wire it. 84313907Sdyson * This is similar to a physical write operation. 
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	u_int size;
	int i;

	/* Must be entered without the pipe mutex; we may fault below. */
	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
	KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
	    ("Clone attempt on non-direct write pipe!"));

	/* Clamp the mapping to one iovec and to the pipe's buffer size. */
	if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;
	else
		size = uio->uio_iov->iov_len;

	/*
	 * Fault in and hold the sender's pages; a negative return means
	 * the range could not be made resident, which we report as EFAULT.
	 */
	if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
	    (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
	    wpipe->pipe_map.ms, PIPENPAGES)) < 0)
		return (EFAULT);

/*
 * set up the control block
 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

/*
 * and update the uio data
 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{

	/* Caller holds the pipe mutex; releasing held pages does not sleep. */
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages);
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	struct uio uio;
	struct iovec iov;
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	/* Make the pending bytes visible as an ordinary buffered write. */
	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	/*
	 * Drop the mutex around the physical copy: uiomove_fromphys()
	 * copies from the held pages and may not be called with the
	 * pipe mutex held.
	 */
	PIPE_UNLOCK(wpipe);
	iov.iov_base = wpipe->pipe_buffer.buffer;
	iov.iov_len = size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;
	uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
	PIPE_LOCK(wpipe);
	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	error = pipelock(wpipe, 1);
	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (error) {
		pipeunlock(wpipe);
		goto error1;
	}
	/* Wait for any previous direct write to drain ("pipdww"). */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/*
	 * Wait until the reader has emptied the ordinary pipe buffer
	 * ("pipdwc"); only one kind of write may be pending at a time.
	 */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}

	/* PIPE_DIRECTW marks our ownership of the direct-write slot. */
	wpipe->pipe_state |= PIPE_DIRECTW;

	/* Dropped around page wiring: pipe_build_write_buffer() may fault. */
	PIPE_UNLOCK(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_LOCK(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		pipeunlock(wpipe);
		goto error1;
	}

	/* Sleep ("pipdwt") until the reader consumes the mapped pages. */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipe_destroy_write_buffer(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
		pipelock(wpipe, 0);
	}

	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif

/*
 * Buffered pipe write path: resizes the kernel buffer when useful,
 * hands large user-space writes to pipe_direct_write(), and otherwise
 * copies into the circular buffer, honoring PIPE_BUF atomicity.
 */
static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int desiredsize;
	ssize_t orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = PIPE_PEER(rpipe);
	PIPE_LOCK(rpipe);
	error = pipelock(wpipe, 1);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if (wpipe->pipe_present != PIPE_ACTIVE ||
	    (wpipe->pipe_state & PIPE_EOF)) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#ifdef MAC
	error = mac_pipe_check_write(active_cred, wpipe->pipe_pair);
	if (error) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	/* Keep the write side alive across sleeps; see PIPE_WANT below. */
	++wpipe->pipe_busy;

	/* Choose a larger size if it's advantageous */
	desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size);
	while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) {
		if (piperesizeallowed != 1)
			break;
		if (amountpipekva > maxpipekva / 2)
			break;
		if (desiredsize == BIG_PIPE_SIZE)
			break;
		desiredsize = desiredsize * 2;
	}

	/* Choose a smaller size if we're in a OOM situation */
	if ((amountpipekva > (3 * maxpipekva) / 4) &&
	    (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
	    (piperesizeallowed == 1))
		desiredsize = SMALL_PIPE_SIZE;

	/* Resize if the above determined that a new size was necessary */
	if ((desiredsize != wpipe->pipe_buffer.size) &&
	    ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) {
		/* pipespace() allocates KVA; drop the mutex around it. */
		PIPE_UNLOCK(wpipe);
		pipespace(wpipe, desiredsize);
		PIPE_LOCK(wpipe);
	}
	if (wpipe->pipe_buffer.size == 0) {
		/*
		 * This can only happen for reverse direction use of pipes
		 * in a complete OOM situation.
		 */
		error = ENOMEM;
		--wpipe->pipe_busy;
		pipeunlock(wpipe);
		PIPE_UNLOCK(wpipe);
		return (error);
	}

	pipeunlock(wpipe);

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

		pipelock(wpipe, 0);
		if (wpipe->pipe_state & PIPE_EOF) {
			pipeunlock(wpipe);
			error = EPIPE;
			break;
		}
#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if (uio->uio_segflg == UIO_USERSPACE &&
		    uio->uio_iov->iov_len >= PIPE_MINDIRECT &&
		    wpipe->pipe_buffer.size >= PIPE_MINDIRECT &&
		    (fp->f_flag & FNONBLOCK) == 0) {
			pipeunlock(wpipe);
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
		if (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			pipeselwakeup(wpipe);
			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
			    "pipbww", 0);
			if (error)
				break;
			else
				continue;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			int size;	/* Transfer size */
			int segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			PIPE_UNLOCK(rpipe);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			PIPE_LOCK(rpipe);

			if (error == 0 && segsize < size) {
				KASSERT(wpipe->pipe_buffer.in + segsize ==
				    wpipe->pipe_buffer.size,
				    ("Pipe buffer wraparound disappeared"));
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */

				PIPE_UNLOCK(rpipe);
				error = uiomove(
				    &wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				PIPE_LOCK(rpipe);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
					KASSERT(wpipe->pipe_buffer.in ==
					    size - segsize +
					    wpipe->pipe_buffer.size,
					    ("Expected wraparound bad"));
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
				KASSERT(wpipe->pipe_buffer.cnt <=
				    wpipe->pipe_buffer.size,
				    ("Pipe buffer overflow"));
			}
			pipeunlock(wpipe);
			if (error != 0)
				break;
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				pipeunlock(wpipe);
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
		}
	}

	pipelock(wpipe, 0);
	--wpipe->pipe_busy;

	/* A closer sleeping in pipeclose() waits for pipe_busy to hit 0. */
	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	pipeunlock(wpipe);
	PIPE_UNLOCK(rpipe);
	return (error);
}

/* ARGSUSED */
static int
pipe_truncate(fp, length, active_cred, td)
	struct file *fp;
	off_t length;
	struct ucred *active_cred;
	struct thread *td;
{

	/* For named pipes call the vnode operation. */
	if (fp->f_vnode != NULL)
		return (vnops.fo_truncate(fp, length, active_cred, td));
	return (EINVAL);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(fp, cmd, data, active_cred, td)
	struct file *fp;
	u_long cmd;
	void *data;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *mpipe = fp->f_data;
	int error;

	PIPE_LOCK(mpipe);

#ifdef MAC
	error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
	if (error) {
		PIPE_UNLOCK(mpipe);
		return (error);
	}
#endif

	error = 0;
	switch (cmd) {

	case FIONBIO:
		/* Non-blocking mode is handled generically; nothing to do. */
		break;

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		break;

	case FIONREAD:
		if (!(fp->f_flag & FREAD)) {
			*(int *)data = 0;
			PIPE_UNLOCK(mpipe);
			return (0);
		}
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		break;

	case FIOSETOWN:
		/* fsetown() is called with the pipe mutex dropped. */
		PIPE_UNLOCK(mpipe);
		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
		goto out_unlocked;

	case FIOGETOWN:
		*(int *)data = fgetown(&mpipe->pipe_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		PIPE_UNLOCK(mpipe);
		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
		goto out_unlocked;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&mpipe->pipe_sigio);
		break;

	default:
		error = ENOTTY;
		break;
	}
	PIPE_UNLOCK(mpipe);
out_unlocked:
	return (error);
}

/*
 * Poll support: readable when data (buffered or direct) is pending,
 * writable when the peer is alive and has PIPE_BUF bytes of room.
 */
static int
pipe_poll(fp, events, active_cred, td)
	struct file *fp;
	int events;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *rpipe;
	struct pipe *wpipe;
	int levents, revents;
#ifdef MAC
	int error;
#endif

	revents = 0;
	rpipe = fp->f_data;
	wpipe = PIPE_PEER(rpipe);
	PIPE_LOCK(rpipe);
#ifdef MAC
	error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif
	if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0))
			revents |= events & (POLLIN | POLLRDNORM);

	if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM))
		if (wpipe->pipe_present != PIPE_ACTIVE ||
		    (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF ||
			 wpipe->pipe_buffer.size == 0)))
			revents |= events & (POLLOUT | POLLWRNORM);

	/*
	 * For named pipes, a reader that has not yet seen a writer
	 * generation change ignores EOF so open()/poll() semantics match
	 * FIFOs.
	 */
	levents = events &
	    (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND);
	if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents &&
	    fp->f_seqcount == rpipe->pipe_wgen)
		events |= POLLINIGNEOF;

	if ((events & POLLINIGNEOF) == 0) {
		if (rpipe->pipe_state & PIPE_EOF) {
			revents |= (events & (POLLIN | POLLRDNORM));
			if (wpipe->pipe_present != PIPE_ACTIVE ||
			    (wpipe->pipe_state & PIPE_EOF))
				revents |= POLLHUP;
		}
	}

	if (revents == 0) {
		/* Nothing ready: register for selwakeup on both sides. */
		if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			if (SEL_WAITING(&rpipe->pipe_sel))
				rpipe->pipe_state |= PIPE_SEL;
		}

		if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			if (SEL_WAITING(&wpipe->pipe_sel))
				wpipe->pipe_state |= PIPE_SEL;
		}
	}
#ifdef MAC
locked_error:
#endif
	PIPE_UNLOCK(rpipe);

	return (revents);
}

/*
 * We shouldn't need locks here as we're doing a read and this should
 * be a natural race.
 * NOTE(review): pipe_stat() below does take the pipe mutex for the MAC
 * check and lazy inode allocation; this older comment predates that.
 */
static int
pipe_stat(fp, ub, active_cred, td)
	struct file *fp;
	struct stat *ub;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *pipe;
	int new_unr;
#ifdef MAC
	int error;
#endif

	pipe = fp->f_data;
	PIPE_LOCK(pipe);
#ifdef MAC
	error = mac_pipe_check_stat(active_cred, pipe->pipe_pair);
	if (error) {
		PIPE_UNLOCK(pipe);
		return (error);
	}
#endif

	/* For named pipes ask the underlying filesystem. */
	if (pipe->pipe_state & PIPE_NAMED) {
		PIPE_UNLOCK(pipe);
		return (vnops.fo_stat(fp, ub, active_cred, td));
	}

	/*
	 * Lazily allocate an inode number for the pipe.  Most pipe
	 * users do not call fstat(2) on the pipe, which means that
	 * postponing the inode allocation until it is must be
	 * returned to userland is useful.  If alloc_unr failed,
	 * assign st_ino zero instead of returning an error.
	 * Special pipe_ino values:
	 *  -1 - not yet initialized;
	 *   0 - alloc_unr failed, return 0 as st_ino forever.
	 */
	if (pipe->pipe_ino == (ino_t)-1) {
		new_unr = alloc_unr(pipeino_unr);
		if (new_unr != -1)
			pipe->pipe_ino = new_unr;
		else
			pipe->pipe_ino = 0;
	}
	PIPE_UNLOCK(pipe);

	bzero(ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = PAGE_SIZE;
	if (pipe->pipe_state & PIPE_DIRECTW)
		ub->st_size = pipe->pipe_map.cnt;
	else
		ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim = pipe->pipe_atime;
	ub->st_mtim = pipe->pipe_mtime;
	ub->st_ctim = pipe->pipe_ctime;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	ub->st_dev = pipedev_ino;
	ub->st_ino = pipe->pipe_ino;
	/*
	 * Left as 0: st_nlink, st_rdev, st_flags, st_gen.
	 */
	return (0);
}

/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct thread *td;
{

	/* Named pipes close through the vnode layer. */
	if (fp->f_vnode != NULL)
		return vnops.fo_close(fp, td);
	fp->f_ops = &badfileops;
	pipe_dtor(fp->f_data);
	fp->f_data = NULL;
	return (0);
}

static int
pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td)
{
	struct pipe *cpipe;
	int error;

	cpipe = fp->f_data;
	/* Only named pipes have an underlying vnode to chmod. */
	if (cpipe->pipe_state & PIPE_NAMED)
		error = vn_chmod(fp, mode, active_cred, td);
	else
		error = invfo_chmod(fp, mode, active_cred, td);
	return (error);
}

static int
pipe_chown(fp, uid, gid, active_cred, td)
	struct file *fp;
	uid_t uid;
	gid_t gid;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *cpipe;
	int error;

	cpipe = fp->f_data;
	/* Only named pipes have an underlying vnode to chown. */
	if (cpipe->pipe_state & PIPE_NAMED)
		error = vn_chown(fp, uid, gid, active_cred, td);
	else
		error = invfo_chown(fp, uid, gid, active_cred, td);
	return (error);
}

static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

	/*
	 * The pipe mutex must NOT be held here, presumably because
	 * vm_map_remove() can sleep -- TODO confirm.
	 */
	KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
	    ("pipe_free_kmem: pipe mutex locked"));

	if (cpipe->pipe_buffer.buffer != NULL) {
		atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size);
		vm_map_remove(pipe_map,
		    (vm_offset_t)cpipe->pipe_buffer.buffer,
		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	{
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipepair *pp;
	struct pipe *ppipe;

	KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));

	PIPE_LOCK(cpipe);
	pipelock(cpipe, 0);
	pp = cpipe->pipe_pair;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT;
		pipeunlock(cpipe);
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
		pipelock(cpipe, 0);
	}


	/*
	 * Disconnect from peer, if any.
	 */
	ppipe = cpipe->pipe_peer;
	if (ppipe->pipe_present == PIPE_ACTIVE) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0);
	}

	/*
	 * Mark this endpoint as free.  Release kmem resources.  We
	 * don't mark this endpoint as unused until we've finished
	 * doing that, or the pipe might disappear out from under
	 * us.
	 */
	PIPE_UNLOCK(cpipe);
	pipe_free_kmem(cpipe);
	PIPE_LOCK(cpipe);
	cpipe->pipe_present = PIPE_CLOSING;
	pipeunlock(cpipe);

	/*
	 * knlist_clear() may sleep dropping the PIPE_MTX.  Set the
	 * PIPE_FINALIZED, that allows other end to free the
	 * pipe_pair, only after the knotes are completely dismantled.
	 */
	knlist_clear(&cpipe->pipe_sel.si_note, 1);
	cpipe->pipe_present = PIPE_FINALIZED;
	seldrain(&cpipe->pipe_sel);
	knlist_destroy(&cpipe->pipe_sel.si_note);

	/*
	 * If both endpoints are now closed, release the memory for the
	 * pipe pair.  If not, unlock.
	 */
	if (ppipe->pipe_present == PIPE_FINALIZED) {
		PIPE_UNLOCK(cpipe);
#ifdef MAC
		mac_pipe_destroy(pp);
#endif
		uma_zfree(pipe_zone, cpipe->pipe_pair);
	} else
		PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	/*
	 * If a filter is requested that is not supported by this file
	 * descriptor, don't return an error, but also don't ever generate an
	 * event.
	 */
	if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) {
		kn->kn_fop = &pipe_nfiltops;
		return (0);
	}
	if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) {
		kn->kn_fop = &pipe_nfiltops;
		return (0);
	}
	cpipe = fp->f_data;
	PIPE_LOCK(cpipe);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) {
			/* other end of pipe has been closed */
			PIPE_UNLOCK(cpipe);
			return (EPIPE);
		}
		cpipe = PIPE_PEER(cpipe);
		break;
	default:
		PIPE_UNLOCK(cpipe);
		return (EINVAL);
	}

	kn->kn_hook = cpipe;
	knlist_add(&cpipe->pipe_sel.si_note, kn, 1);
	PIPE_UNLOCK(cpipe);
176159288Sjlemon return (0); 176259288Sjlemon} 176359288Sjlemon 176459288Sjlemonstatic void 176559288Sjlemonfilt_pipedetach(struct knote *kn) 176659288Sjlemon{ 1767232055Skmacy struct pipe *cpipe = kn->kn_hook; 176859288Sjlemon 1769126131Sgreen PIPE_LOCK(cpipe); 1770133741Sjmg knlist_remove(&cpipe->pipe_sel.si_note, kn, 1); 177191372Salfred PIPE_UNLOCK(cpipe); 177259288Sjlemon} 177359288Sjlemon 177459288Sjlemon/*ARGSUSED*/ 177559288Sjlemonstatic int 177659288Sjlemonfilt_piperead(struct knote *kn, long hint) 177759288Sjlemon{ 1778232055Skmacy struct pipe *rpipe = kn->kn_hook; 177959288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 1780133741Sjmg int ret; 178159288Sjlemon 178291372Salfred PIPE_LOCK(rpipe); 178359288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 178459288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 178559288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 178659288Sjlemon 178759288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 1788179243Skib wpipe->pipe_present != PIPE_ACTIVE || 1789179243Skib (wpipe->pipe_state & PIPE_EOF)) { 179091372Salfred kn->kn_flags |= EV_EOF; 179191372Salfred PIPE_UNLOCK(rpipe); 179259288Sjlemon return (1); 179359288Sjlemon } 1794133741Sjmg ret = kn->kn_data > 0; 179591372Salfred PIPE_UNLOCK(rpipe); 1796133741Sjmg return ret; 179759288Sjlemon} 179859288Sjlemon 179959288Sjlemon/*ARGSUSED*/ 180059288Sjlemonstatic int 180159288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 180259288Sjlemon{ 1803232055Skmacy struct pipe *wpipe; 1804232055Skmacy 1805232055Skmacy wpipe = kn->kn_hook; 1806232055Skmacy PIPE_LOCK(wpipe); 1807179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1808179243Skib (wpipe->pipe_state & PIPE_EOF)) { 180959288Sjlemon kn->kn_data = 0; 1810124394Sdes kn->kn_flags |= EV_EOF; 1811232055Skmacy PIPE_UNLOCK(wpipe); 181259288Sjlemon return (1); 181359288Sjlemon } 1814228510Sjilles kn->kn_data = (wpipe->pipe_buffer.size > 0) ? 
1815228510Sjilles (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) : PIPE_BUF; 181665855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 181759288Sjlemon kn->kn_data = 0; 181859288Sjlemon 1819232055Skmacy PIPE_UNLOCK(wpipe); 182059288Sjlemon return (kn->kn_data >= PIPE_BUF); 182159288Sjlemon} 1822232055Skmacy 1823232055Skmacystatic void 1824232055Skmacyfilt_pipedetach_notsup(struct knote *kn) 1825232055Skmacy{ 1826232055Skmacy 1827232055Skmacy} 1828232055Skmacy 1829232055Skmacystatic int 1830232055Skmacyfilt_pipenotsup(struct knote *kn, long hint) 1831232055Skmacy{ 1832232055Skmacy 1833232055Skmacy return (0); 1834232055Skmacy} 1835