sys_pipe.c revision 238928
1139804Simp/*- 213675Sdyson * Copyright (c) 1996 John S. Dyson 3232055Skmacy * Copyright (c) 2012 Giovanni Trematerra 413675Sdyson * All rights reserved. 513675Sdyson * 613675Sdyson * Redistribution and use in source and binary forms, with or without 713675Sdyson * modification, are permitted provided that the following conditions 813675Sdyson * are met: 913675Sdyson * 1. Redistributions of source code must retain the above copyright 1013675Sdyson * notice immediately at the beginning of the file, without modification, 1113675Sdyson * this list of conditions, and the following disclaimer. 1213675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1313675Sdyson * notice, this list of conditions and the following disclaimer in the 1413675Sdyson * documentation and/or other materials provided with the distribution. 1513675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1613675Sdyson * John S. Dyson. 1714037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1813675Sdyson * are met. 1913675Sdyson */ 2013675Sdyson 2113675Sdyson/* 2213675Sdyson * This file contains a high-performance replacement for the socket-based 2313675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2413675Sdyson * all features of sockets, but does do everything that pipes normally 2513675Sdyson * do. 2613675Sdyson */ 2713675Sdyson 2813907Sdyson/* 2913907Sdyson * This code has two modes of operation, a small write mode and a large 3013907Sdyson * write mode. The small write mode acts like conventional pipes with 3113907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3213907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 33219801Salc * and PIPE_SIZE in size, the sending process pins the underlying pages in 34219801Salc * memory, and the receiving process copies directly from these pinned pages 35219801Salc * in the sending process. 3613907Sdyson * 3713907Sdyson * If the sending process receives a signal, it is possible that it will 3813913Sdyson * go away, and certainly its address space can change, because control 3913907Sdyson * is returned back to the user-mode side. In that case, the pipe code 4013907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4113907Sdyson * kernel buffer, and the receiving process will grab the data from the 4213907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4313907Sdyson * the copy operation is normally eliminated. 4413907Sdyson * 4513907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4613907Sdyson * happen for small transfers so that the system will not spend all of 47118764Ssilby * its time context switching. 48117325Ssilby * 49118764Ssilby * In order to limit the resource use of pipes, two sysctls exist: 50117325Ssilby * 51118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 52133790Ssilby * address space available to us in pipe_map. This value is normally 53133790Ssilby * autotuned, but may also be loader tuned. 54117325Ssilby * 55133790Ssilby * kern.ipc.pipekva - This read-only sysctl tracks the current amount of 56133790Ssilby * memory in use by pipes. 57117325Ssilby * 58133790Ssilby * Based on how large pipekva is relative to maxpipekva, the following 59133790Ssilby * will happen: 60117325Ssilby * 61133790Ssilby * 0% - 50%: 62133790Ssilby * New pipes are given 16K of memory backing, pipes may dynamically 63133790Ssilby * grow to as large as 64K where needed. 64133790Ssilby * 50% - 75%: 65133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 66133790Ssilby * existing pipes may NOT grow. 67133790Ssilby * 75% - 100%: 68133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 69133790Ssilby * existing pipes will be shrunk down to 4K whenever possible. 70133049Ssilby * 71133790Ssilby * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0. If 72133790Ssilby * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE 73133790Ssilby * resize which MUST occur for reverse-direction pipes when they are 74133790Ssilby * first used. 75133790Ssilby * 76133790Ssilby * Additional information about the current state of pipes may be obtained 77133790Ssilby * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail, 78133790Ssilby * and kern.ipc.piperesizefail. 79133790Ssilby * 80133049Ssilby * Locking rules: There are two locks present here: A mutex, used via 81133049Ssilby * PIPE_LOCK, and a flag, used via pipelock(). All locking is done via 82133049Ssilby * the flag, as mutexes can not persist over uiomove. The mutex 83133049Ssilby * exists only to guard access to the flag, and is not in itself a 84133790Ssilby * locking mechanism. Also note that there is only a single mutex for 85133790Ssilby * both directions of a pipe. 86133049Ssilby * 87133049Ssilby * As pipelock() may have to sleep before it can acquire the flag, it 88133049Ssilby * is important to reread all data after a call to pipelock(); everything 89133049Ssilby * in the structure may have changed. 9013907Sdyson */ 9113907Sdyson 92116182Sobrien#include <sys/cdefs.h> 93116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 238928 2012-07-31 02:00:37Z davidxu $"); 94116182Sobrien 9513675Sdyson#include <sys/param.h> 9613675Sdyson#include <sys/systm.h> 97226042Skib#include <sys/conf.h> 9824131Sbde#include <sys/fcntl.h> 9913675Sdyson#include <sys/file.h> 10013675Sdyson#include <sys/filedesc.h> 10124206Sbde#include <sys/filio.h> 10291372Salfred#include <sys/kernel.h> 10376166Smarkm#include <sys/lock.h> 10476827Salfred#include <sys/mutex.h> 10524206Sbde#include <sys/ttycom.h> 10613675Sdyson#include <sys/stat.h> 10791968Salfred#include <sys/malloc.h> 10829356Speter#include <sys/poll.h> 10970834Swollman#include <sys/selinfo.h> 11013675Sdyson#include <sys/signalvar.h> 111184849Sed#include <sys/syscallsubr.h> 112117325Ssilby#include <sys/sysctl.h> 11313675Sdyson#include <sys/sysproto.h> 11413675Sdyson#include <sys/pipe.h> 11576166Smarkm#include <sys/proc.h> 11655112Sbde#include <sys/vnode.h> 11734924Sbde#include <sys/uio.h> 11859288Sjlemon#include <sys/event.h> 11913675Sdyson 120163606Srwatson#include <security/mac/mac_framework.h> 121163606Srwatson 12213675Sdyson#include <vm/vm.h> 12313675Sdyson#include <vm/vm_param.h> 12413675Sdyson#include <vm/vm_object.h> 12513675Sdyson#include <vm/vm_kern.h> 12613675Sdyson#include <vm/vm_extern.h> 12713675Sdyson#include <vm/pmap.h> 12813675Sdyson#include <vm/vm_map.h> 12913907Sdyson#include <vm/vm_page.h> 13092751Sjeff#include <vm/uma.h> 13113675Sdyson 132234352Sjkim/* XXX */ 133234352Sjkimint do_pipe(struct thread *td, int fildes[2], int flags); 134234352Sjkim 13514037Sdyson/* 13614037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 13714037Sdyson * approx 30% decrease in transfer rate. This could be useful for 13814037Sdyson * NetBSD or OpenBSD. 13914037Sdyson */ 14014037Sdyson/* #define PIPE_NODIRECT */ 14114037Sdyson 142232055Skmacy#define PIPE_PEER(pipe) \ 143232055Skmacy (((pipe)->pipe_state & PIPE_NAMED) ? (pipe) : ((pipe)->pipe_peer)) 144232055Skmacy 14514037Sdyson/* 14614037Sdyson * interfaces to the outside world 14714037Sdyson */ 148108255Sphkstatic fo_rdwr_t pipe_read; 149108255Sphkstatic fo_rdwr_t pipe_write; 150175140Sjhbstatic fo_truncate_t pipe_truncate; 151108255Sphkstatic fo_ioctl_t pipe_ioctl; 152108255Sphkstatic fo_poll_t pipe_poll; 153108255Sphkstatic fo_kqfilter_t pipe_kqfilter; 154108255Sphkstatic fo_stat_t pipe_stat; 155108255Sphkstatic fo_close_t pipe_close; 156232183Sjillesstatic fo_chmod_t pipe_chmod; 157232183Sjillesstatic fo_chown_t pipe_chown; 15813675Sdyson 159232055Skmacystruct fileops pipeops = { 160116546Sphk .fo_read = pipe_read, 161116546Sphk .fo_write = pipe_write, 162175140Sjhb .fo_truncate = pipe_truncate, 163116546Sphk .fo_ioctl = pipe_ioctl, 164116546Sphk .fo_poll = pipe_poll, 165116546Sphk .fo_kqfilter = pipe_kqfilter, 166116546Sphk .fo_stat = pipe_stat, 167116546Sphk .fo_close = pipe_close, 168232183Sjilles .fo_chmod = pipe_chmod, 169232183Sjilles .fo_chown = pipe_chown, 170116546Sphk .fo_flags = DFLAG_PASSABLE 17172521Sjlemon}; 17213675Sdyson 17359288Sjlemonstatic void filt_pipedetach(struct knote *kn); 174232055Skmacystatic void filt_pipedetach_notsup(struct knote *kn); 175232055Skmacystatic int filt_pipenotsup(struct knote *kn, long hint); 17659288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 17759288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 17859288Sjlemon 179232055Skmacystatic struct filterops pipe_nfiltops = { 180232055Skmacy .f_isfd = 1, 181232055Skmacy .f_detach = filt_pipedetach_notsup, 182232055Skmacy .f_event = filt_pipenotsup 183232055Skmacy}; 184197134Srwatsonstatic struct filterops pipe_rfiltops = { 185197134Srwatson .f_isfd = 1, 186197134Srwatson .f_detach = filt_pipedetach, 187197134Srwatson .f_event = filt_piperead 188197134Srwatson}; 189197134Srwatsonstatic struct filterops pipe_wfiltops = { 190197134Srwatson .f_isfd = 1, 191197134Srwatson .f_detach = filt_pipedetach, 192197134Srwatson .f_event = filt_pipewrite 193197134Srwatson}; 19459288Sjlemon 19513675Sdyson/* 19613675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 19713675Sdyson * space is pageable. The pipe code will try to maintain locality of 19813675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 19913675Sdyson * will not wipe the cache. 20013675Sdyson */ 20113907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 20213907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 20313675Sdyson 204189649Sjhbstatic long amountpipekva; 205133790Ssilbystatic int pipefragretry; 206133790Ssilbystatic int pipeallocfail; 207133790Ssilbystatic int piperesizefail; 208133790Ssilbystatic int piperesizeallowed = 1; 20913907Sdyson 210189649SjhbSYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN, 211117325Ssilby &maxpipekva, 0, "Pipe KVA limit"); 212189649SjhbSYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 213117325Ssilby &amountpipekva, 0, "Pipe KVA usage"); 214133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD, 215133790Ssilby &pipefragretry, 0, "Pipe allocation retries due to fragmentation"); 216133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD, 217133790Ssilby &pipeallocfail, 0, "Pipe allocation failures"); 218133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, 219133790Ssilby &piperesizefail, 0, "Pipe resize failures"); 220133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, 221133790Ssilby &piperesizeallowed, 0, "Pipe resizing allowed"); 222117325Ssilby 22391413Salfredstatic void pipeinit(void *dummy __unused); 22491413Salfredstatic void pipeclose(struct pipe *cpipe); 22591413Salfredstatic void pipe_free_kmem(struct pipe *cpipe); 226133790Ssilbystatic int pipe_create(struct pipe *pipe, int backing); 227232055Skmacystatic int pipe_paircreate(struct thread *td, struct pipepair **p_pp); 22891413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch); 22991413Salfredstatic __inline void pipeunlock(struct pipe *cpipe); 23014037Sdyson#ifndef PIPE_NODIRECT 23191413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 23291413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe); 23391413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio); 23491413Salfredstatic void pipe_clone_write_buffer(struct pipe *wpipe); 23514037Sdyson#endif 23691413Salfredstatic int pipespace(struct pipe *cpipe, int size); 237132579Srwatsonstatic int pipespace_new(struct pipe *cpipe, int size); 23813675Sdyson 239132987Sgreenstatic int pipe_zone_ctor(void *mem, int size, void *arg, int flags); 240132987Sgreenstatic int pipe_zone_init(void *mem, int size, int flags); 241125293Srwatsonstatic void pipe_zone_fini(void *mem, int size); 242125293Srwatson 24392751Sjeffstatic uma_zone_t pipe_zone; 244226042Skibstatic struct unrhdr *pipeino_unr; 245226042Skibstatic dev_t pipedev_ino; 24627899Sdyson 24791372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 24891372Salfred 24991372Salfredstatic void 25091372Salfredpipeinit(void *dummy __unused) 25191372Salfred{ 252118880Salc 253170022Srwatson pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair), 254170022Srwatson pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, 255125293Srwatson UMA_ALIGN_PTR, 0); 256118880Salc KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 257226042Skib pipeino_unr = new_unrhdr(1, INT32_MAX, NULL); 258226042Skib KASSERT(pipeino_unr != NULL, ("pipe fake inodes not initialized")); 259226042Skib pipedev_ino = devfs_alloc_cdp_inode(); 260226042Skib KASSERT(pipedev_ino > 0, ("pipe dev inode not initialized")); 26191372Salfred} 26291372Salfred 263132987Sgreenstatic int 264132987Sgreenpipe_zone_ctor(void *mem, int size, void *arg, int flags) 265125293Srwatson{ 266125293Srwatson struct pipepair *pp; 267125293Srwatson struct pipe *rpipe, *wpipe; 268125293Srwatson 269125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size")); 270125293Srwatson 271125293Srwatson pp = (struct pipepair *)mem; 272125293Srwatson 273125293Srwatson /* 274125293Srwatson * We zero both pipe endpoints to make sure all the kmem pointers 275125293Srwatson * are NULL, flag fields are zero'd, etc. We timestamp both 276125293Srwatson * endpoints with the same time. 277125293Srwatson */ 278125293Srwatson rpipe = &pp->pp_rpipe; 279125293Srwatson bzero(rpipe, sizeof(*rpipe)); 280125293Srwatson vfs_timestamp(&rpipe->pipe_ctime); 281125293Srwatson rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; 282125293Srwatson 283125293Srwatson wpipe = &pp->pp_wpipe; 284125293Srwatson bzero(wpipe, sizeof(*wpipe)); 285125293Srwatson wpipe->pipe_ctime = rpipe->pipe_ctime; 286125293Srwatson wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; 287125293Srwatson 288125293Srwatson rpipe->pipe_peer = wpipe; 289125293Srwatson rpipe->pipe_pair = pp; 290125293Srwatson wpipe->pipe_peer = rpipe; 291125293Srwatson wpipe->pipe_pair = pp; 292125293Srwatson 293125293Srwatson /* 294125293Srwatson * Mark both endpoints as present; they will later get free'd 295125293Srwatson * one at a time. When both are free'd, then the whole pair 296125293Srwatson * is released. 297125293Srwatson */ 298179243Skib rpipe->pipe_present = PIPE_ACTIVE; 299179243Skib wpipe->pipe_present = PIPE_ACTIVE; 300125293Srwatson 301125293Srwatson /* 302125293Srwatson * Eventually, the MAC Framework may initialize the label 303125293Srwatson * in ctor or init, but for now we do it elswhere to avoid 304125293Srwatson * blocking in ctor or init. 305125293Srwatson */ 306125293Srwatson pp->pp_label = NULL; 307125293Srwatson 308132987Sgreen return (0); 309125293Srwatson} 310125293Srwatson 311132987Sgreenstatic int 312132987Sgreenpipe_zone_init(void *mem, int size, int flags) 313125293Srwatson{ 314125293Srwatson struct pipepair *pp; 315125293Srwatson 316125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size")); 317125293Srwatson 318125293Srwatson pp = (struct pipepair *)mem; 319125293Srwatson 320125293Srwatson mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 321132987Sgreen return (0); 322125293Srwatson} 323125293Srwatson 324125293Srwatsonstatic void 325125293Srwatsonpipe_zone_fini(void *mem, int size) 326125293Srwatson{ 327125293Srwatson struct pipepair *pp; 328125293Srwatson 329125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size")); 330125293Srwatson 331125293Srwatson pp = (struct pipepair *)mem; 332125293Srwatson 333125293Srwatson mtx_destroy(&pp->pp_mtx); 334125293Srwatson} 335125293Srwatson 336232055Skmacystatic int 337232055Skmacypipe_paircreate(struct thread *td, struct pipepair **p_pp) 33813675Sdyson{ 339125293Srwatson struct pipepair *pp; 34013675Sdyson struct pipe *rpipe, *wpipe; 341232055Skmacy int error; 34227899Sdyson 343232055Skmacy *p_pp = pp = uma_zalloc(pipe_zone, M_WAITOK); 344125293Srwatson#ifdef MAC 345125293Srwatson /* 346126249Srwatson * The MAC label is shared between the connected endpoints. As a 347172930Srwatson * result mac_pipe_init() and mac_pipe_create() are called once 348126249Srwatson * for the pair, and not on the endpoints. 349125293Srwatson */ 350172930Srwatson mac_pipe_init(pp); 351172930Srwatson mac_pipe_create(td->td_ucred, pp); 352125293Srwatson#endif 353125293Srwatson rpipe = &pp->pp_rpipe; 354125293Srwatson wpipe = &pp->pp_wpipe; 355125293Srwatson 356193951Skib knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); 357193951Skib knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); 358140369Ssilby 359133790Ssilby /* Only the forward direction pipe is backed by default */ 360155035Sglebius if ((error = pipe_create(rpipe, 1)) != 0 || 361155035Sglebius (error = pipe_create(wpipe, 0)) != 0) { 362124394Sdes pipeclose(rpipe); 363124394Sdes pipeclose(wpipe); 364155035Sglebius return (error); 36576364Salfred } 366124394Sdes 36713907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 36813907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 369232055Skmacy return (0); 370232055Skmacy} 37113675Sdyson 372232055Skmacyint 373232055Skmacypipe_named_ctor(struct pipe **ppipe, struct thread *td) 374232055Skmacy{ 375232055Skmacy struct pipepair *pp; 376232055Skmacy int error; 377232055Skmacy 378232055Skmacy error = pipe_paircreate(td, &pp); 379232055Skmacy if (error != 0) 380232055Skmacy return (error); 381232055Skmacy pp->pp_rpipe.pipe_state |= PIPE_NAMED; 382232055Skmacy *ppipe = &pp->pp_rpipe; 383232055Skmacy return (0); 384232055Skmacy} 385232055Skmacy 386232055Skmacyvoid 387232055Skmacypipe_dtor(struct pipe *dpipe) 388232055Skmacy{ 389232055Skmacy ino_t ino; 390232055Skmacy 391232055Skmacy ino = dpipe->pipe_ino; 392232055Skmacy funsetown(&dpipe->pipe_sigio); 393232055Skmacy pipeclose(dpipe); 394232055Skmacy if (dpipe->pipe_state & PIPE_NAMED) { 395232055Skmacy dpipe = dpipe->pipe_peer; 396232055Skmacy funsetown(&dpipe->pipe_sigio); 397232055Skmacy pipeclose(dpipe); 398232055Skmacy } 399232055Skmacy if (ino != 0 && ino != (ino_t)-1) 400232055Skmacy free_unr(pipeino_unr, ino); 401232055Skmacy} 402232055Skmacy 403232055Skmacy/* 404232055Skmacy * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let 405232055Skmacy * the zone pick up the pieces via pipeclose(). 406232055Skmacy */ 407232055Skmacyint 408232055Skmacykern_pipe(struct thread *td, int fildes[2]) 409232055Skmacy{ 410234352Sjkim 411234352Sjkim return (do_pipe(td, fildes, 0)); 412234352Sjkim} 413234352Sjkim 414234352Sjkimint 415234352Sjkimdo_pipe(struct thread *td, int fildes[2], int flags) 416234352Sjkim{ 417232055Skmacy struct filedesc *fdp; 418232055Skmacy struct file *rf, *wf; 419232055Skmacy struct pipe *rpipe, *wpipe; 420232055Skmacy struct pipepair *pp; 421234352Sjkim int fd, fflags, error; 422232055Skmacy 423232055Skmacy fdp = td->td_proc->p_fd; 424232055Skmacy error = pipe_paircreate(td, &pp); 425232055Skmacy if (error != 0) 426232055Skmacy return (error); 427232055Skmacy rpipe = &pp->pp_rpipe; 428232055Skmacy wpipe = &pp->pp_wpipe; 429234352Sjkim error = falloc(td, &rf, &fd, flags); 43070915Sdwmalone if (error) { 43170915Sdwmalone pipeclose(rpipe); 43270915Sdwmalone pipeclose(wpipe); 43370915Sdwmalone return (error); 43470915Sdwmalone } 435121256Sdwmalone /* An extra reference on `rf' has been held for us by falloc(). */ 436184849Sed fildes[0] = fd; 43770915Sdwmalone 438234352Sjkim fflags = FREAD | FWRITE; 439234352Sjkim if ((flags & O_NONBLOCK) != 0) 440234352Sjkim fflags |= FNONBLOCK; 441234352Sjkim 44270803Sdwmalone /* 44370803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 44470803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 44570803Sdwmalone * to avoid races against processes which manage to dup() the read 44670803Sdwmalone * side while we are blocked trying to allocate the write side. 44770803Sdwmalone */ 448234352Sjkim finit(rf, fflags, DTYPE_PIPE, rpipe, &pipeops); 449234352Sjkim error = falloc(td, &wf, &fd, flags); 45070915Sdwmalone if (error) { 451184849Sed fdclose(fdp, rf, fildes[0], td); 45283366Sjulian fdrop(rf, td); 45370915Sdwmalone /* rpipe has been closed by fdrop(). */ 45470915Sdwmalone pipeclose(wpipe); 45570915Sdwmalone return (error); 45670915Sdwmalone } 457121256Sdwmalone /* An extra reference on `wf' has been held for us by falloc(). */ 458234352Sjkim finit(wf, fflags, DTYPE_PIPE, wpipe, &pipeops); 459121256Sdwmalone fdrop(wf, td); 460184849Sed fildes[1] = fd; 46183366Sjulian fdrop(rf, td); 46213675Sdyson 46313675Sdyson return (0); 46413675Sdyson} 46513675Sdyson 466184849Sed/* ARGSUSED */ 467184849Sedint 468225617Skmacysys_pipe(struct thread *td, struct pipe_args *uap) 469184849Sed{ 470184849Sed int error; 471184849Sed int fildes[2]; 472184849Sed 473184849Sed error = kern_pipe(td, fildes); 474184849Sed if (error) 475184849Sed return (error); 476184849Sed 477184849Sed td->td_retval[0] = fildes[0]; 478184849Sed td->td_retval[1] = fildes[1]; 479184849Sed 480184849Sed return (0); 481184849Sed} 482184849Sed 48313909Sdyson/* 48413909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 48576364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 48676364Salfred * it will retain the old buffer. 48776364Salfred * If it fails it will return ENOMEM. 48813909Sdyson */ 48976364Salfredstatic int 490132579Srwatsonpipespace_new(cpipe, size) 49113675Sdyson struct pipe *cpipe; 49276364Salfred int size; 49313675Sdyson{ 49476364Salfred caddr_t buffer; 495133790Ssilby int error, cnt, firstseg; 496117325Ssilby static int curfail = 0; 497117325Ssilby static struct timeval lastfail; 49813675Sdyson 499125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked")); 500133790Ssilby KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW), 501133790Ssilby ("pipespace: resize of direct writes not allowed")); 502133790Ssilbyretry: 503133790Ssilby cnt = cpipe->pipe_buffer.cnt; 504133790Ssilby if (cnt > size) 505133790Ssilby size = cnt; 50679224Sdillon 507118764Ssilby size = round_page(size); 508118764Ssilby buffer = (caddr_t) vm_map_min(pipe_map); 50913675Sdyson 510122163Salc error = vm_map_find(pipe_map, NULL, 0, 51176364Salfred (vm_offset_t *) &buffer, size, 1, 51213688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 51376364Salfred if (error != KERN_SUCCESS) { 514133790Ssilby if ((cpipe->pipe_buffer.buffer == NULL) && 515133790Ssilby (size > SMALL_PIPE_SIZE)) { 516133790Ssilby size = SMALL_PIPE_SIZE; 517133790Ssilby pipefragretry++; 518133790Ssilby goto retry; 519133790Ssilby } 520133790Ssilby if (cpipe->pipe_buffer.buffer == NULL) { 521133790Ssilby pipeallocfail++; 522133790Ssilby if (ppsratecheck(&lastfail, &curfail, 1)) 523133790Ssilby printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n"); 524133790Ssilby } else { 525133790Ssilby piperesizefail++; 526133790Ssilby } 52776364Salfred return (ENOMEM); 52876364Salfred } 52976364Salfred 530133790Ssilby /* copy data, then free old resources if we're resizing */ 531133790Ssilby if (cnt > 0) { 532133790Ssilby if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) { 533133790Ssilby firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out; 534133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 535133790Ssilby buffer, firstseg); 536133790Ssilby if ((cnt - firstseg) > 0) 537133790Ssilby bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg], 538133790Ssilby cpipe->pipe_buffer.in); 539133790Ssilby } else { 540133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 541133790Ssilby buffer, cnt); 542133790Ssilby } 543133790Ssilby } 54476364Salfred pipe_free_kmem(cpipe); 54576364Salfred cpipe->pipe_buffer.buffer = buffer; 54676364Salfred cpipe->pipe_buffer.size = size; 547133790Ssilby cpipe->pipe_buffer.in = cnt; 54876364Salfred cpipe->pipe_buffer.out = 0; 549133790Ssilby cpipe->pipe_buffer.cnt = cnt; 550189649Sjhb atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size); 55176364Salfred return (0); 55213907Sdyson} 55313688Sdyson 55413907Sdyson/* 555132579Srwatson * Wrapper for pipespace_new() that performs locking assertions. 556132579Srwatson */ 557132579Srwatsonstatic int 558132579Srwatsonpipespace(cpipe, size) 559132579Srwatson struct pipe *cpipe; 560132579Srwatson int size; 561132579Srwatson{ 562132579Srwatson 563133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 564133049Ssilby ("Unlocked pipe passed to pipespace")); 565132579Srwatson return (pipespace_new(cpipe, size)); 566132579Srwatson} 567132579Srwatson 568132579Srwatson/* 56913675Sdyson * lock a pipe for I/O, blocking other access 57013675Sdyson */ 57113675Sdysonstatic __inline int 57213907Sdysonpipelock(cpipe, catch) 57313675Sdyson struct pipe *cpipe; 57413907Sdyson int catch; 57513675Sdyson{ 57613776Sdyson int error; 57776364Salfred 57891362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 57991362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 58013675Sdyson cpipe->pipe_state |= PIPE_LWANT; 58191362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 58291362Salfred catch ? (PRIBIO | PCATCH) : PRIBIO, 58376760Salfred "pipelk", 0); 584124394Sdes if (error != 0) 58576760Salfred return (error); 58613675Sdyson } 58791362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 58876760Salfred return (0); 58913675Sdyson} 59013675Sdyson 59113675Sdyson/* 59213675Sdyson * unlock a pipe I/O lock 59313675Sdyson */ 59413675Sdysonstatic __inline void 59513675Sdysonpipeunlock(cpipe) 59613675Sdyson struct pipe *cpipe; 59713675Sdyson{ 59876364Salfred 59991362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 600133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 601133049Ssilby ("Unlocked pipe passed to pipeunlock")); 60291362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 60313675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 60413675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 60514177Sdyson wakeup(cpipe); 60613675Sdyson } 60713675Sdyson} 60813675Sdyson 609238928Sdavidxuvoid 61014037Sdysonpipeselwakeup(cpipe) 61114037Sdyson struct pipe *cpipe; 61214037Sdyson{ 61376364Salfred 614126252Srwatson PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 61514037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 616122352Stanimura selwakeuppri(&cpipe->pipe_sel, PSOCK); 617174647Sjeff if (!SEL_WAITING(&cpipe->pipe_sel)) 618174647Sjeff cpipe->pipe_state &= ~PIPE_SEL; 61914037Sdyson } 62041086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 62195883Salfred pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 622133741Sjmg KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); 62314037Sdyson} 62414037Sdyson 625126131Sgreen/* 626126131Sgreen * Initialize and allocate VM and memory for pipe. The structure 627126131Sgreen * will start out zero'd from the ctor, so we just manage the kmem. 628126131Sgreen */ 629126131Sgreenstatic int 630133790Ssilbypipe_create(pipe, backing) 631126131Sgreen struct pipe *pipe; 632133790Ssilby int backing; 633126131Sgreen{ 634126131Sgreen int error; 635126131Sgreen 636133790Ssilby if (backing) { 637133790Ssilby if (amountpipekva > maxpipekva / 2) 638133790Ssilby error = pipespace_new(pipe, SMALL_PIPE_SIZE); 639133790Ssilby else 640133790Ssilby error = pipespace_new(pipe, PIPE_SIZE); 641133790Ssilby } else { 642133790Ssilby /* If we're not backing this pipe, no need to do anything. */ 643133790Ssilby error = 0; 644133790Ssilby } 645228306Skib pipe->pipe_ino = -1; 646132579Srwatson return (error); 647126131Sgreen} 648126131Sgreen 64913675Sdyson/* ARGSUSED */ 65013675Sdysonstatic int 651101941Srwatsonpipe_read(fp, uio, active_cred, flags, td) 65213675Sdyson struct file *fp; 65313675Sdyson struct uio *uio; 654101941Srwatson struct ucred *active_cred; 65583366Sjulian struct thread *td; 65645311Sdt int flags; 65713675Sdyson{ 658232055Skmacy struct pipe *rpipe; 65947748Salc int error; 66013675Sdyson int nread = 0; 661232495Skib int size; 66213675Sdyson 663232055Skmacy rpipe = fp->f_data; 66491362Salfred PIPE_LOCK(rpipe); 66513675Sdyson ++rpipe->pipe_busy; 66647748Salc error = pipelock(rpipe, 1); 66747748Salc if (error) 66847748Salc goto unlocked_error; 66947748Salc 670101768Srwatson#ifdef MAC 671172930Srwatson error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); 672101768Srwatson if (error) 673101768Srwatson goto locked_error; 674101768Srwatson#endif 675133790Ssilby if (amountpipekva > (3 * maxpipekva) / 4) { 676133790Ssilby if (!(rpipe->pipe_state & PIPE_DIRECTW) && 677133790Ssilby (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 678133790Ssilby (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 679133790Ssilby (piperesizeallowed == 1)) { 680133790Ssilby PIPE_UNLOCK(rpipe); 681133790Ssilby pipespace(rpipe, SMALL_PIPE_SIZE); 682133790Ssilby PIPE_LOCK(rpipe); 683133790Ssilby } 684133790Ssilby } 685101768Srwatson 68613675Sdyson while (uio->uio_resid) { 68713907Sdyson /* 68813907Sdyson * normal pipe buffer receive 68913907Sdyson */ 69013675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 69118863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 69213675Sdyson if (size > rpipe->pipe_buffer.cnt) 69313675Sdyson size = rpipe->pipe_buffer.cnt; 694231949Skib if (size > uio->uio_resid) 695232495Skib size = uio->uio_resid; 69647748Salc 69791362Salfred PIPE_UNLOCK(rpipe); 698116127Smux error = uiomove( 699116127Smux &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 700116127Smux size, uio); 70191362Salfred PIPE_LOCK(rpipe); 70276760Salfred if (error) 70313675Sdyson break; 70476760Salfred 70513675Sdyson rpipe->pipe_buffer.out += size; 70613675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 70713675Sdyson rpipe->pipe_buffer.out = 0; 70813675Sdyson 70913675Sdyson rpipe->pipe_buffer.cnt -= size; 71047748Salc 71147748Salc /* 71247748Salc * If there is no more to read in the pipe, reset 71347748Salc * its pointers to the beginning. This improves 71447748Salc * cache hit stats. 71547748Salc */ 71647748Salc if (rpipe->pipe_buffer.cnt == 0) { 71747748Salc rpipe->pipe_buffer.in = 0; 71847748Salc rpipe->pipe_buffer.out = 0; 71947748Salc } 72013675Sdyson nread += size; 72114037Sdyson#ifndef PIPE_NODIRECT 72213907Sdyson /* 72313907Sdyson * Direct copy, bypassing a kernel buffer. 72413907Sdyson */ 72513907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 72647748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 727231949Skib if (size > uio->uio_resid) 72818863Sdyson size = (u_int) uio->uio_resid; 72947748Salc 73091362Salfred PIPE_UNLOCK(rpipe); 731127501Salc error = uiomove_fromphys(rpipe->pipe_map.ms, 732127501Salc rpipe->pipe_map.pos, size, uio); 73391362Salfred PIPE_LOCK(rpipe); 73413907Sdyson if (error) 73513907Sdyson break; 73613907Sdyson nread += size; 73713907Sdyson rpipe->pipe_map.pos += size; 73813907Sdyson rpipe->pipe_map.cnt -= size; 73913907Sdyson if (rpipe->pipe_map.cnt == 0) { 740238928Sdavidxu rpipe->pipe_state &= ~(PIPE_DIRECTW|PIPE_WANTW); 74113907Sdyson wakeup(rpipe); 74213907Sdyson } 74314037Sdyson#endif 74413675Sdyson } else { 74513675Sdyson /* 74613675Sdyson * detect EOF condition 74776760Salfred * read returns 0 on EOF, no need to set error 74813675Sdyson */ 74976760Salfred if (rpipe->pipe_state & PIPE_EOF) 75013675Sdyson break; 75143623Sdillon 75213675Sdyson /* 75313675Sdyson * If the "write-side" has been blocked, wake it up now. 75413675Sdyson */ 75513675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 75613675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 75713675Sdyson wakeup(rpipe); 75813675Sdyson } 75943623Sdillon 76043623Sdillon /* 76147748Salc * Break if some data was read. 76243623Sdillon */ 76347748Salc if (nread > 0) 76413675Sdyson break; 76516960Sdyson 76643623Sdillon /* 767124394Sdes * Unlock the pipe buffer for our remaining processing. 768116127Smux * We will either break out with an error or we will 769116127Smux * sleep and relock to loop. 77043623Sdillon */ 77147748Salc pipeunlock(rpipe); 77243623Sdillon 77313675Sdyson /* 77447748Salc * Handle non-blocking mode operation or 77547748Salc * wait for more data. 77613675Sdyson */ 77776760Salfred if (fp->f_flag & FNONBLOCK) { 77847748Salc error = EAGAIN; 77976760Salfred } else { 78047748Salc rpipe->pipe_state |= PIPE_WANTR; 78191362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 78291362Salfred PRIBIO | PCATCH, 78377140Salfred "piperd", 0)) == 0) 78447748Salc error = pipelock(rpipe, 1); 78513675Sdyson } 78647748Salc if (error) 78747748Salc goto unlocked_error; 78813675Sdyson } 78913675Sdyson } 790101768Srwatson#ifdef MAC 791101768Srwatsonlocked_error: 792101768Srwatson#endif 79347748Salc pipeunlock(rpipe); 79413675Sdyson 79591362Salfred /* XXX: should probably do this before getting any locks. */ 79624101Sbde if (error == 0) 79755112Sbde vfs_timestamp(&rpipe->pipe_atime); 79847748Salcunlocked_error: 79947748Salc --rpipe->pipe_busy; 80013913Sdyson 80147748Salc /* 80247748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 80347748Salc */ 80413675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 80513675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 80613675Sdyson wakeup(rpipe); 80713675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 80813675Sdyson /* 80947748Salc * Handle write blocking hysteresis. 81013675Sdyson */ 81113675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 81213675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 81313675Sdyson wakeup(rpipe); 81413675Sdyson } 81513675Sdyson } 81614037Sdyson 81714802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 81814037Sdyson pipeselwakeup(rpipe); 81914037Sdyson 82091362Salfred PIPE_UNLOCK(rpipe); 82176760Salfred return (error); 82213675Sdyson} 82313675Sdyson 82414037Sdyson#ifndef PIPE_NODIRECT 82513907Sdyson/* 82613907Sdyson * Map the sending processes' buffer into kernel space and wire it. 82713907Sdyson * This is similar to a physical write operation. 82813907Sdyson */ 82913675Sdysonstatic int 83013907Sdysonpipe_build_write_buffer(wpipe, uio) 83113907Sdyson struct pipe *wpipe; 83213675Sdyson struct uio *uio; 83313675Sdyson{ 83418863Sdyson u_int size; 835216511Salc int i; 83613907Sdyson 83791412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 838133790Ssilby KASSERT(wpipe->pipe_state & PIPE_DIRECTW, 839133790Ssilby ("Clone attempt on non-direct write pipe!")); 84079224Sdillon 841231949Skib if (uio->uio_iov->iov_len > wpipe->pipe_buffer.size) 842231949Skib size = wpipe->pipe_buffer.size; 843231949Skib else 844231949Skib size = uio->uio_iov->iov_len; 84513907Sdyson 846216699Salc if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, 847216699Salc (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ, 848216699Salc wpipe->pipe_map.ms, PIPENPAGES)) < 0) 849193893Scperciva return (EFAULT); 85013907Sdyson 85113907Sdyson/* 85213907Sdyson * set up the control block 85313907Sdyson */ 85413907Sdyson wpipe->pipe_map.npages = i; 85576760Salfred wpipe->pipe_map.pos = 85676760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 85713907Sdyson wpipe->pipe_map.cnt = size; 85813907Sdyson 85913907Sdyson/* 86013907Sdyson * and update the uio data 86113907Sdyson */ 86213907Sdyson 86313907Sdyson uio->uio_iov->iov_len -= size; 864104908Smike uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; 86513907Sdyson if (uio->uio_iov->iov_len == 0) 86613907Sdyson uio->uio_iov++; 86713907Sdyson uio->uio_resid -= size; 86813907Sdyson uio->uio_offset += size; 86976760Salfred return (0); 87013907Sdyson} 87113907Sdyson 87213907Sdyson/* 87313907Sdyson * unmap and unwire the process buffer 87413907Sdyson */ 87513907Sdysonstatic void 87613907Sdysonpipe_destroy_write_buffer(wpipe) 87776760Salfred struct pipe *wpipe; 87813907Sdyson{ 87976364Salfred 880127501Salc PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 881216511Salc vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages); 88291653Stanimura wpipe->pipe_map.npages = 0; 88313907Sdyson} 88413907Sdyson 88513907Sdyson/* 88613907Sdyson * In the case of a signal, the writing process might go away. This 88713907Sdyson * code copies the data into the circular buffer so that the source 88813907Sdyson * pages can be freed without loss of data. 88913907Sdyson */ 89013907Sdysonstatic void 89113907Sdysonpipe_clone_write_buffer(wpipe) 89276364Salfred struct pipe *wpipe; 89313907Sdyson{ 894127501Salc struct uio uio; 895127501Salc struct iovec iov; 89613907Sdyson int size; 89713907Sdyson int pos; 89813907Sdyson 89991362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 90013907Sdyson size = wpipe->pipe_map.cnt; 90113907Sdyson pos = wpipe->pipe_map.pos; 90213907Sdyson 90313907Sdyson wpipe->pipe_buffer.in = size; 90413907Sdyson wpipe->pipe_buffer.out = 0; 90513907Sdyson wpipe->pipe_buffer.cnt = size; 90613907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 90713907Sdyson 908119811Salc PIPE_UNLOCK(wpipe); 909127501Salc iov.iov_base = wpipe->pipe_buffer.buffer; 910127501Salc iov.iov_len = size; 911127501Salc uio.uio_iov = &iov; 912127501Salc uio.uio_iovcnt = 1; 913127501Salc uio.uio_offset = 0; 914127501Salc uio.uio_resid = size; 915127501Salc uio.uio_segflg = UIO_SYSSPACE; 916127501Salc uio.uio_rw = UIO_READ; 917127501Salc uio.uio_td = curthread; 918127501Salc uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio); 919127501Salc PIPE_LOCK(wpipe); 92013907Sdyson pipe_destroy_write_buffer(wpipe); 92113907Sdyson} 92213907Sdyson 92313907Sdyson/* 92413907Sdyson * This implements the pipe buffer write mechanism. Note that only 92513907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 92613907Sdyson * If there are any characters in the pipe buffer, the direct write will 92713907Sdyson * be deferred until the receiving process grabs all of the bytes from 92813907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 92913907Sdyson */ 93013907Sdysonstatic int 93113907Sdysonpipe_direct_write(wpipe, uio) 93213907Sdyson struct pipe *wpipe; 93313907Sdyson struct uio *uio; 93413907Sdyson{ 93513907Sdyson int error; 93676364Salfred 93713951Sdysonretry: 93891362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 939133049Ssilby error = pipelock(wpipe, 1); 940133049Ssilby if (wpipe->pipe_state & PIPE_EOF) 941133049Ssilby error = EPIPE; 942133049Ssilby if (error) { 943133049Ssilby pipeunlock(wpipe); 944133049Ssilby goto error1; 945133049Ssilby } 94613907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 94776760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 94813951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 94913951Sdyson wakeup(wpipe); 95013951Sdyson } 951173750Sdumbbell pipeselwakeup(wpipe); 95213992Sdyson wpipe->pipe_state |= PIPE_WANTW; 953133049Ssilby pipeunlock(wpipe); 95491362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 95591362Salfred PRIBIO | PCATCH, "pipdww", 0); 95614802Sdyson if (error) 95713907Sdyson goto error1; 958133049Ssilby else 959133049Ssilby goto retry; 96013907Sdyson } 96113907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 96213951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 96376760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 96413951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 96513951Sdyson wakeup(wpipe); 96613951Sdyson } 967173750Sdumbbell pipeselwakeup(wpipe); 96813992Sdyson wpipe->pipe_state |= PIPE_WANTW; 969133049Ssilby pipeunlock(wpipe); 97091362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 97191362Salfred PRIBIO | PCATCH, "pipdwc", 0); 97214802Sdyson if (error) 97313907Sdyson goto error1; 974133049Ssilby else 975133049Ssilby goto retry; 97613907Sdyson } 97713907Sdyson 97813951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 97913951Sdyson 980119872Salc PIPE_UNLOCK(wpipe); 98113907Sdyson error = pipe_build_write_buffer(wpipe, uio); 982119872Salc PIPE_LOCK(wpipe); 98313907Sdyson if (error) { 98413907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 985133049Ssilby pipeunlock(wpipe); 98613907Sdyson goto error1; 98713907Sdyson } 98813907Sdyson 98913907Sdyson error = 0; 99013907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 99113907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 99213907Sdyson pipe_destroy_write_buffer(wpipe); 993112981Shsu pipeselwakeup(wpipe); 99413907Sdyson pipeunlock(wpipe); 99514802Sdyson error = EPIPE; 99614802Sdyson goto error1; 99713907Sdyson } 99813992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 99913992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 100013992Sdyson wakeup(wpipe); 100113992Sdyson } 100214037Sdyson pipeselwakeup(wpipe); 1003238928Sdavidxu wpipe->pipe_state |= PIPE_WANTW; 1004133049Ssilby pipeunlock(wpipe); 100591362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, 100691362Salfred "pipdwt", 0); 1007133049Ssilby pipelock(wpipe, 0); 100813907Sdyson } 100913907Sdyson 1010126131Sgreen if (wpipe->pipe_state & PIPE_EOF) 1011126131Sgreen error = EPIPE; 101213907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 101313907Sdyson /* 101413907Sdyson * this bit of trickery substitutes a kernel buffer for 101513907Sdyson * the process that might be going away. 101613907Sdyson */ 101713907Sdyson pipe_clone_write_buffer(wpipe); 101813907Sdyson } else { 101913907Sdyson pipe_destroy_write_buffer(wpipe); 102013907Sdyson } 102113907Sdyson pipeunlock(wpipe); 102276760Salfred return (error); 102313907Sdyson 102413907Sdysonerror1: 102513907Sdyson wakeup(wpipe); 102676760Salfred return (error); 102713907Sdyson} 102814037Sdyson#endif 1029124394Sdes 103016960Sdysonstatic int 1031101941Srwatsonpipe_write(fp, uio, active_cred, flags, td) 103216960Sdyson struct file *fp; 103313907Sdyson struct uio *uio; 1034101941Srwatson struct ucred *active_cred; 103583366Sjulian struct thread *td; 103645311Sdt int flags; 103713907Sdyson{ 1038232495Skib int error = 0; 1039232495Skib int desiredsize; 1040232495Skib ssize_t orig_resid; 104116960Sdyson struct pipe *wpipe, *rpipe; 104216960Sdyson 1043109153Sdillon rpipe = fp->f_data; 1044232055Skmacy wpipe = PIPE_PEER(rpipe); 104591395Salfred PIPE_LOCK(rpipe); 1046133049Ssilby error = pipelock(wpipe, 1); 1047133049Ssilby if (error) { 1048133049Ssilby PIPE_UNLOCK(rpipe); 1049133049Ssilby return (error); 1050133049Ssilby } 105113675Sdyson /* 105213675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 105313675Sdyson */ 1054179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1055179243Skib (wpipe->pipe_state & PIPE_EOF)) { 1056133049Ssilby pipeunlock(wpipe); 105791395Salfred PIPE_UNLOCK(rpipe); 105876760Salfred return (EPIPE); 105913675Sdyson } 1060101768Srwatson#ifdef MAC 1061172930Srwatson error = mac_pipe_check_write(active_cred, wpipe->pipe_pair); 1062101768Srwatson if (error) { 1063133049Ssilby pipeunlock(wpipe); 1064101768Srwatson PIPE_UNLOCK(rpipe); 1065101768Srwatson return (error); 1066101768Srwatson } 1067101768Srwatson#endif 106877676Sdillon ++wpipe->pipe_busy; 106913675Sdyson 1070133790Ssilby /* Choose a larger size if it's advantageous */ 1071133790Ssilby desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size); 1072133790Ssilby while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) { 1073133790Ssilby if (piperesizeallowed != 1) 1074133790Ssilby break; 1075133790Ssilby if (amountpipekva > maxpipekva / 2) 1076133790Ssilby break; 1077133790Ssilby if (desiredsize == BIG_PIPE_SIZE) 1078133790Ssilby break; 1079133790Ssilby desiredsize = desiredsize * 2; 1080133790Ssilby } 108117163Sdyson 1082133790Ssilby /* Choose a smaller size if we're in a OOM situation */ 1083133790Ssilby if ((amountpipekva > (3 * maxpipekva) / 4) && 1084133790Ssilby (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 1085133790Ssilby (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 1086133790Ssilby (piperesizeallowed == 1)) 1087133790Ssilby desiredsize = SMALL_PIPE_SIZE; 1088133790Ssilby 1089133790Ssilby /* Resize if the above determined that a new size was necessary */ 1090133790Ssilby if ((desiredsize != wpipe->pipe_buffer.size) && 1091133790Ssilby ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) { 1092133049Ssilby PIPE_UNLOCK(wpipe); 1093133790Ssilby pipespace(wpipe, desiredsize); 1094133049Ssilby PIPE_LOCK(wpipe); 109513907Sdyson } 1096133790Ssilby if (wpipe->pipe_buffer.size == 0) { 1097133790Ssilby /* 1098133790Ssilby * This can only happen for reverse direction use of pipes 1099133790Ssilby * in a complete OOM situation. 1100133790Ssilby */ 1101133790Ssilby error = ENOMEM; 1102133790Ssilby --wpipe->pipe_busy; 1103133790Ssilby pipeunlock(wpipe); 1104133790Ssilby PIPE_UNLOCK(wpipe); 1105133790Ssilby return (error); 1106133790Ssilby } 110777676Sdillon 1108133049Ssilby pipeunlock(wpipe); 1109124394Sdes 111013913Sdyson orig_resid = uio->uio_resid; 111177676Sdillon 111213675Sdyson while (uio->uio_resid) { 111313907Sdyson int space; 111476760Salfred 1115133049Ssilby pipelock(wpipe, 0); 1116133049Ssilby if (wpipe->pipe_state & PIPE_EOF) { 1117133049Ssilby pipeunlock(wpipe); 1118133049Ssilby error = EPIPE; 1119133049Ssilby break; 1120133049Ssilby } 112114037Sdyson#ifndef PIPE_NODIRECT 112213907Sdyson /* 112313907Sdyson * If the transfer is large, we can gain performance if 112413907Sdyson * we do process-to-process copies directly. 112516416Sdyson * If the write is non-blocking, we don't use the 112616416Sdyson * direct write mechanism. 112758505Sdillon * 112858505Sdillon * The direct write mechanism will detect the reader going 112958505Sdillon * away on us. 113013907Sdyson */ 1131165347Spjd if (uio->uio_segflg == UIO_USERSPACE && 1132165347Spjd uio->uio_iov->iov_len >= PIPE_MINDIRECT && 1133165347Spjd wpipe->pipe_buffer.size >= PIPE_MINDIRECT && 1134127501Salc (fp->f_flag & FNONBLOCK) == 0) { 1135133049Ssilby pipeunlock(wpipe); 1136105009Salfred error = pipe_direct_write(wpipe, uio); 113776760Salfred if (error) 113813907Sdyson break; 113913907Sdyson continue; 114091362Salfred } 114114037Sdyson#endif 114213907Sdyson 114313907Sdyson /* 114413907Sdyson * Pipe buffered writes cannot be coincidental with 114513907Sdyson * direct writes. We wait until the currently executing 114613907Sdyson * direct write is completed before we start filling the 114758505Sdillon * pipe buffer. We break out if a signal occurs or the 114858505Sdillon * reader goes away. 114913907Sdyson */ 1150133049Ssilby if (wpipe->pipe_state & PIPE_DIRECTW) { 115113992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 115213992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 115313992Sdyson wakeup(wpipe); 115413992Sdyson } 1155173750Sdumbbell pipeselwakeup(wpipe); 1156173750Sdumbbell wpipe->pipe_state |= PIPE_WANTW; 1157133049Ssilby pipeunlock(wpipe); 115891395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 115991362Salfred "pipbww", 0); 116013907Sdyson if (error) 116113907Sdyson break; 1162133049Ssilby else 1163133049Ssilby continue; 116413907Sdyson } 116513907Sdyson 116613907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 116714644Sdyson 116814644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 116913913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 117013913Sdyson space = 0; 117113907Sdyson 1172118230Spb if (space > 0) { 1173133049Ssilby int size; /* Transfer size */ 1174133049Ssilby int segsize; /* first segment to transfer */ 117576760Salfred 1176133049Ssilby /* 1177133049Ssilby * Transfer size is minimum of uio transfer 1178133049Ssilby * and free space in pipe buffer. 1179133049Ssilby */ 1180133049Ssilby if (space > uio->uio_resid) 1181133049Ssilby size = uio->uio_resid; 1182133049Ssilby else 1183133049Ssilby size = space; 1184133049Ssilby /* 1185133049Ssilby * First segment to transfer is minimum of 1186133049Ssilby * transfer size and contiguous space in 1187133049Ssilby * pipe buffer. If first segment to transfer 1188133049Ssilby * is less than the transfer size, we've got 1189133049Ssilby * a wraparound in the buffer. 1190133049Ssilby */ 1191133049Ssilby segsize = wpipe->pipe_buffer.size - 1192133049Ssilby wpipe->pipe_buffer.in; 1193133049Ssilby if (segsize > size) 1194133049Ssilby segsize = size; 119554534Stegge 1196133049Ssilby /* Transfer first segment */ 1197133049Ssilby 1198133049Ssilby PIPE_UNLOCK(rpipe); 1199133049Ssilby error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 1200133049Ssilby segsize, uio); 1201133049Ssilby PIPE_LOCK(rpipe); 1202133049Ssilby 1203133049Ssilby if (error == 0 && segsize < size) { 1204133049Ssilby KASSERT(wpipe->pipe_buffer.in + segsize == 1205133049Ssilby wpipe->pipe_buffer.size, 1206133049Ssilby ("Pipe buffer wraparound disappeared")); 120754534Stegge /* 1208133049Ssilby * Transfer remaining part now, to 1209133049Ssilby * support atomic writes. Wraparound 1210133049Ssilby * happened. 121154534Stegge */ 1212124394Sdes 121391395Salfred PIPE_UNLOCK(rpipe); 1214133049Ssilby error = uiomove( 1215133049Ssilby &wpipe->pipe_buffer.buffer[0], 1216133049Ssilby size - segsize, uio); 121791395Salfred PIPE_LOCK(rpipe); 1218133049Ssilby } 1219133049Ssilby if (error == 0) { 1220133049Ssilby wpipe->pipe_buffer.in += size; 1221133049Ssilby if (wpipe->pipe_buffer.in >= 1222133049Ssilby wpipe->pipe_buffer.size) { 1223133049Ssilby KASSERT(wpipe->pipe_buffer.in == 1224133049Ssilby size - segsize + 1225133049Ssilby wpipe->pipe_buffer.size, 1226133049Ssilby ("Expected wraparound bad")); 1227133049Ssilby wpipe->pipe_buffer.in = size - segsize; 122854534Stegge } 1229124394Sdes 1230133049Ssilby wpipe->pipe_buffer.cnt += size; 1231133049Ssilby KASSERT(wpipe->pipe_buffer.cnt <= 1232133049Ssilby wpipe->pipe_buffer.size, 1233133049Ssilby ("Pipe buffer overflow")); 123413675Sdyson } 1235133049Ssilby pipeunlock(wpipe); 1236153484Sdelphij if (error != 0) 1237153484Sdelphij break; 123813675Sdyson } else { 123913675Sdyson /* 124013675Sdyson * If the "read-side" has been blocked, wake it up now. 124113675Sdyson */ 124213675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 124313675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 124413675Sdyson wakeup(wpipe); 124513675Sdyson } 124614037Sdyson 124713675Sdyson /* 124813675Sdyson * don't block on non-blocking I/O 124913675Sdyson */ 125016960Sdyson if (fp->f_flag & FNONBLOCK) { 125113907Sdyson error = EAGAIN; 1252133049Ssilby pipeunlock(wpipe); 125313675Sdyson break; 125413675Sdyson } 125513907Sdyson 125614037Sdyson /* 125714037Sdyson * We have no more space and have something to offer, 125829356Speter * wake up select/poll. 125914037Sdyson */ 126014037Sdyson pipeselwakeup(wpipe); 126114037Sdyson 126213675Sdyson wpipe->pipe_state |= PIPE_WANTW; 1263133049Ssilby pipeunlock(wpipe); 126491395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), 126591362Salfred PRIBIO | PCATCH, "pipewr", 0); 126676760Salfred if (error != 0) 126713675Sdyson break; 126813675Sdyson } 126913675Sdyson } 127013675Sdyson 1271133049Ssilby pipelock(wpipe, 0); 127214644Sdyson --wpipe->pipe_busy; 127377676Sdillon 127476760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 127576760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 127613675Sdyson wakeup(wpipe); 127713675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 127813675Sdyson /* 127913675Sdyson * If we have put any characters in the buffer, we wake up 128013675Sdyson * the reader. 128113675Sdyson */ 128213675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 128313675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 128413675Sdyson wakeup(wpipe); 128513675Sdyson } 128613675Sdyson } 128713909Sdyson 128813909Sdyson /* 128913909Sdyson * Don't return EPIPE if I/O was successful 129013909Sdyson */ 129113907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 129277676Sdillon (uio->uio_resid == 0) && 129377676Sdillon (error == EPIPE)) { 129413907Sdyson error = 0; 129577676Sdillon } 129613913Sdyson 129724101Sbde if (error == 0) 129855112Sbde vfs_timestamp(&wpipe->pipe_mtime); 129924101Sbde 130014037Sdyson /* 130114037Sdyson * We have something to offer, 130229356Speter * wake up select/poll. 130314037Sdyson */ 130414177Sdyson if (wpipe->pipe_buffer.cnt) 130514037Sdyson pipeselwakeup(wpipe); 130613907Sdyson 1307133049Ssilby pipeunlock(wpipe); 130891395Salfred PIPE_UNLOCK(rpipe); 130976760Salfred return (error); 131013675Sdyson} 131113675Sdyson 1312175140Sjhb/* ARGSUSED */ 1313175140Sjhbstatic int 1314175140Sjhbpipe_truncate(fp, length, active_cred, td) 1315175140Sjhb struct file *fp; 1316175140Sjhb off_t length; 1317175140Sjhb struct ucred *active_cred; 1318175140Sjhb struct thread *td; 1319175140Sjhb{ 1320175140Sjhb 1321232055Skmacy /* For named pipes call the vnode operation. */ 1322232055Skmacy if (fp->f_vnode != NULL) 1323232055Skmacy return (vnops.fo_truncate(fp, length, active_cred, td)); 1324175140Sjhb return (EINVAL); 1325175140Sjhb} 1326175140Sjhb 132713675Sdyson/* 132813675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 132913675Sdyson */ 1330104094Sphkstatic int 1331102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td) 133213675Sdyson struct file *fp; 133336735Sdfr u_long cmd; 133499009Salfred void *data; 1335102003Srwatson struct ucred *active_cred; 133683366Sjulian struct thread *td; 133713675Sdyson{ 1338109153Sdillon struct pipe *mpipe = fp->f_data; 1339101768Srwatson int error; 134013675Sdyson 1341104269Srwatson PIPE_LOCK(mpipe); 1342104269Srwatson 1343104269Srwatson#ifdef MAC 1344172930Srwatson error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data); 1345121970Srwatson if (error) { 1346121970Srwatson PIPE_UNLOCK(mpipe); 1347101768Srwatson return (error); 1348121970Srwatson } 1349101768Srwatson#endif 1350101768Srwatson 1351137752Sphk error = 0; 135213675Sdyson switch (cmd) { 135313675Sdyson 135413675Sdyson case FIONBIO: 1355137752Sphk break; 135613675Sdyson 135713675Sdyson case FIOASYNC: 135813675Sdyson if (*(int *)data) { 135913675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 136013675Sdyson } else { 136113675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 136213675Sdyson } 1363137752Sphk break; 136413675Sdyson 136513675Sdyson case FIONREAD: 1366232055Skmacy if (!(fp->f_flag & FREAD)) { 1367232055Skmacy *(int *)data = 0; 1368232055Skmacy PIPE_UNLOCK(mpipe); 1369232055Skmacy return (0); 1370232055Skmacy } 137114037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 137214037Sdyson *(int *)data = mpipe->pipe_map.cnt; 137314037Sdyson else 137414037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 1375137752Sphk break; 137613675Sdyson 137741086Struckman case FIOSETOWN: 1378138032Srwatson PIPE_UNLOCK(mpipe); 1379137752Sphk error = fsetown(*(int *)data, &mpipe->pipe_sigio); 1380138032Srwatson goto out_unlocked; 138141086Struckman 138241086Struckman case FIOGETOWN: 1383104393Struckman *(int *)data = fgetown(&mpipe->pipe_sigio); 1384137752Sphk break; 138513675Sdyson 138641086Struckman /* This is deprecated, FIOSETOWN should be used instead. */ 138741086Struckman case TIOCSPGRP: 1388138032Srwatson PIPE_UNLOCK(mpipe); 1389137752Sphk error = fsetown(-(*(int *)data), &mpipe->pipe_sigio); 1390138032Srwatson goto out_unlocked; 139141086Struckman 139241086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 139318863Sdyson case TIOCGPGRP: 1394104393Struckman *(int *)data = -fgetown(&mpipe->pipe_sigio); 1395137752Sphk break; 139613675Sdyson 1397137752Sphk default: 1398137752Sphk error = ENOTTY; 1399137764Sphk break; 140013675Sdyson } 1401104269Srwatson PIPE_UNLOCK(mpipe); 1402138032Srwatsonout_unlocked: 1403137752Sphk return (error); 140413675Sdyson} 140513675Sdyson 1406104094Sphkstatic int 1407101983Srwatsonpipe_poll(fp, events, active_cred, td) 140813675Sdyson struct file *fp; 140929356Speter int events; 1410101983Srwatson struct ucred *active_cred; 141183366Sjulian struct thread *td; 141213675Sdyson{ 1413232055Skmacy struct pipe *rpipe; 141413675Sdyson struct pipe *wpipe; 1415232055Skmacy int levents, revents; 1416101768Srwatson#ifdef MAC 1417101768Srwatson int error; 1418101768Srwatson#endif 141913675Sdyson 1420232055Skmacy revents = 0; 1421232055Skmacy rpipe = fp->f_data; 1422232055Skmacy wpipe = PIPE_PEER(rpipe); 142391362Salfred PIPE_LOCK(rpipe); 1424101768Srwatson#ifdef MAC 1425172930Srwatson error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair); 1426101768Srwatson if (error) 1427101768Srwatson goto locked_error; 1428101768Srwatson#endif 1429232055Skmacy if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) 143029356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 1431195423Skib (rpipe->pipe_buffer.cnt > 0)) 143229356Speter revents |= events & (POLLIN | POLLRDNORM); 143313675Sdyson 1434232055Skmacy if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) 1435179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1436179243Skib (wpipe->pipe_state & PIPE_EOF) || 143743311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 1438228510Sjilles ((wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF || 1439228510Sjilles wpipe->pipe_buffer.size == 0))) 144029356Speter revents |= events & (POLLOUT | POLLWRNORM); 144113675Sdyson 1442232055Skmacy levents = events & 1443232055Skmacy (POLLIN | POLLINIGNEOF | POLLPRI | POLLRDNORM | POLLRDBAND); 1444232055Skmacy if (rpipe->pipe_state & PIPE_NAMED && fp->f_flag & FREAD && levents && 1445232641Skib rpipe->pipe_state & PIPE_SAMEWGEN) 1446232055Skmacy events |= POLLINIGNEOF; 1447232055Skmacy 1448195423Skib if ((events & POLLINIGNEOF) == 0) { 1449195423Skib if (rpipe->pipe_state & PIPE_EOF) { 1450195423Skib revents |= (events & (POLLIN | POLLRDNORM)); 1451195423Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1452195423Skib (wpipe->pipe_state & PIPE_EOF)) 1453195423Skib revents |= POLLHUP; 1454195423Skib } 1455195423Skib } 145629356Speter 145729356Speter if (revents == 0) { 1458232055Skmacy if (fp->f_flag & FREAD && events & (POLLIN | POLLRDNORM)) { 145983805Sjhb selrecord(td, &rpipe->pipe_sel); 1460174647Sjeff if (SEL_WAITING(&rpipe->pipe_sel)) 1461174647Sjeff rpipe->pipe_state |= PIPE_SEL; 146213675Sdyson } 146313675Sdyson 1464232055Skmacy if (fp->f_flag & FWRITE && events & (POLLOUT | POLLWRNORM)) { 146583805Sjhb selrecord(td, &wpipe->pipe_sel); 1466174647Sjeff if (SEL_WAITING(&wpipe->pipe_sel)) 1467174647Sjeff wpipe->pipe_state |= PIPE_SEL; 146813907Sdyson } 146913675Sdyson } 1470101768Srwatson#ifdef MAC 1471101768Srwatsonlocked_error: 1472101768Srwatson#endif 147391362Salfred PIPE_UNLOCK(rpipe); 147429356Speter 147529356Speter return (revents); 147613675Sdyson} 147713675Sdyson 147898989Salfred/* 147998989Salfred * We shouldn't need locks here as we're doing a read and this should 148098989Salfred * be a natural race. 148198989Salfred */ 148252983Speterstatic int 1483101983Srwatsonpipe_stat(fp, ub, active_cred, td) 148452983Speter struct file *fp; 148552983Speter struct stat *ub; 1486101983Srwatson struct ucred *active_cred; 148783366Sjulian struct thread *td; 148813675Sdyson{ 1489228306Skib struct pipe *pipe; 1490228306Skib int new_unr; 1491101768Srwatson#ifdef MAC 1492101768Srwatson int error; 1493228306Skib#endif 149452983Speter 1495228306Skib pipe = fp->f_data; 1496104269Srwatson PIPE_LOCK(pipe); 1497228306Skib#ifdef MAC 1498172930Srwatson error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); 1499228306Skib if (error) { 1500228306Skib PIPE_UNLOCK(pipe); 1501101768Srwatson return (error); 1502228306Skib } 1503101768Srwatson#endif 1504232055Skmacy 1505232055Skmacy /* For named pipes ask the underlying filesystem. */ 1506232055Skmacy if (pipe->pipe_state & PIPE_NAMED) { 1507232055Skmacy PIPE_UNLOCK(pipe); 1508232055Skmacy return (vnops.fo_stat(fp, ub, active_cred, td)); 1509232055Skmacy } 1510232055Skmacy 1511228306Skib /* 1512228306Skib * Lazily allocate an inode number for the pipe. Most pipe 1513228306Skib * users do not call fstat(2) on the pipe, which means that 1514228306Skib * postponing the inode allocation until it is must be 1515228306Skib * returned to userland is useful. If alloc_unr failed, 1516228306Skib * assign st_ino zero instead of returning an error. 1517228306Skib * Special pipe_ino values: 1518228306Skib * -1 - not yet initialized; 1519228306Skib * 0 - alloc_unr failed, return 0 as st_ino forever. 1520228306Skib */ 1521228306Skib if (pipe->pipe_ino == (ino_t)-1) { 1522228306Skib new_unr = alloc_unr(pipeino_unr); 1523228306Skib if (new_unr != -1) 1524228306Skib pipe->pipe_ino = new_unr; 1525228306Skib else 1526228306Skib pipe->pipe_ino = 0; 1527228306Skib } 1528228306Skib PIPE_UNLOCK(pipe); 1529228306Skib 1530100527Salfred bzero(ub, sizeof(*ub)); 153117124Sbde ub->st_mode = S_IFIFO; 1532133790Ssilby ub->st_blksize = PAGE_SIZE; 1533132436Ssilby if (pipe->pipe_state & PIPE_DIRECTW) 1534132436Ssilby ub->st_size = pipe->pipe_map.cnt; 1535132436Ssilby else 1536132436Ssilby ub->st_size = pipe->pipe_buffer.cnt; 153713675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 1538205792Sed ub->st_atim = pipe->pipe_atime; 1539205792Sed ub->st_mtim = pipe->pipe_mtime; 1540205792Sed ub->st_ctim = pipe->pipe_ctime; 154160404Schris ub->st_uid = fp->f_cred->cr_uid; 154260404Schris ub->st_gid = fp->f_cred->cr_gid; 1543226042Skib ub->st_dev = pipedev_ino; 1544226042Skib ub->st_ino = pipe->pipe_ino; 154517124Sbde /* 1546226042Skib * Left as 0: st_nlink, st_rdev, st_flags, st_gen. 154717124Sbde */ 154876760Salfred return (0); 154913675Sdyson} 155013675Sdyson 155113675Sdyson/* ARGSUSED */ 155213675Sdysonstatic int 155383366Sjulianpipe_close(fp, td) 155413675Sdyson struct file *fp; 155583366Sjulian struct thread *td; 155613675Sdyson{ 155716322Sgpalmer 1558232055Skmacy if (fp->f_vnode != NULL) 1559232055Skmacy return vnops.fo_close(fp, td); 156049413Sgreen fp->f_ops = &badfileops; 1561232055Skmacy pipe_dtor(fp->f_data); 1562109153Sdillon fp->f_data = NULL; 156376760Salfred return (0); 156413675Sdyson} 156513675Sdyson 1566232183Sjillesstatic int 1567232271Sdimpipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct thread *td) 1568232183Sjilles{ 1569232183Sjilles struct pipe *cpipe; 1570232183Sjilles int error; 1571232183Sjilles 1572232183Sjilles cpipe = fp->f_data; 1573232183Sjilles if (cpipe->pipe_state & PIPE_NAMED) 1574232183Sjilles error = vn_chmod(fp, mode, active_cred, td); 1575232183Sjilles else 1576232183Sjilles error = invfo_chmod(fp, mode, active_cred, td); 1577232183Sjilles return (error); 1578232183Sjilles} 1579232183Sjilles 1580232183Sjillesstatic int 1581232183Sjillespipe_chown(fp, uid, gid, active_cred, td) 1582232183Sjilles struct file *fp; 1583232183Sjilles uid_t uid; 1584232183Sjilles gid_t gid; 1585232183Sjilles struct ucred *active_cred; 1586232183Sjilles struct thread *td; 1587232183Sjilles{ 1588232183Sjilles struct pipe *cpipe; 1589232183Sjilles int error; 1590232183Sjilles 1591232183Sjilles cpipe = fp->f_data; 1592232183Sjilles if (cpipe->pipe_state & PIPE_NAMED) 1593232183Sjilles error = vn_chown(fp, uid, gid, active_cred, td); 1594232183Sjilles else 1595232183Sjilles error = invfo_chown(fp, uid, gid, active_cred, td); 1596232183Sjilles return (error); 1597232183Sjilles} 1598232183Sjilles 159976364Salfredstatic void 160076364Salfredpipe_free_kmem(cpipe) 160176364Salfred struct pipe *cpipe; 160276364Salfred{ 160391412Salfred 1604125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), 1605125293Srwatson ("pipe_free_kmem: pipe mutex locked")); 160676364Salfred 160776364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 1608189649Sjhb atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size); 1609118764Ssilby vm_map_remove(pipe_map, 1610118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer, 1611118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 161276364Salfred cpipe->pipe_buffer.buffer = NULL; 161376364Salfred } 161476364Salfred#ifndef PIPE_NODIRECT 1615127501Salc { 161676364Salfred cpipe->pipe_map.cnt = 0; 161776364Salfred cpipe->pipe_map.pos = 0; 161876364Salfred cpipe->pipe_map.npages = 0; 161976364Salfred } 162076364Salfred#endif 162176364Salfred} 162276364Salfred 162313675Sdyson/* 162413675Sdyson * shutdown the pipe 162513675Sdyson */ 162613675Sdysonstatic void 162713675Sdysonpipeclose(cpipe) 162813675Sdyson struct pipe *cpipe; 162913675Sdyson{ 1630125293Srwatson struct pipepair *pp; 163113907Sdyson struct pipe *ppipe; 163276364Salfred 1633125293Srwatson KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL")); 163491968Salfred 1635125293Srwatson PIPE_LOCK(cpipe); 1636133049Ssilby pipelock(cpipe, 0); 1637125293Srwatson pp = cpipe->pipe_pair; 163891968Salfred 163991968Salfred pipeselwakeup(cpipe); 164013907Sdyson 164191968Salfred /* 164291968Salfred * If the other side is blocked, wake it up saying that 164391968Salfred * we want to close it down. 164491968Salfred */ 1645126131Sgreen cpipe->pipe_state |= PIPE_EOF; 164691968Salfred while (cpipe->pipe_busy) { 164791968Salfred wakeup(cpipe); 1648126131Sgreen cpipe->pipe_state |= PIPE_WANT; 1649133049Ssilby pipeunlock(cpipe); 165091968Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1651133049Ssilby pipelock(cpipe, 0); 165291968Salfred } 165313675Sdyson 1654101768Srwatson 165591968Salfred /* 1656125293Srwatson * Disconnect from peer, if any. 165791968Salfred */ 1658125293Srwatson ppipe = cpipe->pipe_peer; 1659179243Skib if (ppipe->pipe_present == PIPE_ACTIVE) { 166091968Salfred pipeselwakeup(ppipe); 166113907Sdyson 166291968Salfred ppipe->pipe_state |= PIPE_EOF; 166391968Salfred wakeup(ppipe); 1664133741Sjmg KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0); 166591968Salfred } 1666125293Srwatson 166791968Salfred /* 1668125293Srwatson * Mark this endpoint as free. Release kmem resources. We 1669125293Srwatson * don't mark this endpoint as unused until we've finished 1670125293Srwatson * doing that, or the pipe might disappear out from under 1671125293Srwatson * us. 167291968Salfred */ 1673125293Srwatson PIPE_UNLOCK(cpipe); 1674125293Srwatson pipe_free_kmem(cpipe); 1675125293Srwatson PIPE_LOCK(cpipe); 1676179243Skib cpipe->pipe_present = PIPE_CLOSING; 1677126131Sgreen pipeunlock(cpipe); 1678179243Skib 1679179243Skib /* 1680179243Skib * knlist_clear() may sleep dropping the PIPE_MTX. Set the 1681179243Skib * PIPE_FINALIZED, that allows other end to free the 1682179243Skib * pipe_pair, only after the knotes are completely dismantled. 1683179243Skib */ 1684133741Sjmg knlist_clear(&cpipe->pipe_sel.si_note, 1); 1685179243Skib cpipe->pipe_present = PIPE_FINALIZED; 1686225177Sattilio seldrain(&cpipe->pipe_sel); 1687133741Sjmg knlist_destroy(&cpipe->pipe_sel.si_note); 1688125293Srwatson 1689125293Srwatson /* 1690125293Srwatson * If both endpoints are now closed, release the memory for the 1691125293Srwatson * pipe pair. If not, unlock. 1692125293Srwatson */ 1693179243Skib if (ppipe->pipe_present == PIPE_FINALIZED) { 169491968Salfred PIPE_UNLOCK(cpipe); 1695125293Srwatson#ifdef MAC 1696172930Srwatson mac_pipe_destroy(pp); 1697125293Srwatson#endif 1698125293Srwatson uma_zfree(pipe_zone, cpipe->pipe_pair); 1699125293Srwatson } else 1700125293Srwatson PIPE_UNLOCK(cpipe); 170113675Sdyson} 170259288Sjlemon 170372521Sjlemon/*ARGSUSED*/ 170459288Sjlemonstatic int 170572521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 170659288Sjlemon{ 170789306Salfred struct pipe *cpipe; 170859288Sjlemon 1709232055Skmacy /* 1710232055Skmacy * If a filter is requested that is not supported by this file 1711232055Skmacy * descriptor, don't return an error, but also don't ever generate an 1712232055Skmacy * event. 1713232055Skmacy */ 1714232055Skmacy if ((kn->kn_filter == EVFILT_READ) && !(fp->f_flag & FREAD)) { 1715232055Skmacy kn->kn_fop = &pipe_nfiltops; 1716232055Skmacy return (0); 1717232055Skmacy } 1718232055Skmacy if ((kn->kn_filter == EVFILT_WRITE) && !(fp->f_flag & FWRITE)) { 1719232055Skmacy kn->kn_fop = &pipe_nfiltops; 1720232055Skmacy return (0); 1721232055Skmacy } 1722232055Skmacy cpipe = fp->f_data; 1723126131Sgreen PIPE_LOCK(cpipe); 172472521Sjlemon switch (kn->kn_filter) { 172572521Sjlemon case EVFILT_READ: 172672521Sjlemon kn->kn_fop = &pipe_rfiltops; 172772521Sjlemon break; 172872521Sjlemon case EVFILT_WRITE: 172972521Sjlemon kn->kn_fop = &pipe_wfiltops; 1730179243Skib if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) { 1731101382Sdes /* other end of pipe has been closed */ 1732126131Sgreen PIPE_UNLOCK(cpipe); 1733118929Sjmg return (EPIPE); 1734126131Sgreen } 1735232055Skmacy cpipe = PIPE_PEER(cpipe); 173672521Sjlemon break; 173772521Sjlemon default: 1738126131Sgreen PIPE_UNLOCK(cpipe); 1739133741Sjmg return (EINVAL); 174072521Sjlemon } 174178292Sjlemon 1742232055Skmacy kn->kn_hook = cpipe; 1743133741Sjmg knlist_add(&cpipe->pipe_sel.si_note, kn, 1); 174491372Salfred PIPE_UNLOCK(cpipe); 174559288Sjlemon return (0); 174659288Sjlemon} 174759288Sjlemon 174859288Sjlemonstatic void 174959288Sjlemonfilt_pipedetach(struct knote *kn) 175059288Sjlemon{ 1751232055Skmacy struct pipe *cpipe = kn->kn_hook; 175259288Sjlemon 1753126131Sgreen PIPE_LOCK(cpipe); 1754133741Sjmg knlist_remove(&cpipe->pipe_sel.si_note, kn, 1); 175591372Salfred PIPE_UNLOCK(cpipe); 175659288Sjlemon} 175759288Sjlemon 175859288Sjlemon/*ARGSUSED*/ 175959288Sjlemonstatic int 176059288Sjlemonfilt_piperead(struct knote *kn, long hint) 176159288Sjlemon{ 1762232055Skmacy struct pipe *rpipe = kn->kn_hook; 176359288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 1764133741Sjmg int ret; 176559288Sjlemon 176691372Salfred PIPE_LOCK(rpipe); 176759288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 176859288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 176959288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 177059288Sjlemon 177159288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 1772179243Skib wpipe->pipe_present != PIPE_ACTIVE || 1773179243Skib (wpipe->pipe_state & PIPE_EOF)) { 177491372Salfred kn->kn_flags |= EV_EOF; 177591372Salfred PIPE_UNLOCK(rpipe); 177659288Sjlemon return (1); 177759288Sjlemon } 1778133741Sjmg ret = kn->kn_data > 0; 177991372Salfred PIPE_UNLOCK(rpipe); 1780133741Sjmg return ret; 178159288Sjlemon} 178259288Sjlemon 178359288Sjlemon/*ARGSUSED*/ 178459288Sjlemonstatic int 178559288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 178659288Sjlemon{ 1787232055Skmacy struct pipe *wpipe; 1788232055Skmacy 1789232055Skmacy wpipe = kn->kn_hook; 1790232055Skmacy PIPE_LOCK(wpipe); 1791179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1792179243Skib (wpipe->pipe_state & PIPE_EOF)) { 179359288Sjlemon kn->kn_data = 0; 1794124394Sdes kn->kn_flags |= EV_EOF; 1795232055Skmacy PIPE_UNLOCK(wpipe); 179659288Sjlemon return (1); 179759288Sjlemon } 1798228510Sjilles kn->kn_data = (wpipe->pipe_buffer.size > 0) ? 1799228510Sjilles (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) : PIPE_BUF; 180065855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 180159288Sjlemon kn->kn_data = 0; 180259288Sjlemon 1803232055Skmacy PIPE_UNLOCK(wpipe); 180459288Sjlemon return (kn->kn_data >= PIPE_BUF); 180559288Sjlemon} 1806232055Skmacy 1807232055Skmacystatic void 1808232055Skmacyfilt_pipedetach_notsup(struct knote *kn) 1809232055Skmacy{ 1810232055Skmacy 1811232055Skmacy} 1812232055Skmacy 1813232055Skmacystatic int 1814232055Skmacyfilt_pipenotsup(struct knote *kn, long hint) 1815232055Skmacy{ 1816232055Skmacy 1817232055Skmacy return (0); 1818232055Skmacy} 1819