/* sys_pipe.c, FreeBSD revision 184849 */
1139804Simp/*- 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson */ 1913675Sdyson 2013675Sdyson/* 2113675Sdyson * This file contains a high-performance replacement for the socket-based 2213675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2313675Sdyson * all features of sockets, but does do everything that pipes normally 2413675Sdyson * do. 2513675Sdyson */ 2613675Sdyson 2713907Sdyson/* 2813907Sdyson * This code has two modes of operation, a small write mode and a large 2913907Sdyson * write mode. The small write mode acts like conventional pipes with 3013907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3113907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 3213907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 3313907Sdyson * the receiving process can copy it directly from the pages in the sending 3413907Sdyson * process. 
3513907Sdyson * 3613907Sdyson * If the sending process receives a signal, it is possible that it will 3713913Sdyson * go away, and certainly its address space can change, because control 3813907Sdyson * is returned back to the user-mode side. In that case, the pipe code 3913907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4013907Sdyson * kernel buffer, and the receiving process will grab the data from the 4113907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4213907Sdyson * the copy operation is normally eliminated. 4313907Sdyson * 4413907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4513907Sdyson * happen for small transfers so that the system will not spend all of 46118764Ssilby * its time context switching. 47117325Ssilby * 48118764Ssilby * In order to limit the resource use of pipes, two sysctls exist: 49117325Ssilby * 50118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 51133790Ssilby * address space available to us in pipe_map. This value is normally 52133790Ssilby * autotuned, but may also be loader tuned. 53117325Ssilby * 54133790Ssilby * kern.ipc.pipekva - This read-only sysctl tracks the current amount of 55133790Ssilby * memory in use by pipes. 56117325Ssilby * 57133790Ssilby * Based on how large pipekva is relative to maxpipekva, the following 58133790Ssilby * will happen: 59117325Ssilby * 60133790Ssilby * 0% - 50%: 61133790Ssilby * New pipes are given 16K of memory backing, pipes may dynamically 62133790Ssilby * grow to as large as 64K where needed. 63133790Ssilby * 50% - 75%: 64133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 65133790Ssilby * existing pipes may NOT grow. 66133790Ssilby * 75% - 100%: 67133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 68133790Ssilby * existing pipes will be shrunk down to 4K whenever possible. 
69133049Ssilby * 70133790Ssilby * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0. If 71133790Ssilby * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE 72133790Ssilby * resize which MUST occur for reverse-direction pipes when they are 73133790Ssilby * first used. 74133790Ssilby * 75133790Ssilby * Additional information about the current state of pipes may be obtained 76133790Ssilby * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail, 77133790Ssilby * and kern.ipc.piperesizefail. 78133790Ssilby * 79133049Ssilby * Locking rules: There are two locks present here: A mutex, used via 80133049Ssilby * PIPE_LOCK, and a flag, used via pipelock(). All locking is done via 81133049Ssilby * the flag, as mutexes can not persist over uiomove. The mutex 82133049Ssilby * exists only to guard access to the flag, and is not in itself a 83133790Ssilby * locking mechanism. Also note that there is only a single mutex for 84133790Ssilby * both directions of a pipe. 85133049Ssilby * 86133049Ssilby * As pipelock() may have to sleep before it can acquire the flag, it 87133049Ssilby * is important to reread all data after a call to pipelock(); everything 88133049Ssilby * in the structure may have changed. 
8913907Sdyson */ 9013907Sdyson 91116182Sobrien#include <sys/cdefs.h> 92116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 184849 2008-11-11 14:55:59Z ed $"); 93116182Sobrien 94101768Srwatson#include "opt_mac.h" 95101768Srwatson 9613675Sdyson#include <sys/param.h> 9713675Sdyson#include <sys/systm.h> 9824131Sbde#include <sys/fcntl.h> 9913675Sdyson#include <sys/file.h> 10013675Sdyson#include <sys/filedesc.h> 10124206Sbde#include <sys/filio.h> 10291372Salfred#include <sys/kernel.h> 10376166Smarkm#include <sys/lock.h> 10476827Salfred#include <sys/mutex.h> 10524206Sbde#include <sys/ttycom.h> 10613675Sdyson#include <sys/stat.h> 10791968Salfred#include <sys/malloc.h> 10829356Speter#include <sys/poll.h> 10970834Swollman#include <sys/selinfo.h> 11013675Sdyson#include <sys/signalvar.h> 111184849Sed#include <sys/syscallsubr.h> 112117325Ssilby#include <sys/sysctl.h> 11313675Sdyson#include <sys/sysproto.h> 11413675Sdyson#include <sys/pipe.h> 11576166Smarkm#include <sys/proc.h> 11655112Sbde#include <sys/vnode.h> 11734924Sbde#include <sys/uio.h> 11859288Sjlemon#include <sys/event.h> 11913675Sdyson 120163606Srwatson#include <security/mac/mac_framework.h> 121163606Srwatson 12213675Sdyson#include <vm/vm.h> 12313675Sdyson#include <vm/vm_param.h> 12413675Sdyson#include <vm/vm_object.h> 12513675Sdyson#include <vm/vm_kern.h> 12613675Sdyson#include <vm/vm_extern.h> 12713675Sdyson#include <vm/pmap.h> 12813675Sdyson#include <vm/vm_map.h> 12913907Sdyson#include <vm/vm_page.h> 13092751Sjeff#include <vm/uma.h> 13113675Sdyson 13214037Sdyson/* 13314037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 13414037Sdyson * approx 30% decrease in transfer rate. This could be useful for 13514037Sdyson * NetBSD or OpenBSD. 
13614037Sdyson */ 13714037Sdyson/* #define PIPE_NODIRECT */ 13814037Sdyson 13914037Sdyson/* 14014037Sdyson * interfaces to the outside world 14114037Sdyson */ 142108255Sphkstatic fo_rdwr_t pipe_read; 143108255Sphkstatic fo_rdwr_t pipe_write; 144175140Sjhbstatic fo_truncate_t pipe_truncate; 145108255Sphkstatic fo_ioctl_t pipe_ioctl; 146108255Sphkstatic fo_poll_t pipe_poll; 147108255Sphkstatic fo_kqfilter_t pipe_kqfilter; 148108255Sphkstatic fo_stat_t pipe_stat; 149108255Sphkstatic fo_close_t pipe_close; 15013675Sdyson 15172521Sjlemonstatic struct fileops pipeops = { 152116546Sphk .fo_read = pipe_read, 153116546Sphk .fo_write = pipe_write, 154175140Sjhb .fo_truncate = pipe_truncate, 155116546Sphk .fo_ioctl = pipe_ioctl, 156116546Sphk .fo_poll = pipe_poll, 157116546Sphk .fo_kqfilter = pipe_kqfilter, 158116546Sphk .fo_stat = pipe_stat, 159116546Sphk .fo_close = pipe_close, 160116546Sphk .fo_flags = DFLAG_PASSABLE 16172521Sjlemon}; 16213675Sdyson 16359288Sjlemonstatic void filt_pipedetach(struct knote *kn); 16459288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 16559288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 16659288Sjlemon 16772521Sjlemonstatic struct filterops pipe_rfiltops = 16872521Sjlemon { 1, NULL, filt_pipedetach, filt_piperead }; 16972521Sjlemonstatic struct filterops pipe_wfiltops = 17072521Sjlemon { 1, NULL, filt_pipedetach, filt_pipewrite }; 17159288Sjlemon 17213675Sdyson/* 17313675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 17413675Sdyson * space is pageable. The pipe code will try to maintain locality of 17513675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 17613675Sdyson * will not wipe the cache. 
17713675Sdyson */ 17813907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 17913907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 18013675Sdyson 18117124Sbdestatic int amountpipekva; 182133790Ssilbystatic int pipefragretry; 183133790Ssilbystatic int pipeallocfail; 184133790Ssilbystatic int piperesizefail; 185133790Ssilbystatic int piperesizeallowed = 1; 18613907Sdyson 187121307SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN, 188117325Ssilby &maxpipekva, 0, "Pipe KVA limit"); 189117325SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 190117325Ssilby &amountpipekva, 0, "Pipe KVA usage"); 191133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD, 192133790Ssilby &pipefragretry, 0, "Pipe allocation retries due to fragmentation"); 193133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD, 194133790Ssilby &pipeallocfail, 0, "Pipe allocation failures"); 195133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, 196133790Ssilby &piperesizefail, 0, "Pipe resize failures"); 197133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, 198133790Ssilby &piperesizeallowed, 0, "Pipe resizing allowed"); 199117325Ssilby 20091413Salfredstatic void pipeinit(void *dummy __unused); 20191413Salfredstatic void pipeclose(struct pipe *cpipe); 20291413Salfredstatic void pipe_free_kmem(struct pipe *cpipe); 203133790Ssilbystatic int pipe_create(struct pipe *pipe, int backing); 20491413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch); 20591413Salfredstatic __inline void pipeunlock(struct pipe *cpipe); 20691413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe); 20714037Sdyson#ifndef PIPE_NODIRECT 20891413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 20991413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe); 21091413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio); 21191413Salfredstatic void pipe_clone_write_buffer(struct 
pipe *wpipe); 21214037Sdyson#endif 21391413Salfredstatic int pipespace(struct pipe *cpipe, int size); 214132579Srwatsonstatic int pipespace_new(struct pipe *cpipe, int size); 21513675Sdyson 216132987Sgreenstatic int pipe_zone_ctor(void *mem, int size, void *arg, int flags); 217132987Sgreenstatic int pipe_zone_init(void *mem, int size, int flags); 218125293Srwatsonstatic void pipe_zone_fini(void *mem, int size); 219125293Srwatson 22092751Sjeffstatic uma_zone_t pipe_zone; 22127899Sdyson 22291372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 22391372Salfred 22491372Salfredstatic void 22591372Salfredpipeinit(void *dummy __unused) 22691372Salfred{ 227118880Salc 228170022Srwatson pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair), 229170022Srwatson pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, 230125293Srwatson UMA_ALIGN_PTR, 0); 231118880Salc KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 23291372Salfred} 23391372Salfred 234132987Sgreenstatic int 235132987Sgreenpipe_zone_ctor(void *mem, int size, void *arg, int flags) 236125293Srwatson{ 237125293Srwatson struct pipepair *pp; 238125293Srwatson struct pipe *rpipe, *wpipe; 239125293Srwatson 240125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size")); 241125293Srwatson 242125293Srwatson pp = (struct pipepair *)mem; 243125293Srwatson 244125293Srwatson /* 245125293Srwatson * We zero both pipe endpoints to make sure all the kmem pointers 246125293Srwatson * are NULL, flag fields are zero'd, etc. We timestamp both 247125293Srwatson * endpoints with the same time. 
248125293Srwatson */ 249125293Srwatson rpipe = &pp->pp_rpipe; 250125293Srwatson bzero(rpipe, sizeof(*rpipe)); 251125293Srwatson vfs_timestamp(&rpipe->pipe_ctime); 252125293Srwatson rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; 253125293Srwatson 254125293Srwatson wpipe = &pp->pp_wpipe; 255125293Srwatson bzero(wpipe, sizeof(*wpipe)); 256125293Srwatson wpipe->pipe_ctime = rpipe->pipe_ctime; 257125293Srwatson wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; 258125293Srwatson 259125293Srwatson rpipe->pipe_peer = wpipe; 260125293Srwatson rpipe->pipe_pair = pp; 261125293Srwatson wpipe->pipe_peer = rpipe; 262125293Srwatson wpipe->pipe_pair = pp; 263125293Srwatson 264125293Srwatson /* 265125293Srwatson * Mark both endpoints as present; they will later get free'd 266125293Srwatson * one at a time. When both are free'd, then the whole pair 267125293Srwatson * is released. 268125293Srwatson */ 269179243Skib rpipe->pipe_present = PIPE_ACTIVE; 270179243Skib wpipe->pipe_present = PIPE_ACTIVE; 271125293Srwatson 272125293Srwatson /* 273125293Srwatson * Eventually, the MAC Framework may initialize the label 274125293Srwatson * in ctor or init, but for now we do it elswhere to avoid 275125293Srwatson * blocking in ctor or init. 
276125293Srwatson */ 277125293Srwatson pp->pp_label = NULL; 278125293Srwatson 279132987Sgreen return (0); 280125293Srwatson} 281125293Srwatson 282132987Sgreenstatic int 283132987Sgreenpipe_zone_init(void *mem, int size, int flags) 284125293Srwatson{ 285125293Srwatson struct pipepair *pp; 286125293Srwatson 287125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size")); 288125293Srwatson 289125293Srwatson pp = (struct pipepair *)mem; 290125293Srwatson 291125293Srwatson mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 292132987Sgreen return (0); 293125293Srwatson} 294125293Srwatson 295125293Srwatsonstatic void 296125293Srwatsonpipe_zone_fini(void *mem, int size) 297125293Srwatson{ 298125293Srwatson struct pipepair *pp; 299125293Srwatson 300125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size")); 301125293Srwatson 302125293Srwatson pp = (struct pipepair *)mem; 303125293Srwatson 304125293Srwatson mtx_destroy(&pp->pp_mtx); 305125293Srwatson} 306125293Srwatson 30713675Sdyson/* 308167232Srwatson * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let 309167232Srwatson * the zone pick up the pieces via pipeclose(). 31013675Sdyson */ 31113675Sdysonint 312184849Sedkern_pipe(struct thread *td, int fildes[2]) 31313675Sdyson{ 31483366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 31513675Sdyson struct file *rf, *wf; 316125293Srwatson struct pipepair *pp; 31713675Sdyson struct pipe *rpipe, *wpipe; 31813675Sdyson int fd, error; 31927899Sdyson 320125293Srwatson pp = uma_zalloc(pipe_zone, M_WAITOK); 321125293Srwatson#ifdef MAC 322125293Srwatson /* 323126249Srwatson * The MAC label is shared between the connected endpoints. As a 324172930Srwatson * result mac_pipe_init() and mac_pipe_create() are called once 325126249Srwatson * for the pair, and not on the endpoints. 
326125293Srwatson */ 327172930Srwatson mac_pipe_init(pp); 328172930Srwatson mac_pipe_create(td->td_ucred, pp); 329125293Srwatson#endif 330125293Srwatson rpipe = &pp->pp_rpipe; 331125293Srwatson wpipe = &pp->pp_wpipe; 332125293Srwatson 333147730Sssouhlal knlist_init(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe), NULL, NULL, 334147730Sssouhlal NULL); 335147730Sssouhlal knlist_init(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe), NULL, NULL, 336147730Sssouhlal NULL); 337140369Ssilby 338133790Ssilby /* Only the forward direction pipe is backed by default */ 339155035Sglebius if ((error = pipe_create(rpipe, 1)) != 0 || 340155035Sglebius (error = pipe_create(wpipe, 0)) != 0) { 341124394Sdes pipeclose(rpipe); 342124394Sdes pipeclose(wpipe); 343155035Sglebius return (error); 34476364Salfred } 345124394Sdes 34613907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 34713907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 34813675Sdyson 34983366Sjulian error = falloc(td, &rf, &fd); 35070915Sdwmalone if (error) { 35170915Sdwmalone pipeclose(rpipe); 35270915Sdwmalone pipeclose(wpipe); 35370915Sdwmalone return (error); 35470915Sdwmalone } 355121256Sdwmalone /* An extra reference on `rf' has been held for us by falloc(). */ 356184849Sed fildes[0] = fd; 35770915Sdwmalone 35870803Sdwmalone /* 35970803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 36070803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 36170803Sdwmalone * to avoid races against processes which manage to dup() the read 36270803Sdwmalone * side while we are blocked trying to allocate the write side. 36370803Sdwmalone */ 364174988Sjeff finit(rf, FREAD | FWRITE, DTYPE_PIPE, rpipe, &pipeops); 36583366Sjulian error = falloc(td, &wf, &fd); 36670915Sdwmalone if (error) { 367184849Sed fdclose(fdp, rf, fildes[0], td); 36883366Sjulian fdrop(rf, td); 36970915Sdwmalone /* rpipe has been closed by fdrop(). 
*/ 37070915Sdwmalone pipeclose(wpipe); 37170915Sdwmalone return (error); 37270915Sdwmalone } 373121256Sdwmalone /* An extra reference on `wf' has been held for us by falloc(). */ 374174988Sjeff finit(wf, FREAD | FWRITE, DTYPE_PIPE, wpipe, &pipeops); 375121256Sdwmalone fdrop(wf, td); 376184849Sed fildes[1] = fd; 37783366Sjulian fdrop(rf, td); 37813675Sdyson 37913675Sdyson return (0); 38013675Sdyson} 38113675Sdyson 382184849Sed/* ARGSUSED */ 383184849Sedint 384184849Sedpipe(struct thread *td, struct pipe_args *uap) 385184849Sed{ 386184849Sed int error; 387184849Sed int fildes[2]; 388184849Sed 389184849Sed error = kern_pipe(td, fildes); 390184849Sed if (error) 391184849Sed return (error); 392184849Sed 393184849Sed td->td_retval[0] = fildes[0]; 394184849Sed td->td_retval[1] = fildes[1]; 395184849Sed 396184849Sed return (0); 397184849Sed} 398184849Sed 39913909Sdyson/* 40013909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 40176364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 40276364Salfred * it will retain the old buffer. 40376364Salfred * If it fails it will return ENOMEM. 
40413909Sdyson */ 40576364Salfredstatic int 406132579Srwatsonpipespace_new(cpipe, size) 40713675Sdyson struct pipe *cpipe; 40876364Salfred int size; 40913675Sdyson{ 41076364Salfred caddr_t buffer; 411133790Ssilby int error, cnt, firstseg; 412117325Ssilby static int curfail = 0; 413117325Ssilby static struct timeval lastfail; 41413675Sdyson 415125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked")); 416133790Ssilby KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW), 417133790Ssilby ("pipespace: resize of direct writes not allowed")); 418133790Ssilbyretry: 419133790Ssilby cnt = cpipe->pipe_buffer.cnt; 420133790Ssilby if (cnt > size) 421133790Ssilby size = cnt; 42279224Sdillon 423118764Ssilby size = round_page(size); 424118764Ssilby buffer = (caddr_t) vm_map_min(pipe_map); 42513675Sdyson 426122163Salc error = vm_map_find(pipe_map, NULL, 0, 42776364Salfred (vm_offset_t *) &buffer, size, 1, 42813688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 42976364Salfred if (error != KERN_SUCCESS) { 430133790Ssilby if ((cpipe->pipe_buffer.buffer == NULL) && 431133790Ssilby (size > SMALL_PIPE_SIZE)) { 432133790Ssilby size = SMALL_PIPE_SIZE; 433133790Ssilby pipefragretry++; 434133790Ssilby goto retry; 435133790Ssilby } 436133790Ssilby if (cpipe->pipe_buffer.buffer == NULL) { 437133790Ssilby pipeallocfail++; 438133790Ssilby if (ppsratecheck(&lastfail, &curfail, 1)) 439133790Ssilby printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n"); 440133790Ssilby } else { 441133790Ssilby piperesizefail++; 442133790Ssilby } 44376364Salfred return (ENOMEM); 44476364Salfred } 44576364Salfred 446133790Ssilby /* copy data, then free old resources if we're resizing */ 447133790Ssilby if (cnt > 0) { 448133790Ssilby if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) { 449133790Ssilby firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out; 450133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 451133790Ssilby buffer, firstseg); 452133790Ssilby if ((cnt - firstseg) 
> 0) 453133790Ssilby bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg], 454133790Ssilby cpipe->pipe_buffer.in); 455133790Ssilby } else { 456133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 457133790Ssilby buffer, cnt); 458133790Ssilby } 459133790Ssilby } 46076364Salfred pipe_free_kmem(cpipe); 46176364Salfred cpipe->pipe_buffer.buffer = buffer; 46276364Salfred cpipe->pipe_buffer.size = size; 463133790Ssilby cpipe->pipe_buffer.in = cnt; 46476364Salfred cpipe->pipe_buffer.out = 0; 465133790Ssilby cpipe->pipe_buffer.cnt = cnt; 466110816Salc atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size); 46776364Salfred return (0); 46813907Sdyson} 46913688Sdyson 47013907Sdyson/* 471132579Srwatson * Wrapper for pipespace_new() that performs locking assertions. 472132579Srwatson */ 473132579Srwatsonstatic int 474132579Srwatsonpipespace(cpipe, size) 475132579Srwatson struct pipe *cpipe; 476132579Srwatson int size; 477132579Srwatson{ 478132579Srwatson 479133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 480133049Ssilby ("Unlocked pipe passed to pipespace")); 481132579Srwatson return (pipespace_new(cpipe, size)); 482132579Srwatson} 483132579Srwatson 484132579Srwatson/* 48513675Sdyson * lock a pipe for I/O, blocking other access 48613675Sdyson */ 48713675Sdysonstatic __inline int 48813907Sdysonpipelock(cpipe, catch) 48913675Sdyson struct pipe *cpipe; 49013907Sdyson int catch; 49113675Sdyson{ 49213776Sdyson int error; 49376364Salfred 49491362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 49591362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 49613675Sdyson cpipe->pipe_state |= PIPE_LWANT; 49791362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 49891362Salfred catch ? 
(PRIBIO | PCATCH) : PRIBIO, 49976760Salfred "pipelk", 0); 500124394Sdes if (error != 0) 50176760Salfred return (error); 50213675Sdyson } 50391362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 50476760Salfred return (0); 50513675Sdyson} 50613675Sdyson 50713675Sdyson/* 50813675Sdyson * unlock a pipe I/O lock 50913675Sdyson */ 51013675Sdysonstatic __inline void 51113675Sdysonpipeunlock(cpipe) 51213675Sdyson struct pipe *cpipe; 51313675Sdyson{ 51476364Salfred 51591362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 516133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 517133049Ssilby ("Unlocked pipe passed to pipeunlock")); 51891362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 51913675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 52013675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 52114177Sdyson wakeup(cpipe); 52213675Sdyson } 52313675Sdyson} 52413675Sdyson 52514037Sdysonstatic __inline void 52614037Sdysonpipeselwakeup(cpipe) 52714037Sdyson struct pipe *cpipe; 52814037Sdyson{ 52976364Salfred 530126252Srwatson PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 53114037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 532122352Stanimura selwakeuppri(&cpipe->pipe_sel, PSOCK); 533174647Sjeff if (!SEL_WAITING(&cpipe->pipe_sel)) 534174647Sjeff cpipe->pipe_state &= ~PIPE_SEL; 53514037Sdyson } 53641086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 53795883Salfred pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 538133741Sjmg KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); 53914037Sdyson} 54014037Sdyson 541126131Sgreen/* 542126131Sgreen * Initialize and allocate VM and memory for pipe. The structure 543126131Sgreen * will start out zero'd from the ctor, so we just manage the kmem. 
544126131Sgreen */ 545126131Sgreenstatic int 546133790Ssilbypipe_create(pipe, backing) 547126131Sgreen struct pipe *pipe; 548133790Ssilby int backing; 549126131Sgreen{ 550126131Sgreen int error; 551126131Sgreen 552133790Ssilby if (backing) { 553133790Ssilby if (amountpipekva > maxpipekva / 2) 554133790Ssilby error = pipespace_new(pipe, SMALL_PIPE_SIZE); 555133790Ssilby else 556133790Ssilby error = pipespace_new(pipe, PIPE_SIZE); 557133790Ssilby } else { 558133790Ssilby /* If we're not backing this pipe, no need to do anything. */ 559133790Ssilby error = 0; 560133790Ssilby } 561132579Srwatson return (error); 562126131Sgreen} 563126131Sgreen 56413675Sdyson/* ARGSUSED */ 56513675Sdysonstatic int 566101941Srwatsonpipe_read(fp, uio, active_cred, flags, td) 56713675Sdyson struct file *fp; 56813675Sdyson struct uio *uio; 569101941Srwatson struct ucred *active_cred; 57083366Sjulian struct thread *td; 57145311Sdt int flags; 57213675Sdyson{ 573109153Sdillon struct pipe *rpipe = fp->f_data; 57447748Salc int error; 57513675Sdyson int nread = 0; 57618863Sdyson u_int size; 57713675Sdyson 57891362Salfred PIPE_LOCK(rpipe); 57913675Sdyson ++rpipe->pipe_busy; 58047748Salc error = pipelock(rpipe, 1); 58147748Salc if (error) 58247748Salc goto unlocked_error; 58347748Salc 584101768Srwatson#ifdef MAC 585172930Srwatson error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); 586101768Srwatson if (error) 587101768Srwatson goto locked_error; 588101768Srwatson#endif 589133790Ssilby if (amountpipekva > (3 * maxpipekva) / 4) { 590133790Ssilby if (!(rpipe->pipe_state & PIPE_DIRECTW) && 591133790Ssilby (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 592133790Ssilby (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 593133790Ssilby (piperesizeallowed == 1)) { 594133790Ssilby PIPE_UNLOCK(rpipe); 595133790Ssilby pipespace(rpipe, SMALL_PIPE_SIZE); 596133790Ssilby PIPE_LOCK(rpipe); 597133790Ssilby } 598133790Ssilby } 599101768Srwatson 60013675Sdyson while (uio->uio_resid) { 60113907Sdyson /* 
60213907Sdyson * normal pipe buffer receive 60313907Sdyson */ 60413675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 60518863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 60613675Sdyson if (size > rpipe->pipe_buffer.cnt) 60713675Sdyson size = rpipe->pipe_buffer.cnt; 60818863Sdyson if (size > (u_int) uio->uio_resid) 60918863Sdyson size = (u_int) uio->uio_resid; 61047748Salc 61191362Salfred PIPE_UNLOCK(rpipe); 612116127Smux error = uiomove( 613116127Smux &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 614116127Smux size, uio); 61591362Salfred PIPE_LOCK(rpipe); 61676760Salfred if (error) 61713675Sdyson break; 61876760Salfred 61913675Sdyson rpipe->pipe_buffer.out += size; 62013675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 62113675Sdyson rpipe->pipe_buffer.out = 0; 62213675Sdyson 62313675Sdyson rpipe->pipe_buffer.cnt -= size; 62447748Salc 62547748Salc /* 62647748Salc * If there is no more to read in the pipe, reset 62747748Salc * its pointers to the beginning. This improves 62847748Salc * cache hit stats. 62947748Salc */ 63047748Salc if (rpipe->pipe_buffer.cnt == 0) { 63147748Salc rpipe->pipe_buffer.in = 0; 63247748Salc rpipe->pipe_buffer.out = 0; 63347748Salc } 63413675Sdyson nread += size; 63514037Sdyson#ifndef PIPE_NODIRECT 63613907Sdyson /* 63713907Sdyson * Direct copy, bypassing a kernel buffer. 
63813907Sdyson */ 63913907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 64047748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 64118863Sdyson if (size > (u_int) uio->uio_resid) 64218863Sdyson size = (u_int) uio->uio_resid; 64347748Salc 64491362Salfred PIPE_UNLOCK(rpipe); 645127501Salc error = uiomove_fromphys(rpipe->pipe_map.ms, 646127501Salc rpipe->pipe_map.pos, size, uio); 64791362Salfred PIPE_LOCK(rpipe); 64813907Sdyson if (error) 64913907Sdyson break; 65013907Sdyson nread += size; 65113907Sdyson rpipe->pipe_map.pos += size; 65213907Sdyson rpipe->pipe_map.cnt -= size; 65313907Sdyson if (rpipe->pipe_map.cnt == 0) { 65413907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 65513907Sdyson wakeup(rpipe); 65613907Sdyson } 65714037Sdyson#endif 65813675Sdyson } else { 65913675Sdyson /* 66013675Sdyson * detect EOF condition 66176760Salfred * read returns 0 on EOF, no need to set error 66213675Sdyson */ 66376760Salfred if (rpipe->pipe_state & PIPE_EOF) 66413675Sdyson break; 66543623Sdillon 66613675Sdyson /* 66713675Sdyson * If the "write-side" has been blocked, wake it up now. 66813675Sdyson */ 66913675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 67013675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 67113675Sdyson wakeup(rpipe); 67213675Sdyson } 67343623Sdillon 67443623Sdillon /* 67547748Salc * Break if some data was read. 67643623Sdillon */ 67747748Salc if (nread > 0) 67813675Sdyson break; 67916960Sdyson 68043623Sdillon /* 681124394Sdes * Unlock the pipe buffer for our remaining processing. 682116127Smux * We will either break out with an error or we will 683116127Smux * sleep and relock to loop. 68443623Sdillon */ 68547748Salc pipeunlock(rpipe); 68643623Sdillon 68713675Sdyson /* 68847748Salc * Handle non-blocking mode operation or 68947748Salc * wait for more data. 
69013675Sdyson */ 69176760Salfred if (fp->f_flag & FNONBLOCK) { 69247748Salc error = EAGAIN; 69376760Salfred } else { 69447748Salc rpipe->pipe_state |= PIPE_WANTR; 69591362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 69691362Salfred PRIBIO | PCATCH, 69777140Salfred "piperd", 0)) == 0) 69847748Salc error = pipelock(rpipe, 1); 69913675Sdyson } 70047748Salc if (error) 70147748Salc goto unlocked_error; 70213675Sdyson } 70313675Sdyson } 704101768Srwatson#ifdef MAC 705101768Srwatsonlocked_error: 706101768Srwatson#endif 70747748Salc pipeunlock(rpipe); 70813675Sdyson 70991362Salfred /* XXX: should probably do this before getting any locks. */ 71024101Sbde if (error == 0) 71155112Sbde vfs_timestamp(&rpipe->pipe_atime); 71247748Salcunlocked_error: 71347748Salc --rpipe->pipe_busy; 71413913Sdyson 71547748Salc /* 71647748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 71747748Salc */ 71813675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 71913675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 72013675Sdyson wakeup(rpipe); 72113675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 72213675Sdyson /* 72347748Salc * Handle write blocking hysteresis. 72413675Sdyson */ 72513675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 72613675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 72713675Sdyson wakeup(rpipe); 72813675Sdyson } 72913675Sdyson } 73014037Sdyson 73114802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 73214037Sdyson pipeselwakeup(rpipe); 73314037Sdyson 73491362Salfred PIPE_UNLOCK(rpipe); 73576760Salfred return (error); 73613675Sdyson} 73713675Sdyson 73814037Sdyson#ifndef PIPE_NODIRECT 73913907Sdyson/* 74013907Sdyson * Map the sending processes' buffer into kernel space and wire it. 74113907Sdyson * This is similar to a physical write operation. 
74213907Sdyson */ 74313675Sdysonstatic int 74413907Sdysonpipe_build_write_buffer(wpipe, uio) 74513907Sdyson struct pipe *wpipe; 74613675Sdyson struct uio *uio; 74713675Sdyson{ 748119872Salc pmap_t pmap; 74918863Sdyson u_int size; 750119872Salc int i, j; 751112569Sjake vm_offset_t addr, endaddr; 75213907Sdyson 75391412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 754133790Ssilby KASSERT(wpipe->pipe_state & PIPE_DIRECTW, 755133790Ssilby ("Clone attempt on non-direct write pipe!")); 75679224Sdillon 75718863Sdyson size = (u_int) uio->uio_iov->iov_len; 75813907Sdyson if (size > wpipe->pipe_buffer.size) 75913907Sdyson size = wpipe->pipe_buffer.size; 76013907Sdyson 761119872Salc pmap = vmspace_pmap(curproc->p_vmspace); 76240286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 76376760Salfred addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 76476760Salfred for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 76599899Salc /* 76699899Salc * vm_fault_quick() can sleep. Consequently, 76799899Salc * vm_page_lock_queue() and vm_page_unlock_queue() 76899899Salc * should not be performed outside of this loop. 
76999899Salc */ 770119872Salc race: 771119872Salc if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { 77299899Salc vm_page_lock_queues(); 773119872Salc for (j = 0; j < i; j++) 774118757Salc vm_page_unhold(wpipe->pipe_map.ms[j]); 77599899Salc vm_page_unlock_queues(); 77676760Salfred return (EFAULT); 77713907Sdyson } 778120000Salc wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, 779120000Salc VM_PROT_READ); 780119872Salc if (wpipe->pipe_map.ms[i] == NULL) 781119872Salc goto race; 78213907Sdyson } 78313907Sdyson 78413907Sdyson/* 78513907Sdyson * set up the control block 78613907Sdyson */ 78713907Sdyson wpipe->pipe_map.npages = i; 78876760Salfred wpipe->pipe_map.pos = 78976760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 79013907Sdyson wpipe->pipe_map.cnt = size; 79113907Sdyson 79213907Sdyson/* 79313907Sdyson * and update the uio data 79413907Sdyson */ 79513907Sdyson 79613907Sdyson uio->uio_iov->iov_len -= size; 797104908Smike uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; 79813907Sdyson if (uio->uio_iov->iov_len == 0) 79913907Sdyson uio->uio_iov++; 80013907Sdyson uio->uio_resid -= size; 80113907Sdyson uio->uio_offset += size; 80276760Salfred return (0); 80313907Sdyson} 80413907Sdyson 80513907Sdyson/* 80613907Sdyson * unmap and unwire the process buffer 80713907Sdyson */ 80813907Sdysonstatic void 80913907Sdysonpipe_destroy_write_buffer(wpipe) 81076760Salfred struct pipe *wpipe; 81113907Sdyson{ 81213907Sdyson int i; 81376364Salfred 814127501Salc PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 81599899Salc vm_page_lock_queues(); 816117325Ssilby for (i = 0; i < wpipe->pipe_map.npages; i++) { 817118757Salc vm_page_unhold(wpipe->pipe_map.ms[i]); 818117325Ssilby } 81999899Salc vm_page_unlock_queues(); 82091653Stanimura wpipe->pipe_map.npages = 0; 82113907Sdyson} 82213907Sdyson 82313907Sdyson/* 82413907Sdyson * In the case of a signal, the writing process might go away. 
This 82513907Sdyson * code copies the data into the circular buffer so that the source 82613907Sdyson * pages can be freed without loss of data. 82713907Sdyson */ 82813907Sdysonstatic void 82913907Sdysonpipe_clone_write_buffer(wpipe) 83076364Salfred struct pipe *wpipe; 83113907Sdyson{ 832127501Salc struct uio uio; 833127501Salc struct iovec iov; 83413907Sdyson int size; 83513907Sdyson int pos; 83613907Sdyson 83791362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 83813907Sdyson size = wpipe->pipe_map.cnt; 83913907Sdyson pos = wpipe->pipe_map.pos; 84013907Sdyson 84113907Sdyson wpipe->pipe_buffer.in = size; 84213907Sdyson wpipe->pipe_buffer.out = 0; 84313907Sdyson wpipe->pipe_buffer.cnt = size; 84413907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 84513907Sdyson 846119811Salc PIPE_UNLOCK(wpipe); 847127501Salc iov.iov_base = wpipe->pipe_buffer.buffer; 848127501Salc iov.iov_len = size; 849127501Salc uio.uio_iov = &iov; 850127501Salc uio.uio_iovcnt = 1; 851127501Salc uio.uio_offset = 0; 852127501Salc uio.uio_resid = size; 853127501Salc uio.uio_segflg = UIO_SYSSPACE; 854127501Salc uio.uio_rw = UIO_READ; 855127501Salc uio.uio_td = curthread; 856127501Salc uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio); 857127501Salc PIPE_LOCK(wpipe); 85813907Sdyson pipe_destroy_write_buffer(wpipe); 85913907Sdyson} 86013907Sdyson 86113907Sdyson/* 86213907Sdyson * This implements the pipe buffer write mechanism. Note that only 86313907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 86413907Sdyson * If there are any characters in the pipe buffer, the direct write will 86513907Sdyson * be deferred until the receiving process grabs all of the bytes from 86613907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 
86713907Sdyson */ 86813907Sdysonstatic int 86913907Sdysonpipe_direct_write(wpipe, uio) 87013907Sdyson struct pipe *wpipe; 87113907Sdyson struct uio *uio; 87213907Sdyson{ 87313907Sdyson int error; 87476364Salfred 87513951Sdysonretry: 87691362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 877133049Ssilby error = pipelock(wpipe, 1); 878133049Ssilby if (wpipe->pipe_state & PIPE_EOF) 879133049Ssilby error = EPIPE; 880133049Ssilby if (error) { 881133049Ssilby pipeunlock(wpipe); 882133049Ssilby goto error1; 883133049Ssilby } 88413907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 88576760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 88613951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 88713951Sdyson wakeup(wpipe); 88813951Sdyson } 889173750Sdumbbell pipeselwakeup(wpipe); 89013992Sdyson wpipe->pipe_state |= PIPE_WANTW; 891133049Ssilby pipeunlock(wpipe); 89291362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 89391362Salfred PRIBIO | PCATCH, "pipdww", 0); 89414802Sdyson if (error) 89513907Sdyson goto error1; 896133049Ssilby else 897133049Ssilby goto retry; 89813907Sdyson } 89913907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 90013951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 90176760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 90213951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 90313951Sdyson wakeup(wpipe); 90413951Sdyson } 905173750Sdumbbell pipeselwakeup(wpipe); 90613992Sdyson wpipe->pipe_state |= PIPE_WANTW; 907133049Ssilby pipeunlock(wpipe); 90891362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 90991362Salfred PRIBIO | PCATCH, "pipdwc", 0); 91014802Sdyson if (error) 91113907Sdyson goto error1; 912133049Ssilby else 913133049Ssilby goto retry; 91413907Sdyson } 91513907Sdyson 91613951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 91713951Sdyson 918119872Salc PIPE_UNLOCK(wpipe); 91913907Sdyson error = pipe_build_write_buffer(wpipe, uio); 920119872Salc PIPE_LOCK(wpipe); 92113907Sdyson if (error) { 92213907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 923133049Ssilby 
pipeunlock(wpipe); 92413907Sdyson goto error1; 92513907Sdyson } 92613907Sdyson 92713907Sdyson error = 0; 92813907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 92913907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 93013907Sdyson pipe_destroy_write_buffer(wpipe); 931112981Shsu pipeselwakeup(wpipe); 93213907Sdyson pipeunlock(wpipe); 93314802Sdyson error = EPIPE; 93414802Sdyson goto error1; 93513907Sdyson } 93613992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 93713992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 93813992Sdyson wakeup(wpipe); 93913992Sdyson } 94014037Sdyson pipeselwakeup(wpipe); 941133049Ssilby pipeunlock(wpipe); 94291362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, 94391362Salfred "pipdwt", 0); 944133049Ssilby pipelock(wpipe, 0); 94513907Sdyson } 94613907Sdyson 947126131Sgreen if (wpipe->pipe_state & PIPE_EOF) 948126131Sgreen error = EPIPE; 94913907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 95013907Sdyson /* 95113907Sdyson * this bit of trickery substitutes a kernel buffer for 95213907Sdyson * the process that might be going away. 
95313907Sdyson */ 95413907Sdyson pipe_clone_write_buffer(wpipe); 95513907Sdyson } else { 95613907Sdyson pipe_destroy_write_buffer(wpipe); 95713907Sdyson } 95813907Sdyson pipeunlock(wpipe); 95976760Salfred return (error); 96013907Sdyson 96113907Sdysonerror1: 96213907Sdyson wakeup(wpipe); 96376760Salfred return (error); 96413907Sdyson} 96514037Sdyson#endif 966124394Sdes 96716960Sdysonstatic int 968101941Srwatsonpipe_write(fp, uio, active_cred, flags, td) 96916960Sdyson struct file *fp; 97013907Sdyson struct uio *uio; 971101941Srwatson struct ucred *active_cred; 97283366Sjulian struct thread *td; 97345311Sdt int flags; 97413907Sdyson{ 97513675Sdyson int error = 0; 976133790Ssilby int desiredsize, orig_resid; 97716960Sdyson struct pipe *wpipe, *rpipe; 97816960Sdyson 979109153Sdillon rpipe = fp->f_data; 98016960Sdyson wpipe = rpipe->pipe_peer; 98116960Sdyson 98291395Salfred PIPE_LOCK(rpipe); 983133049Ssilby error = pipelock(wpipe, 1); 984133049Ssilby if (error) { 985133049Ssilby PIPE_UNLOCK(rpipe); 986133049Ssilby return (error); 987133049Ssilby } 98813675Sdyson /* 98913675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 
99013675Sdyson */ 991179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 992179243Skib (wpipe->pipe_state & PIPE_EOF)) { 993133049Ssilby pipeunlock(wpipe); 99491395Salfred PIPE_UNLOCK(rpipe); 99576760Salfred return (EPIPE); 99613675Sdyson } 997101768Srwatson#ifdef MAC 998172930Srwatson error = mac_pipe_check_write(active_cred, wpipe->pipe_pair); 999101768Srwatson if (error) { 1000133049Ssilby pipeunlock(wpipe); 1001101768Srwatson PIPE_UNLOCK(rpipe); 1002101768Srwatson return (error); 1003101768Srwatson } 1004101768Srwatson#endif 100577676Sdillon ++wpipe->pipe_busy; 100613675Sdyson 1007133790Ssilby /* Choose a larger size if it's advantageous */ 1008133790Ssilby desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size); 1009133790Ssilby while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) { 1010133790Ssilby if (piperesizeallowed != 1) 1011133790Ssilby break; 1012133790Ssilby if (amountpipekva > maxpipekva / 2) 1013133790Ssilby break; 1014133790Ssilby if (desiredsize == BIG_PIPE_SIZE) 1015133790Ssilby break; 1016133790Ssilby desiredsize = desiredsize * 2; 1017133790Ssilby } 101817163Sdyson 1019133790Ssilby /* Choose a smaller size if we're in a OOM situation */ 1020133790Ssilby if ((amountpipekva > (3 * maxpipekva) / 4) && 1021133790Ssilby (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 1022133790Ssilby (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 1023133790Ssilby (piperesizeallowed == 1)) 1024133790Ssilby desiredsize = SMALL_PIPE_SIZE; 1025133790Ssilby 1026133790Ssilby /* Resize if the above determined that a new size was necessary */ 1027133790Ssilby if ((desiredsize != wpipe->pipe_buffer.size) && 1028133790Ssilby ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) { 1029133049Ssilby PIPE_UNLOCK(wpipe); 1030133790Ssilby pipespace(wpipe, desiredsize); 1031133049Ssilby PIPE_LOCK(wpipe); 103213907Sdyson } 1033133790Ssilby if (wpipe->pipe_buffer.size == 0) { 1034133790Ssilby /* 1035133790Ssilby * This can only happen for reverse direction use of pipes 
1036133790Ssilby * in a complete OOM situation. 1037133790Ssilby */ 1038133790Ssilby error = ENOMEM; 1039133790Ssilby --wpipe->pipe_busy; 1040133790Ssilby pipeunlock(wpipe); 1041133790Ssilby PIPE_UNLOCK(wpipe); 1042133790Ssilby return (error); 1043133790Ssilby } 104477676Sdillon 1045133049Ssilby pipeunlock(wpipe); 1046124394Sdes 104713913Sdyson orig_resid = uio->uio_resid; 104877676Sdillon 104913675Sdyson while (uio->uio_resid) { 105013907Sdyson int space; 105176760Salfred 1052133049Ssilby pipelock(wpipe, 0); 1053133049Ssilby if (wpipe->pipe_state & PIPE_EOF) { 1054133049Ssilby pipeunlock(wpipe); 1055133049Ssilby error = EPIPE; 1056133049Ssilby break; 1057133049Ssilby } 105814037Sdyson#ifndef PIPE_NODIRECT 105913907Sdyson /* 106013907Sdyson * If the transfer is large, we can gain performance if 106113907Sdyson * we do process-to-process copies directly. 106216416Sdyson * If the write is non-blocking, we don't use the 106316416Sdyson * direct write mechanism. 106458505Sdillon * 106558505Sdillon * The direct write mechanism will detect the reader going 106658505Sdillon * away on us. 106713907Sdyson */ 1068165347Spjd if (uio->uio_segflg == UIO_USERSPACE && 1069165347Spjd uio->uio_iov->iov_len >= PIPE_MINDIRECT && 1070165347Spjd wpipe->pipe_buffer.size >= PIPE_MINDIRECT && 1071127501Salc (fp->f_flag & FNONBLOCK) == 0) { 1072133049Ssilby pipeunlock(wpipe); 1073105009Salfred error = pipe_direct_write(wpipe, uio); 107476760Salfred if (error) 107513907Sdyson break; 107613907Sdyson continue; 107791362Salfred } 107814037Sdyson#endif 107913907Sdyson 108013907Sdyson /* 108113907Sdyson * Pipe buffered writes cannot be coincidental with 108213907Sdyson * direct writes. We wait until the currently executing 108313907Sdyson * direct write is completed before we start filling the 108458505Sdillon * pipe buffer. We break out if a signal occurs or the 108558505Sdillon * reader goes away. 
108613907Sdyson */ 1087133049Ssilby if (wpipe->pipe_state & PIPE_DIRECTW) { 108813992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 108913992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 109013992Sdyson wakeup(wpipe); 109113992Sdyson } 1092173750Sdumbbell pipeselwakeup(wpipe); 1093173750Sdumbbell wpipe->pipe_state |= PIPE_WANTW; 1094133049Ssilby pipeunlock(wpipe); 109591395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 109691362Salfred "pipbww", 0); 109713907Sdyson if (error) 109813907Sdyson break; 1099133049Ssilby else 1100133049Ssilby continue; 110113907Sdyson } 110213907Sdyson 110313907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 110414644Sdyson 110514644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 110613913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 110713913Sdyson space = 0; 110813907Sdyson 1109118230Spb if (space > 0) { 1110133049Ssilby int size; /* Transfer size */ 1111133049Ssilby int segsize; /* first segment to transfer */ 111276760Salfred 1113133049Ssilby /* 1114133049Ssilby * Transfer size is minimum of uio transfer 1115133049Ssilby * and free space in pipe buffer. 1116133049Ssilby */ 1117133049Ssilby if (space > uio->uio_resid) 1118133049Ssilby size = uio->uio_resid; 1119133049Ssilby else 1120133049Ssilby size = space; 1121133049Ssilby /* 1122133049Ssilby * First segment to transfer is minimum of 1123133049Ssilby * transfer size and contiguous space in 1124133049Ssilby * pipe buffer. If first segment to transfer 1125133049Ssilby * is less than the transfer size, we've got 1126133049Ssilby * a wraparound in the buffer. 
1127133049Ssilby */ 1128133049Ssilby segsize = wpipe->pipe_buffer.size - 1129133049Ssilby wpipe->pipe_buffer.in; 1130133049Ssilby if (segsize > size) 1131133049Ssilby segsize = size; 113254534Stegge 1133133049Ssilby /* Transfer first segment */ 1134133049Ssilby 1135133049Ssilby PIPE_UNLOCK(rpipe); 1136133049Ssilby error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 1137133049Ssilby segsize, uio); 1138133049Ssilby PIPE_LOCK(rpipe); 1139133049Ssilby 1140133049Ssilby if (error == 0 && segsize < size) { 1141133049Ssilby KASSERT(wpipe->pipe_buffer.in + segsize == 1142133049Ssilby wpipe->pipe_buffer.size, 1143133049Ssilby ("Pipe buffer wraparound disappeared")); 114454534Stegge /* 1145133049Ssilby * Transfer remaining part now, to 1146133049Ssilby * support atomic writes. Wraparound 1147133049Ssilby * happened. 114854534Stegge */ 1149124394Sdes 115091395Salfred PIPE_UNLOCK(rpipe); 1151133049Ssilby error = uiomove( 1152133049Ssilby &wpipe->pipe_buffer.buffer[0], 1153133049Ssilby size - segsize, uio); 115491395Salfred PIPE_LOCK(rpipe); 1155133049Ssilby } 1156133049Ssilby if (error == 0) { 1157133049Ssilby wpipe->pipe_buffer.in += size; 1158133049Ssilby if (wpipe->pipe_buffer.in >= 1159133049Ssilby wpipe->pipe_buffer.size) { 1160133049Ssilby KASSERT(wpipe->pipe_buffer.in == 1161133049Ssilby size - segsize + 1162133049Ssilby wpipe->pipe_buffer.size, 1163133049Ssilby ("Expected wraparound bad")); 1164133049Ssilby wpipe->pipe_buffer.in = size - segsize; 116554534Stegge } 1166124394Sdes 1167133049Ssilby wpipe->pipe_buffer.cnt += size; 1168133049Ssilby KASSERT(wpipe->pipe_buffer.cnt <= 1169133049Ssilby wpipe->pipe_buffer.size, 1170133049Ssilby ("Pipe buffer overflow")); 117113675Sdyson } 1172133049Ssilby pipeunlock(wpipe); 1173153484Sdelphij if (error != 0) 1174153484Sdelphij break; 117513675Sdyson } else { 117613675Sdyson /* 117713675Sdyson * If the "read-side" has been blocked, wake it up now. 
117813675Sdyson */ 117913675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 118013675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 118113675Sdyson wakeup(wpipe); 118213675Sdyson } 118314037Sdyson 118413675Sdyson /* 118513675Sdyson * don't block on non-blocking I/O 118613675Sdyson */ 118716960Sdyson if (fp->f_flag & FNONBLOCK) { 118813907Sdyson error = EAGAIN; 1189133049Ssilby pipeunlock(wpipe); 119013675Sdyson break; 119113675Sdyson } 119213907Sdyson 119314037Sdyson /* 119414037Sdyson * We have no more space and have something to offer, 119529356Speter * wake up select/poll. 119614037Sdyson */ 119714037Sdyson pipeselwakeup(wpipe); 119814037Sdyson 119913675Sdyson wpipe->pipe_state |= PIPE_WANTW; 1200133049Ssilby pipeunlock(wpipe); 120191395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), 120291362Salfred PRIBIO | PCATCH, "pipewr", 0); 120376760Salfred if (error != 0) 120413675Sdyson break; 120513675Sdyson } 120613675Sdyson } 120713675Sdyson 1208133049Ssilby pipelock(wpipe, 0); 120914644Sdyson --wpipe->pipe_busy; 121077676Sdillon 121176760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 121276760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 121313675Sdyson wakeup(wpipe); 121413675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 121513675Sdyson /* 121613675Sdyson * If we have put any characters in the buffer, we wake up 121713675Sdyson * the reader. 
121813675Sdyson */ 121913675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 122013675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 122113675Sdyson wakeup(wpipe); 122213675Sdyson } 122313675Sdyson } 122413909Sdyson 122513909Sdyson /* 122613909Sdyson * Don't return EPIPE if I/O was successful 122713909Sdyson */ 122813907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 122977676Sdillon (uio->uio_resid == 0) && 123077676Sdillon (error == EPIPE)) { 123113907Sdyson error = 0; 123277676Sdillon } 123313913Sdyson 123424101Sbde if (error == 0) 123555112Sbde vfs_timestamp(&wpipe->pipe_mtime); 123624101Sbde 123714037Sdyson /* 123814037Sdyson * We have something to offer, 123929356Speter * wake up select/poll. 124014037Sdyson */ 124114177Sdyson if (wpipe->pipe_buffer.cnt) 124214037Sdyson pipeselwakeup(wpipe); 124313907Sdyson 1244133049Ssilby pipeunlock(wpipe); 124591395Salfred PIPE_UNLOCK(rpipe); 124676760Salfred return (error); 124713675Sdyson} 124813675Sdyson 1249175140Sjhb/* ARGSUSED */ 1250175140Sjhbstatic int 1251175140Sjhbpipe_truncate(fp, length, active_cred, td) 1252175140Sjhb struct file *fp; 1253175140Sjhb off_t length; 1254175140Sjhb struct ucred *active_cred; 1255175140Sjhb struct thread *td; 1256175140Sjhb{ 1257175140Sjhb 1258175140Sjhb return (EINVAL); 1259175140Sjhb} 1260175140Sjhb 126113675Sdyson/* 126213675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 
126313675Sdyson */ 1264104094Sphkstatic int 1265102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td) 126613675Sdyson struct file *fp; 126736735Sdfr u_long cmd; 126899009Salfred void *data; 1269102003Srwatson struct ucred *active_cred; 127083366Sjulian struct thread *td; 127113675Sdyson{ 1272109153Sdillon struct pipe *mpipe = fp->f_data; 1273101768Srwatson int error; 127413675Sdyson 1275104269Srwatson PIPE_LOCK(mpipe); 1276104269Srwatson 1277104269Srwatson#ifdef MAC 1278172930Srwatson error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data); 1279121970Srwatson if (error) { 1280121970Srwatson PIPE_UNLOCK(mpipe); 1281101768Srwatson return (error); 1282121970Srwatson } 1283101768Srwatson#endif 1284101768Srwatson 1285137752Sphk error = 0; 128613675Sdyson switch (cmd) { 128713675Sdyson 128813675Sdyson case FIONBIO: 1289137752Sphk break; 129013675Sdyson 129113675Sdyson case FIOASYNC: 129213675Sdyson if (*(int *)data) { 129313675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 129413675Sdyson } else { 129513675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 129613675Sdyson } 1297137752Sphk break; 129813675Sdyson 129913675Sdyson case FIONREAD: 130014037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 130114037Sdyson *(int *)data = mpipe->pipe_map.cnt; 130214037Sdyson else 130314037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 1304137752Sphk break; 130513675Sdyson 130641086Struckman case FIOSETOWN: 1307138032Srwatson PIPE_UNLOCK(mpipe); 1308137752Sphk error = fsetown(*(int *)data, &mpipe->pipe_sigio); 1309138032Srwatson goto out_unlocked; 131041086Struckman 131141086Struckman case FIOGETOWN: 1312104393Struckman *(int *)data = fgetown(&mpipe->pipe_sigio); 1313137752Sphk break; 131413675Sdyson 131541086Struckman /* This is deprecated, FIOSETOWN should be used instead. 
*/ 131641086Struckman case TIOCSPGRP: 1317138032Srwatson PIPE_UNLOCK(mpipe); 1318137752Sphk error = fsetown(-(*(int *)data), &mpipe->pipe_sigio); 1319138032Srwatson goto out_unlocked; 132041086Struckman 132141086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 132218863Sdyson case TIOCGPGRP: 1323104393Struckman *(int *)data = -fgetown(&mpipe->pipe_sigio); 1324137752Sphk break; 132513675Sdyson 1326137752Sphk default: 1327137752Sphk error = ENOTTY; 1328137764Sphk break; 132913675Sdyson } 1330104269Srwatson PIPE_UNLOCK(mpipe); 1331138032Srwatsonout_unlocked: 1332137752Sphk return (error); 133313675Sdyson} 133413675Sdyson 1335104094Sphkstatic int 1336101983Srwatsonpipe_poll(fp, events, active_cred, td) 133713675Sdyson struct file *fp; 133829356Speter int events; 1339101983Srwatson struct ucred *active_cred; 134083366Sjulian struct thread *td; 134113675Sdyson{ 1342109153Sdillon struct pipe *rpipe = fp->f_data; 134313675Sdyson struct pipe *wpipe; 134429356Speter int revents = 0; 1345101768Srwatson#ifdef MAC 1346101768Srwatson int error; 1347101768Srwatson#endif 134813675Sdyson 134913675Sdyson wpipe = rpipe->pipe_peer; 135091362Salfred PIPE_LOCK(rpipe); 1351101768Srwatson#ifdef MAC 1352172930Srwatson error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair); 1353101768Srwatson if (error) 1354101768Srwatson goto locked_error; 1355101768Srwatson#endif 135629356Speter if (events & (POLLIN | POLLRDNORM)) 135729356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 135829356Speter (rpipe->pipe_buffer.cnt > 0) || 135929356Speter (rpipe->pipe_state & PIPE_EOF)) 136029356Speter revents |= events & (POLLIN | POLLRDNORM); 136113675Sdyson 136229356Speter if (events & (POLLOUT | POLLWRNORM)) 1363179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1364179243Skib (wpipe->pipe_state & PIPE_EOF) || 136543311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 136643311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 136729356Speter revents |= 
events & (POLLOUT | POLLWRNORM); 136813675Sdyson 136929356Speter if ((rpipe->pipe_state & PIPE_EOF) || 1370179243Skib wpipe->pipe_present != PIPE_ACTIVE || 137129356Speter (wpipe->pipe_state & PIPE_EOF)) 137229356Speter revents |= POLLHUP; 137329356Speter 137429356Speter if (revents == 0) { 137529356Speter if (events & (POLLIN | POLLRDNORM)) { 137683805Sjhb selrecord(td, &rpipe->pipe_sel); 1377174647Sjeff if (SEL_WAITING(&rpipe->pipe_sel)) 1378174647Sjeff rpipe->pipe_state |= PIPE_SEL; 137913675Sdyson } 138013675Sdyson 138129356Speter if (events & (POLLOUT | POLLWRNORM)) { 138283805Sjhb selrecord(td, &wpipe->pipe_sel); 1383174647Sjeff if (SEL_WAITING(&wpipe->pipe_sel)) 1384174647Sjeff wpipe->pipe_state |= PIPE_SEL; 138513907Sdyson } 138613675Sdyson } 1387101768Srwatson#ifdef MAC 1388101768Srwatsonlocked_error: 1389101768Srwatson#endif 139091362Salfred PIPE_UNLOCK(rpipe); 139129356Speter 139229356Speter return (revents); 139313675Sdyson} 139413675Sdyson 139598989Salfred/* 139698989Salfred * We shouldn't need locks here as we're doing a read and this should 139798989Salfred * be a natural race. 
139898989Salfred */ 139952983Speterstatic int 1400101983Srwatsonpipe_stat(fp, ub, active_cred, td) 140152983Speter struct file *fp; 140252983Speter struct stat *ub; 1403101983Srwatson struct ucred *active_cred; 140483366Sjulian struct thread *td; 140513675Sdyson{ 1406109153Sdillon struct pipe *pipe = fp->f_data; 1407101768Srwatson#ifdef MAC 1408101768Srwatson int error; 140952983Speter 1410104269Srwatson PIPE_LOCK(pipe); 1411172930Srwatson error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); 1412104269Srwatson PIPE_UNLOCK(pipe); 1413101768Srwatson if (error) 1414101768Srwatson return (error); 1415101768Srwatson#endif 1416100527Salfred bzero(ub, sizeof(*ub)); 141717124Sbde ub->st_mode = S_IFIFO; 1418133790Ssilby ub->st_blksize = PAGE_SIZE; 1419132436Ssilby if (pipe->pipe_state & PIPE_DIRECTW) 1420132436Ssilby ub->st_size = pipe->pipe_map.cnt; 1421132436Ssilby else 1422132436Ssilby ub->st_size = pipe->pipe_buffer.cnt; 142313675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 142434901Sphk ub->st_atimespec = pipe->pipe_atime; 142534901Sphk ub->st_mtimespec = pipe->pipe_mtime; 142634901Sphk ub->st_ctimespec = pipe->pipe_ctime; 142760404Schris ub->st_uid = fp->f_cred->cr_uid; 142860404Schris ub->st_gid = fp->f_cred->cr_gid; 142917124Sbde /* 143060404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 143117124Sbde * XXX (st_dev, st_ino) should be unique. 
143217124Sbde */ 143376760Salfred return (0); 143413675Sdyson} 143513675Sdyson 143613675Sdyson/* ARGSUSED */ 143713675Sdysonstatic int 143883366Sjulianpipe_close(fp, td) 143913675Sdyson struct file *fp; 144083366Sjulian struct thread *td; 144113675Sdyson{ 1442109153Sdillon struct pipe *cpipe = fp->f_data; 144316322Sgpalmer 144449413Sgreen fp->f_ops = &badfileops; 1445109153Sdillon fp->f_data = NULL; 144696122Salfred funsetown(&cpipe->pipe_sigio); 144713675Sdyson pipeclose(cpipe); 144876760Salfred return (0); 144913675Sdyson} 145013675Sdyson 145176364Salfredstatic void 145276364Salfredpipe_free_kmem(cpipe) 145376364Salfred struct pipe *cpipe; 145476364Salfred{ 145591412Salfred 1456125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), 1457125293Srwatson ("pipe_free_kmem: pipe mutex locked")); 145876364Salfred 145976364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 1460110816Salc atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size); 1461118764Ssilby vm_map_remove(pipe_map, 1462118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer, 1463118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 146476364Salfred cpipe->pipe_buffer.buffer = NULL; 146576364Salfred } 146676364Salfred#ifndef PIPE_NODIRECT 1467127501Salc { 146876364Salfred cpipe->pipe_map.cnt = 0; 146976364Salfred cpipe->pipe_map.pos = 0; 147076364Salfred cpipe->pipe_map.npages = 0; 147176364Salfred } 147276364Salfred#endif 147376364Salfred} 147476364Salfred 147513675Sdyson/* 147613675Sdyson * shutdown the pipe 147713675Sdyson */ 147813675Sdysonstatic void 147913675Sdysonpipeclose(cpipe) 148013675Sdyson struct pipe *cpipe; 148113675Sdyson{ 1482125293Srwatson struct pipepair *pp; 148313907Sdyson struct pipe *ppipe; 148476364Salfred 1485125293Srwatson KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL")); 148691968Salfred 1487125293Srwatson PIPE_LOCK(cpipe); 1488133049Ssilby pipelock(cpipe, 0); 1489125293Srwatson pp = cpipe->pipe_pair; 149091968Salfred 149191968Salfred 
pipeselwakeup(cpipe); 149213907Sdyson 149391968Salfred /* 149491968Salfred * If the other side is blocked, wake it up saying that 149591968Salfred * we want to close it down. 149691968Salfred */ 1497126131Sgreen cpipe->pipe_state |= PIPE_EOF; 149891968Salfred while (cpipe->pipe_busy) { 149991968Salfred wakeup(cpipe); 1500126131Sgreen cpipe->pipe_state |= PIPE_WANT; 1501133049Ssilby pipeunlock(cpipe); 150291968Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1503133049Ssilby pipelock(cpipe, 0); 150491968Salfred } 150513675Sdyson 1506101768Srwatson 150791968Salfred /* 1508125293Srwatson * Disconnect from peer, if any. 150991968Salfred */ 1510125293Srwatson ppipe = cpipe->pipe_peer; 1511179243Skib if (ppipe->pipe_present == PIPE_ACTIVE) { 151291968Salfred pipeselwakeup(ppipe); 151313907Sdyson 151491968Salfred ppipe->pipe_state |= PIPE_EOF; 151591968Salfred wakeup(ppipe); 1516133741Sjmg KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0); 151791968Salfred } 1518125293Srwatson 151991968Salfred /* 1520125293Srwatson * Mark this endpoint as free. Release kmem resources. We 1521125293Srwatson * don't mark this endpoint as unused until we've finished 1522125293Srwatson * doing that, or the pipe might disappear out from under 1523125293Srwatson * us. 152491968Salfred */ 1525125293Srwatson PIPE_UNLOCK(cpipe); 1526125293Srwatson pipe_free_kmem(cpipe); 1527125293Srwatson PIPE_LOCK(cpipe); 1528179243Skib cpipe->pipe_present = PIPE_CLOSING; 1529126131Sgreen pipeunlock(cpipe); 1530179243Skib 1531179243Skib /* 1532179243Skib * knlist_clear() may sleep dropping the PIPE_MTX. Set the 1533179243Skib * PIPE_FINALIZED, that allows other end to free the 1534179243Skib * pipe_pair, only after the knotes are completely dismantled. 
1535179243Skib */ 1536133741Sjmg knlist_clear(&cpipe->pipe_sel.si_note, 1); 1537179243Skib cpipe->pipe_present = PIPE_FINALIZED; 1538133741Sjmg knlist_destroy(&cpipe->pipe_sel.si_note); 1539125293Srwatson 1540125293Srwatson /* 1541125293Srwatson * If both endpoints are now closed, release the memory for the 1542125293Srwatson * pipe pair. If not, unlock. 1543125293Srwatson */ 1544179243Skib if (ppipe->pipe_present == PIPE_FINALIZED) { 154591968Salfred PIPE_UNLOCK(cpipe); 1546125293Srwatson#ifdef MAC 1547172930Srwatson mac_pipe_destroy(pp); 1548125293Srwatson#endif 1549125293Srwatson uma_zfree(pipe_zone, cpipe->pipe_pair); 1550125293Srwatson } else 1551125293Srwatson PIPE_UNLOCK(cpipe); 155213675Sdyson} 155359288Sjlemon 155472521Sjlemon/*ARGSUSED*/ 155559288Sjlemonstatic int 155672521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 155759288Sjlemon{ 155889306Salfred struct pipe *cpipe; 155959288Sjlemon 1560109153Sdillon cpipe = kn->kn_fp->f_data; 1561126131Sgreen PIPE_LOCK(cpipe); 156272521Sjlemon switch (kn->kn_filter) { 156372521Sjlemon case EVFILT_READ: 156472521Sjlemon kn->kn_fop = &pipe_rfiltops; 156572521Sjlemon break; 156672521Sjlemon case EVFILT_WRITE: 156772521Sjlemon kn->kn_fop = &pipe_wfiltops; 1568179243Skib if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) { 1569101382Sdes /* other end of pipe has been closed */ 1570126131Sgreen PIPE_UNLOCK(cpipe); 1571118929Sjmg return (EPIPE); 1572126131Sgreen } 1573126131Sgreen cpipe = cpipe->pipe_peer; 157472521Sjlemon break; 157572521Sjlemon default: 1576126131Sgreen PIPE_UNLOCK(cpipe); 1577133741Sjmg return (EINVAL); 157872521Sjlemon } 157978292Sjlemon 1580133741Sjmg knlist_add(&cpipe->pipe_sel.si_note, kn, 1); 158191372Salfred PIPE_UNLOCK(cpipe); 158259288Sjlemon return (0); 158359288Sjlemon} 158459288Sjlemon 158559288Sjlemonstatic void 158659288Sjlemonfilt_pipedetach(struct knote *kn) 158759288Sjlemon{ 1588121018Sjmg struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 158959288Sjlemon 1590126131Sgreen 
PIPE_LOCK(cpipe); 1591179242Skib if (kn->kn_filter == EVFILT_WRITE) 1592121018Sjmg cpipe = cpipe->pipe_peer; 1593133741Sjmg knlist_remove(&cpipe->pipe_sel.si_note, kn, 1); 159491372Salfred PIPE_UNLOCK(cpipe); 159559288Sjlemon} 159659288Sjlemon 159759288Sjlemon/*ARGSUSED*/ 159859288Sjlemonstatic int 159959288Sjlemonfilt_piperead(struct knote *kn, long hint) 160059288Sjlemon{ 1601109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 160259288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 1603133741Sjmg int ret; 160459288Sjlemon 160591372Salfred PIPE_LOCK(rpipe); 160659288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 160759288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 160859288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 160959288Sjlemon 161059288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 1611179243Skib wpipe->pipe_present != PIPE_ACTIVE || 1612179243Skib (wpipe->pipe_state & PIPE_EOF)) { 161391372Salfred kn->kn_flags |= EV_EOF; 161491372Salfred PIPE_UNLOCK(rpipe); 161559288Sjlemon return (1); 161659288Sjlemon } 1617133741Sjmg ret = kn->kn_data > 0; 161891372Salfred PIPE_UNLOCK(rpipe); 1619133741Sjmg return ret; 162059288Sjlemon} 162159288Sjlemon 162259288Sjlemon/*ARGSUSED*/ 162359288Sjlemonstatic int 162459288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 162559288Sjlemon{ 1626109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 162759288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 162859288Sjlemon 162991372Salfred PIPE_LOCK(rpipe); 1630179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1631179243Skib (wpipe->pipe_state & PIPE_EOF)) { 163259288Sjlemon kn->kn_data = 0; 1633124394Sdes kn->kn_flags |= EV_EOF; 163491372Salfred PIPE_UNLOCK(rpipe); 163559288Sjlemon return (1); 163659288Sjlemon } 163759288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 163865855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 163959288Sjlemon kn->kn_data = 0; 164059288Sjlemon 164191372Salfred PIPE_UNLOCK(rpipe); 164259288Sjlemon 
return (kn->kn_data >= PIPE_BUF); 164359288Sjlemon} 1644