/* sys_pipe.c — FreeBSD head, revision 216511 (captured via annotate; each original line below is prefixed with its line number, revision, and committer) */
1139804Simp/*- 213675Sdyson * Copyright (c) 1996 John S. Dyson 313675Sdyson * All rights reserved. 413675Sdyson * 513675Sdyson * Redistribution and use in source and binary forms, with or without 613675Sdyson * modification, are permitted provided that the following conditions 713675Sdyson * are met: 813675Sdyson * 1. Redistributions of source code must retain the above copyright 913675Sdyson * notice immediately at the beginning of the file, without modification, 1013675Sdyson * this list of conditions, and the following disclaimer. 1113675Sdyson * 2. Redistributions in binary form must reproduce the above copyright 1213675Sdyson * notice, this list of conditions and the following disclaimer in the 1313675Sdyson * documentation and/or other materials provided with the distribution. 1413675Sdyson * 3. Absolutely no warranty of function or purpose is made by the author 1513675Sdyson * John S. Dyson. 1614037Sdyson * 4. Modifications may be freely made to this file if the above conditions 1713675Sdyson * are met. 1813675Sdyson */ 1913675Sdyson 2013675Sdyson/* 2113675Sdyson * This file contains a high-performance replacement for the socket-based 2213675Sdyson * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 2313675Sdyson * all features of sockets, but does do everything that pipes normally 2413675Sdyson * do. 2513675Sdyson */ 2613675Sdyson 2713907Sdyson/* 2813907Sdyson * This code has two modes of operation, a small write mode and a large 2913907Sdyson * write mode. The small write mode acts like conventional pipes with 3013907Sdyson * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 3113907Sdyson * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 3213907Sdyson * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 3313907Sdyson * the receiving process can copy it directly from the pages in the sending 3413907Sdyson * process. 
3513907Sdyson * 3613907Sdyson * If the sending process receives a signal, it is possible that it will 3713913Sdyson * go away, and certainly its address space can change, because control 3813907Sdyson * is returned back to the user-mode side. In that case, the pipe code 3913907Sdyson * arranges to copy the buffer supplied by the user process, to a pageable 4013907Sdyson * kernel buffer, and the receiving process will grab the data from the 4113907Sdyson * pageable kernel buffer. Since signals don't happen all that often, 4213907Sdyson * the copy operation is normally eliminated. 4313907Sdyson * 4413907Sdyson * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 4513907Sdyson * happen for small transfers so that the system will not spend all of 46118764Ssilby * its time context switching. 47117325Ssilby * 48118764Ssilby * In order to limit the resource use of pipes, two sysctls exist: 49117325Ssilby * 50118764Ssilby * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 51133790Ssilby * address space available to us in pipe_map. This value is normally 52133790Ssilby * autotuned, but may also be loader tuned. 53117325Ssilby * 54133790Ssilby * kern.ipc.pipekva - This read-only sysctl tracks the current amount of 55133790Ssilby * memory in use by pipes. 56117325Ssilby * 57133790Ssilby * Based on how large pipekva is relative to maxpipekva, the following 58133790Ssilby * will happen: 59117325Ssilby * 60133790Ssilby * 0% - 50%: 61133790Ssilby * New pipes are given 16K of memory backing, pipes may dynamically 62133790Ssilby * grow to as large as 64K where needed. 63133790Ssilby * 50% - 75%: 64133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 65133790Ssilby * existing pipes may NOT grow. 66133790Ssilby * 75% - 100%: 67133790Ssilby * New pipes are given 4K (or PAGE_SIZE) of memory backing, 68133790Ssilby * existing pipes will be shrunk down to 4K whenever possible. 
69133049Ssilby * 70133790Ssilby * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0. If 71133790Ssilby * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE 72133790Ssilby * resize which MUST occur for reverse-direction pipes when they are 73133790Ssilby * first used. 74133790Ssilby * 75133790Ssilby * Additional information about the current state of pipes may be obtained 76133790Ssilby * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail, 77133790Ssilby * and kern.ipc.piperesizefail. 78133790Ssilby * 79133049Ssilby * Locking rules: There are two locks present here: A mutex, used via 80133049Ssilby * PIPE_LOCK, and a flag, used via pipelock(). All locking is done via 81133049Ssilby * the flag, as mutexes can not persist over uiomove. The mutex 82133049Ssilby * exists only to guard access to the flag, and is not in itself a 83133790Ssilby * locking mechanism. Also note that there is only a single mutex for 84133790Ssilby * both directions of a pipe. 85133049Ssilby * 86133049Ssilby * As pipelock() may have to sleep before it can acquire the flag, it 87133049Ssilby * is important to reread all data after a call to pipelock(); everything 88133049Ssilby * in the structure may have changed. 
8913907Sdyson */ 9013907Sdyson 91116182Sobrien#include <sys/cdefs.h> 92116182Sobrien__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 216511 2010-12-17 22:41:22Z alc $"); 93116182Sobrien 9413675Sdyson#include <sys/param.h> 9513675Sdyson#include <sys/systm.h> 9624131Sbde#include <sys/fcntl.h> 9713675Sdyson#include <sys/file.h> 9813675Sdyson#include <sys/filedesc.h> 9924206Sbde#include <sys/filio.h> 10091372Salfred#include <sys/kernel.h> 10176166Smarkm#include <sys/lock.h> 10276827Salfred#include <sys/mutex.h> 10324206Sbde#include <sys/ttycom.h> 10413675Sdyson#include <sys/stat.h> 10591968Salfred#include <sys/malloc.h> 10629356Speter#include <sys/poll.h> 10770834Swollman#include <sys/selinfo.h> 10813675Sdyson#include <sys/signalvar.h> 109184849Sed#include <sys/syscallsubr.h> 110117325Ssilby#include <sys/sysctl.h> 11113675Sdyson#include <sys/sysproto.h> 11213675Sdyson#include <sys/pipe.h> 11376166Smarkm#include <sys/proc.h> 11455112Sbde#include <sys/vnode.h> 11534924Sbde#include <sys/uio.h> 11659288Sjlemon#include <sys/event.h> 11713675Sdyson 118163606Srwatson#include <security/mac/mac_framework.h> 119163606Srwatson 12013675Sdyson#include <vm/vm.h> 12113675Sdyson#include <vm/vm_param.h> 12213675Sdyson#include <vm/vm_object.h> 12313675Sdyson#include <vm/vm_kern.h> 12413675Sdyson#include <vm/vm_extern.h> 12513675Sdyson#include <vm/pmap.h> 12613675Sdyson#include <vm/vm_map.h> 12713907Sdyson#include <vm/vm_page.h> 12892751Sjeff#include <vm/uma.h> 12913675Sdyson 13014037Sdyson/* 13114037Sdyson * Use this define if you want to disable *fancy* VM things. Expect an 13214037Sdyson * approx 30% decrease in transfer rate. This could be useful for 13314037Sdyson * NetBSD or OpenBSD. 
13414037Sdyson */ 13514037Sdyson/* #define PIPE_NODIRECT */ 13614037Sdyson 13714037Sdyson/* 13814037Sdyson * interfaces to the outside world 13914037Sdyson */ 140108255Sphkstatic fo_rdwr_t pipe_read; 141108255Sphkstatic fo_rdwr_t pipe_write; 142175140Sjhbstatic fo_truncate_t pipe_truncate; 143108255Sphkstatic fo_ioctl_t pipe_ioctl; 144108255Sphkstatic fo_poll_t pipe_poll; 145108255Sphkstatic fo_kqfilter_t pipe_kqfilter; 146108255Sphkstatic fo_stat_t pipe_stat; 147108255Sphkstatic fo_close_t pipe_close; 14813675Sdyson 14972521Sjlemonstatic struct fileops pipeops = { 150116546Sphk .fo_read = pipe_read, 151116546Sphk .fo_write = pipe_write, 152175140Sjhb .fo_truncate = pipe_truncate, 153116546Sphk .fo_ioctl = pipe_ioctl, 154116546Sphk .fo_poll = pipe_poll, 155116546Sphk .fo_kqfilter = pipe_kqfilter, 156116546Sphk .fo_stat = pipe_stat, 157116546Sphk .fo_close = pipe_close, 158116546Sphk .fo_flags = DFLAG_PASSABLE 15972521Sjlemon}; 16013675Sdyson 16159288Sjlemonstatic void filt_pipedetach(struct knote *kn); 16259288Sjlemonstatic int filt_piperead(struct knote *kn, long hint); 16359288Sjlemonstatic int filt_pipewrite(struct knote *kn, long hint); 16459288Sjlemon 165197134Srwatsonstatic struct filterops pipe_rfiltops = { 166197134Srwatson .f_isfd = 1, 167197134Srwatson .f_detach = filt_pipedetach, 168197134Srwatson .f_event = filt_piperead 169197134Srwatson}; 170197134Srwatsonstatic struct filterops pipe_wfiltops = { 171197134Srwatson .f_isfd = 1, 172197134Srwatson .f_detach = filt_pipedetach, 173197134Srwatson .f_event = filt_pipewrite 174197134Srwatson}; 17559288Sjlemon 17613675Sdyson/* 17713675Sdyson * Default pipe buffer size(s), this can be kind-of large now because pipe 17813675Sdyson * space is pageable. The pipe code will try to maintain locality of 17913675Sdyson * reference for performance reasons, so small amounts of outstanding I/O 18013675Sdyson * will not wipe the cache. 
18113675Sdyson */ 18213907Sdyson#define MINPIPESIZE (PIPE_SIZE/3) 18313907Sdyson#define MAXPIPESIZE (2*PIPE_SIZE/3) 18413675Sdyson 185189649Sjhbstatic long amountpipekva; 186133790Ssilbystatic int pipefragretry; 187133790Ssilbystatic int pipeallocfail; 188133790Ssilbystatic int piperesizefail; 189133790Ssilbystatic int piperesizeallowed = 1; 19013907Sdyson 191189649SjhbSYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN, 192117325Ssilby &maxpipekva, 0, "Pipe KVA limit"); 193189649SjhbSYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 194117325Ssilby &amountpipekva, 0, "Pipe KVA usage"); 195133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD, 196133790Ssilby &pipefragretry, 0, "Pipe allocation retries due to fragmentation"); 197133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD, 198133790Ssilby &pipeallocfail, 0, "Pipe allocation failures"); 199133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, 200133790Ssilby &piperesizefail, 0, "Pipe resize failures"); 201133790SsilbySYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, 202133790Ssilby &piperesizeallowed, 0, "Pipe resizing allowed"); 203117325Ssilby 20491413Salfredstatic void pipeinit(void *dummy __unused); 20591413Salfredstatic void pipeclose(struct pipe *cpipe); 20691413Salfredstatic void pipe_free_kmem(struct pipe *cpipe); 207133790Ssilbystatic int pipe_create(struct pipe *pipe, int backing); 20891413Salfredstatic __inline int pipelock(struct pipe *cpipe, int catch); 20991413Salfredstatic __inline void pipeunlock(struct pipe *cpipe); 21091413Salfredstatic __inline void pipeselwakeup(struct pipe *cpipe); 21114037Sdyson#ifndef PIPE_NODIRECT 21291413Salfredstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 21391413Salfredstatic void pipe_destroy_write_buffer(struct pipe *wpipe); 21491413Salfredstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio); 21591413Salfredstatic void pipe_clone_write_buffer(struct 
pipe *wpipe); 21614037Sdyson#endif 21791413Salfredstatic int pipespace(struct pipe *cpipe, int size); 218132579Srwatsonstatic int pipespace_new(struct pipe *cpipe, int size); 21913675Sdyson 220132987Sgreenstatic int pipe_zone_ctor(void *mem, int size, void *arg, int flags); 221132987Sgreenstatic int pipe_zone_init(void *mem, int size, int flags); 222125293Srwatsonstatic void pipe_zone_fini(void *mem, int size); 223125293Srwatson 22492751Sjeffstatic uma_zone_t pipe_zone; 22527899Sdyson 22691372SalfredSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 22791372Salfred 22891372Salfredstatic void 22991372Salfredpipeinit(void *dummy __unused) 23091372Salfred{ 231118880Salc 232170022Srwatson pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair), 233170022Srwatson pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, 234125293Srwatson UMA_ALIGN_PTR, 0); 235118880Salc KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 23691372Salfred} 23791372Salfred 238132987Sgreenstatic int 239132987Sgreenpipe_zone_ctor(void *mem, int size, void *arg, int flags) 240125293Srwatson{ 241125293Srwatson struct pipepair *pp; 242125293Srwatson struct pipe *rpipe, *wpipe; 243125293Srwatson 244125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size")); 245125293Srwatson 246125293Srwatson pp = (struct pipepair *)mem; 247125293Srwatson 248125293Srwatson /* 249125293Srwatson * We zero both pipe endpoints to make sure all the kmem pointers 250125293Srwatson * are NULL, flag fields are zero'd, etc. We timestamp both 251125293Srwatson * endpoints with the same time. 
252125293Srwatson */ 253125293Srwatson rpipe = &pp->pp_rpipe; 254125293Srwatson bzero(rpipe, sizeof(*rpipe)); 255125293Srwatson vfs_timestamp(&rpipe->pipe_ctime); 256125293Srwatson rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; 257125293Srwatson 258125293Srwatson wpipe = &pp->pp_wpipe; 259125293Srwatson bzero(wpipe, sizeof(*wpipe)); 260125293Srwatson wpipe->pipe_ctime = rpipe->pipe_ctime; 261125293Srwatson wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; 262125293Srwatson 263125293Srwatson rpipe->pipe_peer = wpipe; 264125293Srwatson rpipe->pipe_pair = pp; 265125293Srwatson wpipe->pipe_peer = rpipe; 266125293Srwatson wpipe->pipe_pair = pp; 267125293Srwatson 268125293Srwatson /* 269125293Srwatson * Mark both endpoints as present; they will later get free'd 270125293Srwatson * one at a time. When both are free'd, then the whole pair 271125293Srwatson * is released. 272125293Srwatson */ 273179243Skib rpipe->pipe_present = PIPE_ACTIVE; 274179243Skib wpipe->pipe_present = PIPE_ACTIVE; 275125293Srwatson 276125293Srwatson /* 277125293Srwatson * Eventually, the MAC Framework may initialize the label 278125293Srwatson * in ctor or init, but for now we do it elswhere to avoid 279125293Srwatson * blocking in ctor or init. 
280125293Srwatson */ 281125293Srwatson pp->pp_label = NULL; 282125293Srwatson 283132987Sgreen return (0); 284125293Srwatson} 285125293Srwatson 286132987Sgreenstatic int 287132987Sgreenpipe_zone_init(void *mem, int size, int flags) 288125293Srwatson{ 289125293Srwatson struct pipepair *pp; 290125293Srwatson 291125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size")); 292125293Srwatson 293125293Srwatson pp = (struct pipepair *)mem; 294125293Srwatson 295125293Srwatson mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 296132987Sgreen return (0); 297125293Srwatson} 298125293Srwatson 299125293Srwatsonstatic void 300125293Srwatsonpipe_zone_fini(void *mem, int size) 301125293Srwatson{ 302125293Srwatson struct pipepair *pp; 303125293Srwatson 304125293Srwatson KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size")); 305125293Srwatson 306125293Srwatson pp = (struct pipepair *)mem; 307125293Srwatson 308125293Srwatson mtx_destroy(&pp->pp_mtx); 309125293Srwatson} 310125293Srwatson 31113675Sdyson/* 312167232Srwatson * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let 313167232Srwatson * the zone pick up the pieces via pipeclose(). 31413675Sdyson */ 31513675Sdysonint 316184849Sedkern_pipe(struct thread *td, int fildes[2]) 31713675Sdyson{ 31883366Sjulian struct filedesc *fdp = td->td_proc->p_fd; 31913675Sdyson struct file *rf, *wf; 320125293Srwatson struct pipepair *pp; 32113675Sdyson struct pipe *rpipe, *wpipe; 32213675Sdyson int fd, error; 32327899Sdyson 324125293Srwatson pp = uma_zalloc(pipe_zone, M_WAITOK); 325125293Srwatson#ifdef MAC 326125293Srwatson /* 327126249Srwatson * The MAC label is shared between the connected endpoints. As a 328172930Srwatson * result mac_pipe_init() and mac_pipe_create() are called once 329126249Srwatson * for the pair, and not on the endpoints. 
330125293Srwatson */ 331172930Srwatson mac_pipe_init(pp); 332172930Srwatson mac_pipe_create(td->td_ucred, pp); 333125293Srwatson#endif 334125293Srwatson rpipe = &pp->pp_rpipe; 335125293Srwatson wpipe = &pp->pp_wpipe; 336125293Srwatson 337193951Skib knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); 338193951Skib knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); 339140369Ssilby 340133790Ssilby /* Only the forward direction pipe is backed by default */ 341155035Sglebius if ((error = pipe_create(rpipe, 1)) != 0 || 342155035Sglebius (error = pipe_create(wpipe, 0)) != 0) { 343124394Sdes pipeclose(rpipe); 344124394Sdes pipeclose(wpipe); 345155035Sglebius return (error); 34676364Salfred } 347124394Sdes 34813907Sdyson rpipe->pipe_state |= PIPE_DIRECTOK; 34913907Sdyson wpipe->pipe_state |= PIPE_DIRECTOK; 35013675Sdyson 35183366Sjulian error = falloc(td, &rf, &fd); 35270915Sdwmalone if (error) { 35370915Sdwmalone pipeclose(rpipe); 35470915Sdwmalone pipeclose(wpipe); 35570915Sdwmalone return (error); 35670915Sdwmalone } 357121256Sdwmalone /* An extra reference on `rf' has been held for us by falloc(). */ 358184849Sed fildes[0] = fd; 35970915Sdwmalone 36070803Sdwmalone /* 36170803Sdwmalone * Warning: once we've gotten past allocation of the fd for the 36270803Sdwmalone * read-side, we can only drop the read side via fdrop() in order 36370803Sdwmalone * to avoid races against processes which manage to dup() the read 36470803Sdwmalone * side while we are blocked trying to allocate the write side. 36570803Sdwmalone */ 366174988Sjeff finit(rf, FREAD | FWRITE, DTYPE_PIPE, rpipe, &pipeops); 36783366Sjulian error = falloc(td, &wf, &fd); 36870915Sdwmalone if (error) { 369184849Sed fdclose(fdp, rf, fildes[0], td); 37083366Sjulian fdrop(rf, td); 37170915Sdwmalone /* rpipe has been closed by fdrop(). 
*/ 37270915Sdwmalone pipeclose(wpipe); 37370915Sdwmalone return (error); 37470915Sdwmalone } 375121256Sdwmalone /* An extra reference on `wf' has been held for us by falloc(). */ 376174988Sjeff finit(wf, FREAD | FWRITE, DTYPE_PIPE, wpipe, &pipeops); 377121256Sdwmalone fdrop(wf, td); 378184849Sed fildes[1] = fd; 37983366Sjulian fdrop(rf, td); 38013675Sdyson 38113675Sdyson return (0); 38213675Sdyson} 38313675Sdyson 384184849Sed/* ARGSUSED */ 385184849Sedint 386184849Sedpipe(struct thread *td, struct pipe_args *uap) 387184849Sed{ 388184849Sed int error; 389184849Sed int fildes[2]; 390184849Sed 391184849Sed error = kern_pipe(td, fildes); 392184849Sed if (error) 393184849Sed return (error); 394184849Sed 395184849Sed td->td_retval[0] = fildes[0]; 396184849Sed td->td_retval[1] = fildes[1]; 397184849Sed 398184849Sed return (0); 399184849Sed} 400184849Sed 40113909Sdyson/* 40213909Sdyson * Allocate kva for pipe circular buffer, the space is pageable 40376364Salfred * This routine will 'realloc' the size of a pipe safely, if it fails 40476364Salfred * it will retain the old buffer. 40576364Salfred * If it fails it will return ENOMEM. 
40613909Sdyson */ 40776364Salfredstatic int 408132579Srwatsonpipespace_new(cpipe, size) 40913675Sdyson struct pipe *cpipe; 41076364Salfred int size; 41113675Sdyson{ 41276364Salfred caddr_t buffer; 413133790Ssilby int error, cnt, firstseg; 414117325Ssilby static int curfail = 0; 415117325Ssilby static struct timeval lastfail; 41613675Sdyson 417125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked")); 418133790Ssilby KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW), 419133790Ssilby ("pipespace: resize of direct writes not allowed")); 420133790Ssilbyretry: 421133790Ssilby cnt = cpipe->pipe_buffer.cnt; 422133790Ssilby if (cnt > size) 423133790Ssilby size = cnt; 42479224Sdillon 425118764Ssilby size = round_page(size); 426118764Ssilby buffer = (caddr_t) vm_map_min(pipe_map); 42713675Sdyson 428122163Salc error = vm_map_find(pipe_map, NULL, 0, 42976364Salfred (vm_offset_t *) &buffer, size, 1, 43013688Sdyson VM_PROT_ALL, VM_PROT_ALL, 0); 43176364Salfred if (error != KERN_SUCCESS) { 432133790Ssilby if ((cpipe->pipe_buffer.buffer == NULL) && 433133790Ssilby (size > SMALL_PIPE_SIZE)) { 434133790Ssilby size = SMALL_PIPE_SIZE; 435133790Ssilby pipefragretry++; 436133790Ssilby goto retry; 437133790Ssilby } 438133790Ssilby if (cpipe->pipe_buffer.buffer == NULL) { 439133790Ssilby pipeallocfail++; 440133790Ssilby if (ppsratecheck(&lastfail, &curfail, 1)) 441133790Ssilby printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n"); 442133790Ssilby } else { 443133790Ssilby piperesizefail++; 444133790Ssilby } 44576364Salfred return (ENOMEM); 44676364Salfred } 44776364Salfred 448133790Ssilby /* copy data, then free old resources if we're resizing */ 449133790Ssilby if (cnt > 0) { 450133790Ssilby if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) { 451133790Ssilby firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out; 452133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 453133790Ssilby buffer, firstseg); 454133790Ssilby if ((cnt - firstseg) 
> 0) 455133790Ssilby bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg], 456133790Ssilby cpipe->pipe_buffer.in); 457133790Ssilby } else { 458133790Ssilby bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 459133790Ssilby buffer, cnt); 460133790Ssilby } 461133790Ssilby } 46276364Salfred pipe_free_kmem(cpipe); 46376364Salfred cpipe->pipe_buffer.buffer = buffer; 46476364Salfred cpipe->pipe_buffer.size = size; 465133790Ssilby cpipe->pipe_buffer.in = cnt; 46676364Salfred cpipe->pipe_buffer.out = 0; 467133790Ssilby cpipe->pipe_buffer.cnt = cnt; 468189649Sjhb atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size); 46976364Salfred return (0); 47013907Sdyson} 47113688Sdyson 47213907Sdyson/* 473132579Srwatson * Wrapper for pipespace_new() that performs locking assertions. 474132579Srwatson */ 475132579Srwatsonstatic int 476132579Srwatsonpipespace(cpipe, size) 477132579Srwatson struct pipe *cpipe; 478132579Srwatson int size; 479132579Srwatson{ 480132579Srwatson 481133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 482133049Ssilby ("Unlocked pipe passed to pipespace")); 483132579Srwatson return (pipespace_new(cpipe, size)); 484132579Srwatson} 485132579Srwatson 486132579Srwatson/* 48713675Sdyson * lock a pipe for I/O, blocking other access 48813675Sdyson */ 48913675Sdysonstatic __inline int 49013907Sdysonpipelock(cpipe, catch) 49113675Sdyson struct pipe *cpipe; 49213907Sdyson int catch; 49313675Sdyson{ 49413776Sdyson int error; 49576364Salfred 49691362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 49791362Salfred while (cpipe->pipe_state & PIPE_LOCKFL) { 49813675Sdyson cpipe->pipe_state |= PIPE_LWANT; 49991362Salfred error = msleep(cpipe, PIPE_MTX(cpipe), 50091362Salfred catch ? 
(PRIBIO | PCATCH) : PRIBIO, 50176760Salfred "pipelk", 0); 502124394Sdes if (error != 0) 50376760Salfred return (error); 50413675Sdyson } 50591362Salfred cpipe->pipe_state |= PIPE_LOCKFL; 50676760Salfred return (0); 50713675Sdyson} 50813675Sdyson 50913675Sdyson/* 51013675Sdyson * unlock a pipe I/O lock 51113675Sdyson */ 51213675Sdysonstatic __inline void 51313675Sdysonpipeunlock(cpipe) 51413675Sdyson struct pipe *cpipe; 51513675Sdyson{ 51676364Salfred 51791362Salfred PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 518133049Ssilby KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 519133049Ssilby ("Unlocked pipe passed to pipeunlock")); 52091362Salfred cpipe->pipe_state &= ~PIPE_LOCKFL; 52113675Sdyson if (cpipe->pipe_state & PIPE_LWANT) { 52213675Sdyson cpipe->pipe_state &= ~PIPE_LWANT; 52314177Sdyson wakeup(cpipe); 52413675Sdyson } 52513675Sdyson} 52613675Sdyson 52714037Sdysonstatic __inline void 52814037Sdysonpipeselwakeup(cpipe) 52914037Sdyson struct pipe *cpipe; 53014037Sdyson{ 53176364Salfred 532126252Srwatson PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 53314037Sdyson if (cpipe->pipe_state & PIPE_SEL) { 534122352Stanimura selwakeuppri(&cpipe->pipe_sel, PSOCK); 535174647Sjeff if (!SEL_WAITING(&cpipe->pipe_sel)) 536174647Sjeff cpipe->pipe_state &= ~PIPE_SEL; 53714037Sdyson } 53841086Struckman if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 53995883Salfred pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 540133741Sjmg KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); 54114037Sdyson} 54214037Sdyson 543126131Sgreen/* 544126131Sgreen * Initialize and allocate VM and memory for pipe. The structure 545126131Sgreen * will start out zero'd from the ctor, so we just manage the kmem. 
546126131Sgreen */ 547126131Sgreenstatic int 548133790Ssilbypipe_create(pipe, backing) 549126131Sgreen struct pipe *pipe; 550133790Ssilby int backing; 551126131Sgreen{ 552126131Sgreen int error; 553126131Sgreen 554133790Ssilby if (backing) { 555133790Ssilby if (amountpipekva > maxpipekva / 2) 556133790Ssilby error = pipespace_new(pipe, SMALL_PIPE_SIZE); 557133790Ssilby else 558133790Ssilby error = pipespace_new(pipe, PIPE_SIZE); 559133790Ssilby } else { 560133790Ssilby /* If we're not backing this pipe, no need to do anything. */ 561133790Ssilby error = 0; 562133790Ssilby } 563132579Srwatson return (error); 564126131Sgreen} 565126131Sgreen 56613675Sdyson/* ARGSUSED */ 56713675Sdysonstatic int 568101941Srwatsonpipe_read(fp, uio, active_cred, flags, td) 56913675Sdyson struct file *fp; 57013675Sdyson struct uio *uio; 571101941Srwatson struct ucred *active_cred; 57283366Sjulian struct thread *td; 57345311Sdt int flags; 57413675Sdyson{ 575109153Sdillon struct pipe *rpipe = fp->f_data; 57647748Salc int error; 57713675Sdyson int nread = 0; 57818863Sdyson u_int size; 57913675Sdyson 58091362Salfred PIPE_LOCK(rpipe); 58113675Sdyson ++rpipe->pipe_busy; 58247748Salc error = pipelock(rpipe, 1); 58347748Salc if (error) 58447748Salc goto unlocked_error; 58547748Salc 586101768Srwatson#ifdef MAC 587172930Srwatson error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); 588101768Srwatson if (error) 589101768Srwatson goto locked_error; 590101768Srwatson#endif 591133790Ssilby if (amountpipekva > (3 * maxpipekva) / 4) { 592133790Ssilby if (!(rpipe->pipe_state & PIPE_DIRECTW) && 593133790Ssilby (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 594133790Ssilby (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 595133790Ssilby (piperesizeallowed == 1)) { 596133790Ssilby PIPE_UNLOCK(rpipe); 597133790Ssilby pipespace(rpipe, SMALL_PIPE_SIZE); 598133790Ssilby PIPE_LOCK(rpipe); 599133790Ssilby } 600133790Ssilby } 601101768Srwatson 60213675Sdyson while (uio->uio_resid) { 60313907Sdyson /* 
60413907Sdyson * normal pipe buffer receive 60513907Sdyson */ 60613675Sdyson if (rpipe->pipe_buffer.cnt > 0) { 60718863Sdyson size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 60813675Sdyson if (size > rpipe->pipe_buffer.cnt) 60913675Sdyson size = rpipe->pipe_buffer.cnt; 61018863Sdyson if (size > (u_int) uio->uio_resid) 61118863Sdyson size = (u_int) uio->uio_resid; 61247748Salc 61391362Salfred PIPE_UNLOCK(rpipe); 614116127Smux error = uiomove( 615116127Smux &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 616116127Smux size, uio); 61791362Salfred PIPE_LOCK(rpipe); 61876760Salfred if (error) 61913675Sdyson break; 62076760Salfred 62113675Sdyson rpipe->pipe_buffer.out += size; 62213675Sdyson if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 62313675Sdyson rpipe->pipe_buffer.out = 0; 62413675Sdyson 62513675Sdyson rpipe->pipe_buffer.cnt -= size; 62647748Salc 62747748Salc /* 62847748Salc * If there is no more to read in the pipe, reset 62947748Salc * its pointers to the beginning. This improves 63047748Salc * cache hit stats. 63147748Salc */ 63247748Salc if (rpipe->pipe_buffer.cnt == 0) { 63347748Salc rpipe->pipe_buffer.in = 0; 63447748Salc rpipe->pipe_buffer.out = 0; 63547748Salc } 63613675Sdyson nread += size; 63714037Sdyson#ifndef PIPE_NODIRECT 63813907Sdyson /* 63913907Sdyson * Direct copy, bypassing a kernel buffer. 
64013907Sdyson */ 64113907Sdyson } else if ((size = rpipe->pipe_map.cnt) && 64247748Salc (rpipe->pipe_state & PIPE_DIRECTW)) { 64318863Sdyson if (size > (u_int) uio->uio_resid) 64418863Sdyson size = (u_int) uio->uio_resid; 64547748Salc 64691362Salfred PIPE_UNLOCK(rpipe); 647127501Salc error = uiomove_fromphys(rpipe->pipe_map.ms, 648127501Salc rpipe->pipe_map.pos, size, uio); 64991362Salfred PIPE_LOCK(rpipe); 65013907Sdyson if (error) 65113907Sdyson break; 65213907Sdyson nread += size; 65313907Sdyson rpipe->pipe_map.pos += size; 65413907Sdyson rpipe->pipe_map.cnt -= size; 65513907Sdyson if (rpipe->pipe_map.cnt == 0) { 65613907Sdyson rpipe->pipe_state &= ~PIPE_DIRECTW; 65713907Sdyson wakeup(rpipe); 65813907Sdyson } 65914037Sdyson#endif 66013675Sdyson } else { 66113675Sdyson /* 66213675Sdyson * detect EOF condition 66376760Salfred * read returns 0 on EOF, no need to set error 66413675Sdyson */ 66576760Salfred if (rpipe->pipe_state & PIPE_EOF) 66613675Sdyson break; 66743623Sdillon 66813675Sdyson /* 66913675Sdyson * If the "write-side" has been blocked, wake it up now. 67013675Sdyson */ 67113675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 67213675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 67313675Sdyson wakeup(rpipe); 67413675Sdyson } 67543623Sdillon 67643623Sdillon /* 67747748Salc * Break if some data was read. 67843623Sdillon */ 67947748Salc if (nread > 0) 68013675Sdyson break; 68116960Sdyson 68243623Sdillon /* 683124394Sdes * Unlock the pipe buffer for our remaining processing. 684116127Smux * We will either break out with an error or we will 685116127Smux * sleep and relock to loop. 68643623Sdillon */ 68747748Salc pipeunlock(rpipe); 68843623Sdillon 68913675Sdyson /* 69047748Salc * Handle non-blocking mode operation or 69147748Salc * wait for more data. 
69213675Sdyson */ 69376760Salfred if (fp->f_flag & FNONBLOCK) { 69447748Salc error = EAGAIN; 69576760Salfred } else { 69647748Salc rpipe->pipe_state |= PIPE_WANTR; 69791362Salfred if ((error = msleep(rpipe, PIPE_MTX(rpipe), 69891362Salfred PRIBIO | PCATCH, 69977140Salfred "piperd", 0)) == 0) 70047748Salc error = pipelock(rpipe, 1); 70113675Sdyson } 70247748Salc if (error) 70347748Salc goto unlocked_error; 70413675Sdyson } 70513675Sdyson } 706101768Srwatson#ifdef MAC 707101768Srwatsonlocked_error: 708101768Srwatson#endif 70947748Salc pipeunlock(rpipe); 71013675Sdyson 71191362Salfred /* XXX: should probably do this before getting any locks. */ 71224101Sbde if (error == 0) 71355112Sbde vfs_timestamp(&rpipe->pipe_atime); 71447748Salcunlocked_error: 71547748Salc --rpipe->pipe_busy; 71613913Sdyson 71747748Salc /* 71847748Salc * PIPE_WANT processing only makes sense if pipe_busy is 0. 71947748Salc */ 72013675Sdyson if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 72113675Sdyson rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 72213675Sdyson wakeup(rpipe); 72313675Sdyson } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 72413675Sdyson /* 72547748Salc * Handle write blocking hysteresis. 72613675Sdyson */ 72713675Sdyson if (rpipe->pipe_state & PIPE_WANTW) { 72813675Sdyson rpipe->pipe_state &= ~PIPE_WANTW; 72913675Sdyson wakeup(rpipe); 73013675Sdyson } 73113675Sdyson } 73214037Sdyson 73314802Sdyson if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 73414037Sdyson pipeselwakeup(rpipe); 73514037Sdyson 73691362Salfred PIPE_UNLOCK(rpipe); 73776760Salfred return (error); 73813675Sdyson} 73913675Sdyson 74014037Sdyson#ifndef PIPE_NODIRECT 74113907Sdyson/* 74213907Sdyson * Map the sending processes' buffer into kernel space and wire it. 74313907Sdyson * This is similar to a physical write operation. 
74413907Sdyson */ 74513675Sdysonstatic int 74613907Sdysonpipe_build_write_buffer(wpipe, uio) 74713907Sdyson struct pipe *wpipe; 74813675Sdyson struct uio *uio; 74913675Sdyson{ 750119872Salc pmap_t pmap; 75118863Sdyson u_int size; 752216511Salc int i; 753112569Sjake vm_offset_t addr, endaddr; 75413907Sdyson 75591412Salfred PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 756133790Ssilby KASSERT(wpipe->pipe_state & PIPE_DIRECTW, 757133790Ssilby ("Clone attempt on non-direct write pipe!")); 75879224Sdillon 75918863Sdyson size = (u_int) uio->uio_iov->iov_len; 76013907Sdyson if (size > wpipe->pipe_buffer.size) 76113907Sdyson size = wpipe->pipe_buffer.size; 76213907Sdyson 763119872Salc pmap = vmspace_pmap(curproc->p_vmspace); 76440286Sdg endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 76576760Salfred addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 766193893Scperciva if (endaddr < addr) 767193893Scperciva return (EFAULT); 76876760Salfred for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 76999899Salc /* 770207805Salc * vm_fault_quick() can sleep. 
77199899Salc */ 772119872Salc race: 773119872Salc if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { 774216511Salc vm_page_unhold_pages(wpipe->pipe_map.ms, i); 77576760Salfred return (EFAULT); 77613907Sdyson } 777120000Salc wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, 778120000Salc VM_PROT_READ); 779119872Salc if (wpipe->pipe_map.ms[i] == NULL) 780119872Salc goto race; 78113907Sdyson } 78213907Sdyson 78313907Sdyson/* 78413907Sdyson * set up the control block 78513907Sdyson */ 78613907Sdyson wpipe->pipe_map.npages = i; 78776760Salfred wpipe->pipe_map.pos = 78876760Salfred ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 78913907Sdyson wpipe->pipe_map.cnt = size; 79013907Sdyson 79113907Sdyson/* 79213907Sdyson * and update the uio data 79313907Sdyson */ 79413907Sdyson 79513907Sdyson uio->uio_iov->iov_len -= size; 796104908Smike uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; 79713907Sdyson if (uio->uio_iov->iov_len == 0) 79813907Sdyson uio->uio_iov++; 79913907Sdyson uio->uio_resid -= size; 80013907Sdyson uio->uio_offset += size; 80176760Salfred return (0); 80213907Sdyson} 80313907Sdyson 80413907Sdyson/* 80513907Sdyson * unmap and unwire the process buffer 80613907Sdyson */ 80713907Sdysonstatic void 80813907Sdysonpipe_destroy_write_buffer(wpipe) 80976760Salfred struct pipe *wpipe; 81013907Sdyson{ 81176364Salfred 812127501Salc PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 813216511Salc vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages); 81491653Stanimura wpipe->pipe_map.npages = 0; 81513907Sdyson} 81613907Sdyson 81713907Sdyson/* 81813907Sdyson * In the case of a signal, the writing process might go away. This 81913907Sdyson * code copies the data into the circular buffer so that the source 82013907Sdyson * pages can be freed without loss of data. 
82113907Sdyson */ 82213907Sdysonstatic void 82313907Sdysonpipe_clone_write_buffer(wpipe) 82476364Salfred struct pipe *wpipe; 82513907Sdyson{ 826127501Salc struct uio uio; 827127501Salc struct iovec iov; 82813907Sdyson int size; 82913907Sdyson int pos; 83013907Sdyson 83191362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 83213907Sdyson size = wpipe->pipe_map.cnt; 83313907Sdyson pos = wpipe->pipe_map.pos; 83413907Sdyson 83513907Sdyson wpipe->pipe_buffer.in = size; 83613907Sdyson wpipe->pipe_buffer.out = 0; 83713907Sdyson wpipe->pipe_buffer.cnt = size; 83813907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 83913907Sdyson 840119811Salc PIPE_UNLOCK(wpipe); 841127501Salc iov.iov_base = wpipe->pipe_buffer.buffer; 842127501Salc iov.iov_len = size; 843127501Salc uio.uio_iov = &iov; 844127501Salc uio.uio_iovcnt = 1; 845127501Salc uio.uio_offset = 0; 846127501Salc uio.uio_resid = size; 847127501Salc uio.uio_segflg = UIO_SYSSPACE; 848127501Salc uio.uio_rw = UIO_READ; 849127501Salc uio.uio_td = curthread; 850127501Salc uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio); 851127501Salc PIPE_LOCK(wpipe); 85213907Sdyson pipe_destroy_write_buffer(wpipe); 85313907Sdyson} 85413907Sdyson 85513907Sdyson/* 85613907Sdyson * This implements the pipe buffer write mechanism. Note that only 85713907Sdyson * a direct write OR a normal pipe write can be pending at any given time. 85813907Sdyson * If there are any characters in the pipe buffer, the direct write will 85913907Sdyson * be deferred until the receiving process grabs all of the bytes from 86013907Sdyson * the pipe buffer. Then the direct mapping write is set-up. 
86113907Sdyson */ 86213907Sdysonstatic int 86313907Sdysonpipe_direct_write(wpipe, uio) 86413907Sdyson struct pipe *wpipe; 86513907Sdyson struct uio *uio; 86613907Sdyson{ 86713907Sdyson int error; 86876364Salfred 86913951Sdysonretry: 87091362Salfred PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 871133049Ssilby error = pipelock(wpipe, 1); 872133049Ssilby if (wpipe->pipe_state & PIPE_EOF) 873133049Ssilby error = EPIPE; 874133049Ssilby if (error) { 875133049Ssilby pipeunlock(wpipe); 876133049Ssilby goto error1; 877133049Ssilby } 87813907Sdyson while (wpipe->pipe_state & PIPE_DIRECTW) { 87976760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 88013951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 88113951Sdyson wakeup(wpipe); 88213951Sdyson } 883173750Sdumbbell pipeselwakeup(wpipe); 88413992Sdyson wpipe->pipe_state |= PIPE_WANTW; 885133049Ssilby pipeunlock(wpipe); 88691362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 88791362Salfred PRIBIO | PCATCH, "pipdww", 0); 88814802Sdyson if (error) 88913907Sdyson goto error1; 890133049Ssilby else 891133049Ssilby goto retry; 89213907Sdyson } 89313907Sdyson wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 89413951Sdyson if (wpipe->pipe_buffer.cnt > 0) { 89576760Salfred if (wpipe->pipe_state & PIPE_WANTR) { 89613951Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 89713951Sdyson wakeup(wpipe); 89813951Sdyson } 899173750Sdumbbell pipeselwakeup(wpipe); 90013992Sdyson wpipe->pipe_state |= PIPE_WANTW; 901133049Ssilby pipeunlock(wpipe); 90291362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), 90391362Salfred PRIBIO | PCATCH, "pipdwc", 0); 90414802Sdyson if (error) 90513907Sdyson goto error1; 906133049Ssilby else 907133049Ssilby goto retry; 90813907Sdyson } 90913907Sdyson 91013951Sdyson wpipe->pipe_state |= PIPE_DIRECTW; 91113951Sdyson 912119872Salc PIPE_UNLOCK(wpipe); 91313907Sdyson error = pipe_build_write_buffer(wpipe, uio); 914119872Salc PIPE_LOCK(wpipe); 91513907Sdyson if (error) { 91613907Sdyson wpipe->pipe_state &= ~PIPE_DIRECTW; 917133049Ssilby 
pipeunlock(wpipe); 91813907Sdyson goto error1; 91913907Sdyson } 92013907Sdyson 92113907Sdyson error = 0; 92213907Sdyson while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 92313907Sdyson if (wpipe->pipe_state & PIPE_EOF) { 92413907Sdyson pipe_destroy_write_buffer(wpipe); 925112981Shsu pipeselwakeup(wpipe); 92613907Sdyson pipeunlock(wpipe); 92714802Sdyson error = EPIPE; 92814802Sdyson goto error1; 92913907Sdyson } 93013992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 93113992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 93213992Sdyson wakeup(wpipe); 93313992Sdyson } 93414037Sdyson pipeselwakeup(wpipe); 935133049Ssilby pipeunlock(wpipe); 93691362Salfred error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, 93791362Salfred "pipdwt", 0); 938133049Ssilby pipelock(wpipe, 0); 93913907Sdyson } 94013907Sdyson 941126131Sgreen if (wpipe->pipe_state & PIPE_EOF) 942126131Sgreen error = EPIPE; 94313907Sdyson if (wpipe->pipe_state & PIPE_DIRECTW) { 94413907Sdyson /* 94513907Sdyson * this bit of trickery substitutes a kernel buffer for 94613907Sdyson * the process that might be going away. 
94713907Sdyson */ 94813907Sdyson pipe_clone_write_buffer(wpipe); 94913907Sdyson } else { 95013907Sdyson pipe_destroy_write_buffer(wpipe); 95113907Sdyson } 95213907Sdyson pipeunlock(wpipe); 95376760Salfred return (error); 95413907Sdyson 95513907Sdysonerror1: 95613907Sdyson wakeup(wpipe); 95776760Salfred return (error); 95813907Sdyson} 95914037Sdyson#endif 960124394Sdes 96116960Sdysonstatic int 962101941Srwatsonpipe_write(fp, uio, active_cred, flags, td) 96316960Sdyson struct file *fp; 96413907Sdyson struct uio *uio; 965101941Srwatson struct ucred *active_cred; 96683366Sjulian struct thread *td; 96745311Sdt int flags; 96813907Sdyson{ 96913675Sdyson int error = 0; 970133790Ssilby int desiredsize, orig_resid; 97116960Sdyson struct pipe *wpipe, *rpipe; 97216960Sdyson 973109153Sdillon rpipe = fp->f_data; 97416960Sdyson wpipe = rpipe->pipe_peer; 97516960Sdyson 97691395Salfred PIPE_LOCK(rpipe); 977133049Ssilby error = pipelock(wpipe, 1); 978133049Ssilby if (error) { 979133049Ssilby PIPE_UNLOCK(rpipe); 980133049Ssilby return (error); 981133049Ssilby } 98213675Sdyson /* 98313675Sdyson * detect loss of pipe read side, issue SIGPIPE if lost. 
98413675Sdyson */ 985179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 986179243Skib (wpipe->pipe_state & PIPE_EOF)) { 987133049Ssilby pipeunlock(wpipe); 98891395Salfred PIPE_UNLOCK(rpipe); 98976760Salfred return (EPIPE); 99013675Sdyson } 991101768Srwatson#ifdef MAC 992172930Srwatson error = mac_pipe_check_write(active_cred, wpipe->pipe_pair); 993101768Srwatson if (error) { 994133049Ssilby pipeunlock(wpipe); 995101768Srwatson PIPE_UNLOCK(rpipe); 996101768Srwatson return (error); 997101768Srwatson } 998101768Srwatson#endif 99977676Sdillon ++wpipe->pipe_busy; 100013675Sdyson 1001133790Ssilby /* Choose a larger size if it's advantageous */ 1002133790Ssilby desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size); 1003133790Ssilby while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) { 1004133790Ssilby if (piperesizeallowed != 1) 1005133790Ssilby break; 1006133790Ssilby if (amountpipekva > maxpipekva / 2) 1007133790Ssilby break; 1008133790Ssilby if (desiredsize == BIG_PIPE_SIZE) 1009133790Ssilby break; 1010133790Ssilby desiredsize = desiredsize * 2; 1011133790Ssilby } 101217163Sdyson 1013133790Ssilby /* Choose a smaller size if we're in a OOM situation */ 1014133790Ssilby if ((amountpipekva > (3 * maxpipekva) / 4) && 1015133790Ssilby (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 1016133790Ssilby (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 1017133790Ssilby (piperesizeallowed == 1)) 1018133790Ssilby desiredsize = SMALL_PIPE_SIZE; 1019133790Ssilby 1020133790Ssilby /* Resize if the above determined that a new size was necessary */ 1021133790Ssilby if ((desiredsize != wpipe->pipe_buffer.size) && 1022133790Ssilby ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) { 1023133049Ssilby PIPE_UNLOCK(wpipe); 1024133790Ssilby pipespace(wpipe, desiredsize); 1025133049Ssilby PIPE_LOCK(wpipe); 102613907Sdyson } 1027133790Ssilby if (wpipe->pipe_buffer.size == 0) { 1028133790Ssilby /* 1029133790Ssilby * This can only happen for reverse direction use of pipes 1030133790Ssilby * 
in a complete OOM situation. 1031133790Ssilby */ 1032133790Ssilby error = ENOMEM; 1033133790Ssilby --wpipe->pipe_busy; 1034133790Ssilby pipeunlock(wpipe); 1035133790Ssilby PIPE_UNLOCK(wpipe); 1036133790Ssilby return (error); 1037133790Ssilby } 103877676Sdillon 1039133049Ssilby pipeunlock(wpipe); 1040124394Sdes 104113913Sdyson orig_resid = uio->uio_resid; 104277676Sdillon 104313675Sdyson while (uio->uio_resid) { 104413907Sdyson int space; 104576760Salfred 1046133049Ssilby pipelock(wpipe, 0); 1047133049Ssilby if (wpipe->pipe_state & PIPE_EOF) { 1048133049Ssilby pipeunlock(wpipe); 1049133049Ssilby error = EPIPE; 1050133049Ssilby break; 1051133049Ssilby } 105214037Sdyson#ifndef PIPE_NODIRECT 105313907Sdyson /* 105413907Sdyson * If the transfer is large, we can gain performance if 105513907Sdyson * we do process-to-process copies directly. 105616416Sdyson * If the write is non-blocking, we don't use the 105716416Sdyson * direct write mechanism. 105858505Sdillon * 105958505Sdillon * The direct write mechanism will detect the reader going 106058505Sdillon * away on us. 106113907Sdyson */ 1062165347Spjd if (uio->uio_segflg == UIO_USERSPACE && 1063165347Spjd uio->uio_iov->iov_len >= PIPE_MINDIRECT && 1064165347Spjd wpipe->pipe_buffer.size >= PIPE_MINDIRECT && 1065127501Salc (fp->f_flag & FNONBLOCK) == 0) { 1066133049Ssilby pipeunlock(wpipe); 1067105009Salfred error = pipe_direct_write(wpipe, uio); 106876760Salfred if (error) 106913907Sdyson break; 107013907Sdyson continue; 107191362Salfred } 107214037Sdyson#endif 107313907Sdyson 107413907Sdyson /* 107513907Sdyson * Pipe buffered writes cannot be coincidental with 107613907Sdyson * direct writes. We wait until the currently executing 107713907Sdyson * direct write is completed before we start filling the 107858505Sdillon * pipe buffer. We break out if a signal occurs or the 107958505Sdillon * reader goes away. 
108013907Sdyson */ 1081133049Ssilby if (wpipe->pipe_state & PIPE_DIRECTW) { 108213992Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 108313992Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 108413992Sdyson wakeup(wpipe); 108513992Sdyson } 1086173750Sdumbbell pipeselwakeup(wpipe); 1087173750Sdumbbell wpipe->pipe_state |= PIPE_WANTW; 1088133049Ssilby pipeunlock(wpipe); 108991395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 109091362Salfred "pipbww", 0); 109113907Sdyson if (error) 109213907Sdyson break; 1093133049Ssilby else 1094133049Ssilby continue; 109513907Sdyson } 109613907Sdyson 109713907Sdyson space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 109814644Sdyson 109914644Sdyson /* Writes of size <= PIPE_BUF must be atomic. */ 110013913Sdyson if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 110113913Sdyson space = 0; 110213907Sdyson 1103118230Spb if (space > 0) { 1104133049Ssilby int size; /* Transfer size */ 1105133049Ssilby int segsize; /* first segment to transfer */ 110676760Salfred 1107133049Ssilby /* 1108133049Ssilby * Transfer size is minimum of uio transfer 1109133049Ssilby * and free space in pipe buffer. 1110133049Ssilby */ 1111133049Ssilby if (space > uio->uio_resid) 1112133049Ssilby size = uio->uio_resid; 1113133049Ssilby else 1114133049Ssilby size = space; 1115133049Ssilby /* 1116133049Ssilby * First segment to transfer is minimum of 1117133049Ssilby * transfer size and contiguous space in 1118133049Ssilby * pipe buffer. If first segment to transfer 1119133049Ssilby * is less than the transfer size, we've got 1120133049Ssilby * a wraparound in the buffer. 
1121133049Ssilby */ 1122133049Ssilby segsize = wpipe->pipe_buffer.size - 1123133049Ssilby wpipe->pipe_buffer.in; 1124133049Ssilby if (segsize > size) 1125133049Ssilby segsize = size; 112654534Stegge 1127133049Ssilby /* Transfer first segment */ 1128133049Ssilby 1129133049Ssilby PIPE_UNLOCK(rpipe); 1130133049Ssilby error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 1131133049Ssilby segsize, uio); 1132133049Ssilby PIPE_LOCK(rpipe); 1133133049Ssilby 1134133049Ssilby if (error == 0 && segsize < size) { 1135133049Ssilby KASSERT(wpipe->pipe_buffer.in + segsize == 1136133049Ssilby wpipe->pipe_buffer.size, 1137133049Ssilby ("Pipe buffer wraparound disappeared")); 113854534Stegge /* 1139133049Ssilby * Transfer remaining part now, to 1140133049Ssilby * support atomic writes. Wraparound 1141133049Ssilby * happened. 114254534Stegge */ 1143124394Sdes 114491395Salfred PIPE_UNLOCK(rpipe); 1145133049Ssilby error = uiomove( 1146133049Ssilby &wpipe->pipe_buffer.buffer[0], 1147133049Ssilby size - segsize, uio); 114891395Salfred PIPE_LOCK(rpipe); 1149133049Ssilby } 1150133049Ssilby if (error == 0) { 1151133049Ssilby wpipe->pipe_buffer.in += size; 1152133049Ssilby if (wpipe->pipe_buffer.in >= 1153133049Ssilby wpipe->pipe_buffer.size) { 1154133049Ssilby KASSERT(wpipe->pipe_buffer.in == 1155133049Ssilby size - segsize + 1156133049Ssilby wpipe->pipe_buffer.size, 1157133049Ssilby ("Expected wraparound bad")); 1158133049Ssilby wpipe->pipe_buffer.in = size - segsize; 115954534Stegge } 1160124394Sdes 1161133049Ssilby wpipe->pipe_buffer.cnt += size; 1162133049Ssilby KASSERT(wpipe->pipe_buffer.cnt <= 1163133049Ssilby wpipe->pipe_buffer.size, 1164133049Ssilby ("Pipe buffer overflow")); 116513675Sdyson } 1166133049Ssilby pipeunlock(wpipe); 1167153484Sdelphij if (error != 0) 1168153484Sdelphij break; 116913675Sdyson } else { 117013675Sdyson /* 117113675Sdyson * If the "read-side" has been blocked, wake it up now. 
117213675Sdyson */ 117313675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 117413675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 117513675Sdyson wakeup(wpipe); 117613675Sdyson } 117714037Sdyson 117813675Sdyson /* 117913675Sdyson * don't block on non-blocking I/O 118013675Sdyson */ 118116960Sdyson if (fp->f_flag & FNONBLOCK) { 118213907Sdyson error = EAGAIN; 1183133049Ssilby pipeunlock(wpipe); 118413675Sdyson break; 118513675Sdyson } 118613907Sdyson 118714037Sdyson /* 118814037Sdyson * We have no more space and have something to offer, 118929356Speter * wake up select/poll. 119014037Sdyson */ 119114037Sdyson pipeselwakeup(wpipe); 119214037Sdyson 119313675Sdyson wpipe->pipe_state |= PIPE_WANTW; 1194133049Ssilby pipeunlock(wpipe); 119591395Salfred error = msleep(wpipe, PIPE_MTX(rpipe), 119691362Salfred PRIBIO | PCATCH, "pipewr", 0); 119776760Salfred if (error != 0) 119813675Sdyson break; 119913675Sdyson } 120013675Sdyson } 120113675Sdyson 1202133049Ssilby pipelock(wpipe, 0); 120314644Sdyson --wpipe->pipe_busy; 120477676Sdillon 120576760Salfred if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 120676760Salfred wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 120713675Sdyson wakeup(wpipe); 120813675Sdyson } else if (wpipe->pipe_buffer.cnt > 0) { 120913675Sdyson /* 121013675Sdyson * If we have put any characters in the buffer, we wake up 121113675Sdyson * the reader. 
121213675Sdyson */ 121313675Sdyson if (wpipe->pipe_state & PIPE_WANTR) { 121413675Sdyson wpipe->pipe_state &= ~PIPE_WANTR; 121513675Sdyson wakeup(wpipe); 121613675Sdyson } 121713675Sdyson } 121813909Sdyson 121913909Sdyson /* 122013909Sdyson * Don't return EPIPE if I/O was successful 122113909Sdyson */ 122213907Sdyson if ((wpipe->pipe_buffer.cnt == 0) && 122377676Sdillon (uio->uio_resid == 0) && 122477676Sdillon (error == EPIPE)) { 122513907Sdyson error = 0; 122677676Sdillon } 122713913Sdyson 122824101Sbde if (error == 0) 122955112Sbde vfs_timestamp(&wpipe->pipe_mtime); 123024101Sbde 123114037Sdyson /* 123214037Sdyson * We have something to offer, 123329356Speter * wake up select/poll. 123414037Sdyson */ 123514177Sdyson if (wpipe->pipe_buffer.cnt) 123614037Sdyson pipeselwakeup(wpipe); 123713907Sdyson 1238133049Ssilby pipeunlock(wpipe); 123991395Salfred PIPE_UNLOCK(rpipe); 124076760Salfred return (error); 124113675Sdyson} 124213675Sdyson 1243175140Sjhb/* ARGSUSED */ 1244175140Sjhbstatic int 1245175140Sjhbpipe_truncate(fp, length, active_cred, td) 1246175140Sjhb struct file *fp; 1247175140Sjhb off_t length; 1248175140Sjhb struct ucred *active_cred; 1249175140Sjhb struct thread *td; 1250175140Sjhb{ 1251175140Sjhb 1252175140Sjhb return (EINVAL); 1253175140Sjhb} 1254175140Sjhb 125513675Sdyson/* 125613675Sdyson * we implement a very minimal set of ioctls for compatibility with sockets. 
125713675Sdyson */ 1258104094Sphkstatic int 1259102003Srwatsonpipe_ioctl(fp, cmd, data, active_cred, td) 126013675Sdyson struct file *fp; 126136735Sdfr u_long cmd; 126299009Salfred void *data; 1263102003Srwatson struct ucred *active_cred; 126483366Sjulian struct thread *td; 126513675Sdyson{ 1266109153Sdillon struct pipe *mpipe = fp->f_data; 1267101768Srwatson int error; 126813675Sdyson 1269104269Srwatson PIPE_LOCK(mpipe); 1270104269Srwatson 1271104269Srwatson#ifdef MAC 1272172930Srwatson error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data); 1273121970Srwatson if (error) { 1274121970Srwatson PIPE_UNLOCK(mpipe); 1275101768Srwatson return (error); 1276121970Srwatson } 1277101768Srwatson#endif 1278101768Srwatson 1279137752Sphk error = 0; 128013675Sdyson switch (cmd) { 128113675Sdyson 128213675Sdyson case FIONBIO: 1283137752Sphk break; 128413675Sdyson 128513675Sdyson case FIOASYNC: 128613675Sdyson if (*(int *)data) { 128713675Sdyson mpipe->pipe_state |= PIPE_ASYNC; 128813675Sdyson } else { 128913675Sdyson mpipe->pipe_state &= ~PIPE_ASYNC; 129013675Sdyson } 1291137752Sphk break; 129213675Sdyson 129313675Sdyson case FIONREAD: 129414037Sdyson if (mpipe->pipe_state & PIPE_DIRECTW) 129514037Sdyson *(int *)data = mpipe->pipe_map.cnt; 129614037Sdyson else 129714037Sdyson *(int *)data = mpipe->pipe_buffer.cnt; 1298137752Sphk break; 129913675Sdyson 130041086Struckman case FIOSETOWN: 1301138032Srwatson PIPE_UNLOCK(mpipe); 1302137752Sphk error = fsetown(*(int *)data, &mpipe->pipe_sigio); 1303138032Srwatson goto out_unlocked; 130441086Struckman 130541086Struckman case FIOGETOWN: 1306104393Struckman *(int *)data = fgetown(&mpipe->pipe_sigio); 1307137752Sphk break; 130813675Sdyson 130941086Struckman /* This is deprecated, FIOSETOWN should be used instead. 
*/ 131041086Struckman case TIOCSPGRP: 1311138032Srwatson PIPE_UNLOCK(mpipe); 1312137752Sphk error = fsetown(-(*(int *)data), &mpipe->pipe_sigio); 1313138032Srwatson goto out_unlocked; 131441086Struckman 131541086Struckman /* This is deprecated, FIOGETOWN should be used instead. */ 131618863Sdyson case TIOCGPGRP: 1317104393Struckman *(int *)data = -fgetown(&mpipe->pipe_sigio); 1318137752Sphk break; 131913675Sdyson 1320137752Sphk default: 1321137752Sphk error = ENOTTY; 1322137764Sphk break; 132313675Sdyson } 1324104269Srwatson PIPE_UNLOCK(mpipe); 1325138032Srwatsonout_unlocked: 1326137752Sphk return (error); 132713675Sdyson} 132813675Sdyson 1329104094Sphkstatic int 1330101983Srwatsonpipe_poll(fp, events, active_cred, td) 133113675Sdyson struct file *fp; 133229356Speter int events; 1333101983Srwatson struct ucred *active_cred; 133483366Sjulian struct thread *td; 133513675Sdyson{ 1336109153Sdillon struct pipe *rpipe = fp->f_data; 133713675Sdyson struct pipe *wpipe; 133829356Speter int revents = 0; 1339101768Srwatson#ifdef MAC 1340101768Srwatson int error; 1341101768Srwatson#endif 134213675Sdyson 134313675Sdyson wpipe = rpipe->pipe_peer; 134491362Salfred PIPE_LOCK(rpipe); 1345101768Srwatson#ifdef MAC 1346172930Srwatson error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair); 1347101768Srwatson if (error) 1348101768Srwatson goto locked_error; 1349101768Srwatson#endif 135029356Speter if (events & (POLLIN | POLLRDNORM)) 135129356Speter if ((rpipe->pipe_state & PIPE_DIRECTW) || 1352195423Skib (rpipe->pipe_buffer.cnt > 0)) 135329356Speter revents |= events & (POLLIN | POLLRDNORM); 135413675Sdyson 135529356Speter if (events & (POLLOUT | POLLWRNORM)) 1356179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1357179243Skib (wpipe->pipe_state & PIPE_EOF) || 135843311Sdillon (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 135943311Sdillon (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 136029356Speter revents |= events & (POLLOUT | POLLWRNORM); 136113675Sdyson 
1362195423Skib if ((events & POLLINIGNEOF) == 0) { 1363195423Skib if (rpipe->pipe_state & PIPE_EOF) { 1364195423Skib revents |= (events & (POLLIN | POLLRDNORM)); 1365195423Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1366195423Skib (wpipe->pipe_state & PIPE_EOF)) 1367195423Skib revents |= POLLHUP; 1368195423Skib } 1369195423Skib } 137029356Speter 137129356Speter if (revents == 0) { 137229356Speter if (events & (POLLIN | POLLRDNORM)) { 137383805Sjhb selrecord(td, &rpipe->pipe_sel); 1374174647Sjeff if (SEL_WAITING(&rpipe->pipe_sel)) 1375174647Sjeff rpipe->pipe_state |= PIPE_SEL; 137613675Sdyson } 137713675Sdyson 137829356Speter if (events & (POLLOUT | POLLWRNORM)) { 137983805Sjhb selrecord(td, &wpipe->pipe_sel); 1380174647Sjeff if (SEL_WAITING(&wpipe->pipe_sel)) 1381174647Sjeff wpipe->pipe_state |= PIPE_SEL; 138213907Sdyson } 138313675Sdyson } 1384101768Srwatson#ifdef MAC 1385101768Srwatsonlocked_error: 1386101768Srwatson#endif 138791362Salfred PIPE_UNLOCK(rpipe); 138829356Speter 138929356Speter return (revents); 139013675Sdyson} 139113675Sdyson 139298989Salfred/* 139398989Salfred * We shouldn't need locks here as we're doing a read and this should 139498989Salfred * be a natural race. 
139598989Salfred */ 139652983Speterstatic int 1397101983Srwatsonpipe_stat(fp, ub, active_cred, td) 139852983Speter struct file *fp; 139952983Speter struct stat *ub; 1400101983Srwatson struct ucred *active_cred; 140183366Sjulian struct thread *td; 140213675Sdyson{ 1403109153Sdillon struct pipe *pipe = fp->f_data; 1404101768Srwatson#ifdef MAC 1405101768Srwatson int error; 140652983Speter 1407104269Srwatson PIPE_LOCK(pipe); 1408172930Srwatson error = mac_pipe_check_stat(active_cred, pipe->pipe_pair); 1409104269Srwatson PIPE_UNLOCK(pipe); 1410101768Srwatson if (error) 1411101768Srwatson return (error); 1412101768Srwatson#endif 1413100527Salfred bzero(ub, sizeof(*ub)); 141417124Sbde ub->st_mode = S_IFIFO; 1415133790Ssilby ub->st_blksize = PAGE_SIZE; 1416132436Ssilby if (pipe->pipe_state & PIPE_DIRECTW) 1417132436Ssilby ub->st_size = pipe->pipe_map.cnt; 1418132436Ssilby else 1419132436Ssilby ub->st_size = pipe->pipe_buffer.cnt; 142013675Sdyson ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 1421205792Sed ub->st_atim = pipe->pipe_atime; 1422205792Sed ub->st_mtim = pipe->pipe_mtime; 1423205792Sed ub->st_ctim = pipe->pipe_ctime; 142460404Schris ub->st_uid = fp->f_cred->cr_uid; 142560404Schris ub->st_gid = fp->f_cred->cr_gid; 142617124Sbde /* 142760404Schris * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 142817124Sbde * XXX (st_dev, st_ino) should be unique. 
142917124Sbde */ 143076760Salfred return (0); 143113675Sdyson} 143213675Sdyson 143313675Sdyson/* ARGSUSED */ 143413675Sdysonstatic int 143583366Sjulianpipe_close(fp, td) 143613675Sdyson struct file *fp; 143783366Sjulian struct thread *td; 143813675Sdyson{ 1439109153Sdillon struct pipe *cpipe = fp->f_data; 144016322Sgpalmer 144149413Sgreen fp->f_ops = &badfileops; 1442109153Sdillon fp->f_data = NULL; 144396122Salfred funsetown(&cpipe->pipe_sigio); 144413675Sdyson pipeclose(cpipe); 144576760Salfred return (0); 144613675Sdyson} 144713675Sdyson 144876364Salfredstatic void 144976364Salfredpipe_free_kmem(cpipe) 145076364Salfred struct pipe *cpipe; 145176364Salfred{ 145291412Salfred 1453125293Srwatson KASSERT(!mtx_owned(PIPE_MTX(cpipe)), 1454125293Srwatson ("pipe_free_kmem: pipe mutex locked")); 145576364Salfred 145676364Salfred if (cpipe->pipe_buffer.buffer != NULL) { 1457189649Sjhb atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size); 1458118764Ssilby vm_map_remove(pipe_map, 1459118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer, 1460118764Ssilby (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 146176364Salfred cpipe->pipe_buffer.buffer = NULL; 146276364Salfred } 146376364Salfred#ifndef PIPE_NODIRECT 1464127501Salc { 146576364Salfred cpipe->pipe_map.cnt = 0; 146676364Salfred cpipe->pipe_map.pos = 0; 146776364Salfred cpipe->pipe_map.npages = 0; 146876364Salfred } 146976364Salfred#endif 147076364Salfred} 147176364Salfred 147213675Sdyson/* 147313675Sdyson * shutdown the pipe 147413675Sdyson */ 147513675Sdysonstatic void 147613675Sdysonpipeclose(cpipe) 147713675Sdyson struct pipe *cpipe; 147813675Sdyson{ 1479125293Srwatson struct pipepair *pp; 148013907Sdyson struct pipe *ppipe; 148176364Salfred 1482125293Srwatson KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL")); 148391968Salfred 1484125293Srwatson PIPE_LOCK(cpipe); 1485133049Ssilby pipelock(cpipe, 0); 1486125293Srwatson pp = cpipe->pipe_pair; 148791968Salfred 148891968Salfred 
pipeselwakeup(cpipe); 148913907Sdyson 149091968Salfred /* 149191968Salfred * If the other side is blocked, wake it up saying that 149291968Salfred * we want to close it down. 149391968Salfred */ 1494126131Sgreen cpipe->pipe_state |= PIPE_EOF; 149591968Salfred while (cpipe->pipe_busy) { 149691968Salfred wakeup(cpipe); 1497126131Sgreen cpipe->pipe_state |= PIPE_WANT; 1498133049Ssilby pipeunlock(cpipe); 149991968Salfred msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1500133049Ssilby pipelock(cpipe, 0); 150191968Salfred } 150213675Sdyson 1503101768Srwatson 150491968Salfred /* 1505125293Srwatson * Disconnect from peer, if any. 150691968Salfred */ 1507125293Srwatson ppipe = cpipe->pipe_peer; 1508179243Skib if (ppipe->pipe_present == PIPE_ACTIVE) { 150991968Salfred pipeselwakeup(ppipe); 151013907Sdyson 151191968Salfred ppipe->pipe_state |= PIPE_EOF; 151291968Salfred wakeup(ppipe); 1513133741Sjmg KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0); 151491968Salfred } 1515125293Srwatson 151691968Salfred /* 1517125293Srwatson * Mark this endpoint as free. Release kmem resources. We 1518125293Srwatson * don't mark this endpoint as unused until we've finished 1519125293Srwatson * doing that, or the pipe might disappear out from under 1520125293Srwatson * us. 152191968Salfred */ 1522125293Srwatson PIPE_UNLOCK(cpipe); 1523125293Srwatson pipe_free_kmem(cpipe); 1524125293Srwatson PIPE_LOCK(cpipe); 1525179243Skib cpipe->pipe_present = PIPE_CLOSING; 1526126131Sgreen pipeunlock(cpipe); 1527179243Skib 1528179243Skib /* 1529179243Skib * knlist_clear() may sleep dropping the PIPE_MTX. Set the 1530179243Skib * PIPE_FINALIZED, that allows other end to free the 1531179243Skib * pipe_pair, only after the knotes are completely dismantled. 
1532179243Skib */ 1533133741Sjmg knlist_clear(&cpipe->pipe_sel.si_note, 1); 1534179243Skib cpipe->pipe_present = PIPE_FINALIZED; 1535133741Sjmg knlist_destroy(&cpipe->pipe_sel.si_note); 1536125293Srwatson 1537125293Srwatson /* 1538125293Srwatson * If both endpoints are now closed, release the memory for the 1539125293Srwatson * pipe pair. If not, unlock. 1540125293Srwatson */ 1541179243Skib if (ppipe->pipe_present == PIPE_FINALIZED) { 154291968Salfred PIPE_UNLOCK(cpipe); 1543125293Srwatson#ifdef MAC 1544172930Srwatson mac_pipe_destroy(pp); 1545125293Srwatson#endif 1546125293Srwatson uma_zfree(pipe_zone, cpipe->pipe_pair); 1547125293Srwatson } else 1548125293Srwatson PIPE_UNLOCK(cpipe); 154913675Sdyson} 155059288Sjlemon 155172521Sjlemon/*ARGSUSED*/ 155259288Sjlemonstatic int 155372521Sjlemonpipe_kqfilter(struct file *fp, struct knote *kn) 155459288Sjlemon{ 155589306Salfred struct pipe *cpipe; 155659288Sjlemon 1557109153Sdillon cpipe = kn->kn_fp->f_data; 1558126131Sgreen PIPE_LOCK(cpipe); 155972521Sjlemon switch (kn->kn_filter) { 156072521Sjlemon case EVFILT_READ: 156172521Sjlemon kn->kn_fop = &pipe_rfiltops; 156272521Sjlemon break; 156372521Sjlemon case EVFILT_WRITE: 156472521Sjlemon kn->kn_fop = &pipe_wfiltops; 1565179243Skib if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) { 1566101382Sdes /* other end of pipe has been closed */ 1567126131Sgreen PIPE_UNLOCK(cpipe); 1568118929Sjmg return (EPIPE); 1569126131Sgreen } 1570126131Sgreen cpipe = cpipe->pipe_peer; 157172521Sjlemon break; 157272521Sjlemon default: 1573126131Sgreen PIPE_UNLOCK(cpipe); 1574133741Sjmg return (EINVAL); 157572521Sjlemon } 157678292Sjlemon 1577133741Sjmg knlist_add(&cpipe->pipe_sel.si_note, kn, 1); 157891372Salfred PIPE_UNLOCK(cpipe); 157959288Sjlemon return (0); 158059288Sjlemon} 158159288Sjlemon 158259288Sjlemonstatic void 158359288Sjlemonfilt_pipedetach(struct knote *kn) 158459288Sjlemon{ 1585121018Sjmg struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 158659288Sjlemon 1587126131Sgreen 
PIPE_LOCK(cpipe); 1588179242Skib if (kn->kn_filter == EVFILT_WRITE) 1589121018Sjmg cpipe = cpipe->pipe_peer; 1590133741Sjmg knlist_remove(&cpipe->pipe_sel.si_note, kn, 1); 159191372Salfred PIPE_UNLOCK(cpipe); 159259288Sjlemon} 159359288Sjlemon 159459288Sjlemon/*ARGSUSED*/ 159559288Sjlemonstatic int 159659288Sjlemonfilt_piperead(struct knote *kn, long hint) 159759288Sjlemon{ 1598109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 159959288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 1600133741Sjmg int ret; 160159288Sjlemon 160291372Salfred PIPE_LOCK(rpipe); 160359288Sjlemon kn->kn_data = rpipe->pipe_buffer.cnt; 160459288Sjlemon if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 160559288Sjlemon kn->kn_data = rpipe->pipe_map.cnt; 160659288Sjlemon 160759288Sjlemon if ((rpipe->pipe_state & PIPE_EOF) || 1608179243Skib wpipe->pipe_present != PIPE_ACTIVE || 1609179243Skib (wpipe->pipe_state & PIPE_EOF)) { 161091372Salfred kn->kn_flags |= EV_EOF; 161191372Salfred PIPE_UNLOCK(rpipe); 161259288Sjlemon return (1); 161359288Sjlemon } 1614133741Sjmg ret = kn->kn_data > 0; 161591372Salfred PIPE_UNLOCK(rpipe); 1616133741Sjmg return ret; 161759288Sjlemon} 161859288Sjlemon 161959288Sjlemon/*ARGSUSED*/ 162059288Sjlemonstatic int 162159288Sjlemonfilt_pipewrite(struct knote *kn, long hint) 162259288Sjlemon{ 1623109153Sdillon struct pipe *rpipe = kn->kn_fp->f_data; 162459288Sjlemon struct pipe *wpipe = rpipe->pipe_peer; 162559288Sjlemon 162691372Salfred PIPE_LOCK(rpipe); 1627179243Skib if (wpipe->pipe_present != PIPE_ACTIVE || 1628179243Skib (wpipe->pipe_state & PIPE_EOF)) { 162959288Sjlemon kn->kn_data = 0; 1630124394Sdes kn->kn_flags |= EV_EOF; 163191372Salfred PIPE_UNLOCK(rpipe); 163259288Sjlemon return (1); 163359288Sjlemon } 163459288Sjlemon kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 163565855Sjlemon if (wpipe->pipe_state & PIPE_DIRECTW) 163659288Sjlemon kn->kn_data = 0; 163759288Sjlemon 163891372Salfred PIPE_UNLOCK(rpipe); 163959288Sjlemon 
return (kn->kn_data >= PIPE_BUF); 164059288Sjlemon} 1641