sys_pipe.c revision 224914
/*-
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, the sending process pins the underlying pages in
 * memory, and the receiving process copies directly from these pinned pages
 * in the sending process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.  This value is normally
 * autotuned, but may also be loader tuned.
 *
 * kern.ipc.pipekva - This read-only sysctl tracks the current amount of
 * memory in use by pipes.
 *
 * Based on how large pipekva is relative to maxpipekva, the following
 * will happen:
 *
 * 0% - 50%:
 *     New pipes are given 16K of memory backing, pipes may dynamically
 *     grow to as large as 64K where needed.
 * 50% - 75%:
 *     New pipes are given 4K (or PAGE_SIZE) of memory backing,
 *     existing pipes may NOT grow.
 * 75% - 100%:
 *     New pipes are given 4K (or PAGE_SIZE) of memory backing,
 *     existing pipes will be shrunk down to 4K whenever possible.
 *
 * Resizing may be disabled by setting kern.ipc.piperesizeallowed=0.  If
 * that is set, the only resize that will occur is the 0 -> SMALL_PIPE_SIZE
 * resize which MUST occur for reverse-direction pipes when they are
 * first used.
 *
 * Additional information about the current state of pipes may be obtained
 * from kern.ipc.pipes, kern.ipc.pipefragretry, kern.ipc.pipeallocfail,
 * and kern.ipc.piperesizefail.
 *
 * Locking rules:  There are two locks present here:  A mutex, used via
 * PIPE_LOCK, and a flag, used via pipelock().  All locking is done via
 * the flag, as mutexes can not persist over uiomove.  The mutex
 * exists only to guard access to the flag, and is not in itself a
 * locking mechanism.  Also note that there is only a single mutex for
 * both directions of a pipe.
 *
 * As pipelock() may have to sleep before it can acquire the flag, it
 * is important to reread all data after a call to pipelock(); everything
 * in the structure may have changed.
8999026Sjulian */ 90111028Sjeff 91105854Sjulian#include <sys/cdefs.h> 92105854Sjulian__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 224914 2011-08-16 20:07:47Z kib $"); 93111028Sjeff 94111028Sjeff#include <sys/param.h> 95111028Sjeff#include <sys/systm.h> 96111028Sjeff#include <sys/fcntl.h> 9799026Sjulian#include <sys/file.h> 98107719Sjulian#include <sys/filedesc.h> 99111028Sjeff#include <sys/filio.h> 100111515Sdavidxu#include <sys/kernel.h> 101111028Sjeff#include <sys/lock.h> 102105854Sjulian#include <sys/mutex.h> 103111028Sjeff#include <sys/ttycom.h> 104111028Sjeff#include <sys/stat.h> 105111028Sjeff#include <sys/malloc.h> 106111028Sjeff#include <sys/poll.h> 107111028Sjeff#include <sys/selinfo.h> 108111028Sjeff#include <sys/signalvar.h> 109111028Sjeff#include <sys/syscallsubr.h> 110111028Sjeff#include <sys/sysctl.h> 111111028Sjeff#include <sys/sysproto.h> 112111028Sjeff#include <sys/pipe.h> 113111028Sjeff#include <sys/proc.h> 114111028Sjeff#include <sys/vnode.h> 115111028Sjeff#include <sys/uio.h> 116111028Sjeff#include <sys/event.h> 117111028Sjeff 118111028Sjeff#include <security/mac/mac_framework.h> 119111028Sjeff 120111028Sjeff#include <vm/vm.h> 121111028Sjeff#include <vm/vm_param.h> 122111028Sjeff#include <vm/vm_object.h> 123111028Sjeff#include <vm/vm_kern.h> 124111028Sjeff#include <vm/vm_extern.h> 125111028Sjeff#include <vm/pmap.h> 126111028Sjeff#include <vm/vm_map.h> 127111028Sjeff#include <vm/vm_page.h> 128111028Sjeff#include <vm/uma.h> 129111028Sjeff 130111028Sjeff/* 131111028Sjeff * Use this define if you want to disable *fancy* VM things. Expect an 13299026Sjulian * approx 30% decrease in transfer rate. This could be useful for 133107719Sjulian * NetBSD or OpenBSD. 
13499026Sjulian */ 13599026Sjulian/* #define PIPE_NODIRECT */ 13699026Sjulian 13799026Sjulian/* 13899026Sjulian * interfaces to the outside world 13999026Sjulian */ 14099026Sjulianstatic fo_rdwr_t pipe_read; 141103216Sjulianstatic fo_rdwr_t pipe_write; 142113339Sjulianstatic fo_truncate_t pipe_truncate; 14399026Sjulianstatic fo_ioctl_t pipe_ioctl; 14499026Sjulianstatic fo_poll_t pipe_poll; 14599026Sjulianstatic fo_kqfilter_t pipe_kqfilter; 14699026Sjulianstatic fo_stat_t pipe_stat; 14799026Sjulianstatic fo_close_t pipe_close; 14899026Sjulian 14999026Sjulianstatic struct fileops pipeops = { 15099026Sjulian .fo_read = pipe_read, 15199026Sjulian .fo_write = pipe_write, 15299026Sjulian .fo_truncate = pipe_truncate, 15399026Sjulian .fo_ioctl = pipe_ioctl, 15499026Sjulian .fo_poll = pipe_poll, 15599026Sjulian .fo_kqfilter = pipe_kqfilter, 15699026Sjulian .fo_stat = pipe_stat, 15799026Sjulian .fo_close = pipe_close, 158103216Sjulian .fo_chmod = invfo_chmod, 159103216Sjulian .fo_chown = invfo_chown, 160103216Sjulian .fo_flags = DFLAG_PASSABLE 16199026Sjulian}; 16299026Sjulian 16399026Sjulianstatic void filt_pipedetach(struct knote *kn); 16499026Sjulianstatic int filt_piperead(struct knote *kn, long hint); 16599026Sjulianstatic int filt_pipewrite(struct knote *kn, long hint); 16699026Sjulian 16799026Sjulianstatic struct filterops pipe_rfiltops = { 168103216Sjulian .f_isfd = 1, 16999026Sjulian .f_detach = filt_pipedetach, 17099026Sjulian .f_event = filt_piperead 17199026Sjulian}; 17299026Sjulianstatic struct filterops pipe_wfiltops = { 17399026Sjulian .f_isfd = 1, 17499026Sjulian .f_detach = filt_pipedetach, 17599026Sjulian .f_event = filt_pipewrite 17699026Sjulian}; 17799026Sjulian 17899026Sjulian/* 17999026Sjulian * Default pipe buffer size(s), this can be kind-of large now because pipe 18099026Sjulian * space is pageable. 
The pipe code will try to maintain locality of 18199026Sjulian * reference for performance reasons, so small amounts of outstanding I/O 18299026Sjulian * will not wipe the cache. 18399026Sjulian */ 18499026Sjulian#define MINPIPESIZE (PIPE_SIZE/3) 18599026Sjulian#define MAXPIPESIZE (2*PIPE_SIZE/3) 186103312Sjulian 187104354Sscottlstatic long amountpipekva; 188103312Sjulianstatic int pipefragretry; 18999026Sjulianstatic int pipeallocfail; 190107126Sjeffstatic int piperesizefail; 19199026Sjulianstatic int piperesizeallowed = 1; 19299026Sjulian 19399026SjulianSYSCTL_LONG(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN, 19499026Sjulian &maxpipekva, 0, "Pipe KVA limit"); 19599026SjulianSYSCTL_LONG(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 19699026Sjulian &amountpipekva, 0, "Pipe KVA usage"); 19799026SjulianSYSCTL_INT(_kern_ipc, OID_AUTO, pipefragretry, CTLFLAG_RD, 19899026Sjulian &pipefragretry, 0, "Pipe allocation retries due to fragmentation"); 19999026SjulianSYSCTL_INT(_kern_ipc, OID_AUTO, pipeallocfail, CTLFLAG_RD, 20099026Sjulian &pipeallocfail, 0, "Pipe allocation failures"); 20199026SjulianSYSCTL_INT(_kern_ipc, OID_AUTO, piperesizefail, CTLFLAG_RD, 20299026Sjulian &piperesizefail, 0, "Pipe resize failures"); 20399026SjulianSYSCTL_INT(_kern_ipc, OID_AUTO, piperesizeallowed, CTLFLAG_RW, 204111028Sjeff &piperesizeallowed, 0, "Pipe resizing allowed"); 205107126Sjeff 206107126Sjeffstatic void pipeinit(void *dummy __unused); 207107126Sjeffstatic void pipeclose(struct pipe *cpipe); 208107126Sjeffstatic void pipe_free_kmem(struct pipe *cpipe); 209107126Sjeffstatic int pipe_create(struct pipe *pipe, int backing); 210107126Sjeffstatic __inline int pipelock(struct pipe *cpipe, int catch); 211107126Sjeffstatic __inline void pipeunlock(struct pipe *cpipe); 21299026Sjulianstatic __inline void pipeselwakeup(struct pipe *cpipe); 213107126Sjeff#ifndef PIPE_NODIRECT 214107126Sjeffstatic int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 215107126Sjeffstatic void 
pipe_destroy_write_buffer(struct pipe *wpipe); 216111028Sjeffstatic int pipe_direct_write(struct pipe *wpipe, struct uio *uio); 217107126Sjeffstatic void pipe_clone_write_buffer(struct pipe *wpipe); 218107126Sjeff#endif 219107126Sjeffstatic int pipespace(struct pipe *cpipe, int size); 220107126Sjeffstatic int pipespace_new(struct pipe *cpipe, int size); 221107126Sjeff 222107126Sjeffstatic int pipe_zone_ctor(void *mem, int size, void *arg, int flags); 223107126Sjeffstatic int pipe_zone_init(void *mem, int size, int flags); 224107126Sjeffstatic void pipe_zone_fini(void *mem, int size); 225107126Sjeff 226107126Sjeffstatic uma_zone_t pipe_zone; 227107126Sjeff 228107126SjeffSYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 229105854Sjulian 230111028Sjeffstatic void 231105854Sjulianpipeinit(void *dummy __unused) 232105854Sjulian{ 233105854Sjulian 234105854Sjulian pipe_zone = uma_zcreate("pipe", sizeof(struct pipepair), 235105854Sjulian pipe_zone_ctor, NULL, pipe_zone_init, pipe_zone_fini, 236105854Sjulian UMA_ALIGN_PTR, 0); 237105854Sjulian KASSERT(pipe_zone != NULL, ("pipe_zone not initialized")); 238105854Sjulian} 239111028Sjeff 240105854Sjulianstatic int 241105854Sjulianpipe_zone_ctor(void *mem, int size, void *arg, int flags) 242105854Sjulian{ 243111028Sjeff struct pipepair *pp; 244111028Sjeff struct pipe *rpipe, *wpipe; 245105854Sjulian 246105854Sjulian KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size")); 247105854Sjulian 248105854Sjulian pp = (struct pipepair *)mem; 249105854Sjulian 250105854Sjulian /* 251105854Sjulian * We zero both pipe endpoints to make sure all the kmem pointers 252105854Sjulian * are NULL, flag fields are zero'd, etc. We timestamp both 253105854Sjulian * endpoints with the same time. 
254105854Sjulian */ 255111028Sjeff rpipe = &pp->pp_rpipe; 256111028Sjeff bzero(rpipe, sizeof(*rpipe)); 257111028Sjeff vfs_timestamp(&rpipe->pipe_ctime); 258105854Sjulian rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime; 259111028Sjeff 260111028Sjeff wpipe = &pp->pp_wpipe; 261105854Sjulian bzero(wpipe, sizeof(*wpipe)); 262105854Sjulian wpipe->pipe_ctime = rpipe->pipe_ctime; 263105854Sjulian wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime; 264105854Sjulian 265105854Sjulian rpipe->pipe_peer = wpipe; 266105854Sjulian rpipe->pipe_pair = pp; 267105854Sjulian wpipe->pipe_peer = rpipe; 268105854Sjulian wpipe->pipe_pair = pp; 269105854Sjulian 270105854Sjulian /* 271105854Sjulian * Mark both endpoints as present; they will later get free'd 272105854Sjulian * one at a time. When both are free'd, then the whole pair 273105854Sjulian * is released. 274105854Sjulian */ 275111028Sjeff rpipe->pipe_present = PIPE_ACTIVE; 276111028Sjeff wpipe->pipe_present = PIPE_ACTIVE; 277111028Sjeff 278111028Sjeff /* 279111028Sjeff * Eventually, the MAC Framework may initialize the label 280111028Sjeff * in ctor or init, but for now we do it elswhere to avoid 281111028Sjeff * blocking in ctor or init. 
282105854Sjulian */ 283111028Sjeff pp->pp_label = NULL; 284111028Sjeff 285111028Sjeff return (0); 286111028Sjeff} 287111028Sjeff 288111028Sjeffstatic int 289105854Sjulianpipe_zone_init(void *mem, int size, int flags) 290105854Sjulian{ 291105854Sjulian struct pipepair *pp; 292105854Sjulian 293105854Sjulian KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size")); 294105854Sjulian 295105854Sjulian pp = (struct pipepair *)mem; 296105854Sjulian 297105854Sjulian mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 298105854Sjulian return (0); 299111028Sjeff} 300111028Sjeff 301111028Sjeffstatic void 302111028Sjeffpipe_zone_fini(void *mem, int size) 303105854Sjulian{ 304105854Sjulian struct pipepair *pp; 305105854Sjulian 306105854Sjulian KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size")); 307105854Sjulian 308105854Sjulian pp = (struct pipepair *)mem; 309105854Sjulian 310105854Sjulian mtx_destroy(&pp->pp_mtx); 311105854Sjulian} 312111028Sjeff 313111028Sjeff/* 314111028Sjeff * The pipe system call for the DTYPE_PIPE type of pipes. If we fail, let 315111028Sjeff * the zone pick up the pieces via pipeclose(). 316111028Sjeff */ 317111125Sdavidxuint 318111028Sjeffkern_pipe(struct thread *td, int fildes[2]) 319111028Sjeff{ 320111028Sjeff struct filedesc *fdp = td->td_proc->p_fd; 321111028Sjeff struct file *rf, *wf; 322111028Sjeff struct pipepair *pp; 323111028Sjeff struct pipe *rpipe, *wpipe; 324111028Sjeff int fd, error; 325111028Sjeff 326111028Sjeff pp = uma_zalloc(pipe_zone, M_WAITOK); 327111028Sjeff#ifdef MAC 328111028Sjeff /* 329111028Sjeff * The MAC label is shared between the connected endpoints. As a 330111028Sjeff * result mac_pipe_init() and mac_pipe_create() are called once 331111028Sjeff * for the pair, and not on the endpoints. 
332111028Sjeff */ 333111028Sjeff mac_pipe_init(pp); 334111028Sjeff mac_pipe_create(td->td_ucred, pp); 335111028Sjeff#endif 336111028Sjeff rpipe = &pp->pp_rpipe; 337111028Sjeff wpipe = &pp->pp_wpipe; 338111028Sjeff 339111028Sjeff knlist_init_mtx(&rpipe->pipe_sel.si_note, PIPE_MTX(rpipe)); 340111028Sjeff knlist_init_mtx(&wpipe->pipe_sel.si_note, PIPE_MTX(wpipe)); 341111028Sjeff 342111028Sjeff /* Only the forward direction pipe is backed by default */ 343111028Sjeff if ((error = pipe_create(rpipe, 1)) != 0 || 344111028Sjeff (error = pipe_create(wpipe, 0)) != 0) { 345111028Sjeff pipeclose(rpipe); 346111028Sjeff pipeclose(wpipe); 347111028Sjeff return (error); 348111028Sjeff } 349111028Sjeff 350111028Sjeff rpipe->pipe_state |= PIPE_DIRECTOK; 351111028Sjeff wpipe->pipe_state |= PIPE_DIRECTOK; 352111028Sjeff 353111028Sjeff error = falloc(td, &rf, &fd, 0); 354111028Sjeff if (error) { 355111028Sjeff pipeclose(rpipe); 356111028Sjeff pipeclose(wpipe); 357111028Sjeff return (error); 358111028Sjeff } 359111028Sjeff /* An extra reference on `rf' has been held for us by falloc(). */ 360111028Sjeff fildes[0] = fd; 361111028Sjeff 36299026Sjulian /* 363111028Sjeff * Warning: once we've gotten past allocation of the fd for the 364111028Sjeff * read-side, we can only drop the read side via fdrop() in order 365105854Sjulian * to avoid races against processes which manage to dup() the read 366105854Sjulian * side while we are blocked trying to allocate the write side. 367105854Sjulian */ 368111028Sjeff finit(rf, FREAD | FWRITE, DTYPE_PIPE, rpipe, &pipeops); 369105854Sjulian error = falloc(td, &wf, &fd, 0); 370105854Sjulian if (error) { 371105854Sjulian fdclose(fdp, rf, fildes[0], td); 372105854Sjulian fdrop(rf, td); 373105854Sjulian /* rpipe has been closed by fdrop(). */ 374105854Sjulian pipeclose(wpipe); 375105854Sjulian return (error); 376105854Sjulian } 377105854Sjulian /* An extra reference on `wf' has been held for us by falloc(). 
*/ 378105854Sjulian finit(wf, FREAD | FWRITE, DTYPE_PIPE, wpipe, &pipeops); 379105854Sjulian fdrop(wf, td); 380105854Sjulian fildes[1] = fd; 381105854Sjulian fdrop(rf, td); 382111028Sjeff 383111028Sjeff return (0); 384111028Sjeff} 385111028Sjeff 386111028Sjeff/* ARGSUSED */ 387105854Sjulianint 388105854Sjulianpipe(struct thread *td, struct pipe_args *uap) 389105854Sjulian{ 390106180Sdavidxu int error; 391106180Sdavidxu int fildes[2]; 392105854Sjulian 393106242Sdavidxu error = kern_pipe(td, fildes); 394111585Sjulian if (error) 395106242Sdavidxu return (error); 396106180Sdavidxu 397106180Sdavidxu td->td_retval[0] = fildes[0]; 398106180Sdavidxu td->td_retval[1] = fildes[1]; 399106180Sdavidxu 400106180Sdavidxu return (0); 401106180Sdavidxu} 402106180Sdavidxu 403106180Sdavidxu/* 404106180Sdavidxu * Allocate kva for pipe circular buffer, the space is pageable 405111028Sjeff * This routine will 'realloc' the size of a pipe safely, if it fails 406106180Sdavidxu * it will retain the old buffer. 407106182Sdavidxu * If it fails it will return ENOMEM. 
408106180Sdavidxu */ 409106180Sdavidxustatic int 410106180Sdavidxupipespace_new(cpipe, size) 411106182Sdavidxu struct pipe *cpipe; 412105854Sjulian int size; 413105854Sjulian{ 414111028Sjeff caddr_t buffer; 415111028Sjeff int error, cnt, firstseg; 416111028Sjeff static int curfail = 0; 417111028Sjeff static struct timeval lastfail; 418111028Sjeff 419105854Sjulian KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked")); 420105854Sjulian KASSERT(!(cpipe->pipe_state & PIPE_DIRECTW), 421105854Sjulian ("pipespace: resize of direct writes not allowed")); 422105854Sjulianretry: 423105854Sjulian cnt = cpipe->pipe_buffer.cnt; 424108640Sdavidxu if (cnt > size) 425105854Sjulian size = cnt; 426105854Sjulian 427113793Sdavidxu size = round_page(size); 428106182Sdavidxu buffer = (caddr_t) vm_map_min(pipe_map); 429105854Sjulian 430111028Sjeff error = vm_map_find(pipe_map, NULL, 0, 431105854Sjulian (vm_offset_t *) &buffer, size, 1, 432105854Sjulian VM_PROT_ALL, VM_PROT_ALL, 0); 433111028Sjeff if (error != KERN_SUCCESS) { 434105854Sjulian if ((cpipe->pipe_buffer.buffer == NULL) && 435105854Sjulian (size > SMALL_PIPE_SIZE)) { 436105854Sjulian size = SMALL_PIPE_SIZE; 437105854Sjulian pipefragretry++; 438108640Sdavidxu goto retry; 439111028Sjeff } 440108640Sdavidxu if (cpipe->pipe_buffer.buffer == NULL) { 441111028Sjeff pipeallocfail++; 442111585Sjulian if (ppsratecheck(&lastfail, &curfail, 1)) 443105854Sjulian printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n"); 444105854Sjulian } else { 445105854Sjulian piperesizefail++; 446111028Sjeff } 447111028Sjeff return (ENOMEM); 448111028Sjeff } 449111028Sjeff 450112071Sdavidxu /* copy data, then free old resources if we're resizing */ 451105854Sjulian if (cnt > 0) { 452105854Sjulian if (cpipe->pipe_buffer.in <= cpipe->pipe_buffer.out) { 453105854Sjulian firstseg = cpipe->pipe_buffer.size - cpipe->pipe_buffer.out; 454106182Sdavidxu bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 455105854Sjulian buffer, firstseg); 
456105854Sjulian if ((cnt - firstseg) > 0) 457107719Sjulian bcopy(cpipe->pipe_buffer.buffer, &buffer[firstseg], 458108338Sjulian cpipe->pipe_buffer.in); 459111028Sjeff } else { 460107719Sjulian bcopy(&cpipe->pipe_buffer.buffer[cpipe->pipe_buffer.out], 461111028Sjeff buffer, cnt); 462111028Sjeff } 463111169Sdavidxu } 464111028Sjeff pipe_free_kmem(cpipe); 465111028Sjeff cpipe->pipe_buffer.buffer = buffer; 466105854Sjulian cpipe->pipe_buffer.size = size; 467111028Sjeff cpipe->pipe_buffer.in = cnt; 468105854Sjulian cpipe->pipe_buffer.out = 0; 469105854Sjulian cpipe->pipe_buffer.cnt = cnt; 470107719Sjulian atomic_add_long(&amountpipekva, cpipe->pipe_buffer.size); 471111169Sdavidxu return (0); 472111169Sdavidxu} 473111169Sdavidxu 474105854Sjulian/* 475105854Sjulian * Wrapper for pipespace_new() that performs locking assertions. 476107719Sjulian */ 477113793Sdavidxustatic int 478107719Sjulianpipespace(cpipe, size) 479111169Sdavidxu struct pipe *cpipe; 480111169Sdavidxu int size; 481111169Sdavidxu{ 482111169Sdavidxu 483111169Sdavidxu KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 484111169Sdavidxu ("Unlocked pipe passed to pipespace")); 485111169Sdavidxu return (pipespace_new(cpipe, size)); 486108613Sjulian} 487108338Sjulian 488111028Sjeff/* 489112888Sjeff * lock a pipe for I/O, blocking other access 490111042Sdavidxu */ 491111042Sdavidxustatic __inline int 492112888Sjeffpipelock(cpipe, catch) 493111169Sdavidxu struct pipe *cpipe; 494111169Sdavidxu int catch; 495111169Sdavidxu{ 496111169Sdavidxu int error; 497111028Sjeff 498111169Sdavidxu PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 499111169Sdavidxu while (cpipe->pipe_state & PIPE_LOCKFL) { 500111028Sjeff cpipe->pipe_state |= PIPE_LWANT; 501110190Sjulian error = msleep(cpipe, PIPE_MTX(cpipe), 502111169Sdavidxu catch ? 
(PRIBIO | PCATCH) : PRIBIO, 503111169Sdavidxu "pipelk", 0); 504111169Sdavidxu if (error != 0) 505111169Sdavidxu return (error); 506111169Sdavidxu } 507111169Sdavidxu cpipe->pipe_state |= PIPE_LOCKFL; 508111169Sdavidxu return (0); 509111169Sdavidxu} 510111169Sdavidxu 511105854Sjulian/* 512111169Sdavidxu * unlock a pipe I/O lock 513107719Sjulian */ 514105854Sjulianstatic __inline void 515105854Sjulianpipeunlock(cpipe) 516105854Sjulian struct pipe *cpipe; 517105854Sjulian{ 518105854Sjulian 519105854Sjulian PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 520105854Sjulian KASSERT(cpipe->pipe_state & PIPE_LOCKFL, 521105854Sjulian ("Unlocked pipe passed to pipeunlock")); 522105854Sjulian cpipe->pipe_state &= ~PIPE_LOCKFL; 523105854Sjulian if (cpipe->pipe_state & PIPE_LWANT) { 524111028Sjeff cpipe->pipe_state &= ~PIPE_LWANT; 525108338Sjulian wakeup(cpipe); 526105854Sjulian } 527105854Sjulian} 528108338Sjulian 529111028Sjeffstatic __inline void 530105854Sjulianpipeselwakeup(cpipe) 531111585Sjulian struct pipe *cpipe; 532111028Sjeff{ 533111028Sjeff 534108613Sjulian PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 535105854Sjulian if (cpipe->pipe_state & PIPE_SEL) { 536105854Sjulian selwakeuppri(&cpipe->pipe_sel, PSOCK); 537111028Sjeff if (!SEL_WAITING(&cpipe->pipe_sel)) 538111207Sdavidxu cpipe->pipe_state &= ~PIPE_SEL; 539111028Sjeff } 540108613Sjulian if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 541111028Sjeff pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 542108338Sjulian KNOTE_LOCKED(&cpipe->pipe_sel.si_note, 0); 543105854Sjulian} 544105854Sjulian 545105854Sjulian/* 546111028Sjeff * Initialize and allocate VM and memory for pipe. The structure 547111028Sjeff * will start out zero'd from the ctor, so we just manage the kmem. 
548111028Sjeff */ 549111028Sjeffstatic int 550111028Sjeffpipe_create(pipe, backing) 551108338Sjulian struct pipe *pipe; 552111028Sjeff int backing; 553105854Sjulian{ 554111028Sjeff int error; 555111028Sjeff 556111028Sjeff if (backing) { 557111028Sjeff if (amountpipekva > maxpipekva / 2) 558111028Sjeff error = pipespace_new(pipe, SMALL_PIPE_SIZE); 559111028Sjeff else 560111028Sjeff error = pipespace_new(pipe, PIPE_SIZE); 561111028Sjeff } else { 562108613Sjulian /* If we're not backing this pipe, no need to do anything. */ 563105854Sjulian error = 0; 564111028Sjeff } 565108338Sjulian return (error); 566108613Sjulian} 567105854Sjulian 568111028Sjeff/* ARGSUSED */ 569108338Sjulianstatic int 570105854Sjulianpipe_read(fp, uio, active_cred, flags, td) 571105854Sjulian struct file *fp; 572105854Sjulian struct uio *uio; 573105854Sjulian struct ucred *active_cred; 574111028Sjeff struct thread *td; 575105854Sjulian int flags; 576105854Sjulian{ 577105854Sjulian struct pipe *rpipe = fp->f_data; 578105854Sjulian int error; 579105854Sjulian int nread = 0; 580105854Sjulian u_int size; 581105854Sjulian 582105854Sjulian PIPE_LOCK(rpipe); 583105854Sjulian ++rpipe->pipe_busy; 584105854Sjulian error = pipelock(rpipe, 1); 585105854Sjulian if (error) 586105854Sjulian goto unlocked_error; 587105854Sjulian 588111028Sjeff#ifdef MAC 589111028Sjeff error = mac_pipe_check_read(active_cred, rpipe->pipe_pair); 590105854Sjulian if (error) 591105854Sjulian goto locked_error; 592105854Sjulian#endif 593105854Sjulian if (amountpipekva > (3 * maxpipekva) / 4) { 594105854Sjulian if (!(rpipe->pipe_state & PIPE_DIRECTW) && 595111028Sjeff (rpipe->pipe_buffer.size > SMALL_PIPE_SIZE) && 596111028Sjeff (rpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) && 597111028Sjeff (piperesizeallowed == 1)) { 598111028Sjeff PIPE_UNLOCK(rpipe); 599111028Sjeff pipespace(rpipe, SMALL_PIPE_SIZE); 600111028Sjeff PIPE_LOCK(rpipe); 601111028Sjeff } 602111028Sjeff } 603111028Sjeff 604111028Sjeff while (uio->uio_resid) { 
605112078Sdavidxu /* 606111585Sjulian * normal pipe buffer receive 607112078Sdavidxu */ 608105854Sjulian if (rpipe->pipe_buffer.cnt > 0) { 609105854Sjulian size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 610111028Sjeff if (size > rpipe->pipe_buffer.cnt) 611107006Sdavidxu size = rpipe->pipe_buffer.cnt; 612107006Sdavidxu if (size > (u_int) uio->uio_resid) 613105854Sjulian size = (u_int) uio->uio_resid; 614105854Sjulian 615105854Sjulian PIPE_UNLOCK(rpipe); 616111028Sjeff error = uiomove( 617105854Sjulian &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 618105854Sjulian size, uio); 619105854Sjulian PIPE_LOCK(rpipe); 620111028Sjeff if (error) 621105854Sjulian break; 622105854Sjulian 623111028Sjeff rpipe->pipe_buffer.out += size; 624111028Sjeff if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 625111028Sjeff rpipe->pipe_buffer.out = 0; 626111677Sdavidxu 627111028Sjeff rpipe->pipe_buffer.cnt -= size; 628111028Sjeff 629111677Sdavidxu /* 630111028Sjeff * If there is no more to read in the pipe, reset 631105854Sjulian * its pointers to the beginning. This improves 632111028Sjeff * cache hit stats. 633111028Sjeff */ 634111028Sjeff if (rpipe->pipe_buffer.cnt == 0) { 635111028Sjeff rpipe->pipe_buffer.in = 0; 636111028Sjeff rpipe->pipe_buffer.out = 0; 637111028Sjeff } 638111028Sjeff nread += size; 639111028Sjeff#ifndef PIPE_NODIRECT 640111028Sjeff /* 641111028Sjeff * Direct copy, bypassing a kernel buffer. 
642111028Sjeff */ 643111028Sjeff } else if ((size = rpipe->pipe_map.cnt) && 644111028Sjeff (rpipe->pipe_state & PIPE_DIRECTW)) { 645111028Sjeff if (size > (u_int) uio->uio_resid) 646111028Sjeff size = (u_int) uio->uio_resid; 647111028Sjeff 648111028Sjeff PIPE_UNLOCK(rpipe); 649111028Sjeff error = uiomove_fromphys(rpipe->pipe_map.ms, 650111028Sjeff rpipe->pipe_map.pos, size, uio); 651111028Sjeff PIPE_LOCK(rpipe); 652111028Sjeff if (error) 653111028Sjeff break; 654111028Sjeff nread += size; 655111028Sjeff rpipe->pipe_map.pos += size; 656111028Sjeff rpipe->pipe_map.cnt -= size; 657105854Sjulian if (rpipe->pipe_map.cnt == 0) { 658111028Sjeff rpipe->pipe_state &= ~PIPE_DIRECTW; 659105854Sjulian wakeup(rpipe); 660111028Sjeff } 661111028Sjeff#endif 662105854Sjulian } else { 663111028Sjeff /* 664111028Sjeff * detect EOF condition 665111028Sjeff * read returns 0 on EOF, no need to set error 666111028Sjeff */ 667105854Sjulian if (rpipe->pipe_state & PIPE_EOF) 668111028Sjeff break; 669111028Sjeff 670111028Sjeff /* 671111028Sjeff * If the "write-side" has been blocked, wake it up now. 672111028Sjeff */ 673105854Sjulian if (rpipe->pipe_state & PIPE_WANTW) { 674111028Sjeff rpipe->pipe_state &= ~PIPE_WANTW; 675111028Sjeff wakeup(rpipe); 676111028Sjeff } 677111028Sjeff 678111028Sjeff /* 679111028Sjeff * Break if some data was read. 680111028Sjeff */ 681111028Sjeff if (nread > 0) 682111028Sjeff break; 683111028Sjeff 684111028Sjeff /* 685111028Sjeff * Unlock the pipe buffer for our remaining processing. 686111595Sdavidxu * We will either break out with an error or we will 687111028Sjeff * sleep and relock to loop. 688111028Sjeff */ 689111028Sjeff pipeunlock(rpipe); 690111028Sjeff 691112397Sdavidxu /* 692112397Sdavidxu * Handle non-blocking mode operation or 693111028Sjeff * wait for more data. 
694111028Sjeff */ 695111028Sjeff if (fp->f_flag & FNONBLOCK) { 696111028Sjeff error = EAGAIN; 697111028Sjeff } else { 698111028Sjeff rpipe->pipe_state |= PIPE_WANTR; 699111028Sjeff if ((error = msleep(rpipe, PIPE_MTX(rpipe), 700111028Sjeff PRIBIO | PCATCH, 701111028Sjeff "piperd", 0)) == 0) 702111028Sjeff error = pipelock(rpipe, 1); 703111028Sjeff } 704105854Sjulian if (error) 705105854Sjulian goto unlocked_error; 706111028Sjeff } 707111028Sjeff } 708105854Sjulian#ifdef MAC 709111028Sjefflocked_error: 710111028Sjeff#endif 711111028Sjeff pipeunlock(rpipe); 712111028Sjeff 713111028Sjeff /* XXX: should probably do this before getting any locks. */ 714111028Sjeff if (error == 0) 715111028Sjeff vfs_timestamp(&rpipe->pipe_atime); 716111028Sjeffunlocked_error: 717111028Sjeff --rpipe->pipe_busy; 718105854Sjulian 719111028Sjeff /* 720105854Sjulian * PIPE_WANT processing only makes sense if pipe_busy is 0. 721105854Sjulian */ 722105854Sjulian if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 723105854Sjulian rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 724103410Smini wakeup(rpipe); 725103410Smini } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 726103410Smini /* 727103410Smini * Handle write blocking hysteresis. 728103410Smini */ 729103410Smini if (rpipe->pipe_state & PIPE_WANTW) { 730103410Smini rpipe->pipe_state &= ~PIPE_WANTW; 731103410Smini wakeup(rpipe); 732103464Speter } 733103464Speter } 734103464Speter 735103464Speter if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 736103463Speter pipeselwakeup(rpipe); 737103410Smini 738103463Speter PIPE_UNLOCK(rpipe); 739113626Sjhb return (error); 740112888Sjeff} 741113626Sjhb 742103410Smini#ifndef PIPE_NODIRECT 743103410Smini/* 744103410Smini * Map the sending processes' buffer into kernel space and wire it. 745103410Smini * This is similar to a physical write operation. 
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	u_int size;
	int i;

	/*
	 * Called with the pipe mutex dropped: the page fault below may
	 * sleep.  The pipe must already be marked PIPE_DIRECTW so that
	 * no second direct write can race us.
	 */
	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
	KASSERT(wpipe->pipe_state & PIPE_DIRECTW,
	    ("Clone attempt on non-direct write pipe!"));

	/* Cap the transfer at the pipe buffer size. */
	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	/*
	 * Fault in and hold the sender's pages for reading.  A negative
	 * return means the user address range was not accessible.
	 */
	if ((i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map,
	    (vm_offset_t)uio->uio_iov->iov_base, size, VM_PROT_READ,
	    wpipe->pipe_map.ms, PIPENPAGES)) < 0)
		return (EFAULT);

/*
 * set up the control block: number of held pages, offset of the data
 * within the first page, and total byte count.
 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

/*
 * and update the uio data to consume the bytes now represented by the
 * held pages.
 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer: drop the page holds taken by
 * pipe_build_write_buffer().  Called with the pipe mutex held.
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	vm_page_unhold_pages(wpipe->pipe_map.ms, wpipe->pipe_map.npages);
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	struct uio uio;
	struct iovec iov;
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	/*
	 * Present the copied bytes as a full buffered write and leave
	 * direct-write mode before dropping the lock.
	 */
	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	/* Drop the mutex: uiomove_fromphys() copies and may sleep. */
	PIPE_UNLOCK(wpipe);
	iov.iov_base = wpipe->pipe_buffer.buffer;
	iov.iov_len = size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;
	uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
	PIPE_LOCK(wpipe);
	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	error = pipelock(wpipe, 1);
	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (error) {
		pipeunlock(wpipe);
		goto error1;
	}
	/* Wait for any in-progress direct write to drain first. */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/* Defer until the reader has drained any buffered bytes. */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	/* Wire the sender's pages; must fault without the mutex held. */
	PIPE_UNLOCK(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_LOCK(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		pipeunlock(wpipe);
		goto error1;
	}

	/*
	 * Sleep until the reader consumes the mapping (clears
	 * PIPE_DIRECTW) or the pipe is torn down.
	 */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipe_destroy_write_buffer(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
		pipelock(wpipe, 0);
	}

	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif

/*
 * The buffered pipe write path.  Grows or shrinks the pipe buffer as
 * resource pressure allows, hands large blocking writes to
 * pipe_direct_write(), and otherwise copies into the circular buffer,
 * honoring POSIX atomicity for writes of <= PIPE_BUF bytes.
 */
static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int desiredsize, orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	/* Both ends share one mutex: PIPE_MTX(rpipe) == PIPE_MTX(wpipe). */
	PIPE_LOCK(rpipe);
	error = pipelock(wpipe, 1);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if (wpipe->pipe_present != PIPE_ACTIVE ||
	    (wpipe->pipe_state & PIPE_EOF)) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#ifdef MAC
	error = mac_pipe_check_write(active_cred, wpipe->pipe_pair);
	if (error) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	/* Keep pipeclose() from freeing the pipe while we sleep below. */
	++wpipe->pipe_busy;

	/* Choose a larger size if it's advantageous */
	desiredsize = max(SMALL_PIPE_SIZE, wpipe->pipe_buffer.size);
	while (desiredsize < wpipe->pipe_buffer.cnt + uio->uio_resid) {
		if (piperesizeallowed != 1)
			break;
		if (amountpipekva > maxpipekva / 2)
			break;
		if (desiredsize == BIG_PIPE_SIZE)
			break;
		desiredsize = desiredsize * 2;
	}

	/* Choose a smaller size if we're in a OOM situation */
	if ((amountpipekva > (3 * maxpipekva) / 4) &&
	    (wpipe->pipe_buffer.size > SMALL_PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt <= SMALL_PIPE_SIZE) &&
	    (piperesizeallowed == 1))
		desiredsize = SMALL_PIPE_SIZE;

	/* Resize if the above determined that a new size was necessary */
	if ((desiredsize != wpipe->pipe_buffer.size) &&
	    ((wpipe->pipe_state & PIPE_DIRECTW) == 0)) {
		/* pipespace() allocates kva and may sleep; drop the mutex. */
		PIPE_UNLOCK(wpipe);
		pipespace(wpipe, desiredsize);
		PIPE_LOCK(wpipe);
	}
	if (wpipe->pipe_buffer.size == 0) {
		/*
		 * This can only happen for reverse direction use of pipes
		 * in a complete OOM situation.
		 */
		error = ENOMEM;
		--wpipe->pipe_busy;
		pipeunlock(wpipe);
		PIPE_UNLOCK(wpipe);
		return (error);
	}

	pipeunlock(wpipe);

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

		pipelock(wpipe, 0);
		if (wpipe->pipe_state & PIPE_EOF) {
			pipeunlock(wpipe);
			error = EPIPE;
			break;
		}
#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if (uio->uio_segflg == UIO_USERSPACE &&
		    uio->uio_iov->iov_len >= PIPE_MINDIRECT &&
		    wpipe->pipe_buffer.size >= PIPE_MINDIRECT &&
		    (fp->f_flag & FNONBLOCK) == 0) {
			pipeunlock(wpipe);
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
		if (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			pipeselwakeup(wpipe);
			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
			    "pipbww", 0);
			if (error)
				break;
			else
				continue;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			int size;	/* Transfer size */
			int segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.  If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			/* uiomove() copies from userspace and may fault. */
			PIPE_UNLOCK(rpipe);
			error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			PIPE_LOCK(rpipe);

			if (error == 0 && segsize < size) {
				KASSERT(wpipe->pipe_buffer.in + segsize ==
				    wpipe->pipe_buffer.size,
				    ("Pipe buffer wraparound disappeared"));
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */

				PIPE_UNLOCK(rpipe);
				error = uiomove(
				    &wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				PIPE_LOCK(rpipe);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
					KASSERT(wpipe->pipe_buffer.in ==
					    size - segsize +
					    wpipe->pipe_buffer.size,
					    ("Expected wraparound bad"));
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
				KASSERT(wpipe->pipe_buffer.cnt <=
				    wpipe->pipe_buffer.size,
				    ("Pipe buffer overflow"));
			}
			pipeunlock(wpipe);
			if (error != 0)
				break;
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				pipeunlock(wpipe);
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
		}
	}

	pipelock(wpipe, 0);
	--wpipe->pipe_busy;

	/* A pending pipeclose() waits for pipe_busy to reach zero. */
	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	pipeunlock(wpipe);
	PIPE_UNLOCK(rpipe);
	return (error);
}

/*
 * Pipes do not support truncation.
 */
/* ARGSUSED */
static int
pipe_truncate(fp, length, active_cred, td)
	struct file *fp;
	off_t length;
	struct ucred *active_cred;
	struct thread *td;
{

	return (EINVAL);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(fp, cmd, data, active_cred, td)
	struct file *fp;
	u_long cmd;
	void *data;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *mpipe = fp->f_data;
	int error;

	PIPE_LOCK(mpipe);

#ifdef MAC
	error = mac_pipe_check_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
	if (error) {
		PIPE_UNLOCK(mpipe);
		return (error);
	}
#endif

	error = 0;
	switch (cmd) {

	case FIONBIO:
		/* Non-blocking mode is tracked in f_flag, nothing to do. */
		break;

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		break;

	case FIONREAD:
		/* Report bytes pending in whichever mode is active. */
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		break;

	case FIOSETOWN:
		/* fsetown() may sleep; call it without the pipe mutex. */
		PIPE_UNLOCK(mpipe);
		error = fsetown(*(int *)data, &mpipe->pipe_sigio);
		goto out_unlocked;

	case FIOGETOWN:
		*(int *)data = fgetown(&mpipe->pipe_sigio);
		break;

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		PIPE_UNLOCK(mpipe);
		error = fsetown(-(*(int *)data), &mpipe->pipe_sigio);
		goto out_unlocked;

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		*(int *)data = -fgetown(&mpipe->pipe_sigio);
		break;

	default:
		error = ENOTTY;
		break;
	}
	PIPE_UNLOCK(mpipe);
out_unlocked:
	return (error);
}

/*
 * Poll support: report readability/writability of the pair and record
 * the thread for selwakeup if no requested event is ready yet.
 */
static int
pipe_poll(fp, events, active_cred, td)
	struct file *fp;
	int events;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *rpipe = fp->f_data;
	struct pipe *wpipe;
	int revents = 0;
#ifdef MAC
	int error;
#endif

	wpipe = rpipe->pipe_peer;
	PIPE_LOCK(rpipe);
#ifdef MAC
	error = mac_pipe_check_poll(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif
	/* Readable if data is buffered or a direct write is pending. */
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0))
			revents |= events & (POLLIN | POLLRDNORM);

	/* Writable if the peer is gone or >= PIPE_BUF space is free. */
	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe->pipe_present != PIPE_ACTIVE ||
		    (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((events & POLLINIGNEOF) == 0) {
		if (rpipe->pipe_state & PIPE_EOF) {
			revents |= (events & (POLLIN | POLLRDNORM));
			if (wpipe->pipe_present != PIPE_ACTIVE ||
			    (wpipe->pipe_state & PIPE_EOF))
				revents |= POLLHUP;
		}
	}

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			if (SEL_WAITING(&rpipe->pipe_sel))
				rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			if (SEL_WAITING(&wpipe->pipe_sel))
				wpipe->pipe_state |= PIPE_SEL;
		}
	}
#ifdef MAC
locked_error:
#endif
	PIPE_UNLOCK(rpipe);

	return (revents);
}

/*
 * We shouldn't need locks here as we're doing a read and this should
 * be a natural race.
 */
static int
pipe_stat(fp, ub, active_cred, td)
	struct file *fp;
	struct stat *ub;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *pipe = fp->f_data;
#ifdef MAC
	int error;

	PIPE_LOCK(pipe);
	error = mac_pipe_check_stat(active_cred, pipe->pipe_pair);
	PIPE_UNLOCK(pipe);
	if (error)
		return (error);
#endif
	bzero(ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = PAGE_SIZE;
	/* st_size is the byte count pending in whichever mode is active. */
	if (pipe->pipe_state & PIPE_DIRECTW)
		ub->st_size = pipe->pipe_map.cnt;
	else
		ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atim = pipe->pipe_atime;
	ub->st_mtim = pipe->pipe_mtime;
	ub->st_ctim = pipe->pipe_ctime;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/*
 * File descriptor close: detach the file from the pipe endpoint and
 * shut the endpoint down.
 */
/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(&cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

/*
 * Release the kva backing the pipe buffer and reset the direct-write
 * bookkeeping.  Must be called without the pipe mutex: vm_map_remove()
 * may sleep.
 */
static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

	KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
	    ("pipe_free_kmem: pipe mutex locked"));

	if (cpipe->pipe_buffer.buffer != NULL) {
		atomic_subtract_long(&amountpipekva, cpipe->pipe_buffer.size);
		vm_map_remove(pipe_map,
		    (vm_offset_t)cpipe->pipe_buffer.buffer,
		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	{
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipepair *pp;
	struct pipe *ppipe;

	KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));

	PIPE_LOCK(cpipe);
	pipelock(cpipe, 0);
	pp = cpipe->pipe_pair;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.  Sleep until every thread still
	 * inside a pipe op (pipe_busy) has drained out.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT;
		pipeunlock(cpipe);
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
		pipelock(cpipe, 0);
	}


	/*
	 * Disconnect from peer, if any.
	 */
	ppipe = cpipe->pipe_peer;
	if (ppipe->pipe_present == PIPE_ACTIVE) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE_LOCKED(&ppipe->pipe_sel.si_note, 0);
	}

	/*
	 * Mark this endpoint as free.  Release kmem resources.  We
	 * don't mark this endpoint as unused until we've finished
	 * doing that, or the pipe might disappear out from under
	 * us.
	 */
	PIPE_UNLOCK(cpipe);
	pipe_free_kmem(cpipe);
	PIPE_LOCK(cpipe);
	cpipe->pipe_present = PIPE_CLOSING;
	pipeunlock(cpipe);

	/*
	 * knlist_clear() may sleep dropping the PIPE_MTX. Set the
	 * PIPE_FINALIZED, that allows other end to free the
	 * pipe_pair, only after the knotes are completely dismantled.
	 */
	knlist_clear(&cpipe->pipe_sel.si_note, 1);
	cpipe->pipe_present = PIPE_FINALIZED;
	knlist_destroy(&cpipe->pipe_sel.si_note);

	/*
	 * If both endpoints are now closed, release the memory for the
	 * pipe pair.  If not, unlock.
	 */
	if (ppipe->pipe_present == PIPE_FINALIZED) {
		PIPE_UNLOCK(cpipe);
#ifdef MAC
		mac_pipe_destroy(pp);
#endif
		uma_zfree(pipe_zone, cpipe->pipe_pair);
	} else
		PIPE_UNLOCK(cpipe);
}

/*
 * kqueue attach: read filters watch this end, write filters watch the
 * peer end's buffer space.
 */
/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	cpipe = kn->kn_fp->f_data;
	PIPE_LOCK(cpipe);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		if (cpipe->pipe_peer->pipe_present != PIPE_ACTIVE) {
			/* other end of pipe has been closed */
			PIPE_UNLOCK(cpipe);
			return (EPIPE);
		}
		cpipe = cpipe->pipe_peer;
		break;
	default:
		PIPE_UNLOCK(cpipe);
		return (EINVAL);
	}

	knlist_add(&cpipe->pipe_sel.si_note, kn, 1);
	PIPE_UNLOCK(cpipe);
	return (0);
}

/*
 * kqueue detach: remove the knote from whichever end it was attached
 * to in pipe_kqfilter().
 */
static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	PIPE_LOCK(cpipe);
	if (kn->kn_filter == EVFILT_WRITE)
		cpipe = cpipe->pipe_peer;
	knlist_remove(&cpipe->pipe_sel.si_note, kn, 1);
	PIPE_UNLOCK(cpipe);
}

/*
 * EVFILT_READ filter: kn_data is the readable byte count; fires on any
 * data or on EOF.
 */
/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;
	int ret;

	PIPE_LOCK(rpipe);
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    wpipe->pipe_present != PIPE_ACTIVE ||
	    (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	ret = kn->kn_data > 0;
	PIPE_UNLOCK(rpipe);
	return ret;
}

/*
 * EVFILT_WRITE filter: kn_data is the free space on the peer; fires
 * when at least PIPE_BUF bytes fit, or on EOF.
 */
/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	if (wpipe->pipe_present != PIPE_ACTIVE ||
	    (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	/* No buffer space is reported while a direct write is pending. */
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	PIPE_UNLOCK(rpipe);
	return (kn->kn_data >= PIPE_BUF);
}