/* sys_pipe.c, revision 121970 */
1/* 2 * Copyright (c) 1996 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. Modifications may be freely made to this file if the above conditions 17 * are met. 18 */ 19 20/* 21 * This file contains a high-performance replacement for the socket-based 22 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 23 * all features of sockets, but does do everything that pipes normally 24 * do. 25 */ 26 27/* 28 * This code has two modes of operation, a small write mode and a large 29 * write mode. The small write mode acts like conventional pipes with 30 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 31 * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 32 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 33 * the receiving process can copy it directly from the pages in the sending 34 * process. 35 * 36 * If the sending process receives a signal, it is possible that it will 37 * go away, and certainly its address space can change, because control 38 * is returned back to the user-mode side. In that case, the pipe code 39 * arranges to copy the buffer supplied by the user process, to a pageable 40 * kernel buffer, and the receiving process will grab the data from the 41 * pageable kernel buffer. 
Since signals don't happen all that often, 42 * the copy operation is normally eliminated. 43 * 44 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 45 * happen for small transfers so that the system will not spend all of 46 * its time context switching. 47 * 48 * In order to limit the resource use of pipes, two sysctls exist: 49 * 50 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 51 * address space available to us in pipe_map. Whenever the amount in use 52 * exceeds half of this value, all new pipes will be created with size 53 * SMALL_PIPE_SIZE, rather than PIPE_SIZE. Big pipe creation will be limited 54 * as well. This value is loader tunable only. 55 * 56 * kern.ipc.maxpipekvawired - This value limits the amount of memory that may 57 * be wired in order to facilitate direct copies using page flipping. 58 * Whenever this value is exceeded, pipes will fall back to using regular 59 * copies. This value is sysctl controllable at all times. 60 * 61 * These values are autotuned in subr_param.c. 62 * 63 * Memory usage may be monitored through the sysctls 64 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired. 
65 * 66 */ 67 68#include <sys/cdefs.h> 69__FBSDID("$FreeBSD: head/sys/kern/sys_pipe.c 121970 2003-11-03 17:58:23Z rwatson $"); 70 71#include "opt_mac.h" 72 73#include <sys/param.h> 74#include <sys/systm.h> 75#include <sys/fcntl.h> 76#include <sys/file.h> 77#include <sys/filedesc.h> 78#include <sys/filio.h> 79#include <sys/kernel.h> 80#include <sys/lock.h> 81#include <sys/mac.h> 82#include <sys/mutex.h> 83#include <sys/ttycom.h> 84#include <sys/stat.h> 85#include <sys/malloc.h> 86#include <sys/poll.h> 87#include <sys/selinfo.h> 88#include <sys/signalvar.h> 89#include <sys/sysctl.h> 90#include <sys/sysproto.h> 91#include <sys/pipe.h> 92#include <sys/proc.h> 93#include <sys/vnode.h> 94#include <sys/uio.h> 95#include <sys/event.h> 96 97#include <vm/vm.h> 98#include <vm/vm_param.h> 99#include <vm/vm_object.h> 100#include <vm/vm_kern.h> 101#include <vm/vm_extern.h> 102#include <vm/pmap.h> 103#include <vm/vm_map.h> 104#include <vm/vm_page.h> 105#include <vm/uma.h> 106 107/* 108 * Use this define if you want to disable *fancy* VM things. Expect an 109 * approx 30% decrease in transfer rate. This could be useful for 110 * NetBSD or OpenBSD. 
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static fo_rdwr_t	pipe_read;
static fo_rdwr_t	pipe_write;
static fo_ioctl_t	pipe_ioctl;
static fo_poll_t	pipe_poll;
static fo_kqfilter_t	pipe_kqfilter;
static fo_stat_t	pipe_stat;
static fo_close_t	pipe_close;

/* File operations vector for DTYPE_PIPE descriptors. */
static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
static int nbigpipe;

/*
 * Global resource accounting, updated with atomic ops and exported
 * read-only through the sysctls declared below.
 */
static int amountpipes;
static int amountpipekva;
static int amountpipekvawired;

SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
    &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
    &maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
    &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
    &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
    &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
    &amountpipekvawired, 0, "Pipe wired KVA usage");

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe **cpipep);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
static __inline void pipeselwakeup(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int pipespace(struct pipe *cpipe, int size);

/* UMA zone backing all struct pipe allocations. */
static uma_zone_t pipe_zone;

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

/*
 * One-time module initialization, run via SYSINIT at SI_SUB_VFS time:
 * create the UMA zone from which pipe endpoints are allocated.
 */
static void
pipeinit(void *dummy __unused)
{

	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

/* ARGSUSED */
int
pipe(td, uap)
	struct thread *td;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	struct mtx *pmtx;
	int fd, error;

	/* The two endpoints share a single recursable mutex (see below). */
	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		/* pipeclose() tolerates a NULL endpoint. */
		pipeclose(rpipe);
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	error = falloc(td, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (error);
	}
	/* An extra reference on `rf' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	FILE_LOCK(rf);
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;
	FILE_UNLOCK(rf);
	error = falloc(td, &wf, &fd);
	if (error) {
		/*
		 * Undo the read-side fd: drop the fd-table reference only
		 * if the slot still holds rf (another thread may have
		 * closed or dup2'ed over it), then drop falloc()'s extra
		 * reference unconditionally.
		 */
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
			fdp->fd_ofiles[td->td_retval[0]] = NULL;
			FILEDESC_UNLOCK(fdp);
			fdrop(rf, td);
		} else
			FILEDESC_UNLOCK(fdp);
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (error);
	}
	/* An extra reference on `wf' has been held for us by falloc(). */
	FILE_LOCK(wf);
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;
	FILE_UNLOCK(wf);
	fdrop(wf, td);
	td->td_retval[1] = fd;
	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
#ifdef MAC
	/*
	 * struct pipe represents a pipe endpoint.  The MAC label is shared
	 * between the connected endpoints.  As a result mac_init_pipe() and
	 * mac_create_pipe() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_init_pipe(rpipe);
	mac_create_pipe(td->td_ucred, rpipe);
#endif
	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
	fdrop(rf, td);

	return (0);
}

/*
 * Allocate kva for pipe circular buffer, the space is pageable
 * This routine will 'realloc' the size of a pipe safely, if it fails
 * it will retain the old buffer.
 * If it fails it will return ENOMEM.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;
	static int curfail = 0;
	static struct timeval lastfail;

	/* Must not hold the pipe mutex: vm_map_find() below may sleep. */
	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
	    ("pipespace: pipe mutex locked"));

	size = round_page(size);
	npages = size / PAGE_SIZE;
	/*
	 * Create an object, I don't like the idea of paging to/from
	 * kernel_object.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	object = vm_object_allocate(OBJT_DEFAULT, npages);
	buffer = (caddr_t) vm_map_min(pipe_map);

	/*
	 * Insert the object into the kernel map, and allocate kva for it.
	 * The map entry is, by default, pageable.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	error = vm_map_find(pipe_map, object, 0,
	    (vm_offset_t *) &buffer, size, 1,
	    VM_PROT_ALL, VM_PROT_ALL, 0);

	if (error != KERN_SUCCESS) {
		vm_object_deallocate(object);
		if (ppsratecheck(&lastfail, &curfail, 1))
			printf("kern.maxpipekva exceeded, please see tuning(7).\n");
		return (ENOMEM);
	}

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	atomic_add_int(&amountpipes, 1);
	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
static int
pipe_create(cpipep)
	struct pipe **cpipep;
{
	struct pipe *cpipe;
	int error;

	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
	if (*cpipep == NULL)
		return (ENOMEM);

	cpipe = *cpipep;

	/*
	 * protect so pipeclose() doesn't follow a junk pointer
	 * if pipespace() fails.
	 */
	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
	cpipe->pipe_state = 0;
	cpipe->pipe_peer = NULL;
	cpipe->pipe_busy = 0;

#ifndef PIPE_NODIRECT
	/*
	 * pipe data structure initializations to support direct pipe I/O
	 */
	cpipe->pipe_map.cnt = 0;
	cpipe->pipe_map.kva = 0;
	cpipe->pipe_map.pos = 0;
	cpipe->pipe_map.npages = 0;
	/* cpipe->pipe_map.ms[] = invalid */
#endif

	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
	/*
	 * Reduce to 1/4th pipe size if we're over our global max.
	 */
	if (amountpipekva > maxpipekva / 2)
		error = pipespace(cpipe, SMALL_PIPE_SIZE);
	else
		error = pipespace(cpipe, PIPE_SIZE);
	if (error)
		return (error);

	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 * Returns 0 on success, or the msleep() error (e.g. EINTR when
 * `catch' is set and a signal arrives).  Pipe mutex must be held.
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe),
		    catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return (0);
}

/*
 * unlock a pipe I/O lock, waking any waiter that set PIPE_LWANT
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

/*
 * Notify everything waiting for readiness on this endpoint:
 * select/poll sleepers, async SIGIO recipients and kqueue knotes.
 */
static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/*
 * fo_read for pipes.  Drains the kernel buffer (or the direct-write
 * mapping) into `uio', sleeping for data in blocking mode.  Returns 0
 * with no data transferred on EOF.
 */
/* ARGSUSED */
static int
pipe_read(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	struct pipe *rpipe = fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

#ifdef MAC
	error = mac_check_pipe_read(active_cred, rpipe);
	if (error)
		goto locked_error;
#endif

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			/* Drop the mutex across uiomove(): it may fault. */
			PIPE_UNLOCK(rpipe);
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
		    (rpipe->pipe_state & PIPE_DIRECTW)) {
			caddr_t va;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			PIPE_UNLOCK(rpipe);
			error = uiomove(va, size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				/* Direct write fully consumed; release writer. */
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
				    PRIBIO | PCATCH,
				    "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
#ifdef MAC
locked_error:
#endif
	pipeunlock(rpipe);

	/* XXX: should probably do this before getting any locks. */
	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending processes' buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	pmap_t pmap;
	u_int size;
	int i, j;
	vm_offset_t addr, endaddr;

	/* Called unlocked: faulting and KVA allocation below may sleep. */
	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);

	/* Clamp the mapping to the first iovec and the pipe buffer size. */
	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	pmap = vmspace_pmap(curproc->p_vmspace);
	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		/*
		 * vm_fault_quick() can sleep.  Consequently,
		 * vm_page_lock_queue() and vm_page_unlock_queue()
		 * should not be performed outside of this loop.
		 */
	race:
		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
			/* Fault failed: release every page held so far. */
			vm_page_lock_queues();
			for (j = 0; j < i; j++)
				vm_page_unhold(wpipe->pipe_map.ms[j]);
			vm_page_unlock_queues();
			return (EFAULT);
		}
		wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
		    VM_PROT_READ);
		if (wpipe->pipe_map.ms[i] == NULL)
			goto race;
	}

/*
 * set up the control block
 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

/*
 * and map the buffer
 */
	if (wpipe->pipe_map.kva == 0) {
		/*
		 * We need to allocate space for an extra page because the
		 * address range might (will) span pages at times.
		 */
		wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
		    wpipe->pipe_buffer.size + PAGE_SIZE);
		atomic_add_int(&amountpipekvawired,
		    wpipe->pipe_buffer.size + PAGE_SIZE);
	}
	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
	    wpipe->pipe_map.npages);

/*
 * and update the uio data
 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
	if (wpipe->pipe_map.kva) {
		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);

		if (amountpipekvawired > maxpipekvawired / 2) {
			/* Conserve address space */
			vm_offset_t kva = wpipe->pipe_map.kva;
			wpipe->pipe_map.kva = 0;
			kmem_free(kernel_map, kva,
			    wpipe->pipe_buffer.size + PAGE_SIZE);
			atomic_subtract_int(&amountpipekvawired,
			    wpipe->pipe_buffer.size + PAGE_SIZE);
		}
	}
	/* Release the hold taken by pipe_build_write_buffer() on each page. */
	vm_page_lock_queues();
	for (i = 0; i < wpipe->pipe_map.npages; i++) {
		vm_page_unhold(wpipe->pipe_map.ms[i]);
	}
	vm_page_unlock_queues();
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	/* Drop the mutex: bcopy of a pageable buffer may fault. */
	PIPE_UNLOCK(wpipe);
	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
	    wpipe->pipe_buffer.buffer, size);
	pipe_destroy_write_buffer(wpipe);
	PIPE_LOCK(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 *
 * Called with the pipe mutex held; returns 0 on success, EPIPE if the
 * read side goes away, or an msleep() error (e.g. EINTR).
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	/* Wait for any previous direct write to drain. */
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	/* Wait for buffered data to drain before starting the direct write. */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	pipelock(wpipe, 0);
	PIPE_UNLOCK(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_LOCK(wpipe);
	pipeunlock(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	/* Sleep until the reader has consumed the mapped buffer. */
	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			PIPE_UNLOCK(wpipe);
			pipe_destroy_write_buffer(wpipe);
			PIPE_LOCK(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
	}

	pipelock(wpipe,0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		PIPE_UNLOCK(wpipe);
		pipe_destroy_write_buffer(wpipe);
		PIPE_LOCK(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif

/*
 * fo_write for pipes.  Chooses between the direct (page-flipping) path
 * and the buffered path, honoring PIPE_BUF atomicity for small writes.
 */
static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#ifdef MAC
	error = mac_check_pipe_write(active_cred, wpipe);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (amountpipekva < maxpipekva / 2) &&
	    (nbigpipe < LIMITBIGPIPES) &&
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe, 1)) == 0) {
			/* pipespace() must run without the pipe mutex held. */
			PIPE_UNLOCK(wpipe);
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				atomic_add_int(&nbigpipe, 1);
			PIPE_LOCK(wpipe);
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occured unbusy and return, waking up any pending
	 * readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		PIPE_UNLOCK(rpipe);
		return(error);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    amountpipekvawired + uio->uio_resid < maxpipekvawired) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
			    "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			if ((error = pipelock(wpipe,1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write to
				 * slip in on us... handle it here...
				 */
				if (wpipe->pipe_state & PIPE_DIRECTW) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				PIPE_UNLOCK(rpipe);
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
				    segsize, uio);
				PIPE_LOCK(rpipe);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer "
						    "wraparound disappeared");

					PIPE_UNLOCK(rpipe);
					error = uiomove(
					    &wpipe->pipe_buffer.buffer[0],
					    size - segsize, uio);
					PIPE_LOCK(rpipe);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in !=
						    size - segsize +
						    wpipe->pipe_buffer.size)
							panic("Expected "
							    "wraparound bad");
						wpipe->pipe_buffer.in = size -
						    segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt >
					    wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
1176 */ 1177static int 1178pipe_ioctl(fp, cmd, data, active_cred, td) 1179 struct file *fp; 1180 u_long cmd; 1181 void *data; 1182 struct ucred *active_cred; 1183 struct thread *td; 1184{ 1185 struct pipe *mpipe = fp->f_data; 1186#ifdef MAC 1187 int error; 1188#endif 1189 1190 PIPE_LOCK(mpipe); 1191 1192#ifdef MAC 1193 error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data); 1194 if (error) { 1195 PIPE_UNLOCK(mpipe); 1196 return (error); 1197 } 1198#endif 1199 1200 switch (cmd) { 1201 1202 case FIONBIO: 1203 PIPE_UNLOCK(mpipe); 1204 return (0); 1205 1206 case FIOASYNC: 1207 if (*(int *)data) { 1208 mpipe->pipe_state |= PIPE_ASYNC; 1209 } else { 1210 mpipe->pipe_state &= ~PIPE_ASYNC; 1211 } 1212 PIPE_UNLOCK(mpipe); 1213 return (0); 1214 1215 case FIONREAD: 1216 if (mpipe->pipe_state & PIPE_DIRECTW) 1217 *(int *)data = mpipe->pipe_map.cnt; 1218 else 1219 *(int *)data = mpipe->pipe_buffer.cnt; 1220 PIPE_UNLOCK(mpipe); 1221 return (0); 1222 1223 case FIOSETOWN: 1224 PIPE_UNLOCK(mpipe); 1225 return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 1226 1227 case FIOGETOWN: 1228 PIPE_UNLOCK(mpipe); 1229 *(int *)data = fgetown(&mpipe->pipe_sigio); 1230 return (0); 1231 1232 /* This is deprecated, FIOSETOWN should be used instead. */ 1233 case TIOCSPGRP: 1234 PIPE_UNLOCK(mpipe); 1235 return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 1236 1237 /* This is deprecated, FIOGETOWN should be used instead. 
*/ 1238 case TIOCGPGRP: 1239 PIPE_UNLOCK(mpipe); 1240 *(int *)data = -fgetown(&mpipe->pipe_sigio); 1241 return (0); 1242 1243 } 1244 PIPE_UNLOCK(mpipe); 1245 return (ENOTTY); 1246} 1247 1248static int 1249pipe_poll(fp, events, active_cred, td) 1250 struct file *fp; 1251 int events; 1252 struct ucred *active_cred; 1253 struct thread *td; 1254{ 1255 struct pipe *rpipe = fp->f_data; 1256 struct pipe *wpipe; 1257 int revents = 0; 1258#ifdef MAC 1259 int error; 1260#endif 1261 1262 wpipe = rpipe->pipe_peer; 1263 PIPE_LOCK(rpipe); 1264#ifdef MAC 1265 error = mac_check_pipe_poll(active_cred, rpipe); 1266 if (error) 1267 goto locked_error; 1268#endif 1269 if (events & (POLLIN | POLLRDNORM)) 1270 if ((rpipe->pipe_state & PIPE_DIRECTW) || 1271 (rpipe->pipe_buffer.cnt > 0) || 1272 (rpipe->pipe_state & PIPE_EOF)) 1273 revents |= events & (POLLIN | POLLRDNORM); 1274 1275 if (events & (POLLOUT | POLLWRNORM)) 1276 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 1277 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 1278 (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 1279 revents |= events & (POLLOUT | POLLWRNORM); 1280 1281 if ((rpipe->pipe_state & PIPE_EOF) || 1282 (wpipe == NULL) || 1283 (wpipe->pipe_state & PIPE_EOF)) 1284 revents |= POLLHUP; 1285 1286 if (revents == 0) { 1287 if (events & (POLLIN | POLLRDNORM)) { 1288 selrecord(td, &rpipe->pipe_sel); 1289 rpipe->pipe_state |= PIPE_SEL; 1290 } 1291 1292 if (events & (POLLOUT | POLLWRNORM)) { 1293 selrecord(td, &wpipe->pipe_sel); 1294 wpipe->pipe_state |= PIPE_SEL; 1295 } 1296 } 1297#ifdef MAC 1298locked_error: 1299#endif 1300 PIPE_UNLOCK(rpipe); 1301 1302 return (revents); 1303} 1304 1305/* 1306 * We shouldn't need locks here as we're doing a read and this should 1307 * be a natural race. 
1308 */ 1309static int 1310pipe_stat(fp, ub, active_cred, td) 1311 struct file *fp; 1312 struct stat *ub; 1313 struct ucred *active_cred; 1314 struct thread *td; 1315{ 1316 struct pipe *pipe = fp->f_data; 1317#ifdef MAC 1318 int error; 1319 1320 PIPE_LOCK(pipe); 1321 error = mac_check_pipe_stat(active_cred, pipe); 1322 PIPE_UNLOCK(pipe); 1323 if (error) 1324 return (error); 1325#endif 1326 bzero(ub, sizeof(*ub)); 1327 ub->st_mode = S_IFIFO; 1328 ub->st_blksize = pipe->pipe_buffer.size; 1329 ub->st_size = pipe->pipe_buffer.cnt; 1330 ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 1331 ub->st_atimespec = pipe->pipe_atime; 1332 ub->st_mtimespec = pipe->pipe_mtime; 1333 ub->st_ctimespec = pipe->pipe_ctime; 1334 ub->st_uid = fp->f_cred->cr_uid; 1335 ub->st_gid = fp->f_cred->cr_gid; 1336 /* 1337 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 1338 * XXX (st_dev, st_ino) should be unique. 1339 */ 1340 return (0); 1341} 1342 1343/* ARGSUSED */ 1344static int 1345pipe_close(fp, td) 1346 struct file *fp; 1347 struct thread *td; 1348{ 1349 struct pipe *cpipe = fp->f_data; 1350 1351 fp->f_ops = &badfileops; 1352 fp->f_data = NULL; 1353 funsetown(&cpipe->pipe_sigio); 1354 pipeclose(cpipe); 1355 return (0); 1356} 1357 1358static void 1359pipe_free_kmem(cpipe) 1360 struct pipe *cpipe; 1361{ 1362 1363 KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 1364 ("pipespace: pipe mutex locked")); 1365 1366 if (cpipe->pipe_buffer.buffer != NULL) { 1367 if (cpipe->pipe_buffer.size > PIPE_SIZE) 1368 atomic_subtract_int(&nbigpipe, 1); 1369 atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size); 1370 atomic_subtract_int(&amountpipes, 1); 1371 vm_map_remove(pipe_map, 1372 (vm_offset_t)cpipe->pipe_buffer.buffer, 1373 (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 1374 cpipe->pipe_buffer.buffer = NULL; 1375 } 1376#ifndef PIPE_NODIRECT 1377 if (cpipe->pipe_map.kva != 0) { 1378 atomic_subtract_int(&amountpipekvawired, 
1379 cpipe->pipe_buffer.size + PAGE_SIZE); 1380 kmem_free(kernel_map, 1381 cpipe->pipe_map.kva, 1382 cpipe->pipe_buffer.size + PAGE_SIZE); 1383 cpipe->pipe_map.cnt = 0; 1384 cpipe->pipe_map.kva = 0; 1385 cpipe->pipe_map.pos = 0; 1386 cpipe->pipe_map.npages = 0; 1387 } 1388#endif 1389} 1390 1391/* 1392 * shutdown the pipe 1393 */ 1394static void 1395pipeclose(cpipe) 1396 struct pipe *cpipe; 1397{ 1398 struct pipe *ppipe; 1399 int hadpeer; 1400 1401 if (cpipe == NULL) 1402 return; 1403 1404 hadpeer = 0; 1405 1406 /* partially created pipes won't have a valid mutex. */ 1407 if (PIPE_MTX(cpipe) != NULL) 1408 PIPE_LOCK(cpipe); 1409 1410 pipeselwakeup(cpipe); 1411 1412 /* 1413 * If the other side is blocked, wake it up saying that 1414 * we want to close it down. 1415 */ 1416 while (cpipe->pipe_busy) { 1417 wakeup(cpipe); 1418 cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; 1419 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1420 } 1421 1422#ifdef MAC 1423 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) 1424 mac_destroy_pipe(cpipe); 1425#endif 1426 1427 /* 1428 * Disconnect from peer 1429 */ 1430 if ((ppipe = cpipe->pipe_peer) != NULL) { 1431 hadpeer++; 1432 pipeselwakeup(ppipe); 1433 1434 ppipe->pipe_state |= PIPE_EOF; 1435 wakeup(ppipe); 1436 KNOTE(&ppipe->pipe_sel.si_note, 0); 1437 ppipe->pipe_peer = NULL; 1438 } 1439 /* 1440 * free resources 1441 */ 1442 if (PIPE_MTX(cpipe) != NULL) { 1443 PIPE_UNLOCK(cpipe); 1444 if (!hadpeer) { 1445 mtx_destroy(PIPE_MTX(cpipe)); 1446 free(PIPE_MTX(cpipe), M_TEMP); 1447 } 1448 } 1449 pipe_free_kmem(cpipe); 1450 uma_zfree(pipe_zone, cpipe); 1451} 1452 1453/*ARGSUSED*/ 1454static int 1455pipe_kqfilter(struct file *fp, struct knote *kn) 1456{ 1457 struct pipe *cpipe; 1458 1459 cpipe = kn->kn_fp->f_data; 1460 switch (kn->kn_filter) { 1461 case EVFILT_READ: 1462 kn->kn_fop = &pipe_rfiltops; 1463 break; 1464 case EVFILT_WRITE: 1465 kn->kn_fop = &pipe_wfiltops; 1466 cpipe = cpipe->pipe_peer; 1467 if (cpipe == NULL) 1468 /* 
other end of pipe has been closed */ 1469 return (EPIPE); 1470 break; 1471 default: 1472 return (1); 1473 } 1474 1475 PIPE_LOCK(cpipe); 1476 SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext); 1477 PIPE_UNLOCK(cpipe); 1478 return (0); 1479} 1480 1481static void 1482filt_pipedetach(struct knote *kn) 1483{ 1484 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 1485 1486 if (kn->kn_filter == EVFILT_WRITE) { 1487 if (cpipe->pipe_peer == NULL) 1488 return; 1489 cpipe = cpipe->pipe_peer; 1490 } 1491 1492 PIPE_LOCK(cpipe); 1493 SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext); 1494 PIPE_UNLOCK(cpipe); 1495} 1496 1497/*ARGSUSED*/ 1498static int 1499filt_piperead(struct knote *kn, long hint) 1500{ 1501 struct pipe *rpipe = kn->kn_fp->f_data; 1502 struct pipe *wpipe = rpipe->pipe_peer; 1503 1504 PIPE_LOCK(rpipe); 1505 kn->kn_data = rpipe->pipe_buffer.cnt; 1506 if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 1507 kn->kn_data = rpipe->pipe_map.cnt; 1508 1509 if ((rpipe->pipe_state & PIPE_EOF) || 1510 (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 1511 kn->kn_flags |= EV_EOF; 1512 PIPE_UNLOCK(rpipe); 1513 return (1); 1514 } 1515 PIPE_UNLOCK(rpipe); 1516 return (kn->kn_data > 0); 1517} 1518 1519/*ARGSUSED*/ 1520static int 1521filt_pipewrite(struct knote *kn, long hint) 1522{ 1523 struct pipe *rpipe = kn->kn_fp->f_data; 1524 struct pipe *wpipe = rpipe->pipe_peer; 1525 1526 PIPE_LOCK(rpipe); 1527 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 1528 kn->kn_data = 0; 1529 kn->kn_flags |= EV_EOF; 1530 PIPE_UNLOCK(rpipe); 1531 return (1); 1532 } 1533 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 1534 if (wpipe->pipe_state & PIPE_DIRECTW) 1535 kn->kn_data = 0; 1536 1537 PIPE_UNLOCK(rpipe); 1538 return (kn->kn_data >= PIPE_BUF); 1539} 1540