1/* 2 * Copyright (c) 1996 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. Modifications may be freely made to this file if the above conditions 17 * are met. 18 */ 19/* 20 * Copyright (c) 2003-2007 Apple Inc. All rights reserved. 21 * 22 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 23 * 24 * This file contains Original Code and/or Modifications of Original Code 25 * as defined in and that are subject to the Apple Public Source License 26 * Version 2.0 (the 'License'). You may not use this file except in 27 * compliance with the License. The rights granted to you under the License 28 * may not be used to create, or enable the creation or redistribution of, 29 * unlawful or unlicensed copies of an Apple operating system, or to 30 * circumvent, violate, or enable the circumvention or violation of, any 31 * terms of an Apple operating system software license agreement. 32 * 33 * Please obtain a copy of the License at 34 * http://www.opensource.apple.com/apsl/ and read it before using this file. 
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Pipes are implemented as circular buffers. The following are the valid
 * states in pipe operations (r = read offset, w = write offset, c = count):
 *
 *      _________________________________
 * 1.  |_________________________________| r=w, c=0
 *
 *      _________________________________
 * 2.  |__r:::::wc_______________________| r <= w , c > 0
 *
 *      _________________________________
 * 3.  |::::wc_____r:::::::::::::::::::::| r>w , c > 0
 *
 *      _________________________________
 * 4.  |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
 *
 *
 *  Nomenclature:-
 *  a-z define the steps in a program flow
 *  1-4 are the states as defined above
 *  Action: is what file operation is done on the pipe
 *
 *  Current:None  Action: initialize with size M=200
 *  a. State 1 ( r=0, w=0, c=0)
 *
 *  Current: a    Action: write(100) (w < M)
 *  b. State 2 (r=0, w=100, c=100)
 *
 *  Current: b    Action: write(100) (w = M-w)
 *  c. State 4 (r=0,w=0,c=200)
 *
 *  Current: b    Action: read(70)  ( r < c )
 *  d. State 2(r=70,w=100,c=30)
 *
 *  Current: d    Action: write(75) ( w < (m-w))
 *  e. State 2 (r=70,w=175,c=105)
 *
 *  Current: d    Action: write(110) ( w > (m-w))
 *  f. State 3 (r=70,w=10,c=140)
 *
 *  Current: d    Action: read(30) (r >= c )
 *  g. State 1 (r=100,w=100,c=0)
 *
 */

/*
 * This code creates half-duplex pipe buffers for facilitating file-like
 * operations on pipes. The initial buffer is very small, but this can
 * dynamically change to larger sizes based on usage. The buffer size is never
 * reduced. The total amount of kernel memory used is governed by maxpipekva.
 * If the dynamic expansion limit is reached, the output thread is blocked
 * until the pipe buffer empties enough to continue.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/file_internal.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <sys/unistd.h>
#include <sys/resourcevar.h>
#include <sys/aio_kern.h>
#include <sys/signalvar.h>
#include <sys/pipe.h>
#include <sys/sysproto.h>
#include <sys/proc_info.h>

#include <security/audit/audit.h>

#include <sys/kdebug.h>

#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <vm/vm_kern.h>
#include <libkern/OSAtomic.h>

/*
 * Shorthand accessors: each f_xxx expands to the matching fg_xxx member
 * reached through f_fglob, which lets the code below write fp->f_flag,
 * fp->f_data and so on against a struct fileproc pointer.
 */
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_type
#define f_msgcount f_fglob->fg_msgcount
#define f_cred f_fglob->fg_cred
#define f_ops f_fglob->fg_ops
#define f_offset f_fglob->fg_offset
#define f_data f_fglob->fg_data

/*
 * interfaces to the outside world exported through file operations
 */
static int pipe_read(struct fileproc *fp, struct uio *uio,
                int flags, vfs_context_t ctx);
static int pipe_write(struct fileproc *fp, struct uio *uio,
                int flags, vfs_context_t ctx);
static int pipe_close(struct fileglob *fg, vfs_context_t ctx);
static int pipe_select(struct fileproc *fp, int which, void * wql,
		vfs_context_t ctx);
static int pipe_kqfilter(struct fileproc *fp, struct knote *kn,
		vfs_context_t ctx);
static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data,
		vfs_context_t ctx);
static int pipe_drain(struct fileproc *fp,vfs_context_t ctx);

/*
 * Operations table for pipe descriptors; pipe() stores its address in
 * f_ops of both descriptors it creates.  The initializer is positional,
 * so the entry order has to follow the member order of struct fileops,
 * which is declared outside this file.
 */
struct fileops pipeops =
  { pipe_read,
    pipe_write,
    pipe_ioctl,
    pipe_select,
    pipe_close,
    pipe_kqfilter,
    pipe_drain };

static void filt_pipedetach(struct knote *kn);
static int filt_piperead(struct knote *kn, long hint);
static int filt_pipewrite(struct knote *kn, long hint);

/*
 * knote filter tables for the two directions.  Both use the same detach
 * routine and differ only in the event routine.
 */
static struct filterops pipe_rfiltops = {
        .f_isfd = 1,
        .f_detach = filt_pipedetach,
        .f_event = filt_piperead,
};

static struct filterops pipe_wfiltops = {
        .f_isfd = 1,
        .f_detach = filt_pipedetach,
        .f_event = filt_pipewrite,
};

/*
 * Accounting counters.  amountpipes and amountpipekva are adjusted with
 * OSAddAtomic() in pipespace() and pipe_free_kmem(), i.e. once per
 * allocated buffer rather than once per struct pipe.
 */
static int nbigpipe; /* for compatibility sake. no longer used */
static int amountpipes; /* total number of pipes in system */
static int amountpipekva; /* total memory used by pipes */

/* consulted by pipe_write() before it asks for a larger buffer */
int maxpipekva = PIPE_KVAMAX;		/* allowing 16MB max. */

#if PIPE_SYSCTLS
/*
 * NOTE(review): maxpipekvawired and amountpipekvawired are referenced
 * below but are not declared in the part of this file visible here --
 * confirm they exist before building with PIPE_SYSCTLS enabled.
 */
SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
	   &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED,
	   &maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED,
	   &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED,
	   &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED,
	   &amountpipekva, 0, "Pipe KVA usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED,
	   &amountpipekvawired, 0, "Pipe wired KVA usage");
#endif

/* file-local helpers, defined further down */
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe **cpipep);
static int pipespace(struct pipe *cpipe, int size);
static int choose_pipespace(unsigned long current, unsigned long expected);
static int expand_pipespace(struct pipe *p, int target_size);
static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe);
static __inline int pipeio_lock(struct pipe *cpipe, int catch);
static __inline void pipeio_unlock(struct pipe *cpipe);

/* implemented outside this file */
extern int postpipeevent(struct pipe *, int);
extern void evpipefree(struct pipe *cpipe);

/* lock group and attributes used for every pipe mutex; set up in pipeinit() */
static lck_grp_t	*pipe_mtx_grp;
static lck_attr_t	*pipe_mtx_attr;
static lck_grp_attr_t	*pipe_mtx_grp_attr;

/* zone that pipe_create() allocates struct pipe from */
static zone_t pipe_zone;

/* larger of PIPE_SIZE and the pipe's current buffer size */
#define MAX_PIPESIZE(pipe)  		( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) )

#define	PIPE_GARBAGE_AGE_LIMIT		5000	/* In milliseconds */
#define PIPE_GARBAGE_QUEUE_LIMIT	32000

/*
 * Singly linked queue node (pg_next) pairing a pipe with a timestamp.
 * NOTE(review): presumably one node per closed pipe waiting to be freed
 * by pipe_garbage_collect(); that routine is not visible here -- confirm.
 */
struct pipe_garbage {
	struct pipe		*pg_pipe;
	struct pipe_garbage	*pg_next;
	uint64_t		pg_timestamp;
};

static zone_t pipe_garbage_zone;
static struct pipe_garbage *pipe_garbage_head = NULL;
static struct pipe_garbage *pipe_garbage_tail = NULL;
static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT;
static int pipe_garbage_count = 0;
static lck_mtx_t *pipe_garbage_lock;
static void pipe_garbage_collect(struct pipe *cpipe);

/* hook pipeinit() into system initialization */
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

/*
 * initial setup done at time of sysinit
 *
 * Creates everything the rest of the file allocates from: the zone for
 * struct pipe, the lock group and attributes handed to
 * lck_mtx_alloc_init() when a pipe pair is created, and the zone and
 * mutex used by the dead-pipe garbage queue.
 */
void
pipeinit(void)
{
	nbigpipe=0;
	vm_size_t zone_size;

	/*
	 * NOTE(review): zone_size is passed as zinit()'s second argument,
	 * presumably the zone's maximum size in bytes (room for 8192
	 * pipes) -- confirm against the zinit() declaration.
	 */
	zone_size = 8192 * sizeof(struct pipe);
        pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone");


	/* allocate lock group attribute and group for pipe mutexes */
	pipe_mtx_grp_attr = lck_grp_attr_alloc_init();
	pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr);

	/* allocate the lock attribute for pipe mutexes */
	pipe_mtx_attr = lck_attr_alloc_init();

	/*
	 * Set up garbage collection for dead pipes
	 */
	zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) *
	    sizeof(struct pipe_garbage);
        pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage),
	    zone_size, 4096, "pipe garbage zone");
	pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr);

}

/* Bitmap for things to touch in pipe_touch() */
#define	PIPE_ATIME	0x00000001	/* time of last access */
#define	PIPE_MTIME	0x00000002	/* time of last modification */
#define	PIPE_CTIME	0x00000004	/* time of last
status change */ 295 296static void 297pipe_touch(struct pipe *tpipe, int touch) 298{ 299 struct timeval now; 300 301 microtime(&now); 302 303 if (touch & PIPE_ATIME) { 304 tpipe->st_atimespec.tv_sec = now.tv_sec; 305 tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000; 306 } 307 308 if (touch & PIPE_MTIME) { 309 tpipe->st_mtimespec.tv_sec = now.tv_sec; 310 tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000; 311 } 312 313 if (touch & PIPE_CTIME) { 314 tpipe->st_ctimespec.tv_sec = now.tv_sec; 315 tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000; 316 } 317} 318 319static const unsigned int pipesize_blocks[] = {128,256,1024,2048,PAGE_SIZE, PAGE_SIZE * 2, PIPE_SIZE , PIPE_SIZE * 4 }; 320 321/* 322 * finds the right size from possible sizes in pipesize_blocks 323 * returns the size which matches max(current,expected) 324 */ 325static int 326choose_pipespace(unsigned long current, unsigned long expected) 327{ 328 int i = sizeof(pipesize_blocks)/sizeof(unsigned int) -1; 329 unsigned long target; 330 331 if (expected > current) 332 target = expected; 333 else 334 target = current; 335 336 while ( i >0 && pipesize_blocks[i-1] > target) { 337 i=i-1; 338 339 } 340 341 return pipesize_blocks[i]; 342} 343 344 345/* 346 * expand the size of pipe while there is data to be read, 347 * and then free the old buffer once the current buffered 348 * data has been transferred to new storage. 349 * Required: PIPE_LOCK and io lock to be held by caller. 
350 * returns 0 on success or no expansion possible 351 */ 352static int 353expand_pipespace(struct pipe *p, int target_size) 354{ 355 struct pipe tmp, oldpipe; 356 int error; 357 tmp.pipe_buffer.buffer = 0; 358 359 if (p->pipe_buffer.size >= (unsigned) target_size) { 360 return 0; /* the existing buffer is max size possible */ 361 } 362 363 /* create enough space in the target */ 364 error = pipespace(&tmp, target_size); 365 if (error != 0) 366 return (error); 367 368 oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer; 369 oldpipe.pipe_buffer.size = p->pipe_buffer.size; 370 371 memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size); 372 if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out ){ 373 /* we are in State 3 and need extra copying for read to be consistent */ 374 memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size); 375 p->pipe_buffer.in += p->pipe_buffer.size; 376 } 377 378 p->pipe_buffer.buffer = tmp.pipe_buffer.buffer; 379 p->pipe_buffer.size = tmp.pipe_buffer.size; 380 381 382 pipe_free_kmem(&oldpipe); 383 return 0; 384} 385 386/* 387 * The pipe system call for the DTYPE_PIPE type of pipes 388 * 389 * returns: 390 * FREAD | fd0 | -->[struct rpipe] --> |~~buffer~~| \ 391 * (pipe_mutex) 392 * FWRITE | fd1 | -->[struct wpipe] --X / 393 */ 394 395/* ARGSUSED */ 396int 397pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval) 398{ 399 struct fileproc *rf, *wf; 400 struct pipe *rpipe, *wpipe; 401 lck_mtx_t *pmtx; 402 int fd, error; 403 404 if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL) 405 return (ENOMEM); 406 407 rpipe = wpipe = NULL; 408 if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 409 error = ENFILE; 410 goto freepipes; 411 } 412 /* 413 * allocate the space for the normal I/O direction up 414 * front... we'll delay the allocation for the other 415 * direction until a write actually occurs (most likely it won't)... 
416 */ 417 error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0)); 418 if (error) 419 goto freepipes; 420 421 TAILQ_INIT(&rpipe->pipe_evlist); 422 TAILQ_INIT(&wpipe->pipe_evlist); 423 424 error = falloc(p, &rf, &fd, vfs_context_current()); 425 if (error) { 426 goto freepipes; 427 } 428 retval[0] = fd; 429 430 /* 431 * for now we'll create half-duplex pipes(refer returns section above). 432 * this is what we've always supported.. 433 */ 434 rf->f_flag = FREAD; 435 rf->f_type = DTYPE_PIPE; 436 rf->f_data = (caddr_t)rpipe; 437 rf->f_ops = &pipeops; 438 439 error = falloc(p, &wf, &fd, vfs_context_current()); 440 if (error) { 441 fp_free(p, retval[0], rf); 442 goto freepipes; 443 } 444 wf->f_flag = FWRITE; 445 wf->f_type = DTYPE_PIPE; 446 wf->f_data = (caddr_t)wpipe; 447 wf->f_ops = &pipeops; 448 449 rpipe->pipe_peer = wpipe; 450 wpipe->pipe_peer = rpipe; 451 /* both structures share the same mutex */ 452 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; 453 454 retval[1] = fd; 455#if CONFIG_MACF 456 /* 457 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX 458 * 459 * struct pipe represents a pipe endpoint. The MAC label is shared 460 * between the connected endpoints. As a result mac_pipe_label_init() and 461 * mac_pipe_label_associate() should only be called on one of the endpoints 462 * after they have been connected. 
463 */ 464 mac_pipe_label_init(rpipe); 465 mac_pipe_label_associate(kauth_cred_get(), rpipe); 466 wpipe->pipe_label = rpipe->pipe_label; 467#endif 468 proc_fdlock_spin(p); 469 procfdtbl_releasefd(p, retval[0], NULL); 470 procfdtbl_releasefd(p, retval[1], NULL); 471 fp_drop(p, retval[0], rf, 1); 472 fp_drop(p, retval[1], wf, 1); 473 proc_fdunlock(p); 474 475 476 return (0); 477 478freepipes: 479 pipeclose(rpipe); 480 pipeclose(wpipe); 481 lck_mtx_free(pmtx, pipe_mtx_grp); 482 483 return (error); 484} 485 486int 487pipe_stat(struct pipe *cpipe, void *ub, int isstat64) 488{ 489#if CONFIG_MACF 490 int error; 491#endif 492 int pipe_size = 0; 493 int pipe_count; 494 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ 495 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ 496 497 if (cpipe == NULL) 498 return (EBADF); 499 PIPE_LOCK(cpipe); 500 501#if CONFIG_MACF 502 error = mac_pipe_check_stat(kauth_cred_get(), cpipe); 503 if (error) { 504 PIPE_UNLOCK(cpipe); 505 return (error); 506 } 507#endif 508 if (cpipe->pipe_buffer.buffer == 0) { 509 /* must be stat'ing the write fd */ 510 if (cpipe->pipe_peer) { 511 /* the peer still exists, use it's info */ 512 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer); 513 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt; 514 } else { 515 pipe_count = 0; 516 } 517 } else { 518 pipe_size = MAX_PIPESIZE(cpipe); 519 pipe_count = cpipe->pipe_buffer.cnt; 520 } 521 /* 522 * since peer's buffer is setup ouside of lock 523 * we might catch it in transient state 524 */ 525 if (pipe_size == 0) 526 pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]); 527 528 if (isstat64 != 0) { 529 sb64 = (struct stat64 *)ub; 530 531 bzero(sb64, sizeof(*sb64)); 532 sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; 533 sb64->st_blksize = pipe_size; 534 sb64->st_size = pipe_count; 535 sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize; 536 537 sb64->st_uid = 
kauth_getuid(); 538 sb64->st_gid = kauth_getgid(); 539 540 sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec; 541 sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec; 542 543 sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec; 544 sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec; 545 546 sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec; 547 sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec; 548 549 /* 550 * Return a relatively unique inode number based on the current 551 * address of this pipe's struct pipe. This number may be recycled 552 * relatively quickly. 553 */ 554 sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe); 555 } else { 556 sb = (struct stat *)ub; 557 558 bzero(sb, sizeof(*sb)); 559 sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; 560 sb->st_blksize = pipe_size; 561 sb->st_size = pipe_count; 562 sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize; 563 564 sb->st_uid = kauth_getuid(); 565 sb->st_gid = kauth_getgid(); 566 567 sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec; 568 sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec; 569 570 sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec; 571 sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec; 572 573 sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec; 574 sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec; 575 576 /* 577 * Return a relatively unique inode number based on the current 578 * address of this pipe's struct pipe. This number may be recycled 579 * relatively quickly. 580 */ 581 sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe); 582 } 583 PIPE_UNLOCK(cpipe); 584 585 /* 586 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen, 587 * st_uid, st_gid. 588 * 589 * XXX (st_dev) should be unique, but there is no device driver that 590 * XXX is associated with pipes, since they are implemented via a 591 * XXX struct fileops indirection rather than as FS objects. 
592 */ 593 return (0); 594} 595 596 597/* 598 * Allocate kva for pipe circular buffer, the space is pageable 599 * This routine will 'realloc' the size of a pipe safely, if it fails 600 * it will retain the old buffer. 601 * If it fails it will return ENOMEM. 602 */ 603static int 604pipespace(struct pipe *cpipe, int size) 605{ 606 vm_offset_t buffer; 607 608 if (size <= 0) 609 return(EINVAL); 610 611 if ((buffer = (vm_offset_t)kalloc(size)) == 0 ) 612 return(ENOMEM); 613 614 /* free old resources if we're resizing */ 615 pipe_free_kmem(cpipe); 616 cpipe->pipe_buffer.buffer = (caddr_t)buffer; 617 cpipe->pipe_buffer.size = size; 618 cpipe->pipe_buffer.in = 0; 619 cpipe->pipe_buffer.out = 0; 620 cpipe->pipe_buffer.cnt = 0; 621 622 OSAddAtomic(1, &amountpipes); 623 OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva); 624 625 return (0); 626} 627 628/* 629 * initialize and allocate VM and memory for pipe 630 */ 631static int 632pipe_create(struct pipe **cpipep) 633{ 634 struct pipe *cpipe; 635 cpipe = (struct pipe *)zalloc(pipe_zone); 636 637 if ((*cpipep = cpipe) == NULL) 638 return (ENOMEM); 639 640 /* 641 * protect so pipespace or pipeclose don't follow a junk pointer 642 * if pipespace() fails. 643 */ 644 bzero(cpipe, sizeof *cpipe); 645 646 /* Initial times are all the time of creation of the pipe */ 647 pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME); 648 return (0); 649} 650 651 652/* 653 * lock a pipe for I/O, blocking other access 654 */ 655static inline int 656pipeio_lock(struct pipe *cpipe, int catch) 657{ 658 int error; 659 while (cpipe->pipe_state & PIPE_LOCKFL) { 660 cpipe->pipe_state |= PIPE_LWANT; 661 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? 
(PRIBIO | PCATCH) : PRIBIO, 662 "pipelk", 0); 663 if (error != 0) 664 return (error); 665 } 666 cpipe->pipe_state |= PIPE_LOCKFL; 667 return (0); 668} 669 670/* 671 * unlock a pipe I/O lock 672 */ 673static inline void 674pipeio_unlock(struct pipe *cpipe) 675{ 676 cpipe->pipe_state &= ~PIPE_LOCKFL; 677 if (cpipe->pipe_state & PIPE_LWANT) { 678 cpipe->pipe_state &= ~PIPE_LWANT; 679 wakeup(cpipe); 680 } 681} 682 683/* 684 * wakeup anyone whos blocked in select 685 */ 686static void 687pipeselwakeup(struct pipe *cpipe, struct pipe *spipe) 688{ 689 if (cpipe->pipe_state & PIPE_SEL) { 690 cpipe->pipe_state &= ~PIPE_SEL; 691 selwakeup(&cpipe->pipe_sel); 692 } 693 if (cpipe->pipe_state & PIPE_KNOTE) 694 KNOTE(&cpipe->pipe_sel.si_note, 1); 695 696 postpipeevent(cpipe, EV_RWBYTES); 697 698 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) { 699 if (spipe->pipe_pgid < 0) 700 gsignal(-spipe->pipe_pgid, SIGIO); 701 else 702 proc_signal(spipe->pipe_pgid, SIGIO); 703 } 704} 705 706/* 707 * Read n bytes from the buffer. Semantics are similar to file read. 
 * returns: 0 on success (including EOF) or an errno value.  The byte
 * count is NOT the return value: nread is kept only to decide when to
 * stop looping.
 *
 * Locking: the pipe mutex is taken on entry and released on exit; it is
 * dropped around uiomove() and while sleeping in msleep().  The I/O lock
 * (pipeio_lock) is held across the copy so the buffer is not reshaped
 * underneath it.
 */
/* ARGSUSED */
static int
pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags,
	__unused vfs_context_t ctx)
{
	struct pipe *rpipe = (struct pipe *)fp->f_data;
	int error;
	int nread = 0;		/* bytes copied out so far */
	u_int size;		/* bytes to copy in the current pass */

	PIPE_LOCK(rpipe);
	/* count ourselves as active; undone at unlocked_error */
	++rpipe->pipe_busy;

	error = pipeio_lock(rpipe, 1);
	if (error)
		goto unlocked_error;

#if CONFIG_MACF
	error = mac_pipe_check_read(kauth_cred_get(), rpipe);
	if (error)
		goto locked_error;
#endif


	while (uio_resid(uio)) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			/*
			 * # bytes to read is min( bytes from read pointer until end of buffer,
			 *                         total unread bytes,
			 *                         user requested byte count)
			 */
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			// LP64todo - fix this!
			if (size > (u_int) uio_resid(uio))
				size = (u_int) uio_resid(uio);

			PIPE_UNLOCK(rpipe); /* we still hold io lock.*/
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			/* advance the read offset, wrapping at the end of the buffer */
			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				break;
			}

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read in previous iteration.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeio_unlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0);
				if (error == 0)
				        error = pipeio_lock(rpipe, 1);
			}
			/* the I/O lock is not held here, so skip its release */
			if (error)
				goto unlocked_error;
		}
	}
#if CONFIG_MACF
locked_error:
#endif
	/* reached with the I/O lock held */
	pipeio_unlock(rpipe);

unlocked_error:
	/* reached with the pipe mutex held but without the I/O lock */
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	/* the buffer has free room: notify select/knote/event watchers */
	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0)
		pipeselwakeup(rpipe, rpipe->pipe_peer);

	/* update last read time */
	pipe_touch(rpipe, PIPE_ATIME);

	PIPE_UNLOCK(rpipe);

	return (error);
}

/*
 * perform a write of n bytes into the read side of buffer. Since
 * pipes are unidirectional a write is meant to be read by the otherside only.
 */
/*
 * Naming inside this function: `rpipe' is the endpoint behind the
 * descriptor being written (fp->f_data) and `wpipe' is its peer, whose
 * buffer receives the data.  Both share one mutex, so PIPE_LOCK(rpipe)
 * protects wpipe as well.
 *
 * returns: 0 on success or an errno value (EPIPE when the peer is gone
 * or draining, EAGAIN for a non-blocking write that cannot proceed,
 * ENOMEM/other errors from buffer allocation, or the error from
 * pipeio_lock()/uiomove()/msleep()).
 */
static int
pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags,
	__unused vfs_context_t ctx)
{
	int error = 0;
	int orig_resid;		/* size of the whole request, fixed at entry */
	int pipe_size;		/* non-zero when a buffer (re)allocation is wanted */
	struct pipe *wpipe, *rpipe;
	// LP64todo - fix this!
	orig_resid = uio_resid(uio);
	int space;		/* free bytes in the peer's buffer */

	rpipe = (struct pipe *)fp->f_data;

	PIPE_LOCK(rpipe);
	wpipe = rpipe->pipe_peer;

	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 * (Only EPIPE is returned here; no signal is posted by this
	 * function itself.)
	 */
	if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) {
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#if CONFIG_MACF
	error = mac_pipe_check_write(kauth_cred_get(), wpipe);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	/* count ourselves as active on the peer; undone on every exit below */
	++wpipe->pipe_busy;

	pipe_size = 0;

	/*
	 * need to allocate some storage... we delay the allocation
	 * until the first write on fd[0] to avoid allocating storage for both
	 * 'pipe ends'... most pipes are half-duplex with the writes targeting
	 * fd[1], so allocating space for both ends is a waste...
	 *
	 * A size is also chosen when the request does not fit in the free
	 * space and the global usage is still below maxpipekva.
	 */

	if ( wpipe->pipe_buffer.buffer == 0 || (
		(unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt &&
		amountpipekva < maxpipekva ) ) {

	        pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid);
	}
	if (pipe_size) {
	        /*
		 * need to do initial allocation or resizing of pipe
		 * holding both structure and io locks.
		 */
		if ((error = pipeio_lock(wpipe, 1)) == 0) {
			/* empty pipe: plain replacement; otherwise keep the data */
			if (wpipe->pipe_buffer.cnt == 0)
				error = pipespace(wpipe, pipe_size);
			else
				error = expand_pipespace(wpipe, pipe_size);

			pipeio_unlock(wpipe);

			/* allocation failed */
			if (wpipe->pipe_buffer.buffer == 0)
			        error = ENOMEM;
		}
		if (error) {
		        /*
			 * If an error occurred unbusy and return, waking up any pending
			 * readers.
			 */
		        --wpipe->pipe_busy;
			if ((wpipe->pipe_busy == 0) &&
			    (wpipe->pipe_state & PIPE_WANT)) {
			        wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
				wakeup(wpipe);
			}
			PIPE_UNLOCK(rpipe);
			return(error);
		}
	}

	while (uio_resid(uio)) {

	retrywrite:
		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {

			if ((error = pipeio_lock(wpipe,1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
					pipeio_unlock(wpipe);
				        error = EPIPE;
					break;
				}
				/*
				 * If a process blocked in pipeio_lock, our
				 * value for space might be bad... the mutex
				 * is dropped while we're blocked
				 */
				if (space > (int)(wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt)) {
					pipeio_unlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				// LP64todo - fix this!
				if (space > uio_resid(uio))
					size = uio_resid(uio);
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
					wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				/* mutex dropped for the copy; the I/O lock stays held */
				PIPE_UNLOCK(rpipe);
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
						segsize, uio);
				PIPE_LOCK(rpipe);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened. (State 3)
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer "
						    "wraparound disappeared");

					PIPE_UNLOCK(rpipe);
					error = uiomove(
					    &wpipe->pipe_buffer.buffer[0],
				    	    size - segsize, uio);
					PIPE_LOCK(rpipe);
				}
				/*
				 * readers never know to read until count is updated.
				 */
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					/*
					 * Only a write offset strictly past the end is
					 * folded back; an offset equal to the buffer
					 * size is left as is.
					 */
					if (wpipe->pipe_buffer.in >
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in !=
						    size - segsize +
						    wpipe->pipe_buffer.size)
							panic("Expected "
							    "wraparound bad");
						wpipe->pipe_buffer.in = size -
						    segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt >
					    wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeio_unlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			/*
			 * don't block on non-blocking I/O
			 * we'll do the pipeselwakeup on the way out
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 * (As above, the code here only sets EPIPE.)
			 */
			if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) {
				error = EPIPE;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe, wpipe);

			wpipe->pipe_state |= PIPE_WANTW;

			error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0);

			if (error != 0)
				break;
		}
	}
	--wpipe->pipe_busy;

	/* a waiter flagged PIPE_WANT and we were the last active user */
	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	}
	if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If there are any characters in the buffer, we wake up
		 * the reader if it was blocked waiting for data.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		/*
		 * wake up thread blocked in select/poll or post the notification
		 */
		pipeselwakeup(wpipe, wpipe);
	}

	/* Update modification, status change (# of bytes in pipe) times */
	pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME);
	pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME);
	PIPE_UNLOCK(rpipe);

	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
1115 */ 1116/* ARGSUSED 3 */ 1117static int 1118pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, 1119 __unused vfs_context_t ctx) 1120{ 1121 struct pipe *mpipe = (struct pipe *)fp->f_data; 1122#if CONFIG_MACF 1123 int error; 1124#endif 1125 1126 PIPE_LOCK(mpipe); 1127 1128#if CONFIG_MACF 1129 error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd); 1130 if (error) { 1131 PIPE_UNLOCK(mpipe); 1132 1133 return (error); 1134 } 1135#endif 1136 1137 switch (cmd) { 1138 1139 case FIONBIO: 1140 PIPE_UNLOCK(mpipe); 1141 return (0); 1142 1143 case FIOASYNC: 1144 if (*(int *)data) { 1145 mpipe->pipe_state |= PIPE_ASYNC; 1146 } else { 1147 mpipe->pipe_state &= ~PIPE_ASYNC; 1148 } 1149 PIPE_UNLOCK(mpipe); 1150 return (0); 1151 1152 case FIONREAD: 1153 *(int *)data = mpipe->pipe_buffer.cnt; 1154 PIPE_UNLOCK(mpipe); 1155 return (0); 1156 1157 case TIOCSPGRP: 1158 mpipe->pipe_pgid = *(int *)data; 1159 1160 PIPE_UNLOCK(mpipe); 1161 return (0); 1162 1163 case TIOCGPGRP: 1164 *(int *)data = mpipe->pipe_pgid; 1165 1166 PIPE_UNLOCK(mpipe); 1167 return (0); 1168 1169 } 1170 PIPE_UNLOCK(mpipe); 1171 return (ENOTTY); 1172} 1173 1174 1175static int 1176pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx) 1177{ 1178 struct pipe *rpipe = (struct pipe *)fp->f_data; 1179 struct pipe *wpipe; 1180 int retnum = 0; 1181 1182 if (rpipe == NULL || rpipe == (struct pipe *)-1) 1183 return (retnum); 1184 1185 PIPE_LOCK(rpipe); 1186 1187 wpipe = rpipe->pipe_peer; 1188 1189 1190#if CONFIG_MACF 1191 /* 1192 * XXX We should use a per thread credential here; minimally, the 1193 * XXX process credential should have a persistent reference on it 1194 * XXX before being passed in here. 
1195 */ 1196 if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) { 1197 PIPE_UNLOCK(rpipe); 1198 return (0); 1199 } 1200#endif 1201 switch (which) { 1202 1203 case FREAD: 1204 if ((rpipe->pipe_state & PIPE_DIRECTW) || 1205 (rpipe->pipe_buffer.cnt > 0) || 1206 (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { 1207 1208 retnum = 1; 1209 } else { 1210 rpipe->pipe_state |= PIPE_SEL; 1211 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql); 1212 } 1213 break; 1214 1215 case FWRITE: 1216 if (wpipe) 1217 wpipe->pipe_state |= PIPE_WSELECT; 1218 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || 1219 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 1220 (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) > 0)) { 1221 1222 retnum = 1; 1223 } else { 1224 wpipe->pipe_state |= PIPE_SEL; 1225 selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql); 1226 } 1227 break; 1228 case 0: 1229 rpipe->pipe_state |= PIPE_SEL; 1230 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql); 1231 break; 1232 } 1233 PIPE_UNLOCK(rpipe); 1234 1235 return (retnum); 1236} 1237 1238 1239/* ARGSUSED 1 */ 1240static int 1241pipe_close(struct fileglob *fg, __unused vfs_context_t ctx) 1242{ 1243 struct pipe *cpipe; 1244 1245 proc_fdlock_spin(vfs_context_proc(ctx)); 1246 cpipe = (struct pipe *)fg->fg_data; 1247 fg->fg_data = NULL; 1248 proc_fdunlock(vfs_context_proc(ctx)); 1249 if (cpipe) 1250 pipeclose(cpipe); 1251 1252 return (0); 1253} 1254 1255static void 1256pipe_free_kmem(struct pipe *cpipe) 1257{ 1258 if (cpipe->pipe_buffer.buffer != NULL) { 1259 OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva); 1260 OSAddAtomic(-1, &amountpipes); 1261 kfree((void *)cpipe->pipe_buffer.buffer, 1262 cpipe->pipe_buffer.size); 1263 cpipe->pipe_buffer.buffer = NULL; 1264 cpipe->pipe_buffer.size = 0; 1265 } 1266} 1267 1268/* 1269 * shutdown the pipe 1270 */ 1271static void 1272pipeclose(struct pipe *cpipe) 1273{ 1274 struct pipe *ppipe; 1275 1276 if (cpipe == NULL) 1277 return; 1278 /* 
partially created pipes won't have a valid mutex. */ 1279 if (PIPE_MTX(cpipe) != NULL) 1280 PIPE_LOCK(cpipe); 1281 1282 1283 /* 1284 * If the other side is blocked, wake it up saying that 1285 * we want to close it down. 1286 */ 1287 cpipe->pipe_state &= ~PIPE_DRAIN; 1288 cpipe->pipe_state |= PIPE_EOF; 1289 pipeselwakeup(cpipe, cpipe); 1290 1291 while (cpipe->pipe_busy) { 1292 cpipe->pipe_state |= PIPE_WANT; 1293 1294 wakeup(cpipe); 1295 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1296 } 1297 1298#if CONFIG_MACF 1299 /* 1300 * Free the shared pipe label only after the two ends are disconnected. 1301 */ 1302 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) 1303 mac_pipe_label_destroy(cpipe); 1304#endif 1305 1306 /* 1307 * Disconnect from peer 1308 */ 1309 if ((ppipe = cpipe->pipe_peer) != NULL) { 1310 1311 ppipe->pipe_state &= ~(PIPE_DRAIN); 1312 ppipe->pipe_state |= PIPE_EOF; 1313 1314 pipeselwakeup(ppipe, ppipe); 1315 wakeup(ppipe); 1316 1317 if (cpipe->pipe_state & PIPE_KNOTE) 1318 KNOTE(&ppipe->pipe_sel.si_note, 1); 1319 1320 postpipeevent(ppipe, EV_RCLOSED); 1321 1322 ppipe->pipe_peer = NULL; 1323 } 1324 evpipefree(cpipe); 1325 1326 /* 1327 * free resources 1328 */ 1329 if (PIPE_MTX(cpipe) != NULL) { 1330 if (ppipe != NULL) { 1331 /* 1332 * since the mutex is shared and the peer is still 1333 * alive, we need to release the mutex, not free it 1334 */ 1335 PIPE_UNLOCK(cpipe); 1336 } else { 1337 /* 1338 * peer is gone, so we're the sole party left with 1339 * interest in this mutex... 
we can just free it 1340 */ 1341 lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp); 1342 } 1343 } 1344 pipe_free_kmem(cpipe); 1345 if (cpipe->pipe_state & PIPE_WSELECT) { 1346 pipe_garbage_collect(cpipe); 1347 } else { 1348 zfree(pipe_zone, cpipe); 1349 pipe_garbage_collect(NULL); 1350 } 1351 1352} 1353 1354/*ARGSUSED*/ 1355static int 1356pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx) 1357{ 1358 struct pipe *cpipe; 1359 1360 cpipe = (struct pipe *)kn->kn_fp->f_data; 1361 1362 PIPE_LOCK(cpipe); 1363#if CONFIG_MACF 1364 /* 1365 * XXX We should use a per thread credential here; minimally, the 1366 * XXX process credential should have a persistent reference on it 1367 * XXX before being passed in here. 1368 */ 1369 if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) { 1370 PIPE_UNLOCK(cpipe); 1371 return (1); 1372 } 1373#endif 1374 1375 switch (kn->kn_filter) { 1376 case EVFILT_READ: 1377 kn->kn_fop = &pipe_rfiltops; 1378 1379 break; 1380 case EVFILT_WRITE: 1381 kn->kn_fop = &pipe_wfiltops; 1382 1383 if (cpipe->pipe_peer == NULL) { 1384 /* 1385 * other end of pipe has been closed 1386 */ 1387 PIPE_UNLOCK(cpipe); 1388 return (EPIPE); 1389 } 1390 if (cpipe->pipe_peer) 1391 cpipe = cpipe->pipe_peer; 1392 break; 1393 default: 1394 PIPE_UNLOCK(cpipe); 1395 return (1); 1396 } 1397 1398 if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn)) 1399 cpipe->pipe_state |= PIPE_KNOTE; 1400 1401 PIPE_UNLOCK(cpipe); 1402 return (0); 1403} 1404 1405static void 1406filt_pipedetach(struct knote *kn) 1407{ 1408 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 1409 1410 PIPE_LOCK(cpipe); 1411 1412 if (kn->kn_filter == EVFILT_WRITE) { 1413 if (cpipe->pipe_peer == NULL) { 1414 PIPE_UNLOCK(cpipe); 1415 return; 1416 } 1417 cpipe = cpipe->pipe_peer; 1418 } 1419 if (cpipe->pipe_state & PIPE_KNOTE) { 1420 if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn)) 1421 cpipe->pipe_state &= ~PIPE_KNOTE; 1422 } 1423 PIPE_UNLOCK(cpipe); 1424} 1425 
1426/*ARGSUSED*/ 1427static int 1428filt_piperead(struct knote *kn, long hint) 1429{ 1430 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1431 struct pipe *wpipe; 1432 int retval; 1433 1434 /* 1435 * if hint == 0, then we've been called from the kevent 1436 * world directly and do not currently hold the pipe mutex... 1437 * if hint == 1, we're being called back via the KNOTE post 1438 * we made in pipeselwakeup, and we already hold the mutex... 1439 */ 1440 if (hint == 0) 1441 PIPE_LOCK(rpipe); 1442 1443 wpipe = rpipe->pipe_peer; 1444 kn->kn_data = rpipe->pipe_buffer.cnt; 1445 if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || 1446 (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { 1447 kn->kn_flags |= EV_EOF; 1448 retval = 1; 1449 } else { 1450 int64_t lowwat = 1; 1451 if (kn->kn_sfflags & NOTE_LOWAT) { 1452 if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe)) 1453 lowwat = MAX_PIPESIZE(rpipe); 1454 else if (kn->kn_sdata > lowwat) 1455 lowwat = kn->kn_sdata; 1456 } 1457 retval = kn->kn_data >= lowwat; 1458 } 1459 1460 if (hint == 0) 1461 PIPE_UNLOCK(rpipe); 1462 1463 return (retval); 1464} 1465 1466/*ARGSUSED*/ 1467static int 1468filt_pipewrite(struct knote *kn, long hint) 1469{ 1470 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1471 struct pipe *wpipe; 1472 1473 /* 1474 * if hint == 0, then we've been called from the kevent 1475 * world directly and do not currently hold the pipe mutex... 1476 * if hint == 1, we're being called back via the KNOTE post 1477 * we made in pipeselwakeup, and we already hold the mutex... 
1478 */ 1479 if (hint == 0) 1480 PIPE_LOCK(rpipe); 1481 1482 wpipe = rpipe->pipe_peer; 1483 1484 if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { 1485 kn->kn_data = 0; 1486 kn->kn_flags |= EV_EOF; 1487 1488 if (hint == 0) 1489 PIPE_UNLOCK(rpipe); 1490 return (1); 1491 } 1492 kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt; 1493 1494 int64_t lowwat = PIPE_BUF; 1495 if (kn->kn_sfflags & NOTE_LOWAT) { 1496 if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe)) 1497 lowwat = MAX_PIPESIZE(wpipe); 1498 else if (kn->kn_sdata > lowwat) 1499 lowwat = kn->kn_sdata; 1500 } 1501 1502 if (hint == 0) 1503 PIPE_UNLOCK(rpipe); 1504 1505 return (kn->kn_data >= lowwat); 1506} 1507 1508int 1509fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo) 1510{ 1511#if CONFIG_MACF 1512 int error; 1513#endif 1514 struct timeval now; 1515 struct vinfo_stat * ub; 1516 int pipe_size = 0; 1517 int pipe_count; 1518 1519 if (cpipe == NULL) 1520 return (EBADF); 1521 PIPE_LOCK(cpipe); 1522 1523#if CONFIG_MACF 1524 error = mac_pipe_check_stat(kauth_cred_get(), cpipe); 1525 if (error) { 1526 PIPE_UNLOCK(cpipe); 1527 return (error); 1528 } 1529#endif 1530 if (cpipe->pipe_buffer.buffer == 0) { 1531 /* 1532 * must be stat'ing the write fd 1533 */ 1534 if (cpipe->pipe_peer) { 1535 /* 1536 * the peer still exists, use it's info 1537 */ 1538 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer); 1539 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt; 1540 } else { 1541 pipe_count = 0; 1542 } 1543 } else { 1544 pipe_size = MAX_PIPESIZE(cpipe); 1545 pipe_count = cpipe->pipe_buffer.cnt; 1546 } 1547 /* 1548 * since peer's buffer is setup ouside of lock 1549 * we might catch it in transient state 1550 */ 1551 if (pipe_size == 0) 1552 pipe_size = PIPE_SIZE; 1553 1554 ub = &pinfo->pipe_stat; 1555 1556 bzero(ub, sizeof(*ub)); 1557 ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; 1558 ub->vst_blksize = pipe_size; 1559 ub->vst_size = pipe_count; 1560 if 
(ub->vst_blksize != 0) 1561 ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize; 1562 ub->vst_nlink = 1; 1563 1564 ub->vst_uid = kauth_getuid(); 1565 ub->vst_gid = kauth_getgid(); 1566 1567 microtime(&now); 1568 ub->vst_atime = now.tv_sec; 1569 ub->vst_atimensec = now.tv_usec * 1000; 1570 1571 ub->vst_mtime = now.tv_sec; 1572 ub->vst_mtimensec = now.tv_usec * 1000; 1573 1574 ub->vst_ctime = now.tv_sec; 1575 ub->vst_ctimensec = now.tv_usec * 1000; 1576 1577 /* 1578 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid. 1579 * XXX (st_dev, st_ino) should be unique. 1580 */ 1581 1582 pinfo->pipe_handle = (uint64_t)((uintptr_t)cpipe); 1583 pinfo->pipe_peerhandle = (uint64_t)((uintptr_t)(cpipe->pipe_peer)); 1584 pinfo->pipe_status = cpipe->pipe_state; 1585 1586 PIPE_UNLOCK(cpipe); 1587 1588 return (0); 1589} 1590 1591 1592static int 1593pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx) 1594{ 1595 1596 /* Note: fdlock already held */ 1597 struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data); 1598 1599 if (cpipe) { 1600 PIPE_LOCK(cpipe); 1601 cpipe->pipe_state |= PIPE_DRAIN; 1602 cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW); 1603 wakeup(cpipe); 1604 1605 /* Must wake up peer: a writer sleeps on the read side */ 1606 if ((ppipe = cpipe->pipe_peer)) { 1607 ppipe->pipe_state |= PIPE_DRAIN; 1608 ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW); 1609 wakeup(ppipe); 1610 } 1611 1612 PIPE_UNLOCK(cpipe); 1613 return 0; 1614 } 1615 1616 return 1; 1617} 1618 1619 1620 /* 1621 * When a thread sets a write-select on a pipe, it creates an implicit, 1622 * untracked dependency between that thread and the peer of the pipe 1623 * on which the select is set. If the peer pipe is closed and freed 1624 * before the select()ing thread wakes up, the system will panic as 1625 * it attempts to unwind the dangling select(). 
To avoid that panic, 1626 * we notice whenever a dangerous select() is set on a pipe, and 1627 * defer the final deletion of the pipe until that select()s are all 1628 * resolved. Since we can't currently detect exactly when that 1629 * resolution happens, we use a simple garbage collection queue to 1630 * reap the at-risk pipes 'later'. 1631 */ 1632static void 1633pipe_garbage_collect(struct pipe *cpipe) 1634{ 1635 uint64_t old, now; 1636 struct pipe_garbage *pgp; 1637 1638 /* Convert msecs to nsecs and then to abstime */ 1639 old = pipe_garbage_age_limit * 1000000; 1640 nanoseconds_to_absolutetime(old, &old); 1641 1642 lck_mtx_lock(pipe_garbage_lock); 1643 1644 /* Free anything that's been on the queue for <mumble> seconds */ 1645 now = mach_absolute_time(); 1646 old = now - old; 1647 while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) { 1648 pipe_garbage_head = pgp->pg_next; 1649 if (pipe_garbage_head == NULL) 1650 pipe_garbage_tail = NULL; 1651 pipe_garbage_count--; 1652 zfree(pipe_zone, pgp->pg_pipe); 1653 zfree(pipe_garbage_zone, pgp); 1654 } 1655 1656 /* Add the new pipe (if any) to the tail of the garbage queue */ 1657 if (cpipe) { 1658 cpipe->pipe_state = PIPE_DEAD; 1659 pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone); 1660 if (pgp == NULL) { 1661 /* 1662 * We're too low on memory to garbage collect the 1663 * pipe. Freeing it runs the risk of panicing the 1664 * system. All we can do is leak it and leave 1665 * a breadcrumb behind. The good news, such as it 1666 * is, is that this will probably never happen. 1667 * We will probably hit the panic below first. 
1668 */ 1669 printf("Leaking pipe %p - no room left in the queue", 1670 cpipe); 1671 lck_mtx_unlock(pipe_garbage_lock); 1672 return; 1673 } 1674 1675 pgp->pg_pipe = cpipe; 1676 pgp->pg_timestamp = now; 1677 pgp->pg_next = NULL; 1678 1679 if (pipe_garbage_tail) 1680 pipe_garbage_tail->pg_next = pgp; 1681 pipe_garbage_tail = pgp; 1682 if (pipe_garbage_head == NULL) 1683 pipe_garbage_head = pipe_garbage_tail; 1684 1685 if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT) 1686 panic("Length of pipe garbage queue exceeded %d", 1687 PIPE_GARBAGE_QUEUE_LIMIT); 1688 } 1689 lck_mtx_unlock(pipe_garbage_lock); 1690} 1691 1692