1/* 2 * Copyright (c) 1996 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. Modifications may be freely made to this file if the above conditions 17 * are met. 18 */ 19/* 20 * Copyright (c) 2003-2014 Apple Inc. All rights reserved. 21 * 22 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 23 * 24 * This file contains Original Code and/or Modifications of Original Code 25 * as defined in and that are subject to the Apple Public Source License 26 * Version 2.0 (the 'License'). You may not use this file except in 27 * compliance with the License. The rights granted to you under the License 28 * may not be used to create, or enable the creation or redistribution of, 29 * unlawful or unlicensed copies of an Apple operating system, or to 30 * circumvent, violate, or enable the circumvention or violation of, any 31 * terms of an Apple operating system software license agreement. 32 * 33 * Please obtain a copy of the License at 34 * http://www.opensource.apple.com/apsl/ and read it before using this file. 
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 *
 * Pipes are implemented as circular buffers. The following are the valid
 * states in pipe operations (r = read offset, w = write offset,
 * c = number of buffered bytes):
 *
 *      _________________________________
 * 1.  |_________________________________| r=w, c=0
 *
 *      _________________________________
 * 2.  |__r:::::wc_______________________| r <= w , c > 0
 *
 *      _________________________________
 * 3.  |::::wc_____r:::::::::::::::::::::| r>w , c > 0
 *
 *      _________________________________
 * 4.  |:::::::wrc:::::::::::::::::::::::| w=r, c = Max size
 *
 *
 *  Nomenclature:-
 *  a-z define the steps in a program flow
 *  1-4 are the states as defined above
 *  Action: is what file operation is done on the pipe
 *
 *  Current:None  Action: initialize with size M=200
 *  a. State 1 ( r=0, w=0, c=0)
 *
 *  Current: a    Action: write(100) (w < M)
 *  b. State 2 (r=0, w=100, c=100)
 *
 *  Current: b    Action: write(100) (w = M-w)
 *  c.
 *     State 4 (r=0,w=0,c=200)
 *
 *  Current: b    Action: read(70)  ( r < c )
 *  d. State 2(r=70,w=100,c=30)
 *
 *  Current: d    Action: write(75) ( w < (m-w))
 *  e. State 2 (r=70,w=175,c=105)
 *
 *  Current: d    Action: write(110) ( w > (m-w))
 *  f. State 3 (r=70,w=10,c=140)
 *
 *  Current: d    Action: read(30) (r >= c )
 *  g. State 1 (r=100,w=100,c=0)
 *
 */

/*
 * This code creates half duplex pipe buffers for facilitating file like
 * operations on pipes. The initial buffer is very small, but this can
 * dynamically change to larger sizes based on usage. The buffer size is never
 * reduced. The total amount of kernel memory used is governed by maxpipekva.
 * If the dynamic expansion limit is reached, the output thread is blocked
 * until the pipe buffer empties enough to continue.
 *
 * In order to limit the resource use of pipes, the following sysctl exists:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva.
117 * 118 */ 119 120#include <sys/param.h> 121#include <sys/systm.h> 122#include <sys/filedesc.h> 123#include <sys/kernel.h> 124#include <sys/vnode.h> 125#include <sys/proc_internal.h> 126#include <sys/kauth.h> 127#include <sys/file_internal.h> 128#include <sys/stat.h> 129#include <sys/ioctl.h> 130#include <sys/fcntl.h> 131#include <sys/malloc.h> 132#include <sys/syslog.h> 133#include <sys/unistd.h> 134#include <sys/resourcevar.h> 135#include <sys/aio_kern.h> 136#include <sys/signalvar.h> 137#include <sys/pipe.h> 138#include <sys/sysproto.h> 139#include <sys/proc_info.h> 140 141#include <security/audit/audit.h> 142 143#include <sys/kdebug.h> 144 145#include <kern/zalloc.h> 146#include <kern/kalloc.h> 147#include <vm/vm_kern.h> 148#include <libkern/OSAtomic.h> 149 150#define f_flag f_fglob->fg_flag 151#define f_msgcount f_fglob->fg_msgcount 152#define f_cred f_fglob->fg_cred 153#define f_ops f_fglob->fg_ops 154#define f_offset f_fglob->fg_offset 155#define f_data f_fglob->fg_data 156 157/* 158 * interfaces to the outside world exported through file operations 159 */ 160static int pipe_read(struct fileproc *fp, struct uio *uio, 161 int flags, vfs_context_t ctx); 162static int pipe_write(struct fileproc *fp, struct uio *uio, 163 int flags, vfs_context_t ctx); 164static int pipe_close(struct fileglob *fg, vfs_context_t ctx); 165static int pipe_select(struct fileproc *fp, int which, void * wql, 166 vfs_context_t ctx); 167static int pipe_kqfilter(struct fileproc *fp, struct knote *kn, 168 vfs_context_t ctx); 169static int pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, 170 vfs_context_t ctx); 171static int pipe_drain(struct fileproc *fp,vfs_context_t ctx); 172 173static const struct fileops pipeops = { 174 DTYPE_PIPE, 175 pipe_read, 176 pipe_write, 177 pipe_ioctl, 178 pipe_select, 179 pipe_close, 180 pipe_kqfilter, 181 pipe_drain 182}; 183 184static void filt_pipedetach(struct knote *kn); 185static int filt_piperead(struct knote *kn, long hint); 186static int 
filt_pipewrite(struct knote *kn, long hint); 187 188static struct filterops pipe_rfiltops = { 189 .f_isfd = 1, 190 .f_detach = filt_pipedetach, 191 .f_event = filt_piperead, 192}; 193 194static struct filterops pipe_wfiltops = { 195 .f_isfd = 1, 196 .f_detach = filt_pipedetach, 197 .f_event = filt_pipewrite, 198}; 199 200static int nbigpipe; /* for compatibility sake. no longer used */ 201static int amountpipes; /* total number of pipes in system */ 202static int amountpipekva; /* total memory used by pipes */ 203 204int maxpipekva __attribute__((used)) = PIPE_KVAMAX; /* allowing 16MB max. */ 205 206#if PIPE_SYSCTLS 207SYSCTL_DECL(_kern_ipc); 208 209SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD|CTLFLAG_LOCKED, 210 &maxpipekva, 0, "Pipe KVA limit"); 211SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW|CTLFLAG_LOCKED, 212 &maxpipekvawired, 0, "Pipe KVA wired limit"); 213SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD|CTLFLAG_LOCKED, 214 &amountpipes, 0, "Current # of pipes"); 215SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD|CTLFLAG_LOCKED, 216 &nbigpipe, 0, "Current # of big pipes"); 217SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD|CTLFLAG_LOCKED, 218 &amountpipekva, 0, "Pipe KVA usage"); 219SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD|CTLFLAG_LOCKED, 220 &amountpipekvawired, 0, "Pipe wired KVA usage"); 221#endif 222 223static void pipeclose(struct pipe *cpipe); 224static void pipe_free_kmem(struct pipe *cpipe); 225static int pipe_create(struct pipe **cpipep); 226static int pipespace(struct pipe *cpipe, int size); 227static int choose_pipespace(unsigned long current, unsigned long expected); 228static int expand_pipespace(struct pipe *p, int target_size); 229static void pipeselwakeup(struct pipe *cpipe, struct pipe *spipe); 230static __inline int pipeio_lock(struct pipe *cpipe, int catch); 231static __inline void pipeio_unlock(struct pipe *cpipe); 232 233extern int postpipeevent(struct pipe *, int); 234extern void 
evpipefree(struct pipe *cpipe); 235 236static lck_grp_t *pipe_mtx_grp; 237static lck_attr_t *pipe_mtx_attr; 238static lck_grp_attr_t *pipe_mtx_grp_attr; 239 240static zone_t pipe_zone; 241 242#define MAX_PIPESIZE(pipe) ( MAX(PIPE_SIZE, (pipe)->pipe_buffer.size) ) 243 244#define PIPE_GARBAGE_AGE_LIMIT 5000 /* In milliseconds */ 245#define PIPE_GARBAGE_QUEUE_LIMIT 32000 246 247struct pipe_garbage { 248 struct pipe *pg_pipe; 249 struct pipe_garbage *pg_next; 250 uint64_t pg_timestamp; 251}; 252 253static zone_t pipe_garbage_zone; 254static struct pipe_garbage *pipe_garbage_head = NULL; 255static struct pipe_garbage *pipe_garbage_tail = NULL; 256static uint64_t pipe_garbage_age_limit = PIPE_GARBAGE_AGE_LIMIT; 257static int pipe_garbage_count = 0; 258static lck_mtx_t *pipe_garbage_lock; 259static void pipe_garbage_collect(struct pipe *cpipe); 260 261SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 262 263/* initial setup done at time of sysinit */ 264void 265pipeinit(void) 266{ 267 nbigpipe=0; 268 vm_size_t zone_size; 269 270 zone_size = 8192 * sizeof(struct pipe); 271 pipe_zone = zinit(sizeof(struct pipe), zone_size, 4096, "pipe zone"); 272 273 274 /* allocate lock group attribute and group for pipe mutexes */ 275 pipe_mtx_grp_attr = lck_grp_attr_alloc_init(); 276 pipe_mtx_grp = lck_grp_alloc_init("pipe", pipe_mtx_grp_attr); 277 278 /* allocate the lock attribute for pipe mutexes */ 279 pipe_mtx_attr = lck_attr_alloc_init(); 280 281 /* 282 * Set up garbage collection for dead pipes 283 */ 284 zone_size = (PIPE_GARBAGE_QUEUE_LIMIT + 20) * 285 sizeof(struct pipe_garbage); 286 pipe_garbage_zone = (zone_t)zinit(sizeof(struct pipe_garbage), 287 zone_size, 4096, "pipe garbage zone"); 288 pipe_garbage_lock = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr); 289 290} 291 292/* Bitmap for things to touch in pipe_touch() */ 293#define PIPE_ATIME 0x00000001 /* time of last access */ 294#define PIPE_MTIME 0x00000002 /* time of last modification */ 295#define PIPE_CTIME 
0x00000004 /* time of last status change */ 296 297static void 298pipe_touch(struct pipe *tpipe, int touch) 299{ 300 struct timeval now; 301 302 microtime(&now); 303 304 if (touch & PIPE_ATIME) { 305 tpipe->st_atimespec.tv_sec = now.tv_sec; 306 tpipe->st_atimespec.tv_nsec = now.tv_usec * 1000; 307 } 308 309 if (touch & PIPE_MTIME) { 310 tpipe->st_mtimespec.tv_sec = now.tv_sec; 311 tpipe->st_mtimespec.tv_nsec = now.tv_usec * 1000; 312 } 313 314 if (touch & PIPE_CTIME) { 315 tpipe->st_ctimespec.tv_sec = now.tv_sec; 316 tpipe->st_ctimespec.tv_nsec = now.tv_usec * 1000; 317 } 318} 319 320static const unsigned int pipesize_blocks[] = {128,256,1024,2048,4096, 4096 * 2, PIPE_SIZE , PIPE_SIZE * 4 }; 321 322/* 323 * finds the right size from possible sizes in pipesize_blocks 324 * returns the size which matches max(current,expected) 325 */ 326static int 327choose_pipespace(unsigned long current, unsigned long expected) 328{ 329 int i = sizeof(pipesize_blocks)/sizeof(unsigned int) -1; 330 unsigned long target; 331 332 if (expected > current) 333 target = expected; 334 else 335 target = current; 336 337 while ( i >0 && pipesize_blocks[i-1] > target) { 338 i=i-1; 339 340 } 341 342 return pipesize_blocks[i]; 343} 344 345 346/* 347 * expand the size of pipe while there is data to be read, 348 * and then free the old buffer once the current buffered 349 * data has been transferred to new storage. 350 * Required: PIPE_LOCK and io lock to be held by caller. 
351 * returns 0 on success or no expansion possible 352 */ 353static int 354expand_pipespace(struct pipe *p, int target_size) 355{ 356 struct pipe tmp, oldpipe; 357 int error; 358 tmp.pipe_buffer.buffer = 0; 359 360 if (p->pipe_buffer.size >= (unsigned) target_size) { 361 return 0; /* the existing buffer is max size possible */ 362 } 363 364 /* create enough space in the target */ 365 error = pipespace(&tmp, target_size); 366 if (error != 0) 367 return (error); 368 369 oldpipe.pipe_buffer.buffer = p->pipe_buffer.buffer; 370 oldpipe.pipe_buffer.size = p->pipe_buffer.size; 371 372 memcpy(tmp.pipe_buffer.buffer, p->pipe_buffer.buffer, p->pipe_buffer.size); 373 if (p->pipe_buffer.cnt > 0 && p->pipe_buffer.in <= p->pipe_buffer.out ){ 374 /* we are in State 3 and need extra copying for read to be consistent */ 375 memcpy(&tmp.pipe_buffer.buffer[p->pipe_buffer.size], p->pipe_buffer.buffer, p->pipe_buffer.size); 376 p->pipe_buffer.in += p->pipe_buffer.size; 377 } 378 379 p->pipe_buffer.buffer = tmp.pipe_buffer.buffer; 380 p->pipe_buffer.size = tmp.pipe_buffer.size; 381 382 383 pipe_free_kmem(&oldpipe); 384 return 0; 385} 386 387/* 388 * The pipe system call for the DTYPE_PIPE type of pipes 389 * 390 * returns: 391 * FREAD | fd0 | -->[struct rpipe] --> |~~buffer~~| \ 392 * (pipe_mutex) 393 * FWRITE | fd1 | -->[struct wpipe] --X / 394 */ 395 396/* ARGSUSED */ 397int 398pipe(proc_t p, __unused struct pipe_args *uap, int32_t *retval) 399{ 400 struct fileproc *rf, *wf; 401 struct pipe *rpipe, *wpipe; 402 lck_mtx_t *pmtx; 403 int fd, error; 404 405 if ((pmtx = lck_mtx_alloc_init(pipe_mtx_grp, pipe_mtx_attr)) == NULL) 406 return (ENOMEM); 407 408 rpipe = wpipe = NULL; 409 if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 410 error = ENFILE; 411 goto freepipes; 412 } 413 /* 414 * allocate the space for the normal I/O direction up 415 * front... we'll delay the allocation for the other 416 * direction until a write actually occurs (most likely it won't)... 
417 */ 418 error = pipespace(rpipe, choose_pipespace(rpipe->pipe_buffer.size, 0)); 419 if (error) 420 goto freepipes; 421 422 TAILQ_INIT(&rpipe->pipe_evlist); 423 TAILQ_INIT(&wpipe->pipe_evlist); 424 425 error = falloc(p, &rf, &fd, vfs_context_current()); 426 if (error) { 427 goto freepipes; 428 } 429 retval[0] = fd; 430 431 /* 432 * for now we'll create half-duplex pipes(refer returns section above). 433 * this is what we've always supported.. 434 */ 435 rf->f_flag = FREAD; 436 rf->f_data = (caddr_t)rpipe; 437 rf->f_ops = &pipeops; 438 439 error = falloc(p, &wf, &fd, vfs_context_current()); 440 if (error) { 441 fp_free(p, retval[0], rf); 442 goto freepipes; 443 } 444 wf->f_flag = FWRITE; 445 wf->f_data = (caddr_t)wpipe; 446 wf->f_ops = &pipeops; 447 448 rpipe->pipe_peer = wpipe; 449 wpipe->pipe_peer = rpipe; 450 /* both structures share the same mutex */ 451 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; 452 453 retval[1] = fd; 454#if CONFIG_MACF 455 /* 456 * XXXXXXXX SHOULD NOT HOLD FILE_LOCK() XXXXXXXXXXXX 457 * 458 * struct pipe represents a pipe endpoint. The MAC label is shared 459 * between the connected endpoints. As a result mac_pipe_label_init() and 460 * mac_pipe_label_associate() should only be called on one of the endpoints 461 * after they have been connected. 
462 */ 463 mac_pipe_label_init(rpipe); 464 mac_pipe_label_associate(kauth_cred_get(), rpipe); 465 wpipe->pipe_label = rpipe->pipe_label; 466#endif 467 proc_fdlock_spin(p); 468 procfdtbl_releasefd(p, retval[0], NULL); 469 procfdtbl_releasefd(p, retval[1], NULL); 470 fp_drop(p, retval[0], rf, 1); 471 fp_drop(p, retval[1], wf, 1); 472 proc_fdunlock(p); 473 474 475 return (0); 476 477freepipes: 478 pipeclose(rpipe); 479 pipeclose(wpipe); 480 lck_mtx_free(pmtx, pipe_mtx_grp); 481 482 return (error); 483} 484 485int 486pipe_stat(struct pipe *cpipe, void *ub, int isstat64) 487{ 488#if CONFIG_MACF 489 int error; 490#endif 491 int pipe_size = 0; 492 int pipe_count; 493 struct stat *sb = (struct stat *)0; /* warning avoidance ; protected by isstat64 */ 494 struct stat64 * sb64 = (struct stat64 *)0; /* warning avoidance ; protected by isstat64 */ 495 496 if (cpipe == NULL) 497 return (EBADF); 498 PIPE_LOCK(cpipe); 499 500#if CONFIG_MACF 501 error = mac_pipe_check_stat(kauth_cred_get(), cpipe); 502 if (error) { 503 PIPE_UNLOCK(cpipe); 504 return (error); 505 } 506#endif 507 if (cpipe->pipe_buffer.buffer == 0) { 508 /* must be stat'ing the write fd */ 509 if (cpipe->pipe_peer) { 510 /* the peer still exists, use it's info */ 511 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer); 512 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt; 513 } else { 514 pipe_count = 0; 515 } 516 } else { 517 pipe_size = MAX_PIPESIZE(cpipe); 518 pipe_count = cpipe->pipe_buffer.cnt; 519 } 520 /* 521 * since peer's buffer is setup ouside of lock 522 * we might catch it in transient state 523 */ 524 if (pipe_size == 0) 525 pipe_size = MAX(PIPE_SIZE, pipesize_blocks[0]); 526 527 if (isstat64 != 0) { 528 sb64 = (struct stat64 *)ub; 529 530 bzero(sb64, sizeof(*sb64)); 531 sb64->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; 532 sb64->st_blksize = pipe_size; 533 sb64->st_size = pipe_count; 534 sb64->st_blocks = (sb64->st_size + sb64->st_blksize - 1) / sb64->st_blksize; 535 536 sb64->st_uid = 
kauth_getuid(); 537 sb64->st_gid = kauth_getgid(); 538 539 sb64->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec; 540 sb64->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec; 541 542 sb64->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec; 543 sb64->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec; 544 545 sb64->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec; 546 sb64->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec; 547 548 /* 549 * Return a relatively unique inode number based on the current 550 * address of this pipe's struct pipe. This number may be recycled 551 * relatively quickly. 552 */ 553 sb64->st_ino = (ino64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe); 554 } else { 555 sb = (struct stat *)ub; 556 557 bzero(sb, sizeof(*sb)); 558 sb->st_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; 559 sb->st_blksize = pipe_size; 560 sb->st_size = pipe_count; 561 sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize; 562 563 sb->st_uid = kauth_getuid(); 564 sb->st_gid = kauth_getgid(); 565 566 sb->st_atimespec.tv_sec = cpipe->st_atimespec.tv_sec; 567 sb->st_atimespec.tv_nsec = cpipe->st_atimespec.tv_nsec; 568 569 sb->st_mtimespec.tv_sec = cpipe->st_mtimespec.tv_sec; 570 sb->st_mtimespec.tv_nsec = cpipe->st_mtimespec.tv_nsec; 571 572 sb->st_ctimespec.tv_sec = cpipe->st_ctimespec.tv_sec; 573 sb->st_ctimespec.tv_nsec = cpipe->st_ctimespec.tv_nsec; 574 575 /* 576 * Return a relatively unique inode number based on the current 577 * address of this pipe's struct pipe. This number may be recycled 578 * relatively quickly. 579 */ 580 sb->st_ino = (ino_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe); 581 } 582 PIPE_UNLOCK(cpipe); 583 584 /* 585 * POSIX: Left as 0: st_dev, st_nlink, st_rdev, st_flags, st_gen, 586 * st_uid, st_gid. 587 * 588 * XXX (st_dev) should be unique, but there is no device driver that 589 * XXX is associated with pipes, since they are implemented via a 590 * XXX struct fileops indirection rather than as FS objects. 
591 */ 592 return (0); 593} 594 595 596/* 597 * Allocate kva for pipe circular buffer, the space is pageable 598 * This routine will 'realloc' the size of a pipe safely, if it fails 599 * it will retain the old buffer. 600 * If it fails it will return ENOMEM. 601 */ 602static int 603pipespace(struct pipe *cpipe, int size) 604{ 605 vm_offset_t buffer; 606 607 if (size <= 0) 608 return(EINVAL); 609 610 if ((buffer = (vm_offset_t)kalloc(size)) == 0 ) 611 return(ENOMEM); 612 613 /* free old resources if we're resizing */ 614 pipe_free_kmem(cpipe); 615 cpipe->pipe_buffer.buffer = (caddr_t)buffer; 616 cpipe->pipe_buffer.size = size; 617 cpipe->pipe_buffer.in = 0; 618 cpipe->pipe_buffer.out = 0; 619 cpipe->pipe_buffer.cnt = 0; 620 621 OSAddAtomic(1, &amountpipes); 622 OSAddAtomic(cpipe->pipe_buffer.size, &amountpipekva); 623 624 return (0); 625} 626 627/* 628 * initialize and allocate VM and memory for pipe 629 */ 630static int 631pipe_create(struct pipe **cpipep) 632{ 633 struct pipe *cpipe; 634 cpipe = (struct pipe *)zalloc(pipe_zone); 635 636 if ((*cpipep = cpipe) == NULL) 637 return (ENOMEM); 638 639 /* 640 * protect so pipespace or pipeclose don't follow a junk pointer 641 * if pipespace() fails. 642 */ 643 bzero(cpipe, sizeof *cpipe); 644 645 /* Initial times are all the time of creation of the pipe */ 646 pipe_touch(cpipe, PIPE_ATIME | PIPE_MTIME | PIPE_CTIME); 647 return (0); 648} 649 650 651/* 652 * lock a pipe for I/O, blocking other access 653 */ 654static inline int 655pipeio_lock(struct pipe *cpipe, int catch) 656{ 657 int error; 658 while (cpipe->pipe_state & PIPE_LOCKFL) { 659 cpipe->pipe_state |= PIPE_LWANT; 660 error = msleep(cpipe, PIPE_MTX(cpipe), catch ? 
(PRIBIO | PCATCH) : PRIBIO, 661 "pipelk", 0); 662 if (error != 0) 663 return (error); 664 } 665 cpipe->pipe_state |= PIPE_LOCKFL; 666 return (0); 667} 668 669/* 670 * unlock a pipe I/O lock 671 */ 672static inline void 673pipeio_unlock(struct pipe *cpipe) 674{ 675 cpipe->pipe_state &= ~PIPE_LOCKFL; 676 if (cpipe->pipe_state & PIPE_LWANT) { 677 cpipe->pipe_state &= ~PIPE_LWANT; 678 wakeup(cpipe); 679 } 680} 681 682/* 683 * wakeup anyone whos blocked in select 684 */ 685static void 686pipeselwakeup(struct pipe *cpipe, struct pipe *spipe) 687{ 688 if (cpipe->pipe_state & PIPE_SEL) { 689 cpipe->pipe_state &= ~PIPE_SEL; 690 selwakeup(&cpipe->pipe_sel); 691 } 692 if (cpipe->pipe_state & PIPE_KNOTE) 693 KNOTE(&cpipe->pipe_sel.si_note, 1); 694 695 postpipeevent(cpipe, EV_RWBYTES); 696 697 if (spipe && (spipe->pipe_state & PIPE_ASYNC) && spipe->pipe_pgid) { 698 if (spipe->pipe_pgid < 0) 699 gsignal(-spipe->pipe_pgid, SIGIO); 700 else 701 proc_signal(spipe->pipe_pgid, SIGIO); 702 } 703} 704 705/* 706 * Read n bytes from the buffer. Semantics are similar to file read. 
707 * returns: number of bytes read from the buffer 708 */ 709/* ARGSUSED */ 710static int 711pipe_read(struct fileproc *fp, struct uio *uio, __unused int flags, 712 __unused vfs_context_t ctx) 713{ 714 struct pipe *rpipe = (struct pipe *)fp->f_data; 715 int error; 716 int nread = 0; 717 u_int size; 718 719 PIPE_LOCK(rpipe); 720 ++rpipe->pipe_busy; 721 722 error = pipeio_lock(rpipe, 1); 723 if (error) 724 goto unlocked_error; 725 726#if CONFIG_MACF 727 error = mac_pipe_check_read(kauth_cred_get(), rpipe); 728 if (error) 729 goto locked_error; 730#endif 731 732 733 while (uio_resid(uio)) { 734 /* 735 * normal pipe buffer receive 736 */ 737 if (rpipe->pipe_buffer.cnt > 0) { 738 /* 739 * # bytes to read is min( bytes from read pointer until end of buffer, 740 * total unread bytes, 741 * user requested byte count) 742 */ 743 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 744 if (size > rpipe->pipe_buffer.cnt) 745 size = rpipe->pipe_buffer.cnt; 746 // LP64todo - fix this! 747 if (size > (u_int) uio_resid(uio)) 748 size = (u_int) uio_resid(uio); 749 750 PIPE_UNLOCK(rpipe); /* we still hold io lock.*/ 751 error = uiomove( 752 &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 753 size, uio); 754 PIPE_LOCK(rpipe); 755 if (error) 756 break; 757 758 rpipe->pipe_buffer.out += size; 759 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 760 rpipe->pipe_buffer.out = 0; 761 762 rpipe->pipe_buffer.cnt -= size; 763 764 /* 765 * If there is no more to read in the pipe, reset 766 * its pointers to the beginning. This improves 767 * cache hit stats. 768 */ 769 if (rpipe->pipe_buffer.cnt == 0) { 770 rpipe->pipe_buffer.in = 0; 771 rpipe->pipe_buffer.out = 0; 772 } 773 nread += size; 774 } else { 775 /* 776 * detect EOF condition 777 * read returns 0 on EOF, no need to set error 778 */ 779 if (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { 780 break; 781 } 782 783 /* 784 * If the "write-side" has been blocked, wake it up now. 
785 */ 786 if (rpipe->pipe_state & PIPE_WANTW) { 787 rpipe->pipe_state &= ~PIPE_WANTW; 788 wakeup(rpipe); 789 } 790 791 /* 792 * Break if some data was read in previous iteration. 793 */ 794 if (nread > 0) 795 break; 796 797 /* 798 * Unlock the pipe buffer for our remaining processing. 799 * We will either break out with an error or we will 800 * sleep and relock to loop. 801 */ 802 pipeio_unlock(rpipe); 803 804 /* 805 * Handle non-blocking mode operation or 806 * wait for more data. 807 */ 808 if (fp->f_flag & FNONBLOCK) { 809 error = EAGAIN; 810 } else { 811 rpipe->pipe_state |= PIPE_WANTR; 812 error = msleep(rpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, "piperd", 0); 813 if (error == 0) 814 error = pipeio_lock(rpipe, 1); 815 } 816 if (error) 817 goto unlocked_error; 818 } 819 } 820#if CONFIG_MACF 821locked_error: 822#endif 823 pipeio_unlock(rpipe); 824 825unlocked_error: 826 --rpipe->pipe_busy; 827 828 /* 829 * PIPE_WANT processing only makes sense if pipe_busy is 0. 830 */ 831 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 832 rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 833 wakeup(rpipe); 834 } else if (rpipe->pipe_buffer.cnt < rpipe->pipe_buffer.size) { 835 /* 836 * Handle write blocking hysteresis. 837 */ 838 if (rpipe->pipe_state & PIPE_WANTW) { 839 rpipe->pipe_state &= ~PIPE_WANTW; 840 wakeup(rpipe); 841 } 842 } 843 844 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) > 0) 845 pipeselwakeup(rpipe, rpipe->pipe_peer); 846 847 /* update last read time */ 848 pipe_touch(rpipe, PIPE_ATIME); 849 850 PIPE_UNLOCK(rpipe); 851 852 return (error); 853} 854 855/* 856 * perform a write of n bytes into the read side of buffer. Since 857 * pipes are unidirectional a write is meant to be read by the otherside only. 
858 */ 859static int 860pipe_write(struct fileproc *fp, struct uio *uio, __unused int flags, 861 __unused vfs_context_t ctx) 862{ 863 int error = 0; 864 int orig_resid; 865 int pipe_size; 866 struct pipe *wpipe, *rpipe; 867 // LP64todo - fix this! 868 orig_resid = uio_resid(uio); 869 int space; 870 871 rpipe = (struct pipe *)fp->f_data; 872 873 PIPE_LOCK(rpipe); 874 wpipe = rpipe->pipe_peer; 875 876 /* 877 * detect loss of pipe read side, issue SIGPIPE if lost. 878 */ 879 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { 880 PIPE_UNLOCK(rpipe); 881 return (EPIPE); 882 } 883#if CONFIG_MACF 884 error = mac_pipe_check_write(kauth_cred_get(), wpipe); 885 if (error) { 886 PIPE_UNLOCK(rpipe); 887 return (error); 888 } 889#endif 890 ++wpipe->pipe_busy; 891 892 pipe_size = 0; 893 894 /* 895 * need to allocate some storage... we delay the allocation 896 * until the first write on fd[0] to avoid allocating storage for both 897 * 'pipe ends'... most pipes are half-duplex with the writes targeting 898 * fd[1], so allocating space for both ends is a waste... 899 */ 900 901 if ( wpipe->pipe_buffer.buffer == 0 || ( 902 (unsigned)orig_resid > wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt && 903 amountpipekva < maxpipekva ) ) { 904 905 pipe_size = choose_pipespace(wpipe->pipe_buffer.size, wpipe->pipe_buffer.cnt + orig_resid); 906 } 907 if (pipe_size) { 908 /* 909 * need to do initial allocation or resizing of pipe 910 * holding both structure and io locks. 911 */ 912 if ((error = pipeio_lock(wpipe, 1)) == 0) { 913 if (wpipe->pipe_buffer.cnt == 0) 914 error = pipespace(wpipe, pipe_size); 915 else 916 error = expand_pipespace(wpipe, pipe_size); 917 918 pipeio_unlock(wpipe); 919 920 /* allocation failed */ 921 if (wpipe->pipe_buffer.buffer == 0) 922 error = ENOMEM; 923 } 924 if (error) { 925 /* 926 * If an error occurred unbusy and return, waking up any pending 927 * readers. 
928 */ 929 --wpipe->pipe_busy; 930 if ((wpipe->pipe_busy == 0) && 931 (wpipe->pipe_state & PIPE_WANT)) { 932 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 933 wakeup(wpipe); 934 } 935 PIPE_UNLOCK(rpipe); 936 return(error); 937 } 938 } 939 940 while (uio_resid(uio)) { 941 942 retrywrite: 943 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 944 945 /* Writes of size <= PIPE_BUF must be atomic. */ 946 if ((space < uio_resid(uio)) && (orig_resid <= PIPE_BUF)) 947 space = 0; 948 949 if (space > 0) { 950 951 if ((error = pipeio_lock(wpipe,1)) == 0) { 952 int size; /* Transfer size */ 953 int segsize; /* first segment to transfer */ 954 955 if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { 956 pipeio_unlock(wpipe); 957 error = EPIPE; 958 break; 959 } 960 /* 961 * If a process blocked in pipeio_lock, our 962 * value for space might be bad... the mutex 963 * is dropped while we're blocked 964 */ 965 if (space > (int)(wpipe->pipe_buffer.size - 966 wpipe->pipe_buffer.cnt)) { 967 pipeio_unlock(wpipe); 968 goto retrywrite; 969 } 970 971 /* 972 * Transfer size is minimum of uio transfer 973 * and free space in pipe buffer. 974 */ 975 // LP64todo - fix this! 976 if (space > uio_resid(uio)) 977 size = uio_resid(uio); 978 else 979 size = space; 980 /* 981 * First segment to transfer is minimum of 982 * transfer size and contiguous space in 983 * pipe buffer. If first segment to transfer 984 * is less than the transfer size, we've got 985 * a wraparound in the buffer. 986 */ 987 segsize = wpipe->pipe_buffer.size - 988 wpipe->pipe_buffer.in; 989 if (segsize > size) 990 segsize = size; 991 992 /* Transfer first segment */ 993 994 PIPE_UNLOCK(rpipe); 995 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 996 segsize, uio); 997 PIPE_LOCK(rpipe); 998 999 if (error == 0 && segsize < size) { 1000 /* 1001 * Transfer remaining part now, to 1002 * support atomic writes. Wraparound 1003 * happened. 
(State 3) 1004 */ 1005 if (wpipe->pipe_buffer.in + segsize != 1006 wpipe->pipe_buffer.size) 1007 panic("Expected pipe buffer " 1008 "wraparound disappeared"); 1009 1010 PIPE_UNLOCK(rpipe); 1011 error = uiomove( 1012 &wpipe->pipe_buffer.buffer[0], 1013 size - segsize, uio); 1014 PIPE_LOCK(rpipe); 1015 } 1016 /* 1017 * readers never know to read until count is updated. 1018 */ 1019 if (error == 0) { 1020 wpipe->pipe_buffer.in += size; 1021 if (wpipe->pipe_buffer.in > 1022 wpipe->pipe_buffer.size) { 1023 if (wpipe->pipe_buffer.in != 1024 size - segsize + 1025 wpipe->pipe_buffer.size) 1026 panic("Expected " 1027 "wraparound bad"); 1028 wpipe->pipe_buffer.in = size - 1029 segsize; 1030 } 1031 1032 wpipe->pipe_buffer.cnt += size; 1033 if (wpipe->pipe_buffer.cnt > 1034 wpipe->pipe_buffer.size) 1035 panic("Pipe buffer overflow"); 1036 1037 } 1038 pipeio_unlock(wpipe); 1039 } 1040 if (error) 1041 break; 1042 1043 } else { 1044 /* 1045 * If the "read-side" has been blocked, wake it up now. 1046 */ 1047 if (wpipe->pipe_state & PIPE_WANTR) { 1048 wpipe->pipe_state &= ~PIPE_WANTR; 1049 wakeup(wpipe); 1050 } 1051 /* 1052 * don't block on non-blocking I/O 1053 * we'll do the pipeselwakeup on the way out 1054 */ 1055 if (fp->f_flag & FNONBLOCK) { 1056 error = EAGAIN; 1057 break; 1058 } 1059 1060 /* 1061 * If read side wants to go away, we just issue a signal 1062 * to ourselves. 1063 */ 1064 if (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) { 1065 error = EPIPE; 1066 break; 1067 } 1068 1069 /* 1070 * We have no more space and have something to offer, 1071 * wake up select/poll. 
1072 */ 1073 pipeselwakeup(wpipe, wpipe); 1074 1075 wpipe->pipe_state |= PIPE_WANTW; 1076 1077 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, "pipewr", 0); 1078 1079 if (error != 0) 1080 break; 1081 } 1082 } 1083 --wpipe->pipe_busy; 1084 1085 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 1086 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 1087 wakeup(wpipe); 1088 } 1089 if (wpipe->pipe_buffer.cnt > 0) { 1090 /* 1091 * If there are any characters in the buffer, we wake up 1092 * the reader if it was blocked waiting for data. 1093 */ 1094 if (wpipe->pipe_state & PIPE_WANTR) { 1095 wpipe->pipe_state &= ~PIPE_WANTR; 1096 wakeup(wpipe); 1097 } 1098 /* 1099 * wake up thread blocked in select/poll or post the notification 1100 */ 1101 pipeselwakeup(wpipe, wpipe); 1102 } 1103 1104 /* Update modification, status change (# of bytes in pipe) times */ 1105 pipe_touch(rpipe, PIPE_MTIME | PIPE_CTIME); 1106 pipe_touch(wpipe, PIPE_MTIME | PIPE_CTIME); 1107 PIPE_UNLOCK(rpipe); 1108 1109 return (error); 1110} 1111 1112/* 1113 * we implement a very minimal set of ioctls for compatibility with sockets. 
1114 */ 1115/* ARGSUSED 3 */ 1116static int 1117pipe_ioctl(struct fileproc *fp, u_long cmd, caddr_t data, 1118 __unused vfs_context_t ctx) 1119{ 1120 struct pipe *mpipe = (struct pipe *)fp->f_data; 1121#if CONFIG_MACF 1122 int error; 1123#endif 1124 1125 PIPE_LOCK(mpipe); 1126 1127#if CONFIG_MACF 1128 error = mac_pipe_check_ioctl(kauth_cred_get(), mpipe, cmd); 1129 if (error) { 1130 PIPE_UNLOCK(mpipe); 1131 1132 return (error); 1133 } 1134#endif 1135 1136 switch (cmd) { 1137 1138 case FIONBIO: 1139 PIPE_UNLOCK(mpipe); 1140 return (0); 1141 1142 case FIOASYNC: 1143 if (*(int *)data) { 1144 mpipe->pipe_state |= PIPE_ASYNC; 1145 } else { 1146 mpipe->pipe_state &= ~PIPE_ASYNC; 1147 } 1148 PIPE_UNLOCK(mpipe); 1149 return (0); 1150 1151 case FIONREAD: 1152 *(int *)data = mpipe->pipe_buffer.cnt; 1153 PIPE_UNLOCK(mpipe); 1154 return (0); 1155 1156 case TIOCSPGRP: 1157 mpipe->pipe_pgid = *(int *)data; 1158 1159 PIPE_UNLOCK(mpipe); 1160 return (0); 1161 1162 case TIOCGPGRP: 1163 *(int *)data = mpipe->pipe_pgid; 1164 1165 PIPE_UNLOCK(mpipe); 1166 return (0); 1167 1168 } 1169 PIPE_UNLOCK(mpipe); 1170 return (ENOTTY); 1171} 1172 1173 1174static int 1175pipe_select(struct fileproc *fp, int which, void *wql, vfs_context_t ctx) 1176{ 1177 struct pipe *rpipe = (struct pipe *)fp->f_data; 1178 struct pipe *wpipe; 1179 int retnum = 0; 1180 1181 if (rpipe == NULL || rpipe == (struct pipe *)-1) 1182 return (retnum); 1183 1184 PIPE_LOCK(rpipe); 1185 1186 wpipe = rpipe->pipe_peer; 1187 1188 1189#if CONFIG_MACF 1190 /* 1191 * XXX We should use a per thread credential here; minimally, the 1192 * XXX process credential should have a persistent reference on it 1193 * XXX before being passed in here. 
1194 */ 1195 if (mac_pipe_check_select(vfs_context_ucred(ctx), rpipe, which)) { 1196 PIPE_UNLOCK(rpipe); 1197 return (0); 1198 } 1199#endif 1200 switch (which) { 1201 1202 case FREAD: 1203 if ((rpipe->pipe_state & PIPE_DIRECTW) || 1204 (rpipe->pipe_buffer.cnt > 0) || 1205 (rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { 1206 1207 retnum = 1; 1208 } else { 1209 rpipe->pipe_state |= PIPE_SEL; 1210 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql); 1211 } 1212 break; 1213 1214 case FWRITE: 1215 if (wpipe) 1216 wpipe->pipe_state |= PIPE_WSELECT; 1217 if (wpipe == NULL || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || 1218 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 1219 (MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) { 1220 1221 retnum = 1; 1222 } else { 1223 wpipe->pipe_state |= PIPE_SEL; 1224 selrecord(vfs_context_proc(ctx), &wpipe->pipe_sel, wql); 1225 } 1226 break; 1227 case 0: 1228 rpipe->pipe_state |= PIPE_SEL; 1229 selrecord(vfs_context_proc(ctx), &rpipe->pipe_sel, wql); 1230 break; 1231 } 1232 PIPE_UNLOCK(rpipe); 1233 1234 return (retnum); 1235} 1236 1237 1238/* ARGSUSED 1 */ 1239static int 1240pipe_close(struct fileglob *fg, __unused vfs_context_t ctx) 1241{ 1242 struct pipe *cpipe; 1243 1244 proc_fdlock_spin(vfs_context_proc(ctx)); 1245 cpipe = (struct pipe *)fg->fg_data; 1246 fg->fg_data = NULL; 1247 proc_fdunlock(vfs_context_proc(ctx)); 1248 if (cpipe) 1249 pipeclose(cpipe); 1250 1251 return (0); 1252} 1253 1254static void 1255pipe_free_kmem(struct pipe *cpipe) 1256{ 1257 if (cpipe->pipe_buffer.buffer != NULL) { 1258 OSAddAtomic(-(cpipe->pipe_buffer.size), &amountpipekva); 1259 OSAddAtomic(-1, &amountpipes); 1260 kfree((void *)cpipe->pipe_buffer.buffer, 1261 cpipe->pipe_buffer.size); 1262 cpipe->pipe_buffer.buffer = NULL; 1263 cpipe->pipe_buffer.size = 0; 1264 } 1265} 1266 1267/* 1268 * shutdown the pipe 1269 */ 1270static void 1271pipeclose(struct pipe *cpipe) 1272{ 1273 struct pipe *ppipe; 1274 1275 if (cpipe == NULL) 1276 return; 
1277 /* partially created pipes won't have a valid mutex. */ 1278 if (PIPE_MTX(cpipe) != NULL) 1279 PIPE_LOCK(cpipe); 1280 1281 1282 /* 1283 * If the other side is blocked, wake it up saying that 1284 * we want to close it down. 1285 */ 1286 cpipe->pipe_state &= ~PIPE_DRAIN; 1287 cpipe->pipe_state |= PIPE_EOF; 1288 pipeselwakeup(cpipe, cpipe); 1289 1290 while (cpipe->pipe_busy) { 1291 cpipe->pipe_state |= PIPE_WANT; 1292 1293 wakeup(cpipe); 1294 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1295 } 1296 1297#if CONFIG_MACF 1298 /* 1299 * Free the shared pipe label only after the two ends are disconnected. 1300 */ 1301 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) 1302 mac_pipe_label_destroy(cpipe); 1303#endif 1304 1305 /* 1306 * Disconnect from peer 1307 */ 1308 if ((ppipe = cpipe->pipe_peer) != NULL) { 1309 1310 ppipe->pipe_state &= ~(PIPE_DRAIN); 1311 ppipe->pipe_state |= PIPE_EOF; 1312 1313 pipeselwakeup(ppipe, ppipe); 1314 wakeup(ppipe); 1315 1316 if (cpipe->pipe_state & PIPE_KNOTE) 1317 KNOTE(&ppipe->pipe_sel.si_note, 1); 1318 1319 postpipeevent(ppipe, EV_RCLOSED); 1320 1321 ppipe->pipe_peer = NULL; 1322 } 1323 evpipefree(cpipe); 1324 1325 /* 1326 * free resources 1327 */ 1328 if (PIPE_MTX(cpipe) != NULL) { 1329 if (ppipe != NULL) { 1330 /* 1331 * since the mutex is shared and the peer is still 1332 * alive, we need to release the mutex, not free it 1333 */ 1334 PIPE_UNLOCK(cpipe); 1335 } else { 1336 /* 1337 * peer is gone, so we're the sole party left with 1338 * interest in this mutex... 
unlock and free it 1339 */ 1340 PIPE_UNLOCK(cpipe); 1341 lck_mtx_free(PIPE_MTX(cpipe), pipe_mtx_grp); 1342 } 1343 } 1344 pipe_free_kmem(cpipe); 1345 if (cpipe->pipe_state & PIPE_WSELECT) { 1346 pipe_garbage_collect(cpipe); 1347 } else { 1348 zfree(pipe_zone, cpipe); 1349 pipe_garbage_collect(NULL); 1350 } 1351 1352} 1353 1354/*ARGSUSED*/ 1355static int 1356pipe_kqfilter(__unused struct fileproc *fp, struct knote *kn, __unused vfs_context_t ctx) 1357{ 1358 struct pipe *cpipe; 1359 1360 cpipe = (struct pipe *)kn->kn_fp->f_data; 1361 1362 PIPE_LOCK(cpipe); 1363#if CONFIG_MACF 1364 /* 1365 * XXX We should use a per thread credential here; minimally, the 1366 * XXX process credential should have a persistent reference on it 1367 * XXX before being passed in here. 1368 */ 1369 if (mac_pipe_check_kqfilter(vfs_context_ucred(ctx), kn, cpipe) != 0) { 1370 PIPE_UNLOCK(cpipe); 1371 return (1); 1372 } 1373#endif 1374 1375 switch (kn->kn_filter) { 1376 case EVFILT_READ: 1377 kn->kn_fop = &pipe_rfiltops; 1378 1379 break; 1380 case EVFILT_WRITE: 1381 kn->kn_fop = &pipe_wfiltops; 1382 1383 if (cpipe->pipe_peer == NULL) { 1384 /* 1385 * other end of pipe has been closed 1386 */ 1387 PIPE_UNLOCK(cpipe); 1388 return (EPIPE); 1389 } 1390 if (cpipe->pipe_peer) 1391 cpipe = cpipe->pipe_peer; 1392 break; 1393 default: 1394 PIPE_UNLOCK(cpipe); 1395 return (1); 1396 } 1397 1398 if (KNOTE_ATTACH(&cpipe->pipe_sel.si_note, kn)) 1399 cpipe->pipe_state |= PIPE_KNOTE; 1400 1401 PIPE_UNLOCK(cpipe); 1402 return (0); 1403} 1404 1405static void 1406filt_pipedetach(struct knote *kn) 1407{ 1408 struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; 1409 1410 PIPE_LOCK(cpipe); 1411 1412 if (kn->kn_filter == EVFILT_WRITE) { 1413 if (cpipe->pipe_peer == NULL) { 1414 PIPE_UNLOCK(cpipe); 1415 return; 1416 } 1417 cpipe = cpipe->pipe_peer; 1418 } 1419 if (cpipe->pipe_state & PIPE_KNOTE) { 1420 if (KNOTE_DETACH(&cpipe->pipe_sel.si_note, kn)) 1421 cpipe->pipe_state &= ~PIPE_KNOTE; 1422 } 1423 
PIPE_UNLOCK(cpipe); 1424} 1425 1426/*ARGSUSED*/ 1427static int 1428filt_piperead(struct knote *kn, long hint) 1429{ 1430 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1431 struct pipe *wpipe; 1432 int retval; 1433 1434 /* 1435 * if hint == 0, then we've been called from the kevent 1436 * world directly and do not currently hold the pipe mutex... 1437 * if hint == 1, we're being called back via the KNOTE post 1438 * we made in pipeselwakeup, and we already hold the mutex... 1439 */ 1440 if (hint == 0) 1441 PIPE_LOCK(rpipe); 1442 1443 wpipe = rpipe->pipe_peer; 1444 kn->kn_data = rpipe->pipe_buffer.cnt; 1445 if ((rpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF)) || 1446 (wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { 1447 kn->kn_flags |= EV_EOF; 1448 retval = 1; 1449 } else { 1450 int64_t lowwat = 1; 1451 if (kn->kn_sfflags & NOTE_LOWAT) { 1452 if (rpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(rpipe)) 1453 lowwat = MAX_PIPESIZE(rpipe); 1454 else if (kn->kn_sdata > lowwat) 1455 lowwat = kn->kn_sdata; 1456 } 1457 retval = kn->kn_data >= lowwat; 1458 } 1459 1460 if (hint == 0) 1461 PIPE_UNLOCK(rpipe); 1462 1463 return (retval); 1464} 1465 1466/*ARGSUSED*/ 1467static int 1468filt_pipewrite(struct knote *kn, long hint) 1469{ 1470 struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data; 1471 struct pipe *wpipe; 1472 1473 /* 1474 * if hint == 0, then we've been called from the kevent 1475 * world directly and do not currently hold the pipe mutex... 1476 * if hint == 1, we're being called back via the KNOTE post 1477 * we made in pipeselwakeup, and we already hold the mutex... 
1478 */ 1479 if (hint == 0) 1480 PIPE_LOCK(rpipe); 1481 1482 wpipe = rpipe->pipe_peer; 1483 1484 if ((wpipe == NULL) || (wpipe->pipe_state & (PIPE_DRAIN | PIPE_EOF))) { 1485 kn->kn_data = 0; 1486 kn->kn_flags |= EV_EOF; 1487 1488 if (hint == 0) 1489 PIPE_UNLOCK(rpipe); 1490 return (1); 1491 } 1492 kn->kn_data = MAX_PIPESIZE(wpipe) - wpipe->pipe_buffer.cnt; 1493 1494 int64_t lowwat = PIPE_BUF; 1495 if (kn->kn_sfflags & NOTE_LOWAT) { 1496 if (wpipe->pipe_buffer.size && kn->kn_sdata > MAX_PIPESIZE(wpipe)) 1497 lowwat = MAX_PIPESIZE(wpipe); 1498 else if (kn->kn_sdata > lowwat) 1499 lowwat = kn->kn_sdata; 1500 } 1501 1502 if (hint == 0) 1503 PIPE_UNLOCK(rpipe); 1504 1505 return (kn->kn_data >= lowwat); 1506} 1507 1508int 1509fill_pipeinfo(struct pipe * cpipe, struct pipe_info * pinfo) 1510{ 1511#if CONFIG_MACF 1512 int error; 1513#endif 1514 struct timeval now; 1515 struct vinfo_stat * ub; 1516 int pipe_size = 0; 1517 int pipe_count; 1518 1519 if (cpipe == NULL) 1520 return (EBADF); 1521 PIPE_LOCK(cpipe); 1522 1523#if CONFIG_MACF 1524 error = mac_pipe_check_stat(kauth_cred_get(), cpipe); 1525 if (error) { 1526 PIPE_UNLOCK(cpipe); 1527 return (error); 1528 } 1529#endif 1530 if (cpipe->pipe_buffer.buffer == 0) { 1531 /* 1532 * must be stat'ing the write fd 1533 */ 1534 if (cpipe->pipe_peer) { 1535 /* 1536 * the peer still exists, use it's info 1537 */ 1538 pipe_size = MAX_PIPESIZE(cpipe->pipe_peer); 1539 pipe_count = cpipe->pipe_peer->pipe_buffer.cnt; 1540 } else { 1541 pipe_count = 0; 1542 } 1543 } else { 1544 pipe_size = MAX_PIPESIZE(cpipe); 1545 pipe_count = cpipe->pipe_buffer.cnt; 1546 } 1547 /* 1548 * since peer's buffer is setup ouside of lock 1549 * we might catch it in transient state 1550 */ 1551 if (pipe_size == 0) 1552 pipe_size = PIPE_SIZE; 1553 1554 ub = &pinfo->pipe_stat; 1555 1556 bzero(ub, sizeof(*ub)); 1557 ub->vst_mode = S_IFIFO | S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; 1558 ub->vst_blksize = pipe_size; 1559 ub->vst_size = pipe_count; 1560 if 
(ub->vst_blksize != 0) 1561 ub->vst_blocks = (ub->vst_size + ub->vst_blksize - 1) / ub->vst_blksize; 1562 ub->vst_nlink = 1; 1563 1564 ub->vst_uid = kauth_getuid(); 1565 ub->vst_gid = kauth_getgid(); 1566 1567 microtime(&now); 1568 ub->vst_atime = now.tv_sec; 1569 ub->vst_atimensec = now.tv_usec * 1000; 1570 1571 ub->vst_mtime = now.tv_sec; 1572 ub->vst_mtimensec = now.tv_usec * 1000; 1573 1574 ub->vst_ctime = now.tv_sec; 1575 ub->vst_ctimensec = now.tv_usec * 1000; 1576 1577 /* 1578 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen, st_uid, st_gid. 1579 * XXX (st_dev, st_ino) should be unique. 1580 */ 1581 1582 pinfo->pipe_handle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)cpipe); 1583 pinfo->pipe_peerhandle = (uint64_t)VM_KERNEL_ADDRPERM((uintptr_t)(cpipe->pipe_peer)); 1584 pinfo->pipe_status = cpipe->pipe_state; 1585 1586 PIPE_UNLOCK(cpipe); 1587 1588 return (0); 1589} 1590 1591 1592static int 1593pipe_drain(struct fileproc *fp, __unused vfs_context_t ctx) 1594{ 1595 1596 /* Note: fdlock already held */ 1597 struct pipe *ppipe, *cpipe = (struct pipe *)(fp->f_fglob->fg_data); 1598 1599 if (cpipe) { 1600 PIPE_LOCK(cpipe); 1601 cpipe->pipe_state |= PIPE_DRAIN; 1602 cpipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW); 1603 wakeup(cpipe); 1604 1605 /* Must wake up peer: a writer sleeps on the read side */ 1606 if ((ppipe = cpipe->pipe_peer)) { 1607 ppipe->pipe_state |= PIPE_DRAIN; 1608 ppipe->pipe_state &= ~(PIPE_WANTR | PIPE_WANTW); 1609 wakeup(ppipe); 1610 } 1611 1612 PIPE_UNLOCK(cpipe); 1613 return 0; 1614 } 1615 1616 return 1; 1617} 1618 1619 1620 /* 1621 * When a thread sets a write-select on a pipe, it creates an implicit, 1622 * untracked dependency between that thread and the peer of the pipe 1623 * on which the select is set. If the peer pipe is closed and freed 1624 * before the select()ing thread wakes up, the system will panic as 1625 * it attempts to unwind the dangling select(). 
To avoid that panic, 1626 * we notice whenever a dangerous select() is set on a pipe, and 1627 * defer the final deletion of the pipe until that select()s are all 1628 * resolved. Since we can't currently detect exactly when that 1629 * resolution happens, we use a simple garbage collection queue to 1630 * reap the at-risk pipes 'later'. 1631 */ 1632static void 1633pipe_garbage_collect(struct pipe *cpipe) 1634{ 1635 uint64_t old, now; 1636 struct pipe_garbage *pgp; 1637 1638 /* Convert msecs to nsecs and then to abstime */ 1639 old = pipe_garbage_age_limit * 1000000; 1640 nanoseconds_to_absolutetime(old, &old); 1641 1642 lck_mtx_lock(pipe_garbage_lock); 1643 1644 /* Free anything that's been on the queue for <mumble> seconds */ 1645 now = mach_absolute_time(); 1646 old = now - old; 1647 while ((pgp = pipe_garbage_head) && pgp->pg_timestamp < old) { 1648 pipe_garbage_head = pgp->pg_next; 1649 if (pipe_garbage_head == NULL) 1650 pipe_garbage_tail = NULL; 1651 pipe_garbage_count--; 1652 zfree(pipe_zone, pgp->pg_pipe); 1653 zfree(pipe_garbage_zone, pgp); 1654 } 1655 1656 /* Add the new pipe (if any) to the tail of the garbage queue */ 1657 if (cpipe) { 1658 cpipe->pipe_state = PIPE_DEAD; 1659 pgp = (struct pipe_garbage *)zalloc(pipe_garbage_zone); 1660 if (pgp == NULL) { 1661 /* 1662 * We're too low on memory to garbage collect the 1663 * pipe. Freeing it runs the risk of panicing the 1664 * system. All we can do is leak it and leave 1665 * a breadcrumb behind. The good news, such as it 1666 * is, is that this will probably never happen. 1667 * We will probably hit the panic below first. 
1668 */ 1669 printf("Leaking pipe %p - no room left in the queue", 1670 cpipe); 1671 lck_mtx_unlock(pipe_garbage_lock); 1672 return; 1673 } 1674 1675 pgp->pg_pipe = cpipe; 1676 pgp->pg_timestamp = now; 1677 pgp->pg_next = NULL; 1678 1679 if (pipe_garbage_tail) 1680 pipe_garbage_tail->pg_next = pgp; 1681 pipe_garbage_tail = pgp; 1682 if (pipe_garbage_head == NULL) 1683 pipe_garbage_head = pipe_garbage_tail; 1684 1685 if (pipe_garbage_count++ >= PIPE_GARBAGE_QUEUE_LIMIT) 1686 panic("Length of pipe garbage queue exceeded %d", 1687 PIPE_GARBAGE_QUEUE_LIMIT); 1688 } 1689 lck_mtx_unlock(pipe_garbage_lock); 1690} 1691 1692