1/* 2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Mach Operating System 30 * Copyright (c) 1987 Carnegie-Mellon University 31 * All rights reserved. The CMU software License Agreement specifies 32 * the terms and conditions for use and redistribution. 33 */ 34/* 35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce 36 * support for mandatory and extensible security protections. This notice 37 * is included in support of clause 2.2 (b) of the Apple Public License, 38 * Version 2.0. 
39 */ 40 41#include <meta_features.h> 42 43#include <kern/task.h> 44#include <kern/thread.h> 45#include <kern/debug.h> 46#include <kern/lock.h> 47#include <kern/extmod_statistics.h> 48#include <mach/mach_traps.h> 49#include <mach/port.h> 50#include <mach/task.h> 51#include <mach/task_access.h> 52#include <mach/task_special_ports.h> 53#include <mach/time_value.h> 54#include <mach/vm_map.h> 55#include <mach/vm_param.h> 56#include <mach/vm_prot.h> 57 58#include <sys/file_internal.h> 59#include <sys/param.h> 60#include <sys/systm.h> 61#include <sys/dir.h> 62#include <sys/namei.h> 63#include <sys/proc_internal.h> 64#include <sys/kauth.h> 65#include <sys/vm.h> 66#include <sys/file.h> 67#include <sys/vnode_internal.h> 68#include <sys/mount.h> 69#include <sys/trace.h> 70#include <sys/kernel.h> 71#include <sys/ubc_internal.h> 72#include <sys/user.h> 73#include <sys/syslog.h> 74#include <sys/stat.h> 75#include <sys/sysproto.h> 76#include <sys/mman.h> 77#include <sys/sysctl.h> 78#include <sys/cprotect.h> 79#include <sys/kpi_socket.h> 80#include <sys/kas_info.h> 81 82#include <security/audit/audit.h> 83#include <security/mac.h> 84#include <bsm/audit_kevents.h> 85 86#include <kern/kalloc.h> 87#include <vm/vm_map.h> 88#include <vm/vm_kern.h> 89#include <vm/vm_pageout.h> 90 91#include <machine/spl.h> 92 93#include <mach/shared_region.h> 94#include <vm/vm_shared_region.h> 95 96#include <vm/vm_protos.h> 97 98#include <sys/kern_memorystatus.h> 99 100 101int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t); 102int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); 103 104SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, ""); 105 106 107/* 108 * Sysctl's related to data/stack execution. 
See osfmk/vm/vm_map.c 109 */ 110 111#ifndef SECURE_KERNEL 112extern int allow_stack_exec, allow_data_exec; 113 114SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, ""); 115SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, ""); 116#endif /* !SECURE_KERNEL */ 117 118static const char *prot_values[] = { 119 "none", 120 "read-only", 121 "write-only", 122 "read-write", 123 "execute-only", 124 "read-execute", 125 "write-execute", 126 "read-write-execute" 127}; 128 129void 130log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot) 131{ 132 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n", 133 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]); 134} 135 136int shared_region_unnest_logging = 1; 137 138SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED, 139 &shared_region_unnest_logging, 0, ""); 140 141int vm_shared_region_unnest_log_interval = 10; 142int shared_region_unnest_log_count_threshold = 5; 143 144/* These log rate throttling state variables aren't thread safe, but 145 * are sufficient unto the task. 
146 */ 147static int64_t last_unnest_log_time = 0; 148static int shared_region_unnest_log_count = 0; 149 150void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) { 151 struct timeval tv; 152 const char *pcommstr; 153 154 if (shared_region_unnest_logging == 0) 155 return; 156 157 if (shared_region_unnest_logging == 1) { 158 microtime(&tv); 159 if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) { 160 if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold) 161 return; 162 } 163 else { 164 last_unnest_log_time = tv.tv_sec; 165 shared_region_unnest_log_count = 0; 166 } 167 } 168 169 pcommstr = current_proc()->p_comm; 170 171 printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e); 172} 173 174int 175useracc( 176 user_addr_t addr, 177 user_size_t len, 178 int prot) 179{ 180 vm_map_t map; 181 182 map = current_map(); 183 return (vm_map_check_protection( 184 map, 185 vm_map_trunc_page(addr, 186 vm_map_page_mask(map)), 187 vm_map_round_page(addr+len, 188 vm_map_page_mask(map)), 189 prot == B_READ ? 
VM_PROT_READ : VM_PROT_WRITE)); 190} 191 192int 193vslock( 194 user_addr_t addr, 195 user_size_t len) 196{ 197 kern_return_t kret; 198 vm_map_t map; 199 200 map = current_map(); 201 kret = vm_map_wire(map, 202 vm_map_trunc_page(addr, 203 vm_map_page_mask(map)), 204 vm_map_round_page(addr+len, 205 vm_map_page_mask(map)), 206 VM_PROT_READ | VM_PROT_WRITE, 207 FALSE); 208 209 switch (kret) { 210 case KERN_SUCCESS: 211 return (0); 212 case KERN_INVALID_ADDRESS: 213 case KERN_NO_SPACE: 214 return (ENOMEM); 215 case KERN_PROTECTION_FAILURE: 216 return (EACCES); 217 default: 218 return (EINVAL); 219 } 220} 221 222int 223vsunlock( 224 user_addr_t addr, 225 user_size_t len, 226 __unused int dirtied) 227{ 228#if FIXME /* [ */ 229 pmap_t pmap; 230 vm_page_t pg; 231 vm_map_offset_t vaddr; 232 ppnum_t paddr; 233#endif /* FIXME ] */ 234 kern_return_t kret; 235 vm_map_t map; 236 237 map = current_map(); 238 239#if FIXME /* [ */ 240 if (dirtied) { 241 pmap = get_task_pmap(current_task()); 242 for (vaddr = vm_map_trunc_page(addr, PAGE_MASK); 243 vaddr < vm_map_round_page(addr+len, PAGE_MASK); 244 vaddr += PAGE_SIZE) { 245 paddr = pmap_extract(pmap, vaddr); 246 pg = PHYS_TO_VM_PAGE(paddr); 247 vm_page_set_modified(pg); 248 } 249 } 250#endif /* FIXME ] */ 251#ifdef lint 252 dirtied++; 253#endif /* lint */ 254 kret = vm_map_unwire(map, 255 vm_map_trunc_page(addr, 256 vm_map_page_mask(map)), 257 vm_map_round_page(addr+len, 258 vm_map_page_mask(map)), 259 FALSE); 260 switch (kret) { 261 case KERN_SUCCESS: 262 return (0); 263 case KERN_INVALID_ADDRESS: 264 case KERN_NO_SPACE: 265 return (ENOMEM); 266 case KERN_PROTECTION_FAILURE: 267 return (EACCES); 268 default: 269 return (EINVAL); 270 } 271} 272 273int 274subyte( 275 user_addr_t addr, 276 int byte) 277{ 278 char character; 279 280 character = (char)byte; 281 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 
0 : -1); 282} 283 284int 285suibyte( 286 user_addr_t addr, 287 int byte) 288{ 289 char character; 290 291 character = (char)byte; 292 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1); 293} 294 295int fubyte(user_addr_t addr) 296{ 297 unsigned char byte; 298 299 if (copyin(addr, (void *) &byte, sizeof(char))) 300 return(-1); 301 return(byte); 302} 303 304int fuibyte(user_addr_t addr) 305{ 306 unsigned char byte; 307 308 if (copyin(addr, (void *) &(byte), sizeof(char))) 309 return(-1); 310 return(byte); 311} 312 313int 314suword( 315 user_addr_t addr, 316 long word) 317{ 318 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); 319} 320 321long fuword(user_addr_t addr) 322{ 323 long word = 0; 324 325 if (copyin(addr, (void *) &word, sizeof(int))) 326 return(-1); 327 return(word); 328} 329 330/* suiword and fuiword are the same as suword and fuword, respectively */ 331 332int 333suiword( 334 user_addr_t addr, 335 long word) 336{ 337 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); 338} 339 340long fuiword(user_addr_t addr) 341{ 342 long word = 0; 343 344 if (copyin(addr, (void *) &word, sizeof(int))) 345 return(-1); 346 return(word); 347} 348 349/* 350 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the 351 * fetching and setting of process-sized size_t and pointer values. 352 */ 353int 354sulong(user_addr_t addr, int64_t word) 355{ 356 357 if (IS_64BIT_PROCESS(current_proc())) { 358 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 
0 : -1); 359 } else { 360 return(suiword(addr, (long)word)); 361 } 362} 363 364int64_t 365fulong(user_addr_t addr) 366{ 367 int64_t longword; 368 369 if (IS_64BIT_PROCESS(current_proc())) { 370 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) 371 return(-1); 372 return(longword); 373 } else { 374 return((int64_t)fuiword(addr)); 375 } 376} 377 378int 379suulong(user_addr_t addr, uint64_t uword) 380{ 381 382 if (IS_64BIT_PROCESS(current_proc())) { 383 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1); 384 } else { 385 return(suiword(addr, (uint32_t)uword)); 386 } 387} 388 389uint64_t 390fuulong(user_addr_t addr) 391{ 392 uint64_t ulongword; 393 394 if (IS_64BIT_PROCESS(current_proc())) { 395 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) 396 return(-1ULL); 397 return(ulongword); 398 } else { 399 return((uint64_t)fuiword(addr)); 400 } 401} 402 403int 404swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval) 405{ 406 return(ENOTSUP); 407} 408 409/* 410 * pid_for_task 411 * 412 * Find the BSD process ID for the Mach task associated with the given Mach port 413 * name 414 * 415 * Parameters: args User argument descriptor (see below) 416 * 417 * Indirect parameters: args->t Mach port name 418 * args->pid Process ID (returned value; see below) 419 * 420 * Returns: KERL_SUCCESS Success 421 * KERN_FAILURE Not success 422 * 423 * Implicit returns: args->pid Process ID 424 * 425 */ 426kern_return_t 427pid_for_task( 428 struct pid_for_task_args *args) 429{ 430 mach_port_name_t t = args->t; 431 user_addr_t pid_addr = args->pid; 432 proc_t p; 433 task_t t1; 434 int pid = -1; 435 kern_return_t err = KERN_SUCCESS; 436 437 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK); 438 AUDIT_ARG(mach_port1, t); 439 440 t1 = port_name_to_task(t); 441 442 if (t1 == TASK_NULL) { 443 err = KERN_FAILURE; 444 goto pftout; 445 } else { 446 p = get_bsdtask_info(t1); 447 if (p) { 448 pid = proc_pid(p); 449 err = KERN_SUCCESS; 450 } else 
{ 451 err = KERN_FAILURE; 452 } 453 } 454 task_deallocate(t1); 455pftout: 456 AUDIT_ARG(pid, pid); 457 (void) copyout((char *) &pid, pid_addr, sizeof(int)); 458 AUDIT_MACH_SYSCALL_EXIT(err); 459 return(err); 460} 461 462/* 463 * 464 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self 465 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication 466 * 467 */ 468static int tfp_policy = KERN_TFP_POLICY_DEFAULT; 469 470/* 471 * Routine: task_for_pid_posix_check 472 * Purpose: 473 * Verify that the current process should be allowed to 474 * get the target process's task port. This is only 475 * permitted if: 476 * - The current process is root 477 * OR all of the following are true: 478 * - The target process's real, effective, and saved uids 479 * are the same as the current proc's euid, 480 * - The target process's group set is a subset of the 481 * calling process's group set, and 482 * - The target process hasn't switched credentials. 483 * 484 * Returns: TRUE: permitted 485 * FALSE: denied 486 */ 487static int 488task_for_pid_posix_check(proc_t target) 489{ 490 kauth_cred_t targetcred, mycred; 491 uid_t myuid; 492 int allowed; 493 494 /* No task_for_pid on bad targets */ 495 if (target->p_stat == SZOMB) { 496 return FALSE; 497 } 498 499 mycred = kauth_cred_get(); 500 myuid = kauth_cred_getuid(mycred); 501 502 /* If we're running as root, the check passes */ 503 if (kauth_cred_issuser(mycred)) 504 return TRUE; 505 506 /* We're allowed to get our own task port */ 507 if (target == current_proc()) 508 return TRUE; 509 510 /* 511 * Under DENY, only root can get another proc's task port, 512 * so no more checks are needed. 513 */ 514 if (tfp_policy == KERN_TFP_POLICY_DENY) { 515 return FALSE; 516 } 517 518 targetcred = kauth_cred_proc_ref(target); 519 allowed = TRUE; 520 521 /* Do target's ruid, euid, and saved uid match my euid? 
*/ 522 if ((kauth_cred_getuid(targetcred) != myuid) || 523 (kauth_cred_getruid(targetcred) != myuid) || 524 (kauth_cred_getsvuid(targetcred) != myuid)) { 525 allowed = FALSE; 526 goto out; 527 } 528 529 /* Are target's groups a subset of my groups? */ 530 if (kauth_cred_gid_subset(targetcred, mycred, &allowed) || 531 allowed == 0) { 532 allowed = FALSE; 533 goto out; 534 } 535 536 /* Has target switched credentials? */ 537 if (target->p_flag & P_SUGID) { 538 allowed = FALSE; 539 goto out; 540 } 541 542out: 543 kauth_cred_unref(&targetcred); 544 return allowed; 545} 546 547/* 548 * Routine: task_for_pid 549 * Purpose: 550 * Get the task port for another "process", named by its 551 * process ID on the same host as "target_task". 552 * 553 * Only permitted to privileged processes, or processes 554 * with the same user ID. 555 * 556 * Note: if pid == 0, an error is return no matter who is calling. 557 * 558 * XXX This should be a BSD system call, not a Mach trap!!! 559 */ 560kern_return_t 561task_for_pid( 562 struct task_for_pid_args *args) 563{ 564 mach_port_name_t target_tport = args->target_tport; 565 int pid = args->pid; 566 user_addr_t task_addr = args->t; 567 proc_t p = PROC_NULL; 568 task_t t1 = TASK_NULL; 569 mach_port_name_t tret = MACH_PORT_NULL; 570 ipc_port_t tfpport; 571 void * sright; 572 int error = 0; 573 574 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID); 575 AUDIT_ARG(pid, pid); 576 AUDIT_ARG(mach_port1, target_tport); 577 578 /* Always check if pid == 0 */ 579 if (pid == 0) { 580 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); 581 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); 582 return(KERN_FAILURE); 583 } 584 585 t1 = port_name_to_task(target_tport); 586 if (t1 == TASK_NULL) { 587 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); 588 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); 589 return(KERN_FAILURE); 590 } 591 592 593 p = proc_find(pid); 594 if (p == PROC_NULL) { 595 error = KERN_FAILURE; 596 goto tfpout; 597 } 598 599#if 
CONFIG_AUDIT 600 AUDIT_ARG(process, p); 601#endif 602 603 if (!(task_for_pid_posix_check(p))) { 604 error = KERN_FAILURE; 605 goto tfpout; 606 } 607 608 if (p->task != TASK_NULL) { 609 /* If we aren't root and target's task access port is set... */ 610 if (!kauth_cred_issuser(kauth_cred_get()) && 611 p != current_proc() && 612 (task_get_task_access_port(p->task, &tfpport) == 0) && 613 (tfpport != IPC_PORT_NULL)) { 614 615 if (tfpport == IPC_PORT_DEAD) { 616 error = KERN_PROTECTION_FAILURE; 617 goto tfpout; 618 } 619 620 /* Call up to the task access server */ 621 error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid); 622 623 if (error != MACH_MSG_SUCCESS) { 624 if (error == MACH_RCV_INTERRUPTED) 625 error = KERN_ABORTED; 626 else 627 error = KERN_FAILURE; 628 goto tfpout; 629 } 630 } 631#if CONFIG_MACF 632 error = mac_proc_check_get_task(kauth_cred_get(), p); 633 if (error) { 634 error = KERN_FAILURE; 635 goto tfpout; 636 } 637#endif 638 639 /* Grant task port access */ 640 task_reference(p->task); 641 extmod_statistics_incr_task_for_pid(p->task); 642 643 sright = (void *) convert_task_to_port(p->task); 644 tret = ipc_port_copyout_send( 645 sright, 646 get_task_ipcspace(current_task())); 647 } 648 error = KERN_SUCCESS; 649 650tfpout: 651 task_deallocate(t1); 652 AUDIT_ARG(mach_port2, tret); 653 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); 654 if (p != PROC_NULL) 655 proc_rele(p); 656 AUDIT_MACH_SYSCALL_EXIT(error); 657 return(error); 658} 659 660/* 661 * Routine: task_name_for_pid 662 * Purpose: 663 * Get the task name port for another "process", named by its 664 * process ID on the same host as "target_task". 665 * 666 * Only permitted to privileged processes, or processes 667 * with the same user ID. 668 * 669 * XXX This should be a BSD system call, not a Mach trap!!! 
670 */ 671 672kern_return_t 673task_name_for_pid( 674 struct task_name_for_pid_args *args) 675{ 676 mach_port_name_t target_tport = args->target_tport; 677 int pid = args->pid; 678 user_addr_t task_addr = args->t; 679 proc_t p = PROC_NULL; 680 task_t t1; 681 mach_port_name_t tret; 682 void * sright; 683 int error = 0, refheld = 0; 684 kauth_cred_t target_cred; 685 686 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID); 687 AUDIT_ARG(pid, pid); 688 AUDIT_ARG(mach_port1, target_tport); 689 690 t1 = port_name_to_task(target_tport); 691 if (t1 == TASK_NULL) { 692 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); 693 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); 694 return(KERN_FAILURE); 695 } 696 697 p = proc_find(pid); 698 if (p != PROC_NULL) { 699 AUDIT_ARG(process, p); 700 target_cred = kauth_cred_proc_ref(p); 701 refheld = 1; 702 703 if ((p->p_stat != SZOMB) 704 && ((current_proc() == p) 705 || kauth_cred_issuser(kauth_cred_get()) 706 || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 707 ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) { 708 709 if (p->task != TASK_NULL) { 710 task_reference(p->task); 711#if CONFIG_MACF 712 error = mac_proc_check_get_task_name(kauth_cred_get(), p); 713 if (error) { 714 task_deallocate(p->task); 715 goto noperm; 716 } 717#endif 718 sright = (void *)convert_task_name_to_port(p->task); 719 tret = ipc_port_copyout_send(sright, 720 get_task_ipcspace(current_task())); 721 } else 722 tret = MACH_PORT_NULL; 723 724 AUDIT_ARG(mach_port2, tret); 725 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); 726 task_deallocate(t1); 727 error = KERN_SUCCESS; 728 goto tnfpout; 729 } 730 } 731 732#if CONFIG_MACF 733noperm: 734#endif 735 task_deallocate(t1); 736 tret = MACH_PORT_NULL; 737 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); 738 error = KERN_FAILURE; 739tnfpout: 740 if (refheld != 0) 741 kauth_cred_unref(&target_cred); 742 if (p != PROC_NULL) 743 proc_rele(p); 744 
AUDIT_MACH_SYSCALL_EXIT(error); 745 return(error); 746} 747 748kern_return_t 749pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) 750{ 751 task_t target = NULL; 752 proc_t targetproc = PROC_NULL; 753 int pid = args->pid; 754 int error = 0; 755 756#if CONFIG_MACF 757 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND); 758 if (error) { 759 error = EPERM; 760 goto out; 761 } 762#endif 763 764 if (pid == 0) { 765 error = EPERM; 766 goto out; 767 } 768 769 targetproc = proc_find(pid); 770 if (targetproc == PROC_NULL) { 771 error = ESRCH; 772 goto out; 773 } 774 775 if (!task_for_pid_posix_check(targetproc)) { 776 error = EPERM; 777 goto out; 778 } 779 780 target = targetproc->task; 781 if (target != TASK_NULL) { 782 mach_port_t tfpport; 783 784 /* If we aren't root and target's task access port is set... */ 785 if (!kauth_cred_issuser(kauth_cred_get()) && 786 targetproc != current_proc() && 787 (task_get_task_access_port(target, &tfpport) == 0) && 788 (tfpport != IPC_PORT_NULL)) { 789 790 if (tfpport == IPC_PORT_DEAD) { 791 error = EACCES; 792 goto out; 793 } 794 795 /* Call up to the task access server */ 796 error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid); 797 798 if (error != MACH_MSG_SUCCESS) { 799 if (error == MACH_RCV_INTERRUPTED) 800 error = EINTR; 801 else 802 error = EPERM; 803 goto out; 804 } 805 } 806 } 807 808 task_reference(target); 809 error = task_pidsuspend(target); 810 if (error) { 811 if (error == KERN_INVALID_ARGUMENT) { 812 error = EINVAL; 813 } else { 814 error = EPERM; 815 } 816 } 817#if CONFIG_MEMORYSTATUS 818 else { 819 memorystatus_on_suspend(targetproc); 820 } 821#endif 822 823 task_deallocate(target); 824 825out: 826 if (targetproc != PROC_NULL) 827 proc_rele(targetproc); 828 *ret = error; 829 return error; 830} 831 832kern_return_t 833pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) 834{ 835 task_t target = NULL; 836 proc_t targetproc = PROC_NULL; 837 
int pid = args->pid; 838 int error = 0; 839 840#if CONFIG_MACF 841 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME); 842 if (error) { 843 error = EPERM; 844 goto out; 845 } 846#endif 847 848 if (pid == 0) { 849 error = EPERM; 850 goto out; 851 } 852 853 targetproc = proc_find(pid); 854 if (targetproc == PROC_NULL) { 855 error = ESRCH; 856 goto out; 857 } 858 859 if (!task_for_pid_posix_check(targetproc)) { 860 error = EPERM; 861 goto out; 862 } 863 864 target = targetproc->task; 865 if (target != TASK_NULL) { 866 mach_port_t tfpport; 867 868 /* If we aren't root and target's task access port is set... */ 869 if (!kauth_cred_issuser(kauth_cred_get()) && 870 targetproc != current_proc() && 871 (task_get_task_access_port(target, &tfpport) == 0) && 872 (tfpport != IPC_PORT_NULL)) { 873 874 if (tfpport == IPC_PORT_DEAD) { 875 error = EACCES; 876 goto out; 877 } 878 879 /* Call up to the task access server */ 880 error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid); 881 882 if (error != MACH_MSG_SUCCESS) { 883 if (error == MACH_RCV_INTERRUPTED) 884 error = EINTR; 885 else 886 error = EPERM; 887 goto out; 888 } 889 } 890 } 891 892 task_reference(target); 893 894#if CONFIG_MEMORYSTATUS 895 memorystatus_on_resume(targetproc); 896#endif 897 898 error = task_pidresume(target); 899 if (error) { 900 if (error == KERN_INVALID_ARGUMENT) { 901 error = EINVAL; 902 } else { 903 if (error == KERN_MEMORY_ERROR) { 904 psignal(targetproc, SIGKILL); 905 error = EIO; 906 } else 907 error = EPERM; 908 } 909 } 910 911 task_deallocate(target); 912 913out: 914 if (targetproc != PROC_NULL) 915 proc_rele(targetproc); 916 917 *ret = error; 918 return error; 919} 920 921 922static int 923sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1, 924 __unused int arg2, struct sysctl_req *req) 925{ 926 int error = 0; 927 int new_value; 928 929 error = SYSCTL_OUT(req, arg1, sizeof(int)); 930 if (error || req->newptr == USER_ADDR_NULL) 931 return(error); 932 
933 if (!kauth_cred_issuser(kauth_cred_get())) 934 return(EPERM); 935 936 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) { 937 goto out; 938 } 939 if ((new_value == KERN_TFP_POLICY_DENY) 940 || (new_value == KERN_TFP_POLICY_DEFAULT)) 941 tfp_policy = new_value; 942 else 943 error = EINVAL; 944out: 945 return(error); 946 947} 948 949#if defined(SECURE_KERNEL) 950static int kern_secure_kernel = 1; 951#else 952static int kern_secure_kernel = 0; 953#endif 954 955SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, ""); 956 957SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp"); 958SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 959 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy"); 960 961SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED, 962 &shared_region_trace_level, 0, ""); 963SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED, 964 &shared_region_version, 0, ""); 965SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED, 966 &shared_region_persistence, 0, ""); 967 968/* 969 * shared_region_check_np: 970 * 971 * This system call is intended for dyld. 972 * 973 * dyld calls this when any process starts to see if the process's shared 974 * region is already set up and ready to use. 975 * This call returns the base address of the first mapping in the 976 * process's shared region's first mapping. 977 * dyld will then check what's mapped at that address. 978 * 979 * If the shared region is empty, dyld will then attempt to map the shared 980 * cache file in the shared region via the shared_region_map_np() system call. 981 * 982 * If something's already mapped in the shared region, dyld will check if it 983 * matches the shared cache it would like to use for that process. 
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL   no shared region
 * ENOMEM   shared region is empty
 * EFAULT   bad address for "start_address"
 */
int
shared_region_check_np(
    __unused struct proc *p,
    struct shared_region_check_np_args *uap,
    __unused int *retvalp)
{
    vm_shared_region_t shared_region;
    mach_vm_offset_t start_address = 0;
    int error;
    kern_return_t kr;

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
         current_thread(), p->p_pid, p->p_comm,
         (uint64_t)uap->start_address));

    /* retrieve the current tasks's shared region */
    shared_region = vm_shared_region_get(current_task());
    if (shared_region != NULL) {
        /* retrieve address of its first mapping... */
        kr = vm_shared_region_start_address(shared_region,
            &start_address);
        if (kr != KERN_SUCCESS) {
            error = ENOMEM;
        } else {
            /* ... and give it to the caller */
            error = copyout(&start_address,
                (user_addr_t) uap->start_address,
                sizeof (start_address));
            if (error) {
                SHARED_REGION_TRACE_ERROR(
                    ("shared_region: %p [%d(%s)] "
                     "check_np(0x%llx) "
                     "copyout(0x%llx) error %d\n",
                     current_thread(), p->p_pid, p->p_comm,
                     (uint64_t)uap->start_address, (uint64_t)start_address,
                     error));
            }
        }
        /* drop the reference taken by vm_shared_region_get() */
        vm_shared_region_deallocate(shared_region);
    } else {
        /* no shared region ! */
        error = EINVAL;
    }

    SHARED_REGION_TRACE_DEBUG(
        ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
         current_thread(), p->p_pid, p->p_comm,
         (uint64_t)uap->start_address, (uint64_t)start_address, error));

    return error;
}


/*
 * Copy an array of shared_file_mapping_np descriptors in from user space.
 * "mappings" must have room for "mappings_count" entries.
 * NOTE(review): mappings_count * sizeof(mappings[0]) is not checked for
 * overflow here — callers are presumably expected to bound the count
 * before calling; verify at each call site.
 */
int
shared_region_copyin_mappings(
    struct proc *p,
    user_addr_t user_mappings,
    unsigned int mappings_count,
    struct shared_file_mapping_np *mappings)
{
    int error = 0;
    vm_size_t mappings_size = 0;

    /* get the list of mappings the caller wants us to establish */
    mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
    error = copyin(user_mappings,
        mappings,
        mappings_size);
    if (error) {
        SHARED_REGION_TRACE_ERROR(
            ("shared_region: %p [%d(%s)] map(): "
             "copyin(0x%llx, %d) failed (error=%d)\n",
             current_thread(), p->p_pid, p->p_comm,
             (uint64_t)user_mappings, mappings_count, error));
    }
    return error;
}
/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
1081 */ 1082int 1083_shared_region_map_and_slide( 1084 struct proc *p, 1085 int fd, 1086 uint32_t mappings_count, 1087 struct shared_file_mapping_np *mappings, 1088 uint32_t slide, 1089 user_addr_t slide_start, 1090 user_addr_t slide_size) 1091{ 1092 int error; 1093 kern_return_t kr; 1094 struct fileproc *fp; 1095 struct vnode *vp, *root_vp; 1096 struct vnode_attr va; 1097 off_t fs; 1098 memory_object_size_t file_size; 1099#if CONFIG_MACF 1100 vm_prot_t maxprot = VM_PROT_ALL; 1101#endif 1102 memory_object_control_t file_control; 1103 struct vm_shared_region *shared_region; 1104 1105 SHARED_REGION_TRACE_DEBUG( 1106 ("shared_region: %p [%d(%s)] -> map\n", 1107 current_thread(), p->p_pid, p->p_comm)); 1108 1109 shared_region = NULL; 1110 fp = NULL; 1111 vp = NULL; 1112 1113 /* get file structure from file descriptor */ 1114 error = fp_lookup(p, fd, &fp, 0); 1115 if (error) { 1116 SHARED_REGION_TRACE_ERROR( 1117 ("shared_region: %p [%d(%s)] map: " 1118 "fd=%d lookup failed (error=%d)\n", 1119 current_thread(), p->p_pid, p->p_comm, fd, error)); 1120 goto done; 1121 } 1122 1123 /* make sure we're attempting to map a vnode */ 1124 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { 1125 SHARED_REGION_TRACE_ERROR( 1126 ("shared_region: %p [%d(%s)] map: " 1127 "fd=%d not a vnode (type=%d)\n", 1128 current_thread(), p->p_pid, p->p_comm, 1129 fd, FILEGLOB_DTYPE(fp->f_fglob))); 1130 error = EINVAL; 1131 goto done; 1132 } 1133 1134 /* we need at least read permission on the file */ 1135 if (! 
(fp->f_fglob->fg_flag & FREAD)) { 1136 SHARED_REGION_TRACE_ERROR( 1137 ("shared_region: %p [%d(%s)] map: " 1138 "fd=%d not readable\n", 1139 current_thread(), p->p_pid, p->p_comm, fd)); 1140 error = EPERM; 1141 goto done; 1142 } 1143 1144 /* get vnode from file structure */ 1145 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data); 1146 if (error) { 1147 SHARED_REGION_TRACE_ERROR( 1148 ("shared_region: %p [%d(%s)] map: " 1149 "fd=%d getwithref failed (error=%d)\n", 1150 current_thread(), p->p_pid, p->p_comm, fd, error)); 1151 goto done; 1152 } 1153 vp = (struct vnode *) fp->f_fglob->fg_data; 1154 1155 /* make sure the vnode is a regular file */ 1156 if (vp->v_type != VREG) { 1157 SHARED_REGION_TRACE_ERROR( 1158 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1159 "not a file (type=%d)\n", 1160 current_thread(), p->p_pid, p->p_comm, 1161 vp, vp->v_name, vp->v_type)); 1162 error = EINVAL; 1163 goto done; 1164 } 1165 1166#if CONFIG_MACF 1167 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), 1168 fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot); 1169 if (error) { 1170 goto done; 1171 } 1172#endif /* MAC */ 1173 1174#if CONFIG_PROTECT 1175 /* check for content protection access */ 1176 { 1177 error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0); 1178 if (error) { 1179 goto done; 1180 } 1181 } 1182#endif /* CONFIG_PROTECT */ 1183 1184 /* make sure vnode is on the process's root volume */ 1185 root_vp = p->p_fd->fd_rdir; 1186 if (root_vp == NULL) { 1187 root_vp = rootvnode; 1188 } else { 1189 /* 1190 * Chroot-ed processes can't use the shared_region. 
1191 */ 1192 error = EINVAL; 1193 goto done; 1194 } 1195 1196 if (vp->v_mount != root_vp->v_mount) { 1197 SHARED_REGION_TRACE_ERROR( 1198 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1199 "not on process's root volume\n", 1200 current_thread(), p->p_pid, p->p_comm, 1201 vp, vp->v_name)); 1202 error = EPERM; 1203 goto done; 1204 } 1205 1206 /* make sure vnode is owned by "root" */ 1207 VATTR_INIT(&va); 1208 VATTR_WANTED(&va, va_uid); 1209 error = vnode_getattr(vp, &va, vfs_context_current()); 1210 if (error) { 1211 SHARED_REGION_TRACE_ERROR( 1212 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1213 "vnode_getattr(%p) failed (error=%d)\n", 1214 current_thread(), p->p_pid, p->p_comm, 1215 vp, vp->v_name, vp, error)); 1216 goto done; 1217 } 1218 if (va.va_uid != 0) { 1219 SHARED_REGION_TRACE_ERROR( 1220 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1221 "owned by uid=%d instead of 0\n", 1222 current_thread(), p->p_pid, p->p_comm, 1223 vp, vp->v_name, va.va_uid)); 1224 error = EPERM; 1225 goto done; 1226 } 1227 1228 /* get vnode size */ 1229 error = vnode_size(vp, &fs, vfs_context_current()); 1230 if (error) { 1231 SHARED_REGION_TRACE_ERROR( 1232 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1233 "vnode_size(%p) failed (error=%d)\n", 1234 current_thread(), p->p_pid, p->p_comm, 1235 vp, vp->v_name, vp, error)); 1236 goto done; 1237 } 1238 file_size = fs; 1239 1240 /* get the file's memory object handle */ 1241 file_control = ubc_getobject(vp, UBC_HOLDOBJECT); 1242 if (file_control == MEMORY_OBJECT_CONTROL_NULL) { 1243 SHARED_REGION_TRACE_ERROR( 1244 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1245 "no memory object\n", 1246 current_thread(), p->p_pid, p->p_comm, 1247 vp, vp->v_name)); 1248 error = EINVAL; 1249 goto done; 1250 } 1251 1252 1253 /* get the process's shared region (setup in vm_map_exec()) */ 1254 shared_region = vm_shared_region_get(current_task()); 1255 if (shared_region == NULL) { 1256 SHARED_REGION_TRACE_ERROR( 1257 ("shared_region: %p [%d(%s)] map(%p:'%s'): 
" 1258 "no shared region\n", 1259 current_thread(), p->p_pid, p->p_comm, 1260 vp, vp->v_name)); 1261 goto done; 1262 } 1263 1264 /* map the file into that shared region's submap */ 1265 kr = vm_shared_region_map_file(shared_region, 1266 mappings_count, 1267 mappings, 1268 file_control, 1269 file_size, 1270 (void *) p->p_fd->fd_rdir, 1271 slide, 1272 slide_start, 1273 slide_size); 1274 if (kr != KERN_SUCCESS) { 1275 SHARED_REGION_TRACE_ERROR( 1276 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1277 "vm_shared_region_map_file() failed kr=0x%x\n", 1278 current_thread(), p->p_pid, p->p_comm, 1279 vp, vp->v_name, kr)); 1280 switch (kr) { 1281 case KERN_INVALID_ADDRESS: 1282 error = EFAULT; 1283 break; 1284 case KERN_PROTECTION_FAILURE: 1285 error = EPERM; 1286 break; 1287 case KERN_NO_SPACE: 1288 error = ENOMEM; 1289 break; 1290 case KERN_FAILURE: 1291 case KERN_INVALID_ARGUMENT: 1292 default: 1293 error = EINVAL; 1294 break; 1295 } 1296 goto done; 1297 } 1298 1299 error = 0; 1300 1301 vnode_lock_spin(vp); 1302 1303 vp->v_flag |= VSHARED_DYLD; 1304 1305 vnode_unlock(vp); 1306 1307 /* update the vnode's access time */ 1308 if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) { 1309 VATTR_INIT(&va); 1310 nanotime(&va.va_access_time); 1311 VATTR_SET_ACTIVE(&va, va_access_time); 1312 vnode_setattr(vp, &va, vfs_context_current()); 1313 } 1314 1315 if (p->p_flag & P_NOSHLIB) { 1316 /* signal that this process is now using split libraries */ 1317 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag); 1318 } 1319 1320done: 1321 if (vp != NULL) { 1322 /* 1323 * release the vnode... 
1324 * ubc_map() still holds it for us in the non-error case 1325 */ 1326 (void) vnode_put(vp); 1327 vp = NULL; 1328 } 1329 if (fp != NULL) { 1330 /* release the file descriptor */ 1331 fp_drop(p, fd, fp, 0); 1332 fp = NULL; 1333 } 1334 1335 if (shared_region != NULL) { 1336 vm_shared_region_deallocate(shared_region); 1337 } 1338 1339 SHARED_REGION_TRACE_DEBUG( 1340 ("shared_region: %p [%d(%s)] <- map\n", 1341 current_thread(), p->p_pid, p->p_comm)); 1342 1343 return error; 1344} 1345 1346int 1347shared_region_map_and_slide_np( 1348 struct proc *p, 1349 struct shared_region_map_and_slide_np_args *uap, 1350 __unused int *retvalp) 1351{ 1352 struct shared_file_mapping_np *mappings; 1353 unsigned int mappings_count = uap->count; 1354 kern_return_t kr = KERN_SUCCESS; 1355 uint32_t slide = uap->slide; 1356 1357#define SFM_MAX_STACK 8 1358 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK]; 1359 1360 /* Is the process chrooted?? */ 1361 if (p->p_fd->fd_rdir != NULL) { 1362 kr = EINVAL; 1363 goto done; 1364 } 1365 1366 if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) { 1367 if (kr == KERN_INVALID_ARGUMENT) { 1368 /* 1369 * This will happen if we request sliding again 1370 * with the same slide value that was used earlier 1371 * for the very first sliding. 1372 */ 1373 kr = KERN_SUCCESS; 1374 } 1375 goto done; 1376 } 1377 1378 if (mappings_count == 0) { 1379 SHARED_REGION_TRACE_INFO( 1380 ("shared_region: %p [%d(%s)] map(): " 1381 "no mappings\n", 1382 current_thread(), p->p_pid, p->p_comm)); 1383 kr = 0; /* no mappings: we're done ! 
*/ 1384 goto done; 1385 } else if (mappings_count <= SFM_MAX_STACK) { 1386 mappings = &stack_mappings[0]; 1387 } else { 1388 SHARED_REGION_TRACE_ERROR( 1389 ("shared_region: %p [%d(%s)] map(): " 1390 "too many mappings (%d)\n", 1391 current_thread(), p->p_pid, p->p_comm, 1392 mappings_count)); 1393 kr = KERN_FAILURE; 1394 goto done; 1395 } 1396 1397 if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) { 1398 goto done; 1399 } 1400 1401 1402 kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings, 1403 slide, 1404 uap->slide_start, uap->slide_size); 1405 if (kr != KERN_SUCCESS) { 1406 return kr; 1407 } 1408 1409done: 1410 return kr; 1411} 1412 1413/* sysctl overflow room */ 1414 1415/* vm_page_free_target is provided as a makeshift solution for applications that want to 1416 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be 1417 reclaimed. It allows the app to calculate how much memory is free outside the free target. 
 */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

/*
 * Read-only "vm.page_free_wanted" sysctl handler.
 * The value is computed on demand by mach_vm_ctl_page_free_wanted(),
 * so this must be a SYSCTL_PROC rather than a SYSCTL_INT over a variable.
 */
static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int	page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

/* reusable/reuse page accounting, exported from vm_page_stats_reusable */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int	vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int	vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

/* NOTE(review): both headers below are already included at the top of
 * this file; these duplicates are harmless (header guards) but could
 * be removed in a cleanup pass. */
#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

/*
 * Throttle the calling pageout thread if its uthread was flagged as
 * having collided with normal-priority I/O (uu_lowpri_window, checked
 * below).
 */
void vm_pageout_io_throttle(void) {
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * thread is marked as a low priority I/O type
	 * and the I/O we issued while in this cleaning operation
	 * collided with normal I/O operations...
	   we'll
	 * delay in order to mitigate the impact of this
	 * task on the normal operation of the system
	 */

	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(1);
	}

}

/*
 * vm_pressure_monitor() system call.
 *
 * Thin BSD wrapper around mach_vm_pressure_monitor().  Optionally blocks
 * until memory pressure occurs (uap->wait_for_pressure), reports the
 * number of pages reclaimed during the monitored period via copyout to
 * uap->pages_reclaimed (only if the caller supplied a non-NULL pointer),
 * and returns the current count of wanted pages through *retval.
 *
 * Returns: 0 on success, EINTR if the Mach call was aborted,
 *          EFAULT on copyout failure, EINVAL for any other Mach error.
 */
int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t	kr;
	uint32_t	pages_reclaimed;
	uint32_t	pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	/* map the Mach result onto BSD errno values */
	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
			    uap->pages_reclaimed,
			    sizeof (pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}

/*
 * kas_info() system call: expose kernel address space information to
 * privileged user space.
 *
 * Currently only KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR is supported,
 * which reports the kernel's ASLR text slide (vm_kernel_slide) as a
 * uint64_t.  The caller passes in the size of its value buffer; it must
 * be exactly sizeof(uint64_t).  The (unchanged) size is copied back out
 * before the slide value itself, following the usual get-style syscall
 * convention.
 *
 * Restricted to the superuser (and subject to a MAC policy check when
 * CONFIG_MACF is set); compiled out entirely on SECURE_KERNEL builds,
 * where it returns ENOTSUP.
 */
int
kas_info(struct proc *p,
	      struct kas_info_args *uap,
	      int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int			selector = uap->selector;
	user_addr_t	valuep = uap->value;
	user_addr_t	sizep = uap->size;
	user_size_t size;
	int			error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	/* the user-space size_t width depends on the caller's ABI */
	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
		case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
			{
				uint64_t slide = vm_kernel_slide;

				if (sizeof(slide) != size) {
					return EINVAL;
				}

				/* write the size back first, then the value */
				if (IS_64BIT_PROCESS(p)) {
					user64_size_t size64 = (user64_size_t)size;
					error = copyout(&size64, sizep, sizeof(size64));
				} else {
					user32_size_t size32 = (user32_size_t)size;
					error = copyout(&size32, sizep, sizeof(size32));
				}
				if (error) {
					return error;
				}

				error = copyout(&slide, valuep, sizeof(slide));
				if (error) {
					return error;
				}
			}
			break;
		default:
			return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}