/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1987 Carnegie-Mellon University
 * All rights reserved. The CMU software License Agreement specifies
 * the terms and conditions for use and redistribution.
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
 * support for mandatory and extensible security protections. This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
 */

#include <meta_features.h>

#include <kern/task.h>
#include <kern/thread.h>
#include <kern/debug.h>
#include <kern/lock.h>
#include <kern/extmod_statistics.h>
#include <mach/mach_traps.h>
#include <mach/port.h>
#include <mach/task.h>
#include <mach/task_access.h>
#include <mach/task_special_ports.h>
#include <mach/time_value.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>

#include <sys/file_internal.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dir.h>
#include <sys/namei.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/vnode_internal.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/kernel.h>
#include <sys/ubc_internal.h>
#include <sys/user.h>
#include <sys/syslog.h>
#include <sys/stat.h>
#include <sys/sysproto.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
#include <sys/cprotect.h>
#include <sys/kpi_socket.h>
#include <sys/kas_info.h>

#include <security/audit/audit.h>
#include <security/mac.h>
#include <bsm/audit_kevents.h>

#include <kern/kalloc.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>

#include <machine/spl.h>

#include <mach/shared_region.h>
#include <vm/vm_shared_region.h>

#include <vm/vm_protos.h>

#include <sys/kern_memorystatus.h>

int _shared_region_map(struct proc *, int, unsigned int, struct shared_file_mapping_np *, memory_object_control_t *, struct shared_file_mapping_np *);
int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t, mach_vm_offset_t, mach_vm_size_t, memory_object_control_t);
int shared_region_copyin_mappings(struct proc *, user_addr_t, unsigned int, struct shared_file_mapping_np *);

SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");


/*
 * Sysctls related to data/stack execution.  See osfmk/vm/vm_map.c
 */

#if !SECURE_KERNEL
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
}

int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_unnest_logging, 0, "");

int vm_shared_region_unnest_log_interval = 10;
int shared_region_unnest_log_count_threshold = 5;
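/*
 * Usage note (editor's addition, illustrative): the unnest logging above is
 * exported through the "vm" sysctl namespace, so the "triggered DYLD shared
 * region unnest" messages can be silenced while running a debugger:
 *
 *	sysctl -w vm.shared_region_unnest_logging=0
 *
 * The interval and count threshold below only rate-limit the log output;
 * they are not exported as sysctls in this version of the file.
 */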
/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0;
static int shared_region_unnest_log_count = 0;

void
log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e)
{
	struct timeval tv;
	const char *pcommstr;

	if (shared_region_unnest_logging == 0)
		return;

	if (shared_region_unnest_logging == 1) {
		microtime(&tv);
		if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
			if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
				return;
		} else {
			last_unnest_log_time = tv.tv_sec;
			shared_region_unnest_log_count = 0;
		}
	}

	pcommstr = current_proc()->p_comm;

	printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", pcommstr, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
}

int
useracc(
	user_addr_t addr,
	user_size_t len,
	int prot)
{
	return (vm_map_check_protection(
			current_map(),
			vm_map_trunc_page(addr), vm_map_round_page(addr + len),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

int
vslock(
	user_addr_t addr,
	user_size_t len)
{
	kern_return_t kret;

	kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
	    vm_map_round_page(addr + len),
	    VM_PROT_READ | VM_PROT_WRITE, FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t pmap;
	vm_page_t pg;
	vm_map_offset_t vaddr;
	ppnum_t paddr;
#endif  /* FIXME ] */
	kern_return_t kret;

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr + len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef lint
	dirtied++;
#endif  /* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
	    vm_map_round_page(addr + len), FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&character, addr, sizeof(char)) == 0 ? 0 : -1);
}

int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&character, addr, sizeof(char)) == 0 ? 0 : -1);
}

int
fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *)&byte, sizeof(char)))
		return (-1);
	return (byte);
}

int
fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *)&byte, sizeof(char)))
		return (-1);
	return (byte);
}
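/*
 * Usage sketch (editor's addition, not original source): the byte accessors
 * above return -1 on a copyin/copyout fault and the fetched value (0..255)
 * otherwise, so -1 is unambiguous for fubyte()/fuibyte().  For the
 * word-sized fetch routines below, a stored value of -1 cannot be
 * distinguished from a fault.  A typical kernel-side caller:
 *
 *	int c = fubyte(uaddr);			// "uaddr" is a hypothetical user address
 *	if (c == -1)
 *		return (EFAULT);		// user page not accessible
 *	if (subyte(uaddr, c | 0x80) == -1)	// write a modified byte back
 *		return (EFAULT);
 */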
int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *)&word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *)&word, sizeof(int)))
		return (-1);
	return (word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *)&word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long
fuiword(user_addr_t addr)
{
	long word = 0;

	if (copyin(addr, (void *)&word, sizeof(int)))
		return (-1);
	return (word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */
int
sulong(user_addr_t addr, int64_t word)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (long)word));
	}
}

int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return (-1);
		return (longword);
	} else {
		return ((int64_t)fuiword(addr));
	}
}

int
suulong(user_addr_t addr, uint64_t uword)
{
	if (IS_64BIT_PROCESS(current_proc())) {
		return (copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return (suiword(addr, (uint32_t)uword));
	}
}

uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return (-1ULL);
		return (ulongword);
	} else {
		return ((uint64_t)fuiword(addr));
	}
}

int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return (ENOTSUP);
}

/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach
 * port name.
 *
 * Parameters:	args		User argument descriptor (see below)
 *
 * Indirect parameters:	args->t		Mach port name
 *			args->pid	Process ID (returned value; see below)
 *
 * Returns:	KERN_SUCCESS	Success
 *		KERN_FAILURE	Not success
 *
 * Implicit returns:	args->pid	Process ID
 *
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t t = args->t;
	user_addr_t pid_addr = args->pid;
	proc_t p;
	task_t t1;
	int pid = -1;
	kern_return_t err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *)&pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return (err);
}

/*
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: none allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; Default Mode: all POSIX checks and
 *	upcall via task port for authentication
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;
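/*
 * Illustrative note (editor's addition): tfp_policy is exported further
 * below as the "kern.tfp.policy" sysctl, so a privileged (superuser)
 * process can switch modes at run time, e.g.:
 *
 *	int policy = KERN_TFP_POLICY_DENY;	(or KERN_TFP_POLICY_DEFAULT)
 *	sysctlbyname("kern.tfp.policy", NULL, NULL, &policy, sizeof(policy));
 *
 * Only those two values are accepted; see sysctl_settfp_policy() below.
 */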
/*
 * Routine:	task_for_pid_posix_check
 * Purpose:
 *		Verify that the current process should be allowed to
 *		get the target process's task port.  This is only
 *		permitted if:
 *		- The current process is root
 *		OR all of the following are true:
 *		- The target process's real, effective, and saved uids
 *		  are the same as the current proc's euid,
 *		- The target process's group set is a subset of the
 *		  calling process's group set, and
 *		- The target process hasn't switched credentials.
 *
 * Returns:	TRUE: permitted
 *		FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (kauth_cred_getruid(targetcred) != myuid) ||
	    (kauth_cred_getsvuid(targetcred) != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}
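/*
 * Usage sketch (editor's addition): from userspace, task_for_pid() is a
 * Mach trap; a caller that passes the checks above receives a send right
 * to the target's task port:
 *
 *	#include <mach/mach.h>
 *
 *	mach_port_t task = MACH_PORT_NULL;
 *	kern_return_t kr = task_for_pid(mach_task_self(), pid, &task);
 *	if (kr != KERN_SUCCESS)
 *		...			// KERN_FAILURE: denied, or no such pid
 */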
/*
 * Routine:	task_for_pid
 * Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1 = TASK_NULL;
	mach_port_name_t tret = MACH_PORT_NULL;
	ipc_port_t tfpport;
	void *sright;
	int error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	/* Always check if pid == 0 */
	if (pid == 0) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p == PROC_NULL) {
		error = KERN_FAILURE;
		goto tfpout;
	}

#if CONFIG_AUDIT
	AUDIT_ARG(process, p);
#endif

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    p != current_proc() &&
		    (task_get_task_access_port(p->task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);
		extmod_statistics_incr_task_for_pid(p->task);

		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
			sright,
			get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}

/*
 * Routine:	task_name_for_pid
 * Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t target_tport = args->target_tport;
	int pid = args->pid;
	user_addr_t task_addr = args->t;
	proc_t p = PROC_NULL;
	task_t t1;
	mach_port_name_t tret;
	void *sright;
	int error = 0, refheld = 0;
	kauth_cred_t target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return (KERN_FAILURE);
	}

	p = proc_find(pid);
	if (p != PROC_NULL) {
		AUDIT_ARG(process, p);
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
		    || kauth_cred_issuser(kauth_cred_get())
		    || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
		    ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

			if (p->task != TASK_NULL) {
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
				    get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return (error);
}
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
	task_t target = NULL;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
#ifndef CONFIG_EMBEDDED
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}
#endif

	task_reference(target);
	error = task_pidsuspend(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}
#if CONFIG_MEMORYSTATUS
	else {
		memorystatus_on_suspend(pid);
	}
#endif

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
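/*
 * Note (editor's addition): pid_suspend() above and pid_resume() below are
 * a matched pair wrapping task_pidsuspend()/task_pidresume(); both run the
 * same task_for_pid_posix_check() and task-access-server gauntlet as
 * task_for_pid().  A sketch of the intended usage of these private
 * syscalls (userspace stub names assumed to match the kernel entry points):
 *
 *	if (pid_suspend(pid) == 0) {
 *		...			// target is stopped
 *		pid_resume(pid);
 *	}
 */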
kern_return_t
pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
{
	task_t target = NULL;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;
	int error = 0;

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	if (pid == 0) {
		error = EPERM;
		goto out;
	}

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	target = targetproc->task;
#ifndef CONFIG_EMBEDDED
	if (target != TASK_NULL) {
		mach_port_t tfpport;

		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    targetproc != current_proc() &&
		    (task_get_task_access_port(target, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = EACCES;
				goto out;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = EINTR;
				else
					error = EPERM;
				goto out;
			}
		}
	}
#endif

	task_reference(target);

#if CONFIG_MEMORYSTATUS
	memorystatus_on_resume(pid);
#endif

	error = task_pidresume(target);
	if (error) {
		if (error == KERN_INVALID_ARGUMENT) {
			error = EINVAL;
		} else {
			error = EPERM;
		}
	}

	task_deallocate(target);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);

	*ret = error;
	return error;
}
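/*
 * Note (editor's addition): on embedded configurations, the pid_hibernate()
 * syscall below currently accepts only pid == -1 and merely kicks the
 * memorystatus freeze thread; per its own comment, per-pid freezing is a
 * planned extension.  A sketch of the only call that succeeds today:
 *
 *	pid_hibernate(-1);	// trigger a freeze pass, not a specific pid
 */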
#if CONFIG_EMBEDDED
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int error = 0;
	proc_t targetproc = PROC_NULL;
	int pid = args->pid;

#ifndef CONFIG_FREEZE
	#pragma unused(pid)
#else

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * The only accepted pid value here is currently -1, since we just kick
	 * off the freeze thread here - individual ids aren't required. However,
	 * this call is intended to change in the future to initiate freezing of
	 * individual processes. In anticipation, we'll obtain the process handle
	 * for potentially valid values and call task_for_pid_posix_check(); this
	 * way, everything is validated correctly and set for further refactoring.
	 * See <rdar://problem/7839708> for more details.
	 */
	if (pid >= 0) {
		targetproc = proc_find(pid);
		if (targetproc == PROC_NULL) {
			error = ESRCH;
			goto out;
		}

		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

	if (pid == -1) {
		memorystatus_on_inactivity(pid);
	} else {
		error = EPERM;
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}

int
pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
{
	int error = 0;
	proc_t targetproc = PROC_NULL;
	struct filedesc *fdp;
	struct fileproc *fp;
	int pid = args->pid;
	int level = args->level;
	int i;

	if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
	    level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL) {
		error = EINVAL;
		goto out;
	}

#if CONFIG_MACF
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	targetproc = proc_find(pid);
	if (targetproc == PROC_NULL) {
		error = ESRCH;
		goto out;
	}

	if (!task_for_pid_posix_check(targetproc)) {
		error = EPERM;
		goto out;
	}

	proc_fdlock(targetproc);
	fdp = targetproc->p_fd;

	for (i = 0; i < fdp->fd_nfiles; i++) {
		struct socket *sockp;

		fp = fdp->fd_ofiles[i];
		if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
		    fp->f_fglob->fg_type != DTYPE_SOCKET) {
			continue;
		}

		sockp = (struct socket *)fp->f_fglob->fg_data;

		/* Call networking stack with socket and level */
		(void) socket_defunct(targetproc, sockp, level);
	}

	proc_fdunlock(targetproc);

out:
	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
#endif /* CONFIG_EMBEDDED */

static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return (error);

	if (!is_suser())
		return (EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return (error);
}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
    &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
    &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
 * This call returns the base address of the first mapping in the
 * process's shared region.
 * dyld will then check what's mapped at that address.
 *
 * If the shared region is empty, dyld will then attempt to map the shared
 * cache file in the shared region via the shared_region_map_np() system call.
 *
 * If something's already mapped in the shared region, dyld will check if it
 * matches the shared cache it would like to use for that process.
 * If it matches, everything's ready and the process can proceed and use the
 * shared region.
 * If it doesn't match, dyld will unmap the shared region and map the shared
 * cache into the process's address space via mmap().
 *
 * ERROR VALUES
 * EINVAL	no shared region
 * ENOMEM	shared region is empty
 * EFAULT	bad address for "start_address"
 */
int
shared_region_check_np(
	__unused struct proc *p,
	struct shared_region_check_np_args *uap,
	__unused int *retvalp)
{
	vm_shared_region_t shared_region;
	mach_vm_offset_t start_address = 0;
	int error;
	kern_return_t kr;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address));

	/* retrieve the current task's shared region */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region != NULL) {
		/* retrieve address of its first mapping... */
		kr = vm_shared_region_start_address(shared_region,
		    &start_address);
		if (kr != KERN_SUCCESS) {
			error = ENOMEM;
		} else {
			/* ... and give it to the caller */
			error = copyout(&start_address,
			    (user_addr_t) uap->start_address,
			    sizeof(start_address));
			if (error) {
				SHARED_REGION_TRACE_ERROR(
					("shared_region: %p [%d(%s)] "
					 "check_np(0x%llx) "
					 "copyout(0x%llx) error %d\n",
					 current_thread(), p->p_pid, p->p_comm,
					 (uint64_t)uap->start_address, (uint64_t)start_address,
					 error));
			}
		}
		vm_shared_region_deallocate(shared_region);
	} else {
		/* no shared region! */
		error = EINVAL;
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
		 current_thread(), p->p_pid, p->p_comm,
		 (uint64_t)uap->start_address, (uint64_t)start_address, error));

	return error;
}
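/*
 * Usage sketch (editor's addition): the dyld-side sequence described above
 * boils down to something like the following (the userspace stub name is
 * the private libsyscall wrapper; shown for illustration only):
 *
 *	uint64_t base = 0;
 *	if (shared_region_check_np(&base) == 0) {
 *		// inspect what is mapped at "base" and compare it
 *		// against the desired shared cache
 *	} else {
 *		// EINVAL/ENOMEM: populate the region via
 *		// shared_region_map_and_slide_np() below
 *	}
 */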
int
shared_region_copyin_mappings(
	struct proc *p,
	user_addr_t user_mappings,
	unsigned int mappings_count,
	struct shared_file_mapping_np *mappings)
{
	int error = 0;
	vm_size_t mappings_size = 0;

	/* get the list of mappings the caller wants us to establish */
	mappings_size = (vm_size_t) (mappings_count * sizeof(mappings[0]));
	error = copyin(user_mappings,
	    mappings,
	    mappings_size);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "copyin(0x%llx, %d) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 (uint64_t)user_mappings, mappings_count, error));
	}
	return error;
}

/*
 * shared_region_map_np()
 *
 * This system call is intended for dyld.
 *
 * dyld uses this to map a shared cache file into a shared region.
 * This is usually done only the first time a shared cache is needed.
 * Subsequent processes will just use the populated shared region without
 * requiring any further setup.
 */
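/*
 * Illustrative note (editor's addition): each mapping the caller passes in
 * is a struct shared_file_mapping_np describing where a piece of the cache
 * file should land in the shared region, e.g.:
 *
 *	struct shared_file_mapping_np m = {
 *		.sfm_address     = sr_base + segment_offset,	// hypothetical values
 *		.sfm_size        = segment_size,
 *		.sfm_file_offset = segment_file_offset,
 *		.sfm_max_prot    = VM_PROT_READ | VM_PROT_EXECUTE,
 *		.sfm_init_prot   = VM_PROT_READ | VM_PROT_EXECUTE,
 *	};
 *
 * _shared_region_map() below validates the backing vnode, then hands the
 * whole array to vm_shared_region_map_file().
 */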
int
_shared_region_map(
	struct proc *p,
	int fd,
	uint32_t mappings_count,
	struct shared_file_mapping_np *mappings,
	memory_object_control_t *sr_file_control,
	struct shared_file_mapping_np *mapping_to_slide)
{
	int error;
	kern_return_t kr;
	struct fileproc *fp;
	struct vnode *vp, *root_vp;
	struct vnode_attr va;
	off_t fs;
	memory_object_size_t file_size;
	vm_prot_t maxprot = VM_PROT_ALL;
	memory_object_control_t file_control;
	struct vm_shared_region *shared_region;

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] -> map\n",
		 current_thread(), p->p_pid, p->p_comm));

	shared_region = NULL;
	fp = NULL;
	vp = NULL;

	/* get file structure from file descriptor */
	error = fp_lookup(p, fd, &fp, 0);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d lookup failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}

	/* make sure we're attempting to map a vnode */
	if (fp->f_fglob->fg_type != DTYPE_VNODE) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not a vnode (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 fd, fp->f_fglob->fg_type));
		error = EINVAL;
		goto done;
	}

	/* we need at least read permission on the file */
	if (!(fp->f_fglob->fg_flag & FREAD)) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d not readable\n",
			 current_thread(), p->p_pid, p->p_comm, fd));
		error = EPERM;
		goto done;
	}

	/* get vnode from file structure */
	error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map: "
			 "fd=%d getwithref failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm, fd, error));
		goto done;
	}
	vp = (struct vnode *) fp->f_fglob->fg_data;

	/* make sure the vnode is a regular file */
	if (vp->v_type != VREG) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not a file (type=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp->v_type));
		error = EINVAL;
		goto done;
	}

#if CONFIG_MACF
	error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
	    fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
	if (error) {
		goto done;
	}
#endif /* MAC */

	/* make sure vnode is on the process's root volume */
	root_vp = p->p_fd->fd_rdir;
	if (root_vp == NULL) {
		root_vp = rootvnode;
	} else {
		/*
		 * Chroot-ed processes can't use the shared_region.
		 */
		error = EINVAL;
		goto done;
	}
	if (vp->v_mount != root_vp->v_mount) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "not on process's root volume\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EPERM;
		goto done;
	}

	/* make sure vnode is owned by "root" */
	VATTR_INIT(&va);
	VATTR_WANTED(&va, va_uid);
	error = vnode_getattr(vp, &va, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_getattr(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	if (va.va_uid != 0) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "owned by uid=%d instead of 0\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, va.va_uid));
		error = EPERM;
		goto done;
	}

	/* get vnode size */
	error = vnode_size(vp, &fs, vfs_context_current());
	if (error) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vnode_size(%p) failed (error=%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, vp, error));
		goto done;
	}
	file_size = fs;

	/* get the file's memory object handle */
	file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
	if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no memory object\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		error = EINVAL;
		goto done;
	}

	if (sr_file_control != NULL) {
		*sr_file_control = file_control;
	}

	/* get the process's shared region (set up in vm_map_exec()) */
	shared_region = vm_shared_region_get(current_task());
	if (shared_region == NULL) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "no shared region\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name));
		goto done;
	}

	/* map the file into that shared region's submap */
	kr = vm_shared_region_map_file(shared_region,
	    mappings_count,
	    mappings,
	    file_control,
	    file_size,
	    (void *) p->p_fd->fd_rdir,
	    mapping_to_slide);
	if (kr != KERN_SUCCESS) {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(%p:'%s'): "
			 "vm_shared_region_map_file() failed kr=0x%x\n",
			 current_thread(), p->p_pid, p->p_comm,
			 vp, vp->v_name, kr));
		switch (kr) {
		case KERN_INVALID_ADDRESS:
			error = EFAULT;
			break;
		case KERN_PROTECTION_FAILURE:
			error = EPERM;
			break;
		case KERN_NO_SPACE:
			error = ENOMEM;
			break;
		case KERN_FAILURE:
		case KERN_INVALID_ARGUMENT:
		default:
			error = EINVAL;
			break;
		}
		goto done;
	}

	error = 0;

	vnode_lock_spin(vp);

	vp->v_flag |= VSHARED_DYLD;

	vnode_unlock(vp);
	/* update the vnode's access time */
	if (!(vnode_vfsvisflags(vp) & MNT_NOATIME)) {
		VATTR_INIT(&va);
		nanotime(&va.va_access_time);
		VATTR_SET_ACTIVE(&va, va_access_time);
		vnode_setattr(vp, &va, vfs_context_current());
	}

	if (p->p_flag & P_NOSHLIB) {
		/* signal that this process is now using split libraries */
		OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
	}

done:
	if (vp != NULL) {
		/*
		 * release the vnode...
		 * ubc_map() still holds it for us in the non-error case
		 */
		(void) vnode_put(vp);
		vp = NULL;
	}
	if (fp != NULL) {
		/* release the file descriptor */
		fp_drop(p, fd, fp, 0);
		fp = NULL;
	}

	if (shared_region != NULL) {
		vm_shared_region_deallocate(shared_region);
	}

	SHARED_REGION_TRACE_DEBUG(
		("shared_region: %p [%d(%s)] <- map\n",
		 current_thread(), p->p_pid, p->p_comm));

	return error;
}

int
_shared_region_slide(uint32_t slide,
    mach_vm_offset_t entry_start_address,
    mach_vm_size_t entry_size,
    mach_vm_offset_t slide_start,
    mach_vm_size_t slide_size,
    memory_object_control_t sr_file_control)
{
	void *slide_info_entry = NULL;
	int error;

	if ((error = vm_shared_region_slide_init(slide_size, entry_start_address, entry_size, slide, sr_file_control))) {
		printf("slide_info initialization failed with kr=%d\n", error);
		goto done;
	}

	slide_info_entry = vm_shared_region_get_slide_info_entry();
	if (slide_info_entry == NULL) {
		error = EFAULT;
	} else {
		error = copyin((user_addr_t)slide_start,
		    slide_info_entry,
		    (vm_size_t)slide_size);
	}
	if (error) {
		goto done;
	}

	if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) {
		error = EFAULT;
		printf("Sanity check failed for slide_info\n");
	} else {
#if DEBUG
		printf("Successfully initialized slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
		    (void *)(uintptr_t)entry_start_address,
		    (unsigned long)entry_size,
		    (unsigned long)slide_size);
#endif
	}
done:
	return error;
}
int
shared_region_map_and_slide_np(
	struct proc *p,
	struct shared_region_map_and_slide_np_args *uap,
	__unused int *retvalp)
{
	struct shared_file_mapping_np mapping_to_slide;
	struct shared_file_mapping_np *mappings;
	unsigned int mappings_count = uap->count;

	memory_object_control_t sr_file_control;
	kern_return_t kr = KERN_SUCCESS;
	uint32_t slide = uap->slide;

#define SFM_MAX_STACK 8
	struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK];

	/* Is the process chrooted? */
	if (p->p_fd->fd_rdir != NULL) {
		kr = EINVAL;
		goto done;
	}

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again
			 * with the same slide value that was used earlier
			 * for the very first sliding.
			 */
			kr = KERN_SUCCESS;
		}
		goto done;
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm));
		kr = 0;	/* no mappings: we're done! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	if ((kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}

	kr = _shared_region_map(p, uap->fd, mappings_count, mappings, &sr_file_control, &mapping_to_slide);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	if (slide) {
		kr = _shared_region_slide(slide,
		    mapping_to_slide.sfm_file_offset,
		    mapping_to_slide.sfm_size,
		    uap->slide_start,
		    uap->slide_size,
		    sr_file_control);
		if (kr != KERN_SUCCESS) {
			vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
			return kr;
		}
	}
done:
	return kr;
}

/* sysctl overflow room */

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed. It allows the app to calculate how
 * much memory is free outside the free target.
 */
extern unsigned int vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_free_target, 0, "Pageout daemon free target");
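/*
 * Usage sketch (editor's addition): an application following the comment
 * above would compare the current free count against the pageout daemon's
 * target, e.g.:
 *
 *	unsigned int free = 0, target = 0;
 *	size_t len = sizeof(free);
 *	sysctlbyname("vm.page_free_count", &free, &len, NULL, 0);
 *	len = sizeof(target);
 *	sysctlbyname("vm.vm_page_free_target", &target, &len, NULL, 0);
 *	long headroom = (long)free - (long)target;	// pages allocatable
 *							// without forcing reclaim
 *
 * (vm.page_free_count is exported further below in this file.)
 */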
extern unsigned int vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_memory_pressure, 0, "Memory pressure indicator");

static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof(page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
    &vm_page_stats_reusable.can_reuse_failure, "");


extern unsigned int vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, "");	/* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated");	/* sum of all reactivated, plus busy and nolock (even though those actually get re-deactivated) */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);

void
vm_pageout_io_throttle(void)
{
	struct uthread *uthread = get_bsdthread_info(current_thread());

	/*
	 * If the thread is marked as a low-priority I/O type and the I/O
	 * we issued while in this cleaning operation collided with normal
	 * I/O operations, we'll delay in order to mitigate the impact of
	 * this task on the normal operation of the system.
	 */

	if (uthread->uu_lowpri_window) {
		throttle_lowpri_io(TRUE);
	}
}
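/*
 * Usage sketch (editor's addition, assuming the private userspace stub for
 * the vm_pressure_monitor() syscall below mirrors the kernel argument
 * order): a memory watcher can block until the pageout daemon wants pages
 * and learn how many were recently reclaimed:
 *
 *	uint32_t reclaimed = 0;
 *	// wait_for_pressure=1, monitored window given in nanoseconds
 *	int wanted = vm_pressure_monitor(1, 1000000000, &reclaimed);
 *	// "wanted" is the number of pages the kernel would like back;
 *	// -1 with errno EINTR if the wait was interrupted
 */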
int
vm_pressure_monitor(
	__unused struct proc *p,
	struct vm_pressure_monitor_args *uap,
	int *retval)
{
	kern_return_t kr;
	uint32_t pages_reclaimed;
	uint32_t pages_wanted;

	kr = mach_vm_pressure_monitor(
		(boolean_t) uap->wait_for_pressure,
		uap->nsecs_monitored,
		(uap->pages_reclaimed) ? &pages_reclaimed : NULL,
		&pages_wanted);

	switch (kr) {
	case KERN_SUCCESS:
		break;
	case KERN_ABORTED:
		return EINTR;
	default:
		return EINVAL;
	}

	if (uap->pages_reclaimed) {
		if (copyout((void *)&pages_reclaimed,
		    uap->pages_reclaimed,
		    sizeof(pages_reclaimed)) != 0) {
			return EFAULT;
		}
	}

	*retval = (int) pages_wanted;
	return 0;
}

int
kas_info(struct proc *p,
    struct kas_info_args *uap,
    int *retval __unused)
{
#ifdef SECURE_KERNEL
	(void)p;
	(void)uap;
	return ENOTSUP;
#else /* !SECURE_KERNEL */
	int selector = uap->selector;
	user_addr_t valuep = uap->value;
	user_addr_t sizep = uap->size;
	user_size_t size;
	int error;

	if (!kauth_cred_issuser(kauth_cred_get())) {
		return EPERM;
	}

#if CONFIG_MACF
	error = mac_system_check_kas_info(kauth_cred_get(), selector);
	if (error) {
		return error;
	}
#endif

	if (IS_64BIT_PROCESS(p)) {
		user64_size_t size64;
		error = copyin(sizep, &size64, sizeof(size64));
		size = (user_size_t)size64;
	} else {
		user32_size_t size32;
		error = copyin(sizep, &size32, sizeof(size32));
		size = (user_size_t)size32;
	}
	if (error) {
		return error;
	}

	switch (selector) {
	case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR:
	{
		uint64_t slide = vm_kernel_slide;

		if (sizeof(slide) != size) {
			return EINVAL;
		}

		if (IS_64BIT_PROCESS(p)) {
			user64_size_t size64 = (user64_size_t)size;
			error = copyout(&size64, sizep, sizeof(size64));
		} else {
			user32_size_t size32 = (user32_size_t)size;
			error = copyout(&size32, sizep, sizeof(size32));
		}
		if (error) {
			return error;
		}

		error = copyout(&slide, valuep, sizeof(slide));
		if (error) {
			return error;
		}
	}
	break;
	default:
		return EINVAL;
	}

	return 0;
#endif /* !SECURE_KERNEL */
}