1/* 2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Mach Operating System 30 * Copyright (c) 1987 Carnegie-Mellon University 31 * All rights reserved. The CMU software License Agreement specifies 32 * the terms and conditions for use and redistribution. 33 */ 34/* 35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce 36 * support for mandatory and extensible security protections. This notice 37 * is included in support of clause 2.2 (b) of the Apple Public License, 38 * Version 2.0. 
39 */ 40 41#include <meta_features.h> 42 43#include <vm/vm_options.h> 44 45#include <kern/task.h> 46#include <kern/thread.h> 47#include <kern/debug.h> 48#include <kern/extmod_statistics.h> 49#include <mach/mach_traps.h> 50#include <mach/port.h> 51#include <mach/task.h> 52#include <mach/task_access.h> 53#include <mach/task_special_ports.h> 54#include <mach/time_value.h> 55#include <mach/vm_map.h> 56#include <mach/vm_param.h> 57#include <mach/vm_prot.h> 58 59#include <sys/file_internal.h> 60#include <sys/param.h> 61#include <sys/systm.h> 62#include <sys/dir.h> 63#include <sys/namei.h> 64#include <sys/proc_internal.h> 65#include <sys/kauth.h> 66#include <sys/vm.h> 67#include <sys/file.h> 68#include <sys/vnode_internal.h> 69#include <sys/mount.h> 70#include <sys/trace.h> 71#include <sys/kernel.h> 72#include <sys/ubc_internal.h> 73#include <sys/user.h> 74#include <sys/syslog.h> 75#include <sys/stat.h> 76#include <sys/sysproto.h> 77#include <sys/mman.h> 78#include <sys/sysctl.h> 79#include <sys/cprotect.h> 80#include <sys/kpi_socket.h> 81#include <sys/kas_info.h> 82#include <sys/socket.h> 83#include <sys/socketvar.h> 84 85#include <security/audit/audit.h> 86#include <security/mac.h> 87#include <bsm/audit_kevents.h> 88 89#include <kern/kalloc.h> 90#include <vm/vm_map.h> 91#include <vm/vm_kern.h> 92#include <vm/vm_pageout.h> 93 94#include <machine/spl.h> 95 96#include <mach/shared_region.h> 97#include <vm/vm_shared_region.h> 98 99#include <vm/vm_protos.h> 100 101#include <sys/kern_memorystatus.h> 102 103 104int _shared_region_map_and_slide(struct proc*, int, unsigned int, struct shared_file_mapping_np*, uint32_t, user_addr_t, user_addr_t); 105int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *); 106 107SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_compressor, 0, ""); 108SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_compressor_pages, CTLFLAG_RD | CTLFLAG_LOCKED, 
&vm_counters.do_collapse_compressor_pages, 0, ""); 109SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate, 0, ""); 110SYSCTL_INT(_vm, OID_AUTO, vm_do_collapse_terminate_failure, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.do_collapse_terminate_failure, 0, ""); 111SYSCTL_INT(_vm, OID_AUTO, vm_should_cow_but_wired, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.should_cow_but_wired, 0, ""); 112SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow, 0, ""); 113SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_extra_cow_pages, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_extra_cow_pages, 0, ""); 114SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_write, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_write, 0, ""); 115SYSCTL_INT(_vm, OID_AUTO, vm_create_upl_lookup_failure_copy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_counters.create_upl_lookup_failure_copy, 0, ""); 116#if VM_SCAN_FOR_SHADOW_CHAIN 117static int vm_shadow_max_enabled = 0; /* Disabled by default */ 118extern int proc_shadow_max(void); 119static int 120vm_shadow_max SYSCTL_HANDLER_ARGS 121{ 122#pragma unused(arg1, arg2, oidp) 123 int value = 0; 124 125 if (vm_shadow_max_enabled) 126 value = proc_shadow_max(); 127 128 return SYSCTL_OUT(req, &value, sizeof(value)); 129} 130SYSCTL_PROC(_vm, OID_AUTO, vm_shadow_max, CTLTYPE_INT|CTLFLAG_RD|CTLFLAG_LOCKED, 131 0, 0, &vm_shadow_max, "I", ""); 132 133SYSCTL_INT(_vm, OID_AUTO, vm_shadow_max_enabled, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_shadow_max_enabled, 0, ""); 134 135#endif /* VM_SCAN_FOR_SHADOW_CHAIN */ 136 137SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, ""); 138 139__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( 140 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid); 141/* 142 * Sysctl's related to data/stack 
execution. See osfmk/vm/vm_map.c 143 */ 144 145#ifndef SECURE_KERNEL 146extern int allow_stack_exec, allow_data_exec; 147 148SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, ""); 149SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, ""); 150#endif /* !SECURE_KERNEL */ 151 152static const char *prot_values[] = { 153 "none", 154 "read-only", 155 "write-only", 156 "read-write", 157 "execute-only", 158 "read-execute", 159 "write-execute", 160 "read-write-execute" 161}; 162 163void 164log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot) 165{ 166 printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n", 167 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]); 168} 169 170int shared_region_unnest_logging = 1; 171 172SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED, 173 &shared_region_unnest_logging, 0, ""); 174 175int vm_shared_region_unnest_log_interval = 10; 176int shared_region_unnest_log_count_threshold = 5; 177 178/* 179 * Shared cache path enforcement. 180 */ 181 182static int scdir_enforce = 1; 183static char scdir_path[] = "/var/db/dyld/"; 184 185#ifndef SECURE_KERNEL 186SYSCTL_INT(_vm, OID_AUTO, enforce_shared_cache_dir, CTLFLAG_RW | CTLFLAG_LOCKED, &scdir_enforce, 0, ""); 187#endif 188 189/* These log rate throttling state variables aren't thread safe, but 190 * are sufficient unto the task. 
191 */ 192static int64_t last_unnest_log_time = 0; 193static int shared_region_unnest_log_count = 0; 194 195void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) { 196 struct timeval tv; 197 const char *pcommstr; 198 199 if (shared_region_unnest_logging == 0) 200 return; 201 202 if (shared_region_unnest_logging == 1) { 203 microtime(&tv); 204 if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) { 205 if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold) 206 return; 207 } 208 else { 209 last_unnest_log_time = tv.tv_sec; 210 shared_region_unnest_log_count = 0; 211 } 212 } 213 214 pcommstr = current_proc()->p_comm; 215 216 printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e); 217} 218 219int 220useracc( 221 user_addr_t addr, 222 user_size_t len, 223 int prot) 224{ 225 vm_map_t map; 226 227 map = current_map(); 228 return (vm_map_check_protection( 229 map, 230 vm_map_trunc_page(addr, 231 vm_map_page_mask(map)), 232 vm_map_round_page(addr+len, 233 vm_map_page_mask(map)), 234 prot == B_READ ? 
VM_PROT_READ : VM_PROT_WRITE)); 235} 236 237int 238vslock( 239 user_addr_t addr, 240 user_size_t len) 241{ 242 kern_return_t kret; 243 vm_map_t map; 244 245 map = current_map(); 246 kret = vm_map_wire(map, 247 vm_map_trunc_page(addr, 248 vm_map_page_mask(map)), 249 vm_map_round_page(addr+len, 250 vm_map_page_mask(map)), 251 VM_PROT_READ | VM_PROT_WRITE, 252 FALSE); 253 254 switch (kret) { 255 case KERN_SUCCESS: 256 return (0); 257 case KERN_INVALID_ADDRESS: 258 case KERN_NO_SPACE: 259 return (ENOMEM); 260 case KERN_PROTECTION_FAILURE: 261 return (EACCES); 262 default: 263 return (EINVAL); 264 } 265} 266 267int 268vsunlock( 269 user_addr_t addr, 270 user_size_t len, 271 __unused int dirtied) 272{ 273#if FIXME /* [ */ 274 pmap_t pmap; 275 vm_page_t pg; 276 vm_map_offset_t vaddr; 277 ppnum_t paddr; 278#endif /* FIXME ] */ 279 kern_return_t kret; 280 vm_map_t map; 281 282 map = current_map(); 283 284#if FIXME /* [ */ 285 if (dirtied) { 286 pmap = get_task_pmap(current_task()); 287 for (vaddr = vm_map_trunc_page(addr, PAGE_MASK); 288 vaddr < vm_map_round_page(addr+len, PAGE_MASK); 289 vaddr += PAGE_SIZE) { 290 paddr = pmap_extract(pmap, vaddr); 291 pg = PHYS_TO_VM_PAGE(paddr); 292 vm_page_set_modified(pg); 293 } 294 } 295#endif /* FIXME ] */ 296#ifdef lint 297 dirtied++; 298#endif /* lint */ 299 kret = vm_map_unwire(map, 300 vm_map_trunc_page(addr, 301 vm_map_page_mask(map)), 302 vm_map_round_page(addr+len, 303 vm_map_page_mask(map)), 304 FALSE); 305 switch (kret) { 306 case KERN_SUCCESS: 307 return (0); 308 case KERN_INVALID_ADDRESS: 309 case KERN_NO_SPACE: 310 return (ENOMEM); 311 case KERN_PROTECTION_FAILURE: 312 return (EACCES); 313 default: 314 return (EINVAL); 315 } 316} 317 318int 319subyte( 320 user_addr_t addr, 321 int byte) 322{ 323 char character; 324 325 character = (char)byte; 326 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 
0 : -1); 327} 328 329int 330suibyte( 331 user_addr_t addr, 332 int byte) 333{ 334 char character; 335 336 character = (char)byte; 337 return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1); 338} 339 340int fubyte(user_addr_t addr) 341{ 342 unsigned char byte; 343 344 if (copyin(addr, (void *) &byte, sizeof(char))) 345 return(-1); 346 return(byte); 347} 348 349int fuibyte(user_addr_t addr) 350{ 351 unsigned char byte; 352 353 if (copyin(addr, (void *) &(byte), sizeof(char))) 354 return(-1); 355 return(byte); 356} 357 358int 359suword( 360 user_addr_t addr, 361 long word) 362{ 363 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); 364} 365 366long fuword(user_addr_t addr) 367{ 368 long word = 0; 369 370 if (copyin(addr, (void *) &word, sizeof(int))) 371 return(-1); 372 return(word); 373} 374 375/* suiword and fuiword are the same as suword and fuword, respectively */ 376 377int 378suiword( 379 user_addr_t addr, 380 long word) 381{ 382 return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1); 383} 384 385long fuiword(user_addr_t addr) 386{ 387 long word = 0; 388 389 if (copyin(addr, (void *) &word, sizeof(int))) 390 return(-1); 391 return(word); 392} 393 394/* 395 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the 396 * fetching and setting of process-sized size_t and pointer values. 397 */ 398int 399sulong(user_addr_t addr, int64_t word) 400{ 401 402 if (IS_64BIT_PROCESS(current_proc())) { 403 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 
0 : -1); 404 } else { 405 return(suiword(addr, (long)word)); 406 } 407} 408 409int64_t 410fulong(user_addr_t addr) 411{ 412 int64_t longword; 413 414 if (IS_64BIT_PROCESS(current_proc())) { 415 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0) 416 return(-1); 417 return(longword); 418 } else { 419 return((int64_t)fuiword(addr)); 420 } 421} 422 423int 424suulong(user_addr_t addr, uint64_t uword) 425{ 426 427 if (IS_64BIT_PROCESS(current_proc())) { 428 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1); 429 } else { 430 return(suiword(addr, (uint32_t)uword)); 431 } 432} 433 434uint64_t 435fuulong(user_addr_t addr) 436{ 437 uint64_t ulongword; 438 439 if (IS_64BIT_PROCESS(current_proc())) { 440 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0) 441 return(-1ULL); 442 return(ulongword); 443 } else { 444 return((uint64_t)fuiword(addr)); 445 } 446} 447 448int 449swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval) 450{ 451 return(ENOTSUP); 452} 453 454/* 455 * pid_for_task 456 * 457 * Find the BSD process ID for the Mach task associated with the given Mach port 458 * name 459 * 460 * Parameters: args User argument descriptor (see below) 461 * 462 * Indirect parameters: args->t Mach port name 463 * args->pid Process ID (returned value; see below) 464 * 465 * Returns: KERL_SUCCESS Success 466 * KERN_FAILURE Not success 467 * 468 * Implicit returns: args->pid Process ID 469 * 470 */ 471kern_return_t 472pid_for_task( 473 struct pid_for_task_args *args) 474{ 475 mach_port_name_t t = args->t; 476 user_addr_t pid_addr = args->pid; 477 proc_t p; 478 task_t t1; 479 int pid = -1; 480 kern_return_t err = KERN_SUCCESS; 481 482 AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK); 483 AUDIT_ARG(mach_port1, t); 484 485 t1 = port_name_to_task(t); 486 487 if (t1 == TASK_NULL) { 488 err = KERN_FAILURE; 489 goto pftout; 490 } else { 491 p = get_bsdtask_info(t1); 492 if (p) { 493 pid = proc_pid(p); 494 err = KERN_SUCCESS; 495 } else 
{ 496 err = KERN_FAILURE; 497 } 498 } 499 task_deallocate(t1); 500pftout: 501 AUDIT_ARG(pid, pid); 502 (void) copyout((char *) &pid, pid_addr, sizeof(int)); 503 AUDIT_MACH_SYSCALL_EXIT(err); 504 return(err); 505} 506 507/* 508 * 509 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self 510 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication 511 * 512 */ 513static int tfp_policy = KERN_TFP_POLICY_DEFAULT; 514 515/* 516 * Routine: task_for_pid_posix_check 517 * Purpose: 518 * Verify that the current process should be allowed to 519 * get the target process's task port. This is only 520 * permitted if: 521 * - The current process is root 522 * OR all of the following are true: 523 * - The target process's real, effective, and saved uids 524 * are the same as the current proc's euid, 525 * - The target process's group set is a subset of the 526 * calling process's group set, and 527 * - The target process hasn't switched credentials. 528 * 529 * Returns: TRUE: permitted 530 * FALSE: denied 531 */ 532static int 533task_for_pid_posix_check(proc_t target) 534{ 535 kauth_cred_t targetcred, mycred; 536 uid_t myuid; 537 int allowed; 538 539 /* No task_for_pid on bad targets */ 540 if (target->p_stat == SZOMB) { 541 return FALSE; 542 } 543 544 mycred = kauth_cred_get(); 545 myuid = kauth_cred_getuid(mycred); 546 547 /* If we're running as root, the check passes */ 548 if (kauth_cred_issuser(mycred)) 549 return TRUE; 550 551 /* We're allowed to get our own task port */ 552 if (target == current_proc()) 553 return TRUE; 554 555 /* 556 * Under DENY, only root can get another proc's task port, 557 * so no more checks are needed. 558 */ 559 if (tfp_policy == KERN_TFP_POLICY_DENY) { 560 return FALSE; 561 } 562 563 targetcred = kauth_cred_proc_ref(target); 564 allowed = TRUE; 565 566 /* Do target's ruid, euid, and saved uid match my euid? 
*/ 567 if ((kauth_cred_getuid(targetcred) != myuid) || 568 (kauth_cred_getruid(targetcred) != myuid) || 569 (kauth_cred_getsvuid(targetcred) != myuid)) { 570 allowed = FALSE; 571 goto out; 572 } 573 574 /* Are target's groups a subset of my groups? */ 575 if (kauth_cred_gid_subset(targetcred, mycred, &allowed) || 576 allowed == 0) { 577 allowed = FALSE; 578 goto out; 579 } 580 581 /* Has target switched credentials? */ 582 if (target->p_flag & P_SUGID) { 583 allowed = FALSE; 584 goto out; 585 } 586 587out: 588 kauth_cred_unref(&targetcred); 589 return allowed; 590} 591 592/* 593 * __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__ 594 * 595 * Description: Waits for the user space daemon to respond to the request 596 * we made. Function declared non inline to be visible in 597 * stackshots and spindumps as well as debugging. 598 */ 599__attribute__((noinline)) int __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__( 600 mach_port_t task_access_port, int32_t calling_pid, uint32_t calling_gid, int32_t target_pid) 601{ 602 return check_task_access(task_access_port, calling_pid, calling_gid, target_pid); 603} 604 605/* 606 * Routine: task_for_pid 607 * Purpose: 608 * Get the task port for another "process", named by its 609 * process ID on the same host as "target_task". 610 * 611 * Only permitted to privileged processes, or processes 612 * with the same user ID. 613 * 614 * Note: if pid == 0, an error is return no matter who is calling. 615 * 616 * XXX This should be a BSD system call, not a Mach trap!!! 
617 */ 618kern_return_t 619task_for_pid( 620 struct task_for_pid_args *args) 621{ 622 mach_port_name_t target_tport = args->target_tport; 623 int pid = args->pid; 624 user_addr_t task_addr = args->t; 625 proc_t p = PROC_NULL; 626 task_t t1 = TASK_NULL; 627 mach_port_name_t tret = MACH_PORT_NULL; 628 ipc_port_t tfpport; 629 void * sright; 630 int error = 0; 631 632 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID); 633 AUDIT_ARG(pid, pid); 634 AUDIT_ARG(mach_port1, target_tport); 635 636 /* Always check if pid == 0 */ 637 if (pid == 0) { 638 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); 639 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); 640 return(KERN_FAILURE); 641 } 642 643 t1 = port_name_to_task(target_tport); 644 if (t1 == TASK_NULL) { 645 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); 646 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); 647 return(KERN_FAILURE); 648 } 649 650 651 p = proc_find(pid); 652 if (p == PROC_NULL) { 653 error = KERN_FAILURE; 654 goto tfpout; 655 } 656 657#if CONFIG_AUDIT 658 AUDIT_ARG(process, p); 659#endif 660 661 if (!(task_for_pid_posix_check(p))) { 662 error = KERN_FAILURE; 663 goto tfpout; 664 } 665 666 if (p->task != TASK_NULL) { 667 /* If we aren't root and target's task access port is set... 
*/ 668 if (!kauth_cred_issuser(kauth_cred_get()) && 669 p != current_proc() && 670 (task_get_task_access_port(p->task, &tfpport) == 0) && 671 (tfpport != IPC_PORT_NULL)) { 672 673 if (tfpport == IPC_PORT_DEAD) { 674 error = KERN_PROTECTION_FAILURE; 675 goto tfpout; 676 } 677 678 /* Call up to the task access server */ 679 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); 680 681 if (error != MACH_MSG_SUCCESS) { 682 if (error == MACH_RCV_INTERRUPTED) 683 error = KERN_ABORTED; 684 else 685 error = KERN_FAILURE; 686 goto tfpout; 687 } 688 } 689#if CONFIG_MACF 690 error = mac_proc_check_get_task(kauth_cred_get(), p); 691 if (error) { 692 error = KERN_FAILURE; 693 goto tfpout; 694 } 695#endif 696 697 /* Grant task port access */ 698 task_reference(p->task); 699 extmod_statistics_incr_task_for_pid(p->task); 700 701 sright = (void *) convert_task_to_port(p->task); 702 tret = ipc_port_copyout_send( 703 sright, 704 get_task_ipcspace(current_task())); 705 } 706 error = KERN_SUCCESS; 707 708tfpout: 709 task_deallocate(t1); 710 AUDIT_ARG(mach_port2, tret); 711 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); 712 if (p != PROC_NULL) 713 proc_rele(p); 714 AUDIT_MACH_SYSCALL_EXIT(error); 715 return(error); 716} 717 718/* 719 * Routine: task_name_for_pid 720 * Purpose: 721 * Get the task name port for another "process", named by its 722 * process ID on the same host as "target_task". 723 * 724 * Only permitted to privileged processes, or processes 725 * with the same user ID. 726 * 727 * XXX This should be a BSD system call, not a Mach trap!!! 
728 */ 729 730kern_return_t 731task_name_for_pid( 732 struct task_name_for_pid_args *args) 733{ 734 mach_port_name_t target_tport = args->target_tport; 735 int pid = args->pid; 736 user_addr_t task_addr = args->t; 737 proc_t p = PROC_NULL; 738 task_t t1; 739 mach_port_name_t tret; 740 void * sright; 741 int error = 0, refheld = 0; 742 kauth_cred_t target_cred; 743 744 AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID); 745 AUDIT_ARG(pid, pid); 746 AUDIT_ARG(mach_port1, target_tport); 747 748 t1 = port_name_to_task(target_tport); 749 if (t1 == TASK_NULL) { 750 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t)); 751 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE); 752 return(KERN_FAILURE); 753 } 754 755 p = proc_find(pid); 756 if (p != PROC_NULL) { 757 AUDIT_ARG(process, p); 758 target_cred = kauth_cred_proc_ref(p); 759 refheld = 1; 760 761 if ((p->p_stat != SZOMB) 762 && ((current_proc() == p) 763 || kauth_cred_issuser(kauth_cred_get()) 764 || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 765 ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) { 766 767 if (p->task != TASK_NULL) { 768 task_reference(p->task); 769#if CONFIG_MACF 770 error = mac_proc_check_get_task_name(kauth_cred_get(), p); 771 if (error) { 772 task_deallocate(p->task); 773 goto noperm; 774 } 775#endif 776 sright = (void *)convert_task_name_to_port(p->task); 777 tret = ipc_port_copyout_send(sright, 778 get_task_ipcspace(current_task())); 779 } else 780 tret = MACH_PORT_NULL; 781 782 AUDIT_ARG(mach_port2, tret); 783 (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t)); 784 task_deallocate(t1); 785 error = KERN_SUCCESS; 786 goto tnfpout; 787 } 788 } 789 790#if CONFIG_MACF 791noperm: 792#endif 793 task_deallocate(t1); 794 tret = MACH_PORT_NULL; 795 (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t)); 796 error = KERN_FAILURE; 797tnfpout: 798 if (refheld != 0) 799 kauth_cred_unref(&target_cred); 800 if (p != PROC_NULL) 801 proc_rele(p); 802 
AUDIT_MACH_SYSCALL_EXIT(error); 803 return(error); 804} 805 806kern_return_t 807pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret) 808{ 809 task_t target = NULL; 810 proc_t targetproc = PROC_NULL; 811 int pid = args->pid; 812 int error = 0; 813 814#if CONFIG_MACF 815 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND); 816 if (error) { 817 error = EPERM; 818 goto out; 819 } 820#endif 821 822 if (pid == 0) { 823 error = EPERM; 824 goto out; 825 } 826 827 targetproc = proc_find(pid); 828 if (targetproc == PROC_NULL) { 829 error = ESRCH; 830 goto out; 831 } 832 833 if (!task_for_pid_posix_check(targetproc)) { 834 error = EPERM; 835 goto out; 836 } 837 838 target = targetproc->task; 839 if (target != TASK_NULL) { 840 mach_port_t tfpport; 841 842 /* If we aren't root and target's task access port is set... */ 843 if (!kauth_cred_issuser(kauth_cred_get()) && 844 targetproc != current_proc() && 845 (task_get_task_access_port(target, &tfpport) == 0) && 846 (tfpport != IPC_PORT_NULL)) { 847 848 if (tfpport == IPC_PORT_DEAD) { 849 error = EACCES; 850 goto out; 851 } 852 853 /* Call up to the task access server */ 854 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); 855 856 if (error != MACH_MSG_SUCCESS) { 857 if (error == MACH_RCV_INTERRUPTED) 858 error = EINTR; 859 else 860 error = EPERM; 861 goto out; 862 } 863 } 864 } 865 866 task_reference(target); 867 error = task_pidsuspend(target); 868 if (error) { 869 if (error == KERN_INVALID_ARGUMENT) { 870 error = EINVAL; 871 } else { 872 error = EPERM; 873 } 874 } 875#if CONFIG_MEMORYSTATUS 876 else { 877 memorystatus_on_suspend(targetproc); 878 } 879#endif 880 881 task_deallocate(target); 882 883out: 884 if (targetproc != PROC_NULL) 885 proc_rele(targetproc); 886 *ret = error; 887 return error; 888} 889 890kern_return_t 891pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret) 892{ 893 task_t target = NULL; 894 
proc_t targetproc = PROC_NULL; 895 int pid = args->pid; 896 int error = 0; 897 898#if CONFIG_MACF 899 error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME); 900 if (error) { 901 error = EPERM; 902 goto out; 903 } 904#endif 905 906 if (pid == 0) { 907 error = EPERM; 908 goto out; 909 } 910 911 targetproc = proc_find(pid); 912 if (targetproc == PROC_NULL) { 913 error = ESRCH; 914 goto out; 915 } 916 917 if (!task_for_pid_posix_check(targetproc)) { 918 error = EPERM; 919 goto out; 920 } 921 922 target = targetproc->task; 923 if (target != TASK_NULL) { 924 mach_port_t tfpport; 925 926 /* If we aren't root and target's task access port is set... */ 927 if (!kauth_cred_issuser(kauth_cred_get()) && 928 targetproc != current_proc() && 929 (task_get_task_access_port(target, &tfpport) == 0) && 930 (tfpport != IPC_PORT_NULL)) { 931 932 if (tfpport == IPC_PORT_DEAD) { 933 error = EACCES; 934 goto out; 935 } 936 937 /* Call up to the task access server */ 938 error = __KERNEL_WAITING_ON_TASKGATED_CHECK_ACCESS_UPCALL__(tfpport, proc_selfpid(), kauth_getgid(), pid); 939 940 if (error != MACH_MSG_SUCCESS) { 941 if (error == MACH_RCV_INTERRUPTED) 942 error = EINTR; 943 else 944 error = EPERM; 945 goto out; 946 } 947 } 948 } 949 950 task_reference(target); 951 952#if CONFIG_MEMORYSTATUS 953 memorystatus_on_resume(targetproc); 954#endif 955 956 error = task_pidresume(target); 957 if (error) { 958 if (error == KERN_INVALID_ARGUMENT) { 959 error = EINVAL; 960 } else { 961 if (error == KERN_MEMORY_ERROR) { 962 psignal(targetproc, SIGKILL); 963 error = EIO; 964 } else 965 error = EPERM; 966 } 967 } 968 969 task_deallocate(target); 970 971out: 972 if (targetproc != PROC_NULL) 973 proc_rele(targetproc); 974 975 *ret = error; 976 return error; 977} 978 979 980static int 981sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1, 982 __unused int arg2, struct sysctl_req *req) 983{ 984 int error = 0; 985 int new_value; 986 987 error = SYSCTL_OUT(req, arg1, sizeof(int)); 
988 if (error || req->newptr == USER_ADDR_NULL) 989 return(error); 990 991 if (!kauth_cred_issuser(kauth_cred_get())) 992 return(EPERM); 993 994 if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) { 995 goto out; 996 } 997 if ((new_value == KERN_TFP_POLICY_DENY) 998 || (new_value == KERN_TFP_POLICY_DEFAULT)) 999 tfp_policy = new_value; 1000 else 1001 error = EINVAL; 1002out: 1003 return(error); 1004 1005} 1006 1007#if defined(SECURE_KERNEL) 1008static int kern_secure_kernel = 1; 1009#else 1010static int kern_secure_kernel = 0; 1011#endif 1012 1013SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, ""); 1014 1015SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp"); 1016SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, 1017 &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy"); 1018 1019SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED, 1020 &shared_region_trace_level, 0, ""); 1021SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED, 1022 &shared_region_version, 0, ""); 1023SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED, 1024 &shared_region_persistence, 0, ""); 1025 1026/* 1027 * shared_region_check_np: 1028 * 1029 * This system call is intended for dyld. 1030 * 1031 * dyld calls this when any process starts to see if the process's shared 1032 * region is already set up and ready to use. 1033 * This call returns the base address of the first mapping in the 1034 * process's shared region's first mapping. 1035 * dyld will then check what's mapped at that address. 1036 * 1037 * If the shared region is empty, dyld will then attempt to map the shared 1038 * cache file in the shared region via the shared_region_map_np() system call. 
1039 * 1040 * If something's already mapped in the shared region, dyld will check if it 1041 * matches the shared cache it would like to use for that process. 1042 * If it matches, evrything's ready and the process can proceed and use the 1043 * shared region. 1044 * If it doesn't match, dyld will unmap the shared region and map the shared 1045 * cache into the process's address space via mmap(). 1046 * 1047 * ERROR VALUES 1048 * EINVAL no shared region 1049 * ENOMEM shared region is empty 1050 * EFAULT bad address for "start_address" 1051 */ 1052int 1053shared_region_check_np( 1054 __unused struct proc *p, 1055 struct shared_region_check_np_args *uap, 1056 __unused int *retvalp) 1057{ 1058 vm_shared_region_t shared_region; 1059 mach_vm_offset_t start_address = 0; 1060 int error; 1061 kern_return_t kr; 1062 1063 SHARED_REGION_TRACE_DEBUG( 1064 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n", 1065 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1066 p->p_pid, p->p_comm, 1067 (uint64_t)uap->start_address)); 1068 1069 /* retrieve the current tasks's shared region */ 1070 shared_region = vm_shared_region_get(current_task()); 1071 if (shared_region != NULL) { 1072 /* retrieve address of its first mapping... */ 1073 kr = vm_shared_region_start_address(shared_region, 1074 &start_address); 1075 if (kr != KERN_SUCCESS) { 1076 error = ENOMEM; 1077 } else { 1078 /* ... and give it to the caller */ 1079 error = copyout(&start_address, 1080 (user_addr_t) uap->start_address, 1081 sizeof (start_address)); 1082 if (error) { 1083 SHARED_REGION_TRACE_ERROR( 1084 ("shared_region: %p [%d(%s)] " 1085 "check_np(0x%llx) " 1086 "copyout(0x%llx) error %d\n", 1087 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1088 p->p_pid, p->p_comm, 1089 (uint64_t)uap->start_address, (uint64_t)start_address, 1090 error)); 1091 } 1092 } 1093 vm_shared_region_deallocate(shared_region); 1094 } else { 1095 /* no shared region ! 
*/ 1096 error = EINVAL; 1097 } 1098 1099 SHARED_REGION_TRACE_DEBUG( 1100 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n", 1101 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1102 p->p_pid, p->p_comm, 1103 (uint64_t)uap->start_address, (uint64_t)start_address, error)); 1104 1105 return error; 1106} 1107 1108 1109int 1110shared_region_copyin_mappings( 1111 struct proc *p, 1112 user_addr_t user_mappings, 1113 unsigned int mappings_count, 1114 struct shared_file_mapping_np *mappings) 1115{ 1116 int error = 0; 1117 vm_size_t mappings_size = 0; 1118 1119 /* get the list of mappings the caller wants us to establish */ 1120 mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0])); 1121 error = copyin(user_mappings, 1122 mappings, 1123 mappings_size); 1124 if (error) { 1125 SHARED_REGION_TRACE_ERROR( 1126 ("shared_region: %p [%d(%s)] map(): " 1127 "copyin(0x%llx, %d) failed (error=%d)\n", 1128 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1129 p->p_pid, p->p_comm, 1130 (uint64_t)user_mappings, mappings_count, error)); 1131 } 1132 return error; 1133} 1134/* 1135 * shared_region_map_np() 1136 * 1137 * This system call is intended for dyld. 1138 * 1139 * dyld uses this to map a shared cache file into a shared region. 1140 * This is usually done only the first time a shared cache is needed. 1141 * Subsequent processes will just use the populated shared region without 1142 * requiring any further setup. 
1143 */ 1144int 1145_shared_region_map_and_slide( 1146 struct proc *p, 1147 int fd, 1148 uint32_t mappings_count, 1149 struct shared_file_mapping_np *mappings, 1150 uint32_t slide, 1151 user_addr_t slide_start, 1152 user_addr_t slide_size) 1153{ 1154 int error; 1155 kern_return_t kr; 1156 struct fileproc *fp; 1157 struct vnode *vp, *root_vp, *scdir_vp; 1158 struct vnode_attr va; 1159 off_t fs; 1160 memory_object_size_t file_size; 1161#if CONFIG_MACF 1162 vm_prot_t maxprot = VM_PROT_ALL; 1163#endif 1164 memory_object_control_t file_control; 1165 struct vm_shared_region *shared_region; 1166 1167 SHARED_REGION_TRACE_DEBUG( 1168 ("shared_region: %p [%d(%s)] -> map\n", 1169 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1170 p->p_pid, p->p_comm)); 1171 1172 shared_region = NULL; 1173 fp = NULL; 1174 vp = NULL; 1175 scdir_vp = NULL; 1176 1177 /* get file structure from file descriptor */ 1178 error = fp_lookup(p, fd, &fp, 0); 1179 if (error) { 1180 SHARED_REGION_TRACE_ERROR( 1181 ("shared_region: %p [%d(%s)] map: " 1182 "fd=%d lookup failed (error=%d)\n", 1183 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1184 p->p_pid, p->p_comm, fd, error)); 1185 goto done; 1186 } 1187 1188 /* make sure we're attempting to map a vnode */ 1189 if (FILEGLOB_DTYPE(fp->f_fglob) != DTYPE_VNODE) { 1190 SHARED_REGION_TRACE_ERROR( 1191 ("shared_region: %p [%d(%s)] map: " 1192 "fd=%d not a vnode (type=%d)\n", 1193 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1194 p->p_pid, p->p_comm, 1195 fd, FILEGLOB_DTYPE(fp->f_fglob))); 1196 error = EINVAL; 1197 goto done; 1198 } 1199 1200 /* we need at least read permission on the file */ 1201 if (! 
(fp->f_fglob->fg_flag & FREAD)) { 1202 SHARED_REGION_TRACE_ERROR( 1203 ("shared_region: %p [%d(%s)] map: " 1204 "fd=%d not readable\n", 1205 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1206 p->p_pid, p->p_comm, fd)); 1207 error = EPERM; 1208 goto done; 1209 } 1210 1211 /* get vnode from file structure */ 1212 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data); 1213 if (error) { 1214 SHARED_REGION_TRACE_ERROR( 1215 ("shared_region: %p [%d(%s)] map: " 1216 "fd=%d getwithref failed (error=%d)\n", 1217 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1218 p->p_pid, p->p_comm, fd, error)); 1219 goto done; 1220 } 1221 vp = (struct vnode *) fp->f_fglob->fg_data; 1222 1223 /* make sure the vnode is a regular file */ 1224 if (vp->v_type != VREG) { 1225 SHARED_REGION_TRACE_ERROR( 1226 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1227 "not a file (type=%d)\n", 1228 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1229 p->p_pid, p->p_comm, 1230 (void *)VM_KERNEL_ADDRPERM(vp), 1231 vp->v_name, vp->v_type)); 1232 error = EINVAL; 1233 goto done; 1234 } 1235 1236#if CONFIG_MACF 1237 error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()), 1238 fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot); 1239 if (error) { 1240 goto done; 1241 } 1242#endif /* MAC */ 1243 1244#if CONFIG_PROTECT 1245 /* check for content protection access */ 1246 { 1247 error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0); 1248 if (error) { 1249 goto done; 1250 } 1251 } 1252#endif /* CONFIG_PROTECT */ 1253 1254 /* make sure vnode is on the process's root volume */ 1255 root_vp = p->p_fd->fd_rdir; 1256 if (root_vp == NULL) { 1257 root_vp = rootvnode; 1258 } else { 1259 /* 1260 * Chroot-ed processes can't use the shared_region. 
1261 */ 1262 error = EINVAL; 1263 goto done; 1264 } 1265 1266 if (vp->v_mount != root_vp->v_mount) { 1267 SHARED_REGION_TRACE_ERROR( 1268 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1269 "not on process's root volume\n", 1270 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1271 p->p_pid, p->p_comm, 1272 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); 1273 error = EPERM; 1274 goto done; 1275 } 1276 1277 /* make sure vnode is owned by "root" */ 1278 VATTR_INIT(&va); 1279 VATTR_WANTED(&va, va_uid); 1280 error = vnode_getattr(vp, &va, vfs_context_current()); 1281 if (error) { 1282 SHARED_REGION_TRACE_ERROR( 1283 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1284 "vnode_getattr(%p) failed (error=%d)\n", 1285 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1286 p->p_pid, p->p_comm, 1287 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, 1288 (void *)VM_KERNEL_ADDRPERM(vp), error)); 1289 goto done; 1290 } 1291 if (va.va_uid != 0) { 1292 SHARED_REGION_TRACE_ERROR( 1293 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1294 "owned by uid=%d instead of 0\n", 1295 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1296 p->p_pid, p->p_comm, 1297 (void *)VM_KERNEL_ADDRPERM(vp), 1298 vp->v_name, va.va_uid)); 1299 error = EPERM; 1300 goto done; 1301 } 1302 1303 if (scdir_enforce) { 1304 /* get vnode for scdir_path */ 1305 error = vnode_lookup(scdir_path, 0, &scdir_vp, vfs_context_current()); 1306 if (error) { 1307 SHARED_REGION_TRACE_ERROR( 1308 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1309 "vnode_lookup(%s) failed (error=%d)\n", 1310 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1311 p->p_pid, p->p_comm, 1312 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, 1313 scdir_path, error)); 1314 goto done; 1315 } 1316 1317 /* ensure parent is scdir_vp */ 1318 if (vnode_parent(vp) != scdir_vp) { 1319 SHARED_REGION_TRACE_ERROR( 1320 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1321 "shared cache file not in %s\n", 1322 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1323 p->p_pid, p->p_comm, 1324 (void 
*)VM_KERNEL_ADDRPERM(vp), 1325 vp->v_name, scdir_path)); 1326 error = EPERM; 1327 goto done; 1328 } 1329 } 1330 1331 /* get vnode size */ 1332 error = vnode_size(vp, &fs, vfs_context_current()); 1333 if (error) { 1334 SHARED_REGION_TRACE_ERROR( 1335 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1336 "vnode_size(%p) failed (error=%d)\n", 1337 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1338 p->p_pid, p->p_comm, 1339 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, 1340 (void *)VM_KERNEL_ADDRPERM(vp), error)); 1341 goto done; 1342 } 1343 file_size = fs; 1344 1345 /* get the file's memory object handle */ 1346 file_control = ubc_getobject(vp, UBC_HOLDOBJECT); 1347 if (file_control == MEMORY_OBJECT_CONTROL_NULL) { 1348 SHARED_REGION_TRACE_ERROR( 1349 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1350 "no memory object\n", 1351 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1352 p->p_pid, p->p_comm, 1353 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); 1354 error = EINVAL; 1355 goto done; 1356 } 1357 1358 1359 /* get the process's shared region (setup in vm_map_exec()) */ 1360 shared_region = vm_shared_region_get(current_task()); 1361 if (shared_region == NULL) { 1362 SHARED_REGION_TRACE_ERROR( 1363 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1364 "no shared region\n", 1365 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1366 p->p_pid, p->p_comm, 1367 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name)); 1368 goto done; 1369 } 1370 1371 /* map the file into that shared region's submap */ 1372 kr = vm_shared_region_map_file(shared_region, 1373 mappings_count, 1374 mappings, 1375 file_control, 1376 file_size, 1377 (void *) p->p_fd->fd_rdir, 1378 slide, 1379 slide_start, 1380 slide_size); 1381 if (kr != KERN_SUCCESS) { 1382 SHARED_REGION_TRACE_ERROR( 1383 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1384 "vm_shared_region_map_file() failed kr=0x%x\n", 1385 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1386 p->p_pid, p->p_comm, 1387 (void *)VM_KERNEL_ADDRPERM(vp), vp->v_name, kr)); 1388 
switch (kr) { 1389 case KERN_INVALID_ADDRESS: 1390 error = EFAULT; 1391 break; 1392 case KERN_PROTECTION_FAILURE: 1393 error = EPERM; 1394 break; 1395 case KERN_NO_SPACE: 1396 error = ENOMEM; 1397 break; 1398 case KERN_FAILURE: 1399 case KERN_INVALID_ARGUMENT: 1400 default: 1401 error = EINVAL; 1402 break; 1403 } 1404 goto done; 1405 } 1406 1407 error = 0; 1408 1409 vnode_lock_spin(vp); 1410 1411 vp->v_flag |= VSHARED_DYLD; 1412 1413 vnode_unlock(vp); 1414 1415 /* update the vnode's access time */ 1416 if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) { 1417 VATTR_INIT(&va); 1418 nanotime(&va.va_access_time); 1419 VATTR_SET_ACTIVE(&va, va_access_time); 1420 vnode_setattr(vp, &va, vfs_context_current()); 1421 } 1422 1423 if (p->p_flag & P_NOSHLIB) { 1424 /* signal that this process is now using split libraries */ 1425 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag); 1426 } 1427 1428done: 1429 if (vp != NULL) { 1430 /* 1431 * release the vnode... 1432 * ubc_map() still holds it for us in the non-error case 1433 */ 1434 (void) vnode_put(vp); 1435 vp = NULL; 1436 } 1437 if (fp != NULL) { 1438 /* release the file descriptor */ 1439 fp_drop(p, fd, fp, 0); 1440 fp = NULL; 1441 } 1442 if (scdir_vp != NULL) { 1443 (void)vnode_put(scdir_vp); 1444 scdir_vp = NULL; 1445 } 1446 1447 if (shared_region != NULL) { 1448 vm_shared_region_deallocate(shared_region); 1449 } 1450 1451 SHARED_REGION_TRACE_DEBUG( 1452 ("shared_region: %p [%d(%s)] <- map\n", 1453 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1454 p->p_pid, p->p_comm)); 1455 1456 return error; 1457} 1458 1459int 1460shared_region_map_and_slide_np( 1461 struct proc *p, 1462 struct shared_region_map_and_slide_np_args *uap, 1463 __unused int *retvalp) 1464{ 1465 struct shared_file_mapping_np *mappings; 1466 unsigned int mappings_count = uap->count; 1467 kern_return_t kr = KERN_SUCCESS; 1468 uint32_t slide = uap->slide; 1469 1470#define SFM_MAX_STACK 8 1471 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK]; 1472 1473 /* 
Is the process chrooted?? */ 1474 if (p->p_fd->fd_rdir != NULL) { 1475 kr = EINVAL; 1476 goto done; 1477 } 1478 1479 if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) { 1480 if (kr == KERN_INVALID_ARGUMENT) { 1481 /* 1482 * This will happen if we request sliding again 1483 * with the same slide value that was used earlier 1484 * for the very first sliding. 1485 */ 1486 kr = KERN_SUCCESS; 1487 } 1488 goto done; 1489 } 1490 1491 if (mappings_count == 0) { 1492 SHARED_REGION_TRACE_INFO( 1493 ("shared_region: %p [%d(%s)] map(): " 1494 "no mappings\n", 1495 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1496 p->p_pid, p->p_comm)); 1497 kr = 0; /* no mappings: we're done ! */ 1498 goto done; 1499 } else if (mappings_count <= SFM_MAX_STACK) { 1500 mappings = &stack_mappings[0]; 1501 } else { 1502 SHARED_REGION_TRACE_ERROR( 1503 ("shared_region: %p [%d(%s)] map(): " 1504 "too many mappings (%d)\n", 1505 (void *)VM_KERNEL_ADDRPERM(current_thread()), 1506 p->p_pid, p->p_comm, 1507 mappings_count)); 1508 kr = KERN_FAILURE; 1509 goto done; 1510 } 1511 1512 if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) { 1513 goto done; 1514 } 1515 1516 1517 kr = _shared_region_map_and_slide(p, uap->fd, mappings_count, mappings, 1518 slide, 1519 uap->slide_start, uap->slide_size); 1520 if (kr != KERN_SUCCESS) { 1521 return kr; 1522 } 1523 1524done: 1525 return kr; 1526} 1527 1528/* sysctl overflow room */ 1529 1530SYSCTL_INT (_vm, OID_AUTO, pagesize, CTLFLAG_RD | CTLFLAG_LOCKED, 1531 (int *) &page_size, 0, "vm page size"); 1532 1533/* vm_page_free_target is provided as a makeshift solution for applications that want to 1534 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be 1535 reclaimed. It allows the app to calculate how much memory is free outside the free target. 
 */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_memory_pressure, 0, "Memory pressure indicator");

/*
 * Read-time sysctl handler: reports how many pages the pageout daemon
 * currently wants freed.  A SYSCTL_PROC (not SYSCTL_INT) because the
 * value is sampled from the VM layer on every read.
 */
static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
	    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
	    0, 0, vm_ctl_page_free_wanted, "I", "");

extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

extern int madvise_free_debug;
SYSCTL_INT(_vm, OID_AUTO, madvise_free_debug, CTLFLAG_RW | CTLFLAG_LOCKED,
	   &madvise_free_debug, 0, "zero-fill on madvise(MADV_FREE*)");

/* reusable/reused page accounting, mirrored from vm_page_stats_reusable */
SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.can_reuse_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_reclaimed, CTLFLAG_RD | CTLFLAG_LOCKED,
	   &vm_page_stats_reusable.reusable_reclaimed, "");


extern unsigned int	vm_page_free_count, vm_page_speculative_count;
SYSCTL_UINT(_vm, OID_AUTO, page_free_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_free_count, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, page_speculative_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_speculative_count, 0, "");

extern unsigned int	vm_page_cleaned_count;
SYSCTL_UINT(_vm, OID_AUTO, page_cleaned_count, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_page_cleaned_count, 0, "Cleaned queue size");

/* pageout counts */
extern unsigned int vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external, vm_pageout_inactive_clean, vm_pageout_speculative_clean, vm_pageout_inactive_used;
extern unsigned int vm_pageout_freed_from_inactive_clean, vm_pageout_freed_from_speculative;
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_internal, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_internal, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_dirty_external, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_dirty_external, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_speculative_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_speculative_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_inactive_used, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_inactive_used, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_speculative, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_speculative, 0, "");

extern unsigned int vm_pageout_freed_from_cleaned;
SYSCTL_UINT(_vm, OID_AUTO, pageout_freed_from_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_freed_from_cleaned, 0, "");

/* counts of pages entering the cleaned queue */
extern unsigned int vm_pageout_enqueued_cleaned, vm_pageout_enqueued_cleaned_from_inactive_clean, vm_pageout_enqueued_cleaned_from_inactive_dirty;
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned, 0, ""); /* sum of next two */
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_clean, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_clean, 0, "");
SYSCTL_UINT(_vm, OID_AUTO, pageout_enqueued_cleaned_from_inactive_dirty, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_enqueued_cleaned_from_inactive_dirty, 0, "");

/* counts of pages leaving the cleaned queue */
extern unsigned int vm_pageout_cleaned_reclaimed, vm_pageout_cleaned_reactivated, vm_pageout_cleaned_reference_reactivated, vm_pageout_cleaned_volatile_reactivated, vm_pageout_cleaned_fault_reactivated, vm_pageout_cleaned_commit_reactivated, vm_pageout_cleaned_busy, vm_pageout_cleaned_nolock;
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reclaimed, 0, "Cleaned pages reclaimed");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reactivated, 0, "Cleaned pages reactivated"); /* sum of all reactivated AND busy and nolock (even though those actually get reDEactivated */
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_reference_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_reference_reactivated, 0, "Cleaned pages reference reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_volatile_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_volatile_reactivated, 0, "Cleaned pages volatile reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_fault_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_fault_reactivated, 0, "Cleaned pages fault reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_commit_reactivated, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_commit_reactivated, 0, "Cleaned pages commit reactivated");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_busy, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_busy, 0, "Cleaned pages busy (deactivated)");
SYSCTL_UINT(_vm, OID_AUTO, pageout_cleaned_nolock, CTLFLAG_RD | CTLFLAG_LOCKED, &vm_pageout_cleaned_nolock, 0, "Cleaned pages no-lock (deactivated)");

/* counts of pages prefaulted when entering a memory object */
extern int64_t vm_prefault_nb_pages, vm_prefault_nb_bailout;
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_pages, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_pages, "");
SYSCTL_QUAD(_vm, OID_AUTO, prefault_nb_bailout, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_prefault_nb_bailout, "");

#include <kern/thread.h>
#include <sys/user.h>

void vm_pageout_io_throttle(void);
1644 1645void vm_pageout_io_throttle(void) { 1646 struct uthread *uthread = get_bsdthread_info(current_thread()); 1647 1648 /* 1649 * thread is marked as a low priority I/O type 1650 * and the I/O we issued while in this cleaning operation 1651 * collided with normal I/O operations... we'll 1652 * delay in order to mitigate the impact of this 1653 * task on the normal operation of the system 1654 */ 1655 1656 if (uthread->uu_lowpri_window) { 1657 throttle_lowpri_io(1); 1658 } 1659 1660} 1661 1662int 1663vm_pressure_monitor( 1664 __unused struct proc *p, 1665 struct vm_pressure_monitor_args *uap, 1666 int *retval) 1667{ 1668 kern_return_t kr; 1669 uint32_t pages_reclaimed; 1670 uint32_t pages_wanted; 1671 1672 kr = mach_vm_pressure_monitor( 1673 (boolean_t) uap->wait_for_pressure, 1674 uap->nsecs_monitored, 1675 (uap->pages_reclaimed) ? &pages_reclaimed : NULL, 1676 &pages_wanted); 1677 1678 switch (kr) { 1679 case KERN_SUCCESS: 1680 break; 1681 case KERN_ABORTED: 1682 return EINTR; 1683 default: 1684 return EINVAL; 1685 } 1686 1687 if (uap->pages_reclaimed) { 1688 if (copyout((void *)&pages_reclaimed, 1689 uap->pages_reclaimed, 1690 sizeof (pages_reclaimed)) != 0) { 1691 return EFAULT; 1692 } 1693 } 1694 1695 *retval = (int) pages_wanted; 1696 return 0; 1697} 1698 1699int 1700kas_info(struct proc *p, 1701 struct kas_info_args *uap, 1702 int *retval __unused) 1703{ 1704#ifdef SECURE_KERNEL 1705 (void)p; 1706 (void)uap; 1707 return ENOTSUP; 1708#else /* !SECURE_KERNEL */ 1709 int selector = uap->selector; 1710 user_addr_t valuep = uap->value; 1711 user_addr_t sizep = uap->size; 1712 user_size_t size; 1713 int error; 1714 1715 if (!kauth_cred_issuser(kauth_cred_get())) { 1716 return EPERM; 1717 } 1718 1719#if CONFIG_MACF 1720 error = mac_system_check_kas_info(kauth_cred_get(), selector); 1721 if (error) { 1722 return error; 1723 } 1724#endif 1725 1726 if (IS_64BIT_PROCESS(p)) { 1727 user64_size_t size64; 1728 error = copyin(sizep, &size64, sizeof(size64)); 1729 size = 
(user_size_t)size64; 1730 } else { 1731 user32_size_t size32; 1732 error = copyin(sizep, &size32, sizeof(size32)); 1733 size = (user_size_t)size32; 1734 } 1735 if (error) { 1736 return error; 1737 } 1738 1739 switch (selector) { 1740 case KAS_INFO_KERNEL_TEXT_SLIDE_SELECTOR: 1741 { 1742 uint64_t slide = vm_kernel_slide; 1743 1744 if (sizeof(slide) != size) { 1745 return EINVAL; 1746 } 1747 1748 if (IS_64BIT_PROCESS(p)) { 1749 user64_size_t size64 = (user64_size_t)size; 1750 error = copyout(&size64, sizep, sizeof(size64)); 1751 } else { 1752 user32_size_t size32 = (user32_size_t)size; 1753 error = copyout(&size32, sizep, sizeof(size32)); 1754 } 1755 if (error) { 1756 return error; 1757 } 1758 1759 error = copyout(&slide, valuep, sizeof(slide)); 1760 if (error) { 1761 return error; 1762 } 1763 } 1764 break; 1765 default: 1766 return EINVAL; 1767 } 1768 1769 return 0; 1770#endif /* !SECURE_KERNEL */ 1771} 1772