1/* 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Mach Operating System 30 * Copyright (c) 1987 Carnegie-Mellon University 31 * All rights reserved. The CMU software License Agreement specifies 32 * the terms and conditions for use and redistribution. 33 */ 34/* 35 * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce 36 * support for mandatory and extensible security protections. This notice 37 * is included in support of clause 2.2 (b) of the Apple Public License, 38 * Version 2.0. 
39 */ 40 41#include <meta_features.h> 42 43#include <kern/task.h> 44#include <kern/thread.h> 45#include <kern/debug.h> 46#include <kern/lock.h> 47#include <mach/mach_traps.h> 48#include <mach/port.h> 49#include <mach/task.h> 50#include <mach/task_access.h> 51#include <mach/task_special_ports.h> 52#include <mach/time_value.h> 53#include <mach/vm_map.h> 54#include <mach/vm_param.h> 55#include <mach/vm_prot.h> 56 57#include <sys/file_internal.h> 58#include <sys/param.h> 59#include <sys/systm.h> 60#include <sys/dir.h> 61#include <sys/namei.h> 62#include <sys/proc_internal.h> 63#include <sys/kauth.h> 64#include <sys/vm.h> 65#include <sys/file.h> 66#include <sys/vnode_internal.h> 67#include <sys/mount.h> 68#include <sys/trace.h> 69#include <sys/kernel.h> 70#include <sys/ubc_internal.h> 71#include <sys/user.h> 72#include <sys/syslog.h> 73#include <sys/stat.h> 74#include <sys/sysproto.h> 75#include <sys/mman.h> 76#include <sys/sysctl.h> 77 78#include <bsm/audit_kernel.h> 79#include <bsm/audit_kevents.h> 80 81#include <kern/kalloc.h> 82#include <vm/vm_map.h> 83#include <vm/vm_kern.h> 84 85#include <machine/spl.h> 86 87#include <mach/shared_region.h> 88#include <vm/vm_shared_region.h> 89 90#include <vm/vm_protos.h> 91 92/* 93 * Sysctl's related to data/stack execution. 
See osfmk/vm/vm_map.c
 */

#ifndef SECURE_KERNEL
/* 0/1 knobs read by the VM fault path; exposed RW on non-secure kernels only */
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */

#if CONFIG_NO_PRINTF_STRINGS
/* Stub: kernels built without printf strings log nothing. */
void
log_stack_execution_failure(__unused addr64_t a, __unused vm_prot_t b)
{
}
#else
/*
 * Human-readable names for the 8 combinations of
 * VM_PROT_{READ,WRITE,EXECUTE}, indexed by (prot & VM_PROT_ALL).
 */
static const char *prot_values[] = {
	"none",
	"read-only",
	"write-only",
	"read-write",
	"execute-only",
	"read-execute",
	"write-execute",
	"read-write-execute"
};

/*
 * Log an attempt to execute from a page whose protections do not
 * allow it (data/stack execution prevention).
 */
void
log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
{
	printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n",
	    current_proc()->p_comm, current_proc()->p_pid, vaddr,
	    prot_values[prot & VM_PROT_ALL]);
}
#endif


/*
 * Check that the current map allows read (prot == B_READ) or otherwise
 * write access to the user address range [addr, addr+len).
 * Returns the boolean result of vm_map_check_protection().
 */
int
useracc(
	user_addr_t	addr,
	user_size_t	len,
	int		prot)
{
	return (vm_map_check_protection(
			current_map(),
			vm_map_trunc_page(addr), vm_map_round_page(addr + len),
			prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
}

/*
 * Wire the user address range [addr, addr+len) into physical memory.
 * Returns 0 on success or a BSD errno translated from the Mach
 * return code.
 */
int
vslock(
	user_addr_t	addr,
	user_size_t	len)
{
	kern_return_t kret;

	kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
	    vm_map_round_page(addr + len),
	    VM_PROT_READ | VM_PROT_WRITE, FALSE);

	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

/*
 * Unwire the user address range [addr, addr+len).
 * "dirtied" is currently ignored: the code that would mark the pages
 * modified is compiled out (see the FIXME blocks below).
 * Returns 0 on success or a BSD errno translated from the Mach
 * return code.
 */
int
vsunlock(
	user_addr_t addr,
	user_size_t len,
	__unused int dirtied)
{
#if FIXME  /* [ */
	pmap_t		pmap;
	vm_page_t	pg;
	vm_map_offset_t	vaddr;
	ppnum_t		paddr;
#endif  /* FIXME ] */
	kern_return_t	kret;

#if FIXME  /* [ */
	if (dirtied) {
		pmap = get_task_pmap(current_task());
		for (vaddr = vm_map_trunc_page(addr);
		     vaddr < vm_map_round_page(addr + len);
		     vaddr += PAGE_SIZE) {
			paddr = pmap_extract(pmap, vaddr);
			pg = PHYS_TO_VM_PAGE(paddr);
			vm_page_set_modified(pg);
		}
	}
#endif  /* FIXME ] */
#ifdef	lint
	dirtied++;
#endif	/* lint */
	kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
	    vm_map_round_page(addr + len), FALSE);
	switch (kret) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}

/*
 * Store a single byte at user address "addr".
 * Returns 0 on success, -1 on copyout failure.
 */
int
subyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
}

/*
 * "Instruction-space" variant of subyte(); identical on this platform.
 * Returns 0 on success, -1 on copyout failure.
 */
int
suibyte(
	user_addr_t addr,
	int byte)
{
	char character;

	character = (char)byte;
	return (copyout((void *)&(character), addr, sizeof(char)) == 0 ?
	    0 : -1);
}

/*
 * Fetch a single byte from user address "addr".
 * Returns the byte zero-extended, or -1 on copyin failure.
 * NOTE: a legitimate byte value of 0xff is indistinguishable from
 * failure (in-band error return).
 */
int fubyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &byte, sizeof(char)))
		return(-1);
	return(byte);
}

/* "Instruction-space" variant of fubyte(); identical on this platform. */
int fuibyte(user_addr_t addr)
{
	unsigned char byte;

	if (copyin(addr, (void *) &(byte), sizeof(char)))
		return(-1);
	return(byte);
}

/*
 * Store a 32-bit word (sizeof(int) bytes of "word") at user address
 * "addr".  Returns 0 on success, -1 on copyout failure.
 */
int
suword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

/*
 * Fetch a 32-bit word from user address "addr".
 * Returns the word, or -1 on copyin failure (in-band error value).
 */
long fuword(user_addr_t addr)
{
	long word;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/* suiword and fuiword are the same as suword and fuword, respectively */

int
suiword(
	user_addr_t addr,
	long word)
{
	return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
}

long fuiword(user_addr_t addr)
{
	long word;

	if (copyin(addr, (void *) &word, sizeof(int)))
		return(-1);
	return(word);
}

/*
 * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
 * fetching and setting of process-sized size_t and pointer values.
 */

/* Store a process-sized (32- or 64-bit depending on target) signed value. */
int
sulong(user_addr_t addr, int64_t word)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (long)word));
	}
}

/*
 * Fetch a process-sized signed value from "addr".
 * Returns -1 on failure (in-band error value).
 */
int64_t
fulong(user_addr_t addr)
{
	int64_t longword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
			return(-1);
		return(longword);
	} else {
		return((int64_t)fuiword(addr));
	}
}

/* Store a process-sized unsigned value at "addr". */
int
suulong(user_addr_t addr, uint64_t uword)
{

	if (IS_64BIT_PROCESS(current_proc())) {
		return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
	} else {
		return(suiword(addr, (u_long)uword));
	}
}

/*
 * Fetch a process-sized unsigned value from "addr".
 * Returns -1ULL on failure (in-band error value).
 */
uint64_t
fuulong(user_addr_t addr)
{
	uint64_t ulongword;

	if (IS_64BIT_PROCESS(current_proc())) {
		if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
			return(-1ULL);
		return(ulongword);
	} else {
		return((uint64_t)fuiword(addr));
	}
}

/* swapon(2) is not supported on this platform. */
int
swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
{
	return(ENOTSUP);
}


/*
 * Mach trap: translate a task port into its BSD process ID.
 * The pid (or -1 on failure) is copied out to user address args->pid.
 */
kern_return_t
pid_for_task(
	struct pid_for_task_args *args)
{
	mach_port_name_t	t = args->t;
	user_addr_t		pid_addr = args->pid;
	proc_t			p;
	task_t			t1;
	int			pid = -1;
	kern_return_t		err = KERN_SUCCESS;

	AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
	AUDIT_ARG(mach_port1, t);

	t1 = port_name_to_task(t);	/* takes a task reference on success */

	if (t1 == TASK_NULL) {
		err = KERN_FAILURE;
		goto pftout;
	} else {
		p = get_bsdtask_info(t1);
		if (p) {
			pid = proc_pid(p);
			err = KERN_SUCCESS;
		} else {
			err = KERN_FAILURE;
		}
	}
	task_deallocate(t1);	/* drop the reference from port_name_to_task() */
pftout:
	AUDIT_ARG(pid, pid);
	(void) copyout((char *) &pid, pid_addr, sizeof(int));
	AUDIT_MACH_SYSCALL_EXIT(err);
	return(err);
}

/*
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
static int tfp_policy = KERN_TFP_POLICY_DEFAULT;

/*
 *	Routine:	task_for_pid_posix_check
 *	Purpose:
 *		Verify that the current process should be allowed to
 *		get the target process's task port.
 *		This is only permitted if:
 *		- The current process is root
 *		  OR all of the following are true:
 *		  - The target process's real, effective, and saved uids
 *		    are the same as the current proc's euid,
 *		  - The target process's group set is a subset of the
 *		    calling process's group set, and
 *		  - The target process hasn't switched credentials.
 *
 *	Returns:	TRUE: permitted
 *			FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
	kauth_cred_t targetcred, mycred;
	uid_t myuid;
	int allowed;

	/* No task_for_pid on bad targets */
	if (target == PROC_NULL || target->p_stat == SZOMB) {
		return FALSE;
	}

	mycred = kauth_cred_get();
	myuid = kauth_cred_getuid(mycred);

	/* If we're running as root, the check passes */
	if (kauth_cred_issuser(mycred))
		return TRUE;

	/* We're allowed to get our own task port */
	if (target == current_proc())
		return TRUE;

	/*
	 * Under DENY, only root can get another proc's task port,
	 * so no more checks are needed.
	 */
	if (tfp_policy == KERN_TFP_POLICY_DENY) {
		return FALSE;
	}

	/* take a reference on the target's credential for the checks below */
	targetcred = kauth_cred_proc_ref(target);
	allowed = TRUE;

	/* Do target's ruid, euid, and saved uid match my euid? */
	if ((kauth_cred_getuid(targetcred) != myuid) ||
	    (targetcred->cr_ruid != myuid) ||
	    (targetcred->cr_svuid != myuid)) {
		allowed = FALSE;
		goto out;
	}

	/* Are target's groups a subset of my groups? */
	if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
	    allowed == 0) {
		allowed = FALSE;
		goto out;
	}

	/* Has target switched credentials? */
	if (target->p_flag & P_SUGID) {
		allowed = FALSE;
		goto out;
	}

out:
	kauth_cred_unref(&targetcred);
	return allowed;
}

/*
 *	Routine:	task_for_pid
 *	Purpose:
 *		Get the task port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
	struct task_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t			p = PROC_NULL;
	task_t			t1 = TASK_NULL;
	mach_port_name_t	tret = MACH_PORT_NULL;
	ipc_port_t		tfpport;
	void			*sright;
	int			error = 0;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

#if defined(SECURE_KERNEL)
	/* secure kernels never hand out the kernel task port (pid 0) */
	if (0 == pid) {
		(void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}
#endif

	t1 = port_name_to_task(target_tport);	/* takes a task reference */
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);	/* may be PROC_NULL; posix check rejects that */
	AUDIT_ARG(process, p);

	if (!(task_for_pid_posix_check(p))) {
		error = KERN_FAILURE;
		goto tfpout;
	}

	if (p->task != TASK_NULL) {
		/* If we aren't root and target's task access port is set... */
		if (!kauth_cred_issuser(kauth_cred_get()) &&
		    p != current_proc() &&
		    (task_get_task_access_port(p->task, &tfpport) == 0) &&
		    (tfpport != IPC_PORT_NULL)) {

			if (tfpport == IPC_PORT_DEAD) {
				error = KERN_PROTECTION_FAILURE;
				goto tfpout;
			}

			/* Call up to the task access server */
			error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

			if (error != MACH_MSG_SUCCESS) {
				if (error == MACH_RCV_INTERRUPTED)
					error = KERN_ABORTED;
				else
					error = KERN_FAILURE;
				goto tfpout;
			}
		}
#if CONFIG_MACF
		error = mac_proc_check_get_task(kauth_cred_get(), p);
		if (error) {
			error = KERN_FAILURE;
			goto tfpout;
		}
#endif

		/* Grant task port access */
		task_reference(p->task);	/* ref consumed by convert_task_to_port() */
		sright = (void *) convert_task_to_port(p->task);
		tret = ipc_port_copyout_send(
				sright,
				get_task_ipcspace(current_task()));
	}
	error = KERN_SUCCESS;

tfpout:
	task_deallocate(t1);
	AUDIT_ARG(mach_port2, tret);
	/* always copy out the port name: MACH_PORT_NULL on failure */
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}

/*
 *	Routine:	task_name_for_pid
 *	Purpose:
 *		Get the task name port for another "process", named by its
 *		process ID on the same host as "target_task".
 *
 *		Only permitted to privileged processes, or processes
 *		with the same user ID.
 *
 *		XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
	struct task_name_for_pid_args *args)
{
	mach_port_name_t	target_tport = args->target_tport;
	int			pid = args->pid;
	user_addr_t		task_addr = args->t;
	struct uthread		*uthread;
	proc_t			p = PROC_NULL;
	task_t			t1;
	mach_port_name_t	tret;
	void			*sright;
	int			error = 0, refheld = 0;
	kauth_cred_t		target_cred;

	AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
	AUDIT_ARG(pid, pid);
	AUDIT_ARG(mach_port1, target_tport);

	t1 = port_name_to_task(target_tport);	/* takes a task reference */
	if (t1 == TASK_NULL) {
		(void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
		AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
		return(KERN_FAILURE);
	}


	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	uthread = get_bsdthread_info(current_thread());
	kauth_cred_uthread_update(uthread, current_proc());

	p = proc_find(pid);
	AUDIT_ARG(process, p);
	if (p != PROC_NULL) {
		target_cred = kauth_cred_proc_ref(p);
		refheld = 1;

		/*
		 * Allowed if: target isn't a zombie, and we are the target,
		 * or we are root, or our euid/ruid match the target's.
		 */
		if ((p->p_stat != SZOMB)
		    && ((current_proc() == p)
			|| kauth_cred_issuser(kauth_cred_get())
			|| ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) &&
			    ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {

			if (p->task != TASK_NULL) {
				/* ref consumed by convert_task_name_to_port() */
				task_reference(p->task);
#if CONFIG_MACF
				error = mac_proc_check_get_task_name(kauth_cred_get(), p);
				if (error) {
					task_deallocate(p->task);
					goto noperm;
				}
#endif
				sright = (void *)convert_task_name_to_port(p->task);
				tret = ipc_port_copyout_send(sright,
					       get_task_ipcspace(current_task()));
			} else
				tret = MACH_PORT_NULL;

			AUDIT_ARG(mach_port2, tret);
			(void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
			task_deallocate(t1);
			error = KERN_SUCCESS;
			goto tnfpout;
		}
	}

#if CONFIG_MACF
noperm:
#endif
	/* denied: hand back MACH_PORT_NULL */
	task_deallocate(t1);
	tret = MACH_PORT_NULL;
	(void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
	error = KERN_FAILURE;
tnfpout:
	if (refheld != 0)
		kauth_cred_unref(&target_cred);
	if (p != PROC_NULL)
		proc_rele(p);
	AUDIT_MACH_SYSCALL_EXIT(error);
	return(error);
}

/*
 * Sysctl handler for kern.tfp.policy: reads always succeed; writes
 * require superuser and must be one of KERN_TFP_POLICY_{DENY,DEFAULT}.
 */
static int
sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
    __unused int arg2, struct sysctl_req *req)
{
	int error = 0;
	int new_value;

	error = SYSCTL_OUT(req, arg1, sizeof(int));
	if (error || req->newptr == USER_ADDR_NULL)
		return(error);

	if (!is_suser())
		return(EPERM);

	if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
		goto out;
	}
	if ((new_value == KERN_TFP_POLICY_DENY)
	    || (new_value == KERN_TFP_POLICY_DEFAULT))
		tfp_policy = new_value;
	else
		error = EINVAL;
out:
	return(error);

}

#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");

SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy, "I", "policy");

SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
	   &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
	   &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
	   &shared_region_persistence, 0, "");

/*
 * shared_region_check_np:
 *
 * This system call is intended for dyld.
 *
 * dyld calls this when any process starts to see if the process's shared
 * region is already set up and ready to use.
727 * This call returns the base address of the first mapping in the 728 * process's shared region's first mapping. 729 * dyld will then check what's mapped at that address. 730 * 731 * If the shared region is empty, dyld will then attempt to map the shared 732 * cache file in the shared region via the shared_region_map_np() system call. 733 * 734 * If something's already mapped in the shared region, dyld will check if it 735 * matches the shared cache it would like to use for that process. 736 * If it matches, evrything's ready and the process can proceed and use the 737 * shared region. 738 * If it doesn't match, dyld will unmap the shared region and map the shared 739 * cache into the process's address space via mmap(). 740 * 741 * ERROR VALUES 742 * EINVAL no shared region 743 * ENOMEM shared region is empty 744 * EFAULT bad address for "start_address" 745 */ 746int 747shared_region_check_np( 748 __unused struct proc *p, 749 struct shared_region_check_np_args *uap, 750 __unused int *retvalp) 751{ 752 vm_shared_region_t shared_region; 753 mach_vm_offset_t start_address; 754 int error; 755 kern_return_t kr; 756 757 SHARED_REGION_TRACE_DEBUG( 758 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n", 759 current_thread(), p->p_pid, p->p_comm, 760 (uint64_t)uap->start_address)); 761 762 /* retrieve the current tasks's shared region */ 763 shared_region = vm_shared_region_get(current_task()); 764 if (shared_region != NULL) { 765 /* retrieve address of its first mapping... */ 766 kr = vm_shared_region_start_address(shared_region, 767 &start_address); 768 if (kr != KERN_SUCCESS) { 769 error = ENOMEM; 770 } else { 771 /* ... 
and give it to the caller */ 772 error = copyout(&start_address, 773 (user_addr_t) uap->start_address, 774 sizeof (start_address)); 775 if (error) { 776 SHARED_REGION_TRACE_ERROR( 777 ("shared_region: %p [%d(%s)] " 778 "check_np(0x%llx) " 779 "copyout(0x%llx) error %d\n", 780 current_thread(), p->p_pid, p->p_comm, 781 (uint64_t)uap->start_address, (uint64_t)start_address, 782 error)); 783 } 784 } 785 vm_shared_region_deallocate(shared_region); 786 } else { 787 /* no shared region ! */ 788 error = EINVAL; 789 } 790 791 SHARED_REGION_TRACE_DEBUG( 792 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n", 793 current_thread(), p->p_pid, p->p_comm, 794 (uint64_t)uap->start_address, (uint64_t)start_address, error)); 795 796 return error; 797} 798 799/* 800 * shared_region_map_np() 801 * 802 * This system call is intended for dyld. 803 * 804 * dyld uses this to map a shared cache file into a shared region. 805 * This is usually done only the first time a shared cache is needed. 806 * Subsequent processes will just use the populated shared region without 807 * requiring any further setup. 
808 */ 809int 810shared_region_map_np( 811 struct proc *p, 812 struct shared_region_map_np_args *uap, 813 __unused int *retvalp) 814{ 815 int error; 816 kern_return_t kr; 817 int fd; 818 struct fileproc *fp; 819 struct vnode *vp, *root_vp; 820 struct vnode_attr va; 821 off_t fs; 822 memory_object_size_t file_size; 823 user_addr_t user_mappings; 824 struct shared_file_mapping_np *mappings; 825#define SFM_MAX_STACK 8 826 struct shared_file_mapping_np stack_mappings[SFM_MAX_STACK]; 827 unsigned int mappings_count; 828 vm_size_t mappings_size; 829 memory_object_control_t file_control; 830 struct vm_shared_region *shared_region; 831 832 SHARED_REGION_TRACE_DEBUG( 833 ("shared_region: %p [%d(%s)] -> map\n", 834 current_thread(), p->p_pid, p->p_comm)); 835 836 shared_region = NULL; 837 mappings_count = 0; 838 mappings_size = 0; 839 mappings = NULL; 840 fp = NULL; 841 vp = NULL; 842 843 /* get file descriptor for shared region cache file */ 844 fd = uap->fd; 845 846 /* get file structure from file descriptor */ 847 error = fp_lookup(p, fd, &fp, 0); 848 if (error) { 849 SHARED_REGION_TRACE_ERROR( 850 ("shared_region: %p [%d(%s)] map: " 851 "fd=%d lookup failed (error=%d)\n", 852 current_thread(), p->p_pid, p->p_comm, fd, error)); 853 goto done; 854 } 855 856 /* make sure we're attempting to map a vnode */ 857 if (fp->f_fglob->fg_type != DTYPE_VNODE) { 858 SHARED_REGION_TRACE_ERROR( 859 ("shared_region: %p [%d(%s)] map: " 860 "fd=%d not a vnode (type=%d)\n", 861 current_thread(), p->p_pid, p->p_comm, 862 fd, fp->f_fglob->fg_type)); 863 error = EINVAL; 864 goto done; 865 } 866 867 /* we need at least read permission on the file */ 868 if (! 
(fp->f_fglob->fg_flag & FREAD)) { 869 SHARED_REGION_TRACE_ERROR( 870 ("shared_region: %p [%d(%s)] map: " 871 "fd=%d not readable\n", 872 current_thread(), p->p_pid, p->p_comm, fd)); 873 error = EPERM; 874 goto done; 875 } 876 877 /* get vnode from file structure */ 878 error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data); 879 if (error) { 880 SHARED_REGION_TRACE_ERROR( 881 ("shared_region: %p [%d(%s)] map: " 882 "fd=%d getwithref failed (error=%d)\n", 883 current_thread(), p->p_pid, p->p_comm, fd, error)); 884 goto done; 885 } 886 vp = (struct vnode *) fp->f_fglob->fg_data; 887 888 /* make sure the vnode is a regular file */ 889 if (vp->v_type != VREG) { 890 SHARED_REGION_TRACE_ERROR( 891 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 892 "not a file (type=%d)\n", 893 current_thread(), p->p_pid, p->p_comm, 894 vp, vp->v_name, vp->v_type)); 895 error = EINVAL; 896 goto done; 897 } 898 899 /* make sure vnode is on the process's root volume */ 900 root_vp = p->p_fd->fd_rdir; 901 if (root_vp == NULL) { 902 root_vp = rootvnode; 903 } 904 if (vp->v_mount != root_vp->v_mount) { 905 SHARED_REGION_TRACE_ERROR( 906 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 907 "not on process's root volume\n", 908 current_thread(), p->p_pid, p->p_comm, 909 vp, vp->v_name)); 910 error = EPERM; 911 goto done; 912 } 913 914 /* make sure vnode is owned by "root" */ 915 VATTR_INIT(&va); 916 VATTR_WANTED(&va, va_uid); 917 error = vnode_getattr(vp, &va, vfs_context_current()); 918 if (error) { 919 SHARED_REGION_TRACE_ERROR( 920 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 921 "vnode_getattr(%p) failed (error=%d)\n", 922 current_thread(), p->p_pid, p->p_comm, 923 vp, vp->v_name, vp, error)); 924 goto done; 925 } 926 if (va.va_uid != 0) { 927 SHARED_REGION_TRACE_ERROR( 928 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 929 "owned by uid=%d instead of 0\n", 930 current_thread(), p->p_pid, p->p_comm, 931 vp, vp->v_name, va.va_uid)); 932 error = EPERM; 933 goto done; 934 } 935 936 /* get vnode size 
*/ 937 error = vnode_size(vp, &fs, vfs_context_current()); 938 if (error) { 939 SHARED_REGION_TRACE_ERROR( 940 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 941 "vnode_size(%p) failed (error=%d)\n", 942 current_thread(), p->p_pid, p->p_comm, 943 vp, vp->v_name, vp, error)); 944 goto done; 945 } 946 file_size = fs; 947 948 /* get the file's memory object handle */ 949 file_control = ubc_getobject(vp, UBC_HOLDOBJECT); 950 if (file_control == MEMORY_OBJECT_CONTROL_NULL) { 951 SHARED_REGION_TRACE_ERROR( 952 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 953 "no memory object\n", 954 current_thread(), p->p_pid, p->p_comm, 955 vp, vp->v_name)); 956 error = EINVAL; 957 goto done; 958 } 959 960 /* get the list of mappings the caller wants us to establish */ 961 mappings_count = uap->count; /* number of mappings */ 962 mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0])); 963 if (mappings_count == 0) { 964 SHARED_REGION_TRACE_INFO( 965 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 966 "no mappings\n", 967 current_thread(), p->p_pid, p->p_comm, 968 vp, vp->v_name)); 969 error = 0; /* no mappings: we're done ! 
*/ 970 goto done; 971 } else if (mappings_count <= SFM_MAX_STACK) { 972 mappings = &stack_mappings[0]; 973 } else { 974 SHARED_REGION_TRACE_ERROR( 975 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 976 "too many mappings (%d)\n", 977 current_thread(), p->p_pid, p->p_comm, 978 vp, vp->v_name, mappings_count)); 979 error = EINVAL; 980 goto done; 981 } 982 983 user_mappings = uap->mappings; /* the mappings, in user space */ 984 error = copyin(user_mappings, 985 mappings, 986 mappings_size); 987 if (error) { 988 SHARED_REGION_TRACE_ERROR( 989 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 990 "copyin(0x%llx, %d) failed (error=%d)\n", 991 current_thread(), p->p_pid, p->p_comm, 992 vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error)); 993 goto done; 994 } 995 996 /* get the process's shared region (setup in vm_map_exec()) */ 997 shared_region = vm_shared_region_get(current_task()); 998 if (shared_region == NULL) { 999 SHARED_REGION_TRACE_ERROR( 1000 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1001 "no shared region\n", 1002 current_thread(), p->p_pid, p->p_comm, 1003 vp, vp->v_name)); 1004 goto done; 1005 } 1006 1007 /* map the file into that shared region's submap */ 1008 kr = vm_shared_region_map_file(shared_region, 1009 mappings_count, 1010 mappings, 1011 file_control, 1012 file_size, 1013 (void *) p->p_fd->fd_rdir); 1014 if (kr != KERN_SUCCESS) { 1015 SHARED_REGION_TRACE_ERROR( 1016 ("shared_region: %p [%d(%s)] map(%p:'%s'): " 1017 "vm_shared_region_map_file() failed kr=0x%x\n", 1018 current_thread(), p->p_pid, p->p_comm, 1019 vp, vp->v_name, kr)); 1020 switch (kr) { 1021 case KERN_INVALID_ADDRESS: 1022 error = EFAULT; 1023 break; 1024 case KERN_PROTECTION_FAILURE: 1025 error = EPERM; 1026 break; 1027 case KERN_NO_SPACE: 1028 error = ENOMEM; 1029 break; 1030 case KERN_FAILURE: 1031 case KERN_INVALID_ARGUMENT: 1032 default: 1033 error = EINVAL; 1034 break; 1035 } 1036 goto done; 1037 } 1038 1039 error = 0; 1040 1041 /* update the vnode's access time */ 1042 
if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) { 1043 VATTR_INIT(&va); 1044 nanotime(&va.va_access_time); 1045 VATTR_SET_ACTIVE(&va, va_access_time); 1046 vnode_setattr(vp, &va, vfs_context_current()); 1047 } 1048 1049 if (p->p_flag & P_NOSHLIB) { 1050 /* signal that this process is now using split libraries */ 1051 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag); 1052 } 1053 1054done: 1055 if (vp != NULL) { 1056 /* 1057 * release the vnode... 1058 * ubc_map() still holds it for us in the non-error case 1059 */ 1060 (void) vnode_put(vp); 1061 vp = NULL; 1062 } 1063 if (fp != NULL) { 1064 /* release the file descriptor */ 1065 fp_drop(p, fd, fp, 0); 1066 fp = NULL; 1067 } 1068 1069 if (shared_region != NULL) { 1070 vm_shared_region_deallocate(shared_region); 1071 } 1072 1073 SHARED_REGION_TRACE_DEBUG( 1074 ("shared_region: %p [%d(%s)] <- map\n", 1075 current_thread(), p->p_pid, p->p_comm)); 1076 1077 return error; 1078} 1079 1080 1081/* sysctl overflow room */ 1082 1083/* vm_page_free_target is provided as a makeshift solution for applications that want to 1084 allocate buffer space, possibly purgeable memory, but not cause inactive pages to be 1085 reclaimed. It allows the app to calculate how much memory is free outside the free target. */ 1086extern unsigned int vm_page_free_target; 1087SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD, 1088 &vm_page_free_target, 0, "Pageout daemon free target"); 1089 1090