/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 * 
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 * 
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 * 
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
 * support for mandatory and extensible security protections.  This notice
 * is included in support of clause 2.2 (b) of the Apple Public License,
 * Version 2.0.
33 */ 34 35#include <sys/param.h> 36#include <sys/systm.h> 37#include <sys/lock.h> 38#include <sys/proc_internal.h> 39#include <sys/kauth.h> 40#include <sys/buf.h> 41#include <sys/uio.h> 42#include <sys/vnode_internal.h> 43#include <sys/namei.h> 44#include <sys/ubc_internal.h> 45#include <sys/malloc.h> 46#include <sys/user.h> 47#if CONFIG_PROTECT 48#include <sys/cprotect.h> 49#endif 50 51#include <default_pager/default_pager_types.h> 52#include <default_pager/default_pager_object.h> 53 54#include <security/audit/audit.h> 55#include <bsm/audit_kevents.h> 56 57#include <mach/mach_types.h> 58#include <mach/host_priv.h> 59#include <mach/mach_traps.h> 60#include <mach/boolean.h> 61 62#include <kern/kern_types.h> 63#include <kern/locks.h> 64#include <kern/host.h> 65#include <kern/task.h> 66#include <kern/zalloc.h> 67#include <kern/kalloc.h> 68#include <kern/assert.h> 69 70#include <libkern/libkern.h> 71 72#include <vm/vm_pageout.h> 73#include <vm/vm_map.h> 74#include <vm/vm_kern.h> 75#include <vm/vnode_pager.h> 76#include <vm/vm_protos.h> 77#if CONFIG_MACF 78#include <security/mac_framework.h> 79#endif 80 81#include <pexpert/pexpert.h> 82 83void macx_init(void); 84 85static lck_grp_t *macx_lock_group; 86static lck_mtx_t *macx_lock; 87 88/* 89 * temporary support for delayed instantiation 90 * of default_pager 91 */ 92int default_pager_init_flag = 0; 93 94struct bs_map bs_port_table[MAX_BACKING_STORE] = { 95 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 96 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 97 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 98 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 99 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 100 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 101 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 102 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 103 
{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}, 104 {0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}; 105 106/* ###################################################### */ 107 108/* 109 * Routine: macx_init 110 * Function: 111 * Initialize locks so that only one caller can change 112 * state at a time. 113 */ 114void 115macx_init(void) 116{ 117 macx_lock_group = lck_grp_alloc_init("macx", NULL); 118 macx_lock = lck_mtx_alloc_init(macx_lock_group, NULL); 119} 120 121/* 122 * Routine: macx_backing_store_recovery 123 * Function: 124 * Syscall interface to set a tasks privilege 125 * level so that it is not subject to 126 * macx_backing_store_suspend 127 */ 128int 129macx_backing_store_recovery( 130 struct macx_backing_store_recovery_args *args) 131{ 132 int pid = args->pid; 133 int error; 134 struct proc *p = current_proc(); 135 136 if ((error = suser(kauth_cred_get(), 0))) 137 goto backing_store_recovery_return; 138 139 /* for now restrict backing_store_recovery */ 140 /* usage to only present task */ 141 if(pid != proc_selfpid()) { 142 error = EINVAL; 143 goto backing_store_recovery_return; 144 } 145 146 task_backing_store_privileged(p->task); 147 148backing_store_recovery_return: 149 return(error); 150} 151 152/* 153 * Routine: macx_backing_store_suspend 154 * Function: 155 * Syscall interface to stop new demand for 156 * backing store when backing store is low 157 */ 158 159int 160macx_backing_store_suspend( 161 struct macx_backing_store_suspend_args *args) 162{ 163 boolean_t suspend = args->suspend; 164 int error; 165 166 lck_mtx_lock(macx_lock); 167 if ((error = suser(kauth_cred_get(), 0))) 168 goto backing_store_suspend_return; 169 170 /* Multiple writers protected by macx_lock */ 171 vm_backing_store_disable(suspend); 172 173backing_store_suspend_return: 174 lck_mtx_unlock(macx_lock); 175 return(error); 176} 177 178extern boolean_t backing_store_stop_compaction; 179extern boolean_t compressor_store_stop_compaction; 180 181/* 182 * 
Routine: macx_backing_store_compaction 183 * Function: 184 * Turn compaction of swap space on or off. This is 185 * used during shutdown/restart so that the kernel 186 * doesn't waste time compacting swap files that are 187 * about to be deleted anyway. Compaction is always 188 * on by default when the system comes up and is turned 189 * off when a shutdown/restart is requested. It is 190 * re-enabled if the shutdown/restart is aborted for any reason. 191 * 192 * This routine assumes macx_lock has been locked by macx_triggers -> 193 * mach_macx_triggers -> macx_backing_store_compaction 194 */ 195 196int 197macx_backing_store_compaction(int flags) 198{ 199 int error; 200 201 lck_mtx_assert(macx_lock, LCK_MTX_ASSERT_OWNED); 202 if ((error = suser(kauth_cred_get(), 0))) 203 return error; 204 205 if (flags & SWAP_COMPACT_DISABLE) { 206 backing_store_stop_compaction = TRUE; 207 compressor_store_stop_compaction = TRUE; 208 209 kprintf("backing_store_stop_compaction = TRUE\n"); 210 211 } else if (flags & SWAP_COMPACT_ENABLE) { 212 backing_store_stop_compaction = FALSE; 213 compressor_store_stop_compaction = FALSE; 214 215 kprintf("backing_store_stop_compaction = FALSE\n"); 216 } 217 218 return 0; 219} 220 221/* 222 * Routine: macx_triggers 223 * Function: 224 * Syscall interface to set the call backs for low and 225 * high water marks. 226 */ 227int 228macx_triggers( 229 struct macx_triggers_args *args) 230{ 231 int error; 232 233 lck_mtx_lock(macx_lock); 234 error = suser(kauth_cred_get(), 0); 235 if (error) 236 return error; 237 238 error = mach_macx_triggers(args); 239 240 lck_mtx_unlock(macx_lock); 241 return error; 242} 243 244 245extern boolean_t dp_isssd; 246 247/* 248 * In the compressed pager world, the swapfiles are created by the kernel. 249 * Well, all except the first one. That swapfile is absorbed by the kernel at 250 * the end of the macx_swapon function (if swap is enabled). That's why 251 * we allow the first invocation of macx_swapon to succeed. 
 *
 * If the compressor pool is running low, the kernel messages the dynamic pager
 * on the port it has registered with the kernel. That port can transport 1 of 2
 * pieces of information to dynamic pager: create a swapfile or delete a swapfile.
 *
 * We choose to transmit the former. So, that message tells dynamic pager
 * to create a swapfile and activate it by calling macx_swapon.
 *
 * We deny this new macx_swapon request. That leads dynamic pager to interpret the
 * failure as a serious error and notify all its clients that swap is running low.
 * That's how we get the loginwindow "Resume / Force Quit Applications" dialog to appear.
 *
 * NOTE:
 * If the kernel has already created multiple swapfiles by the time the compressor
 * pool is running low (and it has to play this trick), dynamic pager won't be able to
 * create a file in user-space and that, too, will lead to a similar notification blast
 * to all of its clients. So, that behaves as desired too.
269 */ 270boolean_t macx_swapon_allowed = TRUE; 271 272/* 273 * Routine: macx_swapon 274 * Function: 275 * Syscall interface to add a file to backing store 276 */ 277int 278macx_swapon( 279 struct macx_swapon_args *args) 280{ 281 int size = args->size; 282 vnode_t vp = (vnode_t)NULL; 283 struct nameidata nd, *ndp; 284 register int error; 285 kern_return_t kr; 286 mach_port_t backing_store; 287 memory_object_default_t default_pager; 288 int i; 289 off_t file_size; 290 vfs_context_t ctx = vfs_context_current(); 291 struct proc *p = current_proc(); 292 int dp_cluster_size; 293 294 AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPON); 295 AUDIT_ARG(value32, args->priority); 296 297 lck_mtx_lock(macx_lock); 298 299 if (COMPRESSED_PAGER_IS_ACTIVE) { 300 if (macx_swapon_allowed == FALSE) { 301 error = EINVAL; 302 goto swapon_bailout; 303 } else { 304 macx_swapon_allowed = FALSE; 305 error = 0; 306 goto swapon_bailout; 307 } 308 } 309 310 ndp = &nd; 311 312 if ((error = suser(kauth_cred_get(), 0))) 313 goto swapon_bailout; 314 315 /* 316 * Get a vnode for the paging area. 317 */ 318 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 319 ((IS_64BIT_PROCESS(p)) ? 
UIO_USERSPACE64 : UIO_USERSPACE32), 320 (user_addr_t) args->filename, ctx); 321 322 if ((error = namei(ndp))) 323 goto swapon_bailout; 324 nameidone(ndp); 325 vp = ndp->ni_vp; 326 327 if (vp->v_type != VREG) { 328 error = EINVAL; 329 goto swapon_bailout; 330 } 331 332 /* get file size */ 333 if ((error = vnode_size(vp, &file_size, ctx)) != 0) 334 goto swapon_bailout; 335#if CONFIG_MACF 336 vnode_lock(vp); 337 error = mac_system_check_swapon(vfs_context_ucred(ctx), vp); 338 vnode_unlock(vp); 339 if (error) 340 goto swapon_bailout; 341#endif 342 343 /* resize to desired size if it's too small */ 344 if ((file_size < (off_t)size) && ((error = vnode_setsize(vp, (off_t)size, 0, ctx)) != 0)) 345 goto swapon_bailout; 346 347#if CONFIG_PROTECT 348 { 349 /* initialize content protection keys manually */ 350 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) { 351 goto swapon_bailout; 352 } 353 } 354#endif 355 356 357 if (default_pager_init_flag == 0) { 358 start_def_pager(NULL); 359 default_pager_init_flag = 1; 360 } 361 362 /* add new backing store to list */ 363 i = 0; 364 while(bs_port_table[i].vp != 0) { 365 if(i == MAX_BACKING_STORE) 366 break; 367 i++; 368 } 369 if(i == MAX_BACKING_STORE) { 370 error = ENOMEM; 371 goto swapon_bailout; 372 } 373 374 /* remember the vnode. This vnode has namei() reference */ 375 bs_port_table[i].vp = vp; 376 377 /* 378 * Look to see if we are already paging to this file. 
379 */ 380 /* make certain the copy send of kernel call will work */ 381 default_pager = MEMORY_OBJECT_DEFAULT_NULL; 382 kr = host_default_memory_manager(host_priv_self(), &default_pager, 0); 383 if(kr != KERN_SUCCESS) { 384 error = EAGAIN; 385 bs_port_table[i].vp = 0; 386 goto swapon_bailout; 387 } 388 389 if ((dp_isssd = vnode_pager_isSSD(vp)) == TRUE) { 390 /* 391 * keep the cluster size small since the 392 * seek cost is effectively 0 which means 393 * we don't care much about fragmentation 394 */ 395 dp_cluster_size = 2 * PAGE_SIZE; 396 } else { 397 /* 398 * use the default cluster size 399 */ 400 dp_cluster_size = 0; 401 } 402 kr = default_pager_backing_store_create(default_pager, 403 -1, /* default priority */ 404 dp_cluster_size, 405 &backing_store); 406 memory_object_default_deallocate(default_pager); 407 408 if(kr != KERN_SUCCESS) { 409 error = ENOMEM; 410 bs_port_table[i].vp = 0; 411 goto swapon_bailout; 412 } 413 414 /* Mark this vnode as being used for swapfile */ 415 vnode_lock_spin(vp); 416 SET(vp->v_flag, VSWAP); 417 vnode_unlock(vp); 418 419 /* 420 * NOTE: we are able to supply PAGE_SIZE here instead of 421 * an actual record size or block number because: 422 * a: we do not support offsets from the beginning of the 423 * file (allowing for non page size/record modulo offsets. 
424 * b: because allow paging will be done modulo page size 425 */ 426 427 kr = default_pager_add_file(backing_store, (vnode_ptr_t) vp, 428 PAGE_SIZE, (int)(file_size/PAGE_SIZE)); 429 if(kr != KERN_SUCCESS) { 430 bs_port_table[i].vp = 0; 431 if(kr == KERN_INVALID_ARGUMENT) 432 error = EINVAL; 433 else 434 error = ENOMEM; 435 436 /* This vnode is not to be used for swapfile */ 437 vnode_lock_spin(vp); 438 CLR(vp->v_flag, VSWAP); 439 vnode_unlock(vp); 440 441 goto swapon_bailout; 442 } 443 bs_port_table[i].bs = (void *)backing_store; 444 error = 0; 445 446 ubc_setthreadcred(vp, p, current_thread()); 447 448 /* 449 * take a long term reference on the vnode to keep 450 * vnreclaim() away from this vnode. 451 */ 452 vnode_ref(vp); 453 454swapon_bailout: 455 if (vp) { 456 vnode_put(vp); 457 } 458 lck_mtx_unlock(macx_lock); 459 AUDIT_MACH_SYSCALL_EXIT(error); 460 461 if (error) 462 printf("macx_swapon FAILED - %d\n", error); 463 else 464 printf("macx_swapon SUCCESS\n"); 465 466 return(error); 467} 468 469/* 470 * Routine: macx_swapoff 471 * Function: 472 * Syscall interface to remove a file from backing store 473 */ 474int 475macx_swapoff( 476 struct macx_swapoff_args *args) 477{ 478 __unused int flags = args->flags; 479 kern_return_t kr; 480 mach_port_t backing_store; 481 482 struct vnode *vp = 0; 483 struct nameidata nd, *ndp; 484 struct proc *p = current_proc(); 485 int i; 486 int error; 487 vfs_context_t ctx = vfs_context_current(); 488 int orig_iopol_disk; 489 490 AUDIT_MACH_SYSCALL_ENTER(AUE_SWAPOFF); 491 492 lck_mtx_lock(macx_lock); 493 494 backing_store = NULL; 495 ndp = &nd; 496 497 if ((error = suser(kauth_cred_get(), 0))) 498 goto swapoff_bailout; 499 500 /* 501 * Get the vnode for the paging area. 502 */ 503 NDINIT(ndp, LOOKUP, OP_LOOKUP, FOLLOW | LOCKLEAF | AUDITVNPATH1, 504 ((IS_64BIT_PROCESS(p)) ? 
UIO_USERSPACE64 : UIO_USERSPACE32), 505 (user_addr_t) args->filename, ctx); 506 507 if ((error = namei(ndp))) 508 goto swapoff_bailout; 509 nameidone(ndp); 510 vp = ndp->ni_vp; 511 512 if (vp->v_type != VREG) { 513 error = EINVAL; 514 goto swapoff_bailout; 515 } 516#if CONFIG_MACF 517 vnode_lock(vp); 518 error = mac_system_check_swapoff(vfs_context_ucred(ctx), vp); 519 vnode_unlock(vp); 520 if (error) 521 goto swapoff_bailout; 522#endif 523 524 for(i = 0; i < MAX_BACKING_STORE; i++) { 525 if(bs_port_table[i].vp == vp) { 526 break; 527 } 528 } 529 if (i == MAX_BACKING_STORE) { 530 error = EINVAL; 531 goto swapoff_bailout; 532 } 533 backing_store = (mach_port_t)bs_port_table[i].bs; 534 535 orig_iopol_disk = proc_get_task_policy(current_task(), current_thread(), 536 TASK_POLICY_INTERNAL, TASK_POLICY_IOPOL); 537 538 proc_set_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL, 539 TASK_POLICY_IOPOL, IOPOL_THROTTLE); 540 541 kr = default_pager_backing_store_delete(backing_store); 542 543 proc_set_task_policy(current_task(), current_thread(), TASK_POLICY_INTERNAL, 544 TASK_POLICY_IOPOL, orig_iopol_disk); 545 546 switch (kr) { 547 case KERN_SUCCESS: 548 error = 0; 549 bs_port_table[i].vp = 0; 550 /* This vnode is no longer used for swapfile */ 551 vnode_lock_spin(vp); 552 CLR(vp->v_flag, VSWAP); 553 vnode_unlock(vp); 554 555 /* get rid of macx_swapon() "long term" reference */ 556 vnode_rele(vp); 557 558 break; 559 case KERN_FAILURE: 560 error = EAGAIN; 561 break; 562 default: 563 error = EAGAIN; 564 break; 565 } 566 567swapoff_bailout: 568 /* get rid of macx_swapoff() namei() reference */ 569 if (vp) 570 vnode_put(vp); 571 lck_mtx_unlock(macx_lock); 572 AUDIT_MACH_SYSCALL_EXIT(error); 573 574 if (error) 575 printf("macx_swapoff FAILED - %d\n", error); 576 else 577 printf("macx_swapoff SUCCESS\n"); 578 579 return(error); 580} 581 582/* 583 * Routine: macx_swapinfo 584 * Function: 585 * Syscall interface to get general swap statistics 586 */ 587extern 
uint64_t vm_swap_get_total_space(void); 588extern uint64_t vm_swap_get_used_space(void); 589extern uint64_t vm_swap_get_free_space(void); 590extern boolean_t vm_swap_up; 591 592int 593macx_swapinfo( 594 memory_object_size_t *total_p, 595 memory_object_size_t *avail_p, 596 vm_size_t *pagesize_p, 597 boolean_t *encrypted_p) 598{ 599 int error; 600 memory_object_default_t default_pager; 601 default_pager_info_64_t dpi64; 602 kern_return_t kr; 603 604 error = 0; 605 if (COMPRESSED_PAGER_IS_ACTIVE) { 606 607 if (vm_swap_up == TRUE) { 608 609 *total_p = vm_swap_get_total_space(); 610 *avail_p = vm_swap_get_free_space(); 611 *pagesize_p = (vm_size_t)PAGE_SIZE_64; 612 *encrypted_p = TRUE; 613 614 } else { 615 616 *total_p = 0; 617 *avail_p = 0; 618 *pagesize_p = 0; 619 *encrypted_p = FALSE; 620 } 621 } else { 622 623 /* 624 * Get a handle on the default pager. 625 */ 626 default_pager = MEMORY_OBJECT_DEFAULT_NULL; 627 kr = host_default_memory_manager(host_priv_self(), &default_pager, 0); 628 if (kr != KERN_SUCCESS) { 629 error = EAGAIN; /* XXX why EAGAIN ? */ 630 goto done; 631 } 632 if (default_pager == MEMORY_OBJECT_DEFAULT_NULL) { 633 /* 634 * The default pager has not initialized yet, 635 * so it can't be using any swap space at all. 636 */ 637 *total_p = 0; 638 *avail_p = 0; 639 *pagesize_p = 0; 640 *encrypted_p = FALSE; 641 goto done; 642 } 643 644 /* 645 * Get swap usage data from default pager. 646 */ 647 kr = default_pager_info_64(default_pager, &dpi64); 648 if (kr != KERN_SUCCESS) { 649 error = ENOTSUP; 650 goto done; 651 } 652 653 /* 654 * Provide default pager info to caller. 
655 */ 656 *total_p = dpi64.dpi_total_space; 657 *avail_p = dpi64.dpi_free_space; 658 *pagesize_p = dpi64.dpi_page_size; 659 if (dpi64.dpi_flags & DPI_ENCRYPTED) { 660 *encrypted_p = TRUE; 661 } else { 662 *encrypted_p = FALSE; 663 } 664 665done: 666 if (default_pager != MEMORY_OBJECT_DEFAULT_NULL) { 667 /* release our handle on default pager */ 668 memory_object_default_deallocate(default_pager); 669 } 670 } 671 return error; 672} 673