1/* 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58/* 59 * File: vm_fault.c 60 * Author: Avadis Tevanian, Jr., Michael Wayne Young 61 * 62 * Page fault handling module. 63 */ 64 65#include <mach_cluster_stats.h> 66#include <mach_pagemap.h> 67#include <libkern/OSAtomic.h> 68 69#include <mach/mach_types.h> 70#include <mach/kern_return.h> 71#include <mach/message.h> /* for error codes */ 72#include <mach/vm_param.h> 73#include <mach/vm_behavior.h> 74#include <mach/memory_object.h> 75 /* For memory_object_data_{request,unlock} */ 76#include <mach/sdt.h> 77 78#include <kern/kern_types.h> 79#include <kern/host_statistics.h> 80#include <kern/counters.h> 81#include <kern/task.h> 82#include <kern/thread.h> 83#include <kern/sched_prim.h> 84#include <kern/host.h> 85#include <kern/xpr.h> 86#include <kern/mach_param.h> 87#include <kern/macro_help.h> 88#include <kern/zalloc.h> 89#include <kern/misc_protos.h> 90 91#include <vm/vm_compressor.h> 92#include <vm/vm_compressor_pager.h> 93#include <vm/vm_fault.h> 94#include <vm/vm_map.h> 95#include <vm/vm_object.h> 96#include <vm/vm_page.h> 97#include <vm/vm_kern.h> 98#include <vm/pmap.h> 99#include <vm/vm_pageout.h> 100#include <vm/vm_protos.h> 101#include <vm/vm_external.h> 102#include <vm/memory_object.h> 103#include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */ 104#include <vm/vm_shared_region.h> 105 106#include <sys/codesign.h> 107 108#include <libsa/sys/timers.h> /* for struct timespec */ 109 110#define VM_FAULT_CLASSIFY 0 111 112#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */ 113 114unsigned int vm_object_pagein_throttle = 16; 115 116/* 117 * We apply a hard throttle to the demand zero rate of tasks that we believe are running out of control which 118 * kicks in when swap space runs out. 64-bit programs have massive address spaces and can leak enormous amounts 119 * of memory if they're buggy and can run the system completely out of swap space. If this happens, we 120 * impose a hard throttle on them to prevent them from taking the last bit of memory left. This helps 121 * keep the UI active so that the user has a chance to kill the offending task before the system 122 * completely hangs. 123 * 124 * The hard throttle is only applied when the system is nearly completely out of swap space and is only applied 125 * to tasks that appear to be bloated. When swap runs out, any task using more than vm_hard_throttle_threshold 126 * will be throttled. The throttling is done by giving the thread that's trying to demand zero a page a 127 * delay of HARD_THROTTLE_DELAY microseconds before being allowed to try the page fault again. 128 */ 129 130extern void throttle_lowpri_io(int); 131 132uint64_t vm_hard_throttle_threshold; 133 134 135 136#define NEED_TO_HARD_THROTTLE_THIS_TASK() (vm_wants_task_throttled(current_task()) || \ 137 (vm_page_free_count < vm_page_throttle_limit && \ 138 proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO) >= THROTTLE_LEVEL_THROTTLED)) 139 140 141#define HARD_THROTTLE_DELAY 20000 /* 20000 us == 20 ms */ 142#define SOFT_THROTTLE_DELAY 2000 /* 2000 us == 2 ms */ 143 144boolean_t current_thread_aborted(void); 145 146/* Forward declarations of internal routines. */ 147extern kern_return_t vm_fault_wire_fast( 148 vm_map_t map, 149 vm_map_offset_t va, 150 vm_map_entry_t entry, 151 pmap_t pmap, 152 vm_map_offset_t pmap_addr, 153 ppnum_t *physpage_p); 154 155extern void vm_fault_continue(void); 156 157extern void vm_fault_copy_cleanup( 158 vm_page_t page, 159 vm_page_t top_page); 160 161extern void vm_fault_copy_dst_cleanup( 162 vm_page_t page); 163 164#if VM_FAULT_CLASSIFY 165extern void vm_fault_classify(vm_object_t object, 166 vm_object_offset_t offset, 167 vm_prot_t fault_type); 168 169extern void vm_fault_classify_init(void); 170#endif 171 172unsigned long vm_pmap_enter_blocked = 0; 173unsigned long vm_pmap_enter_retried = 0; 174 175unsigned long vm_cs_validates = 0; 176unsigned long vm_cs_revalidates = 0; 177unsigned long vm_cs_query_modified = 0; 178unsigned long vm_cs_validated_dirtied = 0; 179unsigned long vm_cs_bitmap_validated = 0; 180 181void vm_pre_fault(vm_map_offset_t); 182 183/* 184 * Routine: vm_fault_init 185 * Purpose: 186 * Initialize our private data structures. 187 */ 188void 189vm_fault_init(void) 190{ 191 int i, vm_compressor_temp; 192 boolean_t need_default_val = TRUE; 193 /* 194 * Choose a value for the hard throttle threshold based on the amount of ram. The threshold is 195 * computed as a percentage of available memory, and the percentage used is scaled inversely with 196 * the amount of memory. The percentage runs between 10% and 35%. We use 35% for small memory systems 197 * and reduce the value down to 10% for very large memory configurations. This helps give us a 198 * definition of a memory hog that makes more sense relative to the amount of ram in the machine. 199 * The formula here simply uses the number of gigabytes of ram to adjust the percentage. 200 */ 201 202 vm_hard_throttle_threshold = sane_size * (35 - MIN((int)(sane_size / (1024*1024*1024)), 25)) / 100; 203 204 /* 205 * Configure compressed pager behavior. A boot arg takes precedence over a device tree entry. 206 */ 207 208 if (PE_parse_boot_argn("vm_compressor", &vm_compressor_temp, sizeof (vm_compressor_temp))) { 209 for ( i = 0; i < VM_PAGER_MAX_MODES; i++) { 210 if (vm_compressor_temp > 0 && 211 ((vm_compressor_temp & ( 1 << i)) == vm_compressor_temp)) { 212 need_default_val = FALSE; 213 vm_compressor_mode = vm_compressor_temp; 214 break; 215 } 216 } 217 if (need_default_val) 218 printf("Ignoring \"vm_compressor\" boot arg %d\n", vm_compressor_temp); 219 } 220 if (need_default_val) { 221 /* If no boot arg or incorrect boot arg, try device tree. */ 222 PE_get_default("kern.vm_compressor", &vm_compressor_mode, sizeof(vm_compressor_mode)); 223 } 224 PE_parse_boot_argn("vm_compressor_threads", &vm_compressor_thread_count, sizeof (vm_compressor_thread_count)); 225 printf("\"vm_compressor_mode\" is %d\n", vm_compressor_mode); 226} 227 228/* 229 * Routine: vm_fault_cleanup 230 * Purpose: 231 * Clean up the result of vm_fault_page. 232 * Results: 233 * The paging reference for "object" is released. 234 * "object" is unlocked. 235 * If "top_page" is not null, "top_page" is 236 * freed and the paging reference for the object 237 * containing it is released. 238 * 239 * In/out conditions: 240 * "object" must be locked. 241 */ 242void 243vm_fault_cleanup( 244 register vm_object_t object, 245 register vm_page_t top_page) 246{ 247 vm_object_paging_end(object); 248 vm_object_unlock(object); 249 250 if (top_page != VM_PAGE_NULL) { 251 object = top_page->object; 252 253 vm_object_lock(object); 254 VM_PAGE_FREE(top_page); 255 vm_object_paging_end(object); 256 vm_object_unlock(object); 257 } 258} 259 260#if MACH_CLUSTER_STATS 261#define MAXCLUSTERPAGES 16 262struct { 263 unsigned long pages_in_cluster; 264 unsigned long pages_at_higher_offsets; 265 unsigned long pages_at_lower_offsets; 266} cluster_stats_in[MAXCLUSTERPAGES]; 267#define CLUSTER_STAT(clause) clause 268#define CLUSTER_STAT_HIGHER(x) \ 269 ((cluster_stats_in[(x)].pages_at_higher_offsets)++) 270#define CLUSTER_STAT_LOWER(x) \ 271 ((cluster_stats_in[(x)].pages_at_lower_offsets)++) 272#define CLUSTER_STAT_CLUSTER(x) \ 273 ((cluster_stats_in[(x)].pages_in_cluster)++) 274#else /* MACH_CLUSTER_STATS */ 275#define CLUSTER_STAT(clause) 276#endif /* MACH_CLUSTER_STATS */ 277 278#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0) 279 280 281boolean_t vm_page_deactivate_behind = TRUE; 282/* 283 * default sizes given VM_BEHAVIOR_DEFAULT reference behavior 284 */ 285#define VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW 128 286#define VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER 16 /* don't make this too big... */ 287 /* we use it to size an array on the stack */ 288 289int vm_default_behind = VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW; 290 291#define MAX_SEQUENTIAL_RUN (1024 * 1024 * 1024) 292 293/* 294 * vm_page_is_sequential 295 * 296 * Determine if sequential access is in progress 297 * in accordance with the behavior specified. 298 * Update state to indicate current access pattern. 299 * 300 * object must have at least the shared lock held 301 */ 302static 303void 304vm_fault_is_sequential( 305 vm_object_t object, 306 vm_object_offset_t offset, 307 vm_behavior_t behavior) 308{ 309 vm_object_offset_t last_alloc; 310 int sequential; 311 int orig_sequential; 312 313 last_alloc = object->last_alloc; 314 sequential = object->sequential; 315 orig_sequential = sequential; 316 317 switch (behavior) { 318 case VM_BEHAVIOR_RANDOM: 319 /* 320 * reset indicator of sequential behavior 321 */ 322 sequential = 0; 323 break; 324 325 case VM_BEHAVIOR_SEQUENTIAL: 326 if (offset && last_alloc == offset - PAGE_SIZE_64) { 327 /* 328 * advance indicator of sequential behavior 329 */ 330 if (sequential < MAX_SEQUENTIAL_RUN) 331 sequential += PAGE_SIZE; 332 } else { 333 /* 334 * reset indicator of sequential behavior 335 */ 336 sequential = 0; 337 } 338 break; 339 340 case VM_BEHAVIOR_RSEQNTL: 341 if (last_alloc && last_alloc == offset + PAGE_SIZE_64) { 342 /* 343 * advance indicator of sequential behavior 344 */ 345 if (sequential > -MAX_SEQUENTIAL_RUN) 346 sequential -= PAGE_SIZE; 347 } else { 348 /* 349 * reset indicator of sequential behavior 350 */ 351 sequential = 0; 352 } 353 break; 354 355 case VM_BEHAVIOR_DEFAULT: 356 default: 357 if (offset && last_alloc == (offset - PAGE_SIZE_64)) { 358 /* 359 * advance indicator of sequential behavior 360 */ 361 if (sequential < 0) 362 sequential = 0; 363 if (sequential < MAX_SEQUENTIAL_RUN) 364 sequential += PAGE_SIZE; 365 366 } else if (last_alloc && last_alloc == (offset + PAGE_SIZE_64)) { 367 /* 368 * advance indicator of sequential behavior 369 */ 370 if (sequential > 0) 371 sequential = 0; 372 if (sequential > -MAX_SEQUENTIAL_RUN) 373 sequential -= PAGE_SIZE; 374 } else { 375 /* 376 * reset indicator of sequential behavior 377 */ 378 sequential = 0; 379 } 380 break; 381 } 382 if (sequential != orig_sequential) { 383 if (!OSCompareAndSwap(orig_sequential, sequential, (UInt32 *)&object->sequential)) { 384 /* 385 * if someone else has already updated object->sequential 386 * don't bother trying to update it or object->last_alloc 387 */ 388 return; 389 } 390 } 391 /* 392 * I'd like to do this with a OSCompareAndSwap64, but that 393 * doesn't exist for PPC... however, it shouldn't matter 394 * that much... last_alloc is maintained so that we can determine 395 * if a sequential access pattern is taking place... if only 396 * one thread is banging on this object, no problem with the unprotected 397 * update... if 2 or more threads are banging away, we run the risk of 398 * someone seeing a mangled update... however, in the face of multiple 399 * accesses, no sequential access pattern can develop anyway, so we 400 * haven't lost any real info. 401 */ 402 object->last_alloc = offset; 403} 404 405 406int vm_page_deactivate_behind_count = 0; 407 408/* 409 * vm_page_deactivate_behind 410 * 411 * Determine if sequential access is in progress 412 * in accordance with the behavior specified. If 413 * so, compute a potential page to deactivate and 414 * deactivate it. 415 * 416 * object must be locked. 417 * 418 * return TRUE if we actually deactivate a page 419 */ 420static 421boolean_t 422vm_fault_deactivate_behind( 423 vm_object_t object, 424 vm_object_offset_t offset, 425 vm_behavior_t behavior) 426{ 427 int n; 428 int pages_in_run = 0; 429 int max_pages_in_run = 0; 430 int sequential_run; 431 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 432 vm_object_offset_t run_offset = 0; 433 vm_object_offset_t pg_offset = 0; 434 vm_page_t m; 435 vm_page_t page_run[VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER]; 436 437 pages_in_run = 0; 438#if TRACEFAULTPAGE 439 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */ 440#endif 441 442 if (object == kernel_object || vm_page_deactivate_behind == FALSE) { 443 /* 444 * Do not deactivate pages from the kernel object: they 445 * are not intended to become pageable. 446 * or we've disabled the deactivate behind mechanism 447 */ 448 return FALSE; 449 } 450 if ((sequential_run = object->sequential)) { 451 if (sequential_run < 0) { 452 sequential_behavior = VM_BEHAVIOR_RSEQNTL; 453 sequential_run = 0 - sequential_run; 454 } else { 455 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 456 } 457 } 458 switch (behavior) { 459 case VM_BEHAVIOR_RANDOM: 460 break; 461 case VM_BEHAVIOR_SEQUENTIAL: 462 if (sequential_run >= (int)PAGE_SIZE) { 463 run_offset = 0 - PAGE_SIZE_64; 464 max_pages_in_run = 1; 465 } 466 break; 467 case VM_BEHAVIOR_RSEQNTL: 468 if (sequential_run >= (int)PAGE_SIZE) { 469 run_offset = PAGE_SIZE_64; 470 max_pages_in_run = 1; 471 } 472 break; 473 case VM_BEHAVIOR_DEFAULT: 474 default: 475 { vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64; 476 477 /* 478 * determine if the run of sequential accesss has been 479 * long enough on an object with default access behavior 480 * to consider it for deactivation 481 */ 482 if ((uint64_t)sequential_run >= behind && (sequential_run % (VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER * PAGE_SIZE)) == 0) { 483 /* 484 * the comparisons between offset and behind are done 485 * in this kind of odd fashion in order to prevent wrap around 486 * at the end points 487 */ 488 if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) { 489 if (offset >= behind) { 490 run_offset = 0 - behind; 491 pg_offset = PAGE_SIZE_64; 492 max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER; 493 } 494 } else { 495 if (offset < -behind) { 496 run_offset = behind; 497 pg_offset = 0 - PAGE_SIZE_64; 498 max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER; 499 } 500 } 501 } 502 break; 503 } 504 } 505 for (n = 0; n < max_pages_in_run; n++) { 506 m = vm_page_lookup(object, offset + run_offset + (n * pg_offset)); 507 508 if (m && !m->laundry && !m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) { 509 page_run[pages_in_run++] = m; 510 511 /* 512 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... 513 * 514 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being 515 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the 516 * new reference happens. If no futher references happen on the page after that remote TLB flushes 517 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue 518 * by pageout_scan, which is just fine since the last reference would have happened quite far 519 * in the past (TLB caches don't hang around for very long), and of course could just as easily 520 * have happened before we did the deactivate_behind. 521 */ 522 pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); 523 } 524 } 525 if (pages_in_run) { 526 vm_page_lockspin_queues(); 527 528 for (n = 0; n < pages_in_run; n++) { 529 530 m = page_run[n]; 531 532 vm_page_deactivate_internal(m, FALSE); 533 534 vm_page_deactivate_behind_count++; 535#if TRACEFAULTPAGE 536 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */ 537#endif 538 } 539 vm_page_unlock_queues(); 540 541 return TRUE; 542 } 543 return FALSE; 544} 545 546 547static int 548vm_page_throttled(void) 549{ 550 clock_sec_t elapsed_sec; 551 clock_sec_t tv_sec; 552 clock_usec_t tv_usec; 553 554 thread_t thread = current_thread(); 555 556 if (thread->options & TH_OPT_VMPRIV) 557 return (0); 558 559 thread->t_page_creation_count++; 560 561 if (NEED_TO_HARD_THROTTLE_THIS_TASK()) 562 return (HARD_THROTTLE_DELAY); 563 564 if ((vm_page_free_count < vm_page_throttle_limit || ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && SWAPPER_NEEDS_TO_UNTHROTTLE())) && 565 thread->t_page_creation_count > vm_page_creation_throttle) { 566 567 clock_get_system_microtime(&tv_sec, &tv_usec); 568 569 elapsed_sec = tv_sec - thread->t_page_creation_time; 570 571 if (elapsed_sec <= 6 || (thread->t_page_creation_count / elapsed_sec) >= (vm_page_creation_throttle / 6)) { 572 573 if (elapsed_sec >= 60) { 574 /* 575 * we'll reset our stats to give a well behaved app 576 * that was unlucky enough to accumulate a bunch of pages 577 * over a long period of time a chance to get out of 578 * the throttled state... we reset the counter and timestamp 579 * so that if it stays under the rate limit for the next second 580 * it will be back in our good graces... if it exceeds it, it 581 * will remain in the throttled state 582 */ 583 thread->t_page_creation_time = tv_sec; 584 thread->t_page_creation_count = (vm_page_creation_throttle / 6) * 5; 585 } 586 ++vm_page_throttle_count; 587 588 if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && HARD_THROTTLE_LIMIT_REACHED()) 589 return (HARD_THROTTLE_DELAY); 590 else 591 return (SOFT_THROTTLE_DELAY); 592 } 593 thread->t_page_creation_time = tv_sec; 594 thread->t_page_creation_count = 0; 595 } 596 return (0); 597} 598 599 600/* 601 * check for various conditions that would 602 * prevent us from creating a ZF page... 603 * cleanup is based on being called from vm_fault_page 604 * 605 * object must be locked 606 * object == m->object 607 */ 608static vm_fault_return_t 609vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t interruptible_state) 610{ 611 int throttle_delay; 612 613 if (object->shadow_severed || 614 VM_OBJECT_PURGEABLE_FAULT_ERROR(object)) { 615 /* 616 * Either: 617 * 1. the shadow chain was severed, 618 * 2. the purgeable object is volatile or empty and is marked 619 * to fault on access while volatile. 620 * Just have to return an error at this point 621 */ 622 if (m != VM_PAGE_NULL) 623 VM_PAGE_FREE(m); 624 vm_fault_cleanup(object, first_m); 625 626 thread_interrupt_level(interruptible_state); 627 628 return (VM_FAULT_MEMORY_ERROR); 629 } 630 if (vm_backing_store_low) { 631 /* 632 * are we protecting the system from 633 * backing store exhaustion. If so 634 * sleep unless we are privileged. 635 */ 636 if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) { 637 638 if (m != VM_PAGE_NULL) 639 VM_PAGE_FREE(m); 640 vm_fault_cleanup(object, first_m); 641 642 assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT); 643 644 thread_block(THREAD_CONTINUE_NULL); 645 thread_interrupt_level(interruptible_state); 646 647 return (VM_FAULT_RETRY); 648 } 649 } 650 if ((throttle_delay = vm_page_throttled())) { 651 /* 652 * we're throttling zero-fills... 653 * treat this as if we couldn't grab a page 654 */ 655 if (m != VM_PAGE_NULL) 656 VM_PAGE_FREE(m); 657 vm_fault_cleanup(object, first_m); 658 659 VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0); 660 661 delay(throttle_delay); 662 663 if (current_thread_aborted()) { 664 thread_interrupt_level(interruptible_state); 665 return VM_FAULT_INTERRUPTED; 666 } 667 thread_interrupt_level(interruptible_state); 668 669 return (VM_FAULT_MEMORY_SHORTAGE); 670 } 671 return (VM_FAULT_SUCCESS); 672} 673 674 675/* 676 * do the work to zero fill a page and 677 * inject it into the correct paging queue 678 * 679 * m->object must be locked 680 * page queue lock must NOT be held 681 */ 682static int 683vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill) 684{ 685 int my_fault = DBG_ZERO_FILL_FAULT; 686 687 /* 688 * This is is a zero-fill page fault... 689 * 690 * Checking the page lock is a waste of 691 * time; this page was absent, so 692 * it can't be page locked by a pager. 693 * 694 * we also consider it undefined 695 * with respect to instruction 696 * execution. i.e. it is the responsibility 697 * of higher layers to call for an instruction 698 * sync after changing the contents and before 699 * sending a program into this area. We 700 * choose this approach for performance 701 */ 702 m->pmapped = TRUE; 703 704 m->cs_validated = FALSE; 705 m->cs_tainted = FALSE; 706 707 if (no_zero_fill == TRUE) { 708 my_fault = DBG_NZF_PAGE_FAULT; 709 710 if (m->absent && m->busy) 711 return (my_fault); 712 } else { 713 vm_page_zero_fill(m); 714 715 VM_STAT_INCR(zero_fill_count); 716 DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL); 717 } 718 assert(!m->laundry); 719 assert(m->object != kernel_object); 720 //assert(m->pageq.next == NULL && m->pageq.prev == NULL); 721 722 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && 723 (m->object->purgable == VM_PURGABLE_DENY || 724 m->object->purgable == VM_PURGABLE_NONVOLATILE || 725 m->object->purgable == VM_PURGABLE_VOLATILE )) { 726 727 vm_page_lockspin_queues(); 728 729 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { 730 assert(!VM_PAGE_WIRED(m)); 731 732 /* 733 * can't be on the pageout queue since we don't 734 * have a pager to try and clean to 735 */ 736 assert(!m->pageout_queue); 737 738 VM_PAGE_QUEUES_REMOVE(m); 739 740 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq); 741 m->throttled = TRUE; 742 vm_page_throttled_count++; 743 } 744 vm_page_unlock_queues(); 745 } 746 return (my_fault); 747} 748 749 750/* 751 * Routine: vm_fault_page 752 * Purpose: 753 * Find the resident page for the virtual memory 754 * specified by the given virtual memory object 755 * and offset. 756 * Additional arguments: 757 * The required permissions for the page is given 758 * in "fault_type". Desired permissions are included 759 * in "protection". 760 * fault_info is passed along to determine pagein cluster 761 * limits... it contains the expected reference pattern, 762 * cluster size if available, etc... 763 * 764 * If the desired page is known to be resident (for 765 * example, because it was previously wired down), asserting 766 * the "unwiring" parameter will speed the search. 767 * 768 * If the operation can be interrupted (by thread_abort 769 * or thread_terminate), then the "interruptible" 770 * parameter should be asserted. 771 * 772 * Results: 773 * The page containing the proper data is returned 774 * in "result_page". 775 * 776 * In/out conditions: 777 * The source object must be locked and referenced, 778 * and must donate one paging reference. The reference 779 * is not affected. The paging reference and lock are 780 * consumed. 781 * 782 * If the call succeeds, the object in which "result_page" 783 * resides is left locked and holding a paging reference. 784 * If this is not the original object, a busy page in the 785 * original object is returned in "top_page", to prevent other 786 * callers from pursuing this same data, along with a paging 787 * reference for the original object. The "top_page" should 788 * be destroyed when this guarantee is no longer required. 789 * The "result_page" is also left busy. It is not removed 790 * from the pageout queues. 791 * Special Case: 792 * A return value of VM_FAULT_SUCCESS_NO_PAGE means that the 793 * fault succeeded but there's no VM page (i.e. the VM object 794 * does not actually hold VM pages, but device memory or 795 * large pages). The object is still locked and we still hold a 796 * paging_in_progress reference. 797 */ 798unsigned int vm_fault_page_blocked_access = 0; 799unsigned int vm_fault_page_forced_retry = 0; 800 801vm_fault_return_t 802vm_fault_page( 803 /* Arguments: */ 804 vm_object_t first_object, /* Object to begin search */ 805 vm_object_offset_t first_offset, /* Offset into object */ 806 vm_prot_t fault_type, /* What access is requested */ 807 boolean_t must_be_resident,/* Must page be resident? */ 808 boolean_t caller_lookup, /* caller looked up page */ 809 /* Modifies in place: */ 810 vm_prot_t *protection, /* Protection for mapping */ 811 vm_page_t *result_page, /* Page found, if successful */ 812 /* Returns: */ 813 vm_page_t *top_page, /* Page in top object, if 814 * not result_page. */ 815 int *type_of_fault, /* if non-null, fill in with type of fault 816 * COW, zero-fill, etc... returned in trace point */ 817 /* More arguments: */ 818 kern_return_t *error_code, /* code if page is in error */ 819 boolean_t no_zero_fill, /* don't zero fill absent pages */ 820 boolean_t data_supply, /* treat as data_supply if 821 * it is a write fault and a full 822 * page is provided */ 823 vm_object_fault_info_t fault_info) 824{ 825 vm_page_t m; 826 vm_object_t object; 827 vm_object_offset_t offset; 828 vm_page_t first_m; 829 vm_object_t next_object; 830 vm_object_t copy_object; 831 boolean_t look_for_page; 832 boolean_t force_fault_retry = FALSE; 833 vm_prot_t access_required = fault_type; 834 vm_prot_t wants_copy_flag; 835 CLUSTER_STAT(int pages_at_higher_offsets;) 836 CLUSTER_STAT(int pages_at_lower_offsets;) 837 kern_return_t wait_result; 838 boolean_t interruptible_state; 839 boolean_t data_already_requested = FALSE; 840 vm_behavior_t orig_behavior; 841 vm_size_t orig_cluster_size; 842 vm_fault_return_t error; 843 int my_fault; 844 uint32_t try_failed_count; 845 int interruptible; /* how may fault be interrupted? */ 846 int external_state = VM_EXTERNAL_STATE_UNKNOWN; 847 memory_object_t pager; 848 vm_fault_return_t retval; 849 850/* 851 * MACH page map - an optional optimization where a bit map is maintained 852 * by the VM subsystem for internal objects to indicate which pages of 853 * the object currently reside on backing store. This existence map 854 * duplicates information maintained by the vnode pager. It is 855 * created at the time of the first pageout against the object, i.e. 856 * at the same time pager for the object is created. The optimization 857 * is designed to eliminate pager interaction overhead, if it is 858 * 'known' that the page does not exist on backing store. 859 * 860 * MUST_ASK_PAGER() evaluates to TRUE if the page specified by object/offset is 861 * either marked as paged out in the existence map for the object or no 862 * existence map exists for the object. MUST_ASK_PAGER() is one of the 863 * criteria in the decision to invoke the pager. It is also used as one 864 * of the criteria to terminate the scan for adjacent pages in a clustered 865 * pagein operation. Note that MUST_ASK_PAGER() always evaluates to TRUE for 866 * permanent objects. Note also that if the pager for an internal object 867 * has not been created, the pager is not invoked regardless of the value 868 * of MUST_ASK_PAGER() and that clustered pagein scans are only done on an object 869 * for which a pager has been created. 870 * 871 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset 872 * is marked as paged out in the existence map for the object. PAGED_OUT() 873 * PAGED_OUT() is used to determine if a page has already been pushed 874 * into a copy object in order to avoid a redundant page out operation. 875 */ 876#if MACH_PAGEMAP 877#define MUST_ASK_PAGER(o, f, s) \ 878 ((vm_external_state_get((o)->existence_map, (f)) \ 879 != VM_EXTERNAL_STATE_ABSENT) && \ 880 (s = (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)))) \ 881 != VM_EXTERNAL_STATE_ABSENT) 882#define PAGED_OUT(o, f) \ 883 ((vm_external_state_get((o)->existence_map, (f)) \ 884 == VM_EXTERNAL_STATE_EXISTS) || \ 885 (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)) \ 886 == VM_EXTERNAL_STATE_EXISTS)) 887#else /* MACH_PAGEMAP */ 888#define MUST_ASK_PAGER(o, f, s) \ 889 ((s = VM_COMPRESSOR_PAGER_STATE_GET((o), (f))) != VM_EXTERNAL_STATE_ABSENT) 890#define PAGED_OUT(o, f) \ 891 (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)) == VM_EXTERNAL_STATE_EXISTS) 892#endif /* MACH_PAGEMAP */ 893 894/* 895 * Recovery actions 896 */ 897#define RELEASE_PAGE(m) \ 898 MACRO_BEGIN \ 899 PAGE_WAKEUP_DONE(m); \ 900 if (!m->active && !m->inactive && !m->throttled) { \ 901 vm_page_lockspin_queues(); \ 902 if (!m->active && !m->inactive && !m->throttled) { \ 903 if (COMPRESSED_PAGER_IS_ACTIVE) \ 904 vm_page_deactivate(m); \ 905 else \ 906 vm_page_activate(m); \ 907 } \ 908 vm_page_unlock_queues(); \ 909 } \ 910 MACRO_END 911 912#if TRACEFAULTPAGE 913 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */ 914#endif 915 916 interruptible = fault_info->interruptible; 917 interruptible_state = thread_interrupt_level(interruptible); 918 919 /* 920 * INVARIANTS (through entire routine): 921 * 922 * 1) At all times, we must either have the object 923 * lock or a busy page in some object to prevent 924 * some other thread from trying to bring in 925 * the same page. 926 * 927 * Note that we cannot hold any locks during the 928 * pager access or when waiting for memory, so 929 * we use a busy page then. 930 * 931 * 2) To prevent another thread from racing us down the 932 * shadow chain and entering a new page in the top 933 * object before we do, we must keep a busy page in 934 * the top object while following the shadow chain. 935 * 936 * 3) We must increment paging_in_progress on any object 937 * for which we have a busy page before dropping 938 * the object lock 939 * 940 * 4) We leave busy pages on the pageout queues. 941 * If the pageout daemon comes across a busy page, 942 * it will remove the page from the pageout queues. 943 */ 944 945 object = first_object; 946 offset = first_offset; 947 first_m = VM_PAGE_NULL; 948 access_required = fault_type; 949 950 951 XPR(XPR_VM_FAULT, 952 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n", 953 object, offset, fault_type, *protection, 0); 954 955 /* 956 * default type of fault 957 */ 958 my_fault = DBG_CACHE_HIT_FAULT; 959 960 while (TRUE) { 961#if TRACEFAULTPAGE 962 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */ 963#endif 964 if (!object->alive) { 965 /* 966 * object is no longer valid 967 * clean up and return error 968 */ 969 vm_fault_cleanup(object, first_m); 970 thread_interrupt_level(interruptible_state); 971 972 return (VM_FAULT_MEMORY_ERROR); 973 } 974 975 if (!object->pager_created && object->phys_contiguous) { 976 /* 977 * A physically-contiguous object without a pager: 978 * must be a "large page" object. We do not deal 979 * with VM pages for this object. 980 */ 981 caller_lookup = FALSE; 982 m = VM_PAGE_NULL; 983 goto phys_contig_object; 984 } 985 986 if (object->blocked_access) { 987 /* 988 * Access to this VM object has been blocked. 989 * Replace our "paging_in_progress" reference with 990 * a "activity_in_progress" reference and wait for 991 * access to be unblocked. 992 */ 993 caller_lookup = FALSE; /* no longer valid after sleep */ 994 vm_object_activity_begin(object); 995 vm_object_paging_end(object); 996 while (object->blocked_access) { 997 vm_object_sleep(object, 998 VM_OBJECT_EVENT_UNBLOCKED, 999 THREAD_UNINT); 1000 } 1001 vm_fault_page_blocked_access++; 1002 vm_object_paging_begin(object); 1003 vm_object_activity_end(object); 1004 } 1005 1006 /* 1007 * See whether the page at 'offset' is resident 1008 */ 1009 if (caller_lookup == TRUE) { 1010 /* 1011 * The caller has already looked up the page 1012 * and gave us the result in "result_page". 1013 * We can use this for the first lookup but 1014 * it loses its validity as soon as we unlock 1015 * the object. 1016 */ 1017 m = *result_page; 1018 caller_lookup = FALSE; /* no longer valid after that */ 1019 } else { 1020 m = vm_page_lookup(object, offset); 1021 } 1022#if TRACEFAULTPAGE 1023 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */ 1024#endif 1025 if (m != VM_PAGE_NULL) { 1026 1027 if (m->busy) { 1028 /* 1029 * The page is being brought in, 1030 * wait for it and then retry. 1031 */ 1032#if TRACEFAULTPAGE 1033 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */ 1034#endif 1035 wait_result = PAGE_SLEEP(object, m, interruptible); 1036 1037 XPR(XPR_VM_FAULT, 1038 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n", 1039 object, offset, 1040 m, 0, 0); 1041 counter(c_vm_fault_page_block_busy_kernel++); 1042 1043 if (wait_result != THREAD_AWAKENED) { 1044 vm_fault_cleanup(object, first_m); 1045 thread_interrupt_level(interruptible_state); 1046 1047 if (wait_result == THREAD_RESTART) 1048 return (VM_FAULT_RETRY); 1049 else 1050 return (VM_FAULT_INTERRUPTED); 1051 } 1052 continue; 1053 } 1054 if (m->laundry) { 1055 m->pageout = FALSE; 1056 1057 if (!m->cleaning) 1058 vm_pageout_steal_laundry(m, FALSE); 1059 } 1060 if (m->phys_page == vm_page_guard_addr) { 1061 /* 1062 * Guard page: off limits ! 1063 */ 1064 if (fault_type == VM_PROT_NONE) { 1065 /* 1066 * The fault is not requesting any 1067 * access to the guard page, so it must 1068 * be just to wire or unwire it. 1069 * Let's pretend it succeeded... 1070 */ 1071 m->busy = TRUE; 1072 *result_page = m; 1073 assert(first_m == VM_PAGE_NULL); 1074 *top_page = first_m; 1075 if (type_of_fault) 1076 *type_of_fault = DBG_GUARD_FAULT; 1077 thread_interrupt_level(interruptible_state); 1078 return VM_FAULT_SUCCESS; 1079 } else { 1080 /* 1081 * The fault requests access to the 1082 * guard page: let's deny that ! 1083 */ 1084 vm_fault_cleanup(object, first_m); 1085 thread_interrupt_level(interruptible_state); 1086 return VM_FAULT_MEMORY_ERROR; 1087 } 1088 } 1089 1090 if (m->error) { 1091 /* 1092 * The page is in error, give up now. 1093 */ 1094#if TRACEFAULTPAGE 1095 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */ 1096#endif 1097 if (error_code) 1098 *error_code = KERN_MEMORY_ERROR; 1099 VM_PAGE_FREE(m); 1100 1101 vm_fault_cleanup(object, first_m); 1102 thread_interrupt_level(interruptible_state); 1103 1104 return (VM_FAULT_MEMORY_ERROR); 1105 } 1106 if (m->restart) { 1107 /* 1108 * The pager wants us to restart 1109 * at the top of the chain, 1110 * typically because it has moved the 1111 * page to another pager, then do so. 1112 */ 1113#if TRACEFAULTPAGE 1114 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */ 1115#endif 1116 VM_PAGE_FREE(m); 1117 1118 vm_fault_cleanup(object, first_m); 1119 thread_interrupt_level(interruptible_state); 1120 1121 return (VM_FAULT_RETRY); 1122 } 1123 if (m->absent) { 1124 /* 1125 * The page isn't busy, but is absent, 1126 * therefore it's deemed "unavailable". 1127 * 1128 * Remove the non-existent page (unless it's 1129 * in the top object) and move on down to the 1130 * next object (if there is one). 1131 */ 1132#if TRACEFAULTPAGE 1133 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */ 1134#endif 1135 next_object = object->shadow; 1136 1137 if (next_object == VM_OBJECT_NULL) { 1138 /* 1139 * Absent page at bottom of shadow 1140 * chain; zero fill the page we left 1141 * busy in the first object, and free 1142 * the absent page. 1143 */ 1144 assert(!must_be_resident); 1145 1146 /* 1147 * check for any conditions that prevent 1148 * us from creating a new zero-fill page 1149 * vm_fault_check will do all of the 1150 * fault cleanup in the case of an error condition 1151 * including resetting the thread_interrupt_level 1152 */ 1153 error = vm_fault_check(object, m, first_m, interruptible_state); 1154 1155 if (error != VM_FAULT_SUCCESS) 1156 return (error); 1157 1158 XPR(XPR_VM_FAULT, 1159 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n", 1160 object, offset, 1161 m, 1162 first_object, 0); 1163 1164 if (object != first_object) { 1165 /* 1166 * free the absent page we just found 1167 */ 1168 VM_PAGE_FREE(m); 1169 1170 /* 1171 * drop reference and lock on current object 1172 */ 1173 vm_object_paging_end(object); 1174 vm_object_unlock(object); 1175 1176 /* 1177 * grab the original page we 1178 * 'soldered' in place and 1179 * retake lock on 'first_object' 1180 */ 1181 m = first_m; 1182 first_m = VM_PAGE_NULL; 1183 1184 object = first_object; 1185 offset = first_offset; 1186 1187 vm_object_lock(object); 1188 } else { 1189 /* 1190 * we're going to use the absent page we just found 1191 * so convert it to a 'busy' page 1192 */ 1193 m->absent = FALSE; 1194 m->busy = TRUE; 1195 } 1196 if (fault_info->mark_zf_absent && no_zero_fill == TRUE) 1197 m->absent = TRUE; 1198 /* 1199 * zero-fill the page and put it on 1200 * the correct paging queue 1201 */ 1202 my_fault = vm_fault_zero_page(m, no_zero_fill); 1203 1204 break; 1205 } else { 1206 if (must_be_resident) 1207 vm_object_paging_end(object); 1208 else if (object != first_object) { 1209 vm_object_paging_end(object); 1210 VM_PAGE_FREE(m); 1211 } else { 1212 first_m = m; 1213 m->absent = FALSE; 1214 m->busy = TRUE; 1215 1216 vm_page_lockspin_queues(); 1217 1218 assert(!m->pageout_queue); 1219 VM_PAGE_QUEUES_REMOVE(m); 1220 1221 vm_page_unlock_queues(); 1222 } 1223 XPR(XPR_VM_FAULT, 1224 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n", 1225 object, offset, 1226 next_object, 1227 offset+object->vo_shadow_offset,0); 1228 1229 offset += object->vo_shadow_offset; 1230 fault_info->lo_offset += object->vo_shadow_offset; 1231 fault_info->hi_offset += object->vo_shadow_offset; 1232 access_required = VM_PROT_READ; 1233 1234 vm_object_lock(next_object); 1235 vm_object_unlock(object); 1236 object = next_object; 1237 vm_object_paging_begin(object); 1238 1239 /* 1240 * reset to default type of fault 1241 */ 1242 my_fault = DBG_CACHE_HIT_FAULT; 1243 1244 continue; 1245 } 1246 } 1247 if ((m->cleaning) 1248 && ((object != first_object) || (object->copy != VM_OBJECT_NULL)) 1249 && (fault_type & VM_PROT_WRITE)) { 1250 /* 1251 * This is a copy-on-write fault that will 1252 * cause us to revoke access to this page, but 1253 * this page is in the process of being cleaned 1254 * in a clustered pageout. We must wait until 1255 * the cleaning operation completes before 1256 * revoking access to the original page, 1257 * otherwise we might attempt to remove a 1258 * wired mapping. 1259 */ 1260#if TRACEFAULTPAGE 1261 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */ 1262#endif 1263 XPR(XPR_VM_FAULT, 1264 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n", 1265 object, offset, 1266 m, 0, 0); 1267 /* 1268 * take an extra ref so that object won't die 1269 */ 1270 vm_object_reference_locked(object); 1271 1272 vm_fault_cleanup(object, first_m); 1273 1274 counter(c_vm_fault_page_block_backoff_kernel++); 1275 vm_object_lock(object); 1276 assert(object->ref_count > 0); 1277 1278 m = vm_page_lookup(object, offset); 1279 1280 if (m != VM_PAGE_NULL && m->cleaning) { 1281 PAGE_ASSERT_WAIT(m, interruptible); 1282 1283 vm_object_unlock(object); 1284 wait_result = thread_block(THREAD_CONTINUE_NULL); 1285 vm_object_deallocate(object); 1286 1287 goto backoff; 1288 } else { 1289 vm_object_unlock(object); 1290 1291 vm_object_deallocate(object); 1292 thread_interrupt_level(interruptible_state); 1293 1294 return (VM_FAULT_RETRY); 1295 } 1296 } 1297 if (type_of_fault == NULL && m->speculative && 1298 !(fault_info != NULL && fault_info->stealth)) { 1299 /* 1300 * If we were passed a non-NULL pointer for 1301 * "type_of_fault", than we came from 1302 * vm_fault... we'll let it deal with 1303 * this condition, since it 1304 * needs to see m->speculative to correctly 1305 * account the pageins, otherwise... 1306 * take it off the speculative queue, we'll 1307 * let the caller of vm_fault_page deal 1308 * with getting it onto the correct queue 1309 * 1310 * If the caller specified in fault_info that 1311 * it wants a "stealth" fault, we also leave 1312 * the page in the speculative queue. 1313 */ 1314 vm_page_lockspin_queues(); 1315 if (m->speculative) 1316 VM_PAGE_QUEUES_REMOVE(m); 1317 vm_page_unlock_queues(); 1318 } 1319 1320 if (m->encrypted) { 1321 /* 1322 * ENCRYPTED SWAP: 1323 * the user needs access to a page that we 1324 * encrypted before paging it out. 1325 * Decrypt the page now. 1326 * Keep it busy to prevent anyone from 1327 * accessing it during the decryption. 1328 */ 1329 m->busy = TRUE; 1330 vm_page_decrypt(m, 0); 1331 assert(object == m->object); 1332 assert(m->busy); 1333 PAGE_WAKEUP_DONE(m); 1334 1335 /* 1336 * Retry from the top, in case 1337 * something changed while we were 1338 * decrypting. 1339 */ 1340 continue; 1341 } 1342 ASSERT_PAGE_DECRYPTED(m); 1343 1344 if (m->object->code_signed) { 1345 /* 1346 * CODE SIGNING: 1347 * We just paged in a page from a signed 1348 * memory object but we don't need to 1349 * validate it now. We'll validate it if 1350 * when it gets mapped into a user address 1351 * space for the first time or when the page 1352 * gets copied to another object as a result 1353 * of a copy-on-write. 1354 */ 1355 } 1356 1357 /* 1358 * We mark the page busy and leave it on 1359 * the pageout queues. If the pageout 1360 * deamon comes across it, then it will 1361 * remove the page from the queue, but not the object 1362 */ 1363#if TRACEFAULTPAGE 1364 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */ 1365#endif 1366 XPR(XPR_VM_FAULT, 1367 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n", 1368 object, offset, m, 0, 0); 1369 assert(!m->busy); 1370 assert(!m->absent); 1371 1372 m->busy = TRUE; 1373 break; 1374 } 1375 1376 1377 /* 1378 * we get here when there is no page present in the object at 1379 * the offset we're interested in... we'll allocate a page 1380 * at this point if the pager associated with 1381 * this object can provide the data or we're the top object... 1382 * object is locked; m == NULL 1383 */ 1384 if (must_be_resident) { 1385 if (fault_type == VM_PROT_NONE && 1386 object == kernel_object) { 1387 /* 1388 * We've been called from vm_fault_unwire() 1389 * while removing a map entry that was allocated 1390 * with KMA_KOBJECT and KMA_VAONLY. This page 1391 * is not present and there's nothing more to 1392 * do here (nothing to unwire). 1393 */ 1394 vm_fault_cleanup(object, first_m); 1395 thread_interrupt_level(interruptible_state); 1396 1397 return VM_FAULT_MEMORY_ERROR; 1398 } 1399 1400 goto dont_look_for_page; 1401 } 1402 1403#if !MACH_PAGEMAP 1404 data_supply = FALSE; 1405#endif /* !MACH_PAGEMAP */ 1406 1407 look_for_page = (object->pager_created && (MUST_ASK_PAGER(object, offset, external_state) == TRUE) && !data_supply); 1408 1409#if TRACEFAULTPAGE 1410 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */ 1411#endif 1412 if (!look_for_page && object == first_object && !object->phys_contiguous) { 1413 /* 1414 * Allocate a new page for this object/offset pair as a placeholder 1415 */ 1416 m = vm_page_grab(); 1417#if TRACEFAULTPAGE 1418 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */ 1419#endif 1420 if (m == VM_PAGE_NULL) { 1421 1422 vm_fault_cleanup(object, first_m); 1423 thread_interrupt_level(interruptible_state); 1424 1425 return (VM_FAULT_MEMORY_SHORTAGE); 1426 } 1427 1428 if (fault_info && fault_info->batch_pmap_op == TRUE) { 1429 vm_page_insert_internal(m, object, offset, FALSE, TRUE, TRUE); 1430 } else { 1431 vm_page_insert(m, object, offset); 1432 } 1433 } 1434 if (look_for_page) { 1435 kern_return_t rc; 1436 int my_fault_type; 1437 1438 /* 1439 * If the memory manager is not ready, we 1440 * cannot make requests. 1441 */ 1442 if (!object->pager_ready) { 1443#if TRACEFAULTPAGE 1444 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */ 1445#endif 1446 if (m != VM_PAGE_NULL) 1447 VM_PAGE_FREE(m); 1448 1449 XPR(XPR_VM_FAULT, 1450 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n", 1451 object, offset, 0, 0, 0); 1452 1453 /* 1454 * take an extra ref so object won't die 1455 */ 1456 vm_object_reference_locked(object); 1457 vm_fault_cleanup(object, first_m); 1458 counter(c_vm_fault_page_block_backoff_kernel++); 1459 1460 vm_object_lock(object); 1461 assert(object->ref_count > 0); 1462 1463 if (!object->pager_ready) { 1464 wait_result = vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGER_READY, interruptible); 1465 1466 vm_object_unlock(object); 1467 if (wait_result == THREAD_WAITING) 1468 wait_result = thread_block(THREAD_CONTINUE_NULL); 1469 vm_object_deallocate(object); 1470 1471 goto backoff; 1472 } else { 1473 vm_object_unlock(object); 1474 vm_object_deallocate(object); 1475 thread_interrupt_level(interruptible_state); 1476 1477 return (VM_FAULT_RETRY); 1478 } 1479 } 1480 if (!object->internal && !object->phys_contiguous && object->paging_in_progress > vm_object_pagein_throttle) { 1481 /* 1482 * If there are too many outstanding page 1483 * requests pending on this external object, we 1484 * wait for them to be resolved now. 1485 */ 1486#if TRACEFAULTPAGE 1487 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */ 1488#endif 1489 if (m != VM_PAGE_NULL) 1490 VM_PAGE_FREE(m); 1491 /* 1492 * take an extra ref so object won't die 1493 */ 1494 vm_object_reference_locked(object); 1495 1496 vm_fault_cleanup(object, first_m); 1497 1498 counter(c_vm_fault_page_block_backoff_kernel++); 1499 1500 vm_object_lock(object); 1501 assert(object->ref_count > 0); 1502 1503 if (object->paging_in_progress >= vm_object_pagein_throttle) { 1504 vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGING_ONLY_IN_PROGRESS, interruptible); 1505 1506 vm_object_unlock(object); 1507 wait_result = thread_block(THREAD_CONTINUE_NULL); 1508 vm_object_deallocate(object); 1509 1510 goto backoff; 1511 } else { 1512 vm_object_unlock(object); 1513 vm_object_deallocate(object); 1514 thread_interrupt_level(interruptible_state); 1515 1516 return (VM_FAULT_RETRY); 1517 } 1518 } 1519 if (object->internal && 1520 (COMPRESSED_PAGER_IS_ACTIVE 1521 || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)) { 1522 int compressed_count_delta; 1523 1524 if (m == VM_PAGE_NULL) { 1525 /* 1526 * Allocate a new page for this object/offset pair as a placeholder 1527 */ 1528 m = vm_page_grab(); 1529#if TRACEFAULTPAGE 1530 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */ 1531#endif 1532 if (m == VM_PAGE_NULL) { 1533 1534 vm_fault_cleanup(object, first_m); 1535 thread_interrupt_level(interruptible_state); 1536 1537 return (VM_FAULT_MEMORY_SHORTAGE); 1538 } 1539 1540 m->absent = TRUE; 1541 if (fault_info && fault_info->batch_pmap_op == TRUE) { 1542 vm_page_insert_internal(m, object, offset, FALSE, TRUE, TRUE); 1543 } else { 1544 vm_page_insert(m, object, offset); 1545 } 1546 } 1547 assert(m->busy); 1548 1549 m->absent = TRUE; 1550 pager = object->pager; 1551 1552 assert(object->paging_in_progress > 0); 1553 vm_object_unlock(object); 1554 1555 rc = vm_compressor_pager_get( 1556 pager, 1557 offset + object->paging_offset, 1558 m->phys_page, 1559 &my_fault_type, 1560 0, 1561 &compressed_count_delta); 1562 1563 vm_object_lock(object); 1564 assert(object->paging_in_progress > 0); 1565 1566 vm_compressor_pager_count( 1567 pager, 1568 compressed_count_delta, 1569 FALSE, /* shared_lock */ 1570 object); 1571 1572 switch (rc) { 1573 case KERN_SUCCESS: 1574 m->absent = FALSE; 1575 m->dirty = TRUE; 1576 if ((m->object->wimg_bits & 1577 VM_WIMG_MASK) != 1578 VM_WIMG_USE_DEFAULT) { 1579 /* 1580 * If the page is not cacheable, 1581 * we can't let its contents 1582 * linger in the data cache 1583 * after the decompression. 1584 */ 1585 pmap_sync_page_attributes_phys( 1586 m->phys_page); 1587 } else { 1588 m->written_by_kernel = TRUE; 1589 } 1590 1591 /* 1592 * If the object is purgeable, its 1593 * owner's purgeable ledgers have been 1594 * updated in vm_page_insert() but the 1595 * page was also accounted for in a 1596 * "compressed purgeable" ledger, so 1597 * update that now. 1598 */ 1599 if ((object->purgable != 1600 VM_PURGABLE_DENY) && 1601 (object->vo_purgeable_owner != 1602 NULL)) { 1603 /* 1604 * One less compressed 1605 * purgeable page. 1606 */ 1607 vm_purgeable_compressed_update( 1608 object, 1609 -1); 1610 } 1611 1612 break; 1613 case KERN_MEMORY_FAILURE: 1614 m->unusual = TRUE; 1615 m->error = TRUE; 1616 m->absent = FALSE; 1617 break; 1618 case KERN_MEMORY_ERROR: 1619 assert(m->absent); 1620 break; 1621 default: 1622 panic("vm_fault_page(): unexpected " 1623 "error %d from " 1624 "vm_compressor_pager_get()\n", 1625 rc); 1626 } 1627 PAGE_WAKEUP_DONE(m); 1628 1629 rc = KERN_SUCCESS; 1630 goto data_requested; 1631 } 1632 my_fault_type = DBG_PAGEIN_FAULT; 1633 1634 if (m != VM_PAGE_NULL) { 1635 VM_PAGE_FREE(m); 1636 m = VM_PAGE_NULL; 1637 } 1638 1639#if TRACEFAULTPAGE 1640 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */ 1641#endif 1642 1643 /* 1644 * It's possible someone called vm_object_destroy while we weren't 1645 * holding the object lock. If that has happened, then bail out 1646 * here. 1647 */ 1648 1649 pager = object->pager; 1650 1651 if (pager == MEMORY_OBJECT_NULL) { 1652 vm_fault_cleanup(object, first_m); 1653 thread_interrupt_level(interruptible_state); 1654 return VM_FAULT_MEMORY_ERROR; 1655 } 1656 1657 /* 1658 * We have an absent page in place for the faulting offset, 1659 * so we can release the object lock. 1660 */ 1661 1662 vm_object_unlock(object); 1663 1664 /* 1665 * If this object uses a copy_call strategy, 1666 * and we are interested in a copy of this object 1667 * (having gotten here only by following a 1668 * shadow chain), then tell the memory manager 1669 * via a flag added to the desired_access 1670 * parameter, so that it can detect a race 1671 * between our walking down the shadow chain 1672 * and its pushing pages up into a copy of 1673 * the object that it manages. 1674 */ 1675 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL && object != first_object) 1676 wants_copy_flag = VM_PROT_WANTS_COPY; 1677 else 1678 wants_copy_flag = VM_PROT_NONE; 1679 1680 XPR(XPR_VM_FAULT, 1681 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n", 1682 object, offset, m, 1683 access_required | wants_copy_flag, 0); 1684 1685 if (object->copy == first_object) { 1686 /* 1687 * if we issue the memory_object_data_request in 1688 * this state, we are subject to a deadlock with 1689 * the underlying filesystem if it is trying to 1690 * shrink the file resulting in a push of pages 1691 * into the copy object... that push will stall 1692 * on the placeholder page, and if the pushing thread 1693 * is holding a lock that is required on the pagein 1694 * path (such as a truncate lock), we'll deadlock... 1695 * to avoid this potential deadlock, we throw away 1696 * our placeholder page before calling memory_object_data_request 1697 * and force this thread to retry the vm_fault_page after 1698 * we have issued the I/O. the second time through this path 1699 * we will find the page already in the cache (presumably still 1700 * busy waiting for the I/O to complete) and then complete 1701 * the fault w/o having to go through memory_object_data_request again 1702 */ 1703 assert(first_m != VM_PAGE_NULL); 1704 assert(first_m->object == first_object); 1705 1706 vm_object_lock(first_object); 1707 VM_PAGE_FREE(first_m); 1708 vm_object_paging_end(first_object); 1709 vm_object_unlock(first_object); 1710 1711 first_m = VM_PAGE_NULL; 1712 force_fault_retry = TRUE; 1713 1714 vm_fault_page_forced_retry++; 1715 } 1716 1717 if (data_already_requested == TRUE) { 1718 orig_behavior = fault_info->behavior; 1719 orig_cluster_size = fault_info->cluster_size; 1720 1721 fault_info->behavior = VM_BEHAVIOR_RANDOM; 1722 fault_info->cluster_size = PAGE_SIZE; 1723 } 1724 /* 1725 * Call the memory manager to retrieve the data. 1726 */ 1727 rc = memory_object_data_request( 1728 pager, 1729 offset + object->paging_offset, 1730 PAGE_SIZE, 1731 access_required | wants_copy_flag, 1732 (memory_object_fault_info_t)fault_info); 1733 1734 if (data_already_requested == TRUE) { 1735 fault_info->behavior = orig_behavior; 1736 fault_info->cluster_size = orig_cluster_size; 1737 } else 1738 data_already_requested = TRUE; 1739 1740 DTRACE_VM2(maj_fault, int, 1, (uint64_t *), NULL); 1741#if TRACEFAULTPAGE 1742 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */ 1743#endif 1744 vm_object_lock(object); 1745 1746 data_requested: 1747 if (rc != KERN_SUCCESS) { 1748 1749 vm_fault_cleanup(object, first_m); 1750 thread_interrupt_level(interruptible_state); 1751 1752 return ((rc == MACH_SEND_INTERRUPTED) ? 1753 VM_FAULT_INTERRUPTED : 1754 VM_FAULT_MEMORY_ERROR); 1755 } else { 1756 clock_sec_t tv_sec; 1757 clock_usec_t tv_usec; 1758 1759 if (my_fault_type == DBG_PAGEIN_FAULT) { 1760 clock_get_system_microtime(&tv_sec, &tv_usec); 1761 current_thread()->t_page_creation_time = tv_sec; 1762 current_thread()->t_page_creation_count = 0; 1763 } 1764 } 1765 if ((interruptible != THREAD_UNINT) && (current_thread()->sched_flags & TH_SFLAG_ABORT)) { 1766 1767 vm_fault_cleanup(object, first_m); 1768 thread_interrupt_level(interruptible_state); 1769 1770 return (VM_FAULT_INTERRUPTED); 1771 } 1772 if (force_fault_retry == TRUE) { 1773 1774 vm_fault_cleanup(object, first_m); 1775 thread_interrupt_level(interruptible_state); 1776 1777 return (VM_FAULT_RETRY); 1778 } 1779 if (m == VM_PAGE_NULL && object->phys_contiguous) { 1780 /* 1781 * No page here means that the object we 1782 * initially looked up was "physically 1783 * contiguous" (i.e. device memory). However, 1784 * with Virtual VRAM, the object might not 1785 * be backed by that device memory anymore, 1786 * so we're done here only if the object is 1787 * still "phys_contiguous". 1788 * Otherwise, if the object is no longer 1789 * "phys_contiguous", we need to retry the 1790 * page fault against the object's new backing 1791 * store (different memory object). 1792 */ 1793 phys_contig_object: 1794 goto done; 1795 } 1796 /* 1797 * potentially a pagein fault 1798 * if we make it through the state checks 1799 * above, than we'll count it as such 1800 */ 1801 my_fault = my_fault_type; 1802 1803 /* 1804 * Retry with same object/offset, since new data may 1805 * be in a different page (i.e., m is meaningless at 1806 * this point). 1807 */ 1808 continue; 1809 } 1810dont_look_for_page: 1811 /* 1812 * We get here if the object has no pager, or an existence map 1813 * exists and indicates the page isn't present on the pager 1814 * or we're unwiring a page. If a pager exists, but there 1815 * is no existence map, then the m->absent case above handles 1816 * the ZF case when the pager can't provide the page 1817 */ 1818#if TRACEFAULTPAGE 1819 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */ 1820#endif 1821 if (object == first_object) 1822 first_m = m; 1823 else 1824 assert(m == VM_PAGE_NULL); 1825 1826 XPR(XPR_VM_FAULT, 1827 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n", 1828 object, offset, m, 1829 object->shadow, 0); 1830 1831 next_object = object->shadow; 1832 1833 if (next_object == VM_OBJECT_NULL) { 1834 /* 1835 * we've hit the bottom of the shadown chain, 1836 * fill the page in the top object with zeros. 1837 */ 1838 assert(!must_be_resident); 1839 1840 if (object != first_object) { 1841 vm_object_paging_end(object); 1842 vm_object_unlock(object); 1843 1844 object = first_object; 1845 offset = first_offset; 1846 vm_object_lock(object); 1847 } 1848 m = first_m; 1849 assert(m->object == object); 1850 first_m = VM_PAGE_NULL; 1851 1852 /* 1853 * check for any conditions that prevent 1854 * us from creating a new zero-fill page 1855 * vm_fault_check will do all of the 1856 * fault cleanup in the case of an error condition 1857 * including resetting the thread_interrupt_level 1858 */ 1859 error = vm_fault_check(object, m, first_m, interruptible_state); 1860 1861 if (error != VM_FAULT_SUCCESS) 1862 return (error); 1863 1864 if (m == VM_PAGE_NULL) { 1865 m = vm_page_grab(); 1866 1867 if (m == VM_PAGE_NULL) { 1868 vm_fault_cleanup(object, VM_PAGE_NULL); 1869 thread_interrupt_level(interruptible_state); 1870 1871 return (VM_FAULT_MEMORY_SHORTAGE); 1872 } 1873 vm_page_insert(m, object, offset); 1874 } 1875 if (fault_info->mark_zf_absent && no_zero_fill == TRUE) 1876 m->absent = TRUE; 1877 1878 my_fault = vm_fault_zero_page(m, no_zero_fill); 1879 1880 break; 1881 1882 } else { 1883 /* 1884 * Move on to the next object. Lock the next 1885 * object before unlocking the current one. 1886 */ 1887 if ((object != first_object) || must_be_resident) 1888 vm_object_paging_end(object); 1889 1890 offset += object->vo_shadow_offset; 1891 fault_info->lo_offset += object->vo_shadow_offset; 1892 fault_info->hi_offset += object->vo_shadow_offset; 1893 access_required = VM_PROT_READ; 1894 1895 vm_object_lock(next_object); 1896 vm_object_unlock(object); 1897 1898 object = next_object; 1899 vm_object_paging_begin(object); 1900 } 1901 } 1902 1903 /* 1904 * PAGE HAS BEEN FOUND. 1905 * 1906 * This page (m) is: 1907 * busy, so that we can play with it; 1908 * not absent, so that nobody else will fill it; 1909 * possibly eligible for pageout; 1910 * 1911 * The top-level page (first_m) is: 1912 * VM_PAGE_NULL if the page was found in the 1913 * top-level object; 1914 * busy, not absent, and ineligible for pageout. 1915 * 1916 * The current object (object) is locked. A paging 1917 * reference is held for the current and top-level 1918 * objects. 1919 */ 1920 1921#if TRACEFAULTPAGE 1922 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */ 1923#endif 1924#if EXTRA_ASSERTIONS 1925 assert(m->busy && !m->absent); 1926 assert((first_m == VM_PAGE_NULL) || 1927 (first_m->busy && !first_m->absent && 1928 !first_m->active && !first_m->inactive)); 1929#endif /* EXTRA_ASSERTIONS */ 1930 1931 /* 1932 * ENCRYPTED SWAP: 1933 * If we found a page, we must have decrypted it before we 1934 * get here... 1935 */ 1936 ASSERT_PAGE_DECRYPTED(m); 1937 1938 XPR(XPR_VM_FAULT, 1939 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n", 1940 object, offset, m, 1941 first_object, first_m); 1942 1943 /* 1944 * If the page is being written, but isn't 1945 * already owned by the top-level object, 1946 * we have to copy it into a new page owned 1947 * by the top-level object. 1948 */ 1949 if (object != first_object) { 1950 1951#if TRACEFAULTPAGE 1952 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */ 1953#endif 1954 if (fault_type & VM_PROT_WRITE) { 1955 vm_page_t copy_m; 1956 1957 /* 1958 * We only really need to copy if we 1959 * want to write it. 1960 */ 1961 assert(!must_be_resident); 1962 1963 /* 1964 * are we protecting the system from 1965 * backing store exhaustion. If so 1966 * sleep unless we are privileged. 1967 */ 1968 if (vm_backing_store_low) { 1969 if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) { 1970 1971 RELEASE_PAGE(m); 1972 vm_fault_cleanup(object, first_m); 1973 1974 assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT); 1975 1976 thread_block(THREAD_CONTINUE_NULL); 1977 thread_interrupt_level(interruptible_state); 1978 1979 return (VM_FAULT_RETRY); 1980 } 1981 } 1982 /* 1983 * If we try to collapse first_object at this 1984 * point, we may deadlock when we try to get 1985 * the lock on an intermediate object (since we 1986 * have the bottom object locked). We can't 1987 * unlock the bottom object, because the page 1988 * we found may move (by collapse) if we do. 1989 * 1990 * Instead, we first copy the page. Then, when 1991 * we have no more use for the bottom object, 1992 * we unlock it and try to collapse. 1993 * 1994 * Note that we copy the page even if we didn't 1995 * need to... that's the breaks. 1996 */ 1997 1998 /* 1999 * Allocate a page for the copy 2000 */ 2001 copy_m = vm_page_grab(); 2002 2003 if (copy_m == VM_PAGE_NULL) { 2004 RELEASE_PAGE(m); 2005 2006 vm_fault_cleanup(object, first_m); 2007 thread_interrupt_level(interruptible_state); 2008 2009 return (VM_FAULT_MEMORY_SHORTAGE); 2010 } 2011 XPR(XPR_VM_FAULT, 2012 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n", 2013 object, offset, 2014 m, copy_m, 0); 2015 2016 vm_page_copy(m, copy_m); 2017 2018 /* 2019 * If another map is truly sharing this 2020 * page with us, we have to flush all 2021 * uses of the original page, since we 2022 * can't distinguish those which want the 2023 * original from those which need the 2024 * new copy. 2025 * 2026 * XXXO If we know that only one map has 2027 * access to this page, then we could 2028 * avoid the pmap_disconnect() call. 2029 */ 2030 if (m->pmapped) 2031 pmap_disconnect(m->phys_page); 2032 2033 if (m->clustered) { 2034 VM_PAGE_COUNT_AS_PAGEIN(m); 2035 VM_PAGE_CONSUME_CLUSTERED(m); 2036 } 2037 assert(!m->cleaning); 2038 2039 /* 2040 * We no longer need the old page or object. 2041 */ 2042 RELEASE_PAGE(m); 2043 2044 vm_object_paging_end(object); 2045 vm_object_unlock(object); 2046 2047 my_fault = DBG_COW_FAULT; 2048 VM_STAT_INCR(cow_faults); 2049 DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL); 2050 current_task()->cow_faults++; 2051 2052 object = first_object; 2053 offset = first_offset; 2054 2055 vm_object_lock(object); 2056 /* 2057 * get rid of the place holder 2058 * page that we soldered in earlier 2059 */ 2060 VM_PAGE_FREE(first_m); 2061 first_m = VM_PAGE_NULL; 2062 2063 /* 2064 * and replace it with the 2065 * page we just copied into 2066 */ 2067 assert(copy_m->busy); 2068 vm_page_insert(copy_m, object, offset); 2069 SET_PAGE_DIRTY(copy_m, TRUE); 2070 2071 m = copy_m; 2072 /* 2073 * Now that we've gotten the copy out of the 2074 * way, let's try to collapse the top object. 2075 * But we have to play ugly games with 2076 * paging_in_progress to do that... 2077 */ 2078 vm_object_paging_end(object); 2079 vm_object_collapse(object, offset, TRUE); 2080 vm_object_paging_begin(object); 2081 2082 } else 2083 *protection &= (~VM_PROT_WRITE); 2084 } 2085 /* 2086 * Now check whether the page needs to be pushed into the 2087 * copy object. The use of asymmetric copy on write for 2088 * shared temporary objects means that we may do two copies to 2089 * satisfy the fault; one above to get the page from a 2090 * shadowed object, and one here to push it into the copy. 2091 */ 2092 try_failed_count = 0; 2093 2094 while ((copy_object = first_object->copy) != VM_OBJECT_NULL) { 2095 vm_object_offset_t copy_offset; 2096 vm_page_t copy_m; 2097 2098#if TRACEFAULTPAGE 2099 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */ 2100#endif 2101 /* 2102 * If the page is being written, but hasn't been 2103 * copied to the copy-object, we have to copy it there. 2104 */ 2105 if ((fault_type & VM_PROT_WRITE) == 0) { 2106 *protection &= ~VM_PROT_WRITE; 2107 break; 2108 } 2109 2110 /* 2111 * If the page was guaranteed to be resident, 2112 * we must have already performed the copy. 2113 */ 2114 if (must_be_resident) 2115 break; 2116 2117 /* 2118 * Try to get the lock on the copy_object. 2119 */ 2120 if (!vm_object_lock_try(copy_object)) { 2121 2122 vm_object_unlock(object); 2123 try_failed_count++; 2124 2125 mutex_pause(try_failed_count); /* wait a bit */ 2126 vm_object_lock(object); 2127 2128 continue; 2129 } 2130 try_failed_count = 0; 2131 2132 /* 2133 * Make another reference to the copy-object, 2134 * to keep it from disappearing during the 2135 * copy. 2136 */ 2137 vm_object_reference_locked(copy_object); 2138 2139 /* 2140 * Does the page exist in the copy? 2141 */ 2142 copy_offset = first_offset - copy_object->vo_shadow_offset; 2143 2144 if (copy_object->vo_size <= copy_offset) 2145 /* 2146 * Copy object doesn't cover this page -- do nothing. 2147 */ 2148 ; 2149 else if ((copy_m = vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) { 2150 /* 2151 * Page currently exists in the copy object 2152 */ 2153 if (copy_m->busy) { 2154 /* 2155 * If the page is being brought 2156 * in, wait for it and then retry. 2157 */ 2158 RELEASE_PAGE(m); 2159 2160 /* 2161 * take an extra ref so object won't die 2162 */ 2163 vm_object_reference_locked(copy_object); 2164 vm_object_unlock(copy_object); 2165 vm_fault_cleanup(object, first_m); 2166 counter(c_vm_fault_page_block_backoff_kernel++); 2167 2168 vm_object_lock(copy_object); 2169 assert(copy_object->ref_count > 0); 2170 VM_OBJ_RES_DECR(copy_object); 2171 vm_object_lock_assert_exclusive(copy_object); 2172 copy_object->ref_count--; 2173 assert(copy_object->ref_count > 0); 2174 copy_m = vm_page_lookup(copy_object, copy_offset); 2175 /* 2176 * ENCRYPTED SWAP: 2177 * it's OK if the "copy_m" page is encrypted, 2178 * because we're not moving it nor handling its 2179 * contents. 2180 */ 2181 if (copy_m != VM_PAGE_NULL && copy_m->busy) { 2182 PAGE_ASSERT_WAIT(copy_m, interruptible); 2183 2184 vm_object_unlock(copy_object); 2185 wait_result = thread_block(THREAD_CONTINUE_NULL); 2186 vm_object_deallocate(copy_object); 2187 2188 goto backoff; 2189 } else { 2190 vm_object_unlock(copy_object); 2191 vm_object_deallocate(copy_object); 2192 thread_interrupt_level(interruptible_state); 2193 2194 return (VM_FAULT_RETRY); 2195 } 2196 } 2197 } 2198 else if (!PAGED_OUT(copy_object, copy_offset)) { 2199 /* 2200 * If PAGED_OUT is TRUE, then the page used to exist 2201 * in the copy-object, and has already been paged out. 2202 * We don't need to repeat this. If PAGED_OUT is 2203 * FALSE, then either we don't know (!pager_created, 2204 * for example) or it hasn't been paged out. 2205 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT) 2206 * We must copy the page to the copy object. 2207 */ 2208 2209 if (vm_backing_store_low) { 2210 /* 2211 * we are protecting the system from 2212 * backing store exhaustion. If so 2213 * sleep unless we are privileged. 2214 */ 2215 if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) { 2216 assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT); 2217 2218 RELEASE_PAGE(m); 2219 VM_OBJ_RES_DECR(copy_object); 2220 vm_object_lock_assert_exclusive(copy_object); 2221 copy_object->ref_count--; 2222 assert(copy_object->ref_count > 0); 2223 2224 vm_object_unlock(copy_object); 2225 vm_fault_cleanup(object, first_m); 2226 thread_block(THREAD_CONTINUE_NULL); 2227 thread_interrupt_level(interruptible_state); 2228 2229 return (VM_FAULT_RETRY); 2230 } 2231 } 2232 /* 2233 * Allocate a page for the copy 2234 */ 2235 copy_m = vm_page_alloc(copy_object, copy_offset); 2236 2237 if (copy_m == VM_PAGE_NULL) { 2238 RELEASE_PAGE(m); 2239 2240 VM_OBJ_RES_DECR(copy_object); 2241 vm_object_lock_assert_exclusive(copy_object); 2242 copy_object->ref_count--; 2243 assert(copy_object->ref_count > 0); 2244 2245 vm_object_unlock(copy_object); 2246 vm_fault_cleanup(object, first_m); 2247 thread_interrupt_level(interruptible_state); 2248 2249 return (VM_FAULT_MEMORY_SHORTAGE); 2250 } 2251 /* 2252 * Must copy page into copy-object. 2253 */ 2254 vm_page_copy(m, copy_m); 2255 2256 /* 2257 * If the old page was in use by any users 2258 * of the copy-object, it must be removed 2259 * from all pmaps. (We can't know which 2260 * pmaps use it.) 2261 */ 2262 if (m->pmapped) 2263 pmap_disconnect(m->phys_page); 2264 2265 if (m->clustered) { 2266 VM_PAGE_COUNT_AS_PAGEIN(m); 2267 VM_PAGE_CONSUME_CLUSTERED(m); 2268 } 2269 /* 2270 * If there's a pager, then immediately 2271 * page out this page, using the "initialize" 2272 * option. Else, we use the copy. 2273 */ 2274 if ((!copy_object->pager_ready) 2275#if MACH_PAGEMAP 2276 || vm_external_state_get(copy_object->existence_map, copy_offset) == VM_EXTERNAL_STATE_ABSENT 2277#endif 2278 || VM_COMPRESSOR_PAGER_STATE_GET(copy_object, copy_offset) == VM_EXTERNAL_STATE_ABSENT 2279 ) { 2280 2281 vm_page_lockspin_queues(); 2282 assert(!m->cleaning); 2283 vm_page_activate(copy_m); 2284 vm_page_unlock_queues(); 2285 2286 SET_PAGE_DIRTY(copy_m, TRUE); 2287 PAGE_WAKEUP_DONE(copy_m); 2288 2289 } else if (copy_object->internal && 2290 (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE)) { 2291 /* 2292 * For internal objects check with the pager to see 2293 * if the page already exists in the backing store. 2294 * If yes, then we can drop the copy page. If not, 2295 * then we'll activate it, mark it dirty and keep it 2296 * around. 2297 */ 2298 2299 kern_return_t kr = KERN_SUCCESS; 2300 2301 memory_object_t copy_pager = copy_object->pager; 2302 assert(copy_pager != MEMORY_OBJECT_NULL); 2303 vm_object_paging_begin(copy_object); 2304 2305 vm_object_unlock(copy_object); 2306 2307 kr = memory_object_data_request( 2308 copy_pager, 2309 copy_offset + copy_object->paging_offset, 2310 0, /* Only query the pager. */ 2311 VM_PROT_READ, 2312 NULL); 2313 2314 vm_object_lock(copy_object); 2315 2316 vm_object_paging_end(copy_object); 2317 2318 /* 2319 * Since we dropped the copy_object's lock, 2320 * check whether we'll have to deallocate 2321 * the hard way. 2322 */ 2323 if ((copy_object->shadow != object) || (copy_object->ref_count == 1)) { 2324 vm_object_unlock(copy_object); 2325 vm_object_deallocate(copy_object); 2326 vm_object_lock(object); 2327 2328 continue; 2329 } 2330 if (kr == KERN_SUCCESS) { 2331 /* 2332 * The pager has the page. We don't want to overwrite 2333 * that page by sending this one out to the backing store. 2334 * So we drop the copy page. 2335 */ 2336 VM_PAGE_FREE(copy_m); 2337 2338 } else { 2339 /* 2340 * The pager doesn't have the page. We'll keep this one 2341 * around in the copy object. It might get sent out to 2342 * the backing store under memory pressure. 2343 */ 2344 vm_page_lockspin_queues(); 2345 assert(!m->cleaning); 2346 vm_page_activate(copy_m); 2347 vm_page_unlock_queues(); 2348 2349 SET_PAGE_DIRTY(copy_m, TRUE); 2350 PAGE_WAKEUP_DONE(copy_m); 2351 } 2352 } else { 2353 2354 assert(copy_m->busy == TRUE); 2355 assert(!m->cleaning); 2356 2357 /* 2358 * dirty is protected by the object lock 2359 */ 2360 SET_PAGE_DIRTY(copy_m, TRUE); 2361 2362 /* 2363 * The page is already ready for pageout: 2364 * not on pageout queues and busy. 2365 * Unlock everything except the 2366 * copy_object itself. 2367 */ 2368 vm_object_unlock(object); 2369 2370 /* 2371 * Write the page to the copy-object, 2372 * flushing it from the kernel. 2373 */ 2374 vm_pageout_initialize_page(copy_m); 2375 2376 /* 2377 * Since the pageout may have 2378 * temporarily dropped the 2379 * copy_object's lock, we 2380 * check whether we'll have 2381 * to deallocate the hard way. 2382 */ 2383 if ((copy_object->shadow != object) || (copy_object->ref_count == 1)) { 2384 vm_object_unlock(copy_object); 2385 vm_object_deallocate(copy_object); 2386 vm_object_lock(object); 2387 2388 continue; 2389 } 2390 /* 2391 * Pick back up the old object's 2392 * lock. [It is safe to do so, 2393 * since it must be deeper in the 2394 * object tree.] 2395 */ 2396 vm_object_lock(object); 2397 } 2398 2399 /* 2400 * Because we're pushing a page upward 2401 * in the object tree, we must restart 2402 * any faults that are waiting here. 2403 * [Note that this is an expansion of 2404 * PAGE_WAKEUP that uses the THREAD_RESTART 2405 * wait result]. Can't turn off the page's 2406 * busy bit because we're not done with it. 2407 */ 2408 if (m->wanted) { 2409 m->wanted = FALSE; 2410 thread_wakeup_with_result((event_t) m, THREAD_RESTART); 2411 } 2412 } 2413 /* 2414 * The reference count on copy_object must be 2415 * at least 2: one for our extra reference, 2416 * and at least one from the outside world 2417 * (we checked that when we last locked 2418 * copy_object). 2419 */ 2420 vm_object_lock_assert_exclusive(copy_object); 2421 copy_object->ref_count--; 2422 assert(copy_object->ref_count > 0); 2423 2424 VM_OBJ_RES_DECR(copy_object); 2425 vm_object_unlock(copy_object); 2426 2427 break; 2428 } 2429 2430done: 2431 *result_page = m; 2432 *top_page = first_m; 2433 2434 XPR(XPR_VM_FAULT, 2435 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n", 2436 object, offset, m, first_m, 0); 2437 2438 if (m != VM_PAGE_NULL) { 2439 retval = VM_FAULT_SUCCESS; 2440 2441 if (my_fault == DBG_PAGEIN_FAULT) { 2442 2443 VM_PAGE_COUNT_AS_PAGEIN(m); 2444 2445 if (m->object->internal) 2446 my_fault = DBG_PAGEIND_FAULT; 2447 else 2448 my_fault = DBG_PAGEINV_FAULT; 2449 2450 /* 2451 * evaluate access pattern and update state 2452 * vm_fault_deactivate_behind depends on the 2453 * state being up to date 2454 */ 2455 vm_fault_is_sequential(object, offset, fault_info->behavior); 2456 2457 vm_fault_deactivate_behind(object, offset, fault_info->behavior); 2458 } else if (my_fault == DBG_COMPRESSOR_FAULT || my_fault == DBG_COMPRESSOR_SWAPIN_FAULT) { 2459 2460 VM_STAT_INCR(decompressions); 2461 } 2462 if (type_of_fault) 2463 *type_of_fault = my_fault; 2464 } else { 2465 retval = VM_FAULT_SUCCESS_NO_VM_PAGE; 2466 assert(first_m == VM_PAGE_NULL); 2467 assert(object == first_object); 2468 } 2469 2470 thread_interrupt_level(interruptible_state); 2471 2472#if TRACEFAULTPAGE 2473 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */ 2474#endif 2475 return retval; 2476 2477backoff: 2478 thread_interrupt_level(interruptible_state); 2479 2480 if (wait_result == THREAD_INTERRUPTED) 2481 return (VM_FAULT_INTERRUPTED); 2482 return (VM_FAULT_RETRY); 2483 2484#undef RELEASE_PAGE 2485} 2486 2487 2488 2489/* 2490 * CODE SIGNING: 2491 * When soft faulting a page, we have to validate the page if: 2492 * 1. the page is being mapped in user space 2493 * 2. the page hasn't already been found to be "tainted" 2494 * 3. the page belongs to a code-signed object 2495 * 4. the page has not been validated yet or has been mapped for write. 2496 */ 2497#define VM_FAULT_NEED_CS_VALIDATION(pmap, page) \ 2498 ((pmap) != kernel_pmap /*1*/ && \ 2499 !(page)->cs_tainted /*2*/ && \ 2500 (page)->object->code_signed /*3*/ && \ 2501 (!(page)->cs_validated || (page)->wpmapped /*4*/)) 2502 2503 2504/* 2505 * page queue lock must NOT be held 2506 * m->object must be locked 2507 * 2508 * NOTE: m->object could be locked "shared" only if we are called 2509 * from vm_fault() as part of a soft fault. If so, we must be 2510 * careful not to modify the VM object in any way that is not 2511 * legal under a shared lock... 2512 */ 2513extern int proc_selfpid(void); 2514extern char *proc_name_address(void *p); 2515unsigned long cs_enter_tainted_rejected = 0; 2516unsigned long cs_enter_tainted_accepted = 0; 2517kern_return_t 2518vm_fault_enter(vm_page_t m, 2519 pmap_t pmap, 2520 vm_map_offset_t vaddr, 2521 vm_prot_t prot, 2522 vm_prot_t fault_type, 2523 boolean_t wired, 2524 boolean_t change_wiring, 2525 boolean_t no_cache, 2526 boolean_t cs_bypass, 2527 __unused int user_tag, 2528 int pmap_options, 2529 boolean_t *need_retry, 2530 int *type_of_fault) 2531{ 2532 kern_return_t kr, pe_result; 2533 boolean_t previously_pmapped = m->pmapped; 2534 boolean_t must_disconnect = 0; 2535 boolean_t map_is_switched, map_is_switch_protected; 2536 int cs_enforcement_enabled; 2537 2538 vm_object_lock_assert_held(m->object); 2539#if DEBUG 2540 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED); 2541#endif /* DEBUG */ 2542 2543 if (m->phys_page == vm_page_guard_addr) { 2544 assert(m->fictitious); 2545 return KERN_SUCCESS; 2546 } 2547 2548 if (*type_of_fault == DBG_ZERO_FILL_FAULT) { 2549 2550 vm_object_lock_assert_exclusive(m->object); 2551 2552 } else if ((fault_type & VM_PROT_WRITE) == 0) { 2553 /* 2554 * This is not a "write" fault, so we 2555 * might not have taken the object lock 2556 * exclusively and we might not be able 2557 * to update the "wpmapped" bit in 2558 * vm_fault_enter(). 2559 * Let's just grant read access to 2560 * the page for now and we'll 2561 * soft-fault again if we need write 2562 * access later... 2563 */ 2564 prot &= ~VM_PROT_WRITE; 2565 } 2566 if (m->pmapped == FALSE) { 2567 2568 if (m->clustered) { 2569 if (*type_of_fault == DBG_CACHE_HIT_FAULT) { 2570 /* 2571 * found it in the cache, but this 2572 * is the first fault-in of the page (m->pmapped == FALSE) 2573 * so it must have come in as part of 2574 * a cluster... account 1 pagein against it 2575 */ 2576 if (m->object->internal) 2577 *type_of_fault = DBG_PAGEIND_FAULT; 2578 else 2579 *type_of_fault = DBG_PAGEINV_FAULT; 2580 2581 VM_PAGE_COUNT_AS_PAGEIN(m); 2582 } 2583 VM_PAGE_CONSUME_CLUSTERED(m); 2584 } 2585 } 2586 2587 if (*type_of_fault != DBG_COW_FAULT) { 2588 DTRACE_VM2(as_fault, int, 1, (uint64_t *), NULL); 2589 2590 if (pmap == kernel_pmap) { 2591 DTRACE_VM2(kernel_asflt, int, 1, (uint64_t *), NULL); 2592 } 2593 } 2594 2595 /* Validate code signature if necessary. */ 2596 if (VM_FAULT_NEED_CS_VALIDATION(pmap, m)) { 2597 vm_object_lock_assert_exclusive(m->object); 2598 2599 if (m->cs_validated) { 2600 vm_cs_revalidates++; 2601 } 2602 2603 /* VM map is locked, so 1 ref will remain on VM object - 2604 * so no harm if vm_page_validate_cs drops the object lock */ 2605 vm_page_validate_cs(m); 2606 } 2607 2608#define page_immutable(m,prot) ((m)->cs_validated /*&& ((prot) & VM_PROT_EXECUTE)*/) 2609 2610 map_is_switched = ((pmap != vm_map_pmap(current_task()->map)) && 2611 (pmap == vm_map_pmap(current_thread()->map))); 2612 map_is_switch_protected = current_thread()->map->switch_protect; 2613 2614 /* If the map is switched, and is switch-protected, we must protect 2615 * some pages from being write-faulted: immutable pages because by 2616 * definition they may not be written, and executable pages because that 2617 * would provide a way to inject unsigned code. 2618 * If the page is immutable, we can simply return. However, we can't 2619 * immediately determine whether a page is executable anywhere. But, 2620 * we can disconnect it everywhere and remove the executable protection 2621 * from the current map. We do that below right before we do the 2622 * PMAP_ENTER. 2623 */ 2624 cs_enforcement_enabled = cs_enforcement(NULL); 2625 2626 if(cs_enforcement_enabled && map_is_switched && 2627 map_is_switch_protected && page_immutable(m, prot) && 2628 (prot & VM_PROT_WRITE)) 2629 { 2630 return KERN_CODESIGN_ERROR; 2631 } 2632 2633 /* A page could be tainted, or pose a risk of being tainted later. 2634 * Check whether the receiving process wants it, and make it feel 2635 * the consequences (that hapens in cs_invalid_page()). 2636 * For CS Enforcement, two other conditions will 2637 * cause that page to be tainted as well: 2638 * - pmapping an unsigned page executable - this means unsigned code; 2639 * - writeable mapping of a validated page - the content of that page 2640 * can be changed without the kernel noticing, therefore unsigned 2641 * code can be created 2642 */ 2643 if (m->cs_tainted || 2644 ((cs_enforcement_enabled && !cs_bypass ) && 2645 (/* The page is unsigned and wants to be executable */ 2646 (!m->cs_validated && (prot & VM_PROT_EXECUTE)) || 2647 /* The page should be immutable, but is in danger of being modified 2648 * This is the case where we want policy from the code directory - 2649 * is the page immutable or not? For now we have to assume that 2650 * code pages will be immutable, data pages not. 2651 * We'll assume a page is a code page if it has a code directory 2652 * and we fault for execution. 2653 * That is good enough since if we faulted the code page for 2654 * writing in another map before, it is wpmapped; if we fault 2655 * it for writing in this map later it will also be faulted for executing 2656 * at the same time; and if we fault for writing in another map 2657 * later, we will disconnect it from this pmap so we'll notice 2658 * the change. 2659 */ 2660 (page_immutable(m, prot) && ((prot & VM_PROT_WRITE) || m->wpmapped)) 2661 )) 2662 ) 2663 { 2664 /* We will have a tainted page. Have to handle the special case 2665 * of a switched map now. If the map is not switched, standard 2666 * procedure applies - call cs_invalid_page(). 2667 * If the map is switched, the real owner is invalid already. 2668 * There is no point in invalidating the switching process since 2669 * it will not be executing from the map. So we don't call 2670 * cs_invalid_page() in that case. */ 2671 boolean_t reject_page; 2672 if(map_is_switched) { 2673 assert(pmap==vm_map_pmap(current_thread()->map)); 2674 assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE)); 2675 reject_page = FALSE; 2676 } else { 2677 if (cs_debug > 5) 2678 printf("vm_fault: signed: %s validate: %s tainted: %s wpmapped: %s slid: %s prot: 0x%x\n", 2679 m->object->code_signed ? "yes" : "no", 2680 m->cs_validated ? "yes" : "no", 2681 m->cs_tainted ? "yes" : "no", 2682 m->wpmapped ? "yes" : "no", 2683 m->slid ? "yes" : "no", 2684 (int)prot); 2685 reject_page = cs_invalid_page((addr64_t) vaddr); 2686 } 2687 2688 if (reject_page) { 2689 /* reject the invalid page: abort the page fault */ 2690 int pid; 2691 const char *procname; 2692 task_t task; 2693 vm_object_t file_object, shadow; 2694 vm_object_offset_t file_offset; 2695 char *pathname, *filename; 2696 vm_size_t pathname_len, filename_len; 2697 boolean_t truncated_path; 2698#define __PATH_MAX 1024 2699 struct timespec mtime, cs_mtime; 2700 2701 kr = KERN_CODESIGN_ERROR; 2702 cs_enter_tainted_rejected++; 2703 2704 /* get process name and pid */ 2705 procname = "?"; 2706 task = current_task(); 2707 pid = proc_selfpid(); 2708 if (task->bsd_info != NULL) 2709 procname = proc_name_address(task->bsd_info); 2710 2711 /* get file's VM object */ 2712 file_object = m->object; 2713 file_offset = m->offset; 2714 for (shadow = file_object->shadow; 2715 shadow != VM_OBJECT_NULL; 2716 shadow = file_object->shadow) { 2717 vm_object_lock_shared(shadow); 2718 if (file_object != m->object) { 2719 vm_object_unlock(file_object); 2720 } 2721 file_offset += file_object->vo_shadow_offset; 2722 file_object = shadow; 2723 } 2724 2725 mtime.tv_sec = 0; 2726 mtime.tv_nsec = 0; 2727 cs_mtime.tv_sec = 0; 2728 cs_mtime.tv_nsec = 0; 2729 2730 /* get file's pathname and/or filename */ 2731 pathname = NULL; 2732 filename = NULL; 2733 pathname_len = 0; 2734 filename_len = 0; 2735 truncated_path = FALSE; 2736 if (file_object->pager == NULL) { 2737 /* no pager -> no file -> no pathname */ 2738 pathname = (char *) "<nil>"; 2739 } else { 2740 pathname = (char *)kalloc(__PATH_MAX * 2); 2741 if (pathname) { 2742 pathname[0] = '\0'; 2743 pathname_len = __PATH_MAX; 2744 filename = pathname + pathname_len; 2745 filename_len = __PATH_MAX; 2746 } 2747 vnode_pager_get_object_name(file_object->pager, 2748 pathname, 2749 pathname_len, 2750 filename, 2751 filename_len, 2752 &truncated_path); 2753 vnode_pager_get_object_mtime(file_object->pager, 2754 &mtime, 2755 &cs_mtime); 2756 } 2757 printf("CODE SIGNING: process %d[%s]: " 2758 "rejecting invalid page at address 0x%llx " 2759 "from offset 0x%llx in file \"%s%s%s\" " 2760 "(cs_mtime:%lu.%ld %s mtime:%lu.%ld) " 2761 "(signed:%d validated:%d tainted:%d " 2762 "wpmapped:%d slid:%d)\n", 2763 pid, procname, (addr64_t) vaddr, 2764 file_offset, 2765 (pathname ? pathname : ""), 2766 (truncated_path ? "/.../" : ""), 2767 (truncated_path ? filename : ""), 2768 cs_mtime.tv_sec, cs_mtime.tv_nsec, 2769 ((cs_mtime.tv_sec == mtime.tv_sec && 2770 cs_mtime.tv_nsec == mtime.tv_nsec) 2771 ? "==" 2772 : "!="), 2773 mtime.tv_sec, mtime.tv_nsec, 2774 m->object->code_signed, 2775 m->cs_validated, 2776 m->cs_tainted, 2777 m->wpmapped, 2778 m->slid); 2779 if (file_object != m->object) { 2780 vm_object_unlock(file_object); 2781 } 2782 if (pathname_len != 0) { 2783 kfree(pathname, __PATH_MAX * 2); 2784 pathname = NULL; 2785 filename = NULL; 2786 } 2787 } else { 2788 /* proceed with the invalid page */ 2789 kr = KERN_SUCCESS; 2790 if (!m->cs_validated) { 2791 /* 2792 * This page has not been validated, so it 2793 * must not belong to a code-signed object 2794 * and should not be forcefully considered 2795 * as tainted. 2796 * We're just concerned about it here because 2797 * we've been asked to "execute" it but that 2798 * does not mean that it should cause other 2799 * accesses to fail. 2800 * This happens when a debugger sets a 2801 * breakpoint and we then execute code in 2802 * that page. Marking the page as "tainted" 2803 * would cause any inspection tool ("leaks", 2804 * "vmmap", "CrashReporter", ...) to get killed 2805 * due to code-signing violation on that page, 2806 * even though they're just reading it and not 2807 * executing from it. 2808 */ 2809 assert(!m->object->code_signed); 2810 } else { 2811 /* 2812 * Page might have been tainted before or not; 2813 * now it definitively is. If the page wasn't 2814 * tainted, we must disconnect it from all 2815 * pmaps later, to force existing mappings 2816 * through that code path for re-consideration 2817 * of the validity of that page. 2818 */ 2819 must_disconnect = !m->cs_tainted; 2820 m->cs_tainted = TRUE; 2821 } 2822 cs_enter_tainted_accepted++; 2823 } 2824 if (kr != KERN_SUCCESS) { 2825 if (cs_debug) { 2826 printf("CODESIGNING: vm_fault_enter(0x%llx): " 2827 "*** INVALID PAGE ***\n", 2828 (long long)vaddr); 2829 } 2830#if !SECURE_KERNEL 2831 if (cs_enforcement_panic) { 2832 panic("CODESIGNING: panicking on invalid page\n"); 2833 } 2834#endif 2835 } 2836 2837 } else { 2838 /* proceed with the valid page */ 2839 kr = KERN_SUCCESS; 2840 } 2841 2842 boolean_t page_queues_locked = FALSE; 2843#define __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED() \ 2844MACRO_BEGIN \ 2845 if (! page_queues_locked) { \ 2846 page_queues_locked = TRUE; \ 2847 vm_page_lockspin_queues(); \ 2848 } \ 2849MACRO_END 2850#define __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED() \ 2851MACRO_BEGIN \ 2852 if (page_queues_locked) { \ 2853 page_queues_locked = FALSE; \ 2854 vm_page_unlock_queues(); \ 2855 } \ 2856MACRO_END 2857 2858 /* 2859 * Hold queues lock to manipulate 2860 * the page queues. Change wiring 2861 * case is obvious. 2862 */ 2863 assert(m->compressor || m->object != compressor_object); 2864 if (m->compressor) { 2865 /* 2866 * Compressor pages are neither wired 2867 * nor pageable and should never change. 2868 */ 2869 assert(m->object == compressor_object); 2870 } else if (change_wiring) { 2871 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); 2872 2873 if (wired) { 2874 if (kr == KERN_SUCCESS) { 2875 vm_page_wire(m); 2876 } 2877 } else { 2878 vm_page_unwire(m, TRUE); 2879 } 2880 /* we keep the page queues lock, if we need it later */ 2881 2882 } else { 2883 if (kr != KERN_SUCCESS) { 2884 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); 2885 vm_page_deactivate(m); 2886 /* we keep the page queues lock, if we need it later */ 2887 } else if (((!m->active && !m->inactive) || 2888 m->clean_queue || 2889 no_cache) && 2890 !VM_PAGE_WIRED(m) && !m->throttled) { 2891 2892 if (vm_page_local_q && 2893 !no_cache && 2894 (*type_of_fault == DBG_COW_FAULT || 2895 *type_of_fault == DBG_ZERO_FILL_FAULT) ) { 2896 struct vpl *lq; 2897 uint32_t lid; 2898 2899 __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); 2900 vm_object_lock_assert_exclusive(m->object); 2901 2902 /* 2903 * we got a local queue to stuff this 2904 * new page on... 2905 * its safe to manipulate local and 2906 * local_id at this point since we're 2907 * behind an exclusive object lock and 2908 * the page is not on any global queue. 2909 * 2910 * we'll use the current cpu number to 2911 * select the queue note that we don't 2912 * need to disable preemption... we're 2913 * going to behind the local queue's 2914 * lock to do the real work 2915 */ 2916 lid = cpu_number(); 2917 2918 lq = &vm_page_local_q[lid].vpl_un.vpl; 2919 2920 VPL_LOCK(&lq->vpl_lock); 2921 2922 queue_enter(&lq->vpl_queue, m, 2923 vm_page_t, pageq); 2924 m->local = TRUE; 2925 m->local_id = lid; 2926 lq->vpl_count++; 2927 2928 if (m->object->internal) 2929 lq->vpl_internal_count++; 2930 else 2931 lq->vpl_external_count++; 2932 2933 VPL_UNLOCK(&lq->vpl_lock); 2934 2935 if (lq->vpl_count > vm_page_local_q_soft_limit) 2936 { 2937 /* 2938 * we're beyond the soft limit 2939 * for the local queue 2940 * vm_page_reactivate_local will 2941 * 'try' to take the global page 2942 * queue lock... if it can't 2943 * that's ok... we'll let the 2944 * queue continue to grow up 2945 * to the hard limit... at that 2946 * point we'll wait for the 2947 * lock... once we've got the 2948 * lock, we'll transfer all of 2949 * the pages from the local 2950 * queue to the global active 2951 * queue 2952 */ 2953 vm_page_reactivate_local(lid, FALSE, FALSE); 2954 } 2955 } else { 2956 2957 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); 2958 2959 /* 2960 * test again now that we hold the 2961 * page queue lock 2962 */ 2963 if (!VM_PAGE_WIRED(m)) { 2964 if (m->clean_queue) { 2965 VM_PAGE_QUEUES_REMOVE(m); 2966 2967 vm_pageout_cleaned_reactivated++; 2968 vm_pageout_cleaned_fault_reactivated++; 2969 } 2970 2971 if ((!m->active && 2972 !m->inactive) || 2973 no_cache) { 2974 /* 2975 * If this is a no_cache mapping 2976 * and the page has never been 2977 * mapped before or was 2978 * previously a no_cache page, 2979 * then we want to leave pages 2980 * in the speculative state so 2981 * that they can be readily 2982 * recycled if free memory runs 2983 * low. Otherwise the page is 2984 * activated as normal. 2985 */ 2986 2987 if (no_cache && 2988 (!previously_pmapped || 2989 m->no_cache)) { 2990 m->no_cache = TRUE; 2991 2992 if (!m->speculative) 2993 vm_page_speculate(m, FALSE); 2994 2995 } else if (!m->active && 2996 !m->inactive) { 2997 2998 vm_page_activate(m); 2999 } 3000 } 3001 } 3002 /* we keep the page queues lock, if we need it later */ 3003 } 3004 } 3005 } 3006 /* we're done with the page queues lock, if we ever took it */ 3007 __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); 3008 3009 3010 /* If we have a KERN_SUCCESS from the previous checks, we either have 3011 * a good page, or a tainted page that has been accepted by the process. 3012 * In both cases the page will be entered into the pmap. 3013 * If the page is writeable, we need to disconnect it from other pmaps 3014 * now so those processes can take note. 3015 */ 3016 if (kr == KERN_SUCCESS) { 3017 3018 /* 3019 * NOTE: we may only hold the vm_object lock SHARED 3020 * at this point, so we need the phys_page lock to 3021 * properly serialize updating the pmapped and 3022 * xpmapped bits 3023 */ 3024 if ((prot & VM_PROT_EXECUTE) && !m->xpmapped) { 3025 3026 pmap_lock_phys_page(m->phys_page); 3027 /* 3028 * go ahead and take the opportunity 3029 * to set 'pmapped' here so that we don't 3030 * need to grab this lock a 2nd time 3031 * just below 3032 */ 3033 m->pmapped = TRUE; 3034 3035 if (!m->xpmapped) { 3036 3037 m->xpmapped = TRUE; 3038 3039 pmap_unlock_phys_page(m->phys_page); 3040 3041 if (!m->object->internal) 3042 OSAddAtomic(1, &vm_page_xpmapped_external_count); 3043 3044 if ((COMPRESSED_PAGER_IS_ACTIVE) && 3045 m->object->internal && 3046 m->object->pager != NULL) { 3047 /* 3048 * This page could have been 3049 * uncompressed by the 3050 * compressor pager and its 3051 * contents might be only in 3052 * the data cache. 3053 * Since it's being mapped for 3054 * "execute" for the fist time, 3055 * make sure the icache is in 3056 * sync. 3057 */ 3058 pmap_sync_page_data_phys(m->phys_page); 3059 } 3060 } else 3061 pmap_unlock_phys_page(m->phys_page); 3062 } else { 3063 if (m->pmapped == FALSE) { 3064 pmap_lock_phys_page(m->phys_page); 3065 m->pmapped = TRUE; 3066 pmap_unlock_phys_page(m->phys_page); 3067 } 3068 } 3069 if (vm_page_is_slideable(m)) { 3070 boolean_t was_busy = m->busy; 3071 3072 vm_object_lock_assert_exclusive(m->object); 3073 3074 m->busy = TRUE; 3075 kr = vm_page_slide(m, 0); 3076 assert(m->busy); 3077 if(!was_busy) { 3078 PAGE_WAKEUP_DONE(m); 3079 } 3080 if (kr != KERN_SUCCESS) { 3081 /* 3082 * This page has not been slid correctly, 3083 * do not do the pmap_enter() ! 3084 * Let vm_fault_enter() return the error 3085 * so the caller can fail the fault. 3086 */ 3087 goto after_the_pmap_enter; 3088 } 3089 } 3090 3091 if (fault_type & VM_PROT_WRITE) { 3092 3093 if (m->wpmapped == FALSE) { 3094 vm_object_lock_assert_exclusive(m->object); 3095 3096 m->wpmapped = TRUE; 3097 } 3098 if (must_disconnect) { 3099 /* 3100 * We can only get here 3101 * because of the CSE logic 3102 */ 3103 assert(cs_enforcement_enabled); 3104 pmap_disconnect(m->phys_page); 3105 /* 3106 * If we are faulting for a write, we can clear 3107 * the execute bit - that will ensure the page is 3108 * checked again before being executable, which 3109 * protects against a map switch. 3110 * This only happens the first time the page 3111 * gets tainted, so we won't get stuck here 3112 * to make an already writeable page executable. 3113 */ 3114 if (!cs_bypass){ 3115 prot &= ~VM_PROT_EXECUTE; 3116 } 3117 } 3118 } 3119 3120 /* Prevent a deadlock by not 3121 * holding the object lock if we need to wait for a page in 3122 * pmap_enter() - <rdar://problem/7138958> */ 3123 PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, fault_type, 0, 3124 wired, 3125 pmap_options | PMAP_OPTIONS_NOWAIT, 3126 pe_result); 3127 3128 if(pe_result == KERN_RESOURCE_SHORTAGE) { 3129 3130 if (need_retry) { 3131 /* 3132 * this will be non-null in the case where we hold the lock 3133 * on the top-object in this chain... we can't just drop 3134 * the lock on the object we're inserting the page into 3135 * and recall the PMAP_ENTER since we can still cause 3136 * a deadlock if one of the critical paths tries to 3137 * acquire the lock on the top-object and we're blocked 3138 * in PMAP_ENTER waiting for memory... our only recourse 3139 * is to deal with it at a higher level where we can 3140 * drop both locks. 3141 */ 3142 *need_retry = TRUE; 3143 vm_pmap_enter_retried++; 3144 goto after_the_pmap_enter; 3145 } 3146 /* The nonblocking version of pmap_enter did not succeed. 3147 * and we don't need to drop other locks and retry 3148 * at the level above us, so 3149 * use the blocking version instead. Requires marking 3150 * the page busy and unlocking the object */ 3151 boolean_t was_busy = m->busy; 3152 3153 vm_object_lock_assert_exclusive(m->object); 3154 3155 m->busy = TRUE; 3156 vm_object_unlock(m->object); 3157 3158 PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, fault_type, 3159 0, wired, 3160 pmap_options, pe_result); 3161 3162 /* Take the object lock again. */ 3163 vm_object_lock(m->object); 3164 3165 /* If the page was busy, someone else will wake it up. 3166 * Otherwise, we have to do it now. */ 3167 assert(m->busy); 3168 if(!was_busy) { 3169 PAGE_WAKEUP_DONE(m); 3170 } 3171 vm_pmap_enter_blocked++; 3172 } 3173 } 3174 3175after_the_pmap_enter: 3176 return kr; 3177} 3178 3179void 3180vm_pre_fault(vm_map_offset_t vaddr) 3181{ 3182 if (pmap_find_phys(current_map()->pmap, vaddr) == 0) { 3183 3184 vm_fault(current_map(), /* map */ 3185 vaddr, /* vaddr */ 3186 VM_PROT_READ, /* fault_type */ 3187 FALSE, /* change_wiring */ 3188 THREAD_UNINT, /* interruptible */ 3189 NULL, /* caller_pmap */ 3190 0 /* caller_pmap_addr */); 3191 } 3192} 3193 3194 3195/* 3196 * Routine: vm_fault 3197 * Purpose: 3198 * Handle page faults, including pseudo-faults 3199 * used to change the wiring status of pages. 3200 * Returns: 3201 * Explicit continuations have been removed. 3202 * Implementation: 3203 * vm_fault and vm_fault_page save mucho state 3204 * in the moral equivalent of a closure. The state 3205 * structure is allocated when first entering vm_fault 3206 * and deallocated when leaving vm_fault. 3207 */ 3208 3209extern int _map_enter_debug; 3210 3211unsigned long vm_fault_collapse_total = 0; 3212unsigned long vm_fault_collapse_skipped = 0; 3213 3214 3215kern_return_t 3216vm_fault( 3217 vm_map_t map, 3218 vm_map_offset_t vaddr, 3219 vm_prot_t fault_type, 3220 boolean_t change_wiring, 3221 int interruptible, 3222 pmap_t caller_pmap, 3223 vm_map_offset_t caller_pmap_addr) 3224{ 3225 return vm_fault_internal(map, vaddr, fault_type, change_wiring, 3226 interruptible, caller_pmap, caller_pmap_addr, 3227 NULL); 3228} 3229 3230kern_return_t 3231vm_fault_internal( 3232 vm_map_t map, 3233 vm_map_offset_t vaddr, 3234 vm_prot_t fault_type, 3235 boolean_t change_wiring, 3236 int interruptible, 3237 pmap_t caller_pmap, 3238 vm_map_offset_t caller_pmap_addr, 3239 ppnum_t *physpage_p) 3240{ 3241 vm_map_version_t version; /* Map version for verificiation */ 3242 boolean_t wired; /* Should mapping be wired down? */ 3243 vm_object_t object; /* Top-level object */ 3244 vm_object_offset_t offset; /* Top-level offset */ 3245 vm_prot_t prot; /* Protection for mapping */ 3246 vm_object_t old_copy_object; /* Saved copy object */ 3247 vm_page_t result_page; /* Result of vm_fault_page */ 3248 vm_page_t top_page; /* Placeholder page */ 3249 kern_return_t kr; 3250 3251 vm_page_t m; /* Fast access to result_page */ 3252 kern_return_t error_code; 3253 vm_object_t cur_object; 3254 vm_object_offset_t cur_offset; 3255 vm_page_t cur_m; 3256 vm_object_t new_object; 3257 int type_of_fault; 3258 pmap_t pmap; 3259 boolean_t interruptible_state; 3260 vm_map_t real_map = map; 3261 vm_map_t original_map = map; 3262 vm_prot_t original_fault_type; 3263 struct vm_object_fault_info fault_info; 3264 boolean_t need_collapse = FALSE; 3265 boolean_t need_retry = FALSE; 3266 boolean_t *need_retry_ptr = NULL; 3267 int object_lock_type = 0; 3268 int cur_object_lock_type; 3269 vm_object_t top_object = VM_OBJECT_NULL; 3270 int throttle_delay; 3271 int compressed_count_delta; 3272 3273 3274 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 3275 (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START, 3276 ((uint64_t)vaddr >> 32), 3277 vaddr, 3278 (map == kernel_map), 3279 0, 3280 0); 3281 3282 if (get_preemption_level() != 0) { 3283 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 3284 (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END, 3285 ((uint64_t)vaddr >> 32), 3286 vaddr, 3287 KERN_FAILURE, 3288 0, 3289 0); 3290 3291 return (KERN_FAILURE); 3292 } 3293 3294 interruptible_state = thread_interrupt_level(interruptible); 3295 3296 VM_STAT_INCR(faults); 3297 current_task()->faults++; 3298 original_fault_type = fault_type; 3299 3300 if (fault_type & VM_PROT_WRITE) 3301 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3302 else 3303 object_lock_type = OBJECT_LOCK_SHARED; 3304 3305 cur_object_lock_type = OBJECT_LOCK_SHARED; 3306 3307RetryFault: 3308 /* 3309 * assume we will hit a page in the cache 3310 * otherwise, explicitly override with 3311 * the real fault type once we determine it 3312 */ 3313 type_of_fault = DBG_CACHE_HIT_FAULT; 3314 3315 /* 3316 * Find the backing store object and offset into 3317 * it to begin the search. 3318 */ 3319 fault_type = original_fault_type; 3320 map = original_map; 3321 vm_map_lock_read(map); 3322 3323 kr = vm_map_lookup_locked(&map, vaddr, fault_type, 3324 object_lock_type, &version, 3325 &object, &offset, &prot, &wired, 3326 &fault_info, 3327 &real_map); 3328 3329 if (kr != KERN_SUCCESS) { 3330 vm_map_unlock_read(map); 3331 goto done; 3332 } 3333 pmap = real_map->pmap; 3334 fault_info.interruptible = interruptible; 3335 fault_info.stealth = FALSE; 3336 fault_info.io_sync = FALSE; 3337 fault_info.mark_zf_absent = FALSE; 3338 fault_info.batch_pmap_op = FALSE; 3339 3340 /* 3341 * If the page is wired, we must fault for the current protection 3342 * value, to avoid further faults. 3343 */ 3344 if (wired) { 3345 fault_type = prot | VM_PROT_WRITE; 3346 /* 3347 * since we're treating this fault as a 'write' 3348 * we must hold the top object lock exclusively 3349 */ 3350 if (object_lock_type == OBJECT_LOCK_SHARED) { 3351 3352 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3353 3354 if (vm_object_lock_upgrade(object) == FALSE) { 3355 /* 3356 * couldn't upgrade, so explictly 3357 * take the lock exclusively 3358 */ 3359 vm_object_lock(object); 3360 } 3361 } 3362 } 3363 3364#if VM_FAULT_CLASSIFY 3365 /* 3366 * Temporary data gathering code 3367 */ 3368 vm_fault_classify(object, offset, fault_type); 3369#endif 3370 /* 3371 * Fast fault code. The basic idea is to do as much as 3372 * possible while holding the map lock and object locks. 3373 * Busy pages are not used until the object lock has to 3374 * be dropped to do something (copy, zero fill, pmap enter). 3375 * Similarly, paging references aren't acquired until that 3376 * point, and object references aren't used. 3377 * 3378 * If we can figure out what to do 3379 * (zero fill, copy on write, pmap enter) while holding 3380 * the locks, then it gets done. Otherwise, we give up, 3381 * and use the original fault path (which doesn't hold 3382 * the map lock, and relies on busy pages). 3383 * The give up cases include: 3384 * - Have to talk to pager. 3385 * - Page is busy, absent or in error. 3386 * - Pager has locked out desired access. 3387 * - Fault needs to be restarted. 3388 * - Have to push page into copy object. 3389 * 3390 * The code is an infinite loop that moves one level down 3391 * the shadow chain each time. cur_object and cur_offset 3392 * refer to the current object being examined. object and offset 3393 * are the original object from the map. The loop is at the 3394 * top level if and only if object and cur_object are the same. 3395 * 3396 * Invariants: Map lock is held throughout. Lock is held on 3397 * original object and cur_object (if different) when 3398 * continuing or exiting loop. 3399 * 3400 */ 3401 3402 3403 /* 3404 * If this page is to be inserted in a copy delay object 3405 * for writing, and if the object has a copy, then the 3406 * copy delay strategy is implemented in the slow fault page. 3407 */ 3408 if (object->copy_strategy == MEMORY_OBJECT_COPY_DELAY && 3409 object->copy != VM_OBJECT_NULL && (fault_type & VM_PROT_WRITE)) 3410 goto handle_copy_delay; 3411 3412 cur_object = object; 3413 cur_offset = offset; 3414 3415 while (TRUE) { 3416 if (!cur_object->pager_created && 3417 cur_object->phys_contiguous) /* superpage */ 3418 break; 3419 3420 if (cur_object->blocked_access) { 3421 /* 3422 * Access to this VM object has been blocked. 3423 * Let the slow path handle it. 3424 */ 3425 break; 3426 } 3427 3428 m = vm_page_lookup(cur_object, cur_offset); 3429 3430 if (m != VM_PAGE_NULL) { 3431 if (m->busy) { 3432 wait_result_t result; 3433 3434 /* 3435 * in order to do the PAGE_ASSERT_WAIT, we must 3436 * have object that 'm' belongs to locked exclusively 3437 */ 3438 if (object != cur_object) { 3439 3440 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3441 3442 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3443 3444 if (vm_object_lock_upgrade(cur_object) == FALSE) { 3445 /* 3446 * couldn't upgrade so go do a full retry 3447 * immediately since we can no longer be 3448 * certain about cur_object (since we 3449 * don't hold a reference on it)... 3450 * first drop the top object lock 3451 */ 3452 vm_object_unlock(object); 3453 3454 vm_map_unlock_read(map); 3455 if (real_map != map) 3456 vm_map_unlock(real_map); 3457 3458 goto RetryFault; 3459 } 3460 } 3461 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3462 3463 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3464 3465 if (vm_object_lock_upgrade(object) == FALSE) { 3466 /* 3467 * couldn't upgrade, so explictly take the lock 3468 * exclusively and go relookup the page since we 3469 * will have dropped the object lock and 3470 * a different thread could have inserted 3471 * a page at this offset 3472 * no need for a full retry since we're 3473 * at the top level of the object chain 3474 */ 3475 vm_object_lock(object); 3476 3477 continue; 3478 } 3479 } 3480 if (m->pageout_queue && m->object->internal && COMPRESSED_PAGER_IS_ACTIVE) { 3481 /* 3482 * m->busy == TRUE and the object is locked exclusively 3483 * if m->pageout_queue == TRUE after we acquire the 3484 * queues lock, we are guaranteed that it is stable on 3485 * the pageout queue and therefore reclaimable 3486 * 3487 * NOTE: this is only true for the internal pageout queue 3488 * in the compressor world 3489 */ 3490 vm_page_lock_queues(); 3491 3492 if (m->pageout_queue) { 3493 vm_pageout_throttle_up(m); 3494 vm_page_unlock_queues(); 3495 3496 PAGE_WAKEUP_DONE(m); 3497 goto reclaimed_from_pageout; 3498 } 3499 vm_page_unlock_queues(); 3500 } 3501 if (object != cur_object) 3502 vm_object_unlock(object); 3503 3504 vm_map_unlock_read(map); 3505 if (real_map != map) 3506 vm_map_unlock(real_map); 3507 3508 result = PAGE_ASSERT_WAIT(m, interruptible); 3509 3510 vm_object_unlock(cur_object); 3511 3512 if (result == THREAD_WAITING) { 3513 result = thread_block(THREAD_CONTINUE_NULL); 3514 3515 counter(c_vm_fault_page_block_busy_kernel++); 3516 } 3517 if (result == THREAD_AWAKENED || result == THREAD_RESTART) 3518 goto RetryFault; 3519 3520 kr = KERN_ABORTED; 3521 goto done; 3522 } 3523reclaimed_from_pageout: 3524 if (m->laundry) { 3525 if (object != cur_object) { 3526 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3527 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3528 3529 vm_object_unlock(object); 3530 vm_object_unlock(cur_object); 3531 3532 vm_map_unlock_read(map); 3533 if (real_map != map) 3534 vm_map_unlock(real_map); 3535 3536 goto RetryFault; 3537 } 3538 3539 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3540 3541 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3542 3543 if (vm_object_lock_upgrade(object) == FALSE) { 3544 /* 3545 * couldn't upgrade, so explictly take the lock 3546 * exclusively and go relookup the page since we 3547 * will have dropped the object lock and 3548 * a different thread could have inserted 3549 * a page at this offset 3550 * no need for a full retry since we're 3551 * at the top level of the object chain 3552 */ 3553 vm_object_lock(object); 3554 3555 continue; 3556 } 3557 } 3558 m->pageout = FALSE; 3559 3560 vm_pageout_steal_laundry(m, FALSE); 3561 } 3562 3563 if (m->phys_page == vm_page_guard_addr) { 3564 /* 3565 * Guard page: let the slow path deal with it 3566 */ 3567 break; 3568 } 3569 if (m->unusual && (m->error || m->restart || m->private || m->absent)) { 3570 /* 3571 * Unusual case... let the slow path deal with it 3572 */ 3573 break; 3574 } 3575 if (VM_OBJECT_PURGEABLE_FAULT_ERROR(m->object)) { 3576 if (object != cur_object) 3577 vm_object_unlock(object); 3578 vm_map_unlock_read(map); 3579 if (real_map != map) 3580 vm_map_unlock(real_map); 3581 vm_object_unlock(cur_object); 3582 kr = KERN_MEMORY_ERROR; 3583 goto done; 3584 } 3585 3586 if (m->encrypted) { 3587 /* 3588 * ENCRYPTED SWAP: 3589 * We've soft-faulted (because it's not in the page 3590 * table) on an encrypted page. 3591 * Keep the page "busy" so that no one messes with 3592 * it during the decryption. 3593 * Release the extra locks we're holding, keep only 3594 * the page's VM object lock. 3595 * 3596 * in order to set 'busy' on 'm', we must 3597 * have object that 'm' belongs to locked exclusively 3598 */ 3599 if (object != cur_object) { 3600 vm_object_unlock(object); 3601 3602 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3603 3604 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3605 3606 if (vm_object_lock_upgrade(cur_object) == FALSE) { 3607 /* 3608 * couldn't upgrade so go do a full retry 3609 * immediately since we've already dropped 3610 * the top object lock associated with this page 3611 * and the current one got dropped due to the 3612 * failed upgrade... the state is no longer valid 3613 */ 3614 vm_map_unlock_read(map); 3615 if (real_map != map) 3616 vm_map_unlock(real_map); 3617 3618 goto RetryFault; 3619 } 3620 } 3621 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3622 3623 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3624 3625 if (vm_object_lock_upgrade(object) == FALSE) { 3626 /* 3627 * couldn't upgrade, so explictly take the lock 3628 * exclusively and go relookup the page since we 3629 * will have dropped the object lock and 3630 * a different thread could have inserted 3631 * a page at this offset 3632 * no need for a full retry since we're 3633 * at the top level of the object chain 3634 */ 3635 vm_object_lock(object); 3636 3637 continue; 3638 } 3639 } 3640 m->busy = TRUE; 3641 3642 vm_map_unlock_read(map); 3643 if (real_map != map) 3644 vm_map_unlock(real_map); 3645 3646 vm_page_decrypt(m, 0); 3647 3648 assert(m->busy); 3649 PAGE_WAKEUP_DONE(m); 3650 3651 vm_object_unlock(cur_object); 3652 /* 3653 * Retry from the top, in case anything 3654 * changed while we were decrypting... 3655 */ 3656 goto RetryFault; 3657 } 3658 ASSERT_PAGE_DECRYPTED(m); 3659 3660 if(vm_page_is_slideable(m)) { 3661 /* 3662 * We might need to slide this page, and so, 3663 * we want to hold the VM object exclusively. 3664 */ 3665 if (object != cur_object) { 3666 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3667 vm_object_unlock(object); 3668 vm_object_unlock(cur_object); 3669 3670 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3671 3672 vm_map_unlock_read(map); 3673 if (real_map != map) 3674 vm_map_unlock(real_map); 3675 3676 goto RetryFault; 3677 } 3678 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3679 3680 vm_object_unlock(object); 3681 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3682 vm_map_unlock_read(map); 3683 goto RetryFault; 3684 } 3685 } 3686 3687 if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m) || 3688 (physpage_p != NULL && (prot & VM_PROT_WRITE))) { 3689upgrade_for_validation: 3690 /* 3691 * We might need to validate this page 3692 * against its code signature, so we 3693 * want to hold the VM object exclusively. 3694 */ 3695 if (object != cur_object) { 3696 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3697 vm_object_unlock(object); 3698 vm_object_unlock(cur_object); 3699 3700 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3701 3702 vm_map_unlock_read(map); 3703 if (real_map != map) 3704 vm_map_unlock(real_map); 3705 3706 goto RetryFault; 3707 } 3708 3709 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3710 3711 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3712 3713 if (vm_object_lock_upgrade(object) == FALSE) { 3714 /* 3715 * couldn't upgrade, so explictly take the lock 3716 * exclusively and go relookup the page since we 3717 * will have dropped the object lock and 3718 * a different thread could have inserted 3719 * a page at this offset 3720 * no need for a full retry since we're 3721 * at the top level of the object chain 3722 */ 3723 vm_object_lock(object); 3724 3725 continue; 3726 } 3727 } 3728 } 3729 /* 3730 * Two cases of map in faults: 3731 * - At top level w/o copy object. 3732 * - Read fault anywhere. 3733 * --> must disallow write. 3734 */ 3735 3736 if (object == cur_object && object->copy == VM_OBJECT_NULL) { 3737 3738 goto FastPmapEnter; 3739 } 3740 3741 if ((fault_type & VM_PROT_WRITE) == 0) { 3742 3743 if (object != cur_object) { 3744 /* 3745 * We still need to hold the top object 3746 * lock here to prevent a race between 3747 * a read fault (taking only "shared" 3748 * locks) and a write fault (taking 3749 * an "exclusive" lock on the top 3750 * object. 3751 * Otherwise, as soon as we release the 3752 * top lock, the write fault could 3753 * proceed and actually complete before 3754 * the read fault, and the copied page's 3755 * translation could then be overwritten 3756 * by the read fault's translation for 3757 * the original page. 3758 * 3759 * Let's just record what the top object 3760 * is and we'll release it later. 3761 */ 3762 top_object = object; 3763 3764 /* 3765 * switch to the object that has the new page 3766 */ 3767 object = cur_object; 3768 object_lock_type = cur_object_lock_type; 3769 } 3770FastPmapEnter: 3771 /* 3772 * prepare for the pmap_enter... 3773 * object and map are both locked 3774 * m contains valid data 3775 * object == m->object 3776 * cur_object == NULL or it's been unlocked 3777 * no paging references on either object or cur_object 3778 */ 3779 if (top_object != VM_OBJECT_NULL || object_lock_type != OBJECT_LOCK_EXCLUSIVE) 3780 need_retry_ptr = &need_retry; 3781 else 3782 need_retry_ptr = NULL; 3783 3784 if (caller_pmap) { 3785 kr = vm_fault_enter(m, 3786 caller_pmap, 3787 caller_pmap_addr, 3788 prot, 3789 fault_type, 3790 wired, 3791 change_wiring, 3792 fault_info.no_cache, 3793 fault_info.cs_bypass, 3794 fault_info.user_tag, 3795 fault_info.pmap_options, 3796 need_retry_ptr, 3797 &type_of_fault); 3798 } else { 3799 kr = vm_fault_enter(m, 3800 pmap, 3801 vaddr, 3802 prot, 3803 fault_type, 3804 wired, 3805 change_wiring, 3806 fault_info.no_cache, 3807 fault_info.cs_bypass, 3808 fault_info.user_tag, 3809 fault_info.pmap_options, 3810 need_retry_ptr, 3811 &type_of_fault); 3812 } 3813 3814 if (kr == KERN_SUCCESS && 3815 physpage_p != NULL) { 3816 /* for vm_map_wire_and_extract() */ 3817 *physpage_p = m->phys_page; 3818 if (prot & VM_PROT_WRITE) { 3819 vm_object_lock_assert_exclusive( 3820 m->object); 3821 m->dirty = TRUE; 3822 } 3823 } 3824 3825 if (top_object != VM_OBJECT_NULL) { 3826 /* 3827 * It's safe to drop the top object 3828 * now that we've done our 3829 * vm_fault_enter(). Any other fault 3830 * in progress for that virtual 3831 * address will either find our page 3832 * and translation or put in a new page 3833 * and translation. 3834 */ 3835 vm_object_unlock(top_object); 3836 top_object = VM_OBJECT_NULL; 3837 } 3838 3839 if (need_collapse == TRUE) 3840 vm_object_collapse(object, offset, TRUE); 3841 3842 if (need_retry == FALSE && 3843 (type_of_fault == DBG_PAGEIND_FAULT || type_of_fault == DBG_PAGEINV_FAULT || type_of_fault == DBG_CACHE_HIT_FAULT)) { 3844 /* 3845 * evaluate access pattern and update state 3846 * vm_fault_deactivate_behind depends on the 3847 * state being up to date 3848 */ 3849 vm_fault_is_sequential(object, cur_offset, fault_info.behavior); 3850 3851 vm_fault_deactivate_behind(object, cur_offset, fault_info.behavior); 3852 } 3853 /* 3854 * That's it, clean up and return. 3855 */ 3856 if (m->busy) 3857 PAGE_WAKEUP_DONE(m); 3858 3859 vm_object_unlock(object); 3860 3861 vm_map_unlock_read(map); 3862 if (real_map != map) 3863 vm_map_unlock(real_map); 3864 3865 if (need_retry == TRUE) { 3866 /* 3867 * vm_fault_enter couldn't complete the PMAP_ENTER... 3868 * at this point we don't hold any locks so it's safe 3869 * to ask the pmap layer to expand the page table to 3870 * accommodate this mapping... once expanded, we'll 3871 * re-drive the fault which should result in vm_fault_enter 3872 * being able to successfully enter the mapping this time around 3873 */ 3874 (void)pmap_enter_options( 3875 pmap, vaddr, 0, 0, 0, 0, 0, 3876 PMAP_OPTIONS_NOENTER, NULL); 3877 3878 need_retry = FALSE; 3879 goto RetryFault; 3880 } 3881 goto done; 3882 } 3883 /* 3884 * COPY ON WRITE FAULT 3885 */ 3886 assert(object_lock_type == OBJECT_LOCK_EXCLUSIVE); 3887 3888 if ((throttle_delay = vm_page_throttled())) { 3889 /* 3890 * drop all of our locks... 3891 * wait until the free queue is 3892 * pumped back up and then 3893 * redrive the fault 3894 */ 3895 if (object != cur_object) 3896 vm_object_unlock(cur_object); 3897 vm_object_unlock(object); 3898 vm_map_unlock_read(map); 3899 if (real_map != map) 3900 vm_map_unlock(real_map); 3901 3902 VM_DEBUG_EVENT(vmf_cowdelay, VMF_COWDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0); 3903 3904 delay(throttle_delay); 3905 3906 if (!current_thread_aborted() && vm_page_wait((change_wiring) ? 3907 THREAD_UNINT : 3908 THREAD_ABORTSAFE)) 3909 goto RetryFault; 3910 kr = KERN_ABORTED; 3911 goto done; 3912 } 3913 /* 3914 * If objects match, then 3915 * object->copy must not be NULL (else control 3916 * would be in previous code block), and we 3917 * have a potential push into the copy object 3918 * with which we can't cope with here. 3919 */ 3920 if (cur_object == object) { 3921 /* 3922 * must take the slow path to 3923 * deal with the copy push 3924 */ 3925 break; 3926 } 3927 3928 /* 3929 * This is now a shadow based copy on write 3930 * fault -- it requires a copy up the shadow 3931 * chain. 3932 */ 3933 3934 if ((cur_object_lock_type == OBJECT_LOCK_SHARED) && 3935 VM_FAULT_NEED_CS_VALIDATION(NULL, m)) { 3936 goto upgrade_for_validation; 3937 } 3938 3939 /* 3940 * Allocate a page in the original top level 3941 * object. Give up if allocate fails. Also 3942 * need to remember current page, as it's the 3943 * source of the copy. 3944 * 3945 * at this point we hold locks on both 3946 * object and cur_object... no need to take 3947 * paging refs or mark pages BUSY since 3948 * we don't drop either object lock until 3949 * the page has been copied and inserted 3950 */ 3951 cur_m = m; 3952 m = vm_page_grab(); 3953 3954 if (m == VM_PAGE_NULL) { 3955 /* 3956 * no free page currently available... 3957 * must take the slow path 3958 */ 3959 break; 3960 } 3961 /* 3962 * Now do the copy. Mark the source page busy... 3963 * 3964 * NOTE: This code holds the map lock across 3965 * the page copy. 3966 */ 3967 vm_page_copy(cur_m, m); 3968 vm_page_insert(m, object, offset); 3969 SET_PAGE_DIRTY(m, FALSE); 3970 3971 /* 3972 * Now cope with the source page and object 3973 */ 3974 if (object->ref_count > 1 && cur_m->pmapped) 3975 pmap_disconnect(cur_m->phys_page); 3976 3977 if (cur_m->clustered) { 3978 VM_PAGE_COUNT_AS_PAGEIN(cur_m); 3979 VM_PAGE_CONSUME_CLUSTERED(cur_m); 3980 } 3981 need_collapse = TRUE; 3982 3983 if (!cur_object->internal && 3984 cur_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY) { 3985 /* 3986 * The object from which we've just 3987 * copied a page is most probably backed 3988 * by a vnode. We don't want to waste too 3989 * much time trying to collapse the VM objects 3990 * and create a bottleneck when several tasks 3991 * map the same file. 3992 */ 3993 if (cur_object->copy == object) { 3994 /* 3995 * Shared mapping or no COW yet. 3996 * We can never collapse a copy 3997 * object into its backing object. 3998 */ 3999 need_collapse = FALSE; 4000 } else if (cur_object->copy == object->shadow && 4001 object->shadow->resident_page_count == 0) { 4002 /* 4003 * Shared mapping after a COW occurred. 4004 */ 4005 need_collapse = FALSE; 4006 } 4007 } 4008 vm_object_unlock(cur_object); 4009 4010 if (need_collapse == FALSE) 4011 vm_fault_collapse_skipped++; 4012 vm_fault_collapse_total++; 4013 4014 type_of_fault = DBG_COW_FAULT; 4015 VM_STAT_INCR(cow_faults); 4016 DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL); 4017 current_task()->cow_faults++; 4018 4019 goto FastPmapEnter; 4020 4021 } else { 4022 /* 4023 * No page at cur_object, cur_offset... m == NULL 4024 */ 4025 if (cur_object->pager_created) { 4026 int compressor_external_state = VM_EXTERNAL_STATE_UNKNOWN; 4027 4028 if (MUST_ASK_PAGER(cur_object, cur_offset, compressor_external_state) == TRUE) { 4029 int my_fault_type; 4030 int c_flags = C_DONT_BLOCK; 4031 boolean_t insert_cur_object = FALSE; 4032 4033 /* 4034 * May have to talk to a pager... 4035 * if so, take the slow path by 4036 * doing a 'break' from the while (TRUE) loop 4037 * 4038 * external_state will only be set to VM_EXTERNAL_STATE_EXISTS 4039 * if the compressor is active and the page exists there 4040 */ 4041 if (compressor_external_state != VM_EXTERNAL_STATE_EXISTS) 4042 break; 4043 4044 if (map == kernel_map || real_map == kernel_map) { 4045 /* 4046 * can't call into the compressor with the kernel_map 4047 * lock held, since the compressor may try to operate 4048 * on the kernel map in order to return an empty c_segment 4049 */ 4050 break; 4051 } 4052 if (object != cur_object) { 4053 if (fault_type & VM_PROT_WRITE) 4054 c_flags |= C_KEEP; 4055 else 4056 insert_cur_object = TRUE; 4057 } 4058 if (insert_cur_object == TRUE) { 4059 4060 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 4061 4062 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4063 4064 if (vm_object_lock_upgrade(cur_object) == FALSE) { 4065 /* 4066 * couldn't upgrade so go do a full retry 4067 * immediately since we can no longer be 4068 * certain about cur_object (since we 4069 * don't hold a reference on it)... 4070 * first drop the top object lock 4071 */ 4072 vm_object_unlock(object); 4073 4074 vm_map_unlock_read(map); 4075 if (real_map != map) 4076 vm_map_unlock(real_map); 4077 4078 goto RetryFault; 4079 } 4080 } 4081 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 4082 4083 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4084 4085 if (object != cur_object) { 4086 /* 4087 * we can't go for the upgrade on the top 4088 * lock since the upgrade may block waiting 4089 * for readers to drain... since we hold 4090 * cur_object locked at this point, waiting 4091 * for the readers to drain would represent 4092 * a lock order inversion since the lock order 4093 * for objects is the reference order in the 4094 * shadown chain 4095 */ 4096 vm_object_unlock(object); 4097 vm_object_unlock(cur_object); 4098 4099 vm_map_unlock_read(map); 4100 if (real_map != map) 4101 vm_map_unlock(real_map); 4102 4103 goto RetryFault; 4104 } 4105 if (vm_object_lock_upgrade(object) == FALSE) { 4106 /* 4107 * couldn't upgrade, so explictly take the lock 4108 * exclusively and go relookup the page since we 4109 * will have dropped the object lock and 4110 * a different thread could have inserted 4111 * a page at this offset 4112 * no need for a full retry since we're 4113 * at the top level of the object chain 4114 */ 4115 vm_object_lock(object); 4116 4117 continue; 4118 } 4119 } 4120 m = vm_page_grab(); 4121 4122 if (m == VM_PAGE_NULL) { 4123 /* 4124 * no free page currently available... 4125 * must take the slow path 4126 */ 4127 break; 4128 } 4129 4130 /* 4131 * The object is and remains locked 4132 * so no need to take a 4133 * "paging_in_progress" reference. 4134 */ 4135 boolean_t shared_lock; 4136 if ((object == cur_object && 4137 object_lock_type == OBJECT_LOCK_EXCLUSIVE) || 4138 (object != cur_object && 4139 cur_object_lock_type == OBJECT_LOCK_EXCLUSIVE)) { 4140 shared_lock = FALSE; 4141 } else { 4142 shared_lock = TRUE; 4143 } 4144 4145 kr = vm_compressor_pager_get( 4146 cur_object->pager, 4147 (cur_offset + 4148 cur_object->paging_offset), 4149 m->phys_page, 4150 &my_fault_type, 4151 c_flags, 4152 &compressed_count_delta); 4153 4154 vm_compressor_pager_count( 4155 cur_object->pager, 4156 compressed_count_delta, 4157 shared_lock, 4158 cur_object); 4159 4160 if (kr != KERN_SUCCESS) { 4161 vm_page_release(m); 4162 break; 4163 } 4164 m->dirty = TRUE; 4165 4166 /* 4167 * If the object is purgeable, its 4168 * owner's purgeable ledgers will be 4169 * updated in vm_page_insert() but the 4170 * page was also accounted for in a 4171 * "compressed purgeable" ledger, so 4172 * update that now. 4173 */ 4174 if (object != cur_object && 4175 !insert_cur_object) { 4176 /* 4177 * We're not going to insert 4178 * the decompressed page into 4179 * the object it came from. 4180 * 4181 * We're dealing with a 4182 * copy-on-write fault on 4183 * "object". 4184 * We're going to decompress 4185 * the page directly into the 4186 * target "object" while 4187 * keepin the compressed 4188 * page for "cur_object", so 4189 * no ledger update in that 4190 * case. 4191 */ 4192 } else if ((cur_object->purgable == 4193 VM_PURGABLE_DENY) || 4194 (cur_object->vo_purgeable_owner == 4195 NULL)) { 4196 /* 4197 * "cur_object" is not purgeable 4198 * or is not owned, so no 4199 * purgeable ledgers to update. 4200 */ 4201 } else { 4202 /* 4203 * One less compressed 4204 * purgeable page for 4205 * cur_object's owner. 4206 */ 4207 vm_purgeable_compressed_update( 4208 cur_object, 4209 -1); 4210 } 4211 4212 if (insert_cur_object) { 4213 vm_page_insert(m, cur_object, cur_offset); 4214 } else { 4215 vm_page_insert(m, object, offset); 4216 } 4217 4218 if ((m->object->wimg_bits & VM_WIMG_MASK) != VM_WIMG_USE_DEFAULT) { 4219 /* 4220 * If the page is not cacheable, 4221 * we can't let its contents 4222 * linger in the data cache 4223 * after the decompression. 4224 */ 4225 pmap_sync_page_attributes_phys(m->phys_page); 4226 } 4227 4228 type_of_fault = my_fault_type; 4229 4230 VM_STAT_INCR(decompressions); 4231 4232 if (cur_object != object) { 4233 if (insert_cur_object) { 4234 top_object = object; 4235 /* 4236 * switch to the object that has the new page 4237 */ 4238 object = cur_object; 4239 object_lock_type = cur_object_lock_type; 4240 } else { 4241 vm_object_unlock(cur_object); 4242 cur_object = object; 4243 } 4244 } 4245 goto FastPmapEnter; 4246 } 4247 /* 4248 * existence map present and indicates 4249 * that the pager doesn't have this page 4250 */ 4251 } 4252 if (cur_object->shadow == VM_OBJECT_NULL) { 4253 /* 4254 * Zero fill fault. Page gets 4255 * inserted into the original object. 4256 */ 4257 if (cur_object->shadow_severed || 4258 VM_OBJECT_PURGEABLE_FAULT_ERROR(cur_object)) 4259 { 4260 if (object != cur_object) 4261 vm_object_unlock(cur_object); 4262 vm_object_unlock(object); 4263 4264 vm_map_unlock_read(map); 4265 if (real_map != map) 4266 vm_map_unlock(real_map); 4267 4268 kr = KERN_MEMORY_ERROR; 4269 goto done; 4270 } 4271 if ((throttle_delay = vm_page_throttled())) { 4272 /* 4273 * drop all of our locks... 4274 * wait until the free queue is 4275 * pumped back up and then 4276 * redrive the fault 4277 */ 4278 if (object != cur_object) 4279 vm_object_unlock(cur_object); 4280 vm_object_unlock(object); 4281 vm_map_unlock_read(map); 4282 if (real_map != map) 4283 vm_map_unlock(real_map); 4284 4285 VM_DEBUG_EVENT(vmf_zfdelay, VMF_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0); 4286 4287 delay(throttle_delay); 4288 4289 if (!current_thread_aborted() && vm_page_wait((change_wiring) ? 4290 THREAD_UNINT : 4291 THREAD_ABORTSAFE)) 4292 goto RetryFault; 4293 kr = KERN_ABORTED; 4294 goto done; 4295 } 4296 if (vm_backing_store_low) { 4297 /* 4298 * we are protecting the system from 4299 * backing store exhaustion... 4300 * must take the slow path if we're 4301 * not privileged 4302 */ 4303 if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) 4304 break; 4305 } 4306 if (cur_object != object) { 4307 vm_object_unlock(cur_object); 4308 4309 cur_object = object; 4310 } 4311 if (object_lock_type == OBJECT_LOCK_SHARED) { 4312 4313 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4314 4315 if (vm_object_lock_upgrade(object) == FALSE) { 4316 /* 4317 * couldn't upgrade so do a full retry on the fault 4318 * since we dropped the object lock which 4319 * could allow another thread to insert 4320 * a page at this offset 4321 */ 4322 vm_map_unlock_read(map); 4323 if (real_map != map) 4324 vm_map_unlock(real_map); 4325 4326 goto RetryFault; 4327 } 4328 } 4329 m = vm_page_alloc(object, offset); 4330 4331 if (m == VM_PAGE_NULL) { 4332 /* 4333 * no free page currently available... 4334 * must take the slow path 4335 */ 4336 break; 4337 } 4338 4339 /* 4340 * Now zero fill page... 4341 * the page is probably going to 4342 * be written soon, so don't bother 4343 * to clear the modified bit 4344 * 4345 * NOTE: This code holds the map 4346 * lock across the zero fill. 4347 */ 4348 type_of_fault = vm_fault_zero_page(m, map->no_zero_fill); 4349 4350 goto FastPmapEnter; 4351 } 4352 /* 4353 * On to the next level in the shadow chain 4354 */ 4355 cur_offset += cur_object->vo_shadow_offset; 4356 new_object = cur_object->shadow; 4357 4358 /* 4359 * take the new_object's lock with the indicated state 4360 */ 4361 if (cur_object_lock_type == OBJECT_LOCK_SHARED) 4362 vm_object_lock_shared(new_object); 4363 else 4364 vm_object_lock(new_object); 4365 4366 if (cur_object != object) 4367 vm_object_unlock(cur_object); 4368 4369 cur_object = new_object; 4370 4371 continue; 4372 } 4373 } 4374 /* 4375 * Cleanup from fast fault failure. Drop any object 4376 * lock other than original and drop map lock. 4377 */ 4378 if (object != cur_object) 4379 vm_object_unlock(cur_object); 4380 4381 /* 4382 * must own the object lock exclusively at this point 4383 */ 4384 if (object_lock_type == OBJECT_LOCK_SHARED) { 4385 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4386 4387 if (vm_object_lock_upgrade(object) == FALSE) { 4388 /* 4389 * couldn't upgrade, so explictly 4390 * take the lock exclusively 4391 * no need to retry the fault at this 4392 * point since "vm_fault_page" will 4393 * completely re-evaluate the state 4394 */ 4395 vm_object_lock(object); 4396 } 4397 } 4398 4399handle_copy_delay: 4400 vm_map_unlock_read(map); 4401 if (real_map != map) 4402 vm_map_unlock(real_map); 4403 4404 /* 4405 * Make a reference to this object to 4406 * prevent its disposal while we are messing with 4407 * it. Once we have the reference, the map is free 4408 * to be diddled. Since objects reference their 4409 * shadows (and copies), they will stay around as well. 4410 */ 4411 vm_object_reference_locked(object); 4412 vm_object_paging_begin(object); 4413 4414 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0); 4415 4416 error_code = 0; 4417 4418 result_page = VM_PAGE_NULL; 4419 kr = vm_fault_page(object, offset, fault_type, 4420 (change_wiring && !wired), 4421 FALSE, /* page not looked up */ 4422 &prot, &result_page, &top_page, 4423 &type_of_fault, 4424 &error_code, map->no_zero_fill, 4425 FALSE, &fault_info); 4426 4427 /* 4428 * if kr != VM_FAULT_SUCCESS, then the paging reference 4429 * has been dropped and the object unlocked... the ref_count 4430 * is still held 4431 * 4432 * if kr == VM_FAULT_SUCCESS, then the paging reference 4433 * is still held along with the ref_count on the original object 4434 * 4435 * the object is returned locked with a paging reference 4436 * 4437 * if top_page != NULL, then it's BUSY and the 4438 * object it belongs to has a paging reference 4439 * but is returned unlocked 4440 */ 4441 if (kr != VM_FAULT_SUCCESS && 4442 kr != VM_FAULT_SUCCESS_NO_VM_PAGE) { 4443 /* 4444 * we didn't succeed, lose the object reference immediately. 4445 */ 4446 vm_object_deallocate(object); 4447 4448 /* 4449 * See why we failed, and take corrective action. 4450 */ 4451 switch (kr) { 4452 case VM_FAULT_MEMORY_SHORTAGE: 4453 if (vm_page_wait((change_wiring) ? 4454 THREAD_UNINT : 4455 THREAD_ABORTSAFE)) 4456 goto RetryFault; 4457 /* 4458 * fall thru 4459 */ 4460 case VM_FAULT_INTERRUPTED: 4461 kr = KERN_ABORTED; 4462 goto done; 4463 case VM_FAULT_RETRY: 4464 goto RetryFault; 4465 case VM_FAULT_MEMORY_ERROR: 4466 if (error_code) 4467 kr = error_code; 4468 else 4469 kr = KERN_MEMORY_ERROR; 4470 goto done; 4471 default: 4472 panic("vm_fault: unexpected error 0x%x from " 4473 "vm_fault_page()\n", kr); 4474 } 4475 } 4476 m = result_page; 4477 4478 if (m != VM_PAGE_NULL) { 4479 assert((change_wiring && !wired) ? 4480 (top_page == VM_PAGE_NULL) : 4481 ((top_page == VM_PAGE_NULL) == (m->object == object))); 4482 } 4483 4484 /* 4485 * What to do with the resulting page from vm_fault_page 4486 * if it doesn't get entered into the physical map: 4487 */ 4488#define RELEASE_PAGE(m) \ 4489 MACRO_BEGIN \ 4490 PAGE_WAKEUP_DONE(m); \ 4491 if (!m->active && !m->inactive && !m->throttled) { \ 4492 vm_page_lockspin_queues(); \ 4493 if (!m->active && !m->inactive && !m->throttled) \ 4494 vm_page_activate(m); \ 4495 vm_page_unlock_queues(); \ 4496 } \ 4497 MACRO_END 4498 4499 /* 4500 * We must verify that the maps have not changed 4501 * since our last lookup. 4502 */ 4503 if (m != VM_PAGE_NULL) { 4504 old_copy_object = m->object->copy; 4505 vm_object_unlock(m->object); 4506 } else { 4507 old_copy_object = VM_OBJECT_NULL; 4508 vm_object_unlock(object); 4509 } 4510 4511 /* 4512 * no object locks are held at this point 4513 */ 4514 if ((map != original_map) || !vm_map_verify(map, &version)) { 4515 vm_object_t retry_object; 4516 vm_object_offset_t retry_offset; 4517 vm_prot_t retry_prot; 4518 4519 /* 4520 * To avoid trying to write_lock the map while another 4521 * thread has it read_locked (in vm_map_pageable), we 4522 * do not try for write permission. If the page is 4523 * still writable, we will get write permission. If it 4524 * is not, or has been marked needs_copy, we enter the 4525 * mapping without write permission, and will merely 4526 * take another fault. 4527 */ 4528 map = original_map; 4529 vm_map_lock_read(map); 4530 4531 kr = vm_map_lookup_locked(&map, vaddr, 4532 fault_type & ~VM_PROT_WRITE, 4533 OBJECT_LOCK_EXCLUSIVE, &version, 4534 &retry_object, &retry_offset, &retry_prot, 4535 &wired, 4536 &fault_info, 4537 &real_map); 4538 pmap = real_map->pmap; 4539 4540 if (kr != KERN_SUCCESS) { 4541 vm_map_unlock_read(map); 4542 4543 if (m != VM_PAGE_NULL) { 4544 /* 4545 * retake the lock so that 4546 * we can drop the paging reference 4547 * in vm_fault_cleanup and do the 4548 * PAGE_WAKEUP_DONE in RELEASE_PAGE 4549 */ 4550 vm_object_lock(m->object); 4551 4552 RELEASE_PAGE(m); 4553 4554 vm_fault_cleanup(m->object, top_page); 4555 } else { 4556 /* 4557 * retake the lock so that 4558 * we can drop the paging reference 4559 * in vm_fault_cleanup 4560 */ 4561 vm_object_lock(object); 4562 4563 vm_fault_cleanup(object, top_page); 4564 } 4565 vm_object_deallocate(object); 4566 4567 goto done; 4568 } 4569 vm_object_unlock(retry_object); 4570 4571 if ((retry_object != object) || (retry_offset != offset)) { 4572 4573 vm_map_unlock_read(map); 4574 if (real_map != map) 4575 vm_map_unlock(real_map); 4576 4577 if (m != VM_PAGE_NULL) { 4578 /* 4579 * retake the lock so that 4580 * we can drop the paging reference 4581 * in vm_fault_cleanup and do the 4582 * PAGE_WAKEUP_DONE in RELEASE_PAGE 4583 */ 4584 vm_object_lock(m->object); 4585 4586 RELEASE_PAGE(m); 4587 4588 vm_fault_cleanup(m->object, top_page); 4589 } else { 4590 /* 4591 * retake the lock so that 4592 * we can drop the paging reference 4593 * in vm_fault_cleanup 4594 */ 4595 vm_object_lock(object); 4596 4597 vm_fault_cleanup(object, top_page); 4598 } 4599 vm_object_deallocate(object); 4600 4601 goto RetryFault; 4602 } 4603 /* 4604 * Check whether the protection has changed or the object 4605 * has been copied while we left the map unlocked. 4606 */ 4607 prot &= retry_prot; 4608 } 4609 if (m != VM_PAGE_NULL) { 4610 vm_object_lock(m->object); 4611 4612 if (m->object->copy != old_copy_object) { 4613 /* 4614 * The copy object changed while the top-level object 4615 * was unlocked, so take away write permission. 4616 */ 4617 prot &= ~VM_PROT_WRITE; 4618 } 4619 } else 4620 vm_object_lock(object); 4621 4622 /* 4623 * If we want to wire down this page, but no longer have 4624 * adequate permissions, we must start all over. 4625 */ 4626 if (wired && (fault_type != (prot | VM_PROT_WRITE))) { 4627 4628 vm_map_verify_done(map, &version); 4629 if (real_map != map) 4630 vm_map_unlock(real_map); 4631 4632 if (m != VM_PAGE_NULL) { 4633 RELEASE_PAGE(m); 4634 4635 vm_fault_cleanup(m->object, top_page); 4636 } else 4637 vm_fault_cleanup(object, top_page); 4638 4639 vm_object_deallocate(object); 4640 4641 goto RetryFault; 4642 } 4643 if (m != VM_PAGE_NULL) { 4644 /* 4645 * Put this page into the physical map. 4646 * We had to do the unlock above because pmap_enter 4647 * may cause other faults. The page may be on 4648 * the pageout queues. If the pageout daemon comes 4649 * across the page, it will remove it from the queues. 4650 */ 4651 if (caller_pmap) { 4652 kr = vm_fault_enter(m, 4653 caller_pmap, 4654 caller_pmap_addr, 4655 prot, 4656 fault_type, 4657 wired, 4658 change_wiring, 4659 fault_info.no_cache, 4660 fault_info.cs_bypass, 4661 fault_info.user_tag, 4662 fault_info.pmap_options, 4663 NULL, 4664 &type_of_fault); 4665 } else { 4666 kr = vm_fault_enter(m, 4667 pmap, 4668 vaddr, 4669 prot, 4670 fault_type, 4671 wired, 4672 change_wiring, 4673 fault_info.no_cache, 4674 fault_info.cs_bypass, 4675 fault_info.user_tag, 4676 fault_info.pmap_options, 4677 NULL, 4678 &type_of_fault); 4679 } 4680 if (kr != KERN_SUCCESS) { 4681 /* abort this page fault */ 4682 vm_map_verify_done(map, &version); 4683 if (real_map != map) 4684 vm_map_unlock(real_map); 4685 PAGE_WAKEUP_DONE(m); 4686 vm_fault_cleanup(m->object, top_page); 4687 vm_object_deallocate(object); 4688 goto done; 4689 } 4690 if (physpage_p != NULL) { 4691 /* for vm_map_wire_and_extract() */ 4692 *physpage_p = m->phys_page; 4693 if (prot & VM_PROT_WRITE) { 4694 vm_object_lock_assert_exclusive(m->object); 4695 m->dirty = TRUE; 4696 } 4697 } 4698 } else { 4699 4700 vm_map_entry_t entry; 4701 vm_map_offset_t laddr; 4702 vm_map_offset_t ldelta, hdelta; 4703 4704 /* 4705 * do a pmap block mapping from the physical address 4706 * in the object 4707 */ 4708 4709#ifdef ppc 4710 /* While we do not worry about execution protection in */ 4711 /* general, certian pages may have instruction execution */ 4712 /* disallowed. We will check here, and if not allowed */ 4713 /* to execute, we return with a protection failure. */ 4714 4715 if ((fault_type & VM_PROT_EXECUTE) && 4716 (!pmap_eligible_for_execute((ppnum_t)(object->vo_shadow_offset >> 12)))) { 4717 4718 vm_map_verify_done(map, &version); 4719 4720 if (real_map != map) 4721 vm_map_unlock(real_map); 4722 4723 vm_fault_cleanup(object, top_page); 4724 vm_object_deallocate(object); 4725 4726 kr = KERN_PROTECTION_FAILURE; 4727 goto done; 4728 } 4729#endif /* ppc */ 4730 4731 if (real_map != map) 4732 vm_map_unlock(real_map); 4733 4734 if (original_map != map) { 4735 vm_map_unlock_read(map); 4736 vm_map_lock_read(original_map); 4737 map = original_map; 4738 } 4739 real_map = map; 4740 4741 laddr = vaddr; 4742 hdelta = 0xFFFFF000; 4743 ldelta = 0xFFFFF000; 4744 4745 while (vm_map_lookup_entry(map, laddr, &entry)) { 4746 if (ldelta > (laddr - entry->vme_start)) 4747 ldelta = laddr - entry->vme_start; 4748 if (hdelta > (entry->vme_end - laddr)) 4749 hdelta = entry->vme_end - laddr; 4750 if (entry->is_sub_map) { 4751 4752 laddr = (laddr - entry->vme_start) 4753 + entry->offset; 4754 vm_map_lock_read(entry->object.sub_map); 4755 4756 if (map != real_map) 4757 vm_map_unlock_read(map); 4758 if (entry->use_pmap) { 4759 vm_map_unlock_read(real_map); 4760 real_map = entry->object.sub_map; 4761 } 4762 map = entry->object.sub_map; 4763 4764 } else { 4765 break; 4766 } 4767 } 4768 4769 if (vm_map_lookup_entry(map, laddr, &entry) && 4770 (entry->object.vm_object != NULL) && 4771 (entry->object.vm_object == object)) { 4772 4773 int superpage = (!object->pager_created && object->phys_contiguous)? VM_MEM_SUPERPAGE : 0; 4774 4775 if (superpage && physpage_p) { 4776 /* for vm_map_wire_and_extract() */ 4777 *physpage_p = (ppnum_t) ((((vm_map_offset_t) entry->object.vm_object->vo_shadow_offset) 4778 + entry->offset 4779 + (laddr - entry->vme_start)) 4780 >> PAGE_SHIFT); 4781 } 4782 4783 if (caller_pmap) { 4784 /* 4785 * Set up a block mapped area 4786 */ 4787 assert((uint32_t)((ldelta + hdelta) >> PAGE_SHIFT) == ((ldelta + hdelta) >> PAGE_SHIFT)); 4788 pmap_map_block(caller_pmap, 4789 (addr64_t)(caller_pmap_addr - ldelta), 4790 (ppnum_t)((((vm_map_offset_t) (entry->object.vm_object->vo_shadow_offset)) + 4791 entry->offset + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT), 4792 (uint32_t)((ldelta + hdelta) >> PAGE_SHIFT), prot, 4793 (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0); 4794 } else { 4795 /* 4796 * Set up a block mapped area 4797 */ 4798 assert((uint32_t)((ldelta + hdelta) >> PAGE_SHIFT) == ((ldelta + hdelta) >> PAGE_SHIFT)); 4799 pmap_map_block(real_map->pmap, 4800 (addr64_t)(vaddr - ldelta), 4801 (ppnum_t)((((vm_map_offset_t)(entry->object.vm_object->vo_shadow_offset)) + 4802 entry->offset + (laddr - entry->vme_start) - ldelta) >> PAGE_SHIFT), 4803 (uint32_t)((ldelta + hdelta) >> PAGE_SHIFT), prot, 4804 (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0); 4805 } 4806 } 4807 } 4808 4809 /* 4810 * Unlock everything, and return 4811 */ 4812 vm_map_verify_done(map, &version); 4813 if (real_map != map) 4814 vm_map_unlock(real_map); 4815 4816 if (m != VM_PAGE_NULL) { 4817 PAGE_WAKEUP_DONE(m); 4818 4819 vm_fault_cleanup(m->object, top_page); 4820 } else 4821 vm_fault_cleanup(object, top_page); 4822 4823 vm_object_deallocate(object); 4824 4825#undef RELEASE_PAGE 4826 4827 kr = KERN_SUCCESS; 4828done: 4829 thread_interrupt_level(interruptible_state); 4830 4831 /* 4832 * Only throttle on faults which cause a pagein. 4833 */ 4834 if ((type_of_fault == DBG_PAGEIND_FAULT) || (type_of_fault == DBG_PAGEINV_FAULT) || (type_of_fault == DBG_COMPRESSOR_SWAPIN_FAULT)) { 4835 throttle_lowpri_io(1); 4836 } 4837 4838 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4839 (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END, 4840 ((uint64_t)vaddr >> 32), 4841 vaddr, 4842 kr, 4843 type_of_fault, 4844 0); 4845 4846 return (kr); 4847} 4848 4849/* 4850 * vm_fault_wire: 4851 * 4852 * Wire down a range of virtual addresses in a map. 4853 */ 4854kern_return_t 4855vm_fault_wire( 4856 vm_map_t map, 4857 vm_map_entry_t entry, 4858 pmap_t pmap, 4859 vm_map_offset_t pmap_addr, 4860 ppnum_t *physpage_p) 4861{ 4862 4863 register vm_map_offset_t va; 4864 register vm_map_offset_t end_addr = entry->vme_end; 4865 register kern_return_t rc; 4866 4867 assert(entry->in_transition); 4868 4869 if ((entry->object.vm_object != NULL) && 4870 !entry->is_sub_map && 4871 entry->object.vm_object->phys_contiguous) { 4872 return KERN_SUCCESS; 4873 } 4874 4875 /* 4876 * Inform the physical mapping system that the 4877 * range of addresses may not fault, so that 4878 * page tables and such can be locked down as well. 4879 */ 4880 4881 pmap_pageable(pmap, pmap_addr, 4882 pmap_addr + (end_addr - entry->vme_start), FALSE); 4883 4884 /* 4885 * We simulate a fault to get the page and enter it 4886 * in the physical map. 4887 */ 4888 4889 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) { 4890 rc = vm_fault_wire_fast(map, va, entry, pmap, 4891 pmap_addr + (va - entry->vme_start), 4892 physpage_p); 4893 if (rc != KERN_SUCCESS) { 4894 rc = vm_fault_internal(map, va, VM_PROT_NONE, TRUE, 4895 ((pmap == kernel_pmap) 4896 ? THREAD_UNINT 4897 : THREAD_ABORTSAFE), 4898 pmap, 4899 (pmap_addr + 4900 (va - entry->vme_start)), 4901 physpage_p); 4902 DTRACE_VM2(softlock, int, 1, (uint64_t *), NULL); 4903 } 4904 4905 if (rc != KERN_SUCCESS) { 4906 struct vm_map_entry tmp_entry = *entry; 4907 4908 /* unwire wired pages */ 4909 tmp_entry.vme_end = va; 4910 vm_fault_unwire(map, 4911 &tmp_entry, FALSE, pmap, pmap_addr); 4912 4913 return rc; 4914 } 4915 } 4916 return KERN_SUCCESS; 4917} 4918 4919/* 4920 * vm_fault_unwire: 4921 * 4922 * Unwire a range of virtual addresses in a map. 4923 */ 4924void 4925vm_fault_unwire( 4926 vm_map_t map, 4927 vm_map_entry_t entry, 4928 boolean_t deallocate, 4929 pmap_t pmap, 4930 vm_map_offset_t pmap_addr) 4931{ 4932 register vm_map_offset_t va; 4933 register vm_map_offset_t end_addr = entry->vme_end; 4934 vm_object_t object; 4935 struct vm_object_fault_info fault_info; 4936 4937 object = (entry->is_sub_map) 4938 ? VM_OBJECT_NULL : entry->object.vm_object; 4939 4940 /* 4941 * If it's marked phys_contiguous, then vm_fault_wire() didn't actually 4942 * do anything since such memory is wired by default. So we don't have 4943 * anything to undo here. 4944 */ 4945 4946 if (object != VM_OBJECT_NULL && object->phys_contiguous) 4947 return; 4948 4949 fault_info.interruptible = THREAD_UNINT; 4950 fault_info.behavior = entry->behavior; 4951 fault_info.user_tag = entry->alias; 4952 fault_info.pmap_options = 0; 4953 if (entry->iokit_acct || 4954 (!entry->is_sub_map && !entry->use_pmap)) { 4955 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT; 4956 } 4957 fault_info.lo_offset = entry->offset; 4958 fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; 4959 fault_info.no_cache = entry->no_cache; 4960 fault_info.stealth = TRUE; 4961 fault_info.io_sync = FALSE; 4962 fault_info.cs_bypass = FALSE; 4963 fault_info.mark_zf_absent = FALSE; 4964 fault_info.batch_pmap_op = FALSE; 4965 4966 /* 4967 * Since the pages are wired down, we must be able to 4968 * get their mappings from the physical map system. 4969 */ 4970 4971 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) { 4972 4973 if (object == VM_OBJECT_NULL) { 4974 if (pmap) { 4975 pmap_change_wiring(pmap, 4976 pmap_addr + (va - entry->vme_start), FALSE); 4977 } 4978 (void) vm_fault(map, va, VM_PROT_NONE, 4979 TRUE, THREAD_UNINT, pmap, pmap_addr); 4980 } else { 4981 vm_prot_t prot; 4982 vm_page_t result_page; 4983 vm_page_t top_page; 4984 vm_object_t result_object; 4985 vm_fault_return_t result; 4986 4987 if (end_addr - va > (vm_size_t) -1) { 4988 /* 32-bit overflow */ 4989 fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 4990 } else { 4991 fault_info.cluster_size = (vm_size_t) (end_addr - va); 4992 assert(fault_info.cluster_size == end_addr - va); 4993 } 4994 4995 do { 4996 prot = VM_PROT_NONE; 4997 4998 vm_object_lock(object); 4999 vm_object_paging_begin(object); 5000 XPR(XPR_VM_FAULT, 5001 "vm_fault_unwire -> vm_fault_page\n", 5002 0,0,0,0,0); 5003 result_page = VM_PAGE_NULL; 5004 result = vm_fault_page( 5005 object, 5006 entry->offset + (va - entry->vme_start), 5007 VM_PROT_NONE, TRUE, 5008 FALSE, /* page not looked up */ 5009 &prot, &result_page, &top_page, 5010 (int *)0, 5011 NULL, map->no_zero_fill, 5012 FALSE, &fault_info); 5013 } while (result == VM_FAULT_RETRY); 5014 5015 /* 5016 * If this was a mapping to a file on a device that has been forcibly 5017 * unmounted, then we won't get a page back from vm_fault_page(). Just 5018 * move on to the next one in case the remaining pages are mapped from 5019 * different objects. During a forced unmount, the object is terminated 5020 * so the alive flag will be false if this happens. A forced unmount will 5021 * will occur when an external disk is unplugged before the user does an 5022 * eject, so we don't want to panic in that situation. 5023 */ 5024 5025 if (result == VM_FAULT_MEMORY_ERROR && !object->alive) 5026 continue; 5027 5028 if (result == VM_FAULT_MEMORY_ERROR && 5029 object == kernel_object) { 5030 /* 5031 * This must have been allocated with 5032 * KMA_KOBJECT and KMA_VAONLY and there's 5033 * no physical page at this offset. 5034 * We're done (no page to free). 5035 */ 5036 assert(deallocate); 5037 continue; 5038 } 5039 5040 if (result != VM_FAULT_SUCCESS) 5041 panic("vm_fault_unwire: failure"); 5042 5043 result_object = result_page->object; 5044 5045 if (deallocate) { 5046 assert(result_page->phys_page != 5047 vm_page_fictitious_addr); 5048 pmap_disconnect(result_page->phys_page); 5049 VM_PAGE_FREE(result_page); 5050 } else { 5051 if ((pmap) && (result_page->phys_page != vm_page_guard_addr)) 5052 pmap_change_wiring(pmap, 5053 pmap_addr + (va - entry->vme_start), FALSE); 5054 5055 5056 if (VM_PAGE_WIRED(result_page)) { 5057 vm_page_lockspin_queues(); 5058 vm_page_unwire(result_page, TRUE); 5059 vm_page_unlock_queues(); 5060 } 5061 if(entry->zero_wired_pages) { 5062 pmap_zero_page(result_page->phys_page); 5063 entry->zero_wired_pages = FALSE; 5064 } 5065 5066 PAGE_WAKEUP_DONE(result_page); 5067 } 5068 vm_fault_cleanup(result_object, top_page); 5069 } 5070 } 5071 5072 /* 5073 * Inform the physical mapping system that the range 5074 * of addresses may fault, so that page tables and 5075 * such may be unwired themselves. 5076 */ 5077 5078 pmap_pageable(pmap, pmap_addr, 5079 pmap_addr + (end_addr - entry->vme_start), TRUE); 5080 5081} 5082 5083/* 5084 * vm_fault_wire_fast: 5085 * 5086 * Handle common case of a wire down page fault at the given address. 5087 * If successful, the page is inserted into the associated physical map. 5088 * The map entry is passed in to avoid the overhead of a map lookup. 5089 * 5090 * NOTE: the given address should be truncated to the 5091 * proper page address. 5092 * 5093 * KERN_SUCCESS is returned if the page fault is handled; otherwise, 5094 * a standard error specifying why the fault is fatal is returned. 5095 * 5096 * The map in question must be referenced, and remains so. 5097 * Caller has a read lock on the map. 5098 * 5099 * This is a stripped version of vm_fault() for wiring pages. Anything 5100 * other than the common case will return KERN_FAILURE, and the caller 5101 * is expected to call vm_fault(). 5102 */ 5103kern_return_t 5104vm_fault_wire_fast( 5105 __unused vm_map_t map, 5106 vm_map_offset_t va, 5107 vm_map_entry_t entry, 5108 pmap_t pmap, 5109 vm_map_offset_t pmap_addr, 5110 ppnum_t *physpage_p) 5111{ 5112 vm_object_t object; 5113 vm_object_offset_t offset; 5114 register vm_page_t m; 5115 vm_prot_t prot; 5116 thread_t thread = current_thread(); 5117 int type_of_fault; 5118 kern_return_t kr; 5119 5120 VM_STAT_INCR(faults); 5121 5122 if (thread != THREAD_NULL && thread->task != TASK_NULL) 5123 thread->task->faults++; 5124 5125/* 5126 * Recovery actions 5127 */ 5128 5129#undef RELEASE_PAGE 5130#define RELEASE_PAGE(m) { \ 5131 PAGE_WAKEUP_DONE(m); \ 5132 vm_page_lockspin_queues(); \ 5133 vm_page_unwire(m, TRUE); \ 5134 vm_page_unlock_queues(); \ 5135} 5136 5137 5138#undef UNLOCK_THINGS 5139#define UNLOCK_THINGS { \ 5140 vm_object_paging_end(object); \ 5141 vm_object_unlock(object); \ 5142} 5143 5144#undef UNLOCK_AND_DEALLOCATE 5145#define UNLOCK_AND_DEALLOCATE { \ 5146 UNLOCK_THINGS; \ 5147 vm_object_deallocate(object); \ 5148} 5149/* 5150 * Give up and have caller do things the hard way. 5151 */ 5152 5153#define GIVE_UP { \ 5154 UNLOCK_AND_DEALLOCATE; \ 5155 return(KERN_FAILURE); \ 5156} 5157 5158 5159 /* 5160 * If this entry is not directly to a vm_object, bail out. 5161 */ 5162 if (entry->is_sub_map) { 5163 assert(physpage_p == NULL); 5164 return(KERN_FAILURE); 5165 } 5166 5167 /* 5168 * Find the backing store object and offset into it. 5169 */ 5170 5171 object = entry->object.vm_object; 5172 offset = (va - entry->vme_start) + entry->offset; 5173 prot = entry->protection; 5174 5175 /* 5176 * Make a reference to this object to prevent its 5177 * disposal while we are messing with it. 5178 */ 5179 5180 vm_object_lock(object); 5181 vm_object_reference_locked(object); 5182 vm_object_paging_begin(object); 5183 5184 /* 5185 * INVARIANTS (through entire routine): 5186 * 5187 * 1) At all times, we must either have the object 5188 * lock or a busy page in some object to prevent 5189 * some other thread from trying to bring in 5190 * the same page. 5191 * 5192 * 2) Once we have a busy page, we must remove it from 5193 * the pageout queues, so that the pageout daemon 5194 * will not grab it away. 5195 * 5196 */ 5197 5198 /* 5199 * Look for page in top-level object. If it's not there or 5200 * there's something going on, give up. 5201 * ENCRYPTED SWAP: use the slow fault path, since we'll need to 5202 * decrypt the page before wiring it down. 5203 */ 5204 m = vm_page_lookup(object, offset); 5205 if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) || 5206 (m->unusual && ( m->error || m->restart || m->absent))) { 5207 5208 GIVE_UP; 5209 } 5210 ASSERT_PAGE_DECRYPTED(m); 5211 5212 if (m->fictitious && 5213 m->phys_page == vm_page_guard_addr) { 5214 /* 5215 * Guard pages are fictitious pages and are never 5216 * entered into a pmap, so let's say it's been wired... 5217 */ 5218 kr = KERN_SUCCESS; 5219 goto done; 5220 } 5221 5222 /* 5223 * Wire the page down now. All bail outs beyond this 5224 * point must unwire the page. 5225 */ 5226 5227 vm_page_lockspin_queues(); 5228 vm_page_wire(m); 5229 vm_page_unlock_queues(); 5230 5231 /* 5232 * Mark page busy for other threads. 5233 */ 5234 assert(!m->busy); 5235 m->busy = TRUE; 5236 assert(!m->absent); 5237 5238 /* 5239 * Give up if the page is being written and there's a copy object 5240 */ 5241 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) { 5242 RELEASE_PAGE(m); 5243 GIVE_UP; 5244 } 5245 5246 /* 5247 * Put this page into the physical map. 5248 */ 5249 type_of_fault = DBG_CACHE_HIT_FAULT; 5250 kr = vm_fault_enter(m, 5251 pmap, 5252 pmap_addr, 5253 prot, 5254 prot, 5255 TRUE, 5256 FALSE, 5257 FALSE, 5258 FALSE, 5259 entry->alias, 5260 ((entry->iokit_acct || 5261 (!entry->is_sub_map && !entry->use_pmap)) 5262 ? PMAP_OPTIONS_ALT_ACCT 5263 : 0), 5264 NULL, 5265 &type_of_fault); 5266 5267done: 5268 /* 5269 * Unlock everything, and return 5270 */ 5271 5272 if (physpage_p) { 5273 /* for vm_map_wire_and_extract() */ 5274 if (kr == KERN_SUCCESS) { 5275 *physpage_p = m->phys_page; 5276 if (prot & VM_PROT_WRITE) { 5277 vm_object_lock_assert_exclusive(m->object); 5278 m->dirty = TRUE; 5279 } 5280 } else { 5281 *physpage_p = 0; 5282 } 5283 } 5284 5285 PAGE_WAKEUP_DONE(m); 5286 UNLOCK_AND_DEALLOCATE; 5287 5288 return kr; 5289 5290} 5291 5292/* 5293 * Routine: vm_fault_copy_cleanup 5294 * Purpose: 5295 * Release a page used by vm_fault_copy. 5296 */ 5297 5298void 5299vm_fault_copy_cleanup( 5300 vm_page_t page, 5301 vm_page_t top_page) 5302{ 5303 vm_object_t object = page->object; 5304 5305 vm_object_lock(object); 5306 PAGE_WAKEUP_DONE(page); 5307 if (!page->active && !page->inactive && !page->throttled) { 5308 vm_page_lockspin_queues(); 5309 if (!page->active && !page->inactive && !page->throttled) 5310 vm_page_activate(page); 5311 vm_page_unlock_queues(); 5312 } 5313 vm_fault_cleanup(object, top_page); 5314} 5315 5316void 5317vm_fault_copy_dst_cleanup( 5318 vm_page_t page) 5319{ 5320 vm_object_t object; 5321 5322 if (page != VM_PAGE_NULL) { 5323 object = page->object; 5324 vm_object_lock(object); 5325 vm_page_lockspin_queues(); 5326 vm_page_unwire(page, TRUE); 5327 vm_page_unlock_queues(); 5328 vm_object_paging_end(object); 5329 vm_object_unlock(object); 5330 } 5331} 5332 5333/* 5334 * Routine: vm_fault_copy 5335 * 5336 * Purpose: 5337 * Copy pages from one virtual memory object to another -- 5338 * neither the source nor destination pages need be resident. 5339 * 5340 * Before actually copying a page, the version associated with 5341 * the destination address map wil be verified. 5342 * 5343 * In/out conditions: 5344 * The caller must hold a reference, but not a lock, to 5345 * each of the source and destination objects and to the 5346 * destination map. 5347 * 5348 * Results: 5349 * Returns KERN_SUCCESS if no errors were encountered in 5350 * reading or writing the data. Returns KERN_INTERRUPTED if 5351 * the operation was interrupted (only possible if the 5352 * "interruptible" argument is asserted). Other return values 5353 * indicate a permanent error in copying the data. 5354 * 5355 * The actual amount of data copied will be returned in the 5356 * "copy_size" argument. In the event that the destination map 5357 * verification failed, this amount may be less than the amount 5358 * requested. 5359 */ 5360kern_return_t 5361vm_fault_copy( 5362 vm_object_t src_object, 5363 vm_object_offset_t src_offset, 5364 vm_map_size_t *copy_size, /* INOUT */ 5365 vm_object_t dst_object, 5366 vm_object_offset_t dst_offset, 5367 vm_map_t dst_map, 5368 vm_map_version_t *dst_version, 5369 int interruptible) 5370{ 5371 vm_page_t result_page; 5372 5373 vm_page_t src_page; 5374 vm_page_t src_top_page; 5375 vm_prot_t src_prot; 5376 5377 vm_page_t dst_page; 5378 vm_page_t dst_top_page; 5379 vm_prot_t dst_prot; 5380 5381 vm_map_size_t amount_left; 5382 vm_object_t old_copy_object; 5383 kern_return_t error = 0; 5384 vm_fault_return_t result; 5385 5386 vm_map_size_t part_size; 5387 struct vm_object_fault_info fault_info_src; 5388 struct vm_object_fault_info fault_info_dst; 5389 5390 /* 5391 * In order not to confuse the clustered pageins, align 5392 * the different offsets on a page boundary. 5393 */ 5394 5395#define RETURN(x) \ 5396 MACRO_BEGIN \ 5397 *copy_size -= amount_left; \ 5398 MACRO_RETURN(x); \ 5399 MACRO_END 5400 5401 amount_left = *copy_size; 5402 5403 fault_info_src.interruptible = interruptible; 5404 fault_info_src.behavior = VM_BEHAVIOR_SEQUENTIAL; 5405 fault_info_src.user_tag = 0; 5406 fault_info_src.pmap_options = 0; 5407 fault_info_src.lo_offset = vm_object_trunc_page(src_offset); 5408 fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left; 5409 fault_info_src.no_cache = FALSE; 5410 fault_info_src.stealth = TRUE; 5411 fault_info_src.io_sync = FALSE; 5412 fault_info_src.cs_bypass = FALSE; 5413 fault_info_src.mark_zf_absent = FALSE; 5414 fault_info_src.batch_pmap_op = FALSE; 5415 5416 fault_info_dst.interruptible = interruptible; 5417 fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL; 5418 fault_info_dst.user_tag = 0; 5419 fault_info_dst.pmap_options = 0; 5420 fault_info_dst.lo_offset = vm_object_trunc_page(dst_offset); 5421 fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left; 5422 fault_info_dst.no_cache = FALSE; 5423 fault_info_dst.stealth = TRUE; 5424 fault_info_dst.io_sync = FALSE; 5425 fault_info_dst.cs_bypass = FALSE; 5426 fault_info_dst.mark_zf_absent = FALSE; 5427 fault_info_dst.batch_pmap_op = FALSE; 5428 5429 do { /* while (amount_left > 0) */ 5430 /* 5431 * There may be a deadlock if both source and destination 5432 * pages are the same. To avoid this deadlock, the copy must 5433 * start by getting the destination page in order to apply 5434 * COW semantics if any. 5435 */ 5436 5437 RetryDestinationFault: ; 5438 5439 dst_prot = VM_PROT_WRITE|VM_PROT_READ; 5440 5441 vm_object_lock(dst_object); 5442 vm_object_paging_begin(dst_object); 5443 5444 if (amount_left > (vm_size_t) -1) { 5445 /* 32-bit overflow */ 5446 fault_info_dst.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 5447 } else { 5448 fault_info_dst.cluster_size = (vm_size_t) amount_left; 5449 assert(fault_info_dst.cluster_size == amount_left); 5450 } 5451 5452 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0); 5453 dst_page = VM_PAGE_NULL; 5454 result = vm_fault_page(dst_object, 5455 vm_object_trunc_page(dst_offset), 5456 VM_PROT_WRITE|VM_PROT_READ, 5457 FALSE, 5458 FALSE, /* page not looked up */ 5459 &dst_prot, &dst_page, &dst_top_page, 5460 (int *)0, 5461 &error, 5462 dst_map->no_zero_fill, 5463 FALSE, &fault_info_dst); 5464 switch (result) { 5465 case VM_FAULT_SUCCESS: 5466 break; 5467 case VM_FAULT_RETRY: 5468 goto RetryDestinationFault; 5469 case VM_FAULT_MEMORY_SHORTAGE: 5470 if (vm_page_wait(interruptible)) 5471 goto RetryDestinationFault; 5472 /* fall thru */ 5473 case VM_FAULT_INTERRUPTED: 5474 RETURN(MACH_SEND_INTERRUPTED); 5475 case VM_FAULT_SUCCESS_NO_VM_PAGE: 5476 /* success but no VM page: fail the copy */ 5477 vm_object_paging_end(dst_object); 5478 vm_object_unlock(dst_object); 5479 /*FALLTHROUGH*/ 5480 case VM_FAULT_MEMORY_ERROR: 5481 if (error) 5482 return (error); 5483 else 5484 return(KERN_MEMORY_ERROR); 5485 default: 5486 panic("vm_fault_copy: unexpected error 0x%x from " 5487 "vm_fault_page()\n", result); 5488 } 5489 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE); 5490 5491 old_copy_object = dst_page->object->copy; 5492 5493 /* 5494 * There exists the possiblity that the source and 5495 * destination page are the same. But we can't 5496 * easily determine that now. If they are the 5497 * same, the call to vm_fault_page() for the 5498 * destination page will deadlock. To prevent this we 5499 * wire the page so we can drop busy without having 5500 * the page daemon steal the page. We clean up the 5501 * top page but keep the paging reference on the object 5502 * holding the dest page so it doesn't go away. 5503 */ 5504 5505 vm_page_lockspin_queues(); 5506 vm_page_wire(dst_page); 5507 vm_page_unlock_queues(); 5508 PAGE_WAKEUP_DONE(dst_page); 5509 vm_object_unlock(dst_page->object); 5510 5511 if (dst_top_page != VM_PAGE_NULL) { 5512 vm_object_lock(dst_object); 5513 VM_PAGE_FREE(dst_top_page); 5514 vm_object_paging_end(dst_object); 5515 vm_object_unlock(dst_object); 5516 } 5517 5518 RetrySourceFault: ; 5519 5520 if (src_object == VM_OBJECT_NULL) { 5521 /* 5522 * No source object. We will just 5523 * zero-fill the page in dst_object. 5524 */ 5525 src_page = VM_PAGE_NULL; 5526 result_page = VM_PAGE_NULL; 5527 } else { 5528 vm_object_lock(src_object); 5529 src_page = vm_page_lookup(src_object, 5530 vm_object_trunc_page(src_offset)); 5531 if (src_page == dst_page) { 5532 src_prot = dst_prot; 5533 result_page = VM_PAGE_NULL; 5534 } else { 5535 src_prot = VM_PROT_READ; 5536 vm_object_paging_begin(src_object); 5537 5538 if (amount_left > (vm_size_t) -1) { 5539 /* 32-bit overflow */ 5540 fault_info_src.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 5541 } else { 5542 fault_info_src.cluster_size = (vm_size_t) amount_left; 5543 assert(fault_info_src.cluster_size == amount_left); 5544 } 5545 5546 XPR(XPR_VM_FAULT, 5547 "vm_fault_copy(2) -> vm_fault_page\n", 5548 0,0,0,0,0); 5549 result_page = VM_PAGE_NULL; 5550 result = vm_fault_page( 5551 src_object, 5552 vm_object_trunc_page(src_offset), 5553 VM_PROT_READ, FALSE, 5554 FALSE, /* page not looked up */ 5555 &src_prot, 5556 &result_page, &src_top_page, 5557 (int *)0, &error, FALSE, 5558 FALSE, &fault_info_src); 5559 5560 switch (result) { 5561 case VM_FAULT_SUCCESS: 5562 break; 5563 case VM_FAULT_RETRY: 5564 goto RetrySourceFault; 5565 case VM_FAULT_MEMORY_SHORTAGE: 5566 if (vm_page_wait(interruptible)) 5567 goto RetrySourceFault; 5568 /* fall thru */ 5569 case VM_FAULT_INTERRUPTED: 5570 vm_fault_copy_dst_cleanup(dst_page); 5571 RETURN(MACH_SEND_INTERRUPTED); 5572 case VM_FAULT_SUCCESS_NO_VM_PAGE: 5573 /* success but no VM page: fail */ 5574 vm_object_paging_end(src_object); 5575 vm_object_unlock(src_object); 5576 /*FALLTHROUGH*/ 5577 case VM_FAULT_MEMORY_ERROR: 5578 vm_fault_copy_dst_cleanup(dst_page); 5579 if (error) 5580 return (error); 5581 else 5582 return(KERN_MEMORY_ERROR); 5583 default: 5584 panic("vm_fault_copy(2): unexpected " 5585 "error 0x%x from " 5586 "vm_fault_page()\n", result); 5587 } 5588 5589 5590 assert((src_top_page == VM_PAGE_NULL) == 5591 (result_page->object == src_object)); 5592 } 5593 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE); 5594 vm_object_unlock(result_page->object); 5595 } 5596 5597 if (!vm_map_verify(dst_map, dst_version)) { 5598 if (result_page != VM_PAGE_NULL && src_page != dst_page) 5599 vm_fault_copy_cleanup(result_page, src_top_page); 5600 vm_fault_copy_dst_cleanup(dst_page); 5601 break; 5602 } 5603 5604 vm_object_lock(dst_page->object); 5605 5606 if (dst_page->object->copy != old_copy_object) { 5607 vm_object_unlock(dst_page->object); 5608 vm_map_verify_done(dst_map, dst_version); 5609 if (result_page != VM_PAGE_NULL && src_page != dst_page) 5610 vm_fault_copy_cleanup(result_page, src_top_page); 5611 vm_fault_copy_dst_cleanup(dst_page); 5612 break; 5613 } 5614 vm_object_unlock(dst_page->object); 5615 5616 /* 5617 * Copy the page, and note that it is dirty 5618 * immediately. 5619 */ 5620 5621 if (!page_aligned(src_offset) || 5622 !page_aligned(dst_offset) || 5623 !page_aligned(amount_left)) { 5624 5625 vm_object_offset_t src_po, 5626 dst_po; 5627 5628 src_po = src_offset - vm_object_trunc_page(src_offset); 5629 dst_po = dst_offset - vm_object_trunc_page(dst_offset); 5630 5631 if (dst_po > src_po) { 5632 part_size = PAGE_SIZE - dst_po; 5633 } else { 5634 part_size = PAGE_SIZE - src_po; 5635 } 5636 if (part_size > (amount_left)){ 5637 part_size = amount_left; 5638 } 5639 5640 if (result_page == VM_PAGE_NULL) { 5641 assert((vm_offset_t) dst_po == dst_po); 5642 assert((vm_size_t) part_size == part_size); 5643 vm_page_part_zero_fill(dst_page, 5644 (vm_offset_t) dst_po, 5645 (vm_size_t) part_size); 5646 } else { 5647 assert((vm_offset_t) src_po == src_po); 5648 assert((vm_offset_t) dst_po == dst_po); 5649 assert((vm_size_t) part_size == part_size); 5650 vm_page_part_copy(result_page, 5651 (vm_offset_t) src_po, 5652 dst_page, 5653 (vm_offset_t) dst_po, 5654 (vm_size_t)part_size); 5655 if(!dst_page->dirty){ 5656 vm_object_lock(dst_object); 5657 SET_PAGE_DIRTY(dst_page, TRUE); 5658 vm_object_unlock(dst_page->object); 5659 } 5660 5661 } 5662 } else { 5663 part_size = PAGE_SIZE; 5664 5665 if (result_page == VM_PAGE_NULL) 5666 vm_page_zero_fill(dst_page); 5667 else{ 5668 vm_object_lock(result_page->object); 5669 vm_page_copy(result_page, dst_page); 5670 vm_object_unlock(result_page->object); 5671 5672 if(!dst_page->dirty){ 5673 vm_object_lock(dst_object); 5674 SET_PAGE_DIRTY(dst_page, TRUE); 5675 vm_object_unlock(dst_page->object); 5676 } 5677 } 5678 5679 } 5680 5681 /* 5682 * Unlock everything, and return 5683 */ 5684 5685 vm_map_verify_done(dst_map, dst_version); 5686 5687 if (result_page != VM_PAGE_NULL && src_page != dst_page) 5688 vm_fault_copy_cleanup(result_page, src_top_page); 5689 vm_fault_copy_dst_cleanup(dst_page); 5690 5691 amount_left -= part_size; 5692 src_offset += part_size; 5693 dst_offset += part_size; 5694 } while (amount_left > 0); 5695 5696 RETURN(KERN_SUCCESS); 5697#undef RETURN 5698 5699 /*NOTREACHED*/ 5700} 5701 5702#if VM_FAULT_CLASSIFY 5703/* 5704 * Temporary statistics gathering support. 5705 */ 5706 5707/* 5708 * Statistics arrays: 5709 */ 5710#define VM_FAULT_TYPES_MAX 5 5711#define VM_FAULT_LEVEL_MAX 8 5712 5713int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX]; 5714 5715#define VM_FAULT_TYPE_ZERO_FILL 0 5716#define VM_FAULT_TYPE_MAP_IN 1 5717#define VM_FAULT_TYPE_PAGER 2 5718#define VM_FAULT_TYPE_COPY 3 5719#define VM_FAULT_TYPE_OTHER 4 5720 5721 5722void 5723vm_fault_classify(vm_object_t object, 5724 vm_object_offset_t offset, 5725 vm_prot_t fault_type) 5726{ 5727 int type, level = 0; 5728 vm_page_t m; 5729 5730 while (TRUE) { 5731 m = vm_page_lookup(object, offset); 5732 if (m != VM_PAGE_NULL) { 5733 if (m->busy || m->error || m->restart || m->absent) { 5734 type = VM_FAULT_TYPE_OTHER; 5735 break; 5736 } 5737 if (((fault_type & VM_PROT_WRITE) == 0) || 5738 ((level == 0) && object->copy == VM_OBJECT_NULL)) { 5739 type = VM_FAULT_TYPE_MAP_IN; 5740 break; 5741 } 5742 type = VM_FAULT_TYPE_COPY; 5743 break; 5744 } 5745 else { 5746 if (object->pager_created) { 5747 type = VM_FAULT_TYPE_PAGER; 5748 break; 5749 } 5750 if (object->shadow == VM_OBJECT_NULL) { 5751 type = VM_FAULT_TYPE_ZERO_FILL; 5752 break; 5753 } 5754 5755 offset += object->vo_shadow_offset; 5756 object = object->shadow; 5757 level++; 5758 continue; 5759 } 5760 } 5761 5762 if (level > VM_FAULT_LEVEL_MAX) 5763 level = VM_FAULT_LEVEL_MAX; 5764 5765 vm_fault_stats[type][level] += 1; 5766 5767 return; 5768} 5769 5770/* cleanup routine to call from debugger */ 5771 5772void 5773vm_fault_classify_init(void) 5774{ 5775 int type, level; 5776 5777 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) { 5778 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) { 5779 vm_fault_stats[type][level] = 0; 5780 } 5781 } 5782 5783 return; 5784} 5785#endif /* VM_FAULT_CLASSIFY */ 5786 5787 5788void 5789vm_page_validate_cs_mapped( 5790 vm_page_t page, 5791 const void *kaddr) 5792{ 5793 vm_object_t object; 5794 vm_object_offset_t offset; 5795 kern_return_t kr; 5796 memory_object_t pager; 5797 void *blobs; 5798 boolean_t validated, tainted; 5799 5800 assert(page->busy); 5801 vm_object_lock_assert_exclusive(page->object); 5802 5803 if (!cs_validation) { 5804 return; 5805 } 5806 5807 if (page->wpmapped && !page->cs_tainted) { 5808 /* 5809 * This page was mapped for "write" access sometime in the 5810 * past and could still be modifiable in the future. 5811 * Consider it tainted. 5812 * [ If the page was already found to be "tainted", no 5813 * need to re-validate. ] 5814 */ 5815 page->cs_validated = TRUE; 5816 page->cs_tainted = TRUE; 5817 if (cs_debug) { 5818 printf("CODESIGNING: vm_page_validate_cs: " 5819 "page %p obj %p off 0x%llx " 5820 "was modified\n", 5821 page, page->object, page->offset); 5822 } 5823 vm_cs_validated_dirtied++; 5824 } 5825 5826 if (page->cs_validated) { 5827 return; 5828 } 5829 5830 vm_cs_validates++; 5831 5832 object = page->object; 5833 assert(object->code_signed); 5834 offset = page->offset; 5835 5836 if (!object->alive || object->terminating || object->pager == NULL) { 5837 /* 5838 * The object is terminating and we don't have its pager 5839 * so we can't validate the data... 5840 */ 5841 return; 5842 } 5843 /* 5844 * Since we get here to validate a page that was brought in by 5845 * the pager, we know that this pager is all setup and ready 5846 * by now. 5847 */ 5848 assert(!object->internal); 5849 assert(object->pager != NULL); 5850 assert(object->pager_ready); 5851 5852 pager = object->pager; 5853 assert(object->paging_in_progress); 5854 kr = vnode_pager_get_object_cs_blobs(pager, &blobs); 5855 if (kr != KERN_SUCCESS) { 5856 blobs = NULL; 5857 } 5858 5859 /* verify the SHA1 hash for this page */ 5860 validated = cs_validate_page(blobs, 5861 pager, 5862 offset + object->paging_offset, 5863 (const void *)kaddr, 5864 &tainted); 5865 5866 page->cs_validated = validated; 5867 if (validated) { 5868 page->cs_tainted = tainted; 5869 } 5870} 5871 5872void 5873vm_page_validate_cs( 5874 vm_page_t page) 5875{ 5876 vm_object_t object; 5877 vm_object_offset_t offset; 5878 vm_map_offset_t koffset; 5879 vm_map_size_t ksize; 5880 vm_offset_t kaddr; 5881 kern_return_t kr; 5882 boolean_t busy_page; 5883 boolean_t need_unmap; 5884 5885 vm_object_lock_assert_held(page->object); 5886 5887 if (!cs_validation) { 5888 return; 5889 } 5890 5891 if (page->wpmapped && !page->cs_tainted) { 5892 vm_object_lock_assert_exclusive(page->object); 5893 5894 /* 5895 * This page was mapped for "write" access sometime in the 5896 * past and could still be modifiable in the future. 5897 * Consider it tainted. 5898 * [ If the page was already found to be "tainted", no 5899 * need to re-validate. ] 5900 */ 5901 page->cs_validated = TRUE; 5902 page->cs_tainted = TRUE; 5903 if (cs_debug) { 5904 printf("CODESIGNING: vm_page_validate_cs: " 5905 "page %p obj %p off 0x%llx " 5906 "was modified\n", 5907 page, page->object, page->offset); 5908 } 5909 vm_cs_validated_dirtied++; 5910 } 5911 5912 if (page->cs_validated) { 5913 return; 5914 } 5915 5916 if (page->slid) { 5917 panic("vm_page_validate_cs(%p): page is slid\n", page); 5918 } 5919 assert(!page->slid); 5920 5921#if CHECK_CS_VALIDATION_BITMAP 5922 if ( vnode_pager_cs_check_validation_bitmap( page->object->pager, trunc_page(page->offset + page->object->paging_offset), CS_BITMAP_CHECK ) == KERN_SUCCESS) { 5923 page->cs_validated = TRUE; 5924 page->cs_tainted = FALSE; 5925 vm_cs_bitmap_validated++; 5926 return; 5927 } 5928#endif 5929 vm_object_lock_assert_exclusive(page->object); 5930 5931 object = page->object; 5932 assert(object->code_signed); 5933 offset = page->offset; 5934 5935 busy_page = page->busy; 5936 if (!busy_page) { 5937 /* keep page busy while we map (and unlock) the VM object */ 5938 page->busy = TRUE; 5939 } 5940 5941 /* 5942 * Take a paging reference on the VM object 5943 * to protect it from collapse or bypass, 5944 * and keep it from disappearing too. 5945 */ 5946 vm_object_paging_begin(object); 5947 5948 /* map the page in the kernel address space */ 5949 ksize = PAGE_SIZE_64; 5950 koffset = 0; 5951 need_unmap = FALSE; 5952 kr = vm_paging_map_object(page, 5953 object, 5954 offset, 5955 VM_PROT_READ, 5956 FALSE, /* can't unlock object ! */ 5957 &ksize, 5958 &koffset, 5959 &need_unmap); 5960 if (kr != KERN_SUCCESS) { 5961 panic("vm_page_validate_cs: could not map page: 0x%x\n", kr); 5962 } 5963 kaddr = CAST_DOWN(vm_offset_t, koffset); 5964 5965 /* validate the mapped page */ 5966 vm_page_validate_cs_mapped(page, (const void *) kaddr); 5967 5968#if CHECK_CS_VALIDATION_BITMAP 5969 if ( page->cs_validated == TRUE && page->cs_tainted == FALSE ) { 5970 vnode_pager_cs_check_validation_bitmap( object->pager, trunc_page( offset + object->paging_offset), CS_BITMAP_SET ); 5971 } 5972#endif 5973 assert(page->busy); 5974 assert(object == page->object); 5975 vm_object_lock_assert_exclusive(object); 5976 5977 if (!busy_page) { 5978 PAGE_WAKEUP_DONE(page); 5979 } 5980 if (need_unmap) { 5981 /* unmap the map from the kernel address space */ 5982 vm_paging_unmap_object(object, koffset, koffset + ksize); 5983 koffset = 0; 5984 ksize = 0; 5985 kaddr = 0; 5986 } 5987 vm_object_paging_end(object); 5988} 5989