1/* 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58/* 59 * File: vm_fault.c 60 * Author: Avadis Tevanian, Jr., Michael Wayne Young 61 * 62 * Page fault handling module. 63 */ 64 65#include <mach_cluster_stats.h> 66#include <mach_pagemap.h> 67#include <libkern/OSAtomic.h> 68 69#include <mach/mach_types.h> 70#include <mach/kern_return.h> 71#include <mach/message.h> /* for error codes */ 72#include <mach/vm_param.h> 73#include <mach/vm_behavior.h> 74#include <mach/memory_object.h> 75 /* For memory_object_data_{request,unlock} */ 76#include <mach/sdt.h> 77 78#include <kern/kern_types.h> 79#include <kern/host_statistics.h> 80#include <kern/counters.h> 81#include <kern/task.h> 82#include <kern/thread.h> 83#include <kern/sched_prim.h> 84#include <kern/host.h> 85#include <kern/xpr.h> 86#include <kern/mach_param.h> 87#include <kern/macro_help.h> 88#include <kern/zalloc.h> 89#include <kern/misc_protos.h> 90 91#include <vm/vm_compressor.h> 92#include <vm/vm_compressor_pager.h> 93#include <vm/vm_fault.h> 94#include <vm/vm_map.h> 95#include <vm/vm_object.h> 96#include <vm/vm_page.h> 97#include <vm/vm_kern.h> 98#include <vm/pmap.h> 99#include <vm/vm_pageout.h> 100#include <vm/vm_protos.h> 101#include <vm/vm_external.h> 102#include <vm/memory_object.h> 103#include <vm/vm_purgeable_internal.h> /* Needed by some vm_page.h macros */ 104#include <vm/vm_shared_region.h> 105 106#include <sys/codesign.h> 107 108#include <libsa/sys/timers.h> /* for struct timespec */ 109 110#define VM_FAULT_CLASSIFY 0 111 112#define TRACEFAULTPAGE 0 /* (TEST/DEBUG) */ 113 114int vm_object_pagein_throttle = 16; 115 116/* 117 * We apply a hard throttle to the demand zero rate of tasks that we believe are running out of control which 118 * kicks in when swap space runs out. 64-bit programs have massive address spaces and can leak enormous amounts 119 * of memory if they're buggy and can run the system completely out of swap space. If this happens, we 120 * impose a hard throttle on them to prevent them from taking the last bit of memory left. This helps 121 * keep the UI active so that the user has a chance to kill the offending task before the system 122 * completely hangs. 123 * 124 * The hard throttle is only applied when the system is nearly completely out of swap space and is only applied 125 * to tasks that appear to be bloated. When swap runs out, any task using more than vm_hard_throttle_threshold 126 * will be throttled. The throttling is done by giving the thread that's trying to demand zero a page a 127 * delay of HARD_THROTTLE_DELAY microseconds before being allowed to try the page fault again. 128 */ 129 130extern void throttle_lowpri_io(int); 131 132uint64_t vm_hard_throttle_threshold; 133 134 135 136#define NEED_TO_HARD_THROTTLE_THIS_TASK() ((current_task() != kernel_task && \ 137 get_task_resident_size(current_task()) > (((AVAILABLE_NON_COMPRESSED_MEMORY) * PAGE_SIZE) / 5)) && \ 138 (vm_low_on_space() || (vm_page_free_count < vm_page_throttle_limit && \ 139 proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO) >= THROTTLE_LEVEL_THROTTLED ))) 140 141 142 143#define HARD_THROTTLE_DELAY 20000 /* 20000 us == 20 ms */ 144#define SOFT_THROTTLE_DELAY 2000 /* 2000 us == 2 ms */ 145 146boolean_t current_thread_aborted(void); 147 148/* Forward declarations of internal routines. */ 149extern kern_return_t vm_fault_wire_fast( 150 vm_map_t map, 151 vm_map_offset_t va, 152 vm_map_entry_t entry, 153 pmap_t pmap, 154 vm_map_offset_t pmap_addr); 155 156extern void vm_fault_continue(void); 157 158extern void vm_fault_copy_cleanup( 159 vm_page_t page, 160 vm_page_t top_page); 161 162extern void vm_fault_copy_dst_cleanup( 163 vm_page_t page); 164 165#if VM_FAULT_CLASSIFY 166extern void vm_fault_classify(vm_object_t object, 167 vm_object_offset_t offset, 168 vm_prot_t fault_type); 169 170extern void vm_fault_classify_init(void); 171#endif 172 173unsigned long vm_pmap_enter_blocked = 0; 174unsigned long vm_pmap_enter_retried = 0; 175 176unsigned long vm_cs_validates = 0; 177unsigned long vm_cs_revalidates = 0; 178unsigned long vm_cs_query_modified = 0; 179unsigned long vm_cs_validated_dirtied = 0; 180unsigned long vm_cs_bitmap_validated = 0; 181 182/* 183 * Routine: vm_fault_init 184 * Purpose: 185 * Initialize our private data structures. 186 */ 187void 188vm_fault_init(void) 189{ 190 int i, vm_compressor_temp; 191 boolean_t need_default_val = TRUE; 192 /* 193 * Choose a value for the hard throttle threshold based on the amount of ram. The threshold is 194 * computed as a percentage of available memory, and the percentage used is scaled inversely with 195 * the amount of memory. The percentage runs between 10% and 35%. We use 35% for small memory systems 196 * and reduce the value down to 10% for very large memory configurations. This helps give us a 197 * definition of a memory hog that makes more sense relative to the amount of ram in the machine. 198 * The formula here simply uses the number of gigabytes of ram to adjust the percentage. 199 */ 200 201 vm_hard_throttle_threshold = sane_size * (35 - MIN((int)(sane_size / (1024*1024*1024)), 25)) / 100; 202 203 /* 204 * Configure compressed pager behavior. A boot arg takes precedence over a device tree entry. 205 */ 206 207 if (PE_parse_boot_argn("vm_compressor", &vm_compressor_temp, sizeof (vm_compressor_temp))) { 208 for ( i = 0; i < VM_PAGER_MAX_MODES; i++) { 209 if (vm_compressor_temp > 0 && 210 ((vm_compressor_temp & ( 1 << i)) == vm_compressor_temp)) { 211 need_default_val = FALSE; 212 vm_compressor_mode = vm_compressor_temp; 213 break; 214 } 215 } 216 if (need_default_val) 217 printf("Ignoring \"vm_compressor\" boot arg %d\n", vm_compressor_temp); 218 } 219 if (need_default_val) { 220 /* If no boot arg or incorrect boot arg, try device tree. */ 221 PE_get_default("kern.vm_compressor", &vm_compressor_mode, sizeof(vm_compressor_mode)); 222 } 223 PE_parse_boot_argn("vm_compressor_threads", &vm_compressor_thread_count, sizeof (vm_compressor_thread_count)); 224 printf("\"vm_compressor_mode\" is %d\n", vm_compressor_mode); 225} 226 227/* 228 * Routine: vm_fault_cleanup 229 * Purpose: 230 * Clean up the result of vm_fault_page. 231 * Results: 232 * The paging reference for "object" is released. 233 * "object" is unlocked. 234 * If "top_page" is not null, "top_page" is 235 * freed and the paging reference for the object 236 * containing it is released. 237 * 238 * In/out conditions: 239 * "object" must be locked. 240 */ 241void 242vm_fault_cleanup( 243 register vm_object_t object, 244 register vm_page_t top_page) 245{ 246 vm_object_paging_end(object); 247 vm_object_unlock(object); 248 249 if (top_page != VM_PAGE_NULL) { 250 object = top_page->object; 251 252 vm_object_lock(object); 253 VM_PAGE_FREE(top_page); 254 vm_object_paging_end(object); 255 vm_object_unlock(object); 256 } 257} 258 259#if MACH_CLUSTER_STATS 260#define MAXCLUSTERPAGES 16 261struct { 262 unsigned long pages_in_cluster; 263 unsigned long pages_at_higher_offsets; 264 unsigned long pages_at_lower_offsets; 265} cluster_stats_in[MAXCLUSTERPAGES]; 266#define CLUSTER_STAT(clause) clause 267#define CLUSTER_STAT_HIGHER(x) \ 268 ((cluster_stats_in[(x)].pages_at_higher_offsets)++) 269#define CLUSTER_STAT_LOWER(x) \ 270 ((cluster_stats_in[(x)].pages_at_lower_offsets)++) 271#define CLUSTER_STAT_CLUSTER(x) \ 272 ((cluster_stats_in[(x)].pages_in_cluster)++) 273#else /* MACH_CLUSTER_STATS */ 274#define CLUSTER_STAT(clause) 275#endif /* MACH_CLUSTER_STATS */ 276 277#define ALIGNED(x) (((x) & (PAGE_SIZE_64 - 1)) == 0) 278 279 280boolean_t vm_page_deactivate_behind = TRUE; 281/* 282 * default sizes given VM_BEHAVIOR_DEFAULT reference behavior 283 */ 284#define VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW 128 285#define VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER 16 /* don't make this too big... */ 286 /* we use it to size an array on the stack */ 287 288int vm_default_behind = VM_DEFAULT_DEACTIVATE_BEHIND_WINDOW; 289 290#define MAX_SEQUENTIAL_RUN (1024 * 1024 * 1024) 291 292/* 293 * vm_page_is_sequential 294 * 295 * Determine if sequential access is in progress 296 * in accordance with the behavior specified. 297 * Update state to indicate current access pattern. 298 * 299 * object must have at least the shared lock held 300 */ 301static 302void 303vm_fault_is_sequential( 304 vm_object_t object, 305 vm_object_offset_t offset, 306 vm_behavior_t behavior) 307{ 308 vm_object_offset_t last_alloc; 309 int sequential; 310 int orig_sequential; 311 312 last_alloc = object->last_alloc; 313 sequential = object->sequential; 314 orig_sequential = sequential; 315 316 switch (behavior) { 317 case VM_BEHAVIOR_RANDOM: 318 /* 319 * reset indicator of sequential behavior 320 */ 321 sequential = 0; 322 break; 323 324 case VM_BEHAVIOR_SEQUENTIAL: 325 if (offset && last_alloc == offset - PAGE_SIZE_64) { 326 /* 327 * advance indicator of sequential behavior 328 */ 329 if (sequential < MAX_SEQUENTIAL_RUN) 330 sequential += PAGE_SIZE; 331 } else { 332 /* 333 * reset indicator of sequential behavior 334 */ 335 sequential = 0; 336 } 337 break; 338 339 case VM_BEHAVIOR_RSEQNTL: 340 if (last_alloc && last_alloc == offset + PAGE_SIZE_64) { 341 /* 342 * advance indicator of sequential behavior 343 */ 344 if (sequential > -MAX_SEQUENTIAL_RUN) 345 sequential -= PAGE_SIZE; 346 } else { 347 /* 348 * reset indicator of sequential behavior 349 */ 350 sequential = 0; 351 } 352 break; 353 354 case VM_BEHAVIOR_DEFAULT: 355 default: 356 if (offset && last_alloc == (offset - PAGE_SIZE_64)) { 357 /* 358 * advance indicator of sequential behavior 359 */ 360 if (sequential < 0) 361 sequential = 0; 362 if (sequential < MAX_SEQUENTIAL_RUN) 363 sequential += PAGE_SIZE; 364 365 } else if (last_alloc && last_alloc == (offset + PAGE_SIZE_64)) { 366 /* 367 * advance indicator of sequential behavior 368 */ 369 if (sequential > 0) 370 sequential = 0; 371 if (sequential > -MAX_SEQUENTIAL_RUN) 372 sequential -= PAGE_SIZE; 373 } else { 374 /* 375 * reset indicator of sequential behavior 376 */ 377 sequential = 0; 378 } 379 break; 380 } 381 if (sequential != orig_sequential) { 382 if (!OSCompareAndSwap(orig_sequential, sequential, (UInt32 *)&object->sequential)) { 383 /* 384 * if someone else has already updated object->sequential 385 * don't bother trying to update it or object->last_alloc 386 */ 387 return; 388 } 389 } 390 /* 391 * I'd like to do this with a OSCompareAndSwap64, but that 392 * doesn't exist for PPC... however, it shouldn't matter 393 * that much... last_alloc is maintained so that we can determine 394 * if a sequential access pattern is taking place... if only 395 * one thread is banging on this object, no problem with the unprotected 396 * update... if 2 or more threads are banging away, we run the risk of 397 * someone seeing a mangled update... however, in the face of multiple 398 * accesses, no sequential access pattern can develop anyway, so we 399 * haven't lost any real info. 400 */ 401 object->last_alloc = offset; 402} 403 404 405int vm_page_deactivate_behind_count = 0; 406 407/* 408 * vm_page_deactivate_behind 409 * 410 * Determine if sequential access is in progress 411 * in accordance with the behavior specified. If 412 * so, compute a potential page to deactivate and 413 * deactivate it. 414 * 415 * object must be locked. 416 * 417 * return TRUE if we actually deactivate a page 418 */ 419static 420boolean_t 421vm_fault_deactivate_behind( 422 vm_object_t object, 423 vm_object_offset_t offset, 424 vm_behavior_t behavior) 425{ 426 int n; 427 int pages_in_run = 0; 428 int max_pages_in_run = 0; 429 int sequential_run; 430 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 431 vm_object_offset_t run_offset = 0; 432 vm_object_offset_t pg_offset = 0; 433 vm_page_t m; 434 vm_page_t page_run[VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER]; 435 436 pages_in_run = 0; 437#if TRACEFAULTPAGE 438 dbgTrace(0xBEEF0018, (unsigned int) object, (unsigned int) vm_fault_deactivate_behind); /* (TEST/DEBUG) */ 439#endif 440 441 if (object == kernel_object || vm_page_deactivate_behind == FALSE) { 442 /* 443 * Do not deactivate pages from the kernel object: they 444 * are not intended to become pageable. 445 * or we've disabled the deactivate behind mechanism 446 */ 447 return FALSE; 448 } 449 if ((sequential_run = object->sequential)) { 450 if (sequential_run < 0) { 451 sequential_behavior = VM_BEHAVIOR_RSEQNTL; 452 sequential_run = 0 - sequential_run; 453 } else { 454 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 455 } 456 } 457 switch (behavior) { 458 case VM_BEHAVIOR_RANDOM: 459 break; 460 case VM_BEHAVIOR_SEQUENTIAL: 461 if (sequential_run >= (int)PAGE_SIZE) { 462 run_offset = 0 - PAGE_SIZE_64; 463 max_pages_in_run = 1; 464 } 465 break; 466 case VM_BEHAVIOR_RSEQNTL: 467 if (sequential_run >= (int)PAGE_SIZE) { 468 run_offset = PAGE_SIZE_64; 469 max_pages_in_run = 1; 470 } 471 break; 472 case VM_BEHAVIOR_DEFAULT: 473 default: 474 { vm_object_offset_t behind = vm_default_behind * PAGE_SIZE_64; 475 476 /* 477 * determine if the run of sequential accesss has been 478 * long enough on an object with default access behavior 479 * to consider it for deactivation 480 */ 481 if ((uint64_t)sequential_run >= behind && (sequential_run % (VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER * PAGE_SIZE)) == 0) { 482 /* 483 * the comparisons between offset and behind are done 484 * in this kind of odd fashion in order to prevent wrap around 485 * at the end points 486 */ 487 if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) { 488 if (offset >= behind) { 489 run_offset = 0 - behind; 490 pg_offset = PAGE_SIZE_64; 491 max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER; 492 } 493 } else { 494 if (offset < -behind) { 495 run_offset = behind; 496 pg_offset = 0 - PAGE_SIZE_64; 497 max_pages_in_run = VM_DEFAULT_DEACTIVATE_BEHIND_CLUSTER; 498 } 499 } 500 } 501 break; 502 } 503 } 504 for (n = 0; n < max_pages_in_run; n++) { 505 m = vm_page_lookup(object, offset + run_offset + (n * pg_offset)); 506 507 if (m && !m->laundry && !m->busy && !m->no_cache && !m->throttled && !m->fictitious && !m->absent) { 508 page_run[pages_in_run++] = m; 509 510 /* 511 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... 512 * 513 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being 514 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the 515 * new reference happens. If no futher references happen on the page after that remote TLB flushes 516 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue 517 * by pageout_scan, which is just fine since the last reference would have happened quite far 518 * in the past (TLB caches don't hang around for very long), and of course could just as easily 519 * have happened before we did the deactivate_behind. 520 */ 521 pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); 522 } 523 } 524 if (pages_in_run) { 525 vm_page_lockspin_queues(); 526 527 for (n = 0; n < pages_in_run; n++) { 528 529 m = page_run[n]; 530 531 vm_page_deactivate_internal(m, FALSE); 532 533 vm_page_deactivate_behind_count++; 534#if TRACEFAULTPAGE 535 dbgTrace(0xBEEF0019, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */ 536#endif 537 } 538 vm_page_unlock_queues(); 539 540 return TRUE; 541 } 542 return FALSE; 543} 544 545 546static int 547vm_page_throttled(void) 548{ 549 clock_sec_t elapsed_sec; 550 clock_sec_t tv_sec; 551 clock_usec_t tv_usec; 552 553 thread_t thread = current_thread(); 554 555 if (thread->options & TH_OPT_VMPRIV) 556 return (0); 557 558 thread->t_page_creation_count++; 559 560 if (NEED_TO_HARD_THROTTLE_THIS_TASK()) 561 return (HARD_THROTTLE_DELAY); 562 563 if ((vm_page_free_count < vm_page_throttle_limit || ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && SWAPPER_NEEDS_TO_UNTHROTTLE())) && 564 thread->t_page_creation_count > vm_page_creation_throttle) { 565 566 clock_get_system_microtime(&tv_sec, &tv_usec); 567 568 elapsed_sec = tv_sec - thread->t_page_creation_time; 569 570 if (elapsed_sec <= 6 || (thread->t_page_creation_count / elapsed_sec) >= (vm_page_creation_throttle / 6)) { 571 572 if (elapsed_sec >= 60) { 573 /* 574 * we'll reset our stats to give a well behaved app 575 * that was unlucky enough to accumulate a bunch of pages 576 * over a long period of time a chance to get out of 577 * the throttled state... we reset the counter and timestamp 578 * so that if it stays under the rate limit for the next second 579 * it will be back in our good graces... if it exceeds it, it 580 * will remain in the throttled state 581 */ 582 thread->t_page_creation_time = tv_sec; 583 thread->t_page_creation_count = (vm_page_creation_throttle / 6) * 5; 584 } 585 ++vm_page_throttle_count; 586 587 if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && HARD_THROTTLE_LIMIT_REACHED()) 588 return (HARD_THROTTLE_DELAY); 589 else 590 return (SOFT_THROTTLE_DELAY); 591 } 592 thread->t_page_creation_time = tv_sec; 593 thread->t_page_creation_count = 0; 594 } 595 return (0); 596} 597 598 599/* 600 * check for various conditions that would 601 * prevent us from creating a ZF page... 602 * cleanup is based on being called from vm_fault_page 603 * 604 * object must be locked 605 * object == m->object 606 */ 607static vm_fault_return_t 608vm_fault_check(vm_object_t object, vm_page_t m, vm_page_t first_m, boolean_t interruptible_state) 609{ 610 int throttle_delay; 611 612 if (object->shadow_severed || 613 VM_OBJECT_PURGEABLE_FAULT_ERROR(object)) { 614 /* 615 * Either: 616 * 1. the shadow chain was severed, 617 * 2. the purgeable object is volatile or empty and is marked 618 * to fault on access while volatile. 619 * Just have to return an error at this point 620 */ 621 if (m != VM_PAGE_NULL) 622 VM_PAGE_FREE(m); 623 vm_fault_cleanup(object, first_m); 624 625 thread_interrupt_level(interruptible_state); 626 627 return (VM_FAULT_MEMORY_ERROR); 628 } 629 if (vm_backing_store_low) { 630 /* 631 * are we protecting the system from 632 * backing store exhaustion. If so 633 * sleep unless we are privileged. 634 */ 635 if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) { 636 637 if (m != VM_PAGE_NULL) 638 VM_PAGE_FREE(m); 639 vm_fault_cleanup(object, first_m); 640 641 assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT); 642 643 thread_block(THREAD_CONTINUE_NULL); 644 thread_interrupt_level(interruptible_state); 645 646 return (VM_FAULT_RETRY); 647 } 648 } 649 if ((throttle_delay = vm_page_throttled())) { 650 /* 651 * we're throttling zero-fills... 652 * treat this as if we couldn't grab a page 653 */ 654 if (m != VM_PAGE_NULL) 655 VM_PAGE_FREE(m); 656 vm_fault_cleanup(object, first_m); 657 658 VM_DEBUG_EVENT(vmf_check_zfdelay, VMF_CHECK_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0); 659 660 delay(throttle_delay); 661 662 if (current_thread_aborted()) { 663 thread_interrupt_level(interruptible_state); 664 return VM_FAULT_INTERRUPTED; 665 } 666 thread_interrupt_level(interruptible_state); 667 668 return (VM_FAULT_MEMORY_SHORTAGE); 669 } 670 return (VM_FAULT_SUCCESS); 671} 672 673 674/* 675 * do the work to zero fill a page and 676 * inject it into the correct paging queue 677 * 678 * m->object must be locked 679 * page queue lock must NOT be held 680 */ 681static int 682vm_fault_zero_page(vm_page_t m, boolean_t no_zero_fill) 683{ 684 int my_fault = DBG_ZERO_FILL_FAULT; 685 686 /* 687 * This is is a zero-fill page fault... 688 * 689 * Checking the page lock is a waste of 690 * time; this page was absent, so 691 * it can't be page locked by a pager. 692 * 693 * we also consider it undefined 694 * with respect to instruction 695 * execution. i.e. it is the responsibility 696 * of higher layers to call for an instruction 697 * sync after changing the contents and before 698 * sending a program into this area. We 699 * choose this approach for performance 700 */ 701 m->pmapped = TRUE; 702 703 m->cs_validated = FALSE; 704 m->cs_tainted = FALSE; 705 706 if (no_zero_fill == TRUE) { 707 my_fault = DBG_NZF_PAGE_FAULT; 708 } else { 709 vm_page_zero_fill(m); 710 711 VM_STAT_INCR(zero_fill_count); 712 DTRACE_VM2(zfod, int, 1, (uint64_t *), NULL); 713 } 714 assert(!m->laundry); 715 assert(m->object != kernel_object); 716 //assert(m->pageq.next == NULL && m->pageq.prev == NULL); 717 718 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && 719 (m->object->purgable == VM_PURGABLE_DENY || 720 m->object->purgable == VM_PURGABLE_NONVOLATILE || 721 m->object->purgable == VM_PURGABLE_VOLATILE )) { 722 723 vm_page_lockspin_queues(); 724 725 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { 726 assert(!VM_PAGE_WIRED(m)); 727 728 /* 729 * can't be on the pageout queue since we don't 730 * have a pager to try and clean to 731 */ 732 assert(!m->pageout_queue); 733 734 VM_PAGE_QUEUES_REMOVE(m); 735 736 queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq); 737 m->throttled = TRUE; 738 vm_page_throttled_count++; 739 } 740 vm_page_unlock_queues(); 741 } 742 return (my_fault); 743} 744 745 746/* 747 * Routine: vm_fault_page 748 * Purpose: 749 * Find the resident page for the virtual memory 750 * specified by the given virtual memory object 751 * and offset. 752 * Additional arguments: 753 * The required permissions for the page is given 754 * in "fault_type". Desired permissions are included 755 * in "protection". 756 * fault_info is passed along to determine pagein cluster 757 * limits... it contains the expected reference pattern, 758 * cluster size if available, etc... 759 * 760 * If the desired page is known to be resident (for 761 * example, because it was previously wired down), asserting 762 * the "unwiring" parameter will speed the search. 763 * 764 * If the operation can be interrupted (by thread_abort 765 * or thread_terminate), then the "interruptible" 766 * parameter should be asserted. 767 * 768 * Results: 769 * The page containing the proper data is returned 770 * in "result_page". 771 * 772 * In/out conditions: 773 * The source object must be locked and referenced, 774 * and must donate one paging reference. The reference 775 * is not affected. The paging reference and lock are 776 * consumed. 777 * 778 * If the call succeeds, the object in which "result_page" 779 * resides is left locked and holding a paging reference. 780 * If this is not the original object, a busy page in the 781 * original object is returned in "top_page", to prevent other 782 * callers from pursuing this same data, along with a paging 783 * reference for the original object. The "top_page" should 784 * be destroyed when this guarantee is no longer required. 785 * The "result_page" is also left busy. It is not removed 786 * from the pageout queues. 787 * Special Case: 788 * A return value of VM_FAULT_SUCCESS_NO_PAGE means that the 789 * fault succeeded but there's no VM page (i.e. the VM object 790 * does not actually hold VM pages, but device memory or 791 * large pages). The object is still locked and we still hold a 792 * paging_in_progress reference. 793 */ 794unsigned int vm_fault_page_blocked_access = 0; 795unsigned int vm_fault_page_forced_retry = 0; 796 797vm_fault_return_t 798vm_fault_page( 799 /* Arguments: */ 800 vm_object_t first_object, /* Object to begin search */ 801 vm_object_offset_t first_offset, /* Offset into object */ 802 vm_prot_t fault_type, /* What access is requested */ 803 boolean_t must_be_resident,/* Must page be resident? */ 804 boolean_t caller_lookup, /* caller looked up page */ 805 /* Modifies in place: */ 806 vm_prot_t *protection, /* Protection for mapping */ 807 vm_page_t *result_page, /* Page found, if successful */ 808 /* Returns: */ 809 vm_page_t *top_page, /* Page in top object, if 810 * not result_page. */ 811 int *type_of_fault, /* if non-null, fill in with type of fault 812 * COW, zero-fill, etc... returned in trace point */ 813 /* More arguments: */ 814 kern_return_t *error_code, /* code if page is in error */ 815 boolean_t no_zero_fill, /* don't zero fill absent pages */ 816 boolean_t data_supply, /* treat as data_supply if 817 * it is a write fault and a full 818 * page is provided */ 819 vm_object_fault_info_t fault_info) 820{ 821 vm_page_t m; 822 vm_object_t object; 823 vm_object_offset_t offset; 824 vm_page_t first_m; 825 vm_object_t next_object; 826 vm_object_t copy_object; 827 boolean_t look_for_page; 828 boolean_t force_fault_retry = FALSE; 829 vm_prot_t access_required = fault_type; 830 vm_prot_t wants_copy_flag; 831 CLUSTER_STAT(int pages_at_higher_offsets;) 832 CLUSTER_STAT(int pages_at_lower_offsets;) 833 kern_return_t wait_result; 834 boolean_t interruptible_state; 835 boolean_t data_already_requested = FALSE; 836 vm_behavior_t orig_behavior; 837 vm_size_t orig_cluster_size; 838 vm_fault_return_t error; 839 int my_fault; 840 uint32_t try_failed_count; 841 int interruptible; /* how may fault be interrupted? */ 842 int external_state = VM_EXTERNAL_STATE_UNKNOWN; 843 memory_object_t pager; 844 vm_fault_return_t retval; 845 846/* 847 * MACH page map - an optional optimization where a bit map is maintained 848 * by the VM subsystem for internal objects to indicate which pages of 849 * the object currently reside on backing store. This existence map 850 * duplicates information maintained by the vnode pager. It is 851 * created at the time of the first pageout against the object, i.e. 852 * at the same time pager for the object is created. The optimization 853 * is designed to eliminate pager interaction overhead, if it is 854 * 'known' that the page does not exist on backing store. 855 * 856 * MUST_ASK_PAGER() evaluates to TRUE if the page specified by object/offset is 857 * either marked as paged out in the existence map for the object or no 858 * existence map exists for the object. MUST_ASK_PAGER() is one of the 859 * criteria in the decision to invoke the pager. It is also used as one 860 * of the criteria to terminate the scan for adjacent pages in a clustered 861 * pagein operation. Note that MUST_ASK_PAGER() always evaluates to TRUE for 862 * permanent objects. Note also that if the pager for an internal object 863 * has not been created, the pager is not invoked regardless of the value 864 * of MUST_ASK_PAGER() and that clustered pagein scans are only done on an object 865 * for which a pager has been created. 866 * 867 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset 868 * is marked as paged out in the existence map for the object. PAGED_OUT() 869 * PAGED_OUT() is used to determine if a page has already been pushed 870 * into a copy object in order to avoid a redundant page out operation. 871 */ 872#if MACH_PAGEMAP 873#define MUST_ASK_PAGER(o, f, s) \ 874 ((vm_external_state_get((o)->existence_map, (f)) \ 875 != VM_EXTERNAL_STATE_ABSENT) && \ 876 (s = (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)))) \ 877 != VM_EXTERNAL_STATE_ABSENT) 878#define PAGED_OUT(o, f) \ 879 ((vm_external_state_get((o)->existence_map, (f)) \ 880 == VM_EXTERNAL_STATE_EXISTS) || \ 881 (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)) \ 882 == VM_EXTERNAL_STATE_EXISTS)) 883#else /* MACH_PAGEMAP */ 884#define MUST_ASK_PAGER(o, f, s) \ 885 ((s = VM_COMPRESSOR_PAGER_STATE_GET((o), (f))) != VM_EXTERNAL_STATE_ABSENT) 886#define PAGED_OUT(o, f) \ 887 (VM_COMPRESSOR_PAGER_STATE_GET((o), (f)) == VM_EXTERNAL_STATE_EXISTS) 888#endif /* MACH_PAGEMAP */ 889 890/* 891 * Recovery actions 892 */ 893#define RELEASE_PAGE(m) \ 894 MACRO_BEGIN \ 895 PAGE_WAKEUP_DONE(m); \ 896 if (!m->active && !m->inactive && !m->throttled) { \ 897 vm_page_lockspin_queues(); \ 898 if (!m->active && !m->inactive && !m->throttled) { \ 899 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) \ 900 vm_page_deactivate(m); \ 901 else \ 902 vm_page_activate(m); \ 903 } \ 904 vm_page_unlock_queues(); \ 905 } \ 906 MACRO_END 907 908#if TRACEFAULTPAGE 909 dbgTrace(0xBEEF0002, (unsigned int) first_object, (unsigned int) first_offset); /* (TEST/DEBUG) */ 910#endif 911 912 interruptible = fault_info->interruptible; 913 interruptible_state = thread_interrupt_level(interruptible); 914 915 /* 916 * INVARIANTS (through entire routine): 917 * 918 * 1) At all times, we must either have the object 919 * lock or a busy page in some object to prevent 920 * some other thread from trying to bring in 921 * the same page. 922 * 923 * Note that we cannot hold any locks during the 924 * pager access or when waiting for memory, so 925 * we use a busy page then. 926 * 927 * 2) To prevent another thread from racing us down the 928 * shadow chain and entering a new page in the top 929 * object before we do, we must keep a busy page in 930 * the top object while following the shadow chain. 931 * 932 * 3) We must increment paging_in_progress on any object 933 * for which we have a busy page before dropping 934 * the object lock 935 * 936 * 4) We leave busy pages on the pageout queues. 937 * If the pageout daemon comes across a busy page, 938 * it will remove the page from the pageout queues. 939 */ 940 941 object = first_object; 942 offset = first_offset; 943 first_m = VM_PAGE_NULL; 944 access_required = fault_type; 945 946 947 XPR(XPR_VM_FAULT, 948 "vm_f_page: obj 0x%X, offset 0x%X, type %d, prot %d\n", 949 object, offset, fault_type, *protection, 0); 950 951 /* 952 * default type of fault 953 */ 954 my_fault = DBG_CACHE_HIT_FAULT; 955 956 while (TRUE) { 957#if TRACEFAULTPAGE 958 dbgTrace(0xBEEF0003, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */ 959#endif 960 if (!object->alive) { 961 /* 962 * object is no longer valid 963 * clean up and return error 964 */ 965 vm_fault_cleanup(object, first_m); 966 thread_interrupt_level(interruptible_state); 967 968 return (VM_FAULT_MEMORY_ERROR); 969 } 970 971 if (!object->pager_created && object->phys_contiguous) { 972 /* 973 * A physically-contiguous object without a pager: 974 * must be a "large page" object. We do not deal 975 * with VM pages for this object. 976 */ 977 caller_lookup = FALSE; 978 m = VM_PAGE_NULL; 979 goto phys_contig_object; 980 } 981 982 if (object->blocked_access) { 983 /* 984 * Access to this VM object has been blocked. 985 * Replace our "paging_in_progress" reference with 986 * a "activity_in_progress" reference and wait for 987 * access to be unblocked. 988 */ 989 caller_lookup = FALSE; /* no longer valid after sleep */ 990 vm_object_activity_begin(object); 991 vm_object_paging_end(object); 992 while (object->blocked_access) { 993 vm_object_sleep(object, 994 VM_OBJECT_EVENT_UNBLOCKED, 995 THREAD_UNINT); 996 } 997 vm_fault_page_blocked_access++; 998 vm_object_paging_begin(object); 999 vm_object_activity_end(object); 1000 } 1001 1002 /* 1003 * See whether the page at 'offset' is resident 1004 */ 1005 if (caller_lookup == TRUE) { 1006 /* 1007 * The caller has already looked up the page 1008 * and gave us the result in "result_page". 1009 * We can use this for the first lookup but 1010 * it loses its validity as soon as we unlock 1011 * the object. 1012 */ 1013 m = *result_page; 1014 caller_lookup = FALSE; /* no longer valid after that */ 1015 } else { 1016 m = vm_page_lookup(object, offset); 1017 } 1018#if TRACEFAULTPAGE 1019 dbgTrace(0xBEEF0004, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */ 1020#endif 1021 if (m != VM_PAGE_NULL) { 1022 1023 if (m->busy) { 1024 /* 1025 * The page is being brought in, 1026 * wait for it and then retry. 1027 */ 1028#if TRACEFAULTPAGE 1029 dbgTrace(0xBEEF0005, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */ 1030#endif 1031 wait_result = PAGE_SLEEP(object, m, interruptible); 1032 1033 XPR(XPR_VM_FAULT, 1034 "vm_f_page: block busy obj 0x%X, offset 0x%X, page 0x%X\n", 1035 object, offset, 1036 m, 0, 0); 1037 counter(c_vm_fault_page_block_busy_kernel++); 1038 1039 if (wait_result != THREAD_AWAKENED) { 1040 vm_fault_cleanup(object, first_m); 1041 thread_interrupt_level(interruptible_state); 1042 1043 if (wait_result == THREAD_RESTART) 1044 return (VM_FAULT_RETRY); 1045 else 1046 return (VM_FAULT_INTERRUPTED); 1047 } 1048 continue; 1049 } 1050 if (m->laundry) { 1051 m->pageout = FALSE; 1052 1053 if (!m->cleaning) 1054 vm_pageout_steal_laundry(m, FALSE); 1055 } 1056 if (m->phys_page == vm_page_guard_addr) { 1057 /* 1058 * Guard page: off limits ! 1059 */ 1060 if (fault_type == VM_PROT_NONE) { 1061 /* 1062 * The fault is not requesting any 1063 * access to the guard page, so it must 1064 * be just to wire or unwire it. 1065 * Let's pretend it succeeded... 1066 */ 1067 m->busy = TRUE; 1068 *result_page = m; 1069 assert(first_m == VM_PAGE_NULL); 1070 *top_page = first_m; 1071 if (type_of_fault) 1072 *type_of_fault = DBG_GUARD_FAULT; 1073 thread_interrupt_level(interruptible_state); 1074 return VM_FAULT_SUCCESS; 1075 } else { 1076 /* 1077 * The fault requests access to the 1078 * guard page: let's deny that ! 1079 */ 1080 vm_fault_cleanup(object, first_m); 1081 thread_interrupt_level(interruptible_state); 1082 return VM_FAULT_MEMORY_ERROR; 1083 } 1084 } 1085 1086 if (m->error) { 1087 /* 1088 * The page is in error, give up now. 1089 */ 1090#if TRACEFAULTPAGE 1091 dbgTrace(0xBEEF0006, (unsigned int) m, (unsigned int) error_code); /* (TEST/DEBUG) */ 1092#endif 1093 if (error_code) 1094 *error_code = KERN_MEMORY_ERROR; 1095 VM_PAGE_FREE(m); 1096 1097 vm_fault_cleanup(object, first_m); 1098 thread_interrupt_level(interruptible_state); 1099 1100 return (VM_FAULT_MEMORY_ERROR); 1101 } 1102 if (m->restart) { 1103 /* 1104 * The pager wants us to restart 1105 * at the top of the chain, 1106 * typically because it has moved the 1107 * page to another pager, then do so. 1108 */ 1109#if TRACEFAULTPAGE 1110 dbgTrace(0xBEEF0007, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */ 1111#endif 1112 VM_PAGE_FREE(m); 1113 1114 vm_fault_cleanup(object, first_m); 1115 thread_interrupt_level(interruptible_state); 1116 1117 return (VM_FAULT_RETRY); 1118 } 1119 if (m->absent) { 1120 /* 1121 * The page isn't busy, but is absent, 1122 * therefore it's deemed "unavailable". 1123 * 1124 * Remove the non-existent page (unless it's 1125 * in the top object) and move on down to the 1126 * next object (if there is one). 1127 */ 1128#if TRACEFAULTPAGE 1129 dbgTrace(0xBEEF0008, (unsigned int) m, (unsigned int) object->shadow); /* (TEST/DEBUG) */ 1130#endif 1131 next_object = object->shadow; 1132 1133 if (next_object == VM_OBJECT_NULL) { 1134 /* 1135 * Absent page at bottom of shadow 1136 * chain; zero fill the page we left 1137 * busy in the first object, and free 1138 * the absent page. 1139 */ 1140 assert(!must_be_resident); 1141 1142 /* 1143 * check for any conditions that prevent 1144 * us from creating a new zero-fill page 1145 * vm_fault_check will do all of the 1146 * fault cleanup in the case of an error condition 1147 * including resetting the thread_interrupt_level 1148 */ 1149 error = vm_fault_check(object, m, first_m, interruptible_state); 1150 1151 if (error != VM_FAULT_SUCCESS) 1152 return (error); 1153 1154 XPR(XPR_VM_FAULT, 1155 "vm_f_page: zero obj 0x%X, off 0x%X, page 0x%X, first_obj 0x%X\n", 1156 object, offset, 1157 m, 1158 first_object, 0); 1159 1160 if (object != first_object) { 1161 /* 1162 * free the absent page we just found 1163 */ 1164 VM_PAGE_FREE(m); 1165 1166 /* 1167 * drop reference and lock on current object 1168 */ 1169 vm_object_paging_end(object); 1170 vm_object_unlock(object); 1171 1172 /* 1173 * grab the original page we 1174 * 'soldered' in place and 1175 * retake lock on 'first_object' 1176 */ 1177 m = first_m; 1178 first_m = VM_PAGE_NULL; 1179 1180 object = first_object; 1181 offset = first_offset; 1182 1183 vm_object_lock(object); 1184 } else { 1185 /* 1186 * we're going to use the absent page we just found 1187 * so convert it to a 'busy' page 1188 */ 1189 m->absent = FALSE; 1190 m->busy = TRUE; 1191 } 1192 /* 1193 * zero-fill the page and put it on 1194 * the correct paging queue 1195 */ 1196 my_fault = vm_fault_zero_page(m, no_zero_fill); 1197 1198 if (fault_info->mark_zf_absent && no_zero_fill == TRUE) 1199 m->absent = TRUE; 1200 1201 break; 1202 } else { 1203 if (must_be_resident) 1204 vm_object_paging_end(object); 1205 else if (object != first_object) { 1206 vm_object_paging_end(object); 1207 VM_PAGE_FREE(m); 1208 } else { 1209 first_m = m; 1210 m->absent = FALSE; 1211 m->busy = TRUE; 1212 1213 vm_page_lockspin_queues(); 1214 1215 assert(!m->pageout_queue); 1216 VM_PAGE_QUEUES_REMOVE(m); 1217 1218 vm_page_unlock_queues(); 1219 } 1220 XPR(XPR_VM_FAULT, 1221 "vm_f_page: unavail obj 0x%X, off 0x%X, next_obj 0x%X, newoff 0x%X\n", 1222 object, offset, 1223 next_object, 1224 offset+object->vo_shadow_offset,0); 1225 1226 offset += object->vo_shadow_offset; 1227 fault_info->lo_offset += object->vo_shadow_offset; 1228 fault_info->hi_offset += object->vo_shadow_offset; 1229 access_required = VM_PROT_READ; 1230 1231 vm_object_lock(next_object); 1232 vm_object_unlock(object); 1233 object = next_object; 1234 vm_object_paging_begin(object); 1235 1236 /* 1237 * reset to default type of fault 1238 */ 1239 my_fault = DBG_CACHE_HIT_FAULT; 1240 1241 continue; 1242 } 1243 } 1244 if ((m->cleaning) 1245 && ((object != first_object) || (object->copy != VM_OBJECT_NULL)) 1246 && (fault_type & VM_PROT_WRITE)) { 1247 /* 1248 * This is a copy-on-write fault that will 1249 * cause us to revoke access to this page, but 1250 * this page is in the process of being cleaned 1251 * in a clustered pageout. We must wait until 1252 * the cleaning operation completes before 1253 * revoking access to the original page, 1254 * otherwise we might attempt to remove a 1255 * wired mapping. 1256 */ 1257#if TRACEFAULTPAGE 1258 dbgTrace(0xBEEF0009, (unsigned int) m, (unsigned int) offset); /* (TEST/DEBUG) */ 1259#endif 1260 XPR(XPR_VM_FAULT, 1261 "vm_f_page: cleaning obj 0x%X, offset 0x%X, page 0x%X\n", 1262 object, offset, 1263 m, 0, 0); 1264 /* 1265 * take an extra ref so that object won't die 1266 */ 1267 vm_object_reference_locked(object); 1268 1269 vm_fault_cleanup(object, first_m); 1270 1271 counter(c_vm_fault_page_block_backoff_kernel++); 1272 vm_object_lock(object); 1273 assert(object->ref_count > 0); 1274 1275 m = vm_page_lookup(object, offset); 1276 1277 if (m != VM_PAGE_NULL && m->cleaning) { 1278 PAGE_ASSERT_WAIT(m, interruptible); 1279 1280 vm_object_unlock(object); 1281 wait_result = thread_block(THREAD_CONTINUE_NULL); 1282 vm_object_deallocate(object); 1283 1284 goto backoff; 1285 } else { 1286 vm_object_unlock(object); 1287 1288 vm_object_deallocate(object); 1289 thread_interrupt_level(interruptible_state); 1290 1291 return (VM_FAULT_RETRY); 1292 } 1293 } 1294 if (type_of_fault == NULL && m->speculative && 1295 !(fault_info != NULL && fault_info->stealth)) { 1296 /* 1297 * If we were passed a non-NULL pointer for 1298 * "type_of_fault", than we came from 1299 * vm_fault... we'll let it deal with 1300 * this condition, since it 1301 * needs to see m->speculative to correctly 1302 * account the pageins, otherwise... 1303 * take it off the speculative queue, we'll 1304 * let the caller of vm_fault_page deal 1305 * with getting it onto the correct queue 1306 * 1307 * If the caller specified in fault_info that 1308 * it wants a "stealth" fault, we also leave 1309 * the page in the speculative queue. 1310 */ 1311 vm_page_lockspin_queues(); 1312 if (m->speculative) 1313 VM_PAGE_QUEUES_REMOVE(m); 1314 vm_page_unlock_queues(); 1315 } 1316 1317 if (m->encrypted) { 1318 /* 1319 * ENCRYPTED SWAP: 1320 * the user needs access to a page that we 1321 * encrypted before paging it out. 1322 * Decrypt the page now. 1323 * Keep it busy to prevent anyone from 1324 * accessing it during the decryption. 1325 */ 1326 m->busy = TRUE; 1327 vm_page_decrypt(m, 0); 1328 assert(object == m->object); 1329 assert(m->busy); 1330 PAGE_WAKEUP_DONE(m); 1331 1332 /* 1333 * Retry from the top, in case 1334 * something changed while we were 1335 * decrypting. 1336 */ 1337 continue; 1338 } 1339 ASSERT_PAGE_DECRYPTED(m); 1340 1341 if (m->object->code_signed) { 1342 /* 1343 * CODE SIGNING: 1344 * We just paged in a page from a signed 1345 * memory object but we don't need to 1346 * validate it now. We'll validate it if 1347 * when it gets mapped into a user address 1348 * space for the first time or when the page 1349 * gets copied to another object as a result 1350 * of a copy-on-write. 1351 */ 1352 } 1353 1354 /* 1355 * We mark the page busy and leave it on 1356 * the pageout queues. If the pageout 1357 * deamon comes across it, then it will 1358 * remove the page from the queue, but not the object 1359 */ 1360#if TRACEFAULTPAGE 1361 dbgTrace(0xBEEF000B, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */ 1362#endif 1363 XPR(XPR_VM_FAULT, 1364 "vm_f_page: found page obj 0x%X, offset 0x%X, page 0x%X\n", 1365 object, offset, m, 0, 0); 1366 assert(!m->busy); 1367 assert(!m->absent); 1368 1369 m->busy = TRUE; 1370 break; 1371 } 1372 1373 1374 /* 1375 * we get here when there is no page present in the object at 1376 * the offset we're interested in... we'll allocate a page 1377 * at this point if the pager associated with 1378 * this object can provide the data or we're the top object... 1379 * object is locked; m == NULL 1380 */ 1381 if (must_be_resident) { 1382 if (fault_type == VM_PROT_NONE && 1383 object == kernel_object) { 1384 /* 1385 * We've been called from vm_fault_unwire() 1386 * while removing a map entry that was allocated 1387 * with KMA_KOBJECT and KMA_VAONLY. This page 1388 * is not present and there's nothing more to 1389 * do here (nothing to unwire). 1390 */ 1391 vm_fault_cleanup(object, first_m); 1392 thread_interrupt_level(interruptible_state); 1393 1394 return VM_FAULT_MEMORY_ERROR; 1395 } 1396 1397 goto dont_look_for_page; 1398 } 1399 1400#if !MACH_PAGEMAP 1401 data_supply = FALSE; 1402#endif /* !MACH_PAGEMAP */ 1403 1404 look_for_page = (object->pager_created && (MUST_ASK_PAGER(object, offset, external_state) == TRUE) && !data_supply); 1405 1406#if TRACEFAULTPAGE 1407 dbgTrace(0xBEEF000C, (unsigned int) look_for_page, (unsigned int) object); /* (TEST/DEBUG) */ 1408#endif 1409 if (!look_for_page && object == first_object && !object->phys_contiguous) { 1410 /* 1411 * Allocate a new page for this object/offset pair as a placeholder 1412 */ 1413 m = vm_page_grab(); 1414#if TRACEFAULTPAGE 1415 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */ 1416#endif 1417 if (m == VM_PAGE_NULL) { 1418 1419 vm_fault_cleanup(object, first_m); 1420 thread_interrupt_level(interruptible_state); 1421 1422 return (VM_FAULT_MEMORY_SHORTAGE); 1423 } 1424 1425 if (fault_info && fault_info->batch_pmap_op == TRUE) { 1426 vm_page_insert_internal(m, object, offset, FALSE, TRUE, TRUE); 1427 } else { 1428 vm_page_insert(m, object, offset); 1429 } 1430 } 1431 if (look_for_page) { 1432 kern_return_t rc; 1433 int my_fault_type; 1434 1435 /* 1436 * If the memory manager is not ready, we 1437 * cannot make requests. 1438 */ 1439 if (!object->pager_ready) { 1440#if TRACEFAULTPAGE 1441 dbgTrace(0xBEEF000E, (unsigned int) 0, (unsigned int) 0); /* (TEST/DEBUG) */ 1442#endif 1443 if (m != VM_PAGE_NULL) 1444 VM_PAGE_FREE(m); 1445 1446 XPR(XPR_VM_FAULT, 1447 "vm_f_page: ready wait obj 0x%X, offset 0x%X\n", 1448 object, offset, 0, 0, 0); 1449 1450 /* 1451 * take an extra ref so object won't die 1452 */ 1453 vm_object_reference_locked(object); 1454 vm_fault_cleanup(object, first_m); 1455 counter(c_vm_fault_page_block_backoff_kernel++); 1456 1457 vm_object_lock(object); 1458 assert(object->ref_count > 0); 1459 1460 if (!object->pager_ready) { 1461 wait_result = vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGER_READY, interruptible); 1462 1463 vm_object_unlock(object); 1464 if (wait_result == THREAD_WAITING) 1465 wait_result = thread_block(THREAD_CONTINUE_NULL); 1466 vm_object_deallocate(object); 1467 1468 goto backoff; 1469 } else { 1470 vm_object_unlock(object); 1471 vm_object_deallocate(object); 1472 thread_interrupt_level(interruptible_state); 1473 1474 return (VM_FAULT_RETRY); 1475 } 1476 } 1477 if (!object->internal && !object->phys_contiguous && object->paging_in_progress > vm_object_pagein_throttle) { 1478 /* 1479 * If there are too many outstanding page 1480 * requests pending on this external object, we 1481 * wait for them to be resolved now. 1482 */ 1483#if TRACEFAULTPAGE 1484 dbgTrace(0xBEEF0010, (unsigned int) m, (unsigned int) 0); /* (TEST/DEBUG) */ 1485#endif 1486 if (m != VM_PAGE_NULL) 1487 VM_PAGE_FREE(m); 1488 /* 1489 * take an extra ref so object won't die 1490 */ 1491 vm_object_reference_locked(object); 1492 1493 vm_fault_cleanup(object, first_m); 1494 1495 counter(c_vm_fault_page_block_backoff_kernel++); 1496 1497 vm_object_lock(object); 1498 assert(object->ref_count > 0); 1499 1500 if (object->paging_in_progress >= vm_object_pagein_throttle) { 1501 vm_object_assert_wait(object, VM_OBJECT_EVENT_PAGING_ONLY_IN_PROGRESS, interruptible); 1502 1503 vm_object_unlock(object); 1504 wait_result = thread_block(THREAD_CONTINUE_NULL); 1505 vm_object_deallocate(object); 1506 1507 goto backoff; 1508 } else { 1509 vm_object_unlock(object); 1510 vm_object_deallocate(object); 1511 thread_interrupt_level(interruptible_state); 1512 1513 return (VM_FAULT_RETRY); 1514 } 1515 } 1516 if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && object->internal) { 1517 1518 if (m == VM_PAGE_NULL) { 1519 /* 1520 * Allocate a new page for this object/offset pair as a placeholder 1521 */ 1522 m = vm_page_grab(); 1523#if TRACEFAULTPAGE 1524 dbgTrace(0xBEEF000D, (unsigned int) m, (unsigned int) object); /* (TEST/DEBUG) */ 1525#endif 1526 if (m == VM_PAGE_NULL) { 1527 1528 vm_fault_cleanup(object, first_m); 1529 thread_interrupt_level(interruptible_state); 1530 1531 return (VM_FAULT_MEMORY_SHORTAGE); 1532 } 1533 1534 m->absent = TRUE; 1535 if (fault_info && fault_info->batch_pmap_op == TRUE) { 1536 vm_page_insert_internal(m, object, offset, FALSE, TRUE, TRUE); 1537 } else { 1538 vm_page_insert(m, object, offset); 1539 } 1540 } 1541 assert(m->busy); 1542 1543 m->absent = TRUE; 1544 pager = object->pager; 1545 1546 vm_object_unlock(object); 1547 1548 rc = vm_compressor_pager_get(pager, offset + object->paging_offset, m->phys_page, &my_fault_type, 0); 1549 1550 vm_object_lock(object); 1551 1552 switch (rc) { 1553 case KERN_SUCCESS: 1554 m->absent = FALSE; 1555 m->dirty = TRUE; 1556 if ((m->object->wimg_bits & 1557 VM_WIMG_MASK) != 1558 VM_WIMG_USE_DEFAULT) { 1559 /* 1560 * If the page is not cacheable, 1561 * we can't let its contents 1562 * linger in the data cache 1563 * after the decompression. 1564 */ 1565 pmap_sync_page_attributes_phys( 1566 m->phys_page); 1567 } else 1568 m->written_by_kernel = TRUE; 1569 break; 1570 case KERN_MEMORY_FAILURE: 1571 m->unusual = TRUE; 1572 m->error = TRUE; 1573 m->absent = FALSE; 1574 break; 1575 case KERN_MEMORY_ERROR: 1576 assert(m->absent); 1577 break; 1578 default: 1579 panic("?"); 1580 } 1581 PAGE_WAKEUP_DONE(m); 1582 1583 rc = KERN_SUCCESS; 1584 goto data_requested; 1585 } 1586 my_fault_type = DBG_PAGEIN_FAULT; 1587 1588 if (m != VM_PAGE_NULL) { 1589 VM_PAGE_FREE(m); 1590 m = VM_PAGE_NULL; 1591 } 1592 1593#if TRACEFAULTPAGE 1594 dbgTrace(0xBEEF0012, (unsigned int) object, (unsigned int) 0); /* (TEST/DEBUG) */ 1595#endif 1596 1597 /* 1598 * It's possible someone called vm_object_destroy while we weren't 1599 * holding the object lock. If that has happened, then bail out 1600 * here. 1601 */ 1602 1603 pager = object->pager; 1604 1605 if (pager == MEMORY_OBJECT_NULL) { 1606 vm_fault_cleanup(object, first_m); 1607 thread_interrupt_level(interruptible_state); 1608 return VM_FAULT_MEMORY_ERROR; 1609 } 1610 1611 /* 1612 * We have an absent page in place for the faulting offset, 1613 * so we can release the object lock. 1614 */ 1615 1616 vm_object_unlock(object); 1617 1618 /* 1619 * If this object uses a copy_call strategy, 1620 * and we are interested in a copy of this object 1621 * (having gotten here only by following a 1622 * shadow chain), then tell the memory manager 1623 * via a flag added to the desired_access 1624 * parameter, so that it can detect a race 1625 * between our walking down the shadow chain 1626 * and its pushing pages up into a copy of 1627 * the object that it manages. 1628 */ 1629 if (object->copy_strategy == MEMORY_OBJECT_COPY_CALL && object != first_object) 1630 wants_copy_flag = VM_PROT_WANTS_COPY; 1631 else 1632 wants_copy_flag = VM_PROT_NONE; 1633 1634 XPR(XPR_VM_FAULT, 1635 "vm_f_page: data_req obj 0x%X, offset 0x%X, page 0x%X, acc %d\n", 1636 object, offset, m, 1637 access_required | wants_copy_flag, 0); 1638 1639 if (object->copy == first_object) { 1640 /* 1641 * if we issue the memory_object_data_request in 1642 * this state, we are subject to a deadlock with 1643 * the underlying filesystem if it is trying to 1644 * shrink the file resulting in a push of pages 1645 * into the copy object... that push will stall 1646 * on the placeholder page, and if the pushing thread 1647 * is holding a lock that is required on the pagein 1648 * path (such as a truncate lock), we'll deadlock... 1649 * to avoid this potential deadlock, we throw away 1650 * our placeholder page before calling memory_object_data_request 1651 * and force this thread to retry the vm_fault_page after 1652 * we have issued the I/O. the second time through this path 1653 * we will find the page already in the cache (presumably still 1654 * busy waiting for the I/O to complete) and then complete 1655 * the fault w/o having to go through memory_object_data_request again 1656 */ 1657 assert(first_m != VM_PAGE_NULL); 1658 assert(first_m->object == first_object); 1659 1660 vm_object_lock(first_object); 1661 VM_PAGE_FREE(first_m); 1662 vm_object_paging_end(first_object); 1663 vm_object_unlock(first_object); 1664 1665 first_m = VM_PAGE_NULL; 1666 force_fault_retry = TRUE; 1667 1668 vm_fault_page_forced_retry++; 1669 } 1670 1671 if (data_already_requested == TRUE) { 1672 orig_behavior = fault_info->behavior; 1673 orig_cluster_size = fault_info->cluster_size; 1674 1675 fault_info->behavior = VM_BEHAVIOR_RANDOM; 1676 fault_info->cluster_size = PAGE_SIZE; 1677 } 1678 /* 1679 * Call the memory manager to retrieve the data. 1680 */ 1681 rc = memory_object_data_request( 1682 pager, 1683 offset + object->paging_offset, 1684 PAGE_SIZE, 1685 access_required | wants_copy_flag, 1686 (memory_object_fault_info_t)fault_info); 1687 1688 if (data_already_requested == TRUE) { 1689 fault_info->behavior = orig_behavior; 1690 fault_info->cluster_size = orig_cluster_size; 1691 } else 1692 data_already_requested = TRUE; 1693 1694#if TRACEFAULTPAGE 1695 dbgTrace(0xBEEF0013, (unsigned int) object, (unsigned int) rc); /* (TEST/DEBUG) */ 1696#endif 1697 vm_object_lock(object); 1698 1699 data_requested: 1700 if (rc != KERN_SUCCESS) { 1701 1702 vm_fault_cleanup(object, first_m); 1703 thread_interrupt_level(interruptible_state); 1704 1705 return ((rc == MACH_SEND_INTERRUPTED) ? 1706 VM_FAULT_INTERRUPTED : 1707 VM_FAULT_MEMORY_ERROR); 1708 } else { 1709 clock_sec_t tv_sec; 1710 clock_usec_t tv_usec; 1711 1712 if (my_fault_type == DBG_PAGEIN_FAULT) { 1713 clock_get_system_microtime(&tv_sec, &tv_usec); 1714 current_thread()->t_page_creation_time = tv_sec; 1715 current_thread()->t_page_creation_count = 0; 1716 } 1717 } 1718 if ((interruptible != THREAD_UNINT) && (current_thread()->sched_flags & TH_SFLAG_ABORT)) { 1719 1720 vm_fault_cleanup(object, first_m); 1721 thread_interrupt_level(interruptible_state); 1722 1723 return (VM_FAULT_INTERRUPTED); 1724 } 1725 if (force_fault_retry == TRUE) { 1726 1727 vm_fault_cleanup(object, first_m); 1728 thread_interrupt_level(interruptible_state); 1729 1730 return (VM_FAULT_RETRY); 1731 } 1732 if (m == VM_PAGE_NULL && object->phys_contiguous) { 1733 /* 1734 * No page here means that the object we 1735 * initially looked up was "physically 1736 * contiguous" (i.e. device memory). However, 1737 * with Virtual VRAM, the object might not 1738 * be backed by that device memory anymore, 1739 * so we're done here only if the object is 1740 * still "phys_contiguous". 1741 * Otherwise, if the object is no longer 1742 * "phys_contiguous", we need to retry the 1743 * page fault against the object's new backing 1744 * store (different memory object). 1745 */ 1746 phys_contig_object: 1747 goto done; 1748 } 1749 /* 1750 * potentially a pagein fault 1751 * if we make it through the state checks 1752 * above, than we'll count it as such 1753 */ 1754 my_fault = my_fault_type; 1755 1756 /* 1757 * Retry with same object/offset, since new data may 1758 * be in a different page (i.e., m is meaningless at 1759 * this point). 1760 */ 1761 continue; 1762 } 1763dont_look_for_page: 1764 /* 1765 * We get here if the object has no pager, or an existence map 1766 * exists and indicates the page isn't present on the pager 1767 * or we're unwiring a page. If a pager exists, but there 1768 * is no existence map, then the m->absent case above handles 1769 * the ZF case when the pager can't provide the page 1770 */ 1771#if TRACEFAULTPAGE 1772 dbgTrace(0xBEEF0014, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */ 1773#endif 1774 if (object == first_object) 1775 first_m = m; 1776 else 1777 assert(m == VM_PAGE_NULL); 1778 1779 XPR(XPR_VM_FAULT, 1780 "vm_f_page: no pager obj 0x%X, offset 0x%X, page 0x%X, next_obj 0x%X\n", 1781 object, offset, m, 1782 object->shadow, 0); 1783 1784 next_object = object->shadow; 1785 1786 if (next_object == VM_OBJECT_NULL) { 1787 /* 1788 * we've hit the bottom of the shadown chain, 1789 * fill the page in the top object with zeros. 1790 */ 1791 assert(!must_be_resident); 1792 1793 if (object != first_object) { 1794 vm_object_paging_end(object); 1795 vm_object_unlock(object); 1796 1797 object = first_object; 1798 offset = first_offset; 1799 vm_object_lock(object); 1800 } 1801 m = first_m; 1802 assert(m->object == object); 1803 first_m = VM_PAGE_NULL; 1804 1805 /* 1806 * check for any conditions that prevent 1807 * us from creating a new zero-fill page 1808 * vm_fault_check will do all of the 1809 * fault cleanup in the case of an error condition 1810 * including resetting the thread_interrupt_level 1811 */ 1812 error = vm_fault_check(object, m, first_m, interruptible_state); 1813 1814 if (error != VM_FAULT_SUCCESS) 1815 return (error); 1816 1817 if (m == VM_PAGE_NULL) { 1818 m = vm_page_grab(); 1819 1820 if (m == VM_PAGE_NULL) { 1821 vm_fault_cleanup(object, VM_PAGE_NULL); 1822 thread_interrupt_level(interruptible_state); 1823 1824 return (VM_FAULT_MEMORY_SHORTAGE); 1825 } 1826 vm_page_insert(m, object, offset); 1827 } 1828 my_fault = vm_fault_zero_page(m, no_zero_fill); 1829 1830 if (fault_info->mark_zf_absent && no_zero_fill == TRUE) 1831 m->absent = TRUE; 1832 break; 1833 1834 } else { 1835 /* 1836 * Move on to the next object. Lock the next 1837 * object before unlocking the current one. 1838 */ 1839 if ((object != first_object) || must_be_resident) 1840 vm_object_paging_end(object); 1841 1842 offset += object->vo_shadow_offset; 1843 fault_info->lo_offset += object->vo_shadow_offset; 1844 fault_info->hi_offset += object->vo_shadow_offset; 1845 access_required = VM_PROT_READ; 1846 1847 vm_object_lock(next_object); 1848 vm_object_unlock(object); 1849 1850 object = next_object; 1851 vm_object_paging_begin(object); 1852 } 1853 } 1854 1855 /* 1856 * PAGE HAS BEEN FOUND. 1857 * 1858 * This page (m) is: 1859 * busy, so that we can play with it; 1860 * not absent, so that nobody else will fill it; 1861 * possibly eligible for pageout; 1862 * 1863 * The top-level page (first_m) is: 1864 * VM_PAGE_NULL if the page was found in the 1865 * top-level object; 1866 * busy, not absent, and ineligible for pageout. 1867 * 1868 * The current object (object) is locked. A paging 1869 * reference is held for the current and top-level 1870 * objects. 1871 */ 1872 1873#if TRACEFAULTPAGE 1874 dbgTrace(0xBEEF0015, (unsigned int) object, (unsigned int) m); /* (TEST/DEBUG) */ 1875#endif 1876#if EXTRA_ASSERTIONS 1877 assert(m->busy && !m->absent); 1878 assert((first_m == VM_PAGE_NULL) || 1879 (first_m->busy && !first_m->absent && 1880 !first_m->active && !first_m->inactive)); 1881#endif /* EXTRA_ASSERTIONS */ 1882 1883 /* 1884 * ENCRYPTED SWAP: 1885 * If we found a page, we must have decrypted it before we 1886 * get here... 1887 */ 1888 ASSERT_PAGE_DECRYPTED(m); 1889 1890 XPR(XPR_VM_FAULT, 1891 "vm_f_page: FOUND obj 0x%X, off 0x%X, page 0x%X, 1_obj 0x%X, 1_m 0x%X\n", 1892 object, offset, m, 1893 first_object, first_m); 1894 1895 /* 1896 * If the page is being written, but isn't 1897 * already owned by the top-level object, 1898 * we have to copy it into a new page owned 1899 * by the top-level object. 1900 */ 1901 if (object != first_object) { 1902 1903#if TRACEFAULTPAGE 1904 dbgTrace(0xBEEF0016, (unsigned int) object, (unsigned int) fault_type); /* (TEST/DEBUG) */ 1905#endif 1906 if (fault_type & VM_PROT_WRITE) { 1907 vm_page_t copy_m; 1908 1909 /* 1910 * We only really need to copy if we 1911 * want to write it. 1912 */ 1913 assert(!must_be_resident); 1914 1915 /* 1916 * are we protecting the system from 1917 * backing store exhaustion. If so 1918 * sleep unless we are privileged. 1919 */ 1920 if (vm_backing_store_low) { 1921 if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) { 1922 1923 RELEASE_PAGE(m); 1924 vm_fault_cleanup(object, first_m); 1925 1926 assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT); 1927 1928 thread_block(THREAD_CONTINUE_NULL); 1929 thread_interrupt_level(interruptible_state); 1930 1931 return (VM_FAULT_RETRY); 1932 } 1933 } 1934 /* 1935 * If we try to collapse first_object at this 1936 * point, we may deadlock when we try to get 1937 * the lock on an intermediate object (since we 1938 * have the bottom object locked). We can't 1939 * unlock the bottom object, because the page 1940 * we found may move (by collapse) if we do. 1941 * 1942 * Instead, we first copy the page. Then, when 1943 * we have no more use for the bottom object, 1944 * we unlock it and try to collapse. 1945 * 1946 * Note that we copy the page even if we didn't 1947 * need to... that's the breaks. 1948 */ 1949 1950 /* 1951 * Allocate a page for the copy 1952 */ 1953 copy_m = vm_page_grab(); 1954 1955 if (copy_m == VM_PAGE_NULL) { 1956 RELEASE_PAGE(m); 1957 1958 vm_fault_cleanup(object, first_m); 1959 thread_interrupt_level(interruptible_state); 1960 1961 return (VM_FAULT_MEMORY_SHORTAGE); 1962 } 1963 XPR(XPR_VM_FAULT, 1964 "vm_f_page: page_copy obj 0x%X, offset 0x%X, m 0x%X, copy_m 0x%X\n", 1965 object, offset, 1966 m, copy_m, 0); 1967 1968 vm_page_copy(m, copy_m); 1969 1970 /* 1971 * If another map is truly sharing this 1972 * page with us, we have to flush all 1973 * uses of the original page, since we 1974 * can't distinguish those which want the 1975 * original from those which need the 1976 * new copy. 1977 * 1978 * XXXO If we know that only one map has 1979 * access to this page, then we could 1980 * avoid the pmap_disconnect() call. 1981 */ 1982 if (m->pmapped) 1983 pmap_disconnect(m->phys_page); 1984 1985 assert(!m->cleaning); 1986 1987 /* 1988 * We no longer need the old page or object. 1989 */ 1990 RELEASE_PAGE(m); 1991 1992 vm_object_paging_end(object); 1993 vm_object_unlock(object); 1994 1995 my_fault = DBG_COW_FAULT; 1996 VM_STAT_INCR(cow_faults); 1997 DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL); 1998 current_task()->cow_faults++; 1999 2000 object = first_object; 2001 offset = first_offset; 2002 2003 vm_object_lock(object); 2004 /* 2005 * get rid of the place holder 2006 * page that we soldered in earlier 2007 */ 2008 VM_PAGE_FREE(first_m); 2009 first_m = VM_PAGE_NULL; 2010 2011 /* 2012 * and replace it with the 2013 * page we just copied into 2014 */ 2015 assert(copy_m->busy); 2016 vm_page_insert(copy_m, object, offset); 2017 SET_PAGE_DIRTY(copy_m, TRUE); 2018 2019 m = copy_m; 2020 /* 2021 * Now that we've gotten the copy out of the 2022 * way, let's try to collapse the top object. 2023 * But we have to play ugly games with 2024 * paging_in_progress to do that... 2025 */ 2026 vm_object_paging_end(object); 2027 vm_object_collapse(object, offset, TRUE); 2028 vm_object_paging_begin(object); 2029 2030 } else 2031 *protection &= (~VM_PROT_WRITE); 2032 } 2033 /* 2034 * Now check whether the page needs to be pushed into the 2035 * copy object. The use of asymmetric copy on write for 2036 * shared temporary objects means that we may do two copies to 2037 * satisfy the fault; one above to get the page from a 2038 * shadowed object, and one here to push it into the copy. 2039 */ 2040 try_failed_count = 0; 2041 2042 while ((copy_object = first_object->copy) != VM_OBJECT_NULL) { 2043 vm_object_offset_t copy_offset; 2044 vm_page_t copy_m; 2045 2046#if TRACEFAULTPAGE 2047 dbgTrace(0xBEEF0017, (unsigned int) copy_object, (unsigned int) fault_type); /* (TEST/DEBUG) */ 2048#endif 2049 /* 2050 * If the page is being written, but hasn't been 2051 * copied to the copy-object, we have to copy it there. 2052 */ 2053 if ((fault_type & VM_PROT_WRITE) == 0) { 2054 *protection &= ~VM_PROT_WRITE; 2055 break; 2056 } 2057 2058 /* 2059 * If the page was guaranteed to be resident, 2060 * we must have already performed the copy. 2061 */ 2062 if (must_be_resident) 2063 break; 2064 2065 /* 2066 * Try to get the lock on the copy_object. 2067 */ 2068 if (!vm_object_lock_try(copy_object)) { 2069 2070 vm_object_unlock(object); 2071 try_failed_count++; 2072 2073 mutex_pause(try_failed_count); /* wait a bit */ 2074 vm_object_lock(object); 2075 2076 continue; 2077 } 2078 try_failed_count = 0; 2079 2080 /* 2081 * Make another reference to the copy-object, 2082 * to keep it from disappearing during the 2083 * copy. 2084 */ 2085 vm_object_reference_locked(copy_object); 2086 2087 /* 2088 * Does the page exist in the copy? 2089 */ 2090 copy_offset = first_offset - copy_object->vo_shadow_offset; 2091 2092 if (copy_object->vo_size <= copy_offset) 2093 /* 2094 * Copy object doesn't cover this page -- do nothing. 2095 */ 2096 ; 2097 else if ((copy_m = vm_page_lookup(copy_object, copy_offset)) != VM_PAGE_NULL) { 2098 /* 2099 * Page currently exists in the copy object 2100 */ 2101 if (copy_m->busy) { 2102 /* 2103 * If the page is being brought 2104 * in, wait for it and then retry. 2105 */ 2106 RELEASE_PAGE(m); 2107 2108 /* 2109 * take an extra ref so object won't die 2110 */ 2111 vm_object_reference_locked(copy_object); 2112 vm_object_unlock(copy_object); 2113 vm_fault_cleanup(object, first_m); 2114 counter(c_vm_fault_page_block_backoff_kernel++); 2115 2116 vm_object_lock(copy_object); 2117 assert(copy_object->ref_count > 0); 2118 VM_OBJ_RES_DECR(copy_object); 2119 vm_object_lock_assert_exclusive(copy_object); 2120 copy_object->ref_count--; 2121 assert(copy_object->ref_count > 0); 2122 copy_m = vm_page_lookup(copy_object, copy_offset); 2123 /* 2124 * ENCRYPTED SWAP: 2125 * it's OK if the "copy_m" page is encrypted, 2126 * because we're not moving it nor handling its 2127 * contents. 2128 */ 2129 if (copy_m != VM_PAGE_NULL && copy_m->busy) { 2130 PAGE_ASSERT_WAIT(copy_m, interruptible); 2131 2132 vm_object_unlock(copy_object); 2133 wait_result = thread_block(THREAD_CONTINUE_NULL); 2134 vm_object_deallocate(copy_object); 2135 2136 goto backoff; 2137 } else { 2138 vm_object_unlock(copy_object); 2139 vm_object_deallocate(copy_object); 2140 thread_interrupt_level(interruptible_state); 2141 2142 return (VM_FAULT_RETRY); 2143 } 2144 } 2145 } 2146 else if (!PAGED_OUT(copy_object, copy_offset)) { 2147 /* 2148 * If PAGED_OUT is TRUE, then the page used to exist 2149 * in the copy-object, and has already been paged out. 2150 * We don't need to repeat this. If PAGED_OUT is 2151 * FALSE, then either we don't know (!pager_created, 2152 * for example) or it hasn't been paged out. 2153 * (VM_EXTERNAL_STATE_UNKNOWN||VM_EXTERNAL_STATE_ABSENT) 2154 * We must copy the page to the copy object. 2155 */ 2156 2157 if (vm_backing_store_low) { 2158 /* 2159 * we are protecting the system from 2160 * backing store exhaustion. If so 2161 * sleep unless we are privileged. 2162 */ 2163 if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) { 2164 assert_wait((event_t)&vm_backing_store_low, THREAD_UNINT); 2165 2166 RELEASE_PAGE(m); 2167 VM_OBJ_RES_DECR(copy_object); 2168 vm_object_lock_assert_exclusive(copy_object); 2169 copy_object->ref_count--; 2170 assert(copy_object->ref_count > 0); 2171 2172 vm_object_unlock(copy_object); 2173 vm_fault_cleanup(object, first_m); 2174 thread_block(THREAD_CONTINUE_NULL); 2175 thread_interrupt_level(interruptible_state); 2176 2177 return (VM_FAULT_RETRY); 2178 } 2179 } 2180 /* 2181 * Allocate a page for the copy 2182 */ 2183 copy_m = vm_page_alloc(copy_object, copy_offset); 2184 2185 if (copy_m == VM_PAGE_NULL) { 2186 RELEASE_PAGE(m); 2187 2188 VM_OBJ_RES_DECR(copy_object); 2189 vm_object_lock_assert_exclusive(copy_object); 2190 copy_object->ref_count--; 2191 assert(copy_object->ref_count > 0); 2192 2193 vm_object_unlock(copy_object); 2194 vm_fault_cleanup(object, first_m); 2195 thread_interrupt_level(interruptible_state); 2196 2197 return (VM_FAULT_MEMORY_SHORTAGE); 2198 } 2199 /* 2200 * Must copy page into copy-object. 2201 */ 2202 vm_page_copy(m, copy_m); 2203 2204 /* 2205 * If the old page was in use by any users 2206 * of the copy-object, it must be removed 2207 * from all pmaps. (We can't know which 2208 * pmaps use it.) 2209 */ 2210 if (m->pmapped) 2211 pmap_disconnect(m->phys_page); 2212 2213 /* 2214 * If there's a pager, then immediately 2215 * page out this page, using the "initialize" 2216 * option. Else, we use the copy. 2217 */ 2218 if ((!copy_object->pager_created) 2219#if MACH_PAGEMAP 2220 || vm_external_state_get(copy_object->existence_map, copy_offset) == VM_EXTERNAL_STATE_ABSENT 2221#endif 2222 || VM_COMPRESSOR_PAGER_STATE_GET(copy_object, copy_offset) == VM_EXTERNAL_STATE_ABSENT 2223 ) { 2224 2225 vm_page_lockspin_queues(); 2226 assert(!m->cleaning); 2227 vm_page_activate(copy_m); 2228 vm_page_unlock_queues(); 2229 2230 SET_PAGE_DIRTY(copy_m, TRUE); 2231 PAGE_WAKEUP_DONE(copy_m); 2232 2233 } else if (copy_object->internal && 2234 (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE)) { 2235 /* 2236 * For internal objects check with the pager to see 2237 * if the page already exists in the backing store. 2238 * If yes, then we can drop the copy page. If not, 2239 * then we'll activate it, mark it dirty and keep it 2240 * around. 2241 */ 2242 2243 kern_return_t kr = KERN_SUCCESS; 2244 2245 memory_object_t copy_pager = copy_object->pager; 2246 assert(copy_pager != MEMORY_OBJECT_NULL); 2247 vm_object_paging_begin(copy_object); 2248 2249 vm_object_unlock(copy_object); 2250 2251 kr = memory_object_data_request( 2252 copy_pager, 2253 copy_offset + copy_object->paging_offset, 2254 0, /* Only query the pager. */ 2255 VM_PROT_READ, 2256 NULL); 2257 2258 vm_object_lock(copy_object); 2259 2260 vm_object_paging_end(copy_object); 2261 2262 /* 2263 * Since we dropped the copy_object's lock, 2264 * check whether we'll have to deallocate 2265 * the hard way. 2266 */ 2267 if ((copy_object->shadow != object) || (copy_object->ref_count == 1)) { 2268 vm_object_unlock(copy_object); 2269 vm_object_deallocate(copy_object); 2270 vm_object_lock(object); 2271 2272 continue; 2273 } 2274 if (kr == KERN_SUCCESS) { 2275 /* 2276 * The pager has the page. We don't want to overwrite 2277 * that page by sending this one out to the backing store. 2278 * So we drop the copy page. 2279 */ 2280 VM_PAGE_FREE(copy_m); 2281 2282 } else { 2283 /* 2284 * The pager doesn't have the page. We'll keep this one 2285 * around in the copy object. It might get sent out to 2286 * the backing store under memory pressure. 2287 */ 2288 vm_page_lockspin_queues(); 2289 assert(!m->cleaning); 2290 vm_page_activate(copy_m); 2291 vm_page_unlock_queues(); 2292 2293 SET_PAGE_DIRTY(copy_m, TRUE); 2294 PAGE_WAKEUP_DONE(copy_m); 2295 } 2296 } else { 2297 2298 assert(copy_m->busy == TRUE); 2299 assert(!m->cleaning); 2300 2301 /* 2302 * dirty is protected by the object lock 2303 */ 2304 SET_PAGE_DIRTY(copy_m, TRUE); 2305 2306 /* 2307 * The page is already ready for pageout: 2308 * not on pageout queues and busy. 2309 * Unlock everything except the 2310 * copy_object itself. 2311 */ 2312 vm_object_unlock(object); 2313 2314 /* 2315 * Write the page to the copy-object, 2316 * flushing it from the kernel. 2317 */ 2318 vm_pageout_initialize_page(copy_m); 2319 2320 /* 2321 * Since the pageout may have 2322 * temporarily dropped the 2323 * copy_object's lock, we 2324 * check whether we'll have 2325 * to deallocate the hard way. 2326 */ 2327 if ((copy_object->shadow != object) || (copy_object->ref_count == 1)) { 2328 vm_object_unlock(copy_object); 2329 vm_object_deallocate(copy_object); 2330 vm_object_lock(object); 2331 2332 continue; 2333 } 2334 /* 2335 * Pick back up the old object's 2336 * lock. [It is safe to do so, 2337 * since it must be deeper in the 2338 * object tree.] 2339 */ 2340 vm_object_lock(object); 2341 } 2342 2343 /* 2344 * Because we're pushing a page upward 2345 * in the object tree, we must restart 2346 * any faults that are waiting here. 2347 * [Note that this is an expansion of 2348 * PAGE_WAKEUP that uses the THREAD_RESTART 2349 * wait result]. Can't turn off the page's 2350 * busy bit because we're not done with it. 2351 */ 2352 if (m->wanted) { 2353 m->wanted = FALSE; 2354 thread_wakeup_with_result((event_t) m, THREAD_RESTART); 2355 } 2356 } 2357 /* 2358 * The reference count on copy_object must be 2359 * at least 2: one for our extra reference, 2360 * and at least one from the outside world 2361 * (we checked that when we last locked 2362 * copy_object). 2363 */ 2364 vm_object_lock_assert_exclusive(copy_object); 2365 copy_object->ref_count--; 2366 assert(copy_object->ref_count > 0); 2367 2368 VM_OBJ_RES_DECR(copy_object); 2369 vm_object_unlock(copy_object); 2370 2371 break; 2372 } 2373 2374done: 2375 *result_page = m; 2376 *top_page = first_m; 2377 2378 XPR(XPR_VM_FAULT, 2379 "vm_f_page: DONE obj 0x%X, offset 0x%X, m 0x%X, first_m 0x%X\n", 2380 object, offset, m, first_m, 0); 2381 2382 if (m != VM_PAGE_NULL) { 2383 retval = VM_FAULT_SUCCESS; 2384 if (my_fault == DBG_PAGEIN_FAULT) { 2385 2386 if (!m->object->internal || (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE)) 2387 VM_STAT_INCR(pageins); 2388 DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL); 2389 DTRACE_VM2(maj_fault, int, 1, (uint64_t *), NULL); 2390 current_task()->pageins++; 2391 2392 if (m->object->internal) { 2393 DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL); 2394 my_fault = DBG_PAGEIND_FAULT; 2395 } else { 2396 DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); 2397 my_fault = DBG_PAGEINV_FAULT; 2398 } 2399 2400 /* 2401 * evaluate access pattern and update state 2402 * vm_fault_deactivate_behind depends on the 2403 * state being up to date 2404 */ 2405 vm_fault_is_sequential(object, offset, fault_info->behavior); 2406 2407 vm_fault_deactivate_behind(object, offset, fault_info->behavior); 2408 } else if (my_fault == DBG_COMPRESSOR_FAULT || my_fault == DBG_COMPRESSOR_SWAPIN_FAULT) { 2409 2410 VM_STAT_INCR(decompressions); 2411 } 2412 if (type_of_fault) 2413 *type_of_fault = my_fault; 2414 } else { 2415 retval = VM_FAULT_SUCCESS_NO_VM_PAGE; 2416 assert(first_m == VM_PAGE_NULL); 2417 assert(object == first_object); 2418 } 2419 2420 thread_interrupt_level(interruptible_state); 2421 2422#if TRACEFAULTPAGE 2423 dbgTrace(0xBEEF001A, (unsigned int) VM_FAULT_SUCCESS, 0); /* (TEST/DEBUG) */ 2424#endif 2425 return retval; 2426 2427backoff: 2428 thread_interrupt_level(interruptible_state); 2429 2430 if (wait_result == THREAD_INTERRUPTED) 2431 return (VM_FAULT_INTERRUPTED); 2432 return (VM_FAULT_RETRY); 2433 2434#undef RELEASE_PAGE 2435} 2436 2437 2438 2439/* 2440 * CODE SIGNING: 2441 * When soft faulting a page, we have to validate the page if: 2442 * 1. the page is being mapped in user space 2443 * 2. the page hasn't already been found to be "tainted" 2444 * 3. the page belongs to a code-signed object 2445 * 4. the page has not been validated yet or has been mapped for write. 2446 */ 2447#define VM_FAULT_NEED_CS_VALIDATION(pmap, page) \ 2448 ((pmap) != kernel_pmap /*1*/ && \ 2449 !(page)->cs_tainted /*2*/ && \ 2450 (page)->object->code_signed /*3*/ && \ 2451 (!(page)->cs_validated || (page)->wpmapped /*4*/)) 2452 2453 2454/* 2455 * page queue lock must NOT be held 2456 * m->object must be locked 2457 * 2458 * NOTE: m->object could be locked "shared" only if we are called 2459 * from vm_fault() as part of a soft fault. If so, we must be 2460 * careful not to modify the VM object in any way that is not 2461 * legal under a shared lock... 2462 */ 2463extern int proc_selfpid(void); 2464extern char *proc_name_address(void *p); 2465unsigned long cs_enter_tainted_rejected = 0; 2466unsigned long cs_enter_tainted_accepted = 0; 2467kern_return_t 2468vm_fault_enter(vm_page_t m, 2469 pmap_t pmap, 2470 vm_map_offset_t vaddr, 2471 vm_prot_t prot, 2472 vm_prot_t fault_type, 2473 boolean_t wired, 2474 boolean_t change_wiring, 2475 boolean_t no_cache, 2476 boolean_t cs_bypass, 2477 boolean_t *need_retry, 2478 int *type_of_fault) 2479{ 2480 kern_return_t kr, pe_result; 2481 boolean_t previously_pmapped = m->pmapped; 2482 boolean_t must_disconnect = 0; 2483 boolean_t map_is_switched, map_is_switch_protected; 2484 int cs_enforcement_enabled; 2485 2486 vm_object_lock_assert_held(m->object); 2487#if DEBUG 2488 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED); 2489#endif /* DEBUG */ 2490 2491 if (m->phys_page == vm_page_guard_addr) { 2492 assert(m->fictitious); 2493 return KERN_SUCCESS; 2494 } 2495 2496 if (*type_of_fault == DBG_ZERO_FILL_FAULT) { 2497 2498 vm_object_lock_assert_exclusive(m->object); 2499 2500 } else if ((fault_type & VM_PROT_WRITE) == 0) { 2501 /* 2502 * This is not a "write" fault, so we 2503 * might not have taken the object lock 2504 * exclusively and we might not be able 2505 * to update the "wpmapped" bit in 2506 * vm_fault_enter(). 2507 * Let's just grant read access to 2508 * the page for now and we'll 2509 * soft-fault again if we need write 2510 * access later... 2511 */ 2512 prot &= ~VM_PROT_WRITE; 2513 } 2514 if (m->pmapped == FALSE) { 2515 2516 if ((*type_of_fault == DBG_CACHE_HIT_FAULT) && m->clustered) { 2517 /* 2518 * found it in the cache, but this 2519 * is the first fault-in of the page (m->pmapped == FALSE) 2520 * so it must have come in as part of 2521 * a cluster... account 1 pagein against it 2522 */ 2523 VM_STAT_INCR(pageins); 2524 DTRACE_VM2(pgin, int, 1, (uint64_t *), NULL); 2525 2526 if (m->object->internal) { 2527 DTRACE_VM2(anonpgin, int, 1, (uint64_t *), NULL); 2528 *type_of_fault = DBG_PAGEIND_FAULT; 2529 } else { 2530 DTRACE_VM2(fspgin, int, 1, (uint64_t *), NULL); 2531 *type_of_fault = DBG_PAGEINV_FAULT; 2532 } 2533 2534 current_task()->pageins++; 2535 } 2536 VM_PAGE_CONSUME_CLUSTERED(m); 2537 2538 } 2539 2540 if (*type_of_fault != DBG_COW_FAULT) { 2541 DTRACE_VM2(as_fault, int, 1, (uint64_t *), NULL); 2542 2543 if (pmap == kernel_pmap) { 2544 DTRACE_VM2(kernel_asflt, int, 1, (uint64_t *), NULL); 2545 } 2546 } 2547 2548 /* Validate code signature if necessary. */ 2549 if (VM_FAULT_NEED_CS_VALIDATION(pmap, m)) { 2550 vm_object_lock_assert_exclusive(m->object); 2551 2552 if (m->cs_validated) { 2553 vm_cs_revalidates++; 2554 } 2555 2556 /* VM map is locked, so 1 ref will remain on VM object - 2557 * so no harm if vm_page_validate_cs drops the object lock */ 2558 vm_page_validate_cs(m); 2559 } 2560 2561#define page_immutable(m,prot) ((m)->cs_validated /*&& ((prot) & VM_PROT_EXECUTE)*/) 2562 2563 map_is_switched = ((pmap != vm_map_pmap(current_task()->map)) && 2564 (pmap == vm_map_pmap(current_thread()->map))); 2565 map_is_switch_protected = current_thread()->map->switch_protect; 2566 2567 /* If the map is switched, and is switch-protected, we must protect 2568 * some pages from being write-faulted: immutable pages because by 2569 * definition they may not be written, and executable pages because that 2570 * would provide a way to inject unsigned code. 2571 * If the page is immutable, we can simply return. However, we can't 2572 * immediately determine whether a page is executable anywhere. But, 2573 * we can disconnect it everywhere and remove the executable protection 2574 * from the current map. We do that below right before we do the 2575 * PMAP_ENTER. 2576 */ 2577 cs_enforcement_enabled = cs_enforcement(NULL); 2578 2579 if(cs_enforcement_enabled && map_is_switched && 2580 map_is_switch_protected && page_immutable(m, prot) && 2581 (prot & VM_PROT_WRITE)) 2582 { 2583 return KERN_CODESIGN_ERROR; 2584 } 2585 2586 /* A page could be tainted, or pose a risk of being tainted later. 2587 * Check whether the receiving process wants it, and make it feel 2588 * the consequences (that hapens in cs_invalid_page()). 2589 * For CS Enforcement, two other conditions will 2590 * cause that page to be tainted as well: 2591 * - pmapping an unsigned page executable - this means unsigned code; 2592 * - writeable mapping of a validated page - the content of that page 2593 * can be changed without the kernel noticing, therefore unsigned 2594 * code can be created 2595 */ 2596 if (m->cs_tainted || 2597 ((cs_enforcement_enabled && !cs_bypass ) && 2598 (/* The page is unsigned and wants to be executable */ 2599 (!m->cs_validated && (prot & VM_PROT_EXECUTE)) || 2600 /* The page should be immutable, but is in danger of being modified 2601 * This is the case where we want policy from the code directory - 2602 * is the page immutable or not? For now we have to assume that 2603 * code pages will be immutable, data pages not. 2604 * We'll assume a page is a code page if it has a code directory 2605 * and we fault for execution. 2606 * That is good enough since if we faulted the code page for 2607 * writing in another map before, it is wpmapped; if we fault 2608 * it for writing in this map later it will also be faulted for executing 2609 * at the same time; and if we fault for writing in another map 2610 * later, we will disconnect it from this pmap so we'll notice 2611 * the change. 2612 */ 2613 (page_immutable(m, prot) && ((prot & VM_PROT_WRITE) || m->wpmapped)) 2614 )) 2615 ) 2616 { 2617 /* We will have a tainted page. Have to handle the special case 2618 * of a switched map now. If the map is not switched, standard 2619 * procedure applies - call cs_invalid_page(). 2620 * If the map is switched, the real owner is invalid already. 2621 * There is no point in invalidating the switching process since 2622 * it will not be executing from the map. So we don't call 2623 * cs_invalid_page() in that case. */ 2624 boolean_t reject_page; 2625 if(map_is_switched) { 2626 assert(pmap==vm_map_pmap(current_thread()->map)); 2627 assert(!(prot & VM_PROT_WRITE) || (map_is_switch_protected == FALSE)); 2628 reject_page = FALSE; 2629 } else { 2630 if (cs_debug > 5) 2631 printf("vm_fault: signed: %s validate: %s tainted: %s wpmapped: %s slid: %s prot: 0x%x\n", 2632 m->object->code_signed ? "yes" : "no", 2633 m->cs_validated ? "yes" : "no", 2634 m->cs_tainted ? "yes" : "no", 2635 m->wpmapped ? "yes" : "no", 2636 m->slid ? "yes" : "no", 2637 (int)prot); 2638 reject_page = cs_invalid_page((addr64_t) vaddr); 2639 } 2640 2641 if (reject_page) { 2642 /* reject the tainted page: abort the page fault */ 2643 int pid; 2644 const char *procname; 2645 task_t task; 2646 vm_object_t file_object, shadow; 2647 vm_object_offset_t file_offset; 2648 char *pathname, *filename; 2649 vm_size_t pathname_len, filename_len; 2650 boolean_t truncated_path; 2651#define __PATH_MAX 1024 2652 struct timespec mtime, cs_mtime; 2653 2654 kr = KERN_CODESIGN_ERROR; 2655 cs_enter_tainted_rejected++; 2656 2657 /* get process name and pid */ 2658 procname = "?"; 2659 task = current_task(); 2660 pid = proc_selfpid(); 2661 if (task->bsd_info != NULL) 2662 procname = proc_name_address(task->bsd_info); 2663 2664 /* get file's VM object */ 2665 file_object = m->object; 2666 file_offset = m->offset; 2667 for (shadow = file_object->shadow; 2668 shadow != VM_OBJECT_NULL; 2669 shadow = file_object->shadow) { 2670 vm_object_lock_shared(shadow); 2671 if (file_object != m->object) { 2672 vm_object_unlock(file_object); 2673 } 2674 file_offset += file_object->vo_shadow_offset; 2675 file_object = shadow; 2676 } 2677 2678 mtime.tv_sec = 0; 2679 mtime.tv_nsec = 0; 2680 cs_mtime.tv_sec = 0; 2681 cs_mtime.tv_nsec = 0; 2682 2683 /* get file's pathname and/or filename */ 2684 pathname = NULL; 2685 filename = NULL; 2686 pathname_len = 0; 2687 filename_len = 0; 2688 truncated_path = FALSE; 2689 if (file_object->pager == NULL) { 2690 /* no pager -> no file -> no pathname */ 2691 pathname = (char *) "<nil>"; 2692 } else { 2693 pathname = (char *)kalloc(__PATH_MAX * 2); 2694 if (pathname) { 2695 pathname_len = __PATH_MAX; 2696 filename = pathname + pathname_len; 2697 filename_len = __PATH_MAX; 2698 } 2699 vnode_pager_get_object_name(file_object->pager, 2700 pathname, 2701 pathname_len, 2702 filename, 2703 filename_len, 2704 &truncated_path); 2705 vnode_pager_get_object_mtime(file_object->pager, 2706 &mtime, 2707 &cs_mtime); 2708 } 2709 printf("CODE SIGNING: process %d[%s]: " 2710 "rejecting invalid page at address 0x%llx " 2711 "from offset 0x%llx in file \"%s%s%s\" " 2712 "(cs_mtime:%lu.%ld %s mtime:%lu.%ld) " 2713 "(signed:%d validated:%d tainted:%d " 2714 "wpmapped:%d slid:%d)\n", 2715 pid, procname, (addr64_t) vaddr, 2716 file_offset, 2717 pathname, 2718 (truncated_path ? "/.../" : ""), 2719 (truncated_path ? filename : ""), 2720 cs_mtime.tv_sec, cs_mtime.tv_nsec, 2721 ((cs_mtime.tv_sec == mtime.tv_sec && 2722 cs_mtime.tv_nsec == mtime.tv_nsec) 2723 ? "==" 2724 : "!="), 2725 mtime.tv_sec, mtime.tv_nsec, 2726 m->object->code_signed, 2727 m->cs_validated, 2728 m->cs_tainted, 2729 m->wpmapped, 2730 m->slid); 2731 if (file_object != m->object) { 2732 vm_object_unlock(file_object); 2733 } 2734 if (pathname_len != 0) { 2735 kfree(pathname, __PATH_MAX * 2); 2736 pathname = NULL; 2737 filename = NULL; 2738 } 2739 } else { 2740 /* proceed with the tainted page */ 2741 kr = KERN_SUCCESS; 2742 /* Page might have been tainted before or not; now it 2743 * definitively is. If the page wasn't tainted, we must 2744 * disconnect it from all pmaps later. */ 2745 must_disconnect = !m->cs_tainted; 2746 m->cs_tainted = TRUE; 2747 cs_enter_tainted_accepted++; 2748 } 2749 if (kr != KERN_SUCCESS) { 2750 if (cs_debug) { 2751 printf("CODESIGNING: vm_fault_enter(0x%llx): " 2752 "page %p obj %p off 0x%llx *** INVALID PAGE ***\n", 2753 (long long)vaddr, m, m->object, m->offset); 2754 } 2755#if !SECURE_KERNEL 2756 if (cs_enforcement_panic) { 2757 panic("CODESIGNING: panicking on invalid page\n"); 2758 } 2759#endif 2760 } 2761 2762 } else { 2763 /* proceed with the valid page */ 2764 kr = KERN_SUCCESS; 2765 } 2766 2767 boolean_t page_queues_locked = FALSE; 2768#define __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED() \ 2769MACRO_BEGIN \ 2770 if (! page_queues_locked) { \ 2771 page_queues_locked = TRUE; \ 2772 vm_page_lockspin_queues(); \ 2773 } \ 2774MACRO_END 2775#define __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED() \ 2776MACRO_BEGIN \ 2777 if (page_queues_locked) { \ 2778 page_queues_locked = FALSE; \ 2779 vm_page_unlock_queues(); \ 2780 } \ 2781MACRO_END 2782 2783 /* 2784 * Hold queues lock to manipulate 2785 * the page queues. Change wiring 2786 * case is obvious. 2787 */ 2788 assert(m->compressor || m->object != compressor_object); 2789 if (m->compressor) { 2790 /* 2791 * Compressor pages are neither wired 2792 * nor pageable and should never change. 2793 */ 2794 assert(m->object == compressor_object); 2795 } else if (change_wiring) { 2796 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); 2797 2798 if (wired) { 2799 if (kr == KERN_SUCCESS) { 2800 vm_page_wire(m); 2801 } 2802 } else { 2803 vm_page_unwire(m, TRUE); 2804 } 2805 /* we keep the page queues lock, if we need it later */ 2806 2807 } else { 2808 if (kr != KERN_SUCCESS) { 2809 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); 2810 vm_page_deactivate(m); 2811 /* we keep the page queues lock, if we need it later */ 2812 } else if (((!m->active && !m->inactive) || 2813 m->clean_queue || 2814 no_cache) && 2815 !VM_PAGE_WIRED(m) && !m->throttled) { 2816 2817 if (vm_page_local_q && 2818 !no_cache && 2819 (*type_of_fault == DBG_COW_FAULT || 2820 *type_of_fault == DBG_ZERO_FILL_FAULT) ) { 2821 struct vpl *lq; 2822 uint32_t lid; 2823 2824 __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); 2825 vm_object_lock_assert_exclusive(m->object); 2826 2827 /* 2828 * we got a local queue to stuff this 2829 * new page on... 2830 * its safe to manipulate local and 2831 * local_id at this point since we're 2832 * behind an exclusive object lock and 2833 * the page is not on any global queue. 2834 * 2835 * we'll use the current cpu number to 2836 * select the queue note that we don't 2837 * need to disable preemption... we're 2838 * going to behind the local queue's 2839 * lock to do the real work 2840 */ 2841 lid = cpu_number(); 2842 2843 lq = &vm_page_local_q[lid].vpl_un.vpl; 2844 2845 VPL_LOCK(&lq->vpl_lock); 2846 2847 queue_enter(&lq->vpl_queue, m, 2848 vm_page_t, pageq); 2849 m->local = TRUE; 2850 m->local_id = lid; 2851 lq->vpl_count++; 2852 2853 if (m->object->internal) 2854 lq->vpl_internal_count++; 2855 else 2856 lq->vpl_external_count++; 2857 2858 VPL_UNLOCK(&lq->vpl_lock); 2859 2860 if (lq->vpl_count > vm_page_local_q_soft_limit) 2861 { 2862 /* 2863 * we're beyond the soft limit 2864 * for the local queue 2865 * vm_page_reactivate_local will 2866 * 'try' to take the global page 2867 * queue lock... if it can't 2868 * that's ok... we'll let the 2869 * queue continue to grow up 2870 * to the hard limit... at that 2871 * point we'll wait for the 2872 * lock... once we've got the 2873 * lock, we'll transfer all of 2874 * the pages from the local 2875 * queue to the global active 2876 * queue 2877 */ 2878 vm_page_reactivate_local(lid, FALSE, FALSE); 2879 } 2880 } else { 2881 2882 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); 2883 2884 /* 2885 * test again now that we hold the 2886 * page queue lock 2887 */ 2888 if (!VM_PAGE_WIRED(m)) { 2889 if (m->clean_queue) { 2890 VM_PAGE_QUEUES_REMOVE(m); 2891 2892 vm_pageout_cleaned_reactivated++; 2893 vm_pageout_cleaned_fault_reactivated++; 2894 } 2895 2896 if ((!m->active && 2897 !m->inactive) || 2898 no_cache) { 2899 /* 2900 * If this is a no_cache mapping 2901 * and the page has never been 2902 * mapped before or was 2903 * previously a no_cache page, 2904 * then we want to leave pages 2905 * in the speculative state so 2906 * that they can be readily 2907 * recycled if free memory runs 2908 * low. Otherwise the page is 2909 * activated as normal. 2910 */ 2911 2912 if (no_cache && 2913 (!previously_pmapped || 2914 m->no_cache)) { 2915 m->no_cache = TRUE; 2916 2917 if (!m->speculative) 2918 vm_page_speculate(m, FALSE); 2919 2920 } else if (!m->active && 2921 !m->inactive) { 2922 2923 vm_page_activate(m); 2924 } 2925 } 2926 } 2927 /* we keep the page queues lock, if we need it later */ 2928 } 2929 } 2930 } 2931 2932 if ((prot & VM_PROT_EXECUTE) && 2933 ! m->xpmapped) { 2934 2935 __VM_PAGE_LOCKSPIN_QUEUES_IF_NEEDED(); 2936 2937 /* 2938 * xpmapped is protected by the page queues lock 2939 * so it matters not that we might only hold the 2940 * object lock in the shared state 2941 */ 2942 2943 if (! m->xpmapped) { 2944 2945 m->xpmapped = TRUE; 2946 __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); 2947 2948 if ((COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) && 2949 m->object->internal && 2950 m->object->pager != NULL) { 2951 /* 2952 * This page could have been 2953 * uncompressed by the 2954 * compressor pager and its 2955 * contents might be only in 2956 * the data cache. 2957 * Since it's being mapped for 2958 * "execute" for the fist time, 2959 * make sure the icache is in 2960 * sync. 2961 */ 2962 pmap_sync_page_data_phys(m->phys_page); 2963 } 2964 2965 } 2966 } 2967 /* we're done with the page queues lock, if we ever took it */ 2968 __VM_PAGE_UNLOCK_QUEUES_IF_NEEDED(); 2969 2970 2971 /* If we have a KERN_SUCCESS from the previous checks, we either have 2972 * a good page, or a tainted page that has been accepted by the process. 2973 * In both cases the page will be entered into the pmap. 2974 * If the page is writeable, we need to disconnect it from other pmaps 2975 * now so those processes can take note. 2976 */ 2977 if (kr == KERN_SUCCESS) { 2978 /* 2979 * NOTE: we may only hold the vm_object lock SHARED 2980 * at this point, but the update of pmapped is ok 2981 * since this is the ONLY bit updated behind the SHARED 2982 * lock... however, we need to figure out how to do an atomic 2983 * update on a bit field to make this less fragile... right 2984 * now I don't know how to coerce 'C' to give me the offset info 2985 * that's needed for an AtomicCompareAndSwap 2986 */ 2987 m->pmapped = TRUE; 2988 if(vm_page_is_slideable(m)) { 2989 boolean_t was_busy = m->busy; 2990 2991 vm_object_lock_assert_exclusive(m->object); 2992 2993 m->busy = TRUE; 2994 kr = vm_page_slide(m, 0); 2995 assert(m->busy); 2996 if(!was_busy) { 2997 PAGE_WAKEUP_DONE(m); 2998 } 2999 if (kr != KERN_SUCCESS) { 3000 /* 3001 * This page has not been slid correctly, 3002 * do not do the pmap_enter() ! 3003 * Let vm_fault_enter() return the error 3004 * so the caller can fail the fault. 3005 */ 3006 goto after_the_pmap_enter; 3007 } 3008 } 3009 3010 if (fault_type & VM_PROT_WRITE) { 3011 3012 if (m->wpmapped == FALSE) { 3013 vm_object_lock_assert_exclusive(m->object); 3014 3015 m->wpmapped = TRUE; 3016 } 3017 if (must_disconnect) { 3018 /* 3019 * We can only get here 3020 * because of the CSE logic 3021 */ 3022 assert(cs_enforcement_enabled); 3023 pmap_disconnect(m->phys_page); 3024 /* 3025 * If we are faulting for a write, we can clear 3026 * the execute bit - that will ensure the page is 3027 * checked again before being executable, which 3028 * protects against a map switch. 3029 * This only happens the first time the page 3030 * gets tainted, so we won't get stuck here 3031 * to make an already writeable page executable. 3032 */ 3033 if (!cs_bypass){ 3034 prot &= ~VM_PROT_EXECUTE; 3035 } 3036 } 3037 } 3038 3039 /* Prevent a deadlock by not 3040 * holding the object lock if we need to wait for a page in 3041 * pmap_enter() - <rdar://problem/7138958> */ 3042 PMAP_ENTER_OPTIONS(pmap, vaddr, m, prot, fault_type, 0, 3043 wired, PMAP_OPTIONS_NOWAIT, pe_result); 3044 3045 if(pe_result == KERN_RESOURCE_SHORTAGE) { 3046 3047 if (need_retry) { 3048 /* 3049 * this will be non-null in the case where we hold the lock 3050 * on the top-object in this chain... we can't just drop 3051 * the lock on the object we're inserting the page into 3052 * and recall the PMAP_ENTER since we can still cause 3053 * a deadlock if one of the critical paths tries to 3054 * acquire the lock on the top-object and we're blocked 3055 * in PMAP_ENTER waiting for memory... our only recourse 3056 * is to deal with it at a higher level where we can 3057 * drop both locks. 3058 */ 3059 *need_retry = TRUE; 3060 vm_pmap_enter_retried++; 3061 goto after_the_pmap_enter; 3062 } 3063 /* The nonblocking version of pmap_enter did not succeed. 3064 * and we don't need to drop other locks and retry 3065 * at the level above us, so 3066 * use the blocking version instead. Requires marking 3067 * the page busy and unlocking the object */ 3068 boolean_t was_busy = m->busy; 3069 3070 vm_object_lock_assert_exclusive(m->object); 3071 3072 m->busy = TRUE; 3073 vm_object_unlock(m->object); 3074 3075 PMAP_ENTER(pmap, vaddr, m, prot, fault_type, 0, wired); 3076 3077 /* Take the object lock again. */ 3078 vm_object_lock(m->object); 3079 3080 /* If the page was busy, someone else will wake it up. 3081 * Otherwise, we have to do it now. */ 3082 assert(m->busy); 3083 if(!was_busy) { 3084 PAGE_WAKEUP_DONE(m); 3085 } 3086 vm_pmap_enter_blocked++; 3087 } 3088 } 3089 3090after_the_pmap_enter: 3091 return kr; 3092} 3093 3094 3095/* 3096 * Routine: vm_fault 3097 * Purpose: 3098 * Handle page faults, including pseudo-faults 3099 * used to change the wiring status of pages. 3100 * Returns: 3101 * Explicit continuations have been removed. 3102 * Implementation: 3103 * vm_fault and vm_fault_page save mucho state 3104 * in the moral equivalent of a closure. The state 3105 * structure is allocated when first entering vm_fault 3106 * and deallocated when leaving vm_fault. 3107 */ 3108 3109extern int _map_enter_debug; 3110 3111unsigned long vm_fault_collapse_total = 0; 3112unsigned long vm_fault_collapse_skipped = 0; 3113 3114 3115kern_return_t 3116vm_fault( 3117 vm_map_t map, 3118 vm_map_offset_t vaddr, 3119 vm_prot_t fault_type, 3120 boolean_t change_wiring, 3121 int interruptible, 3122 pmap_t caller_pmap, 3123 vm_map_offset_t caller_pmap_addr) 3124{ 3125 vm_map_version_t version; /* Map version for verificiation */ 3126 boolean_t wired; /* Should mapping be wired down? */ 3127 vm_object_t object; /* Top-level object */ 3128 vm_object_offset_t offset; /* Top-level offset */ 3129 vm_prot_t prot; /* Protection for mapping */ 3130 vm_object_t old_copy_object; /* Saved copy object */ 3131 vm_page_t result_page; /* Result of vm_fault_page */ 3132 vm_page_t top_page; /* Placeholder page */ 3133 kern_return_t kr; 3134 3135 vm_page_t m; /* Fast access to result_page */ 3136 kern_return_t error_code; 3137 vm_object_t cur_object; 3138 vm_object_offset_t cur_offset; 3139 vm_page_t cur_m; 3140 vm_object_t new_object; 3141 int type_of_fault; 3142 pmap_t pmap; 3143 boolean_t interruptible_state; 3144 vm_map_t real_map = map; 3145 vm_map_t original_map = map; 3146 vm_prot_t original_fault_type; 3147 struct vm_object_fault_info fault_info; 3148 boolean_t need_collapse = FALSE; 3149 boolean_t need_retry = FALSE; 3150 boolean_t *need_retry_ptr = NULL; 3151 int object_lock_type = 0; 3152 int cur_object_lock_type; 3153 vm_object_t top_object = VM_OBJECT_NULL; 3154 int throttle_delay; 3155 3156 3157 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 3158 (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_START, 3159 ((uint64_t)vaddr >> 32), 3160 vaddr, 3161 (map == kernel_map), 3162 0, 3163 0); 3164 3165 if (get_preemption_level() != 0) { 3166 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 3167 (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END, 3168 ((uint64_t)vaddr >> 32), 3169 vaddr, 3170 KERN_FAILURE, 3171 0, 3172 0); 3173 3174 return (KERN_FAILURE); 3175 } 3176 3177 interruptible_state = thread_interrupt_level(interruptible); 3178 3179 VM_STAT_INCR(faults); 3180 current_task()->faults++; 3181 original_fault_type = fault_type; 3182 3183 if (fault_type & VM_PROT_WRITE) 3184 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3185 else 3186 object_lock_type = OBJECT_LOCK_SHARED; 3187 3188 cur_object_lock_type = OBJECT_LOCK_SHARED; 3189 3190RetryFault: 3191 /* 3192 * assume we will hit a page in the cache 3193 * otherwise, explicitly override with 3194 * the real fault type once we determine it 3195 */ 3196 type_of_fault = DBG_CACHE_HIT_FAULT; 3197 3198 /* 3199 * Find the backing store object and offset into 3200 * it to begin the search. 3201 */ 3202 fault_type = original_fault_type; 3203 map = original_map; 3204 vm_map_lock_read(map); 3205 3206 kr = vm_map_lookup_locked(&map, vaddr, fault_type, 3207 object_lock_type, &version, 3208 &object, &offset, &prot, &wired, 3209 &fault_info, 3210 &real_map); 3211 3212 if (kr != KERN_SUCCESS) { 3213 vm_map_unlock_read(map); 3214 goto done; 3215 } 3216 pmap = real_map->pmap; 3217 fault_info.interruptible = interruptible; 3218 fault_info.stealth = FALSE; 3219 fault_info.io_sync = FALSE; 3220 fault_info.mark_zf_absent = FALSE; 3221 fault_info.batch_pmap_op = FALSE; 3222 3223 /* 3224 * If the page is wired, we must fault for the current protection 3225 * value, to avoid further faults. 3226 */ 3227 if (wired) { 3228 fault_type = prot | VM_PROT_WRITE; 3229 /* 3230 * since we're treating this fault as a 'write' 3231 * we must hold the top object lock exclusively 3232 */ 3233 if (object_lock_type == OBJECT_LOCK_SHARED) { 3234 3235 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3236 3237 if (vm_object_lock_upgrade(object) == FALSE) { 3238 /* 3239 * couldn't upgrade, so explictly 3240 * take the lock exclusively 3241 */ 3242 vm_object_lock(object); 3243 } 3244 } 3245 } 3246 3247#if VM_FAULT_CLASSIFY 3248 /* 3249 * Temporary data gathering code 3250 */ 3251 vm_fault_classify(object, offset, fault_type); 3252#endif 3253 /* 3254 * Fast fault code. The basic idea is to do as much as 3255 * possible while holding the map lock and object locks. 3256 * Busy pages are not used until the object lock has to 3257 * be dropped to do something (copy, zero fill, pmap enter). 3258 * Similarly, paging references aren't acquired until that 3259 * point, and object references aren't used. 3260 * 3261 * If we can figure out what to do 3262 * (zero fill, copy on write, pmap enter) while holding 3263 * the locks, then it gets done. Otherwise, we give up, 3264 * and use the original fault path (which doesn't hold 3265 * the map lock, and relies on busy pages). 3266 * The give up cases include: 3267 * - Have to talk to pager. 3268 * - Page is busy, absent or in error. 3269 * - Pager has locked out desired access. 3270 * - Fault needs to be restarted. 3271 * - Have to push page into copy object. 3272 * 3273 * The code is an infinite loop that moves one level down 3274 * the shadow chain each time. cur_object and cur_offset 3275 * refer to the current object being examined. object and offset 3276 * are the original object from the map. The loop is at the 3277 * top level if and only if object and cur_object are the same. 3278 * 3279 * Invariants: Map lock is held throughout. Lock is held on 3280 * original object and cur_object (if different) when 3281 * continuing or exiting loop. 3282 * 3283 */ 3284 3285 3286 /* 3287 * If this page is to be inserted in a copy delay object 3288 * for writing, and if the object has a copy, then the 3289 * copy delay strategy is implemented in the slow fault page. 3290 */ 3291 if (object->copy_strategy == MEMORY_OBJECT_COPY_DELAY && 3292 object->copy != VM_OBJECT_NULL && (fault_type & VM_PROT_WRITE)) 3293 goto handle_copy_delay; 3294 3295 cur_object = object; 3296 cur_offset = offset; 3297 3298 while (TRUE) { 3299 if (!cur_object->pager_created && 3300 cur_object->phys_contiguous) /* superpage */ 3301 break; 3302 3303 if (cur_object->blocked_access) { 3304 /* 3305 * Access to this VM object has been blocked. 3306 * Let the slow path handle it. 3307 */ 3308 break; 3309 } 3310 3311 m = vm_page_lookup(cur_object, cur_offset); 3312 3313 if (m != VM_PAGE_NULL) { 3314 if (m->busy) { 3315 wait_result_t result; 3316 3317 /* 3318 * in order to do the PAGE_ASSERT_WAIT, we must 3319 * have object that 'm' belongs to locked exclusively 3320 */ 3321 if (object != cur_object) { 3322 3323 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3324 3325 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3326 3327 if (vm_object_lock_upgrade(cur_object) == FALSE) { 3328 /* 3329 * couldn't upgrade so go do a full retry 3330 * immediately since we can no longer be 3331 * certain about cur_object (since we 3332 * don't hold a reference on it)... 3333 * first drop the top object lock 3334 */ 3335 vm_object_unlock(object); 3336 3337 vm_map_unlock_read(map); 3338 if (real_map != map) 3339 vm_map_unlock(real_map); 3340 3341 goto RetryFault; 3342 } 3343 } 3344 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3345 3346 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3347 3348 if (vm_object_lock_upgrade(object) == FALSE) { 3349 /* 3350 * couldn't upgrade, so explictly take the lock 3351 * exclusively and go relookup the page since we 3352 * will have dropped the object lock and 3353 * a different thread could have inserted 3354 * a page at this offset 3355 * no need for a full retry since we're 3356 * at the top level of the object chain 3357 */ 3358 vm_object_lock(object); 3359 3360 continue; 3361 } 3362 } 3363 if (m->pageout_queue && m->object->internal && COMPRESSED_PAGER_IS_ACTIVE) { 3364 /* 3365 * m->busy == TRUE and the object is locked exclusively 3366 * if m->pageout_queue == TRUE after we acquire the 3367 * queues lock, we are guaranteed that it is stable on 3368 * the pageout queue and therefore reclaimable 3369 * 3370 * NOTE: this is only true for the internal pageout queue 3371 * in the compressor world 3372 */ 3373 vm_page_lock_queues(); 3374 3375 if (m->pageout_queue) { 3376 vm_pageout_throttle_up(m); 3377 vm_page_unlock_queues(); 3378 3379 PAGE_WAKEUP_DONE(m); 3380 goto reclaimed_from_pageout; 3381 } 3382 vm_page_unlock_queues(); 3383 } 3384 if (object != cur_object) 3385 vm_object_unlock(object); 3386 3387 vm_map_unlock_read(map); 3388 if (real_map != map) 3389 vm_map_unlock(real_map); 3390 3391 result = PAGE_ASSERT_WAIT(m, interruptible); 3392 3393 vm_object_unlock(cur_object); 3394 3395 if (result == THREAD_WAITING) { 3396 result = thread_block(THREAD_CONTINUE_NULL); 3397 3398 counter(c_vm_fault_page_block_busy_kernel++); 3399 } 3400 if (result == THREAD_AWAKENED || result == THREAD_RESTART) 3401 goto RetryFault; 3402 3403 kr = KERN_ABORTED; 3404 goto done; 3405 } 3406reclaimed_from_pageout: 3407 if (m->laundry) { 3408 if (object != cur_object) { 3409 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3410 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3411 3412 vm_object_unlock(object); 3413 vm_object_unlock(cur_object); 3414 3415 vm_map_unlock_read(map); 3416 if (real_map != map) 3417 vm_map_unlock(real_map); 3418 3419 goto RetryFault; 3420 } 3421 3422 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3423 3424 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3425 3426 if (vm_object_lock_upgrade(object) == FALSE) { 3427 /* 3428 * couldn't upgrade, so explictly take the lock 3429 * exclusively and go relookup the page since we 3430 * will have dropped the object lock and 3431 * a different thread could have inserted 3432 * a page at this offset 3433 * no need for a full retry since we're 3434 * at the top level of the object chain 3435 */ 3436 vm_object_lock(object); 3437 3438 continue; 3439 } 3440 } 3441 m->pageout = FALSE; 3442 3443 vm_pageout_steal_laundry(m, FALSE); 3444 } 3445 3446 if (m->phys_page == vm_page_guard_addr) { 3447 /* 3448 * Guard page: let the slow path deal with it 3449 */ 3450 break; 3451 } 3452 if (m->unusual && (m->error || m->restart || m->private || m->absent)) { 3453 /* 3454 * Unusual case... let the slow path deal with it 3455 */ 3456 break; 3457 } 3458 if (VM_OBJECT_PURGEABLE_FAULT_ERROR(m->object)) { 3459 if (object != cur_object) 3460 vm_object_unlock(object); 3461 vm_map_unlock_read(map); 3462 if (real_map != map) 3463 vm_map_unlock(real_map); 3464 vm_object_unlock(cur_object); 3465 kr = KERN_MEMORY_ERROR; 3466 goto done; 3467 } 3468 3469 if (m->encrypted) { 3470 /* 3471 * ENCRYPTED SWAP: 3472 * We've soft-faulted (because it's not in the page 3473 * table) on an encrypted page. 3474 * Keep the page "busy" so that no one messes with 3475 * it during the decryption. 3476 * Release the extra locks we're holding, keep only 3477 * the page's VM object lock. 3478 * 3479 * in order to set 'busy' on 'm', we must 3480 * have object that 'm' belongs to locked exclusively 3481 */ 3482 if (object != cur_object) { 3483 vm_object_unlock(object); 3484 3485 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3486 3487 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3488 3489 if (vm_object_lock_upgrade(cur_object) == FALSE) { 3490 /* 3491 * couldn't upgrade so go do a full retry 3492 * immediately since we've already dropped 3493 * the top object lock associated with this page 3494 * and the current one got dropped due to the 3495 * failed upgrade... the state is no longer valid 3496 */ 3497 vm_map_unlock_read(map); 3498 if (real_map != map) 3499 vm_map_unlock(real_map); 3500 3501 goto RetryFault; 3502 } 3503 } 3504 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3505 3506 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3507 3508 if (vm_object_lock_upgrade(object) == FALSE) { 3509 /* 3510 * couldn't upgrade, so explictly take the lock 3511 * exclusively and go relookup the page since we 3512 * will have dropped the object lock and 3513 * a different thread could have inserted 3514 * a page at this offset 3515 * no need for a full retry since we're 3516 * at the top level of the object chain 3517 */ 3518 vm_object_lock(object); 3519 3520 continue; 3521 } 3522 } 3523 m->busy = TRUE; 3524 3525 vm_map_unlock_read(map); 3526 if (real_map != map) 3527 vm_map_unlock(real_map); 3528 3529 vm_page_decrypt(m, 0); 3530 3531 assert(m->busy); 3532 PAGE_WAKEUP_DONE(m); 3533 3534 vm_object_unlock(cur_object); 3535 /* 3536 * Retry from the top, in case anything 3537 * changed while we were decrypting... 3538 */ 3539 goto RetryFault; 3540 } 3541 ASSERT_PAGE_DECRYPTED(m); 3542 3543 if(vm_page_is_slideable(m)) { 3544 /* 3545 * We might need to slide this page, and so, 3546 * we want to hold the VM object exclusively. 3547 */ 3548 if (object != cur_object) { 3549 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3550 vm_object_unlock(object); 3551 vm_object_unlock(cur_object); 3552 3553 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3554 3555 vm_map_unlock_read(map); 3556 if (real_map != map) 3557 vm_map_unlock(real_map); 3558 3559 goto RetryFault; 3560 } 3561 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3562 3563 vm_object_unlock(object); 3564 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3565 vm_map_unlock_read(map); 3566 goto RetryFault; 3567 } 3568 } 3569 3570 if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m)) { 3571upgrade_for_validation: 3572 /* 3573 * We might need to validate this page 3574 * against its code signature, so we 3575 * want to hold the VM object exclusively. 3576 */ 3577 if (object != cur_object) { 3578 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3579 vm_object_unlock(object); 3580 vm_object_unlock(cur_object); 3581 3582 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3583 3584 vm_map_unlock_read(map); 3585 if (real_map != map) 3586 vm_map_unlock(real_map); 3587 3588 goto RetryFault; 3589 } 3590 3591 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3592 3593 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3594 3595 if (vm_object_lock_upgrade(object) == FALSE) { 3596 /* 3597 * couldn't upgrade, so explictly take the lock 3598 * exclusively and go relookup the page since we 3599 * will have dropped the object lock and 3600 * a different thread could have inserted 3601 * a page at this offset 3602 * no need for a full retry since we're 3603 * at the top level of the object chain 3604 */ 3605 vm_object_lock(object); 3606 3607 continue; 3608 } 3609 } 3610 } 3611 /* 3612 * Two cases of map in faults: 3613 * - At top level w/o copy object. 3614 * - Read fault anywhere. 3615 * --> must disallow write. 3616 */ 3617 3618 if (object == cur_object && object->copy == VM_OBJECT_NULL) { 3619 3620 goto FastPmapEnter; 3621 } 3622 3623 if ((fault_type & VM_PROT_WRITE) == 0) { 3624 3625 if (object != cur_object) { 3626 /* 3627 * We still need to hold the top object 3628 * lock here to prevent a race between 3629 * a read fault (taking only "shared" 3630 * locks) and a write fault (taking 3631 * an "exclusive" lock on the top 3632 * object. 3633 * Otherwise, as soon as we release the 3634 * top lock, the write fault could 3635 * proceed and actually complete before 3636 * the read fault, and the copied page's 3637 * translation could then be overwritten 3638 * by the read fault's translation for 3639 * the original page. 3640 * 3641 * Let's just record what the top object 3642 * is and we'll release it later. 3643 */ 3644 top_object = object; 3645 3646 /* 3647 * switch to the object that has the new page 3648 */ 3649 object = cur_object; 3650 object_lock_type = cur_object_lock_type; 3651 } 3652FastPmapEnter: 3653 /* 3654 * prepare for the pmap_enter... 3655 * object and map are both locked 3656 * m contains valid data 3657 * object == m->object 3658 * cur_object == NULL or it's been unlocked 3659 * no paging references on either object or cur_object 3660 */ 3661 if (top_object != VM_OBJECT_NULL || object_lock_type != OBJECT_LOCK_EXCLUSIVE) 3662 need_retry_ptr = &need_retry; 3663 else 3664 need_retry_ptr = NULL; 3665 3666 if (caller_pmap) { 3667 kr = vm_fault_enter(m, 3668 caller_pmap, 3669 caller_pmap_addr, 3670 prot, 3671 fault_type, 3672 wired, 3673 change_wiring, 3674 fault_info.no_cache, 3675 fault_info.cs_bypass, 3676 need_retry_ptr, 3677 &type_of_fault); 3678 } else { 3679 kr = vm_fault_enter(m, 3680 pmap, 3681 vaddr, 3682 prot, 3683 fault_type, 3684 wired, 3685 change_wiring, 3686 fault_info.no_cache, 3687 fault_info.cs_bypass, 3688 need_retry_ptr, 3689 &type_of_fault); 3690 } 3691 3692 if (top_object != VM_OBJECT_NULL) { 3693 /* 3694 * It's safe to drop the top object 3695 * now that we've done our 3696 * vm_fault_enter(). Any other fault 3697 * in progress for that virtual 3698 * address will either find our page 3699 * and translation or put in a new page 3700 * and translation. 3701 */ 3702 vm_object_unlock(top_object); 3703 top_object = VM_OBJECT_NULL; 3704 } 3705 3706 if (need_collapse == TRUE) 3707 vm_object_collapse(object, offset, TRUE); 3708 3709 if (need_retry == FALSE && 3710 (type_of_fault == DBG_PAGEIND_FAULT || type_of_fault == DBG_PAGEINV_FAULT || type_of_fault == DBG_CACHE_HIT_FAULT)) { 3711 /* 3712 * evaluate access pattern and update state 3713 * vm_fault_deactivate_behind depends on the 3714 * state being up to date 3715 */ 3716 vm_fault_is_sequential(object, cur_offset, fault_info.behavior); 3717 3718 vm_fault_deactivate_behind(object, cur_offset, fault_info.behavior); 3719 } 3720 /* 3721 * That's it, clean up and return. 3722 */ 3723 if (m->busy) 3724 PAGE_WAKEUP_DONE(m); 3725 3726 vm_object_unlock(object); 3727 3728 vm_map_unlock_read(map); 3729 if (real_map != map) 3730 vm_map_unlock(real_map); 3731 3732 if (need_retry == TRUE) { 3733 /* 3734 * vm_fault_enter couldn't complete the PMAP_ENTER... 3735 * at this point we don't hold any locks so it's safe 3736 * to ask the pmap layer to expand the page table to 3737 * accommodate this mapping... once expanded, we'll 3738 * re-drive the fault which should result in vm_fault_enter 3739 * being able to successfully enter the mapping this time around 3740 */ 3741 (void)pmap_enter_options(pmap, vaddr, 0, 0, 0, 0, 0, PMAP_OPTIONS_NOENTER, NULL); 3742 3743 need_retry = FALSE; 3744 goto RetryFault; 3745 } 3746 goto done; 3747 } 3748 /* 3749 * COPY ON WRITE FAULT 3750 */ 3751 assert(object_lock_type == OBJECT_LOCK_EXCLUSIVE); 3752 3753 if ((throttle_delay = vm_page_throttled())) { 3754 /* 3755 * drop all of our locks... 3756 * wait until the free queue is 3757 * pumped back up and then 3758 * redrive the fault 3759 */ 3760 if (object != cur_object) 3761 vm_object_unlock(cur_object); 3762 vm_object_unlock(object); 3763 vm_map_unlock_read(map); 3764 if (real_map != map) 3765 vm_map_unlock(real_map); 3766 3767 VM_DEBUG_EVENT(vmf_cowdelay, VMF_COWDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0); 3768 3769 delay(throttle_delay); 3770 3771 if (!current_thread_aborted() && vm_page_wait((change_wiring) ? 3772 THREAD_UNINT : 3773 THREAD_ABORTSAFE)) 3774 goto RetryFault; 3775 kr = KERN_ABORTED; 3776 goto done; 3777 } 3778 /* 3779 * If objects match, then 3780 * object->copy must not be NULL (else control 3781 * would be in previous code block), and we 3782 * have a potential push into the copy object 3783 * with which we can't cope with here. 3784 */ 3785 if (cur_object == object) { 3786 /* 3787 * must take the slow path to 3788 * deal with the copy push 3789 */ 3790 break; 3791 } 3792 3793 /* 3794 * This is now a shadow based copy on write 3795 * fault -- it requires a copy up the shadow 3796 * chain. 3797 */ 3798 3799 if ((cur_object_lock_type == OBJECT_LOCK_SHARED) && 3800 VM_FAULT_NEED_CS_VALIDATION(NULL, m)) { 3801 goto upgrade_for_validation; 3802 } 3803 3804 /* 3805 * Allocate a page in the original top level 3806 * object. Give up if allocate fails. Also 3807 * need to remember current page, as it's the 3808 * source of the copy. 3809 * 3810 * at this point we hold locks on both 3811 * object and cur_object... no need to take 3812 * paging refs or mark pages BUSY since 3813 * we don't drop either object lock until 3814 * the page has been copied and inserted 3815 */ 3816 cur_m = m; 3817 m = vm_page_grab(); 3818 3819 if (m == VM_PAGE_NULL) { 3820 /* 3821 * no free page currently available... 3822 * must take the slow path 3823 */ 3824 break; 3825 } 3826 /* 3827 * Now do the copy. Mark the source page busy... 3828 * 3829 * NOTE: This code holds the map lock across 3830 * the page copy. 3831 */ 3832 vm_page_copy(cur_m, m); 3833 vm_page_insert(m, object, offset); 3834 SET_PAGE_DIRTY(m, FALSE); 3835 3836 /* 3837 * Now cope with the source page and object 3838 */ 3839 if (object->ref_count > 1 && cur_m->pmapped) 3840 pmap_disconnect(cur_m->phys_page); 3841 3842 need_collapse = TRUE; 3843 3844 if (!cur_object->internal && 3845 cur_object->copy_strategy == MEMORY_OBJECT_COPY_DELAY) { 3846 /* 3847 * The object from which we've just 3848 * copied a page is most probably backed 3849 * by a vnode. We don't want to waste too 3850 * much time trying to collapse the VM objects 3851 * and create a bottleneck when several tasks 3852 * map the same file. 3853 */ 3854 if (cur_object->copy == object) { 3855 /* 3856 * Shared mapping or no COW yet. 3857 * We can never collapse a copy 3858 * object into its backing object. 3859 */ 3860 need_collapse = FALSE; 3861 } else if (cur_object->copy == object->shadow && 3862 object->shadow->resident_page_count == 0) { 3863 /* 3864 * Shared mapping after a COW occurred. 3865 */ 3866 need_collapse = FALSE; 3867 } 3868 } 3869 vm_object_unlock(cur_object); 3870 3871 if (need_collapse == FALSE) 3872 vm_fault_collapse_skipped++; 3873 vm_fault_collapse_total++; 3874 3875 type_of_fault = DBG_COW_FAULT; 3876 VM_STAT_INCR(cow_faults); 3877 DTRACE_VM2(cow_fault, int, 1, (uint64_t *), NULL); 3878 current_task()->cow_faults++; 3879 3880 goto FastPmapEnter; 3881 3882 } else { 3883 /* 3884 * No page at cur_object, cur_offset... m == NULL 3885 */ 3886 if (cur_object->pager_created) { 3887 int compressor_external_state = VM_EXTERNAL_STATE_UNKNOWN; 3888 3889 if (MUST_ASK_PAGER(cur_object, cur_offset, compressor_external_state) == TRUE) { 3890 int my_fault_type; 3891 int c_flags = C_DONT_BLOCK; 3892 boolean_t insert_cur_object = FALSE; 3893 3894 /* 3895 * May have to talk to a pager... 3896 * if so, take the slow path by 3897 * doing a 'break' from the while (TRUE) loop 3898 * 3899 * external_state will only be set to VM_EXTERNAL_STATE_EXISTS 3900 * if the compressor is active and the page exists there 3901 */ 3902 if (compressor_external_state != VM_EXTERNAL_STATE_EXISTS) 3903 break; 3904 3905 if (map == kernel_map || real_map == kernel_map) { 3906 /* 3907 * can't call into the compressor with the kernel_map 3908 * lock held, since the compressor may try to operate 3909 * on the kernel map in order to return an empty c_segment 3910 */ 3911 break; 3912 } 3913 if (object != cur_object) { 3914 if (fault_type & VM_PROT_WRITE) 3915 c_flags |= C_KEEP; 3916 else 3917 insert_cur_object = TRUE; 3918 } 3919 if (insert_cur_object == TRUE) { 3920 3921 if (cur_object_lock_type == OBJECT_LOCK_SHARED) { 3922 3923 cur_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3924 3925 if (vm_object_lock_upgrade(cur_object) == FALSE) { 3926 /* 3927 * couldn't upgrade so go do a full retry 3928 * immediately since we can no longer be 3929 * certain about cur_object (since we 3930 * don't hold a reference on it)... 3931 * first drop the top object lock 3932 */ 3933 vm_object_unlock(object); 3934 3935 vm_map_unlock_read(map); 3936 if (real_map != map) 3937 vm_map_unlock(real_map); 3938 3939 goto RetryFault; 3940 } 3941 } 3942 } else if (object_lock_type == OBJECT_LOCK_SHARED) { 3943 3944 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 3945 3946 if (object != cur_object) { 3947 /* 3948 * we can't go for the upgrade on the top 3949 * lock since the upgrade may block waiting 3950 * for readers to drain... since we hold 3951 * cur_object locked at this point, waiting 3952 * for the readers to drain would represent 3953 * a lock order inversion since the lock order 3954 * for objects is the reference order in the 3955 * shadown chain 3956 */ 3957 vm_object_unlock(object); 3958 vm_object_unlock(cur_object); 3959 3960 vm_map_unlock_read(map); 3961 if (real_map != map) 3962 vm_map_unlock(real_map); 3963 3964 goto RetryFault; 3965 } 3966 if (vm_object_lock_upgrade(object) == FALSE) { 3967 /* 3968 * couldn't upgrade, so explictly take the lock 3969 * exclusively and go relookup the page since we 3970 * will have dropped the object lock and 3971 * a different thread could have inserted 3972 * a page at this offset 3973 * no need for a full retry since we're 3974 * at the top level of the object chain 3975 */ 3976 vm_object_lock(object); 3977 3978 continue; 3979 } 3980 } 3981 m = vm_page_grab(); 3982 3983 if (m == VM_PAGE_NULL) { 3984 /* 3985 * no free page currently available... 3986 * must take the slow path 3987 */ 3988 break; 3989 } 3990 if (vm_compressor_pager_get(cur_object->pager, cur_offset + cur_object->paging_offset, 3991 m->phys_page, &my_fault_type, c_flags) != KERN_SUCCESS) { 3992 vm_page_release(m); 3993 break; 3994 } 3995 m->dirty = TRUE; 3996 3997 if (insert_cur_object) 3998 vm_page_insert(m, cur_object, cur_offset); 3999 else 4000 vm_page_insert(m, object, offset); 4001 4002 if ((m->object->wimg_bits & VM_WIMG_MASK) != VM_WIMG_USE_DEFAULT) { 4003 /* 4004 * If the page is not cacheable, 4005 * we can't let its contents 4006 * linger in the data cache 4007 * after the decompression. 4008 */ 4009 pmap_sync_page_attributes_phys(m->phys_page); 4010 } 4011 type_of_fault = my_fault_type; 4012 4013 VM_STAT_INCR(decompressions); 4014 4015 if (cur_object != object) { 4016 if (insert_cur_object) { 4017 top_object = object; 4018 /* 4019 * switch to the object that has the new page 4020 */ 4021 object = cur_object; 4022 object_lock_type = cur_object_lock_type; 4023 } else { 4024 vm_object_unlock(cur_object); 4025 cur_object = object; 4026 } 4027 } 4028 goto FastPmapEnter; 4029 } 4030 /* 4031 * existence map present and indicates 4032 * that the pager doesn't have this page 4033 */ 4034 } 4035 if (cur_object->shadow == VM_OBJECT_NULL) { 4036 /* 4037 * Zero fill fault. Page gets 4038 * inserted into the original object. 4039 */ 4040 if (cur_object->shadow_severed || 4041 VM_OBJECT_PURGEABLE_FAULT_ERROR(cur_object)) 4042 { 4043 if (object != cur_object) 4044 vm_object_unlock(cur_object); 4045 vm_object_unlock(object); 4046 4047 vm_map_unlock_read(map); 4048 if (real_map != map) 4049 vm_map_unlock(real_map); 4050 4051 kr = KERN_MEMORY_ERROR; 4052 goto done; 4053 } 4054 if ((throttle_delay = vm_page_throttled())) { 4055 /* 4056 * drop all of our locks... 4057 * wait until the free queue is 4058 * pumped back up and then 4059 * redrive the fault 4060 */ 4061 if (object != cur_object) 4062 vm_object_unlock(cur_object); 4063 vm_object_unlock(object); 4064 vm_map_unlock_read(map); 4065 if (real_map != map) 4066 vm_map_unlock(real_map); 4067 4068 VM_DEBUG_EVENT(vmf_zfdelay, VMF_ZFDELAY, DBG_FUNC_NONE, throttle_delay, 0, 0, 0); 4069 4070 delay(throttle_delay); 4071 4072 if (!current_thread_aborted() && vm_page_wait((change_wiring) ? 4073 THREAD_UNINT : 4074 THREAD_ABORTSAFE)) 4075 goto RetryFault; 4076 kr = KERN_ABORTED; 4077 goto done; 4078 } 4079 if (vm_backing_store_low) { 4080 /* 4081 * we are protecting the system from 4082 * backing store exhaustion... 4083 * must take the slow path if we're 4084 * not privileged 4085 */ 4086 if (!(current_task()->priv_flags & VM_BACKING_STORE_PRIV)) 4087 break; 4088 } 4089 if (cur_object != object) { 4090 vm_object_unlock(cur_object); 4091 4092 cur_object = object; 4093 } 4094 if (object_lock_type == OBJECT_LOCK_SHARED) { 4095 4096 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4097 4098 if (vm_object_lock_upgrade(object) == FALSE) { 4099 /* 4100 * couldn't upgrade so do a full retry on the fault 4101 * since we dropped the object lock which 4102 * could allow another thread to insert 4103 * a page at this offset 4104 */ 4105 vm_map_unlock_read(map); 4106 if (real_map != map) 4107 vm_map_unlock(real_map); 4108 4109 goto RetryFault; 4110 } 4111 } 4112 m = vm_page_alloc(object, offset); 4113 4114 if (m == VM_PAGE_NULL) { 4115 /* 4116 * no free page currently available... 4117 * must take the slow path 4118 */ 4119 break; 4120 } 4121 4122 /* 4123 * Now zero fill page... 4124 * the page is probably going to 4125 * be written soon, so don't bother 4126 * to clear the modified bit 4127 * 4128 * NOTE: This code holds the map 4129 * lock across the zero fill. 4130 */ 4131 type_of_fault = vm_fault_zero_page(m, map->no_zero_fill); 4132 4133 goto FastPmapEnter; 4134 } 4135 /* 4136 * On to the next level in the shadow chain 4137 */ 4138 cur_offset += cur_object->vo_shadow_offset; 4139 new_object = cur_object->shadow; 4140 4141 /* 4142 * take the new_object's lock with the indicated state 4143 */ 4144 if (cur_object_lock_type == OBJECT_LOCK_SHARED) 4145 vm_object_lock_shared(new_object); 4146 else 4147 vm_object_lock(new_object); 4148 4149 if (cur_object != object) 4150 vm_object_unlock(cur_object); 4151 4152 cur_object = new_object; 4153 4154 continue; 4155 } 4156 } 4157 /* 4158 * Cleanup from fast fault failure. Drop any object 4159 * lock other than original and drop map lock. 4160 */ 4161 if (object != cur_object) 4162 vm_object_unlock(cur_object); 4163 4164 /* 4165 * must own the object lock exclusively at this point 4166 */ 4167 if (object_lock_type == OBJECT_LOCK_SHARED) { 4168 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4169 4170 if (vm_object_lock_upgrade(object) == FALSE) { 4171 /* 4172 * couldn't upgrade, so explictly 4173 * take the lock exclusively 4174 * no need to retry the fault at this 4175 * point since "vm_fault_page" will 4176 * completely re-evaluate the state 4177 */ 4178 vm_object_lock(object); 4179 } 4180 } 4181 4182handle_copy_delay: 4183 vm_map_unlock_read(map); 4184 if (real_map != map) 4185 vm_map_unlock(real_map); 4186 4187 /* 4188 * Make a reference to this object to 4189 * prevent its disposal while we are messing with 4190 * it. Once we have the reference, the map is free 4191 * to be diddled. Since objects reference their 4192 * shadows (and copies), they will stay around as well. 4193 */ 4194 vm_object_reference_locked(object); 4195 vm_object_paging_begin(object); 4196 4197 XPR(XPR_VM_FAULT,"vm_fault -> vm_fault_page\n",0,0,0,0,0); 4198 4199 error_code = 0; 4200 4201 result_page = VM_PAGE_NULL; 4202 kr = vm_fault_page(object, offset, fault_type, 4203 (change_wiring && !wired), 4204 FALSE, /* page not looked up */ 4205 &prot, &result_page, &top_page, 4206 &type_of_fault, 4207 &error_code, map->no_zero_fill, 4208 FALSE, &fault_info); 4209 4210 /* 4211 * if kr != VM_FAULT_SUCCESS, then the paging reference 4212 * has been dropped and the object unlocked... the ref_count 4213 * is still held 4214 * 4215 * if kr == VM_FAULT_SUCCESS, then the paging reference 4216 * is still held along with the ref_count on the original object 4217 * 4218 * the object is returned locked with a paging reference 4219 * 4220 * if top_page != NULL, then it's BUSY and the 4221 * object it belongs to has a paging reference 4222 * but is returned unlocked 4223 */ 4224 if (kr != VM_FAULT_SUCCESS && 4225 kr != VM_FAULT_SUCCESS_NO_VM_PAGE) { 4226 /* 4227 * we didn't succeed, lose the object reference immediately. 4228 */ 4229 vm_object_deallocate(object); 4230 4231 /* 4232 * See why we failed, and take corrective action. 4233 */ 4234 switch (kr) { 4235 case VM_FAULT_MEMORY_SHORTAGE: 4236 if (vm_page_wait((change_wiring) ? 4237 THREAD_UNINT : 4238 THREAD_ABORTSAFE)) 4239 goto RetryFault; 4240 /* 4241 * fall thru 4242 */ 4243 case VM_FAULT_INTERRUPTED: 4244 kr = KERN_ABORTED; 4245 goto done; 4246 case VM_FAULT_RETRY: 4247 goto RetryFault; 4248 case VM_FAULT_MEMORY_ERROR: 4249 if (error_code) 4250 kr = error_code; 4251 else 4252 kr = KERN_MEMORY_ERROR; 4253 goto done; 4254 default: 4255 panic("vm_fault: unexpected error 0x%x from " 4256 "vm_fault_page()\n", kr); 4257 } 4258 } 4259 m = result_page; 4260 4261 if (m != VM_PAGE_NULL) { 4262 assert((change_wiring && !wired) ? 4263 (top_page == VM_PAGE_NULL) : 4264 ((top_page == VM_PAGE_NULL) == (m->object == object))); 4265 } 4266 4267 /* 4268 * What to do with the resulting page from vm_fault_page 4269 * if it doesn't get entered into the physical map: 4270 */ 4271#define RELEASE_PAGE(m) \ 4272 MACRO_BEGIN \ 4273 PAGE_WAKEUP_DONE(m); \ 4274 if (!m->active && !m->inactive && !m->throttled) { \ 4275 vm_page_lockspin_queues(); \ 4276 if (!m->active && !m->inactive && !m->throttled) \ 4277 vm_page_activate(m); \ 4278 vm_page_unlock_queues(); \ 4279 } \ 4280 MACRO_END 4281 4282 /* 4283 * We must verify that the maps have not changed 4284 * since our last lookup. 4285 */ 4286 if (m != VM_PAGE_NULL) { 4287 old_copy_object = m->object->copy; 4288 vm_object_unlock(m->object); 4289 } else { 4290 old_copy_object = VM_OBJECT_NULL; 4291 vm_object_unlock(object); 4292 } 4293 4294 /* 4295 * no object locks are held at this point 4296 */ 4297 if ((map != original_map) || !vm_map_verify(map, &version)) { 4298 vm_object_t retry_object; 4299 vm_object_offset_t retry_offset; 4300 vm_prot_t retry_prot; 4301 4302 /* 4303 * To avoid trying to write_lock the map while another 4304 * thread has it read_locked (in vm_map_pageable), we 4305 * do not try for write permission. If the page is 4306 * still writable, we will get write permission. If it 4307 * is not, or has been marked needs_copy, we enter the 4308 * mapping without write permission, and will merely 4309 * take another fault. 4310 */ 4311 map = original_map; 4312 vm_map_lock_read(map); 4313 4314 kr = vm_map_lookup_locked(&map, vaddr, 4315 fault_type & ~VM_PROT_WRITE, 4316 OBJECT_LOCK_EXCLUSIVE, &version, 4317 &retry_object, &retry_offset, &retry_prot, 4318 &wired, 4319 &fault_info, 4320 &real_map); 4321 pmap = real_map->pmap; 4322 4323 if (kr != KERN_SUCCESS) { 4324 vm_map_unlock_read(map); 4325 4326 if (m != VM_PAGE_NULL) { 4327 /* 4328 * retake the lock so that 4329 * we can drop the paging reference 4330 * in vm_fault_cleanup and do the 4331 * PAGE_WAKEUP_DONE in RELEASE_PAGE 4332 */ 4333 vm_object_lock(m->object); 4334 4335 RELEASE_PAGE(m); 4336 4337 vm_fault_cleanup(m->object, top_page); 4338 } else { 4339 /* 4340 * retake the lock so that 4341 * we can drop the paging reference 4342 * in vm_fault_cleanup 4343 */ 4344 vm_object_lock(object); 4345 4346 vm_fault_cleanup(object, top_page); 4347 } 4348 vm_object_deallocate(object); 4349 4350 goto done; 4351 } 4352 vm_object_unlock(retry_object); 4353 4354 if ((retry_object != object) || (retry_offset != offset)) { 4355 4356 vm_map_unlock_read(map); 4357 if (real_map != map) 4358 vm_map_unlock(real_map); 4359 4360 if (m != VM_PAGE_NULL) { 4361 /* 4362 * retake the lock so that 4363 * we can drop the paging reference 4364 * in vm_fault_cleanup and do the 4365 * PAGE_WAKEUP_DONE in RELEASE_PAGE 4366 */ 4367 vm_object_lock(m->object); 4368 4369 RELEASE_PAGE(m); 4370 4371 vm_fault_cleanup(m->object, top_page); 4372 } else { 4373 /* 4374 * retake the lock so that 4375 * we can drop the paging reference 4376 * in vm_fault_cleanup 4377 */ 4378 vm_object_lock(object); 4379 4380 vm_fault_cleanup(object, top_page); 4381 } 4382 vm_object_deallocate(object); 4383 4384 goto RetryFault; 4385 } 4386 /* 4387 * Check whether the protection has changed or the object 4388 * has been copied while we left the map unlocked. 4389 */ 4390 prot &= retry_prot; 4391 } 4392 if (m != VM_PAGE_NULL) { 4393 vm_object_lock(m->object); 4394 4395 if (m->object->copy != old_copy_object) { 4396 /* 4397 * The copy object changed while the top-level object 4398 * was unlocked, so take away write permission. 4399 */ 4400 prot &= ~VM_PROT_WRITE; 4401 } 4402 } else 4403 vm_object_lock(object); 4404 4405 /* 4406 * If we want to wire down this page, but no longer have 4407 * adequate permissions, we must start all over. 4408 */ 4409 if (wired && (fault_type != (prot | VM_PROT_WRITE))) { 4410 4411 vm_map_verify_done(map, &version); 4412 if (real_map != map) 4413 vm_map_unlock(real_map); 4414 4415 if (m != VM_PAGE_NULL) { 4416 RELEASE_PAGE(m); 4417 4418 vm_fault_cleanup(m->object, top_page); 4419 } else 4420 vm_fault_cleanup(object, top_page); 4421 4422 vm_object_deallocate(object); 4423 4424 goto RetryFault; 4425 } 4426 if (m != VM_PAGE_NULL) { 4427 /* 4428 * Put this page into the physical map. 4429 * We had to do the unlock above because pmap_enter 4430 * may cause other faults. The page may be on 4431 * the pageout queues. If the pageout daemon comes 4432 * across the page, it will remove it from the queues. 4433 */ 4434 if (caller_pmap) { 4435 kr = vm_fault_enter(m, 4436 caller_pmap, 4437 caller_pmap_addr, 4438 prot, 4439 fault_type, 4440 wired, 4441 change_wiring, 4442 fault_info.no_cache, 4443 fault_info.cs_bypass, 4444 NULL, 4445 &type_of_fault); 4446 } else { 4447 kr = vm_fault_enter(m, 4448 pmap, 4449 vaddr, 4450 prot, 4451 fault_type, 4452 wired, 4453 change_wiring, 4454 fault_info.no_cache, 4455 fault_info.cs_bypass, 4456 NULL, 4457 &type_of_fault); 4458 } 4459 if (kr != KERN_SUCCESS) { 4460 /* abort this page fault */ 4461 vm_map_verify_done(map, &version); 4462 if (real_map != map) 4463 vm_map_unlock(real_map); 4464 PAGE_WAKEUP_DONE(m); 4465 vm_fault_cleanup(m->object, top_page); 4466 vm_object_deallocate(object); 4467 goto done; 4468 } 4469 } else { 4470 4471 vm_map_entry_t entry; 4472 vm_map_offset_t laddr; 4473 vm_map_offset_t ldelta, hdelta; 4474 4475 /* 4476 * do a pmap block mapping from the physical address 4477 * in the object 4478 */ 4479 4480#ifdef ppc 4481 /* While we do not worry about execution protection in */ 4482 /* general, certian pages may have instruction execution */ 4483 /* disallowed. We will check here, and if not allowed */ 4484 /* to execute, we return with a protection failure. */ 4485 4486 if ((fault_type & VM_PROT_EXECUTE) && 4487 (!pmap_eligible_for_execute((ppnum_t)(object->vo_shadow_offset >> 12)))) { 4488 4489 vm_map_verify_done(map, &version); 4490 4491 if (real_map != map) 4492 vm_map_unlock(real_map); 4493 4494 vm_fault_cleanup(object, top_page); 4495 vm_object_deallocate(object); 4496 4497 kr = KERN_PROTECTION_FAILURE; 4498 goto done; 4499 } 4500#endif /* ppc */ 4501 4502 if (real_map != map) 4503 vm_map_unlock(real_map); 4504 4505 if (original_map != map) { 4506 vm_map_unlock_read(map); 4507 vm_map_lock_read(original_map); 4508 map = original_map; 4509 } 4510 real_map = map; 4511 4512 laddr = vaddr; 4513 hdelta = 0xFFFFF000; 4514 ldelta = 0xFFFFF000; 4515 4516 while (vm_map_lookup_entry(map, laddr, &entry)) { 4517 if (ldelta > (laddr - entry->vme_start)) 4518 ldelta = laddr - entry->vme_start; 4519 if (hdelta > (entry->vme_end - laddr)) 4520 hdelta = entry->vme_end - laddr; 4521 if (entry->is_sub_map) { 4522 4523 laddr = (laddr - entry->vme_start) 4524 + entry->offset; 4525 vm_map_lock_read(entry->object.sub_map); 4526 4527 if (map != real_map) 4528 vm_map_unlock_read(map); 4529 if (entry->use_pmap) { 4530 vm_map_unlock_read(real_map); 4531 real_map = entry->object.sub_map; 4532 } 4533 map = entry->object.sub_map; 4534 4535 } else { 4536 break; 4537 } 4538 } 4539 4540 if (vm_map_lookup_entry(map, laddr, &entry) && 4541 (entry->object.vm_object != NULL) && 4542 (entry->object.vm_object == object)) { 4543 4544 int superpage = (!object->pager_created && object->phys_contiguous)? VM_MEM_SUPERPAGE : 0; 4545 if (caller_pmap) { 4546 /* 4547 * Set up a block mapped area 4548 */ 4549 assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12)); 4550 pmap_map_block(caller_pmap, 4551 (addr64_t)(caller_pmap_addr - ldelta), 4552 (ppnum_t)((((vm_map_offset_t) (entry->object.vm_object->vo_shadow_offset)) + 4553 entry->offset + (laddr - entry->vme_start) - ldelta) >> 12), 4554 (uint32_t)((ldelta + hdelta) >> 12), prot, 4555 (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0); 4556 } else { 4557 /* 4558 * Set up a block mapped area 4559 */ 4560 assert((uint32_t)((ldelta + hdelta) >> 12) == ((ldelta + hdelta) >> 12)); 4561 pmap_map_block(real_map->pmap, 4562 (addr64_t)(vaddr - ldelta), 4563 (ppnum_t)((((vm_map_offset_t)(entry->object.vm_object->vo_shadow_offset)) + 4564 entry->offset + (laddr - entry->vme_start) - ldelta) >> 12), 4565 (uint32_t)((ldelta + hdelta) >> 12), prot, 4566 (VM_WIMG_MASK & (int)object->wimg_bits) | superpage, 0); 4567 } 4568 } 4569 } 4570 4571 /* 4572 * Unlock everything, and return 4573 */ 4574 vm_map_verify_done(map, &version); 4575 if (real_map != map) 4576 vm_map_unlock(real_map); 4577 4578 if (m != VM_PAGE_NULL) { 4579 PAGE_WAKEUP_DONE(m); 4580 4581 vm_fault_cleanup(m->object, top_page); 4582 } else 4583 vm_fault_cleanup(object, top_page); 4584 4585 vm_object_deallocate(object); 4586 4587#undef RELEASE_PAGE 4588 4589 kr = KERN_SUCCESS; 4590done: 4591 thread_interrupt_level(interruptible_state); 4592 4593 /* 4594 * Only throttle on faults which cause a pagein. 4595 */ 4596 if ((type_of_fault == DBG_PAGEIND_FAULT) || (type_of_fault == DBG_PAGEINV_FAULT) || (type_of_fault == DBG_COMPRESSOR_SWAPIN_FAULT)) { 4597 throttle_lowpri_io(1); 4598 } 4599 4600 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, 4601 (MACHDBG_CODE(DBG_MACH_VM, 2)) | DBG_FUNC_END, 4602 ((uint64_t)vaddr >> 32), 4603 vaddr, 4604 kr, 4605 type_of_fault, 4606 0); 4607 4608 return (kr); 4609} 4610 4611/* 4612 * vm_fault_wire: 4613 * 4614 * Wire down a range of virtual addresses in a map. 4615 */ 4616kern_return_t 4617vm_fault_wire( 4618 vm_map_t map, 4619 vm_map_entry_t entry, 4620 pmap_t pmap, 4621 vm_map_offset_t pmap_addr) 4622{ 4623 4624 register vm_map_offset_t va; 4625 register vm_map_offset_t end_addr = entry->vme_end; 4626 register kern_return_t rc; 4627 4628 assert(entry->in_transition); 4629 4630 if ((entry->object.vm_object != NULL) && 4631 !entry->is_sub_map && 4632 entry->object.vm_object->phys_contiguous) { 4633 return KERN_SUCCESS; 4634 } 4635 4636 /* 4637 * Inform the physical mapping system that the 4638 * range of addresses may not fault, so that 4639 * page tables and such can be locked down as well. 4640 */ 4641 4642 pmap_pageable(pmap, pmap_addr, 4643 pmap_addr + (end_addr - entry->vme_start), FALSE); 4644 4645 /* 4646 * We simulate a fault to get the page and enter it 4647 * in the physical map. 4648 */ 4649 4650 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) { 4651 if ((rc = vm_fault_wire_fast( 4652 map, va, entry, pmap, 4653 pmap_addr + (va - entry->vme_start) 4654 )) != KERN_SUCCESS) { 4655 rc = vm_fault(map, va, VM_PROT_NONE, TRUE, 4656 (pmap == kernel_pmap) ? 4657 THREAD_UNINT : THREAD_ABORTSAFE, 4658 pmap, pmap_addr + (va - entry->vme_start)); 4659 DTRACE_VM2(softlock, int, 1, (uint64_t *), NULL); 4660 } 4661 4662 if (rc != KERN_SUCCESS) { 4663 struct vm_map_entry tmp_entry = *entry; 4664 4665 /* unwire wired pages */ 4666 tmp_entry.vme_end = va; 4667 vm_fault_unwire(map, 4668 &tmp_entry, FALSE, pmap, pmap_addr); 4669 4670 return rc; 4671 } 4672 } 4673 return KERN_SUCCESS; 4674} 4675 4676/* 4677 * vm_fault_unwire: 4678 * 4679 * Unwire a range of virtual addresses in a map. 4680 */ 4681void 4682vm_fault_unwire( 4683 vm_map_t map, 4684 vm_map_entry_t entry, 4685 boolean_t deallocate, 4686 pmap_t pmap, 4687 vm_map_offset_t pmap_addr) 4688{ 4689 register vm_map_offset_t va; 4690 register vm_map_offset_t end_addr = entry->vme_end; 4691 vm_object_t object; 4692 struct vm_object_fault_info fault_info; 4693 4694 object = (entry->is_sub_map) 4695 ? VM_OBJECT_NULL : entry->object.vm_object; 4696 4697 /* 4698 * If it's marked phys_contiguous, then vm_fault_wire() didn't actually 4699 * do anything since such memory is wired by default. So we don't have 4700 * anything to undo here. 4701 */ 4702 4703 if (object != VM_OBJECT_NULL && object->phys_contiguous) 4704 return; 4705 4706 fault_info.interruptible = THREAD_UNINT; 4707 fault_info.behavior = entry->behavior; 4708 fault_info.user_tag = entry->alias; 4709 fault_info.lo_offset = entry->offset; 4710 fault_info.hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; 4711 fault_info.no_cache = entry->no_cache; 4712 fault_info.stealth = TRUE; 4713 fault_info.io_sync = FALSE; 4714 fault_info.cs_bypass = FALSE; 4715 fault_info.mark_zf_absent = FALSE; 4716 fault_info.batch_pmap_op = FALSE; 4717 4718 /* 4719 * Since the pages are wired down, we must be able to 4720 * get their mappings from the physical map system. 4721 */ 4722 4723 for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) { 4724 4725 if (object == VM_OBJECT_NULL) { 4726 if (pmap) { 4727 pmap_change_wiring(pmap, 4728 pmap_addr + (va - entry->vme_start), FALSE); 4729 } 4730 (void) vm_fault(map, va, VM_PROT_NONE, 4731 TRUE, THREAD_UNINT, pmap, pmap_addr); 4732 } else { 4733 vm_prot_t prot; 4734 vm_page_t result_page; 4735 vm_page_t top_page; 4736 vm_object_t result_object; 4737 vm_fault_return_t result; 4738 4739 if (end_addr - va > (vm_size_t) -1) { 4740 /* 32-bit overflow */ 4741 fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 4742 } else { 4743 fault_info.cluster_size = (vm_size_t) (end_addr - va); 4744 assert(fault_info.cluster_size == end_addr - va); 4745 } 4746 4747 do { 4748 prot = VM_PROT_NONE; 4749 4750 vm_object_lock(object); 4751 vm_object_paging_begin(object); 4752 XPR(XPR_VM_FAULT, 4753 "vm_fault_unwire -> vm_fault_page\n", 4754 0,0,0,0,0); 4755 result_page = VM_PAGE_NULL; 4756 result = vm_fault_page( 4757 object, 4758 entry->offset + (va - entry->vme_start), 4759 VM_PROT_NONE, TRUE, 4760 FALSE, /* page not looked up */ 4761 &prot, &result_page, &top_page, 4762 (int *)0, 4763 NULL, map->no_zero_fill, 4764 FALSE, &fault_info); 4765 } while (result == VM_FAULT_RETRY); 4766 4767 /* 4768 * If this was a mapping to a file on a device that has been forcibly 4769 * unmounted, then we won't get a page back from vm_fault_page(). Just 4770 * move on to the next one in case the remaining pages are mapped from 4771 * different objects. During a forced unmount, the object is terminated 4772 * so the alive flag will be false if this happens. A forced unmount will 4773 * will occur when an external disk is unplugged before the user does an 4774 * eject, so we don't want to panic in that situation. 4775 */ 4776 4777 if (result == VM_FAULT_MEMORY_ERROR && !object->alive) 4778 continue; 4779 4780 if (result == VM_FAULT_MEMORY_ERROR && 4781 object == kernel_object) { 4782 /* 4783 * This must have been allocated with 4784 * KMA_KOBJECT and KMA_VAONLY and there's 4785 * no physical page at this offset. 4786 * We're done (no page to free). 4787 */ 4788 assert(deallocate); 4789 continue; 4790 } 4791 4792 if (result != VM_FAULT_SUCCESS) 4793 panic("vm_fault_unwire: failure"); 4794 4795 result_object = result_page->object; 4796 4797 if (deallocate) { 4798 assert(result_page->phys_page != 4799 vm_page_fictitious_addr); 4800 pmap_disconnect(result_page->phys_page); 4801 VM_PAGE_FREE(result_page); 4802 } else { 4803 if ((pmap) && (result_page->phys_page != vm_page_guard_addr)) 4804 pmap_change_wiring(pmap, 4805 pmap_addr + (va - entry->vme_start), FALSE); 4806 4807 4808 if (VM_PAGE_WIRED(result_page)) { 4809 vm_page_lockspin_queues(); 4810 vm_page_unwire(result_page, TRUE); 4811 vm_page_unlock_queues(); 4812 } 4813 if(entry->zero_wired_pages) { 4814 pmap_zero_page(result_page->phys_page); 4815 entry->zero_wired_pages = FALSE; 4816 } 4817 4818 PAGE_WAKEUP_DONE(result_page); 4819 } 4820 vm_fault_cleanup(result_object, top_page); 4821 } 4822 } 4823 4824 /* 4825 * Inform the physical mapping system that the range 4826 * of addresses may fault, so that page tables and 4827 * such may be unwired themselves. 4828 */ 4829 4830 pmap_pageable(pmap, pmap_addr, 4831 pmap_addr + (end_addr - entry->vme_start), TRUE); 4832 4833} 4834 4835/* 4836 * vm_fault_wire_fast: 4837 * 4838 * Handle common case of a wire down page fault at the given address. 4839 * If successful, the page is inserted into the associated physical map. 4840 * The map entry is passed in to avoid the overhead of a map lookup. 4841 * 4842 * NOTE: the given address should be truncated to the 4843 * proper page address. 4844 * 4845 * KERN_SUCCESS is returned if the page fault is handled; otherwise, 4846 * a standard error specifying why the fault is fatal is returned. 4847 * 4848 * The map in question must be referenced, and remains so. 4849 * Caller has a read lock on the map. 4850 * 4851 * This is a stripped version of vm_fault() for wiring pages. Anything 4852 * other than the common case will return KERN_FAILURE, and the caller 4853 * is expected to call vm_fault(). 4854 */ 4855kern_return_t 4856vm_fault_wire_fast( 4857 __unused vm_map_t map, 4858 vm_map_offset_t va, 4859 vm_map_entry_t entry, 4860 pmap_t pmap, 4861 vm_map_offset_t pmap_addr) 4862{ 4863 vm_object_t object; 4864 vm_object_offset_t offset; 4865 register vm_page_t m; 4866 vm_prot_t prot; 4867 thread_t thread = current_thread(); 4868 int type_of_fault; 4869 kern_return_t kr; 4870 4871 VM_STAT_INCR(faults); 4872 4873 if (thread != THREAD_NULL && thread->task != TASK_NULL) 4874 thread->task->faults++; 4875 4876/* 4877 * Recovery actions 4878 */ 4879 4880#undef RELEASE_PAGE 4881#define RELEASE_PAGE(m) { \ 4882 PAGE_WAKEUP_DONE(m); \ 4883 vm_page_lockspin_queues(); \ 4884 vm_page_unwire(m, TRUE); \ 4885 vm_page_unlock_queues(); \ 4886} 4887 4888 4889#undef UNLOCK_THINGS 4890#define UNLOCK_THINGS { \ 4891 vm_object_paging_end(object); \ 4892 vm_object_unlock(object); \ 4893} 4894 4895#undef UNLOCK_AND_DEALLOCATE 4896#define UNLOCK_AND_DEALLOCATE { \ 4897 UNLOCK_THINGS; \ 4898 vm_object_deallocate(object); \ 4899} 4900/* 4901 * Give up and have caller do things the hard way. 4902 */ 4903 4904#define GIVE_UP { \ 4905 UNLOCK_AND_DEALLOCATE; \ 4906 return(KERN_FAILURE); \ 4907} 4908 4909 4910 /* 4911 * If this entry is not directly to a vm_object, bail out. 4912 */ 4913 if (entry->is_sub_map) 4914 return(KERN_FAILURE); 4915 4916 /* 4917 * Find the backing store object and offset into it. 4918 */ 4919 4920 object = entry->object.vm_object; 4921 offset = (va - entry->vme_start) + entry->offset; 4922 prot = entry->protection; 4923 4924 /* 4925 * Make a reference to this object to prevent its 4926 * disposal while we are messing with it. 4927 */ 4928 4929 vm_object_lock(object); 4930 vm_object_reference_locked(object); 4931 vm_object_paging_begin(object); 4932 4933 /* 4934 * INVARIANTS (through entire routine): 4935 * 4936 * 1) At all times, we must either have the object 4937 * lock or a busy page in some object to prevent 4938 * some other thread from trying to bring in 4939 * the same page. 4940 * 4941 * 2) Once we have a busy page, we must remove it from 4942 * the pageout queues, so that the pageout daemon 4943 * will not grab it away. 4944 * 4945 */ 4946 4947 /* 4948 * Look for page in top-level object. If it's not there or 4949 * there's something going on, give up. 4950 * ENCRYPTED SWAP: use the slow fault path, since we'll need to 4951 * decrypt the page before wiring it down. 4952 */ 4953 m = vm_page_lookup(object, offset); 4954 if ((m == VM_PAGE_NULL) || (m->busy) || (m->encrypted) || 4955 (m->unusual && ( m->error || m->restart || m->absent))) { 4956 4957 GIVE_UP; 4958 } 4959 ASSERT_PAGE_DECRYPTED(m); 4960 4961 if (m->fictitious && 4962 m->phys_page == vm_page_guard_addr) { 4963 /* 4964 * Guard pages are fictitious pages and are never 4965 * entered into a pmap, so let's say it's been wired... 4966 */ 4967 kr = KERN_SUCCESS; 4968 goto done; 4969 } 4970 4971 /* 4972 * Wire the page down now. All bail outs beyond this 4973 * point must unwire the page. 4974 */ 4975 4976 vm_page_lockspin_queues(); 4977 vm_page_wire(m); 4978 vm_page_unlock_queues(); 4979 4980 /* 4981 * Mark page busy for other threads. 4982 */ 4983 assert(!m->busy); 4984 m->busy = TRUE; 4985 assert(!m->absent); 4986 4987 /* 4988 * Give up if the page is being written and there's a copy object 4989 */ 4990 if ((object->copy != VM_OBJECT_NULL) && (prot & VM_PROT_WRITE)) { 4991 RELEASE_PAGE(m); 4992 GIVE_UP; 4993 } 4994 4995 /* 4996 * Put this page into the physical map. 4997 */ 4998 type_of_fault = DBG_CACHE_HIT_FAULT; 4999 kr = vm_fault_enter(m, 5000 pmap, 5001 pmap_addr, 5002 prot, 5003 prot, 5004 TRUE, 5005 FALSE, 5006 FALSE, 5007 FALSE, 5008 NULL, 5009 &type_of_fault); 5010 5011done: 5012 /* 5013 * Unlock everything, and return 5014 */ 5015 5016 PAGE_WAKEUP_DONE(m); 5017 UNLOCK_AND_DEALLOCATE; 5018 5019 return kr; 5020 5021} 5022 5023/* 5024 * Routine: vm_fault_copy_cleanup 5025 * Purpose: 5026 * Release a page used by vm_fault_copy. 5027 */ 5028 5029void 5030vm_fault_copy_cleanup( 5031 vm_page_t page, 5032 vm_page_t top_page) 5033{ 5034 vm_object_t object = page->object; 5035 5036 vm_object_lock(object); 5037 PAGE_WAKEUP_DONE(page); 5038 if (!page->active && !page->inactive && !page->throttled) { 5039 vm_page_lockspin_queues(); 5040 if (!page->active && !page->inactive && !page->throttled) 5041 vm_page_activate(page); 5042 vm_page_unlock_queues(); 5043 } 5044 vm_fault_cleanup(object, top_page); 5045} 5046 5047void 5048vm_fault_copy_dst_cleanup( 5049 vm_page_t page) 5050{ 5051 vm_object_t object; 5052 5053 if (page != VM_PAGE_NULL) { 5054 object = page->object; 5055 vm_object_lock(object); 5056 vm_page_lockspin_queues(); 5057 vm_page_unwire(page, TRUE); 5058 vm_page_unlock_queues(); 5059 vm_object_paging_end(object); 5060 vm_object_unlock(object); 5061 } 5062} 5063 5064/* 5065 * Routine: vm_fault_copy 5066 * 5067 * Purpose: 5068 * Copy pages from one virtual memory object to another -- 5069 * neither the source nor destination pages need be resident. 5070 * 5071 * Before actually copying a page, the version associated with 5072 * the destination address map wil be verified. 5073 * 5074 * In/out conditions: 5075 * The caller must hold a reference, but not a lock, to 5076 * each of the source and destination objects and to the 5077 * destination map. 5078 * 5079 * Results: 5080 * Returns KERN_SUCCESS if no errors were encountered in 5081 * reading or writing the data. Returns KERN_INTERRUPTED if 5082 * the operation was interrupted (only possible if the 5083 * "interruptible" argument is asserted). Other return values 5084 * indicate a permanent error in copying the data. 5085 * 5086 * The actual amount of data copied will be returned in the 5087 * "copy_size" argument. In the event that the destination map 5088 * verification failed, this amount may be less than the amount 5089 * requested. 5090 */ 5091kern_return_t 5092vm_fault_copy( 5093 vm_object_t src_object, 5094 vm_object_offset_t src_offset, 5095 vm_map_size_t *copy_size, /* INOUT */ 5096 vm_object_t dst_object, 5097 vm_object_offset_t dst_offset, 5098 vm_map_t dst_map, 5099 vm_map_version_t *dst_version, 5100 int interruptible) 5101{ 5102 vm_page_t result_page; 5103 5104 vm_page_t src_page; 5105 vm_page_t src_top_page; 5106 vm_prot_t src_prot; 5107 5108 vm_page_t dst_page; 5109 vm_page_t dst_top_page; 5110 vm_prot_t dst_prot; 5111 5112 vm_map_size_t amount_left; 5113 vm_object_t old_copy_object; 5114 kern_return_t error = 0; 5115 vm_fault_return_t result; 5116 5117 vm_map_size_t part_size; 5118 struct vm_object_fault_info fault_info_src; 5119 struct vm_object_fault_info fault_info_dst; 5120 5121 /* 5122 * In order not to confuse the clustered pageins, align 5123 * the different offsets on a page boundary. 5124 */ 5125 5126#define RETURN(x) \ 5127 MACRO_BEGIN \ 5128 *copy_size -= amount_left; \ 5129 MACRO_RETURN(x); \ 5130 MACRO_END 5131 5132 amount_left = *copy_size; 5133 5134 fault_info_src.interruptible = interruptible; 5135 fault_info_src.behavior = VM_BEHAVIOR_SEQUENTIAL; 5136 fault_info_src.user_tag = 0; 5137 fault_info_src.lo_offset = vm_object_trunc_page(src_offset); 5138 fault_info_src.hi_offset = fault_info_src.lo_offset + amount_left; 5139 fault_info_src.no_cache = FALSE; 5140 fault_info_src.stealth = TRUE; 5141 fault_info_src.io_sync = FALSE; 5142 fault_info_src.cs_bypass = FALSE; 5143 fault_info_src.mark_zf_absent = FALSE; 5144 fault_info_src.batch_pmap_op = FALSE; 5145 5146 fault_info_dst.interruptible = interruptible; 5147 fault_info_dst.behavior = VM_BEHAVIOR_SEQUENTIAL; 5148 fault_info_dst.user_tag = 0; 5149 fault_info_dst.lo_offset = vm_object_trunc_page(dst_offset); 5150 fault_info_dst.hi_offset = fault_info_dst.lo_offset + amount_left; 5151 fault_info_dst.no_cache = FALSE; 5152 fault_info_dst.stealth = TRUE; 5153 fault_info_dst.io_sync = FALSE; 5154 fault_info_dst.cs_bypass = FALSE; 5155 fault_info_dst.mark_zf_absent = FALSE; 5156 fault_info_dst.batch_pmap_op = FALSE; 5157 5158 do { /* while (amount_left > 0) */ 5159 /* 5160 * There may be a deadlock if both source and destination 5161 * pages are the same. To avoid this deadlock, the copy must 5162 * start by getting the destination page in order to apply 5163 * COW semantics if any. 5164 */ 5165 5166 RetryDestinationFault: ; 5167 5168 dst_prot = VM_PROT_WRITE|VM_PROT_READ; 5169 5170 vm_object_lock(dst_object); 5171 vm_object_paging_begin(dst_object); 5172 5173 if (amount_left > (vm_size_t) -1) { 5174 /* 32-bit overflow */ 5175 fault_info_dst.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 5176 } else { 5177 fault_info_dst.cluster_size = (vm_size_t) amount_left; 5178 assert(fault_info_dst.cluster_size == amount_left); 5179 } 5180 5181 XPR(XPR_VM_FAULT,"vm_fault_copy -> vm_fault_page\n",0,0,0,0,0); 5182 dst_page = VM_PAGE_NULL; 5183 result = vm_fault_page(dst_object, 5184 vm_object_trunc_page(dst_offset), 5185 VM_PROT_WRITE|VM_PROT_READ, 5186 FALSE, 5187 FALSE, /* page not looked up */ 5188 &dst_prot, &dst_page, &dst_top_page, 5189 (int *)0, 5190 &error, 5191 dst_map->no_zero_fill, 5192 FALSE, &fault_info_dst); 5193 switch (result) { 5194 case VM_FAULT_SUCCESS: 5195 break; 5196 case VM_FAULT_RETRY: 5197 goto RetryDestinationFault; 5198 case VM_FAULT_MEMORY_SHORTAGE: 5199 if (vm_page_wait(interruptible)) 5200 goto RetryDestinationFault; 5201 /* fall thru */ 5202 case VM_FAULT_INTERRUPTED: 5203 RETURN(MACH_SEND_INTERRUPTED); 5204 case VM_FAULT_SUCCESS_NO_VM_PAGE: 5205 /* success but no VM page: fail the copy */ 5206 vm_object_paging_end(dst_object); 5207 vm_object_unlock(dst_object); 5208 /*FALLTHROUGH*/ 5209 case VM_FAULT_MEMORY_ERROR: 5210 if (error) 5211 return (error); 5212 else 5213 return(KERN_MEMORY_ERROR); 5214 default: 5215 panic("vm_fault_copy: unexpected error 0x%x from " 5216 "vm_fault_page()\n", result); 5217 } 5218 assert ((dst_prot & VM_PROT_WRITE) != VM_PROT_NONE); 5219 5220 old_copy_object = dst_page->object->copy; 5221 5222 /* 5223 * There exists the possiblity that the source and 5224 * destination page are the same. But we can't 5225 * easily determine that now. If they are the 5226 * same, the call to vm_fault_page() for the 5227 * destination page will deadlock. To prevent this we 5228 * wire the page so we can drop busy without having 5229 * the page daemon steal the page. We clean up the 5230 * top page but keep the paging reference on the object 5231 * holding the dest page so it doesn't go away. 5232 */ 5233 5234 vm_page_lockspin_queues(); 5235 vm_page_wire(dst_page); 5236 vm_page_unlock_queues(); 5237 PAGE_WAKEUP_DONE(dst_page); 5238 vm_object_unlock(dst_page->object); 5239 5240 if (dst_top_page != VM_PAGE_NULL) { 5241 vm_object_lock(dst_object); 5242 VM_PAGE_FREE(dst_top_page); 5243 vm_object_paging_end(dst_object); 5244 vm_object_unlock(dst_object); 5245 } 5246 5247 RetrySourceFault: ; 5248 5249 if (src_object == VM_OBJECT_NULL) { 5250 /* 5251 * No source object. We will just 5252 * zero-fill the page in dst_object. 5253 */ 5254 src_page = VM_PAGE_NULL; 5255 result_page = VM_PAGE_NULL; 5256 } else { 5257 vm_object_lock(src_object); 5258 src_page = vm_page_lookup(src_object, 5259 vm_object_trunc_page(src_offset)); 5260 if (src_page == dst_page) { 5261 src_prot = dst_prot; 5262 result_page = VM_PAGE_NULL; 5263 } else { 5264 src_prot = VM_PROT_READ; 5265 vm_object_paging_begin(src_object); 5266 5267 if (amount_left > (vm_size_t) -1) { 5268 /* 32-bit overflow */ 5269 fault_info_src.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 5270 } else { 5271 fault_info_src.cluster_size = (vm_size_t) amount_left; 5272 assert(fault_info_src.cluster_size == amount_left); 5273 } 5274 5275 XPR(XPR_VM_FAULT, 5276 "vm_fault_copy(2) -> vm_fault_page\n", 5277 0,0,0,0,0); 5278 result_page = VM_PAGE_NULL; 5279 result = vm_fault_page( 5280 src_object, 5281 vm_object_trunc_page(src_offset), 5282 VM_PROT_READ, FALSE, 5283 FALSE, /* page not looked up */ 5284 &src_prot, 5285 &result_page, &src_top_page, 5286 (int *)0, &error, FALSE, 5287 FALSE, &fault_info_src); 5288 5289 switch (result) { 5290 case VM_FAULT_SUCCESS: 5291 break; 5292 case VM_FAULT_RETRY: 5293 goto RetrySourceFault; 5294 case VM_FAULT_MEMORY_SHORTAGE: 5295 if (vm_page_wait(interruptible)) 5296 goto RetrySourceFault; 5297 /* fall thru */ 5298 case VM_FAULT_INTERRUPTED: 5299 vm_fault_copy_dst_cleanup(dst_page); 5300 RETURN(MACH_SEND_INTERRUPTED); 5301 case VM_FAULT_SUCCESS_NO_VM_PAGE: 5302 /* success but no VM page: fail */ 5303 vm_object_paging_end(src_object); 5304 vm_object_unlock(src_object); 5305 /*FALLTHROUGH*/ 5306 case VM_FAULT_MEMORY_ERROR: 5307 vm_fault_copy_dst_cleanup(dst_page); 5308 if (error) 5309 return (error); 5310 else 5311 return(KERN_MEMORY_ERROR); 5312 default: 5313 panic("vm_fault_copy(2): unexpected " 5314 "error 0x%x from " 5315 "vm_fault_page()\n", result); 5316 } 5317 5318 5319 assert((src_top_page == VM_PAGE_NULL) == 5320 (result_page->object == src_object)); 5321 } 5322 assert ((src_prot & VM_PROT_READ) != VM_PROT_NONE); 5323 vm_object_unlock(result_page->object); 5324 } 5325 5326 if (!vm_map_verify(dst_map, dst_version)) { 5327 if (result_page != VM_PAGE_NULL && src_page != dst_page) 5328 vm_fault_copy_cleanup(result_page, src_top_page); 5329 vm_fault_copy_dst_cleanup(dst_page); 5330 break; 5331 } 5332 5333 vm_object_lock(dst_page->object); 5334 5335 if (dst_page->object->copy != old_copy_object) { 5336 vm_object_unlock(dst_page->object); 5337 vm_map_verify_done(dst_map, dst_version); 5338 if (result_page != VM_PAGE_NULL && src_page != dst_page) 5339 vm_fault_copy_cleanup(result_page, src_top_page); 5340 vm_fault_copy_dst_cleanup(dst_page); 5341 break; 5342 } 5343 vm_object_unlock(dst_page->object); 5344 5345 /* 5346 * Copy the page, and note that it is dirty 5347 * immediately. 5348 */ 5349 5350 if (!page_aligned(src_offset) || 5351 !page_aligned(dst_offset) || 5352 !page_aligned(amount_left)) { 5353 5354 vm_object_offset_t src_po, 5355 dst_po; 5356 5357 src_po = src_offset - vm_object_trunc_page(src_offset); 5358 dst_po = dst_offset - vm_object_trunc_page(dst_offset); 5359 5360 if (dst_po > src_po) { 5361 part_size = PAGE_SIZE - dst_po; 5362 } else { 5363 part_size = PAGE_SIZE - src_po; 5364 } 5365 if (part_size > (amount_left)){ 5366 part_size = amount_left; 5367 } 5368 5369 if (result_page == VM_PAGE_NULL) { 5370 assert((vm_offset_t) dst_po == dst_po); 5371 assert((vm_size_t) part_size == part_size); 5372 vm_page_part_zero_fill(dst_page, 5373 (vm_offset_t) dst_po, 5374 (vm_size_t) part_size); 5375 } else { 5376 assert((vm_offset_t) src_po == src_po); 5377 assert((vm_offset_t) dst_po == dst_po); 5378 assert((vm_size_t) part_size == part_size); 5379 vm_page_part_copy(result_page, 5380 (vm_offset_t) src_po, 5381 dst_page, 5382 (vm_offset_t) dst_po, 5383 (vm_size_t)part_size); 5384 if(!dst_page->dirty){ 5385 vm_object_lock(dst_object); 5386 SET_PAGE_DIRTY(dst_page, TRUE); 5387 vm_object_unlock(dst_page->object); 5388 } 5389 5390 } 5391 } else { 5392 part_size = PAGE_SIZE; 5393 5394 if (result_page == VM_PAGE_NULL) 5395 vm_page_zero_fill(dst_page); 5396 else{ 5397 vm_object_lock(result_page->object); 5398 vm_page_copy(result_page, dst_page); 5399 vm_object_unlock(result_page->object); 5400 5401 if(!dst_page->dirty){ 5402 vm_object_lock(dst_object); 5403 SET_PAGE_DIRTY(dst_page, TRUE); 5404 vm_object_unlock(dst_page->object); 5405 } 5406 } 5407 5408 } 5409 5410 /* 5411 * Unlock everything, and return 5412 */ 5413 5414 vm_map_verify_done(dst_map, dst_version); 5415 5416 if (result_page != VM_PAGE_NULL && src_page != dst_page) 5417 vm_fault_copy_cleanup(result_page, src_top_page); 5418 vm_fault_copy_dst_cleanup(dst_page); 5419 5420 amount_left -= part_size; 5421 src_offset += part_size; 5422 dst_offset += part_size; 5423 } while (amount_left > 0); 5424 5425 RETURN(KERN_SUCCESS); 5426#undef RETURN 5427 5428 /*NOTREACHED*/ 5429} 5430 5431#if VM_FAULT_CLASSIFY 5432/* 5433 * Temporary statistics gathering support. 5434 */ 5435 5436/* 5437 * Statistics arrays: 5438 */ 5439#define VM_FAULT_TYPES_MAX 5 5440#define VM_FAULT_LEVEL_MAX 8 5441 5442int vm_fault_stats[VM_FAULT_TYPES_MAX][VM_FAULT_LEVEL_MAX]; 5443 5444#define VM_FAULT_TYPE_ZERO_FILL 0 5445#define VM_FAULT_TYPE_MAP_IN 1 5446#define VM_FAULT_TYPE_PAGER 2 5447#define VM_FAULT_TYPE_COPY 3 5448#define VM_FAULT_TYPE_OTHER 4 5449 5450 5451void 5452vm_fault_classify(vm_object_t object, 5453 vm_object_offset_t offset, 5454 vm_prot_t fault_type) 5455{ 5456 int type, level = 0; 5457 vm_page_t m; 5458 5459 while (TRUE) { 5460 m = vm_page_lookup(object, offset); 5461 if (m != VM_PAGE_NULL) { 5462 if (m->busy || m->error || m->restart || m->absent) { 5463 type = VM_FAULT_TYPE_OTHER; 5464 break; 5465 } 5466 if (((fault_type & VM_PROT_WRITE) == 0) || 5467 ((level == 0) && object->copy == VM_OBJECT_NULL)) { 5468 type = VM_FAULT_TYPE_MAP_IN; 5469 break; 5470 } 5471 type = VM_FAULT_TYPE_COPY; 5472 break; 5473 } 5474 else { 5475 if (object->pager_created) { 5476 type = VM_FAULT_TYPE_PAGER; 5477 break; 5478 } 5479 if (object->shadow == VM_OBJECT_NULL) { 5480 type = VM_FAULT_TYPE_ZERO_FILL; 5481 break; 5482 } 5483 5484 offset += object->vo_shadow_offset; 5485 object = object->shadow; 5486 level++; 5487 continue; 5488 } 5489 } 5490 5491 if (level > VM_FAULT_LEVEL_MAX) 5492 level = VM_FAULT_LEVEL_MAX; 5493 5494 vm_fault_stats[type][level] += 1; 5495 5496 return; 5497} 5498 5499/* cleanup routine to call from debugger */ 5500 5501void 5502vm_fault_classify_init(void) 5503{ 5504 int type, level; 5505 5506 for (type = 0; type < VM_FAULT_TYPES_MAX; type++) { 5507 for (level = 0; level < VM_FAULT_LEVEL_MAX; level++) { 5508 vm_fault_stats[type][level] = 0; 5509 } 5510 } 5511 5512 return; 5513} 5514#endif /* VM_FAULT_CLASSIFY */ 5515 5516 5517void 5518vm_page_validate_cs_mapped( 5519 vm_page_t page, 5520 const void *kaddr) 5521{ 5522 vm_object_t object; 5523 vm_object_offset_t offset; 5524 kern_return_t kr; 5525 memory_object_t pager; 5526 void *blobs; 5527 boolean_t validated, tainted; 5528 5529 assert(page->busy); 5530 vm_object_lock_assert_exclusive(page->object); 5531 5532 if (!cs_validation) { 5533 return; 5534 } 5535 5536 if (page->wpmapped && !page->cs_tainted) { 5537 /* 5538 * This page was mapped for "write" access sometime in the 5539 * past and could still be modifiable in the future. 5540 * Consider it tainted. 5541 * [ If the page was already found to be "tainted", no 5542 * need to re-validate. ] 5543 */ 5544 page->cs_validated = TRUE; 5545 page->cs_tainted = TRUE; 5546 if (cs_debug) { 5547 printf("CODESIGNING: vm_page_validate_cs: " 5548 "page %p obj %p off 0x%llx " 5549 "was modified\n", 5550 page, page->object, page->offset); 5551 } 5552 vm_cs_validated_dirtied++; 5553 } 5554 5555 if (page->cs_validated) { 5556 return; 5557 } 5558 5559 vm_cs_validates++; 5560 5561 object = page->object; 5562 assert(object->code_signed); 5563 offset = page->offset; 5564 5565 if (!object->alive || object->terminating || object->pager == NULL) { 5566 /* 5567 * The object is terminating and we don't have its pager 5568 * so we can't validate the data... 5569 */ 5570 return; 5571 } 5572 /* 5573 * Since we get here to validate a page that was brought in by 5574 * the pager, we know that this pager is all setup and ready 5575 * by now. 5576 */ 5577 assert(!object->internal); 5578 assert(object->pager != NULL); 5579 assert(object->pager_ready); 5580 5581 pager = object->pager; 5582 assert(object->paging_in_progress); 5583 kr = vnode_pager_get_object_cs_blobs(pager, &blobs); 5584 if (kr != KERN_SUCCESS) { 5585 blobs = NULL; 5586 } 5587 5588 /* verify the SHA1 hash for this page */ 5589 validated = cs_validate_page(blobs, 5590 pager, 5591 offset + object->paging_offset, 5592 (const void *)kaddr, 5593 &tainted); 5594 5595 page->cs_validated = validated; 5596 if (validated) { 5597 page->cs_tainted = tainted; 5598 } 5599} 5600 5601extern int panic_on_cs_killed; 5602void 5603vm_page_validate_cs( 5604 vm_page_t page) 5605{ 5606 vm_object_t object; 5607 vm_object_offset_t offset; 5608 vm_map_offset_t koffset; 5609 vm_map_size_t ksize; 5610 vm_offset_t kaddr; 5611 kern_return_t kr; 5612 boolean_t busy_page; 5613 boolean_t need_unmap; 5614 5615 vm_object_lock_assert_held(page->object); 5616 5617 if (!cs_validation) { 5618 return; 5619 } 5620 5621 if (page->wpmapped && !page->cs_tainted) { 5622 vm_object_lock_assert_exclusive(page->object); 5623 5624 /* 5625 * This page was mapped for "write" access sometime in the 5626 * past and could still be modifiable in the future. 5627 * Consider it tainted. 5628 * [ If the page was already found to be "tainted", no 5629 * need to re-validate. ] 5630 */ 5631 page->cs_validated = TRUE; 5632 page->cs_tainted = TRUE; 5633 if (cs_debug) { 5634 printf("CODESIGNING: vm_page_validate_cs: " 5635 "page %p obj %p off 0x%llx " 5636 "was modified\n", 5637 page, page->object, page->offset); 5638 } 5639 vm_cs_validated_dirtied++; 5640 } 5641 5642 if (page->cs_validated) { 5643 return; 5644 } 5645 5646 if (panic_on_cs_killed && 5647 page->slid) { 5648 panic("vm_page_validate_cs(%p): page is slid\n", page); 5649 } 5650 assert(!page->slid); 5651 5652#if CHECK_CS_VALIDATION_BITMAP 5653 if ( vnode_pager_cs_check_validation_bitmap( page->object->pager, trunc_page(page->offset + page->object->paging_offset), CS_BITMAP_CHECK ) == KERN_SUCCESS) { 5654 page->cs_validated = TRUE; 5655 page->cs_tainted = FALSE; 5656 vm_cs_bitmap_validated++; 5657 return; 5658 } 5659#endif 5660 vm_object_lock_assert_exclusive(page->object); 5661 5662 object = page->object; 5663 assert(object->code_signed); 5664 offset = page->offset; 5665 5666 busy_page = page->busy; 5667 if (!busy_page) { 5668 /* keep page busy while we map (and unlock) the VM object */ 5669 page->busy = TRUE; 5670 } 5671 5672 /* 5673 * Take a paging reference on the VM object 5674 * to protect it from collapse or bypass, 5675 * and keep it from disappearing too. 5676 */ 5677 vm_object_paging_begin(object); 5678 5679 /* map the page in the kernel address space */ 5680 ksize = PAGE_SIZE_64; 5681 koffset = 0; 5682 need_unmap = FALSE; 5683 kr = vm_paging_map_object(page, 5684 object, 5685 offset, 5686 VM_PROT_READ, 5687 FALSE, /* can't unlock object ! */ 5688 &ksize, 5689 &koffset, 5690 &need_unmap); 5691 if (kr != KERN_SUCCESS) { 5692 panic("vm_page_validate_cs: could not map page: 0x%x\n", kr); 5693 } 5694 kaddr = CAST_DOWN(vm_offset_t, koffset); 5695 5696 /* validate the mapped page */ 5697 vm_page_validate_cs_mapped(page, (const void *) kaddr); 5698 5699#if CHECK_CS_VALIDATION_BITMAP 5700 if ( page->cs_validated == TRUE && page->cs_tainted == FALSE ) { 5701 vnode_pager_cs_check_validation_bitmap( object->pager, trunc_page( offset + object->paging_offset), CS_BITMAP_SET ); 5702 } 5703#endif 5704 assert(page->busy); 5705 assert(object == page->object); 5706 vm_object_lock_assert_exclusive(object); 5707 5708 if (!busy_page) { 5709 PAGE_WAKEUP_DONE(page); 5710 } 5711 if (need_unmap) { 5712 /* unmap the map from the kernel address space */ 5713 vm_paging_unmap_object(object, koffset, koffset + ksize); 5714 koffset = 0; 5715 ksize = 0; 5716 kaddr = 0; 5717 } 5718 vm_object_paging_end(object); 5719} 5720