/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_pageout.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	The proverbial page-out daemon.
 */

#include <stdint.h>

#include <debug.h>
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>
#include <advisory_pageout.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>
#include <mach/sdt.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/sched.h>
#include <kern/thread.h>
#include <kern/xpr.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>
#include <machine/commpage.h>

#include <vm/pmap.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h> /* must be last */
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_shared_region.h>
/*
 * ENCRYPTED SWAP:
 */
#include <libkern/crypto/aes.h>
extern u_int32_t random(void);	/* from <libkern/libkern.h> */

extern int cs_debug;

#if UPL_DEBUG
#include <libkern/OSDebug.h>
#endif

#if VM_PRESSURE_EVENTS
extern void consider_vm_pressure_events(void);
#endif

#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE	/* maximum iterations of the active queue to move pages to inactive */
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE	100
#endif

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE	/* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#ifdef CONFIG_EMBEDDED
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE	1024
#else
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE	4096
#endif
#endif

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF	100	/* number of pages to move to break deadlock */
#endif

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF	50	/* minimum number of pages to move to the inactive q */
#endif

#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX	128UL	/* maximum pageouts on a given pageout queue */
#endif	/* VM_PAGE_LAUNDRY_MAX */

#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT	30	/* milliseconds */
#endif	/* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT	200	/* milliseconds */
#endif	/* VM_PAGEOUT_EMPTY_WAIT */

#ifndef VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT	300	/* milliseconds */
#endif	/* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_IDLE_WAIT */

#ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
#define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED	1000	/* maximum pages considered before we issue a pressure event */
#endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */

#ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
#define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS	5	/* seconds */
#endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */

unsigned int	vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
unsigned int	vm_page_speculative_percentage = 5;

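/*
 * With the default vm_page_speculative_percentage of 5, the target computed
 * below works out to (total) / 20, i.e. 5% of the active + inactive page
 * count.  Note that the divisor (100 / vm_page_speculative_percentage) is
 * evaluated with integer arithmetic, so percentages that do not divide 100
 * evenly are effectively rounded (e.g. a value of 33 yields (total) / 3).
 */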
#ifndef VM_PAGE_SPECULATIVE_TARGET
#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
#endif /* VM_PAGE_SPECULATIVE_TARGET */


#ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
#define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
#endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */


/*
 * To obtain a reasonable LRU approximation, the inactive queue
 * needs to be large enough to give pages on it a chance to be
 * referenced a second time.  This macro defines the fraction
 * of active+inactive pages that should be inactive.
 * The pageout daemon uses it to update vm_page_inactive_target.
 *
 * If vm_page_free_count falls below vm_page_free_target and
 * vm_page_inactive_count is below vm_page_inactive_target,
 * then the pageout daemon starts running.
 */

#ifndef	VM_PAGE_INACTIVE_TARGET
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 2)
#endif	/* VM_PAGE_INACTIVE_TARGET */

/*
 * Once the pageout daemon starts running, it keeps going
 * until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef	VM_PAGE_FREE_TARGET
#ifdef	CONFIG_EMBEDDED
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 100)
#else
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif
#endif	/* VM_PAGE_FREE_TARGET */

/*
 * The pageout daemon always starts running once vm_page_free_count
 * falls below vm_page_free_min.
 */

#ifndef	VM_PAGE_FREE_MIN
#ifdef	CONFIG_EMBEDDED
#define	VM_PAGE_FREE_MIN(free)		(10 + (free) / 200)
#else
#define	VM_PAGE_FREE_MIN(free)		(10 + (free) / 100)
#endif
#endif	/* VM_PAGE_FREE_MIN */

#define VM_PAGE_FREE_RESERVED_LIMIT	100
#define VM_PAGE_FREE_MIN_LIMIT		1500
#define VM_PAGE_FREE_TARGET_LIMIT	2000


/*
 * When vm_page_free_count falls below vm_page_free_reserved,
 * only vm-privileged threads can allocate pages.  vm-privilege
 * allows the pageout daemon and default pager (and any other
 * associated threads needed for default pageout) to continue
 * operation by dipping into the reserved pool of pages.
 */

#ifndef	VM_PAGE_FREE_RESERVED
#define	VM_PAGE_FREE_RESERVED(n)	\
	((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif	/* VM_PAGE_FREE_RESERVED */

/*
 * When we dequeue pages from the inactive list, they are
 * reactivated (ie, put back on the active queue) if referenced.
 * However, it is possible to starve the free list if other
 * processors are referencing pages faster than we can turn off
 * the referenced bit.  So we limit the number of reactivations
 * we will make per call of vm_pageout_scan().
 */
#define VM_PAGE_REACTIVATE_LIMIT_MAX	20000
#ifndef	VM_PAGE_REACTIVATE_LIMIT
#ifdef	CONFIG_EMBEDDED
#define	VM_PAGE_REACTIVATE_LIMIT(avail)	(VM_PAGE_INACTIVE_TARGET(avail) / 2)
#else
#define	VM_PAGE_REACTIVATE_LIMIT(avail)	(MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
#endif
#endif	/* VM_PAGE_REACTIVATE_LIMIT */
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM	100


extern boolean_t hibernate_cleaning_in_progress;

/*
 * Exported variable used to broadcast the activation of the pageout scan
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */

unsigned int	vm_pageout_scan_event_counter = 0;

/*
 * Forward declarations for internal routines.
 */

static void vm_pressure_thread(void);
static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(void);
static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);

static thread_t	vm_pageout_external_iothread = THREAD_NULL;
static thread_t	vm_pageout_internal_iothread = THREAD_NULL;

unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_idle_wait = 0;		/* milliseconds */
unsigned int vm_pageout_empty_wait = 0;		/* milliseconds */
unsigned int vm_pageout_burst_wait = 0;		/* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;	/* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

int	vm_upl_wait_for_pages = 0;


/*
 * These variables record the pageout daemon's actions:
 * how many pages it looks at and what happens to those pages.
 * No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;		/* debugging */
unsigned int vm_pageout_active_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive = 0;		/* debugging */
unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive_error = 0;	/* debugging */
unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
unsigned int vm_pageout_inactive_notalive = 0;	/* debugging */
unsigned int vm_pageout_inactive_used = 0;	/* debugging */
unsigned int vm_pageout_cache_evicted = 0;	/* debugging */
unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
unsigned int vm_pageout_speculative_clean = 0;	/* debugging */

unsigned int vm_pageout_freed_from_cleaned = 0;
unsigned int vm_pageout_freed_from_speculative = 0;
unsigned int vm_pageout_freed_from_inactive_clean = 0;

unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;

unsigned int vm_pageout_cleaned_reclaimed = 0;		/* debugging; how many cleaned pages are reclaimed by the pageout scan */
unsigned int vm_pageout_cleaned_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_reference_reactivated = 0;
unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
unsigned int vm_pageout_cleaned_fault_reactivated = 0;
unsigned int vm_pageout_cleaned_commit_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_busy = 0;
unsigned int vm_pageout_cleaned_nolock = 0;

unsigned int vm_pageout_inactive_dirty_internal = 0;	/* debugging */
unsigned int vm_pageout_inactive_dirty_external = 0;	/* debugging */
unsigned int vm_pageout_inactive_deactivated = 0;	/* debugging */
unsigned int vm_pageout_inactive_anonymous = 0;		/* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;		/* debugging */
unsigned int vm_pageout_purged_objects = 0;		/* debugging */
unsigned int vm_stat_discard = 0;			/* debugging */
unsigned int vm_stat_discard_sent = 0;			/* debugging */
unsigned int vm_stat_discard_failure = 0;		/* debugging */
unsigned int vm_stat_discard_throttle = 0;		/* debugging */
unsigned int vm_pageout_reactivation_limit_exceeded = 0;	/* debugging */
unsigned int vm_pageout_catch_ups = 0;			/* debugging */
unsigned int vm_pageout_inactive_force_reclaim = 0;	/* debugging */

unsigned int vm_pageout_scan_reclaimed_throttled = 0;
unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
unsigned int vm_pageout_scan_inactive_throttled_external = 0;
unsigned int vm_pageout_scan_throttle = 0;			/* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;		/* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_inactive_external_forced_reactivate_count = 0;	/* debugging */
unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0;	/* debugging */
unsigned int vm_page_speculative_count_drifts = 0;
unsigned int vm_page_speculative_count_drift_max = 0;


unsigned int vm_precleaning_aborted = 0;

static boolean_t vm_pageout_need_to_refill_clean_queue = FALSE;
static boolean_t vm_pageout_precleaning_delayed = FALSE;

/*
 * Backing store throttle when BS is exhausted
 */
unsigned int	vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line = 0;
unsigned int vm_pageout_in_place = 0;

unsigned int vm_page_steal_pageout_page = 0;

/*
 * ENCRYPTED SWAP:
 * counters and statistics...
 */
unsigned long vm_page_decrypt_counter = 0;
unsigned long vm_page_decrypt_for_upl_counter = 0;
unsigned long vm_page_encrypt_counter = 0;
unsigned long vm_page_encrypt_abort_counter = 0;
unsigned long vm_page_encrypt_already_encrypted_counter = 0;
boolean_t vm_pages_encrypted = FALSE;	/* are there encrypted pages ? */

struct	vm_pageout_queue vm_pageout_queue_internal;
struct	vm_pageout_queue vm_pageout_queue_external;

unsigned int vm_page_speculative_target = 0;

vm_object_t	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;

#if DEVELOPMENT || DEBUG
unsigned long vm_cs_validated_resets = 0;
#endif

int	vm_debug_events = 0;

#if CONFIG_MEMORYSTATUS
extern int memorystatus_wakeup;
#endif
#if CONFIG_JETSAM
extern int memorystatus_kill_top_proc_from_VM(void);
#endif

/*
 *	Routine:	vm_backing_store_disable
 *	Purpose:
 *		Suspend non-privileged threads wishing to extend
 *		backing store when we are low on backing store
 *		(Synchronized by caller)
 */
void
vm_backing_store_disable(
	boolean_t	disable)
{
	if (disable) {
		vm_backing_store_low = 1;
	} else {
		if (vm_backing_store_low) {
			vm_backing_store_low = 0;
			thread_wakeup((event_t) &vm_backing_store_low);
		}
	}
}


#if MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause)	clause
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */

/*
 *	Routine:	vm_pageout_object_terminate
 *	Purpose:
 *		Destroy the pageout_object, and perform all of the
 *		required cleanup actions.
 *
 *	In/Out conditions:
 *		The object must be locked, and will be returned locked.
 */
void
vm_pageout_object_terminate(
	vm_object_t	object)
{
	vm_object_t	shadow_object;

	/*
	 * Deal with the deallocation (last reference) of a pageout object
	 * (used for cleaning-in-place) by dropping the paging references/
	 * freeing pages in the original object.
	 */

	assert(object->pageout);
	shadow_object = object->shadow;
	vm_object_lock(shadow_object);

	while (!queue_empty(&object->memq)) {
		vm_page_t		p, m;
		vm_object_offset_t	offset;

		p = (vm_page_t) queue_first(&object->memq);

		assert(p->private);
		assert(p->pageout);
		p->pageout = FALSE;
		assert(!p->cleaning);
		assert(!p->laundry);

		offset = p->offset;
		VM_PAGE_FREE(p);
		p = VM_PAGE_NULL;

		m = vm_page_lookup(shadow_object,
				   offset + object->vo_shadow_offset);

		if (m == VM_PAGE_NULL)
			continue;

		assert((m->dirty) || (m->precious) ||
		       (m->busy && m->cleaning));

		/*
		 * Handle the trusted pager throttle.
		 * Also decrement the burst throttle (if external).
		 */
		vm_page_lock_queues();
		if (m->laundry)
			vm_pageout_throttle_up(m);

		/*
		 * Handle the "target" page(s). These pages are to be freed if
		 * successfully cleaned. Target pages are always busy, and are
		 * wired exactly once. The initial target pages are not mapped,
		 * (so cannot be referenced or modified) but converted target
		 * pages may have been modified between the selection as an
		 * adjacent page and conversion to a target.
		 */
		if (m->pageout) {
			assert(m->busy);
			assert(m->wire_count == 1);
			m->cleaning = FALSE;
			m->encrypted_cleaning = FALSE;
			m->pageout = FALSE;
#if MACH_CLUSTER_STATS
			if (m->wanted) vm_pageout_target_collisions++;
#endif
			/*
			 * Revoke all access to the page. Since the object is
			 * locked, and the page is busy, this prevents the page
			 * from being dirtied after the pmap_disconnect() call
			 * returns.
			 *
			 * Since the page is left "dirty" but "not modified", we
			 * can detect whether the page was redirtied during
			 * pageout by checking the modify state.
			 */
			if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			} else {
				m->dirty = FALSE;
			}

			if (m->dirty) {
				CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
				vm_page_unwire(m, TRUE);	/* reactivates */
				VM_STAT_INCR(reactivations);
				PAGE_WAKEUP_DONE(m);
			} else {
				CLUSTER_STAT(vm_pageout_target_page_freed++;)
				vm_page_free(m);	/* clears busy, etc. */
			}
			vm_page_unlock_queues();
			continue;
		}
		/*
		 * Handle the "adjacent" pages. These pages were cleaned in
		 * place, and should be left alone.
		 * If prep_pin_count is nonzero, then someone is using the
		 * page, so make it active.
		 */
		if (!m->active && !m->inactive && !m->throttled && !m->private) {
			if (m->reference)
				vm_page_activate(m);
			else
				vm_page_deactivate(m);
		}
		if (m->overwriting) {
			/*
			 * the (COPY_OUT_FROM == FALSE) request_page_list case
			 */
			if (m->busy) {
				/*
				 * We do not re-set m->dirty !
				 * The page was busy so no extraneous activity
				 * could have occurred. COPY_INTO is a read into the
				 * new pages. CLEAN_IN_PLACE does actually write
				 * out the pages but handling outside of this code
				 * will take care of resetting dirty. We clear the
				 * modify however for the Programmed I/O case.
				 */
				pmap_clear_modify(m->phys_page);

				m->busy = FALSE;
				m->absent = FALSE;
			} else {
				/*
				 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
				 * Occurs when the original page was wired
				 * at the time of the list request
				 */
				assert(VM_PAGE_WIRED(m));
				vm_page_unwire(m, TRUE);	/* reactivates */
			}
			m->overwriting = FALSE;
		} else {
			/*
			 * Set the dirty state according to whether or not the page was
			 * modified during the pageout. Note that we purposefully do
			 * NOT call pmap_clear_modify since the page is still mapped.
			 * If the page were to be dirtied between the 2 calls, this
			 * fact would be lost. This code is only necessary to
			 * maintain statistics, since the pmap module is always
			 * consulted if m->dirty is false.
			 */
#if MACH_CLUSTER_STATS
			m->dirty = pmap_is_modified(m->phys_page);

			if (m->dirty)	vm_pageout_cluster_dirtied++;
			else		vm_pageout_cluster_cleaned++;
			if (m->wanted)	vm_pageout_cluster_collisions++;
#else
			m->dirty = FALSE;
#endif
		}
		if (m->encrypted_cleaning == TRUE) {
			m->encrypted_cleaning = FALSE;
			m->busy = FALSE;
		}
		m->cleaning = FALSE;

		/*
		 * Wakeup any thread waiting for the page to be un-cleaning.
		 */
		PAGE_WAKEUP(m);
		vm_page_unlock_queues();
	}
	/*
	 * Account for the paging reference taken in vm_paging_object_allocate.
	 */
	vm_object_activity_end(shadow_object);
	vm_object_unlock(shadow_object);

	assert(object->ref_count == 0);
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);
	assert(object->resident_page_count == 0);
	return;
}

/*
 * Routine:	vm_pageclean_setup
 *
 * Purpose:	setup a page to be cleaned (made non-dirty), but not
 *		necessarily flushed from the VM page cache.
 *		This is accomplished by cleaning in place.
 *
 *		The page must not be busy, and new_object
 *		must be locked.
 *
 */
void
vm_pageclean_setup(
	vm_page_t		m,
	vm_page_t		new_m,
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
{
	assert(!m->busy);
#if 0
	assert(!m->cleaning);
#endif

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
	    m->object, m->offset, m,
	    new_m, new_offset);

	pmap_clear_modify(m->phys_page);

	/*
	 * Mark original page as cleaning in place.
	 */
	m->cleaning = TRUE;
	SET_PAGE_DIRTY(m, FALSE);
	m->precious = FALSE;

	/*
	 * Convert the fictitious page to a private shadow of
	 * the real page.
	 */
	assert(new_m->fictitious);
	assert(new_m->phys_page == vm_page_fictitious_addr);
	new_m->fictitious = FALSE;
	new_m->private = TRUE;
	new_m->pageout = TRUE;
	new_m->phys_page = m->phys_page;

	vm_page_lockspin_queues();
	vm_page_wire(new_m);
	vm_page_unlock_queues();

	vm_page_insert(new_m, new_object, new_offset);
	assert(!new_m->wanted);
	new_m->busy = FALSE;
}

/*
 *	Routine:	vm_pageout_initialize_page
 *	Purpose:
 *		Causes the specified page to be initialized in
 *		the appropriate memory object. This routine is used to push
 *		pages into a copy-object when they are modified in the
 *		permanent object.
 *
 *		The page is moved to a temporary object and paged out.
 *
 *	In/out conditions:
 *		The page in question must not be on any pageout queues.
 *		The object to which it belongs must be locked.
 *		The page must be busy, but not hold a paging reference.
 *
 *	Implementation:
 *		Move this page to a completely new object.
 */
void
vm_pageout_initialize_page(
	vm_page_t	m)
{
	vm_object_t		object;
	vm_object_offset_t	paging_offset;
	memory_object_t		pager;

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageout_initialize_page, page 0x%X\n",
	    m, 0, 0, 0, 0);
	assert(m->busy);

	/*
	 * Verify that we really want to clean this page
	 */
	assert(!m->absent);
	assert(!m->error);
	assert(m->dirty);

	/*
	 * Create a paging reference to let us play with the object.
	 */
	object = m->object;
	paging_offset = m->offset + object->paging_offset;

	if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
		VM_PAGE_FREE(m);
		panic("reservation without pageout?"); /* alan */
		vm_object_unlock(object);

		return;
	}

	/*
	 * If there's no pager, then we can't clean the page. This should
	 * never happen since this should be a copy object and therefore not
	 * an external object, so the pager should always be there.
	 */

	pager = object->pager;

	if (pager == MEMORY_OBJECT_NULL) {
		VM_PAGE_FREE(m);
		panic("missing pager for copy object");
		return;
	}

	/*
	 * set the page for future call to vm_fault_list_request
	 */
	pmap_clear_modify(m->phys_page);
	SET_PAGE_DIRTY(m, FALSE);
	m->pageout = TRUE;

	/*
	 * keep the object from collapsing or terminating
	 */
	vm_object_paging_begin(object);
	vm_object_unlock(object);

	/*
	 *	Write the data to its pager.
	 *	Note that the data is passed by naming the new object,
	 *	not a virtual address; the pager interface has been
	 *	manipulated to use the "internal memory" data type.
	 *	[The object reference from its allocation is donated
	 *	to the eventual recipient.]
	 */
	memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);

	vm_object_lock(object);
	vm_object_paging_end(object);
}

#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats[MAXCLUSTERPAGES];
#endif	/* MACH_CLUSTER_STATS */


/*
 * vm_pageout_cluster:
 *
 * Given a page, queue it to the appropriate I/O thread,
 * which will page it out and attempt to clean adjacent pages
 * in the same operation.
 *
 * The page must be busy, and the object and queues locked. We will take a
 * paging reference to prevent deallocation or collapse when we
 * release the object lock back at the call site.  The I/O thread
 * is responsible for consuming this reference
 *
 * The page must not be on any pageout queue.
 */

void
vm_pageout_cluster(vm_page_t m, boolean_t pageout)
{
	vm_object_t	object = m->object;
	struct		vm_pageout_queue *q;


	XPR(XPR_VM_PAGEOUT,
	    "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
	    object, m->offset, m, 0, 0);

	VM_PAGE_CHECK(m);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	vm_object_lock_assert_exclusive(object);

	/*
	 * Only a certain kind of page is appreciated here.
	 */
	assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
	assert(!m->cleaning && !m->pageout && !m->laundry);
#ifndef CONFIG_FREEZE
	assert(!m->inactive && !m->active);
	assert(!m->throttled);
#endif

	/*
	 * protect the object from collapse or termination
	 */
	vm_object_activity_begin(object);

	m->pageout = pageout;

	if (object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	/*
	 * pgo_laundry count is tied to the laundry bit
	 */
	m->laundry = TRUE;
	q->pgo_laundry++;

	m->pageout_queue = TRUE;
	queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

	if (q->pgo_idle == TRUE) {
		q->pgo_idle = FALSE;
		thread_wakeup((event_t) &q->pgo_pending);
	}

	VM_PAGE_CHECK(m);
}


unsigned long vm_pageout_throttle_up_count = 0;

/*
 * A page is back from laundry or we are stealing it back from
 * the laundering state.  See if there are some pages waiting to
 * go to laundry and if we can let some of them go now.
 *
 * Object and page queues must be locked.
 */
void
vm_pageout_throttle_up(
	vm_page_t	m)
{
	struct vm_pageout_queue *q;

	assert(m->object != VM_OBJECT_NULL);
	assert(m->object != kernel_object);

#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(m->object);
#endif

	vm_pageout_throttle_up_count++;

	if (m->object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	if (m->pageout_queue == TRUE) {

		queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
		m->pageout_queue = FALSE;

		m->pageq.next = NULL;
		m->pageq.prev = NULL;

		vm_object_activity_end(m->object);
	}
	if (m->laundry == TRUE) {

		m->laundry = FALSE;
		q->pgo_laundry--;

		if (q->pgo_throttled == TRUE) {
			q->pgo_throttled = FALSE;
			thread_wakeup((event_t) &q->pgo_laundry);
		}
		if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
			q->pgo_draining = FALSE;
			thread_wakeup((event_t) (&q->pgo_laundry+1));
		}
		if (vm_pageout_precleaning_delayed == TRUE) {
			/*
			 * since the pageout scan can return on laundry congestion, wake it up this way
			 * don't depend on pgo_throttled == TRUE to indicate that the pageout scan thread
			 * is blocked on &q->pgo_laundry since the hibernation mechanism utilizes both
			 * pgo_throttled and pgo_draining
			 */
			vm_pageout_precleaning_delayed = FALSE;
			thread_wakeup((event_t)(&vm_page_free_wanted));
		}
	}
}


/*
 * VM memory pressure monitoring.
 *
 * vm_pageout_scan() keeps track of the number of pages it considers and
 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
 *
 * compute_memory_pressure() is called every second from compute_averages()
 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
 * of reclaimed pages in a new vm_pageout_stat[] bucket.
 *
 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 * The caller provides the number of seconds ("nsecs") worth of statistics
 * it wants, up to 30 seconds.
 * It computes the number of pages reclaimed in the past "nsecs" seconds and
 * also returns the number of pages the system still needs to reclaim at this
 * moment in time.
 */
#define VM_PAGEOUT_STAT_SIZE	31
struct vm_pageout_stat {
	unsigned int considered;
	unsigned int reclaimed;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
unsigned int vm_pageout_stat_now = 0;
unsigned int vm_memory_pressure = 0;

#define VM_PAGEOUT_STAT_BEFORE(i) \
	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)

/*
 * Called from compute_averages().
 */
void
compute_memory_pressure(
	__unused void *arg)
{
	unsigned int vm_pageout_next;

	vm_memory_pressure =
		vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;

	commpage_set_memory_pressure( vm_memory_pressure );

	/* move "now" forward */
	vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
	vm_pageout_stats[vm_pageout_next].considered = 0;
	vm_pageout_stats[vm_pageout_next].reclaimed = 0;
	vm_pageout_stat_now = vm_pageout_next;
}

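/*
 * The stats above form a 31-slot ring buffer indexed by vm_pageout_stat_now.
 * VM_PAGEOUT_STAT_BEFORE/AFTER simply step backward/forward with wraparound:
 * for example, BEFORE(0) yields slot 30 and AFTER(30) wraps back to slot 0.
 * Since one slot is always the bucket currently being filled, up to 30
 * completed one-second samples are available to mach_vm_pressure_monitor().
 */
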
/*
 * IMPORTANT
 * mach_vm_ctl_page_free_wanted() is called indirectly, via
 * mach_vm_pressure_monitor(), when taking a stackshot.  Therefore,
 * it must be safe in the restricted stackshot context.  Locks and/or
 * blocking are not allowable.
 */
unsigned int
mach_vm_ctl_page_free_wanted(void)
{
	unsigned int page_free_target, page_free_count, page_free_wanted;

	page_free_target = vm_page_free_target;
	page_free_count = vm_page_free_count;
	if (page_free_target > page_free_count) {
		page_free_wanted = page_free_target - page_free_count;
	} else {
		page_free_wanted = 0;
	}

	return page_free_wanted;
}


/*
 * IMPORTANT:
 * mach_vm_pressure_monitor() is called when taking a stackshot, with
 * wait_for_pressure FALSE, so that code path must remain safe in the
 * restricted stackshot context.  No blocking or locks are allowable
 * on that code path.
 */

kern_return_t
mach_vm_pressure_monitor(
	boolean_t	wait_for_pressure,
	unsigned int	nsecs_monitored,
	unsigned int	*pages_reclaimed_p,
	unsigned int	*pages_wanted_p)
{
	wait_result_t	wr;
	unsigned int	vm_pageout_then, vm_pageout_now;
	unsigned int	pages_reclaimed;

	/*
	 * We don't take the vm_page_queue_lock here because we don't want
	 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
	 * thread when it's trying to reclaim memory.  We don't need fully
	 * accurate monitoring anyway...
	 */

	if (wait_for_pressure) {
		/* wait until there's memory pressure */
		while (vm_page_free_count >= vm_page_free_target) {
			wr = assert_wait((event_t) &vm_page_free_wanted,
					 THREAD_INTERRUPTIBLE);
			if (wr == THREAD_WAITING) {
				wr = thread_block(THREAD_CONTINUE_NULL);
			}
			if (wr == THREAD_INTERRUPTED) {
				return KERN_ABORTED;
			}
			if (wr == THREAD_AWAKENED) {
				/*
				 * The memory pressure might have already
				 * been relieved but let's not block again
				 * and let's report that there was memory
				 * pressure at some point.
				 */
				break;
			}
		}
	}

	/* provide the number of pages the system wants to reclaim */
	if (pages_wanted_p != NULL) {
		*pages_wanted_p = mach_vm_ctl_page_free_wanted();
	}

	if (pages_reclaimed_p == NULL) {
		return KERN_SUCCESS;
	}

	/* provide number of pages reclaimed in the last "nsecs_monitored" */
	do {
		vm_pageout_now = vm_pageout_stat_now;
		pages_reclaimed = 0;
		for (vm_pageout_then =
			     VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
		     vm_pageout_then != vm_pageout_now &&
			     nsecs_monitored-- != 0;
		     vm_pageout_then =
			     VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
			pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
		}
	} while (vm_pageout_now != vm_pageout_stat_now);
	*pages_reclaimed_p = pages_reclaimed;

	return KERN_SUCCESS;
}

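/*
 * Illustrative usage (not part of the original source): a caller wanting the
 * last 10 seconds of reclaim activity without blocking could do
 *
 *	unsigned int reclaimed, wanted;
 *	if (mach_vm_pressure_monitor(FALSE, 10, &reclaimed, &wanted) == KERN_SUCCESS) {
 *		// "reclaimed" pages were freed over ~10s; "wanted" pages are still needed
 *	}
 *
 * Passing wait_for_pressure == TRUE instead blocks the calling thread until
 * vm_page_free_count drops below vm_page_free_target (or the wait is
 * interrupted, in which case KERN_ABORTED is returned).
 */
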

/*
 * function in BSD to apply I/O throttle to the pageout thread
 */
extern void vm_pageout_io_throttle(void);


/*
 * Page States: Used below to maintain the page state
 * before it's removed from its Q.  This saved state
 * helps us do the right accounting in certain cases
 */
#define PAGE_STATE_SPECULATIVE		1
#define PAGE_STATE_ANONYMOUS		2
#define PAGE_STATE_INACTIVE		3
#define PAGE_STATE_INACTIVE_FIRST	4
#define PAGE_STATE_CLEAN		5

#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m)				\
	MACRO_BEGIN							\
	/*								\
	 * If a "reusable" page somehow made it back into		\
	 * the active queue, it's been re-used and is not		\
	 * quite re-usable.						\
	 * If the VM object was "all_reusable", consider it		\
	 * as "all re-used" instead of converting it to			\
	 * "partially re-used", which could be expensive.		\
	 */								\
	if ((m)->reusable ||						\
	    (m)->object->all_reusable) {				\
		vm_object_reuse_pages((m)->object,			\
				      (m)->offset,			\
				      (m)->offset + PAGE_SIZE_64,	\
				      FALSE);				\
	}								\
	MACRO_END


#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT		128
#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX	1024

#define	FCS_IDLE		0
#define FCS_DELAYED		1
#define FCS_DEADLOCK_DETECTED	2

struct flow_control {
	int		state;
	mach_timespec_t	ts;
};

uint32_t vm_pageout_considered_page = 0;


/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 *	It returns with both vm_page_queue_free_lock and vm_page_queue_lock
 *	held and vm_page_free_wanted == 0.
 */
void
vm_pageout_scan(void)
{
	unsigned int loop_count = 0;
	unsigned int inactive_burst_count = 0;
	unsigned int active_burst_count = 0;
	unsigned int reactivated_this_call;
	unsigned int reactivate_limit;
	vm_page_t   local_freeq = NULL;
	int         local_freed = 0;
	int         delayed_unlock;
	int	    delayed_unlock_limit = 0;
	int         refmod_state = 0;
	int	    vm_pageout_deadlock_target = 0;
	struct	    vm_pageout_queue *iq;
	struct	    vm_pageout_queue *eq;
	struct	    vm_speculative_age_q *sq;
	struct	    flow_control	flow_control = { 0, { 0, 0 } };
	boolean_t   inactive_throttled = FALSE;
	boolean_t   try_failed;
	mach_timespec_t	ts;
	unsigned    int msecs = 0;
	vm_object_t object;
	vm_object_t last_object_tried;
	uint32_t    catch_up_count = 0;
	uint32_t    inactive_reclaim_run;
	boolean_t   forced_reclaim;
	boolean_t   exceeded_burst_throttle;
	boolean_t   grab_anonymous = FALSE;
	int	    page_prev_state = 0;
	int	    cache_evict_throttle = 0;
	uint32_t    vm_pageout_inactive_external_forced_reactivate_limit = 0;

	VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
		       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
		       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);

	flow_control.state = FCS_IDLE;
	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;
	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];


	XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);


	vm_page_lock_queues();
	delayed_unlock = 1;	/* must be nonzero if Qs are locked, 0 if unlocked */

	/*
	 *	Calculate the max number of referenced pages on the inactive
	 *	queue that we will reactivate.
	 */
	reactivated_this_call = 0;
	reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
						    vm_page_inactive_count);
	inactive_reclaim_run = 0;

	vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;

	/*
	 *	We want to gradually dribble pages from the active queue
	 *	to the inactive queue.  If we let the inactive queue get
	 *	very small, and then suddenly dump many pages into it,
	 *	those pages won't get a sufficient chance to be referenced
	 *	before we start taking them from the inactive queue.
	 *
	 *	We must limit the rate at which we send pages to the pagers
	 *	so that we don't tie up too many pages in the I/O queues.
	 *	We implement a throttling mechanism using the laundry count
	 *	to limit the number of pages outstanding to the default
	 *	and external pagers.  We can bypass the throttles and look
	 *	for clean pages if the pageout queues don't drain in a timely
	 *	fashion since this may indicate that the pageout paths are
	 *	stalled waiting for memory, which only we can provide.
	 */


Restart:
	assert(delayed_unlock != 0);

	/*
	 *	Recalculate vm_page_inactive_target.
	 */
	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
							   vm_page_inactive_count +
							   vm_page_speculative_count);

	vm_page_anonymous_min = vm_page_inactive_target / 3;

	/*
	 * don't want to wake the pageout_scan thread up every time we fall below
	 * the targets... set a low water mark at 0.25% below the target
	 */
	vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);

	if (vm_page_speculative_percentage > 50)
		vm_page_speculative_percentage = 50;
	else if (vm_page_speculative_percentage <= 0)
		vm_page_speculative_percentage = 1;

	vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
								vm_page_inactive_count);

	object = NULL;
	last_object_tried = NULL;
	try_failed = FALSE;

	if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
		catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
	else
		catch_up_count = 0;

	for (;;) {
		vm_page_t m;

		DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);

		if (delayed_unlock == 0) {
			vm_page_lock_queues();
			delayed_unlock = 1;
		}
		if (vm_upl_wait_for_pages < 0)
			vm_upl_wait_for_pages = 0;

		delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;

		if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
			delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;

		/*
		 * Move pages from active to inactive if we're below the target
		 */
		/* if we are trying to make clean, we need to make sure we actually have inactive - mj */
		if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
			goto done_moving_active_pages;

		if (object != NULL) {
			vm_object_unlock(object);
			object = NULL;
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
		}
		/*
		 * Don't sweep through active queue more than the throttle
		 * which should be kept relatively low
		 */
		active_burst_count = MIN(vm_pageout_burst_active_throttle,
					 vm_page_active_count);

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
			       vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
			       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
			       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);

		while (!queue_empty(&vm_page_queue_active) && active_burst_count--) {

			vm_pageout_active++;

			m = (vm_page_t) queue_first(&vm_page_queue_active);

			assert(m->active && !m->inactive);
			assert(!m->laundry);
			assert(m->object != kernel_object);
			assert(m->phys_page != vm_page_guard_addr);

			DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);

			/*
			 * The page might be absent or busy,
			 * but vm_page_deactivate can handle that.
			 */
			vm_page_deactivate(m);

			if (delayed_unlock++ > delayed_unlock_limit) {

				if (local_freeq) {
					vm_page_unlock_queues();

					VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
						       vm_page_free_count, local_freed, delayed_unlock_limit, 1);

					vm_page_free_list(local_freeq, TRUE);

					VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
						       vm_page_free_count, 0, 0, 1);

					local_freeq = NULL;
					local_freed = 0;
					vm_page_lock_queues();
				} else
					lck_mtx_yield(&vm_page_queue_lock);

				delayed_unlock = 1;

				/*
				 * continue the while loop processing
				 * the active queue... need to hold
				 * the page queues lock
				 */
			}
		}

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
			       vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);


		/**********************************************************************
		 * above this point we're playing with the active queue
		 * below this point we're playing with the throttling mechanisms
		 * and the inactive queue
		 **********************************************************************/

done_moving_active_pages:

		if (vm_page_cleaned_count < VM_PAGE_CLEANED_MIN && vm_page_anonymous_count > vm_page_anonymous_min)
			vm_pageout_need_to_refill_clean_queue = TRUE;

		if (vm_page_free_count + local_freed >= vm_page_free_target) {
			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			if (local_freeq) {
				vm_page_unlock_queues();

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
					       vm_page_free_count, local_freed, delayed_unlock_limit, 2);

				vm_page_free_list(local_freeq, TRUE);

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					       vm_page_free_count, local_freed, 0, 2);

				local_freeq = NULL;
				local_freed = 0;
				vm_page_lock_queues();
			}
			/*
			 * make sure the pageout I/O threads are running
			 * throttled in case there are still requests
			 * in the laundry... since we have met our targets
			 * we don't need the laundry to be cleaned in a timely
			 * fashion... so let's avoid interfering with foreground
			 * activity
			 */
			vm_pageout_adjust_io_throttles(iq, eq, TRUE);

			/*
			 * recalculate vm_page_inactive_target
			 */
			vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
									   vm_page_inactive_count +
									   vm_page_speculative_count);
#ifndef	CONFIG_EMBEDDED
			if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
			    !queue_empty(&vm_page_queue_active)) {
				/*
				 * inactive target still not met... keep going
				 * until we get the queues balanced...
				 */
				continue;
			}
#endif
			lck_mtx_lock(&vm_page_queue_free_lock);

			if ((vm_page_free_count >= vm_page_free_target) &&
			    (vm_page_cleaned_count >= VM_PAGE_CLEANED_TARGET || vm_pageout_need_to_refill_clean_queue == FALSE) &&
			    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
				/*
				 * done - we have met our target *and*
				 * there is no one waiting for a page.
				 */
				vm_pageout_need_to_refill_clean_queue = FALSE;
return_from_scan:
				assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);

				VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
					       vm_pageout_inactive, vm_pageout_inactive_used, vm_pageout_need_to_refill_clean_queue, 0);
				VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
					       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
					       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);

				return;
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);
		}

		/*
		 * Before anything, we check if we have any ripe volatile
		 * objects around. If so, try to purge the first object.
		 * If the purge fails, fall through to reclaim a page instead.
		 * If the purge succeeds, go back to the top and reevaluate
		 * the new memory situation.
		 */
		assert(available_for_purge >= 0);
		if (available_for_purge)
		{
			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}

			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);

			if (TRUE == vm_purgeable_object_purge_one()) {

				VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);

				continue;
			}
			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
		}
		if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
			/*
			 * try to pull pages from the aging bins...
			 * see vm_page.h for an explanation of how
			 * this mechanism works
			 */
			struct vm_speculative_age_q	*aq;
			mach_timespec_t	ts_fully_aged;
			boolean_t	can_steal = FALSE;
			int num_scanned_queues;

			aq = &vm_page_queue_speculative[speculative_steal_index];

			num_scanned_queues = 0;
			while (queue_empty(&aq->age_q) &&
			       num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {

				speculative_steal_index++;

				if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

				aq = &vm_page_queue_speculative[speculative_steal_index];
			}

			if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
				/*
				 * XXX We've scanned all the speculative
				 * queues but still haven't found one
				 * that is not empty, even though
				 * vm_page_speculative_count is not 0.
				 *
				 * report the anomaly...
				 */
				printf("vm_pageout_scan: "
				       "all speculative queues empty "
				       "but count=%d.  Re-adjusting.\n",
				       vm_page_speculative_count);
				if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
					vm_page_speculative_count_drift_max = vm_page_speculative_count;
				vm_page_speculative_count_drifts++;
#if 6553678
				Debugger("vm_pageout_scan: no speculative pages");
#endif
				/* readjust... */
				vm_page_speculative_count = 0;
				/* ... and continue */
				continue;
			}

			if (vm_page_speculative_count > vm_page_speculative_target)
				can_steal = TRUE;
			else {
				ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
				ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
					* 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);

				clock_sec_t sec;
				clock_nsec_t nsec;
				clock_get_system_nanotime(&sec, &nsec);
				ts.tv_sec = (unsigned int) sec;
				ts.tv_nsec = nsec;

				if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
					can_steal = TRUE;
			}
			if (can_steal == TRUE)
				vm_page_speculate_ageit(aq);
		}
		if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
			int	pages_evicted;

			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			pages_evicted = vm_object_cache_evict(100, 10);

			if (pages_evicted) {

				vm_pageout_cache_evicted += pages_evicted;

				VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
					       vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);

				/*
				 * we just freed up to 100 pages,
				 * so go back to the top of the main loop
				 * and re-evaluate the memory situation
				 */
				continue;
			} else
				cache_evict_throttle = 100;
		}
		if (cache_evict_throttle)
			cache_evict_throttle--;

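		/*
		 * Note on the flow control below (summarizing the code that follows):
		 * the scan normally runs with flow_control.state == FCS_IDLE.  When the
		 * internal pageout queue is throttled and dynamic paging is enabled, a
		 * deadlock timer of vm_pageout_deadlock_wait milliseconds is armed and
		 * the state moves to FCS_DELAYED.  If the queue is still throttled when
		 * that timer expires, the state becomes FCS_DEADLOCK_DETECTED and up to
		 * vm_pageout_deadlock_relief pages (plus one per waiting thread) are
		 * moved to relieve the suspected deadlock before the timer is re-armed.
		 */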

		exceeded_burst_throttle = FALSE;
		/*
		 * Sometimes we have to pause:
		 *	1) No inactive pages - nothing to do.
		 *	2) Loop control - no acceptable pages found on the inactive queue
		 *	   within the last vm_pageout_burst_inactive_throttle iterations
		 *	3) Flow control - default pageout queue is full
		 */
		if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) {
			vm_pageout_scan_empty_throttle++;
			msecs = vm_pageout_empty_wait;
			goto vm_pageout_scan_delay;

		} else if (inactive_burst_count >=
			   MIN(vm_pageout_burst_inactive_throttle,
			       (vm_page_inactive_count +
				vm_page_speculative_count))) {
			vm_pageout_scan_burst_throttle++;
			msecs = vm_pageout_burst_wait;

			exceeded_burst_throttle = TRUE;
			goto vm_pageout_scan_delay;

		} else if (VM_PAGE_Q_THROTTLED(iq) &&
			   VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) {
			clock_sec_t sec;
			clock_nsec_t nsec;

			switch (flow_control.state) {

			case FCS_IDLE:
				if ((vm_page_free_count + local_freed) < vm_page_free_target) {
					if (vm_page_inactive_count - vm_page_anonymous_count > 0) {
						grab_anonymous = FALSE;
						goto consider_inactive;
					}
					if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target)
						continue;
				}
reset_deadlock_timer:
				ts.tv_sec = vm_pageout_deadlock_wait / 1000;
				ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
				clock_get_system_nanotime(&sec, &nsec);
				flow_control.ts.tv_sec = (unsigned int) sec;
				flow_control.ts.tv_nsec = nsec;
				ADD_MACH_TIMESPEC(&flow_control.ts, &ts);

				flow_control.state = FCS_DELAYED;
				msecs = vm_pageout_deadlock_wait;

				break;

			case FCS_DELAYED:
				clock_get_system_nanotime(&sec, &nsec);
				ts.tv_sec = (unsigned int) sec;
				ts.tv_nsec = nsec;

				if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
					/*
					 * the pageout thread for the default pager is potentially
					 * deadlocked since the
					 * default pager queue has been throttled for more than the
					 * allowable time... we need to move some clean pages or dirty
					 * pages belonging to the external pagers if they aren't throttled
					 * vm_page_free_wanted represents the number of threads currently
					 * blocked waiting for pages... we'll move one page for each of
					 * these plus a fixed amount to break the logjam... once we're done
					 * moving this number of pages, we'll re-enter the FCS_DELAYED state
					 * with a new timeout target since we have no way of knowing
					 * whether we've broken the deadlock except through observation
					 * of the queue associated with the default pager... we need to
					 * stop moving pages and allow the system to run to see what
					 * state it settles into.
					 */
					vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged;
					vm_pageout_scan_deadlock_detected++;
					flow_control.state = FCS_DEADLOCK_DETECTED;
					thread_wakeup((event_t) &vm_pageout_garbage_collect);
					goto consider_inactive;
				}
				/*
				 * just resniff instead of trying
				 * to compute a new delay time... we're going to be
				 * awakened immediately upon a laundry completion,
				 * so we won't wait any longer than necessary
				 */
				msecs = vm_pageout_idle_wait;
				break;

			case FCS_DEADLOCK_DETECTED:
				if (vm_pageout_deadlock_target)
					goto consider_inactive;
				goto reset_deadlock_timer;

			}
vm_pageout_scan_delay:
			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			if (local_freeq) {
				vm_page_unlock_queues();

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
					       vm_page_free_count, local_freed, delayed_unlock_limit, 3);

				vm_page_free_list(local_freeq, TRUE);

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					       vm_page_free_count, local_freed, 0, 3);

				local_freeq = NULL;
				local_freed = 0;
				vm_page_lock_queues();

				if (flow_control.state == FCS_DELAYED &&
				    !VM_PAGE_Q_THROTTLED(iq)) {
					flow_control.state = FCS_IDLE;
					goto consider_inactive;
				}
			}

			if (vm_page_free_count >= vm_page_free_target) {
				/*
				 * we're here because either
				 *  1) someone else freed up some pages while we had
				 *     the queues unlocked above or
				 *  2) we're precleaning and we haven't yet met
				 *     our cleaned target
				 * and we've hit one of the 3 conditions that
				 * cause us to pause the pageout scan thread
				 *
				 * since we already have enough free pages,
				 * let's avoid stalling and return normally
				 *
				 * before we return, make sure the pageout I/O threads
				 * are running throttled in case there are still requests
				 * in the laundry... since we have enough free pages
				 * we don't need the laundry to be cleaned in a timely
				 * fashion... so let's avoid interfering with foreground
				 * activity
				 *
				 * we don't want to hold vm_page_queue_free_lock when
				 * calling vm_pageout_adjust_io_throttles (since it
				 * may cause other locks to be taken), we do the initial
				 * check outside of the lock.  Once we take the lock,
				 * we recheck the condition since it may have changed.
				 * if it has, no problem, we will make the threads
				 * non-throttled before actually blocking
				 */
				vm_pageout_adjust_io_throttles(iq, eq, TRUE);
			}
			lck_mtx_lock(&vm_page_queue_free_lock);

			if (vm_page_free_count >= vm_page_free_target) {
				if (vm_page_cleaned_count < VM_PAGE_CLEANED_TARGET) {
					vm_precleaning_aborted++;
					vm_pageout_precleaning_delayed = TRUE;
				}
				goto return_from_scan;
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);

			if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
				/*
				 * we're most likely about to block due to one of
				 * the 3 conditions that cause vm_pageout_scan to
				 * not be able to make forward progress w/r
				 * to providing new pages to the free queue,
				 * so unthrottle the I/O threads in case we
				 * have laundry to be cleaned... it needs
				 * to be completed ASAP.
1746 * 1747 * even if we don't block, we want the io threads 1748 * running unthrottled since the sum of free + 1749 * clean pages is still under our free target 1750 */ 1751 vm_pageout_adjust_io_throttles(iq, eq, FALSE); 1752 } 1753 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) { 1754 /* 1755 * if we get here we're below our free target and 1756 * we're stalling due to a full laundry queue or 1757 * we don't have any inactive pages other then 1758 * those in the clean queue... 1759 * however, we have pages on the clean queue that 1760 * can be moved to the free queue, so let's not 1761 * stall the pageout scan 1762 */ 1763 flow_control.state = FCS_IDLE; 1764 goto consider_inactive; 1765 } 1766 VM_CHECK_MEMORYSTATUS; 1767 1768 if (flow_control.state != FCS_IDLE) 1769 vm_pageout_scan_throttle++; 1770 iq->pgo_throttled = TRUE; 1771 1772 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); 1773 counter(c_vm_pageout_scan_block++); 1774 1775 vm_page_unlock_queues(); 1776 1777 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); 1778 1779 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START, 1780 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); 1781 1782 thread_block(THREAD_CONTINUE_NULL); 1783 1784 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END, 1785 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); 1786 1787 vm_page_lock_queues(); 1788 delayed_unlock = 1; 1789 1790 iq->pgo_throttled = FALSE; 1791 1792 if (loop_count >= vm_page_inactive_count) 1793 loop_count = 0; 1794 inactive_burst_count = 0; 1795 1796 goto Restart; 1797 /*NOTREACHED*/ 1798 } 1799 1800 1801 flow_control.state = FCS_IDLE; 1802consider_inactive: 1803 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count), 1804 vm_pageout_inactive_external_forced_reactivate_limit); 1805 loop_count++; 1806 inactive_burst_count++; 1807 vm_pageout_inactive++; 1808 1809 boolean_t pageout_making_free = ((vm_page_free_count + local_freed) < vm_page_free_target); /* TRUE if making free, FALSE if making clean */ 1810 1811 /* 1812 * Choose a victim. 1813 */ 1814 while (1) { 1815 m = NULL; 1816 1817 if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { 1818 assert(vm_page_throttled_count == 0); 1819 assert(queue_empty(&vm_page_queue_throttled)); 1820 } 1821 1822 /* 1823 * If we are still below the free target, try speculative 1824 * and clean queue pages. 1825 */ 1826 if (pageout_making_free) { 1827 /* 1828 * The most eligible pages are ones we paged in speculatively, 1829 * but which have not yet been touched. 1830 */ 1831 if ( !queue_empty(&sq->age_q) ) { 1832 m = (vm_page_t) queue_first(&sq->age_q); 1833 1834 page_prev_state = PAGE_STATE_SPECULATIVE; 1835 1836 break; 1837 } 1838 1839 /* 1840 * Try a clean-queue inactive page, if we are still trying to fill the free list. 
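 * (pages on vm_page_queue_cleaned have already been laundered, so they can be
 * handed to the free list without any further I/O)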
1841 */ 1842 if ( !queue_empty(&vm_page_queue_cleaned) ) { 1843 m = (vm_page_t) queue_first(&vm_page_queue_cleaned); 1844 1845 page_prev_state = PAGE_STATE_CLEAN; 1846 1847 break; 1848 } 1849 1850 if (grab_anonymous == FALSE || queue_empty(&vm_page_queue_anonymous)) { 1851 1852 if ( !queue_empty(&vm_page_queue_inactive) ) { 1853 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 1854 1855 page_prev_state = PAGE_STATE_INACTIVE; 1856 if (vm_pageout_need_to_refill_clean_queue == TRUE) 1857 grab_anonymous = TRUE; 1858 break; 1859 } 1860 } 1861 } 1862 if (vm_pageout_need_to_refill_clean_queue == TRUE) { 1863 if ( !queue_empty(&vm_page_queue_anonymous) ) { 1864 m = (vm_page_t) queue_first(&vm_page_queue_anonymous); 1865 1866 page_prev_state = PAGE_STATE_ANONYMOUS; 1867 grab_anonymous = FALSE; 1868 break; 1869 } 1870 } 1871 1872 /* 1873 * if we've gotten here, we have no victim page. 1874 * if making clean, free the local freed list and return. 1875 * if making free, check to see if we've finished balancing the queues 1876 * yet, if we haven't just continue, else panic 1877 */ 1878 vm_page_unlock_queues(); 1879 1880 if (object != NULL) { 1881 vm_object_unlock(object); 1882 object = NULL; 1883 } 1884 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1885 1886 if (local_freeq) { 1887 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 1888 vm_page_free_count, local_freed, delayed_unlock_limit, 5); 1889 1890 vm_page_free_list(local_freeq, TRUE); 1891 1892 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 1893 vm_page_free_count, local_freed, 0, 5); 1894 1895 local_freeq = NULL; 1896 local_freed = 0; 1897 } 1898 vm_page_lock_queues(); 1899 delayed_unlock = 1; 1900 1901 if (pageout_making_free == FALSE) { 1902 if (vm_pageout_need_to_refill_clean_queue == TRUE) 1903 DTRACE_VM(novictimforclean); 1904 1905 lck_mtx_lock(&vm_page_queue_free_lock); 1906 goto return_from_scan; 1907 1908 } 1909 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) 1910 goto Restart; 1911 1912 panic("vm_pageout: no victim"); 1913 1914 /* NOTREACHED */ 1915 } 1916 1917 /* 1918 * we just found this page on one of our queues... 1919 * it can't also be on the pageout queue, so safe 1920 * to call VM_PAGE_QUEUES_REMOVE 1921 */ 1922 assert(!m->pageout_queue); 1923 1924 VM_PAGE_QUEUES_REMOVE(m); 1925 1926 assert(!m->laundry); 1927 assert(!m->private); 1928 assert(!m->fictitious); 1929 assert(m->object != kernel_object); 1930 assert(m->phys_page != vm_page_guard_addr); 1931 1932 1933 if (page_prev_state != PAGE_STATE_SPECULATIVE) 1934 vm_pageout_stats[vm_pageout_stat_now].considered++; 1935 1936 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); 1937 1938 /* 1939 * check to see if we currently are working 1940 * with the same object... if so, we've 1941 * already got the lock 1942 */ 1943 if (m->object != object) { 1944 /* 1945 * the object associated with candidate page is 1946 * different from the one we were just working 1947 * with... dump the lock if we still own it 1948 */ 1949 if (object != NULL) { 1950 vm_object_unlock(object); 1951 object = NULL; 1952 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1953 } 1954 /* 1955 * Try to lock object; since we've alread got the 1956 * page queues lock, we can only 'try' for this one. 1957 * if the 'try' fails, we need to do a mutex_pause 1958 * to allow the owner of the object lock a chance to 1959 * run... 
otherwise, we're likely to trip over this 1960 * object in the same state as we work our way through 1961 * the queue... clumps of pages associated with the same 1962 * object are fairly typical on the inactive and active queues 1963 */ 1964 if (!vm_object_lock_try_scan(m->object)) { 1965 vm_page_t m_want = NULL; 1966 1967 vm_pageout_inactive_nolock++; 1968 1969 if (page_prev_state == PAGE_STATE_CLEAN) 1970 vm_pageout_cleaned_nolock++; 1971 1972 if (page_prev_state == PAGE_STATE_SPECULATIVE) 1973 page_prev_state = PAGE_STATE_INACTIVE_FIRST; 1974 1975 pmap_clear_reference(m->phys_page); 1976 m->reference = FALSE; 1977 1978 /* 1979 * m->object must be stable since we hold the page queues lock... 1980 * we can update the scan_collisions field sans the object lock 1981 * since it is a separate field and this is the only spot that does 1982 * a read-modify-write operation and it is never executed concurrently... 1983 * we can asynchronously set this field to 0 when creating a UPL, so it 1984 * is possible for the value to be a bit non-determistic, but that's ok 1985 * since it's only used as a hint 1986 */ 1987 m->object->scan_collisions++; 1988 1989 if (pageout_making_free) { 1990 if ( !queue_empty(&sq->age_q) ) 1991 m_want = (vm_page_t) queue_first(&sq->age_q); 1992 else if (!queue_empty(&vm_page_queue_cleaned)) 1993 m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned); 1994 else if (grab_anonymous == FALSE || queue_empty(&vm_page_queue_anonymous)) 1995 m_want = (vm_page_t) queue_first(&vm_page_queue_inactive); 1996 } 1997 if (m_want == NULL && vm_pageout_need_to_refill_clean_queue == TRUE) { 1998 if ( !queue_empty(&vm_page_queue_anonymous) ) 1999 m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous); 2000 } 2001 /* 2002 * this is the next object we're going to be interested in 2003 * try to make sure its available after the mutex_yield 2004 * returns control 2005 */ 2006 if (m_want) 2007 vm_pageout_scan_wants_object = m_want->object; 2008 2009 /* 2010 * force us to dump any collected free pages 2011 * and to pause before moving on 2012 */ 2013 try_failed = TRUE; 2014 2015 goto requeue_page; 2016 } 2017 object = m->object; 2018 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2019 2020 try_failed = FALSE; 2021 } 2022 if (catch_up_count) 2023 catch_up_count--; 2024 2025 if (m->busy) { 2026 if (m->encrypted_cleaning) { 2027 /* 2028 * ENCRYPTED SWAP: 2029 * if this page has already been picked up as 2030 * part of a page-out cluster, it will be busy 2031 * because it is being encrypted (see 2032 * vm_object_upl_request()). But we still 2033 * want to demote it from "clean-in-place" 2034 * (aka "adjacent") to "clean-and-free" (aka 2035 * "target"), so let's ignore its "busy" bit 2036 * here and proceed to check for "cleaning" a 2037 * little bit below... 2038 * 2039 * CAUTION CAUTION: 2040 * A "busy" page should still be left alone for 2041 * most purposes, so we have to be very careful 2042 * not to process that page too much. 2043 */ 2044 assert(m->cleaning); 2045 goto consider_inactive_page; 2046 } 2047 2048 /* 2049 * Somebody is already playing with this page. 
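 * (the page is marked "busy", meaning some other thread - e.g. a fault handler or an
 * in-flight pageout/cleaning operation - currently owns it)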
2050 * Put it back on the appropriate queue 2051 * 2052 */ 2053 vm_pageout_inactive_busy++; 2054 2055 if (page_prev_state == PAGE_STATE_CLEAN) 2056 vm_pageout_cleaned_busy++; 2057 2058requeue_page: 2059 switch (page_prev_state) { 2060 2061 case PAGE_STATE_SPECULATIVE: 2062 vm_page_speculate(m, FALSE); 2063 break; 2064 2065 case PAGE_STATE_ANONYMOUS: 2066 case PAGE_STATE_CLEAN: 2067 case PAGE_STATE_INACTIVE: 2068 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE); 2069 break; 2070 2071 case PAGE_STATE_INACTIVE_FIRST: 2072 VM_PAGE_ENQUEUE_INACTIVE(m, TRUE); 2073 break; 2074 } 2075 goto done_with_inactivepage; 2076 } 2077 2078 2079 /* 2080 * If it's absent, in error or the object is no longer alive, 2081 * we can reclaim the page... in the no longer alive case, 2082 * there are 2 states the page can be in that preclude us 2083 * from reclaiming it - busy or cleaning - that we've already 2084 * dealt with 2085 */ 2086 if (m->absent || m->error || !object->alive) { 2087 2088 if (m->absent) 2089 vm_pageout_inactive_absent++; 2090 else if (!object->alive) 2091 vm_pageout_inactive_notalive++; 2092 else 2093 vm_pageout_inactive_error++; 2094reclaim_page: 2095 if (vm_pageout_deadlock_target) { 2096 vm_pageout_scan_inactive_throttle_success++; 2097 vm_pageout_deadlock_target--; 2098 } 2099 2100 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL); 2101 2102 if (object->internal) { 2103 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL); 2104 } else { 2105 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL); 2106 } 2107 assert(!m->cleaning); 2108 assert(!m->laundry); 2109 2110 m->busy = TRUE; 2111 2112 /* 2113 * remove page from object here since we're already 2114 * behind the object lock... defer the rest of the work 2115 * we'd normally do in vm_page_free_prepare_object 2116 * until 'vm_page_free_list' is called 2117 */ 2118 if (m->tabled) 2119 vm_page_remove(m, TRUE); 2120 2121 assert(m->pageq.next == NULL && 2122 m->pageq.prev == NULL); 2123 m->pageq.next = (queue_entry_t)local_freeq; 2124 local_freeq = m; 2125 local_freed++; 2126 2127 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2128 vm_pageout_freed_from_speculative++; 2129 else if (page_prev_state == PAGE_STATE_CLEAN) 2130 vm_pageout_freed_from_cleaned++; 2131 else 2132 vm_pageout_freed_from_inactive_clean++; 2133 2134 inactive_burst_count = 0; 2135 2136 if (page_prev_state != PAGE_STATE_SPECULATIVE) 2137 vm_pageout_stats[vm_pageout_stat_now].reclaimed++; 2138 2139 goto done_with_inactivepage; 2140 } 2141 /* 2142 * If the object is empty, the page must be reclaimed even 2143 * if dirty or used. 2144 * If the page belongs to a volatile object, we stick it back 2145 * on. 2146 */ 2147 if (object->copy == VM_OBJECT_NULL) { 2148 if (object->purgable == VM_PURGABLE_EMPTY) { 2149 if (m->pmapped == TRUE) { 2150 /* unmap the page */ 2151 refmod_state = pmap_disconnect(m->phys_page); 2152 if (refmod_state & VM_MEM_MODIFIED) { 2153 SET_PAGE_DIRTY(m, FALSE); 2154 } 2155 } 2156 if (m->dirty || m->precious) { 2157 /* we saved the cost of cleaning this page ! */ 2158 vm_page_purged_count++; 2159 } 2160 goto reclaim_page; 2161 } 2162 if (object->purgable == VM_PURGABLE_VOLATILE) { 2163 /* if it's wired, we can't put it on our queue */ 2164 assert(!VM_PAGE_WIRED(m)); 2165 2166 /* just stick it back on! 
*/ 2167 reactivated_this_call++; 2168 2169 if (page_prev_state == PAGE_STATE_CLEAN) 2170 vm_pageout_cleaned_volatile_reactivated++; 2171 2172 goto reactivate_page; 2173 } 2174 } 2175 2176consider_inactive_page: 2177 if (m->busy) { 2178 /* 2179 * CAUTION CAUTION: 2180 * A "busy" page should always be left alone, except... 2181 */ 2182 if (m->cleaning && m->encrypted_cleaning) { 2183 /* 2184 * ENCRYPTED_SWAP: 2185 * We could get here with a "busy" page 2186 * if it's being encrypted during a 2187 * "clean-in-place" operation. We'll deal 2188 * with it right away by testing if it has been 2189 * referenced and either reactivating it or 2190 * promoting it from "clean-in-place" to 2191 * "clean-and-free". 2192 */ 2193 } else { 2194 panic("\"busy\" page considered for pageout\n"); 2195 } 2196 } 2197 2198 /* 2199 * If it's being used, reactivate. 2200 * (Fictitious pages are either busy or absent.) 2201 * First, update the reference and dirty bits 2202 * to make sure the page is unreferenced. 2203 */ 2204 refmod_state = -1; 2205 2206 if (m->reference == FALSE && m->pmapped == TRUE) { 2207 refmod_state = pmap_get_refmod(m->phys_page); 2208 2209 if (refmod_state & VM_MEM_REFERENCED) 2210 m->reference = TRUE; 2211 if (refmod_state & VM_MEM_MODIFIED) { 2212 SET_PAGE_DIRTY(m, FALSE); 2213 } 2214 } 2215 2216 /* 2217 * if (m->cleaning) 2218 * If already cleaning this page in place and it hasn't 2219 * been recently referenced, just pull off the queue. 2220 * We can leave the page mapped, and upl_commit_range 2221 * will put it on the clean queue. 2222 * 2223 * note: if m->encrypted_cleaning == TRUE, then 2224 * m->cleaning == TRUE 2225 * and we'll handle it here 2226 * 2227 * if (m->pageout && !m->cleaning) 2228 * an msync INVALIDATE is in progress... 2229 * this page has been marked for destruction 2230 * after it has been cleaned, 2231 * but not yet gathered into a UPL 2232 * where 'cleaning' will be set... 2233 * just leave it off the paging queues 2234 * 2235 * if (m->pageout && m->clenaing) 2236 * an msync INVALIDATE is in progress 2237 * and the UPL has already gathered this page... 2238 * just leave it off the paging queues 2239 */ 2240 2241 /* 2242 * page with m->pageout and still on the queues means that an 2243 * MS_INVALIDATE in progress on this page... leave it alone 2244 */ 2245 if (m->pageout) { 2246 inactive_burst_count = 0; 2247 goto done_with_inactivepage; 2248 } 2249 2250 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */ 2251 if (m->cleaning) { 2252 if (m->reference == TRUE) { 2253 reactivated_this_call++; 2254 goto reactivate_page; 2255 } else { 2256 inactive_burst_count = 0; 2257 goto done_with_inactivepage; 2258 } 2259 } 2260 2261 if (m->reference || m->dirty) { 2262 /* deal with a rogue "reusable" page */ 2263 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); 2264 } 2265 2266 if (m->reference && !m->no_cache) { 2267 /* 2268 * The page we pulled off the inactive list has 2269 * been referenced. It is possible for other 2270 * processors to be touching pages faster than we 2271 * can clear the referenced bit and traverse the 2272 * inactive queue, so we limit the number of 2273 * reactivations. 
2274 */ 2275 if (++reactivated_this_call >= reactivate_limit) { 2276 vm_pageout_reactivation_limit_exceeded++; 2277 } else if (catch_up_count) { 2278 vm_pageout_catch_ups++; 2279 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) { 2280 vm_pageout_inactive_force_reclaim++; 2281 } else { 2282 uint32_t isinuse; 2283 2284 if (page_prev_state == PAGE_STATE_CLEAN) 2285 vm_pageout_cleaned_reference_reactivated++; 2286 2287reactivate_page: 2288 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL && 2289 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) { 2290 /* 2291 * no explict mappings of this object exist 2292 * and it's not open via the filesystem 2293 */ 2294 vm_page_deactivate(m); 2295 vm_pageout_inactive_deactivated++; 2296 } else { 2297 /* 2298 * The page was/is being used, so put back on active list. 2299 */ 2300 vm_page_activate(m); 2301 VM_STAT_INCR(reactivations); 2302 } 2303 2304 if (page_prev_state == PAGE_STATE_CLEAN) 2305 vm_pageout_cleaned_reactivated++; 2306 2307 vm_pageout_inactive_used++; 2308 inactive_burst_count = 0; 2309 2310 goto done_with_inactivepage; 2311 } 2312 /* 2313 * Make sure we call pmap_get_refmod() if it 2314 * wasn't already called just above, to update 2315 * the dirty bit. 2316 */ 2317 if ((refmod_state == -1) && !m->dirty && m->pmapped) { 2318 refmod_state = pmap_get_refmod(m->phys_page); 2319 if (refmod_state & VM_MEM_MODIFIED) { 2320 SET_PAGE_DIRTY(m, FALSE); 2321 } 2322 } 2323 forced_reclaim = TRUE; 2324 } else { 2325 forced_reclaim = FALSE; 2326 } 2327 2328 XPR(XPR_VM_PAGEOUT, 2329 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", 2330 object, m->offset, m, 0,0); 2331 2332 /* 2333 * we've got a candidate page to steal... 2334 * 2335 * m->dirty is up to date courtesy of the 2336 * preceding check for m->reference... 
if 2337 * we get here, then m->reference had to be 2338 * FALSE (or possibly "reactivate_limit" was 2339 * exceeded), but in either case we called 2340 * pmap_get_refmod() and updated both 2341 * m->reference and m->dirty 2342 * 2343 * if it's dirty or precious we need to 2344 * see if the target queue is throttled 2345 * if it is, we need to skip over it by moving it back 2346 * to the end of the inactive queue 2347 */ 2348 2349 inactive_throttled = FALSE; 2350 2351 if (m->dirty || m->precious) { 2352 if (object->internal) { 2353 if (VM_PAGE_Q_THROTTLED(iq)) 2354 inactive_throttled = TRUE; 2355 } else if (VM_PAGE_Q_THROTTLED(eq)) { 2356 inactive_throttled = TRUE; 2357 } 2358 } 2359throttle_inactive: 2360 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && 2361 object->internal && m->dirty && 2362 (object->purgable == VM_PURGABLE_DENY || 2363 object->purgable == VM_PURGABLE_NONVOLATILE || 2364 object->purgable == VM_PURGABLE_VOLATILE)) { 2365 queue_enter(&vm_page_queue_throttled, m, 2366 vm_page_t, pageq); 2367 m->throttled = TRUE; 2368 vm_page_throttled_count++; 2369 2370 vm_pageout_scan_reclaimed_throttled++; 2371 2372 goto done_with_inactivepage; 2373 } 2374 if (inactive_throttled == TRUE) { 2375 2376 if (object->internal) 2377 vm_pageout_scan_inactive_throttled_internal++; 2378 else 2379 vm_pageout_scan_inactive_throttled_external++; 2380 2381 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2382 page_prev_state = PAGE_STATE_INACTIVE; 2383 2384 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && object->internal == FALSE) { 2385 /* 2386 * a) The external pageout queue is throttled 2387 * b) We're done with the active queue and moved on to the inactive queue 2388 * c) We start noticing dirty pages and usually we would put them at the end of the inactive queue, but, 2389 * d) We don't have a default pager, and so, 2390 * e) We push these onto the active queue in an effort to cause a re-evaluation of the active queue 2391 * and get back some, possibly clean, pages. 2392 * 2393 * We also keep a count of the pages of this kind, since these will be a good indicator of us being in a deadlock 2394 * on systems without a dynamic pager, where: 2395 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written. 2396 * b) The thread doing the writing is waiting for pages while holding the truncate lock 2397 * c) Most of the pages in the inactive queue belong to this file. 
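 * each such forced reactivation decrements vm_pageout_inactive_external_forced_reactivate_limit;
 * once it reaches zero the limit is reset and, on CONFIG_JETSAM systems, jetsam is asked
 * to kill the top process to relieve the suspected deadlock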
2398 */ 2399 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); 2400 m->active = TRUE; 2401 vm_page_active_count++; 2402 2403 vm_pageout_adjust_io_throttles(iq, eq, FALSE); 2404 2405 vm_pageout_inactive_external_forced_reactivate_count++; 2406 vm_pageout_inactive_external_forced_reactivate_limit--; 2407 2408 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0){ 2409 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; 2410#if CONFIG_JETSAM 2411 /* 2412 * Possible deadlock scenario so request jetsam action 2413 */ 2414 assert(object); 2415 vm_object_unlock(object); 2416 object = VM_OBJECT_NULL; 2417 vm_page_unlock_queues(); 2418 2419 if (memorystatus_kill_top_proc_from_VM() < 0){ 2420 panic("vm_pageout_scan: Jetsam request failed\n"); 2421 } 2422 2423 vm_pageout_inactive_external_forced_jetsam_count++; 2424 vm_page_lock_queues(); 2425 delayed_unlock = 1; 2426#endif 2427 } 2428 inactive_burst_count = 0; 2429 goto done_with_inactivepage; 2430 } else { 2431 goto requeue_page; 2432 } 2433 } 2434 2435 /* 2436 * we've got a page that we can steal... 2437 * eliminate all mappings and make sure 2438 * we have the up-to-date modified state 2439 * 2440 * if we need to do a pmap_disconnect then we 2441 * need to re-evaluate m->dirty since the pmap_disconnect 2442 * provides the true state atomically... the 2443 * page was still mapped up to the pmap_disconnect 2444 * and may have been dirtied at the last microsecond 2445 * 2446 * we also check for the page being referenced 'late' 2447 * and reactivate it for that case 2448 * 2449 * Note that if 'pmapped' is FALSE then the page is not 2450 * and has not been in any map, so there is no point calling 2451 * pmap_disconnect(). m->dirty and/or m->reference could 2452 * have been set in anticipation of likely usage of the page. 2453 */ 2454 if (m->pmapped == TRUE) { 2455 refmod_state = pmap_disconnect(m->phys_page); 2456 2457 if (refmod_state & VM_MEM_MODIFIED) { 2458 SET_PAGE_DIRTY(m, FALSE); 2459 } 2460 if (refmod_state & VM_MEM_REFERENCED) { 2461 2462 /* If m->reference is already set, this page must have 2463 * already failed the reactivate_limit test, so don't 2464 * bump the counts twice. 2465 */ 2466 if ( ! m->reference ) { 2467 m->reference = TRUE; 2468 if (forced_reclaim || 2469 ++reactivated_this_call >= reactivate_limit) 2470 vm_pageout_reactivation_limit_exceeded++; 2471 else { 2472 if (page_prev_state == PAGE_STATE_CLEAN) 2473 vm_pageout_cleaned_reference_reactivated++; 2474 goto reactivate_page; 2475 } 2476 } 2477 } 2478 } 2479 /* 2480 * reset our count of pages that have been reclaimed 2481 * since the last page was 'stolen' 2482 */ 2483 inactive_reclaim_run = 0; 2484 2485 /* 2486 * If it's clean and not precious, we can free the page. 
2487 */ 2488 if (!m->dirty && !m->precious) { 2489 2490 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2491 vm_pageout_speculative_clean++; 2492 else { 2493 if (page_prev_state == PAGE_STATE_ANONYMOUS) 2494 vm_pageout_inactive_anonymous++; 2495 else if (page_prev_state == PAGE_STATE_CLEAN) 2496 vm_pageout_cleaned_reclaimed++; 2497 2498 if (m->was_dirty) { 2499 /* page on clean queue used to be dirty; we should increment the vm_stat pageout count here */ 2500 VM_STAT_INCR(pageouts); 2501 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); 2502 } 2503 vm_pageout_inactive_clean++; 2504 } 2505 /* FYI: (!pageout_making_free) == (!m->clean_queue && !m->speculative) */ 2506 if (((vm_page_free_count + local_freed) >= vm_page_free_target) && !pageout_making_free) { 2507 2508 assert(!m->clean_queue); 2509 assert(!m->speculative); 2510 2511 /* 2512 * we have met our free page target and this page wasn't just pulled 2513 * from the clean or speculative queues, so put it on the clean queue 2514 */ 2515 if (m->reference == TRUE) { 2516 /* 2517 * must have come through the forced reclaim path. 2518 * we need to clear out the reference state in this case 2519 * so that we don't just reactivate the page when we 2520 * find it in the clean queue based on an old reference. 2521 * if it gets re-referenced while on the queue, then 2522 * the reactivation is justified 2523 */ 2524 m->reference = FALSE; 2525 pmap_clear_reference(m->phys_page); 2526 } 2527 2528 vm_pageout_enqueued_cleaned_from_inactive_clean++; 2529 vm_page_enqueue_cleaned(m); 2530 2531 inactive_burst_count = 0; /* we found a usable page on the inactive queue, hooray */ 2532 2533 goto done_with_inactivepage; 2534 2535 } 2536 /* 2537 * OK, at this point we have found a page we are going to free. 2538 */ 2539 2540#ifndef CONFIG_EMBEDDED 2541 2542#define VM_PRESSURE_INTERVAL_NS 250000000 /* nanoseconds; == .25 seconds */ 2543 if (vm_pageout_need_to_refill_clean_queue == TRUE || page_prev_state == PAGE_STATE_CLEAN) { 2544 static uint64_t vm_pressure_last_time_ns = 0; 2545 uint64_t cur_time_ns = 0; 2546 absolutetime_to_nanoseconds(mach_absolute_time(), &cur_time_ns); 2547 if (cur_time_ns >= vm_pressure_last_time_ns + VM_PRESSURE_INTERVAL_NS) { 2548 vm_pressure_last_time_ns = cur_time_ns; 2549 thread_wakeup(&vm_pressure_thread); 2550#if CONFIG_MEMORYSTATUS 2551 /* Wake up idle-exit thread */ 2552 thread_wakeup((event_t)&memorystatus_wakeup); 2553#endif 2554 } 2555 } 2556#endif /* !CONFIG_EMBEDDED */ 2557 2558 goto reclaim_page; 2559 } 2560 2561 /* 2562 * The page may have been dirtied since the last check 2563 * for a throttled target queue (which may have been skipped 2564 * if the page was clean then). With the dirty page 2565 * disconnected here, we can make one final check. 2566 */ 2567 if (object->internal) { 2568 if (VM_PAGE_Q_THROTTLED(iq)) 2569 inactive_throttled = TRUE; 2570 } else if (VM_PAGE_Q_THROTTLED(eq)) { 2571 inactive_throttled = TRUE; 2572 } 2573 2574 if (inactive_throttled == TRUE) 2575 goto throttle_inactive; 2576 2577 /* 2578 * do NOT set the pageout bit! 2579 * sure, we might need free pages, but this page is going to take time to become free 2580 * anyway, so we may as well put it on the clean queue first and take it from there later 2581 * if necessary. that way, we'll ensure we don't free up too much. 
-mj 2582 */ 2583 vm_pageout_cluster(m, FALSE); 2584 2585 if (page_prev_state == PAGE_STATE_ANONYMOUS) 2586 vm_pageout_inactive_anonymous++; 2587 if (object->internal) 2588 vm_pageout_inactive_dirty_internal++; 2589 else 2590 vm_pageout_inactive_dirty_external++; 2591 2592 inactive_burst_count = 0; 2593 2594done_with_inactivepage: 2595 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) { 2596 2597 if (object != NULL) { 2598 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2599 vm_object_unlock(object); 2600 object = NULL; 2601 } 2602 if (local_freeq) { 2603 vm_page_unlock_queues(); 2604 2605 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 2606 vm_page_free_count, local_freed, delayed_unlock_limit, 4); 2607 2608 vm_page_free_list(local_freeq, TRUE); 2609 2610 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 2611 vm_page_free_count, local_freed, 0, 4); 2612 2613 local_freeq = NULL; 2614 local_freed = 0; 2615 vm_page_lock_queues(); 2616 } else 2617 lck_mtx_yield(&vm_page_queue_lock); 2618 2619 delayed_unlock = 1; 2620 } 2621 vm_pageout_considered_page++; 2622 2623 /* 2624 * back to top of pageout scan loop 2625 */ 2626 } 2627} 2628 2629 2630int vm_page_free_count_init; 2631 2632void 2633vm_page_free_reserve( 2634 int pages) 2635{ 2636 int free_after_reserve; 2637 2638 vm_page_free_reserved += pages; 2639 2640 if (vm_page_free_reserved > VM_PAGE_FREE_RESERVED_LIMIT) 2641 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT; 2642 2643 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved; 2644 2645 vm_page_free_min = vm_page_free_reserved + 2646 VM_PAGE_FREE_MIN(free_after_reserve); 2647 2648 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) 2649 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT; 2650 2651 vm_page_free_target = vm_page_free_reserved + 2652 VM_PAGE_FREE_TARGET(free_after_reserve); 2653 2654 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) 2655 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT; 2656 2657 if (vm_page_free_target < vm_page_free_min + 5) 2658 vm_page_free_target = vm_page_free_min + 5; 2659 2660 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3); 2661 vm_page_creation_throttle = vm_page_free_target * 3; 2662} 2663 2664/* 2665 * vm_pageout is the high level pageout daemon. 
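 * vm_pageout_continue() is the daemon's continuation point: each pass runs
 * vm_pageout_scan(), then blocks waiting on vm_page_free_wanted and re-enters
 * itself via thread_block()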
2666 */ 2667 2668void 2669vm_pageout_continue(void) 2670{ 2671 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL); 2672 vm_pageout_scan_event_counter++; 2673 2674 vm_pageout_scan(); 2675 /* 2676 * we hold both the vm_page_queue_free_lock 2677 * and the vm_page_queues_lock at this point 2678 */ 2679 assert(vm_page_free_wanted == 0); 2680 assert(vm_page_free_wanted_privileged == 0); 2681 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT); 2682 2683 lck_mtx_unlock(&vm_page_queue_free_lock); 2684 vm_page_unlock_queues(); 2685 2686 counter(c_vm_pageout_block++); 2687 thread_block((thread_continue_t)vm_pageout_continue); 2688 /*NOTREACHED*/ 2689} 2690 2691 2692#ifdef FAKE_DEADLOCK 2693 2694#define FAKE_COUNT 5000 2695 2696int internal_count = 0; 2697int fake_deadlock = 0; 2698 2699#endif 2700 2701static void 2702vm_pageout_iothread_continue(struct vm_pageout_queue *q) 2703{ 2704 vm_page_t m = NULL; 2705 vm_object_t object; 2706 vm_object_offset_t offset; 2707 memory_object_t pager; 2708 thread_t self = current_thread(); 2709 2710 if ((vm_pageout_internal_iothread != THREAD_NULL) 2711 && (self == vm_pageout_external_iothread ) 2712 && (self->options & TH_OPT_VMPRIV)) 2713 self->options &= ~TH_OPT_VMPRIV; 2714 2715 vm_page_lockspin_queues(); 2716 2717 while ( !queue_empty(&q->pgo_pending) ) { 2718 2719 q->pgo_busy = TRUE; 2720 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); 2721 if (m->object == slide_info.slide_object) { 2722 panic("slid page %p not allowed on this path\n", m); 2723 } 2724 VM_PAGE_CHECK(m); 2725 m->pageout_queue = FALSE; 2726 m->pageq.next = NULL; 2727 m->pageq.prev = NULL; 2728 2729 /* 2730 * grab a snapshot of the object and offset this 2731 * page is tabled in so that we can relookup this 2732 * page after we've taken the object lock - these 2733 * fields are stable while we hold the page queues lock 2734 * but as soon as we drop it, there is nothing to keep 2735 * this page in this object... we hold an activity_in_progress 2736 * on this object which will keep it from terminating 2737 */ 2738 object = m->object; 2739 offset = m->offset; 2740 2741 vm_page_unlock_queues(); 2742 2743#ifdef FAKE_DEADLOCK 2744 if (q == &vm_pageout_queue_internal) { 2745 vm_offset_t addr; 2746 int pg_count; 2747 2748 internal_count++; 2749 2750 if ((internal_count == FAKE_COUNT)) { 2751 2752 pg_count = vm_page_free_count + vm_page_free_reserved; 2753 2754 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) { 2755 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count); 2756 } 2757 internal_count = 0; 2758 fake_deadlock++; 2759 } 2760 } 2761#endif 2762 vm_object_lock(object); 2763 2764 m = vm_page_lookup(object, offset); 2765 2766 if (m == NULL || 2767 m->busy || m->cleaning || m->pageout_queue || !m->laundry) { 2768 /* 2769 * it's either the same page that someone else has 2770 * started cleaning (or it's finished cleaning or 2771 * been put back on the pageout queue), or 2772 * the page has been freed or we have found a 2773 * new page at this offset... in all of these cases 2774 * we merely need to release the activity_in_progress 2775 * we took when we put the page on the pageout queue 2776 */ 2777 vm_object_activity_end(object); 2778 vm_object_unlock(object); 2779 2780 vm_page_lockspin_queues(); 2781 continue; 2782 } 2783 if (!object->pager_initialized) { 2784 2785 /* 2786 * If there is no memory object for the page, create 2787 * one and hand it to the default pager. 
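 * the collapse below is attempted first in the hope that the object ends up with an
 * existing pager; otherwise a new one is created, and if the object still has no pager
 * the page is simply reactivated and counted in vm_pageout_dirty_no_pager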
2788 */ 2789 2790 if (!object->pager_initialized) 2791 vm_object_collapse(object, 2792 (vm_object_offset_t) 0, 2793 TRUE); 2794 if (!object->pager_initialized) 2795 vm_object_pager_create(object); 2796 if (!object->pager_initialized) { 2797 /* 2798 * Still no pager for the object. 2799 * Reactivate the page. 2800 * 2801 * Should only happen if there is no 2802 * default pager. 2803 */ 2804 m->pageout = FALSE; 2805 2806 vm_page_lockspin_queues(); 2807 2808 vm_pageout_throttle_up(m); 2809 vm_page_activate(m); 2810 vm_pageout_dirty_no_pager++; 2811 2812 vm_page_unlock_queues(); 2813 2814 /* 2815 * And we are done with it. 2816 */ 2817 vm_object_activity_end(object); 2818 vm_object_unlock(object); 2819 2820 vm_page_lockspin_queues(); 2821 continue; 2822 } 2823 } 2824 pager = object->pager; 2825 2826 if (pager == MEMORY_OBJECT_NULL) { 2827 /* 2828 * This pager has been destroyed by either 2829 * memory_object_destroy or vm_object_destroy, and 2830 * so there is nowhere for the page to go. 2831 */ 2832 if (m->pageout) { 2833 /* 2834 * Just free the page... VM_PAGE_FREE takes 2835 * care of cleaning up all the state... 2836 * including doing the vm_pageout_throttle_up 2837 */ 2838 VM_PAGE_FREE(m); 2839 } else { 2840 vm_page_lockspin_queues(); 2841 2842 vm_pageout_throttle_up(m); 2843 vm_page_activate(m); 2844 2845 vm_page_unlock_queues(); 2846 2847 /* 2848 * And we are done with it. 2849 */ 2850 } 2851 vm_object_activity_end(object); 2852 vm_object_unlock(object); 2853 2854 vm_page_lockspin_queues(); 2855 continue; 2856 } 2857#if 0 2858 /* 2859 * we don't hold the page queue lock 2860 * so this check isn't safe to make 2861 */ 2862 VM_PAGE_CHECK(m); 2863#endif 2864 /* 2865 * give back the activity_in_progress reference we 2866 * took when we queued up this page and replace it 2867 * it with a paging_in_progress reference that will 2868 * also hold the paging offset from changing and 2869 * prevent the object from terminating 2870 */ 2871 vm_object_activity_end(object); 2872 vm_object_paging_begin(object); 2873 vm_object_unlock(object); 2874 2875 /* 2876 * Send the data to the pager. 
2877 * any pageout clustering happens there 2878 */ 2879 memory_object_data_return(pager, 2880 m->offset + object->paging_offset, 2881 PAGE_SIZE, 2882 NULL, 2883 NULL, 2884 FALSE, 2885 FALSE, 2886 0); 2887 2888 vm_object_lock(object); 2889 vm_object_paging_end(object); 2890 vm_object_unlock(object); 2891 2892 vm_pageout_io_throttle(); 2893 2894 vm_page_lockspin_queues(); 2895 } 2896 q->pgo_busy = FALSE; 2897 q->pgo_idle = TRUE; 2898 2899 assert_wait((event_t) q, THREAD_UNINT); 2900 vm_page_unlock_queues(); 2901 2902 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending); 2903 /*NOTREACHED*/ 2904} 2905 2906 2907 2908static void 2909vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority) 2910{ 2911 uint32_t policy; 2912 boolean_t set_iq = FALSE; 2913 boolean_t set_eq = FALSE; 2914 2915 if (hibernate_cleaning_in_progress == TRUE) 2916 req_lowpriority = FALSE; 2917 2918 if (iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority) 2919 set_iq = TRUE; 2920 2921 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) 2922 set_eq = TRUE; 2923 2924 if (set_iq == TRUE || set_eq == TRUE) { 2925 2926 vm_page_unlock_queues(); 2927 2928 if (req_lowpriority == TRUE) { 2929 policy = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE; 2930 DTRACE_VM(laundrythrottle); 2931 } else { 2932 policy = TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_NORMAL; 2933 DTRACE_VM(laundryunthrottle); 2934 } 2935 if (set_iq == TRUE) { 2936 proc_apply_thread_diskacc(kernel_task, iq->pgo_tid, policy); 2937 iq->pgo_lowpriority = req_lowpriority; 2938 } 2939 if (set_eq == TRUE) { 2940 proc_apply_thread_diskacc(kernel_task, eq->pgo_tid, policy); 2941 eq->pgo_lowpriority = req_lowpriority; 2942 } 2943 vm_page_lock_queues(); 2944 } 2945} 2946 2947 2948static void 2949vm_pageout_iothread_external(void) 2950{ 2951 thread_t self = current_thread(); 2952 2953 self->options |= TH_OPT_VMPRIV; 2954 2955 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); 2956 proc_apply_thread_diskacc(kernel_task, self->thread_id, TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); 2957 2958 vm_page_lock_queues(); 2959 2960 vm_pageout_queue_external.pgo_tid = self->thread_id; 2961 vm_pageout_queue_external.pgo_lowpriority = TRUE; 2962 vm_pageout_queue_external.pgo_inited = TRUE; 2963 2964 vm_page_unlock_queues(); 2965 2966 vm_pageout_iothread_continue(&vm_pageout_queue_external); 2967 2968 /*NOTREACHED*/ 2969} 2970 2971static void 2972vm_pageout_iothread_internal(void) 2973{ 2974 thread_t self = current_thread(); 2975 2976 self->options |= TH_OPT_VMPRIV; 2977 2978 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); 2979 proc_apply_thread_diskacc(kernel_task, self->thread_id, TASK_POLICY_HWACCESS_DISK_ATTRIBUTE_THROTTLE); 2980 2981 vm_page_lock_queues(); 2982 2983 vm_pageout_queue_internal.pgo_tid = self->thread_id; 2984 vm_pageout_queue_internal.pgo_lowpriority = TRUE; 2985 vm_pageout_queue_internal.pgo_inited = TRUE; 2986 2987 vm_page_unlock_queues(); 2988 2989 vm_pageout_iothread_continue(&vm_pageout_queue_internal); 2990 2991 /*NOTREACHED*/ 2992} 2993 2994kern_return_t 2995vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) 2996{ 2997 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) { 2998 return KERN_SUCCESS; 2999 } else { 3000 return KERN_FAILURE; /* Already set */ 3001 } 3002} 3003 3004static void 3005vm_pressure_thread(void) { 3006 static boolean_t set_up_thread = FALSE; 3007 3008 if 
(set_up_thread) { 3009#if VM_PRESSURE_EVENTS 3010 consider_vm_pressure_events(); 3011#endif /* VM_PRESSURE_EVENTS */ 3012 } 3013 3014 set_up_thread = TRUE; 3015 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT); 3016 thread_block((thread_continue_t)vm_pressure_thread); 3017} 3018 3019uint32_t vm_pageout_considered_page_last = 0; 3020 3021/* 3022 * called once per-second via "compute_averages" 3023 */ 3024void 3025compute_pageout_gc_throttle() 3026{ 3027 if (vm_pageout_considered_page != vm_pageout_considered_page_last) { 3028 3029 vm_pageout_considered_page_last = vm_pageout_considered_page; 3030 3031 thread_wakeup((event_t) &vm_pageout_garbage_collect); 3032 } 3033} 3034 3035 3036static void 3037vm_pageout_garbage_collect(int collect) 3038{ 3039 3040 if (collect) { 3041 boolean_t buf_large_zfree = FALSE; 3042 boolean_t first_try = TRUE; 3043 3044 stack_collect(); 3045 3046 consider_machine_collect(); 3047 3048 do { 3049 if (consider_buffer_cache_collect != NULL) { 3050 buf_large_zfree = (*consider_buffer_cache_collect)(0); 3051 } 3052 if (first_try == TRUE || buf_large_zfree == TRUE) { 3053 /* 3054 * consider_zone_gc should be last, because the other operations 3055 * might return memory to zones. 3056 */ 3057 consider_zone_gc(buf_large_zfree); 3058 } 3059 first_try = FALSE; 3060 3061 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target); 3062 3063 consider_machine_adjust(); 3064 } 3065 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT); 3066 3067 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1); 3068 /*NOTREACHED*/ 3069} 3070 3071 3072 3073void 3074vm_pageout(void) 3075{ 3076 thread_t self = current_thread(); 3077 thread_t thread; 3078 kern_return_t result; 3079 spl_t s; 3080 3081 /* 3082 * Set thread privileges. 3083 */ 3084 s = splsched(); 3085 thread_lock(self); 3086 self->priority = BASEPRI_PREEMPT - 1; 3087 set_sched_pri(self, self->priority); 3088 thread_unlock(self); 3089 3090 if (!self->reserved_stack) 3091 self->reserved_stack = self->kernel_stack; 3092 3093 splx(s); 3094 3095 /* 3096 * Initialize some paging parameters. 
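 * each of the wait/throttle tunables below falls back to its VM_PAGEOUT_* compile-time
 * default if it is still zero at this point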
3097 */ 3098 3099 if (vm_pageout_idle_wait == 0) 3100 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; 3101 3102 if (vm_pageout_burst_wait == 0) 3103 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; 3104 3105 if (vm_pageout_empty_wait == 0) 3106 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; 3107 3108 if (vm_pageout_deadlock_wait == 0) 3109 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT; 3110 3111 if (vm_pageout_deadlock_relief == 0) 3112 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF; 3113 3114 if (vm_pageout_inactive_relief == 0) 3115 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF; 3116 3117 if (vm_pageout_burst_active_throttle == 0) 3118 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE; 3119 3120 if (vm_pageout_burst_inactive_throttle == 0) 3121 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; 3122 3123 /* 3124 * Set kernel task to low backing store privileged 3125 * status 3126 */ 3127 task_lock(kernel_task); 3128 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV; 3129 task_unlock(kernel_task); 3130 3131 vm_page_free_count_init = vm_page_free_count; 3132 3133 /* 3134 * even if we've already called vm_page_free_reserve 3135 * call it again here to insure that the targets are 3136 * accurately calculated (it uses vm_page_free_count_init) 3137 * calling it with an arg of 0 will not change the reserve 3138 * but will re-calculate free_min and free_target 3139 */ 3140 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) { 3141 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved); 3142 } else 3143 vm_page_free_reserve(0); 3144 3145 3146 queue_init(&vm_pageout_queue_external.pgo_pending); 3147 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; 3148 vm_pageout_queue_external.pgo_laundry = 0; 3149 vm_pageout_queue_external.pgo_idle = FALSE; 3150 vm_pageout_queue_external.pgo_busy = FALSE; 3151 vm_pageout_queue_external.pgo_throttled = FALSE; 3152 vm_pageout_queue_external.pgo_draining = FALSE; 3153 vm_pageout_queue_external.pgo_lowpriority = FALSE; 3154 vm_pageout_queue_external.pgo_tid = -1; 3155 vm_pageout_queue_external.pgo_inited = FALSE; 3156 3157 3158 queue_init(&vm_pageout_queue_internal.pgo_pending); 3159 vm_pageout_queue_internal.pgo_maxlaundry = 0; 3160 vm_pageout_queue_internal.pgo_laundry = 0; 3161 vm_pageout_queue_internal.pgo_idle = FALSE; 3162 vm_pageout_queue_internal.pgo_busy = FALSE; 3163 vm_pageout_queue_internal.pgo_throttled = FALSE; 3164 vm_pageout_queue_internal.pgo_draining = FALSE; 3165 vm_pageout_queue_internal.pgo_lowpriority = FALSE; 3166 vm_pageout_queue_internal.pgo_tid = -1; 3167 vm_pageout_queue_internal.pgo_inited = FALSE; 3168 3169 /* internal pageout thread started when default pager registered first time */ 3170 /* external pageout and garbage collection threads started here */ 3171 3172 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, 3173 BASEPRI_PREEMPT - 1, 3174 &vm_pageout_external_iothread); 3175 if (result != KERN_SUCCESS) 3176 panic("vm_pageout_iothread_external: create failed"); 3177 3178 thread_deallocate(vm_pageout_external_iothread); 3179 3180 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, 3181 BASEPRI_DEFAULT, 3182 &thread); 3183 if (result != KERN_SUCCESS) 3184 panic("vm_pageout_garbage_collect: create failed"); 3185 3186 thread_deallocate(thread); 3187 3188 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL, 
3189 BASEPRI_DEFAULT, 3190 &thread); 3191 3192 if (result != KERN_SUCCESS) 3193 panic("vm_pressure_thread: create failed"); 3194 3195 thread_deallocate(thread); 3196 3197 vm_object_reaper_init(); 3198 3199 3200 vm_pageout_continue(); 3201 3202 /* 3203 * Unreached code! 3204 * 3205 * The vm_pageout_continue() call above never returns, so the code below is never 3206 * executed. We take advantage of this to declare several DTrace VM related probe 3207 * points that our kernel doesn't have an analog for. These are probe points that 3208 * exist in Solaris and are in the DTrace documentation, so people may have written 3209 * scripts that use them. Declaring the probe points here means their scripts will 3210 * compile and execute which we want for portability of the scripts, but since this 3211 * section of code is never reached, the probe points will simply never fire. Yes, 3212 * this is basically a hack. The problem is the DTrace probe points were chosen with 3213 * Solaris specific VM events in mind, not portability to different VM implementations. 3214 */ 3215 3216 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL); 3217 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL); 3218 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL); 3219 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL); 3220 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL); 3221 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL); 3222 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL); 3223 /*NOTREACHED*/ 3224} 3225 3226kern_return_t 3227vm_pageout_internal_start(void) 3228{ 3229 kern_return_t result; 3230 3231 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; 3232 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread); 3233 if (result == KERN_SUCCESS) 3234 thread_deallocate(vm_pageout_internal_iothread); 3235 return result; 3236} 3237 3238 3239static upl_t 3240upl_create(int type, int flags, upl_size_t size) 3241{ 3242 upl_t upl; 3243 int page_field_size = 0; 3244 int upl_flags = 0; 3245 int upl_size = sizeof(struct upl); 3246 3247 size = round_page_32(size); 3248 3249 if (type & UPL_CREATE_LITE) { 3250 page_field_size = (atop(size) + 7) >> 3; 3251 page_field_size = (page_field_size + 3) & 0xFFFFFFFC; 3252 3253 upl_flags |= UPL_LITE; 3254 } 3255 if (type & UPL_CREATE_INTERNAL) { 3256 upl_size += (int) sizeof(struct upl_page_info) * atop(size); 3257 3258 upl_flags |= UPL_INTERNAL; 3259 } 3260 upl = (upl_t)kalloc(upl_size + page_field_size); 3261 3262 if (page_field_size) 3263 bzero((char *)upl + upl_size, page_field_size); 3264 3265 upl->flags = upl_flags | flags; 3266 upl->src_object = NULL; 3267 upl->kaddr = (vm_offset_t)0; 3268 upl->size = 0; 3269 upl->map_object = NULL; 3270 upl->ref_count = 1; 3271 upl->ext_ref_count = 0; 3272 upl->highest_page = 0; 3273 upl_lock_init(upl); 3274 upl->vector_upl = NULL; 3275#if UPL_DEBUG 3276 upl->ubc_alias1 = 0; 3277 upl->ubc_alias2 = 0; 3278 3279 upl->upl_creator = current_thread(); 3280 upl->upl_state = 0; 3281 upl->upl_commit_index = 0; 3282 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records)); 3283 3284 upl->uplq.next = 0; 3285 upl->uplq.prev = 0; 3286 3287 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES); 3288#endif /* UPL_DEBUG */ 3289 3290 return(upl); 3291} 3292 3293static void 3294upl_destroy(upl_t upl) 3295{ 3296 int page_field_size; /* bit field in word size buf */ 3297 int size; 3298 3299 if (upl->ext_ref_count) { 3300 panic("upl(%p) 
ext_ref_count", upl); 3301 } 3302 3303#if UPL_DEBUG 3304 if ( !(upl->flags & UPL_VECTOR)) { 3305 vm_object_t object; 3306 3307 if (upl->flags & UPL_SHADOWED) { 3308 object = upl->map_object->shadow; 3309 } else { 3310 object = upl->map_object; 3311 } 3312 vm_object_lock(object); 3313 queue_remove(&object->uplq, upl, upl_t, uplq); 3314 vm_object_activity_end(object); 3315 vm_object_collapse(object, 0, TRUE); 3316 vm_object_unlock(object); 3317 } 3318#endif /* UPL_DEBUG */ 3319 /* 3320 * drop a reference on the map_object whether or 3321 * not a pageout object is inserted 3322 */ 3323 if (upl->flags & UPL_SHADOWED) 3324 vm_object_deallocate(upl->map_object); 3325 3326 if (upl->flags & UPL_DEVICE_MEMORY) 3327 size = PAGE_SIZE; 3328 else 3329 size = upl->size; 3330 page_field_size = 0; 3331 3332 if (upl->flags & UPL_LITE) { 3333 page_field_size = ((size/PAGE_SIZE) + 7) >> 3; 3334 page_field_size = (page_field_size + 3) & 0xFFFFFFFC; 3335 } 3336 upl_lock_destroy(upl); 3337 upl->vector_upl = (vector_upl_t) 0xfeedbeef; 3338 3339 if (upl->flags & UPL_INTERNAL) { 3340 kfree(upl, 3341 sizeof(struct upl) + 3342 (sizeof(struct upl_page_info) * (size/PAGE_SIZE)) 3343 + page_field_size); 3344 } else { 3345 kfree(upl, sizeof(struct upl) + page_field_size); 3346 } 3347} 3348 3349void 3350upl_deallocate(upl_t upl) 3351{ 3352 if (--upl->ref_count == 0) { 3353 if(vector_upl_is_valid(upl)) 3354 vector_upl_deallocate(upl); 3355 upl_destroy(upl); 3356 } 3357} 3358 3359#if DEVELOPMENT || DEBUG 3360/* 3361 * Statistics about UPL enforcement of copy-on-write obligations. 3362 */ 3363unsigned long upl_cow = 0; 3364unsigned long upl_cow_again = 0; 3365unsigned long upl_cow_pages = 0; 3366unsigned long upl_cow_again_pages = 0; 3367 3368unsigned long iopl_cow = 0; 3369unsigned long iopl_cow_pages = 0; 3370#endif 3371 3372/* 3373 * Routine: vm_object_upl_request 3374 * Purpose: 3375 * Cause the population of a portion of a vm_object. 3376 * Depending on the nature of the request, the pages 3377 * returned may contain valid data or be uninitialized. 3378 * A page list structure, listing the physical pages, 3379 * will be returned upon request. 3380 * This function is called by the file system or any other 3381 * supplier of backing store to a pager. 3382 * IMPORTANT NOTE: The caller must still respect the relationship 3383 * between the vm_object and its backing memory object. The 3384 * caller MUST NOT substitute changes in the backing file 3385 * without first doing a memory_object_lock_request on the 3386 * target range unless it is known that the pages are not 3387 * shared with another entity at the pager level. 3388 * Copy_in_to: 3389 * if a page list structure is present 3390 * return the mapped physical pages; where a 3391 * page is not present, return a non-initialized 3392 * one. If the no_sync bit is turned on, don't 3393 * call the pager unlock to synchronize with other 3394 * possible copies of the page. Leave pages busy 3395 * in the original object, if a page list structure 3396 * was specified. When a commit of the page list 3397 * pages is done, the dirty bit will be set for each one. 3398 * Copy_out_from: 3399 * If a page list structure is present, return 3400 * all mapped pages. Where a page does not exist 3401 * map a zero filled one. Leave pages busy in 3402 * the original object. If a page list structure 3403 * is not specified, this call is a no-op. 3404 * 3405 * Note: access of default pager objects has a rather interesting 3406 * twist. 
The caller of this routine, presumably the file system 3407 * page cache handling code, will never actually make a request 3408 * against a default pager backed object. Only the default 3409 * pager will make requests on backing store related vm_objects 3410 * In this way the default pager can maintain the relationship 3411 * between backing store files (abstract memory objects) and 3412 * the vm_objects (cache objects), they support. 3413 * 3414 */ 3415 3416__private_extern__ kern_return_t 3417vm_object_upl_request( 3418 vm_object_t object, 3419 vm_object_offset_t offset, 3420 upl_size_t size, 3421 upl_t *upl_ptr, 3422 upl_page_info_array_t user_page_list, 3423 unsigned int *page_list_count, 3424 int cntrl_flags) 3425{ 3426 vm_page_t dst_page = VM_PAGE_NULL; 3427 vm_object_offset_t dst_offset; 3428 upl_size_t xfer_size; 3429 unsigned int size_in_pages; 3430 boolean_t dirty; 3431 boolean_t hw_dirty; 3432 upl_t upl = NULL; 3433 unsigned int entry; 3434#if MACH_CLUSTER_STATS 3435 boolean_t encountered_lrp = FALSE; 3436#endif 3437 vm_page_t alias_page = NULL; 3438 int refmod_state = 0; 3439 wpl_array_t lite_list = NULL; 3440 vm_object_t last_copy_object; 3441 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 3442 struct vm_page_delayed_work *dwp; 3443 int dw_count; 3444 int dw_limit; 3445 3446 if (cntrl_flags & ~UPL_VALID_FLAGS) { 3447 /* 3448 * For forward compatibility's sake, 3449 * reject any unknown flag. 3450 */ 3451 return KERN_INVALID_VALUE; 3452 } 3453 if ( (!object->internal) && (object->paging_offset != 0) ) 3454 panic("vm_object_upl_request: external object with non-zero paging offset\n"); 3455 if (object->phys_contiguous) 3456 panic("vm_object_upl_request: contiguous object specified\n"); 3457 3458 3459 if ((size / PAGE_SIZE) > MAX_UPL_SIZE) 3460 size = MAX_UPL_SIZE * PAGE_SIZE; 3461 3462 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) 3463 *page_list_count = MAX_UPL_SIZE; 3464 3465 if (cntrl_flags & UPL_SET_INTERNAL) { 3466 if (cntrl_flags & UPL_SET_LITE) { 3467 3468 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size); 3469 3470 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 3471 lite_list = (wpl_array_t) 3472 (((uintptr_t)user_page_list) + 3473 ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); 3474 if (size == 0) { 3475 user_page_list = NULL; 3476 lite_list = NULL; 3477 } 3478 } else { 3479 upl = upl_create(UPL_CREATE_INTERNAL, 0, size); 3480 3481 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 3482 if (size == 0) { 3483 user_page_list = NULL; 3484 } 3485 } 3486 } else { 3487 if (cntrl_flags & UPL_SET_LITE) { 3488 3489 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size); 3490 3491 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 3492 if (size == 0) { 3493 lite_list = NULL; 3494 } 3495 } else { 3496 upl = upl_create(UPL_CREATE_EXTERNAL, 0, size); 3497 } 3498 } 3499 *upl_ptr = upl; 3500 3501 if (user_page_list) 3502 user_page_list[0].device = FALSE; 3503 3504 if (cntrl_flags & UPL_SET_LITE) { 3505 upl->map_object = object; 3506 } else { 3507 upl->map_object = vm_object_allocate(size); 3508 /* 3509 * No neeed to lock the new object: nobody else knows 3510 * about it yet, so it's all ours so far. 
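 * (the shadow, pageout, can_persist, copy_strategy, shadow offset and wimg bits are
 * all filled in before anyone else can get a reference to the new object)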
3511 */ 3512 upl->map_object->shadow = object; 3513 upl->map_object->pageout = TRUE; 3514 upl->map_object->can_persist = FALSE; 3515 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 3516 upl->map_object->vo_shadow_offset = offset; 3517 upl->map_object->wimg_bits = object->wimg_bits; 3518 3519 VM_PAGE_GRAB_FICTITIOUS(alias_page); 3520 3521 upl->flags |= UPL_SHADOWED; 3522 } 3523 /* 3524 * ENCRYPTED SWAP: 3525 * Just mark the UPL as "encrypted" here. 3526 * We'll actually encrypt the pages later, 3527 * in upl_encrypt(), when the caller has 3528 * selected which pages need to go to swap. 3529 */ 3530 if (cntrl_flags & UPL_ENCRYPT) 3531 upl->flags |= UPL_ENCRYPTED; 3532 3533 if (cntrl_flags & UPL_FOR_PAGEOUT) 3534 upl->flags |= UPL_PAGEOUT; 3535 3536 vm_object_lock(object); 3537 vm_object_activity_begin(object); 3538 3539 /* 3540 * we can lock in the paging_offset once paging_in_progress is set 3541 */ 3542 upl->size = size; 3543 upl->offset = offset + object->paging_offset; 3544 3545#if UPL_DEBUG 3546 vm_object_activity_begin(object); 3547 queue_enter(&object->uplq, upl, upl_t, uplq); 3548#endif /* UPL_DEBUG */ 3549 3550 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) { 3551 /* 3552 * Honor copy-on-write obligations 3553 * 3554 * The caller is gathering these pages and 3555 * might modify their contents. We need to 3556 * make sure that the copy object has its own 3557 * private copies of these pages before we let 3558 * the caller modify them. 3559 */ 3560 vm_object_update(object, 3561 offset, 3562 size, 3563 NULL, 3564 NULL, 3565 FALSE, /* should_return */ 3566 MEMORY_OBJECT_COPY_SYNC, 3567 VM_PROT_NO_CHANGE); 3568#if DEVELOPMENT || DEBUG 3569 upl_cow++; 3570 upl_cow_pages += size >> PAGE_SHIFT; 3571#endif 3572 } 3573 /* 3574 * remember which copy object we synchronized with 3575 */ 3576 last_copy_object = object->copy; 3577 entry = 0; 3578 3579 xfer_size = size; 3580 dst_offset = offset; 3581 size_in_pages = size / PAGE_SIZE; 3582 3583 dwp = &dw_array[0]; 3584 dw_count = 0; 3585 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 3586 3587 if (vm_page_free_count > (vm_page_free_target + size_in_pages) || 3588 object->resident_page_count < (MAX_UPL_SIZE * 2)) 3589 object->scan_collisions = 0; 3590 3591 while (xfer_size) { 3592 3593 dwp->dw_mask = 0; 3594 3595 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) { 3596 vm_object_unlock(object); 3597 VM_PAGE_GRAB_FICTITIOUS(alias_page); 3598 vm_object_lock(object); 3599 } 3600 if (cntrl_flags & UPL_COPYOUT_FROM) { 3601 upl->flags |= UPL_PAGE_SYNC_DONE; 3602 3603 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) || 3604 dst_page->fictitious || 3605 dst_page->absent || 3606 dst_page->error || 3607 dst_page->cleaning || 3608 (VM_PAGE_WIRED(dst_page))) { 3609 3610 if (user_page_list) 3611 user_page_list[entry].phys_addr = 0; 3612 3613 goto try_next_page; 3614 } 3615 /* 3616 * grab this up front... 3617 * a high percentange of the time we're going to 3618 * need the hardware modification state a bit later 3619 * anyway... so we can eliminate an extra call into 3620 * the pmap layer by grabbing it here and recording it 3621 */ 3622 if (dst_page->pmapped) 3623 refmod_state = pmap_get_refmod(dst_page->phys_page); 3624 else 3625 refmod_state = 0; 3626 3627 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) { 3628 /* 3629 * page is on inactive list and referenced... 3630 * reactivate it now... 
this gets it out of the 3631 * way of vm_pageout_scan which would have to 3632 * reactivate it upon tripping over it 3633 */ 3634 dwp->dw_mask |= DW_vm_page_activate; 3635 } 3636 if (cntrl_flags & UPL_RET_ONLY_DIRTY) { 3637 /* 3638 * we're only asking for DIRTY pages to be returned 3639 */ 3640 if (dst_page->pageout || !(cntrl_flags & UPL_FOR_PAGEOUT)) { 3641 /* 3642 * if we were the page stolen by vm_pageout_scan to be 3643 * cleaned (as opposed to a buddy being clustered in 3644 * or this request is not being driven by a PAGEOUT cluster 3645 * then we only need to check for the page being dirty or 3646 * precious to decide whether to return it 3647 */ 3648 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED)) 3649 goto check_busy; 3650 goto dont_return; 3651 } 3652 /* 3653 * this is a request for a PAGEOUT cluster and this page 3654 * is merely along for the ride as a 'buddy'... not only 3655 * does it have to be dirty to be returned, but it also 3656 * can't have been referenced recently... 3657 */ 3658 if ( (hibernate_cleaning_in_progress == TRUE || 3659 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) && 3660 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) { 3661 goto check_busy; 3662 } 3663dont_return: 3664 /* 3665 * if we reach here, we're not to return 3666 * the page... go on to the next one 3667 */ 3668 if (dst_page->laundry == TRUE) { 3669 /* 3670 * if we get here, the page is not 'cleaning' (filtered out above). 3671 * since it has been referenced, remove it from the laundry 3672 * so we don't pay the cost of an I/O to clean a page 3673 * we're just going to take back 3674 */ 3675 vm_page_lockspin_queues(); 3676 3677 vm_pageout_steal_laundry(dst_page, TRUE); 3678 vm_page_activate(dst_page); 3679 3680 vm_page_unlock_queues(); 3681 } 3682 if (user_page_list) 3683 user_page_list[entry].phys_addr = 0; 3684 3685 goto try_next_page; 3686 } 3687check_busy: 3688 if (dst_page->busy) { 3689 if (cntrl_flags & UPL_NOBLOCK) { 3690 if (user_page_list) 3691 user_page_list[entry].phys_addr = 0; 3692 3693 goto try_next_page; 3694 } 3695 /* 3696 * someone else is playing with the 3697 * page. We will have to wait. 3698 */ 3699 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 3700 3701 continue; 3702 } 3703 /* 3704 * ENCRYPTED SWAP: 3705 * The caller is gathering this page and might 3706 * access its contents later on. Decrypt the 3707 * page before adding it to the UPL, so that 3708 * the caller never sees encrypted data. 3709 */ 3710 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) { 3711 int was_busy; 3712 3713 /* 3714 * save the current state of busy 3715 * mark page as busy while decrypt 3716 * is in progress since it will drop 3717 * the object lock... 3718 */ 3719 was_busy = dst_page->busy; 3720 dst_page->busy = TRUE; 3721 3722 vm_page_decrypt(dst_page, 0); 3723 vm_page_decrypt_for_upl_counter++; 3724 /* 3725 * restore to original busy state 3726 */ 3727 dst_page->busy = was_busy; 3728 } 3729 if (dst_page->pageout_queue == TRUE) { 3730 3731 vm_page_lockspin_queues(); 3732 3733 if (dst_page->pageout_queue == TRUE) { 3734 /* 3735 * we've buddied up a page for a clustered pageout 3736 * that has already been moved to the pageout 3737 * queue by pageout_scan... 
we need to remove 3738 * it from the queue and drop the laundry count 3739 * on that queue 3740 */ 3741 vm_pageout_throttle_up(dst_page); 3742 } 3743 vm_page_unlock_queues(); 3744 } 3745#if MACH_CLUSTER_STATS 3746 /* 3747 * pageout statistics gathering. count 3748 * all the pages we will page out that 3749 * were not counted in the initial 3750 * vm_pageout_scan work 3751 */ 3752 if (dst_page->pageout) 3753 encountered_lrp = TRUE; 3754 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) { 3755 if (encountered_lrp) 3756 CLUSTER_STAT(pages_at_higher_offsets++;) 3757 else 3758 CLUSTER_STAT(pages_at_lower_offsets++;) 3759 } 3760#endif 3761 hw_dirty = refmod_state & VM_MEM_MODIFIED; 3762 dirty = hw_dirty ? TRUE : dst_page->dirty; 3763 3764 if (dst_page->phys_page > upl->highest_page) 3765 upl->highest_page = dst_page->phys_page; 3766 3767 if (cntrl_flags & UPL_SET_LITE) { 3768 unsigned int pg_num; 3769 3770 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 3771 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 3772 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 3773 3774 if (hw_dirty) 3775 pmap_clear_modify(dst_page->phys_page); 3776 3777 /* 3778 * Mark original page as cleaning 3779 * in place. 3780 */ 3781 dst_page->cleaning = TRUE; 3782 dst_page->precious = FALSE; 3783 } else { 3784 /* 3785 * use pageclean setup, it is more 3786 * convenient even for the pageout 3787 * cases here 3788 */ 3789 vm_object_lock(upl->map_object); 3790 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); 3791 vm_object_unlock(upl->map_object); 3792 3793 alias_page->absent = FALSE; 3794 alias_page = NULL; 3795 } 3796#if MACH_PAGEMAP 3797 /* 3798 * Record that this page has been 3799 * written out 3800 */ 3801 vm_external_state_set(object->existence_map, dst_page->offset); 3802#endif /*MACH_PAGEMAP*/ 3803 if (dirty) { 3804 SET_PAGE_DIRTY(dst_page, FALSE); 3805 } else { 3806 dst_page->dirty = FALSE; 3807 } 3808 3809 if (!dirty) 3810 dst_page->precious = TRUE; 3811 3812 if ( (cntrl_flags & UPL_ENCRYPT) ) { 3813 /* 3814 * ENCRYPTED SWAP: 3815 * We want to deny access to the target page 3816 * because its contents are about to be 3817 * encrypted and the user would be very 3818 * confused to see encrypted data instead 3819 * of their data. 3820 * We also set "encrypted_cleaning" to allow 3821 * vm_pageout_scan() to demote that page 3822 * from "adjacent/clean-in-place" to 3823 * "target/clean-and-free" if it bumps into 3824 * this page during its scanning while we're 3825 * still processing this cluster. 3826 */ 3827 dst_page->busy = TRUE; 3828 dst_page->encrypted_cleaning = TRUE; 3829 } 3830 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) { 3831 if ( !VM_PAGE_WIRED(dst_page)) 3832 dst_page->pageout = TRUE; 3833 } 3834 } else { 3835 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) { 3836 /* 3837 * Honor copy-on-write obligations 3838 * 3839 * The copy object has changed since we 3840 * last synchronized for copy-on-write. 3841 * Another copy object might have been 3842 * inserted while we released the object's 3843 * lock. Since someone could have seen the 3844 * original contents of the remaining pages 3845 * through that new object, we have to 3846 * synchronize with it again for the remaining 3847 * pages only. The previous pages are "busy" 3848 * so they can not be seen through the new 3849 * mapping. 
The new mapping will see our 3850 * upcoming changes for those previous pages, 3851 * but that's OK since they couldn't see what 3852 * was there before. It's just a race anyway 3853 * and there's no guarantee of consistency or 3854 * atomicity. We just don't want new mappings 3855 * to see both the *before* and *after* pages. 3856 */ 3857 if (object->copy != VM_OBJECT_NULL) { 3858 vm_object_update( 3859 object, 3860 dst_offset,/* current offset */ 3861 xfer_size, /* remaining size */ 3862 NULL, 3863 NULL, 3864 FALSE, /* should_return */ 3865 MEMORY_OBJECT_COPY_SYNC, 3866 VM_PROT_NO_CHANGE); 3867 3868#if DEVELOPMENT || DEBUG 3869 upl_cow_again++; 3870 upl_cow_again_pages += xfer_size >> PAGE_SHIFT; 3871#endif 3872 } 3873 /* 3874 * remember the copy object we synced with 3875 */ 3876 last_copy_object = object->copy; 3877 } 3878 dst_page = vm_page_lookup(object, dst_offset); 3879 3880 if (dst_page != VM_PAGE_NULL) { 3881 3882 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) { 3883 /* 3884 * skip over pages already present in the cache 3885 */ 3886 if (user_page_list) 3887 user_page_list[entry].phys_addr = 0; 3888 3889 goto try_next_page; 3890 } 3891 if (dst_page->fictitious) { 3892 panic("need corner case for fictitious page"); 3893 } 3894 3895 if (dst_page->busy || dst_page->cleaning) { 3896 /* 3897 * someone else is playing with the 3898 * page. We will have to wait. 3899 */ 3900 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 3901 3902 continue; 3903 } 3904 if (dst_page->laundry) { 3905 dst_page->pageout = FALSE; 3906 3907 vm_pageout_steal_laundry(dst_page, FALSE); 3908 } 3909 } else { 3910 if (object->private) { 3911 /* 3912 * This is a nasty wrinkle for users 3913 * of upl who encounter device or 3914 * private memory however, it is 3915 * unavoidable, only a fault can 3916 * resolve the actual backing 3917 * physical page by asking the 3918 * backing device. 3919 */ 3920 if (user_page_list) 3921 user_page_list[entry].phys_addr = 0; 3922 3923 goto try_next_page; 3924 } 3925 if (object->scan_collisions) { 3926 /* 3927 * the pageout_scan thread is trying to steal 3928 * pages from this object, but has run into our 3929 * lock... grab 2 pages from the head of the object... 3930 * the first is freed on behalf of pageout_scan, the 3931 * 2nd is for our own use... we use vm_object_page_grab 3932 * in both cases to avoid taking pages from the free 3933 * list since we are under memory pressure and our 3934 * lock on this object is getting in the way of 3935 * relieving it 3936 */ 3937 dst_page = vm_object_page_grab(object); 3938 3939 if (dst_page != VM_PAGE_NULL) 3940 vm_page_release(dst_page); 3941 3942 dst_page = vm_object_page_grab(object); 3943 } 3944 if (dst_page == VM_PAGE_NULL) { 3945 /* 3946 * need to allocate a page 3947 */ 3948 dst_page = vm_page_grab(); 3949 } 3950 if (dst_page == VM_PAGE_NULL) { 3951 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) { 3952 /* 3953 * we don't want to stall waiting for pages to come onto the free list 3954 * while we're already holding absent pages in this UPL 3955 * the caller will deal with the empty slots 3956 */ 3957 if (user_page_list) 3958 user_page_list[entry].phys_addr = 0; 3959 3960 goto try_next_page; 3961 } 3962 /* 3963 * no pages available... wait 3964 * then try again for the same 3965 * offset... 
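 * (while we sleep in VM_PAGE_WAIT() below, the object lock is dropped and
 * vm_upl_wait_for_pages is raised by size_in_pages for the duration of the
 * wait; both are restored before we loop back and retry this offset)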
3966 */ 3967 vm_object_unlock(object); 3968 3969 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); 3970 3971 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); 3972 3973 VM_PAGE_WAIT(); 3974 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 3975 3976 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); 3977 3978 vm_object_lock(object); 3979 3980 continue; 3981 } 3982 vm_page_insert(dst_page, object, dst_offset); 3983 3984 dst_page->absent = TRUE; 3985 dst_page->busy = FALSE; 3986 3987 if (cntrl_flags & UPL_RET_ONLY_ABSENT) { 3988 /* 3989 * if UPL_RET_ONLY_ABSENT was specified, 3990 * than we're definitely setting up a 3991 * upl for a clustered read/pagein 3992 * operation... mark the pages as clustered 3993 * so upl_commit_range can put them on the 3994 * speculative list 3995 */ 3996 dst_page->clustered = TRUE; 3997 } 3998 } 3999 /* 4000 * ENCRYPTED SWAP: 4001 */ 4002 if (cntrl_flags & UPL_ENCRYPT) { 4003 /* 4004 * The page is going to be encrypted when we 4005 * get it from the pager, so mark it so. 4006 */ 4007 dst_page->encrypted = TRUE; 4008 } else { 4009 /* 4010 * Otherwise, the page will not contain 4011 * encrypted data. 4012 */ 4013 dst_page->encrypted = FALSE; 4014 } 4015 dst_page->overwriting = TRUE; 4016 4017 if (dst_page->pmapped) { 4018 if ( !(cntrl_flags & UPL_FILE_IO)) 4019 /* 4020 * eliminate all mappings from the 4021 * original object and its prodigy 4022 */ 4023 refmod_state = pmap_disconnect(dst_page->phys_page); 4024 else 4025 refmod_state = pmap_get_refmod(dst_page->phys_page); 4026 } else 4027 refmod_state = 0; 4028 4029 hw_dirty = refmod_state & VM_MEM_MODIFIED; 4030 dirty = hw_dirty ? TRUE : dst_page->dirty; 4031 4032 if (cntrl_flags & UPL_SET_LITE) { 4033 unsigned int pg_num; 4034 4035 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 4036 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 4037 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 4038 4039 if (hw_dirty) 4040 pmap_clear_modify(dst_page->phys_page); 4041 4042 /* 4043 * Mark original page as cleaning 4044 * in place. 4045 */ 4046 dst_page->cleaning = TRUE; 4047 dst_page->precious = FALSE; 4048 } else { 4049 /* 4050 * use pageclean setup, it is more 4051 * convenient even for the pageout 4052 * cases here 4053 */ 4054 vm_object_lock(upl->map_object); 4055 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); 4056 vm_object_unlock(upl->map_object); 4057 4058 alias_page->absent = FALSE; 4059 alias_page = NULL; 4060 } 4061 4062 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) { 4063 upl->flags &= ~UPL_CLEAR_DIRTY; 4064 upl->flags |= UPL_SET_DIRTY; 4065 dirty = TRUE; 4066 upl->flags |= UPL_SET_DIRTY; 4067 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) { 4068 /* 4069 * clean in place for read implies 4070 * that a write will be done on all 4071 * the pages that are dirty before 4072 * a upl commit is done. The caller 4073 * is obligated to preserve the 4074 * contents of all pages marked dirty 4075 */ 4076 upl->flags |= UPL_CLEAR_DIRTY; 4077 } 4078 dst_page->dirty = dirty; 4079 4080 if (!dirty) 4081 dst_page->precious = TRUE; 4082 4083 if ( !VM_PAGE_WIRED(dst_page)) { 4084 /* 4085 * deny access to the target page while 4086 * it is being worked on 4087 */ 4088 dst_page->busy = TRUE; 4089 } else 4090 dwp->dw_mask |= DW_vm_page_wire; 4091 4092 /* 4093 * We might be about to satisfy a fault which has been 4094 * requested. So no need for the "restart" bit. 
4095 */ 4096 dst_page->restart = FALSE; 4097 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) { 4098 /* 4099 * expect the page to be used 4100 */ 4101 dwp->dw_mask |= DW_set_reference; 4102 } 4103 if (cntrl_flags & UPL_PRECIOUS) { 4104 if (dst_page->object->internal) { 4105 SET_PAGE_DIRTY(dst_page, FALSE); 4106 dst_page->precious = FALSE; 4107 } else { 4108 dst_page->precious = TRUE; 4109 } 4110 } else { 4111 dst_page->precious = FALSE; 4112 } 4113 } 4114 if (dst_page->busy) 4115 upl->flags |= UPL_HAS_BUSY; 4116 4117 if (dst_page->phys_page > upl->highest_page) 4118 upl->highest_page = dst_page->phys_page; 4119 if (user_page_list) { 4120 user_page_list[entry].phys_addr = dst_page->phys_page; 4121 user_page_list[entry].pageout = dst_page->pageout; 4122 user_page_list[entry].absent = dst_page->absent; 4123 user_page_list[entry].dirty = dst_page->dirty; 4124 user_page_list[entry].precious = dst_page->precious; 4125 user_page_list[entry].device = FALSE; 4126 user_page_list[entry].needed = FALSE; 4127 if (dst_page->clustered == TRUE) 4128 user_page_list[entry].speculative = dst_page->speculative; 4129 else 4130 user_page_list[entry].speculative = FALSE; 4131 user_page_list[entry].cs_validated = dst_page->cs_validated; 4132 user_page_list[entry].cs_tainted = dst_page->cs_tainted; 4133 } 4134 /* 4135 * if UPL_RET_ONLY_ABSENT is set, then 4136 * we are working with a fresh page and we've 4137 * just set the clustered flag on it to 4138 * indicate that it was drug in as part of a 4139 * speculative cluster... so leave it alone 4140 */ 4141 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) { 4142 /* 4143 * someone is explicitly grabbing this page... 4144 * update clustered and speculative state 4145 * 4146 */ 4147 VM_PAGE_CONSUME_CLUSTERED(dst_page); 4148 } 4149try_next_page: 4150 if (dwp->dw_mask) { 4151 if (dwp->dw_mask & DW_vm_page_activate) 4152 VM_STAT_INCR(reactivations); 4153 4154 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); 4155 4156 if (dw_count >= dw_limit) { 4157 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 4158 4159 dwp = &dw_array[0]; 4160 dw_count = 0; 4161 } 4162 } 4163 entry++; 4164 dst_offset += PAGE_SIZE_64; 4165 xfer_size -= PAGE_SIZE; 4166 } 4167 if (dw_count) 4168 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 4169 4170 if (alias_page != NULL) { 4171 VM_PAGE_FREE(alias_page); 4172 } 4173 4174 if (page_list_count != NULL) { 4175 if (upl->flags & UPL_INTERNAL) 4176 *page_list_count = 0; 4177 else if (*page_list_count > entry) 4178 *page_list_count = entry; 4179 } 4180#if UPL_DEBUG 4181 upl->upl_state = 1; 4182#endif 4183 vm_object_unlock(object); 4184 4185 return KERN_SUCCESS; 4186} 4187 4188/* JMM - Backward compatability for now */ 4189kern_return_t 4190vm_fault_list_request( /* forward */ 4191 memory_object_control_t control, 4192 vm_object_offset_t offset, 4193 upl_size_t size, 4194 upl_t *upl_ptr, 4195 upl_page_info_t **user_page_list_ptr, 4196 unsigned int page_list_count, 4197 int cntrl_flags); 4198kern_return_t 4199vm_fault_list_request( 4200 memory_object_control_t control, 4201 vm_object_offset_t offset, 4202 upl_size_t size, 4203 upl_t *upl_ptr, 4204 upl_page_info_t **user_page_list_ptr, 4205 unsigned int page_list_count, 4206 int cntrl_flags) 4207{ 4208 unsigned int local_list_count; 4209 upl_page_info_t *user_page_list; 4210 kern_return_t kr; 4211 4212 if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR) 4213 return KERN_INVALID_ARGUMENT; 4214 4215 if (user_page_list_ptr != NULL) { 4216 local_list_count = page_list_count; 4217 user_page_list = 
*user_page_list_ptr; 4218 } else { 4219 local_list_count = 0; 4220 user_page_list = NULL; 4221 } 4222 kr = memory_object_upl_request(control, 4223 offset, 4224 size, 4225 upl_ptr, 4226 user_page_list, 4227 &local_list_count, 4228 cntrl_flags); 4229 4230 if(kr != KERN_SUCCESS) 4231 return kr; 4232 4233 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) { 4234 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr); 4235 } 4236 4237 return KERN_SUCCESS; 4238} 4239 4240 4241 4242/* 4243 * Routine: vm_object_super_upl_request 4244 * Purpose: 4245 * Cause the population of a portion of a vm_object 4246 * in much the same way as memory_object_upl_request. 4247 * Depending on the nature of the request, the pages 4248 * returned may be contain valid data or be uninitialized. 4249 * However, the region may be expanded up to the super 4250 * cluster size provided. 4251 */ 4252 4253__private_extern__ kern_return_t 4254vm_object_super_upl_request( 4255 vm_object_t object, 4256 vm_object_offset_t offset, 4257 upl_size_t size, 4258 upl_size_t super_cluster, 4259 upl_t *upl, 4260 upl_page_info_t *user_page_list, 4261 unsigned int *page_list_count, 4262 int cntrl_flags) 4263{ 4264 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)) 4265 return KERN_FAILURE; 4266 4267 assert(object->paging_in_progress); 4268 offset = offset - object->paging_offset; 4269 4270 if (super_cluster > size) { 4271 4272 vm_object_offset_t base_offset; 4273 upl_size_t super_size; 4274 vm_object_size_t super_size_64; 4275 4276 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); 4277 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster; 4278 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size; 4279 super_size = (upl_size_t) super_size_64; 4280 assert(super_size == super_size_64); 4281 4282 if (offset > (base_offset + super_size)) { 4283 panic("vm_object_super_upl_request: Missed target pageout" 4284 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n", 4285 offset, base_offset, super_size, super_cluster, 4286 size, object->paging_offset); 4287 } 4288 /* 4289 * apparently there is a case where the vm requests a 4290 * page to be written out who's offset is beyond the 4291 * object size 4292 */ 4293 if ((offset + size) > (base_offset + super_size)) { 4294 super_size_64 = (offset + size) - base_offset; 4295 super_size = (upl_size_t) super_size_64; 4296 assert(super_size == super_size_64); 4297 } 4298 4299 offset = base_offset; 4300 size = super_size; 4301 } 4302 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags); 4303} 4304 4305 4306kern_return_t 4307vm_map_create_upl( 4308 vm_map_t map, 4309 vm_map_address_t offset, 4310 upl_size_t *upl_size, 4311 upl_t *upl, 4312 upl_page_info_array_t page_list, 4313 unsigned int *count, 4314 int *flags) 4315{ 4316 vm_map_entry_t entry; 4317 int caller_flags; 4318 int force_data_sync; 4319 int sync_cow_data; 4320 vm_object_t local_object; 4321 vm_map_offset_t local_offset; 4322 vm_map_offset_t local_start; 4323 kern_return_t ret; 4324 4325 caller_flags = *flags; 4326 4327 if (caller_flags & ~UPL_VALID_FLAGS) { 4328 /* 4329 * For forward compatibility's sake, 4330 * reject any unknown flag. 
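 * (any bit outside UPL_VALID_FLAGS fails the call with KERN_INVALID_VALUE
 * rather than being silently ignored)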
4331 */ 4332 return KERN_INVALID_VALUE; 4333 } 4334 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC); 4335 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM); 4336 4337 if (upl == NULL) 4338 return KERN_INVALID_ARGUMENT; 4339 4340REDISCOVER_ENTRY: 4341 vm_map_lock_read(map); 4342 4343 if (vm_map_lookup_entry(map, offset, &entry)) { 4344 4345 if ((entry->vme_end - offset) < *upl_size) { 4346 *upl_size = (upl_size_t) (entry->vme_end - offset); 4347 assert(*upl_size == entry->vme_end - offset); 4348 } 4349 4350 if (caller_flags & UPL_QUERY_OBJECT_TYPE) { 4351 *flags = 0; 4352 4353 if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) { 4354 if (entry->object.vm_object->private) 4355 *flags = UPL_DEV_MEMORY; 4356 4357 if (entry->object.vm_object->phys_contiguous) 4358 *flags |= UPL_PHYS_CONTIG; 4359 } 4360 vm_map_unlock_read(map); 4361 4362 return KERN_SUCCESS; 4363 } 4364 4365 if (entry->is_sub_map) { 4366 vm_map_t submap; 4367 4368 submap = entry->object.sub_map; 4369 local_start = entry->vme_start; 4370 local_offset = entry->offset; 4371 4372 vm_map_reference(submap); 4373 vm_map_unlock_read(map); 4374 4375 ret = vm_map_create_upl(submap, 4376 local_offset + (offset - local_start), 4377 upl_size, upl, page_list, count, flags); 4378 vm_map_deallocate(submap); 4379 4380 return ret; 4381 } 4382 4383 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) { 4384 if ((*upl_size/PAGE_SIZE) > MAX_UPL_SIZE) 4385 *upl_size = MAX_UPL_SIZE * PAGE_SIZE; 4386 } 4387 /* 4388 * Create an object if necessary. 4389 */ 4390 if (entry->object.vm_object == VM_OBJECT_NULL) { 4391 4392 if (vm_map_lock_read_to_write(map)) 4393 goto REDISCOVER_ENTRY; 4394 4395 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start)); 4396 entry->offset = 0; 4397 4398 vm_map_lock_write_to_read(map); 4399 } 4400 if (!(caller_flags & UPL_COPYOUT_FROM)) { 4401 if (!(entry->protection & VM_PROT_WRITE)) { 4402 vm_map_unlock_read(map); 4403 return KERN_PROTECTION_FAILURE; 4404 } 4405 4406#if !CONFIG_EMBEDDED 4407 local_object = entry->object.vm_object; 4408 if (vm_map_entry_should_cow_for_true_share(entry) && 4409 local_object->vo_size > *upl_size && 4410 *upl_size != 0) { 4411 vm_prot_t prot; 4412 4413 /* 4414 * Set up the targeted range for copy-on-write to avoid 4415 * applying true_share/copy_delay to the entire object. 4416 */ 4417 4418 if (vm_map_lock_read_to_write(map)) { 4419 goto REDISCOVER_ENTRY; 4420 } 4421 4422 vm_map_clip_start(map, entry, vm_map_trunc_page(offset)); 4423 vm_map_clip_end(map, entry, vm_map_round_page(offset + *upl_size)); 4424 prot = entry->protection & ~VM_PROT_WRITE; 4425 if (override_nx(map, entry->alias) && prot) 4426 prot |= VM_PROT_EXECUTE; 4427 vm_object_pmap_protect(local_object, 4428 entry->offset, 4429 entry->vme_end - entry->vme_start, 4430 ((entry->is_shared || map->mapped_in_other_pmaps) 4431 ? PMAP_NULL 4432 : map->pmap), 4433 entry->vme_start, 4434 prot); 4435 entry->needs_copy = TRUE; 4436 4437 vm_map_lock_write_to_read(map); 4438 } 4439#endif /* !CONFIG_EMBEDDED */ 4440 4441 if (entry->needs_copy) { 4442 /* 4443 * Honor copy-on-write for COPY_SYMMETRIC 4444 * strategy. 
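 * The write-mode vm_map_lookup_locked() below is issued only for its side
 * effect of resolving the copy-on-write on this entry; the object it hands
 * back is unlocked immediately and we restart at REDISCOVER_ENTRY to pick
 * up the post-copy state of the map.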
4445 */ 4446 vm_map_t local_map; 4447 vm_object_t object; 4448 vm_object_offset_t new_offset; 4449 vm_prot_t prot; 4450 boolean_t wired; 4451 vm_map_version_t version; 4452 vm_map_t real_map; 4453 4454 local_map = map; 4455 4456 if (vm_map_lookup_locked(&local_map, 4457 offset, VM_PROT_WRITE, 4458 OBJECT_LOCK_EXCLUSIVE, 4459 &version, &object, 4460 &new_offset, &prot, &wired, 4461 NULL, 4462 &real_map) != KERN_SUCCESS) { 4463 vm_map_unlock_read(local_map); 4464 return KERN_FAILURE; 4465 } 4466 if (real_map != map) 4467 vm_map_unlock(real_map); 4468 vm_map_unlock_read(local_map); 4469 4470 vm_object_unlock(object); 4471 4472 goto REDISCOVER_ENTRY; 4473 } 4474 } 4475 if (sync_cow_data) { 4476 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) { 4477 local_object = entry->object.vm_object; 4478 local_start = entry->vme_start; 4479 local_offset = entry->offset; 4480 4481 vm_object_reference(local_object); 4482 vm_map_unlock_read(map); 4483 4484 if (local_object->shadow && local_object->copy) { 4485 vm_object_lock_request( 4486 local_object->shadow, 4487 (vm_object_offset_t) 4488 ((offset - local_start) + 4489 local_offset) + 4490 local_object->vo_shadow_offset, 4491 *upl_size, FALSE, 4492 MEMORY_OBJECT_DATA_SYNC, 4493 VM_PROT_NO_CHANGE); 4494 } 4495 sync_cow_data = FALSE; 4496 vm_object_deallocate(local_object); 4497 4498 goto REDISCOVER_ENTRY; 4499 } 4500 } 4501 if (force_data_sync) { 4502 local_object = entry->object.vm_object; 4503 local_start = entry->vme_start; 4504 local_offset = entry->offset; 4505 4506 vm_object_reference(local_object); 4507 vm_map_unlock_read(map); 4508 4509 vm_object_lock_request( 4510 local_object, 4511 (vm_object_offset_t) 4512 ((offset - local_start) + local_offset), 4513 (vm_object_size_t)*upl_size, FALSE, 4514 MEMORY_OBJECT_DATA_SYNC, 4515 VM_PROT_NO_CHANGE); 4516 4517 force_data_sync = FALSE; 4518 vm_object_deallocate(local_object); 4519 4520 goto REDISCOVER_ENTRY; 4521 } 4522 if (entry->object.vm_object->private) 4523 *flags = UPL_DEV_MEMORY; 4524 else 4525 *flags = 0; 4526 4527 if (entry->object.vm_object->phys_contiguous) 4528 *flags |= UPL_PHYS_CONTIG; 4529 4530 local_object = entry->object.vm_object; 4531 local_offset = entry->offset; 4532 local_start = entry->vme_start; 4533 4534 vm_object_reference(local_object); 4535 vm_map_unlock_read(map); 4536 4537 ret = vm_object_iopl_request(local_object, 4538 (vm_object_offset_t) ((offset - local_start) + local_offset), 4539 *upl_size, 4540 upl, 4541 page_list, 4542 count, 4543 caller_flags); 4544 vm_object_deallocate(local_object); 4545 4546 return(ret); 4547 } 4548 vm_map_unlock_read(map); 4549 4550 return(KERN_FAILURE); 4551} 4552 4553/* 4554 * Internal routine to enter a UPL into a VM map. 4555 * 4556 * JMM - This should just be doable through the standard 4557 * vm_map_enter() API. 
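 *
 * For orientation, a rough sketch of how a kernel caller typically drives
 * this path (illustrative only: flag selection and all error handling are
 * elided, and the identifiers below are example names, not code from this
 * file):
 *
 *	upl_t		upl = NULL;
 *	upl_size_t	upl_size = size;
 *	unsigned int	count = 0;
 *	int		flags = UPL_SET_INTERNAL | UPL_SET_LITE;
 *	vm_map_offset_t	kaddr;
 *	boolean_t	empty;
 *
 *	vm_map_create_upl(map, offset, &upl_size, &upl, NULL, &count, &flags);
 *	vm_map_enter_upl(kernel_map, upl, &kaddr);
 *	... operate on the pages mapped at [kaddr, kaddr + upl_size) ...
 *	vm_map_remove_upl(kernel_map, upl);
 *	upl_commit_range(upl, 0, upl_size, 0, NULL, 0, &empty);
 *	upl_deallocate(upl);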
4558 */ 4559kern_return_t 4560vm_map_enter_upl( 4561 vm_map_t map, 4562 upl_t upl, 4563 vm_map_offset_t *dst_addr) 4564{ 4565 vm_map_size_t size; 4566 vm_object_offset_t offset; 4567 vm_map_offset_t addr; 4568 vm_page_t m; 4569 kern_return_t kr; 4570 int isVectorUPL = 0, curr_upl=0; 4571 upl_t vector_upl = NULL; 4572 vm_offset_t vector_upl_dst_addr = 0; 4573 vm_map_t vector_upl_submap = NULL; 4574 upl_offset_t subupl_offset = 0; 4575 upl_size_t subupl_size = 0; 4576 4577 if (upl == UPL_NULL) 4578 return KERN_INVALID_ARGUMENT; 4579 4580 if((isVectorUPL = vector_upl_is_valid(upl))) { 4581 int mapped=0,valid_upls=0; 4582 vector_upl = upl; 4583 4584 upl_lock(vector_upl); 4585 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { 4586 upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); 4587 if(upl == NULL) 4588 continue; 4589 valid_upls++; 4590 if (UPL_PAGE_LIST_MAPPED & upl->flags) 4591 mapped++; 4592 } 4593 4594 if(mapped) { 4595 if(mapped != valid_upls) 4596 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls); 4597 else { 4598 upl_unlock(vector_upl); 4599 return KERN_FAILURE; 4600 } 4601 } 4602 4603 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap); 4604 if( kr != KERN_SUCCESS ) 4605 panic("Vector UPL submap allocation failed\n"); 4606 map = vector_upl_submap; 4607 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr); 4608 curr_upl=0; 4609 } 4610 else 4611 upl_lock(upl); 4612 4613process_upl_to_enter: 4614 if(isVectorUPL){ 4615 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { 4616 *dst_addr = vector_upl_dst_addr; 4617 upl_unlock(vector_upl); 4618 return KERN_SUCCESS; 4619 } 4620 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); 4621 if(upl == NULL) 4622 goto process_upl_to_enter; 4623 4624 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size); 4625 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset); 4626 } else { 4627 /* 4628 * check to see if already mapped 4629 */ 4630 if (UPL_PAGE_LIST_MAPPED & upl->flags) { 4631 upl_unlock(upl); 4632 return KERN_FAILURE; 4633 } 4634 } 4635 if ((!(upl->flags & UPL_SHADOWED)) && 4636 ((upl->flags & UPL_HAS_BUSY) || 4637 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) { 4638 4639 vm_object_t object; 4640 vm_page_t alias_page; 4641 vm_object_offset_t new_offset; 4642 unsigned int pg_num; 4643 wpl_array_t lite_list; 4644 4645 if (upl->flags & UPL_INTERNAL) { 4646 lite_list = (wpl_array_t) 4647 ((((uintptr_t)upl) + sizeof(struct upl)) 4648 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 4649 } else { 4650 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl)); 4651 } 4652 object = upl->map_object; 4653 upl->map_object = vm_object_allocate(upl->size); 4654 4655 vm_object_lock(upl->map_object); 4656 4657 upl->map_object->shadow = object; 4658 upl->map_object->pageout = TRUE; 4659 upl->map_object->can_persist = FALSE; 4660 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 4661 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset; 4662 upl->map_object->wimg_bits = object->wimg_bits; 4663 offset = upl->map_object->vo_shadow_offset; 4664 new_offset = 0; 4665 size = upl->size; 4666 4667 upl->flags |= UPL_SHADOWED; 4668 4669 while (size) { 4670 pg_num = (unsigned int) (new_offset / PAGE_SIZE); 4671 assert(pg_num == new_offset / PAGE_SIZE); 4672 4673 if (lite_list[pg_num>>5] & (1 << (pg_num & 
31))) { 4674 4675 VM_PAGE_GRAB_FICTITIOUS(alias_page); 4676 4677 vm_object_lock(object); 4678 4679 m = vm_page_lookup(object, offset); 4680 if (m == VM_PAGE_NULL) { 4681 panic("vm_upl_map: page missing\n"); 4682 } 4683 4684 /* 4685 * Convert the fictitious page to a private 4686 * shadow of the real page. 4687 */ 4688 assert(alias_page->fictitious); 4689 alias_page->fictitious = FALSE; 4690 alias_page->private = TRUE; 4691 alias_page->pageout = TRUE; 4692 /* 4693 * since m is a page in the upl it must 4694 * already be wired or BUSY, so it's 4695 * safe to assign the underlying physical 4696 * page to the alias 4697 */ 4698 alias_page->phys_page = m->phys_page; 4699 4700 vm_object_unlock(object); 4701 4702 vm_page_lockspin_queues(); 4703 vm_page_wire(alias_page); 4704 vm_page_unlock_queues(); 4705 4706 /* 4707 * ENCRYPTED SWAP: 4708 * The virtual page ("m") has to be wired in some way 4709 * here or its physical page ("m->phys_page") could 4710 * be recycled at any time. 4711 * Assuming this is enforced by the caller, we can't 4712 * get an encrypted page here. Since the encryption 4713 * key depends on the VM page's "pager" object and 4714 * the "paging_offset", we couldn't handle 2 pageable 4715 * VM pages (with different pagers and paging_offsets) 4716 * sharing the same physical page: we could end up 4717 * encrypting with one key (via one VM page) and 4718 * decrypting with another key (via the alias VM page). 4719 */ 4720 ASSERT_PAGE_DECRYPTED(m); 4721 4722 vm_page_insert(alias_page, upl->map_object, new_offset); 4723 4724 assert(!alias_page->wanted); 4725 alias_page->busy = FALSE; 4726 alias_page->absent = FALSE; 4727 } 4728 size -= PAGE_SIZE; 4729 offset += PAGE_SIZE_64; 4730 new_offset += PAGE_SIZE_64; 4731 } 4732 vm_object_unlock(upl->map_object); 4733 } 4734 if (upl->flags & UPL_SHADOWED) 4735 offset = 0; 4736 else 4737 offset = upl->offset - upl->map_object->paging_offset; 4738 4739 size = upl->size; 4740 4741 vm_object_reference(upl->map_object); 4742 4743 if(!isVectorUPL) { 4744 *dst_addr = 0; 4745 /* 4746 * NEED A UPL_MAP ALIAS 4747 */ 4748 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, 4749 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE, 4750 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); 4751 4752 if (kr != KERN_SUCCESS) { 4753 upl_unlock(upl); 4754 return(kr); 4755 } 4756 } 4757 else { 4758 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, 4759 VM_FLAGS_FIXED, upl->map_object, offset, FALSE, 4760 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); 4761 if(kr) 4762 panic("vm_map_enter failed for a Vector UPL\n"); 4763 } 4764 vm_object_lock(upl->map_object); 4765 4766 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) { 4767 m = vm_page_lookup(upl->map_object, offset); 4768 4769 if (m) { 4770 m->pmapped = TRUE; 4771 4772 /* CODE SIGNING ENFORCEMENT: page has been wpmapped, 4773 * but only in kernel space. If this was on a user map, 4774 * we'd have to set the wpmapped bit. 
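 * (the assert(map == kernel_map) just below is what guarantees this
 * shortcut is never taken for a user-space mapping)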
*/ 4775 /* m->wpmapped = TRUE; */ 4776 assert(map==kernel_map); 4777 4778 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, VM_PROT_NONE, 0, TRUE); 4779 } 4780 offset += PAGE_SIZE_64; 4781 } 4782 vm_object_unlock(upl->map_object); 4783 4784 /* 4785 * hold a reference for the mapping 4786 */ 4787 upl->ref_count++; 4788 upl->flags |= UPL_PAGE_LIST_MAPPED; 4789 upl->kaddr = (vm_offset_t) *dst_addr; 4790 assert(upl->kaddr == *dst_addr); 4791 4792 if(isVectorUPL) 4793 goto process_upl_to_enter; 4794 4795 upl_unlock(upl); 4796 4797 return KERN_SUCCESS; 4798} 4799 4800/* 4801 * Internal routine to remove a UPL mapping from a VM map. 4802 * 4803 * XXX - This should just be doable through a standard 4804 * vm_map_remove() operation. Otherwise, implicit clean-up 4805 * of the target map won't be able to correctly remove 4806 * these (and release the reference on the UPL). Having 4807 * to do this means we can't map these into user-space 4808 * maps yet. 4809 */ 4810kern_return_t 4811vm_map_remove_upl( 4812 vm_map_t map, 4813 upl_t upl) 4814{ 4815 vm_address_t addr; 4816 upl_size_t size; 4817 int isVectorUPL = 0, curr_upl = 0; 4818 upl_t vector_upl = NULL; 4819 4820 if (upl == UPL_NULL) 4821 return KERN_INVALID_ARGUMENT; 4822 4823 if((isVectorUPL = vector_upl_is_valid(upl))) { 4824 int unmapped=0, valid_upls=0; 4825 vector_upl = upl; 4826 upl_lock(vector_upl); 4827 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { 4828 upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); 4829 if(upl == NULL) 4830 continue; 4831 valid_upls++; 4832 if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) 4833 unmapped++; 4834 } 4835 4836 if(unmapped) { 4837 if(unmapped != valid_upls) 4838 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls); 4839 else { 4840 upl_unlock(vector_upl); 4841 return KERN_FAILURE; 4842 } 4843 } 4844 curr_upl=0; 4845 } 4846 else 4847 upl_lock(upl); 4848 4849process_upl_to_remove: 4850 if(isVectorUPL) { 4851 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { 4852 vm_map_t v_upl_submap; 4853 vm_offset_t v_upl_submap_dst_addr; 4854 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr); 4855 4856 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS); 4857 vm_map_deallocate(v_upl_submap); 4858 upl_unlock(vector_upl); 4859 return KERN_SUCCESS; 4860 } 4861 4862 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); 4863 if(upl == NULL) 4864 goto process_upl_to_remove; 4865 } 4866 4867 if (upl->flags & UPL_PAGE_LIST_MAPPED) { 4868 addr = upl->kaddr; 4869 size = upl->size; 4870 4871 assert(upl->ref_count > 1); 4872 upl->ref_count--; /* removing mapping ref */ 4873 4874 upl->flags &= ~UPL_PAGE_LIST_MAPPED; 4875 upl->kaddr = (vm_offset_t) 0; 4876 4877 if(!isVectorUPL) { 4878 upl_unlock(upl); 4879 4880 vm_map_remove(map, 4881 vm_map_trunc_page(addr), 4882 vm_map_round_page(addr + size), 4883 VM_MAP_NO_FLAGS); 4884 4885 return KERN_SUCCESS; 4886 } 4887 else { 4888 /* 4889 * If it's a Vectored UPL, we'll be removing the entire 4890 * submap anyways, so no need to remove individual UPL 4891 * element mappings from within the submap 4892 */ 4893 goto process_upl_to_remove; 4894 } 4895 } 4896 upl_unlock(upl); 4897 4898 return KERN_FAILURE; 4899} 4900 4901 4902kern_return_t 4903upl_commit_range( 4904 upl_t upl, 4905 upl_offset_t offset, 4906 upl_size_t size, 4907 int flags, 4908 upl_page_info_t *page_list, 4909 mach_msg_type_number_t count, 4910 boolean_t *empty) 4911{ 4912 upl_size_t xfer_size, subupl_size = 
size; 4913 vm_object_t shadow_object; 4914 vm_object_t object; 4915 vm_object_offset_t target_offset; 4916 upl_offset_t subupl_offset = offset; 4917 int entry; 4918 wpl_array_t lite_list; 4919 int occupied; 4920 int clear_refmod = 0; 4921 int pgpgout_count = 0; 4922 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 4923 struct vm_page_delayed_work *dwp; 4924 int dw_count; 4925 int dw_limit; 4926 int isVectorUPL = 0; 4927 upl_t vector_upl = NULL; 4928 boolean_t should_be_throttled = FALSE; 4929 4930 *empty = FALSE; 4931 4932 if (upl == UPL_NULL) 4933 return KERN_INVALID_ARGUMENT; 4934 4935 if (count == 0) 4936 page_list = NULL; 4937 4938 if((isVectorUPL = vector_upl_is_valid(upl))) { 4939 vector_upl = upl; 4940 upl_lock(vector_upl); 4941 } 4942 else 4943 upl_lock(upl); 4944 4945process_upl_to_commit: 4946 4947 if(isVectorUPL) { 4948 size = subupl_size; 4949 offset = subupl_offset; 4950 if(size == 0) { 4951 upl_unlock(vector_upl); 4952 return KERN_SUCCESS; 4953 } 4954 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); 4955 if(upl == NULL) { 4956 upl_unlock(vector_upl); 4957 return KERN_FAILURE; 4958 } 4959 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl); 4960 subupl_size -= size; 4961 subupl_offset += size; 4962 } 4963 4964#if UPL_DEBUG 4965 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { 4966 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); 4967 4968 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; 4969 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); 4970 4971 upl->upl_commit_index++; 4972 } 4973#endif 4974 if (upl->flags & UPL_DEVICE_MEMORY) 4975 xfer_size = 0; 4976 else if ((offset + size) <= upl->size) 4977 xfer_size = size; 4978 else { 4979 if(!isVectorUPL) 4980 upl_unlock(upl); 4981 else { 4982 upl_unlock(vector_upl); 4983 } 4984 return KERN_FAILURE; 4985 } 4986 if (upl->flags & UPL_SET_DIRTY) 4987 flags |= UPL_COMMIT_SET_DIRTY; 4988 if (upl->flags & UPL_CLEAR_DIRTY) 4989 flags |= UPL_COMMIT_CLEAR_DIRTY; 4990 4991 if (upl->flags & UPL_INTERNAL) 4992 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl)) 4993 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 4994 else 4995 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 4996 4997 object = upl->map_object; 4998 4999 if (upl->flags & UPL_SHADOWED) { 5000 vm_object_lock(object); 5001 shadow_object = object->shadow; 5002 } else { 5003 shadow_object = object; 5004 } 5005 entry = offset/PAGE_SIZE; 5006 target_offset = (vm_object_offset_t)offset; 5007 5008 if (upl->flags & UPL_KERNEL_OBJECT) 5009 vm_object_lock_shared(shadow_object); 5010 else 5011 vm_object_lock(shadow_object); 5012 5013 if (upl->flags & UPL_ACCESS_BLOCKED) { 5014 assert(shadow_object->blocked_access); 5015 shadow_object->blocked_access = FALSE; 5016 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); 5017 } 5018 5019 if (shadow_object->code_signed) { 5020 /* 5021 * CODE SIGNING: 5022 * If the object is code-signed, do not let this UPL tell 5023 * us if the pages are valid or not. Let the pages be 5024 * validated by VM the normal way (when they get mapped or 5025 * copied). 5026 */ 5027 flags &= ~UPL_COMMIT_CS_VALIDATED; 5028 } 5029 if (! page_list) { 5030 /* 5031 * No page list to get the code-signing info from !? 
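 * without per-page info there is nothing to take the cs_validated and
 * cs_tainted bits from, so drop UPL_COMMIT_CS_VALIDATED and let the pages
 * be re-validated through the normal path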
5032 */ 5033 flags &= ~UPL_COMMIT_CS_VALIDATED; 5034 } 5035 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal) 5036 should_be_throttled = TRUE; 5037 5038 dwp = &dw_array[0]; 5039 dw_count = 0; 5040 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 5041 5042 while (xfer_size) { 5043 vm_page_t t, m; 5044 5045 dwp->dw_mask = 0; 5046 clear_refmod = 0; 5047 5048 m = VM_PAGE_NULL; 5049 5050 if (upl->flags & UPL_LITE) { 5051 unsigned int pg_num; 5052 5053 pg_num = (unsigned int) (target_offset/PAGE_SIZE); 5054 assert(pg_num == target_offset/PAGE_SIZE); 5055 5056 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 5057 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); 5058 5059 if (!(upl->flags & UPL_KERNEL_OBJECT)) 5060 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); 5061 } 5062 } 5063 if (upl->flags & UPL_SHADOWED) { 5064 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { 5065 5066 t->pageout = FALSE; 5067 5068 VM_PAGE_FREE(t); 5069 5070 if (m == VM_PAGE_NULL) 5071 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); 5072 } 5073 } 5074 if ((upl->flags & UPL_KERNEL_OBJECT) || m == VM_PAGE_NULL) 5075 goto commit_next_page; 5076 5077 if (flags & UPL_COMMIT_CS_VALIDATED) { 5078 /* 5079 * CODE SIGNING: 5080 * Set the code signing bits according to 5081 * what the UPL says they should be. 5082 */ 5083 m->cs_validated = page_list[entry].cs_validated; 5084 m->cs_tainted = page_list[entry].cs_tainted; 5085 } 5086 if (upl->flags & UPL_IO_WIRE) { 5087 5088 if (page_list) 5089 page_list[entry].phys_addr = 0; 5090 5091 if (flags & UPL_COMMIT_SET_DIRTY) { 5092 SET_PAGE_DIRTY(m, FALSE); 5093 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) { 5094 m->dirty = FALSE; 5095 5096 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 5097 m->cs_validated && !m->cs_tainted) { 5098 /* 5099 * CODE SIGNING: 5100 * This page is no longer dirty 5101 * but could have been modified, 5102 * so it will need to be 5103 * re-validated. 5104 */ 5105 m->cs_validated = FALSE; 5106#if DEVELOPMENT || DEBUG 5107 vm_cs_validated_resets++; 5108#endif 5109 pmap_disconnect(m->phys_page); 5110 } 5111 clear_refmod |= VM_MEM_MODIFIED; 5112 } 5113 if (flags & UPL_COMMIT_INACTIVATE) { 5114 dwp->dw_mask |= DW_vm_page_deactivate_internal; 5115 clear_refmod |= VM_MEM_REFERENCED; 5116 } 5117 if (upl->flags & UPL_ACCESS_BLOCKED) { 5118 /* 5119 * We blocked access to the pages in this UPL. 5120 * Clear the "busy" bit and wake up any waiter 5121 * for this page. 5122 */ 5123 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 5124 } 5125 if (m->absent) { 5126 if (flags & UPL_COMMIT_FREE_ABSENT) 5127 dwp->dw_mask |= DW_vm_page_free; 5128 else { 5129 m->absent = FALSE; 5130 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 5131 5132 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal)) 5133 dwp->dw_mask |= DW_vm_page_activate; 5134 } 5135 } else 5136 dwp->dw_mask |= DW_vm_page_unwire; 5137 5138 goto commit_next_page; 5139 } 5140 if (page_list) 5141 page_list[entry].phys_addr = 0; 5142 5143 /* 5144 * make sure to clear the hardware 5145 * modify or reference bits before 5146 * releasing the BUSY bit on this page 5147 * otherwise we risk losing a legitimate 5148 * change of state 5149 */ 5150 if (flags & UPL_COMMIT_CLEAR_DIRTY) { 5151 m->dirty = FALSE; 5152 5153 clear_refmod |= VM_MEM_MODIFIED; 5154 } 5155 if (m->laundry) 5156 dwp->dw_mask |= DW_vm_pageout_throttle_up; 5157 5158 if (VM_PAGE_WIRED(m)) 5159 m->pageout = FALSE; 5160 5161 if (! 
(flags & UPL_COMMIT_CS_VALIDATED) && 5162 m->cs_validated && !m->cs_tainted) { 5163 /* 5164 * CODE SIGNING: 5165 * This page is no longer dirty 5166 * but could have been modified, 5167 * so it will need to be 5168 * re-validated. 5169 */ 5170 m->cs_validated = FALSE; 5171#if DEVELOPMENT || DEBUG 5172 vm_cs_validated_resets++; 5173#endif 5174 pmap_disconnect(m->phys_page); 5175 } 5176 if (m->overwriting) { 5177 /* 5178 * the (COPY_OUT_FROM == FALSE) request_page_list case 5179 */ 5180 if (m->busy) { 5181 m->absent = FALSE; 5182 5183 dwp->dw_mask |= DW_clear_busy; 5184 } else { 5185 /* 5186 * alternate (COPY_OUT_FROM == FALSE) page_list case 5187 * Occurs when the original page was wired 5188 * at the time of the list request 5189 */ 5190 assert(VM_PAGE_WIRED(m)); 5191 5192 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */ 5193 } 5194 m->overwriting = FALSE; 5195 } 5196 if (m->encrypted_cleaning == TRUE) { 5197 m->encrypted_cleaning = FALSE; 5198 5199 dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP; 5200 } 5201 m->cleaning = FALSE; 5202 5203 if (m->pageout) { 5204 /* 5205 * With the clean queue enabled, UPL_PAGEOUT should 5206 * no longer set the pageout bit. It's pages now go 5207 * to the clean queue. 5208 */ 5209 assert(!(flags & UPL_PAGEOUT)); 5210 5211 m->pageout = FALSE; 5212#if MACH_CLUSTER_STATS 5213 if (m->wanted) vm_pageout_target_collisions++; 5214#endif 5215 if ((flags & UPL_COMMIT_SET_DIRTY) || 5216 (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) { 5217 /* 5218 * page was re-dirtied after we started 5219 * the pageout... reactivate it since 5220 * we don't know whether the on-disk 5221 * copy matches what is now in memory 5222 */ 5223 SET_PAGE_DIRTY(m, FALSE); 5224 5225 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP; 5226 5227 if (upl->flags & UPL_PAGEOUT) { 5228 CLUSTER_STAT(vm_pageout_target_page_dirtied++;) 5229 VM_STAT_INCR(reactivations); 5230 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL); 5231 } 5232 } else { 5233 /* 5234 * page has been successfully cleaned 5235 * go ahead and free it for other use 5236 */ 5237 if (m->object->internal) { 5238 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL); 5239 } else { 5240 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL); 5241 } 5242 m->dirty = FALSE; 5243 m->busy = TRUE; 5244 5245 dwp->dw_mask |= DW_vm_page_free; 5246 } 5247 goto commit_next_page; 5248 } 5249#if MACH_CLUSTER_STATS 5250 if (m->wpmapped) 5251 m->dirty = pmap_is_modified(m->phys_page); 5252 5253 if (m->dirty) vm_pageout_cluster_dirtied++; 5254 else vm_pageout_cluster_cleaned++; 5255 if (m->wanted) vm_pageout_cluster_collisions++; 5256#endif 5257 /* 5258 * It is a part of the semantic of COPYOUT_FROM 5259 * UPLs that a commit implies cache sync 5260 * between the vm page and the backing store 5261 * this can be used to strip the precious bit 5262 * as well as clean 5263 */ 5264 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS)) 5265 m->precious = FALSE; 5266 5267 if (flags & UPL_COMMIT_SET_DIRTY) { 5268 SET_PAGE_DIRTY(m, FALSE); 5269 } else { 5270 m->dirty = FALSE; 5271 } 5272 5273 /* with the clean queue on, move *all* cleaned pages to the clean queue */ 5274 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) { 5275 pgpgout_count++; 5276 5277 /* this page used to be dirty; now it's on the clean queue. 
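 * record that in m->was_dirty and let the delayed-work machinery
 * (DW_enqueue_cleaned) move it onto the cleaned queue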
*/ 5278 m->was_dirty = TRUE; 5279 5280 dwp->dw_mask |= DW_enqueue_cleaned; 5281 vm_pageout_enqueued_cleaned_from_inactive_dirty++; 5282 } else if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) { 5283 /* 5284 * page coming back in from being 'frozen'... 5285 * it was dirty before it was frozen, so keep it so 5286 * the vm_page_activate will notice that it really belongs 5287 * on the throttle queue and put it there 5288 */ 5289 SET_PAGE_DIRTY(m, FALSE); 5290 dwp->dw_mask |= DW_vm_page_activate; 5291 5292 } else { 5293 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) { 5294 dwp->dw_mask |= DW_vm_page_deactivate_internal; 5295 clear_refmod |= VM_MEM_REFERENCED; 5296 } else if (!m->active && !m->inactive && !m->speculative) { 5297 5298 if (m->clustered || (flags & UPL_COMMIT_SPECULATE)) 5299 dwp->dw_mask |= DW_vm_page_speculate; 5300 else if (m->reference) 5301 dwp->dw_mask |= DW_vm_page_activate; 5302 else { 5303 dwp->dw_mask |= DW_vm_page_deactivate_internal; 5304 clear_refmod |= VM_MEM_REFERENCED; 5305 } 5306 } 5307 } 5308 if (upl->flags & UPL_ACCESS_BLOCKED) { 5309 /* 5310 * We blocked access to the pages in this UPL. 5311 * Clear the "busy" bit on this page before we 5312 * wake up any waiter. 5313 */ 5314 dwp->dw_mask |= DW_clear_busy; 5315 } 5316 /* 5317 * Wakeup any thread waiting for the page to be un-cleaning. 5318 */ 5319 dwp->dw_mask |= DW_PAGE_WAKEUP; 5320 5321commit_next_page: 5322 if (clear_refmod) 5323 pmap_clear_refmod(m->phys_page, clear_refmod); 5324 5325 target_offset += PAGE_SIZE_64; 5326 xfer_size -= PAGE_SIZE; 5327 entry++; 5328 5329 if (dwp->dw_mask) { 5330 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { 5331 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); 5332 5333 if (dw_count >= dw_limit) { 5334 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 5335 5336 dwp = &dw_array[0]; 5337 dw_count = 0; 5338 } 5339 } else { 5340 if (dwp->dw_mask & DW_clear_busy) 5341 m->busy = FALSE; 5342 5343 if (dwp->dw_mask & DW_PAGE_WAKEUP) 5344 PAGE_WAKEUP(m); 5345 } 5346 } 5347 } 5348 if (dw_count) 5349 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 5350 5351 occupied = 1; 5352 5353 if (upl->flags & UPL_DEVICE_MEMORY) { 5354 occupied = 0; 5355 } else if (upl->flags & UPL_LITE) { 5356 int pg_num; 5357 int i; 5358 5359 pg_num = upl->size/PAGE_SIZE; 5360 pg_num = (pg_num + 31) >> 5; 5361 occupied = 0; 5362 5363 for (i = 0; i < pg_num; i++) { 5364 if (lite_list[i] != 0) { 5365 occupied = 1; 5366 break; 5367 } 5368 } 5369 } else { 5370 if (queue_empty(&upl->map_object->memq)) 5371 occupied = 0; 5372 } 5373 if (occupied == 0) { 5374 /* 5375 * If this UPL element belongs to a Vector UPL and is 5376 * empty, then this is the right function to deallocate 5377 * it. So go ahead and set the *empty variable. The flag 5378 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view 5379 * should be considered relevant for the Vector UPL and not 5380 * the internal UPLs. 5381 */ 5382 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) 5383 *empty = TRUE; 5384 5385 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { 5386 /* 5387 * this is not a paging object 5388 * so we need to drop the paging reference 5389 * that was taken when we created the UPL 5390 * against this object 5391 */ 5392 vm_object_activity_end(shadow_object); 5393 vm_object_collapse(shadow_object, 0, TRUE); 5394 } else { 5395 /* 5396 * we donated the paging reference to 5397 * the map object... 
vm_pageout_object_terminate 5398 * will drop this reference 5399 */ 5400 } 5401 } 5402 vm_object_unlock(shadow_object); 5403 if (object != shadow_object) 5404 vm_object_unlock(object); 5405 5406 if(!isVectorUPL) 5407 upl_unlock(upl); 5408 else { 5409 /* 5410 * If we completed our operations on an UPL that is 5411 * part of a Vectored UPL and if empty is TRUE, then 5412 * we should go ahead and deallocate this UPL element. 5413 * Then we check if this was the last of the UPL elements 5414 * within that Vectored UPL. If so, set empty to TRUE 5415 * so that in ubc_upl_commit_range or ubc_upl_commit, we 5416 * can go ahead and deallocate the Vector UPL too. 5417 */ 5418 if(*empty==TRUE) { 5419 *empty = vector_upl_set_subupl(vector_upl, upl, 0); 5420 upl_deallocate(upl); 5421 } 5422 goto process_upl_to_commit; 5423 } 5424 5425 if (pgpgout_count) { 5426 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL); 5427 } 5428 5429 return KERN_SUCCESS; 5430} 5431 5432kern_return_t 5433upl_abort_range( 5434 upl_t upl, 5435 upl_offset_t offset, 5436 upl_size_t size, 5437 int error, 5438 boolean_t *empty) 5439{ 5440 upl_page_info_t *user_page_list = NULL; 5441 upl_size_t xfer_size, subupl_size = size; 5442 vm_object_t shadow_object; 5443 vm_object_t object; 5444 vm_object_offset_t target_offset; 5445 upl_offset_t subupl_offset = offset; 5446 int entry; 5447 wpl_array_t lite_list; 5448 int occupied; 5449 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 5450 struct vm_page_delayed_work *dwp; 5451 int dw_count; 5452 int dw_limit; 5453 int isVectorUPL = 0; 5454 upl_t vector_upl = NULL; 5455 5456 *empty = FALSE; 5457 5458 if (upl == UPL_NULL) 5459 return KERN_INVALID_ARGUMENT; 5460 5461 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) ) 5462 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty); 5463 5464 if((isVectorUPL = vector_upl_is_valid(upl))) { 5465 vector_upl = upl; 5466 upl_lock(vector_upl); 5467 } 5468 else 5469 upl_lock(upl); 5470 5471process_upl_to_abort: 5472 if(isVectorUPL) { 5473 size = subupl_size; 5474 offset = subupl_offset; 5475 if(size == 0) { 5476 upl_unlock(vector_upl); 5477 return KERN_SUCCESS; 5478 } 5479 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); 5480 if(upl == NULL) { 5481 upl_unlock(vector_upl); 5482 return KERN_FAILURE; 5483 } 5484 subupl_size -= size; 5485 subupl_offset += size; 5486 } 5487 5488 *empty = FALSE; 5489 5490#if UPL_DEBUG 5491 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { 5492 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); 5493 5494 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; 5495 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); 5496 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1; 5497 5498 upl->upl_commit_index++; 5499 } 5500#endif 5501 if (upl->flags & UPL_DEVICE_MEMORY) 5502 xfer_size = 0; 5503 else if ((offset + size) <= upl->size) 5504 xfer_size = size; 5505 else { 5506 if(!isVectorUPL) 5507 upl_unlock(upl); 5508 else { 5509 upl_unlock(vector_upl); 5510 } 5511 5512 return KERN_FAILURE; 5513 } 5514 if (upl->flags & UPL_INTERNAL) { 5515 lite_list = (wpl_array_t) 5516 ((((uintptr_t)upl) + sizeof(struct upl)) 5517 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 5518 5519 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 5520 } else { 5521 lite_list = (wpl_array_t) 5522 (((uintptr_t)upl) + sizeof(struct upl)); 5523 } 5524 
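	/*
	 * Note on the lite-list indexing used below: the lite list is a bitmap
	 * with one bit per page of the UPL, packed 32 bits to a word, so page
	 * N is bit (N & 31) of word (N >> 5) -- e.g. page 40 of the UPL is
	 * bit 8 of lite_list[1].
	 */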
object = upl->map_object; 5525 5526 if (upl->flags & UPL_SHADOWED) { 5527 vm_object_lock(object); 5528 shadow_object = object->shadow; 5529 } else 5530 shadow_object = object; 5531 5532 entry = offset/PAGE_SIZE; 5533 target_offset = (vm_object_offset_t)offset; 5534 5535 if (upl->flags & UPL_KERNEL_OBJECT) 5536 vm_object_lock_shared(shadow_object); 5537 else 5538 vm_object_lock(shadow_object); 5539 5540 if (upl->flags & UPL_ACCESS_BLOCKED) { 5541 assert(shadow_object->blocked_access); 5542 shadow_object->blocked_access = FALSE; 5543 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); 5544 } 5545 5546 dwp = &dw_array[0]; 5547 dw_count = 0; 5548 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 5549 5550 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT)) 5551 panic("upl_abort_range: kernel_object being DUMPED"); 5552 5553 while (xfer_size) { 5554 vm_page_t t, m; 5555 unsigned int pg_num; 5556 boolean_t needed; 5557 5558 pg_num = (unsigned int) (target_offset/PAGE_SIZE); 5559 assert(pg_num == target_offset/PAGE_SIZE); 5560 5561 needed = FALSE; 5562 5563 if (user_page_list) 5564 needed = user_page_list[pg_num].needed; 5565 5566 dwp->dw_mask = 0; 5567 m = VM_PAGE_NULL; 5568 5569 if (upl->flags & UPL_LITE) { 5570 5571 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 5572 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); 5573 5574 if ( !(upl->flags & UPL_KERNEL_OBJECT)) 5575 m = vm_page_lookup(shadow_object, target_offset + 5576 (upl->offset - shadow_object->paging_offset)); 5577 } 5578 } 5579 if (upl->flags & UPL_SHADOWED) { 5580 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { 5581 t->pageout = FALSE; 5582 5583 VM_PAGE_FREE(t); 5584 5585 if (m == VM_PAGE_NULL) 5586 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); 5587 } 5588 } 5589 if ((upl->flags & UPL_KERNEL_OBJECT)) 5590 goto abort_next_page; 5591 5592 if (m != VM_PAGE_NULL) { 5593 5594 if (m->absent) { 5595 boolean_t must_free = TRUE; 5596 5597 /* 5598 * COPYOUT = FALSE case 5599 * check for error conditions which must 5600 * be passed back to the pages customer 5601 */ 5602 if (error & UPL_ABORT_RESTART) { 5603 m->restart = TRUE; 5604 m->absent = FALSE; 5605 m->unusual = TRUE; 5606 must_free = FALSE; 5607 } else if (error & UPL_ABORT_UNAVAILABLE) { 5608 m->restart = FALSE; 5609 m->unusual = TRUE; 5610 must_free = FALSE; 5611 } else if (error & UPL_ABORT_ERROR) { 5612 m->restart = FALSE; 5613 m->absent = FALSE; 5614 m->error = TRUE; 5615 m->unusual = TRUE; 5616 must_free = FALSE; 5617 } 5618 if (m->clustered && needed == FALSE) { 5619 /* 5620 * This page was a part of a speculative 5621 * read-ahead initiated by the kernel 5622 * itself. No one is expecting this 5623 * page and no one will clean up its 5624 * error state if it ever becomes valid 5625 * in the future. 5626 * We have to free it here. 5627 */ 5628 must_free = TRUE; 5629 } 5630 5631 /* 5632 * ENCRYPTED SWAP: 5633 * If the page was already encrypted, 5634 * we don't really need to decrypt it 5635 * now. It will get decrypted later, 5636 * on demand, as soon as someone needs 5637 * to access its contents. 5638 */ 5639 5640 m->cleaning = FALSE; 5641 m->encrypted_cleaning = FALSE; 5642 5643 if (m->overwriting && !m->busy) { 5644 /* 5645 * this shouldn't happen since 5646 * this is an 'absent' page, but 5647 * it doesn't hurt to check for 5648 * the 'alternate' method of 5649 * stabilizing the page... 
5650 * we will mark 'busy' to be cleared 5651 * in the following code which will 5652 * take care of the primary stabilzation 5653 * method (i.e. setting 'busy' to TRUE) 5654 */ 5655 dwp->dw_mask |= DW_vm_page_unwire; 5656 } 5657 m->overwriting = FALSE; 5658 5659 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 5660 5661 if (must_free == TRUE) 5662 dwp->dw_mask |= DW_vm_page_free; 5663 else 5664 dwp->dw_mask |= DW_vm_page_activate; 5665 } else { 5666 /* 5667 * Handle the trusted pager throttle. 5668 */ 5669 if (m->laundry) 5670 dwp->dw_mask |= DW_vm_pageout_throttle_up; 5671 5672 if (upl->flags & UPL_ACCESS_BLOCKED) { 5673 /* 5674 * We blocked access to the pages in this UPL. 5675 * Clear the "busy" bit and wake up any waiter 5676 * for this page. 5677 */ 5678 dwp->dw_mask |= DW_clear_busy; 5679 } 5680 if (m->overwriting) { 5681 if (m->busy) 5682 dwp->dw_mask |= DW_clear_busy; 5683 else { 5684 /* 5685 * deal with the 'alternate' method 5686 * of stabilizing the page... 5687 * we will either free the page 5688 * or mark 'busy' to be cleared 5689 * in the following code which will 5690 * take care of the primary stabilzation 5691 * method (i.e. setting 'busy' to TRUE) 5692 */ 5693 dwp->dw_mask |= DW_vm_page_unwire; 5694 } 5695 m->overwriting = FALSE; 5696 } 5697 if (m->encrypted_cleaning == TRUE) { 5698 m->encrypted_cleaning = FALSE; 5699 5700 dwp->dw_mask |= DW_clear_busy; 5701 } 5702 m->pageout = FALSE; 5703 m->cleaning = FALSE; 5704#if MACH_PAGEMAP 5705 vm_external_state_clr(m->object->existence_map, m->offset); 5706#endif /* MACH_PAGEMAP */ 5707 if (error & UPL_ABORT_DUMP_PAGES) { 5708 pmap_disconnect(m->phys_page); 5709 5710 dwp->dw_mask |= DW_vm_page_free; 5711 } else { 5712 if (!(dwp->dw_mask & DW_vm_page_unwire)) { 5713 if (error & UPL_ABORT_REFERENCE) { 5714 /* 5715 * we've been told to explictly 5716 * reference this page... for 5717 * file I/O, this is done by 5718 * implementing an LRU on the inactive q 5719 */ 5720 dwp->dw_mask |= DW_vm_page_lru; 5721 5722 } else if (!m->active && !m->inactive && !m->speculative) 5723 dwp->dw_mask |= DW_vm_page_deactivate_internal; 5724 } 5725 dwp->dw_mask |= DW_PAGE_WAKEUP; 5726 } 5727 } 5728 } 5729abort_next_page: 5730 target_offset += PAGE_SIZE_64; 5731 xfer_size -= PAGE_SIZE; 5732 entry++; 5733 5734 if (dwp->dw_mask) { 5735 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { 5736 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); 5737 5738 if (dw_count >= dw_limit) { 5739 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 5740 5741 dwp = &dw_array[0]; 5742 dw_count = 0; 5743 } 5744 } else { 5745 if (dwp->dw_mask & DW_clear_busy) 5746 m->busy = FALSE; 5747 5748 if (dwp->dw_mask & DW_PAGE_WAKEUP) 5749 PAGE_WAKEUP(m); 5750 } 5751 } 5752 } 5753 if (dw_count) 5754 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 5755 5756 occupied = 1; 5757 5758 if (upl->flags & UPL_DEVICE_MEMORY) { 5759 occupied = 0; 5760 } else if (upl->flags & UPL_LITE) { 5761 int pg_num; 5762 int i; 5763 5764 pg_num = upl->size/PAGE_SIZE; 5765 pg_num = (pg_num + 31) >> 5; 5766 occupied = 0; 5767 5768 for (i = 0; i < pg_num; i++) { 5769 if (lite_list[i] != 0) { 5770 occupied = 1; 5771 break; 5772 } 5773 } 5774 } else { 5775 if (queue_empty(&upl->map_object->memq)) 5776 occupied = 0; 5777 } 5778 if (occupied == 0) { 5779 /* 5780 * If this UPL element belongs to a Vector UPL and is 5781 * empty, then this is the right function to deallocate 5782 * it. So go ahead set the *empty variable. 
The flag 5783 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view 5784 * should be considered relevant for the Vector UPL and 5785 * not the internal UPLs. 5786 */ 5787 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) 5788 *empty = TRUE; 5789 5790 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { 5791 /* 5792 * this is not a paging object 5793 * so we need to drop the paging reference 5794 * that was taken when we created the UPL 5795 * against this object 5796 */ 5797 vm_object_activity_end(shadow_object); 5798 vm_object_collapse(shadow_object, 0, TRUE); 5799 } else { 5800 /* 5801 * we dontated the paging reference to 5802 * the map object... vm_pageout_object_terminate 5803 * will drop this reference 5804 */ 5805 } 5806 } 5807 vm_object_unlock(shadow_object); 5808 if (object != shadow_object) 5809 vm_object_unlock(object); 5810 5811 if(!isVectorUPL) 5812 upl_unlock(upl); 5813 else { 5814 /* 5815 * If we completed our operations on an UPL that is 5816 * part of a Vectored UPL and if empty is TRUE, then 5817 * we should go ahead and deallocate this UPL element. 5818 * Then we check if this was the last of the UPL elements 5819 * within that Vectored UPL. If so, set empty to TRUE 5820 * so that in ubc_upl_abort_range or ubc_upl_abort, we 5821 * can go ahead and deallocate the Vector UPL too. 5822 */ 5823 if(*empty == TRUE) { 5824 *empty = vector_upl_set_subupl(vector_upl, upl,0); 5825 upl_deallocate(upl); 5826 } 5827 goto process_upl_to_abort; 5828 } 5829 5830 return KERN_SUCCESS; 5831} 5832 5833 5834kern_return_t 5835upl_abort( 5836 upl_t upl, 5837 int error) 5838{ 5839 boolean_t empty; 5840 5841 return upl_abort_range(upl, 0, upl->size, error, &empty); 5842} 5843 5844 5845/* an option on commit should be wire */ 5846kern_return_t 5847upl_commit( 5848 upl_t upl, 5849 upl_page_info_t *page_list, 5850 mach_msg_type_number_t count) 5851{ 5852 boolean_t empty; 5853 5854 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty); 5855} 5856 5857void 5858vm_object_set_pmap_cache_attr( 5859 vm_object_t object, 5860 upl_page_info_array_t user_page_list, 5861 unsigned int num_pages, 5862 boolean_t batch_pmap_op) 5863{ 5864 unsigned int cache_attr = 0; 5865 5866 cache_attr = object->wimg_bits & VM_WIMG_MASK; 5867 assert(user_page_list); 5868 if (cache_attr != VM_WIMG_USE_DEFAULT) { 5869 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op); 5870 } 5871} 5872 5873unsigned int vm_object_iopl_request_sleep_for_cleaning = 0; 5874 5875kern_return_t 5876vm_object_iopl_request( 5877 vm_object_t object, 5878 vm_object_offset_t offset, 5879 upl_size_t size, 5880 upl_t *upl_ptr, 5881 upl_page_info_array_t user_page_list, 5882 unsigned int *page_list_count, 5883 int cntrl_flags) 5884{ 5885 vm_page_t dst_page; 5886 vm_object_offset_t dst_offset; 5887 upl_size_t xfer_size; 5888 upl_t upl = NULL; 5889 unsigned int entry; 5890 wpl_array_t lite_list = NULL; 5891 int no_zero_fill = FALSE; 5892 unsigned int size_in_pages; 5893 u_int32_t psize; 5894 kern_return_t ret; 5895 vm_prot_t prot; 5896 struct vm_object_fault_info fault_info; 5897 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 5898 struct vm_page_delayed_work *dwp; 5899 int dw_count; 5900 int dw_limit; 5901 int dw_index; 5902 5903 if (cntrl_flags & ~UPL_VALID_FLAGS) { 5904 /* 5905 * For forward compatibility's sake, 5906 * reject any unknown flag. 
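 * Any bit outside UPL_VALID_FLAGS makes (cntrl_flags & ~UPL_VALID_FLAGS)
 * non-zero, so a request using a flag this kernel does not know about
 * fails with KERN_INVALID_VALUE instead of being silently misinterpreted.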
5907 */ 5908 return KERN_INVALID_VALUE; 5909 } 5910 if (vm_lopage_needed == FALSE) 5911 cntrl_flags &= ~UPL_NEED_32BIT_ADDR; 5912 5913 if (cntrl_flags & UPL_NEED_32BIT_ADDR) { 5914 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) 5915 return KERN_INVALID_VALUE; 5916 5917 if (object->phys_contiguous) { 5918 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) 5919 return KERN_INVALID_ADDRESS; 5920 5921 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) 5922 return KERN_INVALID_ADDRESS; 5923 } 5924 } 5925 5926 if (cntrl_flags & UPL_ENCRYPT) { 5927 /* 5928 * ENCRYPTED SWAP: 5929 * The paging path doesn't use this interface, 5930 * so we don't support the UPL_ENCRYPT flag 5931 * here. We won't encrypt the pages. 5932 */ 5933 assert(! (cntrl_flags & UPL_ENCRYPT)); 5934 } 5935 if (cntrl_flags & UPL_NOZEROFILL) 5936 no_zero_fill = TRUE; 5937 5938 if (cntrl_flags & UPL_COPYOUT_FROM) 5939 prot = VM_PROT_READ; 5940 else 5941 prot = VM_PROT_READ | VM_PROT_WRITE; 5942 5943 if (((size/PAGE_SIZE) > MAX_UPL_SIZE) && !object->phys_contiguous) 5944 size = MAX_UPL_SIZE * PAGE_SIZE; 5945 5946 if (cntrl_flags & UPL_SET_INTERNAL) { 5947 if (page_list_count != NULL) 5948 *page_list_count = MAX_UPL_SIZE; 5949 } 5950 if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) && 5951 ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size))) 5952 return KERN_INVALID_ARGUMENT; 5953 5954 if ((!object->internal) && (object->paging_offset != 0)) 5955 panic("vm_object_iopl_request: external object with non-zero paging offset\n"); 5956 5957 5958 if (object->phys_contiguous) 5959 psize = PAGE_SIZE; 5960 else 5961 psize = size; 5962 5963 if (cntrl_flags & UPL_SET_INTERNAL) { 5964 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize); 5965 5966 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 5967 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + 5968 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t))); 5969 if (size == 0) { 5970 user_page_list = NULL; 5971 lite_list = NULL; 5972 } 5973 } else { 5974 upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize); 5975 5976 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 5977 if (size == 0) { 5978 lite_list = NULL; 5979 } 5980 } 5981 if (user_page_list) 5982 user_page_list[0].device = FALSE; 5983 *upl_ptr = upl; 5984 5985 upl->map_object = object; 5986 upl->size = size; 5987 5988 size_in_pages = size / PAGE_SIZE; 5989 5990 if (object == kernel_object && 5991 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) { 5992 upl->flags |= UPL_KERNEL_OBJECT; 5993#if UPL_DEBUG 5994 vm_object_lock(object); 5995#else 5996 vm_object_lock_shared(object); 5997#endif 5998 } else { 5999 vm_object_lock(object); 6000 vm_object_activity_begin(object); 6001 } 6002 /* 6003 * paging in progress also protects the paging_offset 6004 */ 6005 upl->offset = offset + object->paging_offset; 6006 6007 if (cntrl_flags & UPL_BLOCK_ACCESS) { 6008 /* 6009 * The user requested that access to the pages in this UPL 6010 * be blocked until the UPL is commited or aborted. 
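 * In the common (non phys_contiguous) case the pages are wired and
 * marked "busy" below, and their pmap mappings are removed with
 * vm_object_pmap_protect() before this routine returns, so any
 * access faults until the UPL is committed or aborted.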
6011 */ 6012 upl->flags |= UPL_ACCESS_BLOCKED; 6013 } 6014 6015 if (object->phys_contiguous) { 6016#if UPL_DEBUG 6017 vm_object_activity_begin(object); 6018 queue_enter(&object->uplq, upl, upl_t, uplq); 6019#endif /* UPL_DEBUG */ 6020 6021 if (upl->flags & UPL_ACCESS_BLOCKED) { 6022 assert(!object->blocked_access); 6023 object->blocked_access = TRUE; 6024 } 6025 6026 vm_object_unlock(object); 6027 6028 /* 6029 * don't need any shadow mappings for this one 6030 * since it is already I/O memory 6031 */ 6032 upl->flags |= UPL_DEVICE_MEMORY; 6033 6034 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT); 6035 6036 if (user_page_list) { 6037 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT); 6038 user_page_list[0].device = TRUE; 6039 } 6040 if (page_list_count != NULL) { 6041 if (upl->flags & UPL_INTERNAL) 6042 *page_list_count = 0; 6043 else 6044 *page_list_count = 1; 6045 } 6046 return KERN_SUCCESS; 6047 } 6048 if (object != kernel_object) { 6049 /* 6050 * Protect user space from future COW operations 6051 */ 6052 object->true_share = TRUE; 6053 6054 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) 6055 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 6056 } 6057 6058#if UPL_DEBUG 6059 vm_object_activity_begin(object); 6060 queue_enter(&object->uplq, upl, upl_t, uplq); 6061#endif /* UPL_DEBUG */ 6062 6063 if (!(cntrl_flags & UPL_COPYOUT_FROM) && 6064 object->copy != VM_OBJECT_NULL) { 6065 /* 6066 * Honor copy-on-write obligations 6067 * 6068 * The caller is gathering these pages and 6069 * might modify their contents. We need to 6070 * make sure that the copy object has its own 6071 * private copies of these pages before we let 6072 * the caller modify them. 6073 * 6074 * NOTE: someone else could map the original object 6075 * after we've done this copy-on-write here, and they 6076 * could then see an inconsistent picture of the memory 6077 * while it's being modified via the UPL. To prevent this, 6078 * we would have to block access to these pages until the 6079 * UPL is released. We could use the UPL_BLOCK_ACCESS 6080 * code path for that... 6081 */ 6082 vm_object_update(object, 6083 offset, 6084 size, 6085 NULL, 6086 NULL, 6087 FALSE, /* should_return */ 6088 MEMORY_OBJECT_COPY_SYNC, 6089 VM_PROT_NO_CHANGE); 6090#if DEVELOPMENT || DEBUG 6091 iopl_cow++; 6092 iopl_cow_pages += size >> PAGE_SHIFT; 6093#endif 6094 } 6095 6096 6097 entry = 0; 6098 6099 xfer_size = size; 6100 dst_offset = offset; 6101 6102 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 6103 fault_info.user_tag = 0; 6104 fault_info.lo_offset = offset; 6105 fault_info.hi_offset = offset + xfer_size; 6106 fault_info.no_cache = FALSE; 6107 fault_info.stealth = FALSE; 6108 fault_info.io_sync = FALSE; 6109 fault_info.cs_bypass = FALSE; 6110 fault_info.mark_zf_absent = TRUE; 6111 6112 dwp = &dw_array[0]; 6113 dw_count = 0; 6114 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 6115 6116 while (xfer_size) { 6117 vm_fault_return_t result; 6118 unsigned int pg_num; 6119 6120 dwp->dw_mask = 0; 6121 6122 dst_page = vm_page_lookup(object, dst_offset); 6123 6124 /* 6125 * ENCRYPTED SWAP: 6126 * If the page is encrypted, we need to decrypt it, 6127 * so force a soft page fault. 
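 * Missing, busy, encrypted, errored, restarted, absent and fictitious
 * pages are all handled the same way: drop into the vm_fault_page()
 * loop below and let it produce a usable resident page.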
6128 */ 6129 if (dst_page == VM_PAGE_NULL || 6130 dst_page->busy || 6131 dst_page->encrypted || 6132 dst_page->error || 6133 dst_page->restart || 6134 dst_page->absent || 6135 dst_page->fictitious) { 6136 6137 if (object == kernel_object) 6138 panic("vm_object_iopl_request: missing/bad page in kernel object\n"); 6139 6140 do { 6141 vm_page_t top_page; 6142 kern_return_t error_code; 6143 int interruptible; 6144 6145 if (cntrl_flags & UPL_SET_INTERRUPTIBLE) 6146 interruptible = THREAD_ABORTSAFE; 6147 else 6148 interruptible = THREAD_UNINT; 6149 6150 fault_info.interruptible = interruptible; 6151 fault_info.cluster_size = xfer_size; 6152 fault_info.batch_pmap_op = TRUE; 6153 6154 vm_object_paging_begin(object); 6155 6156 result = vm_fault_page(object, dst_offset, 6157 prot | VM_PROT_WRITE, FALSE, 6158 &prot, &dst_page, &top_page, 6159 (int *)0, 6160 &error_code, no_zero_fill, 6161 FALSE, &fault_info); 6162 6163 switch (result) { 6164 6165 case VM_FAULT_SUCCESS: 6166 6167 if ( !dst_page->absent) { 6168 PAGE_WAKEUP_DONE(dst_page); 6169 } else { 6170 /* 6171 * we only get back an absent page if we 6172 * requested that it not be zero-filled 6173 * because we are about to fill it via I/O 6174 * 6175 * absent pages should be left BUSY 6176 * to prevent them from being faulted 6177 * into an address space before we've 6178 * had a chance to complete the I/O on 6179 * them since they may contain info that 6180 * shouldn't be seen by the faulting task 6181 */ 6182 } 6183 /* 6184 * Release paging references and 6185 * top-level placeholder page, if any. 6186 */ 6187 if (top_page != VM_PAGE_NULL) { 6188 vm_object_t local_object; 6189 6190 local_object = top_page->object; 6191 6192 if (top_page->object != dst_page->object) { 6193 vm_object_lock(local_object); 6194 VM_PAGE_FREE(top_page); 6195 vm_object_paging_end(local_object); 6196 vm_object_unlock(local_object); 6197 } else { 6198 VM_PAGE_FREE(top_page); 6199 vm_object_paging_end(local_object); 6200 } 6201 } 6202 vm_object_paging_end(object); 6203 break; 6204 6205 case VM_FAULT_RETRY: 6206 vm_object_lock(object); 6207 break; 6208 6209 case VM_FAULT_MEMORY_SHORTAGE: 6210 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); 6211 6212 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); 6213 6214 if (vm_page_wait(interruptible)) { 6215 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 6216 6217 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); 6218 vm_object_lock(object); 6219 6220 break; 6221 } 6222 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 6223 6224 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); 6225 6226 /* fall thru */ 6227 6228 case VM_FAULT_INTERRUPTED: 6229 error_code = MACH_SEND_INTERRUPTED; 6230 case VM_FAULT_MEMORY_ERROR: 6231 memory_error: 6232 ret = (error_code ? error_code: KERN_MEMORY_ERROR); 6233 6234 vm_object_lock(object); 6235 goto return_err; 6236 6237 case VM_FAULT_SUCCESS_NO_VM_PAGE: 6238 /* success but no page: fail */ 6239 vm_object_paging_end(object); 6240 vm_object_unlock(object); 6241 goto memory_error; 6242 6243 default: 6244 panic("vm_object_iopl_request: unexpected error" 6245 " 0x%x from vm_fault_page()\n", result); 6246 } 6247 } while (result != VM_FAULT_SUCCESS); 6248 6249 } 6250 if (upl->flags & UPL_KERNEL_OBJECT) 6251 goto record_phys_addr; 6252 6253 if (dst_page->cleaning) { 6254 /* 6255 * Someone else is cleaning this page in place. 
6256 * In theory, we should be able to proceed and use this 6257 * page but they'll probably end up clearing the "busy" 6258 * bit on it in upl_commit_range() but they didn't set 6259 * it, so they would clear our "busy" bit and open 6260 * us to race conditions. 6261 * We'd better wait for the cleaning to complete and 6262 * then try again. 6263 */ 6264 vm_object_iopl_request_sleep_for_cleaning++; 6265 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 6266 continue; 6267 } 6268 if (dst_page->laundry) { 6269 dst_page->pageout = FALSE; 6270 6271 vm_pageout_steal_laundry(dst_page, FALSE); 6272 } 6273 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) && 6274 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) { 6275 vm_page_t low_page; 6276 int refmod; 6277 6278 /* 6279 * support devices that can't DMA above 32 bits 6280 * by substituting pages from a pool of low address 6281 * memory for any pages we find above the 4G mark 6282 * can't substitute if the page is already wired because 6283 * we don't know whether that physical address has been 6284 * handed out to some other 64 bit capable DMA device to use 6285 */ 6286 if (VM_PAGE_WIRED(dst_page)) { 6287 ret = KERN_PROTECTION_FAILURE; 6288 goto return_err; 6289 } 6290 low_page = vm_page_grablo(); 6291 6292 if (low_page == VM_PAGE_NULL) { 6293 ret = KERN_RESOURCE_SHORTAGE; 6294 goto return_err; 6295 } 6296 /* 6297 * from here until the vm_page_replace completes 6298 * we musn't drop the object lock... we don't 6299 * want anyone refaulting this page in and using 6300 * it after we disconnect it... we want the fault 6301 * to find the new page being substituted. 6302 */ 6303 if (dst_page->pmapped) 6304 refmod = pmap_disconnect(dst_page->phys_page); 6305 else 6306 refmod = 0; 6307 6308 if (!dst_page->absent) 6309 vm_page_copy(dst_page, low_page); 6310 6311 low_page->reference = dst_page->reference; 6312 low_page->dirty = dst_page->dirty; 6313 low_page->absent = dst_page->absent; 6314 6315 if (refmod & VM_MEM_REFERENCED) 6316 low_page->reference = TRUE; 6317 if (refmod & VM_MEM_MODIFIED) { 6318 SET_PAGE_DIRTY(low_page, FALSE); 6319 } 6320 6321 vm_page_replace(low_page, object, dst_offset); 6322 6323 dst_page = low_page; 6324 /* 6325 * vm_page_grablo returned the page marked 6326 * BUSY... we don't need a PAGE_WAKEUP_DONE 6327 * here, because we've never dropped the object lock 6328 */ 6329 if ( !dst_page->absent) 6330 dst_page->busy = FALSE; 6331 } 6332 if ( !dst_page->busy) 6333 dwp->dw_mask |= DW_vm_page_wire; 6334 6335 if (cntrl_flags & UPL_BLOCK_ACCESS) { 6336 /* 6337 * Mark the page "busy" to block any future page fault 6338 * on this page in addition to wiring it. 6339 * We'll also remove the mapping 6340 * of all these pages before leaving this routine. 
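 * (The mappings are actually torn down in a single
 * vm_object_pmap_protect() pass over the whole range, just before
 * this routine returns.)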
6341 */ 6342 assert(!dst_page->fictitious); 6343 dst_page->busy = TRUE; 6344 } 6345 /* 6346 * expect the page to be used 6347 * page queues lock must be held to set 'reference' 6348 */ 6349 dwp->dw_mask |= DW_set_reference; 6350 6351 if (!(cntrl_flags & UPL_COPYOUT_FROM)) { 6352 SET_PAGE_DIRTY(dst_page, TRUE); 6353 } 6354record_phys_addr: 6355 if (dst_page->busy) 6356 upl->flags |= UPL_HAS_BUSY; 6357 6358 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 6359 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 6360 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 6361 6362 if (dst_page->phys_page > upl->highest_page) 6363 upl->highest_page = dst_page->phys_page; 6364 6365 if (user_page_list) { 6366 user_page_list[entry].phys_addr = dst_page->phys_page; 6367 user_page_list[entry].pageout = dst_page->pageout; 6368 user_page_list[entry].absent = dst_page->absent; 6369 user_page_list[entry].dirty = dst_page->dirty; 6370 user_page_list[entry].precious = dst_page->precious; 6371 user_page_list[entry].device = FALSE; 6372 user_page_list[entry].needed = FALSE; 6373 if (dst_page->clustered == TRUE) 6374 user_page_list[entry].speculative = dst_page->speculative; 6375 else 6376 user_page_list[entry].speculative = FALSE; 6377 user_page_list[entry].cs_validated = dst_page->cs_validated; 6378 user_page_list[entry].cs_tainted = dst_page->cs_tainted; 6379 } 6380 if (object != kernel_object) { 6381 /* 6382 * someone is explicitly grabbing this page... 6383 * update clustered and speculative state 6384 * 6385 */ 6386 VM_PAGE_CONSUME_CLUSTERED(dst_page); 6387 } 6388 entry++; 6389 dst_offset += PAGE_SIZE_64; 6390 xfer_size -= PAGE_SIZE; 6391 6392 if (dwp->dw_mask) { 6393 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); 6394 6395 if (dw_count >= dw_limit) { 6396 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 6397 6398 dwp = &dw_array[0]; 6399 dw_count = 0; 6400 } 6401 } 6402 } 6403 if (dw_count) 6404 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 6405 6406 vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE); 6407 6408 if (page_list_count != NULL) { 6409 if (upl->flags & UPL_INTERNAL) 6410 *page_list_count = 0; 6411 else if (*page_list_count > entry) 6412 *page_list_count = entry; 6413 } 6414 vm_object_unlock(object); 6415 6416 if (cntrl_flags & UPL_BLOCK_ACCESS) { 6417 /* 6418 * We've marked all the pages "busy" so that future 6419 * page faults will block. 6420 * Now remove the mapping for these pages, so that they 6421 * can't be accessed without causing a page fault. 6422 */ 6423 vm_object_pmap_protect(object, offset, (vm_object_size_t)size, 6424 PMAP_NULL, 0, VM_PROT_NONE); 6425 assert(!object->blocked_access); 6426 object->blocked_access = TRUE; 6427 } 6428 return KERN_SUCCESS; 6429 6430return_err: 6431 dw_index = 0; 6432 6433 for (; offset < dst_offset; offset += PAGE_SIZE) { 6434 boolean_t need_unwire; 6435 6436 dst_page = vm_page_lookup(object, offset); 6437 6438 if (dst_page == VM_PAGE_NULL) 6439 panic("vm_object_iopl_request: Wired page missing. \n"); 6440 6441 /* 6442 * if we've already processed this page in an earlier 6443 * dw_do_work, we need to undo the wiring... we will 6444 * leave the dirty and reference bits on if they 6445 * were set, since we don't have a good way of knowing 6446 * what the previous state was and we won't get here 6447 * under any normal circumstances... 
we will always 6448 * clear BUSY and wakeup any waiters via vm_page_free 6449 * or PAGE_WAKEUP_DONE 6450 */ 6451 need_unwire = TRUE; 6452 6453 if (dw_count) { 6454 if (dw_array[dw_index].dw_m == dst_page) { 6455 /* 6456 * still in the deferred work list 6457 * which means we haven't yet called 6458 * vm_page_wire on this page 6459 */ 6460 need_unwire = FALSE; 6461 6462 dw_index++; 6463 dw_count--; 6464 } 6465 } 6466 vm_page_lock_queues(); 6467 6468 if (dst_page->absent) { 6469 vm_page_free(dst_page); 6470 6471 need_unwire = FALSE; 6472 } else { 6473 if (need_unwire == TRUE) 6474 vm_page_unwire(dst_page, TRUE); 6475 6476 PAGE_WAKEUP_DONE(dst_page); 6477 } 6478 vm_page_unlock_queues(); 6479 6480 if (need_unwire == TRUE) 6481 VM_STAT_INCR(reactivations); 6482 } 6483#if UPL_DEBUG 6484 upl->upl_state = 2; 6485#endif 6486 if (! (upl->flags & UPL_KERNEL_OBJECT)) { 6487 vm_object_activity_end(object); 6488 vm_object_collapse(object, 0, TRUE); 6489 } 6490 vm_object_unlock(object); 6491 upl_destroy(upl); 6492 6493 return ret; 6494} 6495 6496kern_return_t 6497upl_transpose( 6498 upl_t upl1, 6499 upl_t upl2) 6500{ 6501 kern_return_t retval; 6502 boolean_t upls_locked; 6503 vm_object_t object1, object2; 6504 6505 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) { 6506 return KERN_INVALID_ARGUMENT; 6507 } 6508 6509 upls_locked = FALSE; 6510 6511 /* 6512 * Since we need to lock both UPLs at the same time, 6513 * avoid deadlocks by always taking locks in the same order. 6514 */ 6515 if (upl1 < upl2) { 6516 upl_lock(upl1); 6517 upl_lock(upl2); 6518 } else { 6519 upl_lock(upl2); 6520 upl_lock(upl1); 6521 } 6522 upls_locked = TRUE; /* the UPLs will need to be unlocked */ 6523 6524 object1 = upl1->map_object; 6525 object2 = upl2->map_object; 6526 6527 if (upl1->offset != 0 || upl2->offset != 0 || 6528 upl1->size != upl2->size) { 6529 /* 6530 * We deal only with full objects, not subsets. 6531 * That's because we exchange the entire backing store info 6532 * for the objects: pager, resident pages, etc... We can't do 6533 * only part of it. 6534 */ 6535 retval = KERN_INVALID_VALUE; 6536 goto done; 6537 } 6538 6539 /* 6540 * Tranpose the VM objects' backing store. 6541 */ 6542 retval = vm_object_transpose(object1, object2, 6543 (vm_object_size_t) upl1->size); 6544 6545 if (retval == KERN_SUCCESS) { 6546 /* 6547 * Make each UPL point to the correct VM object, i.e. the 6548 * object holding the pages that the UPL refers to... 6549 */ 6550#if UPL_DEBUG 6551 queue_remove(&object1->uplq, upl1, upl_t, uplq); 6552 queue_remove(&object2->uplq, upl2, upl_t, uplq); 6553#endif 6554 upl1->map_object = object2; 6555 upl2->map_object = object1; 6556#if UPL_DEBUG 6557 queue_enter(&object1->uplq, upl2, upl_t, uplq); 6558 queue_enter(&object2->uplq, upl1, upl_t, uplq); 6559#endif 6560 } 6561 6562done: 6563 /* 6564 * Cleanup. 
 */
	if (upls_locked) {
		upl_unlock(upl1);
		upl_unlock(upl2);
		upls_locked = FALSE;
	}

	return retval;
}

void
upl_range_needed(
	upl_t		upl,
	int		index,
	int		count)
{
	upl_page_info_t	*user_page_list;
	int		size_in_pages;

	if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
		return;

	size_in_pages = upl->size / PAGE_SIZE;

	user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));

	while (count-- && index < size_in_pages)
		user_page_list[index++].needed = TRUE;
}


/*
 * ENCRYPTED SWAP:
 *
 * Rationale: the user might have some encrypted data on disk (via
 * FileVault or any other mechanism). That data is then decrypted in
 * memory, which is safe as long as the machine is secure. But that
 * decrypted data in memory could be paged out to disk by the default
 * pager. The data would then be stored on disk in the clear (not
 * encrypted) and it could be accessed by anyone who gets physical
 * access to the disk (if the laptop or the disk gets stolen for
 * example). This weakens the security offered by FileVault.
 *
 * Solution: the default pager will optionally request that all the
 * pages it gathers for pageout be encrypted, via the UPL interfaces,
 * before it sends this UPL to disk via the vnode_pageout() path.
 *
 * Notes:
 *
 * To avoid disrupting the VM LRU algorithms, we want to keep the
 * clean-in-place mechanisms, which allow us to send some extra pages to
 * swap (clustering) without actually removing them from the user's
 * address space. We don't want the user to unknowingly access encrypted
 * data, so we have to actually remove the encrypted pages from the page
 * table. When the user accesses the data, the hardware will fail to
 * locate the virtual page in its page table and will trigger a page
 * fault. We can then decrypt the page and enter it in the page table
 * again. Whenever we allow the user to access the contents of a page,
 * we have to make sure it's not encrypted.
 *
 */
/*
 * ENCRYPTED SWAP:
 * Reserve of virtual addresses in the kernel address space.
 * We need to map the physical pages in the kernel, so that we
 * can call the encryption/decryption routines with a kernel
 * virtual address. We keep this pool of pre-allocated kernel
 * virtual addresses so that we don't have to scan the kernel's
 * virtual address space each time we need to encrypt or decrypt
 * a physical page.
 * It would be nice to be able to encrypt and decrypt in physical
 * mode but that might not always be more efficient...
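 * The pool below holds VM_PAGING_NUM_PAGES page-sized slots; when they
 * are all busy, vm_paging_map_object() either waits for one to be
 * released or, if it may unlock the object, falls back to a regular
 * vm_map_enter() mapping in the kernel_map.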
6638 */ 6639decl_simple_lock_data(,vm_paging_lock) 6640#define VM_PAGING_NUM_PAGES 64 6641vm_map_offset_t vm_paging_base_address = 0; 6642boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, }; 6643int vm_paging_max_index = 0; 6644int vm_paging_page_waiter = 0; 6645int vm_paging_page_waiter_total = 0; 6646unsigned long vm_paging_no_kernel_page = 0; 6647unsigned long vm_paging_objects_mapped = 0; 6648unsigned long vm_paging_pages_mapped = 0; 6649unsigned long vm_paging_objects_mapped_slow = 0; 6650unsigned long vm_paging_pages_mapped_slow = 0; 6651 6652void 6653vm_paging_map_init(void) 6654{ 6655 kern_return_t kr; 6656 vm_map_offset_t page_map_offset; 6657 vm_map_entry_t map_entry; 6658 6659 assert(vm_paging_base_address == 0); 6660 6661 /* 6662 * Initialize our pool of pre-allocated kernel 6663 * virtual addresses. 6664 */ 6665 page_map_offset = 0; 6666 kr = vm_map_find_space(kernel_map, 6667 &page_map_offset, 6668 VM_PAGING_NUM_PAGES * PAGE_SIZE, 6669 0, 6670 0, 6671 &map_entry); 6672 if (kr != KERN_SUCCESS) { 6673 panic("vm_paging_map_init: kernel_map full\n"); 6674 } 6675 map_entry->object.vm_object = kernel_object; 6676 map_entry->offset = page_map_offset; 6677 map_entry->protection = VM_PROT_NONE; 6678 map_entry->max_protection = VM_PROT_NONE; 6679 map_entry->permanent = TRUE; 6680 vm_object_reference(kernel_object); 6681 vm_map_unlock(kernel_map); 6682 6683 assert(vm_paging_base_address == 0); 6684 vm_paging_base_address = page_map_offset; 6685} 6686 6687/* 6688 * ENCRYPTED SWAP: 6689 * vm_paging_map_object: 6690 * Maps part of a VM object's pages in the kernel 6691 * virtual address space, using the pre-allocated 6692 * kernel virtual addresses, if possible. 6693 * Context: 6694 * The VM object is locked. This lock will get 6695 * dropped and re-acquired though, so the caller 6696 * must make sure the VM object is kept alive 6697 * (by holding a VM map that has a reference 6698 * on it, for example, or taking an extra reference). 6699 * The page should also be kept busy to prevent 6700 * it from being reclaimed. 6701 */ 6702kern_return_t 6703vm_paging_map_object( 6704 vm_map_offset_t *address, 6705 vm_page_t page, 6706 vm_object_t object, 6707 vm_object_offset_t offset, 6708 vm_map_size_t *size, 6709 vm_prot_t protection, 6710 boolean_t can_unlock_object) 6711{ 6712 kern_return_t kr; 6713 vm_map_offset_t page_map_offset; 6714 vm_map_size_t map_size; 6715 vm_object_offset_t object_offset; 6716 int i; 6717 6718 6719 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { 6720 assert(page->busy); 6721 /* 6722 * Use one of the pre-allocated kernel virtual addresses 6723 * and just enter the VM page in the kernel address space 6724 * at that virtual address. 6725 */ 6726 simple_lock(&vm_paging_lock); 6727 6728 /* 6729 * Try and find an available kernel virtual address 6730 * from our pre-allocated pool. 6731 */ 6732 page_map_offset = 0; 6733 for (;;) { 6734 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) { 6735 if (vm_paging_page_inuse[i] == FALSE) { 6736 page_map_offset = 6737 vm_paging_base_address + 6738 (i * PAGE_SIZE); 6739 break; 6740 } 6741 } 6742 if (page_map_offset != 0) { 6743 /* found a space to map our page ! */ 6744 break; 6745 } 6746 6747 if (can_unlock_object) { 6748 /* 6749 * If we can afford to unlock the VM object, 6750 * let's take the slow path now... 6751 */ 6752 break; 6753 } 6754 /* 6755 * We can't afford to unlock the VM object, so 6756 * let's wait for a space to become available... 
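 * (vm_paging_unmap_object() issues a thread_wakeup() on
 * &vm_paging_page_waiter whenever it returns a slot to the pool.)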
6757 */ 6758 vm_paging_page_waiter_total++; 6759 vm_paging_page_waiter++; 6760 thread_sleep_fast_usimple_lock(&vm_paging_page_waiter, 6761 &vm_paging_lock, 6762 THREAD_UNINT); 6763 vm_paging_page_waiter--; 6764 /* ... and try again */ 6765 } 6766 6767 if (page_map_offset != 0) { 6768 /* 6769 * We found a kernel virtual address; 6770 * map the physical page to that virtual address. 6771 */ 6772 if (i > vm_paging_max_index) { 6773 vm_paging_max_index = i; 6774 } 6775 vm_paging_page_inuse[i] = TRUE; 6776 simple_unlock(&vm_paging_lock); 6777 6778 page->pmapped = TRUE; 6779 6780 /* 6781 * Keep the VM object locked over the PMAP_ENTER 6782 * and the actual use of the page by the kernel, 6783 * or this pmap mapping might get undone by a 6784 * vm_object_pmap_protect() call... 6785 */ 6786 PMAP_ENTER(kernel_pmap, 6787 page_map_offset, 6788 page, 6789 protection, 6790 VM_PROT_NONE, 6791 0, 6792 TRUE); 6793 vm_paging_objects_mapped++; 6794 vm_paging_pages_mapped++; 6795 *address = page_map_offset; 6796 6797 /* all done and mapped, ready to use ! */ 6798 return KERN_SUCCESS; 6799 } 6800 6801 /* 6802 * We ran out of pre-allocated kernel virtual 6803 * addresses. Just map the page in the kernel 6804 * the slow and regular way. 6805 */ 6806 vm_paging_no_kernel_page++; 6807 simple_unlock(&vm_paging_lock); 6808 } 6809 6810 if (! can_unlock_object) { 6811 return KERN_NOT_SUPPORTED; 6812 } 6813 6814 object_offset = vm_object_trunc_page(offset); 6815 map_size = vm_map_round_page(*size); 6816 6817 /* 6818 * Try and map the required range of the object 6819 * in the kernel_map 6820 */ 6821 6822 vm_object_reference_locked(object); /* for the map entry */ 6823 vm_object_unlock(object); 6824 6825 kr = vm_map_enter(kernel_map, 6826 address, 6827 map_size, 6828 0, 6829 VM_FLAGS_ANYWHERE, 6830 object, 6831 object_offset, 6832 FALSE, 6833 protection, 6834 VM_PROT_ALL, 6835 VM_INHERIT_NONE); 6836 if (kr != KERN_SUCCESS) { 6837 *address = 0; 6838 *size = 0; 6839 vm_object_deallocate(object); /* for the map entry */ 6840 vm_object_lock(object); 6841 return kr; 6842 } 6843 6844 *size = map_size; 6845 6846 /* 6847 * Enter the mapped pages in the page table now. 6848 */ 6849 vm_object_lock(object); 6850 /* 6851 * VM object must be kept locked from before PMAP_ENTER() 6852 * until after the kernel is done accessing the page(s). 6853 * Otherwise, the pmap mappings in the kernel could be 6854 * undone by a call to vm_object_pmap_protect(). 6855 */ 6856 6857 for (page_map_offset = 0; 6858 map_size != 0; 6859 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) { 6860 6861 page = vm_page_lookup(object, offset + page_map_offset); 6862 if (page == VM_PAGE_NULL) { 6863 printf("vm_paging_map_object: no page !?"); 6864 vm_object_unlock(object); 6865 kr = vm_map_remove(kernel_map, *address, *size, 6866 VM_MAP_NO_FLAGS); 6867 assert(kr == KERN_SUCCESS); 6868 *address = 0; 6869 *size = 0; 6870 vm_object_lock(object); 6871 return KERN_MEMORY_ERROR; 6872 } 6873 page->pmapped = TRUE; 6874 6875 //assert(pmap_verify_free(page->phys_page)); 6876 PMAP_ENTER(kernel_pmap, 6877 *address + page_map_offset, 6878 page, 6879 protection, 6880 VM_PROT_NONE, 6881 0, 6882 TRUE); 6883 } 6884 6885 vm_paging_objects_mapped_slow++; 6886 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64); 6887 6888 return KERN_SUCCESS; 6889} 6890 6891/* 6892 * ENCRYPTED SWAP: 6893 * vm_paging_unmap_object: 6894 * Unmaps part of a VM object's pages from the kernel 6895 * virtual address space. 6896 * Context: 6897 * The VM object is locked. 
This lock will get 6898 * dropped and re-acquired though. 6899 */ 6900void 6901vm_paging_unmap_object( 6902 vm_object_t object, 6903 vm_map_offset_t start, 6904 vm_map_offset_t end) 6905{ 6906 kern_return_t kr; 6907 int i; 6908 6909 if ((vm_paging_base_address == 0) || 6910 (start < vm_paging_base_address) || 6911 (end > (vm_paging_base_address 6912 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) { 6913 /* 6914 * We didn't use our pre-allocated pool of 6915 * kernel virtual address. Deallocate the 6916 * virtual memory. 6917 */ 6918 if (object != VM_OBJECT_NULL) { 6919 vm_object_unlock(object); 6920 } 6921 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS); 6922 if (object != VM_OBJECT_NULL) { 6923 vm_object_lock(object); 6924 } 6925 assert(kr == KERN_SUCCESS); 6926 } else { 6927 /* 6928 * We used a kernel virtual address from our 6929 * pre-allocated pool. Put it back in the pool 6930 * for next time. 6931 */ 6932 assert(end - start == PAGE_SIZE); 6933 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT); 6934 assert(i >= 0 && i < VM_PAGING_NUM_PAGES); 6935 6936 /* undo the pmap mapping */ 6937 pmap_remove(kernel_pmap, start, end); 6938 6939 simple_lock(&vm_paging_lock); 6940 vm_paging_page_inuse[i] = FALSE; 6941 if (vm_paging_page_waiter) { 6942 thread_wakeup(&vm_paging_page_waiter); 6943 } 6944 simple_unlock(&vm_paging_lock); 6945 } 6946} 6947 6948#if CRYPTO 6949/* 6950 * Encryption data. 6951 * "iv" is the "initial vector". Ideally, we want to 6952 * have a different one for each page we encrypt, so that 6953 * crackers can't find encryption patterns too easily. 6954 */ 6955#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */ 6956boolean_t swap_crypt_ctx_initialized = FALSE; 6957uint32_t swap_crypt_key[8]; /* big enough for a 256 key */ 6958aes_ctx swap_crypt_ctx; 6959const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, }; 6960 6961#if DEBUG 6962boolean_t swap_crypt_ctx_tested = FALSE; 6963unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096))); 6964unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096))); 6965unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096))); 6966#endif /* DEBUG */ 6967 6968/* 6969 * Initialize the encryption context: key and key size. 6970 */ 6971void swap_crypt_ctx_initialize(void); /* forward */ 6972void 6973swap_crypt_ctx_initialize(void) 6974{ 6975 unsigned int i; 6976 6977 /* 6978 * No need for locking to protect swap_crypt_ctx_initialized 6979 * because the first use of encryption will come from the 6980 * pageout thread (we won't pagein before there's been a pageout) 6981 * and there's only one pageout thread. 6982 */ 6983 if (swap_crypt_ctx_initialized == FALSE) { 6984 for (i = 0; 6985 i < (sizeof (swap_crypt_key) / 6986 sizeof (swap_crypt_key[0])); 6987 i++) { 6988 swap_crypt_key[i] = random(); 6989 } 6990 aes_encrypt_key((const unsigned char *) swap_crypt_key, 6991 SWAP_CRYPT_AES_KEY_SIZE, 6992 &swap_crypt_ctx.encrypt); 6993 aes_decrypt_key((const unsigned char *) swap_crypt_key, 6994 SWAP_CRYPT_AES_KEY_SIZE, 6995 &swap_crypt_ctx.decrypt); 6996 swap_crypt_ctx_initialized = TRUE; 6997 } 6998 6999#if DEBUG 7000 /* 7001 * Validate the encryption algorithms. 
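 * The self-test below encrypts a page filled with a known pattern,
 * decrypts it and compares the result against the original, then
 * repeats the round trip in place; any mismatch panics.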
7002 */ 7003 if (swap_crypt_ctx_tested == FALSE) { 7004 /* initialize */ 7005 for (i = 0; i < 4096; i++) { 7006 swap_crypt_test_page_ref[i] = (char) i; 7007 } 7008 /* encrypt */ 7009 aes_encrypt_cbc(swap_crypt_test_page_ref, 7010 swap_crypt_null_iv, 7011 PAGE_SIZE / AES_BLOCK_SIZE, 7012 swap_crypt_test_page_encrypt, 7013 &swap_crypt_ctx.encrypt); 7014 /* decrypt */ 7015 aes_decrypt_cbc(swap_crypt_test_page_encrypt, 7016 swap_crypt_null_iv, 7017 PAGE_SIZE / AES_BLOCK_SIZE, 7018 swap_crypt_test_page_decrypt, 7019 &swap_crypt_ctx.decrypt); 7020 /* compare result with original */ 7021 for (i = 0; i < 4096; i ++) { 7022 if (swap_crypt_test_page_decrypt[i] != 7023 swap_crypt_test_page_ref[i]) { 7024 panic("encryption test failed"); 7025 } 7026 } 7027 7028 /* encrypt again */ 7029 aes_encrypt_cbc(swap_crypt_test_page_decrypt, 7030 swap_crypt_null_iv, 7031 PAGE_SIZE / AES_BLOCK_SIZE, 7032 swap_crypt_test_page_decrypt, 7033 &swap_crypt_ctx.encrypt); 7034 /* decrypt in place */ 7035 aes_decrypt_cbc(swap_crypt_test_page_decrypt, 7036 swap_crypt_null_iv, 7037 PAGE_SIZE / AES_BLOCK_SIZE, 7038 swap_crypt_test_page_decrypt, 7039 &swap_crypt_ctx.decrypt); 7040 for (i = 0; i < 4096; i ++) { 7041 if (swap_crypt_test_page_decrypt[i] != 7042 swap_crypt_test_page_ref[i]) { 7043 panic("in place encryption test failed"); 7044 } 7045 } 7046 7047 swap_crypt_ctx_tested = TRUE; 7048 } 7049#endif /* DEBUG */ 7050} 7051 7052/* 7053 * ENCRYPTED SWAP: 7054 * vm_page_encrypt: 7055 * Encrypt the given page, for secure paging. 7056 * The page might already be mapped at kernel virtual 7057 * address "kernel_mapping_offset". Otherwise, we need 7058 * to map it. 7059 * 7060 * Context: 7061 * The page's object is locked, but this lock will be released 7062 * and re-acquired. 7063 * The page is busy and not accessible by users (not entered in any pmap). 7064 */ 7065void 7066vm_page_encrypt( 7067 vm_page_t page, 7068 vm_map_offset_t kernel_mapping_offset) 7069{ 7070 kern_return_t kr; 7071 vm_map_size_t kernel_mapping_size; 7072 vm_offset_t kernel_vaddr; 7073 union { 7074 unsigned char aes_iv[AES_BLOCK_SIZE]; 7075 struct { 7076 memory_object_t pager_object; 7077 vm_object_offset_t paging_offset; 7078 } vm; 7079 } encrypt_iv; 7080 7081 if (! vm_pages_encrypted) { 7082 vm_pages_encrypted = TRUE; 7083 } 7084 7085 assert(page->busy); 7086 7087 if (page->encrypted) { 7088 /* 7089 * Already encrypted: no need to do it again. 7090 */ 7091 vm_page_encrypt_already_encrypted_counter++; 7092 return; 7093 } 7094 assert(page->dirty || page->precious); 7095 7096 ASSERT_PAGE_DECRYPTED(page); 7097 7098 /* 7099 * Take a paging-in-progress reference to keep the object 7100 * alive even if we have to unlock it (in vm_paging_map_object() 7101 * for example)... 7102 */ 7103 vm_object_paging_begin(page->object); 7104 7105 if (kernel_mapping_offset == 0) { 7106 /* 7107 * The page hasn't already been mapped in kernel space 7108 * by the caller. Map it now, so that we can access 7109 * its contents and encrypt them. 
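 * (A non-zero kernel_mapping_size also reminds us, further down, that
 * the mapping is ours to tear down with vm_paging_unmap_object() once
 * the encryption is done.)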
7110 */ 7111 kernel_mapping_size = PAGE_SIZE; 7112 kr = vm_paging_map_object(&kernel_mapping_offset, 7113 page, 7114 page->object, 7115 page->offset, 7116 &kernel_mapping_size, 7117 VM_PROT_READ | VM_PROT_WRITE, 7118 FALSE); 7119 if (kr != KERN_SUCCESS) { 7120 panic("vm_page_encrypt: " 7121 "could not map page in kernel: 0x%x\n", 7122 kr); 7123 } 7124 } else { 7125 kernel_mapping_size = 0; 7126 } 7127 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 7128 7129 if (swap_crypt_ctx_initialized == FALSE) { 7130 swap_crypt_ctx_initialize(); 7131 } 7132 assert(swap_crypt_ctx_initialized); 7133 7134 /* 7135 * Prepare an "initial vector" for the encryption. 7136 * We use the "pager" and the "paging_offset" for that 7137 * page to obfuscate the encrypted data a bit more and 7138 * prevent crackers from finding patterns that they could 7139 * use to break the key. 7140 */ 7141 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv)); 7142 encrypt_iv.vm.pager_object = page->object->pager; 7143 encrypt_iv.vm.paging_offset = 7144 page->object->paging_offset + page->offset; 7145 7146 /* encrypt the "initial vector" */ 7147 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0], 7148 swap_crypt_null_iv, 7149 1, 7150 &encrypt_iv.aes_iv[0], 7151 &swap_crypt_ctx.encrypt); 7152 7153 /* 7154 * Encrypt the page. 7155 */ 7156 aes_encrypt_cbc((const unsigned char *) kernel_vaddr, 7157 &encrypt_iv.aes_iv[0], 7158 PAGE_SIZE / AES_BLOCK_SIZE, 7159 (unsigned char *) kernel_vaddr, 7160 &swap_crypt_ctx.encrypt); 7161 7162 vm_page_encrypt_counter++; 7163 7164 /* 7165 * Unmap the page from the kernel's address space, 7166 * if we had to map it ourselves. Otherwise, let 7167 * the caller undo the mapping if needed. 7168 */ 7169 if (kernel_mapping_size != 0) { 7170 vm_paging_unmap_object(page->object, 7171 kernel_mapping_offset, 7172 kernel_mapping_offset + kernel_mapping_size); 7173 } 7174 7175 /* 7176 * Clear the "reference" and "modified" bits. 7177 * This should clean up any impact the encryption had 7178 * on them. 7179 * The page was kept busy and disconnected from all pmaps, 7180 * so it can't have been referenced or modified from user 7181 * space. 7182 * The software bits will be reset later after the I/O 7183 * has completed (in upl_commit_range()). 7184 */ 7185 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED); 7186 7187 page->encrypted = TRUE; 7188 7189 vm_object_paging_end(page->object); 7190} 7191 7192/* 7193 * ENCRYPTED SWAP: 7194 * vm_page_decrypt: 7195 * Decrypt the given page. 7196 * The page might already be mapped at kernel virtual 7197 * address "kernel_mapping_offset". Otherwise, we need 7198 * to map it. 7199 * 7200 * Context: 7201 * The page's VM object is locked but will be unlocked and relocked. 7202 * The page is busy and not accessible by users (not entered in any pmap). 7203 */ 7204void 7205vm_page_decrypt( 7206 vm_page_t page, 7207 vm_map_offset_t kernel_mapping_offset) 7208{ 7209 kern_return_t kr; 7210 vm_map_size_t kernel_mapping_size; 7211 vm_offset_t kernel_vaddr; 7212 union { 7213 unsigned char aes_iv[AES_BLOCK_SIZE]; 7214 struct { 7215 memory_object_t pager_object; 7216 vm_object_offset_t paging_offset; 7217 } vm; 7218 } decrypt_iv; 7219 boolean_t was_dirty; 7220 7221 assert(page->busy); 7222 assert(page->encrypted); 7223 7224 was_dirty = page->dirty; 7225 7226 /* 7227 * Take a paging-in-progress reference to keep the object 7228 * alive even if we have to unlock it (in vm_paging_map_object() 7229 * for example)... 
7230 */ 7231 vm_object_paging_begin(page->object); 7232 7233 if (kernel_mapping_offset == 0) { 7234 /* 7235 * The page hasn't already been mapped in kernel space 7236 * by the caller. Map it now, so that we can access 7237 * its contents and decrypt them. 7238 */ 7239 kernel_mapping_size = PAGE_SIZE; 7240 kr = vm_paging_map_object(&kernel_mapping_offset, 7241 page, 7242 page->object, 7243 page->offset, 7244 &kernel_mapping_size, 7245 VM_PROT_READ | VM_PROT_WRITE, 7246 FALSE); 7247 if (kr != KERN_SUCCESS) { 7248 panic("vm_page_decrypt: " 7249 "could not map page in kernel: 0x%x\n", 7250 kr); 7251 } 7252 } else { 7253 kernel_mapping_size = 0; 7254 } 7255 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 7256 7257 assert(swap_crypt_ctx_initialized); 7258 7259 /* 7260 * Prepare an "initial vector" for the decryption. 7261 * It has to be the same as the "initial vector" we 7262 * used to encrypt that page. 7263 */ 7264 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv)); 7265 decrypt_iv.vm.pager_object = page->object->pager; 7266 decrypt_iv.vm.paging_offset = 7267 page->object->paging_offset + page->offset; 7268 7269 /* encrypt the "initial vector" */ 7270 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0], 7271 swap_crypt_null_iv, 7272 1, 7273 &decrypt_iv.aes_iv[0], 7274 &swap_crypt_ctx.encrypt); 7275 7276 /* 7277 * Decrypt the page. 7278 */ 7279 aes_decrypt_cbc((const unsigned char *) kernel_vaddr, 7280 &decrypt_iv.aes_iv[0], 7281 PAGE_SIZE / AES_BLOCK_SIZE, 7282 (unsigned char *) kernel_vaddr, 7283 &swap_crypt_ctx.decrypt); 7284 vm_page_decrypt_counter++; 7285 7286 /* 7287 * Unmap the page from the kernel's address space, 7288 * if we had to map it ourselves. Otherwise, let 7289 * the caller undo the mapping if needed. 7290 */ 7291 if (kernel_mapping_size != 0) { 7292 vm_paging_unmap_object(page->object, 7293 kernel_vaddr, 7294 kernel_vaddr + PAGE_SIZE); 7295 } 7296 7297 if (was_dirty) { 7298 /* 7299 * The pager did not specify that the page would be 7300 * clean when it got paged in, so let's not clean it here 7301 * either. 7302 */ 7303 } else { 7304 /* 7305 * After decryption, the page is actually still clean. 7306 * It was encrypted as part of paging, which "cleans" 7307 * the "dirty" pages. 7308 * Noone could access it after it was encrypted 7309 * and the decryption doesn't count. 7310 */ 7311 page->dirty = FALSE; 7312 assert (page->cs_validated == FALSE); 7313 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); 7314 } 7315 page->encrypted = FALSE; 7316 7317 /* 7318 * We've just modified the page's contents via the data cache and part 7319 * of the new contents might still be in the cache and not yet in RAM. 7320 * Since the page is now available and might get gathered in a UPL to 7321 * be part of a DMA transfer from a driver that expects the memory to 7322 * be coherent at this point, we have to flush the data cache. 7323 */ 7324 pmap_sync_page_attributes_phys(page->phys_page); 7325 /* 7326 * Since the page is not mapped yet, some code might assume that it 7327 * doesn't need to invalidate the instruction cache when writing to 7328 * that page. That code relies on "pmapped" being FALSE, so that the 7329 * caches get synchronized when the page is first mapped. 
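 * That is why "pmapped" and "wpmapped" are reset below: the next
 * mapping of this page has to go through the full cache
 * synchronization again.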
7330 */ 7331 assert(pmap_verify_free(page->phys_page)); 7332 page->pmapped = FALSE; 7333 page->wpmapped = FALSE; 7334 7335 vm_object_paging_end(page->object); 7336} 7337 7338#if DEVELOPMENT || DEBUG 7339unsigned long upl_encrypt_upls = 0; 7340unsigned long upl_encrypt_pages = 0; 7341#endif 7342 7343/* 7344 * ENCRYPTED SWAP: 7345 * 7346 * upl_encrypt: 7347 * Encrypts all the pages in the UPL, within the specified range. 7348 * 7349 */ 7350void 7351upl_encrypt( 7352 upl_t upl, 7353 upl_offset_t crypt_offset, 7354 upl_size_t crypt_size) 7355{ 7356 upl_size_t upl_size, subupl_size=crypt_size; 7357 upl_offset_t offset_in_upl, subupl_offset=crypt_offset; 7358 vm_object_t upl_object; 7359 vm_object_offset_t upl_offset; 7360 vm_page_t page; 7361 vm_object_t shadow_object; 7362 vm_object_offset_t shadow_offset; 7363 vm_object_offset_t paging_offset; 7364 vm_object_offset_t base_offset; 7365 int isVectorUPL = 0; 7366 upl_t vector_upl = NULL; 7367 7368 if((isVectorUPL = vector_upl_is_valid(upl))) 7369 vector_upl = upl; 7370 7371process_upl_to_encrypt: 7372 if(isVectorUPL) { 7373 crypt_size = subupl_size; 7374 crypt_offset = subupl_offset; 7375 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size); 7376 if(upl == NULL) 7377 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n"); 7378 subupl_size -= crypt_size; 7379 subupl_offset += crypt_size; 7380 } 7381 7382#if DEVELOPMENT || DEBUG 7383 upl_encrypt_upls++; 7384 upl_encrypt_pages += crypt_size / PAGE_SIZE; 7385#endif 7386 upl_object = upl->map_object; 7387 upl_offset = upl->offset; 7388 upl_size = upl->size; 7389 7390 vm_object_lock(upl_object); 7391 7392 /* 7393 * Find the VM object that contains the actual pages. 7394 */ 7395 if (upl_object->pageout) { 7396 shadow_object = upl_object->shadow; 7397 /* 7398 * The offset in the shadow object is actually also 7399 * accounted for in upl->offset. It possibly shouldn't be 7400 * this way, but for now don't account for it twice. 7401 */ 7402 shadow_offset = 0; 7403 assert(upl_object->paging_offset == 0); /* XXX ? */ 7404 vm_object_lock(shadow_object); 7405 } else { 7406 shadow_object = upl_object; 7407 shadow_offset = 0; 7408 } 7409 7410 paging_offset = shadow_object->paging_offset; 7411 vm_object_paging_begin(shadow_object); 7412 7413 if (shadow_object != upl_object) 7414 vm_object_unlock(upl_object); 7415 7416 7417 base_offset = shadow_offset; 7418 base_offset += upl_offset; 7419 base_offset += crypt_offset; 7420 base_offset -= paging_offset; 7421 7422 assert(crypt_offset + crypt_size <= upl_size); 7423 7424 for (offset_in_upl = 0; 7425 offset_in_upl < crypt_size; 7426 offset_in_upl += PAGE_SIZE) { 7427 page = vm_page_lookup(shadow_object, 7428 base_offset + offset_in_upl); 7429 if (page == VM_PAGE_NULL) { 7430 panic("upl_encrypt: " 7431 "no page for (obj=%p,off=0x%llx+0x%x)!\n", 7432 shadow_object, 7433 base_offset, 7434 offset_in_upl); 7435 } 7436 /* 7437 * Disconnect the page from all pmaps, so that nobody can 7438 * access it while it's encrypted. After that point, all 7439 * accesses to this page will cause a page fault and block 7440 * while the page is busy being encrypted. After the 7441 * encryption completes, any access will cause a 7442 * page fault and the page gets decrypted at that time. 
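 * (Decryption is therefore demand-driven: the fault taken on the next
 * access brings the page back through the decrypt path before it is
 * handed out again.)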
7443 */ 7444 pmap_disconnect(page->phys_page); 7445 vm_page_encrypt(page, 0); 7446 7447 if (vm_object_lock_avoid(shadow_object)) { 7448 /* 7449 * Give vm_pageout_scan() a chance to convert more 7450 * pages from "clean-in-place" to "clean-and-free", 7451 * if it's interested in the same pages we selected 7452 * in this cluster. 7453 */ 7454 vm_object_unlock(shadow_object); 7455 mutex_pause(2); 7456 vm_object_lock(shadow_object); 7457 } 7458 } 7459 7460 vm_object_paging_end(shadow_object); 7461 vm_object_unlock(shadow_object); 7462 7463 if(isVectorUPL && subupl_size) 7464 goto process_upl_to_encrypt; 7465} 7466 7467#else /* CRYPTO */ 7468void 7469upl_encrypt( 7470 __unused upl_t upl, 7471 __unused upl_offset_t crypt_offset, 7472 __unused upl_size_t crypt_size) 7473{ 7474} 7475 7476void 7477vm_page_encrypt( 7478 __unused vm_page_t page, 7479 __unused vm_map_offset_t kernel_mapping_offset) 7480{ 7481} 7482 7483void 7484vm_page_decrypt( 7485 __unused vm_page_t page, 7486 __unused vm_map_offset_t kernel_mapping_offset) 7487{ 7488} 7489 7490#endif /* CRYPTO */ 7491 7492/* 7493 * page->object must be locked 7494 */ 7495void 7496vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked) 7497{ 7498 if (!queues_locked) { 7499 vm_page_lockspin_queues(); 7500 } 7501 7502 /* 7503 * need to drop the laundry count... 7504 * we may also need to remove it 7505 * from the I/O paging queue... 7506 * vm_pageout_throttle_up handles both cases 7507 * 7508 * the laundry and pageout_queue flags are cleared... 7509 */ 7510 vm_pageout_throttle_up(page); 7511 7512 vm_page_steal_pageout_page++; 7513 7514 if (!queues_locked) { 7515 vm_page_unlock_queues(); 7516 } 7517} 7518 7519upl_t 7520vector_upl_create(vm_offset_t upl_offset) 7521{ 7522 int vector_upl_size = sizeof(struct _vector_upl); 7523 int i=0; 7524 upl_t upl; 7525 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size); 7526 7527 upl = upl_create(0,UPL_VECTOR,0); 7528 upl->vector_upl = vector_upl; 7529 upl->offset = upl_offset; 7530 vector_upl->size = 0; 7531 vector_upl->offset = upl_offset; 7532 vector_upl->invalid_upls=0; 7533 vector_upl->num_upls=0; 7534 vector_upl->pagelist = NULL; 7535 7536 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) { 7537 vector_upl->upl_iostates[i].size = 0; 7538 vector_upl->upl_iostates[i].offset = 0; 7539 7540 } 7541 return upl; 7542} 7543 7544void 7545vector_upl_deallocate(upl_t upl) 7546{ 7547 if(upl) { 7548 vector_upl_t vector_upl = upl->vector_upl; 7549 if(vector_upl) { 7550 if(vector_upl->invalid_upls != vector_upl->num_upls) 7551 panic("Deallocating non-empty Vectored UPL\n"); 7552 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE))); 7553 vector_upl->invalid_upls=0; 7554 vector_upl->num_upls = 0; 7555 vector_upl->pagelist = NULL; 7556 vector_upl->size = 0; 7557 vector_upl->offset = 0; 7558 kfree(vector_upl, sizeof(struct _vector_upl)); 7559 vector_upl = (vector_upl_t)0xfeedfeed; 7560 } 7561 else 7562 panic("vector_upl_deallocate was passed a non-vectored upl\n"); 7563 } 7564 else 7565 panic("vector_upl_deallocate was passed a NULL upl\n"); 7566} 7567 7568boolean_t 7569vector_upl_is_valid(upl_t upl) 7570{ 7571 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) { 7572 vector_upl_t vector_upl = upl->vector_upl; 7573 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef) 7574 return FALSE; 7575 else 7576 return TRUE; 7577 } 7578 return FALSE; 7579} 7580 7581boolean_t 7582vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t 
io_size) 7583{ 7584 if(vector_upl_is_valid(upl)) { 7585 vector_upl_t vector_upl = upl->vector_upl; 7586 7587 if(vector_upl) { 7588 if(subupl) { 7589 if(io_size) { 7590 if(io_size < PAGE_SIZE) 7591 io_size = PAGE_SIZE; 7592 subupl->vector_upl = (void*)vector_upl; 7593 vector_upl->upl_elems[vector_upl->num_upls++] = subupl; 7594 vector_upl->size += io_size; 7595 upl->size += io_size; 7596 } 7597 else { 7598 uint32_t i=0,invalid_upls=0; 7599 for(i = 0; i < vector_upl->num_upls; i++) { 7600 if(vector_upl->upl_elems[i] == subupl) 7601 break; 7602 } 7603 if(i == vector_upl->num_upls) 7604 panic("Trying to remove sub-upl when none exists"); 7605 7606 vector_upl->upl_elems[i] = NULL; 7607 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1); 7608 if(invalid_upls == vector_upl->num_upls) 7609 return TRUE; 7610 else 7611 return FALSE; 7612 } 7613 } 7614 else 7615 panic("vector_upl_set_subupl was passed a NULL upl element\n"); 7616 } 7617 else 7618 panic("vector_upl_set_subupl was passed a non-vectored upl\n"); 7619 } 7620 else 7621 panic("vector_upl_set_subupl was passed a NULL upl\n"); 7622 7623 return FALSE; 7624} 7625 7626void 7627vector_upl_set_pagelist(upl_t upl) 7628{ 7629 if(vector_upl_is_valid(upl)) { 7630 uint32_t i=0; 7631 vector_upl_t vector_upl = upl->vector_upl; 7632 7633 if(vector_upl) { 7634 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0; 7635 7636 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)); 7637 7638 for(i=0; i < vector_upl->num_upls; i++) { 7639 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE; 7640 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size); 7641 pagelist_size += cur_upl_pagelist_size; 7642 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page) 7643 upl->highest_page = vector_upl->upl_elems[i]->highest_page; 7644 } 7645 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) ); 7646 } 7647 else 7648 panic("vector_upl_set_pagelist was passed a non-vectored upl\n"); 7649 } 7650 else 7651 panic("vector_upl_set_pagelist was passed a NULL upl\n"); 7652 7653} 7654 7655upl_t 7656vector_upl_subupl_byindex(upl_t upl, uint32_t index) 7657{ 7658 if(vector_upl_is_valid(upl)) { 7659 vector_upl_t vector_upl = upl->vector_upl; 7660 if(vector_upl) { 7661 if(index < vector_upl->num_upls) 7662 return vector_upl->upl_elems[index]; 7663 } 7664 else 7665 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n"); 7666 } 7667 return NULL; 7668} 7669 7670upl_t 7671vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size) 7672{ 7673 if(vector_upl_is_valid(upl)) { 7674 uint32_t i=0; 7675 vector_upl_t vector_upl = upl->vector_upl; 7676 7677 if(vector_upl) { 7678 upl_t subupl = NULL; 7679 vector_upl_iostates_t subupl_state; 7680 7681 for(i=0; i < vector_upl->num_upls; i++) { 7682 subupl = vector_upl->upl_elems[i]; 7683 subupl_state = vector_upl->upl_iostates[i]; 7684 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) { 7685 /* We could have been passed an offset/size pair that belongs 7686 * to an UPL element that has already been committed/aborted. 7687 * If so, return NULL. 
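 * Otherwise *upl_offset and *upl_size are clipped and rebased to the
 * matching sub-UPL's iostate, so the caller can operate on that
 * element directly.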
upl_t
vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
{
	if (vector_upl_is_valid(upl)) {
		uint32_t i = 0;
		vector_upl_t vector_upl = upl->vector_upl;

		if (vector_upl) {
			upl_t subupl = NULL;
			vector_upl_iostates_t subupl_state;

			for (i = 0; i < vector_upl->num_upls; i++) {
				subupl = vector_upl->upl_elems[i];
				subupl_state = vector_upl->upl_iostates[i];
				if (*upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
					/* We could have been passed an offset/size pair that belongs
					 * to an UPL element that has already been committed/aborted.
					 * If so, return NULL.
					 */
					if (subupl == NULL)
						return NULL;
					if ((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
						*upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
						if (*upl_size > subupl_state.size)
							*upl_size = subupl_state.size;
					}
					if (*upl_offset >= subupl_state.offset)
						*upl_offset -= subupl_state.offset;
					else if (i)
						panic("Vector UPL offset miscalculation\n");
					return subupl;
				}
			}
		}
		else
			panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
	}
	return NULL;
}

void
vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
{
	*v_upl_submap = NULL;

	if (vector_upl_is_valid(upl)) {
		vector_upl_t vector_upl = upl->vector_upl;
		if (vector_upl) {
			*v_upl_submap = vector_upl->submap;
			*submap_dst_addr = vector_upl->submap_dst_addr;
		}
		else
			panic("vector_upl_get_submap was passed a non-vectored UPL\n");
	}
	else
		panic("vector_upl_get_submap was passed a NULL UPL\n");
}

void
vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
{
	if (vector_upl_is_valid(upl)) {
		vector_upl_t vector_upl = upl->vector_upl;
		if (vector_upl) {
			vector_upl->submap = submap;
			vector_upl->submap_dst_addr = submap_dst_addr;
		}
		else
			panic("vector_upl_set_submap was passed a non-vectored UPL\n");
	}
	else
		panic("vector_upl_set_submap was passed a NULL UPL\n");
}

void
vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
{
	if (vector_upl_is_valid(upl)) {
		uint32_t i = 0;
		vector_upl_t vector_upl = upl->vector_upl;

		if (vector_upl) {
			for (i = 0; i < vector_upl->num_upls; i++) {
				if (vector_upl->upl_elems[i] == subupl)
					break;
			}

			if (i == vector_upl->num_upls)
				panic("setting sub-upl iostate when none exists");

			vector_upl->upl_iostates[i].offset = offset;
			if (size < PAGE_SIZE)
				size = PAGE_SIZE;
			vector_upl->upl_iostates[i].size = size;
		}
		else
			panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
	}
	else
		panic("vector_upl_set_iostate was passed a NULL UPL\n");
}

void
vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
{
	if (vector_upl_is_valid(upl)) {
		uint32_t i = 0;
		vector_upl_t vector_upl = upl->vector_upl;

		if (vector_upl) {
			for (i = 0; i < vector_upl->num_upls; i++) {
				if (vector_upl->upl_elems[i] == subupl)
					break;
			}

			if (i == vector_upl->num_upls)
				panic("getting sub-upl iostate when none exists");

			*offset = vector_upl->upl_iostates[i].offset;
			*size = vector_upl->upl_iostates[i].size;
		}
		else
			panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
	}
	else
		panic("vector_upl_get_iostate was passed a NULL UPL\n");
}
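#if 0	/* Illustrative only -- not part of the original source. */
/*
 * Worked example of the offset translation done by
 * vector_upl_subupl_byoffset().  Suppose sub-UPL i is the first element
 * whose iostate window [offset = 0x4000, size = 0x3000] reaches the
 * requested offset, and the caller passes (*upl_offset = 0x5000,
 * *upl_size = 0x4000):
 *
 *   - 0x5000 <= 0x4000 + 0x3000 - 1, so sub-UPL i is selected;
 *   - the request runs past the window, so *upl_size is clipped to
 *     (0x4000 + 0x3000) - 0x5000 = 0x2000;
 *   - *upl_offset is rebased to the sub-UPL: 0x5000 - 0x4000 = 0x1000.
 *
 * The hypothetical helper below just exercises that path.
 */
static upl_t
lookup_subupl_example(upl_t vupl)
{
	upl_offset_t	offset = 0x5000;
	upl_size_t	size   = 0x4000;
	upl_t		subupl;

	subupl = vector_upl_subupl_byoffset(vupl, &offset, &size);
	/* on return (with the windows assumed above): offset == 0x1000, size == 0x2000 */
	return subupl;
}
#endif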
void
vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
{
	if (vector_upl_is_valid(upl)) {
		vector_upl_t vector_upl = upl->vector_upl;
		if (vector_upl) {
			if (index < vector_upl->num_upls) {
				*offset = vector_upl->upl_iostates[index].offset;
				*size = vector_upl->upl_iostates[index].size;
			}
			else
				*offset = *size = 0;
		}
		else
			panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
	}
	else
		panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
}

upl_page_info_t *
upl_get_internal_vectorupl_pagelist(upl_t upl)
{
	return ((vector_upl_t)(upl->vector_upl))->pagelist;
}

void *
upl_get_internal_vectorupl(upl_t upl)
{
	return upl->vector_upl;
}

vm_size_t
upl_get_internal_pagelist_offset(void)
{
	return sizeof(struct upl);
}

void
upl_clear_dirty(
	upl_t		upl,
	boolean_t	value)
{
	if (value) {
		upl->flags |= UPL_CLEAR_DIRTY;
	} else {
		upl->flags &= ~UPL_CLEAR_DIRTY;
	}
}

void
upl_set_referenced(
	upl_t		upl,
	boolean_t	value)
{
	upl_lock(upl);
	if (value) {
		upl->ext_ref_count++;
	} else {
		if (!upl->ext_ref_count) {
			panic("upl_set_referenced not %p\n", upl);
		}
		upl->ext_ref_count--;
	}
	upl_unlock(upl);
}

boolean_t
vm_page_is_slideable(vm_page_t m)
{
	boolean_t result = FALSE;
	vm_object_t slide_object = slide_info.slide_object;
	mach_vm_offset_t start = slide_info.start;
	mach_vm_offset_t end = slide_info.end;

	/* make sure our page belongs to the one object allowed to do this */
	if (slide_object == VM_OBJECT_NULL) {
		return result;
	}

	/* Should we traverse down the chain? */
	if (m->object != slide_object) {
		return result;
	}

	if (!m->slid && (start <= m->offset && end > m->offset)) {
		result = TRUE;
	}
	return result;
}
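#if 0	/* Illustrative only -- not part of the original source. */
/*
 * Sketch of the check-then-slide pattern a fault-path caller might follow,
 * assuming it holds the page's object lock as vm_page_slide() (below)
 * expects.  "m" is a hypothetical in-transit page; passing 0 as the kernel
 * mapping offset lets vm_page_slide() map the page itself.
 */
static kern_return_t
slide_if_needed_example(vm_page_t m)
{
	kern_return_t kr = KERN_SUCCESS;

	if (vm_page_is_slideable(m)) {
		kr = vm_page_slide(m, 0);
		if (kr != KERN_SUCCESS) {
			/* the page was marked in error; the caller treats this as a failed fault */
		}
	}
	return kr;
}
#endif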
int vm_page_slide_counter = 0;
int vm_page_slide_errors = 0;

kern_return_t
vm_page_slide(
	vm_page_t	page,
	vm_map_offset_t	kernel_mapping_offset)
{
	kern_return_t		kr;
	vm_map_size_t		kernel_mapping_size;
	vm_offset_t		kernel_vaddr;
	uint32_t		pageIndex = 0;

	assert(!page->slid);

	if (page->error)
		return KERN_FAILURE;

	/*
	 * Take a paging-in-progress reference to keep the object
	 * alive even if we have to unlock it (in vm_paging_map_object()
	 * for example)...
	 */
	vm_object_paging_begin(page->object);

	if (kernel_mapping_offset == 0) {
		/*
		 * The page hasn't already been mapped in kernel space
		 * by the caller.  Map it now, so that we can access
		 * its contents and slide them.
		 */
		kernel_mapping_size = PAGE_SIZE;
		kr = vm_paging_map_object(&kernel_mapping_offset,
					  page,
					  page->object,
					  page->offset,
					  &kernel_mapping_size,
					  VM_PROT_READ | VM_PROT_WRITE,
					  FALSE);
		if (kr != KERN_SUCCESS) {
			panic("vm_page_slide: "
			      "could not map page in kernel: 0x%x\n",
			      kr);
		}
	} else {
		kernel_mapping_size = 0;
	}
	kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset);

	/*
	 * Slide the pointers on the page.
	 */

	/* assert that slide_info.start/end are page-aligned? */

	pageIndex = (uint32_t)((page->offset - slide_info.start) / PAGE_SIZE);
	kr = vm_shared_region_slide(kernel_vaddr, pageIndex);
	vm_page_slide_counter++;

	/*
	 * Unmap the page from the kernel's address space.
	 */
	if (kernel_mapping_size != 0) {
		vm_paging_unmap_object(page->object,
				       kernel_vaddr,
				       kernel_vaddr + PAGE_SIZE);
	}

	page->dirty = FALSE;
	pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

	if (kr != KERN_SUCCESS || cs_debug > 1) {
		printf("vm_page_slide(%p): "
		       "obj %p off 0x%llx mobj %p moff 0x%llx\n",
		       page,
		       page->object, page->offset,
		       page->object->pager,
		       page->offset + page->object->paging_offset);
	}

	if (kr == KERN_SUCCESS) {
		page->slid = TRUE;
	} else {
		page->error = TRUE;
		vm_page_slide_errors++;
	}

	vm_object_paging_end(page->object);

	return kr;
}


#ifdef MACH_BSD

boolean_t upl_device_page(upl_page_info_t *upl)
{
	return(UPL_DEVICE_PAGE(upl));
}
boolean_t upl_page_present(upl_page_info_t *upl, int index)
{
	return(UPL_PAGE_PRESENT(upl, index));
}
boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
{
	return(UPL_SPECULATIVE_PAGE(upl, index));
}
boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
{
	return(UPL_DIRTY_PAGE(upl, index));
}
boolean_t upl_valid_page(upl_page_info_t *upl, int index)
{
	return(UPL_VALID_PAGE(upl, index));
}
ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
{
	return(UPL_PHYS_PAGE(upl, index));
}
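#if 0	/* Illustrative only -- not part of the original source. */
/*
 * Sketch of how BSD-side code might walk a UPL's page-info array with the
 * accessors above, e.g. to count resident dirty pages before a commit.
 * "pl" would come from UPL_GET_INTERNAL_PAGE_LIST() on a real UPL and
 * "page_count" is the number of pages it describes; both are hypothetical
 * parameters here.
 */
static int
count_dirty_upl_pages_example(upl_page_info_t *pl, int page_count)
{
	int	i, ndirty = 0;

	for (i = 0; i < page_count; i++) {
		if (!upl_page_present(pl, i))
			continue;
		if (upl_valid_page(pl, i) && upl_dirty_page(pl, i))
			ndirty++;
	}
	return ndirty;
}
#endif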

void
vm_countdirtypages(void)
{
	vm_page_t m;
	int dpages;
	int pgopages;
	int precpages;


	dpages = 0;
	pgopages = 0;
	precpages = 0;

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	do {
		if (m == (vm_page_t)0) break;

		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
	vm_page_unlock_queues();

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_throttled);
	do {
		if (m == (vm_page_t)0) break;

		dpages++;
		assert(m->dirty);
		assert(!m->pageout);
		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_throttled, (queue_entry_t) m));
	vm_page_unlock_queues();

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	do {
		if (m == (vm_page_t)0) break;

		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_anonymous, (queue_entry_t) m));
	vm_page_unlock_queues();

	printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);

	dpages = 0;
	pgopages = 0;
	precpages = 0;

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_active);

	do {
		if (m == (vm_page_t)0) break;
		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
	vm_page_unlock_queues();

	printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);

}
#endif /* MACH_BSD */

ppnum_t upl_get_highest_page(
	upl_t	upl)
{
	return upl->highest_page;
}

upl_size_t upl_get_size(
	upl_t	upl)
{
	return upl->size;
}

#if UPL_DEBUG
kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
{
	upl->ubc_alias1 = alias1;
	upl->ubc_alias2 = alias2;
	return KERN_SUCCESS;
}
int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
{
	if (al)
		*al = upl->ubc_alias1;
	if (al2)
		*al2 = upl->ubc_alias2;
	return KERN_SUCCESS;
}
#endif /* UPL_DEBUG */