/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_pageout.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	The proverbial page-out daemon.
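 *
 *	At a glance (a reader's summary of the code that follows):
 *	vm_pageout_scan() balances the active, inactive, speculative and
 *	cleaned page queues against vm_page_free_target / vm_page_free_min,
 *	and hands dirty pages to the internal and external pageout I/O
 *	threads (vm_pageout_queue_internal / vm_pageout_queue_external),
 *	whose depth is throttled through the per-queue pgo_laundry counts.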
 */

#include <stdint.h>

#include <debug.h>
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>
#include <advisory_pageout.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>
#include <mach/sdt.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/sched.h>
#include <kern/thread.h>
#include <kern/xpr.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>
#include <machine/commpage.h>

#include <vm/pmap.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h> /* must be last */
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_compressor.h>

/*
 * ENCRYPTED SWAP:
 */
#include <libkern/crypto/aes.h>
extern u_int32_t random(void);	/* from <libkern/libkern.h> */

extern int cs_debug;

#if UPL_DEBUG
#include <libkern/OSDebug.h>
#endif

extern vm_pressure_level_t memorystatus_vm_pressure_level;
int memorystatus_purge_on_warning = 2;
int memorystatus_purge_on_urgent = 5;
int memorystatus_purge_on_critical = 8;

#if VM_PRESSURE_EVENTS
void vm_pressure_response(void);
boolean_t vm_pressure_thread_running = FALSE;
extern void consider_vm_pressure_events(void);
#endif
boolean_t vm_pressure_changed = FALSE;

#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE	/* maximum iterations of the active queue to move pages to inactive */
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE	100
#endif

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE	/* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE	4096
#endif

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF	100	/* number of pages to move to break deadlock */
#endif

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF	50	/* minimum number of pages to move to the inactive q */
#endif

#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX	128UL	/* maximum pageouts on a given pageout queue */
#endif	/* VM_PAGE_LAUNDRY_MAX */

#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT	30	/* milliseconds */
#endif	/* VM_PAGEOUT_BURST_WAIT */

#ifndef VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT	200	/* milliseconds */
#endif	/* VM_PAGEOUT_EMPTY_WAIT */

#ifndef VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT	300	/* milliseconds */
#endif	/* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_IDLE_WAIT */

#ifndef VM_PAGEOUT_SWAP_WAIT
#define VM_PAGEOUT_SWAP_WAIT	50	/* milliseconds */
#endif	/* VM_PAGEOUT_SWAP_WAIT */

#ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
#define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED	1000	/* maximum pages considered before we issue a pressure event */
#endif	/* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */

#ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
#define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS	5	/* seconds */
#endif	/* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */

unsigned int	vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
unsigned int	vm_page_speculative_percentage = 5;

#ifndef VM_PAGE_SPECULATIVE_TARGET
#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
#endif	/* VM_PAGE_SPECULATIVE_TARGET */


#ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
#define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
#endif	/* VM_PAGE_INACTIVE_HEALTHY_LIMIT */


/*
 * To obtain a reasonable LRU approximation, the inactive queue
 * needs to be large enough to give pages on it a chance to be
 * referenced a second time.  This macro defines the fraction
 * of active+inactive pages that should be inactive.
 * The pageout daemon uses it to update vm_page_inactive_target.
 *
 * If vm_page_free_count falls below vm_page_free_target and
 * vm_page_inactive_count is below vm_page_inactive_target,
 * then the pageout daemon starts running.
 */

#ifndef VM_PAGE_INACTIVE_TARGET
#define VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 2)
#endif	/* VM_PAGE_INACTIVE_TARGET */

/*
 * Once the pageout daemon starts running, it keeps going
 * until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef VM_PAGE_FREE_TARGET
#define VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif	/* VM_PAGE_FREE_TARGET */


/*
 * The pageout daemon always starts running once vm_page_free_count
 * falls below vm_page_free_min.
 */

#ifndef VM_PAGE_FREE_MIN
#define VM_PAGE_FREE_MIN(free)	(10 + (free) / 100)
#endif	/* VM_PAGE_FREE_MIN */

#define VM_PAGE_FREE_RESERVED_LIMIT	100
#define VM_PAGE_FREE_MIN_LIMIT		1500
#define VM_PAGE_FREE_TARGET_LIMIT	2000


/*
 * When vm_page_free_count falls below vm_page_free_reserved,
 * only vm-privileged threads can allocate pages.  vm-privilege
 * allows the pageout daemon and default pager (and any other
 * associated threads needed for default pageout) to continue
 * operation by dipping into the reserved pool of pages.
 */

#ifndef VM_PAGE_FREE_RESERVED
#define VM_PAGE_FREE_RESERVED(n)	\
	((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif	/* VM_PAGE_FREE_RESERVED */

/*
 * When we dequeue pages from the inactive list, they are
 * reactivated (ie, put back on the active queue) if referenced.
 * However, it is possible to starve the free list if other
 * processors are referencing pages faster than we can turn off
 * the referenced bit.  So we limit the number of reactivations
 * we will make per call of vm_pageout_scan().
 */
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
#ifndef VM_PAGE_REACTIVATE_LIMIT
#define VM_PAGE_REACTIVATE_LIMIT(avail)	(MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
#endif	/* VM_PAGE_REACTIVATE_LIMIT */
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM	100


extern boolean_t hibernate_cleaning_in_progress;

/*
 * Exported variable used to broadcast the activation of the pageout scan
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */
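
/*
 * Worked example for the *_TARGET / *_MIN / *_RESERVED macros above
 * (illustrative only; the page count below is hypothetical and this
 * excerpt does not show exactly which counts the macros are applied to):
 *
 *	Suppose the relevant page count is 1,000,000 pages.
 *
 *	VM_PAGE_FREE_TARGET(1000000)     = 15 + 1000000 / 80  = 12515
 *	VM_PAGE_FREE_MIN(1000000)        = 10 + 1000000 / 100 = 10010
 *	VM_PAGE_INACTIVE_TARGET(1000000) = 1000000 / 2        = 500000
 *	VM_PAGE_FREE_RESERVED(n)         = 6 * 128 + n        = 768 + n
 *
 * In other words, the daemon is guaranteed to run once the free count
 * drops below vm_page_free_min, keeps running until vm_page_free_count
 * reaches vm_page_free_target, and tries to keep roughly half of the
 * pageable pages on the inactive queue so they get a chance at a second
 * reference before being reclaimed.
 */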

unsigned int vm_pageout_scan_event_counter = 0;

/*
 * Forward declarations for internal routines.
 */
struct cq {
	struct vm_pageout_queue *q;
	void			*current_chead;
	char			*scratch_buf;
};


#if VM_PRESSURE_EVENTS
void vm_pressure_thread(void);
#endif
static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(struct cq *cq);
static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);

static thread_t	vm_pageout_external_iothread = THREAD_NULL;
static thread_t	vm_pageout_internal_iothread = THREAD_NULL;

unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_swap_wait = 0;
unsigned int vm_pageout_idle_wait = 0;		/* milliseconds */
unsigned int vm_pageout_empty_wait = 0;		/* milliseconds */
unsigned int vm_pageout_burst_wait = 0;		/* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;	/* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

int vm_upl_wait_for_pages = 0;


/*
 * These variables record the pageout daemon's actions:
 * how many pages it looks at and what happens to those pages.
 * No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;		/* debugging */
unsigned int vm_pageout_active_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive = 0;		/* debugging */
unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive_error = 0;	/* debugging */
unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
unsigned int vm_pageout_inactive_notalive = 0;	/* debugging */
unsigned int vm_pageout_inactive_used = 0;	/* debugging */
unsigned int vm_pageout_cache_evicted = 0;	/* debugging */
unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
unsigned int vm_pageout_speculative_clean = 0;	/* debugging */

unsigned int vm_pageout_freed_from_cleaned = 0;
unsigned int vm_pageout_freed_from_speculative = 0;
unsigned int vm_pageout_freed_from_inactive_clean = 0;

unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;

unsigned int vm_pageout_cleaned_reclaimed = 0;		/* debugging; how many cleaned pages are reclaimed by the pageout scan */
unsigned int vm_pageout_cleaned_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_reference_reactivated = 0;
unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
unsigned int vm_pageout_cleaned_fault_reactivated = 0;
unsigned int vm_pageout_cleaned_commit_reactivated = 0;	/* debugging; how many cleaned pages are found
to be referenced on commit (and are therefore reactivated) */ 350unsigned int vm_pageout_cleaned_busy = 0; 351unsigned int vm_pageout_cleaned_nolock = 0; 352 353unsigned int vm_pageout_inactive_dirty_internal = 0; /* debugging */ 354unsigned int vm_pageout_inactive_dirty_external = 0; /* debugging */ 355unsigned int vm_pageout_inactive_deactivated = 0; /* debugging */ 356unsigned int vm_pageout_inactive_anonymous = 0; /* debugging */ 357unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */ 358unsigned int vm_pageout_purged_objects = 0; /* debugging */ 359unsigned int vm_stat_discard = 0; /* debugging */ 360unsigned int vm_stat_discard_sent = 0; /* debugging */ 361unsigned int vm_stat_discard_failure = 0; /* debugging */ 362unsigned int vm_stat_discard_throttle = 0; /* debugging */ 363unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */ 364unsigned int vm_pageout_catch_ups = 0; /* debugging */ 365unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */ 366 367unsigned int vm_pageout_scan_reclaimed_throttled = 0; 368unsigned int vm_pageout_scan_active_throttled = 0; 369unsigned int vm_pageout_scan_inactive_throttled_internal = 0; 370unsigned int vm_pageout_scan_inactive_throttled_external = 0; 371unsigned int vm_pageout_scan_throttle = 0; /* debugging */ 372unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */ 373unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */ 374unsigned int vm_pageout_scan_swap_throttle = 0; /* debugging */ 375unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */ 376unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */ 377unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */ 378unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0; /* debugging */ 379unsigned int vm_page_speculative_count_drifts = 0; 380unsigned int vm_page_speculative_count_drift_max = 0; 381 382 383/* 384 * Backing store throttle when BS is exhausted 385 */ 386unsigned int vm_backing_store_low = 0; 387 388unsigned int vm_pageout_out_of_line = 0; 389unsigned int vm_pageout_in_place = 0; 390 391unsigned int vm_page_steal_pageout_page = 0; 392 393/* 394 * ENCRYPTED SWAP: 395 * counters and statistics... 396 */ 397unsigned long vm_page_decrypt_counter = 0; 398unsigned long vm_page_decrypt_for_upl_counter = 0; 399unsigned long vm_page_encrypt_counter = 0; 400unsigned long vm_page_encrypt_abort_counter = 0; 401unsigned long vm_page_encrypt_already_encrypted_counter = 0; 402boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? 
*/ 403 404struct vm_pageout_queue vm_pageout_queue_internal; 405struct vm_pageout_queue vm_pageout_queue_external; 406 407unsigned int vm_page_speculative_target = 0; 408 409vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL; 410 411boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL; 412 413#if DEVELOPMENT || DEBUG 414unsigned long vm_cs_validated_resets = 0; 415#endif 416 417int vm_debug_events = 0; 418 419#if CONFIG_MEMORYSTATUS 420#if !CONFIG_JETSAM 421extern boolean_t memorystatus_idle_exit_from_VM(void); 422#endif 423extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async); 424extern void memorystatus_on_pageout_scan_end(void); 425#endif 426 427boolean_t vm_page_compressions_failing = FALSE; 428 429/* 430 * Routine: vm_backing_store_disable 431 * Purpose: 432 * Suspend non-privileged threads wishing to extend 433 * backing store when we are low on backing store 434 * (Synchronized by caller) 435 */ 436void 437vm_backing_store_disable( 438 boolean_t disable) 439{ 440 if(disable) { 441 vm_backing_store_low = 1; 442 } else { 443 if(vm_backing_store_low) { 444 vm_backing_store_low = 0; 445 thread_wakeup((event_t) &vm_backing_store_low); 446 } 447 } 448} 449 450 451#if MACH_CLUSTER_STATS 452unsigned long vm_pageout_cluster_dirtied = 0; 453unsigned long vm_pageout_cluster_cleaned = 0; 454unsigned long vm_pageout_cluster_collisions = 0; 455unsigned long vm_pageout_cluster_clusters = 0; 456unsigned long vm_pageout_cluster_conversions = 0; 457unsigned long vm_pageout_target_collisions = 0; 458unsigned long vm_pageout_target_page_dirtied = 0; 459unsigned long vm_pageout_target_page_freed = 0; 460#define CLUSTER_STAT(clause) clause 461#else /* MACH_CLUSTER_STATS */ 462#define CLUSTER_STAT(clause) 463#endif /* MACH_CLUSTER_STATS */ 464 465/* 466 * Routine: vm_pageout_object_terminate 467 * Purpose: 468 * Destroy the pageout_object, and perform all of the 469 * required cleanup actions. 470 * 471 * In/Out conditions: 472 * The object must be locked, and will be returned locked. 473 */ 474void 475vm_pageout_object_terminate( 476 vm_object_t object) 477{ 478 vm_object_t shadow_object; 479 480 /* 481 * Deal with the deallocation (last reference) of a pageout object 482 * (used for cleaning-in-place) by dropping the paging references/ 483 * freeing pages in the original object. 484 */ 485 486 assert(object->pageout); 487 shadow_object = object->shadow; 488 vm_object_lock(shadow_object); 489 490 while (!queue_empty(&object->memq)) { 491 vm_page_t p, m; 492 vm_object_offset_t offset; 493 494 p = (vm_page_t) queue_first(&object->memq); 495 496 assert(p->private); 497 assert(p->pageout); 498 p->pageout = FALSE; 499 assert(!p->cleaning); 500 assert(!p->laundry); 501 502 offset = p->offset; 503 VM_PAGE_FREE(p); 504 p = VM_PAGE_NULL; 505 506 m = vm_page_lookup(shadow_object, 507 offset + object->vo_shadow_offset); 508 509 if(m == VM_PAGE_NULL) 510 continue; 511 512 assert((m->dirty) || (m->precious) || 513 (m->busy && m->cleaning)); 514 515 /* 516 * Handle the trusted pager throttle. 517 * Also decrement the burst throttle (if external). 518 */ 519 vm_page_lock_queues(); 520 if (m->pageout_queue) 521 vm_pageout_throttle_up(m); 522 523 /* 524 * Handle the "target" page(s). These pages are to be freed if 525 * successfully cleaned. Target pages are always busy, and are 526 * wired exactly once. 
The initial target pages are not mapped, 527 * (so cannot be referenced or modified) but converted target 528 * pages may have been modified between the selection as an 529 * adjacent page and conversion to a target. 530 */ 531 if (m->pageout) { 532 assert(m->busy); 533 assert(m->wire_count == 1); 534 m->cleaning = FALSE; 535 m->encrypted_cleaning = FALSE; 536 m->pageout = FALSE; 537#if MACH_CLUSTER_STATS 538 if (m->wanted) vm_pageout_target_collisions++; 539#endif 540 /* 541 * Revoke all access to the page. Since the object is 542 * locked, and the page is busy, this prevents the page 543 * from being dirtied after the pmap_disconnect() call 544 * returns. 545 * 546 * Since the page is left "dirty" but "not modifed", we 547 * can detect whether the page was redirtied during 548 * pageout by checking the modify state. 549 */ 550 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) { 551 SET_PAGE_DIRTY(m, FALSE); 552 } else { 553 m->dirty = FALSE; 554 } 555 556 if (m->dirty) { 557 CLUSTER_STAT(vm_pageout_target_page_dirtied++;) 558 vm_page_unwire(m, TRUE); /* reactivates */ 559 VM_STAT_INCR(reactivations); 560 PAGE_WAKEUP_DONE(m); 561 } else { 562 CLUSTER_STAT(vm_pageout_target_page_freed++;) 563 vm_page_free(m);/* clears busy, etc. */ 564 } 565 vm_page_unlock_queues(); 566 continue; 567 } 568 /* 569 * Handle the "adjacent" pages. These pages were cleaned in 570 * place, and should be left alone. 571 * If prep_pin_count is nonzero, then someone is using the 572 * page, so make it active. 573 */ 574 if (!m->active && !m->inactive && !m->throttled && !m->private) { 575 if (m->reference) 576 vm_page_activate(m); 577 else 578 vm_page_deactivate(m); 579 } 580 if (m->overwriting) { 581 /* 582 * the (COPY_OUT_FROM == FALSE) request_page_list case 583 */ 584 if (m->busy) { 585 /* 586 * We do not re-set m->dirty ! 587 * The page was busy so no extraneous activity 588 * could have occurred. COPY_INTO is a read into the 589 * new pages. CLEAN_IN_PLACE does actually write 590 * out the pages but handling outside of this code 591 * will take care of resetting dirty. We clear the 592 * modify however for the Programmed I/O case. 593 */ 594 pmap_clear_modify(m->phys_page); 595 596 m->busy = FALSE; 597 m->absent = FALSE; 598 } else { 599 /* 600 * alternate (COPY_OUT_FROM == FALSE) request_page_list case 601 * Occurs when the original page was wired 602 * at the time of the list request 603 */ 604 assert(VM_PAGE_WIRED(m)); 605 vm_page_unwire(m, TRUE); /* reactivates */ 606 } 607 m->overwriting = FALSE; 608 } else { 609 /* 610 * Set the dirty state according to whether or not the page was 611 * modified during the pageout. Note that we purposefully do 612 * NOT call pmap_clear_modify since the page is still mapped. 613 * If the page were to be dirtied between the 2 calls, this 614 * this fact would be lost. This code is only necessary to 615 * maintain statistics, since the pmap module is always 616 * consulted if m->dirty is false. 617 */ 618#if MACH_CLUSTER_STATS 619 m->dirty = pmap_is_modified(m->phys_page); 620 621 if (m->dirty) vm_pageout_cluster_dirtied++; 622 else vm_pageout_cluster_cleaned++; 623 if (m->wanted) vm_pageout_cluster_collisions++; 624#else 625 m->dirty = FALSE; 626#endif 627 } 628 if (m->encrypted_cleaning == TRUE) { 629 m->encrypted_cleaning = FALSE; 630 m->busy = FALSE; 631 } 632 m->cleaning = FALSE; 633 634 /* 635 * Wakeup any thread waiting for the page to be un-cleaning. 
636 */ 637 PAGE_WAKEUP(m); 638 vm_page_unlock_queues(); 639 } 640 /* 641 * Account for the paging reference taken in vm_paging_object_allocate. 642 */ 643 vm_object_activity_end(shadow_object); 644 vm_object_unlock(shadow_object); 645 646 assert(object->ref_count == 0); 647 assert(object->paging_in_progress == 0); 648 assert(object->activity_in_progress == 0); 649 assert(object->resident_page_count == 0); 650 return; 651} 652 653/* 654 * Routine: vm_pageclean_setup 655 * 656 * Purpose: setup a page to be cleaned (made non-dirty), but not 657 * necessarily flushed from the VM page cache. 658 * This is accomplished by cleaning in place. 659 * 660 * The page must not be busy, and new_object 661 * must be locked. 662 * 663 */ 664void 665vm_pageclean_setup( 666 vm_page_t m, 667 vm_page_t new_m, 668 vm_object_t new_object, 669 vm_object_offset_t new_offset) 670{ 671 assert(!m->busy); 672#if 0 673 assert(!m->cleaning); 674#endif 675 676 XPR(XPR_VM_PAGEOUT, 677 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n", 678 m->object, m->offset, m, 679 new_m, new_offset); 680 681 pmap_clear_modify(m->phys_page); 682 683 /* 684 * Mark original page as cleaning in place. 685 */ 686 m->cleaning = TRUE; 687 SET_PAGE_DIRTY(m, FALSE); 688 m->precious = FALSE; 689 690 /* 691 * Convert the fictitious page to a private shadow of 692 * the real page. 693 */ 694 assert(new_m->fictitious); 695 assert(new_m->phys_page == vm_page_fictitious_addr); 696 new_m->fictitious = FALSE; 697 new_m->private = TRUE; 698 new_m->pageout = TRUE; 699 new_m->phys_page = m->phys_page; 700 701 vm_page_lockspin_queues(); 702 vm_page_wire(new_m); 703 vm_page_unlock_queues(); 704 705 vm_page_insert(new_m, new_object, new_offset); 706 assert(!new_m->wanted); 707 new_m->busy = FALSE; 708} 709 710/* 711 * Routine: vm_pageout_initialize_page 712 * Purpose: 713 * Causes the specified page to be initialized in 714 * the appropriate memory object. This routine is used to push 715 * pages into a copy-object when they are modified in the 716 * permanent object. 717 * 718 * The page is moved to a temporary object and paged out. 719 * 720 * In/out conditions: 721 * The page in question must not be on any pageout queues. 722 * The object to which it belongs must be locked. 723 * The page must be busy, but not hold a paging reference. 724 * 725 * Implementation: 726 * Move this page to a completely new object. 727 */ 728void 729vm_pageout_initialize_page( 730 vm_page_t m) 731{ 732 vm_object_t object; 733 vm_object_offset_t paging_offset; 734 memory_object_t pager; 735 736 XPR(XPR_VM_PAGEOUT, 737 "vm_pageout_initialize_page, page 0x%X\n", 738 m, 0, 0, 0, 0); 739 assert(m->busy); 740 741 /* 742 * Verify that we really want to clean this page 743 */ 744 assert(!m->absent); 745 assert(!m->error); 746 assert(m->dirty); 747 748 /* 749 * Create a paging reference to let us play with the object. 750 */ 751 object = m->object; 752 paging_offset = m->offset + object->paging_offset; 753 754 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) { 755 VM_PAGE_FREE(m); 756 panic("reservation without pageout?"); /* alan */ 757 vm_object_unlock(object); 758 759 return; 760 } 761 762 /* 763 * If there's no pager, then we can't clean the page. This should 764 * never happen since this should be a copy object and therefore not 765 * an external object, so the pager should always be there. 
766 */ 767 768 pager = object->pager; 769 770 if (pager == MEMORY_OBJECT_NULL) { 771 VM_PAGE_FREE(m); 772 panic("missing pager for copy object"); 773 return; 774 } 775 776 /* 777 * set the page for future call to vm_fault_list_request 778 */ 779 pmap_clear_modify(m->phys_page); 780 SET_PAGE_DIRTY(m, FALSE); 781 m->pageout = TRUE; 782 783 /* 784 * keep the object from collapsing or terminating 785 */ 786 vm_object_paging_begin(object); 787 vm_object_unlock(object); 788 789 /* 790 * Write the data to its pager. 791 * Note that the data is passed by naming the new object, 792 * not a virtual address; the pager interface has been 793 * manipulated to use the "internal memory" data type. 794 * [The object reference from its allocation is donated 795 * to the eventual recipient.] 796 */ 797 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE); 798 799 vm_object_lock(object); 800 vm_object_paging_end(object); 801} 802 803#if MACH_CLUSTER_STATS 804#define MAXCLUSTERPAGES 16 805struct { 806 unsigned long pages_in_cluster; 807 unsigned long pages_at_higher_offsets; 808 unsigned long pages_at_lower_offsets; 809} cluster_stats[MAXCLUSTERPAGES]; 810#endif /* MACH_CLUSTER_STATS */ 811 812 813/* 814 * vm_pageout_cluster: 815 * 816 * Given a page, queue it to the appropriate I/O thread, 817 * which will page it out and attempt to clean adjacent pages 818 * in the same operation. 819 * 820 * The object and queues must be locked. We will take a 821 * paging reference to prevent deallocation or collapse when we 822 * release the object lock back at the call site. The I/O thread 823 * is responsible for consuming this reference 824 * 825 * The page must not be on any pageout queue. 826 */ 827 828void 829vm_pageout_cluster(vm_page_t m, boolean_t pageout) 830{ 831 vm_object_t object = m->object; 832 struct vm_pageout_queue *q; 833 834 835 XPR(XPR_VM_PAGEOUT, 836 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n", 837 object, m->offset, m, 0, 0); 838 839 VM_PAGE_CHECK(m); 840#if DEBUG 841 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); 842#endif 843 vm_object_lock_assert_exclusive(object); 844 845 /* 846 * Only a certain kind of page is appreciated here. 847 */ 848 assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m))); 849 assert(!m->cleaning && !m->pageout && !m->laundry); 850#ifndef CONFIG_FREEZE 851 assert(!m->inactive && !m->active); 852 assert(!m->throttled); 853#endif 854 855 /* 856 * protect the object from collapse or termination 857 */ 858 vm_object_activity_begin(object); 859 860 m->pageout = pageout; 861 862 if (object->internal == TRUE) { 863 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 864 m->busy = TRUE; 865 866 q = &vm_pageout_queue_internal; 867 } else 868 q = &vm_pageout_queue_external; 869 870 /* 871 * pgo_laundry count is tied to the laundry bit 872 */ 873 m->laundry = TRUE; 874 q->pgo_laundry++; 875 876 m->pageout_queue = TRUE; 877 queue_enter(&q->pgo_pending, m, vm_page_t, pageq); 878 879 if (q->pgo_idle == TRUE) { 880 q->pgo_idle = FALSE; 881 thread_wakeup((event_t) &q->pgo_pending); 882 } 883 VM_PAGE_CHECK(m); 884} 885 886 887unsigned long vm_pageout_throttle_up_count = 0; 888 889/* 890 * A page is back from laundry or we are stealing it back from 891 * the laundering state. See if there are some pages waiting to 892 * go to laundry and if we can let some of them go now. 893 * 894 * Object and page queues must be locked. 
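 *
 * Accounting sketch (a reader's note, derived from the code in this file):
 * vm_pageout_cluster() sets m->laundry and increments q->pgo_laundry when
 * it queues a page for an I/O thread; this routine undoes that, and if the
 * queue was marked pgo_throttled (vm_pageout_scan() sleeping on
 * &q->pgo_laundry) or pgo_draining (a waiter on &q->pgo_laundry + 1), it
 * issues the corresponding thread_wakeup().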
 */
void
vm_pageout_throttle_up(
	vm_page_t	m)
{
	struct vm_pageout_queue *q;

	assert(m->object != VM_OBJECT_NULL);
	assert(m->object != kernel_object);

#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(m->object);
#endif

	vm_pageout_throttle_up_count++;

	if (m->object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	if (m->pageout_queue == TRUE) {

		queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
		m->pageout_queue = FALSE;

		m->pageq.next = NULL;
		m->pageq.prev = NULL;

		vm_object_activity_end(m->object);
	}
	if (m->laundry == TRUE) {

		m->laundry = FALSE;
		q->pgo_laundry--;

		if (q->pgo_throttled == TRUE) {
			q->pgo_throttled = FALSE;
			thread_wakeup((event_t) &q->pgo_laundry);
		}
		if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
			q->pgo_draining = FALSE;
			thread_wakeup((event_t) (&q->pgo_laundry+1));
		}
	}
}


static void
vm_pageout_throttle_up_batch(
	struct vm_pageout_queue *q,
	int		batch_cnt)
{
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif

	vm_pageout_throttle_up_count += batch_cnt;

	q->pgo_laundry -= batch_cnt;

	if (q->pgo_throttled == TRUE) {
		q->pgo_throttled = FALSE;
		thread_wakeup((event_t) &q->pgo_laundry);
	}
	if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
		q->pgo_draining = FALSE;
		thread_wakeup((event_t) (&q->pgo_laundry+1));
	}
}



/*
 * VM memory pressure monitoring.
 *
 * vm_pageout_scan() keeps track of the number of pages it considers and
 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
 *
 * compute_memory_pressure() is called every second from compute_averages()
 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
 * of reclaimed pages in a new vm_pageout_stat[] bucket.
 *
 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 * The caller provides the number of seconds ("nsecs") worth of statistics
 * it wants, up to 30 seconds.
 * It computes the number of pages reclaimed in the past "nsecs" seconds and
 * also returns the number of pages the system still needs to reclaim at this
 * moment in time.
 */
#define VM_PAGEOUT_STAT_SIZE	31
struct vm_pageout_stat {
	unsigned int considered;
	unsigned int reclaimed;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
unsigned int vm_pageout_stat_now = 0;
unsigned int vm_memory_pressure = 0;

#define VM_PAGEOUT_STAT_BEFORE(i) \
	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)

#if VM_PAGE_BUCKETS_CHECK
int vm_page_buckets_check_interval = 10; /* in seconds */
#endif /* VM_PAGE_BUCKETS_CHECK */

/*
 * Called from compute_averages().
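 *
 * Illustrative sketch of how the stats ring buffer is read (hypothetical
 * numbers; the real consumers are compute_memory_pressure() here and
 * mach_vm_pressure_monitor() below):
 *
 *	With VM_PAGEOUT_STAT_SIZE == 31 there is one bucket per second for
 *	roughly the last 30 seconds plus the bucket being filled "now".
 *	VM_PAGEOUT_STAT_BEFORE(0) == 30 and VM_PAGEOUT_STAT_AFTER(30) == 0,
 *	so the index simply wraps around the array.
 *
 *	If vm_pageout_stat_now == 7 and a caller asks for 3 seconds of
 *	history, the "reclaimed" counts of buckets 6, 5 and 4 are summed;
 *	bucket 7 is still accumulating and is not included.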
1005 */ 1006void 1007compute_memory_pressure( 1008 __unused void *arg) 1009{ 1010 unsigned int vm_pageout_next; 1011 1012#if VM_PAGE_BUCKETS_CHECK 1013 /* check the consistency of VM page buckets at regular interval */ 1014 static int counter = 0; 1015 if ((++counter % vm_page_buckets_check_interval) == 0) { 1016 vm_page_buckets_check(); 1017 } 1018#endif /* VM_PAGE_BUCKETS_CHECK */ 1019 1020 vm_memory_pressure = 1021 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed; 1022 1023 commpage_set_memory_pressure( vm_memory_pressure ); 1024 1025 /* move "now" forward */ 1026 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now); 1027 vm_pageout_stats[vm_pageout_next].considered = 0; 1028 vm_pageout_stats[vm_pageout_next].reclaimed = 0; 1029 vm_pageout_stat_now = vm_pageout_next; 1030} 1031 1032 1033/* 1034 * IMPORTANT 1035 * mach_vm_ctl_page_free_wanted() is called indirectly, via 1036 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore, 1037 * it must be safe in the restricted stackshot context. Locks and/or 1038 * blocking are not allowable. 1039 */ 1040unsigned int 1041mach_vm_ctl_page_free_wanted(void) 1042{ 1043 unsigned int page_free_target, page_free_count, page_free_wanted; 1044 1045 page_free_target = vm_page_free_target; 1046 page_free_count = vm_page_free_count; 1047 if (page_free_target > page_free_count) { 1048 page_free_wanted = page_free_target - page_free_count; 1049 } else { 1050 page_free_wanted = 0; 1051 } 1052 1053 return page_free_wanted; 1054} 1055 1056 1057/* 1058 * IMPORTANT: 1059 * mach_vm_pressure_monitor() is called when taking a stackshot, with 1060 * wait_for_pressure FALSE, so that code path must remain safe in the 1061 * restricted stackshot context. No blocking or locks are allowable. 1062 * on that code path. 1063 */ 1064 1065kern_return_t 1066mach_vm_pressure_monitor( 1067 boolean_t wait_for_pressure, 1068 unsigned int nsecs_monitored, 1069 unsigned int *pages_reclaimed_p, 1070 unsigned int *pages_wanted_p) 1071{ 1072 wait_result_t wr; 1073 unsigned int vm_pageout_then, vm_pageout_now; 1074 unsigned int pages_reclaimed; 1075 1076 /* 1077 * We don't take the vm_page_queue_lock here because we don't want 1078 * vm_pressure_monitor() to get in the way of the vm_pageout_scan() 1079 * thread when it's trying to reclaim memory. We don't need fully 1080 * accurate monitoring anyway... 1081 */ 1082 1083 if (wait_for_pressure) { 1084 /* wait until there's memory pressure */ 1085 while (vm_page_free_count >= vm_page_free_target) { 1086 wr = assert_wait((event_t) &vm_page_free_wanted, 1087 THREAD_INTERRUPTIBLE); 1088 if (wr == THREAD_WAITING) { 1089 wr = thread_block(THREAD_CONTINUE_NULL); 1090 } 1091 if (wr == THREAD_INTERRUPTED) { 1092 return KERN_ABORTED; 1093 } 1094 if (wr == THREAD_AWAKENED) { 1095 /* 1096 * The memory pressure might have already 1097 * been relieved but let's not block again 1098 * and let's report that there was memory 1099 * pressure at some point. 
1100 */ 1101 break; 1102 } 1103 } 1104 } 1105 1106 /* provide the number of pages the system wants to reclaim */ 1107 if (pages_wanted_p != NULL) { 1108 *pages_wanted_p = mach_vm_ctl_page_free_wanted(); 1109 } 1110 1111 if (pages_reclaimed_p == NULL) { 1112 return KERN_SUCCESS; 1113 } 1114 1115 /* provide number of pages reclaimed in the last "nsecs_monitored" */ 1116 do { 1117 vm_pageout_now = vm_pageout_stat_now; 1118 pages_reclaimed = 0; 1119 for (vm_pageout_then = 1120 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now); 1121 vm_pageout_then != vm_pageout_now && 1122 nsecs_monitored-- != 0; 1123 vm_pageout_then = 1124 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) { 1125 pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed; 1126 } 1127 } while (vm_pageout_now != vm_pageout_stat_now); 1128 *pages_reclaimed_p = pages_reclaimed; 1129 1130 return KERN_SUCCESS; 1131} 1132 1133 1134 1135/* 1136 * function in BSD to apply I/O throttle to the pageout thread 1137 */ 1138extern void vm_pageout_io_throttle(void); 1139 1140 1141#if LATENCY_JETSAM 1142boolean_t jlp_init = FALSE; 1143uint64_t jlp_time = 0, jlp_current = 0; 1144struct vm_page jetsam_latency_page[NUM_OF_JETSAM_LATENCY_TOKENS]; 1145unsigned int latency_jetsam_wakeup = 0; 1146#endif /* LATENCY_JETSAM */ 1147 1148/* 1149 * Page States: Used below to maintain the page state 1150 * before it's removed from it's Q. This saved state 1151 * helps us do the right accounting in certain cases 1152 */ 1153#define PAGE_STATE_SPECULATIVE 1 1154#define PAGE_STATE_ANONYMOUS 2 1155#define PAGE_STATE_INACTIVE 3 1156#define PAGE_STATE_INACTIVE_FIRST 4 1157#define PAGE_STATE_CLEAN 5 1158 1159 1160#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m) \ 1161 MACRO_BEGIN \ 1162 /* \ 1163 * If a "reusable" page somehow made it back into \ 1164 * the active queue, it's been re-used and is not \ 1165 * quite re-usable. \ 1166 * If the VM object was "all_reusable", consider it \ 1167 * as "all re-used" instead of converting it to \ 1168 * "partially re-used", which could be expensive. \ 1169 */ \ 1170 if ((m)->reusable || \ 1171 (m)->object->all_reusable) { \ 1172 vm_object_reuse_pages((m)->object, \ 1173 (m)->offset, \ 1174 (m)->offset + PAGE_SIZE_64, \ 1175 FALSE); \ 1176 } \ 1177 MACRO_END 1178 1179 1180#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64 1181#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024 1182 1183#define FCS_IDLE 0 1184#define FCS_DELAYED 1 1185#define FCS_DEADLOCK_DETECTED 2 1186 1187struct flow_control { 1188 int state; 1189 mach_timespec_t ts; 1190}; 1191 1192uint32_t vm_pageout_considered_page = 0; 1193uint32_t vm_page_filecache_min = 0; 1194 1195#define VM_PAGE_FILECACHE_MIN 50000 1196#define ANONS_GRABBED_LIMIT 2 1197 1198/* 1199 * vm_pageout_scan does the dirty work for the pageout daemon. 1200 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock 1201 * held and vm_page_free_wanted == 0. 
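 *
 * Rough shape of the loop below (a reader's summary, not a contract):
 *
 *	1. Move pages from the active to the inactive queue until
 *	   vm_page_inactive_target is met (bounded by
 *	   vm_pageout_burst_active_throttle per pass).
 *	2. If vm_page_free_count has reached vm_page_free_target, re-throttle
 *	   the pageout I/O threads and return.
 *	3. Otherwise try cheaper sources first: purge a ripe volatile object,
 *	   age the speculative queues, or evict from the VM object cache.
 *	4. If the default pageout queue is throttled, run the flow-control
 *	   states (FCS_IDLE -> FCS_DELAYED -> FCS_DEADLOCK_DETECTED) and
 *	   possibly block for the empty / burst / swap / deadlock wait
 *	   periods defined above.
 *	5. Otherwise pick a victim from the anonymous, inactive, cleaned or
 *	   speculative queues and reclaim it or send it to the laundry.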
1202 */ 1203void 1204vm_pageout_scan(void) 1205{ 1206 unsigned int loop_count = 0; 1207 unsigned int inactive_burst_count = 0; 1208 unsigned int active_burst_count = 0; 1209 unsigned int reactivated_this_call; 1210 unsigned int reactivate_limit; 1211 vm_page_t local_freeq = NULL; 1212 int local_freed = 0; 1213 int delayed_unlock; 1214 int delayed_unlock_limit = 0; 1215 int refmod_state = 0; 1216 int vm_pageout_deadlock_target = 0; 1217 struct vm_pageout_queue *iq; 1218 struct vm_pageout_queue *eq; 1219 struct vm_speculative_age_q *sq; 1220 struct flow_control flow_control = { 0, { 0, 0 } }; 1221 boolean_t inactive_throttled = FALSE; 1222 boolean_t try_failed; 1223 mach_timespec_t ts; 1224 unsigned int msecs = 0; 1225 vm_object_t object; 1226 vm_object_t last_object_tried; 1227 uint32_t catch_up_count = 0; 1228 uint32_t inactive_reclaim_run; 1229 boolean_t forced_reclaim; 1230 boolean_t exceeded_burst_throttle; 1231 boolean_t grab_anonymous = FALSE; 1232 boolean_t force_anonymous = FALSE; 1233 int anons_grabbed = 0; 1234 int page_prev_state = 0; 1235 int cache_evict_throttle = 0; 1236 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0; 1237 vm_pressure_level_t pressure_level; 1238 1239 VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START, 1240 vm_pageout_speculative_clean, vm_pageout_inactive_clean, 1241 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); 1242 1243#if LATENCY_JETSAM 1244 if (jlp_init == FALSE) { 1245 int i=0; 1246 vm_page_t jlp; 1247 for(; i < NUM_OF_JETSAM_LATENCY_TOKENS; i++) { 1248 jlp = &jetsam_latency_page[i]; 1249 jlp->fictitious = TRUE; 1250 jlp->offset = 0; 1251 1252 } 1253 jlp = &jetsam_latency_page[0]; 1254 queue_enter(&vm_page_queue_active, jlp, vm_page_t, pageq); 1255 jlp->active = TRUE; 1256 1257 jlp->offset = mach_absolute_time(); 1258 jlp_time = jlp->offset; 1259 jlp_current++; 1260 jlp_init = TRUE; 1261 } 1262#endif /* LATENCY_JETSAM */ 1263 1264 flow_control.state = FCS_IDLE; 1265 iq = &vm_pageout_queue_internal; 1266 eq = &vm_pageout_queue_external; 1267 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; 1268 1269 1270 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0); 1271 1272 1273 vm_page_lock_queues(); 1274 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */ 1275 1276 /* 1277 * Calculate the max number of referenced pages on the inactive 1278 * queue that we will reactivate. 1279 */ 1280 reactivated_this_call = 0; 1281 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count + 1282 vm_page_inactive_count); 1283 inactive_reclaim_run = 0; 1284 1285 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; 1286 1287 /* 1288 * We want to gradually dribble pages from the active queue 1289 * to the inactive queue. If we let the inactive queue get 1290 * very small, and then suddenly dump many pages into it, 1291 * those pages won't get a sufficient chance to be referenced 1292 * before we start taking them from the inactive queue. 1293 * 1294 * We must limit the rate at which we send pages to the pagers 1295 * so that we don't tie up too many pages in the I/O queues. 1296 * We implement a throttling mechanism using the laundry count 1297 * to limit the number of pages outstanding to the default 1298 * and external pagers. 
We can bypass the throttles and look 1299 * for clean pages if the pageout queues don't drain in a timely 1300 * fashion since this may indicate that the pageout paths are 1301 * stalled waiting for memory, which only we can provide. 1302 */ 1303 1304 1305Restart: 1306 assert(delayed_unlock!=0); 1307 1308 /* 1309 * Recalculate vm_page_inactivate_target. 1310 */ 1311 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + 1312 vm_page_inactive_count + 1313 vm_page_speculative_count); 1314 1315 vm_page_anonymous_min = vm_page_inactive_target / 20; 1316 1317 1318 /* 1319 * don't want to wake the pageout_scan thread up everytime we fall below 1320 * the targets... set a low water mark at 0.25% below the target 1321 */ 1322 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400); 1323 1324 if (vm_page_speculative_percentage > 50) 1325 vm_page_speculative_percentage = 50; 1326 else if (vm_page_speculative_percentage <= 0) 1327 vm_page_speculative_percentage = 1; 1328 1329 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count + 1330 vm_page_inactive_count); 1331 1332 object = NULL; 1333 last_object_tried = NULL; 1334 try_failed = FALSE; 1335 1336 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count)) 1337 catch_up_count = vm_page_inactive_count + vm_page_speculative_count; 1338 else 1339 catch_up_count = 0; 1340 1341 for (;;) { 1342 vm_page_t m; 1343 1344 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL); 1345 1346 if (delayed_unlock == 0) { 1347 vm_page_lock_queues(); 1348 delayed_unlock = 1; 1349 } 1350 if (vm_upl_wait_for_pages < 0) 1351 vm_upl_wait_for_pages = 0; 1352 1353 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages; 1354 1355 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX) 1356 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX; 1357 1358 /* 1359 * Move pages from active to inactive if we're below the target 1360 */ 1361 /* if we are trying to make clean, we need to make sure we actually have inactive - mj */ 1362 if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target) 1363 goto done_moving_active_pages; 1364 1365 if (object != NULL) { 1366 vm_object_unlock(object); 1367 object = NULL; 1368 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1369 } 1370 /* 1371 * Don't sweep through active queue more than the throttle 1372 * which should be kept relatively low 1373 */ 1374 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count); 1375 1376 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START, 1377 vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed); 1378 1379 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE, 1380 vm_pageout_speculative_clean, vm_pageout_inactive_clean, 1381 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); 1382 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START); 1383 1384 1385 while (!queue_empty(&vm_page_queue_active) && active_burst_count--) { 1386 1387 vm_pageout_active++; 1388 1389 m = (vm_page_t) queue_first(&vm_page_queue_active); 1390 1391 assert(m->active && !m->inactive); 1392 assert(!m->laundry); 1393 assert(m->object != kernel_object); 1394 assert(m->phys_page != vm_page_guard_addr); 1395 1396 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); 1397 1398#if LATENCY_JETSAM 1399 if (m->fictitious) { 1400 const uint32_t FREE_TARGET_MULTIPLIER = 2; 1401 1402 uint64_t now 
= mach_absolute_time(); 1403 uint64_t delta = now - m->offset; 1404 clock_sec_t jl_secs = 0; 1405 clock_usec_t jl_usecs = 0; 1406 boolean_t issue_jetsam = FALSE; 1407 1408 absolutetime_to_microtime(delta, &jl_secs, &jl_usecs); 1409 jl_usecs += jl_secs * USEC_PER_SEC; 1410 1411 /* Jetsam only if the token hasn't aged sufficiently and the free count is close to the target (avoiding spurious triggers) */ 1412 if ((jl_usecs <= JETSAM_AGE_NOTIFY_CRITICAL) && (vm_page_free_count < (FREE_TARGET_MULTIPLIER * vm_page_free_target))) { 1413 issue_jetsam = TRUE; 1414 } 1415 1416 VM_DEBUG_EVENT(vm_pageout_page_token, VM_PAGEOUT_PAGE_TOKEN, DBG_FUNC_NONE, 1417 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, jl_usecs); 1418 1419 m->offset = 0; 1420 queue_remove(&vm_page_queue_active, m, vm_page_t, pageq); 1421 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); 1422 1423 m->offset = now; 1424 jlp_time = now; 1425 1426 if (issue_jetsam) { 1427 vm_page_unlock_queues(); 1428 1429 if (local_freeq) { 1430 vm_page_free_list(local_freeq, TRUE); 1431 local_freeq = NULL; 1432 local_freed = 0; 1433 } 1434 1435 VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START, 1436 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, 0); 1437 1438 assert_wait_timeout(&latency_jetsam_wakeup, THREAD_INTERRUPTIBLE, 10 /* msecs */, 1000*NSEC_PER_USEC); 1439 /* Kill the top process asynchronously */ 1440 memorystatus_kill_on_VM_page_shortage(TRUE); 1441 thread_block(THREAD_CONTINUE_NULL); 1442 1443 VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0); 1444 1445 vm_page_lock_queues(); 1446 } 1447 } else { 1448#endif /* LATENCY_JETSAM */ 1449 /* 1450 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise... 1451 * 1452 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being 1453 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the 1454 * new reference happens. If no futher references happen on the page after that remote TLB flushes 1455 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue 1456 * by pageout_scan, which is just fine since the last reference would have happened quite far 1457 * in the past (TLB caches don't hang around for very long), and of course could just as easily 1458 * have happened before we moved the page 1459 */ 1460 pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL); 1461 1462 /* 1463 * The page might be absent or busy, 1464 * but vm_page_deactivate can handle that. 1465 * FALSE indicates that we don't want a H/W clear reference 1466 */ 1467 vm_page_deactivate_internal(m, FALSE); 1468 1469 if (delayed_unlock++ > delayed_unlock_limit) { 1470 1471 if (local_freeq) { 1472 vm_page_unlock_queues(); 1473 1474 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 1475 vm_page_free_count, local_freed, delayed_unlock_limit, 1); 1476 1477 vm_page_free_list(local_freeq, TRUE); 1478 1479 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 1480 vm_page_free_count, 0, 0, 1); 1481 1482 local_freeq = NULL; 1483 local_freed = 0; 1484 vm_page_lock_queues(); 1485 } else 1486 lck_mtx_yield(&vm_page_queue_lock); 1487 1488 delayed_unlock = 1; 1489 1490 /* 1491 * continue the while loop processing 1492 * the active queue... 
need to hold 1493 * the page queues lock 1494 */ 1495 } 1496#if LATENCY_JETSAM 1497 } 1498#endif /* LATENCY_JETSAM */ 1499 } 1500 1501 VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END, 1502 vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target); 1503 memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END); 1504 1505 /********************************************************************** 1506 * above this point we're playing with the active queue 1507 * below this point we're playing with the throttling mechanisms 1508 * and the inactive queue 1509 **********************************************************************/ 1510 1511done_moving_active_pages: 1512 1513 if (vm_page_free_count + local_freed >= vm_page_free_target) { 1514 if (object != NULL) { 1515 vm_object_unlock(object); 1516 object = NULL; 1517 } 1518 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1519 1520 if (local_freeq) { 1521 vm_page_unlock_queues(); 1522 1523 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 1524 vm_page_free_count, local_freed, delayed_unlock_limit, 2); 1525 1526 vm_page_free_list(local_freeq, TRUE); 1527 1528 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 1529 vm_page_free_count, local_freed, 0, 2); 1530 1531 local_freeq = NULL; 1532 local_freed = 0; 1533 vm_page_lock_queues(); 1534 } 1535 /* 1536 * make sure the pageout I/O threads are running 1537 * throttled in case there are still requests 1538 * in the laundry... since we have met our targets 1539 * we don't need the laundry to be cleaned in a timely 1540 * fashion... so let's avoid interfering with foreground 1541 * activity 1542 */ 1543 vm_pageout_adjust_io_throttles(iq, eq, TRUE); 1544 1545 /* 1546 * recalculate vm_page_inactivate_target 1547 */ 1548 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + 1549 vm_page_inactive_count + 1550 vm_page_speculative_count); 1551 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && 1552 !queue_empty(&vm_page_queue_active)) { 1553 /* 1554 * inactive target still not met... keep going 1555 * until we get the queues balanced... 1556 */ 1557 continue; 1558 } 1559 lck_mtx_lock(&vm_page_queue_free_lock); 1560 1561 if ((vm_page_free_count >= vm_page_free_target) && 1562 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { 1563 /* 1564 * done - we have met our target *and* 1565 * there is no one waiting for a page. 1566 */ 1567return_from_scan: 1568 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); 1569 1570 VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE, 1571 vm_pageout_inactive, vm_pageout_inactive_used, 0, 0); 1572 VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END, 1573 vm_pageout_speculative_clean, vm_pageout_inactive_clean, 1574 vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external); 1575 1576 return; 1577 } 1578 lck_mtx_unlock(&vm_page_queue_free_lock); 1579 } 1580 1581 /* 1582 * Before anything, we check if we have any ripe volatile 1583 * objects around. If so, try to purge the first object. 1584 * If the purge fails, fall through to reclaim a page instead. 1585 * If the purge succeeds, go back to the top and reevalute 1586 * the new memory situation. 
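 *
 * How hard we purge depends on the memorystatus pressure level (see the
 * defaults near the top of this file): force_purge is taken from
 * memorystatus_purge_on_warning (2), memorystatus_purge_on_urgent (5) or
 * memorystatus_purge_on_critical (8), and stays 0 ("no force-purging")
 * when the pressure level is normal.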
1587 */ 1588 pressure_level = memorystatus_vm_pressure_level; 1589 assert (available_for_purge>=0); 1590 1591 if (available_for_purge 1592 || pressure_level > kVMPressureNormal 1593 ) { 1594 int force_purge; 1595 1596 if (object != NULL) { 1597 vm_object_unlock(object); 1598 object = NULL; 1599 } 1600 1601 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0); 1602 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START); 1603 1604 force_purge = 0; /* no force-purging */ 1605 if (pressure_level >= kVMPressureCritical) { 1606 force_purge = memorystatus_purge_on_critical; 1607 } else if (pressure_level >= kVMPressureUrgent) { 1608 force_purge = memorystatus_purge_on_urgent; 1609 } else if (pressure_level >= kVMPressureWarning) { 1610 force_purge = memorystatus_purge_on_warning; 1611 } else { 1612 force_purge = 0; 1613 } 1614 if (vm_purgeable_object_purge_one(force_purge)) { 1615 1616 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0); 1617 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); 1618 continue; 1619 } 1620 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1); 1621 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END); 1622 } 1623 if (queue_empty(&sq->age_q) && vm_page_speculative_count) { 1624 /* 1625 * try to pull pages from the aging bins... 1626 * see vm_page.h for an explanation of how 1627 * this mechanism works 1628 */ 1629 struct vm_speculative_age_q *aq; 1630 mach_timespec_t ts_fully_aged; 1631 boolean_t can_steal = FALSE; 1632 int num_scanned_queues; 1633 1634 aq = &vm_page_queue_speculative[speculative_steal_index]; 1635 1636 num_scanned_queues = 0; 1637 while (queue_empty(&aq->age_q) && 1638 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) { 1639 1640 speculative_steal_index++; 1641 1642 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) 1643 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q; 1644 1645 aq = &vm_page_queue_speculative[speculative_steal_index]; 1646 } 1647 1648 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) { 1649 /* 1650 * XXX We've scanned all the speculative 1651 * queues but still haven't found one 1652 * that is not empty, even though 1653 * vm_page_speculative_count is not 0. 1654 * 1655 * report the anomaly... 1656 */ 1657 printf("vm_pageout_scan: " 1658 "all speculative queues empty " 1659 "but count=%d. Re-adjusting.\n", 1660 vm_page_speculative_count); 1661 if (vm_page_speculative_count > vm_page_speculative_count_drift_max) 1662 vm_page_speculative_count_drift_max = vm_page_speculative_count; 1663 vm_page_speculative_count_drifts++; 1664#if 6553678 1665 Debugger("vm_pageout_scan: no speculative pages"); 1666#endif 1667 /* readjust... */ 1668 vm_page_speculative_count = 0; 1669 /* ... 
and continue */ 1670 continue; 1671 } 1672 1673 if (vm_page_speculative_count > vm_page_speculative_target) 1674 can_steal = TRUE; 1675 else { 1676 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000; 1677 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000) 1678 * 1000 * NSEC_PER_USEC; 1679 1680 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts); 1681 1682 clock_sec_t sec; 1683 clock_nsec_t nsec; 1684 clock_get_system_nanotime(&sec, &nsec); 1685 ts.tv_sec = (unsigned int) sec; 1686 ts.tv_nsec = nsec; 1687 1688 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) 1689 can_steal = TRUE; 1690 } 1691 if (can_steal == TRUE) 1692 vm_page_speculate_ageit(aq); 1693 } 1694 if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) { 1695 int pages_evicted; 1696 1697 if (object != NULL) { 1698 vm_object_unlock(object); 1699 object = NULL; 1700 } 1701 pages_evicted = vm_object_cache_evict(100, 10); 1702 1703 if (pages_evicted) { 1704 1705 vm_pageout_cache_evicted += pages_evicted; 1706 1707 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE, 1708 vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0); 1709 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE); 1710 1711 /* 1712 * we just freed up to 100 pages, 1713 * so go back to the top of the main loop 1714 * and re-evaulate the memory situation 1715 */ 1716 continue; 1717 } else 1718 cache_evict_throttle = 100; 1719 } 1720 if (cache_evict_throttle) 1721 cache_evict_throttle--; 1722 1723 1724 exceeded_burst_throttle = FALSE; 1725 /* 1726 * Sometimes we have to pause: 1727 * 1) No inactive pages - nothing to do. 1728 * 2) Loop control - no acceptable pages found on the inactive queue 1729 * within the last vm_pageout_burst_inactive_throttle iterations 1730 * 3) Flow control - default pageout queue is full 1731 */ 1732 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) { 1733 vm_pageout_scan_empty_throttle++; 1734 msecs = vm_pageout_empty_wait; 1735 goto vm_pageout_scan_delay; 1736 1737 } else if (inactive_burst_count >= 1738 MIN(vm_pageout_burst_inactive_throttle, 1739 (vm_page_inactive_count + 1740 vm_page_speculative_count))) { 1741 vm_pageout_scan_burst_throttle++; 1742 msecs = vm_pageout_burst_wait; 1743 1744 exceeded_burst_throttle = TRUE; 1745 goto vm_pageout_scan_delay; 1746 1747 } else if (vm_page_free_count > (vm_page_free_reserved / 4) && 1748 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) { 1749 vm_pageout_scan_swap_throttle++; 1750 msecs = vm_pageout_swap_wait; 1751 goto vm_pageout_scan_delay; 1752 1753 } else if (VM_PAGE_Q_THROTTLED(iq) && 1754 VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { 1755 clock_sec_t sec; 1756 clock_nsec_t nsec; 1757 1758 switch (flow_control.state) { 1759 1760 case FCS_IDLE: 1761 if ((vm_page_free_count + local_freed) < vm_page_free_target) { 1762 1763 if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) { 1764 anons_grabbed = ANONS_GRABBED_LIMIT; 1765 goto consider_inactive; 1766 } 1767 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count) 1768 continue; 1769 } 1770reset_deadlock_timer: 1771 ts.tv_sec = vm_pageout_deadlock_wait / 1000; 1772 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; 1773 clock_get_system_nanotime(&sec, &nsec); 1774 flow_control.ts.tv_sec = (unsigned int) sec; 1775 flow_control.ts.tv_nsec = nsec; 1776 
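			/*
			 * flow_control.ts now holds the current time; adding "ts"
			 * (vm_pageout_deadlock_wait converted to a timespec above)
			 * arms the deadline that the FCS_DELAYED case compares
			 * against to decide whether a deadlock has developed.
			 */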
ADD_MACH_TIMESPEC(&flow_control.ts, &ts); 1777 1778 flow_control.state = FCS_DELAYED; 1779 msecs = vm_pageout_deadlock_wait; 1780 1781 break; 1782 1783 case FCS_DELAYED: 1784 clock_get_system_nanotime(&sec, &nsec); 1785 ts.tv_sec = (unsigned int) sec; 1786 ts.tv_nsec = nsec; 1787 1788 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) { 1789 /* 1790 * the pageout thread for the default pager is potentially 1791 * deadlocked since the 1792 * default pager queue has been throttled for more than the 1793 * allowable time... we need to move some clean pages or dirty 1794 * pages belonging to the external pagers if they aren't throttled 1795 * vm_page_free_wanted represents the number of threads currently 1796 * blocked waiting for pages... we'll move one page for each of 1797 * these plus a fixed amount to break the logjam... once we're done 1798 * moving this number of pages, we'll re-enter the FSC_DELAYED state 1799 * with a new timeout target since we have no way of knowing 1800 * whether we've broken the deadlock except through observation 1801 * of the queue associated with the default pager... we need to 1802 * stop moving pages and allow the system to run to see what 1803 * state it settles into. 1804 */ 1805 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged; 1806 vm_pageout_scan_deadlock_detected++; 1807 flow_control.state = FCS_DEADLOCK_DETECTED; 1808 thread_wakeup((event_t) &vm_pageout_garbage_collect); 1809 goto consider_inactive; 1810 } 1811 /* 1812 * just resniff instead of trying 1813 * to compute a new delay time... we're going to be 1814 * awakened immediately upon a laundry completion, 1815 * so we won't wait any longer than necessary 1816 */ 1817 msecs = vm_pageout_idle_wait; 1818 break; 1819 1820 case FCS_DEADLOCK_DETECTED: 1821 if (vm_pageout_deadlock_target) 1822 goto consider_inactive; 1823 goto reset_deadlock_timer; 1824 1825 } 1826vm_pageout_scan_delay: 1827 if (object != NULL) { 1828 vm_object_unlock(object); 1829 object = NULL; 1830 } 1831 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1832 1833 if (local_freeq) { 1834 vm_page_unlock_queues(); 1835 1836 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 1837 vm_page_free_count, local_freed, delayed_unlock_limit, 3); 1838 1839 vm_page_free_list(local_freeq, TRUE); 1840 1841 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 1842 vm_page_free_count, local_freed, 0, 3); 1843 1844 local_freeq = NULL; 1845 local_freed = 0; 1846 vm_page_lock_queues(); 1847 1848 if (flow_control.state == FCS_DELAYED && 1849 !VM_PAGE_Q_THROTTLED(iq)) { 1850 flow_control.state = FCS_IDLE; 1851 goto consider_inactive; 1852 } 1853 } 1854 1855 if (vm_page_free_count >= vm_page_free_target) { 1856 /* 1857 * we're here because 1858 * 1) someone else freed up some pages while we had 1859 * the queues unlocked above 1860 * and we've hit one of the 3 conditions that 1861 * cause us to pause the pageout scan thread 1862 * 1863 * since we already have enough free pages, 1864 * let's avoid stalling and return normally 1865 * 1866 * before we return, make sure the pageout I/O threads 1867 * are running throttled in case there are still requests 1868 * in the laundry... since we have enough free pages 1869 * we don't need the laundry to be cleaned in a timely 1870 * fashion... 
so let's avoid interfering with foreground 1871 * activity 1872 * 1873 * we don't want to hold vm_page_queue_free_lock when 1874 * calling vm_pageout_adjust_io_throttles (since it 1875 * may cause other locks to be taken), we do the intitial 1876 * check outside of the lock. Once we take the lock, 1877 * we recheck the condition since it may have changed. 1878 * if it has, no problem, we will make the threads 1879 * non-throttled before actually blocking 1880 */ 1881 vm_pageout_adjust_io_throttles(iq, eq, TRUE); 1882 } 1883 lck_mtx_lock(&vm_page_queue_free_lock); 1884 1885 if (vm_page_free_count >= vm_page_free_target && 1886 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { 1887 goto return_from_scan; 1888 } 1889 lck_mtx_unlock(&vm_page_queue_free_lock); 1890 1891 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) { 1892 /* 1893 * we're most likely about to block due to one of 1894 * the 3 conditions that cause vm_pageout_scan to 1895 * not be able to make forward progress w/r 1896 * to providing new pages to the free queue, 1897 * so unthrottle the I/O threads in case we 1898 * have laundry to be cleaned... it needs 1899 * to be completed ASAP. 1900 * 1901 * even if we don't block, we want the io threads 1902 * running unthrottled since the sum of free + 1903 * clean pages is still under our free target 1904 */ 1905 vm_pageout_adjust_io_throttles(iq, eq, FALSE); 1906 } 1907 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) { 1908 /* 1909 * if we get here we're below our free target and 1910 * we're stalling due to a full laundry queue or 1911 * we don't have any inactive pages other then 1912 * those in the clean queue... 1913 * however, we have pages on the clean queue that 1914 * can be moved to the free queue, so let's not 1915 * stall the pageout scan 1916 */ 1917 flow_control.state = FCS_IDLE; 1918 goto consider_inactive; 1919 } 1920 VM_CHECK_MEMORYSTATUS; 1921 1922 if (flow_control.state != FCS_IDLE) 1923 vm_pageout_scan_throttle++; 1924 iq->pgo_throttled = TRUE; 1925 1926 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 1927 vm_consider_waking_compactor_swapper(); 1928 1929 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); 1930 counter(c_vm_pageout_scan_block++); 1931 1932 vm_page_unlock_queues(); 1933 1934 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); 1935 1936 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START, 1937 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); 1938 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START); 1939 1940 thread_block(THREAD_CONTINUE_NULL); 1941 1942 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END, 1943 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); 1944 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END); 1945 1946 vm_page_lock_queues(); 1947 delayed_unlock = 1; 1948 1949 iq->pgo_throttled = FALSE; 1950 1951 if (loop_count >= vm_page_inactive_count) 1952 loop_count = 0; 1953 inactive_burst_count = 0; 1954 1955 goto Restart; 1956 /*NOTREACHED*/ 1957 } 1958 1959 1960 flow_control.state = FCS_IDLE; 1961consider_inactive: 1962 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count), 1963 vm_pageout_inactive_external_forced_reactivate_limit); 1964 loop_count++; 1965 inactive_burst_count++; 1966 vm_pageout_inactive++; 1967 1968 1969 /* 1970 * Choose a victim. 
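	 * the search below is ordered: the speculative age queue comes
	 * first, then the cleaned queue, then the file-backed inactive
	 * queue interleaved with the anonymous queue... at most
	 * ANONS_GRABBED_LIMIT anonymous pages are taken in a row, unless
	 * we're deliberately favoring anonymous pages (force_anonymous is
	 * set or the file cache has dropped below vm_page_filecache_min)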
1971 */ 1972 while (1) { 1973 m = NULL; 1974 1975 if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { 1976 assert(vm_page_throttled_count == 0); 1977 assert(queue_empty(&vm_page_queue_throttled)); 1978 } 1979 /* 1980 * The most eligible pages are ones we paged in speculatively, 1981 * but which have not yet been touched. 1982 */ 1983 if (!queue_empty(&sq->age_q) ) { 1984 m = (vm_page_t) queue_first(&sq->age_q); 1985 1986 page_prev_state = PAGE_STATE_SPECULATIVE; 1987 1988 break; 1989 } 1990 /* 1991 * Try a clean-queue inactive page. 1992 */ 1993 if (!queue_empty(&vm_page_queue_cleaned)) { 1994 m = (vm_page_t) queue_first(&vm_page_queue_cleaned); 1995 1996 page_prev_state = PAGE_STATE_CLEAN; 1997 1998 break; 1999 } 2000 2001 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min); 2002 2003 if (vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) { 2004 grab_anonymous = TRUE; 2005 anons_grabbed = 0; 2006 } 2007 2008 if (grab_anonymous == TRUE && vm_compression_available() == FALSE) 2009 grab_anonymous = FALSE; 2010 2011 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) { 2012 2013 if ( !queue_empty(&vm_page_queue_inactive) ) { 2014 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 2015 2016 page_prev_state = PAGE_STATE_INACTIVE; 2017 anons_grabbed = 0; 2018 2019 break; 2020 } 2021 } 2022 if ( !queue_empty(&vm_page_queue_anonymous) ) { 2023 m = (vm_page_t) queue_first(&vm_page_queue_anonymous); 2024 2025 page_prev_state = PAGE_STATE_ANONYMOUS; 2026 anons_grabbed++; 2027 2028 break; 2029 } 2030 2031 /* 2032 * if we've gotten here, we have no victim page. 2033 * if making clean, free the local freed list and return. 2034 * if making free, check to see if we've finished balancing the queues 2035 * yet, if we haven't just continue, else panic 2036 */ 2037 vm_page_unlock_queues(); 2038 2039 if (object != NULL) { 2040 vm_object_unlock(object); 2041 object = NULL; 2042 } 2043 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2044 2045 if (local_freeq) { 2046 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 2047 vm_page_free_count, local_freed, delayed_unlock_limit, 5); 2048 2049 vm_page_free_list(local_freeq, TRUE); 2050 2051 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 2052 vm_page_free_count, local_freed, 0, 5); 2053 2054 local_freeq = NULL; 2055 local_freed = 0; 2056 } 2057 vm_page_lock_queues(); 2058 delayed_unlock = 1; 2059 2060 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) 2061 goto Restart; 2062 2063 panic("vm_pageout: no victim"); 2064 2065 /* NOTREACHED */ 2066 } 2067 force_anonymous = FALSE; 2068 2069 /* 2070 * we just found this page on one of our queues... 2071 * it can't also be on the pageout queue, so safe 2072 * to call VM_PAGE_QUEUES_REMOVE 2073 */ 2074 assert(!m->pageout_queue); 2075 2076 VM_PAGE_QUEUES_REMOVE(m); 2077 2078 assert(!m->laundry); 2079 assert(!m->private); 2080 assert(!m->fictitious); 2081 assert(m->object != kernel_object); 2082 assert(m->phys_page != vm_page_guard_addr); 2083 2084 2085 if (page_prev_state != PAGE_STATE_SPECULATIVE) 2086 vm_pageout_stats[vm_pageout_stat_now].considered++; 2087 2088 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); 2089 2090 /* 2091 * check to see if we currently are working 2092 * with the same object... 
if so, we've 2093 * already got the lock 2094 */ 2095 if (m->object != object) { 2096 /* 2097 * the object associated with the candidate page is 2098 * different from the one we were just working 2099 * with... dump the lock if we still own it 2100 */ 2101 if (object != NULL) { 2102 vm_object_unlock(object); 2103 object = NULL; 2104 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2105 } 2106 /* 2107 * Try to lock object; since we've already got the 2108 * page queues lock, we can only 'try' for this one. 2109 * if the 'try' fails, we need to do a mutex_pause 2110 * to allow the owner of the object lock a chance to 2111 * run... otherwise, we're likely to trip over this 2112 * object in the same state as we work our way through 2113 * the queue... clumps of pages associated with the same 2114 * object are fairly typical on the inactive and active queues 2115 */ 2116 if (!vm_object_lock_try_scan(m->object)) { 2117 vm_page_t m_want = NULL; 2118 2119 vm_pageout_inactive_nolock++; 2120 2121 if (page_prev_state == PAGE_STATE_CLEAN) 2122 vm_pageout_cleaned_nolock++; 2123 2124 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2125 page_prev_state = PAGE_STATE_INACTIVE_FIRST; 2126 2127 pmap_clear_reference(m->phys_page); 2128 m->reference = FALSE; 2129 2130 /* 2131 * m->object must be stable since we hold the page queues lock... 2132 * we can update the scan_collisions field sans the object lock 2133 * since it is a separate field and this is the only spot that does 2134 * a read-modify-write operation and it is never executed concurrently... 2135 * we can asynchronously set this field to 0 when creating a UPL, so it 2136 * is possible for the value to be a bit non-deterministic, but that's ok 2137 * since it's only used as a hint 2138 */ 2139 m->object->scan_collisions++; 2140 2141 if ( !queue_empty(&sq->age_q) ) 2142 m_want = (vm_page_t) queue_first(&sq->age_q); 2143 else if ( !queue_empty(&vm_page_queue_cleaned)) 2144 m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned); 2145 else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) 2146 m_want = (vm_page_t) queue_first(&vm_page_queue_inactive); 2147 else if ( !queue_empty(&vm_page_queue_anonymous)) 2148 m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous); 2149 2150 /* 2151 * this is the next object we're going to be interested in 2152 * try to make sure it's available after the mutex_yield 2153 * returns control 2154 */ 2155 if (m_want) 2156 vm_pageout_scan_wants_object = m_want->object; 2157 2158 /* 2159 * force us to dump any collected free pages 2160 * and to pause before moving on 2161 */ 2162 try_failed = TRUE; 2163 2164 goto requeue_page; 2165 } 2166 object = m->object; 2167 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2168 2169 try_failed = FALSE; 2170 } 2171 if (catch_up_count) 2172 catch_up_count--; 2173 2174 if (m->busy) { 2175 if (m->encrypted_cleaning) { 2176 /* 2177 * ENCRYPTED SWAP: 2178 * if this page has already been picked up as 2179 * part of a page-out cluster, it will be busy 2180 * because it is being encrypted (see 2181 * vm_object_upl_request()). But we still 2182 * want to demote it from "clean-in-place" 2183 * (aka "adjacent") to "clean-and-free" (aka 2184 * "target"), so let's ignore its "busy" bit 2185 * here and proceed to check for "cleaning" a 2186 * little bit below... 2187 * 2188 * CAUTION CAUTION: 2189 * A "busy" page should still be left alone for 2190 * most purposes, so we have to be very careful 2191 * not to process that page too much. 
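			 * all we do with it here is fall through to the reference and
			 * dirty checks at "consider_inactive_page" below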
2192 */ 2193 assert(m->cleaning); 2194 goto consider_inactive_page; 2195 } 2196 2197 /* 2198 * Somebody is already playing with this page. 2199 * Put it back on the appropriate queue 2200 * 2201 */ 2202 vm_pageout_inactive_busy++; 2203 2204 if (page_prev_state == PAGE_STATE_CLEAN) 2205 vm_pageout_cleaned_busy++; 2206 2207requeue_page: 2208 switch (page_prev_state) { 2209 2210 case PAGE_STATE_SPECULATIVE: 2211 vm_page_speculate(m, FALSE); 2212 break; 2213 2214 case PAGE_STATE_ANONYMOUS: 2215 case PAGE_STATE_CLEAN: 2216 case PAGE_STATE_INACTIVE: 2217 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE); 2218 break; 2219 2220 case PAGE_STATE_INACTIVE_FIRST: 2221 VM_PAGE_ENQUEUE_INACTIVE(m, TRUE); 2222 break; 2223 } 2224 goto done_with_inactivepage; 2225 } 2226 2227 2228 /* 2229 * If it's absent, in error or the object is no longer alive, 2230 * we can reclaim the page... in the no longer alive case, 2231 * there are 2 states the page can be in that preclude us 2232 * from reclaiming it - busy or cleaning - that we've already 2233 * dealt with 2234 */ 2235 if (m->absent || m->error || !object->alive) { 2236 2237 if (m->absent) 2238 vm_pageout_inactive_absent++; 2239 else if (!object->alive) 2240 vm_pageout_inactive_notalive++; 2241 else 2242 vm_pageout_inactive_error++; 2243reclaim_page: 2244 if (vm_pageout_deadlock_target) { 2245 vm_pageout_scan_inactive_throttle_success++; 2246 vm_pageout_deadlock_target--; 2247 } 2248 2249 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL); 2250 2251 if (object->internal) { 2252 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL); 2253 } else { 2254 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL); 2255 } 2256 assert(!m->cleaning); 2257 assert(!m->laundry); 2258 2259 m->busy = TRUE; 2260 2261 /* 2262 * remove page from object here since we're already 2263 * behind the object lock... defer the rest of the work 2264 * we'd normally do in vm_page_free_prepare_object 2265 * until 'vm_page_free_list' is called 2266 */ 2267 if (m->tabled) 2268 vm_page_remove(m, TRUE); 2269 2270 assert(m->pageq.next == NULL && 2271 m->pageq.prev == NULL); 2272 m->pageq.next = (queue_entry_t)local_freeq; 2273 local_freeq = m; 2274 local_freed++; 2275 2276 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2277 vm_pageout_freed_from_speculative++; 2278 else if (page_prev_state == PAGE_STATE_CLEAN) 2279 vm_pageout_freed_from_cleaned++; 2280 else 2281 vm_pageout_freed_from_inactive_clean++; 2282 2283 if (page_prev_state != PAGE_STATE_SPECULATIVE) 2284 vm_pageout_stats[vm_pageout_stat_now].reclaimed++; 2285 2286 goto done_with_inactivepage; 2287 } 2288 /* 2289 * If the object is empty, the page must be reclaimed even 2290 * if dirty or used. 2291 * If the page belongs to a volatile object, we stick it back 2292 * on. 2293 */ 2294 if (object->copy == VM_OBJECT_NULL) { 2295 if (object->purgable == VM_PURGABLE_EMPTY) { 2296 if (m->pmapped == TRUE) { 2297 /* unmap the page */ 2298 refmod_state = pmap_disconnect(m->phys_page); 2299 if (refmod_state & VM_MEM_MODIFIED) { 2300 SET_PAGE_DIRTY(m, FALSE); 2301 } 2302 } 2303 if (m->dirty || m->precious) { 2304 /* we saved the cost of cleaning this page ! */ 2305 vm_page_purged_count++; 2306 } 2307 goto reclaim_page; 2308 } 2309 2310 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 2311 /* 2312 * With the VM compressor, the cost of 2313 * reclaiming a page is much lower (no I/O), 2314 * so if we find a "volatile" page, it's better 2315 * to let it get compressed rather than letting 2316 * it occupy a full page until it gets purged. 
2317 * So no need to check for "volatile" here. 2318 */ 2319 } else if (object->purgable == VM_PURGABLE_VOLATILE) { 2320 /* 2321 * Avoid cleaning a "volatile" page which might 2322 * be purged soon. 2323 */ 2324 2325 /* if it's wired, we can't put it on our queue */ 2326 assert(!VM_PAGE_WIRED(m)); 2327 2328 /* just stick it back on! */ 2329 reactivated_this_call++; 2330 2331 if (page_prev_state == PAGE_STATE_CLEAN) 2332 vm_pageout_cleaned_volatile_reactivated++; 2333 2334 goto reactivate_page; 2335 } 2336 } 2337 2338consider_inactive_page: 2339 if (m->busy) { 2340 /* 2341 * CAUTION CAUTION: 2342 * A "busy" page should always be left alone, except... 2343 */ 2344 if (m->cleaning && m->encrypted_cleaning) { 2345 /* 2346 * ENCRYPTED_SWAP: 2347 * We could get here with a "busy" page 2348 * if it's being encrypted during a 2349 * "clean-in-place" operation. We'll deal 2350 * with it right away by testing if it has been 2351 * referenced and either reactivating it or 2352 * promoting it from "clean-in-place" to 2353 * "clean-and-free". 2354 */ 2355 } else { 2356 panic("\"busy\" page considered for pageout\n"); 2357 } 2358 } 2359 2360 /* 2361 * If it's being used, reactivate. 2362 * (Fictitious pages are either busy or absent.) 2363 * First, update the reference and dirty bits 2364 * to make sure the page is unreferenced. 2365 */ 2366 refmod_state = -1; 2367 2368 if (m->reference == FALSE && m->pmapped == TRUE) { 2369 refmod_state = pmap_get_refmod(m->phys_page); 2370 2371 if (refmod_state & VM_MEM_REFERENCED) 2372 m->reference = TRUE; 2373 if (refmod_state & VM_MEM_MODIFIED) { 2374 SET_PAGE_DIRTY(m, FALSE); 2375 } 2376 } 2377 2378 /* 2379 * if (m->cleaning && !m->pageout) 2380 * If already cleaning this page in place and it hasn't 2381 * been recently referenced, just pull off the queue. 2382 * We can leave the page mapped, and upl_commit_range 2383 * will put it on the clean queue. 2384 * 2385 * note: if m->encrypted_cleaning == TRUE, then 2386 * m->cleaning == TRUE 2387 * and we'll handle it here 2388 * 2389 * if (m->pageout && !m->cleaning) 2390 * an msync INVALIDATE is in progress... 2391 * this page has been marked for destruction 2392 * after it has been cleaned, 2393 * but not yet gathered into a UPL 2394 * where 'cleaning' will be set... 2395 * just leave it off the paging queues 2396 * 2397 * if (m->pageout && m->clenaing) 2398 * an msync INVALIDATE is in progress 2399 * and the UPL has already gathered this page... 2400 * just leave it off the paging queues 2401 */ 2402 2403 /* 2404 * page with m->pageout and still on the queues means that an 2405 * MS_INVALIDATE is in progress on this page... leave it alone 2406 */ 2407 if (m->pageout) { 2408 goto done_with_inactivepage; 2409 } 2410 2411 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */ 2412 if (m->cleaning) { 2413 if (m->reference == TRUE) { 2414 reactivated_this_call++; 2415 goto reactivate_page; 2416 } else { 2417 goto done_with_inactivepage; 2418 } 2419 } 2420 2421 if (m->reference || m->dirty) { 2422 /* deal with a rogue "reusable" page */ 2423 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); 2424 } 2425 2426 if (m->reference && !m->no_cache) { 2427 /* 2428 * The page we pulled off the inactive list has 2429 * been referenced. It is possible for other 2430 * processors to be touching pages faster than we 2431 * can clear the referenced bit and traverse the 2432 * inactive queue, so we limit the number of 2433 * reactivations. 
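		 * once reactivate_limit is exceeded we stop honoring the
		 * reference bit and let the page continue on toward reclaim
		 * anyway (see forced_reclaim below)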
2434 */ 2435 if (++reactivated_this_call >= reactivate_limit) { 2436 vm_pageout_reactivation_limit_exceeded++; 2437 } else if (catch_up_count) { 2438 vm_pageout_catch_ups++; 2439 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) { 2440 vm_pageout_inactive_force_reclaim++; 2441 } else { 2442 uint32_t isinuse; 2443 2444 if (page_prev_state == PAGE_STATE_CLEAN) 2445 vm_pageout_cleaned_reference_reactivated++; 2446 2447reactivate_page: 2448 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL && 2449 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) { 2450 /* 2451 * no explict mappings of this object exist 2452 * and it's not open via the filesystem 2453 */ 2454 vm_page_deactivate(m); 2455 vm_pageout_inactive_deactivated++; 2456 } else { 2457 /* 2458 * The page was/is being used, so put back on active list. 2459 */ 2460 vm_page_activate(m); 2461 VM_STAT_INCR(reactivations); 2462 } 2463 2464 if (page_prev_state == PAGE_STATE_CLEAN) 2465 vm_pageout_cleaned_reactivated++; 2466 2467 vm_pageout_inactive_used++; 2468 2469 goto done_with_inactivepage; 2470 } 2471 /* 2472 * Make sure we call pmap_get_refmod() if it 2473 * wasn't already called just above, to update 2474 * the dirty bit. 2475 */ 2476 if ((refmod_state == -1) && !m->dirty && m->pmapped) { 2477 refmod_state = pmap_get_refmod(m->phys_page); 2478 if (refmod_state & VM_MEM_MODIFIED) { 2479 SET_PAGE_DIRTY(m, FALSE); 2480 } 2481 } 2482 forced_reclaim = TRUE; 2483 } else { 2484 forced_reclaim = FALSE; 2485 } 2486 2487 XPR(XPR_VM_PAGEOUT, 2488 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", 2489 object, m->offset, m, 0,0); 2490 2491 /* 2492 * we've got a candidate page to steal... 2493 * 2494 * m->dirty is up to date courtesy of the 2495 * preceding check for m->reference... if 2496 * we get here, then m->reference had to be 2497 * FALSE (or possibly "reactivate_limit" was 2498 * exceeded), but in either case we called 2499 * pmap_get_refmod() and updated both 2500 * m->reference and m->dirty 2501 * 2502 * if it's dirty or precious we need to 2503 * see if the target queue is throtttled 2504 * it if is, we need to skip over it by moving it back 2505 * to the end of the inactive queue 2506 */ 2507 2508 inactive_throttled = FALSE; 2509 2510 if (m->dirty || m->precious) { 2511 if (object->internal) { 2512 if (VM_PAGE_Q_THROTTLED(iq)) 2513 inactive_throttled = TRUE; 2514 } else if (VM_PAGE_Q_THROTTLED(eq)) { 2515 inactive_throttled = TRUE; 2516 } 2517 } 2518throttle_inactive: 2519 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && 2520 object->internal && m->dirty && 2521 (object->purgable == VM_PURGABLE_DENY || 2522 object->purgable == VM_PURGABLE_NONVOLATILE || 2523 object->purgable == VM_PURGABLE_VOLATILE)) { 2524 queue_enter(&vm_page_queue_throttled, m, 2525 vm_page_t, pageq); 2526 m->throttled = TRUE; 2527 vm_page_throttled_count++; 2528 2529 vm_pageout_scan_reclaimed_throttled++; 2530 2531 goto done_with_inactivepage; 2532 } 2533 if (inactive_throttled == TRUE) { 2534 2535 if (object->internal == FALSE) { 2536 /* 2537 * we need to break up the following potential deadlock case... 2538 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written. 2539 * b) The thread doing the writing is waiting for pages while holding the truncate lock 2540 * c) Most of the pages in the inactive queue belong to this file. 2541 * 2542 * we are potentially in this deadlock because... 
2543 * a) the external pageout queue is throttled 2544 * b) we're done with the active queue and moved on to the inactive queue 2545 * c) we've got a dirty external page 2546 * 2547 * since we don't know the reason for the external pageout queue being throttled we 2548 * must suspect that we are deadlocked, so move the current page onto the active queue 2549 * in an effort to cause a page from the active queue to 'age' to the inactive queue 2550 * 2551 * if we don't have jetsam configured (i.e. we have a dynamic pager), set 2552 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous 2553 * pool the next time we select a victim page... if we can make enough new free pages, 2554 * the deadlock will break, the external pageout queue will empty and it will no longer 2555 * be throttled 2556 * 2557 * if we have jestam configured, keep a count of the pages reactivated this way so 2558 * that we can try to find clean pages in the active/inactive queues before 2559 * deciding to jetsam a process 2560 */ 2561 vm_pageout_scan_inactive_throttled_external++; 2562 2563 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); 2564 m->active = TRUE; 2565 vm_page_active_count++; 2566 if (m->object->internal) { 2567 vm_page_pageable_internal_count++; 2568 } else { 2569 vm_page_pageable_external_count++; 2570 } 2571 2572 vm_pageout_adjust_io_throttles(iq, eq, FALSE); 2573 2574#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM 2575 vm_pageout_inactive_external_forced_reactivate_limit--; 2576 2577 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) { 2578 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; 2579 /* 2580 * Possible deadlock scenario so request jetsam action 2581 */ 2582 assert(object); 2583 vm_object_unlock(object); 2584 object = VM_OBJECT_NULL; 2585 vm_page_unlock_queues(); 2586 2587 VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START, 2588 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count); 2589 2590 /* Kill first suitable process */ 2591 if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) { 2592 panic("vm_pageout_scan: Jetsam request failed\n"); 2593 } 2594 2595 VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0); 2596 2597 vm_pageout_inactive_external_forced_jetsam_count++; 2598 vm_page_lock_queues(); 2599 delayed_unlock = 1; 2600 } 2601#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ 2602 force_anonymous = TRUE; 2603#endif 2604 goto done_with_inactivepage; 2605 } else { 2606 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2607 page_prev_state = PAGE_STATE_INACTIVE; 2608 2609 vm_pageout_scan_inactive_throttled_internal++; 2610 2611 goto requeue_page; 2612 } 2613 } 2614 2615 /* 2616 * we've got a page that we can steal... 2617 * eliminate all mappings and make sure 2618 * we have the up-to-date modified state 2619 * 2620 * if we need to do a pmap_disconnect then we 2621 * need to re-evaluate m->dirty since the pmap_disconnect 2622 * provides the true state atomically... the 2623 * page was still mapped up to the pmap_disconnect 2624 * and may have been dirtied at the last microsecond 2625 * 2626 * Note that if 'pmapped' is FALSE then the page is not 2627 * and has not been in any map, so there is no point calling 2628 * pmap_disconnect(). m->dirty could have been set in anticipation 2629 * of likely usage of the page. 
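	 * when the page is bound for the compressor (an internal object
	 * with neither the default pager nor the freezer active), the
	 * disconnect below passes PMAP_OPTIONS_COMPRESSOR, presumably so
	 * the pmap layer can account for it as a compressed page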
2630 */ 2631 if (m->pmapped == TRUE) { 2632 2633 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || object->internal == FALSE) { 2634 refmod_state = pmap_disconnect_options(m->phys_page, 0, NULL); 2635 } else { 2636 refmod_state = pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL); 2637 } 2638 2639 if (refmod_state & VM_MEM_MODIFIED) { 2640 SET_PAGE_DIRTY(m, FALSE); 2641 } 2642 } 2643 /* 2644 * reset our count of pages that have been reclaimed 2645 * since the last page was 'stolen' 2646 */ 2647 inactive_reclaim_run = 0; 2648 2649 /* 2650 * If it's clean and not precious, we can free the page. 2651 */ 2652 if (!m->dirty && !m->precious) { 2653 2654 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2655 vm_pageout_speculative_clean++; 2656 else { 2657 if (page_prev_state == PAGE_STATE_ANONYMOUS) 2658 vm_pageout_inactive_anonymous++; 2659 else if (page_prev_state == PAGE_STATE_CLEAN) 2660 vm_pageout_cleaned_reclaimed++; 2661 2662 if (m->was_dirty) { 2663 /* page on clean queue used to be dirty; we should increment the vm_stat pageout count here */ 2664 VM_STAT_INCR(pageouts); 2665 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); 2666 } 2667 vm_pageout_inactive_clean++; 2668 } 2669 2670 /* 2671 * OK, at this point we have found a page we are going to free. 2672 */ 2673 goto reclaim_page; 2674 } 2675 2676 /* 2677 * The page may have been dirtied since the last check 2678 * for a throttled target queue (which may have been skipped 2679 * if the page was clean then). With the dirty page 2680 * disconnected here, we can make one final check. 2681 */ 2682 if (object->internal) { 2683 if (VM_PAGE_Q_THROTTLED(iq)) 2684 inactive_throttled = TRUE; 2685 } else if (VM_PAGE_Q_THROTTLED(eq)) { 2686 inactive_throttled = TRUE; 2687 } 2688 2689 if (inactive_throttled == TRUE) 2690 goto throttle_inactive; 2691 2692#if VM_PRESSURE_EVENTS 2693 vm_pressure_response(); 2694#endif /* VM_PRESSURE_EVENTS */ 2695 2696 /* 2697 * do NOT set the pageout bit! 2698 * sure, we might need free pages, but this page is going to take time to become free 2699 * anyway, so we may as well put it on the clean queue first and take it from there later 2700 * if necessary. that way, we'll ensure we don't free up too much. 
-mj 2701 */ 2702 vm_pageout_cluster(m, FALSE); 2703 2704 if (page_prev_state == PAGE_STATE_ANONYMOUS) 2705 vm_pageout_inactive_anonymous++; 2706 if (object->internal) 2707 vm_pageout_inactive_dirty_internal++; 2708 else 2709 vm_pageout_inactive_dirty_external++; 2710 2711 2712done_with_inactivepage: 2713 inactive_burst_count = 0; 2714 2715 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) { 2716 2717 if (object != NULL) { 2718 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2719 vm_object_unlock(object); 2720 object = NULL; 2721 } 2722 if (local_freeq) { 2723 vm_page_unlock_queues(); 2724 2725 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 2726 vm_page_free_count, local_freed, delayed_unlock_limit, 4); 2727 2728 vm_page_free_list(local_freeq, TRUE); 2729 2730 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 2731 vm_page_free_count, local_freed, 0, 4); 2732 2733 local_freeq = NULL; 2734 local_freed = 0; 2735 vm_page_lock_queues(); 2736 } else 2737 lck_mtx_yield(&vm_page_queue_lock); 2738 2739 delayed_unlock = 1; 2740 } 2741 vm_pageout_considered_page++; 2742 2743 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 2744 vm_consider_waking_compactor_swapper(); 2745 2746 /* 2747 * back to top of pageout scan loop 2748 */ 2749 } 2750} 2751 2752 2753int vm_page_free_count_init; 2754 2755void 2756vm_page_free_reserve( 2757 int pages) 2758{ 2759 int free_after_reserve; 2760 2761 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 2762 2763 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT)) 2764 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT; 2765 else 2766 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT); 2767 2768 } else { 2769 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT) 2770 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT; 2771 else 2772 vm_page_free_reserved += pages; 2773 } 2774 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved; 2775 2776 vm_page_free_min = vm_page_free_reserved + 2777 VM_PAGE_FREE_MIN(free_after_reserve); 2778 2779 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) 2780 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT; 2781 2782 vm_page_free_target = vm_page_free_reserved + 2783 VM_PAGE_FREE_TARGET(free_after_reserve); 2784 2785 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) 2786 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT; 2787 2788 if (vm_page_free_target < vm_page_free_min + 5) 2789 vm_page_free_target = vm_page_free_min + 5; 2790 2791 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3); 2792 vm_page_creation_throttle = vm_page_free_target * 3; 2793} 2794 2795/* 2796 * vm_pageout is the high level pageout daemon. 
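 * vm_pageout_continue() is its main loop: run vm_pageout_scan(), then
 * block on vm_page_free_wanted until free pages are needed again.
 * the actual I/O is done by the iothreads further down (external, plus
 * internal or compressor-backed), with a separate thread handling
 * garbage collection... the external iothread, the garbage collector
 * and (if configured) the pressure thread are created in vm_pageout(),
 * while the internal iothread is started when the default pager first
 * registers.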
2797 */ 2798 2799void 2800vm_pageout_continue(void) 2801{ 2802 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL); 2803 vm_pageout_scan_event_counter++; 2804 2805 vm_pageout_scan(); 2806 /* 2807 * we hold both the vm_page_queue_free_lock 2808 * and the vm_page_queues_lock at this point 2809 */ 2810 assert(vm_page_free_wanted == 0); 2811 assert(vm_page_free_wanted_privileged == 0); 2812 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT); 2813 2814 lck_mtx_unlock(&vm_page_queue_free_lock); 2815 vm_page_unlock_queues(); 2816 2817 counter(c_vm_pageout_block++); 2818 thread_block((thread_continue_t)vm_pageout_continue); 2819 /*NOTREACHED*/ 2820} 2821 2822 2823#ifdef FAKE_DEADLOCK 2824 2825#define FAKE_COUNT 5000 2826 2827int internal_count = 0; 2828int fake_deadlock = 0; 2829 2830#endif 2831 2832static void 2833vm_pageout_iothread_continue(struct vm_pageout_queue *q) 2834{ 2835 vm_page_t m = NULL; 2836 vm_object_t object; 2837 vm_object_offset_t offset; 2838 memory_object_t pager; 2839 thread_t self = current_thread(); 2840 2841 if ((vm_pageout_internal_iothread != THREAD_NULL) 2842 && (self == vm_pageout_external_iothread ) 2843 && (self->options & TH_OPT_VMPRIV)) 2844 self->options &= ~TH_OPT_VMPRIV; 2845 2846 vm_page_lockspin_queues(); 2847 2848 while ( !queue_empty(&q->pgo_pending) ) { 2849 2850 q->pgo_busy = TRUE; 2851 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); 2852 if (m->object->object_slid) { 2853 panic("slid page %p not allowed on this path\n", m); 2854 } 2855 VM_PAGE_CHECK(m); 2856 m->pageout_queue = FALSE; 2857 m->pageq.next = NULL; 2858 m->pageq.prev = NULL; 2859 2860 /* 2861 * grab a snapshot of the object and offset this 2862 * page is tabled in so that we can relookup this 2863 * page after we've taken the object lock - these 2864 * fields are stable while we hold the page queues lock 2865 * but as soon as we drop it, there is nothing to keep 2866 * this page in this object... we hold an activity_in_progress 2867 * on this object which will keep it from terminating 2868 */ 2869 object = m->object; 2870 offset = m->offset; 2871 2872 vm_page_unlock_queues(); 2873 2874#ifdef FAKE_DEADLOCK 2875 if (q == &vm_pageout_queue_internal) { 2876 vm_offset_t addr; 2877 int pg_count; 2878 2879 internal_count++; 2880 2881 if ((internal_count == FAKE_COUNT)) { 2882 2883 pg_count = vm_page_free_count + vm_page_free_reserved; 2884 2885 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) { 2886 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count); 2887 } 2888 internal_count = 0; 2889 fake_deadlock++; 2890 } 2891 } 2892#endif 2893 vm_object_lock(object); 2894 2895 m = vm_page_lookup(object, offset); 2896 2897 if (m == NULL || 2898 m->busy || m->cleaning || m->pageout_queue || !m->laundry) { 2899 /* 2900 * it's either the same page that someone else has 2901 * started cleaning (or it's finished cleaning or 2902 * been put back on the pageout queue), or 2903 * the page has been freed or we have found a 2904 * new page at this offset... in all of these cases 2905 * we merely need to release the activity_in_progress 2906 * we took when we put the page on the pageout queue 2907 */ 2908 vm_object_activity_end(object); 2909 vm_object_unlock(object); 2910 2911 vm_page_lockspin_queues(); 2912 continue; 2913 } 2914 if (!object->pager_initialized) { 2915 2916 /* 2917 * If there is no memory object for the page, create 2918 * one and hand it to the default pager. 
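			 * we give vm_object_collapse() a chance first, since the
			 * collapse may leave the object with an initialized pager...
			 * only if it is still uninitialized do we create a new one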
2919 */ 2920 2921 if (!object->pager_initialized) 2922 vm_object_collapse(object, 2923 (vm_object_offset_t) 0, 2924 TRUE); 2925 if (!object->pager_initialized) 2926 vm_object_pager_create(object); 2927 if (!object->pager_initialized) { 2928 /* 2929 * Still no pager for the object. 2930 * Reactivate the page. 2931 * 2932 * Should only happen if there is no 2933 * default pager. 2934 */ 2935 m->pageout = FALSE; 2936 2937 vm_page_lockspin_queues(); 2938 2939 vm_pageout_throttle_up(m); 2940 vm_page_activate(m); 2941 vm_pageout_dirty_no_pager++; 2942 2943 vm_page_unlock_queues(); 2944 2945 /* 2946 * And we are done with it. 2947 */ 2948 vm_object_activity_end(object); 2949 vm_object_unlock(object); 2950 2951 vm_page_lockspin_queues(); 2952 continue; 2953 } 2954 } 2955 pager = object->pager; 2956 2957 if (pager == MEMORY_OBJECT_NULL) { 2958 /* 2959 * This pager has been destroyed by either 2960 * memory_object_destroy or vm_object_destroy, and 2961 * so there is nowhere for the page to go. 2962 */ 2963 if (m->pageout) { 2964 /* 2965 * Just free the page... VM_PAGE_FREE takes 2966 * care of cleaning up all the state... 2967 * including doing the vm_pageout_throttle_up 2968 */ 2969 VM_PAGE_FREE(m); 2970 } else { 2971 vm_page_lockspin_queues(); 2972 2973 vm_pageout_throttle_up(m); 2974 vm_page_activate(m); 2975 2976 vm_page_unlock_queues(); 2977 2978 /* 2979 * And we are done with it. 2980 */ 2981 } 2982 vm_object_activity_end(object); 2983 vm_object_unlock(object); 2984 2985 vm_page_lockspin_queues(); 2986 continue; 2987 } 2988#if 0 2989 /* 2990 * we don't hold the page queue lock 2991 * so this check isn't safe to make 2992 */ 2993 VM_PAGE_CHECK(m); 2994#endif 2995 /* 2996 * give back the activity_in_progress reference we 2997 * took when we queued up this page and replace it 2998 * it with a paging_in_progress reference that will 2999 * also hold the paging offset from changing and 3000 * prevent the object from terminating 3001 */ 3002 vm_object_activity_end(object); 3003 vm_object_paging_begin(object); 3004 vm_object_unlock(object); 3005 3006 /* 3007 * Send the data to the pager. 
3008 * any pageout clustering happens there 3009 */ 3010 memory_object_data_return(pager, 3011 m->offset + object->paging_offset, 3012 PAGE_SIZE, 3013 NULL, 3014 NULL, 3015 FALSE, 3016 FALSE, 3017 0); 3018 3019 vm_object_lock(object); 3020 vm_object_paging_end(object); 3021 vm_object_unlock(object); 3022 3023 vm_pageout_io_throttle(); 3024 3025 vm_page_lockspin_queues(); 3026 } 3027 q->pgo_busy = FALSE; 3028 q->pgo_idle = TRUE; 3029 3030 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); 3031 vm_page_unlock_queues(); 3032 3033 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q); 3034 /*NOTREACHED*/ 3035} 3036 3037 3038static void 3039vm_pageout_iothread_external_continue(struct vm_pageout_queue *q) 3040{ 3041 vm_page_t m = NULL; 3042 vm_object_t object; 3043 vm_object_offset_t offset; 3044 memory_object_t pager; 3045 3046 3047 if (vm_pageout_internal_iothread != THREAD_NULL) 3048 current_thread()->options &= ~TH_OPT_VMPRIV; 3049 3050 vm_page_lockspin_queues(); 3051 3052 while ( !queue_empty(&q->pgo_pending) ) { 3053 3054 q->pgo_busy = TRUE; 3055 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); 3056 if (m->object->object_slid) { 3057 panic("slid page %p not allowed on this path\n", m); 3058 } 3059 VM_PAGE_CHECK(m); 3060 m->pageout_queue = FALSE; 3061 m->pageq.next = NULL; 3062 m->pageq.prev = NULL; 3063 3064 /* 3065 * grab a snapshot of the object and offset this 3066 * page is tabled in so that we can relookup this 3067 * page after we've taken the object lock - these 3068 * fields are stable while we hold the page queues lock 3069 * but as soon as we drop it, there is nothing to keep 3070 * this page in this object... we hold an activity_in_progress 3071 * on this object which will keep it from terminating 3072 */ 3073 object = m->object; 3074 offset = m->offset; 3075 3076 vm_page_unlock_queues(); 3077 3078 vm_object_lock(object); 3079 3080 m = vm_page_lookup(object, offset); 3081 3082 if (m == NULL || 3083 m->busy || m->cleaning || m->pageout_queue || !m->laundry) { 3084 /* 3085 * it's either the same page that someone else has 3086 * started cleaning (or it's finished cleaning or 3087 * been put back on the pageout queue), or 3088 * the page has been freed or we have found a 3089 * new page at this offset... in all of these cases 3090 * we merely need to release the activity_in_progress 3091 * we took when we put the page on the pageout queue 3092 */ 3093 vm_object_activity_end(object); 3094 vm_object_unlock(object); 3095 3096 vm_page_lockspin_queues(); 3097 continue; 3098 } 3099 pager = object->pager; 3100 3101 if (pager == MEMORY_OBJECT_NULL) { 3102 /* 3103 * This pager has been destroyed by either 3104 * memory_object_destroy or vm_object_destroy, and 3105 * so there is nowhere for the page to go. 3106 */ 3107 if (m->pageout) { 3108 /* 3109 * Just free the page... VM_PAGE_FREE takes 3110 * care of cleaning up all the state... 3111 * including doing the vm_pageout_throttle_up 3112 */ 3113 VM_PAGE_FREE(m); 3114 } else { 3115 vm_page_lockspin_queues(); 3116 3117 vm_pageout_throttle_up(m); 3118 vm_page_activate(m); 3119 3120 vm_page_unlock_queues(); 3121 3122 /* 3123 * And we are done with it. 
3124 */ 3125 } 3126 vm_object_activity_end(object); 3127 vm_object_unlock(object); 3128 3129 vm_page_lockspin_queues(); 3130 continue; 3131 } 3132#if 0 3133 /* 3134 * we don't hold the page queue lock 3135 * so this check isn't safe to make 3136 */ 3137 VM_PAGE_CHECK(m); 3138#endif 3139 /* 3140 * give back the activity_in_progress reference we 3141 * took when we queued up this page and replace it 3142 * it with a paging_in_progress reference that will 3143 * also hold the paging offset from changing and 3144 * prevent the object from terminating 3145 */ 3146 vm_object_activity_end(object); 3147 vm_object_paging_begin(object); 3148 vm_object_unlock(object); 3149 3150 /* 3151 * Send the data to the pager. 3152 * any pageout clustering happens there 3153 */ 3154 memory_object_data_return(pager, 3155 m->offset + object->paging_offset, 3156 PAGE_SIZE, 3157 NULL, 3158 NULL, 3159 FALSE, 3160 FALSE, 3161 0); 3162 3163 vm_object_lock(object); 3164 vm_object_paging_end(object); 3165 vm_object_unlock(object); 3166 3167 vm_pageout_io_throttle(); 3168 3169 vm_page_lockspin_queues(); 3170 } 3171 q->pgo_busy = FALSE; 3172 q->pgo_idle = TRUE; 3173 3174 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); 3175 vm_page_unlock_queues(); 3176 3177 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q); 3178 /*NOTREACHED*/ 3179} 3180 3181 3182uint32_t vm_compressor_failed; 3183 3184static void 3185vm_pageout_iothread_internal_continue(struct cq *cq) 3186{ 3187 struct vm_pageout_queue *q; 3188 vm_page_t m = NULL; 3189 vm_object_t object; 3190 memory_object_t pager; 3191 boolean_t pgo_draining; 3192 vm_page_t local_q; 3193 int local_cnt; 3194 vm_page_t local_freeq = NULL; 3195 int local_freed = 0; 3196 int local_batch_size; 3197 kern_return_t retval; 3198 3199 3200 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0); 3201 3202 q = cq->q; 3203 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 4); 3204 3205 while (TRUE) { 3206 3207 local_cnt = 0; 3208 local_q = NULL; 3209 3210 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0); 3211 3212 vm_page_lock_queues(); 3213 3214 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0); 3215 3216 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, 0, 0, 0, 0, 0); 3217 3218 while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) { 3219 3220 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); 3221 3222 VM_PAGE_CHECK(m); 3223 3224 m->pageout_queue = FALSE; 3225 m->pageq.prev = NULL; 3226 3227 m->pageq.next = (queue_entry_t)local_q; 3228 local_q = m; 3229 local_cnt++; 3230 } 3231 if (local_q == NULL) 3232 break; 3233 3234 q->pgo_busy = TRUE; 3235 3236 if ((pgo_draining = q->pgo_draining) == FALSE) 3237 vm_pageout_throttle_up_batch(q, local_cnt); 3238 3239 vm_page_unlock_queues(); 3240 3241 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); 3242 3243 while (local_q) { 3244 3245 m = local_q; 3246 local_q = (vm_page_t)m->pageq.next; 3247 m->pageq.next = NULL; 3248 3249 if (m->object->object_slid) { 3250 panic("slid page %p not allowed on this path\n", m); 3251 } 3252 3253 object = m->object; 3254 pager = object->pager; 3255 3256 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) { 3257 3258 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0); 3259 3260 vm_object_lock(object); 3261 3262 /* 3263 * If there is no memory object for the page, create 3264 * one and hand it to the compression pager. 
3265 */ 3266 3267 if (!object->pager_initialized) 3268 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE); 3269 if (!object->pager_initialized) 3270 vm_object_compressor_pager_create(object); 3271 3272 if (!object->pager_initialized) { 3273 /* 3274 * Still no pager for the object. 3275 * Reactivate the page. 3276 * 3277 * Should only happen if there is no 3278 * compression pager 3279 */ 3280 m->pageout = FALSE; 3281 m->laundry = FALSE; 3282 PAGE_WAKEUP_DONE(m); 3283 3284 vm_page_lockspin_queues(); 3285 vm_page_activate(m); 3286 vm_pageout_dirty_no_pager++; 3287 vm_page_unlock_queues(); 3288 3289 /* 3290 * And we are done with it. 3291 */ 3292 vm_object_activity_end(object); 3293 vm_object_unlock(object); 3294 3295 continue; 3296 } 3297 pager = object->pager; 3298 3299 if (pager == MEMORY_OBJECT_NULL) { 3300 /* 3301 * This pager has been destroyed by either 3302 * memory_object_destroy or vm_object_destroy, and 3303 * so there is nowhere for the page to go. 3304 */ 3305 if (m->pageout) { 3306 /* 3307 * Just free the page... VM_PAGE_FREE takes 3308 * care of cleaning up all the state... 3309 * including doing the vm_pageout_throttle_up 3310 */ 3311 VM_PAGE_FREE(m); 3312 } else { 3313 m->laundry = FALSE; 3314 PAGE_WAKEUP_DONE(m); 3315 3316 vm_page_lockspin_queues(); 3317 vm_page_activate(m); 3318 vm_page_unlock_queues(); 3319 3320 /* 3321 * And we are done with it. 3322 */ 3323 } 3324 vm_object_activity_end(object); 3325 vm_object_unlock(object); 3326 3327 continue; 3328 } 3329 vm_object_unlock(object); 3330 3331 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0); 3332 } 3333 while (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) { 3334 kern_return_t wait_result; 3335 int need_wakeup = 0; 3336 3337 if (local_freeq) { 3338 vm_page_free_list(local_freeq, TRUE); 3339 3340 local_freeq = NULL; 3341 local_freed = 0; 3342 3343 continue; 3344 } 3345 lck_mtx_lock_spin(&vm_page_queue_free_lock); 3346 3347 if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) { 3348 3349 if (vm_page_free_wanted_privileged++ == 0) 3350 need_wakeup = 1; 3351 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT); 3352 3353 lck_mtx_unlock(&vm_page_queue_free_lock); 3354 3355 if (need_wakeup) 3356 thread_wakeup((event_t)&vm_page_free_wanted); 3357 3358 if (wait_result == THREAD_WAITING) 3359 thread_block(THREAD_CONTINUE_NULL); 3360 } else 3361 lck_mtx_unlock(&vm_page_queue_free_lock); 3362 } 3363 retval = vm_compressor_pager_put(pager, m->offset + object->paging_offset, m->phys_page, &cq->current_chead, cq->scratch_buf); 3364 3365 vm_object_lock(object); 3366 m->laundry = FALSE; 3367 m->pageout = FALSE; 3368 3369 if (retval == KERN_SUCCESS) { 3370 3371 vm_page_compressions_failing = FALSE; 3372 3373 VM_STAT_INCR(compressions); 3374 3375 if (m->tabled) 3376 vm_page_remove(m, TRUE); 3377 vm_object_activity_end(object); 3378 vm_object_unlock(object); 3379 3380 m->pageq.next = (queue_entry_t)local_freeq; 3381 local_freeq = m; 3382 local_freed++; 3383 3384 } else { 3385 PAGE_WAKEUP_DONE(m); 3386 3387 vm_page_lockspin_queues(); 3388 3389 vm_page_activate(m); 3390 vm_compressor_failed++; 3391 3392 vm_page_compressions_failing = TRUE; 3393 3394 vm_page_unlock_queues(); 3395 3396 vm_object_activity_end(object); 3397 vm_object_unlock(object); 3398 } 3399 } 3400 if (local_freeq) { 3401 vm_page_free_list(local_freeq, TRUE); 3402 3403 local_freeq = NULL; 3404 local_freed = 0; 3405 } 3406 if (pgo_draining == TRUE) { 3407 
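			/*
			 * the queue was draining when we pulled this batch off of it,
			 * so we deliberately skipped the vm_pageout_throttle_up_batch
			 * that normally happens up front... do it now that the batch
			 * has been processed
			 */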
vm_page_lockspin_queues(); 3408 vm_pageout_throttle_up_batch(q, local_cnt); 3409 vm_page_unlock_queues(); 3410 } 3411 } 3412 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0); 3413 3414 /* 3415 * queue lock is held and our q is empty 3416 */ 3417 q->pgo_busy = FALSE; 3418 q->pgo_idle = TRUE; 3419 3420 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); 3421 vm_page_unlock_queues(); 3422 3423 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); 3424 3425 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq); 3426 /*NOTREACHED*/ 3427} 3428 3429 3430 3431static void 3432vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority) 3433{ 3434 uint32_t policy; 3435 boolean_t set_iq = FALSE; 3436 boolean_t set_eq = FALSE; 3437 3438 if (hibernate_cleaning_in_progress == TRUE) 3439 req_lowpriority = FALSE; 3440 3441 if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority) 3442 set_iq = TRUE; 3443 3444 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) 3445 set_eq = TRUE; 3446 3447 if (set_iq == TRUE || set_eq == TRUE) { 3448 3449 vm_page_unlock_queues(); 3450 3451 if (req_lowpriority == TRUE) { 3452 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED; 3453 DTRACE_VM(laundrythrottle); 3454 } else { 3455 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED; 3456 DTRACE_VM(laundryunthrottle); 3457 } 3458 if (set_iq == TRUE) { 3459 proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); 3460 3461 iq->pgo_lowpriority = req_lowpriority; 3462 } 3463 if (set_eq == TRUE) { 3464 proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); 3465 3466 eq->pgo_lowpriority = req_lowpriority; 3467 } 3468 vm_page_lock_queues(); 3469 } 3470} 3471 3472 3473static void 3474vm_pageout_iothread_external(void) 3475{ 3476 thread_t self = current_thread(); 3477 3478 self->options |= TH_OPT_VMPRIV; 3479 3480 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); 3481 3482 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, 3483 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); 3484 3485 vm_page_lock_queues(); 3486 3487 vm_pageout_queue_external.pgo_tid = self->thread_id; 3488 vm_pageout_queue_external.pgo_lowpriority = TRUE; 3489 vm_pageout_queue_external.pgo_inited = TRUE; 3490 3491 vm_page_unlock_queues(); 3492 3493 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 3494 vm_pageout_iothread_external_continue(&vm_pageout_queue_external); 3495 else 3496 vm_pageout_iothread_continue(&vm_pageout_queue_external); 3497 3498 /*NOTREACHED*/ 3499} 3500 3501 3502static void 3503vm_pageout_iothread_internal(struct cq *cq) 3504{ 3505 thread_t self = current_thread(); 3506 3507 self->options |= TH_OPT_VMPRIV; 3508 3509 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { 3510 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); 3511 3512 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, 3513 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); 3514 } 3515 vm_page_lock_queues(); 3516 3517 vm_pageout_queue_internal.pgo_tid = self->thread_id; 3518 vm_pageout_queue_internal.pgo_lowpriority = TRUE; 3519 vm_pageout_queue_internal.pgo_inited = TRUE; 3520 3521 vm_page_unlock_queues(); 3522 3523 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 3524 cq->q = 
&vm_pageout_queue_internal; 3525 cq->current_chead = NULL; 3526 cq->scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE); 3527 3528 vm_pageout_iothread_internal_continue(cq); 3529 } else 3530 vm_pageout_iothread_continue(&vm_pageout_queue_internal); 3531 3532 /*NOTREACHED*/ 3533} 3534 3535kern_return_t 3536vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) 3537{ 3538 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) { 3539 return KERN_SUCCESS; 3540 } else { 3541 return KERN_FAILURE; /* Already set */ 3542 } 3543} 3544 3545 3546extern boolean_t memorystatus_manual_testing_on; 3547extern unsigned int memorystatus_level; 3548 3549 3550 3551#if VM_PRESSURE_EVENTS 3552 3553void 3554vm_pressure_response(void) 3555{ 3556 3557 3558 vm_pressure_level_t old_level = kVMPressureNormal; 3559 int new_level = -1; 3560 3561 uint64_t available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100); 3562 3563 memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem)); 3564 3565 if (memorystatus_manual_testing_on) { 3566 return; 3567 } 3568 3569 old_level = memorystatus_vm_pressure_level; 3570 3571 switch (memorystatus_vm_pressure_level) { 3572 3573 case kVMPressureNormal: 3574 { 3575 if (VM_PRESSURE_WARNING_TO_CRITICAL()) { 3576 new_level = kVMPressureCritical; 3577 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) { 3578 new_level = kVMPressureWarning; 3579 } 3580 break; 3581 } 3582 3583 case kVMPressureWarning: 3584 case kVMPressureUrgent: 3585 { 3586 if (VM_PRESSURE_WARNING_TO_NORMAL()) { 3587 new_level = kVMPressureNormal; 3588 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) { 3589 new_level = kVMPressureCritical; 3590 } 3591 break; 3592 } 3593 3594 case kVMPressureCritical: 3595 { 3596 if (VM_PRESSURE_WARNING_TO_NORMAL()) { 3597 new_level = kVMPressureNormal; 3598 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) { 3599 new_level = kVMPressureWarning; 3600 } 3601 break; 3602 } 3603 3604 default: 3605 return; 3606 } 3607 3608 if (new_level != -1) { 3609 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level; 3610 3611 if (old_level != new_level) { 3612 if (vm_pressure_thread_running == FALSE) { 3613 thread_wakeup(&vm_pressure_thread); 3614 } 3615 thread_wakeup(&vm_pressure_changed); 3616 } 3617 } 3618 3619} 3620#endif /* VM_PRESSURE_EVENTS */ 3621 3622kern_return_t 3623mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) { 3624 3625#if !VM_PRESSURE_EVENTS 3626 3627 return KERN_FAILURE; 3628 3629#else /* VM_PRESSURE_EVENTS */ 3630 3631 kern_return_t kr = KERN_SUCCESS; 3632 3633 if (pressure_level != NULL) { 3634 3635 vm_pressure_level_t old_level = memorystatus_vm_pressure_level; 3636 3637 if (wait_for_pressure == TRUE) { 3638 wait_result_t wr = 0; 3639 3640 while (old_level == *pressure_level) { 3641 wr = assert_wait((event_t) &vm_pressure_changed, 3642 THREAD_INTERRUPTIBLE); 3643 if (wr == THREAD_WAITING) { 3644 wr = thread_block(THREAD_CONTINUE_NULL); 3645 } 3646 if (wr == THREAD_INTERRUPTED) { 3647 return KERN_ABORTED; 3648 } 3649 if (wr == THREAD_AWAKENED) { 3650 3651 old_level = memorystatus_vm_pressure_level; 3652 3653 if (old_level != *pressure_level) { 3654 break; 3655 } 3656 } 3657 } 3658 } 3659 3660 *pressure_level = old_level; 3661 kr = KERN_SUCCESS; 3662 } else { 3663 kr = KERN_INVALID_ARGUMENT; 3664 } 3665 3666 return kr; 3667#endif /* VM_PRESSURE_EVENTS */ 3668} 3669 3670#if VM_PRESSURE_EVENTS 3671void 3672vm_pressure_thread(void) { 3673 static boolean_t set_up_thread = FALSE; 
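	/*
	 * on the very first invocation we just mark ourselves as set up and
	 * block... the real work (consider_vm_pressure_events) happens on
	 * later wakeups, when this continuation re-enters with
	 * set_up_thread already TRUE
	 */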
3674 3675 if (set_up_thread) { 3676 vm_pressure_thread_running = TRUE; 3677 consider_vm_pressure_events(); 3678 vm_pressure_thread_running = FALSE; 3679 } 3680 3681 set_up_thread = TRUE; 3682 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT); 3683 thread_block((thread_continue_t)vm_pressure_thread); 3684} 3685#endif /* VM_PRESSURE_EVENTS */ 3686 3687 3688uint32_t vm_pageout_considered_page_last = 0; 3689 3690/* 3691 * called once per-second via "compute_averages" 3692 */ 3693void 3694compute_pageout_gc_throttle() 3695{ 3696 if (vm_pageout_considered_page != vm_pageout_considered_page_last) { 3697 3698 vm_pageout_considered_page_last = vm_pageout_considered_page; 3699 3700 thread_wakeup((event_t) &vm_pageout_garbage_collect); 3701 } 3702} 3703 3704 3705static void 3706vm_pageout_garbage_collect(int collect) 3707{ 3708 3709 if (collect) { 3710 boolean_t buf_large_zfree = FALSE; 3711 boolean_t first_try = TRUE; 3712 3713 stack_collect(); 3714 3715 consider_machine_collect(); 3716 3717 do { 3718 if (consider_buffer_cache_collect != NULL) { 3719 buf_large_zfree = (*consider_buffer_cache_collect)(0); 3720 } 3721 if (first_try == TRUE || buf_large_zfree == TRUE) { 3722 /* 3723 * consider_zone_gc should be last, because the other operations 3724 * might return memory to zones. 3725 */ 3726 consider_zone_gc(buf_large_zfree); 3727 } 3728 first_try = FALSE; 3729 3730 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target); 3731 3732 consider_machine_adjust(); 3733 } 3734 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT); 3735 3736 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1); 3737 /*NOTREACHED*/ 3738} 3739 3740 3741#if VM_PAGE_BUCKETS_CHECK 3742#if VM_PAGE_FAKE_BUCKETS 3743extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end; 3744#endif /* VM_PAGE_FAKE_BUCKETS */ 3745#endif /* VM_PAGE_BUCKETS_CHECK */ 3746 3747void 3748vm_pageout(void) 3749{ 3750 thread_t self = current_thread(); 3751 thread_t thread; 3752 kern_return_t result; 3753 spl_t s; 3754 3755 /* 3756 * Set thread privileges. 3757 */ 3758 s = splsched(); 3759 thread_lock(self); 3760 self->priority = BASEPRI_PREEMPT - 1; 3761 set_sched_pri(self, self->priority); 3762 thread_unlock(self); 3763 3764 if (!self->reserved_stack) 3765 self->reserved_stack = self->kernel_stack; 3766 3767 splx(s); 3768 3769 /* 3770 * Initialize some paging parameters. 
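 * any of the tunables below that are still zero at this point fall
 * back to their compile-time VM_PAGEOUT_* defaults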
3771 */ 3772 3773 if (vm_pageout_swap_wait == 0) 3774 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT; 3775 3776 if (vm_pageout_idle_wait == 0) 3777 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; 3778 3779 if (vm_pageout_burst_wait == 0) 3780 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; 3781 3782 if (vm_pageout_empty_wait == 0) 3783 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; 3784 3785 if (vm_pageout_deadlock_wait == 0) 3786 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT; 3787 3788 if (vm_pageout_deadlock_relief == 0) 3789 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF; 3790 3791 if (vm_pageout_inactive_relief == 0) 3792 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF; 3793 3794 if (vm_pageout_burst_active_throttle == 0) 3795 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE; 3796 3797 if (vm_pageout_burst_inactive_throttle == 0) 3798 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; 3799 3800#if !CONFIG_JETSAM 3801 vm_page_filecache_min = (uint32_t) (max_mem / PAGE_SIZE) / 20; 3802 if (vm_page_filecache_min < VM_PAGE_FILECACHE_MIN) 3803 vm_page_filecache_min = VM_PAGE_FILECACHE_MIN; 3804#endif 3805 3806 /* 3807 * Set kernel task to low backing store privileged 3808 * status 3809 */ 3810 task_lock(kernel_task); 3811 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV; 3812 task_unlock(kernel_task); 3813 3814 vm_page_free_count_init = vm_page_free_count; 3815 3816 /* 3817 * even if we've already called vm_page_free_reserve 3818 * call it again here to insure that the targets are 3819 * accurately calculated (it uses vm_page_free_count_init) 3820 * calling it with an arg of 0 will not change the reserve 3821 * but will re-calculate free_min and free_target 3822 */ 3823 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) { 3824 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved); 3825 } else 3826 vm_page_free_reserve(0); 3827 3828 3829 queue_init(&vm_pageout_queue_external.pgo_pending); 3830 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; 3831 vm_pageout_queue_external.pgo_laundry = 0; 3832 vm_pageout_queue_external.pgo_idle = FALSE; 3833 vm_pageout_queue_external.pgo_busy = FALSE; 3834 vm_pageout_queue_external.pgo_throttled = FALSE; 3835 vm_pageout_queue_external.pgo_draining = FALSE; 3836 vm_pageout_queue_external.pgo_lowpriority = FALSE; 3837 vm_pageout_queue_external.pgo_tid = -1; 3838 vm_pageout_queue_external.pgo_inited = FALSE; 3839 3840 3841 queue_init(&vm_pageout_queue_internal.pgo_pending); 3842 vm_pageout_queue_internal.pgo_maxlaundry = 0; 3843 vm_pageout_queue_internal.pgo_laundry = 0; 3844 vm_pageout_queue_internal.pgo_idle = FALSE; 3845 vm_pageout_queue_internal.pgo_busy = FALSE; 3846 vm_pageout_queue_internal.pgo_throttled = FALSE; 3847 vm_pageout_queue_internal.pgo_draining = FALSE; 3848 vm_pageout_queue_internal.pgo_lowpriority = FALSE; 3849 vm_pageout_queue_internal.pgo_tid = -1; 3850 vm_pageout_queue_internal.pgo_inited = FALSE; 3851 3852 /* internal pageout thread started when default pager registered first time */ 3853 /* external pageout and garbage collection threads started here */ 3854 3855 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, 3856 BASEPRI_PREEMPT - 1, 3857 &vm_pageout_external_iothread); 3858 if (result != KERN_SUCCESS) 3859 panic("vm_pageout_iothread_external: create failed"); 3860 3861 thread_deallocate(vm_pageout_external_iothread); 3862 3863 result = 
kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, 3864 BASEPRI_DEFAULT, 3865 &thread); 3866 if (result != KERN_SUCCESS) 3867 panic("vm_pageout_garbage_collect: create failed"); 3868 3869 thread_deallocate(thread); 3870 3871#if VM_PRESSURE_EVENTS 3872 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL, 3873 BASEPRI_DEFAULT, 3874 &thread); 3875 3876 if (result != KERN_SUCCESS) 3877 panic("vm_pressure_thread: create failed"); 3878 3879 thread_deallocate(thread); 3880#endif 3881 3882 vm_object_reaper_init(); 3883 3884 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 3885 vm_compressor_pager_init(); 3886 3887#if VM_PAGE_BUCKETS_CHECK 3888#if VM_PAGE_FAKE_BUCKETS 3889 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n", 3890 vm_page_fake_buckets_start, vm_page_fake_buckets_end); 3891 pmap_protect(kernel_pmap, 3892 vm_page_fake_buckets_start, 3893 vm_page_fake_buckets_end, 3894 VM_PROT_READ); 3895// *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */ 3896#endif /* VM_PAGE_FAKE_BUCKETS */ 3897#endif /* VM_PAGE_BUCKETS_CHECK */ 3898 3899 vm_pageout_continue(); 3900 3901 /* 3902 * Unreached code! 3903 * 3904 * The vm_pageout_continue() call above never returns, so the code below is never 3905 * executed. We take advantage of this to declare several DTrace VM related probe 3906 * points that our kernel doesn't have an analog for. These are probe points that 3907 * exist in Solaris and are in the DTrace documentation, so people may have written 3908 * scripts that use them. Declaring the probe points here means their scripts will 3909 * compile and execute which we want for portability of the scripts, but since this 3910 * section of code is never reached, the probe points will simply never fire. Yes, 3911 * this is basically a hack. The problem is the DTrace probe points were chosen with 3912 * Solaris specific VM events in mind, not portability to different VM implementations. 
3913 */ 3914 3915 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL); 3916 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL); 3917 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL); 3918 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL); 3919 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL); 3920 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL); 3921 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL); 3922 /*NOTREACHED*/ 3923} 3924 3925 3926 3927#define MAX_COMRPESSOR_THREAD_COUNT 8 3928 3929struct cq ciq[MAX_COMRPESSOR_THREAD_COUNT]; 3930 3931int vm_compressor_thread_count = 2; 3932 3933kern_return_t 3934vm_pageout_internal_start(void) 3935{ 3936 kern_return_t result; 3937 int i; 3938 host_basic_info_data_t hinfo; 3939 3940 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 3941 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; 3942#define BSD_HOST 1 3943 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); 3944 3945 assert(hinfo.max_cpus > 0); 3946 3947 if (vm_compressor_thread_count >= hinfo.max_cpus) 3948 vm_compressor_thread_count = hinfo.max_cpus - 1; 3949 if (vm_compressor_thread_count <= 0) 3950 vm_compressor_thread_count = 1; 3951 else if (vm_compressor_thread_count > MAX_COMRPESSOR_THREAD_COUNT) 3952 vm_compressor_thread_count = MAX_COMRPESSOR_THREAD_COUNT; 3953 3954 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX; 3955 } else { 3956 vm_compressor_thread_count = 1; 3957 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; 3958 } 3959 3960 for (i = 0; i < vm_compressor_thread_count; i++) { 3961 3962 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread); 3963 if (result == KERN_SUCCESS) 3964 thread_deallocate(vm_pageout_internal_iothread); 3965 else 3966 break; 3967 } 3968 return result; 3969} 3970 3971 3972static upl_t 3973upl_create(int type, int flags, upl_size_t size) 3974{ 3975 upl_t upl; 3976 vm_size_t page_field_size = 0; 3977 int upl_flags = 0; 3978 vm_size_t upl_size = sizeof(struct upl); 3979 3980 size = round_page_32(size); 3981 3982 if (type & UPL_CREATE_LITE) { 3983 page_field_size = (atop(size) + 7) >> 3; 3984 page_field_size = (page_field_size + 3) & 0xFFFFFFFC; 3985 3986 upl_flags |= UPL_LITE; 3987 } 3988 if (type & UPL_CREATE_INTERNAL) { 3989 upl_size += sizeof(struct upl_page_info) * atop(size); 3990 3991 upl_flags |= UPL_INTERNAL; 3992 } 3993 upl = (upl_t)kalloc(upl_size + page_field_size); 3994 3995 if (page_field_size) 3996 bzero((char *)upl + upl_size, page_field_size); 3997 3998 upl->flags = upl_flags | flags; 3999 upl->src_object = NULL; 4000 upl->kaddr = (vm_offset_t)0; 4001 upl->size = 0; 4002 upl->map_object = NULL; 4003 upl->ref_count = 1; 4004 upl->ext_ref_count = 0; 4005 upl->highest_page = 0; 4006 upl_lock_init(upl); 4007 upl->vector_upl = NULL; 4008#if UPL_DEBUG 4009 upl->ubc_alias1 = 0; 4010 upl->ubc_alias2 = 0; 4011 4012 upl->upl_creator = current_thread(); 4013 upl->upl_state = 0; 4014 upl->upl_commit_index = 0; 4015 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records)); 4016 4017 upl->uplq.next = 0; 4018 upl->uplq.prev = 0; 4019 4020 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES); 4021#endif /* UPL_DEBUG */ 4022 4023 return(upl); 4024} 4025 4026static void 4027upl_destroy(upl_t upl) 4028{ 4029 int page_field_size; /* bit field in word size buf */ 4030 int size; 4031 4032 if (upl->ext_ref_count) { 4033 
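		/*
		 * The UPL still has outstanding external references
		 * (ext_ref_count != 0); destroying it now would leave those
		 * references dangling, so treat this as a fatal error.
		 */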
panic("upl(%p) ext_ref_count", upl); 4034 } 4035 4036#if UPL_DEBUG 4037 if ( !(upl->flags & UPL_VECTOR)) { 4038 vm_object_t object; 4039 4040 if (upl->flags & UPL_SHADOWED) { 4041 object = upl->map_object->shadow; 4042 } else { 4043 object = upl->map_object; 4044 } 4045 vm_object_lock(object); 4046 queue_remove(&object->uplq, upl, upl_t, uplq); 4047 vm_object_activity_end(object); 4048 vm_object_collapse(object, 0, TRUE); 4049 vm_object_unlock(object); 4050 } 4051#endif /* UPL_DEBUG */ 4052 /* 4053 * drop a reference on the map_object whether or 4054 * not a pageout object is inserted 4055 */ 4056 if (upl->flags & UPL_SHADOWED) 4057 vm_object_deallocate(upl->map_object); 4058 4059 if (upl->flags & UPL_DEVICE_MEMORY) 4060 size = PAGE_SIZE; 4061 else 4062 size = upl->size; 4063 page_field_size = 0; 4064 4065 if (upl->flags & UPL_LITE) { 4066 page_field_size = ((size/PAGE_SIZE) + 7) >> 3; 4067 page_field_size = (page_field_size + 3) & 0xFFFFFFFC; 4068 } 4069 upl_lock_destroy(upl); 4070 upl->vector_upl = (vector_upl_t) 0xfeedbeef; 4071 4072 if (upl->flags & UPL_INTERNAL) { 4073 kfree(upl, 4074 sizeof(struct upl) + 4075 (sizeof(struct upl_page_info) * (size/PAGE_SIZE)) 4076 + page_field_size); 4077 } else { 4078 kfree(upl, sizeof(struct upl) + page_field_size); 4079 } 4080} 4081 4082void 4083upl_deallocate(upl_t upl) 4084{ 4085 if (--upl->ref_count == 0) { 4086 if(vector_upl_is_valid(upl)) 4087 vector_upl_deallocate(upl); 4088 upl_destroy(upl); 4089 } 4090} 4091 4092#if DEVELOPMENT || DEBUG 4093/*/* 4094 * Statistics about UPL enforcement of copy-on-write obligations. 4095 */ 4096unsigned long upl_cow = 0; 4097unsigned long upl_cow_again = 0; 4098unsigned long upl_cow_pages = 0; 4099unsigned long upl_cow_again_pages = 0; 4100 4101unsigned long iopl_cow = 0; 4102unsigned long iopl_cow_pages = 0; 4103#endif 4104 4105/* 4106 * Routine: vm_object_upl_request 4107 * Purpose: 4108 * Cause the population of a portion of a vm_object. 4109 * Depending on the nature of the request, the pages 4110 * returned may be contain valid data or be uninitialized. 4111 * A page list structure, listing the physical pages 4112 * will be returned upon request. 4113 * This function is called by the file system or any other 4114 * supplier of backing store to a pager. 4115 * IMPORTANT NOTE: The caller must still respect the relationship 4116 * between the vm_object and its backing memory object. The 4117 * caller MUST NOT substitute changes in the backing file 4118 * without first doing a memory_object_lock_request on the 4119 * target range unless it is know that the pages are not 4120 * shared with another entity at the pager level. 4121 * Copy_in_to: 4122 * if a page list structure is present 4123 * return the mapped physical pages, where a 4124 * page is not present, return a non-initialized 4125 * one. If the no_sync bit is turned on, don't 4126 * call the pager unlock to synchronize with other 4127 * possible copies of the page. Leave pages busy 4128 * in the original object, if a page list structure 4129 * was specified. When a commit of the page list 4130 * pages is done, the dirty bit will be set for each one. 4131 * Copy_out_from: 4132 * If a page list structure is present, return 4133 * all mapped pages. Where a page does not exist 4134 * map a zero filled one. Leave pages busy in 4135 * the original object. If a page list structure 4136 * is not specified, this call is a no-op. 4137 * 4138 * Note: access of default pager objects has a rather interesting 4139 * twist. 
The caller of this routine, presumably the file system 4140 * page cache handling code, will never actually make a request 4141 * against a default pager backed object. Only the default 4142 * pager will make requests on backing store related vm_objects 4143 * In this way the default pager can maintain the relationship 4144 * between backing store files (abstract memory objects) and 4145 * the vm_objects (cache objects), they support. 4146 * 4147 */ 4148 4149__private_extern__ kern_return_t 4150vm_object_upl_request( 4151 vm_object_t object, 4152 vm_object_offset_t offset, 4153 upl_size_t size, 4154 upl_t *upl_ptr, 4155 upl_page_info_array_t user_page_list, 4156 unsigned int *page_list_count, 4157 int cntrl_flags) 4158{ 4159 vm_page_t dst_page = VM_PAGE_NULL; 4160 vm_object_offset_t dst_offset; 4161 upl_size_t xfer_size; 4162 unsigned int size_in_pages; 4163 boolean_t dirty; 4164 boolean_t hw_dirty; 4165 upl_t upl = NULL; 4166 unsigned int entry; 4167#if MACH_CLUSTER_STATS 4168 boolean_t encountered_lrp = FALSE; 4169#endif 4170 vm_page_t alias_page = NULL; 4171 int refmod_state = 0; 4172 wpl_array_t lite_list = NULL; 4173 vm_object_t last_copy_object; 4174 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 4175 struct vm_page_delayed_work *dwp; 4176 int dw_count; 4177 int dw_limit; 4178 4179 if (cntrl_flags & ~UPL_VALID_FLAGS) { 4180 /* 4181 * For forward compatibility's sake, 4182 * reject any unknown flag. 4183 */ 4184 return KERN_INVALID_VALUE; 4185 } 4186 if ( (!object->internal) && (object->paging_offset != 0) ) 4187 panic("vm_object_upl_request: external object with non-zero paging offset\n"); 4188 if (object->phys_contiguous) 4189 panic("vm_object_upl_request: contiguous object specified\n"); 4190 4191 4192 if ((size / PAGE_SIZE) > MAX_UPL_SIZE) 4193 size = MAX_UPL_SIZE * PAGE_SIZE; 4194 4195 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) 4196 *page_list_count = MAX_UPL_SIZE; 4197 4198 if (cntrl_flags & UPL_SET_INTERNAL) { 4199 if (cntrl_flags & UPL_SET_LITE) { 4200 4201 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size); 4202 4203 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 4204 lite_list = (wpl_array_t) 4205 (((uintptr_t)user_page_list) + 4206 ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); 4207 if (size == 0) { 4208 user_page_list = NULL; 4209 lite_list = NULL; 4210 } 4211 } else { 4212 upl = upl_create(UPL_CREATE_INTERNAL, 0, size); 4213 4214 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 4215 if (size == 0) { 4216 user_page_list = NULL; 4217 } 4218 } 4219 } else { 4220 if (cntrl_flags & UPL_SET_LITE) { 4221 4222 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size); 4223 4224 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 4225 if (size == 0) { 4226 lite_list = NULL; 4227 } 4228 } else { 4229 upl = upl_create(UPL_CREATE_EXTERNAL, 0, size); 4230 } 4231 } 4232 *upl_ptr = upl; 4233 4234 if (user_page_list) 4235 user_page_list[0].device = FALSE; 4236 4237 if (cntrl_flags & UPL_SET_LITE) { 4238 upl->map_object = object; 4239 } else { 4240 upl->map_object = vm_object_allocate(size); 4241 /* 4242 * No neeed to lock the new object: nobody else knows 4243 * about it yet, so it's all ours so far. 
4244 */ 4245 upl->map_object->shadow = object; 4246 upl->map_object->pageout = TRUE; 4247 upl->map_object->can_persist = FALSE; 4248 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 4249 upl->map_object->vo_shadow_offset = offset; 4250 upl->map_object->wimg_bits = object->wimg_bits; 4251 4252 VM_PAGE_GRAB_FICTITIOUS(alias_page); 4253 4254 upl->flags |= UPL_SHADOWED; 4255 } 4256 /* 4257 * ENCRYPTED SWAP: 4258 * Just mark the UPL as "encrypted" here. 4259 * We'll actually encrypt the pages later, 4260 * in upl_encrypt(), when the caller has 4261 * selected which pages need to go to swap. 4262 */ 4263 if (cntrl_flags & UPL_ENCRYPT) 4264 upl->flags |= UPL_ENCRYPTED; 4265 4266 if (cntrl_flags & UPL_FOR_PAGEOUT) 4267 upl->flags |= UPL_PAGEOUT; 4268 4269 vm_object_lock(object); 4270 vm_object_activity_begin(object); 4271 4272 /* 4273 * we can lock in the paging_offset once paging_in_progress is set 4274 */ 4275 upl->size = size; 4276 upl->offset = offset + object->paging_offset; 4277 4278#if UPL_DEBUG 4279 vm_object_activity_begin(object); 4280 queue_enter(&object->uplq, upl, upl_t, uplq); 4281#endif /* UPL_DEBUG */ 4282 4283 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) { 4284 /* 4285 * Honor copy-on-write obligations 4286 * 4287 * The caller is gathering these pages and 4288 * might modify their contents. We need to 4289 * make sure that the copy object has its own 4290 * private copies of these pages before we let 4291 * the caller modify them. 4292 */ 4293 vm_object_update(object, 4294 offset, 4295 size, 4296 NULL, 4297 NULL, 4298 FALSE, /* should_return */ 4299 MEMORY_OBJECT_COPY_SYNC, 4300 VM_PROT_NO_CHANGE); 4301#if DEVELOPMENT || DEBUG 4302 upl_cow++; 4303 upl_cow_pages += size >> PAGE_SHIFT; 4304#endif 4305 } 4306 /* 4307 * remember which copy object we synchronized with 4308 */ 4309 last_copy_object = object->copy; 4310 entry = 0; 4311 4312 xfer_size = size; 4313 dst_offset = offset; 4314 size_in_pages = size / PAGE_SIZE; 4315 4316 dwp = &dw_array[0]; 4317 dw_count = 0; 4318 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 4319 4320 if (vm_page_free_count > (vm_page_free_target + size_in_pages) || 4321 object->resident_page_count < (MAX_UPL_SIZE * 2)) 4322 object->scan_collisions = 0; 4323 4324 while (xfer_size) { 4325 4326 dwp->dw_mask = 0; 4327 4328 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) { 4329 vm_object_unlock(object); 4330 VM_PAGE_GRAB_FICTITIOUS(alias_page); 4331 vm_object_lock(object); 4332 } 4333 if (cntrl_flags & UPL_COPYOUT_FROM) { 4334 upl->flags |= UPL_PAGE_SYNC_DONE; 4335 4336 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) || 4337 dst_page->fictitious || 4338 dst_page->absent || 4339 dst_page->error || 4340 dst_page->cleaning || 4341 (VM_PAGE_WIRED(dst_page))) { 4342 4343 if (user_page_list) 4344 user_page_list[entry].phys_addr = 0; 4345 4346 goto try_next_page; 4347 } 4348 /* 4349 * grab this up front... 4350 * a high percentange of the time we're going to 4351 * need the hardware modification state a bit later 4352 * anyway... so we can eliminate an extra call into 4353 * the pmap layer by grabbing it here and recording it 4354 */ 4355 if (dst_page->pmapped) 4356 refmod_state = pmap_get_refmod(dst_page->phys_page); 4357 else 4358 refmod_state = 0; 4359 4360 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) { 4361 /* 4362 * page is on inactive list and referenced... 4363 * reactivate it now... 
this gets it out of the 4364 * way of vm_pageout_scan which would have to 4365 * reactivate it upon tripping over it 4366 */ 4367 dwp->dw_mask |= DW_vm_page_activate; 4368 } 4369 if (cntrl_flags & UPL_RET_ONLY_DIRTY) { 4370 /* 4371 * we're only asking for DIRTY pages to be returned 4372 */ 4373 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) { 4374 /* 4375 * if we were the page stolen by vm_pageout_scan to be 4376 * cleaned (as opposed to a buddy being clustered in 4377 * or this request is not being driven by a PAGEOUT cluster 4378 * then we only need to check for the page being dirty or 4379 * precious to decide whether to return it 4380 */ 4381 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED)) 4382 goto check_busy; 4383 goto dont_return; 4384 } 4385 /* 4386 * this is a request for a PAGEOUT cluster and this page 4387 * is merely along for the ride as a 'buddy'... not only 4388 * does it have to be dirty to be returned, but it also 4389 * can't have been referenced recently... 4390 */ 4391 if ( (hibernate_cleaning_in_progress == TRUE || 4392 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) && 4393 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) { 4394 goto check_busy; 4395 } 4396dont_return: 4397 /* 4398 * if we reach here, we're not to return 4399 * the page... go on to the next one 4400 */ 4401 if (dst_page->laundry == TRUE) { 4402 /* 4403 * if we get here, the page is not 'cleaning' (filtered out above). 4404 * since it has been referenced, remove it from the laundry 4405 * so we don't pay the cost of an I/O to clean a page 4406 * we're just going to take back 4407 */ 4408 vm_page_lockspin_queues(); 4409 4410 vm_pageout_steal_laundry(dst_page, TRUE); 4411 vm_page_activate(dst_page); 4412 4413 vm_page_unlock_queues(); 4414 } 4415 if (user_page_list) 4416 user_page_list[entry].phys_addr = 0; 4417 4418 goto try_next_page; 4419 } 4420check_busy: 4421 if (dst_page->busy) { 4422 if (cntrl_flags & UPL_NOBLOCK) { 4423 if (user_page_list) 4424 user_page_list[entry].phys_addr = 0; 4425 4426 goto try_next_page; 4427 } 4428 /* 4429 * someone else is playing with the 4430 * page. We will have to wait. 4431 */ 4432 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 4433 4434 continue; 4435 } 4436 /* 4437 * ENCRYPTED SWAP: 4438 * The caller is gathering this page and might 4439 * access its contents later on. Decrypt the 4440 * page before adding it to the UPL, so that 4441 * the caller never sees encrypted data. 4442 */ 4443 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) { 4444 int was_busy; 4445 4446 /* 4447 * save the current state of busy 4448 * mark page as busy while decrypt 4449 * is in progress since it will drop 4450 * the object lock... 4451 */ 4452 was_busy = dst_page->busy; 4453 dst_page->busy = TRUE; 4454 4455 vm_page_decrypt(dst_page, 0); 4456 vm_page_decrypt_for_upl_counter++; 4457 /* 4458 * restore to original busy state 4459 */ 4460 dst_page->busy = was_busy; 4461 } 4462 if (dst_page->pageout_queue == TRUE) { 4463 4464 vm_page_lockspin_queues(); 4465 4466 if (dst_page->pageout_queue == TRUE) { 4467 /* 4468 * we've buddied up a page for a clustered pageout 4469 * that has already been moved to the pageout 4470 * queue by pageout_scan... 
we need to remove 4471 * it from the queue and drop the laundry count 4472 * on that queue 4473 */ 4474 vm_pageout_throttle_up(dst_page); 4475 } 4476 vm_page_unlock_queues(); 4477 } 4478#if MACH_CLUSTER_STATS 4479 /* 4480 * pageout statistics gathering. count 4481 * all the pages we will page out that 4482 * were not counted in the initial 4483 * vm_pageout_scan work 4484 */ 4485 if (dst_page->pageout) 4486 encountered_lrp = TRUE; 4487 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) { 4488 if (encountered_lrp) 4489 CLUSTER_STAT(pages_at_higher_offsets++;) 4490 else 4491 CLUSTER_STAT(pages_at_lower_offsets++;) 4492 } 4493#endif 4494 hw_dirty = refmod_state & VM_MEM_MODIFIED; 4495 dirty = hw_dirty ? TRUE : dst_page->dirty; 4496 4497 if (dst_page->phys_page > upl->highest_page) 4498 upl->highest_page = dst_page->phys_page; 4499 4500 if (cntrl_flags & UPL_SET_LITE) { 4501 unsigned int pg_num; 4502 4503 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 4504 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 4505 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 4506 4507 if (hw_dirty) 4508 pmap_clear_modify(dst_page->phys_page); 4509 4510 /* 4511 * Mark original page as cleaning 4512 * in place. 4513 */ 4514 dst_page->cleaning = TRUE; 4515 dst_page->precious = FALSE; 4516 } else { 4517 /* 4518 * use pageclean setup, it is more 4519 * convenient even for the pageout 4520 * cases here 4521 */ 4522 vm_object_lock(upl->map_object); 4523 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); 4524 vm_object_unlock(upl->map_object); 4525 4526 alias_page->absent = FALSE; 4527 alias_page = NULL; 4528 } 4529#if MACH_PAGEMAP 4530 /* 4531 * Record that this page has been 4532 * written out 4533 */ 4534 vm_external_state_set(object->existence_map, dst_page->offset); 4535#endif /*MACH_PAGEMAP*/ 4536 if (dirty) { 4537 SET_PAGE_DIRTY(dst_page, FALSE); 4538 } else { 4539 dst_page->dirty = FALSE; 4540 } 4541 4542 if (!dirty) 4543 dst_page->precious = TRUE; 4544 4545 if ( (cntrl_flags & UPL_ENCRYPT) ) { 4546 /* 4547 * ENCRYPTED SWAP: 4548 * We want to deny access to the target page 4549 * because its contents are about to be 4550 * encrypted and the user would be very 4551 * confused to see encrypted data instead 4552 * of their data. 4553 * We also set "encrypted_cleaning" to allow 4554 * vm_pageout_scan() to demote that page 4555 * from "adjacent/clean-in-place" to 4556 * "target/clean-and-free" if it bumps into 4557 * this page during its scanning while we're 4558 * still processing this cluster. 4559 */ 4560 dst_page->busy = TRUE; 4561 dst_page->encrypted_cleaning = TRUE; 4562 } 4563 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) { 4564 if ( !VM_PAGE_WIRED(dst_page)) 4565 dst_page->pageout = TRUE; 4566 } 4567 } else { 4568 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) { 4569 /* 4570 * Honor copy-on-write obligations 4571 * 4572 * The copy object has changed since we 4573 * last synchronized for copy-on-write. 4574 * Another copy object might have been 4575 * inserted while we released the object's 4576 * lock. Since someone could have seen the 4577 * original contents of the remaining pages 4578 * through that new object, we have to 4579 * synchronize with it again for the remaining 4580 * pages only. The previous pages are "busy" 4581 * so they can not be seen through the new 4582 * mapping. 
The new mapping will see our 4583 * upcoming changes for those previous pages, 4584 * but that's OK since they couldn't see what 4585 * was there before. It's just a race anyway 4586 * and there's no guarantee of consistency or 4587 * atomicity. We just don't want new mappings 4588 * to see both the *before* and *after* pages. 4589 */ 4590 if (object->copy != VM_OBJECT_NULL) { 4591 vm_object_update( 4592 object, 4593 dst_offset,/* current offset */ 4594 xfer_size, /* remaining size */ 4595 NULL, 4596 NULL, 4597 FALSE, /* should_return */ 4598 MEMORY_OBJECT_COPY_SYNC, 4599 VM_PROT_NO_CHANGE); 4600 4601#if DEVELOPMENT || DEBUG 4602 upl_cow_again++; 4603 upl_cow_again_pages += xfer_size >> PAGE_SHIFT; 4604#endif 4605 } 4606 /* 4607 * remember the copy object we synced with 4608 */ 4609 last_copy_object = object->copy; 4610 } 4611 dst_page = vm_page_lookup(object, dst_offset); 4612 4613 if (dst_page != VM_PAGE_NULL) { 4614 4615 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) { 4616 /* 4617 * skip over pages already present in the cache 4618 */ 4619 if (user_page_list) 4620 user_page_list[entry].phys_addr = 0; 4621 4622 goto try_next_page; 4623 } 4624 if (dst_page->fictitious) { 4625 panic("need corner case for fictitious page"); 4626 } 4627 4628 if (dst_page->busy || dst_page->cleaning) { 4629 /* 4630 * someone else is playing with the 4631 * page. We will have to wait. 4632 */ 4633 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 4634 4635 continue; 4636 } 4637 if (dst_page->laundry) { 4638 dst_page->pageout = FALSE; 4639 4640 vm_pageout_steal_laundry(dst_page, FALSE); 4641 } 4642 } else { 4643 if (object->private) { 4644 /* 4645 * This is a nasty wrinkle for users 4646 * of upl who encounter device or 4647 * private memory however, it is 4648 * unavoidable, only a fault can 4649 * resolve the actual backing 4650 * physical page by asking the 4651 * backing device. 4652 */ 4653 if (user_page_list) 4654 user_page_list[entry].phys_addr = 0; 4655 4656 goto try_next_page; 4657 } 4658 if (object->scan_collisions) { 4659 /* 4660 * the pageout_scan thread is trying to steal 4661 * pages from this object, but has run into our 4662 * lock... grab 2 pages from the head of the object... 4663 * the first is freed on behalf of pageout_scan, the 4664 * 2nd is for our own use... we use vm_object_page_grab 4665 * in both cases to avoid taking pages from the free 4666 * list since we are under memory pressure and our 4667 * lock on this object is getting in the way of 4668 * relieving it 4669 */ 4670 dst_page = vm_object_page_grab(object); 4671 4672 if (dst_page != VM_PAGE_NULL) 4673 vm_page_release(dst_page); 4674 4675 dst_page = vm_object_page_grab(object); 4676 } 4677 if (dst_page == VM_PAGE_NULL) { 4678 /* 4679 * need to allocate a page 4680 */ 4681 dst_page = vm_page_grab(); 4682 } 4683 if (dst_page == VM_PAGE_NULL) { 4684 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) { 4685 /* 4686 * we don't want to stall waiting for pages to come onto the free list 4687 * while we're already holding absent pages in this UPL 4688 * the caller will deal with the empty slots 4689 */ 4690 if (user_page_list) 4691 user_page_list[entry].phys_addr = 0; 4692 4693 goto try_next_page; 4694 } 4695 /* 4696 * no pages available... wait 4697 * then try again for the same 4698 * offset... 
4699 */ 4700 vm_object_unlock(object); 4701 4702 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); 4703 4704 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); 4705 4706 VM_PAGE_WAIT(); 4707 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 4708 4709 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); 4710 4711 vm_object_lock(object); 4712 4713 continue; 4714 } 4715 vm_page_insert(dst_page, object, dst_offset); 4716 4717 dst_page->absent = TRUE; 4718 dst_page->busy = FALSE; 4719 4720 if (cntrl_flags & UPL_RET_ONLY_ABSENT) { 4721 /* 4722 * if UPL_RET_ONLY_ABSENT was specified, 4723 * than we're definitely setting up a 4724 * upl for a clustered read/pagein 4725 * operation... mark the pages as clustered 4726 * so upl_commit_range can put them on the 4727 * speculative list 4728 */ 4729 dst_page->clustered = TRUE; 4730 } 4731 } 4732 /* 4733 * ENCRYPTED SWAP: 4734 */ 4735 if (cntrl_flags & UPL_ENCRYPT) { 4736 /* 4737 * The page is going to be encrypted when we 4738 * get it from the pager, so mark it so. 4739 */ 4740 dst_page->encrypted = TRUE; 4741 } else { 4742 /* 4743 * Otherwise, the page will not contain 4744 * encrypted data. 4745 */ 4746 dst_page->encrypted = FALSE; 4747 } 4748 dst_page->overwriting = TRUE; 4749 4750 if (dst_page->pmapped) { 4751 if ( !(cntrl_flags & UPL_FILE_IO)) 4752 /* 4753 * eliminate all mappings from the 4754 * original object and its prodigy 4755 */ 4756 refmod_state = pmap_disconnect(dst_page->phys_page); 4757 else 4758 refmod_state = pmap_get_refmod(dst_page->phys_page); 4759 } else 4760 refmod_state = 0; 4761 4762 hw_dirty = refmod_state & VM_MEM_MODIFIED; 4763 dirty = hw_dirty ? TRUE : dst_page->dirty; 4764 4765 if (cntrl_flags & UPL_SET_LITE) { 4766 unsigned int pg_num; 4767 4768 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 4769 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 4770 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 4771 4772 if (hw_dirty) 4773 pmap_clear_modify(dst_page->phys_page); 4774 4775 /* 4776 * Mark original page as cleaning 4777 * in place. 4778 */ 4779 dst_page->cleaning = TRUE; 4780 dst_page->precious = FALSE; 4781 } else { 4782 /* 4783 * use pageclean setup, it is more 4784 * convenient even for the pageout 4785 * cases here 4786 */ 4787 vm_object_lock(upl->map_object); 4788 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); 4789 vm_object_unlock(upl->map_object); 4790 4791 alias_page->absent = FALSE; 4792 alias_page = NULL; 4793 } 4794 4795 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) { 4796 upl->flags &= ~UPL_CLEAR_DIRTY; 4797 upl->flags |= UPL_SET_DIRTY; 4798 dirty = TRUE; 4799 upl->flags |= UPL_SET_DIRTY; 4800 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) { 4801 /* 4802 * clean in place for read implies 4803 * that a write will be done on all 4804 * the pages that are dirty before 4805 * a upl commit is done. The caller 4806 * is obligated to preserve the 4807 * contents of all pages marked dirty 4808 */ 4809 upl->flags |= UPL_CLEAR_DIRTY; 4810 } 4811 dst_page->dirty = dirty; 4812 4813 if (!dirty) 4814 dst_page->precious = TRUE; 4815 4816 if ( !VM_PAGE_WIRED(dst_page)) { 4817 /* 4818 * deny access to the target page while 4819 * it is being worked on 4820 */ 4821 dst_page->busy = TRUE; 4822 } else 4823 dwp->dw_mask |= DW_vm_page_wire; 4824 4825 /* 4826 * We might be about to satisfy a fault which has been 4827 * requested. So no need for the "restart" bit. 
4828 */ 4829 dst_page->restart = FALSE; 4830 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) { 4831 /* 4832 * expect the page to be used 4833 */ 4834 dwp->dw_mask |= DW_set_reference; 4835 } 4836 if (cntrl_flags & UPL_PRECIOUS) { 4837 if (dst_page->object->internal) { 4838 SET_PAGE_DIRTY(dst_page, FALSE); 4839 dst_page->precious = FALSE; 4840 } else { 4841 dst_page->precious = TRUE; 4842 } 4843 } else { 4844 dst_page->precious = FALSE; 4845 } 4846 } 4847 if (dst_page->busy) 4848 upl->flags |= UPL_HAS_BUSY; 4849 4850 if (dst_page->phys_page > upl->highest_page) 4851 upl->highest_page = dst_page->phys_page; 4852 if (user_page_list) { 4853 user_page_list[entry].phys_addr = dst_page->phys_page; 4854 user_page_list[entry].pageout = dst_page->pageout; 4855 user_page_list[entry].absent = dst_page->absent; 4856 user_page_list[entry].dirty = dst_page->dirty; 4857 user_page_list[entry].precious = dst_page->precious; 4858 user_page_list[entry].device = FALSE; 4859 user_page_list[entry].needed = FALSE; 4860 if (dst_page->clustered == TRUE) 4861 user_page_list[entry].speculative = dst_page->speculative; 4862 else 4863 user_page_list[entry].speculative = FALSE; 4864 user_page_list[entry].cs_validated = dst_page->cs_validated; 4865 user_page_list[entry].cs_tainted = dst_page->cs_tainted; 4866 } 4867 /* 4868 * if UPL_RET_ONLY_ABSENT is set, then 4869 * we are working with a fresh page and we've 4870 * just set the clustered flag on it to 4871 * indicate that it was drug in as part of a 4872 * speculative cluster... so leave it alone 4873 */ 4874 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) { 4875 /* 4876 * someone is explicitly grabbing this page... 4877 * update clustered and speculative state 4878 * 4879 */ 4880 VM_PAGE_CONSUME_CLUSTERED(dst_page); 4881 } 4882try_next_page: 4883 if (dwp->dw_mask) { 4884 if (dwp->dw_mask & DW_vm_page_activate) 4885 VM_STAT_INCR(reactivations); 4886 4887 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); 4888 4889 if (dw_count >= dw_limit) { 4890 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 4891 4892 dwp = &dw_array[0]; 4893 dw_count = 0; 4894 } 4895 } 4896 entry++; 4897 dst_offset += PAGE_SIZE_64; 4898 xfer_size -= PAGE_SIZE; 4899 } 4900 if (dw_count) 4901 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 4902 4903 if (alias_page != NULL) { 4904 VM_PAGE_FREE(alias_page); 4905 } 4906 4907 if (page_list_count != NULL) { 4908 if (upl->flags & UPL_INTERNAL) 4909 *page_list_count = 0; 4910 else if (*page_list_count > entry) 4911 *page_list_count = entry; 4912 } 4913#if UPL_DEBUG 4914 upl->upl_state = 1; 4915#endif 4916 vm_object_unlock(object); 4917 4918 return KERN_SUCCESS; 4919} 4920 4921/* JMM - Backward compatability for now */ 4922kern_return_t 4923vm_fault_list_request( /* forward */ 4924 memory_object_control_t control, 4925 vm_object_offset_t offset, 4926 upl_size_t size, 4927 upl_t *upl_ptr, 4928 upl_page_info_t **user_page_list_ptr, 4929 unsigned int page_list_count, 4930 int cntrl_flags); 4931kern_return_t 4932vm_fault_list_request( 4933 memory_object_control_t control, 4934 vm_object_offset_t offset, 4935 upl_size_t size, 4936 upl_t *upl_ptr, 4937 upl_page_info_t **user_page_list_ptr, 4938 unsigned int page_list_count, 4939 int cntrl_flags) 4940{ 4941 unsigned int local_list_count; 4942 upl_page_info_t *user_page_list; 4943 kern_return_t kr; 4944 4945 if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR) 4946 return KERN_INVALID_ARGUMENT; 4947 4948 if (user_page_list_ptr != NULL) { 4949 local_list_count = page_list_count; 4950 user_page_list = 
*user_page_list_ptr; 4951 } else { 4952 local_list_count = 0; 4953 user_page_list = NULL; 4954 } 4955 kr = memory_object_upl_request(control, 4956 offset, 4957 size, 4958 upl_ptr, 4959 user_page_list, 4960 &local_list_count, 4961 cntrl_flags); 4962 4963 if(kr != KERN_SUCCESS) 4964 return kr; 4965 4966 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) { 4967 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr); 4968 } 4969 4970 return KERN_SUCCESS; 4971} 4972 4973 4974 4975/* 4976 * Routine: vm_object_super_upl_request 4977 * Purpose: 4978 * Cause the population of a portion of a vm_object 4979 * in much the same way as memory_object_upl_request. 4980 * Depending on the nature of the request, the pages 4981 * returned may be contain valid data or be uninitialized. 4982 * However, the region may be expanded up to the super 4983 * cluster size provided. 4984 */ 4985 4986__private_extern__ kern_return_t 4987vm_object_super_upl_request( 4988 vm_object_t object, 4989 vm_object_offset_t offset, 4990 upl_size_t size, 4991 upl_size_t super_cluster, 4992 upl_t *upl, 4993 upl_page_info_t *user_page_list, 4994 unsigned int *page_list_count, 4995 int cntrl_flags) 4996{ 4997 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)) 4998 return KERN_FAILURE; 4999 5000 assert(object->paging_in_progress); 5001 offset = offset - object->paging_offset; 5002 5003 if (super_cluster > size) { 5004 5005 vm_object_offset_t base_offset; 5006 upl_size_t super_size; 5007 vm_object_size_t super_size_64; 5008 5009 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); 5010 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster; 5011 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size; 5012 super_size = (upl_size_t) super_size_64; 5013 assert(super_size == super_size_64); 5014 5015 if (offset > (base_offset + super_size)) { 5016 panic("vm_object_super_upl_request: Missed target pageout" 5017 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n", 5018 offset, base_offset, super_size, super_cluster, 5019 size, object->paging_offset); 5020 } 5021 /* 5022 * apparently there is a case where the vm requests a 5023 * page to be written out who's offset is beyond the 5024 * object size 5025 */ 5026 if ((offset + size) > (base_offset + super_size)) { 5027 super_size_64 = (offset + size) - base_offset; 5028 super_size = (upl_size_t) super_size_64; 5029 assert(super_size == super_size_64); 5030 } 5031 5032 offset = base_offset; 5033 size = super_size; 5034 } 5035 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags); 5036} 5037 5038 5039kern_return_t 5040vm_map_create_upl( 5041 vm_map_t map, 5042 vm_map_address_t offset, 5043 upl_size_t *upl_size, 5044 upl_t *upl, 5045 upl_page_info_array_t page_list, 5046 unsigned int *count, 5047 int *flags) 5048{ 5049 vm_map_entry_t entry; 5050 int caller_flags; 5051 int force_data_sync; 5052 int sync_cow_data; 5053 vm_object_t local_object; 5054 vm_map_offset_t local_offset; 5055 vm_map_offset_t local_start; 5056 kern_return_t ret; 5057 5058 caller_flags = *flags; 5059 5060 if (caller_flags & ~UPL_VALID_FLAGS) { 5061 /* 5062 * For forward compatibility's sake, 5063 * reject any unknown flag. 
5064 */ 5065 return KERN_INVALID_VALUE; 5066 } 5067 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC); 5068 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM); 5069 5070 if (upl == NULL) 5071 return KERN_INVALID_ARGUMENT; 5072 5073REDISCOVER_ENTRY: 5074 vm_map_lock_read(map); 5075 5076 if (vm_map_lookup_entry(map, offset, &entry)) { 5077 5078 if ((entry->vme_end - offset) < *upl_size) { 5079 *upl_size = (upl_size_t) (entry->vme_end - offset); 5080 assert(*upl_size == entry->vme_end - offset); 5081 } 5082 5083 if (caller_flags & UPL_QUERY_OBJECT_TYPE) { 5084 *flags = 0; 5085 5086 if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) { 5087 if (entry->object.vm_object->private) 5088 *flags = UPL_DEV_MEMORY; 5089 5090 if (entry->object.vm_object->phys_contiguous) 5091 *flags |= UPL_PHYS_CONTIG; 5092 } 5093 vm_map_unlock_read(map); 5094 5095 return KERN_SUCCESS; 5096 } 5097 5098 if (entry->is_sub_map) { 5099 vm_map_t submap; 5100 5101 submap = entry->object.sub_map; 5102 local_start = entry->vme_start; 5103 local_offset = entry->offset; 5104 5105 vm_map_reference(submap); 5106 vm_map_unlock_read(map); 5107 5108 ret = vm_map_create_upl(submap, 5109 local_offset + (offset - local_start), 5110 upl_size, upl, page_list, count, flags); 5111 vm_map_deallocate(submap); 5112 5113 return ret; 5114 } 5115 5116 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) { 5117 if ((*upl_size/PAGE_SIZE) > MAX_UPL_SIZE) 5118 *upl_size = MAX_UPL_SIZE * PAGE_SIZE; 5119 } 5120 /* 5121 * Create an object if necessary. 5122 */ 5123 if (entry->object.vm_object == VM_OBJECT_NULL) { 5124 5125 if (vm_map_lock_read_to_write(map)) 5126 goto REDISCOVER_ENTRY; 5127 5128 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start)); 5129 entry->offset = 0; 5130 5131 vm_map_lock_write_to_read(map); 5132 } 5133 if (!(caller_flags & UPL_COPYOUT_FROM)) { 5134 if (!(entry->protection & VM_PROT_WRITE)) { 5135 vm_map_unlock_read(map); 5136 return KERN_PROTECTION_FAILURE; 5137 } 5138 5139 local_object = entry->object.vm_object; 5140 if (vm_map_entry_should_cow_for_true_share(entry) && 5141 local_object->vo_size > *upl_size && 5142 *upl_size != 0) { 5143 vm_prot_t prot; 5144 5145 /* 5146 * Set up the targeted range for copy-on-write to avoid 5147 * applying true_share/copy_delay to the entire object. 5148 */ 5149 5150 if (vm_map_lock_read_to_write(map)) { 5151 goto REDISCOVER_ENTRY; 5152 } 5153 5154 vm_map_clip_start(map, 5155 entry, 5156 vm_map_trunc_page(offset, 5157 VM_MAP_PAGE_MASK(map))); 5158 vm_map_clip_end(map, 5159 entry, 5160 vm_map_round_page(offset + *upl_size, 5161 VM_MAP_PAGE_MASK(map))); 5162 prot = entry->protection & ~VM_PROT_WRITE; 5163 if (override_nx(map, entry->alias) && prot) 5164 prot |= VM_PROT_EXECUTE; 5165 vm_object_pmap_protect(local_object, 5166 entry->offset, 5167 entry->vme_end - entry->vme_start, 5168 ((entry->is_shared || map->mapped_in_other_pmaps) 5169 ? PMAP_NULL 5170 : map->pmap), 5171 entry->vme_start, 5172 prot); 5173 entry->needs_copy = TRUE; 5174 5175 vm_map_lock_write_to_read(map); 5176 } 5177 5178 if (entry->needs_copy) { 5179 /* 5180 * Honor copy-on-write for COPY_SYMMETRIC 5181 * strategy. 
5182 */ 5183 vm_map_t local_map; 5184 vm_object_t object; 5185 vm_object_offset_t new_offset; 5186 vm_prot_t prot; 5187 boolean_t wired; 5188 vm_map_version_t version; 5189 vm_map_t real_map; 5190 5191 local_map = map; 5192 5193 if (vm_map_lookup_locked(&local_map, 5194 offset, VM_PROT_WRITE, 5195 OBJECT_LOCK_EXCLUSIVE, 5196 &version, &object, 5197 &new_offset, &prot, &wired, 5198 NULL, 5199 &real_map) != KERN_SUCCESS) { 5200 vm_map_unlock_read(local_map); 5201 return KERN_FAILURE; 5202 } 5203 if (real_map != map) 5204 vm_map_unlock(real_map); 5205 vm_map_unlock_read(local_map); 5206 5207 vm_object_unlock(object); 5208 5209 goto REDISCOVER_ENTRY; 5210 } 5211 } 5212 if (sync_cow_data) { 5213 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) { 5214 local_object = entry->object.vm_object; 5215 local_start = entry->vme_start; 5216 local_offset = entry->offset; 5217 5218 vm_object_reference(local_object); 5219 vm_map_unlock_read(map); 5220 5221 if (local_object->shadow && local_object->copy) { 5222 vm_object_lock_request( 5223 local_object->shadow, 5224 (vm_object_offset_t) 5225 ((offset - local_start) + 5226 local_offset) + 5227 local_object->vo_shadow_offset, 5228 *upl_size, FALSE, 5229 MEMORY_OBJECT_DATA_SYNC, 5230 VM_PROT_NO_CHANGE); 5231 } 5232 sync_cow_data = FALSE; 5233 vm_object_deallocate(local_object); 5234 5235 goto REDISCOVER_ENTRY; 5236 } 5237 } 5238 if (force_data_sync) { 5239 local_object = entry->object.vm_object; 5240 local_start = entry->vme_start; 5241 local_offset = entry->offset; 5242 5243 vm_object_reference(local_object); 5244 vm_map_unlock_read(map); 5245 5246 vm_object_lock_request( 5247 local_object, 5248 (vm_object_offset_t) 5249 ((offset - local_start) + local_offset), 5250 (vm_object_size_t)*upl_size, FALSE, 5251 MEMORY_OBJECT_DATA_SYNC, 5252 VM_PROT_NO_CHANGE); 5253 5254 force_data_sync = FALSE; 5255 vm_object_deallocate(local_object); 5256 5257 goto REDISCOVER_ENTRY; 5258 } 5259 if (entry->object.vm_object->private) 5260 *flags = UPL_DEV_MEMORY; 5261 else 5262 *flags = 0; 5263 5264 if (entry->object.vm_object->phys_contiguous) 5265 *flags |= UPL_PHYS_CONTIG; 5266 5267 local_object = entry->object.vm_object; 5268 local_offset = entry->offset; 5269 local_start = entry->vme_start; 5270 5271 vm_object_reference(local_object); 5272 vm_map_unlock_read(map); 5273 5274 ret = vm_object_iopl_request(local_object, 5275 (vm_object_offset_t) ((offset - local_start) + local_offset), 5276 *upl_size, 5277 upl, 5278 page_list, 5279 count, 5280 caller_flags); 5281 vm_object_deallocate(local_object); 5282 5283 return(ret); 5284 } 5285 vm_map_unlock_read(map); 5286 5287 return(KERN_FAILURE); 5288} 5289 5290/* 5291 * Internal routine to enter a UPL into a VM map. 5292 * 5293 * JMM - This should just be doable through the standard 5294 * vm_map_enter() API. 
5295 */ 5296kern_return_t 5297vm_map_enter_upl( 5298 vm_map_t map, 5299 upl_t upl, 5300 vm_map_offset_t *dst_addr) 5301{ 5302 vm_map_size_t size; 5303 vm_object_offset_t offset; 5304 vm_map_offset_t addr; 5305 vm_page_t m; 5306 kern_return_t kr; 5307 int isVectorUPL = 0, curr_upl=0; 5308 upl_t vector_upl = NULL; 5309 vm_offset_t vector_upl_dst_addr = 0; 5310 vm_map_t vector_upl_submap = NULL; 5311 upl_offset_t subupl_offset = 0; 5312 upl_size_t subupl_size = 0; 5313 5314 if (upl == UPL_NULL) 5315 return KERN_INVALID_ARGUMENT; 5316 5317 if((isVectorUPL = vector_upl_is_valid(upl))) { 5318 int mapped=0,valid_upls=0; 5319 vector_upl = upl; 5320 5321 upl_lock(vector_upl); 5322 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { 5323 upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); 5324 if(upl == NULL) 5325 continue; 5326 valid_upls++; 5327 if (UPL_PAGE_LIST_MAPPED & upl->flags) 5328 mapped++; 5329 } 5330 5331 if(mapped) { 5332 if(mapped != valid_upls) 5333 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls); 5334 else { 5335 upl_unlock(vector_upl); 5336 return KERN_FAILURE; 5337 } 5338 } 5339 5340 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap); 5341 if( kr != KERN_SUCCESS ) 5342 panic("Vector UPL submap allocation failed\n"); 5343 map = vector_upl_submap; 5344 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr); 5345 curr_upl=0; 5346 } 5347 else 5348 upl_lock(upl); 5349 5350process_upl_to_enter: 5351 if(isVectorUPL){ 5352 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { 5353 *dst_addr = vector_upl_dst_addr; 5354 upl_unlock(vector_upl); 5355 return KERN_SUCCESS; 5356 } 5357 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); 5358 if(upl == NULL) 5359 goto process_upl_to_enter; 5360 5361 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size); 5362 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset); 5363 } else { 5364 /* 5365 * check to see if already mapped 5366 */ 5367 if (UPL_PAGE_LIST_MAPPED & upl->flags) { 5368 upl_unlock(upl); 5369 return KERN_FAILURE; 5370 } 5371 } 5372 if ((!(upl->flags & UPL_SHADOWED)) && 5373 ((upl->flags & UPL_HAS_BUSY) || 5374 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) { 5375 5376 vm_object_t object; 5377 vm_page_t alias_page; 5378 vm_object_offset_t new_offset; 5379 unsigned int pg_num; 5380 wpl_array_t lite_list; 5381 5382 if (upl->flags & UPL_INTERNAL) { 5383 lite_list = (wpl_array_t) 5384 ((((uintptr_t)upl) + sizeof(struct upl)) 5385 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 5386 } else { 5387 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl)); 5388 } 5389 object = upl->map_object; 5390 upl->map_object = vm_object_allocate(upl->size); 5391 5392 vm_object_lock(upl->map_object); 5393 5394 upl->map_object->shadow = object; 5395 upl->map_object->pageout = TRUE; 5396 upl->map_object->can_persist = FALSE; 5397 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 5398 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset; 5399 upl->map_object->wimg_bits = object->wimg_bits; 5400 offset = upl->map_object->vo_shadow_offset; 5401 new_offset = 0; 5402 size = upl->size; 5403 5404 upl->flags |= UPL_SHADOWED; 5405 5406 while (size) { 5407 pg_num = (unsigned int) (new_offset / PAGE_SIZE); 5408 assert(pg_num == new_offset / PAGE_SIZE); 5409 5410 if (lite_list[pg_num>>5] & (1 << (pg_num & 
31))) { 5411 5412 VM_PAGE_GRAB_FICTITIOUS(alias_page); 5413 5414 vm_object_lock(object); 5415 5416 m = vm_page_lookup(object, offset); 5417 if (m == VM_PAGE_NULL) { 5418 panic("vm_upl_map: page missing\n"); 5419 } 5420 5421 /* 5422 * Convert the fictitious page to a private 5423 * shadow of the real page. 5424 */ 5425 assert(alias_page->fictitious); 5426 alias_page->fictitious = FALSE; 5427 alias_page->private = TRUE; 5428 alias_page->pageout = TRUE; 5429 /* 5430 * since m is a page in the upl it must 5431 * already be wired or BUSY, so it's 5432 * safe to assign the underlying physical 5433 * page to the alias 5434 */ 5435 alias_page->phys_page = m->phys_page; 5436 5437 vm_object_unlock(object); 5438 5439 vm_page_lockspin_queues(); 5440 vm_page_wire(alias_page); 5441 vm_page_unlock_queues(); 5442 5443 /* 5444 * ENCRYPTED SWAP: 5445 * The virtual page ("m") has to be wired in some way 5446 * here or its physical page ("m->phys_page") could 5447 * be recycled at any time. 5448 * Assuming this is enforced by the caller, we can't 5449 * get an encrypted page here. Since the encryption 5450 * key depends on the VM page's "pager" object and 5451 * the "paging_offset", we couldn't handle 2 pageable 5452 * VM pages (with different pagers and paging_offsets) 5453 * sharing the same physical page: we could end up 5454 * encrypting with one key (via one VM page) and 5455 * decrypting with another key (via the alias VM page). 5456 */ 5457 ASSERT_PAGE_DECRYPTED(m); 5458 5459 vm_page_insert(alias_page, upl->map_object, new_offset); 5460 5461 assert(!alias_page->wanted); 5462 alias_page->busy = FALSE; 5463 alias_page->absent = FALSE; 5464 } 5465 size -= PAGE_SIZE; 5466 offset += PAGE_SIZE_64; 5467 new_offset += PAGE_SIZE_64; 5468 } 5469 vm_object_unlock(upl->map_object); 5470 } 5471 if (upl->flags & UPL_SHADOWED) 5472 offset = 0; 5473 else 5474 offset = upl->offset - upl->map_object->paging_offset; 5475 5476 size = upl->size; 5477 5478 vm_object_reference(upl->map_object); 5479 5480 if(!isVectorUPL) { 5481 *dst_addr = 0; 5482 /* 5483 * NEED A UPL_MAP ALIAS 5484 */ 5485 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, 5486 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE, 5487 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); 5488 5489 if (kr != KERN_SUCCESS) { 5490 upl_unlock(upl); 5491 return(kr); 5492 } 5493 } 5494 else { 5495 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, 5496 VM_FLAGS_FIXED, upl->map_object, offset, FALSE, 5497 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); 5498 if(kr) 5499 panic("vm_map_enter failed for a Vector UPL\n"); 5500 } 5501 vm_object_lock(upl->map_object); 5502 5503 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) { 5504 m = vm_page_lookup(upl->map_object, offset); 5505 5506 if (m) { 5507 m->pmapped = TRUE; 5508 5509 /* CODE SIGNING ENFORCEMENT: page has been wpmapped, 5510 * but only in kernel space. If this was on a user map, 5511 * we'd have to set the wpmapped bit. 
*/ 5512 /* m->wpmapped = TRUE; */ 5513 assert(map==kernel_map); 5514 5515 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, VM_PROT_NONE, 0, TRUE); 5516 } 5517 offset += PAGE_SIZE_64; 5518 } 5519 vm_object_unlock(upl->map_object); 5520 5521 /* 5522 * hold a reference for the mapping 5523 */ 5524 upl->ref_count++; 5525 upl->flags |= UPL_PAGE_LIST_MAPPED; 5526 upl->kaddr = (vm_offset_t) *dst_addr; 5527 assert(upl->kaddr == *dst_addr); 5528 5529 if(isVectorUPL) 5530 goto process_upl_to_enter; 5531 5532 upl_unlock(upl); 5533 5534 return KERN_SUCCESS; 5535} 5536 5537/* 5538 * Internal routine to remove a UPL mapping from a VM map. 5539 * 5540 * XXX - This should just be doable through a standard 5541 * vm_map_remove() operation. Otherwise, implicit clean-up 5542 * of the target map won't be able to correctly remove 5543 * these (and release the reference on the UPL). Having 5544 * to do this means we can't map these into user-space 5545 * maps yet. 5546 */ 5547kern_return_t 5548vm_map_remove_upl( 5549 vm_map_t map, 5550 upl_t upl) 5551{ 5552 vm_address_t addr; 5553 upl_size_t size; 5554 int isVectorUPL = 0, curr_upl = 0; 5555 upl_t vector_upl = NULL; 5556 5557 if (upl == UPL_NULL) 5558 return KERN_INVALID_ARGUMENT; 5559 5560 if((isVectorUPL = vector_upl_is_valid(upl))) { 5561 int unmapped=0, valid_upls=0; 5562 vector_upl = upl; 5563 upl_lock(vector_upl); 5564 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { 5565 upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); 5566 if(upl == NULL) 5567 continue; 5568 valid_upls++; 5569 if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) 5570 unmapped++; 5571 } 5572 5573 if(unmapped) { 5574 if(unmapped != valid_upls) 5575 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls); 5576 else { 5577 upl_unlock(vector_upl); 5578 return KERN_FAILURE; 5579 } 5580 } 5581 curr_upl=0; 5582 } 5583 else 5584 upl_lock(upl); 5585 5586process_upl_to_remove: 5587 if(isVectorUPL) { 5588 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { 5589 vm_map_t v_upl_submap; 5590 vm_offset_t v_upl_submap_dst_addr; 5591 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr); 5592 5593 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS); 5594 vm_map_deallocate(v_upl_submap); 5595 upl_unlock(vector_upl); 5596 return KERN_SUCCESS; 5597 } 5598 5599 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); 5600 if(upl == NULL) 5601 goto process_upl_to_remove; 5602 } 5603 5604 if (upl->flags & UPL_PAGE_LIST_MAPPED) { 5605 addr = upl->kaddr; 5606 size = upl->size; 5607 5608 assert(upl->ref_count > 1); 5609 upl->ref_count--; /* removing mapping ref */ 5610 5611 upl->flags &= ~UPL_PAGE_LIST_MAPPED; 5612 upl->kaddr = (vm_offset_t) 0; 5613 5614 if(!isVectorUPL) { 5615 upl_unlock(upl); 5616 5617 vm_map_remove( 5618 map, 5619 vm_map_trunc_page(addr, 5620 VM_MAP_PAGE_MASK(map)), 5621 vm_map_round_page(addr + size, 5622 VM_MAP_PAGE_MASK(map)), 5623 VM_MAP_NO_FLAGS); 5624 5625 return KERN_SUCCESS; 5626 } 5627 else { 5628 /* 5629 * If it's a Vectored UPL, we'll be removing the entire 5630 * submap anyways, so no need to remove individual UPL 5631 * element mappings from within the submap 5632 */ 5633 goto process_upl_to_remove; 5634 } 5635 } 5636 upl_unlock(upl); 5637 5638 return KERN_FAILURE; 5639} 5640 5641extern int panic_on_cs_killed; 5642kern_return_t 5643upl_commit_range( 5644 upl_t upl, 5645 upl_offset_t offset, 5646 upl_size_t size, 5647 int flags, 5648 upl_page_info_t *page_list, 5649 
mach_msg_type_number_t count, 5650 boolean_t *empty) 5651{ 5652 upl_size_t xfer_size, subupl_size = size; 5653 vm_object_t shadow_object; 5654 vm_object_t object; 5655 vm_object_offset_t target_offset; 5656 upl_offset_t subupl_offset = offset; 5657 int entry; 5658 wpl_array_t lite_list; 5659 int occupied; 5660 int clear_refmod = 0; 5661 int pgpgout_count = 0; 5662 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 5663 struct vm_page_delayed_work *dwp; 5664 int dw_count; 5665 int dw_limit; 5666 int isVectorUPL = 0; 5667 upl_t vector_upl = NULL; 5668 boolean_t should_be_throttled = FALSE; 5669 5670 *empty = FALSE; 5671 5672 if (upl == UPL_NULL) 5673 return KERN_INVALID_ARGUMENT; 5674 5675 if (count == 0) 5676 page_list = NULL; 5677 5678 if((isVectorUPL = vector_upl_is_valid(upl))) { 5679 vector_upl = upl; 5680 upl_lock(vector_upl); 5681 } 5682 else 5683 upl_lock(upl); 5684 5685process_upl_to_commit: 5686 5687 if(isVectorUPL) { 5688 size = subupl_size; 5689 offset = subupl_offset; 5690 if(size == 0) { 5691 upl_unlock(vector_upl); 5692 return KERN_SUCCESS; 5693 } 5694 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); 5695 if(upl == NULL) { 5696 upl_unlock(vector_upl); 5697 return KERN_FAILURE; 5698 } 5699 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl); 5700 subupl_size -= size; 5701 subupl_offset += size; 5702 } 5703 5704#if UPL_DEBUG 5705 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { 5706 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); 5707 5708 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; 5709 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); 5710 5711 upl->upl_commit_index++; 5712 } 5713#endif 5714 if (upl->flags & UPL_DEVICE_MEMORY) 5715 xfer_size = 0; 5716 else if ((offset + size) <= upl->size) 5717 xfer_size = size; 5718 else { 5719 if(!isVectorUPL) 5720 upl_unlock(upl); 5721 else { 5722 upl_unlock(vector_upl); 5723 } 5724 return KERN_FAILURE; 5725 } 5726 if (upl->flags & UPL_SET_DIRTY) 5727 flags |= UPL_COMMIT_SET_DIRTY; 5728 if (upl->flags & UPL_CLEAR_DIRTY) 5729 flags |= UPL_COMMIT_CLEAR_DIRTY; 5730 5731 if (upl->flags & UPL_INTERNAL) 5732 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl)) 5733 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 5734 else 5735 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 5736 5737 object = upl->map_object; 5738 5739 if (upl->flags & UPL_SHADOWED) { 5740 vm_object_lock(object); 5741 shadow_object = object->shadow; 5742 } else { 5743 shadow_object = object; 5744 } 5745 entry = offset/PAGE_SIZE; 5746 target_offset = (vm_object_offset_t)offset; 5747 5748 if (upl->flags & UPL_KERNEL_OBJECT) 5749 vm_object_lock_shared(shadow_object); 5750 else 5751 vm_object_lock(shadow_object); 5752 5753 if (upl->flags & UPL_ACCESS_BLOCKED) { 5754 assert(shadow_object->blocked_access); 5755 shadow_object->blocked_access = FALSE; 5756 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); 5757 } 5758 5759 if (shadow_object->code_signed) { 5760 /* 5761 * CODE SIGNING: 5762 * If the object is code-signed, do not let this UPL tell 5763 * us if the pages are valid or not. Let the pages be 5764 * validated by VM the normal way (when they get mapped or 5765 * copied). 5766 */ 5767 flags &= ~UPL_COMMIT_CS_VALIDATED; 5768 } 5769 if (! page_list) { 5770 /* 5771 * No page list to get the code-signing info from !? 
5772 */ 5773 flags &= ~UPL_COMMIT_CS_VALIDATED; 5774 } 5775 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal) 5776 should_be_throttled = TRUE; 5777 5778 dwp = &dw_array[0]; 5779 dw_count = 0; 5780 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 5781 5782 while (xfer_size) { 5783 vm_page_t t, m; 5784 5785 dwp->dw_mask = 0; 5786 clear_refmod = 0; 5787 5788 m = VM_PAGE_NULL; 5789 5790 if (upl->flags & UPL_LITE) { 5791 unsigned int pg_num; 5792 5793 pg_num = (unsigned int) (target_offset/PAGE_SIZE); 5794 assert(pg_num == target_offset/PAGE_SIZE); 5795 5796 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 5797 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); 5798 5799 if (!(upl->flags & UPL_KERNEL_OBJECT)) 5800 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); 5801 } 5802 } 5803 if (upl->flags & UPL_SHADOWED) { 5804 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { 5805 5806 t->pageout = FALSE; 5807 5808 VM_PAGE_FREE(t); 5809 5810 if (m == VM_PAGE_NULL) 5811 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); 5812 } 5813 } 5814 if ((upl->flags & UPL_KERNEL_OBJECT) || m == VM_PAGE_NULL) 5815 goto commit_next_page; 5816 5817 if (m->compressor) { 5818 assert(m->busy); 5819 5820 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 5821 goto commit_next_page; 5822 } 5823 5824 if (flags & UPL_COMMIT_CS_VALIDATED) { 5825 /* 5826 * CODE SIGNING: 5827 * Set the code signing bits according to 5828 * what the UPL says they should be. 5829 */ 5830 m->cs_validated = page_list[entry].cs_validated; 5831 m->cs_tainted = page_list[entry].cs_tainted; 5832 } 5833 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL) 5834 m->written_by_kernel = TRUE; 5835 5836 if (upl->flags & UPL_IO_WIRE) { 5837 5838 if (page_list) 5839 page_list[entry].phys_addr = 0; 5840 5841 if (flags & UPL_COMMIT_SET_DIRTY) { 5842 SET_PAGE_DIRTY(m, FALSE); 5843 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) { 5844 m->dirty = FALSE; 5845 5846 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 5847 m->cs_validated && !m->cs_tainted) { 5848 /* 5849 * CODE SIGNING: 5850 * This page is no longer dirty 5851 * but could have been modified, 5852 * so it will need to be 5853 * re-validated. 5854 */ 5855 if (panic_on_cs_killed && 5856 m->slid) { 5857 panic("upl_commit_range(%p): page %p was slid\n", 5858 upl, m); 5859 } 5860 assert(!m->slid); 5861 m->cs_validated = FALSE; 5862#if DEVELOPMENT || DEBUG 5863 vm_cs_validated_resets++; 5864#endif 5865 pmap_disconnect(m->phys_page); 5866 } 5867 clear_refmod |= VM_MEM_MODIFIED; 5868 } 5869 if (flags & UPL_COMMIT_INACTIVATE) { 5870 dwp->dw_mask |= DW_vm_page_deactivate_internal; 5871 clear_refmod |= VM_MEM_REFERENCED; 5872 } 5873 if (upl->flags & UPL_ACCESS_BLOCKED) { 5874 /* 5875 * We blocked access to the pages in this UPL. 5876 * Clear the "busy" bit and wake up any waiter 5877 * for this page. 
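 * (UPL_ACCESS_BLOCKED is set when the UPL was created with
 * UPL_BLOCK_ACCESS -- see vm_object_iopl_request() below -- which left
 * these pages busy so that any fault on them would block until the
 * UPL was committed or aborted.)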
5878 */ 5879 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 5880 } 5881 if (m->absent) { 5882 if (flags & UPL_COMMIT_FREE_ABSENT) 5883 dwp->dw_mask |= DW_vm_page_free; 5884 else { 5885 m->absent = FALSE; 5886 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 5887 5888 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal)) 5889 dwp->dw_mask |= DW_vm_page_activate; 5890 } 5891 } else 5892 dwp->dw_mask |= DW_vm_page_unwire; 5893 5894 goto commit_next_page; 5895 } 5896 assert(!m->compressor); 5897 5898 if (page_list) 5899 page_list[entry].phys_addr = 0; 5900 5901 /* 5902 * make sure to clear the hardware 5903 * modify or reference bits before 5904 * releasing the BUSY bit on this page 5905 * otherwise we risk losing a legitimate 5906 * change of state 5907 */ 5908 if (flags & UPL_COMMIT_CLEAR_DIRTY) { 5909 m->dirty = FALSE; 5910 5911 clear_refmod |= VM_MEM_MODIFIED; 5912 } 5913 if (m->laundry) 5914 dwp->dw_mask |= DW_vm_pageout_throttle_up; 5915 5916 if (VM_PAGE_WIRED(m)) 5917 m->pageout = FALSE; 5918 5919 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 5920 m->cs_validated && !m->cs_tainted) { 5921 /* 5922 * CODE SIGNING: 5923 * This page is no longer dirty 5924 * but could have been modified, 5925 * so it will need to be 5926 * re-validated. 5927 */ 5928 if (panic_on_cs_killed && 5929 m->slid) { 5930 panic("upl_commit_range(%p): page %p was slid\n", 5931 upl, m); 5932 } 5933 assert(!m->slid); 5934 m->cs_validated = FALSE; 5935#if DEVELOPMENT || DEBUG 5936 vm_cs_validated_resets++; 5937#endif 5938 pmap_disconnect(m->phys_page); 5939 } 5940 if (m->overwriting) { 5941 /* 5942 * the (COPY_OUT_FROM == FALSE) request_page_list case 5943 */ 5944 if (m->busy) { 5945 m->absent = FALSE; 5946 5947 dwp->dw_mask |= DW_clear_busy; 5948 } else { 5949 /* 5950 * alternate (COPY_OUT_FROM == FALSE) page_list case 5951 * Occurs when the original page was wired 5952 * at the time of the list request 5953 */ 5954 assert(VM_PAGE_WIRED(m)); 5955 5956 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */ 5957 } 5958 m->overwriting = FALSE; 5959 } 5960 if (m->encrypted_cleaning == TRUE) { 5961 m->encrypted_cleaning = FALSE; 5962 5963 dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP; 5964 } 5965 m->cleaning = FALSE; 5966 5967 if (m->pageout) { 5968 /* 5969 * With the clean queue enabled, UPL_PAGEOUT should 5970 * no longer set the pageout bit. It's pages now go 5971 * to the clean queue. 5972 */ 5973 assert(!(flags & UPL_PAGEOUT)); 5974 5975 m->pageout = FALSE; 5976#if MACH_CLUSTER_STATS 5977 if (m->wanted) vm_pageout_target_collisions++; 5978#endif 5979 if ((flags & UPL_COMMIT_SET_DIRTY) || 5980 (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) { 5981 /* 5982 * page was re-dirtied after we started 5983 * the pageout... 
reactivate it since 5984 * we don't know whether the on-disk 5985 * copy matches what is now in memory 5986 */ 5987 SET_PAGE_DIRTY(m, FALSE); 5988 5989 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP; 5990 5991 if (upl->flags & UPL_PAGEOUT) { 5992 CLUSTER_STAT(vm_pageout_target_page_dirtied++;) 5993 VM_STAT_INCR(reactivations); 5994 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL); 5995 } 5996 } else { 5997 /* 5998 * page has been successfully cleaned 5999 * go ahead and free it for other use 6000 */ 6001 if (m->object->internal) { 6002 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL); 6003 } else { 6004 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL); 6005 } 6006 m->dirty = FALSE; 6007 m->busy = TRUE; 6008 6009 dwp->dw_mask |= DW_vm_page_free; 6010 } 6011 goto commit_next_page; 6012 } 6013#if MACH_CLUSTER_STATS 6014 if (m->wpmapped) 6015 m->dirty = pmap_is_modified(m->phys_page); 6016 6017 if (m->dirty) vm_pageout_cluster_dirtied++; 6018 else vm_pageout_cluster_cleaned++; 6019 if (m->wanted) vm_pageout_cluster_collisions++; 6020#endif 6021 /* 6022 * It is a part of the semantic of COPYOUT_FROM 6023 * UPLs that a commit implies cache sync 6024 * between the vm page and the backing store 6025 * this can be used to strip the precious bit 6026 * as well as clean 6027 */ 6028 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS)) 6029 m->precious = FALSE; 6030 6031 if (flags & UPL_COMMIT_SET_DIRTY) { 6032 SET_PAGE_DIRTY(m, FALSE); 6033 } else { 6034 m->dirty = FALSE; 6035 } 6036 6037 /* with the clean queue on, move *all* cleaned pages to the clean queue */ 6038 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) { 6039 pgpgout_count++; 6040 6041 /* this page used to be dirty; now it's on the clean queue. */ 6042 m->was_dirty = TRUE; 6043 6044 dwp->dw_mask |= DW_enqueue_cleaned; 6045 vm_pageout_enqueued_cleaned_from_inactive_dirty++; 6046 } else if (should_be_throttled == TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) { 6047 /* 6048 * page coming back in from being 'frozen'... 6049 * it was dirty before it was frozen, so keep it so 6050 * the vm_page_activate will notice that it really belongs 6051 * on the throttle queue and put it there 6052 */ 6053 SET_PAGE_DIRTY(m, FALSE); 6054 dwp->dw_mask |= DW_vm_page_activate; 6055 6056 } else { 6057 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) { 6058 dwp->dw_mask |= DW_vm_page_deactivate_internal; 6059 clear_refmod |= VM_MEM_REFERENCED; 6060 } else if (!m->active && !m->inactive && !m->speculative) { 6061 6062 if (m->clustered || (flags & UPL_COMMIT_SPECULATE)) 6063 dwp->dw_mask |= DW_vm_page_speculate; 6064 else if (m->reference) 6065 dwp->dw_mask |= DW_vm_page_activate; 6066 else { 6067 dwp->dw_mask |= DW_vm_page_deactivate_internal; 6068 clear_refmod |= VM_MEM_REFERENCED; 6069 } 6070 } 6071 } 6072 if (upl->flags & UPL_ACCESS_BLOCKED) { 6073 /* 6074 * We blocked access to the pages in this URL. 6075 * Clear the "busy" bit on this page before we 6076 * wake up any waiter. 6077 */ 6078 dwp->dw_mask |= DW_clear_busy; 6079 } 6080 6081 /* 6082 * Wakeup any thread waiting for the page to be un-cleaning. 
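 * ("cleaning" was cleared above; the DW_PAGE_WAKEUP added below is what
 * actually posts the wakeup, either directly or when the delayed work
 * for this page is processed by vm_page_do_delayed_work().)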
6083 */ 6084 dwp->dw_mask |= DW_PAGE_WAKEUP; 6085 6086commit_next_page: 6087 if (clear_refmod) 6088 pmap_clear_refmod(m->phys_page, clear_refmod); 6089 6090 target_offset += PAGE_SIZE_64; 6091 xfer_size -= PAGE_SIZE; 6092 entry++; 6093 6094 if (dwp->dw_mask) { 6095 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { 6096 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); 6097 6098 if (dw_count >= dw_limit) { 6099 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 6100 6101 dwp = &dw_array[0]; 6102 dw_count = 0; 6103 } 6104 } else { 6105 if (dwp->dw_mask & DW_clear_busy) 6106 m->busy = FALSE; 6107 6108 if (dwp->dw_mask & DW_PAGE_WAKEUP) 6109 PAGE_WAKEUP(m); 6110 } 6111 } 6112 } 6113 if (dw_count) 6114 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 6115 6116 occupied = 1; 6117 6118 if (upl->flags & UPL_DEVICE_MEMORY) { 6119 occupied = 0; 6120 } else if (upl->flags & UPL_LITE) { 6121 int pg_num; 6122 int i; 6123 6124 pg_num = upl->size/PAGE_SIZE; 6125 pg_num = (pg_num + 31) >> 5; 6126 occupied = 0; 6127 6128 for (i = 0; i < pg_num; i++) { 6129 if (lite_list[i] != 0) { 6130 occupied = 1; 6131 break; 6132 } 6133 } 6134 } else { 6135 if (queue_empty(&upl->map_object->memq)) 6136 occupied = 0; 6137 } 6138 if (occupied == 0) { 6139 /* 6140 * If this UPL element belongs to a Vector UPL and is 6141 * empty, then this is the right function to deallocate 6142 * it. So go ahead set the *empty variable. The flag 6143 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view 6144 * should be considered relevant for the Vector UPL and not 6145 * the internal UPLs. 6146 */ 6147 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) 6148 *empty = TRUE; 6149 6150 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { 6151 /* 6152 * this is not a paging object 6153 * so we need to drop the paging reference 6154 * that was taken when we created the UPL 6155 * against this object 6156 */ 6157 vm_object_activity_end(shadow_object); 6158 vm_object_collapse(shadow_object, 0, TRUE); 6159 } else { 6160 /* 6161 * we dontated the paging reference to 6162 * the map object... vm_pageout_object_terminate 6163 * will drop this reference 6164 */ 6165 } 6166 } 6167 vm_object_unlock(shadow_object); 6168 if (object != shadow_object) 6169 vm_object_unlock(object); 6170 6171 if(!isVectorUPL) 6172 upl_unlock(upl); 6173 else { 6174 /* 6175 * If we completed our operations on an UPL that is 6176 * part of a Vectored UPL and if empty is TRUE, then 6177 * we should go ahead and deallocate this UPL element. 6178 * Then we check if this was the last of the UPL elements 6179 * within that Vectored UPL. If so, set empty to TRUE 6180 * so that in ubc_upl_commit_range or ubc_upl_commit, we 6181 * can go ahead and deallocate the Vector UPL too. 
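 * (vector_upl_set_subupl(vector_upl, upl, 0) below clears this
 * sub-UPL's slot and, per the logic described here, only returns TRUE
 * once no sub-UPLs remain in the Vector UPL.)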
6182 */ 6183 if(*empty==TRUE) { 6184 *empty = vector_upl_set_subupl(vector_upl, upl, 0); 6185 upl_deallocate(upl); 6186 } 6187 goto process_upl_to_commit; 6188 } 6189 6190 if (pgpgout_count) { 6191 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL); 6192 } 6193 6194 return KERN_SUCCESS; 6195} 6196 6197kern_return_t 6198upl_abort_range( 6199 upl_t upl, 6200 upl_offset_t offset, 6201 upl_size_t size, 6202 int error, 6203 boolean_t *empty) 6204{ 6205 upl_page_info_t *user_page_list = NULL; 6206 upl_size_t xfer_size, subupl_size = size; 6207 vm_object_t shadow_object; 6208 vm_object_t object; 6209 vm_object_offset_t target_offset; 6210 upl_offset_t subupl_offset = offset; 6211 int entry; 6212 wpl_array_t lite_list; 6213 int occupied; 6214 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 6215 struct vm_page_delayed_work *dwp; 6216 int dw_count; 6217 int dw_limit; 6218 int isVectorUPL = 0; 6219 upl_t vector_upl = NULL; 6220 6221 *empty = FALSE; 6222 6223 if (upl == UPL_NULL) 6224 return KERN_INVALID_ARGUMENT; 6225 6226 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) ) 6227 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty); 6228 6229 if((isVectorUPL = vector_upl_is_valid(upl))) { 6230 vector_upl = upl; 6231 upl_lock(vector_upl); 6232 } 6233 else 6234 upl_lock(upl); 6235 6236process_upl_to_abort: 6237 if(isVectorUPL) { 6238 size = subupl_size; 6239 offset = subupl_offset; 6240 if(size == 0) { 6241 upl_unlock(vector_upl); 6242 return KERN_SUCCESS; 6243 } 6244 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); 6245 if(upl == NULL) { 6246 upl_unlock(vector_upl); 6247 return KERN_FAILURE; 6248 } 6249 subupl_size -= size; 6250 subupl_offset += size; 6251 } 6252 6253 *empty = FALSE; 6254 6255#if UPL_DEBUG 6256 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { 6257 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); 6258 6259 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; 6260 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); 6261 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1; 6262 6263 upl->upl_commit_index++; 6264 } 6265#endif 6266 if (upl->flags & UPL_DEVICE_MEMORY) 6267 xfer_size = 0; 6268 else if ((offset + size) <= upl->size) 6269 xfer_size = size; 6270 else { 6271 if(!isVectorUPL) 6272 upl_unlock(upl); 6273 else { 6274 upl_unlock(vector_upl); 6275 } 6276 6277 return KERN_FAILURE; 6278 } 6279 if (upl->flags & UPL_INTERNAL) { 6280 lite_list = (wpl_array_t) 6281 ((((uintptr_t)upl) + sizeof(struct upl)) 6282 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 6283 6284 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 6285 } else { 6286 lite_list = (wpl_array_t) 6287 (((uintptr_t)upl) + sizeof(struct upl)); 6288 } 6289 object = upl->map_object; 6290 6291 if (upl->flags & UPL_SHADOWED) { 6292 vm_object_lock(object); 6293 shadow_object = object->shadow; 6294 } else 6295 shadow_object = object; 6296 6297 entry = offset/PAGE_SIZE; 6298 target_offset = (vm_object_offset_t)offset; 6299 6300 if (upl->flags & UPL_KERNEL_OBJECT) 6301 vm_object_lock_shared(shadow_object); 6302 else 6303 vm_object_lock(shadow_object); 6304 6305 if (upl->flags & UPL_ACCESS_BLOCKED) { 6306 assert(shadow_object->blocked_access); 6307 shadow_object->blocked_access = FALSE; 6308 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); 6309 } 6310 6311 dwp = &dw_array[0]; 6312 dw_count = 0; 6313 dw_limit = 
DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 6314 6315 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT)) 6316 panic("upl_abort_range: kernel_object being DUMPED"); 6317 6318 while (xfer_size) { 6319 vm_page_t t, m; 6320 unsigned int pg_num; 6321 boolean_t needed; 6322 6323 pg_num = (unsigned int) (target_offset/PAGE_SIZE); 6324 assert(pg_num == target_offset/PAGE_SIZE); 6325 6326 needed = FALSE; 6327 6328 if (user_page_list) 6329 needed = user_page_list[pg_num].needed; 6330 6331 dwp->dw_mask = 0; 6332 m = VM_PAGE_NULL; 6333 6334 if (upl->flags & UPL_LITE) { 6335 6336 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 6337 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); 6338 6339 if ( !(upl->flags & UPL_KERNEL_OBJECT)) 6340 m = vm_page_lookup(shadow_object, target_offset + 6341 (upl->offset - shadow_object->paging_offset)); 6342 } 6343 } 6344 if (upl->flags & UPL_SHADOWED) { 6345 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { 6346 t->pageout = FALSE; 6347 6348 VM_PAGE_FREE(t); 6349 6350 if (m == VM_PAGE_NULL) 6351 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); 6352 } 6353 } 6354 if ((upl->flags & UPL_KERNEL_OBJECT)) 6355 goto abort_next_page; 6356 6357 if (m != VM_PAGE_NULL) { 6358 6359 assert(!m->compressor); 6360 6361 if (m->absent) { 6362 boolean_t must_free = TRUE; 6363 6364 /* 6365 * COPYOUT = FALSE case 6366 * check for error conditions which must 6367 * be passed back to the pages customer 6368 */ 6369 if (error & UPL_ABORT_RESTART) { 6370 m->restart = TRUE; 6371 m->absent = FALSE; 6372 m->unusual = TRUE; 6373 must_free = FALSE; 6374 } else if (error & UPL_ABORT_UNAVAILABLE) { 6375 m->restart = FALSE; 6376 m->unusual = TRUE; 6377 must_free = FALSE; 6378 } else if (error & UPL_ABORT_ERROR) { 6379 m->restart = FALSE; 6380 m->absent = FALSE; 6381 m->error = TRUE; 6382 m->unusual = TRUE; 6383 must_free = FALSE; 6384 } 6385 if (m->clustered && needed == FALSE) { 6386 /* 6387 * This page was a part of a speculative 6388 * read-ahead initiated by the kernel 6389 * itself. No one is expecting this 6390 * page and no one will clean up its 6391 * error state if it ever becomes valid 6392 * in the future. 6393 * We have to free it here. 6394 */ 6395 must_free = TRUE; 6396 } 6397 6398 /* 6399 * ENCRYPTED SWAP: 6400 * If the page was already encrypted, 6401 * we don't really need to decrypt it 6402 * now. It will get decrypted later, 6403 * on demand, as soon as someone needs 6404 * to access its contents. 6405 */ 6406 6407 m->cleaning = FALSE; 6408 m->encrypted_cleaning = FALSE; 6409 6410 if (m->overwriting && !m->busy) { 6411 /* 6412 * this shouldn't happen since 6413 * this is an 'absent' page, but 6414 * it doesn't hurt to check for 6415 * the 'alternate' method of 6416 * stabilizing the page... 6417 * we will mark 'busy' to be cleared 6418 * in the following code which will 6419 * take care of the primary stabilzation 6420 * method (i.e. setting 'busy' to TRUE) 6421 */ 6422 dwp->dw_mask |= DW_vm_page_unwire; 6423 } 6424 m->overwriting = FALSE; 6425 6426 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 6427 6428 if (must_free == TRUE) 6429 dwp->dw_mask |= DW_vm_page_free; 6430 else 6431 dwp->dw_mask |= DW_vm_page_activate; 6432 } else { 6433 /* 6434 * Handle the trusted pager throttle. 6435 */ 6436 if (m->laundry) 6437 dwp->dw_mask |= DW_vm_pageout_throttle_up; 6438 6439 if (upl->flags & UPL_ACCESS_BLOCKED) { 6440 /* 6441 * We blocked access to the pages in this UPL. 
6442 * Clear the "busy" bit and wake up any waiter 6443 * for this page. 6444 */ 6445 dwp->dw_mask |= DW_clear_busy; 6446 } 6447 if (m->overwriting) { 6448 if (m->busy) 6449 dwp->dw_mask |= DW_clear_busy; 6450 else { 6451 /* 6452 * deal with the 'alternate' method 6453 * of stabilizing the page... 6454 * we will either free the page 6455 * or mark 'busy' to be cleared 6456 * in the following code which will 6457 * take care of the primary stabilzation 6458 * method (i.e. setting 'busy' to TRUE) 6459 */ 6460 dwp->dw_mask |= DW_vm_page_unwire; 6461 } 6462 m->overwriting = FALSE; 6463 } 6464 if (m->encrypted_cleaning == TRUE) { 6465 m->encrypted_cleaning = FALSE; 6466 6467 dwp->dw_mask |= DW_clear_busy; 6468 } 6469 m->pageout = FALSE; 6470 m->cleaning = FALSE; 6471#if MACH_PAGEMAP 6472 vm_external_state_clr(m->object->existence_map, m->offset); 6473#endif /* MACH_PAGEMAP */ 6474 if (error & UPL_ABORT_DUMP_PAGES) { 6475 pmap_disconnect(m->phys_page); 6476 6477 dwp->dw_mask |= DW_vm_page_free; 6478 } else { 6479 if (!(dwp->dw_mask & DW_vm_page_unwire)) { 6480 if (error & UPL_ABORT_REFERENCE) { 6481 /* 6482 * we've been told to explictly 6483 * reference this page... for 6484 * file I/O, this is done by 6485 * implementing an LRU on the inactive q 6486 */ 6487 dwp->dw_mask |= DW_vm_page_lru; 6488 6489 } else if (!m->active && !m->inactive && !m->speculative) 6490 dwp->dw_mask |= DW_vm_page_deactivate_internal; 6491 } 6492 dwp->dw_mask |= DW_PAGE_WAKEUP; 6493 } 6494 } 6495 } 6496abort_next_page: 6497 target_offset += PAGE_SIZE_64; 6498 xfer_size -= PAGE_SIZE; 6499 entry++; 6500 6501 if (dwp->dw_mask) { 6502 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { 6503 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); 6504 6505 if (dw_count >= dw_limit) { 6506 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 6507 6508 dwp = &dw_array[0]; 6509 dw_count = 0; 6510 } 6511 } else { 6512 if (dwp->dw_mask & DW_clear_busy) 6513 m->busy = FALSE; 6514 6515 if (dwp->dw_mask & DW_PAGE_WAKEUP) 6516 PAGE_WAKEUP(m); 6517 } 6518 } 6519 } 6520 if (dw_count) 6521 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 6522 6523 occupied = 1; 6524 6525 if (upl->flags & UPL_DEVICE_MEMORY) { 6526 occupied = 0; 6527 } else if (upl->flags & UPL_LITE) { 6528 int pg_num; 6529 int i; 6530 6531 pg_num = upl->size/PAGE_SIZE; 6532 pg_num = (pg_num + 31) >> 5; 6533 occupied = 0; 6534 6535 for (i = 0; i < pg_num; i++) { 6536 if (lite_list[i] != 0) { 6537 occupied = 1; 6538 break; 6539 } 6540 } 6541 } else { 6542 if (queue_empty(&upl->map_object->memq)) 6543 occupied = 0; 6544 } 6545 if (occupied == 0) { 6546 /* 6547 * If this UPL element belongs to a Vector UPL and is 6548 * empty, then this is the right function to deallocate 6549 * it. So go ahead set the *empty variable. The flag 6550 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view 6551 * should be considered relevant for the Vector UPL and 6552 * not the internal UPLs. 6553 */ 6554 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) 6555 *empty = TRUE; 6556 6557 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { 6558 /* 6559 * this is not a paging object 6560 * so we need to drop the paging reference 6561 * that was taken when we created the UPL 6562 * against this object 6563 */ 6564 vm_object_activity_end(shadow_object); 6565 vm_object_collapse(shadow_object, 0, TRUE); 6566 } else { 6567 /* 6568 * we dontated the paging reference to 6569 * the map object... 
vm_pageout_object_terminate 6570 * will drop this reference 6571 */ 6572 } 6573 } 6574 vm_object_unlock(shadow_object); 6575 if (object != shadow_object) 6576 vm_object_unlock(object); 6577 6578 if(!isVectorUPL) 6579 upl_unlock(upl); 6580 else { 6581 /* 6582 * If we completed our operations on an UPL that is 6583 * part of a Vectored UPL and if empty is TRUE, then 6584 * we should go ahead and deallocate this UPL element. 6585 * Then we check if this was the last of the UPL elements 6586 * within that Vectored UPL. If so, set empty to TRUE 6587 * so that in ubc_upl_abort_range or ubc_upl_abort, we 6588 * can go ahead and deallocate the Vector UPL too. 6589 */ 6590 if(*empty == TRUE) { 6591 *empty = vector_upl_set_subupl(vector_upl, upl,0); 6592 upl_deallocate(upl); 6593 } 6594 goto process_upl_to_abort; 6595 } 6596 6597 return KERN_SUCCESS; 6598} 6599 6600 6601kern_return_t 6602upl_abort( 6603 upl_t upl, 6604 int error) 6605{ 6606 boolean_t empty; 6607 6608 return upl_abort_range(upl, 0, upl->size, error, &empty); 6609} 6610 6611 6612/* an option on commit should be wire */ 6613kern_return_t 6614upl_commit( 6615 upl_t upl, 6616 upl_page_info_t *page_list, 6617 mach_msg_type_number_t count) 6618{ 6619 boolean_t empty; 6620 6621 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty); 6622} 6623 6624void 6625vm_object_set_pmap_cache_attr( 6626 vm_object_t object, 6627 upl_page_info_array_t user_page_list, 6628 unsigned int num_pages, 6629 boolean_t batch_pmap_op) 6630{ 6631 unsigned int cache_attr = 0; 6632 6633 cache_attr = object->wimg_bits & VM_WIMG_MASK; 6634 assert(user_page_list); 6635 if (cache_attr != VM_WIMG_USE_DEFAULT) { 6636 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op); 6637 } 6638} 6639 6640unsigned int vm_object_iopl_request_sleep_for_cleaning = 0; 6641 6642kern_return_t 6643vm_object_iopl_request( 6644 vm_object_t object, 6645 vm_object_offset_t offset, 6646 upl_size_t size, 6647 upl_t *upl_ptr, 6648 upl_page_info_array_t user_page_list, 6649 unsigned int *page_list_count, 6650 int cntrl_flags) 6651{ 6652 vm_page_t dst_page; 6653 vm_object_offset_t dst_offset; 6654 upl_size_t xfer_size; 6655 upl_t upl = NULL; 6656 unsigned int entry; 6657 wpl_array_t lite_list = NULL; 6658 int no_zero_fill = FALSE; 6659 unsigned int size_in_pages; 6660 u_int32_t psize; 6661 kern_return_t ret; 6662 vm_prot_t prot; 6663 struct vm_object_fault_info fault_info; 6664 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 6665 struct vm_page_delayed_work *dwp; 6666 int dw_count; 6667 int dw_limit; 6668 int dw_index; 6669 boolean_t caller_lookup; 6670 6671 if (cntrl_flags & ~UPL_VALID_FLAGS) { 6672 /* 6673 * For forward compatibility's sake, 6674 * reject any unknown flag. 
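 * A caller built against newer headers that passes a flag this kernel
 * does not understand gets KERN_INVALID_VALUE back, rather than having
 * the unknown flag silently ignored.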
6675 */ 6676 return KERN_INVALID_VALUE; 6677 } 6678 if (vm_lopage_needed == FALSE) 6679 cntrl_flags &= ~UPL_NEED_32BIT_ADDR; 6680 6681 if (cntrl_flags & UPL_NEED_32BIT_ADDR) { 6682 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) 6683 return KERN_INVALID_VALUE; 6684 6685 if (object->phys_contiguous) { 6686 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) 6687 return KERN_INVALID_ADDRESS; 6688 6689 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) 6690 return KERN_INVALID_ADDRESS; 6691 } 6692 } 6693 6694 if (cntrl_flags & UPL_ENCRYPT) { 6695 /* 6696 * ENCRYPTED SWAP: 6697 * The paging path doesn't use this interface, 6698 * so we don't support the UPL_ENCRYPT flag 6699 * here. We won't encrypt the pages. 6700 */ 6701 assert(! (cntrl_flags & UPL_ENCRYPT)); 6702 } 6703 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) 6704 no_zero_fill = TRUE; 6705 6706 if (cntrl_flags & UPL_COPYOUT_FROM) 6707 prot = VM_PROT_READ; 6708 else 6709 prot = VM_PROT_READ | VM_PROT_WRITE; 6710 6711 if (((size/PAGE_SIZE) > MAX_UPL_SIZE) && !object->phys_contiguous) 6712 size = MAX_UPL_SIZE * PAGE_SIZE; 6713 6714 if (cntrl_flags & UPL_SET_INTERNAL) { 6715 if (page_list_count != NULL) 6716 *page_list_count = MAX_UPL_SIZE; 6717 } 6718 if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) && 6719 ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size))) 6720 return KERN_INVALID_ARGUMENT; 6721 6722 if ((!object->internal) && (object->paging_offset != 0)) 6723 panic("vm_object_iopl_request: external object with non-zero paging offset\n"); 6724 6725 6726 if (object->phys_contiguous) 6727 psize = PAGE_SIZE; 6728 else 6729 psize = size; 6730 6731 if (cntrl_flags & UPL_SET_INTERNAL) { 6732 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize); 6733 6734 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 6735 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + 6736 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t))); 6737 if (size == 0) { 6738 user_page_list = NULL; 6739 lite_list = NULL; 6740 } 6741 } else { 6742 upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize); 6743 6744 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 6745 if (size == 0) { 6746 lite_list = NULL; 6747 } 6748 } 6749 if (user_page_list) 6750 user_page_list[0].device = FALSE; 6751 *upl_ptr = upl; 6752 6753 upl->map_object = object; 6754 upl->size = size; 6755 6756 size_in_pages = size / PAGE_SIZE; 6757 6758 if (object == kernel_object && 6759 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) { 6760 upl->flags |= UPL_KERNEL_OBJECT; 6761#if UPL_DEBUG 6762 vm_object_lock(object); 6763#else 6764 vm_object_lock_shared(object); 6765#endif 6766 } else { 6767 vm_object_lock(object); 6768 vm_object_activity_begin(object); 6769 } 6770 /* 6771 * paging in progress also protects the paging_offset 6772 */ 6773 upl->offset = offset + object->paging_offset; 6774 6775 if (cntrl_flags & UPL_BLOCK_ACCESS) { 6776 /* 6777 * The user requested that access to the pages in this UPL 6778 * be blocked until the UPL is commited or aborted. 
6779 */ 6780 upl->flags |= UPL_ACCESS_BLOCKED; 6781 } 6782 6783 if (object->phys_contiguous) { 6784#if UPL_DEBUG 6785 vm_object_activity_begin(object); 6786 queue_enter(&object->uplq, upl, upl_t, uplq); 6787#endif /* UPL_DEBUG */ 6788 6789 if (upl->flags & UPL_ACCESS_BLOCKED) { 6790 assert(!object->blocked_access); 6791 object->blocked_access = TRUE; 6792 } 6793 6794 vm_object_unlock(object); 6795 6796 /* 6797 * don't need any shadow mappings for this one 6798 * since it is already I/O memory 6799 */ 6800 upl->flags |= UPL_DEVICE_MEMORY; 6801 6802 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT); 6803 6804 if (user_page_list) { 6805 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT); 6806 user_page_list[0].device = TRUE; 6807 } 6808 if (page_list_count != NULL) { 6809 if (upl->flags & UPL_INTERNAL) 6810 *page_list_count = 0; 6811 else 6812 *page_list_count = 1; 6813 } 6814 return KERN_SUCCESS; 6815 } 6816 if (object != kernel_object && object != compressor_object) { 6817 /* 6818 * Protect user space from future COW operations 6819 */ 6820 object->true_share = TRUE; 6821 6822 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) 6823 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 6824 } 6825 6826#if UPL_DEBUG 6827 vm_object_activity_begin(object); 6828 queue_enter(&object->uplq, upl, upl_t, uplq); 6829#endif /* UPL_DEBUG */ 6830 6831 if (!(cntrl_flags & UPL_COPYOUT_FROM) && 6832 object->copy != VM_OBJECT_NULL) { 6833 /* 6834 * Honor copy-on-write obligations 6835 * 6836 * The caller is gathering these pages and 6837 * might modify their contents. We need to 6838 * make sure that the copy object has its own 6839 * private copies of these pages before we let 6840 * the caller modify them. 6841 * 6842 * NOTE: someone else could map the original object 6843 * after we've done this copy-on-write here, and they 6844 * could then see an inconsistent picture of the memory 6845 * while it's being modified via the UPL. To prevent this, 6846 * we would have to block access to these pages until the 6847 * UPL is released. We could use the UPL_BLOCK_ACCESS 6848 * code path for that... 6849 */ 6850 vm_object_update(object, 6851 offset, 6852 size, 6853 NULL, 6854 NULL, 6855 FALSE, /* should_return */ 6856 MEMORY_OBJECT_COPY_SYNC, 6857 VM_PROT_NO_CHANGE); 6858#if DEVELOPMENT || DEBUG 6859 iopl_cow++; 6860 iopl_cow_pages += size >> PAGE_SHIFT; 6861#endif 6862 } 6863 6864 6865 entry = 0; 6866 6867 xfer_size = size; 6868 dst_offset = offset; 6869 6870 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 6871 fault_info.user_tag = 0; 6872 fault_info.lo_offset = offset; 6873 fault_info.hi_offset = offset + xfer_size; 6874 fault_info.no_cache = FALSE; 6875 fault_info.stealth = FALSE; 6876 fault_info.io_sync = FALSE; 6877 fault_info.cs_bypass = FALSE; 6878 fault_info.mark_zf_absent = (0 == (cntrl_flags & UPL_NOZEROFILLIO)); 6879 6880 dwp = &dw_array[0]; 6881 dw_count = 0; 6882 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 6883 6884 while (xfer_size) { 6885 vm_fault_return_t result; 6886 unsigned int pg_num; 6887 6888 dwp->dw_mask = 0; 6889 6890 dst_page = vm_page_lookup(object, dst_offset); 6891 6892 /* 6893 * ENCRYPTED SWAP: 6894 * If the page is encrypted, we need to decrypt it, 6895 * so force a soft page fault. 
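 * (the "encrypted" case is treated like any other not-immediately-usable
 * page below; vm_fault_page() is expected to hand back a resident,
 * decrypted page before we wire it.)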
6896 */ 6897 if (dst_page == VM_PAGE_NULL || 6898 dst_page->busy || 6899 dst_page->encrypted || 6900 dst_page->error || 6901 dst_page->restart || 6902 dst_page->absent || 6903 dst_page->fictitious) { 6904 6905 if (object == kernel_object) 6906 panic("vm_object_iopl_request: missing/bad page in kernel object\n"); 6907 if (object == compressor_object) 6908 panic("vm_object_iopl_request: missing/bad page in compressor object\n"); 6909 6910 if (cntrl_flags & UPL_REQUEST_NO_FAULT) { 6911 ret = KERN_MEMORY_ERROR; 6912 goto return_err; 6913 } 6914 6915 /* 6916 * We just looked up the page and the result remains valid 6917 * until the object lock is release, so send it to 6918 * vm_fault_page() (as "dst_page"), to avoid having to 6919 * look it up again there. 6920 */ 6921 caller_lookup = TRUE; 6922 6923 do { 6924 vm_page_t top_page; 6925 kern_return_t error_code; 6926 int interruptible; 6927 6928 if (cntrl_flags & UPL_SET_INTERRUPTIBLE) 6929 interruptible = THREAD_ABORTSAFE; 6930 else 6931 interruptible = THREAD_UNINT; 6932 6933 fault_info.interruptible = interruptible; 6934 fault_info.cluster_size = xfer_size; 6935 fault_info.batch_pmap_op = TRUE; 6936 6937 vm_object_paging_begin(object); 6938 6939 result = vm_fault_page(object, dst_offset, 6940 prot | VM_PROT_WRITE, FALSE, 6941 caller_lookup, 6942 &prot, &dst_page, &top_page, 6943 (int *)0, 6944 &error_code, no_zero_fill, 6945 FALSE, &fault_info); 6946 6947 /* our lookup is no longer valid at this point */ 6948 caller_lookup = FALSE; 6949 6950 switch (result) { 6951 6952 case VM_FAULT_SUCCESS: 6953 6954 if ( !dst_page->absent) { 6955 PAGE_WAKEUP_DONE(dst_page); 6956 } else { 6957 /* 6958 * we only get back an absent page if we 6959 * requested that it not be zero-filled 6960 * because we are about to fill it via I/O 6961 * 6962 * absent pages should be left BUSY 6963 * to prevent them from being faulted 6964 * into an address space before we've 6965 * had a chance to complete the I/O on 6966 * them since they may contain info that 6967 * shouldn't be seen by the faulting task 6968 */ 6969 } 6970 /* 6971 * Release paging references and 6972 * top-level placeholder page, if any. 6973 */ 6974 if (top_page != VM_PAGE_NULL) { 6975 vm_object_t local_object; 6976 6977 local_object = top_page->object; 6978 6979 if (top_page->object != dst_page->object) { 6980 vm_object_lock(local_object); 6981 VM_PAGE_FREE(top_page); 6982 vm_object_paging_end(local_object); 6983 vm_object_unlock(local_object); 6984 } else { 6985 VM_PAGE_FREE(top_page); 6986 vm_object_paging_end(local_object); 6987 } 6988 } 6989 vm_object_paging_end(object); 6990 break; 6991 6992 case VM_FAULT_RETRY: 6993 vm_object_lock(object); 6994 break; 6995 6996 case VM_FAULT_MEMORY_SHORTAGE: 6997 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); 6998 6999 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); 7000 7001 if (vm_page_wait(interruptible)) { 7002 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 7003 7004 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); 7005 vm_object_lock(object); 7006 7007 break; 7008 } 7009 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 7010 7011 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); 7012 7013 /* fall thru */ 7014 7015 case VM_FAULT_INTERRUPTED: 7016 error_code = MACH_SEND_INTERRUPTED; 7017 case VM_FAULT_MEMORY_ERROR: 7018 memory_error: 7019 ret = (error_code ? 
error_code: KERN_MEMORY_ERROR); 7020 7021 vm_object_lock(object); 7022 goto return_err; 7023 7024 case VM_FAULT_SUCCESS_NO_VM_PAGE: 7025 /* success but no page: fail */ 7026 vm_object_paging_end(object); 7027 vm_object_unlock(object); 7028 goto memory_error; 7029 7030 default: 7031 panic("vm_object_iopl_request: unexpected error" 7032 " 0x%x from vm_fault_page()\n", result); 7033 } 7034 } while (result != VM_FAULT_SUCCESS); 7035 7036 } 7037 if (upl->flags & UPL_KERNEL_OBJECT) 7038 goto record_phys_addr; 7039 7040 if (dst_page->compressor) { 7041 dst_page->busy = TRUE; 7042 goto record_phys_addr; 7043 } 7044 7045 if (dst_page->cleaning) { 7046 /* 7047 * Someone else is cleaning this page in place. 7048 * In theory, we should be able to proceed and use this 7049 * page but they'll probably end up clearing the "busy" 7050 * bit on it in upl_commit_range() but they didn't set 7051 * it, so they would clear our "busy" bit and open 7052 * us to race conditions. 7053 * We'd better wait for the cleaning to complete and 7054 * then try again. 7055 */ 7056 vm_object_iopl_request_sleep_for_cleaning++; 7057 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 7058 continue; 7059 } 7060 if (dst_page->laundry) { 7061 dst_page->pageout = FALSE; 7062 7063 vm_pageout_steal_laundry(dst_page, FALSE); 7064 } 7065 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) && 7066 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) { 7067 vm_page_t low_page; 7068 int refmod; 7069 7070 /* 7071 * support devices that can't DMA above 32 bits 7072 * by substituting pages from a pool of low address 7073 * memory for any pages we find above the 4G mark 7074 * can't substitute if the page is already wired because 7075 * we don't know whether that physical address has been 7076 * handed out to some other 64 bit capable DMA device to use 7077 */ 7078 if (VM_PAGE_WIRED(dst_page)) { 7079 ret = KERN_PROTECTION_FAILURE; 7080 goto return_err; 7081 } 7082 low_page = vm_page_grablo(); 7083 7084 if (low_page == VM_PAGE_NULL) { 7085 ret = KERN_RESOURCE_SHORTAGE; 7086 goto return_err; 7087 } 7088 /* 7089 * from here until the vm_page_replace completes 7090 * we musn't drop the object lock... we don't 7091 * want anyone refaulting this page in and using 7092 * it after we disconnect it... we want the fault 7093 * to find the new page being substituted. 7094 */ 7095 if (dst_page->pmapped) 7096 refmod = pmap_disconnect(dst_page->phys_page); 7097 else 7098 refmod = 0; 7099 7100 if (!dst_page->absent) 7101 vm_page_copy(dst_page, low_page); 7102 7103 low_page->reference = dst_page->reference; 7104 low_page->dirty = dst_page->dirty; 7105 low_page->absent = dst_page->absent; 7106 7107 if (refmod & VM_MEM_REFERENCED) 7108 low_page->reference = TRUE; 7109 if (refmod & VM_MEM_MODIFIED) { 7110 SET_PAGE_DIRTY(low_page, FALSE); 7111 } 7112 7113 vm_page_replace(low_page, object, dst_offset); 7114 7115 dst_page = low_page; 7116 /* 7117 * vm_page_grablo returned the page marked 7118 * BUSY... we don't need a PAGE_WAKEUP_DONE 7119 * here, because we've never dropped the object lock 7120 */ 7121 if ( !dst_page->absent) 7122 dst_page->busy = FALSE; 7123 } 7124 if ( !dst_page->busy) 7125 dwp->dw_mask |= DW_vm_page_wire; 7126 7127 if (cntrl_flags & UPL_BLOCK_ACCESS) { 7128 /* 7129 * Mark the page "busy" to block any future page fault 7130 * on this page in addition to wiring it. 7131 * We'll also remove the mapping 7132 * of all these pages before leaving this routine. 
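 * (the unmapping happens via the vm_object_pmap_protect(..., VM_PROT_NONE)
 * call made just before this function returns KERN_SUCCESS, at which
 * point object->blocked_access is also set.)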
7133 */ 7134 assert(!dst_page->fictitious); 7135 dst_page->busy = TRUE; 7136 } 7137 /* 7138 * expect the page to be used 7139 * page queues lock must be held to set 'reference' 7140 */ 7141 dwp->dw_mask |= DW_set_reference; 7142 7143 if (!(cntrl_flags & UPL_COPYOUT_FROM)) { 7144 SET_PAGE_DIRTY(dst_page, TRUE); 7145 } 7146 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) { 7147 pmap_sync_page_attributes_phys(dst_page->phys_page); 7148 dst_page->written_by_kernel = FALSE; 7149 } 7150 7151record_phys_addr: 7152 if (dst_page->busy) 7153 upl->flags |= UPL_HAS_BUSY; 7154 7155 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 7156 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 7157 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 7158 7159 if (dst_page->phys_page > upl->highest_page) 7160 upl->highest_page = dst_page->phys_page; 7161 7162 if (user_page_list) { 7163 user_page_list[entry].phys_addr = dst_page->phys_page; 7164 user_page_list[entry].pageout = dst_page->pageout; 7165 user_page_list[entry].absent = dst_page->absent; 7166 user_page_list[entry].dirty = dst_page->dirty; 7167 user_page_list[entry].precious = dst_page->precious; 7168 user_page_list[entry].device = FALSE; 7169 user_page_list[entry].needed = FALSE; 7170 if (dst_page->clustered == TRUE) 7171 user_page_list[entry].speculative = dst_page->speculative; 7172 else 7173 user_page_list[entry].speculative = FALSE; 7174 user_page_list[entry].cs_validated = dst_page->cs_validated; 7175 user_page_list[entry].cs_tainted = dst_page->cs_tainted; 7176 } 7177 if (object != kernel_object && object != compressor_object) { 7178 /* 7179 * someone is explicitly grabbing this page... 7180 * update clustered and speculative state 7181 * 7182 */ 7183 VM_PAGE_CONSUME_CLUSTERED(dst_page); 7184 } 7185 entry++; 7186 dst_offset += PAGE_SIZE_64; 7187 xfer_size -= PAGE_SIZE; 7188 7189 if (dwp->dw_mask) { 7190 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); 7191 7192 if (dw_count >= dw_limit) { 7193 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 7194 7195 dwp = &dw_array[0]; 7196 dw_count = 0; 7197 } 7198 } 7199 } 7200 if (dw_count) 7201 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 7202 7203 vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE); 7204 7205 if (page_list_count != NULL) { 7206 if (upl->flags & UPL_INTERNAL) 7207 *page_list_count = 0; 7208 else if (*page_list_count > entry) 7209 *page_list_count = entry; 7210 } 7211 vm_object_unlock(object); 7212 7213 if (cntrl_flags & UPL_BLOCK_ACCESS) { 7214 /* 7215 * We've marked all the pages "busy" so that future 7216 * page faults will block. 7217 * Now remove the mapping for these pages, so that they 7218 * can't be accessed without causing a page fault. 7219 */ 7220 vm_object_pmap_protect(object, offset, (vm_object_size_t)size, 7221 PMAP_NULL, 0, VM_PROT_NONE); 7222 assert(!object->blocked_access); 7223 object->blocked_access = TRUE; 7224 } 7225 return KERN_SUCCESS; 7226 7227return_err: 7228 dw_index = 0; 7229 7230 for (; offset < dst_offset; offset += PAGE_SIZE) { 7231 boolean_t need_unwire; 7232 7233 dst_page = vm_page_lookup(object, offset); 7234 7235 if (dst_page == VM_PAGE_NULL) 7236 panic("vm_object_iopl_request: Wired page missing. \n"); 7237 7238 /* 7239 * if we've already processed this page in an earlier 7240 * dw_do_work, we need to undo the wiring... 
we will 7241 * leave the dirty and reference bits on if they 7242 * were set, since we don't have a good way of knowing 7243 * what the previous state was and we won't get here 7244 * under any normal circumstances... we will always 7245 * clear BUSY and wakeup any waiters via vm_page_free 7246 * or PAGE_WAKEUP_DONE 7247 */ 7248 need_unwire = TRUE; 7249 7250 if (dw_count) { 7251 if (dw_array[dw_index].dw_m == dst_page) { 7252 /* 7253 * still in the deferred work list 7254 * which means we haven't yet called 7255 * vm_page_wire on this page 7256 */ 7257 need_unwire = FALSE; 7258 7259 dw_index++; 7260 dw_count--; 7261 } 7262 } 7263 vm_page_lock_queues(); 7264 7265 if (dst_page->absent) { 7266 vm_page_free(dst_page); 7267 7268 need_unwire = FALSE; 7269 } else { 7270 if (need_unwire == TRUE) 7271 vm_page_unwire(dst_page, TRUE); 7272 7273 PAGE_WAKEUP_DONE(dst_page); 7274 } 7275 vm_page_unlock_queues(); 7276 7277 if (need_unwire == TRUE) 7278 VM_STAT_INCR(reactivations); 7279 } 7280#if UPL_DEBUG 7281 upl->upl_state = 2; 7282#endif 7283 if (! (upl->flags & UPL_KERNEL_OBJECT)) { 7284 vm_object_activity_end(object); 7285 vm_object_collapse(object, 0, TRUE); 7286 } 7287 vm_object_unlock(object); 7288 upl_destroy(upl); 7289 7290 return ret; 7291} 7292 7293kern_return_t 7294upl_transpose( 7295 upl_t upl1, 7296 upl_t upl2) 7297{ 7298 kern_return_t retval; 7299 boolean_t upls_locked; 7300 vm_object_t object1, object2; 7301 7302 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) { 7303 return KERN_INVALID_ARGUMENT; 7304 } 7305 7306 upls_locked = FALSE; 7307 7308 /* 7309 * Since we need to lock both UPLs at the same time, 7310 * avoid deadlocks by always taking locks in the same order. 7311 */ 7312 if (upl1 < upl2) { 7313 upl_lock(upl1); 7314 upl_lock(upl2); 7315 } else { 7316 upl_lock(upl2); 7317 upl_lock(upl1); 7318 } 7319 upls_locked = TRUE; /* the UPLs will need to be unlocked */ 7320 7321 object1 = upl1->map_object; 7322 object2 = upl2->map_object; 7323 7324 if (upl1->offset != 0 || upl2->offset != 0 || 7325 upl1->size != upl2->size) { 7326 /* 7327 * We deal only with full objects, not subsets. 7328 * That's because we exchange the entire backing store info 7329 * for the objects: pager, resident pages, etc... We can't do 7330 * only part of it. 7331 */ 7332 retval = KERN_INVALID_VALUE; 7333 goto done; 7334 } 7335 7336 /* 7337 * Tranpose the VM objects' backing store. 7338 */ 7339 retval = vm_object_transpose(object1, object2, 7340 (vm_object_size_t) upl1->size); 7341 7342 if (retval == KERN_SUCCESS) { 7343 /* 7344 * Make each UPL point to the correct VM object, i.e. the 7345 * object holding the pages that the UPL refers to... 7346 */ 7347#if UPL_DEBUG 7348 queue_remove(&object1->uplq, upl1, upl_t, uplq); 7349 queue_remove(&object2->uplq, upl2, upl_t, uplq); 7350#endif 7351 upl1->map_object = object2; 7352 upl2->map_object = object1; 7353#if UPL_DEBUG 7354 queue_enter(&object1->uplq, upl2, upl_t, uplq); 7355 queue_enter(&object2->uplq, upl1, upl_t, uplq); 7356#endif 7357 } 7358 7359done: 7360 /* 7361 * Cleanup. 
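 * Unlock both UPLs whether or not the transpose succeeded.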
7362 */ 7363 if (upls_locked) { 7364 upl_unlock(upl1); 7365 upl_unlock(upl2); 7366 upls_locked = FALSE; 7367 } 7368 7369 return retval; 7370} 7371 7372void 7373upl_range_needed( 7374 upl_t upl, 7375 int index, 7376 int count) 7377{ 7378 upl_page_info_t *user_page_list; 7379 int size_in_pages; 7380 7381 if ( !(upl->flags & UPL_INTERNAL) || count <= 0) 7382 return; 7383 7384 size_in_pages = upl->size / PAGE_SIZE; 7385 7386 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 7387 7388 while (count-- && index < size_in_pages) 7389 user_page_list[index++].needed = TRUE; 7390} 7391 7392 7393/* 7394 * ENCRYPTED SWAP: 7395 * 7396 * Rationale: the user might have some encrypted data on disk (via 7397 * FileVault or any other mechanism). That data is then decrypted in 7398 * memory, which is safe as long as the machine is secure. But that 7399 * decrypted data in memory could be paged out to disk by the default 7400 * pager. The data would then be stored on disk in clear (not encrypted) 7401 * and it could be accessed by anyone who gets physical access to the 7402 * disk (if the laptop or the disk gets stolen for example). This weakens 7403 * the security offered by FileVault. 7404 * 7405 * Solution: the default pager will optionally request that all the 7406 * pages it gathers for pageout be encrypted, via the UPL interfaces, 7407 * before it sends this UPL to disk via the vnode_pageout() path. 7408 * 7409 * Notes: 7410 * 7411 * To avoid disrupting the VM LRU algorithms, we want to keep the 7412 * clean-in-place mechanisms, which allow us to send some extra pages to 7413 * swap (clustering) without actually removing them from the user's 7414 * address space. We don't want the user to unknowingly access encrypted 7415 * data, so we have to actually remove the encrypted pages from the page 7416 * table. When the user accesses the data, the hardware will fail to 7417 * locate the virtual page in its page table and will trigger a page 7418 * fault. We can then decrypt the page and enter it in the page table 7419 * again. Whenever we allow the user to access the contents of a page, 7420 * we have to make sure it's not encrypted. 7421 * 7422 * 7423 */ 7424/* 7425 * ENCRYPTED SWAP: 7426 * Reserve of virtual addresses in the kernel address space. 7427 * We need to map the physical pages in the kernel, so that we 7428 * can call the encryption/decryption routines with a kernel 7429 * virtual address. We keep this pool of pre-allocated kernel 7430 * virtual addresses so that we don't have to scan the kernel's 7431 * virtaul address space each time we need to encrypt or decrypt 7432 * a physical page. 7433 * It would be nice to be able to encrypt and decrypt in physical 7434 * mode but that might not always be more efficient... 
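 *
 * A rough, illustrative sketch of the fast path implemented in
 * vm_paging_map_object() below (names match the code below; waiting,
 * bookkeeping and error handling omitted):
 *
 *	simple_lock(&vm_paging_lock);
 *	for (i = 0; i < VM_PAGING_NUM_PAGES; i++)
 *		if (vm_paging_page_inuse[i] == FALSE)
 *			break;				/* claim slot i */
 *	kva = vm_paging_base_address + (i * PAGE_SIZE);
 *	vm_paging_page_inuse[i] = TRUE;
 *	simple_unlock(&vm_paging_lock);
 *	PMAP_ENTER(kernel_pmap, kva, page, protection, VM_PROT_NONE, 0, TRUE);
 *	... encrypt or decrypt the page through "kva" ...
 *	vm_paging_unmap_object(object, kva, kva + PAGE_SIZE);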
7435 */ 7436decl_simple_lock_data(,vm_paging_lock) 7437#define VM_PAGING_NUM_PAGES 64 7438vm_map_offset_t vm_paging_base_address = 0; 7439boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, }; 7440int vm_paging_max_index = 0; 7441int vm_paging_page_waiter = 0; 7442int vm_paging_page_waiter_total = 0; 7443unsigned long vm_paging_no_kernel_page = 0; 7444unsigned long vm_paging_objects_mapped = 0; 7445unsigned long vm_paging_pages_mapped = 0; 7446unsigned long vm_paging_objects_mapped_slow = 0; 7447unsigned long vm_paging_pages_mapped_slow = 0; 7448 7449void 7450vm_paging_map_init(void) 7451{ 7452 kern_return_t kr; 7453 vm_map_offset_t page_map_offset; 7454 vm_map_entry_t map_entry; 7455 7456 assert(vm_paging_base_address == 0); 7457 7458 /* 7459 * Initialize our pool of pre-allocated kernel 7460 * virtual addresses. 7461 */ 7462 page_map_offset = 0; 7463 kr = vm_map_find_space(kernel_map, 7464 &page_map_offset, 7465 VM_PAGING_NUM_PAGES * PAGE_SIZE, 7466 0, 7467 0, 7468 &map_entry); 7469 if (kr != KERN_SUCCESS) { 7470 panic("vm_paging_map_init: kernel_map full\n"); 7471 } 7472 map_entry->object.vm_object = kernel_object; 7473 map_entry->offset = page_map_offset; 7474 map_entry->protection = VM_PROT_NONE; 7475 map_entry->max_protection = VM_PROT_NONE; 7476 map_entry->permanent = TRUE; 7477 vm_object_reference(kernel_object); 7478 vm_map_unlock(kernel_map); 7479 7480 assert(vm_paging_base_address == 0); 7481 vm_paging_base_address = page_map_offset; 7482} 7483 7484/* 7485 * ENCRYPTED SWAP: 7486 * vm_paging_map_object: 7487 * Maps part of a VM object's pages in the kernel 7488 * virtual address space, using the pre-allocated 7489 * kernel virtual addresses, if possible. 7490 * Context: 7491 * The VM object is locked. This lock will get 7492 * dropped and re-acquired though, so the caller 7493 * must make sure the VM object is kept alive 7494 * (by holding a VM map that has a reference 7495 * on it, for example, or taking an extra reference). 7496 * The page should also be kept busy to prevent 7497 * it from being reclaimed. 7498 */ 7499kern_return_t 7500vm_paging_map_object( 7501 vm_page_t page, 7502 vm_object_t object, 7503 vm_object_offset_t offset, 7504 vm_prot_t protection, 7505 boolean_t can_unlock_object, 7506 vm_map_size_t *size, /* IN/OUT */ 7507 vm_map_offset_t *address, /* OUT */ 7508 boolean_t *need_unmap) /* OUT */ 7509{ 7510 kern_return_t kr; 7511 vm_map_offset_t page_map_offset; 7512 vm_map_size_t map_size; 7513 vm_object_offset_t object_offset; 7514 int i; 7515 7516 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { 7517 /* use permanent 1-to-1 kernel mapping of physical memory ? */ 7518#if __x86_64__ 7519 *address = (vm_map_offset_t) 7520 PHYSMAP_PTOV((pmap_paddr_t)page->phys_page << 7521 PAGE_SHIFT); 7522 *need_unmap = FALSE; 7523 return KERN_SUCCESS; 7524#else 7525#warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..." 7526#endif 7527 7528 assert(page->busy); 7529 /* 7530 * Use one of the pre-allocated kernel virtual addresses 7531 * and just enter the VM page in the kernel address space 7532 * at that virtual address. 7533 */ 7534 simple_lock(&vm_paging_lock); 7535 7536 /* 7537 * Try and find an available kernel virtual address 7538 * from our pre-allocated pool. 
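 * If every slot is busy and we are not allowed to unlock the object,
 * sleep on vm_paging_page_waiter until vm_paging_unmap_object() frees a
 * slot and wakes us; if we may unlock the object, fall through to the
 * slower vm_map_enter() path instead.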
7539 */ 7540 page_map_offset = 0; 7541 for (;;) { 7542 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) { 7543 if (vm_paging_page_inuse[i] == FALSE) { 7544 page_map_offset = 7545 vm_paging_base_address + 7546 (i * PAGE_SIZE); 7547 break; 7548 } 7549 } 7550 if (page_map_offset != 0) { 7551 /* found a space to map our page ! */ 7552 break; 7553 } 7554 7555 if (can_unlock_object) { 7556 /* 7557 * If we can afford to unlock the VM object, 7558 * let's take the slow path now... 7559 */ 7560 break; 7561 } 7562 /* 7563 * We can't afford to unlock the VM object, so 7564 * let's wait for a space to become available... 7565 */ 7566 vm_paging_page_waiter_total++; 7567 vm_paging_page_waiter++; 7568 thread_sleep_fast_usimple_lock(&vm_paging_page_waiter, 7569 &vm_paging_lock, 7570 THREAD_UNINT); 7571 vm_paging_page_waiter--; 7572 /* ... and try again */ 7573 } 7574 7575 if (page_map_offset != 0) { 7576 /* 7577 * We found a kernel virtual address; 7578 * map the physical page to that virtual address. 7579 */ 7580 if (i > vm_paging_max_index) { 7581 vm_paging_max_index = i; 7582 } 7583 vm_paging_page_inuse[i] = TRUE; 7584 simple_unlock(&vm_paging_lock); 7585 7586 page->pmapped = TRUE; 7587 7588 /* 7589 * Keep the VM object locked over the PMAP_ENTER 7590 * and the actual use of the page by the kernel, 7591 * or this pmap mapping might get undone by a 7592 * vm_object_pmap_protect() call... 7593 */ 7594 PMAP_ENTER(kernel_pmap, 7595 page_map_offset, 7596 page, 7597 protection, 7598 VM_PROT_NONE, 7599 0, 7600 TRUE); 7601 vm_paging_objects_mapped++; 7602 vm_paging_pages_mapped++; 7603 *address = page_map_offset; 7604 *need_unmap = TRUE; 7605 7606 /* all done and mapped, ready to use ! */ 7607 return KERN_SUCCESS; 7608 } 7609 7610 /* 7611 * We ran out of pre-allocated kernel virtual 7612 * addresses. Just map the page in the kernel 7613 * the slow and regular way. 7614 */ 7615 vm_paging_no_kernel_page++; 7616 simple_unlock(&vm_paging_lock); 7617 } 7618 7619 if (! can_unlock_object) { 7620 *address = 0; 7621 *size = 0; 7622 *need_unmap = FALSE; 7623 return KERN_NOT_SUPPORTED; 7624 } 7625 7626 object_offset = vm_object_trunc_page(offset); 7627 map_size = vm_map_round_page(*size, 7628 VM_MAP_PAGE_MASK(kernel_map)); 7629 7630 /* 7631 * Try and map the required range of the object 7632 * in the kernel_map 7633 */ 7634 7635 vm_object_reference_locked(object); /* for the map entry */ 7636 vm_object_unlock(object); 7637 7638 kr = vm_map_enter(kernel_map, 7639 address, 7640 map_size, 7641 0, 7642 VM_FLAGS_ANYWHERE, 7643 object, 7644 object_offset, 7645 FALSE, 7646 protection, 7647 VM_PROT_ALL, 7648 VM_INHERIT_NONE); 7649 if (kr != KERN_SUCCESS) { 7650 *address = 0; 7651 *size = 0; 7652 *need_unmap = FALSE; 7653 vm_object_deallocate(object); /* for the map entry */ 7654 vm_object_lock(object); 7655 return kr; 7656 } 7657 7658 *size = map_size; 7659 7660 /* 7661 * Enter the mapped pages in the page table now. 7662 */ 7663 vm_object_lock(object); 7664 /* 7665 * VM object must be kept locked from before PMAP_ENTER() 7666 * until after the kernel is done accessing the page(s). 7667 * Otherwise, the pmap mappings in the kernel could be 7668 * undone by a call to vm_object_pmap_protect(). 
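 * The loop below enters every resident page of the mapped range; if any
 * page in the range is missing, the mapping is torn back down and
 * KERN_MEMORY_ERROR is returned.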
7669 */ 7670 7671 for (page_map_offset = 0; 7672 map_size != 0; 7673 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) { 7674 7675 page = vm_page_lookup(object, offset + page_map_offset); 7676 if (page == VM_PAGE_NULL) { 7677 printf("vm_paging_map_object: no page !?"); 7678 vm_object_unlock(object); 7679 kr = vm_map_remove(kernel_map, *address, *size, 7680 VM_MAP_NO_FLAGS); 7681 assert(kr == KERN_SUCCESS); 7682 *address = 0; 7683 *size = 0; 7684 *need_unmap = FALSE; 7685 vm_object_lock(object); 7686 return KERN_MEMORY_ERROR; 7687 } 7688 page->pmapped = TRUE; 7689 7690 //assert(pmap_verify_free(page->phys_page)); 7691 PMAP_ENTER(kernel_pmap, 7692 *address + page_map_offset, 7693 page, 7694 protection, 7695 VM_PROT_NONE, 7696 0, 7697 TRUE); 7698 } 7699 7700 vm_paging_objects_mapped_slow++; 7701 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64); 7702 7703 *need_unmap = TRUE; 7704 7705 return KERN_SUCCESS; 7706} 7707 7708/* 7709 * ENCRYPTED SWAP: 7710 * vm_paging_unmap_object: 7711 * Unmaps part of a VM object's pages from the kernel 7712 * virtual address space. 7713 * Context: 7714 * The VM object is locked. This lock will get 7715 * dropped and re-acquired though. 7716 */ 7717void 7718vm_paging_unmap_object( 7719 vm_object_t object, 7720 vm_map_offset_t start, 7721 vm_map_offset_t end) 7722{ 7723 kern_return_t kr; 7724 int i; 7725 7726 if ((vm_paging_base_address == 0) || 7727 (start < vm_paging_base_address) || 7728 (end > (vm_paging_base_address 7729 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) { 7730 /* 7731 * We didn't use our pre-allocated pool of 7732 * kernel virtual address. Deallocate the 7733 * virtual memory. 7734 */ 7735 if (object != VM_OBJECT_NULL) { 7736 vm_object_unlock(object); 7737 } 7738 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS); 7739 if (object != VM_OBJECT_NULL) { 7740 vm_object_lock(object); 7741 } 7742 assert(kr == KERN_SUCCESS); 7743 } else { 7744 /* 7745 * We used a kernel virtual address from our 7746 * pre-allocated pool. Put it back in the pool 7747 * for next time. 7748 */ 7749 assert(end - start == PAGE_SIZE); 7750 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT); 7751 assert(i >= 0 && i < VM_PAGING_NUM_PAGES); 7752 7753 /* undo the pmap mapping */ 7754 pmap_remove(kernel_pmap, start, end); 7755 7756 simple_lock(&vm_paging_lock); 7757 vm_paging_page_inuse[i] = FALSE; 7758 if (vm_paging_page_waiter) { 7759 thread_wakeup(&vm_paging_page_waiter); 7760 } 7761 simple_unlock(&vm_paging_lock); 7762 } 7763} 7764 7765#if CRYPTO 7766/* 7767 * Encryption data. 7768 * "iv" is the "initial vector". Ideally, we want to 7769 * have a different one for each page we encrypt, so that 7770 * crackers can't find encryption patterns too easily. 7771 */ 7772#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */ 7773boolean_t swap_crypt_ctx_initialized = FALSE; 7774uint32_t swap_crypt_key[8]; /* big enough for a 256 key */ 7775aes_ctx swap_crypt_ctx; 7776const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, }; 7777 7778#if DEBUG 7779boolean_t swap_crypt_ctx_tested = FALSE; 7780unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096))); 7781unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096))); 7782unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096))); 7783#endif /* DEBUG */ 7784 7785/* 7786 * Initialize the encryption context: key and key size. 
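 * The 128-bit AES key is filled from random() on first use and is never
 * written out anywhere, so a fresh key is generated on every boot and
 * swap encrypted during one boot cannot be decrypted after a restart.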
7787 */ 7788void swap_crypt_ctx_initialize(void); /* forward */ 7789void 7790swap_crypt_ctx_initialize(void) 7791{ 7792 unsigned int i; 7793 7794 /* 7795 * No need for locking to protect swap_crypt_ctx_initialized 7796 * because the first use of encryption will come from the 7797 * pageout thread (we won't pagein before there's been a pageout) 7798 * and there's only one pageout thread. 7799 */ 7800 if (swap_crypt_ctx_initialized == FALSE) { 7801 for (i = 0; 7802 i < (sizeof (swap_crypt_key) / 7803 sizeof (swap_crypt_key[0])); 7804 i++) { 7805 swap_crypt_key[i] = random(); 7806 } 7807 aes_encrypt_key((const unsigned char *) swap_crypt_key, 7808 SWAP_CRYPT_AES_KEY_SIZE, 7809 &swap_crypt_ctx.encrypt); 7810 aes_decrypt_key((const unsigned char *) swap_crypt_key, 7811 SWAP_CRYPT_AES_KEY_SIZE, 7812 &swap_crypt_ctx.decrypt); 7813 swap_crypt_ctx_initialized = TRUE; 7814 } 7815 7816#if DEBUG 7817 /* 7818 * Validate the encryption algorithms. 7819 */ 7820 if (swap_crypt_ctx_tested == FALSE) { 7821 /* initialize */ 7822 for (i = 0; i < 4096; i++) { 7823 swap_crypt_test_page_ref[i] = (char) i; 7824 } 7825 /* encrypt */ 7826 aes_encrypt_cbc(swap_crypt_test_page_ref, 7827 swap_crypt_null_iv, 7828 PAGE_SIZE / AES_BLOCK_SIZE, 7829 swap_crypt_test_page_encrypt, 7830 &swap_crypt_ctx.encrypt); 7831 /* decrypt */ 7832 aes_decrypt_cbc(swap_crypt_test_page_encrypt, 7833 swap_crypt_null_iv, 7834 PAGE_SIZE / AES_BLOCK_SIZE, 7835 swap_crypt_test_page_decrypt, 7836 &swap_crypt_ctx.decrypt); 7837 /* compare result with original */ 7838 for (i = 0; i < 4096; i ++) { 7839 if (swap_crypt_test_page_decrypt[i] != 7840 swap_crypt_test_page_ref[i]) { 7841 panic("encryption test failed"); 7842 } 7843 } 7844 7845 /* encrypt again */ 7846 aes_encrypt_cbc(swap_crypt_test_page_decrypt, 7847 swap_crypt_null_iv, 7848 PAGE_SIZE / AES_BLOCK_SIZE, 7849 swap_crypt_test_page_decrypt, 7850 &swap_crypt_ctx.encrypt); 7851 /* decrypt in place */ 7852 aes_decrypt_cbc(swap_crypt_test_page_decrypt, 7853 swap_crypt_null_iv, 7854 PAGE_SIZE / AES_BLOCK_SIZE, 7855 swap_crypt_test_page_decrypt, 7856 &swap_crypt_ctx.decrypt); 7857 for (i = 0; i < 4096; i ++) { 7858 if (swap_crypt_test_page_decrypt[i] != 7859 swap_crypt_test_page_ref[i]) { 7860 panic("in place encryption test failed"); 7861 } 7862 } 7863 7864 swap_crypt_ctx_tested = TRUE; 7865 } 7866#endif /* DEBUG */ 7867} 7868 7869/* 7870 * ENCRYPTED SWAP: 7871 * vm_page_encrypt: 7872 * Encrypt the given page, for secure paging. 7873 * The page might already be mapped at kernel virtual 7874 * address "kernel_mapping_offset". Otherwise, we need 7875 * to map it. 7876 * 7877 * Context: 7878 * The page's object is locked, but this lock will be released 7879 * and re-acquired. 7880 * The page is busy and not accessible by users (not entered in any pmap). 7881 */ 7882void 7883vm_page_encrypt( 7884 vm_page_t page, 7885 vm_map_offset_t kernel_mapping_offset) 7886{ 7887 kern_return_t kr; 7888 vm_map_size_t kernel_mapping_size; 7889 boolean_t kernel_mapping_needs_unmap; 7890 vm_offset_t kernel_vaddr; 7891 union { 7892 unsigned char aes_iv[AES_BLOCK_SIZE]; 7893 struct { 7894 memory_object_t pager_object; 7895 vm_object_offset_t paging_offset; 7896 } vm; 7897 } encrypt_iv; 7898 7899 if (! vm_pages_encrypted) { 7900 vm_pages_encrypted = TRUE; 7901 } 7902 7903 assert(page->busy); 7904 7905 if (page->encrypted) { 7906 /* 7907 * Already encrypted: no need to do it again. 
7908 */ 7909 vm_page_encrypt_already_encrypted_counter++; 7910 return; 7911 } 7912 assert(page->dirty || page->precious); 7913 7914 ASSERT_PAGE_DECRYPTED(page); 7915 7916 /* 7917 * Take a paging-in-progress reference to keep the object 7918 * alive even if we have to unlock it (in vm_paging_map_object() 7919 * for example)... 7920 */ 7921 vm_object_paging_begin(page->object); 7922 7923 if (kernel_mapping_offset == 0) { 7924 /* 7925 * The page hasn't already been mapped in kernel space 7926 * by the caller. Map it now, so that we can access 7927 * its contents and encrypt them. 7928 */ 7929 kernel_mapping_size = PAGE_SIZE; 7930 kernel_mapping_needs_unmap = FALSE; 7931 kr = vm_paging_map_object(page, 7932 page->object, 7933 page->offset, 7934 VM_PROT_READ | VM_PROT_WRITE, 7935 FALSE, 7936 &kernel_mapping_size, 7937 &kernel_mapping_offset, 7938 &kernel_mapping_needs_unmap); 7939 if (kr != KERN_SUCCESS) { 7940 panic("vm_page_encrypt: " 7941 "could not map page in kernel: 0x%x\n", 7942 kr); 7943 } 7944 } else { 7945 kernel_mapping_size = 0; 7946 kernel_mapping_needs_unmap = FALSE; 7947 } 7948 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 7949 7950 if (swap_crypt_ctx_initialized == FALSE) { 7951 swap_crypt_ctx_initialize(); 7952 } 7953 assert(swap_crypt_ctx_initialized); 7954 7955 /* 7956 * Prepare an "initial vector" for the encryption. 7957 * We use the "pager" and the "paging_offset" for that 7958 * page to obfuscate the encrypted data a bit more and 7959 * prevent crackers from finding patterns that they could 7960 * use to break the key. 7961 */ 7962 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv)); 7963 encrypt_iv.vm.pager_object = page->object->pager; 7964 encrypt_iv.vm.paging_offset = 7965 page->object->paging_offset + page->offset; 7966 7967 /* encrypt the "initial vector" */ 7968 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0], 7969 swap_crypt_null_iv, 7970 1, 7971 &encrypt_iv.aes_iv[0], 7972 &swap_crypt_ctx.encrypt); 7973 7974 /* 7975 * Encrypt the page. 7976 */ 7977 aes_encrypt_cbc((const unsigned char *) kernel_vaddr, 7978 &encrypt_iv.aes_iv[0], 7979 PAGE_SIZE / AES_BLOCK_SIZE, 7980 (unsigned char *) kernel_vaddr, 7981 &swap_crypt_ctx.encrypt); 7982 7983 vm_page_encrypt_counter++; 7984 7985 /* 7986 * Unmap the page from the kernel's address space, 7987 * if we had to map it ourselves. Otherwise, let 7988 * the caller undo the mapping if needed. 7989 */ 7990 if (kernel_mapping_needs_unmap) { 7991 vm_paging_unmap_object(page->object, 7992 kernel_mapping_offset, 7993 kernel_mapping_offset + kernel_mapping_size); 7994 } 7995 7996 /* 7997 * Clear the "reference" and "modified" bits. 7998 * This should clean up any impact the encryption had 7999 * on them. 8000 * The page was kept busy and disconnected from all pmaps, 8001 * so it can't have been referenced or modified from user 8002 * space. 8003 * The software bits will be reset later after the I/O 8004 * has completed (in upl_commit_range()). 8005 */ 8006 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED); 8007 8008 page->encrypted = TRUE; 8009 8010 vm_object_paging_end(page->object); 8011} 8012 8013/* 8014 * ENCRYPTED SWAP: 8015 * vm_page_decrypt: 8016 * Decrypt the given page. 8017 * The page might already be mapped at kernel virtual 8018 * address "kernel_mapping_offset". Otherwise, we need 8019 * to map it. 8020 * 8021 * Context: 8022 * The page's VM object is locked but will be unlocked and relocked. 
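 *
 *	A rough sketch (paraphrasing the code in vm_page_encrypt() above and
 *	in the body below; "iv" stands for the local decrypt_iv union) of the
 *	initial vector this routine has to reproduce. It must match what
 *	vm_page_encrypt() used, otherwise the first AES block of the page
 *	decrypts to garbage:
 *
 *		bzero(&iv, AES_BLOCK_SIZE);
 *		iv.vm.pager_object  = page->object->pager;
 *		iv.vm.paging_offset = page->object->paging_offset + page->offset;
 *		aes_encrypt_cbc(&iv, swap_crypt_null_iv, 1, &iv, &swap_crypt_ctx.encrypt);
 *
 *	The per-page IV is therefore a function of the swap key and of the
 *	page's identity (pager + offset), so it is stable across the
 *	encrypt/decrypt round trip and never needs to be stored anywhere.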
8023 * The page is busy and not accessible by users (not entered in any pmap). 8024 */ 8025void 8026vm_page_decrypt( 8027 vm_page_t page, 8028 vm_map_offset_t kernel_mapping_offset) 8029{ 8030 kern_return_t kr; 8031 vm_map_size_t kernel_mapping_size; 8032 vm_offset_t kernel_vaddr; 8033 boolean_t kernel_mapping_needs_unmap; 8034 union { 8035 unsigned char aes_iv[AES_BLOCK_SIZE]; 8036 struct { 8037 memory_object_t pager_object; 8038 vm_object_offset_t paging_offset; 8039 } vm; 8040 } decrypt_iv; 8041 boolean_t was_dirty; 8042 8043 assert(page->busy); 8044 assert(page->encrypted); 8045 8046 was_dirty = page->dirty; 8047 8048 /* 8049 * Take a paging-in-progress reference to keep the object 8050 * alive even if we have to unlock it (in vm_paging_map_object() 8051 * for example)... 8052 */ 8053 vm_object_paging_begin(page->object); 8054 8055 if (kernel_mapping_offset == 0) { 8056 /* 8057 * The page hasn't already been mapped in kernel space 8058 * by the caller. Map it now, so that we can access 8059 * its contents and decrypt them. 8060 */ 8061 kernel_mapping_size = PAGE_SIZE; 8062 kernel_mapping_needs_unmap = FALSE; 8063 kr = vm_paging_map_object(page, 8064 page->object, 8065 page->offset, 8066 VM_PROT_READ | VM_PROT_WRITE, 8067 FALSE, 8068 &kernel_mapping_size, 8069 &kernel_mapping_offset, 8070 &kernel_mapping_needs_unmap); 8071 if (kr != KERN_SUCCESS) { 8072 panic("vm_page_decrypt: " 8073 "could not map page in kernel: 0x%x\n", 8074 kr); 8075 } 8076 } else { 8077 kernel_mapping_size = 0; 8078 kernel_mapping_needs_unmap = FALSE; 8079 } 8080 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 8081 8082 assert(swap_crypt_ctx_initialized); 8083 8084 /* 8085 * Prepare an "initial vector" for the decryption. 8086 * It has to be the same as the "initial vector" we 8087 * used to encrypt that page. 8088 */ 8089 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv)); 8090 decrypt_iv.vm.pager_object = page->object->pager; 8091 decrypt_iv.vm.paging_offset = 8092 page->object->paging_offset + page->offset; 8093 8094 /* encrypt the "initial vector" */ 8095 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0], 8096 swap_crypt_null_iv, 8097 1, 8098 &decrypt_iv.aes_iv[0], 8099 &swap_crypt_ctx.encrypt); 8100 8101 /* 8102 * Decrypt the page. 8103 */ 8104 aes_decrypt_cbc((const unsigned char *) kernel_vaddr, 8105 &decrypt_iv.aes_iv[0], 8106 PAGE_SIZE / AES_BLOCK_SIZE, 8107 (unsigned char *) kernel_vaddr, 8108 &swap_crypt_ctx.decrypt); 8109 vm_page_decrypt_counter++; 8110 8111 /* 8112 * Unmap the page from the kernel's address space, 8113 * if we had to map it ourselves. Otherwise, let 8114 * the caller undo the mapping if needed. 8115 */ 8116 if (kernel_mapping_needs_unmap) { 8117 vm_paging_unmap_object(page->object, 8118 kernel_vaddr, 8119 kernel_vaddr + PAGE_SIZE); 8120 } 8121 8122 if (was_dirty) { 8123 /* 8124 * The pager did not specify that the page would be 8125 * clean when it got paged in, so let's not clean it here 8126 * either. 8127 */ 8128 } else { 8129 /* 8130 * After decryption, the page is actually still clean. 8131 * It was encrypted as part of paging, which "cleans" 8132 * the "dirty" pages. 8133 * Noone could access it after it was encrypted 8134 * and the decryption doesn't count. 
8135 */ 8136 page->dirty = FALSE; 8137 assert (page->cs_validated == FALSE); 8138 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); 8139 } 8140 page->encrypted = FALSE; 8141 8142 /* 8143 * We've just modified the page's contents via the data cache and part 8144 * of the new contents might still be in the cache and not yet in RAM. 8145 * Since the page is now available and might get gathered in a UPL to 8146 * be part of a DMA transfer from a driver that expects the memory to 8147 * be coherent at this point, we have to flush the data cache. 8148 */ 8149 pmap_sync_page_attributes_phys(page->phys_page); 8150 /* 8151 * Since the page is not mapped yet, some code might assume that it 8152 * doesn't need to invalidate the instruction cache when writing to 8153 * that page. That code relies on "pmapped" being FALSE, so that the 8154 * caches get synchronized when the page is first mapped. 8155 */ 8156 assert(pmap_verify_free(page->phys_page)); 8157 page->pmapped = FALSE; 8158 page->wpmapped = FALSE; 8159 8160 vm_object_paging_end(page->object); 8161} 8162 8163#if DEVELOPMENT || DEBUG 8164unsigned long upl_encrypt_upls = 0; 8165unsigned long upl_encrypt_pages = 0; 8166#endif 8167 8168/* 8169 * ENCRYPTED SWAP: 8170 * 8171 * upl_encrypt: 8172 * Encrypts all the pages in the UPL, within the specified range. 8173 * 8174 */ 8175void 8176upl_encrypt( 8177 upl_t upl, 8178 upl_offset_t crypt_offset, 8179 upl_size_t crypt_size) 8180{ 8181 upl_size_t upl_size, subupl_size=crypt_size; 8182 upl_offset_t offset_in_upl, subupl_offset=crypt_offset; 8183 vm_object_t upl_object; 8184 vm_object_offset_t upl_offset; 8185 vm_page_t page; 8186 vm_object_t shadow_object; 8187 vm_object_offset_t shadow_offset; 8188 vm_object_offset_t paging_offset; 8189 vm_object_offset_t base_offset; 8190 int isVectorUPL = 0; 8191 upl_t vector_upl = NULL; 8192 8193 if((isVectorUPL = vector_upl_is_valid(upl))) 8194 vector_upl = upl; 8195 8196process_upl_to_encrypt: 8197 if(isVectorUPL) { 8198 crypt_size = subupl_size; 8199 crypt_offset = subupl_offset; 8200 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size); 8201 if(upl == NULL) 8202 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n"); 8203 subupl_size -= crypt_size; 8204 subupl_offset += crypt_size; 8205 } 8206 8207#if DEVELOPMENT || DEBUG 8208 upl_encrypt_upls++; 8209 upl_encrypt_pages += crypt_size / PAGE_SIZE; 8210#endif 8211 upl_object = upl->map_object; 8212 upl_offset = upl->offset; 8213 upl_size = upl->size; 8214 8215 vm_object_lock(upl_object); 8216 8217 /* 8218 * Find the VM object that contains the actual pages. 8219 */ 8220 if (upl_object->pageout) { 8221 shadow_object = upl_object->shadow; 8222 /* 8223 * The offset in the shadow object is actually also 8224 * accounted for in upl->offset. It possibly shouldn't be 8225 * this way, but for now don't account for it twice. 8226 */ 8227 shadow_offset = 0; 8228 assert(upl_object->paging_offset == 0); /* XXX ? 
*/ 8229 vm_object_lock(shadow_object); 8230 } else { 8231 shadow_object = upl_object; 8232 shadow_offset = 0; 8233 } 8234 8235 paging_offset = shadow_object->paging_offset; 8236 vm_object_paging_begin(shadow_object); 8237 8238 if (shadow_object != upl_object) 8239 vm_object_unlock(upl_object); 8240 8241 8242 base_offset = shadow_offset; 8243 base_offset += upl_offset; 8244 base_offset += crypt_offset; 8245 base_offset -= paging_offset; 8246 8247 assert(crypt_offset + crypt_size <= upl_size); 8248 8249 for (offset_in_upl = 0; 8250 offset_in_upl < crypt_size; 8251 offset_in_upl += PAGE_SIZE) { 8252 page = vm_page_lookup(shadow_object, 8253 base_offset + offset_in_upl); 8254 if (page == VM_PAGE_NULL) { 8255 panic("upl_encrypt: " 8256 "no page for (obj=%p,off=0x%llx+0x%x)!\n", 8257 shadow_object, 8258 base_offset, 8259 offset_in_upl); 8260 } 8261 /* 8262 * Disconnect the page from all pmaps, so that nobody can 8263 * access it while it's encrypted. After that point, all 8264 * accesses to this page will cause a page fault and block 8265 * while the page is busy being encrypted. After the 8266 * encryption completes, any access will cause a 8267 * page fault and the page gets decrypted at that time. 8268 */ 8269 pmap_disconnect(page->phys_page); 8270 vm_page_encrypt(page, 0); 8271 8272 if (vm_object_lock_avoid(shadow_object)) { 8273 /* 8274 * Give vm_pageout_scan() a chance to convert more 8275 * pages from "clean-in-place" to "clean-and-free", 8276 * if it's interested in the same pages we selected 8277 * in this cluster. 8278 */ 8279 vm_object_unlock(shadow_object); 8280 mutex_pause(2); 8281 vm_object_lock(shadow_object); 8282 } 8283 } 8284 8285 vm_object_paging_end(shadow_object); 8286 vm_object_unlock(shadow_object); 8287 8288 if(isVectorUPL && subupl_size) 8289 goto process_upl_to_encrypt; 8290} 8291 8292#else /* CRYPTO */ 8293void 8294upl_encrypt( 8295 __unused upl_t upl, 8296 __unused upl_offset_t crypt_offset, 8297 __unused upl_size_t crypt_size) 8298{ 8299} 8300 8301void 8302vm_page_encrypt( 8303 __unused vm_page_t page, 8304 __unused vm_map_offset_t kernel_mapping_offset) 8305{ 8306} 8307 8308void 8309vm_page_decrypt( 8310 __unused vm_page_t page, 8311 __unused vm_map_offset_t kernel_mapping_offset) 8312{ 8313} 8314 8315#endif /* CRYPTO */ 8316 8317/* 8318 * page->object must be locked 8319 */ 8320void 8321vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked) 8322{ 8323 if (!queues_locked) { 8324 vm_page_lockspin_queues(); 8325 } 8326 8327 /* 8328 * need to drop the laundry count... 8329 * we may also need to remove it 8330 * from the I/O paging queue... 8331 * vm_pageout_throttle_up handles both cases 8332 * 8333 * the laundry and pageout_queue flags are cleared... 
8334 */ 8335 vm_pageout_throttle_up(page); 8336 8337 vm_page_steal_pageout_page++; 8338 8339 if (!queues_locked) { 8340 vm_page_unlock_queues(); 8341 } 8342} 8343 8344upl_t 8345vector_upl_create(vm_offset_t upl_offset) 8346{ 8347 int vector_upl_size = sizeof(struct _vector_upl); 8348 int i=0; 8349 upl_t upl; 8350 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size); 8351 8352 upl = upl_create(0,UPL_VECTOR,0); 8353 upl->vector_upl = vector_upl; 8354 upl->offset = upl_offset; 8355 vector_upl->size = 0; 8356 vector_upl->offset = upl_offset; 8357 vector_upl->invalid_upls=0; 8358 vector_upl->num_upls=0; 8359 vector_upl->pagelist = NULL; 8360 8361 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) { 8362 vector_upl->upl_iostates[i].size = 0; 8363 vector_upl->upl_iostates[i].offset = 0; 8364 8365 } 8366 return upl; 8367} 8368 8369void 8370vector_upl_deallocate(upl_t upl) 8371{ 8372 if(upl) { 8373 vector_upl_t vector_upl = upl->vector_upl; 8374 if(vector_upl) { 8375 if(vector_upl->invalid_upls != vector_upl->num_upls) 8376 panic("Deallocating non-empty Vectored UPL\n"); 8377 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE))); 8378 vector_upl->invalid_upls=0; 8379 vector_upl->num_upls = 0; 8380 vector_upl->pagelist = NULL; 8381 vector_upl->size = 0; 8382 vector_upl->offset = 0; 8383 kfree(vector_upl, sizeof(struct _vector_upl)); 8384 vector_upl = (vector_upl_t)0xfeedfeed; 8385 } 8386 else 8387 panic("vector_upl_deallocate was passed a non-vectored upl\n"); 8388 } 8389 else 8390 panic("vector_upl_deallocate was passed a NULL upl\n"); 8391} 8392 8393boolean_t 8394vector_upl_is_valid(upl_t upl) 8395{ 8396 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) { 8397 vector_upl_t vector_upl = upl->vector_upl; 8398 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef) 8399 return FALSE; 8400 else 8401 return TRUE; 8402 } 8403 return FALSE; 8404} 8405 8406boolean_t 8407vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size) 8408{ 8409 if(vector_upl_is_valid(upl)) { 8410 vector_upl_t vector_upl = upl->vector_upl; 8411 8412 if(vector_upl) { 8413 if(subupl) { 8414 if(io_size) { 8415 if(io_size < PAGE_SIZE) 8416 io_size = PAGE_SIZE; 8417 subupl->vector_upl = (void*)vector_upl; 8418 vector_upl->upl_elems[vector_upl->num_upls++] = subupl; 8419 vector_upl->size += io_size; 8420 upl->size += io_size; 8421 } 8422 else { 8423 uint32_t i=0,invalid_upls=0; 8424 for(i = 0; i < vector_upl->num_upls; i++) { 8425 if(vector_upl->upl_elems[i] == subupl) 8426 break; 8427 } 8428 if(i == vector_upl->num_upls) 8429 panic("Trying to remove sub-upl when none exists"); 8430 8431 vector_upl->upl_elems[i] = NULL; 8432 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1); 8433 if(invalid_upls == vector_upl->num_upls) 8434 return TRUE; 8435 else 8436 return FALSE; 8437 } 8438 } 8439 else 8440 panic("vector_upl_set_subupl was passed a NULL upl element\n"); 8441 } 8442 else 8443 panic("vector_upl_set_subupl was passed a non-vectored upl\n"); 8444 } 8445 else 8446 panic("vector_upl_set_subupl was passed a NULL upl\n"); 8447 8448 return FALSE; 8449} 8450 8451void 8452vector_upl_set_pagelist(upl_t upl) 8453{ 8454 if(vector_upl_is_valid(upl)) { 8455 uint32_t i=0; 8456 vector_upl_t vector_upl = upl->vector_upl; 8457 8458 if(vector_upl) { 8459 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0; 8460 8461 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)); 8462 8463 for(i=0; 
			    i < vector_upl->num_upls; i++) {
				cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
				bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
				pagelist_size += cur_upl_pagelist_size;
				if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
					upl->highest_page = vector_upl->upl_elems[i]->highest_page;
			}
			assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
		}
		else
			panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
	}
	else
		panic("vector_upl_set_pagelist was passed a NULL upl\n");

}

upl_t
vector_upl_subupl_byindex(upl_t upl, uint32_t index)
{
	if(vector_upl_is_valid(upl)) {
		vector_upl_t vector_upl = upl->vector_upl;
		if(vector_upl) {
			if(index < vector_upl->num_upls)
				return vector_upl->upl_elems[index];
		}
		else
			panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
	}
	return NULL;
}

upl_t
vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
{
	if(vector_upl_is_valid(upl)) {
		uint32_t i=0;
		vector_upl_t vector_upl = upl->vector_upl;

		if(vector_upl) {
			upl_t subupl = NULL;
			vector_upl_iostates_t subupl_state;

			for(i=0; i < vector_upl->num_upls; i++) {
				subupl = vector_upl->upl_elems[i];
				subupl_state = vector_upl->upl_iostates[i];
				if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
					/* We could have been passed an offset/size pair that belongs
					 * to an UPL element that has already been committed/aborted.
					 * If so, return NULL.
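					 *
					 * For example (illustrative numbers, not taken from the
					 * original comment): with two sub-UPL iostates of
					 * {offset 0x0, size 0x8000} and {offset 0x8000, size 0x4000},
					 * an incoming *upl_offset of 0x9000 with *upl_size of 0x2000
					 * falls within the second entry; the requested size already
					 * fits inside that sub-UPL so it is left unchanged, and
					 * *upl_offset is rebased to 0x9000 - 0x8000 = 0x1000 relative
					 * to the returned sub-UPL.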
8513 */ 8514 if(subupl == NULL) 8515 return NULL; 8516 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) { 8517 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset; 8518 if(*upl_size > subupl_state.size) 8519 *upl_size = subupl_state.size; 8520 } 8521 if(*upl_offset >= subupl_state.offset) 8522 *upl_offset -= subupl_state.offset; 8523 else if(i) 8524 panic("Vector UPL offset miscalculation\n"); 8525 return subupl; 8526 } 8527 } 8528 } 8529 else 8530 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n"); 8531 } 8532 return NULL; 8533} 8534 8535void 8536vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr) 8537{ 8538 *v_upl_submap = NULL; 8539 8540 if(vector_upl_is_valid(upl)) { 8541 vector_upl_t vector_upl = upl->vector_upl; 8542 if(vector_upl) { 8543 *v_upl_submap = vector_upl->submap; 8544 *submap_dst_addr = vector_upl->submap_dst_addr; 8545 } 8546 else 8547 panic("vector_upl_get_submap was passed a non-vectored UPL\n"); 8548 } 8549 else 8550 panic("vector_upl_get_submap was passed a null UPL\n"); 8551} 8552 8553void 8554vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr) 8555{ 8556 if(vector_upl_is_valid(upl)) { 8557 vector_upl_t vector_upl = upl->vector_upl; 8558 if(vector_upl) { 8559 vector_upl->submap = submap; 8560 vector_upl->submap_dst_addr = submap_dst_addr; 8561 } 8562 else 8563 panic("vector_upl_get_submap was passed a non-vectored UPL\n"); 8564 } 8565 else 8566 panic("vector_upl_get_submap was passed a NULL UPL\n"); 8567} 8568 8569void 8570vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size) 8571{ 8572 if(vector_upl_is_valid(upl)) { 8573 uint32_t i = 0; 8574 vector_upl_t vector_upl = upl->vector_upl; 8575 8576 if(vector_upl) { 8577 for(i = 0; i < vector_upl->num_upls; i++) { 8578 if(vector_upl->upl_elems[i] == subupl) 8579 break; 8580 } 8581 8582 if(i == vector_upl->num_upls) 8583 panic("setting sub-upl iostate when none exists"); 8584 8585 vector_upl->upl_iostates[i].offset = offset; 8586 if(size < PAGE_SIZE) 8587 size = PAGE_SIZE; 8588 vector_upl->upl_iostates[i].size = size; 8589 } 8590 else 8591 panic("vector_upl_set_iostate was passed a non-vectored UPL\n"); 8592 } 8593 else 8594 panic("vector_upl_set_iostate was passed a NULL UPL\n"); 8595} 8596 8597void 8598vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size) 8599{ 8600 if(vector_upl_is_valid(upl)) { 8601 uint32_t i = 0; 8602 vector_upl_t vector_upl = upl->vector_upl; 8603 8604 if(vector_upl) { 8605 for(i = 0; i < vector_upl->num_upls; i++) { 8606 if(vector_upl->upl_elems[i] == subupl) 8607 break; 8608 } 8609 8610 if(i == vector_upl->num_upls) 8611 panic("getting sub-upl iostate when none exists"); 8612 8613 *offset = vector_upl->upl_iostates[i].offset; 8614 *size = vector_upl->upl_iostates[i].size; 8615 } 8616 else 8617 panic("vector_upl_get_iostate was passed a non-vectored UPL\n"); 8618 } 8619 else 8620 panic("vector_upl_get_iostate was passed a NULL UPL\n"); 8621} 8622 8623void 8624vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size) 8625{ 8626 if(vector_upl_is_valid(upl)) { 8627 vector_upl_t vector_upl = upl->vector_upl; 8628 if(vector_upl) { 8629 if(index < vector_upl->num_upls) { 8630 *offset = vector_upl->upl_iostates[index].offset; 8631 *size = vector_upl->upl_iostates[index].size; 8632 } 8633 else 8634 *offset = *size = 0; 8635 } 8636 else 8637 panic("vector_upl_get_iostate_byindex was 
passed a non-vectored UPL\n"); 8638 } 8639 else 8640 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n"); 8641} 8642 8643upl_page_info_t * 8644upl_get_internal_vectorupl_pagelist(upl_t upl) 8645{ 8646 return ((vector_upl_t)(upl->vector_upl))->pagelist; 8647} 8648 8649void * 8650upl_get_internal_vectorupl(upl_t upl) 8651{ 8652 return upl->vector_upl; 8653} 8654 8655vm_size_t 8656upl_get_internal_pagelist_offset(void) 8657{ 8658 return sizeof(struct upl); 8659} 8660 8661void 8662upl_clear_dirty( 8663 upl_t upl, 8664 boolean_t value) 8665{ 8666 if (value) { 8667 upl->flags |= UPL_CLEAR_DIRTY; 8668 } else { 8669 upl->flags &= ~UPL_CLEAR_DIRTY; 8670 } 8671} 8672 8673void 8674upl_set_referenced( 8675 upl_t upl, 8676 boolean_t value) 8677{ 8678 upl_lock(upl); 8679 if (value) { 8680 upl->ext_ref_count++; 8681 } else { 8682 if (!upl->ext_ref_count) { 8683 panic("upl_set_referenced not %p\n", upl); 8684 } 8685 upl->ext_ref_count--; 8686 } 8687 upl_unlock(upl); 8688} 8689 8690boolean_t 8691vm_page_is_slideable(vm_page_t m) 8692{ 8693 boolean_t result = FALSE; 8694 vm_shared_region_slide_info_t si; 8695 8696 vm_object_lock_assert_held(m->object); 8697 8698 /* make sure our page belongs to the one object allowed to do this */ 8699 if (!m->object->object_slid) { 8700 goto done; 8701 } 8702 8703 si = m->object->vo_slide_info; 8704 if (si == NULL) { 8705 goto done; 8706 } 8707 8708 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) { 8709 result = TRUE; 8710 } 8711 8712done: 8713 return result; 8714} 8715 8716int vm_page_slide_counter = 0; 8717int vm_page_slide_errors = 0; 8718kern_return_t 8719vm_page_slide( 8720 vm_page_t page, 8721 vm_map_offset_t kernel_mapping_offset) 8722{ 8723 kern_return_t kr; 8724 vm_map_size_t kernel_mapping_size; 8725 boolean_t kernel_mapping_needs_unmap; 8726 vm_offset_t kernel_vaddr; 8727 uint32_t pageIndex = 0; 8728 8729 assert(!page->slid); 8730 assert(page->object->object_slid); 8731 vm_object_lock_assert_exclusive(page->object); 8732 8733 if (page->error) 8734 return KERN_FAILURE; 8735 8736 /* 8737 * Take a paging-in-progress reference to keep the object 8738 * alive even if we have to unlock it (in vm_paging_map_object() 8739 * for example)... 8740 */ 8741 vm_object_paging_begin(page->object); 8742 8743 if (kernel_mapping_offset == 0) { 8744 /* 8745 * The page hasn't already been mapped in kernel space 8746 * by the caller. Map it now, so that we can access 8747 * its contents and decrypt them. 8748 */ 8749 kernel_mapping_size = PAGE_SIZE; 8750 kernel_mapping_needs_unmap = FALSE; 8751 kr = vm_paging_map_object(page, 8752 page->object, 8753 page->offset, 8754 VM_PROT_READ | VM_PROT_WRITE, 8755 FALSE, 8756 &kernel_mapping_size, 8757 &kernel_mapping_offset, 8758 &kernel_mapping_needs_unmap); 8759 if (kr != KERN_SUCCESS) { 8760 panic("vm_page_slide: " 8761 "could not map page in kernel: 0x%x\n", 8762 kr); 8763 } 8764 } else { 8765 kernel_mapping_size = 0; 8766 kernel_mapping_needs_unmap = FALSE; 8767 } 8768 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 8769 8770 /* 8771 * Slide the pointers on the page. 
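 *
 * Worked example (illustrative numbers only): if this object's
 * vo_slide_info->start is 0x200000 and page->offset is 0x205000, then with
 * 4 KB pages pageIndex = (0x205000 - 0x200000) / PAGE_SIZE = 5, and
 * vm_shared_region_slide_page() is asked to rewrite the copy of the page
 * mapped at kernel_vaddr using the slide information recorded for that
 * page index.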
8772 */ 8773 8774 /*assert that slide_file_info.start/end are page-aligned?*/ 8775 8776 assert(!page->slid); 8777 assert(page->object->object_slid); 8778 8779 pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/PAGE_SIZE); 8780 kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr, pageIndex); 8781 vm_page_slide_counter++; 8782 8783 /* 8784 * Unmap the page from the kernel's address space, 8785 */ 8786 if (kernel_mapping_needs_unmap) { 8787 vm_paging_unmap_object(page->object, 8788 kernel_vaddr, 8789 kernel_vaddr + PAGE_SIZE); 8790 } 8791 8792 page->dirty = FALSE; 8793 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); 8794 8795 if (kr != KERN_SUCCESS || cs_debug > 1) { 8796 printf("vm_page_slide(%p): " 8797 "obj %p off 0x%llx mobj %p moff 0x%llx\n", 8798 page, 8799 page->object, page->offset, 8800 page->object->pager, 8801 page->offset + page->object->paging_offset); 8802 } 8803 8804 if (kr == KERN_SUCCESS) { 8805 page->slid = TRUE; 8806 } else { 8807 page->error = TRUE; 8808 vm_page_slide_errors++; 8809 } 8810 8811 vm_object_paging_end(page->object); 8812 8813 return kr; 8814} 8815 8816void inline memoryshot(unsigned int event, unsigned int control) 8817{ 8818 if (vm_debug_events) { 8819 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control, 8820 vm_page_active_count, vm_page_inactive_count, 8821 vm_page_free_count, vm_page_speculative_count, 8822 vm_page_throttled_count); 8823 } else { 8824 (void) event; 8825 (void) control; 8826 } 8827 8828} 8829 8830#ifdef MACH_BSD 8831 8832boolean_t upl_device_page(upl_page_info_t *upl) 8833{ 8834 return(UPL_DEVICE_PAGE(upl)); 8835} 8836boolean_t upl_page_present(upl_page_info_t *upl, int index) 8837{ 8838 return(UPL_PAGE_PRESENT(upl, index)); 8839} 8840boolean_t upl_speculative_page(upl_page_info_t *upl, int index) 8841{ 8842 return(UPL_SPECULATIVE_PAGE(upl, index)); 8843} 8844boolean_t upl_dirty_page(upl_page_info_t *upl, int index) 8845{ 8846 return(UPL_DIRTY_PAGE(upl, index)); 8847} 8848boolean_t upl_valid_page(upl_page_info_t *upl, int index) 8849{ 8850 return(UPL_VALID_PAGE(upl, index)); 8851} 8852ppnum_t upl_phys_page(upl_page_info_t *upl, int index) 8853{ 8854 return(UPL_PHYS_PAGE(upl, index)); 8855} 8856 8857 8858void 8859vm_countdirtypages(void) 8860{ 8861 vm_page_t m; 8862 int dpages; 8863 int pgopages; 8864 int precpages; 8865 8866 8867 dpages=0; 8868 pgopages=0; 8869 precpages=0; 8870 8871 vm_page_lock_queues(); 8872 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 8873 do { 8874 if (m ==(vm_page_t )0) break; 8875 8876 if(m->dirty) dpages++; 8877 if(m->pageout) pgopages++; 8878 if(m->precious) precpages++; 8879 8880 assert(m->object != kernel_object); 8881 m = (vm_page_t) queue_next(&m->pageq); 8882 if (m ==(vm_page_t )0) break; 8883 8884 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m)); 8885 vm_page_unlock_queues(); 8886 8887 vm_page_lock_queues(); 8888 m = (vm_page_t) queue_first(&vm_page_queue_throttled); 8889 do { 8890 if (m ==(vm_page_t )0) break; 8891 8892 dpages++; 8893 assert(m->dirty); 8894 assert(!m->pageout); 8895 assert(m->object != kernel_object); 8896 m = (vm_page_t) queue_next(&m->pageq); 8897 if (m ==(vm_page_t )0) break; 8898 8899 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m)); 8900 vm_page_unlock_queues(); 8901 8902 vm_page_lock_queues(); 8903 m = (vm_page_t) queue_first(&vm_page_queue_anonymous); 8904 do { 8905 if (m ==(vm_page_t )0) break; 8906 8907 if(m->dirty) dpages++; 8908 if(m->pageout) 
pgopages++; 8909 if(m->precious) precpages++; 8910 8911 assert(m->object != kernel_object); 8912 m = (vm_page_t) queue_next(&m->pageq); 8913 if (m ==(vm_page_t )0) break; 8914 8915 } while (!queue_end(&vm_page_queue_anonymous,(queue_entry_t) m)); 8916 vm_page_unlock_queues(); 8917 8918 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages); 8919 8920 dpages=0; 8921 pgopages=0; 8922 precpages=0; 8923 8924 vm_page_lock_queues(); 8925 m = (vm_page_t) queue_first(&vm_page_queue_active); 8926 8927 do { 8928 if(m == (vm_page_t )0) break; 8929 if(m->dirty) dpages++; 8930 if(m->pageout) pgopages++; 8931 if(m->precious) precpages++; 8932 8933 assert(m->object != kernel_object); 8934 m = (vm_page_t) queue_next(&m->pageq); 8935 if(m == (vm_page_t )0) break; 8936 8937 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m)); 8938 vm_page_unlock_queues(); 8939 8940 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages); 8941 8942} 8943#endif /* MACH_BSD */ 8944 8945ppnum_t upl_get_highest_page( 8946 upl_t upl) 8947{ 8948 return upl->highest_page; 8949} 8950 8951upl_size_t upl_get_size( 8952 upl_t upl) 8953{ 8954 return upl->size; 8955} 8956 8957#if UPL_DEBUG 8958kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2) 8959{ 8960 upl->ubc_alias1 = alias1; 8961 upl->ubc_alias2 = alias2; 8962 return KERN_SUCCESS; 8963} 8964int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2) 8965{ 8966 if(al) 8967 *al = upl->ubc_alias1; 8968 if(al2) 8969 *al2 = upl->ubc_alias2; 8970 return KERN_SUCCESS; 8971} 8972#endif /* UPL_DEBUG */ 8973
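/*
 * Illustrative sketch, not part of the original file: one way a kernel
 * client that already holds a upl_page_info_t array describing a UPL
 * (and is built with MACH_BSD, so the accessors above are compiled in)
 * might summarize that UPL, e.g. from a debugging path.  The function name
 * "example_upl_pagelist_summary" and the way the caller obtained "pl" and
 * "page_count" are assumptions made for the example; they are not existing
 * XNU interfaces.
 */
static void
example_upl_pagelist_summary(upl_page_info_t *pl, int page_count)
{
	int	i;
	int	present = 0;
	int	dirty = 0;
	ppnum_t	highest = 0;

	for (i = 0; i < page_count; i++) {
		if (!upl_page_present(pl, i)) {
			/* no page was entered at this index */
			continue;
		}
		present++;
		if (upl_dirty_page(pl, i))
			dirty++;
		if (upl_phys_page(pl, i) > highest)
			highest = upl_phys_page(pl, i);
	}
	printf("UPL summary: %d present, %d dirty, highest ppnum 0x%x\n",
	    present, dirty, (unsigned int) highest);
}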