/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_pageout.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	The proverbial page-out daemon.
 */

#include <stdint.h>

#include <debug.h>
#include <mach_pagemap.h>
#include <mach_cluster_stats.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/mach_host_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <mach/vm_param.h>
#include <mach/vm_statistics.h>
#include <mach/sdt.h>

#include <kern/kern_types.h>
#include <kern/counters.h>
#include <kern/host_statistics.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/sched.h>
#include <kern/thread.h>
#include <kern/xpr.h>
#include <kern/kalloc.h>

#include <machine/vm_tuning.h>
#include <machine/commpage.h>

#include <vm/pmap.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h> /* must be last */
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif
/*
 * ENCRYPTED SWAP:
 */
#include <libkern/crypto/aes.h>
extern u_int32_t random(void);	/* from <libkern/libkern.h> */

extern int cs_debug;

#if UPL_DEBUG
#include <libkern/OSDebug.h>
#endif

extern void m_drain(void);

#if VM_PRESSURE_EVENTS
extern unsigned int memorystatus_available_pages;
extern unsigned int memorystatus_available_pages_pressure;
extern unsigned int memorystatus_available_pages_critical;
extern unsigned int memorystatus_frozen_count;
extern unsigned int memorystatus_suspended_count;

extern vm_pressure_level_t memorystatus_vm_pressure_level;
int memorystatus_purge_on_warning = 2;
int memorystatus_purge_on_urgent = 5;
int memorystatus_purge_on_critical = 8;

void vm_pressure_response(void);
boolean_t vm_pressure_thread_running = FALSE;
extern void consider_vm_pressure_events(void);

#define MEMORYSTATUS_SUSPENDED_THRESHOLD  4
#endif /* VM_PRESSURE_EVENTS */

boolean_t	vm_pressure_changed = FALSE;

#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE   /* maximum iterations of the active queue to move pages to inactive */
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  100
#endif

#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE  /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
#endif

#ifndef VM_PAGEOUT_DEADLOCK_RELIEF
#define VM_PAGEOUT_DEADLOCK_RELIEF 100	/* number of pages to move to break deadlock */
#endif

#ifndef VM_PAGEOUT_INACTIVE_RELIEF
#define VM_PAGEOUT_INACTIVE_RELIEF 50	/* minimum number of pages to move to the inactive q */
#endif

#ifndef VM_PAGE_LAUNDRY_MAX
#define VM_PAGE_LAUNDRY_MAX	128UL	/* maximum pageouts on a given pageout queue */
#endif	/* VM_PAGEOUT_LAUNDRY_MAX */

#ifndef VM_PAGEOUT_BURST_WAIT
#define VM_PAGEOUT_BURST_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_BURST_WAIT */

#ifndef	VM_PAGEOUT_EMPTY_WAIT
#define VM_PAGEOUT_EMPTY_WAIT	200	/* milliseconds */
#endif	/* VM_PAGEOUT_EMPTY_WAIT */

#ifndef	VM_PAGEOUT_DEADLOCK_WAIT
#define VM_PAGEOUT_DEADLOCK_WAIT	300	/* milliseconds */
#endif	/* VM_PAGEOUT_DEADLOCK_WAIT */

#ifndef	VM_PAGEOUT_IDLE_WAIT
#define VM_PAGEOUT_IDLE_WAIT	10	/* milliseconds */
#endif	/* VM_PAGEOUT_IDLE_WAIT */

#ifndef	VM_PAGEOUT_SWAP_WAIT
#define VM_PAGEOUT_SWAP_WAIT	50	/* milliseconds */
#endif	/* VM_PAGEOUT_SWAP_WAIT */

#ifndef VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED
#define VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED		1000	/* maximum pages considered before we issue a pressure event */
#endif /* VM_PAGEOUT_PRESSURE_PAGES_CONSIDERED */

#ifndef VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS
#define VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS		5	/* seconds */
#endif /* VM_PAGEOUT_PRESSURE_EVENT_MONITOR_SECS */

unsigned int	vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
unsigned int	vm_page_speculative_percentage = 5;

#ifndef VM_PAGE_SPECULATIVE_TARGET
#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_page_speculative_percentage))
#endif /* VM_PAGE_SPECULATIVE_TARGET */


#ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT
#define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200)
#endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */


/*
 *	To obtain a reasonable LRU approximation, the inactive queue
 *	needs to be large enough to give pages on it a chance to be
 *	referenced a second time.  This macro defines the fraction
 *	of active+inactive pages that should be inactive.
 *	The pageout daemon uses it to update vm_page_inactive_target.
 *
 *	If vm_page_free_count falls below vm_page_free_target and
 *	vm_page_inactive_count is below vm_page_inactive_target,
 *	then the pageout daemon starts running.
 */

#ifndef	VM_PAGE_INACTIVE_TARGET
#define	VM_PAGE_INACTIVE_TARGET(avail)	((avail) * 1 / 2)
#endif	/* VM_PAGE_INACTIVE_TARGET */

/*
 *	Once the pageout daemon starts running, it keeps going
 *	until vm_page_free_count meets or exceeds vm_page_free_target.
 */

#ifndef	VM_PAGE_FREE_TARGET
#define	VM_PAGE_FREE_TARGET(free)	(15 + (free) / 80)
#endif	/* VM_PAGE_FREE_TARGET */


/*
 *	The pageout daemon always starts running once vm_page_free_count
 *	falls below vm_page_free_min.
 */

#ifndef	VM_PAGE_FREE_MIN
#define	VM_PAGE_FREE_MIN(free)		(10 + (free) / 100)
#endif	/* VM_PAGE_FREE_MIN */

#define VM_PAGE_FREE_RESERVED_LIMIT	1700
#define VM_PAGE_FREE_MIN_LIMIT		3500
#define VM_PAGE_FREE_TARGET_LIMIT	4000

/*
 *	When vm_page_free_count falls below vm_page_free_reserved,
 *	only vm-privileged threads can allocate pages.  vm-privilege
 *	allows the pageout daemon and default pager (and any other
 *	associated threads needed for default pageout) to continue
 *	operation by dipping into the reserved pool of pages.
 */

#ifndef	VM_PAGE_FREE_RESERVED
#define	VM_PAGE_FREE_RESERVED(n)	\
	((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
#endif	/* VM_PAGE_FREE_RESERVED */

/*
 *	When we dequeue pages from the inactive list, they are
 *	reactivated (ie, put back on the active queue) if referenced.
 *	However, it is possible to starve the free list if other
 *	processors are referencing pages faster than we can turn off
 *	the referenced bit.  So we limit the number of reactivations
 *	we will make per call of vm_pageout_scan().
 */
#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
#ifndef	VM_PAGE_REACTIVATE_LIMIT
#define	VM_PAGE_REACTIVATE_LIMIT(avail)	(MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
#endif	/* VM_PAGE_REACTIVATE_LIMIT */
#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM	100


extern boolean_t hibernate_cleaning_in_progress;

/*
 * Exported variable used to broadcast the activation of the pageout scan
 * Working Set uses this to throttle its use of pmap removes.  In this
 * way, code which runs within memory in an uncontested context does
 * not keep encountering soft faults.
 */

unsigned int	vm_pageout_scan_event_counter = 0;

/*
 * Forward declarations for internal routines.
 */
struct cq {
	struct vm_pageout_queue *q;
	void			*current_chead;
	char			*scratch_buf;
};


#if VM_PRESSURE_EVENTS
void vm_pressure_thread(void);

boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);

boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
#endif
static void vm_pageout_garbage_collect(int);
static void vm_pageout_iothread_continue(struct vm_pageout_queue *);
static void vm_pageout_iothread_external(void);
static void vm_pageout_iothread_internal(struct cq *cq);
static void vm_pageout_adjust_io_throttles(struct vm_pageout_queue *, struct vm_pageout_queue *, boolean_t);

extern void vm_pageout_continue(void);
extern void vm_pageout_scan(void);

static thread_t	vm_pageout_external_iothread = THREAD_NULL;
static thread_t	vm_pageout_internal_iothread = THREAD_NULL;

unsigned int vm_pageout_reserved_internal = 0;
unsigned int vm_pageout_reserved_really = 0;

unsigned int vm_pageout_swap_wait = 0;
unsigned int vm_pageout_idle_wait = 0;		/* milliseconds */
unsigned int vm_pageout_empty_wait = 0;		/* milliseconds */
unsigned int vm_pageout_burst_wait = 0;		/* milliseconds */
unsigned int vm_pageout_deadlock_wait = 0;	/* milliseconds */
unsigned int vm_pageout_deadlock_relief = 0;
unsigned int vm_pageout_inactive_relief = 0;
unsigned int vm_pageout_burst_active_throttle = 0;
unsigned int vm_pageout_burst_inactive_throttle = 0;

int	vm_upl_wait_for_pages = 0;


/*
 *	These variables record the pageout daemon's actions:
 *	how many pages it looks at and what happens to those pages.
 *	No locking needed because only one thread modifies the variables.
 */

unsigned int vm_pageout_active = 0;		/* debugging */
unsigned int vm_pageout_active_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive = 0;		/* debugging */
unsigned int vm_pageout_inactive_throttled = 0;	/* debugging */
unsigned int vm_pageout_inactive_forced = 0;	/* debugging */
unsigned int vm_pageout_inactive_nolock = 0;	/* debugging */
unsigned int vm_pageout_inactive_avoid = 0;	/* debugging */
unsigned int vm_pageout_inactive_busy = 0;	/* debugging */
unsigned int vm_pageout_inactive_error = 0;	/* debugging */
unsigned int vm_pageout_inactive_absent = 0;	/* debugging */
unsigned int vm_pageout_inactive_notalive = 0;	/* debugging */
unsigned int vm_pageout_inactive_used = 0;	/* debugging */
unsigned int vm_pageout_cache_evicted = 0;	/* debugging */
unsigned int vm_pageout_inactive_clean = 0;	/* debugging */
unsigned int vm_pageout_speculative_clean = 0;	/* debugging */

unsigned int vm_pageout_freed_from_cleaned = 0;
unsigned int vm_pageout_freed_from_speculative = 0;
unsigned int vm_pageout_freed_from_inactive_clean = 0;

unsigned int vm_pageout_enqueued_cleaned_from_inactive_clean = 0;
unsigned int vm_pageout_enqueued_cleaned_from_inactive_dirty = 0;

unsigned int vm_pageout_cleaned_reclaimed = 0;		/* debugging; how many cleaned pages are reclaimed by the pageout scan */
unsigned int vm_pageout_cleaned_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on pageout (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_reference_reactivated = 0;
unsigned int vm_pageout_cleaned_volatile_reactivated = 0;
unsigned int vm_pageout_cleaned_fault_reactivated = 0;
unsigned int vm_pageout_cleaned_commit_reactivated = 0;	/* debugging; how many cleaned pages are found to be referenced on commit (and are therefore reactivated) */
unsigned int vm_pageout_cleaned_busy = 0;
unsigned int vm_pageout_cleaned_nolock = 0;

unsigned int vm_pageout_inactive_dirty_internal = 0;	/* debugging */
unsigned int vm_pageout_inactive_dirty_external = 0;	/* debugging */
unsigned int vm_pageout_inactive_deactivated = 0;	/* debugging */
unsigned int vm_pageout_inactive_anonymous = 0;		/* debugging */
unsigned int vm_pageout_dirty_no_pager = 0;		/* debugging */
unsigned int vm_pageout_purged_objects = 0;		/* debugging */
unsigned int vm_stat_discard = 0;			/* debugging */
unsigned int vm_stat_discard_sent = 0;			/* debugging */
unsigned int vm_stat_discard_failure = 0;		/* debugging */
unsigned int vm_stat_discard_throttle = 0;		/* debugging */
unsigned int vm_pageout_reactivation_limit_exceeded = 0;	/* debugging */
unsigned int vm_pageout_catch_ups = 0;				/* debugging */
unsigned int vm_pageout_inactive_force_reclaim = 0;	/* debugging */

unsigned int vm_pageout_scan_reclaimed_throttled = 0;
unsigned int vm_pageout_scan_active_throttled = 0;
unsigned int vm_pageout_scan_inactive_throttled_internal = 0;
unsigned int vm_pageout_scan_inactive_throttled_external = 0;
unsigned int vm_pageout_scan_throttle = 0;			/* debugging */
unsigned int vm_pageout_scan_burst_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_empty_throttle = 0;		/* debugging */
unsigned int vm_pageout_scan_swap_throttle = 0;			/* debugging */
unsigned int vm_pageout_scan_deadlock_detected = 0;		/* debugging */
unsigned int vm_pageout_scan_active_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_scan_inactive_throttle_success = 0;	/* debugging */
unsigned int vm_pageout_inactive_external_forced_jetsam_count = 0;	/* debugging */
unsigned int vm_page_speculative_count_drifts = 0;
unsigned int vm_page_speculative_count_drift_max = 0;


/*
 * Backing store throttle when BS is exhausted
 */
unsigned int	vm_backing_store_low = 0;

unsigned int vm_pageout_out_of_line  = 0;
unsigned int vm_pageout_in_place  = 0;

unsigned int vm_page_steal_pageout_page = 0;

/*
 * ENCRYPTED SWAP:
 * counters and statistics...
 */
unsigned long vm_page_decrypt_counter = 0;
unsigned long vm_page_decrypt_for_upl_counter = 0;
unsigned long vm_page_encrypt_counter = 0;
unsigned long vm_page_encrypt_abort_counter = 0;
unsigned long vm_page_encrypt_already_encrypted_counter = 0;
boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */

struct	vm_pageout_queue vm_pageout_queue_internal;
struct	vm_pageout_queue vm_pageout_queue_external;

unsigned int vm_page_speculative_target = 0;

vm_object_t	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;

#if DEVELOPMENT || DEBUG
unsigned long vm_cs_validated_resets = 0;
#endif

int	vm_debug_events	= 0;

#if CONFIG_MEMORYSTATUS
#if !CONFIG_JETSAM
extern boolean_t memorystatus_idle_exit_from_VM(void);
#endif
extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
extern void memorystatus_on_pageout_scan_end(void);
#endif

boolean_t	vm_page_compressions_failing = FALSE;

/*
 *	Routine:	vm_backing_store_disable
 *	Purpose:
 *		Suspend non-privileged threads wishing to extend
 *		backing store when we are low on backing store
 *		(Synchronized by caller)
 */
void
vm_backing_store_disable(
	boolean_t	disable)
{
	if(disable) {
		vm_backing_store_low = 1;
	} else {
		if(vm_backing_store_low) {
			vm_backing_store_low = 0;
			thread_wakeup((event_t) &vm_backing_store_low);
		}
	}
}


#if MACH_CLUSTER_STATS
unsigned long vm_pageout_cluster_dirtied = 0;
unsigned long vm_pageout_cluster_cleaned = 0;
unsigned long vm_pageout_cluster_collisions = 0;
unsigned long vm_pageout_cluster_clusters = 0;
unsigned long vm_pageout_cluster_conversions = 0;
unsigned long vm_pageout_target_collisions = 0;
unsigned long vm_pageout_target_page_dirtied = 0;
unsigned long vm_pageout_target_page_freed = 0;
#define CLUSTER_STAT(clause)	clause
#else	/* MACH_CLUSTER_STATS */
#define CLUSTER_STAT(clause)
#endif	/* MACH_CLUSTER_STATS */

/*
 *	Routine:	vm_pageout_object_terminate
 *	Purpose:
 *		Destroy the pageout_object, and perform all of the
 *		required cleanup actions.
 *
 *	In/Out conditions:
 *		The object must be locked, and will be returned locked.
 */
void
vm_pageout_object_terminate(
	vm_object_t	object)
{
	vm_object_t	shadow_object;

	/*
	 * Deal with the deallocation (last reference) of a pageout object
	 * (used for cleaning-in-place) by dropping the paging references/
	 * freeing pages in the original object.
	 */

	assert(object->pageout);
	shadow_object = object->shadow;
	vm_object_lock(shadow_object);

	while (!queue_empty(&object->memq)) {
		vm_page_t		p, m;
		vm_object_offset_t	offset;

		p = (vm_page_t) queue_first(&object->memq);

		assert(p->private);
		assert(p->pageout);
		p->pageout = FALSE;
		assert(!p->cleaning);
		assert(!p->laundry);

		offset = p->offset;
		VM_PAGE_FREE(p);
		p = VM_PAGE_NULL;

		m = vm_page_lookup(shadow_object,
			offset + object->vo_shadow_offset);

		if(m == VM_PAGE_NULL)
			continue;

		assert((m->dirty) || (m->precious) ||
				(m->busy && m->cleaning));

		/*
		 * Handle the trusted pager throttle.
		 * Also decrement the burst throttle (if external).
		 */
		vm_page_lock_queues();
		if (m->pageout_queue)
			vm_pageout_throttle_up(m);

		/*
		 * Handle the "target" page(s). These pages are to be freed if
		 * successfully cleaned. Target pages are always busy, and are
		 * wired exactly once. The initial target pages are not mapped,
		 * (so cannot be referenced or modified) but converted target
		 * pages may have been modified between the selection as an
		 * adjacent page and conversion to a target.
		 */
		if (m->pageout) {
			assert(m->busy);
			assert(m->wire_count == 1);
			m->cleaning = FALSE;
			m->encrypted_cleaning = FALSE;
			m->pageout = FALSE;
#if MACH_CLUSTER_STATS
			if (m->wanted) vm_pageout_target_collisions++;
#endif
			/*
			 * Revoke all access to the page. Since the object is
			 * locked, and the page is busy, this prevents the page
			 * from being dirtied after the pmap_disconnect() call
			 * returns.
			 *
			 * Since the page is left "dirty" but "not modified", we
			 * can detect whether the page was redirtied during
			 * pageout by checking the modify state.
			 */
			if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			} else {
				m->dirty = FALSE;
			}

			if (m->dirty) {
				CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
				vm_page_unwire(m, TRUE);	/* reactivates */
				VM_STAT_INCR(reactivations);
				PAGE_WAKEUP_DONE(m);
			} else {
				CLUSTER_STAT(vm_pageout_target_page_freed++;)
				vm_page_free(m);/* clears busy, etc. */
			}
			vm_page_unlock_queues();
			continue;
		}
		/*
		 * Handle the "adjacent" pages. These pages were cleaned in
		 * place, and should be left alone.
		 * If prep_pin_count is nonzero, then someone is using the
		 * page, so make it active.
		 */
		if (!m->active && !m->inactive && !m->throttled && !m->private) {
			if (m->reference)
				vm_page_activate(m);
			else
				vm_page_deactivate(m);
		}
		if (m->overwriting) {
			/*
			 * the (COPY_OUT_FROM == FALSE) request_page_list case
			 */
			if (m->busy) {
				/*
				 * We do not re-set m->dirty !
				 * The page was busy so no extraneous activity
				 * could have occurred. COPY_INTO is a read into the
				 * new pages. CLEAN_IN_PLACE does actually write
				 * out the pages but handling outside of this code
				 * will take care of resetting dirty. We clear the
				 * modify however for the Programmed I/O case.
				 */
				pmap_clear_modify(m->phys_page);

				m->busy = FALSE;
				m->absent = FALSE;
			} else {
				/*
				 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
				 * Occurs when the original page was wired
				 * at the time of the list request
				 */
				assert(VM_PAGE_WIRED(m));
				vm_page_unwire(m, TRUE);	/* reactivates */
			}
			m->overwriting = FALSE;
		} else {
			/*
			 * Set the dirty state according to whether or not the page was
			 * modified during the pageout. Note that we purposefully do
			 * NOT call pmap_clear_modify since the page is still mapped.
			 * If the page were to be dirtied between the 2 calls, this
			 * fact would be lost. This code is only necessary to
			 * maintain statistics, since the pmap module is always
			 * consulted if m->dirty is false.
			 */
#if MACH_CLUSTER_STATS
			m->dirty = pmap_is_modified(m->phys_page);

			if (m->dirty)	vm_pageout_cluster_dirtied++;
			else		vm_pageout_cluster_cleaned++;
			if (m->wanted)	vm_pageout_cluster_collisions++;
#else
			m->dirty = FALSE;
#endif
		}
		if (m->encrypted_cleaning == TRUE) {
			m->encrypted_cleaning = FALSE;
			m->busy = FALSE;
		}
		m->cleaning = FALSE;

		/*
		 * Wakeup any thread waiting for the page to be un-cleaning.
		 */
		PAGE_WAKEUP(m);
		vm_page_unlock_queues();
	}
	/*
	 * Account for the paging reference taken in vm_paging_object_allocate.
	 */
	vm_object_activity_end(shadow_object);
	vm_object_unlock(shadow_object);

	assert(object->ref_count == 0);
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);
	assert(object->resident_page_count == 0);
	return;
}

/*
 * Routine:	vm_pageclean_setup
 *
 * Purpose:	setup a page to be cleaned (made non-dirty), but not
 *		necessarily flushed from the VM page cache.
 *		This is accomplished by cleaning in place.
 *
 *		The page must not be busy, and new_object
 *		must be locked.
 *
 */
void
vm_pageclean_setup(
	vm_page_t		m,
	vm_page_t		new_m,
	vm_object_t		new_object,
	vm_object_offset_t	new_offset)
{
	assert(!m->busy);
#if 0
	assert(!m->cleaning);
#endif

	XPR(XPR_VM_PAGEOUT,
	    "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
		m->object, m->offset, m,
		new_m, new_offset);

	pmap_clear_modify(m->phys_page);

	/*
	 * Mark original page as cleaning in place.
	 */
	m->cleaning = TRUE;
	SET_PAGE_DIRTY(m, FALSE);
	m->precious = FALSE;

	/*
	 * Convert the fictitious page to a private shadow of
	 * the real page.
	 */
	assert(new_m->fictitious);
	assert(new_m->phys_page == vm_page_fictitious_addr);
	new_m->fictitious = FALSE;
	new_m->private = TRUE;
	new_m->pageout = TRUE;
	new_m->phys_page = m->phys_page;

	vm_page_lockspin_queues();
	vm_page_wire(new_m);
	vm_page_unlock_queues();

	vm_page_insert(new_m, new_object, new_offset);
	assert(!new_m->wanted);
	new_m->busy = FALSE;
}

/*
 *	Routine:	vm_pageout_initialize_page
 *	Purpose:
 *		Causes the specified page to be initialized in
 *		the appropriate memory object. This routine is used to push
 *		pages into a copy-object when they are modified in the
 *		permanent object.
 *
 *		The page is moved to a temporary object and paged out.
 *
 *	In/out conditions:
 *		The page in question must not be on any pageout queues.
 *		The object to which it belongs must be locked.
 *		The page must be busy, but not hold a paging reference.
 *
 *	Implementation:
 *		Move this page to a completely new object.
 */
void
vm_pageout_initialize_page(
	vm_page_t	m)
{
	vm_object_t		object;
	vm_object_offset_t	paging_offset;
	memory_object_t		pager;

	XPR(XPR_VM_PAGEOUT,
		"vm_pageout_initialize_page, page 0x%X\n",
		m, 0, 0, 0, 0);
	assert(m->busy);

	/*
	 *	Verify that we really want to clean this page
	 */
	assert(!m->absent);
	assert(!m->error);
	assert(m->dirty);

	/*
	 *	Create a paging reference to let us play with the object.
	 */
	object = m->object;
	paging_offset = m->offset + object->paging_offset;

	if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) {
		VM_PAGE_FREE(m);
		panic("reservation without pageout?"); /* alan */
		vm_object_unlock(object);

		return;
	}

	/*
	 * If there's no pager, then we can't clean the page.  This should
	 * never happen since this should be a copy object and therefore not
	 * an external object, so the pager should always be there.
	 */

	pager = object->pager;

	if (pager == MEMORY_OBJECT_NULL) {
		VM_PAGE_FREE(m);
		panic("missing pager for copy object");
		return;
	}

	/*
	 * set the page for future call to vm_fault_list_request
	 */
	pmap_clear_modify(m->phys_page);
	SET_PAGE_DIRTY(m, FALSE);
	m->pageout = TRUE;

	/*
	 * keep the object from collapsing or terminating
	 */
	vm_object_paging_begin(object);
	vm_object_unlock(object);

	/*
	 *	Write the data to its pager.
	 *	Note that the data is passed by naming the new object,
	 *	not a virtual address; the pager interface has been
	 *	manipulated to use the "internal memory" data type.
	 *	[The object reference from its allocation is donated
	 *	to the eventual recipient.]
	 */
	memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);

	vm_object_lock(object);
	vm_object_paging_end(object);
}

#if	MACH_CLUSTER_STATS
#define MAXCLUSTERPAGES	16
struct {
	unsigned long pages_in_cluster;
	unsigned long pages_at_higher_offsets;
	unsigned long pages_at_lower_offsets;
} cluster_stats[MAXCLUSTERPAGES];
#endif	/* MACH_CLUSTER_STATS */


/*
 * vm_pageout_cluster:
 *
 * Given a page, queue it to the appropriate I/O thread,
 * which will page it out and attempt to clean adjacent pages
 * in the same operation.
 *
 * The object and queues must be locked. We will take a
 * paging reference to prevent deallocation or collapse when we
 * release the object lock back at the call site.  The I/O thread
 * is responsible for consuming this reference
 *
 * The page must not be on any pageout queue.
 */

void
vm_pageout_cluster(vm_page_t m, boolean_t pageout)
{
	vm_object_t	object = m->object;
	struct		vm_pageout_queue *q;


	XPR(XPR_VM_PAGEOUT,
		"vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
		object, m->offset, m, 0, 0);

	VM_PAGE_CHECK(m);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	vm_object_lock_assert_exclusive(object);

	/*
	 * Only a certain kind of page is appreciated here.
	 */
	assert((m->dirty || m->precious) && (!VM_PAGE_WIRED(m)));
	assert(!m->cleaning && !m->pageout && !m->laundry);
#ifndef CONFIG_FREEZE
	assert(!m->inactive && !m->active);
	assert(!m->throttled);
#endif

	/*
	 * protect the object from collapse or termination
	 */
	vm_object_activity_begin(object);

	m->pageout = pageout;

	if (object->internal == TRUE) {
		if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE)
			m->busy = TRUE;

		q = &vm_pageout_queue_internal;
	} else
		q = &vm_pageout_queue_external;

	/*
	 * pgo_laundry count is tied to the laundry bit
	 */
	m->laundry = TRUE;
	q->pgo_laundry++;

	m->pageout_queue = TRUE;
	queue_enter(&q->pgo_pending, m, vm_page_t, pageq);

	if (q->pgo_idle == TRUE) {
		q->pgo_idle = FALSE;
		thread_wakeup((event_t) &q->pgo_pending);
	}
	VM_PAGE_CHECK(m);
}


unsigned long vm_pageout_throttle_up_count = 0;

/*
 * A page is back from laundry or we are stealing it back from
 * the laundering state.  See if there are some pages waiting to
 * go to laundry and if we can let some of them go now.
 *
 * Object and page queues must be locked.
 */
void
vm_pageout_throttle_up(
	vm_page_t	m)
{
	struct vm_pageout_queue *q;

	assert(m->object != VM_OBJECT_NULL);
	assert(m->object != kernel_object);

#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	vm_object_lock_assert_exclusive(m->object);
#endif

	vm_pageout_throttle_up_count++;

	if (m->object->internal == TRUE)
		q = &vm_pageout_queue_internal;
	else
		q = &vm_pageout_queue_external;

	if (m->pageout_queue == TRUE) {

		queue_remove(&q->pgo_pending, m, vm_page_t, pageq);
		m->pageout_queue = FALSE;

		m->pageq.next = NULL;
		m->pageq.prev = NULL;

		vm_object_activity_end(m->object);
	}
	if (m->laundry == TRUE) {

		m->laundry = FALSE;
		q->pgo_laundry--;

		if (q->pgo_throttled == TRUE) {
			q->pgo_throttled = FALSE;
			thread_wakeup((event_t) &q->pgo_laundry);
		}
		if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
			q->pgo_draining = FALSE;
			thread_wakeup((event_t) (&q->pgo_laundry+1));
		}
	}
}


static void
vm_pageout_throttle_up_batch(
	struct vm_pageout_queue *q,
	int		batch_cnt)
{
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif

	vm_pageout_throttle_up_count += batch_cnt;

	q->pgo_laundry -= batch_cnt;

	if (q->pgo_throttled == TRUE) {
		q->pgo_throttled = FALSE;
		thread_wakeup((event_t) &q->pgo_laundry);
	}
	if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
		q->pgo_draining = FALSE;
		thread_wakeup((event_t) (&q->pgo_laundry+1));
	}
}



/*
 * VM memory pressure monitoring.
 *
 * vm_pageout_scan() keeps track of the number of pages it considers and
 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
 *
 * compute_memory_pressure() is called every second from compute_averages()
 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
 * of reclaimed pages in a new vm_pageout_stat[] bucket.
 *
 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 * The caller provides the number of seconds ("nsecs") worth of statistics
 * it wants, up to 30 seconds.
 * It computes the number of pages reclaimed in the past "nsecs" seconds and
 * also returns the number of pages the system still needs to reclaim at this
 * moment in time.
 */
#define VM_PAGEOUT_STAT_SIZE	31
struct vm_pageout_stat {
	unsigned int considered;
	unsigned int reclaimed;
} vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0}, };
unsigned int vm_pageout_stat_now = 0;
unsigned int vm_memory_pressure = 0;

#define VM_PAGEOUT_STAT_BEFORE(i) \
	(((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
#define VM_PAGEOUT_STAT_AFTER(i) \
	(((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)

#if VM_PAGE_BUCKETS_CHECK
int vm_page_buckets_check_interval = 10; /* in seconds */
#endif /* VM_PAGE_BUCKETS_CHECK */

/*
 * Called from compute_averages().
 */
void
compute_memory_pressure(
	__unused void *arg)
{
	unsigned int vm_pageout_next;

#if VM_PAGE_BUCKETS_CHECK
	/* check the consistency of VM page buckets at regular interval */
	static int counter = 0;
	if ((++counter % vm_page_buckets_check_interval) == 0) {
		vm_page_buckets_check();
	}
#endif /* VM_PAGE_BUCKETS_CHECK */

	vm_memory_pressure =
		vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].reclaimed;

	commpage_set_memory_pressure( vm_memory_pressure );

	/* move "now" forward */
	vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
	vm_pageout_stats[vm_pageout_next].considered = 0;
	vm_pageout_stats[vm_pageout_next].reclaimed = 0;
	vm_pageout_stat_now = vm_pageout_next;
}


/*
 * IMPORTANT
 * mach_vm_ctl_page_free_wanted() is called indirectly, via
 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
 * it must be safe in the restricted stackshot context. Locks and/or
 * blocking are not allowable.
 */
unsigned int
mach_vm_ctl_page_free_wanted(void)
{
	unsigned int page_free_target, page_free_count, page_free_wanted;

	page_free_target = vm_page_free_target;
	page_free_count = vm_page_free_count;
	if (page_free_target > page_free_count) {
		page_free_wanted = page_free_target - page_free_count;
	} else {
		page_free_wanted = 0;
	}

	return page_free_wanted;
}


/*
 * IMPORTANT:
 * mach_vm_pressure_monitor() is called when taking a stackshot, with
 * wait_for_pressure FALSE, so that code path must remain safe in the
 * restricted stackshot context. No blocking or locks are allowable
 * on that code path.
 */

kern_return_t
mach_vm_pressure_monitor(
	boolean_t	wait_for_pressure,
	unsigned int	nsecs_monitored,
	unsigned int	*pages_reclaimed_p,
	unsigned int	*pages_wanted_p)
{
	wait_result_t	wr;
	unsigned int	vm_pageout_then, vm_pageout_now;
	unsigned int	pages_reclaimed;

	/*
	 * We don't take the vm_page_queue_lock here because we don't want
	 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
	 * thread when it's trying to reclaim memory.  We don't need fully
	 * accurate monitoring anyway...
	 */

	if (wait_for_pressure) {
		/* wait until there's memory pressure */
		while (vm_page_free_count >= vm_page_free_target) {
			wr = assert_wait((event_t) &vm_page_free_wanted,
					 THREAD_INTERRUPTIBLE);
			if (wr == THREAD_WAITING) {
				wr = thread_block(THREAD_CONTINUE_NULL);
			}
			if (wr == THREAD_INTERRUPTED) {
				return KERN_ABORTED;
			}
			if (wr == THREAD_AWAKENED) {
				/*
				 * The memory pressure might have already
				 * been relieved but let's not block again
				 * and let's report that there was memory
				 * pressure at some point.
				 */
				break;
			}
		}
	}

	/* provide the number of pages the system wants to reclaim */
	if (pages_wanted_p != NULL) {
		*pages_wanted_p = mach_vm_ctl_page_free_wanted();
	}

	if (pages_reclaimed_p == NULL) {
		return KERN_SUCCESS;
	}

	/* provide number of pages reclaimed in the last "nsecs_monitored" */
	do {
		vm_pageout_now = vm_pageout_stat_now;
		pages_reclaimed = 0;
		for (vm_pageout_then =
			     VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
		     vm_pageout_then != vm_pageout_now &&
			     nsecs_monitored-- != 0;
		     vm_pageout_then =
			     VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
			pages_reclaimed += vm_pageout_stats[vm_pageout_then].reclaimed;
		}
	} while (vm_pageout_now != vm_pageout_stat_now);
	*pages_reclaimed_p = pages_reclaimed;

	return KERN_SUCCESS;
}



/*
 * function in BSD to apply I/O throttle to the pageout thread
 */
extern void vm_pageout_io_throttle(void);

/*
 * Page States: Used below to maintain the page state
 * before it's removed from its Q. This saved state
 * helps us do the right accounting in certain cases
 */
#define PAGE_STATE_SPECULATIVE		1
#define PAGE_STATE_ANONYMOUS		2
#define PAGE_STATE_INACTIVE		3
#define PAGE_STATE_INACTIVE_FIRST	4
#define PAGE_STATE_CLEAN		5


#define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m)				\
	MACRO_BEGIN							\
	/*								\
	 * If a "reusable" page somehow made it back into		\
	 * the active queue, it's been re-used and is not		\
	 * quite re-usable.						\
	 * If the VM object was "all_reusable", consider it		\
	 * as "all re-used" instead of converting it to			\
	 * "partially re-used", which could be expensive.		\
	 */								\
	if ((m)->reusable ||						\
	    (m)->object->all_reusable) {				\
		vm_object_reuse_pages((m)->object,			\
				      (m)->offset,			\
				      (m)->offset + PAGE_SIZE_64,	\
				      FALSE);				\
	}								\
	MACRO_END


#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT		64
#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX	1024

#define	FCS_IDLE		0
#define FCS_DELAYED		1
#define FCS_DEADLOCK_DETECTED	2

struct flow_control {
	int		state;
	mach_timespec_t	ts;
};

uint32_t vm_pageout_considered_page = 0;
uint32_t vm_page_filecache_min = 0;

#define	VM_PAGE_FILECACHE_MIN	50000
#define ANONS_GRABBED_LIMIT	2

/*
 *	vm_pageout_scan does the dirty work for the pageout daemon.
 *	It returns with both vm_page_queue_free_lock and vm_page_queue_lock
 *	held and vm_page_free_wanted == 0.
 */
void
vm_pageout_scan(void)
{
	unsigned int loop_count = 0;
	unsigned int inactive_burst_count = 0;
	unsigned int active_burst_count = 0;
	unsigned int reactivated_this_call;
	unsigned int reactivate_limit;
	vm_page_t   local_freeq = NULL;
	int         local_freed = 0;
	int         delayed_unlock;
	int	    delayed_unlock_limit = 0;
	int	    refmod_state = 0;
	int	vm_pageout_deadlock_target = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct	vm_speculative_age_q *sq;
	struct  flow_control	flow_control = { 0, { 0, 0 } };
	boolean_t inactive_throttled = FALSE;
	boolean_t try_failed;
	mach_timespec_t	ts;
	unsigned	int msecs = 0;
	vm_object_t	object;
	vm_object_t	last_object_tried;
	uint32_t	catch_up_count = 0;
	uint32_t	inactive_reclaim_run;
	boolean_t	forced_reclaim;
	boolean_t	exceeded_burst_throttle;
	boolean_t	grab_anonymous = FALSE;
	boolean_t	force_anonymous = FALSE;
	int		anons_grabbed = 0;
	int		page_prev_state = 0;
	int		cache_evict_throttle = 0;
	uint32_t	vm_pageout_inactive_external_forced_reactivate_limit = 0;
	int		force_purge = 0;

#if VM_PRESSURE_EVENTS
	vm_pressure_level_t pressure_level;
#endif /* VM_PRESSURE_EVENTS */

	VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
		       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
		       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);

	flow_control.state = FCS_IDLE;
	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;
	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];


	XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);


	vm_page_lock_queues();
	delayed_unlock = 1;	/* must be nonzero if Qs are locked, 0 if unlocked */

	/*
	 *	Calculate the max number of referenced pages on the inactive
	 *	queue that we will reactivate.
	 */
	reactivated_this_call = 0;
	reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
						    vm_page_inactive_count);
	inactive_reclaim_run = 0;

	vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;

	/*
	 *	We want to gradually dribble pages from the active queue
	 *	to the inactive queue.  If we let the inactive queue get
	 *	very small, and then suddenly dump many pages into it,
	 *	those pages won't get a sufficient chance to be referenced
	 *	before we start taking them from the inactive queue.
	 *
	 *	We must limit the rate at which we send pages to the pagers
	 *	so that we don't tie up too many pages in the I/O queues.
	 *	We implement a throttling mechanism using the laundry count
	 *	to limit the number of pages outstanding to the default
	 *	and external pagers.  We can bypass the throttles and look
	 *	for clean pages if the pageout queues don't drain in a timely
	 *	fashion since this may indicate that the pageout paths are
	 *	stalled waiting for memory, which only we can provide.
	 */


Restart:
	assert(delayed_unlock!=0);

	/*
	 *	Recalculate vm_page_inactive_target.
	 */
	vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
							  vm_page_inactive_count +
							  vm_page_speculative_count);

	vm_page_anonymous_min = vm_page_inactive_target / 20;


	/*
	 * don't want to wake the pageout_scan thread up every time we fall below
	 * the targets... set a low water mark at 0.25% below the target
	 */
	vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400);

	if (vm_page_speculative_percentage > 50)
		vm_page_speculative_percentage = 50;
	else if (vm_page_speculative_percentage <= 0)
		vm_page_speculative_percentage = 1;

	vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
								vm_page_inactive_count);

	object = NULL;
	last_object_tried = NULL;
	try_failed = FALSE;

	if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count))
		catch_up_count = vm_page_inactive_count + vm_page_speculative_count;
	else
		catch_up_count = 0;

	for (;;) {
		vm_page_t m;

		DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);

		if (delayed_unlock == 0) {
			vm_page_lock_queues();
			delayed_unlock = 1;
		}
		if (vm_upl_wait_for_pages < 0)
			vm_upl_wait_for_pages = 0;

		delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;

		if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
			delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;

		/*
		 * Move pages from active to inactive if we're below the target
		 */
		/* if we are trying to make clean, we need to make sure we actually have inactive - mj */
		if ((vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target)
			goto done_moving_active_pages;

		if (object != NULL) {
			vm_object_unlock(object);
			object = NULL;
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;
		}
		/*
		 * Don't sweep through active queue more than the throttle
		 * which should be kept relatively low
		 */
		active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_START,
			       vm_pageout_inactive, vm_pageout_inactive_used, vm_page_free_count, local_freed);

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_NONE,
			       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
			       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);
		memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_START);


		while (!queue_empty(&vm_page_queue_active) && active_burst_count--) {

			vm_pageout_active++;

			m = (vm_page_t) queue_first(&vm_page_queue_active);

			assert(m->active && !m->inactive);
			assert(!m->laundry);
			assert(m->object != kernel_object);
			assert(m->phys_page != vm_page_guard_addr);

			DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);

			/*
			 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
			 *
			 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
			 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
			 * new reference happens. If no further references happen on the page after that remote TLB flushes
			 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
			 * by pageout_scan, which is just fine since the last reference would have happened quite far
			 * in the past (TLB caches don't hang around for very long), and of course could just as easily
			 * have happened before we moved the page
			 */
			pmap_clear_refmod_options(m->phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);

			/*
			 * The page might be absent or busy,
			 * but vm_page_deactivate can handle that.
			 * FALSE indicates that we don't want a H/W clear reference
			 */
			vm_page_deactivate_internal(m, FALSE);

			if (delayed_unlock++ > delayed_unlock_limit) {

				if (local_freeq) {
					vm_page_unlock_queues();

					VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
						       vm_page_free_count, local_freed, delayed_unlock_limit, 1);

					vm_page_free_list(local_freeq, TRUE);

					VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
						       vm_page_free_count, 0, 0, 1);

					local_freeq = NULL;
					local_freed = 0;
					vm_page_lock_queues();
				} else {
					lck_mtx_yield(&vm_page_queue_lock);
				}

				delayed_unlock = 1;

				/*
				 * continue the while loop processing
				 * the active queue... need to hold
				 * the page queues lock
				 */
			}
		}

		VM_DEBUG_EVENT(vm_pageout_balance, VM_PAGEOUT_BALANCE, DBG_FUNC_END,
			       vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count, vm_page_inactive_target);
		memoryshot(VM_PAGEOUT_BALANCE, DBG_FUNC_END);

		/**********************************************************************
		 * above this point we're playing with the active queue
		 * below this point we're playing with the throttling mechanisms
		 * and the inactive queue
		 **********************************************************************/

done_moving_active_pages:

		if (vm_page_free_count + local_freed >= vm_page_free_target) {
			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			if (local_freeq) {
				vm_page_unlock_queues();

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
					       vm_page_free_count, local_freed, delayed_unlock_limit, 2);

				vm_page_free_list(local_freeq, TRUE);

				VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END,
					       vm_page_free_count, local_freed, 0, 2);

				local_freeq = NULL;
				local_freed = 0;
				vm_page_lock_queues();
			}
			/*
			 * make sure the pageout I/O threads are running
			 * throttled in case there are still requests
			 * in the laundry... since we have met our targets
			 * we don't need the laundry to be cleaned in a timely
			 * fashion... so let's avoid interfering with foreground
			 * activity
			 */
			vm_pageout_adjust_io_throttles(iq, eq, TRUE);

			/*
			 * recalculate vm_page_inactive_target
			 */
			vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
									  vm_page_inactive_count +
									  vm_page_speculative_count);
			if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
			    !queue_empty(&vm_page_queue_active)) {
				/*
				 * inactive target still not met... keep going
				 * until we get the queues balanced...
				 */
				continue;
			}
			lck_mtx_lock(&vm_page_queue_free_lock);

			if ((vm_page_free_count >= vm_page_free_target) &&
			    (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
				/*
				 * done - we have met our target *and*
				 * there is no one waiting for a page.
				 */
return_from_scan:
				assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);

				VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
					       vm_pageout_inactive, vm_pageout_inactive_used, 0, 0);
				VM_DEBUG_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
					       vm_pageout_speculative_clean, vm_pageout_inactive_clean,
					       vm_pageout_inactive_dirty_internal, vm_pageout_inactive_dirty_external);

				return;
			}
			lck_mtx_unlock(&vm_page_queue_free_lock);
		}

		/*
		 * Before anything, we check if we have any ripe volatile
		 * objects around. If so, try to purge the first object.
		 * If the purge fails, fall through to reclaim a page instead.
		 * If the purge succeeds, go back to the top and reevaluate
		 * the new memory situation.
		 */

		assert (available_for_purge>=0);
		force_purge = 0; /* no force-purging */

#if VM_PRESSURE_EVENTS
		pressure_level = memorystatus_vm_pressure_level;

		if (pressure_level > kVMPressureNormal) {

			if (pressure_level >= kVMPressureCritical) {
				force_purge = memorystatus_purge_on_critical;
			} else if (pressure_level >= kVMPressureUrgent) {
				force_purge = memorystatus_purge_on_urgent;
			} else if (pressure_level >= kVMPressureWarning) {
				force_purge = memorystatus_purge_on_warning;
			}
		}
#endif /* VM_PRESSURE_EVENTS */

		if (available_for_purge || force_purge) {

			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}

			memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);

			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
			if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {

				VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
				memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
				continue;
			}
			VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
			memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
		}

		if (queue_empty(&sq->age_q) && vm_page_speculative_count) {
			/*
			 * try to pull pages from the aging bins...
			 * see vm_page.h for an explanation of how
			 * this mechanism works
			 */
			struct vm_speculative_age_q	*aq;
			mach_timespec_t	ts_fully_aged;
			boolean_t	can_steal = FALSE;
			int num_scanned_queues;

			aq = &vm_page_queue_speculative[speculative_steal_index];

			num_scanned_queues = 0;
			while (queue_empty(&aq->age_q) &&
			       num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {

				speculative_steal_index++;

				if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
					speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

				aq = &vm_page_queue_speculative[speculative_steal_index];
			}

			if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
				/*
				 * XXX We've scanned all the speculative
				 * queues but still haven't found one
				 * that is not empty, even though
				 * vm_page_speculative_count is not 0.
				 *
				 * report the anomaly...
				 */
				printf("vm_pageout_scan: "
				       "all speculative queues empty "
				       "but count=%d. Re-adjusting.\n",
				       vm_page_speculative_count);
				if (vm_page_speculative_count > vm_page_speculative_count_drift_max)
					vm_page_speculative_count_drift_max = vm_page_speculative_count;
				vm_page_speculative_count_drifts++;
#if 6553678
				Debugger("vm_pageout_scan: no speculative pages");
#endif
				/* readjust... */
				vm_page_speculative_count = 0;
				/* ... and continue */
				continue;
			}

			if (vm_page_speculative_count > vm_page_speculative_target)
				can_steal = TRUE;
			else {
				ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) / 1000;
				ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_page_speculative_q_age_ms) % 1000)
				                      * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);

				clock_sec_t sec;
				clock_nsec_t nsec;
				clock_get_system_nanotime(&sec, &nsec);
				ts.tv_sec = (unsigned int) sec;
				ts.tv_nsec = nsec;

				if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
					can_steal = TRUE;
			}
			if (can_steal == TRUE)
				vm_page_speculate_ageit(aq);
		}
		if (queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
			int	pages_evicted;

			if (object != NULL) {
				vm_object_unlock(object);
				object = NULL;
			}
			pages_evicted = vm_object_cache_evict(100, 10);

			if (pages_evicted) {

				vm_pageout_cache_evicted += pages_evicted;

				VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
					       vm_page_free_count, pages_evicted, vm_pageout_cache_evicted, 0);
				memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);

				/*
				 * we just freed up to 100 pages,
				 * so go back to the top of the main loop
				 * and re-evaluate the memory situation
				 */
				continue;
			} else
				cache_evict_throttle = 100;
		}
		if (cache_evict_throttle)
			cache_evict_throttle--;


		exceeded_burst_throttle = FALSE;
		/*
		 * Sometimes we have to pause:
		 *	1) No inactive pages - nothing to do.
1672 * 2) Loop control - no acceptable pages found on the inactive queue 1673 * within the last vm_pageout_burst_inactive_throttle iterations 1674 * 3) Flow control - default pageout queue is full 1675 */ 1676 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_anonymous) && queue_empty(&sq->age_q)) { 1677 vm_pageout_scan_empty_throttle++; 1678 msecs = vm_pageout_empty_wait; 1679 goto vm_pageout_scan_delay; 1680 1681 } else if (inactive_burst_count >= 1682 MIN(vm_pageout_burst_inactive_throttle, 1683 (vm_page_inactive_count + 1684 vm_page_speculative_count))) { 1685 vm_pageout_scan_burst_throttle++; 1686 msecs = vm_pageout_burst_wait; 1687 1688 exceeded_burst_throttle = TRUE; 1689 goto vm_pageout_scan_delay; 1690 1691 } else if (vm_page_free_count > (vm_page_free_reserved / 4) && 1692 VM_PAGEOUT_SCAN_NEEDS_TO_THROTTLE()) { 1693 vm_pageout_scan_swap_throttle++; 1694 msecs = vm_pageout_swap_wait; 1695 goto vm_pageout_scan_delay; 1696 1697 } else if (VM_PAGE_Q_THROTTLED(iq) && 1698 VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { 1699 clock_sec_t sec; 1700 clock_nsec_t nsec; 1701 1702 switch (flow_control.state) { 1703 1704 case FCS_IDLE: 1705 if ((vm_page_free_count + local_freed) < vm_page_free_target) { 1706 1707 if (vm_page_pageable_external_count > vm_page_filecache_min && !queue_empty(&vm_page_queue_inactive)) { 1708 anons_grabbed = ANONS_GRABBED_LIMIT; 1709 goto consider_inactive; 1710 } 1711 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && vm_page_active_count) 1712 continue; 1713 } 1714reset_deadlock_timer: 1715 ts.tv_sec = vm_pageout_deadlock_wait / 1000; 1716 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; 1717 clock_get_system_nanotime(&sec, &nsec); 1718 flow_control.ts.tv_sec = (unsigned int) sec; 1719 flow_control.ts.tv_nsec = nsec; 1720 ADD_MACH_TIMESPEC(&flow_control.ts, &ts); 1721 1722 flow_control.state = FCS_DELAYED; 1723 msecs = vm_pageout_deadlock_wait; 1724 1725 break; 1726 1727 case FCS_DELAYED: 1728 clock_get_system_nanotime(&sec, &nsec); 1729 ts.tv_sec = (unsigned int) sec; 1730 ts.tv_nsec = nsec; 1731 1732 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) { 1733 /* 1734 * the pageout thread for the default pager is potentially 1735 * deadlocked since the 1736 * default pager queue has been throttled for more than the 1737 * allowable time... we need to move some clean pages or dirty 1738 * pages belonging to the external pagers if they aren't throttled 1739 * vm_page_free_wanted represents the number of threads currently 1740 * blocked waiting for pages... we'll move one page for each of 1741 * these plus a fixed amount to break the logjam... once we're done 1742 * moving this number of pages, we'll re-enter the FSC_DELAYED state 1743 * with a new timeout target since we have no way of knowing 1744 * whether we've broken the deadlock except through observation 1745 * of the queue associated with the default pager... we need to 1746 * stop moving pages and allow the system to run to see what 1747 * state it settles into. 1748 */ 1749 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged; 1750 vm_pageout_scan_deadlock_detected++; 1751 flow_control.state = FCS_DEADLOCK_DETECTED; 1752 thread_wakeup((event_t) &vm_pageout_garbage_collect); 1753 goto consider_inactive; 1754 } 1755 /* 1756 * just resniff instead of trying 1757 * to compute a new delay time... 
we're going to be 1758 * awakened immediately upon a laundry completion, 1759 * so we won't wait any longer than necessary 1760 */ 1761 msecs = vm_pageout_idle_wait; 1762 break; 1763 1764 case FCS_DEADLOCK_DETECTED: 1765 if (vm_pageout_deadlock_target) 1766 goto consider_inactive; 1767 goto reset_deadlock_timer; 1768 1769 } 1770vm_pageout_scan_delay: 1771 if (object != NULL) { 1772 vm_object_unlock(object); 1773 object = NULL; 1774 } 1775 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1776 1777 vm_page_unlock_queues(); 1778 1779 if (local_freeq) { 1780 1781 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 1782 vm_page_free_count, local_freed, delayed_unlock_limit, 3); 1783 1784 vm_page_free_list(local_freeq, TRUE); 1785 1786 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 1787 vm_page_free_count, local_freed, 0, 3); 1788 1789 local_freeq = NULL; 1790 local_freed = 0; 1791 } 1792 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 1793 vm_consider_waking_compactor_swapper(); 1794 1795 vm_page_lock_queues(); 1796 1797 if (flow_control.state == FCS_DELAYED && 1798 !VM_PAGE_Q_THROTTLED(iq)) { 1799 flow_control.state = FCS_IDLE; 1800 goto consider_inactive; 1801 } 1802 1803 if (vm_page_free_count >= vm_page_free_target) { 1804 /* 1805 * we're here because 1806 * 1) someone else freed up some pages while we had 1807 * the queues unlocked above 1808 * and we've hit one of the 3 conditions that 1809 * cause us to pause the pageout scan thread 1810 * 1811 * since we already have enough free pages, 1812 * let's avoid stalling and return normally 1813 * 1814 * before we return, make sure the pageout I/O threads 1815 * are running throttled in case there are still requests 1816 * in the laundry... since we have enough free pages 1817 * we don't need the laundry to be cleaned in a timely 1818 * fashion... so let's avoid interfering with foreground 1819 * activity 1820 * 1821 * we don't want to hold vm_page_queue_free_lock when 1822 * calling vm_pageout_adjust_io_throttles (since it 1823 * may cause other locks to be taken), we do the intitial 1824 * check outside of the lock. Once we take the lock, 1825 * we recheck the condition since it may have changed. 1826 * if it has, no problem, we will make the threads 1827 * non-throttled before actually blocking 1828 */ 1829 vm_pageout_adjust_io_throttles(iq, eq, TRUE); 1830 } 1831 lck_mtx_lock(&vm_page_queue_free_lock); 1832 1833 if (vm_page_free_count >= vm_page_free_target && 1834 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { 1835 goto return_from_scan; 1836 } 1837 lck_mtx_unlock(&vm_page_queue_free_lock); 1838 1839 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) { 1840 /* 1841 * we're most likely about to block due to one of 1842 * the 3 conditions that cause vm_pageout_scan to 1843 * not be able to make forward progress w/r 1844 * to providing new pages to the free queue, 1845 * so unthrottle the I/O threads in case we 1846 * have laundry to be cleaned... it needs 1847 * to be completed ASAP. 
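 * (the opposite adjustment is made a few lines above... when vm_page_free_count
 * is already back over vm_page_free_target we leave the I/O threads throttled so
 * the remaining laundry doesn't compete with foreground activity)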
1848 * 1849 * even if we don't block, we want the io threads 1850 * running unthrottled since the sum of free + 1851 * clean pages is still under our free target 1852 */ 1853 vm_pageout_adjust_io_throttles(iq, eq, FALSE); 1854 } 1855 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) { 1856 /* 1857 * if we get here we're below our free target and 1858 * we're stalling due to a full laundry queue or 1859 * we don't have any inactive pages other then 1860 * those in the clean queue... 1861 * however, we have pages on the clean queue that 1862 * can be moved to the free queue, so let's not 1863 * stall the pageout scan 1864 */ 1865 flow_control.state = FCS_IDLE; 1866 goto consider_inactive; 1867 } 1868 VM_CHECK_MEMORYSTATUS; 1869 1870 if (flow_control.state != FCS_IDLE) 1871 vm_pageout_scan_throttle++; 1872 iq->pgo_throttled = TRUE; 1873 1874 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 1875 vm_consider_waking_compactor_swapper(); 1876 1877 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); 1878 counter(c_vm_pageout_scan_block++); 1879 1880 vm_page_unlock_queues(); 1881 1882 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); 1883 1884 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START, 1885 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); 1886 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START); 1887 1888 thread_block(THREAD_CONTINUE_NULL); 1889 1890 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END, 1891 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0); 1892 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END); 1893 1894 vm_page_lock_queues(); 1895 delayed_unlock = 1; 1896 1897 iq->pgo_throttled = FALSE; 1898 1899 if (loop_count >= vm_page_inactive_count) 1900 loop_count = 0; 1901 inactive_burst_count = 0; 1902 1903 goto Restart; 1904 /*NOTREACHED*/ 1905 } 1906 1907 1908 flow_control.state = FCS_IDLE; 1909consider_inactive: 1910 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count), 1911 vm_pageout_inactive_external_forced_reactivate_limit); 1912 loop_count++; 1913 inactive_burst_count++; 1914 vm_pageout_inactive++; 1915 1916 1917 /* 1918 * Choose a victim. 1919 */ 1920 while (1) { 1921 m = NULL; 1922 1923 if (VM_DYNAMIC_PAGING_ENABLED(memory_manager_default)) { 1924 assert(vm_page_throttled_count == 0); 1925 assert(queue_empty(&vm_page_queue_throttled)); 1926 } 1927 /* 1928 * The most eligible pages are ones we paged in speculatively, 1929 * but which have not yet been touched. 1930 */ 1931 if (!queue_empty(&sq->age_q) && force_anonymous == FALSE) { 1932 m = (vm_page_t) queue_first(&sq->age_q); 1933 1934 page_prev_state = PAGE_STATE_SPECULATIVE; 1935 1936 break; 1937 } 1938 /* 1939 * Try a clean-queue inactive page. 
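 * (victim preference order in this loop: speculative pages first, then the
 * clean queue, then file-backed pages from the inactive queue, then anonymous
 * pages... the grab_anonymous / ANONS_GRABBED_LIMIT checks below decide how
 * often we dip into the anonymous queue)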
1940 */ 1941 if (!queue_empty(&vm_page_queue_cleaned)) { 1942 m = (vm_page_t) queue_first(&vm_page_queue_cleaned); 1943 1944 page_prev_state = PAGE_STATE_CLEAN; 1945 1946 break; 1947 } 1948 1949 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min); 1950 1951 if (vm_page_pageable_external_count < vm_page_filecache_min || force_anonymous == TRUE) { 1952 grab_anonymous = TRUE; 1953 anons_grabbed = 0; 1954 } 1955 1956 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) { 1957 1958 if ( !queue_empty(&vm_page_queue_inactive) ) { 1959 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 1960 1961 page_prev_state = PAGE_STATE_INACTIVE; 1962 anons_grabbed = 0; 1963 1964 break; 1965 } 1966 } 1967 if ( !queue_empty(&vm_page_queue_anonymous) ) { 1968 m = (vm_page_t) queue_first(&vm_page_queue_anonymous); 1969 1970 page_prev_state = PAGE_STATE_ANONYMOUS; 1971 anons_grabbed++; 1972 1973 break; 1974 } 1975 1976 /* 1977 * if we've gotten here, we have no victim page. 1978 * if making clean, free the local freed list and return. 1979 * if making free, check to see if we've finished balancing the queues 1980 * yet, if we haven't just continue, else panic 1981 */ 1982 vm_page_unlock_queues(); 1983 1984 if (object != NULL) { 1985 vm_object_unlock(object); 1986 object = NULL; 1987 } 1988 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1989 1990 if (local_freeq) { 1991 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 1992 vm_page_free_count, local_freed, delayed_unlock_limit, 5); 1993 1994 vm_page_free_list(local_freeq, TRUE); 1995 1996 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 1997 vm_page_free_count, local_freed, 0, 5); 1998 1999 local_freeq = NULL; 2000 local_freed = 0; 2001 } 2002 vm_page_lock_queues(); 2003 delayed_unlock = 1; 2004 2005 force_anonymous = FALSE; 2006 2007 if ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) 2008 goto Restart; 2009 2010 if (!queue_empty(&sq->age_q)) 2011 goto Restart; 2012 2013 panic("vm_pageout: no victim"); 2014 2015 /* NOTREACHED */ 2016 } 2017 force_anonymous = FALSE; 2018 2019 /* 2020 * we just found this page on one of our queues... 2021 * it can't also be on the pageout queue, so safe 2022 * to call VM_PAGE_QUEUES_REMOVE 2023 */ 2024 assert(!m->pageout_queue); 2025 2026 VM_PAGE_QUEUES_REMOVE(m); 2027 2028 assert(!m->laundry); 2029 assert(!m->private); 2030 assert(!m->fictitious); 2031 assert(m->object != kernel_object); 2032 assert(m->phys_page != vm_page_guard_addr); 2033 2034 2035 if (page_prev_state != PAGE_STATE_SPECULATIVE) 2036 vm_pageout_stats[vm_pageout_stat_now].considered++; 2037 2038 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); 2039 2040 /* 2041 * check to see if we currently are working 2042 * with the same object... if so, we've 2043 * already got the lock 2044 */ 2045 if (m->object != object) { 2046 /* 2047 * the object associated with candidate page is 2048 * different from the one we were just working 2049 * with... dump the lock if we still own it 2050 */ 2051 if (object != NULL) { 2052 vm_object_unlock(object); 2053 object = NULL; 2054 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2055 } 2056 /* 2057 * Try to lock object; since we've alread got the 2058 * page queues lock, we can only 'try' for this one. 2059 * if the 'try' fails, we need to do a mutex_pause 2060 * to allow the owner of the object lock a chance to 2061 * run... 
otherwise, we're likely to trip over this 2062 * object in the same state as we work our way through 2063 * the queue... clumps of pages associated with the same 2064 * object are fairly typical on the inactive and active queues 2065 */ 2066 if (!vm_object_lock_try_scan(m->object)) { 2067 vm_page_t m_want = NULL; 2068 2069 vm_pageout_inactive_nolock++; 2070 2071 if (page_prev_state == PAGE_STATE_CLEAN) 2072 vm_pageout_cleaned_nolock++; 2073 2074 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2075 page_prev_state = PAGE_STATE_INACTIVE_FIRST; 2076 2077 pmap_clear_reference(m->phys_page); 2078 m->reference = FALSE; 2079 2080 /* 2081 * m->object must be stable since we hold the page queues lock... 2082 * we can update the scan_collisions field sans the object lock 2083 * since it is a separate field and this is the only spot that does 2084 * a read-modify-write operation and it is never executed concurrently... 2085 * we can asynchronously set this field to 0 when creating a UPL, so it 2086 * is possible for the value to be a bit non-determistic, but that's ok 2087 * since it's only used as a hint 2088 */ 2089 m->object->scan_collisions++; 2090 2091 if ( !queue_empty(&sq->age_q) ) 2092 m_want = (vm_page_t) queue_first(&sq->age_q); 2093 else if ( !queue_empty(&vm_page_queue_cleaned)) 2094 m_want = (vm_page_t) queue_first(&vm_page_queue_cleaned); 2095 else if (anons_grabbed >= ANONS_GRABBED_LIMIT || queue_empty(&vm_page_queue_anonymous)) 2096 m_want = (vm_page_t) queue_first(&vm_page_queue_inactive); 2097 else if ( !queue_empty(&vm_page_queue_anonymous)) 2098 m_want = (vm_page_t) queue_first(&vm_page_queue_anonymous); 2099 2100 /* 2101 * this is the next object we're going to be interested in 2102 * try to make sure its available after the mutex_yield 2103 * returns control 2104 */ 2105 if (m_want) 2106 vm_pageout_scan_wants_object = m_want->object; 2107 2108 /* 2109 * force us to dump any collected free pages 2110 * and to pause before moving on 2111 */ 2112 try_failed = TRUE; 2113 2114 goto requeue_page; 2115 } 2116 object = m->object; 2117 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2118 2119 try_failed = FALSE; 2120 } 2121 if (catch_up_count) 2122 catch_up_count--; 2123 2124 if (m->busy) { 2125 if (m->encrypted_cleaning) { 2126 /* 2127 * ENCRYPTED SWAP: 2128 * if this page has already been picked up as 2129 * part of a page-out cluster, it will be busy 2130 * because it is being encrypted (see 2131 * vm_object_upl_request()). But we still 2132 * want to demote it from "clean-in-place" 2133 * (aka "adjacent") to "clean-and-free" (aka 2134 * "target"), so let's ignore its "busy" bit 2135 * here and proceed to check for "cleaning" a 2136 * little bit below... 2137 * 2138 * CAUTION CAUTION: 2139 * A "busy" page should still be left alone for 2140 * most purposes, so we have to be very careful 2141 * not to process that page too much. 2142 */ 2143 assert(m->cleaning); 2144 goto consider_inactive_page; 2145 } 2146 2147 /* 2148 * Somebody is already playing with this page. 
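 * (requeue_page below dispatches on page_prev_state: a speculative page is
 * re-speculated, everything else goes back onto the inactive queue... at the
 * head of it when the state is PAGE_STATE_INACTIVE_FIRST)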
2149 * Put it back on the appropriate queue 2150 * 2151 */ 2152 vm_pageout_inactive_busy++; 2153 2154 if (page_prev_state == PAGE_STATE_CLEAN) 2155 vm_pageout_cleaned_busy++; 2156 2157requeue_page: 2158 switch (page_prev_state) { 2159 2160 case PAGE_STATE_SPECULATIVE: 2161 vm_page_speculate(m, FALSE); 2162 break; 2163 2164 case PAGE_STATE_ANONYMOUS: 2165 case PAGE_STATE_CLEAN: 2166 case PAGE_STATE_INACTIVE: 2167 VM_PAGE_ENQUEUE_INACTIVE(m, FALSE); 2168 break; 2169 2170 case PAGE_STATE_INACTIVE_FIRST: 2171 VM_PAGE_ENQUEUE_INACTIVE(m, TRUE); 2172 break; 2173 } 2174 goto done_with_inactivepage; 2175 } 2176 2177 2178 /* 2179 * If it's absent, in error or the object is no longer alive, 2180 * we can reclaim the page... in the no longer alive case, 2181 * there are 2 states the page can be in that preclude us 2182 * from reclaiming it - busy or cleaning - that we've already 2183 * dealt with 2184 */ 2185 if (m->absent || m->error || !object->alive) { 2186 2187 if (m->absent) 2188 vm_pageout_inactive_absent++; 2189 else if (!object->alive) 2190 vm_pageout_inactive_notalive++; 2191 else 2192 vm_pageout_inactive_error++; 2193reclaim_page: 2194 if (vm_pageout_deadlock_target) { 2195 vm_pageout_scan_inactive_throttle_success++; 2196 vm_pageout_deadlock_target--; 2197 } 2198 2199 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL); 2200 2201 if (object->internal) { 2202 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL); 2203 } else { 2204 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL); 2205 } 2206 assert(!m->cleaning); 2207 assert(!m->laundry); 2208 2209 m->busy = TRUE; 2210 2211 /* 2212 * remove page from object here since we're already 2213 * behind the object lock... defer the rest of the work 2214 * we'd normally do in vm_page_free_prepare_object 2215 * until 'vm_page_free_list' is called 2216 */ 2217 if (m->tabled) 2218 vm_page_remove(m, TRUE); 2219 2220 assert(m->pageq.next == NULL && 2221 m->pageq.prev == NULL); 2222 m->pageq.next = (queue_entry_t)local_freeq; 2223 local_freeq = m; 2224 local_freed++; 2225 2226 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2227 vm_pageout_freed_from_speculative++; 2228 else if (page_prev_state == PAGE_STATE_CLEAN) 2229 vm_pageout_freed_from_cleaned++; 2230 else 2231 vm_pageout_freed_from_inactive_clean++; 2232 2233 if (page_prev_state != PAGE_STATE_SPECULATIVE) 2234 vm_pageout_stats[vm_pageout_stat_now].reclaimed++; 2235 2236 inactive_burst_count = 0; 2237 goto done_with_inactivepage; 2238 } 2239 /* 2240 * If the object is empty, the page must be reclaimed even 2241 * if dirty or used. 2242 * If the page belongs to a volatile object, we stick it back 2243 * on. 2244 */ 2245 if (object->copy == VM_OBJECT_NULL) { 2246 if (object->purgable == VM_PURGABLE_EMPTY) { 2247 if (m->pmapped == TRUE) { 2248 /* unmap the page */ 2249 refmod_state = pmap_disconnect(m->phys_page); 2250 if (refmod_state & VM_MEM_MODIFIED) { 2251 SET_PAGE_DIRTY(m, FALSE); 2252 } 2253 } 2254 if (m->dirty || m->precious) { 2255 /* we saved the cost of cleaning this page ! */ 2256 vm_page_purged_count++; 2257 } 2258 goto reclaim_page; 2259 } 2260 2261 if (COMPRESSED_PAGER_IS_ACTIVE) { 2262 /* 2263 * With the VM compressor, the cost of 2264 * reclaiming a page is much lower (no I/O), 2265 * so if we find a "volatile" page, it's better 2266 * to let it get compressed rather than letting 2267 * it occupy a full page until it gets purged. 2268 * So no need to check for "volatile" here. 
2269 */ 2270 } else if (object->purgable == VM_PURGABLE_VOLATILE) { 2271 /* 2272 * Avoid cleaning a "volatile" page which might 2273 * be purged soon. 2274 */ 2275 2276 /* if it's wired, we can't put it on our queue */ 2277 assert(!VM_PAGE_WIRED(m)); 2278 2279 /* just stick it back on! */ 2280 reactivated_this_call++; 2281 2282 if (page_prev_state == PAGE_STATE_CLEAN) 2283 vm_pageout_cleaned_volatile_reactivated++; 2284 2285 goto reactivate_page; 2286 } 2287 } 2288 2289consider_inactive_page: 2290 if (m->busy) { 2291 /* 2292 * CAUTION CAUTION: 2293 * A "busy" page should always be left alone, except... 2294 */ 2295 if (m->cleaning && m->encrypted_cleaning) { 2296 /* 2297 * ENCRYPTED_SWAP: 2298 * We could get here with a "busy" page 2299 * if it's being encrypted during a 2300 * "clean-in-place" operation. We'll deal 2301 * with it right away by testing if it has been 2302 * referenced and either reactivating it or 2303 * promoting it from "clean-in-place" to 2304 * "clean-and-free". 2305 */ 2306 } else { 2307 panic("\"busy\" page considered for pageout\n"); 2308 } 2309 } 2310 2311 /* 2312 * If it's being used, reactivate. 2313 * (Fictitious pages are either busy or absent.) 2314 * First, update the reference and dirty bits 2315 * to make sure the page is unreferenced. 2316 */ 2317 refmod_state = -1; 2318 2319 if (m->reference == FALSE && m->pmapped == TRUE) { 2320 refmod_state = pmap_get_refmod(m->phys_page); 2321 2322 if (refmod_state & VM_MEM_REFERENCED) 2323 m->reference = TRUE; 2324 if (refmod_state & VM_MEM_MODIFIED) { 2325 SET_PAGE_DIRTY(m, FALSE); 2326 } 2327 } 2328 2329 /* 2330 * if (m->cleaning && !m->pageout) 2331 * If already cleaning this page in place and it hasn't 2332 * been recently referenced, just pull off the queue. 2333 * We can leave the page mapped, and upl_commit_range 2334 * will put it on the clean queue. 2335 * 2336 * note: if m->encrypted_cleaning == TRUE, then 2337 * m->cleaning == TRUE 2338 * and we'll handle it here 2339 * 2340 * if (m->pageout && !m->cleaning) 2341 * an msync INVALIDATE is in progress... 2342 * this page has been marked for destruction 2343 * after it has been cleaned, 2344 * but not yet gathered into a UPL 2345 * where 'cleaning' will be set... 2346 * just leave it off the paging queues 2347 * 2348 * if (m->pageout && m->clenaing) 2349 * an msync INVALIDATE is in progress 2350 * and the UPL has already gathered this page... 2351 * just leave it off the paging queues 2352 */ 2353 2354 /* 2355 * page with m->pageout and still on the queues means that an 2356 * MS_INVALIDATE is in progress on this page... leave it alone 2357 */ 2358 if (m->pageout) { 2359 goto done_with_inactivepage; 2360 } 2361 2362 /* if cleaning, reactivate if referenced. otherwise, just pull off queue */ 2363 if (m->cleaning) { 2364 if (m->reference == TRUE) { 2365 reactivated_this_call++; 2366 goto reactivate_page; 2367 } else { 2368 goto done_with_inactivepage; 2369 } 2370 } 2371 2372 if (m->reference || m->dirty) { 2373 /* deal with a rogue "reusable" page */ 2374 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m); 2375 } 2376 2377 if (!m->no_cache && 2378 (m->reference || 2379 (m->xpmapped && !object->internal && (vm_page_xpmapped_external_count < (vm_page_external_count / 4))))) { 2380 /* 2381 * The page we pulled off the inactive list has 2382 * been referenced. It is possible for other 2383 * processors to be touching pages faster than we 2384 * can clear the referenced bit and traverse the 2385 * inactive queue, so we limit the number of 2386 * reactivations. 
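 * once reactivate_limit is exceeded (or we're in a catch-up or forced-reclaim
 * run) we fall through and steal the page anyway... forced_reclaim is noted so
 * the dirty bit still gets refreshed via pmap_get_refmod() below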
*/ 2388 if (++reactivated_this_call >= reactivate_limit) { 2389 vm_pageout_reactivation_limit_exceeded++; 2390 } else if (catch_up_count) { 2391 vm_pageout_catch_ups++; 2392 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) { 2393 vm_pageout_inactive_force_reclaim++; 2394 } else { 2395 uint32_t isinuse; 2396 2397 if (page_prev_state == PAGE_STATE_CLEAN) 2398 vm_pageout_cleaned_reference_reactivated++; 2399 2400reactivate_page: 2401 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL && 2402 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) { 2403 /* 2404 * no explicit mappings of this object exist 2405 * and it's not open via the filesystem 2406 */ 2407 vm_page_deactivate(m); 2408 vm_pageout_inactive_deactivated++; 2409 } else { 2410 /* 2411 * The page was/is being used, so put back on active list. 2412 */ 2413 vm_page_activate(m); 2414 VM_STAT_INCR(reactivations); 2415 inactive_burst_count = 0; 2416 } 2417 2418 if (page_prev_state == PAGE_STATE_CLEAN) 2419 vm_pageout_cleaned_reactivated++; 2420 2421 vm_pageout_inactive_used++; 2422 2423 goto done_with_inactivepage; 2424 } 2425 /* 2426 * Make sure we call pmap_get_refmod() if it 2427 * wasn't already called just above, to update 2428 * the dirty bit. 2429 */ 2430 if ((refmod_state == -1) && !m->dirty && m->pmapped) { 2431 refmod_state = pmap_get_refmod(m->phys_page); 2432 if (refmod_state & VM_MEM_MODIFIED) { 2433 SET_PAGE_DIRTY(m, FALSE); 2434 } 2435 } 2436 forced_reclaim = TRUE; 2437 } else { 2438 forced_reclaim = FALSE; 2439 } 2440 2441 XPR(XPR_VM_PAGEOUT, 2442 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", 2443 object, m->offset, m, 0,0); 2444 2445 /* 2446 * we've got a candidate page to steal... 2447 * 2448 * m->dirty is up to date courtesy of the 2449 * preceding check for m->reference... if 2450 * we get here, then m->reference had to be 2451 * FALSE (or possibly "reactivate_limit" was 2452 * exceeded), but in either case we called 2453 * pmap_get_refmod() and updated both 2454 * m->reference and m->dirty 2455 * 2456 * if it's dirty or precious we need to 2457 * see if the target queue is throttled 2458 * if it is, we need to skip over it by moving it back 2459 * to the end of the inactive queue 2460 */ 2461 2462 inactive_throttled = FALSE; 2463 2464 if (m->dirty || m->precious) { 2465 if (object->internal) { 2466 if (VM_PAGE_Q_THROTTLED(iq)) 2467 inactive_throttled = TRUE; 2468 } else if (VM_PAGE_Q_THROTTLED(eq)) { 2469 inactive_throttled = TRUE; 2470 } 2471 } 2472throttle_inactive: 2473 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && 2474 object->internal && m->dirty && 2475 (object->purgable == VM_PURGABLE_DENY || 2476 object->purgable == VM_PURGABLE_NONVOLATILE || 2477 object->purgable == VM_PURGABLE_VOLATILE)) { 2478 queue_enter(&vm_page_queue_throttled, m, 2479 vm_page_t, pageq); 2480 m->throttled = TRUE; 2481 vm_page_throttled_count++; 2482 2483 vm_pageout_scan_reclaimed_throttled++; 2484 2485 inactive_burst_count = 0; 2486 goto done_with_inactivepage; 2487 } 2488 if (inactive_throttled == TRUE) { 2489 2490 if (object->internal == FALSE) { 2491 /* 2492 * we need to break up the following potential deadlock case... 2493 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written. 2494 * b) The thread doing the writing is waiting for pages while holding the truncate lock 2495 * c) Most of the pages in the inactive queue belong to this file.
2496 * 2497 * we are potentially in this deadlock because... 2498 * a) the external pageout queue is throttled 2499 * b) we're done with the active queue and moved on to the inactive queue 2500 * c) we've got a dirty external page 2501 * 2502 * since we don't know the reason for the external pageout queue being throttled we 2503 * must suspect that we are deadlocked, so move the current page onto the active queue 2504 * in an effort to cause a page from the active queue to 'age' to the inactive queue 2505 * 2506 * if we don't have jetsam configured (i.e. we have a dynamic pager), set 2507 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous 2508 * pool the next time we select a victim page... if we can make enough new free pages, 2509 * the deadlock will break, the external pageout queue will empty and it will no longer 2510 * be throttled 2511 * 2512 * if we have jestam configured, keep a count of the pages reactivated this way so 2513 * that we can try to find clean pages in the active/inactive queues before 2514 * deciding to jetsam a process 2515 */ 2516 vm_pageout_scan_inactive_throttled_external++; 2517 2518 queue_enter(&vm_page_queue_active, m, vm_page_t, pageq); 2519 m->active = TRUE; 2520 vm_page_active_count++; 2521 vm_page_pageable_external_count++; 2522 2523 vm_pageout_adjust_io_throttles(iq, eq, FALSE); 2524 2525#if CONFIG_MEMORYSTATUS && CONFIG_JETSAM 2526 vm_pageout_inactive_external_forced_reactivate_limit--; 2527 2528 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) { 2529 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count; 2530 /* 2531 * Possible deadlock scenario so request jetsam action 2532 */ 2533 assert(object); 2534 vm_object_unlock(object); 2535 object = VM_OBJECT_NULL; 2536 vm_page_unlock_queues(); 2537 2538 VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START, 2539 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count); 2540 2541 /* Kill first suitable process */ 2542 if (memorystatus_kill_on_VM_page_shortage(FALSE) == FALSE) { 2543 panic("vm_pageout_scan: Jetsam request failed\n"); 2544 } 2545 2546 VM_DEBUG_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END, 0, 0, 0, 0); 2547 2548 vm_pageout_inactive_external_forced_jetsam_count++; 2549 vm_page_lock_queues(); 2550 delayed_unlock = 1; 2551 } 2552#else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */ 2553 force_anonymous = TRUE; 2554#endif 2555 inactive_burst_count = 0; 2556 goto done_with_inactivepage; 2557 } else { 2558 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2559 page_prev_state = PAGE_STATE_INACTIVE; 2560 2561 vm_pageout_scan_inactive_throttled_internal++; 2562 2563 goto requeue_page; 2564 } 2565 } 2566 2567 /* 2568 * we've got a page that we can steal... 2569 * eliminate all mappings and make sure 2570 * we have the up-to-date modified state 2571 * 2572 * if we need to do a pmap_disconnect then we 2573 * need to re-evaluate m->dirty since the pmap_disconnect 2574 * provides the true state atomically... the 2575 * page was still mapped up to the pmap_disconnect 2576 * and may have been dirtied at the last microsecond 2577 * 2578 * Note that if 'pmapped' is FALSE then the page is not 2579 * and has not been in any map, so there is no point calling 2580 * pmap_disconnect(). m->dirty could have been set in anticipation 2581 * of likely usage of the page. 
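 * (when the page is bound for the compressor we pass PMAP_OPTIONS_COMPRESSOR
 * to the disconnect below, which lets the pmap layer account for it as such...
 * the cases enumerated in the next comment get a plain disconnect instead)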
2582 */ 2583 if (m->pmapped == TRUE) { 2584 2585 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE || object->internal == FALSE) { 2586 /* 2587 * Don't count this page as going into the compressor if any of these are true: 2588 * 1) We have the dynamic pager i.e. no compressed pager 2589 * 2) Freezer enabled device with a freezer file to hold the app data i.e. no compressed pager 2590 * 3) Freezer enabled device with compressed pager backend (exclusive use) i.e. most of the VM system 2591 (including vm_pageout_scan) has no knowledge of the compressor 2592 * 4) This page belongs to a file and hence will not be sent into the compressor 2593 */ 2594 2595 refmod_state = pmap_disconnect_options(m->phys_page, 0, NULL); 2596 } else { 2597 refmod_state = pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL); 2598 } 2599 2600 if (refmod_state & VM_MEM_MODIFIED) { 2601 SET_PAGE_DIRTY(m, FALSE); 2602 } 2603 } 2604 /* 2605 * reset our count of pages that have been reclaimed 2606 * since the last page was 'stolen' 2607 */ 2608 inactive_reclaim_run = 0; 2609 2610 /* 2611 * If it's clean and not precious, we can free the page. 2612 */ 2613 if (!m->dirty && !m->precious) { 2614 2615 if (page_prev_state == PAGE_STATE_SPECULATIVE) 2616 vm_pageout_speculative_clean++; 2617 else { 2618 if (page_prev_state == PAGE_STATE_ANONYMOUS) 2619 vm_pageout_inactive_anonymous++; 2620 else if (page_prev_state == PAGE_STATE_CLEAN) 2621 vm_pageout_cleaned_reclaimed++; 2622 2623 vm_pageout_inactive_clean++; 2624 } 2625 2626 /* 2627 * OK, at this point we have found a page we are going to free. 2628 */ 2629#if CONFIG_PHANTOM_CACHE 2630 if (!object->internal) 2631 vm_phantom_cache_add_ghost(m); 2632#endif 2633 goto reclaim_page; 2634 } 2635 2636 /* 2637 * The page may have been dirtied since the last check 2638 * for a throttled target queue (which may have been skipped 2639 * if the page was clean then). With the dirty page 2640 * disconnected here, we can make one final check. 2641 */ 2642 if (object->internal) { 2643 if (VM_PAGE_Q_THROTTLED(iq)) 2644 inactive_throttled = TRUE; 2645 } else if (VM_PAGE_Q_THROTTLED(eq)) { 2646 inactive_throttled = TRUE; 2647 } 2648 2649 if (inactive_throttled == TRUE) 2650 goto throttle_inactive; 2651 2652#if VM_PRESSURE_EVENTS 2653#if CONFIG_JETSAM 2654 2655 /* 2656 * If Jetsam is enabled, then the sending 2657 * of memory pressure notifications is handled 2658 * from the same thread that takes care of high-water 2659 * and other jetsams i.e. the memorystatus_thread. 2660 */ 2661 2662#else /* CONFIG_JETSAM */ 2663 2664 vm_pressure_response(); 2665 2666#endif /* CONFIG_JETSAM */ 2667#endif /* VM_PRESSURE_EVENTS */ 2668 2669 /* 2670 * do NOT set the pageout bit! 2671 * sure, we might need free pages, but this page is going to take time to become free 2672 * anyway, so we may as well put it on the clean queue first and take it from there later 2673 * if necessary. that way, we'll ensure we don't free up too much. 
-mj 2674 */ 2675 vm_pageout_cluster(m, FALSE); 2676 2677 if (page_prev_state == PAGE_STATE_ANONYMOUS) 2678 vm_pageout_inactive_anonymous++; 2679 if (object->internal) 2680 vm_pageout_inactive_dirty_internal++; 2681 else 2682 vm_pageout_inactive_dirty_external++; 2683 2684 2685done_with_inactivepage: 2686 2687 if (delayed_unlock++ > delayed_unlock_limit || try_failed == TRUE) { 2688 boolean_t need_delay = TRUE; 2689 2690 if (object != NULL) { 2691 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 2692 vm_object_unlock(object); 2693 object = NULL; 2694 } 2695 vm_page_unlock_queues(); 2696 2697 if (local_freeq) { 2698 2699 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START, 2700 vm_page_free_count, local_freed, delayed_unlock_limit, 4); 2701 2702 vm_page_free_list(local_freeq, TRUE); 2703 2704 VM_DEBUG_EVENT(vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_END, 2705 vm_page_free_count, local_freed, 0, 4); 2706 2707 local_freeq = NULL; 2708 local_freed = 0; 2709 need_delay = FALSE; 2710 } 2711 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 2712 vm_consider_waking_compactor_swapper(); 2713 need_delay = FALSE; 2714 } 2715 vm_page_lock_queues(); 2716 2717 if (need_delay == TRUE) 2718 lck_mtx_yield(&vm_page_queue_lock); 2719 2720 delayed_unlock = 1; 2721 } 2722 vm_pageout_considered_page++; 2723 2724 /* 2725 * back to top of pageout scan loop 2726 */ 2727 } 2728} 2729 2730 2731int vm_page_free_count_init; 2732 2733void 2734vm_page_free_reserve( 2735 int pages) 2736{ 2737 int free_after_reserve; 2738 2739 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 2740 2741 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT)) 2742 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT; 2743 else 2744 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT); 2745 2746 } else { 2747 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT) 2748 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT; 2749 else 2750 vm_page_free_reserved += pages; 2751 } 2752 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved; 2753 2754 vm_page_free_min = vm_page_free_reserved + 2755 VM_PAGE_FREE_MIN(free_after_reserve); 2756 2757 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) 2758 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT; 2759 2760 vm_page_free_target = vm_page_free_reserved + 2761 VM_PAGE_FREE_TARGET(free_after_reserve); 2762 2763 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) 2764 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT; 2765 2766 if (vm_page_free_target < vm_page_free_min + 5) 2767 vm_page_free_target = vm_page_free_min + 5; 2768 2769 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 3); 2770 vm_page_creation_throttle = vm_page_free_target * 3; 2771} 2772 2773/* 2774 * vm_pageout is the high level pageout daemon. 
2775 */ 2776 2777void 2778vm_pageout_continue(void) 2779{ 2780 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL); 2781 vm_pageout_scan_event_counter++; 2782 2783 vm_pageout_scan(); 2784 /* 2785 * we hold both the vm_page_queue_free_lock 2786 * and the vm_page_queues_lock at this point 2787 */ 2788 assert(vm_page_free_wanted == 0); 2789 assert(vm_page_free_wanted_privileged == 0); 2790 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT); 2791 2792 lck_mtx_unlock(&vm_page_queue_free_lock); 2793 vm_page_unlock_queues(); 2794 2795 counter(c_vm_pageout_block++); 2796 thread_block((thread_continue_t)vm_pageout_continue); 2797 /*NOTREACHED*/ 2798} 2799 2800 2801#ifdef FAKE_DEADLOCK 2802 2803#define FAKE_COUNT 5000 2804 2805int internal_count = 0; 2806int fake_deadlock = 0; 2807 2808#endif 2809 2810static void 2811vm_pageout_iothread_continue(struct vm_pageout_queue *q) 2812{ 2813 vm_page_t m = NULL; 2814 vm_object_t object; 2815 vm_object_offset_t offset; 2816 memory_object_t pager; 2817 thread_t self = current_thread(); 2818 2819 if ((vm_pageout_internal_iothread != THREAD_NULL) 2820 && (self == vm_pageout_external_iothread ) 2821 && (self->options & TH_OPT_VMPRIV)) 2822 self->options &= ~TH_OPT_VMPRIV; 2823 2824 vm_page_lockspin_queues(); 2825 2826 while ( !queue_empty(&q->pgo_pending) ) { 2827 2828 q->pgo_busy = TRUE; 2829 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); 2830 if (m->object->object_slid) { 2831 panic("slid page %p not allowed on this path\n", m); 2832 } 2833 VM_PAGE_CHECK(m); 2834 m->pageout_queue = FALSE; 2835 m->pageq.next = NULL; 2836 m->pageq.prev = NULL; 2837 2838 /* 2839 * grab a snapshot of the object and offset this 2840 * page is tabled in so that we can relookup this 2841 * page after we've taken the object lock - these 2842 * fields are stable while we hold the page queues lock 2843 * but as soon as we drop it, there is nothing to keep 2844 * this page in this object... we hold an activity_in_progress 2845 * on this object which will keep it from terminating 2846 */ 2847 object = m->object; 2848 offset = m->offset; 2849 2850 vm_page_unlock_queues(); 2851 2852#ifdef FAKE_DEADLOCK 2853 if (q == &vm_pageout_queue_internal) { 2854 vm_offset_t addr; 2855 int pg_count; 2856 2857 internal_count++; 2858 2859 if ((internal_count == FAKE_COUNT)) { 2860 2861 pg_count = vm_page_free_count + vm_page_free_reserved; 2862 2863 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) { 2864 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count); 2865 } 2866 internal_count = 0; 2867 fake_deadlock++; 2868 } 2869 } 2870#endif 2871 vm_object_lock(object); 2872 2873 m = vm_page_lookup(object, offset); 2874 2875 if (m == NULL || 2876 m->busy || m->cleaning || m->pageout_queue || !m->laundry) { 2877 /* 2878 * it's either the same page that someone else has 2879 * started cleaning (or it's finished cleaning or 2880 * been put back on the pageout queue), or 2881 * the page has been freed or we have found a 2882 * new page at this offset... in all of these cases 2883 * we merely need to release the activity_in_progress 2884 * we took when we put the page on the pageout queue 2885 */ 2886 vm_object_activity_end(object); 2887 vm_object_unlock(object); 2888 2889 vm_page_lockspin_queues(); 2890 continue; 2891 } 2892 if (!object->pager_initialized) { 2893 2894 /* 2895 * If there is no memory object for the page, create 2896 * one and hand it to the default pager. 
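 * we try vm_object_collapse() first, then vm_object_pager_create()... if the
 * object still has no pager after both attempts, the page is simply
 * reactivated below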
2897 */ 2898 2899 if (!object->pager_initialized) 2900 vm_object_collapse(object, 2901 (vm_object_offset_t) 0, 2902 TRUE); 2903 if (!object->pager_initialized) 2904 vm_object_pager_create(object); 2905 if (!object->pager_initialized) { 2906 /* 2907 * Still no pager for the object. 2908 * Reactivate the page. 2909 * 2910 * Should only happen if there is no 2911 * default pager. 2912 */ 2913 m->pageout = FALSE; 2914 2915 vm_page_lockspin_queues(); 2916 2917 vm_pageout_throttle_up(m); 2918 vm_page_activate(m); 2919 vm_pageout_dirty_no_pager++; 2920 2921 vm_page_unlock_queues(); 2922 2923 /* 2924 * And we are done with it. 2925 */ 2926 vm_object_activity_end(object); 2927 vm_object_unlock(object); 2928 2929 vm_page_lockspin_queues(); 2930 continue; 2931 } 2932 } 2933 pager = object->pager; 2934 2935 if (pager == MEMORY_OBJECT_NULL) { 2936 /* 2937 * This pager has been destroyed by either 2938 * memory_object_destroy or vm_object_destroy, and 2939 * so there is nowhere for the page to go. 2940 */ 2941 if (m->pageout) { 2942 /* 2943 * Just free the page... VM_PAGE_FREE takes 2944 * care of cleaning up all the state... 2945 * including doing the vm_pageout_throttle_up 2946 */ 2947 VM_PAGE_FREE(m); 2948 } else { 2949 vm_page_lockspin_queues(); 2950 2951 vm_pageout_throttle_up(m); 2952 vm_page_activate(m); 2953 2954 vm_page_unlock_queues(); 2955 2956 /* 2957 * And we are done with it. 2958 */ 2959 } 2960 vm_object_activity_end(object); 2961 vm_object_unlock(object); 2962 2963 vm_page_lockspin_queues(); 2964 continue; 2965 } 2966#if 0 2967 /* 2968 * we don't hold the page queue lock 2969 * so this check isn't safe to make 2970 */ 2971 VM_PAGE_CHECK(m); 2972#endif 2973 /* 2974 * give back the activity_in_progress reference we 2975 * took when we queued up this page and replace it 2976 * it with a paging_in_progress reference that will 2977 * also hold the paging offset from changing and 2978 * prevent the object from terminating 2979 */ 2980 vm_object_activity_end(object); 2981 vm_object_paging_begin(object); 2982 vm_object_unlock(object); 2983 2984 /* 2985 * Send the data to the pager. 
2986 * any pageout clustering happens there 2987 */ 2988 memory_object_data_return(pager, 2989 m->offset + object->paging_offset, 2990 PAGE_SIZE, 2991 NULL, 2992 NULL, 2993 FALSE, 2994 FALSE, 2995 0); 2996 2997 vm_object_lock(object); 2998 vm_object_paging_end(object); 2999 vm_object_unlock(object); 3000 3001 vm_pageout_io_throttle(); 3002 3003 vm_page_lockspin_queues(); 3004 } 3005 q->pgo_busy = FALSE; 3006 q->pgo_idle = TRUE; 3007 3008 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); 3009 vm_page_unlock_queues(); 3010 3011 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) q); 3012 /*NOTREACHED*/ 3013} 3014 3015 3016static void 3017vm_pageout_iothread_external_continue(struct vm_pageout_queue *q) 3018{ 3019 vm_page_t m = NULL; 3020 vm_object_t object; 3021 vm_object_offset_t offset; 3022 memory_object_t pager; 3023 3024 3025 if (vm_pageout_internal_iothread != THREAD_NULL) 3026 current_thread()->options &= ~TH_OPT_VMPRIV; 3027 3028 vm_page_lockspin_queues(); 3029 3030 while ( !queue_empty(&q->pgo_pending) ) { 3031 3032 q->pgo_busy = TRUE; 3033 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); 3034 if (m->object->object_slid) { 3035 panic("slid page %p not allowed on this path\n", m); 3036 } 3037 VM_PAGE_CHECK(m); 3038 m->pageout_queue = FALSE; 3039 m->pageq.next = NULL; 3040 m->pageq.prev = NULL; 3041 3042 /* 3043 * grab a snapshot of the object and offset this 3044 * page is tabled in so that we can relookup this 3045 * page after we've taken the object lock - these 3046 * fields are stable while we hold the page queues lock 3047 * but as soon as we drop it, there is nothing to keep 3048 * this page in this object... we hold an activity_in_progress 3049 * on this object which will keep it from terminating 3050 */ 3051 object = m->object; 3052 offset = m->offset; 3053 3054 vm_page_unlock_queues(); 3055 3056 vm_object_lock(object); 3057 3058 m = vm_page_lookup(object, offset); 3059 3060 if (m == NULL || 3061 m->busy || m->cleaning || m->pageout_queue || !m->laundry) { 3062 /* 3063 * it's either the same page that someone else has 3064 * started cleaning (or it's finished cleaning or 3065 * been put back on the pageout queue), or 3066 * the page has been freed or we have found a 3067 * new page at this offset... in all of these cases 3068 * we merely need to release the activity_in_progress 3069 * we took when we put the page on the pageout queue 3070 */ 3071 vm_object_activity_end(object); 3072 vm_object_unlock(object); 3073 3074 vm_page_lockspin_queues(); 3075 continue; 3076 } 3077 pager = object->pager; 3078 3079 if (pager == MEMORY_OBJECT_NULL) { 3080 /* 3081 * This pager has been destroyed by either 3082 * memory_object_destroy or vm_object_destroy, and 3083 * so there is nowhere for the page to go. 3084 */ 3085 if (m->pageout) { 3086 /* 3087 * Just free the page... VM_PAGE_FREE takes 3088 * care of cleaning up all the state... 3089 * including doing the vm_pageout_throttle_up 3090 */ 3091 VM_PAGE_FREE(m); 3092 } else { 3093 vm_page_lockspin_queues(); 3094 3095 vm_pageout_throttle_up(m); 3096 vm_page_activate(m); 3097 3098 vm_page_unlock_queues(); 3099 3100 /* 3101 * And we are done with it. 
3102 */ 3103 } 3104 vm_object_activity_end(object); 3105 vm_object_unlock(object); 3106 3107 vm_page_lockspin_queues(); 3108 continue; 3109 } 3110#if 0 3111 /* 3112 * we don't hold the page queue lock 3113 * so this check isn't safe to make 3114 */ 3115 VM_PAGE_CHECK(m); 3116#endif 3117 /* 3118 * give back the activity_in_progress reference we 3119 * took when we queued up this page and replace it 3120 * it with a paging_in_progress reference that will 3121 * also hold the paging offset from changing and 3122 * prevent the object from terminating 3123 */ 3124 vm_object_activity_end(object); 3125 vm_object_paging_begin(object); 3126 vm_object_unlock(object); 3127 3128 /* 3129 * Send the data to the pager. 3130 * any pageout clustering happens there 3131 */ 3132 memory_object_data_return(pager, 3133 m->offset + object->paging_offset, 3134 PAGE_SIZE, 3135 NULL, 3136 NULL, 3137 FALSE, 3138 FALSE, 3139 0); 3140 3141 vm_object_lock(object); 3142 vm_object_paging_end(object); 3143 vm_object_unlock(object); 3144 3145 vm_pageout_io_throttle(); 3146 3147 vm_page_lockspin_queues(); 3148 } 3149 q->pgo_busy = FALSE; 3150 q->pgo_idle = TRUE; 3151 3152 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); 3153 vm_page_unlock_queues(); 3154 3155 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q); 3156 /*NOTREACHED*/ 3157} 3158 3159 3160uint32_t vm_compressor_failed; 3161 3162static void 3163vm_pageout_iothread_internal_continue(struct cq *cq) 3164{ 3165 struct vm_pageout_queue *q; 3166 vm_page_t m = NULL; 3167 vm_object_t object; 3168 memory_object_t pager; 3169 boolean_t pgo_draining; 3170 vm_page_t local_q; 3171 int local_cnt; 3172 vm_page_t local_freeq = NULL; 3173 int local_freed = 0; 3174 int local_batch_size; 3175 kern_return_t retval; 3176 int compressed_count_delta; 3177 3178 3179 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0); 3180 3181 q = cq->q; 3182 local_batch_size = q->pgo_maxlaundry / (vm_compressor_thread_count * 4); 3183 3184 while (TRUE) { 3185 3186 local_cnt = 0; 3187 local_q = NULL; 3188 3189 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0); 3190 3191 vm_page_lock_queues(); 3192 3193 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0); 3194 3195 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, 0, 0, 0, 0, 0); 3196 3197 while ( !queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) { 3198 3199 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); 3200 3201 VM_PAGE_CHECK(m); 3202 3203 m->pageout_queue = FALSE; 3204 m->pageq.prev = NULL; 3205 3206 m->pageq.next = (queue_entry_t)local_q; 3207 local_q = m; 3208 local_cnt++; 3209 } 3210 if (local_q == NULL) 3211 break; 3212 3213 q->pgo_busy = TRUE; 3214 3215 if ((pgo_draining = q->pgo_draining) == FALSE) 3216 vm_pageout_throttle_up_batch(q, local_cnt); 3217 3218 vm_page_unlock_queues(); 3219 3220 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); 3221 3222 while (local_q) { 3223 3224 m = local_q; 3225 local_q = (vm_page_t)m->pageq.next; 3226 m->pageq.next = NULL; 3227 3228 if (m->object->object_slid) { 3229 panic("slid page %p not allowed on this path\n", m); 3230 } 3231 3232 object = m->object; 3233 pager = object->pager; 3234 3235 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) { 3236 3237 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0); 3238 3239 vm_object_lock(object); 3240 3241 /* 3242 * If there is no memory object for the page, create 3243 * one and hand it to the compression pager. 
3244 */ 3245 3246 if (!object->pager_initialized) 3247 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE); 3248 if (!object->pager_initialized) 3249 vm_object_compressor_pager_create(object); 3250 3251 if (!object->pager_initialized) { 3252 /* 3253 * Still no pager for the object. 3254 * Reactivate the page. 3255 * 3256 * Should only happen if there is no 3257 * compression pager 3258 */ 3259 m->pageout = FALSE; 3260 m->laundry = FALSE; 3261 PAGE_WAKEUP_DONE(m); 3262 3263 vm_page_lockspin_queues(); 3264 vm_page_activate(m); 3265 vm_pageout_dirty_no_pager++; 3266 vm_page_unlock_queues(); 3267 3268 /* 3269 * And we are done with it. 3270 */ 3271 vm_object_activity_end(object); 3272 vm_object_unlock(object); 3273 3274 continue; 3275 } 3276 pager = object->pager; 3277 3278 if (pager == MEMORY_OBJECT_NULL) { 3279 /* 3280 * This pager has been destroyed by either 3281 * memory_object_destroy or vm_object_destroy, and 3282 * so there is nowhere for the page to go. 3283 */ 3284 if (m->pageout) { 3285 /* 3286 * Just free the page... VM_PAGE_FREE takes 3287 * care of cleaning up all the state... 3288 * including doing the vm_pageout_throttle_up 3289 */ 3290 VM_PAGE_FREE(m); 3291 } else { 3292 m->laundry = FALSE; 3293 PAGE_WAKEUP_DONE(m); 3294 3295 vm_page_lockspin_queues(); 3296 vm_page_activate(m); 3297 vm_page_unlock_queues(); 3298 3299 /* 3300 * And we are done with it. 3301 */ 3302 } 3303 vm_object_activity_end(object); 3304 vm_object_unlock(object); 3305 3306 continue; 3307 } 3308 vm_object_unlock(object); 3309 3310 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0); 3311 } 3312 while (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) { 3313 kern_return_t wait_result; 3314 int need_wakeup = 0; 3315 3316 if (local_freeq) { 3317 vm_page_free_list(local_freeq, TRUE); 3318 3319 local_freeq = NULL; 3320 local_freed = 0; 3321 3322 continue; 3323 } 3324 lck_mtx_lock_spin(&vm_page_queue_free_lock); 3325 3326 if (vm_page_free_count < (vm_page_free_reserved - COMPRESSOR_FREE_RESERVED_LIMIT)) { 3327 3328 if (vm_page_free_wanted_privileged++ == 0) 3329 need_wakeup = 1; 3330 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT); 3331 3332 lck_mtx_unlock(&vm_page_queue_free_lock); 3333 3334 if (need_wakeup) 3335 thread_wakeup((event_t)&vm_page_free_wanted); 3336 3337 if (wait_result == THREAD_WAITING) 3338 thread_block(THREAD_CONTINUE_NULL); 3339 } else 3340 lck_mtx_unlock(&vm_page_queue_free_lock); 3341 } 3342 3343 assert(object->activity_in_progress > 0); 3344 3345 retval = vm_compressor_pager_put( 3346 pager, 3347 m->offset + object->paging_offset, 3348 m->phys_page, 3349 &cq->current_chead, 3350 cq->scratch_buf, 3351 &compressed_count_delta); 3352 3353 vm_object_lock(object); 3354 assert(object->activity_in_progress > 0); 3355 3356 assert(m->object == object); 3357 3358 vm_compressor_pager_count(pager, 3359 compressed_count_delta, 3360 FALSE, /* shared_lock */ 3361 object); 3362 3363 m->laundry = FALSE; 3364 m->pageout = FALSE; 3365 3366 if (retval == KERN_SUCCESS) { 3367 /* 3368 * If the object is purgeable, its owner's 3369 * purgeable ledgers will be updated in 3370 * vm_page_remove() but the page still 3371 * contributes to the owner's memory footprint, 3372 * so account for it as such. 
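 * (that's what the vm_purgeable_compressed_update(object, +1) call below
 * does... the ledger side is handled when vm_page_remove() runs)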
3373 */ 3374 if (object->purgable != VM_PURGABLE_DENY && 3375 object->vo_purgeable_owner != NULL) { 3376 /* one more compressed purgeable page */ 3377 vm_purgeable_compressed_update(object, 3378 +1); 3379 } 3380 3381 vm_page_compressions_failing = FALSE; 3382 3383 VM_STAT_INCR(compressions); 3384 3385 if (m->tabled) 3386 vm_page_remove(m, TRUE); 3387 vm_object_activity_end(object); 3388 vm_object_unlock(object); 3389 3390 m->pageq.next = (queue_entry_t)local_freeq; 3391 local_freeq = m; 3392 local_freed++; 3393 3394 } else { 3395 PAGE_WAKEUP_DONE(m); 3396 3397 vm_page_lockspin_queues(); 3398 3399 vm_page_activate(m); 3400 vm_compressor_failed++; 3401 3402 vm_page_compressions_failing = TRUE; 3403 3404 vm_page_unlock_queues(); 3405 3406 vm_object_activity_end(object); 3407 vm_object_unlock(object); 3408 } 3409 } 3410 if (local_freeq) { 3411 vm_page_free_list(local_freeq, TRUE); 3412 3413 local_freeq = NULL; 3414 local_freed = 0; 3415 } 3416 if (pgo_draining == TRUE) { 3417 vm_page_lockspin_queues(); 3418 vm_pageout_throttle_up_batch(q, local_cnt); 3419 vm_page_unlock_queues(); 3420 } 3421 } 3422 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0); 3423 3424 /* 3425 * queue lock is held and our q is empty 3426 */ 3427 q->pgo_busy = FALSE; 3428 q->pgo_idle = TRUE; 3429 3430 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT); 3431 vm_page_unlock_queues(); 3432 3433 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0); 3434 3435 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq); 3436 /*NOTREACHED*/ 3437} 3438 3439 3440 3441static void 3442vm_pageout_adjust_io_throttles(struct vm_pageout_queue *iq, struct vm_pageout_queue *eq, boolean_t req_lowpriority) 3443{ 3444 uint32_t policy; 3445 boolean_t set_iq = FALSE; 3446 boolean_t set_eq = FALSE; 3447 3448 if (hibernate_cleaning_in_progress == TRUE) 3449 req_lowpriority = FALSE; 3450 3451 if ((DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) && iq->pgo_inited == TRUE && iq->pgo_lowpriority != req_lowpriority) 3452 set_iq = TRUE; 3453 3454 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) 3455 set_eq = TRUE; 3456 3457 if (set_iq == TRUE || set_eq == TRUE) { 3458 3459 vm_page_unlock_queues(); 3460 3461 if (req_lowpriority == TRUE) { 3462 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED; 3463 DTRACE_VM(laundrythrottle); 3464 } else { 3465 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED; 3466 DTRACE_VM(laundryunthrottle); 3467 } 3468 if (set_iq == TRUE) { 3469 proc_set_task_policy_thread(kernel_task, iq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); 3470 3471 iq->pgo_lowpriority = req_lowpriority; 3472 } 3473 if (set_eq == TRUE) { 3474 proc_set_task_policy_thread(kernel_task, eq->pgo_tid, TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy); 3475 3476 eq->pgo_lowpriority = req_lowpriority; 3477 } 3478 vm_page_lock_queues(); 3479 } 3480} 3481 3482 3483static void 3484vm_pageout_iothread_external(void) 3485{ 3486 thread_t self = current_thread(); 3487 3488 self->options |= TH_OPT_VMPRIV; 3489 3490 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); 3491 3492 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, 3493 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); 3494 3495 vm_page_lock_queues(); 3496 3497 vm_pageout_queue_external.pgo_tid = self->thread_id; 3498 vm_pageout_queue_external.pgo_lowpriority = TRUE; 3499 vm_pageout_queue_external.pgo_inited = TRUE; 3500 3501 vm_page_unlock_queues(); 3502 3503 if (COMPRESSED_PAGER_IS_ACTIVE || 
DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 3504 vm_pageout_iothread_external_continue(&vm_pageout_queue_external); 3505 else 3506 vm_pageout_iothread_continue(&vm_pageout_queue_external); 3507 3508 /*NOTREACHED*/ 3509} 3510 3511 3512static void 3513vm_pageout_iothread_internal(struct cq *cq) 3514{ 3515 thread_t self = current_thread(); 3516 3517 self->options |= TH_OPT_VMPRIV; 3518 3519 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { 3520 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL); 3521 3522 proc_set_task_policy_thread(kernel_task, self->thread_id, TASK_POLICY_EXTERNAL, 3523 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED); 3524 } 3525 vm_page_lock_queues(); 3526 3527 vm_pageout_queue_internal.pgo_tid = self->thread_id; 3528 vm_pageout_queue_internal.pgo_lowpriority = TRUE; 3529 vm_pageout_queue_internal.pgo_inited = TRUE; 3530 3531 vm_page_unlock_queues(); 3532 3533 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 3534 cq->q = &vm_pageout_queue_internal; 3535 cq->current_chead = NULL; 3536 cq->scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE); 3537 3538 vm_pageout_iothread_internal_continue(cq); 3539 } else 3540 vm_pageout_iothread_continue(&vm_pageout_queue_internal); 3541 3542 /*NOTREACHED*/ 3543} 3544 3545kern_return_t 3546vm_set_buffer_cleanup_callout(boolean_t (*func)(int)) 3547{ 3548 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) { 3549 return KERN_SUCCESS; 3550 } else { 3551 return KERN_FAILURE; /* Already set */ 3552 } 3553} 3554 3555extern boolean_t memorystatus_manual_testing_on; 3556extern unsigned int memorystatus_level; 3557 3558 3559#if VM_PRESSURE_EVENTS 3560 3561boolean_t vm_pressure_events_enabled = FALSE; 3562 3563void 3564vm_pressure_response(void) 3565{ 3566 3567 vm_pressure_level_t old_level = kVMPressureNormal; 3568 int new_level = -1; 3569 3570 uint64_t available_memory = 0; 3571 3572 if (vm_pressure_events_enabled == FALSE) 3573 return; 3574 3575 3576 available_memory = (((uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY) * 100); 3577 3578 3579 memorystatus_level = (unsigned int) (available_memory / atop_64(max_mem)); 3580 3581 if (memorystatus_manual_testing_on) { 3582 return; 3583 } 3584 3585 old_level = memorystatus_vm_pressure_level; 3586 3587 switch (memorystatus_vm_pressure_level) { 3588 3589 case kVMPressureNormal: 3590 { 3591 if (VM_PRESSURE_WARNING_TO_CRITICAL()) { 3592 new_level = kVMPressureCritical; 3593 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) { 3594 new_level = kVMPressureWarning; 3595 } 3596 break; 3597 } 3598 3599 case kVMPressureWarning: 3600 case kVMPressureUrgent: 3601 { 3602 if (VM_PRESSURE_WARNING_TO_NORMAL()) { 3603 new_level = kVMPressureNormal; 3604 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) { 3605 new_level = kVMPressureCritical; 3606 } 3607 break; 3608 } 3609 3610 case kVMPressureCritical: 3611 { 3612 if (VM_PRESSURE_WARNING_TO_NORMAL()) { 3613 new_level = kVMPressureNormal; 3614 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) { 3615 new_level = kVMPressureWarning; 3616 } 3617 break; 3618 } 3619 3620 default: 3621 return; 3622 } 3623 3624 if (new_level != -1) { 3625 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level; 3626 3627 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != new_level)) { 3628 if (vm_pressure_thread_running == FALSE) { 3629 thread_wakeup(&vm_pressure_thread); 3630 } 3631 3632 if (old_level != new_level) { 3633 thread_wakeup(&vm_pressure_changed); 3634 } 3635 } 3636 } 3637 3638} 3639#endif 
/* VM_PRESSURE_EVENTS */ 3640 3641kern_return_t 3642mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) { 3643 3644#if !VM_PRESSURE_EVENTS 3645 3646 return KERN_FAILURE; 3647 3648#else /* VM_PRESSURE_EVENTS */ 3649 3650 kern_return_t kr = KERN_SUCCESS; 3651 3652 if (pressure_level != NULL) { 3653 3654 vm_pressure_level_t old_level = memorystatus_vm_pressure_level; 3655 3656 if (wait_for_pressure == TRUE) { 3657 wait_result_t wr = 0; 3658 3659 while (old_level == *pressure_level) { 3660 wr = assert_wait((event_t) &vm_pressure_changed, 3661 THREAD_INTERRUPTIBLE); 3662 if (wr == THREAD_WAITING) { 3663 wr = thread_block(THREAD_CONTINUE_NULL); 3664 } 3665 if (wr == THREAD_INTERRUPTED) { 3666 return KERN_ABORTED; 3667 } 3668 if (wr == THREAD_AWAKENED) { 3669 3670 old_level = memorystatus_vm_pressure_level; 3671 3672 if (old_level != *pressure_level) { 3673 break; 3674 } 3675 } 3676 } 3677 } 3678 3679 *pressure_level = old_level; 3680 kr = KERN_SUCCESS; 3681 } else { 3682 kr = KERN_INVALID_ARGUMENT; 3683 } 3684 3685 return kr; 3686#endif /* VM_PRESSURE_EVENTS */ 3687} 3688 3689#if VM_PRESSURE_EVENTS 3690void 3691vm_pressure_thread(void) { 3692 static boolean_t thread_initialized = FALSE; 3693 3694 if (thread_initialized == TRUE) { 3695 vm_pressure_thread_running = TRUE; 3696 consider_vm_pressure_events(); 3697 vm_pressure_thread_running = FALSE; 3698 } 3699 3700 thread_initialized = TRUE; 3701 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT); 3702 thread_block((thread_continue_t)vm_pressure_thread); 3703} 3704#endif /* VM_PRESSURE_EVENTS */ 3705 3706 3707uint32_t vm_pageout_considered_page_last = 0; 3708 3709/* 3710 * called once per-second via "compute_averages" 3711 */ 3712void 3713compute_pageout_gc_throttle() 3714{ 3715 if (vm_pageout_considered_page != vm_pageout_considered_page_last) { 3716 3717 vm_pageout_considered_page_last = vm_pageout_considered_page; 3718 3719 thread_wakeup((event_t) &vm_pageout_garbage_collect); 3720 } 3721} 3722 3723 3724static void 3725vm_pageout_garbage_collect(int collect) 3726{ 3727 3728 if (collect) { 3729 boolean_t buf_large_zfree = FALSE; 3730 boolean_t first_try = TRUE; 3731 3732 stack_collect(); 3733 3734 consider_machine_collect(); 3735 m_drain(); 3736 3737 do { 3738 if (consider_buffer_cache_collect != NULL) { 3739 buf_large_zfree = (*consider_buffer_cache_collect)(0); 3740 } 3741 if (first_try == TRUE || buf_large_zfree == TRUE) { 3742 /* 3743 * consider_zone_gc should be last, because the other operations 3744 * might return memory to zones. 
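 * (the surrounding do/while repeats the buffer-cache collect plus zone gc
 * pass for as long as large buffers were freed and we're still short of
 * vm_page_free_target)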
3745 */ 3746 consider_zone_gc(buf_large_zfree); 3747 } 3748 first_try = FALSE; 3749 3750 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target); 3751 3752 consider_machine_adjust(); 3753 } 3754 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT); 3755 3756 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1); 3757 /*NOTREACHED*/ 3758} 3759 3760 3761void vm_pageout_reinit_tuneables(void); 3762 3763void 3764vm_pageout_reinit_tuneables(void) 3765{ 3766 vm_page_filecache_min = (uint32_t) (max_mem / PAGE_SIZE) / 15; 3767 3768 if (vm_page_filecache_min < VM_PAGE_FILECACHE_MIN) 3769 vm_page_filecache_min = VM_PAGE_FILECACHE_MIN; 3770 3771 vm_compressor_minorcompact_threshold_divisor = 18; 3772 vm_compressor_majorcompact_threshold_divisor = 22; 3773 vm_compressor_unthrottle_threshold_divisor = 32; 3774} 3775 3776 3777#if VM_PAGE_BUCKETS_CHECK 3778#if VM_PAGE_FAKE_BUCKETS 3779extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end; 3780#endif /* VM_PAGE_FAKE_BUCKETS */ 3781#endif /* VM_PAGE_BUCKETS_CHECK */ 3782 3783#define FBDP_TEST_COLLAPSE_COMPRESSOR 0 3784#if FBDP_TEST_COLLAPSE_COMPRESSOR 3785extern boolean_t vm_object_collapse_compressor_allowed; 3786#include <IOKit/IOLib.h> 3787#endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */ 3788 3789#define FBDP_TEST_WIRE_AND_EXTRACT 0 3790#if FBDP_TEST_WIRE_AND_EXTRACT 3791extern ledger_template_t task_ledger_template; 3792#include <mach/mach_vm.h> 3793extern ppnum_t vm_map_get_phys_page(vm_map_t map, 3794 vm_offset_t offset); 3795#endif /* FBDP_TEST_WIRE_AND_EXTRACT */ 3796 3797void 3798vm_pageout(void) 3799{ 3800 thread_t self = current_thread(); 3801 thread_t thread; 3802 kern_return_t result; 3803 spl_t s; 3804 3805 /* 3806 * Set thread privileges. 3807 */ 3808 s = splsched(); 3809 thread_lock(self); 3810 self->priority = BASEPRI_PREEMPT - 1; 3811 set_sched_pri(self, self->priority); 3812 thread_unlock(self); 3813 3814 if (!self->reserved_stack) 3815 self->reserved_stack = self->kernel_stack; 3816 3817 splx(s); 3818 3819 /* 3820 * Initialize some paging parameters. 
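	 *
	 * Each tuneable below is assigned its VM_PAGEOUT_* compile-time
	 * default only if it is still zero, i.e. if nothing (presumably a
	 * boot-arg parsed earlier during startup) has already overridden it.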
3821 */ 3822 3823 if (vm_pageout_swap_wait == 0) 3824 vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT; 3825 3826 if (vm_pageout_idle_wait == 0) 3827 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; 3828 3829 if (vm_pageout_burst_wait == 0) 3830 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; 3831 3832 if (vm_pageout_empty_wait == 0) 3833 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; 3834 3835 if (vm_pageout_deadlock_wait == 0) 3836 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT; 3837 3838 if (vm_pageout_deadlock_relief == 0) 3839 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF; 3840 3841 if (vm_pageout_inactive_relief == 0) 3842 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF; 3843 3844 if (vm_pageout_burst_active_throttle == 0) 3845 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE; 3846 3847 if (vm_pageout_burst_inactive_throttle == 0) 3848 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; 3849 3850#if !CONFIG_JETSAM 3851 vm_page_filecache_min = (uint32_t) (max_mem / PAGE_SIZE) / 20; 3852 if (vm_page_filecache_min < VM_PAGE_FILECACHE_MIN) 3853 vm_page_filecache_min = VM_PAGE_FILECACHE_MIN; 3854#endif 3855 3856 /* 3857 * Set kernel task to low backing store privileged 3858 * status 3859 */ 3860 task_lock(kernel_task); 3861 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV; 3862 task_unlock(kernel_task); 3863 3864 vm_page_free_count_init = vm_page_free_count; 3865 3866 /* 3867 * even if we've already called vm_page_free_reserve 3868 * call it again here to insure that the targets are 3869 * accurately calculated (it uses vm_page_free_count_init) 3870 * calling it with an arg of 0 will not change the reserve 3871 * but will re-calculate free_min and free_target 3872 */ 3873 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) { 3874 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved); 3875 } else 3876 vm_page_free_reserve(0); 3877 3878 3879 queue_init(&vm_pageout_queue_external.pgo_pending); 3880 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; 3881 vm_pageout_queue_external.pgo_laundry = 0; 3882 vm_pageout_queue_external.pgo_idle = FALSE; 3883 vm_pageout_queue_external.pgo_busy = FALSE; 3884 vm_pageout_queue_external.pgo_throttled = FALSE; 3885 vm_pageout_queue_external.pgo_draining = FALSE; 3886 vm_pageout_queue_external.pgo_lowpriority = FALSE; 3887 vm_pageout_queue_external.pgo_tid = -1; 3888 vm_pageout_queue_external.pgo_inited = FALSE; 3889 3890 3891 queue_init(&vm_pageout_queue_internal.pgo_pending); 3892 vm_pageout_queue_internal.pgo_maxlaundry = 0; 3893 vm_pageout_queue_internal.pgo_laundry = 0; 3894 vm_pageout_queue_internal.pgo_idle = FALSE; 3895 vm_pageout_queue_internal.pgo_busy = FALSE; 3896 vm_pageout_queue_internal.pgo_throttled = FALSE; 3897 vm_pageout_queue_internal.pgo_draining = FALSE; 3898 vm_pageout_queue_internal.pgo_lowpriority = FALSE; 3899 vm_pageout_queue_internal.pgo_tid = -1; 3900 vm_pageout_queue_internal.pgo_inited = FALSE; 3901 3902 /* internal pageout thread started when default pager registered first time */ 3903 /* external pageout and garbage collection threads started here */ 3904 3905 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, 3906 BASEPRI_PREEMPT - 1, 3907 &vm_pageout_external_iothread); 3908 if (result != KERN_SUCCESS) 3909 panic("vm_pageout_iothread_external: create failed"); 3910 3911 thread_deallocate(vm_pageout_external_iothread); 3912 3913 result = 
kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, 3914 BASEPRI_DEFAULT, 3915 &thread); 3916 if (result != KERN_SUCCESS) 3917 panic("vm_pageout_garbage_collect: create failed"); 3918 3919 thread_deallocate(thread); 3920 3921#if VM_PRESSURE_EVENTS 3922 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL, 3923 BASEPRI_DEFAULT, 3924 &thread); 3925 3926 if (result != KERN_SUCCESS) 3927 panic("vm_pressure_thread: create failed"); 3928 3929 thread_deallocate(thread); 3930#endif 3931 3932 vm_object_reaper_init(); 3933 3934 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) 3935 vm_compressor_pager_init(); 3936 3937#if VM_PRESSURE_EVENTS 3938 vm_pressure_events_enabled = TRUE; 3939#endif /* VM_PRESSURE_EVENTS */ 3940 3941#if CONFIG_PHANTOM_CACHE 3942 vm_phantom_cache_init(); 3943#endif 3944#if VM_PAGE_BUCKETS_CHECK 3945#if VM_PAGE_FAKE_BUCKETS 3946 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n", 3947 (uint64_t) vm_page_fake_buckets_start, 3948 (uint64_t) vm_page_fake_buckets_end); 3949 pmap_protect(kernel_pmap, 3950 vm_page_fake_buckets_start, 3951 vm_page_fake_buckets_end, 3952 VM_PROT_READ); 3953// *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */ 3954#endif /* VM_PAGE_FAKE_BUCKETS */ 3955#endif /* VM_PAGE_BUCKETS_CHECK */ 3956 3957#if VM_OBJECT_TRACKING 3958 vm_object_tracking_init(); 3959#endif /* VM_OBJECT_TRACKING */ 3960 3961 3962#if FBDP_TEST_COLLAPSE_COMPRESSOR 3963 vm_object_size_t backing_size, top_size; 3964 vm_object_t backing_object, top_object; 3965 vm_map_offset_t backing_offset, top_offset; 3966 unsigned char *backing_address, *top_address; 3967 kern_return_t kr; 3968 3969 printf("FBDP_TEST_COLLAPSE_COMPRESSOR:\n"); 3970 3971 /* create backing object */ 3972 backing_size = 15 * PAGE_SIZE; 3973 backing_object = vm_object_allocate(backing_size); 3974 assert(backing_object != VM_OBJECT_NULL); 3975 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n", 3976 backing_object); 3977 /* map backing object */ 3978 backing_offset = 0; 3979 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0, 3980 VM_FLAGS_ANYWHERE, backing_object, 0, FALSE, 3981 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT); 3982 assert(kr == KERN_SUCCESS); 3983 backing_address = (unsigned char *) backing_offset; 3984 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " 3985 "mapped backing object %p at 0x%llx\n", 3986 backing_object, (uint64_t) backing_offset); 3987 /* populate with pages to be compressed in backing object */ 3988 backing_address[0x1*PAGE_SIZE] = 0xB1; 3989 backing_address[0x4*PAGE_SIZE] = 0xB4; 3990 backing_address[0x7*PAGE_SIZE] = 0xB7; 3991 backing_address[0xa*PAGE_SIZE] = 0xBA; 3992 backing_address[0xd*PAGE_SIZE] = 0xBD; 3993 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " 3994 "populated pages to be compressed in " 3995 "backing_object %p\n", backing_object); 3996 /* compress backing object */ 3997 vm_object_pageout(backing_object); 3998 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n", 3999 backing_object); 4000 /* wait for all the pages to be gone */ 4001 while (*(volatile int *)&backing_object->resident_page_count != 0) 4002 IODelay(10); 4003 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n", 4004 backing_object); 4005 /* populate with pages to be resident in backing object */ 4006 backing_address[0x0*PAGE_SIZE] = 0xB0; 4007 backing_address[0x3*PAGE_SIZE] = 0xB3; 4008 backing_address[0x6*PAGE_SIZE] = 0xB6; 4009 
backing_address[0x9*PAGE_SIZE] = 0xB9; 4010 backing_address[0xc*PAGE_SIZE] = 0xBC; 4011 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " 4012 "populated pages to be resident in " 4013 "backing_object %p\n", backing_object); 4014 /* leave the other pages absent */ 4015 /* mess with the paging_offset of the backing_object */ 4016 assert(backing_object->paging_offset == 0); 4017 backing_object->paging_offset = 0x3000; 4018 4019 /* create top object */ 4020 top_size = 9 * PAGE_SIZE; 4021 top_object = vm_object_allocate(top_size); 4022 assert(top_object != VM_OBJECT_NULL); 4023 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: created top object %p\n", 4024 top_object); 4025 /* map top object */ 4026 top_offset = 0; 4027 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0, 4028 VM_FLAGS_ANYWHERE, top_object, 0, FALSE, 4029 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT); 4030 assert(kr == KERN_SUCCESS); 4031 top_address = (unsigned char *) top_offset; 4032 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " 4033 "mapped top object %p at 0x%llx\n", 4034 top_object, (uint64_t) top_offset); 4035 /* populate with pages to be compressed in top object */ 4036 top_address[0x3*PAGE_SIZE] = 0xA3; 4037 top_address[0x4*PAGE_SIZE] = 0xA4; 4038 top_address[0x5*PAGE_SIZE] = 0xA5; 4039 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " 4040 "populated pages to be compressed in " 4041 "top_object %p\n", top_object); 4042 /* compress top object */ 4043 vm_object_pageout(top_object); 4044 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n", 4045 top_object); 4046 /* wait for all the pages to be gone */ 4047 while (top_object->resident_page_count != 0); 4048 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n", 4049 top_object); 4050 /* populate with pages to be resident in top object */ 4051 top_address[0x0*PAGE_SIZE] = 0xA0; 4052 top_address[0x1*PAGE_SIZE] = 0xA1; 4053 top_address[0x2*PAGE_SIZE] = 0xA2; 4054 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " 4055 "populated pages to be resident in " 4056 "top_object %p\n", top_object); 4057 /* leave the other pages absent */ 4058 4059 /* link the 2 objects */ 4060 vm_object_reference(backing_object); 4061 top_object->shadow = backing_object; 4062 top_object->vo_shadow_offset = 0x3000; 4063 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n", 4064 top_object, backing_object); 4065 4066 /* unmap backing object */ 4067 vm_map_remove(kernel_map, 4068 backing_offset, 4069 backing_offset + backing_size, 4070 0); 4071 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " 4072 "unmapped backing_object %p [0x%llx:0x%llx]\n", 4073 backing_object, 4074 (uint64_t) backing_offset, 4075 (uint64_t) (backing_offset + backing_size)); 4076 4077 /* collapse */ 4078 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object); 4079 vm_object_lock(top_object); 4080 vm_object_collapse(top_object, 0, FALSE); 4081 vm_object_unlock(top_object); 4082 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object); 4083 4084 /* did it work? 
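	 *
	 * After the collapse, top_object should have absorbed the backing
	 * object: its shadow pointer should be VM_OBJECT_NULL and reading the
	 * nine pages through top_address should reproduce the expect[] pattern
	 * checked below.  Worked example: top page 6 maps to backing page
	 * 6 + 3 (vo_shadow_offset 0x3000) = 9, whose resident value is 0xB9;
	 * top page 7 maps to backing page 10, but since that lookup goes to
	 * the compressor it apparently also picks up the paging_offset of
	 * 0x3000 that the test set after compressing, landing on the data
	 * originally written at page 13 (0xd), i.e. 0xBD; top page 8 resolves
	 * to nothing in either object and reads back as zero-fill (0x00).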
*/ 4085 if (top_object->shadow != VM_OBJECT_NULL) { 4086 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: not collapsed\n"); 4087 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); 4088 if (vm_object_collapse_compressor_allowed) { 4089 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); 4090 } 4091 } else { 4092 /* check the contents of the mapping */ 4093 unsigned char expect[9] = 4094 { 0xA0, 0xA1, 0xA2, /* resident in top */ 4095 0xA3, 0xA4, 0xA5, /* compressed in top */ 4096 0xB9, /* resident in backing + shadow_offset */ 4097 0xBD, /* compressed in backing + shadow_offset + paging_offset */ 4098 0x00 }; /* absent in both */ 4099 unsigned char actual[9]; 4100 unsigned int i, errors; 4101 4102 errors = 0; 4103 for (i = 0; i < sizeof (actual); i++) { 4104 actual[i] = (unsigned char) top_address[i*PAGE_SIZE]; 4105 if (actual[i] != expect[i]) { 4106 errors++; 4107 } 4108 } 4109 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: " 4110 "actual [%x %x %x %x %x %x %x %x %x] " 4111 "expect [%x %x %x %x %x %x %x %x %x] " 4112 "%d errors\n", 4113 actual[0], actual[1], actual[2], actual[3], 4114 actual[4], actual[5], actual[6], actual[7], 4115 actual[8], 4116 expect[0], expect[1], expect[2], expect[3], 4117 expect[4], expect[5], expect[6], expect[7], 4118 expect[8], 4119 errors); 4120 if (errors) { 4121 panic("FBDP_TEST_COLLAPSE_COMPRESSOR: FAIL\n"); 4122 } else { 4123 printf("FBDP_TEST_COLLAPSE_COMPRESSOR: PASS\n"); 4124 } 4125 } 4126#endif /* FBDP_TEST_COLLAPSE_COMPRESSOR */ 4127 4128#if FBDP_TEST_WIRE_AND_EXTRACT 4129 ledger_t ledger; 4130 vm_map_t user_map, wire_map; 4131 mach_vm_address_t user_addr, wire_addr; 4132 mach_vm_size_t user_size, wire_size; 4133 mach_vm_offset_t cur_offset; 4134 vm_prot_t cur_prot, max_prot; 4135 ppnum_t user_ppnum, wire_ppnum; 4136 kern_return_t kr; 4137 4138 ledger = ledger_instantiate(task_ledger_template, 4139 LEDGER_CREATE_ACTIVE_ENTRIES); 4140 user_map = vm_map_create(pmap_create(ledger, 0, TRUE), 4141 0x100000000ULL, 4142 0x200000000ULL, 4143 TRUE); 4144 wire_map = vm_map_create(NULL, 4145 0x100000000ULL, 4146 0x200000000ULL, 4147 TRUE); 4148 user_addr = 0; 4149 user_size = 0x10000; 4150 kr = mach_vm_allocate(user_map, 4151 &user_addr, 4152 user_size, 4153 VM_FLAGS_ANYWHERE); 4154 assert(kr == KERN_SUCCESS); 4155 wire_addr = 0; 4156 wire_size = user_size; 4157 kr = mach_vm_remap(wire_map, 4158 &wire_addr, 4159 wire_size, 4160 0, 4161 VM_FLAGS_ANYWHERE, 4162 user_map, 4163 user_addr, 4164 FALSE, 4165 &cur_prot, 4166 &max_prot, 4167 VM_INHERIT_NONE); 4168 assert(kr == KERN_SUCCESS); 4169 for (cur_offset = 0; 4170 cur_offset < wire_size; 4171 cur_offset += PAGE_SIZE) { 4172 kr = vm_map_wire_and_extract(wire_map, 4173 wire_addr + cur_offset, 4174 VM_PROT_DEFAULT, 4175 TRUE, 4176 &wire_ppnum); 4177 assert(kr == KERN_SUCCESS); 4178 user_ppnum = vm_map_get_phys_page(user_map, 4179 user_addr + cur_offset); 4180 printf("FBDP_TEST_WIRE_AND_EXTRACT: kr=0x%x " 4181 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n", 4182 kr, 4183 user_map, user_addr + cur_offset, user_ppnum, 4184 wire_map, wire_addr + cur_offset, wire_ppnum); 4185 if (kr != KERN_SUCCESS || 4186 wire_ppnum == 0 || 4187 wire_ppnum != user_ppnum) { 4188 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n"); 4189 } 4190 } 4191 cur_offset -= PAGE_SIZE; 4192 kr = vm_map_wire_and_extract(wire_map, 4193 wire_addr + cur_offset, 4194 VM_PROT_DEFAULT, 4195 TRUE, 4196 &wire_ppnum); 4197 assert(kr == KERN_SUCCESS); 4198 printf("FBDP_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x " 4199 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n", 4200 kr, 4201 user_map, user_addr 
+ cur_offset, user_ppnum, 4202 wire_map, wire_addr + cur_offset, wire_ppnum); 4203 if (kr != KERN_SUCCESS || 4204 wire_ppnum == 0 || 4205 wire_ppnum != user_ppnum) { 4206 panic("FBDP_TEST_WIRE_AND_EXTRACT: FAIL\n"); 4207 } 4208 4209 printf("FBDP_TEST_WIRE_AND_EXTRACT: PASS\n"); 4210#endif /* FBDP_TEST_WIRE_AND_EXTRACT */ 4211 4212 4213 vm_pageout_continue(); 4214 4215 /* 4216 * Unreached code! 4217 * 4218 * The vm_pageout_continue() call above never returns, so the code below is never 4219 * executed. We take advantage of this to declare several DTrace VM related probe 4220 * points that our kernel doesn't have an analog for. These are probe points that 4221 * exist in Solaris and are in the DTrace documentation, so people may have written 4222 * scripts that use them. Declaring the probe points here means their scripts will 4223 * compile and execute which we want for portability of the scripts, but since this 4224 * section of code is never reached, the probe points will simply never fire. Yes, 4225 * this is basically a hack. The problem is the DTrace probe points were chosen with 4226 * Solaris specific VM events in mind, not portability to different VM implementations. 4227 */ 4228 4229 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL); 4230 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL); 4231 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL); 4232 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL); 4233 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL); 4234 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL); 4235 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL); 4236 /*NOTREACHED*/ 4237} 4238 4239 4240 4241#define MAX_COMRPESSOR_THREAD_COUNT 8 4242 4243struct cq ciq[MAX_COMRPESSOR_THREAD_COUNT]; 4244 4245int vm_compressor_thread_count = 2; 4246 4247kern_return_t 4248vm_pageout_internal_start(void) 4249{ 4250 kern_return_t result; 4251 int i; 4252 host_basic_info_data_t hinfo; 4253 4254 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 4255 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT; 4256#define BSD_HOST 1 4257 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count); 4258 4259 assert(hinfo.max_cpus > 0); 4260 4261 if (vm_compressor_thread_count >= hinfo.max_cpus) 4262 vm_compressor_thread_count = hinfo.max_cpus - 1; 4263 if (vm_compressor_thread_count <= 0) 4264 vm_compressor_thread_count = 1; 4265 else if (vm_compressor_thread_count > MAX_COMRPESSOR_THREAD_COUNT) 4266 vm_compressor_thread_count = MAX_COMRPESSOR_THREAD_COUNT; 4267 4268 vm_pageout_queue_internal.pgo_maxlaundry = (vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX; 4269 } else { 4270 vm_compressor_thread_count = 1; 4271 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; 4272 } 4273 4274 for (i = 0; i < vm_compressor_thread_count; i++) { 4275 4276 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i], BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread); 4277 if (result == KERN_SUCCESS) 4278 thread_deallocate(vm_pageout_internal_iothread); 4279 else 4280 break; 4281 } 4282 return result; 4283} 4284 4285#if CONFIG_IOSCHED 4286/* 4287 * To support I/O Expedite for compressed files we mark the upls with special flags. 4288 * The way decmpfs works is that we create a big upl which marks all the pages needed to 4289 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. 
Decmpfs 4290 * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages 4291 * being held in the big original UPL. We mark each of these smaller UPLs with the flag 4292 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the 4293 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction 4294 * by the req upl lock (the reverse link doesnt need synch. since we never inspect this link 4295 * unless the real I/O upl is being destroyed). 4296 */ 4297 4298 4299static void 4300upl_set_decmp_info(upl_t upl, upl_t src_upl) 4301{ 4302 assert((src_upl->flags & UPL_DECMP_REQ) != 0); 4303 4304 upl_lock(src_upl); 4305 if (src_upl->decmp_io_upl) { 4306 /* 4307 * If there is already an alive real I/O UPL, ignore this new UPL. 4308 * This case should rarely happen and even if it does, it just means 4309 * that we might issue a spurious expedite which the driver is expected 4310 * to handle. 4311 */ 4312 upl_unlock(src_upl); 4313 return; 4314 } 4315 src_upl->decmp_io_upl = (void *)upl; 4316 src_upl->ref_count++; 4317 upl_unlock(src_upl); 4318 4319 upl->flags |= UPL_DECMP_REAL_IO; 4320 upl->decmp_io_upl = (void *)src_upl; 4321 4322} 4323#endif /* CONFIG_IOSCHED */ 4324 4325#if UPL_DEBUG 4326int upl_debug_enabled = 1; 4327#else 4328int upl_debug_enabled = 0; 4329#endif 4330 4331static upl_t 4332upl_create(int type, int flags, upl_size_t size) 4333{ 4334 upl_t upl; 4335 vm_size_t page_field_size = 0; 4336 int upl_flags = 0; 4337 vm_size_t upl_size = sizeof(struct upl); 4338 4339 size = round_page_32(size); 4340 4341 if (type & UPL_CREATE_LITE) { 4342 page_field_size = (atop(size) + 7) >> 3; 4343 page_field_size = (page_field_size + 3) & 0xFFFFFFFC; 4344 4345 upl_flags |= UPL_LITE; 4346 } 4347 if (type & UPL_CREATE_INTERNAL) { 4348 upl_size += sizeof(struct upl_page_info) * atop(size); 4349 4350 upl_flags |= UPL_INTERNAL; 4351 } 4352 upl = (upl_t)kalloc(upl_size + page_field_size); 4353 4354 if (page_field_size) 4355 bzero((char *)upl + upl_size, page_field_size); 4356 4357 upl->flags = upl_flags | flags; 4358 upl->src_object = NULL; 4359 upl->kaddr = (vm_offset_t)0; 4360 upl->size = 0; 4361 upl->map_object = NULL; 4362 upl->ref_count = 1; 4363 upl->ext_ref_count = 0; 4364 upl->highest_page = 0; 4365 upl_lock_init(upl); 4366 upl->vector_upl = NULL; 4367#if CONFIG_IOSCHED 4368 if (type & UPL_CREATE_IO_TRACKING) { 4369 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO); 4370 } 4371 4372 upl->upl_reprio_info = 0; 4373 upl->decmp_io_upl = 0; 4374 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) { 4375 /* Only support expedite on internal UPLs */ 4376 thread_t curthread = current_thread(); 4377 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size)); 4378 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size))); 4379 upl->flags |= UPL_EXPEDITE_SUPPORTED; 4380 if (curthread->decmp_upl != NULL) 4381 upl_set_decmp_info(upl, curthread->decmp_upl); 4382 } 4383#endif 4384#if CONFIG_IOSCHED || UPL_DEBUG 4385 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) { 4386 upl->upl_creator = current_thread(); 4387 upl->uplq.next = 0; 4388 upl->uplq.prev = 0; 4389 upl->flags |= UPL_TRACKED_BY_OBJECT; 4390 } 4391#endif 4392 4393#if UPL_DEBUG 4394 upl->ubc_alias1 = 0; 4395 upl->ubc_alias2 = 0; 4396 4397 upl->upl_state = 0; 4398 upl->upl_commit_index = 0; 4399 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records)); 4400 4401 (void) 
OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES); 4402#endif /* UPL_DEBUG */ 4403 4404 return(upl); 4405} 4406 4407static void 4408upl_destroy(upl_t upl) 4409{ 4410 int page_field_size; /* bit field in word size buf */ 4411 int size; 4412 4413 if (upl->ext_ref_count) { 4414 panic("upl(%p) ext_ref_count", upl); 4415 } 4416 4417#if CONFIG_IOSCHED 4418 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) { 4419 upl_t src_upl; 4420 src_upl = upl->decmp_io_upl; 4421 assert((src_upl->flags & UPL_DECMP_REQ) != 0); 4422 upl_lock(src_upl); 4423 src_upl->decmp_io_upl = NULL; 4424 upl_unlock(src_upl); 4425 upl_deallocate(src_upl); 4426 } 4427#endif /* CONFIG_IOSCHED */ 4428 4429#if CONFIG_IOSCHED || UPL_DEBUG 4430 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) { 4431 vm_object_t object; 4432 4433 if (upl->flags & UPL_SHADOWED) { 4434 object = upl->map_object->shadow; 4435 } else { 4436 object = upl->map_object; 4437 } 4438 4439 vm_object_lock(object); 4440 queue_remove(&object->uplq, upl, upl_t, uplq); 4441 vm_object_activity_end(object); 4442 vm_object_collapse(object, 0, TRUE); 4443 vm_object_unlock(object); 4444 } 4445#endif 4446 /* 4447 * drop a reference on the map_object whether or 4448 * not a pageout object is inserted 4449 */ 4450 if (upl->flags & UPL_SHADOWED) 4451 vm_object_deallocate(upl->map_object); 4452 4453 if (upl->flags & UPL_DEVICE_MEMORY) 4454 size = PAGE_SIZE; 4455 else 4456 size = upl->size; 4457 page_field_size = 0; 4458 4459 if (upl->flags & UPL_LITE) { 4460 page_field_size = ((size/PAGE_SIZE) + 7) >> 3; 4461 page_field_size = (page_field_size + 3) & 0xFFFFFFFC; 4462 } 4463 upl_lock_destroy(upl); 4464 upl->vector_upl = (vector_upl_t) 0xfeedbeef; 4465 4466#if CONFIG_IOSCHED 4467 if (upl->flags & UPL_EXPEDITE_SUPPORTED) 4468 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE)); 4469#endif 4470 4471 if (upl->flags & UPL_INTERNAL) { 4472 kfree(upl, 4473 sizeof(struct upl) + 4474 (sizeof(struct upl_page_info) * (size/PAGE_SIZE)) 4475 + page_field_size); 4476 } else { 4477 kfree(upl, sizeof(struct upl) + page_field_size); 4478 } 4479} 4480 4481void 4482upl_deallocate(upl_t upl) 4483{ 4484 upl_lock(upl); 4485 if (--upl->ref_count == 0) { 4486 if(vector_upl_is_valid(upl)) 4487 vector_upl_deallocate(upl); 4488 upl_unlock(upl); 4489 upl_destroy(upl); 4490 } 4491 else 4492 upl_unlock(upl); 4493} 4494 4495#if CONFIG_IOSCHED 4496void 4497upl_mark_decmp(upl_t upl) 4498{ 4499 if (upl->flags & UPL_TRACKED_BY_OBJECT) { 4500 upl->flags |= UPL_DECMP_REQ; 4501 upl->upl_creator->decmp_upl = (void *)upl; 4502 } 4503} 4504 4505void 4506upl_unmark_decmp(upl_t upl) 4507{ 4508 if(upl && (upl->flags & UPL_DECMP_REQ)) { 4509 upl->upl_creator->decmp_upl = NULL; 4510 } 4511} 4512 4513#endif /* CONFIG_IOSCHED */ 4514 4515#define VM_PAGE_Q_BACKING_UP(q) \ 4516 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10)) 4517 4518boolean_t must_throttle_writes(void); 4519 4520boolean_t 4521must_throttle_writes() 4522{ 4523 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) && 4524 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10) 4525 return (TRUE); 4526 4527 return (FALSE); 4528} 4529 4530 4531#if DEVELOPMENT || DEBUG 4532/*/* 4533 * Statistics about UPL enforcement of copy-on-write obligations. 
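 *
 * upl_cow / upl_cow_pages count the initial vm_object_update() pushes done
 * in vm_object_upl_request() when the caller sets UPL_WILL_MODIFY and the
 * object has a copy object; upl_cow_again / upl_cow_again_pages count the
 * re-syncs taken when the copy object changes while the request is still
 * being built.  iopl_cow / iopl_cow_pages are presumably the analogous
 * counters for vm_object_iopl_request(), which is not shown here.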
4534 */ 4535unsigned long upl_cow = 0; 4536unsigned long upl_cow_again = 0; 4537unsigned long upl_cow_pages = 0; 4538unsigned long upl_cow_again_pages = 0; 4539 4540unsigned long iopl_cow = 0; 4541unsigned long iopl_cow_pages = 0; 4542#endif 4543 4544/* 4545 * Routine: vm_object_upl_request 4546 * Purpose: 4547 * Cause the population of a portion of a vm_object. 4548 * Depending on the nature of the request, the pages 4549 * returned may be contain valid data or be uninitialized. 4550 * A page list structure, listing the physical pages 4551 * will be returned upon request. 4552 * This function is called by the file system or any other 4553 * supplier of backing store to a pager. 4554 * IMPORTANT NOTE: The caller must still respect the relationship 4555 * between the vm_object and its backing memory object. The 4556 * caller MUST NOT substitute changes in the backing file 4557 * without first doing a memory_object_lock_request on the 4558 * target range unless it is know that the pages are not 4559 * shared with another entity at the pager level. 4560 * Copy_in_to: 4561 * if a page list structure is present 4562 * return the mapped physical pages, where a 4563 * page is not present, return a non-initialized 4564 * one. If the no_sync bit is turned on, don't 4565 * call the pager unlock to synchronize with other 4566 * possible copies of the page. Leave pages busy 4567 * in the original object, if a page list structure 4568 * was specified. When a commit of the page list 4569 * pages is done, the dirty bit will be set for each one. 4570 * Copy_out_from: 4571 * If a page list structure is present, return 4572 * all mapped pages. Where a page does not exist 4573 * map a zero filled one. Leave pages busy in 4574 * the original object. If a page list structure 4575 * is not specified, this call is a no-op. 4576 * 4577 * Note: access of default pager objects has a rather interesting 4578 * twist. The caller of this routine, presumably the file system 4579 * page cache handling code, will never actually make a request 4580 * against a default pager backed object. Only the default 4581 * pager will make requests on backing store related vm_objects 4582 * In this way the default pager can maintain the relationship 4583 * between backing store files (abstract memory objects) and 4584 * the vm_objects (cache objects), they support. 4585 * 4586 */ 4587 4588__private_extern__ kern_return_t 4589vm_object_upl_request( 4590 vm_object_t object, 4591 vm_object_offset_t offset, 4592 upl_size_t size, 4593 upl_t *upl_ptr, 4594 upl_page_info_array_t user_page_list, 4595 unsigned int *page_list_count, 4596 int cntrl_flags) 4597{ 4598 vm_page_t dst_page = VM_PAGE_NULL; 4599 vm_object_offset_t dst_offset; 4600 upl_size_t xfer_size; 4601 unsigned int size_in_pages; 4602 boolean_t dirty; 4603 boolean_t hw_dirty; 4604 upl_t upl = NULL; 4605 unsigned int entry; 4606#if MACH_CLUSTER_STATS 4607 boolean_t encountered_lrp = FALSE; 4608#endif 4609 vm_page_t alias_page = NULL; 4610 int refmod_state = 0; 4611 wpl_array_t lite_list = NULL; 4612 vm_object_t last_copy_object; 4613 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 4614 struct vm_page_delayed_work *dwp; 4615 int dw_count; 4616 int dw_limit; 4617 int io_tracking_flag = 0; 4618 4619 if (cntrl_flags & ~UPL_VALID_FLAGS) { 4620 /* 4621 * For forward compatibility's sake, 4622 * reject any unknown flag. 
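		 *
		 * A caller is expected to build cntrl_flags purely from bits
		 * covered by UPL_VALID_FLAGS; anything else fails with
		 * KERN_INVALID_VALUE below rather than being silently ignored.
		 * A hypothetical caller (illustration only, not taken from this
		 * file) might look roughly like:
		 *
		 *	upl_t		upl = NULL;
		 *	unsigned int	count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
		 *	kern_return_t	kr;
		 *
		 *	kr = vm_object_upl_request(object, offset, PAGE_SIZE,
		 *				   &upl, NULL, &count,
		 *				   UPL_SET_INTERNAL | UPL_SET_LITE |
		 *				   UPL_RET_ONLY_DIRTY | UPL_FOR_PAGEOUT);
		 *
		 * With UPL_SET_INTERNAL the page list lives inside the UPL itself
		 * and can be retrieved with UPL_GET_INTERNAL_PAGE_LIST() after
		 * the call.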
4623 */ 4624 return KERN_INVALID_VALUE; 4625 } 4626 if ( (!object->internal) && (object->paging_offset != 0) ) 4627 panic("vm_object_upl_request: external object with non-zero paging offset\n"); 4628 if (object->phys_contiguous) 4629 panic("vm_object_upl_request: contiguous object specified\n"); 4630 4631 4632 if (size > MAX_UPL_SIZE_BYTES) 4633 size = MAX_UPL_SIZE_BYTES; 4634 4635 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) 4636 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT; 4637 4638#if CONFIG_IOSCHED || UPL_DEBUG 4639 if (object->io_tracking || upl_debug_enabled) 4640 io_tracking_flag |= UPL_CREATE_IO_TRACKING; 4641#endif 4642#if CONFIG_IOSCHED 4643 if (object->io_tracking) 4644 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; 4645#endif 4646 4647 if (cntrl_flags & UPL_SET_INTERNAL) { 4648 if (cntrl_flags & UPL_SET_LITE) { 4649 4650 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size); 4651 4652 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 4653 lite_list = (wpl_array_t) 4654 (((uintptr_t)user_page_list) + 4655 ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); 4656 if (size == 0) { 4657 user_page_list = NULL; 4658 lite_list = NULL; 4659 } 4660 } else { 4661 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size); 4662 4663 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 4664 if (size == 0) { 4665 user_page_list = NULL; 4666 } 4667 } 4668 } else { 4669 if (cntrl_flags & UPL_SET_LITE) { 4670 4671 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size); 4672 4673 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 4674 if (size == 0) { 4675 lite_list = NULL; 4676 } 4677 } else { 4678 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size); 4679 } 4680 } 4681 *upl_ptr = upl; 4682 4683 if (user_page_list) 4684 user_page_list[0].device = FALSE; 4685 4686 if (cntrl_flags & UPL_SET_LITE) { 4687 upl->map_object = object; 4688 } else { 4689 upl->map_object = vm_object_allocate(size); 4690 /* 4691 * No neeed to lock the new object: nobody else knows 4692 * about it yet, so it's all ours so far. 4693 */ 4694 upl->map_object->shadow = object; 4695 upl->map_object->pageout = TRUE; 4696 upl->map_object->can_persist = FALSE; 4697 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 4698 upl->map_object->vo_shadow_offset = offset; 4699 upl->map_object->wimg_bits = object->wimg_bits; 4700 4701 VM_PAGE_GRAB_FICTITIOUS(alias_page); 4702 4703 upl->flags |= UPL_SHADOWED; 4704 } 4705 /* 4706 * ENCRYPTED SWAP: 4707 * Just mark the UPL as "encrypted" here. 4708 * We'll actually encrypt the pages later, 4709 * in upl_encrypt(), when the caller has 4710 * selected which pages need to go to swap. 
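	 *
	 * Only the UPL-level flag is latched at this point; the per-page
	 * "encrypted" and "encrypted_cleaning" bits are set (or cleared)
	 * further down in this routine as each page is gathered.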
4711 */ 4712 if (cntrl_flags & UPL_ENCRYPT) 4713 upl->flags |= UPL_ENCRYPTED; 4714 4715 if (cntrl_flags & UPL_FOR_PAGEOUT) 4716 upl->flags |= UPL_PAGEOUT; 4717 4718 vm_object_lock(object); 4719 vm_object_activity_begin(object); 4720 4721 /* 4722 * we can lock in the paging_offset once paging_in_progress is set 4723 */ 4724 upl->size = size; 4725 upl->offset = offset + object->paging_offset; 4726 4727#if CONFIG_IOSCHED || UPL_DEBUG 4728 if (object->io_tracking || upl_debug_enabled) { 4729 vm_object_activity_begin(object); 4730 queue_enter(&object->uplq, upl, upl_t, uplq); 4731 } 4732#endif 4733 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) { 4734 /* 4735 * Honor copy-on-write obligations 4736 * 4737 * The caller is gathering these pages and 4738 * might modify their contents. We need to 4739 * make sure that the copy object has its own 4740 * private copies of these pages before we let 4741 * the caller modify them. 4742 */ 4743 vm_object_update(object, 4744 offset, 4745 size, 4746 NULL, 4747 NULL, 4748 FALSE, /* should_return */ 4749 MEMORY_OBJECT_COPY_SYNC, 4750 VM_PROT_NO_CHANGE); 4751#if DEVELOPMENT || DEBUG 4752 upl_cow++; 4753 upl_cow_pages += size >> PAGE_SHIFT; 4754#endif 4755 } 4756 /* 4757 * remember which copy object we synchronized with 4758 */ 4759 last_copy_object = object->copy; 4760 entry = 0; 4761 4762 xfer_size = size; 4763 dst_offset = offset; 4764 size_in_pages = size / PAGE_SIZE; 4765 4766 dwp = &dw_array[0]; 4767 dw_count = 0; 4768 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 4769 4770 if (vm_page_free_count > (vm_page_free_target + size_in_pages) || 4771 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT)) 4772 object->scan_collisions = 0; 4773 4774 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) { 4775 boolean_t isSSD = FALSE; 4776 4777 vnode_pager_get_isSSD(object->pager, &isSSD); 4778 vm_object_unlock(object); 4779 4780 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); 4781 4782 if (isSSD == TRUE) 4783 delay(1000 * size_in_pages); 4784 else 4785 delay(5000 * size_in_pages); 4786 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 4787 4788 vm_object_lock(object); 4789 } 4790 4791 while (xfer_size) { 4792 4793 dwp->dw_mask = 0; 4794 4795 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) { 4796 vm_object_unlock(object); 4797 VM_PAGE_GRAB_FICTITIOUS(alias_page); 4798 vm_object_lock(object); 4799 } 4800 if (cntrl_flags & UPL_COPYOUT_FROM) { 4801 upl->flags |= UPL_PAGE_SYNC_DONE; 4802 4803 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) || 4804 dst_page->fictitious || 4805 dst_page->absent || 4806 dst_page->error || 4807 dst_page->cleaning || 4808 (VM_PAGE_WIRED(dst_page))) { 4809 4810 if (user_page_list) 4811 user_page_list[entry].phys_addr = 0; 4812 4813 goto try_next_page; 4814 } 4815 /* 4816 * grab this up front... 4817 * a high percentange of the time we're going to 4818 * need the hardware modification state a bit later 4819 * anyway... so we can eliminate an extra call into 4820 * the pmap layer by grabbing it here and recording it 4821 */ 4822 if (dst_page->pmapped) 4823 refmod_state = pmap_get_refmod(dst_page->phys_page); 4824 else 4825 refmod_state = 0; 4826 4827 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) { 4828 /* 4829 * page is on inactive list and referenced... 4830 * reactivate it now... 
this gets it out of the 4831 * way of vm_pageout_scan which would have to 4832 * reactivate it upon tripping over it 4833 */ 4834 dwp->dw_mask |= DW_vm_page_activate; 4835 } 4836 if (cntrl_flags & UPL_RET_ONLY_DIRTY) { 4837 /* 4838 * we're only asking for DIRTY pages to be returned 4839 */ 4840 if (dst_page->laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) { 4841 /* 4842 * if we were the page stolen by vm_pageout_scan to be 4843 * cleaned (as opposed to a buddy being clustered in 4844 * or this request is not being driven by a PAGEOUT cluster 4845 * then we only need to check for the page being dirty or 4846 * precious to decide whether to return it 4847 */ 4848 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED)) 4849 goto check_busy; 4850 goto dont_return; 4851 } 4852 /* 4853 * this is a request for a PAGEOUT cluster and this page 4854 * is merely along for the ride as a 'buddy'... not only 4855 * does it have to be dirty to be returned, but it also 4856 * can't have been referenced recently... 4857 */ 4858 if ( (hibernate_cleaning_in_progress == TRUE || 4859 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->reference) || dst_page->throttled)) && 4860 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) { 4861 goto check_busy; 4862 } 4863dont_return: 4864 /* 4865 * if we reach here, we're not to return 4866 * the page... go on to the next one 4867 */ 4868 if (dst_page->laundry == TRUE) { 4869 /* 4870 * if we get here, the page is not 'cleaning' (filtered out above). 4871 * since it has been referenced, remove it from the laundry 4872 * so we don't pay the cost of an I/O to clean a page 4873 * we're just going to take back 4874 */ 4875 vm_page_lockspin_queues(); 4876 4877 vm_pageout_steal_laundry(dst_page, TRUE); 4878 vm_page_activate(dst_page); 4879 4880 vm_page_unlock_queues(); 4881 } 4882 if (user_page_list) 4883 user_page_list[entry].phys_addr = 0; 4884 4885 goto try_next_page; 4886 } 4887check_busy: 4888 if (dst_page->busy) { 4889 if (cntrl_flags & UPL_NOBLOCK) { 4890 if (user_page_list) 4891 user_page_list[entry].phys_addr = 0; 4892 4893 goto try_next_page; 4894 } 4895 /* 4896 * someone else is playing with the 4897 * page. We will have to wait. 4898 */ 4899 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 4900 4901 continue; 4902 } 4903 /* 4904 * ENCRYPTED SWAP: 4905 * The caller is gathering this page and might 4906 * access its contents later on. Decrypt the 4907 * page before adding it to the UPL, so that 4908 * the caller never sees encrypted data. 4909 */ 4910 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) { 4911 int was_busy; 4912 4913 /* 4914 * save the current state of busy 4915 * mark page as busy while decrypt 4916 * is in progress since it will drop 4917 * the object lock... 4918 */ 4919 was_busy = dst_page->busy; 4920 dst_page->busy = TRUE; 4921 4922 vm_page_decrypt(dst_page, 0); 4923 vm_page_decrypt_for_upl_counter++; 4924 /* 4925 * restore to original busy state 4926 */ 4927 dst_page->busy = was_busy; 4928 } 4929 if (dst_page->pageout_queue == TRUE) { 4930 4931 vm_page_lockspin_queues(); 4932 4933 if (dst_page->pageout_queue == TRUE) { 4934 /* 4935 * we've buddied up a page for a clustered pageout 4936 * that has already been moved to the pageout 4937 * queue by pageout_scan... 
we need to remove 4938 * it from the queue and drop the laundry count 4939 * on that queue 4940 */ 4941 vm_pageout_throttle_up(dst_page); 4942 } 4943 vm_page_unlock_queues(); 4944 } 4945#if MACH_CLUSTER_STATS 4946 /* 4947 * pageout statistics gathering. count 4948 * all the pages we will page out that 4949 * were not counted in the initial 4950 * vm_pageout_scan work 4951 */ 4952 if (dst_page->pageout) 4953 encountered_lrp = TRUE; 4954 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious))) { 4955 if (encountered_lrp) 4956 CLUSTER_STAT(pages_at_higher_offsets++;) 4957 else 4958 CLUSTER_STAT(pages_at_lower_offsets++;) 4959 } 4960#endif 4961 hw_dirty = refmod_state & VM_MEM_MODIFIED; 4962 dirty = hw_dirty ? TRUE : dst_page->dirty; 4963 4964 if (dst_page->phys_page > upl->highest_page) 4965 upl->highest_page = dst_page->phys_page; 4966 4967 if (cntrl_flags & UPL_SET_LITE) { 4968 unsigned int pg_num; 4969 4970 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 4971 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 4972 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 4973 4974 if (hw_dirty) 4975 pmap_clear_modify(dst_page->phys_page); 4976 4977 /* 4978 * Mark original page as cleaning 4979 * in place. 4980 */ 4981 dst_page->cleaning = TRUE; 4982 dst_page->precious = FALSE; 4983 } else { 4984 /* 4985 * use pageclean setup, it is more 4986 * convenient even for the pageout 4987 * cases here 4988 */ 4989 vm_object_lock(upl->map_object); 4990 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); 4991 vm_object_unlock(upl->map_object); 4992 4993 alias_page->absent = FALSE; 4994 alias_page = NULL; 4995 } 4996#if MACH_PAGEMAP 4997 /* 4998 * Record that this page has been 4999 * written out 5000 */ 5001 vm_external_state_set(object->existence_map, dst_page->offset); 5002#endif /*MACH_PAGEMAP*/ 5003 if (dirty) { 5004 SET_PAGE_DIRTY(dst_page, FALSE); 5005 } else { 5006 dst_page->dirty = FALSE; 5007 } 5008 5009 if (!dirty) 5010 dst_page->precious = TRUE; 5011 5012 if ( (cntrl_flags & UPL_ENCRYPT) ) { 5013 /* 5014 * ENCRYPTED SWAP: 5015 * We want to deny access to the target page 5016 * because its contents are about to be 5017 * encrypted and the user would be very 5018 * confused to see encrypted data instead 5019 * of their data. 5020 * We also set "encrypted_cleaning" to allow 5021 * vm_pageout_scan() to demote that page 5022 * from "adjacent/clean-in-place" to 5023 * "target/clean-and-free" if it bumps into 5024 * this page during its scanning while we're 5025 * still processing this cluster. 5026 */ 5027 dst_page->busy = TRUE; 5028 dst_page->encrypted_cleaning = TRUE; 5029 } 5030 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) { 5031 if ( !VM_PAGE_WIRED(dst_page)) 5032 dst_page->pageout = TRUE; 5033 } 5034 } else { 5035 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) { 5036 /* 5037 * Honor copy-on-write obligations 5038 * 5039 * The copy object has changed since we 5040 * last synchronized for copy-on-write. 5041 * Another copy object might have been 5042 * inserted while we released the object's 5043 * lock. Since someone could have seen the 5044 * original contents of the remaining pages 5045 * through that new object, we have to 5046 * synchronize with it again for the remaining 5047 * pages only. The previous pages are "busy" 5048 * so they can not be seen through the new 5049 * mapping. 
The new mapping will see our 5050 * upcoming changes for those previous pages, 5051 * but that's OK since they couldn't see what 5052 * was there before. It's just a race anyway 5053 * and there's no guarantee of consistency or 5054 * atomicity. We just don't want new mappings 5055 * to see both the *before* and *after* pages. 5056 */ 5057 if (object->copy != VM_OBJECT_NULL) { 5058 vm_object_update( 5059 object, 5060 dst_offset,/* current offset */ 5061 xfer_size, /* remaining size */ 5062 NULL, 5063 NULL, 5064 FALSE, /* should_return */ 5065 MEMORY_OBJECT_COPY_SYNC, 5066 VM_PROT_NO_CHANGE); 5067 5068#if DEVELOPMENT || DEBUG 5069 upl_cow_again++; 5070 upl_cow_again_pages += xfer_size >> PAGE_SHIFT; 5071#endif 5072 } 5073 /* 5074 * remember the copy object we synced with 5075 */ 5076 last_copy_object = object->copy; 5077 } 5078 dst_page = vm_page_lookup(object, dst_offset); 5079 5080 if (dst_page != VM_PAGE_NULL) { 5081 5082 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) { 5083 /* 5084 * skip over pages already present in the cache 5085 */ 5086 if (user_page_list) 5087 user_page_list[entry].phys_addr = 0; 5088 5089 goto try_next_page; 5090 } 5091 if (dst_page->fictitious) { 5092 panic("need corner case for fictitious page"); 5093 } 5094 5095 if (dst_page->busy || dst_page->cleaning) { 5096 /* 5097 * someone else is playing with the 5098 * page. We will have to wait. 5099 */ 5100 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 5101 5102 continue; 5103 } 5104 if (dst_page->laundry) { 5105 dst_page->pageout = FALSE; 5106 5107 vm_pageout_steal_laundry(dst_page, FALSE); 5108 } 5109 } else { 5110 if (object->private) { 5111 /* 5112 * This is a nasty wrinkle for users 5113 * of upl who encounter device or 5114 * private memory however, it is 5115 * unavoidable, only a fault can 5116 * resolve the actual backing 5117 * physical page by asking the 5118 * backing device. 5119 */ 5120 if (user_page_list) 5121 user_page_list[entry].phys_addr = 0; 5122 5123 goto try_next_page; 5124 } 5125 if (object->scan_collisions) { 5126 /* 5127 * the pageout_scan thread is trying to steal 5128 * pages from this object, but has run into our 5129 * lock... grab 2 pages from the head of the object... 5130 * the first is freed on behalf of pageout_scan, the 5131 * 2nd is for our own use... we use vm_object_page_grab 5132 * in both cases to avoid taking pages from the free 5133 * list since we are under memory pressure and our 5134 * lock on this object is getting in the way of 5135 * relieving it 5136 */ 5137 dst_page = vm_object_page_grab(object); 5138 5139 if (dst_page != VM_PAGE_NULL) 5140 vm_page_release(dst_page); 5141 5142 dst_page = vm_object_page_grab(object); 5143 } 5144 if (dst_page == VM_PAGE_NULL) { 5145 /* 5146 * need to allocate a page 5147 */ 5148 dst_page = vm_page_grab(); 5149 } 5150 if (dst_page == VM_PAGE_NULL) { 5151 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) { 5152 /* 5153 * we don't want to stall waiting for pages to come onto the free list 5154 * while we're already holding absent pages in this UPL 5155 * the caller will deal with the empty slots 5156 */ 5157 if (user_page_list) 5158 user_page_list[entry].phys_addr = 0; 5159 5160 goto try_next_page; 5161 } 5162 /* 5163 * no pages available... wait 5164 * then try again for the same 5165 * offset... 
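					 * (VM_PAGE_WAIT() below waits for free pages to become
					 * available; the vm_upl_wait_for_pages counter and the
					 * VM_UPL_PAGE_WAIT debug events bracket the wait, and on
					 * wakeup we re-take the object lock and "continue" so this
					 * same dst_offset is retried.)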
5166 */ 5167 vm_object_unlock(object); 5168 5169 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); 5170 5171 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); 5172 5173 VM_PAGE_WAIT(); 5174 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 5175 5176 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); 5177 5178 vm_object_lock(object); 5179 5180 continue; 5181 } 5182 vm_page_insert(dst_page, object, dst_offset); 5183 5184 dst_page->absent = TRUE; 5185 dst_page->busy = FALSE; 5186 5187 if (cntrl_flags & UPL_RET_ONLY_ABSENT) { 5188 /* 5189 * if UPL_RET_ONLY_ABSENT was specified, 5190 * than we're definitely setting up a 5191 * upl for a clustered read/pagein 5192 * operation... mark the pages as clustered 5193 * so upl_commit_range can put them on the 5194 * speculative list 5195 */ 5196 dst_page->clustered = TRUE; 5197 5198 if ( !(cntrl_flags & UPL_FILE_IO)) 5199 VM_STAT_INCR(pageins); 5200 } 5201 } 5202 /* 5203 * ENCRYPTED SWAP: 5204 */ 5205 if (cntrl_flags & UPL_ENCRYPT) { 5206 /* 5207 * The page is going to be encrypted when we 5208 * get it from the pager, so mark it so. 5209 */ 5210 dst_page->encrypted = TRUE; 5211 } else { 5212 /* 5213 * Otherwise, the page will not contain 5214 * encrypted data. 5215 */ 5216 dst_page->encrypted = FALSE; 5217 } 5218 dst_page->overwriting = TRUE; 5219 5220 if (dst_page->pmapped) { 5221 if ( !(cntrl_flags & UPL_FILE_IO)) 5222 /* 5223 * eliminate all mappings from the 5224 * original object and its prodigy 5225 */ 5226 refmod_state = pmap_disconnect(dst_page->phys_page); 5227 else 5228 refmod_state = pmap_get_refmod(dst_page->phys_page); 5229 } else 5230 refmod_state = 0; 5231 5232 hw_dirty = refmod_state & VM_MEM_MODIFIED; 5233 dirty = hw_dirty ? TRUE : dst_page->dirty; 5234 5235 if (cntrl_flags & UPL_SET_LITE) { 5236 unsigned int pg_num; 5237 5238 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 5239 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 5240 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 5241 5242 if (hw_dirty) 5243 pmap_clear_modify(dst_page->phys_page); 5244 5245 /* 5246 * Mark original page as cleaning 5247 * in place. 5248 */ 5249 dst_page->cleaning = TRUE; 5250 dst_page->precious = FALSE; 5251 } else { 5252 /* 5253 * use pageclean setup, it is more 5254 * convenient even for the pageout 5255 * cases here 5256 */ 5257 vm_object_lock(upl->map_object); 5258 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); 5259 vm_object_unlock(upl->map_object); 5260 5261 alias_page->absent = FALSE; 5262 alias_page = NULL; 5263 } 5264 5265 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) { 5266 upl->flags &= ~UPL_CLEAR_DIRTY; 5267 upl->flags |= UPL_SET_DIRTY; 5268 dirty = TRUE; 5269 upl->flags |= UPL_SET_DIRTY; 5270 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) { 5271 /* 5272 * clean in place for read implies 5273 * that a write will be done on all 5274 * the pages that are dirty before 5275 * a upl commit is done. The caller 5276 * is obligated to preserve the 5277 * contents of all pages marked dirty 5278 */ 5279 upl->flags |= UPL_CLEAR_DIRTY; 5280 } 5281 dst_page->dirty = dirty; 5282 5283 if (!dirty) 5284 dst_page->precious = TRUE; 5285 5286 if ( !VM_PAGE_WIRED(dst_page)) { 5287 /* 5288 * deny access to the target page while 5289 * it is being worked on 5290 */ 5291 dst_page->busy = TRUE; 5292 } else 5293 dwp->dw_mask |= DW_vm_page_wire; 5294 5295 /* 5296 * We might be about to satisfy a fault which has been 5297 * requested. 
So no need for the "restart" bit. 5298 */ 5299 dst_page->restart = FALSE; 5300 if (!dst_page->absent && !(cntrl_flags & UPL_WILL_MODIFY)) { 5301 /* 5302 * expect the page to be used 5303 */ 5304 dwp->dw_mask |= DW_set_reference; 5305 } 5306 if (cntrl_flags & UPL_PRECIOUS) { 5307 if (dst_page->object->internal) { 5308 SET_PAGE_DIRTY(dst_page, FALSE); 5309 dst_page->precious = FALSE; 5310 } else { 5311 dst_page->precious = TRUE; 5312 } 5313 } else { 5314 dst_page->precious = FALSE; 5315 } 5316 } 5317 if (dst_page->busy) 5318 upl->flags |= UPL_HAS_BUSY; 5319 5320 if (dst_page->phys_page > upl->highest_page) 5321 upl->highest_page = dst_page->phys_page; 5322 if (user_page_list) { 5323 user_page_list[entry].phys_addr = dst_page->phys_page; 5324 user_page_list[entry].pageout = dst_page->pageout; 5325 user_page_list[entry].absent = dst_page->absent; 5326 user_page_list[entry].dirty = dst_page->dirty; 5327 user_page_list[entry].precious = dst_page->precious; 5328 user_page_list[entry].device = FALSE; 5329 user_page_list[entry].needed = FALSE; 5330 if (dst_page->clustered == TRUE) 5331 user_page_list[entry].speculative = dst_page->speculative; 5332 else 5333 user_page_list[entry].speculative = FALSE; 5334 user_page_list[entry].cs_validated = dst_page->cs_validated; 5335 user_page_list[entry].cs_tainted = dst_page->cs_tainted; 5336 } 5337 /* 5338 * if UPL_RET_ONLY_ABSENT is set, then 5339 * we are working with a fresh page and we've 5340 * just set the clustered flag on it to 5341 * indicate that it was drug in as part of a 5342 * speculative cluster... so leave it alone 5343 */ 5344 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) { 5345 /* 5346 * someone is explicitly grabbing this page... 5347 * update clustered and speculative state 5348 * 5349 */ 5350 if (dst_page->clustered) 5351 VM_PAGE_CONSUME_CLUSTERED(dst_page); 5352 } 5353try_next_page: 5354 if (dwp->dw_mask) { 5355 if (dwp->dw_mask & DW_vm_page_activate) 5356 VM_STAT_INCR(reactivations); 5357 5358 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); 5359 5360 if (dw_count >= dw_limit) { 5361 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 5362 5363 dwp = &dw_array[0]; 5364 dw_count = 0; 5365 } 5366 } 5367 entry++; 5368 dst_offset += PAGE_SIZE_64; 5369 xfer_size -= PAGE_SIZE; 5370 } 5371 if (dw_count) 5372 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 5373 5374 if (alias_page != NULL) { 5375 VM_PAGE_FREE(alias_page); 5376 } 5377 5378 if (page_list_count != NULL) { 5379 if (upl->flags & UPL_INTERNAL) 5380 *page_list_count = 0; 5381 else if (*page_list_count > entry) 5382 *page_list_count = entry; 5383 } 5384#if UPL_DEBUG 5385 upl->upl_state = 1; 5386#endif 5387 vm_object_unlock(object); 5388 5389 return KERN_SUCCESS; 5390} 5391 5392/* JMM - Backward compatability for now */ 5393kern_return_t 5394vm_fault_list_request( /* forward */ 5395 memory_object_control_t control, 5396 vm_object_offset_t offset, 5397 upl_size_t size, 5398 upl_t *upl_ptr, 5399 upl_page_info_t **user_page_list_ptr, 5400 unsigned int page_list_count, 5401 int cntrl_flags); 5402kern_return_t 5403vm_fault_list_request( 5404 memory_object_control_t control, 5405 vm_object_offset_t offset, 5406 upl_size_t size, 5407 upl_t *upl_ptr, 5408 upl_page_info_t **user_page_list_ptr, 5409 unsigned int page_list_count, 5410 int cntrl_flags) 5411{ 5412 unsigned int local_list_count; 5413 upl_page_info_t *user_page_list; 5414 kern_return_t kr; 5415 5416 if((cntrl_flags & UPL_VECTOR)==UPL_VECTOR) 5417 return KERN_INVALID_ARGUMENT; 5418 5419 if (user_page_list_ptr != NULL) { 
5420 local_list_count = page_list_count; 5421 user_page_list = *user_page_list_ptr; 5422 } else { 5423 local_list_count = 0; 5424 user_page_list = NULL; 5425 } 5426 kr = memory_object_upl_request(control, 5427 offset, 5428 size, 5429 upl_ptr, 5430 user_page_list, 5431 &local_list_count, 5432 cntrl_flags); 5433 5434 if(kr != KERN_SUCCESS) 5435 return kr; 5436 5437 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) { 5438 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr); 5439 } 5440 5441 return KERN_SUCCESS; 5442} 5443 5444 5445 5446/* 5447 * Routine: vm_object_super_upl_request 5448 * Purpose: 5449 * Cause the population of a portion of a vm_object 5450 * in much the same way as memory_object_upl_request. 5451 * Depending on the nature of the request, the pages 5452 * returned may be contain valid data or be uninitialized. 5453 * However, the region may be expanded up to the super 5454 * cluster size provided. 5455 */ 5456 5457__private_extern__ kern_return_t 5458vm_object_super_upl_request( 5459 vm_object_t object, 5460 vm_object_offset_t offset, 5461 upl_size_t size, 5462 upl_size_t super_cluster, 5463 upl_t *upl, 5464 upl_page_info_t *user_page_list, 5465 unsigned int *page_list_count, 5466 int cntrl_flags) 5467{ 5468 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR)) 5469 return KERN_FAILURE; 5470 5471 assert(object->paging_in_progress); 5472 offset = offset - object->paging_offset; 5473 5474 if (super_cluster > size) { 5475 5476 vm_object_offset_t base_offset; 5477 upl_size_t super_size; 5478 vm_object_size_t super_size_64; 5479 5480 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); 5481 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster; 5482 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size; 5483 super_size = (upl_size_t) super_size_64; 5484 assert(super_size == super_size_64); 5485 5486 if (offset > (base_offset + super_size)) { 5487 panic("vm_object_super_upl_request: Missed target pageout" 5488 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n", 5489 offset, base_offset, super_size, super_cluster, 5490 size, object->paging_offset); 5491 } 5492 /* 5493 * apparently there is a case where the vm requests a 5494 * page to be written out who's offset is beyond the 5495 * object size 5496 */ 5497 if ((offset + size) > (base_offset + super_size)) { 5498 super_size_64 = (offset + size) - base_offset; 5499 super_size = (upl_size_t) super_size_64; 5500 assert(super_size == super_size_64); 5501 } 5502 5503 offset = base_offset; 5504 size = super_size; 5505 } 5506 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags); 5507} 5508 5509 5510kern_return_t 5511vm_map_create_upl( 5512 vm_map_t map, 5513 vm_map_address_t offset, 5514 upl_size_t *upl_size, 5515 upl_t *upl, 5516 upl_page_info_array_t page_list, 5517 unsigned int *count, 5518 int *flags) 5519{ 5520 vm_map_entry_t entry; 5521 int caller_flags; 5522 int force_data_sync; 5523 int sync_cow_data; 5524 vm_object_t local_object; 5525 vm_map_offset_t local_offset; 5526 vm_map_offset_t local_start; 5527 kern_return_t ret; 5528 5529 caller_flags = *flags; 5530 5531 if (caller_flags & ~UPL_VALID_FLAGS) { 5532 /* 5533 * For forward compatibility's sake, 5534 * reject any unknown flag. 
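		 *
		 * Note that *flags is in/out for this routine: on entry it
		 * carries the caller's UPL_* control flags, and on return it is
		 * rewritten to describe the backing object (UPL_DEV_MEMORY
		 * and/or UPL_PHYS_CONTIG), both for the UPL_QUERY_OBJECT_TYPE
		 * case and for the normal path that goes on to build the UPL.
		 * *upl_size is likewise trimmed to fit the map entry found.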
5535 */ 5536 return KERN_INVALID_VALUE; 5537 } 5538 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC); 5539 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM); 5540 5541 if (upl == NULL) 5542 return KERN_INVALID_ARGUMENT; 5543 5544REDISCOVER_ENTRY: 5545 vm_map_lock_read(map); 5546 5547 if (vm_map_lookup_entry(map, offset, &entry)) { 5548 5549 if ((entry->vme_end - offset) < *upl_size) { 5550 *upl_size = (upl_size_t) (entry->vme_end - offset); 5551 assert(*upl_size == entry->vme_end - offset); 5552 } 5553 5554 if (caller_flags & UPL_QUERY_OBJECT_TYPE) { 5555 *flags = 0; 5556 5557 if ( !entry->is_sub_map && entry->object.vm_object != VM_OBJECT_NULL) { 5558 if (entry->object.vm_object->private) 5559 *flags = UPL_DEV_MEMORY; 5560 5561 if (entry->object.vm_object->phys_contiguous) 5562 *flags |= UPL_PHYS_CONTIG; 5563 } 5564 vm_map_unlock_read(map); 5565 5566 return KERN_SUCCESS; 5567 } 5568 5569 if (entry->is_sub_map) { 5570 vm_map_t submap; 5571 5572 submap = entry->object.sub_map; 5573 local_start = entry->vme_start; 5574 local_offset = entry->offset; 5575 5576 vm_map_reference(submap); 5577 vm_map_unlock_read(map); 5578 5579 ret = vm_map_create_upl(submap, 5580 local_offset + (offset - local_start), 5581 upl_size, upl, page_list, count, flags); 5582 vm_map_deallocate(submap); 5583 5584 return ret; 5585 } 5586 5587 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) { 5588 if (*upl_size > MAX_UPL_SIZE_BYTES) 5589 *upl_size = MAX_UPL_SIZE_BYTES; 5590 } 5591 /* 5592 * Create an object if necessary. 5593 */ 5594 if (entry->object.vm_object == VM_OBJECT_NULL) { 5595 5596 if (vm_map_lock_read_to_write(map)) 5597 goto REDISCOVER_ENTRY; 5598 5599 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start)); 5600 entry->offset = 0; 5601 5602 vm_map_lock_write_to_read(map); 5603 } 5604 if (!(caller_flags & UPL_COPYOUT_FROM)) { 5605 if (!(entry->protection & VM_PROT_WRITE)) { 5606 vm_map_unlock_read(map); 5607 return KERN_PROTECTION_FAILURE; 5608 } 5609 } 5610 5611 local_object = entry->object.vm_object; 5612 if (vm_map_entry_should_cow_for_true_share(entry) && 5613 local_object->vo_size > *upl_size && 5614 *upl_size != 0) { 5615 vm_prot_t prot; 5616 5617 /* 5618 * Set up the targeted range for copy-on-write to avoid 5619 * applying true_share/copy_delay to the entire object. 5620 */ 5621 5622 if (vm_map_lock_read_to_write(map)) { 5623 goto REDISCOVER_ENTRY; 5624 } 5625 5626 vm_map_clip_start(map, 5627 entry, 5628 vm_map_trunc_page(offset, 5629 VM_MAP_PAGE_MASK(map))); 5630 vm_map_clip_end(map, 5631 entry, 5632 vm_map_round_page(offset + *upl_size, 5633 VM_MAP_PAGE_MASK(map))); 5634 if ((entry->vme_end - offset) < *upl_size) { 5635 *upl_size = (upl_size_t) (entry->vme_end - offset); 5636 assert(*upl_size == entry->vme_end - offset); 5637 } 5638 5639 prot = entry->protection & ~VM_PROT_WRITE; 5640 if (override_nx(map, entry->alias) && prot) 5641 prot |= VM_PROT_EXECUTE; 5642 vm_object_pmap_protect(local_object, 5643 entry->offset, 5644 entry->vme_end - entry->vme_start, 5645 ((entry->is_shared || map->mapped_in_other_pmaps) 5646 ? PMAP_NULL 5647 : map->pmap), 5648 entry->vme_start, 5649 prot); 5650 entry->needs_copy = TRUE; 5651 5652 vm_map_lock_write_to_read(map); 5653 } 5654 5655 if (entry->needs_copy) { 5656 /* 5657 * Honor copy-on-write for COPY_SYMMETRIC 5658 * strategy. 
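			 *
			 * The needs_copy case is resolved by taking what amounts to
			 * a fault on the mapping: vm_map_lookup_locked() is called
			 * with VM_PROT_WRITE (or VM_PROT_READ | VM_PROT_COPY when
			 * the caller is only gathering pages with UPL_COPYOUT_FROM),
			 * which forces the copy-on-write resolution for this entry.
			 * All the locks are then dropped and we jump back to
			 * REDISCOVER_ENTRY to re-evaluate the entry from scratch.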
5659 */ 5660 vm_map_t local_map; 5661 vm_object_t object; 5662 vm_object_offset_t new_offset; 5663 vm_prot_t prot; 5664 boolean_t wired; 5665 vm_map_version_t version; 5666 vm_map_t real_map; 5667 vm_prot_t fault_type; 5668 5669 local_map = map; 5670 5671 if (caller_flags & UPL_COPYOUT_FROM) { 5672 fault_type = VM_PROT_READ | VM_PROT_COPY; 5673 vm_counters.create_upl_extra_cow++; 5674 vm_counters.create_upl_extra_cow_pages += (entry->vme_end - entry->vme_start) / PAGE_SIZE; 5675 } else { 5676 fault_type = VM_PROT_WRITE; 5677 } 5678 if (vm_map_lookup_locked(&local_map, 5679 offset, fault_type, 5680 OBJECT_LOCK_EXCLUSIVE, 5681 &version, &object, 5682 &new_offset, &prot, &wired, 5683 NULL, 5684 &real_map) != KERN_SUCCESS) { 5685 if (fault_type == VM_PROT_WRITE) { 5686 vm_counters.create_upl_lookup_failure_write++; 5687 } else { 5688 vm_counters.create_upl_lookup_failure_copy++; 5689 } 5690 vm_map_unlock_read(local_map); 5691 return KERN_FAILURE; 5692 } 5693 if (real_map != map) 5694 vm_map_unlock(real_map); 5695 vm_map_unlock_read(local_map); 5696 5697 vm_object_unlock(object); 5698 5699 goto REDISCOVER_ENTRY; 5700 } 5701 5702 if (sync_cow_data) { 5703 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) { 5704 local_object = entry->object.vm_object; 5705 local_start = entry->vme_start; 5706 local_offset = entry->offset; 5707 5708 vm_object_reference(local_object); 5709 vm_map_unlock_read(map); 5710 5711 if (local_object->shadow && local_object->copy) { 5712 vm_object_lock_request( 5713 local_object->shadow, 5714 (vm_object_offset_t) 5715 ((offset - local_start) + 5716 local_offset) + 5717 local_object->vo_shadow_offset, 5718 *upl_size, FALSE, 5719 MEMORY_OBJECT_DATA_SYNC, 5720 VM_PROT_NO_CHANGE); 5721 } 5722 sync_cow_data = FALSE; 5723 vm_object_deallocate(local_object); 5724 5725 goto REDISCOVER_ENTRY; 5726 } 5727 } 5728 if (force_data_sync) { 5729 local_object = entry->object.vm_object; 5730 local_start = entry->vme_start; 5731 local_offset = entry->offset; 5732 5733 vm_object_reference(local_object); 5734 vm_map_unlock_read(map); 5735 5736 vm_object_lock_request( 5737 local_object, 5738 (vm_object_offset_t) 5739 ((offset - local_start) + local_offset), 5740 (vm_object_size_t)*upl_size, FALSE, 5741 MEMORY_OBJECT_DATA_SYNC, 5742 VM_PROT_NO_CHANGE); 5743 5744 force_data_sync = FALSE; 5745 vm_object_deallocate(local_object); 5746 5747 goto REDISCOVER_ENTRY; 5748 } 5749 if (entry->object.vm_object->private) 5750 *flags = UPL_DEV_MEMORY; 5751 else 5752 *flags = 0; 5753 5754 if (entry->object.vm_object->phys_contiguous) 5755 *flags |= UPL_PHYS_CONTIG; 5756 5757 local_object = entry->object.vm_object; 5758 local_offset = entry->offset; 5759 local_start = entry->vme_start; 5760 5761 vm_object_reference(local_object); 5762 vm_map_unlock_read(map); 5763 5764 ret = vm_object_iopl_request(local_object, 5765 (vm_object_offset_t) ((offset - local_start) + local_offset), 5766 *upl_size, 5767 upl, 5768 page_list, 5769 count, 5770 caller_flags); 5771 vm_object_deallocate(local_object); 5772 5773 return(ret); 5774 } 5775 vm_map_unlock_read(map); 5776 5777 return(KERN_FAILURE); 5778} 5779 5780/* 5781 * Internal routine to enter a UPL into a VM map. 5782 * 5783 * JMM - This should just be doable through the standard 5784 * vm_map_enter() API. 
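 *
 * Hypothetical usage sketch, pairing the enter/remove calls around
 * kernel access to a UPL's pages ("upl" stands for an already-populated
 * UPL; kernel_map is the kernel's own map):
 *
 *	vm_map_offset_t	kaddr;
 *	kern_return_t	kr;
 *
 *	kr = vm_map_enter_upl(kernel_map, upl, &kaddr);
 *	if (kr == KERN_SUCCESS) {
 *		// ... access the pages through kaddr ...
 *		(void) vm_map_remove_upl(kernel_map, upl);
 *	}
 *
 * The mapping takes its own reference on the UPL (the ref_count++ below),
 * which the matching vm_map_remove_upl() call drops again.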
5785 */ 5786kern_return_t 5787vm_map_enter_upl( 5788 vm_map_t map, 5789 upl_t upl, 5790 vm_map_offset_t *dst_addr) 5791{ 5792 vm_map_size_t size; 5793 vm_object_offset_t offset; 5794 vm_map_offset_t addr; 5795 vm_page_t m; 5796 kern_return_t kr; 5797 int isVectorUPL = 0, curr_upl=0; 5798 upl_t vector_upl = NULL; 5799 vm_offset_t vector_upl_dst_addr = 0; 5800 vm_map_t vector_upl_submap = NULL; 5801 upl_offset_t subupl_offset = 0; 5802 upl_size_t subupl_size = 0; 5803 5804 if (upl == UPL_NULL) 5805 return KERN_INVALID_ARGUMENT; 5806 5807 if((isVectorUPL = vector_upl_is_valid(upl))) { 5808 int mapped=0,valid_upls=0; 5809 vector_upl = upl; 5810 5811 upl_lock(vector_upl); 5812 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { 5813 upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); 5814 if(upl == NULL) 5815 continue; 5816 valid_upls++; 5817 if (UPL_PAGE_LIST_MAPPED & upl->flags) 5818 mapped++; 5819 } 5820 5821 if(mapped) { 5822 if(mapped != valid_upls) 5823 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls); 5824 else { 5825 upl_unlock(vector_upl); 5826 return KERN_FAILURE; 5827 } 5828 } 5829 5830 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE, VM_FLAGS_ANYWHERE, &vector_upl_submap); 5831 if( kr != KERN_SUCCESS ) 5832 panic("Vector UPL submap allocation failed\n"); 5833 map = vector_upl_submap; 5834 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr); 5835 curr_upl=0; 5836 } 5837 else 5838 upl_lock(upl); 5839 5840process_upl_to_enter: 5841 if(isVectorUPL){ 5842 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { 5843 *dst_addr = vector_upl_dst_addr; 5844 upl_unlock(vector_upl); 5845 return KERN_SUCCESS; 5846 } 5847 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); 5848 if(upl == NULL) 5849 goto process_upl_to_enter; 5850 5851 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size); 5852 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset); 5853 } else { 5854 /* 5855 * check to see if already mapped 5856 */ 5857 if (UPL_PAGE_LIST_MAPPED & upl->flags) { 5858 upl_unlock(upl); 5859 return KERN_FAILURE; 5860 } 5861 } 5862 if ((!(upl->flags & UPL_SHADOWED)) && 5863 ((upl->flags & UPL_HAS_BUSY) || 5864 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) { 5865 5866 vm_object_t object; 5867 vm_page_t alias_page; 5868 vm_object_offset_t new_offset; 5869 unsigned int pg_num; 5870 wpl_array_t lite_list; 5871 5872 if (upl->flags & UPL_INTERNAL) { 5873 lite_list = (wpl_array_t) 5874 ((((uintptr_t)upl) + sizeof(struct upl)) 5875 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 5876 } else { 5877 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl)); 5878 } 5879 object = upl->map_object; 5880 upl->map_object = vm_object_allocate(upl->size); 5881 5882 vm_object_lock(upl->map_object); 5883 5884 upl->map_object->shadow = object; 5885 upl->map_object->pageout = TRUE; 5886 upl->map_object->can_persist = FALSE; 5887 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 5888 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset; 5889 upl->map_object->wimg_bits = object->wimg_bits; 5890 offset = upl->map_object->vo_shadow_offset; 5891 new_offset = 0; 5892 size = upl->size; 5893 5894 upl->flags |= UPL_SHADOWED; 5895 5896 while (size) { 5897 pg_num = (unsigned int) (new_offset / PAGE_SIZE); 5898 assert(pg_num == new_offset / PAGE_SIZE); 5899 5900 if (lite_list[pg_num>>5] & (1 << (pg_num & 
31))) { 5901 5902 VM_PAGE_GRAB_FICTITIOUS(alias_page); 5903 5904 vm_object_lock(object); 5905 5906 m = vm_page_lookup(object, offset); 5907 if (m == VM_PAGE_NULL) { 5908 panic("vm_upl_map: page missing\n"); 5909 } 5910 5911 /* 5912 * Convert the fictitious page to a private 5913 * shadow of the real page. 5914 */ 5915 assert(alias_page->fictitious); 5916 alias_page->fictitious = FALSE; 5917 alias_page->private = TRUE; 5918 alias_page->pageout = TRUE; 5919 /* 5920 * since m is a page in the upl it must 5921 * already be wired or BUSY, so it's 5922 * safe to assign the underlying physical 5923 * page to the alias 5924 */ 5925 alias_page->phys_page = m->phys_page; 5926 5927 vm_object_unlock(object); 5928 5929 vm_page_lockspin_queues(); 5930 vm_page_wire(alias_page); 5931 vm_page_unlock_queues(); 5932 5933 /* 5934 * ENCRYPTED SWAP: 5935 * The virtual page ("m") has to be wired in some way 5936 * here or its physical page ("m->phys_page") could 5937 * be recycled at any time. 5938 * Assuming this is enforced by the caller, we can't 5939 * get an encrypted page here. Since the encryption 5940 * key depends on the VM page's "pager" object and 5941 * the "paging_offset", we couldn't handle 2 pageable 5942 * VM pages (with different pagers and paging_offsets) 5943 * sharing the same physical page: we could end up 5944 * encrypting with one key (via one VM page) and 5945 * decrypting with another key (via the alias VM page). 5946 */ 5947 ASSERT_PAGE_DECRYPTED(m); 5948 5949 vm_page_insert(alias_page, upl->map_object, new_offset); 5950 5951 assert(!alias_page->wanted); 5952 alias_page->busy = FALSE; 5953 alias_page->absent = FALSE; 5954 } 5955 size -= PAGE_SIZE; 5956 offset += PAGE_SIZE_64; 5957 new_offset += PAGE_SIZE_64; 5958 } 5959 vm_object_unlock(upl->map_object); 5960 } 5961 if (upl->flags & UPL_SHADOWED) 5962 offset = 0; 5963 else 5964 offset = upl->offset - upl->map_object->paging_offset; 5965 5966 size = upl->size; 5967 5968 vm_object_reference(upl->map_object); 5969 5970 if(!isVectorUPL) { 5971 *dst_addr = 0; 5972 /* 5973 * NEED A UPL_MAP ALIAS 5974 */ 5975 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, 5976 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE, 5977 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); 5978 5979 if (kr != KERN_SUCCESS) { 5980 upl_unlock(upl); 5981 return(kr); 5982 } 5983 } 5984 else { 5985 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, 5986 VM_FLAGS_FIXED, upl->map_object, offset, FALSE, 5987 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); 5988 if(kr) 5989 panic("vm_map_enter failed for a Vector UPL\n"); 5990 } 5991 vm_object_lock(upl->map_object); 5992 5993 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) { 5994 m = vm_page_lookup(upl->map_object, offset); 5995 5996 if (m) { 5997 m->pmapped = TRUE; 5998 5999 /* CODE SIGNING ENFORCEMENT: page has been wpmapped, 6000 * but only in kernel space. If this was on a user map, 6001 * we'd have to set the wpmapped bit. 
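 * ("wpmapped" records that a page has been entered writable into a
 *  pmap; the code-signing logic in vm_fault uses it to decide when a
 *  previously validated page may need re-validation.  Since this
 *  mapping only ever goes into the kernel pmap, see the assert just
 *  below, the bit is intentionally left untouched here.)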
*/ 6002 /* m->wpmapped = TRUE; */ 6003 assert(map->pmap == kernel_pmap); 6004 6005 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE); 6006 } 6007 offset += PAGE_SIZE_64; 6008 } 6009 vm_object_unlock(upl->map_object); 6010 6011 /* 6012 * hold a reference for the mapping 6013 */ 6014 upl->ref_count++; 6015 upl->flags |= UPL_PAGE_LIST_MAPPED; 6016 upl->kaddr = (vm_offset_t) *dst_addr; 6017 assert(upl->kaddr == *dst_addr); 6018 6019 if(isVectorUPL) 6020 goto process_upl_to_enter; 6021 6022 upl_unlock(upl); 6023 6024 return KERN_SUCCESS; 6025} 6026 6027/* 6028 * Internal routine to remove a UPL mapping from a VM map. 6029 * 6030 * XXX - This should just be doable through a standard 6031 * vm_map_remove() operation. Otherwise, implicit clean-up 6032 * of the target map won't be able to correctly remove 6033 * these (and release the reference on the UPL). Having 6034 * to do this means we can't map these into user-space 6035 * maps yet. 6036 */ 6037kern_return_t 6038vm_map_remove_upl( 6039 vm_map_t map, 6040 upl_t upl) 6041{ 6042 vm_address_t addr; 6043 upl_size_t size; 6044 int isVectorUPL = 0, curr_upl = 0; 6045 upl_t vector_upl = NULL; 6046 6047 if (upl == UPL_NULL) 6048 return KERN_INVALID_ARGUMENT; 6049 6050 if((isVectorUPL = vector_upl_is_valid(upl))) { 6051 int unmapped=0, valid_upls=0; 6052 vector_upl = upl; 6053 upl_lock(vector_upl); 6054 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) { 6055 upl = vector_upl_subupl_byindex(vector_upl, curr_upl ); 6056 if(upl == NULL) 6057 continue; 6058 valid_upls++; 6059 if (!(UPL_PAGE_LIST_MAPPED & upl->flags)) 6060 unmapped++; 6061 } 6062 6063 if(unmapped) { 6064 if(unmapped != valid_upls) 6065 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls); 6066 else { 6067 upl_unlock(vector_upl); 6068 return KERN_FAILURE; 6069 } 6070 } 6071 curr_upl=0; 6072 } 6073 else 6074 upl_lock(upl); 6075 6076process_upl_to_remove: 6077 if(isVectorUPL) { 6078 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) { 6079 vm_map_t v_upl_submap; 6080 vm_offset_t v_upl_submap_dst_addr; 6081 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr); 6082 6083 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_NO_FLAGS); 6084 vm_map_deallocate(v_upl_submap); 6085 upl_unlock(vector_upl); 6086 return KERN_SUCCESS; 6087 } 6088 6089 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ ); 6090 if(upl == NULL) 6091 goto process_upl_to_remove; 6092 } 6093 6094 if (upl->flags & UPL_PAGE_LIST_MAPPED) { 6095 addr = upl->kaddr; 6096 size = upl->size; 6097 6098 assert(upl->ref_count > 1); 6099 upl->ref_count--; /* removing mapping ref */ 6100 6101 upl->flags &= ~UPL_PAGE_LIST_MAPPED; 6102 upl->kaddr = (vm_offset_t) 0; 6103 6104 if(!isVectorUPL) { 6105 upl_unlock(upl); 6106 6107 vm_map_remove( 6108 map, 6109 vm_map_trunc_page(addr, 6110 VM_MAP_PAGE_MASK(map)), 6111 vm_map_round_page(addr + size, 6112 VM_MAP_PAGE_MASK(map)), 6113 VM_MAP_NO_FLAGS); 6114 6115 return KERN_SUCCESS; 6116 } 6117 else { 6118 /* 6119 * If it's a Vectored UPL, we'll be removing the entire 6120 * submap anyways, so no need to remove individual UPL 6121 * element mappings from within the submap 6122 */ 6123 goto process_upl_to_remove; 6124 } 6125 } 6126 upl_unlock(upl); 6127 6128 return KERN_FAILURE; 6129} 6130 6131kern_return_t 6132upl_commit_range( 6133 upl_t upl, 6134 upl_offset_t offset, 6135 upl_size_t size, 6136 int flags, 6137 upl_page_info_t *page_list, 6138 mach_msg_type_number_t count, 
6139 boolean_t *empty) 6140{ 6141 upl_size_t xfer_size, subupl_size = size; 6142 vm_object_t shadow_object; 6143 vm_object_t object; 6144 vm_object_offset_t target_offset; 6145 upl_offset_t subupl_offset = offset; 6146 int entry; 6147 wpl_array_t lite_list; 6148 int occupied; 6149 int clear_refmod = 0; 6150 int pgpgout_count = 0; 6151 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 6152 struct vm_page_delayed_work *dwp; 6153 int dw_count; 6154 int dw_limit; 6155 int isVectorUPL = 0; 6156 upl_t vector_upl = NULL; 6157 boolean_t should_be_throttled = FALSE; 6158 6159 vm_page_t nxt_page = VM_PAGE_NULL; 6160 int fast_path_possible = 0; 6161 int fast_path_full_commit = 0; 6162 int throttle_page = 0; 6163 int unwired_count = 0; 6164 int local_queue_count = 0; 6165 queue_head_t local_queue; 6166 6167 *empty = FALSE; 6168 6169 if (upl == UPL_NULL) 6170 return KERN_INVALID_ARGUMENT; 6171 6172 if (count == 0) 6173 page_list = NULL; 6174 6175 if((isVectorUPL = vector_upl_is_valid(upl))) { 6176 vector_upl = upl; 6177 upl_lock(vector_upl); 6178 } 6179 else 6180 upl_lock(upl); 6181 6182process_upl_to_commit: 6183 6184 if(isVectorUPL) { 6185 size = subupl_size; 6186 offset = subupl_offset; 6187 if(size == 0) { 6188 upl_unlock(vector_upl); 6189 return KERN_SUCCESS; 6190 } 6191 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); 6192 if(upl == NULL) { 6193 upl_unlock(vector_upl); 6194 return KERN_FAILURE; 6195 } 6196 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl); 6197 subupl_size -= size; 6198 subupl_offset += size; 6199 } 6200 6201#if UPL_DEBUG 6202 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { 6203 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); 6204 6205 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; 6206 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); 6207 6208 upl->upl_commit_index++; 6209 } 6210#endif 6211 if (upl->flags & UPL_DEVICE_MEMORY) 6212 xfer_size = 0; 6213 else if ((offset + size) <= upl->size) 6214 xfer_size = size; 6215 else { 6216 if(!isVectorUPL) 6217 upl_unlock(upl); 6218 else { 6219 upl_unlock(vector_upl); 6220 } 6221 return KERN_FAILURE; 6222 } 6223 if (upl->flags & UPL_SET_DIRTY) 6224 flags |= UPL_COMMIT_SET_DIRTY; 6225 if (upl->flags & UPL_CLEAR_DIRTY) 6226 flags |= UPL_COMMIT_CLEAR_DIRTY; 6227 6228 if (upl->flags & UPL_INTERNAL) 6229 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl)) 6230 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 6231 else 6232 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 6233 6234 object = upl->map_object; 6235 6236 if (upl->flags & UPL_SHADOWED) { 6237 vm_object_lock(object); 6238 shadow_object = object->shadow; 6239 } else { 6240 shadow_object = object; 6241 } 6242 entry = offset/PAGE_SIZE; 6243 target_offset = (vm_object_offset_t)offset; 6244 6245 if (upl->flags & UPL_KERNEL_OBJECT) 6246 vm_object_lock_shared(shadow_object); 6247 else 6248 vm_object_lock(shadow_object); 6249 6250 if (upl->flags & UPL_ACCESS_BLOCKED) { 6251 assert(shadow_object->blocked_access); 6252 shadow_object->blocked_access = FALSE; 6253 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); 6254 } 6255 6256 if (shadow_object->code_signed) { 6257 /* 6258 * CODE SIGNING: 6259 * If the object is code-signed, do not let this UPL tell 6260 * us if the pages are valid or not. Let the pages be 6261 * validated by VM the normal way (when they get mapped or 6262 * copied). 
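 * Stripping UPL_COMMIT_CS_VALIDATED here means the cs_validated and
 * cs_tainted bits supplied in the page list are ignored further down;
 * the same thing happens just below when no page list was supplied
 * at all.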
6263 */ 6264 flags &= ~UPL_COMMIT_CS_VALIDATED; 6265 } 6266 if (! page_list) { 6267 /* 6268 * No page list to get the code-signing info from !? 6269 */ 6270 flags &= ~UPL_COMMIT_CS_VALIDATED; 6271 } 6272 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal) 6273 should_be_throttled = TRUE; 6274 6275 dwp = &dw_array[0]; 6276 dw_count = 0; 6277 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 6278 6279 if ((upl->flags & UPL_IO_WIRE) && 6280 !(flags & UPL_COMMIT_FREE_ABSENT) && 6281 !isVectorUPL && 6282 shadow_object->purgable != VM_PURGABLE_VOLATILE && 6283 shadow_object->purgable != VM_PURGABLE_EMPTY) { 6284 6285 if (!queue_empty(&shadow_object->memq)) { 6286 queue_init(&local_queue); 6287 if (size == shadow_object->vo_size) { 6288 nxt_page = (vm_page_t)queue_first(&shadow_object->memq); 6289 fast_path_full_commit = 1; 6290 } 6291 fast_path_possible = 1; 6292 6293 if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) && shadow_object->internal && 6294 (shadow_object->purgable == VM_PURGABLE_DENY || 6295 shadow_object->purgable == VM_PURGABLE_NONVOLATILE || 6296 shadow_object->purgable == VM_PURGABLE_VOLATILE)) { 6297 throttle_page = 1; 6298 } 6299 } 6300 } 6301 6302 while (xfer_size) { 6303 vm_page_t t, m; 6304 6305 dwp->dw_mask = 0; 6306 clear_refmod = 0; 6307 6308 m = VM_PAGE_NULL; 6309 6310 if (upl->flags & UPL_LITE) { 6311 unsigned int pg_num; 6312 6313 if (nxt_page != VM_PAGE_NULL) { 6314 m = nxt_page; 6315 nxt_page = (vm_page_t)queue_next(&nxt_page->listq); 6316 target_offset = m->offset; 6317 } 6318 pg_num = (unsigned int) (target_offset/PAGE_SIZE); 6319 assert(pg_num == target_offset/PAGE_SIZE); 6320 6321 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 6322 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); 6323 6324 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) 6325 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); 6326 } else 6327 m = NULL; 6328 } 6329 if (upl->flags & UPL_SHADOWED) { 6330 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { 6331 6332 t->pageout = FALSE; 6333 6334 VM_PAGE_FREE(t); 6335 6336 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL) 6337 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); 6338 } 6339 } 6340 if (m == VM_PAGE_NULL) 6341 goto commit_next_page; 6342 6343 if (m->compressor) { 6344 assert(m->busy); 6345 6346 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 6347 goto commit_next_page; 6348 } 6349 6350 if (flags & UPL_COMMIT_CS_VALIDATED) { 6351 /* 6352 * CODE SIGNING: 6353 * Set the code signing bits according to 6354 * what the UPL says they should be. 6355 */ 6356 m->cs_validated = page_list[entry].cs_validated; 6357 m->cs_tainted = page_list[entry].cs_tainted; 6358 } 6359 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL) 6360 m->written_by_kernel = TRUE; 6361 6362 if (upl->flags & UPL_IO_WIRE) { 6363 6364 if (page_list) 6365 page_list[entry].phys_addr = 0; 6366 6367 if (flags & UPL_COMMIT_SET_DIRTY) { 6368 SET_PAGE_DIRTY(m, FALSE); 6369 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) { 6370 m->dirty = FALSE; 6371 6372 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 6373 m->cs_validated && !m->cs_tainted) { 6374 /* 6375 * CODE SIGNING: 6376 * This page is no longer dirty 6377 * but could have been modified, 6378 * so it will need to be 6379 * re-validated. 
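 * Clearing cs_validated on its own is not enough: pmap_disconnect()
 * below also tears down any existing mappings of the physical page, so
 * the next access has to come back through vm_fault and pick up a
 * fresh code-signing validation.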
6380 */ 6381 if (m->slid) { 6382 panic("upl_commit_range(%p): page %p was slid\n", 6383 upl, m); 6384 } 6385 assert(!m->slid); 6386 m->cs_validated = FALSE; 6387#if DEVELOPMENT || DEBUG 6388 vm_cs_validated_resets++; 6389#endif 6390 pmap_disconnect(m->phys_page); 6391 } 6392 clear_refmod |= VM_MEM_MODIFIED; 6393 } 6394 if (upl->flags & UPL_ACCESS_BLOCKED) { 6395 /* 6396 * We blocked access to the pages in this UPL. 6397 * Clear the "busy" bit and wake up any waiter 6398 * for this page. 6399 */ 6400 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 6401 } 6402 if (fast_path_possible) { 6403 assert(m->object->purgable != VM_PURGABLE_EMPTY); 6404 assert(m->object->purgable != VM_PURGABLE_VOLATILE); 6405 if (m->absent) { 6406 assert(m->wire_count == 0); 6407 assert(m->busy); 6408 6409 m->absent = FALSE; 6410 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 6411 } else { 6412 if (m->wire_count == 0) 6413 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object); 6414 6415 /* 6416 * XXX FBDP need to update some other 6417 * counters here (purgeable_wired_count) 6418 * (ledgers), ... 6419 */ 6420 assert(m->wire_count); 6421 m->wire_count--; 6422 6423 if (m->wire_count == 0) 6424 unwired_count++; 6425 } 6426 if (m->wire_count == 0) { 6427 queue_enter(&local_queue, m, vm_page_t, pageq); 6428 local_queue_count++; 6429 6430 if (throttle_page) { 6431 m->throttled = TRUE; 6432 } else { 6433 if (flags & UPL_COMMIT_INACTIVATE) 6434 m->inactive = TRUE; 6435 else 6436 m->active = TRUE; 6437 } 6438 } 6439 } else { 6440 if (flags & UPL_COMMIT_INACTIVATE) { 6441 dwp->dw_mask |= DW_vm_page_deactivate_internal; 6442 clear_refmod |= VM_MEM_REFERENCED; 6443 } 6444 if (m->absent) { 6445 if (flags & UPL_COMMIT_FREE_ABSENT) 6446 dwp->dw_mask |= DW_vm_page_free; 6447 else { 6448 m->absent = FALSE; 6449 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 6450 6451 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal)) 6452 dwp->dw_mask |= DW_vm_page_activate; 6453 } 6454 } else 6455 dwp->dw_mask |= DW_vm_page_unwire; 6456 } 6457 goto commit_next_page; 6458 } 6459 assert(!m->compressor); 6460 6461 if (page_list) 6462 page_list[entry].phys_addr = 0; 6463 6464 /* 6465 * make sure to clear the hardware 6466 * modify or reference bits before 6467 * releasing the BUSY bit on this page 6468 * otherwise we risk losing a legitimate 6469 * change of state 6470 */ 6471 if (flags & UPL_COMMIT_CLEAR_DIRTY) { 6472 m->dirty = FALSE; 6473 6474 clear_refmod |= VM_MEM_MODIFIED; 6475 } 6476 if (m->laundry) 6477 dwp->dw_mask |= DW_vm_pageout_throttle_up; 6478 6479 if (VM_PAGE_WIRED(m)) 6480 m->pageout = FALSE; 6481 6482 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 6483 m->cs_validated && !m->cs_tainted) { 6484 /* 6485 * CODE SIGNING: 6486 * This page is no longer dirty 6487 * but could have been modified, 6488 * so it will need to be 6489 * re-validated. 
6490 */ 6491 if (m->slid) { 6492 panic("upl_commit_range(%p): page %p was slid\n", 6493 upl, m); 6494 } 6495 assert(!m->slid); 6496 m->cs_validated = FALSE; 6497#if DEVELOPMENT || DEBUG 6498 vm_cs_validated_resets++; 6499#endif 6500 pmap_disconnect(m->phys_page); 6501 } 6502 if (m->overwriting) { 6503 /* 6504 * the (COPY_OUT_FROM == FALSE) request_page_list case 6505 */ 6506 if (m->busy) { 6507#if CONFIG_PHANTOM_CACHE 6508 if (m->absent && !m->object->internal) 6509 dwp->dw_mask |= DW_vm_phantom_cache_update; 6510#endif 6511 m->absent = FALSE; 6512 6513 dwp->dw_mask |= DW_clear_busy; 6514 } else { 6515 /* 6516 * alternate (COPY_OUT_FROM == FALSE) page_list case 6517 * Occurs when the original page was wired 6518 * at the time of the list request 6519 */ 6520 assert(VM_PAGE_WIRED(m)); 6521 6522 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */ 6523 } 6524 m->overwriting = FALSE; 6525 } 6526 if (m->encrypted_cleaning == TRUE) { 6527 m->encrypted_cleaning = FALSE; 6528 6529 dwp->dw_mask |= DW_clear_busy | DW_PAGE_WAKEUP; 6530 } 6531 m->cleaning = FALSE; 6532 6533 if (m->pageout) { 6534 /* 6535 * With the clean queue enabled, UPL_PAGEOUT should 6536 * no longer set the pageout bit. It's pages now go 6537 * to the clean queue. 6538 */ 6539 assert(!(flags & UPL_PAGEOUT)); 6540 6541 m->pageout = FALSE; 6542#if MACH_CLUSTER_STATS 6543 if (m->wanted) vm_pageout_target_collisions++; 6544#endif 6545 if ((flags & UPL_COMMIT_SET_DIRTY) || 6546 (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))) { 6547 /* 6548 * page was re-dirtied after we started 6549 * the pageout... reactivate it since 6550 * we don't know whether the on-disk 6551 * copy matches what is now in memory 6552 */ 6553 SET_PAGE_DIRTY(m, FALSE); 6554 6555 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP; 6556 6557 if (upl->flags & UPL_PAGEOUT) { 6558 CLUSTER_STAT(vm_pageout_target_page_dirtied++;) 6559 VM_STAT_INCR(reactivations); 6560 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL); 6561 } 6562 } else { 6563 /* 6564 * page has been successfully cleaned 6565 * go ahead and free it for other use 6566 */ 6567 if (m->object->internal) { 6568 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL); 6569 } else { 6570 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL); 6571 } 6572 m->dirty = FALSE; 6573 m->busy = TRUE; 6574 6575 dwp->dw_mask |= DW_vm_page_free; 6576 } 6577 goto commit_next_page; 6578 } 6579#if MACH_CLUSTER_STATS 6580 if (m->wpmapped) 6581 m->dirty = pmap_is_modified(m->phys_page); 6582 6583 if (m->dirty) vm_pageout_cluster_dirtied++; 6584 else vm_pageout_cluster_cleaned++; 6585 if (m->wanted) vm_pageout_cluster_collisions++; 6586#endif 6587 /* 6588 * It is a part of the semantic of COPYOUT_FROM 6589 * UPLs that a commit implies cache sync 6590 * between the vm page and the backing store 6591 * this can be used to strip the precious bit 6592 * as well as clean 6593 */ 6594 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS)) 6595 m->precious = FALSE; 6596 6597 if (flags & UPL_COMMIT_SET_DIRTY) { 6598 SET_PAGE_DIRTY(m, FALSE); 6599 } else { 6600 m->dirty = FALSE; 6601 } 6602 6603 /* with the clean queue on, move *all* cleaned pages to the clean queue */ 6604 if (hibernate_cleaning_in_progress == FALSE && !m->dirty && (upl->flags & UPL_PAGEOUT)) { 6605 pgpgout_count++; 6606 6607 VM_STAT_INCR(pageouts); 6608 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); 6609 6610 dwp->dw_mask |= DW_enqueue_cleaned; 6611 vm_pageout_enqueued_cleaned_from_inactive_dirty++; 6612 } else if (should_be_throttled == 
TRUE && !m->active && !m->inactive && !m->speculative && !m->throttled) { 6613 /* 6614 * page coming back in from being 'frozen'... 6615 * it was dirty before it was frozen, so keep it so 6616 * the vm_page_activate will notice that it really belongs 6617 * on the throttle queue and put it there 6618 */ 6619 SET_PAGE_DIRTY(m, FALSE); 6620 dwp->dw_mask |= DW_vm_page_activate; 6621 6622 } else { 6623 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) { 6624 dwp->dw_mask |= DW_vm_page_deactivate_internal; 6625 clear_refmod |= VM_MEM_REFERENCED; 6626 } else if (!m->active && !m->inactive && !m->speculative) { 6627 6628 if (m->clustered || (flags & UPL_COMMIT_SPECULATE)) 6629 dwp->dw_mask |= DW_vm_page_speculate; 6630 else if (m->reference) 6631 dwp->dw_mask |= DW_vm_page_activate; 6632 else { 6633 dwp->dw_mask |= DW_vm_page_deactivate_internal; 6634 clear_refmod |= VM_MEM_REFERENCED; 6635 } 6636 } 6637 } 6638 if (upl->flags & UPL_ACCESS_BLOCKED) { 6639 /* 6640 * We blocked access to the pages in this URL. 6641 * Clear the "busy" bit on this page before we 6642 * wake up any waiter. 6643 */ 6644 dwp->dw_mask |= DW_clear_busy; 6645 } 6646 /* 6647 * Wakeup any thread waiting for the page to be un-cleaning. 6648 */ 6649 dwp->dw_mask |= DW_PAGE_WAKEUP; 6650 6651commit_next_page: 6652 if (clear_refmod) 6653 pmap_clear_refmod(m->phys_page, clear_refmod); 6654 6655 target_offset += PAGE_SIZE_64; 6656 xfer_size -= PAGE_SIZE; 6657 entry++; 6658 6659 if (dwp->dw_mask) { 6660 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { 6661 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); 6662 6663 if (dw_count >= dw_limit) { 6664 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 6665 6666 dwp = &dw_array[0]; 6667 dw_count = 0; 6668 } 6669 } else { 6670 if (dwp->dw_mask & DW_clear_busy) 6671 m->busy = FALSE; 6672 6673 if (dwp->dw_mask & DW_PAGE_WAKEUP) 6674 PAGE_WAKEUP(m); 6675 } 6676 } 6677 } 6678 if (dw_count) 6679 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 6680 6681 if (fast_path_possible) { 6682 6683 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE); 6684 assert(shadow_object->purgable != VM_PURGABLE_EMPTY); 6685 6686 if (local_queue_count || unwired_count) { 6687 6688 if (local_queue_count) { 6689 vm_page_t first_local, last_local; 6690 vm_page_t first_target; 6691 queue_head_t *target_queue; 6692 6693 if (throttle_page) 6694 target_queue = &vm_page_queue_throttled; 6695 else { 6696 if (flags & UPL_COMMIT_INACTIVATE) { 6697 if (shadow_object->internal) 6698 target_queue = &vm_page_queue_anonymous; 6699 else 6700 target_queue = &vm_page_queue_inactive; 6701 } else 6702 target_queue = &vm_page_queue_active; 6703 } 6704 /* 6705 * Transfer the entire local queue to a regular LRU page queues. 6706 */ 6707 first_local = (vm_page_t) queue_first(&local_queue); 6708 last_local = (vm_page_t) queue_last(&local_queue); 6709 6710 vm_page_lockspin_queues(); 6711 6712 first_target = (vm_page_t) queue_first(target_queue); 6713 6714 if (queue_empty(target_queue)) 6715 queue_last(target_queue) = (queue_entry_t) last_local; 6716 else 6717 queue_prev(&first_target->pageq) = (queue_entry_t) last_local; 6718 6719 queue_first(target_queue) = (queue_entry_t) first_local; 6720 queue_prev(&first_local->pageq) = (queue_entry_t) target_queue; 6721 queue_next(&last_local->pageq) = (queue_entry_t) first_target; 6722 6723 /* 6724 * Adjust the global page counts. 
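 * The splice above linked the whole local list into the head of the
 * target queue in one step, under a single vm_page_lockspin_queues(),
 * instead of queue_enter()ing each page individually.  All that is
 * left is to credit the matching counters with local_queue_count and,
 * below, to take any freshly unwired pages out of vm_page_wire_count.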
6725 */ 6726 if (throttle_page) { 6727 vm_page_throttled_count += local_queue_count; 6728 } else { 6729 if (flags & UPL_COMMIT_INACTIVATE) { 6730 if (shadow_object->internal) 6731 vm_page_anonymous_count += local_queue_count; 6732 vm_page_inactive_count += local_queue_count; 6733 6734 token_new_pagecount += local_queue_count; 6735 } else 6736 vm_page_active_count += local_queue_count; 6737 6738 if (shadow_object->internal) 6739 vm_page_pageable_internal_count += local_queue_count; 6740 else 6741 vm_page_pageable_external_count += local_queue_count; 6742 } 6743 } else { 6744 vm_page_lockspin_queues(); 6745 } 6746 if (unwired_count) { 6747 vm_page_wire_count -= unwired_count; 6748 VM_CHECK_MEMORYSTATUS; 6749 } 6750 vm_page_unlock_queues(); 6751 6752 shadow_object->wired_page_count -= unwired_count; 6753 } 6754 } 6755 occupied = 1; 6756 6757 if (upl->flags & UPL_DEVICE_MEMORY) { 6758 occupied = 0; 6759 } else if (upl->flags & UPL_LITE) { 6760 int pg_num; 6761 int i; 6762 6763 occupied = 0; 6764 6765 if (!fast_path_full_commit) { 6766 pg_num = upl->size/PAGE_SIZE; 6767 pg_num = (pg_num + 31) >> 5; 6768 6769 for (i = 0; i < pg_num; i++) { 6770 if (lite_list[i] != 0) { 6771 occupied = 1; 6772 break; 6773 } 6774 } 6775 } 6776 } else { 6777 if (queue_empty(&upl->map_object->memq)) 6778 occupied = 0; 6779 } 6780 if (occupied == 0) { 6781 /* 6782 * If this UPL element belongs to a Vector UPL and is 6783 * empty, then this is the right function to deallocate 6784 * it. So go ahead set the *empty variable. The flag 6785 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view 6786 * should be considered relevant for the Vector UPL and not 6787 * the internal UPLs. 6788 */ 6789 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) 6790 *empty = TRUE; 6791 6792 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { 6793 /* 6794 * this is not a paging object 6795 * so we need to drop the paging reference 6796 * that was taken when we created the UPL 6797 * against this object 6798 */ 6799 vm_object_activity_end(shadow_object); 6800 vm_object_collapse(shadow_object, 0, TRUE); 6801 } else { 6802 /* 6803 * we dontated the paging reference to 6804 * the map object... vm_pageout_object_terminate 6805 * will drop this reference 6806 */ 6807 } 6808 } 6809 vm_object_unlock(shadow_object); 6810 if (object != shadow_object) 6811 vm_object_unlock(object); 6812 6813 if(!isVectorUPL) 6814 upl_unlock(upl); 6815 else { 6816 /* 6817 * If we completed our operations on an UPL that is 6818 * part of a Vectored UPL and if empty is TRUE, then 6819 * we should go ahead and deallocate this UPL element. 6820 * Then we check if this was the last of the UPL elements 6821 * within that Vectored UPL. If so, set empty to TRUE 6822 * so that in ubc_upl_commit_range or ubc_upl_commit, we 6823 * can go ahead and deallocate the Vector UPL too. 
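 * Hypothetical caller pattern, assuming a plain UPL that carries
 * UPL_COMMIT_NOTIFY_EMPTY in its flags so that *empty gets reported:
 *
 *	boolean_t	empty;
 *	kern_return_t	kr;
 *
 *	kr = upl_commit_range(upl, 0, upl->size, 0, NULL, 0, &empty);
 *	if (kr == KERN_SUCCESS && empty)
 *		upl_deallocate(upl);
 *
 * which is roughly the sequence the ubc_upl_commit_range() wrapper
 * performs on behalf of file-system callers.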
6824 */ 6825 if(*empty==TRUE) { 6826 *empty = vector_upl_set_subupl(vector_upl, upl, 0); 6827 upl_deallocate(upl); 6828 } 6829 goto process_upl_to_commit; 6830 } 6831 6832 if (pgpgout_count) { 6833 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL); 6834 } 6835 6836 return KERN_SUCCESS; 6837} 6838 6839kern_return_t 6840upl_abort_range( 6841 upl_t upl, 6842 upl_offset_t offset, 6843 upl_size_t size, 6844 int error, 6845 boolean_t *empty) 6846{ 6847 upl_page_info_t *user_page_list = NULL; 6848 upl_size_t xfer_size, subupl_size = size; 6849 vm_object_t shadow_object; 6850 vm_object_t object; 6851 vm_object_offset_t target_offset; 6852 upl_offset_t subupl_offset = offset; 6853 int entry; 6854 wpl_array_t lite_list; 6855 int occupied; 6856 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 6857 struct vm_page_delayed_work *dwp; 6858 int dw_count; 6859 int dw_limit; 6860 int isVectorUPL = 0; 6861 upl_t vector_upl = NULL; 6862 6863 *empty = FALSE; 6864 6865 if (upl == UPL_NULL) 6866 return KERN_INVALID_ARGUMENT; 6867 6868 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) ) 6869 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty); 6870 6871 if((isVectorUPL = vector_upl_is_valid(upl))) { 6872 vector_upl = upl; 6873 upl_lock(vector_upl); 6874 } 6875 else 6876 upl_lock(upl); 6877 6878process_upl_to_abort: 6879 if(isVectorUPL) { 6880 size = subupl_size; 6881 offset = subupl_offset; 6882 if(size == 0) { 6883 upl_unlock(vector_upl); 6884 return KERN_SUCCESS; 6885 } 6886 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size); 6887 if(upl == NULL) { 6888 upl_unlock(vector_upl); 6889 return KERN_FAILURE; 6890 } 6891 subupl_size -= size; 6892 subupl_offset += size; 6893 } 6894 6895 *empty = FALSE; 6896 6897#if UPL_DEBUG 6898 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) { 6899 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES); 6900 6901 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset; 6902 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size); 6903 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1; 6904 6905 upl->upl_commit_index++; 6906 } 6907#endif 6908 if (upl->flags & UPL_DEVICE_MEMORY) 6909 xfer_size = 0; 6910 else if ((offset + size) <= upl->size) 6911 xfer_size = size; 6912 else { 6913 if(!isVectorUPL) 6914 upl_unlock(upl); 6915 else { 6916 upl_unlock(vector_upl); 6917 } 6918 6919 return KERN_FAILURE; 6920 } 6921 if (upl->flags & UPL_INTERNAL) { 6922 lite_list = (wpl_array_t) 6923 ((((uintptr_t)upl) + sizeof(struct upl)) 6924 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 6925 6926 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 6927 } else { 6928 lite_list = (wpl_array_t) 6929 (((uintptr_t)upl) + sizeof(struct upl)); 6930 } 6931 object = upl->map_object; 6932 6933 if (upl->flags & UPL_SHADOWED) { 6934 vm_object_lock(object); 6935 shadow_object = object->shadow; 6936 } else 6937 shadow_object = object; 6938 6939 entry = offset/PAGE_SIZE; 6940 target_offset = (vm_object_offset_t)offset; 6941 6942 if (upl->flags & UPL_KERNEL_OBJECT) 6943 vm_object_lock_shared(shadow_object); 6944 else 6945 vm_object_lock(shadow_object); 6946 6947 if (upl->flags & UPL_ACCESS_BLOCKED) { 6948 assert(shadow_object->blocked_access); 6949 shadow_object->blocked_access = FALSE; 6950 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED); 6951 } 6952 6953 dwp = &dw_array[0]; 6954 dw_count = 0; 6955 dw_limit = 
DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 6956 6957 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT)) 6958 panic("upl_abort_range: kernel_object being DUMPED"); 6959 6960 while (xfer_size) { 6961 vm_page_t t, m; 6962 unsigned int pg_num; 6963 boolean_t needed; 6964 6965 pg_num = (unsigned int) (target_offset/PAGE_SIZE); 6966 assert(pg_num == target_offset/PAGE_SIZE); 6967 6968 needed = FALSE; 6969 6970 if (user_page_list) 6971 needed = user_page_list[pg_num].needed; 6972 6973 dwp->dw_mask = 0; 6974 m = VM_PAGE_NULL; 6975 6976 if (upl->flags & UPL_LITE) { 6977 6978 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 6979 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); 6980 6981 if ( !(upl->flags & UPL_KERNEL_OBJECT)) 6982 m = vm_page_lookup(shadow_object, target_offset + 6983 (upl->offset - shadow_object->paging_offset)); 6984 } 6985 } 6986 if (upl->flags & UPL_SHADOWED) { 6987 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { 6988 t->pageout = FALSE; 6989 6990 VM_PAGE_FREE(t); 6991 6992 if (m == VM_PAGE_NULL) 6993 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset); 6994 } 6995 } 6996 if ((upl->flags & UPL_KERNEL_OBJECT)) 6997 goto abort_next_page; 6998 6999 if (m != VM_PAGE_NULL) { 7000 7001 assert(!m->compressor); 7002 7003 if (m->absent) { 7004 boolean_t must_free = TRUE; 7005 7006 /* 7007 * COPYOUT = FALSE case 7008 * check for error conditions which must 7009 * be passed back to the pages customer 7010 */ 7011 if (error & UPL_ABORT_RESTART) { 7012 m->restart = TRUE; 7013 m->absent = FALSE; 7014 m->unusual = TRUE; 7015 must_free = FALSE; 7016 } else if (error & UPL_ABORT_UNAVAILABLE) { 7017 m->restart = FALSE; 7018 m->unusual = TRUE; 7019 must_free = FALSE; 7020 } else if (error & UPL_ABORT_ERROR) { 7021 m->restart = FALSE; 7022 m->absent = FALSE; 7023 m->error = TRUE; 7024 m->unusual = TRUE; 7025 must_free = FALSE; 7026 } 7027 if (m->clustered && needed == FALSE) { 7028 /* 7029 * This page was a part of a speculative 7030 * read-ahead initiated by the kernel 7031 * itself. No one is expecting this 7032 * page and no one will clean up its 7033 * error state if it ever becomes valid 7034 * in the future. 7035 * We have to free it here. 7036 */ 7037 must_free = TRUE; 7038 } 7039 7040 /* 7041 * ENCRYPTED SWAP: 7042 * If the page was already encrypted, 7043 * we don't really need to decrypt it 7044 * now. It will get decrypted later, 7045 * on demand, as soon as someone needs 7046 * to access its contents. 7047 */ 7048 7049 m->cleaning = FALSE; 7050 m->encrypted_cleaning = FALSE; 7051 7052 if (m->overwriting && !m->busy) { 7053 /* 7054 * this shouldn't happen since 7055 * this is an 'absent' page, but 7056 * it doesn't hurt to check for 7057 * the 'alternate' method of 7058 * stabilizing the page... 7059 * we will mark 'busy' to be cleared 7060 * in the following code which will 7061 * take care of the primary stabilzation 7062 * method (i.e. setting 'busy' to TRUE) 7063 */ 7064 dwp->dw_mask |= DW_vm_page_unwire; 7065 } 7066 m->overwriting = FALSE; 7067 7068 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); 7069 7070 if (must_free == TRUE) 7071 dwp->dw_mask |= DW_vm_page_free; 7072 else 7073 dwp->dw_mask |= DW_vm_page_activate; 7074 } else { 7075 /* 7076 * Handle the trusted pager throttle. 7077 */ 7078 if (m->laundry) 7079 dwp->dw_mask |= DW_vm_pageout_throttle_up; 7080 7081 if (upl->flags & UPL_ACCESS_BLOCKED) { 7082 /* 7083 * We blocked access to the pages in this UPL. 
7084 * Clear the "busy" bit and wake up any waiter 7085 * for this page. 7086 */ 7087 dwp->dw_mask |= DW_clear_busy; 7088 } 7089 if (m->overwriting) { 7090 if (m->busy) 7091 dwp->dw_mask |= DW_clear_busy; 7092 else { 7093 /* 7094 * deal with the 'alternate' method 7095 * of stabilizing the page... 7096 * we will either free the page 7097 * or mark 'busy' to be cleared 7098 * in the following code which will 7099 * take care of the primary stabilzation 7100 * method (i.e. setting 'busy' to TRUE) 7101 */ 7102 dwp->dw_mask |= DW_vm_page_unwire; 7103 } 7104 m->overwriting = FALSE; 7105 } 7106 if (m->encrypted_cleaning == TRUE) { 7107 m->encrypted_cleaning = FALSE; 7108 7109 dwp->dw_mask |= DW_clear_busy; 7110 } 7111 m->pageout = FALSE; 7112 m->cleaning = FALSE; 7113#if MACH_PAGEMAP 7114 vm_external_state_clr(m->object->existence_map, m->offset); 7115#endif /* MACH_PAGEMAP */ 7116 if (error & UPL_ABORT_DUMP_PAGES) { 7117 pmap_disconnect(m->phys_page); 7118 7119 dwp->dw_mask |= DW_vm_page_free; 7120 } else { 7121 if (!(dwp->dw_mask & DW_vm_page_unwire)) { 7122 if (error & UPL_ABORT_REFERENCE) { 7123 /* 7124 * we've been told to explictly 7125 * reference this page... for 7126 * file I/O, this is done by 7127 * implementing an LRU on the inactive q 7128 */ 7129 dwp->dw_mask |= DW_vm_page_lru; 7130 7131 } else if (!m->active && !m->inactive && !m->speculative) 7132 dwp->dw_mask |= DW_vm_page_deactivate_internal; 7133 } 7134 dwp->dw_mask |= DW_PAGE_WAKEUP; 7135 } 7136 } 7137 } 7138abort_next_page: 7139 target_offset += PAGE_SIZE_64; 7140 xfer_size -= PAGE_SIZE; 7141 entry++; 7142 7143 if (dwp->dw_mask) { 7144 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) { 7145 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count); 7146 7147 if (dw_count >= dw_limit) { 7148 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 7149 7150 dwp = &dw_array[0]; 7151 dw_count = 0; 7152 } 7153 } else { 7154 if (dwp->dw_mask & DW_clear_busy) 7155 m->busy = FALSE; 7156 7157 if (dwp->dw_mask & DW_PAGE_WAKEUP) 7158 PAGE_WAKEUP(m); 7159 } 7160 } 7161 } 7162 if (dw_count) 7163 vm_page_do_delayed_work(shadow_object, &dw_array[0], dw_count); 7164 7165 occupied = 1; 7166 7167 if (upl->flags & UPL_DEVICE_MEMORY) { 7168 occupied = 0; 7169 } else if (upl->flags & UPL_LITE) { 7170 int pg_num; 7171 int i; 7172 7173 pg_num = upl->size/PAGE_SIZE; 7174 pg_num = (pg_num + 31) >> 5; 7175 occupied = 0; 7176 7177 for (i = 0; i < pg_num; i++) { 7178 if (lite_list[i] != 0) { 7179 occupied = 1; 7180 break; 7181 } 7182 } 7183 } else { 7184 if (queue_empty(&upl->map_object->memq)) 7185 occupied = 0; 7186 } 7187 if (occupied == 0) { 7188 /* 7189 * If this UPL element belongs to a Vector UPL and is 7190 * empty, then this is the right function to deallocate 7191 * it. So go ahead set the *empty variable. The flag 7192 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view 7193 * should be considered relevant for the Vector UPL and 7194 * not the internal UPLs. 7195 */ 7196 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL) 7197 *empty = TRUE; 7198 7199 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) { 7200 /* 7201 * this is not a paging object 7202 * so we need to drop the paging reference 7203 * that was taken when we created the UPL 7204 * against this object 7205 */ 7206 vm_object_activity_end(shadow_object); 7207 vm_object_collapse(shadow_object, 0, TRUE); 7208 } else { 7209 /* 7210 * we dontated the paging reference to 7211 * the map object... 
vm_pageout_object_terminate 7212 * will drop this reference 7213 */ 7214 } 7215 } 7216 vm_object_unlock(shadow_object); 7217 if (object != shadow_object) 7218 vm_object_unlock(object); 7219 7220 if(!isVectorUPL) 7221 upl_unlock(upl); 7222 else { 7223 /* 7224 * If we completed our operations on an UPL that is 7225 * part of a Vectored UPL and if empty is TRUE, then 7226 * we should go ahead and deallocate this UPL element. 7227 * Then we check if this was the last of the UPL elements 7228 * within that Vectored UPL. If so, set empty to TRUE 7229 * so that in ubc_upl_abort_range or ubc_upl_abort, we 7230 * can go ahead and deallocate the Vector UPL too. 7231 */ 7232 if(*empty == TRUE) { 7233 *empty = vector_upl_set_subupl(vector_upl, upl,0); 7234 upl_deallocate(upl); 7235 } 7236 goto process_upl_to_abort; 7237 } 7238 7239 return KERN_SUCCESS; 7240} 7241 7242 7243kern_return_t 7244upl_abort( 7245 upl_t upl, 7246 int error) 7247{ 7248 boolean_t empty; 7249 7250 return upl_abort_range(upl, 0, upl->size, error, &empty); 7251} 7252 7253 7254/* an option on commit should be wire */ 7255kern_return_t 7256upl_commit( 7257 upl_t upl, 7258 upl_page_info_t *page_list, 7259 mach_msg_type_number_t count) 7260{ 7261 boolean_t empty; 7262 7263 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty); 7264} 7265 7266 7267void 7268iopl_valid_data( 7269 upl_t upl) 7270{ 7271 vm_object_t object; 7272 vm_offset_t offset; 7273 vm_page_t m, nxt_page = VM_PAGE_NULL; 7274 upl_size_t size; 7275 int wired_count = 0; 7276 7277 if (upl == NULL) 7278 panic("iopl_valid_data: NULL upl"); 7279 if (vector_upl_is_valid(upl)) 7280 panic("iopl_valid_data: vector upl"); 7281 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE) 7282 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags); 7283 7284 object = upl->map_object; 7285 7286 if (object == kernel_object || object == compressor_object) 7287 panic("iopl_valid_data: object == kernel or compressor"); 7288 7289 if (object->purgable == VM_PURGABLE_VOLATILE) 7290 panic("iopl_valid_data: object == VM_PURGABLE_VOLATILE"); 7291 7292 size = upl->size; 7293 7294 vm_object_lock(object); 7295 7296 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE)) 7297 nxt_page = (vm_page_t)queue_first(&object->memq); 7298 else 7299 offset = 0 + upl->offset - object->paging_offset; 7300 7301 while (size) { 7302 7303 if (nxt_page != VM_PAGE_NULL) { 7304 m = nxt_page; 7305 nxt_page = (vm_page_t)queue_next(&nxt_page->listq); 7306 } else { 7307 m = vm_page_lookup(object, offset); 7308 offset += PAGE_SIZE; 7309 7310 if (m == VM_PAGE_NULL) 7311 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset); 7312 } 7313 if (m->busy) { 7314 if (!m->absent) 7315 panic("iopl_valid_data: busy page w/o absent"); 7316 7317 if (m->pageq.next || m->pageq.prev) 7318 panic("iopl_valid_data: busy+absent page on page queue"); 7319 7320 m->absent = FALSE; 7321 m->dirty = TRUE; 7322 m->wire_count++; 7323 wired_count++; 7324 7325 PAGE_WAKEUP_DONE(m); 7326 } 7327 size -= PAGE_SIZE; 7328 } 7329 if (wired_count) { 7330 object->wired_page_count += wired_count; 7331 7332 vm_page_lockspin_queues(); 7333 vm_page_wire_count += wired_count; 7334 vm_page_unlock_queues(); 7335 } 7336 vm_object_unlock(object); 7337} 7338 7339 7340 7341 7342void 7343vm_object_set_pmap_cache_attr( 7344 vm_object_t object, 7345 upl_page_info_array_t user_page_list, 7346 unsigned int num_pages, 7347 boolean_t batch_pmap_op) 7348{ 
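	/*
	 * Push the object's cache attributes (its WIMG bits) down to the
	 * physical pages collected in user_page_list.  Nothing needs to be
	 * done for the default policy; otherwise PMAP_BATCH_SET_CACHE_ATTR
	 * lets the pmap layer apply the attribute to the whole run of pages
	 * at once (batch_pmap_op) rather than one page at a time.
	 */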
7349 unsigned int cache_attr = 0; 7350 7351 cache_attr = object->wimg_bits & VM_WIMG_MASK; 7352 assert(user_page_list); 7353 if (cache_attr != VM_WIMG_USE_DEFAULT) { 7354 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op); 7355 } 7356} 7357 7358unsigned int vm_object_iopl_request_sleep_for_cleaning = 0; 7359 7360kern_return_t 7361vm_object_iopl_request( 7362 vm_object_t object, 7363 vm_object_offset_t offset, 7364 upl_size_t size, 7365 upl_t *upl_ptr, 7366 upl_page_info_array_t user_page_list, 7367 unsigned int *page_list_count, 7368 int cntrl_flags) 7369{ 7370 vm_page_t dst_page; 7371 vm_object_offset_t dst_offset; 7372 upl_size_t xfer_size; 7373 upl_t upl = NULL; 7374 unsigned int entry; 7375 wpl_array_t lite_list = NULL; 7376 int no_zero_fill = FALSE; 7377 unsigned int size_in_pages; 7378 u_int32_t psize; 7379 kern_return_t ret; 7380 vm_prot_t prot; 7381 struct vm_object_fault_info fault_info; 7382 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 7383 struct vm_page_delayed_work *dwp; 7384 int dw_count; 7385 int dw_limit; 7386 int dw_index; 7387 boolean_t caller_lookup; 7388 int io_tracking_flag = 0; 7389 int interruptible; 7390 7391 boolean_t set_cache_attr_needed = FALSE; 7392 boolean_t free_wired_pages = FALSE; 7393 int fast_path_possible = 0; 7394 7395 7396 if (cntrl_flags & ~UPL_VALID_FLAGS) { 7397 /* 7398 * For forward compatibility's sake, 7399 * reject any unknown flag. 7400 */ 7401 return KERN_INVALID_VALUE; 7402 } 7403 if (vm_lopage_needed == FALSE) 7404 cntrl_flags &= ~UPL_NEED_32BIT_ADDR; 7405 7406 if (cntrl_flags & UPL_NEED_32BIT_ADDR) { 7407 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) 7408 return KERN_INVALID_VALUE; 7409 7410 if (object->phys_contiguous) { 7411 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) 7412 return KERN_INVALID_ADDRESS; 7413 7414 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) 7415 return KERN_INVALID_ADDRESS; 7416 } 7417 } 7418 7419 if (cntrl_flags & UPL_ENCRYPT) { 7420 /* 7421 * ENCRYPTED SWAP: 7422 * The paging path doesn't use this interface, 7423 * so we don't support the UPL_ENCRYPT flag 7424 * here. We won't encrypt the pages. 7425 */ 7426 assert(! (cntrl_flags & UPL_ENCRYPT)); 7427 } 7428 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO)) 7429 no_zero_fill = TRUE; 7430 7431 if (cntrl_flags & UPL_COPYOUT_FROM) 7432 prot = VM_PROT_READ; 7433 else 7434 prot = VM_PROT_READ | VM_PROT_WRITE; 7435 7436 if ((!object->internal) && (object->paging_offset != 0)) 7437 panic("vm_object_iopl_request: external object with non-zero paging offset\n"); 7438 7439#if CONFIG_IOSCHED || UPL_DEBUG 7440 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled) 7441 io_tracking_flag |= UPL_CREATE_IO_TRACKING; 7442#endif 7443 7444#if CONFIG_IOSCHED 7445 if (object->io_tracking) { 7446 /* Check if we're dealing with the kernel object. 
We do not support expedite on kernel object UPLs */ 7447 if (object != kernel_object) 7448 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP; 7449 } 7450#endif 7451 7452 if (object->phys_contiguous) 7453 psize = PAGE_SIZE; 7454 else 7455 psize = size; 7456 7457 if (cntrl_flags & UPL_SET_INTERNAL) { 7458 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize); 7459 7460 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 7461 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + 7462 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t))); 7463 if (size == 0) { 7464 user_page_list = NULL; 7465 lite_list = NULL; 7466 } 7467 } else { 7468 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize); 7469 7470 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 7471 if (size == 0) { 7472 lite_list = NULL; 7473 } 7474 } 7475 if (user_page_list) 7476 user_page_list[0].device = FALSE; 7477 *upl_ptr = upl; 7478 7479 upl->map_object = object; 7480 upl->size = size; 7481 7482 size_in_pages = size / PAGE_SIZE; 7483 7484 if (object == kernel_object && 7485 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) { 7486 upl->flags |= UPL_KERNEL_OBJECT; 7487#if UPL_DEBUG 7488 vm_object_lock(object); 7489#else 7490 vm_object_lock_shared(object); 7491#endif 7492 } else { 7493 vm_object_lock(object); 7494 vm_object_activity_begin(object); 7495 } 7496 /* 7497 * paging in progress also protects the paging_offset 7498 */ 7499 upl->offset = offset + object->paging_offset; 7500 7501 if (cntrl_flags & UPL_BLOCK_ACCESS) { 7502 /* 7503 * The user requested that access to the pages in this UPL 7504 * be blocked until the UPL is commited or aborted. 7505 */ 7506 upl->flags |= UPL_ACCESS_BLOCKED; 7507 } 7508 7509 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) && 7510 object->purgable != VM_PURGABLE_VOLATILE && 7511 object->purgable != VM_PURGABLE_EMPTY && 7512 object->copy == NULL && 7513 size == object->vo_size && 7514 offset == 0 && 7515 object->resident_page_count == 0 && 7516 object->shadow == NULL && 7517 object->pager == NULL) 7518 { 7519 fast_path_possible = 1; 7520 set_cache_attr_needed = TRUE; 7521 } 7522 7523#if CONFIG_IOSCHED || UPL_DEBUG 7524 if (upl->flags & UPL_TRACKED_BY_OBJECT) { 7525 vm_object_activity_begin(object); 7526 queue_enter(&object->uplq, upl, upl_t, uplq); 7527 } 7528#endif 7529 7530 if (object->phys_contiguous) { 7531 7532 if (upl->flags & UPL_ACCESS_BLOCKED) { 7533 assert(!object->blocked_access); 7534 object->blocked_access = TRUE; 7535 } 7536 7537 vm_object_unlock(object); 7538 7539 /* 7540 * don't need any shadow mappings for this one 7541 * since it is already I/O memory 7542 */ 7543 upl->flags |= UPL_DEVICE_MEMORY; 7544 7545 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT); 7546 7547 if (user_page_list) { 7548 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT); 7549 user_page_list[0].device = TRUE; 7550 } 7551 if (page_list_count != NULL) { 7552 if (upl->flags & UPL_INTERNAL) 7553 *page_list_count = 0; 7554 else 7555 *page_list_count = 1; 7556 } 7557 return KERN_SUCCESS; 7558 } 7559 if (object != kernel_object && object != compressor_object) { 7560 /* 7561 * Protect user space from future COW operations 7562 */ 7563#if VM_OBJECT_TRACKING_OP_TRUESHARE 7564 if (!object->true_share && 7565 vm_object_tracking_inited) { 7566 void *bt[VM_OBJECT_TRACKING_BTDEPTH]; 7567 int num = 0; 7568 7569 num = OSBacktrace(bt, 
7570 VM_OBJECT_TRACKING_BTDEPTH); 7571 btlog_add_entry(vm_object_tracking_btlog, 7572 object, 7573 VM_OBJECT_TRACKING_OP_TRUESHARE, 7574 bt, 7575 num); 7576 } 7577#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ 7578 7579 object->true_share = TRUE; 7580 7581 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) 7582 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 7583 } 7584 7585 if (!(cntrl_flags & UPL_COPYOUT_FROM) && 7586 object->copy != VM_OBJECT_NULL) { 7587 /* 7588 * Honor copy-on-write obligations 7589 * 7590 * The caller is gathering these pages and 7591 * might modify their contents. We need to 7592 * make sure that the copy object has its own 7593 * private copies of these pages before we let 7594 * the caller modify them. 7595 * 7596 * NOTE: someone else could map the original object 7597 * after we've done this copy-on-write here, and they 7598 * could then see an inconsistent picture of the memory 7599 * while it's being modified via the UPL. To prevent this, 7600 * we would have to block access to these pages until the 7601 * UPL is released. We could use the UPL_BLOCK_ACCESS 7602 * code path for that... 7603 */ 7604 vm_object_update(object, 7605 offset, 7606 size, 7607 NULL, 7608 NULL, 7609 FALSE, /* should_return */ 7610 MEMORY_OBJECT_COPY_SYNC, 7611 VM_PROT_NO_CHANGE); 7612#if DEVELOPMENT || DEBUG 7613 iopl_cow++; 7614 iopl_cow_pages += size >> PAGE_SHIFT; 7615#endif 7616 } 7617 if (cntrl_flags & UPL_SET_INTERRUPTIBLE) 7618 interruptible = THREAD_ABORTSAFE; 7619 else 7620 interruptible = THREAD_UNINT; 7621 7622 entry = 0; 7623 7624 xfer_size = size; 7625 dst_offset = offset; 7626 dw_count = 0; 7627 7628 if (fast_path_possible) { 7629 int wired_count = 0; 7630 7631 while (xfer_size) { 7632 7633 while ( (dst_page = vm_page_grab()) == VM_PAGE_NULL) { 7634 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); 7635 7636 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); 7637 7638 if (vm_page_wait(interruptible) == FALSE) { 7639 /* 7640 * interrupted case 7641 */ 7642 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 7643 7644 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); 7645 7646 if (wired_count) { 7647 vm_page_lockspin_queues(); 7648 vm_page_wire_count += wired_count; 7649 vm_page_unlock_queues(); 7650 7651 free_wired_pages = TRUE; 7652 } 7653 ret = MACH_SEND_INTERRUPTED; 7654 7655 goto return_err; 7656 } 7657 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 7658 7659 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); 7660 } 7661 if (no_zero_fill == FALSE) 7662 vm_page_zero_fill(dst_page); 7663 else 7664 dst_page->absent = TRUE; 7665 7666 dst_page->reference = TRUE; 7667 7668 if (!(cntrl_flags & UPL_COPYOUT_FROM)) { 7669 SET_PAGE_DIRTY(dst_page, FALSE); 7670 } 7671 if (dst_page->absent == FALSE) { 7672 assert(object->purgable != VM_PURGABLE_VOLATILE); 7673 assert(object->purgable != VM_PURGABLE_EMPTY); 7674 dst_page->wire_count++; 7675 wired_count++; 7676 7677 PAGE_WAKEUP_DONE(dst_page); 7678 } 7679 vm_page_insert_internal(dst_page, object, dst_offset, FALSE, TRUE, TRUE); 7680 7681 lite_list[entry>>5] |= 1 << (entry & 31); 7682 7683 if (dst_page->phys_page > upl->highest_page) 7684 upl->highest_page = dst_page->phys_page; 7685 7686 if (user_page_list) { 7687 user_page_list[entry].phys_addr = dst_page->phys_page; 7688 user_page_list[entry].absent = dst_page->absent; 7689 user_page_list[entry].dirty = dst_page->dirty; 
7690 user_page_list[entry].precious = FALSE; 7691 user_page_list[entry].pageout = FALSE; 7692 user_page_list[entry].device = FALSE; 7693 user_page_list[entry].needed = FALSE; 7694 user_page_list[entry].speculative = FALSE; 7695 user_page_list[entry].cs_validated = FALSE; 7696 user_page_list[entry].cs_tainted = FALSE; 7697 } 7698 entry++; 7699 dst_offset += PAGE_SIZE_64; 7700 xfer_size -= PAGE_SIZE; 7701 size_in_pages--; 7702 } 7703 if (wired_count) { 7704 vm_page_lockspin_queues(); 7705 vm_page_wire_count += wired_count; 7706 vm_page_unlock_queues(); 7707 } 7708 goto finish; 7709 } 7710 7711 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 7712 fault_info.user_tag = 0; 7713 fault_info.lo_offset = offset; 7714 fault_info.hi_offset = offset + xfer_size; 7715 fault_info.no_cache = FALSE; 7716 fault_info.stealth = FALSE; 7717 fault_info.io_sync = FALSE; 7718 fault_info.cs_bypass = FALSE; 7719 fault_info.mark_zf_absent = TRUE; 7720 fault_info.interruptible = interruptible; 7721 fault_info.batch_pmap_op = TRUE; 7722 7723 dwp = &dw_array[0]; 7724 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 7725 7726 while (xfer_size) { 7727 vm_fault_return_t result; 7728 unsigned int pg_num; 7729 7730 dwp->dw_mask = 0; 7731 7732 dst_page = vm_page_lookup(object, dst_offset); 7733 7734 /* 7735 * ENCRYPTED SWAP: 7736 * If the page is encrypted, we need to decrypt it, 7737 * so force a soft page fault. 7738 */ 7739 if (dst_page == VM_PAGE_NULL || 7740 dst_page->busy || 7741 dst_page->encrypted || 7742 dst_page->error || 7743 dst_page->restart || 7744 dst_page->absent || 7745 dst_page->fictitious) { 7746 7747 if (object == kernel_object) 7748 panic("vm_object_iopl_request: missing/bad page in kernel object\n"); 7749 if (object == compressor_object) 7750 panic("vm_object_iopl_request: missing/bad page in compressor object\n"); 7751 7752 if (cntrl_flags & UPL_REQUEST_NO_FAULT) { 7753 ret = KERN_MEMORY_ERROR; 7754 goto return_err; 7755 } 7756 set_cache_attr_needed = TRUE; 7757 7758 /* 7759 * We just looked up the page and the result remains valid 7760 * until the object lock is release, so send it to 7761 * vm_fault_page() (as "dst_page"), to avoid having to 7762 * look it up again there. 7763 */ 7764 caller_lookup = TRUE; 7765 7766 do { 7767 vm_page_t top_page; 7768 kern_return_t error_code; 7769 7770 fault_info.cluster_size = xfer_size; 7771 7772 vm_object_paging_begin(object); 7773 7774 result = vm_fault_page(object, dst_offset, 7775 prot | VM_PROT_WRITE, FALSE, 7776 caller_lookup, 7777 &prot, &dst_page, &top_page, 7778 (int *)0, 7779 &error_code, no_zero_fill, 7780 FALSE, &fault_info); 7781 7782 /* our lookup is no longer valid at this point */ 7783 caller_lookup = FALSE; 7784 7785 switch (result) { 7786 7787 case VM_FAULT_SUCCESS: 7788 7789 if ( !dst_page->absent) { 7790 PAGE_WAKEUP_DONE(dst_page); 7791 } else { 7792 /* 7793 * we only get back an absent page if we 7794 * requested that it not be zero-filled 7795 * because we are about to fill it via I/O 7796 * 7797 * absent pages should be left BUSY 7798 * to prevent them from being faulted 7799 * into an address space before we've 7800 * had a chance to complete the I/O on 7801 * them since they may contain info that 7802 * shouldn't be seen by the faulting task 7803 */ 7804 } 7805 /* 7806 * Release paging references and 7807 * top-level placeholder page, if any. 
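			 * (vm_fault_page hands back a busy placeholder page in the
			 * top-level object when the data actually came from an object
			 * further down the shadow chain; that placeholder is of no
			 * further use and is freed here)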
7808 */ 7809 if (top_page != VM_PAGE_NULL) { 7810 vm_object_t local_object; 7811 7812 local_object = top_page->object; 7813 7814 if (top_page->object != dst_page->object) { 7815 vm_object_lock(local_object); 7816 VM_PAGE_FREE(top_page); 7817 vm_object_paging_end(local_object); 7818 vm_object_unlock(local_object); 7819 } else { 7820 VM_PAGE_FREE(top_page); 7821 vm_object_paging_end(local_object); 7822 } 7823 } 7824 vm_object_paging_end(object); 7825 break; 7826 7827 case VM_FAULT_RETRY: 7828 vm_object_lock(object); 7829 break; 7830 7831 case VM_FAULT_MEMORY_SHORTAGE: 7832 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages); 7833 7834 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0); 7835 7836 if (vm_page_wait(interruptible)) { 7837 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 7838 7839 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0); 7840 vm_object_lock(object); 7841 7842 break; 7843 } 7844 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages); 7845 7846 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1); 7847 7848 /* fall thru */ 7849 7850 case VM_FAULT_INTERRUPTED: 7851 error_code = MACH_SEND_INTERRUPTED; 7852 case VM_FAULT_MEMORY_ERROR: 7853 memory_error: 7854 ret = (error_code ? error_code: KERN_MEMORY_ERROR); 7855 7856 vm_object_lock(object); 7857 goto return_err; 7858 7859 case VM_FAULT_SUCCESS_NO_VM_PAGE: 7860 /* success but no page: fail */ 7861 vm_object_paging_end(object); 7862 vm_object_unlock(object); 7863 goto memory_error; 7864 7865 default: 7866 panic("vm_object_iopl_request: unexpected error" 7867 " 0x%x from vm_fault_page()\n", result); 7868 } 7869 } while (result != VM_FAULT_SUCCESS); 7870 7871 } 7872 if (upl->flags & UPL_KERNEL_OBJECT) 7873 goto record_phys_addr; 7874 7875 if (dst_page->compressor) { 7876 dst_page->busy = TRUE; 7877 goto record_phys_addr; 7878 } 7879 7880 if (dst_page->cleaning) { 7881 /* 7882 * Someone else is cleaning this page in place. 7883 * In theory, we should be able to proceed and use this 7884 * page but they'll probably end up clearing the "busy" 7885 * bit on it in upl_commit_range() but they didn't set 7886 * it, so they would clear our "busy" bit and open 7887 * us to race conditions. 7888 * We'd better wait for the cleaning to complete and 7889 * then try again. 7890 */ 7891 vm_object_iopl_request_sleep_for_cleaning++; 7892 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 7893 continue; 7894 } 7895 if (dst_page->laundry) { 7896 dst_page->pageout = FALSE; 7897 7898 vm_pageout_steal_laundry(dst_page, FALSE); 7899 } 7900 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) && 7901 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) { 7902 vm_page_t low_page; 7903 int refmod; 7904 7905 /* 7906 * support devices that can't DMA above 32 bits 7907 * by substituting pages from a pool of low address 7908 * memory for any pages we find above the 4G mark 7909 * can't substitute if the page is already wired because 7910 * we don't know whether that physical address has been 7911 * handed out to some other 64 bit capable DMA device to use 7912 */ 7913 if (VM_PAGE_WIRED(dst_page)) { 7914 ret = KERN_PROTECTION_FAILURE; 7915 goto return_err; 7916 } 7917 low_page = vm_page_grablo(); 7918 7919 if (low_page == VM_PAGE_NULL) { 7920 ret = KERN_RESOURCE_SHORTAGE; 7921 goto return_err; 7922 } 7923 /* 7924 * from here until the vm_page_replace completes 7925 * we musn't drop the object lock... 
we don't 7926 * want anyone refaulting this page in and using 7927 * it after we disconnect it... we want the fault 7928 * to find the new page being substituted. 7929 */ 7930 if (dst_page->pmapped) 7931 refmod = pmap_disconnect(dst_page->phys_page); 7932 else 7933 refmod = 0; 7934 7935 if (!dst_page->absent) 7936 vm_page_copy(dst_page, low_page); 7937 7938 low_page->reference = dst_page->reference; 7939 low_page->dirty = dst_page->dirty; 7940 low_page->absent = dst_page->absent; 7941 7942 if (refmod & VM_MEM_REFERENCED) 7943 low_page->reference = TRUE; 7944 if (refmod & VM_MEM_MODIFIED) { 7945 SET_PAGE_DIRTY(low_page, FALSE); 7946 } 7947 7948 vm_page_replace(low_page, object, dst_offset); 7949 7950 dst_page = low_page; 7951 /* 7952 * vm_page_grablo returned the page marked 7953 * BUSY... we don't need a PAGE_WAKEUP_DONE 7954 * here, because we've never dropped the object lock 7955 */ 7956 if ( !dst_page->absent) 7957 dst_page->busy = FALSE; 7958 } 7959 if ( !dst_page->busy) 7960 dwp->dw_mask |= DW_vm_page_wire; 7961 7962 if (cntrl_flags & UPL_BLOCK_ACCESS) { 7963 /* 7964 * Mark the page "busy" to block any future page fault 7965 * on this page in addition to wiring it. 7966 * We'll also remove the mapping 7967 * of all these pages before leaving this routine. 7968 */ 7969 assert(!dst_page->fictitious); 7970 dst_page->busy = TRUE; 7971 } 7972 /* 7973 * expect the page to be used 7974 * page queues lock must be held to set 'reference' 7975 */ 7976 dwp->dw_mask |= DW_set_reference; 7977 7978 if (!(cntrl_flags & UPL_COPYOUT_FROM)) { 7979 SET_PAGE_DIRTY(dst_page, TRUE); 7980 } 7981 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->written_by_kernel == TRUE) { 7982 pmap_sync_page_attributes_phys(dst_page->phys_page); 7983 dst_page->written_by_kernel = FALSE; 7984 } 7985 7986record_phys_addr: 7987 if (dst_page->busy) 7988 upl->flags |= UPL_HAS_BUSY; 7989 7990 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE); 7991 assert(pg_num == (dst_offset-offset)/PAGE_SIZE); 7992 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 7993 7994 if (dst_page->phys_page > upl->highest_page) 7995 upl->highest_page = dst_page->phys_page; 7996 7997 if (user_page_list) { 7998 user_page_list[entry].phys_addr = dst_page->phys_page; 7999 user_page_list[entry].pageout = dst_page->pageout; 8000 user_page_list[entry].absent = dst_page->absent; 8001 user_page_list[entry].dirty = dst_page->dirty; 8002 user_page_list[entry].precious = dst_page->precious; 8003 user_page_list[entry].device = FALSE; 8004 user_page_list[entry].needed = FALSE; 8005 if (dst_page->clustered == TRUE) 8006 user_page_list[entry].speculative = dst_page->speculative; 8007 else 8008 user_page_list[entry].speculative = FALSE; 8009 user_page_list[entry].cs_validated = dst_page->cs_validated; 8010 user_page_list[entry].cs_tainted = dst_page->cs_tainted; 8011 } 8012 if (object != kernel_object && object != compressor_object) { 8013 /* 8014 * someone is explicitly grabbing this page... 
8015 * update clustered and speculative state 8016 * 8017 */ 8018 if (dst_page->clustered) 8019 VM_PAGE_CONSUME_CLUSTERED(dst_page); 8020 } 8021 entry++; 8022 dst_offset += PAGE_SIZE_64; 8023 xfer_size -= PAGE_SIZE; 8024 size_in_pages--; 8025 8026 if (dwp->dw_mask) { 8027 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count); 8028 8029 if (dw_count >= dw_limit) { 8030 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 8031 8032 dwp = &dw_array[0]; 8033 dw_count = 0; 8034 } 8035 } 8036 } 8037 if (dw_count) 8038 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 8039 8040finish: 8041 if (user_page_list && set_cache_attr_needed == TRUE) 8042 vm_object_set_pmap_cache_attr(object, user_page_list, entry, TRUE); 8043 8044 if (page_list_count != NULL) { 8045 if (upl->flags & UPL_INTERNAL) 8046 *page_list_count = 0; 8047 else if (*page_list_count > entry) 8048 *page_list_count = entry; 8049 } 8050 vm_object_unlock(object); 8051 8052 if (cntrl_flags & UPL_BLOCK_ACCESS) { 8053 /* 8054 * We've marked all the pages "busy" so that future 8055 * page faults will block. 8056 * Now remove the mapping for these pages, so that they 8057 * can't be accessed without causing a page fault. 8058 */ 8059 vm_object_pmap_protect(object, offset, (vm_object_size_t)size, 8060 PMAP_NULL, 0, VM_PROT_NONE); 8061 assert(!object->blocked_access); 8062 object->blocked_access = TRUE; 8063 } 8064 return KERN_SUCCESS; 8065 8066return_err: 8067 dw_index = 0; 8068 8069 for (; offset < dst_offset; offset += PAGE_SIZE) { 8070 boolean_t need_unwire; 8071 8072 dst_page = vm_page_lookup(object, offset); 8073 8074 if (dst_page == VM_PAGE_NULL) 8075 panic("vm_object_iopl_request: Wired page missing. \n"); 8076 8077 /* 8078 * if we've already processed this page in an earlier 8079 * dw_do_work, we need to undo the wiring... we will 8080 * leave the dirty and reference bits on if they 8081 * were set, since we don't have a good way of knowing 8082 * what the previous state was and we won't get here 8083 * under any normal circumstances... we will always 8084 * clear BUSY and wakeup any waiters via vm_page_free 8085 * or PAGE_WAKEUP_DONE 8086 */ 8087 need_unwire = TRUE; 8088 8089 if (dw_count) { 8090 if (dw_array[dw_index].dw_m == dst_page) { 8091 /* 8092 * still in the deferred work list 8093 * which means we haven't yet called 8094 * vm_page_wire on this page 8095 */ 8096 need_unwire = FALSE; 8097 8098 dw_index++; 8099 dw_count--; 8100 } 8101 } 8102 vm_page_lock_queues(); 8103 8104 if (dst_page->absent || free_wired_pages == TRUE) { 8105 vm_page_free(dst_page); 8106 8107 need_unwire = FALSE; 8108 } else { 8109 if (need_unwire == TRUE) 8110 vm_page_unwire(dst_page, TRUE); 8111 8112 PAGE_WAKEUP_DONE(dst_page); 8113 } 8114 vm_page_unlock_queues(); 8115 8116 if (need_unwire == TRUE) 8117 VM_STAT_INCR(reactivations); 8118 } 8119#if UPL_DEBUG 8120 upl->upl_state = 2; 8121#endif 8122 if (! 
(upl->flags & UPL_KERNEL_OBJECT)) { 8123 vm_object_activity_end(object); 8124 vm_object_collapse(object, 0, TRUE); 8125 } 8126 vm_object_unlock(object); 8127 upl_destroy(upl); 8128 8129 return ret; 8130} 8131 8132kern_return_t 8133upl_transpose( 8134 upl_t upl1, 8135 upl_t upl2) 8136{ 8137 kern_return_t retval; 8138 boolean_t upls_locked; 8139 vm_object_t object1, object2; 8140 8141 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) { 8142 return KERN_INVALID_ARGUMENT; 8143 } 8144 8145 upls_locked = FALSE; 8146 8147 /* 8148 * Since we need to lock both UPLs at the same time, 8149 * avoid deadlocks by always taking locks in the same order. 8150 */ 8151 if (upl1 < upl2) { 8152 upl_lock(upl1); 8153 upl_lock(upl2); 8154 } else { 8155 upl_lock(upl2); 8156 upl_lock(upl1); 8157 } 8158 upls_locked = TRUE; /* the UPLs will need to be unlocked */ 8159 8160 object1 = upl1->map_object; 8161 object2 = upl2->map_object; 8162 8163 if (upl1->offset != 0 || upl2->offset != 0 || 8164 upl1->size != upl2->size) { 8165 /* 8166 * We deal only with full objects, not subsets. 8167 * That's because we exchange the entire backing store info 8168 * for the objects: pager, resident pages, etc... We can't do 8169 * only part of it. 8170 */ 8171 retval = KERN_INVALID_VALUE; 8172 goto done; 8173 } 8174 8175 /* 8176 * Tranpose the VM objects' backing store. 8177 */ 8178 retval = vm_object_transpose(object1, object2, 8179 (vm_object_size_t) upl1->size); 8180 8181 if (retval == KERN_SUCCESS) { 8182 /* 8183 * Make each UPL point to the correct VM object, i.e. the 8184 * object holding the pages that the UPL refers to... 8185 */ 8186#if CONFIG_IOSCHED || UPL_DEBUG 8187 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { 8188 vm_object_lock(object1); 8189 vm_object_lock(object2); 8190 } 8191 if (upl1->flags & UPL_TRACKED_BY_OBJECT) 8192 queue_remove(&object1->uplq, upl1, upl_t, uplq); 8193 if (upl2->flags & UPL_TRACKED_BY_OBJECT) 8194 queue_remove(&object2->uplq, upl2, upl_t, uplq); 8195#endif 8196 upl1->map_object = object2; 8197 upl2->map_object = object1; 8198 8199#if CONFIG_IOSCHED || UPL_DEBUG 8200 if (upl1->flags & UPL_TRACKED_BY_OBJECT) 8201 queue_enter(&object2->uplq, upl1, upl_t, uplq); 8202 if (upl2->flags & UPL_TRACKED_BY_OBJECT) 8203 queue_enter(&object1->uplq, upl2, upl_t, uplq); 8204 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) { 8205 vm_object_unlock(object2); 8206 vm_object_unlock(object1); 8207 } 8208#endif 8209 } 8210 8211done: 8212 /* 8213 * Cleanup. 8214 */ 8215 if (upls_locked) { 8216 upl_unlock(upl1); 8217 upl_unlock(upl2); 8218 upls_locked = FALSE; 8219 } 8220 8221 return retval; 8222} 8223 8224void 8225upl_range_needed( 8226 upl_t upl, 8227 int index, 8228 int count) 8229{ 8230 upl_page_info_t *user_page_list; 8231 int size_in_pages; 8232 8233 if ( !(upl->flags & UPL_INTERNAL) || count <= 0) 8234 return; 8235 8236 size_in_pages = upl->size / PAGE_SIZE; 8237 8238 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 8239 8240 while (count-- && index < size_in_pages) 8241 user_page_list[index++].needed = TRUE; 8242} 8243 8244 8245/* 8246 * ENCRYPTED SWAP: 8247 * 8248 * Rationale: the user might have some encrypted data on disk (via 8249 * FileVault or any other mechanism). That data is then decrypted in 8250 * memory, which is safe as long as the machine is secure. 
But that 8251 * decrypted data in memory could be paged out to disk by the default 8252 * pager. The data would then be stored on disk in clear (not encrypted) 8253 * and it could be accessed by anyone who gets physical access to the 8254 * disk (if the laptop or the disk gets stolen for example). This weakens 8255 * the security offered by FileVault. 8256 * 8257 * Solution: the default pager will optionally request that all the 8258 * pages it gathers for pageout be encrypted, via the UPL interfaces, 8259 * before it sends this UPL to disk via the vnode_pageout() path. 8260 * 8261 * Notes: 8262 * 8263 * To avoid disrupting the VM LRU algorithms, we want to keep the 8264 * clean-in-place mechanisms, which allow us to send some extra pages to 8265 * swap (clustering) without actually removing them from the user's 8266 * address space. We don't want the user to unknowingly access encrypted 8267 * data, so we have to actually remove the encrypted pages from the page 8268 * table. When the user accesses the data, the hardware will fail to 8269 * locate the virtual page in its page table and will trigger a page 8270 * fault. We can then decrypt the page and enter it in the page table 8271 * again. Whenever we allow the user to access the contents of a page, 8272 * we have to make sure it's not encrypted. 8273 * 8274 * 8275 */ 8276/* 8277 * ENCRYPTED SWAP: 8278 * Reserve of virtual addresses in the kernel address space. 8279 * We need to map the physical pages in the kernel, so that we 8280 * can call the encryption/decryption routines with a kernel 8281 * virtual address. We keep this pool of pre-allocated kernel 8282 * virtual addresses so that we don't have to scan the kernel's 8283 * virtaul address space each time we need to encrypt or decrypt 8284 * a physical page. 8285 * It would be nice to be able to encrypt and decrypt in physical 8286 * mode but that might not always be more efficient... 8287 */ 8288decl_simple_lock_data(,vm_paging_lock) 8289#define VM_PAGING_NUM_PAGES 64 8290vm_map_offset_t vm_paging_base_address = 0; 8291boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, }; 8292int vm_paging_max_index = 0; 8293int vm_paging_page_waiter = 0; 8294int vm_paging_page_waiter_total = 0; 8295unsigned long vm_paging_no_kernel_page = 0; 8296unsigned long vm_paging_objects_mapped = 0; 8297unsigned long vm_paging_pages_mapped = 0; 8298unsigned long vm_paging_objects_mapped_slow = 0; 8299unsigned long vm_paging_pages_mapped_slow = 0; 8300 8301void 8302vm_paging_map_init(void) 8303{ 8304 kern_return_t kr; 8305 vm_map_offset_t page_map_offset; 8306 vm_map_entry_t map_entry; 8307 8308 assert(vm_paging_base_address == 0); 8309 8310 /* 8311 * Initialize our pool of pre-allocated kernel 8312 * virtual addresses. 
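	 * vm_map_find_space() returns with the kernel_map locked and a
	 * fresh map entry; the entry is pointed at kernel_object by hand,
	 * the map is unlocked, and the chosen base address is remembered
	 * in vm_paging_base_address for later use by vm_paging_map_object().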
8313 */ 8314 page_map_offset = 0; 8315 kr = vm_map_find_space(kernel_map, 8316 &page_map_offset, 8317 VM_PAGING_NUM_PAGES * PAGE_SIZE, 8318 0, 8319 0, 8320 &map_entry); 8321 if (kr != KERN_SUCCESS) { 8322 panic("vm_paging_map_init: kernel_map full\n"); 8323 } 8324 map_entry->object.vm_object = kernel_object; 8325 map_entry->offset = page_map_offset; 8326 map_entry->protection = VM_PROT_NONE; 8327 map_entry->max_protection = VM_PROT_NONE; 8328 map_entry->permanent = TRUE; 8329 vm_object_reference(kernel_object); 8330 vm_map_unlock(kernel_map); 8331 8332 assert(vm_paging_base_address == 0); 8333 vm_paging_base_address = page_map_offset; 8334} 8335 8336/* 8337 * ENCRYPTED SWAP: 8338 * vm_paging_map_object: 8339 * Maps part of a VM object's pages in the kernel 8340 * virtual address space, using the pre-allocated 8341 * kernel virtual addresses, if possible. 8342 * Context: 8343 * The VM object is locked. This lock will get 8344 * dropped and re-acquired though, so the caller 8345 * must make sure the VM object is kept alive 8346 * (by holding a VM map that has a reference 8347 * on it, for example, or taking an extra reference). 8348 * The page should also be kept busy to prevent 8349 * it from being reclaimed. 8350 */ 8351kern_return_t 8352vm_paging_map_object( 8353 vm_page_t page, 8354 vm_object_t object, 8355 vm_object_offset_t offset, 8356 vm_prot_t protection, 8357 boolean_t can_unlock_object, 8358 vm_map_size_t *size, /* IN/OUT */ 8359 vm_map_offset_t *address, /* OUT */ 8360 boolean_t *need_unmap) /* OUT */ 8361{ 8362 kern_return_t kr; 8363 vm_map_offset_t page_map_offset; 8364 vm_map_size_t map_size; 8365 vm_object_offset_t object_offset; 8366 int i; 8367 8368 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { 8369 /* use permanent 1-to-1 kernel mapping of physical memory ? */ 8370#if __x86_64__ 8371 *address = (vm_map_offset_t) 8372 PHYSMAP_PTOV((pmap_paddr_t)page->phys_page << 8373 PAGE_SHIFT); 8374 *need_unmap = FALSE; 8375 return KERN_SUCCESS; 8376#else 8377#warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..." 8378#endif 8379 8380 assert(page->busy); 8381 /* 8382 * Use one of the pre-allocated kernel virtual addresses 8383 * and just enter the VM page in the kernel address space 8384 * at that virtual address. 8385 */ 8386 simple_lock(&vm_paging_lock); 8387 8388 /* 8389 * Try and find an available kernel virtual address 8390 * from our pre-allocated pool. 8391 */ 8392 page_map_offset = 0; 8393 for (;;) { 8394 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) { 8395 if (vm_paging_page_inuse[i] == FALSE) { 8396 page_map_offset = 8397 vm_paging_base_address + 8398 (i * PAGE_SIZE); 8399 break; 8400 } 8401 } 8402 if (page_map_offset != 0) { 8403 /* found a space to map our page ! */ 8404 break; 8405 } 8406 8407 if (can_unlock_object) { 8408 /* 8409 * If we can afford to unlock the VM object, 8410 * let's take the slow path now... 8411 */ 8412 break; 8413 } 8414 /* 8415 * We can't afford to unlock the VM object, so 8416 * let's wait for a space to become available... 8417 */ 8418 vm_paging_page_waiter_total++; 8419 vm_paging_page_waiter++; 8420 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT); 8421 if (kr == THREAD_WAITING) { 8422 simple_unlock(&vm_paging_lock); 8423 kr = thread_block(THREAD_CONTINUE_NULL); 8424 simple_lock(&vm_paging_lock); 8425 } 8426 vm_paging_page_waiter--; 8427 /* ... and try again */ 8428 } 8429 8430 if (page_map_offset != 0) { 8431 /* 8432 * We found a kernel virtual address; 8433 * map the physical page to that virtual address. 
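			 * The slot is marked in-use and the pool high-water mark
			 * (vm_paging_max_index) is updated while vm_paging_lock is
			 * still held; the PMAP_ENTER itself is done with only the
			 * object lock held.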
8434 */ 8435 if (i > vm_paging_max_index) { 8436 vm_paging_max_index = i; 8437 } 8438 vm_paging_page_inuse[i] = TRUE; 8439 simple_unlock(&vm_paging_lock); 8440 8441 page->pmapped = TRUE; 8442 8443 /* 8444 * Keep the VM object locked over the PMAP_ENTER 8445 * and the actual use of the page by the kernel, 8446 * or this pmap mapping might get undone by a 8447 * vm_object_pmap_protect() call... 8448 */ 8449 PMAP_ENTER(kernel_pmap, 8450 page_map_offset, 8451 page, 8452 protection, 8453 VM_PROT_NONE, 8454 0, 8455 TRUE); 8456 vm_paging_objects_mapped++; 8457 vm_paging_pages_mapped++; 8458 *address = page_map_offset; 8459 *need_unmap = TRUE; 8460 8461 /* all done and mapped, ready to use ! */ 8462 return KERN_SUCCESS; 8463 } 8464 8465 /* 8466 * We ran out of pre-allocated kernel virtual 8467 * addresses. Just map the page in the kernel 8468 * the slow and regular way. 8469 */ 8470 vm_paging_no_kernel_page++; 8471 simple_unlock(&vm_paging_lock); 8472 } 8473 8474 if (! can_unlock_object) { 8475 *address = 0; 8476 *size = 0; 8477 *need_unmap = FALSE; 8478 return KERN_NOT_SUPPORTED; 8479 } 8480 8481 object_offset = vm_object_trunc_page(offset); 8482 map_size = vm_map_round_page(*size, 8483 VM_MAP_PAGE_MASK(kernel_map)); 8484 8485 /* 8486 * Try and map the required range of the object 8487 * in the kernel_map 8488 */ 8489 8490 vm_object_reference_locked(object); /* for the map entry */ 8491 vm_object_unlock(object); 8492 8493 kr = vm_map_enter(kernel_map, 8494 address, 8495 map_size, 8496 0, 8497 VM_FLAGS_ANYWHERE, 8498 object, 8499 object_offset, 8500 FALSE, 8501 protection, 8502 VM_PROT_ALL, 8503 VM_INHERIT_NONE); 8504 if (kr != KERN_SUCCESS) { 8505 *address = 0; 8506 *size = 0; 8507 *need_unmap = FALSE; 8508 vm_object_deallocate(object); /* for the map entry */ 8509 vm_object_lock(object); 8510 return kr; 8511 } 8512 8513 *size = map_size; 8514 8515 /* 8516 * Enter the mapped pages in the page table now. 8517 */ 8518 vm_object_lock(object); 8519 /* 8520 * VM object must be kept locked from before PMAP_ENTER() 8521 * until after the kernel is done accessing the page(s). 8522 * Otherwise, the pmap mappings in the kernel could be 8523 * undone by a call to vm_object_pmap_protect(). 8524 */ 8525 8526 for (page_map_offset = 0; 8527 map_size != 0; 8528 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) { 8529 8530 page = vm_page_lookup(object, offset + page_map_offset); 8531 if (page == VM_PAGE_NULL) { 8532 printf("vm_paging_map_object: no page !?"); 8533 vm_object_unlock(object); 8534 kr = vm_map_remove(kernel_map, *address, *size, 8535 VM_MAP_NO_FLAGS); 8536 assert(kr == KERN_SUCCESS); 8537 *address = 0; 8538 *size = 0; 8539 *need_unmap = FALSE; 8540 vm_object_lock(object); 8541 return KERN_MEMORY_ERROR; 8542 } 8543 page->pmapped = TRUE; 8544 8545 //assert(pmap_verify_free(page->phys_page)); 8546 PMAP_ENTER(kernel_pmap, 8547 *address + page_map_offset, 8548 page, 8549 protection, 8550 VM_PROT_NONE, 8551 0, 8552 TRUE); 8553 } 8554 8555 vm_paging_objects_mapped_slow++; 8556 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64); 8557 8558 *need_unmap = TRUE; 8559 8560 return KERN_SUCCESS; 8561} 8562 8563/* 8564 * ENCRYPTED SWAP: 8565 * vm_paging_unmap_object: 8566 * Unmaps part of a VM object's pages from the kernel 8567 * virtual address space. 8568 * Context: 8569 * The VM object is locked. This lock will get 8570 * dropped and re-acquired though. 
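 *		(only if the mapping did not come from the pre-allocated
 *		pool; pool entries are torn down with pmap_remove() below
 *		without the object lock ever being dropped)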
8571 */ 8572void 8573vm_paging_unmap_object( 8574 vm_object_t object, 8575 vm_map_offset_t start, 8576 vm_map_offset_t end) 8577{ 8578 kern_return_t kr; 8579 int i; 8580 8581 if ((vm_paging_base_address == 0) || 8582 (start < vm_paging_base_address) || 8583 (end > (vm_paging_base_address 8584 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) { 8585 /* 8586 * We didn't use our pre-allocated pool of 8587 * kernel virtual address. Deallocate the 8588 * virtual memory. 8589 */ 8590 if (object != VM_OBJECT_NULL) { 8591 vm_object_unlock(object); 8592 } 8593 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS); 8594 if (object != VM_OBJECT_NULL) { 8595 vm_object_lock(object); 8596 } 8597 assert(kr == KERN_SUCCESS); 8598 } else { 8599 /* 8600 * We used a kernel virtual address from our 8601 * pre-allocated pool. Put it back in the pool 8602 * for next time. 8603 */ 8604 assert(end - start == PAGE_SIZE); 8605 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT); 8606 assert(i >= 0 && i < VM_PAGING_NUM_PAGES); 8607 8608 /* undo the pmap mapping */ 8609 pmap_remove(kernel_pmap, start, end); 8610 8611 simple_lock(&vm_paging_lock); 8612 vm_paging_page_inuse[i] = FALSE; 8613 if (vm_paging_page_waiter) { 8614 thread_wakeup(&vm_paging_page_waiter); 8615 } 8616 simple_unlock(&vm_paging_lock); 8617 } 8618} 8619 8620#if ENCRYPTED_SWAP 8621/* 8622 * Encryption data. 8623 * "iv" is the "initial vector". Ideally, we want to 8624 * have a different one for each page we encrypt, so that 8625 * crackers can't find encryption patterns too easily. 8626 */ 8627#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */ 8628boolean_t swap_crypt_ctx_initialized = FALSE; 8629uint32_t swap_crypt_key[8]; /* big enough for a 256 key */ 8630aes_ctx swap_crypt_ctx; 8631const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, }; 8632 8633#if DEBUG 8634boolean_t swap_crypt_ctx_tested = FALSE; 8635unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096))); 8636unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096))); 8637unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096))); 8638#endif /* DEBUG */ 8639 8640/* 8641 * Initialize the encryption context: key and key size. 8642 */ 8643void swap_crypt_ctx_initialize(void); /* forward */ 8644void 8645swap_crypt_ctx_initialize(void) 8646{ 8647 unsigned int i; 8648 8649 /* 8650 * No need for locking to protect swap_crypt_ctx_initialized 8651 * because the first use of encryption will come from the 8652 * pageout thread (we won't pagein before there's been a pageout) 8653 * and there's only one pageout thread. 8654 */ 8655 if (swap_crypt_ctx_initialized == FALSE) { 8656 for (i = 0; 8657 i < (sizeof (swap_crypt_key) / 8658 sizeof (swap_crypt_key[0])); 8659 i++) { 8660 swap_crypt_key[i] = random(); 8661 } 8662 aes_encrypt_key((const unsigned char *) swap_crypt_key, 8663 SWAP_CRYPT_AES_KEY_SIZE, 8664 &swap_crypt_ctx.encrypt); 8665 aes_decrypt_key((const unsigned char *) swap_crypt_key, 8666 SWAP_CRYPT_AES_KEY_SIZE, 8667 &swap_crypt_ctx.decrypt); 8668 swap_crypt_ctx_initialized = TRUE; 8669 } 8670 8671#if DEBUG 8672 /* 8673 * Validate the encryption algorithms. 
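	 * A known 4K pattern is round-tripped through aes_encrypt_cbc() /
	 * aes_decrypt_cbc(), first into separate buffers and then in place,
	 * and we panic on any mismatch.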
8674 */ 8675 if (swap_crypt_ctx_tested == FALSE) { 8676 /* initialize */ 8677 for (i = 0; i < 4096; i++) { 8678 swap_crypt_test_page_ref[i] = (char) i; 8679 } 8680 /* encrypt */ 8681 aes_encrypt_cbc(swap_crypt_test_page_ref, 8682 swap_crypt_null_iv, 8683 PAGE_SIZE / AES_BLOCK_SIZE, 8684 swap_crypt_test_page_encrypt, 8685 &swap_crypt_ctx.encrypt); 8686 /* decrypt */ 8687 aes_decrypt_cbc(swap_crypt_test_page_encrypt, 8688 swap_crypt_null_iv, 8689 PAGE_SIZE / AES_BLOCK_SIZE, 8690 swap_crypt_test_page_decrypt, 8691 &swap_crypt_ctx.decrypt); 8692 /* compare result with original */ 8693 for (i = 0; i < 4096; i ++) { 8694 if (swap_crypt_test_page_decrypt[i] != 8695 swap_crypt_test_page_ref[i]) { 8696 panic("encryption test failed"); 8697 } 8698 } 8699 8700 /* encrypt again */ 8701 aes_encrypt_cbc(swap_crypt_test_page_decrypt, 8702 swap_crypt_null_iv, 8703 PAGE_SIZE / AES_BLOCK_SIZE, 8704 swap_crypt_test_page_decrypt, 8705 &swap_crypt_ctx.encrypt); 8706 /* decrypt in place */ 8707 aes_decrypt_cbc(swap_crypt_test_page_decrypt, 8708 swap_crypt_null_iv, 8709 PAGE_SIZE / AES_BLOCK_SIZE, 8710 swap_crypt_test_page_decrypt, 8711 &swap_crypt_ctx.decrypt); 8712 for (i = 0; i < 4096; i ++) { 8713 if (swap_crypt_test_page_decrypt[i] != 8714 swap_crypt_test_page_ref[i]) { 8715 panic("in place encryption test failed"); 8716 } 8717 } 8718 8719 swap_crypt_ctx_tested = TRUE; 8720 } 8721#endif /* DEBUG */ 8722} 8723 8724/* 8725 * ENCRYPTED SWAP: 8726 * vm_page_encrypt: 8727 * Encrypt the given page, for secure paging. 8728 * The page might already be mapped at kernel virtual 8729 * address "kernel_mapping_offset". Otherwise, we need 8730 * to map it. 8731 * 8732 * Context: 8733 * The page's object is locked, but this lock will be released 8734 * and re-acquired. 8735 * The page is busy and not accessible by users (not entered in any pmap). 8736 */ 8737void 8738vm_page_encrypt( 8739 vm_page_t page, 8740 vm_map_offset_t kernel_mapping_offset) 8741{ 8742 kern_return_t kr; 8743 vm_map_size_t kernel_mapping_size; 8744 boolean_t kernel_mapping_needs_unmap; 8745 vm_offset_t kernel_vaddr; 8746 union { 8747 unsigned char aes_iv[AES_BLOCK_SIZE]; 8748 struct { 8749 memory_object_t pager_object; 8750 vm_object_offset_t paging_offset; 8751 } vm; 8752 } encrypt_iv; 8753 8754 if (! vm_pages_encrypted) { 8755 vm_pages_encrypted = TRUE; 8756 } 8757 8758 assert(page->busy); 8759 8760 if (page->encrypted) { 8761 /* 8762 * Already encrypted: no need to do it again. 8763 */ 8764 vm_page_encrypt_already_encrypted_counter++; 8765 return; 8766 } 8767 assert(page->dirty || page->precious); 8768 8769 ASSERT_PAGE_DECRYPTED(page); 8770 8771 /* 8772 * Take a paging-in-progress reference to keep the object 8773 * alive even if we have to unlock it (in vm_paging_map_object() 8774 * for example)... 8775 */ 8776 vm_object_paging_begin(page->object); 8777 8778 if (kernel_mapping_offset == 0) { 8779 /* 8780 * The page hasn't already been mapped in kernel space 8781 * by the caller. Map it now, so that we can access 8782 * its contents and encrypt them. 
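		 * A single-page mapping is requested so that
		 * vm_paging_map_object() can use the physical window
		 * (on x86_64) or the pre-allocated pool instead of a
		 * full vm_map_enter().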
8783 */ 8784 kernel_mapping_size = PAGE_SIZE; 8785 kernel_mapping_needs_unmap = FALSE; 8786 kr = vm_paging_map_object(page, 8787 page->object, 8788 page->offset, 8789 VM_PROT_READ | VM_PROT_WRITE, 8790 FALSE, 8791 &kernel_mapping_size, 8792 &kernel_mapping_offset, 8793 &kernel_mapping_needs_unmap); 8794 if (kr != KERN_SUCCESS) { 8795 panic("vm_page_encrypt: " 8796 "could not map page in kernel: 0x%x\n", 8797 kr); 8798 } 8799 } else { 8800 kernel_mapping_size = 0; 8801 kernel_mapping_needs_unmap = FALSE; 8802 } 8803 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 8804 8805 if (swap_crypt_ctx_initialized == FALSE) { 8806 swap_crypt_ctx_initialize(); 8807 } 8808 assert(swap_crypt_ctx_initialized); 8809 8810 /* 8811 * Prepare an "initial vector" for the encryption. 8812 * We use the "pager" and the "paging_offset" for that 8813 * page to obfuscate the encrypted data a bit more and 8814 * prevent crackers from finding patterns that they could 8815 * use to break the key. 8816 */ 8817 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv)); 8818 encrypt_iv.vm.pager_object = page->object->pager; 8819 encrypt_iv.vm.paging_offset = 8820 page->object->paging_offset + page->offset; 8821 8822 /* encrypt the "initial vector" */ 8823 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0], 8824 swap_crypt_null_iv, 8825 1, 8826 &encrypt_iv.aes_iv[0], 8827 &swap_crypt_ctx.encrypt); 8828 8829 /* 8830 * Encrypt the page. 8831 */ 8832 aes_encrypt_cbc((const unsigned char *) kernel_vaddr, 8833 &encrypt_iv.aes_iv[0], 8834 PAGE_SIZE / AES_BLOCK_SIZE, 8835 (unsigned char *) kernel_vaddr, 8836 &swap_crypt_ctx.encrypt); 8837 8838 vm_page_encrypt_counter++; 8839 8840 /* 8841 * Unmap the page from the kernel's address space, 8842 * if we had to map it ourselves. Otherwise, let 8843 * the caller undo the mapping if needed. 8844 */ 8845 if (kernel_mapping_needs_unmap) { 8846 vm_paging_unmap_object(page->object, 8847 kernel_mapping_offset, 8848 kernel_mapping_offset + kernel_mapping_size); 8849 } 8850 8851 /* 8852 * Clear the "reference" and "modified" bits. 8853 * This should clean up any impact the encryption had 8854 * on them. 8855 * The page was kept busy and disconnected from all pmaps, 8856 * so it can't have been referenced or modified from user 8857 * space. 8858 * The software bits will be reset later after the I/O 8859 * has completed (in upl_commit_range()). 8860 */ 8861 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED); 8862 8863 page->encrypted = TRUE; 8864 8865 vm_object_paging_end(page->object); 8866} 8867 8868/* 8869 * ENCRYPTED SWAP: 8870 * vm_page_decrypt: 8871 * Decrypt the given page. 8872 * The page might already be mapped at kernel virtual 8873 * address "kernel_mapping_offset". Otherwise, we need 8874 * to map it. 8875 * 8876 * Context: 8877 * The page's VM object is locked but will be unlocked and relocked. 8878 * The page is busy and not accessible by users (not entered in any pmap). 
8879 */ 8880void 8881vm_page_decrypt( 8882 vm_page_t page, 8883 vm_map_offset_t kernel_mapping_offset) 8884{ 8885 kern_return_t kr; 8886 vm_map_size_t kernel_mapping_size; 8887 vm_offset_t kernel_vaddr; 8888 boolean_t kernel_mapping_needs_unmap; 8889 union { 8890 unsigned char aes_iv[AES_BLOCK_SIZE]; 8891 struct { 8892 memory_object_t pager_object; 8893 vm_object_offset_t paging_offset; 8894 } vm; 8895 } decrypt_iv; 8896 boolean_t was_dirty; 8897 8898 assert(page->busy); 8899 assert(page->encrypted); 8900 8901 was_dirty = page->dirty; 8902 8903 /* 8904 * Take a paging-in-progress reference to keep the object 8905 * alive even if we have to unlock it (in vm_paging_map_object() 8906 * for example)... 8907 */ 8908 vm_object_paging_begin(page->object); 8909 8910 if (kernel_mapping_offset == 0) { 8911 /* 8912 * The page hasn't already been mapped in kernel space 8913 * by the caller. Map it now, so that we can access 8914 * its contents and decrypt them. 8915 */ 8916 kernel_mapping_size = PAGE_SIZE; 8917 kernel_mapping_needs_unmap = FALSE; 8918 kr = vm_paging_map_object(page, 8919 page->object, 8920 page->offset, 8921 VM_PROT_READ | VM_PROT_WRITE, 8922 FALSE, 8923 &kernel_mapping_size, 8924 &kernel_mapping_offset, 8925 &kernel_mapping_needs_unmap); 8926 if (kr != KERN_SUCCESS) { 8927 panic("vm_page_decrypt: " 8928 "could not map page in kernel: 0x%x\n", 8929 kr); 8930 } 8931 } else { 8932 kernel_mapping_size = 0; 8933 kernel_mapping_needs_unmap = FALSE; 8934 } 8935 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 8936 8937 assert(swap_crypt_ctx_initialized); 8938 8939 /* 8940 * Prepare an "initial vector" for the decryption. 8941 * It has to be the same as the "initial vector" we 8942 * used to encrypt that page. 8943 */ 8944 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv)); 8945 decrypt_iv.vm.pager_object = page->object->pager; 8946 decrypt_iv.vm.paging_offset = 8947 page->object->paging_offset + page->offset; 8948 8949 /* encrypt the "initial vector" */ 8950 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0], 8951 swap_crypt_null_iv, 8952 1, 8953 &decrypt_iv.aes_iv[0], 8954 &swap_crypt_ctx.encrypt); 8955 8956 /* 8957 * Decrypt the page. 8958 */ 8959 aes_decrypt_cbc((const unsigned char *) kernel_vaddr, 8960 &decrypt_iv.aes_iv[0], 8961 PAGE_SIZE / AES_BLOCK_SIZE, 8962 (unsigned char *) kernel_vaddr, 8963 &swap_crypt_ctx.decrypt); 8964 vm_page_decrypt_counter++; 8965 8966 /* 8967 * Unmap the page from the kernel's address space, 8968 * if we had to map it ourselves. Otherwise, let 8969 * the caller undo the mapping if needed. 8970 */ 8971 if (kernel_mapping_needs_unmap) { 8972 vm_paging_unmap_object(page->object, 8973 kernel_vaddr, 8974 kernel_vaddr + PAGE_SIZE); 8975 } 8976 8977 if (was_dirty) { 8978 /* 8979 * The pager did not specify that the page would be 8980 * clean when it got paged in, so let's not clean it here 8981 * either. 8982 */ 8983 } else { 8984 /* 8985 * After decryption, the page is actually still clean. 8986 * It was encrypted as part of paging, which "cleans" 8987 * the "dirty" pages. 8988 * Noone could access it after it was encrypted 8989 * and the decryption doesn't count. 8990 */ 8991 page->dirty = FALSE; 8992 assert (page->cs_validated == FALSE); 8993 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); 8994 } 8995 page->encrypted = FALSE; 8996 8997 /* 8998 * We've just modified the page's contents via the data cache and part 8999 * of the new contents might still be in the cache and not yet in RAM. 
9000 * Since the page is now available and might get gathered in a UPL to 9001 * be part of a DMA transfer from a driver that expects the memory to 9002 * be coherent at this point, we have to flush the data cache. 9003 */ 9004 pmap_sync_page_attributes_phys(page->phys_page); 9005 /* 9006 * Since the page is not mapped yet, some code might assume that it 9007 * doesn't need to invalidate the instruction cache when writing to 9008 * that page. That code relies on "pmapped" being FALSE, so that the 9009 * caches get synchronized when the page is first mapped. 9010 */ 9011 assert(pmap_verify_free(page->phys_page)); 9012 page->pmapped = FALSE; 9013 page->wpmapped = FALSE; 9014 9015 vm_object_paging_end(page->object); 9016} 9017 9018#if DEVELOPMENT || DEBUG 9019unsigned long upl_encrypt_upls = 0; 9020unsigned long upl_encrypt_pages = 0; 9021#endif 9022 9023/* 9024 * ENCRYPTED SWAP: 9025 * 9026 * upl_encrypt: 9027 * Encrypts all the pages in the UPL, within the specified range. 9028 * 9029 */ 9030void 9031upl_encrypt( 9032 upl_t upl, 9033 upl_offset_t crypt_offset, 9034 upl_size_t crypt_size) 9035{ 9036 upl_size_t upl_size, subupl_size=crypt_size; 9037 upl_offset_t offset_in_upl, subupl_offset=crypt_offset; 9038 vm_object_t upl_object; 9039 vm_object_offset_t upl_offset; 9040 vm_page_t page; 9041 vm_object_t shadow_object; 9042 vm_object_offset_t shadow_offset; 9043 vm_object_offset_t paging_offset; 9044 vm_object_offset_t base_offset; 9045 int isVectorUPL = 0; 9046 upl_t vector_upl = NULL; 9047 9048 if((isVectorUPL = vector_upl_is_valid(upl))) 9049 vector_upl = upl; 9050 9051process_upl_to_encrypt: 9052 if(isVectorUPL) { 9053 crypt_size = subupl_size; 9054 crypt_offset = subupl_offset; 9055 upl = vector_upl_subupl_byoffset(vector_upl, &crypt_offset, &crypt_size); 9056 if(upl == NULL) 9057 panic("upl_encrypt: Accessing a sub-upl that doesn't exist\n"); 9058 subupl_size -= crypt_size; 9059 subupl_offset += crypt_size; 9060 } 9061 9062#if DEVELOPMENT || DEBUG 9063 upl_encrypt_upls++; 9064 upl_encrypt_pages += crypt_size / PAGE_SIZE; 9065#endif 9066 upl_object = upl->map_object; 9067 upl_offset = upl->offset; 9068 upl_size = upl->size; 9069 9070 vm_object_lock(upl_object); 9071 9072 /* 9073 * Find the VM object that contains the actual pages. 9074 */ 9075 if (upl_object->pageout) { 9076 shadow_object = upl_object->shadow; 9077 /* 9078 * The offset in the shadow object is actually also 9079 * accounted for in upl->offset. It possibly shouldn't be 9080 * this way, but for now don't account for it twice. 9081 */ 9082 shadow_offset = 0; 9083 assert(upl_object->paging_offset == 0); /* XXX ? 
*/ 9084 vm_object_lock(shadow_object); 9085 } else { 9086 shadow_object = upl_object; 9087 shadow_offset = 0; 9088 } 9089 9090 paging_offset = shadow_object->paging_offset; 9091 vm_object_paging_begin(shadow_object); 9092 9093 if (shadow_object != upl_object) 9094 vm_object_unlock(upl_object); 9095 9096 9097 base_offset = shadow_offset; 9098 base_offset += upl_offset; 9099 base_offset += crypt_offset; 9100 base_offset -= paging_offset; 9101 9102 assert(crypt_offset + crypt_size <= upl_size); 9103 9104 for (offset_in_upl = 0; 9105 offset_in_upl < crypt_size; 9106 offset_in_upl += PAGE_SIZE) { 9107 page = vm_page_lookup(shadow_object, 9108 base_offset + offset_in_upl); 9109 if (page == VM_PAGE_NULL) { 9110 panic("upl_encrypt: " 9111 "no page for (obj=%p,off=0x%llx+0x%x)!\n", 9112 shadow_object, 9113 base_offset, 9114 offset_in_upl); 9115 } 9116 /* 9117 * Disconnect the page from all pmaps, so that nobody can 9118 * access it while it's encrypted. After that point, all 9119 * accesses to this page will cause a page fault and block 9120 * while the page is busy being encrypted. After the 9121 * encryption completes, any access will cause a 9122 * page fault and the page gets decrypted at that time. 9123 */ 9124 pmap_disconnect(page->phys_page); 9125 vm_page_encrypt(page, 0); 9126 9127 if (vm_object_lock_avoid(shadow_object)) { 9128 /* 9129 * Give vm_pageout_scan() a chance to convert more 9130 * pages from "clean-in-place" to "clean-and-free", 9131 * if it's interested in the same pages we selected 9132 * in this cluster. 9133 */ 9134 vm_object_unlock(shadow_object); 9135 mutex_pause(2); 9136 vm_object_lock(shadow_object); 9137 } 9138 } 9139 9140 vm_object_paging_end(shadow_object); 9141 vm_object_unlock(shadow_object); 9142 9143 if(isVectorUPL && subupl_size) 9144 goto process_upl_to_encrypt; 9145} 9146 9147#else /* ENCRYPTED_SWAP */ 9148void 9149upl_encrypt( 9150 __unused upl_t upl, 9151 __unused upl_offset_t crypt_offset, 9152 __unused upl_size_t crypt_size) 9153{ 9154} 9155 9156void 9157vm_page_encrypt( 9158 __unused vm_page_t page, 9159 __unused vm_map_offset_t kernel_mapping_offset) 9160{ 9161} 9162 9163void 9164vm_page_decrypt( 9165 __unused vm_page_t page, 9166 __unused vm_map_offset_t kernel_mapping_offset) 9167{ 9168} 9169 9170#endif /* ENCRYPTED_SWAP */ 9171 9172/* 9173 * page->object must be locked 9174 */ 9175void 9176vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked) 9177{ 9178 if (!queues_locked) { 9179 vm_page_lockspin_queues(); 9180 } 9181 9182 /* 9183 * need to drop the laundry count... 9184 * we may also need to remove it 9185 * from the I/O paging queue... 9186 * vm_pageout_throttle_up handles both cases 9187 * 9188 * the laundry and pageout_queue flags are cleared... 
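	 * (the caller is taking this page for its own use, so it must
	 * stop looking like it belongs to a pageout queue)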
9189 */ 9190 vm_pageout_throttle_up(page); 9191 9192 vm_page_steal_pageout_page++; 9193 9194 if (!queues_locked) { 9195 vm_page_unlock_queues(); 9196 } 9197} 9198 9199upl_t 9200vector_upl_create(vm_offset_t upl_offset) 9201{ 9202 int vector_upl_size = sizeof(struct _vector_upl); 9203 int i=0; 9204 upl_t upl; 9205 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size); 9206 9207 upl = upl_create(0,UPL_VECTOR,0); 9208 upl->vector_upl = vector_upl; 9209 upl->offset = upl_offset; 9210 vector_upl->size = 0; 9211 vector_upl->offset = upl_offset; 9212 vector_upl->invalid_upls=0; 9213 vector_upl->num_upls=0; 9214 vector_upl->pagelist = NULL; 9215 9216 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) { 9217 vector_upl->upl_iostates[i].size = 0; 9218 vector_upl->upl_iostates[i].offset = 0; 9219 9220 } 9221 return upl; 9222} 9223 9224void 9225vector_upl_deallocate(upl_t upl) 9226{ 9227 if(upl) { 9228 vector_upl_t vector_upl = upl->vector_upl; 9229 if(vector_upl) { 9230 if(vector_upl->invalid_upls != vector_upl->num_upls) 9231 panic("Deallocating non-empty Vectored UPL\n"); 9232 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE))); 9233 vector_upl->invalid_upls=0; 9234 vector_upl->num_upls = 0; 9235 vector_upl->pagelist = NULL; 9236 vector_upl->size = 0; 9237 vector_upl->offset = 0; 9238 kfree(vector_upl, sizeof(struct _vector_upl)); 9239 vector_upl = (vector_upl_t)0xfeedfeed; 9240 } 9241 else 9242 panic("vector_upl_deallocate was passed a non-vectored upl\n"); 9243 } 9244 else 9245 panic("vector_upl_deallocate was passed a NULL upl\n"); 9246} 9247 9248boolean_t 9249vector_upl_is_valid(upl_t upl) 9250{ 9251 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) { 9252 vector_upl_t vector_upl = upl->vector_upl; 9253 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef) 9254 return FALSE; 9255 else 9256 return TRUE; 9257 } 9258 return FALSE; 9259} 9260 9261boolean_t 9262vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size) 9263{ 9264 if(vector_upl_is_valid(upl)) { 9265 vector_upl_t vector_upl = upl->vector_upl; 9266 9267 if(vector_upl) { 9268 if(subupl) { 9269 if(io_size) { 9270 if(io_size < PAGE_SIZE) 9271 io_size = PAGE_SIZE; 9272 subupl->vector_upl = (void*)vector_upl; 9273 vector_upl->upl_elems[vector_upl->num_upls++] = subupl; 9274 vector_upl->size += io_size; 9275 upl->size += io_size; 9276 } 9277 else { 9278 uint32_t i=0,invalid_upls=0; 9279 for(i = 0; i < vector_upl->num_upls; i++) { 9280 if(vector_upl->upl_elems[i] == subupl) 9281 break; 9282 } 9283 if(i == vector_upl->num_upls) 9284 panic("Trying to remove sub-upl when none exists"); 9285 9286 vector_upl->upl_elems[i] = NULL; 9287 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1); 9288 if(invalid_upls == vector_upl->num_upls) 9289 return TRUE; 9290 else 9291 return FALSE; 9292 } 9293 } 9294 else 9295 panic("vector_upl_set_subupl was passed a NULL upl element\n"); 9296 } 9297 else 9298 panic("vector_upl_set_subupl was passed a non-vectored upl\n"); 9299 } 9300 else 9301 panic("vector_upl_set_subupl was passed a NULL upl\n"); 9302 9303 return FALSE; 9304} 9305 9306void 9307vector_upl_set_pagelist(upl_t upl) 9308{ 9309 if(vector_upl_is_valid(upl)) { 9310 uint32_t i=0; 9311 vector_upl_t vector_upl = upl->vector_upl; 9312 9313 if(vector_upl) { 9314 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0; 9315 9316 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)); 9317 9318 for(i=0; 
i < vector_upl->num_upls; i++) { 9319 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE; 9320 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size); 9321 pagelist_size += cur_upl_pagelist_size; 9322 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page) 9323 upl->highest_page = vector_upl->upl_elems[i]->highest_page; 9324 } 9325 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) ); 9326 } 9327 else 9328 panic("vector_upl_set_pagelist was passed a non-vectored upl\n"); 9329 } 9330 else 9331 panic("vector_upl_set_pagelist was passed a NULL upl\n"); 9332 9333} 9334 9335upl_t 9336vector_upl_subupl_byindex(upl_t upl, uint32_t index) 9337{ 9338 if(vector_upl_is_valid(upl)) { 9339 vector_upl_t vector_upl = upl->vector_upl; 9340 if(vector_upl) { 9341 if(index < vector_upl->num_upls) 9342 return vector_upl->upl_elems[index]; 9343 } 9344 else 9345 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n"); 9346 } 9347 return NULL; 9348} 9349 9350upl_t 9351vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size) 9352{ 9353 if(vector_upl_is_valid(upl)) { 9354 uint32_t i=0; 9355 vector_upl_t vector_upl = upl->vector_upl; 9356 9357 if(vector_upl) { 9358 upl_t subupl = NULL; 9359 vector_upl_iostates_t subupl_state; 9360 9361 for(i=0; i < vector_upl->num_upls; i++) { 9362 subupl = vector_upl->upl_elems[i]; 9363 subupl_state = vector_upl->upl_iostates[i]; 9364 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) { 9365 /* We could have been passed an offset/size pair that belongs 9366 * to an UPL element that has already been committed/aborted. 9367 * If so, return NULL. 
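					 * (such elements have had their upl_elems[] slot
					 * cleared to NULL by vector_upl_set_subupl())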
9368 */ 9369 if(subupl == NULL) 9370 return NULL; 9371 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) { 9372 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset; 9373 if(*upl_size > subupl_state.size) 9374 *upl_size = subupl_state.size; 9375 } 9376 if(*upl_offset >= subupl_state.offset) 9377 *upl_offset -= subupl_state.offset; 9378 else if(i) 9379 panic("Vector UPL offset miscalculation\n"); 9380 return subupl; 9381 } 9382 } 9383 } 9384 else 9385 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n"); 9386 } 9387 return NULL; 9388} 9389 9390void 9391vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr) 9392{ 9393 *v_upl_submap = NULL; 9394 9395 if(vector_upl_is_valid(upl)) { 9396 vector_upl_t vector_upl = upl->vector_upl; 9397 if(vector_upl) { 9398 *v_upl_submap = vector_upl->submap; 9399 *submap_dst_addr = vector_upl->submap_dst_addr; 9400 } 9401 else 9402 panic("vector_upl_get_submap was passed a non-vectored UPL\n"); 9403 } 9404 else 9405 panic("vector_upl_get_submap was passed a null UPL\n"); 9406} 9407 9408void 9409vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr) 9410{ 9411 if(vector_upl_is_valid(upl)) { 9412 vector_upl_t vector_upl = upl->vector_upl; 9413 if(vector_upl) { 9414 vector_upl->submap = submap; 9415 vector_upl->submap_dst_addr = submap_dst_addr; 9416 } 9417 else 9418 panic("vector_upl_get_submap was passed a non-vectored UPL\n"); 9419 } 9420 else 9421 panic("vector_upl_get_submap was passed a NULL UPL\n"); 9422} 9423 9424void 9425vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size) 9426{ 9427 if(vector_upl_is_valid(upl)) { 9428 uint32_t i = 0; 9429 vector_upl_t vector_upl = upl->vector_upl; 9430 9431 if(vector_upl) { 9432 for(i = 0; i < vector_upl->num_upls; i++) { 9433 if(vector_upl->upl_elems[i] == subupl) 9434 break; 9435 } 9436 9437 if(i == vector_upl->num_upls) 9438 panic("setting sub-upl iostate when none exists"); 9439 9440 vector_upl->upl_iostates[i].offset = offset; 9441 if(size < PAGE_SIZE) 9442 size = PAGE_SIZE; 9443 vector_upl->upl_iostates[i].size = size; 9444 } 9445 else 9446 panic("vector_upl_set_iostate was passed a non-vectored UPL\n"); 9447 } 9448 else 9449 panic("vector_upl_set_iostate was passed a NULL UPL\n"); 9450} 9451 9452void 9453vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size) 9454{ 9455 if(vector_upl_is_valid(upl)) { 9456 uint32_t i = 0; 9457 vector_upl_t vector_upl = upl->vector_upl; 9458 9459 if(vector_upl) { 9460 for(i = 0; i < vector_upl->num_upls; i++) { 9461 if(vector_upl->upl_elems[i] == subupl) 9462 break; 9463 } 9464 9465 if(i == vector_upl->num_upls) 9466 panic("getting sub-upl iostate when none exists"); 9467 9468 *offset = vector_upl->upl_iostates[i].offset; 9469 *size = vector_upl->upl_iostates[i].size; 9470 } 9471 else 9472 panic("vector_upl_get_iostate was passed a non-vectored UPL\n"); 9473 } 9474 else 9475 panic("vector_upl_get_iostate was passed a NULL UPL\n"); 9476} 9477 9478void 9479vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size) 9480{ 9481 if(vector_upl_is_valid(upl)) { 9482 vector_upl_t vector_upl = upl->vector_upl; 9483 if(vector_upl) { 9484 if(index < vector_upl->num_upls) { 9485 *offset = vector_upl->upl_iostates[index].offset; 9486 *size = vector_upl->upl_iostates[index].size; 9487 } 9488 else 9489 *offset = *size = 0; 9490 } 9491 else 9492 panic("vector_upl_get_iostate_byindex was 
passed a non-vectored UPL\n"); 9493 } 9494 else 9495 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n"); 9496} 9497 9498upl_page_info_t * 9499upl_get_internal_vectorupl_pagelist(upl_t upl) 9500{ 9501 return ((vector_upl_t)(upl->vector_upl))->pagelist; 9502} 9503 9504void * 9505upl_get_internal_vectorupl(upl_t upl) 9506{ 9507 return upl->vector_upl; 9508} 9509 9510vm_size_t 9511upl_get_internal_pagelist_offset(void) 9512{ 9513 return sizeof(struct upl); 9514} 9515 9516void 9517upl_clear_dirty( 9518 upl_t upl, 9519 boolean_t value) 9520{ 9521 if (value) { 9522 upl->flags |= UPL_CLEAR_DIRTY; 9523 } else { 9524 upl->flags &= ~UPL_CLEAR_DIRTY; 9525 } 9526} 9527 9528void 9529upl_set_referenced( 9530 upl_t upl, 9531 boolean_t value) 9532{ 9533 upl_lock(upl); 9534 if (value) { 9535 upl->ext_ref_count++; 9536 } else { 9537 if (!upl->ext_ref_count) { 9538 panic("upl_set_referenced not %p\n", upl); 9539 } 9540 upl->ext_ref_count--; 9541 } 9542 upl_unlock(upl); 9543} 9544 9545#if CONFIG_IOSCHED 9546void 9547upl_set_blkno( 9548 upl_t upl, 9549 vm_offset_t upl_offset, 9550 int io_size, 9551 int64_t blkno) 9552{ 9553 int i,j; 9554 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) 9555 return; 9556 9557 assert(upl->upl_reprio_info != 0); 9558 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) { 9559 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size); 9560 } 9561} 9562#endif 9563 9564boolean_t 9565vm_page_is_slideable(vm_page_t m) 9566{ 9567 boolean_t result = FALSE; 9568 vm_shared_region_slide_info_t si; 9569 9570 vm_object_lock_assert_held(m->object); 9571 9572 /* make sure our page belongs to the one object allowed to do this */ 9573 if (!m->object->object_slid) { 9574 goto done; 9575 } 9576 9577 si = m->object->vo_slide_info; 9578 if (si == NULL) { 9579 goto done; 9580 } 9581 9582 if(!m->slid && (si->start <= m->offset && si->end > m->offset)) { 9583 result = TRUE; 9584 } 9585 9586done: 9587 return result; 9588} 9589 9590int vm_page_slide_counter = 0; 9591int vm_page_slide_errors = 0; 9592kern_return_t 9593vm_page_slide( 9594 vm_page_t page, 9595 vm_map_offset_t kernel_mapping_offset) 9596{ 9597 kern_return_t kr; 9598 vm_map_size_t kernel_mapping_size; 9599 boolean_t kernel_mapping_needs_unmap; 9600 vm_offset_t kernel_vaddr; 9601 uint32_t pageIndex = 0; 9602 9603 assert(!page->slid); 9604 assert(page->object->object_slid); 9605 vm_object_lock_assert_exclusive(page->object); 9606 9607 if (page->error) 9608 return KERN_FAILURE; 9609 9610 /* 9611 * Take a paging-in-progress reference to keep the object 9612 * alive even if we have to unlock it (in vm_paging_map_object() 9613 * for example)... 9614 */ 9615 vm_object_paging_begin(page->object); 9616 9617 if (kernel_mapping_offset == 0) { 9618 /* 9619 * The page hasn't already been mapped in kernel space 9620 * by the caller. Map it now, so that we can access 9621 * its contents and decrypt them. 9622 */ 9623 kernel_mapping_size = PAGE_SIZE; 9624 kernel_mapping_needs_unmap = FALSE; 9625 kr = vm_paging_map_object(page, 9626 page->object, 9627 page->offset, 9628 VM_PROT_READ | VM_PROT_WRITE, 9629 FALSE, 9630 &kernel_mapping_size, 9631 &kernel_mapping_offset, 9632 &kernel_mapping_needs_unmap); 9633 if (kr != KERN_SUCCESS) { 9634 panic("vm_page_slide: " 9635 "could not map page in kernel: 0x%x\n", 9636 kr); 9637 } 9638 } else { 9639 kernel_mapping_size = 0; 9640 kernel_mapping_needs_unmap = FALSE; 9641 } 9642 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 9643 9644 /* 9645 * Slide the pointers on the page. 
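	 * When the hardware page size is larger than 4K, each 4K sub-page
	 * is rebased separately via vm_shared_region_slide_page(); see the
	 * TEST_PAGE_SIZE_4K branches below.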
9646 */ 9647 9648 /*assert that slide_file_info.start/end are page-aligned?*/ 9649 9650 assert(!page->slid); 9651 assert(page->object->object_slid); 9652 9653 /* on some platforms this is an extern int, on others it's a cpp macro */ 9654 __unreachable_ok_push 9655 /* TODO: Consider this */ 9656 if (!TEST_PAGE_SIZE_4K) { 9657 for (int i = 0; i < 4; i++) { 9658 pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/0x1000); 9659 kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr + (0x1000*i), pageIndex + i); 9660 } 9661 } else { 9662 pageIndex = (uint32_t)((page->offset - page->object->vo_slide_info->start)/PAGE_SIZE); 9663 kr = vm_shared_region_slide_page(page->object->vo_slide_info, kernel_vaddr, pageIndex); 9664 } 9665 __unreachable_ok_pop 9666 9667 vm_page_slide_counter++; 9668 9669 /* 9670 * Unmap the page from the kernel's address space, 9671 */ 9672 if (kernel_mapping_needs_unmap) { 9673 vm_paging_unmap_object(page->object, 9674 kernel_vaddr, 9675 kernel_vaddr + PAGE_SIZE); 9676 } 9677 9678 page->dirty = FALSE; 9679 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); 9680 9681 if (kr != KERN_SUCCESS || cs_debug > 1) { 9682 printf("vm_page_slide(%p): " 9683 "obj %p off 0x%llx mobj %p moff 0x%llx\n", 9684 page, 9685 page->object, page->offset, 9686 page->object->pager, 9687 page->offset + page->object->paging_offset); 9688 } 9689 9690 if (kr == KERN_SUCCESS) { 9691 page->slid = TRUE; 9692 } else { 9693 page->error = TRUE; 9694 vm_page_slide_errors++; 9695 } 9696 9697 vm_object_paging_end(page->object); 9698 9699 return kr; 9700} 9701 9702void inline memoryshot(unsigned int event, unsigned int control) 9703{ 9704 if (vm_debug_events) { 9705 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control, 9706 vm_page_active_count, vm_page_inactive_count, 9707 vm_page_free_count, vm_page_speculative_count, 9708 vm_page_throttled_count); 9709 } else { 9710 (void) event; 9711 (void) control; 9712 } 9713 9714} 9715 9716#ifdef MACH_BSD 9717 9718boolean_t upl_device_page(upl_page_info_t *upl) 9719{ 9720 return(UPL_DEVICE_PAGE(upl)); 9721} 9722boolean_t upl_page_present(upl_page_info_t *upl, int index) 9723{ 9724 return(UPL_PAGE_PRESENT(upl, index)); 9725} 9726boolean_t upl_speculative_page(upl_page_info_t *upl, int index) 9727{ 9728 return(UPL_SPECULATIVE_PAGE(upl, index)); 9729} 9730boolean_t upl_dirty_page(upl_page_info_t *upl, int index) 9731{ 9732 return(UPL_DIRTY_PAGE(upl, index)); 9733} 9734boolean_t upl_valid_page(upl_page_info_t *upl, int index) 9735{ 9736 return(UPL_VALID_PAGE(upl, index)); 9737} 9738ppnum_t upl_phys_page(upl_page_info_t *upl, int index) 9739{ 9740 return(UPL_PHYS_PAGE(upl, index)); 9741} 9742 9743void 9744vm_countdirtypages(void) 9745{ 9746 vm_page_t m; 9747 int dpages; 9748 int pgopages; 9749 int precpages; 9750 9751 9752 dpages=0; 9753 pgopages=0; 9754 precpages=0; 9755 9756 vm_page_lock_queues(); 9757 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 9758 do { 9759 if (m ==(vm_page_t )0) break; 9760 9761 if(m->dirty) dpages++; 9762 if(m->pageout) pgopages++; 9763 if(m->precious) precpages++; 9764 9765 assert(m->object != kernel_object); 9766 m = (vm_page_t) queue_next(&m->pageq); 9767 if (m ==(vm_page_t )0) break; 9768 9769 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m)); 9770 vm_page_unlock_queues(); 9771 9772 vm_page_lock_queues(); 9773 m = (vm_page_t) queue_first(&vm_page_queue_throttled); 9774 do { 9775 if (m ==(vm_page_t )0) break; 9776 9777 dpages++; 9778 
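		/*
		 * every page on the throttled queue is dirty and not yet
		 * flagged for pageout, so it is counted unconditionally;
		 * the asserts below check that invariant
		 */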
/*
 * Debugging aid: walk the inactive, throttled, anonymous and active page
 * queues and report how many dirty, pageout and precious pages sit on them.
 */
void
vm_countdirtypages(void)
{
	vm_page_t m;
	int dpages;
	int pgopages;
	int precpages;

	dpages = 0;
	pgopages = 0;
	precpages = 0;

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_inactive);
	do {
		if (m == (vm_page_t)0) break;

		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_inactive, (queue_entry_t) m));
	vm_page_unlock_queues();

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_throttled);
	do {
		if (m == (vm_page_t)0) break;

		dpages++;
		assert(m->dirty);
		assert(!m->pageout);
		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_throttled, (queue_entry_t) m));
	vm_page_unlock_queues();

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_anonymous);
	do {
		if (m == (vm_page_t)0) break;

		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_anonymous, (queue_entry_t) m));
	vm_page_unlock_queues();

	printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);

	dpages = 0;
	pgopages = 0;
	precpages = 0;

	vm_page_lock_queues();
	m = (vm_page_t) queue_first(&vm_page_queue_active);

	do {
		if (m == (vm_page_t)0) break;
		if (m->dirty) dpages++;
		if (m->pageout) pgopages++;
		if (m->precious) precpages++;

		assert(m->object != kernel_object);
		m = (vm_page_t) queue_next(&m->pageq);
		if (m == (vm_page_t)0) break;

	} while (!queue_end(&vm_page_queue_active, (queue_entry_t) m));
	vm_page_unlock_queues();

	printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);

}
#endif /* MACH_BSD */

ppnum_t upl_get_highest_page(
	upl_t	upl)
{
	return upl->highest_page;
}

upl_size_t upl_get_size(
	upl_t	upl)
{
	return upl->size;
}

#if UPL_DEBUG
kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
{
	upl->ubc_alias1 = alias1;
	upl->ubc_alias2 = alias2;
	return KERN_SUCCESS;
}
int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
{
	if (al)
		*al = upl->ubc_alias1;
	if (al2)
		*al2 = upl->ubc_alias2;
	return KERN_SUCCESS;
}
#endif /* UPL_DEBUG */
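/*
 * Illustrative sketch (not part of the original sources, compiled out):
 * with UPL_DEBUG enabled, the creator of a UPL can stash two arbitrary
 * words in it to identify its origin, and read them back later when
 * hunting leaked or stuck UPLs.  The helper names and the choice of
 * storing an owner pointer plus an offset are assumptions made for the
 * example, not taken from the original file.
 */
#if 0
static void
upl_debug_tag(upl_t upl, void *owner, vm_object_offset_t offset)	/* hypothetical */
{
	upl_ubc_alias_set(upl, (uintptr_t)owner, (uintptr_t)offset);
}

static void
upl_debug_dump(upl_t upl)						/* hypothetical */
{
	uintptr_t	al1 = 0;
	uintptr_t	al2 = 0;

	upl_ubc_alias_get(upl, &al1, &al2);
	printf("upl %p: alias1 0x%lx alias2 0x%lx\n", upl, al1, al2);
}
#endif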
#if VM_PRESSURE_EVENTS
/*
 * Upward trajectory.
 */
extern boolean_t vm_compressor_low_on_space(void);

boolean_t
VM_PRESSURE_NORMAL_TO_WARNING(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {

		/* Available pages below our threshold */
		if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
			/* No frozen processes to kill */
			if (memorystatus_frozen_count == 0) {
				/* Not enough suspended processes available. */
				if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
					return TRUE;
				}
			}
		}
		return FALSE;

	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
	}
}

boolean_t
VM_PRESSURE_WARNING_TO_CRITICAL(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages below our threshold */
		if (memorystatus_available_pages < memorystatus_available_pages_critical) {
			return TRUE;
		}
		return FALSE;
	} else {
		return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
	}
}

/*
 * Downward trajectory.
 */
boolean_t
VM_PRESSURE_WARNING_TO_NORMAL(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages above our threshold */
		unsigned int target_threshold = memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100);
		if (memorystatus_available_pages > target_threshold) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
	}
}

boolean_t
VM_PRESSURE_CRITICAL_TO_WARNING(void) {

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_SWAPLESS) {
		/* Available pages above our threshold */
		unsigned int target_threshold = memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100);
		if (memorystatus_available_pages > target_threshold) {
			return TRUE;
		}
		return FALSE;
	} else {
		return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
	}
}
#endif /* VM_PRESSURE_EVENTS */
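/*
 * Illustrative sketch (not part of the original sources, compiled out):
 * how the four transition predicates above could drive a pressure-level
 * state machine.  The helper name is hypothetical, and the level constants
 * (kVMPressureNormal/Warning/Critical) are assumed from vm_pressure_level_t;
 * the real policy is implemented by vm_pressure_response().
 */
#if 0
static vm_pressure_level_t
vm_pressure_next_level(vm_pressure_level_t cur)		/* hypothetical */
{
	switch (cur) {
	case kVMPressureNormal:
		if (VM_PRESSURE_NORMAL_TO_WARNING())
			return kVMPressureWarning;
		break;
	case kVMPressureWarning:
		if (VM_PRESSURE_WARNING_TO_CRITICAL())
			return kVMPressureCritical;
		if (VM_PRESSURE_WARNING_TO_NORMAL())
			return kVMPressureNormal;
		break;
	case kVMPressureCritical:
		if (VM_PRESSURE_CRITICAL_TO_WARNING())
			return kVMPressureWarning;
		break;
	default:
		break;
	}
	return cur;					/* no transition */
}
#endif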