/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_object.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory object module.
 */

#include <debug.h>
#include <mach_pagemap.h>
#include <task_swapper.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/vm_param.h>

#include <mach/sdt.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/xpr.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/host.h>
#include <kern/host_statistics.h>
#include <kern/processor.h>
#include <kern/misc_protos.h>

#include <vm/memory_object.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_compressor.h>

/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, but locked by the object's
 *	lock.
 *
 *	Each object also records the memory object reference
 *	that is used by the kernel to request and write
 *	back data (the memory object, field "pager"), etc...
 *
 *	Virtual memory objects are allocated to provide
 *	zero-filled memory (vm_allocate) or map a user-defined
 *	memory object into a virtual address space (vm_map).
 *
 *	Virtual memory objects that refer to a user-defined
 *	memory object are called "permanent", because all changes
 *	made in virtual memory are reflected back to the
 *	memory manager, which may then store them permanently.
 *	Other virtual memory objects are called "temporary",
 *	meaning that changes need be written back only when
 *	necessary to reclaim pages, and that storage associated
 *	with the object can be discarded once it is no longer
 *	mapped.
 *
 *	A permanent memory object may be mapped into more
 *	than one virtual address space.  Moreover, two threads
 *	may attempt to make the first mapping of a memory
 *	object concurrently.  Only one thread is allowed to
 *	complete this mapping; all others wait until the
 *	"pager_initialized" field is asserted, indicating
 *	that the first thread has initialized all of the
 *	necessary fields in the virtual memory object structure.
 *
 *	The kernel relies on a *default memory manager* to
 *	provide backing storage for the zero-filled virtual
 *	memory objects.  The pager memory objects associated
 *	with these temporary virtual memory objects are only
 *	requested from the default memory manager when it
 *	becomes necessary.  Virtual memory objects
 *	that depend on the default memory manager are called
 *	"internal".  The "pager_created" field is provided to
 *	indicate whether these ports have ever been allocated.
 *
 *	The kernel may also create virtual memory objects to
 *	hold changed pages after a copy-on-write operation.
 *	In this case, the virtual memory object (and its
 *	backing storage -- its memory object) only contain
 *	those pages that have been changed.  The "shadow"
 *	field refers to the virtual memory object that contains
 *	the remainder of the contents.  The "shadow_offset"
 *	field indicates where in the "shadow" these contents begin.
 *	The "copy" field refers to a virtual memory object
 *	to which changed pages must be copied before changing
 *	this object, in order to implement another form
 *	of copy-on-write optimization.
 *
 *	The virtual memory object structure also records
 *	the attributes associated with its memory object.
 *	The "pager_ready", "can_persist" and "copy_strategy"
 *	fields represent those attributes.  The "cached_list"
 *	field is used in the implementation of the persistence
 *	attribute.
 *
 *	ZZZ Continue this comment.
 */

/* Forward declarations for internal functions. */
static kern_return_t	vm_object_terminate(
				vm_object_t	object);

extern void		vm_object_remove(
				vm_object_t	object);

static kern_return_t	vm_object_copy_call(
				vm_object_t		src_object,
				vm_object_offset_t	src_offset,
				vm_object_size_t	size,
				vm_object_t		*_result_object);

static void		vm_object_do_collapse(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_do_bypass(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_release_pager(
				memory_object_t	pager,
				boolean_t	hashed);

static zone_t		vm_object_zone;		/* vm backing store zone */

/*
 *	All wired-down kernel memory belongs to a single virtual
 *	memory object (kernel_object) to avoid wasting data structures.
 */
static struct vm_object		kernel_object_store;
vm_object_t			kernel_object;

static struct vm_object		compressor_object_store;
vm_object_t			compressor_object = &compressor_object_store;

/*
 *	The submap object is used as a placeholder for vm_map_submap
 *	operations.  The object is declared in vm_map.c because it
 *	is exported by the vm_map module.  The storage is declared
 *	here because it must be initialized here.
 */
static struct vm_object		vm_submap_object_store;

/*
 *	Virtual memory objects are initialized from
 *	a template (see vm_object_allocate).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see _vm_object_allocate()).
 */
static struct vm_object		vm_object_template;

unsigned int vm_page_purged_wired = 0;
unsigned int vm_page_purged_busy = 0;
unsigned int vm_page_purged_others = 0;

#if VM_OBJECT_CACHE
/*
 *	Virtual memory objects that are not referenced by
 *	any address maps, but that are allowed to persist
 *	(an attribute specified by the associated memory manager),
 *	are kept in a queue (vm_object_cached_list).
 *
 *	When an object from this queue is referenced again,
 *	for example to make another address space mapping,
 *	it must be removed from the queue.  That is, the
 *	queue contains *only* objects with zero references.
 *
 *	The kernel may choose to terminate objects from this
 *	queue in order to reclaim storage.  The current policy
 *	is to permit a fixed maximum number of unreferenced
 *	objects (vm_object_cached_max).
 *
 *	A spin lock (accessed by routines
 *	vm_object_cache_{lock,lock_try,unlock}) governs the
 *	object cache.  It must be held when objects are
 *	added to or removed from the cache (in vm_object_terminate).
 *	The routines that acquire a reference to a virtual
 *	memory object based on one of the memory object ports
 *	must also lock the cache.
 *
 *	Ideally, the object cache should be more isolated
 *	from the reference mechanism, so that the lock need
 *	not be held to make simple references.
 */
static vm_object_t	vm_object_cache_trim(
				boolean_t called_from_vm_object_deallocate);

static void		vm_object_deactivate_all_pages(
				vm_object_t	object);

static int		vm_object_cached_high;		/* highest # cached objects */
static int		vm_object_cached_max = 512;	/* may be patched */

#define vm_object_cache_lock()		\
		lck_mtx_lock(&vm_object_cached_lock_data)
#define vm_object_cache_lock_try()	\
		lck_mtx_try_lock(&vm_object_cached_lock_data)

#endif	/* VM_OBJECT_CACHE */

static queue_head_t	vm_object_cached_list;
static uint32_t		vm_object_cache_pages_freed = 0;
static uint32_t		vm_object_cache_pages_moved = 0;
static uint32_t		vm_object_cache_pages_skipped = 0;
static uint32_t		vm_object_cache_adds = 0;
static uint32_t		vm_object_cached_count = 0;
static lck_mtx_t	vm_object_cached_lock_data;
static lck_mtx_ext_t	vm_object_cached_lock_data_ext;

static uint32_t		vm_object_page_grab_failed = 0;
static uint32_t		vm_object_page_grab_skipped = 0;
static uint32_t		vm_object_page_grab_returned = 0;
static uint32_t		vm_object_page_grab_pmapped = 0;
static uint32_t		vm_object_page_grab_reactivations = 0;

#define vm_object_cache_lock_spin()	\
		lck_mtx_lock_spin(&vm_object_cached_lock_data)
#define vm_object_cache_unlock()	\
		lck_mtx_unlock(&vm_object_cached_lock_data)

static void	vm_object_cache_remove_locked(vm_object_t);


#define	VM_OBJECT_HASH_COUNT		1024
#define	VM_OBJECT_HASH_LOCK_COUNT	512

static lck_mtx_t	vm_object_hashed_lock_data[VM_OBJECT_HASH_LOCK_COUNT];
static lck_mtx_ext_t	vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_LOCK_COUNT];

static queue_head_t	vm_object_hashtable[VM_OBJECT_HASH_COUNT];
static struct zone	*vm_object_hash_zone;

struct vm_object_hash_entry {
	queue_chain_t		hash_link;	/* hash chain link */
	memory_object_t		pager;		/* pager we represent */
	vm_object_t		object;		/* corresponding object */
	boolean_t		waiting;	/* someone waiting for
						 * termination */
};

typedef struct vm_object_hash_entry	*vm_object_hash_entry_t;
#define VM_OBJECT_HASH_ENTRY_NULL	((vm_object_hash_entry_t) 0)

#define VM_OBJECT_HASH_SHIFT	5
#define vm_object_hash(pager) \
	((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT))

#define vm_object_lock_hash(pager) \
	((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT))

void vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry);

static void vm_object_reap(vm_object_t object);
static void vm_object_reap_async(vm_object_t object);
static void vm_object_reaper_thread(void);

static lck_mtx_t	vm_object_reaper_lock_data;
static lck_mtx_ext_t	vm_object_reaper_lock_data_ext;

static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */
unsigned int vm_object_reap_count = 0;
unsigned int vm_object_reap_count_async = 0;
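
/*
 * For illustration: how a pager pointer is mapped to a hash bucket and a
 * lock index by the vm_object_hash() and vm_object_lock_hash() macros above
 * (the pointer value here is hypothetical):
 *
 *	memory_object_t pager = (memory_object_t)0xffffff8022400c40;
 *
 *	int bucket = vm_object_hash(pager);       index into vm_object_hashtable[],        0..1023
 *	int lock   = vm_object_lock_hash(pager);  index into vm_object_hashed_lock_data[], 0..511
 *
 * With VM_OBJECT_HASH_SHIFT == 5, the low 5 bits (largely constant for
 * zone-allocated pagers because of alignment) are discarded before the modulo
 * reduction.  Since VM_OBJECT_HASH_COUNT (1024) is exactly twice
 * VM_OBJECT_HASH_LOCK_COUNT (512), buckets b and b + 512 share one hash-chain
 * lock.
 */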
#define vm_object_reaper_lock()		\
		lck_mtx_lock(&vm_object_reaper_lock_data)
#define vm_object_reaper_lock_spin()	\
		lck_mtx_lock_spin(&vm_object_reaper_lock_data)
#define vm_object_reaper_unlock()	\
		lck_mtx_unlock(&vm_object_reaper_lock_data)

#if 0
#undef KERNEL_DEBUG
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#endif


static lck_mtx_t *
vm_object_hash_lock_spin(
	memory_object_t	pager)
{
	int	index;

	index = vm_object_lock_hash(pager);

	lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]);

	return (&vm_object_hashed_lock_data[index]);
}

static void
vm_object_hash_unlock(lck_mtx_t *lck)
{
	lck_mtx_unlock(lck);
}


/*
 *	vm_object_hash_lookup looks up a pager in the hashtable
 *	and returns the corresponding entry, with optional removal.
 */
static vm_object_hash_entry_t
vm_object_hash_lookup(
	memory_object_t	pager,
	boolean_t	remove_entry)
{
	queue_t			bucket;
	vm_object_hash_entry_t	entry;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];

	entry = (vm_object_hash_entry_t)queue_first(bucket);
	while (!queue_end(bucket, (queue_entry_t)entry)) {
		if (entry->pager == pager) {
			if (remove_entry) {
				queue_remove(bucket, entry,
					vm_object_hash_entry_t, hash_link);
			}
			return(entry);
		}
		entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
	}
	return(VM_OBJECT_HASH_ENTRY_NULL);
}

/*
 *	vm_object_hash_insert enters the specified
 *	pager / cache object association in the hashtable.
 */

static void
vm_object_hash_insert(
	vm_object_hash_entry_t	entry,
	vm_object_t		object)
{
	queue_t	bucket;

	bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];

	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);

	entry->object = object;
	object->hashed = TRUE;
}

static vm_object_hash_entry_t
vm_object_hash_entry_alloc(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;

	entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
	entry->pager = pager;
	entry->object = VM_OBJECT_NULL;
	entry->waiting = FALSE;

	return(entry);
}

void
vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry)
{
	zfree(vm_object_hash_zone, entry);
}

/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
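 *
 *	For example, a caller might obtain an anonymous, zero-fill backed
 *	object and pair it with vm_object_deallocate() once the last
 *	reference is dropped (illustrative sketch only; the size is
 *	arbitrary):
 *
 *		vm_object_t object;
 *
 *		object = vm_object_allocate((vm_object_size_t)(4 * PAGE_SIZE));
 *		if (object == VM_OBJECT_NULL)
 *			return KERN_RESOURCE_SHORTAGE;
 *		...
 *		vm_object_deallocate(object);
 *
 *	The returned object starts out with a single reference, no pager, and
 *	the template defaults (internal, temporary, copy_strategy
 *	MEMORY_OBJECT_COPY_SYMMETRIC).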
 */

__private_extern__ void
_vm_object_allocate(
	vm_object_size_t	size,
	vm_object_t		object)
{
	XPR(XPR_VM_OBJECT,
		"vm_object_allocate, object 0x%X size 0x%X\n",
		object, size, 0, 0, 0);

	*object = vm_object_template;
	queue_init(&object->memq);
	queue_init(&object->msr_q);
#if UPL_DEBUG
	queue_init(&object->uplq);
#endif /* UPL_DEBUG */
	vm_object_lock_init(object);
	object->vo_size = size;
}

__private_extern__ vm_object_t
vm_object_allocate(
	vm_object_size_t	size)
{
	register vm_object_t object;

	object = (vm_object_t) zalloc(vm_object_zone);

//	dbgLog(object, size, 0, 2);	/* (TEST/DEBUG) */

	if (object != VM_OBJECT_NULL)
		_vm_object_allocate(size, object);

	return object;
}


lck_grp_t	vm_object_lck_grp;
lck_grp_t	vm_object_cache_lck_grp;
lck_grp_attr_t	vm_object_lck_grp_attr;
lck_attr_t	vm_object_lck_attr;
lck_attr_t	kernel_object_lck_attr;
lck_attr_t	compressor_object_lck_attr;

/*
 *	vm_object_bootstrap:
 *
 *	Initialize the VM objects module.
 */
__private_extern__ void
vm_object_bootstrap(void)
{
	register int	i;

	vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
				round_page(512*1024),
				round_page(12*1024),
				"vm objects");
	zone_change(vm_object_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_object_zone, Z_NOENCRYPT, TRUE);

	vm_object_init_lck_grp();

	queue_init(&vm_object_cached_list);

	lck_mtx_init_ext(&vm_object_cached_lock_data,
			 &vm_object_cached_lock_data_ext,
			 &vm_object_cache_lck_grp,
			 &vm_object_lck_attr);

	queue_init(&vm_object_reaper_queue);

	for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) {
		lck_mtx_init_ext(&vm_object_hashed_lock_data[i],
				 &vm_object_hashed_lock_data_ext[i],
				 &vm_object_lck_grp,
				 &vm_object_lck_attr);
	}
	lck_mtx_init_ext(&vm_object_reaper_lock_data,
			 &vm_object_reaper_lock_data_ext,
			 &vm_object_lck_grp,
			 &vm_object_lck_attr);

	vm_object_hash_zone =
			zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
			      round_page(512*1024),
			      round_page(12*1024),
			      "vm object hash entries");
	zone_change(vm_object_hash_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE);

	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
		queue_init(&vm_object_hashtable[i]);


	/*
	 *	Fill in a template object, for quick initialization
	 */

	/* memq; Lock; init after allocation */
	vm_object_template.memq.prev = NULL;
	vm_object_template.memq.next = NULL;
#if 0
	/*
	 * We can't call vm_object_lock_init() here because that will
	 * allocate some memory and VM is not fully initialized yet.
	 * The lock will be initialized for each allocated object in
	 * _vm_object_allocate(), so we don't need to initialize it in
	 * the vm_object_template.
	 */
	vm_object_lock_init(&vm_object_template);
#endif
	vm_object_template.vo_size = 0;
	vm_object_template.memq_hint = VM_PAGE_NULL;
	vm_object_template.ref_count = 1;
#if	TASK_SWAPPER
	vm_object_template.res_count = 1;
#endif	/* TASK_SWAPPER */
	vm_object_template.resident_page_count = 0;
	vm_object_template.wired_page_count = 0;
	vm_object_template.reusable_page_count = 0;
	vm_object_template.copy = VM_OBJECT_NULL;
	vm_object_template.shadow = VM_OBJECT_NULL;
	vm_object_template.vo_shadow_offset = (vm_object_offset_t) 0;
	vm_object_template.pager = MEMORY_OBJECT_NULL;
	vm_object_template.paging_offset = 0;
	vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
	vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
	vm_object_template.paging_in_progress = 0;
	vm_object_template.activity_in_progress = 0;

	/* Begin bitfields */
	vm_object_template.all_wanted = 0; /* all bits FALSE */
	vm_object_template.pager_created = FALSE;
	vm_object_template.pager_initialized = FALSE;
	vm_object_template.pager_ready = FALSE;
	vm_object_template.pager_trusted = FALSE;
	vm_object_template.can_persist = FALSE;
	vm_object_template.internal = TRUE;
	vm_object_template.temporary = TRUE;
	vm_object_template.private = FALSE;
	vm_object_template.pageout = FALSE;
	vm_object_template.alive = TRUE;
	vm_object_template.purgable = VM_PURGABLE_DENY;
	vm_object_template.purgeable_when_ripe = FALSE;
	vm_object_template.shadowed = FALSE;
	vm_object_template.advisory_pageout = FALSE;
	vm_object_template.true_share = FALSE;
	vm_object_template.terminating = FALSE;
	vm_object_template.named = FALSE;
	vm_object_template.shadow_severed = FALSE;
	vm_object_template.phys_contiguous = FALSE;
	vm_object_template.nophyscache = FALSE;
	/* End bitfields */

	vm_object_template.cached_list.prev = NULL;
	vm_object_template.cached_list.next = NULL;
	vm_object_template.msr_q.prev = NULL;
	vm_object_template.msr_q.next = NULL;

	vm_object_template.last_alloc = (vm_object_offset_t) 0;
	vm_object_template.sequential = (vm_object_offset_t) 0;
	vm_object_template.pages_created = 0;
	vm_object_template.pages_used = 0;
	vm_object_template.scan_collisions = 0;

#if	MACH_PAGEMAP
	vm_object_template.existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */
	vm_object_template.cow_hint = ~(vm_offset_t)0;
#if	MACH_ASSERT
	vm_object_template.paging_object = VM_OBJECT_NULL;
#endif	/* MACH_ASSERT */

	/* cache bitfields */
	vm_object_template.wimg_bits = VM_WIMG_USE_DEFAULT;
	vm_object_template.set_cache_attr = FALSE;
	vm_object_template.object_slid = FALSE;
	vm_object_template.code_signed = FALSE;
	vm_object_template.hashed = FALSE;
	vm_object_template.transposed = FALSE;
	vm_object_template.mapping_in_progress = FALSE;
	vm_object_template.volatile_empty = FALSE;
	vm_object_template.volatile_fault = FALSE;
	vm_object_template.all_reusable = FALSE;
	vm_object_template.blocked_access = FALSE;
	vm_object_template.__object2_unused_bits = 0;
#if UPL_DEBUG
	vm_object_template.uplq.prev = NULL;
	vm_object_template.uplq.next = NULL;
#endif /* UPL_DEBUG */
#ifdef VM_PIP_DEBUG
	bzero(&vm_object_template.pip_holders,
	      sizeof (vm_object_template.pip_holders));
#endif /* VM_PIP_DEBUG */

	vm_object_template.objq.next = NULL;
	vm_object_template.objq.prev = NULL;

	vm_object_template.purgeable_queue_type = PURGEABLE_Q_TYPE_MAX;
	vm_object_template.purgeable_queue_group = 0;

	vm_object_template.vo_cache_ts = 0;

	/*
	 *	Initialize the "kernel object"
	 */

	kernel_object = &kernel_object_store;

/*
 *	Note that in the following size specifications, we need to add 1 because
 *	VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
 */

#ifdef ppc
	_vm_object_allocate(vm_last_addr + 1,
			    kernel_object);
#else
	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
			    kernel_object);

	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
			    compressor_object);
#endif
	kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
	compressor_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 *	Initialize the "submap object".  Make it as large as the
	 *	kernel object so that no limit is imposed on submap sizes.
	 */

	vm_submap_object = &vm_submap_object_store;
#ifdef ppc
	_vm_object_allocate(vm_last_addr + 1,
			    vm_submap_object);
#else
	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
			    vm_submap_object);
#endif
	vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * Create an "extra" reference to this object so that we never
	 * try to deallocate it; zfree doesn't like to be called with
	 * non-zone memory.
	 */
	vm_object_reference(vm_submap_object);

#if	MACH_PAGEMAP
	vm_external_module_initialize();
#endif	/* MACH_PAGEMAP */
}

void
vm_object_reaper_init(void)
{
	kern_return_t	kr;
	thread_t	thread;

	kr = kernel_thread_start_priority(
		(thread_continue_t) vm_object_reaper_thread,
		NULL,
		BASEPRI_PREEMPT - 1,
		&thread);
	if (kr != KERN_SUCCESS) {
		panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
	}
	thread_deallocate(thread);
}

__private_extern__ void
vm_object_init(void)
{
	/*
	 *	Finish initializing the kernel object.
	 */
}


__private_extern__ void
vm_object_init_lck_grp(void)
{
	/*
	 * initialize the vm_object lock world
	 */
	lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
	lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
	lck_grp_init(&vm_object_cache_lck_grp, "vm_object_cache", &vm_object_lck_grp_attr);
	lck_attr_setdefault(&vm_object_lck_attr);
	lck_attr_setdefault(&kernel_object_lck_attr);
	lck_attr_cleardebug(&kernel_object_lck_attr);
	lck_attr_setdefault(&compressor_object_lck_attr);
	lck_attr_cleardebug(&compressor_object_lck_attr);
}

#if VM_OBJECT_CACHE
#define	MIGHT_NOT_CACHE_SHADOWS		1
#if	MIGHT_NOT_CACHE_SHADOWS
static int cache_shadows = TRUE;
#endif	/* MIGHT_NOT_CACHE_SHADOWS */
#endif

/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
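 *
 *	For example (illustrative sketch only), a caller that took an extra
 *	reference with vm_object_reference() pairs it with a matching
 *	vm_object_deallocate(); dropping the last reference either caches the
 *	object, if it can persist, or terminates it:
 *
 *		vm_object_t object = vm_object_allocate(PAGE_SIZE);
 *
 *		vm_object_reference(object);	-- ref_count: 1 -> 2
 *		...
 *		vm_object_deallocate(object);	-- ref_count: 2 -> 1
 *		vm_object_deallocate(object);	-- last reference: object is
 *						   cached or terminated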
766 */ 767unsigned long vm_object_deallocate_shared_successes = 0; 768unsigned long vm_object_deallocate_shared_failures = 0; 769unsigned long vm_object_deallocate_shared_swap_failures = 0; 770__private_extern__ void 771vm_object_deallocate( 772 register vm_object_t object) 773{ 774#if VM_OBJECT_CACHE 775 boolean_t retry_cache_trim = FALSE; 776 uint32_t try_failed_count = 0; 777#endif 778 vm_object_t shadow = VM_OBJECT_NULL; 779 780// if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */ 781// else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */ 782 783 if (object == VM_OBJECT_NULL) 784 return; 785 786 if (object == kernel_object || object == compressor_object) { 787 vm_object_lock_shared(object); 788 789 OSAddAtomic(-1, &object->ref_count); 790 791 if (object->ref_count == 0) { 792 if (object == kernel_object) 793 panic("vm_object_deallocate: losing kernel_object\n"); 794 else 795 panic("vm_object_deallocate: losing compressor_object\n"); 796 } 797 vm_object_unlock(object); 798 return; 799 } 800 801 if (object->ref_count > 2 || 802 (!object->named && object->ref_count > 1)) { 803 UInt32 original_ref_count; 804 volatile UInt32 *ref_count_p; 805 Boolean atomic_swap; 806 807 /* 808 * The object currently looks like it is not being 809 * kept alive solely by the reference we're about to release. 810 * Let's try and release our reference without taking 811 * all the locks we would need if we had to terminate the 812 * object (cache lock + exclusive object lock). 813 * Lock the object "shared" to make sure we don't race with 814 * anyone holding it "exclusive". 815 */ 816 vm_object_lock_shared(object); 817 ref_count_p = (volatile UInt32 *) &object->ref_count; 818 original_ref_count = object->ref_count; 819 /* 820 * Test again as "ref_count" could have changed. 821 * "named" shouldn't change. 822 */ 823 if (original_ref_count > 2 || 824 (!object->named && original_ref_count > 1)) { 825 atomic_swap = OSCompareAndSwap( 826 original_ref_count, 827 original_ref_count - 1, 828 (UInt32 *) &object->ref_count); 829 if (atomic_swap == FALSE) { 830 vm_object_deallocate_shared_swap_failures++; 831 } 832 833 } else { 834 atomic_swap = FALSE; 835 } 836 vm_object_unlock(object); 837 838 if (atomic_swap) { 839 /* 840 * ref_count was updated atomically ! 841 */ 842 vm_object_deallocate_shared_successes++; 843 return; 844 } 845 846 /* 847 * Someone else updated the ref_count at the same 848 * time and we lost the race. Fall back to the usual 849 * slow but safe path... 850 */ 851 vm_object_deallocate_shared_failures++; 852 } 853 854 while (object != VM_OBJECT_NULL) { 855 856 vm_object_lock(object); 857 858 assert(object->ref_count > 0); 859 860 /* 861 * If the object has a named reference, and only 862 * that reference would remain, inform the pager 863 * about the last "mapping" reference going away. 864 */ 865 if ((object->ref_count == 2) && (object->named)) { 866 memory_object_t pager = object->pager; 867 868 /* Notify the Pager that there are no */ 869 /* more mappers for this object */ 870 871 if (pager != MEMORY_OBJECT_NULL) { 872 vm_object_mapping_wait(object, THREAD_UNINT); 873 vm_object_mapping_begin(object); 874 vm_object_unlock(object); 875 876 memory_object_last_unmap(pager); 877 878 vm_object_lock(object); 879 vm_object_mapping_end(object); 880 } 881 assert(object->ref_count > 0); 882 } 883 884 /* 885 * Lose the reference. If other references 886 * remain, then we are done, unless we need 887 * to retry a cache trim. 
888 * If it is the last reference, then keep it 889 * until any pending initialization is completed. 890 */ 891 892 /* if the object is terminating, it cannot go into */ 893 /* the cache and we obviously should not call */ 894 /* terminate again. */ 895 896 if ((object->ref_count > 1) || object->terminating) { 897 vm_object_lock_assert_exclusive(object); 898 object->ref_count--; 899 vm_object_res_deallocate(object); 900 901 if (object->ref_count == 1 && 902 object->shadow != VM_OBJECT_NULL) { 903 /* 904 * There's only one reference left on this 905 * VM object. We can't tell if it's a valid 906 * one (from a mapping for example) or if this 907 * object is just part of a possibly stale and 908 * useless shadow chain. 909 * We would like to try and collapse it into 910 * its parent, but we don't have any pointers 911 * back to this parent object. 912 * But we can try and collapse this object with 913 * its own shadows, in case these are useless 914 * too... 915 * We can't bypass this object though, since we 916 * don't know if this last reference on it is 917 * meaningful or not. 918 */ 919 vm_object_collapse(object, 0, FALSE); 920 } 921 vm_object_unlock(object); 922#if VM_OBJECT_CACHE 923 if (retry_cache_trim && 924 ((object = vm_object_cache_trim(TRUE)) != 925 VM_OBJECT_NULL)) { 926 continue; 927 } 928#endif 929 return; 930 } 931 932 /* 933 * We have to wait for initialization 934 * before destroying or caching the object. 935 */ 936 937 if (object->pager_created && ! object->pager_initialized) { 938 assert(! object->can_persist); 939 vm_object_assert_wait(object, 940 VM_OBJECT_EVENT_INITIALIZED, 941 THREAD_UNINT); 942 vm_object_unlock(object); 943 944 thread_block(THREAD_CONTINUE_NULL); 945 continue; 946 } 947 948#if VM_OBJECT_CACHE 949 /* 950 * If this object can persist, then enter it in 951 * the cache. Otherwise, terminate it. 952 * 953 * NOTE: Only permanent objects are cached, and 954 * permanent objects cannot have shadows. This 955 * affects the residence counting logic in a minor 956 * way (can do it in-line, mostly). 957 */ 958 959 if ((object->can_persist) && (object->alive)) { 960 /* 961 * Now it is safe to decrement reference count, 962 * and to return if reference count is > 0. 963 */ 964 965 vm_object_lock_assert_exclusive(object); 966 if (--object->ref_count > 0) { 967 vm_object_res_deallocate(object); 968 vm_object_unlock(object); 969 970 if (retry_cache_trim && 971 ((object = vm_object_cache_trim(TRUE)) != 972 VM_OBJECT_NULL)) { 973 continue; 974 } 975 return; 976 } 977 978#if MIGHT_NOT_CACHE_SHADOWS 979 /* 980 * Remove shadow now if we don't 981 * want to cache shadows. 982 */ 983 if (! cache_shadows) { 984 shadow = object->shadow; 985 object->shadow = VM_OBJECT_NULL; 986 } 987#endif /* MIGHT_NOT_CACHE_SHADOWS */ 988 989 /* 990 * Enter the object onto the queue of 991 * cached objects, and deactivate 992 * all of its pages. 
993 */ 994 assert(object->shadow == VM_OBJECT_NULL); 995 VM_OBJ_RES_DECR(object); 996 XPR(XPR_VM_OBJECT, 997 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n", 998 object, 999 vm_object_cached_list.next, 1000 vm_object_cached_list.prev,0,0); 1001 1002 1003 vm_object_unlock(object); 1004 1005 try_failed_count = 0; 1006 for (;;) { 1007 vm_object_cache_lock(); 1008 1009 /* 1010 * if we try to take a regular lock here 1011 * we risk deadlocking against someone 1012 * holding a lock on this object while 1013 * trying to vm_object_deallocate a different 1014 * object 1015 */ 1016 if (vm_object_lock_try(object)) 1017 break; 1018 vm_object_cache_unlock(); 1019 try_failed_count++; 1020 1021 mutex_pause(try_failed_count); /* wait a bit */ 1022 } 1023 vm_object_cached_count++; 1024 if (vm_object_cached_count > vm_object_cached_high) 1025 vm_object_cached_high = vm_object_cached_count; 1026 queue_enter(&vm_object_cached_list, object, 1027 vm_object_t, cached_list); 1028 vm_object_cache_unlock(); 1029 1030 vm_object_deactivate_all_pages(object); 1031 vm_object_unlock(object); 1032 1033#if MIGHT_NOT_CACHE_SHADOWS 1034 /* 1035 * If we have a shadow that we need 1036 * to deallocate, do so now, remembering 1037 * to trim the cache later. 1038 */ 1039 if (! cache_shadows && shadow != VM_OBJECT_NULL) { 1040 object = shadow; 1041 retry_cache_trim = TRUE; 1042 continue; 1043 } 1044#endif /* MIGHT_NOT_CACHE_SHADOWS */ 1045 1046 /* 1047 * Trim the cache. If the cache trim 1048 * returns with a shadow for us to deallocate, 1049 * then remember to retry the cache trim 1050 * when we are done deallocating the shadow. 1051 * Otherwise, we are done. 1052 */ 1053 1054 object = vm_object_cache_trim(TRUE); 1055 if (object == VM_OBJECT_NULL) { 1056 return; 1057 } 1058 retry_cache_trim = TRUE; 1059 } else 1060#endif /* VM_OBJECT_CACHE */ 1061 { 1062 /* 1063 * This object is not cachable; terminate it. 1064 */ 1065 XPR(XPR_VM_OBJECT, 1066 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n", 1067 object, object->resident_page_count, 1068 object->paging_in_progress, 1069 (void *)current_thread(),object->ref_count); 1070 1071 VM_OBJ_RES_DECR(object); /* XXX ? */ 1072 /* 1073 * Terminate this object. If it had a shadow, 1074 * then deallocate it; otherwise, if we need 1075 * to retry a cache trim, do so now; otherwise, 1076 * we are done. "pageout" objects have a shadow, 1077 * but maintain a "paging reference" rather than 1078 * a normal reference. 1079 */ 1080 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 1081 1082 if (vm_object_terminate(object) != KERN_SUCCESS) { 1083 return; 1084 } 1085 if (shadow != VM_OBJECT_NULL) { 1086 object = shadow; 1087 continue; 1088 } 1089#if VM_OBJECT_CACHE 1090 if (retry_cache_trim && 1091 ((object = vm_object_cache_trim(TRUE)) != 1092 VM_OBJECT_NULL)) { 1093 continue; 1094 } 1095#endif 1096 return; 1097 } 1098 } 1099#if VM_OBJECT_CACHE 1100 assert(! 
retry_cache_trim); 1101#endif 1102} 1103 1104 1105 1106vm_page_t 1107vm_object_page_grab( 1108 vm_object_t object) 1109{ 1110 vm_page_t p, next_p; 1111 int p_limit = 0; 1112 int p_skipped = 0; 1113 1114 vm_object_lock_assert_exclusive(object); 1115 1116 next_p = (vm_page_t)queue_first(&object->memq); 1117 p_limit = MIN(50, object->resident_page_count); 1118 1119 while (!queue_end(&object->memq, (queue_entry_t)next_p) && --p_limit > 0) { 1120 1121 p = next_p; 1122 next_p = (vm_page_t)queue_next(&next_p->listq); 1123 1124 if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry || p->fictitious) 1125 goto move_page_in_obj; 1126 1127 if (p->pmapped || p->dirty || p->precious) { 1128 vm_page_lockspin_queues(); 1129 1130 if (p->pmapped) { 1131 int refmod_state; 1132 1133 vm_object_page_grab_pmapped++; 1134 1135 if (p->reference == FALSE || p->dirty == FALSE) { 1136 1137 refmod_state = pmap_get_refmod(p->phys_page); 1138 1139 if (refmod_state & VM_MEM_REFERENCED) 1140 p->reference = TRUE; 1141 if (refmod_state & VM_MEM_MODIFIED) { 1142 SET_PAGE_DIRTY(p, FALSE); 1143 } 1144 } 1145 if (p->dirty == FALSE && p->precious == FALSE) { 1146 1147 refmod_state = pmap_disconnect(p->phys_page); 1148 1149 if (refmod_state & VM_MEM_REFERENCED) 1150 p->reference = TRUE; 1151 if (refmod_state & VM_MEM_MODIFIED) { 1152 SET_PAGE_DIRTY(p, FALSE); 1153 } 1154 1155 if (p->dirty == FALSE) 1156 goto take_page; 1157 } 1158 } 1159 if (p->inactive && p->reference == TRUE) { 1160 vm_page_activate(p); 1161 1162 VM_STAT_INCR(reactivations); 1163 vm_object_page_grab_reactivations++; 1164 } 1165 vm_page_unlock_queues(); 1166move_page_in_obj: 1167 queue_remove(&object->memq, p, vm_page_t, listq); 1168 queue_enter(&object->memq, p, vm_page_t, listq); 1169 1170 p_skipped++; 1171 continue; 1172 } 1173 vm_page_lockspin_queues(); 1174take_page: 1175 vm_page_free_prepare_queues(p); 1176 vm_object_page_grab_returned++; 1177 vm_object_page_grab_skipped += p_skipped; 1178 1179 vm_page_unlock_queues(); 1180 1181 vm_page_free_prepare_object(p, TRUE); 1182 1183 return (p); 1184 } 1185 vm_object_page_grab_skipped += p_skipped; 1186 vm_object_page_grab_failed++; 1187 1188 return (NULL); 1189} 1190 1191 1192 1193#define EVICT_PREPARE_LIMIT 64 1194#define EVICT_AGE 10 1195 1196static clock_sec_t vm_object_cache_aging_ts = 0; 1197 1198static void 1199vm_object_cache_remove_locked( 1200 vm_object_t object) 1201{ 1202 queue_remove(&vm_object_cached_list, object, vm_object_t, objq); 1203 object->objq.next = NULL; 1204 object->objq.prev = NULL; 1205 1206 vm_object_cached_count--; 1207} 1208 1209void 1210vm_object_cache_remove( 1211 vm_object_t object) 1212{ 1213 vm_object_cache_lock_spin(); 1214 1215 if (object->objq.next || object->objq.prev) 1216 vm_object_cache_remove_locked(object); 1217 1218 vm_object_cache_unlock(); 1219} 1220 1221void 1222vm_object_cache_add( 1223 vm_object_t object) 1224{ 1225 clock_sec_t sec; 1226 clock_nsec_t nsec; 1227 1228 if (object->resident_page_count == 0) 1229 return; 1230 clock_get_system_nanotime(&sec, &nsec); 1231 1232 vm_object_cache_lock_spin(); 1233 1234 if (object->objq.next == NULL && object->objq.prev == NULL) { 1235 queue_enter(&vm_object_cached_list, object, vm_object_t, objq); 1236 object->vo_cache_ts = sec + EVICT_AGE; 1237 object->vo_cache_pages_to_scan = object->resident_page_count; 1238 1239 vm_object_cached_count++; 1240 vm_object_cache_adds++; 1241 } 1242 vm_object_cache_unlock(); 1243} 1244 1245int 1246vm_object_cache_evict( 1247 int num_to_evict, 1248 int max_objects_to_examine) 1249{ 1250 
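	/*
	 * Summary of the contract, based on the code below: the caller holds
	 * the vm_page queues lock on entry; it is dropped while the cached
	 * object list is walked and re-taken before returning.  At most
	 * max_objects_to_examine cached objects are visited, clean pages are
	 * freed and dirty or recently mapped pages are pushed back to the
	 * inactive queue until num_to_evict pages have been handled; the
	 * return value is the number of pages actually freed.
	 */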
vm_object_t object = VM_OBJECT_NULL; 1251 vm_object_t next_obj = VM_OBJECT_NULL; 1252 vm_page_t local_free_q = VM_PAGE_NULL; 1253 vm_page_t p; 1254 vm_page_t next_p; 1255 int object_cnt = 0; 1256 vm_page_t ep_array[EVICT_PREPARE_LIMIT]; 1257 int ep_count; 1258 int ep_limit; 1259 int ep_index; 1260 int ep_freed = 0; 1261 int ep_moved = 0; 1262 uint32_t ep_skipped = 0; 1263 clock_sec_t sec; 1264 clock_nsec_t nsec; 1265 1266 KERNEL_DEBUG(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0); 1267 /* 1268 * do a couple of quick checks to see if it's 1269 * worthwhile grabbing the lock 1270 */ 1271 if (queue_empty(&vm_object_cached_list)) { 1272 KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0); 1273 return (0); 1274 } 1275 clock_get_system_nanotime(&sec, &nsec); 1276 1277 /* 1278 * the object on the head of the queue has not 1279 * yet sufficiently aged 1280 */ 1281 if (sec < vm_object_cache_aging_ts) { 1282 KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0); 1283 return (0); 1284 } 1285 /* 1286 * don't need the queue lock to find 1287 * and lock an object on the cached list 1288 */ 1289 vm_page_unlock_queues(); 1290 1291 vm_object_cache_lock_spin(); 1292 1293 for (;;) { 1294 next_obj = (vm_object_t)queue_first(&vm_object_cached_list); 1295 1296 while (!queue_end(&vm_object_cached_list, (queue_entry_t)next_obj) && object_cnt++ < max_objects_to_examine) { 1297 1298 object = next_obj; 1299 next_obj = (vm_object_t)queue_next(&next_obj->objq); 1300 1301 if (sec < object->vo_cache_ts) { 1302 KERNEL_DEBUG(0x130020c, object, object->resident_page_count, object->vo_cache_ts, sec, 0); 1303 1304 vm_object_cache_aging_ts = object->vo_cache_ts; 1305 object = VM_OBJECT_NULL; 1306 break; 1307 } 1308 if (!vm_object_lock_try_scan(object)) { 1309 /* 1310 * just skip over this guy for now... if we find 1311 * an object to steal pages from, we'll revist in a bit... 1312 * hopefully, the lock will have cleared 1313 */ 1314 KERNEL_DEBUG(0x13001f8, object, object->resident_page_count, 0, 0, 0); 1315 1316 object = VM_OBJECT_NULL; 1317 continue; 1318 } 1319 if (queue_empty(&object->memq) || object->vo_cache_pages_to_scan == 0) { 1320 /* 1321 * this case really shouldn't happen, but it's not fatal 1322 * so deal with it... if we don't remove the object from 1323 * the list, we'll never move past it. 1324 */ 1325 KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0); 1326 1327 vm_object_cache_remove_locked(object); 1328 vm_object_unlock(object); 1329 object = VM_OBJECT_NULL; 1330 continue; 1331 } 1332 /* 1333 * we have a locked object with pages... 1334 * time to start harvesting 1335 */ 1336 break; 1337 } 1338 vm_object_cache_unlock(); 1339 1340 if (object == VM_OBJECT_NULL) 1341 break; 1342 1343 /* 1344 * object is locked at this point and 1345 * has resident pages 1346 */ 1347 next_p = (vm_page_t)queue_first(&object->memq); 1348 1349 /* 1350 * break the page scan into 2 pieces to minimize the time spent 1351 * behind the page queue lock... 1352 * the list of pages on these unused objects is likely to be cold 1353 * w/r to the cpu cache which increases the time to scan the list 1354 * tenfold... and we may have a 'run' of pages we can't utilize that 1355 * needs to be skipped over... 
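 *
 *	concretely, the first pass below gathers up to EVICT_PREPARE_LIMIT
 *	candidate pages into ep_array while holding only the object lock;
 *	the second pass then takes the page queues lock once for the whole
 *	batch, freeing clean pages and pushing dirty or recently mapped ones
 *	back onto the inactive queue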
1356 */ 1357 if ((ep_limit = num_to_evict - (ep_freed + ep_moved)) > EVICT_PREPARE_LIMIT) 1358 ep_limit = EVICT_PREPARE_LIMIT; 1359 ep_count = 0; 1360 1361 while (!queue_end(&object->memq, (queue_entry_t)next_p) && object->vo_cache_pages_to_scan && ep_count < ep_limit) { 1362 1363 p = next_p; 1364 next_p = (vm_page_t)queue_next(&next_p->listq); 1365 1366 object->vo_cache_pages_to_scan--; 1367 1368 if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry) { 1369 queue_remove(&object->memq, p, vm_page_t, listq); 1370 queue_enter(&object->memq, p, vm_page_t, listq); 1371 1372 ep_skipped++; 1373 continue; 1374 } 1375 if (p->wpmapped || p->dirty || p->precious) { 1376 queue_remove(&object->memq, p, vm_page_t, listq); 1377 queue_enter(&object->memq, p, vm_page_t, listq); 1378 1379 pmap_clear_reference(p->phys_page); 1380 } 1381 ep_array[ep_count++] = p; 1382 } 1383 KERNEL_DEBUG(0x13001f4 | DBG_FUNC_START, object, object->resident_page_count, ep_freed, ep_moved, 0); 1384 1385 vm_page_lockspin_queues(); 1386 1387 for (ep_index = 0; ep_index < ep_count; ep_index++) { 1388 1389 p = ep_array[ep_index]; 1390 1391 if (p->wpmapped || p->dirty || p->precious) { 1392 p->reference = FALSE; 1393 p->no_cache = FALSE; 1394 1395 /* 1396 * we've already filtered out pages that are in the laundry 1397 * so if we get here, this page can't be on the pageout queue 1398 */ 1399 assert(!p->pageout_queue); 1400 1401 VM_PAGE_QUEUES_REMOVE(p); 1402 VM_PAGE_ENQUEUE_INACTIVE(p, TRUE); 1403 1404 ep_moved++; 1405 } else { 1406 vm_page_free_prepare_queues(p); 1407 1408 assert(p->pageq.next == NULL && p->pageq.prev == NULL); 1409 /* 1410 * Add this page to our list of reclaimed pages, 1411 * to be freed later. 1412 */ 1413 p->pageq.next = (queue_entry_t) local_free_q; 1414 local_free_q = p; 1415 1416 ep_freed++; 1417 } 1418 } 1419 vm_page_unlock_queues(); 1420 1421 KERNEL_DEBUG(0x13001f4 | DBG_FUNC_END, object, object->resident_page_count, ep_freed, ep_moved, 0); 1422 1423 if (local_free_q) { 1424 vm_page_free_list(local_free_q, TRUE); 1425 local_free_q = VM_PAGE_NULL; 1426 } 1427 if (object->vo_cache_pages_to_scan == 0) { 1428 KERNEL_DEBUG(0x1300208, object, object->resident_page_count, ep_freed, ep_moved, 0); 1429 1430 vm_object_cache_remove(object); 1431 1432 KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0); 1433 } 1434 /* 1435 * done with this object 1436 */ 1437 vm_object_unlock(object); 1438 object = VM_OBJECT_NULL; 1439 1440 /* 1441 * at this point, we are not holding any locks 1442 */ 1443 if ((ep_freed + ep_moved) >= num_to_evict) { 1444 /* 1445 * we've reached our target for the 1446 * number of pages to evict 1447 */ 1448 break; 1449 } 1450 vm_object_cache_lock_spin(); 1451 } 1452 /* 1453 * put the page queues lock back to the caller's 1454 * idea of it 1455 */ 1456 vm_page_lock_queues(); 1457 1458 vm_object_cache_pages_freed += ep_freed; 1459 vm_object_cache_pages_moved += ep_moved; 1460 vm_object_cache_pages_skipped += ep_skipped; 1461 1462 KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, ep_freed, 0, 0, 0, 0); 1463 return (ep_freed); 1464} 1465 1466 1467#if VM_OBJECT_CACHE 1468/* 1469 * Check to see whether we really need to trim 1470 * down the cache. If so, remove an object from 1471 * the cache, terminate it, and repeat. 1472 * 1473 * Called with, and returns with, cache lock unlocked. 
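 *
 *	When called from vm_object_deallocate, a leftover shadow object from
 *	a terminated cache entry is handed back to the caller instead of
 *	being deallocated here, so the caller can continue its own loop
 *	without recursing; otherwise the shadow is deallocated in place and
 *	trimming repeats until vm_object_cached_count drops back below
 *	vm_object_cached_max.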
1474 */ 1475vm_object_t 1476vm_object_cache_trim( 1477 boolean_t called_from_vm_object_deallocate) 1478{ 1479 register vm_object_t object = VM_OBJECT_NULL; 1480 vm_object_t shadow; 1481 1482 for (;;) { 1483 1484 /* 1485 * If we no longer need to trim the cache, 1486 * then we are done. 1487 */ 1488 if (vm_object_cached_count <= vm_object_cached_max) 1489 return VM_OBJECT_NULL; 1490 1491 vm_object_cache_lock(); 1492 if (vm_object_cached_count <= vm_object_cached_max) { 1493 vm_object_cache_unlock(); 1494 return VM_OBJECT_NULL; 1495 } 1496 1497 /* 1498 * We must trim down the cache, so remove 1499 * the first object in the cache. 1500 */ 1501 XPR(XPR_VM_OBJECT, 1502 "vm_object_cache_trim: removing from front of cache (%x, %x)\n", 1503 vm_object_cached_list.next, 1504 vm_object_cached_list.prev, 0, 0, 0); 1505 1506 object = (vm_object_t) queue_first(&vm_object_cached_list); 1507 if(object == (vm_object_t) &vm_object_cached_list) { 1508 /* something's wrong with the calling parameter or */ 1509 /* the value of vm_object_cached_count, just fix */ 1510 /* and return */ 1511 if(vm_object_cached_max < 0) 1512 vm_object_cached_max = 0; 1513 vm_object_cached_count = 0; 1514 vm_object_cache_unlock(); 1515 return VM_OBJECT_NULL; 1516 } 1517 vm_object_lock(object); 1518 queue_remove(&vm_object_cached_list, object, vm_object_t, 1519 cached_list); 1520 vm_object_cached_count--; 1521 1522 vm_object_cache_unlock(); 1523 /* 1524 * Since this object is in the cache, we know 1525 * that it is initialized and has no references. 1526 * Take a reference to avoid recursive deallocations. 1527 */ 1528 1529 assert(object->pager_initialized); 1530 assert(object->ref_count == 0); 1531 vm_object_lock_assert_exclusive(object); 1532 object->ref_count++; 1533 1534 /* 1535 * Terminate the object. 1536 * If the object had a shadow, we let vm_object_deallocate 1537 * deallocate it. "pageout" objects have a shadow, but 1538 * maintain a "paging reference" rather than a normal 1539 * reference. 1540 * (We are careful here to limit recursion.) 1541 */ 1542 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 1543 1544 if(vm_object_terminate(object) != KERN_SUCCESS) 1545 continue; 1546 1547 if (shadow != VM_OBJECT_NULL) { 1548 if (called_from_vm_object_deallocate) { 1549 return shadow; 1550 } else { 1551 vm_object_deallocate(shadow); 1552 } 1553 } 1554 } 1555} 1556#endif 1557 1558 1559/* 1560 * Routine: vm_object_terminate 1561 * Purpose: 1562 * Free all resources associated with a vm_object. 1563 * In/out conditions: 1564 * Upon entry, the object must be locked, 1565 * and the object must have exactly one reference. 1566 * 1567 * The shadow object reference is left alone. 1568 * 1569 * The object must be unlocked if its found that pages 1570 * must be flushed to a backing object. If someone 1571 * manages to map the object while it is being flushed 1572 * the object is returned unlocked and unchanged. Otherwise, 1573 * upon exit, the cache will be unlocked, and the 1574 * object will cease to exist. 1575 */ 1576static kern_return_t 1577vm_object_terminate( 1578 vm_object_t object) 1579{ 1580 vm_object_t shadow_object; 1581 1582 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n", 1583 object, object->ref_count, 0, 0, 0); 1584 1585 if (!object->pageout && (!object->temporary || object->can_persist) && 1586 (object->pager != NULL || object->shadow_severed)) { 1587 /* 1588 * Clear pager_trusted bit so that the pages get yanked 1589 * out of the object instead of cleaned in place. 
This 1590 * prevents a deadlock in XMM and makes more sense anyway. 1591 */ 1592 object->pager_trusted = FALSE; 1593 1594 vm_object_reap_pages(object, REAP_TERMINATE); 1595 } 1596 /* 1597 * Make sure the object isn't already being terminated 1598 */ 1599 if (object->terminating) { 1600 vm_object_lock_assert_exclusive(object); 1601 object->ref_count--; 1602 assert(object->ref_count > 0); 1603 vm_object_unlock(object); 1604 return KERN_FAILURE; 1605 } 1606 1607 /* 1608 * Did somebody get a reference to the object while we were 1609 * cleaning it? 1610 */ 1611 if (object->ref_count != 1) { 1612 vm_object_lock_assert_exclusive(object); 1613 object->ref_count--; 1614 assert(object->ref_count > 0); 1615 vm_object_res_deallocate(object); 1616 vm_object_unlock(object); 1617 return KERN_FAILURE; 1618 } 1619 1620 /* 1621 * Make sure no one can look us up now. 1622 */ 1623 1624 object->terminating = TRUE; 1625 object->alive = FALSE; 1626 1627 if ( !object->internal && (object->objq.next || object->objq.prev)) 1628 vm_object_cache_remove(object); 1629 1630 if (object->hashed) { 1631 lck_mtx_t *lck; 1632 1633 lck = vm_object_hash_lock_spin(object->pager); 1634 vm_object_remove(object); 1635 vm_object_hash_unlock(lck); 1636 } 1637 /* 1638 * Detach the object from its shadow if we are the shadow's 1639 * copy. The reference we hold on the shadow must be dropped 1640 * by our caller. 1641 */ 1642 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) && 1643 !(object->pageout)) { 1644 vm_object_lock(shadow_object); 1645 if (shadow_object->copy == object) 1646 shadow_object->copy = VM_OBJECT_NULL; 1647 vm_object_unlock(shadow_object); 1648 } 1649 1650 if (object->paging_in_progress != 0 || 1651 object->activity_in_progress != 0) { 1652 /* 1653 * There are still some paging_in_progress references 1654 * on this object, meaning that there are some paging 1655 * or other I/O operations in progress for this VM object. 1656 * Such operations take some paging_in_progress references 1657 * up front to ensure that the object doesn't go away, but 1658 * they may also need to acquire a reference on the VM object, 1659 * to map it in kernel space, for example. That means that 1660 * they may end up releasing the last reference on the VM 1661 * object, triggering its termination, while still holding 1662 * paging_in_progress references. Waiting for these 1663 * pending paging_in_progress references to go away here would 1664 * deadlock. 1665 * 1666 * To avoid deadlocking, we'll let the vm_object_reaper_thread 1667 * complete the VM object termination if it still holds 1668 * paging_in_progress references at this point. 1669 * 1670 * No new paging_in_progress should appear now that the 1671 * VM object is "terminating" and not "alive". 1672 */ 1673 vm_object_reap_async(object); 1674 vm_object_unlock(object); 1675 /* 1676 * Return KERN_FAILURE to let the caller know that we 1677 * haven't completed the termination and it can't drop this 1678 * object's reference on its shadow object yet. 1679 * The reaper thread will take care of that once it has 1680 * completed this object's termination. 1681 */ 1682 return KERN_FAILURE; 1683 } 1684 /* 1685 * complete the VM object termination 1686 */ 1687 vm_object_reap(object); 1688 object = VM_OBJECT_NULL; 1689 1690 /* 1691 * the object lock was released by vm_object_reap() 1692 * 1693 * KERN_SUCCESS means that this object has been terminated 1694 * and no longer needs its shadow object but still holds a 1695 * reference on it. 
1696 * The caller is responsible for dropping that reference. 1697 * We can't call vm_object_deallocate() here because that 1698 * would create a recursion. 1699 */ 1700 return KERN_SUCCESS; 1701} 1702 1703 1704/* 1705 * vm_object_reap(): 1706 * 1707 * Complete the termination of a VM object after it's been marked 1708 * as "terminating" and "!alive" by vm_object_terminate(). 1709 * 1710 * The VM object must be locked by caller. 1711 * The lock will be released on return and the VM object is no longer valid. 1712 */ 1713void 1714vm_object_reap( 1715 vm_object_t object) 1716{ 1717 memory_object_t pager; 1718 1719 vm_object_lock_assert_exclusive(object); 1720 assert(object->paging_in_progress == 0); 1721 assert(object->activity_in_progress == 0); 1722 1723 vm_object_reap_count++; 1724 1725 pager = object->pager; 1726 object->pager = MEMORY_OBJECT_NULL; 1727 1728 if (pager != MEMORY_OBJECT_NULL) 1729 memory_object_control_disable(object->pager_control); 1730 1731 object->ref_count--; 1732#if TASK_SWAPPER 1733 assert(object->res_count == 0); 1734#endif /* TASK_SWAPPER */ 1735 1736 assert (object->ref_count == 0); 1737 1738 /* 1739 * remove from purgeable queue if it's on 1740 */ 1741 if (object->internal && (object->objq.next || object->objq.prev)) { 1742 purgeable_q_t queue = vm_purgeable_object_remove(object); 1743 assert(queue); 1744 1745 if (object->purgeable_when_ripe) { 1746 /* 1747 * Must take page lock for this - 1748 * using it to protect token queue 1749 */ 1750 vm_page_lock_queues(); 1751 vm_purgeable_token_delete_first(queue); 1752 1753 assert(queue->debug_count_objects>=0); 1754 vm_page_unlock_queues(); 1755 } 1756 } 1757 1758 /* 1759 * Clean or free the pages, as appropriate. 1760 * It is possible for us to find busy/absent pages, 1761 * if some faults on this object were aborted. 1762 */ 1763 if (object->pageout) { 1764 assert(object->shadow != VM_OBJECT_NULL); 1765 1766 vm_pageout_object_terminate(object); 1767 1768 } else if (((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL))) { 1769 1770 vm_object_reap_pages(object, REAP_REAP); 1771 } 1772 assert(queue_empty(&object->memq)); 1773 assert(object->paging_in_progress == 0); 1774 assert(object->activity_in_progress == 0); 1775 assert(object->ref_count == 0); 1776 1777 /* 1778 * If the pager has not already been released by 1779 * vm_object_destroy, we need to terminate it and 1780 * release our reference to it here. 1781 */ 1782 if (pager != MEMORY_OBJECT_NULL) { 1783 vm_object_unlock(object); 1784 vm_object_release_pager(pager, object->hashed); 1785 vm_object_lock(object); 1786 } 1787 1788 /* kick off anyone waiting on terminating */ 1789 object->terminating = FALSE; 1790 vm_object_paging_begin(object); 1791 vm_object_paging_end(object); 1792 vm_object_unlock(object); 1793 1794#if MACH_PAGEMAP 1795 vm_external_destroy(object->existence_map, object->vo_size); 1796#endif /* MACH_PAGEMAP */ 1797 1798 object->shadow = VM_OBJECT_NULL; 1799 1800 vm_object_lock_destroy(object); 1801 /* 1802 * Free the space for the object. 1803 */ 1804 zfree(vm_object_zone, object); 1805 object = VM_OBJECT_NULL; 1806} 1807 1808 1809unsigned int vm_max_batch = 256; 1810 1811#define V_O_R_MAX_BATCH 128 1812 1813#define BATCH_LIMIT(max) (vm_max_batch >= max ? 
max : vm_max_batch) 1814 1815 1816#define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect) \ 1817 MACRO_BEGIN \ 1818 if (_local_free_q) { \ 1819 if (do_disconnect) { \ 1820 vm_page_t m; \ 1821 for (m = _local_free_q; \ 1822 m != VM_PAGE_NULL; \ 1823 m = (vm_page_t) m->pageq.next) { \ 1824 if (m->pmapped) { \ 1825 pmap_disconnect(m->phys_page); \ 1826 } \ 1827 } \ 1828 } \ 1829 vm_page_free_list(_local_free_q, TRUE); \ 1830 _local_free_q = VM_PAGE_NULL; \ 1831 } \ 1832 MACRO_END 1833 1834 1835void 1836vm_object_reap_pages( 1837 vm_object_t object, 1838 int reap_type) 1839{ 1840 vm_page_t p; 1841 vm_page_t next; 1842 vm_page_t local_free_q = VM_PAGE_NULL; 1843 int loop_count; 1844 boolean_t disconnect_on_release; 1845 pmap_flush_context pmap_flush_context_storage; 1846 1847 if (reap_type == REAP_DATA_FLUSH) { 1848 /* 1849 * We need to disconnect pages from all pmaps before 1850 * releasing them to the free list 1851 */ 1852 disconnect_on_release = TRUE; 1853 } else { 1854 /* 1855 * Either the caller has already disconnected the pages 1856 * from all pmaps, or we disconnect them here as we add 1857 * them to out local list of pages to be released. 1858 * No need to re-disconnect them when we release the pages 1859 * to the free list. 1860 */ 1861 disconnect_on_release = FALSE; 1862 } 1863 1864restart_after_sleep: 1865 if (queue_empty(&object->memq)) 1866 return; 1867 loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH); 1868 1869 if (reap_type == REAP_PURGEABLE) 1870 pmap_flush_context_init(&pmap_flush_context_storage); 1871 1872 vm_page_lockspin_queues(); 1873 1874 next = (vm_page_t)queue_first(&object->memq); 1875 1876 while (!queue_end(&object->memq, (queue_entry_t)next)) { 1877 1878 p = next; 1879 next = (vm_page_t)queue_next(&next->listq); 1880 1881 if (--loop_count == 0) { 1882 1883 vm_page_unlock_queues(); 1884 1885 if (local_free_q) { 1886 1887 if (reap_type == REAP_PURGEABLE) { 1888 pmap_flush(&pmap_flush_context_storage); 1889 pmap_flush_context_init(&pmap_flush_context_storage); 1890 } 1891 /* 1892 * Free the pages we reclaimed so far 1893 * and take a little break to avoid 1894 * hogging the page queue lock too long 1895 */ 1896 VM_OBJ_REAP_FREELIST(local_free_q, 1897 disconnect_on_release); 1898 } else 1899 mutex_pause(0); 1900 1901 loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH); 1902 1903 vm_page_lockspin_queues(); 1904 } 1905 if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) { 1906 1907 if (p->busy || p->cleaning) { 1908 1909 vm_page_unlock_queues(); 1910 /* 1911 * free the pages reclaimed so far 1912 */ 1913 VM_OBJ_REAP_FREELIST(local_free_q, 1914 disconnect_on_release); 1915 1916 PAGE_SLEEP(object, p, THREAD_UNINT); 1917 1918 goto restart_after_sleep; 1919 } 1920 if (p->laundry) { 1921 p->pageout = FALSE; 1922 1923 vm_pageout_steal_laundry(p, TRUE); 1924 } 1925 } 1926 switch (reap_type) { 1927 1928 case REAP_DATA_FLUSH: 1929 if (VM_PAGE_WIRED(p)) { 1930 /* 1931 * this is an odd case... 
perhaps we should 1932 * zero-fill this page since we're conceptually 1933 * tossing its data at this point, but leaving 1934 * it on the object to honor the 'wire' contract 1935 */ 1936 continue; 1937 } 1938 break; 1939 1940 case REAP_PURGEABLE: 1941 if (VM_PAGE_WIRED(p)) { 1942 /* 1943 * can't purge a wired page 1944 */ 1945 vm_page_purged_wired++; 1946 continue; 1947 } 1948 if (p->laundry && !p->busy && !p->cleaning) { 1949 p->pageout = FALSE; 1950 1951 vm_pageout_steal_laundry(p, TRUE); 1952 } 1953 if (p->cleaning || p->laundry) { 1954 /* 1955 * page is being acted upon, 1956 * so don't mess with it 1957 */ 1958 vm_page_purged_others++; 1959 continue; 1960 } 1961 if (p->busy) { 1962 /* 1963 * We can't reclaim a busy page but we can 1964 * make it more likely to be paged (it's not wired) to make 1965 * sure that it gets considered by 1966 * vm_pageout_scan() later. 1967 */ 1968 vm_page_deactivate(p); 1969 vm_page_purged_busy++; 1970 continue; 1971 } 1972 1973 assert(p->object != kernel_object); 1974 1975 /* 1976 * we can discard this page... 1977 */ 1978 if (p->pmapped == TRUE) { 1979 /* 1980 * unmap the page 1981 */ 1982 pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_NOREFMOD, (void *)&pmap_flush_context_storage); 1983 } 1984 vm_page_purged_count++; 1985 1986 break; 1987 1988 case REAP_TERMINATE: 1989 if (p->absent || p->private) { 1990 /* 1991 * For private pages, VM_PAGE_FREE just 1992 * leaves the page structure around for 1993 * its owner to clean up. For absent 1994 * pages, the structure is returned to 1995 * the appropriate pool. 1996 */ 1997 break; 1998 } 1999 if (p->fictitious) { 2000 assert (p->phys_page == vm_page_guard_addr); 2001 break; 2002 } 2003 if (!p->dirty && p->wpmapped) 2004 p->dirty = pmap_is_modified(p->phys_page); 2005 2006 if ((p->dirty || p->precious) && !p->error && object->alive) { 2007 2008 if (!p->laundry) { 2009 VM_PAGE_QUEUES_REMOVE(p); 2010 /* 2011 * flush page... page will be freed 2012 * upon completion of I/O 2013 */ 2014 vm_pageout_cluster(p, TRUE); 2015 } 2016 vm_page_unlock_queues(); 2017 /* 2018 * free the pages reclaimed so far 2019 */ 2020 VM_OBJ_REAP_FREELIST(local_free_q, 2021 disconnect_on_release); 2022 2023 vm_object_paging_wait(object, THREAD_UNINT); 2024 2025 goto restart_after_sleep; 2026 } 2027 break; 2028 2029 case REAP_REAP: 2030 break; 2031 } 2032 vm_page_free_prepare_queues(p); 2033 assert(p->pageq.next == NULL && p->pageq.prev == NULL); 2034 /* 2035 * Add this page to our list of reclaimed pages, 2036 * to be freed later. 2037 */ 2038 p->pageq.next = (queue_entry_t) local_free_q; 2039 local_free_q = p; 2040 } 2041 vm_page_unlock_queues(); 2042 2043 /* 2044 * Free the remaining reclaimed pages 2045 */ 2046 if (reap_type == REAP_PURGEABLE) 2047 pmap_flush(&pmap_flush_context_storage); 2048 2049 VM_OBJ_REAP_FREELIST(local_free_q, 2050 disconnect_on_release); 2051} 2052 2053 2054void 2055vm_object_reap_async( 2056 vm_object_t object) 2057{ 2058 vm_object_lock_assert_exclusive(object); 2059 2060 vm_object_reaper_lock_spin(); 2061 2062 vm_object_reap_count_async++; 2063 2064 /* enqueue the VM object... */ 2065 queue_enter(&vm_object_reaper_queue, object, 2066 vm_object_t, cached_list); 2067 2068 vm_object_reaper_unlock(); 2069 2070 /* ... 
and wake up the reaper thread */ 2071 thread_wakeup((event_t) &vm_object_reaper_queue); 2072} 2073 2074 2075void 2076vm_object_reaper_thread(void) 2077{ 2078 vm_object_t object, shadow_object; 2079 2080 vm_object_reaper_lock_spin(); 2081 2082 while (!queue_empty(&vm_object_reaper_queue)) { 2083 queue_remove_first(&vm_object_reaper_queue, 2084 object, 2085 vm_object_t, 2086 cached_list); 2087 2088 vm_object_reaper_unlock(); 2089 vm_object_lock(object); 2090 2091 assert(object->terminating); 2092 assert(!object->alive); 2093 2094 /* 2095 * The pageout daemon might be playing with our pages. 2096 * Now that the object is dead, it won't touch any more 2097 * pages, but some pages might already be on their way out. 2098 * Hence, we wait until the active paging activities have 2099 * ceased before we break the association with the pager 2100 * itself. 2101 */ 2102 while (object->paging_in_progress != 0 || 2103 object->activity_in_progress != 0) { 2104 vm_object_wait(object, 2105 VM_OBJECT_EVENT_PAGING_IN_PROGRESS, 2106 THREAD_UNINT); 2107 vm_object_lock(object); 2108 } 2109 2110 shadow_object = 2111 object->pageout ? VM_OBJECT_NULL : object->shadow; 2112 2113 vm_object_reap(object); 2114 /* cache is unlocked and object is no longer valid */ 2115 object = VM_OBJECT_NULL; 2116 2117 if (shadow_object != VM_OBJECT_NULL) { 2118 /* 2119 * Drop the reference "object" was holding on 2120 * its shadow object. 2121 */ 2122 vm_object_deallocate(shadow_object); 2123 shadow_object = VM_OBJECT_NULL; 2124 } 2125 vm_object_reaper_lock_spin(); 2126 } 2127 2128 /* wait for more work... */ 2129 assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT); 2130 2131 vm_object_reaper_unlock(); 2132 2133 thread_block((thread_continue_t) vm_object_reaper_thread); 2134 /*NOTREACHED*/ 2135} 2136 2137/* 2138 * Routine: vm_object_pager_wakeup 2139 * Purpose: Wake up anyone waiting for termination of a pager. 2140 */ 2141 2142static void 2143vm_object_pager_wakeup( 2144 memory_object_t pager) 2145{ 2146 vm_object_hash_entry_t entry; 2147 boolean_t waiting = FALSE; 2148 lck_mtx_t *lck; 2149 2150 /* 2151 * If anyone was waiting for the memory_object_terminate 2152 * to be queued, wake them up now. 2153 */ 2154 lck = vm_object_hash_lock_spin(pager); 2155 entry = vm_object_hash_lookup(pager, TRUE); 2156 if (entry != VM_OBJECT_HASH_ENTRY_NULL) 2157 waiting = entry->waiting; 2158 vm_object_hash_unlock(lck); 2159 2160 if (entry != VM_OBJECT_HASH_ENTRY_NULL) { 2161 if (waiting) 2162 thread_wakeup((event_t) pager); 2163 vm_object_hash_entry_free(entry); 2164 } 2165} 2166 2167/* 2168 * Routine: vm_object_release_pager 2169 * Purpose: Terminate the pager and, upon completion, 2170 * release our last reference to it. 2171 * just like memory_object_terminate, except 2172 * that we wake up anyone blocked in vm_object_enter 2173 * waiting for termination message to be queued 2174 * before calling memory_object_init. 2175 */ 2176static void 2177vm_object_release_pager( 2178 memory_object_t pager, 2179 boolean_t hashed) 2180{ 2181 2182 /* 2183 * Terminate the pager. 2184 */ 2185 2186 (void) memory_object_terminate(pager); 2187 2188 if (hashed == TRUE) { 2189 /* 2190 * Wakeup anyone waiting for this terminate 2191 * and remove the entry from the hash 2192 */ 2193 vm_object_pager_wakeup(pager); 2194 } 2195 /* 2196 * Release reference to pager. 
2197 */ 2198 memory_object_deallocate(pager); 2199} 2200 2201/* 2202 * Routine: vm_object_destroy 2203 * Purpose: 2204 * Shut down a VM object, despite the 2205 * presence of address map (or other) references 2206 * to the vm_object. 2207 */ 2208kern_return_t 2209vm_object_destroy( 2210 vm_object_t object, 2211 __unused kern_return_t reason) 2212{ 2213 memory_object_t old_pager; 2214 2215 if (object == VM_OBJECT_NULL) 2216 return(KERN_SUCCESS); 2217 2218 /* 2219 * Remove the pager association immediately. 2220 * 2221 * This will prevent the memory manager from further 2222 * meddling. [If it wanted to flush data or make 2223 * other changes, it should have done so before performing 2224 * the destroy call.] 2225 */ 2226 2227 vm_object_lock(object); 2228 object->can_persist = FALSE; 2229 object->named = FALSE; 2230 object->alive = FALSE; 2231 2232 if (object->hashed) { 2233 lck_mtx_t *lck; 2234 /* 2235 * Rip out the pager from the vm_object now... 2236 */ 2237 lck = vm_object_hash_lock_spin(object->pager); 2238 vm_object_remove(object); 2239 vm_object_hash_unlock(lck); 2240 } 2241 old_pager = object->pager; 2242 object->pager = MEMORY_OBJECT_NULL; 2243 if (old_pager != MEMORY_OBJECT_NULL) 2244 memory_object_control_disable(object->pager_control); 2245 2246 /* 2247 * Wait for the existing paging activity (that got 2248 * through before we nulled out the pager) to subside. 2249 */ 2250 2251 vm_object_paging_wait(object, THREAD_UNINT); 2252 vm_object_unlock(object); 2253 2254 /* 2255 * Terminate the object now. 2256 */ 2257 if (old_pager != MEMORY_OBJECT_NULL) { 2258 vm_object_release_pager(old_pager, object->hashed); 2259 2260 /* 2261 * JMM - Release the caller's reference. This assumes the 2262 * caller had a reference to release, which is a big (but 2263 * currently valid) assumption if this is driven from the 2264 * vnode pager (it is holding a named reference when making 2265 * this call).. 2266 */ 2267 vm_object_deallocate(object); 2268 2269 } 2270 return(KERN_SUCCESS); 2271} 2272 2273 2274#if VM_OBJECT_CACHE 2275 2276#define VM_OBJ_DEACT_ALL_STATS DEBUG 2277#if VM_OBJ_DEACT_ALL_STATS 2278uint32_t vm_object_deactivate_all_pages_batches = 0; 2279uint32_t vm_object_deactivate_all_pages_pages = 0; 2280#endif /* VM_OBJ_DEACT_ALL_STATS */ 2281/* 2282 * vm_object_deactivate_all_pages 2283 * 2284 * Deactivate all pages in the specified object. (Keep its pages 2285 * in memory even though it is no longer referenced.) 2286 * 2287 * The object must be locked. 
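 *
 * The loop below deactivates pages in batches so that the page queues
 * lock is periodically yielded; a minimal sketch of that pattern, taken
 * from the body of this routine (illustrative only, not part of the
 * original code):
 *
 *	loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
 *	vm_page_lock_queues();
 *	queue_iterate(&object->memq, p, vm_page_t, listq) {
 *		if (--loop_count == 0) {
 *			lck_mtx_yield(&vm_page_queue_lock);
 *			loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
 *		}
 *		if (!p->busy && !p->throttled)
 *			vm_page_deactivate(p);
 *	}
 *	vm_page_unlock_queues();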
2288 */
2289 static void
2290 vm_object_deactivate_all_pages(
2291 register vm_object_t object)
2292 {
2293 register vm_page_t p;
2294 int loop_count;
2295 #if VM_OBJ_DEACT_ALL_STATS
2296 int pages_count;
2297 #endif /* VM_OBJ_DEACT_ALL_STATS */
2298 #define V_O_D_A_P_MAX_BATCH 256
2299
2300 loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
2301 #if VM_OBJ_DEACT_ALL_STATS
2302 pages_count = 0;
2303 #endif /* VM_OBJ_DEACT_ALL_STATS */
2304 vm_page_lock_queues();
2305 queue_iterate(&object->memq, p, vm_page_t, listq) {
2306 if (--loop_count == 0) {
2307 #if VM_OBJ_DEACT_ALL_STATS
2308 hw_atomic_add(&vm_object_deactivate_all_pages_batches,
2309 1);
2310 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
2311 pages_count);
2312 pages_count = 0;
2313 #endif /* VM_OBJ_DEACT_ALL_STATS */
2314 lck_mtx_yield(&vm_page_queue_lock);
2315 loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
2316 }
2317 if (!p->busy && !p->throttled) {
2318 #if VM_OBJ_DEACT_ALL_STATS
2319 pages_count++;
2320 #endif /* VM_OBJ_DEACT_ALL_STATS */
2321 vm_page_deactivate(p);
2322 }
2323 }
2324 #if VM_OBJ_DEACT_ALL_STATS
2325 if (pages_count) {
2326 hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
2327 hw_atomic_add(&vm_object_deactivate_all_pages_pages,
2328 pages_count);
2329 pages_count = 0;
2330 }
2331 #endif /* VM_OBJ_DEACT_ALL_STATS */
2332 vm_page_unlock_queues();
2333 }
2334 #endif /* VM_OBJECT_CACHE */
2335
2336
2337
2338 /*
2339 * The "chunk" macros are used by routines below when looking for pages to deactivate. These
2340 * exist because of the need to handle shadow chains. When deactivating pages, we only
2341 * want to deactivate the ones at the topmost level in the object chain. In order to do
2342 * this efficiently, the specified address range is divided up into "chunks" and we use
2343 * a bit map to keep track of which pages have already been processed as we descend down
2344 * the shadow chain. These chunk macros hide the details of the bit map implementation
2345 * as much as we can.
2346 *
2347 * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is
2348 * set to 64 pages. The bit map is indexed from the low-order end, so that the lowest
2349 * order bit represents page 0 in the current range and the highest order bit represents
2350 * page 63.
2351 *
2352 * For further convenience, we also use negative logic for the page state in the bit map.
2353 * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has
2354 * been processed. This way we can simply test the 64-bit long word to see if it's zero
2355 * to easily tell if the whole range has been processed. Therefore, the bit map starts
2356 * out with all the bits set. The macros below hide all these details from the caller.
2357 */
2358
2359 #define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */
2360 /* be the same as the number of bits in */
2361 /* the chunk_state_t type. We use 64 */
2362 /* just for convenience. */
2363
2364 #define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */
2365
2366 typedef uint64_t chunk_state_t;
2367
2368 /*
2369 * The bit map uses negative logic, so we start out with all 64 bits set to indicate
2370 * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE,
2371 * then we mark pages beyond the len as having been "processed" so that we don't waste time
2372 * looking at pages in that range. This can save us from unnecessarily chasing down the
2373 * shadow chain.
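 *
 * A minimal sketch of how these macros fit together, modeled on the
 * deactivation routines below (illustrative only, not part of the
 * original code):
 *
 *	chunk_state_t	cs;
 *	uint64_t	p;
 *
 *	CHUNK_INIT(cs, length);
 *	while (object != VM_OBJECT_NULL && CHUNK_NOT_COMPLETE(cs)) {
 *		for (p = 0; p < PAGES_IN_A_CHUNK; p++) {
 *			if (PAGE_ALREADY_HANDLED(cs, p))
 *				continue;
 *			if (... the page is present at this level ...)
 *				MARK_PAGE_HANDLED(cs, p);
 *		}
 *		object = object->shadow;
 *	}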
2374 */ 2375 2376#define CHUNK_INIT(c, len) \ 2377 MACRO_BEGIN \ 2378 uint64_t p; \ 2379 \ 2380 (c) = 0xffffffffffffffffLL; \ 2381 \ 2382 for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \ 2383 MARK_PAGE_HANDLED(c, p); \ 2384 MACRO_END 2385 2386 2387/* 2388 * Return true if all pages in the chunk have not yet been processed. 2389 */ 2390 2391#define CHUNK_NOT_COMPLETE(c) ((c) != 0) 2392 2393/* 2394 * Return true if the page at offset 'p' in the bit map has already been handled 2395 * while processing a higher level object in the shadow chain. 2396 */ 2397 2398#define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0) 2399 2400/* 2401 * Mark the page at offset 'p' in the bit map as having been processed. 2402 */ 2403 2404#define MARK_PAGE_HANDLED(c, p) \ 2405MACRO_BEGIN \ 2406 (c) = (c) & ~(1LL << (p)); \ 2407MACRO_END 2408 2409 2410/* 2411 * Return true if the page at the given offset has been paged out. Object is 2412 * locked upon entry and returned locked. 2413 */ 2414 2415static boolean_t 2416page_is_paged_out( 2417 vm_object_t object, 2418 vm_object_offset_t offset) 2419{ 2420 kern_return_t kr; 2421 memory_object_t pager; 2422 2423 /* 2424 * Check the existence map for the page if we have one, otherwise 2425 * ask the pager about this page. 2426 */ 2427 2428#if MACH_PAGEMAP 2429 if (object->existence_map) { 2430 if (vm_external_state_get(object->existence_map, offset) 2431 == VM_EXTERNAL_STATE_EXISTS) { 2432 /* 2433 * We found the page 2434 */ 2435 2436 return TRUE; 2437 } 2438 } else 2439#endif /* MACH_PAGEMAP */ 2440 if (object->internal && 2441 object->alive && 2442 !object->terminating && 2443 object->pager_ready) { 2444 2445 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 2446 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) 2447 == VM_EXTERNAL_STATE_EXISTS) { 2448 return TRUE; 2449 } else { 2450 return FALSE; 2451 } 2452 } 2453 2454 /* 2455 * We're already holding a "paging in progress" reference 2456 * so the object can't disappear when we release the lock. 2457 */ 2458 2459 assert(object->paging_in_progress); 2460 pager = object->pager; 2461 vm_object_unlock(object); 2462 2463 kr = memory_object_data_request( 2464 pager, 2465 offset + object->paging_offset, 2466 0, /* just poke the pager */ 2467 VM_PROT_READ, 2468 NULL); 2469 2470 vm_object_lock(object); 2471 2472 if (kr == KERN_SUCCESS) { 2473 2474 /* 2475 * We found the page 2476 */ 2477 2478 return TRUE; 2479 } 2480 } 2481 2482 return FALSE; 2483} 2484 2485 2486 2487/* 2488 * madvise_free_debug 2489 * 2490 * To help debug madvise(MADV_FREE*) mis-usage, this triggers a 2491 * zero-fill as soon as a page is affected by a madvise(MADV_FREE*), to 2492 * simulate the loss of the page's contents as if the page had been 2493 * reclaimed and then re-faulted. 2494 */ 2495#if DEVELOPMENT || DEBUG 2496int madvise_free_debug = 1; 2497#else /* DEBUG */ 2498int madvise_free_debug = 0; 2499#endif /* DEBUG */ 2500 2501/* 2502 * Deactivate the pages in the specified object and range. If kill_page is set, also discard any 2503 * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify 2504 * a size that is less than or equal to the CHUNK_SIZE. 
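 *
 * Per-page state changes are not applied one at a time; they are queued in a small
 * "delayed work" array and flushed in batches. A compressed view of that pattern, as
 * used in the body below (illustrative only, not part of the original code):
 *
 *	dwp = &dw_array[0];
 *	dw_count = 0;
 *	dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
 *	... for each page m in the chunk ...
 *		dwp->dw_mask = DW_clear_reference;
 *		VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
 *		if (dw_count >= dw_limit) {
 *			vm_page_do_delayed_work(object, &dw_array[0], dw_count);
 *			dwp = &dw_array[0];
 *			dw_count = 0;
 *		}
 *	... any remainder is flushed with vm_page_do_delayed_work() at the end ...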
2505 */ 2506 2507static void 2508deactivate_pages_in_object( 2509 vm_object_t object, 2510 vm_object_offset_t offset, 2511 vm_object_size_t size, 2512 boolean_t kill_page, 2513 boolean_t reusable_page, 2514 boolean_t all_reusable, 2515 chunk_state_t *chunk_state, 2516 pmap_flush_context *pfc) 2517{ 2518 vm_page_t m; 2519 int p; 2520 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 2521 struct vm_page_delayed_work *dwp; 2522 int dw_count; 2523 int dw_limit; 2524 unsigned int reusable = 0; 2525 2526 /* 2527 * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the 2528 * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may 2529 * have pages marked as having been processed already. We stop the loop early if we find we've handled 2530 * all the pages in the chunk. 2531 */ 2532 2533 dwp = &dw_array[0]; 2534 dw_count = 0; 2535 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 2536 2537 for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) { 2538 2539 /* 2540 * If this offset has already been found and handled in a higher level object, then don't 2541 * do anything with it in the current shadow object. 2542 */ 2543 2544 if (PAGE_ALREADY_HANDLED(*chunk_state, p)) 2545 continue; 2546 2547 /* 2548 * See if the page at this offset is around. First check to see if the page is resident, 2549 * then if not, check the existence map or with the pager. 2550 */ 2551 2552 if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { 2553 2554 /* 2555 * We found a page we were looking for. Mark it as "handled" now in the chunk_state 2556 * so that we won't bother looking for a page at this offset again if there are more 2557 * shadow objects. Then deactivate the page. 2558 */ 2559 2560 MARK_PAGE_HANDLED(*chunk_state, p); 2561 2562 if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy) && (!m->laundry)) { 2563 int clear_refmod; 2564 2565 dwp->dw_mask = 0; 2566 2567 clear_refmod = VM_MEM_REFERENCED; 2568 dwp->dw_mask |= DW_clear_reference; 2569 2570 if ((kill_page) && (object->internal)) { 2571 if (madvise_free_debug) { 2572 /* 2573 * zero-fill the page now 2574 * to simulate it being 2575 * reclaimed and re-faulted. 2576 */ 2577 pmap_zero_page(m->phys_page); 2578 } 2579 m->precious = FALSE; 2580 m->dirty = FALSE; 2581 2582 clear_refmod |= VM_MEM_MODIFIED; 2583 if (m->throttled) { 2584 /* 2585 * This page is now clean and 2586 * reclaimable. Move it out 2587 * of the throttled queue, so 2588 * that vm_pageout_scan() can 2589 * find it. 
2590 */
2591 dwp->dw_mask |= DW_move_page;
2592 }
2593 #if MACH_PAGEMAP
2594 vm_external_state_clr(object->existence_map, offset);
2595 #endif /* MACH_PAGEMAP */
2596 VM_COMPRESSOR_PAGER_STATE_CLR(object,
2597 offset);
2598
2599 if (reusable_page && !m->reusable) {
2600 assert(!all_reusable);
2601 assert(!object->all_reusable);
2602 m->reusable = TRUE;
2603 object->reusable_page_count++;
2604 assert(object->resident_page_count >= object->reusable_page_count);
2605 reusable++;
2606 }
2607 }
2608 pmap_clear_refmod_options(m->phys_page, clear_refmod, PMAP_OPTIONS_NOFLUSH, (void *)pfc);
2609
2610 if (!m->throttled && !(reusable_page || all_reusable))
2611 dwp->dw_mask |= DW_move_page;
2612
2613 if (dwp->dw_mask)
2614 VM_PAGE_ADD_DELAYED_WORK(dwp, m,
2615 dw_count);
2616
2617 if (dw_count >= dw_limit) {
2618 if (reusable) {
2619 OSAddAtomic(reusable,
2620 &vm_page_stats_reusable.reusable_count);
2621 vm_page_stats_reusable.reusable += reusable;
2622 reusable = 0;
2623 }
2624 vm_page_do_delayed_work(object, &dw_array[0], dw_count);
2625
2626 dwp = &dw_array[0];
2627 dw_count = 0;
2628 }
2629 }
2630
2631 } else {
2632
2633 /*
2634 * The page at this offset isn't memory resident, check to see if it's
2635 * been paged out. If so, mark it as handled so we don't bother looking
2636 * for it in the shadow chain.
2637 */
2638
2639 if (page_is_paged_out(object, offset)) {
2640 MARK_PAGE_HANDLED(*chunk_state, p);
2641
2642 /*
2643 * If we're killing a non-resident page, then clear the page in the existence
2644 * map so we don't bother paging it back in if it's touched again in the future.
2645 */
2646
2647 if ((kill_page) && (object->internal)) {
2648 #if MACH_PAGEMAP
2649 vm_external_state_clr(object->existence_map, offset);
2650 #endif /* MACH_PAGEMAP */
2651 VM_COMPRESSOR_PAGER_STATE_CLR(object,
2652 offset);
2653 }
2654 }
2655 }
2656 }
2657
2658 if (reusable) {
2659 OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count);
2660 vm_page_stats_reusable.reusable += reusable;
2661 reusable = 0;
2662 }
2663
2664 if (dw_count)
2665 vm_page_do_delayed_work(object, &dw_array[0], dw_count);
2666 }
2667
2668
2669 /*
2670 * Deactivate a "chunk" of the given range of the object starting at offset. A "chunk"
2671 * will always be less than or equal to the given size. The total range is divided up
2672 * into chunks for efficiency and performance related to the locks and handling the shadow
2673 * chain. This routine returns how much of the given "size" it actually processed. It's
2674 * up to the caller to loop and keep calling this routine until the entire range they want
2675 * to process has been done.
2676 */
2677
2678 static vm_object_size_t
2679 deactivate_a_chunk(
2680 vm_object_t orig_object,
2681 vm_object_offset_t offset,
2682 vm_object_size_t size,
2683 boolean_t kill_page,
2684 boolean_t reusable_page,
2685 boolean_t all_reusable,
2686 pmap_flush_context *pfc)
2687 {
2688 vm_object_t object;
2689 vm_object_t tmp_object;
2690 vm_object_size_t length;
2691 chunk_state_t chunk_state;
2692
2693
2694 /*
2695 * Get set to do a chunk. We'll do up to CHUNK_SIZE, but no more than the
2696 * remaining size the caller asked for.
2697 */
2698
2699 length = MIN(size, CHUNK_SIZE);
2700
2701 /*
2702 * The chunk_state keeps track of which pages we've already processed if there's
2703 * a shadow chain on this object. At this point, we haven't done anything with this
2704 * range of pages yet, so initialize the state to indicate no pages processed yet.
2705 */ 2706 2707 CHUNK_INIT(chunk_state, length); 2708 object = orig_object; 2709 2710 /* 2711 * Start at the top level object and iterate around the loop once for each object 2712 * in the shadow chain. We stop processing early if we've already found all the pages 2713 * in the range. Otherwise we stop when we run out of shadow objects. 2714 */ 2715 2716 while (object && CHUNK_NOT_COMPLETE(chunk_state)) { 2717 vm_object_paging_begin(object); 2718 2719 deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state, pfc); 2720 2721 vm_object_paging_end(object); 2722 2723 /* 2724 * We've finished with this object, see if there's a shadow object. If 2725 * there is, update the offset and lock the new object. We also turn off 2726 * kill_page at this point since we only kill pages in the top most object. 2727 */ 2728 2729 tmp_object = object->shadow; 2730 2731 if (tmp_object) { 2732 kill_page = FALSE; 2733 reusable_page = FALSE; 2734 all_reusable = FALSE; 2735 offset += object->vo_shadow_offset; 2736 vm_object_lock(tmp_object); 2737 } 2738 2739 if (object != orig_object) 2740 vm_object_unlock(object); 2741 2742 object = tmp_object; 2743 } 2744 2745 if (object && object != orig_object) 2746 vm_object_unlock(object); 2747 2748 return length; 2749} 2750 2751 2752 2753/* 2754 * Move any resident pages in the specified range to the inactive queue. If kill_page is set, 2755 * we also clear the modified status of the page and "forget" any changes that have been made 2756 * to the page. 2757 */ 2758 2759__private_extern__ void 2760vm_object_deactivate_pages( 2761 vm_object_t object, 2762 vm_object_offset_t offset, 2763 vm_object_size_t size, 2764 boolean_t kill_page, 2765 boolean_t reusable_page) 2766{ 2767 vm_object_size_t length; 2768 boolean_t all_reusable; 2769 pmap_flush_context pmap_flush_context_storage; 2770 2771 /* 2772 * We break the range up into chunks and do one chunk at a time. This is for 2773 * efficiency and performance while handling the shadow chains and the locks. 2774 * The deactivate_a_chunk() function returns how much of the range it processed. 2775 * We keep calling this routine until the given size is exhausted. 2776 */ 2777 2778 2779 all_reusable = FALSE; 2780 if (reusable_page && 2781 object->internal && 2782 object->vo_size != 0 && 2783 object->vo_size == size && 2784 object->reusable_page_count == 0) { 2785 all_reusable = TRUE; 2786 reusable_page = FALSE; 2787 } 2788 2789 if ((reusable_page || all_reusable) && object->all_reusable) { 2790 /* This means MADV_FREE_REUSABLE has been called twice, which 2791 * is probably illegal. 
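 * A sketch of the user-level sequence this guards against, for a
 * hypothetical mapping "addr" of this object (illustrative only):
 *
 *	madvise(addr, len, MADV_FREE_REUSABLE);
 *	madvise(addr, len, MADV_FREE_REUSABLE);
 *
 * The second call finds "all_reusable" already set on the object and
 * returns here without doing any further work.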
*/ 2792 return; 2793 } 2794 2795 pmap_flush_context_init(&pmap_flush_context_storage); 2796 2797 while (size) { 2798 length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable, &pmap_flush_context_storage); 2799 2800 size -= length; 2801 offset += length; 2802 } 2803 pmap_flush(&pmap_flush_context_storage); 2804 2805 if (all_reusable) { 2806 if (!object->all_reusable) { 2807 unsigned int reusable; 2808 2809 object->all_reusable = TRUE; 2810 assert(object->reusable_page_count == 0); 2811 /* update global stats */ 2812 reusable = object->resident_page_count; 2813 OSAddAtomic(reusable, 2814 &vm_page_stats_reusable.reusable_count); 2815 vm_page_stats_reusable.reusable += reusable; 2816 vm_page_stats_reusable.all_reusable_calls++; 2817 } 2818 } else if (reusable_page) { 2819 vm_page_stats_reusable.partial_reusable_calls++; 2820 } 2821} 2822 2823void 2824vm_object_reuse_pages( 2825 vm_object_t object, 2826 vm_object_offset_t start_offset, 2827 vm_object_offset_t end_offset, 2828 boolean_t allow_partial_reuse) 2829{ 2830 vm_object_offset_t cur_offset; 2831 vm_page_t m; 2832 unsigned int reused, reusable; 2833 2834#define VM_OBJECT_REUSE_PAGE(object, m, reused) \ 2835 MACRO_BEGIN \ 2836 if ((m) != VM_PAGE_NULL && \ 2837 (m)->reusable) { \ 2838 assert((object)->reusable_page_count <= \ 2839 (object)->resident_page_count); \ 2840 assert((object)->reusable_page_count > 0); \ 2841 (object)->reusable_page_count--; \ 2842 (m)->reusable = FALSE; \ 2843 (reused)++; \ 2844 } \ 2845 MACRO_END 2846 2847 reused = 0; 2848 reusable = 0; 2849 2850 vm_object_lock_assert_exclusive(object); 2851 2852 if (object->all_reusable) { 2853 assert(object->reusable_page_count == 0); 2854 object->all_reusable = FALSE; 2855 if (end_offset - start_offset == object->vo_size || 2856 !allow_partial_reuse) { 2857 vm_page_stats_reusable.all_reuse_calls++; 2858 reused = object->resident_page_count; 2859 } else { 2860 vm_page_stats_reusable.partial_reuse_calls++; 2861 queue_iterate(&object->memq, m, vm_page_t, listq) { 2862 if (m->offset < start_offset || 2863 m->offset >= end_offset) { 2864 m->reusable = TRUE; 2865 object->reusable_page_count++; 2866 assert(object->resident_page_count >= object->reusable_page_count); 2867 continue; 2868 } else { 2869 assert(!m->reusable); 2870 reused++; 2871 } 2872 } 2873 } 2874 } else if (object->resident_page_count > 2875 ((end_offset - start_offset) >> PAGE_SHIFT)) { 2876 vm_page_stats_reusable.partial_reuse_calls++; 2877 for (cur_offset = start_offset; 2878 cur_offset < end_offset; 2879 cur_offset += PAGE_SIZE_64) { 2880 if (object->reusable_page_count == 0) { 2881 break; 2882 } 2883 m = vm_page_lookup(object, cur_offset); 2884 VM_OBJECT_REUSE_PAGE(object, m, reused); 2885 } 2886 } else { 2887 vm_page_stats_reusable.partial_reuse_calls++; 2888 queue_iterate(&object->memq, m, vm_page_t, listq) { 2889 if (object->reusable_page_count == 0) { 2890 break; 2891 } 2892 if (m->offset < start_offset || 2893 m->offset >= end_offset) { 2894 continue; 2895 } 2896 VM_OBJECT_REUSE_PAGE(object, m, reused); 2897 } 2898 } 2899 2900 /* update global stats */ 2901 OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count); 2902 vm_page_stats_reusable.reused += reused; 2903 vm_page_stats_reusable.reusable += reusable; 2904} 2905 2906/* 2907 * Routine: vm_object_pmap_protect 2908 * 2909 * Purpose: 2910 * Reduces the permission for all physical 2911 * pages in the specified object range. 
2912 * 2913 * If removing write permission only, it is 2914 * sufficient to protect only the pages in 2915 * the top-level object; only those pages may 2916 * have write permission. 2917 * 2918 * If removing all access, we must follow the 2919 * shadow chain from the top-level object to 2920 * remove access to all pages in shadowed objects. 2921 * 2922 * The object must *not* be locked. The object must 2923 * be temporary/internal. 2924 * 2925 * If pmap is not NULL, this routine assumes that 2926 * the only mappings for the pages are in that 2927 * pmap. 2928 */ 2929 2930__private_extern__ void 2931vm_object_pmap_protect( 2932 register vm_object_t object, 2933 register vm_object_offset_t offset, 2934 vm_object_size_t size, 2935 pmap_t pmap, 2936 vm_map_offset_t pmap_start, 2937 vm_prot_t prot) 2938{ 2939 vm_object_pmap_protect_options(object, offset, size, 2940 pmap, pmap_start, prot, 0); 2941} 2942 2943__private_extern__ void 2944vm_object_pmap_protect_options( 2945 register vm_object_t object, 2946 register vm_object_offset_t offset, 2947 vm_object_size_t size, 2948 pmap_t pmap, 2949 vm_map_offset_t pmap_start, 2950 vm_prot_t prot, 2951 int options) 2952{ 2953 pmap_flush_context pmap_flush_context_storage; 2954 boolean_t delayed_pmap_flush = FALSE; 2955 2956 if (object == VM_OBJECT_NULL) 2957 return; 2958 size = vm_object_round_page(size); 2959 offset = vm_object_trunc_page(offset); 2960 2961 vm_object_lock(object); 2962 2963 if (object->phys_contiguous) { 2964 if (pmap != NULL) { 2965 vm_object_unlock(object); 2966 pmap_protect_options(pmap, 2967 pmap_start, 2968 pmap_start + size, 2969 prot, 2970 options & ~PMAP_OPTIONS_NOFLUSH, 2971 NULL); 2972 } else { 2973 vm_object_offset_t phys_start, phys_end, phys_addr; 2974 2975 phys_start = object->vo_shadow_offset + offset; 2976 phys_end = phys_start + size; 2977 assert(phys_start <= phys_end); 2978 assert(phys_end <= object->vo_shadow_offset + object->vo_size); 2979 vm_object_unlock(object); 2980 2981 pmap_flush_context_init(&pmap_flush_context_storage); 2982 delayed_pmap_flush = FALSE; 2983 2984 for (phys_addr = phys_start; 2985 phys_addr < phys_end; 2986 phys_addr += PAGE_SIZE_64) { 2987 pmap_page_protect_options( 2988 (ppnum_t) (phys_addr >> PAGE_SHIFT), 2989 prot, 2990 options | PMAP_OPTIONS_NOFLUSH, 2991 (void *)&pmap_flush_context_storage); 2992 delayed_pmap_flush = TRUE; 2993 } 2994 if (delayed_pmap_flush == TRUE) 2995 pmap_flush(&pmap_flush_context_storage); 2996 } 2997 return; 2998 } 2999 3000 assert(object->internal); 3001 3002 while (TRUE) { 3003 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) { 3004 vm_object_unlock(object); 3005 pmap_protect_options(pmap, pmap_start, pmap_start + size, prot, 3006 options & ~PMAP_OPTIONS_NOFLUSH, NULL); 3007 return; 3008 } 3009 3010 pmap_flush_context_init(&pmap_flush_context_storage); 3011 delayed_pmap_flush = FALSE; 3012 3013 /* 3014 * if we are doing large ranges with respect to resident 3015 * page count then we should interate over pages otherwise 3016 * inverse page look-up will be faster 3017 */ 3018 if (ptoa_64(object->resident_page_count / 4) < size) { 3019 vm_page_t p; 3020 vm_object_offset_t end; 3021 3022 end = offset + size; 3023 3024 queue_iterate(&object->memq, p, vm_page_t, listq) { 3025 if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { 3026 vm_map_offset_t start; 3027 3028 start = pmap_start + p->offset - offset; 3029 3030 if (pmap != PMAP_NULL) 3031 pmap_protect_options( 3032 pmap, 3033 start, 3034 start + PAGE_SIZE_64, 3035 prot, 3036 
options | PMAP_OPTIONS_NOFLUSH, 3037 &pmap_flush_context_storage); 3038 else 3039 pmap_page_protect_options( 3040 p->phys_page, 3041 prot, 3042 options | PMAP_OPTIONS_NOFLUSH, 3043 &pmap_flush_context_storage); 3044 delayed_pmap_flush = TRUE; 3045 } 3046 } 3047 3048 } else { 3049 vm_page_t p; 3050 vm_object_offset_t end; 3051 vm_object_offset_t target_off; 3052 3053 end = offset + size; 3054 3055 for (target_off = offset; 3056 target_off < end; target_off += PAGE_SIZE) { 3057 3058 p = vm_page_lookup(object, target_off); 3059 3060 if (p != VM_PAGE_NULL) { 3061 vm_object_offset_t start; 3062 3063 start = pmap_start + (p->offset - offset); 3064 3065 if (pmap != PMAP_NULL) 3066 pmap_protect_options( 3067 pmap, 3068 start, 3069 start + PAGE_SIZE_64, 3070 prot, 3071 options | PMAP_OPTIONS_NOFLUSH, 3072 &pmap_flush_context_storage); 3073 else 3074 pmap_page_protect_options( 3075 p->phys_page, 3076 prot, 3077 options | PMAP_OPTIONS_NOFLUSH, 3078 &pmap_flush_context_storage); 3079 delayed_pmap_flush = TRUE; 3080 } 3081 } 3082 } 3083 if (delayed_pmap_flush == TRUE) 3084 pmap_flush(&pmap_flush_context_storage); 3085 3086 if (prot == VM_PROT_NONE) { 3087 /* 3088 * Must follow shadow chain to remove access 3089 * to pages in shadowed objects. 3090 */ 3091 register vm_object_t next_object; 3092 3093 next_object = object->shadow; 3094 if (next_object != VM_OBJECT_NULL) { 3095 offset += object->vo_shadow_offset; 3096 vm_object_lock(next_object); 3097 vm_object_unlock(object); 3098 object = next_object; 3099 } 3100 else { 3101 /* 3102 * End of chain - we are done. 3103 */ 3104 break; 3105 } 3106 } 3107 else { 3108 /* 3109 * Pages in shadowed objects may never have 3110 * write permission - we may stop here. 3111 */ 3112 break; 3113 } 3114 } 3115 3116 vm_object_unlock(object); 3117} 3118 3119/* 3120 * Routine: vm_object_copy_slowly 3121 * 3122 * Description: 3123 * Copy the specified range of the source 3124 * virtual memory object without using 3125 * protection-based optimizations (such 3126 * as copy-on-write). The pages in the 3127 * region are actually copied. 3128 * 3129 * In/out conditions: 3130 * The caller must hold a reference and a lock 3131 * for the source virtual memory object. The source 3132 * object will be returned *unlocked*. 3133 * 3134 * Results: 3135 * If the copy is completed successfully, KERN_SUCCESS is 3136 * returned. If the caller asserted the interruptible 3137 * argument, and an interruption occurred while waiting 3138 * for a user-generated event, MACH_SEND_INTERRUPTED is 3139 * returned. Other values may be returned to indicate 3140 * hard errors during the copy operation. 3141 * 3142 * A new virtual memory object is returned in a 3143 * parameter (_result_object). The contents of this 3144 * new object, starting at a zero offset, are a copy 3145 * of the source memory region. In the event of 3146 * an error, this parameter will contain the value 3147 * VM_OBJECT_NULL. 
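 *
 *	A minimal caller-side sketch under the conditions above (names are
 *	illustrative and error handling is omitted; not part of the
 *	original code):
 *
 *		vm_object_t	copy_object;
 *		kern_return_t	kr;
 *
 *		vm_object_lock(src_object);	... caller already holds a reference ...
 *		kr = vm_object_copy_slowly(src_object, src_offset, size,
 *					   FALSE, &copy_object);
 *		... src_object comes back unlocked; on KERN_SUCCESS,
 *		    copy_object holds the copied pages starting at offset 0 ...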
3148 */ 3149__private_extern__ kern_return_t 3150vm_object_copy_slowly( 3151 register vm_object_t src_object, 3152 vm_object_offset_t src_offset, 3153 vm_object_size_t size, 3154 boolean_t interruptible, 3155 vm_object_t *_result_object) /* OUT */ 3156{ 3157 vm_object_t new_object; 3158 vm_object_offset_t new_offset; 3159 3160 struct vm_object_fault_info fault_info; 3161 3162 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n", 3163 src_object, src_offset, size, 0, 0); 3164 3165 if (size == 0) { 3166 vm_object_unlock(src_object); 3167 *_result_object = VM_OBJECT_NULL; 3168 return(KERN_INVALID_ARGUMENT); 3169 } 3170 3171 /* 3172 * Prevent destruction of the source object while we copy. 3173 */ 3174 3175 vm_object_reference_locked(src_object); 3176 vm_object_unlock(src_object); 3177 3178 /* 3179 * Create a new object to hold the copied pages. 3180 * A few notes: 3181 * We fill the new object starting at offset 0, 3182 * regardless of the input offset. 3183 * We don't bother to lock the new object within 3184 * this routine, since we have the only reference. 3185 */ 3186 3187 new_object = vm_object_allocate(size); 3188 new_offset = 0; 3189 3190 assert(size == trunc_page_64(size)); /* Will the loop terminate? */ 3191 3192 fault_info.interruptible = interruptible; 3193 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 3194 fault_info.user_tag = 0; 3195 fault_info.lo_offset = src_offset; 3196 fault_info.hi_offset = src_offset + size; 3197 fault_info.no_cache = FALSE; 3198 fault_info.stealth = TRUE; 3199 fault_info.io_sync = FALSE; 3200 fault_info.cs_bypass = FALSE; 3201 fault_info.mark_zf_absent = FALSE; 3202 fault_info.batch_pmap_op = FALSE; 3203 3204 for ( ; 3205 size != 0 ; 3206 src_offset += PAGE_SIZE_64, 3207 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64 3208 ) { 3209 vm_page_t new_page; 3210 vm_fault_return_t result; 3211 3212 vm_object_lock(new_object); 3213 3214 while ((new_page = vm_page_alloc(new_object, new_offset)) 3215 == VM_PAGE_NULL) { 3216 3217 vm_object_unlock(new_object); 3218 3219 if (!vm_page_wait(interruptible)) { 3220 vm_object_deallocate(new_object); 3221 vm_object_deallocate(src_object); 3222 *_result_object = VM_OBJECT_NULL; 3223 return(MACH_SEND_INTERRUPTED); 3224 } 3225 vm_object_lock(new_object); 3226 } 3227 vm_object_unlock(new_object); 3228 3229 do { 3230 vm_prot_t prot = VM_PROT_READ; 3231 vm_page_t _result_page; 3232 vm_page_t top_page; 3233 register 3234 vm_page_t result_page; 3235 kern_return_t error_code; 3236 3237 vm_object_lock(src_object); 3238 vm_object_paging_begin(src_object); 3239 3240 if (size > (vm_size_t) -1) { 3241 /* 32-bit overflow */ 3242 fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 3243 } else { 3244 fault_info.cluster_size = (vm_size_t) size; 3245 assert(fault_info.cluster_size == size); 3246 } 3247 3248 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0); 3249 _result_page = VM_PAGE_NULL; 3250 result = vm_fault_page(src_object, src_offset, 3251 VM_PROT_READ, FALSE, 3252 FALSE, /* page not looked up */ 3253 &prot, &_result_page, &top_page, 3254 (int *)0, 3255 &error_code, FALSE, FALSE, &fault_info); 3256 3257 switch(result) { 3258 case VM_FAULT_SUCCESS: 3259 result_page = _result_page; 3260 3261 /* 3262 * Copy the page to the new object. 3263 * 3264 * POLICY DECISION: 3265 * If result_page is clean, 3266 * we could steal it instead 3267 * of copying. 
3268 */ 3269 3270 vm_page_copy(result_page, new_page); 3271 vm_object_unlock(result_page->object); 3272 3273 /* 3274 * Let go of both pages (make them 3275 * not busy, perform wakeup, activate). 3276 */ 3277 vm_object_lock(new_object); 3278 SET_PAGE_DIRTY(new_page, FALSE); 3279 PAGE_WAKEUP_DONE(new_page); 3280 vm_object_unlock(new_object); 3281 3282 vm_object_lock(result_page->object); 3283 PAGE_WAKEUP_DONE(result_page); 3284 3285 vm_page_lockspin_queues(); 3286 if (!result_page->active && 3287 !result_page->inactive && 3288 !result_page->throttled) 3289 vm_page_activate(result_page); 3290 vm_page_activate(new_page); 3291 vm_page_unlock_queues(); 3292 3293 /* 3294 * Release paging references and 3295 * top-level placeholder page, if any. 3296 */ 3297 3298 vm_fault_cleanup(result_page->object, 3299 top_page); 3300 3301 break; 3302 3303 case VM_FAULT_RETRY: 3304 break; 3305 3306 case VM_FAULT_MEMORY_SHORTAGE: 3307 if (vm_page_wait(interruptible)) 3308 break; 3309 /* fall thru */ 3310 3311 case VM_FAULT_INTERRUPTED: 3312 vm_object_lock(new_object); 3313 VM_PAGE_FREE(new_page); 3314 vm_object_unlock(new_object); 3315 3316 vm_object_deallocate(new_object); 3317 vm_object_deallocate(src_object); 3318 *_result_object = VM_OBJECT_NULL; 3319 return(MACH_SEND_INTERRUPTED); 3320 3321 case VM_FAULT_SUCCESS_NO_VM_PAGE: 3322 /* success but no VM page: fail */ 3323 vm_object_paging_end(src_object); 3324 vm_object_unlock(src_object); 3325 /*FALLTHROUGH*/ 3326 case VM_FAULT_MEMORY_ERROR: 3327 /* 3328 * A policy choice: 3329 * (a) ignore pages that we can't 3330 * copy 3331 * (b) return the null object if 3332 * any page fails [chosen] 3333 */ 3334 3335 vm_object_lock(new_object); 3336 VM_PAGE_FREE(new_page); 3337 vm_object_unlock(new_object); 3338 3339 vm_object_deallocate(new_object); 3340 vm_object_deallocate(src_object); 3341 *_result_object = VM_OBJECT_NULL; 3342 return(error_code ? error_code: 3343 KERN_MEMORY_ERROR); 3344 3345 default: 3346 panic("vm_object_copy_slowly: unexpected error" 3347 " 0x%x from vm_fault_page()\n", result); 3348 } 3349 } while (result != VM_FAULT_SUCCESS); 3350 } 3351 3352 /* 3353 * Lose the extra reference, and return our object. 3354 */ 3355 vm_object_deallocate(src_object); 3356 *_result_object = new_object; 3357 return(KERN_SUCCESS); 3358} 3359 3360/* 3361 * Routine: vm_object_copy_quickly 3362 * 3363 * Purpose: 3364 * Copy the specified range of the source virtual 3365 * memory object, if it can be done without waiting 3366 * for user-generated events. 3367 * 3368 * Results: 3369 * If the copy is successful, the copy is returned in 3370 * the arguments; otherwise, the arguments are not 3371 * affected. 3372 * 3373 * In/out conditions: 3374 * The object should be unlocked on entry and exit. 
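 *
 *	A minimal caller-side sketch (illustrative only; the fallback path
 *	named here is an assumption, not part of this routine):
 *
 *		boolean_t	src_needs_copy, dst_needs_copy;
 *
 *		if (vm_object_copy_quickly(&object, offset, size,
 *					   &src_needs_copy, &dst_needs_copy)) {
 *			... symmetric copy made: the caller is expected to set
 *			    up shadows / write protection as indicated by the
 *			    two needs_copy flags ...
 *		} else {
 *			... strategy was not symmetric; fall back to e.g.
 *			    vm_object_copy_strategically() ...
 *		}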
3375 */
3376
3377 /*ARGSUSED*/
3378 __private_extern__ boolean_t
3379 vm_object_copy_quickly(
3380 vm_object_t *_object, /* INOUT */
3381 __unused vm_object_offset_t offset, /* IN */
3382 __unused vm_object_size_t size, /* IN */
3383 boolean_t *_src_needs_copy, /* OUT */
3384 boolean_t *_dst_needs_copy) /* OUT */
3385 {
3386 vm_object_t object = *_object;
3387 memory_object_copy_strategy_t copy_strategy;
3388
3389 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n",
3390 *_object, offset, size, 0, 0);
3391 if (object == VM_OBJECT_NULL) {
3392 *_src_needs_copy = FALSE;
3393 *_dst_needs_copy = FALSE;
3394 return(TRUE);
3395 }
3396
3397 vm_object_lock(object);
3398
3399 copy_strategy = object->copy_strategy;
3400
3401 switch (copy_strategy) {
3402 case MEMORY_OBJECT_COPY_SYMMETRIC:
3403
3404 /*
3405 * Symmetric copy strategy.
3406 * Make another reference to the object.
3407 * Leave object/offset unchanged.
3408 */
3409
3410 vm_object_reference_locked(object);
3411 object->shadowed = TRUE;
3412 vm_object_unlock(object);
3413
3414 /*
3415 * Both source and destination must make
3416 * shadows, and the source must be made
3417 * read-only if not already.
3418 */
3419
3420 *_src_needs_copy = TRUE;
3421 *_dst_needs_copy = TRUE;
3422
3423 break;
3424
3425 case MEMORY_OBJECT_COPY_DELAY:
3426 vm_object_unlock(object);
3427 return(FALSE);
3428
3429 default:
3430 vm_object_unlock(object);
3431 return(FALSE);
3432 }
3433 return(TRUE);
3434 }
3435
3436 static int copy_call_count = 0;
3437 static int copy_call_sleep_count = 0;
3438 static int copy_call_restart_count = 0;
3439
3440 /*
3441 * Routine: vm_object_copy_call [internal]
3442 *
3443 * Description:
3444 * Copy the source object (src_object), using the
3445 * user-managed copy algorithm.
3446 *
3447 * In/out conditions:
3448 * The source object must be locked on entry. It
3449 * will be *unlocked* on exit.
3450 *
3451 * Results:
3452 * If the copy is successful, KERN_SUCCESS is returned.
3453 * A new object that represents the copied virtual
3454 * memory is returned in a parameter (*_result_object).
3455 * If the return value indicates an error, this parameter
3456 * is not valid.
3457 */
3458 static kern_return_t
3459 vm_object_copy_call(
3460 vm_object_t src_object,
3461 vm_object_offset_t src_offset,
3462 vm_object_size_t size,
3463 vm_object_t *_result_object) /* OUT */
3464 {
3465 kern_return_t kr;
3466 vm_object_t copy;
3467 boolean_t check_ready = FALSE;
3468 uint32_t try_failed_count = 0;
3469
3470 /*
3471 * If a copy is already in progress, wait and retry.
3472 *
3473 * XXX
3474 * Consider making this call interruptible, as Mike
3475 * intended it to be.
3476 *
3477 * XXXO
3478 * Need a counter or version or something to allow
3479 * us to use the copy that the currently requesting
3480 * thread is obtaining -- is it worth adding to the
3481 * vm object structure? Depends on how common this case is.
3482 */
3483 copy_call_count++;
3484 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
3485 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
3486 THREAD_UNINT);
3487 copy_call_restart_count++;
3488 }
3489
3490 /*
3491 * Indicate (for the benefit of memory_object_create_copy)
3492 * that we want a copy for src_object. (Note that we cannot
3493 * do a real assert_wait before calling memory_object_copy,
3494 * so we simply set the flag.)
3495 */ 3496 3497 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL); 3498 vm_object_unlock(src_object); 3499 3500 /* 3501 * Ask the memory manager to give us a memory object 3502 * which represents a copy of the src object. 3503 * The memory manager may give us a memory object 3504 * which we already have, or it may give us a 3505 * new memory object. This memory object will arrive 3506 * via memory_object_create_copy. 3507 */ 3508 3509 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */ 3510 if (kr != KERN_SUCCESS) { 3511 return kr; 3512 } 3513 3514 /* 3515 * Wait for the copy to arrive. 3516 */ 3517 vm_object_lock(src_object); 3518 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) { 3519 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL, 3520 THREAD_UNINT); 3521 copy_call_sleep_count++; 3522 } 3523Retry: 3524 assert(src_object->copy != VM_OBJECT_NULL); 3525 copy = src_object->copy; 3526 if (!vm_object_lock_try(copy)) { 3527 vm_object_unlock(src_object); 3528 3529 try_failed_count++; 3530 mutex_pause(try_failed_count); /* wait a bit */ 3531 3532 vm_object_lock(src_object); 3533 goto Retry; 3534 } 3535 if (copy->vo_size < src_offset+size) 3536 copy->vo_size = src_offset+size; 3537 3538 if (!copy->pager_ready) 3539 check_ready = TRUE; 3540 3541 /* 3542 * Return the copy. 3543 */ 3544 *_result_object = copy; 3545 vm_object_unlock(copy); 3546 vm_object_unlock(src_object); 3547 3548 /* Wait for the copy to be ready. */ 3549 if (check_ready == TRUE) { 3550 vm_object_lock(copy); 3551 while (!copy->pager_ready) { 3552 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); 3553 } 3554 vm_object_unlock(copy); 3555 } 3556 3557 return KERN_SUCCESS; 3558} 3559 3560static int copy_delayed_lock_collisions = 0; 3561static int copy_delayed_max_collisions = 0; 3562static int copy_delayed_lock_contention = 0; 3563static int copy_delayed_protect_iterate = 0; 3564 3565/* 3566 * Routine: vm_object_copy_delayed [internal] 3567 * 3568 * Description: 3569 * Copy the specified virtual memory object, using 3570 * the asymmetric copy-on-write algorithm. 3571 * 3572 * In/out conditions: 3573 * The src_object must be locked on entry. It will be unlocked 3574 * on exit - so the caller must also hold a reference to it. 3575 * 3576 * This routine will not block waiting for user-generated 3577 * events. It is not interruptible. 3578 */ 3579__private_extern__ vm_object_t 3580vm_object_copy_delayed( 3581 vm_object_t src_object, 3582 vm_object_offset_t src_offset, 3583 vm_object_size_t size, 3584 boolean_t src_object_shared) 3585{ 3586 vm_object_t new_copy = VM_OBJECT_NULL; 3587 vm_object_t old_copy; 3588 vm_page_t p; 3589 vm_object_size_t copy_size = src_offset + size; 3590 pmap_flush_context pmap_flush_context_storage; 3591 boolean_t delayed_pmap_flush = FALSE; 3592 3593 3594 int collisions = 0; 3595 /* 3596 * The user-level memory manager wants to see all of the changes 3597 * to this object, but it has promised not to make any changes on 3598 * its own. 3599 * 3600 * Perform an asymmetric copy-on-write, as follows: 3601 * Create a new object, called a "copy object" to hold 3602 * pages modified by the new mapping (i.e., the copy, 3603 * not the original mapping). 3604 * Record the original object as the backing object for 3605 * the copy object. If the original mapping does not 3606 * change a page, it may be used read-only by the copy. 3607 * Record the copy object in the original object. 
3608 * When the original mapping causes a page to be modified,
3609 * it must be copied to a new page that is "pushed" to
3610 * the copy object.
3611 * Mark the new mapping (the copy object) copy-on-write.
3612 * This makes the copy object itself read-only, allowing
3613 * it to be reused if the original mapping makes no
3614 * changes, and simplifying the synchronization required
3615 * in the "push" operation described above.
3616 *
3617 * The copy-on-write is said to be asymmetric because the original
3618 * object is *not* marked copy-on-write. A copied page is pushed
3619 * to the copy object, regardless of which party attempted to modify
3620 * the page.
3621 *
3622 * Repeated asymmetric copy operations may be done. If the
3623 * original object has not been changed since the last copy, its
3624 * copy object can be reused. Otherwise, a new copy object can be
3625 * inserted between the original object and its previous copy
3626 * object. Since any copy object is read-only, this cannot affect
3627 * the contents of the previous copy object.
3628 *
3629 * Note that a copy object is higher in the object tree than the
3630 * original object; therefore, use of the copy object recorded in
3631 * the original object must be done carefully, to avoid deadlock.
3632 */
3633
3634 Retry:
3635
3636 /*
3637 * Wait for paging in progress.
3638 */
3639 if (!src_object->true_share &&
3640 (src_object->paging_in_progress != 0 ||
3641 src_object->activity_in_progress != 0)) {
3642 if (src_object_shared == TRUE) {
3643 vm_object_unlock(src_object);
3644 vm_object_lock(src_object);
3645 src_object_shared = FALSE;
3646 goto Retry;
3647 }
3648 vm_object_paging_wait(src_object, THREAD_UNINT);
3649 }
3650 /*
3651 * See whether we can reuse the result of a previous
3652 * copy operation.
3653 */
3654
3655 old_copy = src_object->copy;
3656 if (old_copy != VM_OBJECT_NULL) {
3657 int lock_granted;
3658
3659 /*
3660 * Try to get the locks (out of order)
3661 */
3662 if (src_object_shared == TRUE)
3663 lock_granted = vm_object_lock_try_shared(old_copy);
3664 else
3665 lock_granted = vm_object_lock_try(old_copy);
3666
3667 if (!lock_granted) {
3668 vm_object_unlock(src_object);
3669
3670 if (collisions++ == 0)
3671 copy_delayed_lock_contention++;
3672 mutex_pause(collisions);
3673
3674 /* Heisenberg Rules */
3675 copy_delayed_lock_collisions++;
3676
3677 if (collisions > copy_delayed_max_collisions)
3678 copy_delayed_max_collisions = collisions;
3679
3680 if (src_object_shared == TRUE)
3681 vm_object_lock_shared(src_object);
3682 else
3683 vm_object_lock(src_object);
3684
3685 goto Retry;
3686 }
3687
3688 /*
3689 * Determine whether the old copy object has
3690 * been modified.
3691 */
3692
3693 if (old_copy->resident_page_count == 0 &&
3694 !old_copy->pager_created) {
3695 /*
3696 * It has not been modified.
3697 *
3698 * Return another reference to
3699 * the existing copy-object if
3700 * we can safely grow it (if
3701 * needed).
3702 */
3703
3704 if (old_copy->vo_size < copy_size) {
3705 if (src_object_shared == TRUE) {
3706 vm_object_unlock(old_copy);
3707 vm_object_unlock(src_object);
3708
3709 vm_object_lock(src_object);
3710 src_object_shared = FALSE;
3711 goto Retry;
3712 }
3713 /*
3714 * We can't perform a delayed copy if any of the
3715 * pages in the extended range are wired (because
3716 * we can't safely take write permission away from
3717 * wired pages). If the pages aren't wired, then
3718 * go ahead and protect them.
3719 */ 3720 copy_delayed_protect_iterate++; 3721 3722 pmap_flush_context_init(&pmap_flush_context_storage); 3723 delayed_pmap_flush = FALSE; 3724 3725 queue_iterate(&src_object->memq, p, vm_page_t, listq) { 3726 if (!p->fictitious && 3727 p->offset >= old_copy->vo_size && 3728 p->offset < copy_size) { 3729 if (VM_PAGE_WIRED(p)) { 3730 vm_object_unlock(old_copy); 3731 vm_object_unlock(src_object); 3732 3733 if (new_copy != VM_OBJECT_NULL) { 3734 vm_object_unlock(new_copy); 3735 vm_object_deallocate(new_copy); 3736 } 3737 if (delayed_pmap_flush == TRUE) 3738 pmap_flush(&pmap_flush_context_storage); 3739 3740 return VM_OBJECT_NULL; 3741 } else { 3742 pmap_page_protect_options(p->phys_page, (VM_PROT_ALL & ~VM_PROT_WRITE), 3743 PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); 3744 delayed_pmap_flush = TRUE; 3745 } 3746 } 3747 } 3748 if (delayed_pmap_flush == TRUE) 3749 pmap_flush(&pmap_flush_context_storage); 3750 3751 old_copy->vo_size = copy_size; 3752 } 3753 if (src_object_shared == TRUE) 3754 vm_object_reference_shared(old_copy); 3755 else 3756 vm_object_reference_locked(old_copy); 3757 vm_object_unlock(old_copy); 3758 vm_object_unlock(src_object); 3759 3760 if (new_copy != VM_OBJECT_NULL) { 3761 vm_object_unlock(new_copy); 3762 vm_object_deallocate(new_copy); 3763 } 3764 return(old_copy); 3765 } 3766 3767 3768 3769 /* 3770 * Adjust the size argument so that the newly-created 3771 * copy object will be large enough to back either the 3772 * old copy object or the new mapping. 3773 */ 3774 if (old_copy->vo_size > copy_size) 3775 copy_size = old_copy->vo_size; 3776 3777 if (new_copy == VM_OBJECT_NULL) { 3778 vm_object_unlock(old_copy); 3779 vm_object_unlock(src_object); 3780 new_copy = vm_object_allocate(copy_size); 3781 vm_object_lock(src_object); 3782 vm_object_lock(new_copy); 3783 3784 src_object_shared = FALSE; 3785 goto Retry; 3786 } 3787 new_copy->vo_size = copy_size; 3788 3789 /* 3790 * The copy-object is always made large enough to 3791 * completely shadow the original object, since 3792 * it may have several users who want to shadow 3793 * the original object at different points. 3794 */ 3795 3796 assert((old_copy->shadow == src_object) && 3797 (old_copy->vo_shadow_offset == (vm_object_offset_t) 0)); 3798 3799 } else if (new_copy == VM_OBJECT_NULL) { 3800 vm_object_unlock(src_object); 3801 new_copy = vm_object_allocate(copy_size); 3802 vm_object_lock(src_object); 3803 vm_object_lock(new_copy); 3804 3805 src_object_shared = FALSE; 3806 goto Retry; 3807 } 3808 3809 /* 3810 * We now have the src object locked, and the new copy object 3811 * allocated and locked (and potentially the old copy locked). 3812 * Before we go any further, make sure we can still perform 3813 * a delayed copy, as the situation may have changed. 3814 * 3815 * Specifically, we can't perform a delayed copy if any of the 3816 * pages in the range are wired (because we can't safely take 3817 * write permission away from wired pages). If the pages aren't 3818 * wired, then go ahead and protect them. 
3819 */ 3820 copy_delayed_protect_iterate++; 3821 3822 pmap_flush_context_init(&pmap_flush_context_storage); 3823 delayed_pmap_flush = FALSE; 3824 3825 queue_iterate(&src_object->memq, p, vm_page_t, listq) { 3826 if (!p->fictitious && p->offset < copy_size) { 3827 if (VM_PAGE_WIRED(p)) { 3828 if (old_copy) 3829 vm_object_unlock(old_copy); 3830 vm_object_unlock(src_object); 3831 vm_object_unlock(new_copy); 3832 vm_object_deallocate(new_copy); 3833 3834 if (delayed_pmap_flush == TRUE) 3835 pmap_flush(&pmap_flush_context_storage); 3836 3837 return VM_OBJECT_NULL; 3838 } else { 3839 pmap_page_protect_options(p->phys_page, (VM_PROT_ALL & ~VM_PROT_WRITE), 3840 PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); 3841 delayed_pmap_flush = TRUE; 3842 } 3843 } 3844 } 3845 if (delayed_pmap_flush == TRUE) 3846 pmap_flush(&pmap_flush_context_storage); 3847 3848 if (old_copy != VM_OBJECT_NULL) { 3849 /* 3850 * Make the old copy-object shadow the new one. 3851 * It will receive no more pages from the original 3852 * object. 3853 */ 3854 3855 /* remove ref. from old_copy */ 3856 vm_object_lock_assert_exclusive(src_object); 3857 src_object->ref_count--; 3858 assert(src_object->ref_count > 0); 3859 vm_object_lock_assert_exclusive(old_copy); 3860 old_copy->shadow = new_copy; 3861 vm_object_lock_assert_exclusive(new_copy); 3862 assert(new_copy->ref_count > 0); 3863 new_copy->ref_count++; /* for old_copy->shadow ref. */ 3864 3865#if TASK_SWAPPER 3866 if (old_copy->res_count) { 3867 VM_OBJ_RES_INCR(new_copy); 3868 VM_OBJ_RES_DECR(src_object); 3869 } 3870#endif 3871 3872 vm_object_unlock(old_copy); /* done with old_copy */ 3873 } 3874 3875 /* 3876 * Point the new copy at the existing object. 3877 */ 3878 vm_object_lock_assert_exclusive(new_copy); 3879 new_copy->shadow = src_object; 3880 new_copy->vo_shadow_offset = 0; 3881 new_copy->shadowed = TRUE; /* caller must set needs_copy */ 3882 3883 vm_object_lock_assert_exclusive(src_object); 3884 vm_object_reference_locked(src_object); 3885 src_object->copy = new_copy; 3886 vm_object_unlock(src_object); 3887 vm_object_unlock(new_copy); 3888 3889 XPR(XPR_VM_OBJECT, 3890 "vm_object_copy_delayed: used copy object %X for source %X\n", 3891 new_copy, src_object, 0, 0, 0); 3892 3893 return new_copy; 3894} 3895 3896/* 3897 * Routine: vm_object_copy_strategically 3898 * 3899 * Purpose: 3900 * Perform a copy according to the source object's 3901 * declared strategy. This operation may block, 3902 * and may be interrupted. 3903 */ 3904__private_extern__ kern_return_t 3905vm_object_copy_strategically( 3906 register vm_object_t src_object, 3907 vm_object_offset_t src_offset, 3908 vm_object_size_t size, 3909 vm_object_t *dst_object, /* OUT */ 3910 vm_object_offset_t *dst_offset, /* OUT */ 3911 boolean_t *dst_needs_copy) /* OUT */ 3912{ 3913 boolean_t result; 3914 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */ 3915 boolean_t object_lock_shared = FALSE; 3916 memory_object_copy_strategy_t copy_strategy; 3917 3918 assert(src_object != VM_OBJECT_NULL); 3919 3920 copy_strategy = src_object->copy_strategy; 3921 3922 if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) { 3923 vm_object_lock_shared(src_object); 3924 object_lock_shared = TRUE; 3925 } else 3926 vm_object_lock(src_object); 3927 3928 /* 3929 * The copy strategy is only valid if the memory manager 3930 * is "ready". Internal objects are always ready. 
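 *
 * Once the pager is ready, the copy is dispatched according to the
 * declared strategy (see the switch statement below):
 *
 *	MEMORY_OBJECT_COPY_DELAY	-> vm_object_copy_delayed(),
 *					   falling through to the COPY_NONE
 *					   case if a delayed copy is not
 *					   possible
 *	MEMORY_OBJECT_COPY_NONE		-> vm_object_copy_slowly()
 *	MEMORY_OBJECT_COPY_CALL		-> vm_object_copy_call()
 *	MEMORY_OBJECT_COPY_SYMMETRIC	-> returns KERN_MEMORY_RESTART_COPY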
3931 */ 3932 3933 while (!src_object->internal && !src_object->pager_ready) { 3934 wait_result_t wait_result; 3935 3936 if (object_lock_shared == TRUE) { 3937 vm_object_unlock(src_object); 3938 vm_object_lock(src_object); 3939 object_lock_shared = FALSE; 3940 continue; 3941 } 3942 wait_result = vm_object_sleep( src_object, 3943 VM_OBJECT_EVENT_PAGER_READY, 3944 interruptible); 3945 if (wait_result != THREAD_AWAKENED) { 3946 vm_object_unlock(src_object); 3947 *dst_object = VM_OBJECT_NULL; 3948 *dst_offset = 0; 3949 *dst_needs_copy = FALSE; 3950 return(MACH_SEND_INTERRUPTED); 3951 } 3952 } 3953 3954 /* 3955 * Use the appropriate copy strategy. 3956 */ 3957 3958 switch (copy_strategy) { 3959 case MEMORY_OBJECT_COPY_DELAY: 3960 *dst_object = vm_object_copy_delayed(src_object, 3961 src_offset, size, object_lock_shared); 3962 if (*dst_object != VM_OBJECT_NULL) { 3963 *dst_offset = src_offset; 3964 *dst_needs_copy = TRUE; 3965 result = KERN_SUCCESS; 3966 break; 3967 } 3968 vm_object_lock(src_object); 3969 /* fall thru when delayed copy not allowed */ 3970 3971 case MEMORY_OBJECT_COPY_NONE: 3972 result = vm_object_copy_slowly(src_object, src_offset, size, 3973 interruptible, dst_object); 3974 if (result == KERN_SUCCESS) { 3975 *dst_offset = 0; 3976 *dst_needs_copy = FALSE; 3977 } 3978 break; 3979 3980 case MEMORY_OBJECT_COPY_CALL: 3981 result = vm_object_copy_call(src_object, src_offset, size, 3982 dst_object); 3983 if (result == KERN_SUCCESS) { 3984 *dst_offset = src_offset; 3985 *dst_needs_copy = TRUE; 3986 } 3987 break; 3988 3989 case MEMORY_OBJECT_COPY_SYMMETRIC: 3990 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0); 3991 vm_object_unlock(src_object); 3992 result = KERN_MEMORY_RESTART_COPY; 3993 break; 3994 3995 default: 3996 panic("copy_strategically: bad strategy"); 3997 result = KERN_INVALID_ARGUMENT; 3998 } 3999 return(result); 4000} 4001 4002/* 4003 * vm_object_shadow: 4004 * 4005 * Create a new object which is backed by the 4006 * specified existing object range. The source 4007 * object reference is deallocated. 4008 * 4009 * The new object and offset into that object 4010 * are returned in the source parameters. 4011 */ 4012boolean_t vm_object_shadow_check = TRUE; 4013 4014__private_extern__ boolean_t 4015vm_object_shadow( 4016 vm_object_t *object, /* IN/OUT */ 4017 vm_object_offset_t *offset, /* IN/OUT */ 4018 vm_object_size_t length) 4019{ 4020 register vm_object_t source; 4021 register vm_object_t result; 4022 4023 source = *object; 4024 assert(source != VM_OBJECT_NULL); 4025 if (source == VM_OBJECT_NULL) 4026 return FALSE; 4027 4028#if 0 4029 /* 4030 * XXX FBDP 4031 * This assertion is valid but it gets triggered by Rosetta for example 4032 * due to a combination of vm_remap() that changes a VM object's 4033 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY) 4034 * that then sets "needs_copy" on its map entry. This creates a 4035 * mapping situation that VM should never see and doesn't know how to 4036 * handle. 4037 * It's not clear if this can create any real problem but we should 4038 * look into fixing this, probably by having vm_protect(VM_PROT_COPY) 4039 * do more than just set "needs_copy" to handle the copy-on-write... 4040 * In the meantime, let's disable the assertion. 4041 */ 4042 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC); 4043#endif 4044 4045 /* 4046 * Determine if we really need a shadow. 
4047 * 4048 * If the source object is larger than what we are trying 4049 * to create, then force the shadow creation even if the 4050 * ref count is 1. This will allow us to [potentially] 4051 * collapse the underlying object away in the future 4052 * (freeing up the extra data it might contain and that 4053 * we don't need). 4054 */ 4055 if (vm_object_shadow_check && 4056 source->vo_size == length && 4057 source->ref_count == 1 && 4058 (source->shadow == VM_OBJECT_NULL || 4059 source->shadow->copy == VM_OBJECT_NULL) ) 4060 { 4061 source->shadowed = FALSE; 4062 return FALSE; 4063 } 4064 4065 /* 4066 * Allocate a new object with the given length 4067 */ 4068 4069 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL) 4070 panic("vm_object_shadow: no object for shadowing"); 4071 4072 /* 4073 * The new object shadows the source object, adding 4074 * a reference to it. Our caller changes his reference 4075 * to point to the new object, removing a reference to 4076 * the source object. Net result: no change of reference 4077 * count. 4078 */ 4079 result->shadow = source; 4080 4081 /* 4082 * Store the offset into the source object, 4083 * and fix up the offset into the new object. 4084 */ 4085 4086 result->vo_shadow_offset = *offset; 4087 4088 /* 4089 * Return the new things 4090 */ 4091 4092 *offset = 0; 4093 *object = result; 4094 return TRUE; 4095} 4096 4097/* 4098 * The relationship between vm_object structures and 4099 * the memory_object requires careful synchronization. 4100 * 4101 * All associations are created by memory_object_create_named 4102 * for external pagers and vm_object_pager_create for internal 4103 * objects as follows: 4104 * 4105 * pager: the memory_object itself, supplied by 4106 * the user requesting a mapping (or the kernel, 4107 * when initializing internal objects); the 4108 * kernel simulates holding send rights by keeping 4109 * a port reference; 4110 * 4111 * pager_request: 4112 * the memory object control port, 4113 * created by the kernel; the kernel holds 4114 * receive (and ownership) rights to this 4115 * port, but no other references. 4116 * 4117 * When initialization is complete, the "initialized" field 4118 * is asserted. Other mappings using a particular memory object, 4119 * and any references to the vm_object gained through the 4120 * port association must wait for this initialization to occur. 4121 * 4122 * In order to allow the memory manager to set attributes before 4123 * requests (notably virtual copy operations, but also data or 4124 * unlock requests) are made, a "ready" attribute is made available. 4125 * Only the memory manager may affect the value of this attribute. 4126 * Its value does not affect critical kernel functions, such as 4127 * internal object initialization or destruction. [Furthermore, 4128 * memory objects created by the kernel are assumed to be ready 4129 * immediately; the default memory manager need not explicitly 4130 * set the "ready" attribute.] 4131 * 4132 * [Both the "initialized" and "ready" attribute wait conditions 4133 * use the "pager" field as the wait event.] 4134 * 4135 * The port associations can be broken down by any of the 4136 * following routines: 4137 * vm_object_terminate: 4138 * No references to the vm_object remain, and 4139 * the object cannot (or will not) be cached. 4140 * This is the normal case, and is done even 4141 * though one of the other cases has already been 4142 * done. 
4143 * memory_object_destroy: 4144 * The memory manager has requested that the 4145 * kernel relinquish references to the memory 4146 * object. [The memory manager may not want to 4147 * destroy the memory object, but may wish to 4148 * refuse or tear down existing memory mappings.] 4149 * 4150 * Each routine that breaks an association must break all of 4151 * them at once. At some later time, that routine must clear 4152 * the pager field and release the memory object references. 4153 * [Furthermore, each routine must cope with the simultaneous 4154 * or previous operations of the others.] 4155 * 4156 * In addition to the lock on the object, the vm_object_hash_lock 4157 * governs the associations. References gained through the 4158 * association require use of the hash lock. 4159 * 4160 * Because the pager field may be cleared spontaneously, it 4161 * cannot be used to determine whether a memory object has 4162 * ever been associated with a particular vm_object. [This 4163 * knowledge is important to the shadow object mechanism.] 4164 * For this reason, an additional "created" attribute is 4165 * provided. 4166 * 4167 * During various paging operations, the pager reference found in the 4168 * vm_object must be valid. To prevent this from being released, 4169 * (other than being removed, i.e., made null), routines may use 4170 * the vm_object_paging_begin/end routines [actually, macros]. 4171 * The implementation uses the "paging_in_progress" and "wanted" fields. 4172 * [Operations that alter the validity of the pager values include the 4173 * termination routines and vm_object_collapse.] 4174 */ 4175 4176 4177/* 4178 * Routine: vm_object_enter 4179 * Purpose: 4180 * Find a VM object corresponding to the given 4181 * pager; if no such object exists, create one, 4182 * and initialize the pager. 4183 */ 4184vm_object_t 4185vm_object_enter( 4186 memory_object_t pager, 4187 vm_object_size_t size, 4188 boolean_t internal, 4189 boolean_t init, 4190 boolean_t named) 4191{ 4192 register vm_object_t object; 4193 vm_object_t new_object; 4194 boolean_t must_init; 4195 vm_object_hash_entry_t entry, new_entry; 4196 uint32_t try_failed_count = 0; 4197 lck_mtx_t *lck; 4198 4199 if (pager == MEMORY_OBJECT_NULL) 4200 return(vm_object_allocate(size)); 4201 4202 new_object = VM_OBJECT_NULL; 4203 new_entry = VM_OBJECT_HASH_ENTRY_NULL; 4204 must_init = init; 4205 4206 /* 4207 * Look for an object associated with this port. 4208 */ 4209Retry: 4210 lck = vm_object_hash_lock_spin(pager); 4211 do { 4212 entry = vm_object_hash_lookup(pager, FALSE); 4213 4214 if (entry == VM_OBJECT_HASH_ENTRY_NULL) { 4215 if (new_object == VM_OBJECT_NULL) { 4216 /* 4217 * We must unlock to create a new object; 4218 * if we do so, we must try the lookup again. 4219 */ 4220 vm_object_hash_unlock(lck); 4221 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL); 4222 new_entry = vm_object_hash_entry_alloc(pager); 4223 new_object = vm_object_allocate(size); 4224 lck = vm_object_hash_lock_spin(pager); 4225 } else { 4226 /* 4227 * Lookup failed twice, and we have something 4228 * to insert; set the object. 4229 */ 4230 vm_object_hash_insert(new_entry, new_object); 4231 entry = new_entry; 4232 new_entry = VM_OBJECT_HASH_ENTRY_NULL; 4233 new_object = VM_OBJECT_NULL; 4234 must_init = TRUE; 4235 } 4236 } else if (entry->object == VM_OBJECT_NULL) { 4237 /* 4238 * If a previous object is being terminated, 4239 * we must wait for the termination message 4240 * to be queued (and lookup the entry again). 
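 *	In outline, the surrounding loop resolves to one of three cases
 *	on each pass:
 *
 *		1. no entry and nothing preallocated yet:
 *		   drop the hash lock, allocate a hash entry and a new
 *		   vm_object, retake the lock and look up again;
 *		2. no entry, but we preallocated on an earlier pass:
 *		   insert our entry/object and take the initialization
 *		   path (must_init = TRUE);
 *		3. an entry exists but its object is VM_OBJECT_NULL:
 *		   a previous object is being terminated, so block on the
 *		   pager event and retry the lookup afterwards.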
4241 */ 4242 entry->waiting = TRUE; 4243 entry = VM_OBJECT_HASH_ENTRY_NULL; 4244 assert_wait((event_t) pager, THREAD_UNINT); 4245 vm_object_hash_unlock(lck); 4246 4247 thread_block(THREAD_CONTINUE_NULL); 4248 lck = vm_object_hash_lock_spin(pager); 4249 } 4250 } while (entry == VM_OBJECT_HASH_ENTRY_NULL); 4251 4252 object = entry->object; 4253 assert(object != VM_OBJECT_NULL); 4254 4255 if (!must_init) { 4256 if ( !vm_object_lock_try(object)) { 4257 4258 vm_object_hash_unlock(lck); 4259 4260 try_failed_count++; 4261 mutex_pause(try_failed_count); /* wait a bit */ 4262 goto Retry; 4263 } 4264 assert(!internal || object->internal); 4265#if VM_OBJECT_CACHE 4266 if (object->ref_count == 0) { 4267 if ( !vm_object_cache_lock_try()) { 4268 4269 vm_object_hash_unlock(lck); 4270 vm_object_unlock(object); 4271 4272 try_failed_count++; 4273 mutex_pause(try_failed_count); /* wait a bit */ 4274 goto Retry; 4275 } 4276 XPR(XPR_VM_OBJECT_CACHE, 4277 "vm_object_enter: removing %x from cache, head (%x, %x)\n", 4278 object, 4279 vm_object_cached_list.next, 4280 vm_object_cached_list.prev, 0,0); 4281 queue_remove(&vm_object_cached_list, object, 4282 vm_object_t, cached_list); 4283 vm_object_cached_count--; 4284 4285 vm_object_cache_unlock(); 4286 } 4287#endif 4288 if (named) { 4289 assert(!object->named); 4290 object->named = TRUE; 4291 } 4292 vm_object_lock_assert_exclusive(object); 4293 object->ref_count++; 4294 vm_object_res_reference(object); 4295 4296 vm_object_hash_unlock(lck); 4297 vm_object_unlock(object); 4298 4299 VM_STAT_INCR(hits); 4300 } else 4301 vm_object_hash_unlock(lck); 4302 4303 assert(object->ref_count > 0); 4304 4305 VM_STAT_INCR(lookups); 4306 4307 XPR(XPR_VM_OBJECT, 4308 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n", 4309 pager, object, must_init, 0, 0); 4310 4311 /* 4312 * If we raced to create a vm_object but lost, let's 4313 * throw away ours. 4314 */ 4315 4316 if (new_object != VM_OBJECT_NULL) 4317 vm_object_deallocate(new_object); 4318 4319 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL) 4320 vm_object_hash_entry_free(new_entry); 4321 4322 if (must_init) { 4323 memory_object_control_t control; 4324 4325 /* 4326 * Allocate request port. 4327 */ 4328 4329 control = memory_object_control_allocate(object); 4330 assert (control != MEMORY_OBJECT_CONTROL_NULL); 4331 4332 vm_object_lock(object); 4333 assert(object != kernel_object); 4334 4335 /* 4336 * Copy the reference we were given. 4337 */ 4338 4339 memory_object_reference(pager); 4340 object->pager_created = TRUE; 4341 object->pager = pager; 4342 object->internal = internal; 4343 object->pager_trusted = internal; 4344 if (!internal) { 4345 /* copy strategy invalid until set by memory manager */ 4346 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID; 4347 } 4348 object->pager_control = control; 4349 object->pager_ready = FALSE; 4350 4351 vm_object_unlock(object); 4352 4353 /* 4354 * Let the pager know we're using it. 4355 */ 4356 4357 (void) memory_object_init(pager, 4358 object->pager_control, 4359 PAGE_SIZE); 4360 4361 vm_object_lock(object); 4362 if (named) 4363 object->named = TRUE; 4364 if (internal) { 4365 object->pager_ready = TRUE; 4366 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); 4367 } 4368 4369 object->pager_initialized = TRUE; 4370 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED); 4371 } else { 4372 vm_object_lock(object); 4373 } 4374 4375 /* 4376 * [At this point, the object must be locked] 4377 */ 4378 4379 /* 4380 * Wait for the work above to be done by the first 4381 * thread to map this object. 
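 *	This wait pairs with the wakeup issued earlier in this routine by
 *	whichever thread actually performed the initialization:
 *
 *		initializing thread (object locked):
 *			object->pager_initialized = TRUE;
 *			vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED);
 *
 *		every other thread (object locked):
 *			while (!object->pager_initialized)
 *				vm_object_sleep(object,
 *						VM_OBJECT_EVENT_INITIALIZED,
 *						THREAD_UNINT);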
4382 */ 4383 4384 while (!object->pager_initialized) { 4385 vm_object_sleep(object, 4386 VM_OBJECT_EVENT_INITIALIZED, 4387 THREAD_UNINT); 4388 } 4389 vm_object_unlock(object); 4390 4391 XPR(XPR_VM_OBJECT, 4392 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n", 4393 object, object->pager, internal, 0,0); 4394 return(object); 4395} 4396 4397/* 4398 * Routine: vm_object_pager_create 4399 * Purpose: 4400 * Create a memory object for an internal object. 4401 * In/out conditions: 4402 * The object is locked on entry and exit; 4403 * it may be unlocked within this call. 4404 * Limitations: 4405 * Only one thread may be performing a 4406 * vm_object_pager_create on an object at 4407 * a time. Presumably, only the pageout 4408 * daemon will be using this routine. 4409 */ 4410 4411void 4412vm_object_pager_create( 4413 register vm_object_t object) 4414{ 4415 memory_object_t pager; 4416 vm_object_hash_entry_t entry; 4417 lck_mtx_t *lck; 4418#if MACH_PAGEMAP 4419 vm_object_size_t size; 4420 vm_external_map_t map; 4421#endif /* MACH_PAGEMAP */ 4422 4423 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n", 4424 object, 0,0,0,0); 4425 4426 assert(object != kernel_object); 4427 4428 if (memory_manager_default_check() != KERN_SUCCESS) 4429 return; 4430 4431 /* 4432 * Prevent collapse or termination by holding a paging reference 4433 */ 4434 4435 vm_object_paging_begin(object); 4436 if (object->pager_created) { 4437 /* 4438 * Someone else got to it first... 4439 * wait for them to finish initializing the ports 4440 */ 4441 while (!object->pager_initialized) { 4442 vm_object_sleep(object, 4443 VM_OBJECT_EVENT_INITIALIZED, 4444 THREAD_UNINT); 4445 } 4446 vm_object_paging_end(object); 4447 return; 4448 } 4449 4450 /* 4451 * Indicate that a memory object has been assigned 4452 * before dropping the lock, to prevent a race. 4453 */ 4454 4455 object->pager_created = TRUE; 4456 object->paging_offset = 0; 4457 4458#if MACH_PAGEMAP 4459 size = object->vo_size; 4460#endif /* MACH_PAGEMAP */ 4461 vm_object_unlock(object); 4462 4463#if MACH_PAGEMAP 4464 if (DEFAULT_PAGER_IS_ACTIVE) { 4465 map = vm_external_create(size); 4466 vm_object_lock(object); 4467 assert(object->vo_size == size); 4468 object->existence_map = map; 4469 vm_object_unlock(object); 4470 } 4471#endif /* MACH_PAGEMAP */ 4472 4473 if ((uint32_t) object->vo_size != object->vo_size) { 4474 panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n", 4475 (uint64_t) object->vo_size); 4476 } 4477 4478 /* 4479 * Create the [internal] pager, and associate it with this object. 4480 * 4481 * We make the association here so that vm_object_enter() 4482 * can look up the object to complete initializing it. No 4483 * user will ever map this object. 4484 */ 4485 { 4486 memory_object_default_t dmm; 4487 4488 /* acquire a reference for the default memory manager */ 4489 dmm = memory_manager_default_reference(); 4490 4491 assert(object->temporary); 4492 4493 /* create our new memory object */ 4494 assert((vm_size_t) object->vo_size == object->vo_size); 4495 (void) memory_object_create(dmm, (vm_size_t) object->vo_size, 4496 &pager); 4497 4498 memory_object_default_deallocate(dmm); 4499 } 4500 4501 entry = vm_object_hash_entry_alloc(pager); 4502 4503 lck = vm_object_hash_lock_spin(pager); 4504 vm_object_hash_insert(entry, object); 4505 vm_object_hash_unlock(lck); 4506 4507 /* 4508 * A reference was returned by 4509 * memory_object_create(), and it is 4510 * copied by vm_object_enter(). 
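 *	Spelled out, the reference hand-off around the calls below is:
 *
 *		memory_object_create(dmm, size, &pager);
 *			// pager comes back holding one reference for us
 *		vm_object_enter(pager, size, TRUE, TRUE, FALSE);
 *			// takes its own reference via memory_object_reference()
 *		memory_object_deallocate(pager);
 *			// drop the creation reference; the object keeps its copy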
4511 */ 4512 4513 if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object) 4514 panic("vm_object_pager_create: mismatch"); 4515 4516 /* 4517 * Drop the reference we were passed. 4518 */ 4519 memory_object_deallocate(pager); 4520 4521 vm_object_lock(object); 4522 4523 /* 4524 * Release the paging reference 4525 */ 4526 vm_object_paging_end(object); 4527} 4528 4529void 4530vm_object_compressor_pager_create( 4531 register vm_object_t object) 4532{ 4533 memory_object_t pager; 4534 vm_object_hash_entry_t entry; 4535 lck_mtx_t *lck; 4536 4537 assert(object != kernel_object); 4538 4539 /* 4540 * Prevent collapse or termination by holding a paging reference 4541 */ 4542 4543 vm_object_paging_begin(object); 4544 if (object->pager_created) { 4545 /* 4546 * Someone else got to it first... 4547 * wait for them to finish initializing the ports 4548 */ 4549 while (!object->pager_initialized) { 4550 vm_object_sleep(object, 4551 VM_OBJECT_EVENT_INITIALIZED, 4552 THREAD_UNINT); 4553 } 4554 vm_object_paging_end(object); 4555 return; 4556 } 4557 4558 /* 4559 * Indicate that a memory object has been assigned 4560 * before dropping the lock, to prevent a race. 4561 */ 4562 4563 object->pager_created = TRUE; 4564 object->paging_offset = 0; 4565 4566 vm_object_unlock(object); 4567 4568 if ((uint32_t) (object->vo_size/PAGE_SIZE) != 4569 (object->vo_size/PAGE_SIZE)) { 4570 panic("vm_object_compressor_pager_create(%p): " 4571 "object size 0x%llx >= 0x%llx\n", 4572 object, 4573 (uint64_t) object->vo_size, 4574 0x0FFFFFFFFULL*PAGE_SIZE); 4575 } 4576 4577 /* 4578 * Create the [internal] pager, and associate it with this object. 4579 * 4580 * We make the association here so that vm_object_enter() 4581 * can look up the object to complete initializing it. No 4582 * user will ever map this object. 4583 */ 4584 { 4585 assert(object->temporary); 4586 4587 /* create our new memory object */ 4588 assert((uint32_t) (object->vo_size/PAGE_SIZE) == 4589 (object->vo_size/PAGE_SIZE)); 4590 (void) compressor_memory_object_create( 4591 (memory_object_size_t) object->vo_size, 4592 &pager); 4593 if (pager == NULL) { 4594 panic("vm_object_compressor_pager_create(): " 4595 "no pager for object %p size 0x%llx\n", 4596 object, (uint64_t) object->vo_size); 4597 } 4598 } 4599 4600 entry = vm_object_hash_entry_alloc(pager); 4601 4602 lck = vm_object_hash_lock_spin(pager); 4603 vm_object_hash_insert(entry, object); 4604 vm_object_hash_unlock(lck); 4605 4606 /* 4607 * A reference was returned by 4608 * memory_object_create(), and it is 4609 * copied by vm_object_enter(). 4610 */ 4611 4612 if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object) 4613 panic("vm_object_compressor_pager_create: mismatch"); 4614 4615 /* 4616 * Drop the reference we were passed. 4617 */ 4618 memory_object_deallocate(pager); 4619 4620 vm_object_lock(object); 4621 4622 /* 4623 * Release the paging reference 4624 */ 4625 vm_object_paging_end(object); 4626} 4627 4628/* 4629 * Routine: vm_object_remove 4630 * Purpose: 4631 * Eliminate the pager/object association 4632 * for this pager. 4633 * Conditions: 4634 * The object cache must be locked. 
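 *	Note that the hash entry itself is not freed here: it is left in
 *	place with entry->object set to VM_OBJECT_NULL, which is exactly
 *	the "previous object is being terminated" state that
 *	vm_object_enter() waits out before retrying its lookup.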
4635 */ 4636__private_extern__ void 4637vm_object_remove( 4638 vm_object_t object) 4639{ 4640 memory_object_t pager; 4641 4642 if ((pager = object->pager) != MEMORY_OBJECT_NULL) { 4643 vm_object_hash_entry_t entry; 4644 4645 entry = vm_object_hash_lookup(pager, FALSE); 4646 if (entry != VM_OBJECT_HASH_ENTRY_NULL) 4647 entry->object = VM_OBJECT_NULL; 4648 } 4649 4650} 4651 4652/* 4653 * Global variables for vm_object_collapse(): 4654 * 4655 * Counts for normal collapses and bypasses. 4656 * Debugging variables, to watch or disable collapse. 4657 */ 4658static long object_collapses = 0; 4659static long object_bypasses = 0; 4660 4661static boolean_t vm_object_collapse_allowed = TRUE; 4662static boolean_t vm_object_bypass_allowed = TRUE; 4663 4664#if MACH_PAGEMAP 4665static int vm_external_discarded; 4666static int vm_external_collapsed; 4667#endif 4668 4669unsigned long vm_object_collapse_encrypted = 0; 4670 4671/* 4672 * Routine: vm_object_do_collapse 4673 * Purpose: 4674 * Collapse an object with the object backing it. 4675 * Pages in the backing object are moved into the 4676 * parent, and the backing object is deallocated. 4677 * Conditions: 4678 * Both objects and the cache are locked; the page 4679 * queues are unlocked. 4680 * 4681 */ 4682static void 4683vm_object_do_collapse( 4684 vm_object_t object, 4685 vm_object_t backing_object) 4686{ 4687 vm_page_t p, pp; 4688 vm_object_offset_t new_offset, backing_offset; 4689 vm_object_size_t size; 4690 4691 vm_object_lock_assert_exclusive(object); 4692 vm_object_lock_assert_exclusive(backing_object); 4693 4694 backing_offset = object->vo_shadow_offset; 4695 size = object->vo_size; 4696 4697 /* 4698 * Move all in-memory pages from backing_object 4699 * to the parent. Pages that have been paged out 4700 * will be overwritten by any of the parent's 4701 * pages that shadow them. 4702 */ 4703 4704 while (!queue_empty(&backing_object->memq)) { 4705 4706 p = (vm_page_t) queue_first(&backing_object->memq); 4707 4708 new_offset = (p->offset - backing_offset); 4709 4710 assert(!p->busy || p->absent); 4711 4712 /* 4713 * If the parent has a page here, or if 4714 * this page falls outside the parent, 4715 * dispose of it. 4716 * 4717 * Otherwise, move it as planned. 4718 */ 4719 4720 if (p->offset < backing_offset || new_offset >= size) { 4721 VM_PAGE_FREE(p); 4722 } else { 4723 /* 4724 * ENCRYPTED SWAP: 4725 * The encryption key includes the "pager" and the 4726 * "paging_offset". These will not change during the 4727 * object collapse, so we can just move an encrypted 4728 * page from one object to the other in this case. 4729 * We can't decrypt the page here, since we can't drop 4730 * the object lock. 4731 */ 4732 if (p->encrypted) { 4733 vm_object_collapse_encrypted++; 4734 } 4735 pp = vm_page_lookup(object, new_offset); 4736 if (pp == VM_PAGE_NULL) { 4737 4738 /* 4739 * Parent now has no page. 4740 * Move the backing object's page up. 4741 */ 4742 4743 vm_page_rename(p, object, new_offset, TRUE); 4744#if MACH_PAGEMAP 4745 } else if (pp->absent) { 4746 4747 /* 4748 * Parent has an absent page... 4749 * it's not being paged in, so 4750 * it must really be missing from 4751 * the parent. 4752 * 4753 * Throw out the absent page... 4754 * any faults looking for that 4755 * page will restart with the new 4756 * one. 4757 */ 4758 4759 VM_PAGE_FREE(pp); 4760 vm_page_rename(p, object, new_offset, TRUE); 4761#endif /* MACH_PAGEMAP */ 4762 } else { 4763 assert(! pp->absent); 4764 4765 /* 4766 * Parent object has a real page. 
4767 * Throw away the backing object's 4768 * page. 4769 */ 4770 VM_PAGE_FREE(p); 4771 } 4772 } 4773 } 4774 4775#if !MACH_PAGEMAP 4776 assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL)) 4777 || (!backing_object->pager_created 4778 && (backing_object->pager == MEMORY_OBJECT_NULL))); 4779#else 4780 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL); 4781#endif /* !MACH_PAGEMAP */ 4782 4783 if (backing_object->pager != MEMORY_OBJECT_NULL) { 4784 vm_object_hash_entry_t entry; 4785 4786#if 00 4787 if (COMPRESSED_PAGER_IS_ACTIVE) { 4788 panic("vm_object_do_collapse(%p,%p): " 4789 "backing_object has a compressor pager", 4790 object, backing_object); 4791 } 4792#endif 4793 4794 /* 4795 * Move the pager from backing_object to object. 4796 * 4797 * XXX We're only using part of the paging space 4798 * for keeps now... we ought to discard the 4799 * unused portion. 4800 */ 4801 4802 assert(!object->paging_in_progress); 4803 assert(!object->activity_in_progress); 4804 object->pager = backing_object->pager; 4805 4806 if (backing_object->hashed) { 4807 lck_mtx_t *lck; 4808 4809 lck = vm_object_hash_lock_spin(backing_object->pager); 4810 entry = vm_object_hash_lookup(object->pager, FALSE); 4811 assert(entry != VM_OBJECT_HASH_ENTRY_NULL); 4812 entry->object = object; 4813 vm_object_hash_unlock(lck); 4814 4815 object->hashed = TRUE; 4816 } 4817 object->pager_created = backing_object->pager_created; 4818 object->pager_control = backing_object->pager_control; 4819 object->pager_ready = backing_object->pager_ready; 4820 object->pager_initialized = backing_object->pager_initialized; 4821 object->paging_offset = 4822 backing_object->paging_offset + backing_offset; 4823 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 4824 memory_object_control_collapse(object->pager_control, 4825 object); 4826 } 4827 } 4828 4829#if MACH_PAGEMAP 4830 /* 4831 * If the shadow offset is 0, the use the existence map from 4832 * the backing object if there is one. If the shadow offset is 4833 * not zero, toss it. 4834 * 4835 * XXX - If the shadow offset is not 0 then a bit copy is needed 4836 * if the map is to be salvaged. For now, we just just toss the 4837 * old map, giving the collapsed object no map. This means that 4838 * the pager is invoked for zero fill pages. If analysis shows 4839 * that this happens frequently and is a performance hit, then 4840 * this code should be fixed to salvage the map. 4841 */ 4842 assert(object->existence_map == VM_EXTERNAL_NULL); 4843 if (backing_offset || (size != backing_object->vo_size)) { 4844 vm_external_discarded++; 4845 vm_external_destroy(backing_object->existence_map, 4846 backing_object->vo_size); 4847 } 4848 else { 4849 vm_external_collapsed++; 4850 object->existence_map = backing_object->existence_map; 4851 } 4852 backing_object->existence_map = VM_EXTERNAL_NULL; 4853#endif /* MACH_PAGEMAP */ 4854 4855 /* 4856 * Object now shadows whatever backing_object did. 4857 * Note that the reference to backing_object->shadow 4858 * moves from within backing_object to within object. 4859 */ 4860 4861 assert(!object->phys_contiguous); 4862 assert(!backing_object->phys_contiguous); 4863 object->shadow = backing_object->shadow; 4864 if (object->shadow) { 4865 object->vo_shadow_offset += backing_object->vo_shadow_offset; 4866 } else { 4867 /* no shadow, therefore no shadow offset... 
*/ 4868 object->vo_shadow_offset = 0; 4869 } 4870 assert((object->shadow == VM_OBJECT_NULL) || 4871 (object->shadow->copy != backing_object)); 4872 4873 /* 4874 * Discard backing_object. 4875 * 4876 * Since the backing object has no pages, no 4877 * pager left, and no object references within it, 4878 * all that is necessary is to dispose of it. 4879 */ 4880 4881 assert((backing_object->ref_count == 1) && 4882 (backing_object->resident_page_count == 0) && 4883 (backing_object->paging_in_progress == 0) && 4884 (backing_object->activity_in_progress == 0)); 4885 4886 backing_object->alive = FALSE; 4887 vm_object_unlock(backing_object); 4888 4889 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", 4890 backing_object, 0,0,0,0); 4891 4892 vm_object_lock_destroy(backing_object); 4893 4894 zfree(vm_object_zone, backing_object); 4895 4896 object_collapses++; 4897} 4898 4899static void 4900vm_object_do_bypass( 4901 vm_object_t object, 4902 vm_object_t backing_object) 4903{ 4904 /* 4905 * Make the parent shadow the next object 4906 * in the chain. 4907 */ 4908 4909 vm_object_lock_assert_exclusive(object); 4910 vm_object_lock_assert_exclusive(backing_object); 4911 4912#if TASK_SWAPPER 4913 /* 4914 * Do object reference in-line to 4915 * conditionally increment shadow's 4916 * residence count. If object is not 4917 * resident, leave residence count 4918 * on shadow alone. 4919 */ 4920 if (backing_object->shadow != VM_OBJECT_NULL) { 4921 vm_object_lock(backing_object->shadow); 4922 vm_object_lock_assert_exclusive(backing_object->shadow); 4923 backing_object->shadow->ref_count++; 4924 if (object->res_count != 0) 4925 vm_object_res_reference(backing_object->shadow); 4926 vm_object_unlock(backing_object->shadow); 4927 } 4928#else /* TASK_SWAPPER */ 4929 vm_object_reference(backing_object->shadow); 4930#endif /* TASK_SWAPPER */ 4931 4932 assert(!object->phys_contiguous); 4933 assert(!backing_object->phys_contiguous); 4934 object->shadow = backing_object->shadow; 4935 if (object->shadow) { 4936 object->vo_shadow_offset += backing_object->vo_shadow_offset; 4937 } else { 4938 /* no shadow, therefore no shadow offset... */ 4939 object->vo_shadow_offset = 0; 4940 } 4941 4942 /* 4943 * Backing object might have had a copy pointer 4944 * to us. If it did, clear it. 4945 */ 4946 if (backing_object->copy == object) { 4947 backing_object->copy = VM_OBJECT_NULL; 4948 } 4949 4950 /* 4951 * Drop the reference count on backing_object. 4952#if TASK_SWAPPER 4953 * Since its ref_count was at least 2, it 4954 * will not vanish; so we don't need to call 4955 * vm_object_deallocate. 4956 * [with a caveat for "named" objects] 4957 * 4958 * The res_count on the backing object is 4959 * conditionally decremented. It's possible 4960 * (via vm_pageout_scan) to get here with 4961 * a "swapped" object, which has a 0 res_count, 4962 * in which case, the backing object res_count 4963 * is already down by one. 4964#else 4965 * Don't call vm_object_deallocate unless 4966 * ref_count drops to zero. 4967 * 4968 * The ref_count can drop to zero here if the 4969 * backing object could be bypassed but not 4970 * collapsed, such as when the backing object 4971 * is temporary and cachable. 
4972#endif 4973 */ 4974 if (backing_object->ref_count > 2 || 4975 (!backing_object->named && backing_object->ref_count > 1)) { 4976 vm_object_lock_assert_exclusive(backing_object); 4977 backing_object->ref_count--; 4978#if TASK_SWAPPER 4979 if (object->res_count != 0) 4980 vm_object_res_deallocate(backing_object); 4981 assert(backing_object->ref_count > 0); 4982#endif /* TASK_SWAPPER */ 4983 vm_object_unlock(backing_object); 4984 } else { 4985 4986 /* 4987 * Drop locks so that we can deallocate 4988 * the backing object. 4989 */ 4990 4991#if TASK_SWAPPER 4992 if (object->res_count == 0) { 4993 /* XXX get a reference for the deallocate below */ 4994 vm_object_res_reference(backing_object); 4995 } 4996#endif /* TASK_SWAPPER */ 4997 /* 4998 * vm_object_collapse (the caller of this function) is 4999 * now called from contexts that may not guarantee that a 5000 * valid reference is held on the object... w/o a valid 5001 * reference, it is unsafe and unwise (you will definitely 5002 * regret it) to unlock the object and then retake the lock 5003 * since the object may be terminated and recycled in between. 5004 * The "activity_in_progress" reference will keep the object 5005 * 'stable'. 5006 */ 5007 vm_object_activity_begin(object); 5008 vm_object_unlock(object); 5009 5010 vm_object_unlock(backing_object); 5011 vm_object_deallocate(backing_object); 5012 5013 /* 5014 * Relock object. We don't have to reverify 5015 * its state since vm_object_collapse will 5016 * do that for us as it starts at the 5017 * top of its loop. 5018 */ 5019 5020 vm_object_lock(object); 5021 vm_object_activity_end(object); 5022 } 5023 5024 object_bypasses++; 5025} 5026 5027 5028/* 5029 * vm_object_collapse: 5030 * 5031 * Perform an object collapse or an object bypass if appropriate. 5032 * The real work of collapsing and bypassing is performed in 5033 * the routines vm_object_do_collapse and vm_object_do_bypass. 5034 * 5035 * Requires that the object be locked and the page queues be unlocked. 5036 * 5037 */ 5038static unsigned long vm_object_collapse_calls = 0; 5039static unsigned long vm_object_collapse_objects = 0; 5040static unsigned long vm_object_collapse_do_collapse = 0; 5041static unsigned long vm_object_collapse_do_bypass = 0; 5042 5043__private_extern__ void 5044vm_object_collapse( 5045 register vm_object_t object, 5046 register vm_object_offset_t hint_offset, 5047 boolean_t can_bypass) 5048{ 5049 register vm_object_t backing_object; 5050 register unsigned int rcount; 5051 register unsigned int size; 5052 vm_object_t original_object; 5053 int object_lock_type; 5054 int backing_object_lock_type; 5055 5056 vm_object_collapse_calls++; 5057 5058 if (! vm_object_collapse_allowed && 5059 ! (can_bypass && vm_object_bypass_allowed)) { 5060 return; 5061 } 5062 5063 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n", 5064 object, 0,0,0,0); 5065 5066 if (object == VM_OBJECT_NULL) 5067 return; 5068 5069 original_object = object; 5070 5071 /* 5072 * The top object was locked "exclusive" by the caller. 5073 * In the first pass, to determine if we can collapse the shadow chain, 5074 * take a "shared" lock on the shadow objects. If we can collapse, 5075 * we'll have to go down the chain again with exclusive locks. 
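 *	Schematically, the code below is structured as:
 *
 *		object_lock_type = OBJECT_LOCK_EXCLUSIVE;	// caller's lock
 *		backing_object_lock_type = OBJECT_LOCK_SHARED;	// cheap 1st pass
 *	retry:
 *		walk the shadow chain, locking each backing object with
 *		backing_object_lock_type;
 *		if a collapse or bypass turns out to be possible while we
 *		only hold "shared" locks, release everything, switch both
 *		lock types to OBJECT_LOCK_EXCLUSIVE, and goto retry.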
5076 */ 5077 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5078 backing_object_lock_type = OBJECT_LOCK_SHARED; 5079 5080retry: 5081 object = original_object; 5082 vm_object_lock_assert_exclusive(object); 5083 5084 while (TRUE) { 5085 vm_object_collapse_objects++; 5086 /* 5087 * Verify that the conditions are right for either 5088 * collapse or bypass: 5089 */ 5090 5091 /* 5092 * There is a backing object, and 5093 */ 5094 5095 backing_object = object->shadow; 5096 if (backing_object == VM_OBJECT_NULL) { 5097 if (object != original_object) { 5098 vm_object_unlock(object); 5099 } 5100 return; 5101 } 5102 if (backing_object_lock_type == OBJECT_LOCK_SHARED) { 5103 vm_object_lock_shared(backing_object); 5104 } else { 5105 vm_object_lock(backing_object); 5106 } 5107 5108 /* 5109 * No pages in the object are currently 5110 * being paged out, and 5111 */ 5112 if (object->paging_in_progress != 0 || 5113 object->activity_in_progress != 0) { 5114 /* try and collapse the rest of the shadow chain */ 5115 if (object != original_object) { 5116 vm_object_unlock(object); 5117 } 5118 object = backing_object; 5119 object_lock_type = backing_object_lock_type; 5120 continue; 5121 } 5122 5123 /* 5124 * ... 5125 * The backing object is not read_only, 5126 * and no pages in the backing object are 5127 * currently being paged out. 5128 * The backing object is internal. 5129 * 5130 */ 5131 5132 if (!backing_object->internal || 5133 backing_object->paging_in_progress != 0 || 5134 backing_object->activity_in_progress != 0) { 5135 /* try and collapse the rest of the shadow chain */ 5136 if (object != original_object) { 5137 vm_object_unlock(object); 5138 } 5139 object = backing_object; 5140 object_lock_type = backing_object_lock_type; 5141 continue; 5142 } 5143 5144 /* 5145 * The backing object can't be a copy-object: 5146 * the shadow_offset for the copy-object must stay 5147 * as 0. Furthermore (for the 'we have all the 5148 * pages' case), if we bypass backing_object and 5149 * just shadow the next object in the chain, old 5150 * pages from that object would then have to be copied 5151 * BOTH into the (former) backing_object and into the 5152 * parent object. 5153 */ 5154 if (backing_object->shadow != VM_OBJECT_NULL && 5155 backing_object->shadow->copy == backing_object) { 5156 /* try and collapse the rest of the shadow chain */ 5157 if (object != original_object) { 5158 vm_object_unlock(object); 5159 } 5160 object = backing_object; 5161 object_lock_type = backing_object_lock_type; 5162 continue; 5163 } 5164 5165 /* 5166 * We can now try to either collapse the backing 5167 * object (if the parent is the only reference to 5168 * it) or (perhaps) remove the parent's reference 5169 * to it. 5170 * 5171 * If there is exactly one reference to the backing 5172 * object, we may be able to collapse it into the 5173 * parent. 5174 * 5175 * If MACH_PAGEMAP is defined: 5176 * The parent must not have a pager created for it, 5177 * since collapsing a backing_object dumps new pages 5178 * into the parent that its pager doesn't know about 5179 * (and the collapse code can't merge the existence 5180 * maps). 5181 * Otherwise: 5182 * As long as one of the objects is still not known 5183 * to the pager, we can collapse them. 5184 */ 5185 if (backing_object->ref_count == 1 && 5186 (!object->pager_created 5187#if !MACH_PAGEMAP 5188 || (!backing_object->pager_created) 5189#endif /*!MACH_PAGEMAP */ 5190 ) && vm_object_collapse_allowed) { 5191 5192 /* 5193 * We need the exclusive lock on the VM objects. 
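 *	(Both vm_object_do_collapse() and vm_object_do_bypass() assert
 *	exclusive ownership of the two objects, since they rewrite the
 *	shadow linkage and reference counts and, for a collapse, move
 *	pages between the objects; a shared lock is never sufficient at
 *	this point.)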
5194 */ 5195 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { 5196 /* 5197 * We have an object and its shadow locked 5198 * "shared". We can't just upgrade the locks 5199 * to "exclusive", as some other thread might 5200 * also have these objects locked "shared" and 5201 * attempt to upgrade one or the other to 5202 * "exclusive". The upgrades would block 5203 * forever waiting for the other "shared" locks 5204 * to get released. 5205 * So we have to release the locks and go 5206 * down the shadow chain again (since it could 5207 * have changed) with "exclusive" locking. 5208 */ 5209 vm_object_unlock(backing_object); 5210 if (object != original_object) 5211 vm_object_unlock(object); 5212 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5213 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5214 goto retry; 5215 } 5216 5217 XPR(XPR_VM_OBJECT, 5218 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", 5219 backing_object, object, 5220 backing_object->pager, 5221 backing_object->pager_control, 0); 5222 5223 /* 5224 * Collapse the object with its backing 5225 * object, and try again with the object's 5226 * new backing object. 5227 */ 5228 5229 vm_object_do_collapse(object, backing_object); 5230 vm_object_collapse_do_collapse++; 5231 continue; 5232 } 5233 5234 /* 5235 * Collapsing the backing object was not possible 5236 * or permitted, so let's try bypassing it. 5237 */ 5238 5239 if (! (can_bypass && vm_object_bypass_allowed)) { 5240 /* try and collapse the rest of the shadow chain */ 5241 if (object != original_object) { 5242 vm_object_unlock(object); 5243 } 5244 object = backing_object; 5245 object_lock_type = backing_object_lock_type; 5246 continue; 5247 } 5248 5249 5250 /* 5251 * If the object doesn't have all its pages present, 5252 * we have to make sure no pages in the backing object 5253 * "show through" before bypassing it. 5254 */ 5255 size = (unsigned int)atop(object->vo_size); 5256 rcount = object->resident_page_count; 5257 5258 if (rcount != size) { 5259 vm_object_offset_t offset; 5260 vm_object_offset_t backing_offset; 5261 unsigned int backing_rcount; 5262 5263 /* 5264 * If the backing object has a pager but no pagemap, 5265 * then we cannot bypass it, because we don't know 5266 * what pages it has. 5267 */ 5268 if (backing_object->pager_created 5269#if MACH_PAGEMAP 5270 && (backing_object->existence_map == VM_EXTERNAL_NULL) 5271#endif /* MACH_PAGEMAP */ 5272 ) { 5273 /* try and collapse the rest of the shadow chain */ 5274 if (object != original_object) { 5275 vm_object_unlock(object); 5276 } 5277 object = backing_object; 5278 object_lock_type = backing_object_lock_type; 5279 continue; 5280 } 5281 5282 /* 5283 * If the object has a pager but no pagemap, 5284 * then we cannot bypass it, because we don't know 5285 * what pages it has. 
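 *	As a worked example of the resident-count check just below (the
 *	numbers are made up): if the backing object spans 100 pages and
 *	has 60 of them resident, while the parent's window (size) is 50
 *	pages, then at most 100 - 50 = 50 of those resident pages can lie
 *	outside the window, so at least 60 - 50 = 10 of them lie inside
 *	it. If the parent has only rcount = 5 resident pages, it cannot
 *	cover all 10 with resident pages, so at least one backing page
 *	would "show through" and the bypass attempt is abandoned without
 *	scanning any pages.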
5286 */ 5287 if (object->pager_created 5288#if MACH_PAGEMAP 5289 && (object->existence_map == VM_EXTERNAL_NULL) 5290#endif /* MACH_PAGEMAP */ 5291 ) { 5292 /* try and collapse the rest of the shadow chain */ 5293 if (object != original_object) { 5294 vm_object_unlock(object); 5295 } 5296 object = backing_object; 5297 object_lock_type = backing_object_lock_type; 5298 continue; 5299 } 5300 5301 backing_offset = object->vo_shadow_offset; 5302 backing_rcount = backing_object->resident_page_count; 5303 5304 if ( (int)backing_rcount - (int)(atop(backing_object->vo_size) - size) > (int)rcount) { 5305 /* 5306 * we have enough pages in the backing object to guarantee that 5307 * at least 1 of them must be 'uncovered' by a resident page 5308 * in the object we're evaluating, so move on and 5309 * try to collapse the rest of the shadow chain 5310 */ 5311 if (object != original_object) { 5312 vm_object_unlock(object); 5313 } 5314 object = backing_object; 5315 object_lock_type = backing_object_lock_type; 5316 continue; 5317 } 5318 5319 /* 5320 * If all of the pages in the backing object are 5321 * shadowed by the parent object, the parent 5322 * object no longer has to shadow the backing 5323 * object; it can shadow the next one in the 5324 * chain. 5325 * 5326 * If the backing object has existence info, 5327 * we must check examine its existence info 5328 * as well. 5329 * 5330 */ 5331 5332#if MACH_PAGEMAP 5333#define EXISTS_IN_OBJECT(obj, off, rc) \ 5334 ((vm_external_state_get((obj)->existence_map, \ 5335 (vm_offset_t)(off)) \ 5336 == VM_EXTERNAL_STATE_EXISTS) || \ 5337 (VM_COMPRESSOR_PAGER_STATE_GET((obj), (off)) \ 5338 == VM_EXTERNAL_STATE_EXISTS) || \ 5339 ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) 5340#else /* MACH_PAGEMAP */ 5341#define EXISTS_IN_OBJECT(obj, off, rc) \ 5342 ((VM_COMPRESSOR_PAGER_STATE_GET((obj), (off)) \ 5343 == VM_EXTERNAL_STATE_EXISTS) || \ 5344 ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) 5345#endif /* MACH_PAGEMAP */ 5346 5347 /* 5348 * Check the hint location first 5349 * (since it is often the quickest way out of here). 5350 */ 5351 if (object->cow_hint != ~(vm_offset_t)0) 5352 hint_offset = (vm_object_offset_t)object->cow_hint; 5353 else 5354 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ? 5355 (hint_offset - 8 * PAGE_SIZE_64) : 0; 5356 5357 if (EXISTS_IN_OBJECT(backing_object, hint_offset + 5358 backing_offset, backing_rcount) && 5359 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) { 5360 /* dependency right at the hint */ 5361 object->cow_hint = (vm_offset_t) hint_offset; /* atomic */ 5362 /* try and collapse the rest of the shadow chain */ 5363 if (object != original_object) { 5364 vm_object_unlock(object); 5365 } 5366 object = backing_object; 5367 object_lock_type = backing_object_lock_type; 5368 continue; 5369 } 5370 5371 /* 5372 * If the object's window onto the backing_object 5373 * is large compared to the number of resident 5374 * pages in the backing object, it makes sense to 5375 * walk the backing_object's resident pages first. 5376 * 5377 * NOTE: Pages may be in both the existence map and/or 5378 * resident, so if we don't find a dependency while 5379 * walking the backing object's resident page list 5380 * directly, and there is an existence map, we'll have 5381 * to run the offset based 2nd pass. 
Because we may 5382 * have to run both passes, we need to be careful 5383 * not to decrement 'rcount' in the 1st pass 5384 */ 5385 if (backing_rcount && backing_rcount < (size / 8)) { 5386 unsigned int rc = rcount; 5387 vm_page_t p; 5388 5389 backing_rcount = backing_object->resident_page_count; 5390 p = (vm_page_t)queue_first(&backing_object->memq); 5391 do { 5392 offset = (p->offset - backing_offset); 5393 5394 if (offset < object->vo_size && 5395 offset != hint_offset && 5396 !EXISTS_IN_OBJECT(object, offset, rc)) { 5397 /* found a dependency */ 5398 object->cow_hint = (vm_offset_t) offset; /* atomic */ 5399 5400 break; 5401 } 5402 p = (vm_page_t) queue_next(&p->listq); 5403 5404 } while (--backing_rcount); 5405 if (backing_rcount != 0 ) { 5406 /* try and collapse the rest of the shadow chain */ 5407 if (object != original_object) { 5408 vm_object_unlock(object); 5409 } 5410 object = backing_object; 5411 object_lock_type = backing_object_lock_type; 5412 continue; 5413 } 5414 } 5415 5416 /* 5417 * Walk through the offsets looking for pages in the 5418 * backing object that show through to the object. 5419 */ 5420 if (backing_rcount 5421#if MACH_PAGEMAP 5422 || backing_object->existence_map 5423#endif /* MACH_PAGEMAP */ 5424 ) { 5425 offset = hint_offset; 5426 5427 while((offset = 5428 (offset + PAGE_SIZE_64 < object->vo_size) ? 5429 (offset + PAGE_SIZE_64) : 0) != hint_offset) { 5430 5431 if (EXISTS_IN_OBJECT(backing_object, offset + 5432 backing_offset, backing_rcount) && 5433 !EXISTS_IN_OBJECT(object, offset, rcount)) { 5434 /* found a dependency */ 5435 object->cow_hint = (vm_offset_t) offset; /* atomic */ 5436 break; 5437 } 5438 } 5439 if (offset != hint_offset) { 5440 /* try and collapse the rest of the shadow chain */ 5441 if (object != original_object) { 5442 vm_object_unlock(object); 5443 } 5444 object = backing_object; 5445 object_lock_type = backing_object_lock_type; 5446 continue; 5447 } 5448 } 5449 } 5450 5451 /* 5452 * We need "exclusive" locks on the 2 VM objects. 5453 */ 5454 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { 5455 vm_object_unlock(backing_object); 5456 if (object != original_object) 5457 vm_object_unlock(object); 5458 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5459 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5460 goto retry; 5461 } 5462 5463 /* reset the offset hint for any objects deeper in the chain */ 5464 object->cow_hint = (vm_offset_t)0; 5465 5466 /* 5467 * All interesting pages in the backing object 5468 * already live in the parent or its pager. 5469 * Thus we can bypass the backing object. 5470 */ 5471 5472 vm_object_do_bypass(object, backing_object); 5473 vm_object_collapse_do_bypass++; 5474 5475 /* 5476 * Try again with this object's new backing object. 5477 */ 5478 5479 continue; 5480 } 5481 5482 if (object != original_object) { 5483 vm_object_unlock(object); 5484 } 5485} 5486 5487/* 5488 * Routine: vm_object_page_remove: [internal] 5489 * Purpose: 5490 * Removes all physical pages in the specified 5491 * object range from the object's list of pages. 5492 * 5493 * In/out conditions: 5494 * The object must be locked. 5495 * The object must not have paging_in_progress, usually 5496 * guaranteed by not having a pager. 
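 *	As a worked example of the threshold used just below: removing 4
 *	pages from an object with 1000 resident pages gives
 *	4 < 1000/16 = 62, so each page is found with vm_page_lookup();
 *	removing 512 pages from an object with only 100 resident pages
 *	gives 512 >= 100/16 = 6, so it is cheaper to walk the object's
 *	page queue once and free the pages that fall in the range.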
5497 */ 5498unsigned int vm_object_page_remove_lookup = 0; 5499unsigned int vm_object_page_remove_iterate = 0; 5500 5501__private_extern__ void 5502vm_object_page_remove( 5503 register vm_object_t object, 5504 register vm_object_offset_t start, 5505 register vm_object_offset_t end) 5506{ 5507 register vm_page_t p, next; 5508 5509 /* 5510 * One and two page removals are most popular. 5511 * The factor of 16 here is somewhat arbitrary. 5512 * It balances vm_object_lookup vs iteration. 5513 */ 5514 5515 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) { 5516 vm_object_page_remove_lookup++; 5517 5518 for (; start < end; start += PAGE_SIZE_64) { 5519 p = vm_page_lookup(object, start); 5520 if (p != VM_PAGE_NULL) { 5521 assert(!p->cleaning && !p->pageout && !p->laundry); 5522 if (!p->fictitious && p->pmapped) 5523 pmap_disconnect(p->phys_page); 5524 VM_PAGE_FREE(p); 5525 } 5526 } 5527 } else { 5528 vm_object_page_remove_iterate++; 5529 5530 p = (vm_page_t) queue_first(&object->memq); 5531 while (!queue_end(&object->memq, (queue_entry_t) p)) { 5532 next = (vm_page_t) queue_next(&p->listq); 5533 if ((start <= p->offset) && (p->offset < end)) { 5534 assert(!p->cleaning && !p->pageout && !p->laundry); 5535 if (!p->fictitious && p->pmapped) 5536 pmap_disconnect(p->phys_page); 5537 VM_PAGE_FREE(p); 5538 } 5539 p = next; 5540 } 5541 } 5542} 5543 5544 5545/* 5546 * Routine: vm_object_coalesce 5547 * Function: Coalesces two objects backing up adjoining 5548 * regions of memory into a single object. 5549 * 5550 * returns TRUE if objects were combined. 5551 * 5552 * NOTE: Only works at the moment if the second object is NULL - 5553 * if it's not, which object do we lock first? 5554 * 5555 * Parameters: 5556 * prev_object First object to coalesce 5557 * prev_offset Offset into prev_object 5558 * next_object Second object into coalesce 5559 * next_offset Offset into next_object 5560 * 5561 * prev_size Size of reference to prev_object 5562 * next_size Size of reference to next_object 5563 * 5564 * Conditions: 5565 * The object(s) must *not* be locked. The map must be locked 5566 * to preserve the reference to the object(s). 5567 */ 5568static int vm_object_coalesce_count = 0; 5569 5570__private_extern__ boolean_t 5571vm_object_coalesce( 5572 register vm_object_t prev_object, 5573 vm_object_t next_object, 5574 vm_object_offset_t prev_offset, 5575 __unused vm_object_offset_t next_offset, 5576 vm_object_size_t prev_size, 5577 vm_object_size_t next_size) 5578{ 5579 vm_object_size_t newsize; 5580 5581#ifdef lint 5582 next_offset++; 5583#endif /* lint */ 5584 5585 if (next_object != VM_OBJECT_NULL) { 5586 return(FALSE); 5587 } 5588 5589 if (prev_object == VM_OBJECT_NULL) { 5590 return(TRUE); 5591 } 5592 5593 XPR(XPR_VM_OBJECT, 5594 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n", 5595 prev_object, prev_offset, prev_size, next_size, 0); 5596 5597 vm_object_lock(prev_object); 5598 5599 /* 5600 * Try to collapse the object first 5601 */ 5602 vm_object_collapse(prev_object, prev_offset, TRUE); 5603 5604 /* 5605 * Can't coalesce if pages not mapped to 5606 * prev_entry may be in use any way: 5607 * . more than one reference 5608 * . paged out 5609 * . shadows another object 5610 * . has a copy elsewhere 5611 * . is purgeable 5612 * . 
paging references (pages might be in page-list) 5613 */ 5614 5615 if ((prev_object->ref_count > 1) || 5616 prev_object->pager_created || 5617 (prev_object->shadow != VM_OBJECT_NULL) || 5618 (prev_object->copy != VM_OBJECT_NULL) || 5619 (prev_object->true_share != FALSE) || 5620 (prev_object->purgable != VM_PURGABLE_DENY) || 5621 (prev_object->paging_in_progress != 0) || 5622 (prev_object->activity_in_progress != 0)) { 5623 vm_object_unlock(prev_object); 5624 return(FALSE); 5625 } 5626 5627 vm_object_coalesce_count++; 5628 5629 /* 5630 * Remove any pages that may still be in the object from 5631 * a previous deallocation. 5632 */ 5633 vm_object_page_remove(prev_object, 5634 prev_offset + prev_size, 5635 prev_offset + prev_size + next_size); 5636 5637 /* 5638 * Extend the object if necessary. 5639 */ 5640 newsize = prev_offset + prev_size + next_size; 5641 if (newsize > prev_object->vo_size) { 5642#if MACH_PAGEMAP 5643 /* 5644 * We cannot extend an object that has existence info, 5645 * since the existence info might then fail to cover 5646 * the entire object. 5647 * 5648 * This assertion must be true because the object 5649 * has no pager, and we only create existence info 5650 * for objects with pagers. 5651 */ 5652 assert(prev_object->existence_map == VM_EXTERNAL_NULL); 5653#endif /* MACH_PAGEMAP */ 5654 prev_object->vo_size = newsize; 5655 } 5656 5657 vm_object_unlock(prev_object); 5658 return(TRUE); 5659} 5660 5661/* 5662 * Attach a set of physical pages to an object, so that they can 5663 * be mapped by mapping the object. Typically used to map IO memory. 5664 * 5665 * The mapping function and its private data are used to obtain the 5666 * physical addresses for each page to be mapped. 5667 */ 5668void 5669vm_object_page_map( 5670 vm_object_t object, 5671 vm_object_offset_t offset, 5672 vm_object_size_t size, 5673 vm_object_offset_t (*map_fn)(void *map_fn_data, 5674 vm_object_offset_t offset), 5675 void *map_fn_data) /* private to map_fn */ 5676{ 5677 int64_t num_pages; 5678 int i; 5679 vm_page_t m; 5680 vm_page_t old_page; 5681 vm_object_offset_t addr; 5682 5683 num_pages = atop_64(size); 5684 5685 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) { 5686 5687 addr = (*map_fn)(map_fn_data, offset); 5688 5689 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) 5690 vm_page_more_fictitious(); 5691 5692 vm_object_lock(object); 5693 if ((old_page = vm_page_lookup(object, offset)) 5694 != VM_PAGE_NULL) 5695 { 5696 VM_PAGE_FREE(old_page); 5697 } 5698 5699 assert((ppnum_t) addr == addr); 5700 vm_page_init(m, (ppnum_t) addr, FALSE); 5701 /* 5702 * private normally requires lock_queues but since we 5703 * are initializing the page, its not necessary here 5704 */ 5705 m->private = TRUE; /* don`t free page */ 5706 m->wire_count = 1; 5707 vm_page_insert(m, object, offset); 5708 5709 PAGE_WAKEUP_DONE(m); 5710 vm_object_unlock(object); 5711 } 5712} 5713 5714kern_return_t 5715vm_object_populate_with_private( 5716 vm_object_t object, 5717 vm_object_offset_t offset, 5718 ppnum_t phys_page, 5719 vm_size_t size) 5720{ 5721 ppnum_t base_page; 5722 vm_object_offset_t base_offset; 5723 5724 5725 if (!object->private) 5726 return KERN_FAILURE; 5727 5728 base_page = phys_page; 5729 5730 vm_object_lock(object); 5731 5732 if (!object->phys_contiguous) { 5733 vm_page_t m; 5734 5735 if ((base_offset = trunc_page_64(offset)) != offset) { 5736 vm_object_unlock(object); 5737 return KERN_FAILURE; 5738 } 5739 base_offset += object->paging_offset; 5740 5741 while (size) { 5742 m = vm_page_lookup(object, 
base_offset); 5743 5744 if (m != VM_PAGE_NULL) { 5745 if (m->fictitious) { 5746 if (m->phys_page != vm_page_guard_addr) { 5747 5748 vm_page_lockspin_queues(); 5749 m->private = TRUE; 5750 vm_page_unlock_queues(); 5751 5752 m->fictitious = FALSE; 5753 m->phys_page = base_page; 5754 } 5755 } else if (m->phys_page != base_page) { 5756 5757 if ( !m->private) { 5758 /* 5759 * we'd leak a real page... that can't be right 5760 */ 5761 panic("vm_object_populate_with_private - %p not private", m); 5762 } 5763 if (m->pmapped) { 5764 /* 5765 * pmap call to clear old mapping 5766 */ 5767 pmap_disconnect(m->phys_page); 5768 } 5769 m->phys_page = base_page; 5770 } 5771 if (m->encrypted) { 5772 /* 5773 * we should never see this on a ficticious or private page 5774 */ 5775 panic("vm_object_populate_with_private - %p encrypted", m); 5776 } 5777 5778 } else { 5779 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) 5780 vm_page_more_fictitious(); 5781 5782 /* 5783 * private normally requires lock_queues but since we 5784 * are initializing the page, its not necessary here 5785 */ 5786 m->private = TRUE; 5787 m->fictitious = FALSE; 5788 m->phys_page = base_page; 5789 m->unusual = TRUE; 5790 m->busy = FALSE; 5791 5792 vm_page_insert(m, object, base_offset); 5793 } 5794 base_page++; /* Go to the next physical page */ 5795 base_offset += PAGE_SIZE; 5796 size -= PAGE_SIZE; 5797 } 5798 } else { 5799 /* NOTE: we should check the original settings here */ 5800 /* if we have a size > zero a pmap call should be made */ 5801 /* to disable the range */ 5802 5803 /* pmap_? */ 5804 5805 /* shadows on contiguous memory are not allowed */ 5806 /* we therefore can use the offset field */ 5807 object->vo_shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT; 5808 object->vo_size = size; 5809 } 5810 vm_object_unlock(object); 5811 5812 return KERN_SUCCESS; 5813} 5814 5815/* 5816 * memory_object_free_from_cache: 5817 * 5818 * Walk the vm_object cache list, removing and freeing vm_objects 5819 * which are backed by the pager identified by the caller, (pager_ops). 5820 * Remove up to "count" objects, if there are that may available 5821 * in the cache. 5822 * 5823 * Walk the list at most once, return the number of vm_objects 5824 * actually freed. 5825 */ 5826 5827__private_extern__ kern_return_t 5828memory_object_free_from_cache( 5829 __unused host_t host, 5830 __unused memory_object_pager_ops_t pager_ops, 5831 int *count) 5832{ 5833#if VM_OBJECT_CACHE 5834 int object_released = 0; 5835 5836 register vm_object_t object = VM_OBJECT_NULL; 5837 vm_object_t shadow; 5838 5839/* 5840 if(host == HOST_NULL) 5841 return(KERN_INVALID_ARGUMENT); 5842*/ 5843 5844 try_again: 5845 vm_object_cache_lock(); 5846 5847 queue_iterate(&vm_object_cached_list, object, 5848 vm_object_t, cached_list) { 5849 if (object->pager && 5850 (pager_ops == object->pager->mo_pager_ops)) { 5851 vm_object_lock(object); 5852 queue_remove(&vm_object_cached_list, object, 5853 vm_object_t, cached_list); 5854 vm_object_cached_count--; 5855 5856 vm_object_cache_unlock(); 5857 /* 5858 * Since this object is in the cache, we know 5859 * that it is initialized and has only a pager's 5860 * (implicit) reference. Take a reference to avoid 5861 * recursive deallocations. 5862 */ 5863 5864 assert(object->pager_initialized); 5865 assert(object->ref_count == 0); 5866 vm_object_lock_assert_exclusive(object); 5867 object->ref_count++; 5868 5869 /* 5870 * Terminate the object. 5871 * If the object had a shadow, we let 5872 * vm_object_deallocate deallocate it. 
5873 * "pageout" objects have a shadow, but 5874 * maintain a "paging reference" rather 5875 * than a normal reference. 5876 * (We are careful here to limit recursion.) 5877 */ 5878 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 5879 5880 if ((vm_object_terminate(object) == KERN_SUCCESS) 5881 && (shadow != VM_OBJECT_NULL)) { 5882 vm_object_deallocate(shadow); 5883 } 5884 5885 if(object_released++ == *count) 5886 return KERN_SUCCESS; 5887 goto try_again; 5888 } 5889 } 5890 vm_object_cache_unlock(); 5891 *count = object_released; 5892#else 5893 *count = 0; 5894#endif 5895 return KERN_SUCCESS; 5896} 5897 5898 5899 5900kern_return_t 5901memory_object_create_named( 5902 memory_object_t pager, 5903 memory_object_offset_t size, 5904 memory_object_control_t *control) 5905{ 5906 vm_object_t object; 5907 vm_object_hash_entry_t entry; 5908 lck_mtx_t *lck; 5909 5910 *control = MEMORY_OBJECT_CONTROL_NULL; 5911 if (pager == MEMORY_OBJECT_NULL) 5912 return KERN_INVALID_ARGUMENT; 5913 5914 lck = vm_object_hash_lock_spin(pager); 5915 entry = vm_object_hash_lookup(pager, FALSE); 5916 5917 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) && 5918 (entry->object != VM_OBJECT_NULL)) { 5919 if (entry->object->named == TRUE) 5920 panic("memory_object_create_named: caller already holds the right"); } 5921 vm_object_hash_unlock(lck); 5922 5923 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == VM_OBJECT_NULL) { 5924 return(KERN_INVALID_OBJECT); 5925 } 5926 5927 /* wait for object (if any) to be ready */ 5928 if (object != VM_OBJECT_NULL) { 5929 vm_object_lock(object); 5930 object->named = TRUE; 5931 while (!object->pager_ready) { 5932 vm_object_sleep(object, 5933 VM_OBJECT_EVENT_PAGER_READY, 5934 THREAD_UNINT); 5935 } 5936 *control = object->pager_control; 5937 vm_object_unlock(object); 5938 } 5939 return (KERN_SUCCESS); 5940} 5941 5942 5943/* 5944 * Routine: memory_object_recover_named [user interface] 5945 * Purpose: 5946 * Attempt to recover a named reference for a VM object. 5947 * VM will verify that the object has not already started 5948 * down the termination path, and if it has, will optionally 5949 * wait for that to finish. 
5950 * Returns: 5951 * KERN_SUCCESS - we recovered a named reference on the object 5952 * KERN_FAILURE - we could not recover a reference (object dead) 5953 * KERN_INVALID_ARGUMENT - bad memory object control 5954 */ 5955kern_return_t 5956memory_object_recover_named( 5957 memory_object_control_t control, 5958 boolean_t wait_on_terminating) 5959{ 5960 vm_object_t object; 5961 5962 object = memory_object_control_to_vm_object(control); 5963 if (object == VM_OBJECT_NULL) { 5964 return (KERN_INVALID_ARGUMENT); 5965 } 5966restart: 5967 vm_object_lock(object); 5968 5969 if (object->terminating && wait_on_terminating) { 5970 vm_object_wait(object, 5971 VM_OBJECT_EVENT_PAGING_IN_PROGRESS, 5972 THREAD_UNINT); 5973 goto restart; 5974 } 5975 5976 if (!object->alive) { 5977 vm_object_unlock(object); 5978 return KERN_FAILURE; 5979 } 5980 5981 if (object->named == TRUE) { 5982 vm_object_unlock(object); 5983 return KERN_SUCCESS; 5984 } 5985#if VM_OBJECT_CACHE 5986 if ((object->ref_count == 0) && (!object->terminating)) { 5987 if (!vm_object_cache_lock_try()) { 5988 vm_object_unlock(object); 5989 goto restart; 5990 } 5991 queue_remove(&vm_object_cached_list, object, 5992 vm_object_t, cached_list); 5993 vm_object_cached_count--; 5994 XPR(XPR_VM_OBJECT_CACHE, 5995 "memory_object_recover_named: removing %X, head (%X, %X)\n", 5996 object, 5997 vm_object_cached_list.next, 5998 vm_object_cached_list.prev, 0,0); 5999 6000 vm_object_cache_unlock(); 6001 } 6002#endif 6003 object->named = TRUE; 6004 vm_object_lock_assert_exclusive(object); 6005 object->ref_count++; 6006 vm_object_res_reference(object); 6007 while (!object->pager_ready) { 6008 vm_object_sleep(object, 6009 VM_OBJECT_EVENT_PAGER_READY, 6010 THREAD_UNINT); 6011 } 6012 vm_object_unlock(object); 6013 return (KERN_SUCCESS); 6014} 6015 6016 6017/* 6018 * vm_object_release_name: 6019 * 6020 * Enforces name semantic on memory_object reference count decrement 6021 * This routine should not be called unless the caller holds a name 6022 * reference gained through the memory_object_create_named. 6023 * 6024 * If the TERMINATE_IDLE flag is set, the call will return if the 6025 * reference count is not 1. i.e. idle with the only remaining reference 6026 * being the name. 6027 * If the decision is made to proceed the name field flag is set to 6028 * false and the reference count is decremented. If the RESPECT_CACHE 6029 * flag is set and the reference count has gone to zero, the 6030 * memory_object is checked to see if it is cacheable otherwise when 6031 * the reference count is zero, it is simply terminated. 6032 */ 6033 6034__private_extern__ kern_return_t 6035vm_object_release_name( 6036 vm_object_t object, 6037 int flags) 6038{ 6039 vm_object_t shadow; 6040 boolean_t original_object = TRUE; 6041 6042 while (object != VM_OBJECT_NULL) { 6043 6044 vm_object_lock(object); 6045 6046 assert(object->alive); 6047 if (original_object) 6048 assert(object->named); 6049 assert(object->ref_count > 0); 6050 6051 /* 6052 * We have to wait for initialization before 6053 * destroying or caching the object. 
6054 */ 6055 6056 if (object->pager_created && !object->pager_initialized) { 6057 assert(!object->can_persist); 6058 vm_object_assert_wait(object, 6059 VM_OBJECT_EVENT_INITIALIZED, 6060 THREAD_UNINT); 6061 vm_object_unlock(object); 6062 thread_block(THREAD_CONTINUE_NULL); 6063 continue; 6064 } 6065 6066 if (((object->ref_count > 1) 6067 && (flags & MEMORY_OBJECT_TERMINATE_IDLE)) 6068 || (object->terminating)) { 6069 vm_object_unlock(object); 6070 return KERN_FAILURE; 6071 } else { 6072 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) { 6073 vm_object_unlock(object); 6074 return KERN_SUCCESS; 6075 } 6076 } 6077 6078 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) && 6079 (object->ref_count == 1)) { 6080 if (original_object) 6081 object->named = FALSE; 6082 vm_object_unlock(object); 6083 /* let vm_object_deallocate push this thing into */ 6084 /* the cache, if that it is where it is bound */ 6085 vm_object_deallocate(object); 6086 return KERN_SUCCESS; 6087 } 6088 VM_OBJ_RES_DECR(object); 6089 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 6090 6091 if (object->ref_count == 1) { 6092 if (vm_object_terminate(object) != KERN_SUCCESS) { 6093 if (original_object) { 6094 return KERN_FAILURE; 6095 } else { 6096 return KERN_SUCCESS; 6097 } 6098 } 6099 if (shadow != VM_OBJECT_NULL) { 6100 original_object = FALSE; 6101 object = shadow; 6102 continue; 6103 } 6104 return KERN_SUCCESS; 6105 } else { 6106 vm_object_lock_assert_exclusive(object); 6107 object->ref_count--; 6108 assert(object->ref_count > 0); 6109 if(original_object) 6110 object->named = FALSE; 6111 vm_object_unlock(object); 6112 return KERN_SUCCESS; 6113 } 6114 } 6115 /*NOTREACHED*/ 6116 assert(0); 6117 return KERN_FAILURE; 6118} 6119 6120 6121__private_extern__ kern_return_t 6122vm_object_lock_request( 6123 vm_object_t object, 6124 vm_object_offset_t offset, 6125 vm_object_size_t size, 6126 memory_object_return_t should_return, 6127 int flags, 6128 vm_prot_t prot) 6129{ 6130 __unused boolean_t should_flush; 6131 6132 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; 6133 6134 XPR(XPR_MEMORY_OBJECT, 6135 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n", 6136 object, offset, size, 6137 (((should_return&1)<<1)|should_flush), prot); 6138 6139 /* 6140 * Check for bogus arguments. 6141 */ 6142 if (object == VM_OBJECT_NULL) 6143 return (KERN_INVALID_ARGUMENT); 6144 6145 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) 6146 return (KERN_INVALID_ARGUMENT); 6147 6148 size = round_page_64(size); 6149 6150 /* 6151 * Lock the object, and acquire a paging reference to 6152 * prevent the memory_object reference from being released. 6153 */ 6154 vm_object_lock(object); 6155 vm_object_paging_begin(object); 6156 6157 (void)vm_object_update(object, 6158 offset, size, NULL, NULL, should_return, flags, prot); 6159 6160 vm_object_paging_end(object); 6161 vm_object_unlock(object); 6162 6163 return (KERN_SUCCESS); 6164} 6165 6166/* 6167 * Empty a purgeable object by grabbing the physical pages assigned to it and 6168 * putting them on the free queue without writing them to backing store, etc. 6169 * When the pages are next touched they will be demand zero-fill pages. We 6170 * skip pages which are busy, being paged in/out, wired, etc. We do _not_ 6171 * skip referenced/dirty pages, pages on the active queue, etc. We're more 6172 * than happy to grab these since this is a purgeable object. We mark the 6173 * object as "empty" after reaping its pages. 
6174 * 6175 * On entry the object must be locked and it must be 6176 * purgeable with no delayed copies pending. 6177 */ 6178void 6179vm_object_purge(vm_object_t object) 6180{ 6181 vm_object_lock_assert_exclusive(object); 6182 6183 if (object->purgable == VM_PURGABLE_DENY) 6184 return; 6185 6186 assert(object->copy == VM_OBJECT_NULL); 6187 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); 6188 6189 if(object->purgable == VM_PURGABLE_VOLATILE) { 6190 unsigned int delta; 6191 assert(object->resident_page_count >= 6192 object->wired_page_count); 6193 delta = (object->resident_page_count - 6194 object->wired_page_count); 6195 if (delta != 0) { 6196 assert(vm_page_purgeable_count >= 6197 delta); 6198 OSAddAtomic(-delta, 6199 (SInt32 *)&vm_page_purgeable_count); 6200 } 6201 if (object->wired_page_count != 0) { 6202 assert(vm_page_purgeable_wired_count >= 6203 object->wired_page_count); 6204 OSAddAtomic(-object->wired_page_count, 6205 (SInt32 *)&vm_page_purgeable_wired_count); 6206 } 6207 } 6208 object->purgable = VM_PURGABLE_EMPTY; 6209 6210 vm_object_reap_pages(object, REAP_PURGEABLE); 6211} 6212 6213 6214/* 6215 * vm_object_purgeable_control() allows the caller to control and investigate the 6216 * state of a purgeable object. A purgeable object is created via a call to 6217 * vm_allocate() with VM_FLAGS_PURGABLE specified. A purgeable object will 6218 * never be coalesced with any other object -- even other purgeable objects -- 6219 * and will thus always remain a distinct object. A purgeable object has 6220 * special semantics when its reference count is exactly 1. If its reference 6221 * count is greater than 1, then a purgeable object will behave like a normal 6222 * object and attempts to use this interface will result in an error return 6223 * of KERN_INVALID_ARGUMENT. 6224 * 6225 * A purgeable object may be put into a "volatile" state which will make the 6226 * object's pages elligable for being reclaimed without paging to backing 6227 * store if the system runs low on memory. If the pages in a volatile 6228 * purgeable object are reclaimed, the purgeable object is said to have been 6229 * "emptied." When a purgeable object is emptied the system will reclaim as 6230 * many pages from the object as it can in a convenient manner (pages already 6231 * en route to backing store or busy for other reasons are left as is). When 6232 * a purgeable object is made volatile, its pages will generally be reclaimed 6233 * before other pages in the application's working set. This semantic is 6234 * generally used by applications which can recreate the data in the object 6235 * faster than it can be paged in. One such example might be media assets 6236 * which can be reread from a much faster RAID volume. 6237 * 6238 * A purgeable object may be designated as "non-volatile" which means it will 6239 * behave like all other objects in the system with pages being written to and 6240 * read from backing store as needed to satisfy system memory needs. If the 6241 * object was emptied before the object was made non-volatile, that fact will 6242 * be returned as the old state of the purgeable object (see 6243 * VM_PURGABLE_SET_STATE below). In this case, any pages of the object which 6244 * were reclaimed as part of emptying the object will be refaulted in as 6245 * zero-fill on demand. It is up to the application to note that an object 6246 * was emptied and recreate the objects contents if necessary. 
When a 6247 * purgeable object is made non-volatile, its pages will generally not be paged 6248 * out to backing store in the immediate future. A purgeable object may also 6249 * be manually emptied. 6250 * 6251 * Finally, the current state (non-volatile, volatile, volatile & empty) of a 6252 * volatile purgeable object may be queried at any time. This information may 6253 * be used as a control input to let the application know when the system is 6254 * experiencing memory pressure and is reclaiming memory. 6255 * 6256 * The specified address may be any address within the purgeable object. If 6257 * the specified address does not represent any object in the target task's 6258 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the 6259 * object containing the specified address is not a purgeable object, then 6260 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be 6261 * returned. 6262 * 6263 * The control parameter may be any one of VM_PURGABLE_SET_STATE or 6264 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter 6265 * state is used to set the new state of the purgeable object and return its 6266 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgeable 6267 * object is returned in the parameter state. 6268 * 6269 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE, 6270 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent 6271 * the non-volatile, volatile and volatile/empty states described above. 6272 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will 6273 * immediately reclaim as many pages in the object as can be conveniently 6274 * collected (some may have already been written to backing store or be 6275 * otherwise busy). 6276 * 6277 * The process of making a purgeable object non-volatile and determining its 6278 * previous state is atomic. Thus, if a purgeable object is made 6279 * VM_PURGABLE_NONVOLATILE and the old state is returned as 6280 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are 6281 * completely intact and will remain so until the object is made volatile 6282 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object 6283 * was reclaimed while it was in a volatile state and its previous contents 6284 * have been lost. 6285 */ 6286/* 6287 * The object must be locked. 6288 */ 6289kern_return_t 6290vm_object_purgable_control( 6291 vm_object_t object, 6292 vm_purgable_t control, 6293 int *state) 6294{ 6295 int old_state; 6296 int new_state; 6297 6298 if (object == VM_OBJECT_NULL) { 6299 /* 6300 * Object must already be present or it can't be purgeable. 6301 */ 6302 return KERN_INVALID_ARGUMENT; 6303 } 6304 6305 /* 6306 * Get current state of the purgeable object. 6307 */ 6308 old_state = object->purgable; 6309 if (old_state == VM_PURGABLE_DENY) 6310 return KERN_INVALID_ARGUMENT; 6311 6312 /* purgeable cant have delayed copies - now or in the future */ 6313 assert(object->copy == VM_OBJECT_NULL); 6314 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); 6315 6316 /* 6317 * Execute the desired operation. 
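 * For VM_PURGABLE_SET_STATE, the switch below handles the transition to
 * each target state (DENY/NONVOLATILE, VOLATILE, EMPTY), adjusting the
 * global purgeable page counters and moving the object on or off the
 * purgeable object queues as required; the previous state is handed back
 * to the caller through *state before returning.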
6318 */ 6319 if (control == VM_PURGABLE_GET_STATE) { 6320 *state = old_state; 6321 return KERN_SUCCESS; 6322 } 6323 6324 if ((*state) & VM_PURGABLE_DEBUG_EMPTY) { 6325 object->volatile_empty = TRUE; 6326 } 6327 if ((*state) & VM_PURGABLE_DEBUG_FAULT) { 6328 object->volatile_fault = TRUE; 6329 } 6330 6331 new_state = *state & VM_PURGABLE_STATE_MASK; 6332 if (new_state == VM_PURGABLE_VOLATILE && 6333 object->volatile_empty) { 6334 new_state = VM_PURGABLE_EMPTY; 6335 } 6336 6337 switch (new_state) { 6338 case VM_PURGABLE_DENY: 6339 case VM_PURGABLE_NONVOLATILE: 6340 object->purgable = new_state; 6341 6342 if (old_state == VM_PURGABLE_VOLATILE) { 6343 unsigned int delta; 6344 6345 assert(object->resident_page_count >= 6346 object->wired_page_count); 6347 delta = (object->resident_page_count - 6348 object->wired_page_count); 6349 6350 assert(vm_page_purgeable_count >= delta); 6351 6352 if (delta != 0) { 6353 OSAddAtomic(-delta, 6354 (SInt32 *)&vm_page_purgeable_count); 6355 } 6356 if (object->wired_page_count != 0) { 6357 assert(vm_page_purgeable_wired_count >= 6358 object->wired_page_count); 6359 OSAddAtomic(-object->wired_page_count, 6360 (SInt32 *)&vm_page_purgeable_wired_count); 6361 } 6362 6363 vm_page_lock_queues(); 6364 6365 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ 6366 purgeable_q_t queue = vm_purgeable_object_remove(object); 6367 assert(queue); 6368 6369 if (object->purgeable_when_ripe) { 6370 vm_purgeable_token_delete_last(queue); 6371 } 6372 assert(queue->debug_count_objects>=0); 6373 6374 vm_page_unlock_queues(); 6375 } 6376 break; 6377 6378 case VM_PURGABLE_VOLATILE: 6379 if (object->volatile_fault) { 6380 vm_page_t p; 6381 int refmod; 6382 6383 queue_iterate(&object->memq, p, vm_page_t, listq) { 6384 if (p->busy || 6385 VM_PAGE_WIRED(p) || 6386 p->fictitious) { 6387 continue; 6388 } 6389 refmod = pmap_disconnect(p->phys_page); 6390 if ((refmod & VM_MEM_MODIFIED) && 6391 !p->dirty) { 6392 SET_PAGE_DIRTY(p, FALSE); 6393 } 6394 } 6395 } 6396 6397 if (old_state == VM_PURGABLE_EMPTY && 6398 object->resident_page_count == 0) 6399 break; 6400 6401 purgeable_q_t queue; 6402 6403 /* find the correct queue */ 6404 if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE) 6405 queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE]; 6406 else { 6407 if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO) 6408 queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; 6409 else 6410 queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; 6411 } 6412 6413 if (old_state == VM_PURGABLE_NONVOLATILE || 6414 old_state == VM_PURGABLE_EMPTY) { 6415 unsigned int delta; 6416 6417 if ((*state & VM_PURGABLE_NO_AGING_MASK) == 6418 VM_PURGABLE_NO_AGING) { 6419 object->purgeable_when_ripe = FALSE; 6420 } else { 6421 object->purgeable_when_ripe = TRUE; 6422 } 6423 6424 if (object->purgeable_when_ripe) { 6425 kern_return_t result; 6426 6427 /* try to add token... 
this can fail */
				vm_page_lock_queues();

				result = vm_purgeable_token_add(queue);
				if (result != KERN_SUCCESS) {
					vm_page_unlock_queues();
					return result;
				}
				vm_page_unlock_queues();
			}

			assert(object->resident_page_count >=
			       object->wired_page_count);
			delta = (object->resident_page_count -
				 object->wired_page_count);

			if (delta != 0) {
				OSAddAtomic(delta,
					    &vm_page_purgeable_count);
			}
			if (object->wired_page_count != 0) {
				OSAddAtomic(object->wired_page_count,
					    &vm_page_purgeable_wired_count);
			}

			object->purgable = new_state;

			/* object should not be on a queue */
			assert(object->objq.next == NULL && object->objq.prev == NULL);
		}
		else if (old_state == VM_PURGABLE_VOLATILE) {
			purgeable_q_t old_queue;
			boolean_t purgeable_when_ripe;

			/*
			 * if reassigning priorities / purgeable groups, we don't change the
			 * token queue. So moving priorities will not make pages stay around longer.
			 * Reasoning is that the algorithm gives most priority to the most important
			 * object. If a new token is added, the most important object's priority is boosted.
			 * This biases the system already for purgeable queues that move a lot.
			 * It doesn't seem more biasing is necessary in this case, where no new object is added.
			 */
			assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */

			old_queue = vm_purgeable_object_remove(object);
			assert(old_queue);

			if ((*state & VM_PURGABLE_NO_AGING_MASK) ==
			    VM_PURGABLE_NO_AGING) {
				purgeable_when_ripe = FALSE;
			} else {
				purgeable_when_ripe = TRUE;
			}

			if (old_queue != queue ||
			    (purgeable_when_ripe !=
			     object->purgeable_when_ripe)) {
				kern_return_t result;

				/* Changing queue. Have to move token.
*/ 6487 vm_page_lock_queues(); 6488 if (object->purgeable_when_ripe) { 6489 vm_purgeable_token_delete_last(old_queue); 6490 } 6491 object->purgeable_when_ripe = purgeable_when_ripe; 6492 if (object->purgeable_when_ripe) { 6493 result = vm_purgeable_token_add(queue); 6494 assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */ 6495 } 6496 vm_page_unlock_queues(); 6497 6498 } 6499 }; 6500 vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT ); 6501 6502 assert(queue->debug_count_objects>=0); 6503 6504 break; 6505 6506 6507 case VM_PURGABLE_EMPTY: 6508 if (object->volatile_fault) { 6509 vm_page_t p; 6510 int refmod; 6511 6512 queue_iterate(&object->memq, p, vm_page_t, listq) { 6513 if (p->busy || 6514 VM_PAGE_WIRED(p) || 6515 p->fictitious) { 6516 continue; 6517 } 6518 refmod = pmap_disconnect(p->phys_page); 6519 if ((refmod & VM_MEM_MODIFIED) && 6520 !p->dirty) { 6521 SET_PAGE_DIRTY(p, FALSE); 6522 } 6523 } 6524 } 6525 6526 if (old_state != new_state) { 6527 assert(old_state == VM_PURGABLE_NONVOLATILE || 6528 old_state == VM_PURGABLE_VOLATILE); 6529 if (old_state == VM_PURGABLE_VOLATILE) { 6530 purgeable_q_t old_queue; 6531 6532 /* object should be on a queue */ 6533 assert(object->objq.next != NULL && 6534 object->objq.prev != NULL); 6535 old_queue = vm_purgeable_object_remove(object); 6536 assert(old_queue); 6537 if (object->purgeable_when_ripe) { 6538 vm_page_lock_queues(); 6539 vm_purgeable_token_delete_first(old_queue); 6540 vm_page_unlock_queues(); 6541 } 6542 } 6543 (void) vm_object_purge(object); 6544 } 6545 break; 6546 6547 } 6548 *state = old_state; 6549 6550 return KERN_SUCCESS; 6551} 6552 6553kern_return_t 6554vm_object_get_page_counts( 6555 vm_object_t object, 6556 vm_object_offset_t offset, 6557 vm_object_size_t size, 6558 unsigned int *resident_page_count, 6559 unsigned int *dirty_page_count) 6560{ 6561 6562 kern_return_t kr = KERN_SUCCESS; 6563 boolean_t count_dirty_pages = FALSE; 6564 vm_page_t p = VM_PAGE_NULL; 6565 unsigned int local_resident_count = 0; 6566 unsigned int local_dirty_count = 0; 6567 vm_object_offset_t cur_offset = 0; 6568 vm_object_offset_t end_offset = 0; 6569 6570 if (object == VM_OBJECT_NULL) 6571 return KERN_INVALID_ARGUMENT; 6572 6573 6574 cur_offset = offset; 6575 6576 end_offset = offset + size; 6577 6578 vm_object_lock_assert_exclusive(object); 6579 6580 if (dirty_page_count != NULL) { 6581 6582 count_dirty_pages = TRUE; 6583 } 6584 6585 if (resident_page_count != NULL && count_dirty_pages == FALSE) { 6586 /* 6587 * Fast path when: 6588 * - we only want the resident page count, and, 6589 * - the entire object is exactly covered by the request. 
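 *
 * Otherwise, the code below picks one of two strategies: if the object
 * holds no more resident pages than the requested range spans, it walks
 * the object's page queue once and counts the pages falling inside the
 * range; otherwise it does a vm_page_lookup() for each page-sized
 * offset in the range.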
6590 */ 6591 if (offset == 0 && (object->vo_size == size)) { 6592 6593 *resident_page_count = object->resident_page_count; 6594 goto out; 6595 } 6596 } 6597 6598 if (object->resident_page_count <= (size >> PAGE_SHIFT)) { 6599 6600 queue_iterate(&object->memq, p, vm_page_t, listq) { 6601 6602 if (p->offset >= cur_offset && p->offset < end_offset) { 6603 6604 local_resident_count++; 6605 6606 if (count_dirty_pages) { 6607 6608 if (p->dirty || (p->wpmapped && pmap_is_modified(p->phys_page))) { 6609 6610 local_dirty_count++; 6611 } 6612 } 6613 } 6614 } 6615 } else { 6616 6617 for (cur_offset = offset; cur_offset < end_offset; cur_offset += PAGE_SIZE_64) { 6618 6619 p = vm_page_lookup(object, cur_offset); 6620 6621 if (p != VM_PAGE_NULL) { 6622 6623 local_resident_count++; 6624 6625 if (count_dirty_pages) { 6626 6627 if (p->dirty || (p->wpmapped && pmap_is_modified(p->phys_page))) { 6628 6629 local_dirty_count++; 6630 } 6631 } 6632 } 6633 } 6634 6635 } 6636 6637 if (resident_page_count != NULL) { 6638 *resident_page_count = local_resident_count; 6639 } 6640 6641 if (dirty_page_count != NULL) { 6642 *dirty_page_count = local_dirty_count; 6643 } 6644 6645out: 6646 return kr; 6647} 6648 6649 6650#if TASK_SWAPPER 6651/* 6652 * vm_object_res_deallocate 6653 * 6654 * (recursively) decrement residence counts on vm objects and their shadows. 6655 * Called from vm_object_deallocate and when swapping out an object. 6656 * 6657 * The object is locked, and remains locked throughout the function, 6658 * even as we iterate down the shadow chain. Locks on intermediate objects 6659 * will be dropped, but not the original object. 6660 * 6661 * NOTE: this function used to use recursion, rather than iteration. 6662 */ 6663 6664__private_extern__ void 6665vm_object_res_deallocate( 6666 vm_object_t object) 6667{ 6668 vm_object_t orig_object = object; 6669 /* 6670 * Object is locked so it can be called directly 6671 * from vm_object_deallocate. Original object is never 6672 * unlocked. 6673 */ 6674 assert(object->res_count > 0); 6675 while (--object->res_count == 0) { 6676 assert(object->ref_count >= object->res_count); 6677 vm_object_deactivate_all_pages(object); 6678 /* iterate on shadow, if present */ 6679 if (object->shadow != VM_OBJECT_NULL) { 6680 vm_object_t tmp_object = object->shadow; 6681 vm_object_lock(tmp_object); 6682 if (object != orig_object) 6683 vm_object_unlock(object); 6684 object = tmp_object; 6685 assert(object->res_count > 0); 6686 } else 6687 break; 6688 } 6689 if (object != orig_object) 6690 vm_object_unlock(object); 6691} 6692 6693/* 6694 * vm_object_res_reference 6695 * 6696 * Internal function to increment residence count on a vm object 6697 * and its shadows. It is called only from vm_object_reference, and 6698 * when swapping in a vm object, via vm_map_swap. 6699 * 6700 * The object is locked, and remains locked throughout the function, 6701 * even as we iterate down the shadow chain. Locks on intermediate objects 6702 * will be dropped, but not the original object. 6703 * 6704 * NOTE: this function used to use recursion, rather than iteration. 6705 */ 6706 6707__private_extern__ void 6708vm_object_res_reference( 6709 vm_object_t object) 6710{ 6711 vm_object_t orig_object = object; 6712 /* 6713 * Object is locked, so this can be called directly 6714 * from vm_object_reference. This lock is never released. 
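 * The loop below walks the shadow chain hand-over-hand: each shadow is
 * locked before the intermediate object above it is unlocked, and the
 * walk continues only while an object's res_count has just gone from
 * zero to one.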
6715 */ 6716 while ((++object->res_count == 1) && 6717 (object->shadow != VM_OBJECT_NULL)) { 6718 vm_object_t tmp_object = object->shadow; 6719 6720 assert(object->ref_count >= object->res_count); 6721 vm_object_lock(tmp_object); 6722 if (object != orig_object) 6723 vm_object_unlock(object); 6724 object = tmp_object; 6725 } 6726 if (object != orig_object) 6727 vm_object_unlock(object); 6728 assert(orig_object->ref_count >= orig_object->res_count); 6729} 6730#endif /* TASK_SWAPPER */ 6731 6732/* 6733 * vm_object_reference: 6734 * 6735 * Gets another reference to the given object. 6736 */ 6737#ifdef vm_object_reference 6738#undef vm_object_reference 6739#endif 6740__private_extern__ void 6741vm_object_reference( 6742 register vm_object_t object) 6743{ 6744 if (object == VM_OBJECT_NULL) 6745 return; 6746 6747 vm_object_lock(object); 6748 assert(object->ref_count > 0); 6749 vm_object_reference_locked(object); 6750 vm_object_unlock(object); 6751} 6752 6753#ifdef MACH_BSD 6754/* 6755 * Scale the vm_object_cache 6756 * This is required to make sure that the vm_object_cache is big 6757 * enough to effectively cache the mapped file. 6758 * This is really important with UBC as all the regular file vnodes 6759 * have memory object associated with them. Havving this cache too 6760 * small results in rapid reclaim of vnodes and hurts performance a LOT! 6761 * 6762 * This is also needed as number of vnodes can be dynamically scaled. 6763 */ 6764kern_return_t 6765adjust_vm_object_cache( 6766 __unused vm_size_t oval, 6767 __unused vm_size_t nval) 6768{ 6769#if VM_OBJECT_CACHE 6770 vm_object_cached_max = nval; 6771 vm_object_cache_trim(FALSE); 6772#endif 6773 return (KERN_SUCCESS); 6774} 6775#endif /* MACH_BSD */ 6776 6777 6778/* 6779 * vm_object_transpose 6780 * 6781 * This routine takes two VM objects of the same size and exchanges 6782 * their backing store. 6783 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE 6784 * and UPL_BLOCK_ACCESS if they are referenced anywhere. 6785 * 6786 * The VM objects must not be locked by caller. 6787 */ 6788unsigned int vm_object_transpose_count = 0; 6789kern_return_t 6790vm_object_transpose( 6791 vm_object_t object1, 6792 vm_object_t object2, 6793 vm_object_size_t transpose_size) 6794{ 6795 vm_object_t tmp_object; 6796 kern_return_t retval; 6797 boolean_t object1_locked, object2_locked; 6798 vm_page_t page; 6799 vm_object_offset_t page_offset; 6800 lck_mtx_t *hash_lck; 6801 vm_object_hash_entry_t hash_entry; 6802 6803 tmp_object = VM_OBJECT_NULL; 6804 object1_locked = FALSE; object2_locked = FALSE; 6805 6806 if (object1 == object2 || 6807 object1 == VM_OBJECT_NULL || 6808 object2 == VM_OBJECT_NULL) { 6809 /* 6810 * If the 2 VM objects are the same, there's 6811 * no point in exchanging their backing store. 6812 */ 6813 retval = KERN_INVALID_VALUE; 6814 goto done; 6815 } 6816 6817 /* 6818 * Since we need to lock both objects at the same time, 6819 * make sure we always lock them in the same order to 6820 * avoid deadlocks. 6821 */ 6822 if (object1 > object2) { 6823 tmp_object = object1; 6824 object1 = object2; 6825 object2 = tmp_object; 6826 } 6827 6828 /* 6829 * Allocate a temporary VM object to hold object1's contents 6830 * while we copy object2 to object1. 6831 */ 6832 tmp_object = vm_object_allocate(transpose_size); 6833 vm_object_lock(tmp_object); 6834 tmp_object->can_persist = FALSE; 6835 6836 6837 /* 6838 * Grab control of the 1st VM object. 
6839 */ 6840 vm_object_lock(object1); 6841 object1_locked = TRUE; 6842 if (!object1->alive || object1->terminating || 6843 object1->copy || object1->shadow || object1->shadowed || 6844 object1->purgable != VM_PURGABLE_DENY) { 6845 /* 6846 * We don't deal with copy or shadow objects (yet). 6847 */ 6848 retval = KERN_INVALID_VALUE; 6849 goto done; 6850 } 6851 /* 6852 * We're about to mess with the object's backing store and 6853 * taking a "paging_in_progress" reference wouldn't be enough 6854 * to prevent any paging activity on this object, so the caller should 6855 * have "quiesced" the objects beforehand, via a UPL operation with 6856 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired) 6857 * and UPL_BLOCK_ACCESS (to mark the pages "busy"). 6858 * 6859 * Wait for any paging operation to complete (but only paging, not 6860 * other kind of activities not linked to the pager). After we're 6861 * statisfied that there's no more paging in progress, we keep the 6862 * object locked, to guarantee that no one tries to access its pager. 6863 */ 6864 vm_object_paging_only_wait(object1, THREAD_UNINT); 6865 6866 /* 6867 * Same as above for the 2nd object... 6868 */ 6869 vm_object_lock(object2); 6870 object2_locked = TRUE; 6871 if (! object2->alive || object2->terminating || 6872 object2->copy || object2->shadow || object2->shadowed || 6873 object2->purgable != VM_PURGABLE_DENY) { 6874 retval = KERN_INVALID_VALUE; 6875 goto done; 6876 } 6877 vm_object_paging_only_wait(object2, THREAD_UNINT); 6878 6879 6880 if (object1->vo_size != object2->vo_size || 6881 object1->vo_size != transpose_size) { 6882 /* 6883 * If the 2 objects don't have the same size, we can't 6884 * exchange their backing stores or one would overflow. 6885 * If their size doesn't match the caller's 6886 * "transpose_size", we can't do it either because the 6887 * transpose operation will affect the entire span of 6888 * the objects. 6889 */ 6890 retval = KERN_INVALID_VALUE; 6891 goto done; 6892 } 6893 6894 6895 /* 6896 * Transpose the lists of resident pages. 6897 * This also updates the resident_page_count and the memq_hint. 6898 */ 6899 if (object1->phys_contiguous || queue_empty(&object1->memq)) { 6900 /* 6901 * No pages in object1, just transfer pages 6902 * from object2 to object1. No need to go through 6903 * an intermediate object. 6904 */ 6905 while (!queue_empty(&object2->memq)) { 6906 page = (vm_page_t) queue_first(&object2->memq); 6907 vm_page_rename(page, object1, page->offset, FALSE); 6908 } 6909 assert(queue_empty(&object2->memq)); 6910 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) { 6911 /* 6912 * No pages in object2, just transfer pages 6913 * from object1 to object2. No need to go through 6914 * an intermediate object. 
6915 */ 6916 while (!queue_empty(&object1->memq)) { 6917 page = (vm_page_t) queue_first(&object1->memq); 6918 vm_page_rename(page, object2, page->offset, FALSE); 6919 } 6920 assert(queue_empty(&object1->memq)); 6921 } else { 6922 /* transfer object1's pages to tmp_object */ 6923 while (!queue_empty(&object1->memq)) { 6924 page = (vm_page_t) queue_first(&object1->memq); 6925 page_offset = page->offset; 6926 vm_page_remove(page, TRUE); 6927 page->offset = page_offset; 6928 queue_enter(&tmp_object->memq, page, vm_page_t, listq); 6929 } 6930 assert(queue_empty(&object1->memq)); 6931 /* transfer object2's pages to object1 */ 6932 while (!queue_empty(&object2->memq)) { 6933 page = (vm_page_t) queue_first(&object2->memq); 6934 vm_page_rename(page, object1, page->offset, FALSE); 6935 } 6936 assert(queue_empty(&object2->memq)); 6937 /* transfer tmp_object's pages to object1 */ 6938 while (!queue_empty(&tmp_object->memq)) { 6939 page = (vm_page_t) queue_first(&tmp_object->memq); 6940 queue_remove(&tmp_object->memq, page, 6941 vm_page_t, listq); 6942 vm_page_insert(page, object2, page->offset); 6943 } 6944 assert(queue_empty(&tmp_object->memq)); 6945 } 6946 6947#define __TRANSPOSE_FIELD(field) \ 6948MACRO_BEGIN \ 6949 tmp_object->field = object1->field; \ 6950 object1->field = object2->field; \ 6951 object2->field = tmp_object->field; \ 6952MACRO_END 6953 6954 /* "Lock" refers to the object not its contents */ 6955 /* "size" should be identical */ 6956 assert(object1->vo_size == object2->vo_size); 6957 /* "memq_hint" was updated above when transposing pages */ 6958 /* "ref_count" refers to the object not its contents */ 6959#if TASK_SWAPPER 6960 /* "res_count" refers to the object not its contents */ 6961#endif 6962 /* "resident_page_count" was updated above when transposing pages */ 6963 /* "wired_page_count" was updated above when transposing pages */ 6964 /* "reusable_page_count" was updated above when transposing pages */ 6965 /* there should be no "copy" */ 6966 assert(!object1->copy); 6967 assert(!object2->copy); 6968 /* there should be no "shadow" */ 6969 assert(!object1->shadow); 6970 assert(!object2->shadow); 6971 __TRANSPOSE_FIELD(vo_shadow_offset); /* used by phys_contiguous objects */ 6972 __TRANSPOSE_FIELD(pager); 6973 __TRANSPOSE_FIELD(paging_offset); 6974 __TRANSPOSE_FIELD(pager_control); 6975 /* update the memory_objects' pointers back to the VM objects */ 6976 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 6977 memory_object_control_collapse(object1->pager_control, 6978 object1); 6979 } 6980 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 6981 memory_object_control_collapse(object2->pager_control, 6982 object2); 6983 } 6984 __TRANSPOSE_FIELD(copy_strategy); 6985 /* "paging_in_progress" refers to the object not its contents */ 6986 assert(!object1->paging_in_progress); 6987 assert(!object2->paging_in_progress); 6988 assert(object1->activity_in_progress); 6989 assert(object2->activity_in_progress); 6990 /* "all_wanted" refers to the object not its contents */ 6991 __TRANSPOSE_FIELD(pager_created); 6992 __TRANSPOSE_FIELD(pager_initialized); 6993 __TRANSPOSE_FIELD(pager_ready); 6994 __TRANSPOSE_FIELD(pager_trusted); 6995 __TRANSPOSE_FIELD(can_persist); 6996 __TRANSPOSE_FIELD(internal); 6997 __TRANSPOSE_FIELD(temporary); 6998 __TRANSPOSE_FIELD(private); 6999 __TRANSPOSE_FIELD(pageout); 7000 /* "alive" should be set */ 7001 assert(object1->alive); 7002 assert(object2->alive); 7003 /* "purgeable" should be non-purgeable */ 7004 assert(object1->purgable == 
VM_PURGABLE_DENY); 7005 assert(object2->purgable == VM_PURGABLE_DENY); 7006 /* "shadowed" refers to the the object not its contents */ 7007 __TRANSPOSE_FIELD(purgeable_when_ripe); 7008 __TRANSPOSE_FIELD(advisory_pageout); 7009 __TRANSPOSE_FIELD(true_share); 7010 /* "terminating" should not be set */ 7011 assert(!object1->terminating); 7012 assert(!object2->terminating); 7013 __TRANSPOSE_FIELD(named); 7014 /* "shadow_severed" refers to the object not its contents */ 7015 __TRANSPOSE_FIELD(phys_contiguous); 7016 __TRANSPOSE_FIELD(nophyscache); 7017 /* "cached_list.next" points to transposed object */ 7018 object1->cached_list.next = (queue_entry_t) object2; 7019 object2->cached_list.next = (queue_entry_t) object1; 7020 /* "cached_list.prev" should be NULL */ 7021 assert(object1->cached_list.prev == NULL); 7022 assert(object2->cached_list.prev == NULL); 7023 /* "msr_q" is linked to the object not its contents */ 7024 assert(queue_empty(&object1->msr_q)); 7025 assert(queue_empty(&object2->msr_q)); 7026 __TRANSPOSE_FIELD(last_alloc); 7027 __TRANSPOSE_FIELD(sequential); 7028 __TRANSPOSE_FIELD(pages_created); 7029 __TRANSPOSE_FIELD(pages_used); 7030 __TRANSPOSE_FIELD(scan_collisions); 7031#if MACH_PAGEMAP 7032 __TRANSPOSE_FIELD(existence_map); 7033#endif 7034 __TRANSPOSE_FIELD(cow_hint); 7035#if MACH_ASSERT 7036 __TRANSPOSE_FIELD(paging_object); 7037#endif 7038 __TRANSPOSE_FIELD(wimg_bits); 7039 __TRANSPOSE_FIELD(set_cache_attr); 7040 __TRANSPOSE_FIELD(code_signed); 7041 if (object1->hashed) { 7042 hash_lck = vm_object_hash_lock_spin(object2->pager); 7043 hash_entry = vm_object_hash_lookup(object2->pager, FALSE); 7044 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); 7045 hash_entry->object = object2; 7046 vm_object_hash_unlock(hash_lck); 7047 } 7048 if (object2->hashed) { 7049 hash_lck = vm_object_hash_lock_spin(object1->pager); 7050 hash_entry = vm_object_hash_lookup(object1->pager, FALSE); 7051 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); 7052 hash_entry->object = object1; 7053 vm_object_hash_unlock(hash_lck); 7054 } 7055 __TRANSPOSE_FIELD(hashed); 7056 object1->transposed = TRUE; 7057 object2->transposed = TRUE; 7058 __TRANSPOSE_FIELD(mapping_in_progress); 7059 __TRANSPOSE_FIELD(volatile_empty); 7060 __TRANSPOSE_FIELD(volatile_fault); 7061 __TRANSPOSE_FIELD(all_reusable); 7062 assert(object1->blocked_access); 7063 assert(object2->blocked_access); 7064 assert(object1->__object2_unused_bits == 0); 7065 assert(object2->__object2_unused_bits == 0); 7066#if UPL_DEBUG 7067 /* "uplq" refers to the object not its contents (see upl_transpose()) */ 7068#endif 7069 assert(object1->objq.next == NULL); 7070 assert(object1->objq.prev == NULL); 7071 assert(object2->objq.next == NULL); 7072 assert(object2->objq.prev == NULL); 7073 7074#undef __TRANSPOSE_FIELD 7075 7076 retval = KERN_SUCCESS; 7077 7078done: 7079 /* 7080 * Cleanup. 7081 */ 7082 if (tmp_object != VM_OBJECT_NULL) { 7083 vm_object_unlock(tmp_object); 7084 /* 7085 * Re-initialize the temporary object to avoid 7086 * deallocating a real pager. 
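 * (The __TRANSPOSE_FIELD() swaps above left tmp_object holding object1's
 * original "pager" and related fields, which object2 now owns, so a
 * plain deallocation of tmp_object could otherwise tear down a live
 * pager.)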
7087 */ 7088 _vm_object_allocate(transpose_size, tmp_object); 7089 vm_object_deallocate(tmp_object); 7090 tmp_object = VM_OBJECT_NULL; 7091 } 7092 7093 if (object1_locked) { 7094 vm_object_unlock(object1); 7095 object1_locked = FALSE; 7096 } 7097 if (object2_locked) { 7098 vm_object_unlock(object2); 7099 object2_locked = FALSE; 7100 } 7101 7102 vm_object_transpose_count++; 7103 7104 return retval; 7105} 7106 7107 7108/* 7109 * vm_object_cluster_size 7110 * 7111 * Determine how big a cluster we should issue an I/O for... 7112 * 7113 * Inputs: *start == offset of page needed 7114 * *length == maximum cluster pager can handle 7115 * Outputs: *start == beginning offset of cluster 7116 * *length == length of cluster to try 7117 * 7118 * The original *start will be encompassed by the cluster 7119 * 7120 */ 7121extern int speculative_reads_disabled; 7122extern int ignore_is_ssd; 7123 7124unsigned int preheat_pages_max = MAX_UPL_TRANSFER; 7125unsigned int preheat_pages_min = 8; 7126 7127uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1]; 7128uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1]; 7129 7130 7131__private_extern__ void 7132vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, 7133 vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming) 7134{ 7135 vm_size_t pre_heat_size; 7136 vm_size_t tail_size; 7137 vm_size_t head_size; 7138 vm_size_t max_length; 7139 vm_size_t cluster_size; 7140 vm_object_offset_t object_size; 7141 vm_object_offset_t orig_start; 7142 vm_object_offset_t target_start; 7143 vm_object_offset_t offset; 7144 vm_behavior_t behavior; 7145 boolean_t look_behind = TRUE; 7146 boolean_t look_ahead = TRUE; 7147 boolean_t isSSD = FALSE; 7148 uint32_t throttle_limit; 7149 int sequential_run; 7150 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 7151 unsigned int max_ph_size; 7152 unsigned int min_ph_size; 7153 unsigned int min_ph_size_in_bytes; 7154 7155 assert( !(*length & PAGE_MASK)); 7156 assert( !(*start & PAGE_MASK_64)); 7157 7158 /* 7159 * remember maxiumum length of run requested 7160 */ 7161 max_length = *length; 7162 /* 7163 * we'll always return a cluster size of at least 7164 * 1 page, since the original fault must always 7165 * be processed 7166 */ 7167 *length = PAGE_SIZE; 7168 *io_streaming = 0; 7169 7170 if (speculative_reads_disabled || fault_info == NULL) { 7171 /* 7172 * no cluster... 
just fault the page in 7173 */ 7174 return; 7175 } 7176 orig_start = *start; 7177 target_start = orig_start; 7178 cluster_size = round_page(fault_info->cluster_size); 7179 behavior = fault_info->behavior; 7180 7181 vm_object_lock(object); 7182 7183 if (object->pager == MEMORY_OBJECT_NULL) 7184 goto out; /* pager is gone for this object, nothing more to do */ 7185 7186 if (!ignore_is_ssd) 7187 vnode_pager_get_isSSD(object->pager, &isSSD); 7188 7189 min_ph_size = preheat_pages_min; 7190 max_ph_size = preheat_pages_max; 7191 7192 if (isSSD) { 7193 min_ph_size /= 2; 7194 max_ph_size /= 8; 7195 } 7196 if (min_ph_size < 1) 7197 min_ph_size = 1; 7198 7199 if (max_ph_size < 1) 7200 max_ph_size = 1; 7201 else if (max_ph_size > MAX_UPL_TRANSFER) 7202 max_ph_size = MAX_UPL_TRANSFER; 7203 7204 if (max_length > (max_ph_size * PAGE_SIZE)) 7205 max_length = max_ph_size * PAGE_SIZE; 7206 7207 if (max_length <= PAGE_SIZE) 7208 goto out; 7209 7210 min_ph_size_in_bytes = min_ph_size * PAGE_SIZE; 7211 7212 if (object->internal) 7213 object_size = object->vo_size; 7214 else 7215 vnode_pager_get_object_size(object->pager, &object_size); 7216 7217 object_size = round_page_64(object_size); 7218 7219 if (orig_start >= object_size) { 7220 /* 7221 * fault occurred beyond the EOF... 7222 * we need to punt w/o changing the 7223 * starting offset 7224 */ 7225 goto out; 7226 } 7227 if (object->pages_used > object->pages_created) { 7228 /* 7229 * must have wrapped our 32 bit counters 7230 * so reset 7231 */ 7232 object->pages_used = object->pages_created = 0; 7233 } 7234 if ((sequential_run = object->sequential)) { 7235 if (sequential_run < 0) { 7236 sequential_behavior = VM_BEHAVIOR_RSEQNTL; 7237 sequential_run = 0 - sequential_run; 7238 } else { 7239 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 7240 } 7241 7242 } 7243 switch (behavior) { 7244 7245 default: 7246 behavior = VM_BEHAVIOR_DEFAULT; 7247 7248 case VM_BEHAVIOR_DEFAULT: 7249 if (object->internal && fault_info->user_tag == VM_MEMORY_STACK) 7250 goto out; 7251 7252 if (sequential_run >= (3 * PAGE_SIZE)) { 7253 pre_heat_size = sequential_run + PAGE_SIZE; 7254 7255 if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) 7256 look_behind = FALSE; 7257 else 7258 look_ahead = FALSE; 7259 7260 *io_streaming = 1; 7261 } else { 7262 7263 if (object->pages_created < (20 * min_ph_size)) { 7264 /* 7265 * prime the pump 7266 */ 7267 pre_heat_size = min_ph_size_in_bytes; 7268 } else { 7269 /* 7270 * Linear growth in PH size: The maximum size is max_length... 7271 * this cacluation will result in a size that is neither a 7272 * power of 2 nor a multiple of PAGE_SIZE... 
so round 7273 * it up to the nearest PAGE_SIZE boundary 7274 */ 7275 pre_heat_size = (max_length * object->pages_used) / object->pages_created; 7276 7277 if (pre_heat_size < min_ph_size_in_bytes) 7278 pre_heat_size = min_ph_size_in_bytes; 7279 else 7280 pre_heat_size = round_page(pre_heat_size); 7281 } 7282 } 7283 break; 7284 7285 case VM_BEHAVIOR_RANDOM: 7286 if ((pre_heat_size = cluster_size) <= PAGE_SIZE) 7287 goto out; 7288 break; 7289 7290 case VM_BEHAVIOR_SEQUENTIAL: 7291 if ((pre_heat_size = cluster_size) == 0) 7292 pre_heat_size = sequential_run + PAGE_SIZE; 7293 look_behind = FALSE; 7294 *io_streaming = 1; 7295 7296 break; 7297 7298 case VM_BEHAVIOR_RSEQNTL: 7299 if ((pre_heat_size = cluster_size) == 0) 7300 pre_heat_size = sequential_run + PAGE_SIZE; 7301 look_ahead = FALSE; 7302 *io_streaming = 1; 7303 7304 break; 7305 7306 } 7307 throttle_limit = (uint32_t) max_length; 7308 assert(throttle_limit == max_length); 7309 7310 if (vnode_pager_get_throttle_io_limit(object->pager, &throttle_limit) == KERN_SUCCESS) { 7311 if (max_length > throttle_limit) 7312 max_length = throttle_limit; 7313 } 7314 if (pre_heat_size > max_length) 7315 pre_heat_size = max_length; 7316 7317 if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size_in_bytes)) { 7318 7319 unsigned int consider_free = vm_page_free_count + vm_page_cleaned_count; 7320 7321 if (consider_free < vm_page_throttle_limit) { 7322 pre_heat_size = trunc_page(pre_heat_size / 16); 7323 } else if (consider_free < vm_page_free_target) { 7324 pre_heat_size = trunc_page(pre_heat_size / 4); 7325 } 7326 7327 if (pre_heat_size < min_ph_size_in_bytes) 7328 pre_heat_size = min_ph_size_in_bytes; 7329 } 7330 if (look_ahead == TRUE) { 7331 if (look_behind == TRUE) { 7332 /* 7333 * if we get here its due to a random access... 7334 * so we want to center the original fault address 7335 * within the cluster we will issue... make sure 7336 * to calculate 'head_size' as a multiple of PAGE_SIZE... 7337 * 'pre_heat_size' is a multiple of PAGE_SIZE but not 7338 * necessarily an even number of pages so we need to truncate 7339 * the result to a PAGE_SIZE boundary 7340 */ 7341 head_size = trunc_page(pre_heat_size / 2); 7342 7343 if (target_start > head_size) 7344 target_start -= head_size; 7345 else 7346 target_start = 0; 7347 7348 /* 7349 * 'target_start' at this point represents the beginning offset 7350 * of the cluster we are considering... 'orig_start' will be in 7351 * the center of this cluster if we didn't have to clip the start 7352 * due to running into the start of the file 7353 */ 7354 } 7355 if ((target_start + pre_heat_size) > object_size) 7356 pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start)); 7357 /* 7358 * at this point caclulate the number of pages beyond the original fault 7359 * address that we want to consider... this is guaranteed not to extend beyond 7360 * the current EOF... 
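 *
 * As an illustration (example numbers only, assuming 4KB pages and no
 * clipping at the start or end of the file): with a 32KB pre_heat_size
 * and a random-access fault, head_size above came out to 16KB, so
 * target_start sits 4 pages before orig_start and tail_size below works
 * out to 32KB - 16KB - 4KB = 12KB, i.e. 3 pages beyond the faulting page.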
7361 */ 7362 assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start)); 7363 tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE; 7364 } else { 7365 if (pre_heat_size > target_start) { 7366 /* 7367 * since pre_heat_size is always smaller then 2^32, 7368 * if it is larger then target_start (a 64 bit value) 7369 * it is safe to clip target_start to 32 bits 7370 */ 7371 pre_heat_size = (vm_size_t) target_start; 7372 } 7373 tail_size = 0; 7374 } 7375 assert( !(target_start & PAGE_MASK_64)); 7376 assert( !(pre_heat_size & PAGE_MASK)); 7377 7378 pre_heat_scaling[pre_heat_size / PAGE_SIZE]++; 7379 7380 if (pre_heat_size <= PAGE_SIZE) 7381 goto out; 7382 7383 if (look_behind == TRUE) { 7384 /* 7385 * take a look at the pages before the original 7386 * faulting offset... recalculate this in case 7387 * we had to clip 'pre_heat_size' above to keep 7388 * from running past the EOF. 7389 */ 7390 head_size = pre_heat_size - tail_size - PAGE_SIZE; 7391 7392 for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) { 7393 /* 7394 * don't poke below the lowest offset 7395 */ 7396 if (offset < fault_info->lo_offset) 7397 break; 7398 /* 7399 * for external objects and internal objects w/o an existence map 7400 * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN 7401 */ 7402#if MACH_PAGEMAP 7403 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) { 7404 /* 7405 * we know for a fact that the pager can't provide the page 7406 * so don't include it or any pages beyond it in this cluster 7407 */ 7408 break; 7409 } 7410#endif /* MACH_PAGEMAP */ 7411 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) 7412 == VM_EXTERNAL_STATE_ABSENT) { 7413 break; 7414 } 7415 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { 7416 /* 7417 * don't bridge resident pages 7418 */ 7419 break; 7420 } 7421 *start = offset; 7422 *length += PAGE_SIZE; 7423 } 7424 } 7425 if (look_ahead == TRUE) { 7426 for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) { 7427 /* 7428 * don't poke above the highest offset 7429 */ 7430 if (offset >= fault_info->hi_offset) 7431 break; 7432 assert(offset < object_size); 7433 7434 /* 7435 * for external objects and internal objects w/o an existence map 7436 * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN 7437 */ 7438#if MACH_PAGEMAP 7439 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) { 7440 /* 7441 * we know for a fact that the pager can't provide the page 7442 * so don't include it or any pages beyond it in this cluster 7443 */ 7444 break; 7445 } 7446#endif /* MACH_PAGEMAP */ 7447 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) 7448 == VM_EXTERNAL_STATE_ABSENT) { 7449 break; 7450 } 7451 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { 7452 /* 7453 * don't bridge resident pages 7454 */ 7455 break; 7456 } 7457 *length += PAGE_SIZE; 7458 } 7459 } 7460out: 7461 if (*length > max_length) 7462 *length = max_length; 7463 7464 pre_heat_cluster[*length / PAGE_SIZE]++; 7465 7466 vm_object_unlock(object); 7467 7468 DTRACE_VM1(clustersize, vm_size_t, *length); 7469} 7470 7471 7472/* 7473 * Allow manipulation of individual page state. 
This is actually part of 7474 * the UPL regimen but takes place on the VM object rather than on a UPL 7475 */ 7476 7477kern_return_t 7478vm_object_page_op( 7479 vm_object_t object, 7480 vm_object_offset_t offset, 7481 int ops, 7482 ppnum_t *phys_entry, 7483 int *flags) 7484{ 7485 vm_page_t dst_page; 7486 7487 vm_object_lock(object); 7488 7489 if(ops & UPL_POP_PHYSICAL) { 7490 if(object->phys_contiguous) { 7491 if (phys_entry) { 7492 *phys_entry = (ppnum_t) 7493 (object->vo_shadow_offset >> PAGE_SHIFT); 7494 } 7495 vm_object_unlock(object); 7496 return KERN_SUCCESS; 7497 } else { 7498 vm_object_unlock(object); 7499 return KERN_INVALID_OBJECT; 7500 } 7501 } 7502 if(object->phys_contiguous) { 7503 vm_object_unlock(object); 7504 return KERN_INVALID_OBJECT; 7505 } 7506 7507 while(TRUE) { 7508 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) { 7509 vm_object_unlock(object); 7510 return KERN_FAILURE; 7511 } 7512 7513 /* Sync up on getting the busy bit */ 7514 if((dst_page->busy || dst_page->cleaning) && 7515 (((ops & UPL_POP_SET) && 7516 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) { 7517 /* someone else is playing with the page, we will */ 7518 /* have to wait */ 7519 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 7520 continue; 7521 } 7522 7523 if (ops & UPL_POP_DUMP) { 7524 if (dst_page->pmapped == TRUE) 7525 pmap_disconnect(dst_page->phys_page); 7526 7527 VM_PAGE_FREE(dst_page); 7528 break; 7529 } 7530 7531 if (flags) { 7532 *flags = 0; 7533 7534 /* Get the condition of flags before requested ops */ 7535 /* are undertaken */ 7536 7537 if(dst_page->dirty) *flags |= UPL_POP_DIRTY; 7538 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT; 7539 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS; 7540 if(dst_page->absent) *flags |= UPL_POP_ABSENT; 7541 if(dst_page->busy) *flags |= UPL_POP_BUSY; 7542 } 7543 7544 /* The caller should have made a call either contingent with */ 7545 /* or prior to this call to set UPL_POP_BUSY */ 7546 if(ops & UPL_POP_SET) { 7547 /* The protection granted with this assert will */ 7548 /* not be complete. If the caller violates the */ 7549 /* convention and attempts to change page state */ 7550 /* without first setting busy we may not see it */ 7551 /* because the page may already be busy. However */ 7552 /* if such violations occur we will assert sooner */ 7553 /* or later. */ 7554 assert(dst_page->busy || (ops & UPL_POP_BUSY)); 7555 if (ops & UPL_POP_DIRTY) { 7556 SET_PAGE_DIRTY(dst_page, FALSE); 7557 } 7558 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE; 7559 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE; 7560 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE; 7561 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE; 7562 } 7563 7564 if(ops & UPL_POP_CLR) { 7565 assert(dst_page->busy); 7566 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE; 7567 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE; 7568 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE; 7569 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE; 7570 if (ops & UPL_POP_BUSY) { 7571 dst_page->busy = FALSE; 7572 PAGE_WAKEUP(dst_page); 7573 } 7574 } 7575 7576 if (dst_page->encrypted) { 7577 /* 7578 * ENCRYPTED SWAP: 7579 * We need to decrypt this encrypted page before the 7580 * caller can access its contents. 7581 * But if the caller really wants to access the page's 7582 * contents, they have to keep the page "busy". 7583 * Otherwise, the page could get recycled or re-encrypted 7584 * at any time. 
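 * The two cases below: if the caller is setting and holding the "busy"
 * bit, the page is stable enough to decrypt in place; otherwise it is
 * left encrypted and the caller must not have asked for its physical
 * address.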
7585 */ 7586 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) && 7587 dst_page->busy) { 7588 /* 7589 * The page is stable enough to be accessed by 7590 * the caller, so make sure its contents are 7591 * not encrypted. 7592 */ 7593 vm_page_decrypt(dst_page, 0); 7594 } else { 7595 /* 7596 * The page is not busy, so don't bother 7597 * decrypting it, since anything could 7598 * happen to it between now and when the 7599 * caller wants to access it. 7600 * We should not give the caller access 7601 * to this page. 7602 */ 7603 assert(!phys_entry); 7604 } 7605 } 7606 7607 if (phys_entry) { 7608 /* 7609 * The physical page number will remain valid 7610 * only if the page is kept busy. 7611 * ENCRYPTED SWAP: make sure we don't let the 7612 * caller access an encrypted page. 7613 */ 7614 assert(dst_page->busy); 7615 assert(!dst_page->encrypted); 7616 *phys_entry = dst_page->phys_page; 7617 } 7618 7619 break; 7620 } 7621 7622 vm_object_unlock(object); 7623 return KERN_SUCCESS; 7624 7625} 7626 7627/* 7628 * vm_object_range_op offers performance enhancement over 7629 * vm_object_page_op for page_op functions which do not require page 7630 * level state to be returned from the call. Page_op was created to provide 7631 * a low-cost alternative to page manipulation via UPLs when only a single 7632 * page was involved. The range_op call establishes the ability in the _op 7633 * family of functions to work on multiple pages where the lack of page level 7634 * state handling allows the caller to avoid the overhead of the upl structures. 7635 */ 7636 7637kern_return_t 7638vm_object_range_op( 7639 vm_object_t object, 7640 vm_object_offset_t offset_beg, 7641 vm_object_offset_t offset_end, 7642 int ops, 7643 uint32_t *range) 7644{ 7645 vm_object_offset_t offset; 7646 vm_page_t dst_page; 7647 7648 if (offset_end - offset_beg > (uint32_t) -1) { 7649 /* range is too big and would overflow "*range" */ 7650 return KERN_INVALID_ARGUMENT; 7651 } 7652 if (object->resident_page_count == 0) { 7653 if (range) { 7654 if (ops & UPL_ROP_PRESENT) { 7655 *range = 0; 7656 } else { 7657 *range = (uint32_t) (offset_end - offset_beg); 7658 assert(*range == (offset_end - offset_beg)); 7659 } 7660 } 7661 return KERN_SUCCESS; 7662 } 7663 vm_object_lock(object); 7664 7665 if (object->phys_contiguous) { 7666 vm_object_unlock(object); 7667 return KERN_INVALID_OBJECT; 7668 } 7669 7670 offset = offset_beg & ~PAGE_MASK_64; 7671 7672 while (offset < offset_end) { 7673 dst_page = vm_page_lookup(object, offset); 7674 if (dst_page != VM_PAGE_NULL) { 7675 if (ops & UPL_ROP_DUMP) { 7676 if (dst_page->busy || dst_page->cleaning) { 7677 /* 7678 * someone else is playing with the 7679 * page, we will have to wait 7680 */ 7681 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 7682 /* 7683 * need to relook the page up since it's 7684 * state may have changed while we slept 7685 * it might even belong to a different object 7686 * at this point 7687 */ 7688 continue; 7689 } 7690 if (dst_page->laundry) { 7691 dst_page->pageout = FALSE; 7692 7693 vm_pageout_steal_laundry(dst_page, FALSE); 7694 } 7695 if (dst_page->pmapped == TRUE) 7696 pmap_disconnect(dst_page->phys_page); 7697 7698 VM_PAGE_FREE(dst_page); 7699 7700 } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent) 7701 break; 7702 } else if (ops & UPL_ROP_PRESENT) 7703 break; 7704 7705 offset += PAGE_SIZE; 7706 } 7707 vm_object_unlock(object); 7708 7709 if (range) { 7710 if (offset > offset_end) 7711 offset = offset_end; 7712 if(offset > offset_beg) { 7713 *range = (uint32_t) (offset - offset_beg); 7714 
			assert(*range == (offset - offset_beg));
		} else {
			*range = 0;
		}
	}
	return KERN_SUCCESS;
}

/*
 * Used to point a pager directly to a range of memory (when the pager
 * may be associated with a non-device vnode).  Takes a virtual address,
 * an offset, and a size.  We currently expect that the virtual address
 * will denote the start of a range that is physically contiguous.
 */
kern_return_t pager_map_to_phys_contiguous(
	memory_object_control_t	object,
	memory_object_offset_t	offset,
	addr64_t		base_vaddr,
	vm_size_t		size)
{
	ppnum_t page_num;
	boolean_t clobbered_private;
	kern_return_t retval;
	vm_object_t pager_object;

	page_num = pmap_find_phys(kernel_pmap, base_vaddr);

	if (!page_num) {
		retval = KERN_FAILURE;
		goto out;
	}

	pager_object = memory_object_control_to_vm_object(object);

	if (!pager_object) {
		retval = KERN_FAILURE;
		goto out;
	}

	clobbered_private = pager_object->private;
	pager_object->private = TRUE;
	retval = vm_object_populate_with_private(pager_object, offset, page_num, size);

	if (retval != KERN_SUCCESS)
		pager_object->private = clobbered_private;

out:
	return retval;
}

uint32_t scan_object_collision = 0;

void
vm_object_lock(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		mutex_pause(2);
	}
	lck_rw_lock_exclusive(&object->Lock);
}

boolean_t
vm_object_lock_avoid(vm_object_t object)
{
	if (object == vm_pageout_scan_wants_object) {
		scan_object_collision++;
		return TRUE;
	}
	return FALSE;
}

boolean_t
_vm_object_lock_try(vm_object_t object)
{
	return (lck_rw_try_lock_exclusive(&object->Lock));
}

boolean_t
vm_object_lock_try(vm_object_t object)
{
	/*
	 * Called from hibernate path so check before blocking.
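	 * mutex_pause() can block, so only yield here when blocking is
	 * legal: interrupts enabled and preemption level zero.  Either
	 * way, fall through to the non-blocking try-lock below.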
	 */
	if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled() && get_preemption_level() == 0) {
		mutex_pause(2);
	}
	return _vm_object_lock_try(object);
}

void
vm_object_lock_shared(vm_object_t object)
{
	if (vm_object_lock_avoid(object)) {
		mutex_pause(2);
	}
	lck_rw_lock_shared(&object->Lock);
}

boolean_t
vm_object_lock_try_shared(vm_object_t object)
{
	if (vm_object_lock_avoid(object)) {
		mutex_pause(2);
	}
	return (lck_rw_try_lock_shared(&object->Lock));
}


unsigned int vm_object_change_wimg_mode_count = 0;

/*
 * The object must be locked
 */
void
vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode)
{
	vm_page_t p;

	vm_object_lock_assert_exclusive(object);

	vm_object_paging_wait(object, THREAD_UNINT);

	queue_iterate(&object->memq, p, vm_page_t, listq) {

		if (!p->fictitious)
			pmap_set_cache_attributes(p->phys_page, wimg_mode);
	}
	if (wimg_mode == VM_WIMG_USE_DEFAULT)
		object->set_cache_attr = FALSE;
	else
		object->set_cache_attr = TRUE;

	object->wimg_bits = wimg_mode;

	vm_object_change_wimg_mode_count++;
}

#if CONFIG_FREEZE

kern_return_t vm_object_pack(
	unsigned int	*purgeable_count,
	unsigned int	*wired_count,
	unsigned int	*clean_count,
	unsigned int	*dirty_count,
	unsigned int	dirty_budget,
	boolean_t	*shared,
	vm_object_t	src_object,
	struct default_freezer_handle *df_handle)
{
	kern_return_t	kr = KERN_SUCCESS;

	vm_object_lock(src_object);

	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
	*shared = FALSE;

	if (!src_object->alive || src_object->terminating) {
		kr = KERN_FAILURE;
		goto done;
	}

	if (src_object->purgable == VM_PURGABLE_VOLATILE) {
		*purgeable_count = src_object->resident_page_count;

		/*
		 * If the default freezer handle is null, we're just walking
		 * the pages to discover how many can be hibernated.
		 */
		if (df_handle != NULL) {
			purgeable_q_t queue;
			/* object should be on a queue */
			assert(src_object->objq.next != NULL &&
			       src_object->objq.prev != NULL);
			queue = vm_purgeable_object_remove(src_object);
			assert(queue);
			if (src_object->purgeable_when_ripe) {
				vm_page_lock_queues();
				vm_purgeable_token_delete_first(queue);
				vm_page_unlock_queues();
			}
			vm_object_purge(src_object);
		}
		goto done;
	}

	if (src_object->ref_count == 1) {
		vm_object_pack_pages(wired_count, clean_count, dirty_count, dirty_budget, src_object, df_handle);
	} else {
		if (src_object->internal) {
			*shared = TRUE;
		}
	}
done:
	vm_object_unlock(src_object);

	return kr;
}


void
vm_object_pack_pages(
	unsigned int		*wired_count,
	unsigned int		*clean_count,
	unsigned int		*dirty_count,
	unsigned int		dirty_budget,
	vm_object_t		src_object,
	struct default_freezer_handle *df_handle)
{
	vm_page_t p, next;

	next = (vm_page_t)queue_first(&src_object->memq);

	while (!queue_end(&src_object->memq, (queue_entry_t)next)) {
		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		/* Finish up if we've hit our pageout limit */
		if (dirty_budget && (dirty_budget == *dirty_count)) {
			break;
		}
		assert(!p->laundry);

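		/*
		 * Skip pages that can't be packed: fictitious or busy
		 * pages, and pages that are absent, unusual, or in an
		 * error state.  Wired pages are only counted, since they
		 * cannot be paged out.
		 */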
		if (p->fictitious || p->busy)
			continue;

		if (p->absent || p->unusual || p->error)
			continue;

		if (VM_PAGE_WIRED(p)) {
			(*wired_count)++;
			continue;
		}

		if (df_handle == NULL) {
			if (p->dirty || pmap_is_modified(p->phys_page)) {
				(*dirty_count)++;
			} else {
				(*clean_count)++;
			}
			continue;
		}

		if (p->cleaning) {
			p->pageout = TRUE;
			continue;
		}

		if (p->pmapped == TRUE) {
			int refmod_state;
			refmod_state = pmap_disconnect(p->phys_page);
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(p, FALSE);
			}
		}

		if (p->dirty) {
			default_freezer_pack_page(p, df_handle);
			(*dirty_count)++;
		}
		else {
			VM_PAGE_FREE(p);
			(*clean_count)++;
		}
	}
}

void
vm_object_pageout(
	vm_object_t object)
{
	vm_page_t			p, next;
	struct vm_pageout_queue		*iq;

	iq = &vm_pageout_queue_internal;

	assert(object != VM_OBJECT_NULL);

	vm_object_lock(object);

	if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) {
		if (!object->pager_initialized) {
			/*
			 * If there is no memory object for the page, create
			 * one and hand it to the default pager.
			 */
			vm_object_pager_create(object);
		}
	}

ReScan:
	next = (vm_page_t)queue_first(&object->memq);

	while (!queue_end(&object->memq, (queue_entry_t)next)) {
		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		/* Throw to the pageout queue */
		vm_page_lockspin_queues();

		/*
		 * see if page is already in the process of
		 * being cleaned... if so, leave it alone
		 */
		if (!p->laundry) {

			if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {

				if (VM_PAGE_Q_THROTTLED(iq)) {

					iq->pgo_draining = TRUE;

					assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
					vm_page_unlock_queues();
					vm_object_unlock(object);

					thread_block(THREAD_CONTINUE_NULL);

					vm_object_lock(object);
					goto ReScan;
				}

				if (p->fictitious || p->busy) {
					vm_page_unlock_queues();
					continue;
				}

				if (p->absent || p->unusual || p->error || VM_PAGE_WIRED(p)) {
					vm_page_unlock_queues();
					continue;
				}

				if (p->cleaning) {
					p->pageout = TRUE;
					vm_page_unlock_queues();
					continue;
				}

				if (p->pmapped == TRUE) {
					int refmod_state;
					refmod_state = pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);
					if (refmod_state & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(p, FALSE);
					}
				}

				if (p->dirty == FALSE) {
					vm_page_unlock_queues();
					VM_PAGE_FREE(p);
					continue;
				}
			}

			VM_PAGE_QUEUES_REMOVE(p);
			vm_pageout_cluster(p, TRUE);
		}
		vm_page_unlock_queues();
	}

	vm_object_unlock(object);
}

kern_return_t
vm_object_pagein(
	vm_object_t object)
{
	memory_object_t	pager;
	kern_return_t	kr;

	vm_object_lock(object);

	pager = object->pager;

	if (!object->pager_ready || pager == MEMORY_OBJECT_NULL) {
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	vm_object_paging_wait(object, THREAD_UNINT);
	vm_object_paging_begin(object);

	object->blocked_access = TRUE;
	vm_object_unlock(object);

	kr = memory_object_data_reclaim(pager, TRUE);

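	/*
	 * The reclaim was issued without the object lock held; re-take
	 * the lock to lift the temporary access block and end the
	 * paging-in-progress reference taken above.
	 */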
	vm_object_lock(object);

	object->blocked_access = FALSE;
	vm_object_paging_end(object);

	vm_object_unlock(object);

	return kr;
}
#endif	/* CONFIG_FREEZE */
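
/*
 * Illustrative sketch only (not compiled): one plausible way a freezer
 * could drive vm_object_pack() above.  The "src_object", "df_handle",
 * and "budget" names are hypothetical placeholders, not part of this
 * file.  A first pass with a NULL handle only counts pages; a second
 * pass with a real default_freezer_handle packs up to "budget" dirty
 * pages and frees the clean ones.
 *
 *	unsigned int	purgeable, wired, clean, dirty;
 *	boolean_t	shared;
 *	kern_return_t	kr;
 *
 *	// Counting pass: nothing is purged, packed, or freed.
 *	kr = vm_object_pack(&purgeable, &wired, &clean, &dirty,
 *			    0, &shared, src_object, NULL);
 *
 *	// Packing pass: stop once "budget" dirty pages have been packed.
 *	if (kr == KERN_SUCCESS && !shared)
 *		kr = vm_object_pack(&purgeable, &wired, &clean, &dirty,
 *				    budget, &shared, src_object, df_handle);
 */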