/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_object.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Virtual memory object module.
 */

#include <debug.h>
#include <mach_pagemap.h>
#include <task_swapper.h>

#include <mach/mach_types.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/vm_param.h>

#include <mach/sdt.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>

#include <kern/kern_types.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/xpr.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/host.h>
#include <kern/host_statistics.h>
#include <kern/processor.h>
#include <kern/misc_protos.h>

#include <vm/memory_object.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

/*
 *	Virtual memory objects maintain the actual data
 *	associated with allocated virtual memory.  A given
 *	page of memory exists within exactly one object.
 *
 *	An object is only deallocated when all "references"
 *	are given up.
 *
 *	Associated with each object is a list of all resident
 *	memory pages belonging to that object; this list is
 *	maintained by the "vm_page" module, but locked by the object's
 *	lock.
 *
 *	Each object also records the memory object reference
 *	that is used by the kernel to request and write
 *	back data (the memory object, field "pager"), etc...
 *
 *	Virtual memory objects are allocated to provide
 *	zero-filled memory (vm_allocate) or map a user-defined
 *	memory object into a virtual address space (vm_map).
 *
 *	Virtual memory objects that refer to a user-defined
 *	memory object are called "permanent", because all changes
 *	made in virtual memory are reflected back to the
 *	memory manager, which may then store it permanently.
 *	Other virtual memory objects are called "temporary",
 *	meaning that changes need be written back only when
 *	necessary to reclaim pages, and that storage associated
 *	with the object can be discarded once it is no longer
 *	mapped.
 *
 *	A permanent memory object may be mapped into more
 *	than one virtual address space.  Moreover, two threads
 *	may attempt to make the first mapping of a memory
 *	object concurrently.  Only one thread is allowed to
 *	complete this mapping; all others wait until the
 *	"pager_initialized" field is asserted, indicating
 *	that the first thread has initialized all of the
 *	necessary fields in the virtual memory object structure.
 *
 *	The kernel relies on a *default memory manager* to
 *	provide backing storage for the zero-filled virtual
 *	memory objects.  The pager memory objects associated
 *	with these temporary virtual memory objects are only
 *	requested from the default memory manager when it
 *	becomes necessary.  Virtual memory objects
 *	that depend on the default memory manager are called
 *	"internal".  The "pager_created" field is provided to
 *	indicate whether these ports have ever been allocated.
 *
 *	The kernel may also create virtual memory objects to
 *	hold changed pages after a copy-on-write operation.
 *	In this case, the virtual memory object (and its
 *	backing storage -- its memory object) only contain
 *	those pages that have been changed.  The "shadow"
 *	field refers to the virtual memory object that contains
 *	the remainder of the contents.  The "shadow_offset"
 *	field indicates where in the "shadow" these contents begin.
 *	The "copy" field refers to a virtual memory object
 *	to which changed pages must be copied before changing
 *	this object, in order to implement another form
 *	of copy-on-write optimization.
 *
 *	The virtual memory object structure also records
 *	the attributes associated with its memory object.
 *	The "pager_ready", "can_persist" and "copy_strategy"
 *	fields represent those attributes.  The "cached_list"
 *	field is used in the implementation of the persistence
 *	attribute.
 *
 *	ZZZ Continue this comment.
 */

/* Forward declarations for internal functions. */
static kern_return_t	vm_object_terminate(
				vm_object_t	object);

extern void		vm_object_remove(
				vm_object_t	object);

static kern_return_t	vm_object_copy_call(
				vm_object_t		src_object,
				vm_object_offset_t	src_offset,
				vm_object_size_t	size,
				vm_object_t		*_result_object);

static void		vm_object_do_collapse(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_do_bypass(
				vm_object_t	object,
				vm_object_t	backing_object);

static void		vm_object_release_pager(
				memory_object_t	pager,
				boolean_t	hashed);

static zone_t		vm_object_zone;		/* vm backing store zone */

/*
 *	All wired-down kernel memory belongs to a single virtual
 *	memory object (kernel_object) to avoid wasting data structures.
 */
static struct vm_object		kernel_object_store;
vm_object_t			kernel_object;


/*
 *	The submap object is used as a placeholder for vm_map_submap
 *	operations.  The object is declared in vm_map.c because it
 *	is exported by the vm_map module.  The storage is declared
 *	here because it must be initialized here.
 */
static struct vm_object		vm_submap_object_store;

/*
 *	Virtual memory objects are initialized from
 *	a template (see vm_object_allocate).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see _vm_object_allocate()).
 */
static struct vm_object		vm_object_template;

unsigned int vm_page_purged_wired = 0;
unsigned int vm_page_purged_busy = 0;
unsigned int vm_page_purged_others = 0;

#if VM_OBJECT_CACHE
/*
 *	Virtual memory objects that are not referenced by
 *	any address maps, but that are allowed to persist
 *	(an attribute specified by the associated memory manager),
 *	are kept in a queue (vm_object_cached_list).
 *
 *	When an object from this queue is referenced again,
 *	for example to make another address space mapping,
 *	it must be removed from the queue.  That is, the
 *	queue contains *only* objects with zero references.
 *
 *	The kernel may choose to terminate objects from this
 *	queue in order to reclaim storage.  The current policy
 *	is to permit a fixed maximum number of unreferenced
 *	objects (vm_object_cached_max).
 *
 *	A spin lock (accessed by routines
 *	vm_object_cache_{lock,lock_try,unlock}) governs the
 *	object cache.  It must be held when objects are
 *	added to or removed from the cache (in vm_object_terminate).
 *	The routines that acquire a reference to a virtual
 *	memory object based on one of the memory object ports
 *	must also lock the cache.
 *
 *	Ideally, the object cache should be more isolated
 *	from the reference mechanism, so that the lock need
 *	not be held to make simple references.
 */
static vm_object_t	vm_object_cache_trim(
				boolean_t called_from_vm_object_deallocate);

static void		vm_object_deactivate_all_pages(
				vm_object_t	object);

static int		vm_object_cached_high;	/* highest # cached objects */
static int		vm_object_cached_max = 512;	/* may be patched */

#define vm_object_cache_lock()		\
		lck_mtx_lock(&vm_object_cached_lock_data)
#define vm_object_cache_lock_try()	\
		lck_mtx_try_lock(&vm_object_cached_lock_data)

#endif	/* VM_OBJECT_CACHE */

static queue_head_t	vm_object_cached_list;
static uint32_t		vm_object_cache_pages_freed = 0;
static uint32_t		vm_object_cache_pages_moved = 0;
static uint32_t		vm_object_cache_pages_skipped = 0;
static uint32_t		vm_object_cache_adds = 0;
static uint32_t		vm_object_cached_count = 0;
static lck_mtx_t	vm_object_cached_lock_data;
static lck_mtx_ext_t	vm_object_cached_lock_data_ext;

static uint32_t		vm_object_page_grab_failed = 0;
static uint32_t		vm_object_page_grab_skipped = 0;
static uint32_t		vm_object_page_grab_returned = 0;
static uint32_t		vm_object_page_grab_pmapped = 0;
static uint32_t		vm_object_page_grab_reactivations = 0;

#define vm_object_cache_lock_spin()	\
		lck_mtx_lock_spin(&vm_object_cached_lock_data)
#define vm_object_cache_unlock()	\
		lck_mtx_unlock(&vm_object_cached_lock_data)

static void	vm_object_cache_remove_locked(vm_object_t);


#define	VM_OBJECT_HASH_COUNT		1024
#define	VM_OBJECT_HASH_LOCK_COUNT	512

static lck_mtx_t	vm_object_hashed_lock_data[VM_OBJECT_HASH_LOCK_COUNT];
static lck_mtx_ext_t	vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_LOCK_COUNT];

static queue_head_t	vm_object_hashtable[VM_OBJECT_HASH_COUNT];
static struct zone	*vm_object_hash_zone;

struct vm_object_hash_entry {
	queue_chain_t		hash_link;	/* hash chain link */
	memory_object_t		pager;		/* pager we represent */
	vm_object_t		object;		/* corresponding object */
	boolean_t		waiting;	/* someone waiting for
						 * termination */
};

typedef struct vm_object_hash_entry	*vm_object_hash_entry_t;
#define VM_OBJECT_HASH_ENTRY_NULL	((vm_object_hash_entry_t) 0)

#define VM_OBJECT_HASH_SHIFT	5
#define vm_object_hash(pager) \
	((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT))

#define vm_object_lock_hash(pager) \
	((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT))

void vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry);

static void vm_object_reap(vm_object_t object);
static void vm_object_reap_async(vm_object_t object);
static void vm_object_reaper_thread(void);

static lck_mtx_t	vm_object_reaper_lock_data;
static lck_mtx_ext_t	vm_object_reaper_lock_data_ext;

static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */
unsigned int vm_object_reap_count = 0;
unsigned int vm_object_reap_count_async = 0;

#define vm_object_reaper_lock() \
		lck_mtx_lock(&vm_object_reaper_lock_data)
#define vm_object_reaper_lock_spin() \
		lck_mtx_lock_spin(&vm_object_reaper_lock_data)
#define vm_object_reaper_unlock() \
		lck_mtx_unlock(&vm_object_reaper_lock_data)

#if 0
#undef KERNEL_DEBUG
#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT
#endif

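/*
 * Illustrative note (not part of the original source): the pager hash uses
 * two independent moduli over the same shifted pointer.  Dropping the low
 * VM_OBJECT_HASH_SHIFT (5) bits discards the pointer's alignment padding;
 * the result modulo VM_OBJECT_HASH_COUNT (1024) picks the bucket in
 * vm_object_hashtable[], while the same value modulo
 * VM_OBJECT_HASH_LOCK_COUNT (512) picks the lck_mtx_t that guards that
 * bucket, so each lock covers (at least) two buckets:
 *
 *	index      = vm_object_hash(pager);	 selects the chain
 *	lock_index = vm_object_lock_hash(pager); selects the chain's lock
 */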
static lck_mtx_t *
vm_object_hash_lock_spin(
	memory_object_t	pager)
{
	int	index;

	index = vm_object_lock_hash(pager);

	lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]);

	return (&vm_object_hashed_lock_data[index]);
}

static void
vm_object_hash_unlock(lck_mtx_t *lck)
{
	lck_mtx_unlock(lck);
}


/*
 *	vm_object_hash_lookup looks up a pager in the hashtable
 *	and returns the corresponding entry, with optional removal.
 */
static vm_object_hash_entry_t
vm_object_hash_lookup(
	memory_object_t	pager,
	boolean_t	remove_entry)
{
	queue_t			bucket;
	vm_object_hash_entry_t	entry;

	bucket = &vm_object_hashtable[vm_object_hash(pager)];

	entry = (vm_object_hash_entry_t)queue_first(bucket);
	while (!queue_end(bucket, (queue_entry_t)entry)) {
		if (entry->pager == pager) {
			if (remove_entry) {
				queue_remove(bucket, entry,
					     vm_object_hash_entry_t, hash_link);
			}
			return(entry);
		}
		entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link);
	}
	return(VM_OBJECT_HASH_ENTRY_NULL);
}

/*
 *	vm_object_hash_insert enters the specified
 *	pager / cache object association in the hashtable.
 */

static void
vm_object_hash_insert(
	vm_object_hash_entry_t	entry,
	vm_object_t		object)
{
	queue_t		bucket;

	bucket = &vm_object_hashtable[vm_object_hash(entry->pager)];

	queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link);

	entry->object = object;
	object->hashed = TRUE;
}

static vm_object_hash_entry_t
vm_object_hash_entry_alloc(
	memory_object_t	pager)
{
	vm_object_hash_entry_t	entry;

	entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone);
	entry->pager = pager;
	entry->object = VM_OBJECT_NULL;
	entry->waiting = FALSE;

	return(entry);
}

void
vm_object_hash_entry_free(
	vm_object_hash_entry_t	entry)
{
	zfree(vm_object_hash_zone, entry);
}

/*
 *	vm_object_allocate:
 *
 *	Returns a new object with the given size.
 */

__private_extern__ void
_vm_object_allocate(
	vm_object_size_t	size,
	vm_object_t		object)
{
	XPR(XPR_VM_OBJECT,
		"vm_object_allocate, object 0x%X size 0x%X\n",
		object, size, 0, 0, 0);

	*object = vm_object_template;
	queue_init(&object->memq);
	queue_init(&object->msr_q);
#if UPL_DEBUG
	queue_init(&object->uplq);
#endif /* UPL_DEBUG */
	vm_object_lock_init(object);
	object->vo_size = size;
}

__private_extern__ vm_object_t
vm_object_allocate(
	vm_object_size_t	size)
{
	register vm_object_t object;

	object = (vm_object_t) zalloc(vm_object_zone);

//	dbgLog(object, size, 0, 2);	/* (TEST/DEBUG) */

	if (object != VM_OBJECT_NULL)
		_vm_object_allocate(size, object);

	return object;
}
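/*
 * Illustrative usage sketch (not part of the original source): a caller
 * that needs an anonymous, zero-fill backing object for "size" bytes would
 * typically round the size up to a page boundary, check for allocation
 * failure, and later release its reference:
 *
 *	vm_object_t obj;
 *
 *	obj = vm_object_allocate((vm_object_size_t) round_page(size));
 *	if (obj == VM_OBJECT_NULL)
 *		return KERN_RESOURCE_SHORTAGE;
 *	... map the object or hand it off, taking extra references
 *	    with vm_object_reference() as needed ...
 *	vm_object_deallocate(obj);	/- drop the allocation reference -/
 */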
lck_grp_t		vm_object_lck_grp;
lck_grp_t		vm_object_cache_lck_grp;
lck_grp_attr_t		vm_object_lck_grp_attr;
lck_attr_t		vm_object_lck_attr;
lck_attr_t		kernel_object_lck_attr;

/*
 *	vm_object_bootstrap:
 *
 *	Initialize the VM objects module.
 */
__private_extern__ void
vm_object_bootstrap(void)
{
	register int	i;

	vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object),
				round_page(512*1024),
				round_page(12*1024),
				"vm objects");
	zone_change(vm_object_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_object_zone, Z_NOENCRYPT, TRUE);

	vm_object_init_lck_grp();

	queue_init(&vm_object_cached_list);

	lck_mtx_init_ext(&vm_object_cached_lock_data,
		&vm_object_cached_lock_data_ext,
		&vm_object_cache_lck_grp,
		&vm_object_lck_attr);

	queue_init(&vm_object_reaper_queue);

	for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) {
		lck_mtx_init_ext(&vm_object_hashed_lock_data[i],
				 &vm_object_hashed_lock_data_ext[i],
				 &vm_object_lck_grp,
				 &vm_object_lck_attr);
	}
	lck_mtx_init_ext(&vm_object_reaper_lock_data,
		&vm_object_reaper_lock_data_ext,
		&vm_object_lck_grp,
		&vm_object_lck_attr);

	vm_object_hash_zone =
			zinit((vm_size_t) sizeof (struct vm_object_hash_entry),
			      round_page(512*1024),
			      round_page(12*1024),
			      "vm object hash entries");
	zone_change(vm_object_hash_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE);

	for (i = 0; i < VM_OBJECT_HASH_COUNT; i++)
		queue_init(&vm_object_hashtable[i]);


	/*
	 *	Fill in a template object, for quick initialization
	 */

	/* memq; Lock; init after allocation */
	vm_object_template.memq.prev = NULL;
	vm_object_template.memq.next = NULL;
#if 0
	/*
	 * We can't call vm_object_lock_init() here because that will
	 * allocate some memory and VM is not fully initialized yet.
	 * The lock will be initialized for each allocated object in
	 * _vm_object_allocate(), so we don't need to initialize it in
	 * the vm_object_template.
	 */
	vm_object_lock_init(&vm_object_template);
#endif
	vm_object_template.vo_size = 0;
	vm_object_template.memq_hint = VM_PAGE_NULL;
	vm_object_template.ref_count = 1;
#if	TASK_SWAPPER
	vm_object_template.res_count = 1;
#endif	/* TASK_SWAPPER */
	vm_object_template.resident_page_count = 0;
	vm_object_template.wired_page_count = 0;
	vm_object_template.reusable_page_count = 0;
	vm_object_template.copy = VM_OBJECT_NULL;
	vm_object_template.shadow = VM_OBJECT_NULL;
	vm_object_template.vo_shadow_offset = (vm_object_offset_t) 0;
	vm_object_template.pager = MEMORY_OBJECT_NULL;
	vm_object_template.paging_offset = 0;
	vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL;
	vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC;
	vm_object_template.paging_in_progress = 0;
	vm_object_template.activity_in_progress = 0;

	/* Begin bitfields */
	vm_object_template.all_wanted = 0; /* all bits FALSE */
	vm_object_template.pager_created = FALSE;
	vm_object_template.pager_initialized = FALSE;
	vm_object_template.pager_ready = FALSE;
	vm_object_template.pager_trusted = FALSE;
	vm_object_template.can_persist = FALSE;
	vm_object_template.internal = TRUE;
	vm_object_template.temporary = TRUE;
	vm_object_template.private = FALSE;
	vm_object_template.pageout = FALSE;
	vm_object_template.alive = TRUE;
	vm_object_template.purgable = VM_PURGABLE_DENY;
	vm_object_template.shadowed = FALSE;
	vm_object_template.silent_overwrite = FALSE;
	vm_object_template.advisory_pageout = FALSE;
	vm_object_template.true_share = FALSE;
	vm_object_template.terminating = FALSE;
	vm_object_template.named = FALSE;
	vm_object_template.shadow_severed = FALSE;
	vm_object_template.phys_contiguous = FALSE;
	vm_object_template.nophyscache = FALSE;
	/* End bitfields */

	vm_object_template.cached_list.prev = NULL;
	vm_object_template.cached_list.next = NULL;
	vm_object_template.msr_q.prev = NULL;
	vm_object_template.msr_q.next = NULL;

	vm_object_template.last_alloc = (vm_object_offset_t) 0;
	vm_object_template.sequential = (vm_object_offset_t) 0;
	vm_object_template.pages_created = 0;
	vm_object_template.pages_used = 0;
	vm_object_template.scan_collisions = 0;

#if	MACH_PAGEMAP
	vm_object_template.existence_map = VM_EXTERNAL_NULL;
#endif	/* MACH_PAGEMAP */
	vm_object_template.cow_hint = ~(vm_offset_t)0;
#if	MACH_ASSERT
	vm_object_template.paging_object = VM_OBJECT_NULL;
#endif	/* MACH_ASSERT */

	/* cache bitfields */
	vm_object_template.wimg_bits = VM_WIMG_USE_DEFAULT;
	vm_object_template.set_cache_attr = FALSE;
	vm_object_template.code_signed = FALSE;
	vm_object_template.hashed = FALSE;
	vm_object_template.transposed = FALSE;
	vm_object_template.mapping_in_progress = FALSE;
	vm_object_template.volatile_empty = FALSE;
	vm_object_template.volatile_fault = FALSE;
	vm_object_template.all_reusable = FALSE;
	vm_object_template.blocked_access = FALSE;
	vm_object_template.__object2_unused_bits = 0;
#if UPL_DEBUG
	vm_object_template.uplq.prev = NULL;
	vm_object_template.uplq.next = NULL;
#endif /* UPL_DEBUG */
#ifdef VM_PIP_DEBUG
	bzero(&vm_object_template.pip_holders,
	      sizeof (vm_object_template.pip_holders));
#endif /* VM_PIP_DEBUG */

	vm_object_template.objq.next = NULL;
	vm_object_template.objq.prev = NULL;

	vm_object_template.vo_cache_ts = 0;

	/*
	 *	Initialize the "kernel object"
	 */

	kernel_object = &kernel_object_store;

/*
 * Note that in the following size specifications, we need to add 1 because
 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size.
 */

#ifdef ppc
	_vm_object_allocate(vm_last_addr + 1,
			    kernel_object);
#else
	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
			    kernel_object);
#endif
	kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 *	Initialize the "submap object".  Make it as large as the
	 *	kernel object so that no limit is imposed on submap sizes.
	 */

	vm_submap_object = &vm_submap_object_store;
#ifdef ppc
	_vm_object_allocate(vm_last_addr + 1,
			    vm_submap_object);
#else
	_vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1,
			    vm_submap_object);
#endif
	vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * Create an "extra" reference to this object so that we never
	 * try to deallocate it; zfree doesn't like to be called with
	 * non-zone memory.
	 */
	vm_object_reference(vm_submap_object);

#if	MACH_PAGEMAP
	vm_external_module_initialize();
#endif	/* MACH_PAGEMAP */
}

void
vm_object_reaper_init(void)
{
	kern_return_t	kr;
	thread_t	thread;

	kr = kernel_thread_start_priority(
		(thread_continue_t) vm_object_reaper_thread,
		NULL,
		BASEPRI_PREEMPT - 1,
		&thread);
	if (kr != KERN_SUCCESS) {
		panic("failed to launch vm_object_reaper_thread kr=0x%x", kr);
	}
	thread_deallocate(thread);
}

__private_extern__ void
vm_object_init(void)
{
	/*
	 *	Finish initializing the kernel object.
	 */
}


__private_extern__ void
vm_object_init_lck_grp(void)
{
	/*
	 * initialize the vm_object lock world
	 */
	lck_grp_attr_setdefault(&vm_object_lck_grp_attr);
	lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr);
	lck_grp_init(&vm_object_cache_lck_grp, "vm_object_cache", &vm_object_lck_grp_attr);
	lck_attr_setdefault(&vm_object_lck_attr);
	lck_attr_setdefault(&kernel_object_lck_attr);
	lck_attr_cleardebug(&kernel_object_lck_attr);
}

#if VM_OBJECT_CACHE
#define	MIGHT_NOT_CACHE_SHADOWS		1
#if	MIGHT_NOT_CACHE_SHADOWS
static int cache_shadows = TRUE;
#endif	/* MIGHT_NOT_CACHE_SHADOWS */
#endif

/*
 *	vm_object_deallocate:
 *
 *	Release a reference to the specified object,
 *	gained either through a vm_object_allocate
 *	or a vm_object_reference call.  When all references
 *	are gone, storage associated with this object
 *	may be relinquished.
 *
 *	No object may be locked.
 */
unsigned long vm_object_deallocate_shared_successes = 0;
unsigned long vm_object_deallocate_shared_failures = 0;
unsigned long vm_object_deallocate_shared_swap_failures = 0;

__private_extern__ void
vm_object_deallocate(
	register vm_object_t	object)
{
#if VM_OBJECT_CACHE
	boolean_t	retry_cache_trim = FALSE;
	uint32_t	try_failed_count = 0;
#endif
	vm_object_t	shadow = VM_OBJECT_NULL;

//	if(object)dbgLog(object, object->ref_count, object->can_persist, 3);	/* (TEST/DEBUG) */
//	else dbgLog(object, 0, 0, 3);	/* (TEST/DEBUG) */

	if (object == VM_OBJECT_NULL)
		return;

	if (object == kernel_object) {
		vm_object_lock_shared(object);

		OSAddAtomic(-1, &object->ref_count);

		if (object->ref_count == 0) {
			panic("vm_object_deallocate: losing kernel_object\n");
		}
		vm_object_unlock(object);
		return;
	}

	if (object->ref_count > 2 ||
	    (!object->named && object->ref_count > 1)) {
		UInt32		original_ref_count;
		volatile UInt32	*ref_count_p;
		Boolean		atomic_swap;

		/*
		 * The object currently looks like it is not being
		 * kept alive solely by the reference we're about to release.
		 * Let's try and release our reference without taking
		 * all the locks we would need if we had to terminate the
		 * object (cache lock + exclusive object lock).
		 * Lock the object "shared" to make sure we don't race with
		 * anyone holding it "exclusive".
		 */
		vm_object_lock_shared(object);
		ref_count_p = (volatile UInt32 *) &object->ref_count;
		original_ref_count = object->ref_count;
		/*
		 * Test again as "ref_count" could have changed.
		 * "named" shouldn't change.
		 */
		if (original_ref_count > 2 ||
		    (!object->named && original_ref_count > 1)) {
			atomic_swap = OSCompareAndSwap(
				original_ref_count,
				original_ref_count - 1,
				(UInt32 *) &object->ref_count);
			if (atomic_swap == FALSE) {
				vm_object_deallocate_shared_swap_failures++;
			}

		} else {
			atomic_swap = FALSE;
		}
		vm_object_unlock(object);

		if (atomic_swap) {
			/*
			 * ref_count was updated atomically !
			 */
			vm_object_deallocate_shared_successes++;
			return;
		}

		/*
		 * Someone else updated the ref_count at the same
		 * time and we lost the race.  Fall back to the usual
		 * slow but safe path...
		 */
		vm_object_deallocate_shared_failures++;
	}

	while (object != VM_OBJECT_NULL) {

		vm_object_lock(object);

		assert(object->ref_count > 0);

		/*
		 *	If the object has a named reference, and only
		 *	that reference would remain, inform the pager
		 *	about the last "mapping" reference going away.
		 */
		if ((object->ref_count == 2) && (object->named)) {
			memory_object_t	pager = object->pager;

			/* Notify the Pager that there are no */
			/* more mappers for this object */

			if (pager != MEMORY_OBJECT_NULL) {
				vm_object_mapping_wait(object, THREAD_UNINT);
				vm_object_mapping_begin(object);
				vm_object_unlock(object);

				memory_object_last_unmap(pager);

				vm_object_lock(object);
				vm_object_mapping_end(object);
			}
			assert(object->ref_count > 0);
		}

		/*
		 *	Lose the reference. If other references
		 *	remain, then we are done, unless we need
		 *	to retry a cache trim.
		 *	If it is the last reference, then keep it
		 *	until any pending initialization is completed.
		 */

		/* if the object is terminating, it cannot go into */
		/* the cache and we obviously should not call      */
		/* terminate again.				    */

		if ((object->ref_count > 1) || object->terminating) {
			vm_object_lock_assert_exclusive(object);
			object->ref_count--;
			vm_object_res_deallocate(object);

			if (object->ref_count == 1 &&
			    object->shadow != VM_OBJECT_NULL) {
				/*
				 * There's only one reference left on this
				 * VM object.  We can't tell if it's a valid
				 * one (from a mapping for example) or if this
				 * object is just part of a possibly stale and
				 * useless shadow chain.
				 * We would like to try and collapse it into
				 * its parent, but we don't have any pointers
				 * back to this parent object.
				 * But we can try and collapse this object with
				 * its own shadows, in case these are useless
				 * too...
				 * We can't bypass this object though, since we
				 * don't know if this last reference on it is
				 * meaningful or not.
				 */
				vm_object_collapse(object, 0, FALSE);
			}
			vm_object_unlock(object);
#if VM_OBJECT_CACHE
			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			     VM_OBJECT_NULL)) {
				continue;
			}
#endif
			return;
		}

		/*
		 *	We have to wait for initialization
		 *	before destroying or caching the object.
		 */

		if (object->pager_created && ! object->pager_initialized) {
			assert(! object->can_persist);
			vm_object_assert_wait(object,
					      VM_OBJECT_EVENT_INITIALIZED,
					      THREAD_UNINT);
			vm_object_unlock(object);

			thread_block(THREAD_CONTINUE_NULL);
			continue;
		}

#if VM_OBJECT_CACHE
		/*
		 *	If this object can persist, then enter it in
		 *	the cache. Otherwise, terminate it.
		 *
		 *	NOTE:  Only permanent objects are cached, and
		 *	permanent objects cannot have shadows.  This
		 *	affects the residence counting logic in a minor
		 *	way (can do it in-line, mostly).
		 */

		if ((object->can_persist) && (object->alive)) {
			/*
			 *	Now it is safe to decrement reference count,
			 *	and to return if reference count is > 0.
			 */

			vm_object_lock_assert_exclusive(object);
			if (--object->ref_count > 0) {
				vm_object_res_deallocate(object);
				vm_object_unlock(object);

				if (retry_cache_trim &&
				    ((object = vm_object_cache_trim(TRUE)) !=
				     VM_OBJECT_NULL)) {
					continue;
				}
				return;
			}

#if	MIGHT_NOT_CACHE_SHADOWS
			/*
			 *	Remove shadow now if we don't
			 *	want to cache shadows.
			 */
			if (! cache_shadows) {
				shadow = object->shadow;
				object->shadow = VM_OBJECT_NULL;
			}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

			/*
			 *	Enter the object onto the queue of
			 *	cached objects, and deactivate
			 *	all of its pages.
			 */
			assert(object->shadow == VM_OBJECT_NULL);
			VM_OBJ_RES_DECR(object);
			XPR(XPR_VM_OBJECT,
		      "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n",
				object,
				vm_object_cached_list.next,
				vm_object_cached_list.prev,0,0);


			vm_object_unlock(object);

			try_failed_count = 0;
			for (;;) {
				vm_object_cache_lock();

				/*
				 * if we try to take a regular lock here
				 * we risk deadlocking against someone
				 * holding a lock on this object while
				 * trying to vm_object_deallocate a different
				 * object
				 */
				if (vm_object_lock_try(object))
					break;
				vm_object_cache_unlock();
				try_failed_count++;

				mutex_pause(try_failed_count);  /* wait a bit */
			}
			vm_object_cached_count++;
			if (vm_object_cached_count > vm_object_cached_high)
				vm_object_cached_high = vm_object_cached_count;
			queue_enter(&vm_object_cached_list, object,
				vm_object_t, cached_list);
			vm_object_cache_unlock();

			vm_object_deactivate_all_pages(object);
			vm_object_unlock(object);

#if	MIGHT_NOT_CACHE_SHADOWS
			/*
			 *	If we have a shadow that we need
			 *	to deallocate, do so now, remembering
			 *	to trim the cache later.
			 */
			if (! cache_shadows && shadow != VM_OBJECT_NULL) {
				object = shadow;
				retry_cache_trim = TRUE;
				continue;
			}
#endif	/* MIGHT_NOT_CACHE_SHADOWS */

			/*
			 *	Trim the cache. If the cache trim
			 *	returns with a shadow for us to deallocate,
			 *	then remember to retry the cache trim
			 *	when we are done deallocating the shadow.
			 *	Otherwise, we are done.
			 */

			object = vm_object_cache_trim(TRUE);
			if (object == VM_OBJECT_NULL) {
				return;
			}
			retry_cache_trim = TRUE;
		} else
#endif	/* VM_OBJECT_CACHE */
		{
			/*
			 *	This object is not cacheable; terminate it.
			 */
			XPR(XPR_VM_OBJECT,
	 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n",
				object, object->resident_page_count,
				object->paging_in_progress,
				(void *)current_thread(),object->ref_count);

			VM_OBJ_RES_DECR(object);	/* XXX ? */
			/*
			 *	Terminate this object. If it had a shadow,
			 *	then deallocate it; otherwise, if we need
			 *	to retry a cache trim, do so now; otherwise,
			 *	we are done. "pageout" objects have a shadow,
			 *	but maintain a "paging reference" rather
			 *	than a normal reference.
			 */
			shadow = object->pageout?VM_OBJECT_NULL:object->shadow;

			if (vm_object_terminate(object) != KERN_SUCCESS) {
				return;
			}
			if (shadow != VM_OBJECT_NULL) {
				object = shadow;
				continue;
			}
#if VM_OBJECT_CACHE
			if (retry_cache_trim &&
			    ((object = vm_object_cache_trim(TRUE)) !=
			     VM_OBJECT_NULL)) {
				continue;
			}
#endif
			return;
		}
	}
#if VM_OBJECT_CACHE
	assert(! retry_cache_trim);
#endif
}


vm_page_t
vm_object_page_grab(
	vm_object_t	object)
{
	vm_page_t	p, next_p;
	int		p_limit = 0;
	int		p_skipped = 0;

	vm_object_lock_assert_exclusive(object);

	next_p = (vm_page_t)queue_first(&object->memq);
	p_limit = MIN(50, object->resident_page_count);

	while (!queue_end(&object->memq, (queue_entry_t)next_p) && --p_limit > 0) {

		p = next_p;
		next_p = (vm_page_t)queue_next(&next_p->listq);

		if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry || p->fictitious)
			goto move_page_in_obj;

		if (p->pmapped || p->dirty || p->precious) {
			vm_page_lockspin_queues();

			if (p->pmapped) {
				int refmod_state;

				vm_object_page_grab_pmapped++;

				if (p->reference == FALSE || p->dirty == FALSE) {

					refmod_state = pmap_get_refmod(p->phys_page);

					if (refmod_state & VM_MEM_REFERENCED)
						p->reference = TRUE;
					if (refmod_state & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(p, FALSE);
					}
				}
				if (p->dirty == FALSE && p->precious == FALSE) {

					refmod_state = pmap_disconnect(p->phys_page);

					if (refmod_state & VM_MEM_REFERENCED)
						p->reference = TRUE;
					if (refmod_state & VM_MEM_MODIFIED) {
						SET_PAGE_DIRTY(p, FALSE);
					}

					if (p->dirty == FALSE)
						goto take_page;
				}
			}
			if (p->inactive && p->reference == TRUE) {
				vm_page_activate(p);

				VM_STAT_INCR(reactivations);
				vm_object_page_grab_reactivations++;
			}
			vm_page_unlock_queues();
move_page_in_obj:
			queue_remove(&object->memq, p, vm_page_t, listq);
			queue_enter(&object->memq, p, vm_page_t, listq);

			p_skipped++;
			continue;
		}
		vm_page_lockspin_queues();
take_page:
		vm_page_free_prepare_queues(p);
		vm_object_page_grab_returned++;
		vm_object_page_grab_skipped += p_skipped;

		vm_page_unlock_queues();

		vm_page_free_prepare_object(p, TRUE);

		return (p);
	}
	vm_object_page_grab_skipped += p_skipped;
	vm_object_page_grab_failed++;

	return (NULL);
}


#define EVICT_PREPARE_LIMIT	64
#define EVICT_AGE		10

static	clock_sec_t	vm_object_cache_aging_ts = 0;

static void
vm_object_cache_remove_locked(
	vm_object_t	object)
{
	queue_remove(&vm_object_cached_list, object, vm_object_t, objq);
	object->objq.next = NULL;
	object->objq.prev = NULL;

	vm_object_cached_count--;
}

void
vm_object_cache_remove(
	vm_object_t	object)
{
	vm_object_cache_lock_spin();

	if (object->objq.next || object->objq.prev)
		vm_object_cache_remove_locked(object);

	vm_object_cache_unlock();
}

void
vm_object_cache_add(
	vm_object_t	object)
{
	clock_sec_t sec;
	clock_nsec_t nsec;

	if (object->resident_page_count == 0)
		return;
	clock_get_system_nanotime(&sec, &nsec);

	vm_object_cache_lock_spin();

	if (object->objq.next == NULL && object->objq.prev == NULL) {
		queue_enter(&vm_object_cached_list, object, vm_object_t, objq);
		object->vo_cache_ts = sec + EVICT_AGE;
		object->vo_cache_pages_to_scan = object->resident_page_count;

		vm_object_cached_count++;
		vm_object_cache_adds++;
	}
	vm_object_cache_unlock();
}
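/*
 * Illustrative note (not part of the original source): an object added by
 * vm_object_cache_add() is stamped with vo_cache_ts = now + EVICT_AGE, so
 * with EVICT_AGE == 10 it only becomes a candidate for eviction roughly
 * ten seconds after it was cached.  vm_object_cache_evict() below bails
 * out early whenever the object at the head of the queue has not yet
 * reached its timestamp, and records that timestamp in
 * vm_object_cache_aging_ts so subsequent calls can return without taking
 * the cache lock at all.
 */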
int
vm_object_cache_evict(
	int	num_to_evict,
	int	max_objects_to_examine)
{
	vm_object_t	object = VM_OBJECT_NULL;
	vm_object_t	next_obj = VM_OBJECT_NULL;
	vm_page_t	local_free_q = VM_PAGE_NULL;
	vm_page_t	p;
	vm_page_t	next_p;
	int		object_cnt = 0;
	vm_page_t	ep_array[EVICT_PREPARE_LIMIT];
	int		ep_count;
	int		ep_limit;
	int		ep_index;
	int		ep_freed = 0;
	int		ep_moved = 0;
	uint32_t	ep_skipped = 0;
	clock_sec_t	sec;
	clock_nsec_t	nsec;

	KERNEL_DEBUG(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
	/*
	 * do a couple of quick checks to see if it's
	 * worthwhile grabbing the lock
	 */
	if (queue_empty(&vm_object_cached_list)) {
		KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return (0);
	}
	clock_get_system_nanotime(&sec, &nsec);

	/*
	 * the object on the head of the queue has not
	 * yet sufficiently aged
	 */
	if (sec < vm_object_cache_aging_ts) {
		KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0);
		return (0);
	}
	/*
	 * don't need the queue lock to find
	 * and lock an object on the cached list
	 */
	vm_page_unlock_queues();

	vm_object_cache_lock_spin();

	for (;;) {
		next_obj = (vm_object_t)queue_first(&vm_object_cached_list);

		while (!queue_end(&vm_object_cached_list, (queue_entry_t)next_obj) && object_cnt++ < max_objects_to_examine) {

			object = next_obj;
			next_obj = (vm_object_t)queue_next(&next_obj->objq);

			if (sec < object->vo_cache_ts) {
				KERNEL_DEBUG(0x130020c, object, object->resident_page_count, object->vo_cache_ts, sec, 0);

				vm_object_cache_aging_ts = object->vo_cache_ts;
				object = VM_OBJECT_NULL;
				break;
			}
			if (!vm_object_lock_try_scan(object)) {
				/*
				 * just skip over this guy for now... if we find
				 * an object to steal pages from, we'll revisit in a bit...
				 * hopefully, the lock will have cleared
				 */
				KERNEL_DEBUG(0x13001f8, object, object->resident_page_count, 0, 0, 0);

				object = VM_OBJECT_NULL;
				continue;
			}
			if (queue_empty(&object->memq) || object->vo_cache_pages_to_scan == 0) {
				/*
				 * this case really shouldn't happen, but it's not fatal
				 * so deal with it... if we don't remove the object from
				 * the list, we'll never move past it.
				 */
				KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);

				vm_object_cache_remove_locked(object);
				vm_object_unlock(object);
				object = VM_OBJECT_NULL;
				continue;
			}
			/*
			 * we have a locked object with pages...
			 * time to start harvesting
			 */
			break;
		}
		vm_object_cache_unlock();

		if (object == VM_OBJECT_NULL)
			break;

		/*
		 * object is locked at this point and
		 * has resident pages
		 */
		next_p = (vm_page_t)queue_first(&object->memq);

		/*
		 * break the page scan into 2 pieces to minimize the time spent
		 * behind the page queue lock...
		 * the list of pages on these unused objects is likely to be cold
		 * w/r to the cpu cache which increases the time to scan the list
		 * tenfold...  and we may have a 'run' of pages we can't utilize that
		 * needs to be skipped over...
		 */
		if ((ep_limit = num_to_evict - (ep_freed + ep_moved)) > EVICT_PREPARE_LIMIT)
			ep_limit = EVICT_PREPARE_LIMIT;
		ep_count = 0;

		while (!queue_end(&object->memq, (queue_entry_t)next_p) && object->vo_cache_pages_to_scan && ep_count < ep_limit) {

			p = next_p;
			next_p = (vm_page_t)queue_next(&next_p->listq);

			object->vo_cache_pages_to_scan--;

			if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry) {
				queue_remove(&object->memq, p, vm_page_t, listq);
				queue_enter(&object->memq, p, vm_page_t, listq);

				ep_skipped++;
				continue;
			}
			if (p->wpmapped || p->dirty || p->precious) {
				queue_remove(&object->memq, p, vm_page_t, listq);
				queue_enter(&object->memq, p, vm_page_t, listq);

				pmap_clear_reference(p->phys_page);
			}
			ep_array[ep_count++] = p;
		}
		KERNEL_DEBUG(0x13001f4 | DBG_FUNC_START, object, object->resident_page_count, ep_freed, ep_moved, 0);

		vm_page_lockspin_queues();

		for (ep_index = 0; ep_index < ep_count; ep_index++) {

			p = ep_array[ep_index];

			if (p->wpmapped || p->dirty || p->precious) {
				p->reference = FALSE;
				p->no_cache = FALSE;

				/*
				 * we've already filtered out pages that are in the laundry
				 * so if we get here, this page can't be on the pageout queue
				 */
				assert(!p->pageout_queue);

				VM_PAGE_QUEUES_REMOVE(p);
				VM_PAGE_ENQUEUE_INACTIVE(p, TRUE);

				ep_moved++;
			} else {
				vm_page_free_prepare_queues(p);

				assert(p->pageq.next == NULL && p->pageq.prev == NULL);
				/*
				 * Add this page to our list of reclaimed pages,
				 * to be freed later.
				 */
				p->pageq.next = (queue_entry_t) local_free_q;
				local_free_q = p;

				ep_freed++;
			}
		}
		vm_page_unlock_queues();

		KERNEL_DEBUG(0x13001f4 | DBG_FUNC_END, object, object->resident_page_count, ep_freed, ep_moved, 0);

		if (local_free_q) {
			vm_page_free_list(local_free_q, TRUE);
			local_free_q = VM_PAGE_NULL;
		}
		if (object->vo_cache_pages_to_scan == 0) {
			KERNEL_DEBUG(0x1300208, object, object->resident_page_count, ep_freed, ep_moved, 0);

			vm_object_cache_remove(object);

			KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0);
		}
		/*
		 * done with this object
		 */
		vm_object_unlock(object);
		object = VM_OBJECT_NULL;

		/*
		 * at this point, we are not holding any locks
		 */
		if ((ep_freed + ep_moved) >= num_to_evict) {
			/*
			 * we've reached our target for the
			 * number of pages to evict
			 */
			break;
		}
		vm_object_cache_lock_spin();
	}
	/*
	 * put the page queues lock back to the caller's
	 * idea of it
	 */
	vm_page_lock_queues();

	vm_object_cache_pages_freed += ep_freed;
	vm_object_cache_pages_moved += ep_moved;
	vm_object_cache_pages_skipped += ep_skipped;

	KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, ep_freed, 0, 0, 0, 0);
	return (ep_freed);
}
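/*
 * Illustrative usage sketch (not part of the original source): a reclaim
 * path that already holds the page queues lock, as this routine expects,
 * might ask for up to 100 pages while examining no more than 10 cached
 * objects:
 *
 *	int freed;
 *
 *	vm_page_lock_queues();
 *	freed = vm_object_cache_evict(100, 10);
 *	vm_page_unlock_queues();
 *
 * The routine drops and retakes the page queues lock internally, so the
 * caller simply sees it held across the call; "freed" is the number of
 * pages actually released to the free list, not counting pages that were
 * merely moved back to the inactive queue.
 */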
#if VM_OBJECT_CACHE
/*
 *	Check to see whether we really need to trim
 *	down the cache. If so, remove an object from
 *	the cache, terminate it, and repeat.
 *
 *	Called with, and returns with, cache lock unlocked.
 */
vm_object_t
vm_object_cache_trim(
	boolean_t called_from_vm_object_deallocate)
{
	register vm_object_t object = VM_OBJECT_NULL;
	vm_object_t shadow;

	for (;;) {

		/*
		 *	If we no longer need to trim the cache,
		 *	then we are done.
		 */
		if (vm_object_cached_count <= vm_object_cached_max)
			return VM_OBJECT_NULL;

		vm_object_cache_lock();
		if (vm_object_cached_count <= vm_object_cached_max) {
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}

		/*
		 *	We must trim down the cache, so remove
		 *	the first object in the cache.
		 */
		XPR(XPR_VM_OBJECT,
		"vm_object_cache_trim: removing from front of cache (%x, %x)\n",
			vm_object_cached_list.next,
			vm_object_cached_list.prev, 0, 0, 0);

		object = (vm_object_t) queue_first(&vm_object_cached_list);
		if (object == (vm_object_t) &vm_object_cached_list) {
			/* something's wrong with the calling parameter or */
			/* the value of vm_object_cached_count, just fix   */
			/* and return					    */
			if (vm_object_cached_max < 0)
				vm_object_cached_max = 0;
			vm_object_cached_count = 0;
			vm_object_cache_unlock();
			return VM_OBJECT_NULL;
		}
		vm_object_lock(object);
		queue_remove(&vm_object_cached_list, object, vm_object_t,
			     cached_list);
		vm_object_cached_count--;

		vm_object_cache_unlock();
		/*
		 *	Since this object is in the cache, we know
		 *	that it is initialized and has no references.
		 *	Take a reference to avoid recursive deallocations.
		 */

		assert(object->pager_initialized);
		assert(object->ref_count == 0);
		vm_object_lock_assert_exclusive(object);
		object->ref_count++;

		/*
		 *	Terminate the object.
		 *	If the object had a shadow, we let vm_object_deallocate
		 *	deallocate it. "pageout" objects have a shadow, but
		 *	maintain a "paging reference" rather than a normal
		 *	reference.
		 *	(We are careful here to limit recursion.)
		 */
		shadow = object->pageout?VM_OBJECT_NULL:object->shadow;

		if (vm_object_terminate(object) != KERN_SUCCESS)
			continue;

		if (shadow != VM_OBJECT_NULL) {
			if (called_from_vm_object_deallocate) {
				return shadow;
			} else {
				vm_object_deallocate(shadow);
			}
		}
	}
}
#endif


/*
 *	Routine:	vm_object_terminate
 *	Purpose:
 *		Free all resources associated with a vm_object.
 *	In/out conditions:
 *		Upon entry, the object must be locked,
 *		and the object must have exactly one reference.
 *
 *		The shadow object reference is left alone.
 *
 *		The object must be unlocked if it's found that pages
 *		must be flushed to a backing object.  If someone
 *		manages to map the object while it is being flushed
 *		the object is returned unlocked and unchanged.  Otherwise,
 *		upon exit, the cache will be unlocked, and the
 *		object will cease to exist.
 */
static kern_return_t
vm_object_terminate(
	vm_object_t	object)
{
	vm_object_t	shadow_object;

	XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n",
		object, object->ref_count, 0, 0, 0);

	if (!object->pageout && (!object->temporary || object->can_persist) &&
	    (object->pager != NULL || object->shadow_severed)) {
		/*
		 * Clear pager_trusted bit so that the pages get yanked
		 * out of the object instead of cleaned in place.  This
		 * prevents a deadlock in XMM and makes more sense anyway.
		 */
		object->pager_trusted = FALSE;

		vm_object_reap_pages(object, REAP_TERMINATE);
	}
	/*
	 *	Make sure the object isn't already being terminated
	 */
	if (object->terminating) {
		vm_object_lock_assert_exclusive(object);
		object->ref_count--;
		assert(object->ref_count > 0);
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Did somebody get a reference to the object while we were
	 *	cleaning it?
	 */
	if (object->ref_count != 1) {
		vm_object_lock_assert_exclusive(object);
		object->ref_count--;
		assert(object->ref_count > 0);
		vm_object_res_deallocate(object);
		vm_object_unlock(object);
		return KERN_FAILURE;
	}

	/*
	 *	Make sure no one can look us up now.
	 */

	object->terminating = TRUE;
	object->alive = FALSE;

	if ( !object->internal && (object->objq.next || object->objq.prev))
		vm_object_cache_remove(object);

	if (object->hashed) {
		lck_mtx_t	*lck;

		lck = vm_object_hash_lock_spin(object->pager);
		vm_object_remove(object);
		vm_object_hash_unlock(lck);
	}
	/*
	 *	Detach the object from its shadow if we are the shadow's
	 *	copy. The reference we hold on the shadow must be dropped
	 *	by our caller.
	 */
	if (((shadow_object = object->shadow) != VM_OBJECT_NULL) &&
	    !(object->pageout)) {
		vm_object_lock(shadow_object);
		if (shadow_object->copy == object)
			shadow_object->copy = VM_OBJECT_NULL;
		vm_object_unlock(shadow_object);
	}

	if (object->paging_in_progress != 0 ||
	    object->activity_in_progress != 0) {
		/*
		 * There are still some paging_in_progress references
		 * on this object, meaning that there are some paging
		 * or other I/O operations in progress for this VM object.
		 * Such operations take some paging_in_progress references
		 * up front to ensure that the object doesn't go away, but
		 * they may also need to acquire a reference on the VM object,
		 * to map it in kernel space, for example.  That means that
		 * they may end up releasing the last reference on the VM
		 * object, triggering its termination, while still holding
		 * paging_in_progress references.  Waiting for these
		 * pending paging_in_progress references to go away here would
		 * deadlock.
		 *
		 * To avoid deadlocking, we'll let the vm_object_reaper_thread
		 * complete the VM object termination if it still holds
		 * paging_in_progress references at this point.
		 *
		 * No new paging_in_progress should appear now that the
		 * VM object is "terminating" and not "alive".
		 */
		vm_object_reap_async(object);
		vm_object_unlock(object);
		/*
		 * Return KERN_FAILURE to let the caller know that we
		 * haven't completed the termination and it can't drop this
		 * object's reference on its shadow object yet.
		 * The reaper thread will take care of that once it has
		 * completed this object's termination.
		 */
		return KERN_FAILURE;
	}
	/*
	 * complete the VM object termination
	 */
	vm_object_reap(object);
	object = VM_OBJECT_NULL;

	/*
	 * the object lock was released by vm_object_reap()
	 *
	 * KERN_SUCCESS means that this object has been terminated
	 * and no longer needs its shadow object but still holds a
	 * reference on it.
	 * The caller is responsible for dropping that reference.
	 * We can't call vm_object_deallocate() here because that
	 * would create a recursion.
	 */
	return KERN_SUCCESS;
}
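/*
 * Illustrative caller pattern (not part of the original source): callers
 * such as vm_object_deallocate() and vm_object_cache_trim() snapshot the
 * shadow pointer before terminating, and only drop their shadow reference
 * on KERN_SUCCESS, since KERN_FAILURE may mean the reaper thread now owns
 * the remaining cleanup:
 *
 *	shadow = object->pageout ? VM_OBJECT_NULL : object->shadow;
 *	if (vm_object_terminate(object) == KERN_SUCCESS &&
 *	    shadow != VM_OBJECT_NULL)
 *		vm_object_deallocate(shadow);
 */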
/*
 * vm_object_reap():
 *
 * Complete the termination of a VM object after it's been marked
 * as "terminating" and "!alive" by vm_object_terminate().
 *
 * The VM object must be locked by caller.
 * The lock will be released on return and the VM object is no longer valid.
 */
void
vm_object_reap(
	vm_object_t object)
{
	memory_object_t		pager;

	vm_object_lock_assert_exclusive(object);
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);

	vm_object_reap_count++;

	pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;

	if (pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);

	object->ref_count--;
#if	TASK_SWAPPER
	assert(object->res_count == 0);
#endif	/* TASK_SWAPPER */

	assert (object->ref_count == 0);

	/*
	 * remove from purgeable queue if it's on
	 */
	if (object->internal && (object->objq.next || object->objq.prev)) {
		purgeable_q_t queue = vm_purgeable_object_remove(object);
		assert(queue);

		/* Must take page lock for this - using it to protect token queue */
		vm_page_lock_queues();
		vm_purgeable_token_delete_first(queue);

		assert(queue->debug_count_objects >= 0);
		vm_page_unlock_queues();
	}

	/*
	 *	Clean or free the pages, as appropriate.
	 *	It is possible for us to find busy/absent pages,
	 *	if some faults on this object were aborted.
	 */
	if (object->pageout) {
		assert(object->shadow != VM_OBJECT_NULL);

		vm_pageout_object_terminate(object);

	} else if (((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL))) {

		vm_object_reap_pages(object, REAP_REAP);
	}
	assert(queue_empty(&object->memq));
	assert(object->paging_in_progress == 0);
	assert(object->activity_in_progress == 0);
	assert(object->ref_count == 0);

	/*
	 * If the pager has not already been released by
	 * vm_object_destroy, we need to terminate it and
	 * release our reference to it here.
	 */
	if (pager != MEMORY_OBJECT_NULL) {
		vm_object_unlock(object);
		vm_object_release_pager(pager, object->hashed);
		vm_object_lock(object);
	}

	/* kick off anyone waiting on terminating */
	object->terminating = FALSE;
	vm_object_paging_begin(object);
	vm_object_paging_end(object);
	vm_object_unlock(object);

#if	MACH_PAGEMAP
	vm_external_destroy(object->existence_map, object->vo_size);
#endif	/* MACH_PAGEMAP */

	object->shadow = VM_OBJECT_NULL;

	vm_object_lock_destroy(object);
	/*
	 *	Free the space for the object.
	 */
	zfree(vm_object_zone, object);
	object = VM_OBJECT_NULL;
}


unsigned int vm_max_batch = 256;

#define V_O_R_MAX_BATCH 128

#define BATCH_LIMIT(max)	(vm_max_batch >= max ? max : vm_max_batch)
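/*
 * Illustrative note (not part of the original source): with the defaults
 * above, BATCH_LIMIT(V_O_R_MAX_BATCH) evaluates to 128 because
 * vm_max_batch (256) >= 128; tuning vm_max_batch below 128 would instead
 * cap each reap batch at vm_max_batch pages before the page queues lock
 * is dropped and the accumulated free list is released.
 */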
#define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect)		\
	MACRO_BEGIN							\
	if (_local_free_q) {						\
		if (do_disconnect) {					\
			vm_page_t m;					\
			for (m = _local_free_q;				\
			     m != VM_PAGE_NULL;				\
			     m = (vm_page_t) m->pageq.next) {		\
				if (m->pmapped) {			\
					pmap_disconnect(m->phys_page);	\
				}					\
			}						\
		}							\
		vm_page_free_list(_local_free_q, TRUE);			\
		_local_free_q = VM_PAGE_NULL;				\
	}								\
	MACRO_END


void
vm_object_reap_pages(
	vm_object_t	object,
	int		reap_type)
{
	vm_page_t	p;
	vm_page_t	next;
	vm_page_t	local_free_q = VM_PAGE_NULL;
	int		loop_count;
	boolean_t	disconnect_on_release;

	if (reap_type == REAP_DATA_FLUSH) {
		/*
		 * We need to disconnect pages from all pmaps before
		 * releasing them to the free list
		 */
		disconnect_on_release = TRUE;
	} else {
		/*
		 * Either the caller has already disconnected the pages
		 * from all pmaps, or we disconnect them here as we add
		 * them to our local list of pages to be released.
		 * No need to re-disconnect them when we release the pages
		 * to the free list.
		 */
		disconnect_on_release = FALSE;
	}

restart_after_sleep:
	if (queue_empty(&object->memq))
		return;
	loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH);

	vm_page_lockspin_queues();

	next = (vm_page_t)queue_first(&object->memq);

	while (!queue_end(&object->memq, (queue_entry_t)next)) {

		p = next;
		next = (vm_page_t)queue_next(&next->listq);

		if (--loop_count == 0) {

			vm_page_unlock_queues();

			if (local_free_q) {
				/*
				 * Free the pages we reclaimed so far
				 * and take a little break to avoid
				 * hogging the page queue lock too long
				 */
				VM_OBJ_REAP_FREELIST(local_free_q,
						     disconnect_on_release);
			} else
				mutex_pause(0);

			loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH);

			vm_page_lockspin_queues();
		}
		if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) {

			if (p->busy || p->cleaning) {

				vm_page_unlock_queues();
				/*
				 * free the pages reclaimed so far
				 */
				VM_OBJ_REAP_FREELIST(local_free_q,
						     disconnect_on_release);

				PAGE_SLEEP(object, p, THREAD_UNINT);

				goto restart_after_sleep;
			}
			if (p->laundry) {
				p->pageout = FALSE;

				vm_pageout_steal_laundry(p, TRUE);
			}
		}
		switch (reap_type) {

		case REAP_DATA_FLUSH:
			if (VM_PAGE_WIRED(p)) {
				/*
				 * this is an odd case... perhaps we should
				 * zero-fill this page since we're conceptually
				 * tossing its data at this point, but leaving
				 * it on the object to honor the 'wire' contract
				 */
				continue;
			}
			break;

		case REAP_PURGEABLE:
			if (VM_PAGE_WIRED(p)) {
				/*
				 * can't purge a wired page
				 */
				vm_page_purged_wired++;
				continue;
			}
			if (p->laundry && !p->busy && !p->cleaning) {
				p->pageout = FALSE;

				vm_pageout_steal_laundry(p, TRUE);
			}
			if (p->cleaning || p->laundry) {
				/*
				 * page is being acted upon,
				 * so don't mess with it
				 */
				vm_page_purged_others++;
				continue;
			}
			if (p->busy) {
				/*
				 * We can't reclaim a busy page but we can
				 * make it more likely to be paged (it's not wired) to make
				 * sure that it gets considered by
				 * vm_pageout_scan() later.
1940 */ 1941 vm_page_deactivate(p); 1942 vm_page_purged_busy++; 1943 continue; 1944 } 1945 1946 assert(p->object != kernel_object); 1947 1948 /* 1949 * we can discard this page... 1950 */ 1951 if (p->pmapped == TRUE) { 1952 int refmod_state; 1953 /* 1954 * unmap the page 1955 */ 1956 refmod_state = pmap_disconnect(p->phys_page); 1957 if (refmod_state & VM_MEM_MODIFIED) { 1958 SET_PAGE_DIRTY(p, FALSE); 1959 } 1960 } 1961 if (p->dirty || p->precious) { 1962 /* 1963 * we saved the cost of cleaning this page ! 1964 */ 1965 vm_page_purged_count++; 1966 } 1967 1968 break; 1969 1970 case REAP_TERMINATE: 1971 if (p->absent || p->private) { 1972 /* 1973 * For private pages, VM_PAGE_FREE just 1974 * leaves the page structure around for 1975 * its owner to clean up. For absent 1976 * pages, the structure is returned to 1977 * the appropriate pool. 1978 */ 1979 break; 1980 } 1981 if (p->fictitious) { 1982 assert (p->phys_page == vm_page_guard_addr); 1983 break; 1984 } 1985 if (!p->dirty && p->wpmapped) 1986 p->dirty = pmap_is_modified(p->phys_page); 1987 1988 if ((p->dirty || p->precious) && !p->error && object->alive) { 1989 1990 if (!p->laundry) { 1991 VM_PAGE_QUEUES_REMOVE(p); 1992 /* 1993 * flush page... page will be freed 1994 * upon completion of I/O 1995 */ 1996 vm_pageout_cluster(p, TRUE); 1997 } 1998 vm_page_unlock_queues(); 1999 /* 2000 * free the pages reclaimed so far 2001 */ 2002 VM_OBJ_REAP_FREELIST(local_free_q, 2003 disconnect_on_release); 2004 2005 vm_object_paging_wait(object, THREAD_UNINT); 2006 2007 goto restart_after_sleep; 2008 } 2009 break; 2010 2011 case REAP_REAP: 2012 break; 2013 } 2014 vm_page_free_prepare_queues(p); 2015 assert(p->pageq.next == NULL && p->pageq.prev == NULL); 2016 /* 2017 * Add this page to our list of reclaimed pages, 2018 * to be freed later. 2019 */ 2020 p->pageq.next = (queue_entry_t) local_free_q; 2021 local_free_q = p; 2022 } 2023 vm_page_unlock_queues(); 2024 2025 /* 2026 * Free the remaining reclaimed pages 2027 */ 2028 VM_OBJ_REAP_FREELIST(local_free_q, 2029 disconnect_on_release); 2030} 2031 2032 2033void 2034vm_object_reap_async( 2035 vm_object_t object) 2036{ 2037 vm_object_lock_assert_exclusive(object); 2038 2039 vm_object_reaper_lock_spin(); 2040 2041 vm_object_reap_count_async++; 2042 2043 /* enqueue the VM object... */ 2044 queue_enter(&vm_object_reaper_queue, object, 2045 vm_object_t, cached_list); 2046 2047 vm_object_reaper_unlock(); 2048 2049 /* ... and wake up the reaper thread */ 2050 thread_wakeup((event_t) &vm_object_reaper_queue); 2051} 2052 2053 2054void 2055vm_object_reaper_thread(void) 2056{ 2057 vm_object_t object, shadow_object; 2058 2059 vm_object_reaper_lock_spin(); 2060 2061 while (!queue_empty(&vm_object_reaper_queue)) { 2062 queue_remove_first(&vm_object_reaper_queue, 2063 object, 2064 vm_object_t, 2065 cached_list); 2066 2067 vm_object_reaper_unlock(); 2068 vm_object_lock(object); 2069 2070 assert(object->terminating); 2071 assert(!object->alive); 2072 2073 /* 2074 * The pageout daemon might be playing with our pages. 2075 * Now that the object is dead, it won't touch any more 2076 * pages, but some pages might already be on their way out. 2077 * Hence, we wait until the active paging activities have 2078 * ceased before we break the association with the pager 2079 * itself. 
2080 */ 2081 while (object->paging_in_progress != 0 || 2082 object->activity_in_progress != 0) { 2083 vm_object_wait(object, 2084 VM_OBJECT_EVENT_PAGING_IN_PROGRESS, 2085 THREAD_UNINT); 2086 vm_object_lock(object); 2087 } 2088 2089 shadow_object = 2090 object->pageout ? VM_OBJECT_NULL : object->shadow; 2091 2092 vm_object_reap(object); 2093 /* cache is unlocked and object is no longer valid */ 2094 object = VM_OBJECT_NULL; 2095 2096 if (shadow_object != VM_OBJECT_NULL) { 2097 /* 2098 * Drop the reference "object" was holding on 2099 * its shadow object. 2100 */ 2101 vm_object_deallocate(shadow_object); 2102 shadow_object = VM_OBJECT_NULL; 2103 } 2104 vm_object_reaper_lock_spin(); 2105 } 2106 2107 /* wait for more work... */ 2108 assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT); 2109 2110 vm_object_reaper_unlock(); 2111 2112 thread_block((thread_continue_t) vm_object_reaper_thread); 2113 /*NOTREACHED*/ 2114} 2115 2116/* 2117 * Routine: vm_object_pager_wakeup 2118 * Purpose: Wake up anyone waiting for termination of a pager. 2119 */ 2120 2121static void 2122vm_object_pager_wakeup( 2123 memory_object_t pager) 2124{ 2125 vm_object_hash_entry_t entry; 2126 boolean_t waiting = FALSE; 2127 lck_mtx_t *lck; 2128 2129 /* 2130 * If anyone was waiting for the memory_object_terminate 2131 * to be queued, wake them up now. 2132 */ 2133 lck = vm_object_hash_lock_spin(pager); 2134 entry = vm_object_hash_lookup(pager, TRUE); 2135 if (entry != VM_OBJECT_HASH_ENTRY_NULL) 2136 waiting = entry->waiting; 2137 vm_object_hash_unlock(lck); 2138 2139 if (entry != VM_OBJECT_HASH_ENTRY_NULL) { 2140 if (waiting) 2141 thread_wakeup((event_t) pager); 2142 vm_object_hash_entry_free(entry); 2143 } 2144} 2145 2146/* 2147 * Routine: vm_object_release_pager 2148 * Purpose: Terminate the pager and, upon completion, 2149 * release our last reference to it. 2150 * just like memory_object_terminate, except 2151 * that we wake up anyone blocked in vm_object_enter 2152 * waiting for termination message to be queued 2153 * before calling memory_object_init. 2154 */ 2155static void 2156vm_object_release_pager( 2157 memory_object_t pager, 2158 boolean_t hashed) 2159{ 2160 2161 /* 2162 * Terminate the pager. 2163 */ 2164 2165 (void) memory_object_terminate(pager); 2166 2167 if (hashed == TRUE) { 2168 /* 2169 * Wakeup anyone waiting for this terminate 2170 * and remove the entry from the hash 2171 */ 2172 vm_object_pager_wakeup(pager); 2173 } 2174 /* 2175 * Release reference to pager. 2176 */ 2177 memory_object_deallocate(pager); 2178} 2179 2180/* 2181 * Routine: vm_object_destroy 2182 * Purpose: 2183 * Shut down a VM object, despite the 2184 * presence of address map (or other) references 2185 * to the vm_object. 2186 */ 2187kern_return_t 2188vm_object_destroy( 2189 vm_object_t object, 2190 __unused kern_return_t reason) 2191{ 2192 memory_object_t old_pager; 2193 2194 if (object == VM_OBJECT_NULL) 2195 return(KERN_SUCCESS); 2196 2197 /* 2198 * Remove the pager association immediately. 2199 * 2200 * This will prevent the memory manager from further 2201 * meddling. [If it wanted to flush data or make 2202 * other changes, it should have done so before performing 2203 * the destroy call.] 2204 */ 2205 2206 vm_object_lock(object); 2207 object->can_persist = FALSE; 2208 object->named = FALSE; 2209 object->alive = FALSE; 2210 2211 if (object->hashed) { 2212 lck_mtx_t *lck; 2213 /* 2214 * Rip out the pager from the vm_object now... 
 */
		lck = vm_object_hash_lock_spin(object->pager);
		vm_object_remove(object);
		vm_object_hash_unlock(lck);
	}
	old_pager = object->pager;
	object->pager = MEMORY_OBJECT_NULL;
	if (old_pager != MEMORY_OBJECT_NULL)
		memory_object_control_disable(object->pager_control);

	/*
	 * Wait for the existing paging activity (that got
	 * through before we nulled out the pager) to subside.
	 */

	vm_object_paging_wait(object, THREAD_UNINT);
	vm_object_unlock(object);

	/*
	 * Terminate the object now.
	 */
	if (old_pager != MEMORY_OBJECT_NULL) {
		vm_object_release_pager(old_pager, object->hashed);

		/*
		 * JMM - Release the caller's reference.  This assumes the
		 * caller had a reference to release, which is a big (but
		 * currently valid) assumption if this is driven from the
		 * vnode pager (it is holding a named reference when making
		 * this call).
		 */
		vm_object_deallocate(object);

	}
	return(KERN_SUCCESS);
}


#if VM_OBJECT_CACHE

#define VM_OBJ_DEACT_ALL_STATS DEBUG
#if VM_OBJ_DEACT_ALL_STATS
uint32_t vm_object_deactivate_all_pages_batches = 0;
uint32_t vm_object_deactivate_all_pages_pages = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
/*
 * vm_object_deactivate_all_pages
 *
 * Deactivate all pages in the specified object.  (Keep its pages
 * in memory even though it is no longer referenced.)
 *
 * The object must be locked.
 */
static void
vm_object_deactivate_all_pages(
	register vm_object_t	object)
{
	register vm_page_t	p;
	int			loop_count;
#if VM_OBJ_DEACT_ALL_STATS
	int			pages_count;
#endif /* VM_OBJ_DEACT_ALL_STATS */
#define V_O_D_A_P_MAX_BATCH	256

	loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
#if VM_OBJ_DEACT_ALL_STATS
	pages_count = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
	vm_page_lock_queues();
	queue_iterate(&object->memq, p, vm_page_t, listq) {
		if (--loop_count == 0) {
#if VM_OBJ_DEACT_ALL_STATS
			hw_atomic_add(&vm_object_deactivate_all_pages_batches,
				      1);
			hw_atomic_add(&vm_object_deactivate_all_pages_pages,
				      pages_count);
			pages_count = 0;
#endif /* VM_OBJ_DEACT_ALL_STATS */
			lck_mtx_yield(&vm_page_queue_lock);
			loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH);
		}
		if (!p->busy && !p->throttled) {
#if VM_OBJ_DEACT_ALL_STATS
			pages_count++;
#endif /* VM_OBJ_DEACT_ALL_STATS */
			vm_page_deactivate(p);
		}
	}
#if VM_OBJ_DEACT_ALL_STATS
	if (pages_count) {
		hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1);
		hw_atomic_add(&vm_object_deactivate_all_pages_pages,
			      pages_count);
		pages_count = 0;
	}
#endif /* VM_OBJ_DEACT_ALL_STATS */
	vm_page_unlock_queues();
}
#endif	/* VM_OBJECT_CACHE */



/*
 * The "chunk" macros are used by routines below when looking for pages to deactivate.  These
 * exist because of the need to handle shadow chains.  When deactivating pages, we only
 * want to deactivate the ones at the topmost level in the object chain.  In order to do
 * this efficiently, the specified address range is divided up into "chunks" and we use
 * a bit map to keep track of which pages have already been processed as we descend down
 * the shadow chain.  These chunk macros hide the details of the bit map implementation
 * as much as we can.
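 *
 * As a concrete illustration of the macros defined below: for a range covering
 * only 5 pages, CHUNK_INIT() leaves just bits 0-4 set (0x1f), because the pages
 * beyond the range are pre-marked as handled.  A later MARK_PAGE_HANDLED(c, 2)
 * clears bit 2, leaving 0x1b, and CHUNK_NOT_COMPLETE() reports the whole range
 * finished once the word reaches zero.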
2325 * 2326 * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is 2327 * set to 64 pages. The bit map is indexed from the low-order end, so that the lowest 2328 * order bit represents page 0 in the current range and highest order bit represents 2329 * page 63. 2330 * 2331 * For further convenience, we also use negative logic for the page state in the bit map. 2332 * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has 2333 * been processed. This way we can simply test the 64-bit long word to see if it's zero 2334 * to easily tell if the whole range has been processed. Therefore, the bit map starts 2335 * out with all the bits set. The macros below hide all these details from the caller. 2336 */ 2337 2338#define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */ 2339 /* be the same as the number of bits in */ 2340 /* the chunk_state_t type. We use 64 */ 2341 /* just for convenience. */ 2342 2343#define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */ 2344 2345typedef uint64_t chunk_state_t; 2346 2347/* 2348 * The bit map uses negative logic, so we start out with all 64 bits set to indicate 2349 * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE, 2350 * then we mark pages beyond the len as having been "processed" so that we don't waste time 2351 * looking at pages in that range. This can save us from unnecessarily chasing down the 2352 * shadow chain. 2353 */ 2354 2355#define CHUNK_INIT(c, len) \ 2356 MACRO_BEGIN \ 2357 uint64_t p; \ 2358 \ 2359 (c) = 0xffffffffffffffffLL; \ 2360 \ 2361 for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \ 2362 MARK_PAGE_HANDLED(c, p); \ 2363 MACRO_END 2364 2365 2366/* 2367 * Return true if all pages in the chunk have not yet been processed. 2368 */ 2369 2370#define CHUNK_NOT_COMPLETE(c) ((c) != 0) 2371 2372/* 2373 * Return true if the page at offset 'p' in the bit map has already been handled 2374 * while processing a higher level object in the shadow chain. 2375 */ 2376 2377#define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0) 2378 2379/* 2380 * Mark the page at offset 'p' in the bit map as having been processed. 2381 */ 2382 2383#define MARK_PAGE_HANDLED(c, p) \ 2384MACRO_BEGIN \ 2385 (c) = (c) & ~(1LL << (p)); \ 2386MACRO_END 2387 2388 2389/* 2390 * Return true if the page at the given offset has been paged out. Object is 2391 * locked upon entry and returned locked. 2392 */ 2393 2394static boolean_t 2395page_is_paged_out( 2396 vm_object_t object, 2397 vm_object_offset_t offset) 2398{ 2399 kern_return_t kr; 2400 memory_object_t pager; 2401 2402 /* 2403 * Check the existence map for the page if we have one, otherwise 2404 * ask the pager about this page. 2405 */ 2406 2407#if MACH_PAGEMAP 2408 if (object->existence_map) { 2409 if (vm_external_state_get(object->existence_map, offset) 2410 == VM_EXTERNAL_STATE_EXISTS) { 2411 /* 2412 * We found the page 2413 */ 2414 2415 return TRUE; 2416 } 2417 } else 2418#endif 2419 if (object->internal && 2420 object->alive && 2421 !object->terminating && 2422 object->pager_ready) { 2423 2424 /* 2425 * We're already holding a "paging in progress" reference 2426 * so the object can't disappear when we release the lock. 
2427 */ 2428 2429 assert(object->paging_in_progress); 2430 pager = object->pager; 2431 vm_object_unlock(object); 2432 2433 kr = memory_object_data_request( 2434 pager, 2435 offset + object->paging_offset, 2436 0, /* just poke the pager */ 2437 VM_PROT_READ, 2438 NULL); 2439 2440 vm_object_lock(object); 2441 2442 if (kr == KERN_SUCCESS) { 2443 2444 /* 2445 * We found the page 2446 */ 2447 2448 return TRUE; 2449 } 2450 } 2451 2452 return FALSE; 2453} 2454 2455 2456 2457/* 2458 * Deactivate the pages in the specified object and range. If kill_page is set, also discard any 2459 * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify 2460 * a size that is less than or equal to the CHUNK_SIZE. 2461 */ 2462 2463static void 2464deactivate_pages_in_object( 2465 vm_object_t object, 2466 vm_object_offset_t offset, 2467 vm_object_size_t size, 2468 boolean_t kill_page, 2469 boolean_t reusable_page, 2470#if !MACH_ASSERT 2471 __unused 2472#endif 2473 boolean_t all_reusable, 2474 chunk_state_t *chunk_state) 2475{ 2476 vm_page_t m; 2477 int p; 2478 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 2479 struct vm_page_delayed_work *dwp; 2480 int dw_count; 2481 int dw_limit; 2482 unsigned int reusable = 0; 2483 2484 2485 /* 2486 * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the 2487 * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may 2488 * have pages marked as having been processed already. We stop the loop early if we find we've handled 2489 * all the pages in the chunk. 2490 */ 2491 2492 dwp = &dw_array[0]; 2493 dw_count = 0; 2494 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 2495 2496 for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) { 2497 2498 /* 2499 * If this offset has already been found and handled in a higher level object, then don't 2500 * do anything with it in the current shadow object. 2501 */ 2502 2503 if (PAGE_ALREADY_HANDLED(*chunk_state, p)) 2504 continue; 2505 2506 /* 2507 * See if the page at this offset is around. First check to see if the page is resident, 2508 * then if not, check the existence map or with the pager. 2509 */ 2510 2511 if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { 2512 2513 /* 2514 * We found a page we were looking for. Mark it as "handled" now in the chunk_state 2515 * so that we won't bother looking for a page at this offset again if there are more 2516 * shadow objects. Then deactivate the page. 2517 */ 2518 2519 MARK_PAGE_HANDLED(*chunk_state, p); 2520 2521 if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy) && (!m->laundry)) { 2522 int clear_refmod; 2523 2524 clear_refmod = VM_MEM_REFERENCED; 2525 dwp->dw_mask = DW_clear_reference; 2526 2527 if ((kill_page) && (object->internal)) { 2528 m->precious = FALSE; 2529 m->dirty = FALSE; 2530 2531 clear_refmod |= VM_MEM_MODIFIED; 2532 if (m->throttled) { 2533 /* 2534 * This page is now clean and 2535 * reclaimable. Move it out 2536 * of the throttled queue, so 2537 * that vm_pageout_scan() can 2538 * find it. 
 */
					dwp->dw_mask |= DW_move_page;
				}
#if	MACH_PAGEMAP
				vm_external_state_clr(object->existence_map, offset);
#endif	/* MACH_PAGEMAP */

				if (reusable_page && !m->reusable) {
					assert(!all_reusable);
					assert(!object->all_reusable);
					m->reusable = TRUE;
					object->reusable_page_count++;
					assert(object->resident_page_count >= object->reusable_page_count);
					reusable++;
				}
			}
			pmap_clear_refmod(m->phys_page, clear_refmod);

			if (!m->throttled && !(reusable_page || all_reusable))
				dwp->dw_mask |= DW_move_page;

			VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);

			if (dw_count >= dw_limit) {
				if (reusable) {
					OSAddAtomic(reusable,
						    &vm_page_stats_reusable.reusable_count);
					vm_page_stats_reusable.reusable += reusable;
					reusable = 0;
				}
				vm_page_do_delayed_work(object, &dw_array[0], dw_count);

				dwp = &dw_array[0];
				dw_count = 0;
			}
		}

		} else {

			/*
			 * The page at this offset isn't memory resident, check to see if it's
			 * been paged out.  If so, mark it as handled so we don't bother looking
			 * for it in the shadow chain.
			 */

			if (page_is_paged_out(object, offset)) {
				MARK_PAGE_HANDLED(*chunk_state, p);

				/*
				 * If we're killing a non-resident page, then clear the page in the existence
				 * map so we don't bother paging it back in if it's touched again in the future.
				 */

				if ((kill_page) && (object->internal)) {
#if	MACH_PAGEMAP
					vm_external_state_clr(object->existence_map, offset);
#endif	/* MACH_PAGEMAP */
				}
			}
		}
	}

	if (reusable) {
		OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reusable += reusable;
		reusable = 0;
	}

	if (dw_count)
		vm_page_do_delayed_work(object, &dw_array[0], dw_count);
}


/*
 * Deactivate a "chunk" of the given range of the object starting at offset.  A "chunk"
 * will always be less than or equal to the given size.  The total range is divided up
 * into chunks for efficiency and performance related to the locks and handling the shadow
 * chain.  This routine returns how much of the given "size" it actually processed.  It's
 * up to the caller to loop and keep calling this routine until the entire range they want
 * to process has been done.
 */

static vm_object_size_t
deactivate_a_chunk(
	vm_object_t		orig_object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		kill_page,
	boolean_t		reusable_page,
	boolean_t		all_reusable)
{
	vm_object_t		object;
	vm_object_t		tmp_object;
	vm_object_size_t	length;
	chunk_state_t		chunk_state;


	/*
	 * Get set to do a chunk.  We'll do up to CHUNK_SIZE, but no more than the
	 * remaining size the caller asked for.
	 */

	length = MIN(size, CHUNK_SIZE);

	/*
	 * The chunk_state keeps track of which pages we've already processed if there's
	 * a shadow chain on this object.  At this point, we haven't done anything with this
	 * range of pages yet, so initialize the state to indicate no pages processed yet.
	 */

	CHUNK_INIT(chunk_state, length);
	object = orig_object;

	/*
	 * Start at the top level object and iterate around the loop once for each object
	 * in the shadow chain.  We stop processing early if we've already found all the pages
	 * in the range.
Otherwise we stop when we run out of shadow objects. 2656 */ 2657 2658 while (object && CHUNK_NOT_COMPLETE(chunk_state)) { 2659 vm_object_paging_begin(object); 2660 2661 deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state); 2662 2663 vm_object_paging_end(object); 2664 2665 /* 2666 * We've finished with this object, see if there's a shadow object. If 2667 * there is, update the offset and lock the new object. We also turn off 2668 * kill_page at this point since we only kill pages in the top most object. 2669 */ 2670 2671 tmp_object = object->shadow; 2672 2673 if (tmp_object) { 2674 kill_page = FALSE; 2675 reusable_page = FALSE; 2676 all_reusable = FALSE; 2677 offset += object->vo_shadow_offset; 2678 vm_object_lock(tmp_object); 2679 } 2680 2681 if (object != orig_object) 2682 vm_object_unlock(object); 2683 2684 object = tmp_object; 2685 } 2686 2687 if (object && object != orig_object) 2688 vm_object_unlock(object); 2689 2690 return length; 2691} 2692 2693 2694 2695/* 2696 * Move any resident pages in the specified range to the inactive queue. If kill_page is set, 2697 * we also clear the modified status of the page and "forget" any changes that have been made 2698 * to the page. 2699 */ 2700 2701__private_extern__ void 2702vm_object_deactivate_pages( 2703 vm_object_t object, 2704 vm_object_offset_t offset, 2705 vm_object_size_t size, 2706 boolean_t kill_page, 2707 boolean_t reusable_page) 2708{ 2709 vm_object_size_t length; 2710 boolean_t all_reusable; 2711 2712 /* 2713 * We break the range up into chunks and do one chunk at a time. This is for 2714 * efficiency and performance while handling the shadow chains and the locks. 2715 * The deactivate_a_chunk() function returns how much of the range it processed. 2716 * We keep calling this routine until the given size is exhausted. 2717 */ 2718 2719 2720 all_reusable = FALSE; 2721 if (reusable_page && 2722 object->internal && 2723 object->vo_size != 0 && 2724 object->vo_size == size && 2725 object->reusable_page_count == 0) { 2726 all_reusable = TRUE; 2727 reusable_page = FALSE; 2728 } 2729 2730 if ((reusable_page || all_reusable) && object->all_reusable) { 2731 /* This means MADV_FREE_REUSABLE has been called twice, which 2732 * is probably illegal. 
*/ 2733 return; 2734 } 2735 2736 while (size) { 2737 length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable); 2738 2739 size -= length; 2740 offset += length; 2741 } 2742 2743 if (all_reusable) { 2744 if (!object->all_reusable) { 2745 unsigned int reusable; 2746 2747 object->all_reusable = TRUE; 2748 assert(object->reusable_page_count == 0); 2749 /* update global stats */ 2750 reusable = object->resident_page_count; 2751 OSAddAtomic(reusable, 2752 &vm_page_stats_reusable.reusable_count); 2753 vm_page_stats_reusable.reusable += reusable; 2754 vm_page_stats_reusable.all_reusable_calls++; 2755 } 2756 } else if (reusable_page) { 2757 vm_page_stats_reusable.partial_reusable_calls++; 2758 } 2759} 2760 2761void 2762vm_object_reuse_pages( 2763 vm_object_t object, 2764 vm_object_offset_t start_offset, 2765 vm_object_offset_t end_offset, 2766 boolean_t allow_partial_reuse) 2767{ 2768 vm_object_offset_t cur_offset; 2769 vm_page_t m; 2770 unsigned int reused, reusable; 2771 2772#define VM_OBJECT_REUSE_PAGE(object, m, reused) \ 2773 MACRO_BEGIN \ 2774 if ((m) != VM_PAGE_NULL && \ 2775 (m)->reusable) { \ 2776 assert((object)->reusable_page_count <= \ 2777 (object)->resident_page_count); \ 2778 assert((object)->reusable_page_count > 0); \ 2779 (object)->reusable_page_count--; \ 2780 (m)->reusable = FALSE; \ 2781 (reused)++; \ 2782 } \ 2783 MACRO_END 2784 2785 reused = 0; 2786 reusable = 0; 2787 2788 vm_object_lock_assert_exclusive(object); 2789 2790 if (object->all_reusable) { 2791 assert(object->reusable_page_count == 0); 2792 object->all_reusable = FALSE; 2793 if (end_offset - start_offset == object->vo_size || 2794 !allow_partial_reuse) { 2795 vm_page_stats_reusable.all_reuse_calls++; 2796 reused = object->resident_page_count; 2797 } else { 2798 vm_page_stats_reusable.partial_reuse_calls++; 2799 queue_iterate(&object->memq, m, vm_page_t, listq) { 2800 if (m->offset < start_offset || 2801 m->offset >= end_offset) { 2802 m->reusable = TRUE; 2803 object->reusable_page_count++; 2804 assert(object->resident_page_count >= object->reusable_page_count); 2805 continue; 2806 } else { 2807 assert(!m->reusable); 2808 reused++; 2809 } 2810 } 2811 } 2812 } else if (object->resident_page_count > 2813 ((end_offset - start_offset) >> PAGE_SHIFT)) { 2814 vm_page_stats_reusable.partial_reuse_calls++; 2815 for (cur_offset = start_offset; 2816 cur_offset < end_offset; 2817 cur_offset += PAGE_SIZE_64) { 2818 if (object->reusable_page_count == 0) { 2819 break; 2820 } 2821 m = vm_page_lookup(object, cur_offset); 2822 VM_OBJECT_REUSE_PAGE(object, m, reused); 2823 } 2824 } else { 2825 vm_page_stats_reusable.partial_reuse_calls++; 2826 queue_iterate(&object->memq, m, vm_page_t, listq) { 2827 if (object->reusable_page_count == 0) { 2828 break; 2829 } 2830 if (m->offset < start_offset || 2831 m->offset >= end_offset) { 2832 continue; 2833 } 2834 VM_OBJECT_REUSE_PAGE(object, m, reused); 2835 } 2836 } 2837 2838 /* update global stats */ 2839 OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count); 2840 vm_page_stats_reusable.reused += reused; 2841 vm_page_stats_reusable.reusable += reusable; 2842} 2843 2844/* 2845 * Routine: vm_object_pmap_protect 2846 * 2847 * Purpose: 2848 * Reduces the permission for all physical 2849 * pages in the specified object range. 2850 * 2851 * If removing write permission only, it is 2852 * sufficient to protect only the pages in 2853 * the top-level object; only those pages may 2854 * have write permission. 
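 *		(For instance, when only write permission is being removed
 *		the loop below never descends past the top-level object;
 *		only prot == VM_PROT_NONE forces the walk down the shadow
 *		chain.)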
2855 * 2856 * If removing all access, we must follow the 2857 * shadow chain from the top-level object to 2858 * remove access to all pages in shadowed objects. 2859 * 2860 * The object must *not* be locked. The object must 2861 * be temporary/internal. 2862 * 2863 * If pmap is not NULL, this routine assumes that 2864 * the only mappings for the pages are in that 2865 * pmap. 2866 */ 2867 2868__private_extern__ void 2869vm_object_pmap_protect( 2870 register vm_object_t object, 2871 register vm_object_offset_t offset, 2872 vm_object_size_t size, 2873 pmap_t pmap, 2874 vm_map_offset_t pmap_start, 2875 vm_prot_t prot) 2876{ 2877 if (object == VM_OBJECT_NULL) 2878 return; 2879 size = vm_object_round_page(size); 2880 offset = vm_object_trunc_page(offset); 2881 2882 vm_object_lock(object); 2883 2884 if (object->phys_contiguous) { 2885 if (pmap != NULL) { 2886 vm_object_unlock(object); 2887 pmap_protect(pmap, pmap_start, pmap_start + size, prot); 2888 } else { 2889 vm_object_offset_t phys_start, phys_end, phys_addr; 2890 2891 phys_start = object->vo_shadow_offset + offset; 2892 phys_end = phys_start + size; 2893 assert(phys_start <= phys_end); 2894 assert(phys_end <= object->vo_shadow_offset + object->vo_size); 2895 vm_object_unlock(object); 2896 2897 for (phys_addr = phys_start; 2898 phys_addr < phys_end; 2899 phys_addr += PAGE_SIZE_64) { 2900 pmap_page_protect((ppnum_t) (phys_addr >> PAGE_SHIFT), prot); 2901 } 2902 } 2903 return; 2904 } 2905 2906 assert(object->internal); 2907 2908 while (TRUE) { 2909 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) { 2910 vm_object_unlock(object); 2911 pmap_protect(pmap, pmap_start, pmap_start + size, prot); 2912 return; 2913 } 2914 2915 /* if we are doing large ranges with respect to resident */ 2916 /* page count then we should interate over pages otherwise */ 2917 /* inverse page look-up will be faster */ 2918 if (ptoa_64(object->resident_page_count / 4) < size) { 2919 vm_page_t p; 2920 vm_object_offset_t end; 2921 2922 end = offset + size; 2923 2924 if (pmap != PMAP_NULL) { 2925 queue_iterate(&object->memq, p, vm_page_t, listq) { 2926 if (!p->fictitious && 2927 (offset <= p->offset) && (p->offset < end)) { 2928 vm_map_offset_t start; 2929 2930 start = pmap_start + p->offset - offset; 2931 pmap_protect(pmap, start, start + PAGE_SIZE_64, prot); 2932 } 2933 } 2934 } else { 2935 queue_iterate(&object->memq, p, vm_page_t, listq) { 2936 if (!p->fictitious && 2937 (offset <= p->offset) && (p->offset < end)) { 2938 2939 pmap_page_protect(p->phys_page, prot); 2940 } 2941 } 2942 } 2943 } else { 2944 vm_page_t p; 2945 vm_object_offset_t end; 2946 vm_object_offset_t target_off; 2947 2948 end = offset + size; 2949 2950 if (pmap != PMAP_NULL) { 2951 for(target_off = offset; 2952 target_off < end; 2953 target_off += PAGE_SIZE) { 2954 p = vm_page_lookup(object, target_off); 2955 if (p != VM_PAGE_NULL) { 2956 vm_object_offset_t start; 2957 start = pmap_start + 2958 (p->offset - offset); 2959 pmap_protect(pmap, start, 2960 start + PAGE_SIZE, prot); 2961 } 2962 } 2963 } else { 2964 for(target_off = offset; 2965 target_off < end; target_off += PAGE_SIZE) { 2966 p = vm_page_lookup(object, target_off); 2967 if (p != VM_PAGE_NULL) { 2968 pmap_page_protect(p->phys_page, prot); 2969 } 2970 } 2971 } 2972 } 2973 2974 if (prot == VM_PROT_NONE) { 2975 /* 2976 * Must follow shadow chain to remove access 2977 * to pages in shadowed objects. 
2978 */ 2979 register vm_object_t next_object; 2980 2981 next_object = object->shadow; 2982 if (next_object != VM_OBJECT_NULL) { 2983 offset += object->vo_shadow_offset; 2984 vm_object_lock(next_object); 2985 vm_object_unlock(object); 2986 object = next_object; 2987 } 2988 else { 2989 /* 2990 * End of chain - we are done. 2991 */ 2992 break; 2993 } 2994 } 2995 else { 2996 /* 2997 * Pages in shadowed objects may never have 2998 * write permission - we may stop here. 2999 */ 3000 break; 3001 } 3002 } 3003 3004 vm_object_unlock(object); 3005} 3006 3007/* 3008 * Routine: vm_object_copy_slowly 3009 * 3010 * Description: 3011 * Copy the specified range of the source 3012 * virtual memory object without using 3013 * protection-based optimizations (such 3014 * as copy-on-write). The pages in the 3015 * region are actually copied. 3016 * 3017 * In/out conditions: 3018 * The caller must hold a reference and a lock 3019 * for the source virtual memory object. The source 3020 * object will be returned *unlocked*. 3021 * 3022 * Results: 3023 * If the copy is completed successfully, KERN_SUCCESS is 3024 * returned. If the caller asserted the interruptible 3025 * argument, and an interruption occurred while waiting 3026 * for a user-generated event, MACH_SEND_INTERRUPTED is 3027 * returned. Other values may be returned to indicate 3028 * hard errors during the copy operation. 3029 * 3030 * A new virtual memory object is returned in a 3031 * parameter (_result_object). The contents of this 3032 * new object, starting at a zero offset, are a copy 3033 * of the source memory region. In the event of 3034 * an error, this parameter will contain the value 3035 * VM_OBJECT_NULL. 3036 */ 3037__private_extern__ kern_return_t 3038vm_object_copy_slowly( 3039 register vm_object_t src_object, 3040 vm_object_offset_t src_offset, 3041 vm_object_size_t size, 3042 boolean_t interruptible, 3043 vm_object_t *_result_object) /* OUT */ 3044{ 3045 vm_object_t new_object; 3046 vm_object_offset_t new_offset; 3047 3048 struct vm_object_fault_info fault_info; 3049 3050 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n", 3051 src_object, src_offset, size, 0, 0); 3052 3053 if (size == 0) { 3054 vm_object_unlock(src_object); 3055 *_result_object = VM_OBJECT_NULL; 3056 return(KERN_INVALID_ARGUMENT); 3057 } 3058 3059 /* 3060 * Prevent destruction of the source object while we copy. 3061 */ 3062 3063 vm_object_reference_locked(src_object); 3064 vm_object_unlock(src_object); 3065 3066 /* 3067 * Create a new object to hold the copied pages. 3068 * A few notes: 3069 * We fill the new object starting at offset 0, 3070 * regardless of the input offset. 3071 * We don't bother to lock the new object within 3072 * this routine, since we have the only reference. 3073 */ 3074 3075 new_object = vm_object_allocate(size); 3076 new_offset = 0; 3077 3078 assert(size == trunc_page_64(size)); /* Will the loop terminate? 
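							   Yes: size is asserted
							   to be page-aligned here,
							   so the per-page decrements
							   in the copy loop below
							   reach exactly zero.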
*/ 3079 3080 fault_info.interruptible = interruptible; 3081 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 3082 fault_info.user_tag = 0; 3083 fault_info.lo_offset = src_offset; 3084 fault_info.hi_offset = src_offset + size; 3085 fault_info.no_cache = FALSE; 3086 fault_info.stealth = TRUE; 3087 fault_info.io_sync = FALSE; 3088 fault_info.cs_bypass = FALSE; 3089 fault_info.mark_zf_absent = FALSE; 3090 fault_info.batch_pmap_op = FALSE; 3091 3092 for ( ; 3093 size != 0 ; 3094 src_offset += PAGE_SIZE_64, 3095 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64 3096 ) { 3097 vm_page_t new_page; 3098 vm_fault_return_t result; 3099 3100 vm_object_lock(new_object); 3101 3102 while ((new_page = vm_page_alloc(new_object, new_offset)) 3103 == VM_PAGE_NULL) { 3104 3105 vm_object_unlock(new_object); 3106 3107 if (!vm_page_wait(interruptible)) { 3108 vm_object_deallocate(new_object); 3109 vm_object_deallocate(src_object); 3110 *_result_object = VM_OBJECT_NULL; 3111 return(MACH_SEND_INTERRUPTED); 3112 } 3113 vm_object_lock(new_object); 3114 } 3115 vm_object_unlock(new_object); 3116 3117 do { 3118 vm_prot_t prot = VM_PROT_READ; 3119 vm_page_t _result_page; 3120 vm_page_t top_page; 3121 register 3122 vm_page_t result_page; 3123 kern_return_t error_code; 3124 3125 vm_object_lock(src_object); 3126 vm_object_paging_begin(src_object); 3127 3128 if (size > (vm_size_t) -1) { 3129 /* 32-bit overflow */ 3130 fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 3131 } else { 3132 fault_info.cluster_size = (vm_size_t) size; 3133 assert(fault_info.cluster_size == size); 3134 } 3135 3136 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0); 3137 result = vm_fault_page(src_object, src_offset, 3138 VM_PROT_READ, FALSE, 3139 &prot, &_result_page, &top_page, 3140 (int *)0, 3141 &error_code, FALSE, FALSE, &fault_info); 3142 3143 switch(result) { 3144 case VM_FAULT_SUCCESS: 3145 result_page = _result_page; 3146 3147 /* 3148 * Copy the page to the new object. 3149 * 3150 * POLICY DECISION: 3151 * If result_page is clean, 3152 * we could steal it instead 3153 * of copying. 3154 */ 3155 3156 vm_page_copy(result_page, new_page); 3157 vm_object_unlock(result_page->object); 3158 3159 /* 3160 * Let go of both pages (make them 3161 * not busy, perform wakeup, activate). 3162 */ 3163 vm_object_lock(new_object); 3164 SET_PAGE_DIRTY(new_page, FALSE); 3165 PAGE_WAKEUP_DONE(new_page); 3166 vm_object_unlock(new_object); 3167 3168 vm_object_lock(result_page->object); 3169 PAGE_WAKEUP_DONE(result_page); 3170 3171 vm_page_lockspin_queues(); 3172 if (!result_page->active && 3173 !result_page->inactive && 3174 !result_page->throttled) 3175 vm_page_activate(result_page); 3176 vm_page_activate(new_page); 3177 vm_page_unlock_queues(); 3178 3179 /* 3180 * Release paging references and 3181 * top-level placeholder page, if any. 
3182 */ 3183 3184 vm_fault_cleanup(result_page->object, 3185 top_page); 3186 3187 break; 3188 3189 case VM_FAULT_RETRY: 3190 break; 3191 3192 case VM_FAULT_MEMORY_SHORTAGE: 3193 if (vm_page_wait(interruptible)) 3194 break; 3195 /* fall thru */ 3196 3197 case VM_FAULT_INTERRUPTED: 3198 vm_object_lock(new_object); 3199 VM_PAGE_FREE(new_page); 3200 vm_object_unlock(new_object); 3201 3202 vm_object_deallocate(new_object); 3203 vm_object_deallocate(src_object); 3204 *_result_object = VM_OBJECT_NULL; 3205 return(MACH_SEND_INTERRUPTED); 3206 3207 case VM_FAULT_SUCCESS_NO_VM_PAGE: 3208 /* success but no VM page: fail */ 3209 vm_object_paging_end(src_object); 3210 vm_object_unlock(src_object); 3211 /*FALLTHROUGH*/ 3212 case VM_FAULT_MEMORY_ERROR: 3213 /* 3214 * A policy choice: 3215 * (a) ignore pages that we can't 3216 * copy 3217 * (b) return the null object if 3218 * any page fails [chosen] 3219 */ 3220 3221 vm_object_lock(new_object); 3222 VM_PAGE_FREE(new_page); 3223 vm_object_unlock(new_object); 3224 3225 vm_object_deallocate(new_object); 3226 vm_object_deallocate(src_object); 3227 *_result_object = VM_OBJECT_NULL; 3228 return(error_code ? error_code: 3229 KERN_MEMORY_ERROR); 3230 3231 default: 3232 panic("vm_object_copy_slowly: unexpected error" 3233 " 0x%x from vm_fault_page()\n", result); 3234 } 3235 } while (result != VM_FAULT_SUCCESS); 3236 } 3237 3238 /* 3239 * Lose the extra reference, and return our object. 3240 */ 3241 vm_object_deallocate(src_object); 3242 *_result_object = new_object; 3243 return(KERN_SUCCESS); 3244} 3245 3246/* 3247 * Routine: vm_object_copy_quickly 3248 * 3249 * Purpose: 3250 * Copy the specified range of the source virtual 3251 * memory object, if it can be done without waiting 3252 * for user-generated events. 3253 * 3254 * Results: 3255 * If the copy is successful, the copy is returned in 3256 * the arguments; otherwise, the arguments are not 3257 * affected. 3258 * 3259 * In/out conditions: 3260 * The object should be unlocked on entry and exit. 3261 */ 3262 3263/*ARGSUSED*/ 3264__private_extern__ boolean_t 3265vm_object_copy_quickly( 3266 vm_object_t *_object, /* INOUT */ 3267 __unused vm_object_offset_t offset, /* IN */ 3268 __unused vm_object_size_t size, /* IN */ 3269 boolean_t *_src_needs_copy, /* OUT */ 3270 boolean_t *_dst_needs_copy) /* OUT */ 3271{ 3272 vm_object_t object = *_object; 3273 memory_object_copy_strategy_t copy_strategy; 3274 3275 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n", 3276 *_object, offset, size, 0, 0); 3277 if (object == VM_OBJECT_NULL) { 3278 *_src_needs_copy = FALSE; 3279 *_dst_needs_copy = FALSE; 3280 return(TRUE); 3281 } 3282 3283 vm_object_lock(object); 3284 3285 copy_strategy = object->copy_strategy; 3286 3287 switch (copy_strategy) { 3288 case MEMORY_OBJECT_COPY_SYMMETRIC: 3289 3290 /* 3291 * Symmetric copy strategy. 3292 * Make another reference to the object. 3293 * Leave object/offset unchanged. 3294 */ 3295 3296 vm_object_reference_locked(object); 3297 object->shadowed = TRUE; 3298 vm_object_unlock(object); 3299 3300 /* 3301 * Both source and destination must make 3302 * shadows, and the source must be made 3303 * read-only if not already. 
 */

		*_src_needs_copy = TRUE;
		*_dst_needs_copy = TRUE;

		break;

	case MEMORY_OBJECT_COPY_DELAY:
		vm_object_unlock(object);
		return(FALSE);

	default:
		vm_object_unlock(object);
		return(FALSE);
	}
	return(TRUE);
}

static int copy_call_count = 0;
static int copy_call_sleep_count = 0;
static int copy_call_restart_count = 0;

/*
 *	Routine:	vm_object_copy_call [internal]
 *
 *	Description:
 *		Copy the source object (src_object), using the
 *		user-managed copy algorithm.
 *
 *	In/out conditions:
 *		The source object must be locked on entry.  It
 *		will be *unlocked* on exit.
 *
 *	Results:
 *		If the copy is successful, KERN_SUCCESS is returned.
 *		A new object that represents the copied virtual
 *		memory is returned in a parameter (*_result_object).
 *		If the return value indicates an error, this parameter
 *		is not valid.
 */
static kern_return_t
vm_object_copy_call(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	vm_object_t		*_result_object)	/* OUT */
{
	kern_return_t	kr;
	vm_object_t	copy;
	boolean_t	check_ready = FALSE;
	uint32_t	try_failed_count = 0;

	/*
	 *	If a copy is already in progress, wait and retry.
	 *
	 *	XXX
	 *	Consider making this call interruptible, as Mike
	 *	intended it to be.
	 *
	 *	XXXO
	 *	Need a counter or version or something to allow
	 *	us to use the copy that the currently requesting
	 *	thread is obtaining -- is it worth adding to the
	 *	vm object structure? Depends on how common this case is.
	 */
	copy_call_count++;
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_restart_count++;
	}

	/*
	 *	Indicate (for the benefit of memory_object_create_copy)
	 *	that we want a copy for src_object. (Note that we cannot
	 *	do a real assert_wait before calling memory_object_copy,
	 *	so we simply set the flag.)
	 */

	vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL);
	vm_object_unlock(src_object);

	/*
	 *	Ask the memory manager to give us a memory object
	 *	which represents a copy of the src object.
	 *	The memory manager may give us a memory object
	 *	which we already have, or it may give us a
	 *	new memory object. This memory object will arrive
	 *	via memory_object_create_copy.
	 */

	kr = KERN_FAILURE;	/* XXX need to change memory_object.defs */
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/*
	 *	Wait for the copy to arrive.
	 */
	vm_object_lock(src_object);
	while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) {
		vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL,
			       THREAD_UNINT);
		copy_call_sleep_count++;
	}
Retry:
	assert(src_object->copy != VM_OBJECT_NULL);
	copy = src_object->copy;
	if (!vm_object_lock_try(copy)) {
		vm_object_unlock(src_object);

		try_failed_count++;
		mutex_pause(try_failed_count);	/* wait a bit */

		vm_object_lock(src_object);
		goto Retry;
	}
	if (copy->vo_size < src_offset+size)
		copy->vo_size = src_offset+size;

	if (!copy->pager_ready)
		check_ready = TRUE;

	/*
	 *	Return the copy.
 */
	*_result_object = copy;
	vm_object_unlock(copy);
	vm_object_unlock(src_object);

	/* Wait for the copy to be ready. */
	if (check_ready == TRUE) {
		vm_object_lock(copy);
		while (!copy->pager_ready) {
			vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT);
		}
		vm_object_unlock(copy);
	}

	return KERN_SUCCESS;
}

static int copy_delayed_lock_collisions = 0;
static int copy_delayed_max_collisions = 0;
static int copy_delayed_lock_contention = 0;
static int copy_delayed_protect_iterate = 0;

/*
 *	Routine:	vm_object_copy_delayed [internal]
 *
 *	Description:
 *		Copy the specified virtual memory object, using
 *		the asymmetric copy-on-write algorithm.
 *
 *	In/out conditions:
 *		The src_object must be locked on entry.  It will be unlocked
 *		on exit - so the caller must also hold a reference to it.
 *
 *		This routine will not block waiting for user-generated
 *		events.  It is not interruptible.
 */
__private_extern__ vm_object_t
vm_object_copy_delayed(
	vm_object_t		src_object,
	vm_object_offset_t	src_offset,
	vm_object_size_t	size,
	boolean_t		src_object_shared)
{
	vm_object_t		new_copy = VM_OBJECT_NULL;
	vm_object_t		old_copy;
	vm_page_t		p;
	vm_object_size_t	copy_size = src_offset + size;


	int collisions = 0;
	/*
	 *	The user-level memory manager wants to see all of the changes
	 *	to this object, but it has promised not to make any changes on
	 *	its own.
	 *
	 *	Perform an asymmetric copy-on-write, as follows:
	 *		Create a new object, called a "copy object" to hold
	 *		pages modified by the new mapping (i.e., the copy,
	 *		not the original mapping).
	 *		Record the original object as the backing object for
	 *		the copy object.  If the original mapping does not
	 *		change a page, it may be used read-only by the copy.
	 *		Record the copy object in the original object.
	 *		When the original mapping causes a page to be modified,
	 *		it must be copied to a new page that is "pushed" to
	 *		the copy object.
	 *		Mark the new mapping (the copy object) copy-on-write.
	 *		This makes the copy object itself read-only, allowing
	 *		it to be reused if the original mapping makes no
	 *		changes, and simplifying the synchronization required
	 *		in the "push" operation described above.
	 *
	 *	The copy-on-write is said to be asymmetric because the original
	 *	object is *not* marked copy-on-write. A copied page is pushed
	 *	to the copy object, regardless of which party attempted to modify
	 *	the page.
	 *
	 *	Repeated asymmetric copy operations may be done. If the
	 *	original object has not been changed since the last copy, its
	 *	copy object can be reused. Otherwise, a new copy object can be
	 *	inserted between the original object and its previous copy
	 *	object.  Since any copy object is read-only, this cannot
	 *	affect the contents of the previous copy object.
	 *
	 *	Note that a copy object is higher in the object tree than the
	 *	original object; therefore, use of the copy object recorded in
	 *	the original object must be done carefully, to avoid deadlock.
	 */

 Retry:

	/*
	 * Wait for paging in progress.
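 *
 * If we only hold the source object lock shared we do not wait here;
 * the code below first re-takes the lock exclusive (via the Retry
 * path) and only then sleeps in vm_object_paging_wait().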
3522 */ 3523 if (!src_object->true_share && 3524 (src_object->paging_in_progress != 0 || 3525 src_object->activity_in_progress != 0)) { 3526 if (src_object_shared == TRUE) { 3527 vm_object_unlock(src_object); 3528 vm_object_lock(src_object); 3529 src_object_shared = FALSE; 3530 goto Retry; 3531 } 3532 vm_object_paging_wait(src_object, THREAD_UNINT); 3533 } 3534 /* 3535 * See whether we can reuse the result of a previous 3536 * copy operation. 3537 */ 3538 3539 old_copy = src_object->copy; 3540 if (old_copy != VM_OBJECT_NULL) { 3541 int lock_granted; 3542 3543 /* 3544 * Try to get the locks (out of order) 3545 */ 3546 if (src_object_shared == TRUE) 3547 lock_granted = vm_object_lock_try_shared(old_copy); 3548 else 3549 lock_granted = vm_object_lock_try(old_copy); 3550 3551 if (!lock_granted) { 3552 vm_object_unlock(src_object); 3553 3554 if (collisions++ == 0) 3555 copy_delayed_lock_contention++; 3556 mutex_pause(collisions); 3557 3558 /* Heisenberg Rules */ 3559 copy_delayed_lock_collisions++; 3560 3561 if (collisions > copy_delayed_max_collisions) 3562 copy_delayed_max_collisions = collisions; 3563 3564 if (src_object_shared == TRUE) 3565 vm_object_lock_shared(src_object); 3566 else 3567 vm_object_lock(src_object); 3568 3569 goto Retry; 3570 } 3571 3572 /* 3573 * Determine whether the old copy object has 3574 * been modified. 3575 */ 3576 3577 if (old_copy->resident_page_count == 0 && 3578 !old_copy->pager_created) { 3579 /* 3580 * It has not been modified. 3581 * 3582 * Return another reference to 3583 * the existing copy-object if 3584 * we can safely grow it (if 3585 * needed). 3586 */ 3587 3588 if (old_copy->vo_size < copy_size) { 3589 if (src_object_shared == TRUE) { 3590 vm_object_unlock(old_copy); 3591 vm_object_unlock(src_object); 3592 3593 vm_object_lock(src_object); 3594 src_object_shared = FALSE; 3595 goto Retry; 3596 } 3597 /* 3598 * We can't perform a delayed copy if any of the 3599 * pages in the extended range are wired (because 3600 * we can't safely take write permission away from 3601 * wired pages). If the pages aren't wired, then 3602 * go ahead and protect them. 3603 */ 3604 copy_delayed_protect_iterate++; 3605 3606 queue_iterate(&src_object->memq, p, vm_page_t, listq) { 3607 if (!p->fictitious && 3608 p->offset >= old_copy->vo_size && 3609 p->offset < copy_size) { 3610 if (VM_PAGE_WIRED(p)) { 3611 vm_object_unlock(old_copy); 3612 vm_object_unlock(src_object); 3613 3614 if (new_copy != VM_OBJECT_NULL) { 3615 vm_object_unlock(new_copy); 3616 vm_object_deallocate(new_copy); 3617 } 3618 3619 return VM_OBJECT_NULL; 3620 } else { 3621 pmap_page_protect(p->phys_page, 3622 (VM_PROT_ALL & ~VM_PROT_WRITE)); 3623 } 3624 } 3625 } 3626 old_copy->vo_size = copy_size; 3627 } 3628 if (src_object_shared == TRUE) 3629 vm_object_reference_shared(old_copy); 3630 else 3631 vm_object_reference_locked(old_copy); 3632 vm_object_unlock(old_copy); 3633 vm_object_unlock(src_object); 3634 3635 if (new_copy != VM_OBJECT_NULL) { 3636 vm_object_unlock(new_copy); 3637 vm_object_deallocate(new_copy); 3638 } 3639 return(old_copy); 3640 } 3641 3642 3643 3644 /* 3645 * Adjust the size argument so that the newly-created 3646 * copy object will be large enough to back either the 3647 * old copy object or the new mapping. 
3648 */ 3649 if (old_copy->vo_size > copy_size) 3650 copy_size = old_copy->vo_size; 3651 3652 if (new_copy == VM_OBJECT_NULL) { 3653 vm_object_unlock(old_copy); 3654 vm_object_unlock(src_object); 3655 new_copy = vm_object_allocate(copy_size); 3656 vm_object_lock(src_object); 3657 vm_object_lock(new_copy); 3658 3659 src_object_shared = FALSE; 3660 goto Retry; 3661 } 3662 new_copy->vo_size = copy_size; 3663 3664 /* 3665 * The copy-object is always made large enough to 3666 * completely shadow the original object, since 3667 * it may have several users who want to shadow 3668 * the original object at different points. 3669 */ 3670 3671 assert((old_copy->shadow == src_object) && 3672 (old_copy->vo_shadow_offset == (vm_object_offset_t) 0)); 3673 3674 } else if (new_copy == VM_OBJECT_NULL) { 3675 vm_object_unlock(src_object); 3676 new_copy = vm_object_allocate(copy_size); 3677 vm_object_lock(src_object); 3678 vm_object_lock(new_copy); 3679 3680 src_object_shared = FALSE; 3681 goto Retry; 3682 } 3683 3684 /* 3685 * We now have the src object locked, and the new copy object 3686 * allocated and locked (and potentially the old copy locked). 3687 * Before we go any further, make sure we can still perform 3688 * a delayed copy, as the situation may have changed. 3689 * 3690 * Specifically, we can't perform a delayed copy if any of the 3691 * pages in the range are wired (because we can't safely take 3692 * write permission away from wired pages). If the pages aren't 3693 * wired, then go ahead and protect them. 3694 */ 3695 copy_delayed_protect_iterate++; 3696 3697 queue_iterate(&src_object->memq, p, vm_page_t, listq) { 3698 if (!p->fictitious && p->offset < copy_size) { 3699 if (VM_PAGE_WIRED(p)) { 3700 if (old_copy) 3701 vm_object_unlock(old_copy); 3702 vm_object_unlock(src_object); 3703 vm_object_unlock(new_copy); 3704 vm_object_deallocate(new_copy); 3705 return VM_OBJECT_NULL; 3706 } else { 3707 pmap_page_protect(p->phys_page, 3708 (VM_PROT_ALL & ~VM_PROT_WRITE)); 3709 } 3710 } 3711 } 3712 if (old_copy != VM_OBJECT_NULL) { 3713 /* 3714 * Make the old copy-object shadow the new one. 3715 * It will receive no more pages from the original 3716 * object. 3717 */ 3718 3719 /* remove ref. from old_copy */ 3720 vm_object_lock_assert_exclusive(src_object); 3721 src_object->ref_count--; 3722 assert(src_object->ref_count > 0); 3723 vm_object_lock_assert_exclusive(old_copy); 3724 old_copy->shadow = new_copy; 3725 vm_object_lock_assert_exclusive(new_copy); 3726 assert(new_copy->ref_count > 0); 3727 new_copy->ref_count++; /* for old_copy->shadow ref. */ 3728 3729#if TASK_SWAPPER 3730 if (old_copy->res_count) { 3731 VM_OBJ_RES_INCR(new_copy); 3732 VM_OBJ_RES_DECR(src_object); 3733 } 3734#endif 3735 3736 vm_object_unlock(old_copy); /* done with old_copy */ 3737 } 3738 3739 /* 3740 * Point the new copy at the existing object. 
3741 */ 3742 vm_object_lock_assert_exclusive(new_copy); 3743 new_copy->shadow = src_object; 3744 new_copy->vo_shadow_offset = 0; 3745 new_copy->shadowed = TRUE; /* caller must set needs_copy */ 3746 3747 vm_object_lock_assert_exclusive(src_object); 3748 vm_object_reference_locked(src_object); 3749 src_object->copy = new_copy; 3750 vm_object_unlock(src_object); 3751 vm_object_unlock(new_copy); 3752 3753 XPR(XPR_VM_OBJECT, 3754 "vm_object_copy_delayed: used copy object %X for source %X\n", 3755 new_copy, src_object, 0, 0, 0); 3756 3757 return new_copy; 3758} 3759 3760/* 3761 * Routine: vm_object_copy_strategically 3762 * 3763 * Purpose: 3764 * Perform a copy according to the source object's 3765 * declared strategy. This operation may block, 3766 * and may be interrupted. 3767 */ 3768__private_extern__ kern_return_t 3769vm_object_copy_strategically( 3770 register vm_object_t src_object, 3771 vm_object_offset_t src_offset, 3772 vm_object_size_t size, 3773 vm_object_t *dst_object, /* OUT */ 3774 vm_object_offset_t *dst_offset, /* OUT */ 3775 boolean_t *dst_needs_copy) /* OUT */ 3776{ 3777 boolean_t result; 3778 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */ 3779 boolean_t object_lock_shared = FALSE; 3780 memory_object_copy_strategy_t copy_strategy; 3781 3782 assert(src_object != VM_OBJECT_NULL); 3783 3784 copy_strategy = src_object->copy_strategy; 3785 3786 if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) { 3787 vm_object_lock_shared(src_object); 3788 object_lock_shared = TRUE; 3789 } else 3790 vm_object_lock(src_object); 3791 3792 /* 3793 * The copy strategy is only valid if the memory manager 3794 * is "ready". Internal objects are always ready. 3795 */ 3796 3797 while (!src_object->internal && !src_object->pager_ready) { 3798 wait_result_t wait_result; 3799 3800 if (object_lock_shared == TRUE) { 3801 vm_object_unlock(src_object); 3802 vm_object_lock(src_object); 3803 object_lock_shared = FALSE; 3804 continue; 3805 } 3806 wait_result = vm_object_sleep( src_object, 3807 VM_OBJECT_EVENT_PAGER_READY, 3808 interruptible); 3809 if (wait_result != THREAD_AWAKENED) { 3810 vm_object_unlock(src_object); 3811 *dst_object = VM_OBJECT_NULL; 3812 *dst_offset = 0; 3813 *dst_needs_copy = FALSE; 3814 return(MACH_SEND_INTERRUPTED); 3815 } 3816 } 3817 3818 /* 3819 * Use the appropriate copy strategy. 
3820 */ 3821 3822 switch (copy_strategy) { 3823 case MEMORY_OBJECT_COPY_DELAY: 3824 *dst_object = vm_object_copy_delayed(src_object, 3825 src_offset, size, object_lock_shared); 3826 if (*dst_object != VM_OBJECT_NULL) { 3827 *dst_offset = src_offset; 3828 *dst_needs_copy = TRUE; 3829 result = KERN_SUCCESS; 3830 break; 3831 } 3832 vm_object_lock(src_object); 3833 /* fall thru when delayed copy not allowed */ 3834 3835 case MEMORY_OBJECT_COPY_NONE: 3836 result = vm_object_copy_slowly(src_object, src_offset, size, 3837 interruptible, dst_object); 3838 if (result == KERN_SUCCESS) { 3839 *dst_offset = 0; 3840 *dst_needs_copy = FALSE; 3841 } 3842 break; 3843 3844 case MEMORY_OBJECT_COPY_CALL: 3845 result = vm_object_copy_call(src_object, src_offset, size, 3846 dst_object); 3847 if (result == KERN_SUCCESS) { 3848 *dst_offset = src_offset; 3849 *dst_needs_copy = TRUE; 3850 } 3851 break; 3852 3853 case MEMORY_OBJECT_COPY_SYMMETRIC: 3854 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0); 3855 vm_object_unlock(src_object); 3856 result = KERN_MEMORY_RESTART_COPY; 3857 break; 3858 3859 default: 3860 panic("copy_strategically: bad strategy"); 3861 result = KERN_INVALID_ARGUMENT; 3862 } 3863 return(result); 3864} 3865 3866/* 3867 * vm_object_shadow: 3868 * 3869 * Create a new object which is backed by the 3870 * specified existing object range. The source 3871 * object reference is deallocated. 3872 * 3873 * The new object and offset into that object 3874 * are returned in the source parameters. 3875 */ 3876boolean_t vm_object_shadow_check = TRUE; 3877 3878__private_extern__ boolean_t 3879vm_object_shadow( 3880 vm_object_t *object, /* IN/OUT */ 3881 vm_object_offset_t *offset, /* IN/OUT */ 3882 vm_object_size_t length) 3883{ 3884 register vm_object_t source; 3885 register vm_object_t result; 3886 3887 source = *object; 3888 assert(source != VM_OBJECT_NULL); 3889 if (source == VM_OBJECT_NULL) 3890 return FALSE; 3891 3892#if 0 3893 /* 3894 * XXX FBDP 3895 * This assertion is valid but it gets triggered by Rosetta for example 3896 * due to a combination of vm_remap() that changes a VM object's 3897 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY) 3898 * that then sets "needs_copy" on its map entry. This creates a 3899 * mapping situation that VM should never see and doesn't know how to 3900 * handle. 3901 * It's not clear if this can create any real problem but we should 3902 * look into fixing this, probably by having vm_protect(VM_PROT_COPY) 3903 * do more than just set "needs_copy" to handle the copy-on-write... 3904 * In the meantime, let's disable the assertion. 3905 */ 3906 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC); 3907#endif 3908 3909 /* 3910 * Determine if we really need a shadow. 3911 * 3912 * If the source object is larger than what we are trying 3913 * to create, then force the shadow creation even if the 3914 * ref count is 1. This will allow us to [potentially] 3915 * collapse the underlying object away in the future 3916 * (freeing up the extra data it might contain and that 3917 * we don't need). 
3918 */ 3919 if (vm_object_shadow_check && 3920 source->vo_size == length && 3921 source->ref_count == 1 && 3922 (source->shadow == VM_OBJECT_NULL || 3923 source->shadow->copy == VM_OBJECT_NULL) ) 3924 { 3925 source->shadowed = FALSE; 3926 return FALSE; 3927 } 3928 3929 /* 3930 * Allocate a new object with the given length 3931 */ 3932 3933 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL) 3934 panic("vm_object_shadow: no object for shadowing"); 3935 3936 /* 3937 * The new object shadows the source object, adding 3938 * a reference to it. Our caller changes his reference 3939 * to point to the new object, removing a reference to 3940 * the source object. Net result: no change of reference 3941 * count. 3942 */ 3943 result->shadow = source; 3944 3945 /* 3946 * Store the offset into the source object, 3947 * and fix up the offset into the new object. 3948 */ 3949 3950 result->vo_shadow_offset = *offset; 3951 3952 /* 3953 * Return the new things 3954 */ 3955 3956 *offset = 0; 3957 *object = result; 3958 return TRUE; 3959} 3960 3961/* 3962 * The relationship between vm_object structures and 3963 * the memory_object requires careful synchronization. 3964 * 3965 * All associations are created by memory_object_create_named 3966 * for external pagers and vm_object_pager_create for internal 3967 * objects as follows: 3968 * 3969 * pager: the memory_object itself, supplied by 3970 * the user requesting a mapping (or the kernel, 3971 * when initializing internal objects); the 3972 * kernel simulates holding send rights by keeping 3973 * a port reference; 3974 * 3975 * pager_request: 3976 * the memory object control port, 3977 * created by the kernel; the kernel holds 3978 * receive (and ownership) rights to this 3979 * port, but no other references. 3980 * 3981 * When initialization is complete, the "initialized" field 3982 * is asserted. Other mappings using a particular memory object, 3983 * and any references to the vm_object gained through the 3984 * port association must wait for this initialization to occur. 3985 * 3986 * In order to allow the memory manager to set attributes before 3987 * requests (notably virtual copy operations, but also data or 3988 * unlock requests) are made, a "ready" attribute is made available. 3989 * Only the memory manager may affect the value of this attribute. 3990 * Its value does not affect critical kernel functions, such as 3991 * internal object initialization or destruction. [Furthermore, 3992 * memory objects created by the kernel are assumed to be ready 3993 * immediately; the default memory manager need not explicitly 3994 * set the "ready" attribute.] 3995 * 3996 * [Both the "initialized" and "ready" attribute wait conditions 3997 * use the "pager" field as the wait event.] 3998 * 3999 * The port associations can be broken down by any of the 4000 * following routines: 4001 * vm_object_terminate: 4002 * No references to the vm_object remain, and 4003 * the object cannot (or will not) be cached. 4004 * This is the normal case, and is done even 4005 * though one of the other cases has already been 4006 * done. 4007 * memory_object_destroy: 4008 * The memory manager has requested that the 4009 * kernel relinquish references to the memory 4010 * object. [The memory manager may not want to 4011 * destroy the memory object, but may wish to 4012 * refuse or tear down existing memory mappings.] 4013 * 4014 * Each routine that breaks an association must break all of 4015 * them at once. 
At some later time, that routine must clear 4016 * the pager field and release the memory object references. 4017 * [Furthermore, each routine must cope with the simultaneous 4018 * or previous operations of the others.] 4019 * 4020 * In addition to the lock on the object, the vm_object_hash_lock 4021 * governs the associations. References gained through the 4022 * association require use of the hash lock. 4023 * 4024 * Because the pager field may be cleared spontaneously, it 4025 * cannot be used to determine whether a memory object has 4026 * ever been associated with a particular vm_object. [This 4027 * knowledge is important to the shadow object mechanism.] 4028 * For this reason, an additional "created" attribute is 4029 * provided. 4030 * 4031 * During various paging operations, the pager reference found in the 4032 * vm_object must be valid. To prevent this from being released, 4033 * (other than being removed, i.e., made null), routines may use 4034 * the vm_object_paging_begin/end routines [actually, macros]. 4035 * The implementation uses the "paging_in_progress" and "wanted" fields. 4036 * [Operations that alter the validity of the pager values include the 4037 * termination routines and vm_object_collapse.] 4038 */ 4039 4040 4041/* 4042 * Routine: vm_object_enter 4043 * Purpose: 4044 * Find a VM object corresponding to the given 4045 * pager; if no such object exists, create one, 4046 * and initialize the pager. 4047 */ 4048vm_object_t 4049vm_object_enter( 4050 memory_object_t pager, 4051 vm_object_size_t size, 4052 boolean_t internal, 4053 boolean_t init, 4054 boolean_t named) 4055{ 4056 register vm_object_t object; 4057 vm_object_t new_object; 4058 boolean_t must_init; 4059 vm_object_hash_entry_t entry, new_entry; 4060 uint32_t try_failed_count = 0; 4061 lck_mtx_t *lck; 4062 4063 if (pager == MEMORY_OBJECT_NULL) 4064 return(vm_object_allocate(size)); 4065 4066 new_object = VM_OBJECT_NULL; 4067 new_entry = VM_OBJECT_HASH_ENTRY_NULL; 4068 must_init = init; 4069 4070 /* 4071 * Look for an object associated with this port. 4072 */ 4073Retry: 4074 lck = vm_object_hash_lock_spin(pager); 4075 do { 4076 entry = vm_object_hash_lookup(pager, FALSE); 4077 4078 if (entry == VM_OBJECT_HASH_ENTRY_NULL) { 4079 if (new_object == VM_OBJECT_NULL) { 4080 /* 4081 * We must unlock to create a new object; 4082 * if we do so, we must try the lookup again. 4083 */ 4084 vm_object_hash_unlock(lck); 4085 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL); 4086 new_entry = vm_object_hash_entry_alloc(pager); 4087 new_object = vm_object_allocate(size); 4088 lck = vm_object_hash_lock_spin(pager); 4089 } else { 4090 /* 4091 * Lookup failed twice, and we have something 4092 * to insert; set the object. 4093 */ 4094 vm_object_hash_insert(new_entry, new_object); 4095 entry = new_entry; 4096 new_entry = VM_OBJECT_HASH_ENTRY_NULL; 4097 new_object = VM_OBJECT_NULL; 4098 must_init = TRUE; 4099 } 4100 } else if (entry->object == VM_OBJECT_NULL) { 4101 /* 4102 * If a previous object is being terminated, 4103 * we must wait for the termination message 4104 * to be queued (and lookup the entry again). 
4105 */ 4106 entry->waiting = TRUE; 4107 entry = VM_OBJECT_HASH_ENTRY_NULL; 4108 assert_wait((event_t) pager, THREAD_UNINT); 4109 vm_object_hash_unlock(lck); 4110 4111 thread_block(THREAD_CONTINUE_NULL); 4112 lck = vm_object_hash_lock_spin(pager); 4113 } 4114 } while (entry == VM_OBJECT_HASH_ENTRY_NULL); 4115 4116 object = entry->object; 4117 assert(object != VM_OBJECT_NULL); 4118 4119 if (!must_init) { 4120 if ( !vm_object_lock_try(object)) { 4121 4122 vm_object_hash_unlock(lck); 4123 4124 try_failed_count++; 4125 mutex_pause(try_failed_count); /* wait a bit */ 4126 goto Retry; 4127 } 4128 assert(!internal || object->internal); 4129#if VM_OBJECT_CACHE 4130 if (object->ref_count == 0) { 4131 if ( !vm_object_cache_lock_try()) { 4132 4133 vm_object_hash_unlock(lck); 4134 vm_object_unlock(object); 4135 4136 try_failed_count++; 4137 mutex_pause(try_failed_count); /* wait a bit */ 4138 goto Retry; 4139 } 4140 XPR(XPR_VM_OBJECT_CACHE, 4141 "vm_object_enter: removing %x from cache, head (%x, %x)\n", 4142 object, 4143 vm_object_cached_list.next, 4144 vm_object_cached_list.prev, 0,0); 4145 queue_remove(&vm_object_cached_list, object, 4146 vm_object_t, cached_list); 4147 vm_object_cached_count--; 4148 4149 vm_object_cache_unlock(); 4150 } 4151#endif 4152 if (named) { 4153 assert(!object->named); 4154 object->named = TRUE; 4155 } 4156 vm_object_lock_assert_exclusive(object); 4157 object->ref_count++; 4158 vm_object_res_reference(object); 4159 4160 vm_object_hash_unlock(lck); 4161 vm_object_unlock(object); 4162 4163 VM_STAT_INCR(hits); 4164 } else 4165 vm_object_hash_unlock(lck); 4166 4167 assert(object->ref_count > 0); 4168 4169 VM_STAT_INCR(lookups); 4170 4171 XPR(XPR_VM_OBJECT, 4172 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n", 4173 pager, object, must_init, 0, 0); 4174 4175 /* 4176 * If we raced to create a vm_object but lost, let's 4177 * throw away ours. 4178 */ 4179 4180 if (new_object != VM_OBJECT_NULL) 4181 vm_object_deallocate(new_object); 4182 4183 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL) 4184 vm_object_hash_entry_free(new_entry); 4185 4186 if (must_init) { 4187 memory_object_control_t control; 4188 4189 /* 4190 * Allocate request port. 4191 */ 4192 4193 control = memory_object_control_allocate(object); 4194 assert (control != MEMORY_OBJECT_CONTROL_NULL); 4195 4196 vm_object_lock(object); 4197 assert(object != kernel_object); 4198 4199 /* 4200 * Copy the reference we were given. 4201 */ 4202 4203 memory_object_reference(pager); 4204 object->pager_created = TRUE; 4205 object->pager = pager; 4206 object->internal = internal; 4207 object->pager_trusted = internal; 4208 if (!internal) { 4209 /* copy strategy invalid until set by memory manager */ 4210 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID; 4211 } 4212 object->pager_control = control; 4213 object->pager_ready = FALSE; 4214 4215 vm_object_unlock(object); 4216 4217 /* 4218 * Let the pager know we're using it. 4219 */ 4220 4221 (void) memory_object_init(pager, 4222 object->pager_control, 4223 PAGE_SIZE); 4224 4225 vm_object_lock(object); 4226 if (named) 4227 object->named = TRUE; 4228 if (internal) { 4229 object->pager_ready = TRUE; 4230 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); 4231 } 4232 4233 object->pager_initialized = TRUE; 4234 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED); 4235 } else { 4236 vm_object_lock(object); 4237 } 4238 4239 /* 4240 * [At this point, the object must be locked] 4241 */ 4242 4243 /* 4244 * Wait for the work above to be done by the first 4245 * thread to map this object. 
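 *
 * (The initializing thread issues the VM_OBJECT_EVENT_INITIALIZED
 * wakeup above, once it has set "pager_initialized".)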
4246 */ 4247 4248 while (!object->pager_initialized) { 4249 vm_object_sleep(object, 4250 VM_OBJECT_EVENT_INITIALIZED, 4251 THREAD_UNINT); 4252 } 4253 vm_object_unlock(object); 4254 4255 XPR(XPR_VM_OBJECT, 4256 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n", 4257 object, object->pager, internal, 0,0); 4258 return(object); 4259} 4260 4261/* 4262 * Routine: vm_object_pager_create 4263 * Purpose: 4264 * Create a memory object for an internal object. 4265 * In/out conditions: 4266 * The object is locked on entry and exit; 4267 * it may be unlocked within this call. 4268 * Limitations: 4269 * Only one thread may be performing a 4270 * vm_object_pager_create on an object at 4271 * a time. Presumably, only the pageout 4272 * daemon will be using this routine. 4273 */ 4274 4275void 4276vm_object_pager_create( 4277 register vm_object_t object) 4278{ 4279 memory_object_t pager; 4280 vm_object_hash_entry_t entry; 4281 lck_mtx_t *lck; 4282#if MACH_PAGEMAP 4283 vm_object_size_t size; 4284 vm_external_map_t map; 4285#endif /* MACH_PAGEMAP */ 4286 4287 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n", 4288 object, 0,0,0,0); 4289 4290 assert(object != kernel_object); 4291 4292 if (memory_manager_default_check() != KERN_SUCCESS) 4293 return; 4294 4295 /* 4296 * Prevent collapse or termination by holding a paging reference 4297 */ 4298 4299 vm_object_paging_begin(object); 4300 if (object->pager_created) { 4301 /* 4302 * Someone else got to it first... 4303 * wait for them to finish initializing the ports 4304 */ 4305 while (!object->pager_initialized) { 4306 vm_object_sleep(object, 4307 VM_OBJECT_EVENT_INITIALIZED, 4308 THREAD_UNINT); 4309 } 4310 vm_object_paging_end(object); 4311 return; 4312 } 4313 4314 /* 4315 * Indicate that a memory object has been assigned 4316 * before dropping the lock, to prevent a race. 4317 */ 4318 4319 object->pager_created = TRUE; 4320 object->paging_offset = 0; 4321 4322#if MACH_PAGEMAP 4323 size = object->vo_size; 4324#endif /* MACH_PAGEMAP */ 4325 vm_object_unlock(object); 4326 4327#if MACH_PAGEMAP 4328 map = vm_external_create(size); 4329 vm_object_lock(object); 4330 assert(object->vo_size == size); 4331 object->existence_map = map; 4332 vm_object_unlock(object); 4333#endif /* MACH_PAGEMAP */ 4334 4335 if ((uint32_t) object->vo_size != object->vo_size) { 4336 panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n", 4337 (uint64_t) object->vo_size); 4338 } 4339 4340 /* 4341 * Create the [internal] pager, and associate it with this object. 4342 * 4343 * We make the association here so that vm_object_enter() 4344 * can look up the object to complete initializing it. No 4345 * user will ever map this object. 4346 */ 4347 { 4348 memory_object_default_t dmm; 4349 4350 /* acquire a reference for the default memory manager */ 4351 dmm = memory_manager_default_reference(); 4352 4353 assert(object->temporary); 4354 4355 /* create our new memory object */ 4356 assert((vm_size_t) object->vo_size == object->vo_size); 4357 (void) memory_object_create(dmm, (vm_size_t) object->vo_size, 4358 &pager); 4359 4360 memory_object_default_deallocate(dmm); 4361 } 4362 4363 entry = vm_object_hash_entry_alloc(pager); 4364 4365 lck = vm_object_hash_lock_spin(pager); 4366 vm_object_hash_insert(entry, object); 4367 vm_object_hash_unlock(lck); 4368 4369 /* 4370 * A reference was returned by 4371 * memory_object_create(), and it is 4372 * copied by vm_object_enter(). 
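 *
 * (vm_object_enter() takes its own reference on the pager via
 * memory_object_reference(); the reference handed back by
 * memory_object_create() is dropped again just below with
 * memory_object_deallocate().)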
4373 */ 4374 4375 if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object) 4376 panic("vm_object_pager_create: mismatch"); 4377 4378 /* 4379 * Drop the reference we were passed. 4380 */ 4381 memory_object_deallocate(pager); 4382 4383 vm_object_lock(object); 4384 4385 /* 4386 * Release the paging reference 4387 */ 4388 vm_object_paging_end(object); 4389} 4390 4391/* 4392 * Routine: vm_object_remove 4393 * Purpose: 4394 * Eliminate the pager/object association 4395 * for this pager. 4396 * Conditions: 4397 * The object cache must be locked. 4398 */ 4399__private_extern__ void 4400vm_object_remove( 4401 vm_object_t object) 4402{ 4403 memory_object_t pager; 4404 4405 if ((pager = object->pager) != MEMORY_OBJECT_NULL) { 4406 vm_object_hash_entry_t entry; 4407 4408 entry = vm_object_hash_lookup(pager, FALSE); 4409 if (entry != VM_OBJECT_HASH_ENTRY_NULL) 4410 entry->object = VM_OBJECT_NULL; 4411 } 4412 4413} 4414 4415/* 4416 * Global variables for vm_object_collapse(): 4417 * 4418 * Counts for normal collapses and bypasses. 4419 * Debugging variables, to watch or disable collapse. 4420 */ 4421static long object_collapses = 0; 4422static long object_bypasses = 0; 4423 4424static boolean_t vm_object_collapse_allowed = TRUE; 4425static boolean_t vm_object_bypass_allowed = TRUE; 4426 4427#if MACH_PAGEMAP 4428static int vm_external_discarded; 4429static int vm_external_collapsed; 4430#endif 4431 4432unsigned long vm_object_collapse_encrypted = 0; 4433 4434/* 4435 * Routine: vm_object_do_collapse 4436 * Purpose: 4437 * Collapse an object with the object backing it. 4438 * Pages in the backing object are moved into the 4439 * parent, and the backing object is deallocated. 4440 * Conditions: 4441 * Both objects and the cache are locked; the page 4442 * queues are unlocked. 4443 * 4444 */ 4445static void 4446vm_object_do_collapse( 4447 vm_object_t object, 4448 vm_object_t backing_object) 4449{ 4450 vm_page_t p, pp; 4451 vm_object_offset_t new_offset, backing_offset; 4452 vm_object_size_t size; 4453 4454 vm_object_lock_assert_exclusive(object); 4455 vm_object_lock_assert_exclusive(backing_object); 4456 4457 backing_offset = object->vo_shadow_offset; 4458 size = object->vo_size; 4459 4460 /* 4461 * Move all in-memory pages from backing_object 4462 * to the parent. Pages that have been paged out 4463 * will be overwritten by any of the parent's 4464 * pages that shadow them. 4465 */ 4466 4467 while (!queue_empty(&backing_object->memq)) { 4468 4469 p = (vm_page_t) queue_first(&backing_object->memq); 4470 4471 new_offset = (p->offset - backing_offset); 4472 4473 assert(!p->busy || p->absent); 4474 4475 /* 4476 * If the parent has a page here, or if 4477 * this page falls outside the parent, 4478 * dispose of it. 4479 * 4480 * Otherwise, move it as planned. 4481 */ 4482 4483 if (p->offset < backing_offset || new_offset >= size) { 4484 VM_PAGE_FREE(p); 4485 } else { 4486 /* 4487 * ENCRYPTED SWAP: 4488 * The encryption key includes the "pager" and the 4489 * "paging_offset". These will not change during the 4490 * object collapse, so we can just move an encrypted 4491 * page from one object to the other in this case. 4492 * We can't decrypt the page here, since we can't drop 4493 * the object lock. 4494 */ 4495 if (p->encrypted) { 4496 vm_object_collapse_encrypted++; 4497 } 4498 pp = vm_page_lookup(object, new_offset); 4499 if (pp == VM_PAGE_NULL) { 4500 4501 /* 4502 * Parent now has no page. 4503 * Move the backing object's page up. 
4504 */ 4505 4506 vm_page_rename(p, object, new_offset, TRUE); 4507#if MACH_PAGEMAP 4508 } else if (pp->absent) { 4509 4510 /* 4511 * Parent has an absent page... 4512 * it's not being paged in, so 4513 * it must really be missing from 4514 * the parent. 4515 * 4516 * Throw out the absent page... 4517 * any faults looking for that 4518 * page will restart with the new 4519 * one. 4520 */ 4521 4522 VM_PAGE_FREE(pp); 4523 vm_page_rename(p, object, new_offset, TRUE); 4524#endif /* MACH_PAGEMAP */ 4525 } else { 4526 assert(! pp->absent); 4527 4528 /* 4529 * Parent object has a real page. 4530 * Throw away the backing object's 4531 * page. 4532 */ 4533 VM_PAGE_FREE(p); 4534 } 4535 } 4536 } 4537 4538#if !MACH_PAGEMAP 4539 assert((!object->pager_created && (object->pager == MEMORY_OBJECT_NULL)) 4540 || (!backing_object->pager_created 4541 && (backing_object->pager == MEMORY_OBJECT_NULL))); 4542#else 4543 assert(!object->pager_created && object->pager == MEMORY_OBJECT_NULL); 4544#endif /* !MACH_PAGEMAP */ 4545 4546 if (backing_object->pager != MEMORY_OBJECT_NULL) { 4547 vm_object_hash_entry_t entry; 4548 4549 /* 4550 * Move the pager from backing_object to object. 4551 * 4552 * XXX We're only using part of the paging space 4553 * for keeps now... we ought to discard the 4554 * unused portion. 4555 */ 4556 4557 assert(!object->paging_in_progress); 4558 assert(!object->activity_in_progress); 4559 object->pager = backing_object->pager; 4560 4561 if (backing_object->hashed) { 4562 lck_mtx_t *lck; 4563 4564 lck = vm_object_hash_lock_spin(backing_object->pager); 4565 entry = vm_object_hash_lookup(object->pager, FALSE); 4566 assert(entry != VM_OBJECT_HASH_ENTRY_NULL); 4567 entry->object = object; 4568 vm_object_hash_unlock(lck); 4569 4570 object->hashed = TRUE; 4571 } 4572 object->pager_created = backing_object->pager_created; 4573 object->pager_control = backing_object->pager_control; 4574 object->pager_ready = backing_object->pager_ready; 4575 object->pager_initialized = backing_object->pager_initialized; 4576 object->paging_offset = 4577 backing_object->paging_offset + backing_offset; 4578 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 4579 memory_object_control_collapse(object->pager_control, 4580 object); 4581 } 4582 } 4583 4584#if MACH_PAGEMAP 4585 /* 4586 * If the shadow offset is 0, use the existence map from 4587 * the backing object if there is one. If the shadow offset is 4588 * not zero, toss it. 4589 * 4590 * XXX - If the shadow offset is not 0 then a bit copy is needed 4591 * if the map is to be salvaged. For now, we just toss the 4592 * old map, giving the collapsed object no map. This means that 4593 * the pager is invoked for zero fill pages. If analysis shows 4594 * that this happens frequently and is a performance hit, then 4595 * this code should be fixed to salvage the map. 4596 */ 4597 assert(object->existence_map == VM_EXTERNAL_NULL); 4598 if (backing_offset || (size != backing_object->vo_size)) { 4599 vm_external_discarded++; 4600 vm_external_destroy(backing_object->existence_map, 4601 backing_object->vo_size); 4602 } 4603 else { 4604 vm_external_collapsed++; 4605 object->existence_map = backing_object->existence_map; 4606 } 4607 backing_object->existence_map = VM_EXTERNAL_NULL; 4608#endif /* MACH_PAGEMAP */ 4609 4610 /* 4611 * Object now shadows whatever backing_object did. 4612 * Note that the reference to backing_object->shadow 4613 * moves from within backing_object to within object.
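 *
 * Schematically (shadow offsets accumulate along the chain):
 *
 *	before:	object --(backing_offset)--> backing_object --(bo_off)--> X
 *	after:	object --(backing_offset + bo_off)--> X
 *
 * where X is backing_object->shadow and "bo_off" is
 * backing_object->vo_shadow_offset; if X is VM_OBJECT_NULL, the
 * combined offset is simply reset to 0.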
4614 */ 4615 4616 assert(!object->phys_contiguous); 4617 assert(!backing_object->phys_contiguous); 4618 object->shadow = backing_object->shadow; 4619 if (object->shadow) { 4620 object->vo_shadow_offset += backing_object->vo_shadow_offset; 4621 } else { 4622 /* no shadow, therefore no shadow offset... */ 4623 object->vo_shadow_offset = 0; 4624 } 4625 assert((object->shadow == VM_OBJECT_NULL) || 4626 (object->shadow->copy != backing_object)); 4627 4628 /* 4629 * Discard backing_object. 4630 * 4631 * Since the backing object has no pages, no 4632 * pager left, and no object references within it, 4633 * all that is necessary is to dispose of it. 4634 */ 4635 4636 assert((backing_object->ref_count == 1) && 4637 (backing_object->resident_page_count == 0) && 4638 (backing_object->paging_in_progress == 0) && 4639 (backing_object->activity_in_progress == 0)); 4640 4641 backing_object->alive = FALSE; 4642 vm_object_unlock(backing_object); 4643 4644 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", 4645 backing_object, 0,0,0,0); 4646 4647 vm_object_lock_destroy(backing_object); 4648 4649 zfree(vm_object_zone, backing_object); 4650 4651 object_collapses++; 4652} 4653 4654static void 4655vm_object_do_bypass( 4656 vm_object_t object, 4657 vm_object_t backing_object) 4658{ 4659 /* 4660 * Make the parent shadow the next object 4661 * in the chain. 4662 */ 4663 4664 vm_object_lock_assert_exclusive(object); 4665 vm_object_lock_assert_exclusive(backing_object); 4666 4667#if TASK_SWAPPER 4668 /* 4669 * Do object reference in-line to 4670 * conditionally increment shadow's 4671 * residence count. If object is not 4672 * resident, leave residence count 4673 * on shadow alone. 4674 */ 4675 if (backing_object->shadow != VM_OBJECT_NULL) { 4676 vm_object_lock(backing_object->shadow); 4677 vm_object_lock_assert_exclusive(backing_object->shadow); 4678 backing_object->shadow->ref_count++; 4679 if (object->res_count != 0) 4680 vm_object_res_reference(backing_object->shadow); 4681 vm_object_unlock(backing_object->shadow); 4682 } 4683#else /* TASK_SWAPPER */ 4684 vm_object_reference(backing_object->shadow); 4685#endif /* TASK_SWAPPER */ 4686 4687 assert(!object->phys_contiguous); 4688 assert(!backing_object->phys_contiguous); 4689 object->shadow = backing_object->shadow; 4690 if (object->shadow) { 4691 object->vo_shadow_offset += backing_object->vo_shadow_offset; 4692 } else { 4693 /* no shadow, therefore no shadow offset... */ 4694 object->vo_shadow_offset = 0; 4695 } 4696 4697 /* 4698 * Backing object might have had a copy pointer 4699 * to us. If it did, clear it. 4700 */ 4701 if (backing_object->copy == object) { 4702 backing_object->copy = VM_OBJECT_NULL; 4703 } 4704 4705 /* 4706 * Drop the reference count on backing_object. 4707#if TASK_SWAPPER 4708 * Since its ref_count was at least 2, it 4709 * will not vanish; so we don't need to call 4710 * vm_object_deallocate. 4711 * [with a caveat for "named" objects] 4712 * 4713 * The res_count on the backing object is 4714 * conditionally decremented. It's possible 4715 * (via vm_pageout_scan) to get here with 4716 * a "swapped" object, which has a 0 res_count, 4717 * in which case, the backing object res_count 4718 * is already down by one. 4719#else 4720 * Don't call vm_object_deallocate unless 4721 * ref_count drops to zero. 4722 * 4723 * The ref_count can drop to zero here if the 4724 * backing object could be bypassed but not 4725 * collapsed, such as when the backing object 4726 * is temporary and cachable. 
4727#endif 4728 */ 4729 if (backing_object->ref_count > 2 || 4730 (!backing_object->named && backing_object->ref_count > 1)) { 4731 vm_object_lock_assert_exclusive(backing_object); 4732 backing_object->ref_count--; 4733#if TASK_SWAPPER 4734 if (object->res_count != 0) 4735 vm_object_res_deallocate(backing_object); 4736 assert(backing_object->ref_count > 0); 4737#endif /* TASK_SWAPPER */ 4738 vm_object_unlock(backing_object); 4739 } else { 4740 4741 /* 4742 * Drop locks so that we can deallocate 4743 * the backing object. 4744 */ 4745 4746#if TASK_SWAPPER 4747 if (object->res_count == 0) { 4748 /* XXX get a reference for the deallocate below */ 4749 vm_object_res_reference(backing_object); 4750 } 4751#endif /* TASK_SWAPPER */ 4752 /* 4753 * vm_object_collapse (the caller of this function) is 4754 * now called from contexts that may not guarantee that a 4755 * valid reference is held on the object... w/o a valid 4756 * reference, it is unsafe and unwise (you will definitely 4757 * regret it) to unlock the object and then retake the lock 4758 * since the object may be terminated and recycled in between. 4759 * The "activity_in_progress" reference will keep the object 4760 * 'stable'. 4761 */ 4762 vm_object_activity_begin(object); 4763 vm_object_unlock(object); 4764 4765 vm_object_unlock(backing_object); 4766 vm_object_deallocate(backing_object); 4767 4768 /* 4769 * Relock object. We don't have to reverify 4770 * its state since vm_object_collapse will 4771 * do that for us as it starts at the 4772 * top of its loop. 4773 */ 4774 4775 vm_object_lock(object); 4776 vm_object_activity_end(object); 4777 } 4778 4779 object_bypasses++; 4780} 4781 4782 4783/* 4784 * vm_object_collapse: 4785 * 4786 * Perform an object collapse or an object bypass if appropriate. 4787 * The real work of collapsing and bypassing is performed in 4788 * the routines vm_object_do_collapse and vm_object_do_bypass. 4789 * 4790 * Requires that the object be locked and the page queues be unlocked. 4791 * 4792 */ 4793static unsigned long vm_object_collapse_calls = 0; 4794static unsigned long vm_object_collapse_objects = 0; 4795static unsigned long vm_object_collapse_do_collapse = 0; 4796static unsigned long vm_object_collapse_do_bypass = 0; 4797 4798__private_extern__ void 4799vm_object_collapse( 4800 register vm_object_t object, 4801 register vm_object_offset_t hint_offset, 4802 boolean_t can_bypass) 4803{ 4804 register vm_object_t backing_object; 4805 register unsigned int rcount; 4806 register unsigned int size; 4807 vm_object_t original_object; 4808 int object_lock_type; 4809 int backing_object_lock_type; 4810 4811 vm_object_collapse_calls++; 4812 4813 if (! vm_object_collapse_allowed && 4814 ! (can_bypass && vm_object_bypass_allowed)) { 4815 return; 4816 } 4817 4818 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n", 4819 object, 0,0,0,0); 4820 4821 if (object == VM_OBJECT_NULL) 4822 return; 4823 4824 original_object = object; 4825 4826 /* 4827 * The top object was locked "exclusive" by the caller. 4828 * In the first pass, to determine if we can collapse the shadow chain, 4829 * take a "shared" lock on the shadow objects. If we can collapse, 4830 * we'll have to go down the chain again with exclusive locks. 
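 *
 * (That second pass is the "retry:" label below: both lock-type
 * variables are switched to OBJECT_LOCK_EXCLUSIVE and we jump back
 * to walk the chain again.)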
4831 */ 4832 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4833 backing_object_lock_type = OBJECT_LOCK_SHARED; 4834 4835retry: 4836 object = original_object; 4837 vm_object_lock_assert_exclusive(object); 4838 4839 while (TRUE) { 4840 vm_object_collapse_objects++; 4841 /* 4842 * Verify that the conditions are right for either 4843 * collapse or bypass: 4844 */ 4845 4846 /* 4847 * There is a backing object, and 4848 */ 4849 4850 backing_object = object->shadow; 4851 if (backing_object == VM_OBJECT_NULL) { 4852 if (object != original_object) { 4853 vm_object_unlock(object); 4854 } 4855 return; 4856 } 4857 if (backing_object_lock_type == OBJECT_LOCK_SHARED) { 4858 vm_object_lock_shared(backing_object); 4859 } else { 4860 vm_object_lock(backing_object); 4861 } 4862 4863 /* 4864 * No pages in the object are currently 4865 * being paged out, and 4866 */ 4867 if (object->paging_in_progress != 0 || 4868 object->activity_in_progress != 0) { 4869 /* try and collapse the rest of the shadow chain */ 4870 if (object != original_object) { 4871 vm_object_unlock(object); 4872 } 4873 object = backing_object; 4874 object_lock_type = backing_object_lock_type; 4875 continue; 4876 } 4877 4878 /* 4879 * ... 4880 * The backing object is not read_only, 4881 * and no pages in the backing object are 4882 * currently being paged out. 4883 * The backing object is internal. 4884 * 4885 */ 4886 4887 if (!backing_object->internal || 4888 backing_object->paging_in_progress != 0 || 4889 backing_object->activity_in_progress != 0) { 4890 /* try and collapse the rest of the shadow chain */ 4891 if (object != original_object) { 4892 vm_object_unlock(object); 4893 } 4894 object = backing_object; 4895 object_lock_type = backing_object_lock_type; 4896 continue; 4897 } 4898 4899 /* 4900 * The backing object can't be a copy-object: 4901 * the shadow_offset for the copy-object must stay 4902 * as 0. Furthermore (for the 'we have all the 4903 * pages' case), if we bypass backing_object and 4904 * just shadow the next object in the chain, old 4905 * pages from that object would then have to be copied 4906 * BOTH into the (former) backing_object and into the 4907 * parent object. 4908 */ 4909 if (backing_object->shadow != VM_OBJECT_NULL && 4910 backing_object->shadow->copy == backing_object) { 4911 /* try and collapse the rest of the shadow chain */ 4912 if (object != original_object) { 4913 vm_object_unlock(object); 4914 } 4915 object = backing_object; 4916 object_lock_type = backing_object_lock_type; 4917 continue; 4918 } 4919 4920 /* 4921 * We can now try to either collapse the backing 4922 * object (if the parent is the only reference to 4923 * it) or (perhaps) remove the parent's reference 4924 * to it. 4925 * 4926 * If there is exactly one reference to the backing 4927 * object, we may be able to collapse it into the 4928 * parent. 4929 * 4930 * If MACH_PAGEMAP is defined: 4931 * The parent must not have a pager created for it, 4932 * since collapsing a backing_object dumps new pages 4933 * into the parent that its pager doesn't know about 4934 * (and the collapse code can't merge the existence 4935 * maps). 4936 * Otherwise: 4937 * As long as one of the objects is still not known 4938 * to the pager, we can collapse them. 4939 */ 4940 if (backing_object->ref_count == 1 && 4941 (!object->pager_created 4942#if !MACH_PAGEMAP 4943 || !backing_object->pager_created 4944#endif /*!MACH_PAGEMAP */ 4945 ) && vm_object_collapse_allowed) { 4946 4947 /* 4948 * We need the exclusive lock on the VM objects. 
4949 */ 4950 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { 4951 /* 4952 * We have an object and its shadow locked 4953 * "shared". We can't just upgrade the locks 4954 * to "exclusive", as some other thread might 4955 * also have these objects locked "shared" and 4956 * attempt to upgrade one or the other to 4957 * "exclusive". The upgrades would block 4958 * forever waiting for the other "shared" locks 4959 * to get released. 4960 * So we have to release the locks and go 4961 * down the shadow chain again (since it could 4962 * have changed) with "exclusive" locking. 4963 */ 4964 vm_object_unlock(backing_object); 4965 if (object != original_object) 4966 vm_object_unlock(object); 4967 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4968 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 4969 goto retry; 4970 } 4971 4972 XPR(XPR_VM_OBJECT, 4973 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", 4974 backing_object, object, 4975 backing_object->pager, 4976 backing_object->pager_control, 0); 4977 4978 /* 4979 * Collapse the object with its backing 4980 * object, and try again with the object's 4981 * new backing object. 4982 */ 4983 4984 vm_object_do_collapse(object, backing_object); 4985 vm_object_collapse_do_collapse++; 4986 continue; 4987 } 4988 4989 /* 4990 * Collapsing the backing object was not possible 4991 * or permitted, so let's try bypassing it. 4992 */ 4993 4994 if (! (can_bypass && vm_object_bypass_allowed)) { 4995 /* try and collapse the rest of the shadow chain */ 4996 if (object != original_object) { 4997 vm_object_unlock(object); 4998 } 4999 object = backing_object; 5000 object_lock_type = backing_object_lock_type; 5001 continue; 5002 } 5003 5004 5005 /* 5006 * If the object doesn't have all its pages present, 5007 * we have to make sure no pages in the backing object 5008 * "show through" before bypassing it. 5009 */ 5010 size = atop(object->vo_size); 5011 rcount = object->resident_page_count; 5012 5013 if (rcount != size) { 5014 vm_object_offset_t offset; 5015 vm_object_offset_t backing_offset; 5016 unsigned int backing_rcount; 5017 5018 /* 5019 * If the backing object has a pager but no pagemap, 5020 * then we cannot bypass it, because we don't know 5021 * what pages it has. 5022 */ 5023 if (backing_object->pager_created 5024#if MACH_PAGEMAP 5025 && (backing_object->existence_map == VM_EXTERNAL_NULL) 5026#endif /* MACH_PAGEMAP */ 5027 ) { 5028 /* try and collapse the rest of the shadow chain */ 5029 if (object != original_object) { 5030 vm_object_unlock(object); 5031 } 5032 object = backing_object; 5033 object_lock_type = backing_object_lock_type; 5034 continue; 5035 } 5036 5037 /* 5038 * If the object has a pager but no pagemap, 5039 * then we cannot bypass it, because we don't know 5040 * what pages it has. 
5041 */ 5042 if (object->pager_created 5043#if MACH_PAGEMAP 5044 && (object->existence_map == VM_EXTERNAL_NULL) 5045#endif /* MACH_PAGEMAP */ 5046 ) { 5047 /* try and collapse the rest of the shadow chain */ 5048 if (object != original_object) { 5049 vm_object_unlock(object); 5050 } 5051 object = backing_object; 5052 object_lock_type = backing_object_lock_type; 5053 continue; 5054 } 5055 5056 backing_offset = object->vo_shadow_offset; 5057 backing_rcount = backing_object->resident_page_count; 5058 5059 if ( (int)backing_rcount - (int)(atop(backing_object->vo_size) - size) > (int)rcount) { 5060 /* 5061 * we have enough pages in the backing object to guarantee that 5062 * at least 1 of them must be 'uncovered' by a resident page 5063 * in the object we're evaluating (for example, a 12-page backing object with 10 resident pages behind an 8-page window that itself has only 5 resident pages: 10 - (12 - 8) = 6 backing pages must fall inside the window, but at most 5 of them can be covered), so move on and 5064 * try to collapse the rest of the shadow chain 5065 */ 5066 if (object != original_object) { 5067 vm_object_unlock(object); 5068 } 5069 object = backing_object; 5070 object_lock_type = backing_object_lock_type; 5071 continue; 5072 } 5073 5074 /* 5075 * If all of the pages in the backing object are 5076 * shadowed by the parent object, the parent 5077 * object no longer has to shadow the backing 5078 * object; it can shadow the next one in the 5079 * chain. 5080 * 5081 * If the backing object has existence info, 5082 * we must examine its existence info 5083 * as well. 5084 * 5085 */ 5086 5087#if MACH_PAGEMAP 5088#define EXISTS_IN_OBJECT(obj, off, rc) \ 5089 (vm_external_state_get((obj)->existence_map, \ 5090 (vm_offset_t)(off)) == VM_EXTERNAL_STATE_EXISTS || \ 5091 ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) 5092#else 5093#define EXISTS_IN_OBJECT(obj, off, rc) \ 5094 (((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) 5095#endif /* MACH_PAGEMAP */ 5096 5097 /* 5098 * Check the hint location first 5099 * (since it is often the quickest way out of here). 5100 */ 5101 if (object->cow_hint != ~(vm_offset_t)0) 5102 hint_offset = (vm_object_offset_t)object->cow_hint; 5103 else 5104 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ? 5105 (hint_offset - 8 * PAGE_SIZE_64) : 0; 5106 5107 if (EXISTS_IN_OBJECT(backing_object, hint_offset + 5108 backing_offset, backing_rcount) && 5109 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) { 5110 /* dependency right at the hint */ 5111 object->cow_hint = (vm_offset_t) hint_offset; /* atomic */ 5112 /* try and collapse the rest of the shadow chain */ 5113 if (object != original_object) { 5114 vm_object_unlock(object); 5115 } 5116 object = backing_object; 5117 object_lock_type = backing_object_lock_type; 5118 continue; 5119 } 5120 5121 /* 5122 * If the object's window onto the backing_object 5123 * is large compared to the number of resident 5124 * pages in the backing object, it makes sense to 5125 * walk the backing_object's resident pages first. 5126 * 5127 * NOTE: Pages may be in both the existence map and/or 5128 * resident, so if we don't find a dependency while 5129 * walking the backing object's resident page list 5130 * directly, and there is an existence map, we'll have 5131 * to run the offset based 2nd pass.
Because we may 5132 * have to run both passes, we need to be careful 5133 * not to decrement 'rcount' in the 1st pass 5134 */ 5135 if (backing_rcount && backing_rcount < (size / 8)) { 5136 unsigned int rc = rcount; 5137 vm_page_t p; 5138 5139 backing_rcount = backing_object->resident_page_count; 5140 p = (vm_page_t)queue_first(&backing_object->memq); 5141 do { 5142 offset = (p->offset - backing_offset); 5143 5144 if (offset < object->vo_size && 5145 offset != hint_offset && 5146 !EXISTS_IN_OBJECT(object, offset, rc)) { 5147 /* found a dependency */ 5148 object->cow_hint = (vm_offset_t) offset; /* atomic */ 5149 5150 break; 5151 } 5152 p = (vm_page_t) queue_next(&p->listq); 5153 5154 } while (--backing_rcount); 5155 5156 if (backing_rcount != 0 ) { 5157 /* try and collapse the rest of the shadow chain */ 5158 if (object != original_object) { 5159 vm_object_unlock(object); 5160 } 5161 object = backing_object; 5162 object_lock_type = backing_object_lock_type; 5163 continue; 5164 } 5165 } 5166 5167 /* 5168 * Walk through the offsets looking for pages in the 5169 * backing object that show through to the object. 5170 */ 5171 if (backing_rcount 5172#if MACH_PAGEMAP 5173 || backing_object->existence_map 5174#endif /* MACH_PAGEMAP */ 5175 ) { 5176 offset = hint_offset; 5177 5178 while((offset = 5179 (offset + PAGE_SIZE_64 < object->vo_size) ? 5180 (offset + PAGE_SIZE_64) : 0) != hint_offset) { 5181 5182 if (EXISTS_IN_OBJECT(backing_object, offset + 5183 backing_offset, backing_rcount) && 5184 !EXISTS_IN_OBJECT(object, offset, rcount)) { 5185 /* found a dependency */ 5186 object->cow_hint = (vm_offset_t) offset; /* atomic */ 5187 break; 5188 } 5189 } 5190 if (offset != hint_offset) { 5191 /* try and collapse the rest of the shadow chain */ 5192 if (object != original_object) { 5193 vm_object_unlock(object); 5194 } 5195 object = backing_object; 5196 object_lock_type = backing_object_lock_type; 5197 continue; 5198 } 5199 } 5200 } 5201 5202 /* 5203 * We need "exclusive" locks on the 2 VM objects. 5204 */ 5205 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { 5206 vm_object_unlock(backing_object); 5207 if (object != original_object) 5208 vm_object_unlock(object); 5209 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5210 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5211 goto retry; 5212 } 5213 5214 /* reset the offset hint for any objects deeper in the chain */ 5215 object->cow_hint = (vm_offset_t)0; 5216 5217 /* 5218 * All interesting pages in the backing object 5219 * already live in the parent or its pager. 5220 * Thus we can bypass the backing object. 5221 */ 5222 5223 vm_object_do_bypass(object, backing_object); 5224 vm_object_collapse_do_bypass++; 5225 5226 /* 5227 * Try again with this object's new backing object. 5228 */ 5229 5230 continue; 5231 } 5232 5233 if (object != original_object) { 5234 vm_object_unlock(object); 5235 } 5236} 5237 5238/* 5239 * Routine: vm_object_page_remove: [internal] 5240 * Purpose: 5241 * Removes all physical pages in the specified 5242 * object range from the object's list of pages. 5243 * 5244 * In/out conditions: 5245 * The object must be locked. 5246 * The object must not have paging_in_progress, usually 5247 * guaranteed by not having a pager. 
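 *
 * Two strategies are used below: when the range being removed is
 * small relative to the object's resident page count (less than
 * 1/16th of it), each page in the range is looked up individually;
 * otherwise the resident page queue is walked once and every page
 * whose offset falls in [start, end) is freed.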
5248 */ 5249unsigned int vm_object_page_remove_lookup = 0; 5250unsigned int vm_object_page_remove_iterate = 0; 5251 5252__private_extern__ void 5253vm_object_page_remove( 5254 register vm_object_t object, 5255 register vm_object_offset_t start, 5256 register vm_object_offset_t end) 5257{ 5258 register vm_page_t p, next; 5259 5260 /* 5261 * One and two page removals are most popular. 5262 * The factor of 16 here is somewhat arbitrary. 5263 * It balances vm_object_lookup vs iteration. 5264 */ 5265 5266 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) { 5267 vm_object_page_remove_lookup++; 5268 5269 for (; start < end; start += PAGE_SIZE_64) { 5270 p = vm_page_lookup(object, start); 5271 if (p != VM_PAGE_NULL) { 5272 assert(!p->cleaning && !p->pageout && !p->laundry); 5273 if (!p->fictitious && p->pmapped) 5274 pmap_disconnect(p->phys_page); 5275 VM_PAGE_FREE(p); 5276 } 5277 } 5278 } else { 5279 vm_object_page_remove_iterate++; 5280 5281 p = (vm_page_t) queue_first(&object->memq); 5282 while (!queue_end(&object->memq, (queue_entry_t) p)) { 5283 next = (vm_page_t) queue_next(&p->listq); 5284 if ((start <= p->offset) && (p->offset < end)) { 5285 assert(!p->cleaning && !p->pageout && !p->laundry); 5286 if (!p->fictitious && p->pmapped) 5287 pmap_disconnect(p->phys_page); 5288 VM_PAGE_FREE(p); 5289 } 5290 p = next; 5291 } 5292 } 5293} 5294 5295 5296/* 5297 * Routine: vm_object_coalesce 5298 * Function: Coalesces two objects backing up adjoining 5299 * regions of memory into a single object. 5300 * 5301 * returns TRUE if objects were combined. 5302 * 5303 * NOTE: Only works at the moment if the second object is NULL - 5304 * if it's not, which object do we lock first? 5305 * 5306 * Parameters: 5307 * prev_object First object to coalesce 5308 * prev_offset Offset into prev_object 5309 * next_object Second object into coalesce 5310 * next_offset Offset into next_object 5311 * 5312 * prev_size Size of reference to prev_object 5313 * next_size Size of reference to next_object 5314 * 5315 * Conditions: 5316 * The object(s) must *not* be locked. The map must be locked 5317 * to preserve the reference to the object(s). 5318 */ 5319static int vm_object_coalesce_count = 0; 5320 5321__private_extern__ boolean_t 5322vm_object_coalesce( 5323 register vm_object_t prev_object, 5324 vm_object_t next_object, 5325 vm_object_offset_t prev_offset, 5326 __unused vm_object_offset_t next_offset, 5327 vm_object_size_t prev_size, 5328 vm_object_size_t next_size) 5329{ 5330 vm_object_size_t newsize; 5331 5332#ifdef lint 5333 next_offset++; 5334#endif /* lint */ 5335 5336 if (next_object != VM_OBJECT_NULL) { 5337 return(FALSE); 5338 } 5339 5340 if (prev_object == VM_OBJECT_NULL) { 5341 return(TRUE); 5342 } 5343 5344 XPR(XPR_VM_OBJECT, 5345 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n", 5346 prev_object, prev_offset, prev_size, next_size, 0); 5347 5348 vm_object_lock(prev_object); 5349 5350 /* 5351 * Try to collapse the object first 5352 */ 5353 vm_object_collapse(prev_object, prev_offset, TRUE); 5354 5355 /* 5356 * Can't coalesce if pages not mapped to 5357 * prev_entry may be in use any way: 5358 * . more than one reference 5359 * . paged out 5360 * . shadows another object 5361 * . has a copy elsewhere 5362 * . is purgeable 5363 * . 
paging references (pages might be in page-list) 5364 */ 5365 5366 if ((prev_object->ref_count > 1) || 5367 prev_object->pager_created || 5368 (prev_object->shadow != VM_OBJECT_NULL) || 5369 (prev_object->copy != VM_OBJECT_NULL) || 5370 (prev_object->true_share != FALSE) || 5371 (prev_object->purgable != VM_PURGABLE_DENY) || 5372 (prev_object->paging_in_progress != 0) || 5373 (prev_object->activity_in_progress != 0)) { 5374 vm_object_unlock(prev_object); 5375 return(FALSE); 5376 } 5377 5378 vm_object_coalesce_count++; 5379 5380 /* 5381 * Remove any pages that may still be in the object from 5382 * a previous deallocation. 5383 */ 5384 vm_object_page_remove(prev_object, 5385 prev_offset + prev_size, 5386 prev_offset + prev_size + next_size); 5387 5388 /* 5389 * Extend the object if necessary. 5390 */ 5391 newsize = prev_offset + prev_size + next_size; 5392 if (newsize > prev_object->vo_size) { 5393#if MACH_PAGEMAP 5394 /* 5395 * We cannot extend an object that has existence info, 5396 * since the existence info might then fail to cover 5397 * the entire object. 5398 * 5399 * This assertion must be true because the object 5400 * has no pager, and we only create existence info 5401 * for objects with pagers. 5402 */ 5403 assert(prev_object->existence_map == VM_EXTERNAL_NULL); 5404#endif /* MACH_PAGEMAP */ 5405 prev_object->vo_size = newsize; 5406 } 5407 5408 vm_object_unlock(prev_object); 5409 return(TRUE); 5410} 5411 5412/* 5413 * Attach a set of physical pages to an object, so that they can 5414 * be mapped by mapping the object. Typically used to map IO memory. 5415 * 5416 * The mapping function and its private data are used to obtain the 5417 * physical addresses for each page to be mapped. 5418 */ 5419void 5420vm_object_page_map( 5421 vm_object_t object, 5422 vm_object_offset_t offset, 5423 vm_object_size_t size, 5424 vm_object_offset_t (*map_fn)(void *map_fn_data, 5425 vm_object_offset_t offset), 5426 void *map_fn_data) /* private to map_fn */ 5427{ 5428 int64_t num_pages; 5429 int i; 5430 vm_page_t m; 5431 vm_page_t old_page; 5432 vm_object_offset_t addr; 5433 5434 num_pages = atop_64(size); 5435 5436 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) { 5437 5438 addr = (*map_fn)(map_fn_data, offset); 5439 5440 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) 5441 vm_page_more_fictitious(); 5442 5443 vm_object_lock(object); 5444 if ((old_page = vm_page_lookup(object, offset)) 5445 != VM_PAGE_NULL) 5446 { 5447 VM_PAGE_FREE(old_page); 5448 } 5449 5450 assert((ppnum_t) addr == addr); 5451 vm_page_init(m, (ppnum_t) addr, FALSE); 5452 /* 5453 * private normally requires lock_queues but since we 5454 * are initializing the page, its not necessary here 5455 */ 5456 m->private = TRUE; /* don`t free page */ 5457 m->wire_count = 1; 5458 vm_page_insert(m, object, offset); 5459 5460 PAGE_WAKEUP_DONE(m); 5461 vm_object_unlock(object); 5462 } 5463} 5464 5465kern_return_t 5466vm_object_populate_with_private( 5467 vm_object_t object, 5468 vm_object_offset_t offset, 5469 ppnum_t phys_page, 5470 vm_size_t size) 5471{ 5472 ppnum_t base_page; 5473 vm_object_offset_t base_offset; 5474 5475 5476 if (!object->private) 5477 return KERN_FAILURE; 5478 5479 base_page = phys_page; 5480 5481 vm_object_lock(object); 5482 5483 if (!object->phys_contiguous) { 5484 vm_page_t m; 5485 5486 if ((base_offset = trunc_page_64(offset)) != offset) { 5487 vm_object_unlock(object); 5488 return KERN_FAILURE; 5489 } 5490 base_offset += object->paging_offset; 5491 5492 while (size) { 5493 m = vm_page_lookup(object, 
base_offset); 5494 5495 if (m != VM_PAGE_NULL) { 5496 if (m->fictitious) { 5497 if (m->phys_page != vm_page_guard_addr) { 5498 5499 vm_page_lockspin_queues(); 5500 m->private = TRUE; 5501 vm_page_unlock_queues(); 5502 5503 m->fictitious = FALSE; 5504 m->phys_page = base_page; 5505 } 5506 } else if (m->phys_page != base_page) { 5507 5508 if ( !m->private) { 5509 /* 5510 * we'd leak a real page... that can't be right 5511 */ 5512 panic("vm_object_populate_with_private - %p not private", m); 5513 } 5514 if (m->pmapped) { 5515 /* 5516 * pmap call to clear old mapping 5517 */ 5518 pmap_disconnect(m->phys_page); 5519 } 5520 m->phys_page = base_page; 5521 } 5522 if (m->encrypted) { 5523 /* 5524 * we should never see this on a fictitious or private page 5525 */ 5526 panic("vm_object_populate_with_private - %p encrypted", m); 5527 } 5528 5529 } else { 5530 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) 5531 vm_page_more_fictitious(); 5532 5533 /* 5534 * private normally requires lock_queues but since we 5535 * are initializing the page, its not necessary here 5536 */ 5537 m->private = TRUE; 5538 m->fictitious = FALSE; 5539 m->phys_page = base_page; 5540 m->unusual = TRUE; 5541 m->busy = FALSE; 5542 5543 vm_page_insert(m, object, base_offset); 5544 } 5545 base_page++; /* Go to the next physical page */ 5546 base_offset += PAGE_SIZE; 5547 size -= PAGE_SIZE; 5548 } 5549 } else { 5550 /* NOTE: we should check the original settings here */ 5551 /* if we have a size > zero a pmap call should be made */ 5552 /* to disable the range */ 5553 5554 /* pmap_? */ 5555 5556 /* shadows on contiguous memory are not allowed */ 5557 /* we therefore can use the offset field */ 5558 object->vo_shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT; 5559 object->vo_size = size; 5560 } 5561 vm_object_unlock(object); 5562 5563 return KERN_SUCCESS; 5564} 5565 5566/* 5567 * memory_object_free_from_cache: 5568 * 5569 * Walk the vm_object cache list, removing and freeing vm_objects 5570 * which are backed by the pager identified by the caller, (pager_ops). 5571 * Remove up to "count" objects, if there are that many available 5572 * in the cache. 5573 * 5574 * Walk the list at most once, return the number of vm_objects 5575 * actually freed. 5576 */ 5577 5578__private_extern__ kern_return_t 5579memory_object_free_from_cache( 5580 __unused host_t host, 5581 __unused memory_object_pager_ops_t pager_ops, 5582 int *count) 5583{ 5584#if VM_OBJECT_CACHE 5585 int object_released = 0; 5586 5587 register vm_object_t object = VM_OBJECT_NULL; 5588 vm_object_t shadow; 5589 5590/* 5591 if(host == HOST_NULL) 5592 return(KERN_INVALID_ARGUMENT); 5593*/ 5594 5595 try_again: 5596 vm_object_cache_lock(); 5597 5598 queue_iterate(&vm_object_cached_list, object, 5599 vm_object_t, cached_list) { 5600 if (object->pager && 5601 (pager_ops == object->pager->mo_pager_ops)) { 5602 vm_object_lock(object); 5603 queue_remove(&vm_object_cached_list, object, 5604 vm_object_t, cached_list); 5605 vm_object_cached_count--; 5606 5607 vm_object_cache_unlock(); 5608 /* 5609 * Since this object is in the cache, we know 5610 * that it is initialized and has only a pager's 5611 * (implicit) reference. Take a reference to avoid 5612 * recursive deallocations. 5613 */ 5614 5615 assert(object->pager_initialized); 5616 assert(object->ref_count == 0); 5617 vm_object_lock_assert_exclusive(object); 5618 object->ref_count++; 5619 5620 /* 5621 * Terminate the object. 5622 * If the object had a shadow, we let 5623 * vm_object_deallocate deallocate it.
5624 * "pageout" objects have a shadow, but 5625 * maintain a "paging reference" rather 5626 * than a normal reference. 5627 * (We are careful here to limit recursion.) 5628 */ 5629 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 5630 5631 if ((vm_object_terminate(object) == KERN_SUCCESS) 5632 && (shadow != VM_OBJECT_NULL)) { 5633 vm_object_deallocate(shadow); 5634 } 5635 5636 if(object_released++ == *count) 5637 return KERN_SUCCESS; 5638 goto try_again; 5639 } 5640 } 5641 vm_object_cache_unlock(); 5642 *count = object_released; 5643#else 5644 *count = 0; 5645#endif 5646 return KERN_SUCCESS; 5647} 5648 5649 5650 5651kern_return_t 5652memory_object_create_named( 5653 memory_object_t pager, 5654 memory_object_offset_t size, 5655 memory_object_control_t *control) 5656{ 5657 vm_object_t object; 5658 vm_object_hash_entry_t entry; 5659 lck_mtx_t *lck; 5660 5661 *control = MEMORY_OBJECT_CONTROL_NULL; 5662 if (pager == MEMORY_OBJECT_NULL) 5663 return KERN_INVALID_ARGUMENT; 5664 5665 lck = vm_object_hash_lock_spin(pager); 5666 entry = vm_object_hash_lookup(pager, FALSE); 5667 5668 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) && 5669 (entry->object != VM_OBJECT_NULL)) { 5670 if (entry->object->named == TRUE) 5671 panic("memory_object_create_named: caller already holds the right"); } 5672 vm_object_hash_unlock(lck); 5673 5674 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == VM_OBJECT_NULL) { 5675 return(KERN_INVALID_OBJECT); 5676 } 5677 5678 /* wait for object (if any) to be ready */ 5679 if (object != VM_OBJECT_NULL) { 5680 vm_object_lock(object); 5681 object->named = TRUE; 5682 while (!object->pager_ready) { 5683 vm_object_sleep(object, 5684 VM_OBJECT_EVENT_PAGER_READY, 5685 THREAD_UNINT); 5686 } 5687 *control = object->pager_control; 5688 vm_object_unlock(object); 5689 } 5690 return (KERN_SUCCESS); 5691} 5692 5693 5694/* 5695 * Routine: memory_object_recover_named [user interface] 5696 * Purpose: 5697 * Attempt to recover a named reference for a VM object. 5698 * VM will verify that the object has not already started 5699 * down the termination path, and if it has, will optionally 5700 * wait for that to finish. 
5701 * Returns: 5702 * KERN_SUCCESS - we recovered a named reference on the object 5703 * KERN_FAILURE - we could not recover a reference (object dead) 5704 * KERN_INVALID_ARGUMENT - bad memory object control 5705 */ 5706kern_return_t 5707memory_object_recover_named( 5708 memory_object_control_t control, 5709 boolean_t wait_on_terminating) 5710{ 5711 vm_object_t object; 5712 5713 object = memory_object_control_to_vm_object(control); 5714 if (object == VM_OBJECT_NULL) { 5715 return (KERN_INVALID_ARGUMENT); 5716 } 5717restart: 5718 vm_object_lock(object); 5719 5720 if (object->terminating && wait_on_terminating) { 5721 vm_object_wait(object, 5722 VM_OBJECT_EVENT_PAGING_IN_PROGRESS, 5723 THREAD_UNINT); 5724 goto restart; 5725 } 5726 5727 if (!object->alive) { 5728 vm_object_unlock(object); 5729 return KERN_FAILURE; 5730 } 5731 5732 if (object->named == TRUE) { 5733 vm_object_unlock(object); 5734 return KERN_SUCCESS; 5735 } 5736#if VM_OBJECT_CACHE 5737 if ((object->ref_count == 0) && (!object->terminating)) { 5738 if (!vm_object_cache_lock_try()) { 5739 vm_object_unlock(object); 5740 goto restart; 5741 } 5742 queue_remove(&vm_object_cached_list, object, 5743 vm_object_t, cached_list); 5744 vm_object_cached_count--; 5745 XPR(XPR_VM_OBJECT_CACHE, 5746 "memory_object_recover_named: removing %X, head (%X, %X)\n", 5747 object, 5748 vm_object_cached_list.next, 5749 vm_object_cached_list.prev, 0,0); 5750 5751 vm_object_cache_unlock(); 5752 } 5753#endif 5754 object->named = TRUE; 5755 vm_object_lock_assert_exclusive(object); 5756 object->ref_count++; 5757 vm_object_res_reference(object); 5758 while (!object->pager_ready) { 5759 vm_object_sleep(object, 5760 VM_OBJECT_EVENT_PAGER_READY, 5761 THREAD_UNINT); 5762 } 5763 vm_object_unlock(object); 5764 return (KERN_SUCCESS); 5765} 5766 5767 5768/* 5769 * vm_object_release_name: 5770 * 5771 * Enforces name semantic on memory_object reference count decrement 5772 * This routine should not be called unless the caller holds a name 5773 * reference gained through the memory_object_create_named. 5774 * 5775 * If the TERMINATE_IDLE flag is set, the call will return if the 5776 * reference count is not 1. i.e. idle with the only remaining reference 5777 * being the name. 5778 * If the decision is made to proceed the name field flag is set to 5779 * false and the reference count is decremented. If the RESPECT_CACHE 5780 * flag is set and the reference count has gone to zero, the 5781 * memory_object is checked to see if it is cacheable otherwise when 5782 * the reference count is zero, it is simply terminated. 5783 */ 5784 5785__private_extern__ kern_return_t 5786vm_object_release_name( 5787 vm_object_t object, 5788 int flags) 5789{ 5790 vm_object_t shadow; 5791 boolean_t original_object = TRUE; 5792 5793 while (object != VM_OBJECT_NULL) { 5794 5795 vm_object_lock(object); 5796 5797 assert(object->alive); 5798 if (original_object) 5799 assert(object->named); 5800 assert(object->ref_count > 0); 5801 5802 /* 5803 * We have to wait for initialization before 5804 * destroying or caching the object. 
5805 */ 5806 5807 if (object->pager_created && !object->pager_initialized) { 5808 assert(!object->can_persist); 5809 vm_object_assert_wait(object, 5810 VM_OBJECT_EVENT_INITIALIZED, 5811 THREAD_UNINT); 5812 vm_object_unlock(object); 5813 thread_block(THREAD_CONTINUE_NULL); 5814 continue; 5815 } 5816 5817 if (((object->ref_count > 1) 5818 && (flags & MEMORY_OBJECT_TERMINATE_IDLE)) 5819 || (object->terminating)) { 5820 vm_object_unlock(object); 5821 return KERN_FAILURE; 5822 } else { 5823 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) { 5824 vm_object_unlock(object); 5825 return KERN_SUCCESS; 5826 } 5827 } 5828 5829 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) && 5830 (object->ref_count == 1)) { 5831 if (original_object) 5832 object->named = FALSE; 5833 vm_object_unlock(object); 5834 /* let vm_object_deallocate push this thing into */ 5835 /* the cache, if that it is where it is bound */ 5836 vm_object_deallocate(object); 5837 return KERN_SUCCESS; 5838 } 5839 VM_OBJ_RES_DECR(object); 5840 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 5841 5842 if (object->ref_count == 1) { 5843 if (vm_object_terminate(object) != KERN_SUCCESS) { 5844 if (original_object) { 5845 return KERN_FAILURE; 5846 } else { 5847 return KERN_SUCCESS; 5848 } 5849 } 5850 if (shadow != VM_OBJECT_NULL) { 5851 original_object = FALSE; 5852 object = shadow; 5853 continue; 5854 } 5855 return KERN_SUCCESS; 5856 } else { 5857 vm_object_lock_assert_exclusive(object); 5858 object->ref_count--; 5859 assert(object->ref_count > 0); 5860 if(original_object) 5861 object->named = FALSE; 5862 vm_object_unlock(object); 5863 return KERN_SUCCESS; 5864 } 5865 } 5866 /*NOTREACHED*/ 5867 assert(0); 5868 return KERN_FAILURE; 5869} 5870 5871 5872__private_extern__ kern_return_t 5873vm_object_lock_request( 5874 vm_object_t object, 5875 vm_object_offset_t offset, 5876 vm_object_size_t size, 5877 memory_object_return_t should_return, 5878 int flags, 5879 vm_prot_t prot) 5880{ 5881 __unused boolean_t should_flush; 5882 5883 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; 5884 5885 XPR(XPR_MEMORY_OBJECT, 5886 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n", 5887 object, offset, size, 5888 (((should_return&1)<<1)|should_flush), prot); 5889 5890 /* 5891 * Check for bogus arguments. 5892 */ 5893 if (object == VM_OBJECT_NULL) 5894 return (KERN_INVALID_ARGUMENT); 5895 5896 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) 5897 return (KERN_INVALID_ARGUMENT); 5898 5899 size = round_page_64(size); 5900 5901 /* 5902 * Lock the object, and acquire a paging reference to 5903 * prevent the memory_object reference from being released. 5904 */ 5905 vm_object_lock(object); 5906 vm_object_paging_begin(object); 5907 5908 (void)vm_object_update(object, 5909 offset, size, NULL, NULL, should_return, flags, prot); 5910 5911 vm_object_paging_end(object); 5912 vm_object_unlock(object); 5913 5914 return (KERN_SUCCESS); 5915} 5916 5917/* 5918 * Empty a purgeable object by grabbing the physical pages assigned to it and 5919 * putting them on the free queue without writing them to backing store, etc. 5920 * When the pages are next touched they will be demand zero-fill pages. We 5921 * skip pages which are busy, being paged in/out, wired, etc. We do _not_ 5922 * skip referenced/dirty pages, pages on the active queue, etc. We're more 5923 * than happy to grab these since this is a purgeable object. We mark the 5924 * object as "empty" after reaping its pages. 
 *
 * On entry the object must be locked and it must be
 * purgeable with no delayed copies pending.
 */
void
vm_object_purge(vm_object_t object)
{
	vm_object_lock_assert_exclusive(object);

	if (object->purgable == VM_PURGABLE_DENY)
		return;

	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	if(object->purgable == VM_PURGABLE_VOLATILE) {
		unsigned int delta;
		assert(object->resident_page_count >=
		       object->wired_page_count);
		delta = (object->resident_page_count -
			 object->wired_page_count);
		if (delta != 0) {
			assert(vm_page_purgeable_count >=
			       delta);
			OSAddAtomic(-delta,
				    (SInt32 *)&vm_page_purgeable_count);
		}
		if (object->wired_page_count != 0) {
			assert(vm_page_purgeable_wired_count >=
			       object->wired_page_count);
			OSAddAtomic(-object->wired_page_count,
				    (SInt32 *)&vm_page_purgeable_wired_count);
		}
	}
	object->purgable = VM_PURGABLE_EMPTY;

	vm_object_reap_pages(object, REAP_PURGEABLE);
}


/*
 * vm_object_purgable_control() allows the caller to control and investigate the
 * state of a purgeable object.  A purgeable object is created via a call to
 * vm_allocate() with VM_FLAGS_PURGABLE specified.  A purgeable object will
 * never be coalesced with any other object -- even other purgeable objects --
 * and will thus always remain a distinct object.  A purgeable object has
 * special semantics when its reference count is exactly 1.  If its reference
 * count is greater than 1, then a purgeable object will behave like a normal
 * object and attempts to use this interface will result in an error return
 * of KERN_INVALID_ARGUMENT.
 *
 * A purgeable object may be put into a "volatile" state which will make the
 * object's pages eligible for being reclaimed without paging to backing
 * store if the system runs low on memory.  If the pages in a volatile
 * purgeable object are reclaimed, the purgeable object is said to have been
 * "emptied."  When a purgeable object is emptied the system will reclaim as
 * many pages from the object as it can in a convenient manner (pages already
 * en route to backing store or busy for other reasons are left as is).  When
 * a purgeable object is made volatile, its pages will generally be reclaimed
 * before other pages in the application's working set.  This semantic is
 * generally used by applications which can recreate the data in the object
 * faster than it can be paged in.  One such example might be media assets
 * which can be reread from a much faster RAID volume.
 *
 * A purgeable object may be designated as "non-volatile" which means it will
 * behave like all other objects in the system with pages being written to and
 * read from backing store as needed to satisfy system memory needs.  If the
 * object was emptied before the object was made non-volatile, that fact will
 * be returned as the old state of the purgeable object (see
 * VM_PURGABLE_SET_STATE below).  In this case, any pages of the object which
 * were reclaimed as part of emptying the object will be refaulted in as
 * zero-fill on demand.  It is up to the application to note that an object
 * was emptied and recreate the object's contents if necessary.
When a 5998 * purgeable object is made non-volatile, its pages will generally not be paged 5999 * out to backing store in the immediate future. A purgeable object may also 6000 * be manually emptied. 6001 * 6002 * Finally, the current state (non-volatile, volatile, volatile & empty) of a 6003 * volatile purgeable object may be queried at any time. This information may 6004 * be used as a control input to let the application know when the system is 6005 * experiencing memory pressure and is reclaiming memory. 6006 * 6007 * The specified address may be any address within the purgeable object. If 6008 * the specified address does not represent any object in the target task's 6009 * virtual address space, then KERN_INVALID_ADDRESS will be returned. If the 6010 * object containing the specified address is not a purgeable object, then 6011 * KERN_INVALID_ARGUMENT will be returned. Otherwise, KERN_SUCCESS will be 6012 * returned. 6013 * 6014 * The control parameter may be any one of VM_PURGABLE_SET_STATE or 6015 * VM_PURGABLE_GET_STATE. For VM_PURGABLE_SET_STATE, the in/out parameter 6016 * state is used to set the new state of the purgeable object and return its 6017 * old state. For VM_PURGABLE_GET_STATE, the current state of the purgeable 6018 * object is returned in the parameter state. 6019 * 6020 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE, 6021 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY. These, respectively, represent 6022 * the non-volatile, volatile and volatile/empty states described above. 6023 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will 6024 * immediately reclaim as many pages in the object as can be conveniently 6025 * collected (some may have already been written to backing store or be 6026 * otherwise busy). 6027 * 6028 * The process of making a purgeable object non-volatile and determining its 6029 * previous state is atomic. Thus, if a purgeable object is made 6030 * VM_PURGABLE_NONVOLATILE and the old state is returned as 6031 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are 6032 * completely intact and will remain so until the object is made volatile 6033 * again. If the old state is returned as VM_PURGABLE_EMPTY then the object 6034 * was reclaimed while it was in a volatile state and its previous contents 6035 * have been lost. 6036 */ 6037/* 6038 * The object must be locked. 6039 */ 6040kern_return_t 6041vm_object_purgable_control( 6042 vm_object_t object, 6043 vm_purgable_t control, 6044 int *state) 6045{ 6046 int old_state; 6047 int new_state; 6048 6049 if (object == VM_OBJECT_NULL) { 6050 /* 6051 * Object must already be present or it can't be purgeable. 6052 */ 6053 return KERN_INVALID_ARGUMENT; 6054 } 6055 6056 /* 6057 * Get current state of the purgeable object. 6058 */ 6059 old_state = object->purgable; 6060 if (old_state == VM_PURGABLE_DENY) 6061 return KERN_INVALID_ARGUMENT; 6062 6063 /* purgeable cant have delayed copies - now or in the future */ 6064 assert(object->copy == VM_OBJECT_NULL); 6065 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); 6066 6067 /* 6068 * Execute the desired operation. 
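	 *
	 * A typical VM_PURGABLE_SET_STATE request (sketch only; the caller
	 * must already hold the object lock, per the note above) packs the
	 * new state together with the queue-selection bits into *state:
	 *
	 *	int		state;
	 *	kern_return_t	kr;
	 *
	 *	state = VM_PURGABLE_VOLATILE | VM_PURGABLE_BEHAVIOR_FIFO;
	 *	kr = vm_object_purgable_control(object,
	 *					VM_PURGABLE_SET_STATE, &state);
	 *
	 * On return, "state" holds the object's previous state.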
6069 */ 6070 if (control == VM_PURGABLE_GET_STATE) { 6071 *state = old_state; 6072 return KERN_SUCCESS; 6073 } 6074 6075 if ((*state) & VM_PURGABLE_DEBUG_EMPTY) { 6076 object->volatile_empty = TRUE; 6077 } 6078 if ((*state) & VM_PURGABLE_DEBUG_FAULT) { 6079 object->volatile_fault = TRUE; 6080 } 6081 6082 new_state = *state & VM_PURGABLE_STATE_MASK; 6083 if (new_state == VM_PURGABLE_VOLATILE && 6084 object->volatile_empty) { 6085 new_state = VM_PURGABLE_EMPTY; 6086 } 6087 6088 switch (new_state) { 6089 case VM_PURGABLE_DENY: 6090 case VM_PURGABLE_NONVOLATILE: 6091 object->purgable = new_state; 6092 6093 if (old_state == VM_PURGABLE_VOLATILE) { 6094 unsigned int delta; 6095 6096 assert(object->resident_page_count >= 6097 object->wired_page_count); 6098 delta = (object->resident_page_count - 6099 object->wired_page_count); 6100 6101 assert(vm_page_purgeable_count >= delta); 6102 6103 if (delta != 0) { 6104 OSAddAtomic(-delta, 6105 (SInt32 *)&vm_page_purgeable_count); 6106 } 6107 if (object->wired_page_count != 0) { 6108 assert(vm_page_purgeable_wired_count >= 6109 object->wired_page_count); 6110 OSAddAtomic(-object->wired_page_count, 6111 (SInt32 *)&vm_page_purgeable_wired_count); 6112 } 6113 6114 vm_page_lock_queues(); 6115 6116 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ 6117 purgeable_q_t queue = vm_purgeable_object_remove(object); 6118 assert(queue); 6119 6120 vm_purgeable_token_delete_last(queue); 6121 assert(queue->debug_count_objects>=0); 6122 6123 vm_page_unlock_queues(); 6124 } 6125 break; 6126 6127 case VM_PURGABLE_VOLATILE: 6128 if (object->volatile_fault) { 6129 vm_page_t p; 6130 int refmod; 6131 6132 queue_iterate(&object->memq, p, vm_page_t, listq) { 6133 if (p->busy || 6134 VM_PAGE_WIRED(p) || 6135 p->fictitious) { 6136 continue; 6137 } 6138 refmod = pmap_disconnect(p->phys_page); 6139 if ((refmod & VM_MEM_MODIFIED) && 6140 !p->dirty) { 6141 SET_PAGE_DIRTY(p, FALSE); 6142 } 6143 } 6144 } 6145 6146 if (old_state == VM_PURGABLE_EMPTY && 6147 object->resident_page_count == 0) 6148 break; 6149 6150 purgeable_q_t queue; 6151 6152 /* find the correct queue */ 6153 if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE) 6154 queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE]; 6155 else { 6156 if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO) 6157 queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; 6158 else 6159 queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; 6160 } 6161 6162 if (old_state == VM_PURGABLE_NONVOLATILE || 6163 old_state == VM_PURGABLE_EMPTY) { 6164 unsigned int delta; 6165 6166 /* try to add token... 
this can fail */ 6167 vm_page_lock_queues(); 6168 6169 kern_return_t result = vm_purgeable_token_add(queue); 6170 if (result != KERN_SUCCESS) { 6171 vm_page_unlock_queues(); 6172 return result; 6173 } 6174 vm_page_unlock_queues(); 6175 6176 assert(object->resident_page_count >= 6177 object->wired_page_count); 6178 delta = (object->resident_page_count - 6179 object->wired_page_count); 6180 6181 if (delta != 0) { 6182 OSAddAtomic(delta, 6183 &vm_page_purgeable_count); 6184 } 6185 if (object->wired_page_count != 0) { 6186 OSAddAtomic(object->wired_page_count, 6187 &vm_page_purgeable_wired_count); 6188 } 6189 6190 object->purgable = new_state; 6191 6192 /* object should not be on a queue */ 6193 assert(object->objq.next == NULL && object->objq.prev == NULL); 6194 } 6195 else if (old_state == VM_PURGABLE_VOLATILE) { 6196 /* 6197 * if reassigning priorities / purgeable groups, we don't change the 6198 * token queue. So moving priorities will not make pages stay around longer. 6199 * Reasoning is that the algorithm gives most priority to the most important 6200 * object. If a new token is added, the most important object' priority is boosted. 6201 * This biases the system already for purgeable queues that move a lot. 6202 * It doesn't seem more biasing is neccessary in this case, where no new object is added. 6203 */ 6204 assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */ 6205 6206 purgeable_q_t old_queue=vm_purgeable_object_remove(object); 6207 assert(old_queue); 6208 6209 if (old_queue != queue) { 6210 kern_return_t result; 6211 6212 /* Changing queue. Have to move token. */ 6213 vm_page_lock_queues(); 6214 vm_purgeable_token_delete_last(old_queue); 6215 result = vm_purgeable_token_add(queue); 6216 vm_page_unlock_queues(); 6217 6218 assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */ 6219 } 6220 }; 6221 vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT ); 6222 6223 assert(queue->debug_count_objects>=0); 6224 6225 break; 6226 6227 6228 case VM_PURGABLE_EMPTY: 6229 if (object->volatile_fault) { 6230 vm_page_t p; 6231 int refmod; 6232 6233 queue_iterate(&object->memq, p, vm_page_t, listq) { 6234 if (p->busy || 6235 VM_PAGE_WIRED(p) || 6236 p->fictitious) { 6237 continue; 6238 } 6239 refmod = pmap_disconnect(p->phys_page); 6240 if ((refmod & VM_MEM_MODIFIED) && 6241 !p->dirty) { 6242 SET_PAGE_DIRTY(p, FALSE); 6243 } 6244 } 6245 } 6246 6247 if (old_state != new_state) { 6248 assert(old_state == VM_PURGABLE_NONVOLATILE || 6249 old_state == VM_PURGABLE_VOLATILE); 6250 if (old_state == VM_PURGABLE_VOLATILE) { 6251 purgeable_q_t old_queue; 6252 6253 /* object should be on a queue */ 6254 assert(object->objq.next != NULL && 6255 object->objq.prev != NULL); 6256 old_queue = vm_purgeable_object_remove(object); 6257 assert(old_queue); 6258 vm_page_lock_queues(); 6259 vm_purgeable_token_delete_last(old_queue); 6260 vm_page_unlock_queues(); 6261 } 6262 (void) vm_object_purge(object); 6263 } 6264 break; 6265 6266 } 6267 *state = old_state; 6268 6269 return KERN_SUCCESS; 6270} 6271 6272#if TASK_SWAPPER 6273/* 6274 * vm_object_res_deallocate 6275 * 6276 * (recursively) decrement residence counts on vm objects and their shadows. 6277 * Called from vm_object_deallocate and when swapping out an object. 6278 * 6279 * The object is locked, and remains locked throughout the function, 6280 * even as we iterate down the shadow chain. 
Locks on intermediate objects 6281 * will be dropped, but not the original object. 6282 * 6283 * NOTE: this function used to use recursion, rather than iteration. 6284 */ 6285 6286__private_extern__ void 6287vm_object_res_deallocate( 6288 vm_object_t object) 6289{ 6290 vm_object_t orig_object = object; 6291 /* 6292 * Object is locked so it can be called directly 6293 * from vm_object_deallocate. Original object is never 6294 * unlocked. 6295 */ 6296 assert(object->res_count > 0); 6297 while (--object->res_count == 0) { 6298 assert(object->ref_count >= object->res_count); 6299 vm_object_deactivate_all_pages(object); 6300 /* iterate on shadow, if present */ 6301 if (object->shadow != VM_OBJECT_NULL) { 6302 vm_object_t tmp_object = object->shadow; 6303 vm_object_lock(tmp_object); 6304 if (object != orig_object) 6305 vm_object_unlock(object); 6306 object = tmp_object; 6307 assert(object->res_count > 0); 6308 } else 6309 break; 6310 } 6311 if (object != orig_object) 6312 vm_object_unlock(object); 6313} 6314 6315/* 6316 * vm_object_res_reference 6317 * 6318 * Internal function to increment residence count on a vm object 6319 * and its shadows. It is called only from vm_object_reference, and 6320 * when swapping in a vm object, via vm_map_swap. 6321 * 6322 * The object is locked, and remains locked throughout the function, 6323 * even as we iterate down the shadow chain. Locks on intermediate objects 6324 * will be dropped, but not the original object. 6325 * 6326 * NOTE: this function used to use recursion, rather than iteration. 6327 */ 6328 6329__private_extern__ void 6330vm_object_res_reference( 6331 vm_object_t object) 6332{ 6333 vm_object_t orig_object = object; 6334 /* 6335 * Object is locked, so this can be called directly 6336 * from vm_object_reference. This lock is never released. 6337 */ 6338 while ((++object->res_count == 1) && 6339 (object->shadow != VM_OBJECT_NULL)) { 6340 vm_object_t tmp_object = object->shadow; 6341 6342 assert(object->ref_count >= object->res_count); 6343 vm_object_lock(tmp_object); 6344 if (object != orig_object) 6345 vm_object_unlock(object); 6346 object = tmp_object; 6347 } 6348 if (object != orig_object) 6349 vm_object_unlock(object); 6350 assert(orig_object->ref_count >= orig_object->res_count); 6351} 6352#endif /* TASK_SWAPPER */ 6353 6354/* 6355 * vm_object_reference: 6356 * 6357 * Gets another reference to the given object. 6358 */ 6359#ifdef vm_object_reference 6360#undef vm_object_reference 6361#endif 6362__private_extern__ void 6363vm_object_reference( 6364 register vm_object_t object) 6365{ 6366 if (object == VM_OBJECT_NULL) 6367 return; 6368 6369 vm_object_lock(object); 6370 assert(object->ref_count > 0); 6371 vm_object_reference_locked(object); 6372 vm_object_unlock(object); 6373} 6374 6375#ifdef MACH_BSD 6376/* 6377 * Scale the vm_object_cache 6378 * This is required to make sure that the vm_object_cache is big 6379 * enough to effectively cache the mapped file. 6380 * This is really important with UBC as all the regular file vnodes 6381 * have memory object associated with them. Havving this cache too 6382 * small results in rapid reclaim of vnodes and hurts performance a LOT! 6383 * 6384 * This is also needed as number of vnodes can be dynamically scaled. 
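 *
 * (When VM_OBJECT_CACHE is not compiled in, the routine below reduces to
 * a no-op that simply returns KERN_SUCCESS.)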
6385 */ 6386kern_return_t 6387adjust_vm_object_cache( 6388 __unused vm_size_t oval, 6389 __unused vm_size_t nval) 6390{ 6391#if VM_OBJECT_CACHE 6392 vm_object_cached_max = nval; 6393 vm_object_cache_trim(FALSE); 6394#endif 6395 return (KERN_SUCCESS); 6396} 6397#endif /* MACH_BSD */ 6398 6399 6400/* 6401 * vm_object_transpose 6402 * 6403 * This routine takes two VM objects of the same size and exchanges 6404 * their backing store. 6405 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE 6406 * and UPL_BLOCK_ACCESS if they are referenced anywhere. 6407 * 6408 * The VM objects must not be locked by caller. 6409 */ 6410unsigned int vm_object_transpose_count = 0; 6411kern_return_t 6412vm_object_transpose( 6413 vm_object_t object1, 6414 vm_object_t object2, 6415 vm_object_size_t transpose_size) 6416{ 6417 vm_object_t tmp_object; 6418 kern_return_t retval; 6419 boolean_t object1_locked, object2_locked; 6420 vm_page_t page; 6421 vm_object_offset_t page_offset; 6422 lck_mtx_t *hash_lck; 6423 vm_object_hash_entry_t hash_entry; 6424 6425 tmp_object = VM_OBJECT_NULL; 6426 object1_locked = FALSE; object2_locked = FALSE; 6427 6428 if (object1 == object2 || 6429 object1 == VM_OBJECT_NULL || 6430 object2 == VM_OBJECT_NULL) { 6431 /* 6432 * If the 2 VM objects are the same, there's 6433 * no point in exchanging their backing store. 6434 */ 6435 retval = KERN_INVALID_VALUE; 6436 goto done; 6437 } 6438 6439 /* 6440 * Since we need to lock both objects at the same time, 6441 * make sure we always lock them in the same order to 6442 * avoid deadlocks. 6443 */ 6444 if (object1 > object2) { 6445 tmp_object = object1; 6446 object1 = object2; 6447 object2 = tmp_object; 6448 } 6449 6450 /* 6451 * Allocate a temporary VM object to hold object1's contents 6452 * while we copy object2 to object1. 6453 */ 6454 tmp_object = vm_object_allocate(transpose_size); 6455 vm_object_lock(tmp_object); 6456 tmp_object->can_persist = FALSE; 6457 6458 6459 /* 6460 * Grab control of the 1st VM object. 6461 */ 6462 vm_object_lock(object1); 6463 object1_locked = TRUE; 6464 if (!object1->alive || object1->terminating || 6465 object1->copy || object1->shadow || object1->shadowed || 6466 object1->purgable != VM_PURGABLE_DENY) { 6467 /* 6468 * We don't deal with copy or shadow objects (yet). 6469 */ 6470 retval = KERN_INVALID_VALUE; 6471 goto done; 6472 } 6473 /* 6474 * We're about to mess with the object's backing store and 6475 * taking a "paging_in_progress" reference wouldn't be enough 6476 * to prevent any paging activity on this object, so the caller should 6477 * have "quiesced" the objects beforehand, via a UPL operation with 6478 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired) 6479 * and UPL_BLOCK_ACCESS (to mark the pages "busy"). 6480 * 6481 * Wait for any paging operation to complete (but only paging, not 6482 * other kind of activities not linked to the pager). After we're 6483 * statisfied that there's no more paging in progress, we keep the 6484 * object locked, to guarantee that no one tries to access its pager. 6485 */ 6486 vm_object_paging_only_wait(object1, THREAD_UNINT); 6487 6488 /* 6489 * Same as above for the 2nd object... 6490 */ 6491 vm_object_lock(object2); 6492 object2_locked = TRUE; 6493 if (! 
object2->alive || object2->terminating || 6494 object2->copy || object2->shadow || object2->shadowed || 6495 object2->purgable != VM_PURGABLE_DENY) { 6496 retval = KERN_INVALID_VALUE; 6497 goto done; 6498 } 6499 vm_object_paging_only_wait(object2, THREAD_UNINT); 6500 6501 6502 if (object1->vo_size != object2->vo_size || 6503 object1->vo_size != transpose_size) { 6504 /* 6505 * If the 2 objects don't have the same size, we can't 6506 * exchange their backing stores or one would overflow. 6507 * If their size doesn't match the caller's 6508 * "transpose_size", we can't do it either because the 6509 * transpose operation will affect the entire span of 6510 * the objects. 6511 */ 6512 retval = KERN_INVALID_VALUE; 6513 goto done; 6514 } 6515 6516 6517 /* 6518 * Transpose the lists of resident pages. 6519 * This also updates the resident_page_count and the memq_hint. 6520 */ 6521 if (object1->phys_contiguous || queue_empty(&object1->memq)) { 6522 /* 6523 * No pages in object1, just transfer pages 6524 * from object2 to object1. No need to go through 6525 * an intermediate object. 6526 */ 6527 while (!queue_empty(&object2->memq)) { 6528 page = (vm_page_t) queue_first(&object2->memq); 6529 vm_page_rename(page, object1, page->offset, FALSE); 6530 } 6531 assert(queue_empty(&object2->memq)); 6532 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) { 6533 /* 6534 * No pages in object2, just transfer pages 6535 * from object1 to object2. No need to go through 6536 * an intermediate object. 6537 */ 6538 while (!queue_empty(&object1->memq)) { 6539 page = (vm_page_t) queue_first(&object1->memq); 6540 vm_page_rename(page, object2, page->offset, FALSE); 6541 } 6542 assert(queue_empty(&object1->memq)); 6543 } else { 6544 /* transfer object1's pages to tmp_object */ 6545 while (!queue_empty(&object1->memq)) { 6546 page = (vm_page_t) queue_first(&object1->memq); 6547 page_offset = page->offset; 6548 vm_page_remove(page, TRUE); 6549 page->offset = page_offset; 6550 queue_enter(&tmp_object->memq, page, vm_page_t, listq); 6551 } 6552 assert(queue_empty(&object1->memq)); 6553 /* transfer object2's pages to object1 */ 6554 while (!queue_empty(&object2->memq)) { 6555 page = (vm_page_t) queue_first(&object2->memq); 6556 vm_page_rename(page, object1, page->offset, FALSE); 6557 } 6558 assert(queue_empty(&object2->memq)); 6559 /* transfer tmp_object's pages to object1 */ 6560 while (!queue_empty(&tmp_object->memq)) { 6561 page = (vm_page_t) queue_first(&tmp_object->memq); 6562 queue_remove(&tmp_object->memq, page, 6563 vm_page_t, listq); 6564 vm_page_insert(page, object2, page->offset); 6565 } 6566 assert(queue_empty(&tmp_object->memq)); 6567 } 6568 6569#define __TRANSPOSE_FIELD(field) \ 6570MACRO_BEGIN \ 6571 tmp_object->field = object1->field; \ 6572 object1->field = object2->field; \ 6573 object2->field = tmp_object->field; \ 6574MACRO_END 6575 6576 /* "Lock" refers to the object not its contents */ 6577 /* "size" should be identical */ 6578 assert(object1->vo_size == object2->vo_size); 6579 /* "memq_hint" was updated above when transposing pages */ 6580 /* "ref_count" refers to the object not its contents */ 6581#if TASK_SWAPPER 6582 /* "res_count" refers to the object not its contents */ 6583#endif 6584 /* "resident_page_count" was updated above when transposing pages */ 6585 /* "wired_page_count" was updated above when transposing pages */ 6586 /* "reusable_page_count" was updated above when transposing pages */ 6587 /* there should be no "copy" */ 6588 assert(!object1->copy); 6589 
assert(!object2->copy); 6590 /* there should be no "shadow" */ 6591 assert(!object1->shadow); 6592 assert(!object2->shadow); 6593 __TRANSPOSE_FIELD(vo_shadow_offset); /* used by phys_contiguous objects */ 6594 __TRANSPOSE_FIELD(pager); 6595 __TRANSPOSE_FIELD(paging_offset); 6596 __TRANSPOSE_FIELD(pager_control); 6597 /* update the memory_objects' pointers back to the VM objects */ 6598 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 6599 memory_object_control_collapse(object1->pager_control, 6600 object1); 6601 } 6602 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 6603 memory_object_control_collapse(object2->pager_control, 6604 object2); 6605 } 6606 __TRANSPOSE_FIELD(copy_strategy); 6607 /* "paging_in_progress" refers to the object not its contents */ 6608 assert(!object1->paging_in_progress); 6609 assert(!object2->paging_in_progress); 6610 assert(object1->activity_in_progress); 6611 assert(object2->activity_in_progress); 6612 /* "all_wanted" refers to the object not its contents */ 6613 __TRANSPOSE_FIELD(pager_created); 6614 __TRANSPOSE_FIELD(pager_initialized); 6615 __TRANSPOSE_FIELD(pager_ready); 6616 __TRANSPOSE_FIELD(pager_trusted); 6617 __TRANSPOSE_FIELD(can_persist); 6618 __TRANSPOSE_FIELD(internal); 6619 __TRANSPOSE_FIELD(temporary); 6620 __TRANSPOSE_FIELD(private); 6621 __TRANSPOSE_FIELD(pageout); 6622 /* "alive" should be set */ 6623 assert(object1->alive); 6624 assert(object2->alive); 6625 /* "purgeable" should be non-purgeable */ 6626 assert(object1->purgable == VM_PURGABLE_DENY); 6627 assert(object2->purgable == VM_PURGABLE_DENY); 6628 /* "shadowed" refers to the the object not its contents */ 6629 __TRANSPOSE_FIELD(silent_overwrite); 6630 __TRANSPOSE_FIELD(advisory_pageout); 6631 __TRANSPOSE_FIELD(true_share); 6632 /* "terminating" should not be set */ 6633 assert(!object1->terminating); 6634 assert(!object2->terminating); 6635 __TRANSPOSE_FIELD(named); 6636 /* "shadow_severed" refers to the object not its contents */ 6637 __TRANSPOSE_FIELD(phys_contiguous); 6638 __TRANSPOSE_FIELD(nophyscache); 6639 /* "cached_list.next" points to transposed object */ 6640 object1->cached_list.next = (queue_entry_t) object2; 6641 object2->cached_list.next = (queue_entry_t) object1; 6642 /* "cached_list.prev" should be NULL */ 6643 assert(object1->cached_list.prev == NULL); 6644 assert(object2->cached_list.prev == NULL); 6645 /* "msr_q" is linked to the object not its contents */ 6646 assert(queue_empty(&object1->msr_q)); 6647 assert(queue_empty(&object2->msr_q)); 6648 __TRANSPOSE_FIELD(last_alloc); 6649 __TRANSPOSE_FIELD(sequential); 6650 __TRANSPOSE_FIELD(pages_created); 6651 __TRANSPOSE_FIELD(pages_used); 6652 __TRANSPOSE_FIELD(scan_collisions); 6653#if MACH_PAGEMAP 6654 __TRANSPOSE_FIELD(existence_map); 6655#endif 6656 __TRANSPOSE_FIELD(cow_hint); 6657#if MACH_ASSERT 6658 __TRANSPOSE_FIELD(paging_object); 6659#endif 6660 __TRANSPOSE_FIELD(wimg_bits); 6661 __TRANSPOSE_FIELD(set_cache_attr); 6662 __TRANSPOSE_FIELD(code_signed); 6663 if (object1->hashed) { 6664 hash_lck = vm_object_hash_lock_spin(object2->pager); 6665 hash_entry = vm_object_hash_lookup(object2->pager, FALSE); 6666 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); 6667 hash_entry->object = object2; 6668 vm_object_hash_unlock(hash_lck); 6669 } 6670 if (object2->hashed) { 6671 hash_lck = vm_object_hash_lock_spin(object1->pager); 6672 hash_entry = vm_object_hash_lookup(object1->pager, FALSE); 6673 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); 6674 hash_entry->object = object1; 6675 
vm_object_hash_unlock(hash_lck); 6676 } 6677 __TRANSPOSE_FIELD(hashed); 6678 object1->transposed = TRUE; 6679 object2->transposed = TRUE; 6680 __TRANSPOSE_FIELD(mapping_in_progress); 6681 __TRANSPOSE_FIELD(volatile_empty); 6682 __TRANSPOSE_FIELD(volatile_fault); 6683 __TRANSPOSE_FIELD(all_reusable); 6684 assert(object1->blocked_access); 6685 assert(object2->blocked_access); 6686 assert(object1->__object2_unused_bits == 0); 6687 assert(object2->__object2_unused_bits == 0); 6688#if UPL_DEBUG 6689 /* "uplq" refers to the object not its contents (see upl_transpose()) */ 6690#endif 6691 assert(object1->objq.next == NULL); 6692 assert(object1->objq.prev == NULL); 6693 assert(object2->objq.next == NULL); 6694 assert(object2->objq.prev == NULL); 6695 6696#undef __TRANSPOSE_FIELD 6697 6698 retval = KERN_SUCCESS; 6699 6700done: 6701 /* 6702 * Cleanup. 6703 */ 6704 if (tmp_object != VM_OBJECT_NULL) { 6705 vm_object_unlock(tmp_object); 6706 /* 6707 * Re-initialize the temporary object to avoid 6708 * deallocating a real pager. 6709 */ 6710 _vm_object_allocate(transpose_size, tmp_object); 6711 vm_object_deallocate(tmp_object); 6712 tmp_object = VM_OBJECT_NULL; 6713 } 6714 6715 if (object1_locked) { 6716 vm_object_unlock(object1); 6717 object1_locked = FALSE; 6718 } 6719 if (object2_locked) { 6720 vm_object_unlock(object2); 6721 object2_locked = FALSE; 6722 } 6723 6724 vm_object_transpose_count++; 6725 6726 return retval; 6727} 6728 6729 6730/* 6731 * vm_object_cluster_size 6732 * 6733 * Determine how big a cluster we should issue an I/O for... 6734 * 6735 * Inputs: *start == offset of page needed 6736 * *length == maximum cluster pager can handle 6737 * Outputs: *start == beginning offset of cluster 6738 * *length == length of cluster to try 6739 * 6740 * The original *start will be encompassed by the cluster 6741 * 6742 */ 6743extern int speculative_reads_disabled; 6744extern int ignore_is_ssd; 6745 6746#if CONFIG_EMBEDDED 6747unsigned int preheat_pages_max = MAX_UPL_TRANSFER; 6748unsigned int preheat_pages_min = 10; 6749#else 6750unsigned int preheat_pages_max = MAX_UPL_TRANSFER; 6751unsigned int preheat_pages_min = 8; 6752#endif 6753 6754uint32_t pre_heat_scaling[MAX_UPL_TRANSFER + 1]; 6755uint32_t pre_heat_cluster[MAX_UPL_TRANSFER + 1]; 6756 6757 6758__private_extern__ void 6759vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, 6760 vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming) 6761{ 6762 vm_size_t pre_heat_size; 6763 vm_size_t tail_size; 6764 vm_size_t head_size; 6765 vm_size_t max_length; 6766 vm_size_t cluster_size; 6767 vm_object_offset_t object_size; 6768 vm_object_offset_t orig_start; 6769 vm_object_offset_t target_start; 6770 vm_object_offset_t offset; 6771 vm_behavior_t behavior; 6772 boolean_t look_behind = TRUE; 6773 boolean_t look_ahead = TRUE; 6774 boolean_t isSSD = FALSE; 6775 uint32_t throttle_limit; 6776 int sequential_run; 6777 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 6778 unsigned int max_ph_size; 6779 unsigned int min_ph_size; 6780 unsigned int min_ph_size_in_bytes; 6781 6782 assert( !(*length & PAGE_MASK)); 6783 assert( !(*start & PAGE_MASK_64)); 6784 6785 /* 6786 * remember maxiumum length of run requested 6787 */ 6788 max_length = *length; 6789 /* 6790 * we'll always return a cluster size of at least 6791 * 1 page, since the original fault must always 6792 * be processed 6793 */ 6794 *length = PAGE_SIZE; 6795 *io_streaming = 0; 6796 6797 if (speculative_reads_disabled || fault_info == NULL) { 6798 /* 6799 * no 
cluster... just fault the page in 6800 */ 6801 return; 6802 } 6803 orig_start = *start; 6804 target_start = orig_start; 6805 cluster_size = round_page(fault_info->cluster_size); 6806 behavior = fault_info->behavior; 6807 6808 vm_object_lock(object); 6809 6810 if (object->pager == MEMORY_OBJECT_NULL) 6811 goto out; /* pager is gone for this object, nothing more to do */ 6812 6813 if (!ignore_is_ssd) 6814 vnode_pager_get_isSSD(object->pager, &isSSD); 6815 6816 min_ph_size = preheat_pages_min; 6817 max_ph_size = preheat_pages_max; 6818 6819 if (isSSD) { 6820 min_ph_size /= 2; 6821 max_ph_size /= 8; 6822 } 6823 if (min_ph_size < 1) 6824 min_ph_size = 1; 6825 6826 if (max_ph_size < 1) 6827 max_ph_size = 1; 6828 else if (max_ph_size > MAX_UPL_TRANSFER) 6829 max_ph_size = MAX_UPL_TRANSFER; 6830 6831 if (max_length > (max_ph_size * PAGE_SIZE)) 6832 max_length = max_ph_size * PAGE_SIZE; 6833 6834 if (max_length <= PAGE_SIZE) 6835 goto out; 6836 6837 min_ph_size_in_bytes = min_ph_size * PAGE_SIZE; 6838 6839 if (object->internal) 6840 object_size = object->vo_size; 6841 else 6842 vnode_pager_get_object_size(object->pager, &object_size); 6843 6844 object_size = round_page_64(object_size); 6845 6846 if (orig_start >= object_size) { 6847 /* 6848 * fault occurred beyond the EOF... 6849 * we need to punt w/o changing the 6850 * starting offset 6851 */ 6852 goto out; 6853 } 6854 if (object->pages_used > object->pages_created) { 6855 /* 6856 * must have wrapped our 32 bit counters 6857 * so reset 6858 */ 6859 object->pages_used = object->pages_created = 0; 6860 } 6861 if ((sequential_run = object->sequential)) { 6862 if (sequential_run < 0) { 6863 sequential_behavior = VM_BEHAVIOR_RSEQNTL; 6864 sequential_run = 0 - sequential_run; 6865 } else { 6866 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 6867 } 6868 6869 } 6870 switch (behavior) { 6871 6872 default: 6873 behavior = VM_BEHAVIOR_DEFAULT; 6874 6875 case VM_BEHAVIOR_DEFAULT: 6876 if (object->internal && fault_info->user_tag == VM_MEMORY_STACK) 6877 goto out; 6878 6879 if (sequential_run >= (3 * PAGE_SIZE)) { 6880 pre_heat_size = sequential_run + PAGE_SIZE; 6881 6882 if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) 6883 look_behind = FALSE; 6884 else 6885 look_ahead = FALSE; 6886 6887 *io_streaming = 1; 6888 } else { 6889 6890 if (object->pages_created < (20 * min_ph_size)) { 6891 /* 6892 * prime the pump 6893 */ 6894 pre_heat_size = min_ph_size_in_bytes; 6895 } else { 6896 /* 6897 * Linear growth in PH size: The maximum size is max_length... 6898 * this cacluation will result in a size that is neither a 6899 * power of 2 nor a multiple of PAGE_SIZE... 
so round 6900 * it up to the nearest PAGE_SIZE boundary 6901 */ 6902 pre_heat_size = (max_length * object->pages_used) / object->pages_created; 6903 6904 if (pre_heat_size < min_ph_size_in_bytes) 6905 pre_heat_size = min_ph_size_in_bytes; 6906 else 6907 pre_heat_size = round_page(pre_heat_size); 6908 } 6909 } 6910 break; 6911 6912 case VM_BEHAVIOR_RANDOM: 6913 if ((pre_heat_size = cluster_size) <= PAGE_SIZE) 6914 goto out; 6915 break; 6916 6917 case VM_BEHAVIOR_SEQUENTIAL: 6918 if ((pre_heat_size = cluster_size) == 0) 6919 pre_heat_size = sequential_run + PAGE_SIZE; 6920 look_behind = FALSE; 6921 *io_streaming = 1; 6922 6923 break; 6924 6925 case VM_BEHAVIOR_RSEQNTL: 6926 if ((pre_heat_size = cluster_size) == 0) 6927 pre_heat_size = sequential_run + PAGE_SIZE; 6928 look_ahead = FALSE; 6929 *io_streaming = 1; 6930 6931 break; 6932 6933 } 6934 throttle_limit = (uint32_t) max_length; 6935 assert(throttle_limit == max_length); 6936 6937 if (vnode_pager_check_hard_throttle(object->pager, &throttle_limit, *io_streaming) == KERN_SUCCESS) { 6938 if (max_length > throttle_limit) 6939 max_length = throttle_limit; 6940 } 6941 if (pre_heat_size > max_length) 6942 pre_heat_size = max_length; 6943 6944 if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size_in_bytes)) { 6945 6946 unsigned int consider_free = vm_page_free_count + vm_page_cleaned_count; 6947 6948 if (consider_free < vm_page_throttle_limit) { 6949 pre_heat_size = trunc_page(pre_heat_size / 16); 6950 } else if (consider_free < vm_page_free_target) { 6951 pre_heat_size = trunc_page(pre_heat_size / 4); 6952 } 6953 6954 if (pre_heat_size < min_ph_size_in_bytes) 6955 pre_heat_size = min_ph_size_in_bytes; 6956 } 6957 if (look_ahead == TRUE) { 6958 if (look_behind == TRUE) { 6959 /* 6960 * if we get here its due to a random access... 6961 * so we want to center the original fault address 6962 * within the cluster we will issue... make sure 6963 * to calculate 'head_size' as a multiple of PAGE_SIZE... 6964 * 'pre_heat_size' is a multiple of PAGE_SIZE but not 6965 * necessarily an even number of pages so we need to truncate 6966 * the result to a PAGE_SIZE boundary 6967 */ 6968 head_size = trunc_page(pre_heat_size / 2); 6969 6970 if (target_start > head_size) 6971 target_start -= head_size; 6972 else 6973 target_start = 0; 6974 6975 /* 6976 * 'target_start' at this point represents the beginning offset 6977 * of the cluster we are considering... 'orig_start' will be in 6978 * the center of this cluster if we didn't have to clip the start 6979 * due to running into the start of the file 6980 */ 6981 } 6982 if ((target_start + pre_heat_size) > object_size) 6983 pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start)); 6984 /* 6985 * at this point caclulate the number of pages beyond the original fault 6986 * address that we want to consider... this is guaranteed not to extend beyond 6987 * the current EOF... 
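		 * ("tail_size" below is that amount in bytes: the cluster
		 * size minus the head pages in front of the faulting page
		 * and minus the faulting page itself.)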
6988 */ 6989 assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start)); 6990 tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE; 6991 } else { 6992 if (pre_heat_size > target_start) { 6993 /* 6994 * since pre_heat_size is always smaller then 2^32, 6995 * if it is larger then target_start (a 64 bit value) 6996 * it is safe to clip target_start to 32 bits 6997 */ 6998 pre_heat_size = (vm_size_t) target_start; 6999 } 7000 tail_size = 0; 7001 } 7002 assert( !(target_start & PAGE_MASK_64)); 7003 assert( !(pre_heat_size & PAGE_MASK)); 7004 7005 pre_heat_scaling[pre_heat_size / PAGE_SIZE]++; 7006 7007 if (pre_heat_size <= PAGE_SIZE) 7008 goto out; 7009 7010 if (look_behind == TRUE) { 7011 /* 7012 * take a look at the pages before the original 7013 * faulting offset... recalculate this in case 7014 * we had to clip 'pre_heat_size' above to keep 7015 * from running past the EOF. 7016 */ 7017 head_size = pre_heat_size - tail_size - PAGE_SIZE; 7018 7019 for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) { 7020 /* 7021 * don't poke below the lowest offset 7022 */ 7023 if (offset < fault_info->lo_offset) 7024 break; 7025 /* 7026 * for external objects and internal objects w/o an existence map 7027 * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN 7028 */ 7029#if MACH_PAGEMAP 7030 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) { 7031 /* 7032 * we know for a fact that the pager can't provide the page 7033 * so don't include it or any pages beyond it in this cluster 7034 */ 7035 break; 7036 } 7037#endif 7038 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { 7039 /* 7040 * don't bridge resident pages 7041 */ 7042 break; 7043 } 7044 *start = offset; 7045 *length += PAGE_SIZE; 7046 } 7047 } 7048 if (look_ahead == TRUE) { 7049 for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) { 7050 /* 7051 * don't poke above the highest offset 7052 */ 7053 if (offset >= fault_info->hi_offset) 7054 break; 7055 assert(offset < object_size); 7056 7057 /* 7058 * for external objects and internal objects w/o an existence map 7059 * vm_externl_state_get will return VM_EXTERNAL_STATE_UNKNOWN 7060 */ 7061#if MACH_PAGEMAP 7062 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) { 7063 /* 7064 * we know for a fact that the pager can't provide the page 7065 * so don't include it or any pages beyond it in this cluster 7066 */ 7067 break; 7068 } 7069#endif 7070 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { 7071 /* 7072 * don't bridge resident pages 7073 */ 7074 break; 7075 } 7076 *length += PAGE_SIZE; 7077 } 7078 } 7079out: 7080 if (*length > max_length) 7081 *length = max_length; 7082 7083 pre_heat_cluster[*length / PAGE_SIZE]++; 7084 7085 vm_object_unlock(object); 7086 7087 DTRACE_VM1(clustersize, vm_size_t, *length); 7088} 7089 7090 7091/* 7092 * Allow manipulation of individual page state. 
This is actually part of 7093 * the UPL regimen but takes place on the VM object rather than on a UPL 7094 */ 7095 7096kern_return_t 7097vm_object_page_op( 7098 vm_object_t object, 7099 vm_object_offset_t offset, 7100 int ops, 7101 ppnum_t *phys_entry, 7102 int *flags) 7103{ 7104 vm_page_t dst_page; 7105 7106 vm_object_lock(object); 7107 7108 if(ops & UPL_POP_PHYSICAL) { 7109 if(object->phys_contiguous) { 7110 if (phys_entry) { 7111 *phys_entry = (ppnum_t) 7112 (object->vo_shadow_offset >> PAGE_SHIFT); 7113 } 7114 vm_object_unlock(object); 7115 return KERN_SUCCESS; 7116 } else { 7117 vm_object_unlock(object); 7118 return KERN_INVALID_OBJECT; 7119 } 7120 } 7121 if(object->phys_contiguous) { 7122 vm_object_unlock(object); 7123 return KERN_INVALID_OBJECT; 7124 } 7125 7126 while(TRUE) { 7127 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) { 7128 vm_object_unlock(object); 7129 return KERN_FAILURE; 7130 } 7131 7132 /* Sync up on getting the busy bit */ 7133 if((dst_page->busy || dst_page->cleaning) && 7134 (((ops & UPL_POP_SET) && 7135 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) { 7136 /* someone else is playing with the page, we will */ 7137 /* have to wait */ 7138 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 7139 continue; 7140 } 7141 7142 if (ops & UPL_POP_DUMP) { 7143 if (dst_page->pmapped == TRUE) 7144 pmap_disconnect(dst_page->phys_page); 7145 7146 VM_PAGE_FREE(dst_page); 7147 break; 7148 } 7149 7150 if (flags) { 7151 *flags = 0; 7152 7153 /* Get the condition of flags before requested ops */ 7154 /* are undertaken */ 7155 7156 if(dst_page->dirty) *flags |= UPL_POP_DIRTY; 7157 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT; 7158 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS; 7159 if(dst_page->absent) *flags |= UPL_POP_ABSENT; 7160 if(dst_page->busy) *flags |= UPL_POP_BUSY; 7161 } 7162 7163 /* The caller should have made a call either contingent with */ 7164 /* or prior to this call to set UPL_POP_BUSY */ 7165 if(ops & UPL_POP_SET) { 7166 /* The protection granted with this assert will */ 7167 /* not be complete. If the caller violates the */ 7168 /* convention and attempts to change page state */ 7169 /* without first setting busy we may not see it */ 7170 /* because the page may already be busy. However */ 7171 /* if such violations occur we will assert sooner */ 7172 /* or later. */ 7173 assert(dst_page->busy || (ops & UPL_POP_BUSY)); 7174 if (ops & UPL_POP_DIRTY) { 7175 SET_PAGE_DIRTY(dst_page, FALSE); 7176 } 7177 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE; 7178 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE; 7179 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE; 7180 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE; 7181 } 7182 7183 if(ops & UPL_POP_CLR) { 7184 assert(dst_page->busy); 7185 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE; 7186 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE; 7187 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE; 7188 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE; 7189 if (ops & UPL_POP_BUSY) { 7190 dst_page->busy = FALSE; 7191 PAGE_WAKEUP(dst_page); 7192 } 7193 } 7194 7195 if (dst_page->encrypted) { 7196 /* 7197 * ENCRYPTED SWAP: 7198 * We need to decrypt this encrypted page before the 7199 * caller can access its contents. 7200 * But if the caller really wants to access the page's 7201 * contents, they have to keep the page "busy". 7202 * Otherwise, the page could get recycled or re-encrypted 7203 * at any time. 
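			 * (In this routine that means decryption only
			 * happens when the same call is also setting the
			 * busy bit via UPL_POP_SET | UPL_POP_BUSY, so the
			 * page stays busy for the caller afterwards.)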
7204 */ 7205 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) && 7206 dst_page->busy) { 7207 /* 7208 * The page is stable enough to be accessed by 7209 * the caller, so make sure its contents are 7210 * not encrypted. 7211 */ 7212 vm_page_decrypt(dst_page, 0); 7213 } else { 7214 /* 7215 * The page is not busy, so don't bother 7216 * decrypting it, since anything could 7217 * happen to it between now and when the 7218 * caller wants to access it. 7219 * We should not give the caller access 7220 * to this page. 7221 */ 7222 assert(!phys_entry); 7223 } 7224 } 7225 7226 if (phys_entry) { 7227 /* 7228 * The physical page number will remain valid 7229 * only if the page is kept busy. 7230 * ENCRYPTED SWAP: make sure we don't let the 7231 * caller access an encrypted page. 7232 */ 7233 assert(dst_page->busy); 7234 assert(!dst_page->encrypted); 7235 *phys_entry = dst_page->phys_page; 7236 } 7237 7238 break; 7239 } 7240 7241 vm_object_unlock(object); 7242 return KERN_SUCCESS; 7243 7244} 7245 7246/* 7247 * vm_object_range_op offers performance enhancement over 7248 * vm_object_page_op for page_op functions which do not require page 7249 * level state to be returned from the call. Page_op was created to provide 7250 * a low-cost alternative to page manipulation via UPLs when only a single 7251 * page was involved. The range_op call establishes the ability in the _op 7252 * family of functions to work on multiple pages where the lack of page level 7253 * state handling allows the caller to avoid the overhead of the upl structures. 7254 */ 7255 7256kern_return_t 7257vm_object_range_op( 7258 vm_object_t object, 7259 vm_object_offset_t offset_beg, 7260 vm_object_offset_t offset_end, 7261 int ops, 7262 uint32_t *range) 7263{ 7264 vm_object_offset_t offset; 7265 vm_page_t dst_page; 7266 7267 if (offset_end - offset_beg > (uint32_t) -1) { 7268 /* range is too big and would overflow "*range" */ 7269 return KERN_INVALID_ARGUMENT; 7270 } 7271 if (object->resident_page_count == 0) { 7272 if (range) { 7273 if (ops & UPL_ROP_PRESENT) { 7274 *range = 0; 7275 } else { 7276 *range = (uint32_t) (offset_end - offset_beg); 7277 assert(*range == (offset_end - offset_beg)); 7278 } 7279 } 7280 return KERN_SUCCESS; 7281 } 7282 vm_object_lock(object); 7283 7284 if (object->phys_contiguous) { 7285 vm_object_unlock(object); 7286 return KERN_INVALID_OBJECT; 7287 } 7288 7289 offset = offset_beg & ~PAGE_MASK_64; 7290 7291 while (offset < offset_end) { 7292 dst_page = vm_page_lookup(object, offset); 7293 if (dst_page != VM_PAGE_NULL) { 7294 if (ops & UPL_ROP_DUMP) { 7295 if (dst_page->busy || dst_page->cleaning) { 7296 /* 7297 * someone else is playing with the 7298 * page, we will have to wait 7299 */ 7300 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 7301 /* 7302 * need to relook the page up since it's 7303 * state may have changed while we slept 7304 * it might even belong to a different object 7305 * at this point 7306 */ 7307 continue; 7308 } 7309 if (dst_page->laundry) { 7310 dst_page->pageout = FALSE; 7311 7312 vm_pageout_steal_laundry(dst_page, FALSE); 7313 } 7314 if (dst_page->pmapped == TRUE) 7315 pmap_disconnect(dst_page->phys_page); 7316 7317 VM_PAGE_FREE(dst_page); 7318 7319 } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent) 7320 break; 7321 } else if (ops & UPL_ROP_PRESENT) 7322 break; 7323 7324 offset += PAGE_SIZE; 7325 } 7326 vm_object_unlock(object); 7327 7328 if (range) { 7329 if (offset > offset_end) 7330 offset = offset_end; 7331 if(offset > offset_beg) { 7332 *range = (uint32_t) (offset - offset_beg); 7333 
assert(*range == (offset - offset_beg)); 7334 } else { 7335 *range = 0; 7336 } 7337 } 7338 return KERN_SUCCESS; 7339} 7340 7341 7342uint32_t scan_object_collision = 0; 7343 7344void 7345vm_object_lock(vm_object_t object) 7346{ 7347 if (object == vm_pageout_scan_wants_object) { 7348 scan_object_collision++; 7349 mutex_pause(2); 7350 } 7351 lck_rw_lock_exclusive(&object->Lock); 7352} 7353 7354boolean_t 7355vm_object_lock_avoid(vm_object_t object) 7356{ 7357 if (object == vm_pageout_scan_wants_object) { 7358 scan_object_collision++; 7359 return TRUE; 7360 } 7361 return FALSE; 7362} 7363 7364boolean_t 7365_vm_object_lock_try(vm_object_t object) 7366{ 7367 return (lck_rw_try_lock_exclusive(&object->Lock)); 7368} 7369 7370boolean_t 7371vm_object_lock_try(vm_object_t object) 7372{ 7373 /* 7374 * Called from hibernate path so check before blocking. 7375 */ 7376 if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled() && get_preemption_level()==0) { 7377 mutex_pause(2); 7378 } 7379 return _vm_object_lock_try(object); 7380} 7381 7382void 7383vm_object_lock_shared(vm_object_t object) 7384{ 7385 if (vm_object_lock_avoid(object)) { 7386 mutex_pause(2); 7387 } 7388 lck_rw_lock_shared(&object->Lock); 7389} 7390 7391boolean_t 7392vm_object_lock_try_shared(vm_object_t object) 7393{ 7394 if (vm_object_lock_avoid(object)) { 7395 mutex_pause(2); 7396 } 7397 return (lck_rw_try_lock_shared(&object->Lock)); 7398} 7399 7400 7401unsigned int vm_object_change_wimg_mode_count = 0; 7402 7403/* 7404 * The object must be locked 7405 */ 7406void 7407vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode) 7408{ 7409 vm_page_t p; 7410 7411 vm_object_lock_assert_exclusive(object); 7412 7413 vm_object_paging_wait(object, THREAD_UNINT); 7414 7415 queue_iterate(&object->memq, p, vm_page_t, listq) { 7416 7417 if (!p->fictitious) 7418 pmap_set_cache_attributes(p->phys_page, wimg_mode); 7419 } 7420 if (wimg_mode == VM_WIMG_USE_DEFAULT) 7421 object->set_cache_attr = FALSE; 7422 else 7423 object->set_cache_attr = TRUE; 7424 7425 object->wimg_bits = wimg_mode; 7426 7427 vm_object_change_wimg_mode_count++; 7428} 7429 7430#if CONFIG_FREEZE 7431 7432kern_return_t vm_object_pack( 7433 unsigned int *purgeable_count, 7434 unsigned int *wired_count, 7435 unsigned int *clean_count, 7436 unsigned int *dirty_count, 7437 unsigned int dirty_budget, 7438 boolean_t *shared, 7439 vm_object_t src_object, 7440 struct default_freezer_handle *df_handle) 7441{ 7442 kern_return_t kr = KERN_SUCCESS; 7443 7444 vm_object_lock(src_object); 7445 7446 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0; 7447 *shared = FALSE; 7448 7449 if (!src_object->alive || src_object->terminating){ 7450 kr = KERN_FAILURE; 7451 goto done; 7452 } 7453 7454 if (src_object->purgable == VM_PURGABLE_VOLATILE) { 7455 *purgeable_count = src_object->resident_page_count; 7456 7457 /* If the default freezer handle is null, we're just walking the pages to discover how many can be hibernated */ 7458 if (df_handle != NULL) { 7459 purgeable_q_t queue; 7460 /* object should be on a queue */ 7461 assert(src_object->objq.next != NULL && 7462 src_object->objq.prev != NULL); 7463 queue = vm_purgeable_object_remove(src_object); 7464 assert(queue); 7465 vm_page_lock_queues(); 7466 vm_purgeable_token_delete_first(queue); 7467 vm_page_unlock_queues(); 7468 vm_object_purge(src_object); 7469 } 7470 goto done; 7471 } 7472 7473 if (src_object->ref_count == 1) { 7474 vm_object_pack_pages(wired_count, clean_count, dirty_count, dirty_budget, src_object, df_handle); 
7475 } else { 7476 if (src_object->internal) { 7477 *shared = TRUE; 7478 } 7479 } 7480done: 7481 vm_object_unlock(src_object); 7482 7483 return kr; 7484} 7485 7486 7487void 7488vm_object_pack_pages( 7489 unsigned int *wired_count, 7490 unsigned int *clean_count, 7491 unsigned int *dirty_count, 7492 unsigned int dirty_budget, 7493 vm_object_t src_object, 7494 struct default_freezer_handle *df_handle) 7495{ 7496 vm_page_t p, next; 7497 7498 next = (vm_page_t)queue_first(&src_object->memq); 7499 7500 while (!queue_end(&src_object->memq, (queue_entry_t)next)) { 7501 p = next; 7502 next = (vm_page_t)queue_next(&next->listq); 7503 7504 /* Finish up if we've hit our pageout limit */ 7505 if (dirty_budget && (dirty_budget == *dirty_count)) { 7506 break; 7507 } 7508 assert(!p->laundry); 7509 7510 if (p->fictitious || p->busy ) 7511 continue; 7512 7513 if (p->absent || p->unusual || p->error) 7514 continue; 7515 7516 if (VM_PAGE_WIRED(p)) { 7517 (*wired_count)++; 7518 continue; 7519 } 7520 7521 if (df_handle == NULL) { 7522 if (p->dirty || pmap_is_modified(p->phys_page)) { 7523 (*dirty_count)++; 7524 } else { 7525 (*clean_count)++; 7526 } 7527 continue; 7528 } 7529 7530 if (p->cleaning) { 7531 p->pageout = TRUE; 7532 continue; 7533 } 7534 7535 if (p->pmapped == TRUE) { 7536 int refmod_state; 7537 refmod_state = pmap_disconnect(p->phys_page); 7538 if (refmod_state & VM_MEM_MODIFIED) { 7539 SET_PAGE_DIRTY(p, FALSE); 7540 } 7541 } 7542 7543 if (p->dirty) { 7544 default_freezer_pack_page(p, df_handle); 7545 (*dirty_count)++; 7546 } 7547 else { 7548 VM_PAGE_FREE(p); 7549 (*clean_count)++; 7550 } 7551 } 7552} 7553 7554void 7555vm_object_pageout( 7556 vm_object_t object) 7557{ 7558 vm_page_t p, next; 7559 7560 assert(object != VM_OBJECT_NULL ); 7561 7562 vm_object_lock(object); 7563 7564 next = (vm_page_t)queue_first(&object->memq); 7565 7566 while (!queue_end(&object->memq, (queue_entry_t)next)) { 7567 p = next; 7568 next = (vm_page_t)queue_next(&next->listq); 7569 7570 /* Throw to the pageout queue */ 7571 vm_page_lockspin_queues(); 7572 7573 /* 7574 * see if page is already in the process of 7575 * being cleaned... if so, leave it alone 7576 */ 7577 if (!p->laundry) { 7578 VM_PAGE_QUEUES_REMOVE(p); 7579 vm_pageout_cluster(p, TRUE); 7580 } 7581 vm_page_unlock_queues(); 7582 } 7583 7584 vm_object_unlock(object); 7585} 7586 7587kern_return_t 7588vm_object_pagein( 7589 vm_object_t object) 7590{ 7591 memory_object_t pager; 7592 kern_return_t kr; 7593 7594 vm_object_lock(object); 7595 7596 pager = object->pager; 7597 7598 if (!object->pager_ready || pager == MEMORY_OBJECT_NULL) { 7599 vm_object_unlock(object); 7600 return KERN_FAILURE; 7601 } 7602 7603 vm_object_paging_wait(object, THREAD_UNINT); 7604 vm_object_paging_begin(object); 7605 7606 object->blocked_access = TRUE; 7607 vm_object_unlock(object); 7608 7609 kr = memory_object_data_reclaim(pager, TRUE); 7610 7611 vm_object_lock(object); 7612 7613 object->blocked_access = FALSE; 7614 vm_object_paging_end(object); 7615 7616 vm_object_unlock(object); 7617 7618 return kr; 7619} 7620#endif /* CONFIG_FREEZE */ 7621
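
/*
 * Usage sketch for the page_op/range_op interfaces above: take a single
 * page busy, read back its physical page number, release it again, and
 * then dump every resident page in a range.  This is a hypothetical,
 * uncompiled caller ("example_page_and_range_ops" is not part of this
 * file's interface); the object reference and the page-aligned offsets
 * are assumed to be supplied by a caller that does not already hold the
 * object lock, since both routines take the lock themselves.
 */
#if 0	/* hypothetical example; not compiled */
static kern_return_t
example_page_and_range_ops(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	offset_end)
{
	ppnum_t		phys;
	int		page_flags;
	uint32_t	range;
	kern_return_t	kr;

	/*
	 * Mark the page busy and fetch its state and physical page number.
	 * The physical page number stays valid only while the page is busy.
	 */
	kr = vm_object_page_op(object, offset,
			       UPL_POP_SET | UPL_POP_BUSY,
			       &phys, &page_flags);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... use "phys" while the page is kept busy ... */

	/* Clear the busy bit again, waking up any waiters. */
	(void) vm_object_page_op(object, offset,
				 UPL_POP_CLR | UPL_POP_BUSY,
				 NULL, NULL);

	/* Discard all resident pages in [offset, offset_end). */
	return vm_object_range_op(object, offset, offset_end,
				  UPL_ROP_DUMP, &range);
}
#endif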