1/* 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58/* 59 * File: vm/vm_object.c 60 * Author: Avadis Tevanian, Jr., Michael Wayne Young 61 * 62 * Virtual memory object module. 
63 */ 64 65#include <debug.h> 66#include <mach_pagemap.h> 67#include <task_swapper.h> 68 69#include <mach/mach_types.h> 70#include <mach/memory_object.h> 71#include <mach/memory_object_default.h> 72#include <mach/memory_object_control_server.h> 73#include <mach/vm_param.h> 74 75#include <mach/sdt.h> 76 77#include <ipc/ipc_types.h> 78#include <ipc/ipc_port.h> 79 80#include <kern/kern_types.h> 81#include <kern/assert.h> 82#include <kern/queue.h> 83#include <kern/xpr.h> 84#include <kern/kalloc.h> 85#include <kern/zalloc.h> 86#include <kern/host.h> 87#include <kern/host_statistics.h> 88#include <kern/processor.h> 89#include <kern/misc_protos.h> 90 91#include <vm/memory_object.h> 92#include <vm/vm_compressor_pager.h> 93#include <vm/vm_fault.h> 94#include <vm/vm_map.h> 95#include <vm/vm_object.h> 96#include <vm/vm_page.h> 97#include <vm/vm_pageout.h> 98#include <vm/vm_protos.h> 99#include <vm/vm_purgeable_internal.h> 100 101#include <vm/vm_compressor.h> 102 103#if CONFIG_PHANTOM_CACHE 104#include <vm/vm_phantom_cache.h> 105#endif 106 107boolean_t vm_object_collapse_compressor_allowed = TRUE; 108 109struct vm_counters vm_counters; 110 111#if VM_OBJECT_TRACKING 112boolean_t vm_object_tracking_inited = FALSE; 113decl_simple_lock_data(static,vm_object_tracking_lock_data); 114btlog_t *vm_object_tracking_btlog; 115static void 116vm_object_tracking_lock(void *context) 117{ 118 simple_lock((simple_lock_t)context); 119} 120static void 121vm_object_tracking_unlock(void *context) 122{ 123 simple_unlock((simple_lock_t)context); 124} 125void 126vm_object_tracking_init(void) 127{ 128 int vm_object_tracking; 129 130 vm_object_tracking = 1; 131 PE_parse_boot_argn("vm_object_tracking", &vm_object_tracking, 132 sizeof (vm_object_tracking)); 133 134 if (vm_object_tracking) { 135 simple_lock_init(&vm_object_tracking_lock_data, 0); 136 vm_object_tracking_btlog = btlog_create( 137 50000, 138 VM_OBJECT_TRACKING_BTDEPTH, 139 vm_object_tracking_lock, 140 vm_object_tracking_unlock, 141 &vm_object_tracking_lock_data); 142 assert(vm_object_tracking_btlog); 143 vm_object_tracking_inited = TRUE; 144 } 145} 146#endif /* VM_OBJECT_TRACKING */ 147 148/* 149 * Virtual memory objects maintain the actual data 150 * associated with allocated virtual memory. A given 151 * page of memory exists within exactly one object. 152 * 153 * An object is only deallocated when all "references" 154 * are given up. 155 * 156 * Associated with each object is a list of all resident 157 * memory pages belonging to that object; this list is 158 * maintained by the "vm_page" module, but locked by the object's 159 * lock. 160 * 161 * Each object also records the memory object reference 162 * that is used by the kernel to request and write 163 * back data (the memory object, field "pager"), etc... 164 * 165 * Virtual memory objects are allocated to provide 166 * zero-filled memory (vm_allocate) or map a user-defined 167 * memory object into a virtual address space (vm_map). 168 * 169 * Virtual memory objects that refer to a user-defined 170 * memory object are called "permanent", because all changes 171 * made in virtual memory are reflected back to the 172 * memory manager, which may then store it permanently. 173 * Other virtual memory objects are called "temporary", 174 * meaning that changes need be written back only when 175 * necessary to reclaim pages, and that storage associated 176 * with the object can be discarded once it is no longer 177 * mapped. 
178 * 179 * A permanent memory object may be mapped into more 180 * than one virtual address space. Moreover, two threads 181 * may attempt to make the first mapping of a memory 182 * object concurrently. Only one thread is allowed to 183 * complete this mapping; all others wait for the 184 * "pager_initialized" field is asserted, indicating 185 * that the first thread has initialized all of the 186 * necessary fields in the virtual memory object structure. 187 * 188 * The kernel relies on a *default memory manager* to 189 * provide backing storage for the zero-filled virtual 190 * memory objects. The pager memory objects associated 191 * with these temporary virtual memory objects are only 192 * requested from the default memory manager when it 193 * becomes necessary. Virtual memory objects 194 * that depend on the default memory manager are called 195 * "internal". The "pager_created" field is provided to 196 * indicate whether these ports have ever been allocated. 197 * 198 * The kernel may also create virtual memory objects to 199 * hold changed pages after a copy-on-write operation. 200 * In this case, the virtual memory object (and its 201 * backing storage -- its memory object) only contain 202 * those pages that have been changed. The "shadow" 203 * field refers to the virtual memory object that contains 204 * the remainder of the contents. The "shadow_offset" 205 * field indicates where in the "shadow" these contents begin. 206 * The "copy" field refers to a virtual memory object 207 * to which changed pages must be copied before changing 208 * this object, in order to implement another form 209 * of copy-on-write optimization. 210 * 211 * The virtual memory object structure also records 212 * the attributes associated with its memory object. 213 * The "pager_ready", "can_persist" and "copy_strategy" 214 * fields represent those attributes. The "cached_list" 215 * field is used in the implementation of the persistence 216 * attribute. 217 * 218 * ZZZ Continue this comment. 219 */ 220 221/* Forward declarations for internal functions. */ 222static kern_return_t vm_object_terminate( 223 vm_object_t object); 224 225extern void vm_object_remove( 226 vm_object_t object); 227 228static kern_return_t vm_object_copy_call( 229 vm_object_t src_object, 230 vm_object_offset_t src_offset, 231 vm_object_size_t size, 232 vm_object_t *_result_object); 233 234static void vm_object_do_collapse( 235 vm_object_t object, 236 vm_object_t backing_object); 237 238static void vm_object_do_bypass( 239 vm_object_t object, 240 vm_object_t backing_object); 241 242static void vm_object_release_pager( 243 memory_object_t pager, 244 boolean_t hashed); 245 246static zone_t vm_object_zone; /* vm backing store zone */ 247 248/* 249 * All wired-down kernel memory belongs to a single virtual 250 * memory object (kernel_object) to avoid wasting data structures. 251 */ 252static struct vm_object kernel_object_store; 253vm_object_t kernel_object; 254 255static struct vm_object compressor_object_store; 256vm_object_t compressor_object = &compressor_object_store; 257 258/* 259 * The submap object is used as a placeholder for vm_map_submap 260 * operations. The object is declared in vm_map.c because it 261 * is exported by the vm_map module. The storage is declared 262 * here because it must be initialized here. 263 */ 264static struct vm_object vm_submap_object_store; 265 266/* 267 * Virtual memory objects are initialized from 268 * a template (see vm_object_allocate). 
269 * 270 * When adding a new field to the virtual memory 271 * object structure, be sure to add initialization 272 * (see _vm_object_allocate()). 273 */ 274static struct vm_object vm_object_template; 275 276unsigned int vm_page_purged_wired = 0; 277unsigned int vm_page_purged_busy = 0; 278unsigned int vm_page_purged_others = 0; 279 280#if VM_OBJECT_CACHE 281/* 282 * Virtual memory objects that are not referenced by 283 * any address maps, but that are allowed to persist 284 * (an attribute specified by the associated memory manager), 285 * are kept in a queue (vm_object_cached_list). 286 * 287 * When an object from this queue is referenced again, 288 * for example to make another address space mapping, 289 * it must be removed from the queue. That is, the 290 * queue contains *only* objects with zero references. 291 * 292 * The kernel may choose to terminate objects from this 293 * queue in order to reclaim storage. The current policy 294 * is to permit a fixed maximum number of unreferenced 295 * objects (vm_object_cached_max). 296 * 297 * A spin lock (accessed by routines 298 * vm_object_cache_{lock,lock_try,unlock}) governs the 299 * object cache. It must be held when objects are 300 * added to or removed from the cache (in vm_object_terminate). 301 * The routines that acquire a reference to a virtual 302 * memory object based on one of the memory object ports 303 * must also lock the cache. 304 * 305 * Ideally, the object cache should be more isolated 306 * from the reference mechanism, so that the lock need 307 * not be held to make simple references. 308 */ 309static vm_object_t vm_object_cache_trim( 310 boolean_t called_from_vm_object_deallocate); 311 312static void vm_object_deactivate_all_pages( 313 vm_object_t object); 314 315static int vm_object_cached_high; /* highest # cached objects */ 316static int vm_object_cached_max = 512; /* may be patched*/ 317 318#define vm_object_cache_lock() \ 319 lck_mtx_lock(&vm_object_cached_lock_data) 320#define vm_object_cache_lock_try() \ 321 lck_mtx_try_lock(&vm_object_cached_lock_data) 322 323#endif /* VM_OBJECT_CACHE */ 324 325static queue_head_t vm_object_cached_list; 326static uint32_t vm_object_cache_pages_freed = 0; 327static uint32_t vm_object_cache_pages_moved = 0; 328static uint32_t vm_object_cache_pages_skipped = 0; 329static uint32_t vm_object_cache_adds = 0; 330static uint32_t vm_object_cached_count = 0; 331static lck_mtx_t vm_object_cached_lock_data; 332static lck_mtx_ext_t vm_object_cached_lock_data_ext; 333 334static uint32_t vm_object_page_grab_failed = 0; 335static uint32_t vm_object_page_grab_skipped = 0; 336static uint32_t vm_object_page_grab_returned = 0; 337static uint32_t vm_object_page_grab_pmapped = 0; 338static uint32_t vm_object_page_grab_reactivations = 0; 339 340#define vm_object_cache_lock_spin() \ 341 lck_mtx_lock_spin(&vm_object_cached_lock_data) 342#define vm_object_cache_unlock() \ 343 lck_mtx_unlock(&vm_object_cached_lock_data) 344 345static void vm_object_cache_remove_locked(vm_object_t); 346 347 348#define VM_OBJECT_HASH_COUNT 1024 349#define VM_OBJECT_HASH_LOCK_COUNT 512 350 351static lck_mtx_t vm_object_hashed_lock_data[VM_OBJECT_HASH_LOCK_COUNT]; 352static lck_mtx_ext_t vm_object_hashed_lock_data_ext[VM_OBJECT_HASH_LOCK_COUNT]; 353 354static queue_head_t vm_object_hashtable[VM_OBJECT_HASH_COUNT]; 355static struct zone *vm_object_hash_zone; 356 357struct vm_object_hash_entry { 358 queue_chain_t hash_link; /* hash chain link */ 359 memory_object_t pager; /* pager we represent */ 360 vm_object_t object; /* 
corresponding object */ 361 boolean_t waiting; /* someone waiting for 362 * termination */ 363}; 364 365typedef struct vm_object_hash_entry *vm_object_hash_entry_t; 366#define VM_OBJECT_HASH_ENTRY_NULL ((vm_object_hash_entry_t) 0) 367 368#define VM_OBJECT_HASH_SHIFT 5 369#define vm_object_hash(pager) \ 370 ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_COUNT)) 371 372#define vm_object_lock_hash(pager) \ 373 ((int)((((uintptr_t)pager) >> VM_OBJECT_HASH_SHIFT) % VM_OBJECT_HASH_LOCK_COUNT)) 374 375void vm_object_hash_entry_free( 376 vm_object_hash_entry_t entry); 377 378static void vm_object_reap(vm_object_t object); 379static void vm_object_reap_async(vm_object_t object); 380static void vm_object_reaper_thread(void); 381 382static lck_mtx_t vm_object_reaper_lock_data; 383static lck_mtx_ext_t vm_object_reaper_lock_data_ext; 384 385static queue_head_t vm_object_reaper_queue; /* protected by vm_object_reaper_lock() */ 386unsigned int vm_object_reap_count = 0; 387unsigned int vm_object_reap_count_async = 0; 388 389#define vm_object_reaper_lock() \ 390 lck_mtx_lock(&vm_object_reaper_lock_data) 391#define vm_object_reaper_lock_spin() \ 392 lck_mtx_lock_spin(&vm_object_reaper_lock_data) 393#define vm_object_reaper_unlock() \ 394 lck_mtx_unlock(&vm_object_reaper_lock_data) 395 396#if CONFIG_IOSCHED 397/* I/O Re-prioritization request list */ 398queue_head_t io_reprioritize_list; 399lck_spin_t io_reprioritize_list_lock; 400 401#define IO_REPRIORITIZE_LIST_LOCK() \ 402 lck_spin_lock(&io_reprioritize_list_lock) 403#define IO_REPRIORITIZE_LIST_UNLOCK() \ 404 lck_spin_unlock(&io_reprioritize_list_lock) 405 406#define MAX_IO_REPRIORITIZE_REQS 8192 407zone_t io_reprioritize_req_zone; 408 409/* I/O Re-prioritization thread */ 410int io_reprioritize_wakeup = 0; 411static void io_reprioritize_thread(void *param __unused, wait_result_t wr __unused); 412 413#define IO_REPRIO_THREAD_WAKEUP() thread_wakeup((event_t)&io_reprioritize_wakeup) 414#define IO_REPRIO_THREAD_CONTINUATION() \ 415{ \ 416 assert_wait(&io_reprioritize_wakeup, THREAD_UNINT); \ 417 thread_block(io_reprioritize_thread); \ 418} 419 420void vm_page_request_reprioritize(vm_object_t, uint64_t, uint32_t, int); 421void vm_page_handle_prio_inversion(vm_object_t, vm_page_t); 422void vm_decmp_upl_reprioritize(upl_t, int); 423#endif 424 425#if 0 426#undef KERNEL_DEBUG 427#define KERNEL_DEBUG KERNEL_DEBUG_CONSTANT 428#endif 429 430 431static lck_mtx_t * 432vm_object_hash_lock_spin( 433 memory_object_t pager) 434{ 435 int index; 436 437 index = vm_object_lock_hash(pager); 438 439 lck_mtx_lock_spin(&vm_object_hashed_lock_data[index]); 440 441 return (&vm_object_hashed_lock_data[index]); 442} 443 444static void 445vm_object_hash_unlock(lck_mtx_t *lck) 446{ 447 lck_mtx_unlock(lck); 448} 449 450 451/* 452 * vm_object_hash_lookup looks up a pager in the hashtable 453 * and returns the corresponding entry, with optional removal. 
454 */ 455static vm_object_hash_entry_t 456vm_object_hash_lookup( 457 memory_object_t pager, 458 boolean_t remove_entry) 459{ 460 queue_t bucket; 461 vm_object_hash_entry_t entry; 462 463 bucket = &vm_object_hashtable[vm_object_hash(pager)]; 464 465 entry = (vm_object_hash_entry_t)queue_first(bucket); 466 while (!queue_end(bucket, (queue_entry_t)entry)) { 467 if (entry->pager == pager) { 468 if (remove_entry) { 469 queue_remove(bucket, entry, 470 vm_object_hash_entry_t, hash_link); 471 } 472 return(entry); 473 } 474 entry = (vm_object_hash_entry_t)queue_next(&entry->hash_link); 475 } 476 return(VM_OBJECT_HASH_ENTRY_NULL); 477} 478 479/* 480 * vm_object_hash_enter enters the specified 481 * pager / cache object association in the hashtable. 482 */ 483 484static void 485vm_object_hash_insert( 486 vm_object_hash_entry_t entry, 487 vm_object_t object) 488{ 489 queue_t bucket; 490 491 vm_object_lock_assert_exclusive(object); 492 493 bucket = &vm_object_hashtable[vm_object_hash(entry->pager)]; 494 495 queue_enter(bucket, entry, vm_object_hash_entry_t, hash_link); 496 497 entry->object = object; 498 object->hashed = TRUE; 499} 500 501static vm_object_hash_entry_t 502vm_object_hash_entry_alloc( 503 memory_object_t pager) 504{ 505 vm_object_hash_entry_t entry; 506 507 entry = (vm_object_hash_entry_t)zalloc(vm_object_hash_zone); 508 entry->pager = pager; 509 entry->object = VM_OBJECT_NULL; 510 entry->waiting = FALSE; 511 512 return(entry); 513} 514 515void 516vm_object_hash_entry_free( 517 vm_object_hash_entry_t entry) 518{ 519 zfree(vm_object_hash_zone, entry); 520} 521 522/* 523 * vm_object_allocate: 524 * 525 * Returns a new object with the given size. 526 */ 527 528__private_extern__ void 529_vm_object_allocate( 530 vm_object_size_t size, 531 vm_object_t object) 532{ 533 XPR(XPR_VM_OBJECT, 534 "vm_object_allocate, object 0x%X size 0x%X\n", 535 object, size, 0,0,0); 536 537 *object = vm_object_template; 538 queue_init(&object->memq); 539 queue_init(&object->msr_q); 540#if UPL_DEBUG || CONFIG_IOSCHED 541 queue_init(&object->uplq); 542#endif 543 vm_object_lock_init(object); 544 object->vo_size = size; 545 546#if VM_OBJECT_TRACKING_OP_CREATED 547 if (vm_object_tracking_inited) { 548 void *bt[VM_OBJECT_TRACKING_BTDEPTH]; 549 int numsaved = 0; 550 551 numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH); 552 btlog_add_entry(vm_object_tracking_btlog, 553 object, 554 VM_OBJECT_TRACKING_OP_CREATED, 555 bt, 556 numsaved); 557 } 558#endif /* VM_OBJECT_TRACKING_OP_CREATED */ 559} 560 561__private_extern__ vm_object_t 562vm_object_allocate( 563 vm_object_size_t size) 564{ 565 register vm_object_t object; 566 567 object = (vm_object_t) zalloc(vm_object_zone); 568 569// dbgLog(object, size, 0, 2); /* (TEST/DEBUG) */ 570 571 if (object != VM_OBJECT_NULL) 572 _vm_object_allocate(size, object); 573 574 return object; 575} 576 577 578lck_grp_t vm_object_lck_grp; 579lck_grp_t vm_object_cache_lck_grp; 580lck_grp_attr_t vm_object_lck_grp_attr; 581lck_attr_t vm_object_lck_attr; 582lck_attr_t kernel_object_lck_attr; 583lck_attr_t compressor_object_lck_attr; 584 585/* 586 * vm_object_bootstrap: 587 * 588 * Initialize the VM objects module. 
589 */ 590__private_extern__ void 591vm_object_bootstrap(void) 592{ 593 register int i; 594 595 vm_object_zone = zinit((vm_size_t) sizeof(struct vm_object), 596 round_page(512*1024), 597 round_page(12*1024), 598 "vm objects"); 599 zone_change(vm_object_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ 600 zone_change(vm_object_zone, Z_NOENCRYPT, TRUE); 601 602 vm_object_init_lck_grp(); 603 604 queue_init(&vm_object_cached_list); 605 606 lck_mtx_init_ext(&vm_object_cached_lock_data, 607 &vm_object_cached_lock_data_ext, 608 &vm_object_cache_lck_grp, 609 &vm_object_lck_attr); 610 611 queue_init(&vm_object_reaper_queue); 612 613 for (i = 0; i < VM_OBJECT_HASH_LOCK_COUNT; i++) { 614 lck_mtx_init_ext(&vm_object_hashed_lock_data[i], 615 &vm_object_hashed_lock_data_ext[i], 616 &vm_object_lck_grp, 617 &vm_object_lck_attr); 618 } 619 lck_mtx_init_ext(&vm_object_reaper_lock_data, 620 &vm_object_reaper_lock_data_ext, 621 &vm_object_lck_grp, 622 &vm_object_lck_attr); 623 624 vm_object_hash_zone = 625 zinit((vm_size_t) sizeof (struct vm_object_hash_entry), 626 round_page(512*1024), 627 round_page(12*1024), 628 "vm object hash entries"); 629 zone_change(vm_object_hash_zone, Z_CALLERACCT, FALSE); 630 zone_change(vm_object_hash_zone, Z_NOENCRYPT, TRUE); 631 632 for (i = 0; i < VM_OBJECT_HASH_COUNT; i++) 633 queue_init(&vm_object_hashtable[i]); 634 635 636 /* 637 * Fill in a template object, for quick initialization 638 */ 639 640 /* memq; Lock; init after allocation */ 641 vm_object_template.memq.prev = NULL; 642 vm_object_template.memq.next = NULL; 643#if 0 644 /* 645 * We can't call vm_object_lock_init() here because that will 646 * allocate some memory and VM is not fully initialized yet. 647 * The lock will be initialized for each allocated object in 648 * _vm_object_allocate(), so we don't need to initialize it in 649 * the vm_object_template. 
650 */ 651 vm_object_lock_init(&vm_object_template); 652#endif 653 vm_object_template.vo_size = 0; 654 vm_object_template.memq_hint = VM_PAGE_NULL; 655 vm_object_template.ref_count = 1; 656#if TASK_SWAPPER 657 vm_object_template.res_count = 1; 658#endif /* TASK_SWAPPER */ 659 vm_object_template.resident_page_count = 0; 660 vm_object_template.wired_page_count = 0; 661 vm_object_template.reusable_page_count = 0; 662 vm_object_template.copy = VM_OBJECT_NULL; 663 vm_object_template.shadow = VM_OBJECT_NULL; 664 vm_object_template.vo_shadow_offset = (vm_object_offset_t) 0; 665 vm_object_template.pager = MEMORY_OBJECT_NULL; 666 vm_object_template.paging_offset = 0; 667 vm_object_template.pager_control = MEMORY_OBJECT_CONTROL_NULL; 668 vm_object_template.copy_strategy = MEMORY_OBJECT_COPY_SYMMETRIC; 669 vm_object_template.paging_in_progress = 0; 670#if __LP64__ 671 vm_object_template.__object1_unused_bits = 0; 672#endif /* __LP64__ */ 673 vm_object_template.activity_in_progress = 0; 674 675 /* Begin bitfields */ 676 vm_object_template.all_wanted = 0; /* all bits FALSE */ 677 vm_object_template.pager_created = FALSE; 678 vm_object_template.pager_initialized = FALSE; 679 vm_object_template.pager_ready = FALSE; 680 vm_object_template.pager_trusted = FALSE; 681 vm_object_template.can_persist = FALSE; 682 vm_object_template.internal = TRUE; 683 vm_object_template.temporary = TRUE; 684 vm_object_template.private = FALSE; 685 vm_object_template.pageout = FALSE; 686 vm_object_template.alive = TRUE; 687 vm_object_template.purgable = VM_PURGABLE_DENY; 688 vm_object_template.purgeable_when_ripe = FALSE; 689 vm_object_template.shadowed = FALSE; 690 vm_object_template.advisory_pageout = FALSE; 691 vm_object_template.true_share = FALSE; 692 vm_object_template.terminating = FALSE; 693 vm_object_template.named = FALSE; 694 vm_object_template.shadow_severed = FALSE; 695 vm_object_template.phys_contiguous = FALSE; 696 vm_object_template.nophyscache = FALSE; 697 /* End bitfields */ 698 699 vm_object_template.cached_list.prev = NULL; 700 vm_object_template.cached_list.next = NULL; 701 vm_object_template.msr_q.prev = NULL; 702 vm_object_template.msr_q.next = NULL; 703 704 vm_object_template.last_alloc = (vm_object_offset_t) 0; 705 vm_object_template.sequential = (vm_object_offset_t) 0; 706 vm_object_template.pages_created = 0; 707 vm_object_template.pages_used = 0; 708 vm_object_template.scan_collisions = 0; 709#if CONFIG_PHANTOM_CACHE 710 vm_object_template.phantom_object_id = 0; 711#endif 712#if MACH_PAGEMAP 713 vm_object_template.existence_map = VM_EXTERNAL_NULL; 714#endif /* MACH_PAGEMAP */ 715 vm_object_template.cow_hint = ~(vm_offset_t)0; 716#if MACH_ASSERT 717 vm_object_template.paging_object = VM_OBJECT_NULL; 718#endif /* MACH_ASSERT */ 719 720 /* cache bitfields */ 721 vm_object_template.wimg_bits = VM_WIMG_USE_DEFAULT; 722 vm_object_template.set_cache_attr = FALSE; 723 vm_object_template.object_slid = FALSE; 724 vm_object_template.code_signed = FALSE; 725 vm_object_template.hashed = FALSE; 726 vm_object_template.transposed = FALSE; 727 vm_object_template.mapping_in_progress = FALSE; 728 vm_object_template.phantom_isssd = FALSE; 729 vm_object_template.volatile_empty = FALSE; 730 vm_object_template.volatile_fault = FALSE; 731 vm_object_template.all_reusable = FALSE; 732 vm_object_template.blocked_access = FALSE; 733 vm_object_template.__object2_unused_bits = 0; 734#if CONFIG_IOSCHED || UPL_DEBUG 735 vm_object_template.uplq.prev = NULL; 736 vm_object_template.uplq.next = NULL; 737#endif /* UPL_DEBUG */ 
738#ifdef VM_PIP_DEBUG 739 bzero(&vm_object_template.pip_holders, 740 sizeof (vm_object_template.pip_holders)); 741#endif /* VM_PIP_DEBUG */ 742 743 vm_object_template.objq.next = NULL; 744 vm_object_template.objq.prev = NULL; 745 746 vm_object_template.purgeable_queue_type = PURGEABLE_Q_TYPE_MAX; 747 vm_object_template.purgeable_queue_group = 0; 748 749 vm_object_template.vo_cache_ts = 0; 750 751#if DEBUG 752 bzero(&vm_object_template.purgeable_owner_bt[0], 753 sizeof (vm_object_template.purgeable_owner_bt)); 754 vm_object_template.vo_purgeable_volatilizer = NULL; 755 bzero(&vm_object_template.purgeable_volatilizer_bt[0], 756 sizeof (vm_object_template.purgeable_volatilizer_bt)); 757#endif /* DEBUG */ 758 759 /* 760 * Initialize the "kernel object" 761 */ 762 763 kernel_object = &kernel_object_store; 764 765/* 766 * Note that in the following size specifications, we need to add 1 because 767 * VM_MAX_KERNEL_ADDRESS (vm_last_addr) is a maximum address, not a size. 768 */ 769 770#ifdef ppc 771 _vm_object_allocate(vm_last_addr + 1, 772 kernel_object); 773#else 774 _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, 775 kernel_object); 776 777 _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, 778 compressor_object); 779#endif 780 kernel_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 781 compressor_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 782 783 /* 784 * Initialize the "submap object". Make it as large as the 785 * kernel object so that no limit is imposed on submap sizes. 786 */ 787 788 vm_submap_object = &vm_submap_object_store; 789#ifdef ppc 790 _vm_object_allocate(vm_last_addr + 1, 791 vm_submap_object); 792#else 793 _vm_object_allocate(VM_MAX_KERNEL_ADDRESS + 1, 794 vm_submap_object); 795#endif 796 vm_submap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 797 798 /* 799 * Create an "extra" reference to this object so that we never 800 * try to deallocate it; zfree doesn't like to be called with 801 * non-zone memory. 802 */ 803 vm_object_reference(vm_submap_object); 804 805#if MACH_PAGEMAP 806 vm_external_module_initialize(); 807#endif /* MACH_PAGEMAP */ 808} 809 810#if CONFIG_IOSCHED 811void 812vm_io_reprioritize_init(void) 813{ 814 kern_return_t result; 815 thread_t thread = THREAD_NULL; 816 817 /* Initialze the I/O reprioritization subsystem */ 818 lck_spin_init(&io_reprioritize_list_lock, &vm_object_lck_grp, &vm_object_lck_attr); 819 queue_init(&io_reprioritize_list); 820 821 io_reprioritize_req_zone = zinit(sizeof(struct io_reprioritize_req), 822 MAX_IO_REPRIORITIZE_REQS * sizeof(struct io_reprioritize_req), 823 4096, "io_reprioritize_req"); 824 825 result = kernel_thread_start_priority(io_reprioritize_thread, NULL, 95 /* MAXPRI_KERNEL */, &thread); 826 if (result == KERN_SUCCESS) { 827 thread_deallocate(thread); 828 } else { 829 panic("Could not create io_reprioritize_thread"); 830 } 831} 832#endif 833 834void 835vm_object_reaper_init(void) 836{ 837 kern_return_t kr; 838 thread_t thread; 839 840 kr = kernel_thread_start_priority( 841 (thread_continue_t) vm_object_reaper_thread, 842 NULL, 843 BASEPRI_PREEMPT - 1, 844 &thread); 845 if (kr != KERN_SUCCESS) { 846 panic("failed to launch vm_object_reaper_thread kr=0x%x", kr); 847 } 848 thread_deallocate(thread); 849} 850 851__private_extern__ void 852vm_object_init(void) 853{ 854 /* 855 * Finish initializing the kernel object. 
856 */ 857} 858 859 860__private_extern__ void 861vm_object_init_lck_grp(void) 862{ 863 /* 864 * initialze the vm_object lock world 865 */ 866 lck_grp_attr_setdefault(&vm_object_lck_grp_attr); 867 lck_grp_init(&vm_object_lck_grp, "vm_object", &vm_object_lck_grp_attr); 868 lck_grp_init(&vm_object_cache_lck_grp, "vm_object_cache", &vm_object_lck_grp_attr); 869 lck_attr_setdefault(&vm_object_lck_attr); 870 lck_attr_setdefault(&kernel_object_lck_attr); 871 lck_attr_cleardebug(&kernel_object_lck_attr); 872 lck_attr_setdefault(&compressor_object_lck_attr); 873 lck_attr_cleardebug(&compressor_object_lck_attr); 874} 875 876#if VM_OBJECT_CACHE 877#define MIGHT_NOT_CACHE_SHADOWS 1 878#if MIGHT_NOT_CACHE_SHADOWS 879static int cache_shadows = TRUE; 880#endif /* MIGHT_NOT_CACHE_SHADOWS */ 881#endif 882 883/* 884 * vm_object_deallocate: 885 * 886 * Release a reference to the specified object, 887 * gained either through a vm_object_allocate 888 * or a vm_object_reference call. When all references 889 * are gone, storage associated with this object 890 * may be relinquished. 891 * 892 * No object may be locked. 893 */ 894unsigned long vm_object_deallocate_shared_successes = 0; 895unsigned long vm_object_deallocate_shared_failures = 0; 896unsigned long vm_object_deallocate_shared_swap_failures = 0; 897__private_extern__ void 898vm_object_deallocate( 899 register vm_object_t object) 900{ 901#if VM_OBJECT_CACHE 902 boolean_t retry_cache_trim = FALSE; 903 uint32_t try_failed_count = 0; 904#endif 905 vm_object_t shadow = VM_OBJECT_NULL; 906 907// if(object)dbgLog(object, object->ref_count, object->can_persist, 3); /* (TEST/DEBUG) */ 908// else dbgLog(object, 0, 0, 3); /* (TEST/DEBUG) */ 909 910 if (object == VM_OBJECT_NULL) 911 return; 912 913 if (object == kernel_object || object == compressor_object) { 914 vm_object_lock_shared(object); 915 916 OSAddAtomic(-1, &object->ref_count); 917 918 if (object->ref_count == 0) { 919 if (object == kernel_object) 920 panic("vm_object_deallocate: losing kernel_object\n"); 921 else 922 panic("vm_object_deallocate: losing compressor_object\n"); 923 } 924 vm_object_unlock(object); 925 return; 926 } 927 928 if (object->ref_count == 2 && 929 object->named) { 930 /* 931 * This "named" object's reference count is about to 932 * drop from 2 to 1: 933 * we'll need to call memory_object_last_unmap(). 934 */ 935 } else if (object->ref_count == 2 && 936 object->internal && 937 object->shadow != VM_OBJECT_NULL) { 938 /* 939 * This internal object's reference count is about to 940 * drop from 2 to 1 and it has a shadow object: 941 * we'll want to try and collapse this object with its 942 * shadow. 943 */ 944 } else if (object->ref_count >= 2) { 945 UInt32 original_ref_count; 946 volatile UInt32 *ref_count_p; 947 Boolean atomic_swap; 948 949 /* 950 * The object currently looks like it is not being 951 * kept alive solely by the reference we're about to release. 952 * Let's try and release our reference without taking 953 * all the locks we would need if we had to terminate the 954 * object (cache lock + exclusive object lock). 955 * Lock the object "shared" to make sure we don't race with 956 * anyone holding it "exclusive". 957 */ 958 vm_object_lock_shared(object); 959 ref_count_p = (volatile UInt32 *) &object->ref_count; 960 original_ref_count = object->ref_count; 961 /* 962 * Test again as "ref_count" could have changed. 963 * "named" shouldn't change. 
964 */ 965 if (original_ref_count == 2 && 966 object->named) { 967 /* need to take slow path for m_o_last_unmap() */ 968 atomic_swap = FALSE; 969 } else if (original_ref_count == 2 && 970 object->internal && 971 object->shadow != VM_OBJECT_NULL) { 972 /* need to take slow path for vm_object_collapse() */ 973 atomic_swap = FALSE; 974 } else if (original_ref_count < 2) { 975 /* need to take slow path for vm_object_terminate() */ 976 atomic_swap = FALSE; 977 } else { 978 /* try an atomic update with the shared lock */ 979 atomic_swap = OSCompareAndSwap( 980 original_ref_count, 981 original_ref_count - 1, 982 (UInt32 *) &object->ref_count); 983 if (atomic_swap == FALSE) { 984 vm_object_deallocate_shared_swap_failures++; 985 /* fall back to the slow path... */ 986 } 987 } 988 989 vm_object_unlock(object); 990 991 if (atomic_swap) { 992 /* 993 * ref_count was updated atomically ! 994 */ 995 vm_object_deallocate_shared_successes++; 996 return; 997 } 998 999 /* 1000 * Someone else updated the ref_count at the same 1001 * time and we lost the race. Fall back to the usual 1002 * slow but safe path... 1003 */ 1004 vm_object_deallocate_shared_failures++; 1005 } 1006 1007 while (object != VM_OBJECT_NULL) { 1008 1009 vm_object_lock(object); 1010 1011 assert(object->ref_count > 0); 1012 1013 /* 1014 * If the object has a named reference, and only 1015 * that reference would remain, inform the pager 1016 * about the last "mapping" reference going away. 1017 */ 1018 if ((object->ref_count == 2) && (object->named)) { 1019 memory_object_t pager = object->pager; 1020 1021 /* Notify the Pager that there are no */ 1022 /* more mappers for this object */ 1023 1024 if (pager != MEMORY_OBJECT_NULL) { 1025 vm_object_mapping_wait(object, THREAD_UNINT); 1026 vm_object_mapping_begin(object); 1027 vm_object_unlock(object); 1028 1029 memory_object_last_unmap(pager); 1030 1031 vm_object_lock(object); 1032 vm_object_mapping_end(object); 1033 } 1034 assert(object->ref_count > 0); 1035 } 1036 1037 /* 1038 * Lose the reference. If other references 1039 * remain, then we are done, unless we need 1040 * to retry a cache trim. 1041 * If it is the last reference, then keep it 1042 * until any pending initialization is completed. 1043 */ 1044 1045 /* if the object is terminating, it cannot go into */ 1046 /* the cache and we obviously should not call */ 1047 /* terminate again. */ 1048 1049 if ((object->ref_count > 1) || object->terminating) { 1050 vm_object_lock_assert_exclusive(object); 1051 object->ref_count--; 1052 vm_object_res_deallocate(object); 1053 1054 if (object->ref_count == 1 && 1055 object->shadow != VM_OBJECT_NULL) { 1056 /* 1057 * There's only one reference left on this 1058 * VM object. We can't tell if it's a valid 1059 * one (from a mapping for example) or if this 1060 * object is just part of a possibly stale and 1061 * useless shadow chain. 1062 * We would like to try and collapse it into 1063 * its parent, but we don't have any pointers 1064 * back to this parent object. 1065 * But we can try and collapse this object with 1066 * its own shadows, in case these are useless 1067 * too... 1068 * We can't bypass this object though, since we 1069 * don't know if this last reference on it is 1070 * meaningful or not. 
1071 */ 1072 vm_object_collapse(object, 0, FALSE); 1073 } 1074 vm_object_unlock(object); 1075#if VM_OBJECT_CACHE 1076 if (retry_cache_trim && 1077 ((object = vm_object_cache_trim(TRUE)) != 1078 VM_OBJECT_NULL)) { 1079 continue; 1080 } 1081#endif 1082 return; 1083 } 1084 1085 /* 1086 * We have to wait for initialization 1087 * before destroying or caching the object. 1088 */ 1089 1090 if (object->pager_created && ! object->pager_initialized) { 1091 assert(! object->can_persist); 1092 vm_object_assert_wait(object, 1093 VM_OBJECT_EVENT_INITIALIZED, 1094 THREAD_UNINT); 1095 vm_object_unlock(object); 1096 1097 thread_block(THREAD_CONTINUE_NULL); 1098 continue; 1099 } 1100 1101#if VM_OBJECT_CACHE 1102 /* 1103 * If this object can persist, then enter it in 1104 * the cache. Otherwise, terminate it. 1105 * 1106 * NOTE: Only permanent objects are cached, and 1107 * permanent objects cannot have shadows. This 1108 * affects the residence counting logic in a minor 1109 * way (can do it in-line, mostly). 1110 */ 1111 1112 if ((object->can_persist) && (object->alive)) { 1113 /* 1114 * Now it is safe to decrement reference count, 1115 * and to return if reference count is > 0. 1116 */ 1117 1118 vm_object_lock_assert_exclusive(object); 1119 if (--object->ref_count > 0) { 1120 vm_object_res_deallocate(object); 1121 vm_object_unlock(object); 1122 1123 if (retry_cache_trim && 1124 ((object = vm_object_cache_trim(TRUE)) != 1125 VM_OBJECT_NULL)) { 1126 continue; 1127 } 1128 return; 1129 } 1130 1131#if MIGHT_NOT_CACHE_SHADOWS 1132 /* 1133 * Remove shadow now if we don't 1134 * want to cache shadows. 1135 */ 1136 if (! cache_shadows) { 1137 shadow = object->shadow; 1138 object->shadow = VM_OBJECT_NULL; 1139 } 1140#endif /* MIGHT_NOT_CACHE_SHADOWS */ 1141 1142 /* 1143 * Enter the object onto the queue of 1144 * cached objects, and deactivate 1145 * all of its pages. 1146 */ 1147 assert(object->shadow == VM_OBJECT_NULL); 1148 VM_OBJ_RES_DECR(object); 1149 XPR(XPR_VM_OBJECT, 1150 "vm_o_deallocate: adding %x to cache, queue = (%x, %x)\n", 1151 object, 1152 vm_object_cached_list.next, 1153 vm_object_cached_list.prev,0,0); 1154 1155 1156 vm_object_unlock(object); 1157 1158 try_failed_count = 0; 1159 for (;;) { 1160 vm_object_cache_lock(); 1161 1162 /* 1163 * if we try to take a regular lock here 1164 * we risk deadlocking against someone 1165 * holding a lock on this object while 1166 * trying to vm_object_deallocate a different 1167 * object 1168 */ 1169 if (vm_object_lock_try(object)) 1170 break; 1171 vm_object_cache_unlock(); 1172 try_failed_count++; 1173 1174 mutex_pause(try_failed_count); /* wait a bit */ 1175 } 1176 vm_object_cached_count++; 1177 if (vm_object_cached_count > vm_object_cached_high) 1178 vm_object_cached_high = vm_object_cached_count; 1179 queue_enter(&vm_object_cached_list, object, 1180 vm_object_t, cached_list); 1181 vm_object_cache_unlock(); 1182 1183 vm_object_deactivate_all_pages(object); 1184 vm_object_unlock(object); 1185 1186#if MIGHT_NOT_CACHE_SHADOWS 1187 /* 1188 * If we have a shadow that we need 1189 * to deallocate, do so now, remembering 1190 * to trim the cache later. 1191 */ 1192 if (! cache_shadows && shadow != VM_OBJECT_NULL) { 1193 object = shadow; 1194 retry_cache_trim = TRUE; 1195 continue; 1196 } 1197#endif /* MIGHT_NOT_CACHE_SHADOWS */ 1198 1199 /* 1200 * Trim the cache. If the cache trim 1201 * returns with a shadow for us to deallocate, 1202 * then remember to retry the cache trim 1203 * when we are done deallocating the shadow. 1204 * Otherwise, we are done. 
1205 */ 1206 1207 object = vm_object_cache_trim(TRUE); 1208 if (object == VM_OBJECT_NULL) { 1209 return; 1210 } 1211 retry_cache_trim = TRUE; 1212 } else 1213#endif /* VM_OBJECT_CACHE */ 1214 { 1215 /* 1216 * This object is not cachable; terminate it. 1217 */ 1218 XPR(XPR_VM_OBJECT, 1219 "vm_o_deallocate: !cacheable 0x%X res %d paging_ops %d thread 0x%p ref %d\n", 1220 object, object->resident_page_count, 1221 object->paging_in_progress, 1222 (void *)current_thread(),object->ref_count); 1223 1224 VM_OBJ_RES_DECR(object); /* XXX ? */ 1225 /* 1226 * Terminate this object. If it had a shadow, 1227 * then deallocate it; otherwise, if we need 1228 * to retry a cache trim, do so now; otherwise, 1229 * we are done. "pageout" objects have a shadow, 1230 * but maintain a "paging reference" rather than 1231 * a normal reference. 1232 */ 1233 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 1234 1235 if (vm_object_terminate(object) != KERN_SUCCESS) { 1236 return; 1237 } 1238 if (shadow != VM_OBJECT_NULL) { 1239 object = shadow; 1240 continue; 1241 } 1242#if VM_OBJECT_CACHE 1243 if (retry_cache_trim && 1244 ((object = vm_object_cache_trim(TRUE)) != 1245 VM_OBJECT_NULL)) { 1246 continue; 1247 } 1248#endif 1249 return; 1250 } 1251 } 1252#if VM_OBJECT_CACHE 1253 assert(! retry_cache_trim); 1254#endif 1255} 1256 1257 1258 1259vm_page_t 1260vm_object_page_grab( 1261 vm_object_t object) 1262{ 1263 vm_page_t p, next_p; 1264 int p_limit = 0; 1265 int p_skipped = 0; 1266 1267 vm_object_lock_assert_exclusive(object); 1268 1269 next_p = (vm_page_t)queue_first(&object->memq); 1270 p_limit = MIN(50, object->resident_page_count); 1271 1272 while (!queue_end(&object->memq, (queue_entry_t)next_p) && --p_limit > 0) { 1273 1274 p = next_p; 1275 next_p = (vm_page_t)queue_next(&next_p->listq); 1276 1277 if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry || p->fictitious) 1278 goto move_page_in_obj; 1279 1280 if (p->pmapped || p->dirty || p->precious) { 1281 vm_page_lockspin_queues(); 1282 1283 if (p->pmapped) { 1284 int refmod_state; 1285 1286 vm_object_page_grab_pmapped++; 1287 1288 if (p->reference == FALSE || p->dirty == FALSE) { 1289 1290 refmod_state = pmap_get_refmod(p->phys_page); 1291 1292 if (refmod_state & VM_MEM_REFERENCED) 1293 p->reference = TRUE; 1294 if (refmod_state & VM_MEM_MODIFIED) { 1295 SET_PAGE_DIRTY(p, FALSE); 1296 } 1297 } 1298 if (p->dirty == FALSE && p->precious == FALSE) { 1299 1300 refmod_state = pmap_disconnect(p->phys_page); 1301 1302 if (refmod_state & VM_MEM_REFERENCED) 1303 p->reference = TRUE; 1304 if (refmod_state & VM_MEM_MODIFIED) { 1305 SET_PAGE_DIRTY(p, FALSE); 1306 } 1307 1308 if (p->dirty == FALSE) 1309 goto take_page; 1310 } 1311 } 1312 if (p->inactive && p->reference == TRUE) { 1313 vm_page_activate(p); 1314 1315 VM_STAT_INCR(reactivations); 1316 vm_object_page_grab_reactivations++; 1317 } 1318 vm_page_unlock_queues(); 1319move_page_in_obj: 1320 queue_remove(&object->memq, p, vm_page_t, listq); 1321 queue_enter(&object->memq, p, vm_page_t, listq); 1322 1323 p_skipped++; 1324 continue; 1325 } 1326 vm_page_lockspin_queues(); 1327take_page: 1328 vm_page_free_prepare_queues(p); 1329 vm_object_page_grab_returned++; 1330 vm_object_page_grab_skipped += p_skipped; 1331 1332 vm_page_unlock_queues(); 1333 1334 vm_page_free_prepare_object(p, TRUE); 1335 1336 return (p); 1337 } 1338 vm_object_page_grab_skipped += p_skipped; 1339 vm_object_page_grab_failed++; 1340 1341 return (NULL); 1342} 1343 1344 1345 1346#define EVICT_PREPARE_LIMIT 64 1347#define EVICT_AGE 10 1348 
1349static clock_sec_t vm_object_cache_aging_ts = 0; 1350 1351static void 1352vm_object_cache_remove_locked( 1353 vm_object_t object) 1354{ 1355 queue_remove(&vm_object_cached_list, object, vm_object_t, objq); 1356 object->objq.next = NULL; 1357 object->objq.prev = NULL; 1358 1359 vm_object_cached_count--; 1360} 1361 1362void 1363vm_object_cache_remove( 1364 vm_object_t object) 1365{ 1366 vm_object_cache_lock_spin(); 1367 1368 if (object->objq.next || object->objq.prev) 1369 vm_object_cache_remove_locked(object); 1370 1371 vm_object_cache_unlock(); 1372} 1373 1374void 1375vm_object_cache_add( 1376 vm_object_t object) 1377{ 1378 clock_sec_t sec; 1379 clock_nsec_t nsec; 1380 1381 if (object->resident_page_count == 0) 1382 return; 1383 clock_get_system_nanotime(&sec, &nsec); 1384 1385 vm_object_cache_lock_spin(); 1386 1387 if (object->objq.next == NULL && object->objq.prev == NULL) { 1388 queue_enter(&vm_object_cached_list, object, vm_object_t, objq); 1389 object->vo_cache_ts = sec + EVICT_AGE; 1390 object->vo_cache_pages_to_scan = object->resident_page_count; 1391 1392 vm_object_cached_count++; 1393 vm_object_cache_adds++; 1394 } 1395 vm_object_cache_unlock(); 1396} 1397 1398int 1399vm_object_cache_evict( 1400 int num_to_evict, 1401 int max_objects_to_examine) 1402{ 1403 vm_object_t object = VM_OBJECT_NULL; 1404 vm_object_t next_obj = VM_OBJECT_NULL; 1405 vm_page_t local_free_q = VM_PAGE_NULL; 1406 vm_page_t p; 1407 vm_page_t next_p; 1408 int object_cnt = 0; 1409 vm_page_t ep_array[EVICT_PREPARE_LIMIT]; 1410 int ep_count; 1411 int ep_limit; 1412 int ep_index; 1413 int ep_freed = 0; 1414 int ep_moved = 0; 1415 uint32_t ep_skipped = 0; 1416 clock_sec_t sec; 1417 clock_nsec_t nsec; 1418 1419 KERNEL_DEBUG(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0); 1420 /* 1421 * do a couple of quick checks to see if it's 1422 * worthwhile grabbing the lock 1423 */ 1424 if (queue_empty(&vm_object_cached_list)) { 1425 KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0); 1426 return (0); 1427 } 1428 clock_get_system_nanotime(&sec, &nsec); 1429 1430 /* 1431 * the object on the head of the queue has not 1432 * yet sufficiently aged 1433 */ 1434 if (sec < vm_object_cache_aging_ts) { 1435 KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, 0, 0, 0, 0, 0); 1436 return (0); 1437 } 1438 /* 1439 * don't need the queue lock to find 1440 * and lock an object on the cached list 1441 */ 1442 vm_page_unlock_queues(); 1443 1444 vm_object_cache_lock_spin(); 1445 1446 for (;;) { 1447 next_obj = (vm_object_t)queue_first(&vm_object_cached_list); 1448 1449 while (!queue_end(&vm_object_cached_list, (queue_entry_t)next_obj) && object_cnt++ < max_objects_to_examine) { 1450 1451 object = next_obj; 1452 next_obj = (vm_object_t)queue_next(&next_obj->objq); 1453 1454 if (sec < object->vo_cache_ts) { 1455 KERNEL_DEBUG(0x130020c, object, object->resident_page_count, object->vo_cache_ts, sec, 0); 1456 1457 vm_object_cache_aging_ts = object->vo_cache_ts; 1458 object = VM_OBJECT_NULL; 1459 break; 1460 } 1461 if (!vm_object_lock_try_scan(object)) { 1462 /* 1463 * just skip over this guy for now... if we find 1464 * an object to steal pages from, we'll revist in a bit... 1465 * hopefully, the lock will have cleared 1466 */ 1467 KERNEL_DEBUG(0x13001f8, object, object->resident_page_count, 0, 0, 0); 1468 1469 object = VM_OBJECT_NULL; 1470 continue; 1471 } 1472 if (queue_empty(&object->memq) || object->vo_cache_pages_to_scan == 0) { 1473 /* 1474 * this case really shouldn't happen, but it's not fatal 1475 * so deal with it... 
if we don't remove the object from 1476 * the list, we'll never move past it. 1477 */ 1478 KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0); 1479 1480 vm_object_cache_remove_locked(object); 1481 vm_object_unlock(object); 1482 object = VM_OBJECT_NULL; 1483 continue; 1484 } 1485 /* 1486 * we have a locked object with pages... 1487 * time to start harvesting 1488 */ 1489 break; 1490 } 1491 vm_object_cache_unlock(); 1492 1493 if (object == VM_OBJECT_NULL) 1494 break; 1495 1496 /* 1497 * object is locked at this point and 1498 * has resident pages 1499 */ 1500 next_p = (vm_page_t)queue_first(&object->memq); 1501 1502 /* 1503 * break the page scan into 2 pieces to minimize the time spent 1504 * behind the page queue lock... 1505 * the list of pages on these unused objects is likely to be cold 1506 * w/r to the cpu cache which increases the time to scan the list 1507 * tenfold... and we may have a 'run' of pages we can't utilize that 1508 * needs to be skipped over... 1509 */ 1510 if ((ep_limit = num_to_evict - (ep_freed + ep_moved)) > EVICT_PREPARE_LIMIT) 1511 ep_limit = EVICT_PREPARE_LIMIT; 1512 ep_count = 0; 1513 1514 while (!queue_end(&object->memq, (queue_entry_t)next_p) && object->vo_cache_pages_to_scan && ep_count < ep_limit) { 1515 1516 p = next_p; 1517 next_p = (vm_page_t)queue_next(&next_p->listq); 1518 1519 object->vo_cache_pages_to_scan--; 1520 1521 if (VM_PAGE_WIRED(p) || p->busy || p->cleaning || p->laundry) { 1522 queue_remove(&object->memq, p, vm_page_t, listq); 1523 queue_enter(&object->memq, p, vm_page_t, listq); 1524 1525 ep_skipped++; 1526 continue; 1527 } 1528 if (p->wpmapped || p->dirty || p->precious) { 1529 queue_remove(&object->memq, p, vm_page_t, listq); 1530 queue_enter(&object->memq, p, vm_page_t, listq); 1531 1532 pmap_clear_reference(p->phys_page); 1533 } 1534 ep_array[ep_count++] = p; 1535 } 1536 KERNEL_DEBUG(0x13001f4 | DBG_FUNC_START, object, object->resident_page_count, ep_freed, ep_moved, 0); 1537 1538 vm_page_lockspin_queues(); 1539 1540 for (ep_index = 0; ep_index < ep_count; ep_index++) { 1541 1542 p = ep_array[ep_index]; 1543 1544 if (p->wpmapped || p->dirty || p->precious) { 1545 p->reference = FALSE; 1546 p->no_cache = FALSE; 1547 1548 /* 1549 * we've already filtered out pages that are in the laundry 1550 * so if we get here, this page can't be on the pageout queue 1551 */ 1552 assert(!p->pageout_queue); 1553 1554 VM_PAGE_QUEUES_REMOVE(p); 1555 VM_PAGE_ENQUEUE_INACTIVE(p, TRUE); 1556 1557 ep_moved++; 1558 } else { 1559#if CONFIG_PHANTOM_CACHE 1560 vm_phantom_cache_add_ghost(p); 1561#endif 1562 vm_page_free_prepare_queues(p); 1563 1564 assert(p->pageq.next == NULL && p->pageq.prev == NULL); 1565 /* 1566 * Add this page to our list of reclaimed pages, 1567 * to be freed later. 
1568 */ 1569 p->pageq.next = (queue_entry_t) local_free_q; 1570 local_free_q = p; 1571 1572 ep_freed++; 1573 } 1574 } 1575 vm_page_unlock_queues(); 1576 1577 KERNEL_DEBUG(0x13001f4 | DBG_FUNC_END, object, object->resident_page_count, ep_freed, ep_moved, 0); 1578 1579 if (local_free_q) { 1580 vm_page_free_list(local_free_q, TRUE); 1581 local_free_q = VM_PAGE_NULL; 1582 } 1583 if (object->vo_cache_pages_to_scan == 0) { 1584 KERNEL_DEBUG(0x1300208, object, object->resident_page_count, ep_freed, ep_moved, 0); 1585 1586 vm_object_cache_remove(object); 1587 1588 KERNEL_DEBUG(0x13001fc, object, object->resident_page_count, ep_freed, ep_moved, 0); 1589 } 1590 /* 1591 * done with this object 1592 */ 1593 vm_object_unlock(object); 1594 object = VM_OBJECT_NULL; 1595 1596 /* 1597 * at this point, we are not holding any locks 1598 */ 1599 if ((ep_freed + ep_moved) >= num_to_evict) { 1600 /* 1601 * we've reached our target for the 1602 * number of pages to evict 1603 */ 1604 break; 1605 } 1606 vm_object_cache_lock_spin(); 1607 } 1608 /* 1609 * put the page queues lock back to the caller's 1610 * idea of it 1611 */ 1612 vm_page_lock_queues(); 1613 1614 vm_object_cache_pages_freed += ep_freed; 1615 vm_object_cache_pages_moved += ep_moved; 1616 vm_object_cache_pages_skipped += ep_skipped; 1617 1618 KERNEL_DEBUG(0x13001ec | DBG_FUNC_END, ep_freed, 0, 0, 0, 0); 1619 return (ep_freed); 1620} 1621 1622 1623#if VM_OBJECT_CACHE 1624/* 1625 * Check to see whether we really need to trim 1626 * down the cache. If so, remove an object from 1627 * the cache, terminate it, and repeat. 1628 * 1629 * Called with, and returns with, cache lock unlocked. 1630 */ 1631vm_object_t 1632vm_object_cache_trim( 1633 boolean_t called_from_vm_object_deallocate) 1634{ 1635 register vm_object_t object = VM_OBJECT_NULL; 1636 vm_object_t shadow; 1637 1638 for (;;) { 1639 1640 /* 1641 * If we no longer need to trim the cache, 1642 * then we are done. 1643 */ 1644 if (vm_object_cached_count <= vm_object_cached_max) 1645 return VM_OBJECT_NULL; 1646 1647 vm_object_cache_lock(); 1648 if (vm_object_cached_count <= vm_object_cached_max) { 1649 vm_object_cache_unlock(); 1650 return VM_OBJECT_NULL; 1651 } 1652 1653 /* 1654 * We must trim down the cache, so remove 1655 * the first object in the cache. 1656 */ 1657 XPR(XPR_VM_OBJECT, 1658 "vm_object_cache_trim: removing from front of cache (%x, %x)\n", 1659 vm_object_cached_list.next, 1660 vm_object_cached_list.prev, 0, 0, 0); 1661 1662 object = (vm_object_t) queue_first(&vm_object_cached_list); 1663 if(object == (vm_object_t) &vm_object_cached_list) { 1664 /* something's wrong with the calling parameter or */ 1665 /* the value of vm_object_cached_count, just fix */ 1666 /* and return */ 1667 if(vm_object_cached_max < 0) 1668 vm_object_cached_max = 0; 1669 vm_object_cached_count = 0; 1670 vm_object_cache_unlock(); 1671 return VM_OBJECT_NULL; 1672 } 1673 vm_object_lock(object); 1674 queue_remove(&vm_object_cached_list, object, vm_object_t, 1675 cached_list); 1676 vm_object_cached_count--; 1677 1678 vm_object_cache_unlock(); 1679 /* 1680 * Since this object is in the cache, we know 1681 * that it is initialized and has no references. 1682 * Take a reference to avoid recursive deallocations. 1683 */ 1684 1685 assert(object->pager_initialized); 1686 assert(object->ref_count == 0); 1687 vm_object_lock_assert_exclusive(object); 1688 object->ref_count++; 1689 1690 /* 1691 * Terminate the object. 1692 * If the object had a shadow, we let vm_object_deallocate 1693 * deallocate it. 
"pageout" objects have a shadow, but 1694 * maintain a "paging reference" rather than a normal 1695 * reference. 1696 * (We are careful here to limit recursion.) 1697 */ 1698 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 1699 1700 if(vm_object_terminate(object) != KERN_SUCCESS) 1701 continue; 1702 1703 if (shadow != VM_OBJECT_NULL) { 1704 if (called_from_vm_object_deallocate) { 1705 return shadow; 1706 } else { 1707 vm_object_deallocate(shadow); 1708 } 1709 } 1710 } 1711} 1712#endif 1713 1714 1715/* 1716 * Routine: vm_object_terminate 1717 * Purpose: 1718 * Free all resources associated with a vm_object. 1719 * In/out conditions: 1720 * Upon entry, the object must be locked, 1721 * and the object must have exactly one reference. 1722 * 1723 * The shadow object reference is left alone. 1724 * 1725 * The object must be unlocked if its found that pages 1726 * must be flushed to a backing object. If someone 1727 * manages to map the object while it is being flushed 1728 * the object is returned unlocked and unchanged. Otherwise, 1729 * upon exit, the cache will be unlocked, and the 1730 * object will cease to exist. 1731 */ 1732static kern_return_t 1733vm_object_terminate( 1734 vm_object_t object) 1735{ 1736 vm_object_t shadow_object; 1737 1738 XPR(XPR_VM_OBJECT, "vm_object_terminate, object 0x%X ref %d\n", 1739 object, object->ref_count, 0, 0, 0); 1740 1741 if (!object->pageout && (!object->temporary || object->can_persist) && 1742 (object->pager != NULL || object->shadow_severed)) { 1743 /* 1744 * Clear pager_trusted bit so that the pages get yanked 1745 * out of the object instead of cleaned in place. This 1746 * prevents a deadlock in XMM and makes more sense anyway. 1747 */ 1748 object->pager_trusted = FALSE; 1749 1750 vm_object_reap_pages(object, REAP_TERMINATE); 1751 } 1752 /* 1753 * Make sure the object isn't already being terminated 1754 */ 1755 if (object->terminating) { 1756 vm_object_lock_assert_exclusive(object); 1757 object->ref_count--; 1758 assert(object->ref_count > 0); 1759 vm_object_unlock(object); 1760 return KERN_FAILURE; 1761 } 1762 1763 /* 1764 * Did somebody get a reference to the object while we were 1765 * cleaning it? 1766 */ 1767 if (object->ref_count != 1) { 1768 vm_object_lock_assert_exclusive(object); 1769 object->ref_count--; 1770 assert(object->ref_count > 0); 1771 vm_object_res_deallocate(object); 1772 vm_object_unlock(object); 1773 return KERN_FAILURE; 1774 } 1775 1776 /* 1777 * Make sure no one can look us up now. 1778 */ 1779 1780 object->terminating = TRUE; 1781 object->alive = FALSE; 1782 1783 if ( !object->internal && (object->objq.next || object->objq.prev)) 1784 vm_object_cache_remove(object); 1785 1786 if (object->hashed) { 1787 lck_mtx_t *lck; 1788 1789 lck = vm_object_hash_lock_spin(object->pager); 1790 vm_object_remove(object); 1791 vm_object_hash_unlock(lck); 1792 } 1793 /* 1794 * Detach the object from its shadow if we are the shadow's 1795 * copy. The reference we hold on the shadow must be dropped 1796 * by our caller. 
1797 */ 1798 if (((shadow_object = object->shadow) != VM_OBJECT_NULL) && 1799 !(object->pageout)) { 1800 vm_object_lock(shadow_object); 1801 if (shadow_object->copy == object) 1802 shadow_object->copy = VM_OBJECT_NULL; 1803 vm_object_unlock(shadow_object); 1804 } 1805 1806 if (object->paging_in_progress != 0 || 1807 object->activity_in_progress != 0) { 1808 /* 1809 * There are still some paging_in_progress references 1810 * on this object, meaning that there are some paging 1811 * or other I/O operations in progress for this VM object. 1812 * Such operations take some paging_in_progress references 1813 * up front to ensure that the object doesn't go away, but 1814 * they may also need to acquire a reference on the VM object, 1815 * to map it in kernel space, for example. That means that 1816 * they may end up releasing the last reference on the VM 1817 * object, triggering its termination, while still holding 1818 * paging_in_progress references. Waiting for these 1819 * pending paging_in_progress references to go away here would 1820 * deadlock. 1821 * 1822 * To avoid deadlocking, we'll let the vm_object_reaper_thread 1823 * complete the VM object termination if it still holds 1824 * paging_in_progress references at this point. 1825 * 1826 * No new paging_in_progress should appear now that the 1827 * VM object is "terminating" and not "alive". 1828 */ 1829 vm_object_reap_async(object); 1830 vm_object_unlock(object); 1831 /* 1832 * Return KERN_FAILURE to let the caller know that we 1833 * haven't completed the termination and it can't drop this 1834 * object's reference on its shadow object yet. 1835 * The reaper thread will take care of that once it has 1836 * completed this object's termination. 1837 */ 1838 return KERN_FAILURE; 1839 } 1840 /* 1841 * complete the VM object termination 1842 */ 1843 vm_object_reap(object); 1844 object = VM_OBJECT_NULL; 1845 1846 /* 1847 * the object lock was released by vm_object_reap() 1848 * 1849 * KERN_SUCCESS means that this object has been terminated 1850 * and no longer needs its shadow object but still holds a 1851 * reference on it. 1852 * The caller is responsible for dropping that reference. 1853 * We can't call vm_object_deallocate() here because that 1854 * would create a recursion. 1855 */ 1856 return KERN_SUCCESS; 1857} 1858 1859 1860/* 1861 * vm_object_reap(): 1862 * 1863 * Complete the termination of a VM object after it's been marked 1864 * as "terminating" and "!alive" by vm_object_terminate(). 1865 * 1866 * The VM object must be locked by caller. 1867 * The lock will be released on return and the VM object is no longer valid. 1868 */ 1869void 1870vm_object_reap( 1871 vm_object_t object) 1872{ 1873 memory_object_t pager; 1874 1875 vm_object_lock_assert_exclusive(object); 1876 assert(object->paging_in_progress == 0); 1877 assert(object->activity_in_progress == 0); 1878 1879 vm_object_reap_count++; 1880 1881 /* 1882 * Disown this purgeable object to cleanup its owner's purgeable 1883 * ledgers. We need to do this before disconnecting the object 1884 * from its pager, to properly account for compressed pages. 
1885 */ 1886 if (object->internal && 1887 object->purgable != VM_PURGABLE_DENY) { 1888 vm_purgeable_accounting(object, 1889 object->purgable, 1890 TRUE); /* disown */ 1891 } 1892 1893 pager = object->pager; 1894 object->pager = MEMORY_OBJECT_NULL; 1895 1896 if (pager != MEMORY_OBJECT_NULL) 1897 memory_object_control_disable(object->pager_control); 1898 1899 object->ref_count--; 1900#if TASK_SWAPPER 1901 assert(object->res_count == 0); 1902#endif /* TASK_SWAPPER */ 1903 1904 assert (object->ref_count == 0); 1905 1906 /* 1907 * remove from purgeable queue if it's on 1908 */ 1909 if (object->internal) { 1910 task_t owner; 1911 1912 owner = object->vo_purgeable_owner; 1913 1914 if (object->purgable == VM_PURGABLE_DENY) { 1915 /* not purgeable: nothing to do */ 1916 } else if (object->purgable == VM_PURGABLE_VOLATILE) { 1917 purgeable_q_t queue; 1918 1919 assert(object->vo_purgeable_owner == NULL); 1920 1921 queue = vm_purgeable_object_remove(object); 1922 assert(queue); 1923 1924 if (object->purgeable_when_ripe) { 1925 /* 1926 * Must take page lock for this - 1927 * using it to protect token queue 1928 */ 1929 vm_page_lock_queues(); 1930 vm_purgeable_token_delete_first(queue); 1931 1932 assert(queue->debug_count_objects>=0); 1933 vm_page_unlock_queues(); 1934 } 1935 1936 /* 1937 * Update "vm_page_purgeable_count" in bulk and mark 1938 * object as VM_PURGABLE_EMPTY to avoid updating 1939 * "vm_page_purgeable_count" again in vm_page_remove() 1940 * when reaping the pages. 1941 */ 1942 unsigned int delta; 1943 assert(object->resident_page_count >= 1944 object->wired_page_count); 1945 delta = (object->resident_page_count - 1946 object->wired_page_count); 1947 if (delta != 0) { 1948 assert(vm_page_purgeable_count >= delta); 1949 OSAddAtomic(-delta, 1950 (SInt32 *)&vm_page_purgeable_count); 1951 } 1952 if (object->wired_page_count != 0) { 1953 assert(vm_page_purgeable_wired_count >= 1954 object->wired_page_count); 1955 OSAddAtomic(-object->wired_page_count, 1956 (SInt32 *)&vm_page_purgeable_wired_count); 1957 } 1958 object->purgable = VM_PURGABLE_EMPTY; 1959 } 1960 else if (object->purgable == VM_PURGABLE_NONVOLATILE || 1961 object->purgable == VM_PURGABLE_EMPTY) { 1962 /* remove from nonvolatile queue */ 1963 assert(object->vo_purgeable_owner == TASK_NULL); 1964 vm_purgeable_nonvolatile_dequeue(object); 1965 } else { 1966 panic("object %p in unexpected purgeable state 0x%x\n", 1967 object, object->purgable); 1968 } 1969 assert(object->objq.next == NULL); 1970 assert(object->objq.prev == NULL); 1971 } 1972 1973 /* 1974 * Clean or free the pages, as appropriate. 1975 * It is possible for us to find busy/absent pages, 1976 * if some faults on this object were aborted. 1977 */ 1978 if (object->pageout) { 1979 assert(object->shadow != VM_OBJECT_NULL); 1980 1981 vm_pageout_object_terminate(object); 1982 1983 } else if (((object->temporary && !object->can_persist) || (pager == MEMORY_OBJECT_NULL))) { 1984 1985 vm_object_reap_pages(object, REAP_REAP); 1986 } 1987 assert(queue_empty(&object->memq)); 1988 assert(object->paging_in_progress == 0); 1989 assert(object->activity_in_progress == 0); 1990 assert(object->ref_count == 0); 1991 1992 /* 1993 * If the pager has not already been released by 1994 * vm_object_destroy, we need to terminate it and 1995 * release our reference to it here. 
1996 */ 1997 if (pager != MEMORY_OBJECT_NULL) { 1998 vm_object_unlock(object); 1999 vm_object_release_pager(pager, object->hashed); 2000 vm_object_lock(object); 2001 } 2002 2003 /* kick off anyone waiting on terminating */ 2004 object->terminating = FALSE; 2005 vm_object_paging_begin(object); 2006 vm_object_paging_end(object); 2007 vm_object_unlock(object); 2008 2009#if MACH_PAGEMAP 2010 vm_external_destroy(object->existence_map, object->vo_size); 2011#endif /* MACH_PAGEMAP */ 2012 2013 object->shadow = VM_OBJECT_NULL; 2014 2015#if VM_OBJECT_TRACKING 2016 if (vm_object_tracking_inited) { 2017 btlog_remove_entries_for_element(vm_object_tracking_btlog, 2018 object); 2019 } 2020#endif /* VM_OBJECT_TRACKING */ 2021 2022 vm_object_lock_destroy(object); 2023 /* 2024 * Free the space for the object. 2025 */ 2026 zfree(vm_object_zone, object); 2027 object = VM_OBJECT_NULL; 2028} 2029 2030 2031unsigned int vm_max_batch = 256; 2032 2033#define V_O_R_MAX_BATCH 128 2034 2035#define BATCH_LIMIT(max) (vm_max_batch >= max ? max : vm_max_batch) 2036 2037 2038#define VM_OBJ_REAP_FREELIST(_local_free_q, do_disconnect) \ 2039 MACRO_BEGIN \ 2040 if (_local_free_q) { \ 2041 if (do_disconnect) { \ 2042 vm_page_t m; \ 2043 for (m = _local_free_q; \ 2044 m != VM_PAGE_NULL; \ 2045 m = (vm_page_t) m->pageq.next) { \ 2046 if (m->pmapped) { \ 2047 pmap_disconnect(m->phys_page); \ 2048 } \ 2049 } \ 2050 } \ 2051 vm_page_free_list(_local_free_q, TRUE); \ 2052 _local_free_q = VM_PAGE_NULL; \ 2053 } \ 2054 MACRO_END 2055 2056 2057void 2058vm_object_reap_pages( 2059 vm_object_t object, 2060 int reap_type) 2061{ 2062 vm_page_t p; 2063 vm_page_t next; 2064 vm_page_t local_free_q = VM_PAGE_NULL; 2065 int loop_count; 2066 boolean_t disconnect_on_release; 2067 pmap_flush_context pmap_flush_context_storage; 2068 2069 if (reap_type == REAP_DATA_FLUSH) { 2070 /* 2071 * We need to disconnect pages from all pmaps before 2072 * releasing them to the free list 2073 */ 2074 disconnect_on_release = TRUE; 2075 } else { 2076 /* 2077 * Either the caller has already disconnected the pages 2078 * from all pmaps, or we disconnect them here as we add 2079 * them to out local list of pages to be released. 2080 * No need to re-disconnect them when we release the pages 2081 * to the free list. 
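	 *
	 * (For reference, the reap_type values handled by the switch below:
	 *	REAP_DATA_FLUSH	 - toss the data; pages get disconnected from
	 *			   their pmaps when the free list is released
	 *	REAP_PURGEABLE	 - discard eligible pages, batching the pmap
	 *			   flushes through pmap_flush_context
	 *	REAP_TERMINATE	 - push dirty/precious pages to the pager
	 *			   before freeing the rest
	 *	REAP_REAP	 - unconditionally reclaim whatever is left)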
2082 */ 2083 disconnect_on_release = FALSE; 2084 } 2085 2086restart_after_sleep: 2087 if (queue_empty(&object->memq)) 2088 return; 2089 loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH); 2090 2091 if (reap_type == REAP_PURGEABLE) 2092 pmap_flush_context_init(&pmap_flush_context_storage); 2093 2094 vm_page_lockspin_queues(); 2095 2096 next = (vm_page_t)queue_first(&object->memq); 2097 2098 while (!queue_end(&object->memq, (queue_entry_t)next)) { 2099 2100 p = next; 2101 next = (vm_page_t)queue_next(&next->listq); 2102 2103 if (--loop_count == 0) { 2104 2105 vm_page_unlock_queues(); 2106 2107 if (local_free_q) { 2108 2109 if (reap_type == REAP_PURGEABLE) { 2110 pmap_flush(&pmap_flush_context_storage); 2111 pmap_flush_context_init(&pmap_flush_context_storage); 2112 } 2113 /* 2114 * Free the pages we reclaimed so far 2115 * and take a little break to avoid 2116 * hogging the page queue lock too long 2117 */ 2118 VM_OBJ_REAP_FREELIST(local_free_q, 2119 disconnect_on_release); 2120 } else 2121 mutex_pause(0); 2122 2123 loop_count = BATCH_LIMIT(V_O_R_MAX_BATCH); 2124 2125 vm_page_lockspin_queues(); 2126 } 2127 if (reap_type == REAP_DATA_FLUSH || reap_type == REAP_TERMINATE) { 2128 2129 if (p->busy || p->cleaning) { 2130 2131 vm_page_unlock_queues(); 2132 /* 2133 * free the pages reclaimed so far 2134 */ 2135 VM_OBJ_REAP_FREELIST(local_free_q, 2136 disconnect_on_release); 2137 2138 PAGE_SLEEP(object, p, THREAD_UNINT); 2139 2140 goto restart_after_sleep; 2141 } 2142 if (p->laundry) { 2143 p->pageout = FALSE; 2144 2145 vm_pageout_steal_laundry(p, TRUE); 2146 } 2147 } 2148 switch (reap_type) { 2149 2150 case REAP_DATA_FLUSH: 2151 if (VM_PAGE_WIRED(p)) { 2152 /* 2153 * this is an odd case... perhaps we should 2154 * zero-fill this page since we're conceptually 2155 * tossing its data at this point, but leaving 2156 * it on the object to honor the 'wire' contract 2157 */ 2158 continue; 2159 } 2160 break; 2161 2162 case REAP_PURGEABLE: 2163 if (VM_PAGE_WIRED(p)) { 2164 /* 2165 * can't purge a wired page 2166 */ 2167 vm_page_purged_wired++; 2168 continue; 2169 } 2170 if (p->laundry && !p->busy && !p->cleaning) { 2171 p->pageout = FALSE; 2172 2173 vm_pageout_steal_laundry(p, TRUE); 2174 } 2175 if (p->cleaning || p->laundry || p->absent) { 2176 /* 2177 * page is being acted upon, 2178 * so don't mess with it 2179 */ 2180 vm_page_purged_others++; 2181 continue; 2182 } 2183 if (p->busy) { 2184 /* 2185 * We can't reclaim a busy page but we can 2186 * make it more likely to be paged (it's not wired) to make 2187 * sure that it gets considered by 2188 * vm_pageout_scan() later. 2189 */ 2190 vm_page_deactivate(p); 2191 vm_page_purged_busy++; 2192 continue; 2193 } 2194 2195 assert(p->object != kernel_object); 2196 2197 /* 2198 * we can discard this page... 2199 */ 2200 if (p->pmapped == TRUE) { 2201 /* 2202 * unmap the page 2203 */ 2204 pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_NOREFMOD, (void *)&pmap_flush_context_storage); 2205 } 2206 vm_page_purged_count++; 2207 2208 break; 2209 2210 case REAP_TERMINATE: 2211 if (p->absent || p->private) { 2212 /* 2213 * For private pages, VM_PAGE_FREE just 2214 * leaves the page structure around for 2215 * its owner to clean up. For absent 2216 * pages, the structure is returned to 2217 * the appropriate pool. 
2218 */ 2219 break; 2220 } 2221 if (p->fictitious) { 2222 assert (p->phys_page == vm_page_guard_addr); 2223 break; 2224 } 2225 if (!p->dirty && p->wpmapped) 2226 p->dirty = pmap_is_modified(p->phys_page); 2227 2228 if ((p->dirty || p->precious) && !p->error && object->alive) { 2229 2230 if (!p->laundry) { 2231 VM_PAGE_QUEUES_REMOVE(p); 2232 /* 2233 * flush page... page will be freed 2234 * upon completion of I/O 2235 */ 2236 vm_pageout_cluster(p, TRUE); 2237 } 2238 vm_page_unlock_queues(); 2239 /* 2240 * free the pages reclaimed so far 2241 */ 2242 VM_OBJ_REAP_FREELIST(local_free_q, 2243 disconnect_on_release); 2244 2245 vm_object_paging_wait(object, THREAD_UNINT); 2246 2247 goto restart_after_sleep; 2248 } 2249 break; 2250 2251 case REAP_REAP: 2252 break; 2253 } 2254 vm_page_free_prepare_queues(p); 2255 assert(p->pageq.next == NULL && p->pageq.prev == NULL); 2256 /* 2257 * Add this page to our list of reclaimed pages, 2258 * to be freed later. 2259 */ 2260 p->pageq.next = (queue_entry_t) local_free_q; 2261 local_free_q = p; 2262 } 2263 vm_page_unlock_queues(); 2264 2265 /* 2266 * Free the remaining reclaimed pages 2267 */ 2268 if (reap_type == REAP_PURGEABLE) 2269 pmap_flush(&pmap_flush_context_storage); 2270 2271 VM_OBJ_REAP_FREELIST(local_free_q, 2272 disconnect_on_release); 2273} 2274 2275 2276void 2277vm_object_reap_async( 2278 vm_object_t object) 2279{ 2280 vm_object_lock_assert_exclusive(object); 2281 2282 vm_object_reaper_lock_spin(); 2283 2284 vm_object_reap_count_async++; 2285 2286 /* enqueue the VM object... */ 2287 queue_enter(&vm_object_reaper_queue, object, 2288 vm_object_t, cached_list); 2289 2290 vm_object_reaper_unlock(); 2291 2292 /* ... and wake up the reaper thread */ 2293 thread_wakeup((event_t) &vm_object_reaper_queue); 2294} 2295 2296 2297void 2298vm_object_reaper_thread(void) 2299{ 2300 vm_object_t object, shadow_object; 2301 2302 vm_object_reaper_lock_spin(); 2303 2304 while (!queue_empty(&vm_object_reaper_queue)) { 2305 queue_remove_first(&vm_object_reaper_queue, 2306 object, 2307 vm_object_t, 2308 cached_list); 2309 2310 vm_object_reaper_unlock(); 2311 vm_object_lock(object); 2312 2313 assert(object->terminating); 2314 assert(!object->alive); 2315 2316 /* 2317 * The pageout daemon might be playing with our pages. 2318 * Now that the object is dead, it won't touch any more 2319 * pages, but some pages might already be on their way out. 2320 * Hence, we wait until the active paging activities have 2321 * ceased before we break the association with the pager 2322 * itself. 2323 */ 2324 while (object->paging_in_progress != 0 || 2325 object->activity_in_progress != 0) { 2326 vm_object_wait(object, 2327 VM_OBJECT_EVENT_PAGING_IN_PROGRESS, 2328 THREAD_UNINT); 2329 vm_object_lock(object); 2330 } 2331 2332 shadow_object = 2333 object->pageout ? VM_OBJECT_NULL : object->shadow; 2334 2335 vm_object_reap(object); 2336 /* cache is unlocked and object is no longer valid */ 2337 object = VM_OBJECT_NULL; 2338 2339 if (shadow_object != VM_OBJECT_NULL) { 2340 /* 2341 * Drop the reference "object" was holding on 2342 * its shadow object. 2343 */ 2344 vm_object_deallocate(shadow_object); 2345 shadow_object = VM_OBJECT_NULL; 2346 } 2347 vm_object_reaper_lock_spin(); 2348 } 2349 2350 /* wait for more work... 
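	   (the thread parks with assert_wait() on the reaper queue and then
	   thread_block()s with vm_object_reaper_thread as its continuation,
	   so it restarts at the top of this routine when vm_object_reap_async()
	   issues the wakeup)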
*/ 2351 assert_wait((event_t) &vm_object_reaper_queue, THREAD_UNINT); 2352 2353 vm_object_reaper_unlock(); 2354 2355 thread_block((thread_continue_t) vm_object_reaper_thread); 2356 /*NOTREACHED*/ 2357} 2358 2359/* 2360 * Routine: vm_object_pager_wakeup 2361 * Purpose: Wake up anyone waiting for termination of a pager. 2362 */ 2363 2364static void 2365vm_object_pager_wakeup( 2366 memory_object_t pager) 2367{ 2368 vm_object_hash_entry_t entry; 2369 boolean_t waiting = FALSE; 2370 lck_mtx_t *lck; 2371 2372 /* 2373 * If anyone was waiting for the memory_object_terminate 2374 * to be queued, wake them up now. 2375 */ 2376 lck = vm_object_hash_lock_spin(pager); 2377 entry = vm_object_hash_lookup(pager, TRUE); 2378 if (entry != VM_OBJECT_HASH_ENTRY_NULL) 2379 waiting = entry->waiting; 2380 vm_object_hash_unlock(lck); 2381 2382 if (entry != VM_OBJECT_HASH_ENTRY_NULL) { 2383 if (waiting) 2384 thread_wakeup((event_t) pager); 2385 vm_object_hash_entry_free(entry); 2386 } 2387} 2388 2389/* 2390 * Routine: vm_object_release_pager 2391 * Purpose: Terminate the pager and, upon completion, 2392 * release our last reference to it. 2393 * just like memory_object_terminate, except 2394 * that we wake up anyone blocked in vm_object_enter 2395 * waiting for termination message to be queued 2396 * before calling memory_object_init. 2397 */ 2398static void 2399vm_object_release_pager( 2400 memory_object_t pager, 2401 boolean_t hashed) 2402{ 2403 2404 /* 2405 * Terminate the pager. 2406 */ 2407 2408 (void) memory_object_terminate(pager); 2409 2410 if (hashed == TRUE) { 2411 /* 2412 * Wakeup anyone waiting for this terminate 2413 * and remove the entry from the hash 2414 */ 2415 vm_object_pager_wakeup(pager); 2416 } 2417 /* 2418 * Release reference to pager. 2419 */ 2420 memory_object_deallocate(pager); 2421} 2422 2423/* 2424 * Routine: vm_object_destroy 2425 * Purpose: 2426 * Shut down a VM object, despite the 2427 * presence of address map (or other) references 2428 * to the vm_object. 2429 */ 2430kern_return_t 2431vm_object_destroy( 2432 vm_object_t object, 2433 __unused kern_return_t reason) 2434{ 2435 memory_object_t old_pager; 2436 2437 if (object == VM_OBJECT_NULL) 2438 return(KERN_SUCCESS); 2439 2440 /* 2441 * Remove the pager association immediately. 2442 * 2443 * This will prevent the memory manager from further 2444 * meddling. [If it wanted to flush data or make 2445 * other changes, it should have done so before performing 2446 * the destroy call.] 2447 */ 2448 2449 vm_object_lock(object); 2450 object->can_persist = FALSE; 2451 object->named = FALSE; 2452 object->alive = FALSE; 2453 2454 if (object->hashed) { 2455 lck_mtx_t *lck; 2456 /* 2457 * Rip out the pager from the vm_object now... 2458 */ 2459 lck = vm_object_hash_lock_spin(object->pager); 2460 vm_object_remove(object); 2461 vm_object_hash_unlock(lck); 2462 } 2463 old_pager = object->pager; 2464 object->pager = MEMORY_OBJECT_NULL; 2465 if (old_pager != MEMORY_OBJECT_NULL) 2466 memory_object_control_disable(object->pager_control); 2467 2468 /* 2469 * Wait for the existing paging activity (that got 2470 * through before we nulled out the pager) to subside. 2471 */ 2472 2473 vm_object_paging_wait(object, THREAD_UNINT); 2474 vm_object_unlock(object); 2475 2476 /* 2477 * Terminate the object now. 2478 */ 2479 if (old_pager != MEMORY_OBJECT_NULL) { 2480 vm_object_release_pager(old_pager, object->hashed); 2481 2482 /* 2483 * JMM - Release the caller's reference. 
This assumes the 2484 * caller had a reference to release, which is a big (but 2485 * currently valid) assumption if this is driven from the 2486 * vnode pager (it is holding a named reference when making 2487 * this call).. 2488 */ 2489 vm_object_deallocate(object); 2490 2491 } 2492 return(KERN_SUCCESS); 2493} 2494 2495 2496#if VM_OBJECT_CACHE 2497 2498#define VM_OBJ_DEACT_ALL_STATS DEBUG 2499#if VM_OBJ_DEACT_ALL_STATS 2500uint32_t vm_object_deactivate_all_pages_batches = 0; 2501uint32_t vm_object_deactivate_all_pages_pages = 0; 2502#endif /* VM_OBJ_DEACT_ALL_STATS */ 2503/* 2504 * vm_object_deactivate_all_pages 2505 * 2506 * Deactivate all pages in the specified object. (Keep its pages 2507 * in memory even though it is no longer referenced.) 2508 * 2509 * The object must be locked. 2510 */ 2511static void 2512vm_object_deactivate_all_pages( 2513 register vm_object_t object) 2514{ 2515 register vm_page_t p; 2516 int loop_count; 2517#if VM_OBJ_DEACT_ALL_STATS 2518 int pages_count; 2519#endif /* VM_OBJ_DEACT_ALL_STATS */ 2520#define V_O_D_A_P_MAX_BATCH 256 2521 2522 loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH); 2523#if VM_OBJ_DEACT_ALL_STATS 2524 pages_count = 0; 2525#endif /* VM_OBJ_DEACT_ALL_STATS */ 2526 vm_page_lock_queues(); 2527 queue_iterate(&object->memq, p, vm_page_t, listq) { 2528 if (--loop_count == 0) { 2529#if VM_OBJ_DEACT_ALL_STATS 2530 hw_atomic_add(&vm_object_deactivate_all_pages_batches, 2531 1); 2532 hw_atomic_add(&vm_object_deactivate_all_pages_pages, 2533 pages_count); 2534 pages_count = 0; 2535#endif /* VM_OBJ_DEACT_ALL_STATS */ 2536 lck_mtx_yield(&vm_page_queue_lock); 2537 loop_count = BATCH_LIMIT(V_O_D_A_P_MAX_BATCH); 2538 } 2539 if (!p->busy && !p->throttled) { 2540#if VM_OBJ_DEACT_ALL_STATS 2541 pages_count++; 2542#endif /* VM_OBJ_DEACT_ALL_STATS */ 2543 vm_page_deactivate(p); 2544 } 2545 } 2546#if VM_OBJ_DEACT_ALL_STATS 2547 if (pages_count) { 2548 hw_atomic_add(&vm_object_deactivate_all_pages_batches, 1); 2549 hw_atomic_add(&vm_object_deactivate_all_pages_pages, 2550 pages_count); 2551 pages_count = 0; 2552 } 2553#endif /* VM_OBJ_DEACT_ALL_STATS */ 2554 vm_page_unlock_queues(); 2555} 2556#endif /* VM_OBJECT_CACHE */ 2557 2558 2559 2560/* 2561 * The "chunk" macros are used by routines below when looking for pages to deactivate. These 2562 * exist because of the need to handle shadow chains. When deactivating pages, we only 2563 * want to deactive the ones at the top most level in the object chain. In order to do 2564 * this efficiently, the specified address range is divided up into "chunks" and we use 2565 * a bit map to keep track of which pages have already been processed as we descend down 2566 * the shadow chain. These chunk macros hide the details of the bit map implementation 2567 * as much as we can. 2568 * 2569 * For convenience, we use a 64-bit data type as the bit map, and therefore a chunk is 2570 * set to 64 pages. The bit map is indexed from the low-order end, so that the lowest 2571 * order bit represents page 0 in the current range and highest order bit represents 2572 * page 63. 2573 * 2574 * For further convenience, we also use negative logic for the page state in the bit map. 2575 * The bit is set to 1 to indicate it has not yet been seen, and to 0 to indicate it has 2576 * been processed. This way we can simply test the 64-bit long word to see if it's zero 2577 * to easily tell if the whole range has been processed. Therefore, the bit map starts 2578 * out with all the bits set. The macros below hide all these details from the caller. 
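 *
 * (Worked example, illustrative only: for a 3-page range,
 *
 *	chunk_state_t c;
 *	CHUNK_INIT(c, 3 * PAGE_SIZE_64);	bits 0..2 set, 3..63 pre-cleared
 *	PAGE_ALREADY_HANDLED(c, 1)		FALSE - page 1 not seen yet
 *	MARK_PAGE_HANDLED(c, 1);		clears bit 1
 *	CHUNK_NOT_COMPLETE(c)			TRUE while page 0 or 2 remains
 *
 * once bits 0 and 2 are cleared as well, CHUNK_NOT_COMPLETE(c) turns FALSE
 * and the walk down the shadow chain can stop early.)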
2579 */ 2580 2581#define PAGES_IN_A_CHUNK 64 /* The number of pages in the chunk must */ 2582 /* be the same as the number of bits in */ 2583 /* the chunk_state_t type. We use 64 */ 2584 /* just for convenience. */ 2585 2586#define CHUNK_SIZE (PAGES_IN_A_CHUNK * PAGE_SIZE_64) /* Size of a chunk in bytes */ 2587 2588typedef uint64_t chunk_state_t; 2589 2590/* 2591 * The bit map uses negative logic, so we start out with all 64 bits set to indicate 2592 * that no pages have been processed yet. Also, if len is less than the full CHUNK_SIZE, 2593 * then we mark pages beyond the len as having been "processed" so that we don't waste time 2594 * looking at pages in that range. This can save us from unnecessarily chasing down the 2595 * shadow chain. 2596 */ 2597 2598#define CHUNK_INIT(c, len) \ 2599 MACRO_BEGIN \ 2600 uint64_t p; \ 2601 \ 2602 (c) = 0xffffffffffffffffLL; \ 2603 \ 2604 for (p = (len) / PAGE_SIZE_64; p < PAGES_IN_A_CHUNK; p++) \ 2605 MARK_PAGE_HANDLED(c, p); \ 2606 MACRO_END 2607 2608 2609/* 2610 * Return true if all pages in the chunk have not yet been processed. 2611 */ 2612 2613#define CHUNK_NOT_COMPLETE(c) ((c) != 0) 2614 2615/* 2616 * Return true if the page at offset 'p' in the bit map has already been handled 2617 * while processing a higher level object in the shadow chain. 2618 */ 2619 2620#define PAGE_ALREADY_HANDLED(c, p) (((c) & (1LL << (p))) == 0) 2621 2622/* 2623 * Mark the page at offset 'p' in the bit map as having been processed. 2624 */ 2625 2626#define MARK_PAGE_HANDLED(c, p) \ 2627MACRO_BEGIN \ 2628 (c) = (c) & ~(1LL << (p)); \ 2629MACRO_END 2630 2631 2632/* 2633 * Return true if the page at the given offset has been paged out. Object is 2634 * locked upon entry and returned locked. 2635 */ 2636 2637static boolean_t 2638page_is_paged_out( 2639 vm_object_t object, 2640 vm_object_offset_t offset) 2641{ 2642 kern_return_t kr; 2643 memory_object_t pager; 2644 2645 /* 2646 * Check the existence map for the page if we have one, otherwise 2647 * ask the pager about this page. 2648 */ 2649 2650#if MACH_PAGEMAP 2651 if (object->existence_map) { 2652 if (vm_external_state_get(object->existence_map, offset) 2653 == VM_EXTERNAL_STATE_EXISTS) { 2654 /* 2655 * We found the page 2656 */ 2657 2658 return TRUE; 2659 } 2660 } else 2661#endif /* MACH_PAGEMAP */ 2662 if (object->internal && 2663 object->alive && 2664 !object->terminating && 2665 object->pager_ready) { 2666 2667 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 2668 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) 2669 == VM_EXTERNAL_STATE_EXISTS) { 2670 return TRUE; 2671 } else { 2672 return FALSE; 2673 } 2674 } 2675 2676 /* 2677 * We're already holding a "paging in progress" reference 2678 * so the object can't disappear when we release the lock. 
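	 *
	 * (The probe below is a zero-length memory_object_data_request() -
	 *  "just poke the pager"; a KERN_SUCCESS reply is taken to mean the
	 *  page exists on backing store.)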
2679 */ 2680 2681 assert(object->paging_in_progress); 2682 pager = object->pager; 2683 vm_object_unlock(object); 2684 2685 kr = memory_object_data_request( 2686 pager, 2687 offset + object->paging_offset, 2688 0, /* just poke the pager */ 2689 VM_PROT_READ, 2690 NULL); 2691 2692 vm_object_lock(object); 2693 2694 if (kr == KERN_SUCCESS) { 2695 2696 /* 2697 * We found the page 2698 */ 2699 2700 return TRUE; 2701 } 2702 } 2703 2704 return FALSE; 2705} 2706 2707 2708 2709/* 2710 * madvise_free_debug 2711 * 2712 * To help debug madvise(MADV_FREE*) mis-usage, this triggers a 2713 * zero-fill as soon as a page is affected by a madvise(MADV_FREE*), to 2714 * simulate the loss of the page's contents as if the page had been 2715 * reclaimed and then re-faulted. 2716 */ 2717#if DEVELOPMENT || DEBUG 2718int madvise_free_debug = 1; 2719#else /* DEBUG */ 2720int madvise_free_debug = 0; 2721#endif /* DEBUG */ 2722 2723/* 2724 * Deactivate the pages in the specified object and range. If kill_page is set, also discard any 2725 * page modified state from the pmap. Update the chunk_state as we go along. The caller must specify 2726 * a size that is less than or equal to the CHUNK_SIZE. 2727 */ 2728 2729static void 2730deactivate_pages_in_object( 2731 vm_object_t object, 2732 vm_object_offset_t offset, 2733 vm_object_size_t size, 2734 boolean_t kill_page, 2735 boolean_t reusable_page, 2736 boolean_t all_reusable, 2737 chunk_state_t *chunk_state, 2738 pmap_flush_context *pfc) 2739{ 2740 vm_page_t m; 2741 int p; 2742 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT]; 2743 struct vm_page_delayed_work *dwp; 2744 int dw_count; 2745 int dw_limit; 2746 unsigned int reusable = 0; 2747 2748 /* 2749 * Examine each page in the chunk. The variable 'p' is the page number relative to the start of the 2750 * chunk. Since this routine is called once for each level in the shadow chain, the chunk_state may 2751 * have pages marked as having been processed already. We stop the loop early if we find we've handled 2752 * all the pages in the chunk. 2753 */ 2754 2755 dwp = &dw_array[0]; 2756 dw_count = 0; 2757 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT); 2758 2759 for(p = 0; size && CHUNK_NOT_COMPLETE(*chunk_state); p++, size -= PAGE_SIZE_64, offset += PAGE_SIZE_64) { 2760 2761 /* 2762 * If this offset has already been found and handled in a higher level object, then don't 2763 * do anything with it in the current shadow object. 2764 */ 2765 2766 if (PAGE_ALREADY_HANDLED(*chunk_state, p)) 2767 continue; 2768 2769 /* 2770 * See if the page at this offset is around. First check to see if the page is resident, 2771 * then if not, check the existence map or with the pager. 2772 */ 2773 2774 if ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { 2775 2776 /* 2777 * We found a page we were looking for. Mark it as "handled" now in the chunk_state 2778 * so that we won't bother looking for a page at this offset again if there are more 2779 * shadow objects. Then deactivate the page. 2780 */ 2781 2782 MARK_PAGE_HANDLED(*chunk_state, p); 2783 2784 if (( !VM_PAGE_WIRED(m)) && (!m->private) && (!m->gobbled) && (!m->busy) && (!m->laundry)) { 2785 int clear_refmod; 2786 int pmap_options; 2787 2788 dwp->dw_mask = 0; 2789 2790 pmap_options = 0; 2791 clear_refmod = VM_MEM_REFERENCED; 2792 dwp->dw_mask |= DW_clear_reference; 2793 2794 if ((kill_page) && (object->internal)) { 2795 if (madvise_free_debug) { 2796 /* 2797 * zero-fill the page now 2798 * to simulate it being 2799 * reclaimed and re-faulted. 
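				 * (With madvise(MADV_FREE*) the contents of
				 *  the range may be discarded at any time, so
				 *  a later read is allowed to observe zeroes;
				 *  this debug mode just makes that data loss
				 *  happen immediately and deterministically.)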
2800 */ 2801 pmap_zero_page(m->phys_page); 2802 } 2803 m->precious = FALSE; 2804 m->dirty = FALSE; 2805 2806 clear_refmod |= VM_MEM_MODIFIED; 2807 if (m->throttled) { 2808 /* 2809 * This page is now clean and 2810 * reclaimable. Move it out 2811 * of the throttled queue, so 2812 * that vm_pageout_scan() can 2813 * find it. 2814 */ 2815 dwp->dw_mask |= DW_move_page; 2816 } 2817#if MACH_PAGEMAP 2818 vm_external_state_clr(object->existence_map, offset); 2819#endif /* MACH_PAGEMAP */ 2820 VM_COMPRESSOR_PAGER_STATE_CLR(object, 2821 offset); 2822 2823 if (reusable_page && !m->reusable) { 2824 assert(!all_reusable); 2825 assert(!object->all_reusable); 2826 m->reusable = TRUE; 2827 object->reusable_page_count++; 2828 assert(object->resident_page_count >= object->reusable_page_count); 2829 reusable++; 2830 /* 2831 * Tell pmap this page is now 2832 * "reusable" (to update pmap 2833 * stats for all mappings). 2834 */ 2835 pmap_options |= PMAP_OPTIONS_SET_REUSABLE; 2836 } 2837 } 2838 pmap_options |= PMAP_OPTIONS_NOFLUSH; 2839 pmap_clear_refmod_options(m->phys_page, 2840 clear_refmod, 2841 pmap_options, 2842 (void *)pfc); 2843 2844 if (!m->throttled && !(reusable_page || all_reusable)) 2845 dwp->dw_mask |= DW_move_page; 2846 2847 if (dwp->dw_mask) 2848 VM_PAGE_ADD_DELAYED_WORK(dwp, m, 2849 dw_count); 2850 2851 if (dw_count >= dw_limit) { 2852 if (reusable) { 2853 OSAddAtomic(reusable, 2854 &vm_page_stats_reusable.reusable_count); 2855 vm_page_stats_reusable.reusable += reusable; 2856 reusable = 0; 2857 } 2858 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 2859 2860 dwp = &dw_array[0]; 2861 dw_count = 0; 2862 } 2863 } 2864 2865 } else { 2866 2867 /* 2868 * The page at this offset isn't memory resident, check to see if it's 2869 * been paged out. If so, mark it as handled so we don't bother looking 2870 * for it in the shadow chain. 2871 */ 2872 2873 if (page_is_paged_out(object, offset)) { 2874 MARK_PAGE_HANDLED(*chunk_state, p); 2875 2876 /* 2877 * If we're killing a non-resident page, then clear the page in the existence 2878 * map so we don't bother paging it back in if it's touched again in the future. 2879 */ 2880 2881 if ((kill_page) && (object->internal)) { 2882#if MACH_PAGEMAP 2883 vm_external_state_clr(object->existence_map, offset); 2884#endif /* MACH_PAGEMAP */ 2885 VM_COMPRESSOR_PAGER_STATE_CLR(object, 2886 offset); 2887 } 2888 } 2889 } 2890 } 2891 2892 if (reusable) { 2893 OSAddAtomic(reusable, &vm_page_stats_reusable.reusable_count); 2894 vm_page_stats_reusable.reusable += reusable; 2895 reusable = 0; 2896 } 2897 2898 if (dw_count) 2899 vm_page_do_delayed_work(object, &dw_array[0], dw_count); 2900} 2901 2902 2903/* 2904 * Deactive a "chunk" of the given range of the object starting at offset. A "chunk" 2905 * will always be less than or equal to the given size. The total range is divided up 2906 * into chunks for efficiency and performance related to the locks and handling the shadow 2907 * chain. This routine returns how much of the given "size" it actually processed. It's 2908 * up to the caler to loop and keep calling this routine until the entire range they want 2909 * to process has been done. 
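 *
 * (The caller loop in vm_object_deactivate_pages() below is simply:
 *
 *	while (size) {
 *		length = deactivate_a_chunk(object, offset, size, ...);
 *		size -= length;
 *		offset += length;
 *	}
 * )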
2910 */ 2911 2912static vm_object_size_t 2913deactivate_a_chunk( 2914 vm_object_t orig_object, 2915 vm_object_offset_t offset, 2916 vm_object_size_t size, 2917 boolean_t kill_page, 2918 boolean_t reusable_page, 2919 boolean_t all_reusable, 2920 pmap_flush_context *pfc) 2921{ 2922 vm_object_t object; 2923 vm_object_t tmp_object; 2924 vm_object_size_t length; 2925 chunk_state_t chunk_state; 2926 2927 2928 /* 2929 * Get set to do a chunk. We'll do up to CHUNK_SIZE, but no more than the 2930 * remaining size the caller asked for. 2931 */ 2932 2933 length = MIN(size, CHUNK_SIZE); 2934 2935 /* 2936 * The chunk_state keeps track of which pages we've already processed if there's 2937 * a shadow chain on this object. At this point, we haven't done anything with this 2938 * range of pages yet, so initialize the state to indicate no pages processed yet. 2939 */ 2940 2941 CHUNK_INIT(chunk_state, length); 2942 object = orig_object; 2943 2944 /* 2945 * Start at the top level object and iterate around the loop once for each object 2946 * in the shadow chain. We stop processing early if we've already found all the pages 2947 * in the range. Otherwise we stop when we run out of shadow objects. 2948 */ 2949 2950 while (object && CHUNK_NOT_COMPLETE(chunk_state)) { 2951 vm_object_paging_begin(object); 2952 2953 deactivate_pages_in_object(object, offset, length, kill_page, reusable_page, all_reusable, &chunk_state, pfc); 2954 2955 vm_object_paging_end(object); 2956 2957 /* 2958 * We've finished with this object, see if there's a shadow object. If 2959 * there is, update the offset and lock the new object. We also turn off 2960 * kill_page at this point since we only kill pages in the top most object. 2961 */ 2962 2963 tmp_object = object->shadow; 2964 2965 if (tmp_object) { 2966 kill_page = FALSE; 2967 reusable_page = FALSE; 2968 all_reusable = FALSE; 2969 offset += object->vo_shadow_offset; 2970 vm_object_lock(tmp_object); 2971 } 2972 2973 if (object != orig_object) 2974 vm_object_unlock(object); 2975 2976 object = tmp_object; 2977 } 2978 2979 if (object && object != orig_object) 2980 vm_object_unlock(object); 2981 2982 return length; 2983} 2984 2985 2986 2987/* 2988 * Move any resident pages in the specified range to the inactive queue. If kill_page is set, 2989 * we also clear the modified status of the page and "forget" any changes that have been made 2990 * to the page. 2991 */ 2992 2993__private_extern__ void 2994vm_object_deactivate_pages( 2995 vm_object_t object, 2996 vm_object_offset_t offset, 2997 vm_object_size_t size, 2998 boolean_t kill_page, 2999 boolean_t reusable_page) 3000{ 3001 vm_object_size_t length; 3002 boolean_t all_reusable; 3003 pmap_flush_context pmap_flush_context_storage; 3004 3005 /* 3006 * We break the range up into chunks and do one chunk at a time. This is for 3007 * efficiency and performance while handling the shadow chains and the locks. 3008 * The deactivate_a_chunk() function returns how much of the range it processed. 3009 * We keep calling this routine until the given size is exhausted. 3010 */ 3011 3012 3013 all_reusable = FALSE; 3014#if 11 3015 /* 3016 * For the sake of accurate "reusable" pmap stats, we need 3017 * to tell pmap about each page that is no longer "reusable", 3018 * so we can't do the "all_reusable" optimization. 
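	 * (The disabled #else branch below shows that optimization: when the
	 *  request covers the whole internal object and no page is marked
	 *  reusable yet, the object could be flagged all_reusable in one shot
	 *  instead of page by page.)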
3019 */ 3020#else 3021 if (reusable_page && 3022 object->internal && 3023 object->vo_size != 0 && 3024 object->vo_size == size && 3025 object->reusable_page_count == 0) { 3026 all_reusable = TRUE; 3027 reusable_page = FALSE; 3028 } 3029#endif 3030 3031 if ((reusable_page || all_reusable) && object->all_reusable) { 3032 /* This means MADV_FREE_REUSABLE has been called twice, which 3033 * is probably illegal. */ 3034 return; 3035 } 3036 3037 pmap_flush_context_init(&pmap_flush_context_storage); 3038 3039 while (size) { 3040 length = deactivate_a_chunk(object, offset, size, kill_page, reusable_page, all_reusable, &pmap_flush_context_storage); 3041 3042 size -= length; 3043 offset += length; 3044 } 3045 pmap_flush(&pmap_flush_context_storage); 3046 3047 if (all_reusable) { 3048 if (!object->all_reusable) { 3049 unsigned int reusable; 3050 3051 object->all_reusable = TRUE; 3052 assert(object->reusable_page_count == 0); 3053 /* update global stats */ 3054 reusable = object->resident_page_count; 3055 OSAddAtomic(reusable, 3056 &vm_page_stats_reusable.reusable_count); 3057 vm_page_stats_reusable.reusable += reusable; 3058 vm_page_stats_reusable.all_reusable_calls++; 3059 } 3060 } else if (reusable_page) { 3061 vm_page_stats_reusable.partial_reusable_calls++; 3062 } 3063} 3064 3065void 3066vm_object_reuse_pages( 3067 vm_object_t object, 3068 vm_object_offset_t start_offset, 3069 vm_object_offset_t end_offset, 3070 boolean_t allow_partial_reuse) 3071{ 3072 vm_object_offset_t cur_offset; 3073 vm_page_t m; 3074 unsigned int reused, reusable; 3075 3076#define VM_OBJECT_REUSE_PAGE(object, m, reused) \ 3077 MACRO_BEGIN \ 3078 if ((m) != VM_PAGE_NULL && \ 3079 (m)->reusable) { \ 3080 assert((object)->reusable_page_count <= \ 3081 (object)->resident_page_count); \ 3082 assert((object)->reusable_page_count > 0); \ 3083 (object)->reusable_page_count--; \ 3084 (m)->reusable = FALSE; \ 3085 (reused)++; \ 3086 /* \ 3087 * Tell pmap that this page is no longer \ 3088 * "reusable", to update the "reusable" stats \ 3089 * for all the pmaps that have mapped this \ 3090 * page. 
\ 3091 */ \ 3092 pmap_clear_refmod_options((m)->phys_page, \ 3093 0, /* refmod */ \ 3094 (PMAP_OPTIONS_CLEAR_REUSABLE \ 3095 | PMAP_OPTIONS_NOFLUSH), \ 3096 NULL); \ 3097 } \ 3098 MACRO_END 3099 3100 reused = 0; 3101 reusable = 0; 3102 3103 vm_object_lock_assert_exclusive(object); 3104 3105 if (object->all_reusable) { 3106 panic("object %p all_reusable: can't update pmap stats\n", 3107 object); 3108 assert(object->reusable_page_count == 0); 3109 object->all_reusable = FALSE; 3110 if (end_offset - start_offset == object->vo_size || 3111 !allow_partial_reuse) { 3112 vm_page_stats_reusable.all_reuse_calls++; 3113 reused = object->resident_page_count; 3114 } else { 3115 vm_page_stats_reusable.partial_reuse_calls++; 3116 queue_iterate(&object->memq, m, vm_page_t, listq) { 3117 if (m->offset < start_offset || 3118 m->offset >= end_offset) { 3119 m->reusable = TRUE; 3120 object->reusable_page_count++; 3121 assert(object->resident_page_count >= object->reusable_page_count); 3122 continue; 3123 } else { 3124 assert(!m->reusable); 3125 reused++; 3126 } 3127 } 3128 } 3129 } else if (object->resident_page_count > 3130 ((end_offset - start_offset) >> PAGE_SHIFT)) { 3131 vm_page_stats_reusable.partial_reuse_calls++; 3132 for (cur_offset = start_offset; 3133 cur_offset < end_offset; 3134 cur_offset += PAGE_SIZE_64) { 3135 if (object->reusable_page_count == 0) { 3136 break; 3137 } 3138 m = vm_page_lookup(object, cur_offset); 3139 VM_OBJECT_REUSE_PAGE(object, m, reused); 3140 } 3141 } else { 3142 vm_page_stats_reusable.partial_reuse_calls++; 3143 queue_iterate(&object->memq, m, vm_page_t, listq) { 3144 if (object->reusable_page_count == 0) { 3145 break; 3146 } 3147 if (m->offset < start_offset || 3148 m->offset >= end_offset) { 3149 continue; 3150 } 3151 VM_OBJECT_REUSE_PAGE(object, m, reused); 3152 } 3153 } 3154 3155 /* update global stats */ 3156 OSAddAtomic(reusable-reused, &vm_page_stats_reusable.reusable_count); 3157 vm_page_stats_reusable.reused += reused; 3158 vm_page_stats_reusable.reusable += reusable; 3159} 3160 3161/* 3162 * Routine: vm_object_pmap_protect 3163 * 3164 * Purpose: 3165 * Reduces the permission for all physical 3166 * pages in the specified object range. 3167 * 3168 * If removing write permission only, it is 3169 * sufficient to protect only the pages in 3170 * the top-level object; only those pages may 3171 * have write permission. 3172 * 3173 * If removing all access, we must follow the 3174 * shadow chain from the top-level object to 3175 * remove access to all pages in shadowed objects. 3176 * 3177 * The object must *not* be locked. The object must 3178 * be temporary/internal. 3179 * 3180 * If pmap is not NULL, this routine assumes that 3181 * the only mappings for the pages are in that 3182 * pmap. 
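 *
 * (Illustrative call, not from the original sources - removing write
 *  permission from a range known to be mapped only by "map":
 *
 *	vm_object_pmap_protect(object, offset_in_object, size,
 *			       map->pmap, start_address_in_map,
 *			       cur_prot & ~VM_PROT_WRITE);
 *
 *  "map", "start_address_in_map" and "cur_prot" are hypothetical caller
 *  state; with prot == VM_PROT_NONE the routine also walks the shadow
 *  chain, as described above.)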
3183 */ 3184 3185__private_extern__ void 3186vm_object_pmap_protect( 3187 register vm_object_t object, 3188 register vm_object_offset_t offset, 3189 vm_object_size_t size, 3190 pmap_t pmap, 3191 vm_map_offset_t pmap_start, 3192 vm_prot_t prot) 3193{ 3194 vm_object_pmap_protect_options(object, offset, size, 3195 pmap, pmap_start, prot, 0); 3196} 3197 3198__private_extern__ void 3199vm_object_pmap_protect_options( 3200 register vm_object_t object, 3201 register vm_object_offset_t offset, 3202 vm_object_size_t size, 3203 pmap_t pmap, 3204 vm_map_offset_t pmap_start, 3205 vm_prot_t prot, 3206 int options) 3207{ 3208 pmap_flush_context pmap_flush_context_storage; 3209 boolean_t delayed_pmap_flush = FALSE; 3210 3211 if (object == VM_OBJECT_NULL) 3212 return; 3213 size = vm_object_round_page(size); 3214 offset = vm_object_trunc_page(offset); 3215 3216 vm_object_lock(object); 3217 3218 if (object->phys_contiguous) { 3219 if (pmap != NULL) { 3220 vm_object_unlock(object); 3221 pmap_protect_options(pmap, 3222 pmap_start, 3223 pmap_start + size, 3224 prot, 3225 options & ~PMAP_OPTIONS_NOFLUSH, 3226 NULL); 3227 } else { 3228 vm_object_offset_t phys_start, phys_end, phys_addr; 3229 3230 phys_start = object->vo_shadow_offset + offset; 3231 phys_end = phys_start + size; 3232 assert(phys_start <= phys_end); 3233 assert(phys_end <= object->vo_shadow_offset + object->vo_size); 3234 vm_object_unlock(object); 3235 3236 pmap_flush_context_init(&pmap_flush_context_storage); 3237 delayed_pmap_flush = FALSE; 3238 3239 for (phys_addr = phys_start; 3240 phys_addr < phys_end; 3241 phys_addr += PAGE_SIZE_64) { 3242 pmap_page_protect_options( 3243 (ppnum_t) (phys_addr >> PAGE_SHIFT), 3244 prot, 3245 options | PMAP_OPTIONS_NOFLUSH, 3246 (void *)&pmap_flush_context_storage); 3247 delayed_pmap_flush = TRUE; 3248 } 3249 if (delayed_pmap_flush == TRUE) 3250 pmap_flush(&pmap_flush_context_storage); 3251 } 3252 return; 3253 } 3254 3255 assert(object->internal); 3256 3257 while (TRUE) { 3258 if (ptoa_64(object->resident_page_count) > size/2 && pmap != PMAP_NULL) { 3259 vm_object_unlock(object); 3260 pmap_protect_options(pmap, pmap_start, pmap_start + size, prot, 3261 options & ~PMAP_OPTIONS_NOFLUSH, NULL); 3262 return; 3263 } 3264 3265 pmap_flush_context_init(&pmap_flush_context_storage); 3266 delayed_pmap_flush = FALSE; 3267 3268 /* 3269 * if we are doing large ranges with respect to resident 3270 * page count then we should interate over pages otherwise 3271 * inverse page look-up will be faster 3272 */ 3273 if (ptoa_64(object->resident_page_count / 4) < size) { 3274 vm_page_t p; 3275 vm_object_offset_t end; 3276 3277 end = offset + size; 3278 3279 queue_iterate(&object->memq, p, vm_page_t, listq) { 3280 if (!p->fictitious && (offset <= p->offset) && (p->offset < end)) { 3281 vm_map_offset_t start; 3282 3283 start = pmap_start + p->offset - offset; 3284 3285 if (pmap != PMAP_NULL) 3286 pmap_protect_options( 3287 pmap, 3288 start, 3289 start + PAGE_SIZE_64, 3290 prot, 3291 options | PMAP_OPTIONS_NOFLUSH, 3292 &pmap_flush_context_storage); 3293 else 3294 pmap_page_protect_options( 3295 p->phys_page, 3296 prot, 3297 options | PMAP_OPTIONS_NOFLUSH, 3298 &pmap_flush_context_storage); 3299 delayed_pmap_flush = TRUE; 3300 } 3301 } 3302 3303 } else { 3304 vm_page_t p; 3305 vm_object_offset_t end; 3306 vm_object_offset_t target_off; 3307 3308 end = offset + size; 3309 3310 for (target_off = offset; 3311 target_off < end; target_off += PAGE_SIZE) { 3312 3313 p = vm_page_lookup(object, target_off); 3314 3315 if (p != VM_PAGE_NULL) { 
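				/*
				 * Resident page in the range: reduce its
				 * protections, either through the given pmap
				 * or, lacking one, through every pmap via the
				 * physical page, deferring the TLB flush with
				 * PMAP_OPTIONS_NOFLUSH.
				 */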
3316 vm_object_offset_t start; 3317 3318 start = pmap_start + (p->offset - offset); 3319 3320 if (pmap != PMAP_NULL) 3321 pmap_protect_options( 3322 pmap, 3323 start, 3324 start + PAGE_SIZE_64, 3325 prot, 3326 options | PMAP_OPTIONS_NOFLUSH, 3327 &pmap_flush_context_storage); 3328 else 3329 pmap_page_protect_options( 3330 p->phys_page, 3331 prot, 3332 options | PMAP_OPTIONS_NOFLUSH, 3333 &pmap_flush_context_storage); 3334 delayed_pmap_flush = TRUE; 3335 } 3336 } 3337 } 3338 if (delayed_pmap_flush == TRUE) 3339 pmap_flush(&pmap_flush_context_storage); 3340 3341 if (prot == VM_PROT_NONE) { 3342 /* 3343 * Must follow shadow chain to remove access 3344 * to pages in shadowed objects. 3345 */ 3346 register vm_object_t next_object; 3347 3348 next_object = object->shadow; 3349 if (next_object != VM_OBJECT_NULL) { 3350 offset += object->vo_shadow_offset; 3351 vm_object_lock(next_object); 3352 vm_object_unlock(object); 3353 object = next_object; 3354 } 3355 else { 3356 /* 3357 * End of chain - we are done. 3358 */ 3359 break; 3360 } 3361 } 3362 else { 3363 /* 3364 * Pages in shadowed objects may never have 3365 * write permission - we may stop here. 3366 */ 3367 break; 3368 } 3369 } 3370 3371 vm_object_unlock(object); 3372} 3373 3374/* 3375 * Routine: vm_object_copy_slowly 3376 * 3377 * Description: 3378 * Copy the specified range of the source 3379 * virtual memory object without using 3380 * protection-based optimizations (such 3381 * as copy-on-write). The pages in the 3382 * region are actually copied. 3383 * 3384 * In/out conditions: 3385 * The caller must hold a reference and a lock 3386 * for the source virtual memory object. The source 3387 * object will be returned *unlocked*. 3388 * 3389 * Results: 3390 * If the copy is completed successfully, KERN_SUCCESS is 3391 * returned. If the caller asserted the interruptible 3392 * argument, and an interruption occurred while waiting 3393 * for a user-generated event, MACH_SEND_INTERRUPTED is 3394 * returned. Other values may be returned to indicate 3395 * hard errors during the copy operation. 3396 * 3397 * A new virtual memory object is returned in a 3398 * parameter (_result_object). The contents of this 3399 * new object, starting at a zero offset, are a copy 3400 * of the source memory region. In the event of 3401 * an error, this parameter will contain the value 3402 * VM_OBJECT_NULL. 3403 */ 3404__private_extern__ kern_return_t 3405vm_object_copy_slowly( 3406 register vm_object_t src_object, 3407 vm_object_offset_t src_offset, 3408 vm_object_size_t size, 3409 boolean_t interruptible, 3410 vm_object_t *_result_object) /* OUT */ 3411{ 3412 vm_object_t new_object; 3413 vm_object_offset_t new_offset; 3414 3415 struct vm_object_fault_info fault_info; 3416 3417 XPR(XPR_VM_OBJECT, "v_o_c_slowly obj 0x%x off 0x%x size 0x%x\n", 3418 src_object, src_offset, size, 0, 0); 3419 3420 if (size == 0) { 3421 vm_object_unlock(src_object); 3422 *_result_object = VM_OBJECT_NULL; 3423 return(KERN_INVALID_ARGUMENT); 3424 } 3425 3426 /* 3427 * Prevent destruction of the source object while we copy. 3428 */ 3429 3430 vm_object_reference_locked(src_object); 3431 vm_object_unlock(src_object); 3432 3433 /* 3434 * Create a new object to hold the copied pages. 3435 * A few notes: 3436 * We fill the new object starting at offset 0, 3437 * regardless of the input offset. 3438 * We don't bother to lock the new object within 3439 * this routine, since we have the only reference. 
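	 *
	 * (Illustrative caller sequence for this routine, not from the
	 *  original sources; the caller already holds a reference on
	 *  src_object:
	 *
	 *	vm_object_lock(src_object);
	 *	kr = vm_object_copy_slowly(src_object, src_offset, size,
	 *				   FALSE, &new_object);
	 *
	 *  src_object comes back unlocked; on success new_object holds the
	 *  copied range starting at offset 0.)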
3440 */ 3441 3442 new_object = vm_object_allocate(size); 3443 new_offset = 0; 3444 3445 assert(size == trunc_page_64(size)); /* Will the loop terminate? */ 3446 3447 fault_info.interruptible = interruptible; 3448 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 3449 fault_info.user_tag = 0; 3450 fault_info.pmap_options = 0; 3451 fault_info.lo_offset = src_offset; 3452 fault_info.hi_offset = src_offset + size; 3453 fault_info.no_cache = FALSE; 3454 fault_info.stealth = TRUE; 3455 fault_info.io_sync = FALSE; 3456 fault_info.cs_bypass = FALSE; 3457 fault_info.mark_zf_absent = FALSE; 3458 fault_info.batch_pmap_op = FALSE; 3459 3460 for ( ; 3461 size != 0 ; 3462 src_offset += PAGE_SIZE_64, 3463 new_offset += PAGE_SIZE_64, size -= PAGE_SIZE_64 3464 ) { 3465 vm_page_t new_page; 3466 vm_fault_return_t result; 3467 3468 vm_object_lock(new_object); 3469 3470 while ((new_page = vm_page_alloc(new_object, new_offset)) 3471 == VM_PAGE_NULL) { 3472 3473 vm_object_unlock(new_object); 3474 3475 if (!vm_page_wait(interruptible)) { 3476 vm_object_deallocate(new_object); 3477 vm_object_deallocate(src_object); 3478 *_result_object = VM_OBJECT_NULL; 3479 return(MACH_SEND_INTERRUPTED); 3480 } 3481 vm_object_lock(new_object); 3482 } 3483 vm_object_unlock(new_object); 3484 3485 do { 3486 vm_prot_t prot = VM_PROT_READ; 3487 vm_page_t _result_page; 3488 vm_page_t top_page; 3489 register 3490 vm_page_t result_page; 3491 kern_return_t error_code; 3492 3493 vm_object_lock(src_object); 3494 vm_object_paging_begin(src_object); 3495 3496 if (size > (vm_size_t) -1) { 3497 /* 32-bit overflow */ 3498 fault_info.cluster_size = (vm_size_t) (0 - PAGE_SIZE); 3499 } else { 3500 fault_info.cluster_size = (vm_size_t) size; 3501 assert(fault_info.cluster_size == size); 3502 } 3503 3504 XPR(XPR_VM_FAULT,"vm_object_copy_slowly -> vm_fault_page",0,0,0,0,0); 3505 _result_page = VM_PAGE_NULL; 3506 result = vm_fault_page(src_object, src_offset, 3507 VM_PROT_READ, FALSE, 3508 FALSE, /* page not looked up */ 3509 &prot, &_result_page, &top_page, 3510 (int *)0, 3511 &error_code, FALSE, FALSE, &fault_info); 3512 3513 switch(result) { 3514 case VM_FAULT_SUCCESS: 3515 result_page = _result_page; 3516 3517 /* 3518 * Copy the page to the new object. 3519 * 3520 * POLICY DECISION: 3521 * If result_page is clean, 3522 * we could steal it instead 3523 * of copying. 3524 */ 3525 3526 vm_page_copy(result_page, new_page); 3527 vm_object_unlock(result_page->object); 3528 3529 /* 3530 * Let go of both pages (make them 3531 * not busy, perform wakeup, activate). 3532 */ 3533 vm_object_lock(new_object); 3534 SET_PAGE_DIRTY(new_page, FALSE); 3535 PAGE_WAKEUP_DONE(new_page); 3536 vm_object_unlock(new_object); 3537 3538 vm_object_lock(result_page->object); 3539 PAGE_WAKEUP_DONE(result_page); 3540 3541 vm_page_lockspin_queues(); 3542 if (!result_page->active && 3543 !result_page->inactive && 3544 !result_page->throttled) 3545 vm_page_activate(result_page); 3546 vm_page_activate(new_page); 3547 vm_page_unlock_queues(); 3548 3549 /* 3550 * Release paging references and 3551 * top-level placeholder page, if any. 
3552 */ 3553 3554 vm_fault_cleanup(result_page->object, 3555 top_page); 3556 3557 break; 3558 3559 case VM_FAULT_RETRY: 3560 break; 3561 3562 case VM_FAULT_MEMORY_SHORTAGE: 3563 if (vm_page_wait(interruptible)) 3564 break; 3565 /* fall thru */ 3566 3567 case VM_FAULT_INTERRUPTED: 3568 vm_object_lock(new_object); 3569 VM_PAGE_FREE(new_page); 3570 vm_object_unlock(new_object); 3571 3572 vm_object_deallocate(new_object); 3573 vm_object_deallocate(src_object); 3574 *_result_object = VM_OBJECT_NULL; 3575 return(MACH_SEND_INTERRUPTED); 3576 3577 case VM_FAULT_SUCCESS_NO_VM_PAGE: 3578 /* success but no VM page: fail */ 3579 vm_object_paging_end(src_object); 3580 vm_object_unlock(src_object); 3581 /*FALLTHROUGH*/ 3582 case VM_FAULT_MEMORY_ERROR: 3583 /* 3584 * A policy choice: 3585 * (a) ignore pages that we can't 3586 * copy 3587 * (b) return the null object if 3588 * any page fails [chosen] 3589 */ 3590 3591 vm_object_lock(new_object); 3592 VM_PAGE_FREE(new_page); 3593 vm_object_unlock(new_object); 3594 3595 vm_object_deallocate(new_object); 3596 vm_object_deallocate(src_object); 3597 *_result_object = VM_OBJECT_NULL; 3598 return(error_code ? error_code: 3599 KERN_MEMORY_ERROR); 3600 3601 default: 3602 panic("vm_object_copy_slowly: unexpected error" 3603 " 0x%x from vm_fault_page()\n", result); 3604 } 3605 } while (result != VM_FAULT_SUCCESS); 3606 } 3607 3608 /* 3609 * Lose the extra reference, and return our object. 3610 */ 3611 vm_object_deallocate(src_object); 3612 *_result_object = new_object; 3613 return(KERN_SUCCESS); 3614} 3615 3616/* 3617 * Routine: vm_object_copy_quickly 3618 * 3619 * Purpose: 3620 * Copy the specified range of the source virtual 3621 * memory object, if it can be done without waiting 3622 * for user-generated events. 3623 * 3624 * Results: 3625 * If the copy is successful, the copy is returned in 3626 * the arguments; otherwise, the arguments are not 3627 * affected. 3628 * 3629 * In/out conditions: 3630 * The object should be unlocked on entry and exit. 3631 */ 3632 3633/*ARGSUSED*/ 3634__private_extern__ boolean_t 3635vm_object_copy_quickly( 3636 vm_object_t *_object, /* INOUT */ 3637 __unused vm_object_offset_t offset, /* IN */ 3638 __unused vm_object_size_t size, /* IN */ 3639 boolean_t *_src_needs_copy, /* OUT */ 3640 boolean_t *_dst_needs_copy) /* OUT */ 3641{ 3642 vm_object_t object = *_object; 3643 memory_object_copy_strategy_t copy_strategy; 3644 3645 XPR(XPR_VM_OBJECT, "v_o_c_quickly obj 0x%x off 0x%x size 0x%x\n", 3646 *_object, offset, size, 0, 0); 3647 if (object == VM_OBJECT_NULL) { 3648 *_src_needs_copy = FALSE; 3649 *_dst_needs_copy = FALSE; 3650 return(TRUE); 3651 } 3652 3653 vm_object_lock(object); 3654 3655 copy_strategy = object->copy_strategy; 3656 3657 switch (copy_strategy) { 3658 case MEMORY_OBJECT_COPY_SYMMETRIC: 3659 3660 /* 3661 * Symmetric copy strategy. 3662 * Make another reference to the object. 3663 * Leave object/offset unchanged. 3664 */ 3665 3666 vm_object_reference_locked(object); 3667 object->shadowed = TRUE; 3668 vm_object_unlock(object); 3669 3670 /* 3671 * Both source and destination must make 3672 * shadows, and the source must be made 3673 * read-only if not already. 
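	 *
	 * (Callers are expected to act on these flags themselves, e.g. by
	 *  write-protecting the source mapping when *_src_needs_copy comes
	 *  back TRUE and marking the destination mapping copy-on-write for
	 *  *_dst_needs_copy; this is a general description of the symmetric
	 *  strategy, not a quote of any particular caller.)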
3674 */ 3675 3676 *_src_needs_copy = TRUE; 3677 *_dst_needs_copy = TRUE; 3678 3679 break; 3680 3681 case MEMORY_OBJECT_COPY_DELAY: 3682 vm_object_unlock(object); 3683 return(FALSE); 3684 3685 default: 3686 vm_object_unlock(object); 3687 return(FALSE); 3688 } 3689 return(TRUE); 3690} 3691 3692static int copy_call_count = 0; 3693static int copy_call_sleep_count = 0; 3694static int copy_call_restart_count = 0; 3695 3696/* 3697 * Routine: vm_object_copy_call [internal] 3698 * 3699 * Description: 3700 * Copy the source object (src_object), using the 3701 * user-managed copy algorithm. 3702 * 3703 * In/out conditions: 3704 * The source object must be locked on entry. It 3705 * will be *unlocked* on exit. 3706 * 3707 * Results: 3708 * If the copy is successful, KERN_SUCCESS is returned. 3709 * A new object that represents the copied virtual 3710 * memory is returned in a parameter (*_result_object). 3711 * If the return value indicates an error, this parameter 3712 * is not valid. 3713 */ 3714static kern_return_t 3715vm_object_copy_call( 3716 vm_object_t src_object, 3717 vm_object_offset_t src_offset, 3718 vm_object_size_t size, 3719 vm_object_t *_result_object) /* OUT */ 3720{ 3721 kern_return_t kr; 3722 vm_object_t copy; 3723 boolean_t check_ready = FALSE; 3724 uint32_t try_failed_count = 0; 3725 3726 /* 3727 * If a copy is already in progress, wait and retry. 3728 * 3729 * XXX 3730 * Consider making this call interruptable, as Mike 3731 * intended it to be. 3732 * 3733 * XXXO 3734 * Need a counter or version or something to allow 3735 * us to use the copy that the currently requesting 3736 * thread is obtaining -- is it worth adding to the 3737 * vm object structure? Depends how common this case it. 3738 */ 3739 copy_call_count++; 3740 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) { 3741 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL, 3742 THREAD_UNINT); 3743 copy_call_restart_count++; 3744 } 3745 3746 /* 3747 * Indicate (for the benefit of memory_object_create_copy) 3748 * that we want a copy for src_object. (Note that we cannot 3749 * do a real assert_wait before calling memory_object_copy, 3750 * so we simply set the flag.) 3751 */ 3752 3753 vm_object_set_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL); 3754 vm_object_unlock(src_object); 3755 3756 /* 3757 * Ask the memory manager to give us a memory object 3758 * which represents a copy of the src object. 3759 * The memory manager may give us a memory object 3760 * which we already have, or it may give us a 3761 * new memory object. This memory object will arrive 3762 * via memory_object_create_copy. 3763 */ 3764 3765 kr = KERN_FAILURE; /* XXX need to change memory_object.defs */ 3766 if (kr != KERN_SUCCESS) { 3767 return kr; 3768 } 3769 3770 /* 3771 * Wait for the copy to arrive. 3772 */ 3773 vm_object_lock(src_object); 3774 while (vm_object_wanted(src_object, VM_OBJECT_EVENT_COPY_CALL)) { 3775 vm_object_sleep(src_object, VM_OBJECT_EVENT_COPY_CALL, 3776 THREAD_UNINT); 3777 copy_call_sleep_count++; 3778 } 3779Retry: 3780 assert(src_object->copy != VM_OBJECT_NULL); 3781 copy = src_object->copy; 3782 if (!vm_object_lock_try(copy)) { 3783 vm_object_unlock(src_object); 3784 3785 try_failed_count++; 3786 mutex_pause(try_failed_count); /* wait a bit */ 3787 3788 vm_object_lock(src_object); 3789 goto Retry; 3790 } 3791 if (copy->vo_size < src_offset+size) 3792 copy->vo_size = src_offset+size; 3793 3794 if (!copy->pager_ready) 3795 check_ready = TRUE; 3796 3797 /* 3798 * Return the copy. 
3799 */ 3800 *_result_object = copy; 3801 vm_object_unlock(copy); 3802 vm_object_unlock(src_object); 3803 3804 /* Wait for the copy to be ready. */ 3805 if (check_ready == TRUE) { 3806 vm_object_lock(copy); 3807 while (!copy->pager_ready) { 3808 vm_object_sleep(copy, VM_OBJECT_EVENT_PAGER_READY, THREAD_UNINT); 3809 } 3810 vm_object_unlock(copy); 3811 } 3812 3813 return KERN_SUCCESS; 3814} 3815 3816static int copy_delayed_lock_collisions = 0; 3817static int copy_delayed_max_collisions = 0; 3818static int copy_delayed_lock_contention = 0; 3819static int copy_delayed_protect_iterate = 0; 3820 3821/* 3822 * Routine: vm_object_copy_delayed [internal] 3823 * 3824 * Description: 3825 * Copy the specified virtual memory object, using 3826 * the asymmetric copy-on-write algorithm. 3827 * 3828 * In/out conditions: 3829 * The src_object must be locked on entry. It will be unlocked 3830 * on exit - so the caller must also hold a reference to it. 3831 * 3832 * This routine will not block waiting for user-generated 3833 * events. It is not interruptible. 3834 */ 3835__private_extern__ vm_object_t 3836vm_object_copy_delayed( 3837 vm_object_t src_object, 3838 vm_object_offset_t src_offset, 3839 vm_object_size_t size, 3840 boolean_t src_object_shared) 3841{ 3842 vm_object_t new_copy = VM_OBJECT_NULL; 3843 vm_object_t old_copy; 3844 vm_page_t p; 3845 vm_object_size_t copy_size = src_offset + size; 3846 pmap_flush_context pmap_flush_context_storage; 3847 boolean_t delayed_pmap_flush = FALSE; 3848 3849 3850 int collisions = 0; 3851 /* 3852 * The user-level memory manager wants to see all of the changes 3853 * to this object, but it has promised not to make any changes on 3854 * its own. 3855 * 3856 * Perform an asymmetric copy-on-write, as follows: 3857 * Create a new object, called a "copy object" to hold 3858 * pages modified by the new mapping (i.e., the copy, 3859 * not the original mapping). 3860 * Record the original object as the backing object for 3861 * the copy object. If the original mapping does not 3862 * change a page, it may be used read-only by the copy. 3863 * Record the copy object in the original object. 3864 * When the original mapping causes a page to be modified, 3865 * it must be copied to a new page that is "pushed" to 3866 * the copy object. 3867 * Mark the new mapping (the copy object) copy-on-write. 3868 * This makes the copy object itself read-only, allowing 3869 * it to be reused if the original mapping makes no 3870 * changes, and simplifying the synchronization required 3871 * in the "push" operation described above. 3872 * 3873 * The copy-on-write is said to be assymetric because the original 3874 * object is *not* marked copy-on-write. A copied page is pushed 3875 * to the copy object, regardless which party attempted to modify 3876 * the page. 3877 * 3878 * Repeated asymmetric copy operations may be done. If the 3879 * original object has not been changed since the last copy, its 3880 * copy object can be reused. Otherwise, a new copy object can be 3881 * inserted between the original object and its previous copy 3882 * object. Since any copy object is read-only, this cannot affect 3883 * affect the contents of the previous copy object. 3884 * 3885 * Note that a copy object is higher in the object tree than the 3886 * original object; therefore, use of the copy object recorded in 3887 * the original object must be done carefully, to avoid deadlock. 3888 */ 3889 3890 Retry: 3891 3892 /* 3893 * Wait for paging in progress. 
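	 * (If we only hold src_object's lock shared, the code below first
	 *  re-takes it exclusive - unlock, then lock - and retries,
	 *  presumably because the paging-wait path wants the exclusive
	 *  lock; true_share objects skip the wait entirely.)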
3894 */ 3895 if (!src_object->true_share && 3896 (src_object->paging_in_progress != 0 || 3897 src_object->activity_in_progress != 0)) { 3898 if (src_object_shared == TRUE) { 3899 vm_object_unlock(src_object); 3900 vm_object_lock(src_object); 3901 src_object_shared = FALSE; 3902 goto Retry; 3903 } 3904 vm_object_paging_wait(src_object, THREAD_UNINT); 3905 } 3906 /* 3907 * See whether we can reuse the result of a previous 3908 * copy operation. 3909 */ 3910 3911 old_copy = src_object->copy; 3912 if (old_copy != VM_OBJECT_NULL) { 3913 int lock_granted; 3914 3915 /* 3916 * Try to get the locks (out of order) 3917 */ 3918 if (src_object_shared == TRUE) 3919 lock_granted = vm_object_lock_try_shared(old_copy); 3920 else 3921 lock_granted = vm_object_lock_try(old_copy); 3922 3923 if (!lock_granted) { 3924 vm_object_unlock(src_object); 3925 3926 if (collisions++ == 0) 3927 copy_delayed_lock_contention++; 3928 mutex_pause(collisions); 3929 3930 /* Heisenberg Rules */ 3931 copy_delayed_lock_collisions++; 3932 3933 if (collisions > copy_delayed_max_collisions) 3934 copy_delayed_max_collisions = collisions; 3935 3936 if (src_object_shared == TRUE) 3937 vm_object_lock_shared(src_object); 3938 else 3939 vm_object_lock(src_object); 3940 3941 goto Retry; 3942 } 3943 3944 /* 3945 * Determine whether the old copy object has 3946 * been modified. 3947 */ 3948 3949 if (old_copy->resident_page_count == 0 && 3950 !old_copy->pager_created) { 3951 /* 3952 * It has not been modified. 3953 * 3954 * Return another reference to 3955 * the existing copy-object if 3956 * we can safely grow it (if 3957 * needed). 3958 */ 3959 3960 if (old_copy->vo_size < copy_size) { 3961 if (src_object_shared == TRUE) { 3962 vm_object_unlock(old_copy); 3963 vm_object_unlock(src_object); 3964 3965 vm_object_lock(src_object); 3966 src_object_shared = FALSE; 3967 goto Retry; 3968 } 3969 /* 3970 * We can't perform a delayed copy if any of the 3971 * pages in the extended range are wired (because 3972 * we can't safely take write permission away from 3973 * wired pages). If the pages aren't wired, then 3974 * go ahead and protect them. 
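	 *
	 * Only the pages in the range being added to the old copy
	 * object -- offsets [old_copy->vo_size, copy_size) -- are
	 * write-protected here; offsets below old_copy->vo_size are
	 * already covered by the existing copy object.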
3975 */ 3976 copy_delayed_protect_iterate++; 3977 3978 pmap_flush_context_init(&pmap_flush_context_storage); 3979 delayed_pmap_flush = FALSE; 3980 3981 queue_iterate(&src_object->memq, p, vm_page_t, listq) { 3982 if (!p->fictitious && 3983 p->offset >= old_copy->vo_size && 3984 p->offset < copy_size) { 3985 if (VM_PAGE_WIRED(p)) { 3986 vm_object_unlock(old_copy); 3987 vm_object_unlock(src_object); 3988 3989 if (new_copy != VM_OBJECT_NULL) { 3990 vm_object_unlock(new_copy); 3991 vm_object_deallocate(new_copy); 3992 } 3993 if (delayed_pmap_flush == TRUE) 3994 pmap_flush(&pmap_flush_context_storage); 3995 3996 return VM_OBJECT_NULL; 3997 } else { 3998 pmap_page_protect_options(p->phys_page, (VM_PROT_ALL & ~VM_PROT_WRITE), 3999 PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); 4000 delayed_pmap_flush = TRUE; 4001 } 4002 } 4003 } 4004 if (delayed_pmap_flush == TRUE) 4005 pmap_flush(&pmap_flush_context_storage); 4006 4007 old_copy->vo_size = copy_size; 4008 } 4009 if (src_object_shared == TRUE) 4010 vm_object_reference_shared(old_copy); 4011 else 4012 vm_object_reference_locked(old_copy); 4013 vm_object_unlock(old_copy); 4014 vm_object_unlock(src_object); 4015 4016 if (new_copy != VM_OBJECT_NULL) { 4017 vm_object_unlock(new_copy); 4018 vm_object_deallocate(new_copy); 4019 } 4020 return(old_copy); 4021 } 4022 4023 4024 4025 /* 4026 * Adjust the size argument so that the newly-created 4027 * copy object will be large enough to back either the 4028 * old copy object or the new mapping. 4029 */ 4030 if (old_copy->vo_size > copy_size) 4031 copy_size = old_copy->vo_size; 4032 4033 if (new_copy == VM_OBJECT_NULL) { 4034 vm_object_unlock(old_copy); 4035 vm_object_unlock(src_object); 4036 new_copy = vm_object_allocate(copy_size); 4037 vm_object_lock(src_object); 4038 vm_object_lock(new_copy); 4039 4040 src_object_shared = FALSE; 4041 goto Retry; 4042 } 4043 new_copy->vo_size = copy_size; 4044 4045 /* 4046 * The copy-object is always made large enough to 4047 * completely shadow the original object, since 4048 * it may have several users who want to shadow 4049 * the original object at different points. 4050 */ 4051 4052 assert((old_copy->shadow == src_object) && 4053 (old_copy->vo_shadow_offset == (vm_object_offset_t) 0)); 4054 4055 } else if (new_copy == VM_OBJECT_NULL) { 4056 vm_object_unlock(src_object); 4057 new_copy = vm_object_allocate(copy_size); 4058 vm_object_lock(src_object); 4059 vm_object_lock(new_copy); 4060 4061 src_object_shared = FALSE; 4062 goto Retry; 4063 } 4064 4065 /* 4066 * We now have the src object locked, and the new copy object 4067 * allocated and locked (and potentially the old copy locked). 4068 * Before we go any further, make sure we can still perform 4069 * a delayed copy, as the situation may have changed. 4070 * 4071 * Specifically, we can't perform a delayed copy if any of the 4072 * pages in the range are wired (because we can't safely take 4073 * write permission away from wired pages). If the pages aren't 4074 * wired, then go ahead and protect them. 
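	 *
	 * Write permission is removed with PMAP_OPTIONS_NOFLUSH so that
	 * the resulting pmap/TLB flushes can be batched into the single
	 * pmap_flush() call once the whole range has been examined.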
4075 */ 4076 copy_delayed_protect_iterate++; 4077 4078 pmap_flush_context_init(&pmap_flush_context_storage); 4079 delayed_pmap_flush = FALSE; 4080 4081 queue_iterate(&src_object->memq, p, vm_page_t, listq) { 4082 if (!p->fictitious && p->offset < copy_size) { 4083 if (VM_PAGE_WIRED(p)) { 4084 if (old_copy) 4085 vm_object_unlock(old_copy); 4086 vm_object_unlock(src_object); 4087 vm_object_unlock(new_copy); 4088 vm_object_deallocate(new_copy); 4089 4090 if (delayed_pmap_flush == TRUE) 4091 pmap_flush(&pmap_flush_context_storage); 4092 4093 return VM_OBJECT_NULL; 4094 } else { 4095 pmap_page_protect_options(p->phys_page, (VM_PROT_ALL & ~VM_PROT_WRITE), 4096 PMAP_OPTIONS_NOFLUSH, (void *)&pmap_flush_context_storage); 4097 delayed_pmap_flush = TRUE; 4098 } 4099 } 4100 } 4101 if (delayed_pmap_flush == TRUE) 4102 pmap_flush(&pmap_flush_context_storage); 4103 4104 if (old_copy != VM_OBJECT_NULL) { 4105 /* 4106 * Make the old copy-object shadow the new one. 4107 * It will receive no more pages from the original 4108 * object. 4109 */ 4110 4111 /* remove ref. from old_copy */ 4112 vm_object_lock_assert_exclusive(src_object); 4113 src_object->ref_count--; 4114 assert(src_object->ref_count > 0); 4115 vm_object_lock_assert_exclusive(old_copy); 4116 old_copy->shadow = new_copy; 4117 vm_object_lock_assert_exclusive(new_copy); 4118 assert(new_copy->ref_count > 0); 4119 new_copy->ref_count++; /* for old_copy->shadow ref. */ 4120 4121#if TASK_SWAPPER 4122 if (old_copy->res_count) { 4123 VM_OBJ_RES_INCR(new_copy); 4124 VM_OBJ_RES_DECR(src_object); 4125 } 4126#endif 4127 4128 vm_object_unlock(old_copy); /* done with old_copy */ 4129 } 4130 4131 /* 4132 * Point the new copy at the existing object. 4133 */ 4134 vm_object_lock_assert_exclusive(new_copy); 4135 new_copy->shadow = src_object; 4136 new_copy->vo_shadow_offset = 0; 4137 new_copy->shadowed = TRUE; /* caller must set needs_copy */ 4138 4139 vm_object_lock_assert_exclusive(src_object); 4140 vm_object_reference_locked(src_object); 4141 src_object->copy = new_copy; 4142 vm_object_unlock(src_object); 4143 vm_object_unlock(new_copy); 4144 4145 XPR(XPR_VM_OBJECT, 4146 "vm_object_copy_delayed: used copy object %X for source %X\n", 4147 new_copy, src_object, 0, 0, 0); 4148 4149 return new_copy; 4150} 4151 4152/* 4153 * Routine: vm_object_copy_strategically 4154 * 4155 * Purpose: 4156 * Perform a copy according to the source object's 4157 * declared strategy. This operation may block, 4158 * and may be interrupted. 4159 */ 4160__private_extern__ kern_return_t 4161vm_object_copy_strategically( 4162 register vm_object_t src_object, 4163 vm_object_offset_t src_offset, 4164 vm_object_size_t size, 4165 vm_object_t *dst_object, /* OUT */ 4166 vm_object_offset_t *dst_offset, /* OUT */ 4167 boolean_t *dst_needs_copy) /* OUT */ 4168{ 4169 boolean_t result; 4170 boolean_t interruptible = THREAD_ABORTSAFE; /* XXX */ 4171 boolean_t object_lock_shared = FALSE; 4172 memory_object_copy_strategy_t copy_strategy; 4173 4174 assert(src_object != VM_OBJECT_NULL); 4175 4176 copy_strategy = src_object->copy_strategy; 4177 4178 if (copy_strategy == MEMORY_OBJECT_COPY_DELAY) { 4179 vm_object_lock_shared(src_object); 4180 object_lock_shared = TRUE; 4181 } else 4182 vm_object_lock(src_object); 4183 4184 /* 4185 * The copy strategy is only valid if the memory manager 4186 * is "ready". Internal objects are always ready. 
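	 *
	 * If we hold the source object's lock "shared" we must trade it
	 * for an exclusive lock before sleeping on
	 * VM_OBJECT_EVENT_PAGER_READY; the wait may also be interrupted,
	 * in which case we return MACH_SEND_INTERRUPTED with the OUT
	 * parameters cleared.
	 *
	 * A caller consumes the OUT parameters roughly like this
	 * (hypothetical sketch, not a real call site):
	 *
	 *	kern_return_t		kr;
	 *	vm_object_t		dst;
	 *	vm_object_offset_t	dst_off;
	 *	boolean_t		needs_copy;
	 *
	 *	kr = vm_object_copy_strategically(src_object, offset, len,
	 *					  &dst, &dst_off, &needs_copy);
	 *	if (kr == KERN_MEMORY_RESTART_COPY)
	 *		... fall back to a symmetric (in-line) copy ...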
4187 */ 4188 4189 while (!src_object->internal && !src_object->pager_ready) { 4190 wait_result_t wait_result; 4191 4192 if (object_lock_shared == TRUE) { 4193 vm_object_unlock(src_object); 4194 vm_object_lock(src_object); 4195 object_lock_shared = FALSE; 4196 continue; 4197 } 4198 wait_result = vm_object_sleep( src_object, 4199 VM_OBJECT_EVENT_PAGER_READY, 4200 interruptible); 4201 if (wait_result != THREAD_AWAKENED) { 4202 vm_object_unlock(src_object); 4203 *dst_object = VM_OBJECT_NULL; 4204 *dst_offset = 0; 4205 *dst_needs_copy = FALSE; 4206 return(MACH_SEND_INTERRUPTED); 4207 } 4208 } 4209 4210 /* 4211 * Use the appropriate copy strategy. 4212 */ 4213 4214 switch (copy_strategy) { 4215 case MEMORY_OBJECT_COPY_DELAY: 4216 *dst_object = vm_object_copy_delayed(src_object, 4217 src_offset, size, object_lock_shared); 4218 if (*dst_object != VM_OBJECT_NULL) { 4219 *dst_offset = src_offset; 4220 *dst_needs_copy = TRUE; 4221 result = KERN_SUCCESS; 4222 break; 4223 } 4224 vm_object_lock(src_object); 4225 /* fall thru when delayed copy not allowed */ 4226 4227 case MEMORY_OBJECT_COPY_NONE: 4228 result = vm_object_copy_slowly(src_object, src_offset, size, 4229 interruptible, dst_object); 4230 if (result == KERN_SUCCESS) { 4231 *dst_offset = 0; 4232 *dst_needs_copy = FALSE; 4233 } 4234 break; 4235 4236 case MEMORY_OBJECT_COPY_CALL: 4237 result = vm_object_copy_call(src_object, src_offset, size, 4238 dst_object); 4239 if (result == KERN_SUCCESS) { 4240 *dst_offset = src_offset; 4241 *dst_needs_copy = TRUE; 4242 } 4243 break; 4244 4245 case MEMORY_OBJECT_COPY_SYMMETRIC: 4246 XPR(XPR_VM_OBJECT, "v_o_c_strategically obj 0x%x off 0x%x size 0x%x\n", src_object, src_offset, size, 0, 0); 4247 vm_object_unlock(src_object); 4248 result = KERN_MEMORY_RESTART_COPY; 4249 break; 4250 4251 default: 4252 panic("copy_strategically: bad strategy"); 4253 result = KERN_INVALID_ARGUMENT; 4254 } 4255 return(result); 4256} 4257 4258/* 4259 * vm_object_shadow: 4260 * 4261 * Create a new object which is backed by the 4262 * specified existing object range. The source 4263 * object reference is deallocated. 4264 * 4265 * The new object and offset into that object 4266 * are returned in the source parameters. 4267 */ 4268boolean_t vm_object_shadow_check = TRUE; 4269 4270__private_extern__ boolean_t 4271vm_object_shadow( 4272 vm_object_t *object, /* IN/OUT */ 4273 vm_object_offset_t *offset, /* IN/OUT */ 4274 vm_object_size_t length) 4275{ 4276 register vm_object_t source; 4277 register vm_object_t result; 4278 4279 source = *object; 4280 assert(source != VM_OBJECT_NULL); 4281 if (source == VM_OBJECT_NULL) 4282 return FALSE; 4283 4284#if 0 4285 /* 4286 * XXX FBDP 4287 * This assertion is valid but it gets triggered by Rosetta for example 4288 * due to a combination of vm_remap() that changes a VM object's 4289 * copy_strategy from SYMMETRIC to DELAY and vm_protect(VM_PROT_COPY) 4290 * that then sets "needs_copy" on its map entry. This creates a 4291 * mapping situation that VM should never see and doesn't know how to 4292 * handle. 4293 * It's not clear if this can create any real problem but we should 4294 * look into fixing this, probably by having vm_protect(VM_PROT_COPY) 4295 * do more than just set "needs_copy" to handle the copy-on-write... 4296 * In the meantime, let's disable the assertion. 4297 */ 4298 assert(source->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC); 4299#endif 4300 4301 /* 4302 * Determine if we really need a shadow. 
4303 * 4304 * If the source object is larger than what we are trying 4305 * to create, then force the shadow creation even if the 4306 * ref count is 1. This will allow us to [potentially] 4307 * collapse the underlying object away in the future 4308 * (freeing up the extra data it might contain and that 4309 * we don't need). 4310 */ 4311 if (vm_object_shadow_check && 4312 source->vo_size == length && 4313 source->ref_count == 1 && 4314 (source->shadow == VM_OBJECT_NULL || 4315 source->shadow->copy == VM_OBJECT_NULL) ) 4316 { 4317 source->shadowed = FALSE; 4318 return FALSE; 4319 } 4320 4321 /* 4322 * Allocate a new object with the given length 4323 */ 4324 4325 if ((result = vm_object_allocate(length)) == VM_OBJECT_NULL) 4326 panic("vm_object_shadow: no object for shadowing"); 4327 4328 /* 4329 * The new object shadows the source object, adding 4330 * a reference to it. Our caller changes his reference 4331 * to point to the new object, removing a reference to 4332 * the source object. Net result: no change of reference 4333 * count. 4334 */ 4335 result->shadow = source; 4336 4337 /* 4338 * Store the offset into the source object, 4339 * and fix up the offset into the new object. 4340 */ 4341 4342 result->vo_shadow_offset = *offset; 4343 4344 /* 4345 * Return the new things 4346 */ 4347 4348 *offset = 0; 4349 *object = result; 4350 return TRUE; 4351} 4352 4353/* 4354 * The relationship between vm_object structures and 4355 * the memory_object requires careful synchronization. 4356 * 4357 * All associations are created by memory_object_create_named 4358 * for external pagers and vm_object_pager_create for internal 4359 * objects as follows: 4360 * 4361 * pager: the memory_object itself, supplied by 4362 * the user requesting a mapping (or the kernel, 4363 * when initializing internal objects); the 4364 * kernel simulates holding send rights by keeping 4365 * a port reference; 4366 * 4367 * pager_request: 4368 * the memory object control port, 4369 * created by the kernel; the kernel holds 4370 * receive (and ownership) rights to this 4371 * port, but no other references. 4372 * 4373 * When initialization is complete, the "initialized" field 4374 * is asserted. Other mappings using a particular memory object, 4375 * and any references to the vm_object gained through the 4376 * port association must wait for this initialization to occur. 4377 * 4378 * In order to allow the memory manager to set attributes before 4379 * requests (notably virtual copy operations, but also data or 4380 * unlock requests) are made, a "ready" attribute is made available. 4381 * Only the memory manager may affect the value of this attribute. 4382 * Its value does not affect critical kernel functions, such as 4383 * internal object initialization or destruction. [Furthermore, 4384 * memory objects created by the kernel are assumed to be ready 4385 * immediately; the default memory manager need not explicitly 4386 * set the "ready" attribute.] 4387 * 4388 * [Both the "initialized" and "ready" attribute wait conditions 4389 * use the "pager" field as the wait event.] 4390 * 4391 * The port associations can be broken down by any of the 4392 * following routines: 4393 * vm_object_terminate: 4394 * No references to the vm_object remain, and 4395 * the object cannot (or will not) be cached. 4396 * This is the normal case, and is done even 4397 * though one of the other cases has already been 4398 * done. 
4399 * memory_object_destroy: 4400 * The memory manager has requested that the 4401 * kernel relinquish references to the memory 4402 * object. [The memory manager may not want to 4403 * destroy the memory object, but may wish to 4404 * refuse or tear down existing memory mappings.] 4405 * 4406 * Each routine that breaks an association must break all of 4407 * them at once. At some later time, that routine must clear 4408 * the pager field and release the memory object references. 4409 * [Furthermore, each routine must cope with the simultaneous 4410 * or previous operations of the others.] 4411 * 4412 * In addition to the lock on the object, the vm_object_hash_lock 4413 * governs the associations. References gained through the 4414 * association require use of the hash lock. 4415 * 4416 * Because the pager field may be cleared spontaneously, it 4417 * cannot be used to determine whether a memory object has 4418 * ever been associated with a particular vm_object. [This 4419 * knowledge is important to the shadow object mechanism.] 4420 * For this reason, an additional "created" attribute is 4421 * provided. 4422 * 4423 * During various paging operations, the pager reference found in the 4424 * vm_object must be valid. To prevent this from being released, 4425 * (other than being removed, i.e., made null), routines may use 4426 * the vm_object_paging_begin/end routines [actually, macros]. 4427 * The implementation uses the "paging_in_progress" and "wanted" fields. 4428 * [Operations that alter the validity of the pager values include the 4429 * termination routines and vm_object_collapse.] 4430 */ 4431 4432 4433/* 4434 * Routine: vm_object_enter 4435 * Purpose: 4436 * Find a VM object corresponding to the given 4437 * pager; if no such object exists, create one, 4438 * and initialize the pager. 4439 */ 4440vm_object_t 4441vm_object_enter( 4442 memory_object_t pager, 4443 vm_object_size_t size, 4444 boolean_t internal, 4445 boolean_t init, 4446 boolean_t named) 4447{ 4448 register vm_object_t object; 4449 vm_object_t new_object; 4450 boolean_t must_init; 4451 vm_object_hash_entry_t entry, new_entry; 4452 uint32_t try_failed_count = 0; 4453 lck_mtx_t *lck; 4454 4455 if (pager == MEMORY_OBJECT_NULL) 4456 return(vm_object_allocate(size)); 4457 4458 new_object = VM_OBJECT_NULL; 4459 new_entry = VM_OBJECT_HASH_ENTRY_NULL; 4460 must_init = init; 4461 4462 /* 4463 * Look for an object associated with this port. 4464 */ 4465Retry: 4466 lck = vm_object_hash_lock_spin(pager); 4467 do { 4468 entry = vm_object_hash_lookup(pager, FALSE); 4469 4470 if (entry == VM_OBJECT_HASH_ENTRY_NULL) { 4471 if (new_object == VM_OBJECT_NULL) { 4472 /* 4473 * We must unlock to create a new object; 4474 * if we do so, we must try the lookup again. 4475 */ 4476 vm_object_hash_unlock(lck); 4477 assert(new_entry == VM_OBJECT_HASH_ENTRY_NULL); 4478 new_entry = vm_object_hash_entry_alloc(pager); 4479 new_object = vm_object_allocate(size); 4480 lck = vm_object_hash_lock_spin(pager); 4481 } else { 4482 /* 4483 * Lookup failed twice, and we have something 4484 * to insert; set the object. 
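				 *
				 * (We still hold the hash bucket lock here,
				 * so the insertion cannot race with another
				 * lookup for the same pager.)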
4485 */ 4486 vm_object_lock(new_object); 4487 vm_object_hash_insert(new_entry, new_object); 4488 vm_object_unlock(new_object); 4489 entry = new_entry; 4490 new_entry = VM_OBJECT_HASH_ENTRY_NULL; 4491 new_object = VM_OBJECT_NULL; 4492 must_init = TRUE; 4493 } 4494 } else if (entry->object == VM_OBJECT_NULL) { 4495 /* 4496 * If a previous object is being terminated, 4497 * we must wait for the termination message 4498 * to be queued (and lookup the entry again). 4499 */ 4500 entry->waiting = TRUE; 4501 entry = VM_OBJECT_HASH_ENTRY_NULL; 4502 assert_wait((event_t) pager, THREAD_UNINT); 4503 vm_object_hash_unlock(lck); 4504 4505 thread_block(THREAD_CONTINUE_NULL); 4506 lck = vm_object_hash_lock_spin(pager); 4507 } 4508 } while (entry == VM_OBJECT_HASH_ENTRY_NULL); 4509 4510 object = entry->object; 4511 assert(object != VM_OBJECT_NULL); 4512 4513 if (!must_init) { 4514 if ( !vm_object_lock_try(object)) { 4515 4516 vm_object_hash_unlock(lck); 4517 4518 try_failed_count++; 4519 mutex_pause(try_failed_count); /* wait a bit */ 4520 goto Retry; 4521 } 4522 assert(!internal || object->internal); 4523#if VM_OBJECT_CACHE 4524 if (object->ref_count == 0) { 4525 if ( !vm_object_cache_lock_try()) { 4526 4527 vm_object_hash_unlock(lck); 4528 vm_object_unlock(object); 4529 4530 try_failed_count++; 4531 mutex_pause(try_failed_count); /* wait a bit */ 4532 goto Retry; 4533 } 4534 XPR(XPR_VM_OBJECT_CACHE, 4535 "vm_object_enter: removing %x from cache, head (%x, %x)\n", 4536 object, 4537 vm_object_cached_list.next, 4538 vm_object_cached_list.prev, 0,0); 4539 queue_remove(&vm_object_cached_list, object, 4540 vm_object_t, cached_list); 4541 vm_object_cached_count--; 4542 4543 vm_object_cache_unlock(); 4544 } 4545#endif 4546 if (named) { 4547 assert(!object->named); 4548 object->named = TRUE; 4549 } 4550 vm_object_lock_assert_exclusive(object); 4551 object->ref_count++; 4552 vm_object_res_reference(object); 4553 4554 vm_object_hash_unlock(lck); 4555 vm_object_unlock(object); 4556 4557 VM_STAT_INCR(hits); 4558 } else 4559 vm_object_hash_unlock(lck); 4560 4561 assert(object->ref_count > 0); 4562 4563 VM_STAT_INCR(lookups); 4564 4565 XPR(XPR_VM_OBJECT, 4566 "vm_o_enter: pager 0x%x obj 0x%x must_init %d\n", 4567 pager, object, must_init, 0, 0); 4568 4569 /* 4570 * If we raced to create a vm_object but lost, let's 4571 * throw away ours. 4572 */ 4573 4574 if (new_object != VM_OBJECT_NULL) 4575 vm_object_deallocate(new_object); 4576 4577 if (new_entry != VM_OBJECT_HASH_ENTRY_NULL) 4578 vm_object_hash_entry_free(new_entry); 4579 4580 if (must_init) { 4581 memory_object_control_t control; 4582 4583 /* 4584 * Allocate request port. 4585 */ 4586 4587 control = memory_object_control_allocate(object); 4588 assert (control != MEMORY_OBJECT_CONTROL_NULL); 4589 4590 vm_object_lock(object); 4591 assert(object != kernel_object); 4592 4593 /* 4594 * Copy the reference we were given. 4595 */ 4596 4597 memory_object_reference(pager); 4598 object->pager_created = TRUE; 4599 object->pager = pager; 4600 object->internal = internal; 4601 object->pager_trusted = internal; 4602 if (!internal) { 4603 /* copy strategy invalid until set by memory manager */ 4604 object->copy_strategy = MEMORY_OBJECT_COPY_INVALID; 4605 } 4606 object->pager_control = control; 4607 object->pager_ready = FALSE; 4608 4609 vm_object_unlock(object); 4610 4611 /* 4612 * Let the pager know we're using it. 
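	 *
	 * memory_object_init() passes the pager its control reference and
	 * the page size it should expect.  For internal objects the kernel
	 * marks the pager ready itself just below; external pagers signal
	 * readiness later through the memory manager interface.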
4613 */ 4614 4615 (void) memory_object_init(pager, 4616 object->pager_control, 4617 PAGE_SIZE); 4618 4619 vm_object_lock(object); 4620 if (named) 4621 object->named = TRUE; 4622 if (internal) { 4623 object->pager_ready = TRUE; 4624 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY); 4625 } 4626 4627 object->pager_initialized = TRUE; 4628 vm_object_wakeup(object, VM_OBJECT_EVENT_INITIALIZED); 4629 } else { 4630 vm_object_lock(object); 4631 } 4632 4633 /* 4634 * [At this point, the object must be locked] 4635 */ 4636 4637 /* 4638 * Wait for the work above to be done by the first 4639 * thread to map this object. 4640 */ 4641 4642 while (!object->pager_initialized) { 4643 vm_object_sleep(object, 4644 VM_OBJECT_EVENT_INITIALIZED, 4645 THREAD_UNINT); 4646 } 4647 vm_object_unlock(object); 4648 4649 XPR(XPR_VM_OBJECT, 4650 "vm_object_enter: vm_object %x, memory_object %x, internal %d\n", 4651 object, object->pager, internal, 0,0); 4652 return(object); 4653} 4654 4655/* 4656 * Routine: vm_object_pager_create 4657 * Purpose: 4658 * Create a memory object for an internal object. 4659 * In/out conditions: 4660 * The object is locked on entry and exit; 4661 * it may be unlocked within this call. 4662 * Limitations: 4663 * Only one thread may be performing a 4664 * vm_object_pager_create on an object at 4665 * a time. Presumably, only the pageout 4666 * daemon will be using this routine. 4667 */ 4668 4669void 4670vm_object_pager_create( 4671 register vm_object_t object) 4672{ 4673 memory_object_t pager; 4674 vm_object_hash_entry_t entry; 4675 lck_mtx_t *lck; 4676#if MACH_PAGEMAP 4677 vm_object_size_t size; 4678 vm_external_map_t map; 4679#endif /* MACH_PAGEMAP */ 4680 4681 XPR(XPR_VM_OBJECT, "vm_object_pager_create, object 0x%X\n", 4682 object, 0,0,0,0); 4683 4684 assert(object != kernel_object); 4685 4686 if (memory_manager_default_check() != KERN_SUCCESS) 4687 return; 4688 4689 /* 4690 * Prevent collapse or termination by holding a paging reference 4691 */ 4692 4693 vm_object_paging_begin(object); 4694 if (object->pager_created) { 4695 /* 4696 * Someone else got to it first... 4697 * wait for them to finish initializing the ports 4698 */ 4699 while (!object->pager_initialized) { 4700 vm_object_sleep(object, 4701 VM_OBJECT_EVENT_INITIALIZED, 4702 THREAD_UNINT); 4703 } 4704 vm_object_paging_end(object); 4705 return; 4706 } 4707 4708 /* 4709 * Indicate that a memory object has been assigned 4710 * before dropping the lock, to prevent a race. 4711 */ 4712 4713 object->pager_created = TRUE; 4714 object->paging_offset = 0; 4715 4716#if MACH_PAGEMAP 4717 size = object->vo_size; 4718#endif /* MACH_PAGEMAP */ 4719 vm_object_unlock(object); 4720 4721#if MACH_PAGEMAP 4722 if (DEFAULT_PAGER_IS_ACTIVE) { 4723 map = vm_external_create(size); 4724 vm_object_lock(object); 4725 assert(object->vo_size == size); 4726 object->existence_map = map; 4727 vm_object_unlock(object); 4728 } 4729#endif /* MACH_PAGEMAP */ 4730 4731 if ((uint32_t) object->vo_size != object->vo_size) { 4732 panic("vm_object_pager_create(): object size 0x%llx >= 4GB\n", 4733 (uint64_t) object->vo_size); 4734 } 4735 4736 /* 4737 * Create the [internal] pager, and associate it with this object. 4738 * 4739 * We make the association here so that vm_object_enter() 4740 * can look up the object to complete initializing it. No 4741 * user will ever map this object. 
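	 *
	 * The pager itself comes from the default memory manager; the
	 * reference returned by memory_object_create() is dropped again
	 * below, once vm_object_enter() has taken its own.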
4742 */ 4743 { 4744 memory_object_default_t dmm; 4745 4746 /* acquire a reference for the default memory manager */ 4747 dmm = memory_manager_default_reference(); 4748 4749 assert(object->temporary); 4750 4751 /* create our new memory object */ 4752 assert((vm_size_t) object->vo_size == object->vo_size); 4753 (void) memory_object_create(dmm, (vm_size_t) object->vo_size, 4754 &pager); 4755 4756 memory_object_default_deallocate(dmm); 4757 } 4758 4759 entry = vm_object_hash_entry_alloc(pager); 4760 4761 vm_object_lock(object); 4762 lck = vm_object_hash_lock_spin(pager); 4763 vm_object_hash_insert(entry, object); 4764 vm_object_hash_unlock(lck); 4765 vm_object_unlock(object); 4766 4767 /* 4768 * A reference was returned by 4769 * memory_object_create(), and it is 4770 * copied by vm_object_enter(). 4771 */ 4772 4773 if (vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE) != object) 4774 panic("vm_object_pager_create: mismatch"); 4775 4776 /* 4777 * Drop the reference we were passed. 4778 */ 4779 memory_object_deallocate(pager); 4780 4781 vm_object_lock(object); 4782 4783 /* 4784 * Release the paging reference 4785 */ 4786 vm_object_paging_end(object); 4787} 4788 4789void 4790vm_object_compressor_pager_create( 4791 register vm_object_t object) 4792{ 4793 memory_object_t pager; 4794 vm_object_hash_entry_t entry; 4795 lck_mtx_t *lck; 4796 vm_object_t pager_object = VM_OBJECT_NULL; 4797 4798 assert(object != kernel_object); 4799 4800 /* 4801 * Prevent collapse or termination by holding a paging reference 4802 */ 4803 4804 vm_object_paging_begin(object); 4805 if (object->pager_created) { 4806 /* 4807 * Someone else got to it first... 4808 * wait for them to finish initializing the ports 4809 */ 4810 while (!object->pager_initialized) { 4811 vm_object_sleep(object, 4812 VM_OBJECT_EVENT_INITIALIZED, 4813 THREAD_UNINT); 4814 } 4815 vm_object_paging_end(object); 4816 return; 4817 } 4818 4819 /* 4820 * Indicate that a memory object has been assigned 4821 * before dropping the lock, to prevent a race. 4822 */ 4823 4824 object->pager_created = TRUE; 4825 object->paging_offset = 0; 4826 4827 vm_object_unlock(object); 4828 4829 if ((uint32_t) (object->vo_size/PAGE_SIZE) != 4830 (object->vo_size/PAGE_SIZE)) { 4831 panic("vm_object_compressor_pager_create(%p): " 4832 "object size 0x%llx >= 0x%llx\n", 4833 object, 4834 (uint64_t) object->vo_size, 4835 0x0FFFFFFFFULL*PAGE_SIZE); 4836 } 4837 4838 /* 4839 * Create the [internal] pager, and associate it with this object. 4840 * 4841 * We make the association here so that vm_object_enter() 4842 * can look up the object to complete initializing it. No 4843 * user will ever map this object. 4844 */ 4845 { 4846 assert(object->temporary); 4847 4848 /* create our new memory object */ 4849 assert((uint32_t) (object->vo_size/PAGE_SIZE) == 4850 (object->vo_size/PAGE_SIZE)); 4851 (void) compressor_memory_object_create( 4852 (memory_object_size_t) object->vo_size, 4853 &pager); 4854 if (pager == NULL) { 4855 panic("vm_object_compressor_pager_create(): " 4856 "no pager for object %p size 0x%llx\n", 4857 object, (uint64_t) object->vo_size); 4858 } 4859 } 4860 4861 entry = vm_object_hash_entry_alloc(pager); 4862 4863 vm_object_lock(object); 4864 lck = vm_object_hash_lock_spin(pager); 4865 vm_object_hash_insert(entry, object); 4866 vm_object_hash_unlock(lck); 4867 vm_object_unlock(object); 4868 4869 /* 4870 * A reference was returned by 4871 * memory_object_create(), and it is 4872 * copied by vm_object_enter(). 
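	 * (In this compressor variant the reference actually comes from
	 * compressor_memory_object_create() above.)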
4873 */ 4874 4875 pager_object = vm_object_enter(pager, object->vo_size, TRUE, TRUE, FALSE); 4876 4877 if (pager_object != object) { 4878 panic("vm_object_compressor_pager_create: mismatch (pager: %p, pager_object: %p, orig_object: %p, orig_object size: 0x%llx)\n", pager, pager_object, object, (uint64_t) object->vo_size); 4879 } 4880 4881 /* 4882 * Drop the reference we were passed. 4883 */ 4884 memory_object_deallocate(pager); 4885 4886 vm_object_lock(object); 4887 4888 /* 4889 * Release the paging reference 4890 */ 4891 vm_object_paging_end(object); 4892} 4893 4894/* 4895 * Routine: vm_object_remove 4896 * Purpose: 4897 * Eliminate the pager/object association 4898 * for this pager. 4899 * Conditions: 4900 * The object cache must be locked. 4901 */ 4902__private_extern__ void 4903vm_object_remove( 4904 vm_object_t object) 4905{ 4906 memory_object_t pager; 4907 4908 if ((pager = object->pager) != MEMORY_OBJECT_NULL) { 4909 vm_object_hash_entry_t entry; 4910 4911 entry = vm_object_hash_lookup(pager, FALSE); 4912 if (entry != VM_OBJECT_HASH_ENTRY_NULL) 4913 entry->object = VM_OBJECT_NULL; 4914 } 4915 4916} 4917 4918/* 4919 * Global variables for vm_object_collapse(): 4920 * 4921 * Counts for normal collapses and bypasses. 4922 * Debugging variables, to watch or disable collapse. 4923 */ 4924static long object_collapses = 0; 4925static long object_bypasses = 0; 4926 4927static boolean_t vm_object_collapse_allowed = TRUE; 4928static boolean_t vm_object_bypass_allowed = TRUE; 4929 4930#if MACH_PAGEMAP 4931static int vm_external_discarded; 4932static int vm_external_collapsed; 4933#endif 4934 4935unsigned long vm_object_collapse_encrypted = 0; 4936 4937void vm_object_do_collapse_compressor(vm_object_t object, 4938 vm_object_t backing_object); 4939void 4940vm_object_do_collapse_compressor( 4941 vm_object_t object, 4942 vm_object_t backing_object) 4943{ 4944 vm_object_offset_t new_offset, backing_offset; 4945 vm_object_size_t size; 4946 4947 vm_counters.do_collapse_compressor++; 4948 4949 vm_object_lock_assert_exclusive(object); 4950 vm_object_lock_assert_exclusive(backing_object); 4951 4952 size = object->vo_size; 4953 4954 /* 4955 * Move all compressed pages from backing_object 4956 * to the parent. 4957 */ 4958 4959 for (backing_offset = object->vo_shadow_offset; 4960 backing_offset < object->vo_shadow_offset + object->vo_size; 4961 backing_offset += PAGE_SIZE) { 4962 memory_object_offset_t backing_pager_offset; 4963 4964 /* find the next compressed page at or after this offset */ 4965 backing_pager_offset = (backing_offset + 4966 backing_object->paging_offset); 4967 backing_pager_offset = vm_compressor_pager_next_compressed( 4968 backing_object->pager, 4969 backing_pager_offset); 4970 if (backing_pager_offset == (memory_object_offset_t) -1) { 4971 /* no more compressed pages */ 4972 break; 4973 } 4974 backing_offset = (backing_pager_offset - 4975 backing_object->paging_offset); 4976 4977 new_offset = backing_offset - object->vo_shadow_offset; 4978 4979 if (new_offset >= object->vo_size) { 4980 /* we're out of the scope of "object": done */ 4981 break; 4982 } 4983 4984 if ((vm_page_lookup(object, new_offset) != VM_PAGE_NULL) || 4985 (vm_compressor_pager_state_get(object->pager, 4986 (new_offset + 4987 object->paging_offset)) == 4988 VM_EXTERNAL_STATE_EXISTS)) { 4989 /* 4990 * This page already exists in object, resident or 4991 * compressed. 4992 * We don't need this compressed page in backing_object 4993 * and it will be reclaimed when we release 4994 * backing_object. 
4995 */ 4996 continue; 4997 } 4998 4999 /* 5000 * backing_object has this page in the VM compressor and 5001 * we need to transfer it to object. 5002 */ 5003 vm_counters.do_collapse_compressor_pages++; 5004 vm_compressor_pager_transfer( 5005 /* destination: */ 5006 object->pager, 5007 (new_offset + object->paging_offset), 5008 /* source: */ 5009 backing_object->pager, 5010 (backing_offset + backing_object->paging_offset)); 5011 } 5012} 5013 5014/* 5015 * Routine: vm_object_do_collapse 5016 * Purpose: 5017 * Collapse an object with the object backing it. 5018 * Pages in the backing object are moved into the 5019 * parent, and the backing object is deallocated. 5020 * Conditions: 5021 * Both objects and the cache are locked; the page 5022 * queues are unlocked. 5023 * 5024 */ 5025static void 5026vm_object_do_collapse( 5027 vm_object_t object, 5028 vm_object_t backing_object) 5029{ 5030 vm_page_t p, pp; 5031 vm_object_offset_t new_offset, backing_offset; 5032 vm_object_size_t size; 5033 5034 vm_object_lock_assert_exclusive(object); 5035 vm_object_lock_assert_exclusive(backing_object); 5036 5037 assert(object->purgable == VM_PURGABLE_DENY); 5038 assert(backing_object->purgable == VM_PURGABLE_DENY); 5039 5040 backing_offset = object->vo_shadow_offset; 5041 size = object->vo_size; 5042 5043 /* 5044 * Move all in-memory pages from backing_object 5045 * to the parent. Pages that have been paged out 5046 * will be overwritten by any of the parent's 5047 * pages that shadow them. 5048 */ 5049 5050 while (!queue_empty(&backing_object->memq)) { 5051 5052 p = (vm_page_t) queue_first(&backing_object->memq); 5053 5054 new_offset = (p->offset - backing_offset); 5055 5056 assert(!p->busy || p->absent); 5057 5058 /* 5059 * If the parent has a page here, or if 5060 * this page falls outside the parent, 5061 * dispose of it. 5062 * 5063 * Otherwise, move it as planned. 5064 */ 5065 5066 if (p->offset < backing_offset || new_offset >= size) { 5067 VM_PAGE_FREE(p); 5068 } else { 5069 /* 5070 * ENCRYPTED SWAP: 5071 * The encryption key includes the "pager" and the 5072 * "paging_offset". These will not change during the 5073 * object collapse, so we can just move an encrypted 5074 * page from one object to the other in this case. 5075 * We can't decrypt the page here, since we can't drop 5076 * the object lock. 5077 */ 5078 if (p->encrypted) { 5079 vm_object_collapse_encrypted++; 5080 } 5081 pp = vm_page_lookup(object, new_offset); 5082 if (pp == VM_PAGE_NULL) { 5083 5084 if (VM_COMPRESSOR_PAGER_STATE_GET(object, 5085 new_offset) 5086 == VM_EXTERNAL_STATE_EXISTS) { 5087 /* 5088 * Parent object has this page 5089 * in the VM compressor. 5090 * Throw away the backing 5091 * object's page. 5092 */ 5093 VM_PAGE_FREE(p); 5094 } else { 5095 /* 5096 * Parent now has no page. 5097 * Move the backing object's page 5098 * up. 5099 */ 5100 vm_page_rename(p, object, new_offset, 5101 TRUE); 5102 } 5103 5104#if MACH_PAGEMAP 5105 } else if (pp->absent) { 5106 5107 /* 5108 * Parent has an absent page... 5109 * it's not being paged in, so 5110 * it must really be missing from 5111 * the parent. 5112 * 5113 * Throw out the absent page... 5114 * any faults looking for that 5115 * page will restart with the new 5116 * one. 5117 */ 5118 5119 VM_PAGE_FREE(pp); 5120 vm_page_rename(p, object, new_offset, TRUE); 5121#endif /* MACH_PAGEMAP */ 5122 } else { 5123 assert(! pp->absent); 5124 5125 /* 5126 * Parent object has a real page. 5127 * Throw away the backing object's 5128 * page. 
5129 */ 5130 VM_PAGE_FREE(p); 5131 } 5132 } 5133 } 5134 5135 if (vm_object_collapse_compressor_allowed && 5136 object->pager != MEMORY_OBJECT_NULL && 5137 backing_object->pager != MEMORY_OBJECT_NULL) { 5138 5139 /* move compressed pages from backing_object to object */ 5140 vm_object_do_collapse_compressor(object, backing_object); 5141 5142 } else if (backing_object->pager != MEMORY_OBJECT_NULL) { 5143 vm_object_hash_entry_t entry; 5144 5145#if !MACH_PAGEMAP 5146 assert((!object->pager_created && 5147 (object->pager == MEMORY_OBJECT_NULL)) || 5148 (!backing_object->pager_created && 5149 (backing_object->pager == MEMORY_OBJECT_NULL))); 5150#else 5151 assert(!object->pager_created && 5152 object->pager == MEMORY_OBJECT_NULL); 5153#endif /* !MACH_PAGEMAP */ 5154 5155 /* 5156 * Move the pager from backing_object to object. 5157 * 5158 * XXX We're only using part of the paging space 5159 * for keeps now... we ought to discard the 5160 * unused portion. 5161 */ 5162 5163 assert(!object->paging_in_progress); 5164 assert(!object->activity_in_progress); 5165 assert(!object->pager_created); 5166 assert(object->pager == NULL); 5167 object->pager = backing_object->pager; 5168 5169 if (backing_object->hashed) { 5170 lck_mtx_t *lck; 5171 5172 lck = vm_object_hash_lock_spin(backing_object->pager); 5173 entry = vm_object_hash_lookup(object->pager, FALSE); 5174 assert(entry != VM_OBJECT_HASH_ENTRY_NULL); 5175 entry->object = object; 5176 vm_object_hash_unlock(lck); 5177 5178 object->hashed = TRUE; 5179 } 5180 object->pager_created = backing_object->pager_created; 5181 object->pager_control = backing_object->pager_control; 5182 object->pager_ready = backing_object->pager_ready; 5183 object->pager_initialized = backing_object->pager_initialized; 5184 object->paging_offset = 5185 backing_object->paging_offset + backing_offset; 5186 if (object->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 5187 memory_object_control_collapse(object->pager_control, 5188 object); 5189 } 5190 /* the backing_object has lost its pager: reset all fields */ 5191 backing_object->pager_created = FALSE; 5192 backing_object->pager_control = NULL; 5193 backing_object->pager_ready = FALSE; 5194 backing_object->paging_offset = 0; 5195 backing_object->pager = NULL; 5196 } 5197 5198#if MACH_PAGEMAP 5199 /* 5200 * If the shadow offset is 0, the use the existence map from 5201 * the backing object if there is one. If the shadow offset is 5202 * not zero, toss it. 5203 * 5204 * XXX - If the shadow offset is not 0 then a bit copy is needed 5205 * if the map is to be salvaged. For now, we just just toss the 5206 * old map, giving the collapsed object no map. This means that 5207 * the pager is invoked for zero fill pages. If analysis shows 5208 * that this happens frequently and is a performance hit, then 5209 * this code should be fixed to salvage the map. 5210 */ 5211 assert(object->existence_map == VM_EXTERNAL_NULL); 5212 if (backing_offset || (size != backing_object->vo_size)) { 5213 vm_external_discarded++; 5214 vm_external_destroy(backing_object->existence_map, 5215 backing_object->vo_size); 5216 } 5217 else { 5218 vm_external_collapsed++; 5219 object->existence_map = backing_object->existence_map; 5220 } 5221 backing_object->existence_map = VM_EXTERNAL_NULL; 5222#endif /* MACH_PAGEMAP */ 5223 5224 /* 5225 * Object now shadows whatever backing_object did. 5226 * Note that the reference to backing_object->shadow 5227 * moves from within backing_object to within object. 
5228 */ 5229 5230 assert(!object->phys_contiguous); 5231 assert(!backing_object->phys_contiguous); 5232 object->shadow = backing_object->shadow; 5233 if (object->shadow) { 5234 object->vo_shadow_offset += backing_object->vo_shadow_offset; 5235 /* "backing_object" gave its shadow to "object" */ 5236 backing_object->shadow = VM_OBJECT_NULL; 5237 backing_object->vo_shadow_offset = 0; 5238 } else { 5239 /* no shadow, therefore no shadow offset... */ 5240 object->vo_shadow_offset = 0; 5241 } 5242 assert((object->shadow == VM_OBJECT_NULL) || 5243 (object->shadow->copy != backing_object)); 5244 5245 /* 5246 * Discard backing_object. 5247 * 5248 * Since the backing object has no pages, no 5249 * pager left, and no object references within it, 5250 * all that is necessary is to dispose of it. 5251 */ 5252 object_collapses++; 5253 5254 assert(backing_object->ref_count == 1); 5255 assert(backing_object->resident_page_count == 0); 5256 assert(backing_object->paging_in_progress == 0); 5257 assert(backing_object->activity_in_progress == 0); 5258 assert(backing_object->shadow == VM_OBJECT_NULL); 5259 assert(backing_object->vo_shadow_offset == 0); 5260 5261 if (backing_object->pager != MEMORY_OBJECT_NULL) { 5262 /* ... unless it has a pager; need to terminate pager too */ 5263 vm_counters.do_collapse_terminate++; 5264 if (vm_object_terminate(backing_object) != KERN_SUCCESS) { 5265 vm_counters.do_collapse_terminate_failure++; 5266 } 5267 return; 5268 } 5269 5270 assert(backing_object->pager == NULL); 5271 5272 backing_object->alive = FALSE; 5273 vm_object_unlock(backing_object); 5274 5275 XPR(XPR_VM_OBJECT, "vm_object_collapse, collapsed 0x%X\n", 5276 backing_object, 0,0,0,0); 5277 5278#if VM_OBJECT_TRACKING 5279 if (vm_object_tracking_inited) { 5280 btlog_remove_entries_for_element(vm_object_tracking_btlog, 5281 backing_object); 5282 } 5283#endif /* VM_OBJECT_TRACKING */ 5284 5285 vm_object_lock_destroy(backing_object); 5286 5287 zfree(vm_object_zone, backing_object); 5288 5289} 5290 5291static void 5292vm_object_do_bypass( 5293 vm_object_t object, 5294 vm_object_t backing_object) 5295{ 5296 /* 5297 * Make the parent shadow the next object 5298 * in the chain. 5299 */ 5300 5301 vm_object_lock_assert_exclusive(object); 5302 vm_object_lock_assert_exclusive(backing_object); 5303 5304#if TASK_SWAPPER 5305 /* 5306 * Do object reference in-line to 5307 * conditionally increment shadow's 5308 * residence count. If object is not 5309 * resident, leave residence count 5310 * on shadow alone. 5311 */ 5312 if (backing_object->shadow != VM_OBJECT_NULL) { 5313 vm_object_lock(backing_object->shadow); 5314 vm_object_lock_assert_exclusive(backing_object->shadow); 5315 backing_object->shadow->ref_count++; 5316 if (object->res_count != 0) 5317 vm_object_res_reference(backing_object->shadow); 5318 vm_object_unlock(backing_object->shadow); 5319 } 5320#else /* TASK_SWAPPER */ 5321 vm_object_reference(backing_object->shadow); 5322#endif /* TASK_SWAPPER */ 5323 5324 assert(!object->phys_contiguous); 5325 assert(!backing_object->phys_contiguous); 5326 object->shadow = backing_object->shadow; 5327 if (object->shadow) { 5328 object->vo_shadow_offset += backing_object->vo_shadow_offset; 5329 } else { 5330 /* no shadow, therefore no shadow offset... */ 5331 object->vo_shadow_offset = 0; 5332 } 5333 5334 /* 5335 * Backing object might have had a copy pointer 5336 * to us. If it did, clear it. 
5337 */ 5338 if (backing_object->copy == object) { 5339 backing_object->copy = VM_OBJECT_NULL; 5340 } 5341 5342 /* 5343 * Drop the reference count on backing_object. 5344#if TASK_SWAPPER 5345 * Since its ref_count was at least 2, it 5346 * will not vanish; so we don't need to call 5347 * vm_object_deallocate. 5348 * [with a caveat for "named" objects] 5349 * 5350 * The res_count on the backing object is 5351 * conditionally decremented. It's possible 5352 * (via vm_pageout_scan) to get here with 5353 * a "swapped" object, which has a 0 res_count, 5354 * in which case, the backing object res_count 5355 * is already down by one. 5356#else 5357 * Don't call vm_object_deallocate unless 5358 * ref_count drops to zero. 5359 * 5360 * The ref_count can drop to zero here if the 5361 * backing object could be bypassed but not 5362 * collapsed, such as when the backing object 5363 * is temporary and cachable. 5364#endif 5365 */ 5366 if (backing_object->ref_count > 2 || 5367 (!backing_object->named && backing_object->ref_count > 1)) { 5368 vm_object_lock_assert_exclusive(backing_object); 5369 backing_object->ref_count--; 5370#if TASK_SWAPPER 5371 if (object->res_count != 0) 5372 vm_object_res_deallocate(backing_object); 5373 assert(backing_object->ref_count > 0); 5374#endif /* TASK_SWAPPER */ 5375 vm_object_unlock(backing_object); 5376 } else { 5377 5378 /* 5379 * Drop locks so that we can deallocate 5380 * the backing object. 5381 */ 5382 5383#if TASK_SWAPPER 5384 if (object->res_count == 0) { 5385 /* XXX get a reference for the deallocate below */ 5386 vm_object_res_reference(backing_object); 5387 } 5388#endif /* TASK_SWAPPER */ 5389 /* 5390 * vm_object_collapse (the caller of this function) is 5391 * now called from contexts that may not guarantee that a 5392 * valid reference is held on the object... w/o a valid 5393 * reference, it is unsafe and unwise (you will definitely 5394 * regret it) to unlock the object and then retake the lock 5395 * since the object may be terminated and recycled in between. 5396 * The "activity_in_progress" reference will keep the object 5397 * 'stable'. 5398 */ 5399 vm_object_activity_begin(object); 5400 vm_object_unlock(object); 5401 5402 vm_object_unlock(backing_object); 5403 vm_object_deallocate(backing_object); 5404 5405 /* 5406 * Relock object. We don't have to reverify 5407 * its state since vm_object_collapse will 5408 * do that for us as it starts at the 5409 * top of its loop. 5410 */ 5411 5412 vm_object_lock(object); 5413 vm_object_activity_end(object); 5414 } 5415 5416 object_bypasses++; 5417} 5418 5419 5420/* 5421 * vm_object_collapse: 5422 * 5423 * Perform an object collapse or an object bypass if appropriate. 5424 * The real work of collapsing and bypassing is performed in 5425 * the routines vm_object_do_collapse and vm_object_do_bypass. 5426 * 5427 * Requires that the object be locked and the page queues be unlocked. 
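 *
 *	The caller's object remains locked (exclusively) when this routine
 *	returns, even if objects deeper in its shadow chain were collapsed
 *	or bypassed along the way.  A typical use (see vm_object_coalesce()
 *	below for a real one) looks roughly like:
 *
 *		vm_object_lock(object);
 *		vm_object_collapse(object, offset_hint, TRUE);
 *		... object is still locked here ...
 *		vm_object_unlock(object);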
5428 * 5429 */ 5430static unsigned long vm_object_collapse_calls = 0; 5431static unsigned long vm_object_collapse_objects = 0; 5432static unsigned long vm_object_collapse_do_collapse = 0; 5433static unsigned long vm_object_collapse_do_bypass = 0; 5434 5435__private_extern__ void 5436vm_object_collapse( 5437 register vm_object_t object, 5438 register vm_object_offset_t hint_offset, 5439 boolean_t can_bypass) 5440{ 5441 register vm_object_t backing_object; 5442 register unsigned int rcount; 5443 register unsigned int size; 5444 vm_object_t original_object; 5445 int object_lock_type; 5446 int backing_object_lock_type; 5447 5448 vm_object_collapse_calls++; 5449 5450 if (! vm_object_collapse_allowed && 5451 ! (can_bypass && vm_object_bypass_allowed)) { 5452 return; 5453 } 5454 5455 XPR(XPR_VM_OBJECT, "vm_object_collapse, obj 0x%X\n", 5456 object, 0,0,0,0); 5457 5458 if (object == VM_OBJECT_NULL) 5459 return; 5460 5461 original_object = object; 5462 5463 /* 5464 * The top object was locked "exclusive" by the caller. 5465 * In the first pass, to determine if we can collapse the shadow chain, 5466 * take a "shared" lock on the shadow objects. If we can collapse, 5467 * we'll have to go down the chain again with exclusive locks. 5468 */ 5469 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5470 backing_object_lock_type = OBJECT_LOCK_SHARED; 5471 5472retry: 5473 object = original_object; 5474 vm_object_lock_assert_exclusive(object); 5475 5476 while (TRUE) { 5477 vm_object_collapse_objects++; 5478 /* 5479 * Verify that the conditions are right for either 5480 * collapse or bypass: 5481 */ 5482 5483 /* 5484 * There is a backing object, and 5485 */ 5486 5487 backing_object = object->shadow; 5488 if (backing_object == VM_OBJECT_NULL) { 5489 if (object != original_object) { 5490 vm_object_unlock(object); 5491 } 5492 return; 5493 } 5494 if (backing_object_lock_type == OBJECT_LOCK_SHARED) { 5495 vm_object_lock_shared(backing_object); 5496 } else { 5497 vm_object_lock(backing_object); 5498 } 5499 5500 /* 5501 * No pages in the object are currently 5502 * being paged out, and 5503 */ 5504 if (object->paging_in_progress != 0 || 5505 object->activity_in_progress != 0) { 5506 /* try and collapse the rest of the shadow chain */ 5507 if (object != original_object) { 5508 vm_object_unlock(object); 5509 } 5510 object = backing_object; 5511 object_lock_type = backing_object_lock_type; 5512 continue; 5513 } 5514 5515 /* 5516 * ... 5517 * The backing object is not read_only, 5518 * and no pages in the backing object are 5519 * currently being paged out. 5520 * The backing object is internal. 5521 * 5522 */ 5523 5524 if (!backing_object->internal || 5525 backing_object->paging_in_progress != 0 || 5526 backing_object->activity_in_progress != 0) { 5527 /* try and collapse the rest of the shadow chain */ 5528 if (object != original_object) { 5529 vm_object_unlock(object); 5530 } 5531 object = backing_object; 5532 object_lock_type = backing_object_lock_type; 5533 continue; 5534 } 5535 5536 /* 5537 * Purgeable objects are not supposed to engage in 5538 * copy-on-write activities, so should not have 5539 * any shadow objects or be a shadow object to another 5540 * object. 5541 * Collapsing a purgeable object would require some 5542 * updates to the purgeable compressed ledgers. 
5543 */ 5544 if (object->purgable != VM_PURGABLE_DENY || 5545 backing_object->purgable != VM_PURGABLE_DENY) { 5546 panic("vm_object_collapse() attempting to collapse " 5547 "purgeable object: %p(%d) %p(%d)\n", 5548 object, object->purgable, 5549 backing_object, backing_object->purgable); 5550 /* try and collapse the rest of the shadow chain */ 5551 if (object != original_object) { 5552 vm_object_unlock(object); 5553 } 5554 object = backing_object; 5555 object_lock_type = backing_object_lock_type; 5556 continue; 5557 } 5558 5559 /* 5560 * The backing object can't be a copy-object: 5561 * the shadow_offset for the copy-object must stay 5562 * as 0. Furthermore (for the 'we have all the 5563 * pages' case), if we bypass backing_object and 5564 * just shadow the next object in the chain, old 5565 * pages from that object would then have to be copied 5566 * BOTH into the (former) backing_object and into the 5567 * parent object. 5568 */ 5569 if (backing_object->shadow != VM_OBJECT_NULL && 5570 backing_object->shadow->copy == backing_object) { 5571 /* try and collapse the rest of the shadow chain */ 5572 if (object != original_object) { 5573 vm_object_unlock(object); 5574 } 5575 object = backing_object; 5576 object_lock_type = backing_object_lock_type; 5577 continue; 5578 } 5579 5580 /* 5581 * We can now try to either collapse the backing 5582 * object (if the parent is the only reference to 5583 * it) or (perhaps) remove the parent's reference 5584 * to it. 5585 * 5586 * If there is exactly one reference to the backing 5587 * object, we may be able to collapse it into the 5588 * parent. 5589 * 5590 * If MACH_PAGEMAP is defined: 5591 * The parent must not have a pager created for it, 5592 * since collapsing a backing_object dumps new pages 5593 * into the parent that its pager doesn't know about 5594 * (and the collapse code can't merge the existence 5595 * maps). 5596 * Otherwise: 5597 * As long as one of the objects is still not known 5598 * to the pager, we can collapse them. 5599 */ 5600 if (backing_object->ref_count == 1 && 5601 (vm_object_collapse_compressor_allowed || 5602 !object->pager_created 5603#if !MACH_PAGEMAP 5604 || (!backing_object->pager_created) 5605#endif /*!MACH_PAGEMAP */ 5606 ) && vm_object_collapse_allowed) { 5607 5608 /* 5609 * We need the exclusive lock on the VM objects. 5610 */ 5611 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { 5612 /* 5613 * We have an object and its shadow locked 5614 * "shared". We can't just upgrade the locks 5615 * to "exclusive", as some other thread might 5616 * also have these objects locked "shared" and 5617 * attempt to upgrade one or the other to 5618 * "exclusive". The upgrades would block 5619 * forever waiting for the other "shared" locks 5620 * to get released. 5621 * So we have to release the locks and go 5622 * down the shadow chain again (since it could 5623 * have changed) with "exclusive" locking. 5624 */ 5625 vm_object_unlock(backing_object); 5626 if (object != original_object) 5627 vm_object_unlock(object); 5628 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5629 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5630 goto retry; 5631 } 5632 5633 XPR(XPR_VM_OBJECT, 5634 "vm_object_collapse: %x to %x, pager %x, pager_control %x\n", 5635 backing_object, object, 5636 backing_object->pager, 5637 backing_object->pager_control, 0); 5638 5639 /* 5640 * Collapse the object with its backing 5641 * object, and try again with the object's 5642 * new backing object. 
5643 */ 5644 5645 vm_object_do_collapse(object, backing_object); 5646 vm_object_collapse_do_collapse++; 5647 continue; 5648 } 5649 5650 /* 5651 * Collapsing the backing object was not possible 5652 * or permitted, so let's try bypassing it. 5653 */ 5654 5655 if (! (can_bypass && vm_object_bypass_allowed)) { 5656 /* try and collapse the rest of the shadow chain */ 5657 if (object != original_object) { 5658 vm_object_unlock(object); 5659 } 5660 object = backing_object; 5661 object_lock_type = backing_object_lock_type; 5662 continue; 5663 } 5664 5665 5666 /* 5667 * If the object doesn't have all its pages present, 5668 * we have to make sure no pages in the backing object 5669 * "show through" before bypassing it. 5670 */ 5671 size = (unsigned int)atop(object->vo_size); 5672 rcount = object->resident_page_count; 5673 5674 if (rcount != size) { 5675 vm_object_offset_t offset; 5676 vm_object_offset_t backing_offset; 5677 unsigned int backing_rcount; 5678 5679 /* 5680 * If the backing object has a pager but no pagemap, 5681 * then we cannot bypass it, because we don't know 5682 * what pages it has. 5683 */ 5684 if (backing_object->pager_created 5685#if MACH_PAGEMAP 5686 && (backing_object->existence_map == VM_EXTERNAL_NULL) 5687#endif /* MACH_PAGEMAP */ 5688 ) { 5689 /* try and collapse the rest of the shadow chain */ 5690 if (object != original_object) { 5691 vm_object_unlock(object); 5692 } 5693 object = backing_object; 5694 object_lock_type = backing_object_lock_type; 5695 continue; 5696 } 5697 5698 /* 5699 * If the object has a pager but no pagemap, 5700 * then we cannot bypass it, because we don't know 5701 * what pages it has. 5702 */ 5703 if (object->pager_created 5704#if MACH_PAGEMAP 5705 && (object->existence_map == VM_EXTERNAL_NULL) 5706#endif /* MACH_PAGEMAP */ 5707 ) { 5708 /* try and collapse the rest of the shadow chain */ 5709 if (object != original_object) { 5710 vm_object_unlock(object); 5711 } 5712 object = backing_object; 5713 object_lock_type = backing_object_lock_type; 5714 continue; 5715 } 5716 5717 backing_offset = object->vo_shadow_offset; 5718 backing_rcount = backing_object->resident_page_count; 5719 5720 if ( (int)backing_rcount - (int)(atop(backing_object->vo_size) - size) > (int)rcount) { 5721 /* 5722 * we have enough pages in the backing object to guarantee that 5723 * at least 1 of them must be 'uncovered' by a resident page 5724 * in the object we're evaluating, so move on and 5725 * try to collapse the rest of the shadow chain 5726 */ 5727 if (object != original_object) { 5728 vm_object_unlock(object); 5729 } 5730 object = backing_object; 5731 object_lock_type = backing_object_lock_type; 5732 continue; 5733 } 5734 5735 /* 5736 * If all of the pages in the backing object are 5737 * shadowed by the parent object, the parent 5738 * object no longer has to shadow the backing 5739 * object; it can shadow the next one in the 5740 * chain. 5741 * 5742 * If the backing object has existence info, 5743 * we must check examine its existence info 5744 * as well. 
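		 *	The EXISTS_IN_OBJECT() macro defined below captures that
		 *	test: an offset "exists" in an object if it has a resident
		 *	page, a compressed page, or (with MACH_PAGEMAP) an
		 *	existence-map entry.  The resident-page check also
		 *	decrements the caller's running resident-page count, so
		 *	the page lookups can be skipped once every resident page
		 *	has been accounted for.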
5745 * 5746 */ 5747 5748#if MACH_PAGEMAP 5749#define EXISTS_IN_OBJECT(obj, off, rc) \ 5750 ((vm_external_state_get((obj)->existence_map, \ 5751 (vm_offset_t)(off)) \ 5752 == VM_EXTERNAL_STATE_EXISTS) || \ 5753 (VM_COMPRESSOR_PAGER_STATE_GET((obj), (off)) \ 5754 == VM_EXTERNAL_STATE_EXISTS) || \ 5755 ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) 5756#else /* MACH_PAGEMAP */ 5757#define EXISTS_IN_OBJECT(obj, off, rc) \ 5758 ((VM_COMPRESSOR_PAGER_STATE_GET((obj), (off)) \ 5759 == VM_EXTERNAL_STATE_EXISTS) || \ 5760 ((rc) && vm_page_lookup((obj), (off)) != VM_PAGE_NULL && (rc)--)) 5761#endif /* MACH_PAGEMAP */ 5762 5763 /* 5764 * Check the hint location first 5765 * (since it is often the quickest way out of here). 5766 */ 5767 if (object->cow_hint != ~(vm_offset_t)0) 5768 hint_offset = (vm_object_offset_t)object->cow_hint; 5769 else 5770 hint_offset = (hint_offset > 8 * PAGE_SIZE_64) ? 5771 (hint_offset - 8 * PAGE_SIZE_64) : 0; 5772 5773 if (EXISTS_IN_OBJECT(backing_object, hint_offset + 5774 backing_offset, backing_rcount) && 5775 !EXISTS_IN_OBJECT(object, hint_offset, rcount)) { 5776 /* dependency right at the hint */ 5777 object->cow_hint = (vm_offset_t) hint_offset; /* atomic */ 5778 /* try and collapse the rest of the shadow chain */ 5779 if (object != original_object) { 5780 vm_object_unlock(object); 5781 } 5782 object = backing_object; 5783 object_lock_type = backing_object_lock_type; 5784 continue; 5785 } 5786 5787 /* 5788 * If the object's window onto the backing_object 5789 * is large compared to the number of resident 5790 * pages in the backing object, it makes sense to 5791 * walk the backing_object's resident pages first. 5792 * 5793 * NOTE: Pages may be in both the existence map and/or 5794 * resident, so if we don't find a dependency while 5795 * walking the backing object's resident page list 5796 * directly, and there is an existence map, we'll have 5797 * to run the offset based 2nd pass. Because we may 5798 * have to run both passes, we need to be careful 5799 * not to decrement 'rcount' in the 1st pass 5800 */ 5801 if (backing_rcount && backing_rcount < (size / 8)) { 5802 unsigned int rc = rcount; 5803 vm_page_t p; 5804 5805 backing_rcount = backing_object->resident_page_count; 5806 p = (vm_page_t)queue_first(&backing_object->memq); 5807 do { 5808 offset = (p->offset - backing_offset); 5809 5810 if (offset < object->vo_size && 5811 offset != hint_offset && 5812 !EXISTS_IN_OBJECT(object, offset, rc)) { 5813 /* found a dependency */ 5814 object->cow_hint = (vm_offset_t) offset; /* atomic */ 5815 5816 break; 5817 } 5818 p = (vm_page_t) queue_next(&p->listq); 5819 5820 } while (--backing_rcount); 5821 if (backing_rcount != 0 ) { 5822 /* try and collapse the rest of the shadow chain */ 5823 if (object != original_object) { 5824 vm_object_unlock(object); 5825 } 5826 object = backing_object; 5827 object_lock_type = backing_object_lock_type; 5828 continue; 5829 } 5830 } 5831 5832 /* 5833 * Walk through the offsets looking for pages in the 5834 * backing object that show through to the object. 5835 */ 5836 if (backing_rcount 5837#if MACH_PAGEMAP 5838 || backing_object->existence_map 5839#endif /* MACH_PAGEMAP */ 5840 ) { 5841 offset = hint_offset; 5842 5843 while((offset = 5844 (offset + PAGE_SIZE_64 < object->vo_size) ? 
5845 (offset + PAGE_SIZE_64) : 0) != hint_offset) { 5846 5847 if (EXISTS_IN_OBJECT(backing_object, offset + 5848 backing_offset, backing_rcount) && 5849 !EXISTS_IN_OBJECT(object, offset, rcount)) { 5850 /* found a dependency */ 5851 object->cow_hint = (vm_offset_t) offset; /* atomic */ 5852 break; 5853 } 5854 } 5855 if (offset != hint_offset) { 5856 /* try and collapse the rest of the shadow chain */ 5857 if (object != original_object) { 5858 vm_object_unlock(object); 5859 } 5860 object = backing_object; 5861 object_lock_type = backing_object_lock_type; 5862 continue; 5863 } 5864 } 5865 } 5866 5867 /* 5868 * We need "exclusive" locks on the 2 VM objects. 5869 */ 5870 if (backing_object_lock_type != OBJECT_LOCK_EXCLUSIVE) { 5871 vm_object_unlock(backing_object); 5872 if (object != original_object) 5873 vm_object_unlock(object); 5874 object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5875 backing_object_lock_type = OBJECT_LOCK_EXCLUSIVE; 5876 goto retry; 5877 } 5878 5879 /* reset the offset hint for any objects deeper in the chain */ 5880 object->cow_hint = (vm_offset_t)0; 5881 5882 /* 5883 * All interesting pages in the backing object 5884 * already live in the parent or its pager. 5885 * Thus we can bypass the backing object. 5886 */ 5887 5888 vm_object_do_bypass(object, backing_object); 5889 vm_object_collapse_do_bypass++; 5890 5891 /* 5892 * Try again with this object's new backing object. 5893 */ 5894 5895 continue; 5896 } 5897 5898 /* NOT REACHED */ 5899 /* 5900 if (object != original_object) { 5901 vm_object_unlock(object); 5902 } 5903 */ 5904} 5905 5906/* 5907 * Routine: vm_object_page_remove: [internal] 5908 * Purpose: 5909 * Removes all physical pages in the specified 5910 * object range from the object's list of pages. 5911 * 5912 * In/out conditions: 5913 * The object must be locked. 5914 * The object must not have paging_in_progress, usually 5915 * guaranteed by not having a pager. 5916 */ 5917unsigned int vm_object_page_remove_lookup = 0; 5918unsigned int vm_object_page_remove_iterate = 0; 5919 5920__private_extern__ void 5921vm_object_page_remove( 5922 register vm_object_t object, 5923 register vm_object_offset_t start, 5924 register vm_object_offset_t end) 5925{ 5926 register vm_page_t p, next; 5927 5928 /* 5929 * One and two page removals are most popular. 5930 * The factor of 16 here is somewhat arbitrary. 5931 * It balances vm_object_lookup vs iteration. 5932 */ 5933 5934 if (atop_64(end - start) < (unsigned)object->resident_page_count/16) { 5935 vm_object_page_remove_lookup++; 5936 5937 for (; start < end; start += PAGE_SIZE_64) { 5938 p = vm_page_lookup(object, start); 5939 if (p != VM_PAGE_NULL) { 5940 assert(!p->cleaning && !p->pageout && !p->laundry); 5941 if (!p->fictitious && p->pmapped) 5942 pmap_disconnect(p->phys_page); 5943 VM_PAGE_FREE(p); 5944 } 5945 } 5946 } else { 5947 vm_object_page_remove_iterate++; 5948 5949 p = (vm_page_t) queue_first(&object->memq); 5950 while (!queue_end(&object->memq, (queue_entry_t) p)) { 5951 next = (vm_page_t) queue_next(&p->listq); 5952 if ((start <= p->offset) && (p->offset < end)) { 5953 assert(!p->cleaning && !p->pageout && !p->laundry); 5954 if (!p->fictitious && p->pmapped) 5955 pmap_disconnect(p->phys_page); 5956 VM_PAGE_FREE(p); 5957 } 5958 p = next; 5959 } 5960 } 5961} 5962 5963 5964/* 5965 * Routine: vm_object_coalesce 5966 * Function: Coalesces two objects backing up adjoining 5967 * regions of memory into a single object. 5968 * 5969 * returns TRUE if objects were combined. 
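 *
 *	(Hedged usage note, not from the original comment: the typical
 *	caller is the VM map layer, which tries to extend the object
 *	backing an existing anonymous mapping when an adjacent region is
 *	being entered, passing VM_OBJECT_NULL as next_object.)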
5970 * 5971 * NOTE: Only works at the moment if the second object is NULL - 5972 * if it's not, which object do we lock first? 5973 * 5974 * Parameters: 5975 * prev_object First object to coalesce 5976 * prev_offset Offset into prev_object 5977 * next_object Second object into coalesce 5978 * next_offset Offset into next_object 5979 * 5980 * prev_size Size of reference to prev_object 5981 * next_size Size of reference to next_object 5982 * 5983 * Conditions: 5984 * The object(s) must *not* be locked. The map must be locked 5985 * to preserve the reference to the object(s). 5986 */ 5987static int vm_object_coalesce_count = 0; 5988 5989__private_extern__ boolean_t 5990vm_object_coalesce( 5991 register vm_object_t prev_object, 5992 vm_object_t next_object, 5993 vm_object_offset_t prev_offset, 5994 __unused vm_object_offset_t next_offset, 5995 vm_object_size_t prev_size, 5996 vm_object_size_t next_size) 5997{ 5998 vm_object_size_t newsize; 5999 6000#ifdef lint 6001 next_offset++; 6002#endif /* lint */ 6003 6004 if (next_object != VM_OBJECT_NULL) { 6005 return(FALSE); 6006 } 6007 6008 if (prev_object == VM_OBJECT_NULL) { 6009 return(TRUE); 6010 } 6011 6012 XPR(XPR_VM_OBJECT, 6013 "vm_object_coalesce: 0x%X prev_off 0x%X prev_size 0x%X next_size 0x%X\n", 6014 prev_object, prev_offset, prev_size, next_size, 0); 6015 6016 vm_object_lock(prev_object); 6017 6018 /* 6019 * Try to collapse the object first 6020 */ 6021 vm_object_collapse(prev_object, prev_offset, TRUE); 6022 6023 /* 6024 * Can't coalesce if pages not mapped to 6025 * prev_entry may be in use any way: 6026 * . more than one reference 6027 * . paged out 6028 * . shadows another object 6029 * . has a copy elsewhere 6030 * . is purgeable 6031 * . paging references (pages might be in page-list) 6032 */ 6033 6034 if ((prev_object->ref_count > 1) || 6035 prev_object->pager_created || 6036 (prev_object->shadow != VM_OBJECT_NULL) || 6037 (prev_object->copy != VM_OBJECT_NULL) || 6038 (prev_object->true_share != FALSE) || 6039 (prev_object->purgable != VM_PURGABLE_DENY) || 6040 (prev_object->paging_in_progress != 0) || 6041 (prev_object->activity_in_progress != 0)) { 6042 vm_object_unlock(prev_object); 6043 return(FALSE); 6044 } 6045 6046 vm_object_coalesce_count++; 6047 6048 /* 6049 * Remove any pages that may still be in the object from 6050 * a previous deallocation. 6051 */ 6052 vm_object_page_remove(prev_object, 6053 prev_offset + prev_size, 6054 prev_offset + prev_size + next_size); 6055 6056 /* 6057 * Extend the object if necessary. 6058 */ 6059 newsize = prev_offset + prev_size + next_size; 6060 if (newsize > prev_object->vo_size) { 6061#if MACH_PAGEMAP 6062 /* 6063 * We cannot extend an object that has existence info, 6064 * since the existence info might then fail to cover 6065 * the entire object. 6066 * 6067 * This assertion must be true because the object 6068 * has no pager, and we only create existence info 6069 * for objects with pagers. 6070 */ 6071 assert(prev_object->existence_map == VM_EXTERNAL_NULL); 6072#endif /* MACH_PAGEMAP */ 6073 prev_object->vo_size = newsize; 6074 } 6075 6076 vm_object_unlock(prev_object); 6077 return(TRUE); 6078} 6079 6080/* 6081 * Attach a set of physical pages to an object, so that they can 6082 * be mapped by mapping the object. Typically used to map IO memory. 6083 * 6084 * The mapping function and its private data are used to obtain the 6085 * physical addresses for each page to be mapped. 
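 *
 *	(A minimal, hypothetical sketch of a map_fn, for illustration only;
 *	"phys_identity_map_fn" is not part of this file and assumes that
 *	map_fn_data points at the base physical page number of a contiguous
 *	region:
 *
 *		static vm_object_offset_t
 *		phys_identity_map_fn(void *map_fn_data, vm_object_offset_t offset)
 *		{
 *			ppnum_t	base = *(ppnum_t *)map_fn_data;
 *
 *			return (vm_object_offset_t)(base + atop_64(offset));
 *		}
 *
 *	vm_object_page_map() inserts one wired, "private" page per PAGE_SIZE
 *	of the range (grabbed from the fictitious page pool), initialized
 *	with the physical page number returned by the map_fn.)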
6086 */ 6087void 6088vm_object_page_map( 6089 vm_object_t object, 6090 vm_object_offset_t offset, 6091 vm_object_size_t size, 6092 vm_object_offset_t (*map_fn)(void *map_fn_data, 6093 vm_object_offset_t offset), 6094 void *map_fn_data) /* private to map_fn */ 6095{ 6096 int64_t num_pages; 6097 int i; 6098 vm_page_t m; 6099 vm_page_t old_page; 6100 vm_object_offset_t addr; 6101 6102 num_pages = atop_64(size); 6103 6104 for (i = 0; i < num_pages; i++, offset += PAGE_SIZE_64) { 6105 6106 addr = (*map_fn)(map_fn_data, offset); 6107 6108 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) 6109 vm_page_more_fictitious(); 6110 6111 vm_object_lock(object); 6112 if ((old_page = vm_page_lookup(object, offset)) 6113 != VM_PAGE_NULL) 6114 { 6115 VM_PAGE_FREE(old_page); 6116 } 6117 6118 assert((ppnum_t) addr == addr); 6119 vm_page_init(m, (ppnum_t) addr, FALSE); 6120 /* 6121 * private normally requires lock_queues but since we 6122 * are initializing the page, its not necessary here 6123 */ 6124 m->private = TRUE; /* don`t free page */ 6125 m->wire_count = 1; 6126 vm_page_insert(m, object, offset); 6127 6128 PAGE_WAKEUP_DONE(m); 6129 vm_object_unlock(object); 6130 } 6131} 6132 6133kern_return_t 6134vm_object_populate_with_private( 6135 vm_object_t object, 6136 vm_object_offset_t offset, 6137 ppnum_t phys_page, 6138 vm_size_t size) 6139{ 6140 ppnum_t base_page; 6141 vm_object_offset_t base_offset; 6142 6143 6144 if (!object->private) 6145 return KERN_FAILURE; 6146 6147 base_page = phys_page; 6148 6149 vm_object_lock(object); 6150 6151 if (!object->phys_contiguous) { 6152 vm_page_t m; 6153 6154 if ((base_offset = trunc_page_64(offset)) != offset) { 6155 vm_object_unlock(object); 6156 return KERN_FAILURE; 6157 } 6158 base_offset += object->paging_offset; 6159 6160 while (size) { 6161 m = vm_page_lookup(object, base_offset); 6162 6163 if (m != VM_PAGE_NULL) { 6164 if (m->fictitious) { 6165 if (m->phys_page != vm_page_guard_addr) { 6166 6167 vm_page_lockspin_queues(); 6168 m->private = TRUE; 6169 vm_page_unlock_queues(); 6170 6171 m->fictitious = FALSE; 6172 m->phys_page = base_page; 6173 } 6174 } else if (m->phys_page != base_page) { 6175 6176 if ( !m->private) { 6177 /* 6178 * we'd leak a real page... that can't be right 6179 */ 6180 panic("vm_object_populate_with_private - %p not private", m); 6181 } 6182 if (m->pmapped) { 6183 /* 6184 * pmap call to clear old mapping 6185 */ 6186 pmap_disconnect(m->phys_page); 6187 } 6188 m->phys_page = base_page; 6189 } 6190 if (m->encrypted) { 6191 /* 6192 * we should never see this on a ficticious or private page 6193 */ 6194 panic("vm_object_populate_with_private - %p encrypted", m); 6195 } 6196 6197 } else { 6198 while ((m = vm_page_grab_fictitious()) == VM_PAGE_NULL) 6199 vm_page_more_fictitious(); 6200 6201 /* 6202 * private normally requires lock_queues but since we 6203 * are initializing the page, its not necessary here 6204 */ 6205 m->private = TRUE; 6206 m->fictitious = FALSE; 6207 m->phys_page = base_page; 6208 m->unusual = TRUE; 6209 m->busy = FALSE; 6210 6211 vm_page_insert(m, object, base_offset); 6212 } 6213 base_page++; /* Go to the next physical page */ 6214 base_offset += PAGE_SIZE; 6215 size -= PAGE_SIZE; 6216 } 6217 } else { 6218 /* NOTE: we should check the original settings here */ 6219 /* if we have a size > zero a pmap call should be made */ 6220 /* to disable the range */ 6221 6222 /* pmap_? 
		 */

		/* shadows on contiguous memory are not allowed */
		/* we therefore can use the offset field */
		object->vo_shadow_offset = (vm_object_offset_t)phys_page << PAGE_SHIFT;
		object->vo_size = size;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}

/*
 * memory_object_free_from_cache:
 *
 * Walk the vm_object cache list, removing and freeing vm_objects
 * which are backed by the pager identified by the caller (pager_ops).
 * Remove up to "count" objects, if that many are available
 * in the cache.
 *
 * Walk the list at most once; return the number of vm_objects
 * actually freed.
 */

__private_extern__ kern_return_t
memory_object_free_from_cache(
	__unused host_t		host,
	__unused memory_object_pager_ops_t pager_ops,
	int		*count)
{
#if VM_OBJECT_CACHE
	int	object_released = 0;

	register vm_object_t object = VM_OBJECT_NULL;
	vm_object_t shadow;

/*
	if(host == HOST_NULL)
		return(KERN_INVALID_ARGUMENT);
*/

 try_again:
	vm_object_cache_lock();

	queue_iterate(&vm_object_cached_list, object,
		      vm_object_t, cached_list) {
		if (object->pager &&
		    (pager_ops == object->pager->mo_pager_ops)) {
			vm_object_lock(object);
			queue_remove(&vm_object_cached_list, object,
				     vm_object_t, cached_list);
			vm_object_cached_count--;

			vm_object_cache_unlock();
			/*
			 * Since this object is in the cache, we know
			 * that it is initialized and has only a pager's
			 * (implicit) reference.  Take a reference to avoid
			 * recursive deallocations.
			 */

			assert(object->pager_initialized);
			assert(object->ref_count == 0);
			vm_object_lock_assert_exclusive(object);
			object->ref_count++;

			/*
			 * Terminate the object.
			 * If the object had a shadow, we let
			 * vm_object_deallocate deallocate it.
			 * "pageout" objects have a shadow, but
			 * maintain a "paging reference" rather
			 * than a normal reference.
			 * (We are careful here to limit recursion.)
6296 */ 6297 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 6298 6299 if ((vm_object_terminate(object) == KERN_SUCCESS) 6300 && (shadow != VM_OBJECT_NULL)) { 6301 vm_object_deallocate(shadow); 6302 } 6303 6304 if(object_released++ == *count) 6305 return KERN_SUCCESS; 6306 goto try_again; 6307 } 6308 } 6309 vm_object_cache_unlock(); 6310 *count = object_released; 6311#else 6312 *count = 0; 6313#endif 6314 return KERN_SUCCESS; 6315} 6316 6317 6318 6319kern_return_t 6320memory_object_create_named( 6321 memory_object_t pager, 6322 memory_object_offset_t size, 6323 memory_object_control_t *control) 6324{ 6325 vm_object_t object; 6326 vm_object_hash_entry_t entry; 6327 lck_mtx_t *lck; 6328 6329 *control = MEMORY_OBJECT_CONTROL_NULL; 6330 if (pager == MEMORY_OBJECT_NULL) 6331 return KERN_INVALID_ARGUMENT; 6332 6333 lck = vm_object_hash_lock_spin(pager); 6334 entry = vm_object_hash_lookup(pager, FALSE); 6335 6336 if ((entry != VM_OBJECT_HASH_ENTRY_NULL) && 6337 (entry->object != VM_OBJECT_NULL)) { 6338 if (entry->object->named == TRUE) 6339 panic("memory_object_create_named: caller already holds the right"); } 6340 vm_object_hash_unlock(lck); 6341 6342 if ((object = vm_object_enter(pager, size, FALSE, FALSE, TRUE)) == VM_OBJECT_NULL) { 6343 return(KERN_INVALID_OBJECT); 6344 } 6345 6346 /* wait for object (if any) to be ready */ 6347 if (object != VM_OBJECT_NULL) { 6348 vm_object_lock(object); 6349 object->named = TRUE; 6350 while (!object->pager_ready) { 6351 vm_object_sleep(object, 6352 VM_OBJECT_EVENT_PAGER_READY, 6353 THREAD_UNINT); 6354 } 6355 *control = object->pager_control; 6356 vm_object_unlock(object); 6357 } 6358 return (KERN_SUCCESS); 6359} 6360 6361 6362/* 6363 * Routine: memory_object_recover_named [user interface] 6364 * Purpose: 6365 * Attempt to recover a named reference for a VM object. 6366 * VM will verify that the object has not already started 6367 * down the termination path, and if it has, will optionally 6368 * wait for that to finish. 
6369 * Returns: 6370 * KERN_SUCCESS - we recovered a named reference on the object 6371 * KERN_FAILURE - we could not recover a reference (object dead) 6372 * KERN_INVALID_ARGUMENT - bad memory object control 6373 */ 6374kern_return_t 6375memory_object_recover_named( 6376 memory_object_control_t control, 6377 boolean_t wait_on_terminating) 6378{ 6379 vm_object_t object; 6380 6381 object = memory_object_control_to_vm_object(control); 6382 if (object == VM_OBJECT_NULL) { 6383 return (KERN_INVALID_ARGUMENT); 6384 } 6385restart: 6386 vm_object_lock(object); 6387 6388 if (object->terminating && wait_on_terminating) { 6389 vm_object_wait(object, 6390 VM_OBJECT_EVENT_PAGING_IN_PROGRESS, 6391 THREAD_UNINT); 6392 goto restart; 6393 } 6394 6395 if (!object->alive) { 6396 vm_object_unlock(object); 6397 return KERN_FAILURE; 6398 } 6399 6400 if (object->named == TRUE) { 6401 vm_object_unlock(object); 6402 return KERN_SUCCESS; 6403 } 6404#if VM_OBJECT_CACHE 6405 if ((object->ref_count == 0) && (!object->terminating)) { 6406 if (!vm_object_cache_lock_try()) { 6407 vm_object_unlock(object); 6408 goto restart; 6409 } 6410 queue_remove(&vm_object_cached_list, object, 6411 vm_object_t, cached_list); 6412 vm_object_cached_count--; 6413 XPR(XPR_VM_OBJECT_CACHE, 6414 "memory_object_recover_named: removing %X, head (%X, %X)\n", 6415 object, 6416 vm_object_cached_list.next, 6417 vm_object_cached_list.prev, 0,0); 6418 6419 vm_object_cache_unlock(); 6420 } 6421#endif 6422 object->named = TRUE; 6423 vm_object_lock_assert_exclusive(object); 6424 object->ref_count++; 6425 vm_object_res_reference(object); 6426 while (!object->pager_ready) { 6427 vm_object_sleep(object, 6428 VM_OBJECT_EVENT_PAGER_READY, 6429 THREAD_UNINT); 6430 } 6431 vm_object_unlock(object); 6432 return (KERN_SUCCESS); 6433} 6434 6435 6436/* 6437 * vm_object_release_name: 6438 * 6439 * Enforces name semantic on memory_object reference count decrement 6440 * This routine should not be called unless the caller holds a name 6441 * reference gained through the memory_object_create_named. 6442 * 6443 * If the TERMINATE_IDLE flag is set, the call will return if the 6444 * reference count is not 1. i.e. idle with the only remaining reference 6445 * being the name. 6446 * If the decision is made to proceed the name field flag is set to 6447 * false and the reference count is decremented. If the RESPECT_CACHE 6448 * flag is set and the reference count has gone to zero, the 6449 * memory_object is checked to see if it is cacheable otherwise when 6450 * the reference count is zero, it is simply terminated. 6451 */ 6452 6453__private_extern__ kern_return_t 6454vm_object_release_name( 6455 vm_object_t object, 6456 int flags) 6457{ 6458 vm_object_t shadow; 6459 boolean_t original_object = TRUE; 6460 6461 while (object != VM_OBJECT_NULL) { 6462 6463 vm_object_lock(object); 6464 6465 assert(object->alive); 6466 if (original_object) 6467 assert(object->named); 6468 assert(object->ref_count > 0); 6469 6470 /* 6471 * We have to wait for initialization before 6472 * destroying or caching the object. 
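		 *
		 * (The assert_wait/thread_block pair below implements that
		 *  wait: we sleep on VM_OBJECT_EVENT_INITIALIZED with the
		 *  object unlocked, then loop back and re-evaluate the object
		 *  from scratch.)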
6473 */ 6474 6475 if (object->pager_created && !object->pager_initialized) { 6476 assert(!object->can_persist); 6477 vm_object_assert_wait(object, 6478 VM_OBJECT_EVENT_INITIALIZED, 6479 THREAD_UNINT); 6480 vm_object_unlock(object); 6481 thread_block(THREAD_CONTINUE_NULL); 6482 continue; 6483 } 6484 6485 if (((object->ref_count > 1) 6486 && (flags & MEMORY_OBJECT_TERMINATE_IDLE)) 6487 || (object->terminating)) { 6488 vm_object_unlock(object); 6489 return KERN_FAILURE; 6490 } else { 6491 if (flags & MEMORY_OBJECT_RELEASE_NO_OP) { 6492 vm_object_unlock(object); 6493 return KERN_SUCCESS; 6494 } 6495 } 6496 6497 if ((flags & MEMORY_OBJECT_RESPECT_CACHE) && 6498 (object->ref_count == 1)) { 6499 if (original_object) 6500 object->named = FALSE; 6501 vm_object_unlock(object); 6502 /* let vm_object_deallocate push this thing into */ 6503 /* the cache, if that it is where it is bound */ 6504 vm_object_deallocate(object); 6505 return KERN_SUCCESS; 6506 } 6507 VM_OBJ_RES_DECR(object); 6508 shadow = object->pageout?VM_OBJECT_NULL:object->shadow; 6509 6510 if (object->ref_count == 1) { 6511 if (vm_object_terminate(object) != KERN_SUCCESS) { 6512 if (original_object) { 6513 return KERN_FAILURE; 6514 } else { 6515 return KERN_SUCCESS; 6516 } 6517 } 6518 if (shadow != VM_OBJECT_NULL) { 6519 original_object = FALSE; 6520 object = shadow; 6521 continue; 6522 } 6523 return KERN_SUCCESS; 6524 } else { 6525 vm_object_lock_assert_exclusive(object); 6526 object->ref_count--; 6527 assert(object->ref_count > 0); 6528 if(original_object) 6529 object->named = FALSE; 6530 vm_object_unlock(object); 6531 return KERN_SUCCESS; 6532 } 6533 } 6534 /*NOTREACHED*/ 6535 assert(0); 6536 return KERN_FAILURE; 6537} 6538 6539 6540__private_extern__ kern_return_t 6541vm_object_lock_request( 6542 vm_object_t object, 6543 vm_object_offset_t offset, 6544 vm_object_size_t size, 6545 memory_object_return_t should_return, 6546 int flags, 6547 vm_prot_t prot) 6548{ 6549 __unused boolean_t should_flush; 6550 6551 should_flush = flags & MEMORY_OBJECT_DATA_FLUSH; 6552 6553 XPR(XPR_MEMORY_OBJECT, 6554 "vm_o_lock_request, obj 0x%X off 0x%X size 0x%X flags %X prot %X\n", 6555 object, offset, size, 6556 (((should_return&1)<<1)|should_flush), prot); 6557 6558 /* 6559 * Check for bogus arguments. 6560 */ 6561 if (object == VM_OBJECT_NULL) 6562 return (KERN_INVALID_ARGUMENT); 6563 6564 if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE) 6565 return (KERN_INVALID_ARGUMENT); 6566 6567 size = round_page_64(size); 6568 6569 /* 6570 * Lock the object, and acquire a paging reference to 6571 * prevent the memory_object reference from being released. 6572 */ 6573 vm_object_lock(object); 6574 vm_object_paging_begin(object); 6575 6576 (void)vm_object_update(object, 6577 offset, size, NULL, NULL, should_return, flags, prot); 6578 6579 vm_object_paging_end(object); 6580 vm_object_unlock(object); 6581 6582 return (KERN_SUCCESS); 6583} 6584 6585/* 6586 * Empty a purgeable object by grabbing the physical pages assigned to it and 6587 * putting them on the free queue without writing them to backing store, etc. 6588 * When the pages are next touched they will be demand zero-fill pages. We 6589 * skip pages which are busy, being paged in/out, wired, etc. We do _not_ 6590 * skip referenced/dirty pages, pages on the active queue, etc. We're more 6591 * than happy to grab these since this is a purgeable object. We mark the 6592 * object as "empty" after reaping its pages. 
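 *
 * (Hedged sketch of the user-visible path that typically leads here, using
 *  the standard Mach user interfaces; illustration only, not part of the
 *  original comment:
 *
 *	vm_address_t	addr;
 *	int		state = VM_PURGABLE_EMPTY;
 *
 *	vm_allocate(mach_task_self(), &addr, size,
 *		    VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);
 *	...
 *	vm_purgable_control(mach_task_self(), addr,
 *			    VM_PURGABLE_SET_STATE, &state);
 *
 *  The SET_STATE request reaches vm_object_purgable_control() below, which
 *  calls vm_object_purge() when the object moves to VM_PURGABLE_EMPTY.)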
6593 * 6594 * On entry the object must be locked and it must be 6595 * purgeable with no delayed copies pending. 6596 */ 6597void 6598vm_object_purge(vm_object_t object, int flags) 6599{ 6600 vm_object_lock_assert_exclusive(object); 6601 6602 if (object->purgable == VM_PURGABLE_DENY) 6603 return; 6604 6605 assert(object->copy == VM_OBJECT_NULL); 6606 assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE); 6607 6608 /* 6609 * We need to set the object's state to VM_PURGABLE_EMPTY *before* 6610 * reaping its pages. We update vm_page_purgeable_count in bulk 6611 * and we don't want vm_page_remove() to update it again for each 6612 * page we reap later. 6613 * 6614 * For the purgeable ledgers, pages from VOLATILE and EMPTY objects 6615 * are all accounted for in the "volatile" ledgers, so this does not 6616 * make any difference. 6617 * If we transitioned directly from NONVOLATILE to EMPTY, 6618 * vm_page_purgeable_count must have been updated when the object 6619 * was dequeued from its volatile queue and the purgeable ledgers 6620 * must have also been updated accordingly at that time (in 6621 * vm_object_purgable_control()). 6622 */ 6623 if (object->purgable == VM_PURGABLE_VOLATILE) { 6624 unsigned int delta; 6625 assert(object->resident_page_count >= 6626 object->wired_page_count); 6627 delta = (object->resident_page_count - 6628 object->wired_page_count); 6629 if (delta != 0) { 6630 assert(vm_page_purgeable_count >= 6631 delta); 6632 OSAddAtomic(-delta, 6633 (SInt32 *)&vm_page_purgeable_count); 6634 } 6635 if (object->wired_page_count != 0) { 6636 assert(vm_page_purgeable_wired_count >= 6637 object->wired_page_count); 6638 OSAddAtomic(-object->wired_page_count, 6639 (SInt32 *)&vm_page_purgeable_wired_count); 6640 } 6641 object->purgable = VM_PURGABLE_EMPTY; 6642 } 6643 assert(object->purgable == VM_PURGABLE_EMPTY); 6644 6645 vm_object_reap_pages(object, REAP_PURGEABLE); 6646 6647 if (object->pager != NULL && 6648 COMPRESSED_PAGER_IS_ACTIVE) { 6649 unsigned int pgcount; 6650 6651 if (object->activity_in_progress == 0 && 6652 object->paging_in_progress == 0) { 6653 /* 6654 * Also reap any memory coming from this object 6655 * in the VM compressor. 6656 * 6657 * There are no operations in progress on the VM object 6658 * and no operation can start while we're holding the 6659 * VM object lock, so it's safe to reap the compressed 6660 * pages and update the page counts. 6661 */ 6662 pgcount = vm_compressor_pager_get_count(object->pager); 6663 if (pgcount) { 6664 pgcount = vm_compressor_pager_reap_pages(object->pager, flags); 6665 vm_compressor_pager_count(object->pager, 6666 -pgcount, 6667 FALSE, /* shared */ 6668 object); 6669 vm_purgeable_compressed_update(object, 6670 -pgcount); 6671 } 6672 if ( !(flags & C_DONT_BLOCK)) { 6673 assert(vm_compressor_pager_get_count(object->pager) 6674 == 0); 6675 } 6676 } else { 6677 /* 6678 * There's some kind of paging activity in progress 6679 * for this object, which could result in a page 6680 * being compressed or decompressed, possibly while 6681 * the VM object is not locked, so it could race 6682 * with us. 6683 * 6684 * We can't really synchronize this without possibly 6685 * causing a deadlock when the compressor needs to 6686 * allocate or free memory while compressing or 6687 * decompressing a page from a purgeable object 6688 * mapped in the kernel_map... 6689 * 6690 * So let's not attempt to purge the compressor 6691 * pager if there's any kind of operation in 6692 * progress on the VM object. 
			 */
		}
	}

	vm_object_lock_assert_exclusive(object);
}


/*
 * vm_object_purgeable_control() allows the caller to control and investigate the
 * state of a purgeable object.  A purgeable object is created via a call to
 * vm_allocate() with VM_FLAGS_PURGABLE specified.  A purgeable object will
 * never be coalesced with any other object -- even other purgeable objects --
 * and will thus always remain a distinct object.  A purgeable object has
 * special semantics when its reference count is exactly 1.  If its reference
 * count is greater than 1, then a purgeable object will behave like a normal
 * object and attempts to use this interface will result in an error return
 * of KERN_INVALID_ARGUMENT.
 *
 * A purgeable object may be put into a "volatile" state which will make the
 * object's pages eligible for being reclaimed without paging to backing
 * store if the system runs low on memory.  If the pages in a volatile
 * purgeable object are reclaimed, the purgeable object is said to have been
 * "emptied."  When a purgeable object is emptied the system will reclaim as
 * many pages from the object as it can in a convenient manner (pages already
 * en route to backing store or busy for other reasons are left as is).  When
 * a purgeable object is made volatile, its pages will generally be reclaimed
 * before other pages in the application's working set.  This semantic is
 * generally used by applications which can recreate the data in the object
 * faster than it can be paged in.  One such example might be media assets
 * which can be reread from a much faster RAID volume.
 *
 * A purgeable object may be designated as "non-volatile" which means it will
 * behave like all other objects in the system with pages being written to and
 * read from backing store as needed to satisfy system memory needs.  If the
 * object was emptied before the object was made non-volatile, that fact will
 * be returned as the old state of the purgeable object (see
 * VM_PURGABLE_SET_STATE below).  In this case, any pages of the object which
 * were reclaimed as part of emptying the object will be refaulted in as
 * zero-fill on demand.  It is up to the application to note that an object
 * was emptied and recreate the object's contents if necessary.  When a
 * purgeable object is made non-volatile, its pages will generally not be paged
 * out to backing store in the immediate future.  A purgeable object may also
 * be manually emptied.
 *
 * Finally, the current state (non-volatile, volatile, volatile & empty) of a
 * volatile purgeable object may be queried at any time.  This information may
 * be used as a control input to let the application know when the system is
 * experiencing memory pressure and is reclaiming memory.
 *
 * The specified address may be any address within the purgeable object.  If
 * the specified address does not represent any object in the target task's
 * virtual address space, then KERN_INVALID_ADDRESS will be returned.  If the
 * object containing the specified address is not a purgeable object, then
 * KERN_INVALID_ARGUMENT will be returned.  Otherwise, KERN_SUCCESS will be
 * returned.
 *
 * The control parameter may be any one of VM_PURGABLE_SET_STATE or
 * VM_PURGABLE_GET_STATE.  For VM_PURGABLE_SET_STATE, the in/out parameter
 * state is used to set the new state of the purgeable object and return its
 * old state.  For VM_PURGABLE_GET_STATE, the current state of the purgeable
 * object is returned in the parameter state.
 *
 * The in/out parameter state may be one of VM_PURGABLE_NONVOLATILE,
 * VM_PURGABLE_VOLATILE or VM_PURGABLE_EMPTY.  These, respectively, represent
 * the non-volatile, volatile and volatile/empty states described above.
 * Setting the state of a purgeable object to VM_PURGABLE_EMPTY will
 * immediately reclaim as many pages in the object as can be conveniently
 * collected (some may have already been written to backing store or be
 * otherwise busy).
 *
 * The process of making a purgeable object non-volatile and determining its
 * previous state is atomic.  Thus, if a purgeable object is made
 * VM_PURGABLE_NONVOLATILE and the old state is returned as
 * VM_PURGABLE_VOLATILE, then the purgeable object's previous contents are
 * completely intact and will remain so until the object is made volatile
 * again.  If the old state is returned as VM_PURGABLE_EMPTY then the object
 * was reclaimed while it was in a volatile state and its previous contents
 * have been lost.
 */
/*
 * The object must be locked.
 */
kern_return_t
vm_object_purgable_control(
	vm_object_t	object,
	vm_purgable_t	control,
	int		*state)
{
	int		old_state;
	int		new_state;

	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or it can't be purgeable.
		 */
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock_assert_exclusive(object);

	/*
	 * Get current state of the purgeable object.
	 */
	old_state = object->purgable;
	if (old_state == VM_PURGABLE_DENY)
		return KERN_INVALID_ARGUMENT;

	/* purgeable objects can't have delayed copies - now or in the future */
	assert(object->copy == VM_OBJECT_NULL);
	assert(object->copy_strategy == MEMORY_OBJECT_COPY_NONE);

	/*
	 * Execute the desired operation.
6807 */ 6808 if (control == VM_PURGABLE_GET_STATE) { 6809 *state = old_state; 6810 return KERN_SUCCESS; 6811 } 6812 6813 if ((*state) & VM_PURGABLE_DEBUG_EMPTY) { 6814 object->volatile_empty = TRUE; 6815 } 6816 if ((*state) & VM_PURGABLE_DEBUG_FAULT) { 6817 object->volatile_fault = TRUE; 6818 } 6819 6820 new_state = *state & VM_PURGABLE_STATE_MASK; 6821 if (new_state == VM_PURGABLE_VOLATILE && 6822 object->volatile_empty) { 6823 new_state = VM_PURGABLE_EMPTY; 6824 } 6825 6826 switch (new_state) { 6827 case VM_PURGABLE_DENY: 6828 case VM_PURGABLE_NONVOLATILE: 6829 object->purgable = new_state; 6830 6831 if (old_state == VM_PURGABLE_VOLATILE) { 6832 unsigned int delta; 6833 6834 assert(object->resident_page_count >= 6835 object->wired_page_count); 6836 delta = (object->resident_page_count - 6837 object->wired_page_count); 6838 6839 assert(vm_page_purgeable_count >= delta); 6840 6841 if (delta != 0) { 6842 OSAddAtomic(-delta, 6843 (SInt32 *)&vm_page_purgeable_count); 6844 } 6845 if (object->wired_page_count != 0) { 6846 assert(vm_page_purgeable_wired_count >= 6847 object->wired_page_count); 6848 OSAddAtomic(-object->wired_page_count, 6849 (SInt32 *)&vm_page_purgeable_wired_count); 6850 } 6851 6852 vm_page_lock_queues(); 6853 6854 /* object should be on a queue */ 6855 assert(object->objq.next != NULL && 6856 object->objq.prev != NULL); 6857 purgeable_q_t queue; 6858 6859 /* 6860 * Move object from its volatile queue to the 6861 * non-volatile queue... 6862 */ 6863 queue = vm_purgeable_object_remove(object); 6864 assert(queue); 6865 6866 if (object->purgeable_when_ripe) { 6867 vm_purgeable_token_delete_last(queue); 6868 } 6869 assert(queue->debug_count_objects>=0); 6870 6871 vm_page_unlock_queues(); 6872 } 6873 if (old_state == VM_PURGABLE_VOLATILE || 6874 old_state == VM_PURGABLE_EMPTY) { 6875 /* 6876 * Transfer the object's pages from the volatile to 6877 * non-volatile ledgers. 6878 */ 6879 vm_purgeable_accounting(object, VM_PURGABLE_VOLATILE, 6880 FALSE); 6881 } 6882 6883 break; 6884 6885 case VM_PURGABLE_VOLATILE: 6886 if (object->volatile_fault) { 6887 vm_page_t p; 6888 int refmod; 6889 6890 queue_iterate(&object->memq, p, vm_page_t, listq) { 6891 if (p->busy || 6892 VM_PAGE_WIRED(p) || 6893 p->fictitious) { 6894 continue; 6895 } 6896 refmod = pmap_disconnect(p->phys_page); 6897 if ((refmod & VM_MEM_MODIFIED) && 6898 !p->dirty) { 6899 SET_PAGE_DIRTY(p, FALSE); 6900 } 6901 } 6902 } 6903 6904 if (old_state == VM_PURGABLE_EMPTY && 6905 object->resident_page_count == 0 && 6906 object->pager == NULL) 6907 break; 6908 6909 purgeable_q_t queue; 6910 6911 /* find the correct queue */ 6912 if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE) 6913 queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE]; 6914 else { 6915 if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO) 6916 queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO]; 6917 else 6918 queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO]; 6919 } 6920 6921 if (old_state == VM_PURGABLE_NONVOLATILE || 6922 old_state == VM_PURGABLE_EMPTY) { 6923 unsigned int delta; 6924 6925 if ((*state & VM_PURGABLE_NO_AGING_MASK) == 6926 VM_PURGABLE_NO_AGING) { 6927 object->purgeable_when_ripe = FALSE; 6928 } else { 6929 object->purgeable_when_ripe = TRUE; 6930 } 6931 6932 if (object->purgeable_when_ripe) { 6933 kern_return_t result; 6934 6935 /* try to add token... 
this can fail */
				vm_page_lock_queues();

				result = vm_purgeable_token_add(queue);
				if (result != KERN_SUCCESS) {
					vm_page_unlock_queues();
					return result;
				}
				vm_page_unlock_queues();
			}

			assert(object->resident_page_count >=
			       object->wired_page_count);
			delta = (object->resident_page_count -
				 object->wired_page_count);

			if (delta != 0) {
				OSAddAtomic(delta,
					    &vm_page_purgeable_count);
			}
			if (object->wired_page_count != 0) {
				OSAddAtomic(object->wired_page_count,
					    &vm_page_purgeable_wired_count);
			}

			object->purgable = new_state;

			/* object should be on "non-volatile" queue */
			assert(object->objq.next != NULL);
			assert(object->objq.prev != NULL);
		}
		else if (old_state == VM_PURGABLE_VOLATILE) {
			purgeable_q_t	old_queue;
			boolean_t	purgeable_when_ripe;

			/*
			 * if reassigning priorities / purgeable groups, we don't change the
			 * token queue.  So moving priorities will not make pages stay around longer.
			 * Reasoning is that the algorithm gives most priority to the most important
			 * object.  If a new token is added, the most important object's priority is boosted.
			 * This biases the system already for purgeable queues that move a lot.
			 * More biasing doesn't seem necessary in this case, where no new object is added.
			 */
			assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */

			old_queue = vm_purgeable_object_remove(object);
			assert(old_queue);

			if ((*state & VM_PURGABLE_NO_AGING_MASK) ==
			    VM_PURGABLE_NO_AGING) {
				purgeable_when_ripe = FALSE;
			} else {
				purgeable_when_ripe = TRUE;
			}

			if (old_queue != queue ||
			    (purgeable_when_ripe !=
			     object->purgeable_when_ripe)) {
				kern_return_t result;

				/* Changing queue.  Have to move token.
*/ 6996 vm_page_lock_queues(); 6997 if (object->purgeable_when_ripe) { 6998 vm_purgeable_token_delete_last(old_queue); 6999 } 7000 object->purgeable_when_ripe = purgeable_when_ripe; 7001 if (object->purgeable_when_ripe) { 7002 result = vm_purgeable_token_add(queue); 7003 assert(result==KERN_SUCCESS); /* this should never fail since we just freed a token */ 7004 } 7005 vm_page_unlock_queues(); 7006 7007 } 7008 }; 7009 vm_purgeable_object_add(object, queue, (*state&VM_VOLATILE_GROUP_MASK)>>VM_VOLATILE_GROUP_SHIFT ); 7010 if (old_state == VM_PURGABLE_NONVOLATILE) { 7011 vm_purgeable_accounting(object, VM_PURGABLE_NONVOLATILE, 7012 FALSE); 7013 } 7014 7015 assert(queue->debug_count_objects>=0); 7016 7017 break; 7018 7019 7020 case VM_PURGABLE_EMPTY: 7021 if (object->volatile_fault) { 7022 vm_page_t p; 7023 int refmod; 7024 7025 queue_iterate(&object->memq, p, vm_page_t, listq) { 7026 if (p->busy || 7027 VM_PAGE_WIRED(p) || 7028 p->fictitious) { 7029 continue; 7030 } 7031 refmod = pmap_disconnect(p->phys_page); 7032 if ((refmod & VM_MEM_MODIFIED) && 7033 !p->dirty) { 7034 SET_PAGE_DIRTY(p, FALSE); 7035 } 7036 } 7037 } 7038 7039 if (old_state == new_state) { 7040 /* nothing changes */ 7041 break; 7042 } 7043 7044 assert(old_state == VM_PURGABLE_NONVOLATILE || 7045 old_state == VM_PURGABLE_VOLATILE); 7046 if (old_state == VM_PURGABLE_VOLATILE) { 7047 purgeable_q_t old_queue; 7048 7049 /* object should be on a queue */ 7050 assert(object->objq.next != NULL && 7051 object->objq.prev != NULL); 7052 7053 old_queue = vm_purgeable_object_remove(object); 7054 assert(old_queue); 7055 if (object->purgeable_when_ripe) { 7056 vm_page_lock_queues(); 7057 vm_purgeable_token_delete_first(old_queue); 7058 vm_page_unlock_queues(); 7059 } 7060 } 7061 7062 if (old_state == VM_PURGABLE_NONVOLATILE) { 7063 /* 7064 * This object's pages were previously accounted as 7065 * "non-volatile" and now need to be accounted as 7066 * "volatile". 7067 */ 7068 vm_purgeable_accounting(object, VM_PURGABLE_NONVOLATILE, 7069 FALSE); 7070 /* 7071 * Set to VM_PURGABLE_EMPTY because the pages are no 7072 * longer accounted in the "non-volatile" ledger 7073 * and are also not accounted for in 7074 * "vm_page_purgeable_count". 7075 */ 7076 object->purgable = VM_PURGABLE_EMPTY; 7077 } 7078 7079 (void) vm_object_purge(object, 0); 7080 assert(object->purgable == VM_PURGABLE_EMPTY); 7081 7082 break; 7083 } 7084 7085 *state = old_state; 7086 7087 vm_object_lock_assert_exclusive(object); 7088 7089 return KERN_SUCCESS; 7090} 7091 7092kern_return_t 7093vm_object_get_page_counts( 7094 vm_object_t object, 7095 vm_object_offset_t offset, 7096 vm_object_size_t size, 7097 unsigned int *resident_page_count, 7098 unsigned int *dirty_page_count) 7099{ 7100 7101 kern_return_t kr = KERN_SUCCESS; 7102 boolean_t count_dirty_pages = FALSE; 7103 vm_page_t p = VM_PAGE_NULL; 7104 unsigned int local_resident_count = 0; 7105 unsigned int local_dirty_count = 0; 7106 vm_object_offset_t cur_offset = 0; 7107 vm_object_offset_t end_offset = 0; 7108 7109 if (object == VM_OBJECT_NULL) 7110 return KERN_INVALID_ARGUMENT; 7111 7112 7113 cur_offset = offset; 7114 7115 end_offset = offset + size; 7116 7117 vm_object_lock_assert_exclusive(object); 7118 7119 if (dirty_page_count != NULL) { 7120 7121 count_dirty_pages = TRUE; 7122 } 7123 7124 if (resident_page_count != NULL && count_dirty_pages == FALSE) { 7125 /* 7126 * Fast path when: 7127 * - we only want the resident page count, and, 7128 * - the entire object is exactly covered by the request. 
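		 *
		 * (That is, offset == 0 and size == object->vo_size, in which
		 *  case object->resident_page_count already holds the answer
		 *  and no page walk is needed.)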
7129 */ 7130 if (offset == 0 && (object->vo_size == size)) { 7131 7132 *resident_page_count = object->resident_page_count; 7133 goto out; 7134 } 7135 } 7136 7137 if (object->resident_page_count <= (size >> PAGE_SHIFT)) { 7138 7139 queue_iterate(&object->memq, p, vm_page_t, listq) { 7140 7141 if (p->offset >= cur_offset && p->offset < end_offset) { 7142 7143 local_resident_count++; 7144 7145 if (count_dirty_pages) { 7146 7147 if (p->dirty || (p->wpmapped && pmap_is_modified(p->phys_page))) { 7148 7149 local_dirty_count++; 7150 } 7151 } 7152 } 7153 } 7154 } else { 7155 7156 for (cur_offset = offset; cur_offset < end_offset; cur_offset += PAGE_SIZE_64) { 7157 7158 p = vm_page_lookup(object, cur_offset); 7159 7160 if (p != VM_PAGE_NULL) { 7161 7162 local_resident_count++; 7163 7164 if (count_dirty_pages) { 7165 7166 if (p->dirty || (p->wpmapped && pmap_is_modified(p->phys_page))) { 7167 7168 local_dirty_count++; 7169 } 7170 } 7171 } 7172 } 7173 7174 } 7175 7176 if (resident_page_count != NULL) { 7177 *resident_page_count = local_resident_count; 7178 } 7179 7180 if (dirty_page_count != NULL) { 7181 *dirty_page_count = local_dirty_count; 7182 } 7183 7184out: 7185 return kr; 7186} 7187 7188 7189#if TASK_SWAPPER 7190/* 7191 * vm_object_res_deallocate 7192 * 7193 * (recursively) decrement residence counts on vm objects and their shadows. 7194 * Called from vm_object_deallocate and when swapping out an object. 7195 * 7196 * The object is locked, and remains locked throughout the function, 7197 * even as we iterate down the shadow chain. Locks on intermediate objects 7198 * will be dropped, but not the original object. 7199 * 7200 * NOTE: this function used to use recursion, rather than iteration. 7201 */ 7202 7203__private_extern__ void 7204vm_object_res_deallocate( 7205 vm_object_t object) 7206{ 7207 vm_object_t orig_object = object; 7208 /* 7209 * Object is locked so it can be called directly 7210 * from vm_object_deallocate. Original object is never 7211 * unlocked. 7212 */ 7213 assert(object->res_count > 0); 7214 while (--object->res_count == 0) { 7215 assert(object->ref_count >= object->res_count); 7216 vm_object_deactivate_all_pages(object); 7217 /* iterate on shadow, if present */ 7218 if (object->shadow != VM_OBJECT_NULL) { 7219 vm_object_t tmp_object = object->shadow; 7220 vm_object_lock(tmp_object); 7221 if (object != orig_object) 7222 vm_object_unlock(object); 7223 object = tmp_object; 7224 assert(object->res_count > 0); 7225 } else 7226 break; 7227 } 7228 if (object != orig_object) 7229 vm_object_unlock(object); 7230} 7231 7232/* 7233 * vm_object_res_reference 7234 * 7235 * Internal function to increment residence count on a vm object 7236 * and its shadows. It is called only from vm_object_reference, and 7237 * when swapping in a vm object, via vm_map_swap. 7238 * 7239 * The object is locked, and remains locked throughout the function, 7240 * even as we iterate down the shadow chain. Locks on intermediate objects 7241 * will be dropped, but not the original object. 7242 * 7243 * NOTE: this function used to use recursion, rather than iteration. 7244 */ 7245 7246__private_extern__ void 7247vm_object_res_reference( 7248 vm_object_t object) 7249{ 7250 vm_object_t orig_object = object; 7251 /* 7252 * Object is locked, so this can be called directly 7253 * from vm_object_reference. This lock is never released. 
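	 *
	 * (Each time an object's res_count goes from 0 to 1, we step one
	 *  level down its shadow chain and take a residence reference there
	 *  as well, mirroring the teardown in vm_object_res_deallocate()
	 *  above.)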
7254 */ 7255 while ((++object->res_count == 1) && 7256 (object->shadow != VM_OBJECT_NULL)) { 7257 vm_object_t tmp_object = object->shadow; 7258 7259 assert(object->ref_count >= object->res_count); 7260 vm_object_lock(tmp_object); 7261 if (object != orig_object) 7262 vm_object_unlock(object); 7263 object = tmp_object; 7264 } 7265 if (object != orig_object) 7266 vm_object_unlock(object); 7267 assert(orig_object->ref_count >= orig_object->res_count); 7268} 7269#endif /* TASK_SWAPPER */ 7270 7271/* 7272 * vm_object_reference: 7273 * 7274 * Gets another reference to the given object. 7275 */ 7276#ifdef vm_object_reference 7277#undef vm_object_reference 7278#endif 7279__private_extern__ void 7280vm_object_reference( 7281 register vm_object_t object) 7282{ 7283 if (object == VM_OBJECT_NULL) 7284 return; 7285 7286 vm_object_lock(object); 7287 assert(object->ref_count > 0); 7288 vm_object_reference_locked(object); 7289 vm_object_unlock(object); 7290} 7291 7292#ifdef MACH_BSD 7293/* 7294 * Scale the vm_object_cache 7295 * This is required to make sure that the vm_object_cache is big 7296 * enough to effectively cache the mapped file. 7297 * This is really important with UBC as all the regular file vnodes 7298 * have memory object associated with them. Havving this cache too 7299 * small results in rapid reclaim of vnodes and hurts performance a LOT! 7300 * 7301 * This is also needed as number of vnodes can be dynamically scaled. 7302 */ 7303kern_return_t 7304adjust_vm_object_cache( 7305 __unused vm_size_t oval, 7306 __unused vm_size_t nval) 7307{ 7308#if VM_OBJECT_CACHE 7309 vm_object_cached_max = nval; 7310 vm_object_cache_trim(FALSE); 7311#endif 7312 return (KERN_SUCCESS); 7313} 7314#endif /* MACH_BSD */ 7315 7316 7317/* 7318 * vm_object_transpose 7319 * 7320 * This routine takes two VM objects of the same size and exchanges 7321 * their backing store. 7322 * The objects should be "quiesced" via a UPL operation with UPL_SET_IO_WIRE 7323 * and UPL_BLOCK_ACCESS if they are referenced anywhere. 7324 * 7325 * The VM objects must not be locked by caller. 7326 */ 7327unsigned int vm_object_transpose_count = 0; 7328kern_return_t 7329vm_object_transpose( 7330 vm_object_t object1, 7331 vm_object_t object2, 7332 vm_object_size_t transpose_size) 7333{ 7334 vm_object_t tmp_object; 7335 kern_return_t retval; 7336 boolean_t object1_locked, object2_locked; 7337 vm_page_t page; 7338 vm_object_offset_t page_offset; 7339 lck_mtx_t *hash_lck; 7340 vm_object_hash_entry_t hash_entry; 7341 7342 tmp_object = VM_OBJECT_NULL; 7343 object1_locked = FALSE; object2_locked = FALSE; 7344 7345 if (object1 == object2 || 7346 object1 == VM_OBJECT_NULL || 7347 object2 == VM_OBJECT_NULL) { 7348 /* 7349 * If the 2 VM objects are the same, there's 7350 * no point in exchanging their backing store. 7351 */ 7352 retval = KERN_INVALID_VALUE; 7353 goto done; 7354 } 7355 7356 /* 7357 * Since we need to lock both objects at the same time, 7358 * make sure we always lock them in the same order to 7359 * avoid deadlocks. 7360 */ 7361 if (object1 > object2) { 7362 tmp_object = object1; 7363 object1 = object2; 7364 object2 = tmp_object; 7365 } 7366 7367 /* 7368 * Allocate a temporary VM object to hold object1's contents 7369 * while we copy object2 to object1. 7370 */ 7371 tmp_object = vm_object_allocate(transpose_size); 7372 vm_object_lock(tmp_object); 7373 tmp_object->can_persist = FALSE; 7374 7375 7376 /* 7377 * Grab control of the 1st VM object. 
7378 */ 7379 vm_object_lock(object1); 7380 object1_locked = TRUE; 7381 if (!object1->alive || object1->terminating || 7382 object1->copy || object1->shadow || object1->shadowed || 7383 object1->purgable != VM_PURGABLE_DENY) { 7384 /* 7385 * We don't deal with copy or shadow objects (yet). 7386 */ 7387 retval = KERN_INVALID_VALUE; 7388 goto done; 7389 } 7390 /* 7391 * We're about to mess with the object's backing store and 7392 * taking a "paging_in_progress" reference wouldn't be enough 7393 * to prevent any paging activity on this object, so the caller should 7394 * have "quiesced" the objects beforehand, via a UPL operation with 7395 * UPL_SET_IO_WIRE (to make sure all the pages are there and wired) 7396 * and UPL_BLOCK_ACCESS (to mark the pages "busy"). 7397 * 7398 * Wait for any paging operation to complete (but only paging, not 7399 * other kind of activities not linked to the pager). After we're 7400 * statisfied that there's no more paging in progress, we keep the 7401 * object locked, to guarantee that no one tries to access its pager. 7402 */ 7403 vm_object_paging_only_wait(object1, THREAD_UNINT); 7404 7405 /* 7406 * Same as above for the 2nd object... 7407 */ 7408 vm_object_lock(object2); 7409 object2_locked = TRUE; 7410 if (! object2->alive || object2->terminating || 7411 object2->copy || object2->shadow || object2->shadowed || 7412 object2->purgable != VM_PURGABLE_DENY) { 7413 retval = KERN_INVALID_VALUE; 7414 goto done; 7415 } 7416 vm_object_paging_only_wait(object2, THREAD_UNINT); 7417 7418 7419 if (object1->vo_size != object2->vo_size || 7420 object1->vo_size != transpose_size) { 7421 /* 7422 * If the 2 objects don't have the same size, we can't 7423 * exchange their backing stores or one would overflow. 7424 * If their size doesn't match the caller's 7425 * "transpose_size", we can't do it either because the 7426 * transpose operation will affect the entire span of 7427 * the objects. 7428 */ 7429 retval = KERN_INVALID_VALUE; 7430 goto done; 7431 } 7432 7433 7434 /* 7435 * Transpose the lists of resident pages. 7436 * This also updates the resident_page_count and the memq_hint. 7437 */ 7438 if (object1->phys_contiguous || queue_empty(&object1->memq)) { 7439 /* 7440 * No pages in object1, just transfer pages 7441 * from object2 to object1. No need to go through 7442 * an intermediate object. 7443 */ 7444 while (!queue_empty(&object2->memq)) { 7445 page = (vm_page_t) queue_first(&object2->memq); 7446 vm_page_rename(page, object1, page->offset, FALSE); 7447 } 7448 assert(queue_empty(&object2->memq)); 7449 } else if (object2->phys_contiguous || queue_empty(&object2->memq)) { 7450 /* 7451 * No pages in object2, just transfer pages 7452 * from object1 to object2. No need to go through 7453 * an intermediate object. 
7454 */ 7455 while (!queue_empty(&object1->memq)) { 7456 page = (vm_page_t) queue_first(&object1->memq); 7457 vm_page_rename(page, object2, page->offset, FALSE); 7458 } 7459 assert(queue_empty(&object1->memq)); 7460 } else { 7461 /* transfer object1's pages to tmp_object */ 7462 while (!queue_empty(&object1->memq)) { 7463 page = (vm_page_t) queue_first(&object1->memq); 7464 page_offset = page->offset; 7465 vm_page_remove(page, TRUE); 7466 page->offset = page_offset; 7467 queue_enter(&tmp_object->memq, page, vm_page_t, listq); 7468 } 7469 assert(queue_empty(&object1->memq)); 7470 /* transfer object2's pages to object1 */ 7471 while (!queue_empty(&object2->memq)) { 7472 page = (vm_page_t) queue_first(&object2->memq); 7473 vm_page_rename(page, object1, page->offset, FALSE); 7474 } 7475 assert(queue_empty(&object2->memq)); 7476 /* transfer tmp_object's pages to object1 */ 7477 while (!queue_empty(&tmp_object->memq)) { 7478 page = (vm_page_t) queue_first(&tmp_object->memq); 7479 queue_remove(&tmp_object->memq, page, 7480 vm_page_t, listq); 7481 vm_page_insert(page, object2, page->offset); 7482 } 7483 assert(queue_empty(&tmp_object->memq)); 7484 } 7485 7486#define __TRANSPOSE_FIELD(field) \ 7487MACRO_BEGIN \ 7488 tmp_object->field = object1->field; \ 7489 object1->field = object2->field; \ 7490 object2->field = tmp_object->field; \ 7491MACRO_END 7492 7493 /* "Lock" refers to the object not its contents */ 7494 /* "size" should be identical */ 7495 assert(object1->vo_size == object2->vo_size); 7496 /* "memq_hint" was updated above when transposing pages */ 7497 /* "ref_count" refers to the object not its contents */ 7498#if TASK_SWAPPER 7499 /* "res_count" refers to the object not its contents */ 7500#endif 7501 /* "resident_page_count" was updated above when transposing pages */ 7502 /* "wired_page_count" was updated above when transposing pages */ 7503 /* "reusable_page_count" was updated above when transposing pages */ 7504 /* there should be no "copy" */ 7505 assert(!object1->copy); 7506 assert(!object2->copy); 7507 /* there should be no "shadow" */ 7508 assert(!object1->shadow); 7509 assert(!object2->shadow); 7510 __TRANSPOSE_FIELD(vo_shadow_offset); /* used by phys_contiguous objects */ 7511 __TRANSPOSE_FIELD(pager); 7512 __TRANSPOSE_FIELD(paging_offset); 7513 __TRANSPOSE_FIELD(pager_control); 7514 /* update the memory_objects' pointers back to the VM objects */ 7515 if (object1->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 7516 memory_object_control_collapse(object1->pager_control, 7517 object1); 7518 } 7519 if (object2->pager_control != MEMORY_OBJECT_CONTROL_NULL) { 7520 memory_object_control_collapse(object2->pager_control, 7521 object2); 7522 } 7523 __TRANSPOSE_FIELD(copy_strategy); 7524 /* "paging_in_progress" refers to the object not its contents */ 7525 assert(!object1->paging_in_progress); 7526 assert(!object2->paging_in_progress); 7527 assert(object1->activity_in_progress); 7528 assert(object2->activity_in_progress); 7529 /* "all_wanted" refers to the object not its contents */ 7530 __TRANSPOSE_FIELD(pager_created); 7531 __TRANSPOSE_FIELD(pager_initialized); 7532 __TRANSPOSE_FIELD(pager_ready); 7533 __TRANSPOSE_FIELD(pager_trusted); 7534 __TRANSPOSE_FIELD(can_persist); 7535 __TRANSPOSE_FIELD(internal); 7536 __TRANSPOSE_FIELD(temporary); 7537 __TRANSPOSE_FIELD(private); 7538 __TRANSPOSE_FIELD(pageout); 7539 /* "alive" should be set */ 7540 assert(object1->alive); 7541 assert(object2->alive); 7542 /* "purgeable" should be non-purgeable */ 7543 assert(object1->purgable == 
VM_PURGABLE_DENY); 7544 assert(object2->purgable == VM_PURGABLE_DENY); 7545 /* "shadowed" refers to the the object not its contents */ 7546 __TRANSPOSE_FIELD(purgeable_when_ripe); 7547 __TRANSPOSE_FIELD(advisory_pageout); 7548 __TRANSPOSE_FIELD(true_share); 7549 /* "terminating" should not be set */ 7550 assert(!object1->terminating); 7551 assert(!object2->terminating); 7552 __TRANSPOSE_FIELD(named); 7553 /* "shadow_severed" refers to the object not its contents */ 7554 __TRANSPOSE_FIELD(phys_contiguous); 7555 __TRANSPOSE_FIELD(nophyscache); 7556 /* "cached_list.next" points to transposed object */ 7557 object1->cached_list.next = (queue_entry_t) object2; 7558 object2->cached_list.next = (queue_entry_t) object1; 7559 /* "cached_list.prev" should be NULL */ 7560 assert(object1->cached_list.prev == NULL); 7561 assert(object2->cached_list.prev == NULL); 7562 /* "msr_q" is linked to the object not its contents */ 7563 assert(queue_empty(&object1->msr_q)); 7564 assert(queue_empty(&object2->msr_q)); 7565 __TRANSPOSE_FIELD(last_alloc); 7566 __TRANSPOSE_FIELD(sequential); 7567 __TRANSPOSE_FIELD(pages_created); 7568 __TRANSPOSE_FIELD(pages_used); 7569 __TRANSPOSE_FIELD(scan_collisions); 7570#if MACH_PAGEMAP 7571 __TRANSPOSE_FIELD(existence_map); 7572#endif 7573 __TRANSPOSE_FIELD(cow_hint); 7574#if MACH_ASSERT 7575 __TRANSPOSE_FIELD(paging_object); 7576#endif 7577 __TRANSPOSE_FIELD(wimg_bits); 7578 __TRANSPOSE_FIELD(set_cache_attr); 7579 __TRANSPOSE_FIELD(code_signed); 7580 if (object1->hashed) { 7581 hash_lck = vm_object_hash_lock_spin(object2->pager); 7582 hash_entry = vm_object_hash_lookup(object2->pager, FALSE); 7583 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); 7584 hash_entry->object = object2; 7585 vm_object_hash_unlock(hash_lck); 7586 } 7587 if (object2->hashed) { 7588 hash_lck = vm_object_hash_lock_spin(object1->pager); 7589 hash_entry = vm_object_hash_lookup(object1->pager, FALSE); 7590 assert(hash_entry != VM_OBJECT_HASH_ENTRY_NULL); 7591 hash_entry->object = object1; 7592 vm_object_hash_unlock(hash_lck); 7593 } 7594 __TRANSPOSE_FIELD(hashed); 7595 object1->transposed = TRUE; 7596 object2->transposed = TRUE; 7597 __TRANSPOSE_FIELD(mapping_in_progress); 7598 __TRANSPOSE_FIELD(volatile_empty); 7599 __TRANSPOSE_FIELD(volatile_fault); 7600 __TRANSPOSE_FIELD(all_reusable); 7601 assert(object1->blocked_access); 7602 assert(object2->blocked_access); 7603 assert(object1->__object2_unused_bits == 0); 7604 assert(object2->__object2_unused_bits == 0); 7605#if UPL_DEBUG 7606 /* "uplq" refers to the object not its contents (see upl_transpose()) */ 7607#endif 7608 assert(object1->objq.next == NULL); 7609 assert(object1->objq.prev == NULL); 7610 assert(object2->objq.next == NULL); 7611 assert(object2->objq.prev == NULL); 7612 7613#undef __TRANSPOSE_FIELD 7614 7615 retval = KERN_SUCCESS; 7616 7617done: 7618 /* 7619 * Cleanup. 7620 */ 7621 if (tmp_object != VM_OBJECT_NULL) { 7622 vm_object_unlock(tmp_object); 7623 /* 7624 * Re-initialize the temporary object to avoid 7625 * deallocating a real pager. 
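		 *
		 * (tmp_object transiently received object1's pager,
		 *  paging_offset and pager_control through __TRANSPOSE_FIELD
		 *  above; re-running _vm_object_allocate() resets those
		 *  fields so that vm_object_deallocate() below frees only the
		 *  temporary object itself.)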
7626 */ 7627 _vm_object_allocate(transpose_size, tmp_object); 7628 vm_object_deallocate(tmp_object); 7629 tmp_object = VM_OBJECT_NULL; 7630 } 7631 7632 if (object1_locked) { 7633 vm_object_unlock(object1); 7634 object1_locked = FALSE; 7635 } 7636 if (object2_locked) { 7637 vm_object_unlock(object2); 7638 object2_locked = FALSE; 7639 } 7640 7641 vm_object_transpose_count++; 7642 7643 return retval; 7644} 7645 7646 7647/* 7648 * vm_object_cluster_size 7649 * 7650 * Determine how big a cluster we should issue an I/O for... 7651 * 7652 * Inputs: *start == offset of page needed 7653 * *length == maximum cluster pager can handle 7654 * Outputs: *start == beginning offset of cluster 7655 * *length == length of cluster to try 7656 * 7657 * The original *start will be encompassed by the cluster 7658 * 7659 */ 7660extern int speculative_reads_disabled; 7661extern int ignore_is_ssd; 7662 7663unsigned int preheat_max_bytes = MAX_UPL_TRANSFER_BYTES; 7664unsigned int preheat_min_bytes = (1024 * 32); 7665 7666 7667__private_extern__ void 7668vm_object_cluster_size(vm_object_t object, vm_object_offset_t *start, 7669 vm_size_t *length, vm_object_fault_info_t fault_info, uint32_t *io_streaming) 7670{ 7671 vm_size_t pre_heat_size; 7672 vm_size_t tail_size; 7673 vm_size_t head_size; 7674 vm_size_t max_length; 7675 vm_size_t cluster_size; 7676 vm_object_offset_t object_size; 7677 vm_object_offset_t orig_start; 7678 vm_object_offset_t target_start; 7679 vm_object_offset_t offset; 7680 vm_behavior_t behavior; 7681 boolean_t look_behind = TRUE; 7682 boolean_t look_ahead = TRUE; 7683 boolean_t isSSD = FALSE; 7684 uint32_t throttle_limit; 7685 int sequential_run; 7686 int sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 7687 vm_size_t max_ph_size; 7688 vm_size_t min_ph_size; 7689 7690 assert( !(*length & PAGE_MASK)); 7691 assert( !(*start & PAGE_MASK_64)); 7692 7693 /* 7694 * remember maxiumum length of run requested 7695 */ 7696 max_length = *length; 7697 /* 7698 * we'll always return a cluster size of at least 7699 * 1 page, since the original fault must always 7700 * be processed 7701 */ 7702 *length = PAGE_SIZE; 7703 *io_streaming = 0; 7704 7705 if (speculative_reads_disabled || fault_info == NULL) { 7706 /* 7707 * no cluster... just fault the page in 7708 */ 7709 return; 7710 } 7711 orig_start = *start; 7712 target_start = orig_start; 7713 cluster_size = round_page(fault_info->cluster_size); 7714 behavior = fault_info->behavior; 7715 7716 vm_object_lock(object); 7717 7718 if (object->pager == MEMORY_OBJECT_NULL) 7719 goto out; /* pager is gone for this object, nothing more to do */ 7720 7721 if (!ignore_is_ssd) 7722 vnode_pager_get_isSSD(object->pager, &isSSD); 7723 7724 min_ph_size = round_page(preheat_min_bytes); 7725 max_ph_size = round_page(preheat_max_bytes); 7726 7727 if (isSSD) { 7728 min_ph_size /= 2; 7729 max_ph_size /= 8; 7730 } 7731 if (min_ph_size < PAGE_SIZE) 7732 min_ph_size = PAGE_SIZE; 7733 7734 if (max_ph_size < PAGE_SIZE) 7735 max_ph_size = PAGE_SIZE; 7736 else if (max_ph_size > MAX_UPL_TRANSFER_BYTES) 7737 max_ph_size = MAX_UPL_TRANSFER_BYTES; 7738 7739 if (max_length > max_ph_size) 7740 max_length = max_ph_size; 7741 7742 if (max_length <= PAGE_SIZE) 7743 goto out; 7744 7745 if (object->internal) 7746 object_size = object->vo_size; 7747 else 7748 vnode_pager_get_object_size(object->pager, &object_size); 7749 7750 object_size = round_page_64(object_size); 7751 7752 if (orig_start >= object_size) { 7753 /* 7754 * fault occurred beyond the EOF... 
7755 * we need to punt w/o changing the 7756 * starting offset 7757 */ 7758 goto out; 7759 } 7760 if (object->pages_used > object->pages_created) { 7761 /* 7762 * must have wrapped our 32 bit counters 7763 * so reset 7764 */ 7765 object->pages_used = object->pages_created = 0; 7766 } 7767 if ((sequential_run = object->sequential)) { 7768 if (sequential_run < 0) { 7769 sequential_behavior = VM_BEHAVIOR_RSEQNTL; 7770 sequential_run = 0 - sequential_run; 7771 } else { 7772 sequential_behavior = VM_BEHAVIOR_SEQUENTIAL; 7773 } 7774 7775 } 7776 switch (behavior) { 7777 7778 default: 7779 behavior = VM_BEHAVIOR_DEFAULT; 7780 7781 case VM_BEHAVIOR_DEFAULT: 7782 if (object->internal && fault_info->user_tag == VM_MEMORY_STACK) 7783 goto out; 7784 7785 if (sequential_run >= (3 * PAGE_SIZE)) { 7786 pre_heat_size = sequential_run + PAGE_SIZE; 7787 7788 if (sequential_behavior == VM_BEHAVIOR_SEQUENTIAL) 7789 look_behind = FALSE; 7790 else 7791 look_ahead = FALSE; 7792 7793 *io_streaming = 1; 7794 } else { 7795 7796 if (object->pages_created < (20 * (min_ph_size >> PAGE_SHIFT))) { 7797 /* 7798 * prime the pump 7799 */ 7800 pre_heat_size = min_ph_size; 7801 } else { 7802 /* 7803 * Linear growth in PH size: The maximum size is max_length... 7804 * this calculation will result in a size that is neither a 7805 * power of 2 nor a multiple of PAGE_SIZE... so round 7806 * it up to the nearest PAGE_SIZE boundary 7807 */ 7808 pre_heat_size = (max_length * object->pages_used) / object->pages_created; 7809 7810 if (pre_heat_size < min_ph_size) 7811 pre_heat_size = min_ph_size; 7812 else 7813 pre_heat_size = round_page(pre_heat_size); 7814 } 7815 } 7816 break; 7817 7818 case VM_BEHAVIOR_RANDOM: 7819 if ((pre_heat_size = cluster_size) <= PAGE_SIZE) 7820 goto out; 7821 break; 7822 7823 case VM_BEHAVIOR_SEQUENTIAL: 7824 if ((pre_heat_size = cluster_size) == 0) 7825 pre_heat_size = sequential_run + PAGE_SIZE; 7826 look_behind = FALSE; 7827 *io_streaming = 1; 7828 7829 break; 7830 7831 case VM_BEHAVIOR_RSEQNTL: 7832 if ((pre_heat_size = cluster_size) == 0) 7833 pre_heat_size = sequential_run + PAGE_SIZE; 7834 look_ahead = FALSE; 7835 *io_streaming = 1; 7836 7837 break; 7838 7839 } 7840 throttle_limit = (uint32_t) max_length; 7841 assert(throttle_limit == max_length); 7842 7843 if (vnode_pager_get_throttle_io_limit(object->pager, &throttle_limit) == KERN_SUCCESS) { 7844 if (max_length > throttle_limit) 7845 max_length = throttle_limit; 7846 } 7847 if (pre_heat_size > max_length) 7848 pre_heat_size = max_length; 7849 7850 if (behavior == VM_BEHAVIOR_DEFAULT && (pre_heat_size > min_ph_size)) { 7851 7852 unsigned int consider_free = vm_page_free_count + vm_page_cleaned_count; 7853 7854 if (consider_free < vm_page_throttle_limit) { 7855 pre_heat_size = trunc_page(pre_heat_size / 16); 7856 } else if (consider_free < vm_page_free_target) { 7857 pre_heat_size = trunc_page(pre_heat_size / 4); 7858 } 7859 7860 if (pre_heat_size < min_ph_size) 7861 pre_heat_size = min_ph_size; 7862 } 7863 if (look_ahead == TRUE) { 7864 if (look_behind == TRUE) { 7865 /* 7866 * if we get here it's due to a random access... 7867 * so we want to center the original fault address 7868 * within the cluster we will issue... make sure 7869 * to calculate 'head_size' as a multiple of PAGE_SIZE...
* 'pre_heat_size' is a multiple of PAGE_SIZE but not 7871 * necessarily an even number of pages so we need to truncate 7872 * the result to a PAGE_SIZE boundary 7873 */ 7874 head_size = trunc_page(pre_heat_size / 2); 7875 7876 if (target_start > head_size) 7877 target_start -= head_size; 7878 else 7879 target_start = 0; 7880 7881 /* 7882 * 'target_start' at this point represents the beginning offset 7883 * of the cluster we are considering... 'orig_start' will be in 7884 * the center of this cluster if we didn't have to clip the start 7885 * due to running into the start of the file 7886 */ 7887 } 7888 if ((target_start + pre_heat_size) > object_size) 7889 pre_heat_size = (vm_size_t)(round_page_64(object_size - target_start)); 7890 /* 7891 * at this point calculate the number of pages beyond the original fault 7892 * address that we want to consider... this is guaranteed not to extend beyond 7893 * the current EOF... 7894 */ 7895 assert((vm_size_t)(orig_start - target_start) == (orig_start - target_start)); 7896 tail_size = pre_heat_size - (vm_size_t)(orig_start - target_start) - PAGE_SIZE; 7897 } else { 7898 if (pre_heat_size > target_start) { 7899 /* 7900 * since pre_heat_size is always smaller than 2^32, 7901 * if it is larger than target_start (a 64 bit value) 7902 * it is safe to clip target_start to 32 bits 7903 */ 7904 pre_heat_size = (vm_size_t) target_start; 7905 } 7906 tail_size = 0; 7907 } 7908 assert( !(target_start & PAGE_MASK_64)); 7909 assert( !(pre_heat_size & PAGE_MASK)); 7910 7911 if (pre_heat_size <= PAGE_SIZE) 7912 goto out; 7913 7914 if (look_behind == TRUE) { 7915 /* 7916 * take a look at the pages before the original 7917 * faulting offset... recalculate this in case 7918 * we had to clip 'pre_heat_size' above to keep 7919 * from running past the EOF.
7920 */ 7921 head_size = pre_heat_size - tail_size - PAGE_SIZE; 7922 7923 for (offset = orig_start - PAGE_SIZE_64; head_size; offset -= PAGE_SIZE_64, head_size -= PAGE_SIZE) { 7924 /* 7925 * don't poke below the lowest offset 7926 */ 7927 if (offset < fault_info->lo_offset) 7928 break; 7929 /* 7930 * for external objects and internal objects w/o an existence map 7931 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN 7932 */ 7933#if MACH_PAGEMAP 7934 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) { 7935 /* 7936 * we know for a fact that the pager can't provide the page 7937 * so don't include it or any pages beyond it in this cluster 7938 */ 7939 break; 7940 } 7941#endif /* MACH_PAGEMAP */ 7942 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) 7943 == VM_EXTERNAL_STATE_ABSENT) { 7944 break; 7945 } 7946 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { 7947 /* 7948 * don't bridge resident pages 7949 */ 7950 break; 7951 } 7952 *start = offset; 7953 *length += PAGE_SIZE; 7954 } 7955 } 7956 if (look_ahead == TRUE) { 7957 for (offset = orig_start + PAGE_SIZE_64; tail_size; offset += PAGE_SIZE_64, tail_size -= PAGE_SIZE) { 7958 /* 7959 * don't poke above the highest offset 7960 */ 7961 if (offset >= fault_info->hi_offset) 7962 break; 7963 assert(offset < object_size); 7964 7965 /* 7966 * for external objects and internal objects w/o an existence map 7967 * vm_external_state_get will return VM_EXTERNAL_STATE_UNKNOWN 7968 */ 7969#if MACH_PAGEMAP 7970 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_ABSENT) { 7971 /* 7972 * we know for a fact that the pager can't provide the page 7973 * so don't include it or any pages beyond it in this cluster 7974 */ 7975 break; 7976 } 7977#endif /* MACH_PAGEMAP */ 7978 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset) == VM_EXTERNAL_STATE_ABSENT) { 7979 break; 7980 } 7981 if (vm_page_lookup(object, offset) != VM_PAGE_NULL) { 7982 /* 7983 * don't bridge resident pages 7984 */ 7985 break; 7986 } 7987 *length += PAGE_SIZE; 7988 } 7989 } 7990out: 7991 if (*length > max_length) 7992 *length = max_length; 7993 7994 vm_object_unlock(object); 7995 7996 DTRACE_VM1(clustersize, vm_size_t, *length); 7997} 7998 7999 8000/* 8001 * Allow manipulation of individual page state.
This is actually part of 8002 * the UPL regimen but takes place on the VM object rather than on a UPL 8003 */ 8004 8005kern_return_t 8006vm_object_page_op( 8007 vm_object_t object, 8008 vm_object_offset_t offset, 8009 int ops, 8010 ppnum_t *phys_entry, 8011 int *flags) 8012{ 8013 vm_page_t dst_page; 8014 8015 vm_object_lock(object); 8016 8017 if(ops & UPL_POP_PHYSICAL) { 8018 if(object->phys_contiguous) { 8019 if (phys_entry) { 8020 *phys_entry = (ppnum_t) 8021 (object->vo_shadow_offset >> PAGE_SHIFT); 8022 } 8023 vm_object_unlock(object); 8024 return KERN_SUCCESS; 8025 } else { 8026 vm_object_unlock(object); 8027 return KERN_INVALID_OBJECT; 8028 } 8029 } 8030 if(object->phys_contiguous) { 8031 vm_object_unlock(object); 8032 return KERN_INVALID_OBJECT; 8033 } 8034 8035 while(TRUE) { 8036 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) { 8037 vm_object_unlock(object); 8038 return KERN_FAILURE; 8039 } 8040 8041 /* Sync up on getting the busy bit */ 8042 if((dst_page->busy || dst_page->cleaning) && 8043 (((ops & UPL_POP_SET) && 8044 (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) { 8045 /* someone else is playing with the page, we will */ 8046 /* have to wait */ 8047 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 8048 continue; 8049 } 8050 8051 if (ops & UPL_POP_DUMP) { 8052 if (dst_page->pmapped == TRUE) 8053 pmap_disconnect(dst_page->phys_page); 8054 8055 VM_PAGE_FREE(dst_page); 8056 break; 8057 } 8058 8059 if (flags) { 8060 *flags = 0; 8061 8062 /* Get the condition of flags before requested ops */ 8063 /* are undertaken */ 8064 8065 if(dst_page->dirty) *flags |= UPL_POP_DIRTY; 8066 if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT; 8067 if(dst_page->precious) *flags |= UPL_POP_PRECIOUS; 8068 if(dst_page->absent) *flags |= UPL_POP_ABSENT; 8069 if(dst_page->busy) *flags |= UPL_POP_BUSY; 8070 } 8071 8072 /* The caller should have made a call either contingent with */ 8073 /* or prior to this call to set UPL_POP_BUSY */ 8074 if(ops & UPL_POP_SET) { 8075 /* The protection granted with this assert will */ 8076 /* not be complete. If the caller violates the */ 8077 /* convention and attempts to change page state */ 8078 /* without first setting busy we may not see it */ 8079 /* because the page may already be busy. However */ 8080 /* if such violations occur we will assert sooner */ 8081 /* or later. */ 8082 assert(dst_page->busy || (ops & UPL_POP_BUSY)); 8083 if (ops & UPL_POP_DIRTY) { 8084 SET_PAGE_DIRTY(dst_page, FALSE); 8085 } 8086 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE; 8087 if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE; 8088 if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE; 8089 if (ops & UPL_POP_BUSY) dst_page->busy = TRUE; 8090 } 8091 8092 if(ops & UPL_POP_CLR) { 8093 assert(dst_page->busy); 8094 if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE; 8095 if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE; 8096 if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE; 8097 if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE; 8098 if (ops & UPL_POP_BUSY) { 8099 dst_page->busy = FALSE; 8100 PAGE_WAKEUP(dst_page); 8101 } 8102 } 8103 8104 if (dst_page->encrypted) { 8105 /* 8106 * ENCRYPTED SWAP: 8107 * We need to decrypt this encrypted page before the 8108 * caller can access its contents. 8109 * But if the caller really wants to access the page's 8110 * contents, they have to keep the page "busy". 8111 * Otherwise, the page could get recycled or re-encrypted 8112 * at any time. 
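 * In practice the contents are decrypted below only when the caller
 * is doing a UPL_POP_SET of UPL_POP_BUSY, i.e. when the page is held
 * busy on its behalf; any other caller is left with the encrypted
 * page and must not be handed its physical address (hence the
 * "assert(!phys_entry)" in the else branch).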
8113 */ 8114 if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) && 8115 dst_page->busy) { 8116 /* 8117 * The page is stable enough to be accessed by 8118 * the caller, so make sure its contents are 8119 * not encrypted. 8120 */ 8121 vm_page_decrypt(dst_page, 0); 8122 } else { 8123 /* 8124 * The page is not busy, so don't bother 8125 * decrypting it, since anything could 8126 * happen to it between now and when the 8127 * caller wants to access it. 8128 * We should not give the caller access 8129 * to this page. 8130 */ 8131 assert(!phys_entry); 8132 } 8133 } 8134 8135 if (phys_entry) { 8136 /* 8137 * The physical page number will remain valid 8138 * only if the page is kept busy. 8139 * ENCRYPTED SWAP: make sure we don't let the 8140 * caller access an encrypted page. 8141 */ 8142 assert(dst_page->busy); 8143 assert(!dst_page->encrypted); 8144 *phys_entry = dst_page->phys_page; 8145 } 8146 8147 break; 8148 } 8149 8150 vm_object_unlock(object); 8151 return KERN_SUCCESS; 8152 8153} 8154 8155/* 8156 * vm_object_range_op offers performance enhancement over 8157 * vm_object_page_op for page_op functions which do not require page 8158 * level state to be returned from the call. Page_op was created to provide 8159 * a low-cost alternative to page manipulation via UPLs when only a single 8160 * page was involved. The range_op call establishes the ability in the _op 8161 * family of functions to work on multiple pages where the lack of page level 8162 * state handling allows the caller to avoid the overhead of the upl structures. 8163 */ 8164 8165kern_return_t 8166vm_object_range_op( 8167 vm_object_t object, 8168 vm_object_offset_t offset_beg, 8169 vm_object_offset_t offset_end, 8170 int ops, 8171 uint32_t *range) 8172{ 8173 vm_object_offset_t offset; 8174 vm_page_t dst_page; 8175 8176 if (offset_end - offset_beg > (uint32_t) -1) { 8177 /* range is too big and would overflow "*range" */ 8178 return KERN_INVALID_ARGUMENT; 8179 } 8180 if (object->resident_page_count == 0) { 8181 if (range) { 8182 if (ops & UPL_ROP_PRESENT) { 8183 *range = 0; 8184 } else { 8185 *range = (uint32_t) (offset_end - offset_beg); 8186 assert(*range == (offset_end - offset_beg)); 8187 } 8188 } 8189 return KERN_SUCCESS; 8190 } 8191 vm_object_lock(object); 8192 8193 if (object->phys_contiguous) { 8194 vm_object_unlock(object); 8195 return KERN_INVALID_OBJECT; 8196 } 8197 8198 offset = offset_beg & ~PAGE_MASK_64; 8199 8200 while (offset < offset_end) { 8201 dst_page = vm_page_lookup(object, offset); 8202 if (dst_page != VM_PAGE_NULL) { 8203 if (ops & UPL_ROP_DUMP) { 8204 if (dst_page->busy || dst_page->cleaning) { 8205 /* 8206 * someone else is playing with the 8207 * page, we will have to wait 8208 */ 8209 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 8210 /* 8211 * need to relook the page up since it's 8212 * state may have changed while we slept 8213 * it might even belong to a different object 8214 * at this point 8215 */ 8216 continue; 8217 } 8218 if (dst_page->laundry) { 8219 dst_page->pageout = FALSE; 8220 8221 vm_pageout_steal_laundry(dst_page, FALSE); 8222 } 8223 if (dst_page->pmapped == TRUE) 8224 pmap_disconnect(dst_page->phys_page); 8225 8226 VM_PAGE_FREE(dst_page); 8227 8228 } else if ((ops & UPL_ROP_ABSENT) && !dst_page->absent) 8229 break; 8230 } else if (ops & UPL_ROP_PRESENT) 8231 break; 8232 8233 offset += PAGE_SIZE; 8234 } 8235 vm_object_unlock(object); 8236 8237 if (range) { 8238 if (offset > offset_end) 8239 offset = offset_end; 8240 if(offset > offset_beg) { 8241 *range = (uint32_t) (offset - offset_beg); 8242 
assert(*range == (offset - offset_beg)); 8243 } else { 8244 *range = 0; 8245 } 8246 } 8247 return KERN_SUCCESS; 8248} 8249 8250/* 8251 * Used to point a pager directly to a range of memory (when the pager may be associated 8252 * with a non-device vnode). Takes a virtual address, an offset, and a size. We currently 8253 * expect that the virtual address will denote the start of a range that is physically contiguous. 8254 */ 8255kern_return_t pager_map_to_phys_contiguous( 8256 memory_object_control_t object, 8257 memory_object_offset_t offset, 8258 addr64_t base_vaddr, 8259 vm_size_t size) 8260{ 8261 ppnum_t page_num; 8262 boolean_t clobbered_private; 8263 kern_return_t retval; 8264 vm_object_t pager_object; 8265 8266 page_num = pmap_find_phys(kernel_pmap, base_vaddr); 8267 8268 if (!page_num) { 8269 retval = KERN_FAILURE; 8270 goto out; 8271 } 8272 8273 pager_object = memory_object_control_to_vm_object(object); 8274 8275 if (!pager_object) { 8276 retval = KERN_FAILURE; 8277 goto out; 8278 } 8279 8280 clobbered_private = pager_object->private; 8281 pager_object->private = TRUE; 8282 retval = vm_object_populate_with_private(pager_object, offset, page_num, size); 8283 8284 if (retval != KERN_SUCCESS) 8285 pager_object->private = clobbered_private; 8286 8287out: 8288 return retval; 8289} 8290 8291uint32_t scan_object_collision = 0; 8292 8293void 8294vm_object_lock(vm_object_t object) 8295{ 8296 if (object == vm_pageout_scan_wants_object) { 8297 scan_object_collision++; 8298 mutex_pause(2); 8299 } 8300 lck_rw_lock_exclusive(&object->Lock); 8301} 8302 8303boolean_t 8304vm_object_lock_avoid(vm_object_t object) 8305{ 8306 if (object == vm_pageout_scan_wants_object) { 8307 scan_object_collision++; 8308 return TRUE; 8309 } 8310 return FALSE; 8311} 8312 8313boolean_t 8314_vm_object_lock_try(vm_object_t object) 8315{ 8316 return (lck_rw_try_lock_exclusive(&object->Lock)); 8317} 8318 8319boolean_t 8320vm_object_lock_try(vm_object_t object) 8321{ 8322 /* 8323 * Called from hibernate path so check before blocking. 
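 * With interrupts disabled or preemption already off, mutex_pause()
 * must not be attempted, which is why the back-off below is gated on
 * ml_get_interrupts_enabled() and a zero preemption level.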
8324 */ 8325 if (vm_object_lock_avoid(object) && ml_get_interrupts_enabled() && get_preemption_level()==0) { 8326 mutex_pause(2); 8327 } 8328 return _vm_object_lock_try(object); 8329} 8330 8331void 8332vm_object_lock_shared(vm_object_t object) 8333{ 8334 if (vm_object_lock_avoid(object)) { 8335 mutex_pause(2); 8336 } 8337 lck_rw_lock_shared(&object->Lock); 8338} 8339 8340boolean_t 8341vm_object_lock_try_shared(vm_object_t object) 8342{ 8343 if (vm_object_lock_avoid(object)) { 8344 mutex_pause(2); 8345 } 8346 return (lck_rw_try_lock_shared(&object->Lock)); 8347} 8348 8349 8350unsigned int vm_object_change_wimg_mode_count = 0; 8351 8352/* 8353 * The object must be locked 8354 */ 8355void 8356vm_object_change_wimg_mode(vm_object_t object, unsigned int wimg_mode) 8357{ 8358 vm_page_t p; 8359 8360 vm_object_lock_assert_exclusive(object); 8361 8362 vm_object_paging_wait(object, THREAD_UNINT); 8363 8364 queue_iterate(&object->memq, p, vm_page_t, listq) { 8365 8366 if (!p->fictitious) 8367 pmap_set_cache_attributes(p->phys_page, wimg_mode); 8368 } 8369 if (wimg_mode == VM_WIMG_USE_DEFAULT) 8370 object->set_cache_attr = FALSE; 8371 else 8372 object->set_cache_attr = TRUE; 8373 8374 object->wimg_bits = wimg_mode; 8375 8376 vm_object_change_wimg_mode_count++; 8377} 8378 8379#if CONFIG_FREEZE 8380 8381kern_return_t vm_object_pack( 8382 unsigned int *purgeable_count, 8383 unsigned int *wired_count, 8384 unsigned int *clean_count, 8385 unsigned int *dirty_count, 8386 unsigned int dirty_budget, 8387 boolean_t *shared, 8388 vm_object_t src_object, 8389 struct default_freezer_handle *df_handle) 8390{ 8391 kern_return_t kr = KERN_SUCCESS; 8392 8393 vm_object_lock(src_object); 8394 8395 *purgeable_count = *wired_count = *clean_count = *dirty_count = 0; 8396 *shared = FALSE; 8397 8398 if (!src_object->alive || src_object->terminating){ 8399 kr = KERN_FAILURE; 8400 goto done; 8401 } 8402 8403 if (src_object->purgable == VM_PURGABLE_VOLATILE) { 8404 *purgeable_count = src_object->resident_page_count; 8405 8406 /* If the default freezer handle is null, we're just walking the pages to discover how many can be hibernated */ 8407 if (df_handle != NULL) { 8408 purgeable_q_t queue; 8409 /* object should be on a queue */ 8410 assert(src_object->objq.next != NULL && 8411 src_object->objq.prev != NULL); 8412 8413 queue = vm_purgeable_object_remove(src_object); 8414 assert(queue); 8415 if (src_object->purgeable_when_ripe) { 8416 vm_page_lock_queues(); 8417 vm_purgeable_token_delete_first(queue); 8418 vm_page_unlock_queues(); 8419 } 8420 8421 vm_object_purge(src_object, 0); 8422 assert(src_object->purgable == VM_PURGABLE_EMPTY); 8423 8424 /* 8425 * This object was "volatile" so its pages must have 8426 * already been accounted as "volatile": no change 8427 * in accounting now that it's "empty". 
8428 */ 8429 } 8430 goto done; 8431 } 8432 8433 if (src_object->ref_count == 1) { 8434 vm_object_pack_pages(wired_count, clean_count, dirty_count, dirty_budget, src_object, df_handle); 8435 } else { 8436 if (src_object->internal) { 8437 *shared = TRUE; 8438 } 8439 } 8440done: 8441 vm_object_unlock(src_object); 8442 8443 return kr; 8444} 8445 8446 8447void 8448vm_object_pack_pages( 8449 unsigned int *wired_count, 8450 unsigned int *clean_count, 8451 unsigned int *dirty_count, 8452 unsigned int dirty_budget, 8453 vm_object_t src_object, 8454 struct default_freezer_handle *df_handle) 8455{ 8456 vm_page_t p, next; 8457 8458 next = (vm_page_t)queue_first(&src_object->memq); 8459 8460 while (!queue_end(&src_object->memq, (queue_entry_t)next)) { 8461 p = next; 8462 next = (vm_page_t)queue_next(&next->listq); 8463 8464 /* Finish up if we've hit our pageout limit */ 8465 if (dirty_budget && (dirty_budget == *dirty_count)) { 8466 break; 8467 } 8468 assert(!p->laundry); 8469 8470 if (p->fictitious || p->busy ) 8471 continue; 8472 8473 if (p->absent || p->unusual || p->error) 8474 continue; 8475 8476 if (VM_PAGE_WIRED(p)) { 8477 (*wired_count)++; 8478 continue; 8479 } 8480 8481 if (df_handle == NULL) { 8482 if (p->dirty || pmap_is_modified(p->phys_page)) { 8483 (*dirty_count)++; 8484 } else { 8485 (*clean_count)++; 8486 } 8487 continue; 8488 } 8489 8490 if (p->cleaning) { 8491 p->pageout = TRUE; 8492 continue; 8493 } 8494 8495 if (p->pmapped == TRUE) { 8496 int refmod_state; 8497 refmod_state = pmap_disconnect(p->phys_page); 8498 if (refmod_state & VM_MEM_MODIFIED) { 8499 SET_PAGE_DIRTY(p, FALSE); 8500 } 8501 } 8502 8503 if (p->dirty) { 8504 default_freezer_pack_page(p, df_handle); 8505 (*dirty_count)++; 8506 } 8507 else { 8508 VM_PAGE_FREE(p); 8509 (*clean_count)++; 8510 } 8511 } 8512} 8513 8514void 8515vm_object_pageout( 8516 vm_object_t object) 8517{ 8518 vm_page_t p, next; 8519 struct vm_pageout_queue *iq; 8520 boolean_t set_pageout_bit = FALSE; 8521 8522 iq = &vm_pageout_queue_internal; 8523 8524 assert(object != VM_OBJECT_NULL ); 8525 8526 vm_object_lock(object); 8527 8528 if (DEFAULT_PAGER_IS_ACTIVE || DEFAULT_FREEZER_IS_ACTIVE) { 8529 if (!object->pager_initialized) { 8530 /* 8531 * If there is no memory object for the page, create 8532 * one and hand it to the default pager. 8533 */ 8534 vm_object_pager_create(object); 8535 } 8536 8537 set_pageout_bit = TRUE; 8538 } 8539 8540 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 8541 8542 set_pageout_bit = FALSE; 8543 } 8544 8545ReScan: 8546 next = (vm_page_t)queue_first(&object->memq); 8547 8548 while (!queue_end(&object->memq, (queue_entry_t)next)) { 8549 p = next; 8550 next = (vm_page_t)queue_next(&next->listq); 8551 8552 /* Throw to the pageout queue */ 8553 vm_page_lockspin_queues(); 8554 8555 /* 8556 * see if page is already in the process of 8557 * being cleaned... 
if so, leave it alone 8558 */ 8559 if (!p->laundry) { 8560 8561 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 8562 8563 if (VM_PAGE_Q_THROTTLED(iq)) { 8564 8565 iq->pgo_draining = TRUE; 8566 8567 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE); 8568 vm_page_unlock_queues(); 8569 vm_object_unlock(object); 8570 8571 thread_block(THREAD_CONTINUE_NULL); 8572 8573 vm_object_lock(object); 8574 goto ReScan; 8575 } 8576 8577 if (p->fictitious || p->busy ) { 8578 vm_page_unlock_queues(); 8579 continue; 8580 } 8581 8582 if (p->absent || p->unusual || p->error || VM_PAGE_WIRED(p)) { 8583 vm_page_unlock_queues(); 8584 continue; 8585 } 8586 8587 if (p->cleaning) { 8588 p->pageout = TRUE; 8589 vm_page_unlock_queues(); 8590 continue; 8591 } 8592 8593 if (p->pmapped == TRUE) { 8594 int refmod_state; 8595 refmod_state = pmap_disconnect_options(p->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL); 8596 if (refmod_state & VM_MEM_MODIFIED) { 8597 SET_PAGE_DIRTY(p, FALSE); 8598 } 8599 } 8600 8601 if (p->dirty == FALSE) { 8602 vm_page_unlock_queues(); 8603 VM_PAGE_FREE(p); 8604 continue; 8605 } 8606 } 8607 8608 VM_PAGE_QUEUES_REMOVE(p); 8609 vm_pageout_cluster(p, set_pageout_bit); 8610 } 8611 vm_page_unlock_queues(); 8612 } 8613 8614 vm_object_unlock(object); 8615} 8616 8617kern_return_t 8618vm_object_pagein( 8619 vm_object_t object) 8620{ 8621 memory_object_t pager; 8622 kern_return_t kr; 8623 8624 vm_object_lock(object); 8625 8626 pager = object->pager; 8627 8628 if (!object->pager_ready || pager == MEMORY_OBJECT_NULL) { 8629 vm_object_unlock(object); 8630 return KERN_FAILURE; 8631 } 8632 8633 vm_object_paging_wait(object, THREAD_UNINT); 8634 vm_object_paging_begin(object); 8635 8636 object->blocked_access = TRUE; 8637 vm_object_unlock(object); 8638 8639 kr = memory_object_data_reclaim(pager, TRUE); 8640 8641 vm_object_lock(object); 8642 8643 object->blocked_access = FALSE; 8644 vm_object_paging_end(object); 8645 8646 vm_object_unlock(object); 8647 8648 return kr; 8649} 8650#endif /* CONFIG_FREEZE */ 8651 8652 8653#if CONFIG_IOSCHED 8654void 8655vm_page_request_reprioritize(vm_object_t o, uint64_t blkno, uint32_t len, int prio) 8656{ 8657 io_reprioritize_req_t req; 8658 struct vnode *devvp = NULL; 8659 8660 if(vnode_pager_get_object_devvp(o->pager, (uintptr_t *)&devvp) != KERN_SUCCESS) 8661 return; 8662 8663 /* Create the request for I/O reprioritization */ 8664 req = (io_reprioritize_req_t)zalloc(io_reprioritize_req_zone); 8665 assert(req != NULL); 8666 req->blkno = blkno; 8667 req->len = len; 8668 req->priority = prio; 8669 req->devvp = devvp; 8670 8671 /* Insert request into the reprioritization list */ 8672 IO_REPRIORITIZE_LIST_LOCK(); 8673 queue_enter(&io_reprioritize_list, req, io_reprioritize_req_t, io_reprioritize_list); 8674 IO_REPRIORITIZE_LIST_UNLOCK(); 8675 8676 /* Wakeup reprioritize thread */ 8677 IO_REPRIO_THREAD_WAKEUP(); 8678 8679 return; 8680} 8681 8682void 8683vm_decmp_upl_reprioritize(upl_t upl, int prio) 8684{ 8685 int offset; 8686 vm_object_t object; 8687 io_reprioritize_req_t req; 8688 struct vnode *devvp = NULL; 8689 uint64_t blkno; 8690 uint32_t len; 8691 upl_t io_upl; 8692 uint64_t *io_upl_reprio_info; 8693 int io_upl_size; 8694 8695 if ((upl->flags & UPL_TRACKED_BY_OBJECT) == 0 || (upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) 8696 return; 8697 8698 /* 8699 * We dont want to perform any allocations with the upl lock held since that might 8700 * result in a deadlock. 
If the system is low on memory, the pageout thread would 8701 * try to pageout stuff and might wait on this lock. If we are waiting for the memory to 8702 * be freed up by the pageout thread, it would be a deadlock. 8703 */ 8704 8705 8706 /* First step is just to get the size of the upl to find out how big the reprio info is */ 8707 upl_lock(upl); 8708 if (upl->decmp_io_upl == NULL) { 8709 /* The real I/O upl was destroyed by the time we came in here. Nothing to do. */ 8710 upl_unlock(upl); 8711 return; 8712 } 8713 8714 io_upl = upl->decmp_io_upl; 8715 assert((io_upl->flags & UPL_DECMP_REAL_IO) != 0); 8716 io_upl_size = io_upl->size; 8717 upl_unlock(upl); 8718 8719 /* Now perform the allocation */ 8720 io_upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * (io_upl_size / PAGE_SIZE)); 8721 if (io_upl_reprio_info == NULL) 8722 return; 8723 8724 /* Now again take the lock, recheck the state and grab out the required info */ 8725 upl_lock(upl); 8726 if (upl->decmp_io_upl == NULL || upl->decmp_io_upl != io_upl) { 8727 /* The real I/O upl was destroyed by the time we came in here. Nothing to do. */ 8728 upl_unlock(upl); 8729 goto out; 8730 } 8731 memcpy(io_upl_reprio_info, io_upl->upl_reprio_info, sizeof(uint64_t) * (io_upl_size / PAGE_SIZE)); 8732 8733 /* Get the VM object for this UPL */ 8734 if (io_upl->flags & UPL_SHADOWED) { 8735 object = io_upl->map_object->shadow; 8736 } else { 8737 object = io_upl->map_object; 8738 } 8739 8740 /* Get the dev vnode ptr for this object */ 8741 if(!object || !object->pager || 8742 vnode_pager_get_object_devvp(object->pager, (uintptr_t *)&devvp) != KERN_SUCCESS) { 8743 upl_unlock(upl); 8744 goto out; 8745 } 8746 8747 upl_unlock(upl); 8748 8749 /* Now we have all the information needed to do the expedite */ 8750 8751 offset = 0; 8752 while (offset < io_upl_size) { 8753 blkno = io_upl_reprio_info[(offset / PAGE_SIZE)] & UPL_REPRIO_INFO_MASK; 8754 len = (io_upl_reprio_info[(offset / PAGE_SIZE)] >> UPL_REPRIO_INFO_SHIFT) & UPL_REPRIO_INFO_MASK; 8755 8756 /* 8757 * This implementation may cause some spurious expedites due to the 8758 * fact that we dont cleanup the blkno & len from the upl_reprio_info 8759 * even after the I/O is complete. 8760 */ 8761 8762 if (blkno != 0 && len != 0) { 8763 /* Create the request for I/O reprioritization */ 8764 req = (io_reprioritize_req_t)zalloc(io_reprioritize_req_zone); 8765 assert(req != NULL); 8766 req->blkno = blkno; 8767 req->len = len; 8768 req->priority = prio; 8769 req->devvp = devvp; 8770 8771 /* Insert request into the reprioritization list */ 8772 IO_REPRIORITIZE_LIST_LOCK(); 8773 queue_enter(&io_reprioritize_list, req, io_reprioritize_req_t, io_reprioritize_list); 8774 IO_REPRIORITIZE_LIST_UNLOCK(); 8775 8776 offset += len; 8777 } else { 8778 offset += PAGE_SIZE; 8779 } 8780 } 8781 8782 /* Wakeup reprioritize thread */ 8783 IO_REPRIO_THREAD_WAKEUP(); 8784 8785out: 8786 kfree(io_upl_reprio_info, sizeof(uint64_t) * (io_upl_size / PAGE_SIZE)); 8787 return; 8788} 8789 8790void 8791vm_page_handle_prio_inversion(vm_object_t o, vm_page_t m) 8792{ 8793 upl_t upl; 8794 upl_page_info_t *pl; 8795 unsigned int i, num_pages; 8796 int cur_tier; 8797 8798 cur_tier = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO); 8799 8800 /* 8801 Scan through all UPLs associated with the object to find the 8802 UPL containing the contended page. 
8803 */ 8804 queue_iterate(&o->uplq, upl, upl_t, uplq) { 8805 if (((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0) || upl->upl_priority <= cur_tier) 8806 continue; 8807 pl = UPL_GET_INTERNAL_PAGE_LIST(upl); 8808 num_pages = (upl->size / PAGE_SIZE); 8809 8810 /* 8811 For each page in the UPL page list, see if it matches the contended 8812 page and was issued as a low prio I/O. 8813 */ 8814 for(i=0; i < num_pages; i++) { 8815 if(UPL_PAGE_PRESENT(pl,i) && m->phys_page == pl[i].phys_addr) { 8816 if ((upl->flags & UPL_DECMP_REQ) && upl->decmp_io_upl) { 8817 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_EXPEDITE)) | DBG_FUNC_NONE, upl->upl_creator, m, upl, upl->upl_priority, 0); 8818 vm_decmp_upl_reprioritize(upl, cur_tier); 8819 break; 8820 } 8821 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_EXPEDITE)) | DBG_FUNC_NONE, upl->upl_creator, m, upl->upl_reprio_info[i], upl->upl_priority, 0); 8822 if (UPL_REPRIO_INFO_BLKNO(upl, i) != 0 && UPL_REPRIO_INFO_LEN(upl, i) != 0) 8823 vm_page_request_reprioritize(o, UPL_REPRIO_INFO_BLKNO(upl, i), UPL_REPRIO_INFO_LEN(upl, i), cur_tier); 8824 break; 8825 } 8826 } 8827 /* Check if we found any hits */ 8828 if (i != num_pages) 8829 break; 8830 } 8831 8832 return; 8833} 8834 8835wait_result_t 8836vm_page_sleep(vm_object_t o, vm_page_t m, int interruptible) 8837{ 8838 wait_result_t ret; 8839 8840 KERNEL_DEBUG((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_SLEEP)) | DBG_FUNC_START, o, m, 0, 0, 0); 8841 8842 if (o->io_tracking && ((m->busy == TRUE) || (m->cleaning == TRUE) || VM_PAGE_WIRED(m))) { 8843 /* 8844 Indicates page is busy due to an I/O. Issue a reprioritize request if necessary. 8845 */ 8846 vm_page_handle_prio_inversion(o,m); 8847 } 8848 m->wanted = TRUE; 8849 ret = thread_sleep_vm_object(o, m, interruptible); 8850 KERNEL_DEBUG((MACHDBG_CODE(DBG_MACH_VM, VM_PAGE_SLEEP)) | DBG_FUNC_END, o, m, 0, 0, 0); 8851 return ret; 8852} 8853 8854static void 8855io_reprioritize_thread(void *param __unused, wait_result_t wr __unused) 8856{ 8857 io_reprioritize_req_t req = NULL; 8858 8859 while(1) { 8860 8861 IO_REPRIORITIZE_LIST_LOCK(); 8862 if (queue_empty(&io_reprioritize_list)) { 8863 IO_REPRIORITIZE_LIST_UNLOCK(); 8864 break; 8865 } 8866 8867 queue_remove_first(&io_reprioritize_list, req, io_reprioritize_req_t, io_reprioritize_list); 8868 IO_REPRIORITIZE_LIST_UNLOCK(); 8869 8870 vnode_pager_issue_reprioritize_io(req->devvp, req->blkno, req->len, req->priority); 8871 zfree(io_reprioritize_req_zone, req); 8872 } 8873 8874 IO_REPRIO_THREAD_CONTINUATION(); 8875} 8876#endif 8877
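/*
 * The block below is an illustrative sketch only (kept out of the build
 * with "#if 0"): it shows how a page-fault path could drive
 * vm_object_cluster_size() above to turn a single faulting offset into a
 * page-aligned read-ahead cluster.  The caller name and the way the
 * fault-info structure is supplied here are hypothetical and are not
 * taken from an actual call site.
 */
#if 0
static void
example_cluster_for_fault(
	vm_object_t		object,
	vm_object_offset_t	fault_offset,
	vm_object_fault_info_t	fault_info)
{
	vm_object_offset_t	cluster_start;
	vm_size_t		cluster_length;
	uint32_t		io_streaming = 0;

	/* both inputs must be page aligned */
	cluster_start  = fault_offset & ~PAGE_MASK_64;
	cluster_length = MAX_UPL_TRANSFER_BYTES; /* largest run the pager can take */

	/*
	 * vm_object_cluster_size() takes and drops the object lock itself;
	 * on return the faulting page is always covered and cluster_length
	 * is at least PAGE_SIZE.
	 */
	vm_object_cluster_size(object, &cluster_start, &cluster_length,
			       fault_info, &io_streaming);

	/* io_streaming is set when a sequential run was detected */
}
#endif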