1/* 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. 
 * CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_page.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *
 *	Resident memory management module.
 */

#include <debug.h>
#include <libkern/OSAtomic.h>

#include <mach/clock_types.h>
#include <mach/vm_prot.h>
#include <mach/vm_statistics.h>
#include <mach/sdt.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>
#include <kern/xpr.h>
#include <kern/ledger.h>
#include <vm/pmap.h>
#include <vm/vm_init.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>			/* kernel_memory_allocate() */
#include <kern/misc_protos.h>
#include <zone_debug.h>
#include <vm/cpm.h>
#include <pexpert/pexpert.h>

#include <vm/vm_protos.h>
#include <vm/memory_object.h>
#include <vm/vm_purgeable_internal.h>
#include <vm/vm_compressor.h>

#if CONFIG_PHANTOM_CACHE
#include <vm/vm_phantom_cache.h>
#endif

#include <IOKit/IOHibernatePrivate.h>

#include <sys/kdebug.h>

/* TRUE while hibernation is flushing/cleaning pages; checked by pageout paths */
boolean_t	hibernate_cleaning_in_progress = FALSE;
/* sanity-check pages as they move through the free lists */
boolean_t	vm_page_free_verify = TRUE;

/* accounting for the low-physical-address ("lopage") free pool */
uint32_t	vm_lopage_free_count = 0;
uint32_t	vm_lopage_free_limit = 0;
uint32_t	vm_lopage_lowater = 0;
boolean_t	vm_lopage_refill = FALSE;
boolean_t	vm_lopage_needed = FALSE;

/* statically-allocated extensions backing the page-queue mutexes */
lck_mtx_ext_t	vm_page_queue_lock_ext;
lck_mtx_ext_t
		vm_page_queue_free_lock_ext;
lck_mtx_ext_t	vm_purgeable_queue_lock_ext;

/* indices into vm_page_queue_speculative[] used by the aging/stealing logic */
int		speculative_age_index = 0;
int		speculative_steal_index = 0;
struct vm_speculative_age_q vm_page_queue_speculative[VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1];


__private_extern__ void		vm_page_init_lck_grp(void);

static void		vm_page_free_prepare(vm_page_t	page);
static vm_page_t	vm_page_grab_fictitious_common(ppnum_t phys_addr);




/*
 *	Associated with page of user-allocatable memory is a
 *	page structure.
 */

/*
 *	These variables record the values returned by vm_page_bootstrap,
 *	for debugging purposes.  The implementation of pmap_steal_memory
 *	and pmap_startup here also uses them internally.
 */

vm_offset_t virtual_space_start;
vm_offset_t virtual_space_end;
uint32_t	vm_page_pages;

/*
 *	The vm_page_lookup() routine, which provides for fast
 *	(virtual memory object, offset) to page lookup, employs
 *	the following hash table.  The vm_page_{insert,remove}
 *	routines install and remove associations in the table.
 *	[This table is often called the virtual-to-physical,
 *	or VP, table.]
 */
typedef struct {
	vm_page_packed_t page_list;
#if	MACH_PAGE_HASH_STATS
	int		cur_count;		/* current count */
	int		hi_count;		/* high water mark */
#endif /* MACH_PAGE_HASH_STATS */
} vm_page_bucket_t;


#define BUCKETS_PER_LOCK	16

vm_page_bucket_t *vm_page_buckets;		/* Array of buckets */
unsigned int	vm_page_bucket_count = 0;	/* How big is array? */
unsigned int	vm_page_hash_mask;		/* Mask for hash function */
unsigned int	vm_page_hash_shift;		/* Shift for hash function */
uint32_t	vm_page_bucket_hash;		/* Basic bucket hash */
unsigned int	vm_page_bucket_lock_count = 0;		/* How big is array of locks?
 */

lck_spin_t	*vm_page_bucket_locks;

#if	VM_PAGE_BUCKETS_CHECK
boolean_t vm_page_buckets_check_ready = FALSE;
#if	VM_PAGE_FAKE_BUCKETS
vm_page_bucket_t *vm_page_fake_buckets;	/* decoy buckets */
vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

#if	MACH_PAGE_HASH_STATS
/* This routine is only for debug.  It is intended to be called by
 * hand by a developer using a kernel debugger.  This routine prints
 * out vm_page_hash table statistics to the kernel debug console.
 */
void
hash_debug(void)
{
	int	i;
	int	numbuckets = 0;
	int	highsum = 0;
	int	maxdepth = 0;

	/* scan every bucket, accumulating usage and high-water statistics */
	for (i = 0; i < vm_page_bucket_count; i++) {
		if (vm_page_buckets[i].hi_count) {
			numbuckets++;
			highsum += vm_page_buckets[i].hi_count;
			if (vm_page_buckets[i].hi_count > maxdepth)
				maxdepth = vm_page_buckets[i].hi_count;
		}
	}
	printf("Total number of buckets: %d\n", vm_page_bucket_count);
	printf("Number used buckets:     %d = %d%%\n",
		numbuckets, 100*numbuckets/vm_page_bucket_count);
	printf("Number unused buckets:   %d = %d%%\n",
		vm_page_bucket_count - numbuckets,
		100*(vm_page_bucket_count-numbuckets)/vm_page_bucket_count);
	printf("Sum of bucket max depth: %d\n", highsum);
	printf("Average bucket depth:    %d.%2d\n",
		highsum/vm_page_bucket_count,
		highsum%vm_page_bucket_count);
	printf("Maximum bucket depth:    %d\n", maxdepth);
}
#endif /* MACH_PAGE_HASH_STATS */

/*
 *	The virtual page size is currently implemented as a runtime
 *	variable, but is constant once initialized using vm_set_page_size.
 *	This initialization must be done in the machine-dependent
 *	bootstrap sequence, before calling other machine-independent
 *	initializations.
 *
 *	All references to the virtual page size outside this
 *	module must use the PAGE_SIZE, PAGE_MASK and PAGE_SHIFT
 *	constants.
 */
vm_size_t	page_size  = PAGE_SIZE;
vm_size_t	page_mask  = PAGE_MASK;
int		page_shift = PAGE_SHIFT;

/*
 *	Resident page structures are initialized from
 *	a template (see vm_page_alloc).
 *
 *	When adding a new field to the virtual memory
 *	object structure, be sure to add initialization
 *	(see vm_page_bootstrap).
 */
struct vm_page	vm_page_template;

vm_page_t	vm_pages = VM_PAGE_NULL;	/* array of all managed pages, set up by pmap_startup */
unsigned int	vm_pages_count = 0;
ppnum_t		vm_page_lowest = 0;		/* lowest physical page number seen at startup */

/*
 *	Resident pages that represent real memory
 *	are allocated from a set of free lists,
 *	one per color.
 */
unsigned int	vm_colors;
unsigned int	vm_color_mask;			/* mask is == (vm_colors-1) */
unsigned int	vm_cache_geometry_colors = 0;	/* set by hw dependent code during startup */
unsigned int	vm_free_magazine_refill_limit = 0;
queue_head_t	vm_page_queue_free[MAX_COLORS];
unsigned int	vm_page_free_wanted;
unsigned int	vm_page_free_wanted_privileged;
unsigned int	vm_page_free_count;
unsigned int	vm_page_fictitious_count;

/*
 *	Occasionally, the virtual memory system uses
 *	resident page structures that do not refer to
 *	real pages, for example to leave a page with
 *	important state information in the VP table.
 *
 *	These page structures are allocated the way
 *	most other kernel structures are.
 */
zone_t	vm_page_zone;
vm_locks_array_t vm_page_locks;
decl_lck_mtx_data(,vm_page_alloc_lock)
lck_mtx_ext_t vm_page_alloc_lock_ext;

unsigned int io_throttle_zero_fill;

/* per-CPU local page queues (see vm_page_init_local_q) */
unsigned int	vm_page_local_q_count = 0;
unsigned int	vm_page_local_q_soft_limit = 250;
unsigned int	vm_page_local_q_hard_limit = 500;
struct vplq	*vm_page_local_q = NULL;

/* N.B.
	Guard and fictitious pages must not
 * be assigned a zero phys_page value.
 */
/*
 *	Fictitious pages don't have a physical address,
 *	but we must initialize phys_page to something.
 *	For debugging, this should be a strange value
 *	that the pmap module can recognize in assertions.
 */
ppnum_t vm_page_fictitious_addr = (ppnum_t) -1;

/*
 *	Guard pages are not accessible so they don't
 *	need a physical address, but we need to enter
 *	one in the pmap.
 *	Let's make it recognizable and make sure that
 *	we don't use a real physical page with that
 *	physical address.
 */
ppnum_t vm_page_guard_addr = (ppnum_t) -2;

/*
 *	Resident page structures are also chained on
 *	queues that are used by the page replacement
 *	system (pageout daemon).  These queues are
 *	defined here, but are shared by the pageout
 *	module.  The inactive queue is broken into
 *	file backed and anonymous for convenience as the
 *	pageout daemon often assigns a higher
 *	importance to anonymous pages (less likely to pick)
 */
queue_head_t	vm_page_queue_active;
queue_head_t	vm_page_queue_inactive;
queue_head_t	vm_page_queue_anonymous;	/* inactive memory queue for anonymous pages */
queue_head_t	vm_page_queue_throttled;

unsigned int	vm_page_active_count;
unsigned int	vm_page_inactive_count;
unsigned int	vm_page_anonymous_count;
unsigned int	vm_page_throttled_count;
unsigned int	vm_page_speculative_count;
unsigned int	vm_page_wire_count;
unsigned int	vm_page_wire_count_initial;
unsigned int	vm_page_gobble_count = 0;

#define	VM_PAGE_WIRE_COUNT_WARNING	0
#define VM_PAGE_GOBBLE_COUNT_WARNING	0

unsigned int	vm_page_purgeable_count = 0; /* # of pages purgeable now */
unsigned int	vm_page_purgeable_wired_count = 0; /* # of purgeable pages that are wired now */
uint64_t	vm_page_purged_count = 0;    /* total count of purged pages */

unsigned int
		vm_page_xpmapped_external_count = 0;
unsigned int	vm_page_external_count = 0;
unsigned int	vm_page_internal_count = 0;
unsigned int	vm_page_pageable_external_count = 0;
unsigned int	vm_page_pageable_internal_count = 0;

#if DEVELOPMENT || DEBUG
unsigned int	vm_page_speculative_recreated = 0;
unsigned int	vm_page_speculative_created = 0;
unsigned int	vm_page_speculative_used = 0;
#endif

queue_head_t	vm_page_queue_cleaned;

unsigned int	vm_page_cleaned_count = 0;
unsigned int	vm_pageout_enqueued_cleaned = 0;

uint64_t	max_valid_dma_address = 0xffffffffffffffffULL;
ppnum_t		max_valid_low_ppnum = 0xffffffff;


/*
 *	Several page replacement parameters are also
 *	shared with this module, so that page allocation
 *	(done here in vm_page_alloc) can trigger the
 *	pageout daemon.
 */
unsigned int	vm_page_free_target = 0;
unsigned int	vm_page_free_min = 0;
unsigned int	vm_page_throttle_limit = 0;
uint32_t	vm_page_creation_throttle = 0;
unsigned int	vm_page_inactive_target = 0;
unsigned int	vm_page_anonymous_min = 0;
unsigned int	vm_page_inactive_min = 0;
unsigned int	vm_page_free_reserved = 0;
unsigned int	vm_page_throttle_count = 0;


/*
 *	The VM system has a couple of heuristics for deciding
 *	that pages are "uninteresting" and should be placed
 *	on the inactive queue as likely candidates for replacement.
 *	These variables let the heuristics be controlled at run-time
 *	to make experimentation easier.
 */

boolean_t vm_page_deactivate_hint = TRUE;

struct vm_page_stats_reusable vm_page_stats_reusable;

/*
 *	vm_set_page_size:
 *
 *	Sets the page size, perhaps based upon the memory
 *	size.  Must be called before any use of page-size
 *	dependent functions.
 *
 *	Sets page_shift and page_mask from page_size.
 */
void
vm_set_page_size(void)
{
	page_size  = PAGE_SIZE;
	page_mask  = PAGE_MASK;
	page_shift = PAGE_SHIFT;

	/* PAGE_SIZE must be a power of two; a nonzero overlap with the mask proves otherwise */
	if ((page_mask & page_size) != 0)
		panic("vm_set_page_size: page size not a power of two");

	/* recompute page_shift as log2(page_size) */
	for (page_shift = 0; ; page_shift++)
		if ((1U << page_shift) == page_size)
			break;
}

#define COLOR_GROUPS_TO_STEAL	4


/* Called once during startup, once the cache geometry is known.
 */
static void
vm_page_set_colors( void )
{
	unsigned int	n, override;

	if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )	/* colors specified as a boot-arg? */
		n = override;
	else if ( vm_cache_geometry_colors )		/* do we know what the cache geometry is? */
		n = vm_cache_geometry_colors;
	else	n = DEFAULT_COLORS;			/* use default if all else fails */

	if ( n == 0 )
		n = 1;
	if ( n > MAX_COLORS )
		n = MAX_COLORS;

	/* the count must be a power of 2  */
	if ( ( n & (n - 1)) != 0  )
		panic("vm_page_set_colors");

	vm_colors = n;
	vm_color_mask = n - 1;

	vm_free_magazine_refill_limit = vm_colors * COLOR_GROUPS_TO_STEAL;
}


lck_grp_t		vm_page_lck_grp_free;
lck_grp_t		vm_page_lck_grp_queue;
lck_grp_t		vm_page_lck_grp_local;
lck_grp_t		vm_page_lck_grp_purge;
lck_grp_t		vm_page_lck_grp_alloc;
lck_grp_t		vm_page_lck_grp_bucket;
lck_grp_attr_t		vm_page_lck_grp_attr;
lck_attr_t		vm_page_lck_attr;


__private_extern__ void
vm_page_init_lck_grp(void)
{
	/*
	 * initialize the vm_page lock world
	 */
	lck_grp_attr_setdefault(&vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_free, "vm_page_free", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_queue, "vm_page_queue", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_local, "vm_page_queue_local", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_purge, "vm_page_purge", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_alloc, "vm_page_alloc", &vm_page_lck_grp_attr);
	lck_grp_init(&vm_page_lck_grp_bucket, "vm_page_bucket", &vm_page_lck_grp_attr);
	lck_attr_setdefault(&vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_alloc_lock, &vm_page_alloc_lock_ext, &vm_page_lck_grp_alloc, &vm_page_lck_attr);

	vm_compressor_init_locks();
}

/*
 * Allocate and initialize one local page queue (vplq) per CPU,
 * so that pages can be enqueued without taking the global queue lock.
 */
void
vm_page_init_local_q()
{
	unsigned int		num_cpus;
	unsigned int		i;
	struct vplq		*t_local_q;

	num_cpus = ml_get_max_cpus();

	/*
	 * no point in this for a uni-processor system
	 */
	if (num_cpus >= 2) {
		t_local_q = (struct vplq *)kalloc(num_cpus * sizeof(struct vplq));

		for (i = 0; i < num_cpus; i++) {
			struct vpl	*lq;

			lq = &t_local_q[i].vpl_un.vpl;
			VPL_LOCK_INIT(lq, &vm_page_lck_grp_local, &vm_page_lck_attr);
			queue_init(&lq->vpl_queue);
			lq->vpl_count = 0;
			lq->vpl_internal_count = 0;
			lq->vpl_external_count = 0;
		}
		vm_page_local_q_count = num_cpus;

		vm_page_local_q = (struct vplq *)t_local_q;
	}
}


/*
 *	vm_page_bootstrap:
 *
 *	Initializes the resident memory module.
 *
 *	Allocates memory for the page cells, and
 *	for the object/offset-to-page hash table headers.
 *	Each page cell is initialized and placed on the free list.
 *	Returns the range of available kernel virtual memory.
 */

void
vm_page_bootstrap(
	vm_offset_t		*startp,
	vm_offset_t		*endp)
{
	register vm_page_t	m;
	unsigned int		i;
	unsigned int		log1;
	unsigned int		log2;
	unsigned int		size;

	/*
	 *	Initialize the vm_page template.
	 */

	m = &vm_page_template;
	bzero(m, sizeof (*m));

	m->pageq.next = NULL;
	m->pageq.prev = NULL;
	m->listq.next = NULL;
	m->listq.prev = NULL;
	m->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);

	m->object = VM_OBJECT_NULL;		/* reset later */
	m->offset = (vm_object_offset_t) -1;	/* reset later */

	m->wire_count = 0;
	m->local = FALSE;
	m->inactive = FALSE;
	m->active = FALSE;
	m->pageout_queue = FALSE;
	m->speculative = FALSE;
	m->laundry = FALSE;
	m->free = FALSE;
	m->reference = FALSE;
	m->gobbled = FALSE;
	m->private = FALSE;
	m->throttled = FALSE;
	m->__unused_pageq_bits = 0;

	m->phys_page = 0;		/* reset later */

	/* template pages start out busy so no one touches them before setup completes */
	m->busy = TRUE;
	m->wanted = FALSE;
	m->tabled = FALSE;
	m->hashed = FALSE;
	m->fictitious = FALSE;
	m->pmapped = FALSE;
	m->wpmapped = FALSE;
	m->pageout = FALSE;
	m->absent = FALSE;
	m->error = FALSE;
	m->dirty = FALSE;
	m->cleaning = FALSE;
	m->precious = FALSE;
	m->clustered = FALSE;
	m->overwriting = FALSE;
	m->restart = FALSE;
	m->unusual = FALSE;
	m->encrypted = FALSE;
	m->encrypted_cleaning = FALSE;
	m->cs_validated = FALSE;
	m->cs_tainted = FALSE;
	m->no_cache = FALSE;
	m->reusable = FALSE;
	m->slid = FALSE;
	m->xpmapped = FALSE;
	m->compressor = FALSE;
	m->written_by_kernel = FALSE;
	m->__unused_object_bits = 0;

	/*
	 *	Initialize the page queues.
	 */
	vm_page_init_lck_grp();

	lck_mtx_init_ext(&vm_page_queue_free_lock, &vm_page_queue_free_lock_ext, &vm_page_lck_grp_free, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_page_queue_lock, &vm_page_queue_lock_ext, &vm_page_lck_grp_queue, &vm_page_lck_attr);
	lck_mtx_init_ext(&vm_purgeable_queue_lock, &vm_purgeable_queue_lock_ext, &vm_page_lck_grp_purge, &vm_page_lck_attr);

	for (i = 0; i < PURGEABLE_Q_TYPE_MAX; i++) {
		int group;

		purgeable_queues[i].token_q_head = 0;
		purgeable_queues[i].token_q_tail = 0;
		for (group = 0; group < NUM_VOLATILE_GROUPS; group++)
			queue_init(&purgeable_queues[i].objq[group]);

		purgeable_queues[i].type = i;
		purgeable_queues[i].new_pages = 0;
#if MACH_ASSERT
		purgeable_queues[i].debug_count_tokens = 0;
		purgeable_queues[i].debug_count_objects = 0;
#endif
	};
	purgeable_nonvolatile_count = 0;
	queue_init(&purgeable_nonvolatile_queue);

	for (i = 0; i < MAX_COLORS; i++ )
		queue_init(&vm_page_queue_free[i]);

	queue_init(&vm_lopage_queue_free);
	queue_init(&vm_page_queue_active);
	queue_init(&vm_page_queue_inactive);
	queue_init(&vm_page_queue_cleaned);
	queue_init(&vm_page_queue_throttled);
	queue_init(&vm_page_queue_anonymous);

	for ( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) {
		queue_init(&vm_page_queue_speculative[i].age_q);

		vm_page_queue_speculative[i].age_ts.tv_sec = 0;
		vm_page_queue_speculative[i].age_ts.tv_nsec = 0;
	}
	vm_page_free_wanted = 0;
	vm_page_free_wanted_privileged = 0;

	vm_page_set_colors();


	/*
	 *	Steal memory for the map and zone subsystems.
	 */
	kernel_debug_string("zone_steal_memory");
	zone_steal_memory();
	kernel_debug_string("vm_map_steal_memory");
	vm_map_steal_memory();

	/*
	 *	Allocate (and initialize) the virtual-to-physical
	 *	table hash buckets.
	 *
	 *	The number of buckets should be a power of two to
	 *	get a good hash function.  The following computation
	 *	chooses the first power of two that is greater
	 *	than the number of physical pages in the system.
	 */

	if (vm_page_bucket_count == 0) {
		unsigned int npages = pmap_free_pages();

		vm_page_bucket_count = 1;
		while (vm_page_bucket_count < npages)
			vm_page_bucket_count <<= 1;
	}
	vm_page_bucket_lock_count = (vm_page_bucket_count + BUCKETS_PER_LOCK - 1) / BUCKETS_PER_LOCK;

	vm_page_hash_mask = vm_page_bucket_count - 1;

	/*
	 *	Calculate object shift value for hashing algorithm:
	 *		O = log2(sizeof(struct vm_object))
	 *		B = log2(vm_page_bucket_count)
	 *	hash shifts the object left by
	 *		B/2 - O
	 */
	size = vm_page_bucket_count;
	for (log1 = 0; size > 1; log1++)
		size /= 2;
	size = sizeof(struct vm_object);
	for (log2 = 0; size > 1; log2++)
		size /= 2;
	vm_page_hash_shift = log1/2 - log2 + 1;

	vm_page_bucket_hash = 1 << ((log1 + 1) >> 1);		/* Get (ceiling of sqrt of table size) */
	vm_page_bucket_hash |= 1 << ((log1 + 1) >> 2);		/* Get (ceiling of quadroot of table size) */
	vm_page_bucket_hash |= 1;				/* Set bit and add 1 - always must be 1 to insure unique series */

	if (vm_page_hash_mask & vm_page_bucket_count)
		printf("vm_page_bootstrap: WARNING -- strange page hash\n");

#if VM_PAGE_BUCKETS_CHECK
#if VM_PAGE_FAKE_BUCKETS
	/*
	 * Allocate a decoy set of page buckets, to detect
	 * any stomping there.
	 */
	vm_page_fake_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));
	vm_page_fake_buckets_start = (vm_map_offset_t) vm_page_fake_buckets;
	vm_page_fake_buckets_end =
		vm_map_round_page((vm_page_fake_buckets_start +
				   (vm_page_bucket_count *
				    sizeof (vm_page_bucket_t))),
				  PAGE_MASK);
	/* fill the decoy region with a recognizable pattern so stomps are visible */
	char *cp;
	for (cp = (char *)vm_page_fake_buckets_start;
	     cp < (char *)vm_page_fake_buckets_end;
	     cp++) {
		*cp = 0x5a;
	}
#endif /* VM_PAGE_FAKE_BUCKETS */
#endif /* VM_PAGE_BUCKETS_CHECK */

	kernel_debug_string("vm_page_buckets");
	vm_page_buckets = (vm_page_bucket_t *)
		pmap_steal_memory(vm_page_bucket_count *
				  sizeof(vm_page_bucket_t));

	kernel_debug_string("vm_page_bucket_locks");
	vm_page_bucket_locks = (lck_spin_t *)
		pmap_steal_memory(vm_page_bucket_lock_count *
				  sizeof(lck_spin_t));

	for (i = 0; i < vm_page_bucket_count; i++) {
		register vm_page_bucket_t *bucket = &vm_page_buckets[i];

		bucket->page_list = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count = 0;
		bucket->hi_count = 0;
#endif /* MACH_PAGE_HASH_STATS */
	}

	for (i = 0; i < vm_page_bucket_lock_count; i++)
		lck_spin_init(&vm_page_bucket_locks[i], &vm_page_lck_grp_bucket, &vm_page_lck_attr);

#if VM_PAGE_BUCKETS_CHECK
	vm_page_buckets_check_ready = TRUE;
#endif /* VM_PAGE_BUCKETS_CHECK */

	/*
	 *	Machine-dependent code allocates the resident page table.
	 *	It uses vm_page_init to initialize the page frames.
	 *	The code also returns to us the virtual space available
	 *	to the kernel.  We don't trust the pmap module
	 *	to get the alignment right.
	 */

	kernel_debug_string("pmap_startup");
	pmap_startup(&virtual_space_start, &virtual_space_end);
	virtual_space_start = round_page(virtual_space_start);
	virtual_space_end = trunc_page(virtual_space_end);

	*startp = virtual_space_start;
	*endp = virtual_space_end;

	/*
	 *	Compute the initial "wire" count.
	 *	Up until now, the pages which have been set aside are not under
	 *	the VM system's control, so although they aren't explicitly
	 *	wired, they nonetheless can't be moved. At this moment,
	 *	all VM managed pages are "free", courtesy of pmap_startup.
	 */
	assert((unsigned int) atop_64(max_mem) == atop_64(max_mem));
	vm_page_wire_count = ((unsigned int) atop_64(max_mem)) - vm_page_free_count - vm_lopage_free_count;	/* initial value */
	vm_page_wire_count_initial = vm_page_wire_count;

	printf("vm_page_bootstrap: %d free pages and %d wired pages\n",
	       vm_page_free_count, vm_page_wire_count);

	kernel_debug_string("vm_page_bootstrap complete");
	simple_lock_init(&vm_paging_lock, 0);
}

#ifndef	MACHINE_PAGES
/*
 *	We implement pmap_steal_memory and pmap_startup with the help
 *	of two simpler functions, pmap_virtual_space and pmap_next_page.
 */

void *
pmap_steal_memory(
	vm_size_t size)
{
	vm_offset_t addr, vaddr;
	ppnum_t	phys_page;

	/*
	 *	We round the size to a round multiple.
	 */

	size = (size + sizeof (void *) - 1) &~ (sizeof (void *) - 1);

	/*
	 *	If this is the first call to pmap_steal_memory,
	 *	we have to initialize ourself.
	 */

	if (virtual_space_start == virtual_space_end) {
		pmap_virtual_space(&virtual_space_start, &virtual_space_end);

		/*
		 *	The initial values must be aligned properly, and
		 *	we don't trust the pmap module to do it right.
		 */

		virtual_space_start = round_page(virtual_space_start);
		virtual_space_end = trunc_page(virtual_space_end);
	}

	/*
	 *	Allocate virtual memory for this request.
	 */

	addr = virtual_space_start;
	virtual_space_start += size;

	//kprintf("pmap_steal_memory: %08lX - %08lX; size=%08lX\n", (long)addr, (long)virtual_space_start, (long)size);	/* (TEST/DEBUG) */

	/*
	 *	Allocate and map physical pages to back new virtual pages.
	 */

	for (vaddr = round_page(addr);
	     vaddr < addr + size;
	     vaddr += PAGE_SIZE) {

		if (!pmap_next_page_hi(&phys_page))
			panic("pmap_steal_memory");

		/*
		 *	XXX Logically, these mappings should be wired,
		 *	but some pmap modules barf if they are.
		 */
#if defined(__LP64__)
		pmap_pre_expand(kernel_pmap, vaddr);
#endif

		pmap_enter(kernel_pmap, vaddr, phys_page,
			   VM_PROT_READ|VM_PROT_WRITE, VM_PROT_NONE,
			   VM_WIMG_USE_DEFAULT, FALSE);
		/*
		 * Account for newly stolen memory
		 */
		vm_page_wire_count++;

	}

	return (void *) addr;
}

void vm_page_release_startup(vm_page_t mem);
void
pmap_startup(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	unsigned int i, npages, pages_initialized, fill, fillval;
	ppnum_t		phys_page;
	addr64_t	tmpaddr;


#if    defined(__LP64__)
	/*
	 * struct vm_page must be of size 64 due to VM_PAGE_PACK_PTR use
	 */
	assert(sizeof(struct vm_page) == 64);

	/*
	 * make sure we are aligned on a 64 byte boundary
	 * for VM_PAGE_PACK_PTR (it clips off the low-order
	 * 6 bits of the pointer)
	 */
	if (virtual_space_start != virtual_space_end)
		virtual_space_start = round_page(virtual_space_start);
#endif

	/*
	 *	We calculate how many page frames we will have
	 *	and then allocate the page structures in one chunk.
	 */

	tmpaddr = (addr64_t)pmap_free_pages() * (addr64_t)PAGE_SIZE;	/* Get the amount of memory left */
	tmpaddr = tmpaddr + (addr64_t)(round_page(virtual_space_start) - virtual_space_start);	/* Account for any slop */
	npages = (unsigned int)(tmpaddr / (addr64_t)(PAGE_SIZE + sizeof(*vm_pages)));	/* Figure size of all vm_page_ts, including enough to hold the vm_page_ts */

	vm_pages = (vm_page_t) pmap_steal_memory(npages * sizeof *vm_pages);

	/*
	 *	Initialize the page frames.
	 */
	kernel_debug_string("Initialize the page frames");
	for (i = 0, pages_initialized = 0; i < npages; i++) {
		if (!pmap_next_page(&phys_page))
			break;
		/* track the lowest physical page number handed out */
		if (pages_initialized == 0 || phys_page < vm_page_lowest)
			vm_page_lowest = phys_page;

		vm_page_init(&vm_pages[i], phys_page, FALSE);
		vm_page_pages++;
		pages_initialized++;
	}
	vm_pages_count = pages_initialized;

#if    defined(__LP64__)

	/* verify pointer packing round-trips for both ends of the vm_pages array */
	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[0])) != &vm_pages[0])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[0] - %p", (void *)&vm_pages[0]);

	if (VM_PAGE_UNPACK_PTR(VM_PAGE_PACK_PTR(&vm_pages[vm_pages_count-1])) != &vm_pages[vm_pages_count-1])
		panic("VM_PAGE_PACK_PTR failed on &vm_pages[vm_pages_count-1] - %p", (void *)&vm_pages[vm_pages_count-1]);
#endif
	kernel_debug_string("page fill/release");
	/*
	 * Check if we want to initialize pages to a known value
	 */
	fill = 0;								/* Assume no fill */
	if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;	/* Set fill */
#if	DEBUG
	/* This slows down booting the DEBUG kernel, particularly on
	 * large memory systems, but is worthwhile in deterministically
	 * trapping uninitialized memory usage.
	 */
	if (fill == 0) {
		fill = 1;
		fillval = 0xDEB8F177;
	}
#endif
	if (fill)
		kprintf("Filling vm_pages with pattern: 0x%x\n", fillval);
	// -debug code remove
	if (2 == vm_himemory_mode) {
		// free low -> high so high is preferred
		for (i = 1; i <= pages_initialized; i++) {
			if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
			vm_page_release_startup(&vm_pages[i - 1]);
		}
	}
	else
	// debug code remove-

	/*
	 * Release pages in reverse order so that physical pages
	 * initially get allocated in ascending addresses. This keeps
	 * the devices (which must address physical memory) happy if
	 * they require several consecutive pages.
	 */
	for (i = pages_initialized; i > 0; i--) {
		if(fill) fillPage(vm_pages[i - 1].phys_page, fillval);		/* Fill the page with a known value if requested at boot */
		vm_page_release_startup(&vm_pages[i - 1]);
	}

	VM_CHECK_MEMORYSTATUS;

#if 0
	{
		vm_page_t xx, xxo, xxl;
		int i, j, k, l;

		j = 0;									/* (BRINGUP) */
		xxl = 0;

		for( i = 0; i < vm_colors; i++ ) {
			queue_iterate(&vm_page_queue_free[i],
				      xx,
				      vm_page_t,
				      pageq) {	/* BRINGUP */
				j++;							/* (BRINGUP) */
				if(j > vm_page_free_count) {				/* (BRINGUP) */
					panic("pmap_startup: too many pages, xx = %08X, xxl = %08X\n", xx, xxl);
				}

				l = vm_page_free_count - j;				/* (BRINGUP) */
				k = 0;							/* (BRINGUP) */

				if(((j - 1) & 0xFFFF) == 0) kprintf("checking number %d of %d\n", j, vm_page_free_count);

				for(xxo = xx->pageq.next; xxo != &vm_page_queue_free[i]; xxo = xxo->pageq.next) {	/* (BRINGUP) */
					k++;
					if(k > l) panic("pmap_startup: too many in secondary check %d %d\n", k, l);
					if((xx->phys_page & 0xFFFFFFFF) == (xxo->phys_page & 0xFFFFFFFF)) {	/* (BRINGUP) */
						panic("pmap_startup: duplicate physaddr, xx = %08X, xxo = %08X\n", xx, xxo);
					}
				}

				xxl = xx;
			}
		}

		if(j != vm_page_free_count) {						/* (BRINGUP) */
			panic("pmap_startup: vm_page_free_count does not match, calc =  %d, vm_page_free_count = %08X\n", j, vm_page_free_count);
		}
	}
#endif


	/*
	 *	We have to re-align virtual_space_start,
	 *	because pmap_steal_memory has been using it.
	 */

	virtual_space_start = round_page(virtual_space_start);

	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
#endif	/* MACHINE_PAGES */

/*
 *	Routine:	vm_page_module_init
 *	Purpose:
 *		Second initialization pass, to be done after
 *		the basic VM system is ready.
 */
void
vm_page_module_init(void)
{
	vm_page_zone = zinit((vm_size_t) sizeof(struct vm_page),
			     0, PAGE_SIZE, "vm pages");

#if	ZONE_DEBUG
	zone_debug_disable(vm_page_zone);
#endif	/* ZONE_DEBUG */

	zone_change(vm_page_zone, Z_CALLERACCT, FALSE);
	zone_change(vm_page_zone, Z_EXPAND, FALSE);
	zone_change(vm_page_zone, Z_EXHAUST, TRUE);
	zone_change(vm_page_zone, Z_FOREIGN, TRUE);
	zone_change(vm_page_zone, Z_GZALLOC_EXEMPT, TRUE);
	/*
	 * Adjust zone statistics to account for the real pages allocated
	 * in vm_page_create(). [Q: is this really what we want?]
	 */
	vm_page_zone->count += vm_page_pages;
	vm_page_zone->sum_count += vm_page_pages;
	vm_page_zone->cur_size += vm_page_pages * vm_page_zone->elem_size;
}

/*
 *	Routine:	vm_page_create
 *	Purpose:
 *		After the VM system is up, machine-dependent code
 *		may stumble across more physical memory.  For example,
 *		memory that it was reserving for a frame buffer.
 *		vm_page_create turns this memory into available pages.
1039 */ 1040 1041void 1042vm_page_create( 1043 ppnum_t start, 1044 ppnum_t end) 1045{ 1046 ppnum_t phys_page; 1047 vm_page_t m; 1048 1049 for (phys_page = start; 1050 phys_page < end; 1051 phys_page++) { 1052 while ((m = (vm_page_t) vm_page_grab_fictitious_common(phys_page)) 1053 == VM_PAGE_NULL) 1054 vm_page_more_fictitious(); 1055 1056 m->fictitious = FALSE; 1057 pmap_clear_noencrypt(phys_page); 1058 1059 vm_page_pages++; 1060 vm_page_release(m); 1061 } 1062} 1063 1064/* 1065 * vm_page_hash: 1066 * 1067 * Distributes the object/offset key pair among hash buckets. 1068 * 1069 * NOTE: The bucket count must be a power of 2 1070 */ 1071#define vm_page_hash(object, offset) (\ 1072 ( (natural_t)((uintptr_t)object * vm_page_bucket_hash) + ((uint32_t)atop_64(offset) ^ vm_page_bucket_hash))\ 1073 & vm_page_hash_mask) 1074 1075 1076/* 1077 * vm_page_insert: [ internal use only ] 1078 * 1079 * Inserts the given mem entry into the object/object-page 1080 * table and object list. 1081 * 1082 * The object must be locked. 
 */
void
vm_page_insert(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	/* queues lock not held, do insert in hash, no batched pmap op */
	vm_page_insert_internal(mem, object, offset, FALSE, TRUE, FALSE);
}

/*
 *	vm_page_insert_internal:
 *
 *	Worker for vm_page_insert/vm_page_replace.
 *
 *	mem			page to insert (must not already be tabled)
 *	object/offset		destination; object must be locked exclusively
 *	queues_lock_held	TRUE if the caller already holds the page
 *				queues lock (asserted under DEBUG)
 *	insert_in_hash		FALSE when the caller has already placed the
 *				page in the object/offset hash (vm_page_replace)
 *	batch_pmap_op		passed through to PMAP_SET_CACHE_ATTR
 */
void
vm_page_insert_internal(
	vm_page_t		mem,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		queues_lock_held,
	boolean_t		insert_in_hash,
	boolean_t		batch_pmap_op)
{
	vm_page_bucket_t	*bucket;
	lck_spin_t		*bucket_lock;
	int			hash_id;
	task_t			owner;

	XPR(XPR_VM_PAGE,
	    "vm_page_insert, object 0x%X offset 0x%X page 0x%X\n",
	    object, offset, mem, 0,0);
#if 0
	/*
	 * we may not hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif

	assert(page_aligned(offset));

	/* the vm_submap_object is only a placeholder for submaps */
	assert(object != vm_submap_object);

	vm_object_lock_assert_exclusive(object);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock,
		       queues_lock_held ? LCK_MTX_ASSERT_OWNED
				        : LCK_MTX_ASSERT_NOTOWNED);
#endif	/* DEBUG */

	if (insert_in_hash == TRUE) {
#if DEBUG || VM_PAGE_CHECK_BUCKETS
		if (mem->tabled || mem->object != VM_OBJECT_NULL)
			panic("vm_page_insert: page %p for (obj=%p,off=0x%llx) "
			      "already in (obj=%p,off=0x%llx)",
			      mem, object, offset, mem->object, mem->offset);
#endif
		assert(!object->internal || offset < object->vo_size);

		/* only insert "pageout" pages into "pageout" objects,
		 * and normal pages into normal objects */
		assert(object->pageout == mem->pageout);

		assert(vm_page_lookup(object, offset) == VM_PAGE_NULL);

		/*
		 *	Record the object/offset pair in this page
		 */

		mem->object = object;
		mem->offset = offset;

		/*
		 *	Insert it into the object_object/offset hash table.
		 *	Buckets share spin locks (BUCKETS_PER_LOCK buckets
		 *	per lock); insertion is at the head of the chain.
		 */
		hash_id = vm_page_hash(object, offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		mem->next_m = bucket->page_list;
		bucket->page_list = VM_PAGE_PACK_PTR(mem);
		assert(mem == VM_PAGE_UNPACK_PTR(bucket->page_list));

#if     MACH_PAGE_HASH_STATS
		if (++bucket->cur_count > bucket->hi_count)
			bucket->hi_count = bucket->cur_count;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = TRUE;
		lck_spin_unlock(bucket_lock);
	}

	{
		unsigned int	cache_attr;

		cache_attr = object->wimg_bits & VM_WIMG_MASK;

		if (cache_attr != VM_WIMG_USE_DEFAULT) {
			PMAP_SET_CACHE_ATTR(mem, object, cache_attr, batch_pmap_op);
		}
	}
	/*
	 *	Now link into the object's list of backed pages.
	 */
	VM_PAGE_INSERT(mem, object);
	mem->tabled = TRUE;

	/*
	 *	Show that the object has one more resident page.
	 */

	object->resident_page_count++;
	if (VM_PAGE_WIRED(mem)) {
		object->wired_page_count++;
	}
	assert(object->resident_page_count >= object->wired_page_count);

	if (object->internal) {
		OSAddAtomic(1, &vm_page_internal_count);
	} else {
		OSAddAtomic(1, &vm_page_external_count);
	}

	/*
	 * It wouldn't make sense to insert a "reusable" page in
	 * an object (the page would have been marked "reusable" only
	 * at the time of a madvise(MADV_FREE_REUSABLE) if it was already
	 * in the object at that time).
	 * But a page could be inserted in a "all_reusable" object, if
	 * something faults it in (a vm_read() from another task or a
	 * "use-after-free" issue in user space, for example).  It can
	 * also happen if we're relocating a page from that object to
	 * a different physical page during a physically-contiguous
	 * allocation.
	 */
	assert(!mem->reusable);
	if (mem->object->all_reusable) {
		OSAddAtomic(+1, &vm_page_stats_reusable.reusable_count);
	}

	/*
	 * Purgeable-memory ledger accounting: charge the owning task
	 * for this page depending on the object's purgability state.
	 */
	if (object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = object->vo_purgeable_owner;
	}
	if (owner &&
	    (object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* more non-volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_nonvolatile,
			      PAGE_SIZE);
		/* more footprint */
		ledger_credit(owner->ledger,
			      task_ledgers.phys_footprint,
			      PAGE_SIZE);

	} else if (owner &&
		   (object->purgable == VM_PURGABLE_VOLATILE ||
		    object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* more volatile bytes */
		ledger_credit(owner->ledger,
			      task_ledgers.purgeable_volatile,
			      PAGE_SIZE);
	}

	if (object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			OSAddAtomic(+1, &vm_page_purgeable_wired_count);
		} else {
			OSAddAtomic(+1, &vm_page_purgeable_count);
		}
	} else if (object->purgable == VM_PURGABLE_EMPTY &&
		   mem->throttled) {
		/*
		 * This page belongs to a purged VM object but hasn't
		 * been purged (because it was "busy").
		 * It's in the "throttled" queue and hence not
		 * visible to vm_pageout_scan().  Move it to a pageable
		 * queue, so that it can eventually be reclaimed, instead
		 * of lingering in the "empty" object.
		 */
		if (queues_lock_held == FALSE)
			vm_page_lockspin_queues();
		vm_page_deactivate(mem);
		if (queues_lock_held == FALSE)
			vm_page_unlock_queues();
	}

#if VM_OBJECT_TRACKING_OP_MODIFIED
	if (vm_object_tracking_inited &&
	    object->internal &&
	    object->resident_page_count == 0 &&
	    object->pager == NULL &&
	    object->shadow != NULL &&
	    object->shadow->copy == object) {
		void *bt[VM_OBJECT_TRACKING_BTDEPTH];
		int numsaved = 0;

		numsaved = OSBacktrace(bt, VM_OBJECT_TRACKING_BTDEPTH);
		btlog_add_entry(vm_object_tracking_btlog,
				object,
				VM_OBJECT_TRACKING_OP_MODIFIED,
				bt,
				numsaved);
	}
#endif /* VM_OBJECT_TRACKING_OP_MODIFIED */
}

/*
 *	vm_page_replace:
 *
 *	Exactly like vm_page_insert, except that we first
 *	remove any existing page at the given offset in object.
 *
 *	The object must be locked.
 */
void
vm_page_replace(
	register vm_page_t		mem,
	register vm_object_t		object,
	register vm_object_offset_t	offset)
{
	vm_page_bucket_t *bucket;
	vm_page_t	 found_m = VM_PAGE_NULL;
	lck_spin_t	*bucket_lock;
	int		hash_id;

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	vm_object_lock_assert_exclusive(object);
#if DEBUG || VM_PAGE_CHECK_BUCKETS
	if (mem->tabled || mem->object != VM_OBJECT_NULL)
		panic("vm_page_replace: page %p for (obj=%p,off=0x%llx) "
		      "already in (obj=%p,off=0x%llx)",
		      mem, object, offset, mem->object, mem->offset);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_NOTOWNED);
#endif
	/*
	 *	Record the object/offset pair in this page
	 */

	mem->object = object;
	mem->offset = offset;

	/*
	 *	Insert it into the object_object/offset hash table,
	 *	replacing any page that might have been there.
	 *	The chain walk and the head insertion happen under a
	 *	single hold of the bucket spin lock so lookups never
	 *	see both old and new page hashed at once.
	 */

	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	if (bucket->page_list) {
		vm_page_packed_t *mp = &bucket->page_list;
		vm_page_t m = VM_PAGE_UNPACK_PTR(*mp);

		do {
			if (m->object == object && m->offset == offset) {
				/*
				 * Remove old page from hash list
				 */
				*mp = m->next_m;
				m->hashed = FALSE;

				found_m = m;
				break;
			}
			mp = &m->next_m;
		} while ((m = VM_PAGE_UNPACK_PTR(*mp)));

		mem->next_m = bucket->page_list;
	} else {
		mem->next_m = VM_PAGE_PACK_PTR(VM_PAGE_NULL);
	}
	/*
	 * insert new page at head of hash list
	 */
	bucket->page_list = VM_PAGE_PACK_PTR(mem);
	mem->hashed = TRUE;

	lck_spin_unlock(bucket_lock);

	if (found_m) {
		/*
		 * there was already a page at the specified
		 * offset for this object... remove it from
		 * the object and free it back to the free list
		 */
		vm_page_free_unlocked(found_m, FALSE);
	}
	/* hash insertion already done above: insert_in_hash == FALSE */
	vm_page_insert_internal(mem, object, offset, FALSE, FALSE, FALSE);
}

/*
 *	vm_page_remove:		[ internal use only ]
 *
 *	Removes the given mem entry from the object/offset-page
 *	table and the object page list.
 *
 *	The object must be locked.
 */

/*
 *	mem		  page to remove; must be tabled, not cleaning,
 *			  not laundry; its object must be locked exclusively
 *	remove_from_hash  FALSE when the caller has already unhashed the page
 *
 *	Undoes all the accounting performed by vm_page_insert_internal:
 *	resident/wired counts, internal/external counts, reusable stats,
 *	purgeable counts and ledger charges.
 */
void
vm_page_remove(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	vm_page_bucket_t *bucket;
	vm_page_t	this;
	lck_spin_t	*bucket_lock;
	int		hash_id;
	task_t		owner;

	XPR(XPR_VM_PAGE,
	    "vm_page_remove, object 0x%X offset 0x%X page 0x%X\n",
	    mem->object, mem->offset,
	    mem, 0,0);

	vm_object_lock_assert_exclusive(mem->object);
	assert(mem->tabled);
	assert(!mem->cleaning);
	assert(!mem->laundry);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(mem);
#endif
	if (remove_from_hash == TRUE) {
		/*
		 *	Remove from the object_object/offset hash table
		 */
		hash_id = vm_page_hash(mem->object, mem->offset);
		bucket = &vm_page_buckets[hash_id];
		bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

		lck_spin_lock(bucket_lock);

		if ((this = VM_PAGE_UNPACK_PTR(bucket->page_list)) == mem) {
			/* optimize for common case */

			bucket->page_list = mem->next_m;
		} else {
			vm_page_packed_t	*prev;

			/* walk the chain; the page is asserted to be hashed,
			 * so the loop terminates at the matching entry */
			for (prev = &this->next_m;
			     (this = VM_PAGE_UNPACK_PTR(*prev)) != mem;
			     prev = &this->next_m)
				continue;
			*prev = this->next_m;
		}
#if     MACH_PAGE_HASH_STATS
		bucket->cur_count--;
#endif /* MACH_PAGE_HASH_STATS */
		mem->hashed = FALSE;
		lck_spin_unlock(bucket_lock);
	}
	/*
	 *	Now remove from the object's list of backed pages.
	 */

	VM_PAGE_REMOVE(mem);

	/*
	 *	And show that the object has one fewer resident
	 *	page.
	 */

	assert(mem->object->resident_page_count > 0);
	mem->object->resident_page_count--;

	if (mem->object->internal) {
#if DEBUG
		assert(vm_page_internal_count);
#endif /* DEBUG */

		OSAddAtomic(-1, &vm_page_internal_count);
	} else {
		assert(vm_page_external_count);
		OSAddAtomic(-1, &vm_page_external_count);

		if (mem->xpmapped) {
			assert(vm_page_xpmapped_external_count);
			OSAddAtomic(-1, &vm_page_xpmapped_external_count);
		}
	}
	/* objq linkage implies the object is on the cache queue */
	if (!mem->object->internal && (mem->object->objq.next || mem->object->objq.prev)) {
		if (mem->object->resident_page_count == 0)
			vm_object_cache_remove(mem->object);
	}

	if (VM_PAGE_WIRED(mem)) {
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
	}
	assert(mem->object->resident_page_count >=
	       mem->object->wired_page_count);
	if (mem->reusable) {
		assert(mem->object->reusable_page_count > 0);
		mem->object->reusable_page_count--;
		assert(mem->object->reusable_page_count <=
		       mem->object->resident_page_count);
		mem->reusable = FALSE;
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	} else if (mem->object->all_reusable) {
		OSAddAtomic(-1, &vm_page_stats_reusable.reusable_count);
		vm_page_stats_reusable.reused_remove++;
	}

	/* reverse the purgeable ledger charges made at insert time */
	if (mem->object->purgable == VM_PURGABLE_DENY) {
		owner = TASK_NULL;
	} else {
		owner = mem->object->vo_purgeable_owner;
	}
	if (owner &&
	    (mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
	     VM_PAGE_WIRED(mem))) {
		/* less non-volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_nonvolatile,
			     PAGE_SIZE);
		/* less footprint */
		ledger_debit(owner->ledger,
			     task_ledgers.phys_footprint,
			     PAGE_SIZE);
	} else if (owner &&
		   (mem->object->purgable == VM_PURGABLE_VOLATILE ||
		    mem->object->purgable == VM_PURGABLE_EMPTY)) {
		assert(! VM_PAGE_WIRED(mem));
		/* less volatile bytes */
		ledger_debit(owner->ledger,
			     task_ledgers.purgeable_volatile,
			     PAGE_SIZE);
	}
	if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
		if (VM_PAGE_WIRED(mem)) {
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		} else {
			assert(vm_page_purgeable_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_count);
		}
	}
	if (mem->object->set_cache_attr == TRUE)
		pmap_set_cache_attributes(mem->phys_page, 0);

	mem->tabled = FALSE;
	mem->object = VM_OBJECT_NULL;
	mem->offset = (vm_object_offset_t) -1;
}


/*
 *	vm_page_lookup:
 *
 *	Returns the page associated with the object/offset
 *	pair specified; if none is found, VM_PAGE_NULL is returned.
 *
 *	The object must be locked.  No side effects.
 */

/* hit/miss counters for the memq_hint fast path (statistics only) */
unsigned long vm_page_lookup_hint = 0;
unsigned long vm_page_lookup_hint_next = 0;
unsigned long vm_page_lookup_hint_prev = 0;
unsigned long vm_page_lookup_hint_miss = 0;
unsigned long vm_page_lookup_bucket_NULL = 0;
unsigned long vm_page_lookup_miss = 0;


vm_page_t
vm_page_lookup(
	vm_object_t		object,
	vm_object_offset_t	offset)
{
	vm_page_t	mem;
	vm_page_bucket_t *bucket;
	queue_entry_t	qe;
	lck_spin_t	*bucket_lock;
	int		hash_id;

	vm_object_lock_assert_held(object);
	/*
	 * Fast path: check the per-object hint (the last page looked up)
	 * and its immediate neighbors on the object's memq before falling
	 * back to the hash table.
	 */
	mem = object->memq_hint;

	if (mem != VM_PAGE_NULL) {
		assert(mem->object == object);

		if (mem->offset == offset) {
			vm_page_lookup_hint++;
			return mem;
		}
		qe = queue_next(&mem->listq);

		if (!
queue_end(&object->memq, qe)) {
			vm_page_t	next_page;

			next_page = (vm_page_t) qe;
			assert(next_page->object == object);

			if (next_page->offset == offset) {
				vm_page_lookup_hint_next++;
				object->memq_hint = next_page; /* new hint */
				return next_page;
			}
		}
		qe = queue_prev(&mem->listq);

		if (! queue_end(&object->memq, qe)) {
			vm_page_t prev_page;

			prev_page = (vm_page_t) qe;
			assert(prev_page->object == object);

			if (prev_page->offset == offset) {
				vm_page_lookup_hint_prev++;
				object->memq_hint = prev_page; /* new hint */
				return prev_page;
			}
		}
	}
	/*
	 * Search the hash table for this object/offset pair
	 */
	hash_id = vm_page_hash(object, offset);
	bucket = &vm_page_buckets[hash_id];

	/*
	 * since we hold the object lock, we are guaranteed that no
	 * new pages can be inserted into this object... this in turn
	 * guarantees that the page we're looking for can't exist
	 * if the bucket it hashes to is currently NULL even when looked
	 * at outside the scope of the hash bucket lock... this is a
	 * really cheap optimization to avoid taking the lock
	 */
	if (!bucket->page_list) {
		vm_page_lookup_bucket_NULL++;

		return (VM_PAGE_NULL);
	}
	bucket_lock = &vm_page_bucket_locks[hash_id / BUCKETS_PER_LOCK];

	lck_spin_lock(bucket_lock);

	for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = VM_PAGE_UNPACK_PTR(mem->next_m)) {
#if 0
		/*
		 * we don't hold the page queue lock
		 * so this check isn't safe to make
		 */
		VM_PAGE_CHECK(mem);
#endif
		if ((mem->object == object) && (mem->offset == offset))
			break;
	}
	lck_spin_unlock(bucket_lock);

	if (mem != VM_PAGE_NULL) {
		if (object->memq_hint != VM_PAGE_NULL) {
			vm_page_lookup_hint_miss++;
		}
		assert(mem->object == object);
		object->memq_hint = mem;
	} else
		vm_page_lookup_miss++;

	return(mem);
}


/*
 *	vm_page_rename:
 *
 *	Move the given memory entry from its
 *	current object to the specified target object/offset.
 *
 *	The object must be locked.
 */
void
vm_page_rename(
	register vm_page_t		mem,
	register vm_object_t		new_object,
	vm_object_offset_t		new_offset,
	boolean_t			encrypted_ok)
{
	boolean_t	internal_to_external, external_to_internal;

	assert(mem->object != new_object);

	/*
	 * ENCRYPTED SWAP:
	 * The encryption key is based on the page's memory object
	 * (aka "pager") and paging offset.  Moving the page to
	 * another VM object changes its "pager" and "paging_offset"
	 * so it has to be decrypted first, or we would lose the key.
	 *
	 * One exception is VM object collapsing, where we transfer pages
	 * from one backing object to its parent object.  This operation also
	 * transfers the paging information, so the <pager,paging_offset> info
	 * should remain consistent.  The caller (vm_object_do_collapse())
	 * sets "encrypted_ok" in this case.
	 */
	if (!encrypted_ok && mem->encrypted) {
		panic("vm_page_rename: page %p is encrypted\n", mem);
	}

	XPR(XPR_VM_PAGE,
	    "vm_page_rename, new object 0x%X, offset 0x%X page 0x%X\n",
	    new_object, new_offset,
	    mem, 0,0);

	/*
	 *	Changes to mem->object require the page lock because
	 *	the pageout daemon uses that lock to get the object.
	 */
	vm_page_lockspin_queues();

	internal_to_external = FALSE;
	external_to_internal = FALSE;

	if (mem->local) {
		/*
		 * it's much easier to get the vm_page_pageable_xxx accounting correct
		 * if we first move the page to the active queue... it's going to end
		 * up there anyway, and we don't do vm_page_rename's frequently enough
		 * for this to matter.
		 */
		VM_PAGE_QUEUES_REMOVE(mem);
		vm_page_activate(mem);
	}
	if (mem->active || mem->inactive || mem->speculative) {
		if (mem->object->internal && !new_object->internal) {
			internal_to_external = TRUE;
		}
		if (!mem->object->internal && new_object->internal) {
			external_to_internal = TRUE;
		}
	}

	/* remove from old object, insert into new (queues lock held) */
	vm_page_remove(mem, TRUE);
	vm_page_insert_internal(mem, new_object, new_offset, TRUE, TRUE, FALSE);

	if (internal_to_external) {
		vm_page_pageable_internal_count--;
		vm_page_pageable_external_count++;
	} else if (external_to_internal) {
		vm_page_pageable_external_count--;
		vm_page_pageable_internal_count++;
	}

	vm_page_unlock_queues();
}

/*
 *	vm_page_init:
 *
 *	Initialize the fields in a new page.
 *	This takes a structure with random values and initializes it
 *	so that it can be given to vm_page_release or vm_page_insert.
 */
void
vm_page_init(
	vm_page_t	mem,
	ppnum_t		phys_page,
	boolean_t	lopage)
{
	assert(phys_page);

#if DEBUG
	/* fictitious/guard sentinel addresses are not backed by DRAM */
	if ((phys_page != vm_page_fictitious_addr) && (phys_page != vm_page_guard_addr)) {
		if (!(pmap_valid_page(phys_page))) {
			panic("vm_page_init: non-DRAM phys_page 0x%x\n", phys_page);
		}
	}
#endif
	/* copy the prototype page, then stamp in this page's identity */
	*mem = vm_page_template;
	mem->phys_page = phys_page;
#if 0
	/*
	 * we're leaving this turned off for now... currently pages
	 * come off the free list and are either immediately dirtied/referenced
	 * due to zero-fill or COW faults, or are used to read or write files...
	 * in the file I/O case, the UPL mechanism takes care of clearing
	 * the state of the HW ref/mod bits in a somewhat fragile way.
	 * Since we may change the way this works in the future (to toughen it up),
	 * I'm leaving this as a reminder of where these bits could get cleared
	 */

	/*
	 * make sure both the h/w referenced and modified bits are
	 * clear at this point... we are especially dependent on
	 * not finding a 'stale' h/w modified in a number of spots
	 * once this page goes back into use
	 */
	pmap_clear_refmod(phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
#endif
	mem->lopage = lopage;
}

/*
 *	vm_page_grab_fictitious:
 *
 *	Remove a fictitious page from the free list.
 *	Returns VM_PAGE_NULL if there are no free pages.
 */
/* event counters for the fictitious-page allocator (statistics only) */
int	c_vm_page_grab_fictitious = 0;
int	c_vm_page_grab_fictitious_failed = 0;
int	c_vm_page_release_fictitious = 0;
int	c_vm_page_more_fictitious = 0;

vm_page_t
vm_page_grab_fictitious_common(
	ppnum_t phys_addr)
{
	vm_page_t	m;

	/* zget is the non-blocking zone allocator: NULL means "try again
	 * after vm_page_more_fictitious()" */
	if ((m = (vm_page_t)zget(vm_page_zone))) {

		vm_page_init(m, phys_addr, FALSE);
		m->fictitious = TRUE;

		c_vm_page_grab_fictitious++;
	} else
		c_vm_page_grab_fictitious_failed++;

	return m;
}

vm_page_t
vm_page_grab_fictitious(void)
{
	return vm_page_grab_fictitious_common(vm_page_fictitious_addr);
}

vm_page_t
vm_page_grab_guard(void)
{
	return vm_page_grab_fictitious_common(vm_page_guard_addr);
}


/*
 *	vm_page_release_fictitious:
 *
 *	Release a fictitious page to the zone pool
 */
void
vm_page_release_fictitious(
	vm_page_t m)
{
	assert(!m->free);
	assert(m->fictitious);
	assert(m->phys_page == vm_page_fictitious_addr ||
	       m->phys_page == vm_page_guard_addr);

	c_vm_page_release_fictitious++;

	zfree(vm_page_zone, m);
}

/*
 *	vm_page_more_fictitious:
 *
 *	Add more fictitious pages to the zone.
 *	Allowed to block. This routine is way intimate
 *	with the zones code, for several reasons:
 *	1. we need to carve some page structures out of physical
 *	   memory before zones work, so they _cannot_ come from
 *	   the zone_map.
 *	2. the zone needs to be collectable in order to prevent
 *	   growth without bound. These structures are used by
 *	   the device pager (by the hundreds and thousands), as
 *	   private pages for pageout, and as blocking pages for
 *	   pagein. Temporary bursts in demand should not result in
 *	   permanent allocation of a resource.
 *	3.
To smooth allocation humps, we allocate single pages
 *	   with kernel_memory_allocate(), and cram them into the
 *	   zone.
 */

void vm_page_more_fictitious(void)
{
	vm_offset_t	addr;
	kern_return_t	retval;

	c_vm_page_more_fictitious++;

	/*
	 * Allocate a single page from the zone_map. Do not wait if no physical
	 * pages are immediately available, and do not zero the space. We need
	 * our own blocking lock here to prevent having multiple,
	 * simultaneous requests from piling up on the zone_map lock. Exactly
	 * one (of our) threads should be potentially waiting on the map lock.
	 * If winner is not vm-privileged, then the page allocation will fail,
	 * and it will temporarily block here in the vm_page_wait().
	 */
	lck_mtx_lock(&vm_page_alloc_lock);
	/*
	 * If another thread allocated space, just bail out now.
	 */
	if (zone_free_count(vm_page_zone) > 5) {
		/*
		 * The number "5" is a small number that is larger than the
		 * number of fictitious pages that any single caller will
		 * attempt to allocate. Otherwise, a thread will attempt to
		 * acquire a fictitious page (vm_page_grab_fictitious), fail,
		 * release all of the resources and locks already acquired,
		 * and then call this routine. This routine finds the pages
		 * that the caller released, so fails to allocate new space.
		 * The process repeats infinitely. The largest known number
		 * of fictitious pages required in this manner is 2. 5 is
		 * simply a somewhat larger number.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		return;
	}

	retval = kernel_memory_allocate(zone_map,
					&addr, PAGE_SIZE, VM_PROT_ALL,
					KMA_KOBJECT|KMA_NOPAGEWAIT);
	if (retval != KERN_SUCCESS) {
		/*
		 * No page was available. Drop the
		 * lock to give another thread a chance at it, and
		 * wait for the pageout daemon to make progress.
		 */
		lck_mtx_unlock(&vm_page_alloc_lock);
		vm_page_wait(THREAD_UNINT);
		return;
	}

	/* Increment zone page count. We account for all memory managed by the zone in z->page_count */
	OSAddAtomic64(1, &(vm_page_zone->page_count));

	zcram(vm_page_zone, addr, PAGE_SIZE);

	lck_mtx_unlock(&vm_page_alloc_lock);
}


/*
 *	vm_pool_low():
 *
 *	Return true if it is not likely that a non-vm_privileged thread
 *	can get memory without blocking.  Advisory only, since the
 *	situation may change under us.
 */
int
vm_pool_low(void)
{
	/* No locking, at worst we will fib. */
	return( vm_page_free_count <= vm_page_free_reserved );
}



/*
 * this is an interface to support bring-up of drivers
 * on platforms with physical memory > 4G...
 */
int		vm_himemory_mode = 2;


/*
 * this interface exists to support hardware controllers
 * incapable of generating DMAs with more than 32 bits
 * of address on platforms with physical memory > 4G...
1958 */ 1959unsigned int vm_lopages_allocated_q = 0; 1960unsigned int vm_lopages_allocated_cpm_success = 0; 1961unsigned int vm_lopages_allocated_cpm_failed = 0; 1962queue_head_t vm_lopage_queue_free; 1963 1964vm_page_t 1965vm_page_grablo(void) 1966{ 1967 vm_page_t mem; 1968 1969 if (vm_lopage_needed == FALSE) 1970 return (vm_page_grab()); 1971 1972 lck_mtx_lock_spin(&vm_page_queue_free_lock); 1973 1974 if ( !queue_empty(&vm_lopage_queue_free)) { 1975 queue_remove_first(&vm_lopage_queue_free, 1976 mem, 1977 vm_page_t, 1978 pageq); 1979 assert(vm_lopage_free_count); 1980 1981 vm_lopage_free_count--; 1982 vm_lopages_allocated_q++; 1983 1984 if (vm_lopage_free_count < vm_lopage_lowater) 1985 vm_lopage_refill = TRUE; 1986 1987 lck_mtx_unlock(&vm_page_queue_free_lock); 1988 } else { 1989 lck_mtx_unlock(&vm_page_queue_free_lock); 1990 1991 if (cpm_allocate(PAGE_SIZE, &mem, atop(0xffffffff), 0, FALSE, KMA_LOMEM) != KERN_SUCCESS) { 1992 1993 lck_mtx_lock_spin(&vm_page_queue_free_lock); 1994 vm_lopages_allocated_cpm_failed++; 1995 lck_mtx_unlock(&vm_page_queue_free_lock); 1996 1997 return (VM_PAGE_NULL); 1998 } 1999 mem->busy = TRUE; 2000 2001 vm_page_lockspin_queues(); 2002 2003 mem->gobbled = FALSE; 2004 vm_page_gobble_count--; 2005 vm_page_wire_count--; 2006 2007 vm_lopages_allocated_cpm_success++; 2008 vm_page_unlock_queues(); 2009 } 2010 assert(mem->busy); 2011 assert(!mem->free); 2012 assert(!mem->pmapped); 2013 assert(!mem->wpmapped); 2014 assert(!pmap_is_noencrypt(mem->phys_page)); 2015 2016 mem->pageq.next = NULL; 2017 mem->pageq.prev = NULL; 2018 2019 return (mem); 2020} 2021 2022 2023/* 2024 * vm_page_grab: 2025 * 2026 * first try to grab a page from the per-cpu free list... 2027 * this must be done while pre-emption is disabled... if 2028 * a page is available, we're done... 2029 * if no page is available, grab the vm_page_queue_free_lock 2030 * and see if current number of free pages would allow us 2031 * to grab at least 1... 
if not, return VM_PAGE_NULL as before...
 *	if there are pages available, disable preemption and
 *	recheck the state of the per-cpu free list... we could
 *	have been preempted and moved to a different cpu, or
 *	some other thread could have re-filled it... if still
 *	empty, figure out how many pages we can steal from the
 *	global free queue and move to the per-cpu queue...
 *	return 1 of these pages when done... only wakeup the
 *	pageout_scan thread if we moved pages from the global
 *	list... no need for the wakeup if we've satisfied the
 *	request from the per-cpu queue.
 */


vm_page_t
vm_page_grab( void )
{
	vm_page_t	mem;


	disable_preemption();

	/* fast path: pop the head of this CPU's private free list */
	if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
return_page_from_cpu_list:
		PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
		PROCESSOR_DATA(current_processor(), free_pages) = mem->pageq.next;

		enable_preemption();
		mem->pageq.next = NULL;

		assert(mem->listq.next == NULL && mem->listq.prev == NULL);
		assert(mem->tabled == FALSE);
		assert(mem->object == VM_OBJECT_NULL);
		assert(!mem->laundry);
		assert(!mem->free);
		assert(pmap_verify_free(mem->phys_page));
		assert(mem->busy);
		assert(!mem->encrypted);
		assert(!mem->pmapped);
		assert(!mem->wpmapped);
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->throttled);
		assert(!mem->speculative);
		assert(!pmap_is_noencrypt(mem->phys_page));

		return mem;
	}
	enable_preemption();


	/*
	 *	Optionally produce warnings if the wire or gobble
	 *	counts exceed some threshold.
	 */
#if VM_PAGE_WIRE_COUNT_WARNING
	if (vm_page_wire_count >= VM_PAGE_WIRE_COUNT_WARNING) {
		printf("mk: vm_page_grab(): high wired page count of %d\n",
			vm_page_wire_count);
	}
#endif
#if VM_PAGE_GOBBLE_COUNT_WARNING
	if (vm_page_gobble_count >= VM_PAGE_GOBBLE_COUNT_WARNING) {
		printf("mk: vm_page_grab(): high gobbled page count of %d\n",
			vm_page_gobble_count);
	}
#endif
	lck_mtx_lock_spin(&vm_page_queue_free_lock);

	/*
	 *	Only let privileged threads (involved in pageout)
	 *	dip into the reserved pool.
	 */
	if ((vm_page_free_count < vm_page_free_reserved) &&
	    !(current_thread()->options & TH_OPT_VMPRIV)) {
		lck_mtx_unlock(&vm_page_queue_free_lock);
		mem = VM_PAGE_NULL;
	}
	else {
	       vm_page_t	head;
	       vm_page_t	tail;
	       unsigned int	pages_to_steal;
	       unsigned int	color;

	       while ( vm_page_free_count == 0 ) {

			lck_mtx_unlock(&vm_page_queue_free_lock);
			/*
			 * must be a privileged thread to be
			 * in this state since a non-privileged
			 * thread would have bailed if we were
			 * under the vm_page_free_reserved mark
			 */
			VM_PAGE_WAIT();
			lck_mtx_lock_spin(&vm_page_queue_free_lock);
	       }

	       disable_preemption();

	       if ((mem = PROCESSOR_DATA(current_processor(), free_pages))) {
			lck_mtx_unlock(&vm_page_queue_free_lock);

		       /*
			* we got preempted and moved to another processor
			* or we got preempted and someone else ran and filled the cache
			*/
			goto return_page_from_cpu_list;
	       }
	       /* decide how many pages to move to the per-cpu magazine */
	       if (vm_page_free_count <= vm_page_free_reserved)
		       pages_to_steal = 1;
	       else {
		       if (vm_free_magazine_refill_limit <= (vm_page_free_count - vm_page_free_reserved))
			       pages_to_steal = vm_free_magazine_refill_limit;
		       else
		               pages_to_steal = (vm_page_free_count - vm_page_free_reserved);
	       }
	       color = PROCESSOR_DATA(current_processor(), start_color);
	       head = tail = NULL;

	       vm_page_free_count -= pages_to_steal;

	       /* dequeue pages round-robin across the color queues,
		* chaining them into a singly-linked list via pageq.next */
	       while (pages_to_steal--) {

		       while (queue_empty(&vm_page_queue_free[color]))
			       color = (color + 1) & vm_color_mask;

		       queue_remove_first(&vm_page_queue_free[color],
					  mem,
					  vm_page_t,
					  pageq);
		       mem->pageq.next = NULL;
		       mem->pageq.prev = NULL;

		       assert(!mem->active);
		       assert(!mem->inactive);
		       assert(!mem->throttled);
		       assert(!mem->speculative);

		       color = (color + 1) & vm_color_mask;

		       if (head == NULL)
			       head = mem;
		       else
			       tail->pageq.next = (queue_t)mem;
		       tail = mem;

		       assert(mem->listq.next == NULL && mem->listq.prev == NULL);
		       assert(mem->tabled == FALSE);
		       assert(mem->object == VM_OBJECT_NULL);
		       assert(!mem->laundry);
		       assert(mem->free);
		       mem->free = FALSE;

		       assert(pmap_verify_free(mem->phys_page));
		       assert(mem->busy);
		       assert(!mem->free);
		       assert(!mem->encrypted);
		       assert(!mem->pmapped);
		       assert(!mem->wpmapped);
		       assert(!pmap_is_noencrypt(mem->phys_page));
	       }
	       lck_mtx_unlock(&vm_page_queue_free_lock);

	       /* first page satisfies this request; the rest become the
		* per-cpu free list */
	       PROCESSOR_DATA(current_processor(), free_pages) = head->pageq.next;
	       PROCESSOR_DATA(current_processor(), start_color) = color;

	       /*
		* satisfy this request
		*/
	       PROCESSOR_DATA(current_processor(), page_grab_count) += 1;
	       mem = head;
	       mem->pageq.next = NULL;

	       enable_preemption();
	}
	/*
	 *	Decide if we should poke the pageout daemon.
	 *	We do this if the free count is less than the low
	 *	water mark, or if the free count is less than the high
	 *	water mark (but above the low water mark) and the inactive
	 *	count is less than its target.
	 *
	 *	We don't have the counts locked ... if they change a little,
	 *	it doesn't really matter.
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	     ((vm_page_free_count < vm_page_free_target) &&
	      ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
	         thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 4);	/* (TEST/DEBUG) */

	return mem;
}

/*
 *	vm_page_release:
 *
 *	Return a page to the free list.
 */

void
vm_page_release(
	register vm_page_t	mem)
{
	unsigned int	color;
	int	need_wakeup = 0;
	int	need_priv_wakeup = 0;


	assert(!mem->private && !mem->fictitious);
	if (vm_page_free_verify) {
		assert(pmap_verify_free(mem->phys_page));
	}
//	dbgLog(mem->phys_page, vm_page_free_count, vm_page_wire_count, 5);	/* (TEST/DEBUG) */

	pmap_clear_noencrypt(mem->phys_page);

	lck_mtx_lock_spin(&vm_page_queue_free_lock);
#if DEBUG
	if (mem->free)
		panic("vm_page_release");
#endif

	assert(mem->busy);
	assert(!mem->laundry);
	assert(mem->object == VM_OBJECT_NULL);
	assert(mem->pageq.next == NULL &&
	       mem->pageq.prev == NULL);
	assert(mem->listq.next == NULL &&
	       mem->listq.prev == NULL);

	if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) &&
	    vm_lopage_free_count < vm_lopage_free_limit &&
	    mem->phys_page < max_valid_low_ppnum) {
		/*
		 * this exists to support hardware controllers
		 * incapable of generating DMAs with more than 32 bits
		 * of address on platforms with physical memory > 4G...
2272 */ 2273 queue_enter_first(&vm_lopage_queue_free, 2274 mem, 2275 vm_page_t, 2276 pageq); 2277 vm_lopage_free_count++; 2278 2279 if (vm_lopage_free_count >= vm_lopage_free_limit) 2280 vm_lopage_refill = FALSE; 2281 2282 mem->lopage = TRUE; 2283 } else { 2284 mem->lopage = FALSE; 2285 mem->free = TRUE; 2286 2287 color = mem->phys_page & vm_color_mask; 2288 queue_enter_first(&vm_page_queue_free[color], 2289 mem, 2290 vm_page_t, 2291 pageq); 2292 vm_page_free_count++; 2293 /* 2294 * Check if we should wake up someone waiting for page. 2295 * But don't bother waking them unless they can allocate. 2296 * 2297 * We wakeup only one thread, to prevent starvation. 2298 * Because the scheduling system handles wait queues FIFO, 2299 * if we wakeup all waiting threads, one greedy thread 2300 * can starve multiple niceguy threads. When the threads 2301 * all wakeup, the greedy threads runs first, grabs the page, 2302 * and waits for another page. It will be the first to run 2303 * when the next page is freed. 2304 * 2305 * However, there is a slight danger here. 2306 * The thread we wake might not use the free page. 2307 * Then the other threads could wait indefinitely 2308 * while the page goes unused. To forestall this, 2309 * the pageout daemon will keep making free pages 2310 * as long as vm_page_free_wanted is non-zero. 
2311 */ 2312 2313 assert(vm_page_free_count > 0); 2314 if (vm_page_free_wanted_privileged > 0) { 2315 vm_page_free_wanted_privileged--; 2316 need_priv_wakeup = 1; 2317 } else if (vm_page_free_wanted > 0 && 2318 vm_page_free_count > vm_page_free_reserved) { 2319 vm_page_free_wanted--; 2320 need_wakeup = 1; 2321 } 2322 } 2323 lck_mtx_unlock(&vm_page_queue_free_lock); 2324 2325 if (need_priv_wakeup) 2326 thread_wakeup_one((event_t) &vm_page_free_wanted_privileged); 2327 else if (need_wakeup) 2328 thread_wakeup_one((event_t) &vm_page_free_count); 2329 2330 VM_CHECK_MEMORYSTATUS; 2331} 2332 2333/* 2334 * This version of vm_page_release() is used only at startup 2335 * when we are single-threaded and pages are being released 2336 * for the first time. Hence, no locking or unnecessary checks are made. 2337 * Note: VM_CHECK_MEMORYSTATUS invoked by the caller. 2338 */ 2339void 2340vm_page_release_startup( 2341 register vm_page_t mem) 2342{ 2343 queue_t queue_free; 2344 2345 if (vm_lopage_free_count < vm_lopage_free_limit && 2346 mem->phys_page < max_valid_low_ppnum) { 2347 mem->lopage = TRUE; 2348 vm_lopage_free_count++; 2349 queue_free = &vm_lopage_queue_free; 2350 } else { 2351 mem->lopage = FALSE; 2352 mem->free = TRUE; 2353 vm_page_free_count++; 2354 queue_free = &vm_page_queue_free[mem->phys_page & vm_color_mask]; 2355 } 2356 queue_enter_first(queue_free, mem, vm_page_t, pageq); 2357} 2358 2359/* 2360 * vm_page_wait: 2361 * 2362 * Wait for a page to become available. 2363 * If there are plenty of free pages, then we don't sleep. 2364 * 2365 * Returns: 2366 * TRUE: There may be another page, try again 2367 * FALSE: We were interrupted out of our wait, don't try again 2368 */ 2369 2370boolean_t 2371vm_page_wait( 2372 int interruptible ) 2373{ 2374 /* 2375 * We can't use vm_page_free_reserved to make this 2376 * determination. Consider: some thread might 2377 * need to allocate two pages. The first allocation 2378 * succeeds, the second fails. 
After the first page is freed, 2379 * a call to vm_page_wait must really block. 2380 */ 2381 kern_return_t wait_result; 2382 int need_wakeup = 0; 2383 int is_privileged = current_thread()->options & TH_OPT_VMPRIV; 2384 2385 lck_mtx_lock_spin(&vm_page_queue_free_lock); 2386 2387 if (is_privileged && vm_page_free_count) { 2388 lck_mtx_unlock(&vm_page_queue_free_lock); 2389 return TRUE; 2390 } 2391 if (vm_page_free_count < vm_page_free_target) { 2392 2393 if (is_privileged) { 2394 if (vm_page_free_wanted_privileged++ == 0) 2395 need_wakeup = 1; 2396 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, interruptible); 2397 } else { 2398 if (vm_page_free_wanted++ == 0) 2399 need_wakeup = 1; 2400 wait_result = assert_wait((event_t)&vm_page_free_count, interruptible); 2401 } 2402 lck_mtx_unlock(&vm_page_queue_free_lock); 2403 counter(c_vm_page_wait_block++); 2404 2405 if (need_wakeup) 2406 thread_wakeup((event_t)&vm_page_free_wanted); 2407 2408 if (wait_result == THREAD_WAITING) { 2409 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_START, 2410 vm_page_free_wanted_privileged, vm_page_free_wanted, 0, 0); 2411 wait_result = thread_block(THREAD_CONTINUE_NULL); 2412 VM_DEBUG_EVENT(vm_page_wait_block, VM_PAGE_WAIT_BLOCK, DBG_FUNC_END, 0, 0, 0, 0); 2413 } 2414 2415 return(wait_result == THREAD_AWAKENED); 2416 } else { 2417 lck_mtx_unlock(&vm_page_queue_free_lock); 2418 return TRUE; 2419 } 2420} 2421 2422/* 2423 * vm_page_alloc: 2424 * 2425 * Allocate and return a memory cell associated 2426 * with this VM object/offset pair. 2427 * 2428 * Object must be locked. 
2429 */ 2430 2431vm_page_t 2432vm_page_alloc( 2433 vm_object_t object, 2434 vm_object_offset_t offset) 2435{ 2436 register vm_page_t mem; 2437 2438 vm_object_lock_assert_exclusive(object); 2439 mem = vm_page_grab(); 2440 if (mem == VM_PAGE_NULL) 2441 return VM_PAGE_NULL; 2442 2443 vm_page_insert(mem, object, offset); 2444 2445 return(mem); 2446} 2447 2448vm_page_t 2449vm_page_alloclo( 2450 vm_object_t object, 2451 vm_object_offset_t offset) 2452{ 2453 register vm_page_t mem; 2454 2455 vm_object_lock_assert_exclusive(object); 2456 mem = vm_page_grablo(); 2457 if (mem == VM_PAGE_NULL) 2458 return VM_PAGE_NULL; 2459 2460 vm_page_insert(mem, object, offset); 2461 2462 return(mem); 2463} 2464 2465 2466/* 2467 * vm_page_alloc_guard: 2468 * 2469 * Allocate a fictitious page which will be used 2470 * as a guard page. The page will be inserted into 2471 * the object and returned to the caller. 2472 */ 2473 2474vm_page_t 2475vm_page_alloc_guard( 2476 vm_object_t object, 2477 vm_object_offset_t offset) 2478{ 2479 register vm_page_t mem; 2480 2481 vm_object_lock_assert_exclusive(object); 2482 mem = vm_page_grab_guard(); 2483 if (mem == VM_PAGE_NULL) 2484 return VM_PAGE_NULL; 2485 2486 vm_page_insert(mem, object, offset); 2487 2488 return(mem); 2489} 2490 2491 2492counter(unsigned int c_laundry_pages_freed = 0;) 2493 2494/* 2495 * vm_page_free_prepare: 2496 * 2497 * Removes page from any queue it may be on 2498 * and disassociates it from its VM object. 2499 * 2500 * Object and page queues must be locked prior to entry. 
 */
static void
vm_page_free_prepare(
	vm_page_t	mem)
{
	vm_page_free_prepare_queues(mem);
	vm_page_free_prepare_object(mem, TRUE);
}


/*
 * Queue-side half of freeing a page: pull it off the laundry/paging
 * queues and undo wired/gobbled accounting (including purgeable
 * ledger adjustments when a wired purgeable page is freed).
 *
 * Page queues lock must be held; the page's object lock (if any)
 * must be held exclusive.
 */
void
vm_page_free_prepare_queues(
	vm_page_t	mem)
{
	VM_PAGE_CHECK(mem);
	assert(!mem->free);
	assert(!mem->cleaning);

#if MACH_ASSERT || DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
	if (mem->free)
		panic("vm_page_free: freeing page on free list\n");
#endif	/* MACH_ASSERT || DEBUG */
	if (mem->object) {
		vm_object_lock_assert_exclusive(mem->object);
	}
	if (mem->laundry) {
		/*
		 * We may have to free a page while it's being laundered
		 * if we lost its pager (due to a forced unmount, for example).
		 * We need to call vm_pageout_steal_laundry() before removing
		 * the page from its VM object, so that we can remove it
		 * from its pageout queue and adjust the laundry accounting
		 */
		vm_pageout_steal_laundry(mem, TRUE);
		counter(++c_laundry_pages_freed);
	}

	VM_PAGE_QUEUES_REMOVE(mem);	/* clears local/active/inactive/throttled/speculative */

	if (VM_PAGE_WIRED(mem)) {
		if (mem->object) {
			assert(mem->object->wired_page_count > 0);
			mem->object->wired_page_count--;
			assert(mem->object->resident_page_count >=
			       mem->object->wired_page_count);

			if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
				OSAddAtomic(+1, &vm_page_purgeable_count);
				assert(vm_page_purgeable_wired_count > 0);
				OSAddAtomic(-1, &vm_page_purgeable_wired_count);
			}
			if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
			     mem->object->purgable == VM_PURGABLE_EMPTY) &&
			    mem->object->vo_purgeable_owner != TASK_NULL) {
				task_t owner;

				owner = mem->object->vo_purgeable_owner;
				/*
				 * While wired, this page was accounted
				 * as "non-volatile" but it should now
				 * be accounted as "volatile".
				 */
				/* one less "non-volatile"... */
				ledger_debit(owner->ledger,
					     task_ledgers.purgeable_nonvolatile,
					     PAGE_SIZE);
				/* ... and "phys_footprint" */
				ledger_debit(owner->ledger,
					     task_ledgers.phys_footprint,
					     PAGE_SIZE);
				/* one more "volatile" */
				ledger_credit(owner->ledger,
					      task_ledgers.purgeable_volatile,
					      PAGE_SIZE);
			}
		}
		/* private/fictitious pages are never counted in vm_page_wire_count */
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		mem->wire_count = 0;
		assert(!mem->gobbled);
	} else if (mem->gobbled) {
		if (!mem->private && !mem->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
	}
}


/*
 * Object-side half of freeing a page: detach it from its VM object
 * (optionally removing it from the object/offset hash), wake any
 * waiters, and re-initialize the page for reuse.  A "private" page
 * reverts to a fictitious page since its physical frame is not ours.
 */
void
vm_page_free_prepare_object(
	vm_page_t	mem,
	boolean_t	remove_from_hash)
{
	if (mem->tabled)
		vm_page_remove(mem, remove_from_hash);	/* clears tabled, object, offset */

	PAGE_WAKEUP(mem);		/* clears wanted */

	if (mem->private) {
		mem->private = FALSE;
		mem->fictitious = TRUE;
		mem->phys_page = vm_page_fictitious_addr;
	}
	if ( !mem->fictitious) {
		vm_page_init(mem, mem->phys_page, mem->lopage);
	}
}


/*
 *	vm_page_free:
 *
 *	Returns the given page to the free list,
 *	disassociating it with any VM object.
 *
 *	Object and page queues must be locked prior to entry.
2618 */ 2619void 2620vm_page_free( 2621 vm_page_t mem) 2622{ 2623 vm_page_free_prepare(mem); 2624 2625 if (mem->fictitious) { 2626 vm_page_release_fictitious(mem); 2627 } else { 2628 vm_page_release(mem); 2629 } 2630} 2631 2632 2633void 2634vm_page_free_unlocked( 2635 vm_page_t mem, 2636 boolean_t remove_from_hash) 2637{ 2638 vm_page_lockspin_queues(); 2639 vm_page_free_prepare_queues(mem); 2640 vm_page_unlock_queues(); 2641 2642 vm_page_free_prepare_object(mem, remove_from_hash); 2643 2644 if (mem->fictitious) { 2645 vm_page_release_fictitious(mem); 2646 } else { 2647 vm_page_release(mem); 2648 } 2649} 2650 2651 2652/* 2653 * Free a list of pages. The list can be up to several hundred pages, 2654 * as blocked up by vm_pageout_scan(). 2655 * The big win is not having to take the free list lock once 2656 * per page. 2657 */ 2658void 2659vm_page_free_list( 2660 vm_page_t freeq, 2661 boolean_t prepare_object) 2662{ 2663 vm_page_t mem; 2664 vm_page_t nxt; 2665 vm_page_t local_freeq; 2666 int pg_count; 2667 2668 while (freeq) { 2669 2670 pg_count = 0; 2671 local_freeq = VM_PAGE_NULL; 2672 mem = freeq; 2673 2674 /* 2675 * break up the processing into smaller chunks so 2676 * that we can 'pipeline' the pages onto the 2677 * free list w/o introducing too much 2678 * contention on the global free queue lock 2679 */ 2680 while (mem && pg_count < 64) { 2681 2682 assert(!mem->inactive); 2683 assert(!mem->active); 2684 assert(!mem->throttled); 2685 assert(!mem->free); 2686 assert(!mem->speculative); 2687 assert(!VM_PAGE_WIRED(mem)); 2688 assert(mem->pageq.prev == NULL); 2689 2690 nxt = (vm_page_t)(mem->pageq.next); 2691 2692 if (vm_page_free_verify && !mem->fictitious && !mem->private) { 2693 assert(pmap_verify_free(mem->phys_page)); 2694 } 2695 if (prepare_object == TRUE) 2696 vm_page_free_prepare_object(mem, TRUE); 2697 2698 if (!mem->fictitious) { 2699 assert(mem->busy); 2700 2701 if ((mem->lopage == TRUE || vm_lopage_refill == TRUE) && 2702 vm_lopage_free_count < 
vm_lopage_free_limit && 2703 mem->phys_page < max_valid_low_ppnum) { 2704 mem->pageq.next = NULL; 2705 vm_page_release(mem); 2706 } else { 2707 /* 2708 * IMPORTANT: we can't set the page "free" here 2709 * because that would make the page eligible for 2710 * a physically-contiguous allocation (see 2711 * vm_page_find_contiguous()) right away (we don't 2712 * hold the vm_page_queue_free lock). That would 2713 * cause trouble because the page is not actually 2714 * in the free queue yet... 2715 */ 2716 mem->pageq.next = (queue_entry_t)local_freeq; 2717 local_freeq = mem; 2718 pg_count++; 2719 2720 pmap_clear_noencrypt(mem->phys_page); 2721 } 2722 } else { 2723 assert(mem->phys_page == vm_page_fictitious_addr || 2724 mem->phys_page == vm_page_guard_addr); 2725 vm_page_release_fictitious(mem); 2726 } 2727 mem = nxt; 2728 } 2729 freeq = mem; 2730 2731 if ( (mem = local_freeq) ) { 2732 unsigned int avail_free_count; 2733 unsigned int need_wakeup = 0; 2734 unsigned int need_priv_wakeup = 0; 2735 2736 lck_mtx_lock_spin(&vm_page_queue_free_lock); 2737 2738 while (mem) { 2739 int color; 2740 2741 nxt = (vm_page_t)(mem->pageq.next); 2742 2743 assert(!mem->free); 2744 assert(mem->busy); 2745 mem->free = TRUE; 2746 2747 color = mem->phys_page & vm_color_mask; 2748 queue_enter_first(&vm_page_queue_free[color], 2749 mem, 2750 vm_page_t, 2751 pageq); 2752 mem = nxt; 2753 } 2754 vm_page_free_count += pg_count; 2755 avail_free_count = vm_page_free_count; 2756 2757 if (vm_page_free_wanted_privileged > 0 && avail_free_count > 0) { 2758 2759 if (avail_free_count < vm_page_free_wanted_privileged) { 2760 need_priv_wakeup = avail_free_count; 2761 vm_page_free_wanted_privileged -= avail_free_count; 2762 avail_free_count = 0; 2763 } else { 2764 need_priv_wakeup = vm_page_free_wanted_privileged; 2765 vm_page_free_wanted_privileged = 0; 2766 avail_free_count -= vm_page_free_wanted_privileged; 2767 } 2768 } 2769 if (vm_page_free_wanted > 0 && avail_free_count > vm_page_free_reserved) { 2770 
unsigned int available_pages; 2771 2772 available_pages = avail_free_count - vm_page_free_reserved; 2773 2774 if (available_pages >= vm_page_free_wanted) { 2775 need_wakeup = vm_page_free_wanted; 2776 vm_page_free_wanted = 0; 2777 } else { 2778 need_wakeup = available_pages; 2779 vm_page_free_wanted -= available_pages; 2780 } 2781 } 2782 lck_mtx_unlock(&vm_page_queue_free_lock); 2783 2784 if (need_priv_wakeup != 0) { 2785 /* 2786 * There shouldn't be that many VM-privileged threads, 2787 * so let's wake them all up, even if we don't quite 2788 * have enough pages to satisfy them all. 2789 */ 2790 thread_wakeup((event_t)&vm_page_free_wanted_privileged); 2791 } 2792 if (need_wakeup != 0 && vm_page_free_wanted == 0) { 2793 /* 2794 * We don't expect to have any more waiters 2795 * after this, so let's wake them all up at 2796 * once. 2797 */ 2798 thread_wakeup((event_t) &vm_page_free_count); 2799 } else for (; need_wakeup != 0; need_wakeup--) { 2800 /* 2801 * Wake up one waiter per page we just released. 2802 */ 2803 thread_wakeup_one((event_t) &vm_page_free_count); 2804 } 2805 2806 VM_CHECK_MEMORYSTATUS; 2807 } 2808 } 2809} 2810 2811 2812/* 2813 * vm_page_wire: 2814 * 2815 * Mark this page as wired down by yet 2816 * another map, removing it from paging queues 2817 * as necessary. 2818 * 2819 * The page's object and the page queues must be locked. 2820 */ 2821void 2822vm_page_wire( 2823 register vm_page_t mem) 2824{ 2825 2826// dbgLog(current_thread(), mem->offset, mem->object, 1); /* (TEST/DEBUG) */ 2827 2828 VM_PAGE_CHECK(mem); 2829 if (mem->object) { 2830 vm_object_lock_assert_exclusive(mem->object); 2831 } else { 2832 /* 2833 * In theory, the page should be in an object before it 2834 * gets wired, since we need to hold the object lock 2835 * to update some fields in the page structure. 2836 * However, some code (i386 pmap, for example) might want 2837 * to wire a page before it gets inserted into an object. 
2838 * That's somewhat OK, as long as nobody else can get to 2839 * that page and update it at the same time. 2840 */ 2841 } 2842#if DEBUG 2843 lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED); 2844#endif 2845 if ( !VM_PAGE_WIRED(mem)) { 2846 2847 if (mem->pageout_queue) { 2848 mem->pageout = FALSE; 2849 vm_pageout_throttle_up(mem); 2850 } 2851 VM_PAGE_QUEUES_REMOVE(mem); 2852 2853 if (mem->object) { 2854 mem->object->wired_page_count++; 2855 assert(mem->object->resident_page_count >= 2856 mem->object->wired_page_count); 2857 if (mem->object->purgable == VM_PURGABLE_VOLATILE) { 2858 assert(vm_page_purgeable_count > 0); 2859 OSAddAtomic(-1, &vm_page_purgeable_count); 2860 OSAddAtomic(1, &vm_page_purgeable_wired_count); 2861 } 2862 if ((mem->object->purgable == VM_PURGABLE_VOLATILE || 2863 mem->object->purgable == VM_PURGABLE_EMPTY) && 2864 mem->object->vo_purgeable_owner != TASK_NULL) { 2865 task_t owner; 2866 2867 owner = mem->object->vo_purgeable_owner; 2868 /* less volatile bytes */ 2869 ledger_debit(owner->ledger, 2870 task_ledgers.purgeable_volatile, 2871 PAGE_SIZE); 2872 /* more not-quite-volatile bytes */ 2873 ledger_credit(owner->ledger, 2874 task_ledgers.purgeable_nonvolatile, 2875 PAGE_SIZE); 2876 /* more footprint */ 2877 ledger_credit(owner->ledger, 2878 task_ledgers.phys_footprint, 2879 PAGE_SIZE); 2880 } 2881 if (mem->object->all_reusable) { 2882 /* 2883 * Wired pages are not counted as "re-usable" 2884 * in "all_reusable" VM objects, so nothing 2885 * to do here. 2886 */ 2887 } else if (mem->reusable) { 2888 /* 2889 * This page is not "re-usable" when it's 2890 * wired, so adjust its state and the 2891 * accounting. 
2892 */ 2893 vm_object_reuse_pages(mem->object, 2894 mem->offset, 2895 mem->offset+PAGE_SIZE_64, 2896 FALSE); 2897 } 2898 } 2899 assert(!mem->reusable); 2900 2901 if (!mem->private && !mem->fictitious && !mem->gobbled) 2902 vm_page_wire_count++; 2903 if (mem->gobbled) 2904 vm_page_gobble_count--; 2905 mem->gobbled = FALSE; 2906 2907 VM_CHECK_MEMORYSTATUS; 2908 2909 /* 2910 * ENCRYPTED SWAP: 2911 * The page could be encrypted, but 2912 * We don't have to decrypt it here 2913 * because we don't guarantee that the 2914 * data is actually valid at this point. 2915 * The page will get decrypted in 2916 * vm_fault_wire() if needed. 2917 */ 2918 } 2919 assert(!mem->gobbled); 2920 mem->wire_count++; 2921 VM_PAGE_CHECK(mem); 2922} 2923 2924/* 2925 * vm_page_gobble: 2926 * 2927 * Mark this page as consumed by the vm/ipc/xmm subsystems. 2928 * 2929 * Called only for freshly vm_page_grab()ed pages - w/ nothing locked. 2930 */ 2931void 2932vm_page_gobble( 2933 register vm_page_t mem) 2934{ 2935 vm_page_lockspin_queues(); 2936 VM_PAGE_CHECK(mem); 2937 2938 assert(!mem->gobbled); 2939 assert( !VM_PAGE_WIRED(mem)); 2940 2941 if (!mem->gobbled && !VM_PAGE_WIRED(mem)) { 2942 if (!mem->private && !mem->fictitious) 2943 vm_page_wire_count++; 2944 } 2945 vm_page_gobble_count++; 2946 mem->gobbled = TRUE; 2947 vm_page_unlock_queues(); 2948} 2949 2950/* 2951 * vm_page_unwire: 2952 * 2953 * Release one wiring of this page, potentially 2954 * enabling it to be paged again. 2955 * 2956 * The page's object and the page queues must be locked. 
 */
void
vm_page_unwire(
	vm_page_t	mem,
	boolean_t	queueit)
{

//	dbgLog(current_thread(), mem->offset, mem->object, 0);	/* (TEST/DEBUG) */

	VM_PAGE_CHECK(mem);
	assert(VM_PAGE_WIRED(mem));
	assert(mem->object != VM_OBJECT_NULL);
#if DEBUG
	vm_object_lock_assert_exclusive(mem->object);
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	/* accounting changes only when the LAST wiring goes away */
	if (--mem->wire_count == 0) {
		assert(!mem->private && !mem->fictitious);
		vm_page_wire_count--;
		assert(mem->object->wired_page_count > 0);
		mem->object->wired_page_count--;
		assert(mem->object->resident_page_count >=
		       mem->object->wired_page_count);
		if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
			OSAddAtomic(+1, &vm_page_purgeable_count);
			assert(vm_page_purgeable_wired_count > 0);
			OSAddAtomic(-1, &vm_page_purgeable_wired_count);
		}
		if ((mem->object->purgable == VM_PURGABLE_VOLATILE ||
		     mem->object->purgable == VM_PURGABLE_EMPTY) &&
		    mem->object->vo_purgeable_owner != TASK_NULL) {
			task_t owner;

			owner = mem->object->vo_purgeable_owner;
			/* more volatile bytes */
			ledger_credit(owner->ledger,
				      task_ledgers.purgeable_volatile,
				      PAGE_SIZE);
			/* less not-quite-volatile bytes */
			ledger_debit(owner->ledger,
				     task_ledgers.purgeable_nonvolatile,
				     PAGE_SIZE);
			/* less footprint */
			ledger_debit(owner->ledger,
				     task_ledgers.phys_footprint,
				     PAGE_SIZE);
		}
		assert(mem->object != kernel_object);
		assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);

		if (queueit == TRUE) {
			/* an "empty" purgeable object's pages are immediately reclaimable */
			if (mem->object->purgable == VM_PURGABLE_EMPTY) {
				vm_page_deactivate(mem);
			} else {
				vm_page_activate(mem);
			}
		}

		VM_CHECK_MEMORYSTATUS;

	}
	VM_PAGE_CHECK(mem);
}

/*
 *	vm_page_deactivate:
 *
 *	Returns the given page to the inactive list,
 *	indicating that no physical maps have access
 *	to this page.  [Used by the physical mapping system.]
 *
 *	The page queues must be locked.
 */
void
vm_page_deactivate(
	vm_page_t	m)
{
	vm_page_deactivate_internal(m, TRUE);
}


/*
 * Worker for vm_page_deactivate(): optionally skips clearing the
 * hardware reference bit (clear_hw_reference == FALSE) for callers
 * that have already sampled or don't care about it.
 *
 * The page queues must be locked.
 */
void
vm_page_deactivate_internal(
	vm_page_t	m,
	boolean_t	clear_hw_reference)
{

	VM_PAGE_CHECK(m);
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

//	dbgLog(m->phys_page, vm_page_free_count, vm_page_wire_count, 6);	/* (TEST/DEBUG) */
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 * This page is no longer very interesting.  If it was
	 * interesting (active or inactive/referenced), then we
	 * clear the reference bit and (re)enter it in the
	 * inactive queue.  Note wired pages should not have
	 * their reference bit cleared.
	 */
	assert ( !(m->absent && !m->unusual));

	if (m->gobbled) {		/* can this happen? */
		assert( !VM_PAGE_WIRED(m));

		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor || (VM_PAGE_WIRED(m)))
		return;

	if (!m->absent && clear_hw_reference == TRUE)
		pmap_clear_reference(m->phys_page);

	m->reference = FALSE;
	m->no_cache = FALSE;

	if (!m->inactive) {
		VM_PAGE_QUEUES_REMOVE(m);

		/* dirty internal pages are throttled when there's no default pager */
		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    m->dirty && m->object->internal &&
		    (m->object->purgable == VM_PURGABLE_DENY ||
		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;
		} else {
			if (m->object->named && m->object->ref_count == 1) {
				vm_page_speculate(m, FALSE);
#if DEVELOPMENT || DEBUG
				vm_page_speculative_recreated++;
#endif
			} else {
				VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
			}
		}
	}
}

/*
 * vm_page_enqueue_cleaned
 *
 * Put the page on the cleaned queue, mark it cleaned, etc.
 * Being on the cleaned queue (and having m->clean_queue set)
 * does ** NOT ** guarantee that the page is clean!
 *
 * Call with the queues lock held.
 */

void vm_page_enqueue_cleaned(vm_page_t m)
{
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	/* a gobbled page re-enters normal accounting when it's queued */
	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->clean_queue || m->pageout_queue || m->private || m->fictitious)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	queue_enter(&vm_page_queue_cleaned, m, vm_page_t, pageq);
	m->clean_queue = TRUE;
	vm_page_cleaned_count++;

	/* cleaned pages also count as inactive for the pageout targets */
	m->inactive = TRUE;
	vm_page_inactive_count++;
	if (m->object->internal) {
		vm_page_pageable_internal_count++;
	} else {
		vm_page_pageable_external_count++;
	}

	vm_pageout_enqueued_cleaned++;
}

/*
 *	vm_page_activate:
 *
 *	Put the specified page on the active list (if appropriate).
 *
 *	The page queues must be locked.
 */

void
vm_page_activate(
	register vm_page_t	m)
{
	VM_PAGE_CHECK(m);
#ifdef	FIXME_4778297
	assert(m->object != kernel_object);
#endif
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	if (m->gobbled) {
		assert( !VM_PAGE_WIRED(m));
		if (!m->private && !m->fictitious)
			vm_page_wire_count--;
		vm_page_gobble_count--;
		m->gobbled = FALSE;
	}
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
		return;

#if DEBUG
	if (m->active)
	        panic("vm_page_activate: already active");
#endif

	if (m->speculative) {
		DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
		DTRACE_VM2(pgfrec, int, 1, (uint64_t *), NULL);
	}

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {

		/* dirty internal pages are throttled when there's no default pager */
		if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default) &&
		    m->dirty && m->object->internal &&
		    (m->object->purgable == VM_PURGABLE_DENY ||
		     m->object->purgable == VM_PURGABLE_NONVOLATILE ||
		     m->object->purgable == VM_PURGABLE_VOLATILE)) {
			queue_enter(&vm_page_queue_throttled, m, vm_page_t, pageq);
			m->throttled = TRUE;
			vm_page_throttled_count++;
		} else {
			queue_enter(&vm_page_queue_active, m, vm_page_t, pageq);
			m->active = TRUE;
			vm_page_active_count++;
			if (m->object->internal) {
				vm_page_pageable_internal_count++;
			} else {
				vm_page_pageable_external_count++;
			}
		}
		m->reference = TRUE;
		m->no_cache = FALSE;
	}
	VM_PAGE_CHECK(m);
}


/*
 *      vm_page_speculate:
 *
 *      Put the specified page on the speculative list (if appropriate).
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate(
	vm_page_t	m,
	boolean_t	new)
{
	struct vm_speculative_age_q	*aq;

	VM_PAGE_CHECK(m);
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	assert( !(m->absent && !m->unusual));

	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->fictitious || m->compressor)
		return;

	VM_PAGE_QUEUES_REMOVE(m);

	if ( !VM_PAGE_WIRED(m)) {
		mach_timespec_t		ts;
		clock_sec_t sec;
		clock_nsec_t nsec;

		clock_get_system_nanotime(&sec, &nsec);
		ts.tv_sec = (unsigned int) sec;
		ts.tv_nsec = nsec;

		if (vm_page_speculative_count == 0) {

			speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
			speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;

			aq = &vm_page_queue_speculative[speculative_age_index];

		        /*
			 * set the timer to begin a new group
			 */
			aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
			aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

			ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
		} else {
			aq = &vm_page_queue_speculative[speculative_age_index];

			/* current aging bin expired: advance to the next bin (ring) */
			if (CMP_MACH_TIMESPEC(&ts, &aq->age_ts) >= 0) {

				speculative_age_index++;

				if (speculative_age_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
				        speculative_age_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				if (speculative_age_index == speculative_steal_index) {
					speculative_steal_index = speculative_age_index + 1;

					if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
						speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
				}
				aq = &vm_page_queue_speculative[speculative_age_index];

				/* the bin we're wrapping onto must be drained first */
				if (!queue_empty(&aq->age_q))
				        vm_page_speculate_ageit(aq);

				aq->age_ts.tv_sec = vm_page_speculative_q_age_ms / 1000;
				aq->age_ts.tv_nsec = (vm_page_speculative_q_age_ms % 1000) * 1000 * NSEC_PER_USEC;

				ADD_MACH_TIMESPEC(&aq->age_ts, &ts);
			}
		}
		enqueue_tail(&aq->age_q, &m->pageq);
		m->speculative = TRUE;
		vm_page_speculative_count++;
		if (m->object->internal) {
			vm_page_pageable_internal_count++;
		} else {
			vm_page_pageable_external_count++;
		}

		if (new == TRUE) {
			vm_object_lock_assert_exclusive(m->object);

			m->object->pages_created++;
#if DEVELOPMENT || DEBUG
			vm_page_speculative_created++;
#endif
		}
	}
	VM_PAGE_CHECK(m);
}


/*
 * move pages from the specified aging bin to
 * the speculative bin that pageout_scan claims from
 *
 *      The page queues must be locked.
 */
void
vm_page_speculate_ageit(struct vm_speculative_age_q *aq)
{
	struct vm_speculative_age_q	*sq;
	vm_page_t	t;

	sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];

	if (queue_empty(&sq->age_q)) {
		/* aged bin empty: adopt aq's whole chain and repoint the end links */
		sq->age_q.next = aq->age_q.next;
		sq->age_q.prev = aq->age_q.prev;

		t = (vm_page_t)sq->age_q.next;
		t->pageq.prev = &sq->age_q;

		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = &sq->age_q;
	} else {
		/* splice aq's chain onto the tail of the aged bin */
		t = (vm_page_t)sq->age_q.prev;
		t->pageq.next = aq->age_q.next;

		t = (vm_page_t)aq->age_q.next;
		t->pageq.prev = sq->age_q.prev;

		t = (vm_page_t)aq->age_q.prev;
		t->pageq.next = &sq->age_q;

		sq->age_q.prev = aq->age_q.prev;
	}
	queue_init(&aq->age_q);
}


void
vm_page_lru(
	vm_page_t	m)
{
	VM_PAGE_CHECK(m);
	assert(m->object != kernel_object);
	assert(m->phys_page != vm_page_guard_addr);

#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	/*
	 * if this page is currently on the pageout queue, we can't do the
	 * VM_PAGE_QUEUES_REMOVE (which doesn't handle the pageout queue case)
	 * and we can't remove it manually since we would need the object lock
	 * (which is not required here) to decrement the activity_in_progress
	 * reference which is held on the object while the page is in the pageout queue...
	 * just let the normal laundry processing proceed
	 */
	if (m->laundry || m->pageout_queue || m->private || m->compressor || (VM_PAGE_WIRED(m)))
		return;

	m->no_cache = FALSE;

	VM_PAGE_QUEUES_REMOVE(m);

	VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
}


/*
 * Move every page on the throttled queue over to the active queue.
 * Only meaningful when a dynamic pager is configured; takes the
 * page queues lock internally.
 */
void
vm_page_reactivate_all_throttled(void)
{
	vm_page_t	first_throttled, last_throttled;
	vm_page_t	first_active;
	vm_page_t	m;
	int		extra_active_count;
	int		extra_internal_count, extra_external_count;

	if (!VM_DYNAMIC_PAGING_ENABLED(memory_manager_default))
		return;

	extra_active_count = 0;
	extra_internal_count = 0;
	extra_external_count = 0;
	vm_page_lock_queues();
	if (! queue_empty(&vm_page_queue_throttled)) {
		/*
		 * Switch "throttled" pages to "active".
		 */
		queue_iterate(&vm_page_queue_throttled, m, vm_page_t, pageq) {
			VM_PAGE_CHECK(m);
			assert(m->throttled);
			assert(!m->active);
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));

			extra_active_count++;
			if (m->object->internal) {
				extra_internal_count++;
			} else {
				extra_external_count++;
			}

			m->throttled = FALSE;
			m->active = TRUE;
			VM_PAGE_CHECK(m);
		}

		/*
		 * Transfer the entire throttled queue to a regular LRU page queues.
		 * We insert it at the head of the active queue, so that these pages
		 * get re-evaluated by the LRU algorithm first, since they've been
		 * completely out of it until now.
		 */
		first_throttled = (vm_page_t) queue_first(&vm_page_queue_throttled);
		last_throttled = (vm_page_t) queue_last(&vm_page_queue_throttled);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);
		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_throttled;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_throttled;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_throttled;
		queue_prev(&first_throttled->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_throttled->pageq) = (queue_entry_t) first_active;

#if DEBUG
		printf("reactivated %d throttled pages\n", vm_page_throttled_count);
#endif
		queue_init(&vm_page_queue_throttled);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += extra_active_count;
		vm_page_pageable_internal_count += extra_internal_count;
		vm_page_pageable_external_count += extra_external_count;
		vm_page_throttled_count = 0;
	}
	assert(vm_page_throttled_count == 0);
	assert(queue_empty(&vm_page_queue_throttled));
	vm_page_unlock_queues();
}


/*
 * move pages from the indicated local queue to the global active queue
 * its ok to fail if we're below the hard limit and force == FALSE
 * the nolocks == TRUE case is to allow this function to be run on
 * the hibernate path
 */

void
vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks)
{
	struct vpl	*lq;
	vm_page_t	first_local, last_local;
	vm_page_t	first_active;
	vm_page_t	m;
	uint32_t	count = 0;

	if (vm_page_local_q == NULL)
		return;

	lq = &vm_page_local_q[lid].vpl_un.vpl;

	if (nolocks == FALSE) {
		/*
		 * below the hard limit and not forced: only a best-effort
		 * attempt, so bail if the queues lock is contended
		 */
		if (lq->vpl_count < vm_page_local_q_hard_limit && force == FALSE) {
			if ( !vm_page_trylockspin_queues())
				return;
		} else
			vm_page_lockspin_queues();

		VPL_LOCK(&lq->vpl_lock);
	}
	if (lq->vpl_count) {
		/*
		 * Switch "local" pages to "active".
		 */
		assert(!queue_empty(&lq->vpl_queue));

		queue_iterate(&lq->vpl_queue, m, vm_page_t, pageq) {
			VM_PAGE_CHECK(m);
			assert(m->local);
			assert(!m->active);
			assert(!m->inactive);
			assert(!m->speculative);
			assert(!VM_PAGE_WIRED(m));
			assert(!m->throttled);
			assert(!m->fictitious);

			if (m->local_id != lid)
				panic("vm_page_reactivate_local: found vm_page_t(%p) with wrong cpuid", m);

			m->local_id = 0;
			m->local = FALSE;
			m->active = TRUE;
			VM_PAGE_CHECK(m);

			count++;
		}
		if (count != lq->vpl_count)
			panic("vm_page_reactivate_local: count = %d, vm_page_local_count = %d\n", count, lq->vpl_count);

		/*
		 * Transfer the entire local queue to a regular LRU page queues.
		 */
		first_local = (vm_page_t) queue_first(&lq->vpl_queue);
		last_local = (vm_page_t) queue_last(&lq->vpl_queue);
		first_active = (vm_page_t) queue_first(&vm_page_queue_active);

		if (queue_empty(&vm_page_queue_active)) {
			queue_last(&vm_page_queue_active) = (queue_entry_t) last_local;
		} else {
			queue_prev(&first_active->pageq) = (queue_entry_t) last_local;
		}
		queue_first(&vm_page_queue_active) = (queue_entry_t) first_local;
		queue_prev(&first_local->pageq) = (queue_entry_t) &vm_page_queue_active;
		queue_next(&last_local->pageq) = (queue_entry_t) first_active;

		queue_init(&lq->vpl_queue);
		/*
		 * Adjust the global page counts.
		 */
		vm_page_active_count += lq->vpl_count;
		vm_page_pageable_internal_count += lq->vpl_internal_count;
		vm_page_pageable_external_count += lq->vpl_external_count;
		lq->vpl_count = 0;
		lq->vpl_internal_count = 0;
		lq->vpl_external_count = 0;
	}
	assert(queue_empty(&lq->vpl_queue));

	if (nolocks == FALSE) {
		VPL_UNLOCK(&lq->vpl_lock);
		vm_page_unlock_queues();
	}
}

/*
 * vm_page_part_zero_fill:
 *
 *	Zero-fill a part of the page.
 */
#define PMAP_ZERO_PART_PAGE_IMPLEMENTED
void
vm_page_part_zero_fill(
	vm_page_t	m,
	vm_offset_t	m_pa,
	vm_size_t	len)
{

#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

#ifdef PMAP_ZERO_PART_PAGE_IMPLEMENTED
	pmap_zero_part_page(m->phys_page, m_pa, len);
#else
	/*
	 * fallback: zero a scratch page, copy the parts of "m" outside
	 * [m_pa, m_pa + len) into it, then copy the result back over "m"
	 */
	vm_page_t	tmp;
	while (1) {
		tmp = vm_page_grab();
		if (tmp == VM_PAGE_NULL) {
			vm_page_wait(THREAD_UNINT);
			continue;
		}
		break;
	}
	vm_page_zero_fill(tmp);
	if(m_pa != 0) {
		vm_page_part_copy(m, 0, tmp, 0, m_pa);
	}
	if((m_pa + len) <  PAGE_SIZE) {
		vm_page_part_copy(m, m_pa + len, tmp,
				m_pa + len, PAGE_SIZE - (m_pa + len));
	}
	vm_page_copy(tmp,m);
	VM_PAGE_FREE(tmp);
#endif

}

/*
 *	vm_page_zero_fill:
 *
 *	Zero-fill the specified page.
 */
void
vm_page_zero_fill(
	vm_page_t	m)
{
        XPR(XPR_VM_PAGE,
                "vm_page_zero_fill, object 0x%X offset 0x%X page 0x%X\n",
                m->object, m->offset, m, 0,0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(m);
#endif

//	dbgTrace(0xAEAEAEAE, m->phys_page, 0);		/* (BRINGUP) */
	pmap_zero_page(m->phys_page);
}

/*
 *	vm_page_part_copy:
 *
 *	copy part of one page to another
 */

void
vm_page_part_copy(
	vm_page_t	src_m,
	vm_offset_t	src_pa,
	vm_page_t	dst_m,
	vm_offset_t	dst_pa,
	vm_size_t	len)
{
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dst_m);
#endif
	pmap_copy_part_page(src_m->phys_page, src_pa,
			dst_m->phys_page, dst_pa, len);
}

/*
 *	vm_page_copy:
 *
 *	Copy one page to another
 *
 * ENCRYPTED SWAP:
 * The source page should not be encrypted.  The caller should
 * make sure the page is decrypted first, if necessary.
 */

/* counters: number of cs validations / tainted copies done on behalf of vm_page_copy */
int vm_page_copy_cs_validations = 0;
int vm_page_copy_cs_tainted = 0;

void
vm_page_copy(
	vm_page_t	src_m,
	vm_page_t	dest_m)
{
        XPR(XPR_VM_PAGE,
        "vm_page_copy, object 0x%X offset 0x%X to object 0x%X offset 0x%X\n",
        src_m->object, src_m->offset,
	dest_m->object, dest_m->offset,
	0);
#if 0
	/*
	 * we don't hold the page queue lock
	 * so this check isn't safe to make
	 */
	VM_PAGE_CHECK(src_m);
	VM_PAGE_CHECK(dest_m);
#endif
	vm_object_lock_assert_held(src_m->object);

	/*
	 * ENCRYPTED SWAP:
	 * The source page should not be encrypted at this point.
	 * The destination page will therefore not contain encrypted
	 * data after the copy.
	 */
	if (src_m->encrypted) {
		panic("vm_page_copy: source page %p is encrypted\n", src_m);
	}
	dest_m->encrypted = FALSE;

	if (src_m->object != VM_OBJECT_NULL &&
	    src_m->object->code_signed) {
		/*
		 * We're copying a page from a code-signed object.
		 * Whoever ends up mapping the copy page might care about
		 * the original page's integrity, so let's validate the
		 * source page now.
		 */
		vm_page_copy_cs_validations++;
		vm_page_validate_cs(src_m);
	}

	if (vm_page_is_slideable(src_m)) {
		/*
		 * slide the source page first; mark it busy around the
		 * slide and only wake waiters if we set "busy" ourselves
		 */
		boolean_t was_busy = src_m->busy;
		src_m->busy = TRUE;
		(void) vm_page_slide(src_m, 0);
		assert(src_m->busy);
		if (!was_busy) {
			PAGE_WAKEUP_DONE(src_m);
		}
	}

	/*
	 * Propagate the cs_tainted bit to the copy page. Do not propagate
	 * the cs_validated bit.
	 */
	dest_m->cs_tainted = src_m->cs_tainted;
	if (dest_m->cs_tainted) {
		vm_page_copy_cs_tainted++;
	}
	dest_m->slid = src_m->slid;
	dest_m->error = src_m->error; /* sliding src_m might have failed... */
	pmap_copy_page(src_m->phys_page, dest_m->phys_page);
}

#if MACH_ASSERT
/*
 * Dump the full state of a vm_page_t to the console (debug builds only).
 */
static void
_vm_page_print(
	vm_page_t	p)
{
	printf("vm_page %p: \n", p);
	printf("  pageq: next=%p prev=%p\n", p->pageq.next, p->pageq.prev);
	printf("  listq: next=%p prev=%p\n", p->listq.next, p->listq.prev);
	printf("  next=%p\n", VM_PAGE_UNPACK_PTR(p->next_m));
	printf("  object=%p offset=0x%llx\n", p->object, p->offset);
	printf("  wire_count=%u\n", p->wire_count);

	printf("  %slocal, %sinactive, %sactive, %spageout_queue, %sspeculative, %slaundry\n",
	       (p->local ? "" : "!"),
	       (p->inactive ? "" : "!"),
	       (p->active ? "" : "!"),
	       (p->pageout_queue ? "" : "!"),
	       (p->speculative ? "" : "!"),
	       (p->laundry ? "" : "!"));
	printf("  %sfree, %sref, %sgobbled, %sprivate, %sthrottled\n",
	       (p->free ? "" : "!"),
	       (p->reference ? "" : "!"),
	       (p->gobbled ? "" : "!"),
	       (p->private ? "" : "!"),
	       (p->throttled ? "" : "!"));
	printf("  %sbusy, %swanted, %stabled, %sfictitious, %spmapped, %swpmapped\n",
	       (p->busy ? "" : "!"),
	       (p->wanted ? "" : "!"),
	       (p->tabled ? "" : "!"),
	       (p->fictitious ? "" : "!"),
	       (p->pmapped ? "" : "!"),
	       (p->wpmapped ? "" : "!"));
	printf("  %spageout, %sabsent, %serror, %sdirty, %scleaning, %sprecious, %sclustered\n",
	       (p->pageout ? "" : "!"),
	       (p->absent ? "" : "!"),
	       (p->error ? "" : "!"),
	       (p->dirty ? "" : "!"),
	       (p->cleaning ? "" : "!"),
	       (p->precious ? "" : "!"),
	       (p->clustered ? "" : "!"));
	printf("  %soverwriting, %srestart, %sunusual, %sencrypted, %sencrypted_cleaning\n",
	       (p->overwriting ? "" : "!"),
	       (p->restart ? "" : "!"),
	       (p->unusual ? "" : "!"),
	       (p->encrypted ? "" : "!"),
	       (p->encrypted_cleaning ? "" : "!"));
	printf("  %scs_validated, %scs_tainted, %sno_cache\n",
	       (p->cs_validated ? "" : "!"),
	       (p->cs_tainted ? "" : "!"),
	       (p->no_cache ? "" : "!"));

	printf("phys_page=0x%x\n", p->phys_page);
}

/*
 *	Check that the list of pages is ordered by
 *	ascending physical address and has no holes.
 */
static int
vm_page_verify_contiguous(
	vm_page_t	pages,
	unsigned int	npages)
{
	register vm_page_t	m;
	unsigned int		page_count;
	vm_offset_t		prev_addr;

	prev_addr = pages->phys_page;
	page_count = 1;
	for (m = NEXT_PAGE(pages); m != VM_PAGE_NULL; m = NEXT_PAGE(m)) {
		if (m->phys_page != prev_addr + 1) {
			printf("m %p prev_addr 0x%lx, current addr 0x%x\n",
			       m, (long)prev_addr, m->phys_page);
			printf("pages %p page_count %d npages %d\n", pages, page_count, npages);
			panic("vm_page_verify_contiguous:  not contiguous!");
		}
		prev_addr = m->phys_page;
		++page_count;
	}
	if (page_count != npages) {
		printf("pages %p actual count 0x%x but requested 0x%x\n",
		       pages, page_count, npages);
		panic("vm_page_verify_contiguous:  count error");
	}
	return 1;
}


/*
 *	Check the free lists for proper length etc.
 *
 *	Walks one free queue, validating the back-links, the "busy" bit on
 *	every entry, and (when "color" != -1) that each page is marked free
 *	and of the expected color.  "look_for_page"/"expect_page" let the
 *	caller assert a specific page's presence or absence on the queue;
 *	on a miss, the other color queues (and the lopage queue) are
 *	searched before panicking.  Returns the number of pages seen.
 */
static boolean_t vm_page_verify_this_free_list_enabled = FALSE;
static unsigned int
vm_page_verify_free_list(
	queue_head_t	*vm_page_queue,
	unsigned int	color,
	vm_page_t	look_for_page,
	boolean_t	expect_page)
{
	unsigned int 	npages;
	vm_page_t	m;
	vm_page_t	prev_m;
	boolean_t	found_page;

	if (! vm_page_verify_this_free_list_enabled)
		return 0;

	found_page = FALSE;
	npages = 0;
	prev_m = (vm_page_t) vm_page_queue;
	queue_iterate(vm_page_queue,
		      m,
		      vm_page_t,
		      pageq) {

		if (m == look_for_page) {
			found_page = TRUE;
		}
		if ((vm_page_t) m->pageq.prev != prev_m)
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p corrupted prev ptr %p instead of %p\n",
			      color, npages, m, m->pageq.prev, prev_m);
		if ( ! m->busy )
			panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not busy\n",
			      color, npages, m);
		if (color != (unsigned int) -1) {
			if ((m->phys_page & vm_color_mask) != color)
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p wrong color %u instead of %u\n",
				      color, npages, m, m->phys_page & vm_color_mask, color);
			if ( ! m->free )
				panic("vm_page_verify_free_list(color=%u, npages=%u): page %p not free\n",
				      color, npages, m);
		}
		++npages;
		prev_m = m;
	}
	if (look_for_page != VM_PAGE_NULL) {
		unsigned int other_color;

		if (expect_page && !found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p not found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
			_vm_page_print(look_for_page);
			for (other_color = 0;
			     other_color < vm_colors;
			     other_color++) {
				if (other_color == color)
					continue;
				vm_page_verify_free_list(&vm_page_queue_free[other_color],
							 other_color, look_for_page, FALSE);
			}
			if (color == (unsigned int) -1) {
				vm_page_verify_free_list(&vm_lopage_queue_free,
							 (unsigned int) -1, look_for_page, FALSE);
			}
			panic("vm_page_verify_free_list(color=%u)\n", color);
		}
		if (!expect_page && found_page) {
			printf("vm_page_verify_free_list(color=%u, npages=%u): page %p found phys=%u\n",
			       color, npages, look_for_page, look_for_page->phys_page);
		}
	}
	return npages;
}

/*
 * Verify every color free queue and the lopage free queue, and check the
 * totals against vm_page_free_count / vm_lopage_free_count.  Gated by
 * vm_page_verify_all_free_lists_enabled; temporarily enables per-queue
 * checking unless some other party already turned it on globally.
 */
static boolean_t vm_page_verify_all_free_lists_enabled = FALSE;
static void
vm_page_verify_free_lists( void )
{
	unsigned int	color, npages, nlopages;
	boolean_t	toggle = TRUE;

	if (! vm_page_verify_all_free_lists_enabled)
		return;

	npages = 0;

	lck_mtx_lock(&vm_page_queue_free_lock);

	if (vm_page_verify_this_free_list_enabled == TRUE) {
		/*
		 * This variable has been set globally for extra checking of
		 * each free list Q. Since we didn't set it, we don't own it
		 * and we shouldn't toggle it.
		 */
		toggle = FALSE;
	}

	if (toggle == TRUE) {
		vm_page_verify_this_free_list_enabled = TRUE;
	}

	for( color = 0; color < vm_colors; color++ ) {
		npages += vm_page_verify_free_list(&vm_page_queue_free[color],
						   color, VM_PAGE_NULL, FALSE);
	}
	nlopages = vm_page_verify_free_list(&vm_lopage_queue_free,
					    (unsigned int) -1,
					    VM_PAGE_NULL, FALSE);
	if (npages != vm_page_free_count || nlopages != vm_lopage_free_count)
		panic("vm_page_verify_free_lists:  "
		      "npages %u free_count %d nlopages %u lo_free_count %u",
		      npages, vm_page_free_count, nlopages, vm_lopage_free_count);

	if (toggle == TRUE) {
		vm_page_verify_this_free_list_enabled = FALSE;
	}

	lck_mtx_unlock(&vm_page_queue_free_lock);
}

/*
 * Assert that "mem" is on at most "val" of the page queues and that a
 * wired page is on none of them.  The page queues must be locked.
 */
void
vm_page_queues_assert(
	vm_page_t	mem,
	int		val)
{
#if DEBUG
	lck_mtx_assert(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
#endif
	if (mem->free + mem->active + mem->inactive + mem->speculative +
	    mem->throttled + mem->pageout_queue > (val)) {
		_vm_page_print(mem);
		panic("vm_page_queues_assert(%p, %d)\n", mem, val);
	}
	if (VM_PAGE_WIRED(mem)) {
		assert(!mem->active);
		assert(!mem->inactive);
		assert(!mem->speculative);
		assert(!mem->throttled);
		assert(!mem->pageout_queue);
	}
}
#endif	/* MACH_ASSERT */


/*
 *	CONTIGUOUS PAGE ALLOCATION
 *
 *	Find a region large enough to contain at least n pages
 *	of contiguous physical memory.
 *
 * This is done by traversing the vm_page_t array in a linear fashion
 * we assume that the vm_page_t array has the available physical pages in an
 * ordered, ascending list... this is currently true of all our implementations
 * and must remain so... there can be 'holes' in the array... we also can
 * no longer tolerate the vm_page_t's in the list being 'freed' and reclaimed
 * which used to happen via 'vm_page_convert'... that function was no longer
 * being called and was removed...
 *
 * The basic flow consists of stabilizing some of the interesting state of
 * a vm_page_t behind the vm_page_queue and vm_page_free locks... we start our
 * sweep at the beginning of the array looking for pages that meet our criteria
 * for a 'stealable' page... currently we are pretty conservative... if the page
 * meets this criteria and is physically contiguous to the previous page in the 'run'
 * we keep developing it.  If we hit a page that doesn't fit, we reset our state
 * and start to develop a new run... if at this point we've already considered
 * at least MAX_CONSIDERED_BEFORE_YIELD pages, we'll drop the 2 locks we hold,
 * and mutex_pause (which will yield the processor), to keep the latency low w/r
 * to other threads trying to acquire free pages (or move pages from q to q),
 * and then continue from the spot we left off... we only make 1 pass through the
 * array.  Once we have a 'run' that is long enough, we'll go into the loop which
 * steals the pages from the queues they're currently on... pages on the free
 * queue can be stolen directly... pages that are on any of the other queues
 * must be removed from the object they are tabled on... this requires taking the
 * object lock... we do this as a 'try' to prevent deadlocks... if the 'try' fails
 * or if the state of the page behind the vm_object lock is no longer viable, we'll
 * dump the pages we've currently stolen back to the free list, and pick up our
 * scan from the point where we aborted the 'current' run.
 *
 *
 * Requirements:
 *	- neither vm_page_queue nor vm_free_list lock can be held on entry
 *
 * Returns a pointer to a list of gobbled/wired pages or VM_PAGE_NULL.
 *
 * Algorithm:
 */

#define	MAX_CONSIDERED_BEFORE_YIELD	1000

/*
 * Reset the per-run scan state (locals of vm_page_find_contiguous)
 * so that the next page considered starts a fresh run.
 */
#define RESET_STATE_OF_RUN()	\
	MACRO_BEGIN		\
	prevcontaddr = -2;	\
	start_pnum = -1;	\
	free_considered = 0;	\
	substitute_needed = 0;	\
	npages = 0;		\
	MACRO_END

/*
 * Can we steal in-use (i.e. not free) pages when searching for
 * physically-contiguous pages ?
 */
#define VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 1

/* resume points for subsequent scans (separate index for KMA_LOMEM searches) */
static unsigned int vm_page_find_contiguous_last_idx = 0,  vm_page_lomem_find_contiguous_last_idx = 0;
#if DEBUG
int vm_page_find_contig_debug = 0;
#endif

static vm_page_t
vm_page_find_contiguous(
	unsigned int	contig_pages,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t	m = NULL;
	ppnum_t		prevcontaddr;
	ppnum_t		start_pnum;
	unsigned int	npages, considered, scanned;
	unsigned int	page_idx, start_idx, last_idx, orig_last_idx;
	unsigned int	idx_last_contig_page_found = 0;
	int		free_considered, free_available;
	int		substitute_needed;
	boolean_t	wrapped;
#if DEBUG
	clock_sec_t	tv_start_sec, tv_end_sec;
	clock_usec_t	tv_start_usec, tv_end_usec;
#endif
#if MACH_ASSERT
	int		yielded = 0;
	int		dumped_run = 0;
	int		stolen_pages = 0;
	int		compressed_pages = 0;
#endif

	if (contig_pages == 0)
		return VM_PAGE_NULL;

#if MACH_ASSERT
	vm_page_verify_free_lists();
#endif
#if DEBUG
clock_get_system_microtime(&tv_start_sec, &tv_start_usec); 4099#endif 4100 PAGE_REPLACEMENT_ALLOWED(TRUE); 4101 4102 vm_page_lock_queues(); 4103 lck_mtx_lock(&vm_page_queue_free_lock); 4104 4105 RESET_STATE_OF_RUN(); 4106 4107 scanned = 0; 4108 considered = 0; 4109 free_available = vm_page_free_count - vm_page_free_reserved; 4110 4111 wrapped = FALSE; 4112 4113 if(flags & KMA_LOMEM) 4114 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx; 4115 else 4116 idx_last_contig_page_found = vm_page_find_contiguous_last_idx; 4117 4118 orig_last_idx = idx_last_contig_page_found; 4119 last_idx = orig_last_idx; 4120 4121 for (page_idx = last_idx, start_idx = last_idx; 4122 npages < contig_pages && page_idx < vm_pages_count; 4123 page_idx++) { 4124retry: 4125 if (wrapped && 4126 npages == 0 && 4127 page_idx >= orig_last_idx) { 4128 /* 4129 * We're back where we started and we haven't 4130 * found any suitable contiguous range. Let's 4131 * give up. 4132 */ 4133 break; 4134 } 4135 scanned++; 4136 m = &vm_pages[page_idx]; 4137 4138 assert(!m->fictitious); 4139 assert(!m->private); 4140 4141 if (max_pnum && m->phys_page > max_pnum) { 4142 /* no more low pages... 
*/ 4143 break; 4144 } 4145 if (!npages & ((m->phys_page & pnum_mask) != 0)) { 4146 /* 4147 * not aligned 4148 */ 4149 RESET_STATE_OF_RUN(); 4150 4151 } else if (VM_PAGE_WIRED(m) || m->gobbled || 4152 m->encrypted_cleaning || 4153 m->pageout_queue || m->laundry || m->wanted || 4154 m->cleaning || m->overwriting || m->pageout) { 4155 /* 4156 * page is in a transient state 4157 * or a state we don't want to deal 4158 * with, so don't consider it which 4159 * means starting a new run 4160 */ 4161 RESET_STATE_OF_RUN(); 4162 4163 } else if (!m->free && !m->active && !m->inactive && !m->speculative && !m->throttled && !m->compressor) { 4164 /* 4165 * page needs to be on one of our queues 4166 * or it needs to belong to the compressor pool 4167 * in order for it to be stable behind the 4168 * locks we hold at this point... 4169 * if not, don't consider it which 4170 * means starting a new run 4171 */ 4172 RESET_STATE_OF_RUN(); 4173 4174 } else if (!m->free && (!m->tabled || m->busy)) { 4175 /* 4176 * pages on the free list are always 'busy' 4177 * so we couldn't test for 'busy' in the check 4178 * for the transient states... pages that are 4179 * 'free' are never 'tabled', so we also couldn't 4180 * test for 'tabled'. So we check here to make 4181 * sure that a non-free page is not busy and is 4182 * tabled on an object... 4183 * if not, don't consider it which 4184 * means starting a new run 4185 */ 4186 RESET_STATE_OF_RUN(); 4187 4188 } else { 4189 if (m->phys_page != prevcontaddr + 1) { 4190 if ((m->phys_page & pnum_mask) != 0) { 4191 RESET_STATE_OF_RUN(); 4192 goto did_consider; 4193 } else { 4194 npages = 1; 4195 start_idx = page_idx; 4196 start_pnum = m->phys_page; 4197 } 4198 } else { 4199 npages++; 4200 } 4201 prevcontaddr = m->phys_page; 4202 4203 VM_PAGE_CHECK(m); 4204 if (m->free) { 4205 free_considered++; 4206 } else { 4207 /* 4208 * This page is not free. 4209 * If we can't steal used pages, 4210 * we have to give up this run 4211 * and keep looking. 
4212 * Otherwise, we might need to 4213 * move the contents of this page 4214 * into a substitute page. 4215 */ 4216#if VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 4217 if (m->pmapped || m->dirty || m->precious) { 4218 substitute_needed++; 4219 } 4220#else 4221 RESET_STATE_OF_RUN(); 4222#endif 4223 } 4224 4225 if ((free_considered + substitute_needed) > free_available) { 4226 /* 4227 * if we let this run continue 4228 * we will end up dropping the vm_page_free_count 4229 * below the reserve limit... we need to abort 4230 * this run, but we can at least re-consider this 4231 * page... thus the jump back to 'retry' 4232 */ 4233 RESET_STATE_OF_RUN(); 4234 4235 if (free_available && considered <= MAX_CONSIDERED_BEFORE_YIELD) { 4236 considered++; 4237 goto retry; 4238 } 4239 /* 4240 * free_available == 0 4241 * so can't consider any free pages... if 4242 * we went to retry in this case, we'd 4243 * get stuck looking at the same page 4244 * w/o making any forward progress 4245 * we also want to take this path if we've already 4246 * reached our limit that controls the lock latency 4247 */ 4248 } 4249 } 4250did_consider: 4251 if (considered > MAX_CONSIDERED_BEFORE_YIELD && npages <= 1) { 4252 4253 PAGE_REPLACEMENT_ALLOWED(FALSE); 4254 4255 lck_mtx_unlock(&vm_page_queue_free_lock); 4256 vm_page_unlock_queues(); 4257 4258 mutex_pause(0); 4259 4260 PAGE_REPLACEMENT_ALLOWED(TRUE); 4261 4262 vm_page_lock_queues(); 4263 lck_mtx_lock(&vm_page_queue_free_lock); 4264 4265 RESET_STATE_OF_RUN(); 4266 /* 4267 * reset our free page limit since we 4268 * dropped the lock protecting the vm_page_free_queue 4269 */ 4270 free_available = vm_page_free_count - vm_page_free_reserved; 4271 considered = 0; 4272#if MACH_ASSERT 4273 yielded++; 4274#endif 4275 goto retry; 4276 } 4277 considered++; 4278 } 4279 m = VM_PAGE_NULL; 4280 4281 if (npages != contig_pages) { 4282 if (!wrapped) { 4283 /* 4284 * We didn't find a contiguous range but we didn't 4285 * start from the very first page. 
4286 * Start again from the very first page. 4287 */ 4288 RESET_STATE_OF_RUN(); 4289 if( flags & KMA_LOMEM) 4290 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = 0; 4291 else 4292 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = 0; 4293 last_idx = 0; 4294 page_idx = last_idx; 4295 wrapped = TRUE; 4296 goto retry; 4297 } 4298 lck_mtx_unlock(&vm_page_queue_free_lock); 4299 } else { 4300 vm_page_t m1; 4301 vm_page_t m2; 4302 unsigned int cur_idx; 4303 unsigned int tmp_start_idx; 4304 vm_object_t locked_object = VM_OBJECT_NULL; 4305 boolean_t abort_run = FALSE; 4306 4307 assert(page_idx - start_idx == contig_pages); 4308 4309 tmp_start_idx = start_idx; 4310 4311 /* 4312 * first pass through to pull the free pages 4313 * off of the free queue so that in case we 4314 * need substitute pages, we won't grab any 4315 * of the free pages in the run... we'll clear 4316 * the 'free' bit in the 2nd pass, and even in 4317 * an abort_run case, we'll collect all of the 4318 * free pages in this run and return them to the free list 4319 */ 4320 while (start_idx < page_idx) { 4321 4322 m1 = &vm_pages[start_idx++]; 4323 4324#if !VM_PAGE_FIND_CONTIGUOUS_CAN_STEAL 4325 assert(m1->free); 4326#endif 4327 4328 if (m1->free) { 4329 unsigned int color; 4330 4331 color = m1->phys_page & vm_color_mask; 4332#if MACH_ASSERT 4333 vm_page_verify_free_list(&vm_page_queue_free[color], color, m1, TRUE); 4334#endif 4335 queue_remove(&vm_page_queue_free[color], 4336 m1, 4337 vm_page_t, 4338 pageq); 4339 m1->pageq.next = NULL; 4340 m1->pageq.prev = NULL; 4341#if MACH_ASSERT 4342 vm_page_verify_free_list(&vm_page_queue_free[color], color, VM_PAGE_NULL, FALSE); 4343#endif 4344 /* 4345 * Clear the "free" bit so that this page 4346 * does not get considered for another 4347 * concurrent physically-contiguous allocation. 
4348 */ 4349 m1->free = FALSE; 4350 assert(m1->busy); 4351 4352 vm_page_free_count--; 4353 } 4354 } 4355 if( flags & KMA_LOMEM) 4356 vm_page_lomem_find_contiguous_last_idx = page_idx; 4357 else 4358 vm_page_find_contiguous_last_idx = page_idx; 4359 4360 /* 4361 * we can drop the free queue lock at this point since 4362 * we've pulled any 'free' candidates off of the list 4363 * we need it dropped so that we can do a vm_page_grab 4364 * when substituing for pmapped/dirty pages 4365 */ 4366 lck_mtx_unlock(&vm_page_queue_free_lock); 4367 4368 start_idx = tmp_start_idx; 4369 cur_idx = page_idx - 1; 4370 4371 while (start_idx++ < page_idx) { 4372 /* 4373 * must go through the list from back to front 4374 * so that the page list is created in the 4375 * correct order - low -> high phys addresses 4376 */ 4377 m1 = &vm_pages[cur_idx--]; 4378 4379 assert(!m1->free); 4380 4381 if (m1->object == VM_OBJECT_NULL) { 4382 /* 4383 * page has already been removed from 4384 * the free list in the 1st pass 4385 */ 4386 assert(m1->offset == (vm_object_offset_t) -1); 4387 assert(m1->busy); 4388 assert(!m1->wanted); 4389 assert(!m1->laundry); 4390 } else { 4391 vm_object_t object; 4392 int refmod; 4393 boolean_t disconnected, reusable; 4394 4395 if (abort_run == TRUE) 4396 continue; 4397 4398 object = m1->object; 4399 4400 if (object != locked_object) { 4401 if (locked_object) { 4402 vm_object_unlock(locked_object); 4403 locked_object = VM_OBJECT_NULL; 4404 } 4405 if (vm_object_lock_try(object)) 4406 locked_object = object; 4407 } 4408 if (locked_object == VM_OBJECT_NULL || 4409 (VM_PAGE_WIRED(m1) || m1->gobbled || 4410 m1->encrypted_cleaning || 4411 m1->pageout_queue || m1->laundry || m1->wanted || 4412 m1->cleaning || m1->overwriting || m1->pageout || m1->busy)) { 4413 4414 if (locked_object) { 4415 vm_object_unlock(locked_object); 4416 locked_object = VM_OBJECT_NULL; 4417 } 4418 tmp_start_idx = cur_idx; 4419 abort_run = TRUE; 4420 continue; 4421 } 4422 4423 disconnected = FALSE; 4424 
reusable = FALSE; 4425 4426 if ((m1->reusable || 4427 m1->object->all_reusable) && 4428 m1->inactive && 4429 !m1->dirty && 4430 !m1->reference) { 4431 /* reusable page... */ 4432 refmod = pmap_disconnect(m1->phys_page); 4433 disconnected = TRUE; 4434 if (refmod == 0) { 4435 /* 4436 * ... not reused: can steal 4437 * without relocating contents. 4438 */ 4439 reusable = TRUE; 4440 } 4441 } 4442 4443 if ((m1->pmapped && 4444 ! reusable) || 4445 m1->dirty || 4446 m1->precious) { 4447 vm_object_offset_t offset; 4448 4449 m2 = vm_page_grab(); 4450 4451 if (m2 == VM_PAGE_NULL) { 4452 if (locked_object) { 4453 vm_object_unlock(locked_object); 4454 locked_object = VM_OBJECT_NULL; 4455 } 4456 tmp_start_idx = cur_idx; 4457 abort_run = TRUE; 4458 continue; 4459 } 4460 if (! disconnected) { 4461 if (m1->pmapped) 4462 refmod = pmap_disconnect(m1->phys_page); 4463 else 4464 refmod = 0; 4465 } 4466 4467 /* copy the page's contents */ 4468 pmap_copy_page(m1->phys_page, m2->phys_page); 4469 /* copy the page's state */ 4470 assert(!VM_PAGE_WIRED(m1)); 4471 assert(!m1->free); 4472 assert(!m1->pageout_queue); 4473 assert(!m1->laundry); 4474 m2->reference = m1->reference; 4475 assert(!m1->gobbled); 4476 assert(!m1->private); 4477 m2->no_cache = m1->no_cache; 4478 m2->xpmapped = 0; 4479 assert(!m1->busy); 4480 assert(!m1->wanted); 4481 assert(!m1->fictitious); 4482 m2->pmapped = m1->pmapped; /* should flush cache ? 
*/ 4483 m2->wpmapped = m1->wpmapped; 4484 assert(!m1->pageout); 4485 m2->absent = m1->absent; 4486 m2->error = m1->error; 4487 m2->dirty = m1->dirty; 4488 assert(!m1->cleaning); 4489 m2->precious = m1->precious; 4490 m2->clustered = m1->clustered; 4491 assert(!m1->overwriting); 4492 m2->restart = m1->restart; 4493 m2->unusual = m1->unusual; 4494 m2->encrypted = m1->encrypted; 4495 assert(!m1->encrypted_cleaning); 4496 m2->cs_validated = m1->cs_validated; 4497 m2->cs_tainted = m1->cs_tainted; 4498 4499 /* 4500 * If m1 had really been reusable, 4501 * we would have just stolen it, so 4502 * let's not propagate it's "reusable" 4503 * bit and assert that m2 is not 4504 * marked as "reusable". 4505 */ 4506 // m2->reusable = m1->reusable; 4507 assert(!m2->reusable); 4508 4509 assert(!m1->lopage); 4510 m2->slid = m1->slid; 4511 m2->compressor = m1->compressor; 4512 4513 /* 4514 * page may need to be flushed if 4515 * it is marshalled into a UPL 4516 * that is going to be used by a device 4517 * that doesn't support coherency 4518 */ 4519 m2->written_by_kernel = TRUE; 4520 4521 /* 4522 * make sure we clear the ref/mod state 4523 * from the pmap layer... else we risk 4524 * inheriting state from the last time 4525 * this page was used... 
4526 */ 4527 pmap_clear_refmod(m2->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); 4528 4529 if (refmod & VM_MEM_REFERENCED) 4530 m2->reference = TRUE; 4531 if (refmod & VM_MEM_MODIFIED) { 4532 SET_PAGE_DIRTY(m2, TRUE); 4533 } 4534 offset = m1->offset; 4535 4536 /* 4537 * completely cleans up the state 4538 * of the page so that it is ready 4539 * to be put onto the free list, or 4540 * for this purpose it looks like it 4541 * just came off of the free list 4542 */ 4543 vm_page_free_prepare(m1); 4544 4545 /* 4546 * now put the substitute page 4547 * on the object 4548 */ 4549 vm_page_insert_internal(m2, locked_object, offset, TRUE, TRUE, FALSE); 4550 4551 if (m2->compressor) { 4552 m2->pmapped = TRUE; 4553 m2->wpmapped = TRUE; 4554 4555 PMAP_ENTER(kernel_pmap, m2->offset, m2, 4556 VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE); 4557#if MACH_ASSERT 4558 compressed_pages++; 4559#endif 4560 } else { 4561 if (m2->reference) 4562 vm_page_activate(m2); 4563 else 4564 vm_page_deactivate(m2); 4565 } 4566 PAGE_WAKEUP_DONE(m2); 4567 4568 } else { 4569 assert(!m1->compressor); 4570 4571 /* 4572 * completely cleans up the state 4573 * of the page so that it is ready 4574 * to be put onto the free list, or 4575 * for this purpose it looks like it 4576 * just came off of the free list 4577 */ 4578 vm_page_free_prepare(m1); 4579 } 4580#if MACH_ASSERT 4581 stolen_pages++; 4582#endif 4583 } 4584 m1->pageq.next = (queue_entry_t) m; 4585 m1->pageq.prev = NULL; 4586 m = m1; 4587 } 4588 if (locked_object) { 4589 vm_object_unlock(locked_object); 4590 locked_object = VM_OBJECT_NULL; 4591 } 4592 4593 if (abort_run == TRUE) { 4594 if (m != VM_PAGE_NULL) { 4595 vm_page_free_list(m, FALSE); 4596 } 4597#if MACH_ASSERT 4598 dumped_run++; 4599#endif 4600 /* 4601 * want the index of the last 4602 * page in this run that was 4603 * successfully 'stolen', so back 4604 * it up 1 for the auto-decrement on use 4605 * and 1 more to bump back over this page 4606 */ 4607 page_idx = tmp_start_idx 
+ 2; 4608 if (page_idx >= vm_pages_count) { 4609 if (wrapped) 4610 goto done_scanning; 4611 page_idx = last_idx = 0; 4612 wrapped = TRUE; 4613 } 4614 abort_run = FALSE; 4615 4616 /* 4617 * We didn't find a contiguous range but we didn't 4618 * start from the very first page. 4619 * Start again from the very first page. 4620 */ 4621 RESET_STATE_OF_RUN(); 4622 4623 if( flags & KMA_LOMEM) 4624 idx_last_contig_page_found = vm_page_lomem_find_contiguous_last_idx = page_idx; 4625 else 4626 idx_last_contig_page_found = vm_page_find_contiguous_last_idx = page_idx; 4627 4628 last_idx = page_idx; 4629 4630 lck_mtx_lock(&vm_page_queue_free_lock); 4631 /* 4632 * reset our free page limit since we 4633 * dropped the lock protecting the vm_page_free_queue 4634 */ 4635 free_available = vm_page_free_count - vm_page_free_reserved; 4636 goto retry; 4637 } 4638 4639 for (m1 = m; m1 != VM_PAGE_NULL; m1 = NEXT_PAGE(m1)) { 4640 4641 if (wire == TRUE) 4642 m1->wire_count++; 4643 else 4644 m1->gobbled = TRUE; 4645 } 4646 if (wire == FALSE) 4647 vm_page_gobble_count += npages; 4648 4649 /* 4650 * gobbled pages are also counted as wired pages 4651 */ 4652 vm_page_wire_count += npages; 4653 4654 assert(vm_page_verify_contiguous(m, npages)); 4655 } 4656done_scanning: 4657 PAGE_REPLACEMENT_ALLOWED(FALSE); 4658 4659 vm_page_unlock_queues(); 4660 4661#if DEBUG 4662 clock_get_system_microtime(&tv_end_sec, &tv_end_usec); 4663 4664 tv_end_sec -= tv_start_sec; 4665 if (tv_end_usec < tv_start_usec) { 4666 tv_end_sec--; 4667 tv_end_usec += 1000000; 4668 } 4669 tv_end_usec -= tv_start_usec; 4670 if (tv_end_usec >= 1000000) { 4671 tv_end_sec++; 4672 tv_end_sec -= 1000000; 4673 } 4674 if (vm_page_find_contig_debug) { 4675 printf("%s(num=%d,low=%d): found %d pages at 0x%llx in %ld.%06ds... started at %d... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages... 
stole %d compressed pages\n",
			   __func__, contig_pages, max_pnum, npages, (vm_object_offset_t)start_pnum << PAGE_SHIFT,
			   (long)tv_end_sec, tv_end_usec, orig_last_idx,
			   scanned, yielded, dumped_run, stolen_pages, compressed_pages);
	}

#endif
#if MACH_ASSERT
	vm_page_verify_free_lists();
#endif
	return m;
}

/*
 *	Allocate a list of contiguous, wired pages.
 *
 *	size must be a multiple of PAGE_SIZE; the page count must also fit
 *	in an unsigned int (checked explicitly for 32-bit truncation).
 *	On success *list points at the first page of a run ordered by
 *	ascending physical address, chained through pageq.next; the pages
 *	are wired (wire == TRUE) or gobbled by vm_page_find_contiguous.
 *	Returns KERN_INVALID_ARGUMENT on a bad size, KERN_NO_SPACE if no
 *	contiguous run could be found.
 */
kern_return_t
cpm_allocate(
	vm_size_t	size,
	vm_page_t	*list,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	boolean_t	wire,
	int		flags)
{
	vm_page_t		pages;
	unsigned int		npages;

	if (size % PAGE_SIZE != 0)
		return KERN_INVALID_ARGUMENT;

	/* reject sizes whose page count doesn't fit in an unsigned int */
	npages = (unsigned int) (size / PAGE_SIZE);
	if (npages != size / PAGE_SIZE) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Obtain a pointer to a subset of the free
	 *	list large enough to satisfy the request;
	 *	the region will be physically contiguous.
	 */
	pages = vm_page_find_contiguous(npages, max_pnum, pnum_mask, wire, flags);

	if (pages == VM_PAGE_NULL)
		return KERN_NO_SPACE;
	/*
	 * determine need for wakeups
	 */
	if ((vm_page_free_count < vm_page_free_min) ||
	     ((vm_page_free_count < vm_page_free_target) &&
	      ((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_min)))
	         thread_wakeup((event_t) &vm_page_free_wanted);

	VM_CHECK_MEMORYSTATUS;

	/*
	 *	The CPM pages should now be available and
	 *	ordered by ascending physical address.
	 */
	assert(vm_page_verify_contiguous(pages, npages));

	*list = pages;
	return KERN_SUCCESS;
}


unsigned int vm_max_delayed_work_limit = DEFAULT_DELAYED_WORK_LIMIT;

/*
 * when working on a 'run' of pages, it is necessary to hold
 * the vm_page_queue_lock (a hot global lock) for certain operations
 * on the page...
however, the majority of the work can be done
 * while merely holding the object lock... in fact there are certain
 * collections of pages that don't require any work brokered by the
 * vm_page_queue_lock... to mitigate the time spent behind the global
 * lock, go to a 2 pass algorithm... collect pages up to DELAYED_WORK_LIMIT
 * while doing all of the work that doesn't require the vm_page_queue_lock...
 * then call vm_page_do_delayed_work to acquire the vm_page_queue_lock and do the
 * necessary work for each page... we will grab the busy bit on the page
 * if it's not already held so that vm_page_do_delayed_work can drop the object lock
 * if it can't immediately take the vm_page_queue_lock in order to compete
 * for the locks in the same order that vm_pageout_scan takes them.
 * the operation names are modeled after the names of the routines that
 * need to be called in order to make the changes very obvious in the
 * original loop
 */

/*
 * Apply the queued-up page operations described by dwp[0..dw_count-1]
 * to their pages while holding the page-queues lock.
 *
 * Called with 'object' locked.  If the page-queues lock can't be taken
 * immediately, the object lock is dropped and both locks are re-taken
 * in pageout_scan's order (queues first, then object) to avoid starving
 * vm_pageout_scan — see the comment above.
 *
 * Pages marked DW_vm_page_free are collected on local_free_q and freed
 * in one batch after the queues lock is dropped.
 */
void
vm_page_do_delayed_work(
	vm_object_t	object,
	struct vm_page_delayed_work	*dwp,
	int		dw_count)
{
	int		j;
	vm_page_t	m;
	vm_page_t	local_free_q = VM_PAGE_NULL;

	/*
	 * pageout_scan takes the vm_page_lock_queues first
	 * then tries for the object lock... to avoid what
	 * is effectively a lock inversion, we'll go to the
	 * trouble of taking them in that same order... otherwise
	 * if this object contains the majority of the pages resident
	 * in the UBC (or a small set of large objects actively being
	 * worked on contain the majority of the pages), we could
	 * cause the pageout_scan thread to 'starve' in its attempt
	 * to find pages to move to the free queue, since it has to
	 * successfully acquire the object lock of any candidate page
	 * before it can steal/clean it.
	 */
	if (!vm_page_trylockspin_queues()) {
		vm_object_unlock(object);

		vm_page_lockspin_queues();

		/* re-take the object lock under the queues lock, pausing on contention */
		for (j = 0; ; j++) {
			if (!vm_object_lock_avoid(object) &&
			    _vm_object_lock_try(object))
				break;
			vm_page_unlock_queues();
			mutex_pause(j);
			vm_page_lockspin_queues();
		}
	}
	for (j = 0; j < dw_count; j++, dwp++) {

		m = dwp->dw_m;

		if (dwp->dw_mask & DW_vm_pageout_throttle_up)
			vm_pageout_throttle_up(m);
#if CONFIG_PHANTOM_CACHE
		if (dwp->dw_mask & DW_vm_phantom_cache_update)
			vm_phantom_cache_update(m);
#endif
		if (dwp->dw_mask & DW_vm_page_wire)
			vm_page_wire(m);
		else if (dwp->dw_mask & DW_vm_page_unwire) {
			boolean_t	queueit;

			/* don't re-queue a page that is about to be freed or deactivated */
			queueit = (dwp->dw_mask & (DW_vm_page_free | DW_vm_page_deactivate_internal)) ? FALSE : TRUE;

			vm_page_unwire(m, queueit);
		}
		if (dwp->dw_mask & DW_vm_page_free) {
			vm_page_free_prepare_queues(m);

			assert(m->pageq.next == NULL && m->pageq.prev == NULL);
			/*
			 * Add this page to our list of reclaimed pages,
			 * to be freed later.
			 */
			m->pageq.next = (queue_entry_t) local_free_q;
			local_free_q = m;
		} else {
			if (dwp->dw_mask & DW_vm_page_deactivate_internal)
				vm_page_deactivate_internal(m, FALSE);
			else if (dwp->dw_mask & DW_vm_page_activate) {
				if (m->active == FALSE) {
					vm_page_activate(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_speculate)
				vm_page_speculate(m, TRUE);
			else if (dwp->dw_mask & DW_enqueue_cleaned) {
				/*
				 * if we didn't hold the object lock and did this,
				 * we might disconnect the page, then someone might
				 * soft fault it back in, then we would put it on the
				 * cleaned queue, and so we would have a referenced (maybe even dirty)
				 * page on that queue, which we don't want
				 */
				int refmod_state = pmap_disconnect(m->phys_page);

				if ((refmod_state & VM_MEM_REFERENCED)) {
					/*
					 * this page has been touched since it got cleaned; let's activate it
					 * if it hasn't already been
					 */
					vm_pageout_enqueued_cleaned++;
					vm_pageout_cleaned_reactivated++;
					vm_pageout_cleaned_commit_reactivated++;

					if (m->active == FALSE)
						vm_page_activate(m);
				} else {
					m->reference = FALSE;
					vm_page_enqueue_cleaned(m);
				}
			}
			else if (dwp->dw_mask & DW_vm_page_lru)
				vm_page_lru(m);
			else if (dwp->dw_mask & DW_VM_PAGE_QUEUES_REMOVE) {
				/* pages on the pageout queue are owned by that queue; leave them */
				if ( !m->pageout_queue)
					VM_PAGE_QUEUES_REMOVE(m);
			}
			if (dwp->dw_mask & DW_set_reference)
				m->reference = TRUE;
			else if (dwp->dw_mask & DW_clear_reference)
				m->reference = FALSE;

			if (dwp->dw_mask & DW_move_page) {
				if ( !m->pageout_queue) {
					VM_PAGE_QUEUES_REMOVE(m);

					assert(m->object != kernel_object);

					VM_PAGE_ENQUEUE_INACTIVE(m, FALSE);
				}
			}
			if (dwp->dw_mask & DW_clear_busy)
				m->busy = FALSE;

			if (dwp->dw_mask & DW_PAGE_WAKEUP)
				PAGE_WAKEUP(m);
		}
	}
	vm_page_unlock_queues();

	/* free the reclaimed pages in one batch, now that the queues lock is dropped */
	if (local_free_q)
		vm_page_free_list(local_free_q, TRUE);

	VM_CHECK_MEMORYSTATUS;

}

/*
 * Allocate page_count pages from the low-memory pool and return them
 * in *list as a singly-linked list chained through pageq.next.
 *
 * Panics unless KMA_LOMEM is set in flags (this routine only serves
 * low-memory allocations via vm_page_grablo).  On shortage, any pages
 * already grabbed are returned to the free list and
 * KERN_RESOURCE_SHORTAGE is returned.
 */
kern_return_t
vm_page_alloc_list(
	int	page_count,
	int	flags,
	vm_page_t *list)
{
	vm_page_t	lo_page_list = VM_PAGE_NULL;
	vm_page_t	mem;
	int		i;

	if ( !(flags & KMA_LOMEM))
		panic("vm_page_alloc_list: called w/o KMA_LOMEM");

	for (i = 0; i < page_count; i++) {

		mem = vm_page_grablo();

		if (mem == VM_PAGE_NULL) {
			/* shortage: give back whatever we collected so far */
			if (lo_page_list)
				vm_page_free_list(lo_page_list, FALSE);

			*list = VM_PAGE_NULL;

			return (KERN_RESOURCE_SHORTAGE);
		}
		mem->pageq.next = (queue_entry_t) lo_page_list;
		lo_page_list = mem;
	}
	*list = lo_page_list;

	return (KERN_SUCCESS);
}

/* Set the object offset recorded in the page. */
void
vm_page_set_offset(vm_page_t page, vm_object_offset_t offset)
{
	page->offset = offset;
}

/* Return the next page in a pageq.next-chained page list. */
vm_page_t
vm_page_get_next(vm_page_t page)
{
	return ((vm_page_t) page->pageq.next);
}

/* Return the object offset recorded in the page. */
vm_object_offset_t
vm_page_get_offset(vm_page_t page)
{
	return (page->offset);
}

/* Return the physical page number backing this page. */
ppnum_t
vm_page_get_phys_page(vm_page_t page)
{
	return (page->phys_page);
}


/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#if HIBERNATION

/* pages "gobbled" (held aside) for hibernation, chained through pageq.next */
static vm_page_t hibernate_gobble_queue;

extern boolean_t (* volatile consider_buffer_cache_collect)(int);

/* forward declarations for the hibernation flush/discard machinery below */
static int  hibernate_drain_pageout_queue(struct vm_pageout_queue *);
static int  hibernate_flush_dirty_pages(int);
static int  hibernate_flush_queue(queue_head_t *, int);

void hibernate_flush_wait(void);
void hibernate_mark_in_progress(void);
void hibernate_clear_in_progress(void);

void		hibernate_free_range(int, int);
void		hibernate_hash_insert_page(vm_page_t);
uint32_t	hibernate_mark_as_unneeded(addr64_t, addr64_t, hibernate_page_list_t *,
hibernate_page_list_t *);
void		hibernate_rebuild_vm_structs(void);
uint32_t	hibernate_teardown_vm_structs(hibernate_page_list_t *, hibernate_page_list_t *);
ppnum_t		hibernate_lookup_paddr(unsigned int);

/*
 * Counters updated while preparing the hibernation image.
 * The hibernate_* fields track the dirty-page flush pass
 * (hibernate_flush_queue / hibernate_drain_pageout_queue);
 * the cd_* fields track the consider/discard pass
 * (hibernate_consider_discard and friends).
 */
struct hibernate_statistics {
	int hibernate_considered;
	int hibernate_reentered_on_q;
	int hibernate_found_dirty;
	int hibernate_skipped_cleaning;
	int hibernate_skipped_transient;
	int hibernate_skipped_precious;
	int hibernate_skipped_external;
	int hibernate_queue_nolock;
	int hibernate_queue_paused;
	int hibernate_throttled;
	int hibernate_throttle_timeout;
	int hibernate_drained;
	int hibernate_drain_timeout;
	int cd_lock_failed;
	int cd_found_precious;
	int cd_found_wired;
	int cd_found_busy;
	int cd_found_unusual;
	int cd_found_cleaning;
	int cd_found_laundry;
	int cd_found_dirty;
	int cd_found_xpmapped;
	int cd_skipped_xpmapped;
	int cd_local_free;
	int cd_total_free;
	int cd_vm_page_wire_count;
	int cd_vm_struct_pages_unneeded;
	int cd_pages;
	int cd_discarded;
	int cd_count_wire;
} hibernate_stats;


/*
 * clamp the number of 'xpmapped' pages we'll sweep into the hibernation image
 * so that we don't overrun the estimated image size, which would
 * result in a hibernation failure.
 */
#define HIBERNATE_XPMAPPED_LIMIT	40000


/*
 * Wait for the given pageout queue to drain before hibernating.
 * Sleeps up to 5 seconds per iteration on (&q->pgo_laundry+1) with
 * pgo_draining set.  Returns 1 if the wait timed out with work still
 * pending (except for the external queue, which is best-effort and
 * returns 0); returns 0 once the queue is empty.
 */
static int
hibernate_drain_pageout_queue(struct vm_pageout_queue *q)
{
	wait_result_t	wait_result;

	vm_page_lock_queues();

	while ( !queue_empty(&q->pgo_pending) ) {

		q->pgo_draining = TRUE;

		assert_wait_timeout((event_t) (&q->pgo_laundry+1), THREAD_INTERRUPTIBLE, 5000, 1000*NSEC_PER_USEC);

		vm_page_unlock_queues();

		wait_result = thread_block(THREAD_CONTINUE_NULL);

		if (wait_result == THREAD_TIMED_OUT && !queue_empty(&q->pgo_pending)) {
			hibernate_stats.hibernate_drain_timeout++;

			/* external queue is best-effort; don't fail the hibernation for it */
			if (q == &vm_pageout_queue_external)
				return (0);

			return (1);
		}
		vm_page_lock_queues();

		hibernate_stats.hibernate_drained++;
	}
	vm_page_unlock_queues();

	return (0);
}


/* set when the external pageout queue times out; external pages are then skipped */
boolean_t	hibernate_skip_external = FALSE;

/*
 * Examine up to 'qcount' pages at the head of page queue 'q' and push
 * the dirty ones to the appropriate pageout queue so they get cleaned
 * before the hibernation image is written.  Pages that can't (busy,
 * cleaning, laundry, ...) or needn't (clean, purgeable-volatile/empty)
 * be cleaned are re-entered at the tail of 'q' (reenter_pg_on_q).
 * Returns 1 to abort the flush (hibernate_should_abort, or a throttle
 * wait gave up), 0 otherwise.
 */
static int
hibernate_flush_queue(queue_head_t *q, int qcount)
{
	vm_page_t	m;
	vm_object_t	l_object = NULL;
	vm_object_t	m_object = NULL;
	int		refmod_state = 0;
	int		try_failed_count = 0;
	int		retval = 0;
	int		current_run = 0;
	struct	vm_pageout_queue *iq;
	struct	vm_pageout_queue *eq;
	struct	vm_pageout_queue *tq;


	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_START, q, qcount, 0, 0, 0);

	iq = &vm_pageout_queue_internal;
	eq = &vm_pageout_queue_external;

	vm_page_lock_queues();

	while (qcount && !queue_empty(q)) {

		/* periodically check whether the hibernation has been aborted */
		if (current_run++ == 1000) {
			if (hibernate_should_abort()) {
				retval = 1;
				break;
			}
			current_run = 0;
		}

		m = (vm_page_t) queue_first(q);
		m_object = m->object;

		/*
		 * check to see if we currently are working
		 * with the same object... if so, we've
		 * already got the lock
		 */
		if (m_object != l_object) {
		        /*
			 * the object associated with candidate page is
			 * different from the one we were just working
			 * with... dump the lock if we still own it
			 */
		        if (l_object != NULL) {
			        vm_object_unlock(l_object);
				l_object = NULL;
			}
			/*
			 * Try to lock object; since we've alread got the
			 * page queues lock, we can only 'try' for this one.
			 * if the 'try' fails, we need to do a mutex_pause
			 * to allow the owner of the object lock a chance to
			 * run...
			 */
			if ( !vm_object_lock_try_scan(m_object)) {

				if (try_failed_count > 20) {
					hibernate_stats.hibernate_queue_nolock++;

					goto reenter_pg_on_q;
				}
				vm_pageout_scan_wants_object = m_object;

				vm_page_unlock_queues();
				mutex_pause(try_failed_count++);
				vm_page_lock_queues();

				hibernate_stats.hibernate_queue_paused++;
				continue;
			} else {
				l_object = m_object;
				vm_pageout_scan_wants_object = VM_OBJECT_NULL;
			}
		}
		if ( !m_object->alive || m->encrypted_cleaning || m->cleaning || m->laundry || m->busy || m->absent || m->error) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->cleaning)
				hibernate_stats.hibernate_skipped_cleaning++;
			else
				hibernate_stats.hibernate_skipped_transient++;

			goto reenter_pg_on_q;
		}
		if (m_object->copy == VM_OBJECT_NULL) {
			if (m_object->purgable == VM_PURGABLE_VOLATILE || m_object->purgable == VM_PURGABLE_EMPTY) {
				/*
				 * let the normal hibernate image path
				 * deal with these
				 */
				goto reenter_pg_on_q;
			}
		}
		/* pick up any modified-bit state the pmap layer knows about */
		if ( !m->dirty && m->pmapped) {
			refmod_state = pmap_get_refmod(m->phys_page);

			if ((refmod_state & VM_MEM_MODIFIED)) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		} else
			refmod_state = 0;

		if ( !m->dirty) {
			/*
			 * page is not to be cleaned
			 * put it back on the head of its queue
			 */
			if (m->precious)
				hibernate_stats.hibernate_skipped_precious++;

			goto reenter_pg_on_q;
		}

		if (hibernate_skip_external == TRUE && !m_object->internal) {

			hibernate_stats.hibernate_skipped_external++;

			goto reenter_pg_on_q;
		}
		tq = NULL;

		/* choose the throttled pageout queue (if any) we'd be feeding */
		if (m_object->internal) {
			if (VM_PAGE_Q_THROTTLED(iq))
				tq = iq;
		} else if (VM_PAGE_Q_THROTTLED(eq))
			tq = eq;

		if (tq != NULL) {
			wait_result_t	wait_result;
			int		wait_count = 5;

			if (l_object != NULL) {
				vm_object_unlock(l_object);
				l_object = NULL;
			}
			vm_pageout_scan_wants_object = VM_OBJECT_NULL;

			/* wait (up to wait_count seconds) for the queue to un-throttle */
			while (retval == 0) {

				tq->pgo_throttled = TRUE;

				assert_wait_timeout((event_t) &tq->pgo_laundry, THREAD_INTERRUPTIBLE, 1000, 1000*NSEC_PER_USEC);

				vm_page_unlock_queues();

				wait_result = thread_block(THREAD_CONTINUE_NULL);

				vm_page_lock_queues();

				if (wait_result != THREAD_TIMED_OUT)
					break;
				if (!VM_PAGE_Q_THROTTLED(tq))
					break;

				if (hibernate_should_abort())
					retval = 1;

				if (--wait_count == 0) {

					hibernate_stats.hibernate_throttle_timeout++;

					if (tq == eq) {
						/* give up on external pages rather than failing */
						hibernate_skip_external = TRUE;
						break;
					}
					retval = 1;
				}
			}
			if (retval)
				break;

			hibernate_stats.hibernate_throttled++;

			continue;
		}
		/*
		 * we've already factored out pages in the laundry which
		 * means this page can't be on the pageout queue so it's
		 * safe to do the VM_PAGE_QUEUES_REMOVE
		 */
		assert(!m->pageout_queue);

		VM_PAGE_QUEUES_REMOVE(m);

		if (COMPRESSED_PAGER_IS_ACTIVE && m_object->internal == TRUE)
			pmap_disconnect_options(m->phys_page, PMAP_OPTIONS_COMPRESSOR, NULL);

		vm_pageout_cluster(m, FALSE);

		hibernate_stats.hibernate_found_dirty++;

		goto next_pg;

reenter_pg_on_q:
		/* rotate the page to the tail so we make forward progress */
		queue_remove(q, m, vm_page_t, pageq);
		queue_enter(q, m, vm_page_t, pageq);

		hibernate_stats.hibernate_reentered_on_q++;
next_pg:
		hibernate_stats.hibernate_considered++;

		qcount--;
		try_failed_count = 0;
	}
	if (l_object != NULL) {
		vm_object_unlock(l_object);
		l_object = NULL;
	}
	vm_pageout_scan_wants_object = VM_OBJECT_NULL;

	vm_page_unlock_queues();

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 4) | DBG_FUNC_END, hibernate_stats.hibernate_found_dirty, retval, 0, 0, 0);

	return (retval);
}


/*
 * Flush dirty pages from all the page queues (speculative, inactive,
 * anonymous, cleaned, then active) and drain the pageout queues so the
 * hibernation image contains as few dirty pages as possible.
 * Returns 1 as soon as any flush/drain step asks to abort, 0 on success.
 * On pass 1 with the compressed pager active, brackets the active-queue
 * flush with vm_compressor_record_warmup_start/end.
 */
static int
hibernate_flush_dirty_pages(int pass)
{
	struct vm_speculative_age_q	*aq;
	uint32_t	i;

	/* fold the per-cpu local queues back into the global ones first */
	if (vm_page_local_q) {
		for (i = 0; i < vm_page_local_q_count; i++)
			vm_page_reactivate_local(i, TRUE, FALSE);
	}

	for (i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++) {
		int		qcount;
		vm_page_t	m;

		aq = &vm_page_queue_speculative[i];

		if (queue_empty(&aq->age_q))
			continue;
		qcount = 0;

		vm_page_lockspin_queues();

		/* count the pages on this age queue under the lock */
		queue_iterate(&aq->age_q,
			      m,
			      vm_page_t,
			      pageq)
		{
			qcount++;
		}
		vm_page_unlock_queues();

		if (qcount) {
			if (hibernate_flush_queue(&aq->age_q, qcount))
				return (1);
		}
	}
	if (hibernate_flush_queue(&vm_page_queue_inactive, vm_page_inactive_count - vm_page_anonymous_count - vm_page_cleaned_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_anonymous, vm_page_anonymous_count))
		return (1);
	if (hibernate_flush_queue(&vm_page_queue_cleaned, vm_page_cleaned_count))
		return (1);
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal))
		return (1);

	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_start();

	if (hibernate_flush_queue(&vm_page_queue_active, vm_page_active_count)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (hibernate_drain_pageout_queue(&vm_pageout_queue_internal)) {
		if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
			vm_compressor_record_warmup_end();
		return (1);
	}
	if (COMPRESSED_PAGER_IS_ACTIVE && pass == 1)
		vm_compressor_record_warmup_end();

	if (hibernate_skip_external == FALSE && hibernate_drain_pageout_queue(&vm_pageout_queue_external))
		return (1);

	return (0);
}


/* Zero all the hibernation flush/discard counters. */
void
hibernate_reset_stats()
{
	bzero(&hibernate_stats, sizeof(struct hibernate_statistics));
}


/*
 * Top-level memory flush performed before writing the hibernation
 * image: flush dirty pages, then (if the flush succeeded) flush the
 * compressor and run the buffer-cache / zone garbage collectors to
 * shrink the wired footprint.  Returns the flush pass result (non-zero
 * means the flush did not complete).
 */
int
hibernate_flush_memory()
{
	int	retval;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_START, vm_page_free_count, 0, 0, 0, 0);

	hibernate_cleaning_in_progress = TRUE;
	hibernate_skip_external = FALSE;

	if ((retval = hibernate_flush_dirty_pages(1)) == 0) {

		if (COMPRESSED_PAGER_IS_ACTIVE) {

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_START, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);

			vm_compressor_flush();

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 10) | DBG_FUNC_END, VM_PAGE_COMPRESSOR_COUNT, 0, 0, 0, 0);
		}
		if (consider_buffer_cache_collect != NULL) {
			unsigned int orig_wire_count;

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_START, 0, 0, 0, 0, 0);
			orig_wire_count = vm_page_wire_count;

			(void)(*consider_buffer_cache_collect)(1);
			consider_zone_gc(TRUE);

			HIBLOG("hibernate_flush_memory: buffer_cache_gc freed up %d wired pages\n", orig_wire_count - vm_page_wire_count);

			KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 7) | DBG_FUNC_END, orig_wire_count - vm_page_wire_count, 0, 0, 0, 0);
		}
	}
	hibernate_cleaning_in_progress = FALSE;

	KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 3) | DBG_FUNC_END, vm_page_free_count, hibernate_stats.hibernate_found_dirty, retval, 0, 0);

	if (retval && COMPRESSED_PAGER_IS_ACTIVE)
		HIBLOG("hibernate_flush_memory() failed to finish - vm_page_compressor_count(%d)\n", VM_PAGE_COMPRESSOR_COUNT);


	HIBPRINT("hibernate_flush_memory() considered(%d) reentered_on_q(%d) found_dirty(%d)\n",
		 hibernate_stats.hibernate_considered,
		 hibernate_stats.hibernate_reentered_on_q,
		 hibernate_stats.hibernate_found_dirty);
	HIBPRINT(" skipped_cleaning(%d) skipped_transient(%d) skipped_precious(%d) skipped_external(%d) queue_nolock(%d)\n",
		 hibernate_stats.hibernate_skipped_cleaning,
		 hibernate_stats.hibernate_skipped_transient,
		 hibernate_stats.hibernate_skipped_precious,
		 hibernate_stats.hibernate_skipped_external,
		 hibernate_stats.hibernate_queue_nolock);
	HIBPRINT(" queue_paused(%d) throttled(%d) throttle_timeout(%d) drained(%d) drain_timeout(%d)\n",
		 hibernate_stats.hibernate_queue_paused,
		 hibernate_stats.hibernate_throttled,
		 hibernate_stats.hibernate_throttle_timeout,
		 hibernate_stats.hibernate_drained,
		 hibernate_stats.hibernate_drain_timeout);

	return (retval);
}


/*
 * Zero every bank bitmap in the hibernation page list, then set the
 * out-of-range bits at the end of each bank's last word so pages past
 * last_page read as "in use".
 */
static void
hibernate_page_list_zero(hibernate_page_list_t *list)
{
	uint32_t		bank;
	hibernate_bitmap_t *	bitmap;

	bitmap = &list->bank_bitmap[0];
	for (bank = 0; bank < list->bank_count; bank++)
	{
		uint32_t	last_bit;

		bzero((void *) &bitmap->bitmap[0], bitmap->bitmapwords << 2);
		// set out-of-bound bits at end of bitmap.
		last_bit = ((bitmap->last_page - bitmap->first_page + 1) & 31);
		if (last_bit)
			bitmap->bitmap[bitmap->bitmapwords - 1] = (0xFFFFFFFF >> last_bit);

		/* banks are laid out back to back; step past this bank's words */
		bitmap = (hibernate_bitmap_t *) &bitmap->bitmap[bitmap->bitmapwords];
	}
}

/*
 * Grab up to gobble_count pages and park them on hibernate_gobble_queue,
 * waiting up to free_page_time ms (total) for free pages to appear.
 * The pages are 'gobbled' (held aside, counted as wired) until
 * hibernate_free_gobble_pages() releases them.
 */
void
hibernate_gobble_pages(uint32_t gobble_count, uint32_t free_page_time)
{
	uint32_t i;
	vm_page_t m;
	uint64_t start, end, timeout, nsec;
	clock_interval_to_deadline(free_page_time, 1000 * 1000 /*ms*/, &timeout);
	clock_get_uptime(&start);

	for (i = 0; i < gobble_count; i++)
	{
		while (VM_PAGE_NULL == (m = vm_page_grab()))
		{
			clock_get_uptime(&end);
			if (end >= timeout)
				break;
			VM_PAGE_WAIT();
		}
		if (!m)
			break;
		m->busy = FALSE;
		vm_page_gobble(m);

		m->pageq.next = (queue_entry_t) hibernate_gobble_queue;
		hibernate_gobble_queue = m;
	}

	clock_get_uptime(&end);
	absolutetime_to_nanoseconds(end - start, &nsec);
	HIBLOG("Gobbled %d pages, time: %qd ms\n", i, nsec / 1000000ULL);
}

/*
 * Release every page previously parked by hibernate_gobble_pages()
 * back to the free list and empty hibernate_gobble_queue.
 */
void
hibernate_free_gobble_pages(void)
{
	vm_page_t m, next;
	uint32_t  count = 0;

	m = (vm_page_t) hibernate_gobble_queue;
	while(m)
	{
		next = (vm_page_t) m->pageq.next;
		vm_page_free(m);
		count++;
		m = next;
	}
	hibernate_gobble_queue = VM_PAGE_NULL;

	if (count)
		HIBLOG("Freed %d pages\n", count);
}

/*
 * Decide whether page m can be omitted from the hibernation image and
 * discarded at wakeup: TRUE for clean pages and pages of
 * purgeable-volatile/empty objects, FALSE for anything wired, busy,
 * precious, dirty, in-flight, or otherwise unusual.  Executable pages
 * that look recently used (xpmapped + referenced, non-internal) are
 * kept up to HIBERNATE_XPMAPPED_LIMIT to speed wakeup.
 * When preflight is FALSE the cd_* statistics are updated.
 */
static boolean_t
hibernate_consider_discard(vm_page_t m, boolean_t preflight)
{
	vm_object_t object = NULL;
	int                  refmod_state;
	boolean_t            discard = FALSE;

	do
	{
		if (m->private)
			panic("hibernate_consider_discard: private");

		if (!vm_object_lock_try(m->object)) {
			if (!preflight) hibernate_stats.cd_lock_failed++;
			break;
		}
		object = m->object;

		if (VM_PAGE_WIRED(m)) {
			if (!preflight) hibernate_stats.cd_found_wired++;
			break;
		}
		if (m->precious) {
			if (!preflight) hibernate_stats.cd_found_precious++;
			break;
		}
		if (m->busy || !object->alive) {
			/*
			 *	Somebody is playing with this page.
			 */
			if (!preflight) hibernate_stats.cd_found_busy++;
			break;
		}
		if (m->absent || m->unusual || m->error) {
			/*
			 * If it's unusual in anyway, ignore it
			 */
			if (!preflight) hibernate_stats.cd_found_unusual++;
			break;
		}
		if (m->cleaning) {
			if (!preflight) hibernate_stats.cd_found_cleaning++;
			break;
		}
		if (m->laundry) {
			if (!preflight) hibernate_stats.cd_found_laundry++;
			break;
		}
		if (!m->dirty)
		{
			/* pick up ref/mod state the pmap layer may be holding */
			refmod_state = pmap_get_refmod(m->phys_page);

			if (refmod_state & VM_MEM_REFERENCED)
				m->reference = TRUE;
			if (refmod_state & VM_MEM_MODIFIED) {
				SET_PAGE_DIRTY(m, FALSE);
			}
		}

		/*
		 * If it's clean or purgeable we can discard the page on wakeup.
		 */
		discard = (!m->dirty)
			    || (VM_PURGABLE_VOLATILE == object->purgable)
			    || (VM_PURGABLE_EMPTY == object->purgable);


		if (discard == FALSE) {
			if (!preflight)
				hibernate_stats.cd_found_dirty++;
		} else if (m->xpmapped && m->reference && !object->internal) {
			/* keep recently-used executable pages in the image (up to the clamp) */
			if (hibernate_stats.cd_found_xpmapped < HIBERNATE_XPMAPPED_LIMIT) {
				if (!preflight)
					hibernate_stats.cd_found_xpmapped++;
				discard = FALSE;
			} else {
				if (!preflight)
					hibernate_stats.cd_skipped_xpmapped++;
			}
		}
	}
	while (FALSE);

	if (object)
		vm_object_unlock(object);

	return (discard);
}


/*
 * Actually discard a page that hibernate_consider_discard() approved:
 * disconnect it from the pmap and (continuing below) free it, emptying
 * its purgeable object if appropriate.
 */
static void
hibernate_discard_page(vm_page_t m)
{
	if (m->absent || m->unusual || m->error)
		/*
		 * If it's unusual in anyway, ignore
		 */
		return;

#if MACH_ASSERT || DEBUG
	vm_object_t object = m->object;
	if (!vm_object_lock_try(m->object))
		panic("hibernate_discard_page(%p) 
!vm_object_lock_try", m); 5599#else 5600 /* No need to lock page queue for token delete, hibernate_vm_unlock() 5601 makes sure these locks are uncontended before sleep */ 5602#endif /* MACH_ASSERT || DEBUG */ 5603 5604 if (m->pmapped == TRUE) 5605 { 5606 __unused int refmod_state = pmap_disconnect(m->phys_page); 5607 } 5608 5609 if (m->laundry) 5610 panic("hibernate_discard_page(%p) laundry", m); 5611 if (m->private) 5612 panic("hibernate_discard_page(%p) private", m); 5613 if (m->fictitious) 5614 panic("hibernate_discard_page(%p) fictitious", m); 5615 5616 if (VM_PURGABLE_VOLATILE == m->object->purgable) 5617 { 5618 /* object should be on a queue */ 5619 assert((m->object->objq.next != NULL) && (m->object->objq.prev != NULL)); 5620 purgeable_q_t old_queue = vm_purgeable_object_remove(m->object); 5621 assert(old_queue); 5622 if (m->object->purgeable_when_ripe) { 5623 vm_purgeable_token_delete_first(old_queue); 5624 } 5625 m->object->purgable = VM_PURGABLE_EMPTY; 5626 5627 /* 5628 * Purgeable ledgers: pages of VOLATILE and EMPTY objects are 5629 * accounted in the "volatile" ledger, so no change here. 5630 * We have to update vm_page_purgeable_count, though, since we're 5631 * effectively purging this object. 
5632 */ 5633 unsigned int delta; 5634 assert(m->object->resident_page_count >= m->object->wired_page_count); 5635 delta = (m->object->resident_page_count - m->object->wired_page_count); 5636 assert(vm_page_purgeable_count >= delta); 5637 assert(delta > 0); 5638 OSAddAtomic(-delta, (SInt32 *)&vm_page_purgeable_count); 5639 } 5640 5641 vm_page_free(m); 5642 5643#if MACH_ASSERT || DEBUG 5644 vm_object_unlock(object); 5645#endif /* MACH_ASSERT || DEBUG */ 5646} 5647 5648/* 5649 Grab locks for hibernate_page_list_setall() 5650*/ 5651void 5652hibernate_vm_lock_queues(void) 5653{ 5654 vm_object_lock(compressor_object); 5655 vm_page_lock_queues(); 5656 lck_mtx_lock(&vm_page_queue_free_lock); 5657 5658 if (vm_page_local_q) { 5659 uint32_t i; 5660 for (i = 0; i < vm_page_local_q_count; i++) { 5661 struct vpl *lq; 5662 lq = &vm_page_local_q[i].vpl_un.vpl; 5663 VPL_LOCK(&lq->vpl_lock); 5664 } 5665 } 5666} 5667 5668void 5669hibernate_vm_unlock_queues(void) 5670{ 5671 if (vm_page_local_q) { 5672 uint32_t i; 5673 for (i = 0; i < vm_page_local_q_count; i++) { 5674 struct vpl *lq; 5675 lq = &vm_page_local_q[i].vpl_un.vpl; 5676 VPL_UNLOCK(&lq->vpl_lock); 5677 } 5678 } 5679 lck_mtx_unlock(&vm_page_queue_free_lock); 5680 vm_page_unlock_queues(); 5681 vm_object_unlock(compressor_object); 5682} 5683 5684/* 5685 Bits zero in the bitmaps => page needs to be saved. All pages default to be saved, 5686 pages known to VM to not need saving are subtracted. 5687 Wired pages to be saved are present in page_list_wired, pageable in page_list. 
5688*/ 5689 5690void 5691hibernate_page_list_setall(hibernate_page_list_t * page_list, 5692 hibernate_page_list_t * page_list_wired, 5693 hibernate_page_list_t * page_list_pal, 5694 boolean_t preflight, 5695 boolean_t will_discard, 5696 uint32_t * pagesOut) 5697{ 5698 uint64_t start, end, nsec; 5699 vm_page_t m; 5700 vm_page_t next; 5701 uint32_t pages = page_list->page_count; 5702 uint32_t count_anonymous = 0, count_throttled = 0, count_compressor = 0; 5703 uint32_t count_inactive = 0, count_active = 0, count_speculative = 0, count_cleaned = 0; 5704 uint32_t count_wire = pages; 5705 uint32_t count_discard_active = 0; 5706 uint32_t count_discard_inactive = 0; 5707 uint32_t count_discard_cleaned = 0; 5708 uint32_t count_discard_purgeable = 0; 5709 uint32_t count_discard_speculative = 0; 5710 uint32_t count_discard_vm_struct_pages = 0; 5711 uint32_t i; 5712 uint32_t bank; 5713 hibernate_bitmap_t * bitmap; 5714 hibernate_bitmap_t * bitmap_wired; 5715 boolean_t discard_all; 5716 boolean_t discard; 5717 5718 HIBLOG("hibernate_page_list_setall(preflight %d) start %p, %p\n", preflight, page_list, page_list_wired); 5719 5720 if (preflight) { 5721 page_list = NULL; 5722 page_list_wired = NULL; 5723 page_list_pal = NULL; 5724 discard_all = FALSE; 5725 } else { 5726 discard_all = will_discard; 5727 } 5728 5729#if MACH_ASSERT || DEBUG 5730 if (!preflight) 5731 { 5732 vm_page_lock_queues(); 5733 if (vm_page_local_q) { 5734 for (i = 0; i < vm_page_local_q_count; i++) { 5735 struct vpl *lq; 5736 lq = &vm_page_local_q[i].vpl_un.vpl; 5737 VPL_LOCK(&lq->vpl_lock); 5738 } 5739 } 5740 } 5741#endif /* MACH_ASSERT || DEBUG */ 5742 5743 5744 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_START, count_wire, 0, 0, 0, 0); 5745 5746 clock_get_uptime(&start); 5747 5748 if (!preflight) { 5749 hibernate_page_list_zero(page_list); 5750 hibernate_page_list_zero(page_list_wired); 5751 hibernate_page_list_zero(page_list_pal); 5752 5753 hibernate_stats.cd_vm_page_wire_count = 
vm_page_wire_count; 5754 hibernate_stats.cd_pages = pages; 5755 } 5756 5757 if (vm_page_local_q) { 5758 for (i = 0; i < vm_page_local_q_count; i++) 5759 vm_page_reactivate_local(i, TRUE, !preflight); 5760 } 5761 5762 if (preflight) { 5763 vm_object_lock(compressor_object); 5764 vm_page_lock_queues(); 5765 lck_mtx_lock(&vm_page_queue_free_lock); 5766 } 5767 5768 m = (vm_page_t) hibernate_gobble_queue; 5769 while (m) 5770 { 5771 pages--; 5772 count_wire--; 5773 if (!preflight) { 5774 hibernate_page_bitset(page_list, TRUE, m->phys_page); 5775 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5776 } 5777 m = (vm_page_t) m->pageq.next; 5778 } 5779 5780 if (!preflight) for( i = 0; i < real_ncpus; i++ ) 5781 { 5782 if (cpu_data_ptr[i] && cpu_data_ptr[i]->cpu_processor) 5783 { 5784 for (m = PROCESSOR_DATA(cpu_data_ptr[i]->cpu_processor, free_pages); m; m = (vm_page_t)m->pageq.next) 5785 { 5786 pages--; 5787 count_wire--; 5788 hibernate_page_bitset(page_list, TRUE, m->phys_page); 5789 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5790 5791 hibernate_stats.cd_local_free++; 5792 hibernate_stats.cd_total_free++; 5793 } 5794 } 5795 } 5796 5797 for( i = 0; i < vm_colors; i++ ) 5798 { 5799 queue_iterate(&vm_page_queue_free[i], 5800 m, 5801 vm_page_t, 5802 pageq) 5803 { 5804 pages--; 5805 count_wire--; 5806 if (!preflight) { 5807 hibernate_page_bitset(page_list, TRUE, m->phys_page); 5808 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5809 5810 hibernate_stats.cd_total_free++; 5811 } 5812 } 5813 } 5814 5815 queue_iterate(&vm_lopage_queue_free, 5816 m, 5817 vm_page_t, 5818 pageq) 5819 { 5820 pages--; 5821 count_wire--; 5822 if (!preflight) { 5823 hibernate_page_bitset(page_list, TRUE, m->phys_page); 5824 hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5825 5826 hibernate_stats.cd_total_free++; 5827 } 5828 } 5829 5830 m = (vm_page_t) queue_first(&vm_page_queue_throttled); 5831 while (m && !queue_end(&vm_page_queue_throttled, 
(queue_entry_t)m)) 5832 { 5833 next = (vm_page_t) m->pageq.next; 5834 discard = FALSE; 5835 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) 5836 && hibernate_consider_discard(m, preflight)) 5837 { 5838 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); 5839 count_discard_inactive++; 5840 discard = discard_all; 5841 } 5842 else 5843 count_throttled++; 5844 count_wire--; 5845 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5846 5847 if (discard) hibernate_discard_page(m); 5848 m = next; 5849 } 5850 5851 m = (vm_page_t) queue_first(&vm_page_queue_anonymous); 5852 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m)) 5853 { 5854 next = (vm_page_t) m->pageq.next; 5855 discard = FALSE; 5856 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) 5857 && hibernate_consider_discard(m, preflight)) 5858 { 5859 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); 5860 if (m->dirty) 5861 count_discard_purgeable++; 5862 else 5863 count_discard_inactive++; 5864 discard = discard_all; 5865 } 5866 else 5867 count_anonymous++; 5868 count_wire--; 5869 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5870 if (discard) hibernate_discard_page(m); 5871 m = next; 5872 } 5873 5874 m = (vm_page_t) queue_first(&vm_page_queue_cleaned); 5875 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m)) 5876 { 5877 next = (vm_page_t) m->pageq.next; 5878 discard = FALSE; 5879 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) 5880 && hibernate_consider_discard(m, preflight)) 5881 { 5882 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); 5883 if (m->dirty) 5884 count_discard_purgeable++; 5885 else 5886 count_discard_cleaned++; 5887 discard = discard_all; 5888 } 5889 else 5890 count_cleaned++; 5891 count_wire--; 5892 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5893 if (discard) hibernate_discard_page(m); 5894 
m = next; 5895 } 5896 5897 m = (vm_page_t) queue_first(&vm_page_queue_active); 5898 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m)) 5899 { 5900 next = (vm_page_t) m->pageq.next; 5901 discard = FALSE; 5902 if ((kIOHibernateModeDiscardCleanActive & gIOHibernateMode) 5903 && hibernate_consider_discard(m, preflight)) 5904 { 5905 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); 5906 if (m->dirty) 5907 count_discard_purgeable++; 5908 else 5909 count_discard_active++; 5910 discard = discard_all; 5911 } 5912 else 5913 count_active++; 5914 count_wire--; 5915 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5916 if (discard) hibernate_discard_page(m); 5917 m = next; 5918 } 5919 5920 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 5921 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m)) 5922 { 5923 next = (vm_page_t) m->pageq.next; 5924 discard = FALSE; 5925 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) 5926 && hibernate_consider_discard(m, preflight)) 5927 { 5928 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); 5929 if (m->dirty) 5930 count_discard_purgeable++; 5931 else 5932 count_discard_inactive++; 5933 discard = discard_all; 5934 } 5935 else 5936 count_inactive++; 5937 count_wire--; 5938 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5939 if (discard) hibernate_discard_page(m); 5940 m = next; 5941 } 5942 5943 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) 5944 { 5945 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q); 5946 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m)) 5947 { 5948 next = (vm_page_t) m->pageq.next; 5949 discard = FALSE; 5950 if ((kIOHibernateModeDiscardCleanInactive & gIOHibernateMode) 5951 && hibernate_consider_discard(m, preflight)) 5952 { 5953 if (!preflight) hibernate_page_bitset(page_list, TRUE, m->phys_page); 5954 count_discard_speculative++; 5955 
discard = discard_all; 5956 } 5957 else 5958 count_speculative++; 5959 count_wire--; 5960 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5961 if (discard) hibernate_discard_page(m); 5962 m = next; 5963 } 5964 } 5965 5966 queue_iterate(&compressor_object->memq, m, vm_page_t, listq) 5967 { 5968 count_compressor++; 5969 count_wire--; 5970 if (!preflight) hibernate_page_bitset(page_list_wired, TRUE, m->phys_page); 5971 } 5972 5973 if (preflight == FALSE && discard_all == TRUE) { 5974 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 12) | DBG_FUNC_START, 0, 0, 0, 0, 0); 5975 5976 HIBLOG("hibernate_teardown started\n"); 5977 count_discard_vm_struct_pages = hibernate_teardown_vm_structs(page_list, page_list_wired); 5978 HIBLOG("hibernate_teardown completed - discarded %d\n", count_discard_vm_struct_pages); 5979 5980 pages -= count_discard_vm_struct_pages; 5981 count_wire -= count_discard_vm_struct_pages; 5982 5983 hibernate_stats.cd_vm_struct_pages_unneeded = count_discard_vm_struct_pages; 5984 5985 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0); 5986 } 5987 5988 if (!preflight) { 5989 // pull wired from hibernate_bitmap 5990 bitmap = &page_list->bank_bitmap[0]; 5991 bitmap_wired = &page_list_wired->bank_bitmap[0]; 5992 for (bank = 0; bank < page_list->bank_count; bank++) 5993 { 5994 for (i = 0; i < bitmap->bitmapwords; i++) 5995 bitmap->bitmap[i] = bitmap->bitmap[i] | ~bitmap_wired->bitmap[i]; 5996 bitmap = (hibernate_bitmap_t *) &bitmap->bitmap [bitmap->bitmapwords]; 5997 bitmap_wired = (hibernate_bitmap_t *) &bitmap_wired->bitmap[bitmap_wired->bitmapwords]; 5998 } 5999 } 6000 6001 // machine dependent adjustments 6002 hibernate_page_list_setall_machine(page_list, page_list_wired, preflight, &pages); 6003 6004 if (!preflight) { 6005 hibernate_stats.cd_count_wire = count_wire; 6006 hibernate_stats.cd_discarded = count_discard_active + count_discard_inactive + count_discard_purgeable + 6007 
count_discard_speculative + count_discard_cleaned + count_discard_vm_struct_pages; 6008 } 6009 6010 clock_get_uptime(&end); 6011 absolutetime_to_nanoseconds(end - start, &nsec); 6012 HIBLOG("hibernate_page_list_setall time: %qd ms\n", nsec / 1000000ULL); 6013 6014 HIBLOG("pages %d, wire %d, act %d, inact %d, cleaned %d spec %d, zf %d, throt %d, compr %d, xpmapped %d\n %s discard act %d inact %d purgeable %d spec %d cleaned %d\n", 6015 pages, count_wire, count_active, count_inactive, count_cleaned, count_speculative, count_anonymous, count_throttled, count_compressor, hibernate_stats.cd_found_xpmapped, 6016 discard_all ? "did" : "could", 6017 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned); 6018 6019 if (hibernate_stats.cd_skipped_xpmapped) 6020 HIBLOG("WARNING: hibernate_page_list_setall skipped %d xpmapped pages\n", hibernate_stats.cd_skipped_xpmapped); 6021 6022 *pagesOut = pages - count_discard_active - count_discard_inactive - count_discard_purgeable - count_discard_speculative - count_discard_cleaned; 6023 6024 if (preflight && will_discard) *pagesOut -= count_compressor + count_throttled + count_anonymous + count_inactive + count_cleaned + count_speculative + count_active; 6025 6026#if MACH_ASSERT || DEBUG 6027 if (!preflight) 6028 { 6029 if (vm_page_local_q) { 6030 for (i = 0; i < vm_page_local_q_count; i++) { 6031 struct vpl *lq; 6032 lq = &vm_page_local_q[i].vpl_un.vpl; 6033 VPL_UNLOCK(&lq->vpl_lock); 6034 } 6035 } 6036 vm_page_unlock_queues(); 6037 } 6038#endif /* MACH_ASSERT || DEBUG */ 6039 6040 if (preflight) { 6041 lck_mtx_unlock(&vm_page_queue_free_lock); 6042 vm_page_unlock_queues(); 6043 vm_object_unlock(compressor_object); 6044 } 6045 6046 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 8) | DBG_FUNC_END, count_wire, *pagesOut, 0, 0, 0); 6047} 6048 6049void 6050hibernate_page_list_discard(hibernate_page_list_t * page_list) 6051{ 6052 uint64_t start, end, nsec; 6053 vm_page_t 
m; 6054 vm_page_t next; 6055 uint32_t i; 6056 uint32_t count_discard_active = 0; 6057 uint32_t count_discard_inactive = 0; 6058 uint32_t count_discard_purgeable = 0; 6059 uint32_t count_discard_cleaned = 0; 6060 uint32_t count_discard_speculative = 0; 6061 6062 6063#if MACH_ASSERT || DEBUG 6064 vm_page_lock_queues(); 6065 if (vm_page_local_q) { 6066 for (i = 0; i < vm_page_local_q_count; i++) { 6067 struct vpl *lq; 6068 lq = &vm_page_local_q[i].vpl_un.vpl; 6069 VPL_LOCK(&lq->vpl_lock); 6070 } 6071 } 6072#endif /* MACH_ASSERT || DEBUG */ 6073 6074 clock_get_uptime(&start); 6075 6076 m = (vm_page_t) queue_first(&vm_page_queue_anonymous); 6077 while (m && !queue_end(&vm_page_queue_anonymous, (queue_entry_t)m)) 6078 { 6079 next = (vm_page_t) m->pageq.next; 6080 if (hibernate_page_bittst(page_list, m->phys_page)) 6081 { 6082 if (m->dirty) 6083 count_discard_purgeable++; 6084 else 6085 count_discard_inactive++; 6086 hibernate_discard_page(m); 6087 } 6088 m = next; 6089 } 6090 6091 for( i = 0; i <= VM_PAGE_MAX_SPECULATIVE_AGE_Q; i++ ) 6092 { 6093 m = (vm_page_t) queue_first(&vm_page_queue_speculative[i].age_q); 6094 while (m && !queue_end(&vm_page_queue_speculative[i].age_q, (queue_entry_t)m)) 6095 { 6096 next = (vm_page_t) m->pageq.next; 6097 if (hibernate_page_bittst(page_list, m->phys_page)) 6098 { 6099 count_discard_speculative++; 6100 hibernate_discard_page(m); 6101 } 6102 m = next; 6103 } 6104 } 6105 6106 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 6107 while (m && !queue_end(&vm_page_queue_inactive, (queue_entry_t)m)) 6108 { 6109 next = (vm_page_t) m->pageq.next; 6110 if (hibernate_page_bittst(page_list, m->phys_page)) 6111 { 6112 if (m->dirty) 6113 count_discard_purgeable++; 6114 else 6115 count_discard_inactive++; 6116 hibernate_discard_page(m); 6117 } 6118 m = next; 6119 } 6120 6121 m = (vm_page_t) queue_first(&vm_page_queue_active); 6122 while (m && !queue_end(&vm_page_queue_active, (queue_entry_t)m)) 6123 { 6124 next = (vm_page_t) m->pageq.next; 6125 
if (hibernate_page_bittst(page_list, m->phys_page)) 6126 { 6127 if (m->dirty) 6128 count_discard_purgeable++; 6129 else 6130 count_discard_active++; 6131 hibernate_discard_page(m); 6132 } 6133 m = next; 6134 } 6135 6136 m = (vm_page_t) queue_first(&vm_page_queue_cleaned); 6137 while (m && !queue_end(&vm_page_queue_cleaned, (queue_entry_t)m)) 6138 { 6139 next = (vm_page_t) m->pageq.next; 6140 if (hibernate_page_bittst(page_list, m->phys_page)) 6141 { 6142 if (m->dirty) 6143 count_discard_purgeable++; 6144 else 6145 count_discard_cleaned++; 6146 hibernate_discard_page(m); 6147 } 6148 m = next; 6149 } 6150 6151#if MACH_ASSERT || DEBUG 6152 if (vm_page_local_q) { 6153 for (i = 0; i < vm_page_local_q_count; i++) { 6154 struct vpl *lq; 6155 lq = &vm_page_local_q[i].vpl_un.vpl; 6156 VPL_UNLOCK(&lq->vpl_lock); 6157 } 6158 } 6159 vm_page_unlock_queues(); 6160#endif /* MACH_ASSERT || DEBUG */ 6161 6162 clock_get_uptime(&end); 6163 absolutetime_to_nanoseconds(end - start, &nsec); 6164 HIBLOG("hibernate_page_list_discard time: %qd ms, discarded act %d inact %d purgeable %d spec %d cleaned %d\n", 6165 nsec / 1000000ULL, 6166 count_discard_active, count_discard_inactive, count_discard_purgeable, count_discard_speculative, count_discard_cleaned); 6167} 6168 6169boolean_t hibernate_paddr_map_inited = FALSE; 6170boolean_t hibernate_rebuild_needed = FALSE; 6171unsigned int hibernate_teardown_last_valid_compact_indx = -1; 6172vm_page_t hibernate_rebuild_hash_list = NULL; 6173 6174unsigned int hibernate_teardown_found_tabled_pages = 0; 6175unsigned int hibernate_teardown_found_created_pages = 0; 6176unsigned int hibernate_teardown_found_free_pages = 0; 6177unsigned int hibernate_teardown_vm_page_free_count; 6178 6179 6180struct ppnum_mapping { 6181 struct ppnum_mapping *ppnm_next; 6182 ppnum_t ppnm_base_paddr; 6183 unsigned int ppnm_sindx; 6184 unsigned int ppnm_eindx; 6185}; 6186 6187struct ppnum_mapping *ppnm_head; 6188struct ppnum_mapping *ppnm_last_found = NULL; 6189 6190 6191void 
6192hibernate_create_paddr_map() 6193{ 6194 unsigned int i; 6195 ppnum_t next_ppnum_in_run = 0; 6196 struct ppnum_mapping *ppnm = NULL; 6197 6198 if (hibernate_paddr_map_inited == FALSE) { 6199 6200 for (i = 0; i < vm_pages_count; i++) { 6201 6202 if (ppnm) 6203 ppnm->ppnm_eindx = i; 6204 6205 if (ppnm == NULL || vm_pages[i].phys_page != next_ppnum_in_run) { 6206 6207 ppnm = kalloc(sizeof(struct ppnum_mapping)); 6208 6209 ppnm->ppnm_next = ppnm_head; 6210 ppnm_head = ppnm; 6211 6212 ppnm->ppnm_sindx = i; 6213 ppnm->ppnm_base_paddr = vm_pages[i].phys_page; 6214 } 6215 next_ppnum_in_run = vm_pages[i].phys_page + 1; 6216 } 6217 ppnm->ppnm_eindx++; 6218 6219 hibernate_paddr_map_inited = TRUE; 6220 } 6221} 6222 6223ppnum_t 6224hibernate_lookup_paddr(unsigned int indx) 6225{ 6226 struct ppnum_mapping *ppnm = NULL; 6227 6228 ppnm = ppnm_last_found; 6229 6230 if (ppnm) { 6231 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) 6232 goto done; 6233 } 6234 for (ppnm = ppnm_head; ppnm; ppnm = ppnm->ppnm_next) { 6235 6236 if (indx >= ppnm->ppnm_sindx && indx < ppnm->ppnm_eindx) { 6237 ppnm_last_found = ppnm; 6238 break; 6239 } 6240 } 6241 if (ppnm == NULL) 6242 panic("hibernate_lookup_paddr of %d failed\n", indx); 6243done: 6244 return (ppnm->ppnm_base_paddr + (indx - ppnm->ppnm_sindx)); 6245} 6246 6247 6248uint32_t 6249hibernate_mark_as_unneeded(addr64_t saddr, addr64_t eaddr, hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired) 6250{ 6251 addr64_t saddr_aligned; 6252 addr64_t eaddr_aligned; 6253 addr64_t addr; 6254 ppnum_t paddr; 6255 unsigned int mark_as_unneeded_pages = 0; 6256 6257 saddr_aligned = (saddr + PAGE_MASK_64) & ~PAGE_MASK_64; 6258 eaddr_aligned = eaddr & ~PAGE_MASK_64; 6259 6260 for (addr = saddr_aligned; addr < eaddr_aligned; addr += PAGE_SIZE_64) { 6261 6262 paddr = pmap_find_phys(kernel_pmap, addr); 6263 6264 assert(paddr); 6265 6266 hibernate_page_bitset(page_list, TRUE, paddr); 6267 hibernate_page_bitset(page_list_wired, TRUE, 
paddr); 6268 6269 mark_as_unneeded_pages++; 6270 } 6271 return (mark_as_unneeded_pages); 6272} 6273 6274 6275void 6276hibernate_hash_insert_page(vm_page_t mem) 6277{ 6278 vm_page_bucket_t *bucket; 6279 int hash_id; 6280 6281 assert(mem->hashed); 6282 assert(mem->object); 6283 assert(mem->offset != (vm_object_offset_t) -1); 6284 6285 /* 6286 * Insert it into the object_object/offset hash table 6287 */ 6288 hash_id = vm_page_hash(mem->object, mem->offset); 6289 bucket = &vm_page_buckets[hash_id]; 6290 6291 mem->next_m = bucket->page_list; 6292 bucket->page_list = VM_PAGE_PACK_PTR(mem); 6293} 6294 6295 6296void 6297hibernate_free_range(int sindx, int eindx) 6298{ 6299 vm_page_t mem; 6300 unsigned int color; 6301 6302 while (sindx < eindx) { 6303 mem = &vm_pages[sindx]; 6304 6305 vm_page_init(mem, hibernate_lookup_paddr(sindx), FALSE); 6306 6307 mem->lopage = FALSE; 6308 mem->free = TRUE; 6309 6310 color = mem->phys_page & vm_color_mask; 6311 queue_enter_first(&vm_page_queue_free[color], 6312 mem, 6313 vm_page_t, 6314 pageq); 6315 vm_page_free_count++; 6316 6317 sindx++; 6318 } 6319} 6320 6321 6322extern void hibernate_rebuild_pmap_structs(void); 6323 6324void 6325hibernate_rebuild_vm_structs(void) 6326{ 6327 int cindx, sindx, eindx; 6328 vm_page_t mem, tmem, mem_next; 6329 AbsoluteTime startTime, endTime; 6330 uint64_t nsec; 6331 6332 if (hibernate_rebuild_needed == FALSE) 6333 return; 6334 6335 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_START, 0, 0, 0, 0, 0); 6336 HIBLOG("hibernate_rebuild started\n"); 6337 6338 clock_get_uptime(&startTime); 6339 6340 hibernate_rebuild_pmap_structs(); 6341 6342 bzero(&vm_page_buckets[0], vm_page_bucket_count * sizeof(vm_page_bucket_t)); 6343 eindx = vm_pages_count; 6344 6345 for (cindx = hibernate_teardown_last_valid_compact_indx; cindx >= 0; cindx--) { 6346 6347 mem = &vm_pages[cindx]; 6348 /* 6349 * hibernate_teardown_vm_structs leaves the location where 6350 * this vm_page_t must be located in "next". 
6351 */ 6352 tmem = VM_PAGE_UNPACK_PTR(mem->next_m); 6353 mem->next_m = VM_PAGE_PACK_PTR(NULL); 6354 6355 sindx = (int)(tmem - &vm_pages[0]); 6356 6357 if (mem != tmem) { 6358 /* 6359 * this vm_page_t was moved by hibernate_teardown_vm_structs, 6360 * so move it back to its real location 6361 */ 6362 *tmem = *mem; 6363 mem = tmem; 6364 } 6365 if (mem->hashed) 6366 hibernate_hash_insert_page(mem); 6367 /* 6368 * the 'hole' between this vm_page_t and the previous 6369 * vm_page_t we moved needs to be initialized as 6370 * a range of free vm_page_t's 6371 */ 6372 hibernate_free_range(sindx + 1, eindx); 6373 6374 eindx = sindx; 6375 } 6376 if (sindx) 6377 hibernate_free_range(0, sindx); 6378 6379 assert(vm_page_free_count == hibernate_teardown_vm_page_free_count); 6380 6381 /* 6382 * process the list of vm_page_t's that were entered in the hash, 6383 * but were not located in the vm_pages arrary... these are 6384 * vm_page_t's that were created on the fly (i.e. fictitious) 6385 */ 6386 for (mem = hibernate_rebuild_hash_list; mem; mem = mem_next) { 6387 mem_next = VM_PAGE_UNPACK_PTR(mem->next_m); 6388 6389 mem->next_m = VM_PAGE_PACK_PTR(NULL); 6390 hibernate_hash_insert_page(mem); 6391 } 6392 hibernate_rebuild_hash_list = NULL; 6393 6394 clock_get_uptime(&endTime); 6395 SUB_ABSOLUTETIME(&endTime, &startTime); 6396 absolutetime_to_nanoseconds(endTime, &nsec); 6397 6398 HIBLOG("hibernate_rebuild completed - took %qd msecs\n", nsec / 1000000ULL); 6399 6400 hibernate_rebuild_needed = FALSE; 6401 6402 KERNEL_DEBUG_CONSTANT(IOKDBG_CODE(DBG_HIBERNATE, 13) | DBG_FUNC_END, 0, 0, 0, 0, 0); 6403} 6404 6405 6406extern void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *); 6407 6408uint32_t 6409hibernate_teardown_vm_structs(hibernate_page_list_t *page_list, hibernate_page_list_t *page_list_wired) 6410{ 6411 unsigned int i; 6412 unsigned int compact_target_indx; 6413 vm_page_t mem, mem_next; 6414 vm_page_bucket_t *bucket; 6415 unsigned int mark_as_unneeded_pages = 0; 6416 
	unsigned int	unneeded_vm_page_bucket_pages = 0;
	unsigned int	unneeded_vm_pages_pages = 0;
	unsigned int	unneeded_pmap_pages = 0;
	addr64_t	start_of_unneeded = 0;
	addr64_t	end_of_unneeded = 0;


	if (hibernate_should_abort())
		return (0);

	HIBLOG("hibernate_teardown: wired_pages %d, free_pages %d, active_pages %d, inactive_pages %d, speculative_pages %d, cleaned_pages %d, compressor_pages %d\n",
	       vm_page_wire_count, vm_page_free_count, vm_page_active_count, vm_page_inactive_count, vm_page_speculative_count,
	       vm_page_cleaned_count, compressor_object->resident_page_count);

	/*
	 * Pass 1: unhook every hashed page that does NOT live inside the
	 * vm_pages[] array (i.e. fictitious/on-the-fly pages) onto
	 * hibernate_rebuild_hash_list so the rebuild pass can re-insert
	 * them; the buckets themselves become unneeded image pages.
	 */
	for (i = 0; i < vm_page_bucket_count; i++) {

		bucket = &vm_page_buckets[i];

		for (mem = VM_PAGE_UNPACK_PTR(bucket->page_list); mem != VM_PAGE_NULL; mem = mem_next) {
			assert(mem->hashed);

			mem_next = VM_PAGE_UNPACK_PTR(mem->next_m);

			if (mem < &vm_pages[0] || mem >= &vm_pages[vm_pages_count]) {
				mem->next_m = VM_PAGE_PACK_PTR(hibernate_rebuild_hash_list);
				hibernate_rebuild_hash_list = mem;
			}
		}
	}
	unneeded_vm_page_bucket_pages = hibernate_mark_as_unneeded((addr64_t)&vm_page_buckets[0], (addr64_t)&vm_page_buckets[vm_page_bucket_count], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_page_bucket_pages;

	/* saved so the rebuild pass can assert it restored every free page */
	hibernate_teardown_vm_page_free_count = vm_page_free_count;

	compact_target_indx = 0;

	/*
	 * Pass 2: compact vm_pages[] — pull every free page off its color
	 * queue, and slide each in-use page down into the lowest free
	 * "hole", remembering its original slot in next_m so rebuild can
	 * move it back.  Everything past the last valid compact index is
	 * then unneeded in the image.
	 */
	for (i = 0; i < vm_pages_count; i++) {

		mem = &vm_pages[i];

		if (mem->free) {
			unsigned int color;

			assert(mem->busy);
			assert(!mem->lopage);

			color = mem->phys_page & vm_color_mask;

			queue_remove(&vm_page_queue_free[color],
				     mem,
				     vm_page_t,
				     pageq);
			mem->pageq.next = NULL;
			mem->pageq.prev = NULL;

			vm_page_free_count--;

			hibernate_teardown_found_free_pages++;

			/* remember the first free slot that can be filled */
			if ( !vm_pages[compact_target_indx].free)
				compact_target_indx = i;
		}
		else {
			/*
			 * record this vm_page_t's original location
			 * we need this even if it doesn't get moved
			 * as an indicator to the rebuild function that
			 * we don't have to move it
			 */
			mem->next_m = VM_PAGE_PACK_PTR(mem);

			if (vm_pages[compact_target_indx].free) {
				/*
				 * we've got a hole to fill, so
				 * move this vm_page_t to it's new home
				 */
				vm_pages[compact_target_indx] = *mem;
				/* old slot becomes part of the unneeded tail */
				mem->free = TRUE;

				hibernate_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else
				hibernate_teardown_last_valid_compact_indx = i;
		}
	}
	unneeded_vm_pages_pages = hibernate_mark_as_unneeded((addr64_t)&vm_pages[hibernate_teardown_last_valid_compact_indx+1],
							     (addr64_t)&vm_pages[vm_pages_count-1], page_list, page_list_wired);
	mark_as_unneeded_pages += unneeded_vm_pages_pages;

	/* Pass 3: let the pmap layer nominate its own unneeded range */
	hibernate_teardown_pmap_structs(&start_of_unneeded, &end_of_unneeded);

	if (start_of_unneeded) {
		unneeded_pmap_pages = hibernate_mark_as_unneeded(start_of_unneeded, end_of_unneeded, page_list, page_list_wired);
		mark_as_unneeded_pages += unneeded_pmap_pages;
	}
	HIBLOG("hibernate_teardown: mark_as_unneeded_pages %d, %d, %d\n", unneeded_vm_page_bucket_pages, unneeded_vm_pages_pages, unneeded_pmap_pages);

	hibernate_rebuild_needed = TRUE;

	return (mark_as_unneeded_pages);
}


#endif /* HIBERNATION */

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG

#include <mach_debug/hash_info.h>
#include <vm/vm_debug.h>

/*
 *	Routine:	vm_page_info
 *	Purpose:
 *		Return information about the global VP table.
 *		Fills the buffer with as much information as possible
 *		and returns the desired size of the buffer.
 *	Conditions:
 *		Nothing locked.
The caller should provide 6536 * possibly-pageable memory. 6537 */ 6538 6539unsigned int 6540vm_page_info( 6541 hash_info_bucket_t *info, 6542 unsigned int count) 6543{ 6544 unsigned int i; 6545 lck_spin_t *bucket_lock; 6546 6547 if (vm_page_bucket_count < count) 6548 count = vm_page_bucket_count; 6549 6550 for (i = 0; i < count; i++) { 6551 vm_page_bucket_t *bucket = &vm_page_buckets[i]; 6552 unsigned int bucket_count = 0; 6553 vm_page_t m; 6554 6555 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK]; 6556 lck_spin_lock(bucket_lock); 6557 6558 for (m = VM_PAGE_UNPACK_PTR(bucket->page_list); m != VM_PAGE_NULL; m = VM_PAGE_UNPACK_PTR(m->next_m)) 6559 bucket_count++; 6560 6561 lck_spin_unlock(bucket_lock); 6562 6563 /* don't touch pageable memory while holding locks */ 6564 info[i].hib_count = bucket_count; 6565 } 6566 6567 return vm_page_bucket_count; 6568} 6569#endif /* MACH_VM_DEBUG */ 6570 6571#if VM_PAGE_BUCKETS_CHECK 6572void 6573vm_page_buckets_check(void) 6574{ 6575 unsigned int i; 6576 vm_page_t p; 6577 unsigned int p_hash; 6578 vm_page_bucket_t *bucket; 6579 lck_spin_t *bucket_lock; 6580 6581 if (!vm_page_buckets_check_ready) { 6582 return; 6583 } 6584 6585#if HIBERNATION 6586 if (hibernate_rebuild_needed || 6587 hibernate_rebuild_hash_list) { 6588 panic("BUCKET_CHECK: hibernation in progress: " 6589 "rebuild_needed=%d rebuild_hash_list=%p\n", 6590 hibernate_rebuild_needed, 6591 hibernate_rebuild_hash_list); 6592 } 6593#endif /* HIBERNATION */ 6594 6595#if VM_PAGE_FAKE_BUCKETS 6596 char *cp; 6597 for (cp = (char *) vm_page_fake_buckets_start; 6598 cp < (char *) vm_page_fake_buckets_end; 6599 cp++) { 6600 if (*cp != 0x5a) { 6601 panic("BUCKET_CHECK: corruption at %p in fake buckets " 6602 "[0x%llx:0x%llx]\n", 6603 cp, 6604 (uint64_t) vm_page_fake_buckets_start, 6605 (uint64_t) vm_page_fake_buckets_end); 6606 } 6607 } 6608#endif /* VM_PAGE_FAKE_BUCKETS */ 6609 6610 for (i = 0; i < vm_page_bucket_count; i++) { 6611 bucket = &vm_page_buckets[i]; 6612 if 
(!bucket->page_list) { 6613 continue; 6614 } 6615 6616 bucket_lock = &vm_page_bucket_locks[i / BUCKETS_PER_LOCK]; 6617 lck_spin_lock(bucket_lock); 6618 p = VM_PAGE_UNPACK_PTR(bucket->page_list); 6619 while (p != VM_PAGE_NULL) { 6620 if (!p->hashed) { 6621 panic("BUCKET_CHECK: page %p (%p,0x%llx) " 6622 "hash %d in bucket %d at %p " 6623 "is not hashed\n", 6624 p, p->object, p->offset, 6625 p_hash, i, bucket); 6626 } 6627 p_hash = vm_page_hash(p->object, p->offset); 6628 if (p_hash != i) { 6629 panic("BUCKET_CHECK: corruption in bucket %d " 6630 "at %p: page %p object %p offset 0x%llx " 6631 "hash %d\n", 6632 i, bucket, p, p->object, p->offset, 6633 p_hash); 6634 } 6635 p = VM_PAGE_UNPACK_PTR(p->next_m); 6636 } 6637 lck_spin_unlock(bucket_lock); 6638 } 6639 6640// printf("BUCKET_CHECK: checked buckets\n"); 6641} 6642#endif /* VM_PAGE_BUCKETS_CHECK */ 6643