1/* 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58/* 59 * File: vm/vm_pageout.c 60 * Author: Avadis Tevanian, Jr., Michael Wayne Young 61 * Date: 1985 62 * 63 * The proverbial page-out daemon. 64 */ 65 66#include <stdint.h> 67 68#include <debug.h> 69#include <mach_pagemap.h> 70#include <mach_cluster_stats.h> 71#include <mach_kdb.h> 72#include <advisory_pageout.h> 73 74#include <mach/mach_types.h> 75#include <mach/memory_object.h> 76#include <mach/memory_object_default.h> 77#include <mach/memory_object_control_server.h> 78#include <mach/mach_host_server.h> 79#include <mach/upl.h> 80#include <mach/vm_map.h> 81#include <mach/vm_param.h> 82#include <mach/vm_statistics.h> 83#include <mach/sdt.h> 84 85#include <kern/kern_types.h> 86#include <kern/counters.h> 87#include <kern/host_statistics.h> 88#include <kern/machine.h> 89#include <kern/misc_protos.h> 90#include <kern/thread.h> 91#include <kern/xpr.h> 92#include <kern/kalloc.h> 93 94#include <machine/vm_tuning.h> 95 96#if CONFIG_EMBEDDED 97#include <sys/kern_memorystatus.h> 98#endif 99 100#include <vm/pmap.h> 101#include <vm/vm_fault.h> 102#include <vm/vm_map.h> 103#include <vm/vm_object.h> 104#include <vm/vm_page.h> 105#include <vm/vm_pageout.h> 106#include <vm/vm_protos.h> /* must be last */ 107#include <vm/memory_object.h> 108#include <vm/vm_purgeable_internal.h> 109 110/* 111 * ENCRYPTED SWAP: 112 */ 113#include <../bsd/crypto/aes/aes.h> 114 115 116#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */ 117#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100 118#endif 119 120#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */ 121#ifdef CONFIG_EMBEDDED 122#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024 123#else 124#define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096 125#endif 126#endif 127 128#ifndef VM_PAGEOUT_DEADLOCK_RELIEF 129#define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */ 130#endif 131 132#ifndef VM_PAGEOUT_INACTIVE_RELIEF 133#define VM_PAGEOUT_INACTIVE_RELIEF 50 /* minimum number of pages to move to the inactive q */ 134#endif 135 136#ifndef VM_PAGE_LAUNDRY_MAX 137#define VM_PAGE_LAUNDRY_MAX 16UL /* maximum pageouts on a given pageout queue */ 138#endif /* VM_PAGEOUT_LAUNDRY_MAX */ 139 140#ifndef VM_PAGEOUT_BURST_WAIT 141#define VM_PAGEOUT_BURST_WAIT 30 /* milliseconds per page */ 142#endif /* VM_PAGEOUT_BURST_WAIT */ 143 144#ifndef VM_PAGEOUT_EMPTY_WAIT 145#define VM_PAGEOUT_EMPTY_WAIT 200 /* milliseconds */ 146#endif /* VM_PAGEOUT_EMPTY_WAIT */ 147 148#ifndef VM_PAGEOUT_DEADLOCK_WAIT 149#define VM_PAGEOUT_DEADLOCK_WAIT 300 /* milliseconds */ 150#endif /* VM_PAGEOUT_DEADLOCK_WAIT */ 151 152#ifndef VM_PAGEOUT_IDLE_WAIT 153#define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */ 154#endif /* VM_PAGEOUT_IDLE_WAIT */ 155 156#ifndef VM_PAGE_SPECULATIVE_TARGET 157#define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / 20) 158#endif /* VM_PAGE_SPECULATIVE_TARGET */ 159 160#ifndef VM_PAGE_INACTIVE_HEALTHY_LIMIT 161#define VM_PAGE_INACTIVE_HEALTHY_LIMIT(total) ((total) * 1 / 200) 162#endif /* VM_PAGE_INACTIVE_HEALTHY_LIMIT */ 163 164 165/* 166 * To obtain a reasonable LRU approximation, the inactive queue 167 * needs to be large enough to give pages on it a chance to be 168 * referenced a second time. This macro defines the fraction 169 * of active+inactive pages that should be inactive. 170 * The pageout daemon uses it to update vm_page_inactive_target. 171 * 172 * If vm_page_free_count falls below vm_page_free_target and 173 * vm_page_inactive_count is below vm_page_inactive_target, 174 * then the pageout daemon starts running. 175 */ 176 177#ifndef VM_PAGE_INACTIVE_TARGET 178#define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 3) 179#endif /* VM_PAGE_INACTIVE_TARGET */ 180 181/* 182 * Once the pageout daemon starts running, it keeps going 183 * until vm_page_free_count meets or exceeds vm_page_free_target. 184 */ 185 186#ifndef VM_PAGE_FREE_TARGET 187#ifdef CONFIG_EMBEDDED 188#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100) 189#else 190#define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80) 191#endif 192#endif /* VM_PAGE_FREE_TARGET */ 193 194/* 195 * The pageout daemon always starts running once vm_page_free_count 196 * falls below vm_page_free_min. 197 */ 198 199#ifndef VM_PAGE_FREE_MIN 200#ifdef CONFIG_EMBEDDED 201#define VM_PAGE_FREE_MIN(free) (10 + (free) / 200) 202#else 203#define VM_PAGE_FREE_MIN(free) (10 + (free) / 100) 204#endif 205#endif /* VM_PAGE_FREE_MIN */ 206 207#define VM_PAGE_FREE_MIN_LIMIT 1500 208#define VM_PAGE_FREE_TARGET_LIMIT 2000 209 210 211/* 212 * When vm_page_free_count falls below vm_page_free_reserved, 213 * only vm-privileged threads can allocate pages. vm-privilege 214 * allows the pageout daemon and default pager (and any other 215 * associated threads needed for default pageout) to continue 216 * operation by dipping into the reserved pool of pages. 217 */ 218 219#ifndef VM_PAGE_FREE_RESERVED 220#define VM_PAGE_FREE_RESERVED(n) \ 221 ((6 * VM_PAGE_LAUNDRY_MAX) + (n)) 222#endif /* VM_PAGE_FREE_RESERVED */ 223 224/* 225 * When we dequeue pages from the inactive list, they are 226 * reactivated (ie, put back on the active queue) if referenced. 227 * However, it is possible to starve the free list if other 228 * processors are referencing pages faster than we can turn off 229 * the referenced bit. So we limit the number of reactivations 230 * we will make per call of vm_pageout_scan(). 231 */ 232#define VM_PAGE_REACTIVATE_LIMIT_MAX 20000 233#ifndef VM_PAGE_REACTIVATE_LIMIT 234#ifdef CONFIG_EMBEDDED 235#define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2) 236#else 237#define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX)) 238#endif 239#endif /* VM_PAGE_REACTIVATE_LIMIT */ 240#define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 100 241 242 243/* 244 * must hold the page queues lock to 245 * manipulate this structure 246 */ 247struct vm_pageout_queue { 248 queue_head_t pgo_pending; /* laundry pages to be processed by pager's iothread */ 249 unsigned int pgo_laundry; /* current count of laundry pages on queue or in flight */ 250 unsigned int pgo_maxlaundry; 251 252 unsigned int pgo_idle:1, /* iothread is blocked waiting for work to do */ 253 pgo_busy:1, /* iothread is currently processing request from pgo_pending */ 254 pgo_throttled:1,/* vm_pageout_scan thread needs a wakeup when pgo_laundry drops */ 255 :0; 256}; 257 258#define VM_PAGE_Q_THROTTLED(q) \ 259 ((q)->pgo_laundry >= (q)->pgo_maxlaundry) 260 261 262/* 263 * Exported variable used to broadcast the activation of the pageout scan 264 * Working Set uses this to throttle its use of pmap removes. In this 265 * way, code which runs within memory in an uncontested context does 266 * not keep encountering soft faults. 267 */ 268 269unsigned int vm_pageout_scan_event_counter = 0; 270 271/* 272 * Forward declarations for internal routines. 273 */ 274 275static void vm_pageout_garbage_collect(int); 276static void vm_pageout_iothread_continue(struct vm_pageout_queue *); 277static void vm_pageout_iothread_external(void); 278static void vm_pageout_iothread_internal(void); 279static void vm_pageout_queue_steal(vm_page_t); 280 281extern void vm_pageout_continue(void); 282extern void vm_pageout_scan(void); 283 284static thread_t vm_pageout_external_iothread = THREAD_NULL; 285static thread_t vm_pageout_internal_iothread = THREAD_NULL; 286 287unsigned int vm_pageout_reserved_internal = 0; 288unsigned int vm_pageout_reserved_really = 0; 289 290unsigned int vm_pageout_idle_wait = 0; /* milliseconds */ 291unsigned int vm_pageout_empty_wait = 0; /* milliseconds */ 292unsigned int vm_pageout_burst_wait = 0; /* milliseconds */ 293unsigned int vm_pageout_deadlock_wait = 0; /* milliseconds */ 294unsigned int vm_pageout_deadlock_relief = 0; 295unsigned int vm_pageout_inactive_relief = 0; 296unsigned int vm_pageout_burst_active_throttle = 0; 297unsigned int vm_pageout_burst_inactive_throttle = 0; 298 299/* 300 * Protection against zero fill flushing live working sets derived 301 * from existing backing store and files 302 */ 303unsigned int vm_accellerate_zf_pageout_trigger = 400; 304unsigned int zf_queue_min_count = 100; 305unsigned int vm_zf_count = 0; 306unsigned int vm_zf_queue_count = 0; 307 308/* 309 * These variables record the pageout daemon's actions: 310 * how many pages it looks at and what happens to those pages. 311 * No locking needed because only one thread modifies the variables. 312 */ 313 314unsigned int vm_pageout_active = 0; /* debugging */ 315unsigned int vm_pageout_inactive = 0; /* debugging */ 316unsigned int vm_pageout_inactive_throttled = 0; /* debugging */ 317unsigned int vm_pageout_inactive_forced = 0; /* debugging */ 318unsigned int vm_pageout_inactive_nolock = 0; /* debugging */ 319unsigned int vm_pageout_inactive_avoid = 0; /* debugging */ 320unsigned int vm_pageout_inactive_busy = 0; /* debugging */ 321unsigned int vm_pageout_inactive_absent = 0; /* debugging */ 322unsigned int vm_pageout_inactive_used = 0; /* debugging */ 323unsigned int vm_pageout_inactive_clean = 0; /* debugging */ 324unsigned int vm_pageout_inactive_dirty = 0; /* debugging */ 325unsigned int vm_pageout_dirty_no_pager = 0; /* debugging */ 326unsigned int vm_pageout_purged_objects = 0; /* debugging */ 327unsigned int vm_stat_discard = 0; /* debugging */ 328unsigned int vm_stat_discard_sent = 0; /* debugging */ 329unsigned int vm_stat_discard_failure = 0; /* debugging */ 330unsigned int vm_stat_discard_throttle = 0; /* debugging */ 331unsigned int vm_pageout_reactivation_limit_exceeded = 0; /* debugging */ 332unsigned int vm_pageout_catch_ups = 0; /* debugging */ 333unsigned int vm_pageout_inactive_force_reclaim = 0; /* debugging */ 334 335unsigned int vm_pageout_scan_active_throttled = 0; 336unsigned int vm_pageout_scan_inactive_throttled = 0; 337unsigned int vm_pageout_scan_throttle = 0; /* debugging */ 338unsigned int vm_pageout_scan_burst_throttle = 0; /* debugging */ 339unsigned int vm_pageout_scan_empty_throttle = 0; /* debugging */ 340unsigned int vm_pageout_scan_deadlock_detected = 0; /* debugging */ 341unsigned int vm_pageout_scan_active_throttle_success = 0; /* debugging */ 342unsigned int vm_pageout_scan_inactive_throttle_success = 0; /* debugging */ 343/* 344 * Backing store throttle when BS is exhausted 345 */ 346unsigned int vm_backing_store_low = 0; 347 348unsigned int vm_pageout_out_of_line = 0; 349unsigned int vm_pageout_in_place = 0; 350 351/* 352 * ENCRYPTED SWAP: 353 * counters and statistics... 354 */ 355unsigned long vm_page_decrypt_counter = 0; 356unsigned long vm_page_decrypt_for_upl_counter = 0; 357unsigned long vm_page_encrypt_counter = 0; 358unsigned long vm_page_encrypt_abort_counter = 0; 359unsigned long vm_page_encrypt_already_encrypted_counter = 0; 360boolean_t vm_pages_encrypted = FALSE; /* are there encrypted pages ? */ 361 362struct vm_pageout_queue vm_pageout_queue_internal; 363struct vm_pageout_queue vm_pageout_queue_external; 364 365unsigned int vm_page_speculative_target = 0; 366 367vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL; 368 369unsigned long vm_cs_validated_resets = 0; 370 371/* 372 * Routine: vm_backing_store_disable 373 * Purpose: 374 * Suspend non-privileged threads wishing to extend 375 * backing store when we are low on backing store 376 * (Synchronized by caller) 377 */ 378void 379vm_backing_store_disable( 380 boolean_t disable) 381{ 382 if(disable) { 383 vm_backing_store_low = 1; 384 } else { 385 if(vm_backing_store_low) { 386 vm_backing_store_low = 0; 387 thread_wakeup((event_t) &vm_backing_store_low); 388 } 389 } 390} 391 392 393#if MACH_CLUSTER_STATS 394unsigned long vm_pageout_cluster_dirtied = 0; 395unsigned long vm_pageout_cluster_cleaned = 0; 396unsigned long vm_pageout_cluster_collisions = 0; 397unsigned long vm_pageout_cluster_clusters = 0; 398unsigned long vm_pageout_cluster_conversions = 0; 399unsigned long vm_pageout_target_collisions = 0; 400unsigned long vm_pageout_target_page_dirtied = 0; 401unsigned long vm_pageout_target_page_freed = 0; 402#define CLUSTER_STAT(clause) clause 403#else /* MACH_CLUSTER_STATS */ 404#define CLUSTER_STAT(clause) 405#endif /* MACH_CLUSTER_STATS */ 406 407/* 408 * Routine: vm_pageout_object_terminate 409 * Purpose: 410 * Destroy the pageout_object, and perform all of the 411 * required cleanup actions. 412 * 413 * In/Out conditions: 414 * The object must be locked, and will be returned locked. 415 */ 416void 417vm_pageout_object_terminate( 418 vm_object_t object) 419{ 420 vm_object_t shadow_object; 421 422 /* 423 * Deal with the deallocation (last reference) of a pageout object 424 * (used for cleaning-in-place) by dropping the paging references/ 425 * freeing pages in the original object. 426 */ 427 428 assert(object->pageout); 429 shadow_object = object->shadow; 430 vm_object_lock(shadow_object); 431 432 while (!queue_empty(&object->memq)) { 433 vm_page_t p, m; 434 vm_object_offset_t offset; 435 436 p = (vm_page_t) queue_first(&object->memq); 437 438 assert(p->private); 439 assert(p->pageout); 440 p->pageout = FALSE; 441 assert(!p->cleaning); 442 443 offset = p->offset; 444 VM_PAGE_FREE(p); 445 p = VM_PAGE_NULL; 446 447 m = vm_page_lookup(shadow_object, 448 offset + object->shadow_offset); 449 450 if(m == VM_PAGE_NULL) 451 continue; 452 assert(m->cleaning); 453 /* used as a trigger on upl_commit etc to recognize the */ 454 /* pageout daemon's subseqent desire to pageout a cleaning */ 455 /* page. When the bit is on the upl commit code will */ 456 /* respect the pageout bit in the target page over the */ 457 /* caller's page list indication */ 458 m->dump_cleaning = FALSE; 459 460 assert((m->dirty) || (m->precious) || 461 (m->busy && m->cleaning)); 462 463 /* 464 * Handle the trusted pager throttle. 465 * Also decrement the burst throttle (if external). 466 */ 467 vm_page_lock_queues(); 468 if (m->laundry) { 469 vm_pageout_throttle_up(m); 470 } 471 472 /* 473 * Handle the "target" page(s). These pages are to be freed if 474 * successfully cleaned. Target pages are always busy, and are 475 * wired exactly once. The initial target pages are not mapped, 476 * (so cannot be referenced or modified) but converted target 477 * pages may have been modified between the selection as an 478 * adjacent page and conversion to a target. 479 */ 480 if (m->pageout) { 481 assert(m->busy); 482 assert(m->wire_count == 1); 483 m->cleaning = FALSE; 484 m->encrypted_cleaning = FALSE; 485 m->pageout = FALSE; 486#if MACH_CLUSTER_STATS 487 if (m->wanted) vm_pageout_target_collisions++; 488#endif 489 /* 490 * Revoke all access to the page. Since the object is 491 * locked, and the page is busy, this prevents the page 492 * from being dirtied after the pmap_disconnect() call 493 * returns. 494 * 495 * Since the page is left "dirty" but "not modifed", we 496 * can detect whether the page was redirtied during 497 * pageout by checking the modify state. 498 */ 499 if (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED) 500 m->dirty = TRUE; 501 else 502 m->dirty = FALSE; 503 504 if (m->dirty) { 505 CLUSTER_STAT(vm_pageout_target_page_dirtied++;) 506 vm_page_unwire(m);/* reactivates */ 507 VM_STAT_INCR(reactivations); 508 PAGE_WAKEUP_DONE(m); 509 } else { 510 CLUSTER_STAT(vm_pageout_target_page_freed++;) 511 vm_page_free(m);/* clears busy, etc. */ 512 } 513 vm_page_unlock_queues(); 514 continue; 515 } 516 /* 517 * Handle the "adjacent" pages. These pages were cleaned in 518 * place, and should be left alone. 519 * If prep_pin_count is nonzero, then someone is using the 520 * page, so make it active. 521 */ 522 if (!m->active && !m->inactive && !m->throttled && !m->private) { 523 if (m->reference) 524 vm_page_activate(m); 525 else 526 vm_page_deactivate(m); 527 } 528 if((m->busy) && (m->cleaning)) { 529 530 /* the request_page_list case, (COPY_OUT_FROM FALSE) */ 531 m->busy = FALSE; 532 533 /* We do not re-set m->dirty ! */ 534 /* The page was busy so no extraneous activity */ 535 /* could have occurred. COPY_INTO is a read into the */ 536 /* new pages. CLEAN_IN_PLACE does actually write */ 537 /* out the pages but handling outside of this code */ 538 /* will take care of resetting dirty. We clear the */ 539 /* modify however for the Programmed I/O case. */ 540 pmap_clear_modify(m->phys_page); 541 542 m->absent = FALSE; 543 m->overwriting = FALSE; 544 } else if (m->overwriting) { 545 /* alternate request page list, write to page_list */ 546 /* case. Occurs when the original page was wired */ 547 /* at the time of the list request */ 548 assert(m->wire_count != 0); 549 vm_page_unwire(m);/* reactivates */ 550 m->overwriting = FALSE; 551 } else { 552 /* 553 * Set the dirty state according to whether or not the page was 554 * modified during the pageout. Note that we purposefully do 555 * NOT call pmap_clear_modify since the page is still mapped. 556 * If the page were to be dirtied between the 2 calls, this 557 * this fact would be lost. This code is only necessary to 558 * maintain statistics, since the pmap module is always 559 * consulted if m->dirty is false. 560 */ 561#if MACH_CLUSTER_STATS 562 m->dirty = pmap_is_modified(m->phys_page); 563 564 if (m->dirty) vm_pageout_cluster_dirtied++; 565 else vm_pageout_cluster_cleaned++; 566 if (m->wanted) vm_pageout_cluster_collisions++; 567#else 568 m->dirty = 0; 569#endif 570 } 571 m->cleaning = FALSE; 572 m->encrypted_cleaning = FALSE; 573 574 /* 575 * Wakeup any thread waiting for the page to be un-cleaning. 576 */ 577 PAGE_WAKEUP(m); 578 vm_page_unlock_queues(); 579 } 580 /* 581 * Account for the paging reference taken in vm_paging_object_allocate. 582 */ 583 vm_object_paging_end(shadow_object); 584 vm_object_unlock(shadow_object); 585 586 assert(object->ref_count == 0); 587 assert(object->paging_in_progress == 0); 588 assert(object->resident_page_count == 0); 589 return; 590} 591 592/* 593 * Routine: vm_pageclean_setup 594 * 595 * Purpose: setup a page to be cleaned (made non-dirty), but not 596 * necessarily flushed from the VM page cache. 597 * This is accomplished by cleaning in place. 598 * 599 * The page must not be busy, and the object and page 600 * queues must be locked. 601 * 602 */ 603void 604vm_pageclean_setup( 605 vm_page_t m, 606 vm_page_t new_m, 607 vm_object_t new_object, 608 vm_object_offset_t new_offset) 609{ 610 assert(!m->busy); 611#if 0 612 assert(!m->cleaning); 613#endif 614 615 XPR(XPR_VM_PAGEOUT, 616 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n", 617 (integer_t)m->object, m->offset, (integer_t)m, 618 (integer_t)new_m, new_offset); 619 620 pmap_clear_modify(m->phys_page); 621 622 /* 623 * Mark original page as cleaning in place. 624 */ 625 m->cleaning = TRUE; 626 m->dirty = TRUE; 627 m->precious = FALSE; 628 629 /* 630 * Convert the fictitious page to a private shadow of 631 * the real page. 632 */ 633 assert(new_m->fictitious); 634 assert(new_m->phys_page == vm_page_fictitious_addr); 635 new_m->fictitious = FALSE; 636 new_m->private = TRUE; 637 new_m->pageout = TRUE; 638 new_m->phys_page = m->phys_page; 639 vm_page_wire(new_m); 640 641 vm_page_insert(new_m, new_object, new_offset); 642 assert(!new_m->wanted); 643 new_m->busy = FALSE; 644} 645 646/* 647 * Routine: vm_pageout_initialize_page 648 * Purpose: 649 * Causes the specified page to be initialized in 650 * the appropriate memory object. This routine is used to push 651 * pages into a copy-object when they are modified in the 652 * permanent object. 653 * 654 * The page is moved to a temporary object and paged out. 655 * 656 * In/out conditions: 657 * The page in question must not be on any pageout queues. 658 * The object to which it belongs must be locked. 659 * The page must be busy, but not hold a paging reference. 660 * 661 * Implementation: 662 * Move this page to a completely new object. 663 */ 664void 665vm_pageout_initialize_page( 666 vm_page_t m) 667{ 668 vm_object_t object; 669 vm_object_offset_t paging_offset; 670 vm_page_t holding_page; 671 memory_object_t pager; 672 673 XPR(XPR_VM_PAGEOUT, 674 "vm_pageout_initialize_page, page 0x%X\n", 675 (integer_t)m, 0, 0, 0, 0); 676 assert(m->busy); 677 678 /* 679 * Verify that we really want to clean this page 680 */ 681 assert(!m->absent); 682 assert(!m->error); 683 assert(m->dirty); 684 685 /* 686 * Create a paging reference to let us play with the object. 687 */ 688 object = m->object; 689 paging_offset = m->offset + object->paging_offset; 690 691 if (m->absent || m->error || m->restart || (!m->dirty && !m->precious)) { 692 VM_PAGE_FREE(m); 693 panic("reservation without pageout?"); /* alan */ 694 vm_object_unlock(object); 695 696 return; 697 } 698 699 /* 700 * If there's no pager, then we can't clean the page. This should 701 * never happen since this should be a copy object and therefore not 702 * an external object, so the pager should always be there. 703 */ 704 705 pager = object->pager; 706 707 if (pager == MEMORY_OBJECT_NULL) { 708 VM_PAGE_FREE(m); 709 panic("missing pager for copy object"); 710 return; 711 } 712 713 /* set the page for future call to vm_fault_list_request */ 714 vm_object_paging_begin(object); 715 holding_page = NULL; 716 vm_page_lock_queues(); 717 pmap_clear_modify(m->phys_page); 718 m->dirty = TRUE; 719 m->busy = TRUE; 720 m->list_req_pending = TRUE; 721 m->cleaning = TRUE; 722 m->pageout = TRUE; 723 vm_page_wire(m); 724 vm_page_unlock_queues(); 725 vm_object_unlock(object); 726 727 /* 728 * Write the data to its pager. 729 * Note that the data is passed by naming the new object, 730 * not a virtual address; the pager interface has been 731 * manipulated to use the "internal memory" data type. 732 * [The object reference from its allocation is donated 733 * to the eventual recipient.] 734 */ 735 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE); 736 737 vm_object_lock(object); 738 vm_object_paging_end(object); 739} 740 741#if MACH_CLUSTER_STATS 742#define MAXCLUSTERPAGES 16 743struct { 744 unsigned long pages_in_cluster; 745 unsigned long pages_at_higher_offsets; 746 unsigned long pages_at_lower_offsets; 747} cluster_stats[MAXCLUSTERPAGES]; 748#endif /* MACH_CLUSTER_STATS */ 749 750 751/* 752 * vm_pageout_cluster: 753 * 754 * Given a page, queue it to the appropriate I/O thread, 755 * which will page it out and attempt to clean adjacent pages 756 * in the same operation. 757 * 758 * The page must be busy, and the object and queues locked. We will take a 759 * paging reference to prevent deallocation or collapse when we 760 * release the object lock back at the call site. The I/O thread 761 * is responsible for consuming this reference 762 * 763 * The page must not be on any pageout queue. 764 */ 765 766void 767vm_pageout_cluster(vm_page_t m) 768{ 769 vm_object_t object = m->object; 770 struct vm_pageout_queue *q; 771 772 773 XPR(XPR_VM_PAGEOUT, 774 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n", 775 (integer_t)object, m->offset, (integer_t)m, 0, 0); 776 777 /* 778 * Only a certain kind of page is appreciated here. 779 */ 780 assert(m->busy && (m->dirty || m->precious) && (m->wire_count == 0)); 781 assert(!m->cleaning && !m->pageout && !m->inactive && !m->active); 782 assert(!m->throttled); 783 784 /* 785 * protect the object from collapse - 786 * locking in the object's paging_offset. 787 */ 788 vm_object_paging_begin(object); 789 790 /* 791 * set the page for future call to vm_fault_list_request 792 * page should already be marked busy 793 */ 794 vm_page_wire(m); 795 m->list_req_pending = TRUE; 796 m->cleaning = TRUE; 797 m->pageout = TRUE; 798 m->laundry = TRUE; 799 800 if (object->internal == TRUE) 801 q = &vm_pageout_queue_internal; 802 else 803 q = &vm_pageout_queue_external; 804 q->pgo_laundry++; 805 806 m->pageout_queue = TRUE; 807 queue_enter(&q->pgo_pending, m, vm_page_t, pageq); 808 809 if (q->pgo_idle == TRUE) { 810 q->pgo_idle = FALSE; 811 thread_wakeup((event_t) &q->pgo_pending); 812 } 813} 814 815 816unsigned long vm_pageout_throttle_up_count = 0; 817 818/* 819 * A page is back from laundry. See if there are some pages waiting to 820 * go to laundry and if we can let some of them go now. 821 * 822 * Object and page queues must be locked. 823 */ 824void 825vm_pageout_throttle_up( 826 vm_page_t m) 827{ 828 struct vm_pageout_queue *q; 829 830 vm_pageout_throttle_up_count++; 831 832 assert(m->laundry); 833 assert(m->object != VM_OBJECT_NULL); 834 assert(m->object != kernel_object); 835 836 if (m->object->internal == TRUE) 837 q = &vm_pageout_queue_internal; 838 else 839 q = &vm_pageout_queue_external; 840 841 m->laundry = FALSE; 842 q->pgo_laundry--; 843 844 if (q->pgo_throttled == TRUE) { 845 q->pgo_throttled = FALSE; 846 thread_wakeup((event_t) &q->pgo_laundry); 847 } 848} 849 850 851/* 852 * vm_pageout_scan does the dirty work for the pageout daemon. 853 * It returns with vm_page_queue_free_lock held and 854 * vm_page_free_wanted == 0. 855 */ 856 857#define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT (3 * MAX_UPL_TRANSFER) 858 859#define FCS_IDLE 0 860#define FCS_DELAYED 1 861#define FCS_DEADLOCK_DETECTED 2 862 863struct flow_control { 864 int state; 865 mach_timespec_t ts; 866}; 867 868void 869vm_pageout_scan(void) 870{ 871 unsigned int loop_count = 0; 872 unsigned int inactive_burst_count = 0; 873 unsigned int active_burst_count = 0; 874 unsigned int reactivated_this_call; 875 unsigned int reactivate_limit; 876 vm_page_t local_freeq = NULL; 877 int local_freed = 0; 878 int delayed_unlock; 879 int need_internal_inactive = 0; 880 int refmod_state = 0; 881 int vm_pageout_deadlock_target = 0; 882 struct vm_pageout_queue *iq; 883 struct vm_pageout_queue *eq; 884 struct vm_speculative_age_q *sq; 885 struct flow_control flow_control; 886 boolean_t inactive_throttled = FALSE; 887 boolean_t try_failed; 888 mach_timespec_t ts; 889 unsigned int msecs = 0; 890 vm_object_t object; 891 vm_object_t last_object_tried; 892 int zf_ratio; 893 int zf_run_count; 894 uint32_t catch_up_count = 0; 895 uint32_t inactive_reclaim_run; 896 boolean_t forced_reclaim; 897 898 flow_control.state = FCS_IDLE; 899 iq = &vm_pageout_queue_internal; 900 eq = &vm_pageout_queue_external; 901 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q]; 902 903 904 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0); 905 906 907 vm_page_lock_queues(); 908 delayed_unlock = 1; /* must be nonzero if Qs are locked, 0 if unlocked */ 909 910 /* 911 * Calculate the max number of referenced pages on the inactive 912 * queue that we will reactivate. 913 */ 914 reactivated_this_call = 0; 915 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count + 916 vm_page_inactive_count); 917 inactive_reclaim_run = 0; 918 919 920/*???*/ /* 921 * We want to gradually dribble pages from the active queue 922 * to the inactive queue. If we let the inactive queue get 923 * very small, and then suddenly dump many pages into it, 924 * those pages won't get a sufficient chance to be referenced 925 * before we start taking them from the inactive queue. 926 * 927 * We must limit the rate at which we send pages to the pagers. 928 * data_write messages consume memory, for message buffers and 929 * for map-copy objects. If we get too far ahead of the pagers, 930 * we can potentially run out of memory. 931 * 932 * We can use the laundry count to limit directly the number 933 * of pages outstanding to the default pager. A similar 934 * strategy for external pagers doesn't work, because 935 * external pagers don't have to deallocate the pages sent them, 936 * and because we might have to send pages to external pagers 937 * even if they aren't processing writes. So we also 938 * use a burst count to limit writes to external pagers. 939 * 940 * When memory is very tight, we can't rely on external pagers to 941 * clean pages. They probably aren't running, because they 942 * aren't vm-privileged. If we kept sending dirty pages to them, 943 * we could exhaust the free list. 944 */ 945 946 947Restart: 948 assert(delayed_unlock!=0); 949 950 /* 951 * A page is "zero-filled" if it was not paged in from somewhere, 952 * and it belongs to an object at least VM_ZF_OBJECT_SIZE_THRESHOLD big. 953 * Recalculate the zero-filled page ratio. We use this to apportion 954 * victimized pages between the normal and zero-filled inactive 955 * queues according to their relative abundance in memory. Thus if a task 956 * is flooding memory with zf pages, we begin to hunt them down. 957 * It would be better to throttle greedy tasks at a higher level, 958 * but at the moment mach vm cannot do this. 959 */ 960 { 961 uint32_t total = vm_page_active_count + vm_page_inactive_count; 962 uint32_t normal = total - vm_zf_count; 963 964 /* zf_ratio is the number of zf pages we victimize per normal page */ 965 966 if (vm_zf_count < vm_accellerate_zf_pageout_trigger) 967 zf_ratio = 0; 968 else if ((vm_zf_count <= normal) || (normal == 0)) 969 zf_ratio = 1; 970 else 971 zf_ratio = vm_zf_count / normal; 972 973 zf_run_count = 0; 974 } 975 976 /* 977 * Recalculate vm_page_inactivate_target. 978 */ 979 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + 980 vm_page_inactive_count + 981 vm_page_speculative_count); 982 /* 983 * don't want to wake the pageout_scan thread up everytime we fall below 984 * the targets... set a low water mark at 0.25% below the target 985 */ 986 vm_page_inactive_min = vm_page_inactive_target - (vm_page_inactive_target / 400); 987 988 vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count + 989 vm_page_inactive_count); 990 object = NULL; 991 last_object_tried = NULL; 992 try_failed = FALSE; 993 994 if ((vm_page_inactive_count + vm_page_speculative_count) < VM_PAGE_INACTIVE_HEALTHY_LIMIT(vm_page_active_count)) 995 catch_up_count = vm_page_inactive_count + vm_page_speculative_count; 996 else 997 catch_up_count = 0; 998 999 for (;;) { 1000 vm_page_t m; 1001 1002 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL); 1003 1004 if (delayed_unlock == 0) { 1005 vm_page_lock_queues(); 1006 delayed_unlock = 1; 1007 } 1008 1009 /* 1010 * Don't sweep through active queue more than the throttle 1011 * which should be kept relatively low 1012 */ 1013 active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count); 1014 1015 /* 1016 * Move pages from active to inactive. 1017 */ 1018 if (need_internal_inactive == 0 && (vm_page_inactive_count + vm_page_speculative_count) >= vm_page_inactive_target) 1019 goto done_moving_active_pages; 1020 1021 while (!queue_empty(&vm_page_queue_active) && 1022 (need_internal_inactive || active_burst_count)) { 1023 1024 if (active_burst_count) 1025 active_burst_count--; 1026 1027 vm_pageout_active++; 1028 1029 m = (vm_page_t) queue_first(&vm_page_queue_active); 1030 1031 assert(m->active && !m->inactive); 1032 assert(!m->laundry); 1033 assert(m->object != kernel_object); 1034 assert(m->phys_page != vm_page_guard_addr); 1035 1036 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); 1037 1038 /* 1039 * Try to lock object; since we've already got the 1040 * page queues lock, we can only 'try' for this one. 1041 * if the 'try' fails, we need to do a mutex_pause 1042 * to allow the owner of the object lock a chance to 1043 * run... otherwise, we're likely to trip over this 1044 * object in the same state as we work our way through 1045 * the queue... clumps of pages associated with the same 1046 * object are fairly typical on the inactive and active queues 1047 */ 1048 if (m->object != object) { 1049 if (object != NULL) { 1050 vm_object_unlock(object); 1051 object = NULL; 1052 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1053 } 1054 if (!vm_object_lock_try_scan(m->object)) { 1055 /* 1056 * move page to end of active queue and continue 1057 */ 1058 queue_remove(&vm_page_queue_active, m, 1059 vm_page_t, pageq); 1060 queue_enter(&vm_page_queue_active, m, 1061 vm_page_t, pageq); 1062 1063 try_failed = TRUE; 1064 1065 m = (vm_page_t) queue_first(&vm_page_queue_active); 1066 /* 1067 * this is the next object we're going to be interested in 1068 * try to make sure its available after the mutex_yield 1069 * returns control 1070 */ 1071 vm_pageout_scan_wants_object = m->object; 1072 1073 goto done_with_activepage; 1074 } 1075 object = m->object; 1076 1077 try_failed = FALSE; 1078 } 1079 1080 /* 1081 * if the page is BUSY, then we pull it 1082 * off the active queue and leave it alone. 1083 * when BUSY is cleared, it will get stuck 1084 * back on the appropriate queue 1085 */ 1086 if (m->busy) { 1087 queue_remove(&vm_page_queue_active, m, 1088 vm_page_t, pageq); 1089 m->pageq.next = NULL; 1090 m->pageq.prev = NULL; 1091 1092 if (!m->fictitious) 1093 vm_page_active_count--; 1094 m->active = FALSE; 1095 1096 goto done_with_activepage; 1097 } 1098 1099 /* 1100 * Deactivate the page while holding the object 1101 * locked, so we know the page is still not busy. 1102 * This should prevent races between pmap_enter 1103 * and pmap_clear_reference. The page might be 1104 * absent or fictitious, but vm_page_deactivate 1105 * can handle that. 1106 */ 1107 vm_page_deactivate(m); 1108 1109 if (need_internal_inactive) { 1110 vm_pageout_scan_active_throttle_success++; 1111 need_internal_inactive--; 1112 } 1113done_with_activepage: 1114 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) { 1115 1116 if (object != NULL) { 1117 vm_object_unlock(object); 1118 object = NULL; 1119 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1120 } 1121 if (local_freeq) { 1122 vm_page_free_list(local_freeq); 1123 1124 local_freeq = NULL; 1125 local_freed = 0; 1126 } 1127 mutex_yield(&vm_page_queue_lock); 1128 1129 delayed_unlock = 1; 1130 1131 /* 1132 * continue the while loop processing 1133 * the active queue... need to hold 1134 * the page queues lock 1135 */ 1136 } 1137 } 1138 1139 1140 1141 /********************************************************************** 1142 * above this point we're playing with the active queue 1143 * below this point we're playing with the throttling mechanisms 1144 * and the inactive queue 1145 **********************************************************************/ 1146 1147done_moving_active_pages: 1148 1149 /* 1150 * We are done if we have met our target *and* 1151 * nobody is still waiting for a page. 1152 */ 1153 if (vm_page_free_count + local_freed >= vm_page_free_target) { 1154 if (object != NULL) { 1155 vm_object_unlock(object); 1156 object = NULL; 1157 } 1158 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1159 1160 if (local_freeq) { 1161 vm_page_free_list(local_freeq); 1162 1163 local_freeq = NULL; 1164 local_freed = 0; 1165 } 1166 /* 1167 * inactive target still not met... keep going 1168 * until we get the queues balanced 1169 */ 1170 1171 /* 1172 * Recalculate vm_page_inactivate_target. 1173 */ 1174 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count + 1175 vm_page_inactive_count + 1176 vm_page_speculative_count); 1177 1178#ifndef CONFIG_EMBEDDED 1179 /* 1180 * XXX: if no active pages can be reclaimed, pageout scan can be stuck trying 1181 * to balance the queues 1182 */ 1183 if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) && 1184 !queue_empty(&vm_page_queue_active)) 1185 continue; 1186#endif 1187 1188 mutex_lock(&vm_page_queue_free_lock); 1189 1190 if ((vm_page_free_count >= vm_page_free_target) && 1191 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) { 1192 1193 vm_page_unlock_queues(); 1194 1195 thread_wakeup((event_t) &vm_pageout_garbage_collect); 1196 1197 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); 1198 1199 return; 1200 } 1201 mutex_unlock(&vm_page_queue_free_lock); 1202 } 1203 /* 1204 * Before anything, we check if we have any ripe volatile objects around. 1205 * If so, purge the first and see what it gives us. 1206 */ 1207 assert (available_for_purge>=0); 1208 if (available_for_purge) 1209 { 1210 if (object != NULL) { 1211 vm_object_unlock(object); 1212 object = NULL; 1213 } 1214 vm_purgeable_object_purge_one(); 1215 continue; 1216 } 1217 1218 if (queue_empty(&sq->age_q) && vm_page_speculative_count) { 1219 /* 1220 * try to pull pages from the aging bins 1221 * see vm_page.h for an explanation of how 1222 * this mechanism works 1223 */ 1224 struct vm_speculative_age_q *aq; 1225 mach_timespec_t ts_fully_aged; 1226 boolean_t can_steal = FALSE; 1227 1228 aq = &vm_page_queue_speculative[speculative_steal_index]; 1229 1230 while (queue_empty(&aq->age_q)) { 1231 1232 speculative_steal_index++; 1233 1234 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q) 1235 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q; 1236 1237 aq = &vm_page_queue_speculative[speculative_steal_index]; 1238 } 1239 if (vm_page_speculative_count > vm_page_speculative_target) 1240 can_steal = TRUE; 1241 else { 1242 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) / 1000; 1243 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS) % 1000) 1244 * 1000 * NSEC_PER_USEC; 1245 1246 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts); 1247 1248 clock_get_system_nanotime(&ts.tv_sec, (unsigned *)&ts.tv_nsec); 1249 1250 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0) 1251 can_steal = TRUE; 1252 } 1253 if (can_steal == TRUE) 1254 vm_page_speculate_ageit(aq); 1255 } 1256 1257 /* 1258 * Sometimes we have to pause: 1259 * 1) No inactive pages - nothing to do. 1260 * 2) Flow control - default pageout queue is full 1261 * 3) Loop control - no acceptable pages found on the inactive queue 1262 * within the last vm_pageout_burst_inactive_throttle iterations 1263 */ 1264 if (queue_empty(&vm_page_queue_inactive) && queue_empty(&vm_page_queue_zf) && queue_empty(&sq->age_q) && 1265 (VM_PAGE_Q_THROTTLED(iq) || queue_empty(&vm_page_queue_throttled))) { 1266 vm_pageout_scan_empty_throttle++; 1267 msecs = vm_pageout_empty_wait; 1268 goto vm_pageout_scan_delay; 1269 1270 } else if (inactive_burst_count >= 1271 MIN(vm_pageout_burst_inactive_throttle, 1272 (vm_page_inactive_count + 1273 vm_page_speculative_count))) { 1274 vm_pageout_scan_burst_throttle++; 1275 msecs = vm_pageout_burst_wait; 1276 goto vm_pageout_scan_delay; 1277 1278 } else if (VM_PAGE_Q_THROTTLED(iq) && IP_VALID(memory_manager_default)) { 1279 1280 switch (flow_control.state) { 1281 1282 case FCS_IDLE: 1283reset_deadlock_timer: 1284 ts.tv_sec = vm_pageout_deadlock_wait / 1000; 1285 ts.tv_nsec = (vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC; 1286 clock_get_system_nanotime(&flow_control.ts.tv_sec, 1287 (unsigned *)&flow_control.ts.tv_nsec); 1288 ADD_MACH_TIMESPEC(&flow_control.ts, &ts); 1289 1290 flow_control.state = FCS_DELAYED; 1291 msecs = vm_pageout_deadlock_wait; 1292 1293 break; 1294 1295 case FCS_DELAYED: 1296 clock_get_system_nanotime(&ts.tv_sec, 1297 (unsigned *)&ts.tv_nsec); 1298 1299 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) { 1300 /* 1301 * the pageout thread for the default pager is potentially 1302 * deadlocked since the 1303 * default pager queue has been throttled for more than the 1304 * allowable time... we need to move some clean pages or dirty 1305 * pages belonging to the external pagers if they aren't throttled 1306 * vm_page_free_wanted represents the number of threads currently 1307 * blocked waiting for pages... we'll move one page for each of 1308 * these plus a fixed amount to break the logjam... once we're done 1309 * moving this number of pages, we'll re-enter the FSC_DELAYED state 1310 * with a new timeout target since we have no way of knowing 1311 * whether we've broken the deadlock except through observation 1312 * of the queue associated with the default pager... we need to 1313 * stop moving pages and allow the system to run to see what 1314 * state it settles into. 1315 */ 1316 vm_pageout_deadlock_target = vm_pageout_deadlock_relief + vm_page_free_wanted + vm_page_free_wanted_privileged; 1317 vm_pageout_scan_deadlock_detected++; 1318 flow_control.state = FCS_DEADLOCK_DETECTED; 1319 1320 thread_wakeup((event_t) &vm_pageout_garbage_collect); 1321 goto consider_inactive; 1322 } 1323 /* 1324 * just resniff instead of trying 1325 * to compute a new delay time... we're going to be 1326 * awakened immediately upon a laundry completion, 1327 * so we won't wait any longer than necessary 1328 */ 1329 msecs = vm_pageout_idle_wait; 1330 break; 1331 1332 case FCS_DEADLOCK_DETECTED: 1333 if (vm_pageout_deadlock_target) 1334 goto consider_inactive; 1335 goto reset_deadlock_timer; 1336 1337 } 1338 vm_pageout_scan_throttle++; 1339 iq->pgo_throttled = TRUE; 1340vm_pageout_scan_delay: 1341 if (object != NULL) { 1342 vm_object_unlock(object); 1343 object = NULL; 1344 } 1345 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1346 1347 if (local_freeq) { 1348 vm_page_free_list(local_freeq); 1349 1350 local_freeq = NULL; 1351 local_freed = 0; 1352 } 1353#if CONFIG_EMBEDDED 1354 { 1355 int percent_avail; 1356 1357 /* 1358 * Decide if we need to send a memory status notification. 1359 */ 1360 percent_avail = 1361 (vm_page_active_count + vm_page_inactive_count + 1362 vm_page_speculative_count + vm_page_free_count + 1363 (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 / 1364 atop_64(max_mem); 1365 if (percent_avail >= (kern_memorystatus_level + 5) || 1366 percent_avail <= (kern_memorystatus_level - 5)) { 1367 kern_memorystatus_level = percent_avail; 1368 thread_wakeup((event_t)&kern_memorystatus_wakeup); 1369 } 1370 } 1371#endif 1372 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC); 1373 1374 counter(c_vm_pageout_scan_block++); 1375 1376 vm_page_unlock_queues(); 1377 1378 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL); 1379 1380 thread_block(THREAD_CONTINUE_NULL); 1381 1382 vm_page_lock_queues(); 1383 delayed_unlock = 1; 1384 1385 iq->pgo_throttled = FALSE; 1386 1387 if (loop_count >= vm_page_inactive_count) 1388 loop_count = 0; 1389 inactive_burst_count = 0; 1390 1391 goto Restart; 1392 /*NOTREACHED*/ 1393 } 1394 1395 1396 flow_control.state = FCS_IDLE; 1397consider_inactive: 1398 loop_count++; 1399 inactive_burst_count++; 1400 vm_pageout_inactive++; 1401 1402 /* Choose a victim. */ 1403 1404 while (1) { 1405 m = NULL; 1406 1407 /* 1408 * the most eligible pages are ones that were throttled because the 1409 * pager wasn't ready at the time. If a pager is ready now, 1410 * see if one of these is useful. 1411 */ 1412 if (!VM_PAGE_Q_THROTTLED(iq) && !queue_empty(&vm_page_queue_throttled)) { 1413 m = (vm_page_t) queue_first(&vm_page_queue_throttled); 1414 break; 1415 } 1416 1417 /* 1418 * The second most eligible pages are ones we paged in speculatively, 1419 * but which have not yet been touched. 1420 */ 1421 if ( !queue_empty(&sq->age_q) ) { 1422 m = (vm_page_t) queue_first(&sq->age_q); 1423 break; 1424 } 1425 /* 1426 * Time for a zero-filled inactive page? 1427 */ 1428 if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) || 1429 queue_empty(&vm_page_queue_inactive)) { 1430 if ( !queue_empty(&vm_page_queue_zf) ) { 1431 m = (vm_page_t) queue_first(&vm_page_queue_zf); 1432 zf_run_count++; 1433 break; 1434 } 1435 } 1436 /* 1437 * It's either a normal inactive page or nothing. 1438 */ 1439 if ( !queue_empty(&vm_page_queue_inactive) ) { 1440 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 1441 zf_run_count = 0; 1442 break; 1443 } 1444 1445 panic("vm_pageout: no victim"); 1446 } 1447 1448 assert(!m->active && (m->inactive || m->speculative || m->throttled)); 1449 assert(!m->laundry); 1450 assert(m->object != kernel_object); 1451 assert(m->phys_page != vm_page_guard_addr); 1452 1453 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL); 1454 1455 /* 1456 * check to see if we currently are working 1457 * with the same object... if so, we've 1458 * already got the lock 1459 */ 1460 if (m->object != object) { 1461 /* 1462 * the object associated with candidate page is 1463 * different from the one we were just working 1464 * with... dump the lock if we still own it 1465 */ 1466 if (object != NULL) { 1467 vm_object_unlock(object); 1468 object = NULL; 1469 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1470 } 1471 /* 1472 * Try to lock object; since we've alread got the 1473 * page queues lock, we can only 'try' for this one. 1474 * if the 'try' fails, we need to do a mutex_pause 1475 * to allow the owner of the object lock a chance to 1476 * run... otherwise, we're likely to trip over this 1477 * object in the same state as we work our way through 1478 * the queue... clumps of pages associated with the same 1479 * object are fairly typical on the inactive and active queues 1480 */ 1481 if (!vm_object_lock_try_scan(m->object)) { 1482 /* 1483 * Move page to end and continue. 1484 * Don't re-issue ticket 1485 */ 1486 if (m->zero_fill) { 1487 queue_remove(&vm_page_queue_zf, m, 1488 vm_page_t, pageq); 1489 queue_enter(&vm_page_queue_zf, m, 1490 vm_page_t, pageq); 1491 } else if (m->speculative) { 1492 remque(&m->pageq); 1493 m->speculative = FALSE; 1494 vm_page_speculative_count--; 1495 1496 /* 1497 * move to the tail of the inactive queue 1498 * to get it out of the way... the speculative 1499 * queue is generally too small to depend 1500 * on there being enough pages from other 1501 * objects to make cycling it back on the 1502 * same queue a winning proposition 1503 */ 1504 queue_enter(&vm_page_queue_inactive, m, 1505 vm_page_t, pageq); 1506 m->inactive = TRUE; 1507 vm_page_inactive_count++; 1508 token_new_pagecount++; 1509 } else if (m->throttled) { 1510 queue_remove(&vm_page_queue_throttled, m, 1511 vm_page_t, pageq); 1512 m->throttled = FALSE; 1513 vm_page_throttled_count--; 1514 1515 /* 1516 * not throttled any more, so can stick 1517 * it on the inactive queue. 1518 */ 1519 queue_enter(&vm_page_queue_inactive, m, 1520 vm_page_t, pageq); 1521 m->inactive = TRUE; 1522 vm_page_inactive_count++; 1523 token_new_pagecount++; 1524 } else { 1525 queue_remove(&vm_page_queue_inactive, m, 1526 vm_page_t, pageq); 1527#if MACH_ASSERT 1528 vm_page_inactive_count--; /* balance for purgeable queue asserts */ 1529#endif 1530 vm_purgeable_q_advance_all(); 1531 1532 queue_enter(&vm_page_queue_inactive, m, 1533 vm_page_t, pageq); 1534#if MACH_ASSERT 1535 vm_page_inactive_count++; /* balance for purgeable queue asserts */ 1536#endif 1537 token_new_pagecount++; 1538 } 1539 pmap_clear_reference(m->phys_page); 1540 m->reference = FALSE; 1541 1542 vm_pageout_inactive_nolock++; 1543 1544 if ( !queue_empty(&sq->age_q) ) 1545 m = (vm_page_t) queue_first(&sq->age_q); 1546 else if ( ((zf_run_count < zf_ratio) && vm_zf_queue_count >= zf_queue_min_count) || 1547 queue_empty(&vm_page_queue_inactive)) { 1548 if ( !queue_empty(&vm_page_queue_zf) ) 1549 m = (vm_page_t) queue_first(&vm_page_queue_zf); 1550 } else if ( !queue_empty(&vm_page_queue_inactive) ) { 1551 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 1552 } 1553 /* 1554 * this is the next object we're going to be interested in 1555 * try to make sure its available after the mutex_yield 1556 * returns control 1557 */ 1558 vm_pageout_scan_wants_object = m->object; 1559 1560 /* 1561 * force us to dump any collected free pages 1562 * and to pause before moving on 1563 */ 1564 try_failed = TRUE; 1565 1566 goto done_with_inactivepage; 1567 } 1568 object = m->object; 1569 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1570 1571 try_failed = FALSE; 1572 } 1573 1574 /* 1575 * Paging out pages of external objects which 1576 * are currently being created must be avoided. 1577 * The pager may claim for memory, thus leading to a 1578 * possible dead lock between it and the pageout thread, 1579 * if such pages are finally chosen. The remaining assumption 1580 * is that there will finally be enough available pages in the 1581 * inactive pool to page out in order to satisfy all memory 1582 * claimed by the thread which concurrently creates the pager. 1583 */ 1584 if (!object->pager_initialized && object->pager_created) { 1585 /* 1586 * Move page to end and continue, hoping that 1587 * there will be enough other inactive pages to 1588 * page out so that the thread which currently 1589 * initializes the pager will succeed. 1590 * Don't re-grant the ticket, the page should 1591 * pulled from the queue and paged out whenever 1592 * one of its logically adjacent fellows is 1593 * targeted. 1594 * 1595 * Pages found on the speculative list can never be 1596 * in this state... they always have a pager associated 1597 * with them. 1598 */ 1599 assert(!m->speculative); 1600 1601 if (m->zero_fill) { 1602 queue_remove(&vm_page_queue_zf, m, 1603 vm_page_t, pageq); 1604 queue_enter(&vm_page_queue_zf, m, 1605 vm_page_t, pageq); 1606 } else { 1607 queue_remove(&vm_page_queue_inactive, m, 1608 vm_page_t, pageq); 1609#if MACH_ASSERT 1610 vm_page_inactive_count--; /* balance for purgeable queue asserts */ 1611#endif 1612 vm_purgeable_q_advance_all(); 1613 1614 queue_enter(&vm_page_queue_inactive, m, 1615 vm_page_t, pageq); 1616#if MACH_ASSERT 1617 vm_page_inactive_count++; /* balance for purgeable queue asserts */ 1618#endif 1619 token_new_pagecount++; 1620 } 1621 vm_pageout_inactive_avoid++; 1622 1623 goto done_with_inactivepage; 1624 } 1625 /* 1626 * Remove the page from its list. 1627 */ 1628 if (m->speculative) { 1629 remque(&m->pageq); 1630 m->speculative = FALSE; 1631 vm_page_speculative_count--; 1632 } else if (m->throttled) { 1633 queue_remove(&vm_page_queue_throttled, m, vm_page_t, pageq); 1634 m->throttled = FALSE; 1635 vm_page_throttled_count--; 1636 } else { 1637 if (m->zero_fill) { 1638 queue_remove(&vm_page_queue_zf, m, vm_page_t, pageq); 1639 vm_zf_queue_count--; 1640 } else { 1641 queue_remove(&vm_page_queue_inactive, m, vm_page_t, pageq); 1642 } 1643 m->inactive = FALSE; 1644 if (!m->fictitious) 1645 vm_page_inactive_count--; 1646 vm_purgeable_q_advance_all(); 1647 } 1648 1649 /* If the object is empty, the page must be reclaimed even if dirty or used. */ 1650 /* If the page belongs to a volatile object, we stick it back on. */ 1651 if (object->copy == VM_OBJECT_NULL) { 1652 if(object->purgable == VM_PURGABLE_EMPTY && !m->cleaning) { 1653 m->busy = TRUE; 1654 if (m->pmapped == TRUE) { 1655 /* unmap the page */ 1656 refmod_state = pmap_disconnect(m->phys_page); 1657 if (refmod_state & VM_MEM_MODIFIED) { 1658 m->dirty = TRUE; 1659 } 1660 } 1661 if (m->dirty || m->precious) { 1662 /* we saved the cost of cleaning this page ! */ 1663 vm_page_purged_count++; 1664 } 1665 goto reclaim_page; 1666 } 1667 if (object->purgable == VM_PURGABLE_VOLATILE) { 1668 /* if it's wired, we can't put it on our queue */ 1669 assert(m->wire_count == 0); 1670 /* just stick it back on! */ 1671 goto reactivate_page; 1672 } 1673 } 1674 m->pageq.next = NULL; 1675 m->pageq.prev = NULL; 1676 1677 if ( !m->fictitious && catch_up_count) 1678 catch_up_count--; 1679 1680 /* 1681 * ENCRYPTED SWAP: 1682 * if this page has already been picked up as part of a 1683 * page-out cluster, it will be busy because it is being 1684 * encrypted (see vm_object_upl_request()). But we still 1685 * want to demote it from "clean-in-place" (aka "adjacent") 1686 * to "clean-and-free" (aka "target"), so let's ignore its 1687 * "busy" bit here and proceed to check for "cleaning" a 1688 * little bit below... 1689 */ 1690 if ( !m->encrypted_cleaning && (m->busy || !object->alive)) { 1691 /* 1692 * Somebody is already playing with this page. 1693 * Leave it off the pageout queues. 1694 * 1695 */ 1696 vm_pageout_inactive_busy++; 1697 1698 goto done_with_inactivepage; 1699 } 1700 1701 /* 1702 * If it's absent or in error, we can reclaim the page. 1703 */ 1704 1705 if (m->absent || m->error) { 1706 vm_pageout_inactive_absent++; 1707reclaim_page: 1708 if (vm_pageout_deadlock_target) { 1709 vm_pageout_scan_inactive_throttle_success++; 1710 vm_pageout_deadlock_target--; 1711 } 1712 1713 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL); 1714 1715 if (m->object->internal) { 1716 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL); 1717 } else { 1718 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL); 1719 } 1720 1721 vm_page_free_prepare(m); 1722 1723 assert(m->pageq.next == NULL && 1724 m->pageq.prev == NULL); 1725 m->pageq.next = (queue_entry_t)local_freeq; 1726 local_freeq = m; 1727 local_freed++; 1728 1729 inactive_burst_count = 0; 1730 1731 goto done_with_inactivepage; 1732 } 1733 1734 assert(!m->private); 1735 assert(!m->fictitious); 1736 1737 /* 1738 * If already cleaning this page in place, convert from 1739 * "adjacent" to "target". We can leave the page mapped, 1740 * and vm_pageout_object_terminate will determine whether 1741 * to free or reactivate. 1742 */ 1743 1744 if (m->cleaning) { 1745 m->busy = TRUE; 1746 m->pageout = TRUE; 1747 m->dump_cleaning = TRUE; 1748 vm_page_wire(m); 1749 1750 CLUSTER_STAT(vm_pageout_cluster_conversions++); 1751 1752 inactive_burst_count = 0; 1753 1754 goto done_with_inactivepage; 1755 } 1756 1757 /* 1758 * If it's being used, reactivate. 1759 * (Fictitious pages are either busy or absent.) 1760 * First, update the reference and dirty bits 1761 * to make sure the page is unreferenced. 1762 */ 1763 refmod_state = -1; 1764 1765 if (m->reference == FALSE && m->pmapped == TRUE) { 1766 refmod_state = pmap_get_refmod(m->phys_page); 1767 1768 if (refmod_state & VM_MEM_REFERENCED) 1769 m->reference = TRUE; 1770 if (refmod_state & VM_MEM_MODIFIED) 1771 m->dirty = TRUE; 1772 } 1773 if (m->reference && !m->no_cache) { 1774 /* 1775 * The page we pulled off the inactive list has 1776 * been referenced. It is possible for other 1777 * processors to be touching pages faster than we 1778 * can clear the referenced bit and traverse the 1779 * inactive queue, so we limit the number of 1780 * reactivations. 1781 */ 1782 if (++reactivated_this_call >= reactivate_limit) { 1783 vm_pageout_reactivation_limit_exceeded++; 1784 } else if (catch_up_count) { 1785 vm_pageout_catch_ups++; 1786 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) { 1787 vm_pageout_inactive_force_reclaim++; 1788 } else { 1789 /* 1790 * The page was being used, so put back on active list. 1791 */ 1792reactivate_page: 1793 vm_page_activate(m); 1794 VM_STAT_INCR(reactivations); 1795 1796 vm_pageout_inactive_used++; 1797 inactive_burst_count = 0; 1798 1799 goto done_with_inactivepage; 1800 } 1801 /* 1802 * Make sure we call pmap_get_refmod() if it 1803 * wasn't already called just above, to update 1804 * the dirty bit. 1805 */ 1806 if ((refmod_state == -1) && !m->dirty && m->pmapped) { 1807 refmod_state = pmap_get_refmod(m->phys_page); 1808 if (refmod_state & VM_MEM_MODIFIED) 1809 m->dirty = TRUE; 1810 } 1811 forced_reclaim = TRUE; 1812 } else { 1813 forced_reclaim = FALSE; 1814 } 1815 1816 XPR(XPR_VM_PAGEOUT, 1817 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n", 1818 (integer_t)object, (integer_t)m->offset, (integer_t)m, 0,0); 1819 1820 /* 1821 * we've got a candidate page to steal... 1822 * 1823 * m->dirty is up to date courtesy of the 1824 * preceding check for m->reference... if 1825 * we get here, then m->reference had to be 1826 * FALSE (or possibly "reactivate_limit" was 1827 * exceeded), but in either case we called 1828 * pmap_get_refmod() and updated both 1829 * m->reference and m->dirty 1830 * 1831 * if it's dirty or precious we need to 1832 * see if the target queue is throtttled 1833 * it if is, we need to skip over it by moving it back 1834 * to the end of the inactive queue 1835 */ 1836 inactive_throttled = FALSE; 1837 1838 if (m->dirty || m->precious) { 1839 if (object->internal) { 1840 if (VM_PAGE_Q_THROTTLED(iq)) 1841 inactive_throttled = TRUE; 1842 } else if (VM_PAGE_Q_THROTTLED(eq)) { 1843 inactive_throttled = TRUE; 1844 } 1845 } 1846 if (inactive_throttled == TRUE) { 1847throttle_inactive: 1848 if (!IP_VALID(memory_manager_default) && 1849 object->internal && 1850 (object->purgable == VM_PURGABLE_DENY || 1851 object->purgable == VM_PURGABLE_NONVOLATILE || 1852 object->purgable == VM_PURGABLE_VOLATILE )) { 1853 queue_enter(&vm_page_queue_throttled, m, 1854 vm_page_t, pageq); 1855 m->throttled = TRUE; 1856 vm_page_throttled_count++; 1857 } else { 1858 if (m->zero_fill) { 1859 queue_enter(&vm_page_queue_zf, m, 1860 vm_page_t, pageq); 1861 vm_zf_queue_count++; 1862 } else 1863 queue_enter(&vm_page_queue_inactive, m, 1864 vm_page_t, pageq); 1865 m->inactive = TRUE; 1866 if (!m->fictitious) { 1867 vm_page_inactive_count++; 1868 token_new_pagecount++; 1869 } 1870 } 1871 vm_pageout_scan_inactive_throttled++; 1872 goto done_with_inactivepage; 1873 } 1874 1875 /* 1876 * we've got a page that we can steal... 1877 * eliminate all mappings and make sure 1878 * we have the up-to-date modified state 1879 * first take the page BUSY, so that no new 1880 * mappings can be made 1881 */ 1882 m->busy = TRUE; 1883 1884 /* 1885 * if we need to do a pmap_disconnect then we 1886 * need to re-evaluate m->dirty since the pmap_disconnect 1887 * provides the true state atomically... the 1888 * page was still mapped up to the pmap_disconnect 1889 * and may have been dirtied at the last microsecond 1890 * 1891 * we also check for the page being referenced 'late' 1892 * if it was, we first need to do a WAKEUP_DONE on it 1893 * since we already set m->busy = TRUE, before 1894 * going off to reactivate it 1895 * 1896 * Note that if 'pmapped' is FALSE then the page is not 1897 * and has not been in any map, so there is no point calling 1898 * pmap_disconnect(). m->dirty and/or m->reference could 1899 * have been set in anticipation of likely usage of the page. 1900 */ 1901 if (m->pmapped == TRUE) { 1902 refmod_state = pmap_disconnect(m->phys_page); 1903 1904 if (refmod_state & VM_MEM_MODIFIED) 1905 m->dirty = TRUE; 1906 if (refmod_state & VM_MEM_REFERENCED) { 1907 1908 /* If m->reference is already set, this page must have 1909 * already failed the reactivate_limit test, so don't 1910 * bump the counts twice. 1911 */ 1912 if ( ! m->reference ) { 1913 m->reference = TRUE; 1914 if (forced_reclaim || 1915 ++reactivated_this_call >= reactivate_limit) 1916 vm_pageout_reactivation_limit_exceeded++; 1917 else { 1918 PAGE_WAKEUP_DONE(m); 1919 goto reactivate_page; 1920 } 1921 } 1922 } 1923 } 1924 /* 1925 * reset our count of pages that have been reclaimed 1926 * since the last page was 'stolen' 1927 */ 1928 inactive_reclaim_run = 0; 1929 1930 /* 1931 * If it's clean and not precious, we can free the page. 1932 */ 1933 if (!m->dirty && !m->precious) { 1934 vm_pageout_inactive_clean++; 1935 goto reclaim_page; 1936 } 1937 1938 /* 1939 * The page may have been dirtied since the last check 1940 * for a throttled target queue (which may have been skipped 1941 * if the page was clean then). With the dirty page 1942 * disconnected here, we can make one final check. 1943 */ 1944 { 1945 boolean_t disconnect_throttled = FALSE; 1946 if (object->internal) { 1947 if (VM_PAGE_Q_THROTTLED(iq)) 1948 disconnect_throttled = TRUE; 1949 } else if (VM_PAGE_Q_THROTTLED(eq)) { 1950 disconnect_throttled = TRUE; 1951 } 1952 1953 if (disconnect_throttled == TRUE) { 1954 PAGE_WAKEUP_DONE(m); 1955 goto throttle_inactive; 1956 } 1957 } 1958 1959 vm_pageout_cluster(m); 1960 1961 vm_pageout_inactive_dirty++; 1962 1963 inactive_burst_count = 0; 1964 1965done_with_inactivepage: 1966 if (delayed_unlock++ > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT || try_failed == TRUE) { 1967 1968 if (object != NULL) { 1969 vm_object_unlock(object); 1970 object = NULL; 1971 vm_pageout_scan_wants_object = VM_OBJECT_NULL; 1972 } 1973 if (local_freeq) { 1974 vm_page_free_list(local_freeq); 1975 1976 local_freeq = NULL; 1977 local_freed = 0; 1978 } 1979 mutex_yield(&vm_page_queue_lock); 1980 1981 delayed_unlock = 1; 1982 } 1983 /* 1984 * back to top of pageout scan loop 1985 */ 1986 } 1987} 1988 1989 1990int vm_page_free_count_init; 1991 1992void 1993vm_page_free_reserve( 1994 int pages) 1995{ 1996 int free_after_reserve; 1997 1998 vm_page_free_reserved += pages; 1999 2000 free_after_reserve = vm_page_free_count_init - vm_page_free_reserved; 2001 2002 vm_page_free_min = vm_page_free_reserved + 2003 VM_PAGE_FREE_MIN(free_after_reserve); 2004 2005 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT) 2006 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT; 2007 2008 vm_page_free_target = vm_page_free_reserved + 2009 VM_PAGE_FREE_TARGET(free_after_reserve); 2010 2011 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT) 2012 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT; 2013 2014 if (vm_page_free_target < vm_page_free_min + 5) 2015 vm_page_free_target = vm_page_free_min + 5; 2016 2017} 2018 2019/* 2020 * vm_pageout is the high level pageout daemon. 2021 */ 2022 2023void 2024vm_pageout_continue(void) 2025{ 2026 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL); 2027 vm_pageout_scan_event_counter++; 2028 vm_pageout_scan(); 2029 /* we hold vm_page_queue_free_lock now */ 2030 assert(vm_page_free_wanted == 0); 2031 assert(vm_page_free_wanted_privileged == 0); 2032 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT); 2033 mutex_unlock(&vm_page_queue_free_lock); 2034 2035 counter(c_vm_pageout_block++); 2036 thread_block((thread_continue_t)vm_pageout_continue); 2037 /*NOTREACHED*/ 2038} 2039 2040 2041/* 2042 * must be called with the 2043 * queues and object locks held 2044 */ 2045static void 2046vm_pageout_queue_steal(vm_page_t m) 2047{ 2048 struct vm_pageout_queue *q; 2049 2050 if (m->object->internal == TRUE) 2051 q = &vm_pageout_queue_internal; 2052 else 2053 q = &vm_pageout_queue_external; 2054 2055 m->laundry = FALSE; 2056 m->pageout_queue = FALSE; 2057 queue_remove(&q->pgo_pending, m, vm_page_t, pageq); 2058 2059 m->pageq.next = NULL; 2060 m->pageq.prev = NULL; 2061 2062 vm_object_paging_end(m->object); 2063 2064 q->pgo_laundry--; 2065} 2066 2067 2068#ifdef FAKE_DEADLOCK 2069 2070#define FAKE_COUNT 5000 2071 2072int internal_count = 0; 2073int fake_deadlock = 0; 2074 2075#endif 2076 2077static void 2078vm_pageout_iothread_continue(struct vm_pageout_queue *q) 2079{ 2080 vm_page_t m = NULL; 2081 vm_object_t object; 2082 boolean_t need_wakeup; 2083 memory_object_t pager; 2084 thread_t self = current_thread(); 2085 2086 if ((vm_pageout_internal_iothread != THREAD_NULL) 2087 && (self == vm_pageout_external_iothread ) 2088 && (self->options & TH_OPT_VMPRIV)) 2089 self->options &= ~TH_OPT_VMPRIV; 2090 2091 vm_page_lockspin_queues(); 2092 2093 while ( !queue_empty(&q->pgo_pending) ) { 2094 2095 q->pgo_busy = TRUE; 2096 queue_remove_first(&q->pgo_pending, m, vm_page_t, pageq); 2097 m->pageout_queue = FALSE; 2098 vm_page_unlock_queues(); 2099 2100 m->pageq.next = NULL; 2101 m->pageq.prev = NULL; 2102#ifdef FAKE_DEADLOCK 2103 if (q == &vm_pageout_queue_internal) { 2104 vm_offset_t addr; 2105 int pg_count; 2106 2107 internal_count++; 2108 2109 if ((internal_count == FAKE_COUNT)) { 2110 2111 pg_count = vm_page_free_count + vm_page_free_reserved; 2112 2113 if (kmem_alloc(kernel_map, &addr, PAGE_SIZE * pg_count) == KERN_SUCCESS) { 2114 kmem_free(kernel_map, addr, PAGE_SIZE * pg_count); 2115 } 2116 internal_count = 0; 2117 fake_deadlock++; 2118 } 2119 } 2120#endif 2121 object = m->object; 2122 2123 vm_object_lock(object); 2124 2125 if (!object->pager_initialized) { 2126 2127 /* 2128 * If there is no memory object for the page, create 2129 * one and hand it to the default pager. 2130 */ 2131 2132 if (!object->pager_initialized) 2133 vm_object_collapse(object, 2134 (vm_object_offset_t) 0, 2135 TRUE); 2136 if (!object->pager_initialized) 2137 vm_object_pager_create(object); 2138 if (!object->pager_initialized) { 2139 /* 2140 * Still no pager for the object. 2141 * Reactivate the page. 2142 * 2143 * Should only happen if there is no 2144 * default pager. 2145 */ 2146 m->list_req_pending = FALSE; 2147 m->cleaning = FALSE; 2148 m->pageout = FALSE; 2149 2150 vm_page_lockspin_queues(); 2151 vm_page_unwire(m); 2152 vm_pageout_throttle_up(m); 2153 vm_pageout_dirty_no_pager++; 2154 vm_page_activate(m); 2155 vm_page_unlock_queues(); 2156 2157 /* 2158 * And we are done with it. 2159 */ 2160 PAGE_WAKEUP_DONE(m); 2161 2162 vm_object_paging_end(object); 2163 vm_object_unlock(object); 2164 2165 vm_page_lockspin_queues(); 2166 continue; 2167 } 2168 } 2169 pager = object->pager; 2170 if (pager == MEMORY_OBJECT_NULL) { 2171 /* 2172 * This pager has been destroyed by either 2173 * memory_object_destroy or vm_object_destroy, and 2174 * so there is nowhere for the page to go. 2175 * Just free the page... VM_PAGE_FREE takes 2176 * care of cleaning up all the state... 2177 * including doing the vm_pageout_throttle_up 2178 */ 2179 2180 VM_PAGE_FREE(m); 2181 2182 vm_object_paging_end(object); 2183 vm_object_unlock(object); 2184 2185 vm_page_lockspin_queues(); 2186 continue; 2187 } 2188 vm_object_unlock(object); 2189 /* 2190 * we expect the paging_in_progress reference to have 2191 * already been taken on the object before it was added 2192 * to the appropriate pageout I/O queue... this will 2193 * keep the object from being terminated and/or the 2194 * paging_offset from changing until the I/O has 2195 * completed... therefore no need to lock the object to 2196 * pull the paging_offset from it. 2197 * 2198 * Send the data to the pager. 2199 * any pageout clustering happens there 2200 */ 2201 memory_object_data_return(pager, 2202 m->offset + object->paging_offset, 2203 PAGE_SIZE, 2204 NULL, 2205 NULL, 2206 FALSE, 2207 FALSE, 2208 0); 2209 2210 vm_object_lock(object); 2211 vm_object_paging_end(object); 2212 vm_object_unlock(object); 2213 2214 vm_page_lockspin_queues(); 2215 } 2216 assert_wait((event_t) q, THREAD_UNINT); 2217 2218 2219 if (q->pgo_throttled == TRUE && !VM_PAGE_Q_THROTTLED(q)) { 2220 q->pgo_throttled = FALSE; 2221 need_wakeup = TRUE; 2222 } else 2223 need_wakeup = FALSE; 2224 2225 q->pgo_busy = FALSE; 2226 q->pgo_idle = TRUE; 2227 vm_page_unlock_queues(); 2228 2229 if (need_wakeup == TRUE) 2230 thread_wakeup((event_t) &q->pgo_laundry); 2231 2232 thread_block_parameter((thread_continue_t)vm_pageout_iothread_continue, (void *) &q->pgo_pending); 2233 /*NOTREACHED*/ 2234} 2235 2236 2237static void 2238vm_pageout_iothread_external(void) 2239{ 2240 thread_t self = current_thread(); 2241 2242 self->options |= TH_OPT_VMPRIV; 2243 2244 vm_pageout_iothread_continue(&vm_pageout_queue_external); 2245 /*NOTREACHED*/ 2246} 2247 2248 2249static void 2250vm_pageout_iothread_internal(void) 2251{ 2252 thread_t self = current_thread(); 2253 2254 self->options |= TH_OPT_VMPRIV; 2255 2256 vm_pageout_iothread_continue(&vm_pageout_queue_internal); 2257 /*NOTREACHED*/ 2258} 2259 2260static void 2261vm_pageout_garbage_collect(int collect) 2262{ 2263 if (collect) { 2264 stack_collect(); 2265 2266 /* 2267 * consider_zone_gc should be last, because the other operations 2268 * might return memory to zones. 2269 */ 2270 consider_machine_collect(); 2271 consider_zone_gc(); 2272 2273 consider_machine_adjust(); 2274 } 2275 2276 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT); 2277 2278 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1); 2279 /*NOTREACHED*/ 2280} 2281 2282 2283 2284void 2285vm_pageout(void) 2286{ 2287 thread_t self = current_thread(); 2288 thread_t thread; 2289 kern_return_t result; 2290 spl_t s; 2291 2292 /* 2293 * Set thread privileges. 2294 */ 2295 s = splsched(); 2296 thread_lock(self); 2297 self->priority = BASEPRI_PREEMPT - 1; 2298 set_sched_pri(self, self->priority); 2299 thread_unlock(self); 2300 2301 if (!self->reserved_stack) 2302 self->reserved_stack = self->kernel_stack; 2303 2304 splx(s); 2305 2306 /* 2307 * Initialize some paging parameters. 2308 */ 2309 2310 if (vm_pageout_idle_wait == 0) 2311 vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT; 2312 2313 if (vm_pageout_burst_wait == 0) 2314 vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT; 2315 2316 if (vm_pageout_empty_wait == 0) 2317 vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT; 2318 2319 if (vm_pageout_deadlock_wait == 0) 2320 vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT; 2321 2322 if (vm_pageout_deadlock_relief == 0) 2323 vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF; 2324 2325 if (vm_pageout_inactive_relief == 0) 2326 vm_pageout_inactive_relief = VM_PAGEOUT_INACTIVE_RELIEF; 2327 2328 if (vm_pageout_burst_active_throttle == 0) 2329 vm_pageout_burst_active_throttle = VM_PAGEOUT_BURST_ACTIVE_THROTTLE; 2330 2331 if (vm_pageout_burst_inactive_throttle == 0) 2332 vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE; 2333 2334 /* 2335 * Set kernel task to low backing store privileged 2336 * status 2337 */ 2338 task_lock(kernel_task); 2339 kernel_task->priv_flags |= VM_BACKING_STORE_PRIV; 2340 task_unlock(kernel_task); 2341 2342 vm_page_free_count_init = vm_page_free_count; 2343 2344 /* 2345 * even if we've already called vm_page_free_reserve 2346 * call it again here to insure that the targets are 2347 * accurately calculated (it uses vm_page_free_count_init) 2348 * calling it with an arg of 0 will not change the reserve 2349 * but will re-calculate free_min and free_target 2350 */ 2351 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) { 2352 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved); 2353 } else 2354 vm_page_free_reserve(0); 2355 2356 2357 queue_init(&vm_pageout_queue_external.pgo_pending); 2358 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; 2359 vm_pageout_queue_external.pgo_laundry = 0; 2360 vm_pageout_queue_external.pgo_idle = FALSE; 2361 vm_pageout_queue_external.pgo_busy = FALSE; 2362 vm_pageout_queue_external.pgo_throttled = FALSE; 2363 2364 queue_init(&vm_pageout_queue_internal.pgo_pending); 2365 vm_pageout_queue_internal.pgo_maxlaundry = 0; 2366 vm_pageout_queue_internal.pgo_laundry = 0; 2367 vm_pageout_queue_internal.pgo_idle = FALSE; 2368 vm_pageout_queue_internal.pgo_busy = FALSE; 2369 vm_pageout_queue_internal.pgo_throttled = FALSE; 2370 2371 2372 /* internal pageout thread started when default pager registered first time */ 2373 /* external pageout and garbage collection threads started here */ 2374 2375 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL, 2376 BASEPRI_PREEMPT - 1, 2377 &vm_pageout_external_iothread); 2378 if (result != KERN_SUCCESS) 2379 panic("vm_pageout_iothread_external: create failed"); 2380 2381 thread_deallocate(vm_pageout_external_iothread); 2382 2383 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL, 2384 MINPRI_KERNEL, 2385 &thread); 2386 if (result != KERN_SUCCESS) 2387 panic("vm_pageout_garbage_collect: create failed"); 2388 2389 thread_deallocate(thread); 2390 2391 vm_object_reaper_init(); 2392 2393 2394 vm_pageout_continue(); 2395 2396 /* 2397 * Unreached code! 2398 * 2399 * The vm_pageout_continue() call above never returns, so the code below is never 2400 * executed. We take advantage of this to declare several DTrace VM related probe 2401 * points that our kernel doesn't have an analog for. These are probe points that 2402 * exist in Solaris and are in the DTrace documentation, so people may have written 2403 * scripts that use them. Declaring the probe points here means their scripts will 2404 * compile and execute which we want for portability of the scripts, but since this 2405 * section of code is never reached, the probe points will simply never fire. Yes, 2406 * this is basically a hack. The problem is the DTrace probe points were chosen with 2407 * Solaris specific VM events in mind, not portability to different VM implementations. 2408 */ 2409 2410 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL); 2411 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL); 2412 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL); 2413 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL); 2414 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL); 2415 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL); 2416 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL); 2417 /*NOTREACHED*/ 2418} 2419 2420kern_return_t 2421vm_pageout_internal_start(void) 2422{ 2423 kern_return_t result; 2424 2425 vm_pageout_queue_internal.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX; 2426 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, NULL, BASEPRI_PREEMPT - 1, &vm_pageout_internal_iothread); 2427 if (result == KERN_SUCCESS) 2428 thread_deallocate(vm_pageout_internal_iothread); 2429 return result; 2430} 2431 2432#define UPL_DELAYED_UNLOCK_LIMIT (MAX_UPL_TRANSFER / 2) 2433 2434static upl_t 2435upl_create(int type, int flags, upl_size_t size) 2436{ 2437 upl_t upl; 2438 int page_field_size = 0; 2439 int upl_flags = 0; 2440 int upl_size = sizeof(struct upl); 2441 2442 if (type & UPL_CREATE_LITE) { 2443 page_field_size = ((size/PAGE_SIZE) + 7) >> 3; 2444 page_field_size = (page_field_size + 3) & 0xFFFFFFFC; 2445 2446 upl_flags |= UPL_LITE; 2447 } 2448 if (type & UPL_CREATE_INTERNAL) { 2449 upl_size += sizeof(struct upl_page_info) * (size/PAGE_SIZE); 2450 2451 upl_flags |= UPL_INTERNAL; 2452 } 2453 upl = (upl_t)kalloc(upl_size + page_field_size); 2454 2455 if (page_field_size) 2456 bzero((char *)upl + upl_size, page_field_size); 2457 2458 upl->flags = upl_flags | flags; 2459 upl->src_object = NULL; 2460 upl->kaddr = (vm_offset_t)0; 2461 upl->size = 0; 2462 upl->map_object = NULL; 2463 upl->ref_count = 1; 2464 upl->highest_page = 0; 2465 upl_lock_init(upl); 2466#ifdef UPL_DEBUG 2467 upl->ubc_alias1 = 0; 2468 upl->ubc_alias2 = 0; 2469#endif /* UPL_DEBUG */ 2470 return(upl); 2471} 2472 2473static void 2474upl_destroy(upl_t upl) 2475{ 2476 int page_field_size; /* bit field in word size buf */ 2477 int size; 2478 2479#ifdef UPL_DEBUG 2480 { 2481 vm_object_t object; 2482 2483 if (upl->flags & UPL_SHADOWED) { 2484 object = upl->map_object->shadow; 2485 } else { 2486 object = upl->map_object; 2487 } 2488 vm_object_lock(object); 2489 queue_remove(&object->uplq, upl, upl_t, uplq); 2490 vm_object_unlock(object); 2491 } 2492#endif /* UPL_DEBUG */ 2493 /* 2494 * drop a reference on the map_object whether or 2495 * not a pageout object is inserted 2496 */ 2497 if (upl->flags & UPL_SHADOWED) 2498 vm_object_deallocate(upl->map_object); 2499 2500 if (upl->flags & UPL_DEVICE_MEMORY) 2501 size = PAGE_SIZE; 2502 else 2503 size = upl->size; 2504 page_field_size = 0; 2505 2506 if (upl->flags & UPL_LITE) { 2507 page_field_size = ((size/PAGE_SIZE) + 7) >> 3; 2508 page_field_size = (page_field_size + 3) & 0xFFFFFFFC; 2509 } 2510 if (upl->flags & UPL_INTERNAL) { 2511 kfree(upl, 2512 sizeof(struct upl) + 2513 (sizeof(struct upl_page_info) * (size/PAGE_SIZE)) 2514 + page_field_size); 2515 } else { 2516 kfree(upl, sizeof(struct upl) + page_field_size); 2517 } 2518} 2519 2520void uc_upl_dealloc(upl_t upl); 2521__private_extern__ void 2522uc_upl_dealloc(upl_t upl) 2523{ 2524 if (--upl->ref_count == 0) 2525 upl_destroy(upl); 2526} 2527 2528void 2529upl_deallocate(upl_t upl) 2530{ 2531 if (--upl->ref_count == 0) 2532 upl_destroy(upl); 2533} 2534 2535/* 2536 * Statistics about UPL enforcement of copy-on-write obligations. 2537 */ 2538unsigned long upl_cow = 0; 2539unsigned long upl_cow_again = 0; 2540unsigned long upl_cow_contiguous = 0; 2541unsigned long upl_cow_pages = 0; 2542unsigned long upl_cow_again_pages = 0; 2543unsigned long upl_cow_contiguous_pages = 0; 2544 2545/* 2546 * Routine: vm_object_upl_request 2547 * Purpose: 2548 * Cause the population of a portion of a vm_object. 2549 * Depending on the nature of the request, the pages 2550 * returned may be contain valid data or be uninitialized. 2551 * A page list structure, listing the physical pages 2552 * will be returned upon request. 2553 * This function is called by the file system or any other 2554 * supplier of backing store to a pager. 2555 * IMPORTANT NOTE: The caller must still respect the relationship 2556 * between the vm_object and its backing memory object. The 2557 * caller MUST NOT substitute changes in the backing file 2558 * without first doing a memory_object_lock_request on the 2559 * target range unless it is know that the pages are not 2560 * shared with another entity at the pager level. 2561 * Copy_in_to: 2562 * if a page list structure is present 2563 * return the mapped physical pages, where a 2564 * page is not present, return a non-initialized 2565 * one. If the no_sync bit is turned on, don't 2566 * call the pager unlock to synchronize with other 2567 * possible copies of the page. Leave pages busy 2568 * in the original object, if a page list structure 2569 * was specified. When a commit of the page list 2570 * pages is done, the dirty bit will be set for each one. 2571 * Copy_out_from: 2572 * If a page list structure is present, return 2573 * all mapped pages. Where a page does not exist 2574 * map a zero filled one. Leave pages busy in 2575 * the original object. If a page list structure 2576 * is not specified, this call is a no-op. 2577 * 2578 * Note: access of default pager objects has a rather interesting 2579 * twist. The caller of this routine, presumably the file system 2580 * page cache handling code, will never actually make a request 2581 * against a default pager backed object. Only the default 2582 * pager will make requests on backing store related vm_objects 2583 * In this way the default pager can maintain the relationship 2584 * between backing store files (abstract memory objects) and 2585 * the vm_objects (cache objects), they support. 2586 * 2587 */ 2588 2589__private_extern__ kern_return_t 2590vm_object_upl_request( 2591 vm_object_t object, 2592 vm_object_offset_t offset, 2593 upl_size_t size, 2594 upl_t *upl_ptr, 2595 upl_page_info_array_t user_page_list, 2596 unsigned int *page_list_count, 2597 int cntrl_flags) 2598{ 2599 vm_page_t dst_page = VM_PAGE_NULL; 2600 vm_object_offset_t dst_offset; 2601 upl_size_t xfer_size; 2602 boolean_t dirty; 2603 boolean_t hw_dirty; 2604 upl_t upl = NULL; 2605 unsigned int entry; 2606#if MACH_CLUSTER_STATS 2607 boolean_t encountered_lrp = FALSE; 2608#endif 2609 vm_page_t alias_page = NULL; 2610 int refmod_state = 0; 2611 wpl_array_t lite_list = NULL; 2612 vm_object_t last_copy_object; 2613 int delayed_unlock = 0; 2614 int j; 2615 2616 if (cntrl_flags & ~UPL_VALID_FLAGS) { 2617 /* 2618 * For forward compatibility's sake, 2619 * reject any unknown flag. 2620 */ 2621 return KERN_INVALID_VALUE; 2622 } 2623 if ( (!object->internal) && (object->paging_offset != 0) ) 2624 panic("vm_object_upl_request: external object with non-zero paging offset\n"); 2625 if (object->phys_contiguous) 2626 panic("vm_object_upl_request: contiguous object specified\n"); 2627 2628 2629 if ((size / PAGE_SIZE) > MAX_UPL_SIZE) 2630 size = MAX_UPL_SIZE * PAGE_SIZE; 2631 2632 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL) 2633 *page_list_count = MAX_UPL_SIZE; 2634 2635 if (cntrl_flags & UPL_SET_INTERNAL) { 2636 if (cntrl_flags & UPL_SET_LITE) { 2637 2638 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, 0, size); 2639 2640 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 2641 lite_list = (wpl_array_t) 2642 (((uintptr_t)user_page_list) + 2643 ((size/PAGE_SIZE) * sizeof(upl_page_info_t))); 2644 } else { 2645 upl = upl_create(UPL_CREATE_INTERNAL, 0, size); 2646 2647 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 2648 } 2649 } else { 2650 if (cntrl_flags & UPL_SET_LITE) { 2651 2652 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE, 0, size); 2653 2654 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 2655 } else { 2656 upl = upl_create(UPL_CREATE_EXTERNAL, 0, size); 2657 } 2658 } 2659 *upl_ptr = upl; 2660 2661 if (user_page_list) 2662 user_page_list[0].device = FALSE; 2663 2664 if (cntrl_flags & UPL_SET_LITE) { 2665 upl->map_object = object; 2666 } else { 2667 upl->map_object = vm_object_allocate(size); 2668 /* 2669 * No neeed to lock the new object: nobody else knows 2670 * about it yet, so it's all ours so far. 2671 */ 2672 upl->map_object->shadow = object; 2673 upl->map_object->pageout = TRUE; 2674 upl->map_object->can_persist = FALSE; 2675 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 2676 upl->map_object->shadow_offset = offset; 2677 upl->map_object->wimg_bits = object->wimg_bits; 2678 2679 VM_PAGE_GRAB_FICTITIOUS(alias_page); 2680 2681 upl->flags |= UPL_SHADOWED; 2682 } 2683 /* 2684 * ENCRYPTED SWAP: 2685 * Just mark the UPL as "encrypted" here. 2686 * We'll actually encrypt the pages later, 2687 * in upl_encrypt(), when the caller has 2688 * selected which pages need to go to swap. 2689 */ 2690 if (cntrl_flags & UPL_ENCRYPT) 2691 upl->flags |= UPL_ENCRYPTED; 2692 2693 if (cntrl_flags & UPL_FOR_PAGEOUT) 2694 upl->flags |= UPL_PAGEOUT; 2695 2696 vm_object_lock(object); 2697 vm_object_paging_begin(object); 2698 2699 /* 2700 * we can lock in the paging_offset once paging_in_progress is set 2701 */ 2702 upl->size = size; 2703 upl->offset = offset + object->paging_offset; 2704 2705#ifdef UPL_DEBUG 2706 queue_enter(&object->uplq, upl, upl_t, uplq); 2707#endif /* UPL_DEBUG */ 2708 2709 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) { 2710 /* 2711 * Honor copy-on-write obligations 2712 * 2713 * The caller is gathering these pages and 2714 * might modify their contents. We need to 2715 * make sure that the copy object has its own 2716 * private copies of these pages before we let 2717 * the caller modify them. 2718 */ 2719 vm_object_update(object, 2720 offset, 2721 size, 2722 NULL, 2723 NULL, 2724 FALSE, /* should_return */ 2725 MEMORY_OBJECT_COPY_SYNC, 2726 VM_PROT_NO_CHANGE); 2727 upl_cow++; 2728 upl_cow_pages += size >> PAGE_SHIFT; 2729 } 2730 /* 2731 * remember which copy object we synchronized with 2732 */ 2733 last_copy_object = object->copy; 2734 entry = 0; 2735 2736 xfer_size = size; 2737 dst_offset = offset; 2738 2739 while (xfer_size) { 2740 2741 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) { 2742 if (delayed_unlock) { 2743 delayed_unlock = 0; 2744 vm_page_unlock_queues(); 2745 } 2746 vm_object_unlock(object); 2747 VM_PAGE_GRAB_FICTITIOUS(alias_page); 2748 goto relock; 2749 } 2750 if (delayed_unlock == 0) { 2751 /* 2752 * pageout_scan takes the vm_page_lock_queues first 2753 * then tries for the object lock... to avoid what 2754 * is effectively a lock inversion, we'll go to the 2755 * trouble of taking them in that same order... otherwise 2756 * if this object contains the majority of the pages resident 2757 * in the UBC (or a small set of large objects actively being 2758 * worked on contain the majority of the pages), we could 2759 * cause the pageout_scan thread to 'starve' in its attempt 2760 * to find pages to move to the free queue, since it has to 2761 * successfully acquire the object lock of any candidate page 2762 * before it can steal/clean it. 2763 */ 2764 vm_object_unlock(object); 2765relock: 2766 for (j = 0; ; j++) { 2767 vm_page_lock_queues(); 2768 2769 if (vm_object_lock_try(object)) 2770 break; 2771 vm_page_unlock_queues(); 2772 mutex_pause(j); 2773 } 2774 delayed_unlock = 1; 2775 } 2776 if (cntrl_flags & UPL_COPYOUT_FROM) { 2777 upl->flags |= UPL_PAGE_SYNC_DONE; 2778 2779 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) || 2780 dst_page->fictitious || 2781 dst_page->absent || 2782 dst_page->error || 2783 (dst_page->wire_count && !dst_page->pageout && !dst_page->list_req_pending)) { 2784 2785 if (user_page_list) 2786 user_page_list[entry].phys_addr = 0; 2787 2788 goto delay_unlock_queues; 2789 } 2790 /* 2791 * grab this up front... 2792 * a high percentange of the time we're going to 2793 * need the hardware modification state a bit later 2794 * anyway... so we can eliminate an extra call into 2795 * the pmap layer by grabbing it here and recording it 2796 */ 2797 if (dst_page->pmapped) 2798 refmod_state = pmap_get_refmod(dst_page->phys_page); 2799 else 2800 refmod_state = 0; 2801 2802 if ( (refmod_state & VM_MEM_REFERENCED) && dst_page->inactive ) { 2803 /* 2804 * page is on inactive list and referenced... 2805 * reactivate it now... this gets it out of the 2806 * way of vm_pageout_scan which would have to 2807 * reactivate it upon tripping over it 2808 */ 2809 vm_page_activate(dst_page); 2810 VM_STAT_INCR(reactivations); 2811 } 2812 if (cntrl_flags & UPL_RET_ONLY_DIRTY) { 2813 /* 2814 * we're only asking for DIRTY pages to be returned 2815 */ 2816 if (dst_page->list_req_pending || !(cntrl_flags & UPL_FOR_PAGEOUT)) { 2817 /* 2818 * if we were the page stolen by vm_pageout_scan to be 2819 * cleaned (as opposed to a buddy being clustered in 2820 * or this request is not being driven by a PAGEOUT cluster 2821 * then we only need to check for the page being dirty or 2822 * precious to decide whether to return it 2823 */ 2824 if (dst_page->dirty || dst_page->precious || (refmod_state & VM_MEM_MODIFIED)) 2825 goto check_busy; 2826 goto dont_return; 2827 } 2828 /* 2829 * this is a request for a PAGEOUT cluster and this page 2830 * is merely along for the ride as a 'buddy'... not only 2831 * does it have to be dirty to be returned, but it also 2832 * can't have been referenced recently... note that we've 2833 * already filtered above based on whether this page is 2834 * currently on the inactive queue or it meets the page 2835 * ticket (generation count) check 2836 */ 2837 if ( !(refmod_state & VM_MEM_REFERENCED) && 2838 ((refmod_state & VM_MEM_MODIFIED) || dst_page->dirty || dst_page->precious) ) { 2839 goto check_busy; 2840 } 2841dont_return: 2842 /* 2843 * if we reach here, we're not to return 2844 * the page... go on to the next one 2845 */ 2846 if (user_page_list) 2847 user_page_list[entry].phys_addr = 0; 2848 2849 goto delay_unlock_queues; 2850 } 2851check_busy: 2852 if (dst_page->busy && (!(dst_page->list_req_pending && dst_page->pageout))) { 2853 if (cntrl_flags & UPL_NOBLOCK) { 2854 if (user_page_list) 2855 user_page_list[entry].phys_addr = 0; 2856 2857 goto delay_unlock_queues; 2858 } 2859 /* 2860 * someone else is playing with the 2861 * page. We will have to wait. 2862 */ 2863 delayed_unlock = 0; 2864 vm_page_unlock_queues(); 2865 2866 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 2867 2868 continue; 2869 } 2870 /* 2871 * Someone else already cleaning the page? 2872 */ 2873 if ((dst_page->cleaning || dst_page->absent || dst_page->wire_count != 0) && !dst_page->list_req_pending) { 2874 if (user_page_list) 2875 user_page_list[entry].phys_addr = 0; 2876 2877 goto delay_unlock_queues; 2878 } 2879 /* 2880 * ENCRYPTED SWAP: 2881 * The caller is gathering this page and might 2882 * access its contents later on. Decrypt the 2883 * page before adding it to the UPL, so that 2884 * the caller never sees encrypted data. 2885 */ 2886 if (! (cntrl_flags & UPL_ENCRYPT) && dst_page->encrypted) { 2887 int was_busy; 2888 2889 delayed_unlock = 0; 2890 vm_page_unlock_queues(); 2891 /* 2892 * save the current state of busy 2893 * mark page as busy while decrypt 2894 * is in progress since it will drop 2895 * the object lock... 2896 */ 2897 was_busy = dst_page->busy; 2898 dst_page->busy = TRUE; 2899 2900 vm_page_decrypt(dst_page, 0); 2901 vm_page_decrypt_for_upl_counter++; 2902 /* 2903 * restore to original busy state 2904 */ 2905 dst_page->busy = was_busy; 2906 2907 vm_page_lock_queues(); 2908 delayed_unlock = 1; 2909 } 2910 if (dst_page->pageout_queue == TRUE) 2911 /* 2912 * we've buddied up a page for a clustered pageout 2913 * that has already been moved to the pageout 2914 * queue by pageout_scan... we need to remove 2915 * it from the queue and drop the laundry count 2916 * on that queue 2917 */ 2918 vm_pageout_queue_steal(dst_page); 2919#if MACH_CLUSTER_STATS 2920 /* 2921 * pageout statistics gathering. count 2922 * all the pages we will page out that 2923 * were not counted in the initial 2924 * vm_pageout_scan work 2925 */ 2926 if (dst_page->list_req_pending) 2927 encountered_lrp = TRUE; 2928 if ((dst_page->dirty || (dst_page->object->internal && dst_page->precious)) && !dst_page->list_req_pending) { 2929 if (encountered_lrp) 2930 CLUSTER_STAT(pages_at_higher_offsets++;) 2931 else 2932 CLUSTER_STAT(pages_at_lower_offsets++;) 2933 } 2934#endif 2935 /* 2936 * Turn off busy indication on pending 2937 * pageout. Note: we can only get here 2938 * in the request pending case. 2939 */ 2940 dst_page->list_req_pending = FALSE; 2941 dst_page->busy = FALSE; 2942 2943 hw_dirty = refmod_state & VM_MEM_MODIFIED; 2944 dirty = hw_dirty ? TRUE : dst_page->dirty; 2945 2946 if (dst_page->phys_page > upl->highest_page) 2947 upl->highest_page = dst_page->phys_page; 2948 2949 if (cntrl_flags & UPL_SET_LITE) { 2950 int pg_num; 2951 2952 pg_num = (dst_offset-offset)/PAGE_SIZE; 2953 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 2954 2955 if (hw_dirty) 2956 pmap_clear_modify(dst_page->phys_page); 2957 2958 /* 2959 * Mark original page as cleaning 2960 * in place. 2961 */ 2962 dst_page->cleaning = TRUE; 2963 dst_page->precious = FALSE; 2964 } else { 2965 /* 2966 * use pageclean setup, it is more 2967 * convenient even for the pageout 2968 * cases here 2969 */ 2970 vm_object_lock(upl->map_object); 2971 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); 2972 vm_object_unlock(upl->map_object); 2973 2974 alias_page->absent = FALSE; 2975 alias_page = NULL; 2976 } 2977#if MACH_PAGEMAP 2978 /* 2979 * Record that this page has been 2980 * written out 2981 */ 2982 vm_external_state_set(object->existence_map, dst_page->offset); 2983#endif /*MACH_PAGEMAP*/ 2984 dst_page->dirty = dirty; 2985 2986 if (!dirty) 2987 dst_page->precious = TRUE; 2988 2989 if (dst_page->pageout) 2990 dst_page->busy = TRUE; 2991 2992 if ( (cntrl_flags & UPL_ENCRYPT) ) { 2993 /* 2994 * ENCRYPTED SWAP: 2995 * We want to deny access to the target page 2996 * because its contents are about to be 2997 * encrypted and the user would be very 2998 * confused to see encrypted data instead 2999 * of their data. 3000 * We also set "encrypted_cleaning" to allow 3001 * vm_pageout_scan() to demote that page 3002 * from "adjacent/clean-in-place" to 3003 * "target/clean-and-free" if it bumps into 3004 * this page during its scanning while we're 3005 * still processing this cluster. 3006 */ 3007 dst_page->busy = TRUE; 3008 dst_page->encrypted_cleaning = TRUE; 3009 } 3010 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) { 3011 /* 3012 * deny access to the target page 3013 * while it is being worked on 3014 */ 3015 if ((!dst_page->pageout) && (dst_page->wire_count == 0)) { 3016 dst_page->busy = TRUE; 3017 dst_page->pageout = TRUE; 3018 vm_page_wire(dst_page); 3019 } 3020 } 3021 } else { 3022 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) { 3023 /* 3024 * Honor copy-on-write obligations 3025 * 3026 * The copy object has changed since we 3027 * last synchronized for copy-on-write. 3028 * Another copy object might have been 3029 * inserted while we released the object's 3030 * lock. Since someone could have seen the 3031 * original contents of the remaining pages 3032 * through that new object, we have to 3033 * synchronize with it again for the remaining 3034 * pages only. The previous pages are "busy" 3035 * so they can not be seen through the new 3036 * mapping. The new mapping will see our 3037 * upcoming changes for those previous pages, 3038 * but that's OK since they couldn't see what 3039 * was there before. It's just a race anyway 3040 * and there's no guarantee of consistency or 3041 * atomicity. We just don't want new mappings 3042 * to see both the *before* and *after* pages. 3043 */ 3044 if (object->copy != VM_OBJECT_NULL) { 3045 delayed_unlock = 0; 3046 vm_page_unlock_queues(); 3047 3048 vm_object_update( 3049 object, 3050 dst_offset,/* current offset */ 3051 xfer_size, /* remaining size */ 3052 NULL, 3053 NULL, 3054 FALSE, /* should_return */ 3055 MEMORY_OBJECT_COPY_SYNC, 3056 VM_PROT_NO_CHANGE); 3057 3058 upl_cow_again++; 3059 upl_cow_again_pages += xfer_size >> PAGE_SHIFT; 3060 3061 vm_page_lock_queues(); 3062 delayed_unlock = 1; 3063 } 3064 /* 3065 * remember the copy object we synced with 3066 */ 3067 last_copy_object = object->copy; 3068 } 3069 dst_page = vm_page_lookup(object, dst_offset); 3070 3071 if (dst_page != VM_PAGE_NULL) { 3072 if ( !(dst_page->list_req_pending) ) { 3073 if ((cntrl_flags & UPL_RET_ONLY_ABSENT) && !dst_page->absent) { 3074 /* 3075 * skip over pages already present in the cache 3076 */ 3077 if (user_page_list) 3078 user_page_list[entry].phys_addr = 0; 3079 3080 goto delay_unlock_queues; 3081 } 3082 if (dst_page->cleaning) { 3083 /* 3084 * someone else is writing to the page... wait... 3085 */ 3086 delayed_unlock = 0; 3087 vm_page_unlock_queues(); 3088 3089 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 3090 3091 continue; 3092 } 3093 } else { 3094 if (dst_page->fictitious && 3095 dst_page->phys_page == vm_page_fictitious_addr) { 3096 assert( !dst_page->speculative); 3097 /* 3098 * dump the fictitious page 3099 */ 3100 dst_page->list_req_pending = FALSE; 3101 3102 vm_page_free(dst_page); 3103 3104 dst_page = NULL; 3105 } else if (dst_page->absent) { 3106 /* 3107 * the default_pager case 3108 */ 3109 dst_page->list_req_pending = FALSE; 3110 dst_page->busy = FALSE; 3111 } 3112 } 3113 } 3114 if (dst_page == VM_PAGE_NULL) { 3115 if (object->private) { 3116 /* 3117 * This is a nasty wrinkle for users 3118 * of upl who encounter device or 3119 * private memory however, it is 3120 * unavoidable, only a fault can 3121 * resolve the actual backing 3122 * physical page by asking the 3123 * backing device. 3124 */ 3125 if (user_page_list) 3126 user_page_list[entry].phys_addr = 0; 3127 3128 goto delay_unlock_queues; 3129 } 3130 /* 3131 * need to allocate a page 3132 */ 3133 dst_page = vm_page_grab(); 3134 3135 if (dst_page == VM_PAGE_NULL) { 3136 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) { 3137 /* 3138 * we don't want to stall waiting for pages to come onto the free list 3139 * while we're already holding absent pages in this UPL 3140 * the caller will deal with the empty slots 3141 */ 3142 if (user_page_list) 3143 user_page_list[entry].phys_addr = 0; 3144 3145 goto try_next_page; 3146 } 3147 /* 3148 * no pages available... wait 3149 * then try again for the same 3150 * offset... 3151 */ 3152 delayed_unlock = 0; 3153 vm_page_unlock_queues(); 3154 3155 vm_object_unlock(object); 3156 VM_PAGE_WAIT(); 3157 3158 /* 3159 * pageout_scan takes the vm_page_lock_queues first 3160 * then tries for the object lock... to avoid what 3161 * is effectively a lock inversion, we'll go to the 3162 * trouble of taking them in that same order... otherwise 3163 * if this object contains the majority of the pages resident 3164 * in the UBC (or a small set of large objects actively being 3165 * worked on contain the majority of the pages), we could 3166 * cause the pageout_scan thread to 'starve' in its attempt 3167 * to find pages to move to the free queue, since it has to 3168 * successfully acquire the object lock of any candidate page 3169 * before it can steal/clean it. 3170 */ 3171 for (j = 0; ; j++) { 3172 vm_page_lock_queues(); 3173 3174 if (vm_object_lock_try(object)) 3175 break; 3176 vm_page_unlock_queues(); 3177 mutex_pause(j); 3178 } 3179 delayed_unlock = 1; 3180 3181 continue; 3182 } 3183 vm_page_insert_internal(dst_page, object, dst_offset, TRUE); 3184 3185 dst_page->absent = TRUE; 3186 dst_page->busy = FALSE; 3187 3188 if (cntrl_flags & UPL_RET_ONLY_ABSENT) { 3189 /* 3190 * if UPL_RET_ONLY_ABSENT was specified, 3191 * than we're definitely setting up a 3192 * upl for a clustered read/pagein 3193 * operation... mark the pages as clustered 3194 * so upl_commit_range can put them on the 3195 * speculative list 3196 */ 3197 dst_page->clustered = TRUE; 3198 } 3199 } 3200 /* 3201 * ENCRYPTED SWAP: 3202 */ 3203 if (cntrl_flags & UPL_ENCRYPT) { 3204 /* 3205 * The page is going to be encrypted when we 3206 * get it from the pager, so mark it so. 3207 */ 3208 dst_page->encrypted = TRUE; 3209 } else { 3210 /* 3211 * Otherwise, the page will not contain 3212 * encrypted data. 3213 */ 3214 dst_page->encrypted = FALSE; 3215 } 3216 dst_page->overwriting = TRUE; 3217 3218 if (dst_page->fictitious) { 3219 panic("need corner case for fictitious page"); 3220 } 3221 if (dst_page->busy) { 3222 /* 3223 * someone else is playing with the 3224 * page. We will have to wait. 3225 */ 3226 delayed_unlock = 0; 3227 vm_page_unlock_queues(); 3228 3229 PAGE_SLEEP(object, dst_page, THREAD_UNINT); 3230 3231 continue; 3232 } 3233 if (dst_page->pmapped) { 3234 if ( !(cntrl_flags & UPL_FILE_IO)) 3235 /* 3236 * eliminate all mappings from the 3237 * original object and its prodigy 3238 */ 3239 refmod_state = pmap_disconnect(dst_page->phys_page); 3240 else 3241 refmod_state = pmap_get_refmod(dst_page->phys_page); 3242 } else 3243 refmod_state = 0; 3244 3245 hw_dirty = refmod_state & VM_MEM_MODIFIED; 3246 dirty = hw_dirty ? TRUE : dst_page->dirty; 3247 3248 if (cntrl_flags & UPL_SET_LITE) { 3249 int pg_num; 3250 3251 pg_num = (dst_offset-offset)/PAGE_SIZE; 3252 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 3253 3254 if (hw_dirty) 3255 pmap_clear_modify(dst_page->phys_page); 3256 3257 /* 3258 * Mark original page as cleaning 3259 * in place. 3260 */ 3261 dst_page->cleaning = TRUE; 3262 dst_page->precious = FALSE; 3263 } else { 3264 /* 3265 * use pageclean setup, it is more 3266 * convenient even for the pageout 3267 * cases here 3268 */ 3269 vm_object_lock(upl->map_object); 3270 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size); 3271 vm_object_unlock(upl->map_object); 3272 3273 alias_page->absent = FALSE; 3274 alias_page = NULL; 3275 } 3276 3277 if (cntrl_flags & UPL_CLEAN_IN_PLACE) { 3278 /* 3279 * clean in place for read implies 3280 * that a write will be done on all 3281 * the pages that are dirty before 3282 * a upl commit is done. The caller 3283 * is obligated to preserve the 3284 * contents of all pages marked dirty 3285 */ 3286 upl->flags |= UPL_CLEAR_DIRTY; 3287 } 3288 dst_page->dirty = dirty; 3289 3290 if (!dirty) 3291 dst_page->precious = TRUE; 3292 3293 if (dst_page->wire_count == 0) { 3294 /* 3295 * deny access to the target page while 3296 * it is being worked on 3297 */ 3298 dst_page->busy = TRUE; 3299 } else 3300 vm_page_wire(dst_page); 3301 3302 if (dst_page->clustered) { 3303 /* 3304 * expect the page not to be used 3305 * since it's coming in as part 3306 * of a speculative cluster... 3307 * pages that are 'consumed' will 3308 * get a hardware reference 3309 */ 3310 dst_page->reference = FALSE; 3311 } else { 3312 /* 3313 * expect the page to be used 3314 */ 3315 dst_page->reference = TRUE; 3316 } 3317 dst_page->precious = (cntrl_flags & UPL_PRECIOUS) ? TRUE : FALSE; 3318 } 3319 if (dst_page->phys_page > upl->highest_page) 3320 upl->highest_page = dst_page->phys_page; 3321 if (user_page_list) { 3322 user_page_list[entry].phys_addr = dst_page->phys_page; 3323 user_page_list[entry].pageout = dst_page->pageout; 3324 user_page_list[entry].absent = dst_page->absent; 3325 user_page_list[entry].dirty = dst_page->dirty; 3326 user_page_list[entry].precious = dst_page->precious; 3327 user_page_list[entry].device = FALSE; 3328 if (dst_page->clustered == TRUE) 3329 user_page_list[entry].speculative = dst_page->speculative; 3330 else 3331 user_page_list[entry].speculative = FALSE; 3332 user_page_list[entry].cs_validated = dst_page->cs_validated; 3333 user_page_list[entry].cs_tainted = dst_page->cs_tainted; 3334 } 3335 /* 3336 * if UPL_RET_ONLY_ABSENT is set, then 3337 * we are working with a fresh page and we've 3338 * just set the clustered flag on it to 3339 * indicate that it was drug in as part of a 3340 * speculative cluster... so leave it alone 3341 */ 3342 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) { 3343 /* 3344 * someone is explicitly grabbing this page... 3345 * update clustered and speculative state 3346 * 3347 */ 3348 VM_PAGE_CONSUME_CLUSTERED(dst_page); 3349 } 3350delay_unlock_queues: 3351 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) { 3352 /* 3353 * pageout_scan takes the vm_page_lock_queues first 3354 * then tries for the object lock... to avoid what 3355 * is effectively a lock inversion, we'll go to the 3356 * trouble of taking them in that same order... otherwise 3357 * if this object contains the majority of the pages resident 3358 * in the UBC (or a small set of large objects actively being 3359 * worked on contain the majority of the pages), we could 3360 * cause the pageout_scan thread to 'starve' in its attempt 3361 * to find pages to move to the free queue, since it has to 3362 * successfully acquire the object lock of any candidate page 3363 * before it can steal/clean it. 3364 */ 3365 vm_object_unlock(object); 3366 mutex_yield(&vm_page_queue_lock); 3367 3368 for (j = 0; ; j++) { 3369 if (vm_object_lock_try(object)) 3370 break; 3371 vm_page_unlock_queues(); 3372 mutex_pause(j); 3373 vm_page_lock_queues(); 3374 } 3375 delayed_unlock = 1; 3376 } 3377try_next_page: 3378 entry++; 3379 dst_offset += PAGE_SIZE_64; 3380 xfer_size -= PAGE_SIZE; 3381 } 3382 if (alias_page != NULL) { 3383 if (delayed_unlock == 0) { 3384 vm_page_lock_queues(); 3385 delayed_unlock = 1; 3386 } 3387 vm_page_free(alias_page); 3388 } 3389 if (delayed_unlock) 3390 vm_page_unlock_queues(); 3391 3392 if (page_list_count != NULL) { 3393 if (upl->flags & UPL_INTERNAL) 3394 *page_list_count = 0; 3395 else if (*page_list_count > entry) 3396 *page_list_count = entry; 3397 } 3398 vm_object_unlock(object); 3399 3400 return KERN_SUCCESS; 3401} 3402 3403/* JMM - Backward compatability for now */ 3404kern_return_t 3405vm_fault_list_request( /* forward */ 3406 memory_object_control_t control, 3407 vm_object_offset_t offset, 3408 upl_size_t size, 3409 upl_t *upl_ptr, 3410 upl_page_info_t **user_page_list_ptr, 3411 unsigned int page_list_count, 3412 int cntrl_flags); 3413kern_return_t 3414vm_fault_list_request( 3415 memory_object_control_t control, 3416 vm_object_offset_t offset, 3417 upl_size_t size, 3418 upl_t *upl_ptr, 3419 upl_page_info_t **user_page_list_ptr, 3420 unsigned int page_list_count, 3421 int cntrl_flags) 3422{ 3423 unsigned int local_list_count; 3424 upl_page_info_t *user_page_list; 3425 kern_return_t kr; 3426 3427 if (user_page_list_ptr != NULL) { 3428 local_list_count = page_list_count; 3429 user_page_list = *user_page_list_ptr; 3430 } else { 3431 local_list_count = 0; 3432 user_page_list = NULL; 3433 } 3434 kr = memory_object_upl_request(control, 3435 offset, 3436 size, 3437 upl_ptr, 3438 user_page_list, 3439 &local_list_count, 3440 cntrl_flags); 3441 3442 if(kr != KERN_SUCCESS) 3443 return kr; 3444 3445 if ((user_page_list_ptr != NULL) && (cntrl_flags & UPL_INTERNAL)) { 3446 *user_page_list_ptr = UPL_GET_INTERNAL_PAGE_LIST(*upl_ptr); 3447 } 3448 3449 return KERN_SUCCESS; 3450} 3451 3452 3453 3454/* 3455 * Routine: vm_object_super_upl_request 3456 * Purpose: 3457 * Cause the population of a portion of a vm_object 3458 * in much the same way as memory_object_upl_request. 3459 * Depending on the nature of the request, the pages 3460 * returned may be contain valid data or be uninitialized. 3461 * However, the region may be expanded up to the super 3462 * cluster size provided. 3463 */ 3464 3465__private_extern__ kern_return_t 3466vm_object_super_upl_request( 3467 vm_object_t object, 3468 vm_object_offset_t offset, 3469 upl_size_t size, 3470 upl_size_t super_cluster, 3471 upl_t *upl, 3472 upl_page_info_t *user_page_list, 3473 unsigned int *page_list_count, 3474 int cntrl_flags) 3475{ 3476 if (object->paging_offset > offset) 3477 return KERN_FAILURE; 3478 3479 assert(object->paging_in_progress); 3480 offset = offset - object->paging_offset; 3481 3482 if (super_cluster > size) { 3483 3484 vm_object_offset_t base_offset; 3485 upl_size_t super_size; 3486 3487 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1)); 3488 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster; 3489 super_size = ((base_offset + super_size) > object->size) ? (object->size - base_offset) : super_size; 3490 3491 if (offset > (base_offset + super_size)) { 3492 panic("vm_object_super_upl_request: Missed target pageout" 3493 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n", 3494 offset, base_offset, super_size, super_cluster, 3495 size, object->paging_offset); 3496 } 3497 /* 3498 * apparently there is a case where the vm requests a 3499 * page to be written out who's offset is beyond the 3500 * object size 3501 */ 3502 if ((offset + size) > (base_offset + super_size)) 3503 super_size = (offset + size) - base_offset; 3504 3505 offset = base_offset; 3506 size = super_size; 3507 } 3508 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags); 3509} 3510 3511 3512kern_return_t 3513vm_map_create_upl( 3514 vm_map_t map, 3515 vm_map_address_t offset, 3516 upl_size_t *upl_size, 3517 upl_t *upl, 3518 upl_page_info_array_t page_list, 3519 unsigned int *count, 3520 int *flags) 3521{ 3522 vm_map_entry_t entry; 3523 int caller_flags; 3524 int force_data_sync; 3525 int sync_cow_data; 3526 vm_object_t local_object; 3527 vm_map_offset_t local_offset; 3528 vm_map_offset_t local_start; 3529 kern_return_t ret; 3530 3531 caller_flags = *flags; 3532 3533 if (caller_flags & ~UPL_VALID_FLAGS) { 3534 /* 3535 * For forward compatibility's sake, 3536 * reject any unknown flag. 3537 */ 3538 return KERN_INVALID_VALUE; 3539 } 3540 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC); 3541 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM); 3542 3543 if (upl == NULL) 3544 return KERN_INVALID_ARGUMENT; 3545 3546REDISCOVER_ENTRY: 3547 vm_map_lock(map); 3548 3549 if (vm_map_lookup_entry(map, offset, &entry)) { 3550 3551 if ((entry->vme_end - offset) < *upl_size) 3552 *upl_size = entry->vme_end - offset; 3553 3554 if (caller_flags & UPL_QUERY_OBJECT_TYPE) { 3555 *flags = 0; 3556 3557 if (entry->object.vm_object != VM_OBJECT_NULL) { 3558 if (entry->object.vm_object->private) 3559 *flags = UPL_DEV_MEMORY; 3560 3561 if (entry->object.vm_object->phys_contiguous) 3562 *flags |= UPL_PHYS_CONTIG; 3563 } 3564 vm_map_unlock(map); 3565 3566 return KERN_SUCCESS; 3567 } 3568 if (entry->object.vm_object == VM_OBJECT_NULL || !entry->object.vm_object->phys_contiguous) { 3569 if ((*upl_size/page_size) > MAX_UPL_SIZE) 3570 *upl_size = MAX_UPL_SIZE * page_size; 3571 } 3572 /* 3573 * Create an object if necessary. 3574 */ 3575 if (entry->object.vm_object == VM_OBJECT_NULL) { 3576 entry->object.vm_object = vm_object_allocate((vm_size_t)(entry->vme_end - entry->vme_start)); 3577 entry->offset = 0; 3578 } 3579 if (!(caller_flags & UPL_COPYOUT_FROM)) { 3580 if (!(entry->protection & VM_PROT_WRITE)) { 3581 vm_map_unlock(map); 3582 return KERN_PROTECTION_FAILURE; 3583 } 3584 if (entry->needs_copy) { 3585 vm_map_t local_map; 3586 vm_object_t object; 3587 vm_object_offset_t new_offset; 3588 vm_prot_t prot; 3589 boolean_t wired; 3590 vm_map_version_t version; 3591 vm_map_t real_map; 3592 3593 local_map = map; 3594 vm_map_lock_write_to_read(map); 3595 3596 if (vm_map_lookup_locked(&local_map, 3597 offset, VM_PROT_WRITE, 3598 OBJECT_LOCK_EXCLUSIVE, 3599 &version, &object, 3600 &new_offset, &prot, &wired, 3601 NULL, 3602 &real_map)) { 3603 vm_map_unlock(local_map); 3604 return KERN_FAILURE; 3605 } 3606 if (real_map != map) 3607 vm_map_unlock(real_map); 3608 vm_object_unlock(object); 3609 vm_map_unlock(local_map); 3610 3611 goto REDISCOVER_ENTRY; 3612 } 3613 } 3614 if (entry->is_sub_map) { 3615 vm_map_t submap; 3616 3617 submap = entry->object.sub_map; 3618 local_start = entry->vme_start; 3619 local_offset = entry->offset; 3620 3621 vm_map_reference(submap); 3622 vm_map_unlock(map); 3623 3624 ret = vm_map_create_upl(submap, 3625 local_offset + (offset - local_start), 3626 upl_size, upl, page_list, count, flags); 3627 vm_map_deallocate(submap); 3628 3629 return ret; 3630 } 3631 if (sync_cow_data) { 3632 if (entry->object.vm_object->shadow || entry->object.vm_object->copy) { 3633 local_object = entry->object.vm_object; 3634 local_start = entry->vme_start; 3635 local_offset = entry->offset; 3636 3637 vm_object_reference(local_object); 3638 vm_map_unlock(map); 3639 3640 if (entry->object.vm_object->shadow && entry->object.vm_object->copy) { 3641 vm_object_lock_request( 3642 local_object->shadow, 3643 (vm_object_offset_t) 3644 ((offset - local_start) + 3645 local_offset) + 3646 local_object->shadow_offset, 3647 *upl_size, FALSE, 3648 MEMORY_OBJECT_DATA_SYNC, 3649 VM_PROT_NO_CHANGE); 3650 } 3651 sync_cow_data = FALSE; 3652 vm_object_deallocate(local_object); 3653 3654 goto REDISCOVER_ENTRY; 3655 } 3656 } 3657 if (force_data_sync) { 3658 local_object = entry->object.vm_object; 3659 local_start = entry->vme_start; 3660 local_offset = entry->offset; 3661 3662 vm_object_reference(local_object); 3663 vm_map_unlock(map); 3664 3665 vm_object_lock_request( 3666 local_object, 3667 (vm_object_offset_t) 3668 ((offset - local_start) + local_offset), 3669 (vm_object_size_t)*upl_size, FALSE, 3670 MEMORY_OBJECT_DATA_SYNC, 3671 VM_PROT_NO_CHANGE); 3672 3673 force_data_sync = FALSE; 3674 vm_object_deallocate(local_object); 3675 3676 goto REDISCOVER_ENTRY; 3677 } 3678 if (entry->object.vm_object->private) 3679 *flags = UPL_DEV_MEMORY; 3680 else 3681 *flags = 0; 3682 3683 if (entry->object.vm_object->phys_contiguous) 3684 *flags |= UPL_PHYS_CONTIG; 3685 3686 local_object = entry->object.vm_object; 3687 local_offset = entry->offset; 3688 local_start = entry->vme_start; 3689 3690 vm_object_reference(local_object); 3691 vm_map_unlock(map); 3692 3693 ret = vm_object_iopl_request(local_object, 3694 (vm_object_offset_t) ((offset - local_start) + local_offset), 3695 *upl_size, 3696 upl, 3697 page_list, 3698 count, 3699 caller_flags); 3700 vm_object_deallocate(local_object); 3701 3702 return(ret); 3703 } 3704 vm_map_unlock(map); 3705 3706 return(KERN_FAILURE); 3707} 3708 3709/* 3710 * Internal routine to enter a UPL into a VM map. 3711 * 3712 * JMM - This should just be doable through the standard 3713 * vm_map_enter() API. 3714 */ 3715kern_return_t 3716vm_map_enter_upl( 3717 vm_map_t map, 3718 upl_t upl, 3719 vm_map_offset_t *dst_addr) 3720{ 3721 vm_map_size_t size; 3722 vm_object_offset_t offset; 3723 vm_map_offset_t addr; 3724 vm_page_t m; 3725 kern_return_t kr; 3726 3727 if (upl == UPL_NULL) 3728 return KERN_INVALID_ARGUMENT; 3729 3730 upl_lock(upl); 3731 3732 /* 3733 * check to see if already mapped 3734 */ 3735 if (UPL_PAGE_LIST_MAPPED & upl->flags) { 3736 upl_unlock(upl); 3737 return KERN_FAILURE; 3738 } 3739 3740 if ((!(upl->flags & UPL_SHADOWED)) && !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || 3741 (upl->map_object->phys_contiguous))) { 3742 vm_object_t object; 3743 vm_page_t alias_page; 3744 vm_object_offset_t new_offset; 3745 int pg_num; 3746 wpl_array_t lite_list; 3747 3748 if (upl->flags & UPL_INTERNAL) { 3749 lite_list = (wpl_array_t) 3750 ((((uintptr_t)upl) + sizeof(struct upl)) 3751 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 3752 } else { 3753 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl)); 3754 } 3755 object = upl->map_object; 3756 upl->map_object = vm_object_allocate(upl->size); 3757 3758 vm_object_lock(upl->map_object); 3759 3760 upl->map_object->shadow = object; 3761 upl->map_object->pageout = TRUE; 3762 upl->map_object->can_persist = FALSE; 3763 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 3764 upl->map_object->shadow_offset = upl->offset - object->paging_offset; 3765 upl->map_object->wimg_bits = object->wimg_bits; 3766 offset = upl->map_object->shadow_offset; 3767 new_offset = 0; 3768 size = upl->size; 3769 3770 upl->flags |= UPL_SHADOWED; 3771 3772 while (size) { 3773 pg_num = (new_offset)/PAGE_SIZE; 3774 3775 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 3776 3777 VM_PAGE_GRAB_FICTITIOUS(alias_page); 3778 3779 vm_object_lock(object); 3780 3781 m = vm_page_lookup(object, offset); 3782 if (m == VM_PAGE_NULL) { 3783 panic("vm_upl_map: page missing\n"); 3784 } 3785 3786 /* 3787 * Convert the fictitious page to a private 3788 * shadow of the real page. 3789 */ 3790 assert(alias_page->fictitious); 3791 alias_page->fictitious = FALSE; 3792 alias_page->private = TRUE; 3793 alias_page->pageout = TRUE; 3794 /* 3795 * since m is a page in the upl it must 3796 * already be wired or BUSY, so it's 3797 * safe to assign the underlying physical 3798 * page to the alias 3799 */ 3800 alias_page->phys_page = m->phys_page; 3801 3802 vm_object_unlock(object); 3803 3804 vm_page_lockspin_queues(); 3805 vm_page_wire(alias_page); 3806 vm_page_unlock_queues(); 3807 3808 /* 3809 * ENCRYPTED SWAP: 3810 * The virtual page ("m") has to be wired in some way 3811 * here or its physical page ("m->phys_page") could 3812 * be recycled at any time. 3813 * Assuming this is enforced by the caller, we can't 3814 * get an encrypted page here. Since the encryption 3815 * key depends on the VM page's "pager" object and 3816 * the "paging_offset", we couldn't handle 2 pageable 3817 * VM pages (with different pagers and paging_offsets) 3818 * sharing the same physical page: we could end up 3819 * encrypting with one key (via one VM page) and 3820 * decrypting with another key (via the alias VM page). 3821 */ 3822 ASSERT_PAGE_DECRYPTED(m); 3823 3824 vm_page_insert(alias_page, upl->map_object, new_offset); 3825 3826 assert(!alias_page->wanted); 3827 alias_page->busy = FALSE; 3828 alias_page->absent = FALSE; 3829 } 3830 size -= PAGE_SIZE; 3831 offset += PAGE_SIZE_64; 3832 new_offset += PAGE_SIZE_64; 3833 } 3834 vm_object_unlock(upl->map_object); 3835 } 3836 if ((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || upl->map_object->phys_contiguous) 3837 offset = upl->offset - upl->map_object->paging_offset; 3838 else 3839 offset = 0; 3840 size = upl->size; 3841 3842 vm_object_reference(upl->map_object); 3843 3844 *dst_addr = 0; 3845 /* 3846 * NEED A UPL_MAP ALIAS 3847 */ 3848 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0, 3849 VM_FLAGS_ANYWHERE, upl->map_object, offset, FALSE, 3850 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT); 3851 3852 if (kr != KERN_SUCCESS) { 3853 upl_unlock(upl); 3854 return(kr); 3855 } 3856 vm_object_lock(upl->map_object); 3857 3858 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) { 3859 m = vm_page_lookup(upl->map_object, offset); 3860 3861 if (m) { 3862 unsigned int cache_attr; 3863 cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK; 3864 3865 m->pmapped = TRUE; 3866 m->wpmapped = TRUE; 3867 3868 PMAP_ENTER(map->pmap, addr, m, VM_PROT_ALL, cache_attr, TRUE); 3869 } 3870 offset += PAGE_SIZE_64; 3871 } 3872 vm_object_unlock(upl->map_object); 3873 3874 /* 3875 * hold a reference for the mapping 3876 */ 3877 upl->ref_count++; 3878 upl->flags |= UPL_PAGE_LIST_MAPPED; 3879 upl->kaddr = *dst_addr; 3880 upl_unlock(upl); 3881 3882 return KERN_SUCCESS; 3883} 3884 3885/* 3886 * Internal routine to remove a UPL mapping from a VM map. 3887 * 3888 * XXX - This should just be doable through a standard 3889 * vm_map_remove() operation. Otherwise, implicit clean-up 3890 * of the target map won't be able to correctly remove 3891 * these (and release the reference on the UPL). Having 3892 * to do this means we can't map these into user-space 3893 * maps yet. 3894 */ 3895kern_return_t 3896vm_map_remove_upl( 3897 vm_map_t map, 3898 upl_t upl) 3899{ 3900 vm_address_t addr; 3901 upl_size_t size; 3902 3903 if (upl == UPL_NULL) 3904 return KERN_INVALID_ARGUMENT; 3905 3906 upl_lock(upl); 3907 3908 if (upl->flags & UPL_PAGE_LIST_MAPPED) { 3909 addr = upl->kaddr; 3910 size = upl->size; 3911 3912 assert(upl->ref_count > 1); 3913 upl->ref_count--; /* removing mapping ref */ 3914 3915 upl->flags &= ~UPL_PAGE_LIST_MAPPED; 3916 upl->kaddr = (vm_offset_t) 0; 3917 upl_unlock(upl); 3918 3919 vm_map_remove(map, 3920 vm_map_trunc_page(addr), 3921 vm_map_round_page(addr + size), 3922 VM_MAP_NO_FLAGS); 3923 3924 return KERN_SUCCESS; 3925 } 3926 upl_unlock(upl); 3927 3928 return KERN_FAILURE; 3929} 3930 3931kern_return_t 3932upl_commit_range( 3933 upl_t upl, 3934 upl_offset_t offset, 3935 upl_size_t size, 3936 int flags, 3937 upl_page_info_t *page_list, 3938 mach_msg_type_number_t count, 3939 boolean_t *empty) 3940{ 3941 upl_size_t xfer_size; 3942 vm_object_t shadow_object; 3943 vm_object_t object; 3944 vm_object_offset_t target_offset; 3945 int entry; 3946 wpl_array_t lite_list; 3947 int occupied; 3948 int delayed_unlock = 0; 3949 int clear_refmod = 0; 3950 int pgpgout_count = 0; 3951 int j; 3952 3953 *empty = FALSE; 3954 3955 if (upl == UPL_NULL) 3956 return KERN_INVALID_ARGUMENT; 3957 3958 if (count == 0) 3959 page_list = NULL; 3960 3961 if (upl->flags & UPL_DEVICE_MEMORY) 3962 xfer_size = 0; 3963 else if ((offset + size) <= upl->size) 3964 xfer_size = size; 3965 else 3966 return KERN_FAILURE; 3967 3968 upl_lock(upl); 3969 3970 if (upl->flags & UPL_ACCESS_BLOCKED) { 3971 /* 3972 * We used this UPL to block access to the pages by marking 3973 * them "busy". Now we need to clear the "busy" bit to allow 3974 * access to these pages again. 3975 */ 3976 flags |= UPL_COMMIT_ALLOW_ACCESS; 3977 } 3978 if (upl->flags & UPL_CLEAR_DIRTY) 3979 flags |= UPL_COMMIT_CLEAR_DIRTY; 3980 3981 if (upl->flags & UPL_INTERNAL) 3982 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl)) 3983 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 3984 else 3985 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 3986 3987 object = upl->map_object; 3988 3989 if (upl->flags & UPL_SHADOWED) { 3990 vm_object_lock(object); 3991 shadow_object = object->shadow; 3992 } else { 3993 shadow_object = object; 3994 } 3995 entry = offset/PAGE_SIZE; 3996 target_offset = (vm_object_offset_t)offset; 3997 3998 /* 3999 * pageout_scan takes the vm_page_lock_queues first 4000 * then tries for the object lock... to avoid what 4001 * is effectively a lock inversion, we'll go to the 4002 * trouble of taking them in that same order... otherwise 4003 * if this object contains the majority of the pages resident 4004 * in the UBC (or a small set of large objects actively being 4005 * worked on contain the majority of the pages), we could 4006 * cause the pageout_scan thread to 'starve' in its attempt 4007 * to find pages to move to the free queue, since it has to 4008 * successfully acquire the object lock of any candidate page 4009 * before it can steal/clean it. 4010 */ 4011 for (j = 0; ; j++) { 4012 vm_page_lock_queues(); 4013 4014 if (vm_object_lock_try(shadow_object)) 4015 break; 4016 vm_page_unlock_queues(); 4017 mutex_pause(j); 4018 } 4019 delayed_unlock = 1; 4020 4021 if (shadow_object->code_signed) { 4022 /* 4023 * CODE SIGNING: 4024 * If the object is code-signed, do not let this UPL tell 4025 * us if the pages are valid or not. Let the pages be 4026 * validated by VM the normal way (when they get mapped or 4027 * copied). 4028 */ 4029 flags &= ~UPL_COMMIT_CS_VALIDATED; 4030 } 4031 if (! page_list) { 4032 /* 4033 * No page list to get the code-signing info from !? 4034 */ 4035 flags &= ~UPL_COMMIT_CS_VALIDATED; 4036 } 4037 4038 while (xfer_size) { 4039 vm_page_t t, m; 4040 4041 m = VM_PAGE_NULL; 4042 4043 if (upl->flags & UPL_LITE) { 4044 int pg_num; 4045 4046 pg_num = target_offset/PAGE_SIZE; 4047 4048 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 4049 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); 4050 4051 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset)); 4052 } 4053 } 4054 if (upl->flags & UPL_SHADOWED) { 4055 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { 4056 4057 t->pageout = FALSE; 4058 4059 vm_page_free(t); 4060 4061 if (m == VM_PAGE_NULL) 4062 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset); 4063 } 4064 } 4065 if (m == VM_PAGE_NULL) { 4066 goto commit_next_page; 4067 } 4068 4069 clear_refmod = 0; 4070 4071 if (flags & UPL_COMMIT_CS_VALIDATED) { 4072 /* 4073 * CODE SIGNING: 4074 * Set the code signing bits according to 4075 * what the UPL says they should be. 4076 */ 4077 m->cs_validated = page_list[entry].cs_validated; 4078 m->cs_tainted = page_list[entry].cs_tainted; 4079 } 4080 if (upl->flags & UPL_IO_WIRE) { 4081 4082 vm_page_unwire(m); 4083 4084 if (page_list) 4085 page_list[entry].phys_addr = 0; 4086 4087 if (flags & UPL_COMMIT_SET_DIRTY) 4088 m->dirty = TRUE; 4089 else if (flags & UPL_COMMIT_CLEAR_DIRTY) { 4090 m->dirty = FALSE; 4091 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 4092 m->cs_validated && !m->cs_tainted) { 4093 /* 4094 * CODE SIGNING: 4095 * This page is no longer dirty 4096 * but could have been modified, 4097 * so it will need to be 4098 * re-validated. 4099 */ 4100 m->cs_validated = FALSE; 4101 vm_cs_validated_resets++; 4102 } 4103 clear_refmod |= VM_MEM_MODIFIED; 4104 } 4105 4106 if (flags & UPL_COMMIT_INACTIVATE) 4107 vm_page_deactivate(m); 4108 4109 if (clear_refmod) 4110 pmap_clear_refmod(m->phys_page, clear_refmod); 4111 4112 if (flags & UPL_COMMIT_ALLOW_ACCESS) { 4113 /* 4114 * We blocked access to the pages in this UPL. 4115 * Clear the "busy" bit and wake up any waiter 4116 * for this page. 4117 */ 4118 PAGE_WAKEUP_DONE(m); 4119 } 4120 goto commit_next_page; 4121 } 4122 /* 4123 * make sure to clear the hardware 4124 * modify or reference bits before 4125 * releasing the BUSY bit on this page 4126 * otherwise we risk losing a legitimate 4127 * change of state 4128 */ 4129 if (flags & UPL_COMMIT_CLEAR_DIRTY) { 4130 m->dirty = FALSE; 4131 4132 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 4133 m->cs_validated && !m->cs_tainted) { 4134 /* 4135 * CODE SIGNING: 4136 * This page is no longer dirty 4137 * but could have been modified, 4138 * so it will need to be 4139 * re-validated. 4140 */ 4141 m->cs_validated = FALSE; 4142#if DEVELOPMENT || DEBUG 4143 vm_cs_validated_resets++; 4144#endif 4145 } 4146 clear_refmod |= VM_MEM_MODIFIED; 4147 } 4148 if (clear_refmod) 4149 pmap_clear_refmod(m->phys_page, clear_refmod); 4150 4151 if (page_list) { 4152 upl_page_info_t *p; 4153 4154 p = &(page_list[entry]); 4155 4156 if (p->phys_addr && p->pageout && !m->pageout) { 4157 m->busy = TRUE; 4158 m->pageout = TRUE; 4159 vm_page_wire(m); 4160 } else if (p->phys_addr && 4161 !p->pageout && m->pageout && 4162 !m->dump_cleaning) { 4163 m->pageout = FALSE; 4164 m->absent = FALSE; 4165 m->overwriting = FALSE; 4166 vm_page_unwire(m); 4167 4168 PAGE_WAKEUP_DONE(m); 4169 } 4170 page_list[entry].phys_addr = 0; 4171 } 4172 m->dump_cleaning = FALSE; 4173 4174 if (m->laundry) 4175 vm_pageout_throttle_up(m); 4176 4177 if (m->pageout) { 4178 m->cleaning = FALSE; 4179 m->encrypted_cleaning = FALSE; 4180 m->pageout = FALSE; 4181#if MACH_CLUSTER_STATS 4182 if (m->wanted) vm_pageout_target_collisions++; 4183#endif 4184 m->dirty = FALSE; 4185 4186 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 4187 m->cs_validated && !m->cs_tainted) { 4188 /* 4189 * CODE SIGNING: 4190 * This page is no longer dirty 4191 * but could have been modified, 4192 * so it will need to be 4193 * re-validated. 4194 */ 4195 m->cs_validated = FALSE; 4196#if DEVELOPMENT || DEBUG 4197 vm_cs_validated_resets++; 4198#endif 4199 } 4200 4201 if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED)) 4202 m->dirty = TRUE; 4203 4204 if (m->dirty) { 4205 /* 4206 * page was re-dirtied after we started 4207 * the pageout... reactivate it since 4208 * we don't know whether the on-disk 4209 * copy matches what is now in memory 4210 */ 4211 vm_page_unwire(m); 4212 4213 if (upl->flags & UPL_PAGEOUT) { 4214 CLUSTER_STAT(vm_pageout_target_page_dirtied++;) 4215 VM_STAT_INCR(reactivations); 4216 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL); 4217 } 4218 PAGE_WAKEUP_DONE(m); 4219 } else { 4220 /* 4221 * page has been successfully cleaned 4222 * go ahead and free it for other use 4223 */ 4224 4225 if (m->object->internal) { 4226 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL); 4227 } else { 4228 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL); 4229 } 4230 4231 vm_page_free(m); 4232 4233 if (upl->flags & UPL_PAGEOUT) { 4234 CLUSTER_STAT(vm_pageout_target_page_freed++;) 4235 4236 if (page_list[entry].dirty) { 4237 VM_STAT_INCR(pageouts); 4238 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL); 4239 pgpgout_count++; 4240 } 4241 } 4242 } 4243 goto commit_next_page; 4244 } 4245#if MACH_CLUSTER_STATS 4246 if (m->wpmapped) 4247 m->dirty = pmap_is_modified(m->phys_page); 4248 4249 if (m->dirty) vm_pageout_cluster_dirtied++; 4250 else vm_pageout_cluster_cleaned++; 4251 if (m->wanted) vm_pageout_cluster_collisions++; 4252#endif 4253 m->dirty = FALSE; 4254 4255 if (! (flags & UPL_COMMIT_CS_VALIDATED) && 4256 m->cs_validated && !m->cs_tainted) { 4257 /* 4258 * CODE SIGNING: 4259 * This page is no longer dirty 4260 * but could have been modified, 4261 * so it will need to be 4262 * re-validated. 4263 */ 4264 m->cs_validated = FALSE; 4265#if DEVELOPMENT || DEBUG 4266 vm_cs_validated_resets++; 4267#endif 4268 } 4269 4270 if ((m->busy) && (m->cleaning)) { 4271 /* 4272 * the request_page_list case 4273 */ 4274 m->absent = FALSE; 4275 m->overwriting = FALSE; 4276 m->busy = FALSE; 4277 } else if (m->overwriting) { 4278 /* 4279 * alternate request page list, write to 4280 * page_list case. Occurs when the original 4281 * page was wired at the time of the list 4282 * request 4283 */ 4284 assert(m->wire_count != 0); 4285 vm_page_unwire(m);/* reactivates */ 4286 m->overwriting = FALSE; 4287 } 4288 m->cleaning = FALSE; 4289 m->encrypted_cleaning = FALSE; 4290 4291 /* 4292 * It is a part of the semantic of COPYOUT_FROM 4293 * UPLs that a commit implies cache sync 4294 * between the vm page and the backing store 4295 * this can be used to strip the precious bit 4296 * as well as clean 4297 */ 4298 if (upl->flags & UPL_PAGE_SYNC_DONE) 4299 m->precious = FALSE; 4300 4301 if (flags & UPL_COMMIT_SET_DIRTY) 4302 m->dirty = TRUE; 4303 4304 if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) { 4305 vm_page_deactivate(m); 4306 } else if (!m->active && !m->inactive && !m->speculative) { 4307 4308 if (m->clustered) 4309 vm_page_speculate(m, TRUE); 4310 else if (m->reference) 4311 vm_page_activate(m); 4312 else 4313 vm_page_deactivate(m); 4314 } 4315 if (flags & UPL_COMMIT_ALLOW_ACCESS) { 4316 /* 4317 * We blocked access to the pages in this URL. 4318 * Clear the "busy" bit on this page before we 4319 * wake up any waiter. 4320 */ 4321 m->busy = FALSE; 4322 } 4323 /* 4324 * Wakeup any thread waiting for the page to be un-cleaning. 4325 */ 4326 PAGE_WAKEUP(m); 4327 4328commit_next_page: 4329 target_offset += PAGE_SIZE_64; 4330 xfer_size -= PAGE_SIZE; 4331 entry++; 4332 4333 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) { 4334 /* 4335 * pageout_scan takes the vm_page_lock_queues first 4336 * then tries for the object lock... to avoid what 4337 * is effectively a lock inversion, we'll go to the 4338 * trouble of taking them in that same order... otherwise 4339 * if this object contains the majority of the pages resident 4340 * in the UBC (or a small set of large objects actively being 4341 * worked on contain the majority of the pages), we could 4342 * cause the pageout_scan thread to 'starve' in its attempt 4343 * to find pages to move to the free queue, since it has to 4344 * successfully acquire the object lock of any candidate page 4345 * before it can steal/clean it. 4346 */ 4347 vm_object_unlock(shadow_object); 4348 mutex_yield(&vm_page_queue_lock); 4349 4350 for (j = 0; ; j++) { 4351 if (vm_object_lock_try(shadow_object)) 4352 break; 4353 vm_page_unlock_queues(); 4354 mutex_pause(j); 4355 vm_page_lock_queues(); 4356 } 4357 delayed_unlock = 1; 4358 } 4359 } 4360 if (delayed_unlock) 4361 vm_page_unlock_queues(); 4362 4363 occupied = 1; 4364 4365 if (upl->flags & UPL_DEVICE_MEMORY) { 4366 occupied = 0; 4367 } else if (upl->flags & UPL_LITE) { 4368 int pg_num; 4369 int i; 4370 4371 pg_num = upl->size/PAGE_SIZE; 4372 pg_num = (pg_num + 31) >> 5; 4373 occupied = 0; 4374 4375 for (i = 0; i < pg_num; i++) { 4376 if (lite_list[i] != 0) { 4377 occupied = 1; 4378 break; 4379 } 4380 } 4381 } else { 4382 if (queue_empty(&upl->map_object->memq)) 4383 occupied = 0; 4384 } 4385 if (occupied == 0) { 4386 if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY) 4387 *empty = TRUE; 4388 4389 if (object == shadow_object) { 4390 /* 4391 * this is not a paging object 4392 * so we need to drop the paging reference 4393 * that was taken when we created the UPL 4394 * against this object 4395 */ 4396 vm_object_paging_end(shadow_object); 4397 } else { 4398 /* 4399 * we dontated the paging reference to 4400 * the map object... vm_pageout_object_terminate 4401 * will drop this reference 4402 */ 4403 } 4404 } 4405 vm_object_unlock(shadow_object); 4406 if (object != shadow_object) 4407 vm_object_unlock(object); 4408 upl_unlock(upl); 4409 4410 if (pgpgout_count) { 4411 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL); 4412 } 4413 4414 return KERN_SUCCESS; 4415} 4416 4417kern_return_t 4418upl_abort_range( 4419 upl_t upl, 4420 upl_offset_t offset, 4421 upl_size_t size, 4422 int error, 4423 boolean_t *empty) 4424{ 4425 upl_size_t xfer_size; 4426 vm_object_t shadow_object; 4427 vm_object_t object; 4428 vm_object_offset_t target_offset; 4429 int entry; 4430 wpl_array_t lite_list; 4431 int occupied; 4432 int delayed_unlock = 0; 4433 int j; 4434 4435 *empty = FALSE; 4436 4437 if (upl == UPL_NULL) 4438 return KERN_INVALID_ARGUMENT; 4439 4440 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) ) 4441 return upl_commit_range(upl, offset, size, 0, NULL, 0, empty); 4442 4443 if (upl->flags & UPL_DEVICE_MEMORY) 4444 xfer_size = 0; 4445 else if ((offset + size) <= upl->size) 4446 xfer_size = size; 4447 else 4448 return KERN_FAILURE; 4449 4450 upl_lock(upl); 4451 4452 if (upl->flags & UPL_INTERNAL) { 4453 lite_list = (wpl_array_t) 4454 ((((uintptr_t)upl) + sizeof(struct upl)) 4455 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t))); 4456 } else { 4457 lite_list = (wpl_array_t) 4458 (((uintptr_t)upl) + sizeof(struct upl)); 4459 } 4460 object = upl->map_object; 4461 4462 if (upl->flags & UPL_SHADOWED) { 4463 vm_object_lock(object); 4464 shadow_object = object->shadow; 4465 } else 4466 shadow_object = object; 4467 4468 entry = offset/PAGE_SIZE; 4469 target_offset = (vm_object_offset_t)offset; 4470 4471 /* 4472 * pageout_scan takes the vm_page_lock_queues first 4473 * then tries for the object lock... to avoid what 4474 * is effectively a lock inversion, we'll go to the 4475 * trouble of taking them in that same order... otherwise 4476 * if this object contains the majority of the pages resident 4477 * in the UBC (or a small set of large objects actively being 4478 * worked on contain the majority of the pages), we could 4479 * cause the pageout_scan thread to 'starve' in its attempt 4480 * to find pages to move to the free queue, since it has to 4481 * successfully acquire the object lock of any candidate page 4482 * before it can steal/clean it. 4483 */ 4484 for (j = 0; ; j++) { 4485 vm_page_lock_queues(); 4486 4487 if (vm_object_lock_try(shadow_object)) 4488 break; 4489 vm_page_unlock_queues(); 4490 mutex_pause(j); 4491 } 4492 delayed_unlock = 1; 4493 4494 while (xfer_size) { 4495 vm_page_t t, m; 4496 4497 m = VM_PAGE_NULL; 4498 4499 if (upl->flags & UPL_LITE) { 4500 int pg_num; 4501 pg_num = target_offset/PAGE_SIZE; 4502 4503 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) { 4504 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31)); 4505 4506 m = vm_page_lookup(shadow_object, target_offset + 4507 (upl->offset - shadow_object->paging_offset)); 4508 } 4509 } 4510 if (upl->flags & UPL_SHADOWED) { 4511 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) { 4512 t->pageout = FALSE; 4513 4514 vm_page_free(t); 4515 4516 if (m == VM_PAGE_NULL) 4517 m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset); 4518 } 4519 } 4520 if (m != VM_PAGE_NULL) { 4521 4522 if (m->absent) { 4523 boolean_t must_free = TRUE; 4524 4525 m->clustered = FALSE; 4526 /* 4527 * COPYOUT = FALSE case 4528 * check for error conditions which must 4529 * be passed back to the pages customer 4530 */ 4531 if (error & UPL_ABORT_RESTART) { 4532 m->restart = TRUE; 4533 m->absent = FALSE; 4534 m->error = TRUE; 4535 m->unusual = TRUE; 4536 must_free = FALSE; 4537 } else if (error & UPL_ABORT_UNAVAILABLE) { 4538 m->restart = FALSE; 4539 m->unusual = TRUE; 4540 must_free = FALSE; 4541 } else if (error & UPL_ABORT_ERROR) { 4542 m->restart = FALSE; 4543 m->absent = FALSE; 4544 m->error = TRUE; 4545 m->unusual = TRUE; 4546 must_free = FALSE; 4547 } 4548 4549 /* 4550 * ENCRYPTED SWAP: 4551 * If the page was already encrypted, 4552 * we don't really need to decrypt it 4553 * now. It will get decrypted later, 4554 * on demand, as soon as someone needs 4555 * to access its contents. 4556 */ 4557 4558 m->cleaning = FALSE; 4559 m->encrypted_cleaning = FALSE; 4560 m->overwriting = FALSE; 4561 PAGE_WAKEUP_DONE(m); 4562 4563 if (must_free == TRUE) 4564 vm_page_free(m); 4565 else 4566 vm_page_activate(m); 4567 } else { 4568 /* 4569 * Handle the trusted pager throttle. 4570 */ 4571 if (m->laundry) 4572 vm_pageout_throttle_up(m); 4573 4574 if (m->pageout) { 4575 assert(m->busy); 4576 assert(m->wire_count == 1); 4577 m->pageout = FALSE; 4578 vm_page_unwire(m); 4579 } 4580 m->dump_cleaning = FALSE; 4581 m->cleaning = FALSE; 4582 m->encrypted_cleaning = FALSE; 4583 m->overwriting = FALSE; 4584#if MACH_PAGEMAP 4585 vm_external_state_clr(m->object->existence_map, m->offset); 4586#endif /* MACH_PAGEMAP */ 4587 if (error & UPL_ABORT_DUMP_PAGES) { 4588 pmap_disconnect(m->phys_page); 4589 vm_page_free(m); 4590 } else { 4591 if (error & UPL_ABORT_REFERENCE) { 4592 /* 4593 * we've been told to explictly 4594 * reference this page... for 4595 * file I/O, this is done by 4596 * implementing an LRU on the inactive q 4597 */ 4598 vm_page_lru(m); 4599 } 4600 PAGE_WAKEUP_DONE(m); 4601 } 4602 } 4603 } 4604 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) { 4605 /* 4606 * pageout_scan takes the vm_page_lock_queues first 4607 * then tries for the object lock... to avoid what 4608 * is effectively a lock inversion, we'll go to the 4609 * trouble of taking them in that same order... otherwise 4610 * if this object contains the majority of the pages resident 4611 * in the UBC (or a small set of large objects actively being 4612 * worked on contain the majority of the pages), we could 4613 * cause the pageout_scan thread to 'starve' in its attempt 4614 * to find pages to move to the free queue, since it has to 4615 * successfully acquire the object lock of any candidate page 4616 * before it can steal/clean it. 4617 */ 4618 vm_object_unlock(shadow_object); 4619 mutex_yield(&vm_page_queue_lock); 4620 4621 for (j = 0; ; j++) { 4622 if (vm_object_lock_try(shadow_object)) 4623 break; 4624 vm_page_unlock_queues(); 4625 mutex_pause(j); 4626 vm_page_lock_queues(); 4627 } 4628 delayed_unlock = 1; 4629 } 4630 target_offset += PAGE_SIZE_64; 4631 xfer_size -= PAGE_SIZE; 4632 entry++; 4633 } 4634 if (delayed_unlock) 4635 vm_page_unlock_queues(); 4636 4637 occupied = 1; 4638 4639 if (upl->flags & UPL_DEVICE_MEMORY) { 4640 occupied = 0; 4641 } else if (upl->flags & UPL_LITE) { 4642 int pg_num; 4643 int i; 4644 4645 pg_num = upl->size/PAGE_SIZE; 4646 pg_num = (pg_num + 31) >> 5; 4647 occupied = 0; 4648 4649 for (i = 0; i < pg_num; i++) { 4650 if (lite_list[i] != 0) { 4651 occupied = 1; 4652 break; 4653 } 4654 } 4655 } else { 4656 if (queue_empty(&upl->map_object->memq)) 4657 occupied = 0; 4658 } 4659 if (occupied == 0) { 4660 if (upl->flags & UPL_COMMIT_NOTIFY_EMPTY) 4661 *empty = TRUE; 4662 4663 if (object == shadow_object) { 4664 /* 4665 * this is not a paging object 4666 * so we need to drop the paging reference 4667 * that was taken when we created the UPL 4668 * against this object 4669 */ 4670 vm_object_paging_end(shadow_object); 4671 } else { 4672 /* 4673 * we dontated the paging reference to 4674 * the map object... vm_pageout_object_terminate 4675 * will drop this reference 4676 */ 4677 } 4678 } 4679 vm_object_unlock(shadow_object); 4680 if (object != shadow_object) 4681 vm_object_unlock(object); 4682 upl_unlock(upl); 4683 4684 return KERN_SUCCESS; 4685} 4686 4687 4688kern_return_t 4689upl_abort( 4690 upl_t upl, 4691 int error) 4692{ 4693 boolean_t empty; 4694 4695 return upl_abort_range(upl, 0, upl->size, error, &empty); 4696} 4697 4698 4699/* an option on commit should be wire */ 4700kern_return_t 4701upl_commit( 4702 upl_t upl, 4703 upl_page_info_t *page_list, 4704 mach_msg_type_number_t count) 4705{ 4706 boolean_t empty; 4707 4708 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty); 4709} 4710 4711 4712kern_return_t 4713vm_object_iopl_request( 4714 vm_object_t object, 4715 vm_object_offset_t offset, 4716 upl_size_t size, 4717 upl_t *upl_ptr, 4718 upl_page_info_array_t user_page_list, 4719 unsigned int *page_list_count, 4720 int cntrl_flags) 4721{ 4722 vm_page_t dst_page; 4723 vm_object_offset_t dst_offset; 4724 upl_size_t xfer_size; 4725 upl_t upl = NULL; 4726 unsigned int entry; 4727 wpl_array_t lite_list = NULL; 4728 int delayed_unlock = 0; 4729 int no_zero_fill = FALSE; 4730 u_int32_t psize; 4731 kern_return_t ret; 4732 vm_prot_t prot; 4733 struct vm_object_fault_info fault_info; 4734 4735 4736 if (cntrl_flags & ~UPL_VALID_FLAGS) { 4737 /* 4738 * For forward compatibility's sake, 4739 * reject any unknown flag. 4740 */ 4741 return KERN_INVALID_VALUE; 4742 } 4743 if (vm_lopage_poolsize == 0) 4744 cntrl_flags &= ~UPL_NEED_32BIT_ADDR; 4745 4746 if (cntrl_flags & UPL_NEED_32BIT_ADDR) { 4747 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE)) 4748 return KERN_INVALID_VALUE; 4749 4750 if (object->phys_contiguous) { 4751 if ((offset + object->shadow_offset) >= (vm_object_offset_t)max_valid_dma_address) 4752 return KERN_INVALID_ADDRESS; 4753 4754 if (((offset + object->shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address) 4755 return KERN_INVALID_ADDRESS; 4756 } 4757 } 4758 4759 if (cntrl_flags & UPL_ENCRYPT) { 4760 /* 4761 * ENCRYPTED SWAP: 4762 * The paging path doesn't use this interface, 4763 * so we don't support the UPL_ENCRYPT flag 4764 * here. We won't encrypt the pages. 4765 */ 4766 assert(! (cntrl_flags & UPL_ENCRYPT)); 4767 } 4768 if (cntrl_flags & UPL_NOZEROFILL) 4769 no_zero_fill = TRUE; 4770 4771 if (cntrl_flags & UPL_COPYOUT_FROM) 4772 prot = VM_PROT_READ; 4773 else 4774 prot = VM_PROT_READ | VM_PROT_WRITE; 4775 4776 if (((size/page_size) > MAX_UPL_SIZE) && !object->phys_contiguous) 4777 size = MAX_UPL_SIZE * page_size; 4778 4779 if (cntrl_flags & UPL_SET_INTERNAL) { 4780 if (page_list_count != NULL) 4781 *page_list_count = MAX_UPL_SIZE; 4782 } 4783 if (((cntrl_flags & UPL_SET_INTERNAL) && !(object->phys_contiguous)) && 4784 ((page_list_count != NULL) && (*page_list_count != 0) && *page_list_count < (size/page_size))) 4785 return KERN_INVALID_ARGUMENT; 4786 4787 if ((!object->internal) && (object->paging_offset != 0)) 4788 panic("vm_object_iopl_request: external object with non-zero paging offset\n"); 4789 4790 4791 if (object->phys_contiguous) 4792 psize = PAGE_SIZE; 4793 else 4794 psize = size; 4795 4796 if (cntrl_flags & UPL_SET_INTERNAL) { 4797 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE, UPL_IO_WIRE, psize); 4798 4799 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl)); 4800 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) + 4801 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t))); 4802 } else { 4803 upl = upl_create(UPL_CREATE_LITE, UPL_IO_WIRE, psize); 4804 4805 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl)); 4806 } 4807 if (user_page_list) 4808 user_page_list[0].device = FALSE; 4809 *upl_ptr = upl; 4810 4811 upl->map_object = object; 4812 upl->size = size; 4813 4814 vm_object_lock(object); 4815 vm_object_paging_begin(object); 4816 /* 4817 * paging in progress also protects the paging_offset 4818 */ 4819 upl->offset = offset + object->paging_offset; 4820 4821 if (object->phys_contiguous) { 4822#ifdef UPL_DEBUG 4823 queue_enter(&object->uplq, upl, upl_t, uplq); 4824#endif /* UPL_DEBUG */ 4825 4826 vm_object_unlock(object); 4827 4828 /* 4829 * don't need any shadow mappings for this one 4830 * since it is already I/O memory 4831 */ 4832 upl->flags |= UPL_DEVICE_MEMORY; 4833 4834 upl->highest_page = (offset + object->shadow_offset + size - 1)>>PAGE_SHIFT; 4835 4836 if (user_page_list) { 4837 user_page_list[0].phys_addr = (offset + object->shadow_offset)>>PAGE_SHIFT; 4838 user_page_list[0].device = TRUE; 4839 } 4840 if (page_list_count != NULL) { 4841 if (upl->flags & UPL_INTERNAL) 4842 *page_list_count = 0; 4843 else 4844 *page_list_count = 1; 4845 } 4846 return KERN_SUCCESS; 4847 } 4848 /* 4849 * Protect user space from future COW operations 4850 */ 4851 object->true_share = TRUE; 4852 4853 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) 4854 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 4855 4856#ifdef UPL_DEBUG 4857 queue_enter(&object->uplq, upl, upl_t, uplq); 4858#endif /* UPL_DEBUG */ 4859 4860 if (cntrl_flags & UPL_BLOCK_ACCESS) { 4861 /* 4862 * The user requested that access to the pages in this URL 4863 * be blocked until the UPL is commited or aborted. 4864 */ 4865 upl->flags |= UPL_ACCESS_BLOCKED; 4866 } 4867 entry = 0; 4868 4869 xfer_size = size; 4870 dst_offset = offset; 4871 4872 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 4873 fault_info.user_tag = 0; 4874 fault_info.lo_offset = offset; 4875 fault_info.hi_offset = offset + xfer_size; 4876 fault_info.no_cache = FALSE; 4877 4878 while (xfer_size) { 4879 vm_fault_return_t result; 4880 int pg_num; 4881 4882 dst_page = vm_page_lookup(object, dst_offset); 4883 4884 /* 4885 * ENCRYPTED SWAP: 4886 * If the page is encrypted, we need to decrypt it, 4887 * so force a soft page fault. 4888 */ 4889 if ((dst_page == VM_PAGE_NULL) || (dst_page->busy) || 4890 (dst_page->encrypted) || 4891 (dst_page->unusual && (dst_page->error || 4892 dst_page->restart || 4893 dst_page->absent || 4894 dst_page->fictitious))) { 4895 4896 do { 4897 vm_page_t top_page; 4898 kern_return_t error_code; 4899 int interruptible; 4900 4901 if (delayed_unlock) { 4902 delayed_unlock = 0; 4903 vm_page_unlock_queues(); 4904 } 4905 if (cntrl_flags & UPL_SET_INTERRUPTIBLE) 4906 interruptible = THREAD_ABORTSAFE; 4907 else 4908 interruptible = THREAD_UNINT; 4909 4910 fault_info.interruptible = interruptible; 4911 fault_info.cluster_size = xfer_size; 4912 4913 result = vm_fault_page(object, dst_offset, 4914 prot | VM_PROT_WRITE, FALSE, 4915 &prot, &dst_page, &top_page, 4916 (int *)0, 4917 &error_code, no_zero_fill, 4918 FALSE, &fault_info); 4919 4920 switch (result) { 4921 4922 case VM_FAULT_SUCCESS: 4923 4924 PAGE_WAKEUP_DONE(dst_page); 4925 /* 4926 * Release paging references and 4927 * top-level placeholder page, if any. 4928 */ 4929 if (top_page != VM_PAGE_NULL) { 4930 vm_object_t local_object; 4931 4932 local_object = top_page->object; 4933 4934 if (top_page->object != dst_page->object) { 4935 vm_object_lock(local_object); 4936 VM_PAGE_FREE(top_page); 4937 vm_object_paging_end(local_object); 4938 vm_object_unlock(local_object); 4939 } else { 4940 VM_PAGE_FREE(top_page); 4941 vm_object_paging_end(local_object); 4942 } 4943 } 4944 break; 4945 4946 case VM_FAULT_RETRY: 4947 vm_object_lock(object); 4948 vm_object_paging_begin(object); 4949 break; 4950 4951 case VM_FAULT_FICTITIOUS_SHORTAGE: 4952 vm_page_more_fictitious(); 4953 4954 vm_object_lock(object); 4955 vm_object_paging_begin(object); 4956 break; 4957 4958 case VM_FAULT_MEMORY_SHORTAGE: 4959 if (vm_page_wait(interruptible)) { 4960 vm_object_lock(object); 4961 vm_object_paging_begin(object); 4962 break; 4963 } 4964 /* fall thru */ 4965 4966 case VM_FAULT_INTERRUPTED: 4967 error_code = MACH_SEND_INTERRUPTED; 4968 case VM_FAULT_MEMORY_ERROR: 4969 ret = (error_code ? error_code: KERN_MEMORY_ERROR); 4970 4971 vm_object_lock(object); 4972 vm_object_paging_begin(object); 4973 goto return_err; 4974 } 4975 } while (result != VM_FAULT_SUCCESS); 4976 } 4977 4978 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) && 4979 dst_page->phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) { 4980 vm_page_t low_page; 4981 int refmod; 4982 4983 /* 4984 * support devices that can't DMA above 32 bits 4985 * by substituting pages from a pool of low address 4986 * memory for any pages we find above the 4G mark 4987 * can't substitute if the page is already wired because 4988 * we don't know whether that physical address has been 4989 * handed out to some other 64 bit capable DMA device to use 4990 */ 4991 if (dst_page->wire_count) { 4992 ret = KERN_PROTECTION_FAILURE; 4993 goto return_err; 4994 } 4995 if (delayed_unlock) { 4996 delayed_unlock = 0; 4997 vm_page_unlock_queues(); 4998 } 4999 low_page = vm_page_grablo(); 5000 5001 if (low_page == VM_PAGE_NULL) { 5002 ret = KERN_RESOURCE_SHORTAGE; 5003 goto return_err; 5004 } 5005 /* 5006 * from here until the vm_page_replace completes 5007 * we musn't drop the object lock... we don't 5008 * want anyone refaulting this page in and using 5009 * it after we disconnect it... we want the fault 5010 * to find the new page being substituted. 5011 */ 5012 if (dst_page->pmapped) 5013 refmod = pmap_disconnect(dst_page->phys_page); 5014 else 5015 refmod = 0; 5016 vm_page_copy(dst_page, low_page); 5017 5018 low_page->reference = dst_page->reference; 5019 low_page->dirty = dst_page->dirty; 5020 5021 if (refmod & VM_MEM_REFERENCED) 5022 low_page->reference = TRUE; 5023 if (refmod & VM_MEM_MODIFIED) 5024 low_page->dirty = TRUE; 5025 5026 vm_page_lock_queues(); 5027 vm_page_replace(low_page, object, dst_offset); 5028 /* 5029 * keep the queue lock since we're going to 5030 * need it immediately 5031 */ 5032 delayed_unlock = 1; 5033 5034 dst_page = low_page; 5035 /* 5036 * vm_page_grablo returned the page marked 5037 * BUSY... we don't need a PAGE_WAKEUP_DONE 5038 * here, because we've never dropped the object lock 5039 */ 5040 dst_page->busy = FALSE; 5041 } 5042 if (delayed_unlock == 0) 5043 vm_page_lock_queues(); 5044 5045 vm_page_wire(dst_page); 5046 5047 if (cntrl_flags & UPL_BLOCK_ACCESS) { 5048 /* 5049 * Mark the page "busy" to block any future page fault 5050 * on this page. We'll also remove the mapping 5051 * of all these pages before leaving this routine. 5052 */ 5053 assert(!dst_page->fictitious); 5054 dst_page->busy = TRUE; 5055 } 5056 pg_num = (dst_offset-offset)/PAGE_SIZE; 5057 lite_list[pg_num>>5] |= 1 << (pg_num & 31); 5058 5059 /* 5060 * expect the page to be used 5061 * page queues lock must be held to set 'reference' 5062 */ 5063 dst_page->reference = TRUE; 5064 5065 if (!(cntrl_flags & UPL_COPYOUT_FROM)) 5066 dst_page->dirty = TRUE; 5067 5068 if (dst_page->phys_page > upl->highest_page) 5069 upl->highest_page = dst_page->phys_page; 5070 5071 if (user_page_list) { 5072 user_page_list[entry].phys_addr = dst_page->phys_page; 5073 user_page_list[entry].pageout = dst_page->pageout; 5074 user_page_list[entry].absent = dst_page->absent; 5075 user_page_list[entry].dirty = dst_page->dirty; 5076 user_page_list[entry].precious = dst_page->precious; 5077 user_page_list[entry].device = FALSE; 5078 if (dst_page->clustered == TRUE) 5079 user_page_list[entry].speculative = dst_page->speculative; 5080 else 5081 user_page_list[entry].speculative = FALSE; 5082 user_page_list[entry].cs_validated = dst_page->cs_validated; 5083 user_page_list[entry].cs_tainted = dst_page->cs_tainted; 5084 } 5085 /* 5086 * someone is explicitly grabbing this page... 5087 * update clustered and speculative state 5088 * 5089 */ 5090 VM_PAGE_CONSUME_CLUSTERED(dst_page); 5091 5092 if (delayed_unlock++ > UPL_DELAYED_UNLOCK_LIMIT) { 5093 mutex_yield(&vm_page_queue_lock); 5094 delayed_unlock = 1; 5095 } 5096 entry++; 5097 dst_offset += PAGE_SIZE_64; 5098 xfer_size -= PAGE_SIZE; 5099 } 5100 if (delayed_unlock) 5101 vm_page_unlock_queues(); 5102 5103 if (page_list_count != NULL) { 5104 if (upl->flags & UPL_INTERNAL) 5105 *page_list_count = 0; 5106 else if (*page_list_count > entry) 5107 *page_list_count = entry; 5108 } 5109 vm_object_unlock(object); 5110 5111 if (cntrl_flags & UPL_BLOCK_ACCESS) { 5112 /* 5113 * We've marked all the pages "busy" so that future 5114 * page faults will block. 5115 * Now remove the mapping for these pages, so that they 5116 * can't be accessed without causing a page fault. 5117 */ 5118 vm_object_pmap_protect(object, offset, (vm_object_size_t)size, 5119 PMAP_NULL, 0, VM_PROT_NONE); 5120 } 5121 return KERN_SUCCESS; 5122 5123return_err: 5124 if (delayed_unlock) 5125 vm_page_unlock_queues(); 5126 5127 for (; offset < dst_offset; offset += PAGE_SIZE) { 5128 dst_page = vm_page_lookup(object, offset); 5129 5130 if (dst_page == VM_PAGE_NULL) 5131 panic("vm_object_iopl_request: Wired pages missing. \n"); 5132 5133 vm_page_lockspin_queues(); 5134 vm_page_unwire(dst_page); 5135 vm_page_unlock_queues(); 5136 5137 VM_STAT_INCR(reactivations); 5138 } 5139 vm_object_paging_end(object); 5140 vm_object_unlock(object); 5141 upl_destroy(upl); 5142 5143 return ret; 5144} 5145 5146kern_return_t 5147upl_transpose( 5148 upl_t upl1, 5149 upl_t upl2) 5150{ 5151 kern_return_t retval; 5152 boolean_t upls_locked; 5153 vm_object_t object1, object2; 5154 5155 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2) { 5156 return KERN_INVALID_ARGUMENT; 5157 } 5158 5159 upls_locked = FALSE; 5160 5161 /* 5162 * Since we need to lock both UPLs at the same time, 5163 * avoid deadlocks by always taking locks in the same order. 5164 */ 5165 if (upl1 < upl2) { 5166 upl_lock(upl1); 5167 upl_lock(upl2); 5168 } else { 5169 upl_lock(upl2); 5170 upl_lock(upl1); 5171 } 5172 upls_locked = TRUE; /* the UPLs will need to be unlocked */ 5173 5174 object1 = upl1->map_object; 5175 object2 = upl2->map_object; 5176 5177 if (upl1->offset != 0 || upl2->offset != 0 || 5178 upl1->size != upl2->size) { 5179 /* 5180 * We deal only with full objects, not subsets. 5181 * That's because we exchange the entire backing store info 5182 * for the objects: pager, resident pages, etc... We can't do 5183 * only part of it. 5184 */ 5185 retval = KERN_INVALID_VALUE; 5186 goto done; 5187 } 5188 5189 /* 5190 * Tranpose the VM objects' backing store. 5191 */ 5192 retval = vm_object_transpose(object1, object2, 5193 (vm_object_size_t) upl1->size); 5194 5195 if (retval == KERN_SUCCESS) { 5196 /* 5197 * Make each UPL point to the correct VM object, i.e. the 5198 * object holding the pages that the UPL refers to... 5199 */ 5200#ifdef UPL_DEBUG 5201 queue_remove(&object1->uplq, upl1, upl_t, uplq); 5202 queue_remove(&object2->uplq, upl2, upl_t, uplq); 5203#endif 5204 upl1->map_object = object2; 5205 upl2->map_object = object1; 5206#ifdef UPL_DEBUG 5207 queue_enter(&object1->uplq, upl2, upl_t, uplq); 5208 queue_enter(&object2->uplq, upl1, upl_t, uplq); 5209#endif 5210 } 5211 5212done: 5213 /* 5214 * Cleanup. 5215 */ 5216 if (upls_locked) { 5217 upl_unlock(upl1); 5218 upl_unlock(upl2); 5219 upls_locked = FALSE; 5220 } 5221 5222 return retval; 5223} 5224 5225/* 5226 * ENCRYPTED SWAP: 5227 * 5228 * Rationale: the user might have some encrypted data on disk (via 5229 * FileVault or any other mechanism). That data is then decrypted in 5230 * memory, which is safe as long as the machine is secure. But that 5231 * decrypted data in memory could be paged out to disk by the default 5232 * pager. The data would then be stored on disk in clear (not encrypted) 5233 * and it could be accessed by anyone who gets physical access to the 5234 * disk (if the laptop or the disk gets stolen for example). This weakens 5235 * the security offered by FileVault. 5236 * 5237 * Solution: the default pager will optionally request that all the 5238 * pages it gathers for pageout be encrypted, via the UPL interfaces, 5239 * before it sends this UPL to disk via the vnode_pageout() path. 5240 * 5241 * Notes: 5242 * 5243 * To avoid disrupting the VM LRU algorithms, we want to keep the 5244 * clean-in-place mechanisms, which allow us to send some extra pages to 5245 * swap (clustering) without actually removing them from the user's 5246 * address space. We don't want the user to unknowingly access encrypted 5247 * data, so we have to actually remove the encrypted pages from the page 5248 * table. When the user accesses the data, the hardware will fail to 5249 * locate the virtual page in its page table and will trigger a page 5250 * fault. We can then decrypt the page and enter it in the page table 5251 * again. Whenever we allow the user to access the contents of a page, 5252 * we have to make sure it's not encrypted. 5253 * 5254 * 5255 */ 5256/* 5257 * ENCRYPTED SWAP: 5258 * Reserve of virtual addresses in the kernel address space. 5259 * We need to map the physical pages in the kernel, so that we 5260 * can call the encryption/decryption routines with a kernel 5261 * virtual address. We keep this pool of pre-allocated kernel 5262 * virtual addresses so that we don't have to scan the kernel's 5263 * virtaul address space each time we need to encrypt or decrypt 5264 * a physical page. 5265 * It would be nice to be able to encrypt and decrypt in physical 5266 * mode but that might not always be more efficient... 5267 */ 5268decl_simple_lock_data(,vm_paging_lock) 5269#define VM_PAGING_NUM_PAGES 64 5270vm_map_offset_t vm_paging_base_address = 0; 5271boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, }; 5272int vm_paging_max_index = 0; 5273int vm_paging_page_waiter = 0; 5274int vm_paging_page_waiter_total = 0; 5275unsigned long vm_paging_no_kernel_page = 0; 5276unsigned long vm_paging_objects_mapped = 0; 5277unsigned long vm_paging_pages_mapped = 0; 5278unsigned long vm_paging_objects_mapped_slow = 0; 5279unsigned long vm_paging_pages_mapped_slow = 0; 5280 5281void 5282vm_paging_map_init(void) 5283{ 5284 kern_return_t kr; 5285 vm_map_offset_t page_map_offset; 5286 vm_map_entry_t map_entry; 5287 5288 assert(vm_paging_base_address == 0); 5289 5290 /* 5291 * Initialize our pool of pre-allocated kernel 5292 * virtual addresses. 5293 */ 5294 page_map_offset = 0; 5295 kr = vm_map_find_space(kernel_map, 5296 &page_map_offset, 5297 VM_PAGING_NUM_PAGES * PAGE_SIZE, 5298 0, 5299 0, 5300 &map_entry); 5301 if (kr != KERN_SUCCESS) { 5302 panic("vm_paging_map_init: kernel_map full\n"); 5303 } 5304 map_entry->object.vm_object = kernel_object; 5305 map_entry->offset = 5306 page_map_offset - VM_MIN_KERNEL_ADDRESS; 5307 vm_object_reference(kernel_object); 5308 vm_map_unlock(kernel_map); 5309 5310 assert(vm_paging_base_address == 0); 5311 vm_paging_base_address = page_map_offset; 5312} 5313 5314/* 5315 * ENCRYPTED SWAP: 5316 * vm_paging_map_object: 5317 * Maps part of a VM object's pages in the kernel 5318 * virtual address space, using the pre-allocated 5319 * kernel virtual addresses, if possible. 5320 * Context: 5321 * The VM object is locked. This lock will get 5322 * dropped and re-acquired though, so the caller 5323 * must make sure the VM object is kept alive 5324 * (by holding a VM map that has a reference 5325 * on it, for example, or taking an extra reference). 5326 * The page should also be kept busy to prevent 5327 * it from being reclaimed. 5328 */ 5329kern_return_t 5330vm_paging_map_object( 5331 vm_map_offset_t *address, 5332 vm_page_t page, 5333 vm_object_t object, 5334 vm_object_offset_t offset, 5335 vm_map_size_t *size, 5336 vm_prot_t protection, 5337 boolean_t can_unlock_object) 5338{ 5339 kern_return_t kr; 5340 vm_map_offset_t page_map_offset; 5341 vm_map_size_t map_size; 5342 vm_object_offset_t object_offset; 5343 int i; 5344 5345 5346 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) { 5347 assert(page->busy); 5348 /* 5349 * Use one of the pre-allocated kernel virtual addresses 5350 * and just enter the VM page in the kernel address space 5351 * at that virtual address. 5352 */ 5353 simple_lock(&vm_paging_lock); 5354 5355 /* 5356 * Try and find an available kernel virtual address 5357 * from our pre-allocated pool. 5358 */ 5359 page_map_offset = 0; 5360 for (;;) { 5361 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) { 5362 if (vm_paging_page_inuse[i] == FALSE) { 5363 page_map_offset = 5364 vm_paging_base_address + 5365 (i * PAGE_SIZE); 5366 break; 5367 } 5368 } 5369 if (page_map_offset != 0) { 5370 /* found a space to map our page ! */ 5371 break; 5372 } 5373 5374 if (can_unlock_object) { 5375 /* 5376 * If we can afford to unlock the VM object, 5377 * let's take the slow path now... 5378 */ 5379 break; 5380 } 5381 /* 5382 * We can't afford to unlock the VM object, so 5383 * let's wait for a space to become available... 5384 */ 5385 vm_paging_page_waiter_total++; 5386 vm_paging_page_waiter++; 5387 thread_sleep_fast_usimple_lock(&vm_paging_page_waiter, 5388 &vm_paging_lock, 5389 THREAD_UNINT); 5390 vm_paging_page_waiter--; 5391 /* ... and try again */ 5392 } 5393 5394 if (page_map_offset != 0) { 5395 /* 5396 * We found a kernel virtual address; 5397 * map the physical page to that virtual address. 5398 */ 5399 if (i > vm_paging_max_index) { 5400 vm_paging_max_index = i; 5401 } 5402 vm_paging_page_inuse[i] = TRUE; 5403 simple_unlock(&vm_paging_lock); 5404 5405 if (page->pmapped == FALSE) { 5406 pmap_sync_page_data_phys(page->phys_page); 5407 } 5408 page->pmapped = TRUE; 5409 5410 /* 5411 * Keep the VM object locked over the PMAP_ENTER 5412 * and the actual use of the page by the kernel, 5413 * or this pmap mapping might get undone by a 5414 * vm_object_pmap_protect() call... 5415 */ 5416 PMAP_ENTER(kernel_pmap, 5417 page_map_offset, 5418 page, 5419 protection, 5420 ((int) page->object->wimg_bits & 5421 VM_WIMG_MASK), 5422 TRUE); 5423 vm_paging_objects_mapped++; 5424 vm_paging_pages_mapped++; 5425 *address = page_map_offset; 5426 5427 /* all done and mapped, ready to use ! */ 5428 return KERN_SUCCESS; 5429 } 5430 5431 /* 5432 * We ran out of pre-allocated kernel virtual 5433 * addresses. Just map the page in the kernel 5434 * the slow and regular way. 5435 */ 5436 vm_paging_no_kernel_page++; 5437 simple_unlock(&vm_paging_lock); 5438 } 5439 5440 if (! can_unlock_object) { 5441 return KERN_NOT_SUPPORTED; 5442 } 5443 5444 object_offset = vm_object_trunc_page(offset); 5445 map_size = vm_map_round_page(*size); 5446 5447 /* 5448 * Try and map the required range of the object 5449 * in the kernel_map 5450 */ 5451 5452 vm_object_reference_locked(object); /* for the map entry */ 5453 vm_object_unlock(object); 5454 5455 kr = vm_map_enter(kernel_map, 5456 address, 5457 map_size, 5458 0, 5459 VM_FLAGS_ANYWHERE, 5460 object, 5461 object_offset, 5462 FALSE, 5463 protection, 5464 VM_PROT_ALL, 5465 VM_INHERIT_NONE); 5466 if (kr != KERN_SUCCESS) { 5467 *address = 0; 5468 *size = 0; 5469 vm_object_deallocate(object); /* for the map entry */ 5470 vm_object_lock(object); 5471 return kr; 5472 } 5473 5474 *size = map_size; 5475 5476 /* 5477 * Enter the mapped pages in the page table now. 5478 */ 5479 vm_object_lock(object); 5480 /* 5481 * VM object must be kept locked from before PMAP_ENTER() 5482 * until after the kernel is done accessing the page(s). 5483 * Otherwise, the pmap mappings in the kernel could be 5484 * undone by a call to vm_object_pmap_protect(). 5485 */ 5486 5487 for (page_map_offset = 0; 5488 map_size != 0; 5489 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) { 5490 unsigned int cache_attr; 5491 5492 page = vm_page_lookup(object, offset + page_map_offset); 5493 if (page == VM_PAGE_NULL) { 5494 printf("vm_paging_map_object: no page !?"); 5495 vm_object_unlock(object); 5496 kr = vm_map_remove(kernel_map, *address, *size, 5497 VM_MAP_NO_FLAGS); 5498 assert(kr == KERN_SUCCESS); 5499 *address = 0; 5500 *size = 0; 5501 vm_object_lock(object); 5502 return KERN_MEMORY_ERROR; 5503 } 5504 if (page->pmapped == FALSE) { 5505 pmap_sync_page_data_phys(page->phys_page); 5506 } 5507 page->pmapped = TRUE; 5508 cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK; 5509 5510 //assert(pmap_verify_free(page->phys_page)); 5511 PMAP_ENTER(kernel_pmap, 5512 *address + page_map_offset, 5513 page, 5514 protection, 5515 cache_attr, 5516 TRUE); 5517 } 5518 5519 vm_paging_objects_mapped_slow++; 5520 vm_paging_pages_mapped_slow += map_size / PAGE_SIZE_64; 5521 5522 return KERN_SUCCESS; 5523} 5524 5525/* 5526 * ENCRYPTED SWAP: 5527 * vm_paging_unmap_object: 5528 * Unmaps part of a VM object's pages from the kernel 5529 * virtual address space. 5530 * Context: 5531 * The VM object is locked. This lock will get 5532 * dropped and re-acquired though. 5533 */ 5534void 5535vm_paging_unmap_object( 5536 vm_object_t object, 5537 vm_map_offset_t start, 5538 vm_map_offset_t end) 5539{ 5540 kern_return_t kr; 5541 int i; 5542 5543 if ((vm_paging_base_address == 0) || 5544 (start < vm_paging_base_address) || 5545 (end > (vm_paging_base_address 5546 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) { 5547 /* 5548 * We didn't use our pre-allocated pool of 5549 * kernel virtual address. Deallocate the 5550 * virtual memory. 5551 */ 5552 if (object != VM_OBJECT_NULL) { 5553 vm_object_unlock(object); 5554 } 5555 kr = vm_map_remove(kernel_map, start, end, VM_MAP_NO_FLAGS); 5556 if (object != VM_OBJECT_NULL) { 5557 vm_object_lock(object); 5558 } 5559 assert(kr == KERN_SUCCESS); 5560 } else { 5561 /* 5562 * We used a kernel virtual address from our 5563 * pre-allocated pool. Put it back in the pool 5564 * for next time. 5565 */ 5566 assert(end - start == PAGE_SIZE); 5567 i = (start - vm_paging_base_address) >> PAGE_SHIFT; 5568 5569 /* undo the pmap mapping */ 5570 pmap_remove(kernel_pmap, start, end); 5571 5572 simple_lock(&vm_paging_lock); 5573 vm_paging_page_inuse[i] = FALSE; 5574 if (vm_paging_page_waiter) { 5575 thread_wakeup(&vm_paging_page_waiter); 5576 } 5577 simple_unlock(&vm_paging_lock); 5578 } 5579} 5580 5581#if CRYPTO 5582/* 5583 * Encryption data. 5584 * "iv" is the "initial vector". Ideally, we want to 5585 * have a different one for each page we encrypt, so that 5586 * crackers can't find encryption patterns too easily. 5587 */ 5588#define SWAP_CRYPT_AES_KEY_SIZE 128 /* XXX 192 and 256 don't work ! */ 5589boolean_t swap_crypt_ctx_initialized = FALSE; 5590aes_32t swap_crypt_key[8]; /* big enough for a 256 key */ 5591aes_ctx swap_crypt_ctx; 5592const unsigned char swap_crypt_null_iv[AES_BLOCK_SIZE] = {0xa, }; 5593 5594#if DEBUG 5595boolean_t swap_crypt_ctx_tested = FALSE; 5596unsigned char swap_crypt_test_page_ref[4096] __attribute__((aligned(4096))); 5597unsigned char swap_crypt_test_page_encrypt[4096] __attribute__((aligned(4096))); 5598unsigned char swap_crypt_test_page_decrypt[4096] __attribute__((aligned(4096))); 5599#endif /* DEBUG */ 5600 5601extern u_long random(void); 5602 5603/* 5604 * Initialize the encryption context: key and key size. 5605 */ 5606void swap_crypt_ctx_initialize(void); /* forward */ 5607void 5608swap_crypt_ctx_initialize(void) 5609{ 5610 unsigned int i; 5611 5612 /* 5613 * No need for locking to protect swap_crypt_ctx_initialized 5614 * because the first use of encryption will come from the 5615 * pageout thread (we won't pagein before there's been a pageout) 5616 * and there's only one pageout thread. 5617 */ 5618 if (swap_crypt_ctx_initialized == FALSE) { 5619 for (i = 0; 5620 i < (sizeof (swap_crypt_key) / 5621 sizeof (swap_crypt_key[0])); 5622 i++) { 5623 swap_crypt_key[i] = random(); 5624 } 5625 aes_encrypt_key((const unsigned char *) swap_crypt_key, 5626 SWAP_CRYPT_AES_KEY_SIZE, 5627 &swap_crypt_ctx.encrypt); 5628 aes_decrypt_key((const unsigned char *) swap_crypt_key, 5629 SWAP_CRYPT_AES_KEY_SIZE, 5630 &swap_crypt_ctx.decrypt); 5631 swap_crypt_ctx_initialized = TRUE; 5632 } 5633 5634#if DEBUG 5635 /* 5636 * Validate the encryption algorithms. 5637 */ 5638 if (swap_crypt_ctx_tested == FALSE) { 5639 /* initialize */ 5640 for (i = 0; i < 4096; i++) { 5641 swap_crypt_test_page_ref[i] = (char) i; 5642 } 5643 /* encrypt */ 5644 aes_encrypt_cbc(swap_crypt_test_page_ref, 5645 swap_crypt_null_iv, 5646 PAGE_SIZE / AES_BLOCK_SIZE, 5647 swap_crypt_test_page_encrypt, 5648 &swap_crypt_ctx.encrypt); 5649 /* decrypt */ 5650 aes_decrypt_cbc(swap_crypt_test_page_encrypt, 5651 swap_crypt_null_iv, 5652 PAGE_SIZE / AES_BLOCK_SIZE, 5653 swap_crypt_test_page_decrypt, 5654 &swap_crypt_ctx.decrypt); 5655 /* compare result with original */ 5656 for (i = 0; i < 4096; i ++) { 5657 if (swap_crypt_test_page_decrypt[i] != 5658 swap_crypt_test_page_ref[i]) { 5659 panic("encryption test failed"); 5660 } 5661 } 5662 5663 /* encrypt again */ 5664 aes_encrypt_cbc(swap_crypt_test_page_decrypt, 5665 swap_crypt_null_iv, 5666 PAGE_SIZE / AES_BLOCK_SIZE, 5667 swap_crypt_test_page_decrypt, 5668 &swap_crypt_ctx.encrypt); 5669 /* decrypt in place */ 5670 aes_decrypt_cbc(swap_crypt_test_page_decrypt, 5671 swap_crypt_null_iv, 5672 PAGE_SIZE / AES_BLOCK_SIZE, 5673 swap_crypt_test_page_decrypt, 5674 &swap_crypt_ctx.decrypt); 5675 for (i = 0; i < 4096; i ++) { 5676 if (swap_crypt_test_page_decrypt[i] != 5677 swap_crypt_test_page_ref[i]) { 5678 panic("in place encryption test failed"); 5679 } 5680 } 5681 5682 swap_crypt_ctx_tested = TRUE; 5683 } 5684#endif /* DEBUG */ 5685} 5686 5687/* 5688 * ENCRYPTED SWAP: 5689 * vm_page_encrypt: 5690 * Encrypt the given page, for secure paging. 5691 * The page might already be mapped at kernel virtual 5692 * address "kernel_mapping_offset". Otherwise, we need 5693 * to map it. 5694 * 5695 * Context: 5696 * The page's object is locked, but this lock will be released 5697 * and re-acquired. 5698 * The page is busy and not accessible by users (not entered in any pmap). 5699 */ 5700void 5701vm_page_encrypt( 5702 vm_page_t page, 5703 vm_map_offset_t kernel_mapping_offset) 5704{ 5705 kern_return_t kr; 5706 vm_map_size_t kernel_mapping_size; 5707 vm_offset_t kernel_vaddr; 5708 union { 5709 unsigned char aes_iv[AES_BLOCK_SIZE]; 5710 struct { 5711 memory_object_t pager_object; 5712 vm_object_offset_t paging_offset; 5713 } vm; 5714 } encrypt_iv; 5715 5716 if (! vm_pages_encrypted) { 5717 vm_pages_encrypted = TRUE; 5718 } 5719 5720 assert(page->busy); 5721 assert(page->dirty || page->precious); 5722 5723 if (page->encrypted) { 5724 /* 5725 * Already encrypted: no need to do it again. 5726 */ 5727 vm_page_encrypt_already_encrypted_counter++; 5728 return; 5729 } 5730 ASSERT_PAGE_DECRYPTED(page); 5731 5732 /* 5733 * Take a paging-in-progress reference to keep the object 5734 * alive even if we have to unlock it (in vm_paging_map_object() 5735 * for example)... 5736 */ 5737 vm_object_paging_begin(page->object); 5738 5739 if (kernel_mapping_offset == 0) { 5740 /* 5741 * The page hasn't already been mapped in kernel space 5742 * by the caller. Map it now, so that we can access 5743 * its contents and encrypt them. 5744 */ 5745 kernel_mapping_size = PAGE_SIZE; 5746 kr = vm_paging_map_object(&kernel_mapping_offset, 5747 page, 5748 page->object, 5749 page->offset, 5750 &kernel_mapping_size, 5751 VM_PROT_READ | VM_PROT_WRITE, 5752 FALSE); 5753 if (kr != KERN_SUCCESS) { 5754 panic("vm_page_encrypt: " 5755 "could not map page in kernel: 0x%x\n", 5756 kr); 5757 } 5758 } else { 5759 kernel_mapping_size = 0; 5760 } 5761 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 5762 5763 if (swap_crypt_ctx_initialized == FALSE) { 5764 swap_crypt_ctx_initialize(); 5765 } 5766 assert(swap_crypt_ctx_initialized); 5767 5768 /* 5769 * Prepare an "initial vector" for the encryption. 5770 * We use the "pager" and the "paging_offset" for that 5771 * page to obfuscate the encrypted data a bit more and 5772 * prevent crackers from finding patterns that they could 5773 * use to break the key. 5774 */ 5775 bzero(&encrypt_iv.aes_iv[0], sizeof (encrypt_iv.aes_iv)); 5776 encrypt_iv.vm.pager_object = page->object->pager; 5777 encrypt_iv.vm.paging_offset = 5778 page->object->paging_offset + page->offset; 5779 5780 /* encrypt the "initial vector" */ 5781 aes_encrypt_cbc((const unsigned char *) &encrypt_iv.aes_iv[0], 5782 swap_crypt_null_iv, 5783 1, 5784 &encrypt_iv.aes_iv[0], 5785 &swap_crypt_ctx.encrypt); 5786 5787 /* 5788 * Encrypt the page. 5789 */ 5790 aes_encrypt_cbc((const unsigned char *) kernel_vaddr, 5791 &encrypt_iv.aes_iv[0], 5792 PAGE_SIZE / AES_BLOCK_SIZE, 5793 (unsigned char *) kernel_vaddr, 5794 &swap_crypt_ctx.encrypt); 5795 5796 vm_page_encrypt_counter++; 5797 5798 /* 5799 * Unmap the page from the kernel's address space, 5800 * if we had to map it ourselves. Otherwise, let 5801 * the caller undo the mapping if needed. 5802 */ 5803 if (kernel_mapping_size != 0) { 5804 vm_paging_unmap_object(page->object, 5805 kernel_mapping_offset, 5806 kernel_mapping_offset + kernel_mapping_size); 5807 } 5808 5809 /* 5810 * Clear the "reference" and "modified" bits. 5811 * This should clean up any impact the encryption had 5812 * on them. 5813 * The page was kept busy and disconnected from all pmaps, 5814 * so it can't have been referenced or modified from user 5815 * space. 5816 * The software bits will be reset later after the I/O 5817 * has completed (in upl_commit_range()). 5818 */ 5819 pmap_clear_refmod(page->phys_page, VM_MEM_REFERENCED | VM_MEM_MODIFIED); 5820 5821 page->encrypted = TRUE; 5822 5823 vm_object_paging_end(page->object); 5824} 5825 5826/* 5827 * ENCRYPTED SWAP: 5828 * vm_page_decrypt: 5829 * Decrypt the given page. 5830 * The page might already be mapped at kernel virtual 5831 * address "kernel_mapping_offset". Otherwise, we need 5832 * to map it. 5833 * 5834 * Context: 5835 * The page's VM object is locked but will be unlocked and relocked. 5836 * The page is busy and not accessible by users (not entered in any pmap). 5837 */ 5838void 5839vm_page_decrypt( 5840 vm_page_t page, 5841 vm_map_offset_t kernel_mapping_offset) 5842{ 5843 kern_return_t kr; 5844 vm_map_size_t kernel_mapping_size; 5845 vm_offset_t kernel_vaddr; 5846 union { 5847 unsigned char aes_iv[AES_BLOCK_SIZE]; 5848 struct { 5849 memory_object_t pager_object; 5850 vm_object_offset_t paging_offset; 5851 } vm; 5852 } decrypt_iv; 5853 5854 assert(page->busy); 5855 assert(page->encrypted); 5856 5857 /* 5858 * Take a paging-in-progress reference to keep the object 5859 * alive even if we have to unlock it (in vm_paging_map_object() 5860 * for example)... 5861 */ 5862 vm_object_paging_begin(page->object); 5863 5864 if (kernel_mapping_offset == 0) { 5865 /* 5866 * The page hasn't already been mapped in kernel space 5867 * by the caller. Map it now, so that we can access 5868 * its contents and decrypt them. 5869 */ 5870 kernel_mapping_size = PAGE_SIZE; 5871 kr = vm_paging_map_object(&kernel_mapping_offset, 5872 page, 5873 page->object, 5874 page->offset, 5875 &kernel_mapping_size, 5876 VM_PROT_READ | VM_PROT_WRITE, 5877 FALSE); 5878 if (kr != KERN_SUCCESS) { 5879 panic("vm_page_decrypt: " 5880 "could not map page in kernel: 0x%x\n", 5881 kr); 5882 } 5883 } else { 5884 kernel_mapping_size = 0; 5885 } 5886 kernel_vaddr = CAST_DOWN(vm_offset_t, kernel_mapping_offset); 5887 5888 assert(swap_crypt_ctx_initialized); 5889 5890 /* 5891 * Prepare an "initial vector" for the decryption. 5892 * It has to be the same as the "initial vector" we 5893 * used to encrypt that page. 5894 */ 5895 bzero(&decrypt_iv.aes_iv[0], sizeof (decrypt_iv.aes_iv)); 5896 decrypt_iv.vm.pager_object = page->object->pager; 5897 decrypt_iv.vm.paging_offset = 5898 page->object->paging_offset + page->offset; 5899 5900 /* encrypt the "initial vector" */ 5901 aes_encrypt_cbc((const unsigned char *) &decrypt_iv.aes_iv[0], 5902 swap_crypt_null_iv, 5903 1, 5904 &decrypt_iv.aes_iv[0], 5905 &swap_crypt_ctx.encrypt); 5906 5907 /* 5908 * Decrypt the page. 5909 */ 5910 aes_decrypt_cbc((const unsigned char *) kernel_vaddr, 5911 &decrypt_iv.aes_iv[0], 5912 PAGE_SIZE / AES_BLOCK_SIZE, 5913 (unsigned char *) kernel_vaddr, 5914 &swap_crypt_ctx.decrypt); 5915 vm_page_decrypt_counter++; 5916 5917 /* 5918 * Unmap the page from the kernel's address space, 5919 * if we had to map it ourselves. Otherwise, let 5920 * the caller undo the mapping if needed. 5921 */ 5922 if (kernel_mapping_size != 0) { 5923 vm_paging_unmap_object(page->object, 5924 kernel_vaddr, 5925 kernel_vaddr + PAGE_SIZE); 5926 } 5927 5928 /* 5929 * After decryption, the page is actually clean. 5930 * It was encrypted as part of paging, which "cleans" 5931 * the "dirty" pages. 5932 * Noone could access it after it was encrypted 5933 * and the decryption doesn't count. 5934 */ 5935 page->dirty = FALSE; 5936 if (page->cs_validated && !page->cs_tainted) { 5937 /* 5938 * CODE SIGNING: 5939 * This page is no longer dirty 5940 * but could have been modified, 5941 * so it will need to be 5942 * re-validated. 5943 */ 5944 page->cs_validated = FALSE; 5945 vm_cs_validated_resets++; 5946 } 5947 pmap_clear_refmod(page->phys_page, VM_MEM_MODIFIED | VM_MEM_REFERENCED); 5948 5949 page->encrypted = FALSE; 5950 5951 /* 5952 * We've just modified the page's contents via the data cache and part 5953 * of the new contents might still be in the cache and not yet in RAM. 5954 * Since the page is now available and might get gathered in a UPL to 5955 * be part of a DMA transfer from a driver that expects the memory to 5956 * be coherent at this point, we have to flush the data cache. 5957 */ 5958 pmap_sync_page_attributes_phys(page->phys_page); 5959 /* 5960 * Since the page is not mapped yet, some code might assume that it 5961 * doesn't need to invalidate the instruction cache when writing to 5962 * that page. That code relies on "pmapped" being FALSE, so that the 5963 * caches get synchronized when the page is first mapped. 5964 */ 5965 assert(pmap_verify_free(page->phys_page)); 5966 page->pmapped = FALSE; 5967 page->wpmapped = FALSE; 5968 5969 vm_object_paging_end(page->object); 5970} 5971 5972unsigned long upl_encrypt_upls = 0; 5973unsigned long upl_encrypt_pages = 0; 5974 5975/* 5976 * ENCRYPTED SWAP: 5977 * 5978 * upl_encrypt: 5979 * Encrypts all the pages in the UPL, within the specified range. 5980 * 5981 */ 5982void 5983upl_encrypt( 5984 upl_t upl, 5985 upl_offset_t crypt_offset, 5986 upl_size_t crypt_size) 5987{ 5988 upl_size_t upl_size; 5989 upl_offset_t upl_offset; 5990 vm_object_t upl_object; 5991 vm_page_t page; 5992 vm_object_t shadow_object; 5993 vm_object_offset_t shadow_offset; 5994 vm_object_offset_t paging_offset; 5995 vm_object_offset_t base_offset; 5996 5997 upl_encrypt_upls++; 5998 upl_encrypt_pages += crypt_size / PAGE_SIZE; 5999 6000 upl_object = upl->map_object; 6001 upl_offset = upl->offset; 6002 upl_size = upl->size; 6003 6004 vm_object_lock(upl_object); 6005 6006 /* 6007 * Find the VM object that contains the actual pages. 6008 */ 6009 if (upl_object->pageout) { 6010 shadow_object = upl_object->shadow; 6011 /* 6012 * The offset in the shadow object is actually also 6013 * accounted for in upl->offset. It possibly shouldn't be 6014 * this way, but for now don't account for it twice. 6015 */ 6016 shadow_offset = 0; 6017 assert(upl_object->paging_offset == 0); /* XXX ? */ 6018 vm_object_lock(shadow_object); 6019 } else { 6020 shadow_object = upl_object; 6021 shadow_offset = 0; 6022 } 6023 6024 paging_offset = shadow_object->paging_offset; 6025 vm_object_paging_begin(shadow_object); 6026 6027 if (shadow_object != upl_object) 6028 vm_object_unlock(upl_object); 6029 6030 6031 base_offset = shadow_offset; 6032 base_offset += upl_offset; 6033 base_offset += crypt_offset; 6034 base_offset -= paging_offset; 6035 6036 assert(crypt_offset + crypt_size <= upl_size); 6037 6038 for (upl_offset = 0; 6039 upl_offset < crypt_size; 6040 upl_offset += PAGE_SIZE) { 6041 page = vm_page_lookup(shadow_object, 6042 base_offset + upl_offset); 6043 if (page == VM_PAGE_NULL) { 6044 panic("upl_encrypt: " 6045 "no page for (obj=%p,off=%lld+%d)!\n", 6046 shadow_object, 6047 base_offset, 6048 upl_offset); 6049 } 6050 /* 6051 * Disconnect the page from all pmaps, so that nobody can 6052 * access it while it's encrypted. After that point, all 6053 * accesses to this page will cause a page fault and block 6054 * while the page is busy being encrypted. After the 6055 * encryption completes, any access will cause a 6056 * page fault and the page gets decrypted at that time. 6057 */ 6058 pmap_disconnect(page->phys_page); 6059 vm_page_encrypt(page, 0); 6060 6061 if (shadow_object == vm_pageout_scan_wants_object) { 6062 /* 6063 * Give vm_pageout_scan() a chance to convert more 6064 * pages from "clean-in-place" to "clean-and-free", 6065 * if it's interested in the same pages we selected 6066 * in this cluster. 6067 */ 6068 vm_object_unlock(shadow_object); 6069 vm_object_lock(shadow_object); 6070 } 6071 } 6072 6073 vm_object_paging_end(shadow_object); 6074 vm_object_unlock(shadow_object); 6075} 6076 6077#else /* CRYPTO */ 6078void 6079upl_encrypt( 6080 __unused upl_t upl, 6081 __unused upl_offset_t crypt_offset, 6082 __unused upl_size_t crypt_size) 6083{ 6084} 6085 6086void 6087vm_page_encrypt( 6088 __unused vm_page_t page, 6089 __unused vm_map_offset_t kernel_mapping_offset) 6090{ 6091} 6092 6093void 6094vm_page_decrypt( 6095 __unused vm_page_t page, 6096 __unused vm_map_offset_t kernel_mapping_offset) 6097{ 6098} 6099 6100#endif /* CRYPTO */ 6101 6102vm_size_t 6103upl_get_internal_pagelist_offset(void) 6104{ 6105 return sizeof(struct upl); 6106} 6107 6108void 6109upl_clear_dirty( 6110 upl_t upl, 6111 boolean_t value) 6112{ 6113 if (value) { 6114 upl->flags |= UPL_CLEAR_DIRTY; 6115 } else { 6116 upl->flags &= ~UPL_CLEAR_DIRTY; 6117 } 6118} 6119 6120 6121#ifdef MACH_BSD 6122 6123boolean_t upl_device_page(upl_page_info_t *upl) 6124{ 6125 return(UPL_DEVICE_PAGE(upl)); 6126} 6127boolean_t upl_page_present(upl_page_info_t *upl, int index) 6128{ 6129 return(UPL_PAGE_PRESENT(upl, index)); 6130} 6131boolean_t upl_speculative_page(upl_page_info_t *upl, int index) 6132{ 6133 return(UPL_SPECULATIVE_PAGE(upl, index)); 6134} 6135boolean_t upl_dirty_page(upl_page_info_t *upl, int index) 6136{ 6137 return(UPL_DIRTY_PAGE(upl, index)); 6138} 6139boolean_t upl_valid_page(upl_page_info_t *upl, int index) 6140{ 6141 return(UPL_VALID_PAGE(upl, index)); 6142} 6143ppnum_t upl_phys_page(upl_page_info_t *upl, int index) 6144{ 6145 return(UPL_PHYS_PAGE(upl, index)); 6146} 6147 6148 6149void 6150vm_countdirtypages(void) 6151{ 6152 vm_page_t m; 6153 int dpages; 6154 int pgopages; 6155 int precpages; 6156 6157 6158 dpages=0; 6159 pgopages=0; 6160 precpages=0; 6161 6162 vm_page_lock_queues(); 6163 m = (vm_page_t) queue_first(&vm_page_queue_inactive); 6164 do { 6165 if (m ==(vm_page_t )0) break; 6166 6167 if(m->dirty) dpages++; 6168 if(m->pageout) pgopages++; 6169 if(m->precious) precpages++; 6170 6171 assert(m->object != kernel_object); 6172 m = (vm_page_t) queue_next(&m->pageq); 6173 if (m ==(vm_page_t )0) break; 6174 6175 } while (!queue_end(&vm_page_queue_inactive,(queue_entry_t) m)); 6176 vm_page_unlock_queues(); 6177 6178 vm_page_lock_queues(); 6179 m = (vm_page_t) queue_first(&vm_page_queue_throttled); 6180 do { 6181 if (m ==(vm_page_t )0) break; 6182 6183 dpages++; 6184 assert(m->dirty); 6185 assert(!m->pageout); 6186 assert(m->object != kernel_object); 6187 m = (vm_page_t) queue_next(&m->pageq); 6188 if (m ==(vm_page_t )0) break; 6189 6190 } while (!queue_end(&vm_page_queue_throttled,(queue_entry_t) m)); 6191 vm_page_unlock_queues(); 6192 6193 vm_page_lock_queues(); 6194 m = (vm_page_t) queue_first(&vm_page_queue_zf); 6195 do { 6196 if (m ==(vm_page_t )0) break; 6197 6198 if(m->dirty) dpages++; 6199 if(m->pageout) pgopages++; 6200 if(m->precious) precpages++; 6201 6202 assert(m->object != kernel_object); 6203 m = (vm_page_t) queue_next(&m->pageq); 6204 if (m ==(vm_page_t )0) break; 6205 6206 } while (!queue_end(&vm_page_queue_zf,(queue_entry_t) m)); 6207 vm_page_unlock_queues(); 6208 6209 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages); 6210 6211 dpages=0; 6212 pgopages=0; 6213 precpages=0; 6214 6215 vm_page_lock_queues(); 6216 m = (vm_page_t) queue_first(&vm_page_queue_active); 6217 6218 do { 6219 if(m == (vm_page_t )0) break; 6220 if(m->dirty) dpages++; 6221 if(m->pageout) pgopages++; 6222 if(m->precious) precpages++; 6223 6224 assert(m->object != kernel_object); 6225 m = (vm_page_t) queue_next(&m->pageq); 6226 if(m == (vm_page_t )0) break; 6227 6228 } while (!queue_end(&vm_page_queue_active,(queue_entry_t) m)); 6229 vm_page_unlock_queues(); 6230 6231 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages); 6232 6233} 6234#endif /* MACH_BSD */ 6235 6236ppnum_t upl_get_highest_page( 6237 upl_t upl) 6238{ 6239 return upl->highest_page; 6240} 6241 6242#ifdef UPL_DEBUG 6243kern_return_t upl_ubc_alias_set(upl_t upl, unsigned int alias1, unsigned int alias2) 6244{ 6245 upl->ubc_alias1 = alias1; 6246 upl->ubc_alias2 = alias2; 6247 return KERN_SUCCESS; 6248} 6249int upl_ubc_alias_get(upl_t upl, unsigned int * al, unsigned int * al2) 6250{ 6251 if(al) 6252 *al = upl->ubc_alias1; 6253 if(al2) 6254 *al2 = upl->ubc_alias2; 6255 return KERN_SUCCESS; 6256} 6257#endif /* UPL_DEBUG */ 6258 6259 6260 6261#if MACH_KDB 6262#include <ddb/db_output.h> 6263#include <ddb/db_print.h> 6264#include <vm/vm_print.h> 6265 6266#define printf kdbprintf 6267void db_pageout(void); 6268 6269void 6270db_vm(void) 6271{ 6272 6273 iprintf("VM Statistics:\n"); 6274 db_indent += 2; 6275 iprintf("pages:\n"); 6276 db_indent += 2; 6277 iprintf("activ %5d inact %5d free %5d", 6278 vm_page_active_count, vm_page_inactive_count, 6279 vm_page_free_count); 6280 printf(" wire %5d gobbl %5d\n", 6281 vm_page_wire_count, vm_page_gobble_count); 6282 db_indent -= 2; 6283 iprintf("target:\n"); 6284 db_indent += 2; 6285 iprintf("min %5d inact %5d free %5d", 6286 vm_page_free_min, vm_page_inactive_target, 6287 vm_page_free_target); 6288 printf(" resrv %5d\n", vm_page_free_reserved); 6289 db_indent -= 2; 6290 iprintf("pause:\n"); 6291 db_pageout(); 6292 db_indent -= 2; 6293} 6294 6295#if MACH_COUNTERS 6296extern int c_laundry_pages_freed; 6297#endif /* MACH_COUNTERS */ 6298 6299void 6300db_pageout(void) 6301{ 6302 iprintf("Pageout Statistics:\n"); 6303 db_indent += 2; 6304 iprintf("active %5d inactv %5d\n", 6305 vm_pageout_active, vm_pageout_inactive); 6306 iprintf("nolock %5d avoid %5d busy %5d absent %5d\n", 6307 vm_pageout_inactive_nolock, vm_pageout_inactive_avoid, 6308 vm_pageout_inactive_busy, vm_pageout_inactive_absent); 6309 iprintf("used %5d clean %5d dirty %5d\n", 6310 vm_pageout_inactive_used, vm_pageout_inactive_clean, 6311 vm_pageout_inactive_dirty); 6312#if MACH_COUNTERS 6313 iprintf("laundry_pages_freed %d\n", c_laundry_pages_freed); 6314#endif /* MACH_COUNTERS */ 6315#if MACH_CLUSTER_STATS 6316 iprintf("Cluster Statistics:\n"); 6317 db_indent += 2; 6318 iprintf("dirtied %5d cleaned %5d collisions %5d\n", 6319 vm_pageout_cluster_dirtied, vm_pageout_cluster_cleaned, 6320 vm_pageout_cluster_collisions); 6321 iprintf("clusters %5d conversions %5d\n", 6322 vm_pageout_cluster_clusters, vm_pageout_cluster_conversions); 6323 db_indent -= 2; 6324 iprintf("Target Statistics:\n"); 6325 db_indent += 2; 6326 iprintf("collisions %5d page_dirtied %5d page_freed %5d\n", 6327 vm_pageout_target_collisions, vm_pageout_target_page_dirtied, 6328 vm_pageout_target_page_freed); 6329 db_indent -= 2; 6330#endif /* MACH_CLUSTER_STATS */ 6331 db_indent -= 2; 6332} 6333 6334#endif /* MACH_KDB */ 6335