/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

vm_offset_t
get_map_start(
	vm_map_t);

vm_offset_t
get_map_end(
	vm_map_t);

/*
 *
 */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_map,
	vnode_pager_last_unmap,
	NULL, /* data_reclaim */
	"vnode pager"
};

typedef struct vnode_pager {
	struct ipc_object_header	pager_header;	/* fake ip_kotype() */
	memory_object_pager_ops_t	pager_ops;	/* == &vnode_pager_ops */
	unsigned int			ref_count;	/* reference count */
	memory_object_control_t		control_handle;	/* mem object control handle */
	struct vnode			*vnode_handle;	/* vnode handle */
} *vnode_pager_t;

#define pager_ikot pager_header.io_bits
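/*
 * Note: pager_ikot aliases the io_bits word of the fake ipc_object_header
 * above.  vnode_object_create() stores IKOT_MEMORY_OBJECT there so that an
 * ip_kotype() check on a vnode pager reports it as a memory object rather
 * than a named entry port (see the comment in vnode_object_create() below).
 */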
ipc_port_t
trigger_name_to_port(		/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_object_offset_t,
	uint32_t,
	vm_size_t);

void
vnode_pager_cluster_write(	/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(		/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(		/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;


#define	VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define	CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define	MAX_VNODE	10000


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL	0xffffffff
#define PAGER_INIT	0x00000001
#define PAGER_PAGEIN	0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL) == LEVEL) {printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

extern int proc_resetpcontrol(int);

#if DEVELOPMENT || DEBUG
extern unsigned long vm_cs_validated_resets;
#endif

/*
 *	Routine:	mach_macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for low and
 *		high water marks.
 */
int
mach_macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t	kr;
	memory_object_default_t	default_pager;
	ipc_port_t	trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					 &default_pager, 0);
	if (kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
	    ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
		/* can't have it both ways */
		return EINVAL;
	}

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
		/*
		 * Time to switch to the emergency segment.
		 */
		return default_pager_triggers(default_pager,
					      0, 0,
					      USE_EMERGENCY_SWAP_FILE_FIRST,
					      IP_NULL);
	}

	if (flags & SWAP_FILE_CREATION_ERROR) {
		/*
		 * For some reason, the dynamic pager failed to create a swap file.
		 */
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* now unlocked */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_FILE_CREATION_ERROR,
				       trigger_port);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* now unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}


	if (flags & PROC_RESUME) {

		/*
		 * For this call, hi_water is used to pass in the pid of the process we want to resume
		 * or unthrottle.  This is of course restricted to the superuser (checked inside of
		 * proc_resetpcontrol).
		 */

		return proc_resetpcontrol(hi_water);
	}

	/*
	 * Set thread scheduling priority and policy for the current thread;
	 * it is assumed for the time being that the thread setting the alert
	 * is the same one which will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	if (flags & HI_WAT_ALERT) {
		thread_precedence_policy_data_t	pre;
		thread_extended_policy_data_t	ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);

		current_thread()->options |= TH_OPT_VMPRIV;
	}

	if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
		return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
	}

	return 0;
}
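/*
 * Illustrative only: a user-space client would typically reach the path
 * above via the macx_triggers() trap, registering a receive right whose
 * name is passed as the alert port.  A minimal (hypothetical) sketch:
 *
 *	mach_port_t alert_port;
 *	mach_port_allocate(mach_task_self(), MACH_PORT_RIGHT_RECEIVE,
 *			   &alert_port);
 *	macx_triggers(90, 50, HI_WAT_ALERT, alert_port);
 *
 * The exact header and user-space availability of macx_triggers() vary
 * by release; the sketch only shows the intended calling convention.
 */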
/*
 *
 */
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space = current_space();
	if (ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
				       &trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}


extern int	uiomove64(addr64_t, int, void *);
#define	MAX_RUN	32
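/*
 * memory_object_control_uiomove() below copies data between a uio and the
 * pages already resident in the memory object's VM object.  It works in
 * batches: gather up to MAX_RUN consecutive resident pages (marking each
 * busy), drop the object lock, run uiomove64() against each page's physical
 * address, then retake the lock to age the pages and wake up any waiters.
 * It returns 0 on success, or the error from uiomove64(); a zero-length run
 * (a hole in the cache) simply ends the copy early.
 */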
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void			*uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;


			if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process them now; we can't block on this
				 * page while holding other pages in the BUSY state,
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			if (dst_page->laundry) {
				dst_page->pageout = FALSE;

				vm_pageout_steal_laundry(dst_page, FALSE);
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty) {
				SET_PAGE_DIRTY(dst_page, FALSE);
				if (dst_page->cs_validated &&
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
					vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache or
			 * a page we don't want to try to handle,
			 * so bail at this point
			 * we'll unlock the object below
			 */
			break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 */
			VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}
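/*
 * memory_object_control_uiomove() above is typically reached from the
 * cluster layer (for example, cluster_copy_ubc_data() in
 * bsd/vfs/vfs_cluster.c) when a read or write can be satisfied entirely
 * from pages already in the cache, avoiding a trip through cluster_io().
 * That caller relationship is noted for orientation only; nothing in this
 * file depends on it.
 */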
/*
 *
 */
void
vnode_pager_bootstrap(void)
{
	register vm_size_t	size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				 PAGE_SIZE, "vnode pager structures");
	zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
	zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);


#if CONFIG_CODE_DECRYPTION
	apple_protect_pager_bootstrap();
#endif /* CONFIG_CODE_DECRYPTION */
	swapfile_pager_bootstrap();
	return;
}

/*
 *
 */
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

/*
 *
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		 memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 memory_object_cluster_size_t pg_size)
{
	vnode_pager_t	vnode_object;
	kern_return_t	kr;
	memory_object_attr_info_data_t	attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t		offset,
	__unused memory_object_cluster_size_t	data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t	size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}
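/*
 * The accessors below all follow the same pattern: verify that the memory
 * object really is backed by this pager by comparing its mo_pager_ops
 * vtable pointer against &vnode_pager_ops, and only then downcast it with
 * vnode_pager_lookup() to reach the underlying vnode.  A mismatch means
 * the caller handed us some other pager's object, and we fail with
 * KERN_INVALID_ARGUMENT instead of misinterpreting it.
 */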
kern_return_t
vnode_pager_get_isinuse(
	memory_object_t	mem_obj,
	uint32_t	*isinuse)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*isinuse = 1;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_check_hard_throttle(
	memory_object_t	mem_obj,
	uint32_t	*limit,
	uint32_t	hard_throttle)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	(void)vnode_pager_return_hard_throttle_limit(vnode_object->vnode_handle, limit, hard_throttle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_isSSD(
	memory_object_t	mem_obj,
	boolean_t	*isSSD)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_pathname(
	memory_object_t	mem_obj,
	char		*pathname,
	vm_size_t	*length_p)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_pathname(vnode_object->vnode_handle,
					pathname,
					length_p);
}

kern_return_t
vnode_pager_get_object_filename(
	memory_object_t	mem_obj,
	const char	**filename)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_filename(vnode_object->vnode_handle,
					filename);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
	memory_object_t	mem_obj,
	void		**blobs)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
					blobs);
}

#if CHECK_CS_VALIDATION_BITMAP
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	int			optype)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);
	return ubc_cs_check_validation_bitmap(vnode_object->vnode_handle, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */
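/*
 * vnode_pager_data_request() is the page-fault path into this pager.
 * It first asks the VM layer (memory_object_cluster_size()) how large a
 * cluster around the faulting offset is worth reading; if that fails it
 * falls back to a single page.  The resulting [base_offset, base_offset
 * + size) range always contains the original faulting offset, which the
 * assert below re-checks before the read is issued.
 */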
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	__unused memory_object_cluster_size_t	length,
	__unused vm_prot_t	desired_access,
	memory_object_fault_info_t	fault_info)
{
	vnode_pager_t		vnode_object;
	memory_object_offset_t	base_offset;
	vm_size_t		size;
	uint32_t		io_streaming = 0;

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER * PAGE_SIZE;
	base_offset = offset;

	if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
		size = PAGE_SIZE;

	assert(offset >= base_offset &&
	       offset < base_offset + size);

	return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
}

/*
 *
 */
void
vnode_pager_reference(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

/*
 *
 */
void
vnode_pager_deallocate(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

/*
 *
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t	length,
	__unused vm_sync_t	sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_map(
	memory_object_t	mem_obj,
	vm_prot_t	prot)
{
	vnode_pager_t	vnode_object;
	int		ret;
	kern_return_t	kr;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

	vnode_object = vnode_pager_lookup(mem_obj);

	ret = ubc_map(vnode_object->vnode_handle, prot);

	if (ret != 0) {
		kr = KERN_FAILURE;
	} else {
		kr = KERN_SUCCESS;
	}

	return kr;
}

kern_return_t
vnode_pager_last_unmap(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}
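/*
 * vnode_pager_cluster_write() below has two paths.  For msync-driven
 * returns (UPL_MSYNC) it pushes the range out in chunks of at most
 * MAX_UPL_TRANSFER pages.  On the pageout path it rounds the request
 * down to a cluster boundary with `offset & ~(size - 1)`; for example,
 * if MAX_UPL_TRANSFER were 256 4K pages (size = 0x100000), an offset of
 * 0x2345678 would yield a base_offset of 0x2300000, after which the size
 * is clipped so the cluster never extends past the end of the file.
 * (The 256-page figure is illustrative; the constant is configuration
 * dependent.)
 */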
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t	*resid_offset,
	int			*io_error,
	int			upl_flags)
{
	vm_size_t	size;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

		upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
			upl_flags |= UPL_KEEPCACHED;

		while (cnt) {
			size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */

			assert((upl_size_t) size == size);
			vnode_pageout(vnode_object->vnode_handle,
				      NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
				if ( (*io_error = errno) )
					break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
		vm_object_offset_t	vnode_size;
		vm_object_offset_t	base_offset;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = PAGE_SIZE * MAX_UPL_TRANSFER;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
				size = round_page(((vm_size_t)(vnode_size - base_offset)));
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		assert((upl_size_t) size == size);
		vnode_pageout(vnode_object->vnode_handle,
			      NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size, UPL_VNODE_PAGER, NULL);
	}
}


/*
 *
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	base_offset,
	vm_object_offset_t	offset,
	uint32_t		io_streaming,
	vm_size_t		cnt)
{
	int	local_error = 0;
	int	kret;
	int	flags = 0;

	assert(! (cnt & PAGE_MASK));

	if (io_streaming)
		flags |= UPL_IOSTREAMING;

	assert((upl_size_t) cnt == cnt);
	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (upl_offset_t) (offset - base_offset),
			    base_offset,
			    (upl_size_t) cnt,
			    flags,
			    &local_error);
/*
	if(kret == PAGER_ABSENT) {
	Need to work out the defs here, 1 corresponds to PAGER_ABSENT
	defined in bsd/vm/vm_pager.h  However, we should not be including
	that file here, it is a layering violation.
*/
	if (kret == 1) {
		int	uplflags;
		upl_t	upl = NULL;
		unsigned int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		assert((upl_size_t) cnt == cnt);
		kr = memory_object_upl_request(vnode_object->control_handle,
					       base_offset, (upl_size_t) cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example).  Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;

}
/*
 *
 */
void
vnode_pager_release_from_cache(
	int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_ops, cnt);
}

/*
 *
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t	vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the first word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager_ops = &vnode_pager_ops;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

/*
 *
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager_ops == &vnode_pager_ops);
	return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
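/*
 * fill_procregioninfo() resolves the map entry containing (or the first
 * one after) the given address and copies its attributes out for
 * proc_info.  Locking: the task lock is held only long enough to take a
 * reference on the task's map; the map is then read-locked for the walk,
 * and both the lock and the reference are dropped on every return path.
 * Returns 1 if an entry was found, 0 otherwise.
 */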
int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{

	vm_map_t	map;
	vm_map_offset_t	address = (vm_map_offset_t)arg;
	vm_map_entry_t	tmp_entry;
	vm_map_entry_t	entry;
	vm_map_offset_t	start;
	vm_region_extended_info_data_t	extended;
	vm_region_top_info_data_t	top;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL)
	{
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	start = address;
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = entry->offset;
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = entry->alias;

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	}


	extended.protection = entry->protection;
	extended.user_tag = entry->alias;
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);

	if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
		extended.share_mode = SM_PRIVATE;

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);


	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uintptr_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(1);
		}
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(1);
}

static int
fill_vnodeinfoforaddr(
	vm_map_entry_t	entry,
	uintptr_t	*vnodeaddr,
	uint32_t	*vid)
{
	vm_object_t	top_object, object;
	memory_object_t	memory_object;
	memory_object_pager_ops_t	pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.  Walk the chain with
		 * hand-over-hand locking: lock each shadow before
		 * unlocking the object that points to it, so the
		 * chain can't be torn down underneath us.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}

kern_return_t
vnode_pager_get_object_vnode(
	memory_object_t	mem_obj,
	uintptr_t	*vnodeaddr,
	uint32_t	*vid)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle) {
		*vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return(KERN_SUCCESS);
	}

	return(KERN_FAILURE);
}
/*
 * Find the underlying vnode object for the given vm_map_entry.  If found,
 * return with the object locked; otherwise return NULL with nothing locked.
 */
vm_object_t
find_vnode_object(
	vm_map_entry_t	entry
)
{
	vm_object_t			top_object, object;
	memory_object_t			memory_object;
	memory_object_pager_ops_t	pager_ops;

	if (!entry->is_sub_map) {

		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */

		top_object = entry->object.vm_object;

		if (top_object) {
			vm_object_lock(top_object);

			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}

			if (object && !object->internal && object->pager_ready && !object->terminating &&
			    object->alive) {
				memory_object = object->pager;
				pager_ops = memory_object->mo_pager_ops;

				/*
				 * If this object points to the vnode_pager_ops, then we found what we're
				 * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
				 * vnode and so we fall through to the bottom and return NULL.
				 */

				if (pager_ops == &vnode_pager_ops)
					return object;	/* we return with the object locked */
			}

			vm_object_unlock(object);
		}

	}

	return(VM_OBJECT_NULL);
}