/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/assert.h>
#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>


/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t);

vm_offset_t
get_map_start(
	vm_map_t);

vm_offset_t
get_map_end(
	vm_map_t);

/*
 *
 */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}

/*
 * BSD VNODE PAGER
 */

const struct memory_object_pager_ops vnode_pager_ops = {
	vnode_pager_reference,
	vnode_pager_deallocate,
	vnode_pager_init,
	vnode_pager_terminate,
	vnode_pager_data_request,
	vnode_pager_data_return,
	vnode_pager_data_initialize,
	vnode_pager_data_unlock,
	vnode_pager_synchronize,
	vnode_pager_map,
	vnode_pager_last_unmap,
	NULL, /* data_reclaim */
	"vnode pager"
};

typedef struct vnode_pager {
	struct ipc_object_header	pager_header;	/* fake ip_kotype() */
	memory_object_pager_ops_t	pager_ops;	/* == &vnode_pager_ops */
	unsigned int			ref_count;	/* reference count */
	memory_object_control_t		control_handle;	/* mem object control handle */
	struct vnode			*vnode_handle;	/* vnode handle */
} *vnode_pager_t;

#define pager_ikot pager_header.io_bits
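
/*
 * Life cycle of a vnode pager, as implemented below: vnode_pager_setup()
 * allocates a pager for a vnode and hands it back as a memory_object_t;
 * vnode_pager_init() ties that pager to its memory_object_control_t;
 * vnode_pager_data_request() and vnode_pager_data_return() move pages
 * between the VM system and the file; and when the last reference is
 * dropped, vnode_pager_deallocate() releases the vnode via
 * vnode_pager_vrele() and frees the pager back to its zone.
 */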
ipc_port_t
trigger_name_to_port(			/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_object_offset_t,
	uint32_t,
	vm_size_t);

void
vnode_pager_cluster_write(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);


vnode_pager_t
vnode_object_create(			/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(			/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;


#define	VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define	MAX_VNODE	10000


#if DEBUG
int pagerdebug = 0;

#define PAGER_ALL	0xffffffff
#define	PAGER_INIT	0x00000001
#define	PAGER_PAGEIN	0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL) == LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

extern int proc_resetpcontrol(int);

#if DEVELOPMENT || DEBUG
extern unsigned long vm_cs_validated_resets;
#endif

/*
 *	Routine:	mach_macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for the low and
 *		high water marks.
 */
int
mach_macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t	kr;
	memory_object_default_t	default_pager;
	ipc_port_t		trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					 &default_pager, 0);
	if (kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) ||
	    ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
		/* can't have it both ways */
		return EINVAL;
	}

	if (default_pager_init_flag == 0) {
		start_def_pager(NULL);
		default_pager_init_flag = 1;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
		/*
		 * Time to switch to the emergency segment.
		 */
		return default_pager_triggers(default_pager,
					      0, 0,
					      USE_EMERGENCY_SWAP_FILE_FIRST,
					      IP_NULL);
	}

	if (flags & SWAP_FILE_CREATION_ERROR) {
		/*
		 * For some reason, the dynamic pager failed to create a swap file.
		 */
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_FILE_CREATION_ERROR,
				       trigger_port);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		ip_unlock(trigger_port);
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}


	if (flags & PROC_RESUME) {

		/*
		 * For this call, hi_water is used to pass in the pid of the
		 * process we want to resume or unthrottle.  This is of course
		 * restricted to the superuser (checked inside of
		 * proc_resetpcontrol).
		 */

		return proc_resetpcontrol(hi_water);
	}

	/*
	 * Set the scheduling policy and priority of the current thread; it
	 * is assumed, for the time being, that the thread setting the alert
	 * is the same one that will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	if (flags & HI_WAT_ALERT) {
		thread_precedence_policy_data_t	pre;
		thread_extended_policy_data_t	ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);

		current_thread()->options |= TH_OPT_VMPRIV;
	}

	if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
		return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
	}

	return 0;
}
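
/*
 * Illustrative sketch (not compiled): how a hypothetical user-space swap
 * manager might arm the water-mark alerts through the macx_triggers()
 * trap, which lands in mach_macx_triggers() above.  The helper name and
 * water-mark values are made-up examples, and the alert port must name a
 * receive right in the caller's IPC space (see trigger_name_to_port()
 * below).
 */
#if 0
#include <mach/mach.h>
#include <mach/mach_traps.h>
#include <default_pager/default_pager_types.h>

static kern_return_t
example_arm_swap_alerts(mach_port_t alert_port)
{
	/* example water marks only; real values depend on the swap policy */
	return macx_triggers(50000000, 25000000,
			     HI_WAT_ALERT | LO_WAT_ALERT, alert_port);
}
#endif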
/*
 *
 */
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space = current_space();
	if (ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
				       &trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}


extern int	uiomove64(addr64_t, int, void *);
#define	MAX_RUN	32

int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty,
	int			take_reference)
{
	vm_object_t		object;
	vm_page_t		dst_page;
	int			xsize;
	int			retval = 0;
	int			cur_run;
	int			cur_needed;
	int			i;
	int			orig_offset;
	vm_page_t		page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the
		 * regular path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	orig_offset = start_offset;

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;


			if (dst_page->busy || dst_page->cleaning) {
				/*
				 * someone else is playing with the page...  if
				 * we've already collected pages into this run,
				 * go ahead and process them now, since we can't
				 * block on this page while holding other pages
				 * in the BUSY state; otherwise, wait for the page
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			if (dst_page->laundry) {
				dst_page->pageout = FALSE;

				vm_pageout_steal_laundry(dst_page, FALSE);
			}
			/*
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty) {
				SET_PAGE_DIRTY(dst_page, FALSE);
				if (dst_page->cs_validated &&
				    !dst_page->cs_tainted) {
					/*
					 * CODE SIGNING:
					 * We're modifying a code-signed
					 * page: force revalidate
					 */
					dst_page->cs_validated = FALSE;
#if DEVELOPMENT || DEBUG
					vm_cs_validated_resets++;
#endif
					pmap_disconnect(dst_page->phys_page);
				}
			}
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache or a page we
			 * don't want to try to handle, so bail at this
			 * point; we'll unlock the object below
			 */
			break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << PAGE_SHIFT) + start_offset), xsize, uio)) )
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		/*
		 * if we have more than 1 page to work on
		 * in the current run, or the original request
		 * started at offset 0 of the page, or we're
		 * processing multiple batches, we will move
		 * the pages to the tail of the inactive queue
		 * to implement an LRU for read/write accesses
		 *
		 * the check for orig_offset == 0 is there to
		 * mitigate the cost of small (< page_size) requests
		 * to the same page (this way we only move it once)
		 */
		if (take_reference && (cur_run > 1 || orig_offset == 0)) {

			vm_page_lockspin_queues();

			for (i = 0; i < cur_run; i++)
				vm_page_lru(page_run[i]);

			vm_page_unlock_queues();
		}
		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			/*
			 * someone is explicitly referencing this page...
			 * update clustered and speculative state
			 */
			if (dst_page->clustered)
				VM_PAGE_CONSUME_CLUSTERED(dst_page);

			PAGE_WAKEUP_DONE(dst_page);
		}
		orig_offset = 0;
	}
	vm_object_unlock(object);

	return (retval);
}
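
/*
 * Illustrative sketch (not compiled): how a hypothetical cached-read path
 * might drive memory_object_control_uiomove() above.  The function name
 * example_read_cached is an assumption for illustration; the control
 * handle is obtained with ubc_getobject() from <sys/ubc.h>.
 */
#if 0
static int
example_read_cached(struct vnode *vp, void *uio, off_t f_offset, int len)
{
	memory_object_control_t	control;

	control = ubc_getobject(vp, UBC_FLAGS_NONE);
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return EINVAL;

	/* copy out of resident pages; mark_dirty is 0 for a read access */
	return memory_object_control_uiomove(control,
			f_offset & ~PAGE_MASK_64,	/* page-aligned base */
			uio,
			(int)(f_offset & PAGE_MASK),	/* offset within first page */
			len,
			0,				/* mark_dirty */
			1);				/* take_reference */
}
#endif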
/*
 *
 */
void
vnode_pager_bootstrap(void)
{
	register vm_size_t      size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				 PAGE_SIZE, "vnode pager structures");
	zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
	zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);


#if CONFIG_CODE_DECRYPTION
	apple_protect_pager_bootstrap();
#endif /* CONFIG_CODE_DECRYPTION */
	swapfile_pager_bootstrap();
	return;
}

/*
 *
 */
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

/*
 *
 */
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		 memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 memory_object_cluster_size_t pg_size)
{
	vnode_pager_t	vnode_object;
	kern_return_t	kr;
	memory_object_attr_info_data_t	attributes;


	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t		mem_obj,
	__unused memory_object_offset_t		offset,
	__unused memory_object_cluster_size_t	data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t	size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}
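
/*
 * The accessors below share a pattern: verify that the memory object is
 * really backed by the vnode pager (by checking mo_pager_ops against
 * &vnode_pager_ops) before downcasting it with vnode_pager_lookup() and
 * forwarding the query to the underlying vnode.
 */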
kern_return_t
vnode_pager_get_isinuse(
	memory_object_t	mem_obj,
	uint32_t	*isinuse)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*isinuse = 1;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_throttle_io_limit(
	memory_object_t	mem_obj,
	uint32_t	*limit)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	(void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_isSSD(
	memory_object_t	mem_obj,
	boolean_t	*isSSD)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	*isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		*length = 0;
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_get_object_name(
	memory_object_t	mem_obj,
	char		*pathname,
	vm_size_t	pathname_len,
	char		*filename,
	vm_size_t	filename_len,
	boolean_t	*truncated_path_p)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_name(vnode_object->vnode_handle,
				    pathname,
				    pathname_len,
				    filename,
				    filename_len,
				    truncated_path_p);
}

kern_return_t
vnode_pager_get_object_mtime(
	memory_object_t	mem_obj,
	struct timespec	*mtime,
	struct timespec	*cs_mtime)
{
	vnode_pager_t	vnode_object;

	if (mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_mtime(vnode_object->vnode_handle,
				     mtime,
				     cs_mtime);
}

kern_return_t
vnode_pager_get_object_cs_blobs(
	memory_object_t	mem_obj,
	void		**blobs)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);

	return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,
					blobs);
}

#if CHECK_CS_VALIDATION_BITMAP
kern_return_t
vnode_pager_cs_check_validation_bitmap(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	int			optype)
{
	vnode_pager_t	vnode_object;

	if (mem_obj == MEMORY_OBJECT_NULL ||
	    mem_obj->mo_pager_ops != &vnode_pager_ops) {
		return KERN_INVALID_ARGUMENT;
	}

	vnode_object = vnode_pager_lookup(mem_obj);
	return ubc_cs_check_validation_bitmap(vnode_object->vnode_handle, offset, optype);
}
#endif /* CHECK_CS_VALIDATION_BITMAP */

/*
 *
 */
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	__unused memory_object_cluster_size_t	length,
	__unused vm_prot_t	desired_access,
	memory_object_fault_info_t	fault_info)
{
	vnode_pager_t		vnode_object;
	memory_object_offset_t	base_offset;
	vm_size_t		size;
	uint32_t		io_streaming = 0;

	vnode_object = vnode_pager_lookup(mem_obj);

	size = MAX_UPL_TRANSFER_BYTES;
	base_offset = offset;

	if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
		size = PAGE_SIZE;

	assert(offset >= base_offset &&
	       offset < base_offset + size);

	return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
}

/*
 *
 */
void
vnode_pager_reference(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

/*
 *
 */
void
vnode_pager_deallocate(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

/*
 *
 */
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));

	return(KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t	length,
	__unused vm_sync_t	sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

/*
 *
 */
kern_return_t
vnode_pager_map(
	memory_object_t	mem_obj,
	vm_prot_t	prot)
{
	vnode_pager_t	vnode_object;
	int		ret;
	kern_return_t	kr;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));

	vnode_object = vnode_pager_lookup(mem_obj);

	ret = ubc_map(vnode_object->vnode_handle, prot);

	if (ret != 0) {
		kr = KERN_FAILURE;
	} else {
		kr = KERN_SUCCESS;
	}

	return kr;
}

kern_return_t
vnode_pager_last_unmap(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}



/*
 *
 */
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t	*resid_offset,
	int			*io_error,
	int			upl_flags)
{
	vm_size_t	size;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

		upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
			upl_flags |= UPL_KEEPCACHED;

		while (cnt) {
			size = (cnt < MAX_UPL_TRANSFER_BYTES)
				? cnt : MAX_UPL_TRANSFER_BYTES;	/* effective max */

			assert((upl_size_t) size == size);
			vnode_pageout(vnode_object->vnode_handle,
				NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
				if ( (*io_error = errno) )
					break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
		vm_object_offset_t	vnode_size;
		vm_object_offset_t	base_offset;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = MAX_UPL_TRANSFER_BYTES;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
				size = round_page(((vm_size_t)(vnode_size - base_offset)));
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		assert((upl_size_t) size == size);
		vnode_pageout(vnode_object->vnode_handle,
			NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
			(upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
	}
}
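
/*
 * Worked example of the cluster alignment in the pageout path above,
 * assuming (for illustration only) a 1MB MAX_UPL_TRANSFER_BYTES: for
 * offset 0x123456, base_offset = offset & ~(size - 1) = 0x100000, so the
 * cluster starts on a 1MB boundary and the page being returned sits at
 * upl_offset 0x23456 within it.
 */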
/*
 *
 */
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	base_offset,
	vm_object_offset_t	offset,
	uint32_t		io_streaming,
	vm_size_t		cnt)
{
	int	local_error = 0;
	int	kret;
	int	flags = 0;

	assert(! (cnt & PAGE_MASK));

	if (io_streaming)
		flags |= UPL_IOSTREAMING;

	assert((upl_size_t) cnt == cnt);
	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (upl_offset_t) (offset - base_offset),
			    base_offset,
			    (upl_size_t) cnt,
			    flags,
			    &local_error);
	/*
	 * if (kret == PAGER_ABSENT):
	 * the return-value defs still need to be worked out here; "1"
	 * corresponds to PAGER_ABSENT as defined in bsd/vm/vm_pager.h,
	 * but including that file here would be a layering violation.
	 */
	if (kret == 1) {
		int		uplflags;
		upl_t		upl = NULL;
		unsigned int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		assert((upl_size_t) cnt == cnt);
		kr = memory_object_upl_request(vnode_object->control_handle,
					       base_offset, (upl_size_t) cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example).  Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;

}


/*
 *
 */
void
vnode_pager_release_from_cache(
	int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_ops, cnt);
}

/*
 *
 */
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t	vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the first word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager_ops = &vnode_pager_ops;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

/*
 *
 */
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager_ops == &vnode_pager_ops);
	return (vnode_object);
}


/*********************** proc_info implementation *************/

#include <sys/bsdtask_info.h>

static int fill_vnodeinfoforaddr(vm_map_entry_t entry, uintptr_t *vnodeaddr, uint32_t *vid);


int
fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{

	vm_map_t	map;
	vm_map_offset_t	address = (vm_map_offset_t)arg;
	vm_map_entry_t	tmp_entry;
	vm_map_entry_t	entry;
	vm_map_offset_t	start;
	vm_region_extended_info_data_t extended;
	vm_region_top_info_data_t top;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL)
	{
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	start = address;
	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	start = entry->vme_start;

	pinfo->pri_offset = entry->offset;
	pinfo->pri_protection = entry->protection;
	pinfo->pri_max_protection = entry->max_protection;
	pinfo->pri_inheritance = entry->inheritance;
	pinfo->pri_behavior = entry->behavior;
	pinfo->pri_user_wired_count = entry->user_wired_count;
	pinfo->pri_user_tag = entry->alias;

	if (entry->is_sub_map) {
		pinfo->pri_flags |= PROC_REGION_SUBMAP;
	} else {
		if (entry->is_shared)
			pinfo->pri_flags |= PROC_REGION_SHARED;
	}


	extended.protection = entry->protection;
	extended.user_tag = entry->alias;
	extended.pages_resident = 0;
	extended.pages_swapped_out = 0;
	extended.pages_shared_now_private = 0;
	extended.pages_dirtied = 0;
	extended.external_pager = 0;
	extended.shadow_depth = 0;

	vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);
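
	/*
	 * A ref_count of 2 on an externally-pagered object means the only
	 * outstanding references are this mapping's and the pager's own,
	 * so report the region as private even though its share mode came
	 * back as SM_SHARED.
	 */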
	if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
		extended.share_mode = SM_PRIVATE;

	top.private_pages_resident = 0;
	top.shared_pages_resident = 0;
	vm_map_region_top_walk(entry, &top);


	pinfo->pri_pages_resident = extended.pages_resident;
	pinfo->pri_pages_shared_now_private = extended.pages_shared_now_private;
	pinfo->pri_pages_swapped_out = extended.pages_swapped_out;
	pinfo->pri_pages_dirtied = extended.pages_dirtied;
	pinfo->pri_ref_count = extended.ref_count;
	pinfo->pri_shadow_depth = extended.shadow_depth;
	pinfo->pri_share_mode = extended.share_mode;

	pinfo->pri_private_pages_resident = top.private_pages_resident;
	pinfo->pri_shared_pages_resident = top.shared_pages_resident;
	pinfo->pri_obj_id = top.obj_id;

	pinfo->pri_address = (uint64_t)start;
	pinfo->pri_size = (uint64_t)(entry->vme_end - start);
	pinfo->pri_depth = 0;

	if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
		*vnodeaddr = (uintptr_t)0;

		if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) == 0) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(1);
		}
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(1);
}

int
fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t *vid)
{

	vm_map_t	map;
	vm_map_offset_t	address = (vm_map_offset_t)arg;
	vm_map_entry_t	tmp_entry;
	vm_map_entry_t	entry;

	task_lock(task);
	map = task->map;
	if (map == VM_MAP_NULL)
	{
		task_unlock(task);
		return(0);
	}
	vm_map_reference(map);
	task_unlock(task);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
		if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
			vm_map_unlock_read(map);
			vm_map_deallocate(map);
			return(0);
		}
	} else {
		entry = tmp_entry;
	}

	while (entry != vm_map_to_entry(map)) {
		*vnodeaddr = 0;
		*vid = 0;

		if (entry->is_sub_map == 0) {
			if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {

				pinfo->pri_offset = entry->offset;
				pinfo->pri_protection = entry->protection;
				pinfo->pri_max_protection = entry->max_protection;
				pinfo->pri_inheritance = entry->inheritance;
				pinfo->pri_behavior = entry->behavior;
				pinfo->pri_user_wired_count = entry->user_wired_count;
				pinfo->pri_user_tag = entry->alias;

				if (entry->is_shared)
					pinfo->pri_flags |= PROC_REGION_SHARED;

				pinfo->pri_pages_resident = 0;
				pinfo->pri_pages_shared_now_private = 0;
				pinfo->pri_pages_swapped_out = 0;
				pinfo->pri_pages_dirtied = 0;
				pinfo->pri_ref_count = 0;
				pinfo->pri_shadow_depth = 0;
				pinfo->pri_share_mode = 0;

				pinfo->pri_private_pages_resident = 0;
				pinfo->pri_shared_pages_resident = 0;
				pinfo->pri_obj_id = 0;

				pinfo->pri_address = (uint64_t)entry->vme_start;
				pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
				pinfo->pri_depth = 0;

				vm_map_unlock_read(map);
				vm_map_deallocate(map);
				return(1);
			}
		}

		/* Keep searching for a vnode-backed mapping */
		entry = entry->vme_next;
	}

	vm_map_unlock_read(map);
	vm_map_deallocate(map);
	return(0);
}
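
/*
 * If the entry is backed by a vnode pager, return 1 and fill in the vnode
 * address and vid; return 0 for submaps, anonymous (internal) memory, and
 * objects whose pager is gone, dying, or not yet ready.
 */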
static int
fill_vnodeinfoforaddr(
	vm_map_entry_t	entry,
	uintptr_t	*vnodeaddr,
	uint32_t	*vid)
{
	vm_object_t	top_object, object;
	memory_object_t	memory_object;
	memory_object_pager_ops_t	pager_ops;
	kern_return_t	kr;
	int		shadow_depth;


	if (entry->is_sub_map) {
		return(0);
	} else {
		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */
		top_object = entry->object.vm_object;
		if (top_object == VM_OBJECT_NULL) {
			object = VM_OBJECT_NULL;
			shadow_depth = 0;
		} else {
			vm_object_lock(top_object);
			for (object = top_object, shadow_depth = 0;
			     object->shadow != VM_OBJECT_NULL;
			     object = object->shadow, shadow_depth++) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}
		}
	}

	if (object == VM_OBJECT_NULL) {
		return(0);
	} else if (object->internal) {
		vm_object_unlock(object);
		return(0);
	} else if (! object->pager_ready ||
		   object->terminating ||
		   ! object->alive) {
		vm_object_unlock(object);
		return(0);
	} else {
		memory_object = object->pager;
		pager_ops = memory_object->mo_pager_ops;
		if (pager_ops == &vnode_pager_ops) {
			kr = vnode_pager_get_object_vnode(
				memory_object,
				vnodeaddr, vid);
			if (kr != KERN_SUCCESS) {
				vm_object_unlock(object);
				return(0);
			}
		} else {
			vm_object_unlock(object);
			return(0);
		}
	}
	vm_object_unlock(object);
	return(1);
}

kern_return_t
vnode_pager_get_object_vnode (
	memory_object_t	mem_obj,
	uintptr_t	*vnodeaddr,
	uint32_t	*vid)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);
	if (vnode_object->vnode_handle) {
		*vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
		*vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle);

		return(KERN_SUCCESS);
	}

	return(KERN_FAILURE);
}

#if CONFIG_IOSCHED
kern_return_t
vnode_pager_get_object_devvp(
	memory_object_t	mem_obj,
	uintptr_t	*devvp)
{
	struct vnode	*vp;
	uint32_t	vid;

	if (vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS)
		return (KERN_FAILURE);
	*devvp = (uintptr_t)vnode_mountdevvp(vp);
	if (*devvp)
		return (KERN_SUCCESS);
	return (KERN_FAILURE);
}
#endif

/*
 * Find the underlying vnode object for the given vm_map_entry.  If found,
 * return with the object locked; otherwise return NULL with nothing locked.
 */

vm_object_t
find_vnode_object(
	vm_map_entry_t	entry
)
{
	vm_object_t			top_object, object;
	memory_object_t			memory_object;
	memory_object_pager_ops_t	pager_ops;

	if (!entry->is_sub_map) {

		/*
		 * The last object in the shadow chain has the
		 * relevant pager information.
		 */

		top_object = entry->object.vm_object;

		if (top_object) {
			vm_object_lock(top_object);

			for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
				vm_object_lock(object->shadow);
				vm_object_unlock(object);
			}

			if (object && !object->internal && object->pager_ready && !object->terminating &&
			    object->alive) {
				memory_object = object->pager;
				pager_ops = memory_object->mo_pager_ops;

				/*
				 * If this object points to the vnode_pager_ops, then we found what we're
				 * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
				 * vnode and so we fall through to the bottom and return NULL.
				 */

				if (pager_ops == &vnode_pager_ops)
					return object;	/* we return with the object locked */
			}

			vm_object_unlock(object);
		}

	}

	return(VM_OBJECT_NULL);
}
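
/*
 * Illustrative sketch (not compiled): a hypothetical caller of
 * find_vnode_object().  The object, when found, comes back locked, so
 * the caller owns the unlock; the helper name is an assumption for
 * illustration, not part of this file.
 */
#if 0
static struct vnode *
example_entry_vnode(vm_map_entry_t entry)
{
	vm_object_t	object;
	struct vnode	*vp = NULL;

	object = find_vnode_object(entry);
	if (object != VM_OBJECT_NULL) {
		/* safe: find_vnode_object() verified the pager ops */
		vp = vnode_pager_lookup(object->pager)->vnode_handle;
		vm_object_unlock(object);
	}
	return vp;
}
#endif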