/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_map.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Virtual memory mapping module.
 */

#include <task_swapper.h>
#include <mach_assert.h>

#include <vm/vm_options.h>

#include <libkern/OSAtomic.h>

#include <mach/kern_return.h>
#include <mach/port.h>
#include <mach/vm_attributes.h>
#include <mach/vm_param.h>
#include <mach/vm_behavior.h>
#include <mach/vm_statistics.h>
#include <mach/memory_object.h>
#include <mach/mach_vm.h>
#include <machine/cpu_capabilities.h>
#include <mach/sdt.h>

#include <kern/assert.h>
#include <kern/counters.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include <vm/cpm.h>
#include <vm/vm_compressor_pager.h>
#include <vm/vm_init.h>
#include <vm/vm_fault.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <ipc/ipc_port.h>
#include <kern/sched_prim.h>
#include <kern/misc_protos.h>
#include <kern/xpr.h>

#include <mach/vm_map_server.h>
#include <mach/mach_host_server.h>
#include <vm/vm_protos.h>
#include <vm/vm_purgeable_internal.h>

#include <vm/vm_protos.h>
#include <vm/vm_shared_region.h>
#include <vm/vm_map_store.h>

extern u_int32_t random(void);	/* from <libkern/libkern.h> */

/*
 * Internal prototypes
 */

static void vm_map_simplify_range(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end);	/* forward */

static boolean_t vm_map_range_check(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_map_entry_t *entry);

static vm_map_entry_t _vm_map_entry_create(
	struct vm_map_header *map_header, boolean_t map_locked);

static void _vm_map_entry_dispose(
	struct vm_map_header *map_header,
	vm_map_entry_t entry);

static void vm_map_pmap_enter(
	vm_map_t map,
	vm_map_offset_t addr,
	vm_map_offset_t end_addr,
	vm_object_t object,
	vm_object_offset_t offset,
	vm_prot_t protection);

static void _vm_map_clip_end(
	struct vm_map_header *map_header,
	vm_map_entry_t entry,
	vm_map_offset_t end);

static void _vm_map_clip_start(
	struct vm_map_header *map_header,
	vm_map_entry_t entry,
	vm_map_offset_t start);

static void vm_map_entry_delete(
	vm_map_t map,
	vm_map_entry_t entry);

static kern_return_t vm_map_delete(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	int flags,
	vm_map_t zap_map);

static kern_return_t vm_map_copy_overwrite_unaligned(
	vm_map_t dst_map,
	vm_map_entry_t entry,
	vm_map_copy_t copy,
	vm_map_address_t start,
	boolean_t discard_on_success);

static kern_return_t vm_map_copy_overwrite_aligned(
	vm_map_t dst_map,
	vm_map_entry_t tmp_entry,
	vm_map_copy_t copy,
	vm_map_offset_t start,
	pmap_t pmap);

static kern_return_t vm_map_copyin_kernel_buffer(
	vm_map_t src_map,
	vm_map_address_t src_addr,
	vm_map_size_t len,
	boolean_t src_destroy,
	vm_map_copy_t *copy_result);	/* OUT */

static kern_return_t vm_map_copyout_kernel_buffer(
	vm_map_t map,
	vm_map_address_t *addr,	/* IN/OUT */
	vm_map_copy_t copy,
	boolean_t overwrite,
	boolean_t consume_on_success);

static void vm_map_fork_share(
	vm_map_t old_map,
	vm_map_entry_t old_entry,
	vm_map_t new_map);

static boolean_t vm_map_fork_copy(
	vm_map_t old_map,
	vm_map_entry_t *old_entry_p,
	vm_map_t new_map);

void vm_map_region_top_walk(
	vm_map_entry_t entry,
	vm_region_top_info_t top);

void vm_map_region_walk(
	vm_map_t map,
	vm_map_offset_t va,
	vm_map_entry_t entry,
	vm_object_offset_t offset,
	vm_object_size_t range,
	vm_region_extended_info_t extended,
	boolean_t look_for_pages,
	mach_msg_type_number_t count);

static kern_return_t vm_map_wire_nested(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	vm_prot_t access_type,
	boolean_t user_wire,
	pmap_t map_pmap,
	vm_map_offset_t pmap_addr,
	ppnum_t *physpage_p);

static kern_return_t vm_map_unwire_nested(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	boolean_t user_wire,
	pmap_t map_pmap,
	vm_map_offset_t pmap_addr);

static kern_return_t vm_map_overwrite_submap_recurse(
	vm_map_t dst_map,
	vm_map_offset_t dst_addr,
	vm_map_size_t dst_size);

static kern_return_t vm_map_copy_overwrite_nested(
	vm_map_t dst_map,
	vm_map_offset_t dst_addr,
	vm_map_copy_t copy,
	boolean_t interruptible,
	pmap_t pmap,
	boolean_t discard_on_success);

static kern_return_t vm_map_remap_extract(
	vm_map_t map,
	vm_map_offset_t addr,
	vm_map_size_t size,
	boolean_t copy,
	struct vm_map_header *map_header,
	vm_prot_t *cur_protection,
	vm_prot_t *max_protection,
	vm_inherit_t inheritance,
	boolean_t pageable);

static kern_return_t vm_map_remap_range_allocate(
	vm_map_t map,
	vm_map_address_t *address,
	vm_map_size_t size,
	vm_map_offset_t mask,
	int flags,
	vm_map_entry_t *map_entry);

static void vm_map_region_look_for_page(
	vm_map_t map,
	vm_map_offset_t va,
	vm_object_t object,
	vm_object_offset_t offset,
	int max_refcnt,
	int depth,
	vm_region_extended_info_t extended,
	mach_msg_type_number_t count);

static int vm_map_region_count_obj_refs(
	vm_map_entry_t entry,
	vm_object_t object);


static kern_return_t vm_map_willneed(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end);

static kern_return_t vm_map_reuse_pages(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end);

static kern_return_t vm_map_reusable_pages(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end);

static kern_return_t vm_map_can_reuse(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end);


/*
 * Macros to copy a vm_map_entry. We must be careful to correctly
 * manage the wired page count. vm_map_entry_copy() creates a new
 * map entry to the same memory - the wired count in the new entry
 * must be set to zero. vm_map_entry_copy_full() creates a new
 * entry that is identical to the old entry. This preserves the
 * wire count; it's used for map splitting and zone changing in
 * vm_map_copyout.
 */

#define vm_map_entry_copy(NEW,OLD)				\
MACRO_BEGIN							\
boolean_t _vmec_reserved = (NEW)->from_reserved_zone;		\
	*(NEW) = *(OLD);					\
	(NEW)->is_shared = FALSE;				\
	(NEW)->needs_wakeup = FALSE;				\
	(NEW)->in_transition = FALSE;				\
	(NEW)->wired_count = 0;					\
	(NEW)->user_wired_count = 0;				\
	(NEW)->permanent = FALSE;				\
	(NEW)->used_for_jit = FALSE;				\
	(NEW)->from_reserved_zone = _vmec_reserved;		\
	(NEW)->iokit_acct = FALSE;				\
MACRO_END

#define vm_map_entry_copy_full(NEW,OLD)				\
MACRO_BEGIN							\
boolean_t _vmecf_reserved = (NEW)->from_reserved_zone;		\
(*(NEW) = *(OLD));						\
(NEW)->from_reserved_zone = _vmecf_reserved;			\
MACRO_END

/*
 * Decide if we want to allow processes to execute from their data or stack areas.
 * override_nx() returns true if we do. Data/stack execution can be enabled independently
 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
 * or allow_stack_exec to enable data execution for that type of data area for that particular
 * ABI (or both by or'ing the flags together). These are initialized in the architecture
 * specific pmap files since the default behavior varies according to architecture. The
 * main reason it varies is because of the need to provide binary compatibility with old
 * applications that were written before these restrictions came into being. In the old
 * days, an app could execute anything it could read, but this has slowly been tightened
 * up over time. The default behavior is:
 *
 *	32-bit PPC apps		may execute from both stack and data areas
 *	32-bit Intel apps	may execute from data areas but not stack
 *	64-bit PPC/Intel apps	may not execute from either data or stack
 *
 * An application on any architecture may override these defaults by explicitly
 * adding PROT_EXEC permission to the page in question with the mprotect(2)
 * system call. This code here just determines what happens when an app tries to
 * execute from a page that lacks execute permission.
 *
 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
 * execution from data areas for a particular binary even if the arch normally permits it. As
 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
 * to support some complicated use cases, notably browsers with out-of-process plugins that
 * are not all NX-safe.
 */

extern int allow_data_exec, allow_stack_exec;

int
override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
{
	int current_abi;

	/*
	 * Determine if the app is running in 32 or 64 bit mode.
	 */

	if (vm_map_is_64bit(map))
		current_abi = VM_ABI_64;
	else
		current_abi = VM_ABI_32;

	/*
	 * Determine if we should allow the execution based on whether it's a
	 * stack or data area and the current architecture.
	 */

	if (user_tag == VM_MEMORY_STACK)
		return allow_stack_exec & current_abi;

	return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
}
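
/*
 * For illustration only (addr and len below are hypothetical and not part
 * of this file): a process that wants to execute from an otherwise
 * non-executable data page can always opt back in from user space with
 * something like
 *
 *	mprotect(addr, len, PROT_READ | PROT_WRITE | PROT_EXEC);
 *
 * override_nx() only decides the outcome when no such explicit PROT_EXEC
 * request was made.
 */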

/*
 * Virtual memory maps provide for the mapping, protection,
 * and sharing of virtual memory objects. In addition,
 * this module provides for an efficient virtual copy of
 * memory from one map to another.
 *
 * Synchronization is required prior to most operations.
 *
 * Maps consist of an ordered doubly-linked list of simple
 * entries; a single hint is used to speed up lookups.
 *
 * Sharing maps have been deleted from this version of Mach.
 * All shared objects are now mapped directly into the respective
 * maps. This requires a change in the copy on write strategy;
 * the asymmetric (delayed) strategy is used for shared temporary
 * objects instead of the symmetric (shadow) strategy. All maps
 * are now "top level" maps (either task map, kernel map or submap
 * of the kernel map).
 *
 * Since portions of maps are specified by start/end addresses,
 * which may not align with existing map entries, all
 * routines merely "clip" entries to these start/end values.
 * [That is, an entry is split into two, bordering at a
 * start or end value.] Note that these clippings may not
 * always be necessary (as the two resulting entries are then
 * not changed); however, the clipping is done for convenience.
 * No attempt is currently made to "glue back together" two
 * abutting entries.
 *
 * The symmetric (shadow) copy strategy implements virtual copy
 * by copying VM object references from one map to
 * another, and then marking both regions as copy-on-write.
 * It is important to note that only one writeable reference
 * to a VM object region exists in any map when this strategy
 * is used -- this means that shadow object creation can be
 * delayed until a write operation occurs. The asymmetric (delayed)
 * strategy allows multiple maps to have writeable references to
 * the same region of a vm object, and hence cannot delay creating
 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
 * Copying of permanent objects is completely different; see
 * vm_object_copy_strategically() in vm_object.c.
 */

static zone_t vm_map_zone;			/* zone for vm_map structures */
static zone_t vm_map_entry_zone;		/* zone for vm_map_entry structures */
static zone_t vm_map_entry_reserved_zone;	/* zone with reserve for non-blocking
						 * allocations */
static zone_t vm_map_copy_zone;			/* zone for vm_map_copy structures */


/*
 * Placeholder object for submap operations. This object is dropped
 * into the range by a call to vm_map_find, and removed when
 * vm_map_submap creates the submap.
 */

vm_object_t vm_submap_object;

static void *map_data;
static vm_size_t map_data_size;
static void *kentry_data;
static vm_size_t kentry_data_size;

#define NO_COALESCE_LIMIT  ((1024 * 128) - 1)

/* Skip acquiring locks if we're in the midst of a kernel core dump */
unsigned int not_in_kdp = 1;

unsigned int vm_map_set_cache_attr_count = 0;

kern_return_t
vm_map_set_cache_attr(
	vm_map_t map,
	vm_map_offset_t va)
{
	vm_map_entry_t map_entry;
	vm_object_t object;
	kern_return_t kr = KERN_SUCCESS;

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, va, &map_entry) ||
	    map_entry->is_sub_map) {
		/*
		 * that memory is not properly mapped
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	object = map_entry->object.vm_object;

	if (object == VM_OBJECT_NULL) {
		/*
		 * there should be a VM object here at this point
		 */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	vm_object_lock(object);
	object->set_cache_attr = TRUE;
	vm_object_unlock(object);

	vm_map_set_cache_attr_count++;
done:
	vm_map_unlock_read(map);

	return kr;
}


#if CONFIG_CODE_DECRYPTION
/*
 * vm_map_apple_protected:
 * This remaps the requested part of the object with an object backed by
 * the decrypting pager.
 * crypt_info contains entry points and session data for the crypt module.
 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
 */
kern_return_t
vm_map_apple_protected(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end,
	struct pager_crypt_info *crypt_info)
{
	boolean_t map_locked;
	kern_return_t kr;
	vm_map_entry_t map_entry;
	memory_object_t protected_mem_obj;
	vm_object_t protected_object;
	vm_map_offset_t map_addr;

	vm_map_lock_read(map);
	map_locked = TRUE;

	/* lookup the protected VM object */
	if (!vm_map_lookup_entry(map,
				 start,
				 &map_entry) ||
	    map_entry->vme_end < end ||
	    map_entry->is_sub_map ||
	    !(map_entry->protection & VM_PROT_EXECUTE)) {
		/* that memory is not properly mapped */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}
	protected_object = map_entry->object.vm_object;
	if (protected_object == VM_OBJECT_NULL) {
		/* there should be a VM object here at this point */
		kr = KERN_INVALID_ARGUMENT;
		goto done;
	}

	/* make sure protected object stays alive while map is unlocked */
	vm_object_reference(protected_object);

	vm_map_unlock_read(map);
	map_locked = FALSE;

	/*
	 * Lookup (and create if necessary) the protected memory object
	 * matching that VM object.
	 * If successful, this also grabs a reference on the memory object,
	 * to guarantee that it doesn't go away before we get a chance to map
	 * it.
 */
	protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);

	/* release extra ref on protected object */
	vm_object_deallocate(protected_object);

	if (protected_mem_obj == NULL) {
		kr = KERN_FAILURE;
		goto done;
	}

	/* map this memory object in place of the current one */
	map_addr = start;
	kr = vm_map_enter_mem_object(map,
				     &map_addr,
				     end - start,
				     (mach_vm_offset_t) 0,
				     VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
				     (ipc_port_t) protected_mem_obj,
				     (map_entry->offset +
				      (start - map_entry->vme_start)),
				     TRUE,
				     map_entry->protection,
				     map_entry->max_protection,
				     map_entry->inheritance);
	assert(map_addr == start);
	/*
	 * Release the reference obtained by apple_protect_pager_setup().
	 * The mapping (if it succeeded) is now holding a reference on the
	 * memory object.
	 */
	memory_object_deallocate(protected_mem_obj);

done:
	if (map_locked) {
		vm_map_unlock_read(map);
	}
	return kr;
}
#endif	/* CONFIG_CODE_DECRYPTION */


lck_grp_t vm_map_lck_grp;
lck_grp_attr_t vm_map_lck_grp_attr;
lck_attr_t vm_map_lck_attr;
lck_attr_t vm_map_lck_rw_attr;


/*
 * vm_map_init:
 *
 * Initialize the vm_map module. Must be called before
 * any other vm_map routines.
 *
 * Map and entry structures are allocated from zones -- we must
 * initialize those zones.
 *
 * There are three zones of interest:
 *
 *	vm_map_zone:			used to allocate maps.
 *	vm_map_entry_zone:		used to allocate map entries.
 *	vm_map_entry_reserved_zone:	fallback zone for kernel map entries
 *
 * The kernel allocates map entries from a special zone that is initially
 * "crammed" with memory. It would be difficult (perhaps impossible) for
 * the kernel to allocate more memory to an entry zone when it became
 * empty since the very act of allocating memory implies the creation
 * of a new entry.
 */
void
vm_map_init(
	void)
{
	vm_size_t entry_zone_alloc_size;
	const char *mez_name = "VM map entries";

	vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024,
			    PAGE_SIZE, "maps");
	zone_change(vm_map_zone, Z_NOENCRYPT, TRUE);
#if defined(__LP64__)
	entry_zone_alloc_size = PAGE_SIZE * 5;
#else
	entry_zone_alloc_size = PAGE_SIZE * 6;
#endif
	vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
				  1024*1024, entry_zone_alloc_size,
				  mez_name);
	zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE);
	zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE);

	vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry),
					   kentry_data_size * 64, kentry_data_size,
					   "Reserved VM map entries");
	zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE);

	vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy),
				 16*1024, PAGE_SIZE, "VM map copies");
	zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE);

	/*
	 * Cram the map and kentry zones with initial data.
	 * Set reserved_zone non-collectible to aid zone_gc().
 */
	zone_change(vm_map_zone, Z_COLLECT, FALSE);

	zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE);
	zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE);
	zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE);
	zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE);
	zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */
	zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE);

	zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size);
	zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size);

	lck_grp_attr_setdefault(&vm_map_lck_grp_attr);
	lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr);
	lck_attr_setdefault(&vm_map_lck_attr);

	lck_attr_setdefault(&vm_map_lck_rw_attr);
	lck_attr_cleardebug(&vm_map_lck_rw_attr);

#if CONFIG_FREEZE
	default_freezer_init();
#endif /* CONFIG_FREEZE */
}

void
vm_map_steal_memory(
	void)
{
	uint32_t kentry_initial_pages;

	map_data_size = round_page(10 * sizeof(struct _vm_map));
	map_data = pmap_steal_memory(map_data_size);

	/*
	 * kentry_initial_pages corresponds to the number of kernel map entries
	 * required during bootstrap until the asynchronous replenishment
	 * scheme is activated and/or entries are available from the general
	 * map entry pool.
	 */
#if defined(__LP64__)
	kentry_initial_pages = 10;
#else
	kentry_initial_pages = 6;
#endif

#if CONFIG_GZALLOC
	/* If using the guard allocator, reserve more memory for the kernel
	 * reserved map entry pool.
	 */
	if (gzalloc_enabled())
		kentry_initial_pages *= 1024;
#endif

	kentry_data_size = kentry_initial_pages * PAGE_SIZE;
	kentry_data = pmap_steal_memory(kentry_data_size);
}

void vm_kernel_reserved_entry_init(void) {
	zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry));
}
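
/*
 * For illustration only (the arithmetic is an estimate, not part of the
 * original code): the refill target above is six pages' worth of reserved
 * entries, so with 4 KB pages and a vm_map_entry of roughly 100 bytes it
 * keeps on the order of a couple hundred entries available for
 * non-blocking kernel-map allocations.
 */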

/*
 * vm_map_create:
 *
 * Creates and returns a new empty VM map with
 * the given physical map structure, and having
 * the given lower and upper address bounds.
 */
vm_map_t
vm_map_create(
	pmap_t pmap,
	vm_map_offset_t min,
	vm_map_offset_t max,
	boolean_t pageable)
{
	static int color_seed = 0;
	register vm_map_t result;

	result = (vm_map_t) zalloc(vm_map_zone);
	if (result == VM_MAP_NULL)
		panic("vm_map_create");

	vm_map_first_entry(result) = vm_map_to_entry(result);
	vm_map_last_entry(result) = vm_map_to_entry(result);
	result->hdr.nentries = 0;
	result->hdr.entries_pageable = pageable;

	vm_map_store_init( &(result->hdr) );

	result->hdr.page_shift = PAGE_SHIFT;

	result->size = 0;
	result->user_wire_limit = MACH_VM_MAX_ADDRESS;	/* default limit is unlimited */
	result->user_wire_size = 0;
	result->ref_count = 1;
#if TASK_SWAPPER
	result->res_count = 1;
	result->sw_state = MAP_SW_IN;
#endif	/* TASK_SWAPPER */
	result->pmap = pmap;
	result->min_offset = min;
	result->max_offset = max;
	result->wiring_required = FALSE;
	result->no_zero_fill = FALSE;
	result->mapped_in_other_pmaps = FALSE;
	result->wait_for_space = FALSE;
	result->switch_protect = FALSE;
	result->disable_vmentry_reuse = FALSE;
	result->map_disallow_data_exec = FALSE;
	result->highest_entry_end = 0;
	result->first_free = vm_map_to_entry(result);
	result->hint = vm_map_to_entry(result);
	result->color_rr = (color_seed++) & vm_color_mask;
	result->jit_entry_exists = FALSE;
#if CONFIG_FREEZE
	result->default_freezer_handle = NULL;
#endif
	vm_map_lock_init(result);
	lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);

	return(result);
}

/*
 * vm_map_entry_create:	[ internal use only ]
 *
 * Allocates a VM map entry for insertion in the
 * given map (or map copy). No fields are filled.
 */
#define vm_map_entry_create(map, map_locked)	_vm_map_entry_create(&(map)->hdr, map_locked)

#define vm_map_copy_entry_create(copy, map_locked)				\
	_vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
unsigned reserved_zalloc_count, nonreserved_zalloc_count;

static vm_map_entry_t
_vm_map_entry_create(
	struct vm_map_header *map_header, boolean_t __unused map_locked)
{
	zone_t zone;
	vm_map_entry_t entry;

	zone = vm_map_entry_zone;

	assert(map_header->entries_pageable ? !map_locked : TRUE);

	if (map_header->entries_pageable) {
		entry = (vm_map_entry_t) zalloc(zone);
	}
	else {
		entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE);

		if (entry == VM_MAP_ENTRY_NULL) {
			zone = vm_map_entry_reserved_zone;
			entry = (vm_map_entry_t) zalloc(zone);
			OSAddAtomic(1, &reserved_zalloc_count);
		} else
			OSAddAtomic(1, &nonreserved_zalloc_count);
	}

	if (entry == VM_MAP_ENTRY_NULL)
		panic("vm_map_entry_create");
	entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);

	vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
#if	MAP_ENTRY_CREATION_DEBUG
	entry->vme_creation_maphdr = map_header;
	fastbacktrace(&entry->vme_creation_bt[0],
		      (sizeof(entry->vme_creation_bt)/sizeof(uintptr_t)));
#endif
	return(entry);
}

/*
 * vm_map_entry_dispose:	[ internal use only ]
 *
 * Inverse of vm_map_entry_create.
 *
 * The write map lock is held, so there is no need to
 * do anything special to ensure correctness
 * of the stores.
 */
#define vm_map_entry_dispose(map, entry)			\
	_vm_map_entry_dispose(&(map)->hdr, (entry))

#define vm_map_copy_entry_dispose(copy, entry)			\
	_vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))

static void
_vm_map_entry_dispose(
	register struct vm_map_header *map_header,
	register vm_map_entry_t entry)
{
	register zone_t zone;

	if (map_header->entries_pageable || !(entry->from_reserved_zone))
		zone = vm_map_entry_zone;
	else
		zone = vm_map_entry_reserved_zone;

	if (!map_header->entries_pageable) {
		if (zone == vm_map_entry_zone)
			OSAddAtomic(-1, &nonreserved_zalloc_count);
		else
			OSAddAtomic(-1, &reserved_zalloc_count);
	}

	zfree(zone, entry);
}

#if MACH_ASSERT
static boolean_t first_free_check = FALSE;
boolean_t
first_free_is_valid(
	vm_map_t map)
{
	if (!first_free_check)
		return TRUE;

	return( first_free_is_valid_store( map ));
}
#endif /* MACH_ASSERT */


#define vm_map_copy_entry_link(copy, after_where, entry)		\
	_vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))

#define vm_map_copy_entry_unlink(copy, entry)				\
	_vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))

#if MACH_ASSERT && TASK_SWAPPER
/*
 * vm_map_res_reference:
 *
 * Adds another valid residence count to the given map.
 *
 * Map is locked so this function can be called from
 * vm_map_swapin.
 *
 */
void vm_map_res_reference(register vm_map_t map)
{
	/* assert map is locked */
	assert(map->res_count >= 0);
	assert(map->ref_count >= map->res_count);
	if (map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_lock(map);
		vm_map_swapin(map);
		lck_mtx_lock(&map->s_lock);
		++map->res_count;
		vm_map_unlock(map);
	} else
		++map->res_count;
}

/*
 * vm_map_reference_swap:
 *
 * Adds valid reference and residence counts to the given map.
 *
 * The map may not be in memory (i.e. zero residence count).
 *
 */
void vm_map_reference_swap(register vm_map_t map)
{
	assert(map != VM_MAP_NULL);
	lck_mtx_lock(&map->s_lock);
	assert(map->res_count >= 0);
	assert(map->ref_count >= map->res_count);
	map->ref_count++;
	vm_map_res_reference(map);
	lck_mtx_unlock(&map->s_lock);
}

/*
 * vm_map_res_deallocate:
 *
 * Decrement residence count on a map; possibly causing swapout.
 *
 * The map must be in memory (i.e. non-zero residence count).
 *
 * The map is locked, so this function is callable from vm_map_deallocate.
 *
 */
void vm_map_res_deallocate(register vm_map_t map)
{
	assert(map->res_count > 0);
	if (--map->res_count == 0) {
		lck_mtx_unlock(&map->s_lock);
		vm_map_lock(map);
		vm_map_swapout(map);
		vm_map_unlock(map);
		lck_mtx_lock(&map->s_lock);
	}
	assert(map->ref_count >= map->res_count);
}
#endif	/* MACH_ASSERT && TASK_SWAPPER */

/*
 * vm_map_destroy:
 *
 * Actually destroy a map.
 */
void
vm_map_destroy(
	vm_map_t map,
	int flags)
{
	vm_map_lock(map);

	/* clean up regular map entries */
	(void) vm_map_delete(map, map->min_offset, map->max_offset,
			     flags, VM_MAP_NULL);
	/* clean up leftover special mappings (commpage, etc...)
	 */
	(void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
			     flags, VM_MAP_NULL);

#if CONFIG_FREEZE
	if (map->default_freezer_handle) {
		default_freezer_handle_deallocate(map->default_freezer_handle);
		map->default_freezer_handle = NULL;
	}
#endif
	vm_map_unlock(map);

	assert(map->hdr.nentries == 0);

	if(map->pmap)
		pmap_destroy(map->pmap);

	zfree(vm_map_zone, map);
}

#if TASK_SWAPPER
/*
 * vm_map_swapin/vm_map_swapout
 *
 * Swap a map in and out, either referencing or releasing its resources.
 * These functions are internal use only; however, they must be exported
 * because they may be called from macros, which are exported.
 *
 * In the case of swapout, there could be races on the residence count,
 * so if the residence count is up, we return, assuming that a
 * vm_map_deallocate() call in the near future will bring us back.
 *
 * Locking:
 *	-- We use the map write lock for synchronization among races.
 *	-- The map write lock, and not the simple s_lock, protects the
 *	   swap state of the map.
 *	-- If a map entry is a share map, then we hold both locks, in
 *	   hierarchical order.
 *
 * Synchronization Notes:
 *	1) If a vm_map_swapin() call happens while swapout in progress, it
 *	will block on the map lock and proceed when swapout is through.
 *	2) A vm_map_reference() call at this time is illegal, and will
 *	cause a panic. vm_map_reference() is only allowed on resident
 *	maps, since it refuses to block.
 *	3) A vm_map_swapin() call during a swapin will block, and
 *	proceed when the first swapin is done, turning into a nop.
 *	This is the reason the res_count is not incremented until
 *	after the swapin is complete.
 *	4) There is a timing hole after the checks of the res_count, before
 *	the map lock is taken, during which a swapin may get the lock
 *	before a swapout about to happen. If this happens, the swapin
 *	will detect the state and increment the reference count, causing
 *	the swapout to be a nop, thereby delaying it until a later
 *	vm_map_deallocate. If the swapout gets the lock first, then
 *	the swapin will simply block until the swapout is done, and
 *	then proceed.
 *
 * Because vm_map_swapin() is potentially an expensive operation, it
 * should be used with caution.
 *
 * Invariants:
 *	1) A map with a residence count of zero is either swapped, or
 *	   being swapped.
 *	2) A map with a non-zero residence count is either resident,
 *	   or being swapped in.
 */

int vm_map_swap_enable = 1;

void vm_map_swapin (vm_map_t map)
{
	register vm_map_entry_t entry;

	if (!vm_map_swap_enable)	/* debug */
		return;

	/*
	 * Map is locked
	 * First deal with various races.
	 */
	if (map->sw_state == MAP_SW_IN)
		/*
		 * we raced with swapout and won. Returning will incr.
		 * the res_count, turning the swapout into a nop.
		 */
		return;

	/*
	 * The residence count must be zero. If we raced with another
	 * swapin, the state would have been IN; if we raced with a
	 * swapout (after another competing swapin), we must have lost
	 * the race to get here (see above comment), in which case
	 * res_count is still 0.
	 */
	assert(map->res_count == 0);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_OUT);

	/*
	 * We now operate upon each map entry. If the entry is a sub-
	 * or share-map, we call vm_map_res_reference upon it.
	 * If the entry is an object, we call vm_object_res_reference
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_reference.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (entry->object.vm_object != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = entry->object.sub_map;
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_reference(lmap);
				lck_mtx_unlock(&lmap->s_lock);
			} else {
				vm_object_t object = entry->object.vm_object;
				vm_object_lock(object);
				/*
				 * This call may iterate through the
				 * shadow chain.
				 */
				vm_object_res_reference(object);
				vm_object_unlock(object);
			}
		}
		entry = entry->vme_next;
	}
	assert(map->sw_state == MAP_SW_OUT);
	map->sw_state = MAP_SW_IN;
}

void vm_map_swapout(vm_map_t map)
{
	register vm_map_entry_t entry;

	/*
	 * Map is locked
	 * First deal with various races.
	 * If we raced with a swapin and lost, the residence count
	 * will have been incremented to 1, and we simply return.
	 */
	lck_mtx_lock(&map->s_lock);
	if (map->res_count != 0) {
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	lck_mtx_unlock(&map->s_lock);

	/*
	 * There are no intermediate states of a map going out or
	 * coming in, since the map is locked during the transition.
	 */
	assert(map->sw_state == MAP_SW_IN);

	if (!vm_map_swap_enable)
		return;

	/*
	 * We now operate upon each map entry. If the entry is a sub-
	 * or share-map, we call vm_map_res_deallocate upon it.
	 * If the entry is an object, we call vm_object_res_deallocate
	 * (this may iterate through the shadow chain).
	 * Note that we hold the map locked the entire time,
	 * even if we get back here via a recursive call in
	 * vm_map_res_deallocate.
	 */
	entry = vm_map_first_entry(map);

	while (entry != vm_map_to_entry(map)) {
		if (entry->object.vm_object != VM_OBJECT_NULL) {
			if (entry->is_sub_map) {
				vm_map_t lmap = entry->object.sub_map;
				lck_mtx_lock(&lmap->s_lock);
				vm_map_res_deallocate(lmap);
				lck_mtx_unlock(&lmap->s_lock);
			} else {
				vm_object_t object = entry->object.vm_object;
				vm_object_lock(object);
				/*
				 * This call may take a long time,
				 * since it could actively push
				 * out pages (if we implement it
				 * that way).
				 */
				vm_object_res_deallocate(object);
				vm_object_unlock(object);
			}
		}
		entry = entry->vme_next;
	}
	assert(map->sw_state == MAP_SW_IN);
	map->sw_state = MAP_SW_OUT;
}

#endif	/* TASK_SWAPPER */

/*
 * vm_map_lookup_entry:	[ internal use only ]
 *
 * Calls into the vm map store layer to find the map
 * entry containing (or immediately preceding) the
 * specified address in the given map; the entry is returned
 * in the "entry" parameter. The boolean
 * result indicates whether the address is
 * actually contained in the map.
 */
boolean_t
vm_map_lookup_entry(
	register vm_map_t map,
	register vm_map_offset_t address,
	vm_map_entry_t *entry)	/* OUT */
{
	return ( vm_map_store_lookup_entry( map, address, entry ));
}

/*
 * Routine:	vm_map_find_space
 * Purpose:
 *	Allocate a range in the specified virtual address map,
 *	returning the entry allocated for that range.
 *	Used by kmem_alloc, etc.
 *
 *	The map must NOT be locked. It will be returned locked
 *	on KERN_SUCCESS, unlocked on failure.
 *
 *	If an entry is allocated, the object/offset fields
 *	are initialized to zero.
 */
kern_return_t
vm_map_find_space(
	register vm_map_t map,
	vm_map_offset_t *address,	/* OUT */
	vm_map_size_t size,
	vm_map_offset_t mask,
	int flags,
	vm_map_entry_t *o_entry)	/* OUT */
{
	register vm_map_entry_t entry, new_entry;
	register vm_map_offset_t start;
	register vm_map_offset_t end;

	if (size == 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	if (flags & VM_FLAGS_GUARD_AFTER) {
		/* account for the back guard page in the size */
		size += VM_MAP_PAGE_SIZE(map);
	}

	new_entry = vm_map_entry_create(map, FALSE);

	/*
	 * Look for the first possible address; if there's already
	 * something at this address, we have to start after it.
	 */

	vm_map_lock(map);

	if( map->disable_vmentry_reuse == TRUE) {
		VM_MAP_HIGHEST_ENTRY(map, entry, start);
	} else {
		assert(first_free_is_valid(map));
		if ((entry = map->first_free) == vm_map_to_entry(map))
			start = map->min_offset;
		else
			start = entry->vme_end;
	}

	/*
	 * In any case, the "entry" always precedes
	 * the proposed new region throughout the loop:
	 */

	while (TRUE) {
		register vm_map_entry_t next;

		/*
		 * Find the end of the proposed new region.
		 * Be sure we didn't go beyond the end, or
		 * wrap around the address.
		 */

		if (flags & VM_FLAGS_GUARD_BEFORE) {
			/* reserve space for the front guard page */
			start += VM_MAP_PAGE_SIZE(map);
		}
		end = ((start + mask) & ~mask);

		if (end < start) {
			vm_map_entry_dispose(map, new_entry);
			vm_map_unlock(map);
			return(KERN_NO_SPACE);
		}
		start = end;
		end += size;

		if ((end > map->max_offset) || (end < start)) {
			vm_map_entry_dispose(map, new_entry);
			vm_map_unlock(map);
			return(KERN_NO_SPACE);
		}

		/*
		 * If there are no more entries, we must win.
		 */

		next = entry->vme_next;
		if (next == vm_map_to_entry(map))
			break;

		/*
		 * If there is another entry, it must be
		 * after the end of the potential new region.
		 */

		if (next->vme_start >= end)
			break;

		/*
		 * Didn't fit -- move to the next entry.
		 */

		entry = next;
		start = entry->vme_end;
	}

	/*
	 * At this point,
	 *	"start" and "end" should define the endpoints of the
	 *	available new range, and
	 *	"entry" should refer to the region before the new
	 *	range, and
	 *
	 *	the map should be locked.
	 */

	if (flags & VM_FLAGS_GUARD_BEFORE) {
		/* go back for the front guard page */
		start -= VM_MAP_PAGE_SIZE(map);
	}
	*address = start;

	assert(start < end);
	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
				   VM_MAP_PAGE_MASK(map)));
	assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
				   VM_MAP_PAGE_MASK(map)));

	new_entry->is_shared = FALSE;
	new_entry->is_sub_map = FALSE;
	new_entry->use_pmap = TRUE;
	new_entry->object.vm_object = VM_OBJECT_NULL;
	new_entry->offset = (vm_object_offset_t) 0;

	new_entry->needs_copy = FALSE;

	new_entry->inheritance = VM_INHERIT_DEFAULT;
	new_entry->protection = VM_PROT_DEFAULT;
	new_entry->max_protection = VM_PROT_ALL;
	new_entry->behavior = VM_BEHAVIOR_DEFAULT;
	new_entry->wired_count = 0;
	new_entry->user_wired_count = 0;

	new_entry->in_transition = FALSE;
	new_entry->needs_wakeup = FALSE;
	new_entry->no_cache = FALSE;
	new_entry->permanent = FALSE;
	new_entry->superpage_size = FALSE;
	if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
		new_entry->map_aligned = TRUE;
	} else {
		new_entry->map_aligned = FALSE;
	}

	new_entry->used_for_jit = 0;

	new_entry->alias = 0;
	new_entry->zero_wired_pages = FALSE;
	new_entry->iokit_acct = FALSE;

	VM_GET_FLAGS_ALIAS(flags, new_entry->alias);

	/*
	 * Insert the new entry into the list
	 */

	vm_map_store_entry_link(map, entry, new_entry);

	map->size += size;

	/*
	 * Update the lookup hint
	 */
	SAVE_HINT_MAP_WRITE(map, new_entry);

	*o_entry = new_entry;
	return(KERN_SUCCESS);
}

int vm_map_pmap_enter_print = FALSE;
int vm_map_pmap_enter_enable = FALSE;

/*
 * Routine:	vm_map_pmap_enter [internal only]
 *
 * Description:
 *	Force pages from the specified object to be entered into
 *	the pmap at the specified address if they are present.
 *	As soon as a page is not found in the object, the scan ends.
 *
 * Returns:
 *	Nothing.
 *
 * In/out conditions:
 *	The source map should not be locked on entry.
 */
__unused static void
vm_map_pmap_enter(
	vm_map_t map,
	register vm_map_offset_t addr,
	register vm_map_offset_t end_addr,
	register vm_object_t object,
	vm_object_offset_t offset,
	vm_prot_t protection)
{
	int type_of_fault;
	kern_return_t kr;

	if(map->pmap == 0)
		return;

	while (addr < end_addr) {
		register vm_page_t m;


		/*
		 * TODO:
		 * From vm_map_enter(), we come into this function without the map
		 * lock held or the object lock held.
		 * We haven't taken a reference on the object either.
		 * We should do a proper lookup on the map to make sure
		 * that things are sane before we go locking objects that
		 * could have been deallocated from under us.
		 */

		vm_object_lock(object);

		m = vm_page_lookup(object, offset);
		/*
		 * ENCRYPTED SWAP:
		 * The user should never see encrypted data, so do not
		 * enter an encrypted page in the page table.
		 */
		if (m == VM_PAGE_NULL || m->busy || m->encrypted ||
		    m->fictitious ||
		    (m->unusual && ( m->error || m->restart || m->absent))) {
			vm_object_unlock(object);
			return;
		}

		if (vm_map_pmap_enter_print) {
			printf("vm_map_pmap_enter:");
			printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
			       map, (unsigned long long)addr, object, (unsigned long long)offset);
		}
		type_of_fault = DBG_CACHE_HIT_FAULT;
		kr = vm_fault_enter(m, map->pmap, addr, protection, protection,
				    VM_PAGE_WIRED(m), FALSE, FALSE, FALSE,
				    0, /* XXX need user tag / alias? */
				    0, /* alternate accounting? */
				    NULL,
				    &type_of_fault);

		vm_object_unlock(object);

		offset += PAGE_SIZE_64;
		addr += PAGE_SIZE;
	}
}

boolean_t vm_map_pmap_is_empty(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end);
boolean_t vm_map_pmap_is_empty(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
#ifdef MACHINE_PMAP_IS_EMPTY
	return pmap_is_empty(map->pmap, start, end);
#else	/* MACHINE_PMAP_IS_EMPTY */
	vm_map_offset_t offset;
	ppnum_t phys_page;

	if (map->pmap == NULL) {
		return TRUE;
	}

	for (offset = start;
	     offset < end;
	     offset += PAGE_SIZE) {
		phys_page = pmap_find_phys(map->pmap, offset);
		if (phys_page) {
			kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
				"page %d at 0x%llx\n",
				map, (long long)start, (long long)end,
				phys_page, (long long)offset);
			return FALSE;
		}
	}
	return TRUE;
#endif	/* MACHINE_PMAP_IS_EMPTY */
}

#define MAX_TRIES_TO_GET_RANDOM_ADDRESS	1000
kern_return_t
vm_map_random_address_for_size(
	vm_map_t map,
	vm_map_offset_t *address,
	vm_map_size_t size)
{
	kern_return_t kr = KERN_SUCCESS;
	int tries = 0;
	vm_map_offset_t random_addr = 0;
	vm_map_offset_t hole_end;

	vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
	vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
	vm_map_size_t vm_hole_size = 0;
	vm_map_size_t addr_space_size;

	addr_space_size = vm_map_max(map) - vm_map_min(map);

	assert(page_aligned(size));

	while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT;
		random_addr = vm_map_trunc_page(
			vm_map_min(map) + (random_addr % addr_space_size),
			VM_MAP_PAGE_MASK(map));

		if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
			if (prev_entry == vm_map_to_entry(map)) {
				next_entry = vm_map_first_entry(map);
			} else {
				next_entry = prev_entry->vme_next;
			}
			if (next_entry == vm_map_to_entry(map)) {
				hole_end = vm_map_max(map);
			} else {
				hole_end = next_entry->vme_start;
			}
			vm_hole_size = hole_end - random_addr;
			if (vm_hole_size >= size) {
				*address = random_addr;
				break;
			}
		}
		tries++;
	}

	if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
		kr = KERN_NO_SPACE;
	}
	return kr;
}
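
/*
 * For illustration only (hypothetical numbers, not part of the original
 * code): with 4 KB pages (PAGE_SHIFT == 12), a random() value of 0x1234
 * yields a candidate address of
 *
 *	vm_map_trunc_page(vm_map_min(map) + ((0x1234ULL << 12) % addr_space_size),
 *			  VM_MAP_PAGE_MASK(map))
 *
 * and the candidate is accepted only if it falls in a hole at least
 * "size" bytes long; otherwise another attempt is made, up to
 * MAX_TRIES_TO_GET_RANDOM_ADDRESS times.
 */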

/*
 * Routine:	vm_map_enter
 *
 * Description:
 *	Allocate a range in the specified virtual address map.
 *	The resulting range will refer to memory defined by
 *	the given memory object and offset into that object.
 *
 *	Arguments are as defined in the vm_map call.
 */
int _map_enter_debug = 0;
static unsigned int vm_map_enter_restore_successes = 0;
static unsigned int vm_map_enter_restore_failures = 0;
kern_return_t
vm_map_enter(
	vm_map_t map,
	vm_map_offset_t *address,	/* IN/OUT */
	vm_map_size_t size,
	vm_map_offset_t mask,
	int flags,
	vm_object_t object,
	vm_object_offset_t offset,
	boolean_t needs_copy,
	vm_prot_t cur_protection,
	vm_prot_t max_protection,
	vm_inherit_t inheritance)
{
	vm_map_entry_t entry, new_entry;
	vm_map_offset_t start, tmp_start, tmp_offset;
	vm_map_offset_t end, tmp_end;
	vm_map_offset_t tmp2_start, tmp2_end;
	vm_map_offset_t step;
	kern_return_t result = KERN_SUCCESS;
	vm_map_t zap_old_map = VM_MAP_NULL;
	vm_map_t zap_new_map = VM_MAP_NULL;
	boolean_t map_locked = FALSE;
	boolean_t pmap_empty = TRUE;
	boolean_t new_mapping_established = FALSE;
	boolean_t keep_map_locked = ((flags & VM_FLAGS_KEEP_MAP_LOCKED) != 0);
	boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
	boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
	boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
	boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
	boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
	boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0);
	boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0);
	boolean_t iokit_acct = ((flags & VM_FLAGS_IOKIT_ACCT) != 0);
	unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
	char alias;
	vm_map_offset_t effective_min_offset, effective_max_offset;
	kern_return_t kr;
	boolean_t clear_map_aligned = FALSE;

	if (superpage_size) {
		switch (superpage_size) {
			/*
			 * Note that the current implementation only supports
			 * a single size for superpages, SUPERPAGE_SIZE, per
			 * architecture. As soon as more sizes are supposed
			 * to be supported, SUPERPAGE_SIZE has to be replaced
			 * with a lookup of the size depending on superpage_size.
			 */
#ifdef __x86_64__
		case SUPERPAGE_SIZE_ANY:
			/* handle it like 2 MB and round up to page size */
			size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1);
		case SUPERPAGE_SIZE_2MB:
			break;
#endif
		default:
			return KERN_INVALID_ARGUMENT;
		}
		mask = SUPERPAGE_SIZE-1;
		if (size & (SUPERPAGE_SIZE-1))
			return KERN_INVALID_ARGUMENT;
		inheritance = VM_INHERIT_NONE;	/* fork() children won't inherit superpages */
	}



	if (is_submap) {
		if (purgable) {
			/* submaps can not be purgeable */
			return KERN_INVALID_ARGUMENT;
		}
		if (object == VM_OBJECT_NULL) {
			/* submaps can not be created lazily */
			return KERN_INVALID_ARGUMENT;
		}
	}
	if (flags & VM_FLAGS_ALREADY) {
		/*
		 * VM_FLAGS_ALREADY says that it's OK if the same mapping
		 * is already present. For it to be meaningful, the requested
		 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
		 * we shouldn't try and remove what was mapped there first
		 * (!VM_FLAGS_OVERWRITE).
		 */
		if ((flags & VM_FLAGS_ANYWHERE) ||
		    (flags & VM_FLAGS_OVERWRITE)) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	effective_min_offset = map->min_offset;

	if (flags & VM_FLAGS_BEYOND_MAX) {
		/*
		 * Allow an insertion beyond the map's max offset.
		 */
		if (vm_map_is_64bit(map))
			effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
		else
			effective_max_offset = 0x00000000FFFFF000ULL;
	} else {
		effective_max_offset = map->max_offset;
	}

	if (size == 0 ||
	    (offset & PAGE_MASK_64) != 0) {
		*address = 0;
		return KERN_INVALID_ARGUMENT;
	}

	VM_GET_FLAGS_ALIAS(flags, alias);

#define	RETURN(value)	{ result = value; goto BailOut; }

	assert(page_aligned(*address));
	assert(page_aligned(size));

	if (!VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
		/*
		 * In most cases, the caller rounds the size up to the
		 * map's page size.
		 * If we get a size that is explicitly not map-aligned here,
		 * we'll have to respect the caller's wish and mark the
		 * mapping as "not map-aligned" to avoid tripping the
		 * map alignment checks later.
		 */
		clear_map_aligned = TRUE;
	}
	if (!anywhere &&
	    !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
		/*
		 * We've been asked to map at a fixed address and that
		 * address is not aligned to the map's specific alignment.
		 * The caller should know what it's doing (i.e. most likely
		 * mapping some fragmented copy map, transferring memory from
		 * a VM map with a different alignment), so clear map_aligned
		 * for this new VM map entry and proceed.
		 */
		clear_map_aligned = TRUE;
	}

	/*
	 * Only zero-fill objects are allowed to be purgable.
	 * LP64todo - limit purgable objects to 32-bits for now
	 */
	if (purgable &&
	    (offset != 0 ||
	     (object != VM_OBJECT_NULL &&
	      (object->vo_size != size ||
	       object->purgable == VM_PURGABLE_DENY))
	     || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */
		return KERN_INVALID_ARGUMENT;

	if (!anywhere && overwrite) {
		/*
		 * Create a temporary VM map to hold the old mappings in the
		 * affected area while we create the new one.
		 * This avoids releasing the VM map lock in
		 * vm_map_entry_delete() and allows atomicity
		 * when we want to replace some mappings with a new one.
		 * It also allows us to restore the old VM mappings if the
		 * new mapping fails.
		 */
		zap_old_map = vm_map_create(PMAP_NULL,
					    *address,
					    *address + size,
					    map->hdr.entries_pageable);
		vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
	}

StartAgain: ;

	start = *address;

	if (anywhere) {
		vm_map_lock(map);
		map_locked = TRUE;

		if (entry_for_jit) {
			if (map->jit_entry_exists) {
				result = KERN_INVALID_ARGUMENT;
				goto BailOut;
			}
			/*
			 * Get a random start address.
			 */
			result = vm_map_random_address_for_size(map, address, size);
			if (result != KERN_SUCCESS) {
				goto BailOut;
			}
			start = *address;
		}


		/*
		 * Calculate the first possible address.
		 */

		if (start < effective_min_offset)
			start = effective_min_offset;
		if (start > effective_max_offset)
			RETURN(KERN_NO_SPACE);

		/*
		 * Look for the first possible address;
		 * if there's already something at this
		 * address, we have to start after it.
		 */

		if( map->disable_vmentry_reuse == TRUE) {
			VM_MAP_HIGHEST_ENTRY(map, entry, start);
		} else {
			assert(first_free_is_valid(map));

			entry = map->first_free;

			if (entry == vm_map_to_entry(map)) {
				entry = NULL;
			} else {
				if (entry->vme_next == vm_map_to_entry(map)){
					/*
					 * Hole at the end of the map.
					 */
					entry = NULL;
				} else {
					if (start < (entry->vme_next)->vme_start ) {
						start = entry->vme_end;
						start = vm_map_round_page(start,
									  VM_MAP_PAGE_MASK(map));
					} else {
						/*
						 * Need to do a lookup.
						 */
						entry = NULL;
					}
				}
			}

			if (entry == NULL) {
				vm_map_entry_t tmp_entry;
				if (vm_map_lookup_entry(map, start, &tmp_entry)) {
					assert(!entry_for_jit);
					start = tmp_entry->vme_end;
					start = vm_map_round_page(start,
								  VM_MAP_PAGE_MASK(map));
				}
				entry = tmp_entry;
			}
		}

		/*
		 * In any case, the "entry" always precedes
		 * the proposed new region throughout the
		 * loop:
		 */

		while (TRUE) {
			register vm_map_entry_t next;

			/*
			 * Find the end of the proposed new region.
			 * Be sure we didn't go beyond the end, or
			 * wrap around the address.
			 */

			end = ((start + mask) & ~mask);
			end = vm_map_round_page(end,
						VM_MAP_PAGE_MASK(map));
			if (end < start)
				RETURN(KERN_NO_SPACE);
			start = end;
			assert(VM_MAP_PAGE_ALIGNED(start,
						   VM_MAP_PAGE_MASK(map)));
			end += size;

			if ((end > effective_max_offset) || (end < start)) {
				if (map->wait_for_space) {
					assert(!keep_map_locked);
					if (size <= (effective_max_offset -
						     effective_min_offset)) {
						assert_wait((event_t)map,
							    THREAD_ABORTSAFE);
						vm_map_unlock(map);
						map_locked = FALSE;
						thread_block(THREAD_CONTINUE_NULL);
						goto StartAgain;
					}
				}
				RETURN(KERN_NO_SPACE);
			}

			/*
			 * If there are no more entries, we must win.
			 */

			next = entry->vme_next;
			if (next == vm_map_to_entry(map))
				break;

			/*
			 * If there is another entry, it must be
			 * after the end of the potential new region.
			 */

			if (next->vme_start >= end)
				break;

			/*
			 * Didn't fit -- move to the next entry.
			 */

			entry = next;
			start = entry->vme_end;
			start = vm_map_round_page(start,
						  VM_MAP_PAGE_MASK(map));
		}
		*address = start;
		assert(VM_MAP_PAGE_ALIGNED(*address,
					   VM_MAP_PAGE_MASK(map)));
	} else {
		/*
		 * Verify that:
		 * the address doesn't itself violate
		 * the mask requirement.
		 */

		vm_map_lock(map);
		map_locked = TRUE;
		if ((start & mask) != 0)
			RETURN(KERN_NO_SPACE);

		/*
		 * ... the address is within bounds
		 */

		end = start + size;

		if ((start < effective_min_offset) ||
		    (end > effective_max_offset) ||
		    (start >= end)) {
			RETURN(KERN_INVALID_ADDRESS);
		}

		if (overwrite && zap_old_map != VM_MAP_NULL) {
			/*
			 * Fixed mapping and "overwrite" flag: attempt to
			 * remove all existing mappings in the specified
			 * address range, saving them in our "zap_old_map".
			 */
			(void) vm_map_delete(map, start, end,
					     (VM_MAP_REMOVE_SAVE_ENTRIES |
					      VM_MAP_REMOVE_NO_MAP_ALIGN),
					     zap_old_map);
		}

		/*
		 * ... the starting address isn't allocated
		 */

		if (vm_map_lookup_entry(map, start, &entry)) {
(flags & VM_FLAGS_ALREADY)) { 1932 RETURN(KERN_NO_SPACE); 1933 } 1934 /* 1935 * Check if what's already there is what we want. 1936 */ 1937 tmp_start = start; 1938 tmp_offset = offset; 1939 if (entry->vme_start < start) { 1940 tmp_start -= start - entry->vme_start; 1941 tmp_offset -= start - entry->vme_start; 1942 1943 } 1944 for (; entry->vme_start < end; 1945 entry = entry->vme_next) { 1946 /* 1947 * Check if the mapping's attributes 1948 * match the existing map entry. 1949 */ 1950 if (entry == vm_map_to_entry(map) || 1951 entry->vme_start != tmp_start || 1952 entry->is_sub_map != is_submap || 1953 entry->offset != tmp_offset || 1954 entry->needs_copy != needs_copy || 1955 entry->protection != cur_protection || 1956 entry->max_protection != max_protection || 1957 entry->inheritance != inheritance || 1958 entry->iokit_acct != iokit_acct || 1959 entry->alias != alias) { 1960 /* not the same mapping ! */ 1961 RETURN(KERN_NO_SPACE); 1962 } 1963 /* 1964 * Check if the same object is being mapped. 1965 */ 1966 if (is_submap) { 1967 if (entry->object.sub_map != 1968 (vm_map_t) object) { 1969 /* not the same submap */ 1970 RETURN(KERN_NO_SPACE); 1971 } 1972 } else { 1973 if (entry->object.vm_object != object) { 1974 /* not the same VM object... */ 1975 vm_object_t obj2; 1976 1977 obj2 = entry->object.vm_object; 1978 if ((obj2 == VM_OBJECT_NULL || 1979 obj2->internal) && 1980 (object == VM_OBJECT_NULL || 1981 object->internal)) { 1982 /* 1983 * ... but both are 1984 * anonymous memory, 1985 * so equivalent. 1986 */ 1987 } else { 1988 RETURN(KERN_NO_SPACE); 1989 } 1990 } 1991 } 1992 1993 tmp_offset += entry->vme_end - entry->vme_start; 1994 tmp_start += entry->vme_end - entry->vme_start; 1995 if (entry->vme_end >= end) { 1996 /* reached the end of our mapping */ 1997 break; 1998 } 1999 } 2000 /* it all matches: let's use what's already there ! */ 2001 RETURN(KERN_MEMORY_PRESENT); 2002 } 2003 2004 /* 2005 * ... the next region doesn't overlap the 2006 * end point. 2007 */ 2008 2009 if ((entry->vme_next != vm_map_to_entry(map)) && 2010 (entry->vme_next->vme_start < end)) 2011 RETURN(KERN_NO_SPACE); 2012 } 2013 2014 /* 2015 * At this point, 2016 * "start" and "end" should define the endpoints of the 2017 * available new range, and 2018 * "entry" should refer to the region before the new 2019 * range, and 2020 * 2021 * the map should be locked. 2022 */ 2023 2024 /* 2025 * See whether we can avoid creating a new entry (and object) by 2026 * extending one of our neighbors. [So far, we only attempt to 2027 * extend from below.] Note that we can never extend/join 2028 * purgable objects because they need to remain distinct 2029 * entities in order to implement their "volatile object" 2030 * semantics. 2031 */ 2032 2033 if (purgable || entry_for_jit) { 2034 if (object == VM_OBJECT_NULL) { 2035 object = vm_object_allocate(size); 2036 object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 2037 object->true_share = TRUE; 2038 if (purgable) { 2039 task_t owner; 2040 object->purgable = VM_PURGABLE_NONVOLATILE; 2041 if (map->pmap == kernel_pmap) { 2042 /* 2043 * Purgeable mappings made in a kernel 2044 * map are "owned" by the kernel itself 2045 * rather than the current user task 2046 * because they're likely to be used by 2047 * more than this user task (see 2048 * execargs_purgeable_allocate(), for 2049 * example). 
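 * The object is then enqueued as non-volatile with that owner
 * below (see vm_purgeable_nonvolatile_enqueue()).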
2050 */ 2051 owner = kernel_task; 2052 } else { 2053 owner = current_task(); 2054 } 2055 assert(object->vo_purgeable_owner == NULL); 2056 assert(object->resident_page_count == 0); 2057 assert(object->wired_page_count == 0); 2058 vm_object_lock(object); 2059 vm_purgeable_nonvolatile_enqueue(object, owner); 2060 vm_object_unlock(object); 2061 } 2062 offset = (vm_object_offset_t)0; 2063 } 2064 } else if ((is_submap == FALSE) && 2065 (object == VM_OBJECT_NULL) && 2066 (entry != vm_map_to_entry(map)) && 2067 (entry->vme_end == start) && 2068 (!entry->is_shared) && 2069 (!entry->is_sub_map) && 2070 (!entry->in_transition) && 2071 (!entry->needs_wakeup) && 2072 (entry->behavior == VM_BEHAVIOR_DEFAULT) && 2073 (entry->protection == cur_protection) && 2074 (entry->max_protection == max_protection) && 2075 (entry->inheritance == inheritance) && 2076 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) && 2077 (entry->no_cache == no_cache) && 2078 (entry->permanent == permanent) && 2079 (!entry->superpage_size && !superpage_size) && 2080 /* 2081 * No coalescing if not map-aligned, to avoid propagating 2082 * that condition any further than needed: 2083 */ 2084 (!entry->map_aligned || !clear_map_aligned) && 2085 (!entry->zero_wired_pages) && 2086 (!entry->used_for_jit && !entry_for_jit) && 2087 (entry->iokit_acct == iokit_acct) && 2088 2089 ((entry->vme_end - entry->vme_start) + size <= 2090 (alias == VM_MEMORY_REALLOC ? 2091 ANON_CHUNK_SIZE : 2092 NO_COALESCE_LIMIT)) && 2093 2094 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */ 2095 if (vm_object_coalesce(entry->object.vm_object, 2096 VM_OBJECT_NULL, 2097 entry->offset, 2098 (vm_object_offset_t) 0, 2099 (vm_map_size_t)(entry->vme_end - entry->vme_start), 2100 (vm_map_size_t)(end - entry->vme_end))) { 2101 2102 /* 2103 * Coalesced the two objects - can extend 2104 * the previous map entry to include the 2105 * new range. 2106 */ 2107 map->size += (end - entry->vme_end); 2108 assert(entry->vme_start < end); 2109 assert(VM_MAP_PAGE_ALIGNED(end, 2110 VM_MAP_PAGE_MASK(map))); 2111 entry->vme_end = end; 2112 vm_map_store_update_first_free(map, map->first_free); 2113 new_mapping_established = TRUE; 2114 RETURN(KERN_SUCCESS); 2115 } 2116 } 2117 2118 step = superpage_size ? SUPERPAGE_SIZE : (end - start); 2119 new_entry = NULL; 2120 2121 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) { 2122 tmp2_end = tmp2_start + step; 2123 /* 2124 * Create a new entry 2125 * LP64todo - for now, we can only allocate 4GB internal objects 2126 * because the default pager can't page bigger ones. Remove this 2127 * when it can. 2128 * 2129 * XXX FBDP 2130 * The reserved "page zero" in each process's address space can 2131 * be arbitrarily large. Splitting it into separate 4GB objects and 2132 * therefore different VM map entries serves no purpose and just 2133 * slows down operations on the VM map, so let's not split the 2134 * allocation into 4GB chunks if the max protection is NONE. That 2135 * memory should never be accessible, so it will never get to the 2136 * default pager. 
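 * (That is why the ANON_CHUNK_SIZE split below is skipped when
 * max_protection is VM_PROT_NONE or a superpage was requested.)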
2137 */ 2138 tmp_start = tmp2_start; 2139 if (object == VM_OBJECT_NULL && 2140 size > (vm_map_size_t)ANON_CHUNK_SIZE && 2141 max_protection != VM_PROT_NONE && 2142 superpage_size == 0) 2143 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE; 2144 else 2145 tmp_end = tmp2_end; 2146 do { 2147 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end, 2148 object, offset, needs_copy, 2149 FALSE, FALSE, 2150 cur_protection, max_protection, 2151 VM_BEHAVIOR_DEFAULT, 2152 (entry_for_jit)? VM_INHERIT_NONE: inheritance, 2153 0, no_cache, 2154 permanent, 2155 superpage_size, 2156 clear_map_aligned, 2157 is_submap); 2158 new_entry->alias = alias; 2159 if (entry_for_jit){ 2160 if (!(map->jit_entry_exists)){ 2161 new_entry->used_for_jit = TRUE; 2162 map->jit_entry_exists = TRUE; 2163 } 2164 } 2165 2166 assert(!new_entry->iokit_acct); 2167 if (!is_submap && 2168 object != VM_OBJECT_NULL && 2169 object->purgable != VM_PURGABLE_DENY) { 2170 assert(new_entry->use_pmap); 2171 assert(!new_entry->iokit_acct); 2172 /* 2173 * Turn off pmap accounting since 2174 * purgeable objects have their 2175 * own ledgers. 2176 */ 2177 new_entry->use_pmap = FALSE; 2178 } else if (!is_submap && 2179 iokit_acct) { 2180 /* alternate accounting */ 2181 assert(!new_entry->iokit_acct); 2182 assert(new_entry->use_pmap); 2183 new_entry->iokit_acct = TRUE; 2184 new_entry->use_pmap = FALSE; 2185 vm_map_iokit_mapped_region( 2186 map, 2187 (new_entry->vme_end - 2188 new_entry->vme_start)); 2189 } else if (!is_submap) { 2190 assert(!new_entry->iokit_acct); 2191 assert(new_entry->use_pmap); 2192 } 2193 2194 if (is_submap) { 2195 vm_map_t submap; 2196 boolean_t submap_is_64bit; 2197 boolean_t use_pmap; 2198 2199 assert(new_entry->is_sub_map); 2200 assert(!new_entry->use_pmap); 2201 assert(!new_entry->iokit_acct); 2202 submap = (vm_map_t) object; 2203 submap_is_64bit = vm_map_is_64bit(submap); 2204 use_pmap = (alias == VM_MEMORY_SHARED_PMAP); 2205#ifndef NO_NESTED_PMAP 2206 if (use_pmap && submap->pmap == NULL) { 2207 ledger_t ledger = map->pmap->ledger; 2208 /* we need a sub pmap to nest... */ 2209 submap->pmap = pmap_create(ledger, 0, 2210 submap_is_64bit); 2211 if (submap->pmap == NULL) { 2212 /* let's proceed without nesting... */ 2213 } 2214 } 2215 if (use_pmap && submap->pmap != NULL) { 2216 kr = pmap_nest(map->pmap, 2217 submap->pmap, 2218 tmp_start, 2219 tmp_start, 2220 tmp_end - tmp_start); 2221 if (kr != KERN_SUCCESS) { 2222 printf("vm_map_enter: " 2223 "pmap_nest(0x%llx,0x%llx) " 2224 "error 0x%x\n", 2225 (long long)tmp_start, 2226 (long long)tmp_end, 2227 kr); 2228 } else { 2229 /* we're now nested ! 
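 * Translations for this range are now provided by the submap's
 * nested pmap instead of being duplicated in the parent's pmap.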
*/ 2230 new_entry->use_pmap = TRUE; 2231 pmap_empty = FALSE; 2232 } 2233 } 2234#endif /* NO_NESTED_PMAP */ 2235 } 2236 entry = new_entry; 2237 2238 if (superpage_size) { 2239 vm_page_t pages, m; 2240 vm_object_t sp_object; 2241 2242 entry->offset = 0; 2243 2244 /* allocate one superpage */ 2245 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0); 2246 if (kr != KERN_SUCCESS) { 2247 new_mapping_established = TRUE; /* will cause deallocation of whole range */ 2248 RETURN(kr); 2249 } 2250 2251 /* create one vm_object per superpage */ 2252 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start)); 2253 sp_object->phys_contiguous = TRUE; 2254 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE; 2255 entry->object.vm_object = sp_object; 2256 assert(entry->use_pmap); 2257 2258 /* enter the base pages into the object */ 2259 vm_object_lock(sp_object); 2260 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) { 2261 m = pages; 2262 pmap_zero_page(m->phys_page); 2263 pages = NEXT_PAGE(m); 2264 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; 2265 vm_page_insert(m, sp_object, offset); 2266 } 2267 vm_object_unlock(sp_object); 2268 } 2269 } while (tmp_end != tmp2_end && 2270 (tmp_start = tmp_end) && 2271 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ? 2272 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end)); 2273 } 2274 2275 new_mapping_established = TRUE; 2276 2277BailOut: 2278 assert(map_locked == TRUE); 2279 2280 if (result == KERN_SUCCESS) { 2281 vm_prot_t pager_prot; 2282 memory_object_t pager; 2283 2284#if DEBUG 2285 if (pmap_empty && 2286 !(flags & VM_FLAGS_NO_PMAP_CHECK)) { 2287 assert(vm_map_pmap_is_empty(map, 2288 *address, 2289 *address+size)); 2290 } 2291#endif /* DEBUG */ 2292 2293 /* 2294 * For "named" VM objects, let the pager know that the 2295 * memory object is being mapped. Some pagers need to keep 2296 * track of this, to know when they can reclaim the memory 2297 * object, for example. 2298 * VM calls memory_object_map() for each mapping (specifying 2299 * the protection of each mapping) and calls 2300 * memory_object_last_unmap() when all the mappings are gone. 2301 */ 2302 pager_prot = max_protection; 2303 if (needs_copy) { 2304 /* 2305 * Copy-On-Write mapping: won't modify 2306 * the memory object. 2307 */ 2308 pager_prot &= ~VM_PROT_WRITE; 2309 } 2310 if (!is_submap && 2311 object != VM_OBJECT_NULL && 2312 object->named && 2313 object->pager != MEMORY_OBJECT_NULL) { 2314 vm_object_lock(object); 2315 pager = object->pager; 2316 if (object->named && 2317 pager != MEMORY_OBJECT_NULL) { 2318 assert(object->pager_ready); 2319 vm_object_mapping_wait(object, THREAD_UNINT); 2320 vm_object_mapping_begin(object); 2321 vm_object_unlock(object); 2322 2323 kr = memory_object_map(pager, pager_prot); 2324 assert(kr == KERN_SUCCESS); 2325 2326 vm_object_lock(object); 2327 vm_object_mapping_end(object); 2328 } 2329 vm_object_unlock(object); 2330 } 2331 } 2332 2333 assert(map_locked == TRUE); 2334 2335 if (!keep_map_locked) { 2336 vm_map_unlock(map); 2337 map_locked = FALSE; 2338 } 2339 2340 /* 2341 * We can't hold the map lock if we enter this block. 2342 */ 2343 2344 if (result == KERN_SUCCESS) { 2345 2346 /* Wire down the new entry if the user 2347 * requested all new map entries be wired. 
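 * Superpage mappings are wired unconditionally as well.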
2348 */ 2349 if ((map->wiring_required)||(superpage_size)) { 2350 assert(!keep_map_locked); 2351 pmap_empty = FALSE; /* pmap won't be empty */ 2352 kr = vm_map_wire(map, start, end, 2353 new_entry->protection, TRUE); 2354 result = kr; 2355 } 2356 2357 } 2358 2359 if (result != KERN_SUCCESS) { 2360 if (new_mapping_established) { 2361 /* 2362 * We have to get rid of the new mappings since we 2363 * won't make them available to the user. 2364 * Try and do that atomically, to minimize the risk 2365 * that someone else create new mappings that range. 2366 */ 2367 zap_new_map = vm_map_create(PMAP_NULL, 2368 *address, 2369 *address + size, 2370 map->hdr.entries_pageable); 2371 vm_map_set_page_shift(zap_new_map, 2372 VM_MAP_PAGE_SHIFT(map)); 2373 if (!map_locked) { 2374 vm_map_lock(map); 2375 map_locked = TRUE; 2376 } 2377 (void) vm_map_delete(map, *address, *address+size, 2378 (VM_MAP_REMOVE_SAVE_ENTRIES | 2379 VM_MAP_REMOVE_NO_MAP_ALIGN), 2380 zap_new_map); 2381 } 2382 if (zap_old_map != VM_MAP_NULL && 2383 zap_old_map->hdr.nentries != 0) { 2384 vm_map_entry_t entry1, entry2; 2385 2386 /* 2387 * The new mapping failed. Attempt to restore 2388 * the old mappings, saved in the "zap_old_map". 2389 */ 2390 if (!map_locked) { 2391 vm_map_lock(map); 2392 map_locked = TRUE; 2393 } 2394 2395 /* first check if the coast is still clear */ 2396 start = vm_map_first_entry(zap_old_map)->vme_start; 2397 end = vm_map_last_entry(zap_old_map)->vme_end; 2398 if (vm_map_lookup_entry(map, start, &entry1) || 2399 vm_map_lookup_entry(map, end, &entry2) || 2400 entry1 != entry2) { 2401 /* 2402 * Part of that range has already been 2403 * re-mapped: we can't restore the old 2404 * mappings... 2405 */ 2406 vm_map_enter_restore_failures++; 2407 } else { 2408 /* 2409 * Transfer the saved map entries from 2410 * "zap_old_map" to the original "map", 2411 * inserting them all after "entry1". 2412 */ 2413 for (entry2 = vm_map_first_entry(zap_old_map); 2414 entry2 != vm_map_to_entry(zap_old_map); 2415 entry2 = vm_map_first_entry(zap_old_map)) { 2416 vm_map_size_t entry_size; 2417 2418 entry_size = (entry2->vme_end - 2419 entry2->vme_start); 2420 vm_map_store_entry_unlink(zap_old_map, 2421 entry2); 2422 zap_old_map->size -= entry_size; 2423 vm_map_store_entry_link(map, entry1, entry2); 2424 map->size += entry_size; 2425 entry1 = entry2; 2426 } 2427 if (map->wiring_required) { 2428 /* 2429 * XXX TODO: we should rewire the 2430 * old pages here... 2431 */ 2432 } 2433 vm_map_enter_restore_successes++; 2434 } 2435 } 2436 } 2437 2438 /* 2439 * The caller is responsible for releasing the lock if it requested to 2440 * keep the map locked. 2441 */ 2442 if (map_locked && !keep_map_locked) { 2443 vm_map_unlock(map); 2444 } 2445 2446 /* 2447 * Get rid of the "zap_maps" and all the map entries that 2448 * they may still contain. 2449 */ 2450 if (zap_old_map != VM_MAP_NULL) { 2451 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); 2452 zap_old_map = VM_MAP_NULL; 2453 } 2454 if (zap_new_map != VM_MAP_NULL) { 2455 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); 2456 zap_new_map = VM_MAP_NULL; 2457 } 2458 2459 return result; 2460 2461#undef RETURN 2462} 2463 2464/* 2465 * Counters for the prefault optimization. 
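 * vm_prefault_nb_pages counts pages entered ahead of time;
 * vm_prefault_nb_bailout counts how often the prefault loop
 * had to give up early.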
2466 */ 2467int64_t vm_prefault_nb_pages = 0; 2468int64_t vm_prefault_nb_bailout = 0; 2469 2470static kern_return_t 2471vm_map_enter_mem_object_helper( 2472 vm_map_t target_map, 2473 vm_map_offset_t *address, 2474 vm_map_size_t initial_size, 2475 vm_map_offset_t mask, 2476 int flags, 2477 ipc_port_t port, 2478 vm_object_offset_t offset, 2479 boolean_t copy, 2480 vm_prot_t cur_protection, 2481 vm_prot_t max_protection, 2482 vm_inherit_t inheritance, 2483 upl_page_list_ptr_t page_list, 2484 unsigned int page_list_count) 2485{ 2486 vm_map_address_t map_addr; 2487 vm_map_size_t map_size; 2488 vm_object_t object; 2489 vm_object_size_t size; 2490 kern_return_t result; 2491 boolean_t mask_cur_protection, mask_max_protection; 2492 boolean_t try_prefault = (page_list_count != 0); 2493 vm_map_offset_t offset_in_mapping; 2494 2495 mask_cur_protection = cur_protection & VM_PROT_IS_MASK; 2496 mask_max_protection = max_protection & VM_PROT_IS_MASK; 2497 cur_protection &= ~VM_PROT_IS_MASK; 2498 max_protection &= ~VM_PROT_IS_MASK; 2499 2500 /* 2501 * Check arguments for validity 2502 */ 2503 if ((target_map == VM_MAP_NULL) || 2504 (cur_protection & ~VM_PROT_ALL) || 2505 (max_protection & ~VM_PROT_ALL) || 2506 (inheritance > VM_INHERIT_LAST_VALID) || 2507 (try_prefault && (copy || !page_list)) || 2508 initial_size == 0) 2509 return KERN_INVALID_ARGUMENT; 2510 2511 map_addr = vm_map_trunc_page(*address, 2512 VM_MAP_PAGE_MASK(target_map)); 2513 map_size = vm_map_round_page(initial_size, 2514 VM_MAP_PAGE_MASK(target_map)); 2515 size = vm_object_round_page(initial_size); 2516 2517 /* 2518 * Find the vm object (if any) corresponding to this port. 2519 */ 2520 if (!IP_VALID(port)) { 2521 object = VM_OBJECT_NULL; 2522 offset = 0; 2523 copy = FALSE; 2524 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) { 2525 vm_named_entry_t named_entry; 2526 2527 named_entry = (vm_named_entry_t) port->ip_kobject; 2528 2529 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 2530 offset += named_entry->data_offset; 2531 } 2532 2533 /* a few checks to make sure user is obeying rules */ 2534 if (size == 0) { 2535 if (offset >= named_entry->size) 2536 return KERN_INVALID_RIGHT; 2537 size = named_entry->size - offset; 2538 } 2539 if (mask_max_protection) { 2540 max_protection &= named_entry->protection; 2541 } 2542 if (mask_cur_protection) { 2543 cur_protection &= named_entry->protection; 2544 } 2545 if ((named_entry->protection & max_protection) != 2546 max_protection) 2547 return KERN_INVALID_RIGHT; 2548 if ((named_entry->protection & cur_protection) != 2549 cur_protection) 2550 return KERN_INVALID_RIGHT; 2551 if (offset + size < offset) { 2552 /* overflow */ 2553 return KERN_INVALID_ARGUMENT; 2554 } 2555 if (named_entry->size < (offset + size)) 2556 return KERN_INVALID_ARGUMENT; 2557 2558 if (named_entry->is_copy) { 2559 /* for a vm_map_copy, we can only map it whole */ 2560 if ((size != named_entry->size) && 2561 (vm_map_round_page(size, 2562 VM_MAP_PAGE_MASK(target_map)) == 2563 named_entry->size)) { 2564 /* XXX FBDP use the rounded size... */ 2565 size = vm_map_round_page( 2566 size, 2567 VM_MAP_PAGE_MASK(target_map)); 2568 } 2569 2570 if (!(flags & VM_FLAGS_ANYWHERE) && 2571 (offset != 0 || 2572 size != named_entry->size)) { 2573 /* 2574 * XXX for a mapping at a "fixed" address, 2575 * we can't trim after mapping the whole 2576 * memory entry, so reject a request for a 2577 * partial mapping. 
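 * (With VM_FLAGS_ANYWHERE we map the whole memory entry and then
 * trim the front and back of the mapping instead; see below.)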
2578 */ 2579 return KERN_INVALID_ARGUMENT; 2580 } 2581 } 2582 2583 /* the callers parameter offset is defined to be the */ 2584 /* offset from beginning of named entry offset in object */ 2585 offset = offset + named_entry->offset; 2586 2587 if (! VM_MAP_PAGE_ALIGNED(size, 2588 VM_MAP_PAGE_MASK(target_map))) { 2589 /* 2590 * Let's not map more than requested; 2591 * vm_map_enter() will handle this "not map-aligned" 2592 * case. 2593 */ 2594 map_size = size; 2595 } 2596 2597 named_entry_lock(named_entry); 2598 if (named_entry->is_sub_map) { 2599 vm_map_t submap; 2600 2601 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 2602 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap."); 2603 } 2604 2605 submap = named_entry->backing.map; 2606 vm_map_lock(submap); 2607 vm_map_reference(submap); 2608 vm_map_unlock(submap); 2609 named_entry_unlock(named_entry); 2610 2611 result = vm_map_enter(target_map, 2612 &map_addr, 2613 map_size, 2614 mask, 2615 flags | VM_FLAGS_SUBMAP, 2616 (vm_object_t) submap, 2617 offset, 2618 copy, 2619 cur_protection, 2620 max_protection, 2621 inheritance); 2622 if (result != KERN_SUCCESS) { 2623 vm_map_deallocate(submap); 2624 } else { 2625 /* 2626 * No need to lock "submap" just to check its 2627 * "mapped" flag: that flag is never reset 2628 * once it's been set and if we race, we'll 2629 * just end up setting it twice, which is OK. 2630 */ 2631 if (submap->mapped_in_other_pmaps == FALSE && 2632 vm_map_pmap(submap) != PMAP_NULL && 2633 vm_map_pmap(submap) != 2634 vm_map_pmap(target_map)) { 2635 /* 2636 * This submap is being mapped in a map 2637 * that uses a different pmap. 2638 * Set its "mapped_in_other_pmaps" flag 2639 * to indicate that we now need to 2640 * remove mappings from all pmaps rather 2641 * than just the submap's pmap. 
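 * (vm_map_submap() applies the same rule when a submap is
 * installed directly into a map; see further below.)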
2642 */ 2643 vm_map_lock(submap); 2644 submap->mapped_in_other_pmaps = TRUE; 2645 vm_map_unlock(submap); 2646 } 2647 *address = map_addr; 2648 } 2649 return result; 2650 2651 } else if (named_entry->is_pager) { 2652 unsigned int access; 2653 vm_prot_t protections; 2654 unsigned int wimg_mode; 2655 2656 protections = named_entry->protection & VM_PROT_ALL; 2657 access = GET_MAP_MEM(named_entry->protection); 2658 2659 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 2660 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap."); 2661 } 2662 2663 object = vm_object_enter(named_entry->backing.pager, 2664 named_entry->size, 2665 named_entry->internal, 2666 FALSE, 2667 FALSE); 2668 if (object == VM_OBJECT_NULL) { 2669 named_entry_unlock(named_entry); 2670 return KERN_INVALID_OBJECT; 2671 } 2672 2673 /* JMM - drop reference on pager here */ 2674 2675 /* create an extra ref for the named entry */ 2676 vm_object_lock(object); 2677 vm_object_reference_locked(object); 2678 named_entry->backing.object = object; 2679 named_entry->is_pager = FALSE; 2680 named_entry_unlock(named_entry); 2681 2682 wimg_mode = object->wimg_bits; 2683 2684 if (access == MAP_MEM_IO) { 2685 wimg_mode = VM_WIMG_IO; 2686 } else if (access == MAP_MEM_COPYBACK) { 2687 wimg_mode = VM_WIMG_USE_DEFAULT; 2688 } else if (access == MAP_MEM_INNERWBACK) { 2689 wimg_mode = VM_WIMG_INNERWBACK; 2690 } else if (access == MAP_MEM_WTHRU) { 2691 wimg_mode = VM_WIMG_WTHRU; 2692 } else if (access == MAP_MEM_WCOMB) { 2693 wimg_mode = VM_WIMG_WCOMB; 2694 } 2695 2696 /* wait for object (if any) to be ready */ 2697 if (!named_entry->internal) { 2698 while (!object->pager_ready) { 2699 vm_object_wait( 2700 object, 2701 VM_OBJECT_EVENT_PAGER_READY, 2702 THREAD_UNINT); 2703 vm_object_lock(object); 2704 } 2705 } 2706 2707 if (object->wimg_bits != wimg_mode) 2708 vm_object_change_wimg_mode(object, wimg_mode); 2709 2710#if VM_OBJECT_TRACKING_OP_TRUESHARE 2711 if (!object->true_share && 2712 vm_object_tracking_inited) { 2713 void *bt[VM_OBJECT_TRACKING_BTDEPTH]; 2714 int num = 0; 2715 2716 num = OSBacktrace(bt, 2717 VM_OBJECT_TRACKING_BTDEPTH); 2718 btlog_add_entry(vm_object_tracking_btlog, 2719 object, 2720 VM_OBJECT_TRACKING_OP_TRUESHARE, 2721 bt, 2722 num); 2723 } 2724#endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */ 2725 2726 object->true_share = TRUE; 2727 2728 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) 2729 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 2730 vm_object_unlock(object); 2731 2732 } else if (named_entry->is_copy) { 2733 kern_return_t kr; 2734 vm_map_copy_t copy_map; 2735 vm_map_entry_t copy_entry; 2736 vm_map_offset_t copy_addr; 2737 2738 if (flags & ~(VM_FLAGS_FIXED | 2739 VM_FLAGS_ANYWHERE | 2740 VM_FLAGS_OVERWRITE | 2741 VM_FLAGS_RETURN_DATA_ADDR)) { 2742 named_entry_unlock(named_entry); 2743 return KERN_INVALID_ARGUMENT; 2744 } 2745 2746 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 2747 offset_in_mapping = offset - vm_object_trunc_page(offset); 2748 offset = vm_object_trunc_page(offset); 2749 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset; 2750 } 2751 2752 copy_map = named_entry->backing.copy; 2753 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST); 2754 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) { 2755 /* unsupported type; should not happen */ 2756 printf("vm_map_enter_mem_object: " 2757 "memory_entry->backing.copy " 2758 "unsupported type 0x%x\n", 2759 copy_map->type); 2760 named_entry_unlock(named_entry); 2761 return KERN_INVALID_ARGUMENT; 2762 } 2763 2764 /* reserve a 
contiguous range */ 2765 kr = vm_map_enter(target_map, 2766 &map_addr, 2767 /* map whole mem entry, trim later: */ 2768 named_entry->size, 2769 mask, 2770 flags & (VM_FLAGS_ANYWHERE | 2771 VM_FLAGS_OVERWRITE | 2772 VM_FLAGS_RETURN_DATA_ADDR), 2773 VM_OBJECT_NULL, 2774 0, 2775 FALSE, /* copy */ 2776 cur_protection, 2777 max_protection, 2778 inheritance); 2779 if (kr != KERN_SUCCESS) { 2780 named_entry_unlock(named_entry); 2781 return kr; 2782 } 2783 2784 copy_addr = map_addr; 2785 2786 for (copy_entry = vm_map_copy_first_entry(copy_map); 2787 copy_entry != vm_map_copy_to_entry(copy_map); 2788 copy_entry = copy_entry->vme_next) { 2789 int remap_flags = 0; 2790 vm_map_t copy_submap; 2791 vm_object_t copy_object; 2792 vm_map_size_t copy_size; 2793 vm_object_offset_t copy_offset; 2794 2795 copy_offset = copy_entry->offset; 2796 copy_size = (copy_entry->vme_end - 2797 copy_entry->vme_start); 2798 2799 /* sanity check */ 2800 if ((copy_addr + copy_size) > 2801 (map_addr + 2802 named_entry->size /* XXX full size */ )) { 2803 /* over-mapping too much !? */ 2804 kr = KERN_INVALID_ARGUMENT; 2805 /* abort */ 2806 break; 2807 } 2808 2809 /* take a reference on the object */ 2810 if (copy_entry->is_sub_map) { 2811 remap_flags |= VM_FLAGS_SUBMAP; 2812 copy_submap = 2813 copy_entry->object.sub_map; 2814 vm_map_lock(copy_submap); 2815 vm_map_reference(copy_submap); 2816 vm_map_unlock(copy_submap); 2817 copy_object = (vm_object_t) copy_submap; 2818 } else { 2819 copy_object = 2820 copy_entry->object.vm_object; 2821 vm_object_reference(copy_object); 2822 } 2823 2824 /* over-map the object into destination */ 2825 remap_flags |= flags; 2826 remap_flags |= VM_FLAGS_FIXED; 2827 remap_flags |= VM_FLAGS_OVERWRITE; 2828 remap_flags &= ~VM_FLAGS_ANYWHERE; 2829 kr = vm_map_enter(target_map, 2830 &copy_addr, 2831 copy_size, 2832 (vm_map_offset_t) 0, 2833 remap_flags, 2834 copy_object, 2835 copy_offset, 2836 copy, 2837 cur_protection, 2838 max_protection, 2839 inheritance); 2840 if (kr != KERN_SUCCESS) { 2841 if (copy_entry->is_sub_map) { 2842 vm_map_deallocate(copy_submap); 2843 } else { 2844 vm_object_deallocate(copy_object); 2845 } 2846 /* abort */ 2847 break; 2848 } 2849 2850 /* next mapping */ 2851 copy_addr += copy_size; 2852 } 2853 2854 if (kr == KERN_SUCCESS) { 2855 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 2856 *address = map_addr + offset_in_mapping; 2857 } else { 2858 *address = map_addr; 2859 } 2860 2861 if (offset) { 2862 /* 2863 * Trim in front, from 0 to "offset". 2864 */ 2865 vm_map_remove(target_map, 2866 map_addr, 2867 map_addr + offset, 2868 0); 2869 *address += offset; 2870 } 2871 if (offset + map_size < named_entry->size) { 2872 /* 2873 * Trim in back, from 2874 * "offset + map_size" to 2875 * "named_entry->size". 2876 */ 2877 vm_map_remove(target_map, 2878 (map_addr + 2879 offset + map_size), 2880 (map_addr + 2881 named_entry->size), 2882 0); 2883 } 2884 } 2885 named_entry_unlock(named_entry); 2886 2887 if (kr != KERN_SUCCESS) { 2888 if (! (flags & VM_FLAGS_OVERWRITE)) { 2889 /* deallocate the contiguous range */ 2890 (void) vm_deallocate(target_map, 2891 map_addr, 2892 map_size); 2893 } 2894 } 2895 2896 return kr; 2897 2898 } else { 2899 /* This is the case where we are going to map */ 2900 /* an already mapped object. If the object is */ 2901 /* not ready it is internal. An external */ 2902 /* object cannot be mapped until it is ready */ 2903 /* we can therefore avoid the ready check */ 2904 /* in this case.
*/ 2905 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 2906 offset_in_mapping = offset - vm_object_trunc_page(offset); 2907 offset = vm_object_trunc_page(offset); 2908 map_size = vm_object_round_page(offset + offset_in_mapping + initial_size) - offset; 2909 } 2910 2911 object = named_entry->backing.object; 2912 assert(object != VM_OBJECT_NULL); 2913 named_entry_unlock(named_entry); 2914 vm_object_reference(object); 2915 } 2916 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { 2917 /* 2918 * JMM - This is temporary until we unify named entries 2919 * and raw memory objects. 2920 * 2921 * Detected fake ip_kotype for a memory object. In 2922 * this case, the port isn't really a port at all, but 2923 * instead is just a raw memory object. 2924 */ 2925 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 2926 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object."); 2927 } 2928 2929 object = vm_object_enter((memory_object_t)port, 2930 size, FALSE, FALSE, FALSE); 2931 if (object == VM_OBJECT_NULL) 2932 return KERN_INVALID_OBJECT; 2933 2934 /* wait for object (if any) to be ready */ 2935 if (object != VM_OBJECT_NULL) { 2936 if (object == kernel_object) { 2937 printf("Warning: Attempt to map kernel object" 2938 " by a non-private kernel entity\n"); 2939 return KERN_INVALID_OBJECT; 2940 } 2941 if (!object->pager_ready) { 2942 vm_object_lock(object); 2943 2944 while (!object->pager_ready) { 2945 vm_object_wait(object, 2946 VM_OBJECT_EVENT_PAGER_READY, 2947 THREAD_UNINT); 2948 vm_object_lock(object); 2949 } 2950 vm_object_unlock(object); 2951 } 2952 } 2953 } else { 2954 return KERN_INVALID_OBJECT; 2955 } 2956 2957 if (object != VM_OBJECT_NULL && 2958 object->named && 2959 object->pager != MEMORY_OBJECT_NULL && 2960 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { 2961 memory_object_t pager; 2962 vm_prot_t pager_prot; 2963 kern_return_t kr; 2964 2965 /* 2966 * For "named" VM objects, let the pager know that the 2967 * memory object is being mapped. Some pagers need to keep 2968 * track of this, to know when they can reclaim the memory 2969 * object, for example. 2970 * VM calls memory_object_map() for each mapping (specifying 2971 * the protection of each mapping) and calls 2972 * memory_object_last_unmap() when all the mappings are gone. 2973 */ 2974 pager_prot = max_protection; 2975 if (copy) { 2976 /* 2977 * Copy-On-Write mapping: won't modify the 2978 * memory object. 2979 */ 2980 pager_prot &= ~VM_PROT_WRITE; 2981 } 2982 vm_object_lock(object); 2983 pager = object->pager; 2984 if (object->named && 2985 pager != MEMORY_OBJECT_NULL && 2986 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { 2987 assert(object->pager_ready); 2988 vm_object_mapping_wait(object, THREAD_UNINT); 2989 vm_object_mapping_begin(object); 2990 vm_object_unlock(object); 2991 2992 kr = memory_object_map(pager, pager_prot); 2993 assert(kr == KERN_SUCCESS); 2994 2995 vm_object_lock(object); 2996 vm_object_mapping_end(object); 2997 } 2998 vm_object_unlock(object); 2999 } 3000 3001 /* 3002 * Perform the copy if requested 3003 */ 3004 3005 if (copy) { 3006 vm_object_t new_object; 3007 vm_object_offset_t new_offset; 3008 3009 result = vm_object_copy_strategically(object, offset, size, 3010 &new_object, &new_offset, 3011 &copy); 3012 3013 3014 if (result == KERN_MEMORY_RESTART_COPY) { 3015 boolean_t success; 3016 boolean_t src_needs_copy; 3017 3018 /* 3019 * XXX 3020 * We currently ignore src_needs_copy.
3021 * This really is the issue of how to make 3022 * MEMORY_OBJECT_COPY_SYMMETRIC safe for 3023 * non-kernel users to use. Solution forthcoming. 3024 * In the meantime, since we don't allow non-kernel 3025 * memory managers to specify symmetric copy, 3026 * we won't run into problems here. 3027 */ 3028 new_object = object; 3029 new_offset = offset; 3030 success = vm_object_copy_quickly(&new_object, 3031 new_offset, size, 3032 &src_needs_copy, 3033 &copy); 3034 assert(success); 3035 result = KERN_SUCCESS; 3036 } 3037 /* 3038 * Throw away the reference to the 3039 * original object, as it won't be mapped. 3040 */ 3041 3042 vm_object_deallocate(object); 3043 3044 if (result != KERN_SUCCESS) 3045 return result; 3046 3047 object = new_object; 3048 offset = new_offset; 3049 } 3050 3051 /* 3052 * If users want to try to prefault pages, the mapping and prefault 3053 * need to be atomic. 3054 */ 3055 if (try_prefault) 3056 flags |= VM_FLAGS_KEEP_MAP_LOCKED; 3057 result = vm_map_enter(target_map, 3058 &map_addr, map_size, 3059 (vm_map_offset_t)mask, 3060 flags, 3061 object, offset, 3062 copy, 3063 cur_protection, max_protection, inheritance); 3064 if (result != KERN_SUCCESS) 3065 vm_object_deallocate(object); 3066 3067 /* 3068 * Try to prefault, and do not forget to release the vm map lock. 3069 */ 3070 if (result == KERN_SUCCESS && try_prefault) { 3071 mach_vm_address_t va = map_addr; 3072 kern_return_t kr = KERN_SUCCESS; 3073 unsigned int i = 0; 3074 3075 for (i = 0; i < page_list_count; ++i) { 3076 if (UPL_VALID_PAGE(page_list, i)) { 3077 /* 3078 * If this function call failed, we should stop 3079 * trying to optimize, other calls are likely 3080 * going to fail too. 3081 * 3082 * We are not gonna report an error for such 3083 * failure though. That's an optimization, not 3084 * something critical.
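 * On the first pmap_enter_options() failure we stop prefaulting
 * and leave the remaining pages to be faulted in on demand.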
3085 */ 3086 kr = pmap_enter_options(target_map->pmap, 3087 va, UPL_PHYS_PAGE(page_list, i), 3088 cur_protection, VM_PROT_NONE, 3089 0, TRUE, PMAP_OPTIONS_NOWAIT, NULL); 3090 if (kr != KERN_SUCCESS) { 3091 OSIncrementAtomic64(&vm_prefault_nb_bailout); 3092 goto BailOut; 3093 } 3094 OSIncrementAtomic64(&vm_prefault_nb_pages); 3095 } 3096 3097 /* Next virtual address */ 3098 va += PAGE_SIZE; 3099 } 3100BailOut: 3101 vm_map_unlock(target_map); 3102 } 3103 3104 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 3105 *address = map_addr + offset_in_mapping; 3106 } else { 3107 *address = map_addr; 3108 } 3109 return result; 3110} 3111 3112kern_return_t 3113vm_map_enter_mem_object( 3114 vm_map_t target_map, 3115 vm_map_offset_t *address, 3116 vm_map_size_t initial_size, 3117 vm_map_offset_t mask, 3118 int flags, 3119 ipc_port_t port, 3120 vm_object_offset_t offset, 3121 boolean_t copy, 3122 vm_prot_t cur_protection, 3123 vm_prot_t max_protection, 3124 vm_inherit_t inheritance) 3125{ 3126 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags, 3127 port, offset, copy, cur_protection, max_protection, 3128 inheritance, NULL, 0); 3129} 3130 3131kern_return_t 3132vm_map_enter_mem_object_prefault( 3133 vm_map_t target_map, 3134 vm_map_offset_t *address, 3135 vm_map_size_t initial_size, 3136 vm_map_offset_t mask, 3137 int flags, 3138 ipc_port_t port, 3139 vm_object_offset_t offset, 3140 vm_prot_t cur_protection, 3141 vm_prot_t max_protection, 3142 upl_page_list_ptr_t page_list, 3143 unsigned int page_list_count) 3144{ 3145 return vm_map_enter_mem_object_helper(target_map, address, initial_size, mask, flags, 3146 port, offset, FALSE, cur_protection, max_protection, 3147 VM_INHERIT_DEFAULT, page_list, page_list_count); 3148} 3149 3150 3151kern_return_t 3152vm_map_enter_mem_object_control( 3153 vm_map_t target_map, 3154 vm_map_offset_t *address, 3155 vm_map_size_t initial_size, 3156 vm_map_offset_t mask, 3157 int flags, 3158 memory_object_control_t control, 3159 vm_object_offset_t offset, 3160 boolean_t copy, 3161 vm_prot_t cur_protection, 3162 vm_prot_t max_protection, 3163 vm_inherit_t inheritance) 3164{ 3165 vm_map_address_t map_addr; 3166 vm_map_size_t map_size; 3167 vm_object_t object; 3168 vm_object_size_t size; 3169 kern_return_t result; 3170 memory_object_t pager; 3171 vm_prot_t pager_prot; 3172 kern_return_t kr; 3173 3174 /* 3175 * Check arguments for validity 3176 */ 3177 if ((target_map == VM_MAP_NULL) || 3178 (cur_protection & ~VM_PROT_ALL) || 3179 (max_protection & ~VM_PROT_ALL) || 3180 (inheritance > VM_INHERIT_LAST_VALID) || 3181 initial_size == 0) 3182 return KERN_INVALID_ARGUMENT; 3183 3184 map_addr = vm_map_trunc_page(*address, 3185 VM_MAP_PAGE_MASK(target_map)); 3186 map_size = vm_map_round_page(initial_size, 3187 VM_MAP_PAGE_MASK(target_map)); 3188 size = vm_object_round_page(initial_size); 3189 3190 object = memory_object_control_to_vm_object(control); 3191 3192 if (object == VM_OBJECT_NULL) 3193 return KERN_INVALID_OBJECT; 3194 3195 if (object == kernel_object) { 3196 printf("Warning: Attempt to map kernel object" 3197 " by a non-private kernel entity\n"); 3198 return KERN_INVALID_OBJECT; 3199 } 3200 3201 vm_object_lock(object); 3202 object->ref_count++; 3203 vm_object_res_reference(object); 3204 3205 /* 3206 * For "named" VM objects, let the pager know that the 3207 * memory object is being mapped. Some pagers need to keep 3208 * track of this, to know when they can reclaim the memory 3209 * object, for example. 
3210 * VM calls memory_object_map() for each mapping (specifying 3211 * the protection of each mapping) and calls 3212 * memory_object_last_unmap() when all the mappings are gone. 3213 */ 3214 pager_prot = max_protection; 3215 if (copy) { 3216 pager_prot &= ~VM_PROT_WRITE; 3217 } 3218 pager = object->pager; 3219 if (object->named && 3220 pager != MEMORY_OBJECT_NULL && 3221 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { 3222 assert(object->pager_ready); 3223 vm_object_mapping_wait(object, THREAD_UNINT); 3224 vm_object_mapping_begin(object); 3225 vm_object_unlock(object); 3226 3227 kr = memory_object_map(pager, pager_prot); 3228 assert(kr == KERN_SUCCESS); 3229 3230 vm_object_lock(object); 3231 vm_object_mapping_end(object); 3232 } 3233 vm_object_unlock(object); 3234 3235 /* 3236 * Perform the copy if requested 3237 */ 3238 3239 if (copy) { 3240 vm_object_t new_object; 3241 vm_object_offset_t new_offset; 3242 3243 result = vm_object_copy_strategically(object, offset, size, 3244 &new_object, &new_offset, 3245 &copy); 3246 3247 3248 if (result == KERN_MEMORY_RESTART_COPY) { 3249 boolean_t success; 3250 boolean_t src_needs_copy; 3251 3252 /* 3253 * XXX 3254 * We currently ignore src_needs_copy. 3255 * This really is the issue of how to make 3256 * MEMORY_OBJECT_COPY_SYMMETRIC safe for 3257 * non-kernel users to use. Solution forthcoming. 3258 * In the meantime, since we don't allow non-kernel 3259 * memory managers to specify symmetric copy, 3260 * we won't run into problems here. 3261 */ 3262 new_object = object; 3263 new_offset = offset; 3264 success = vm_object_copy_quickly(&new_object, 3265 new_offset, size, 3266 &src_needs_copy, 3267 &copy); 3268 assert(success); 3269 result = KERN_SUCCESS; 3270 } 3271 /* 3272 * Throw away the reference to the 3273 * original object, as it won't be mapped. 3274 */ 3275 3276 vm_object_deallocate(object); 3277 3278 if (result != KERN_SUCCESS) 3279 return result; 3280 3281 object = new_object; 3282 offset = new_offset; 3283 } 3284 3285 result = vm_map_enter(target_map, 3286 &map_addr, map_size, 3287 (vm_map_offset_t)mask, 3288 flags, 3289 object, offset, 3290 copy, 3291 cur_protection, max_protection, inheritance); 3292 if (result != KERN_SUCCESS) 3293 vm_object_deallocate(object); 3294 *address = map_addr; 3295 3296 return result; 3297} 3298 3299 3300#if VM_CPM 3301 3302#ifdef MACH_ASSERT 3303extern pmap_paddr_t avail_start, avail_end; 3304#endif 3305 3306/* 3307 * Allocate memory in the specified map, with the caveat that 3308 * the memory is physically contiguous. This call may fail 3309 * if the system can't find sufficient contiguous memory. 3310 * This call may cause or lead to heart-stopping amounts of 3311 * paging activity. 3312 * 3313 * Memory obtained from this call should be freed in the 3314 * normal way, viz., via vm_deallocate.
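 * The real implementation is only built when VM_CPM is configured;
 * otherwise the stub further below simply returns KERN_FAILURE.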
3315 */ 3316kern_return_t 3317vm_map_enter_cpm( 3318 vm_map_t map, 3319 vm_map_offset_t *addr, 3320 vm_map_size_t size, 3321 int flags) 3322{ 3323 vm_object_t cpm_obj; 3324 pmap_t pmap; 3325 vm_page_t m, pages; 3326 kern_return_t kr; 3327 vm_map_offset_t va, start, end, offset; 3328#if MACH_ASSERT 3329 vm_map_offset_t prev_addr = 0; 3330#endif /* MACH_ASSERT */ 3331 3332 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); 3333 3334 if (size == 0) { 3335 *addr = 0; 3336 return KERN_SUCCESS; 3337 } 3338 if (anywhere) 3339 *addr = vm_map_min(map); 3340 else 3341 *addr = vm_map_trunc_page(*addr, 3342 VM_MAP_PAGE_MASK(map)); 3343 size = vm_map_round_page(size, 3344 VM_MAP_PAGE_MASK(map)); 3345 3346 /* 3347 * LP64todo - cpm_allocate should probably allow 3348 * allocations of >4GB, but not with the current 3349 * algorithm, so just cast down the size for now. 3350 */ 3351 if (size > VM_MAX_ADDRESS) 3352 return KERN_RESOURCE_SHORTAGE; 3353 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size), 3354 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) 3355 return kr; 3356 3357 cpm_obj = vm_object_allocate((vm_object_size_t)size); 3358 assert(cpm_obj != VM_OBJECT_NULL); 3359 assert(cpm_obj->internal); 3360 assert(cpm_obj->vo_size == (vm_object_size_t)size); 3361 assert(cpm_obj->can_persist == FALSE); 3362 assert(cpm_obj->pager_created == FALSE); 3363 assert(cpm_obj->pageout == FALSE); 3364 assert(cpm_obj->shadow == VM_OBJECT_NULL); 3365 3366 /* 3367 * Insert pages into object. 3368 */ 3369 3370 vm_object_lock(cpm_obj); 3371 for (offset = 0; offset < size; offset += PAGE_SIZE) { 3372 m = pages; 3373 pages = NEXT_PAGE(m); 3374 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; 3375 3376 assert(!m->gobbled); 3377 assert(!m->wanted); 3378 assert(!m->pageout); 3379 assert(!m->tabled); 3380 assert(VM_PAGE_WIRED(m)); 3381 /* 3382 * ENCRYPTED SWAP: 3383 * "m" is not supposed to be pageable, so it 3384 * should not be encrypted. It wouldn't be safe 3385 * to enter it in a new VM object while encrypted. 3386 */ 3387 ASSERT_PAGE_DECRYPTED(m); 3388 assert(m->busy); 3389 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT)); 3390 3391 m->busy = FALSE; 3392 vm_page_insert(m, cpm_obj, offset); 3393 } 3394 assert(cpm_obj->resident_page_count == size / PAGE_SIZE); 3395 vm_object_unlock(cpm_obj); 3396 3397 /* 3398 * Hang onto a reference on the object in case a 3399 * multi-threaded application for some reason decides 3400 * to deallocate the portion of the address space into 3401 * which we will insert this object. 3402 * 3403 * Unfortunately, we must insert the object now before 3404 * we can talk to the pmap module about which addresses 3405 * must be wired down. Hence, the race with a multi- 3406 * threaded app. 3407 */ 3408 vm_object_reference(cpm_obj); 3409 3410 /* 3411 * Insert object into map. 3412 */ 3413 3414 kr = vm_map_enter( 3415 map, 3416 addr, 3417 size, 3418 (vm_map_offset_t)0, 3419 flags, 3420 cpm_obj, 3421 (vm_object_offset_t)0, 3422 FALSE, 3423 VM_PROT_ALL, 3424 VM_PROT_ALL, 3425 VM_INHERIT_DEFAULT); 3426 3427 if (kr != KERN_SUCCESS) { 3428 /* 3429 * A CPM object doesn't have can_persist set, 3430 * so all we have to do is deallocate it to 3431 * free up these pages. 
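 * We drop both the reference acquired above and the reference
 * from the object's creation.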
3432 */ 3433 assert(cpm_obj->pager_created == FALSE); 3434 assert(cpm_obj->can_persist == FALSE); 3435 assert(cpm_obj->pageout == FALSE); 3436 assert(cpm_obj->shadow == VM_OBJECT_NULL); 3437 vm_object_deallocate(cpm_obj); /* kill acquired ref */ 3438 vm_object_deallocate(cpm_obj); /* kill creation ref */ 3439 } 3440 3441 /* 3442 * Inform the physical mapping system that the 3443 * range of addresses may not fault, so that 3444 * page tables and such can be locked down as well. 3445 */ 3446 start = *addr; 3447 end = start + size; 3448 pmap = vm_map_pmap(map); 3449 pmap_pageable(pmap, start, end, FALSE); 3450 3451 /* 3452 * Enter each page into the pmap, to avoid faults. 3453 * Note that this loop could be coded more efficiently, 3454 * if the need arose, rather than looking up each page 3455 * again. 3456 */ 3457 for (offset = 0, va = start; offset < size; 3458 va += PAGE_SIZE, offset += PAGE_SIZE) { 3459 int type_of_fault; 3460 3461 vm_object_lock(cpm_obj); 3462 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); 3463 assert(m != VM_PAGE_NULL); 3464 3465 vm_page_zero_fill(m); 3466 3467 type_of_fault = DBG_ZERO_FILL_FAULT; 3468 3469 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE, 3470 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, 0, NULL, 3471 &type_of_fault); 3472 3473 vm_object_unlock(cpm_obj); 3474 } 3475 3476#if MACH_ASSERT 3477 /* 3478 * Verify ordering in address space. 3479 */ 3480 for (offset = 0; offset < size; offset += PAGE_SIZE) { 3481 vm_object_lock(cpm_obj); 3482 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); 3483 vm_object_unlock(cpm_obj); 3484 if (m == VM_PAGE_NULL) 3485 panic("vm_allocate_cpm: obj %p off 0x%llx no page", 3486 cpm_obj, (uint64_t)offset); 3487 assert(m->tabled); 3488 assert(!m->busy); 3489 assert(!m->wanted); 3490 assert(!m->fictitious); 3491 assert(!m->private); 3492 assert(!m->absent); 3493 assert(!m->error); 3494 assert(!m->cleaning); 3495 assert(!m->laundry); 3496 assert(!m->precious); 3497 assert(!m->clustered); 3498 if (offset != 0) { 3499 if (m->phys_page != prev_addr + 1) { 3500 printf("start 0x%llx end 0x%llx va 0x%llx\n", 3501 (uint64_t)start, (uint64_t)end, (uint64_t)va); 3502 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset); 3503 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr); 3504 panic("vm_allocate_cpm: pages not contig!"); 3505 } 3506 } 3507 prev_addr = m->phys_page; 3508 } 3509#endif /* MACH_ASSERT */ 3510 3511 vm_object_deallocate(cpm_obj); /* kill extra ref */ 3512 3513 return kr; 3514} 3515 3516 3517#else /* VM_CPM */ 3518 3519/* 3520 * Interface is defined in all cases, but unless the kernel 3521 * is built explicitly for this option, the interface does 3522 * nothing. 3523 */ 3524 3525kern_return_t 3526vm_map_enter_cpm( 3527 __unused vm_map_t map, 3528 __unused vm_map_offset_t *addr, 3529 __unused vm_map_size_t size, 3530 __unused int flags) 3531{ 3532 return KERN_FAILURE; 3533} 3534#endif /* VM_CPM */ 3535 3536/* Not used without nested pmaps */ 3537#ifndef NO_NESTED_PMAP 3538/* 3539 * Clip and unnest a portion of a nested submap mapping. 3540 */ 3541 3542 3543static void 3544vm_map_clip_unnest( 3545 vm_map_t map, 3546 vm_map_entry_t entry, 3547 vm_map_offset_t start_unnest, 3548 vm_map_offset_t end_unnest) 3549{ 3550 vm_map_offset_t old_start_unnest = start_unnest; 3551 vm_map_offset_t old_end_unnest = end_unnest; 3552 3553 assert(entry->is_sub_map); 3554 assert(entry->object.sub_map != NULL); 3555 assert(entry->use_pmap); 3556 3557 /* 3558 * Query the platform for the optimal unnest range. 
3559 * DRK: There's some duplication of effort here, since 3560 * callers may have adjusted the range to some extent. This 3561 * routine was introduced to support 1GiB subtree nesting 3562 * for x86 platforms, which can also nest on 2MiB boundaries 3563 * depending on size/alignment. 3564 */ 3565 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) { 3566 log_unnest_badness(map, old_start_unnest, old_end_unnest); 3567 } 3568 3569 if (entry->vme_start > start_unnest || 3570 entry->vme_end < end_unnest) { 3571 panic("vm_map_clip_unnest(0x%llx,0x%llx): " 3572 "bad nested entry: start=0x%llx end=0x%llx\n", 3573 (long long)start_unnest, (long long)end_unnest, 3574 (long long)entry->vme_start, (long long)entry->vme_end); 3575 } 3576 3577 if (start_unnest > entry->vme_start) { 3578 _vm_map_clip_start(&map->hdr, 3579 entry, 3580 start_unnest); 3581 vm_map_store_update_first_free(map, map->first_free); 3582 } 3583 if (entry->vme_end > end_unnest) { 3584 _vm_map_clip_end(&map->hdr, 3585 entry, 3586 end_unnest); 3587 vm_map_store_update_first_free(map, map->first_free); 3588 } 3589 3590 pmap_unnest(map->pmap, 3591 entry->vme_start, 3592 entry->vme_end - entry->vme_start); 3593 if ((map->mapped_in_other_pmaps) && (map->ref_count)) { 3594 /* clean up parent map/maps */ 3595 vm_map_submap_pmap_clean( 3596 map, entry->vme_start, 3597 entry->vme_end, 3598 entry->object.sub_map, 3599 entry->offset); 3600 } 3601 entry->use_pmap = FALSE; 3602 if (entry->alias == VM_MEMORY_SHARED_PMAP) { 3603 entry->alias = VM_MEMORY_UNSHARED_PMAP; 3604 } 3605} 3606#endif /* NO_NESTED_PMAP */ 3607 3608/* 3609 * vm_map_clip_start: [ internal use only ] 3610 * 3611 * Asserts that the given entry begins at or after 3612 * the specified address; if necessary, 3613 * it splits the entry into two. 3614 */ 3615void 3616vm_map_clip_start( 3617 vm_map_t map, 3618 vm_map_entry_t entry, 3619 vm_map_offset_t startaddr) 3620{ 3621#ifndef NO_NESTED_PMAP 3622 if (entry->is_sub_map && 3623 entry->use_pmap && 3624 startaddr >= entry->vme_start) { 3625 vm_map_offset_t start_unnest, end_unnest; 3626 3627 /* 3628 * Make sure "startaddr" is no longer in a nested range 3629 * before we clip. Unnest only the minimum range the platform 3630 * can handle. 3631 * vm_map_clip_unnest may perform additional adjustments to 3632 * the unnest range. 3633 */ 3634 start_unnest = startaddr & ~(pmap_nesting_size_min - 1); 3635 end_unnest = start_unnest + pmap_nesting_size_min; 3636 vm_map_clip_unnest(map, entry, start_unnest, end_unnest); 3637 } 3638#endif /* NO_NESTED_PMAP */ 3639 if (startaddr > entry->vme_start) { 3640 if (entry->object.vm_object && 3641 !entry->is_sub_map && 3642 entry->object.vm_object->phys_contiguous) { 3643 pmap_remove(map->pmap, 3644 (addr64_t)(entry->vme_start), 3645 (addr64_t)(entry->vme_end)); 3646 } 3647 _vm_map_clip_start(&map->hdr, entry, startaddr); 3648 vm_map_store_update_first_free(map, map->first_free); 3649 } 3650} 3651 3652 3653#define vm_map_copy_clip_start(copy, entry, startaddr) \ 3654 MACRO_BEGIN \ 3655 if ((startaddr) > (entry)->vme_start) \ 3656 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \ 3657 MACRO_END 3658 3659/* 3660 * This routine is called only when it is known that 3661 * the entry must be split. 
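 * The new entry keeps the front portion; "entry" itself is
 * adjusted to start at the requested address.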
3662 */ 3663static void 3664_vm_map_clip_start( 3665 register struct vm_map_header *map_header, 3666 register vm_map_entry_t entry, 3667 register vm_map_offset_t start) 3668{ 3669 register vm_map_entry_t new_entry; 3670 3671 /* 3672 * Split off the front portion -- 3673 * note that we must insert the new 3674 * entry BEFORE this one, so that 3675 * this entry has the specified starting 3676 * address. 3677 */ 3678 3679 if (entry->map_aligned) { 3680 assert(VM_MAP_PAGE_ALIGNED(start, 3681 VM_MAP_HDR_PAGE_MASK(map_header))); 3682 } 3683 3684 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); 3685 vm_map_entry_copy_full(new_entry, entry); 3686 3687 new_entry->vme_end = start; 3688 assert(new_entry->vme_start < new_entry->vme_end); 3689 entry->offset += (start - entry->vme_start); 3690 assert(start < entry->vme_end); 3691 entry->vme_start = start; 3692 3693 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry); 3694 3695 if (entry->is_sub_map) 3696 vm_map_reference(new_entry->object.sub_map); 3697 else 3698 vm_object_reference(new_entry->object.vm_object); 3699} 3700 3701 3702/* 3703 * vm_map_clip_end: [ internal use only ] 3704 * 3705 * Asserts that the given entry ends at or before 3706 * the specified address; if necessary, 3707 * it splits the entry into two. 3708 */ 3709void 3710vm_map_clip_end( 3711 vm_map_t map, 3712 vm_map_entry_t entry, 3713 vm_map_offset_t endaddr) 3714{ 3715 if (endaddr > entry->vme_end) { 3716 /* 3717 * Within the scope of this clipping, limit "endaddr" to 3718 * the end of this map entry... 3719 */ 3720 endaddr = entry->vme_end; 3721 } 3722#ifndef NO_NESTED_PMAP 3723 if (entry->is_sub_map && entry->use_pmap) { 3724 vm_map_offset_t start_unnest, end_unnest; 3725 3726 /* 3727 * Make sure the range between the start of this entry and 3728 * the new "endaddr" is no longer nested before we clip. 3729 * Unnest only the minimum range the platform can handle. 3730 * vm_map_clip_unnest may perform additional adjustments to 3731 * the unnest range. 3732 */ 3733 start_unnest = entry->vme_start; 3734 end_unnest = 3735 (endaddr + pmap_nesting_size_min - 1) & 3736 ~(pmap_nesting_size_min - 1); 3737 vm_map_clip_unnest(map, entry, start_unnest, end_unnest); 3738 } 3739#endif /* NO_NESTED_PMAP */ 3740 if (endaddr < entry->vme_end) { 3741 if (entry->object.vm_object && 3742 !entry->is_sub_map && 3743 entry->object.vm_object->phys_contiguous) { 3744 pmap_remove(map->pmap, 3745 (addr64_t)(entry->vme_start), 3746 (addr64_t)(entry->vme_end)); 3747 } 3748 _vm_map_clip_end(&map->hdr, entry, endaddr); 3749 vm_map_store_update_first_free(map, map->first_free); 3750 } 3751} 3752 3753 3754#define vm_map_copy_clip_end(copy, entry, endaddr) \ 3755 MACRO_BEGIN \ 3756 if ((endaddr) < (entry)->vme_end) \ 3757 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \ 3758 MACRO_END 3759 3760/* 3761 * This routine is called only when it is known that 3762 * the entry must be split. 
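 * Here the new entry takes the back portion and is inserted after
 * "entry", which is truncated to end at the given address.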
3763 */ 3764static void 3765_vm_map_clip_end( 3766 register struct vm_map_header *map_header, 3767 register vm_map_entry_t entry, 3768 register vm_map_offset_t end) 3769{ 3770 register vm_map_entry_t new_entry; 3771 3772 /* 3773 * Create a new entry and insert it 3774 * AFTER the specified entry 3775 */ 3776 3777 if (entry->map_aligned) { 3778 assert(VM_MAP_PAGE_ALIGNED(end, 3779 VM_MAP_HDR_PAGE_MASK(map_header))); 3780 } 3781 3782 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); 3783 vm_map_entry_copy_full(new_entry, entry); 3784 3785 assert(entry->vme_start < end); 3786 new_entry->vme_start = entry->vme_end = end; 3787 new_entry->offset += (end - entry->vme_start); 3788 assert(new_entry->vme_start < new_entry->vme_end); 3789 3790 _vm_map_store_entry_link(map_header, entry, new_entry); 3791 3792 if (entry->is_sub_map) 3793 vm_map_reference(new_entry->object.sub_map); 3794 else 3795 vm_object_reference(new_entry->object.vm_object); 3796} 3797 3798 3799/* 3800 * VM_MAP_RANGE_CHECK: [ internal use only ] 3801 * 3802 * Asserts that the starting and ending region 3803 * addresses fall within the valid range of the map. 3804 */ 3805#define VM_MAP_RANGE_CHECK(map, start, end) \ 3806 MACRO_BEGIN \ 3807 if (start < vm_map_min(map)) \ 3808 start = vm_map_min(map); \ 3809 if (end > vm_map_max(map)) \ 3810 end = vm_map_max(map); \ 3811 if (start > end) \ 3812 start = end; \ 3813 MACRO_END 3814 3815/* 3816 * vm_map_range_check: [ internal use only ] 3817 * 3818 * Check that the region defined by the specified start and 3819 * end addresses are wholly contained within a single map 3820 * entry or set of adjacent map entries of the spacified map, 3821 * i.e. the specified region contains no unmapped space. 3822 * If any or all of the region is unmapped, FALSE is returned. 3823 * Otherwise, TRUE is returned and if the output argument 'entry' 3824 * is not NULL it points to the map entry containing the start 3825 * of the region. 3826 * 3827 * The map is locked for reading on entry and is left locked. 3828 */ 3829static boolean_t 3830vm_map_range_check( 3831 register vm_map_t map, 3832 register vm_map_offset_t start, 3833 register vm_map_offset_t end, 3834 vm_map_entry_t *entry) 3835{ 3836 vm_map_entry_t cur; 3837 register vm_map_offset_t prev; 3838 3839 /* 3840 * Basic sanity checks first 3841 */ 3842 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) 3843 return (FALSE); 3844 3845 /* 3846 * Check first if the region starts within a valid 3847 * mapping for the map. 3848 */ 3849 if (!vm_map_lookup_entry(map, start, &cur)) 3850 return (FALSE); 3851 3852 /* 3853 * Optimize for the case that the region is contained 3854 * in a single map entry. 3855 */ 3856 if (entry != (vm_map_entry_t *) NULL) 3857 *entry = cur; 3858 if (end <= cur->vme_end) 3859 return (TRUE); 3860 3861 /* 3862 * If the region is not wholly contained within a 3863 * single entry, walk the entries looking for holes. 3864 */ 3865 prev = cur->vme_end; 3866 cur = cur->vme_next; 3867 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) { 3868 if (end <= cur->vme_end) 3869 return (TRUE); 3870 prev = cur->vme_end; 3871 cur = cur->vme_next; 3872 } 3873 return (FALSE); 3874} 3875 3876/* 3877 * vm_map_submap: [ kernel use only ] 3878 * 3879 * Mark the given range as handled by a subordinate map. 
3880 * 3881 * This range must have been created with vm_map_find using 3882 * the vm_submap_object, and no other operations may have been 3883 * performed on this range prior to calling vm_map_submap. 3884 * 3885 * Only a limited number of operations can be performed 3886 * within this rage after calling vm_map_submap: 3887 * vm_fault 3888 * [Don't try vm_map_copyin!] 3889 * 3890 * To remove a submapping, one must first remove the 3891 * range from the superior map, and then destroy the 3892 * submap (if desired). [Better yet, don't try it.] 3893 */ 3894kern_return_t 3895vm_map_submap( 3896 vm_map_t map, 3897 vm_map_offset_t start, 3898 vm_map_offset_t end, 3899 vm_map_t submap, 3900 vm_map_offset_t offset, 3901#ifdef NO_NESTED_PMAP 3902 __unused 3903#endif /* NO_NESTED_PMAP */ 3904 boolean_t use_pmap) 3905{ 3906 vm_map_entry_t entry; 3907 register kern_return_t result = KERN_INVALID_ARGUMENT; 3908 register vm_object_t object; 3909 3910 vm_map_lock(map); 3911 3912 if (! vm_map_lookup_entry(map, start, &entry)) { 3913 entry = entry->vme_next; 3914 } 3915 3916 if (entry == vm_map_to_entry(map) || 3917 entry->is_sub_map) { 3918 vm_map_unlock(map); 3919 return KERN_INVALID_ARGUMENT; 3920 } 3921 3922 vm_map_clip_start(map, entry, start); 3923 vm_map_clip_end(map, entry, end); 3924 3925 if ((entry->vme_start == start) && (entry->vme_end == end) && 3926 (!entry->is_sub_map) && 3927 ((object = entry->object.vm_object) == vm_submap_object) && 3928 (object->resident_page_count == 0) && 3929 (object->copy == VM_OBJECT_NULL) && 3930 (object->shadow == VM_OBJECT_NULL) && 3931 (!object->pager_created)) { 3932 entry->offset = (vm_object_offset_t)offset; 3933 entry->object.vm_object = VM_OBJECT_NULL; 3934 vm_object_deallocate(object); 3935 entry->is_sub_map = TRUE; 3936 entry->use_pmap = FALSE; 3937 entry->object.sub_map = submap; 3938 vm_map_reference(submap); 3939 if (submap->mapped_in_other_pmaps == FALSE && 3940 vm_map_pmap(submap) != PMAP_NULL && 3941 vm_map_pmap(submap) != vm_map_pmap(map)) { 3942 /* 3943 * This submap is being mapped in a map 3944 * that uses a different pmap. 3945 * Set its "mapped_in_other_pmaps" flag 3946 * to indicate that we now need to 3947 * remove mappings from all pmaps rather 3948 * than just the submap's pmap. 3949 */ 3950 submap->mapped_in_other_pmaps = TRUE; 3951 } 3952 3953#ifndef NO_NESTED_PMAP 3954 if (use_pmap) { 3955 /* nest if platform code will allow */ 3956 if(submap->pmap == NULL) { 3957 ledger_t ledger = map->pmap->ledger; 3958 submap->pmap = pmap_create(ledger, 3959 (vm_map_size_t) 0, FALSE); 3960 if(submap->pmap == PMAP_NULL) { 3961 vm_map_unlock(map); 3962 return(KERN_NO_SPACE); 3963 } 3964 } 3965 result = pmap_nest(map->pmap, 3966 (entry->object.sub_map)->pmap, 3967 (addr64_t)start, 3968 (addr64_t)start, 3969 (uint64_t)(end - start)); 3970 if(result) 3971 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result); 3972 entry->use_pmap = TRUE; 3973 } 3974#else /* NO_NESTED_PMAP */ 3975 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end); 3976#endif /* NO_NESTED_PMAP */ 3977 result = KERN_SUCCESS; 3978 } 3979 vm_map_unlock(map); 3980 3981 return(result); 3982} 3983 3984/* 3985 * vm_map_protect: 3986 * 3987 * Sets the protection of the specified address 3988 * region in the target map. If "set_max" is 3989 * specified, the maximum protection is to be set; 3990 * otherwise, only the current protection is affected. 
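 * The routine makes two passes over the range: one to validate the
 * request against each entry's maximum protection, and a second to
 * apply the change and update the pmap where needed.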
3991 */ 3992kern_return_t 3993vm_map_protect( 3994 register vm_map_t map, 3995 register vm_map_offset_t start, 3996 register vm_map_offset_t end, 3997 register vm_prot_t new_prot, 3998 register boolean_t set_max) 3999{ 4000 register vm_map_entry_t current; 4001 register vm_map_offset_t prev; 4002 vm_map_entry_t entry; 4003 vm_prot_t new_max; 4004 4005 XPR(XPR_VM_MAP, 4006 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d", 4007 map, start, end, new_prot, set_max); 4008 4009 vm_map_lock(map); 4010 4011 /* LP64todo - remove this check when vm_map_commpage64() 4012 * no longer has to stuff in a map_entry for the commpage 4013 * above the map's max_offset. 4014 */ 4015 if (start >= map->max_offset) { 4016 vm_map_unlock(map); 4017 return(KERN_INVALID_ADDRESS); 4018 } 4019 4020 while(1) { 4021 /* 4022 * Lookup the entry. If it doesn't start in a valid 4023 * entry, return an error. 4024 */ 4025 if (! vm_map_lookup_entry(map, start, &entry)) { 4026 vm_map_unlock(map); 4027 return(KERN_INVALID_ADDRESS); 4028 } 4029 4030 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */ 4031 start = SUPERPAGE_ROUND_DOWN(start); 4032 continue; 4033 } 4034 break; 4035 } 4036 if (entry->superpage_size) 4037 end = SUPERPAGE_ROUND_UP(end); 4038 4039 /* 4040 * Make a first pass to check for protection and address 4041 * violations. 4042 */ 4043 4044 current = entry; 4045 prev = current->vme_start; 4046 while ((current != vm_map_to_entry(map)) && 4047 (current->vme_start < end)) { 4048 4049 /* 4050 * If there is a hole, return an error. 4051 */ 4052 if (current->vme_start != prev) { 4053 vm_map_unlock(map); 4054 return(KERN_INVALID_ADDRESS); 4055 } 4056 4057 new_max = current->max_protection; 4058 if(new_prot & VM_PROT_COPY) { 4059 new_max |= VM_PROT_WRITE; 4060 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) { 4061 vm_map_unlock(map); 4062 return(KERN_PROTECTION_FAILURE); 4063 } 4064 } else { 4065 if ((new_prot & new_max) != new_prot) { 4066 vm_map_unlock(map); 4067 return(KERN_PROTECTION_FAILURE); 4068 } 4069 } 4070 4071 4072 prev = current->vme_end; 4073 current = current->vme_next; 4074 } 4075 if (end > prev) { 4076 vm_map_unlock(map); 4077 return(KERN_INVALID_ADDRESS); 4078 } 4079 4080 /* 4081 * Go back and fix up protections. 4082 * Clip to start here if the range starts within 4083 * the entry. 4084 */ 4085 4086 current = entry; 4087 if (current != vm_map_to_entry(map)) { 4088 /* clip and unnest if necessary */ 4089 vm_map_clip_start(map, current, start); 4090 } 4091 4092 while ((current != vm_map_to_entry(map)) && 4093 (current->vme_start < end)) { 4094 4095 vm_prot_t old_prot; 4096 4097 vm_map_clip_end(map, current, end); 4098 4099 if (current->is_sub_map) { 4100 /* clipping did unnest if needed */ 4101 assert(!current->use_pmap); 4102 } 4103 4104 old_prot = current->protection; 4105 4106 if(new_prot & VM_PROT_COPY) { 4107 /* caller is asking specifically to copy the */ 4108 /* mapped data, this implies that max protection */ 4109 /* will include write. 
Caller must be prepared */ 4110 /* for loss of shared memory communication in the */ 4111 /* target area after taking this step */ 4112 4113 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){ 4114 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start)); 4115 current->offset = 0; 4116 assert(current->use_pmap); 4117 } 4118 current->needs_copy = TRUE; 4119 current->max_protection |= VM_PROT_WRITE; 4120 } 4121 4122 if (set_max) 4123 current->protection = 4124 (current->max_protection = 4125 new_prot & ~VM_PROT_COPY) & 4126 old_prot; 4127 else 4128 current->protection = new_prot & ~VM_PROT_COPY; 4129 4130 /* 4131 * Update physical map if necessary. 4132 * If the request is to turn off write protection, 4133 * we won't do it for real (in pmap). This is because 4134 * it would cause copy-on-write to fail. We've already 4135 * set, the new protection in the map, so if a 4136 * write-protect fault occurred, it will be fixed up 4137 * properly, COW or not. 4138 */ 4139 if (current->protection != old_prot) { 4140 /* Look one level in we support nested pmaps */ 4141 /* from mapped submaps which are direct entries */ 4142 /* in our map */ 4143 4144 vm_prot_t prot; 4145 4146 prot = current->protection & ~VM_PROT_WRITE; 4147 4148 if (override_nx(map, current->alias) && prot) 4149 prot |= VM_PROT_EXECUTE; 4150 4151 if (current->is_sub_map && current->use_pmap) { 4152 pmap_protect(current->object.sub_map->pmap, 4153 current->vme_start, 4154 current->vme_end, 4155 prot); 4156 } else { 4157 pmap_protect(map->pmap, 4158 current->vme_start, 4159 current->vme_end, 4160 prot); 4161 } 4162 } 4163 current = current->vme_next; 4164 } 4165 4166 current = entry; 4167 while ((current != vm_map_to_entry(map)) && 4168 (current->vme_start <= end)) { 4169 vm_map_simplify_entry(map, current); 4170 current = current->vme_next; 4171 } 4172 4173 vm_map_unlock(map); 4174 return(KERN_SUCCESS); 4175} 4176 4177/* 4178 * vm_map_inherit: 4179 * 4180 * Sets the inheritance of the specified address 4181 * range in the target map. Inheritance 4182 * affects how the map will be shared with 4183 * child maps at the time of vm_map_fork. 4184 */ 4185kern_return_t 4186vm_map_inherit( 4187 register vm_map_t map, 4188 register vm_map_offset_t start, 4189 register vm_map_offset_t end, 4190 register vm_inherit_t new_inheritance) 4191{ 4192 register vm_map_entry_t entry; 4193 vm_map_entry_t temp_entry; 4194 4195 vm_map_lock(map); 4196 4197 VM_MAP_RANGE_CHECK(map, start, end); 4198 4199 if (vm_map_lookup_entry(map, start, &temp_entry)) { 4200 entry = temp_entry; 4201 } 4202 else { 4203 temp_entry = temp_entry->vme_next; 4204 entry = temp_entry; 4205 } 4206 4207 /* first check entire range for submaps which can't support the */ 4208 /* given inheritance. 
*/ 4209 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 4210 if(entry->is_sub_map) { 4211 if(new_inheritance == VM_INHERIT_COPY) { 4212 vm_map_unlock(map); 4213 return(KERN_INVALID_ARGUMENT); 4214 } 4215 } 4216 4217 entry = entry->vme_next; 4218 } 4219 4220 entry = temp_entry; 4221 if (entry != vm_map_to_entry(map)) { 4222 /* clip and unnest if necessary */ 4223 vm_map_clip_start(map, entry, start); 4224 } 4225 4226 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 4227 vm_map_clip_end(map, entry, end); 4228 if (entry->is_sub_map) { 4229 /* clip did unnest if needed */ 4230 assert(!entry->use_pmap); 4231 } 4232 4233 entry->inheritance = new_inheritance; 4234 4235 entry = entry->vme_next; 4236 } 4237 4238 vm_map_unlock(map); 4239 return(KERN_SUCCESS); 4240} 4241 4242/* 4243 * Update the accounting for the amount of wired memory in this map. If the user has 4244 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails. 4245 */ 4246 4247static kern_return_t 4248add_wire_counts( 4249 vm_map_t map, 4250 vm_map_entry_t entry, 4251 boolean_t user_wire) 4252{ 4253 vm_map_size_t size; 4254 4255 if (user_wire) { 4256 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count; 4257 4258 /* 4259 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring 4260 * this map entry. 4261 */ 4262 4263 if (entry->user_wired_count == 0) { 4264 size = entry->vme_end - entry->vme_start; 4265 4266 /* 4267 * Since this is the first time the user is wiring this map entry, check to see if we're 4268 * exceeding the user wire limits. There is a per map limit which is the smaller of either 4269 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also 4270 * a system-wide limit on the amount of memory all users can wire. If the user is over either 4271 * limit, then we fail. 4272 */ 4273 4274 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) || 4275 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit || 4276 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount) 4277 return KERN_RESOURCE_SHORTAGE; 4278 4279 /* 4280 * The first time the user wires an entry, we also increment the wired_count and add this to 4281 * the total that has been wired in the map. 4282 */ 4283 4284 if (entry->wired_count >= MAX_WIRE_COUNT) 4285 return KERN_FAILURE; 4286 4287 entry->wired_count++; 4288 map->user_wire_size += size; 4289 } 4290 4291 if (entry->user_wired_count >= MAX_WIRE_COUNT) 4292 return KERN_FAILURE; 4293 4294 entry->user_wired_count++; 4295 4296 } else { 4297 4298 /* 4299 * The kernel's wiring the memory. Just bump the count and continue. 4300 */ 4301 4302 if (entry->wired_count >= MAX_WIRE_COUNT) 4303 panic("vm_map_wire: too many wirings"); 4304 4305 entry->wired_count++; 4306 } 4307 4308 return KERN_SUCCESS; 4309} 4310 4311/* 4312 * Update the memory wiring accounting now that the given map entry is being unwired. 4313 */ 4314 4315static void 4316subtract_wire_counts( 4317 vm_map_t map, 4318 vm_map_entry_t entry, 4319 boolean_t user_wire) 4320{ 4321 4322 if (user_wire) { 4323 4324 /* 4325 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference. 4326 */ 4327 4328 if (entry->user_wired_count == 1) { 4329 4330 /* 4331 * We're removing the last user wire reference. Decrement the wired_count and the total 4332 * user wired memory for this map. 
4333 */ 4334 4335 assert(entry->wired_count >= 1); 4336 entry->wired_count--; 4337 map->user_wire_size -= entry->vme_end - entry->vme_start; 4338 } 4339 4340 assert(entry->user_wired_count >= 1); 4341 entry->user_wired_count--; 4342 4343 } else { 4344 4345 /* 4346 * The kernel is unwiring the memory. Just update the count. 4347 */ 4348 4349 assert(entry->wired_count >= 1); 4350 entry->wired_count--; 4351 } 4352} 4353 4354/* 4355 * vm_map_wire: 4356 * 4357 * Sets the pageability of the specified address range in the 4358 * target map as wired. Regions specified as not pageable require 4359 * locked-down physical memory and physical page maps. The 4360 * access_type variable indicates types of accesses that must not 4361 * generate page faults. This is checked against protection of 4362 * memory being locked-down. 4363 * 4364 * The map must not be locked, but a reference must remain to the 4365 * map throughout the call. 4366 */ 4367static kern_return_t 4368vm_map_wire_nested( 4369 register vm_map_t map, 4370 register vm_map_offset_t start, 4371 register vm_map_offset_t end, 4372 register vm_prot_t access_type, 4373 boolean_t user_wire, 4374 pmap_t map_pmap, 4375 vm_map_offset_t pmap_addr, 4376 ppnum_t *physpage_p) 4377{ 4378 register vm_map_entry_t entry; 4379 struct vm_map_entry *first_entry, tmp_entry; 4380 vm_map_t real_map; 4381 register vm_map_offset_t s,e; 4382 kern_return_t rc; 4383 boolean_t need_wakeup; 4384 boolean_t main_map = FALSE; 4385 wait_interrupt_t interruptible_state; 4386 thread_t cur_thread; 4387 unsigned int last_timestamp; 4388 vm_map_size_t size; 4389 boolean_t wire_and_extract; 4390 4391 wire_and_extract = FALSE; 4392 if (physpage_p != NULL) { 4393 /* 4394 * The caller wants the physical page number of the 4395 * wired page. We return only one physical page number 4396 * so this works for only one page at a time. 4397 */ 4398 if ((end - start) != PAGE_SIZE) { 4399 return KERN_INVALID_ARGUMENT; 4400 } 4401 wire_and_extract = TRUE; 4402 *physpage_p = 0; 4403 } 4404 4405 vm_map_lock(map); 4406 if(map_pmap == NULL) 4407 main_map = TRUE; 4408 last_timestamp = map->timestamp; 4409 4410 VM_MAP_RANGE_CHECK(map, start, end); 4411 assert(page_aligned(start)); 4412 assert(page_aligned(end)); 4413 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map))); 4414 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))); 4415 if (start == end) { 4416 /* We wired what the caller asked for, zero pages */ 4417 vm_map_unlock(map); 4418 return KERN_SUCCESS; 4419 } 4420 4421 need_wakeup = FALSE; 4422 cur_thread = current_thread(); 4423 4424 s = start; 4425 rc = KERN_SUCCESS; 4426 4427 if (vm_map_lookup_entry(map, s, &first_entry)) { 4428 entry = first_entry; 4429 /* 4430 * vm_map_clip_start will be done later. 4431 * We don't want to unnest any nested submaps here ! 4432 */ 4433 } else { 4434 /* Start address is not in map */ 4435 rc = KERN_INVALID_ADDRESS; 4436 goto done; 4437 } 4438 4439 while ((entry != vm_map_to_entry(map)) && (s < end)) { 4440 /* 4441 * At this point, we have wired from "start" to "s". 4442 * We still need to wire from "s" to "end". 4443 * 4444 * "entry" hasn't been clipped, so it could start before "s" 4445 * and/or end after "end". 4446 */ 4447 4448 /* "e" is how far we want to wire in this entry */ 4449 e = entry->vme_end; 4450 if (e > end) 4451 e = end; 4452 4453 /* 4454 * If another thread is wiring/unwiring this entry then 4455 * block after informing other thread to wake us up. 
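 * (The steps below: set entry->needs_wakeup, wake any waiters we
 * already owe from earlier entries, then vm_map_entry_wait() --
 * THREAD_ABORTSAFE for user wires so the wait can be interrupted --
 * and finally re-lookup "s", since the entry may have been clipped
 * while the map was unlocked.)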
4456 */ 4457 if (entry->in_transition) { 4458 wait_result_t wait_result; 4459 4460 /* 4461 * We have not clipped the entry. Make sure that 4462 * the start address is in range so that the lookup 4463 * below will succeed. 4464 * "s" is the current starting point: we've already 4465 * wired from "start" to "s" and we still have 4466 * to wire from "s" to "end". 4467 */ 4468 4469 entry->needs_wakeup = TRUE; 4470 4471 /* 4472 * wake up anybody waiting on entries that we have 4473 * already wired. 4474 */ 4475 if (need_wakeup) { 4476 vm_map_entry_wakeup(map); 4477 need_wakeup = FALSE; 4478 } 4479 /* 4480 * User wiring is interruptible 4481 */ 4482 wait_result = vm_map_entry_wait(map, 4483 (user_wire) ? THREAD_ABORTSAFE : 4484 THREAD_UNINT); 4485 if (user_wire && wait_result == THREAD_INTERRUPTED) { 4486 /* 4487 * undo the wirings we have done so far 4488 * We do not clear the needs_wakeup flag, 4489 * because we cannot tell if we were the 4490 * only one waiting. 4491 */ 4492 rc = KERN_FAILURE; 4493 goto done; 4494 } 4495 4496 /* 4497 * Cannot avoid a lookup here. reset timestamp. 4498 */ 4499 last_timestamp = map->timestamp; 4500 4501 /* 4502 * The entry could have been clipped, look it up again. 4503 * Worse that can happen is, it may not exist anymore. 4504 */ 4505 if (!vm_map_lookup_entry(map, s, &first_entry)) { 4506 /* 4507 * User: undo everything upto the previous 4508 * entry. let vm_map_unwire worry about 4509 * checking the validity of the range. 4510 */ 4511 rc = KERN_FAILURE; 4512 goto done; 4513 } 4514 entry = first_entry; 4515 continue; 4516 } 4517 4518 if (entry->is_sub_map) { 4519 vm_map_offset_t sub_start; 4520 vm_map_offset_t sub_end; 4521 vm_map_offset_t local_start; 4522 vm_map_offset_t local_end; 4523 pmap_t pmap; 4524 4525 if (wire_and_extract) { 4526 /* 4527 * Wiring would result in copy-on-write 4528 * which would not be compatible with 4529 * the sharing we have with the original 4530 * provider of this memory. 4531 */ 4532 rc = KERN_INVALID_ARGUMENT; 4533 goto done; 4534 } 4535 4536 vm_map_clip_start(map, entry, s); 4537 vm_map_clip_end(map, entry, end); 4538 4539 sub_start = entry->offset; 4540 sub_end = entry->vme_end; 4541 sub_end += entry->offset - entry->vme_start; 4542 4543 local_end = entry->vme_end; 4544 if(map_pmap == NULL) { 4545 vm_object_t object; 4546 vm_object_offset_t offset; 4547 vm_prot_t prot; 4548 boolean_t wired; 4549 vm_map_entry_t local_entry; 4550 vm_map_version_t version; 4551 vm_map_t lookup_map; 4552 4553 if(entry->use_pmap) { 4554 pmap = entry->object.sub_map->pmap; 4555 /* ppc implementation requires that */ 4556 /* submaps pmap address ranges line */ 4557 /* up with parent map */ 4558#ifdef notdef 4559 pmap_addr = sub_start; 4560#endif 4561 pmap_addr = s; 4562 } else { 4563 pmap = map->pmap; 4564 pmap_addr = s; 4565 } 4566 4567 if (entry->wired_count) { 4568 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 4569 goto done; 4570 4571 /* 4572 * The map was not unlocked: 4573 * no need to goto re-lookup. 4574 * Just go directly to next entry. 
4575 */ 4576 entry = entry->vme_next; 4577 s = entry->vme_start; 4578 continue; 4579 4580 } 4581 4582 /* call vm_map_lookup_locked to */ 4583 /* cause any needs copy to be */ 4584 /* evaluated */ 4585 local_start = entry->vme_start; 4586 lookup_map = map; 4587 vm_map_lock_write_to_read(map); 4588 if(vm_map_lookup_locked( 4589 &lookup_map, local_start, 4590 access_type, 4591 OBJECT_LOCK_EXCLUSIVE, 4592 &version, &object, 4593 &offset, &prot, &wired, 4594 NULL, 4595 &real_map)) { 4596 4597 vm_map_unlock_read(lookup_map); 4598 vm_map_unwire(map, start, 4599 s, user_wire); 4600 return(KERN_FAILURE); 4601 } 4602 vm_object_unlock(object); 4603 if(real_map != lookup_map) 4604 vm_map_unlock(real_map); 4605 vm_map_unlock_read(lookup_map); 4606 vm_map_lock(map); 4607 4608 /* we unlocked, so must re-lookup */ 4609 if (!vm_map_lookup_entry(map, 4610 local_start, 4611 &local_entry)) { 4612 rc = KERN_FAILURE; 4613 goto done; 4614 } 4615 4616 /* 4617 * entry could have been "simplified", 4618 * so re-clip 4619 */ 4620 entry = local_entry; 4621 assert(s == local_start); 4622 vm_map_clip_start(map, entry, s); 4623 vm_map_clip_end(map, entry, end); 4624 /* re-compute "e" */ 4625 e = entry->vme_end; 4626 if (e > end) 4627 e = end; 4628 4629 /* did we have a change of type? */ 4630 if (!entry->is_sub_map) { 4631 last_timestamp = map->timestamp; 4632 continue; 4633 } 4634 } else { 4635 local_start = entry->vme_start; 4636 pmap = map_pmap; 4637 } 4638 4639 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 4640 goto done; 4641 4642 entry->in_transition = TRUE; 4643 4644 vm_map_unlock(map); 4645 rc = vm_map_wire_nested(entry->object.sub_map, 4646 sub_start, sub_end, 4647 access_type, 4648 user_wire, pmap, pmap_addr, 4649 NULL); 4650 vm_map_lock(map); 4651 4652 /* 4653 * Find the entry again. It could have been clipped 4654 * after we unlocked the map. 4655 */ 4656 if (!vm_map_lookup_entry(map, local_start, 4657 &first_entry)) 4658 panic("vm_map_wire: re-lookup failed"); 4659 entry = first_entry; 4660 4661 assert(local_start == s); 4662 /* re-compute "e" */ 4663 e = entry->vme_end; 4664 if (e > end) 4665 e = end; 4666 4667 last_timestamp = map->timestamp; 4668 while ((entry != vm_map_to_entry(map)) && 4669 (entry->vme_start < e)) { 4670 assert(entry->in_transition); 4671 entry->in_transition = FALSE; 4672 if (entry->needs_wakeup) { 4673 entry->needs_wakeup = FALSE; 4674 need_wakeup = TRUE; 4675 } 4676 if (rc != KERN_SUCCESS) {/* from vm_*_wire */ 4677 subtract_wire_counts(map, entry, user_wire); 4678 } 4679 entry = entry->vme_next; 4680 } 4681 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 4682 goto done; 4683 } 4684 4685 /* no need to relookup again */ 4686 s = entry->vme_start; 4687 continue; 4688 } 4689 4690 /* 4691 * If this entry is already wired then increment 4692 * the appropriate wire reference count. 4693 */ 4694 if (entry->wired_count) { 4695 4696 if ((entry->protection & access_type) != access_type) { 4697 /* found a protection problem */ 4698 4699 /* 4700 * XXX FBDP 4701 * We should always return an error 4702 * in this case but since we didn't 4703 * enforce it before, let's do 4704 * it only for the new "wire_and_extract" 4705 * code path for now... 4706 */ 4707 if (wire_and_extract) { 4708 rc = KERN_PROTECTION_FAILURE; 4709 goto done; 4710 } 4711 } 4712 4713 /* 4714 * entry is already wired down, get our reference 4715 * after clipping to our range. 
4716 */ 4717 vm_map_clip_start(map, entry, s); 4718 vm_map_clip_end(map, entry, end); 4719 4720 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 4721 goto done; 4722 4723 if (wire_and_extract) { 4724 vm_object_t object; 4725 vm_object_offset_t offset; 4726 vm_page_t m; 4727 4728 /* 4729 * We don't have to "wire" the page again 4730 * but we still have to "extract" its 4731 * physical page number, after some sanity 4732 * checks. 4733 */ 4734 assert((entry->vme_end - entry->vme_start) 4735 == PAGE_SIZE); 4736 assert(!entry->needs_copy); 4737 assert(!entry->is_sub_map); 4738 assert(entry->object.vm_object); 4739 if (((entry->vme_end - entry->vme_start) 4740 != PAGE_SIZE) || 4741 entry->needs_copy || 4742 entry->is_sub_map || 4743 entry->object.vm_object == VM_OBJECT_NULL) { 4744 rc = KERN_INVALID_ARGUMENT; 4745 goto done; 4746 } 4747 4748 object = entry->object.vm_object; 4749 offset = entry->offset; 4750 /* need exclusive lock to update m->dirty */ 4751 if (entry->protection & VM_PROT_WRITE) { 4752 vm_object_lock(object); 4753 } else { 4754 vm_object_lock_shared(object); 4755 } 4756 m = vm_page_lookup(object, offset); 4757 assert(m != VM_PAGE_NULL); 4758 assert(m->wire_count); 4759 if (m != VM_PAGE_NULL && m->wire_count) { 4760 *physpage_p = m->phys_page; 4761 if (entry->protection & VM_PROT_WRITE) { 4762 vm_object_lock_assert_exclusive( 4763 m->object); 4764 m->dirty = TRUE; 4765 } 4766 } else { 4767 /* not already wired !? */ 4768 *physpage_p = 0; 4769 } 4770 vm_object_unlock(object); 4771 } 4772 4773 /* map was not unlocked: no need to relookup */ 4774 entry = entry->vme_next; 4775 s = entry->vme_start; 4776 continue; 4777 } 4778 4779 /* 4780 * Unwired entry or wire request transmitted via submap 4781 */ 4782 4783 4784 /* 4785 * Perform actions of vm_map_lookup that need the write 4786 * lock on the map: create a shadow object for a 4787 * copy-on-write region, or an object for a zero-fill 4788 * region. 4789 */ 4790 size = entry->vme_end - entry->vme_start; 4791 /* 4792 * If wiring a copy-on-write page, we need to copy it now 4793 * even if we're only (currently) requesting read access. 4794 * This is aggressive, but once it's wired we can't move it. 4795 */ 4796 if (entry->needs_copy) { 4797 if (wire_and_extract) { 4798 /* 4799 * We're supposed to share with the original 4800 * provider so should not be "needs_copy" 4801 */ 4802 rc = KERN_INVALID_ARGUMENT; 4803 goto done; 4804 } 4805 4806 vm_object_shadow(&entry->object.vm_object, 4807 &entry->offset, size); 4808 entry->needs_copy = FALSE; 4809 } else if (entry->object.vm_object == VM_OBJECT_NULL) { 4810 if (wire_and_extract) { 4811 /* 4812 * We're supposed to share with the original 4813 * provider so should already have an object. 4814 */ 4815 rc = KERN_INVALID_ARGUMENT; 4816 goto done; 4817 } 4818 entry->object.vm_object = vm_object_allocate(size); 4819 entry->offset = (vm_object_offset_t)0; 4820 assert(entry->use_pmap); 4821 } 4822 4823 vm_map_clip_start(map, entry, s); 4824 vm_map_clip_end(map, entry, end); 4825 4826 /* re-compute "e" */ 4827 e = entry->vme_end; 4828 if (e > end) 4829 e = end; 4830 4831 /* 4832 * Check for holes and protection mismatch. 4833 * Holes: Next entry should be contiguous unless this 4834 * is the end of the region.
4835 * Protection: Access requested must be allowed, unless 4836 * wiring is by protection class 4837 */ 4838 if ((entry->vme_end < end) && 4839 ((entry->vme_next == vm_map_to_entry(map)) || 4840 (entry->vme_next->vme_start > entry->vme_end))) { 4841 /* found a hole */ 4842 rc = KERN_INVALID_ADDRESS; 4843 goto done; 4844 } 4845 if ((entry->protection & access_type) != access_type) { 4846 /* found a protection problem */ 4847 rc = KERN_PROTECTION_FAILURE; 4848 goto done; 4849 } 4850 4851 assert(entry->wired_count == 0 && entry->user_wired_count == 0); 4852 4853 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 4854 goto done; 4855 4856 entry->in_transition = TRUE; 4857 4858 /* 4859 * This entry might get split once we unlock the map. 4860 * In vm_fault_wire(), we need the current range as 4861 * defined by this entry. In order for this to work 4862 * along with a simultaneous clip operation, we make a 4863 * temporary copy of this entry and use that for the 4864 * wiring. Note that the underlying objects do not 4865 * change during a clip. 4866 */ 4867 tmp_entry = *entry; 4868 4869 /* 4870 * The in_transition state guarantees that the entry 4871 * (or entries for this range, if a split occurred) will be 4872 * there when the map lock is acquired for the second time. 4873 */ 4874 vm_map_unlock(map); 4875 4876 if (!user_wire && cur_thread != THREAD_NULL) 4877 interruptible_state = thread_interrupt_level(THREAD_UNINT); 4878 else 4879 interruptible_state = THREAD_UNINT; 4880 4881 if(map_pmap) 4882 rc = vm_fault_wire(map, 4883 &tmp_entry, map_pmap, pmap_addr, 4884 physpage_p); 4885 else 4886 rc = vm_fault_wire(map, 4887 &tmp_entry, map->pmap, 4888 tmp_entry.vme_start, 4889 physpage_p); 4890 4891 if (!user_wire && cur_thread != THREAD_NULL) 4892 thread_interrupt_level(interruptible_state); 4893 4894 vm_map_lock(map); 4895 4896 if (last_timestamp+1 != map->timestamp) { 4897 /* 4898 * Find the entry again. It could have been clipped 4899 * after we unlocked the map. 4900 */ 4901 if (!vm_map_lookup_entry(map, tmp_entry.vme_start, 4902 &first_entry)) 4903 panic("vm_map_wire: re-lookup failed"); 4904 4905 entry = first_entry; 4906 } 4907 4908 last_timestamp = map->timestamp; 4909 4910 while ((entry != vm_map_to_entry(map)) && 4911 (entry->vme_start < tmp_entry.vme_end)) { 4912 assert(entry->in_transition); 4913 entry->in_transition = FALSE; 4914 if (entry->needs_wakeup) { 4915 entry->needs_wakeup = FALSE; 4916 need_wakeup = TRUE; 4917 } 4918 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 4919 subtract_wire_counts(map, entry, user_wire); 4920 } 4921 entry = entry->vme_next; 4922 } 4923 4924 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 4925 goto done; 4926 } 4927 4928 s = entry->vme_start; 4929 } /* end while loop through map entries */ 4930 4931done: 4932 if (rc == KERN_SUCCESS) { 4933 /* repair any damage we may have made to the VM map */ 4934 vm_map_simplify_range(map, start, end); 4935 } 4936 4937 vm_map_unlock(map); 4938 4939 /* 4940 * wake up anybody waiting on entries we wired.
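 * (Note: on failure, the code below also unwires whatever was wired
 * so far -- vm_map_unwire(map, start, s, user_wire) -- and clears
 * *physpage_p, so the caller never sees a partially wired range.)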
4941 */ 4942 if (need_wakeup) 4943 vm_map_entry_wakeup(map); 4944 4945 if (rc != KERN_SUCCESS) { 4946 /* undo what has been wired so far */ 4947 vm_map_unwire(map, start, s, user_wire); 4948 if (physpage_p) { 4949 *physpage_p = 0; 4950 } 4951 } 4952 4953 return rc; 4954 4955} 4956 4957kern_return_t 4958vm_map_wire( 4959 register vm_map_t map, 4960 register vm_map_offset_t start, 4961 register vm_map_offset_t end, 4962 register vm_prot_t access_type, 4963 boolean_t user_wire) 4964{ 4965 4966 kern_return_t kret; 4967 4968 kret = vm_map_wire_nested(map, start, end, access_type, 4969 user_wire, (pmap_t)NULL, 0, NULL); 4970 return kret; 4971} 4972 4973kern_return_t 4974vm_map_wire_and_extract( 4975 vm_map_t map, 4976 vm_map_offset_t start, 4977 vm_prot_t access_type, 4978 boolean_t user_wire, 4979 ppnum_t *physpage_p) 4980{ 4981 4982 kern_return_t kret; 4983 4984 kret = vm_map_wire_nested(map, 4985 start, 4986 start+VM_MAP_PAGE_SIZE(map), 4987 access_type, 4988 user_wire, 4989 (pmap_t)NULL, 4990 0, 4991 physpage_p); 4992 if (kret != KERN_SUCCESS && 4993 physpage_p != NULL) { 4994 *physpage_p = 0; 4995 } 4996 return kret; 4997} 4998 4999/* 5000 * vm_map_unwire: 5001 * 5002 * Sets the pageability of the specified address range in the target 5003 * as pageable. Regions specified must have been wired previously. 5004 * 5005 * The map must not be locked, but a reference must remain to the map 5006 * throughout the call. 5007 * 5008 * Kernel will panic on failures. User unwire ignores holes and 5009 * unwired and intransition entries to avoid losing memory by leaving 5010 * it unwired. 5011 */ 5012static kern_return_t 5013vm_map_unwire_nested( 5014 register vm_map_t map, 5015 register vm_map_offset_t start, 5016 register vm_map_offset_t end, 5017 boolean_t user_wire, 5018 pmap_t map_pmap, 5019 vm_map_offset_t pmap_addr) 5020{ 5021 register vm_map_entry_t entry; 5022 struct vm_map_entry *first_entry, tmp_entry; 5023 boolean_t need_wakeup; 5024 boolean_t main_map = FALSE; 5025 unsigned int last_timestamp; 5026 5027 vm_map_lock(map); 5028 if(map_pmap == NULL) 5029 main_map = TRUE; 5030 last_timestamp = map->timestamp; 5031 5032 VM_MAP_RANGE_CHECK(map, start, end); 5033 assert(page_aligned(start)); 5034 assert(page_aligned(end)); 5035 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map))); 5036 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map))); 5037 5038 if (start == end) { 5039 /* We unwired what the caller asked for: zero pages */ 5040 vm_map_unlock(map); 5041 return KERN_SUCCESS; 5042 } 5043 5044 if (vm_map_lookup_entry(map, start, &first_entry)) { 5045 entry = first_entry; 5046 /* 5047 * vm_map_clip_start will be done later. 5048 * We don't want to unnest any nested sub maps here ! 5049 */ 5050 } 5051 else { 5052 if (!user_wire) { 5053 panic("vm_map_unwire: start not found"); 5054 } 5055 /* Start address is not in map. */ 5056 vm_map_unlock(map); 5057 return(KERN_INVALID_ADDRESS); 5058 } 5059 5060 if (entry->superpage_size) { 5061 /* superpages are always wired */ 5062 vm_map_unlock(map); 5063 return KERN_INVALID_ADDRESS; 5064 } 5065 5066 need_wakeup = FALSE; 5067 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 5068 if (entry->in_transition) { 5069 /* 5070 * 1) 5071 * Another thread is wiring down this entry. Note 5072 * that if it is not for the other thread we would 5073 * be unwiring an unwired entry. This is not 5074 * permitted. If we wait, we will be unwiring memory 5075 * we did not wire. 5076 * 5077 * 2) 5078 * Another thread is unwiring this entry. 
We did not 5079 * have a reference to it, because if we did, this 5080 * entry will not be getting unwired now. 5081 */ 5082 if (!user_wire) { 5083 /* 5084 * XXX FBDP 5085 * This could happen: there could be some 5086 * overlapping vslock/vsunlock operations 5087 * going on. 5088 * We should probably just wait and retry, 5089 * but then we have to be careful that this 5090 * entry could get "simplified" after 5091 * "in_transition" gets unset and before 5092 * we re-lookup the entry, so we would 5093 * have to re-clip the entry to avoid 5094 * re-unwiring what we have already unwired... 5095 * See vm_map_wire_nested(). 5096 * 5097 * Or we could just ignore "in_transition" 5098 * here and proceed to decrement the wired 5099 * count(s) on this entry. That should be fine 5100 * as long as "wired_count" doesn't drop all 5101 * the way to 0 (and we should panic if THAT 5102 * happens). 5103 */ 5104 panic("vm_map_unwire: in_transition entry"); 5105 } 5106 5107 entry = entry->vme_next; 5108 continue; 5109 } 5110 5111 if (entry->is_sub_map) { 5112 vm_map_offset_t sub_start; 5113 vm_map_offset_t sub_end; 5114 vm_map_offset_t local_end; 5115 pmap_t pmap; 5116 5117 vm_map_clip_start(map, entry, start); 5118 vm_map_clip_end(map, entry, end); 5119 5120 sub_start = entry->offset; 5121 sub_end = entry->vme_end - entry->vme_start; 5122 sub_end += entry->offset; 5123 local_end = entry->vme_end; 5124 if(map_pmap == NULL) { 5125 if(entry->use_pmap) { 5126 pmap = entry->object.sub_map->pmap; 5127 pmap_addr = sub_start; 5128 } else { 5129 pmap = map->pmap; 5130 pmap_addr = start; 5131 } 5132 if (entry->wired_count == 0 || 5133 (user_wire && entry->user_wired_count == 0)) { 5134 if (!user_wire) 5135 panic("vm_map_unwire: entry is unwired"); 5136 entry = entry->vme_next; 5137 continue; 5138 } 5139 5140 /* 5141 * Check for holes 5142 * Holes: Next entry should be contiguous unless 5143 * this is the end of the region. 5144 */ 5145 if (((entry->vme_end < end) && 5146 ((entry->vme_next == vm_map_to_entry(map)) || 5147 (entry->vme_next->vme_start 5148 > entry->vme_end)))) { 5149 if (!user_wire) 5150 panic("vm_map_unwire: non-contiguous region"); 5151/* 5152 entry = entry->vme_next; 5153 continue; 5154*/ 5155 } 5156 5157 subtract_wire_counts(map, entry, user_wire); 5158 5159 if (entry->wired_count != 0) { 5160 entry = entry->vme_next; 5161 continue; 5162 } 5163 5164 entry->in_transition = TRUE; 5165 tmp_entry = *entry;/* see comment in vm_map_wire() */ 5166 5167 /* 5168 * We can unlock the map now. The in_transition state 5169 * guarantees existence of the entry. 5170 */ 5171 vm_map_unlock(map); 5172 vm_map_unwire_nested(entry->object.sub_map, 5173 sub_start, sub_end, user_wire, pmap, pmap_addr); 5174 vm_map_lock(map); 5175 5176 if (last_timestamp+1 != map->timestamp) { 5177 /* 5178 * Find the entry again. It could have been 5179 * clipped or deleted after we unlocked the map. 5180 */ 5181 if (!vm_map_lookup_entry(map, 5182 tmp_entry.vme_start, 5183 &first_entry)) { 5184 if (!user_wire) 5185 panic("vm_map_unwire: re-lookup failed"); 5186 entry = first_entry->vme_next; 5187 } else 5188 entry = first_entry; 5189 } 5190 last_timestamp = map->timestamp; 5191 5192 /* 5193 * clear transition bit for all constituent entries 5194 * that were in the original entry (saved in 5195 * tmp_entry). Also check for waiters.
5196 */ 5197 while ((entry != vm_map_to_entry(map)) && 5198 (entry->vme_start < tmp_entry.vme_end)) { 5199 assert(entry->in_transition); 5200 entry->in_transition = FALSE; 5201 if (entry->needs_wakeup) { 5202 entry->needs_wakeup = FALSE; 5203 need_wakeup = TRUE; 5204 } 5205 entry = entry->vme_next; 5206 } 5207 continue; 5208 } else { 5209 vm_map_unlock(map); 5210 vm_map_unwire_nested(entry->object.sub_map, 5211 sub_start, sub_end, user_wire, map_pmap, 5212 pmap_addr); 5213 vm_map_lock(map); 5214 5215 if (last_timestamp+1 != map->timestamp) { 5216 /* 5217 * Find the entry again. It could have been 5218 * clipped or deleted after we unlocked the map. 5219 */ 5220 if (!vm_map_lookup_entry(map, 5221 tmp_entry.vme_start, 5222 &first_entry)) { 5223 if (!user_wire) 5224 panic("vm_map_unwire: re-lookup failed"); 5225 entry = first_entry->vme_next; 5226 } else 5227 entry = first_entry; 5228 } 5229 last_timestamp = map->timestamp; 5230 } 5231 } 5232 5233 5234 if ((entry->wired_count == 0) || 5235 (user_wire && entry->user_wired_count == 0)) { 5236 if (!user_wire) 5237 panic("vm_map_unwire: entry is unwired"); 5238 5239 entry = entry->vme_next; 5240 continue; 5241 } 5242 5243 assert(entry->wired_count > 0 && 5244 (!user_wire || entry->user_wired_count > 0)); 5245 5246 vm_map_clip_start(map, entry, start); 5247 vm_map_clip_end(map, entry, end); 5248 5249 /* 5250 * Check for holes 5251 * Holes: Next entry should be contiguous unless 5252 * this is the end of the region. 5253 */ 5254 if (((entry->vme_end < end) && 5255 ((entry->vme_next == vm_map_to_entry(map)) || 5256 (entry->vme_next->vme_start > entry->vme_end)))) { 5257 5258 if (!user_wire) 5259 panic("vm_map_unwire: non-contiguous region"); 5260 entry = entry->vme_next; 5261 continue; 5262 } 5263 5264 subtract_wire_counts(map, entry, user_wire); 5265 5266 if (entry->wired_count != 0) { 5267 entry = entry->vme_next; 5268 continue; 5269 } 5270 5271 if(entry->zero_wired_pages) { 5272 entry->zero_wired_pages = FALSE; 5273 } 5274 5275 entry->in_transition = TRUE; 5276 tmp_entry = *entry; /* see comment in vm_map_wire() */ 5277 5278 /* 5279 * We can unlock the map now. The in_transition state 5280 * guarantees existance of the entry. 5281 */ 5282 vm_map_unlock(map); 5283 if(map_pmap) { 5284 vm_fault_unwire(map, 5285 &tmp_entry, FALSE, map_pmap, pmap_addr); 5286 } else { 5287 vm_fault_unwire(map, 5288 &tmp_entry, FALSE, map->pmap, 5289 tmp_entry.vme_start); 5290 } 5291 vm_map_lock(map); 5292 5293 if (last_timestamp+1 != map->timestamp) { 5294 /* 5295 * Find the entry again. It could have been clipped 5296 * or deleted after we unlocked the map. 5297 */ 5298 if (!vm_map_lookup_entry(map, tmp_entry.vme_start, 5299 &first_entry)) { 5300 if (!user_wire) 5301 panic("vm_map_unwire: re-lookup failed"); 5302 entry = first_entry->vme_next; 5303 } else 5304 entry = first_entry; 5305 } 5306 last_timestamp = map->timestamp; 5307 5308 /* 5309 * clear transition bit for all constituent entries that 5310 * were in the original entry (saved in tmp_entry). Also 5311 * check for waiters. 5312 */ 5313 while ((entry != vm_map_to_entry(map)) && 5314 (entry->vme_start < tmp_entry.vme_end)) { 5315 assert(entry->in_transition); 5316 entry->in_transition = FALSE; 5317 if (entry->needs_wakeup) { 5318 entry->needs_wakeup = FALSE; 5319 need_wakeup = TRUE; 5320 } 5321 entry = entry->vme_next; 5322 } 5323 } 5324 5325 /* 5326 * We might have fragmented the address space when we wired this 5327 * range of addresses. 
Attempt to re-coalesce these VM map entries 5328 * with their neighbors now that they're no longer wired. 5329 * Under some circumstances, address space fragmentation can 5330 * prevent VM object shadow chain collapsing, which can cause 5331 * swap space leaks. 5332 */ 5333 vm_map_simplify_range(map, start, end); 5334 5335 vm_map_unlock(map); 5336 /* 5337 * wake up anybody waiting on entries that we have unwired. 5338 */ 5339 if (need_wakeup) 5340 vm_map_entry_wakeup(map); 5341 return(KERN_SUCCESS); 5342 5343} 5344 5345kern_return_t 5346vm_map_unwire( 5347 register vm_map_t map, 5348 register vm_map_offset_t start, 5349 register vm_map_offset_t end, 5350 boolean_t user_wire) 5351{ 5352 return vm_map_unwire_nested(map, start, end, 5353 user_wire, (pmap_t)NULL, 0); 5354} 5355 5356 5357/* 5358 * vm_map_entry_delete: [ internal use only ] 5359 * 5360 * Deallocate the given entry from the target map. 5361 */ 5362static void 5363vm_map_entry_delete( 5364 register vm_map_t map, 5365 register vm_map_entry_t entry) 5366{ 5367 register vm_map_offset_t s, e; 5368 register vm_object_t object; 5369 register vm_map_t submap; 5370 5371 s = entry->vme_start; 5372 e = entry->vme_end; 5373 assert(page_aligned(s)); 5374 assert(page_aligned(e)); 5375 if (entry->map_aligned == TRUE) { 5376 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map))); 5377 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map))); 5378 } 5379 assert(entry->wired_count == 0); 5380 assert(entry->user_wired_count == 0); 5381 assert(!entry->permanent); 5382 5383 if (entry->is_sub_map) { 5384 object = NULL; 5385 submap = entry->object.sub_map; 5386 } else { 5387 submap = NULL; 5388 object = entry->object.vm_object; 5389 } 5390 5391 vm_map_store_entry_unlink(map, entry); 5392 map->size -= e - s; 5393 5394 vm_map_entry_dispose(map, entry); 5395 5396 vm_map_unlock(map); 5397 /* 5398 * Deallocate the object only after removing all 5399 * pmap entries pointing to its pages. 
5400 */ 5401 if (submap) 5402 vm_map_deallocate(submap); 5403 else 5404 vm_object_deallocate(object); 5405 5406} 5407 5408void 5409vm_map_submap_pmap_clean( 5410 vm_map_t map, 5411 vm_map_offset_t start, 5412 vm_map_offset_t end, 5413 vm_map_t sub_map, 5414 vm_map_offset_t offset) 5415{ 5416 vm_map_offset_t submap_start; 5417 vm_map_offset_t submap_end; 5418 vm_map_size_t remove_size; 5419 vm_map_entry_t entry; 5420 5421 submap_end = offset + (end - start); 5422 submap_start = offset; 5423 5424 vm_map_lock_read(sub_map); 5425 if(vm_map_lookup_entry(sub_map, offset, &entry)) { 5426 5427 remove_size = (entry->vme_end - entry->vme_start); 5428 if(offset > entry->vme_start) 5429 remove_size -= offset - entry->vme_start; 5430 5431 5432 if(submap_end < entry->vme_end) { 5433 remove_size -= 5434 entry->vme_end - submap_end; 5435 } 5436 if(entry->is_sub_map) { 5437 vm_map_submap_pmap_clean( 5438 sub_map, 5439 start, 5440 start + remove_size, 5441 entry->object.sub_map, 5442 entry->offset); 5443 } else { 5444 5445 if((map->mapped_in_other_pmaps) && (map->ref_count) 5446 && (entry->object.vm_object != NULL)) { 5447 vm_object_pmap_protect( 5448 entry->object.vm_object, 5449 entry->offset+(offset-entry->vme_start), 5450 remove_size, 5451 PMAP_NULL, 5452 entry->vme_start, 5453 VM_PROT_NONE); 5454 } else { 5455 pmap_remove(map->pmap, 5456 (addr64_t)start, 5457 (addr64_t)(start + remove_size)); 5458 } 5459 } 5460 } 5461 5462 entry = entry->vme_next; 5463 5464 while((entry != vm_map_to_entry(sub_map)) 5465 && (entry->vme_start < submap_end)) { 5466 remove_size = (entry->vme_end - entry->vme_start); 5467 if(submap_end < entry->vme_end) { 5468 remove_size -= entry->vme_end - submap_end; 5469 } 5470 if(entry->is_sub_map) { 5471 vm_map_submap_pmap_clean( 5472 sub_map, 5473 (start + entry->vme_start) - offset, 5474 ((start + entry->vme_start) - offset) + remove_size, 5475 entry->object.sub_map, 5476 entry->offset); 5477 } else { 5478 if((map->mapped_in_other_pmaps) && (map->ref_count) 5479 && (entry->object.vm_object != NULL)) { 5480 vm_object_pmap_protect( 5481 entry->object.vm_object, 5482 entry->offset, 5483 remove_size, 5484 PMAP_NULL, 5485 entry->vme_start, 5486 VM_PROT_NONE); 5487 } else { 5488 pmap_remove(map->pmap, 5489 (addr64_t)((start + entry->vme_start) 5490 - offset), 5491 (addr64_t)(((start + entry->vme_start) 5492 - offset) + remove_size)); 5493 } 5494 } 5495 entry = entry->vme_next; 5496 } 5497 vm_map_unlock_read(sub_map); 5498 return; 5499} 5500 5501/* 5502 * vm_map_delete: [ internal use only ] 5503 * 5504 * Deallocates the given address range from the target map. 5505 * Removes all user wirings. Unwires one kernel wiring if 5506 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go 5507 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps 5508 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set. 5509 * 5510 * This routine is called with map locked and leaves map locked. 5511 */ 5512static kern_return_t 5513vm_map_delete( 5514 vm_map_t map, 5515 vm_map_offset_t start, 5516 vm_map_offset_t end, 5517 int flags, 5518 vm_map_t zap_map) 5519{ 5520 vm_map_entry_t entry, next; 5521 struct vm_map_entry *first_entry, tmp_entry; 5522 register vm_map_offset_t s; 5523 register vm_object_t object; 5524 boolean_t need_wakeup; 5525 unsigned int last_timestamp = ~0; /* unlikely value */ 5526 int interruptible; 5527 5528 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ? 
THREAD_ABORTSAFE : THREAD_UNINT; 5530 5531 /* 5532 * All our DMA I/O operations in IOKit are currently done by 5533 * wiring through the map entries of the task requesting the I/O. 5534 * Because of this, we must always wait for kernel wirings 5535 * to go away on the entries before deleting them. 5536 * 5537 * Any caller who wants to actually remove a kernel wiring 5538 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to 5539 * properly remove one wiring instead of blasting through 5540 * them all. 5541 */ 5542 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE; 5543 5544 while(1) { 5545 /* 5546 * Find the start of the region, and clip it 5547 */ 5548 if (vm_map_lookup_entry(map, start, &first_entry)) { 5549 entry = first_entry; 5550 if (map == kalloc_map && 5551 (entry->vme_start != start || 5552 entry->vme_end != end)) { 5553 panic("vm_map_delete(%p,0x%llx,0x%llx): " 5554 "mismatched entry %p [0x%llx:0x%llx]\n", 5555 map, 5556 (uint64_t)start, 5557 (uint64_t)end, 5558 entry, 5559 (uint64_t)entry->vme_start, 5560 (uint64_t)entry->vme_end); 5561 } 5562 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ 5563 start = SUPERPAGE_ROUND_DOWN(start); 5564 continue; 5565 } 5566 if (start == entry->vme_start) { 5567 /* 5568 * No need to clip. We don't want to cause 5569 * any unnecessary unnesting in this case... 5570 */ 5571 } else { 5572 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && 5573 entry->map_aligned && 5574 !VM_MAP_PAGE_ALIGNED( 5575 start, 5576 VM_MAP_PAGE_MASK(map))) { 5577 /* 5578 * The entry will no longer be 5579 * map-aligned after clipping 5580 * and the caller said it's OK. 5581 */ 5582 entry->map_aligned = FALSE; 5583 } 5584 if (map == kalloc_map) { 5585 panic("vm_map_delete(%p,0x%llx,0x%llx):" 5586 " clipping %p at 0x%llx\n", 5587 map, 5588 (uint64_t)start, 5589 (uint64_t)end, 5590 entry, 5591 (uint64_t)start); 5592 } 5593 vm_map_clip_start(map, entry, start); 5594 } 5595 5596 /* 5597 * Fix the lookup hint now, rather than each 5598 * time through the loop. 5599 */ 5600 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5601 } else { 5602 if (map->pmap == kernel_pmap && 5603 map->ref_count != 0) { 5604 panic("vm_map_delete(%p,0x%llx,0x%llx): " 5605 "no map entry at 0x%llx\n", 5606 map, 5607 (uint64_t)start, 5608 (uint64_t)end, 5609 (uint64_t)start); 5610 } 5611 entry = first_entry->vme_next; 5612 } 5613 break; 5614 } 5615 if (entry->superpage_size) 5616 end = SUPERPAGE_ROUND_UP(end); 5617 5618 need_wakeup = FALSE; 5619 /* 5620 * Step through all entries in this region 5621 */ 5622 s = entry->vme_start; 5623 while ((entry != vm_map_to_entry(map)) && (s < end)) { 5624 /* 5625 * At this point, we have deleted all the memory entries 5626 * between "start" and "s". We still need to delete 5627 * all memory entries between "s" and "end". 5628 * While we were blocked and the map was unlocked, some 5629 * new memory entries could have been re-allocated between 5630 * "start" and "s" and we don't want to mess with those. 5631 * Some of those entries could even have been re-assembled 5632 * with an entry after "s" (in vm_map_simplify_entry()), so 5633 * we may have to vm_map_clip_start() again. 5634 */ 5635 5636 if (entry->vme_start >= s) { 5637 /* 5638 * This entry starts on or after "s" 5639 * so no need to clip its start. 5640 */ 5641 } else { 5642 /* 5643 * This entry has been re-assembled by a 5644 * vm_map_simplify_entry(). We need to 5645 * re-clip its start.
5646 */ 5647 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && 5648 entry->map_aligned && 5649 !VM_MAP_PAGE_ALIGNED(s, 5650 VM_MAP_PAGE_MASK(map))) { 5651 /* 5652 * The entry will no longer be map-aligned 5653 * after clipping and the caller said it's OK. 5654 */ 5655 entry->map_aligned = FALSE; 5656 } 5657 if (map == kalloc_map) { 5658 panic("vm_map_delete(%p,0x%llx,0x%llx): " 5659 "clipping %p at 0x%llx\n", 5660 map, 5661 (uint64_t)start, 5662 (uint64_t)end, 5663 entry, 5664 (uint64_t)s); 5665 } 5666 vm_map_clip_start(map, entry, s); 5667 } 5668 if (entry->vme_end <= end) { 5669 /* 5670 * This entry is going away completely, so no need 5671 * to clip and possibly cause an unnecessary unnesting. 5672 */ 5673 } else { 5674 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) && 5675 entry->map_aligned && 5676 !VM_MAP_PAGE_ALIGNED(end, 5677 VM_MAP_PAGE_MASK(map))) { 5678 /* 5679 * The entry will no longer be map-aligned 5680 * after clipping and the caller said it's OK. 5681 */ 5682 entry->map_aligned = FALSE; 5683 } 5684 if (map == kalloc_map) { 5685 panic("vm_map_delete(%p,0x%llx,0x%llx): " 5686 "clipping %p at 0x%llx\n", 5687 map, 5688 (uint64_t)start, 5689 (uint64_t)end, 5690 entry, 5691 (uint64_t)end); 5692 } 5693 vm_map_clip_end(map, entry, end); 5694 } 5695 5696 if (entry->permanent) { 5697 panic("attempt to remove permanent VM map entry " 5698 "%p [0x%llx:0x%llx]\n", 5699 entry, (uint64_t) s, (uint64_t) end); 5700 } 5701 5702 5703 if (entry->in_transition) { 5704 wait_result_t wait_result; 5705 5706 /* 5707 * Another thread is wiring/unwiring this entry. 5708 * Let the other thread know we are waiting. 5709 */ 5710 assert(s == entry->vme_start); 5711 entry->needs_wakeup = TRUE; 5712 5713 /* 5714 * wake up anybody waiting on entries that we have 5715 * already unwired/deleted. 5716 */ 5717 if (need_wakeup) { 5718 vm_map_entry_wakeup(map); 5719 need_wakeup = FALSE; 5720 } 5721 5722 wait_result = vm_map_entry_wait(map, interruptible); 5723 5724 if (interruptible && 5725 wait_result == THREAD_INTERRUPTED) { 5726 /* 5727 * We do not clear the needs_wakeup flag, 5728 * since we cannot tell if we were the only one. 5729 */ 5730 return KERN_ABORTED; 5731 } 5732 5733 /* 5734 * The entry could have been clipped or it 5735 * may not exist anymore. Look it up again. 5736 */ 5737 if (!vm_map_lookup_entry(map, s, &first_entry)) { 5738 assert((map != kernel_map) && 5739 (!entry->is_sub_map)); 5740 /* 5741 * User: use the next entry 5742 */ 5743 entry = first_entry->vme_next; 5744 s = entry->vme_start; 5745 } else { 5746 entry = first_entry; 5747 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5748 } 5749 last_timestamp = map->timestamp; 5750 continue; 5751 } /* end in_transition */ 5752 5753 if (entry->wired_count) { 5754 boolean_t user_wire; 5755 5756 user_wire = entry->user_wired_count > 0; 5757 5758 /* 5759 * Remove a kernel wiring if requested 5760 */ 5761 if (flags & VM_MAP_REMOVE_KUNWIRE) { 5762 entry->wired_count--; 5763 } 5764 5765 /* 5766 * Remove all user wirings for proper accounting 5767 */ 5768 if (entry->user_wired_count > 0) { 5769 while (entry->user_wired_count) 5770 subtract_wire_counts(map, entry, user_wire); 5771 } 5772 5773 if (entry->wired_count != 0) { 5774 assert(map != kernel_map); 5775 /* 5776 * Cannot continue. Typical case is when 5777 * a user thread has physical io pending on 5778 * on this page. Either wait for the 5779 * kernel wiring to go away or return an 5780 * error. 
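 * (Concretely: with VM_MAP_REMOVE_WAIT_FOR_KWIRE set -- which this
 * routine forces on above -- we mark needs_wakeup and block in
 * vm_map_entry_wait(); otherwise the delete fails with KERN_FAILURE.)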
5781 */ 5782 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) { 5783 wait_result_t wait_result; 5784 5785 assert(s == entry->vme_start); 5786 entry->needs_wakeup = TRUE; 5787 wait_result = vm_map_entry_wait(map, 5788 interruptible); 5789 5790 if (interruptible && 5791 wait_result == THREAD_INTERRUPTED) { 5792 /* 5793 * We do not clear the 5794 * needs_wakeup flag, since we 5795 * cannot tell if we were the 5796 * only one. 5797 */ 5798 return KERN_ABORTED; 5799 } 5800 5801 /* 5802 * The entry could have been clipped or 5803 * it may not exist anymore. Look it 5804 * up again. 5805 */ 5806 if (!vm_map_lookup_entry(map, s, 5807 &first_entry)) { 5808 assert(map != kernel_map); 5809 /* 5810 * User: use the next entry 5811 */ 5812 entry = first_entry->vme_next; 5813 s = entry->vme_start; 5814 } else { 5815 entry = first_entry; 5816 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5817 } 5818 last_timestamp = map->timestamp; 5819 continue; 5820 } 5821 else { 5822 return KERN_FAILURE; 5823 } 5824 } 5825 5826 entry->in_transition = TRUE; 5827 /* 5828 * copy current entry. see comment in vm_map_wire() 5829 */ 5830 tmp_entry = *entry; 5831 assert(s == entry->vme_start); 5832 5833 /* 5834 * We can unlock the map now. The in_transition 5835 * state guarentees existance of the entry. 5836 */ 5837 vm_map_unlock(map); 5838 5839 if (tmp_entry.is_sub_map) { 5840 vm_map_t sub_map; 5841 vm_map_offset_t sub_start, sub_end; 5842 pmap_t pmap; 5843 vm_map_offset_t pmap_addr; 5844 5845 5846 sub_map = tmp_entry.object.sub_map; 5847 sub_start = tmp_entry.offset; 5848 sub_end = sub_start + (tmp_entry.vme_end - 5849 tmp_entry.vme_start); 5850 if (tmp_entry.use_pmap) { 5851 pmap = sub_map->pmap; 5852 pmap_addr = tmp_entry.vme_start; 5853 } else { 5854 pmap = map->pmap; 5855 pmap_addr = tmp_entry.vme_start; 5856 } 5857 (void) vm_map_unwire_nested(sub_map, 5858 sub_start, sub_end, 5859 user_wire, 5860 pmap, pmap_addr); 5861 } else { 5862 5863 if (tmp_entry.object.vm_object == kernel_object) { 5864 pmap_protect_options( 5865 map->pmap, 5866 tmp_entry.vme_start, 5867 tmp_entry.vme_end, 5868 VM_PROT_NONE, 5869 PMAP_OPTIONS_REMOVE, 5870 NULL); 5871 } 5872 vm_fault_unwire(map, &tmp_entry, 5873 tmp_entry.object.vm_object == kernel_object, 5874 map->pmap, tmp_entry.vme_start); 5875 } 5876 5877 vm_map_lock(map); 5878 5879 if (last_timestamp+1 != map->timestamp) { 5880 /* 5881 * Find the entry again. It could have 5882 * been clipped after we unlocked the map. 5883 */ 5884 if (!vm_map_lookup_entry(map, s, &first_entry)){ 5885 assert((map != kernel_map) && 5886 (!entry->is_sub_map)); 5887 first_entry = first_entry->vme_next; 5888 s = first_entry->vme_start; 5889 } else { 5890 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5891 } 5892 } else { 5893 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5894 first_entry = entry; 5895 } 5896 5897 last_timestamp = map->timestamp; 5898 5899 entry = first_entry; 5900 while ((entry != vm_map_to_entry(map)) && 5901 (entry->vme_start < tmp_entry.vme_end)) { 5902 assert(entry->in_transition); 5903 entry->in_transition = FALSE; 5904 if (entry->needs_wakeup) { 5905 entry->needs_wakeup = FALSE; 5906 need_wakeup = TRUE; 5907 } 5908 entry = entry->vme_next; 5909 } 5910 /* 5911 * We have unwired the entry(s). Go back and 5912 * delete them. 
5913 */ 5914 entry = first_entry; 5915 continue; 5916 } 5917 5918 /* entry is unwired */ 5919 assert(entry->wired_count == 0); 5920 assert(entry->user_wired_count == 0); 5921 5922 assert(s == entry->vme_start); 5923 5924 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) { 5925 /* 5926 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to 5927 * vm_map_delete(), some map entries might have been 5928 * transferred to a "zap_map", which doesn't have a 5929 * pmap. The original pmap has already been flushed 5930 * in the vm_map_delete() call targeting the original 5931 * map, but when we get to destroying the "zap_map", 5932 * we don't have any pmap to flush, so let's just skip 5933 * all this. 5934 */ 5935 } else if (entry->is_sub_map) { 5936 if (entry->use_pmap) { 5937#ifndef NO_NESTED_PMAP 5938 pmap_unnest(map->pmap, 5939 (addr64_t)entry->vme_start, 5940 entry->vme_end - entry->vme_start); 5941#endif /* NO_NESTED_PMAP */ 5942 if ((map->mapped_in_other_pmaps) && (map->ref_count)) { 5943 /* clean up parent map/maps */ 5944 vm_map_submap_pmap_clean( 5945 map, entry->vme_start, 5946 entry->vme_end, 5947 entry->object.sub_map, 5948 entry->offset); 5949 } 5950 } else { 5951 vm_map_submap_pmap_clean( 5952 map, entry->vme_start, entry->vme_end, 5953 entry->object.sub_map, 5954 entry->offset); 5955 } 5956 } else if (entry->object.vm_object != kernel_object && 5957 entry->object.vm_object != compressor_object) { 5958 object = entry->object.vm_object; 5959 if ((map->mapped_in_other_pmaps) && (map->ref_count)) { 5960 vm_object_pmap_protect_options( 5961 object, entry->offset, 5962 entry->vme_end - entry->vme_start, 5963 PMAP_NULL, 5964 entry->vme_start, 5965 VM_PROT_NONE, 5966 PMAP_OPTIONS_REMOVE); 5967 } else if ((entry->object.vm_object != 5968 VM_OBJECT_NULL) || 5969 (map->pmap == kernel_pmap)) { 5970 /* Remove translations associated 5971 * with this range unless the entry 5972 * does not have an object, or 5973 * it's the kernel map or a descendant 5974 * since the platform could potentially 5975 * create "backdoor" mappings invisible 5976 * to the VM. It is expected that 5977 * objectless, non-kernel ranges 5978 * do not have such VM invisible 5979 * translations. 5980 */ 5981 pmap_remove_options(map->pmap, 5982 (addr64_t)entry->vme_start, 5983 (addr64_t)entry->vme_end, 5984 PMAP_OPTIONS_REMOVE); 5985 } 5986 } 5987 5988 if (entry->iokit_acct) { 5989 /* alternate accounting */ 5990 vm_map_iokit_unmapped_region(map, 5991 (entry->vme_end - 5992 entry->vme_start)); 5993 entry->iokit_acct = FALSE; 5994 } 5995 5996 /* 5997 * All pmap mappings for this map entry must have been 5998 * cleared by now. 5999 */ 6000#if DEBUG 6001 assert(vm_map_pmap_is_empty(map, 6002 entry->vme_start, 6003 entry->vme_end)); 6004#endif /* DEBUG */ 6005 6006 next = entry->vme_next; 6007 6008 if (map->pmap == kernel_pmap && 6009 map->ref_count != 0 && 6010 entry->vme_end < end && 6011 (next == vm_map_to_entry(map) || 6012 next->vme_start != entry->vme_end)) { 6013 panic("vm_map_delete(%p,0x%llx,0x%llx): " 6014 "hole after %p at 0x%llx\n", 6015 map, 6016 (uint64_t)start, 6017 (uint64_t)end, 6018 entry, 6019 (uint64_t)entry->vme_end); 6020 } 6021 6022 s = next->vme_start; 6023 last_timestamp = map->timestamp; 6024 6025 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) && 6026 zap_map != VM_MAP_NULL) { 6027 vm_map_size_t entry_size; 6028 /* 6029 * The caller wants to save the affected VM map entries 6030 * into the "zap_map". The caller will take care of 6031 * these entries. 6032 */ 6033 /* unlink the entry from "map" ... 
*/ 6034 vm_map_store_entry_unlink(map, entry); 6035 /* ... and add it to the end of the "zap_map" */ 6036 vm_map_store_entry_link(zap_map, 6037 vm_map_last_entry(zap_map), 6038 entry); 6039 entry_size = entry->vme_end - entry->vme_start; 6040 map->size -= entry_size; 6041 zap_map->size += entry_size; 6042 /* we didn't unlock the map, so no timestamp increase */ 6043 last_timestamp--; 6044 } else { 6045 vm_map_entry_delete(map, entry); 6046 /* vm_map_entry_delete unlocks the map */ 6047 vm_map_lock(map); 6048 } 6049 6050 entry = next; 6051 6052 if(entry == vm_map_to_entry(map)) { 6053 break; 6054 } 6055 if (last_timestamp+1 != map->timestamp) { 6056 /* 6057 * we are responsible for deleting everything 6058 * from the give space, if someone has interfered 6059 * we pick up where we left off, back fills should 6060 * be all right for anyone except map_delete and 6061 * we have to assume that the task has been fully 6062 * disabled before we get here 6063 */ 6064 if (!vm_map_lookup_entry(map, s, &entry)){ 6065 entry = entry->vme_next; 6066 s = entry->vme_start; 6067 } else { 6068 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 6069 } 6070 /* 6071 * others can not only allocate behind us, we can 6072 * also see coalesce while we don't have the map lock 6073 */ 6074 if(entry == vm_map_to_entry(map)) { 6075 break; 6076 } 6077 } 6078 last_timestamp = map->timestamp; 6079 } 6080 6081 if (map->wait_for_space) 6082 thread_wakeup((event_t) map); 6083 /* 6084 * wake up anybody waiting on entries that we have already deleted. 6085 */ 6086 if (need_wakeup) 6087 vm_map_entry_wakeup(map); 6088 6089 return KERN_SUCCESS; 6090} 6091 6092/* 6093 * vm_map_remove: 6094 * 6095 * Remove the given address range from the target map. 6096 * This is the exported form of vm_map_delete. 6097 */ 6098kern_return_t 6099vm_map_remove( 6100 register vm_map_t map, 6101 register vm_map_offset_t start, 6102 register vm_map_offset_t end, 6103 register boolean_t flags) 6104{ 6105 register kern_return_t result; 6106 6107 vm_map_lock(map); 6108 VM_MAP_RANGE_CHECK(map, start, end); 6109 /* 6110 * For the zone_map, the kernel controls the allocation/freeing of memory. 6111 * Any free to the zone_map should be within the bounds of the map and 6112 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a 6113 * free to the zone_map into a no-op, there is a problem and we should 6114 * panic. 6115 */ 6116 if ((map == zone_map) && (start == end)) 6117 panic("Nothing being freed to the zone_map. start = end = %p\n", (void *)start); 6118 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL); 6119 vm_map_unlock(map); 6120 6121 return(result); 6122} 6123 6124 6125/* 6126 * Routine: vm_map_copy_discard 6127 * 6128 * Description: 6129 * Dispose of a map copy object (returned by 6130 * vm_map_copyin). 
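 *
 * Illustrative pairing only (hypothetical caller, not a call site in
 * this file): a routine that copies data in but then bails out might
 * do
 *     kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *     if (kr == KERN_SUCCESS && later_step_failed)
 *         vm_map_copy_discard(copy);  -- release the copied entries/objects
 * rather than leaking the copy object.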
6131 */ 6132void 6133vm_map_copy_discard( 6134 vm_map_copy_t copy) 6135{ 6136 if (copy == VM_MAP_COPY_NULL) 6137 return; 6138 6139 switch (copy->type) { 6140 case VM_MAP_COPY_ENTRY_LIST: 6141 while (vm_map_copy_first_entry(copy) != 6142 vm_map_copy_to_entry(copy)) { 6143 vm_map_entry_t entry = vm_map_copy_first_entry(copy); 6144 6145 vm_map_copy_entry_unlink(copy, entry); 6146 if (entry->is_sub_map) { 6147 vm_map_deallocate(entry->object.sub_map); 6148 } else { 6149 vm_object_deallocate(entry->object.vm_object); 6150 } 6151 vm_map_copy_entry_dispose(copy, entry); 6152 } 6153 break; 6154 case VM_MAP_COPY_OBJECT: 6155 vm_object_deallocate(copy->cpy_object); 6156 break; 6157 case VM_MAP_COPY_KERNEL_BUFFER: 6158 6159 /* 6160 * The vm_map_copy_t and possibly the data buffer were 6161 * allocated by a single call to kalloc(), i.e. the 6162 * vm_map_copy_t was not allocated out of the zone. 6163 */ 6164 kfree(copy, copy->cpy_kalloc_size); 6165 return; 6166 } 6167 zfree(vm_map_copy_zone, copy); 6168} 6169 6170/* 6171 * Routine: vm_map_copy_copy 6172 * 6173 * Description: 6174 * Move the information in a map copy object to 6175 * a new map copy object, leaving the old one 6176 * empty. 6177 * 6178 * This is used by kernel routines that need 6179 * to look at out-of-line data (in copyin form) 6180 * before deciding whether to return SUCCESS. 6181 * If the routine returns FAILURE, the original 6182 * copy object will be deallocated; therefore, 6183 * these routines must make a copy of the copy 6184 * object and leave the original empty so that 6185 * deallocation will not fail. 6186 */ 6187vm_map_copy_t 6188vm_map_copy_copy( 6189 vm_map_copy_t copy) 6190{ 6191 vm_map_copy_t new_copy; 6192 6193 if (copy == VM_MAP_COPY_NULL) 6194 return VM_MAP_COPY_NULL; 6195 6196 /* 6197 * Allocate a new copy object, and copy the information 6198 * from the old one into it. 6199 */ 6200 6201 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 6202 *new_copy = *copy; 6203 6204 if (copy->type == VM_MAP_COPY_ENTRY_LIST) { 6205 /* 6206 * The links in the entry chain must be 6207 * changed to point to the new copy object. 6208 */ 6209 vm_map_copy_first_entry(copy)->vme_prev 6210 = vm_map_copy_to_entry(new_copy); 6211 vm_map_copy_last_entry(copy)->vme_next 6212 = vm_map_copy_to_entry(new_copy); 6213 } 6214 6215 /* 6216 * Change the old copy object into one that contains 6217 * nothing to be deallocated. 6218 */ 6219 copy->type = VM_MAP_COPY_OBJECT; 6220 copy->cpy_object = VM_OBJECT_NULL; 6221 6222 /* 6223 * Return the new object. 6224 */ 6225 return new_copy; 6226} 6227 6228static kern_return_t 6229vm_map_overwrite_submap_recurse( 6230 vm_map_t dst_map, 6231 vm_map_offset_t dst_addr, 6232 vm_map_size_t dst_size) 6233{ 6234 vm_map_offset_t dst_end; 6235 vm_map_entry_t tmp_entry; 6236 vm_map_entry_t entry; 6237 kern_return_t result; 6238 boolean_t encountered_sub_map = FALSE; 6239 6240 6241 6242 /* 6243 * Verify that the destination is all writeable 6244 * initially. We have to trunc the destination 6245 * address and round the copy size or we'll end up 6246 * splitting entries in strange ways. 
6247 */ 6248 6249 dst_end = vm_map_round_page(dst_addr + dst_size, 6250 VM_MAP_PAGE_MASK(dst_map)); 6251 vm_map_lock(dst_map); 6252 6253start_pass_1: 6254 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) { 6255 vm_map_unlock(dst_map); 6256 return(KERN_INVALID_ADDRESS); 6257 } 6258 6259 vm_map_clip_start(dst_map, 6260 tmp_entry, 6261 vm_map_trunc_page(dst_addr, 6262 VM_MAP_PAGE_MASK(dst_map))); 6263 if (tmp_entry->is_sub_map) { 6264 /* clipping did unnest if needed */ 6265 assert(!tmp_entry->use_pmap); 6266 } 6267 6268 for (entry = tmp_entry;;) { 6269 vm_map_entry_t next; 6270 6271 next = entry->vme_next; 6272 while(entry->is_sub_map) { 6273 vm_map_offset_t sub_start; 6274 vm_map_offset_t sub_end; 6275 vm_map_offset_t local_end; 6276 6277 if (entry->in_transition) { 6278 /* 6279 * Say that we are waiting, and wait for entry. 6280 */ 6281 entry->needs_wakeup = TRUE; 6282 vm_map_entry_wait(dst_map, THREAD_UNINT); 6283 6284 goto start_pass_1; 6285 } 6286 6287 encountered_sub_map = TRUE; 6288 sub_start = entry->offset; 6289 6290 if(entry->vme_end < dst_end) 6291 sub_end = entry->vme_end; 6292 else 6293 sub_end = dst_end; 6294 sub_end -= entry->vme_start; 6295 sub_end += entry->offset; 6296 local_end = entry->vme_end; 6297 vm_map_unlock(dst_map); 6298 6299 result = vm_map_overwrite_submap_recurse( 6300 entry->object.sub_map, 6301 sub_start, 6302 sub_end - sub_start); 6303 6304 if(result != KERN_SUCCESS) 6305 return result; 6306 if (dst_end <= entry->vme_end) 6307 return KERN_SUCCESS; 6308 vm_map_lock(dst_map); 6309 if(!vm_map_lookup_entry(dst_map, local_end, 6310 &tmp_entry)) { 6311 vm_map_unlock(dst_map); 6312 return(KERN_INVALID_ADDRESS); 6313 } 6314 entry = tmp_entry; 6315 next = entry->vme_next; 6316 } 6317 6318 if ( ! (entry->protection & VM_PROT_WRITE)) { 6319 vm_map_unlock(dst_map); 6320 return(KERN_PROTECTION_FAILURE); 6321 } 6322 6323 /* 6324 * If the entry is in transition, we must wait 6325 * for it to exit that state. Anything could happen 6326 * when we unlock the map, so start over. 6327 */ 6328 if (entry->in_transition) { 6329 6330 /* 6331 * Say that we are waiting, and wait for entry. 6332 */ 6333 entry->needs_wakeup = TRUE; 6334 vm_map_entry_wait(dst_map, THREAD_UNINT); 6335 6336 goto start_pass_1; 6337 } 6338 6339/* 6340 * our range is contained completely within this map entry 6341 */ 6342 if (dst_end <= entry->vme_end) { 6343 vm_map_unlock(dst_map); 6344 return KERN_SUCCESS; 6345 } 6346/* 6347 * check that range specified is contiguous region 6348 */ 6349 if ((next == vm_map_to_entry(dst_map)) || 6350 (next->vme_start != entry->vme_end)) { 6351 vm_map_unlock(dst_map); 6352 return(KERN_INVALID_ADDRESS); 6353 } 6354 6355 /* 6356 * Check for permanent objects in the destination. 6357 */ 6358 if ((entry->object.vm_object != VM_OBJECT_NULL) && 6359 ((!entry->object.vm_object->internal) || 6360 (entry->object.vm_object->true_share))) { 6361 if(encountered_sub_map) { 6362 vm_map_unlock(dst_map); 6363 return(KERN_FAILURE); 6364 } 6365 } 6366 6367 6368 entry = next; 6369 }/* for */ 6370 vm_map_unlock(dst_map); 6371 return(KERN_SUCCESS); 6372} 6373 6374/* 6375 * Routine: vm_map_copy_overwrite 6376 * 6377 * Description: 6378 * Copy the memory described by the map copy 6379 * object (copy; returned by vm_map_copyin) onto 6380 * the specified destination region (dst_map, dst_addr). 6381 * The destination must be writeable. 6382 * 6383 * Unlike vm_map_copyout, this routine actually 6384 * writes over previously-mapped memory. 
If the 6385 * previous mapping was to a permanent (user-supplied) 6386 * memory object, it is preserved. 6387 * 6388 * The attributes (protection and inheritance) of the 6389 * destination region are preserved. 6390 * 6391 * If successful, consumes the copy object. 6392 * Otherwise, the caller is responsible for it. 6393 * 6394 * Implementation notes: 6395 * To overwrite aligned temporary virtual memory, it is 6396 * sufficient to remove the previous mapping and insert 6397 * the new copy. This replacement is done either on 6398 * the whole region (if no permanent virtual memory 6399 * objects are embedded in the destination region) or 6400 * in individual map entries. 6401 * 6402 * To overwrite permanent virtual memory , it is necessary 6403 * to copy each page, as the external memory management 6404 * interface currently does not provide any optimizations. 6405 * 6406 * Unaligned memory also has to be copied. It is possible 6407 * to use 'vm_trickery' to copy the aligned data. This is 6408 * not done but not hard to implement. 6409 * 6410 * Once a page of permanent memory has been overwritten, 6411 * it is impossible to interrupt this function; otherwise, 6412 * the call would be neither atomic nor location-independent. 6413 * The kernel-state portion of a user thread must be 6414 * interruptible. 6415 * 6416 * It may be expensive to forward all requests that might 6417 * overwrite permanent memory (vm_write, vm_copy) to 6418 * uninterruptible kernel threads. This routine may be 6419 * called by interruptible threads; however, success is 6420 * not guaranteed -- if the request cannot be performed 6421 * atomically and interruptibly, an error indication is 6422 * returned. 6423 */ 6424 6425static kern_return_t 6426vm_map_copy_overwrite_nested( 6427 vm_map_t dst_map, 6428 vm_map_address_t dst_addr, 6429 vm_map_copy_t copy, 6430 boolean_t interruptible, 6431 pmap_t pmap, 6432 boolean_t discard_on_success) 6433{ 6434 vm_map_offset_t dst_end; 6435 vm_map_entry_t tmp_entry; 6436 vm_map_entry_t entry; 6437 kern_return_t kr; 6438 boolean_t aligned = TRUE; 6439 boolean_t contains_permanent_objects = FALSE; 6440 boolean_t encountered_sub_map = FALSE; 6441 vm_map_offset_t base_addr; 6442 vm_map_size_t copy_size; 6443 vm_map_size_t total_size; 6444 6445 6446 /* 6447 * Check for null copy object. 6448 */ 6449 6450 if (copy == VM_MAP_COPY_NULL) 6451 return(KERN_SUCCESS); 6452 6453 /* 6454 * Check for special kernel buffer allocated 6455 * by new_ipc_kmsg_copyin. 6456 */ 6457 6458 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { 6459 return(vm_map_copyout_kernel_buffer( 6460 dst_map, &dst_addr, 6461 copy, TRUE, discard_on_success)); 6462 } 6463 6464 /* 6465 * Only works for entry lists at the moment. Will 6466 * support page lists later. 6467 */ 6468 6469 assert(copy->type == VM_MAP_COPY_ENTRY_LIST); 6470 6471 if (copy->size == 0) { 6472 if (discard_on_success) 6473 vm_map_copy_discard(copy); 6474 return(KERN_SUCCESS); 6475 } 6476 6477 /* 6478 * Verify that the destination is all writeable 6479 * initially. We have to trunc the destination 6480 * address and round the copy size or we'll end up 6481 * splitting entries in strange ways. 
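	 *
	 * For example (illustrative numbers, assuming 4K map pages): with
	 * dst_addr = 0x7000 and copy->size = 0x2800, the address is page
	 * aligned but the size is not, so "aligned" goes FALSE and
	 * dst_end = vm_map_round_page(0x7000 + 0x2800, 0xFFF) = 0xA000.
	 * Only when dst_addr, copy->offset and copy->size are all page
	 * multiples do we take the fully aligned path below, with
	 * dst_end = dst_addr + copy->size.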
6482 */ 6483 6484 if (!VM_MAP_PAGE_ALIGNED(copy->size, 6485 VM_MAP_PAGE_MASK(dst_map)) || 6486 !VM_MAP_PAGE_ALIGNED(copy->offset, 6487 VM_MAP_PAGE_MASK(dst_map)) || 6488 !VM_MAP_PAGE_ALIGNED(dst_addr, 6489 VM_MAP_PAGE_MASK(dst_map))) 6490 { 6491 aligned = FALSE; 6492 dst_end = vm_map_round_page(dst_addr + copy->size, 6493 VM_MAP_PAGE_MASK(dst_map)); 6494 } else { 6495 dst_end = dst_addr + copy->size; 6496 } 6497 6498 vm_map_lock(dst_map); 6499 6500 /* LP64todo - remove this check when vm_map_commpage64() 6501 * no longer has to stuff in a map_entry for the commpage 6502 * above the map's max_offset. 6503 */ 6504 if (dst_addr >= dst_map->max_offset) { 6505 vm_map_unlock(dst_map); 6506 return(KERN_INVALID_ADDRESS); 6507 } 6508 6509start_pass_1: 6510 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) { 6511 vm_map_unlock(dst_map); 6512 return(KERN_INVALID_ADDRESS); 6513 } 6514 vm_map_clip_start(dst_map, 6515 tmp_entry, 6516 vm_map_trunc_page(dst_addr, 6517 VM_MAP_PAGE_MASK(dst_map))); 6518 for (entry = tmp_entry;;) { 6519 vm_map_entry_t next = entry->vme_next; 6520 6521 while(entry->is_sub_map) { 6522 vm_map_offset_t sub_start; 6523 vm_map_offset_t sub_end; 6524 vm_map_offset_t local_end; 6525 6526 if (entry->in_transition) { 6527 6528 /* 6529 * Say that we are waiting, and wait for entry. 6530 */ 6531 entry->needs_wakeup = TRUE; 6532 vm_map_entry_wait(dst_map, THREAD_UNINT); 6533 6534 goto start_pass_1; 6535 } 6536 6537 local_end = entry->vme_end; 6538 if (!(entry->needs_copy)) { 6539 /* if needs_copy we are a COW submap */ 6540 /* in such a case we just replace so */ 6541 /* there is no need for the follow- */ 6542 /* ing check. */ 6543 encountered_sub_map = TRUE; 6544 sub_start = entry->offset; 6545 6546 if(entry->vme_end < dst_end) 6547 sub_end = entry->vme_end; 6548 else 6549 sub_end = dst_end; 6550 sub_end -= entry->vme_start; 6551 sub_end += entry->offset; 6552 vm_map_unlock(dst_map); 6553 6554 kr = vm_map_overwrite_submap_recurse( 6555 entry->object.sub_map, 6556 sub_start, 6557 sub_end - sub_start); 6558 if(kr != KERN_SUCCESS) 6559 return kr; 6560 vm_map_lock(dst_map); 6561 } 6562 6563 if (dst_end <= entry->vme_end) 6564 goto start_overwrite; 6565 if(!vm_map_lookup_entry(dst_map, local_end, 6566 &entry)) { 6567 vm_map_unlock(dst_map); 6568 return(KERN_INVALID_ADDRESS); 6569 } 6570 next = entry->vme_next; 6571 } 6572 6573 if ( ! (entry->protection & VM_PROT_WRITE)) { 6574 vm_map_unlock(dst_map); 6575 return(KERN_PROTECTION_FAILURE); 6576 } 6577 6578 /* 6579 * If the entry is in transition, we must wait 6580 * for it to exit that state. Anything could happen 6581 * when we unlock the map, so start over. 6582 */ 6583 if (entry->in_transition) { 6584 6585 /* 6586 * Say that we are waiting, and wait for entry. 6587 */ 6588 entry->needs_wakeup = TRUE; 6589 vm_map_entry_wait(dst_map, THREAD_UNINT); 6590 6591 goto start_pass_1; 6592 } 6593 6594/* 6595 * our range is contained completely within this map entry 6596 */ 6597 if (dst_end <= entry->vme_end) 6598 break; 6599/* 6600 * check that range specified is contiguous region 6601 */ 6602 if ((next == vm_map_to_entry(dst_map)) || 6603 (next->vme_start != entry->vme_end)) { 6604 vm_map_unlock(dst_map); 6605 return(KERN_INVALID_ADDRESS); 6606 } 6607 6608 6609 /* 6610 * Check for permanent objects in the destination. 
6611 */ 6612 if ((entry->object.vm_object != VM_OBJECT_NULL) && 6613 ((!entry->object.vm_object->internal) || 6614 (entry->object.vm_object->true_share))) { 6615 contains_permanent_objects = TRUE; 6616 } 6617 6618 entry = next; 6619 }/* for */ 6620 6621start_overwrite: 6622 /* 6623 * If there are permanent objects in the destination, then 6624 * the copy cannot be interrupted. 6625 */ 6626 6627 if (interruptible && contains_permanent_objects) { 6628 vm_map_unlock(dst_map); 6629 return(KERN_FAILURE); /* XXX */ 6630 } 6631 6632 /* 6633 * 6634 * Make a second pass, overwriting the data 6635 * At the beginning of each loop iteration, 6636 * the next entry to be overwritten is "tmp_entry" 6637 * (initially, the value returned from the lookup above), 6638 * and the starting address expected in that entry 6639 * is "start". 6640 */ 6641 6642 total_size = copy->size; 6643 if(encountered_sub_map) { 6644 copy_size = 0; 6645 /* re-calculate tmp_entry since we've had the map */ 6646 /* unlocked */ 6647 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) { 6648 vm_map_unlock(dst_map); 6649 return(KERN_INVALID_ADDRESS); 6650 } 6651 } else { 6652 copy_size = copy->size; 6653 } 6654 6655 base_addr = dst_addr; 6656 while(TRUE) { 6657 /* deconstruct the copy object and do in parts */ 6658 /* only in sub_map, interruptable case */ 6659 vm_map_entry_t copy_entry; 6660 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL; 6661 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL; 6662 int nentries; 6663 int remaining_entries = 0; 6664 vm_map_offset_t new_offset = 0; 6665 6666 for (entry = tmp_entry; copy_size == 0;) { 6667 vm_map_entry_t next; 6668 6669 next = entry->vme_next; 6670 6671 /* tmp_entry and base address are moved along */ 6672 /* each time we encounter a sub-map. Otherwise */ 6673 /* entry can outpase tmp_entry, and the copy_size */ 6674 /* may reflect the distance between them */ 6675 /* if the current entry is found to be in transition */ 6676 /* we will start over at the beginning or the last */ 6677 /* encounter of a submap as dictated by base_addr */ 6678 /* we will zero copy_size accordingly. */ 6679 if (entry->in_transition) { 6680 /* 6681 * Say that we are waiting, and wait for entry. 6682 */ 6683 entry->needs_wakeup = TRUE; 6684 vm_map_entry_wait(dst_map, THREAD_UNINT); 6685 6686 if(!vm_map_lookup_entry(dst_map, base_addr, 6687 &tmp_entry)) { 6688 vm_map_unlock(dst_map); 6689 return(KERN_INVALID_ADDRESS); 6690 } 6691 copy_size = 0; 6692 entry = tmp_entry; 6693 continue; 6694 } 6695 if(entry->is_sub_map) { 6696 vm_map_offset_t sub_start; 6697 vm_map_offset_t sub_end; 6698 vm_map_offset_t local_end; 6699 6700 if (entry->needs_copy) { 6701 /* if this is a COW submap */ 6702 /* just back the range with a */ 6703 /* anonymous entry */ 6704 if(entry->vme_end < dst_end) 6705 sub_end = entry->vme_end; 6706 else 6707 sub_end = dst_end; 6708 if(entry->vme_start < base_addr) 6709 sub_start = base_addr; 6710 else 6711 sub_start = entry->vme_start; 6712 vm_map_clip_end( 6713 dst_map, entry, sub_end); 6714 vm_map_clip_start( 6715 dst_map, entry, sub_start); 6716 assert(!entry->use_pmap); 6717 entry->is_sub_map = FALSE; 6718 vm_map_deallocate( 6719 entry->object.sub_map); 6720 entry->object.sub_map = NULL; 6721 entry->is_shared = FALSE; 6722 entry->needs_copy = FALSE; 6723 entry->offset = 0; 6724 /* 6725 * XXX FBDP 6726 * We should propagate the protections 6727 * of the submap entry here instead 6728 * of forcing them to VM_PROT_ALL... 
6729 * Or better yet, we should inherit 6730 * the protection of the copy_entry. 6731 */ 6732 entry->protection = VM_PROT_ALL; 6733 entry->max_protection = VM_PROT_ALL; 6734 entry->wired_count = 0; 6735 entry->user_wired_count = 0; 6736 if(entry->inheritance 6737 == VM_INHERIT_SHARE) 6738 entry->inheritance = VM_INHERIT_COPY; 6739 continue; 6740 } 6741 /* first take care of any non-sub_map */ 6742 /* entries to send */ 6743 if(base_addr < entry->vme_start) { 6744 /* stuff to send */ 6745 copy_size = 6746 entry->vme_start - base_addr; 6747 break; 6748 } 6749 sub_start = entry->offset; 6750 6751 if(entry->vme_end < dst_end) 6752 sub_end = entry->vme_end; 6753 else 6754 sub_end = dst_end; 6755 sub_end -= entry->vme_start; 6756 sub_end += entry->offset; 6757 local_end = entry->vme_end; 6758 vm_map_unlock(dst_map); 6759 copy_size = sub_end - sub_start; 6760 6761 /* adjust the copy object */ 6762 if (total_size > copy_size) { 6763 vm_map_size_t local_size = 0; 6764 vm_map_size_t entry_size; 6765 6766 nentries = 1; 6767 new_offset = copy->offset; 6768 copy_entry = vm_map_copy_first_entry(copy); 6769 while(copy_entry != 6770 vm_map_copy_to_entry(copy)){ 6771 entry_size = copy_entry->vme_end - 6772 copy_entry->vme_start; 6773 if((local_size < copy_size) && 6774 ((local_size + entry_size) 6775 >= copy_size)) { 6776 vm_map_copy_clip_end(copy, 6777 copy_entry, 6778 copy_entry->vme_start + 6779 (copy_size - local_size)); 6780 entry_size = copy_entry->vme_end - 6781 copy_entry->vme_start; 6782 local_size += entry_size; 6783 new_offset += entry_size; 6784 } 6785 if(local_size >= copy_size) { 6786 next_copy = copy_entry->vme_next; 6787 copy_entry->vme_next = 6788 vm_map_copy_to_entry(copy); 6789 previous_prev = 6790 copy->cpy_hdr.links.prev; 6791 copy->cpy_hdr.links.prev = copy_entry; 6792 copy->size = copy_size; 6793 remaining_entries = 6794 copy->cpy_hdr.nentries; 6795 remaining_entries -= nentries; 6796 copy->cpy_hdr.nentries = nentries; 6797 break; 6798 } else { 6799 local_size += entry_size; 6800 new_offset += entry_size; 6801 nentries++; 6802 } 6803 copy_entry = copy_entry->vme_next; 6804 } 6805 } 6806 6807 if((entry->use_pmap) && (pmap == NULL)) { 6808 kr = vm_map_copy_overwrite_nested( 6809 entry->object.sub_map, 6810 sub_start, 6811 copy, 6812 interruptible, 6813 entry->object.sub_map->pmap, 6814 TRUE); 6815 } else if (pmap != NULL) { 6816 kr = vm_map_copy_overwrite_nested( 6817 entry->object.sub_map, 6818 sub_start, 6819 copy, 6820 interruptible, pmap, 6821 TRUE); 6822 } else { 6823 kr = vm_map_copy_overwrite_nested( 6824 entry->object.sub_map, 6825 sub_start, 6826 copy, 6827 interruptible, 6828 dst_map->pmap, 6829 TRUE); 6830 } 6831 if(kr != KERN_SUCCESS) { 6832 if(next_copy != NULL) { 6833 copy->cpy_hdr.nentries += 6834 remaining_entries; 6835 copy->cpy_hdr.links.prev->vme_next = 6836 next_copy; 6837 copy->cpy_hdr.links.prev 6838 = previous_prev; 6839 copy->size = total_size; 6840 } 6841 return kr; 6842 } 6843 if (dst_end <= local_end) { 6844 return(KERN_SUCCESS); 6845 } 6846 /* otherwise copy no longer exists, it was */ 6847 /* destroyed after successful copy_overwrite */ 6848 copy = (vm_map_copy_t) 6849 zalloc(vm_map_copy_zone); 6850 vm_map_copy_first_entry(copy) = 6851 vm_map_copy_last_entry(copy) = 6852 vm_map_copy_to_entry(copy); 6853 copy->type = VM_MAP_COPY_ENTRY_LIST; 6854 copy->offset = new_offset; 6855 6856 /* 6857 * XXX FBDP 6858 * this does not seem to deal with 6859 * the VM map store (R&B tree) 6860 */ 6861 6862 total_size -= copy_size; 6863 copy_size = 0; 6864 /* put back 
remainder of copy in container */ 6865 if(next_copy != NULL) { 6866 copy->cpy_hdr.nentries = remaining_entries; 6867 copy->cpy_hdr.links.next = next_copy; 6868 copy->cpy_hdr.links.prev = previous_prev; 6869 copy->size = total_size; 6870 next_copy->vme_prev = 6871 vm_map_copy_to_entry(copy); 6872 next_copy = NULL; 6873 } 6874 base_addr = local_end; 6875 vm_map_lock(dst_map); 6876 if(!vm_map_lookup_entry(dst_map, 6877 local_end, &tmp_entry)) { 6878 vm_map_unlock(dst_map); 6879 return(KERN_INVALID_ADDRESS); 6880 } 6881 entry = tmp_entry; 6882 continue; 6883 } 6884 if (dst_end <= entry->vme_end) { 6885 copy_size = dst_end - base_addr; 6886 break; 6887 } 6888 6889 if ((next == vm_map_to_entry(dst_map)) || 6890 (next->vme_start != entry->vme_end)) { 6891 vm_map_unlock(dst_map); 6892 return(KERN_INVALID_ADDRESS); 6893 } 6894 6895 entry = next; 6896 }/* for */ 6897 6898 next_copy = NULL; 6899 nentries = 1; 6900 6901 /* adjust the copy object */ 6902 if (total_size > copy_size) { 6903 vm_map_size_t local_size = 0; 6904 vm_map_size_t entry_size; 6905 6906 new_offset = copy->offset; 6907 copy_entry = vm_map_copy_first_entry(copy); 6908 while(copy_entry != vm_map_copy_to_entry(copy)) { 6909 entry_size = copy_entry->vme_end - 6910 copy_entry->vme_start; 6911 if((local_size < copy_size) && 6912 ((local_size + entry_size) 6913 >= copy_size)) { 6914 vm_map_copy_clip_end(copy, copy_entry, 6915 copy_entry->vme_start + 6916 (copy_size - local_size)); 6917 entry_size = copy_entry->vme_end - 6918 copy_entry->vme_start; 6919 local_size += entry_size; 6920 new_offset += entry_size; 6921 } 6922 if(local_size >= copy_size) { 6923 next_copy = copy_entry->vme_next; 6924 copy_entry->vme_next = 6925 vm_map_copy_to_entry(copy); 6926 previous_prev = 6927 copy->cpy_hdr.links.prev; 6928 copy->cpy_hdr.links.prev = copy_entry; 6929 copy->size = copy_size; 6930 remaining_entries = 6931 copy->cpy_hdr.nentries; 6932 remaining_entries -= nentries; 6933 copy->cpy_hdr.nentries = nentries; 6934 break; 6935 } else { 6936 local_size += entry_size; 6937 new_offset += entry_size; 6938 nentries++; 6939 } 6940 copy_entry = copy_entry->vme_next; 6941 } 6942 } 6943 6944 if (aligned) { 6945 pmap_t local_pmap; 6946 6947 if(pmap) 6948 local_pmap = pmap; 6949 else 6950 local_pmap = dst_map->pmap; 6951 6952 if ((kr = vm_map_copy_overwrite_aligned( 6953 dst_map, tmp_entry, copy, 6954 base_addr, local_pmap)) != KERN_SUCCESS) { 6955 if(next_copy != NULL) { 6956 copy->cpy_hdr.nentries += 6957 remaining_entries; 6958 copy->cpy_hdr.links.prev->vme_next = 6959 next_copy; 6960 copy->cpy_hdr.links.prev = 6961 previous_prev; 6962 copy->size += copy_size; 6963 } 6964 return kr; 6965 } 6966 vm_map_unlock(dst_map); 6967 } else { 6968 /* 6969 * Performance gain: 6970 * 6971 * if the copy and dst address are misaligned but the same 6972 * offset within the page we can copy_not_aligned the 6973 * misaligned parts and copy aligned the rest. If they are 6974 * aligned but len is unaligned we simply need to copy 6975 * the end bit unaligned. We'll need to split the misaligned 6976 * bits of the region in this case ! 
6977 */ 6978 /* ALWAYS UNLOCKS THE dst_map MAP */ 6979 kr = vm_map_copy_overwrite_unaligned( 6980 dst_map, 6981 tmp_entry, 6982 copy, 6983 base_addr, 6984 discard_on_success); 6985 if (kr != KERN_SUCCESS) { 6986 if(next_copy != NULL) { 6987 copy->cpy_hdr.nentries += 6988 remaining_entries; 6989 copy->cpy_hdr.links.prev->vme_next = 6990 next_copy; 6991 copy->cpy_hdr.links.prev = 6992 previous_prev; 6993 copy->size += copy_size; 6994 } 6995 return kr; 6996 } 6997 } 6998 total_size -= copy_size; 6999 if(total_size == 0) 7000 break; 7001 base_addr += copy_size; 7002 copy_size = 0; 7003 copy->offset = new_offset; 7004 if(next_copy != NULL) { 7005 copy->cpy_hdr.nentries = remaining_entries; 7006 copy->cpy_hdr.links.next = next_copy; 7007 copy->cpy_hdr.links.prev = previous_prev; 7008 next_copy->vme_prev = vm_map_copy_to_entry(copy); 7009 copy->size = total_size; 7010 } 7011 vm_map_lock(dst_map); 7012 while(TRUE) { 7013 if (!vm_map_lookup_entry(dst_map, 7014 base_addr, &tmp_entry)) { 7015 vm_map_unlock(dst_map); 7016 return(KERN_INVALID_ADDRESS); 7017 } 7018 if (tmp_entry->in_transition) { 7019 entry->needs_wakeup = TRUE; 7020 vm_map_entry_wait(dst_map, THREAD_UNINT); 7021 } else { 7022 break; 7023 } 7024 } 7025 vm_map_clip_start(dst_map, 7026 tmp_entry, 7027 vm_map_trunc_page(base_addr, 7028 VM_MAP_PAGE_MASK(dst_map))); 7029 7030 entry = tmp_entry; 7031 } /* while */ 7032 7033 /* 7034 * Throw away the vm_map_copy object 7035 */ 7036 if (discard_on_success) 7037 vm_map_copy_discard(copy); 7038 7039 return(KERN_SUCCESS); 7040}/* vm_map_copy_overwrite */ 7041 7042kern_return_t 7043vm_map_copy_overwrite( 7044 vm_map_t dst_map, 7045 vm_map_offset_t dst_addr, 7046 vm_map_copy_t copy, 7047 boolean_t interruptible) 7048{ 7049 vm_map_size_t head_size, tail_size; 7050 vm_map_copy_t head_copy, tail_copy; 7051 vm_map_offset_t head_addr, tail_addr; 7052 vm_map_entry_t entry; 7053 kern_return_t kr; 7054 7055 head_size = 0; 7056 tail_size = 0; 7057 head_copy = NULL; 7058 tail_copy = NULL; 7059 head_addr = 0; 7060 tail_addr = 0; 7061 7062 if (interruptible || 7063 copy == VM_MAP_COPY_NULL || 7064 copy->type != VM_MAP_COPY_ENTRY_LIST) { 7065 /* 7066 * We can't split the "copy" map if we're interruptible 7067 * or if we don't have a "copy" map... 7068 */ 7069 blunt_copy: 7070 return vm_map_copy_overwrite_nested(dst_map, 7071 dst_addr, 7072 copy, 7073 interruptible, 7074 (pmap_t) NULL, 7075 TRUE); 7076 } 7077 7078 if (copy->size < 3 * PAGE_SIZE) { 7079 /* 7080 * Too small to bother with optimizing... 7081 */ 7082 goto blunt_copy; 7083 } 7084 7085 if ((dst_addr & VM_MAP_PAGE_MASK(dst_map)) != 7086 (copy->offset & VM_MAP_PAGE_MASK(dst_map))) { 7087 /* 7088 * Incompatible mis-alignment of source and destination... 7089 */ 7090 goto blunt_copy; 7091 } 7092 7093 /* 7094 * Proper alignment or identical mis-alignment at the beginning. 7095 * Let's try and do a small unaligned copy first (if needed) 7096 * and then an aligned copy for the rest. 7097 */ 7098 if (!page_aligned(dst_addr)) { 7099 head_addr = dst_addr; 7100 head_size = (VM_MAP_PAGE_SIZE(dst_map) - 7101 (copy->offset & VM_MAP_PAGE_MASK(dst_map))); 7102 } 7103 if (!page_aligned(copy->offset + copy->size)) { 7104 /* 7105 * Mis-alignment at the end. 7106 * Do an aligned copy up to the last page and 7107 * then an unaligned copy for the remaining bytes. 
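		 *
		 * Worked example (illustrative, assuming 4K pages):
		 * dst_addr = 0x10300, copy->offset = 0x300, copy->size = 0x3100
		 *	head_addr = 0x10300, head_size = 0x1000 - 0x300 = 0xD00
		 *	tail_size = (0x300 + 0x3100) & 0xFFF = 0x400
		 *	tail_addr = 0x10300 + 0x3100 - 0x400 = 0x13000
		 * so the middle 0x2000 bytes go through the aligned path and
		 * only the head and tail are copied unaligned.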
7108 */ 7109 tail_size = ((copy->offset + copy->size) & 7110 VM_MAP_PAGE_MASK(dst_map)); 7111 tail_addr = dst_addr + copy->size - tail_size; 7112 } 7113 7114 if (head_size + tail_size == copy->size) { 7115 /* 7116 * It's all unaligned, no optimization possible... 7117 */ 7118 goto blunt_copy; 7119 } 7120 7121 /* 7122 * Can't optimize if there are any submaps in the 7123 * destination due to the way we free the "copy" map 7124 * progressively in vm_map_copy_overwrite_nested() 7125 * in that case. 7126 */ 7127 vm_map_lock_read(dst_map); 7128 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) { 7129 vm_map_unlock_read(dst_map); 7130 goto blunt_copy; 7131 } 7132 for (; 7133 (entry != vm_map_copy_to_entry(copy) && 7134 entry->vme_start < dst_addr + copy->size); 7135 entry = entry->vme_next) { 7136 if (entry->is_sub_map) { 7137 vm_map_unlock_read(dst_map); 7138 goto blunt_copy; 7139 } 7140 } 7141 vm_map_unlock_read(dst_map); 7142 7143 if (head_size) { 7144 /* 7145 * Unaligned copy of the first "head_size" bytes, to reach 7146 * a page boundary. 7147 */ 7148 7149 /* 7150 * Extract "head_copy" out of "copy". 7151 */ 7152 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 7153 vm_map_copy_first_entry(head_copy) = 7154 vm_map_copy_to_entry(head_copy); 7155 vm_map_copy_last_entry(head_copy) = 7156 vm_map_copy_to_entry(head_copy); 7157 head_copy->type = VM_MAP_COPY_ENTRY_LIST; 7158 head_copy->cpy_hdr.nentries = 0; 7159 head_copy->cpy_hdr.entries_pageable = 7160 copy->cpy_hdr.entries_pageable; 7161 vm_map_store_init(&head_copy->cpy_hdr); 7162 7163 head_copy->offset = copy->offset; 7164 head_copy->size = head_size; 7165 7166 copy->offset += head_size; 7167 copy->size -= head_size; 7168 7169 entry = vm_map_copy_first_entry(copy); 7170 vm_map_copy_clip_end(copy, entry, copy->offset); 7171 vm_map_copy_entry_unlink(copy, entry); 7172 vm_map_copy_entry_link(head_copy, 7173 vm_map_copy_to_entry(head_copy), 7174 entry); 7175 7176 /* 7177 * Do the unaligned copy. 7178 */ 7179 kr = vm_map_copy_overwrite_nested(dst_map, 7180 head_addr, 7181 head_copy, 7182 interruptible, 7183 (pmap_t) NULL, 7184 FALSE); 7185 if (kr != KERN_SUCCESS) 7186 goto done; 7187 } 7188 7189 if (tail_size) { 7190 /* 7191 * Extract "tail_copy" out of "copy". 7192 */ 7193 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 7194 vm_map_copy_first_entry(tail_copy) = 7195 vm_map_copy_to_entry(tail_copy); 7196 vm_map_copy_last_entry(tail_copy) = 7197 vm_map_copy_to_entry(tail_copy); 7198 tail_copy->type = VM_MAP_COPY_ENTRY_LIST; 7199 tail_copy->cpy_hdr.nentries = 0; 7200 tail_copy->cpy_hdr.entries_pageable = 7201 copy->cpy_hdr.entries_pageable; 7202 vm_map_store_init(&tail_copy->cpy_hdr); 7203 7204 tail_copy->offset = copy->offset + copy->size - tail_size; 7205 tail_copy->size = tail_size; 7206 7207 copy->size -= tail_size; 7208 7209 entry = vm_map_copy_last_entry(copy); 7210 vm_map_copy_clip_start(copy, entry, tail_copy->offset); 7211 entry = vm_map_copy_last_entry(copy); 7212 vm_map_copy_entry_unlink(copy, entry); 7213 vm_map_copy_entry_link(tail_copy, 7214 vm_map_copy_last_entry(tail_copy), 7215 entry); 7216 } 7217 7218 /* 7219 * Copy most (or possibly all) of the data. 
7220 */ 7221 kr = vm_map_copy_overwrite_nested(dst_map, 7222 dst_addr + head_size, 7223 copy, 7224 interruptible, 7225 (pmap_t) NULL, 7226 FALSE); 7227 if (kr != KERN_SUCCESS) { 7228 goto done; 7229 } 7230 7231 if (tail_size) { 7232 kr = vm_map_copy_overwrite_nested(dst_map, 7233 tail_addr, 7234 tail_copy, 7235 interruptible, 7236 (pmap_t) NULL, 7237 FALSE); 7238 } 7239 7240done: 7241 assert(copy->type == VM_MAP_COPY_ENTRY_LIST); 7242 if (kr == KERN_SUCCESS) { 7243 /* 7244 * Discard all the copy maps. 7245 */ 7246 if (head_copy) { 7247 vm_map_copy_discard(head_copy); 7248 head_copy = NULL; 7249 } 7250 vm_map_copy_discard(copy); 7251 if (tail_copy) { 7252 vm_map_copy_discard(tail_copy); 7253 tail_copy = NULL; 7254 } 7255 } else { 7256 /* 7257 * Re-assemble the original copy map. 7258 */ 7259 if (head_copy) { 7260 entry = vm_map_copy_first_entry(head_copy); 7261 vm_map_copy_entry_unlink(head_copy, entry); 7262 vm_map_copy_entry_link(copy, 7263 vm_map_copy_to_entry(copy), 7264 entry); 7265 copy->offset -= head_size; 7266 copy->size += head_size; 7267 vm_map_copy_discard(head_copy); 7268 head_copy = NULL; 7269 } 7270 if (tail_copy) { 7271 entry = vm_map_copy_last_entry(tail_copy); 7272 vm_map_copy_entry_unlink(tail_copy, entry); 7273 vm_map_copy_entry_link(copy, 7274 vm_map_copy_last_entry(copy), 7275 entry); 7276 copy->size += tail_size; 7277 vm_map_copy_discard(tail_copy); 7278 tail_copy = NULL; 7279 } 7280 } 7281 return kr; 7282} 7283 7284 7285/* 7286 * Routine: vm_map_copy_overwrite_unaligned [internal use only] 7287 * 7288 * Decription: 7289 * Physically copy unaligned data 7290 * 7291 * Implementation: 7292 * Unaligned parts of pages have to be physically copied. We use 7293 * a modified form of vm_fault_copy (which understands none-aligned 7294 * page offsets and sizes) to do the copy. We attempt to copy as 7295 * much memory in one go as possibly, however vm_fault_copy copies 7296 * within 1 memory object so we have to find the smaller of "amount left" 7297 * "source object data size" and "target object data size". With 7298 * unaligned data we don't need to split regions, therefore the source 7299 * (copy) object should be one map entry, the target range may be split 7300 * over multiple map entries however. In any event we are pessimistic 7301 * about these assumptions. 7302 * 7303 * Assumptions: 7304 * dst_map is locked on entry and is return locked on success, 7305 * unlocked on error. 7306 */ 7307 7308static kern_return_t 7309vm_map_copy_overwrite_unaligned( 7310 vm_map_t dst_map, 7311 vm_map_entry_t entry, 7312 vm_map_copy_t copy, 7313 vm_map_offset_t start, 7314 boolean_t discard_on_success) 7315{ 7316 vm_map_entry_t copy_entry; 7317 vm_map_entry_t copy_entry_next; 7318 vm_map_version_t version; 7319 vm_object_t dst_object; 7320 vm_object_offset_t dst_offset; 7321 vm_object_offset_t src_offset; 7322 vm_object_offset_t entry_offset; 7323 vm_map_offset_t entry_end; 7324 vm_map_size_t src_size, 7325 dst_size, 7326 copy_size, 7327 amount_left; 7328 kern_return_t kr = KERN_SUCCESS; 7329 7330 7331 copy_entry = vm_map_copy_first_entry(copy); 7332 7333 vm_map_lock_write_to_read(dst_map); 7334 7335 src_offset = copy->offset - vm_object_trunc_page(copy->offset); 7336 amount_left = copy->size; 7337/* 7338 * unaligned so we never clipped this entry, we need the offset into 7339 * the vm_object not just the data. 
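 * For instance (illustrative value): with copy->offset = 0x1A80,
 * vm_object_trunc_page(copy->offset) is 0x1000 and src_offset starts at
 * 0xA80, the position of the data within the first source copy entry;
 * the entry itself still begins on a page boundary, only the data of
 * interest starts src_offset bytes into it.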
 */
	while (amount_left > 0) {

		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock_read(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/* "start" must be within the current map entry */
		assert ((start>=entry->vme_start) && (start<entry->vme_end));

		dst_offset = start - entry->vme_start;

		dst_size = entry->vme_end - start;

		src_size = copy_entry->vme_end -
			(copy_entry->vme_start + src_offset);

		if (dst_size < src_size) {
/*
 * we can only copy dst_size bytes before
 * we have to get the next destination entry
 */
			copy_size = dst_size;
		} else {
/*
 * we can only copy src_size bytes before
 * we have to get the next source copy entry
 */
			copy_size = src_size;
		}

		if (copy_size > amount_left) {
			copy_size = amount_left;
		}
/*
 * Entry needs copy: create a shadow object for the
 * copy-on-write region.
 */
		if (entry->needs_copy &&
		    ((entry->protection & VM_PROT_WRITE) != 0))
		{
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			vm_object_shadow(&entry->object.vm_object,
					 &entry->offset,
					 (vm_map_size_t)(entry->vme_end
							 - entry->vme_start));
			entry->needs_copy = FALSE;
			vm_map_lock_write_to_read(dst_map);
		}
		dst_object = entry->object.vm_object;
/*
 * unlike with the virtual (aligned) copy we're going
 * to fault on it therefore we need a target object.
 */
		if (dst_object == VM_OBJECT_NULL) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			dst_object = vm_object_allocate((vm_map_size_t)
							entry->vme_end - entry->vme_start);
			entry->object.vm_object = dst_object;
			entry->offset = 0;
			assert(entry->use_pmap);
			vm_map_lock_write_to_read(dst_map);
		}
/*
 * Take an object reference and unlock map. The "entry" may
 * disappear or change when the map is unlocked.
 */
		vm_object_reference(dst_object);
		version.main_timestamp = dst_map->timestamp;
		entry_offset = entry->offset;
		entry_end = entry->vme_end;
		vm_map_unlock_read(dst_map);
/*
 * Copy as much as possible in one pass
 */
		kr = vm_fault_copy(
			copy_entry->object.vm_object,
			copy_entry->offset + src_offset,
			&copy_size,
			dst_object,
			entry_offset + dst_offset,
			dst_map,
			&version,
			THREAD_UNINT );

		start += copy_size;
		src_offset += copy_size;
		amount_left -= copy_size;
/*
 * Release the object reference
 */
		vm_object_deallocate(dst_object);
/*
 * If a hard error occurred, return it now
 */
		if (kr != KERN_SUCCESS)
			return kr;

		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
		    || amount_left == 0)
		{
/*
 * all done with this copy entry, dispose.
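 * Each pass copies min(dst_size, src_size, amount_left) bytes, so we get
 * here either because this source copy entry is exhausted or because the
 * whole request is done.  For example (illustrative sizes), with
 * dst_size = 0x1800, src_size = 0xC00 and amount_left = 0x2000, copy_size
 * is 0xC00 and the source entry runs out first, so we advance to the next
 * copy entry and reset src_offset to zero.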
7450 */ 7451 copy_entry_next = copy_entry->vme_next; 7452 7453 if (discard_on_success) { 7454 vm_map_copy_entry_unlink(copy, copy_entry); 7455 assert(!copy_entry->is_sub_map); 7456 vm_object_deallocate( 7457 copy_entry->object.vm_object); 7458 vm_map_copy_entry_dispose(copy, copy_entry); 7459 } 7460 7461 if (copy_entry_next == vm_map_copy_to_entry(copy) && 7462 amount_left) { 7463/* 7464 * not finished copying but run out of source 7465 */ 7466 return KERN_INVALID_ADDRESS; 7467 } 7468 7469 copy_entry = copy_entry_next; 7470 7471 src_offset = 0; 7472 } 7473 7474 if (amount_left == 0) 7475 return KERN_SUCCESS; 7476 7477 vm_map_lock_read(dst_map); 7478 if (version.main_timestamp == dst_map->timestamp) { 7479 if (start == entry_end) { 7480/* 7481 * destination region is split. Use the version 7482 * information to avoid a lookup in the normal 7483 * case. 7484 */ 7485 entry = entry->vme_next; 7486/* 7487 * should be contiguous. Fail if we encounter 7488 * a hole in the destination. 7489 */ 7490 if (start != entry->vme_start) { 7491 vm_map_unlock_read(dst_map); 7492 return KERN_INVALID_ADDRESS ; 7493 } 7494 } 7495 } else { 7496/* 7497 * Map version check failed. 7498 * we must lookup the entry because somebody 7499 * might have changed the map behind our backs. 7500 */ 7501 RetryLookup: 7502 if (!vm_map_lookup_entry(dst_map, start, &entry)) 7503 { 7504 vm_map_unlock_read(dst_map); 7505 return KERN_INVALID_ADDRESS ; 7506 } 7507 } 7508 }/* while */ 7509 7510 return KERN_SUCCESS; 7511}/* vm_map_copy_overwrite_unaligned */ 7512 7513/* 7514 * Routine: vm_map_copy_overwrite_aligned [internal use only] 7515 * 7516 * Description: 7517 * Does all the vm_trickery possible for whole pages. 7518 * 7519 * Implementation: 7520 * 7521 * If there are no permanent objects in the destination, 7522 * and the source and destination map entry zones match, 7523 * and the destination map entry is not shared, 7524 * then the map entries can be deleted and replaced 7525 * with those from the copy. The following code is the 7526 * basic idea of what to do, but there are lots of annoying 7527 * little details about getting protection and inheritance 7528 * right. Should add protection, inheritance, and sharing checks 7529 * to the above pass and make sure that no wiring is involved. 7530 */ 7531 7532int vm_map_copy_overwrite_aligned_src_not_internal = 0; 7533int vm_map_copy_overwrite_aligned_src_not_symmetric = 0; 7534int vm_map_copy_overwrite_aligned_src_large = 0; 7535 7536static kern_return_t 7537vm_map_copy_overwrite_aligned( 7538 vm_map_t dst_map, 7539 vm_map_entry_t tmp_entry, 7540 vm_map_copy_t copy, 7541 vm_map_offset_t start, 7542 __unused pmap_t pmap) 7543{ 7544 vm_object_t object; 7545 vm_map_entry_t copy_entry; 7546 vm_map_size_t copy_size; 7547 vm_map_size_t size; 7548 vm_map_entry_t entry; 7549 7550 while ((copy_entry = vm_map_copy_first_entry(copy)) 7551 != vm_map_copy_to_entry(copy)) 7552 { 7553 copy_size = (copy_entry->vme_end - copy_entry->vme_start); 7554 7555 entry = tmp_entry; 7556 if (entry->is_sub_map) { 7557 /* unnested when clipped earlier */ 7558 assert(!entry->use_pmap); 7559 } 7560 if (entry == vm_map_to_entry(dst_map)) { 7561 vm_map_unlock(dst_map); 7562 return KERN_INVALID_ADDRESS; 7563 } 7564 size = (entry->vme_end - entry->vme_start); 7565 /* 7566 * Make sure that no holes popped up in the 7567 * address map, and that the protection is 7568 * still valid, in case the map was unlocked 7569 * earlier. 
		 * earlier.
7570 */ 7571 7572 if ((entry->vme_start != start) || ((entry->is_sub_map) 7573 && !entry->needs_copy)) { 7574 vm_map_unlock(dst_map); 7575 return(KERN_INVALID_ADDRESS); 7576 } 7577 assert(entry != vm_map_to_entry(dst_map)); 7578 7579 /* 7580 * Check protection again 7581 */ 7582 7583 if ( ! (entry->protection & VM_PROT_WRITE)) { 7584 vm_map_unlock(dst_map); 7585 return(KERN_PROTECTION_FAILURE); 7586 } 7587 7588 /* 7589 * Adjust to source size first 7590 */ 7591 7592 if (copy_size < size) { 7593 if (entry->map_aligned && 7594 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size, 7595 VM_MAP_PAGE_MASK(dst_map))) { 7596 /* no longer map-aligned */ 7597 entry->map_aligned = FALSE; 7598 } 7599 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size); 7600 size = copy_size; 7601 } 7602 7603 /* 7604 * Adjust to destination size 7605 */ 7606 7607 if (size < copy_size) { 7608 vm_map_copy_clip_end(copy, copy_entry, 7609 copy_entry->vme_start + size); 7610 copy_size = size; 7611 } 7612 7613 assert((entry->vme_end - entry->vme_start) == size); 7614 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size); 7615 assert((copy_entry->vme_end - copy_entry->vme_start) == size); 7616 7617 /* 7618 * If the destination contains temporary unshared memory, 7619 * we can perform the copy by throwing it away and 7620 * installing the source data. 7621 */ 7622 7623 object = entry->object.vm_object; 7624 if ((!entry->is_shared && 7625 ((object == VM_OBJECT_NULL) || 7626 (object->internal && !object->true_share))) || 7627 entry->needs_copy) { 7628 vm_object_t old_object = entry->object.vm_object; 7629 vm_object_offset_t old_offset = entry->offset; 7630 vm_object_offset_t offset; 7631 7632 /* 7633 * Ensure that the source and destination aren't 7634 * identical 7635 */ 7636 if (old_object == copy_entry->object.vm_object && 7637 old_offset == copy_entry->offset) { 7638 vm_map_copy_entry_unlink(copy, copy_entry); 7639 vm_map_copy_entry_dispose(copy, copy_entry); 7640 7641 if (old_object != VM_OBJECT_NULL) 7642 vm_object_deallocate(old_object); 7643 7644 start = tmp_entry->vme_end; 7645 tmp_entry = tmp_entry->vme_next; 7646 continue; 7647 } 7648 7649#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */ 7650#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */ 7651 if (copy_entry->object.vm_object != VM_OBJECT_NULL && 7652 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE && 7653 copy_size <= __TRADEOFF1_COPY_SIZE) { 7654 /* 7655 * Virtual vs. Physical copy tradeoff #1. 7656 * 7657 * Copying only a few pages out of a large 7658 * object: do a physical copy instead of 7659 * a virtual copy, to avoid possibly keeping 7660 * the entire large object alive because of 7661 * those few copy-on-write pages. 7662 */ 7663 vm_map_copy_overwrite_aligned_src_large++; 7664 goto slow_copy; 7665 } 7666 7667 if (entry->alias >= VM_MEMORY_MALLOC && 7668 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) { 7669 vm_object_t new_object, new_shadow; 7670 7671 /* 7672 * We're about to map something over a mapping 7673 * established by malloc()... 
7674 */ 7675 new_object = copy_entry->object.vm_object; 7676 if (new_object != VM_OBJECT_NULL) { 7677 vm_object_lock_shared(new_object); 7678 } 7679 while (new_object != VM_OBJECT_NULL && 7680 !new_object->true_share && 7681 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && 7682 new_object->internal) { 7683 new_shadow = new_object->shadow; 7684 if (new_shadow == VM_OBJECT_NULL) { 7685 break; 7686 } 7687 vm_object_lock_shared(new_shadow); 7688 vm_object_unlock(new_object); 7689 new_object = new_shadow; 7690 } 7691 if (new_object != VM_OBJECT_NULL) { 7692 if (!new_object->internal) { 7693 /* 7694 * The new mapping is backed 7695 * by an external object. We 7696 * don't want malloc'ed memory 7697 * to be replaced with such a 7698 * non-anonymous mapping, so 7699 * let's go off the optimized 7700 * path... 7701 */ 7702 vm_map_copy_overwrite_aligned_src_not_internal++; 7703 vm_object_unlock(new_object); 7704 goto slow_copy; 7705 } 7706 if (new_object->true_share || 7707 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { 7708 /* 7709 * Same if there's a "true_share" 7710 * object in the shadow chain, or 7711 * an object with a non-default 7712 * (SYMMETRIC) copy strategy. 7713 */ 7714 vm_map_copy_overwrite_aligned_src_not_symmetric++; 7715 vm_object_unlock(new_object); 7716 goto slow_copy; 7717 } 7718 vm_object_unlock(new_object); 7719 } 7720 /* 7721 * The new mapping is still backed by 7722 * anonymous (internal) memory, so it's 7723 * OK to substitute it for the original 7724 * malloc() mapping. 7725 */ 7726 } 7727 7728 if (old_object != VM_OBJECT_NULL) { 7729 if(entry->is_sub_map) { 7730 if(entry->use_pmap) { 7731#ifndef NO_NESTED_PMAP 7732 pmap_unnest(dst_map->pmap, 7733 (addr64_t)entry->vme_start, 7734 entry->vme_end - entry->vme_start); 7735#endif /* NO_NESTED_PMAP */ 7736 if(dst_map->mapped_in_other_pmaps) { 7737 /* clean up parent */ 7738 /* map/maps */ 7739 vm_map_submap_pmap_clean( 7740 dst_map, entry->vme_start, 7741 entry->vme_end, 7742 entry->object.sub_map, 7743 entry->offset); 7744 } 7745 } else { 7746 vm_map_submap_pmap_clean( 7747 dst_map, entry->vme_start, 7748 entry->vme_end, 7749 entry->object.sub_map, 7750 entry->offset); 7751 } 7752 vm_map_deallocate( 7753 entry->object.sub_map); 7754 } else { 7755 if(dst_map->mapped_in_other_pmaps) { 7756 vm_object_pmap_protect_options( 7757 entry->object.vm_object, 7758 entry->offset, 7759 entry->vme_end 7760 - entry->vme_start, 7761 PMAP_NULL, 7762 entry->vme_start, 7763 VM_PROT_NONE, 7764 PMAP_OPTIONS_REMOVE); 7765 } else { 7766 pmap_remove_options( 7767 dst_map->pmap, 7768 (addr64_t)(entry->vme_start), 7769 (addr64_t)(entry->vme_end), 7770 PMAP_OPTIONS_REMOVE); 7771 } 7772 vm_object_deallocate(old_object); 7773 } 7774 } 7775 7776 entry->is_sub_map = FALSE; 7777 entry->object = copy_entry->object; 7778 object = entry->object.vm_object; 7779 entry->needs_copy = copy_entry->needs_copy; 7780 entry->wired_count = 0; 7781 entry->user_wired_count = 0; 7782 offset = entry->offset = copy_entry->offset; 7783 7784 vm_map_copy_entry_unlink(copy, copy_entry); 7785 vm_map_copy_entry_dispose(copy, copy_entry); 7786 7787 /* 7788 * we could try to push pages into the pmap at this point, BUT 7789 * this optimization only saved on average 2 us per page if ALL 7790 * the pages in the source were currently mapped 7791 * and ALL the pages in the dest were touched, if there were fewer 7792 * than 2/3 of the pages touched, this optimization actually cost more cycles 7793 * it also puts a lot of pressure on the pmap layer w/r to mapping 
structures
 */

			/*
			 * Set up for the next iteration.  The map
			 * has not been unlocked, so the next
			 * address should be at the end of this
			 * entry, and the next map entry should be
			 * the one following it.
			 */

			start = tmp_entry->vme_end;
			tmp_entry = tmp_entry->vme_next;
		} else {
			vm_map_version_t	version;
			vm_object_t		dst_object;
			vm_object_offset_t	dst_offset;
			kern_return_t		r;

	slow_copy:
			if (entry->needs_copy) {
				vm_object_shadow(&entry->object.vm_object,
						 &entry->offset,
						 (entry->vme_end -
						  entry->vme_start));
				entry->needs_copy = FALSE;
			}

			dst_object = entry->object.vm_object;
			dst_offset = entry->offset;

			/*
			 * Take an object reference, and record
			 * the map version information so that the
			 * map can be safely unlocked.
			 */

			if (dst_object == VM_OBJECT_NULL) {
				/*
				 * We would usually have just taken the
				 * optimized path above if the destination
				 * object has not been allocated yet.  But we
				 * now disable that optimization if the copy
				 * entry's object is not backed by anonymous
				 * memory to avoid replacing malloc'ed
				 * (i.e. re-usable) anonymous memory with a
				 * not-so-anonymous mapping.
				 * So we have to handle this case here and
				 * allocate a new VM object for this map entry.
				 */
				dst_object = vm_object_allocate(
					entry->vme_end - entry->vme_start);
				dst_offset = 0;
				entry->object.vm_object = dst_object;
				entry->offset = dst_offset;
				assert(entry->use_pmap);

			}

			vm_object_reference(dst_object);

			/* account for unlock bumping up timestamp */
			version.main_timestamp = dst_map->timestamp + 1;

			vm_map_unlock(dst_map);

			/*
			 * Copy as much as possible in one pass
			 */

			copy_size = size;
			r = vm_fault_copy(
				copy_entry->object.vm_object,
				copy_entry->offset,
				&copy_size,
				dst_object,
				dst_offset,
				dst_map,
				&version,
				THREAD_UNINT );

			/*
			 * Release the object reference
			 */

			vm_object_deallocate(dst_object);

			/*
			 * If a hard error occurred, return it now
			 */

			if (r != KERN_SUCCESS)
				return(r);

			if (copy_size != 0) {
				/*
				 * Dispose of the copied region
				 */

				vm_map_copy_clip_end(copy, copy_entry,
						     copy_entry->vme_start + copy_size);
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_object_deallocate(copy_entry->object.vm_object);
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			/*
			 * Pick up in the destination map where we left off.
			 *
			 * Use the version information to avoid a lookup
			 * in the normal case.
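			 *
			 * This is the usual map-version check: we saved
			 * version.main_timestamp = dst_map->timestamp + 1 before
			 * unlocking (the "+ 1" accounts for the bump that the
			 * unlock itself causes), so a matching timestamp after
			 * re-locking means nobody else modified the map and the
			 * saved tmp_entry is still usable; otherwise we fall back
			 * to vm_map_lookup_entry() to re-find our place.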
7904 */ 7905 7906 start += copy_size; 7907 vm_map_lock(dst_map); 7908 if (version.main_timestamp == dst_map->timestamp && 7909 copy_size != 0) { 7910 /* We can safely use saved tmp_entry value */ 7911 7912 if (tmp_entry->map_aligned && 7913 !VM_MAP_PAGE_ALIGNED( 7914 start, 7915 VM_MAP_PAGE_MASK(dst_map))) { 7916 /* no longer map-aligned */ 7917 tmp_entry->map_aligned = FALSE; 7918 } 7919 vm_map_clip_end(dst_map, tmp_entry, start); 7920 tmp_entry = tmp_entry->vme_next; 7921 } else { 7922 /* Must do lookup of tmp_entry */ 7923 7924 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) { 7925 vm_map_unlock(dst_map); 7926 return(KERN_INVALID_ADDRESS); 7927 } 7928 if (tmp_entry->map_aligned && 7929 !VM_MAP_PAGE_ALIGNED( 7930 start, 7931 VM_MAP_PAGE_MASK(dst_map))) { 7932 /* no longer map-aligned */ 7933 tmp_entry->map_aligned = FALSE; 7934 } 7935 vm_map_clip_start(dst_map, tmp_entry, start); 7936 } 7937 } 7938 }/* while */ 7939 7940 return(KERN_SUCCESS); 7941}/* vm_map_copy_overwrite_aligned */ 7942 7943/* 7944 * Routine: vm_map_copyin_kernel_buffer [internal use only] 7945 * 7946 * Description: 7947 * Copy in data to a kernel buffer from space in the 7948 * source map. The original space may be optionally 7949 * deallocated. 7950 * 7951 * If successful, returns a new copy object. 7952 */ 7953static kern_return_t 7954vm_map_copyin_kernel_buffer( 7955 vm_map_t src_map, 7956 vm_map_offset_t src_addr, 7957 vm_map_size_t len, 7958 boolean_t src_destroy, 7959 vm_map_copy_t *copy_result) 7960{ 7961 kern_return_t kr; 7962 vm_map_copy_t copy; 7963 vm_size_t kalloc_size; 7964 7965 if ((vm_size_t) len != len) { 7966 /* "len" is too big and doesn't fit in a "vm_size_t" */ 7967 return KERN_RESOURCE_SHORTAGE; 7968 } 7969 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len); 7970 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len); 7971 7972 copy = (vm_map_copy_t) kalloc(kalloc_size); 7973 if (copy == VM_MAP_COPY_NULL) { 7974 return KERN_RESOURCE_SHORTAGE; 7975 } 7976 copy->type = VM_MAP_COPY_KERNEL_BUFFER; 7977 copy->size = len; 7978 copy->offset = 0; 7979 copy->cpy_kdata = (void *) (copy + 1); 7980 copy->cpy_kalloc_size = kalloc_size; 7981 7982 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len); 7983 if (kr != KERN_SUCCESS) { 7984 kfree(copy, kalloc_size); 7985 return kr; 7986 } 7987 if (src_destroy) { 7988 (void) vm_map_remove( 7989 src_map, 7990 vm_map_trunc_page(src_addr, 7991 VM_MAP_PAGE_MASK(src_map)), 7992 vm_map_round_page(src_addr + len, 7993 VM_MAP_PAGE_MASK(src_map)), 7994 (VM_MAP_REMOVE_INTERRUPTIBLE | 7995 VM_MAP_REMOVE_WAIT_FOR_KWIRE | 7996 (src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : 0)); 7997 } 7998 *copy_result = copy; 7999 return KERN_SUCCESS; 8000} 8001 8002/* 8003 * Routine: vm_map_copyout_kernel_buffer [internal use only] 8004 * 8005 * Description: 8006 * Copy out data from a kernel buffer into space in the 8007 * destination map. The space may be otpionally dynamically 8008 * allocated. 8009 * 8010 * If successful, consumes the copy object. 8011 * Otherwise, the caller is responsible for it. 
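 *
 *	Layout note: a VM_MAP_COPY_KERNEL_BUFFER copy is a single kalloc()
 *	allocation holding the vm_map_copy header immediately followed by
 *	the payload, i.e. (as set up in vm_map_copyin_kernel_buffer above)
 *
 *		copy->cpy_kdata       = (void *) (copy + 1);
 *		copy->cpy_kalloc_size = sizeof(struct vm_map_copy) + len;
 *
 *	which is why consuming it is a single kfree() of cpy_kalloc_size
 *	bytes rather than a zfree() into vm_map_copy_zone.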
8012 */ 8013static int vm_map_copyout_kernel_buffer_failures = 0; 8014static kern_return_t 8015vm_map_copyout_kernel_buffer( 8016 vm_map_t map, 8017 vm_map_address_t *addr, /* IN/OUT */ 8018 vm_map_copy_t copy, 8019 boolean_t overwrite, 8020 boolean_t consume_on_success) 8021{ 8022 kern_return_t kr = KERN_SUCCESS; 8023 thread_t thread = current_thread(); 8024 8025 if (!overwrite) { 8026 8027 /* 8028 * Allocate space in the target map for the data 8029 */ 8030 *addr = 0; 8031 kr = vm_map_enter(map, 8032 addr, 8033 vm_map_round_page(copy->size, 8034 VM_MAP_PAGE_MASK(map)), 8035 (vm_map_offset_t) 0, 8036 VM_FLAGS_ANYWHERE, 8037 VM_OBJECT_NULL, 8038 (vm_object_offset_t) 0, 8039 FALSE, 8040 VM_PROT_DEFAULT, 8041 VM_PROT_ALL, 8042 VM_INHERIT_DEFAULT); 8043 if (kr != KERN_SUCCESS) 8044 return kr; 8045 } 8046 8047 /* 8048 * Copyout the data from the kernel buffer to the target map. 8049 */ 8050 if (thread->map == map) { 8051 8052 /* 8053 * If the target map is the current map, just do 8054 * the copy. 8055 */ 8056 assert((vm_size_t) copy->size == copy->size); 8057 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { 8058 kr = KERN_INVALID_ADDRESS; 8059 } 8060 } 8061 else { 8062 vm_map_t oldmap; 8063 8064 /* 8065 * If the target map is another map, assume the 8066 * target's address space identity for the duration 8067 * of the copy. 8068 */ 8069 vm_map_reference(map); 8070 oldmap = vm_map_switch(map); 8071 8072 assert((vm_size_t) copy->size == copy->size); 8073 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { 8074 vm_map_copyout_kernel_buffer_failures++; 8075 kr = KERN_INVALID_ADDRESS; 8076 } 8077 8078 (void) vm_map_switch(oldmap); 8079 vm_map_deallocate(map); 8080 } 8081 8082 if (kr != KERN_SUCCESS) { 8083 /* the copy failed, clean up */ 8084 if (!overwrite) { 8085 /* 8086 * Deallocate the space we allocated in the target map. 8087 */ 8088 (void) vm_map_remove( 8089 map, 8090 vm_map_trunc_page(*addr, 8091 VM_MAP_PAGE_MASK(map)), 8092 vm_map_round_page((*addr + 8093 vm_map_round_page(copy->size, 8094 VM_MAP_PAGE_MASK(map))), 8095 VM_MAP_PAGE_MASK(map)), 8096 VM_MAP_NO_FLAGS); 8097 *addr = 0; 8098 } 8099 } else { 8100 /* copy was successful, dicard the copy structure */ 8101 if (consume_on_success) { 8102 kfree(copy, copy->cpy_kalloc_size); 8103 } 8104 } 8105 8106 return kr; 8107} 8108 8109/* 8110 * Macro: vm_map_copy_insert 8111 * 8112 * Description: 8113 * Link a copy chain ("copy") into a map at the 8114 * specified location (after "where"). 8115 * Side effects: 8116 * The copy chain is destroyed. 8117 * Warning: 8118 * The arguments are evaluated multiple times. 
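 *
 *		For example, a call such as
 *
 *			vm_map_copy_insert(map, lookup_where(map), copy);
 *
 *		(where "lookup_where" stands for any expression with side
 *		effects or non-trivial cost, purely for illustration) may
 *		evaluate that expression more than once, so callers pass
 *		plain variables here.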
8119 */ 8120#define vm_map_copy_insert(map, where, copy) \ 8121MACRO_BEGIN \ 8122 vm_map_store_copy_insert(map, where, copy); \ 8123 zfree(vm_map_copy_zone, copy); \ 8124MACRO_END 8125 8126void 8127vm_map_copy_remap( 8128 vm_map_t map, 8129 vm_map_entry_t where, 8130 vm_map_copy_t copy, 8131 vm_map_offset_t adjustment, 8132 vm_prot_t cur_prot, 8133 vm_prot_t max_prot, 8134 vm_inherit_t inheritance) 8135{ 8136 vm_map_entry_t copy_entry, new_entry; 8137 8138 for (copy_entry = vm_map_copy_first_entry(copy); 8139 copy_entry != vm_map_copy_to_entry(copy); 8140 copy_entry = copy_entry->vme_next) { 8141 /* get a new VM map entry for the map */ 8142 new_entry = vm_map_entry_create(map, 8143 !map->hdr.entries_pageable); 8144 /* copy the "copy entry" to the new entry */ 8145 vm_map_entry_copy(new_entry, copy_entry); 8146 /* adjust "start" and "end" */ 8147 new_entry->vme_start += adjustment; 8148 new_entry->vme_end += adjustment; 8149 /* clear some attributes */ 8150 new_entry->inheritance = inheritance; 8151 new_entry->protection = cur_prot; 8152 new_entry->max_protection = max_prot; 8153 new_entry->behavior = VM_BEHAVIOR_DEFAULT; 8154 /* take an extra reference on the entry's "object" */ 8155 if (new_entry->is_sub_map) { 8156 assert(!new_entry->use_pmap); /* not nested */ 8157 vm_map_lock(new_entry->object.sub_map); 8158 vm_map_reference(new_entry->object.sub_map); 8159 vm_map_unlock(new_entry->object.sub_map); 8160 } else { 8161 vm_object_reference(new_entry->object.vm_object); 8162 } 8163 /* insert the new entry in the map */ 8164 vm_map_store_entry_link(map, where, new_entry); 8165 /* continue inserting the "copy entries" after the new entry */ 8166 where = new_entry; 8167 } 8168} 8169 8170/* 8171 * Routine: vm_map_copyout 8172 * 8173 * Description: 8174 * Copy out a copy chain ("copy") into newly-allocated 8175 * space in the destination map. 8176 * 8177 * If successful, consumes the copy object. 8178 * Otherwise, the caller is responsible for it. 8179 */ 8180 8181kern_return_t 8182vm_map_copyout( 8183 vm_map_t dst_map, 8184 vm_map_address_t *dst_addr, /* OUT */ 8185 vm_map_copy_t copy) 8186{ 8187 return vm_map_copyout_internal(dst_map, dst_addr, copy, 8188 TRUE, /* consume_on_success */ 8189 VM_PROT_DEFAULT, 8190 VM_PROT_ALL, 8191 VM_INHERIT_DEFAULT); 8192} 8193 8194kern_return_t 8195vm_map_copyout_internal( 8196 vm_map_t dst_map, 8197 vm_map_address_t *dst_addr, /* OUT */ 8198 vm_map_copy_t copy, 8199 boolean_t consume_on_success, 8200 vm_prot_t cur_protection, 8201 vm_prot_t max_protection, 8202 vm_inherit_t inheritance) 8203{ 8204 vm_map_size_t size; 8205 vm_map_size_t adjustment; 8206 vm_map_offset_t start; 8207 vm_object_offset_t vm_copy_start; 8208 vm_map_entry_t last; 8209 vm_map_entry_t entry; 8210 8211 /* 8212 * Check for null copy object. 8213 */ 8214 8215 if (copy == VM_MAP_COPY_NULL) { 8216 *dst_addr = 0; 8217 return(KERN_SUCCESS); 8218 } 8219 8220 /* 8221 * Check for special copy object, created 8222 * by vm_map_copyin_object. 
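 *
 *	An object-flavored copy need not start on a page boundary, so the
 *	code below maps whole pages and then nudges *dst_addr forward by the
 *	sub-page part of copy->offset: e.g. (illustrative numbers) offset
 *	0x1200 and size 0x800 map a single page of the object (object offset
 *	0x1000) and return *dst_addr pointing 0x200 bytes into that page.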
8223 */ 8224 8225 if (copy->type == VM_MAP_COPY_OBJECT) { 8226 vm_object_t object = copy->cpy_object; 8227 kern_return_t kr; 8228 vm_object_offset_t offset; 8229 8230 offset = vm_object_trunc_page(copy->offset); 8231 size = vm_map_round_page((copy->size + 8232 (vm_map_size_t)(copy->offset - 8233 offset)), 8234 VM_MAP_PAGE_MASK(dst_map)); 8235 *dst_addr = 0; 8236 kr = vm_map_enter(dst_map, dst_addr, size, 8237 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, 8238 object, offset, FALSE, 8239 VM_PROT_DEFAULT, VM_PROT_ALL, 8240 VM_INHERIT_DEFAULT); 8241 if (kr != KERN_SUCCESS) 8242 return(kr); 8243 /* Account for non-pagealigned copy object */ 8244 *dst_addr += (vm_map_offset_t)(copy->offset - offset); 8245 if (consume_on_success) 8246 zfree(vm_map_copy_zone, copy); 8247 return(KERN_SUCCESS); 8248 } 8249 8250 /* 8251 * Check for special kernel buffer allocated 8252 * by new_ipc_kmsg_copyin. 8253 */ 8254 8255 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { 8256 return vm_map_copyout_kernel_buffer(dst_map, dst_addr, 8257 copy, FALSE, 8258 consume_on_success); 8259 } 8260 8261 8262 /* 8263 * Find space for the data 8264 */ 8265 8266 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset, 8267 VM_MAP_COPY_PAGE_MASK(copy)); 8268 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size, 8269 VM_MAP_COPY_PAGE_MASK(copy)) 8270 - vm_copy_start; 8271 8272 8273StartAgain: ; 8274 8275 vm_map_lock(dst_map); 8276 if( dst_map->disable_vmentry_reuse == TRUE) { 8277 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start); 8278 last = entry; 8279 } else { 8280 assert(first_free_is_valid(dst_map)); 8281 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ? 8282 vm_map_min(dst_map) : last->vme_end; 8283 start = vm_map_round_page(start, 8284 VM_MAP_PAGE_MASK(dst_map)); 8285 } 8286 8287 while (TRUE) { 8288 vm_map_entry_t next = last->vme_next; 8289 vm_map_offset_t end = start + size; 8290 8291 if ((end > dst_map->max_offset) || (end < start)) { 8292 if (dst_map->wait_for_space) { 8293 if (size <= (dst_map->max_offset - dst_map->min_offset)) { 8294 assert_wait((event_t) dst_map, 8295 THREAD_INTERRUPTIBLE); 8296 vm_map_unlock(dst_map); 8297 thread_block(THREAD_CONTINUE_NULL); 8298 goto StartAgain; 8299 } 8300 } 8301 vm_map_unlock(dst_map); 8302 return(KERN_NO_SPACE); 8303 } 8304 8305 if ((next == vm_map_to_entry(dst_map)) || 8306 (next->vme_start >= end)) 8307 break; 8308 8309 last = next; 8310 start = last->vme_end; 8311 start = vm_map_round_page(start, 8312 VM_MAP_PAGE_MASK(dst_map)); 8313 } 8314 8315 adjustment = start - vm_copy_start; 8316 if (! consume_on_success) { 8317 /* 8318 * We're not allowed to consume "copy", so we'll have to 8319 * copy its map entries into the destination map below. 8320 * No need to re-allocate map entries from the correct 8321 * (pageable or not) zone, since we'll get new map entries 8322 * during the transfer. 8323 * We'll also adjust the map entries's "start" and "end" 8324 * during the transfer, to keep "copy"'s entries consistent 8325 * with its "offset". 8326 */ 8327 goto after_adjustments; 8328 } 8329 8330 /* 8331 * Since we're going to just drop the map 8332 * entries from the copy into the destination 8333 * map, they must come from the same pool. 8334 */ 8335 8336 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) { 8337 /* 8338 * Mismatches occur when dealing with the default 8339 * pager. 
8340 */ 8341 zone_t old_zone; 8342 vm_map_entry_t next, new; 8343 8344 /* 8345 * Find the zone that the copies were allocated from 8346 */ 8347 8348 entry = vm_map_copy_first_entry(copy); 8349 8350 /* 8351 * Reinitialize the copy so that vm_map_copy_entry_link 8352 * will work. 8353 */ 8354 vm_map_store_copy_reset(copy, entry); 8355 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable; 8356 8357 /* 8358 * Copy each entry. 8359 */ 8360 while (entry != vm_map_copy_to_entry(copy)) { 8361 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); 8362 vm_map_entry_copy_full(new, entry); 8363 assert(!new->iokit_acct); 8364 if (new->is_sub_map) { 8365 /* clr address space specifics */ 8366 new->use_pmap = FALSE; 8367 } 8368 vm_map_copy_entry_link(copy, 8369 vm_map_copy_last_entry(copy), 8370 new); 8371 next = entry->vme_next; 8372 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone; 8373 zfree(old_zone, entry); 8374 entry = next; 8375 } 8376 } 8377 8378 /* 8379 * Adjust the addresses in the copy chain, and 8380 * reset the region attributes. 8381 */ 8382 8383 for (entry = vm_map_copy_first_entry(copy); 8384 entry != vm_map_copy_to_entry(copy); 8385 entry = entry->vme_next) { 8386 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) { 8387 /* 8388 * We're injecting this copy entry into a map that 8389 * has the standard page alignment, so clear 8390 * "map_aligned" (which might have been inherited 8391 * from the original map entry). 8392 */ 8393 entry->map_aligned = FALSE; 8394 } 8395 8396 entry->vme_start += adjustment; 8397 entry->vme_end += adjustment; 8398 8399 if (entry->map_aligned) { 8400 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, 8401 VM_MAP_PAGE_MASK(dst_map))); 8402 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, 8403 VM_MAP_PAGE_MASK(dst_map))); 8404 } 8405 8406 entry->inheritance = VM_INHERIT_DEFAULT; 8407 entry->protection = VM_PROT_DEFAULT; 8408 entry->max_protection = VM_PROT_ALL; 8409 entry->behavior = VM_BEHAVIOR_DEFAULT; 8410 8411 /* 8412 * If the entry is now wired, 8413 * map the pages into the destination map. 8414 */ 8415 if (entry->wired_count != 0) { 8416 register vm_map_offset_t va; 8417 vm_object_offset_t offset; 8418 register vm_object_t object; 8419 vm_prot_t prot; 8420 int type_of_fault; 8421 8422 object = entry->object.vm_object; 8423 offset = entry->offset; 8424 va = entry->vme_start; 8425 8426 pmap_pageable(dst_map->pmap, 8427 entry->vme_start, 8428 entry->vme_end, 8429 TRUE); 8430 8431 while (va < entry->vme_end) { 8432 register vm_page_t m; 8433 8434 /* 8435 * Look up the page in the object. 8436 * Assert that the page will be found in the 8437 * top object: 8438 * either 8439 * the object was newly created by 8440 * vm_object_copy_slowly, and has 8441 * copies of all of the pages from 8442 * the source object 8443 * or 8444 * the object was moved from the old 8445 * map entry; because the old map 8446 * entry was wired, all of the pages 8447 * were in the top-level object. 8448 * (XXX not true if we wire pages for 8449 * reading) 8450 */ 8451 vm_object_lock(object); 8452 8453 m = vm_page_lookup(object, offset); 8454 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) || 8455 m->absent) 8456 panic("vm_map_copyout: wiring %p", m); 8457 8458 /* 8459 * ENCRYPTED SWAP: 8460 * The page is assumed to be wired here, so it 8461 * shouldn't be encrypted. Otherwise, we 8462 * couldn't enter it in the page table, since 8463 * we don't want the user to see the encrypted 8464 * data. 
8465 */ 8466 ASSERT_PAGE_DECRYPTED(m); 8467 8468 prot = entry->protection; 8469 8470 if (override_nx(dst_map, entry->alias) && prot) 8471 prot |= VM_PROT_EXECUTE; 8472 8473 type_of_fault = DBG_CACHE_HIT_FAULT; 8474 8475 vm_fault_enter(m, dst_map->pmap, va, prot, prot, 8476 VM_PAGE_WIRED(m), FALSE, FALSE, 8477 FALSE, entry->alias, 8478 ((entry->iokit_acct || 8479 (!entry->is_sub_map && 8480 !entry->use_pmap)) 8481 ? PMAP_OPTIONS_ALT_ACCT 8482 : 0), 8483 NULL, &type_of_fault); 8484 8485 vm_object_unlock(object); 8486 8487 offset += PAGE_SIZE_64; 8488 va += PAGE_SIZE; 8489 } 8490 } 8491 } 8492 8493after_adjustments: 8494 8495 /* 8496 * Correct the page alignment for the result 8497 */ 8498 8499 *dst_addr = start + (copy->offset - vm_copy_start); 8500 8501 /* 8502 * Update the hints and the map size 8503 */ 8504 8505 if (consume_on_success) { 8506 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); 8507 } else { 8508 SAVE_HINT_MAP_WRITE(dst_map, last); 8509 } 8510 8511 dst_map->size += size; 8512 8513 /* 8514 * Link in the copy 8515 */ 8516 8517 if (consume_on_success) { 8518 vm_map_copy_insert(dst_map, last, copy); 8519 } else { 8520 vm_map_copy_remap(dst_map, last, copy, adjustment, 8521 cur_protection, max_protection, 8522 inheritance); 8523 } 8524 8525 vm_map_unlock(dst_map); 8526 8527 /* 8528 * XXX If wiring_required, call vm_map_pageable 8529 */ 8530 8531 return(KERN_SUCCESS); 8532} 8533 8534/* 8535 * Routine: vm_map_copyin 8536 * 8537 * Description: 8538 * see vm_map_copyin_common. Exported via Unsupported.exports. 8539 * 8540 */ 8541 8542#undef vm_map_copyin 8543 8544kern_return_t 8545vm_map_copyin( 8546 vm_map_t src_map, 8547 vm_map_address_t src_addr, 8548 vm_map_size_t len, 8549 boolean_t src_destroy, 8550 vm_map_copy_t *copy_result) /* OUT */ 8551{ 8552 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy, 8553 FALSE, copy_result, FALSE)); 8554} 8555 8556/* 8557 * Routine: vm_map_copyin_common 8558 * 8559 * Description: 8560 * Copy the specified region (src_addr, len) from the 8561 * source address space (src_map), possibly removing 8562 * the region from the source address space (src_destroy). 8563 * 8564 * Returns: 8565 * A vm_map_copy_t object (copy_result), suitable for 8566 * insertion into another address space (using vm_map_copyout), 8567 * copying over another address space region (using 8568 * vm_map_copy_overwrite). If the copy is unused, it 8569 * should be destroyed (using vm_map_copy_discard). 8570 * 8571 * In/out conditions: 8572 * The source map should not be locked on entry. 8573 */ 8574 8575typedef struct submap_map { 8576 vm_map_t parent_map; 8577 vm_map_offset_t base_start; 8578 vm_map_offset_t base_end; 8579 vm_map_size_t base_len; 8580 struct submap_map *next; 8581} submap_map_t; 8582 8583kern_return_t 8584vm_map_copyin_common( 8585 vm_map_t src_map, 8586 vm_map_address_t src_addr, 8587 vm_map_size_t len, 8588 boolean_t src_destroy, 8589 __unused boolean_t src_volatile, 8590 vm_map_copy_t *copy_result, /* OUT */ 8591 boolean_t use_maxprot) 8592{ 8593 vm_map_entry_t tmp_entry; /* Result of last map lookup -- 8594 * in multi-level lookup, this 8595 * entry contains the actual 8596 * vm_object/offset. 
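 *
 * Illustrative aside (not part of the original source): the round trip
 * described in the routine's header comment above -- snapshot a range
 * from one map, then materialize it in another -- looks roughly like
 * this in a hypothetical caller (names are illustrative):
 */

#if 0	/* illustrative sketch only -- not part of the original source */
	vm_map_copy_t		copy;
	vm_map_address_t	dst_addr;
	kern_return_t		kr;

	kr = vm_map_copyin(src_map, src_addr, len,
			   FALSE,		/* src_destroy */
			   &copy);
	if (kr != KERN_SUCCESS)
		return kr;

	kr = vm_map_copyout(dst_map, &dst_addr, copy);
	if (kr != KERN_SUCCESS)
		vm_map_copy_discard(copy);	/* not consumed on failure */
	return kr;
#endif

/*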
8597 */ 8598 register 8599 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */ 8600 8601 vm_map_offset_t src_start; /* Start of current entry -- 8602 * where copy is taking place now 8603 */ 8604 vm_map_offset_t src_end; /* End of entire region to be 8605 * copied */ 8606 vm_map_offset_t src_base; 8607 vm_map_t base_map = src_map; 8608 boolean_t map_share=FALSE; 8609 submap_map_t *parent_maps = NULL; 8610 8611 register 8612 vm_map_copy_t copy; /* Resulting copy */ 8613 vm_map_address_t copy_addr; 8614 vm_map_size_t copy_size; 8615 8616 /* 8617 * Check for copies of zero bytes. 8618 */ 8619 8620 if (len == 0) { 8621 *copy_result = VM_MAP_COPY_NULL; 8622 return(KERN_SUCCESS); 8623 } 8624 8625 /* 8626 * Check that the end address doesn't overflow 8627 */ 8628 src_end = src_addr + len; 8629 if (src_end < src_addr) 8630 return KERN_INVALID_ADDRESS; 8631 8632 /* 8633 * If the copy is sufficiently small, use a kernel buffer instead 8634 * of making a virtual copy. The theory being that the cost of 8635 * setting up VM (and taking C-O-W faults) dominates the copy costs 8636 * for small regions. 8637 */ 8638 if ((len < msg_ool_size_small) && !use_maxprot) 8639 return vm_map_copyin_kernel_buffer(src_map, src_addr, len, 8640 src_destroy, copy_result); 8641 8642 /* 8643 * Compute (page aligned) start and end of region 8644 */ 8645 src_start = vm_map_trunc_page(src_addr, 8646 VM_MAP_PAGE_MASK(src_map)); 8647 src_end = vm_map_round_page(src_end, 8648 VM_MAP_PAGE_MASK(src_map)); 8649 8650 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0); 8651 8652 /* 8653 * Allocate a header element for the list. 8654 * 8655 * Use the start and end in the header to 8656 * remember the endpoints prior to rounding. 8657 */ 8658 8659 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 8660 vm_map_copy_first_entry(copy) = 8661 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); 8662 copy->type = VM_MAP_COPY_ENTRY_LIST; 8663 copy->cpy_hdr.nentries = 0; 8664 copy->cpy_hdr.entries_pageable = TRUE; 8665#if 00 8666 copy->cpy_hdr.page_shift = src_map->hdr.page_shift; 8667#else 8668 /* 8669 * The copy entries can be broken down for a variety of reasons, 8670 * so we can't guarantee that they will remain map-aligned... 8671 * Will need to adjust the first copy_entry's "vme_start" and 8672 * the last copy_entry's "vme_end" to be rounded to PAGE_MASK 8673 * rather than the original map's alignment. 8674 */ 8675 copy->cpy_hdr.page_shift = PAGE_SHIFT; 8676#endif 8677 8678 vm_map_store_init( &(copy->cpy_hdr) ); 8679 8680 copy->offset = src_addr; 8681 copy->size = len; 8682 8683 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); 8684 8685#define RETURN(x) \ 8686 MACRO_BEGIN \ 8687 vm_map_unlock(src_map); \ 8688 if(src_map != base_map) \ 8689 vm_map_deallocate(src_map); \ 8690 if (new_entry != VM_MAP_ENTRY_NULL) \ 8691 vm_map_copy_entry_dispose(copy,new_entry); \ 8692 vm_map_copy_discard(copy); \ 8693 { \ 8694 submap_map_t *_ptr; \ 8695 \ 8696 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \ 8697 parent_maps=parent_maps->next; \ 8698 if (_ptr->parent_map != base_map) \ 8699 vm_map_deallocate(_ptr->parent_map); \ 8700 kfree(_ptr, sizeof(submap_map_t)); \ 8701 } \ 8702 } \ 8703 MACRO_RETURN(x); \ 8704 MACRO_END 8705 8706 /* 8707 * Find the beginning of the region. 
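 *
 * Illustrative aside (not part of the original source): "copy->offset"
 * and "copy->size" above deliberately record the caller's unrounded
 * src_addr/len; consumers re-derive page-rounded bounds as needed.
 * For example, vm_map_copyout_internal() computes the allocation
 * extent from them as:
 */

#if 0	/* illustrative sketch only -- not part of the original source */
	vm_copy_start = vm_map_trunc_page((vm_map_size_t) copy->offset,
					  VM_MAP_COPY_PAGE_MASK(copy));
	size = vm_map_round_page((vm_map_size_t) copy->offset + copy->size,
				 VM_MAP_COPY_PAGE_MASK(copy))
		- vm_copy_start;
#endif

/*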
8708 */ 8709 8710 vm_map_lock(src_map); 8711 8712 /* 8713 * Lookup the original "src_addr" rather than the truncated 8714 * "src_start", in case "src_start" falls in a non-map-aligned 8715 * map entry *before* the map entry that contains "src_addr"... 8716 */ 8717 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) 8718 RETURN(KERN_INVALID_ADDRESS); 8719 if(!tmp_entry->is_sub_map) { 8720 /* 8721 * ... but clip to the map-rounded "src_start" rather than 8722 * "src_addr" to preserve map-alignment. We'll adjust the 8723 * first copy entry at the end, if needed. 8724 */ 8725 vm_map_clip_start(src_map, tmp_entry, src_start); 8726 } 8727 if (src_start < tmp_entry->vme_start) { 8728 /* 8729 * Move "src_start" up to the start of the 8730 * first map entry to copy. 8731 */ 8732 src_start = tmp_entry->vme_start; 8733 } 8734 /* set for later submap fix-up */ 8735 copy_addr = src_start; 8736 8737 /* 8738 * Go through entries until we get to the end. 8739 */ 8740 8741 while (TRUE) { 8742 register 8743 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */ 8744 vm_map_size_t src_size; /* Size of source 8745 * map entry (in both 8746 * maps) 8747 */ 8748 8749 register 8750 vm_object_t src_object; /* Object to copy */ 8751 vm_object_offset_t src_offset; 8752 8753 boolean_t src_needs_copy; /* Should source map 8754 * be made read-only 8755 * for copy-on-write? 8756 */ 8757 8758 boolean_t new_entry_needs_copy; /* Will new entry be COW? */ 8759 8760 boolean_t was_wired; /* Was source wired? */ 8761 vm_map_version_t version; /* Version before locks 8762 * dropped to make copy 8763 */ 8764 kern_return_t result; /* Return value from 8765 * copy_strategically. 8766 */ 8767 while(tmp_entry->is_sub_map) { 8768 vm_map_size_t submap_len; 8769 submap_map_t *ptr; 8770 8771 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t)); 8772 ptr->next = parent_maps; 8773 parent_maps = ptr; 8774 ptr->parent_map = src_map; 8775 ptr->base_start = src_start; 8776 ptr->base_end = src_end; 8777 submap_len = tmp_entry->vme_end - src_start; 8778 if(submap_len > (src_end-src_start)) 8779 submap_len = src_end-src_start; 8780 ptr->base_len = submap_len; 8781 8782 src_start -= tmp_entry->vme_start; 8783 src_start += tmp_entry->offset; 8784 src_end = src_start + submap_len; 8785 src_map = tmp_entry->object.sub_map; 8786 vm_map_lock(src_map); 8787 /* keep an outstanding reference for all maps in */ 8788 /* the parents tree except the base map */ 8789 vm_map_reference(src_map); 8790 vm_map_unlock(ptr->parent_map); 8791 if (!vm_map_lookup_entry( 8792 src_map, src_start, &tmp_entry)) 8793 RETURN(KERN_INVALID_ADDRESS); 8794 map_share = TRUE; 8795 if(!tmp_entry->is_sub_map) 8796 vm_map_clip_start(src_map, tmp_entry, src_start); 8797 src_entry = tmp_entry; 8798 } 8799 /* we are now in the lowest level submap... */ 8800 8801 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) && 8802 (tmp_entry->object.vm_object->phys_contiguous)) { 8803 /* This is not, supported for now.In future */ 8804 /* we will need to detect the phys_contig */ 8805 /* condition and then upgrade copy_slowly */ 8806 /* to do physical copy from the device mem */ 8807 /* based object. We can piggy-back off of */ 8808 /* the was wired boolean to set-up the */ 8809 /* proper handling */ 8810 RETURN(KERN_PROTECTION_FAILURE); 8811 } 8812 /* 8813 * Create a new address map entry to hold the result. 8814 * Fill in the fields from the appropriate source entries. 8815 * We must unlock the source map to do this if we need 8816 * to allocate a map entry. 
8817 */ 8818 if (new_entry == VM_MAP_ENTRY_NULL) { 8819 version.main_timestamp = src_map->timestamp; 8820 vm_map_unlock(src_map); 8821 8822 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); 8823 8824 vm_map_lock(src_map); 8825 if ((version.main_timestamp + 1) != src_map->timestamp) { 8826 if (!vm_map_lookup_entry(src_map, src_start, 8827 &tmp_entry)) { 8828 RETURN(KERN_INVALID_ADDRESS); 8829 } 8830 if (!tmp_entry->is_sub_map) 8831 vm_map_clip_start(src_map, tmp_entry, src_start); 8832 continue; /* restart w/ new tmp_entry */ 8833 } 8834 } 8835 8836 /* 8837 * Verify that the region can be read. 8838 */ 8839 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE && 8840 !use_maxprot) || 8841 (src_entry->max_protection & VM_PROT_READ) == 0) 8842 RETURN(KERN_PROTECTION_FAILURE); 8843 8844 /* 8845 * Clip against the endpoints of the entire region. 8846 */ 8847 8848 vm_map_clip_end(src_map, src_entry, src_end); 8849 8850 src_size = src_entry->vme_end - src_start; 8851 src_object = src_entry->object.vm_object; 8852 src_offset = src_entry->offset; 8853 was_wired = (src_entry->wired_count != 0); 8854 8855 vm_map_entry_copy(new_entry, src_entry); 8856 if (new_entry->is_sub_map) { 8857 /* clr address space specifics */ 8858 new_entry->use_pmap = FALSE; 8859 } 8860 8861 /* 8862 * Attempt non-blocking copy-on-write optimizations. 8863 */ 8864 8865 if (src_destroy && 8866 (src_object == VM_OBJECT_NULL || 8867 (src_object->internal && !src_object->true_share 8868 && !map_share))) { 8869 /* 8870 * If we are destroying the source, and the object 8871 * is internal, we can move the object reference 8872 * from the source to the copy. The copy is 8873 * copy-on-write only if the source is. 8874 * We make another reference to the object, because 8875 * destroying the source entry will deallocate it. 8876 */ 8877 vm_object_reference(src_object); 8878 8879 /* 8880 * Copy is always unwired. vm_map_copy_entry 8881 * set its wired count to zero. 8882 */ 8883 8884 goto CopySuccessful; 8885 } 8886 8887 8888 RestartCopy: 8889 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n", 8890 src_object, new_entry, new_entry->object.vm_object, 8891 was_wired, 0); 8892 if ((src_object == VM_OBJECT_NULL || 8893 (!was_wired && !map_share && !tmp_entry->is_shared)) && 8894 vm_object_copy_quickly( 8895 &new_entry->object.vm_object, 8896 src_offset, 8897 src_size, 8898 &src_needs_copy, 8899 &new_entry_needs_copy)) { 8900 8901 new_entry->needs_copy = new_entry_needs_copy; 8902 8903 /* 8904 * Handle copy-on-write obligations 8905 */ 8906 8907 if (src_needs_copy && !tmp_entry->needs_copy) { 8908 vm_prot_t prot; 8909 8910 prot = src_entry->protection & ~VM_PROT_WRITE; 8911 8912 if (override_nx(src_map, src_entry->alias) && prot) 8913 prot |= VM_PROT_EXECUTE; 8914 8915 vm_object_pmap_protect( 8916 src_object, 8917 src_offset, 8918 src_size, 8919 (src_entry->is_shared ? 8920 PMAP_NULL 8921 : src_map->pmap), 8922 src_entry->vme_start, 8923 prot); 8924 8925 tmp_entry->needs_copy = TRUE; 8926 } 8927 8928 /* 8929 * The map has never been unlocked, so it's safe 8930 * to move to the next entry rather than doing 8931 * another lookup. 8932 */ 8933 8934 goto CopySuccessful; 8935 } 8936 8937 /* 8938 * Take an object reference, so that we may 8939 * release the map lock(s). 8940 */ 8941 8942 assert(src_object != VM_OBJECT_NULL); 8943 vm_object_reference(src_object); 8944 8945 /* 8946 * Record the timestamp for later verification. 8947 * Unlock the map. 
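 *
 * Illustrative aside (not part of the original source): this is the
 * usual Mach lock-drop pattern.  vm_map_unlock() itself bumps the
 * map's timestamp, so after re-locking, a timestamp equal to the
 * saved value + 1 proves nothing else touched the map while it was
 * unlocked; any other value forces a fresh lookup.  Schematically:
 */

#if 0	/* illustrative sketch only -- not part of the original source */
	version.main_timestamp = src_map->timestamp;
	vm_map_unlock(src_map);		/* unlocking bumps the timestamp once */

	/* ... lengthy work performed without the map lock ... */

	vm_map_lock(src_map);
	if (version.main_timestamp + 1 != src_map->timestamp) {
		/* the map changed behind our back: re-lookup and re-verify */
	}
#endif

/*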
8948 */ 8949 8950 version.main_timestamp = src_map->timestamp; 8951 vm_map_unlock(src_map); /* Increments timestamp once! */ 8952 8953 /* 8954 * Perform the copy 8955 */ 8956 8957 if (was_wired) { 8958 CopySlowly: 8959 vm_object_lock(src_object); 8960 result = vm_object_copy_slowly( 8961 src_object, 8962 src_offset, 8963 src_size, 8964 THREAD_UNINT, 8965 &new_entry->object.vm_object); 8966 new_entry->offset = 0; 8967 new_entry->needs_copy = FALSE; 8968 8969 } 8970 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && 8971 (tmp_entry->is_shared || map_share)) { 8972 vm_object_t new_object; 8973 8974 vm_object_lock_shared(src_object); 8975 new_object = vm_object_copy_delayed( 8976 src_object, 8977 src_offset, 8978 src_size, 8979 TRUE); 8980 if (new_object == VM_OBJECT_NULL) 8981 goto CopySlowly; 8982 8983 new_entry->object.vm_object = new_object; 8984 new_entry->needs_copy = TRUE; 8985 assert(!new_entry->iokit_acct); 8986 assert(new_object->purgable == VM_PURGABLE_DENY); 8987 new_entry->use_pmap = TRUE; 8988 result = KERN_SUCCESS; 8989 8990 } else { 8991 result = vm_object_copy_strategically(src_object, 8992 src_offset, 8993 src_size, 8994 &new_entry->object.vm_object, 8995 &new_entry->offset, 8996 &new_entry_needs_copy); 8997 8998 new_entry->needs_copy = new_entry_needs_copy; 8999 } 9000 9001 if (result != KERN_SUCCESS && 9002 result != KERN_MEMORY_RESTART_COPY) { 9003 vm_map_lock(src_map); 9004 RETURN(result); 9005 } 9006 9007 /* 9008 * Throw away the extra reference 9009 */ 9010 9011 vm_object_deallocate(src_object); 9012 9013 /* 9014 * Verify that the map has not substantially 9015 * changed while the copy was being made. 9016 */ 9017 9018 vm_map_lock(src_map); 9019 9020 if ((version.main_timestamp + 1) == src_map->timestamp) 9021 goto VerificationSuccessful; 9022 9023 /* 9024 * Simple version comparison failed. 9025 * 9026 * Retry the lookup and verify that the 9027 * same object/offset are still present. 9028 * 9029 * [Note: a memory manager that colludes with 9030 * the calling task can detect that we have 9031 * cheated. While the map was unlocked, the 9032 * mapping could have been changed and restored.] 9033 */ 9034 9035 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) { 9036 if (result != KERN_MEMORY_RESTART_COPY) { 9037 vm_object_deallocate(new_entry->object.vm_object); 9038 new_entry->object.vm_object = VM_OBJECT_NULL; 9039 assert(!new_entry->iokit_acct); 9040 new_entry->use_pmap = TRUE; 9041 } 9042 RETURN(KERN_INVALID_ADDRESS); 9043 } 9044 9045 src_entry = tmp_entry; 9046 vm_map_clip_start(src_map, src_entry, src_start); 9047 9048 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) && 9049 !use_maxprot) || 9050 ((src_entry->max_protection & VM_PROT_READ) == 0)) 9051 goto VerificationFailed; 9052 9053 if (src_entry->vme_end < new_entry->vme_end) { 9054 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end, 9055 VM_MAP_COPY_PAGE_MASK(copy))); 9056 new_entry->vme_end = src_entry->vme_end; 9057 src_size = new_entry->vme_end - src_start; 9058 } 9059 9060 if ((src_entry->object.vm_object != src_object) || 9061 (src_entry->offset != src_offset) ) { 9062 9063 /* 9064 * Verification failed. 9065 * 9066 * Start over with this top-level entry. 9067 */ 9068 9069 VerificationFailed: ; 9070 9071 vm_object_deallocate(new_entry->object.vm_object); 9072 tmp_entry = src_entry; 9073 continue; 9074 } 9075 9076 /* 9077 * Verification succeeded. 
9078 */ 9079 9080 VerificationSuccessful: ; 9081 9082 if (result == KERN_MEMORY_RESTART_COPY) 9083 goto RestartCopy; 9084 9085 /* 9086 * Copy succeeded. 9087 */ 9088 9089 CopySuccessful: ; 9090 9091 /* 9092 * Link in the new copy entry. 9093 */ 9094 9095 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), 9096 new_entry); 9097 9098 /* 9099 * Determine whether the entire region 9100 * has been copied. 9101 */ 9102 src_base = src_start; 9103 src_start = new_entry->vme_end; 9104 new_entry = VM_MAP_ENTRY_NULL; 9105 while ((src_start >= src_end) && (src_end != 0)) { 9106 submap_map_t *ptr; 9107 9108 if (src_map == base_map) { 9109 /* back to the top */ 9110 break; 9111 } 9112 9113 ptr = parent_maps; 9114 assert(ptr != NULL); 9115 parent_maps = parent_maps->next; 9116 9117 /* fix up the damage we did in that submap */ 9118 vm_map_simplify_range(src_map, 9119 src_base, 9120 src_end); 9121 9122 vm_map_unlock(src_map); 9123 vm_map_deallocate(src_map); 9124 vm_map_lock(ptr->parent_map); 9125 src_map = ptr->parent_map; 9126 src_base = ptr->base_start; 9127 src_start = ptr->base_start + ptr->base_len; 9128 src_end = ptr->base_end; 9129 if (!vm_map_lookup_entry(src_map, 9130 src_start, 9131 &tmp_entry) && 9132 (src_end > src_start)) { 9133 RETURN(KERN_INVALID_ADDRESS); 9134 } 9135 kfree(ptr, sizeof(submap_map_t)); 9136 if (parent_maps == NULL) 9137 map_share = FALSE; 9138 src_entry = tmp_entry->vme_prev; 9139 } 9140 9141 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) && 9142 (src_start >= src_addr + len) && 9143 (src_addr + len != 0)) { 9144 /* 9145 * Stop copying now, even though we haven't reached 9146 * "src_end". We'll adjust the end of the last copy 9147 * entry at the end, if needed. 9148 * 9149 * If src_map's aligment is different from the 9150 * system's page-alignment, there could be 9151 * extra non-map-aligned map entries between 9152 * the original (non-rounded) "src_addr + len" 9153 * and the rounded "src_end". 9154 * We do not want to copy those map entries since 9155 * they're not part of the copied range. 9156 */ 9157 break; 9158 } 9159 9160 if ((src_start >= src_end) && (src_end != 0)) 9161 break; 9162 9163 /* 9164 * Verify that there are no gaps in the region 9165 */ 9166 9167 tmp_entry = src_entry->vme_next; 9168 if ((tmp_entry->vme_start != src_start) || 9169 (tmp_entry == vm_map_to_entry(src_map))) { 9170 RETURN(KERN_INVALID_ADDRESS); 9171 } 9172 } 9173 9174 /* 9175 * If the source should be destroyed, do it now, since the 9176 * copy was successful. 9177 */ 9178 if (src_destroy) { 9179 (void) vm_map_delete( 9180 src_map, 9181 vm_map_trunc_page(src_addr, 9182 VM_MAP_PAGE_MASK(src_map)), 9183 src_end, 9184 ((src_map == kernel_map) ? 
9185 VM_MAP_REMOVE_KUNWIRE : 9186 VM_MAP_NO_FLAGS), 9187 VM_MAP_NULL); 9188 } else { 9189 /* fix up the damage we did in the base map */ 9190 vm_map_simplify_range( 9191 src_map, 9192 vm_map_trunc_page(src_addr, 9193 VM_MAP_PAGE_MASK(src_map)), 9194 vm_map_round_page(src_end, 9195 VM_MAP_PAGE_MASK(src_map))); 9196 } 9197 9198 vm_map_unlock(src_map); 9199 9200 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) { 9201 vm_map_offset_t original_start, original_offset, original_end; 9202 9203 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK); 9204 9205 /* adjust alignment of first copy_entry's "vme_start" */ 9206 tmp_entry = vm_map_copy_first_entry(copy); 9207 if (tmp_entry != vm_map_copy_to_entry(copy)) { 9208 vm_map_offset_t adjustment; 9209 9210 original_start = tmp_entry->vme_start; 9211 original_offset = tmp_entry->offset; 9212 9213 /* map-align the start of the first copy entry... */ 9214 adjustment = (tmp_entry->vme_start - 9215 vm_map_trunc_page( 9216 tmp_entry->vme_start, 9217 VM_MAP_PAGE_MASK(src_map))); 9218 tmp_entry->vme_start -= adjustment; 9219 tmp_entry->offset -= adjustment; 9220 copy_addr -= adjustment; 9221 assert(tmp_entry->vme_start < tmp_entry->vme_end); 9222 /* ... adjust for mis-aligned start of copy range */ 9223 adjustment = 9224 (vm_map_trunc_page(copy->offset, 9225 PAGE_MASK) - 9226 vm_map_trunc_page(copy->offset, 9227 VM_MAP_PAGE_MASK(src_map))); 9228 if (adjustment) { 9229 assert(page_aligned(adjustment)); 9230 assert(adjustment < VM_MAP_PAGE_SIZE(src_map)); 9231 tmp_entry->vme_start += adjustment; 9232 tmp_entry->offset += adjustment; 9233 copy_addr += adjustment; 9234 assert(tmp_entry->vme_start < tmp_entry->vme_end); 9235 } 9236 9237 /* 9238 * Assert that the adjustments haven't exposed 9239 * more than was originally copied... 9240 */ 9241 assert(tmp_entry->vme_start >= original_start); 9242 assert(tmp_entry->offset >= original_offset); 9243 /* 9244 * ... and that it did not adjust outside of a 9245 * a single 16K page. 9246 */ 9247 assert(vm_map_trunc_page(tmp_entry->vme_start, 9248 VM_MAP_PAGE_MASK(src_map)) == 9249 vm_map_trunc_page(original_start, 9250 VM_MAP_PAGE_MASK(src_map))); 9251 } 9252 9253 /* adjust alignment of last copy_entry's "vme_end" */ 9254 tmp_entry = vm_map_copy_last_entry(copy); 9255 if (tmp_entry != vm_map_copy_to_entry(copy)) { 9256 vm_map_offset_t adjustment; 9257 9258 original_end = tmp_entry->vme_end; 9259 9260 /* map-align the end of the last copy entry... */ 9261 tmp_entry->vme_end = 9262 vm_map_round_page(tmp_entry->vme_end, 9263 VM_MAP_PAGE_MASK(src_map)); 9264 /* ... adjust for mis-aligned end of copy range */ 9265 adjustment = 9266 (vm_map_round_page((copy->offset + 9267 copy->size), 9268 VM_MAP_PAGE_MASK(src_map)) - 9269 vm_map_round_page((copy->offset + 9270 copy->size), 9271 PAGE_MASK)); 9272 if (adjustment) { 9273 assert(page_aligned(adjustment)); 9274 assert(adjustment < VM_MAP_PAGE_SIZE(src_map)); 9275 tmp_entry->vme_end -= adjustment; 9276 assert(tmp_entry->vme_start < tmp_entry->vme_end); 9277 } 9278 9279 /* 9280 * Assert that the adjustments haven't exposed 9281 * more than was originally copied... 9282 */ 9283 assert(tmp_entry->vme_end <= original_end); 9284 /* 9285 * ... and that it did not adjust outside of a 9286 * a single 16K page. 9287 */ 9288 assert(vm_map_round_page(tmp_entry->vme_end, 9289 VM_MAP_PAGE_MASK(src_map)) == 9290 vm_map_round_page(original_end, 9291 VM_MAP_PAGE_MASK(src_map))); 9292 } 9293 } 9294 9295 /* Fix-up start and end points in copy. 
This is necessary */ 9296 /* when the various entries in the copy object were picked */ 9297 /* up from different sub-maps */ 9298 9299 tmp_entry = vm_map_copy_first_entry(copy); 9300 copy_size = 0; /* compute actual size */ 9301 while (tmp_entry != vm_map_copy_to_entry(copy)) { 9302 assert(VM_MAP_PAGE_ALIGNED( 9303 copy_addr + (tmp_entry->vme_end - 9304 tmp_entry->vme_start), 9305 VM_MAP_COPY_PAGE_MASK(copy))); 9306 assert(VM_MAP_PAGE_ALIGNED( 9307 copy_addr, 9308 VM_MAP_COPY_PAGE_MASK(copy))); 9309 9310 /* 9311 * The copy_entries will be injected directly into the 9312 * destination map and might not be "map aligned" there... 9313 */ 9314 tmp_entry->map_aligned = FALSE; 9315 9316 tmp_entry->vme_end = copy_addr + 9317 (tmp_entry->vme_end - tmp_entry->vme_start); 9318 tmp_entry->vme_start = copy_addr; 9319 assert(tmp_entry->vme_start < tmp_entry->vme_end); 9320 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start; 9321 copy_size += tmp_entry->vme_end - tmp_entry->vme_start; 9322 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next; 9323 } 9324 9325 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT && 9326 copy_size < copy->size) { 9327 /* 9328 * The actual size of the VM map copy is smaller than what 9329 * was requested by the caller. This must be because some 9330 * PAGE_SIZE-sized pages are missing at the end of the last 9331 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range. 9332 * The caller might not have been aware of those missing 9333 * pages and might not want to be aware of it, which is 9334 * fine as long as they don't try to access (and crash on) 9335 * those missing pages. 9336 * Let's adjust the size of the "copy", to avoid failing 9337 * in vm_map_copyout() or vm_map_copy_overwrite(). 9338 */ 9339 assert(vm_map_round_page(copy_size, 9340 VM_MAP_PAGE_MASK(src_map)) == 9341 vm_map_round_page(copy->size, 9342 VM_MAP_PAGE_MASK(src_map))); 9343 copy->size = copy_size; 9344 } 9345 9346 *copy_result = copy; 9347 return(KERN_SUCCESS); 9348 9349#undef RETURN 9350} 9351 9352kern_return_t 9353vm_map_copy_extract( 9354 vm_map_t src_map, 9355 vm_map_address_t src_addr, 9356 vm_map_size_t len, 9357 vm_map_copy_t *copy_result, /* OUT */ 9358 vm_prot_t *cur_prot, /* OUT */ 9359 vm_prot_t *max_prot) 9360{ 9361 vm_map_offset_t src_start, src_end; 9362 vm_map_copy_t copy; 9363 kern_return_t kr; 9364 9365 /* 9366 * Check for copies of zero bytes. 9367 */ 9368 9369 if (len == 0) { 9370 *copy_result = VM_MAP_COPY_NULL; 9371 return(KERN_SUCCESS); 9372 } 9373 9374 /* 9375 * Check that the end address doesn't overflow 9376 */ 9377 src_end = src_addr + len; 9378 if (src_end < src_addr) 9379 return KERN_INVALID_ADDRESS; 9380 9381 /* 9382 * Compute (page aligned) start and end of region 9383 */ 9384 src_start = vm_map_trunc_page(src_addr, PAGE_MASK); 9385 src_end = vm_map_round_page(src_end, PAGE_MASK); 9386 9387 /* 9388 * Allocate a header element for the list. 9389 * 9390 * Use the start and end in the header to 9391 * remember the endpoints prior to rounding. 
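 *
 * Illustrative aside (not part of the original source): unlike
 * vm_map_copyin(), this routine populates the copy via
 * vm_map_remap_extract() below, sharing the source entries rather
 * than copying their pages, and reports the range's current/maximum
 * protections back to the caller.  A hypothetical caller:
 */

#if 0	/* illustrative sketch only -- not part of the original source */
	vm_map_copy_t	copy;
	vm_prot_t	cur_prot, max_prot;
	kern_return_t	kr;

	kr = vm_map_copy_extract(src_map, src_addr, len,
				 &copy, &cur_prot, &max_prot);
	if (kr != KERN_SUCCESS)
		return kr;	/* failure path already discarded the copy */
	/* "copy" now describes the source range; cur_prot/max_prot
	 * reflect the protections of the extracted entries */
#endif

/*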
9392 */ 9393 9394 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 9395 vm_map_copy_first_entry(copy) = 9396 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); 9397 copy->type = VM_MAP_COPY_ENTRY_LIST; 9398 copy->cpy_hdr.nentries = 0; 9399 copy->cpy_hdr.entries_pageable = TRUE; 9400 9401 vm_map_store_init(©->cpy_hdr); 9402 9403 copy->offset = 0; 9404 copy->size = len; 9405 9406 kr = vm_map_remap_extract(src_map, 9407 src_addr, 9408 len, 9409 FALSE, /* copy */ 9410 ©->cpy_hdr, 9411 cur_prot, 9412 max_prot, 9413 VM_INHERIT_SHARE, 9414 TRUE); /* pageable */ 9415 if (kr != KERN_SUCCESS) { 9416 vm_map_copy_discard(copy); 9417 return kr; 9418 } 9419 9420 *copy_result = copy; 9421 return KERN_SUCCESS; 9422} 9423 9424/* 9425 * vm_map_copyin_object: 9426 * 9427 * Create a copy object from an object. 9428 * Our caller donates an object reference. 9429 */ 9430 9431kern_return_t 9432vm_map_copyin_object( 9433 vm_object_t object, 9434 vm_object_offset_t offset, /* offset of region in object */ 9435 vm_object_size_t size, /* size of region in object */ 9436 vm_map_copy_t *copy_result) /* OUT */ 9437{ 9438 vm_map_copy_t copy; /* Resulting copy */ 9439 9440 /* 9441 * We drop the object into a special copy object 9442 * that contains the object directly. 9443 */ 9444 9445 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 9446 copy->type = VM_MAP_COPY_OBJECT; 9447 copy->cpy_object = object; 9448 copy->offset = offset; 9449 copy->size = size; 9450 9451 *copy_result = copy; 9452 return(KERN_SUCCESS); 9453} 9454 9455static void 9456vm_map_fork_share( 9457 vm_map_t old_map, 9458 vm_map_entry_t old_entry, 9459 vm_map_t new_map) 9460{ 9461 vm_object_t object; 9462 vm_map_entry_t new_entry; 9463 9464 /* 9465 * New sharing code. New map entry 9466 * references original object. Internal 9467 * objects use asynchronous copy algorithm for 9468 * future copies. First make sure we have 9469 * the right object. If we need a shadow, 9470 * or someone else already has one, then 9471 * make a new shadow and share it. 9472 */ 9473 9474 object = old_entry->object.vm_object; 9475 if (old_entry->is_sub_map) { 9476 assert(old_entry->wired_count == 0); 9477#ifndef NO_NESTED_PMAP 9478 if(old_entry->use_pmap) { 9479 kern_return_t result; 9480 9481 result = pmap_nest(new_map->pmap, 9482 (old_entry->object.sub_map)->pmap, 9483 (addr64_t)old_entry->vme_start, 9484 (addr64_t)old_entry->vme_start, 9485 (uint64_t)(old_entry->vme_end - old_entry->vme_start)); 9486 if(result) 9487 panic("vm_map_fork_share: pmap_nest failed!"); 9488 } 9489#endif /* NO_NESTED_PMAP */ 9490 } else if (object == VM_OBJECT_NULL) { 9491 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end - 9492 old_entry->vme_start)); 9493 old_entry->offset = 0; 9494 old_entry->object.vm_object = object; 9495 old_entry->use_pmap = TRUE; 9496 assert(!old_entry->needs_copy); 9497 } else if (object->copy_strategy != 9498 MEMORY_OBJECT_COPY_SYMMETRIC) { 9499 9500 /* 9501 * We are already using an asymmetric 9502 * copy, and therefore we already have 9503 * the right object. 9504 */ 9505 9506 assert(! old_entry->needs_copy); 9507 } 9508 else if (old_entry->needs_copy || /* case 1 */ 9509 object->shadowed || /* case 2 */ 9510 (!object->true_share && /* case 3 */ 9511 !old_entry->is_shared && 9512 (object->vo_size > 9513 (vm_map_size_t)(old_entry->vme_end - 9514 old_entry->vme_start)))) { 9515 9516 /* 9517 * We need to create a shadow. 9518 * There are three cases here. 
9519 * In the first case, we need to 9520 * complete a deferred symmetrical 9521 * copy that we participated in. 9522 * In the second and third cases, 9523 * we need to create the shadow so 9524 * that changes that we make to the 9525 * object do not interfere with 9526 * any symmetrical copies which 9527 * have occured (case 2) or which 9528 * might occur (case 3). 9529 * 9530 * The first case is when we had 9531 * deferred shadow object creation 9532 * via the entry->needs_copy mechanism. 9533 * This mechanism only works when 9534 * only one entry points to the source 9535 * object, and we are about to create 9536 * a second entry pointing to the 9537 * same object. The problem is that 9538 * there is no way of mapping from 9539 * an object to the entries pointing 9540 * to it. (Deferred shadow creation 9541 * works with one entry because occurs 9542 * at fault time, and we walk from the 9543 * entry to the object when handling 9544 * the fault.) 9545 * 9546 * The second case is when the object 9547 * to be shared has already been copied 9548 * with a symmetric copy, but we point 9549 * directly to the object without 9550 * needs_copy set in our entry. (This 9551 * can happen because different ranges 9552 * of an object can be pointed to by 9553 * different entries. In particular, 9554 * a single entry pointing to an object 9555 * can be split by a call to vm_inherit, 9556 * which, combined with task_create, can 9557 * result in the different entries 9558 * having different needs_copy values.) 9559 * The shadowed flag in the object allows 9560 * us to detect this case. The problem 9561 * with this case is that if this object 9562 * has or will have shadows, then we 9563 * must not perform an asymmetric copy 9564 * of this object, since such a copy 9565 * allows the object to be changed, which 9566 * will break the previous symmetrical 9567 * copies (which rely upon the object 9568 * not changing). In a sense, the shadowed 9569 * flag says "don't change this object". 9570 * We fix this by creating a shadow 9571 * object for this object, and sharing 9572 * that. This works because we are free 9573 * to change the shadow object (and thus 9574 * to use an asymmetric copy strategy); 9575 * this is also semantically correct, 9576 * since this object is temporary, and 9577 * therefore a copy of the object is 9578 * as good as the object itself. (This 9579 * is not true for permanent objects, 9580 * since the pager needs to see changes, 9581 * which won't happen if the changes 9582 * are made to a copy.) 9583 * 9584 * The third case is when the object 9585 * to be shared has parts sticking 9586 * outside of the entry we're working 9587 * with, and thus may in the future 9588 * be subject to a symmetrical copy. 9589 * (This is a preemptive version of 9590 * case 2.) 9591 */ 9592 vm_object_shadow(&old_entry->object.vm_object, 9593 &old_entry->offset, 9594 (vm_map_size_t) (old_entry->vme_end - 9595 old_entry->vme_start)); 9596 9597 /* 9598 * If we're making a shadow for other than 9599 * copy on write reasons, then we have 9600 * to remove write permission. 
9601 */ 9602 9603 if (!old_entry->needs_copy && 9604 (old_entry->protection & VM_PROT_WRITE)) { 9605 vm_prot_t prot; 9606 9607 prot = old_entry->protection & ~VM_PROT_WRITE; 9608 9609 if (override_nx(old_map, old_entry->alias) && prot) 9610 prot |= VM_PROT_EXECUTE; 9611 9612 if (old_map->mapped_in_other_pmaps) { 9613 vm_object_pmap_protect( 9614 old_entry->object.vm_object, 9615 old_entry->offset, 9616 (old_entry->vme_end - 9617 old_entry->vme_start), 9618 PMAP_NULL, 9619 old_entry->vme_start, 9620 prot); 9621 } else { 9622 pmap_protect(old_map->pmap, 9623 old_entry->vme_start, 9624 old_entry->vme_end, 9625 prot); 9626 } 9627 } 9628 9629 old_entry->needs_copy = FALSE; 9630 object = old_entry->object.vm_object; 9631 } 9632 9633 9634 /* 9635 * If object was using a symmetric copy strategy, 9636 * change its copy strategy to the default 9637 * asymmetric copy strategy, which is copy_delay 9638 * in the non-norma case and copy_call in the 9639 * norma case. Bump the reference count for the 9640 * new entry. 9641 */ 9642 9643 if(old_entry->is_sub_map) { 9644 vm_map_lock(old_entry->object.sub_map); 9645 vm_map_reference(old_entry->object.sub_map); 9646 vm_map_unlock(old_entry->object.sub_map); 9647 } else { 9648 vm_object_lock(object); 9649 vm_object_reference_locked(object); 9650 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) { 9651 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 9652 } 9653 vm_object_unlock(object); 9654 } 9655 9656 /* 9657 * Clone the entry, using object ref from above. 9658 * Mark both entries as shared. 9659 */ 9660 9661 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel 9662 * map or descendants */ 9663 vm_map_entry_copy(new_entry, old_entry); 9664 old_entry->is_shared = TRUE; 9665 new_entry->is_shared = TRUE; 9666 9667 /* 9668 * Insert the entry into the new map -- we 9669 * know we're inserting at the end of the new 9670 * map. 9671 */ 9672 9673 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry); 9674 9675 /* 9676 * Update the physical map 9677 */ 9678 9679 if (old_entry->is_sub_map) { 9680 /* Bill Angell pmap support goes here */ 9681 } else { 9682 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start, 9683 old_entry->vme_end - old_entry->vme_start, 9684 old_entry->vme_start); 9685 } 9686} 9687 9688static boolean_t 9689vm_map_fork_copy( 9690 vm_map_t old_map, 9691 vm_map_entry_t *old_entry_p, 9692 vm_map_t new_map) 9693{ 9694 vm_map_entry_t old_entry = *old_entry_p; 9695 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start; 9696 vm_map_offset_t start = old_entry->vme_start; 9697 vm_map_copy_t copy; 9698 vm_map_entry_t last = vm_map_last_entry(new_map); 9699 9700 vm_map_unlock(old_map); 9701 /* 9702 * Use maxprot version of copyin because we 9703 * care about whether this memory can ever 9704 * be accessed, not just whether it's accessible 9705 * right now. 9706 */ 9707 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, ©) 9708 != KERN_SUCCESS) { 9709 /* 9710 * The map might have changed while it 9711 * was unlocked, check it again. Skip 9712 * any blank space or permanently 9713 * unreadable region. 9714 */ 9715 vm_map_lock(old_map); 9716 if (!vm_map_lookup_entry(old_map, start, &last) || 9717 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) { 9718 last = last->vme_next; 9719 } 9720 *old_entry_p = last; 9721 9722 /* 9723 * XXX For some error returns, want to 9724 * XXX skip to the next element. Note 9725 * that INVALID_ADDRESS and 9726 * PROTECTION_FAILURE are handled above. 
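 *
 * Illustrative aside (not part of the original source): whether fork
 * shares a region (vm_map_fork_share), copies it through this path,
 * or skips it entirely is driven by each entry's inheritance
 * attribute -- see the switch in vm_map_fork() below.  From user
 * space that attribute is set per range with mach_vm_inherit(); a
 * hypothetical user-space example (addresses are illustrative):
 */

#if 0	/* illustrative user-space sketch -- not part of this file */
	/* a user-space caller would #include <mach/mach.h> and <mach/mach_vm.h> */
	mach_vm_address_t	addr = 0;	/* some existing mapping */
	mach_vm_size_t		size = 16 * 1024;
	kern_return_t		kr;

	/* share this range with forked children instead of copy-on-write */
	kr = mach_vm_inherit(mach_task_self(), addr, size, VM_INHERIT_SHARE);
	/* VM_INHERIT_COPY and VM_INHERIT_NONE are the other values the
	 * switch in vm_map_fork() honors */
#endif

/*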
9727 */ 9728 9729 return FALSE; 9730 } 9731 9732 /* 9733 * Insert the copy into the new map 9734 */ 9735 9736 vm_map_copy_insert(new_map, last, copy); 9737 9738 /* 9739 * Pick up the traversal at the end of 9740 * the copied region. 9741 */ 9742 9743 vm_map_lock(old_map); 9744 start += entry_size; 9745 if (! vm_map_lookup_entry(old_map, start, &last)) { 9746 last = last->vme_next; 9747 } else { 9748 if (last->vme_start == start) { 9749 /* 9750 * No need to clip here and we don't 9751 * want to cause any unnecessary 9752 * unnesting... 9753 */ 9754 } else { 9755 vm_map_clip_start(old_map, last, start); 9756 } 9757 } 9758 *old_entry_p = last; 9759 9760 return TRUE; 9761} 9762 9763/* 9764 * vm_map_fork: 9765 * 9766 * Create and return a new map based on the old 9767 * map, according to the inheritance values on the 9768 * regions in that map. 9769 * 9770 * The source map must not be locked. 9771 */ 9772vm_map_t 9773vm_map_fork( 9774 ledger_t ledger, 9775 vm_map_t old_map) 9776{ 9777 pmap_t new_pmap; 9778 vm_map_t new_map; 9779 vm_map_entry_t old_entry; 9780 vm_map_size_t new_size = 0, entry_size; 9781 vm_map_entry_t new_entry; 9782 boolean_t src_needs_copy; 9783 boolean_t new_entry_needs_copy; 9784 9785 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, 9786#if defined(__i386__) || defined(__x86_64__) 9787 old_map->pmap->pm_task_map != TASK_MAP_32BIT 9788#else 9789#error Unknown architecture. 9790#endif 9791 ); 9792 9793 vm_map_reference_swap(old_map); 9794 vm_map_lock(old_map); 9795 9796 new_map = vm_map_create(new_pmap, 9797 old_map->min_offset, 9798 old_map->max_offset, 9799 old_map->hdr.entries_pageable); 9800 /* inherit the parent map's page size */ 9801 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map)); 9802 for ( 9803 old_entry = vm_map_first_entry(old_map); 9804 old_entry != vm_map_to_entry(old_map); 9805 ) { 9806 9807 entry_size = old_entry->vme_end - old_entry->vme_start; 9808 9809 switch (old_entry->inheritance) { 9810 case VM_INHERIT_NONE: 9811 break; 9812 9813 case VM_INHERIT_SHARE: 9814 vm_map_fork_share(old_map, old_entry, new_map); 9815 new_size += entry_size; 9816 break; 9817 9818 case VM_INHERIT_COPY: 9819 9820 /* 9821 * Inline the copy_quickly case; 9822 * upon failure, fall back on call 9823 * to vm_map_fork_copy. 9824 */ 9825 9826 if(old_entry->is_sub_map) 9827 break; 9828 if ((old_entry->wired_count != 0) || 9829 ((old_entry->object.vm_object != NULL) && 9830 (old_entry->object.vm_object->true_share))) { 9831 goto slow_vm_map_fork_copy; 9832 } 9833 9834 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */ 9835 vm_map_entry_copy(new_entry, old_entry); 9836 if (new_entry->is_sub_map) { 9837 /* clear address space specifics */ 9838 new_entry->use_pmap = FALSE; 9839 } 9840 9841 if (! 
vm_object_copy_quickly( 9842 &new_entry->object.vm_object, 9843 old_entry->offset, 9844 (old_entry->vme_end - 9845 old_entry->vme_start), 9846 &src_needs_copy, 9847 &new_entry_needs_copy)) { 9848 vm_map_entry_dispose(new_map, new_entry); 9849 goto slow_vm_map_fork_copy; 9850 } 9851 9852 /* 9853 * Handle copy-on-write obligations 9854 */ 9855 9856 if (src_needs_copy && !old_entry->needs_copy) { 9857 vm_prot_t prot; 9858 9859 prot = old_entry->protection & ~VM_PROT_WRITE; 9860 9861 if (override_nx(old_map, old_entry->alias) && prot) 9862 prot |= VM_PROT_EXECUTE; 9863 9864 vm_object_pmap_protect( 9865 old_entry->object.vm_object, 9866 old_entry->offset, 9867 (old_entry->vme_end - 9868 old_entry->vme_start), 9869 ((old_entry->is_shared 9870 || old_map->mapped_in_other_pmaps) 9871 ? PMAP_NULL : 9872 old_map->pmap), 9873 old_entry->vme_start, 9874 prot); 9875 9876 old_entry->needs_copy = TRUE; 9877 } 9878 new_entry->needs_copy = new_entry_needs_copy; 9879 9880 /* 9881 * Insert the entry at the end 9882 * of the map. 9883 */ 9884 9885 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), 9886 new_entry); 9887 new_size += entry_size; 9888 break; 9889 9890 slow_vm_map_fork_copy: 9891 if (vm_map_fork_copy(old_map, &old_entry, new_map)) { 9892 new_size += entry_size; 9893 } 9894 continue; 9895 } 9896 old_entry = old_entry->vme_next; 9897 } 9898 9899 9900 new_map->size = new_size; 9901 vm_map_unlock(old_map); 9902 vm_map_deallocate(old_map); 9903 9904 return(new_map); 9905} 9906 9907/* 9908 * vm_map_exec: 9909 * 9910 * Setup the "new_map" with the proper execution environment according 9911 * to the type of executable (platform, 64bit, chroot environment). 9912 * Map the comm page and shared region, etc... 9913 */ 9914kern_return_t 9915vm_map_exec( 9916 vm_map_t new_map, 9917 task_t task, 9918 void *fsroot, 9919 cpu_type_t cpu) 9920{ 9921 SHARED_REGION_TRACE_DEBUG( 9922 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n", 9923 (void *)VM_KERNEL_ADDRPERM(current_task()), 9924 (void *)VM_KERNEL_ADDRPERM(new_map), 9925 (void *)VM_KERNEL_ADDRPERM(task), 9926 (void *)VM_KERNEL_ADDRPERM(fsroot), 9927 cpu)); 9928 (void) vm_commpage_enter(new_map, task); 9929 (void) vm_shared_region_enter(new_map, task, fsroot, cpu); 9930 SHARED_REGION_TRACE_DEBUG( 9931 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n", 9932 (void *)VM_KERNEL_ADDRPERM(current_task()), 9933 (void *)VM_KERNEL_ADDRPERM(new_map), 9934 (void *)VM_KERNEL_ADDRPERM(task), 9935 (void *)VM_KERNEL_ADDRPERM(fsroot), 9936 cpu)); 9937 return KERN_SUCCESS; 9938} 9939 9940/* 9941 * vm_map_lookup_locked: 9942 * 9943 * Finds the VM object, offset, and 9944 * protection for a given virtual address in the 9945 * specified map, assuming a page fault of the 9946 * type specified. 9947 * 9948 * Returns the (object, offset, protection) for 9949 * this address, whether it is wired down, and whether 9950 * this map has the only reference to the data in question. 9951 * In order to later verify this lookup, a "version" 9952 * is returned. 9953 * 9954 * The map MUST be locked by the caller and WILL be 9955 * locked on exit. In order to guarantee the 9956 * existence of the returned object, it is returned 9957 * locked. 9958 * 9959 * If a lookup is requested with "write protection" 9960 * specified, the map may be changed to perform virtual 9961 * copying operations, although the data referenced will 9962 * remain the same. 
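 *
 * Illustrative aside (not part of the original source): the expected
 * calling pattern -- as used by the page-fault path -- is to take the
 * map lock, call this routine, use the returned object/offset, and
 * later check the returned version (e.g. with vm_map_verify()) before
 * trusting that the lookup is still valid.  A hedged sketch; the
 * OBJECT_LOCK_EXCLUSIVE constant and the local names follow vm_fault.c
 * conventions and are assumptions here:
 */

#if 0	/* illustrative sketch only -- not part of the original source */
	vm_map_version_t	version;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_prot_t		prot;
	boolean_t		wired;
	vm_map_t		real_map;
	kern_return_t		kr;

	vm_map_lock_read(map);
	kr = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
				  OBJECT_LOCK_EXCLUSIVE,
				  &version, &object, &offset,
				  &prot, &wired,
				  NULL,			/* fault_info */
				  &real_map);
	/* on success: "object" is returned locked, the map is still
	 * locked, and "version" can be re-checked later to detect
	 * intervening changes to the map */
#endif

/*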
9963 */ 9964kern_return_t 9965vm_map_lookup_locked( 9966 vm_map_t *var_map, /* IN/OUT */ 9967 vm_map_offset_t vaddr, 9968 vm_prot_t fault_type, 9969 int object_lock_type, 9970 vm_map_version_t *out_version, /* OUT */ 9971 vm_object_t *object, /* OUT */ 9972 vm_object_offset_t *offset, /* OUT */ 9973 vm_prot_t *out_prot, /* OUT */ 9974 boolean_t *wired, /* OUT */ 9975 vm_object_fault_info_t fault_info, /* OUT */ 9976 vm_map_t *real_map) 9977{ 9978 vm_map_entry_t entry; 9979 register vm_map_t map = *var_map; 9980 vm_map_t old_map = *var_map; 9981 vm_map_t cow_sub_map_parent = VM_MAP_NULL; 9982 vm_map_offset_t cow_parent_vaddr = 0; 9983 vm_map_offset_t old_start = 0; 9984 vm_map_offset_t old_end = 0; 9985 register vm_prot_t prot; 9986 boolean_t mask_protections; 9987 boolean_t force_copy; 9988 vm_prot_t original_fault_type; 9989 9990 /* 9991 * VM_PROT_MASK means that the caller wants us to use "fault_type" 9992 * as a mask against the mapping's actual protections, not as an 9993 * absolute value. 9994 */ 9995 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE; 9996 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE; 9997 fault_type &= VM_PROT_ALL; 9998 original_fault_type = fault_type; 9999 10000 *real_map = map; 10001 10002RetryLookup: 10003 fault_type = original_fault_type; 10004 10005 /* 10006 * If the map has an interesting hint, try it before calling 10007 * full blown lookup routine. 10008 */ 10009 entry = map->hint; 10010 10011 if ((entry == vm_map_to_entry(map)) || 10012 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) { 10013 vm_map_entry_t tmp_entry; 10014 10015 /* 10016 * Entry was either not a valid hint, or the vaddr 10017 * was not contained in the entry, so do a full lookup. 10018 */ 10019 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) { 10020 if((cow_sub_map_parent) && (cow_sub_map_parent != map)) 10021 vm_map_unlock(cow_sub_map_parent); 10022 if((*real_map != map) 10023 && (*real_map != cow_sub_map_parent)) 10024 vm_map_unlock(*real_map); 10025 return KERN_INVALID_ADDRESS; 10026 } 10027 10028 entry = tmp_entry; 10029 } 10030 if(map == old_map) { 10031 old_start = entry->vme_start; 10032 old_end = entry->vme_end; 10033 } 10034 10035 /* 10036 * Handle submaps. Drop lock on upper map, submap is 10037 * returned locked. 
10038 */ 10039 10040submap_recurse: 10041 if (entry->is_sub_map) { 10042 vm_map_offset_t local_vaddr; 10043 vm_map_offset_t end_delta; 10044 vm_map_offset_t start_delta; 10045 vm_map_entry_t submap_entry; 10046 boolean_t mapped_needs_copy=FALSE; 10047 10048 local_vaddr = vaddr; 10049 10050 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) { 10051 /* if real_map equals map we unlock below */ 10052 if ((*real_map != map) && 10053 (*real_map != cow_sub_map_parent)) 10054 vm_map_unlock(*real_map); 10055 *real_map = entry->object.sub_map; 10056 } 10057 10058 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) { 10059 if (!mapped_needs_copy) { 10060 if (vm_map_lock_read_to_write(map)) { 10061 vm_map_lock_read(map); 10062 *real_map = map; 10063 goto RetryLookup; 10064 } 10065 vm_map_lock_read(entry->object.sub_map); 10066 *var_map = entry->object.sub_map; 10067 cow_sub_map_parent = map; 10068 /* reset base to map before cow object */ 10069 /* this is the map which will accept */ 10070 /* the new cow object */ 10071 old_start = entry->vme_start; 10072 old_end = entry->vme_end; 10073 cow_parent_vaddr = vaddr; 10074 mapped_needs_copy = TRUE; 10075 } else { 10076 vm_map_lock_read(entry->object.sub_map); 10077 *var_map = entry->object.sub_map; 10078 if((cow_sub_map_parent != map) && 10079 (*real_map != map)) 10080 vm_map_unlock(map); 10081 } 10082 } else { 10083 vm_map_lock_read(entry->object.sub_map); 10084 *var_map = entry->object.sub_map; 10085 /* leave map locked if it is a target */ 10086 /* cow sub_map above otherwise, just */ 10087 /* follow the maps down to the object */ 10088 /* here we unlock knowing we are not */ 10089 /* revisiting the map. */ 10090 if((*real_map != map) && (map != cow_sub_map_parent)) 10091 vm_map_unlock_read(map); 10092 } 10093 10094 map = *var_map; 10095 10096 /* calculate the offset in the submap for vaddr */ 10097 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset; 10098 10099 RetrySubMap: 10100 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) { 10101 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){ 10102 vm_map_unlock(cow_sub_map_parent); 10103 } 10104 if((*real_map != map) 10105 && (*real_map != cow_sub_map_parent)) { 10106 vm_map_unlock(*real_map); 10107 } 10108 *real_map = map; 10109 return KERN_INVALID_ADDRESS; 10110 } 10111 10112 /* find the attenuated shadow of the underlying object */ 10113 /* on our target map */ 10114 10115 /* in english the submap object may extend beyond the */ 10116 /* region mapped by the entry or, may only fill a portion */ 10117 /* of it. For our purposes, we only care if the object */ 10118 /* doesn't fill. In this case the area which will */ 10119 /* ultimately be clipped in the top map will only need */ 10120 /* to be as big as the portion of the underlying entry */ 10121 /* which is mapped */ 10122 start_delta = submap_entry->vme_start > entry->offset ? 10123 submap_entry->vme_start - entry->offset : 0; 10124 10125 end_delta = 10126 (entry->offset + start_delta + (old_end - old_start)) <= 10127 submap_entry->vme_end ? 
10128 0 : (entry->offset + 10129 (old_end - old_start)) 10130 - submap_entry->vme_end; 10131 10132 old_start += start_delta; 10133 old_end -= end_delta; 10134 10135 if(submap_entry->is_sub_map) { 10136 entry = submap_entry; 10137 vaddr = local_vaddr; 10138 goto submap_recurse; 10139 } 10140 10141 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) { 10142 10143 vm_object_t sub_object, copy_object; 10144 vm_object_offset_t copy_offset; 10145 vm_map_offset_t local_start; 10146 vm_map_offset_t local_end; 10147 boolean_t copied_slowly = FALSE; 10148 10149 if (vm_map_lock_read_to_write(map)) { 10150 vm_map_lock_read(map); 10151 old_start -= start_delta; 10152 old_end += end_delta; 10153 goto RetrySubMap; 10154 } 10155 10156 10157 sub_object = submap_entry->object.vm_object; 10158 if (sub_object == VM_OBJECT_NULL) { 10159 sub_object = 10160 vm_object_allocate( 10161 (vm_map_size_t) 10162 (submap_entry->vme_end - 10163 submap_entry->vme_start)); 10164 submap_entry->object.vm_object = sub_object; 10165 submap_entry->offset = 0; 10166 } 10167 local_start = local_vaddr - 10168 (cow_parent_vaddr - old_start); 10169 local_end = local_vaddr + 10170 (old_end - cow_parent_vaddr); 10171 vm_map_clip_start(map, submap_entry, local_start); 10172 vm_map_clip_end(map, submap_entry, local_end); 10173 if (submap_entry->is_sub_map) { 10174 /* unnesting was done when clipping */ 10175 assert(!submap_entry->use_pmap); 10176 } 10177 10178 /* This is the COW case, lets connect */ 10179 /* an entry in our space to the underlying */ 10180 /* object in the submap, bypassing the */ 10181 /* submap. */ 10182 10183 10184 if(submap_entry->wired_count != 0 || 10185 (sub_object->copy_strategy == 10186 MEMORY_OBJECT_COPY_NONE)) { 10187 vm_object_lock(sub_object); 10188 vm_object_copy_slowly(sub_object, 10189 submap_entry->offset, 10190 (submap_entry->vme_end - 10191 submap_entry->vme_start), 10192 FALSE, 10193 ©_object); 10194 copied_slowly = TRUE; 10195 } else { 10196 10197 /* set up shadow object */ 10198 copy_object = sub_object; 10199 vm_object_reference(copy_object); 10200 sub_object->shadowed = TRUE; 10201 submap_entry->needs_copy = TRUE; 10202 10203 prot = submap_entry->protection & ~VM_PROT_WRITE; 10204 10205 if (override_nx(old_map, submap_entry->alias) && prot) 10206 prot |= VM_PROT_EXECUTE; 10207 10208 vm_object_pmap_protect( 10209 sub_object, 10210 submap_entry->offset, 10211 submap_entry->vme_end - 10212 submap_entry->vme_start, 10213 (submap_entry->is_shared 10214 || map->mapped_in_other_pmaps) ? 10215 PMAP_NULL : map->pmap, 10216 submap_entry->vme_start, 10217 prot); 10218 } 10219 10220 /* 10221 * Adjust the fault offset to the submap entry. 10222 */ 10223 copy_offset = (local_vaddr - 10224 submap_entry->vme_start + 10225 submap_entry->offset); 10226 10227 /* This works diffently than the */ 10228 /* normal submap case. 
We go back */ 10229 /* to the parent of the cow map and*/ 10230 /* clip out the target portion of */ 10231 /* the sub_map, substituting the */ 10232 /* new copy object, */ 10233 10234 vm_map_unlock(map); 10235 local_start = old_start; 10236 local_end = old_end; 10237 map = cow_sub_map_parent; 10238 *var_map = cow_sub_map_parent; 10239 vaddr = cow_parent_vaddr; 10240 cow_sub_map_parent = NULL; 10241 10242 if(!vm_map_lookup_entry(map, 10243 vaddr, &entry)) { 10244 vm_object_deallocate( 10245 copy_object); 10246 vm_map_lock_write_to_read(map); 10247 return KERN_INVALID_ADDRESS; 10248 } 10249 10250 /* clip out the portion of space */ 10251 /* mapped by the sub map which */ 10252 /* corresponds to the underlying */ 10253 /* object */ 10254 10255 /* 10256 * Clip (and unnest) the smallest nested chunk 10257 * possible around the faulting address... 10258 */ 10259 local_start = vaddr & ~(pmap_nesting_size_min - 1); 10260 local_end = local_start + pmap_nesting_size_min; 10261 /* 10262 * ... but don't go beyond the "old_start" to "old_end" 10263 * range, to avoid spanning over another VM region 10264 * with a possibly different VM object and/or offset. 10265 */ 10266 if (local_start < old_start) { 10267 local_start = old_start; 10268 } 10269 if (local_end > old_end) { 10270 local_end = old_end; 10271 } 10272 /* 10273 * Adjust copy_offset to the start of the range. 10274 */ 10275 copy_offset -= (vaddr - local_start); 10276 10277 vm_map_clip_start(map, entry, local_start); 10278 vm_map_clip_end(map, entry, local_end); 10279 if (entry->is_sub_map) { 10280 /* unnesting was done when clipping */ 10281 assert(!entry->use_pmap); 10282 } 10283 10284 /* substitute copy object for */ 10285 /* shared map entry */ 10286 vm_map_deallocate(entry->object.sub_map); 10287 assert(!entry->iokit_acct); 10288 entry->is_sub_map = FALSE; 10289 entry->use_pmap = TRUE; 10290 entry->object.vm_object = copy_object; 10291 10292 /* propagate the submap entry's protections */ 10293 entry->protection |= submap_entry->protection; 10294 entry->max_protection |= submap_entry->max_protection; 10295 10296 if(copied_slowly) { 10297 entry->offset = local_start - old_start; 10298 entry->needs_copy = FALSE; 10299 entry->is_shared = FALSE; 10300 } else { 10301 entry->offset = copy_offset; 10302 entry->needs_copy = TRUE; 10303 if(entry->inheritance == VM_INHERIT_SHARE) 10304 entry->inheritance = VM_INHERIT_COPY; 10305 if (map != old_map) 10306 entry->is_shared = TRUE; 10307 } 10308 if(entry->inheritance == VM_INHERIT_SHARE) 10309 entry->inheritance = VM_INHERIT_COPY; 10310 10311 vm_map_lock_write_to_read(map); 10312 } else { 10313 if((cow_sub_map_parent) 10314 && (cow_sub_map_parent != *real_map) 10315 && (cow_sub_map_parent != map)) { 10316 vm_map_unlock(cow_sub_map_parent); 10317 } 10318 entry = submap_entry; 10319 vaddr = local_vaddr; 10320 } 10321 } 10322 10323 /* 10324 * Check whether this task is allowed to have 10325 * this page. 
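 * The access being requested ("fault_type") must be covered by the
 * entry's current protection.  When "mask_protections" is set, the
 * caller accepts having the access reduced to whatever the entry
 * allows instead of getting a protection failure.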
10326 */ 10327 10328 prot = entry->protection; 10329 10330 if (override_nx(old_map, entry->alias) && prot) { 10331 /* 10332 * HACK -- if not a stack, then allow execution 10333 */ 10334 prot |= VM_PROT_EXECUTE; 10335 } 10336 10337 if (mask_protections) { 10338 fault_type &= prot; 10339 if (fault_type == VM_PROT_NONE) { 10340 goto protection_failure; 10341 } 10342 } 10343 if ((fault_type & (prot)) != fault_type) { 10344 protection_failure: 10345 if (*real_map != map) { 10346 vm_map_unlock(*real_map); 10347 } 10348 *real_map = map; 10349 10350 if ((fault_type & VM_PROT_EXECUTE) && prot) 10351 log_stack_execution_failure((addr64_t)vaddr, prot); 10352 10353 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL); 10354 return KERN_PROTECTION_FAILURE; 10355 } 10356 10357 /* 10358 * If this page is not pageable, we have to get 10359 * it for all possible accesses. 10360 */ 10361 10362 *wired = (entry->wired_count != 0); 10363 if (*wired) 10364 fault_type = prot; 10365 10366 /* 10367 * If the entry was copy-on-write, we either ... 10368 */ 10369 10370 if (entry->needs_copy) { 10371 /* 10372 * If we want to write the page, we may as well 10373 * handle that now since we've got the map locked. 10374 * 10375 * If we don't need to write the page, we just 10376 * demote the permissions allowed. 10377 */ 10378 10379 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) { 10380 /* 10381 * Make a new object, and place it in the 10382 * object chain. Note that no new references 10383 * have appeared -- one just moved from the 10384 * map to the new object. 10385 */ 10386 10387 if (vm_map_lock_read_to_write(map)) { 10388 vm_map_lock_read(map); 10389 goto RetryLookup; 10390 } 10391 vm_object_shadow(&entry->object.vm_object, 10392 &entry->offset, 10393 (vm_map_size_t) (entry->vme_end - 10394 entry->vme_start)); 10395 10396 entry->object.vm_object->shadowed = TRUE; 10397 entry->needs_copy = FALSE; 10398 vm_map_lock_write_to_read(map); 10399 } 10400 else { 10401 /* 10402 * We're attempting to read a copy-on-write 10403 * page -- don't allow writes. 10404 */ 10405 10406 prot &= (~VM_PROT_WRITE); 10407 } 10408 } 10409 10410 /* 10411 * Create an object if necessary. 10412 */ 10413 if (entry->object.vm_object == VM_OBJECT_NULL) { 10414 10415 if (vm_map_lock_read_to_write(map)) { 10416 vm_map_lock_read(map); 10417 goto RetryLookup; 10418 } 10419 10420 entry->object.vm_object = vm_object_allocate( 10421 (vm_map_size_t)(entry->vme_end - entry->vme_start)); 10422 entry->offset = 0; 10423 vm_map_lock_write_to_read(map); 10424 } 10425 10426 /* 10427 * Return the object/offset from this entry. If the entry 10428 * was copy-on-write or empty, it has been fixed up. Also 10429 * return the protection. 10430 */ 10431 10432 *offset = (vaddr - entry->vme_start) + entry->offset; 10433 *object = entry->object.vm_object; 10434 *out_prot = prot; 10435 10436 if (fault_info) { 10437 fault_info->interruptible = THREAD_UNINT; /* for now... */ 10438 /* ... 
the caller will change "interruptible" if needed */ 10439 fault_info->cluster_size = 0; 10440 fault_info->user_tag = entry->alias; 10441 fault_info->pmap_options = 0; 10442 if (entry->iokit_acct || 10443 (!entry->is_sub_map && !entry->use_pmap)) { 10444 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT; 10445 } 10446 fault_info->behavior = entry->behavior; 10447 fault_info->lo_offset = entry->offset; 10448 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; 10449 fault_info->no_cache = entry->no_cache; 10450 fault_info->stealth = FALSE; 10451 fault_info->io_sync = FALSE; 10452 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE; 10453 fault_info->mark_zf_absent = FALSE; 10454 fault_info->batch_pmap_op = FALSE; 10455 } 10456 10457 /* 10458 * Lock the object to prevent it from disappearing 10459 */ 10460 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) 10461 vm_object_lock(*object); 10462 else 10463 vm_object_lock_shared(*object); 10464 10465 /* 10466 * Save the version number 10467 */ 10468 10469 out_version->main_timestamp = map->timestamp; 10470 10471 return KERN_SUCCESS; 10472} 10473 10474 10475/* 10476 * vm_map_verify: 10477 * 10478 * Verifies that the map in question has not changed 10479 * since the given version. If successful, the map 10480 * will not change until vm_map_verify_done() is called. 10481 */ 10482boolean_t 10483vm_map_verify( 10484 register vm_map_t map, 10485 register vm_map_version_t *version) /* REF */ 10486{ 10487 boolean_t result; 10488 10489 vm_map_lock_read(map); 10490 result = (map->timestamp == version->main_timestamp); 10491 10492 if (!result) 10493 vm_map_unlock_read(map); 10494 10495 return(result); 10496} 10497 10498/* 10499 * vm_map_verify_done: 10500 * 10501 * Releases locks acquired by a vm_map_verify. 10502 * 10503 * This is now a macro in vm/vm_map.h. It does a 10504 * vm_map_unlock_read on the map. 10505 */ 10506 10507 10508/* 10509 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY 10510 * Goes away after regular vm_region_recurse function migrates to 10511 * 64 bits 10512 * vm_region_recurse: A form of vm_region which follows the 10513 * submaps in a target map 10514 * 10515 */ 10516 10517kern_return_t 10518vm_map_region_recurse_64( 10519 vm_map_t map, 10520 vm_map_offset_t *address, /* IN/OUT */ 10521 vm_map_size_t *size, /* OUT */ 10522 natural_t *nesting_depth, /* IN/OUT */ 10523 vm_region_submap_info_64_t submap_info, /* IN/OUT */ 10524 mach_msg_type_number_t *count) /* IN/OUT */ 10525{ 10526 mach_msg_type_number_t original_count; 10527 vm_region_extended_info_data_t extended; 10528 vm_map_entry_t tmp_entry; 10529 vm_map_offset_t user_address; 10530 unsigned int user_max_depth; 10531 10532 /* 10533 * "curr_entry" is the VM map entry preceding or including the 10534 * address we're looking for. 10535 * "curr_map" is the map or sub-map containing "curr_entry". 10536 * "curr_address" is the equivalent of the top map's "user_address" 10537 * in the current map. 10538 * "curr_offset" is the cumulated offset of "curr_map" in the 10539 * target task's address space. 10540 * "curr_depth" is the depth of "curr_map" in the chain of 10541 * sub-maps. 10542 * 10543 * "curr_max_below" and "curr_max_above" limit the range (around 10544 * "curr_address") we should take into account in the current (sub)map. 10545 * They limit the range to what's visible through the map entries 10546 * we've traversed from the top map to the current map. 
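 *
 * For example, if a submap is nested at address A of the parent map by
 * an entry whose offset is O, a parent address V falling in that entry
 * corresponds to submap address V + (O - A); each level therefore adds
 * (curr_entry->offset - curr_entry->vme_start) to "curr_offset" and
 * recomputes "curr_address" from "user_address".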
10547 10548 */ 10549 vm_map_entry_t curr_entry; 10550 vm_map_address_t curr_address; 10551 vm_map_offset_t curr_offset; 10552 vm_map_t curr_map; 10553 unsigned int curr_depth; 10554 vm_map_offset_t curr_max_below, curr_max_above; 10555 vm_map_offset_t curr_skip; 10556 10557 /* 10558 * "next_" is the same as "curr_" but for the VM region immediately 10559 * after the address we're looking for. We need to keep track of this 10560 * too because we want to return info about that region if the 10561 * address we're looking for is not mapped. 10562 */ 10563 vm_map_entry_t next_entry; 10564 vm_map_offset_t next_offset; 10565 vm_map_offset_t next_address; 10566 vm_map_t next_map; 10567 unsigned int next_depth; 10568 vm_map_offset_t next_max_below, next_max_above; 10569 vm_map_offset_t next_skip; 10570 10571 boolean_t look_for_pages; 10572 vm_region_submap_short_info_64_t short_info; 10573 10574 if (map == VM_MAP_NULL) { 10575 /* no address space to work on */ 10576 return KERN_INVALID_ARGUMENT; 10577 } 10578 10579 10580 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { 10581 /* 10582 * "info" structure is not big enough and 10583 * would overflow 10584 */ 10585 return KERN_INVALID_ARGUMENT; 10586 } 10587 10588 original_count = *count; 10589 10590 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) { 10591 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; 10592 look_for_pages = FALSE; 10593 short_info = (vm_region_submap_short_info_64_t) submap_info; 10594 submap_info = NULL; 10595 } else { 10596 look_for_pages = TRUE; 10597 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64; 10598 short_info = NULL; 10599 10600 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) { 10601 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64; 10602 } 10603 } 10604 10605 user_address = *address; 10606 user_max_depth = *nesting_depth; 10607 10608 curr_entry = NULL; 10609 curr_map = map; 10610 curr_address = user_address; 10611 curr_offset = 0; 10612 curr_skip = 0; 10613 curr_depth = 0; 10614 curr_max_above = ((vm_map_offset_t) -1) - curr_address; 10615 curr_max_below = curr_address; 10616 10617 next_entry = NULL; 10618 next_map = NULL; 10619 next_address = 0; 10620 next_offset = 0; 10621 next_skip = 0; 10622 next_depth = 0; 10623 next_max_above = (vm_map_offset_t) -1; 10624 next_max_below = (vm_map_offset_t) -1; 10625 10626 if (not_in_kdp) { 10627 vm_map_lock_read(curr_map); 10628 } 10629 10630 for (;;) { 10631 if (vm_map_lookup_entry(curr_map, 10632 curr_address, 10633 &tmp_entry)) { 10634 /* tmp_entry contains the address we're looking for */ 10635 curr_entry = tmp_entry; 10636 } else { 10637 vm_map_offset_t skip; 10638 /* 10639 * The address is not mapped. "tmp_entry" is the 10640 * map entry preceding the address. We want the next 10641 * one, if it exists. 
10642 */ 10643 curr_entry = tmp_entry->vme_next; 10644 10645 if (curr_entry == vm_map_to_entry(curr_map) || 10646 (curr_entry->vme_start >= 10647 curr_address + curr_max_above)) { 10648 /* no next entry at this level: stop looking */ 10649 if (not_in_kdp) { 10650 vm_map_unlock_read(curr_map); 10651 } 10652 curr_entry = NULL; 10653 curr_map = NULL; 10654 curr_offset = 0; 10655 curr_depth = 0; 10656 curr_max_above = 0; 10657 curr_max_below = 0; 10658 break; 10659 } 10660 10661 /* adjust current address and offset */ 10662 skip = curr_entry->vme_start - curr_address; 10663 curr_address = curr_entry->vme_start; 10664 curr_skip = skip; 10665 curr_offset += skip; 10666 curr_max_above -= skip; 10667 curr_max_below = 0; 10668 } 10669 10670 /* 10671 * Is the next entry at this level closer to the address (or 10672 * deeper in the submap chain) than the one we had 10673 * so far ? 10674 */ 10675 tmp_entry = curr_entry->vme_next; 10676 if (tmp_entry == vm_map_to_entry(curr_map)) { 10677 /* no next entry at this level */ 10678 } else if (tmp_entry->vme_start >= 10679 curr_address + curr_max_above) { 10680 /* 10681 * tmp_entry is beyond the scope of what we mapped of 10682 * this submap in the upper level: ignore it. 10683 */ 10684 } else if ((next_entry == NULL) || 10685 (tmp_entry->vme_start + curr_offset <= 10686 next_entry->vme_start + next_offset)) { 10687 /* 10688 * We didn't have a "next_entry" or this one is 10689 * closer to the address we're looking for: 10690 * use this "tmp_entry" as the new "next_entry". 10691 */ 10692 if (next_entry != NULL) { 10693 /* unlock the last "next_map" */ 10694 if (next_map != curr_map && not_in_kdp) { 10695 vm_map_unlock_read(next_map); 10696 } 10697 } 10698 next_entry = tmp_entry; 10699 next_map = curr_map; 10700 next_depth = curr_depth; 10701 next_address = next_entry->vme_start; 10702 next_skip = curr_skip; 10703 next_offset = curr_offset; 10704 next_offset += (next_address - curr_address); 10705 next_max_above = MIN(next_max_above, curr_max_above); 10706 next_max_above = MIN(next_max_above, 10707 next_entry->vme_end - next_address); 10708 next_max_below = MIN(next_max_below, curr_max_below); 10709 next_max_below = MIN(next_max_below, 10710 next_address - next_entry->vme_start); 10711 } 10712 10713 /* 10714 * "curr_max_{above,below}" allow us to keep track of the 10715 * portion of the submap that is actually mapped at this level: 10716 * the rest of that submap is irrelevant to us, since it's not 10717 * mapped here. 10718 * The relevant portion of the map starts at 10719 * "curr_entry->offset" up to the size of "curr_entry". 10720 */ 10721 curr_max_above = MIN(curr_max_above, 10722 curr_entry->vme_end - curr_address); 10723 curr_max_below = MIN(curr_max_below, 10724 curr_address - curr_entry->vme_start); 10725 10726 if (!curr_entry->is_sub_map || 10727 curr_depth >= user_max_depth) { 10728 /* 10729 * We hit a leaf map or we reached the maximum depth 10730 * we could, so stop looking. Keep the current map 10731 * locked. 10732 */ 10733 break; 10734 } 10735 10736 /* 10737 * Get down to the next submap level. 10738 */ 10739 10740 /* 10741 * Lock the next level and unlock the current level, 10742 * unless we need to keep it locked to access the "next_entry" 10743 * later. 
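 * Note that the submap is locked before the current map is dropped,
 * so the submap we are about to descend into cannot be torn down
 * in between.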
10744 */ 10745 if (not_in_kdp) { 10746 vm_map_lock_read(curr_entry->object.sub_map); 10747 } 10748 if (curr_map == next_map) { 10749 /* keep "next_map" locked in case we need it */ 10750 } else { 10751 /* release this map */ 10752 if (not_in_kdp) 10753 vm_map_unlock_read(curr_map); 10754 } 10755 10756 /* 10757 * Adjust the offset. "curr_entry" maps the submap 10758 * at relative address "curr_entry->vme_start" in the 10759 * curr_map but skips the first "curr_entry->offset" 10760 * bytes of the submap. 10761 * "curr_offset" always represents the offset of a virtual 10762 * address in the curr_map relative to the absolute address 10763 * space (i.e. the top-level VM map). 10764 */ 10765 curr_offset += 10766 (curr_entry->offset - curr_entry->vme_start); 10767 curr_address = user_address + curr_offset; 10768 /* switch to the submap */ 10769 curr_map = curr_entry->object.sub_map; 10770 curr_depth++; 10771 curr_entry = NULL; 10772 } 10773 10774 if (curr_entry == NULL) { 10775 /* no VM region contains the address... */ 10776 if (next_entry == NULL) { 10777 /* ... and no VM region follows it either */ 10778 return KERN_INVALID_ADDRESS; 10779 } 10780 /* ... gather info about the next VM region */ 10781 curr_entry = next_entry; 10782 curr_map = next_map; /* still locked ... */ 10783 curr_address = next_address; 10784 curr_skip = next_skip; 10785 curr_offset = next_offset; 10786 curr_depth = next_depth; 10787 curr_max_above = next_max_above; 10788 curr_max_below = next_max_below; 10789 if (curr_map == map) { 10790 user_address = curr_address; 10791 } 10792 } else { 10793 /* we won't need "next_entry" after all */ 10794 if (next_entry != NULL) { 10795 /* release "next_map" */ 10796 if (next_map != curr_map && not_in_kdp) { 10797 vm_map_unlock_read(next_map); 10798 } 10799 } 10800 } 10801 next_entry = NULL; 10802 next_map = NULL; 10803 next_offset = 0; 10804 next_skip = 0; 10805 next_depth = 0; 10806 next_max_below = -1; 10807 next_max_above = -1; 10808 10809 *nesting_depth = curr_depth; 10810 *size = curr_max_above + curr_max_below; 10811 *address = user_address + curr_skip - curr_max_below; 10812 10813// LP64todo: all the current tools are 32bit, obviously never worked for 64b 10814// so probably should be a real 32b ID vs. ptr. 
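// The pointer is filtered through VM_KERNEL_ADDRPERM() so that the value
// handed back to user space does not expose a raw kernel address.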
10815// Current users just check for equality 10816#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)VM_KERNEL_ADDRPERM(p)) 10817 10818 if (look_for_pages) { 10819 submap_info->user_tag = curr_entry->alias; 10820 submap_info->offset = curr_entry->offset; 10821 submap_info->protection = curr_entry->protection; 10822 submap_info->inheritance = curr_entry->inheritance; 10823 submap_info->max_protection = curr_entry->max_protection; 10824 submap_info->behavior = curr_entry->behavior; 10825 submap_info->user_wired_count = curr_entry->user_wired_count; 10826 submap_info->is_submap = curr_entry->is_sub_map; 10827 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object); 10828 } else { 10829 short_info->user_tag = curr_entry->alias; 10830 short_info->offset = curr_entry->offset; 10831 short_info->protection = curr_entry->protection; 10832 short_info->inheritance = curr_entry->inheritance; 10833 short_info->max_protection = curr_entry->max_protection; 10834 short_info->behavior = curr_entry->behavior; 10835 short_info->user_wired_count = curr_entry->user_wired_count; 10836 short_info->is_submap = curr_entry->is_sub_map; 10837 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object); 10838 } 10839 10840 extended.pages_resident = 0; 10841 extended.pages_swapped_out = 0; 10842 extended.pages_shared_now_private = 0; 10843 extended.pages_dirtied = 0; 10844 extended.pages_reusable = 0; 10845 extended.external_pager = 0; 10846 extended.shadow_depth = 0; 10847 10848 if (not_in_kdp) { 10849 if (!curr_entry->is_sub_map) { 10850 vm_map_offset_t range_start, range_end; 10851 range_start = MAX((curr_address - curr_max_below), 10852 curr_entry->vme_start); 10853 range_end = MIN((curr_address + curr_max_above), 10854 curr_entry->vme_end); 10855 vm_map_region_walk(curr_map, 10856 range_start, 10857 curr_entry, 10858 (curr_entry->offset + 10859 (range_start - 10860 curr_entry->vme_start)), 10861 range_end - range_start, 10862 &extended, 10863 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT); 10864 if (extended.external_pager && 10865 extended.ref_count == 2 && 10866 extended.share_mode == SM_SHARED) { 10867 extended.share_mode = SM_PRIVATE; 10868 } 10869 } else { 10870 if (curr_entry->use_pmap) { 10871 extended.share_mode = SM_TRUESHARED; 10872 } else { 10873 extended.share_mode = SM_PRIVATE; 10874 } 10875 extended.ref_count = 10876 curr_entry->object.sub_map->ref_count; 10877 } 10878 } 10879 10880 if (look_for_pages) { 10881 submap_info->pages_resident = extended.pages_resident; 10882 submap_info->pages_swapped_out = extended.pages_swapped_out; 10883 submap_info->pages_shared_now_private = 10884 extended.pages_shared_now_private; 10885 submap_info->pages_dirtied = extended.pages_dirtied; 10886 submap_info->external_pager = extended.external_pager; 10887 submap_info->shadow_depth = extended.shadow_depth; 10888 submap_info->share_mode = extended.share_mode; 10889 submap_info->ref_count = extended.ref_count; 10890 10891 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) { 10892 submap_info->pages_reusable = extended.pages_reusable; 10893 } 10894 } else { 10895 short_info->external_pager = extended.external_pager; 10896 short_info->shadow_depth = extended.shadow_depth; 10897 short_info->share_mode = extended.share_mode; 10898 short_info->ref_count = extended.ref_count; 10899 } 10900 10901 if (not_in_kdp) { 10902 vm_map_unlock_read(curr_map); 10903 } 10904 10905 return KERN_SUCCESS; 10906} 10907 10908/* 10909 * vm_region: 10910 * 10911 * User call to obtain information about a 
region in 10912 * a task's address map. Currently, only one flavor is 10913 * supported. 10914 * 10915 * XXX The reserved and behavior fields cannot be filled 10916 * in until the vm merge from the IK is completed, and 10917 * vm_reserve is implemented. 10918 */ 10919 10920kern_return_t 10921vm_map_region( 10922 vm_map_t map, 10923 vm_map_offset_t *address, /* IN/OUT */ 10924 vm_map_size_t *size, /* OUT */ 10925 vm_region_flavor_t flavor, /* IN */ 10926 vm_region_info_t info, /* OUT */ 10927 mach_msg_type_number_t *count, /* IN/OUT */ 10928 mach_port_t *object_name) /* OUT */ 10929{ 10930 vm_map_entry_t tmp_entry; 10931 vm_map_entry_t entry; 10932 vm_map_offset_t start; 10933 10934 if (map == VM_MAP_NULL) 10935 return(KERN_INVALID_ARGUMENT); 10936 10937 switch (flavor) { 10938 10939 case VM_REGION_BASIC_INFO: 10940 /* legacy for old 32-bit objects info */ 10941 { 10942 vm_region_basic_info_t basic; 10943 10944 if (*count < VM_REGION_BASIC_INFO_COUNT) 10945 return(KERN_INVALID_ARGUMENT); 10946 10947 basic = (vm_region_basic_info_t) info; 10948 *count = VM_REGION_BASIC_INFO_COUNT; 10949 10950 vm_map_lock_read(map); 10951 10952 start = *address; 10953 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 10954 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 10955 vm_map_unlock_read(map); 10956 return(KERN_INVALID_ADDRESS); 10957 } 10958 } else { 10959 entry = tmp_entry; 10960 } 10961 10962 start = entry->vme_start; 10963 10964 basic->offset = (uint32_t)entry->offset; 10965 basic->protection = entry->protection; 10966 basic->inheritance = entry->inheritance; 10967 basic->max_protection = entry->max_protection; 10968 basic->behavior = entry->behavior; 10969 basic->user_wired_count = entry->user_wired_count; 10970 basic->reserved = entry->is_sub_map; 10971 *address = start; 10972 *size = (entry->vme_end - start); 10973 10974 if (object_name) *object_name = IP_NULL; 10975 if (entry->is_sub_map) { 10976 basic->shared = FALSE; 10977 } else { 10978 basic->shared = entry->is_shared; 10979 } 10980 10981 vm_map_unlock_read(map); 10982 return(KERN_SUCCESS); 10983 } 10984 10985 case VM_REGION_BASIC_INFO_64: 10986 { 10987 vm_region_basic_info_64_t basic; 10988 10989 if (*count < VM_REGION_BASIC_INFO_COUNT_64) 10990 return(KERN_INVALID_ARGUMENT); 10991 10992 basic = (vm_region_basic_info_64_t) info; 10993 *count = VM_REGION_BASIC_INFO_COUNT_64; 10994 10995 vm_map_lock_read(map); 10996 10997 start = *address; 10998 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 10999 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 11000 vm_map_unlock_read(map); 11001 return(KERN_INVALID_ADDRESS); 11002 } 11003 } else { 11004 entry = tmp_entry; 11005 } 11006 11007 start = entry->vme_start; 11008 11009 basic->offset = entry->offset; 11010 basic->protection = entry->protection; 11011 basic->inheritance = entry->inheritance; 11012 basic->max_protection = entry->max_protection; 11013 basic->behavior = entry->behavior; 11014 basic->user_wired_count = entry->user_wired_count; 11015 basic->reserved = entry->is_sub_map; 11016 *address = start; 11017 *size = (entry->vme_end - start); 11018 11019 if (object_name) *object_name = IP_NULL; 11020 if (entry->is_sub_map) { 11021 basic->shared = FALSE; 11022 } else { 11023 basic->shared = entry->is_shared; 11024 } 11025 11026 vm_map_unlock_read(map); 11027 return(KERN_SUCCESS); 11028 } 11029 case VM_REGION_EXTENDED_INFO: 11030 if (*count < VM_REGION_EXTENDED_INFO_COUNT) 11031 return(KERN_INVALID_ARGUMENT); 11032 /*fallthru*/ 11033 case 
VM_REGION_EXTENDED_INFO__legacy: 11034 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) 11035 return KERN_INVALID_ARGUMENT; 11036 11037 { 11038 vm_region_extended_info_t extended; 11039 mach_msg_type_number_t original_count; 11040 11041 extended = (vm_region_extended_info_t) info; 11042 11043 vm_map_lock_read(map); 11044 11045 start = *address; 11046 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 11047 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 11048 vm_map_unlock_read(map); 11049 return(KERN_INVALID_ADDRESS); 11050 } 11051 } else { 11052 entry = tmp_entry; 11053 } 11054 start = entry->vme_start; 11055 11056 extended->protection = entry->protection; 11057 extended->user_tag = entry->alias; 11058 extended->pages_resident = 0; 11059 extended->pages_swapped_out = 0; 11060 extended->pages_shared_now_private = 0; 11061 extended->pages_dirtied = 0; 11062 extended->external_pager = 0; 11063 extended->shadow_depth = 0; 11064 11065 original_count = *count; 11066 if (flavor == VM_REGION_EXTENDED_INFO__legacy) { 11067 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy; 11068 } else { 11069 extended->pages_reusable = 0; 11070 *count = VM_REGION_EXTENDED_INFO_COUNT; 11071 } 11072 11073 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE, *count); 11074 11075 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) 11076 extended->share_mode = SM_PRIVATE; 11077 11078 if (object_name) 11079 *object_name = IP_NULL; 11080 *address = start; 11081 *size = (entry->vme_end - start); 11082 11083 vm_map_unlock_read(map); 11084 return(KERN_SUCCESS); 11085 } 11086 case VM_REGION_TOP_INFO: 11087 { 11088 vm_region_top_info_t top; 11089 11090 if (*count < VM_REGION_TOP_INFO_COUNT) 11091 return(KERN_INVALID_ARGUMENT); 11092 11093 top = (vm_region_top_info_t) info; 11094 *count = VM_REGION_TOP_INFO_COUNT; 11095 11096 vm_map_lock_read(map); 11097 11098 start = *address; 11099 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 11100 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 11101 vm_map_unlock_read(map); 11102 return(KERN_INVALID_ADDRESS); 11103 } 11104 } else { 11105 entry = tmp_entry; 11106 11107 } 11108 start = entry->vme_start; 11109 11110 top->private_pages_resident = 0; 11111 top->shared_pages_resident = 0; 11112 11113 vm_map_region_top_walk(entry, top); 11114 11115 if (object_name) 11116 *object_name = IP_NULL; 11117 *address = start; 11118 *size = (entry->vme_end - start); 11119 11120 vm_map_unlock_read(map); 11121 return(KERN_SUCCESS); 11122 } 11123 default: 11124 return(KERN_INVALID_ARGUMENT); 11125 } 11126} 11127 11128#define OBJ_RESIDENT_COUNT(obj, entry_size) \ 11129 MIN((entry_size), \ 11130 ((obj)->all_reusable ? 
\ 11131 (obj)->wired_page_count : \ 11132 (obj)->resident_page_count - (obj)->reusable_page_count)) 11133 11134void 11135vm_map_region_top_walk( 11136 vm_map_entry_t entry, 11137 vm_region_top_info_t top) 11138{ 11139 11140 if (entry->object.vm_object == 0 || entry->is_sub_map) { 11141 top->share_mode = SM_EMPTY; 11142 top->ref_count = 0; 11143 top->obj_id = 0; 11144 return; 11145 } 11146 11147 { 11148 struct vm_object *obj, *tmp_obj; 11149 int ref_count; 11150 uint32_t entry_size; 11151 11152 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64); 11153 11154 obj = entry->object.vm_object; 11155 11156 vm_object_lock(obj); 11157 11158 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 11159 ref_count--; 11160 11161 assert(obj->reusable_page_count <= obj->resident_page_count); 11162 if (obj->shadow) { 11163 if (ref_count == 1) 11164 top->private_pages_resident = 11165 OBJ_RESIDENT_COUNT(obj, entry_size); 11166 else 11167 top->shared_pages_resident = 11168 OBJ_RESIDENT_COUNT(obj, entry_size); 11169 top->ref_count = ref_count; 11170 top->share_mode = SM_COW; 11171 11172 while ((tmp_obj = obj->shadow)) { 11173 vm_object_lock(tmp_obj); 11174 vm_object_unlock(obj); 11175 obj = tmp_obj; 11176 11177 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 11178 ref_count--; 11179 11180 assert(obj->reusable_page_count <= obj->resident_page_count); 11181 top->shared_pages_resident += 11182 OBJ_RESIDENT_COUNT(obj, entry_size); 11183 top->ref_count += ref_count - 1; 11184 } 11185 } else { 11186 if (entry->superpage_size) { 11187 top->share_mode = SM_LARGE_PAGE; 11188 top->shared_pages_resident = 0; 11189 top->private_pages_resident = entry_size; 11190 } else if (entry->needs_copy) { 11191 top->share_mode = SM_COW; 11192 top->shared_pages_resident = 11193 OBJ_RESIDENT_COUNT(obj, entry_size); 11194 } else { 11195 if (ref_count == 1 || 11196 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { 11197 top->share_mode = SM_PRIVATE; 11198 top->private_pages_resident = 11199 OBJ_RESIDENT_COUNT(obj, 11200 entry_size); 11201 } else { 11202 top->share_mode = SM_SHARED; 11203 top->shared_pages_resident = 11204 OBJ_RESIDENT_COUNT(obj, 11205 entry_size); 11206 } 11207 } 11208 top->ref_count = ref_count; 11209 } 11210 /* XXX K64: obj_id will be truncated */ 11211 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj); 11212 11213 vm_object_unlock(obj); 11214 } 11215} 11216 11217void 11218vm_map_region_walk( 11219 vm_map_t map, 11220 vm_map_offset_t va, 11221 vm_map_entry_t entry, 11222 vm_object_offset_t offset, 11223 vm_object_size_t range, 11224 vm_region_extended_info_t extended, 11225 boolean_t look_for_pages, 11226 mach_msg_type_number_t count) 11227{ 11228 register struct vm_object *obj, *tmp_obj; 11229 register vm_map_offset_t last_offset; 11230 register int i; 11231 register int ref_count; 11232 struct vm_object *shadow_object; 11233 int shadow_depth; 11234 11235 if ((entry->object.vm_object == 0) || 11236 (entry->is_sub_map) || 11237 (entry->object.vm_object->phys_contiguous && 11238 !entry->superpage_size)) { 11239 extended->share_mode = SM_EMPTY; 11240 extended->ref_count = 0; 11241 return; 11242 } 11243 11244 if (entry->superpage_size) { 11245 extended->shadow_depth = 0; 11246 extended->share_mode = SM_LARGE_PAGE; 11247 extended->ref_count = 1; 11248 extended->external_pager = 0; 11249 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT); 11250 extended->shadow_depth = 0; 11251 return; 11252 } 11253 11254 { 11255 obj = 
entry->object.vm_object; 11256 11257 vm_object_lock(obj); 11258 11259 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 11260 ref_count--; 11261 11262 if (look_for_pages) { 11263 for (last_offset = offset + range; 11264 offset < last_offset; 11265 offset += PAGE_SIZE_64, va += PAGE_SIZE) { 11266 vm_map_region_look_for_page(map, va, obj, 11267 offset, ref_count, 11268 0, extended, count); 11269 } 11270 } else { 11271 shadow_object = obj->shadow; 11272 shadow_depth = 0; 11273 11274 if ( !(obj->pager_trusted) && !(obj->internal)) 11275 extended->external_pager = 1; 11276 11277 if (shadow_object != VM_OBJECT_NULL) { 11278 vm_object_lock(shadow_object); 11279 for (; 11280 shadow_object != VM_OBJECT_NULL; 11281 shadow_depth++) { 11282 vm_object_t next_shadow; 11283 11284 if ( !(shadow_object->pager_trusted) && 11285 !(shadow_object->internal)) 11286 extended->external_pager = 1; 11287 11288 next_shadow = shadow_object->shadow; 11289 if (next_shadow) { 11290 vm_object_lock(next_shadow); 11291 } 11292 vm_object_unlock(shadow_object); 11293 shadow_object = next_shadow; 11294 } 11295 } 11296 extended->shadow_depth = shadow_depth; 11297 } 11298 11299 if (extended->shadow_depth || entry->needs_copy) 11300 extended->share_mode = SM_COW; 11301 else { 11302 if (ref_count == 1) 11303 extended->share_mode = SM_PRIVATE; 11304 else { 11305 if (obj->true_share) 11306 extended->share_mode = SM_TRUESHARED; 11307 else 11308 extended->share_mode = SM_SHARED; 11309 } 11310 } 11311 extended->ref_count = ref_count - extended->shadow_depth; 11312 11313 for (i = 0; i < extended->shadow_depth; i++) { 11314 if ((tmp_obj = obj->shadow) == 0) 11315 break; 11316 vm_object_lock(tmp_obj); 11317 vm_object_unlock(obj); 11318 11319 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) 11320 ref_count--; 11321 11322 extended->ref_count += ref_count; 11323 obj = tmp_obj; 11324 } 11325 vm_object_unlock(obj); 11326 11327 if (extended->share_mode == SM_SHARED) { 11328 register vm_map_entry_t cur; 11329 register vm_map_entry_t last; 11330 int my_refs; 11331 11332 obj = entry->object.vm_object; 11333 last = vm_map_to_entry(map); 11334 my_refs = 0; 11335 11336 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 11337 ref_count--; 11338 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) 11339 my_refs += vm_map_region_count_obj_refs(cur, obj); 11340 11341 if (my_refs == ref_count) 11342 extended->share_mode = SM_PRIVATE_ALIASED; 11343 else if (my_refs > 1) 11344 extended->share_mode = SM_SHARED_ALIASED; 11345 } 11346 } 11347} 11348 11349 11350/* object is locked on entry and locked on return */ 11351 11352 11353static void 11354vm_map_region_look_for_page( 11355 __unused vm_map_t map, 11356 __unused vm_map_offset_t va, 11357 vm_object_t object, 11358 vm_object_offset_t offset, 11359 int max_refcnt, 11360 int depth, 11361 vm_region_extended_info_t extended, 11362 mach_msg_type_number_t count) 11363{ 11364 register vm_page_t p; 11365 register vm_object_t shadow; 11366 register int ref_count; 11367 vm_object_t caller_object; 11368 kern_return_t kr; 11369 shadow = object->shadow; 11370 caller_object = object; 11371 11372 11373 while (TRUE) { 11374 11375 if ( !(object->pager_trusted) && !(object->internal)) 11376 extended->external_pager = 1; 11377 11378 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { 11379 if (shadow && (max_refcnt == 1)) 11380 extended->pages_shared_now_private++; 11381 11382 if (!p->fictitious && 11383 (p->dirty || 
pmap_is_modified(p->phys_page))) 11384 extended->pages_dirtied++; 11385 else if (count >= VM_REGION_EXTENDED_INFO_COUNT) { 11386 if (p->reusable || p->object->all_reusable) { 11387 extended->pages_reusable++; 11388 } 11389 } 11390 11391 extended->pages_resident++; 11392 11393 if(object != caller_object) 11394 vm_object_unlock(object); 11395 11396 return; 11397 } 11398#if MACH_PAGEMAP 11399 if (object->existence_map) { 11400 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) { 11401 11402 extended->pages_swapped_out++; 11403 11404 if(object != caller_object) 11405 vm_object_unlock(object); 11406 11407 return; 11408 } 11409 } else 11410#endif /* MACH_PAGEMAP */ 11411 if (object->internal && 11412 object->alive && 11413 !object->terminating && 11414 object->pager_ready) { 11415 11416 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 11417 if (VM_COMPRESSOR_PAGER_STATE_GET(object, 11418 offset) 11419 == VM_EXTERNAL_STATE_EXISTS) { 11420 /* the pager has that page */ 11421 extended->pages_swapped_out++; 11422 if (object != caller_object) 11423 vm_object_unlock(object); 11424 return; 11425 } 11426 } else { 11427 memory_object_t pager; 11428 11429 vm_object_paging_begin(object); 11430 pager = object->pager; 11431 vm_object_unlock(object); 11432 11433 kr = memory_object_data_request( 11434 pager, 11435 offset + object->paging_offset, 11436 0, /* just poke the pager */ 11437 VM_PROT_READ, 11438 NULL); 11439 11440 vm_object_lock(object); 11441 vm_object_paging_end(object); 11442 11443 if (kr == KERN_SUCCESS) { 11444 /* the pager has that page */ 11445 extended->pages_swapped_out++; 11446 if (object != caller_object) 11447 vm_object_unlock(object); 11448 return; 11449 } 11450 } 11451 } 11452 11453 if (shadow) { 11454 vm_object_lock(shadow); 11455 11456 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) 11457 ref_count--; 11458 11459 if (++depth > extended->shadow_depth) 11460 extended->shadow_depth = depth; 11461 11462 if (ref_count > max_refcnt) 11463 max_refcnt = ref_count; 11464 11465 if(object != caller_object) 11466 vm_object_unlock(object); 11467 11468 offset = offset + object->vo_shadow_offset; 11469 object = shadow; 11470 shadow = object->shadow; 11471 continue; 11472 } 11473 if(object != caller_object) 11474 vm_object_unlock(object); 11475 break; 11476 } 11477} 11478 11479static int 11480vm_map_region_count_obj_refs( 11481 vm_map_entry_t entry, 11482 vm_object_t object) 11483{ 11484 register int ref_count; 11485 register vm_object_t chk_obj; 11486 register vm_object_t tmp_obj; 11487 11488 if (entry->object.vm_object == 0) 11489 return(0); 11490 11491 if (entry->is_sub_map) 11492 return(0); 11493 else { 11494 ref_count = 0; 11495 11496 chk_obj = entry->object.vm_object; 11497 vm_object_lock(chk_obj); 11498 11499 while (chk_obj) { 11500 if (chk_obj == object) 11501 ref_count++; 11502 tmp_obj = chk_obj->shadow; 11503 if (tmp_obj) 11504 vm_object_lock(tmp_obj); 11505 vm_object_unlock(chk_obj); 11506 11507 chk_obj = tmp_obj; 11508 } 11509 } 11510 return(ref_count); 11511} 11512 11513 11514/* 11515 * Routine: vm_map_simplify 11516 * 11517 * Description: 11518 * Attempt to simplify the map representation in 11519 * the vicinity of the given starting address. 
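 *	Two adjacent entries are coalesced only if they are backed by
 *	the same object at contiguous offsets and agree on all of their
 *	attributes (protection, inheritance, wiring, alias, etc.); see
 *	vm_map_simplify_entry() below.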
11520 * Note: 11521 * This routine is intended primarily to keep the 11522 * kernel maps more compact -- they generally don't 11523 * benefit from the "expand a map entry" technology 11524 * at allocation time because the adjacent entry 11525 * is often wired down. 11526 */ 11527void 11528vm_map_simplify_entry( 11529 vm_map_t map, 11530 vm_map_entry_t this_entry) 11531{ 11532 vm_map_entry_t prev_entry; 11533 11534 counter(c_vm_map_simplify_entry_called++); 11535 11536 prev_entry = this_entry->vme_prev; 11537 11538 if ((this_entry != vm_map_to_entry(map)) && 11539 (prev_entry != vm_map_to_entry(map)) && 11540 11541 (prev_entry->vme_end == this_entry->vme_start) && 11542 11543 (prev_entry->is_sub_map == this_entry->is_sub_map) && 11544 (prev_entry->object.vm_object == this_entry->object.vm_object) && 11545 ((prev_entry->offset + (prev_entry->vme_end - 11546 prev_entry->vme_start)) 11547 == this_entry->offset) && 11548 11549 (prev_entry->behavior == this_entry->behavior) && 11550 (prev_entry->needs_copy == this_entry->needs_copy) && 11551 (prev_entry->protection == this_entry->protection) && 11552 (prev_entry->max_protection == this_entry->max_protection) && 11553 (prev_entry->inheritance == this_entry->inheritance) && 11554 (prev_entry->use_pmap == this_entry->use_pmap) && 11555 (prev_entry->alias == this_entry->alias) && 11556 (prev_entry->no_cache == this_entry->no_cache) && 11557 (prev_entry->permanent == this_entry->permanent) && 11558 (prev_entry->map_aligned == this_entry->map_aligned) && 11559 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) && 11560 (prev_entry->used_for_jit == this_entry->used_for_jit) && 11561 /* from_reserved_zone: OK if that field doesn't match */ 11562 (prev_entry->iokit_acct == this_entry->iokit_acct) && 11563 11564 (prev_entry->wired_count == this_entry->wired_count) && 11565 (prev_entry->user_wired_count == this_entry->user_wired_count) && 11566 11567 (prev_entry->in_transition == FALSE) && 11568 (this_entry->in_transition == FALSE) && 11569 (prev_entry->needs_wakeup == FALSE) && 11570 (this_entry->needs_wakeup == FALSE) && 11571 (prev_entry->is_shared == FALSE) && 11572 (this_entry->is_shared == FALSE) && 11573 (prev_entry->superpage_size == FALSE) && 11574 (this_entry->superpage_size == FALSE) 11575 ) { 11576 vm_map_store_entry_unlink(map, prev_entry); 11577 assert(prev_entry->vme_start < this_entry->vme_end); 11578 if (prev_entry->map_aligned) 11579 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start, 11580 VM_MAP_PAGE_MASK(map))); 11581 this_entry->vme_start = prev_entry->vme_start; 11582 this_entry->offset = prev_entry->offset; 11583 if (prev_entry->is_sub_map) { 11584 vm_map_deallocate(prev_entry->object.sub_map); 11585 } else { 11586 vm_object_deallocate(prev_entry->object.vm_object); 11587 } 11588 vm_map_entry_dispose(map, prev_entry); 11589 SAVE_HINT_MAP_WRITE(map, this_entry); 11590 counter(c_vm_map_simplified++); 11591 } 11592} 11593 11594void 11595vm_map_simplify( 11596 vm_map_t map, 11597 vm_map_offset_t start) 11598{ 11599 vm_map_entry_t this_entry; 11600 11601 vm_map_lock(map); 11602 if (vm_map_lookup_entry(map, start, &this_entry)) { 11603 vm_map_simplify_entry(map, this_entry); 11604 vm_map_simplify_entry(map, this_entry->vme_next); 11605 } 11606 counter(c_vm_map_simplify_called++); 11607 vm_map_unlock(map); 11608} 11609 11610static void 11611vm_map_simplify_range( 11612 vm_map_t map, 11613 vm_map_offset_t start, 11614 vm_map_offset_t end) 11615{ 11616 vm_map_entry_t entry; 11617 11618 /* 11619 * The map should be locked (for 
"write") by the caller. 11620 */ 11621 11622 if (start >= end) { 11623 /* invalid address range */ 11624 return; 11625 } 11626 11627 start = vm_map_trunc_page(start, 11628 VM_MAP_PAGE_MASK(map)); 11629 end = vm_map_round_page(end, 11630 VM_MAP_PAGE_MASK(map)); 11631 11632 if (!vm_map_lookup_entry(map, start, &entry)) { 11633 /* "start" is not mapped and "entry" ends before "start" */ 11634 if (entry == vm_map_to_entry(map)) { 11635 /* start with first entry in the map */ 11636 entry = vm_map_first_entry(map); 11637 } else { 11638 /* start with next entry */ 11639 entry = entry->vme_next; 11640 } 11641 } 11642 11643 while (entry != vm_map_to_entry(map) && 11644 entry->vme_start <= end) { 11645 /* try and coalesce "entry" with its previous entry */ 11646 vm_map_simplify_entry(map, entry); 11647 entry = entry->vme_next; 11648 } 11649} 11650 11651 11652/* 11653 * Routine: vm_map_machine_attribute 11654 * Purpose: 11655 * Provide machine-specific attributes to mappings, 11656 * such as cachability etc. for machines that provide 11657 * them. NUMA architectures and machines with big/strange 11658 * caches will use this. 11659 * Note: 11660 * Responsibilities for locking and checking are handled here, 11661 * everything else in the pmap module. If any non-volatile 11662 * information must be kept, the pmap module should handle 11663 * it itself. [This assumes that attributes do not 11664 * need to be inherited, which seems ok to me] 11665 */ 11666kern_return_t 11667vm_map_machine_attribute( 11668 vm_map_t map, 11669 vm_map_offset_t start, 11670 vm_map_offset_t end, 11671 vm_machine_attribute_t attribute, 11672 vm_machine_attribute_val_t* value) /* IN/OUT */ 11673{ 11674 kern_return_t ret; 11675 vm_map_size_t sync_size; 11676 vm_map_entry_t entry; 11677 11678 if (start < vm_map_min(map) || end > vm_map_max(map)) 11679 return KERN_INVALID_ADDRESS; 11680 11681 /* Figure how much memory we need to flush (in page increments) */ 11682 sync_size = end - start; 11683 11684 vm_map_lock(map); 11685 11686 if (attribute != MATTR_CACHE) { 11687 /* If we don't have to find physical addresses, we */ 11688 /* don't have to do an explicit traversal here. 
*/ 11689 ret = pmap_attribute(map->pmap, start, end-start, 11690 attribute, value); 11691 vm_map_unlock(map); 11692 return ret; 11693 } 11694 11695 ret = KERN_SUCCESS; /* Assume it all worked */ 11696 11697 while(sync_size) { 11698 if (vm_map_lookup_entry(map, start, &entry)) { 11699 vm_map_size_t sub_size; 11700 if((entry->vme_end - start) > sync_size) { 11701 sub_size = sync_size; 11702 sync_size = 0; 11703 } else { 11704 sub_size = entry->vme_end - start; 11705 sync_size -= sub_size; 11706 } 11707 if(entry->is_sub_map) { 11708 vm_map_offset_t sub_start; 11709 vm_map_offset_t sub_end; 11710 11711 sub_start = (start - entry->vme_start) 11712 + entry->offset; 11713 sub_end = sub_start + sub_size; 11714 vm_map_machine_attribute( 11715 entry->object.sub_map, 11716 sub_start, 11717 sub_end, 11718 attribute, value); 11719 } else { 11720 if(entry->object.vm_object) { 11721 vm_page_t m; 11722 vm_object_t object; 11723 vm_object_t base_object; 11724 vm_object_t last_object; 11725 vm_object_offset_t offset; 11726 vm_object_offset_t base_offset; 11727 vm_map_size_t range; 11728 range = sub_size; 11729 offset = (start - entry->vme_start) 11730 + entry->offset; 11731 base_offset = offset; 11732 object = entry->object.vm_object; 11733 base_object = object; 11734 last_object = NULL; 11735 11736 vm_object_lock(object); 11737 11738 while (range) { 11739 m = vm_page_lookup( 11740 object, offset); 11741 11742 if (m && !m->fictitious) { 11743 ret = 11744 pmap_attribute_cache_sync( 11745 m->phys_page, 11746 PAGE_SIZE, 11747 attribute, value); 11748 11749 } else if (object->shadow) { 11750 offset = offset + object->vo_shadow_offset; 11751 last_object = object; 11752 object = object->shadow; 11753 vm_object_lock(last_object->shadow); 11754 vm_object_unlock(last_object); 11755 continue; 11756 } 11757 range -= PAGE_SIZE; 11758 11759 if (base_object != object) { 11760 vm_object_unlock(object); 11761 vm_object_lock(base_object); 11762 object = base_object; 11763 } 11764 /* Bump to the next page */ 11765 base_offset += PAGE_SIZE; 11766 offset = base_offset; 11767 } 11768 vm_object_unlock(object); 11769 } 11770 } 11771 start += sub_size; 11772 } else { 11773 vm_map_unlock(map); 11774 return KERN_FAILURE; 11775 } 11776 11777 } 11778 11779 vm_map_unlock(map); 11780 11781 return ret; 11782} 11783 11784/* 11785 * vm_map_behavior_set: 11786 * 11787 * Sets the paging reference behavior of the specified address 11788 * range in the target map. Paging reference behavior affects 11789 * how pagein operations resulting from faults on the map will be 11790 * clustered. 11791 */ 11792kern_return_t 11793vm_map_behavior_set( 11794 vm_map_t map, 11795 vm_map_offset_t start, 11796 vm_map_offset_t end, 11797 vm_behavior_t new_behavior) 11798{ 11799 register vm_map_entry_t entry; 11800 vm_map_entry_t temp_entry; 11801 11802 XPR(XPR_VM_MAP, 11803 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", 11804 map, start, end, new_behavior, 0); 11805 11806 if (start > end || 11807 start < vm_map_min(map) || 11808 end > vm_map_max(map)) { 11809 return KERN_NO_SPACE; 11810 } 11811 11812 switch (new_behavior) { 11813 11814 /* 11815 * This first block of behaviors all set a persistent state on the specified 11816 * memory range. All we have to do here is to record the desired behavior 11817 * in the vm_map_entry_t's. 
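 * (DEFAULT, RANDOM, SEQUENTIAL and RSEQNTL update "entry->behavior";
 * ZERO_WIRED_PAGES only sets the "zero_wired_pages" flag.)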
11818 */ 11819 11820 case VM_BEHAVIOR_DEFAULT: 11821 case VM_BEHAVIOR_RANDOM: 11822 case VM_BEHAVIOR_SEQUENTIAL: 11823 case VM_BEHAVIOR_RSEQNTL: 11824 case VM_BEHAVIOR_ZERO_WIRED_PAGES: 11825 vm_map_lock(map); 11826 11827 /* 11828 * The entire address range must be valid for the map. 11829 * Note that vm_map_range_check() does a 11830 * vm_map_lookup_entry() internally and returns the 11831 * entry containing the start of the address range if 11832 * the entire range is valid. 11833 */ 11834 if (vm_map_range_check(map, start, end, &temp_entry)) { 11835 entry = temp_entry; 11836 vm_map_clip_start(map, entry, start); 11837 } 11838 else { 11839 vm_map_unlock(map); 11840 return(KERN_INVALID_ADDRESS); 11841 } 11842 11843 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 11844 vm_map_clip_end(map, entry, end); 11845 if (entry->is_sub_map) { 11846 assert(!entry->use_pmap); 11847 } 11848 11849 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) { 11850 entry->zero_wired_pages = TRUE; 11851 } else { 11852 entry->behavior = new_behavior; 11853 } 11854 entry = entry->vme_next; 11855 } 11856 11857 vm_map_unlock(map); 11858 break; 11859 11860 /* 11861 * The rest of these are different from the above in that they cause 11862 * an immediate action to take place as opposed to setting a behavior that 11863 * affects future actions. 11864 */ 11865 11866 case VM_BEHAVIOR_WILLNEED: 11867 return vm_map_willneed(map, start, end); 11868 11869 case VM_BEHAVIOR_DONTNEED: 11870 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS); 11871 11872 case VM_BEHAVIOR_FREE: 11873 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS); 11874 11875 case VM_BEHAVIOR_REUSABLE: 11876 return vm_map_reusable_pages(map, start, end); 11877 11878 case VM_BEHAVIOR_REUSE: 11879 return vm_map_reuse_pages(map, start, end); 11880 11881 case VM_BEHAVIOR_CAN_REUSE: 11882 return vm_map_can_reuse(map, start, end); 11883 11884 default: 11885 return(KERN_INVALID_ARGUMENT); 11886 } 11887 11888 return(KERN_SUCCESS); 11889} 11890 11891 11892/* 11893 * Internals for madvise(MADV_WILLNEED) system call. 11894 * 11895 * The present implementation is to do a read-ahead if the mapping corresponds 11896 * to a mapped regular file. If it's an anonymous mapping, then we do nothing 11897 * and basically ignore the "advice" (which we are always free to do). 11898 */ 11899 11900 11901static kern_return_t 11902vm_map_willneed( 11903 vm_map_t map, 11904 vm_map_offset_t start, 11905 vm_map_offset_t end 11906) 11907{ 11908 vm_map_entry_t entry; 11909 vm_object_t object; 11910 memory_object_t pager; 11911 struct vm_object_fault_info fault_info; 11912 kern_return_t kr; 11913 vm_object_size_t len; 11914 vm_object_offset_t offset; 11915 11916 /* 11917 * Fill in static values in fault_info. Several fields get ignored by the code 11918 * we call, but we'll fill them in anyway since uninitialized fields are bad 11919 * when it comes to future backwards compatibility. 11920 */ 11921 11922 fault_info.interruptible = THREAD_UNINT; /* ignored value */ 11923 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 11924 fault_info.no_cache = FALSE; /* ignored value */ 11925 fault_info.stealth = TRUE; 11926 fault_info.io_sync = FALSE; 11927 fault_info.cs_bypass = FALSE; 11928 fault_info.mark_zf_absent = FALSE; 11929 fault_info.batch_pmap_op = FALSE; 11930 11931 /* 11932 * The MADV_WILLNEED operation doesn't require any changes to the 11933 * vm_map_entry_t's, so the read lock is sufficient. 
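 * We get here from vm_map_behavior_set() handling VM_BEHAVIOR_WILLNEED;
 * the read-ahead itself is issued below via memory_object_data_request()
 * against the file object backing each mapping.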
11934 */ 11935 11936 vm_map_lock_read(map); 11937 11938 /* 11939 * The madvise semantics require that the address range be fully 11940 * allocated with no holes. Otherwise, we're required to return 11941 * an error. 11942 */ 11943 11944 if (! vm_map_range_check(map, start, end, &entry)) { 11945 vm_map_unlock_read(map); 11946 return KERN_INVALID_ADDRESS; 11947 } 11948 11949 /* 11950 * Examine each vm_map_entry_t in the range. 11951 */ 11952 for (; entry != vm_map_to_entry(map) && start < end; ) { 11953 11954 /* 11955 * The first time through, the start address could be anywhere 11956 * within the vm_map_entry we found. So adjust the offset to 11957 * correspond. After that, the offset will always be zero to 11958 * correspond to the beginning of the current vm_map_entry. 11959 */ 11960 offset = (start - entry->vme_start) + entry->offset; 11961 11962 /* 11963 * Set the length so we don't go beyond the end of the 11964 * map_entry or beyond the end of the range we were given. 11965 * This range could span also multiple map entries all of which 11966 * map different files, so make sure we only do the right amount 11967 * of I/O for each object. Note that it's possible for there 11968 * to be multiple map entries all referring to the same object 11969 * but with different page permissions, but it's not worth 11970 * trying to optimize that case. 11971 */ 11972 len = MIN(entry->vme_end - start, end - start); 11973 11974 if ((vm_size_t) len != len) { 11975 /* 32-bit overflow */ 11976 len = (vm_size_t) (0 - PAGE_SIZE); 11977 } 11978 fault_info.cluster_size = (vm_size_t) len; 11979 fault_info.lo_offset = offset; 11980 fault_info.hi_offset = offset + len; 11981 fault_info.user_tag = entry->alias; 11982 fault_info.pmap_options = 0; 11983 if (entry->iokit_acct || 11984 (!entry->is_sub_map && !entry->use_pmap)) { 11985 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT; 11986 } 11987 11988 /* 11989 * If there's no read permission to this mapping, then just 11990 * skip it. 11991 */ 11992 if ((entry->protection & VM_PROT_READ) == 0) { 11993 entry = entry->vme_next; 11994 start = entry->vme_start; 11995 continue; 11996 } 11997 11998 /* 11999 * Find the file object backing this map entry. If there is 12000 * none, then we simply ignore the "will need" advice for this 12001 * entry and go on to the next one. 12002 */ 12003 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) { 12004 entry = entry->vme_next; 12005 start = entry->vme_start; 12006 continue; 12007 } 12008 12009 /* 12010 * The data_request() could take a long time, so let's 12011 * release the map lock to avoid blocking other threads. 12012 */ 12013 vm_map_unlock_read(map); 12014 12015 vm_object_paging_begin(object); 12016 pager = object->pager; 12017 vm_object_unlock(object); 12018 12019 /* 12020 * Get the data from the object asynchronously. 12021 * 12022 * Note that memory_object_data_request() places limits on the 12023 * amount of I/O it will do. Regardless of the len we 12024 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it 12025 * silently truncates the len to that size. This isn't 12026 * necessarily bad since madvise shouldn't really be used to 12027 * page in unlimited amounts of data. Other Unix variants 12028 * limit the willneed case as well. If this turns out to be an 12029 * issue for developers, then we can always adjust the policy 12030 * here and still be backwards compatible since this is all 12031 * just "advice". 
12032 */ 12033 kr = memory_object_data_request( 12034 pager, 12035 offset + object->paging_offset, 12036 0, /* ignored */ 12037 VM_PROT_READ, 12038 (memory_object_fault_info_t)&fault_info); 12039 12040 vm_object_lock(object); 12041 vm_object_paging_end(object); 12042 vm_object_unlock(object); 12043 12044 /* 12045 * If we couldn't do the I/O for some reason, just give up on 12046 * the madvise. We still return success to the user since 12047 * madvise isn't supposed to fail when the advice can't be 12048 * taken. 12049 */ 12050 if (kr != KERN_SUCCESS) { 12051 return KERN_SUCCESS; 12052 } 12053 12054 start += len; 12055 if (start >= end) { 12056 /* done */ 12057 return KERN_SUCCESS; 12058 } 12059 12060 /* look up next entry */ 12061 vm_map_lock_read(map); 12062 if (! vm_map_lookup_entry(map, start, &entry)) { 12063 /* 12064 * There's a new hole in the address range. 12065 */ 12066 vm_map_unlock_read(map); 12067 return KERN_INVALID_ADDRESS; 12068 } 12069 } 12070 12071 vm_map_unlock_read(map); 12072 return KERN_SUCCESS; 12073} 12074 12075static boolean_t 12076vm_map_entry_is_reusable( 12077 vm_map_entry_t entry) 12078{ 12079 vm_object_t object; 12080 12081 switch (entry->alias) { 12082 case VM_MEMORY_MALLOC: 12083 case VM_MEMORY_MALLOC_SMALL: 12084 case VM_MEMORY_MALLOC_LARGE: 12085 case VM_MEMORY_REALLOC: 12086 case VM_MEMORY_MALLOC_TINY: 12087 case VM_MEMORY_MALLOC_LARGE_REUSABLE: 12088 case VM_MEMORY_MALLOC_LARGE_REUSED: 12089 /* 12090 * This is a malloc() memory region: check if it's still 12091 * in its original state and can be re-used for more 12092 * malloc() allocations. 12093 */ 12094 break; 12095 default: 12096 /* 12097 * Not a malloc() memory region: let the caller decide if 12098 * it's re-usable. 12099 */ 12100 return TRUE; 12101 } 12102 12103 if (entry->is_shared || 12104 entry->is_sub_map || 12105 entry->in_transition || 12106 entry->protection != VM_PROT_DEFAULT || 12107 entry->max_protection != VM_PROT_ALL || 12108 entry->inheritance != VM_INHERIT_DEFAULT || 12109 entry->no_cache || 12110 entry->permanent || 12111 entry->superpage_size != FALSE || 12112 entry->zero_wired_pages || 12113 entry->wired_count != 0 || 12114 entry->user_wired_count != 0) { 12115 return FALSE; 12116 } 12117 12118 object = entry->object.vm_object; 12119 if (object == VM_OBJECT_NULL) { 12120 return TRUE; 12121 } 12122 if ( 12123#if 0 12124 /* 12125 * Let's proceed even if the VM object is potentially 12126 * shared. 12127 * We check for this later when processing the actual 12128 * VM pages, so the contents will be safe if shared. 12129 * 12130 * But we can still mark this memory region as "reusable" to 12131 * acknowledge that the caller did let us know that the memory 12132 * could be re-used and should not be penalized for holding 12133 * on to it. This allows its "resident size" to not include 12134 * the reusable range. 
12135 */ 12136 object->ref_count == 1 && 12137#endif 12138 object->wired_page_count == 0 && 12139 object->copy == VM_OBJECT_NULL && 12140 object->shadow == VM_OBJECT_NULL && 12141 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && 12142 object->internal && 12143 !object->true_share && 12144 object->wimg_bits == VM_WIMG_USE_DEFAULT && 12145 !object->code_signed) { 12146 return TRUE; 12147 } 12148 return FALSE; 12149 12150 12151} 12152 12153static kern_return_t 12154vm_map_reuse_pages( 12155 vm_map_t map, 12156 vm_map_offset_t start, 12157 vm_map_offset_t end) 12158{ 12159 vm_map_entry_t entry; 12160 vm_object_t object; 12161 vm_object_offset_t start_offset, end_offset; 12162 12163 /* 12164 * The MADV_REUSE operation doesn't require any changes to the 12165 * vm_map_entry_t's, so the read lock is sufficient. 12166 */ 12167 12168 vm_map_lock_read(map); 12169 12170 /* 12171 * The madvise semantics require that the address range be fully 12172 * allocated with no holes. Otherwise, we're required to return 12173 * an error. 12174 */ 12175 12176 if (!vm_map_range_check(map, start, end, &entry)) { 12177 vm_map_unlock_read(map); 12178 vm_page_stats_reusable.reuse_pages_failure++; 12179 return KERN_INVALID_ADDRESS; 12180 } 12181 12182 /* 12183 * Examine each vm_map_entry_t in the range. 12184 */ 12185 for (; entry != vm_map_to_entry(map) && entry->vme_start < end; 12186 entry = entry->vme_next) { 12187 /* 12188 * Sanity check on the VM map entry. 12189 */ 12190 if (! vm_map_entry_is_reusable(entry)) { 12191 vm_map_unlock_read(map); 12192 vm_page_stats_reusable.reuse_pages_failure++; 12193 return KERN_INVALID_ADDRESS; 12194 } 12195 12196 /* 12197 * The first time through, the start address could be anywhere 12198 * within the vm_map_entry we found. So adjust the offset to 12199 * correspond. 12200 */ 12201 if (entry->vme_start < start) { 12202 start_offset = start - entry->vme_start; 12203 } else { 12204 start_offset = 0; 12205 } 12206 end_offset = MIN(end, entry->vme_end) - entry->vme_start; 12207 start_offset += entry->offset; 12208 end_offset += entry->offset; 12209 12210 object = entry->object.vm_object; 12211 if (object != VM_OBJECT_NULL) { 12212 vm_object_lock(object); 12213 vm_object_reuse_pages(object, start_offset, end_offset, 12214 TRUE); 12215 vm_object_unlock(object); 12216 } 12217 12218 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) { 12219 /* 12220 * XXX 12221 * We do not hold the VM map exclusively here. 12222 * The "alias" field is not that critical, so it's 12223 * safe to update it here, as long as it is the only 12224 * one that can be modified while holding the VM map 12225 * "shared". 12226 */ 12227 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED; 12228 } 12229 } 12230 12231 vm_map_unlock_read(map); 12232 vm_page_stats_reusable.reuse_pages_success++; 12233 return KERN_SUCCESS; 12234} 12235 12236 12237static kern_return_t 12238vm_map_reusable_pages( 12239 vm_map_t map, 12240 vm_map_offset_t start, 12241 vm_map_offset_t end) 12242{ 12243 vm_map_entry_t entry; 12244 vm_object_t object; 12245 vm_object_offset_t start_offset, end_offset; 12246 12247 /* 12248 * The MADV_REUSABLE operation doesn't require any changes to the 12249 * vm_map_entry_t's, so the read lock is sufficient. 12250 */ 12251 12252 vm_map_lock_read(map); 12253 12254 /* 12255 * The madvise semantics require that the address range be fully 12256 * allocated with no holes. Otherwise, we're required to return 12257 * an error. 
12258 */ 12259 12260 if (!vm_map_range_check(map, start, end, &entry)) { 12261 vm_map_unlock_read(map); 12262 vm_page_stats_reusable.reusable_pages_failure++; 12263 return KERN_INVALID_ADDRESS; 12264 } 12265 12266 /* 12267 * Examine each vm_map_entry_t in the range. 12268 */ 12269 for (; entry != vm_map_to_entry(map) && entry->vme_start < end; 12270 entry = entry->vme_next) { 12271 int kill_pages = 0; 12272 12273 /* 12274 * Sanity check on the VM map entry. 12275 */ 12276 if (! vm_map_entry_is_reusable(entry)) { 12277 vm_map_unlock_read(map); 12278 vm_page_stats_reusable.reusable_pages_failure++; 12279 return KERN_INVALID_ADDRESS; 12280 } 12281 12282 /* 12283 * The first time through, the start address could be anywhere 12284 * within the vm_map_entry we found. So adjust the offset to 12285 * correspond. 12286 */ 12287 if (entry->vme_start < start) { 12288 start_offset = start - entry->vme_start; 12289 } else { 12290 start_offset = 0; 12291 } 12292 end_offset = MIN(end, entry->vme_end) - entry->vme_start; 12293 start_offset += entry->offset; 12294 end_offset += entry->offset; 12295 12296 object = entry->object.vm_object; 12297 if (object == VM_OBJECT_NULL) 12298 continue; 12299 12300 12301 vm_object_lock(object); 12302 if (object->ref_count == 1 && 12303 !object->shadow && 12304 /* 12305 * "iokit_acct" entries are billed for their virtual size 12306 * (rather than for their resident pages only), so they 12307 * wouldn't benefit from making pages reusable, and it 12308 * would be hard to keep track of pages that are both 12309 * "iokit_acct" and "reusable" in the pmap stats and ledgers. 12310 */ 12311 !(entry->iokit_acct || 12312 (!entry->is_sub_map && !entry->use_pmap))) 12313 kill_pages = 1; 12314 else 12315 kill_pages = -1; 12316 if (kill_pages != -1) { 12317 vm_object_deactivate_pages(object, 12318 start_offset, 12319 end_offset - start_offset, 12320 kill_pages, 12321 TRUE /*reusable_pages*/); 12322 } else { 12323 vm_page_stats_reusable.reusable_pages_shared++; 12324 } 12325 vm_object_unlock(object); 12326 12327 if (entry->alias == VM_MEMORY_MALLOC_LARGE || 12328 entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) { 12329 /* 12330 * XXX 12331 * We do not hold the VM map exclusively here. 12332 * The "alias" field is not that critical, so it's 12333 * safe to update it here, as long as it is the only 12334 * one that can be modified while holding the VM map 12335 * "shared". 12336 */ 12337 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE; 12338 } 12339 } 12340 12341 vm_map_unlock_read(map); 12342 vm_page_stats_reusable.reusable_pages_success++; 12343 return KERN_SUCCESS; 12344} 12345 12346 12347static kern_return_t 12348vm_map_can_reuse( 12349 vm_map_t map, 12350 vm_map_offset_t start, 12351 vm_map_offset_t end) 12352{ 12353 vm_map_entry_t entry; 12354 12355 /* 12356 * The MADV_REUSABLE operation doesn't require any changes to the 12357 * vm_map_entry_t's, so the read lock is sufficient. 12358 */ 12359 12360 vm_map_lock_read(map); 12361 12362 /* 12363 * The madvise semantics require that the address range be fully 12364 * allocated with no holes. Otherwise, we're required to return 12365 * an error. 12366 */ 12367 12368 if (!vm_map_range_check(map, start, end, &entry)) { 12369 vm_map_unlock_read(map); 12370 vm_page_stats_reusable.can_reuse_failure++; 12371 return KERN_INVALID_ADDRESS; 12372 } 12373 12374 /* 12375 * Examine each vm_map_entry_t in the range. 
12376 */ 12377 for (; entry != vm_map_to_entry(map) && entry->vme_start < end; 12378 entry = entry->vme_next) { 12379 /* 12380 * Sanity check on the VM map entry. 12381 */ 12382 if (! vm_map_entry_is_reusable(entry)) { 12383 vm_map_unlock_read(map); 12384 vm_page_stats_reusable.can_reuse_failure++; 12385 return KERN_INVALID_ADDRESS; 12386 } 12387 } 12388 12389 vm_map_unlock_read(map); 12390 vm_page_stats_reusable.can_reuse_success++; 12391 return KERN_SUCCESS; 12392} 12393 12394 12395/* 12396 * Routine: vm_map_entry_insert 12397 * 12398 * Description: This routine inserts a new vm_map_entry in a locked map. 12399 */ 12400vm_map_entry_t 12401vm_map_entry_insert( 12402 vm_map_t map, 12403 vm_map_entry_t insp_entry, 12404 vm_map_offset_t start, 12405 vm_map_offset_t end, 12406 vm_object_t object, 12407 vm_object_offset_t offset, 12408 boolean_t needs_copy, 12409 boolean_t is_shared, 12410 boolean_t in_transition, 12411 vm_prot_t cur_protection, 12412 vm_prot_t max_protection, 12413 vm_behavior_t behavior, 12414 vm_inherit_t inheritance, 12415 unsigned wired_count, 12416 boolean_t no_cache, 12417 boolean_t permanent, 12418 unsigned int superpage_size, 12419 boolean_t clear_map_aligned, 12420 boolean_t is_submap) 12421{ 12422 vm_map_entry_t new_entry; 12423 12424 assert(insp_entry != (vm_map_entry_t)0); 12425 12426 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable); 12427 12428 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) { 12429 new_entry->map_aligned = TRUE; 12430 } else { 12431 new_entry->map_aligned = FALSE; 12432 } 12433 if (clear_map_aligned && 12434 (! VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) || 12435 ! VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) { 12436 new_entry->map_aligned = FALSE; 12437 } 12438 12439 new_entry->vme_start = start; 12440 new_entry->vme_end = end; 12441 assert(page_aligned(new_entry->vme_start)); 12442 assert(page_aligned(new_entry->vme_end)); 12443 if (new_entry->map_aligned) { 12444 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start, 12445 VM_MAP_PAGE_MASK(map))); 12446 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end, 12447 VM_MAP_PAGE_MASK(map))); 12448 } 12449 assert(new_entry->vme_start < new_entry->vme_end); 12450 12451 new_entry->object.vm_object = object; 12452 new_entry->offset = offset; 12453 new_entry->is_shared = is_shared; 12454 new_entry->is_sub_map = is_submap; 12455 new_entry->needs_copy = needs_copy; 12456 new_entry->in_transition = in_transition; 12457 new_entry->needs_wakeup = FALSE; 12458 new_entry->inheritance = inheritance; 12459 new_entry->protection = cur_protection; 12460 new_entry->max_protection = max_protection; 12461 new_entry->behavior = behavior; 12462 new_entry->wired_count = wired_count; 12463 new_entry->user_wired_count = 0; 12464 if (is_submap) { 12465 /* 12466 * submap: "use_pmap" means "nested". 12467 * default: false. 12468 */ 12469 new_entry->use_pmap = FALSE; 12470 } else { 12471 /* 12472 * object: "use_pmap" means "use pmap accounting" for footprint. 12473 * default: true. 12474 */ 12475 new_entry->use_pmap = TRUE; 12476 } 12477 new_entry->alias = 0; 12478 new_entry->zero_wired_pages = FALSE; 12479 new_entry->no_cache = no_cache; 12480 new_entry->permanent = permanent; 12481 if (superpage_size) 12482 new_entry->superpage_size = TRUE; 12483 else 12484 new_entry->superpage_size = FALSE; 12485 new_entry->used_for_jit = FALSE; 12486 new_entry->iokit_acct = FALSE; 12487 12488 /* 12489 * Insert the new entry into the list.
12490 */ 12491 12492 vm_map_store_entry_link(map, insp_entry, new_entry); 12493 map->size += end - start; 12494 12495 /* 12496 * Update the free space hint and the lookup hint. 12497 */ 12498 12499 SAVE_HINT_MAP_WRITE(map, new_entry); 12500 return new_entry; 12501} 12502 12503/* 12504 * Routine: vm_map_remap_extract 12505 * 12506 * Description: This routine returns a vm_map_entry list from a map. 12507 */ 12508static kern_return_t 12509vm_map_remap_extract( 12510 vm_map_t map, 12511 vm_map_offset_t addr, 12512 vm_map_size_t size, 12513 boolean_t copy, 12514 struct vm_map_header *map_header, 12515 vm_prot_t *cur_protection, 12516 vm_prot_t *max_protection, 12517 /* What, no behavior? */ 12518 vm_inherit_t inheritance, 12519 boolean_t pageable) 12520{ 12521 kern_return_t result; 12522 vm_map_size_t mapped_size; 12523 vm_map_size_t tmp_size; 12524 vm_map_entry_t src_entry; /* result of last map lookup */ 12525 vm_map_entry_t new_entry; 12526 vm_object_offset_t offset; 12527 vm_map_offset_t map_address; 12528 vm_map_offset_t src_start; /* start of entry to map */ 12529 vm_map_offset_t src_end; /* end of region to be mapped */ 12530 vm_object_t object; 12531 vm_map_version_t version; 12532 boolean_t src_needs_copy; 12533 boolean_t new_entry_needs_copy; 12534 12535 assert(map != VM_MAP_NULL); 12536 assert(size != 0); 12537 assert(size == vm_map_round_page(size, PAGE_MASK)); 12538 assert(inheritance == VM_INHERIT_NONE || 12539 inheritance == VM_INHERIT_COPY || 12540 inheritance == VM_INHERIT_SHARE); 12541 12542 /* 12543 * Compute start and end of region. 12544 */ 12545 src_start = vm_map_trunc_page(addr, PAGE_MASK); 12546 src_end = vm_map_round_page(src_start + size, PAGE_MASK); 12547 12548 12549 /* 12550 * Initialize map_header. 12551 */ 12552 map_header->links.next = (struct vm_map_entry *)&map_header->links; 12553 map_header->links.prev = (struct vm_map_entry *)&map_header->links; 12554 map_header->nentries = 0; 12555 map_header->entries_pageable = pageable; 12556 map_header->page_shift = PAGE_SHIFT; 12557 12558 vm_map_store_init( map_header ); 12559 12560 *cur_protection = VM_PROT_ALL; 12561 *max_protection = VM_PROT_ALL; 12562 12563 map_address = 0; 12564 mapped_size = 0; 12565 result = KERN_SUCCESS; 12566 12567 /* 12568 * The specified source virtual space might correspond to 12569 * multiple map entries, so we need to loop over them. 12570 */ 12571 vm_map_lock(map); 12572 while (mapped_size != size) { 12573 vm_map_size_t entry_size; 12574 12575 /* 12576 * Find the beginning of the region. 12577 */ 12578 if (! vm_map_lookup_entry(map, src_start, &src_entry)) { 12579 result = KERN_INVALID_ADDRESS; 12580 break; 12581 } 12582 12583 if (src_start < src_entry->vme_start || 12584 (mapped_size && src_start != src_entry->vme_start)) { 12585 result = KERN_INVALID_ADDRESS; 12586 break; 12587 } 12588 12589 tmp_size = size - mapped_size; 12590 if (src_end > src_entry->vme_end) 12591 tmp_size -= (src_end - src_entry->vme_end); 12592 12593 entry_size = (vm_map_size_t)(src_entry->vme_end - 12594 src_entry->vme_start); 12595 12596 if(src_entry->is_sub_map) { 12597 vm_map_reference(src_entry->object.sub_map); 12598 object = VM_OBJECT_NULL; 12599 } else { 12600 object = src_entry->object.vm_object; 12601 if (src_entry->iokit_acct) { 12602 /* 12603 * This entry uses "IOKit accounting". 12604 */ 12605 } else if (object != VM_OBJECT_NULL && 12606 object->purgable != VM_PURGABLE_DENY) { 12607 /* 12608 * Purgeable objects have their own accounting: 12609 * no pmap accounting for them.
12610 */ 12611 assert(!src_entry->use_pmap); 12612 } else { 12613 /* 12614 * Not IOKit or purgeable: 12615 * must be accounted by pmap stats. 12616 */ 12617 assert(src_entry->use_pmap); 12618 } 12619 12620 if (object == VM_OBJECT_NULL) { 12621 object = vm_object_allocate(entry_size); 12622 src_entry->offset = 0; 12623 src_entry->object.vm_object = object; 12624 } else if (object->copy_strategy != 12625 MEMORY_OBJECT_COPY_SYMMETRIC) { 12626 /* 12627 * We are already using an asymmetric 12628 * copy, and therefore we already have 12629 * the right object. 12630 */ 12631 assert(!src_entry->needs_copy); 12632 } else if (src_entry->needs_copy || object->shadowed || 12633 (object->internal && !object->true_share && 12634 !src_entry->is_shared && 12635 object->vo_size > entry_size)) { 12636 12637 vm_object_shadow(&src_entry->object.vm_object, 12638 &src_entry->offset, 12639 entry_size); 12640 12641 if (!src_entry->needs_copy && 12642 (src_entry->protection & VM_PROT_WRITE)) { 12643 vm_prot_t prot; 12644 12645 prot = src_entry->protection & ~VM_PROT_WRITE; 12646 12647 if (override_nx(map, src_entry->alias) && prot) 12648 prot |= VM_PROT_EXECUTE; 12649 12650 if(map->mapped_in_other_pmaps) { 12651 vm_object_pmap_protect( 12652 src_entry->object.vm_object, 12653 src_entry->offset, 12654 entry_size, 12655 PMAP_NULL, 12656 src_entry->vme_start, 12657 prot); 12658 } else { 12659 pmap_protect(vm_map_pmap(map), 12660 src_entry->vme_start, 12661 src_entry->vme_end, 12662 prot); 12663 } 12664 } 12665 12666 object = src_entry->object.vm_object; 12667 src_entry->needs_copy = FALSE; 12668 } 12669 12670 12671 vm_object_lock(object); 12672 vm_object_reference_locked(object); /* object ref. for new entry */ 12673 if (object->copy_strategy == 12674 MEMORY_OBJECT_COPY_SYMMETRIC) { 12675 object->copy_strategy = 12676 MEMORY_OBJECT_COPY_DELAY; 12677 } 12678 vm_object_unlock(object); 12679 } 12680 12681 offset = src_entry->offset + (src_start - src_entry->vme_start); 12682 12683 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); 12684 vm_map_entry_copy(new_entry, src_entry); 12685 if (new_entry->is_sub_map) { 12686 /* clr address space specifics */ 12687 new_entry->use_pmap = FALSE; 12688 } 12689 12690 new_entry->map_aligned = FALSE; 12691 12692 new_entry->vme_start = map_address; 12693 new_entry->vme_end = map_address + tmp_size; 12694 assert(new_entry->vme_start < new_entry->vme_end); 12695 new_entry->inheritance = inheritance; 12696 new_entry->offset = offset; 12697 12698 /* 12699 * The new region has to be copied now if required. 12700 */ 12701 RestartCopy: 12702 if (!copy) { 12703 /* 12704 * Cannot allow an entry describing a JIT 12705 * region to be shared across address spaces. 12706 */ 12707 if (src_entry->used_for_jit == TRUE) { 12708 result = KERN_INVALID_ARGUMENT; 12709 break; 12710 } 12711 src_entry->is_shared = TRUE; 12712 new_entry->is_shared = TRUE; 12713 if (!(new_entry->is_sub_map)) 12714 new_entry->needs_copy = FALSE; 12715 12716 } else if (src_entry->is_sub_map) { 12717 /* make this a COW sub_map if not already */ 12718 new_entry->needs_copy = TRUE; 12719 object = VM_OBJECT_NULL; 12720 } else if (src_entry->wired_count == 0 && 12721 vm_object_copy_quickly(&new_entry->object.vm_object, 12722 new_entry->offset, 12723 (new_entry->vme_end - 12724 new_entry->vme_start), 12725 &src_needs_copy, 12726 &new_entry_needs_copy)) { 12727 12728 new_entry->needs_copy = new_entry_needs_copy; 12729 new_entry->is_shared = FALSE; 12730 12731 /* 12732 * Handle copy_on_write semantics. 
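 *
 * (Summary of the block below: if the quick copy left the source
 * needing copy-on-write protection, write access is removed from the
 * source mapping's pmap entries so the next write faults and performs
 * the real copy, and src_entry->needs_copy is set.)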
12733 */ 12734 if (src_needs_copy && !src_entry->needs_copy) { 12735 vm_prot_t prot; 12736 12737 prot = src_entry->protection & ~VM_PROT_WRITE; 12738 12739 if (override_nx(map, src_entry->alias) && prot) 12740 prot |= VM_PROT_EXECUTE; 12741 12742 vm_object_pmap_protect(object, 12743 offset, 12744 entry_size, 12745 ((src_entry->is_shared 12746 || map->mapped_in_other_pmaps) ? 12747 PMAP_NULL : map->pmap), 12748 src_entry->vme_start, 12749 prot); 12750 12751 src_entry->needs_copy = TRUE; 12752 } 12753 /* 12754 * Throw away the old object reference of the new entry. 12755 */ 12756 vm_object_deallocate(object); 12757 12758 } else { 12759 new_entry->is_shared = FALSE; 12760 12761 /* 12762 * The map can be safely unlocked since we 12763 * already hold a reference on the object. 12764 * 12765 * Record the timestamp of the map for later 12766 * verification, and unlock the map. 12767 */ 12768 version.main_timestamp = map->timestamp; 12769 vm_map_unlock(map); /* Increments timestamp once! */ 12770 12771 /* 12772 * Perform the copy. 12773 */ 12774 if (src_entry->wired_count > 0) { 12775 vm_object_lock(object); 12776 result = vm_object_copy_slowly( 12777 object, 12778 offset, 12779 entry_size, 12780 THREAD_UNINT, 12781 &new_entry->object.vm_object); 12782 12783 new_entry->offset = 0; 12784 new_entry->needs_copy = FALSE; 12785 } else { 12786 result = vm_object_copy_strategically( 12787 object, 12788 offset, 12789 entry_size, 12790 &new_entry->object.vm_object, 12791 &new_entry->offset, 12792 &new_entry_needs_copy); 12793 12794 new_entry->needs_copy = new_entry_needs_copy; 12795 } 12796 12797 /* 12798 * Throw away the old object reference of the new entry. 12799 */ 12800 vm_object_deallocate(object); 12801 12802 if (result != KERN_SUCCESS && 12803 result != KERN_MEMORY_RESTART_COPY) { 12804 _vm_map_entry_dispose(map_header, new_entry); 12805 break; 12806 } 12807 12808 /* 12809 * Verify that the map has not substantially 12810 * changed while the copy was being made. 12811 */ 12812 12813 vm_map_lock(map); 12814 if (version.main_timestamp + 1 != map->timestamp) { 12815 /* 12816 * Simple version comparison failed. 12817 * 12818 * Retry the lookup and verify that the 12819 * same object/offset are still present. 12820 */ 12821 vm_object_deallocate(new_entry-> 12822 object.vm_object); 12823 _vm_map_entry_dispose(map_header, new_entry); 12824 if (result == KERN_MEMORY_RESTART_COPY) 12825 result = KERN_SUCCESS; 12826 continue; 12827 } 12828 12829 if (result == KERN_MEMORY_RESTART_COPY) { 12830 vm_object_reference(object); 12831 goto RestartCopy; 12832 } 12833 } 12834 12835 _vm_map_store_entry_link(map_header, 12836 map_header->links.prev, new_entry); 12837 12838 /*Protections for submap mapping are irrelevant here*/ 12839 if( !src_entry->is_sub_map ) { 12840 *cur_protection &= src_entry->protection; 12841 *max_protection &= src_entry->max_protection; 12842 } 12843 map_address += tmp_size; 12844 mapped_size += tmp_size; 12845 src_start += tmp_size; 12846 12847 } /* end while */ 12848 12849 vm_map_unlock(map); 12850 if (result != KERN_SUCCESS) { 12851 /* 12852 * Free all allocated elements. 
12853 */ 12854 for (src_entry = map_header->links.next; 12855 src_entry != (struct vm_map_entry *)&map_header->links; 12856 src_entry = new_entry) { 12857 new_entry = src_entry->vme_next; 12858 _vm_map_store_entry_unlink(map_header, src_entry); 12859 if (src_entry->is_sub_map) { 12860 vm_map_deallocate(src_entry->object.sub_map); 12861 } else { 12862 vm_object_deallocate(src_entry->object.vm_object); 12863 } 12864 _vm_map_entry_dispose(map_header, src_entry); 12865 } 12866 } 12867 return result; 12868} 12869 12870/* 12871 * Routine: vm_remap 12872 * 12873 * Map portion of a task's address space. 12874 * Mapped region must not overlap more than 12875 * one vm memory object. Protections and 12876 * inheritance attributes remain the same 12877 * as in the original task and are out parameters. 12878 * Source and target task can be identical. 12879 * Other attributes are identical to those for vm_map(). 12880 */ 12881kern_return_t 12882vm_map_remap( 12883 vm_map_t target_map, 12884 vm_map_address_t *address, 12885 vm_map_size_t size, 12886 vm_map_offset_t mask, 12887 int flags, 12888 vm_map_t src_map, 12889 vm_map_offset_t memory_address, 12890 boolean_t copy, 12891 vm_prot_t *cur_protection, 12892 vm_prot_t *max_protection, 12893 vm_inherit_t inheritance) 12894{ 12895 kern_return_t result; 12896 vm_map_entry_t entry; 12897 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL; 12898 vm_map_entry_t new_entry; 12899 struct vm_map_header map_header; 12900 vm_map_offset_t offset_in_mapping; 12901 12902 if (target_map == VM_MAP_NULL) 12903 return KERN_INVALID_ARGUMENT; 12904 12905 switch (inheritance) { 12906 case VM_INHERIT_NONE: 12907 case VM_INHERIT_COPY: 12908 case VM_INHERIT_SHARE: 12909 if (size != 0 && src_map != VM_MAP_NULL) 12910 break; 12911 /*FALL THRU*/ 12912 default: 12913 return KERN_INVALID_ARGUMENT; 12914 } 12915 12916 /* 12917 * If the user is requesting that we return the address of the 12918 * first byte of the data (rather than the base of the page), 12919 * then we use different rounding semantics: specifically, 12920 * we assume that (memory_address, size) describes a region 12921 * all of whose pages we must cover, rather than a base to be truncated 12922 * down and a size to be added to that base. So we figure out 12923 * the highest page that the requested region includes and make 12924 * sure that the size will cover it. 12925 * 12926 * The key example we're worried about is of the form: 12927 * 12928 * memory_address = 0x1ff0, size = 0x20 12929 * 12930 * With the old semantics, we round down the memory_address to 0x1000 12931 * and round up the size to 0x1000, resulting in our covering *only* 12932 * page 0x1000. With the new semantics, we'd realize that the region covers 12933 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page 12934 * 0x1000 and page 0x2000 in the region we remap.
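 *
 * Working that example through the code below (assuming 4KB pages):
 * offset_in_mapping = 0x1ff0 - trunc_page(0x1ff0) = 0xff0, and
 * size = round_page(0x1ff0 + 0x20 - 0x1000) = round_page(0x1010)
 * = 0x2000, so the remapped range spans both pages, as described.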
12935 */ 12936 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 12937 offset_in_mapping = memory_address - vm_map_trunc_page(memory_address, PAGE_MASK); 12938 size = vm_map_round_page(memory_address + size - vm_map_trunc_page(memory_address, PAGE_MASK), PAGE_MASK); 12939 } else { 12940 size = vm_map_round_page(size, PAGE_MASK); 12941 } 12942 12943 result = vm_map_remap_extract(src_map, memory_address, 12944 size, copy, &map_header, 12945 cur_protection, 12946 max_protection, 12947 inheritance, 12948 target_map->hdr.entries_pageable); 12949 12950 if (result != KERN_SUCCESS) { 12951 return result; 12952 } 12953 12954 /* 12955 * Allocate/check a range of free virtual address 12956 * space for the target 12957 */ 12958 *address = vm_map_trunc_page(*address, 12959 VM_MAP_PAGE_MASK(target_map)); 12960 vm_map_lock(target_map); 12961 result = vm_map_remap_range_allocate(target_map, address, size, 12962 mask, flags, &insp_entry); 12963 12964 for (entry = map_header.links.next; 12965 entry != (struct vm_map_entry *)&map_header.links; 12966 entry = new_entry) { 12967 new_entry = entry->vme_next; 12968 _vm_map_store_entry_unlink(&map_header, entry); 12969 if (result == KERN_SUCCESS) { 12970 entry->vme_start += *address; 12971 entry->vme_end += *address; 12972 assert(!entry->map_aligned); 12973 vm_map_store_entry_link(target_map, insp_entry, entry); 12974 insp_entry = entry; 12975 } else { 12976 if (!entry->is_sub_map) { 12977 vm_object_deallocate(entry->object.vm_object); 12978 } else { 12979 vm_map_deallocate(entry->object.sub_map); 12980 } 12981 _vm_map_entry_dispose(&map_header, entry); 12982 } 12983 } 12984 12985 if( target_map->disable_vmentry_reuse == TRUE) { 12986 if( target_map->highest_entry_end < insp_entry->vme_end ){ 12987 target_map->highest_entry_end = insp_entry->vme_end; 12988 } 12989 } 12990 12991 if (result == KERN_SUCCESS) { 12992 target_map->size += size; 12993 SAVE_HINT_MAP_WRITE(target_map, insp_entry); 12994 } 12995 vm_map_unlock(target_map); 12996 12997 if (result == KERN_SUCCESS && target_map->wiring_required) 12998 result = vm_map_wire(target_map, *address, 12999 *address + size, *cur_protection, TRUE); 13000 13001 /* 13002 * If requested, return the address of the data pointed to by the 13003 * request, rather than the base of the resulting page. 13004 */ 13005 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) { 13006 *address += offset_in_mapping; 13007 } 13008 13009 return result; 13010} 13011 13012/* 13013 * Routine: vm_map_remap_range_allocate 13014 * 13015 * Description: 13016 * Allocate a range in the specified virtual address map. 13017 * returns the address and the map entry just before the allocated 13018 * range 13019 * 13020 * Map must be locked. 13021 */ 13022 13023static kern_return_t 13024vm_map_remap_range_allocate( 13025 vm_map_t map, 13026 vm_map_address_t *address, /* IN/OUT */ 13027 vm_map_size_t size, 13028 vm_map_offset_t mask, 13029 int flags, 13030 vm_map_entry_t *map_entry) /* OUT */ 13031{ 13032 vm_map_entry_t entry; 13033 vm_map_offset_t start; 13034 vm_map_offset_t end; 13035 kern_return_t kr; 13036 13037StartAgain: ; 13038 13039 start = *address; 13040 13041 if (flags & VM_FLAGS_ANYWHERE) 13042 { 13043 /* 13044 * Calculate the first possible address. 13045 */ 13046 13047 if (start < map->min_offset) 13048 start = map->min_offset; 13049 if (start > map->max_offset) 13050 return(KERN_NO_SPACE); 13051 13052 /* 13053 * Look for the first possible address; 13054 * if there's already something at this 13055 * address, we have to start after it. 
13056 */ 13057 13058 if( map->disable_vmentry_reuse == TRUE) { 13059 VM_MAP_HIGHEST_ENTRY(map, entry, start); 13060 } else { 13061 assert(first_free_is_valid(map)); 13062 if (start == map->min_offset) { 13063 if ((entry = map->first_free) != vm_map_to_entry(map)) 13064 start = entry->vme_end; 13065 } else { 13066 vm_map_entry_t tmp_entry; 13067 if (vm_map_lookup_entry(map, start, &tmp_entry)) 13068 start = tmp_entry->vme_end; 13069 entry = tmp_entry; 13070 } 13071 start = vm_map_round_page(start, 13072 VM_MAP_PAGE_MASK(map)); 13073 } 13074 13075 /* 13076 * In any case, the "entry" always precedes 13077 * the proposed new region throughout the 13078 * loop: 13079 */ 13080 13081 while (TRUE) { 13082 register vm_map_entry_t next; 13083 13084 /* 13085 * Find the end of the proposed new region. 13086 * Be sure we didn't go beyond the end, or 13087 * wrap around the address. 13088 */ 13089 13090 end = ((start + mask) & ~mask); 13091 end = vm_map_round_page(end, 13092 VM_MAP_PAGE_MASK(map)); 13093 if (end < start) 13094 return(KERN_NO_SPACE); 13095 start = end; 13096 end += size; 13097 13098 if ((end > map->max_offset) || (end < start)) { 13099 if (map->wait_for_space) { 13100 if (size <= (map->max_offset - 13101 map->min_offset)) { 13102 assert_wait((event_t) map, THREAD_INTERRUPTIBLE); 13103 vm_map_unlock(map); 13104 thread_block(THREAD_CONTINUE_NULL); 13105 vm_map_lock(map); 13106 goto StartAgain; 13107 } 13108 } 13109 13110 return(KERN_NO_SPACE); 13111 } 13112 13113 /* 13114 * If there are no more entries, we must win. 13115 */ 13116 13117 next = entry->vme_next; 13118 if (next == vm_map_to_entry(map)) 13119 break; 13120 13121 /* 13122 * If there is another entry, it must be 13123 * after the end of the potential new region. 13124 */ 13125 13126 if (next->vme_start >= end) 13127 break; 13128 13129 /* 13130 * Didn't fit -- move to the next entry. 13131 */ 13132 13133 entry = next; 13134 start = entry->vme_end; 13135 } 13136 *address = start; 13137 } else { 13138 vm_map_entry_t temp_entry; 13139 13140 /* 13141 * Verify that: 13142 * the address doesn't itself violate 13143 * the mask requirement. 13144 */ 13145 13146 if ((start & mask) != 0) 13147 return(KERN_NO_SPACE); 13148 13149 13150 /* 13151 * ... the address is within bounds 13152 */ 13153 13154 end = start + size; 13155 13156 if ((start < map->min_offset) || 13157 (end > map->max_offset) || 13158 (start >= end)) { 13159 return(KERN_INVALID_ADDRESS); 13160 } 13161 13162 /* 13163 * If we're asked to overwrite whatever was mapped in that 13164 * range, first deallocate that range. 13165 */ 13166 if (flags & VM_FLAGS_OVERWRITE) { 13167 vm_map_t zap_map; 13168 13169 /* 13170 * We use a "zap_map" to avoid having to unlock 13171 * the "map" in vm_map_delete(), which would compromise 13172 * the atomicity of the "deallocate" and then "remap" 13173 * combination. 13174 */ 13175 zap_map = vm_map_create(PMAP_NULL, 13176 start, 13177 end, 13178 map->hdr.entries_pageable); 13179 if (zap_map == VM_MAP_NULL) { 13180 return KERN_RESOURCE_SHORTAGE; 13181 } 13182 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map)); 13183 13184 kr = vm_map_delete(map, start, end, 13185 (VM_MAP_REMOVE_SAVE_ENTRIES | 13186 VM_MAP_REMOVE_NO_MAP_ALIGN), 13187 zap_map); 13188 if (kr == KERN_SUCCESS) { 13189 vm_map_destroy(zap_map, 13190 VM_MAP_REMOVE_NO_PMAP_CLEANUP); 13191 zap_map = VM_MAP_NULL; 13192 } 13193 } 13194 13195 /* 13196 * ... 
the starting address isn't allocated 13197 */ 13198 13199 if (vm_map_lookup_entry(map, start, &temp_entry)) 13200 return(KERN_NO_SPACE); 13201 13202 entry = temp_entry; 13203 13204 /* 13205 * ... the next region doesn't overlap the 13206 * end point. 13207 */ 13208 13209 if ((entry->vme_next != vm_map_to_entry(map)) && 13210 (entry->vme_next->vme_start < end)) 13211 return(KERN_NO_SPACE); 13212 } 13213 *map_entry = entry; 13214 return(KERN_SUCCESS); 13215} 13216 13217/* 13218 * vm_map_switch: 13219 * 13220 * Set the address map for the current thread to the specified map 13221 */ 13222 13223vm_map_t 13224vm_map_switch( 13225 vm_map_t map) 13226{ 13227 int mycpu; 13228 thread_t thread = current_thread(); 13229 vm_map_t oldmap = thread->map; 13230 13231 mp_disable_preemption(); 13232 mycpu = cpu_number(); 13233 13234 /* 13235 * Deactivate the current map and activate the requested map 13236 */ 13237 PMAP_SWITCH_USER(thread, map, mycpu); 13238 13239 mp_enable_preemption(); 13240 return(oldmap); 13241} 13242 13243 13244/* 13245 * Routine: vm_map_write_user 13246 * 13247 * Description: 13248 * Copy out data from a kernel space into space in the 13249 * destination map. The space must already exist in the 13250 * destination map. 13251 * NOTE: This routine should only be called by threads 13252 * which can block on a page fault. i.e. kernel mode user 13253 * threads. 13254 * 13255 */ 13256kern_return_t 13257vm_map_write_user( 13258 vm_map_t map, 13259 void *src_p, 13260 vm_map_address_t dst_addr, 13261 vm_size_t size) 13262{ 13263 kern_return_t kr = KERN_SUCCESS; 13264 13265 if(current_map() == map) { 13266 if (copyout(src_p, dst_addr, size)) { 13267 kr = KERN_INVALID_ADDRESS; 13268 } 13269 } else { 13270 vm_map_t oldmap; 13271 13272 /* take on the identity of the target map while doing */ 13273 /* the transfer */ 13274 13275 vm_map_reference(map); 13276 oldmap = vm_map_switch(map); 13277 if (copyout(src_p, dst_addr, size)) { 13278 kr = KERN_INVALID_ADDRESS; 13279 } 13280 vm_map_switch(oldmap); 13281 vm_map_deallocate(map); 13282 } 13283 return kr; 13284} 13285 13286/* 13287 * Routine: vm_map_read_user 13288 * 13289 * Description: 13290 * Copy in data from a user space source map into the 13291 * kernel map. The space must already exist in the 13292 * kernel map. 13293 * NOTE: This routine should only be called by threads 13294 * which can block on a page fault. i.e. kernel mode user 13295 * threads. 13296 * 13297 */ 13298kern_return_t 13299vm_map_read_user( 13300 vm_map_t map, 13301 vm_map_address_t src_addr, 13302 void *dst_p, 13303 vm_size_t size) 13304{ 13305 kern_return_t kr = KERN_SUCCESS; 13306 13307 if(current_map() == map) { 13308 if (copyin(src_addr, dst_p, size)) { 13309 kr = KERN_INVALID_ADDRESS; 13310 } 13311 } else { 13312 vm_map_t oldmap; 13313 13314 /* take on the identity of the target map while doing */ 13315 /* the transfer */ 13316 13317 vm_map_reference(map); 13318 oldmap = vm_map_switch(map); 13319 if (copyin(src_addr, dst_p, size)) { 13320 kr = KERN_INVALID_ADDRESS; 13321 } 13322 vm_map_switch(oldmap); 13323 vm_map_deallocate(map); 13324 } 13325 return kr; 13326} 13327 13328 13329/* 13330 * vm_map_check_protection: 13331 * 13332 * Assert that the target map allows the specified 13333 * privilege on the entire address region given. 13334 * The entire region must be allocated. 
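 *
 * (Usage sketch, assuming a hypothetical caller: a check such as
 * vm_map_check_protection(map, start, end, VM_PROT_READ|VM_PROT_WRITE)
 * returns TRUE only if every entry covering [start, end) grants at
 * least those protections; any hole or weaker entry yields FALSE.)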
13335 */ 13336boolean_t 13337vm_map_check_protection(vm_map_t map, vm_map_offset_t start, 13338 vm_map_offset_t end, vm_prot_t protection) 13339{ 13340 vm_map_entry_t entry; 13341 vm_map_entry_t tmp_entry; 13342 13343 vm_map_lock(map); 13344 13345 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) 13346 { 13347 vm_map_unlock(map); 13348 return (FALSE); 13349 } 13350 13351 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 13352 vm_map_unlock(map); 13353 return(FALSE); 13354 } 13355 13356 entry = tmp_entry; 13357 13358 while (start < end) { 13359 if (entry == vm_map_to_entry(map)) { 13360 vm_map_unlock(map); 13361 return(FALSE); 13362 } 13363 13364 /* 13365 * No holes allowed! 13366 */ 13367 13368 if (start < entry->vme_start) { 13369 vm_map_unlock(map); 13370 return(FALSE); 13371 } 13372 13373 /* 13374 * Check protection associated with entry. 13375 */ 13376 13377 if ((entry->protection & protection) != protection) { 13378 vm_map_unlock(map); 13379 return(FALSE); 13380 } 13381 13382 /* go to next entry */ 13383 13384 start = entry->vme_end; 13385 entry = entry->vme_next; 13386 } 13387 vm_map_unlock(map); 13388 return(TRUE); 13389} 13390 13391kern_return_t 13392vm_map_purgable_control( 13393 vm_map_t map, 13394 vm_map_offset_t address, 13395 vm_purgable_t control, 13396 int *state) 13397{ 13398 vm_map_entry_t entry; 13399 vm_object_t object; 13400 kern_return_t kr; 13401 boolean_t was_nonvolatile; 13402 13403 /* 13404 * Vet all the input parameters and current type and state of the 13405 * underlaying object. Return with an error if anything is amiss. 13406 */ 13407 if (map == VM_MAP_NULL) 13408 return(KERN_INVALID_ARGUMENT); 13409 13410 if (control != VM_PURGABLE_SET_STATE && 13411 control != VM_PURGABLE_GET_STATE && 13412 control != VM_PURGABLE_PURGE_ALL) 13413 return(KERN_INVALID_ARGUMENT); 13414 13415 if (control == VM_PURGABLE_PURGE_ALL) { 13416 vm_purgeable_object_purge_all(); 13417 return KERN_SUCCESS; 13418 } 13419 13420 if (control == VM_PURGABLE_SET_STATE && 13421 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || 13422 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) 13423 return(KERN_INVALID_ARGUMENT); 13424 13425 vm_map_lock_read(map); 13426 13427 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) { 13428 13429 /* 13430 * Must pass a valid non-submap address. 13431 */ 13432 vm_map_unlock_read(map); 13433 return(KERN_INVALID_ADDRESS); 13434 } 13435 13436 if ((entry->protection & VM_PROT_WRITE) == 0) { 13437 /* 13438 * Can't apply purgable controls to something you can't write. 13439 */ 13440 vm_map_unlock_read(map); 13441 return(KERN_PROTECTION_FAILURE); 13442 } 13443 13444 object = entry->object.vm_object; 13445 if (object == VM_OBJECT_NULL || 13446 object->purgable == VM_PURGABLE_DENY) { 13447 /* 13448 * Object must already be present and be purgeable. 13449 */ 13450 vm_map_unlock_read(map); 13451 return KERN_INVALID_ARGUMENT; 13452 } 13453 13454 vm_object_lock(object); 13455 13456#if 00 13457 if (entry->offset != 0 || 13458 entry->vme_end - entry->vme_start != object->vo_size) { 13459 /* 13460 * Can only apply purgable controls to the whole (existing) 13461 * object at once. 
13462 */ 13463 vm_map_unlock_read(map); 13464 vm_object_unlock(object); 13465 return KERN_INVALID_ARGUMENT; 13466 } 13467#endif 13468 13469 assert(!entry->is_sub_map); 13470 assert(!entry->use_pmap); /* purgeable has its own accounting */ 13471 13472 vm_map_unlock_read(map); 13473 13474 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE); 13475 13476 kr = vm_object_purgable_control(object, control, state); 13477 13478 if (was_nonvolatile && 13479 object->purgable != VM_PURGABLE_NONVOLATILE && 13480 map->pmap == kernel_pmap) { 13481#if DEBUG 13482 object->vo_purgeable_volatilizer = kernel_task; 13483#endif /* DEBUG */ 13484 } 13485 13486 vm_object_unlock(object); 13487 13488 return kr; 13489} 13490 13491kern_return_t 13492vm_map_page_query_internal( 13493 vm_map_t target_map, 13494 vm_map_offset_t offset, 13495 int *disposition, 13496 int *ref_count) 13497{ 13498 kern_return_t kr; 13499 vm_page_info_basic_data_t info; 13500 mach_msg_type_number_t count; 13501 13502 count = VM_PAGE_INFO_BASIC_COUNT; 13503 kr = vm_map_page_info(target_map, 13504 offset, 13505 VM_PAGE_INFO_BASIC, 13506 (vm_page_info_t) &info, 13507 &count); 13508 if (kr == KERN_SUCCESS) { 13509 *disposition = info.disposition; 13510 *ref_count = info.ref_count; 13511 } else { 13512 *disposition = 0; 13513 *ref_count = 0; 13514 } 13515 13516 return kr; 13517} 13518 13519kern_return_t 13520vm_map_page_info( 13521 vm_map_t map, 13522 vm_map_offset_t offset, 13523 vm_page_info_flavor_t flavor, 13524 vm_page_info_t info, 13525 mach_msg_type_number_t *count) 13526{ 13527 vm_map_entry_t map_entry; 13528 vm_object_t object; 13529 vm_page_t m; 13530 kern_return_t kr; 13531 kern_return_t retval = KERN_SUCCESS; 13532 boolean_t top_object; 13533 int disposition; 13534 int ref_count; 13535 vm_page_info_basic_t basic_info; 13536 int depth; 13537 vm_map_offset_t offset_in_page; 13538 13539 switch (flavor) { 13540 case VM_PAGE_INFO_BASIC: 13541 if (*count != VM_PAGE_INFO_BASIC_COUNT) { 13542 /* 13543 * The "vm_page_info_basic_data" structure was not 13544 * properly padded, so allow the size to be off by 13545 * one to maintain backwards binary compatibility... 13546 */ 13547 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) 13548 return KERN_INVALID_ARGUMENT; 13549 } 13550 break; 13551 default: 13552 return KERN_INVALID_ARGUMENT; 13553 } 13554 13555 disposition = 0; 13556 ref_count = 0; 13557 top_object = TRUE; 13558 depth = 0; 13559 13560 retval = KERN_SUCCESS; 13561 offset_in_page = offset & PAGE_MASK; 13562 offset = vm_map_trunc_page(offset, PAGE_MASK); 13563 13564 vm_map_lock_read(map); 13565 13566 /* 13567 * First, find the map entry covering "offset", going down 13568 * submaps if necessary. 
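 *
 * (Note on the loop below: at each level the offset is rebased into
 * the entry's object or submap, i.e. offset = (offset - vme_start) +
 * entry->offset, so when the loop exits, "offset" is an offset within
 * the terminal VM object rather than a map address.)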
13569 */ 13570 for (;;) { 13571 if (!vm_map_lookup_entry(map, offset, &map_entry)) { 13572 vm_map_unlock_read(map); 13573 return KERN_INVALID_ADDRESS; 13574 } 13575 /* compute offset from this map entry's start */ 13576 offset -= map_entry->vme_start; 13577 /* compute offset into this map entry's object (or submap) */ 13578 offset += map_entry->offset; 13579 13580 if (map_entry->is_sub_map) { 13581 vm_map_t sub_map; 13582 13583 sub_map = map_entry->object.sub_map; 13584 vm_map_lock_read(sub_map); 13585 vm_map_unlock_read(map); 13586 13587 map = sub_map; 13588 13589 ref_count = MAX(ref_count, map->ref_count); 13590 continue; 13591 } 13592 break; 13593 } 13594 13595 object = map_entry->object.vm_object; 13596 if (object == VM_OBJECT_NULL) { 13597 /* no object -> no page */ 13598 vm_map_unlock_read(map); 13599 goto done; 13600 } 13601 13602 vm_object_lock(object); 13603 vm_map_unlock_read(map); 13604 13605 /* 13606 * Go down the VM object shadow chain until we find the page 13607 * we're looking for. 13608 */ 13609 for (;;) { 13610 ref_count = MAX(ref_count, object->ref_count); 13611 13612 m = vm_page_lookup(object, offset); 13613 13614 if (m != VM_PAGE_NULL) { 13615 disposition |= VM_PAGE_QUERY_PAGE_PRESENT; 13616 break; 13617 } else { 13618#if MACH_PAGEMAP 13619 if (object->existence_map) { 13620 if (vm_external_state_get(object->existence_map, 13621 offset) == 13622 VM_EXTERNAL_STATE_EXISTS) { 13623 /* 13624 * this page has been paged out 13625 */ 13626 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; 13627 break; 13628 } 13629 } else 13630#endif 13631 if (object->internal && 13632 object->alive && 13633 !object->terminating && 13634 object->pager_ready) { 13635 13636 if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) { 13637 if (VM_COMPRESSOR_PAGER_STATE_GET( 13638 object, 13639 offset) 13640 == VM_EXTERNAL_STATE_EXISTS) { 13641 /* the pager has that page */ 13642 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; 13643 break; 13644 } 13645 } else { 13646 memory_object_t pager; 13647 13648 vm_object_paging_begin(object); 13649 pager = object->pager; 13650 vm_object_unlock(object); 13651 13652 /* 13653 * Ask the default pager if 13654 * it has this page. 
13655 */ 13656 kr = memory_object_data_request( 13657 pager, 13658 offset + object->paging_offset, 13659 0, /* just poke the pager */ 13660 VM_PROT_READ, 13661 NULL); 13662 13663 vm_object_lock(object); 13664 vm_object_paging_end(object); 13665 13666 if (kr == KERN_SUCCESS) { 13667 /* the default pager has it */ 13668 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; 13669 break; 13670 } 13671 } 13672 } 13673 13674 if (object->shadow != VM_OBJECT_NULL) { 13675 vm_object_t shadow; 13676 13677 offset += object->vo_shadow_offset; 13678 shadow = object->shadow; 13679 13680 vm_object_lock(shadow); 13681 vm_object_unlock(object); 13682 13683 object = shadow; 13684 top_object = FALSE; 13685 depth++; 13686 } else { 13687// if (!object->internal) 13688// break; 13689// retval = KERN_FAILURE; 13690// goto done_with_object; 13691 break; 13692 } 13693 } 13694 } 13695 /* The ref_count is not strictly accurate, it measures the number */ 13696 /* of entities holding a ref on the object, they may not be mapping */ 13697 /* the object or may not be mapping the section holding the */ 13698 /* target page but its still a ball park number and though an over- */ 13699 /* count, it picks up the copy-on-write cases */ 13700 13701 /* We could also get a picture of page sharing from pmap_attributes */ 13702 /* but this would under count as only faulted-in mappings would */ 13703 /* show up. */ 13704 13705 if (top_object == TRUE && object->shadow) 13706 disposition |= VM_PAGE_QUERY_PAGE_COPIED; 13707 13708 if (! object->internal) 13709 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL; 13710 13711 if (m == VM_PAGE_NULL) 13712 goto done_with_object; 13713 13714 if (m->fictitious) { 13715 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; 13716 goto done_with_object; 13717 } 13718 if (m->dirty || pmap_is_modified(m->phys_page)) 13719 disposition |= VM_PAGE_QUERY_PAGE_DIRTY; 13720 13721 if (m->reference || pmap_is_referenced(m->phys_page)) 13722 disposition |= VM_PAGE_QUERY_PAGE_REF; 13723 13724 if (m->speculative) 13725 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE; 13726 13727 if (m->cs_validated) 13728 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED; 13729 if (m->cs_tainted) 13730 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED; 13731 13732done_with_object: 13733 vm_object_unlock(object); 13734done: 13735 13736 switch (flavor) { 13737 case VM_PAGE_INFO_BASIC: 13738 basic_info = (vm_page_info_basic_t) info; 13739 basic_info->disposition = disposition; 13740 basic_info->ref_count = ref_count; 13741 basic_info->object_id = (vm_object_id_t) (uintptr_t) 13742 VM_KERNEL_ADDRPERM(object); 13743 basic_info->offset = 13744 (memory_object_offset_t) offset + offset_in_page; 13745 basic_info->depth = depth; 13746 break; 13747 } 13748 13749 return retval; 13750} 13751 13752/* 13753 * vm_map_msync 13754 * 13755 * Synchronises the memory range specified with its backing store 13756 * image by either flushing or cleaning the contents to the appropriate 13757 * memory manager engaging in a memory object synchronize dialog with 13758 * the manager. The client doesn't return until the manager issues 13759 * m_o_s_completed message. MIG Magically converts user task parameter 13760 * to the task's address map. 13761 * 13762 * interpretation of sync_flags 13763 * VM_SYNC_INVALIDATE - discard pages, only return precious 13764 * pages to manager. 13765 * 13766 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS) 13767 * - discard pages, write dirty or precious 13768 * pages back to memory manager. 
13769 * 13770 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS 13771 * - write dirty or precious pages back to 13772 * the memory manager. 13773 * 13774 * VM_SYNC_CONTIGUOUS - does everything normally, but if there 13775 * is a hole in the region, and we would 13776 * have returned KERN_SUCCESS, return 13777 * KERN_INVALID_ADDRESS instead. 13778 * 13779 * NOTE 13780 * The memory object attributes have not yet been implemented, this 13781 * function will have to deal with the invalidate attribute 13782 * 13783 * RETURNS 13784 * KERN_INVALID_TASK Bad task parameter 13785 * KERN_INVALID_ARGUMENT both sync and async were specified. 13786 * KERN_SUCCESS The usual. 13787 * KERN_INVALID_ADDRESS There was a hole in the region. 13788 */ 13789 13790kern_return_t 13791vm_map_msync( 13792 vm_map_t map, 13793 vm_map_address_t address, 13794 vm_map_size_t size, 13795 vm_sync_t sync_flags) 13796{ 13797 msync_req_t msr; 13798 msync_req_t new_msr; 13799 queue_chain_t req_q; /* queue of requests for this msync */ 13800 vm_map_entry_t entry; 13801 vm_map_size_t amount_left; 13802 vm_object_offset_t offset; 13803 boolean_t do_sync_req; 13804 boolean_t had_hole = FALSE; 13805 memory_object_t pager; 13806 13807 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) && 13808 (sync_flags & VM_SYNC_SYNCHRONOUS)) 13809 return(KERN_INVALID_ARGUMENT); 13810 13811 /* 13812 * align address and size on page boundaries 13813 */ 13814 size = (vm_map_round_page(address + size, 13815 VM_MAP_PAGE_MASK(map)) - 13816 vm_map_trunc_page(address, 13817 VM_MAP_PAGE_MASK(map))); 13818 address = vm_map_trunc_page(address, 13819 VM_MAP_PAGE_MASK(map)); 13820 13821 if (map == VM_MAP_NULL) 13822 return(KERN_INVALID_TASK); 13823 13824 if (size == 0) 13825 return(KERN_SUCCESS); 13826 13827 queue_init(&req_q); 13828 amount_left = size; 13829 13830 while (amount_left > 0) { 13831 vm_object_size_t flush_size; 13832 vm_object_t object; 13833 13834 vm_map_lock(map); 13835 if (!vm_map_lookup_entry(map, 13836 vm_map_trunc_page( 13837 address, 13838 VM_MAP_PAGE_MASK(map)), 13839 &entry)) { 13840 13841 vm_map_size_t skip; 13842 13843 /* 13844 * hole in the address map. 13845 */ 13846 had_hole = TRUE; 13847 13848 /* 13849 * Check for empty map. 13850 */ 13851 if (entry == vm_map_to_entry(map) && 13852 entry->vme_next == entry) { 13853 vm_map_unlock(map); 13854 break; 13855 } 13856 /* 13857 * Check that we don't wrap and that 13858 * we have at least one real map entry. 13859 */ 13860 if ((map->hdr.nentries == 0) || 13861 (entry->vme_next->vme_start < address)) { 13862 vm_map_unlock(map); 13863 break; 13864 } 13865 /* 13866 * Move up to the next entry if needed 13867 */ 13868 skip = (entry->vme_next->vme_start - address); 13869 if (skip >= amount_left) 13870 amount_left = 0; 13871 else 13872 amount_left -= skip; 13873 address = entry->vme_next->vme_start; 13874 vm_map_unlock(map); 13875 continue; 13876 } 13877 13878 offset = address - entry->vme_start; 13879 13880 /* 13881 * do we have more to flush than is contained in this 13882 * entry ? 
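 *
 * (For instance, if amount_left is 0x5000 but only 0x3000 bytes of
 * this entry remain at or beyond the current address, flush_size is
 * clamped to 0x3000 below and the rest is handled on the next pass.)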
13883 */ 13884 if (amount_left + entry->vme_start + offset > entry->vme_end) { 13885 flush_size = entry->vme_end - 13886 (entry->vme_start + offset); 13887 } else { 13888 flush_size = amount_left; 13889 } 13890 amount_left -= flush_size; 13891 address += flush_size; 13892 13893 if (entry->is_sub_map == TRUE) { 13894 vm_map_t local_map; 13895 vm_map_offset_t local_offset; 13896 13897 local_map = entry->object.sub_map; 13898 local_offset = entry->offset; 13899 vm_map_unlock(map); 13900 if (vm_map_msync( 13901 local_map, 13902 local_offset, 13903 flush_size, 13904 sync_flags) == KERN_INVALID_ADDRESS) { 13905 had_hole = TRUE; 13906 } 13907 continue; 13908 } 13909 object = entry->object.vm_object; 13910 13911 /* 13912 * We can't sync this object if the object has not been 13913 * created yet 13914 */ 13915 if (object == VM_OBJECT_NULL) { 13916 vm_map_unlock(map); 13917 continue; 13918 } 13919 offset += entry->offset; 13920 13921 vm_object_lock(object); 13922 13923 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { 13924 int kill_pages = 0; 13925 boolean_t reusable_pages = FALSE; 13926 13927 if (sync_flags & VM_SYNC_KILLPAGES) { 13928 if (object->ref_count == 1 && !object->shadow) 13929 kill_pages = 1; 13930 else 13931 kill_pages = -1; 13932 } 13933 if (kill_pages != -1) 13934 vm_object_deactivate_pages(object, offset, 13935 (vm_object_size_t)flush_size, kill_pages, reusable_pages); 13936 vm_object_unlock(object); 13937 vm_map_unlock(map); 13938 continue; 13939 } 13940 /* 13941 * We can't sync this object if there isn't a pager. 13942 * Don't bother to sync internal objects, since there can't 13943 * be any "permanent" storage for these objects anyway. 13944 */ 13945 if ((object->pager == MEMORY_OBJECT_NULL) || 13946 (object->internal) || (object->private)) { 13947 vm_object_unlock(object); 13948 vm_map_unlock(map); 13949 continue; 13950 } 13951 /* 13952 * keep reference on the object until syncing is done 13953 */ 13954 vm_object_reference_locked(object); 13955 vm_object_unlock(object); 13956 13957 vm_map_unlock(map); 13958 13959 do_sync_req = vm_object_sync(object, 13960 offset, 13961 flush_size, 13962 sync_flags & VM_SYNC_INVALIDATE, 13963 ((sync_flags & VM_SYNC_SYNCHRONOUS) || 13964 (sync_flags & VM_SYNC_ASYNCHRONOUS)), 13965 sync_flags & VM_SYNC_SYNCHRONOUS); 13966 /* 13967 * only send a m_o_s if we returned pages or if the entry 13968 * is writable (ie dirty pages may have already been sent back) 13969 */ 13970 if (!do_sync_req) { 13971 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) { 13972 /* 13973 * clear out the clustering and read-ahead hints 13974 */ 13975 vm_object_lock(object); 13976 13977 object->pages_created = 0; 13978 object->pages_used = 0; 13979 object->sequential = 0; 13980 object->last_alloc = 0; 13981 13982 vm_object_unlock(object); 13983 } 13984 vm_object_deallocate(object); 13985 continue; 13986 } 13987 msync_req_alloc(new_msr); 13988 13989 vm_object_lock(object); 13990 offset += object->paging_offset; 13991 13992 new_msr->offset = offset; 13993 new_msr->length = flush_size; 13994 new_msr->object = object; 13995 new_msr->flag = VM_MSYNC_SYNCHRONIZING; 13996 re_iterate: 13997 13998 /* 13999 * We can't sync this object if there isn't a pager. The 14000 * pager can disappear anytime we're not holding the object 14001 * lock. So this has to be checked anytime we goto re_iterate. 
14002 */ 14003 14004 pager = object->pager; 14005 14006 if (pager == MEMORY_OBJECT_NULL) { 14007 vm_object_unlock(object); 14008 vm_object_deallocate(object); 14009 msync_req_free(new_msr); 14010 new_msr = NULL; 14011 continue; 14012 } 14013 14014 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) { 14015 /* 14016 * need to check for an overlapping entry; if found, wait 14017 * for the overlapping msr to be done, then reiterate 14018 */ 14019 msr_lock(msr); 14020 if (msr->flag == VM_MSYNC_SYNCHRONIZING && 14021 ((offset >= msr->offset && 14022 offset < (msr->offset + msr->length)) || 14023 (msr->offset >= offset && 14024 msr->offset < (offset + flush_size)))) 14025 { 14026 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE); 14027 msr_unlock(msr); 14028 vm_object_unlock(object); 14029 thread_block(THREAD_CONTINUE_NULL); 14030 vm_object_lock(object); 14031 goto re_iterate; 14032 } 14033 msr_unlock(msr); 14034 }/* queue_iterate */ 14035 14036 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q); 14037 14038 vm_object_paging_begin(object); 14039 vm_object_unlock(object); 14040 14041 queue_enter(&req_q, new_msr, msync_req_t, req_q); 14042 14043 (void) memory_object_synchronize( 14044 pager, 14045 offset, 14046 flush_size, 14047 sync_flags & ~VM_SYNC_CONTIGUOUS); 14048 14049 vm_object_lock(object); 14050 vm_object_paging_end(object); 14051 vm_object_unlock(object); 14052 }/* while */ 14053 14054 /* 14055 * wait for memory_object_synchronize_completed messages from pager(s) 14056 */ 14057 14058 while (!queue_empty(&req_q)) { 14059 msr = (msync_req_t)queue_first(&req_q); 14060 msr_lock(msr); 14061 while(msr->flag != VM_MSYNC_DONE) { 14062 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE); 14063 msr_unlock(msr); 14064 thread_block(THREAD_CONTINUE_NULL); 14065 msr_lock(msr); 14066 }/* while */ 14067 queue_remove(&req_q, msr, msync_req_t, req_q); 14068 msr_unlock(msr); 14069 vm_object_deallocate(msr->object); 14070 msync_req_free(msr); 14071 }/* while */ 14072 14073 /* for proper msync() behaviour */ 14074 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) 14075 return(KERN_INVALID_ADDRESS); 14076 14077 return(KERN_SUCCESS); 14078}/* vm_msync */ 14079 14080/* 14081 * Routine: convert_port_entry_to_map 14082 * Purpose: 14083 * Convert from a port specifying an entry or a task 14084 * to a map. Doesn't consume the port ref; produces a map ref, 14085 * which may be null. Unlike convert_port_to_map, the 14086 * port may be task or named-entry backed. 14087 * Conditions: 14088 * Nothing locked.
14089 */ 14090 14091 14092vm_map_t 14093convert_port_entry_to_map( 14094 ipc_port_t port) 14095{ 14096 vm_map_t map; 14097 vm_named_entry_t named_entry; 14098 uint32_t try_failed_count = 0; 14099 14100 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { 14101 while(TRUE) { 14102 ip_lock(port); 14103 if(ip_active(port) && (ip_kotype(port) 14104 == IKOT_NAMED_ENTRY)) { 14105 named_entry = 14106 (vm_named_entry_t)port->ip_kobject; 14107 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { 14108 ip_unlock(port); 14109 14110 try_failed_count++; 14111 mutex_pause(try_failed_count); 14112 continue; 14113 } 14114 named_entry->ref_count++; 14115 lck_mtx_unlock(&(named_entry)->Lock); 14116 ip_unlock(port); 14117 if ((named_entry->is_sub_map) && 14118 (named_entry->protection 14119 & VM_PROT_WRITE)) { 14120 map = named_entry->backing.map; 14121 } else { 14122 mach_destroy_memory_entry(port); 14123 return VM_MAP_NULL; 14124 } 14125 vm_map_reference_swap(map); 14126 mach_destroy_memory_entry(port); 14127 break; 14128 } 14129 else 14130 return VM_MAP_NULL; 14131 } 14132 } 14133 else 14134 map = convert_port_to_map(port); 14135 14136 return map; 14137} 14138 14139/* 14140 * Routine: convert_port_entry_to_object 14141 * Purpose: 14142 * Convert from a port specifying a named entry to an 14143 * object. Doesn't consume the port ref; produces a map ref, 14144 * which may be null. 14145 * Conditions: 14146 * Nothing locked. 14147 */ 14148 14149 14150vm_object_t 14151convert_port_entry_to_object( 14152 ipc_port_t port) 14153{ 14154 vm_object_t object = VM_OBJECT_NULL; 14155 vm_named_entry_t named_entry; 14156 uint32_t try_failed_count = 0; 14157 14158 if (IP_VALID(port) && 14159 (ip_kotype(port) == IKOT_NAMED_ENTRY)) { 14160 try_again: 14161 ip_lock(port); 14162 if (ip_active(port) && 14163 (ip_kotype(port) == IKOT_NAMED_ENTRY)) { 14164 named_entry = (vm_named_entry_t)port->ip_kobject; 14165 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) { 14166 ip_unlock(port); 14167 try_failed_count++; 14168 mutex_pause(try_failed_count); 14169 goto try_again; 14170 } 14171 named_entry->ref_count++; 14172 lck_mtx_unlock(&(named_entry)->Lock); 14173 ip_unlock(port); 14174 if (!(named_entry->is_sub_map) && 14175 !(named_entry->is_pager) && 14176 !(named_entry->is_copy) && 14177 (named_entry->protection & VM_PROT_WRITE)) { 14178 object = named_entry->backing.object; 14179 vm_object_reference(object); 14180 } 14181 mach_destroy_memory_entry(port); 14182 } 14183 } 14184 14185 return object; 14186} 14187 14188/* 14189 * Export routines to other components for the things we access locally through 14190 * macros. 14191 */ 14192#undef current_map 14193vm_map_t 14194current_map(void) 14195{ 14196 return (current_map_fast()); 14197} 14198 14199/* 14200 * vm_map_reference: 14201 * 14202 * Most code internal to the osfmk will go through a 14203 * macro defining this. This is always here for the 14204 * use of other kernel components. 14205 */ 14206#undef vm_map_reference 14207void 14208vm_map_reference( 14209 register vm_map_t map) 14210{ 14211 if (map == VM_MAP_NULL) 14212 return; 14213 14214 lck_mtx_lock(&map->s_lock); 14215#if TASK_SWAPPER 14216 assert(map->res_count > 0); 14217 assert(map->ref_count >= map->res_count); 14218 map->res_count++; 14219#endif 14220 map->ref_count++; 14221 lck_mtx_unlock(&map->s_lock); 14222} 14223 14224/* 14225 * vm_map_deallocate: 14226 * 14227 * Removes a reference from the specified map, 14228 * destroying it if no references remain. 14229 * The map should not be locked. 
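 *
 * (This is the counterpart of vm_map_reference() above: each
 * reference taken that way should eventually be balanced by one
 * vm_map_deallocate(), and dropping the last reference tears the
 * map down via vm_map_destroy().)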
14230 */ 14231void 14232vm_map_deallocate( 14233 register vm_map_t map) 14234{ 14235 unsigned int ref; 14236 14237 if (map == VM_MAP_NULL) 14238 return; 14239 14240 lck_mtx_lock(&map->s_lock); 14241 ref = --map->ref_count; 14242 if (ref > 0) { 14243 vm_map_res_deallocate(map); 14244 lck_mtx_unlock(&map->s_lock); 14245 return; 14246 } 14247 assert(map->ref_count == 0); 14248 lck_mtx_unlock(&map->s_lock); 14249 14250#if TASK_SWAPPER 14251 /* 14252 * The map residence count isn't decremented here because 14253 * the vm_map_delete below will traverse the entire map, 14254 * deleting entries, and the residence counts on objects 14255 * and sharing maps will go away then. 14256 */ 14257#endif 14258 14259 vm_map_destroy(map, VM_MAP_NO_FLAGS); 14260} 14261 14262 14263void 14264vm_map_disable_NX(vm_map_t map) 14265{ 14266 if (map == NULL) 14267 return; 14268 if (map->pmap == NULL) 14269 return; 14270 14271 pmap_disable_NX(map->pmap); 14272} 14273 14274void 14275vm_map_disallow_data_exec(vm_map_t map) 14276{ 14277 if (map == NULL) 14278 return; 14279 14280 map->map_disallow_data_exec = TRUE; 14281} 14282 14283/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS) 14284 * more descriptive. 14285 */ 14286void 14287vm_map_set_32bit(vm_map_t map) 14288{ 14289 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS; 14290} 14291 14292 14293void 14294vm_map_set_64bit(vm_map_t map) 14295{ 14296 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS; 14297} 14298 14299vm_map_offset_t 14300vm_compute_max_offset(unsigned is64) 14301{ 14302 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS); 14303} 14304 14305uint64_t 14306vm_map_get_max_aslr_slide_pages(vm_map_t map) 14307{ 14308 return (1 << (vm_map_is_64bit(map) ? 16 : 8)); 14309} 14310 14311boolean_t 14312vm_map_is_64bit( 14313 vm_map_t map) 14314{ 14315 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS); 14316} 14317 14318boolean_t 14319vm_map_has_hard_pagezero( 14320 vm_map_t map, 14321 vm_map_offset_t pagezero_size) 14322{ 14323 /* 14324 * XXX FBDP 14325 * We should lock the VM map (for read) here but we can get away 14326 * with it for now because there can't really be any race condition: 14327 * the VM map's min_offset is changed only when the VM map is created 14328 * and when the zero page is established (when the binary gets loaded), 14329 * and this routine gets called only when the task terminates and the 14330 * VM map is being torn down, and when a new map is created via 14331 * load_machfile()/execve(). 14332 */ 14333 return (map->min_offset >= pagezero_size); 14334} 14335 14336/* 14337 * Raise a VM map's maximun offset. 14338 */ 14339kern_return_t 14340vm_map_raise_max_offset( 14341 vm_map_t map, 14342 vm_map_offset_t new_max_offset) 14343{ 14344 kern_return_t ret; 14345 14346 vm_map_lock(map); 14347 ret = KERN_INVALID_ADDRESS; 14348 14349 if (new_max_offset >= map->max_offset) { 14350 if (!vm_map_is_64bit(map)) { 14351 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) { 14352 map->max_offset = new_max_offset; 14353 ret = KERN_SUCCESS; 14354 } 14355 } else { 14356 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) { 14357 map->max_offset = new_max_offset; 14358 ret = KERN_SUCCESS; 14359 } 14360 } 14361 } 14362 14363 vm_map_unlock(map); 14364 return ret; 14365} 14366 14367 14368/* 14369 * Raise a VM map's minimum offset. 14370 * To strictly enforce "page zero" reservation. 
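 *
 * (Illustrative use, assuming a 64-bit task with the usual 4GB
 * __PAGEZERO: raising min_offset to 0x100000000 keeps the low 4GB
 * unmapped so NULL and truncated 32-bit pointer dereferences fault;
 * the call fails with KERN_NO_SPACE if memory is already allocated
 * below the requested minimum.)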
/*
 * Raise a VM map's minimum offset.
 * To strictly enforce "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
    vm_map_t        map,
    vm_map_offset_t new_min_offset)
{
    vm_map_entry_t  first_entry;

    new_min_offset = vm_map_round_page(new_min_offset,
                                       VM_MAP_PAGE_MASK(map));

    vm_map_lock(map);

    if (new_min_offset < map->min_offset) {
        /*
         * Can't move min_offset backwards, as that would expose
         * a part of the address space that was previously, and for
         * possibly good reasons, inaccessible.
         */
        vm_map_unlock(map);
        return KERN_INVALID_ADDRESS;
    }

    first_entry = vm_map_first_entry(map);
    if (first_entry != vm_map_to_entry(map) &&
        first_entry->vme_start < new_min_offset) {
        /*
         * Some memory was already allocated below the new
         * minimum offset.  It's too late to change it now...
         */
        vm_map_unlock(map);
        return KERN_NO_SPACE;
    }

    map->min_offset = new_min_offset;

    vm_map_unlock(map);

    return KERN_SUCCESS;
}

/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side
 * of the kernel.  The limit is checked on the Mach VM side, so we keep a copy here
 * to avoid reaching over to the BSD data structures.
 */

void
vm_map_set_user_wire_limit(vm_map_t  map,
                           vm_size_t limit)
{
    map->user_wire_limit = limit;
}


void vm_map_switch_protect(vm_map_t  map,
                           boolean_t val)
{
    vm_map_lock(map);
    map->switch_protect = val;
    vm_map_unlock(map);
}

/*
 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
 * bump both counters.
 */
void
vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
{
    pmap_t pmap = vm_map_pmap(map);

    ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
    ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}

void
vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
{
    pmap_t pmap = vm_map_pmap(map);

    ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
    ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
}

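/*
 * Illustrative sketch (hypothetical caller): the two ledger hooks above are
 * meant to be called in balanced pairs by the IOKit mapping path, so that
 * iokit_mapped and the composite phys_footprint ledger entries net out to
 * zero once the mapping is torn down.
 */
#if 0   /* usage sketch only */
static void
example_iokit_map_lifecycle(vm_map_t map, vm_size_t bytes)
{
    vm_map_iokit_mapped_region(map, bytes);     /* credit both ledgers */
    /* ... mapping is live ... */
    vm_map_iokit_unmapped_region(map, bytes);   /* debit the same amount */
}
#endif
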
/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t vm_map_sign(vm_map_t        map,
                          vm_map_offset_t start,
                          vm_map_offset_t end)
{
    vm_map_entry_t entry;
    vm_page_t      m;
    vm_object_t    object;

    /*
     * Vet all the input parameters and current type and state of the
     * underlying object.  Return with an error if anything is amiss.
     */
    if (map == VM_MAP_NULL)
        return(KERN_INVALID_ARGUMENT);

    vm_map_lock_read(map);

    if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
        /*
         * Must pass a valid non-submap address.
         */
        vm_map_unlock_read(map);
        return(KERN_INVALID_ADDRESS);
    }

    if ((entry->vme_start > start) || (entry->vme_end < end)) {
        /*
         * Map entry doesn't cover the requested range.  Not handling
         * this situation currently.
         */
        vm_map_unlock_read(map);
        return(KERN_INVALID_ARGUMENT);
    }

    object = entry->object.vm_object;
    if (object == VM_OBJECT_NULL) {
        /*
         * Object must already be present or we can't sign.
         */
        vm_map_unlock_read(map);
        return KERN_INVALID_ARGUMENT;
    }

    vm_object_lock(object);
    vm_map_unlock_read(map);

    while (start < end) {
        uint32_t refmod;

        m = vm_page_lookup(object, start - entry->vme_start + entry->offset);
        if (m == VM_PAGE_NULL) {
            /* should we try to fault a page here? we can probably
             * demand it exists and is locked for this request */
            vm_object_unlock(object);
            return KERN_FAILURE;
        }
        /* deal with special page status */
        if (m->busy ||
            (m->unusual && (m->error || m->restart || m->private || m->absent))) {
            vm_object_unlock(object);
            return KERN_FAILURE;
        }

        /* Page is OK... now "validate" it */
        /* This is the place where we'll call out to create a code
         * directory, later */
        m->cs_validated = TRUE;

        /* The page is now "clean" for codesigning purposes.  That means
         * we don't consider it as modified (wpmapped) anymore.  But
         * we'll disconnect the page so we note any future modification
         * attempts. */
        m->wpmapped = FALSE;
        refmod = pmap_disconnect(m->phys_page);

        /* Pull the dirty status from the pmap, since we cleared the
         * wpmapped bit */
        if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
            SET_PAGE_DIRTY(m, FALSE);
        }

        /* On to the next page */
        start += PAGE_SIZE;
    }
    vm_object_unlock(object);

    return KERN_SUCCESS;
}
#endif

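/*
 * Illustrative sketch: vm_map_sign() expects a range that lies entirely
 * within a single non-submap entry whose object is already present; it marks
 * each resident page cs_validated and disconnects it from the pmap so later
 * writes are noticed.  A hypothetical caller might sign one dynamically
 * generated code page like this.
 */
#if 0   /* usage sketch only, requires CONFIG_DYNAMIC_CODE_SIGNING */
static kern_return_t
example_sign_one_page(vm_map_t map, vm_map_offset_t addr)
{
    vm_map_offset_t start = vm_map_trunc_page_mask(addr, VM_MAP_PAGE_MASK(map));

    return vm_map_sign(map, start, start + VM_MAP_PAGE_SIZE(map));
}
#endif
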
kern_return_t vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident,
                                  unsigned int *reclaimed_compressed)
{
    vm_map_entry_t  entry = VM_MAP_ENTRY_NULL;
    vm_map_entry_t  next_entry;
    kern_return_t   kr = KERN_SUCCESS;
    vm_map_t        zap_map;

    vm_map_lock(map);

    /*
     * We use a "zap_map" to avoid having to unlock
     * the "map" in vm_map_delete().
     */
    zap_map = vm_map_create(PMAP_NULL,
                            map->min_offset,
                            map->max_offset,
                            map->hdr.entries_pageable);

    if (zap_map == VM_MAP_NULL) {
        /* drop the map lock taken above before bailing out */
        vm_map_unlock(map);
        return KERN_RESOURCE_SHORTAGE;
    }

    vm_map_set_page_shift(zap_map,
                          VM_MAP_PAGE_SHIFT(map));

    for (entry = vm_map_first_entry(map);
         entry != vm_map_to_entry(map);
         entry = next_entry) {
        next_entry = entry->vme_next;

        if (entry->object.vm_object && !entry->is_sub_map &&
            (entry->object.vm_object->internal == TRUE) &&
            (entry->object.vm_object->ref_count == 1)) {

            *reclaimed_resident += entry->object.vm_object->resident_page_count;
            *reclaimed_compressed += vm_compressor_pager_get_count(entry->object.vm_object->pager);

            (void)vm_map_delete(map,
                                entry->vme_start,
                                entry->vme_end,
                                VM_MAP_REMOVE_SAVE_ENTRIES,
                                zap_map);
        }
    }

    vm_map_unlock(map);

    /*
     * Get rid of the "zap_map" and all the map entries that
     * it may still contain.
     */
    if (zap_map != VM_MAP_NULL) {
        vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
        zap_map = VM_MAP_NULL;
    }

    return kr;
}

#if CONFIG_FREEZE

kern_return_t vm_map_freeze_walk(
    vm_map_t     map,
    unsigned int *purgeable_count,
    unsigned int *wired_count,
    unsigned int *clean_count,
    unsigned int *dirty_count,
    unsigned int dirty_budget,
    boolean_t    *has_shared)
{
    vm_map_entry_t entry;

    vm_map_lock_read(map);

    *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
    *has_shared = FALSE;

    for (entry = vm_map_first_entry(map);
         entry != vm_map_to_entry(map);
         entry = entry->vme_next) {
        unsigned int purgeable, clean, dirty, wired;
        boolean_t    shared;

        if ((entry->object.vm_object == 0) ||
            (entry->is_sub_map) ||
            (entry->object.vm_object->phys_contiguous)) {
            continue;
        }

        default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);

        *purgeable_count += purgeable;
        *wired_count += wired;
        *clean_count += clean;
        *dirty_count += dirty;

        if (shared) {
            *has_shared = TRUE;
        }

        /* Adjust pageout budget and finish up if reached */
        if (dirty_budget) {
            dirty_budget -= dirty;
            if (dirty_budget == 0) {
                break;
            }
        }
    }

    vm_map_unlock_read(map);

    return KERN_SUCCESS;
}

kern_return_t vm_map_freeze(
    vm_map_t     map,
    unsigned int *purgeable_count,
    unsigned int *wired_count,
    unsigned int *clean_count,
    unsigned int *dirty_count,
    unsigned int dirty_budget,
    boolean_t    *has_shared)
{
    vm_map_entry_t  entry2 = VM_MAP_ENTRY_NULL;
    kern_return_t   kr = KERN_SUCCESS;
    boolean_t       default_freezer_active = TRUE;

    *purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
    *has_shared = FALSE;

    /*
     * We need the exclusive lock here so that we can
     * block any page faults or lookups while we are
     * in the middle of freezing this vm map.
     */
    vm_map_lock(map);

    if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
        default_freezer_active = FALSE;
    }

    if (default_freezer_active) {
        if (map->default_freezer_handle == NULL) {
            map->default_freezer_handle = default_freezer_handle_allocate();
        }

        if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
            /*
             * Can happen if the default_freezer_handle passed in is NULL,
             * or if a table has already been allocated and associated
             * with this handle, i.e. the map is already frozen.
             */
            goto done;
        }
    }

    for (entry2 = vm_map_first_entry(map);
         entry2 != vm_map_to_entry(map);
         entry2 = entry2->vme_next) {

        vm_object_t src_object = entry2->object.vm_object;

        if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
            /* If eligible, scan the entry, moving eligible pages over to our parent object */
            if (default_freezer_active) {
                unsigned int purgeable, clean, dirty, wired;
                boolean_t    shared;

                default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
                                     src_object, map->default_freezer_handle);

                *purgeable_count += purgeable;
                *wired_count += wired;
                *clean_count += clean;
                *dirty_count += dirty;

                /* Adjust pageout budget and finish up if reached */
                if (dirty_budget) {
                    dirty_budget -= dirty;
                    if (dirty_budget == 0) {
                        break;
                    }
                }

                if (shared) {
                    *has_shared = TRUE;
                }
            } else {
                /*
                 * To the compressor.
                 */
                if (entry2->object.vm_object->internal == TRUE) {
                    vm_object_pageout(entry2->object.vm_object);
                }
            }
        }
    }

    if (default_freezer_active) {
        /* Finally, throw out the pages to swap */
        default_freezer_pageout(map->default_freezer_handle);
    }

done:
    vm_map_unlock(map);

    return kr;
}

kern_return_t
vm_map_thaw(
    vm_map_t map)
{
    kern_return_t kr = KERN_SUCCESS;

    if (COMPRESSED_PAGER_IS_ACTIVE || DEFAULT_FREEZER_COMPRESSED_PAGER_IS_ACTIVE) {
        /*
         * We will on-demand thaw in the presence of the compressed pager.
         */
        return kr;
    }

    vm_map_lock(map);

    if (map->default_freezer_handle == NULL) {
        /*
         * This map is not in a frozen state.
         */
        kr = KERN_FAILURE;
        goto out;
    }

    kr = default_freezer_unpack(map->default_freezer_handle);
out:
    vm_map_unlock(map);

    return kr;
}
#endif

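/*
 * Illustrative sketch (hypothetical freezer client): vm_map_freeze() is
 * called with a dirty-page budget and the returned counts are inspected;
 * vm_map_thaw() only matters for the default freezer, since with the
 * compressed pager active pages come back on demand and thaw is a no-op.
 */
#if 0   /* usage sketch only, requires CONFIG_FREEZE */
static kern_return_t
example_freeze_task_map(vm_map_t map, unsigned int budget)
{
    unsigned int purgeable, wired, clean, dirty;
    boolean_t    has_shared;

    return vm_map_freeze(map, &purgeable, &wired, &clean, &dirty,
                         budget, &has_shared);
}
#endif
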
/*
 * vm_map_entry_should_cow_for_true_share:
 *
 * Determines if the map entry should be clipped and set up for copy-on-write
 * to avoid applying "true_share" to a large VM object when only a subset is
 * targeted.
 *
 * For now, we target only the map entries created for the Objective C
 * Garbage Collector, which initially have the following properties:
 *      - alias == VM_MEMORY_MALLOC
 *      - wired_count == 0
 *      - !needs_copy
 * and a VM object with:
 *      - internal
 *      - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
 *      - !true_share
 *      - vo_size == ANON_CHUNK_SIZE
 */
boolean_t
vm_map_entry_should_cow_for_true_share(
    vm_map_entry_t entry)
{
    vm_object_t object;

    if (entry->is_sub_map) {
        /* entry does not point at a VM object */
        return FALSE;
    }

    if (entry->needs_copy) {
        /* already set for copy_on_write: done! */
        return FALSE;
    }

    if (entry->alias != VM_MEMORY_MALLOC &&
        entry->alias != VM_MEMORY_MALLOC_SMALL) {
        /* not a malloc heap or Obj-C Garbage Collector heap */
        return FALSE;
    }

    if (entry->wired_count) {
        /* wired: can't change the map entry... */
        vm_counters.should_cow_but_wired++;
        return FALSE;
    }

    object = entry->object.vm_object;

    if (object == VM_OBJECT_NULL) {
        /* no object yet... */
        return FALSE;
    }

    if (!object->internal) {
        /* not an internal object */
        return FALSE;
    }

    if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
        /* not the default copy strategy */
        return FALSE;
    }

    if (object->true_share) {
        /* already true_share: too late to avoid it */
        return FALSE;
    }

    if (entry->alias == VM_MEMORY_MALLOC &&
        object->vo_size != ANON_CHUNK_SIZE) {
        /* ... not an object created for the ObjC Garbage Collector */
        return FALSE;
    }

    if (entry->alias == VM_MEMORY_MALLOC_SMALL &&
        object->vo_size != 2048 * 4096) {
        /* ... not a "MALLOC_SMALL" heap */
        return FALSE;
    }

    /*
     * All the criteria match: we have a large object being targeted for "true_share".
     * To limit the adverse side-effects linked with "true_share", tell the caller to
     * try and avoid setting up the entire object for "true_share" by clipping the
     * targeted range and setting it up for copy-on-write.
     */
    return TRUE;
}

vm_map_offset_t
vm_map_round_page_mask(
    vm_map_offset_t offset,
    vm_map_offset_t mask)
{
    return VM_MAP_ROUND_PAGE(offset, mask);
}

vm_map_offset_t
vm_map_trunc_page_mask(
    vm_map_offset_t offset,
    vm_map_offset_t mask)
{
    return VM_MAP_TRUNC_PAGE(offset, mask);
}

int
vm_map_page_shift(
    vm_map_t map)
{
    return VM_MAP_PAGE_SHIFT(map);
}

int
vm_map_page_size(
    vm_map_t map)
{
    return VM_MAP_PAGE_SIZE(map);
}

int
vm_map_page_mask(
    vm_map_t map)
{
    return VM_MAP_PAGE_MASK(map);
}

kern_return_t
vm_map_set_page_shift(
    vm_map_t map,
    int      pageshift)
{
    if (map->hdr.nentries != 0) {
        /* too late to change page size */
        return KERN_FAILURE;
    }

    map->hdr.page_shift = pageshift;

    return KERN_SUCCESS;
}

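/*
 * Worked example (illustrative only): with 16 KB map pages the page mask is
 * 0x3FFF, so vm_map_trunc_page_mask(0x5FFF, 0x3FFF) yields 0x4000 and
 * vm_map_round_page_mask(0x4001, 0x3FFF) yields 0x8000; with 4 KB pages
 * (mask 0xFFF) the same offsets truncate to 0x5000 and round to 0x5000.
 */
#if 0   /* worked example only */
assert(vm_map_trunc_page_mask(0x5FFF, 0x3FFF) == 0x4000);
assert(vm_map_round_page_mask(0x4001, 0x3FFF) == 0x8000);
#endif
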
int
vm_map_purge(
    vm_map_t map)
{
    int             num_object_purged;
    vm_map_entry_t  entry;
    vm_map_offset_t next_address;
    vm_object_t     object;
    int             state;
    kern_return_t   kr;

    num_object_purged = 0;

    vm_map_lock_read(map);
    entry = vm_map_first_entry(map);
    while (entry != vm_map_to_entry(map)) {
        if (entry->is_sub_map) {
            goto next;
        }
        if (!(entry->protection & VM_PROT_WRITE)) {
            goto next;
        }
        object = entry->object.vm_object;
        if (object == VM_OBJECT_NULL) {
            goto next;
        }
        if (object->purgable != VM_PURGABLE_VOLATILE) {
            goto next;
        }

        vm_object_lock(object);
#if 00
        if (entry->offset != 0 ||
            (entry->vme_end - entry->vme_start) != object->vo_size) {
            vm_object_unlock(object);
            goto next;
        }
#endif
        next_address = entry->vme_end;
        vm_map_unlock_read(map);
        state = VM_PURGABLE_EMPTY;
        kr = vm_object_purgable_control(object,
                                        VM_PURGABLE_SET_STATE,
                                        &state);
        if (kr == KERN_SUCCESS) {
            num_object_purged++;
        }
        vm_object_unlock(object);

        vm_map_lock_read(map);
        if (vm_map_lookup_entry(map, next_address, &entry)) {
            continue;
        }
    next:
        entry = entry->vme_next;
    }
    vm_map_unlock_read(map);

    return num_object_purged;
}

15034 */ 15035 continue; 15036 } 15037 resident_count = object->resident_page_count; 15038 if ((entry->offset / PAGE_SIZE) >= resident_count) { 15039 resident_count = 0; 15040 } else { 15041 resident_count -= (entry->offset / PAGE_SIZE); 15042 } 15043 15044 volatile_virtual_size += entry->vme_end - entry->vme_start; 15045 volatile_resident_count += resident_count; 15046 volatile_pmap_count += pmap_query_resident(map->pmap, 15047 entry->vme_start, 15048 entry->vme_end); 15049 } 15050 15051 /* map is still locked on return */ 15052 15053 *volatile_virtual_size_p = volatile_virtual_size; 15054 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE; 15055 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE; 15056 15057 return KERN_SUCCESS; 15058} 15059 15060#if VM_SCAN_FOR_SHADOW_CHAIN 15061int vm_map_shadow_max(vm_map_t map); 15062int vm_map_shadow_max( 15063 vm_map_t map) 15064{ 15065 int shadows, shadows_max; 15066 vm_map_entry_t entry; 15067 vm_object_t object, next_object; 15068 15069 if (map == NULL) 15070 return 0; 15071 15072 shadows_max = 0; 15073 15074 vm_map_lock_read(map); 15075 15076 for (entry = vm_map_first_entry(map); 15077 entry != vm_map_to_entry(map); 15078 entry = entry->vme_next) { 15079 if (entry->is_sub_map) { 15080 continue; 15081 } 15082 object = entry->object.vm_object; 15083 if (object == NULL) { 15084 continue; 15085 } 15086 vm_object_lock_shared(object); 15087 for (shadows = 0; 15088 object->shadow != NULL; 15089 shadows++, object = next_object) { 15090 next_object = object->shadow; 15091 vm_object_lock_shared(next_object); 15092 vm_object_unlock(object); 15093 } 15094 vm_object_unlock(object); 15095 if (shadows > shadows_max) { 15096 shadows_max = shadows; 15097 } 15098 } 15099 15100 vm_map_unlock_read(map); 15101 15102 return shadows_max; 15103} 15104#endif /* VM_SCAN_FOR_SHADOW_CHAIN */ 15105