1/* 2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58/* 59 * File: vm/vm_map.c 60 * Author: Avadis Tevanian, Jr., Michael Wayne Young 61 * Date: 1985 62 * 63 * Virtual memory mapping module. 
64 */ 65 66#include <task_swapper.h> 67#include <mach_assert.h> 68#include <libkern/OSAtomic.h> 69 70#include <mach/kern_return.h> 71#include <mach/port.h> 72#include <mach/vm_attributes.h> 73#include <mach/vm_param.h> 74#include <mach/vm_behavior.h> 75#include <mach/vm_statistics.h> 76#include <mach/memory_object.h> 77#include <mach/mach_vm.h> 78#include <machine/cpu_capabilities.h> 79#include <mach/sdt.h> 80 81#include <kern/assert.h> 82#include <kern/counters.h> 83#include <kern/kalloc.h> 84#include <kern/zalloc.h> 85 86#include <vm/cpm.h> 87#include <vm/vm_init.h> 88#include <vm/vm_fault.h> 89#include <vm/vm_map.h> 90#include <vm/vm_object.h> 91#include <vm/vm_page.h> 92#include <vm/vm_pageout.h> 93#include <vm/vm_kern.h> 94#include <ipc/ipc_port.h> 95#include <kern/sched_prim.h> 96#include <kern/misc_protos.h> 97#include <kern/xpr.h> 98 99#include <mach/vm_map_server.h> 100#include <mach/mach_host_server.h> 101#include <vm/vm_protos.h> 102#include <vm/vm_purgeable_internal.h> 103 104#include <vm/vm_protos.h> 105#include <vm/vm_shared_region.h> 106#include <vm/vm_map_store.h> 107 108extern u_int32_t random(void); /* from <libkern/libkern.h> */ 109/* Internal prototypes 110 */ 111 112static void vm_map_simplify_range( 113 vm_map_t map, 114 vm_map_offset_t start, 115 vm_map_offset_t end); /* forward */ 116 117static boolean_t vm_map_range_check( 118 vm_map_t map, 119 vm_map_offset_t start, 120 vm_map_offset_t end, 121 vm_map_entry_t *entry); 122 123static vm_map_entry_t _vm_map_entry_create( 124 struct vm_map_header *map_header, boolean_t map_locked); 125 126static void _vm_map_entry_dispose( 127 struct vm_map_header *map_header, 128 vm_map_entry_t entry); 129 130static void vm_map_pmap_enter( 131 vm_map_t map, 132 vm_map_offset_t addr, 133 vm_map_offset_t end_addr, 134 vm_object_t object, 135 vm_object_offset_t offset, 136 vm_prot_t protection); 137 138static void _vm_map_clip_end( 139 struct vm_map_header *map_header, 140 vm_map_entry_t entry, 141 vm_map_offset_t end); 142 143static void _vm_map_clip_start( 144 struct vm_map_header *map_header, 145 vm_map_entry_t entry, 146 vm_map_offset_t start); 147 148static void vm_map_entry_delete( 149 vm_map_t map, 150 vm_map_entry_t entry); 151 152static kern_return_t vm_map_delete( 153 vm_map_t map, 154 vm_map_offset_t start, 155 vm_map_offset_t end, 156 int flags, 157 vm_map_t zap_map); 158 159static kern_return_t vm_map_copy_overwrite_unaligned( 160 vm_map_t dst_map, 161 vm_map_entry_t entry, 162 vm_map_copy_t copy, 163 vm_map_address_t start); 164 165static kern_return_t vm_map_copy_overwrite_aligned( 166 vm_map_t dst_map, 167 vm_map_entry_t tmp_entry, 168 vm_map_copy_t copy, 169 vm_map_offset_t start, 170 pmap_t pmap); 171 172static kern_return_t vm_map_copyin_kernel_buffer( 173 vm_map_t src_map, 174 vm_map_address_t src_addr, 175 vm_map_size_t len, 176 boolean_t src_destroy, 177 vm_map_copy_t *copy_result); /* OUT */ 178 179static kern_return_t vm_map_copyout_kernel_buffer( 180 vm_map_t map, 181 vm_map_address_t *addr, /* IN/OUT */ 182 vm_map_copy_t copy, 183 boolean_t overwrite); 184 185static void vm_map_fork_share( 186 vm_map_t old_map, 187 vm_map_entry_t old_entry, 188 vm_map_t new_map); 189 190static boolean_t vm_map_fork_copy( 191 vm_map_t old_map, 192 vm_map_entry_t *old_entry_p, 193 vm_map_t new_map); 194 195void vm_map_region_top_walk( 196 vm_map_entry_t entry, 197 vm_region_top_info_t top); 198 199void vm_map_region_walk( 200 vm_map_t map, 201 vm_map_offset_t va, 202 vm_map_entry_t entry, 203 vm_object_offset_t offset, 204 
vm_object_size_t range, 205 vm_region_extended_info_t extended, 206 boolean_t look_for_pages); 207 208static kern_return_t vm_map_wire_nested( 209 vm_map_t map, 210 vm_map_offset_t start, 211 vm_map_offset_t end, 212 vm_prot_t access_type, 213 boolean_t user_wire, 214 pmap_t map_pmap, 215 vm_map_offset_t pmap_addr); 216 217static kern_return_t vm_map_unwire_nested( 218 vm_map_t map, 219 vm_map_offset_t start, 220 vm_map_offset_t end, 221 boolean_t user_wire, 222 pmap_t map_pmap, 223 vm_map_offset_t pmap_addr); 224 225static kern_return_t vm_map_overwrite_submap_recurse( 226 vm_map_t dst_map, 227 vm_map_offset_t dst_addr, 228 vm_map_size_t dst_size); 229 230static kern_return_t vm_map_copy_overwrite_nested( 231 vm_map_t dst_map, 232 vm_map_offset_t dst_addr, 233 vm_map_copy_t copy, 234 boolean_t interruptible, 235 pmap_t pmap, 236 boolean_t discard_on_success); 237 238static kern_return_t vm_map_remap_extract( 239 vm_map_t map, 240 vm_map_offset_t addr, 241 vm_map_size_t size, 242 boolean_t copy, 243 struct vm_map_header *map_header, 244 vm_prot_t *cur_protection, 245 vm_prot_t *max_protection, 246 vm_inherit_t inheritance, 247 boolean_t pageable); 248 249static kern_return_t vm_map_remap_range_allocate( 250 vm_map_t map, 251 vm_map_address_t *address, 252 vm_map_size_t size, 253 vm_map_offset_t mask, 254 int flags, 255 vm_map_entry_t *map_entry); 256 257static void vm_map_region_look_for_page( 258 vm_map_t map, 259 vm_map_offset_t va, 260 vm_object_t object, 261 vm_object_offset_t offset, 262 int max_refcnt, 263 int depth, 264 vm_region_extended_info_t extended); 265 266static int vm_map_region_count_obj_refs( 267 vm_map_entry_t entry, 268 vm_object_t object); 269 270 271static kern_return_t vm_map_willneed( 272 vm_map_t map, 273 vm_map_offset_t start, 274 vm_map_offset_t end); 275 276static kern_return_t vm_map_reuse_pages( 277 vm_map_t map, 278 vm_map_offset_t start, 279 vm_map_offset_t end); 280 281static kern_return_t vm_map_reusable_pages( 282 vm_map_t map, 283 vm_map_offset_t start, 284 vm_map_offset_t end); 285 286static kern_return_t vm_map_can_reuse( 287 vm_map_t map, 288 vm_map_offset_t start, 289 vm_map_offset_t end); 290 291 292/* 293 * Macros to copy a vm_map_entry. We must be careful to correctly 294 * manage the wired page count. vm_map_entry_copy() creates a new 295 * map entry to the same memory - the wired count in the new entry 296 * must be set to zero. vm_map_entry_copy_full() creates a new 297 * entry that is identical to the old entry. This preserves the 298 * wire count; it's used for map splitting and zone changing in 299 * vm_map_copyout. 300 */ 301 302#define vm_map_entry_copy(NEW,OLD) \ 303MACRO_BEGIN \ 304boolean_t _vmec_reserved = (NEW)->from_reserved_zone; \ 305 *(NEW) = *(OLD); \ 306 (NEW)->is_shared = FALSE; \ 307 (NEW)->needs_wakeup = FALSE; \ 308 (NEW)->in_transition = FALSE; \ 309 (NEW)->wired_count = 0; \ 310 (NEW)->user_wired_count = 0; \ 311 (NEW)->permanent = FALSE; \ 312 (NEW)->used_for_jit = FALSE; \ 313 (NEW)->from_reserved_zone = _vmec_reserved; \ 314MACRO_END 315 316#define vm_map_entry_copy_full(NEW,OLD) \ 317MACRO_BEGIN \ 318boolean_t _vmecf_reserved = (NEW)->from_reserved_zone; \ 319(*(NEW) = *(OLD)); \ 320(NEW)->from_reserved_zone = _vmecf_reserved; \ 321MACRO_END 322 323/* 324 * Decide if we want to allow processes to execute from their data or stack areas. 325 * override_nx() returns true if we do. Data/stack execution can be enabled independently 326 * for 32 and 64 bit processes. 
Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec 327 * or allow_stack_exec to enable data execution for that type of data area for that particular 328 * ABI (or both by or'ing the flags together). These are initialized in the architecture 329 * specific pmap files since the default behavior varies according to architecture. The 330 * main reason it varies is because of the need to provide binary compatibility with old 331 * applications that were written before these restrictions came into being. In the old 332 * days, an app could execute anything it could read, but this has slowly been tightened 333 * up over time. The default behavior is: 334 * 335 * 32-bit PPC apps may execute from both stack and data areas 336 * 32-bit Intel apps may exeucte from data areas but not stack 337 * 64-bit PPC/Intel apps may not execute from either data or stack 338 * 339 * An application on any architecture may override these defaults by explicitly 340 * adding PROT_EXEC permission to the page in question with the mprotect(2) 341 * system call. This code here just determines what happens when an app tries to 342 * execute from a page that lacks execute permission. 343 * 344 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the 345 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore, 346 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow 347 * execution from data areas for a particular binary even if the arch normally permits it. As 348 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit 349 * to support some complicated use cases, notably browsers with out-of-process plugins that 350 * are not all NX-safe. 351 */ 352 353extern int allow_data_exec, allow_stack_exec; 354 355int 356override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */ 357{ 358 int current_abi; 359 360 /* 361 * Determine if the app is running in 32 or 64 bit mode. 362 */ 363 364 if (vm_map_is_64bit(map)) 365 current_abi = VM_ABI_64; 366 else 367 current_abi = VM_ABI_32; 368 369 /* 370 * Determine if we should allow the execution based on whether it's a 371 * stack or data area and the current architecture. 372 */ 373 374 if (user_tag == VM_MEMORY_STACK) 375 return allow_stack_exec & current_abi; 376 377 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE); 378} 379 380 381/* 382 * Virtual memory maps provide for the mapping, protection, 383 * and sharing of virtual memory objects. In addition, 384 * this module provides for an efficient virtual copy of 385 * memory from one map to another. 386 * 387 * Synchronization is required prior to most operations. 388 * 389 * Maps consist of an ordered doubly-linked list of simple 390 * entries; a single hint is used to speed up lookups. 391 * 392 * Sharing maps have been deleted from this version of Mach. 393 * All shared objects are now mapped directly into the respective 394 * maps. This requires a change in the copy on write strategy; 395 * the asymmetric (delayed) strategy is used for shared temporary 396 * objects instead of the symmetric (shadow) strategy. All maps 397 * are now "top level" maps (either task map, kernel map or submap 398 * of the kernel map). 399 * 400 * Since portions of maps are specified by start/end addreses, 401 * which may not align with existing map entries, all 402 * routines merely "clip" entries to these start/end values. 
403 * [That is, an entry is split into two, bordering at a 404 * start or end value.] Note that these clippings may not 405 * always be necessary (as the two resulting entries are then 406 * not changed); however, the clipping is done for convenience. 407 * No attempt is currently made to "glue back together" two 408 * abutting entries. 409 * 410 * The symmetric (shadow) copy strategy implements virtual copy 411 * by copying VM object references from one map to 412 * another, and then marking both regions as copy-on-write. 413 * It is important to note that only one writeable reference 414 * to a VM object region exists in any map when this strategy 415 * is used -- this means that shadow object creation can be 416 * delayed until a write operation occurs. The symmetric (delayed) 417 * strategy allows multiple maps to have writeable references to 418 * the same region of a vm object, and hence cannot delay creating 419 * its copy objects. See vm_object_copy_quickly() in vm_object.c. 420 * Copying of permanent objects is completely different; see 421 * vm_object_copy_strategically() in vm_object.c. 422 */ 423 424static zone_t vm_map_zone; /* zone for vm_map structures */ 425static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */ 426static zone_t vm_map_entry_reserved_zone; /* zone with reserve for non-blocking 427 * allocations */ 428static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ 429 430 431/* 432 * Placeholder object for submap operations. This object is dropped 433 * into the range by a call to vm_map_find, and removed when 434 * vm_map_submap creates the submap. 435 */ 436 437vm_object_t vm_submap_object; 438 439static void *map_data; 440static vm_size_t map_data_size; 441static void *kentry_data; 442static vm_size_t kentry_data_size; 443 444#if CONFIG_EMBEDDED 445#define NO_COALESCE_LIMIT 0 446#else 447#define NO_COALESCE_LIMIT ((1024 * 128) - 1) 448#endif 449 450/* Skip acquiring locks if we're in the midst of a kernel core dump */ 451unsigned int not_in_kdp = 1; 452 453unsigned int vm_map_set_cache_attr_count = 0; 454 455kern_return_t 456vm_map_set_cache_attr( 457 vm_map_t map, 458 vm_map_offset_t va) 459{ 460 vm_map_entry_t map_entry; 461 vm_object_t object; 462 kern_return_t kr = KERN_SUCCESS; 463 464 vm_map_lock_read(map); 465 466 if (!vm_map_lookup_entry(map, va, &map_entry) || 467 map_entry->is_sub_map) { 468 /* 469 * that memory is not properly mapped 470 */ 471 kr = KERN_INVALID_ARGUMENT; 472 goto done; 473 } 474 object = map_entry->object.vm_object; 475 476 if (object == VM_OBJECT_NULL) { 477 /* 478 * there should be a VM object here at this point 479 */ 480 kr = KERN_INVALID_ARGUMENT; 481 goto done; 482 } 483 vm_object_lock(object); 484 object->set_cache_attr = TRUE; 485 vm_object_unlock(object); 486 487 vm_map_set_cache_attr_count++; 488done: 489 vm_map_unlock_read(map); 490 491 return kr; 492} 493 494 495#if CONFIG_CODE_DECRYPTION 496/* 497 * vm_map_apple_protected: 498 * This remaps the requested part of the object with an object backed by 499 * the decrypting pager. 500 * crypt_info contains entry points and session data for the crypt module. 501 * The crypt_info block will be copied by vm_map_apple_protected. The data structures 502 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called. 
503 */ 504kern_return_t 505vm_map_apple_protected( 506 vm_map_t map, 507 vm_map_offset_t start, 508 vm_map_offset_t end, 509 struct pager_crypt_info *crypt_info) 510{ 511 boolean_t map_locked; 512 kern_return_t kr; 513 vm_map_entry_t map_entry; 514 memory_object_t protected_mem_obj; 515 vm_object_t protected_object; 516 vm_map_offset_t map_addr; 517 518 vm_map_lock_read(map); 519 map_locked = TRUE; 520 521 /* lookup the protected VM object */ 522 if (!vm_map_lookup_entry(map, 523 start, 524 &map_entry) || 525 map_entry->vme_end < end || 526 map_entry->is_sub_map) { 527 /* that memory is not properly mapped */ 528 kr = KERN_INVALID_ARGUMENT; 529 goto done; 530 } 531 protected_object = map_entry->object.vm_object; 532 if (protected_object == VM_OBJECT_NULL) { 533 /* there should be a VM object here at this point */ 534 kr = KERN_INVALID_ARGUMENT; 535 goto done; 536 } 537 538 /* make sure protected object stays alive while map is unlocked */ 539 vm_object_reference(protected_object); 540 541 vm_map_unlock_read(map); 542 map_locked = FALSE; 543 544 /* 545 * Lookup (and create if necessary) the protected memory object 546 * matching that VM object. 547 * If successful, this also grabs a reference on the memory object, 548 * to guarantee that it doesn't go away before we get a chance to map 549 * it. 550 */ 551 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info); 552 553 /* release extra ref on protected object */ 554 vm_object_deallocate(protected_object); 555 556 if (protected_mem_obj == NULL) { 557 kr = KERN_FAILURE; 558 goto done; 559 } 560 561 /* map this memory object in place of the current one */ 562 map_addr = start; 563 kr = vm_map_enter_mem_object(map, 564 &map_addr, 565 end - start, 566 (mach_vm_offset_t) 0, 567 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, 568 (ipc_port_t) protected_mem_obj, 569 (map_entry->offset + 570 (start - map_entry->vme_start)), 571 TRUE, 572 map_entry->protection, 573 map_entry->max_protection, 574 map_entry->inheritance); 575 assert(map_addr == start); 576 /* 577 * Release the reference obtained by apple_protect_pager_setup(). 578 * The mapping (if it succeeded) is now holding a reference on the 579 * memory object. 580 */ 581 memory_object_deallocate(protected_mem_obj); 582 583done: 584 if (map_locked) { 585 vm_map_unlock_read(map); 586 } 587 return kr; 588} 589#endif /* CONFIG_CODE_DECRYPTION */ 590 591 592lck_grp_t vm_map_lck_grp; 593lck_grp_attr_t vm_map_lck_grp_attr; 594lck_attr_t vm_map_lck_attr; 595 596 597/* 598 * vm_map_init: 599 * 600 * Initialize the vm_map module. Must be called before 601 * any other vm_map routines. 602 * 603 * Map and entry structures are allocated from zones -- we must 604 * initialize those zones. 605 * 606 * There are three zones of interest: 607 * 608 * vm_map_zone: used to allocate maps. 609 * vm_map_entry_zone: used to allocate map entries. 610 * vm_map_entry_reserved_zone: fallback zone for kernel map entries 611 * 612 * The kernel allocates map entries from a special zone that is initially 613 * "crammed" with memory. It would be difficult (perhaps impossible) for 614 * the kernel to allocate more memory to a entry zone when it became 615 * empty since the very act of allocating memory implies the creation 616 * of a new entry. 
617 */ 618void 619vm_map_init( 620 void) 621{ 622 vm_size_t entry_zone_alloc_size; 623 const char *mez_name = "VM map entries"; 624 625 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024, 626 PAGE_SIZE, "maps"); 627 zone_change(vm_map_zone, Z_NOENCRYPT, TRUE); 628#if defined(__LP64__) 629 entry_zone_alloc_size = PAGE_SIZE * 5; 630#else 631 entry_zone_alloc_size = PAGE_SIZE * 6; 632#endif 633 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), 634 1024*1024, entry_zone_alloc_size, 635 mez_name); 636 zone_change(vm_map_entry_zone, Z_NOENCRYPT, TRUE); 637 zone_change(vm_map_entry_zone, Z_NOCALLOUT, TRUE); 638 zone_change(vm_map_entry_zone, Z_GZALLOC_EXEMPT, TRUE); 639 640 vm_map_entry_reserved_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), 641 kentry_data_size * 64, kentry_data_size, 642 "Reserved VM map entries"); 643 zone_change(vm_map_entry_reserved_zone, Z_NOENCRYPT, TRUE); 644 645 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy), 646 16*1024, PAGE_SIZE, "VM map copies"); 647 zone_change(vm_map_copy_zone, Z_NOENCRYPT, TRUE); 648 649 /* 650 * Cram the map and kentry zones with initial data. 651 * Set reserved_zone non-collectible to aid zone_gc(). 652 */ 653 zone_change(vm_map_zone, Z_COLLECT, FALSE); 654 655 zone_change(vm_map_entry_reserved_zone, Z_COLLECT, FALSE); 656 zone_change(vm_map_entry_reserved_zone, Z_EXPAND, FALSE); 657 zone_change(vm_map_entry_reserved_zone, Z_FOREIGN, TRUE); 658 zone_change(vm_map_entry_reserved_zone, Z_NOCALLOUT, TRUE); 659 zone_change(vm_map_entry_reserved_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ 660 zone_change(vm_map_copy_zone, Z_CALLERACCT, FALSE); /* don't charge caller */ 661 zone_change(vm_map_entry_reserved_zone, Z_GZALLOC_EXEMPT, TRUE); 662 663 zcram(vm_map_zone, (vm_offset_t)map_data, map_data_size); 664 zcram(vm_map_entry_reserved_zone, (vm_offset_t)kentry_data, kentry_data_size); 665 666 lck_grp_attr_setdefault(&vm_map_lck_grp_attr); 667 lck_grp_init(&vm_map_lck_grp, "vm_map", &vm_map_lck_grp_attr); 668 lck_attr_setdefault(&vm_map_lck_attr); 669 670#if CONFIG_FREEZE 671 default_freezer_init(); 672#endif /* CONFIG_FREEZE */ 673} 674 675void 676vm_map_steal_memory( 677 void) 678{ 679 uint32_t kentry_initial_pages; 680 681 map_data_size = round_page(10 * sizeof(struct _vm_map)); 682 map_data = pmap_steal_memory(map_data_size); 683 684 /* 685 * kentry_initial_pages corresponds to the number of kernel map entries 686 * required during bootstrap until the asynchronous replenishment 687 * scheme is activated and/or entries are available from the general 688 * map entry pool. 689 */ 690#if defined(__LP64__) 691 kentry_initial_pages = 10; 692#else 693 kentry_initial_pages = 6; 694#endif 695 696#if CONFIG_GZALLOC 697 /* If using the guard allocator, reserve more memory for the kernel 698 * reserved map entry pool. 699 */ 700 if (gzalloc_enabled()) 701 kentry_initial_pages *= 1024; 702#endif 703 704 kentry_data_size = kentry_initial_pages * PAGE_SIZE; 705 kentry_data = pmap_steal_memory(kentry_data_size); 706} 707 708void vm_kernel_reserved_entry_init(void) { 709 zone_prio_refill_configure(vm_map_entry_reserved_zone, (6*PAGE_SIZE)/sizeof(struct vm_map_entry)); 710} 711 712/* 713 * vm_map_create: 714 * 715 * Creates and returns a new empty VM map with 716 * the given physical map structure, and having 717 * the given lower and upper address bounds. 
718 */ 719vm_map_t 720vm_map_create( 721 pmap_t pmap, 722 vm_map_offset_t min, 723 vm_map_offset_t max, 724 boolean_t pageable) 725{ 726 static int color_seed = 0; 727 register vm_map_t result; 728 729 result = (vm_map_t) zalloc(vm_map_zone); 730 if (result == VM_MAP_NULL) 731 panic("vm_map_create"); 732 733 vm_map_first_entry(result) = vm_map_to_entry(result); 734 vm_map_last_entry(result) = vm_map_to_entry(result); 735 result->hdr.nentries = 0; 736 result->hdr.entries_pageable = pageable; 737 738 vm_map_store_init( &(result->hdr) ); 739 740 result->size = 0; 741 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */ 742 result->user_wire_size = 0; 743 result->ref_count = 1; 744#if TASK_SWAPPER 745 result->res_count = 1; 746 result->sw_state = MAP_SW_IN; 747#endif /* TASK_SWAPPER */ 748 result->pmap = pmap; 749 result->min_offset = min; 750 result->max_offset = max; 751 result->wiring_required = FALSE; 752 result->no_zero_fill = FALSE; 753 result->mapped_in_other_pmaps = FALSE; 754 result->wait_for_space = FALSE; 755 result->switch_protect = FALSE; 756 result->disable_vmentry_reuse = FALSE; 757 result->map_disallow_data_exec = FALSE; 758 result->highest_entry_end = 0; 759 result->first_free = vm_map_to_entry(result); 760 result->hint = vm_map_to_entry(result); 761 result->color_rr = (color_seed++) & vm_color_mask; 762 result->jit_entry_exists = FALSE; 763#if CONFIG_FREEZE 764 result->default_freezer_handle = NULL; 765#endif 766 vm_map_lock_init(result); 767 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr); 768 769 return(result); 770} 771 772/* 773 * vm_map_entry_create: [ internal use only ] 774 * 775 * Allocates a VM map entry for insertion in the 776 * given map (or map copy). No fields are filled. 777 */ 778#define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked) 779 780#define vm_map_copy_entry_create(copy, map_locked) \ 781 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked) 782unsigned reserved_zalloc_count, nonreserved_zalloc_count; 783 784static vm_map_entry_t 785_vm_map_entry_create( 786 struct vm_map_header *map_header, boolean_t __unused map_locked) 787{ 788 zone_t zone; 789 vm_map_entry_t entry; 790 791 zone = vm_map_entry_zone; 792 793 assert(map_header->entries_pageable ? !map_locked : TRUE); 794 795 if (map_header->entries_pageable) { 796 entry = (vm_map_entry_t) zalloc(zone); 797 } 798 else { 799 entry = (vm_map_entry_t) zalloc_canblock(zone, FALSE); 800 801 if (entry == VM_MAP_ENTRY_NULL) { 802 zone = vm_map_entry_reserved_zone; 803 entry = (vm_map_entry_t) zalloc(zone); 804 OSAddAtomic(1, &reserved_zalloc_count); 805 } else 806 OSAddAtomic(1, &nonreserved_zalloc_count); 807 } 808 809 if (entry == VM_MAP_ENTRY_NULL) 810 panic("vm_map_entry_create"); 811 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone); 812 813 vm_map_store_update( (vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE); 814#if MAP_ENTRY_CREATION_DEBUG 815 fastbacktrace(&entry->vme_bt[0], (sizeof(entry->vme_bt)/sizeof(uintptr_t))); 816#endif 817 return(entry); 818} 819 820/* 821 * vm_map_entry_dispose: [ internal use only ] 822 * 823 * Inverse of vm_map_entry_create. 
824 * 825 * write map lock held so no need to 826 * do anything special to insure correctness 827 * of the stores 828 */ 829#define vm_map_entry_dispose(map, entry) \ 830 _vm_map_entry_dispose(&(map)->hdr, (entry)) 831 832#define vm_map_copy_entry_dispose(map, entry) \ 833 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry)) 834 835static void 836_vm_map_entry_dispose( 837 register struct vm_map_header *map_header, 838 register vm_map_entry_t entry) 839{ 840 register zone_t zone; 841 842 if (map_header->entries_pageable || !(entry->from_reserved_zone)) 843 zone = vm_map_entry_zone; 844 else 845 zone = vm_map_entry_reserved_zone; 846 847 if (!map_header->entries_pageable) { 848 if (zone == vm_map_entry_zone) 849 OSAddAtomic(-1, &nonreserved_zalloc_count); 850 else 851 OSAddAtomic(-1, &reserved_zalloc_count); 852 } 853 854 zfree(zone, entry); 855} 856 857#if MACH_ASSERT 858static boolean_t first_free_check = FALSE; 859boolean_t 860first_free_is_valid( 861 vm_map_t map) 862{ 863 if (!first_free_check) 864 return TRUE; 865 866 return( first_free_is_valid_store( map )); 867} 868#endif /* MACH_ASSERT */ 869 870 871#define vm_map_copy_entry_link(copy, after_where, entry) \ 872 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry)) 873 874#define vm_map_copy_entry_unlink(copy, entry) \ 875 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry)) 876 877#if MACH_ASSERT && TASK_SWAPPER 878/* 879 * vm_map_res_reference: 880 * 881 * Adds another valid residence count to the given map. 882 * 883 * Map is locked so this function can be called from 884 * vm_map_swapin. 885 * 886 */ 887void vm_map_res_reference(register vm_map_t map) 888{ 889 /* assert map is locked */ 890 assert(map->res_count >= 0); 891 assert(map->ref_count >= map->res_count); 892 if (map->res_count == 0) { 893 lck_mtx_unlock(&map->s_lock); 894 vm_map_lock(map); 895 vm_map_swapin(map); 896 lck_mtx_lock(&map->s_lock); 897 ++map->res_count; 898 vm_map_unlock(map); 899 } else 900 ++map->res_count; 901} 902 903/* 904 * vm_map_reference_swap: 905 * 906 * Adds valid reference and residence counts to the given map. 907 * 908 * The map may not be in memory (i.e. zero residence count). 909 * 910 */ 911void vm_map_reference_swap(register vm_map_t map) 912{ 913 assert(map != VM_MAP_NULL); 914 lck_mtx_lock(&map->s_lock); 915 assert(map->res_count >= 0); 916 assert(map->ref_count >= map->res_count); 917 map->ref_count++; 918 vm_map_res_reference(map); 919 lck_mtx_unlock(&map->s_lock); 920} 921 922/* 923 * vm_map_res_deallocate: 924 * 925 * Decrement residence count on a map; possibly causing swapout. 926 * 927 * The map must be in memory (i.e. non-zero residence count). 928 * 929 * The map is locked, so this function is callable from vm_map_deallocate. 930 * 931 */ 932void vm_map_res_deallocate(register vm_map_t map) 933{ 934 assert(map->res_count > 0); 935 if (--map->res_count == 0) { 936 lck_mtx_unlock(&map->s_lock); 937 vm_map_lock(map); 938 vm_map_swapout(map); 939 vm_map_unlock(map); 940 lck_mtx_lock(&map->s_lock); 941 } 942 assert(map->ref_count >= map->res_count); 943} 944#endif /* MACH_ASSERT && TASK_SWAPPER */ 945 946/* 947 * vm_map_destroy: 948 * 949 * Actually destroy a map. 950 */ 951void 952vm_map_destroy( 953 vm_map_t map, 954 int flags) 955{ 956 vm_map_lock(map); 957 958 /* clean up regular map entries */ 959 (void) vm_map_delete(map, map->min_offset, map->max_offset, 960 flags, VM_MAP_NULL); 961 /* clean up leftover special mappings (commpage, etc...) 
*/ 962 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL, 963 flags, VM_MAP_NULL); 964 965#if CONFIG_FREEZE 966 if (map->default_freezer_handle) { 967 default_freezer_handle_deallocate(map->default_freezer_handle); 968 map->default_freezer_handle = NULL; 969 } 970#endif 971 vm_map_unlock(map); 972 973 assert(map->hdr.nentries == 0); 974 975 if(map->pmap) 976 pmap_destroy(map->pmap); 977 978 zfree(vm_map_zone, map); 979} 980 981#if TASK_SWAPPER 982/* 983 * vm_map_swapin/vm_map_swapout 984 * 985 * Swap a map in and out, either referencing or releasing its resources. 986 * These functions are internal use only; however, they must be exported 987 * because they may be called from macros, which are exported. 988 * 989 * In the case of swapout, there could be races on the residence count, 990 * so if the residence count is up, we return, assuming that a 991 * vm_map_deallocate() call in the near future will bring us back. 992 * 993 * Locking: 994 * -- We use the map write lock for synchronization among races. 995 * -- The map write lock, and not the simple s_lock, protects the 996 * swap state of the map. 997 * -- If a map entry is a share map, then we hold both locks, in 998 * hierarchical order. 999 * 1000 * Synchronization Notes: 1001 * 1) If a vm_map_swapin() call happens while swapout in progress, it 1002 * will block on the map lock and proceed when swapout is through. 1003 * 2) A vm_map_reference() call at this time is illegal, and will 1004 * cause a panic. vm_map_reference() is only allowed on resident 1005 * maps, since it refuses to block. 1006 * 3) A vm_map_swapin() call during a swapin will block, and 1007 * proceeed when the first swapin is done, turning into a nop. 1008 * This is the reason the res_count is not incremented until 1009 * after the swapin is complete. 1010 * 4) There is a timing hole after the checks of the res_count, before 1011 * the map lock is taken, during which a swapin may get the lock 1012 * before a swapout about to happen. If this happens, the swapin 1013 * will detect the state and increment the reference count, causing 1014 * the swapout to be a nop, thereby delaying it until a later 1015 * vm_map_deallocate. If the swapout gets the lock first, then 1016 * the swapin will simply block until the swapout is done, and 1017 * then proceed. 1018 * 1019 * Because vm_map_swapin() is potentially an expensive operation, it 1020 * should be used with caution. 1021 * 1022 * Invariants: 1023 * 1) A map with a residence count of zero is either swapped, or 1024 * being swapped. 1025 * 2) A map with a non-zero residence count is either resident, 1026 * or being swapped in. 1027 */ 1028 1029int vm_map_swap_enable = 1; 1030 1031void vm_map_swapin (vm_map_t map) 1032{ 1033 register vm_map_entry_t entry; 1034 1035 if (!vm_map_swap_enable) /* debug */ 1036 return; 1037 1038 /* 1039 * Map is locked 1040 * First deal with various races. 1041 */ 1042 if (map->sw_state == MAP_SW_IN) 1043 /* 1044 * we raced with swapout and won. Returning will incr. 1045 * the res_count, turning the swapout into a nop. 1046 */ 1047 return; 1048 1049 /* 1050 * The residence count must be zero. If we raced with another 1051 * swapin, the state would have been IN; if we raced with a 1052 * swapout (after another competing swapin), we must have lost 1053 * the race to get here (see above comment), in which case 1054 * res_count is still 0. 
1055 */ 1056 assert(map->res_count == 0); 1057 1058 /* 1059 * There are no intermediate states of a map going out or 1060 * coming in, since the map is locked during the transition. 1061 */ 1062 assert(map->sw_state == MAP_SW_OUT); 1063 1064 /* 1065 * We now operate upon each map entry. If the entry is a sub- 1066 * or share-map, we call vm_map_res_reference upon it. 1067 * If the entry is an object, we call vm_object_res_reference 1068 * (this may iterate through the shadow chain). 1069 * Note that we hold the map locked the entire time, 1070 * even if we get back here via a recursive call in 1071 * vm_map_res_reference. 1072 */ 1073 entry = vm_map_first_entry(map); 1074 1075 while (entry != vm_map_to_entry(map)) { 1076 if (entry->object.vm_object != VM_OBJECT_NULL) { 1077 if (entry->is_sub_map) { 1078 vm_map_t lmap = entry->object.sub_map; 1079 lck_mtx_lock(&lmap->s_lock); 1080 vm_map_res_reference(lmap); 1081 lck_mtx_unlock(&lmap->s_lock); 1082 } else { 1083 vm_object_t object = entry->object.vm_object; 1084 vm_object_lock(object); 1085 /* 1086 * This call may iterate through the 1087 * shadow chain. 1088 */ 1089 vm_object_res_reference(object); 1090 vm_object_unlock(object); 1091 } 1092 } 1093 entry = entry->vme_next; 1094 } 1095 assert(map->sw_state == MAP_SW_OUT); 1096 map->sw_state = MAP_SW_IN; 1097} 1098 1099void vm_map_swapout(vm_map_t map) 1100{ 1101 register vm_map_entry_t entry; 1102 1103 /* 1104 * Map is locked 1105 * First deal with various races. 1106 * If we raced with a swapin and lost, the residence count 1107 * will have been incremented to 1, and we simply return. 1108 */ 1109 lck_mtx_lock(&map->s_lock); 1110 if (map->res_count != 0) { 1111 lck_mtx_unlock(&map->s_lock); 1112 return; 1113 } 1114 lck_mtx_unlock(&map->s_lock); 1115 1116 /* 1117 * There are no intermediate states of a map going out or 1118 * coming in, since the map is locked during the transition. 1119 */ 1120 assert(map->sw_state == MAP_SW_IN); 1121 1122 if (!vm_map_swap_enable) 1123 return; 1124 1125 /* 1126 * We now operate upon each map entry. If the entry is a sub- 1127 * or share-map, we call vm_map_res_deallocate upon it. 1128 * If the entry is an object, we call vm_object_res_deallocate 1129 * (this may iterate through the shadow chain). 1130 * Note that we hold the map locked the entire time, 1131 * even if we get back here via a recursive call in 1132 * vm_map_res_deallocate. 1133 */ 1134 entry = vm_map_first_entry(map); 1135 1136 while (entry != vm_map_to_entry(map)) { 1137 if (entry->object.vm_object != VM_OBJECT_NULL) { 1138 if (entry->is_sub_map) { 1139 vm_map_t lmap = entry->object.sub_map; 1140 lck_mtx_lock(&lmap->s_lock); 1141 vm_map_res_deallocate(lmap); 1142 lck_mtx_unlock(&lmap->s_lock); 1143 } else { 1144 vm_object_t object = entry->object.vm_object; 1145 vm_object_lock(object); 1146 /* 1147 * This call may take a long time, 1148 * since it could actively push 1149 * out pages (if we implement it 1150 * that way). 1151 */ 1152 vm_object_res_deallocate(object); 1153 vm_object_unlock(object); 1154 } 1155 } 1156 entry = entry->vme_next; 1157 } 1158 assert(map->sw_state == MAP_SW_IN); 1159 map->sw_state = MAP_SW_OUT; 1160} 1161 1162#endif /* TASK_SWAPPER */ 1163 1164/* 1165 * vm_map_lookup_entry: [ internal use only ] 1166 * 1167 * Calls into the vm map store layer to find the map 1168 * entry containing (or immediately preceding) the 1169 * specified address in the given map; the entry is returned 1170 * in the "entry" parameter. 
The boolean 1171 * result indicates whether the address is 1172 * actually contained in the map. 1173 */ 1174boolean_t 1175vm_map_lookup_entry( 1176 register vm_map_t map, 1177 register vm_map_offset_t address, 1178 vm_map_entry_t *entry) /* OUT */ 1179{ 1180 return ( vm_map_store_lookup_entry( map, address, entry )); 1181} 1182 1183/* 1184 * Routine: vm_map_find_space 1185 * Purpose: 1186 * Allocate a range in the specified virtual address map, 1187 * returning the entry allocated for that range. 1188 * Used by kmem_alloc, etc. 1189 * 1190 * The map must be NOT be locked. It will be returned locked 1191 * on KERN_SUCCESS, unlocked on failure. 1192 * 1193 * If an entry is allocated, the object/offset fields 1194 * are initialized to zero. 1195 */ 1196kern_return_t 1197vm_map_find_space( 1198 register vm_map_t map, 1199 vm_map_offset_t *address, /* OUT */ 1200 vm_map_size_t size, 1201 vm_map_offset_t mask, 1202 int flags, 1203 vm_map_entry_t *o_entry) /* OUT */ 1204{ 1205 register vm_map_entry_t entry, new_entry; 1206 register vm_map_offset_t start; 1207 register vm_map_offset_t end; 1208 1209 if (size == 0) { 1210 *address = 0; 1211 return KERN_INVALID_ARGUMENT; 1212 } 1213 1214 if (flags & VM_FLAGS_GUARD_AFTER) { 1215 /* account for the back guard page in the size */ 1216 size += PAGE_SIZE_64; 1217 } 1218 1219 new_entry = vm_map_entry_create(map, FALSE); 1220 1221 /* 1222 * Look for the first possible address; if there's already 1223 * something at this address, we have to start after it. 1224 */ 1225 1226 vm_map_lock(map); 1227 1228 if( map->disable_vmentry_reuse == TRUE) { 1229 VM_MAP_HIGHEST_ENTRY(map, entry, start); 1230 } else { 1231 assert(first_free_is_valid(map)); 1232 if ((entry = map->first_free) == vm_map_to_entry(map)) 1233 start = map->min_offset; 1234 else 1235 start = entry->vme_end; 1236 } 1237 1238 /* 1239 * In any case, the "entry" always precedes 1240 * the proposed new region throughout the loop: 1241 */ 1242 1243 while (TRUE) { 1244 register vm_map_entry_t next; 1245 1246 /* 1247 * Find the end of the proposed new region. 1248 * Be sure we didn't go beyond the end, or 1249 * wrap around the address. 1250 */ 1251 1252 if (flags & VM_FLAGS_GUARD_BEFORE) { 1253 /* reserve space for the front guard page */ 1254 start += PAGE_SIZE_64; 1255 } 1256 end = ((start + mask) & ~mask); 1257 1258 if (end < start) { 1259 vm_map_entry_dispose(map, new_entry); 1260 vm_map_unlock(map); 1261 return(KERN_NO_SPACE); 1262 } 1263 start = end; 1264 end += size; 1265 1266 if ((end > map->max_offset) || (end < start)) { 1267 vm_map_entry_dispose(map, new_entry); 1268 vm_map_unlock(map); 1269 return(KERN_NO_SPACE); 1270 } 1271 1272 /* 1273 * If there are no more entries, we must win. 1274 */ 1275 1276 next = entry->vme_next; 1277 if (next == vm_map_to_entry(map)) 1278 break; 1279 1280 /* 1281 * If there is another entry, it must be 1282 * after the end of the potential new region. 1283 */ 1284 1285 if (next->vme_start >= end) 1286 break; 1287 1288 /* 1289 * Didn't fit -- move to the next entry. 1290 */ 1291 1292 entry = next; 1293 start = entry->vme_end; 1294 } 1295 1296 /* 1297 * At this point, 1298 * "start" and "end" should define the endpoints of the 1299 * available new range, and 1300 * "entry" should refer to the region before the new 1301 * range, and 1302 * 1303 * the map should be locked. 
1304 */ 1305 1306 if (flags & VM_FLAGS_GUARD_BEFORE) { 1307 /* go back for the front guard page */ 1308 start -= PAGE_SIZE_64; 1309 } 1310 *address = start; 1311 1312 assert(start < end); 1313 new_entry->vme_start = start; 1314 new_entry->vme_end = end; 1315 assert(page_aligned(new_entry->vme_start)); 1316 assert(page_aligned(new_entry->vme_end)); 1317 1318 new_entry->is_shared = FALSE; 1319 new_entry->is_sub_map = FALSE; 1320 new_entry->use_pmap = FALSE; 1321 new_entry->object.vm_object = VM_OBJECT_NULL; 1322 new_entry->offset = (vm_object_offset_t) 0; 1323 1324 new_entry->needs_copy = FALSE; 1325 1326 new_entry->inheritance = VM_INHERIT_DEFAULT; 1327 new_entry->protection = VM_PROT_DEFAULT; 1328 new_entry->max_protection = VM_PROT_ALL; 1329 new_entry->behavior = VM_BEHAVIOR_DEFAULT; 1330 new_entry->wired_count = 0; 1331 new_entry->user_wired_count = 0; 1332 1333 new_entry->in_transition = FALSE; 1334 new_entry->needs_wakeup = FALSE; 1335 new_entry->no_cache = FALSE; 1336 new_entry->permanent = FALSE; 1337 new_entry->superpage_size = 0; 1338 1339 new_entry->used_for_jit = 0; 1340 1341 new_entry->alias = 0; 1342 new_entry->zero_wired_pages = FALSE; 1343 1344 VM_GET_FLAGS_ALIAS(flags, new_entry->alias); 1345 1346 /* 1347 * Insert the new entry into the list 1348 */ 1349 1350 vm_map_store_entry_link(map, entry, new_entry); 1351 1352 map->size += size; 1353 1354 /* 1355 * Update the lookup hint 1356 */ 1357 SAVE_HINT_MAP_WRITE(map, new_entry); 1358 1359 *o_entry = new_entry; 1360 return(KERN_SUCCESS); 1361} 1362 1363int vm_map_pmap_enter_print = TRUE; 1364int vm_map_pmap_enter_enable = TRUE; 1365 1366/* 1367 * Routine: vm_map_pmap_enter [internal only] 1368 * 1369 * Description: 1370 * Force pages from the specified object to be entered into 1371 * the pmap at the specified address if they are present. 1372 * As soon as a page not found in the object the scan ends. 1373 * 1374 * Returns: 1375 * Nothing. 1376 * 1377 * In/out conditions: 1378 * The source map should not be locked on entry. 1379 */ 1380static void 1381vm_map_pmap_enter( 1382 vm_map_t map, 1383 register vm_map_offset_t addr, 1384 register vm_map_offset_t end_addr, 1385 register vm_object_t object, 1386 vm_object_offset_t offset, 1387 vm_prot_t protection) 1388{ 1389 int type_of_fault; 1390 kern_return_t kr; 1391 1392 if(map->pmap == 0) 1393 return; 1394 1395 while (addr < end_addr) { 1396 register vm_page_t m; 1397 1398 vm_object_lock(object); 1399 1400 m = vm_page_lookup(object, offset); 1401 /* 1402 * ENCRYPTED SWAP: 1403 * The user should never see encrypted data, so do not 1404 * enter an encrypted page in the page table. 
1405 */ 1406 if (m == VM_PAGE_NULL || m->busy || m->encrypted || 1407 m->fictitious || 1408 (m->unusual && ( m->error || m->restart || m->absent))) { 1409 vm_object_unlock(object); 1410 return; 1411 } 1412 1413 { 1414 kprintf("vm_map_pmap_enter:"); 1415 kprintf("map: %p, addr: %llx, object: %p, offset: %llx\n", 1416 map, (unsigned long long)addr, object, (unsigned long long)offset); 1417 } 1418 type_of_fault = DBG_CACHE_HIT_FAULT; 1419 kr = vm_fault_enter(m, map->pmap, addr, protection, protection, 1420 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, 1421 &type_of_fault); 1422 1423 vm_object_unlock(object); 1424 1425 offset += PAGE_SIZE_64; 1426 addr += PAGE_SIZE; 1427 } 1428} 1429 1430boolean_t vm_map_pmap_is_empty( 1431 vm_map_t map, 1432 vm_map_offset_t start, 1433 vm_map_offset_t end); 1434boolean_t vm_map_pmap_is_empty( 1435 vm_map_t map, 1436 vm_map_offset_t start, 1437 vm_map_offset_t end) 1438{ 1439#ifdef MACHINE_PMAP_IS_EMPTY 1440 return pmap_is_empty(map->pmap, start, end); 1441#else /* MACHINE_PMAP_IS_EMPTY */ 1442 vm_map_offset_t offset; 1443 ppnum_t phys_page; 1444 1445 if (map->pmap == NULL) { 1446 return TRUE; 1447 } 1448 1449 for (offset = start; 1450 offset < end; 1451 offset += PAGE_SIZE) { 1452 phys_page = pmap_find_phys(map->pmap, offset); 1453 if (phys_page) { 1454 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): " 1455 "page %d at 0x%llx\n", 1456 map, (long long)start, (long long)end, 1457 phys_page, (long long)offset); 1458 return FALSE; 1459 } 1460 } 1461 return TRUE; 1462#endif /* MACHINE_PMAP_IS_EMPTY */ 1463} 1464 1465#define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000 1466kern_return_t 1467vm_map_random_address_for_size( 1468 vm_map_t map, 1469 vm_map_offset_t *address, 1470 vm_map_size_t size) 1471{ 1472 kern_return_t kr = KERN_SUCCESS; 1473 int tries = 0; 1474 vm_map_offset_t random_addr = 0; 1475 vm_map_offset_t hole_end; 1476 1477 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL; 1478 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL; 1479 vm_map_size_t vm_hole_size = 0; 1480 vm_map_size_t addr_space_size; 1481 1482 addr_space_size = vm_map_max(map) - vm_map_min(map); 1483 1484 assert(page_aligned(size)); 1485 1486 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) { 1487 random_addr = ((vm_map_offset_t)random()) << PAGE_SHIFT; 1488 random_addr = trunc_page(vm_map_min(map) + 1489 (random_addr % addr_space_size)); 1490 1491 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) { 1492 if (prev_entry == vm_map_to_entry(map)) { 1493 next_entry = vm_map_first_entry(map); 1494 } else { 1495 next_entry = prev_entry->vme_next; 1496 } 1497 if (next_entry == vm_map_to_entry(map)) { 1498 hole_end = vm_map_max(map); 1499 } else { 1500 hole_end = next_entry->vme_start; 1501 } 1502 vm_hole_size = hole_end - random_addr; 1503 if (vm_hole_size >= size) { 1504 *address = random_addr; 1505 break; 1506 } 1507 } 1508 tries++; 1509 } 1510 1511 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) { 1512 kr = KERN_NO_SPACE; 1513 } 1514 return kr; 1515} 1516 1517/* 1518 * Routine: vm_map_enter 1519 * 1520 * Description: 1521 * Allocate a range in the specified virtual address map. 1522 * The resulting range will refer to memory defined by 1523 * the given memory object and offset into that object. 1524 * 1525 * Arguments are as defined in the vm_map call. 
1526 */ 1527int _map_enter_debug = 0; 1528static unsigned int vm_map_enter_restore_successes = 0; 1529static unsigned int vm_map_enter_restore_failures = 0; 1530kern_return_t 1531vm_map_enter( 1532 vm_map_t map, 1533 vm_map_offset_t *address, /* IN/OUT */ 1534 vm_map_size_t size, 1535 vm_map_offset_t mask, 1536 int flags, 1537 vm_object_t object, 1538 vm_object_offset_t offset, 1539 boolean_t needs_copy, 1540 vm_prot_t cur_protection, 1541 vm_prot_t max_protection, 1542 vm_inherit_t inheritance) 1543{ 1544 vm_map_entry_t entry, new_entry; 1545 vm_map_offset_t start, tmp_start, tmp_offset; 1546 vm_map_offset_t end, tmp_end; 1547 vm_map_offset_t tmp2_start, tmp2_end; 1548 vm_map_offset_t step; 1549 kern_return_t result = KERN_SUCCESS; 1550 vm_map_t zap_old_map = VM_MAP_NULL; 1551 vm_map_t zap_new_map = VM_MAP_NULL; 1552 boolean_t map_locked = FALSE; 1553 boolean_t pmap_empty = TRUE; 1554 boolean_t new_mapping_established = FALSE; 1555 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0); 1556 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0); 1557 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0); 1558 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0); 1559 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0); 1560 boolean_t permanent = ((flags & VM_FLAGS_PERMANENT) != 0); 1561 boolean_t entry_for_jit = ((flags & VM_FLAGS_MAP_JIT) != 0); 1562 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT); 1563 char alias; 1564 vm_map_offset_t effective_min_offset, effective_max_offset; 1565 kern_return_t kr; 1566 1567#if 0 1568 kprintf("vm_map_enter: pmap -> <0x%08x>, 0x%08x (%08x, %08x)\n", map, map->pmap, *address, size); 1569#endif 1570 1571 if (superpage_size) { 1572 switch (superpage_size) { 1573 /* 1574 * Note that the current implementation only supports 1575 * a single size for superpages, SUPERPAGE_SIZE, per 1576 * architecture. As soon as more sizes are supposed 1577 * to be supported, sSUPERPAGE_SIZE has to be replaced 1578 * with a lookup of the size depending on superpage_size. 1579 */ 1580#ifdef __x86_64__ 1581 case SUPERPAGE_SIZE_ANY: 1582 /* handle it like 2 MB and round up to page size */ 1583 size = (size + 2*1024*1024 - 1) & ~(2*1024*1024 - 1); 1584 case SUPERPAGE_SIZE_2MB: 1585 break; 1586#endif 1587 default: 1588 return KERN_INVALID_ARGUMENT; 1589 } 1590 mask = SUPERPAGE_SIZE-1; 1591 if (size & (SUPERPAGE_SIZE-1)) 1592 return KERN_INVALID_ARGUMENT; 1593 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */ 1594 } 1595 1596 1597#if CONFIG_EMBEDDED 1598 if (cur_protection & VM_PROT_WRITE){ 1599 if ((cur_protection & VM_PROT_EXECUTE) && !entry_for_jit){ 1600 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); 1601 cur_protection &= ~VM_PROT_EXECUTE; 1602 } 1603 } 1604#endif /* CONFIG_EMBEDDED */ 1605 1606 if (is_submap) { 1607 if (purgable) { 1608 /* submaps can not be purgeable */ 1609 return KERN_INVALID_ARGUMENT; 1610 } 1611 if (object == VM_OBJECT_NULL) { 1612 /* submaps can not be created lazily */ 1613 return KERN_INVALID_ARGUMENT; 1614 } 1615 } 1616 if (flags & VM_FLAGS_ALREADY) { 1617 /* 1618 * VM_FLAGS_ALREADY says that it's OK if the same mapping 1619 * is already present. For it to be meaningul, the requested 1620 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and 1621 * we shouldn't try and remove what was mapped there first 1622 * (!VM_FLAGS_OVERWRITE). 
1623 */ 1624 if ((flags & VM_FLAGS_ANYWHERE) || 1625 (flags & VM_FLAGS_OVERWRITE)) { 1626 return KERN_INVALID_ARGUMENT; 1627 } 1628 } 1629 1630 effective_min_offset = map->min_offset; 1631 1632 if (flags & VM_FLAGS_BEYOND_MAX) { 1633 /* 1634 * Allow an insertion beyond the map's max offset. 1635 */ 1636 if (vm_map_is_64bit(map)) 1637 effective_max_offset = 0xFFFFFFFFFFFFF000ULL; 1638 else 1639 effective_max_offset = 0x00000000FFFFF000ULL; 1640 } else { 1641 effective_max_offset = map->max_offset; 1642 } 1643 1644 if (size == 0 || 1645 (offset & PAGE_MASK_64) != 0) { 1646 *address = 0; 1647 return KERN_INVALID_ARGUMENT; 1648 } 1649 1650 VM_GET_FLAGS_ALIAS(flags, alias); 1651 1652#define RETURN(value) { result = value; goto BailOut; } 1653 1654 assert(page_aligned(*address)); 1655 assert(page_aligned(size)); 1656 1657 /* 1658 * Only zero-fill objects are allowed to be purgable. 1659 * LP64todo - limit purgable objects to 32-bits for now 1660 */ 1661 if (purgable && 1662 (offset != 0 || 1663 (object != VM_OBJECT_NULL && 1664 (object->vo_size != size || 1665 object->purgable == VM_PURGABLE_DENY)) 1666 || size > ANON_MAX_SIZE)) /* LP64todo: remove when dp capable */ 1667 return KERN_INVALID_ARGUMENT; 1668 1669 if (!anywhere && overwrite) { 1670 /* 1671 * Create a temporary VM map to hold the old mappings in the 1672 * affected area while we create the new one. 1673 * This avoids releasing the VM map lock in 1674 * vm_map_entry_delete() and allows atomicity 1675 * when we want to replace some mappings with a new one. 1676 * It also allows us to restore the old VM mappings if the 1677 * new mapping fails. 1678 */ 1679 zap_old_map = vm_map_create(PMAP_NULL, 1680 *address, 1681 *address + size, 1682 map->hdr.entries_pageable); 1683 } 1684 1685StartAgain: ; 1686 1687 start = *address; 1688 1689 if (anywhere) { 1690 vm_map_lock(map); 1691 map_locked = TRUE; 1692 1693 if (entry_for_jit) { 1694 if (map->jit_entry_exists) { 1695 result = KERN_INVALID_ARGUMENT; 1696 goto BailOut; 1697 } 1698 /* 1699 * Get a random start address. 1700 */ 1701 result = vm_map_random_address_for_size(map, address, size); 1702 if (result != KERN_SUCCESS) { 1703 goto BailOut; 1704 } 1705 start = *address; 1706 } 1707 1708 1709 /* 1710 * Calculate the first possible address. 1711 */ 1712 if (start < effective_min_offset) 1713 start = effective_min_offset; 1714 if (start > effective_max_offset) 1715 RETURN(KERN_NO_SPACE); 1716 1717 /* 1718 * Look for the first possible address; 1719 * if there's already something at this 1720 * address, we have to start after it. 1721 */ 1722 1723 if( map->disable_vmentry_reuse == TRUE) { 1724 VM_MAP_HIGHEST_ENTRY(map, entry, start); 1725 } else { 1726 assert(first_free_is_valid(map)); 1727 1728 entry = map->first_free; 1729 1730 if (entry == vm_map_to_entry(map)) { 1731 entry = NULL; 1732 } else { 1733 if (entry->vme_next == vm_map_to_entry(map)){ 1734 /* 1735 * Hole at the end of the map. 1736 */ 1737 entry = NULL; 1738 } else { 1739 if (start < (entry->vme_next)->vme_start ) { 1740 start = entry->vme_end; 1741 } else { 1742 /* 1743 * Need to do a lookup. 
1744 */ 1745 entry = NULL; 1746 } 1747 } 1748 } 1749 1750 if (entry == NULL) { 1751 vm_map_entry_t tmp_entry; 1752 if (vm_map_lookup_entry(map, start, &tmp_entry)) { 1753 assert(!entry_for_jit); 1754 start = tmp_entry->vme_end; 1755 } 1756 entry = tmp_entry; 1757 } 1758 } 1759 1760 /* 1761 * In any case, the "entry" always precedes 1762 * the proposed new region throughout the 1763 * loop: 1764 */ 1765 1766 while (TRUE) { 1767 register vm_map_entry_t next; 1768 1769 /* 1770 * Find the end of the proposed new region. 1771 * Be sure we didn't go beyond the end, or 1772 * wrap around the address. 1773 */ 1774 1775 end = ((start + mask) & ~mask); 1776 if (end < start) 1777 RETURN(KERN_NO_SPACE); 1778 start = end; 1779 end += size; 1780 1781 1782 1783 if ((end > effective_max_offset) || (end < start)) { 1784 if (map->wait_for_space) { 1785 if (size <= (effective_max_offset - 1786 effective_min_offset)) { 1787 assert_wait((event_t)map, 1788 THREAD_ABORTSAFE); 1789 vm_map_unlock(map); 1790 map_locked = FALSE; 1791 thread_block(THREAD_CONTINUE_NULL); 1792 goto StartAgain; 1793 } 1794 } 1795 RETURN(KERN_NO_SPACE); 1796 1797 } 1798 1799 /* 1800 * If there are no more entries, we must win. 1801 */ 1802 1803 next = entry->vme_next; 1804 if (next == vm_map_to_entry(map)) 1805 break; 1806 1807 /* 1808 * If there is another entry, it must be 1809 * after the end of the potential new region. 1810 */ 1811 1812 if (next->vme_start >= end) 1813 break; 1814 1815 /* 1816 * Didn't fit -- move to the next entry. 1817 */ 1818 1819 entry = next; 1820 start = entry->vme_end; 1821 } 1822 *address = start; 1823 } else { 1824 /* 1825 * Verify that: 1826 * the address doesn't itself violate 1827 * the mask requirement. 1828 */ 1829 1830 vm_map_lock(map); 1831 map_locked = TRUE; 1832 if ((start & mask) != 0) 1833 RETURN(KERN_NO_SPACE); 1834 1835 /* 1836 * ... the address is within bounds 1837 */ 1838 1839 end = start + size; 1840 1841 if ((start < effective_min_offset) || 1842 (end > effective_max_offset) || 1843 (start >= end)) { 1844 RETURN(KERN_INVALID_ADDRESS); 1845 } 1846 1847 if (overwrite && zap_old_map != VM_MAP_NULL) { 1848 /* 1849 * Fixed mapping and "overwrite" flag: attempt to 1850 * remove all existing mappings in the specified 1851 * address range, saving them in our "zap_old_map". 1852 */ 1853 (void) vm_map_delete(map, start, end, 1854 VM_MAP_REMOVE_SAVE_ENTRIES, 1855 zap_old_map); 1856 } 1857 1858 /* 1859 * ... the starting address isn't allocated 1860 */ 1861 1862 if (vm_map_lookup_entry(map, start, &entry)) { 1863 if (! (flags & VM_FLAGS_ALREADY)) { 1864 RETURN(KERN_NO_SPACE); 1865 } 1866 /* 1867 * Check if what's already there is what we want. 1868 */ 1869 tmp_start = start; 1870 tmp_offset = offset; 1871 if (entry->vme_start < start) { 1872 tmp_start -= start - entry->vme_start; 1873 tmp_offset -= start - entry->vme_start; 1874 1875 } 1876 for (; entry->vme_start < end; 1877 entry = entry->vme_next) { 1878 /* 1879 * Check if the mapping's attributes 1880 * match the existing map entry. 1881 */ 1882 if (entry == vm_map_to_entry(map) || 1883 entry->vme_start != tmp_start || 1884 entry->is_sub_map != is_submap || 1885 entry->offset != tmp_offset || 1886 entry->needs_copy != needs_copy || 1887 entry->protection != cur_protection || 1888 entry->max_protection != max_protection || 1889 entry->inheritance != inheritance || 1890 entry->alias != alias) { 1891 /* not the same mapping ! */ 1892 RETURN(KERN_NO_SPACE); 1893 } 1894 /* 1895 * Check if the same object is being mapped. 
1896 */ 1897 if (is_submap) { 1898 if (entry->object.sub_map != 1899 (vm_map_t) object) { 1900 /* not the same submap */ 1901 RETURN(KERN_NO_SPACE); 1902 } 1903 } else { 1904 if (entry->object.vm_object != object) { 1905 /* not the same VM object... */ 1906 vm_object_t obj2; 1907 1908 obj2 = entry->object.vm_object; 1909 if ((obj2 == VM_OBJECT_NULL || 1910 obj2->internal) && 1911 (object == VM_OBJECT_NULL || 1912 object->internal)) { 1913 /* 1914 * ... but both are 1915 * anonymous memory, 1916 * so equivalent. 1917 */ 1918 } else { 1919 RETURN(KERN_NO_SPACE); 1920 } 1921 } 1922 } 1923 1924 tmp_offset += entry->vme_end - entry->vme_start; 1925 tmp_start += entry->vme_end - entry->vme_start; 1926 if (entry->vme_end >= end) { 1927 /* reached the end of our mapping */ 1928 break; 1929 } 1930 } 1931 /* it all matches: let's use what's already there ! */ 1932 RETURN(KERN_MEMORY_PRESENT); 1933 } 1934 1935 /* 1936 * ... the next region doesn't overlap the 1937 * end point. 1938 */ 1939 1940 if ((entry->vme_next != vm_map_to_entry(map)) && 1941 (entry->vme_next->vme_start < end)) 1942 RETURN(KERN_NO_SPACE); 1943 } 1944 1945 /* 1946 * At this point, 1947 * "start" and "end" should define the endpoints of the 1948 * available new range, and 1949 * "entry" should refer to the region before the new 1950 * range, and 1951 * 1952 * the map should be locked. 1953 */ 1954 1955 /* 1956 * See whether we can avoid creating a new entry (and object) by 1957 * extending one of our neighbors. [So far, we only attempt to 1958 * extend from below.] Note that we can never extend/join 1959 * purgable objects because they need to remain distinct 1960 * entities in order to implement their "volatile object" 1961 * semantics. 1962 */ 1963 1964 if (purgable || entry_for_jit) { 1965 if (object == VM_OBJECT_NULL) { 1966 object = vm_object_allocate(size); 1967 object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 1968 if (purgable) { 1969 object->purgable = VM_PURGABLE_NONVOLATILE; 1970 } 1971 offset = (vm_object_offset_t)0; 1972 } 1973 } else if ((is_submap == FALSE) && 1974 (object == VM_OBJECT_NULL) && 1975 (entry != vm_map_to_entry(map)) && 1976 (entry->vme_end == start) && 1977 (!entry->is_shared) && 1978 (!entry->is_sub_map) && 1979 ((alias == VM_MEMORY_REALLOC) || (entry->alias == alias)) && 1980 (entry->inheritance == inheritance) && 1981 (entry->protection == cur_protection) && 1982 (entry->max_protection == max_protection) && 1983 (entry->behavior == VM_BEHAVIOR_DEFAULT) && 1984 (entry->in_transition == 0) && 1985 (entry->no_cache == no_cache) && 1986 ((entry->vme_end - entry->vme_start) + size <= 1987 (alias == VM_MEMORY_REALLOC ? 1988 ANON_CHUNK_SIZE : 1989 NO_COALESCE_LIMIT)) && 1990 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */ 1991 if (vm_object_coalesce(entry->object.vm_object, 1992 VM_OBJECT_NULL, 1993 entry->offset, 1994 (vm_object_offset_t) 0, 1995 (vm_map_size_t)(entry->vme_end - entry->vme_start), 1996 (vm_map_size_t)(end - entry->vme_end))) { 1997 /* 1998 * Coalesced the two objects - can extend 1999 * the previous map entry to include the 2000 * new range. 2001 */ 2002 map->size += (end - entry->vme_end); 2003 assert(entry->vme_start < end); 2004 entry->vme_end = end; 2005 vm_map_store_update_first_free(map, map->first_free); 2006 RETURN(KERN_SUCCESS); 2007 } 2008 } 2009 2010 step = superpage_size ? 
SUPERPAGE_SIZE : (end - start); 2011 new_entry = NULL; 2012 2013 for (tmp2_start = start; tmp2_start<end; tmp2_start += step) { 2014 tmp2_end = tmp2_start + step; 2015 /* 2016 * Create a new entry 2017 * LP64todo - for now, we can only allocate 4GB internal objects 2018 * because the default pager can't page bigger ones. Remove this 2019 * when it can. 2020 * 2021 * XXX FBDP 2022 * The reserved "page zero" in each process's address space can 2023 * be arbitrarily large. Splitting it into separate 4GB objects and 2024 * therefore different VM map entries serves no purpose and just 2025 * slows down operations on the VM map, so let's not split the 2026 * allocation into 4GB chunks if the max protection is NONE. That 2027 * memory should never be accessible, so it will never get to the 2028 * default pager. 2029 */ 2030 tmp_start = tmp2_start; 2031 if (object == VM_OBJECT_NULL && 2032 size > (vm_map_size_t)ANON_CHUNK_SIZE && 2033 max_protection != VM_PROT_NONE && 2034 superpage_size == 0) 2035 tmp_end = tmp_start + (vm_map_size_t)ANON_CHUNK_SIZE; 2036 else 2037 tmp_end = tmp2_end; 2038 do { 2039 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end, 2040 object, offset, needs_copy, 2041 FALSE, FALSE, 2042 cur_protection, max_protection, 2043 VM_BEHAVIOR_DEFAULT, 2044 (entry_for_jit)? VM_INHERIT_NONE: inheritance, 2045 0, no_cache, 2046 permanent, superpage_size); 2047 new_entry->alias = alias; 2048 if (entry_for_jit){ 2049 if (!(map->jit_entry_exists)){ 2050 new_entry->used_for_jit = TRUE; 2051 map->jit_entry_exists = TRUE; 2052 } 2053 } 2054 2055 if (is_submap) { 2056 vm_map_t submap; 2057 boolean_t submap_is_64bit; 2058 boolean_t use_pmap; 2059 2060 new_entry->is_sub_map = TRUE; 2061 submap = (vm_map_t) object; 2062 submap_is_64bit = vm_map_is_64bit(submap); 2063 use_pmap = (alias == VM_MEMORY_SHARED_PMAP); 2064 2065 #ifndef NO_NESTED_PMAP 2066 if (use_pmap && submap->pmap == NULL) { 2067 ledger_t ledger = map->pmap->ledger; 2068 /* we need a sub pmap to nest... */ 2069 submap->pmap = pmap_create(ledger, 0, 2070 submap_is_64bit); 2071 if (submap->pmap == NULL) { 2072 /* let's proceed without nesting... */ 2073 } 2074 } 2075 if (use_pmap && submap->pmap != NULL) { 2076 kr = pmap_nest(map->pmap, 2077 submap->pmap, 2078 tmp_start, 2079 tmp_start, 2080 tmp_end - tmp_start); 2081 if (kr != KERN_SUCCESS) { 2082 printf("vm_map_enter: " 2083 "pmap_nest(0x%llx,0x%llx) " 2084 "error 0x%x\n", 2085 (long long)tmp_start, 2086 (long long)tmp_end, 2087 kr); 2088 } else { 2089 /* we're now nested ! 
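 * Nesting means this range of the parent map now shares the
 * submap's page tables directly instead of duplicating the
 * translations in the parent pmap (this is typically how the
 * shared region gets mapped into every task).  The "use_pmap"
 * flag set below records that fact for later unnesting.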
*/ 2090 new_entry->use_pmap = TRUE; 2091 pmap_empty = FALSE; 2092 } 2093 } 2094 #endif /* NO_NESTED_PMAP */ 2095 } 2096 entry = new_entry; 2097 2098 if (superpage_size) { 2099 vm_page_t pages, m; 2100 vm_object_t sp_object; 2101 2102 entry->offset = 0; 2103 2104 /* allocate one superpage */ 2105 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES-1, TRUE, 0); 2106 if (kr != KERN_SUCCESS) { 2107 new_mapping_established = TRUE; /* will cause deallocation of whole range */ 2108 RETURN(kr); 2109 } 2110 2111 /* create one vm_object per superpage */ 2112 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start)); 2113 sp_object->phys_contiguous = TRUE; 2114 sp_object->vo_shadow_offset = (vm_object_offset_t)pages->phys_page*PAGE_SIZE; 2115 entry->object.vm_object = sp_object; 2116 2117 /* enter the base pages into the object */ 2118 vm_object_lock(sp_object); 2119 for (offset = 0; offset < SUPERPAGE_SIZE; offset += PAGE_SIZE) { 2120 m = pages; 2121 pmap_zero_page(m->phys_page); 2122 pages = NEXT_PAGE(m); 2123 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; 2124 vm_page_insert(m, sp_object, offset); 2125 } 2126 vm_object_unlock(sp_object); 2127 } 2128 } while (tmp_end != tmp2_end && 2129 (tmp_start = tmp_end) && 2130 (tmp_end = (tmp2_end - tmp_end > (vm_map_size_t)ANON_CHUNK_SIZE) ? 2131 tmp_end + (vm_map_size_t)ANON_CHUNK_SIZE : tmp2_end)); 2132 } 2133 2134 vm_map_unlock(map); 2135 map_locked = FALSE; 2136 new_mapping_established = TRUE; 2137 /* Wire down the new entry if the user 2138 * requested all new map entries be wired. 2139 */ 2140 if ((map->wiring_required)||(superpage_size)) { 2141 pmap_empty = FALSE; /* pmap won't be empty */ 2142 kr = vm_map_wire(map, start, end, 2143 new_entry->protection, TRUE); 2144 RETURN(kr); 2145 } 2146 2147 if ((object != VM_OBJECT_NULL) && 2148 (vm_map_pmap_enter_enable) && 2149 (!anywhere) && 2150 (!needs_copy) && 2151 (size < (128*1024))) { 2152 pmap_empty = FALSE; /* pmap won't be empty */ 2153 2154 if (override_nx(map, alias) && cur_protection) 2155 cur_protection |= VM_PROT_EXECUTE; 2156 2157 vm_map_pmap_enter(map, start, end, 2158 object, offset, cur_protection); 2159 } 2160 2161BailOut: ; 2162 if (result == KERN_SUCCESS) { 2163 vm_prot_t pager_prot; 2164 memory_object_t pager; 2165 2166 if (pmap_empty && 2167 !(flags & VM_FLAGS_NO_PMAP_CHECK)) { 2168 assert(vm_map_pmap_is_empty(map, 2169 *address, 2170 *address+size)); 2171 } 2172 2173 /* 2174 * For "named" VM objects, let the pager know that the 2175 * memory object is being mapped. Some pagers need to keep 2176 * track of this, to know when they can reclaim the memory 2177 * object, for example. 2178 * VM calls memory_object_map() for each mapping (specifying 2179 * the protection of each mapping) and calls 2180 * memory_object_last_unmap() when all the mappings are gone. 2181 */ 2182 pager_prot = max_protection; 2183 if (needs_copy) { 2184 /* 2185 * Copy-On-Write mapping: won't modify 2186 * the memory object. 
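 * Clearing VM_PROT_WRITE from "pager_prot" below tells the pager,
 * via memory_object_map(), that no modification can reach the
 * memory object through this mapping: any write will fault and be
 * redirected to a shadow (copy) object instead.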
2187 */ 2188 pager_prot &= ~VM_PROT_WRITE; 2189 } 2190 if (!is_submap && 2191 object != VM_OBJECT_NULL && 2192 object->named && 2193 object->pager != MEMORY_OBJECT_NULL) { 2194 vm_object_lock(object); 2195 pager = object->pager; 2196 if (object->named && 2197 pager != MEMORY_OBJECT_NULL) { 2198 assert(object->pager_ready); 2199 vm_object_mapping_wait(object, THREAD_UNINT); 2200 vm_object_mapping_begin(object); 2201 vm_object_unlock(object); 2202 2203 kr = memory_object_map(pager, pager_prot); 2204 assert(kr == KERN_SUCCESS); 2205 2206 vm_object_lock(object); 2207 vm_object_mapping_end(object); 2208 } 2209 vm_object_unlock(object); 2210 } 2211 } else { 2212 if (new_mapping_established) { 2213 /* 2214 * We have to get rid of the new mappings since we 2215 * won't make them available to the user. 2216 * Try and do that atomically, to minimize the risk 2217 * that someone else create new mappings that range. 2218 */ 2219 zap_new_map = vm_map_create(PMAP_NULL, 2220 *address, 2221 *address + size, 2222 map->hdr.entries_pageable); 2223 if (!map_locked) { 2224 vm_map_lock(map); 2225 map_locked = TRUE; 2226 } 2227 (void) vm_map_delete(map, *address, *address+size, 2228 VM_MAP_REMOVE_SAVE_ENTRIES, 2229 zap_new_map); 2230 } 2231 if (zap_old_map != VM_MAP_NULL && 2232 zap_old_map->hdr.nentries != 0) { 2233 vm_map_entry_t entry1, entry2; 2234 2235 /* 2236 * The new mapping failed. Attempt to restore 2237 * the old mappings, saved in the "zap_old_map". 2238 */ 2239 if (!map_locked) { 2240 vm_map_lock(map); 2241 map_locked = TRUE; 2242 } 2243 2244 /* first check if the coast is still clear */ 2245 start = vm_map_first_entry(zap_old_map)->vme_start; 2246 end = vm_map_last_entry(zap_old_map)->vme_end; 2247 if (vm_map_lookup_entry(map, start, &entry1) || 2248 vm_map_lookup_entry(map, end, &entry2) || 2249 entry1 != entry2) { 2250 /* 2251 * Part of that range has already been 2252 * re-mapped: we can't restore the old 2253 * mappings... 2254 */ 2255 vm_map_enter_restore_failures++; 2256 } else { 2257 /* 2258 * Transfer the saved map entries from 2259 * "zap_old_map" to the original "map", 2260 * inserting them all after "entry1". 2261 */ 2262 for (entry2 = vm_map_first_entry(zap_old_map); 2263 entry2 != vm_map_to_entry(zap_old_map); 2264 entry2 = vm_map_first_entry(zap_old_map)) { 2265 vm_map_size_t entry_size; 2266 2267 entry_size = (entry2->vme_end - 2268 entry2->vme_start); 2269 vm_map_store_entry_unlink(zap_old_map, 2270 entry2); 2271 zap_old_map->size -= entry_size; 2272 vm_map_store_entry_link(map, entry1, entry2); 2273 map->size += entry_size; 2274 entry1 = entry2; 2275 } 2276 if (map->wiring_required) { 2277 /* 2278 * XXX TODO: we should rewire the 2279 * old pages here... 2280 */ 2281 } 2282 vm_map_enter_restore_successes++; 2283 } 2284 } 2285 } 2286 2287 if (map_locked) { 2288 vm_map_unlock(map); 2289 } 2290 2291 /* 2292 * Get rid of the "zap_maps" and all the map entries that 2293 * they may still contain. 
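 * The zap maps never had a pmap of their own (they are created
 * with PMAP_NULL) and only ever held entries unlinked from "map",
 * so VM_MAP_REMOVE_NO_PMAP_CLEANUP is used when destroying them.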
2294 */ 2295 if (zap_old_map != VM_MAP_NULL) { 2296 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); 2297 zap_old_map = VM_MAP_NULL; 2298 } 2299 if (zap_new_map != VM_MAP_NULL) { 2300 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); 2301 zap_new_map = VM_MAP_NULL; 2302 } 2303 2304 return result; 2305 2306#undef RETURN 2307} 2308 2309kern_return_t 2310vm_map_enter_mem_object( 2311 vm_map_t target_map, 2312 vm_map_offset_t *address, 2313 vm_map_size_t initial_size, 2314 vm_map_offset_t mask, 2315 int flags, 2316 ipc_port_t port, 2317 vm_object_offset_t offset, 2318 boolean_t copy, 2319 vm_prot_t cur_protection, 2320 vm_prot_t max_protection, 2321 vm_inherit_t inheritance) 2322{ 2323 vm_map_address_t map_addr; 2324 vm_map_size_t map_size; 2325 vm_object_t object; 2326 vm_object_size_t size; 2327 kern_return_t result; 2328 boolean_t mask_cur_protection, mask_max_protection; 2329 2330 mask_cur_protection = cur_protection & VM_PROT_IS_MASK; 2331 mask_max_protection = max_protection & VM_PROT_IS_MASK; 2332 cur_protection &= ~VM_PROT_IS_MASK; 2333 max_protection &= ~VM_PROT_IS_MASK; 2334 2335 /* 2336 * Check arguments for validity 2337 */ 2338 if ((target_map == VM_MAP_NULL) || 2339 (cur_protection & ~VM_PROT_ALL) || 2340 (max_protection & ~VM_PROT_ALL) || 2341 (inheritance > VM_INHERIT_LAST_VALID) || 2342 initial_size == 0) 2343 return KERN_INVALID_ARGUMENT; 2344 2345 map_addr = vm_map_trunc_page(*address); 2346 map_size = vm_map_round_page(initial_size); 2347 size = vm_object_round_page(initial_size); 2348 2349 /* 2350 * Find the vm object (if any) corresponding to this port. 2351 */ 2352 if (!IP_VALID(port)) { 2353 object = VM_OBJECT_NULL; 2354 offset = 0; 2355 copy = FALSE; 2356 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) { 2357 vm_named_entry_t named_entry; 2358 2359 named_entry = (vm_named_entry_t) port->ip_kobject; 2360 /* a few checks to make sure user is obeying rules */ 2361 if (size == 0) { 2362 if (offset >= named_entry->size) 2363 return KERN_INVALID_RIGHT; 2364 size = named_entry->size - offset; 2365 } 2366 if (mask_max_protection) { 2367 max_protection &= named_entry->protection; 2368 } 2369 if (mask_cur_protection) { 2370 cur_protection &= named_entry->protection; 2371 } 2372 if ((named_entry->protection & max_protection) != 2373 max_protection) 2374 return KERN_INVALID_RIGHT; 2375 if ((named_entry->protection & cur_protection) != 2376 cur_protection) 2377 return KERN_INVALID_RIGHT; 2378 if (named_entry->size < (offset + size)) 2379 return KERN_INVALID_ARGUMENT; 2380 2381 /* the callers parameter offset is defined to be the */ 2382 /* offset from beginning of named entry offset in object */ 2383 offset = offset + named_entry->offset; 2384 2385 named_entry_lock(named_entry); 2386 if (named_entry->is_sub_map) { 2387 vm_map_t submap; 2388 2389 submap = named_entry->backing.map; 2390 vm_map_lock(submap); 2391 vm_map_reference(submap); 2392 vm_map_unlock(submap); 2393 named_entry_unlock(named_entry); 2394 2395 result = vm_map_enter(target_map, 2396 &map_addr, 2397 map_size, 2398 mask, 2399 flags | VM_FLAGS_SUBMAP, 2400 (vm_object_t) submap, 2401 offset, 2402 copy, 2403 cur_protection, 2404 max_protection, 2405 inheritance); 2406 if (result != KERN_SUCCESS) { 2407 vm_map_deallocate(submap); 2408 } else { 2409 /* 2410 * No need to lock "submap" just to check its 2411 * "mapped" flag: that flag is never reset 2412 * once it's been set and if we race, we'll 2413 * just end up setting it twice, which is OK. 
2414 */ 2415 if (submap->mapped_in_other_pmaps == FALSE && 2416 vm_map_pmap(submap) != PMAP_NULL && 2417 vm_map_pmap(submap) != 2418 vm_map_pmap(target_map)) { 2419 /* 2420 * This submap is being mapped in a map 2421 * that uses a different pmap. 2422 * Set its "mapped_in_other_pmaps" flag 2423 * to indicate that we now need to 2424 * remove mappings from all pmaps rather 2425 * than just the submap's pmap. 2426 */ 2427 vm_map_lock(submap); 2428 submap->mapped_in_other_pmaps = TRUE; 2429 vm_map_unlock(submap); 2430 } 2431 *address = map_addr; 2432 } 2433 return result; 2434 2435 } else if (named_entry->is_pager) { 2436 unsigned int access; 2437 vm_prot_t protections; 2438 unsigned int wimg_mode; 2439 2440 protections = named_entry->protection & VM_PROT_ALL; 2441 access = GET_MAP_MEM(named_entry->protection); 2442 2443 object = vm_object_enter(named_entry->backing.pager, 2444 named_entry->size, 2445 named_entry->internal, 2446 FALSE, 2447 FALSE); 2448 if (object == VM_OBJECT_NULL) { 2449 named_entry_unlock(named_entry); 2450 return KERN_INVALID_OBJECT; 2451 } 2452 2453 /* JMM - drop reference on pager here */ 2454 2455 /* create an extra ref for the named entry */ 2456 vm_object_lock(object); 2457 vm_object_reference_locked(object); 2458 named_entry->backing.object = object; 2459 named_entry->is_pager = FALSE; 2460 named_entry_unlock(named_entry); 2461 2462 wimg_mode = object->wimg_bits; 2463 2464 if (access == MAP_MEM_IO) { 2465 wimg_mode = VM_WIMG_IO; 2466 } else if (access == MAP_MEM_COPYBACK) { 2467 wimg_mode = VM_WIMG_USE_DEFAULT; 2468 } else if (access == MAP_MEM_INNERWBACK) { 2469 wimg_mode = VM_WIMG_INNERWBACK; 2470 } else if (access == MAP_MEM_WTHRU) { 2471 wimg_mode = VM_WIMG_WTHRU; 2472 } else if (access == MAP_MEM_WCOMB) { 2473 wimg_mode = VM_WIMG_WCOMB; 2474 } 2475 2476 /* wait for object (if any) to be ready */ 2477 if (!named_entry->internal) { 2478 while (!object->pager_ready) { 2479 vm_object_wait( 2480 object, 2481 VM_OBJECT_EVENT_PAGER_READY, 2482 THREAD_UNINT); 2483 vm_object_lock(object); 2484 } 2485 } 2486 2487 if (object->wimg_bits != wimg_mode) 2488 vm_object_change_wimg_mode(object, wimg_mode); 2489 2490 object->true_share = TRUE; 2491 2492 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) 2493 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 2494 vm_object_unlock(object); 2495 } else { 2496 /* This is the case where we are going to map */ 2497 /* an already mapped object. If the object is */ 2498 /* not ready it is internal. An external */ 2499 /* object cannot be mapped until it is ready */ 2500 /* we can therefore avoid the ready check */ 2501 /* in this case. */ 2502 object = named_entry->backing.object; 2503 assert(object != VM_OBJECT_NULL); 2504 named_entry_unlock(named_entry); 2505 vm_object_reference(object); 2506 } 2507 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { 2508 /* 2509 * JMM - This is temporary until we unify named entries 2510 * and raw memory objects. 2511 * 2512 * Detected fake ip_kotype for a memory object. In 2513 * this case, the port isn't really a port at all, but 2514 * instead is just a raw memory object. 
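 * vm_object_enter() below finds or creates the VM object backing
 * this memory object and returns it with a reference held; all we
 * have to do afterwards is wait for its pager to become ready
 * before the object can be mapped.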
2515 */ 2516 2517 object = vm_object_enter((memory_object_t)port, 2518 size, FALSE, FALSE, FALSE); 2519 if (object == VM_OBJECT_NULL) 2520 return KERN_INVALID_OBJECT; 2521 2522 /* wait for object (if any) to be ready */ 2523 if (object != VM_OBJECT_NULL) { 2524 if (object == kernel_object) { 2525 printf("Warning: Attempt to map kernel object" 2526 " by a non-private kernel entity\n"); 2527 return KERN_INVALID_OBJECT; 2528 } 2529 if (!object->pager_ready) { 2530 vm_object_lock(object); 2531 2532 while (!object->pager_ready) { 2533 vm_object_wait(object, 2534 VM_OBJECT_EVENT_PAGER_READY, 2535 THREAD_UNINT); 2536 vm_object_lock(object); 2537 } 2538 vm_object_unlock(object); 2539 } 2540 } 2541 } else { 2542 return KERN_INVALID_OBJECT; 2543 } 2544 2545 if (object != VM_OBJECT_NULL && 2546 object->named && 2547 object->pager != MEMORY_OBJECT_NULL && 2548 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { 2549 memory_object_t pager; 2550 vm_prot_t pager_prot; 2551 kern_return_t kr; 2552 2553 /* 2554 * For "named" VM objects, let the pager know that the 2555 * memory object is being mapped. Some pagers need to keep 2556 * track of this, to know when they can reclaim the memory 2557 * object, for example. 2558 * VM calls memory_object_map() for each mapping (specifying 2559 * the protection of each mapping) and calls 2560 * memory_object_last_unmap() when all the mappings are gone. 2561 */ 2562 pager_prot = max_protection; 2563 if (copy) { 2564 /* 2565 * Copy-On-Write mapping: won't modify the 2566 * memory object. 2567 */ 2568 pager_prot &= ~VM_PROT_WRITE; 2569 } 2570 vm_object_lock(object); 2571 pager = object->pager; 2572 if (object->named && 2573 pager != MEMORY_OBJECT_NULL && 2574 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { 2575 assert(object->pager_ready); 2576 vm_object_mapping_wait(object, THREAD_UNINT); 2577 vm_object_mapping_begin(object); 2578 vm_object_unlock(object); 2579 2580 kr = memory_object_map(pager, pager_prot); 2581 assert(kr == KERN_SUCCESS); 2582 2583 vm_object_lock(object); 2584 vm_object_mapping_end(object); 2585 } 2586 vm_object_unlock(object); 2587 } 2588 2589 /* 2590 * Perform the copy if requested 2591 */ 2592 2593 if (copy) { 2594 vm_object_t new_object; 2595 vm_object_offset_t new_offset; 2596 2597 result = vm_object_copy_strategically(object, offset, size, 2598 &new_object, &new_offset, 2599 ©); 2600 2601 2602 if (result == KERN_MEMORY_RESTART_COPY) { 2603 boolean_t success; 2604 boolean_t src_needs_copy; 2605 2606 /* 2607 * XXX 2608 * We currently ignore src_needs_copy. 2609 * This really is the issue of how to make 2610 * MEMORY_OBJECT_COPY_SYMMETRIC safe for 2611 * non-kernel users to use. Solution forthcoming. 2612 * In the meantime, since we don't allow non-kernel 2613 * memory managers to specify symmetric copy, 2614 * we won't run into problems here. 2615 */ 2616 new_object = object; 2617 new_offset = offset; 2618 success = vm_object_copy_quickly(&new_object, 2619 new_offset, size, 2620 &src_needs_copy, 2621 ©); 2622 assert(success); 2623 result = KERN_SUCCESS; 2624 } 2625 /* 2626 * Throw away the reference to the 2627 * original object, as it won't be mapped. 
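 * (Both vm_object_copy_strategically() and the "quick" copy above
 * leave a reference on whatever ends up in "new_object", so the
 * caller-provided reference on the source object is dropped here.)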
2628 */ 2629 2630 vm_object_deallocate(object); 2631 2632 if (result != KERN_SUCCESS) 2633 return result; 2634 2635 object = new_object; 2636 offset = new_offset; 2637 } 2638 2639 result = vm_map_enter(target_map, 2640 &map_addr, map_size, 2641 (vm_map_offset_t)mask, 2642 flags, 2643 object, offset, 2644 copy, 2645 cur_protection, max_protection, inheritance); 2646 if (result != KERN_SUCCESS) 2647 vm_object_deallocate(object); 2648 *address = map_addr; 2649 return result; 2650} 2651 2652 2653 2654 2655kern_return_t 2656vm_map_enter_mem_object_control( 2657 vm_map_t target_map, 2658 vm_map_offset_t *address, 2659 vm_map_size_t initial_size, 2660 vm_map_offset_t mask, 2661 int flags, 2662 memory_object_control_t control, 2663 vm_object_offset_t offset, 2664 boolean_t copy, 2665 vm_prot_t cur_protection, 2666 vm_prot_t max_protection, 2667 vm_inherit_t inheritance) 2668{ 2669 vm_map_address_t map_addr; 2670 vm_map_size_t map_size; 2671 vm_object_t object; 2672 vm_object_size_t size; 2673 kern_return_t result; 2674 memory_object_t pager; 2675 vm_prot_t pager_prot; 2676 kern_return_t kr; 2677 2678 /* 2679 * Check arguments for validity 2680 */ 2681 if ((target_map == VM_MAP_NULL) || 2682 (cur_protection & ~VM_PROT_ALL) || 2683 (max_protection & ~VM_PROT_ALL) || 2684 (inheritance > VM_INHERIT_LAST_VALID) || 2685 initial_size == 0) 2686 return KERN_INVALID_ARGUMENT; 2687 2688 map_addr = vm_map_trunc_page(*address); 2689 map_size = vm_map_round_page(initial_size); 2690 size = vm_object_round_page(initial_size); 2691 2692 object = memory_object_control_to_vm_object(control); 2693 2694 if (object == VM_OBJECT_NULL) 2695 return KERN_INVALID_OBJECT; 2696 2697 if (object == kernel_object) { 2698 printf("Warning: Attempt to map kernel object" 2699 " by a non-private kernel entity\n"); 2700 return KERN_INVALID_OBJECT; 2701 } 2702 2703 vm_object_lock(object); 2704 object->ref_count++; 2705 vm_object_res_reference(object); 2706 2707 /* 2708 * For "named" VM objects, let the pager know that the 2709 * memory object is being mapped. Some pagers need to keep 2710 * track of this, to know when they can reclaim the memory 2711 * object, for example. 2712 * VM calls memory_object_map() for each mapping (specifying 2713 * the protection of each mapping) and calls 2714 * memory_object_last_unmap() when all the mappings are gone. 2715 */ 2716 pager_prot = max_protection; 2717 if (copy) { 2718 pager_prot &= ~VM_PROT_WRITE; 2719 } 2720 pager = object->pager; 2721 if (object->named && 2722 pager != MEMORY_OBJECT_NULL && 2723 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { 2724 assert(object->pager_ready); 2725 vm_object_mapping_wait(object, THREAD_UNINT); 2726 vm_object_mapping_begin(object); 2727 vm_object_unlock(object); 2728 2729 kr = memory_object_map(pager, pager_prot); 2730 assert(kr == KERN_SUCCESS); 2731 2732 vm_object_lock(object); 2733 vm_object_mapping_end(object); 2734 } 2735 vm_object_unlock(object); 2736 2737 /* 2738 * Perform the copy if requested 2739 */ 2740 2741 if (copy) { 2742 vm_object_t new_object; 2743 vm_object_offset_t new_offset; 2744 2745 result = vm_object_copy_strategically(object, offset, size, 2746 &new_object, &new_offset, 2747 ©); 2748 2749 2750 if (result == KERN_MEMORY_RESTART_COPY) { 2751 boolean_t success; 2752 boolean_t src_needs_copy; 2753 2754 /* 2755 * XXX 2756 * We currently ignore src_needs_copy. 2757 * This really is the issue of how to make 2758 * MEMORY_OBJECT_COPY_SYMMETRIC safe for 2759 * non-kernel users to use. Solution forthcoming. 
2760 * In the meantime, since we don't allow non-kernel 2761 * memory managers to specify symmetric copy, 2762 * we won't run into problems here. 2763 */ 2764 new_object = object; 2765 new_offset = offset; 2766 success = vm_object_copy_quickly(&new_object, 2767 new_offset, size, 2768 &src_needs_copy, 2769 ©); 2770 assert(success); 2771 result = KERN_SUCCESS; 2772 } 2773 /* 2774 * Throw away the reference to the 2775 * original object, as it won't be mapped. 2776 */ 2777 2778 vm_object_deallocate(object); 2779 2780 if (result != KERN_SUCCESS) 2781 return result; 2782 2783 object = new_object; 2784 offset = new_offset; 2785 } 2786 2787 result = vm_map_enter(target_map, 2788 &map_addr, map_size, 2789 (vm_map_offset_t)mask, 2790 flags, 2791 object, offset, 2792 copy, 2793 cur_protection, max_protection, inheritance); 2794 if (result != KERN_SUCCESS) 2795 vm_object_deallocate(object); 2796 *address = map_addr; 2797 2798 return result; 2799} 2800 2801 2802#if VM_CPM 2803 2804#ifdef MACH_ASSERT 2805extern pmap_paddr_t avail_start, avail_end; 2806#endif 2807 2808/* 2809 * Allocate memory in the specified map, with the caveat that 2810 * the memory is physically contiguous. This call may fail 2811 * if the system can't find sufficient contiguous memory. 2812 * This call may cause or lead to heart-stopping amounts of 2813 * paging activity. 2814 * 2815 * Memory obtained from this call should be freed in the 2816 * normal way, viz., via vm_deallocate. 2817 */ 2818kern_return_t 2819vm_map_enter_cpm( 2820 vm_map_t map, 2821 vm_map_offset_t *addr, 2822 vm_map_size_t size, 2823 int flags) 2824{ 2825 vm_object_t cpm_obj; 2826 pmap_t pmap; 2827 vm_page_t m, pages; 2828 kern_return_t kr; 2829 vm_map_offset_t va, start, end, offset; 2830#if MACH_ASSERT 2831 vm_map_offset_t prev_addr = 0; 2832#endif /* MACH_ASSERT */ 2833 2834 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); 2835 2836 if (size == 0) { 2837 *addr = 0; 2838 return KERN_SUCCESS; 2839 } 2840 if (anywhere) 2841 *addr = vm_map_min(map); 2842 else 2843 *addr = vm_map_trunc_page(*addr); 2844 size = vm_map_round_page(size); 2845 2846 /* 2847 * LP64todo - cpm_allocate should probably allow 2848 * allocations of >4GB, but not with the current 2849 * algorithm, so just cast down the size for now. 2850 */ 2851 if (size > VM_MAX_ADDRESS) 2852 return KERN_RESOURCE_SHORTAGE; 2853 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size), 2854 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) 2855 return kr; 2856 2857 cpm_obj = vm_object_allocate((vm_object_size_t)size); 2858 assert(cpm_obj != VM_OBJECT_NULL); 2859 assert(cpm_obj->internal); 2860 assert(cpm_obj->vo_size == (vm_object_size_t)size); 2861 assert(cpm_obj->can_persist == FALSE); 2862 assert(cpm_obj->pager_created == FALSE); 2863 assert(cpm_obj->pageout == FALSE); 2864 assert(cpm_obj->shadow == VM_OBJECT_NULL); 2865 2866 /* 2867 * Insert pages into object. 2868 */ 2869 2870 vm_object_lock(cpm_obj); 2871 for (offset = 0; offset < size; offset += PAGE_SIZE) { 2872 m = pages; 2873 pages = NEXT_PAGE(m); 2874 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; 2875 2876 assert(!m->gobbled); 2877 assert(!m->wanted); 2878 assert(!m->pageout); 2879 assert(!m->tabled); 2880 assert(VM_PAGE_WIRED(m)); 2881 /* 2882 * ENCRYPTED SWAP: 2883 * "m" is not supposed to be pageable, so it 2884 * should not be encrypted. It wouldn't be safe 2885 * to enter it in a new VM object while encrypted. 
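 * (The pages handed back by cpm_allocate() above are fresh, wired
 * pages that were never eligible for swap-out, so they cannot have
 * been encrypted; the assertions around this loop simply re-check
 * those invariants before each page is inserted into "cpm_obj".)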
2886 */ 2887 ASSERT_PAGE_DECRYPTED(m); 2888 assert(m->busy); 2889 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT)); 2890 2891 m->busy = FALSE; 2892 vm_page_insert(m, cpm_obj, offset); 2893 } 2894 assert(cpm_obj->resident_page_count == size / PAGE_SIZE); 2895 vm_object_unlock(cpm_obj); 2896 2897 /* 2898 * Hang onto a reference on the object in case a 2899 * multi-threaded application for some reason decides 2900 * to deallocate the portion of the address space into 2901 * which we will insert this object. 2902 * 2903 * Unfortunately, we must insert the object now before 2904 * we can talk to the pmap module about which addresses 2905 * must be wired down. Hence, the race with a multi- 2906 * threaded app. 2907 */ 2908 vm_object_reference(cpm_obj); 2909 2910 /* 2911 * Insert object into map. 2912 */ 2913 2914 kr = vm_map_enter( 2915 map, 2916 addr, 2917 size, 2918 (vm_map_offset_t)0, 2919 flags, 2920 cpm_obj, 2921 (vm_object_offset_t)0, 2922 FALSE, 2923 VM_PROT_ALL, 2924 VM_PROT_ALL, 2925 VM_INHERIT_DEFAULT); 2926 2927 if (kr != KERN_SUCCESS) { 2928 /* 2929 * A CPM object doesn't have can_persist set, 2930 * so all we have to do is deallocate it to 2931 * free up these pages. 2932 */ 2933 assert(cpm_obj->pager_created == FALSE); 2934 assert(cpm_obj->can_persist == FALSE); 2935 assert(cpm_obj->pageout == FALSE); 2936 assert(cpm_obj->shadow == VM_OBJECT_NULL); 2937 vm_object_deallocate(cpm_obj); /* kill acquired ref */ 2938 vm_object_deallocate(cpm_obj); /* kill creation ref */ 2939 } 2940 2941 /* 2942 * Inform the physical mapping system that the 2943 * range of addresses may not fault, so that 2944 * page tables and such can be locked down as well. 2945 */ 2946 start = *addr; 2947 end = start + size; 2948 pmap = vm_map_pmap(map); 2949 pmap_pageable(pmap, start, end, FALSE); 2950 2951 /* 2952 * Enter each page into the pmap, to avoid faults. 2953 * Note that this loop could be coded more efficiently, 2954 * if the need arose, rather than looking up each page 2955 * again. 2956 */ 2957 for (offset = 0, va = start; offset < size; 2958 va += PAGE_SIZE, offset += PAGE_SIZE) { 2959 int type_of_fault; 2960 2961 vm_object_lock(cpm_obj); 2962 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); 2963 assert(m != VM_PAGE_NULL); 2964 2965 vm_page_zero_fill(m); 2966 2967 type_of_fault = DBG_ZERO_FILL_FAULT; 2968 2969 vm_fault_enter(m, pmap, va, VM_PROT_ALL, VM_PROT_WRITE, 2970 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, 2971 &type_of_fault); 2972 2973 vm_object_unlock(cpm_obj); 2974 } 2975 2976#if MACH_ASSERT 2977 /* 2978 * Verify ordering in address space. 
2979 */ 2980 for (offset = 0; offset < size; offset += PAGE_SIZE) { 2981 vm_object_lock(cpm_obj); 2982 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); 2983 vm_object_unlock(cpm_obj); 2984 if (m == VM_PAGE_NULL) 2985 panic("vm_allocate_cpm: obj %p off 0x%llx no page", 2986 cpm_obj, (uint64_t)offset); 2987 assert(m->tabled); 2988 assert(!m->busy); 2989 assert(!m->wanted); 2990 assert(!m->fictitious); 2991 assert(!m->private); 2992 assert(!m->absent); 2993 assert(!m->error); 2994 assert(!m->cleaning); 2995 assert(!m->laundry); 2996 assert(!m->precious); 2997 assert(!m->clustered); 2998 if (offset != 0) { 2999 if (m->phys_page != prev_addr + 1) { 3000 printf("start 0x%llx end 0x%llx va 0x%llx\n", 3001 (uint64_t)start, (uint64_t)end, (uint64_t)va); 3002 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset); 3003 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr); 3004 panic("vm_allocate_cpm: pages not contig!"); 3005 } 3006 } 3007 prev_addr = m->phys_page; 3008 } 3009#endif /* MACH_ASSERT */ 3010 3011 vm_object_deallocate(cpm_obj); /* kill extra ref */ 3012 3013 return kr; 3014} 3015 3016 3017#else /* VM_CPM */ 3018 3019/* 3020 * Interface is defined in all cases, but unless the kernel 3021 * is built explicitly for this option, the interface does 3022 * nothing. 3023 */ 3024 3025kern_return_t 3026vm_map_enter_cpm( 3027 __unused vm_map_t map, 3028 __unused vm_map_offset_t *addr, 3029 __unused vm_map_size_t size, 3030 __unused int flags) 3031{ 3032 return KERN_FAILURE; 3033} 3034#endif /* VM_CPM */ 3035 3036/* Not used without nested pmaps */ 3037#ifndef NO_NESTED_PMAP 3038/* 3039 * Clip and unnest a portion of a nested submap mapping. 3040 */ 3041 3042 3043static void 3044vm_map_clip_unnest( 3045 vm_map_t map, 3046 vm_map_entry_t entry, 3047 vm_map_offset_t start_unnest, 3048 vm_map_offset_t end_unnest) 3049{ 3050 vm_map_offset_t old_start_unnest = start_unnest; 3051 vm_map_offset_t old_end_unnest = end_unnest; 3052 3053 assert(entry->is_sub_map); 3054 assert(entry->object.sub_map != NULL); 3055 3056 /* 3057 * Query the platform for the optimal unnest range. 3058 * DRK: There's some duplication of effort here, since 3059 * callers may have adjusted the range to some extent. This 3060 * routine was introduced to support 1GiB subtree nesting 3061 * for x86 platforms, which can also nest on 2MiB boundaries 3062 * depending on size/alignment. 
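 * pmap_adjust_unnest_parameters() may therefore widen the
 * requested range to something the pmap layer can actually
 * unnest; when it does, log_unnest_badness() records the
 * suboptimal unnest request.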
3063 */ 3064 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) { 3065 log_unnest_badness(map, old_start_unnest, old_end_unnest); 3066 } 3067 3068 if (entry->vme_start > start_unnest || 3069 entry->vme_end < end_unnest) { 3070 panic("vm_map_clip_unnest(0x%llx,0x%llx): " 3071 "bad nested entry: start=0x%llx end=0x%llx\n", 3072 (long long)start_unnest, (long long)end_unnest, 3073 (long long)entry->vme_start, (long long)entry->vme_end); 3074 } 3075 3076 if (start_unnest > entry->vme_start) { 3077 _vm_map_clip_start(&map->hdr, 3078 entry, 3079 start_unnest); 3080 vm_map_store_update_first_free(map, map->first_free); 3081 } 3082 if (entry->vme_end > end_unnest) { 3083 _vm_map_clip_end(&map->hdr, 3084 entry, 3085 end_unnest); 3086 vm_map_store_update_first_free(map, map->first_free); 3087 } 3088 3089 pmap_unnest(map->pmap, 3090 entry->vme_start, 3091 entry->vme_end - entry->vme_start); 3092 if ((map->mapped_in_other_pmaps) && (map->ref_count)) { 3093 /* clean up parent map/maps */ 3094 vm_map_submap_pmap_clean( 3095 map, entry->vme_start, 3096 entry->vme_end, 3097 entry->object.sub_map, 3098 entry->offset); 3099 } 3100 entry->use_pmap = FALSE; 3101 if (entry->alias == VM_MEMORY_SHARED_PMAP) { 3102 entry->alias = VM_MEMORY_UNSHARED_PMAP; 3103 } 3104} 3105#endif /* NO_NESTED_PMAP */ 3106 3107/* 3108 * vm_map_clip_start: [ internal use only ] 3109 * 3110 * Asserts that the given entry begins at or after 3111 * the specified address; if necessary, 3112 * it splits the entry into two. 3113 */ 3114void 3115vm_map_clip_start( 3116 vm_map_t map, 3117 vm_map_entry_t entry, 3118 vm_map_offset_t startaddr) 3119{ 3120#ifndef NO_NESTED_PMAP 3121 if (entry->use_pmap && 3122 startaddr >= entry->vme_start) { 3123 vm_map_offset_t start_unnest, end_unnest; 3124 3125 /* 3126 * Make sure "startaddr" is no longer in a nested range 3127 * before we clip. Unnest only the minimum range the platform 3128 * can handle. 3129 * vm_map_clip_unnest may perform additional adjustments to 3130 * the unnest range. 3131 */ 3132 start_unnest = startaddr & ~(pmap_nesting_size_min - 1); 3133 end_unnest = start_unnest + pmap_nesting_size_min; 3134 vm_map_clip_unnest(map, entry, start_unnest, end_unnest); 3135 } 3136#endif /* NO_NESTED_PMAP */ 3137 if (startaddr > entry->vme_start) { 3138 if (entry->object.vm_object && 3139 !entry->is_sub_map && 3140 entry->object.vm_object->phys_contiguous) { 3141 pmap_remove(map->pmap, 3142 (addr64_t)(entry->vme_start), 3143 (addr64_t)(entry->vme_end)); 3144 } 3145 _vm_map_clip_start(&map->hdr, entry, startaddr); 3146 vm_map_store_update_first_free(map, map->first_free); 3147 } 3148} 3149 3150 3151#define vm_map_copy_clip_start(copy, entry, startaddr) \ 3152 MACRO_BEGIN \ 3153 if ((startaddr) > (entry)->vme_start) \ 3154 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \ 3155 MACRO_END 3156 3157/* 3158 * This routine is called only when it is known that 3159 * the entry must be split. 3160 */ 3161static void 3162_vm_map_clip_start( 3163 register struct vm_map_header *map_header, 3164 register vm_map_entry_t entry, 3165 register vm_map_offset_t start) 3166{ 3167 register vm_map_entry_t new_entry; 3168 3169 /* 3170 * Split off the front portion -- 3171 * note that we must insert the new 3172 * entry BEFORE this one, so that 3173 * this entry has the specified starting 3174 * address. 
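 * For example (hypothetical addresses): clipping an entry that
 * covers [0x1000, 0x4000) at start 0x3000 creates a new entry for
 * [0x1000, 0x3000) linked in front of it, while this entry shrinks
 * to [0x3000, 0x4000) and its object offset advances by 0x2000.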
3175 */ 3176 3177 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); 3178 vm_map_entry_copy_full(new_entry, entry); 3179 3180 new_entry->vme_end = start; 3181 assert(new_entry->vme_start < new_entry->vme_end); 3182 entry->offset += (start - entry->vme_start); 3183 assert(start < entry->vme_end); 3184 entry->vme_start = start; 3185 3186 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry); 3187 3188 if (entry->is_sub_map) 3189 vm_map_reference(new_entry->object.sub_map); 3190 else 3191 vm_object_reference(new_entry->object.vm_object); 3192} 3193 3194 3195/* 3196 * vm_map_clip_end: [ internal use only ] 3197 * 3198 * Asserts that the given entry ends at or before 3199 * the specified address; if necessary, 3200 * it splits the entry into two. 3201 */ 3202void 3203vm_map_clip_end( 3204 vm_map_t map, 3205 vm_map_entry_t entry, 3206 vm_map_offset_t endaddr) 3207{ 3208 if (endaddr > entry->vme_end) { 3209 /* 3210 * Within the scope of this clipping, limit "endaddr" to 3211 * the end of this map entry... 3212 */ 3213 endaddr = entry->vme_end; 3214 } 3215#ifndef NO_NESTED_PMAP 3216 if (entry->use_pmap) { 3217 vm_map_offset_t start_unnest, end_unnest; 3218 3219 /* 3220 * Make sure the range between the start of this entry and 3221 * the new "endaddr" is no longer nested before we clip. 3222 * Unnest only the minimum range the platform can handle. 3223 * vm_map_clip_unnest may perform additional adjustments to 3224 * the unnest range. 3225 */ 3226 start_unnest = entry->vme_start; 3227 end_unnest = 3228 (endaddr + pmap_nesting_size_min - 1) & 3229 ~(pmap_nesting_size_min - 1); 3230 vm_map_clip_unnest(map, entry, start_unnest, end_unnest); 3231 } 3232#endif /* NO_NESTED_PMAP */ 3233 if (endaddr < entry->vme_end) { 3234 if (entry->object.vm_object && 3235 !entry->is_sub_map && 3236 entry->object.vm_object->phys_contiguous) { 3237 pmap_remove(map->pmap, 3238 (addr64_t)(entry->vme_start), 3239 (addr64_t)(entry->vme_end)); 3240 } 3241 _vm_map_clip_end(&map->hdr, entry, endaddr); 3242 vm_map_store_update_first_free(map, map->first_free); 3243 } 3244} 3245 3246 3247#define vm_map_copy_clip_end(copy, entry, endaddr) \ 3248 MACRO_BEGIN \ 3249 if ((endaddr) < (entry)->vme_end) \ 3250 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \ 3251 MACRO_END 3252 3253/* 3254 * This routine is called only when it is known that 3255 * the entry must be split. 3256 */ 3257static void 3258_vm_map_clip_end( 3259 register struct vm_map_header *map_header, 3260 register vm_map_entry_t entry, 3261 register vm_map_offset_t end) 3262{ 3263 register vm_map_entry_t new_entry; 3264 3265 /* 3266 * Create a new entry and insert it 3267 * AFTER the specified entry 3268 */ 3269 3270 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); 3271 vm_map_entry_copy_full(new_entry, entry); 3272 3273 assert(entry->vme_start < end); 3274 new_entry->vme_start = entry->vme_end = end; 3275 new_entry->offset += (end - entry->vme_start); 3276 assert(new_entry->vme_start < new_entry->vme_end); 3277 3278 _vm_map_store_entry_link(map_header, entry, new_entry); 3279 3280 if (entry->is_sub_map) 3281 vm_map_reference(new_entry->object.sub_map); 3282 else 3283 vm_object_reference(new_entry->object.vm_object); 3284} 3285 3286 3287/* 3288 * VM_MAP_RANGE_CHECK: [ internal use only ] 3289 * 3290 * Asserts that the starting and ending region 3291 * addresses fall within the valid range of the map. 
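 * (Despite the wording above, this macro does not assert: it
 * silently clamps "start" and "end" to the map's
 * [vm_map_min, vm_map_max] range and collapses an inverted range
 * into an empty one, so callers see at worst a zero-length region.)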
3292 */ 3293#define VM_MAP_RANGE_CHECK(map, start, end) \ 3294 MACRO_BEGIN \ 3295 if (start < vm_map_min(map)) \ 3296 start = vm_map_min(map); \ 3297 if (end > vm_map_max(map)) \ 3298 end = vm_map_max(map); \ 3299 if (start > end) \ 3300 start = end; \ 3301 MACRO_END 3302 3303/* 3304 * vm_map_range_check: [ internal use only ] 3305 * 3306 * Check that the region defined by the specified start and 3307 * end addresses are wholly contained within a single map 3308 * entry or set of adjacent map entries of the spacified map, 3309 * i.e. the specified region contains no unmapped space. 3310 * If any or all of the region is unmapped, FALSE is returned. 3311 * Otherwise, TRUE is returned and if the output argument 'entry' 3312 * is not NULL it points to the map entry containing the start 3313 * of the region. 3314 * 3315 * The map is locked for reading on entry and is left locked. 3316 */ 3317static boolean_t 3318vm_map_range_check( 3319 register vm_map_t map, 3320 register vm_map_offset_t start, 3321 register vm_map_offset_t end, 3322 vm_map_entry_t *entry) 3323{ 3324 vm_map_entry_t cur; 3325 register vm_map_offset_t prev; 3326 3327 /* 3328 * Basic sanity checks first 3329 */ 3330 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) 3331 return (FALSE); 3332 3333 /* 3334 * Check first if the region starts within a valid 3335 * mapping for the map. 3336 */ 3337 if (!vm_map_lookup_entry(map, start, &cur)) 3338 return (FALSE); 3339 3340 /* 3341 * Optimize for the case that the region is contained 3342 * in a single map entry. 3343 */ 3344 if (entry != (vm_map_entry_t *) NULL) 3345 *entry = cur; 3346 if (end <= cur->vme_end) 3347 return (TRUE); 3348 3349 /* 3350 * If the region is not wholly contained within a 3351 * single entry, walk the entries looking for holes. 3352 */ 3353 prev = cur->vme_end; 3354 cur = cur->vme_next; 3355 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) { 3356 if (end <= cur->vme_end) 3357 return (TRUE); 3358 prev = cur->vme_end; 3359 cur = cur->vme_next; 3360 } 3361 return (FALSE); 3362} 3363 3364/* 3365 * vm_map_submap: [ kernel use only ] 3366 * 3367 * Mark the given range as handled by a subordinate map. 3368 * 3369 * This range must have been created with vm_map_find using 3370 * the vm_submap_object, and no other operations may have been 3371 * performed on this range prior to calling vm_map_submap. 3372 * 3373 * Only a limited number of operations can be performed 3374 * within this rage after calling vm_map_submap: 3375 * vm_fault 3376 * [Don't try vm_map_copyin!] 3377 * 3378 * To remove a submapping, one must first remove the 3379 * range from the superior map, and then destroy the 3380 * submap (if desired). [Better yet, don't try it.] 3381 */ 3382kern_return_t 3383vm_map_submap( 3384 vm_map_t map, 3385 vm_map_offset_t start, 3386 vm_map_offset_t end, 3387 vm_map_t submap, 3388 vm_map_offset_t offset, 3389#ifdef NO_NESTED_PMAP 3390 __unused 3391#endif /* NO_NESTED_PMAP */ 3392 boolean_t use_pmap) 3393{ 3394 vm_map_entry_t entry; 3395 register kern_return_t result = KERN_INVALID_ARGUMENT; 3396 register vm_object_t object; 3397 3398 vm_map_lock(map); 3399 3400 if (! 
vm_map_lookup_entry(map, start, &entry)) { 3401 entry = entry->vme_next; 3402 } 3403 3404 if (entry == vm_map_to_entry(map) || 3405 entry->is_sub_map) { 3406 vm_map_unlock(map); 3407 return KERN_INVALID_ARGUMENT; 3408 } 3409 3410 assert(!entry->use_pmap); /* we don't want to unnest anything here */ 3411 vm_map_clip_start(map, entry, start); 3412 vm_map_clip_end(map, entry, end); 3413 3414 if ((entry->vme_start == start) && (entry->vme_end == end) && 3415 (!entry->is_sub_map) && 3416 ((object = entry->object.vm_object) == vm_submap_object) && 3417 (object->resident_page_count == 0) && 3418 (object->copy == VM_OBJECT_NULL) && 3419 (object->shadow == VM_OBJECT_NULL) && 3420 (!object->pager_created)) { 3421 entry->offset = (vm_object_offset_t)offset; 3422 entry->object.vm_object = VM_OBJECT_NULL; 3423 vm_object_deallocate(object); 3424 entry->is_sub_map = TRUE; 3425 entry->object.sub_map = submap; 3426 vm_map_reference(submap); 3427 if (submap->mapped_in_other_pmaps == FALSE && 3428 vm_map_pmap(submap) != PMAP_NULL && 3429 vm_map_pmap(submap) != vm_map_pmap(map)) { 3430 /* 3431 * This submap is being mapped in a map 3432 * that uses a different pmap. 3433 * Set its "mapped_in_other_pmaps" flag 3434 * to indicate that we now need to 3435 * remove mappings from all pmaps rather 3436 * than just the submap's pmap. 3437 */ 3438 submap->mapped_in_other_pmaps = TRUE; 3439 } 3440 3441#ifndef NO_NESTED_PMAP 3442 if (use_pmap) { 3443 /* nest if platform code will allow */ 3444 if(submap->pmap == NULL) { 3445 ledger_t ledger = map->pmap->ledger; 3446 submap->pmap = pmap_create(ledger, 3447 (vm_map_size_t) 0, FALSE); 3448 if(submap->pmap == PMAP_NULL) { 3449 vm_map_unlock(map); 3450 return(KERN_NO_SPACE); 3451 } 3452 } 3453 result = pmap_nest(map->pmap, 3454 (entry->object.sub_map)->pmap, 3455 (addr64_t)start, 3456 (addr64_t)start, 3457 (uint64_t)(end - start)); 3458 if(result) 3459 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result); 3460 entry->use_pmap = TRUE; 3461 } 3462#else /* NO_NESTED_PMAP */ 3463 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end); 3464#endif /* NO_NESTED_PMAP */ 3465 result = KERN_SUCCESS; 3466 } 3467 vm_map_unlock(map); 3468 3469 return(result); 3470} 3471 3472/* 3473 * vm_map_protect: 3474 * 3475 * Sets the protection of the specified address 3476 * region in the target map. If "set_max" is 3477 * specified, the maximum protection is to be set; 3478 * otherwise, only the current protection is affected. 3479 */ 3480kern_return_t 3481vm_map_protect( 3482 register vm_map_t map, 3483 register vm_map_offset_t start, 3484 register vm_map_offset_t end, 3485 register vm_prot_t new_prot, 3486 register boolean_t set_max) 3487{ 3488 register vm_map_entry_t current; 3489 register vm_map_offset_t prev; 3490 vm_map_entry_t entry; 3491 vm_prot_t new_max; 3492 3493 XPR(XPR_VM_MAP, 3494 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d", 3495 map, start, end, new_prot, set_max); 3496 3497 vm_map_lock(map); 3498 3499 /* LP64todo - remove this check when vm_map_commpage64() 3500 * no longer has to stuff in a map_entry for the commpage 3501 * above the map's max_offset. 3502 */ 3503 if (start >= map->max_offset) { 3504 vm_map_unlock(map); 3505 return(KERN_INVALID_ADDRESS); 3506 } 3507 3508 while(1) { 3509 /* 3510 * Lookup the entry. If it doesn't start in a valid 3511 * entry, return an error. 3512 */ 3513 if (! 
vm_map_lookup_entry(map, start, &entry)) { 3514 vm_map_unlock(map); 3515 return(KERN_INVALID_ADDRESS); 3516 } 3517 3518 if (entry->superpage_size && (start & (SUPERPAGE_SIZE-1))) { /* extend request to whole entry */ 3519 start = SUPERPAGE_ROUND_DOWN(start); 3520 continue; 3521 } 3522 break; 3523 } 3524 if (entry->superpage_size) 3525 end = SUPERPAGE_ROUND_UP(end); 3526 3527 /* 3528 * Make a first pass to check for protection and address 3529 * violations. 3530 */ 3531 3532 current = entry; 3533 prev = current->vme_start; 3534 while ((current != vm_map_to_entry(map)) && 3535 (current->vme_start < end)) { 3536 3537 /* 3538 * If there is a hole, return an error. 3539 */ 3540 if (current->vme_start != prev) { 3541 vm_map_unlock(map); 3542 return(KERN_INVALID_ADDRESS); 3543 } 3544 3545 new_max = current->max_protection; 3546 if(new_prot & VM_PROT_COPY) { 3547 new_max |= VM_PROT_WRITE; 3548 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) { 3549 vm_map_unlock(map); 3550 return(KERN_PROTECTION_FAILURE); 3551 } 3552 } else { 3553 if ((new_prot & new_max) != new_prot) { 3554 vm_map_unlock(map); 3555 return(KERN_PROTECTION_FAILURE); 3556 } 3557 } 3558 3559#if CONFIG_EMBEDDED 3560 if (new_prot & VM_PROT_WRITE) { 3561 if ((new_prot & VM_PROT_EXECUTE) && !(current->used_for_jit)) { 3562 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__); 3563 new_prot &= ~VM_PROT_EXECUTE; 3564 } 3565 } 3566#endif 3567 3568 prev = current->vme_end; 3569 current = current->vme_next; 3570 } 3571 if (end > prev) { 3572 vm_map_unlock(map); 3573 return(KERN_INVALID_ADDRESS); 3574 } 3575 3576 /* 3577 * Go back and fix up protections. 3578 * Clip to start here if the range starts within 3579 * the entry. 3580 */ 3581 3582 current = entry; 3583 if (current != vm_map_to_entry(map)) { 3584 /* clip and unnest if necessary */ 3585 vm_map_clip_start(map, current, start); 3586 } 3587 3588 while ((current != vm_map_to_entry(map)) && 3589 (current->vme_start < end)) { 3590 3591 vm_prot_t old_prot; 3592 3593 vm_map_clip_end(map, current, end); 3594 3595 assert(!current->use_pmap); /* clipping did unnest if needed */ 3596 3597 old_prot = current->protection; 3598 3599 if(new_prot & VM_PROT_COPY) { 3600 /* caller is asking specifically to copy the */ 3601 /* mapped data, this implies that max protection */ 3602 /* will include write. Caller must be prepared */ 3603 /* for loss of shared memory communication in the */ 3604 /* target area after taking this step */ 3605 3606 if (current->is_sub_map == FALSE && current->object.vm_object == VM_OBJECT_NULL){ 3607 current->object.vm_object = vm_object_allocate((vm_map_size_t)(current->vme_end - current->vme_start)); 3608 current->offset = 0; 3609 } 3610 current->needs_copy = TRUE; 3611 current->max_protection |= VM_PROT_WRITE; 3612 } 3613 3614 if (set_max) 3615 current->protection = 3616 (current->max_protection = 3617 new_prot & ~VM_PROT_COPY) & 3618 old_prot; 3619 else 3620 current->protection = new_prot & ~VM_PROT_COPY; 3621 3622 /* 3623 * Update physical map if necessary. 3624 * If the request is to turn off write protection, 3625 * we won't do it for real (in pmap). This is because 3626 * it would cause copy-on-write to fail. We've already 3627 * set, the new protection in the map, so if a 3628 * write-protect fault occurred, it will be fixed up 3629 * properly, COW or not. 
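 * ("Turn off write protection" means granting VM_PROT_WRITE.
 * That is why the pmap_protect() calls below always mask write
 * permission out of "prot": the first write fault goes through the
 * normal copy-on-write path and only then enables hardware write
 * access.)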
3630 */ 3631 if (current->protection != old_prot) { 3632 /* Look one level in we support nested pmaps */ 3633 /* from mapped submaps which are direct entries */ 3634 /* in our map */ 3635 3636 vm_prot_t prot; 3637 3638 prot = current->protection & ~VM_PROT_WRITE; 3639 3640 if (override_nx(map, current->alias) && prot) 3641 prot |= VM_PROT_EXECUTE; 3642 3643 if (current->is_sub_map && current->use_pmap) { 3644 pmap_protect(current->object.sub_map->pmap, 3645 current->vme_start, 3646 current->vme_end, 3647 prot); 3648 } else { 3649 pmap_protect(map->pmap, 3650 current->vme_start, 3651 current->vme_end, 3652 prot); 3653 } 3654 } 3655 current = current->vme_next; 3656 } 3657 3658 current = entry; 3659 while ((current != vm_map_to_entry(map)) && 3660 (current->vme_start <= end)) { 3661 vm_map_simplify_entry(map, current); 3662 current = current->vme_next; 3663 } 3664 3665 vm_map_unlock(map); 3666 return(KERN_SUCCESS); 3667} 3668 3669/* 3670 * vm_map_inherit: 3671 * 3672 * Sets the inheritance of the specified address 3673 * range in the target map. Inheritance 3674 * affects how the map will be shared with 3675 * child maps at the time of vm_map_fork. 3676 */ 3677kern_return_t 3678vm_map_inherit( 3679 register vm_map_t map, 3680 register vm_map_offset_t start, 3681 register vm_map_offset_t end, 3682 register vm_inherit_t new_inheritance) 3683{ 3684 register vm_map_entry_t entry; 3685 vm_map_entry_t temp_entry; 3686 3687 vm_map_lock(map); 3688 3689 VM_MAP_RANGE_CHECK(map, start, end); 3690 3691 if (vm_map_lookup_entry(map, start, &temp_entry)) { 3692 entry = temp_entry; 3693 } 3694 else { 3695 temp_entry = temp_entry->vme_next; 3696 entry = temp_entry; 3697 } 3698 3699 /* first check entire range for submaps which can't support the */ 3700 /* given inheritance. */ 3701 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 3702 if(entry->is_sub_map) { 3703 if(new_inheritance == VM_INHERIT_COPY) { 3704 vm_map_unlock(map); 3705 return(KERN_INVALID_ARGUMENT); 3706 } 3707 } 3708 3709 entry = entry->vme_next; 3710 } 3711 3712 entry = temp_entry; 3713 if (entry != vm_map_to_entry(map)) { 3714 /* clip and unnest if necessary */ 3715 vm_map_clip_start(map, entry, start); 3716 } 3717 3718 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 3719 vm_map_clip_end(map, entry, end); 3720 assert(!entry->use_pmap); /* clip did unnest if needed */ 3721 3722 entry->inheritance = new_inheritance; 3723 3724 entry = entry->vme_next; 3725 } 3726 3727 vm_map_unlock(map); 3728 return(KERN_SUCCESS); 3729} 3730 3731/* 3732 * Update the accounting for the amount of wired memory in this map. If the user has 3733 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails. 3734 */ 3735 3736static kern_return_t 3737add_wire_counts( 3738 vm_map_t map, 3739 vm_map_entry_t entry, 3740 boolean_t user_wire) 3741{ 3742 vm_map_size_t size; 3743 3744 if (user_wire) { 3745 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count; 3746 3747 /* 3748 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring 3749 * this map entry. 3750 */ 3751 3752 if (entry->user_wired_count == 0) { 3753 size = entry->vme_end - entry->vme_start; 3754 3755 /* 3756 * Since this is the first time the user is wiring this map entry, check to see if we're 3757 * exceeding the user wire limits. There is a per map limit which is the smaller of either 3758 * the process's rlimit or the global vm_user_wire_limit which caps this value. 
There is also 3759 * a system-wide limit on the amount of memory all users can wire. If the user is over either 3760 * limit, then we fail. 3761 */ 3762 3763 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) || 3764 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit || 3765 size + ptoa_64(total_wire_count) > max_mem - vm_global_no_user_wire_amount) 3766 return KERN_RESOURCE_SHORTAGE; 3767 3768 /* 3769 * The first time the user wires an entry, we also increment the wired_count and add this to 3770 * the total that has been wired in the map. 3771 */ 3772 3773 if (entry->wired_count >= MAX_WIRE_COUNT) 3774 return KERN_FAILURE; 3775 3776 entry->wired_count++; 3777 map->user_wire_size += size; 3778 } 3779 3780 if (entry->user_wired_count >= MAX_WIRE_COUNT) 3781 return KERN_FAILURE; 3782 3783 entry->user_wired_count++; 3784 3785 } else { 3786 3787 /* 3788 * The kernel's wiring the memory. Just bump the count and continue. 3789 */ 3790 3791 if (entry->wired_count >= MAX_WIRE_COUNT) 3792 panic("vm_map_wire: too many wirings"); 3793 3794 entry->wired_count++; 3795 } 3796 3797 return KERN_SUCCESS; 3798} 3799 3800/* 3801 * Update the memory wiring accounting now that the given map entry is being unwired. 3802 */ 3803 3804static void 3805subtract_wire_counts( 3806 vm_map_t map, 3807 vm_map_entry_t entry, 3808 boolean_t user_wire) 3809{ 3810 3811 if (user_wire) { 3812 3813 /* 3814 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference. 3815 */ 3816 3817 if (entry->user_wired_count == 1) { 3818 3819 /* 3820 * We're removing the last user wire reference. Decrement the wired_count and the total 3821 * user wired memory for this map. 3822 */ 3823 3824 assert(entry->wired_count >= 1); 3825 entry->wired_count--; 3826 map->user_wire_size -= entry->vme_end - entry->vme_start; 3827 } 3828 3829 assert(entry->user_wired_count >= 1); 3830 entry->user_wired_count--; 3831 3832 } else { 3833 3834 /* 3835 * The kernel is unwiring the memory. Just update the count. 3836 */ 3837 3838 assert(entry->wired_count >= 1); 3839 entry->wired_count--; 3840 } 3841} 3842 3843/* 3844 * vm_map_wire: 3845 * 3846 * Sets the pageability of the specified address range in the 3847 * target map as wired. Regions specified as not pageable require 3848 * locked-down physical memory and physical page maps. The 3849 * access_type variable indicates types of accesses that must not 3850 * generate page faults. This is checked against protection of 3851 * memory being locked-down. 3852 * 3853 * The map must not be locked, but a reference must remain to the 3854 * map throughout the call. 
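 * An illustrative, hypothetical kernel-side use, wiring a buffer on
 * behalf of the kernel (not a user request) and unwiring it later:
 *
 *	kr = vm_map_wire(map,
 *	                 vm_map_trunc_page(addr),
 *	                 vm_map_round_page(addr + len),
 *	                 VM_PROT_READ | VM_PROT_WRITE,
 *	                 FALSE);	/* user_wire */
 *	...
 *	vm_map_unwire(map,
 *	              vm_map_trunc_page(addr),
 *	              vm_map_round_page(addr + len),
 *	              FALSE);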
3855 */ 3856static kern_return_t 3857vm_map_wire_nested( 3858 register vm_map_t map, 3859 register vm_map_offset_t start, 3860 register vm_map_offset_t end, 3861 register vm_prot_t access_type, 3862 boolean_t user_wire, 3863 pmap_t map_pmap, 3864 vm_map_offset_t pmap_addr) 3865{ 3866 register vm_map_entry_t entry; 3867 struct vm_map_entry *first_entry, tmp_entry; 3868 vm_map_t real_map; 3869 register vm_map_offset_t s,e; 3870 kern_return_t rc; 3871 boolean_t need_wakeup; 3872 boolean_t main_map = FALSE; 3873 wait_interrupt_t interruptible_state; 3874 thread_t cur_thread; 3875 unsigned int last_timestamp; 3876 vm_map_size_t size; 3877 3878 vm_map_lock(map); 3879 if(map_pmap == NULL) 3880 main_map = TRUE; 3881 last_timestamp = map->timestamp; 3882 3883 VM_MAP_RANGE_CHECK(map, start, end); 3884 assert(page_aligned(start)); 3885 assert(page_aligned(end)); 3886 if (start == end) { 3887 /* We wired what the caller asked for, zero pages */ 3888 vm_map_unlock(map); 3889 return KERN_SUCCESS; 3890 } 3891 3892 need_wakeup = FALSE; 3893 cur_thread = current_thread(); 3894 3895 s = start; 3896 rc = KERN_SUCCESS; 3897 3898 if (vm_map_lookup_entry(map, s, &first_entry)) { 3899 entry = first_entry; 3900 /* 3901 * vm_map_clip_start will be done later. 3902 * We don't want to unnest any nested submaps here ! 3903 */ 3904 } else { 3905 /* Start address is not in map */ 3906 rc = KERN_INVALID_ADDRESS; 3907 goto done; 3908 } 3909 3910 while ((entry != vm_map_to_entry(map)) && (s < end)) { 3911 /* 3912 * At this point, we have wired from "start" to "s". 3913 * We still need to wire from "s" to "end". 3914 * 3915 * "entry" hasn't been clipped, so it could start before "s" 3916 * and/or end after "end". 3917 */ 3918 3919 /* "e" is how far we want to wire in this entry */ 3920 e = entry->vme_end; 3921 if (e > end) 3922 e = end; 3923 3924 /* 3925 * If another thread is wiring/unwiring this entry then 3926 * block after informing other thread to wake us up. 3927 */ 3928 if (entry->in_transition) { 3929 wait_result_t wait_result; 3930 3931 /* 3932 * We have not clipped the entry. Make sure that 3933 * the start address is in range so that the lookup 3934 * below will succeed. 3935 * "s" is the current starting point: we've already 3936 * wired from "start" to "s" and we still have 3937 * to wire from "s" to "end". 3938 */ 3939 3940 entry->needs_wakeup = TRUE; 3941 3942 /* 3943 * wake up anybody waiting on entries that we have 3944 * already wired. 3945 */ 3946 if (need_wakeup) { 3947 vm_map_entry_wakeup(map); 3948 need_wakeup = FALSE; 3949 } 3950 /* 3951 * User wiring is interruptible 3952 */ 3953 wait_result = vm_map_entry_wait(map, 3954 (user_wire) ? THREAD_ABORTSAFE : 3955 THREAD_UNINT); 3956 if (user_wire && wait_result == THREAD_INTERRUPTED) { 3957 /* 3958 * undo the wirings we have done so far 3959 * We do not clear the needs_wakeup flag, 3960 * because we cannot tell if we were the 3961 * only one waiting. 3962 */ 3963 rc = KERN_FAILURE; 3964 goto done; 3965 } 3966 3967 /* 3968 * Cannot avoid a lookup here. reset timestamp. 3969 */ 3970 last_timestamp = map->timestamp; 3971 3972 /* 3973 * The entry could have been clipped, look it up again. 3974 * Worse that can happen is, it may not exist anymore. 3975 */ 3976 if (!vm_map_lookup_entry(map, s, &first_entry)) { 3977 /* 3978 * User: undo everything upto the previous 3979 * entry. let vm_map_unwire worry about 3980 * checking the validity of the range. 
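 * (The actual rollback happens at the "done:" label at the bottom
 * of this function, which on failure calls vm_map_unwire() over
 * [start, s), i.e. only over what has been wired so far.)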
3981 */ 3982 rc = KERN_FAILURE; 3983 goto done; 3984 } 3985 entry = first_entry; 3986 continue; 3987 } 3988 3989 if (entry->is_sub_map) { 3990 vm_map_offset_t sub_start; 3991 vm_map_offset_t sub_end; 3992 vm_map_offset_t local_start; 3993 vm_map_offset_t local_end; 3994 pmap_t pmap; 3995 3996 vm_map_clip_start(map, entry, s); 3997 vm_map_clip_end(map, entry, end); 3998 3999 sub_start = entry->offset; 4000 sub_end = entry->vme_end; 4001 sub_end += entry->offset - entry->vme_start; 4002 4003 local_end = entry->vme_end; 4004 if(map_pmap == NULL) { 4005 vm_object_t object; 4006 vm_object_offset_t offset; 4007 vm_prot_t prot; 4008 boolean_t wired; 4009 vm_map_entry_t local_entry; 4010 vm_map_version_t version; 4011 vm_map_t lookup_map; 4012 4013 if(entry->use_pmap) { 4014 pmap = entry->object.sub_map->pmap; 4015 /* ppc implementation requires that */ 4016 /* submaps pmap address ranges line */ 4017 /* up with parent map */ 4018#ifdef notdef 4019 pmap_addr = sub_start; 4020#endif 4021 pmap_addr = s; 4022 } else { 4023 pmap = map->pmap; 4024 pmap_addr = s; 4025 } 4026 4027 if (entry->wired_count) { 4028 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 4029 goto done; 4030 4031 /* 4032 * The map was not unlocked: 4033 * no need to goto re-lookup. 4034 * Just go directly to next entry. 4035 */ 4036 entry = entry->vme_next; 4037 s = entry->vme_start; 4038 continue; 4039 4040 } 4041 4042 /* call vm_map_lookup_locked to */ 4043 /* cause any needs copy to be */ 4044 /* evaluated */ 4045 local_start = entry->vme_start; 4046 lookup_map = map; 4047 vm_map_lock_write_to_read(map); 4048 if(vm_map_lookup_locked( 4049 &lookup_map, local_start, 4050 access_type, 4051 OBJECT_LOCK_EXCLUSIVE, 4052 &version, &object, 4053 &offset, &prot, &wired, 4054 NULL, 4055 &real_map)) { 4056 4057 vm_map_unlock_read(lookup_map); 4058 vm_map_unwire(map, start, 4059 s, user_wire); 4060 return(KERN_FAILURE); 4061 } 4062 vm_object_unlock(object); 4063 if(real_map != lookup_map) 4064 vm_map_unlock(real_map); 4065 vm_map_unlock_read(lookup_map); 4066 vm_map_lock(map); 4067 4068 /* we unlocked, so must re-lookup */ 4069 if (!vm_map_lookup_entry(map, 4070 local_start, 4071 &local_entry)) { 4072 rc = KERN_FAILURE; 4073 goto done; 4074 } 4075 4076 /* 4077 * entry could have been "simplified", 4078 * so re-clip 4079 */ 4080 entry = local_entry; 4081 assert(s == local_start); 4082 vm_map_clip_start(map, entry, s); 4083 vm_map_clip_end(map, entry, end); 4084 /* re-compute "e" */ 4085 e = entry->vme_end; 4086 if (e > end) 4087 e = end; 4088 4089 /* did we have a change of type? */ 4090 if (!entry->is_sub_map) { 4091 last_timestamp = map->timestamp; 4092 continue; 4093 } 4094 } else { 4095 local_start = entry->vme_start; 4096 pmap = map_pmap; 4097 } 4098 4099 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 4100 goto done; 4101 4102 entry->in_transition = TRUE; 4103 4104 vm_map_unlock(map); 4105 rc = vm_map_wire_nested(entry->object.sub_map, 4106 sub_start, sub_end, 4107 access_type, 4108 user_wire, pmap, pmap_addr); 4109 vm_map_lock(map); 4110 4111 /* 4112 * Find the entry again. It could have been clipped 4113 * after we unlocked the map. 
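 * (The map lock was dropped around the recursive
 * vm_map_wire_nested() call on the submap, so the entry list may
 * have changed underneath us; the entry is re-looked-up by its
 * start address and "last_timestamp" is refreshed below before the
 * in_transition flags are cleared.)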
4114 */ 4115 if (!vm_map_lookup_entry(map, local_start, 4116 &first_entry)) 4117 panic("vm_map_wire: re-lookup failed"); 4118 entry = first_entry; 4119 4120 assert(local_start == s); 4121 /* re-compute "e" */ 4122 e = entry->vme_end; 4123 if (e > end) 4124 e = end; 4125 4126 last_timestamp = map->timestamp; 4127 while ((entry != vm_map_to_entry(map)) && 4128 (entry->vme_start < e)) { 4129 assert(entry->in_transition); 4130 entry->in_transition = FALSE; 4131 if (entry->needs_wakeup) { 4132 entry->needs_wakeup = FALSE; 4133 need_wakeup = TRUE; 4134 } 4135 if (rc != KERN_SUCCESS) {/* from vm_*_wire */ 4136 subtract_wire_counts(map, entry, user_wire); 4137 } 4138 entry = entry->vme_next; 4139 } 4140 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 4141 goto done; 4142 } 4143 4144 /* no need to relookup again */ 4145 s = entry->vme_start; 4146 continue; 4147 } 4148 4149 /* 4150 * If this entry is already wired then increment 4151 * the appropriate wire reference count. 4152 */ 4153 if (entry->wired_count) { 4154 /* 4155 * entry is already wired down, get our reference 4156 * after clipping to our range. 4157 */ 4158 vm_map_clip_start(map, entry, s); 4159 vm_map_clip_end(map, entry, end); 4160 4161 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 4162 goto done; 4163 4164 /* map was not unlocked: no need to relookup */ 4165 entry = entry->vme_next; 4166 s = entry->vme_start; 4167 continue; 4168 } 4169 4170 /* 4171 * Unwired entry or wire request transmitted via submap 4172 */ 4173 4174 4175 /* 4176 * Perform actions of vm_map_lookup that need the write 4177 * lock on the map: create a shadow object for a 4178 * copy-on-write region, or an object for a zero-fill 4179 * region. 4180 */ 4181 size = entry->vme_end - entry->vme_start; 4182 /* 4183 * If wiring a copy-on-write page, we need to copy it now 4184 * even if we're only (currently) requesting read access. 4185 * This is aggressive, but once it's wired we can't move it. 4186 */ 4187 if (entry->needs_copy) { 4188 vm_object_shadow(&entry->object.vm_object, 4189 &entry->offset, size); 4190 entry->needs_copy = FALSE; 4191 } else if (entry->object.vm_object == VM_OBJECT_NULL) { 4192 entry->object.vm_object = vm_object_allocate(size); 4193 entry->offset = (vm_object_offset_t)0; 4194 } 4195 4196 vm_map_clip_start(map, entry, s); 4197 vm_map_clip_end(map, entry, end); 4198 4199 /* re-compute "e" */ 4200 e = entry->vme_end; 4201 if (e > end) 4202 e = end; 4203 4204 /* 4205 * Check for holes and protection mismatch. 4206 * Holes: Next entry should be contiguous unless this 4207 * is the end of the region. 4208 * Protection: Access requested must be allowed, unless 4209 * wiring is by protection class 4210 */ 4211 if ((entry->vme_end < end) && 4212 ((entry->vme_next == vm_map_to_entry(map)) || 4213 (entry->vme_next->vme_start > entry->vme_end))) { 4214 /* found a hole */ 4215 rc = KERN_INVALID_ADDRESS; 4216 goto done; 4217 } 4218 if ((entry->protection & access_type) != access_type) { 4219 /* found a protection problem */ 4220 rc = KERN_PROTECTION_FAILURE; 4221 goto done; 4222 } 4223 4224 assert(entry->wired_count == 0 && entry->user_wired_count == 0); 4225 4226 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 4227 goto done; 4228 4229 entry->in_transition = TRUE; 4230 4231 /* 4232 * This entry might get split once we unlock the map. 4233 * In vm_fault_wire(), we need the current range as 4234 * defined by this entry. 
In order for this to work 4235 * along with a simultaneous clip operation, we make a 4236 * temporary copy of this entry and use that for the 4237 * wiring. Note that the underlying objects do not 4238 * change during a clip. 4239 */ 4240 tmp_entry = *entry; 4241 4242 /* 4243 * The in_transition state guarentees that the entry 4244 * (or entries for this range, if split occured) will be 4245 * there when the map lock is acquired for the second time. 4246 */ 4247 vm_map_unlock(map); 4248 4249 if (!user_wire && cur_thread != THREAD_NULL) 4250 interruptible_state = thread_interrupt_level(THREAD_UNINT); 4251 else 4252 interruptible_state = THREAD_UNINT; 4253 4254 if(map_pmap) 4255 rc = vm_fault_wire(map, 4256 &tmp_entry, map_pmap, pmap_addr); 4257 else 4258 rc = vm_fault_wire(map, 4259 &tmp_entry, map->pmap, 4260 tmp_entry.vme_start); 4261 4262 if (!user_wire && cur_thread != THREAD_NULL) 4263 thread_interrupt_level(interruptible_state); 4264 4265 vm_map_lock(map); 4266 4267 if (last_timestamp+1 != map->timestamp) { 4268 /* 4269 * Find the entry again. It could have been clipped 4270 * after we unlocked the map. 4271 */ 4272 if (!vm_map_lookup_entry(map, tmp_entry.vme_start, 4273 &first_entry)) 4274 panic("vm_map_wire: re-lookup failed"); 4275 4276 entry = first_entry; 4277 } 4278 4279 last_timestamp = map->timestamp; 4280 4281 while ((entry != vm_map_to_entry(map)) && 4282 (entry->vme_start < tmp_entry.vme_end)) { 4283 assert(entry->in_transition); 4284 entry->in_transition = FALSE; 4285 if (entry->needs_wakeup) { 4286 entry->needs_wakeup = FALSE; 4287 need_wakeup = TRUE; 4288 } 4289 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 4290 subtract_wire_counts(map, entry, user_wire); 4291 } 4292 entry = entry->vme_next; 4293 } 4294 4295 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 4296 goto done; 4297 } 4298 4299 s = entry->vme_start; 4300 } /* end while loop through map entries */ 4301 4302done: 4303 if (rc == KERN_SUCCESS) { 4304 /* repair any damage we may have made to the VM map */ 4305 vm_map_simplify_range(map, start, end); 4306 } 4307 4308 vm_map_unlock(map); 4309 4310 /* 4311 * wake up anybody waiting on entries we wired. 4312 */ 4313 if (need_wakeup) 4314 vm_map_entry_wakeup(map); 4315 4316 if (rc != KERN_SUCCESS) { 4317 /* undo what has been wired so far */ 4318 vm_map_unwire(map, start, s, user_wire); 4319 } 4320 4321 return rc; 4322 4323} 4324 4325kern_return_t 4326vm_map_wire( 4327 register vm_map_t map, 4328 register vm_map_offset_t start, 4329 register vm_map_offset_t end, 4330 register vm_prot_t access_type, 4331 boolean_t user_wire) 4332{ 4333 4334 kern_return_t kret; 4335 4336 kret = vm_map_wire_nested(map, start, end, access_type, 4337 user_wire, (pmap_t)NULL, 0); 4338 return kret; 4339} 4340 4341/* 4342 * vm_map_unwire: 4343 * 4344 * Sets the pageability of the specified address range in the target 4345 * as pageable. Regions specified must have been wired previously. 4346 * 4347 * The map must not be locked, but a reference must remain to the map 4348 * throughout the call. 4349 * 4350 * Kernel will panic on failures. User unwire ignores holes and 4351 * unwired and intransition entries to avoid losing memory by leaving 4352 * it unwired. 
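 *
 * A minimal usage sketch (the map handle, range and access type
 * are illustrative, not taken from a real caller): a kernel
 * client that needs a range resident for the duration of an
 * operation pairs this routine with vm_map_wire():
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_wire(map, start, end, VM_PROT_READ, FALSE);
 *	if (kr == KERN_SUCCESS) {
 *		... operate on the wired range ...
 *		kr = vm_map_unwire(map, start, end, FALSE);
 *	}
 *
 * A failed vm_map_wire() leaves nothing wired, so no matching
 * unwire is needed on that path.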
4353 */ 4354static kern_return_t 4355vm_map_unwire_nested( 4356 register vm_map_t map, 4357 register vm_map_offset_t start, 4358 register vm_map_offset_t end, 4359 boolean_t user_wire, 4360 pmap_t map_pmap, 4361 vm_map_offset_t pmap_addr) 4362{ 4363 register vm_map_entry_t entry; 4364 struct vm_map_entry *first_entry, tmp_entry; 4365 boolean_t need_wakeup; 4366 boolean_t main_map = FALSE; 4367 unsigned int last_timestamp; 4368 4369 vm_map_lock(map); 4370 if(map_pmap == NULL) 4371 main_map = TRUE; 4372 last_timestamp = map->timestamp; 4373 4374 VM_MAP_RANGE_CHECK(map, start, end); 4375 assert(page_aligned(start)); 4376 assert(page_aligned(end)); 4377 4378 if (start == end) { 4379 /* We unwired what the caller asked for: zero pages */ 4380 vm_map_unlock(map); 4381 return KERN_SUCCESS; 4382 } 4383 4384 if (vm_map_lookup_entry(map, start, &first_entry)) { 4385 entry = first_entry; 4386 /* 4387 * vm_map_clip_start will be done later. 4388 * We don't want to unnest any nested sub maps here ! 4389 */ 4390 } 4391 else { 4392 if (!user_wire) { 4393 panic("vm_map_unwire: start not found"); 4394 } 4395 /* Start address is not in map. */ 4396 vm_map_unlock(map); 4397 return(KERN_INVALID_ADDRESS); 4398 } 4399 4400 if (entry->superpage_size) { 4401 /* superpages are always wired */ 4402 vm_map_unlock(map); 4403 return KERN_INVALID_ADDRESS; 4404 } 4405 4406 need_wakeup = FALSE; 4407 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 4408 if (entry->in_transition) { 4409 /* 4410 * 1) 4411 * Another thread is wiring down this entry. Note 4412 * that if it is not for the other thread we would 4413 * be unwiring an unwired entry. This is not 4414 * permitted. If we wait, we will be unwiring memory 4415 * we did not wire. 4416 * 4417 * 2) 4418 * Another thread is unwiring this entry. We did not 4419 * have a reference to it, because if we did, this 4420 * entry will not be getting unwired now. 4421 */ 4422 if (!user_wire) { 4423 /* 4424 * XXX FBDP 4425 * This could happen: there could be some 4426 * overlapping vslock/vsunlock operations 4427 * going on. 4428 * We should probably just wait and retry, 4429 * but then we have to be careful that this 4430 * entry could get "simplified" after 4431 * "in_transition" gets unset and before 4432 * we re-lookup the entry, so we would 4433 * have to re-clip the entry to avoid 4434 * re-unwiring what we have already unwired... 4435 * See vm_map_wire_nested(). 4436 * 4437 * Or we could just ignore "in_transition" 4438 * here and proceed to decement the wired 4439 * count(s) on this entry. That should be fine 4440 * as long as "wired_count" doesn't drop all 4441 * the way to 0 (and we should panic if THAT 4442 * happens). 
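 * For now we take the conservative route and panic below
 * rather than guessing.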
4443 */ 4444 panic("vm_map_unwire: in_transition entry"); 4445 } 4446 4447 entry = entry->vme_next; 4448 continue; 4449 } 4450 4451 if (entry->is_sub_map) { 4452 vm_map_offset_t sub_start; 4453 vm_map_offset_t sub_end; 4454 vm_map_offset_t local_end; 4455 pmap_t pmap; 4456 4457 vm_map_clip_start(map, entry, start); 4458 vm_map_clip_end(map, entry, end); 4459 4460 sub_start = entry->offset; 4461 sub_end = entry->vme_end - entry->vme_start; 4462 sub_end += entry->offset; 4463 local_end = entry->vme_end; 4464 if(map_pmap == NULL) { 4465 if(entry->use_pmap) { 4466 pmap = entry->object.sub_map->pmap; 4467 pmap_addr = sub_start; 4468 } else { 4469 pmap = map->pmap; 4470 pmap_addr = start; 4471 } 4472 if (entry->wired_count == 0 || 4473 (user_wire && entry->user_wired_count == 0)) { 4474 if (!user_wire) 4475 panic("vm_map_unwire: entry is unwired"); 4476 entry = entry->vme_next; 4477 continue; 4478 } 4479 4480 /* 4481 * Check for holes 4482 * Holes: Next entry should be contiguous unless 4483 * this is the end of the region. 4484 */ 4485 if (((entry->vme_end < end) && 4486 ((entry->vme_next == vm_map_to_entry(map)) || 4487 (entry->vme_next->vme_start 4488 > entry->vme_end)))) { 4489 if (!user_wire) 4490 panic("vm_map_unwire: non-contiguous region"); 4491/* 4492 entry = entry->vme_next; 4493 continue; 4494*/ 4495 } 4496 4497 subtract_wire_counts(map, entry, user_wire); 4498 4499 if (entry->wired_count != 0) { 4500 entry = entry->vme_next; 4501 continue; 4502 } 4503 4504 entry->in_transition = TRUE; 4505 tmp_entry = *entry;/* see comment in vm_map_wire() */ 4506 4507 /* 4508 * We can unlock the map now. The in_transition state 4509 * guarantees existance of the entry. 4510 */ 4511 vm_map_unlock(map); 4512 vm_map_unwire_nested(entry->object.sub_map, 4513 sub_start, sub_end, user_wire, pmap, pmap_addr); 4514 vm_map_lock(map); 4515 4516 if (last_timestamp+1 != map->timestamp) { 4517 /* 4518 * Find the entry again. It could have been 4519 * clipped or deleted after we unlocked the map. 4520 */ 4521 if (!vm_map_lookup_entry(map, 4522 tmp_entry.vme_start, 4523 &first_entry)) { 4524 if (!user_wire) 4525 panic("vm_map_unwire: re-lookup failed"); 4526 entry = first_entry->vme_next; 4527 } else 4528 entry = first_entry; 4529 } 4530 last_timestamp = map->timestamp; 4531 4532 /* 4533 * clear transition bit for all constituent entries 4534 * that were in the original entry (saved in 4535 * tmp_entry). Also check for waiters. 4536 */ 4537 while ((entry != vm_map_to_entry(map)) && 4538 (entry->vme_start < tmp_entry.vme_end)) { 4539 assert(entry->in_transition); 4540 entry->in_transition = FALSE; 4541 if (entry->needs_wakeup) { 4542 entry->needs_wakeup = FALSE; 4543 need_wakeup = TRUE; 4544 } 4545 entry = entry->vme_next; 4546 } 4547 continue; 4548 } else { 4549 vm_map_unlock(map); 4550 vm_map_unwire_nested(entry->object.sub_map, 4551 sub_start, sub_end, user_wire, map_pmap, 4552 pmap_addr); 4553 vm_map_lock(map); 4554 4555 if (last_timestamp+1 != map->timestamp) { 4556 /* 4557 * Find the entry again. It could have been 4558 * clipped or deleted after we unlocked the map. 
4559 */ 4560 if (!vm_map_lookup_entry(map, 4561 tmp_entry.vme_start, 4562 &first_entry)) { 4563 if (!user_wire) 4564 panic("vm_map_unwire: re-lookup failed"); 4565 entry = first_entry->vme_next; 4566 } else 4567 entry = first_entry; 4568 } 4569 last_timestamp = map->timestamp; 4570 } 4571 } 4572 4573 4574 if ((entry->wired_count == 0) || 4575 (user_wire && entry->user_wired_count == 0)) { 4576 if (!user_wire) 4577 panic("vm_map_unwire: entry is unwired"); 4578 4579 entry = entry->vme_next; 4580 continue; 4581 } 4582 4583 assert(entry->wired_count > 0 && 4584 (!user_wire || entry->user_wired_count > 0)); 4585 4586 vm_map_clip_start(map, entry, start); 4587 vm_map_clip_end(map, entry, end); 4588 4589 /* 4590 * Check for holes 4591 * Holes: Next entry should be contiguous unless 4592 * this is the end of the region. 4593 */ 4594 if (((entry->vme_end < end) && 4595 ((entry->vme_next == vm_map_to_entry(map)) || 4596 (entry->vme_next->vme_start > entry->vme_end)))) { 4597 4598 if (!user_wire) 4599 panic("vm_map_unwire: non-contiguous region"); 4600 entry = entry->vme_next; 4601 continue; 4602 } 4603 4604 subtract_wire_counts(map, entry, user_wire); 4605 4606 if (entry->wired_count != 0) { 4607 entry = entry->vme_next; 4608 continue; 4609 } 4610 4611 if(entry->zero_wired_pages) { 4612 entry->zero_wired_pages = FALSE; 4613 } 4614 4615 entry->in_transition = TRUE; 4616 tmp_entry = *entry; /* see comment in vm_map_wire() */ 4617 4618 /* 4619 * We can unlock the map now. The in_transition state 4620 * guarantees existance of the entry. 4621 */ 4622 vm_map_unlock(map); 4623 if(map_pmap) { 4624 vm_fault_unwire(map, 4625 &tmp_entry, FALSE, map_pmap, pmap_addr); 4626 } else { 4627 vm_fault_unwire(map, 4628 &tmp_entry, FALSE, map->pmap, 4629 tmp_entry.vme_start); 4630 } 4631 vm_map_lock(map); 4632 4633 if (last_timestamp+1 != map->timestamp) { 4634 /* 4635 * Find the entry again. It could have been clipped 4636 * or deleted after we unlocked the map. 4637 */ 4638 if (!vm_map_lookup_entry(map, tmp_entry.vme_start, 4639 &first_entry)) { 4640 if (!user_wire) 4641 panic("vm_map_unwire: re-lookup failed"); 4642 entry = first_entry->vme_next; 4643 } else 4644 entry = first_entry; 4645 } 4646 last_timestamp = map->timestamp; 4647 4648 /* 4649 * clear transition bit for all constituent entries that 4650 * were in the original entry (saved in tmp_entry). Also 4651 * check for waiters. 4652 */ 4653 while ((entry != vm_map_to_entry(map)) && 4654 (entry->vme_start < tmp_entry.vme_end)) { 4655 assert(entry->in_transition); 4656 entry->in_transition = FALSE; 4657 if (entry->needs_wakeup) { 4658 entry->needs_wakeup = FALSE; 4659 need_wakeup = TRUE; 4660 } 4661 entry = entry->vme_next; 4662 } 4663 } 4664 4665 /* 4666 * We might have fragmented the address space when we wired this 4667 * range of addresses. Attempt to re-coalesce these VM map entries 4668 * with their neighbors now that they're no longer wired. 4669 * Under some circumstances, address space fragmentation can 4670 * prevent VM object shadow chain collapsing, which can cause 4671 * swap space leaks. 4672 */ 4673 vm_map_simplify_range(map, start, end); 4674 4675 vm_map_unlock(map); 4676 /* 4677 * wake up anybody waiting on entries that we have unwired. 
4678 */ 4679 if (need_wakeup) 4680 vm_map_entry_wakeup(map); 4681 return(KERN_SUCCESS); 4682 4683} 4684 4685kern_return_t 4686vm_map_unwire( 4687 register vm_map_t map, 4688 register vm_map_offset_t start, 4689 register vm_map_offset_t end, 4690 boolean_t user_wire) 4691{ 4692 return vm_map_unwire_nested(map, start, end, 4693 user_wire, (pmap_t)NULL, 0); 4694} 4695 4696 4697/* 4698 * vm_map_entry_delete: [ internal use only ] 4699 * 4700 * Deallocate the given entry from the target map. 4701 */ 4702static void 4703vm_map_entry_delete( 4704 register vm_map_t map, 4705 register vm_map_entry_t entry) 4706{ 4707 register vm_map_offset_t s, e; 4708 register vm_object_t object; 4709 register vm_map_t submap; 4710 4711 s = entry->vme_start; 4712 e = entry->vme_end; 4713 assert(page_aligned(s)); 4714 assert(page_aligned(e)); 4715 assert(entry->wired_count == 0); 4716 assert(entry->user_wired_count == 0); 4717 assert(!entry->permanent); 4718 4719 if (entry->is_sub_map) { 4720 object = NULL; 4721 submap = entry->object.sub_map; 4722 } else { 4723 submap = NULL; 4724 object = entry->object.vm_object; 4725 } 4726 4727 vm_map_store_entry_unlink(map, entry); 4728 map->size -= e - s; 4729 4730 vm_map_entry_dispose(map, entry); 4731 4732 vm_map_unlock(map); 4733 /* 4734 * Deallocate the object only after removing all 4735 * pmap entries pointing to its pages. 4736 */ 4737 if (submap) 4738 vm_map_deallocate(submap); 4739 else 4740 vm_object_deallocate(object); 4741 4742} 4743 4744void 4745vm_map_submap_pmap_clean( 4746 vm_map_t map, 4747 vm_map_offset_t start, 4748 vm_map_offset_t end, 4749 vm_map_t sub_map, 4750 vm_map_offset_t offset) 4751{ 4752 vm_map_offset_t submap_start; 4753 vm_map_offset_t submap_end; 4754 vm_map_size_t remove_size; 4755 vm_map_entry_t entry; 4756 4757 submap_end = offset + (end - start); 4758 submap_start = offset; 4759 4760 vm_map_lock_read(sub_map); 4761 if(vm_map_lookup_entry(sub_map, offset, &entry)) { 4762 4763 remove_size = (entry->vme_end - entry->vme_start); 4764 if(offset > entry->vme_start) 4765 remove_size -= offset - entry->vme_start; 4766 4767 4768 if(submap_end < entry->vme_end) { 4769 remove_size -= 4770 entry->vme_end - submap_end; 4771 } 4772 if(entry->is_sub_map) { 4773 vm_map_submap_pmap_clean( 4774 sub_map, 4775 start, 4776 start + remove_size, 4777 entry->object.sub_map, 4778 entry->offset); 4779 } else { 4780 4781 if((map->mapped_in_other_pmaps) && (map->ref_count) 4782 && (entry->object.vm_object != NULL)) { 4783 vm_object_pmap_protect( 4784 entry->object.vm_object, 4785 entry->offset+(offset-entry->vme_start), 4786 remove_size, 4787 PMAP_NULL, 4788 entry->vme_start, 4789 VM_PROT_NONE); 4790 } else { 4791 pmap_remove(map->pmap, 4792 (addr64_t)start, 4793 (addr64_t)(start + remove_size)); 4794 } 4795 } 4796 } 4797 4798 entry = entry->vme_next; 4799 4800 while((entry != vm_map_to_entry(sub_map)) 4801 && (entry->vme_start < submap_end)) { 4802 remove_size = (entry->vme_end - entry->vme_start); 4803 if(submap_end < entry->vme_end) { 4804 remove_size -= entry->vme_end - submap_end; 4805 } 4806 if(entry->is_sub_map) { 4807 vm_map_submap_pmap_clean( 4808 sub_map, 4809 (start + entry->vme_start) - offset, 4810 ((start + entry->vme_start) - offset) + remove_size, 4811 entry->object.sub_map, 4812 entry->offset); 4813 } else { 4814 if((map->mapped_in_other_pmaps) && (map->ref_count) 4815 && (entry->object.vm_object != NULL)) { 4816 vm_object_pmap_protect( 4817 entry->object.vm_object, 4818 entry->offset, 4819 remove_size, 4820 PMAP_NULL, 4821 entry->vme_start, 4822 
VM_PROT_NONE); 4823 } else { 4824 pmap_remove(map->pmap, 4825 (addr64_t)((start + entry->vme_start) 4826 - offset), 4827 (addr64_t)(((start + entry->vme_start) 4828 - offset) + remove_size)); 4829 } 4830 } 4831 entry = entry->vme_next; 4832 } 4833 vm_map_unlock_read(sub_map); 4834 return; 4835} 4836 4837/* 4838 * vm_map_delete: [ internal use only ] 4839 * 4840 * Deallocates the given address range from the target map. 4841 * Removes all user wirings. Unwires one kernel wiring if 4842 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go 4843 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps 4844 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set. 4845 * 4846 * This routine is called with map locked and leaves map locked. 4847 */ 4848static kern_return_t 4849vm_map_delete( 4850 vm_map_t map, 4851 vm_map_offset_t start, 4852 vm_map_offset_t end, 4853 int flags, 4854 vm_map_t zap_map) 4855{ 4856 vm_map_entry_t entry, next; 4857 struct vm_map_entry *first_entry, tmp_entry; 4858 register vm_map_offset_t s; 4859 register vm_object_t object; 4860 boolean_t need_wakeup; 4861 unsigned int last_timestamp = ~0; /* unlikely value */ 4862 int interruptible; 4863 4864 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ? 4865 THREAD_ABORTSAFE : THREAD_UNINT; 4866 4867 /* 4868 * All our DMA I/O operations in IOKit are currently done by 4869 * wiring through the map entries of the task requesting the I/O. 4870 * Because of this, we must always wait for kernel wirings 4871 * to go away on the entries before deleting them. 4872 * 4873 * Any caller who wants to actually remove a kernel wiring 4874 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to 4875 * properly remove one wiring instead of blasting through 4876 * them all. 4877 */ 4878 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE; 4879 4880 while(1) { 4881 /* 4882 * Find the start of the region, and clip it 4883 */ 4884 if (vm_map_lookup_entry(map, start, &first_entry)) { 4885 entry = first_entry; 4886 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) { /* extend request to whole entry */ start = SUPERPAGE_ROUND_DOWN(start); 4887 start = SUPERPAGE_ROUND_DOWN(start); 4888 continue; 4889 } 4890 if (start == entry->vme_start) { 4891 /* 4892 * No need to clip. We don't want to cause 4893 * any unnecessary unnesting in this case... 4894 */ 4895 } else { 4896 vm_map_clip_start(map, entry, start); 4897 } 4898 4899 /* 4900 * Fix the lookup hint now, rather than each 4901 * time through the loop. 4902 */ 4903 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 4904 } else { 4905 entry = first_entry->vme_next; 4906 } 4907 break; 4908 } 4909 if (entry->superpage_size) 4910 end = SUPERPAGE_ROUND_UP(end); 4911 4912 need_wakeup = FALSE; 4913 /* 4914 * Step through all entries in this region 4915 */ 4916 s = entry->vme_start; 4917 while ((entry != vm_map_to_entry(map)) && (s < end)) { 4918 /* 4919 * At this point, we have deleted all the memory entries 4920 * between "start" and "s". We still need to delete 4921 * all memory entries between "s" and "end". 4922 * While we were blocked and the map was unlocked, some 4923 * new memory entries could have been re-allocated between 4924 * "start" and "s" and we don't want to mess with those. 4925 * Some of those entries could even have been re-assembled 4926 * with an entry after "s" (in vm_map_simplify_entry()), so 4927 * we may have to vm_map_clip_start() again. 4928 */ 4929 4930 if (entry->vme_start >= s) { 4931 /* 4932 * This entry starts on or after "s" 4933 * so no need to clip its start. 
4934 */ 4935 } else { 4936 /* 4937 * This entry has been re-assembled by a 4938 * vm_map_simplify_entry(). We need to 4939 * re-clip its start. 4940 */ 4941 vm_map_clip_start(map, entry, s); 4942 } 4943 if (entry->vme_end <= end) { 4944 /* 4945 * This entry is going away completely, so no need 4946 * to clip and possibly cause an unnecessary unnesting. 4947 */ 4948 } else { 4949 vm_map_clip_end(map, entry, end); 4950 } 4951 4952 if (entry->permanent) { 4953 panic("attempt to remove permanent VM map entry " 4954 "%p [0x%llx:0x%llx]\n", 4955 entry, (uint64_t) s, (uint64_t) end); 4956 } 4957 4958 4959 if (entry->in_transition) { 4960 wait_result_t wait_result; 4961 4962 /* 4963 * Another thread is wiring/unwiring this entry. 4964 * Let the other thread know we are waiting. 4965 */ 4966 assert(s == entry->vme_start); 4967 entry->needs_wakeup = TRUE; 4968 4969 /* 4970 * wake up anybody waiting on entries that we have 4971 * already unwired/deleted. 4972 */ 4973 if (need_wakeup) { 4974 vm_map_entry_wakeup(map); 4975 need_wakeup = FALSE; 4976 } 4977 4978 wait_result = vm_map_entry_wait(map, interruptible); 4979 4980 if (interruptible && 4981 wait_result == THREAD_INTERRUPTED) { 4982 /* 4983 * We do not clear the needs_wakeup flag, 4984 * since we cannot tell if we were the only one. 4985 */ 4986 vm_map_unlock(map); 4987 return KERN_ABORTED; 4988 } 4989 4990 /* 4991 * The entry could have been clipped or it 4992 * may not exist anymore. Look it up again. 4993 */ 4994 if (!vm_map_lookup_entry(map, s, &first_entry)) { 4995 assert((map != kernel_map) && 4996 (!entry->is_sub_map)); 4997 /* 4998 * User: use the next entry 4999 */ 5000 entry = first_entry->vme_next; 5001 s = entry->vme_start; 5002 } else { 5003 entry = first_entry; 5004 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5005 } 5006 last_timestamp = map->timestamp; 5007 continue; 5008 } /* end in_transition */ 5009 5010 if (entry->wired_count) { 5011 boolean_t user_wire; 5012 5013 user_wire = entry->user_wired_count > 0; 5014 5015 /* 5016 * Remove a kernel wiring if requested 5017 */ 5018 if (flags & VM_MAP_REMOVE_KUNWIRE) { 5019 entry->wired_count--; 5020 } 5021 5022 /* 5023 * Remove all user wirings for proper accounting 5024 */ 5025 if (entry->user_wired_count > 0) { 5026 while (entry->user_wired_count) 5027 subtract_wire_counts(map, entry, user_wire); 5028 } 5029 5030 if (entry->wired_count != 0) { 5031 assert(map != kernel_map); 5032 /* 5033 * Cannot continue. Typical case is when 5034 * a user thread has physical io pending on 5035 * on this page. Either wait for the 5036 * kernel wiring to go away or return an 5037 * error. 5038 */ 5039 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) { 5040 wait_result_t wait_result; 5041 5042 assert(s == entry->vme_start); 5043 entry->needs_wakeup = TRUE; 5044 wait_result = vm_map_entry_wait(map, 5045 interruptible); 5046 5047 if (interruptible && 5048 wait_result == THREAD_INTERRUPTED) { 5049 /* 5050 * We do not clear the 5051 * needs_wakeup flag, since we 5052 * cannot tell if we were the 5053 * only one. 5054 */ 5055 vm_map_unlock(map); 5056 return KERN_ABORTED; 5057 } 5058 5059 /* 5060 * The entry could have been clipped or 5061 * it may not exist anymore. Look it 5062 * up again. 
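 * (We only get here after vm_map_entry_wait() dropped the map
 * lock while waiting for a kernel wiring to drain, so the
 * region may have been reshaped in the meantime.)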
5063 */ 5064 if (!vm_map_lookup_entry(map, s, 5065 &first_entry)) { 5066 assert(map != kernel_map); 5067 /* 5068 * User: use the next entry 5069 */ 5070 entry = first_entry->vme_next; 5071 s = entry->vme_start; 5072 } else { 5073 entry = first_entry; 5074 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5075 } 5076 last_timestamp = map->timestamp; 5077 continue; 5078 } 5079 else { 5080 return KERN_FAILURE; 5081 } 5082 } 5083 5084 entry->in_transition = TRUE; 5085 /* 5086 * copy current entry. see comment in vm_map_wire() 5087 */ 5088 tmp_entry = *entry; 5089 assert(s == entry->vme_start); 5090 5091 /* 5092 * We can unlock the map now. The in_transition 5093 * state guarentees existance of the entry. 5094 */ 5095 vm_map_unlock(map); 5096 5097 if (tmp_entry.is_sub_map) { 5098 vm_map_t sub_map; 5099 vm_map_offset_t sub_start, sub_end; 5100 pmap_t pmap; 5101 vm_map_offset_t pmap_addr; 5102 5103 5104 sub_map = tmp_entry.object.sub_map; 5105 sub_start = tmp_entry.offset; 5106 sub_end = sub_start + (tmp_entry.vme_end - 5107 tmp_entry.vme_start); 5108 if (tmp_entry.use_pmap) { 5109 pmap = sub_map->pmap; 5110 pmap_addr = tmp_entry.vme_start; 5111 } else { 5112 pmap = map->pmap; 5113 pmap_addr = tmp_entry.vme_start; 5114 } 5115 (void) vm_map_unwire_nested(sub_map, 5116 sub_start, sub_end, 5117 user_wire, 5118 pmap, pmap_addr); 5119 } else { 5120 5121 vm_fault_unwire(map, &tmp_entry, 5122 tmp_entry.object.vm_object == kernel_object, 5123 map->pmap, tmp_entry.vme_start); 5124 } 5125 5126 vm_map_lock(map); 5127 5128 if (last_timestamp+1 != map->timestamp) { 5129 /* 5130 * Find the entry again. It could have 5131 * been clipped after we unlocked the map. 5132 */ 5133 if (!vm_map_lookup_entry(map, s, &first_entry)){ 5134 assert((map != kernel_map) && 5135 (!entry->is_sub_map)); 5136 first_entry = first_entry->vme_next; 5137 s = first_entry->vme_start; 5138 } else { 5139 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5140 } 5141 } else { 5142 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5143 first_entry = entry; 5144 } 5145 5146 last_timestamp = map->timestamp; 5147 5148 entry = first_entry; 5149 while ((entry != vm_map_to_entry(map)) && 5150 (entry->vme_start < tmp_entry.vme_end)) { 5151 assert(entry->in_transition); 5152 entry->in_transition = FALSE; 5153 if (entry->needs_wakeup) { 5154 entry->needs_wakeup = FALSE; 5155 need_wakeup = TRUE; 5156 } 5157 entry = entry->vme_next; 5158 } 5159 /* 5160 * We have unwired the entry(s). Go back and 5161 * delete them. 5162 */ 5163 entry = first_entry; 5164 continue; 5165 } 5166 5167 /* entry is unwired */ 5168 assert(entry->wired_count == 0); 5169 assert(entry->user_wired_count == 0); 5170 5171 assert(s == entry->vme_start); 5172 5173 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) { 5174 /* 5175 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to 5176 * vm_map_delete(), some map entries might have been 5177 * transferred to a "zap_map", which doesn't have a 5178 * pmap. The original pmap has already been flushed 5179 * in the vm_map_delete() call targeting the original 5180 * map, but when we get to destroying the "zap_map", 5181 * we don't have any pmap to flush, so let's just skip 5182 * all this. 
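 * (The "zap_map" hand-off itself happens further down: with
 * VM_MAP_REMOVE_SAVE_ENTRIES set, entries are unlinked from
 * "map" and relinked onto the tail of "zap_map" instead of
 * being deleted here.)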
5183 */ 5184 } else if (entry->is_sub_map) { 5185 if (entry->use_pmap) { 5186#ifndef NO_NESTED_PMAP 5187 pmap_unnest(map->pmap, 5188 (addr64_t)entry->vme_start, 5189 entry->vme_end - entry->vme_start); 5190#endif /* NO_NESTED_PMAP */ 5191 if ((map->mapped_in_other_pmaps) && (map->ref_count)) { 5192 /* clean up parent map/maps */ 5193 vm_map_submap_pmap_clean( 5194 map, entry->vme_start, 5195 entry->vme_end, 5196 entry->object.sub_map, 5197 entry->offset); 5198 } 5199 } else { 5200 vm_map_submap_pmap_clean( 5201 map, entry->vme_start, entry->vme_end, 5202 entry->object.sub_map, 5203 entry->offset); 5204 } 5205 } else if (entry->object.vm_object != kernel_object) { 5206 object = entry->object.vm_object; 5207 if((map->mapped_in_other_pmaps) && (map->ref_count)) { 5208 vm_object_pmap_protect( 5209 object, entry->offset, 5210 entry->vme_end - entry->vme_start, 5211 PMAP_NULL, 5212 entry->vme_start, 5213 VM_PROT_NONE); 5214 } else { 5215 pmap_remove(map->pmap, 5216 (addr64_t)entry->vme_start, 5217 (addr64_t)entry->vme_end); 5218 } 5219 } 5220 5221 /* 5222 * All pmap mappings for this map entry must have been 5223 * cleared by now. 5224 */ 5225 assert(vm_map_pmap_is_empty(map, 5226 entry->vme_start, 5227 entry->vme_end)); 5228 5229 next = entry->vme_next; 5230 s = next->vme_start; 5231 last_timestamp = map->timestamp; 5232 5233 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) && 5234 zap_map != VM_MAP_NULL) { 5235 vm_map_size_t entry_size; 5236 /* 5237 * The caller wants to save the affected VM map entries 5238 * into the "zap_map". The caller will take care of 5239 * these entries. 5240 */ 5241 /* unlink the entry from "map" ... */ 5242 vm_map_store_entry_unlink(map, entry); 5243 /* ... and add it to the end of the "zap_map" */ 5244 vm_map_store_entry_link(zap_map, 5245 vm_map_last_entry(zap_map), 5246 entry); 5247 entry_size = entry->vme_end - entry->vme_start; 5248 map->size -= entry_size; 5249 zap_map->size += entry_size; 5250 /* we didn't unlock the map, so no timestamp increase */ 5251 last_timestamp--; 5252 } else { 5253 vm_map_entry_delete(map, entry); 5254 /* vm_map_entry_delete unlocks the map */ 5255 vm_map_lock(map); 5256 } 5257 5258 entry = next; 5259 5260 if(entry == vm_map_to_entry(map)) { 5261 break; 5262 } 5263 if (last_timestamp+1 != map->timestamp) { 5264 /* 5265 * we are responsible for deleting everything 5266 * from the give space, if someone has interfered 5267 * we pick up where we left off, back fills should 5268 * be all right for anyone except map_delete and 5269 * we have to assume that the task has been fully 5270 * disabled before we get here 5271 */ 5272 if (!vm_map_lookup_entry(map, s, &entry)){ 5273 entry = entry->vme_next; 5274 s = entry->vme_start; 5275 } else { 5276 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 5277 } 5278 /* 5279 * others can not only allocate behind us, we can 5280 * also see coalesce while we don't have the map lock 5281 */ 5282 if(entry == vm_map_to_entry(map)) { 5283 break; 5284 } 5285 } 5286 last_timestamp = map->timestamp; 5287 } 5288 5289 if (map->wait_for_space) 5290 thread_wakeup((event_t) map); 5291 /* 5292 * wake up anybody waiting on entries that we have already deleted. 5293 */ 5294 if (need_wakeup) 5295 vm_map_entry_wakeup(map); 5296 5297 return KERN_SUCCESS; 5298} 5299 5300/* 5301 * vm_map_remove: 5302 * 5303 * Remove the given address range from the target map. 5304 * This is the exported form of vm_map_delete. 
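 *
 * A minimal usage sketch (the flag value is illustrative;
 * callers that need to drop a kernel wiring would pass
 * VM_MAP_REMOVE_KUNWIRE instead):
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_remove(map, start, end, VM_MAP_NO_FLAGS);
 *	assert(kr == KERN_SUCCESS);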
5305 */ 5306kern_return_t 5307vm_map_remove( 5308 register vm_map_t map, 5309 register vm_map_offset_t start, 5310 register vm_map_offset_t end, 5311 register boolean_t flags) 5312{ 5313 register kern_return_t result; 5314 5315 vm_map_lock(map); 5316 VM_MAP_RANGE_CHECK(map, start, end); 5317 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL); 5318 vm_map_unlock(map); 5319 5320 return(result); 5321} 5322 5323 5324/* 5325 * Routine: vm_map_copy_discard 5326 * 5327 * Description: 5328 * Dispose of a map copy object (returned by 5329 * vm_map_copyin). 5330 */ 5331void 5332vm_map_copy_discard( 5333 vm_map_copy_t copy) 5334{ 5335 if (copy == VM_MAP_COPY_NULL) 5336 return; 5337 5338 switch (copy->type) { 5339 case VM_MAP_COPY_ENTRY_LIST: 5340 while (vm_map_copy_first_entry(copy) != 5341 vm_map_copy_to_entry(copy)) { 5342 vm_map_entry_t entry = vm_map_copy_first_entry(copy); 5343 5344 vm_map_copy_entry_unlink(copy, entry); 5345 vm_object_deallocate(entry->object.vm_object); 5346 vm_map_copy_entry_dispose(copy, entry); 5347 } 5348 break; 5349 case VM_MAP_COPY_OBJECT: 5350 vm_object_deallocate(copy->cpy_object); 5351 break; 5352 case VM_MAP_COPY_KERNEL_BUFFER: 5353 5354 /* 5355 * The vm_map_copy_t and possibly the data buffer were 5356 * allocated by a single call to kalloc(), i.e. the 5357 * vm_map_copy_t was not allocated out of the zone. 5358 */ 5359 kfree(copy, copy->cpy_kalloc_size); 5360 return; 5361 } 5362 zfree(vm_map_copy_zone, copy); 5363} 5364 5365/* 5366 * Routine: vm_map_copy_copy 5367 * 5368 * Description: 5369 * Move the information in a map copy object to 5370 * a new map copy object, leaving the old one 5371 * empty. 5372 * 5373 * This is used by kernel routines that need 5374 * to look at out-of-line data (in copyin form) 5375 * before deciding whether to return SUCCESS. 5376 * If the routine returns FAILURE, the original 5377 * copy object will be deallocated; therefore, 5378 * these routines must make a copy of the copy 5379 * object and leave the original empty so that 5380 * deallocation will not fail. 5381 */ 5382vm_map_copy_t 5383vm_map_copy_copy( 5384 vm_map_copy_t copy) 5385{ 5386 vm_map_copy_t new_copy; 5387 5388 if (copy == VM_MAP_COPY_NULL) 5389 return VM_MAP_COPY_NULL; 5390 5391 /* 5392 * Allocate a new copy object, and copy the information 5393 * from the old one into it. 5394 */ 5395 5396 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 5397 *new_copy = *copy; 5398 5399 if (copy->type == VM_MAP_COPY_ENTRY_LIST) { 5400 /* 5401 * The links in the entry chain must be 5402 * changed to point to the new copy object. 5403 */ 5404 vm_map_copy_first_entry(copy)->vme_prev 5405 = vm_map_copy_to_entry(new_copy); 5406 vm_map_copy_last_entry(copy)->vme_next 5407 = vm_map_copy_to_entry(new_copy); 5408 } 5409 5410 /* 5411 * Change the old copy object into one that contains 5412 * nothing to be deallocated. 5413 */ 5414 copy->type = VM_MAP_COPY_OBJECT; 5415 copy->cpy_object = VM_OBJECT_NULL; 5416 5417 /* 5418 * Return the new object. 5419 */ 5420 return new_copy; 5421} 5422 5423static kern_return_t 5424vm_map_overwrite_submap_recurse( 5425 vm_map_t dst_map, 5426 vm_map_offset_t dst_addr, 5427 vm_map_size_t dst_size) 5428{ 5429 vm_map_offset_t dst_end; 5430 vm_map_entry_t tmp_entry; 5431 vm_map_entry_t entry; 5432 kern_return_t result; 5433 boolean_t encountered_sub_map = FALSE; 5434 5435 5436 5437 /* 5438 * Verify that the destination is all writeable 5439 * initially. 
We have to trunc the destination 5440 * address and round the copy size or we'll end up 5441 * splitting entries in strange ways. 5442 */ 5443 5444 dst_end = vm_map_round_page(dst_addr + dst_size); 5445 vm_map_lock(dst_map); 5446 5447start_pass_1: 5448 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) { 5449 vm_map_unlock(dst_map); 5450 return(KERN_INVALID_ADDRESS); 5451 } 5452 5453 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); 5454 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */ 5455 5456 for (entry = tmp_entry;;) { 5457 vm_map_entry_t next; 5458 5459 next = entry->vme_next; 5460 while(entry->is_sub_map) { 5461 vm_map_offset_t sub_start; 5462 vm_map_offset_t sub_end; 5463 vm_map_offset_t local_end; 5464 5465 if (entry->in_transition) { 5466 /* 5467 * Say that we are waiting, and wait for entry. 5468 */ 5469 entry->needs_wakeup = TRUE; 5470 vm_map_entry_wait(dst_map, THREAD_UNINT); 5471 5472 goto start_pass_1; 5473 } 5474 5475 encountered_sub_map = TRUE; 5476 sub_start = entry->offset; 5477 5478 if(entry->vme_end < dst_end) 5479 sub_end = entry->vme_end; 5480 else 5481 sub_end = dst_end; 5482 sub_end -= entry->vme_start; 5483 sub_end += entry->offset; 5484 local_end = entry->vme_end; 5485 vm_map_unlock(dst_map); 5486 5487 result = vm_map_overwrite_submap_recurse( 5488 entry->object.sub_map, 5489 sub_start, 5490 sub_end - sub_start); 5491 5492 if(result != KERN_SUCCESS) 5493 return result; 5494 if (dst_end <= entry->vme_end) 5495 return KERN_SUCCESS; 5496 vm_map_lock(dst_map); 5497 if(!vm_map_lookup_entry(dst_map, local_end, 5498 &tmp_entry)) { 5499 vm_map_unlock(dst_map); 5500 return(KERN_INVALID_ADDRESS); 5501 } 5502 entry = tmp_entry; 5503 next = entry->vme_next; 5504 } 5505 5506 if ( ! (entry->protection & VM_PROT_WRITE)) { 5507 vm_map_unlock(dst_map); 5508 return(KERN_PROTECTION_FAILURE); 5509 } 5510 5511 /* 5512 * If the entry is in transition, we must wait 5513 * for it to exit that state. Anything could happen 5514 * when we unlock the map, so start over. 5515 */ 5516 if (entry->in_transition) { 5517 5518 /* 5519 * Say that we are waiting, and wait for entry. 5520 */ 5521 entry->needs_wakeup = TRUE; 5522 vm_map_entry_wait(dst_map, THREAD_UNINT); 5523 5524 goto start_pass_1; 5525 } 5526 5527/* 5528 * our range is contained completely within this map entry 5529 */ 5530 if (dst_end <= entry->vme_end) { 5531 vm_map_unlock(dst_map); 5532 return KERN_SUCCESS; 5533 } 5534/* 5535 * check that range specified is contiguous region 5536 */ 5537 if ((next == vm_map_to_entry(dst_map)) || 5538 (next->vme_start != entry->vme_end)) { 5539 vm_map_unlock(dst_map); 5540 return(KERN_INVALID_ADDRESS); 5541 } 5542 5543 /* 5544 * Check for permanent objects in the destination. 5545 */ 5546 if ((entry->object.vm_object != VM_OBJECT_NULL) && 5547 ((!entry->object.vm_object->internal) || 5548 (entry->object.vm_object->true_share))) { 5549 if(encountered_sub_map) { 5550 vm_map_unlock(dst_map); 5551 return(KERN_FAILURE); 5552 } 5553 } 5554 5555 5556 entry = next; 5557 }/* for */ 5558 vm_map_unlock(dst_map); 5559 return(KERN_SUCCESS); 5560} 5561 5562/* 5563 * Routine: vm_map_copy_overwrite 5564 * 5565 * Description: 5566 * Copy the memory described by the map copy 5567 * object (copy; returned by vm_map_copyin) onto 5568 * the specified destination region (dst_map, dst_addr). 5569 * The destination must be writeable. 5570 * 5571 * Unlike vm_map_copyout, this routine actually 5572 * writes over previously-mapped memory. 
If the 5573 * previous mapping was to a permanent (user-supplied) 5574 * memory object, it is preserved. 5575 * 5576 * The attributes (protection and inheritance) of the 5577 * destination region are preserved. 5578 * 5579 * If successful, consumes the copy object. 5580 * Otherwise, the caller is responsible for it. 5581 * 5582 * Implementation notes: 5583 * To overwrite aligned temporary virtual memory, it is 5584 * sufficient to remove the previous mapping and insert 5585 * the new copy. This replacement is done either on 5586 * the whole region (if no permanent virtual memory 5587 * objects are embedded in the destination region) or 5588 * in individual map entries. 5589 * 5590 * To overwrite permanent virtual memory , it is necessary 5591 * to copy each page, as the external memory management 5592 * interface currently does not provide any optimizations. 5593 * 5594 * Unaligned memory also has to be copied. It is possible 5595 * to use 'vm_trickery' to copy the aligned data. This is 5596 * not done but not hard to implement. 5597 * 5598 * Once a page of permanent memory has been overwritten, 5599 * it is impossible to interrupt this function; otherwise, 5600 * the call would be neither atomic nor location-independent. 5601 * The kernel-state portion of a user thread must be 5602 * interruptible. 5603 * 5604 * It may be expensive to forward all requests that might 5605 * overwrite permanent memory (vm_write, vm_copy) to 5606 * uninterruptible kernel threads. This routine may be 5607 * called by interruptible threads; however, success is 5608 * not guaranteed -- if the request cannot be performed 5609 * atomically and interruptibly, an error indication is 5610 * returned. 5611 */ 5612 5613static kern_return_t 5614vm_map_copy_overwrite_nested( 5615 vm_map_t dst_map, 5616 vm_map_address_t dst_addr, 5617 vm_map_copy_t copy, 5618 boolean_t interruptible, 5619 pmap_t pmap, 5620 boolean_t discard_on_success) 5621{ 5622 vm_map_offset_t dst_end; 5623 vm_map_entry_t tmp_entry; 5624 vm_map_entry_t entry; 5625 kern_return_t kr; 5626 boolean_t aligned = TRUE; 5627 boolean_t contains_permanent_objects = FALSE; 5628 boolean_t encountered_sub_map = FALSE; 5629 vm_map_offset_t base_addr; 5630 vm_map_size_t copy_size; 5631 vm_map_size_t total_size; 5632 5633 5634 /* 5635 * Check for null copy object. 5636 */ 5637 5638 if (copy == VM_MAP_COPY_NULL) 5639 return(KERN_SUCCESS); 5640 5641 /* 5642 * Check for special kernel buffer allocated 5643 * by new_ipc_kmsg_copyin. 5644 */ 5645 5646 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { 5647 return(vm_map_copyout_kernel_buffer( 5648 dst_map, &dst_addr, 5649 copy, TRUE)); 5650 } 5651 5652 /* 5653 * Only works for entry lists at the moment. Will 5654 * support page lists later. 5655 */ 5656 5657 assert(copy->type == VM_MAP_COPY_ENTRY_LIST); 5658 5659 if (copy->size == 0) { 5660 if (discard_on_success) 5661 vm_map_copy_discard(copy); 5662 return(KERN_SUCCESS); 5663 } 5664 5665 /* 5666 * Verify that the destination is all writeable 5667 * initially. We have to trunc the destination 5668 * address and round the copy size or we'll end up 5669 * splitting entries in strange ways. 
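 * (For example, with 4 KB pages a destination address of 0x1003
 * and a copy size of 0x2000 are checked as the page-aligned
 * range [0x1000, 0x4000).)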
5670 */ 5671 5672 if (!page_aligned(copy->size) || 5673 !page_aligned (copy->offset) || 5674 !page_aligned (dst_addr)) 5675 { 5676 aligned = FALSE; 5677 dst_end = vm_map_round_page(dst_addr + copy->size); 5678 } else { 5679 dst_end = dst_addr + copy->size; 5680 } 5681 5682 vm_map_lock(dst_map); 5683 5684 /* LP64todo - remove this check when vm_map_commpage64() 5685 * no longer has to stuff in a map_entry for the commpage 5686 * above the map's max_offset. 5687 */ 5688 if (dst_addr >= dst_map->max_offset) { 5689 vm_map_unlock(dst_map); 5690 return(KERN_INVALID_ADDRESS); 5691 } 5692 5693start_pass_1: 5694 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) { 5695 vm_map_unlock(dst_map); 5696 return(KERN_INVALID_ADDRESS); 5697 } 5698 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); 5699 for (entry = tmp_entry;;) { 5700 vm_map_entry_t next = entry->vme_next; 5701 5702 while(entry->is_sub_map) { 5703 vm_map_offset_t sub_start; 5704 vm_map_offset_t sub_end; 5705 vm_map_offset_t local_end; 5706 5707 if (entry->in_transition) { 5708 5709 /* 5710 * Say that we are waiting, and wait for entry. 5711 */ 5712 entry->needs_wakeup = TRUE; 5713 vm_map_entry_wait(dst_map, THREAD_UNINT); 5714 5715 goto start_pass_1; 5716 } 5717 5718 local_end = entry->vme_end; 5719 if (!(entry->needs_copy)) { 5720 /* if needs_copy we are a COW submap */ 5721 /* in such a case we just replace so */ 5722 /* there is no need for the follow- */ 5723 /* ing check. */ 5724 encountered_sub_map = TRUE; 5725 sub_start = entry->offset; 5726 5727 if(entry->vme_end < dst_end) 5728 sub_end = entry->vme_end; 5729 else 5730 sub_end = dst_end; 5731 sub_end -= entry->vme_start; 5732 sub_end += entry->offset; 5733 vm_map_unlock(dst_map); 5734 5735 kr = vm_map_overwrite_submap_recurse( 5736 entry->object.sub_map, 5737 sub_start, 5738 sub_end - sub_start); 5739 if(kr != KERN_SUCCESS) 5740 return kr; 5741 vm_map_lock(dst_map); 5742 } 5743 5744 if (dst_end <= entry->vme_end) 5745 goto start_overwrite; 5746 if(!vm_map_lookup_entry(dst_map, local_end, 5747 &entry)) { 5748 vm_map_unlock(dst_map); 5749 return(KERN_INVALID_ADDRESS); 5750 } 5751 next = entry->vme_next; 5752 } 5753 5754 if ( ! (entry->protection & VM_PROT_WRITE)) { 5755 vm_map_unlock(dst_map); 5756 return(KERN_PROTECTION_FAILURE); 5757 } 5758 5759 /* 5760 * If the entry is in transition, we must wait 5761 * for it to exit that state. Anything could happen 5762 * when we unlock the map, so start over. 5763 */ 5764 if (entry->in_transition) { 5765 5766 /* 5767 * Say that we are waiting, and wait for entry. 5768 */ 5769 entry->needs_wakeup = TRUE; 5770 vm_map_entry_wait(dst_map, THREAD_UNINT); 5771 5772 goto start_pass_1; 5773 } 5774 5775/* 5776 * our range is contained completely within this map entry 5777 */ 5778 if (dst_end <= entry->vme_end) 5779 break; 5780/* 5781 * check that range specified is contiguous region 5782 */ 5783 if ((next == vm_map_to_entry(dst_map)) || 5784 (next->vme_start != entry->vme_end)) { 5785 vm_map_unlock(dst_map); 5786 return(KERN_INVALID_ADDRESS); 5787 } 5788 5789 5790 /* 5791 * Check for permanent objects in the destination. 5792 */ 5793 if ((entry->object.vm_object != VM_OBJECT_NULL) && 5794 ((!entry->object.vm_object->internal) || 5795 (entry->object.vm_object->true_share))) { 5796 contains_permanent_objects = TRUE; 5797 } 5798 5799 entry = next; 5800 }/* for */ 5801 5802start_overwrite: 5803 /* 5804 * If there are permanent objects in the destination, then 5805 * the copy cannot be interrupted. 
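 * (An interruptible request is therefore refused up front with
 * KERN_FAILURE rather than risking a partially visible
 * overwrite.)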
5806 */ 5807 5808 if (interruptible && contains_permanent_objects) { 5809 vm_map_unlock(dst_map); 5810 return(KERN_FAILURE); /* XXX */ 5811 } 5812 5813 /* 5814 * 5815 * Make a second pass, overwriting the data 5816 * At the beginning of each loop iteration, 5817 * the next entry to be overwritten is "tmp_entry" 5818 * (initially, the value returned from the lookup above), 5819 * and the starting address expected in that entry 5820 * is "start". 5821 */ 5822 5823 total_size = copy->size; 5824 if(encountered_sub_map) { 5825 copy_size = 0; 5826 /* re-calculate tmp_entry since we've had the map */ 5827 /* unlocked */ 5828 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) { 5829 vm_map_unlock(dst_map); 5830 return(KERN_INVALID_ADDRESS); 5831 } 5832 } else { 5833 copy_size = copy->size; 5834 } 5835 5836 base_addr = dst_addr; 5837 while(TRUE) { 5838 /* deconstruct the copy object and do in parts */ 5839 /* only in sub_map, interruptable case */ 5840 vm_map_entry_t copy_entry; 5841 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL; 5842 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL; 5843 int nentries; 5844 int remaining_entries = 0; 5845 vm_map_offset_t new_offset = 0; 5846 5847 for (entry = tmp_entry; copy_size == 0;) { 5848 vm_map_entry_t next; 5849 5850 next = entry->vme_next; 5851 5852 /* tmp_entry and base address are moved along */ 5853 /* each time we encounter a sub-map. Otherwise */ 5854 /* entry can outpase tmp_entry, and the copy_size */ 5855 /* may reflect the distance between them */ 5856 /* if the current entry is found to be in transition */ 5857 /* we will start over at the beginning or the last */ 5858 /* encounter of a submap as dictated by base_addr */ 5859 /* we will zero copy_size accordingly. */ 5860 if (entry->in_transition) { 5861 /* 5862 * Say that we are waiting, and wait for entry. 5863 */ 5864 entry->needs_wakeup = TRUE; 5865 vm_map_entry_wait(dst_map, THREAD_UNINT); 5866 5867 if(!vm_map_lookup_entry(dst_map, base_addr, 5868 &tmp_entry)) { 5869 vm_map_unlock(dst_map); 5870 return(KERN_INVALID_ADDRESS); 5871 } 5872 copy_size = 0; 5873 entry = tmp_entry; 5874 continue; 5875 } 5876 if(entry->is_sub_map) { 5877 vm_map_offset_t sub_start; 5878 vm_map_offset_t sub_end; 5879 vm_map_offset_t local_end; 5880 5881 if (entry->needs_copy) { 5882 /* if this is a COW submap */ 5883 /* just back the range with a */ 5884 /* anonymous entry */ 5885 if(entry->vme_end < dst_end) 5886 sub_end = entry->vme_end; 5887 else 5888 sub_end = dst_end; 5889 if(entry->vme_start < base_addr) 5890 sub_start = base_addr; 5891 else 5892 sub_start = entry->vme_start; 5893 vm_map_clip_end( 5894 dst_map, entry, sub_end); 5895 vm_map_clip_start( 5896 dst_map, entry, sub_start); 5897 assert(!entry->use_pmap); 5898 entry->is_sub_map = FALSE; 5899 vm_map_deallocate( 5900 entry->object.sub_map); 5901 entry->object.sub_map = NULL; 5902 entry->is_shared = FALSE; 5903 entry->needs_copy = FALSE; 5904 entry->offset = 0; 5905 /* 5906 * XXX FBDP 5907 * We should propagate the protections 5908 * of the submap entry here instead 5909 * of forcing them to VM_PROT_ALL... 5910 * Or better yet, we should inherit 5911 * the protection of the copy_entry. 
5912 */ 5913 entry->protection = VM_PROT_ALL; 5914 entry->max_protection = VM_PROT_ALL; 5915 entry->wired_count = 0; 5916 entry->user_wired_count = 0; 5917 if(entry->inheritance 5918 == VM_INHERIT_SHARE) 5919 entry->inheritance = VM_INHERIT_COPY; 5920 continue; 5921 } 5922 /* first take care of any non-sub_map */ 5923 /* entries to send */ 5924 if(base_addr < entry->vme_start) { 5925 /* stuff to send */ 5926 copy_size = 5927 entry->vme_start - base_addr; 5928 break; 5929 } 5930 sub_start = entry->offset; 5931 5932 if(entry->vme_end < dst_end) 5933 sub_end = entry->vme_end; 5934 else 5935 sub_end = dst_end; 5936 sub_end -= entry->vme_start; 5937 sub_end += entry->offset; 5938 local_end = entry->vme_end; 5939 vm_map_unlock(dst_map); 5940 copy_size = sub_end - sub_start; 5941 5942 /* adjust the copy object */ 5943 if (total_size > copy_size) { 5944 vm_map_size_t local_size = 0; 5945 vm_map_size_t entry_size; 5946 5947 nentries = 1; 5948 new_offset = copy->offset; 5949 copy_entry = vm_map_copy_first_entry(copy); 5950 while(copy_entry != 5951 vm_map_copy_to_entry(copy)){ 5952 entry_size = copy_entry->vme_end - 5953 copy_entry->vme_start; 5954 if((local_size < copy_size) && 5955 ((local_size + entry_size) 5956 >= copy_size)) { 5957 vm_map_copy_clip_end(copy, 5958 copy_entry, 5959 copy_entry->vme_start + 5960 (copy_size - local_size)); 5961 entry_size = copy_entry->vme_end - 5962 copy_entry->vme_start; 5963 local_size += entry_size; 5964 new_offset += entry_size; 5965 } 5966 if(local_size >= copy_size) { 5967 next_copy = copy_entry->vme_next; 5968 copy_entry->vme_next = 5969 vm_map_copy_to_entry(copy); 5970 previous_prev = 5971 copy->cpy_hdr.links.prev; 5972 copy->cpy_hdr.links.prev = copy_entry; 5973 copy->size = copy_size; 5974 remaining_entries = 5975 copy->cpy_hdr.nentries; 5976 remaining_entries -= nentries; 5977 copy->cpy_hdr.nentries = nentries; 5978 break; 5979 } else { 5980 local_size += entry_size; 5981 new_offset += entry_size; 5982 nentries++; 5983 } 5984 copy_entry = copy_entry->vme_next; 5985 } 5986 } 5987 5988 if((entry->use_pmap) && (pmap == NULL)) { 5989 kr = vm_map_copy_overwrite_nested( 5990 entry->object.sub_map, 5991 sub_start, 5992 copy, 5993 interruptible, 5994 entry->object.sub_map->pmap, 5995 TRUE); 5996 } else if (pmap != NULL) { 5997 kr = vm_map_copy_overwrite_nested( 5998 entry->object.sub_map, 5999 sub_start, 6000 copy, 6001 interruptible, pmap, 6002 TRUE); 6003 } else { 6004 kr = vm_map_copy_overwrite_nested( 6005 entry->object.sub_map, 6006 sub_start, 6007 copy, 6008 interruptible, 6009 dst_map->pmap, 6010 TRUE); 6011 } 6012 if(kr != KERN_SUCCESS) { 6013 if(next_copy != NULL) { 6014 copy->cpy_hdr.nentries += 6015 remaining_entries; 6016 copy->cpy_hdr.links.prev->vme_next = 6017 next_copy; 6018 copy->cpy_hdr.links.prev 6019 = previous_prev; 6020 copy->size = total_size; 6021 } 6022 return kr; 6023 } 6024 if (dst_end <= local_end) { 6025 return(KERN_SUCCESS); 6026 } 6027 /* otherwise copy no longer exists, it was */ 6028 /* destroyed after successful copy_overwrite */ 6029 copy = (vm_map_copy_t) 6030 zalloc(vm_map_copy_zone); 6031 vm_map_copy_first_entry(copy) = 6032 vm_map_copy_last_entry(copy) = 6033 vm_map_copy_to_entry(copy); 6034 copy->type = VM_MAP_COPY_ENTRY_LIST; 6035 copy->offset = new_offset; 6036 6037 /* 6038 * XXX FBDP 6039 * this does not seem to deal with 6040 * the VM map store (R&B tree) 6041 */ 6042 6043 total_size -= copy_size; 6044 copy_size = 0; 6045 /* put back remainder of copy in container */ 6046 if(next_copy != NULL) { 6047 
copy->cpy_hdr.nentries = remaining_entries; 6048 copy->cpy_hdr.links.next = next_copy; 6049 copy->cpy_hdr.links.prev = previous_prev; 6050 copy->size = total_size; 6051 next_copy->vme_prev = 6052 vm_map_copy_to_entry(copy); 6053 next_copy = NULL; 6054 } 6055 base_addr = local_end; 6056 vm_map_lock(dst_map); 6057 if(!vm_map_lookup_entry(dst_map, 6058 local_end, &tmp_entry)) { 6059 vm_map_unlock(dst_map); 6060 return(KERN_INVALID_ADDRESS); 6061 } 6062 entry = tmp_entry; 6063 continue; 6064 } 6065 if (dst_end <= entry->vme_end) { 6066 copy_size = dst_end - base_addr; 6067 break; 6068 } 6069 6070 if ((next == vm_map_to_entry(dst_map)) || 6071 (next->vme_start != entry->vme_end)) { 6072 vm_map_unlock(dst_map); 6073 return(KERN_INVALID_ADDRESS); 6074 } 6075 6076 entry = next; 6077 }/* for */ 6078 6079 next_copy = NULL; 6080 nentries = 1; 6081 6082 /* adjust the copy object */ 6083 if (total_size > copy_size) { 6084 vm_map_size_t local_size = 0; 6085 vm_map_size_t entry_size; 6086 6087 new_offset = copy->offset; 6088 copy_entry = vm_map_copy_first_entry(copy); 6089 while(copy_entry != vm_map_copy_to_entry(copy)) { 6090 entry_size = copy_entry->vme_end - 6091 copy_entry->vme_start; 6092 if((local_size < copy_size) && 6093 ((local_size + entry_size) 6094 >= copy_size)) { 6095 vm_map_copy_clip_end(copy, copy_entry, 6096 copy_entry->vme_start + 6097 (copy_size - local_size)); 6098 entry_size = copy_entry->vme_end - 6099 copy_entry->vme_start; 6100 local_size += entry_size; 6101 new_offset += entry_size; 6102 } 6103 if(local_size >= copy_size) { 6104 next_copy = copy_entry->vme_next; 6105 copy_entry->vme_next = 6106 vm_map_copy_to_entry(copy); 6107 previous_prev = 6108 copy->cpy_hdr.links.prev; 6109 copy->cpy_hdr.links.prev = copy_entry; 6110 copy->size = copy_size; 6111 remaining_entries = 6112 copy->cpy_hdr.nentries; 6113 remaining_entries -= nentries; 6114 copy->cpy_hdr.nentries = nentries; 6115 break; 6116 } else { 6117 local_size += entry_size; 6118 new_offset += entry_size; 6119 nentries++; 6120 } 6121 copy_entry = copy_entry->vme_next; 6122 } 6123 } 6124 6125 if (aligned) { 6126 pmap_t local_pmap; 6127 6128 if(pmap) 6129 local_pmap = pmap; 6130 else 6131 local_pmap = dst_map->pmap; 6132 6133 if ((kr = vm_map_copy_overwrite_aligned( 6134 dst_map, tmp_entry, copy, 6135 base_addr, local_pmap)) != KERN_SUCCESS) { 6136 if(next_copy != NULL) { 6137 copy->cpy_hdr.nentries += 6138 remaining_entries; 6139 copy->cpy_hdr.links.prev->vme_next = 6140 next_copy; 6141 copy->cpy_hdr.links.prev = 6142 previous_prev; 6143 copy->size += copy_size; 6144 } 6145 return kr; 6146 } 6147 vm_map_unlock(dst_map); 6148 } else { 6149 /* 6150 * Performance gain: 6151 * 6152 * if the copy and dst address are misaligned but the same 6153 * offset within the page we can copy_not_aligned the 6154 * misaligned parts and copy aligned the rest. If they are 6155 * aligned but len is unaligned we simply need to copy 6156 * the end bit unaligned. We'll need to split the misaligned 6157 * bits of the region in this case ! 
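 * (vm_map_copy_overwrite() below implements that split: an
 * unaligned "head" up to the first page boundary, an aligned
 * middle, and an unaligned "tail", each handed separately to
 * vm_map_copy_overwrite_nested().)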
6158 */ 6159 /* ALWAYS UNLOCKS THE dst_map MAP */ 6160 if ((kr = vm_map_copy_overwrite_unaligned( dst_map, 6161 tmp_entry, copy, base_addr)) != KERN_SUCCESS) { 6162 if(next_copy != NULL) { 6163 copy->cpy_hdr.nentries += 6164 remaining_entries; 6165 copy->cpy_hdr.links.prev->vme_next = 6166 next_copy; 6167 copy->cpy_hdr.links.prev = 6168 previous_prev; 6169 copy->size += copy_size; 6170 } 6171 return kr; 6172 } 6173 } 6174 total_size -= copy_size; 6175 if(total_size == 0) 6176 break; 6177 base_addr += copy_size; 6178 copy_size = 0; 6179 copy->offset = new_offset; 6180 if(next_copy != NULL) { 6181 copy->cpy_hdr.nentries = remaining_entries; 6182 copy->cpy_hdr.links.next = next_copy; 6183 copy->cpy_hdr.links.prev = previous_prev; 6184 next_copy->vme_prev = vm_map_copy_to_entry(copy); 6185 copy->size = total_size; 6186 } 6187 vm_map_lock(dst_map); 6188 while(TRUE) { 6189 if (!vm_map_lookup_entry(dst_map, 6190 base_addr, &tmp_entry)) { 6191 vm_map_unlock(dst_map); 6192 return(KERN_INVALID_ADDRESS); 6193 } 6194 if (tmp_entry->in_transition) { 6195 entry->needs_wakeup = TRUE; 6196 vm_map_entry_wait(dst_map, THREAD_UNINT); 6197 } else { 6198 break; 6199 } 6200 } 6201 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr)); 6202 6203 entry = tmp_entry; 6204 } /* while */ 6205 6206 /* 6207 * Throw away the vm_map_copy object 6208 */ 6209 if (discard_on_success) 6210 vm_map_copy_discard(copy); 6211 6212 return(KERN_SUCCESS); 6213}/* vm_map_copy_overwrite */ 6214 6215kern_return_t 6216vm_map_copy_overwrite( 6217 vm_map_t dst_map, 6218 vm_map_offset_t dst_addr, 6219 vm_map_copy_t copy, 6220 boolean_t interruptible) 6221{ 6222 vm_map_size_t head_size, tail_size; 6223 vm_map_copy_t head_copy, tail_copy; 6224 vm_map_offset_t head_addr, tail_addr; 6225 vm_map_entry_t entry; 6226 kern_return_t kr; 6227 6228 head_size = 0; 6229 tail_size = 0; 6230 head_copy = NULL; 6231 tail_copy = NULL; 6232 head_addr = 0; 6233 tail_addr = 0; 6234 6235 if (interruptible || 6236 copy == VM_MAP_COPY_NULL || 6237 copy->type != VM_MAP_COPY_ENTRY_LIST) { 6238 /* 6239 * We can't split the "copy" map if we're interruptible 6240 * or if we don't have a "copy" map... 6241 */ 6242 blunt_copy: 6243 return vm_map_copy_overwrite_nested(dst_map, 6244 dst_addr, 6245 copy, 6246 interruptible, 6247 (pmap_t) NULL, 6248 TRUE); 6249 } 6250 6251 if (copy->size < 3 * PAGE_SIZE) { 6252 /* 6253 * Too small to bother with optimizing... 6254 */ 6255 goto blunt_copy; 6256 } 6257 6258 if ((dst_addr & PAGE_MASK) != (copy->offset & PAGE_MASK)) { 6259 /* 6260 * Incompatible mis-alignment of source and destination... 6261 */ 6262 goto blunt_copy; 6263 } 6264 6265 /* 6266 * Proper alignment or identical mis-alignment at the beginning. 6267 * Let's try and do a small unaligned copy first (if needed) 6268 * and then an aligned copy for the rest. 6269 */ 6270 if (!page_aligned(dst_addr)) { 6271 head_addr = dst_addr; 6272 head_size = PAGE_SIZE - (copy->offset & PAGE_MASK); 6273 } 6274 if (!page_aligned(copy->offset + copy->size)) { 6275 /* 6276 * Mis-alignment at the end. 6277 * Do an aligned copy up to the last page and 6278 * then an unaligned copy for the remaining bytes. 6279 */ 6280 tail_size = (copy->offset + copy->size) & PAGE_MASK; 6281 tail_addr = dst_addr + copy->size - tail_size; 6282 } 6283 6284 if (head_size + tail_size == copy->size) { 6285 /* 6286 * It's all unaligned, no optimization possible... 
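 * (Given the 3-page minimum enforced above, head_size and
 * tail_size are each smaller than a page, so their sum cannot
 * reach copy->size once we get here; this check is defensive.)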
6287 */ 6288 goto blunt_copy; 6289 } 6290 6291 /* 6292 * Can't optimize if there are any submaps in the 6293 * destination due to the way we free the "copy" map 6294 * progressively in vm_map_copy_overwrite_nested() 6295 * in that case. 6296 */ 6297 vm_map_lock_read(dst_map); 6298 if (! vm_map_lookup_entry(dst_map, dst_addr, &entry)) { 6299 vm_map_unlock_read(dst_map); 6300 goto blunt_copy; 6301 } 6302 for (; 6303 (entry != vm_map_copy_to_entry(copy) && 6304 entry->vme_start < dst_addr + copy->size); 6305 entry = entry->vme_next) { 6306 if (entry->is_sub_map) { 6307 vm_map_unlock_read(dst_map); 6308 goto blunt_copy; 6309 } 6310 } 6311 vm_map_unlock_read(dst_map); 6312 6313 if (head_size) { 6314 /* 6315 * Unaligned copy of the first "head_size" bytes, to reach 6316 * a page boundary. 6317 */ 6318 6319 /* 6320 * Extract "head_copy" out of "copy". 6321 */ 6322 head_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 6323 vm_map_copy_first_entry(head_copy) = 6324 vm_map_copy_to_entry(head_copy); 6325 vm_map_copy_last_entry(head_copy) = 6326 vm_map_copy_to_entry(head_copy); 6327 head_copy->type = VM_MAP_COPY_ENTRY_LIST; 6328 head_copy->cpy_hdr.nentries = 0; 6329 head_copy->cpy_hdr.entries_pageable = 6330 copy->cpy_hdr.entries_pageable; 6331 vm_map_store_init(&head_copy->cpy_hdr); 6332 6333 head_copy->offset = copy->offset; 6334 head_copy->size = head_size; 6335 6336 copy->offset += head_size; 6337 copy->size -= head_size; 6338 6339 entry = vm_map_copy_first_entry(copy); 6340 vm_map_copy_clip_end(copy, entry, copy->offset); 6341 vm_map_copy_entry_unlink(copy, entry); 6342 vm_map_copy_entry_link(head_copy, 6343 vm_map_copy_to_entry(head_copy), 6344 entry); 6345 6346 /* 6347 * Do the unaligned copy. 6348 */ 6349 kr = vm_map_copy_overwrite_nested(dst_map, 6350 head_addr, 6351 head_copy, 6352 interruptible, 6353 (pmap_t) NULL, 6354 FALSE); 6355 if (kr != KERN_SUCCESS) 6356 goto done; 6357 } 6358 6359 if (tail_size) { 6360 /* 6361 * Extract "tail_copy" out of "copy". 6362 */ 6363 tail_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 6364 vm_map_copy_first_entry(tail_copy) = 6365 vm_map_copy_to_entry(tail_copy); 6366 vm_map_copy_last_entry(tail_copy) = 6367 vm_map_copy_to_entry(tail_copy); 6368 tail_copy->type = VM_MAP_COPY_ENTRY_LIST; 6369 tail_copy->cpy_hdr.nentries = 0; 6370 tail_copy->cpy_hdr.entries_pageable = 6371 copy->cpy_hdr.entries_pageable; 6372 vm_map_store_init(&tail_copy->cpy_hdr); 6373 6374 tail_copy->offset = copy->offset + copy->size - tail_size; 6375 tail_copy->size = tail_size; 6376 6377 copy->size -= tail_size; 6378 6379 entry = vm_map_copy_last_entry(copy); 6380 vm_map_copy_clip_start(copy, entry, tail_copy->offset); 6381 entry = vm_map_copy_last_entry(copy); 6382 vm_map_copy_entry_unlink(copy, entry); 6383 vm_map_copy_entry_link(tail_copy, 6384 vm_map_copy_last_entry(tail_copy), 6385 entry); 6386 } 6387 6388 /* 6389 * Copy most (or possibly all) of the data. 6390 */ 6391 kr = vm_map_copy_overwrite_nested(dst_map, 6392 dst_addr + head_size, 6393 copy, 6394 interruptible, 6395 (pmap_t) NULL, 6396 FALSE); 6397 if (kr != KERN_SUCCESS) { 6398 goto done; 6399 } 6400 6401 if (tail_size) { 6402 kr = vm_map_copy_overwrite_nested(dst_map, 6403 tail_addr, 6404 tail_copy, 6405 interruptible, 6406 (pmap_t) NULL, 6407 FALSE); 6408 } 6409 6410done: 6411 assert(copy->type == VM_MAP_COPY_ENTRY_LIST); 6412 if (kr == KERN_SUCCESS) { 6413 /* 6414 * Discard all the copy maps. 
 */
		if (head_copy) {
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		vm_map_copy_discard(copy);
		if (tail_copy) {
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	} else {
		/*
		 * Re-assemble the original copy map.
		 */
		if (head_copy) {
			entry = vm_map_copy_first_entry(head_copy);
			vm_map_copy_entry_unlink(head_copy, entry);
			vm_map_copy_entry_link(copy,
					       vm_map_copy_to_entry(copy),
					       entry);
			copy->offset -= head_size;
			copy->size += head_size;
			vm_map_copy_discard(head_copy);
			head_copy = NULL;
		}
		if (tail_copy) {
			entry = vm_map_copy_last_entry(tail_copy);
			vm_map_copy_entry_unlink(tail_copy, entry);
			vm_map_copy_entry_link(copy,
					       vm_map_copy_last_entry(copy),
					       entry);
			copy->size += tail_size;
			vm_map_copy_discard(tail_copy);
			tail_copy = NULL;
		}
	}
	return kr;
}


/*
 *	Routine: vm_map_copy_overwrite_unaligned	[internal use only]
 *
 *	Description:
 *	Physically copy unaligned data
 *
 *	Implementation:
 *	Unaligned parts of pages have to be physically copied.  We use
 *	a modified form of vm_fault_copy (which understands non-aligned
 *	page offsets and sizes) to do the copy.  We attempt to copy as
 *	much memory in one go as possible; however, vm_fault_copy copies
 *	within one memory object, so we have to find the smallest of
 *	"amount left", "source object data size" and "target object data
 *	size".  With unaligned data we don't need to split regions,
 *	therefore the source (copy) object should be one map entry; the
 *	target range may be split over multiple map entries, however.
 *	In any event we are pessimistic about these assumptions.
 *
 *	Assumptions:
 *	dst_map is locked on entry and is returned locked on success,
 *	unlocked on error.
 */

static kern_return_t
vm_map_copy_overwrite_unaligned(
	vm_map_t	dst_map,
	vm_map_entry_t	entry,
	vm_map_copy_t	copy,
	vm_map_offset_t	start)
{
	vm_map_entry_t		copy_entry = vm_map_copy_first_entry(copy);
	vm_map_version_t	version;
	vm_object_t		dst_object;
	vm_object_offset_t	dst_offset;
	vm_object_offset_t	src_offset;
	vm_object_offset_t	entry_offset;
	vm_map_offset_t		entry_end;
	vm_map_size_t		src_size,
				dst_size,
				copy_size,
				amount_left;
	kern_return_t		kr = KERN_SUCCESS;

	vm_map_lock_write_to_read(dst_map);

	src_offset = copy->offset - vm_object_trunc_page(copy->offset);
	amount_left = copy->size;
/*
 *	unaligned so we never clipped this entry, we need the offset into
 *	the vm_object not just the data.
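 *
 *	(Illustrative note, not original: src_offset is just the sub-page
 *	part of copy->offset.  With 4 KB pages, a copy->offset of 0x1a40
 *	gives
 *
 *		src_offset = 0x1a40 - vm_object_trunc_page(0x1a40)
 *		           = 0x1a40 - 0x1000 = 0xa40
 *
 *	so the first vm_fault_copy pass below reads starting 0xa40 bytes
 *	past the first copy entry's offset.)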
 */
	while (amount_left > 0) {

		if (entry == vm_map_to_entry(dst_map)) {
			vm_map_unlock_read(dst_map);
			return KERN_INVALID_ADDRESS;
		}

		/* "start" must be within the current map entry */
		assert ((start>=entry->vme_start) && (start<entry->vme_end));

		dst_offset = start - entry->vme_start;

		dst_size = entry->vme_end - start;

		src_size = copy_entry->vme_end -
			(copy_entry->vme_start + src_offset);

		if (dst_size < src_size) {
/*
 *			we can only copy dst_size bytes before
 *			we have to get the next destination entry
 */
			copy_size = dst_size;
		} else {
/*
 *			we can only copy src_size bytes before
 *			we have to get the next source copy entry
 */
			copy_size = src_size;
		}

		if (copy_size > amount_left) {
			copy_size = amount_left;
		}
/*
 *		Entry needs copy: create a shadow object for the
 *		copy-on-write region.
 */
		if (entry->needs_copy &&
		    ((entry->protection & VM_PROT_WRITE) != 0))
		{
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			vm_object_shadow(&entry->object.vm_object,
					 &entry->offset,
					 (vm_map_size_t)(entry->vme_end
							 - entry->vme_start));
			entry->needs_copy = FALSE;
			vm_map_lock_write_to_read(dst_map);
		}
		dst_object = entry->object.vm_object;
/*
 *		unlike with the virtual (aligned) copy we're going
 *		to fault on it, therefore we need a target object.
 */
		if (dst_object == VM_OBJECT_NULL) {
			if (vm_map_lock_read_to_write(dst_map)) {
				vm_map_lock_read(dst_map);
				goto RetryLookup;
			}
			dst_object = vm_object_allocate((vm_map_size_t)
							entry->vme_end - entry->vme_start);
			entry->object.vm_object = dst_object;
			entry->offset = 0;
			vm_map_lock_write_to_read(dst_map);
		}
/*
 *		Take an object reference and unlock map. The "entry" may
 *		disappear or change when the map is unlocked.
 */
		vm_object_reference(dst_object);
		version.main_timestamp = dst_map->timestamp;
		entry_offset = entry->offset;
		entry_end = entry->vme_end;
		vm_map_unlock_read(dst_map);
/*
 *		Copy as much as possible in one pass
 */
		kr = vm_fault_copy(
			copy_entry->object.vm_object,
			copy_entry->offset + src_offset,
			&copy_size,
			dst_object,
			entry_offset + dst_offset,
			dst_map,
			&version,
			THREAD_UNINT );

		start += copy_size;
		src_offset += copy_size;
		amount_left -= copy_size;
/*
 *		Release the object reference
 */
		vm_object_deallocate(dst_object);
/*
 *		If a hard error occurred, return it now
 */
		if (kr != KERN_SUCCESS)
			return kr;

		if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
		    || amount_left == 0)
		{
/*
 *			all done with this copy entry, dispose.
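 *
 *			(Aside, not original code: the copy_size chosen
 *			earlier in this loop is simply the three-way
 *			minimum
 *
 *				copy_size = MIN(MIN(dst_size, src_size),
 *						amount_left);
 *
 *			assuming the usual MIN() macro; whichever term
 *			limits it decides whether we advance to the next
 *			destination entry, the next source copy entry,
 *			or finish.)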
6614 */ 6615 vm_map_copy_entry_unlink(copy, copy_entry); 6616 vm_object_deallocate(copy_entry->object.vm_object); 6617 vm_map_copy_entry_dispose(copy, copy_entry); 6618 6619 if ((copy_entry = vm_map_copy_first_entry(copy)) 6620 == vm_map_copy_to_entry(copy) && amount_left) { 6621/* 6622 * not finished copying but run out of source 6623 */ 6624 return KERN_INVALID_ADDRESS; 6625 } 6626 src_offset = 0; 6627 } 6628 6629 if (amount_left == 0) 6630 return KERN_SUCCESS; 6631 6632 vm_map_lock_read(dst_map); 6633 if (version.main_timestamp == dst_map->timestamp) { 6634 if (start == entry_end) { 6635/* 6636 * destination region is split. Use the version 6637 * information to avoid a lookup in the normal 6638 * case. 6639 */ 6640 entry = entry->vme_next; 6641/* 6642 * should be contiguous. Fail if we encounter 6643 * a hole in the destination. 6644 */ 6645 if (start != entry->vme_start) { 6646 vm_map_unlock_read(dst_map); 6647 return KERN_INVALID_ADDRESS ; 6648 } 6649 } 6650 } else { 6651/* 6652 * Map version check failed. 6653 * we must lookup the entry because somebody 6654 * might have changed the map behind our backs. 6655 */ 6656 RetryLookup: 6657 if (!vm_map_lookup_entry(dst_map, start, &entry)) 6658 { 6659 vm_map_unlock_read(dst_map); 6660 return KERN_INVALID_ADDRESS ; 6661 } 6662 } 6663 }/* while */ 6664 6665 return KERN_SUCCESS; 6666}/* vm_map_copy_overwrite_unaligned */ 6667 6668/* 6669 * Routine: vm_map_copy_overwrite_aligned [internal use only] 6670 * 6671 * Description: 6672 * Does all the vm_trickery possible for whole pages. 6673 * 6674 * Implementation: 6675 * 6676 * If there are no permanent objects in the destination, 6677 * and the source and destination map entry zones match, 6678 * and the destination map entry is not shared, 6679 * then the map entries can be deleted and replaced 6680 * with those from the copy. The following code is the 6681 * basic idea of what to do, but there are lots of annoying 6682 * little details about getting protection and inheritance 6683 * right. Should add protection, inheritance, and sharing checks 6684 * to the above pass and make sure that no wiring is involved. 6685 */ 6686 6687int vm_map_copy_overwrite_aligned_src_not_internal = 0; 6688int vm_map_copy_overwrite_aligned_src_not_symmetric = 0; 6689int vm_map_copy_overwrite_aligned_src_large = 0; 6690 6691static kern_return_t 6692vm_map_copy_overwrite_aligned( 6693 vm_map_t dst_map, 6694 vm_map_entry_t tmp_entry, 6695 vm_map_copy_t copy, 6696 vm_map_offset_t start, 6697 __unused pmap_t pmap) 6698{ 6699 vm_object_t object; 6700 vm_map_entry_t copy_entry; 6701 vm_map_size_t copy_size; 6702 vm_map_size_t size; 6703 vm_map_entry_t entry; 6704 6705 while ((copy_entry = vm_map_copy_first_entry(copy)) 6706 != vm_map_copy_to_entry(copy)) 6707 { 6708 copy_size = (copy_entry->vme_end - copy_entry->vme_start); 6709 6710 entry = tmp_entry; 6711 assert(!entry->use_pmap); /* unnested when clipped earlier */ 6712 if (entry == vm_map_to_entry(dst_map)) { 6713 vm_map_unlock(dst_map); 6714 return KERN_INVALID_ADDRESS; 6715 } 6716 size = (entry->vme_end - entry->vme_start); 6717 /* 6718 * Make sure that no holes popped up in the 6719 * address map, and that the protection is 6720 * still valid, in case the map was unlocked 6721 * earlier. 
6722 */ 6723 6724 if ((entry->vme_start != start) || ((entry->is_sub_map) 6725 && !entry->needs_copy)) { 6726 vm_map_unlock(dst_map); 6727 return(KERN_INVALID_ADDRESS); 6728 } 6729 assert(entry != vm_map_to_entry(dst_map)); 6730 6731 /* 6732 * Check protection again 6733 */ 6734 6735 if ( ! (entry->protection & VM_PROT_WRITE)) { 6736 vm_map_unlock(dst_map); 6737 return(KERN_PROTECTION_FAILURE); 6738 } 6739 6740 /* 6741 * Adjust to source size first 6742 */ 6743 6744 if (copy_size < size) { 6745 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size); 6746 size = copy_size; 6747 } 6748 6749 /* 6750 * Adjust to destination size 6751 */ 6752 6753 if (size < copy_size) { 6754 vm_map_copy_clip_end(copy, copy_entry, 6755 copy_entry->vme_start + size); 6756 copy_size = size; 6757 } 6758 6759 assert((entry->vme_end - entry->vme_start) == size); 6760 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size); 6761 assert((copy_entry->vme_end - copy_entry->vme_start) == size); 6762 6763 /* 6764 * If the destination contains temporary unshared memory, 6765 * we can perform the copy by throwing it away and 6766 * installing the source data. 6767 */ 6768 6769 object = entry->object.vm_object; 6770 if ((!entry->is_shared && 6771 ((object == VM_OBJECT_NULL) || 6772 (object->internal && !object->true_share))) || 6773 entry->needs_copy) { 6774 vm_object_t old_object = entry->object.vm_object; 6775 vm_object_offset_t old_offset = entry->offset; 6776 vm_object_offset_t offset; 6777 6778 /* 6779 * Ensure that the source and destination aren't 6780 * identical 6781 */ 6782 if (old_object == copy_entry->object.vm_object && 6783 old_offset == copy_entry->offset) { 6784 vm_map_copy_entry_unlink(copy, copy_entry); 6785 vm_map_copy_entry_dispose(copy, copy_entry); 6786 6787 if (old_object != VM_OBJECT_NULL) 6788 vm_object_deallocate(old_object); 6789 6790 start = tmp_entry->vme_end; 6791 tmp_entry = tmp_entry->vme_next; 6792 continue; 6793 } 6794 6795#if !CONFIG_EMBEDDED 6796#define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */ 6797#define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */ 6798 if (copy_entry->object.vm_object != VM_OBJECT_NULL && 6799 copy_entry->object.vm_object->vo_size >= __TRADEOFF1_OBJ_SIZE && 6800 copy_size <= __TRADEOFF1_COPY_SIZE) { 6801 /* 6802 * Virtual vs. Physical copy tradeoff #1. 6803 * 6804 * Copying only a few pages out of a large 6805 * object: do a physical copy instead of 6806 * a virtual copy, to avoid possibly keeping 6807 * the entire large object alive because of 6808 * those few copy-on-write pages. 6809 */ 6810 vm_map_copy_overwrite_aligned_src_large++; 6811 goto slow_copy; 6812 } 6813#endif /* !CONFIG_EMBEDDED */ 6814 6815 if (entry->alias >= VM_MEMORY_MALLOC && 6816 entry->alias <= VM_MEMORY_MALLOC_LARGE_REUSED) { 6817 vm_object_t new_object, new_shadow; 6818 6819 /* 6820 * We're about to map something over a mapping 6821 * established by malloc()... 
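 *
 *			(Illustrative numbers for "tradeoff #1" above, not
 *			part of the original comment: copying, say, 96 KB
 *			out of a 200 MB source object satisfies
 *			vo_size >= 64 MB and copy_size <= 128 KB, so the
 *			code takes the slow_copy path and physically
 *			copies the pages rather than keeping the whole
 *			200 MB object alive through copy-on-write
 *			references.)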
6822 */ 6823 new_object = copy_entry->object.vm_object; 6824 if (new_object != VM_OBJECT_NULL) { 6825 vm_object_lock_shared(new_object); 6826 } 6827 while (new_object != VM_OBJECT_NULL && 6828#if !CONFIG_EMBEDDED 6829 !new_object->true_share && 6830 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && 6831#endif /* !CONFIG_EMBEDDED */ 6832 new_object->internal) { 6833 new_shadow = new_object->shadow; 6834 if (new_shadow == VM_OBJECT_NULL) { 6835 break; 6836 } 6837 vm_object_lock_shared(new_shadow); 6838 vm_object_unlock(new_object); 6839 new_object = new_shadow; 6840 } 6841 if (new_object != VM_OBJECT_NULL) { 6842 if (!new_object->internal) { 6843 /* 6844 * The new mapping is backed 6845 * by an external object. We 6846 * don't want malloc'ed memory 6847 * to be replaced with such a 6848 * non-anonymous mapping, so 6849 * let's go off the optimized 6850 * path... 6851 */ 6852 vm_map_copy_overwrite_aligned_src_not_internal++; 6853 vm_object_unlock(new_object); 6854 goto slow_copy; 6855 } 6856#if !CONFIG_EMBEDDED 6857 if (new_object->true_share || 6858 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) { 6859 /* 6860 * Same if there's a "true_share" 6861 * object in the shadow chain, or 6862 * an object with a non-default 6863 * (SYMMETRIC) copy strategy. 6864 */ 6865 vm_map_copy_overwrite_aligned_src_not_symmetric++; 6866 vm_object_unlock(new_object); 6867 goto slow_copy; 6868 } 6869#endif /* !CONFIG_EMBEDDED */ 6870 vm_object_unlock(new_object); 6871 } 6872 /* 6873 * The new mapping is still backed by 6874 * anonymous (internal) memory, so it's 6875 * OK to substitute it for the original 6876 * malloc() mapping. 6877 */ 6878 } 6879 6880 if (old_object != VM_OBJECT_NULL) { 6881 if(entry->is_sub_map) { 6882 if(entry->use_pmap) { 6883#ifndef NO_NESTED_PMAP 6884 pmap_unnest(dst_map->pmap, 6885 (addr64_t)entry->vme_start, 6886 entry->vme_end - entry->vme_start); 6887#endif /* NO_NESTED_PMAP */ 6888 if(dst_map->mapped_in_other_pmaps) { 6889 /* clean up parent */ 6890 /* map/maps */ 6891 vm_map_submap_pmap_clean( 6892 dst_map, entry->vme_start, 6893 entry->vme_end, 6894 entry->object.sub_map, 6895 entry->offset); 6896 } 6897 } else { 6898 vm_map_submap_pmap_clean( 6899 dst_map, entry->vme_start, 6900 entry->vme_end, 6901 entry->object.sub_map, 6902 entry->offset); 6903 } 6904 vm_map_deallocate( 6905 entry->object.sub_map); 6906 } else { 6907 if(dst_map->mapped_in_other_pmaps) { 6908 vm_object_pmap_protect( 6909 entry->object.vm_object, 6910 entry->offset, 6911 entry->vme_end 6912 - entry->vme_start, 6913 PMAP_NULL, 6914 entry->vme_start, 6915 VM_PROT_NONE); 6916 } else { 6917 pmap_remove(dst_map->pmap, 6918 (addr64_t)(entry->vme_start), 6919 (addr64_t)(entry->vme_end)); 6920 } 6921 vm_object_deallocate(old_object); 6922 } 6923 } 6924 6925 entry->is_sub_map = FALSE; 6926 entry->object = copy_entry->object; 6927 object = entry->object.vm_object; 6928 entry->needs_copy = copy_entry->needs_copy; 6929 entry->wired_count = 0; 6930 entry->user_wired_count = 0; 6931 offset = entry->offset = copy_entry->offset; 6932 6933 vm_map_copy_entry_unlink(copy, copy_entry); 6934 vm_map_copy_entry_dispose(copy, copy_entry); 6935 6936 /* 6937 * we could try to push pages into the pmap at this point, BUT 6938 * this optimization only saved on average 2 us per page if ALL 6939 * the pages in the source were currently mapped 6940 * and ALL the pages in the dest were touched, if there were fewer 6941 * than 2/3 of the pages touched, this optimization actually cost more cycles 6942 * it also puts a lot of 
pressure on the pmap layer with respect to mapping structures
			 */

			/*
			 *	Set up for the next iteration.  The map
			 *	has not been unlocked, so the next
			 *	address should be at the end of this
			 *	entry, and the next map entry should be
			 *	the one following it.
			 */

			start = tmp_entry->vme_end;
			tmp_entry = tmp_entry->vme_next;
		} else {
			vm_map_version_t	version;
			vm_object_t		dst_object;
			vm_object_offset_t	dst_offset;
			kern_return_t		r;

		slow_copy:
			if (entry->needs_copy) {
				vm_object_shadow(&entry->object.vm_object,
						 &entry->offset,
						 (entry->vme_end -
						  entry->vme_start));
				entry->needs_copy = FALSE;
			}

			dst_object = entry->object.vm_object;
			dst_offset = entry->offset;

			/*
			 *	Take an object reference, and record
			 *	the map version information so that the
			 *	map can be safely unlocked.
			 */

			if (dst_object == VM_OBJECT_NULL) {
				/*
				 * We would usually have just taken the
				 * optimized path above if the destination
				 * object has not been allocated yet.  But we
				 * now disable that optimization if the copy
				 * entry's object is not backed by anonymous
				 * memory to avoid replacing malloc'ed
				 * (i.e. re-usable) anonymous memory with a
				 * not-so-anonymous mapping.
				 * So we have to handle this case here and
				 * allocate a new VM object for this map entry.
				 */
				dst_object = vm_object_allocate(
					entry->vme_end - entry->vme_start);
				dst_offset = 0;
				entry->object.vm_object = dst_object;
				entry->offset = dst_offset;

			}

			vm_object_reference(dst_object);

			/* account for unlock bumping up timestamp */
			version.main_timestamp = dst_map->timestamp + 1;

			vm_map_unlock(dst_map);

			/*
			 *	Copy as much as possible in one pass
			 */

			copy_size = size;
			r = vm_fault_copy(
				copy_entry->object.vm_object,
				copy_entry->offset,
				&copy_size,
				dst_object,
				dst_offset,
				dst_map,
				&version,
				THREAD_UNINT );

			/*
			 *	Release the object reference
			 */

			vm_object_deallocate(dst_object);

			/*
			 *	If a hard error occurred, return it now
			 */

			if (r != KERN_SUCCESS)
				return(r);

			if (copy_size != 0) {
				/*
				 *	Dispose of the copied region
				 */

				vm_map_copy_clip_end(copy, copy_entry,
						     copy_entry->vme_start + copy_size);
				vm_map_copy_entry_unlink(copy, copy_entry);
				vm_object_deallocate(copy_entry->object.vm_object);
				vm_map_copy_entry_dispose(copy, copy_entry);
			}

			/*
			 *	Pick up in the destination map where we left off.
			 *
			 *	Use the version information to avoid a lookup
			 *	in the normal case.
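			 *
			 *	(Sketch of the idea, not original text:
			 *	vm_map_unlock() bumps dst_map->timestamp
			 *	once, so version.main_timestamp was saved
			 *	as timestamp + 1 before unlocking.  After
			 *	re-locking,
			 *
			 *		version.main_timestamp ==
			 *			dst_map->timestamp
			 *
			 *	means nobody else modified the map in
			 *	between, and the saved tmp_entry can be
			 *	reused without a fresh
			 *	vm_map_lookup_entry().)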
7052 */ 7053 7054 start += copy_size; 7055 vm_map_lock(dst_map); 7056 if (version.main_timestamp == dst_map->timestamp && 7057 copy_size != 0) { 7058 /* We can safely use saved tmp_entry value */ 7059 7060 vm_map_clip_end(dst_map, tmp_entry, start); 7061 tmp_entry = tmp_entry->vme_next; 7062 } else { 7063 /* Must do lookup of tmp_entry */ 7064 7065 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) { 7066 vm_map_unlock(dst_map); 7067 return(KERN_INVALID_ADDRESS); 7068 } 7069 vm_map_clip_start(dst_map, tmp_entry, start); 7070 } 7071 } 7072 }/* while */ 7073 7074 return(KERN_SUCCESS); 7075}/* vm_map_copy_overwrite_aligned */ 7076 7077/* 7078 * Routine: vm_map_copyin_kernel_buffer [internal use only] 7079 * 7080 * Description: 7081 * Copy in data to a kernel buffer from space in the 7082 * source map. The original space may be optionally 7083 * deallocated. 7084 * 7085 * If successful, returns a new copy object. 7086 */ 7087static kern_return_t 7088vm_map_copyin_kernel_buffer( 7089 vm_map_t src_map, 7090 vm_map_offset_t src_addr, 7091 vm_map_size_t len, 7092 boolean_t src_destroy, 7093 vm_map_copy_t *copy_result) 7094{ 7095 kern_return_t kr; 7096 vm_map_copy_t copy; 7097 vm_size_t kalloc_size; 7098 7099 if ((vm_size_t) len != len) { 7100 /* "len" is too big and doesn't fit in a "vm_size_t" */ 7101 return KERN_RESOURCE_SHORTAGE; 7102 } 7103 kalloc_size = (vm_size_t) (sizeof(struct vm_map_copy) + len); 7104 assert((vm_map_size_t) kalloc_size == sizeof (struct vm_map_copy) + len); 7105 7106 copy = (vm_map_copy_t) kalloc(kalloc_size); 7107 if (copy == VM_MAP_COPY_NULL) { 7108 return KERN_RESOURCE_SHORTAGE; 7109 } 7110 copy->type = VM_MAP_COPY_KERNEL_BUFFER; 7111 copy->size = len; 7112 copy->offset = 0; 7113 copy->cpy_kdata = (void *) (copy + 1); 7114 copy->cpy_kalloc_size = kalloc_size; 7115 7116 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t) len); 7117 if (kr != KERN_SUCCESS) { 7118 kfree(copy, kalloc_size); 7119 return kr; 7120 } 7121 if (src_destroy) { 7122 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr), 7123 vm_map_round_page(src_addr + len), 7124 VM_MAP_REMOVE_INTERRUPTIBLE | 7125 VM_MAP_REMOVE_WAIT_FOR_KWIRE | 7126 (src_map == kernel_map) ? 7127 VM_MAP_REMOVE_KUNWIRE : 0); 7128 } 7129 *copy_result = copy; 7130 return KERN_SUCCESS; 7131} 7132 7133/* 7134 * Routine: vm_map_copyout_kernel_buffer [internal use only] 7135 * 7136 * Description: 7137 * Copy out data from a kernel buffer into space in the 7138 * destination map. The space may be otpionally dynamically 7139 * allocated. 7140 * 7141 * If successful, consumes the copy object. 7142 * Otherwise, the caller is responsible for it. 7143 */ 7144static int vm_map_copyout_kernel_buffer_failures = 0; 7145static kern_return_t 7146vm_map_copyout_kernel_buffer( 7147 vm_map_t map, 7148 vm_map_address_t *addr, /* IN/OUT */ 7149 vm_map_copy_t copy, 7150 boolean_t overwrite) 7151{ 7152 kern_return_t kr = KERN_SUCCESS; 7153 thread_t thread = current_thread(); 7154 7155 if (!overwrite) { 7156 7157 /* 7158 * Allocate space in the target map for the data 7159 */ 7160 *addr = 0; 7161 kr = vm_map_enter(map, 7162 addr, 7163 vm_map_round_page(copy->size), 7164 (vm_map_offset_t) 0, 7165 VM_FLAGS_ANYWHERE, 7166 VM_OBJECT_NULL, 7167 (vm_object_offset_t) 0, 7168 FALSE, 7169 VM_PROT_DEFAULT, 7170 VM_PROT_ALL, 7171 VM_INHERIT_DEFAULT); 7172 if (kr != KERN_SUCCESS) 7173 return kr; 7174 } 7175 7176 /* 7177 * Copyout the data from the kernel buffer to the target map. 
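 *
 *	(For reference, a sketch of the kernel-buffer copy object built by
 *	vm_map_copyin_kernel_buffer() above -- one kalloc'ed block holding
 *	the header and the data back to back:
 *
 *		copy                          copy->cpy_kdata == (copy + 1)
 *		v                             v
 *		+-----------------------------+------------------------+
 *		| struct vm_map_copy          | copy->size data bytes  |
 *		+-----------------------------+------------------------+
 *		|<-- cpy_kalloc_size == sizeof(struct vm_map_copy)+len->|
 *
 *	copyinmap()/copyout() move only the data portion.)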
7178 */ 7179 if (thread->map == map) { 7180 7181 /* 7182 * If the target map is the current map, just do 7183 * the copy. 7184 */ 7185 assert((vm_size_t) copy->size == copy->size); 7186 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { 7187 kr = KERN_INVALID_ADDRESS; 7188 } 7189 } 7190 else { 7191 vm_map_t oldmap; 7192 7193 /* 7194 * If the target map is another map, assume the 7195 * target's address space identity for the duration 7196 * of the copy. 7197 */ 7198 vm_map_reference(map); 7199 oldmap = vm_map_switch(map); 7200 7201 assert((vm_size_t) copy->size == copy->size); 7202 if (copyout(copy->cpy_kdata, *addr, (vm_size_t) copy->size)) { 7203 vm_map_copyout_kernel_buffer_failures++; 7204 kr = KERN_INVALID_ADDRESS; 7205 } 7206 7207 (void) vm_map_switch(oldmap); 7208 vm_map_deallocate(map); 7209 } 7210 7211 if (kr != KERN_SUCCESS) { 7212 /* the copy failed, clean up */ 7213 if (!overwrite) { 7214 /* 7215 * Deallocate the space we allocated in the target map. 7216 */ 7217 (void) vm_map_remove(map, 7218 vm_map_trunc_page(*addr), 7219 vm_map_round_page(*addr + 7220 vm_map_round_page(copy->size)), 7221 VM_MAP_NO_FLAGS); 7222 *addr = 0; 7223 } 7224 } else { 7225 /* copy was successful, dicard the copy structure */ 7226 kfree(copy, copy->cpy_kalloc_size); 7227 } 7228 7229 return kr; 7230} 7231 7232/* 7233 * Macro: vm_map_copy_insert 7234 * 7235 * Description: 7236 * Link a copy chain ("copy") into a map at the 7237 * specified location (after "where"). 7238 * Side effects: 7239 * The copy chain is destroyed. 7240 * Warning: 7241 * The arguments are evaluated multiple times. 7242 */ 7243#define vm_map_copy_insert(map, where, copy) \ 7244MACRO_BEGIN \ 7245 vm_map_store_copy_insert(map, where, copy); \ 7246 zfree(vm_map_copy_zone, copy); \ 7247MACRO_END 7248 7249/* 7250 * Routine: vm_map_copyout 7251 * 7252 * Description: 7253 * Copy out a copy chain ("copy") into newly-allocated 7254 * space in the destination map. 7255 * 7256 * If successful, consumes the copy object. 7257 * Otherwise, the caller is responsible for it. 7258 */ 7259kern_return_t 7260vm_map_copyout( 7261 vm_map_t dst_map, 7262 vm_map_address_t *dst_addr, /* OUT */ 7263 vm_map_copy_t copy) 7264{ 7265 vm_map_size_t size; 7266 vm_map_size_t adjustment; 7267 vm_map_offset_t start; 7268 vm_object_offset_t vm_copy_start; 7269 vm_map_entry_t last; 7270 register 7271 vm_map_entry_t entry; 7272 7273 /* 7274 * Check for null copy object. 7275 */ 7276 7277 if (copy == VM_MAP_COPY_NULL) { 7278 *dst_addr = 0; 7279 return(KERN_SUCCESS); 7280 } 7281 7282 /* 7283 * Check for special copy object, created 7284 * by vm_map_copyin_object. 7285 */ 7286 7287 if (copy->type == VM_MAP_COPY_OBJECT) { 7288 vm_object_t object = copy->cpy_object; 7289 kern_return_t kr; 7290 vm_object_offset_t offset; 7291 7292 offset = vm_object_trunc_page(copy->offset); 7293 size = vm_map_round_page(copy->size + 7294 (vm_map_size_t)(copy->offset - offset)); 7295 *dst_addr = 0; 7296 kr = vm_map_enter(dst_map, dst_addr, size, 7297 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, 7298 object, offset, FALSE, 7299 VM_PROT_DEFAULT, VM_PROT_ALL, 7300 VM_INHERIT_DEFAULT); 7301 if (kr != KERN_SUCCESS) 7302 return(kr); 7303 /* Account for non-pagealigned copy object */ 7304 *dst_addr += (vm_map_offset_t)(copy->offset - offset); 7305 zfree(vm_map_copy_zone, copy); 7306 return(KERN_SUCCESS); 7307 } 7308 7309 /* 7310 * Check for special kernel buffer allocated 7311 * by new_ipc_kmsg_copyin. 
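 *
 *	(Recap, not original text -- vm_map_copyout() dispatches on three
 *	copy flavors:
 *
 *		VM_MAP_COPY_OBJECT         map the donated object directly
 *		VM_MAP_COPY_KERNEL_BUFFER  handled just below through
 *		                           vm_map_copyout_kernel_buffer()
 *		VM_MAP_COPY_ENTRY_LIST     the general case: find space,
 *		                           then relocate the copied
 *		                           entries into that range.)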
7312 */ 7313 7314 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { 7315 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr, 7316 copy, FALSE)); 7317 } 7318 7319 /* 7320 * Find space for the data 7321 */ 7322 7323 vm_copy_start = vm_object_trunc_page(copy->offset); 7324 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size) 7325 - vm_copy_start; 7326 7327StartAgain: ; 7328 7329 vm_map_lock(dst_map); 7330 if( dst_map->disable_vmentry_reuse == TRUE) { 7331 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start); 7332 last = entry; 7333 } else { 7334 assert(first_free_is_valid(dst_map)); 7335 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ? 7336 vm_map_min(dst_map) : last->vme_end; 7337 } 7338 7339 while (TRUE) { 7340 vm_map_entry_t next = last->vme_next; 7341 vm_map_offset_t end = start + size; 7342 7343 if ((end > dst_map->max_offset) || (end < start)) { 7344 if (dst_map->wait_for_space) { 7345 if (size <= (dst_map->max_offset - dst_map->min_offset)) { 7346 assert_wait((event_t) dst_map, 7347 THREAD_INTERRUPTIBLE); 7348 vm_map_unlock(dst_map); 7349 thread_block(THREAD_CONTINUE_NULL); 7350 goto StartAgain; 7351 } 7352 } 7353 vm_map_unlock(dst_map); 7354 return(KERN_NO_SPACE); 7355 } 7356 7357 if ((next == vm_map_to_entry(dst_map)) || 7358 (next->vme_start >= end)) 7359 break; 7360 7361 last = next; 7362 start = last->vme_end; 7363 } 7364 7365 /* 7366 * Since we're going to just drop the map 7367 * entries from the copy into the destination 7368 * map, they must come from the same pool. 7369 */ 7370 7371 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) { 7372 /* 7373 * Mismatches occur when dealing with the default 7374 * pager. 7375 */ 7376 zone_t old_zone; 7377 vm_map_entry_t next, new; 7378 7379 /* 7380 * Find the zone that the copies were allocated from 7381 */ 7382 7383 entry = vm_map_copy_first_entry(copy); 7384 7385 /* 7386 * Reinitialize the copy so that vm_map_copy_entry_link 7387 * will work. 7388 */ 7389 vm_map_store_copy_reset(copy, entry); 7390 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable; 7391 7392 /* 7393 * Copy each entry. 7394 */ 7395 while (entry != vm_map_copy_to_entry(copy)) { 7396 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); 7397 vm_map_entry_copy_full(new, entry); 7398 new->use_pmap = FALSE; /* clr address space specifics */ 7399 vm_map_copy_entry_link(copy, 7400 vm_map_copy_last_entry(copy), 7401 new); 7402 next = entry->vme_next; 7403 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone; 7404 zfree(old_zone, entry); 7405 entry = next; 7406 } 7407 } 7408 7409 /* 7410 * Adjust the addresses in the copy chain, and 7411 * reset the region attributes. 7412 */ 7413 7414 adjustment = start - vm_copy_start; 7415 for (entry = vm_map_copy_first_entry(copy); 7416 entry != vm_map_copy_to_entry(copy); 7417 entry = entry->vme_next) { 7418 entry->vme_start += adjustment; 7419 entry->vme_end += adjustment; 7420 7421 entry->inheritance = VM_INHERIT_DEFAULT; 7422 entry->protection = VM_PROT_DEFAULT; 7423 entry->max_protection = VM_PROT_ALL; 7424 entry->behavior = VM_BEHAVIOR_DEFAULT; 7425 7426 /* 7427 * If the entry is now wired, 7428 * map the pages into the destination map. 
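 *
 *	(Worked example of the address adjustment above, illustrative
 *	numbers only, 4 KB pages assumed: with copy->offset == 0x5200,
 *	vm_copy_start is 0x5000; if the hole found starts at 0x20000,
 *
 *		adjustment = 0x20000 - 0x5000 = 0x1b000
 *
 *	every copied entry shifts up by 0x1b000, and the caller later
 *	gets *dst_addr = start + (copy->offset - vm_copy_start) = 0x20200,
 *	i.e. the original sub-page offset is preserved.)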
7429 */ 7430 if (entry->wired_count != 0) { 7431 register vm_map_offset_t va; 7432 vm_object_offset_t offset; 7433 register vm_object_t object; 7434 vm_prot_t prot; 7435 int type_of_fault; 7436 7437 object = entry->object.vm_object; 7438 offset = entry->offset; 7439 va = entry->vme_start; 7440 7441 pmap_pageable(dst_map->pmap, 7442 entry->vme_start, 7443 entry->vme_end, 7444 TRUE); 7445 7446 while (va < entry->vme_end) { 7447 register vm_page_t m; 7448 7449 /* 7450 * Look up the page in the object. 7451 * Assert that the page will be found in the 7452 * top object: 7453 * either 7454 * the object was newly created by 7455 * vm_object_copy_slowly, and has 7456 * copies of all of the pages from 7457 * the source object 7458 * or 7459 * the object was moved from the old 7460 * map entry; because the old map 7461 * entry was wired, all of the pages 7462 * were in the top-level object. 7463 * (XXX not true if we wire pages for 7464 * reading) 7465 */ 7466 vm_object_lock(object); 7467 7468 m = vm_page_lookup(object, offset); 7469 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) || 7470 m->absent) 7471 panic("vm_map_copyout: wiring %p", m); 7472 7473 /* 7474 * ENCRYPTED SWAP: 7475 * The page is assumed to be wired here, so it 7476 * shouldn't be encrypted. Otherwise, we 7477 * couldn't enter it in the page table, since 7478 * we don't want the user to see the encrypted 7479 * data. 7480 */ 7481 ASSERT_PAGE_DECRYPTED(m); 7482 7483 prot = entry->protection; 7484 7485 if (override_nx(dst_map, entry->alias) && prot) 7486 prot |= VM_PROT_EXECUTE; 7487 7488 type_of_fault = DBG_CACHE_HIT_FAULT; 7489 7490 vm_fault_enter(m, dst_map->pmap, va, prot, prot, 7491 VM_PAGE_WIRED(m), FALSE, FALSE, FALSE, NULL, 7492 &type_of_fault); 7493 7494 vm_object_unlock(object); 7495 7496 offset += PAGE_SIZE_64; 7497 va += PAGE_SIZE; 7498 } 7499 } 7500 } 7501 7502 /* 7503 * Correct the page alignment for the result 7504 */ 7505 7506 *dst_addr = start + (copy->offset - vm_copy_start); 7507 7508 /* 7509 * Update the hints and the map size 7510 */ 7511 7512 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); 7513 7514 dst_map->size += size; 7515 7516 /* 7517 * Link in the copy 7518 */ 7519 7520 vm_map_copy_insert(dst_map, last, copy); 7521 7522 vm_map_unlock(dst_map); 7523 7524 /* 7525 * XXX If wiring_required, call vm_map_pageable 7526 */ 7527 7528 return(KERN_SUCCESS); 7529} 7530 7531/* 7532 * Routine: vm_map_copyin 7533 * 7534 * Description: 7535 * see vm_map_copyin_common. Exported via Unsupported.exports. 7536 * 7537 */ 7538 7539#undef vm_map_copyin 7540 7541kern_return_t 7542vm_map_copyin( 7543 vm_map_t src_map, 7544 vm_map_address_t src_addr, 7545 vm_map_size_t len, 7546 boolean_t src_destroy, 7547 vm_map_copy_t *copy_result) /* OUT */ 7548{ 7549 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy, 7550 FALSE, copy_result, FALSE)); 7551} 7552 7553/* 7554 * Routine: vm_map_copyin_common 7555 * 7556 * Description: 7557 * Copy the specified region (src_addr, len) from the 7558 * source address space (src_map), possibly removing 7559 * the region from the source address space (src_destroy). 7560 * 7561 * Returns: 7562 * A vm_map_copy_t object (copy_result), suitable for 7563 * insertion into another address space (using vm_map_copyout), 7564 * copying over another address space region (using 7565 * vm_map_copy_overwrite). If the copy is unused, it 7566 * should be destroyed (using vm_map_copy_discard). 7567 * 7568 * In/out conditions: 7569 * The source map should not be locked on entry. 
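 *
 *	Hypothetical kernel-internal usage sketch (not taken from this
 *	file; error handling trimmed) of the copyin/copyout pairing the
 *	description above refers to:
 *
 *		vm_map_copy_t		copy;
 *		vm_map_address_t	dst_addr;
 *		kern_return_t		kr;
 *
 *		kr = vm_map_copyin(src_map, src_addr, len,
 *				   FALSE, &copy);    FALSE: keep the source
 *		if (kr == KERN_SUCCESS) {
 *			kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *			if (kr != KERN_SUCCESS)
 *				vm_map_copy_discard(copy);
 *		}
 *
 *	On success vm_map_copyout() consumes "copy"; on failure the
 *	caller must discard it, as noted in vm_map_copyout's header.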
7570 */ 7571 7572typedef struct submap_map { 7573 vm_map_t parent_map; 7574 vm_map_offset_t base_start; 7575 vm_map_offset_t base_end; 7576 vm_map_size_t base_len; 7577 struct submap_map *next; 7578} submap_map_t; 7579 7580kern_return_t 7581vm_map_copyin_common( 7582 vm_map_t src_map, 7583 vm_map_address_t src_addr, 7584 vm_map_size_t len, 7585 boolean_t src_destroy, 7586 __unused boolean_t src_volatile, 7587 vm_map_copy_t *copy_result, /* OUT */ 7588 boolean_t use_maxprot) 7589{ 7590 vm_map_entry_t tmp_entry; /* Result of last map lookup -- 7591 * in multi-level lookup, this 7592 * entry contains the actual 7593 * vm_object/offset. 7594 */ 7595 register 7596 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */ 7597 7598 vm_map_offset_t src_start; /* Start of current entry -- 7599 * where copy is taking place now 7600 */ 7601 vm_map_offset_t src_end; /* End of entire region to be 7602 * copied */ 7603 vm_map_offset_t src_base; 7604 vm_map_t base_map = src_map; 7605 boolean_t map_share=FALSE; 7606 submap_map_t *parent_maps = NULL; 7607 7608 register 7609 vm_map_copy_t copy; /* Resulting copy */ 7610 vm_map_address_t copy_addr; 7611 7612 /* 7613 * Check for copies of zero bytes. 7614 */ 7615 7616 if (len == 0) { 7617 *copy_result = VM_MAP_COPY_NULL; 7618 return(KERN_SUCCESS); 7619 } 7620 7621 /* 7622 * Check that the end address doesn't overflow 7623 */ 7624 src_end = src_addr + len; 7625 if (src_end < src_addr) 7626 return KERN_INVALID_ADDRESS; 7627 7628 /* 7629 * If the copy is sufficiently small, use a kernel buffer instead 7630 * of making a virtual copy. The theory being that the cost of 7631 * setting up VM (and taking C-O-W faults) dominates the copy costs 7632 * for small regions. 7633 */ 7634 if ((len < msg_ool_size_small) && !use_maxprot) 7635 return vm_map_copyin_kernel_buffer(src_map, src_addr, len, 7636 src_destroy, copy_result); 7637 7638 /* 7639 * Compute (page aligned) start and end of region 7640 */ 7641 src_start = vm_map_trunc_page(src_addr); 7642 src_end = vm_map_round_page(src_end); 7643 7644 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", src_map, src_addr, len, src_destroy, 0); 7645 7646 /* 7647 * Allocate a header element for the list. 7648 * 7649 * Use the start and end in the header to 7650 * remember the endpoints prior to rounding. 7651 */ 7652 7653 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 7654 vm_map_copy_first_entry(copy) = 7655 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); 7656 copy->type = VM_MAP_COPY_ENTRY_LIST; 7657 copy->cpy_hdr.nentries = 0; 7658 copy->cpy_hdr.entries_pageable = TRUE; 7659 7660 vm_map_store_init( &(copy->cpy_hdr) ); 7661 7662 copy->offset = src_addr; 7663 copy->size = len; 7664 7665 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); 7666 7667#define RETURN(x) \ 7668 MACRO_BEGIN \ 7669 vm_map_unlock(src_map); \ 7670 if(src_map != base_map) \ 7671 vm_map_deallocate(src_map); \ 7672 if (new_entry != VM_MAP_ENTRY_NULL) \ 7673 vm_map_copy_entry_dispose(copy,new_entry); \ 7674 vm_map_copy_discard(copy); \ 7675 { \ 7676 submap_map_t *_ptr; \ 7677 \ 7678 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \ 7679 parent_maps=parent_maps->next; \ 7680 if (_ptr->parent_map != base_map) \ 7681 vm_map_deallocate(_ptr->parent_map); \ 7682 kfree(_ptr, sizeof(submap_map_t)); \ 7683 } \ 7684 } \ 7685 MACRO_RETURN(x); \ 7686 MACRO_END 7687 7688 /* 7689 * Find the beginning of the region. 
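 *
 *	(Reminder, not original text: only copies of at least
 *	msg_ool_size_small bytes -- or callers passing use_maxprot --
 *	reach this point; smaller requests were already diverted above to
 *	vm_map_copyin_kernel_buffer(), which copies the bytes into a
 *	kalloc'ed buffer with copyinmap() instead of building an entry
 *	list.)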
 */

	vm_map_lock(src_map);

	if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry))
		RETURN(KERN_INVALID_ADDRESS);
	if(!tmp_entry->is_sub_map) {
		vm_map_clip_start(src_map, tmp_entry, src_start);
	}
	/* set for later submap fix-up */
	copy_addr = src_start;

	/*
	 *	Go through entries until we get to the end.
	 */

	while (TRUE) {
		register
		vm_map_entry_t	src_entry = tmp_entry;	/* Top-level entry */
		vm_map_size_t	src_size;		/* Size of source
							 * map entry (in both
							 * maps)
							 */

		register
		vm_object_t		src_object;	/* Object to copy */
		vm_object_offset_t	src_offset;

		boolean_t	src_needs_copy;		/* Should source map
							 * be made read-only
							 * for copy-on-write?
							 */

		boolean_t	new_entry_needs_copy;	/* Will new entry be COW? */

		boolean_t	was_wired;		/* Was source wired? */
		vm_map_version_t version;		/* Version before locks
							 * dropped to make copy
							 */
		kern_return_t	result;			/* Return value from
							 * copy_strategically.
							 */
		while(tmp_entry->is_sub_map) {
			vm_map_size_t submap_len;
			submap_map_t *ptr;

			ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
			ptr->next = parent_maps;
			parent_maps = ptr;
			ptr->parent_map = src_map;
			ptr->base_start = src_start;
			ptr->base_end = src_end;
			submap_len = tmp_entry->vme_end - src_start;
			if(submap_len > (src_end-src_start))
				submap_len = src_end-src_start;
			ptr->base_len = submap_len;

			src_start -= tmp_entry->vme_start;
			src_start += tmp_entry->offset;
			src_end = src_start + submap_len;
			src_map = tmp_entry->object.sub_map;
			vm_map_lock(src_map);
			/* keep an outstanding reference for all maps in */
			/* the parent tree except the base map */
			vm_map_reference(src_map);
			vm_map_unlock(ptr->parent_map);
			if (!vm_map_lookup_entry(
				    src_map, src_start, &tmp_entry))
				RETURN(KERN_INVALID_ADDRESS);
			map_share = TRUE;
			if(!tmp_entry->is_sub_map)
				vm_map_clip_start(src_map, tmp_entry, src_start);
			src_entry = tmp_entry;
		}
		/* we are now in the lowest level submap... */

		if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) &&
		    (tmp_entry->object.vm_object->phys_contiguous)) {
			/* This is not supported for now.  In the future */
			/* we will need to detect the phys_contig */
			/* condition and then upgrade copy_slowly */
			/* to do a physical copy from the device-memory- */
			/* based object.  We can piggy-back off of the */
			/* was_wired boolean to set up the proper */
			/* handling. */
			RETURN(KERN_PROTECTION_FAILURE);
		}
		/*
		 * Create a new address map entry to hold the result.
		 * Fill in the fields from the appropriate source entries.
		 * We must unlock the source map to do this if we need
		 * to allocate a map entry.
7782 */ 7783 if (new_entry == VM_MAP_ENTRY_NULL) { 7784 version.main_timestamp = src_map->timestamp; 7785 vm_map_unlock(src_map); 7786 7787 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable); 7788 7789 vm_map_lock(src_map); 7790 if ((version.main_timestamp + 1) != src_map->timestamp) { 7791 if (!vm_map_lookup_entry(src_map, src_start, 7792 &tmp_entry)) { 7793 RETURN(KERN_INVALID_ADDRESS); 7794 } 7795 if (!tmp_entry->is_sub_map) 7796 vm_map_clip_start(src_map, tmp_entry, src_start); 7797 continue; /* restart w/ new tmp_entry */ 7798 } 7799 } 7800 7801 /* 7802 * Verify that the region can be read. 7803 */ 7804 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE && 7805 !use_maxprot) || 7806 (src_entry->max_protection & VM_PROT_READ) == 0) 7807 RETURN(KERN_PROTECTION_FAILURE); 7808 7809 /* 7810 * Clip against the endpoints of the entire region. 7811 */ 7812 7813 vm_map_clip_end(src_map, src_entry, src_end); 7814 7815 src_size = src_entry->vme_end - src_start; 7816 src_object = src_entry->object.vm_object; 7817 src_offset = src_entry->offset; 7818 was_wired = (src_entry->wired_count != 0); 7819 7820 vm_map_entry_copy(new_entry, src_entry); 7821 new_entry->use_pmap = FALSE; /* clr address space specifics */ 7822 7823 /* 7824 * Attempt non-blocking copy-on-write optimizations. 7825 */ 7826 7827 if (src_destroy && 7828 (src_object == VM_OBJECT_NULL || 7829 (src_object->internal && !src_object->true_share 7830 && !map_share))) { 7831 /* 7832 * If we are destroying the source, and the object 7833 * is internal, we can move the object reference 7834 * from the source to the copy. The copy is 7835 * copy-on-write only if the source is. 7836 * We make another reference to the object, because 7837 * destroying the source entry will deallocate it. 7838 */ 7839 vm_object_reference(src_object); 7840 7841 /* 7842 * Copy is always unwired. vm_map_copy_entry 7843 * set its wired count to zero. 7844 */ 7845 7846 goto CopySuccessful; 7847 } 7848 7849 7850 RestartCopy: 7851 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n", 7852 src_object, new_entry, new_entry->object.vm_object, 7853 was_wired, 0); 7854 if ((src_object == VM_OBJECT_NULL || 7855 (!was_wired && !map_share && !tmp_entry->is_shared)) && 7856 vm_object_copy_quickly( 7857 &new_entry->object.vm_object, 7858 src_offset, 7859 src_size, 7860 &src_needs_copy, 7861 &new_entry_needs_copy)) { 7862 7863 new_entry->needs_copy = new_entry_needs_copy; 7864 7865 /* 7866 * Handle copy-on-write obligations 7867 */ 7868 7869 if (src_needs_copy && !tmp_entry->needs_copy) { 7870 vm_prot_t prot; 7871 7872 prot = src_entry->protection & ~VM_PROT_WRITE; 7873 7874 if (override_nx(src_map, src_entry->alias) && prot) 7875 prot |= VM_PROT_EXECUTE; 7876 7877 vm_object_pmap_protect( 7878 src_object, 7879 src_offset, 7880 src_size, 7881 (src_entry->is_shared ? 7882 PMAP_NULL 7883 : src_map->pmap), 7884 src_entry->vme_start, 7885 prot); 7886 7887 tmp_entry->needs_copy = TRUE; 7888 } 7889 7890 /* 7891 * The map has never been unlocked, so it's safe 7892 * to move to the next entry rather than doing 7893 * another lookup. 7894 */ 7895 7896 goto CopySuccessful; 7897 } 7898 7899 /* 7900 * Take an object reference, so that we may 7901 * release the map lock(s). 7902 */ 7903 7904 assert(src_object != VM_OBJECT_NULL); 7905 vm_object_reference(src_object); 7906 7907 /* 7908 * Record the timestamp for later verification. 7909 * Unlock the map. 
7910 */ 7911 7912 version.main_timestamp = src_map->timestamp; 7913 vm_map_unlock(src_map); /* Increments timestamp once! */ 7914 7915 /* 7916 * Perform the copy 7917 */ 7918 7919 if (was_wired) { 7920 CopySlowly: 7921 vm_object_lock(src_object); 7922 result = vm_object_copy_slowly( 7923 src_object, 7924 src_offset, 7925 src_size, 7926 THREAD_UNINT, 7927 &new_entry->object.vm_object); 7928 new_entry->offset = 0; 7929 new_entry->needs_copy = FALSE; 7930 7931 } 7932 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && 7933 (tmp_entry->is_shared || map_share)) { 7934 vm_object_t new_object; 7935 7936 vm_object_lock_shared(src_object); 7937 new_object = vm_object_copy_delayed( 7938 src_object, 7939 src_offset, 7940 src_size, 7941 TRUE); 7942 if (new_object == VM_OBJECT_NULL) 7943 goto CopySlowly; 7944 7945 new_entry->object.vm_object = new_object; 7946 new_entry->needs_copy = TRUE; 7947 result = KERN_SUCCESS; 7948 7949 } else { 7950 result = vm_object_copy_strategically(src_object, 7951 src_offset, 7952 src_size, 7953 &new_entry->object.vm_object, 7954 &new_entry->offset, 7955 &new_entry_needs_copy); 7956 7957 new_entry->needs_copy = new_entry_needs_copy; 7958 } 7959 7960 if (result != KERN_SUCCESS && 7961 result != KERN_MEMORY_RESTART_COPY) { 7962 vm_map_lock(src_map); 7963 RETURN(result); 7964 } 7965 7966 /* 7967 * Throw away the extra reference 7968 */ 7969 7970 vm_object_deallocate(src_object); 7971 7972 /* 7973 * Verify that the map has not substantially 7974 * changed while the copy was being made. 7975 */ 7976 7977 vm_map_lock(src_map); 7978 7979 if ((version.main_timestamp + 1) == src_map->timestamp) 7980 goto VerificationSuccessful; 7981 7982 /* 7983 * Simple version comparison failed. 7984 * 7985 * Retry the lookup and verify that the 7986 * same object/offset are still present. 7987 * 7988 * [Note: a memory manager that colludes with 7989 * the calling task can detect that we have 7990 * cheated. While the map was unlocked, the 7991 * mapping could have been changed and restored.] 7992 */ 7993 7994 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) { 7995 RETURN(KERN_INVALID_ADDRESS); 7996 } 7997 7998 src_entry = tmp_entry; 7999 vm_map_clip_start(src_map, src_entry, src_start); 8000 8001 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) && 8002 !use_maxprot) || 8003 ((src_entry->max_protection & VM_PROT_READ) == 0)) 8004 goto VerificationFailed; 8005 8006 if (src_entry->vme_end < new_entry->vme_end) 8007 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start; 8008 8009 if ((src_entry->object.vm_object != src_object) || 8010 (src_entry->offset != src_offset) ) { 8011 8012 /* 8013 * Verification failed. 8014 * 8015 * Start over with this top-level entry. 8016 */ 8017 8018 VerificationFailed: ; 8019 8020 vm_object_deallocate(new_entry->object.vm_object); 8021 tmp_entry = src_entry; 8022 continue; 8023 } 8024 8025 /* 8026 * Verification succeeded. 8027 */ 8028 8029 VerificationSuccessful: ; 8030 8031 if (result == KERN_MEMORY_RESTART_COPY) 8032 goto RestartCopy; 8033 8034 /* 8035 * Copy succeeded. 8036 */ 8037 8038 CopySuccessful: ; 8039 8040 /* 8041 * Link in the new copy entry. 8042 */ 8043 8044 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), 8045 new_entry); 8046 8047 /* 8048 * Determine whether the entire region 8049 * has been copied. 
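 *
 *	(Recap of the strategy selection above, for reference only:
 *
 *		wired source                -> vm_object_copy_slowly()
 *		shared, symmetric strategy  -> vm_object_copy_delayed(),
 *		                               falling back to copy_slowly
 *		everything else             -> vm_object_copy_strategically()
 *
 *	with vm_object_copy_quickly() having been tried first while the
 *	map was still locked.)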
8050 */ 8051 src_base = src_start; 8052 src_start = new_entry->vme_end; 8053 new_entry = VM_MAP_ENTRY_NULL; 8054 while ((src_start >= src_end) && (src_end != 0)) { 8055 if (src_map != base_map) { 8056 submap_map_t *ptr; 8057 8058 ptr = parent_maps; 8059 assert(ptr != NULL); 8060 parent_maps = parent_maps->next; 8061 8062 /* fix up the damage we did in that submap */ 8063 vm_map_simplify_range(src_map, 8064 src_base, 8065 src_end); 8066 8067 vm_map_unlock(src_map); 8068 vm_map_deallocate(src_map); 8069 vm_map_lock(ptr->parent_map); 8070 src_map = ptr->parent_map; 8071 src_base = ptr->base_start; 8072 src_start = ptr->base_start + ptr->base_len; 8073 src_end = ptr->base_end; 8074 if ((src_end > src_start) && 8075 !vm_map_lookup_entry( 8076 src_map, src_start, &tmp_entry)) 8077 RETURN(KERN_INVALID_ADDRESS); 8078 kfree(ptr, sizeof(submap_map_t)); 8079 if(parent_maps == NULL) 8080 map_share = FALSE; 8081 src_entry = tmp_entry->vme_prev; 8082 } else 8083 break; 8084 } 8085 if ((src_start >= src_end) && (src_end != 0)) 8086 break; 8087 8088 /* 8089 * Verify that there are no gaps in the region 8090 */ 8091 8092 tmp_entry = src_entry->vme_next; 8093 if ((tmp_entry->vme_start != src_start) || 8094 (tmp_entry == vm_map_to_entry(src_map))) 8095 RETURN(KERN_INVALID_ADDRESS); 8096 } 8097 8098 /* 8099 * If the source should be destroyed, do it now, since the 8100 * copy was successful. 8101 */ 8102 if (src_destroy) { 8103 (void) vm_map_delete(src_map, 8104 vm_map_trunc_page(src_addr), 8105 src_end, 8106 (src_map == kernel_map) ? 8107 VM_MAP_REMOVE_KUNWIRE : 8108 VM_MAP_NO_FLAGS, 8109 VM_MAP_NULL); 8110 } else { 8111 /* fix up the damage we did in the base map */ 8112 vm_map_simplify_range(src_map, 8113 vm_map_trunc_page(src_addr), 8114 vm_map_round_page(src_end)); 8115 } 8116 8117 vm_map_unlock(src_map); 8118 8119 /* Fix-up start and end points in copy. This is necessary */ 8120 /* when the various entries in the copy object were picked */ 8121 /* up from different sub-maps */ 8122 8123 tmp_entry = vm_map_copy_first_entry(copy); 8124 while (tmp_entry != vm_map_copy_to_entry(copy)) { 8125 tmp_entry->vme_end = copy_addr + 8126 (tmp_entry->vme_end - tmp_entry->vme_start); 8127 tmp_entry->vme_start = copy_addr; 8128 assert(tmp_entry->vme_start < tmp_entry->vme_end); 8129 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start; 8130 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next; 8131 } 8132 8133 *copy_result = copy; 8134 return(KERN_SUCCESS); 8135 8136#undef RETURN 8137} 8138 8139/* 8140 * vm_map_copyin_object: 8141 * 8142 * Create a copy object from an object. 8143 * Our caller donates an object reference. 8144 */ 8145 8146kern_return_t 8147vm_map_copyin_object( 8148 vm_object_t object, 8149 vm_object_offset_t offset, /* offset of region in object */ 8150 vm_object_size_t size, /* size of region in object */ 8151 vm_map_copy_t *copy_result) /* OUT */ 8152{ 8153 vm_map_copy_t copy; /* Resulting copy */ 8154 8155 /* 8156 * We drop the object into a special copy object 8157 * that contains the object directly. 8158 */ 8159 8160 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 8161 copy->type = VM_MAP_COPY_OBJECT; 8162 copy->cpy_object = object; 8163 copy->offset = offset; 8164 copy->size = size; 8165 8166 *copy_result = copy; 8167 return(KERN_SUCCESS); 8168} 8169 8170static void 8171vm_map_fork_share( 8172 vm_map_t old_map, 8173 vm_map_entry_t old_entry, 8174 vm_map_t new_map) 8175{ 8176 vm_object_t object; 8177 vm_map_entry_t new_entry; 8178 8179 /* 8180 * New sharing code. 
New map entry 8181 * references original object. Internal 8182 * objects use asynchronous copy algorithm for 8183 * future copies. First make sure we have 8184 * the right object. If we need a shadow, 8185 * or someone else already has one, then 8186 * make a new shadow and share it. 8187 */ 8188 8189 object = old_entry->object.vm_object; 8190 if (old_entry->is_sub_map) { 8191 assert(old_entry->wired_count == 0); 8192#ifndef NO_NESTED_PMAP 8193 if(old_entry->use_pmap) { 8194 kern_return_t result; 8195 8196 result = pmap_nest(new_map->pmap, 8197 (old_entry->object.sub_map)->pmap, 8198 (addr64_t)old_entry->vme_start, 8199 (addr64_t)old_entry->vme_start, 8200 (uint64_t)(old_entry->vme_end - old_entry->vme_start)); 8201 if(result) 8202 panic("vm_map_fork_share: pmap_nest failed!"); 8203 } 8204#endif /* NO_NESTED_PMAP */ 8205 } else if (object == VM_OBJECT_NULL) { 8206 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end - 8207 old_entry->vme_start)); 8208 old_entry->offset = 0; 8209 old_entry->object.vm_object = object; 8210 assert(!old_entry->needs_copy); 8211 } else if (object->copy_strategy != 8212 MEMORY_OBJECT_COPY_SYMMETRIC) { 8213 8214 /* 8215 * We are already using an asymmetric 8216 * copy, and therefore we already have 8217 * the right object. 8218 */ 8219 8220 assert(! old_entry->needs_copy); 8221 } 8222 else if (old_entry->needs_copy || /* case 1 */ 8223 object->shadowed || /* case 2 */ 8224 (!object->true_share && /* case 3 */ 8225 !old_entry->is_shared && 8226 (object->vo_size > 8227 (vm_map_size_t)(old_entry->vme_end - 8228 old_entry->vme_start)))) { 8229 8230 /* 8231 * We need to create a shadow. 8232 * There are three cases here. 8233 * In the first case, we need to 8234 * complete a deferred symmetrical 8235 * copy that we participated in. 8236 * In the second and third cases, 8237 * we need to create the shadow so 8238 * that changes that we make to the 8239 * object do not interfere with 8240 * any symmetrical copies which 8241 * have occured (case 2) or which 8242 * might occur (case 3). 8243 * 8244 * The first case is when we had 8245 * deferred shadow object creation 8246 * via the entry->needs_copy mechanism. 8247 * This mechanism only works when 8248 * only one entry points to the source 8249 * object, and we are about to create 8250 * a second entry pointing to the 8251 * same object. The problem is that 8252 * there is no way of mapping from 8253 * an object to the entries pointing 8254 * to it. (Deferred shadow creation 8255 * works with one entry because occurs 8256 * at fault time, and we walk from the 8257 * entry to the object when handling 8258 * the fault.) 8259 * 8260 * The second case is when the object 8261 * to be shared has already been copied 8262 * with a symmetric copy, but we point 8263 * directly to the object without 8264 * needs_copy set in our entry. (This 8265 * can happen because different ranges 8266 * of an object can be pointed to by 8267 * different entries. In particular, 8268 * a single entry pointing to an object 8269 * can be split by a call to vm_inherit, 8270 * which, combined with task_create, can 8271 * result in the different entries 8272 * having different needs_copy values.) 8273 * The shadowed flag in the object allows 8274 * us to detect this case. 
The problem 8275 * with this case is that if this object 8276 * has or will have shadows, then we 8277 * must not perform an asymmetric copy 8278 * of this object, since such a copy 8279 * allows the object to be changed, which 8280 * will break the previous symmetrical 8281 * copies (which rely upon the object 8282 * not changing). In a sense, the shadowed 8283 * flag says "don't change this object". 8284 * We fix this by creating a shadow 8285 * object for this object, and sharing 8286 * that. This works because we are free 8287 * to change the shadow object (and thus 8288 * to use an asymmetric copy strategy); 8289 * this is also semantically correct, 8290 * since this object is temporary, and 8291 * therefore a copy of the object is 8292 * as good as the object itself. (This 8293 * is not true for permanent objects, 8294 * since the pager needs to see changes, 8295 * which won't happen if the changes 8296 * are made to a copy.) 8297 * 8298 * The third case is when the object 8299 * to be shared has parts sticking 8300 * outside of the entry we're working 8301 * with, and thus may in the future 8302 * be subject to a symmetrical copy. 8303 * (This is a preemptive version of 8304 * case 2.) 8305 */ 8306 vm_object_shadow(&old_entry->object.vm_object, 8307 &old_entry->offset, 8308 (vm_map_size_t) (old_entry->vme_end - 8309 old_entry->vme_start)); 8310 8311 /* 8312 * If we're making a shadow for other than 8313 * copy on write reasons, then we have 8314 * to remove write permission. 8315 */ 8316 8317 if (!old_entry->needs_copy && 8318 (old_entry->protection & VM_PROT_WRITE)) { 8319 vm_prot_t prot; 8320 8321 prot = old_entry->protection & ~VM_PROT_WRITE; 8322 8323 if (override_nx(old_map, old_entry->alias) && prot) 8324 prot |= VM_PROT_EXECUTE; 8325 8326 if (old_map->mapped_in_other_pmaps) { 8327 vm_object_pmap_protect( 8328 old_entry->object.vm_object, 8329 old_entry->offset, 8330 (old_entry->vme_end - 8331 old_entry->vme_start), 8332 PMAP_NULL, 8333 old_entry->vme_start, 8334 prot); 8335 } else { 8336 pmap_protect(old_map->pmap, 8337 old_entry->vme_start, 8338 old_entry->vme_end, 8339 prot); 8340 } 8341 } 8342 8343 old_entry->needs_copy = FALSE; 8344 object = old_entry->object.vm_object; 8345 } 8346 8347 8348 /* 8349 * If object was using a symmetric copy strategy, 8350 * change its copy strategy to the default 8351 * asymmetric copy strategy, which is copy_delay 8352 * in the non-norma case and copy_call in the 8353 * norma case. Bump the reference count for the 8354 * new entry. 8355 */ 8356 8357 if(old_entry->is_sub_map) { 8358 vm_map_lock(old_entry->object.sub_map); 8359 vm_map_reference(old_entry->object.sub_map); 8360 vm_map_unlock(old_entry->object.sub_map); 8361 } else { 8362 vm_object_lock(object); 8363 vm_object_reference_locked(object); 8364 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) { 8365 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 8366 } 8367 vm_object_unlock(object); 8368 } 8369 8370 /* 8371 * Clone the entry, using object ref from above. 8372 * Mark both entries as shared. 8373 */ 8374 8375 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel 8376 * map or descendants */ 8377 vm_map_entry_copy(new_entry, old_entry); 8378 old_entry->is_shared = TRUE; 8379 new_entry->is_shared = TRUE; 8380 8381 /* 8382 * Insert the entry into the new map -- we 8383 * know we're inserting at the end of the new 8384 * map. 
8385 */ 8386 8387 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry); 8388 8389 /* 8390 * Update the physical map 8391 */ 8392 8393 if (old_entry->is_sub_map) { 8394 /* Bill Angell pmap support goes here */ 8395 } else { 8396 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start, 8397 old_entry->vme_end - old_entry->vme_start, 8398 old_entry->vme_start); 8399 } 8400} 8401 8402static boolean_t 8403vm_map_fork_copy( 8404 vm_map_t old_map, 8405 vm_map_entry_t *old_entry_p, 8406 vm_map_t new_map) 8407{ 8408 vm_map_entry_t old_entry = *old_entry_p; 8409 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start; 8410 vm_map_offset_t start = old_entry->vme_start; 8411 vm_map_copy_t copy; 8412 vm_map_entry_t last = vm_map_last_entry(new_map); 8413 8414 vm_map_unlock(old_map); 8415 /* 8416 * Use maxprot version of copyin because we 8417 * care about whether this memory can ever 8418 * be accessed, not just whether it's accessible 8419 * right now. 8420 */ 8421 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, &copy) 8422 != KERN_SUCCESS) { 8423 /* 8424 * The map might have changed while it 8425 * was unlocked, check it again. Skip 8426 * any blank space or permanently 8427 * unreadable region. 8428 */ 8429 vm_map_lock(old_map); 8430 if (!vm_map_lookup_entry(old_map, start, &last) || 8431 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) { 8432 last = last->vme_next; 8433 } 8434 *old_entry_p = last; 8435 8436 /* 8437 * XXX For some error returns, want to 8438 * XXX skip to the next element. Note 8439 * that INVALID_ADDRESS and 8440 * PROTECTION_FAILURE are handled above. 8441 */ 8442 8443 return FALSE; 8444 } 8445 8446 /* 8447 * Insert the copy into the new map 8448 */ 8449 8450 vm_map_copy_insert(new_map, last, copy); 8451 8452 /* 8453 * Pick up the traversal at the end of 8454 * the copied region. 8455 */ 8456 8457 vm_map_lock(old_map); 8458 start += entry_size; 8459 if (! vm_map_lookup_entry(old_map, start, &last)) { 8460 last = last->vme_next; 8461 } else { 8462 if (last->vme_start == start) { 8463 /* 8464 * No need to clip here and we don't 8465 * want to cause any unnecessary 8466 * unnesting... 8467 */ 8468 } else { 8469 vm_map_clip_start(old_map, last, start); 8470 } 8471 } 8472 *old_entry_p = last; 8473 8474 return TRUE; 8475} 8476 8477/* 8478 * vm_map_fork: 8479 * 8480 * Create and return a new map based on the old 8481 * map, according to the inheritance values on the 8482 * regions in that map. 8483 * 8484 * The source map must not be locked. 8485 */ 8486vm_map_t 8487vm_map_fork( 8488 ledger_t ledger, 8489 vm_map_t old_map) 8490{ 8491 pmap_t new_pmap; 8492 vm_map_t new_map; 8493 vm_map_entry_t old_entry; 8494 vm_map_size_t new_size = 0, entry_size; 8495 vm_map_entry_t new_entry; 8496 boolean_t src_needs_copy; 8497 boolean_t new_entry_needs_copy; 8498 8499 new_pmap = pmap_create(ledger, (vm_map_size_t) 0, 8500#if defined(__i386__) || defined(__x86_64__) 8501 old_map->pmap->pm_task_map != TASK_MAP_32BIT 8502#elif defined(__arm__) 8503 0 8504#else 8505#error Unknown architecture. 
8506#endif 8507 ); 8508#if defined(__i386__) 8509 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED) 8510 pmap_set_4GB_pagezero(new_pmap); 8511#endif 8512 8513 vm_map_reference_swap(old_map); 8514 vm_map_lock(old_map); 8515 8516 new_map = vm_map_create(new_pmap, 8517 old_map->min_offset, 8518 old_map->max_offset, 8519 old_map->hdr.entries_pageable); 8520 for ( 8521 old_entry = vm_map_first_entry(old_map); 8522 old_entry != vm_map_to_entry(old_map); 8523 ) { 8524 8525 entry_size = old_entry->vme_end - old_entry->vme_start; 8526 8527 switch (old_entry->inheritance) { 8528 case VM_INHERIT_NONE: 8529 break; 8530 8531 case VM_INHERIT_SHARE: 8532 vm_map_fork_share(old_map, old_entry, new_map); 8533 new_size += entry_size; 8534 break; 8535 8536 case VM_INHERIT_COPY: 8537 8538 /* 8539 * Inline the copy_quickly case; 8540 * upon failure, fall back on call 8541 * to vm_map_fork_copy. 8542 */ 8543 8544 if(old_entry->is_sub_map) 8545 break; 8546 if ((old_entry->wired_count != 0) || 8547 ((old_entry->object.vm_object != NULL) && 8548 (old_entry->object.vm_object->true_share))) { 8549 goto slow_vm_map_fork_copy; 8550 } 8551 8552 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */ 8553 vm_map_entry_copy(new_entry, old_entry); 8554 /* clear address space specifics */ 8555 new_entry->use_pmap = FALSE; 8556 8557 if (! vm_object_copy_quickly( 8558 &new_entry->object.vm_object, 8559 old_entry->offset, 8560 (old_entry->vme_end - 8561 old_entry->vme_start), 8562 &src_needs_copy, 8563 &new_entry_needs_copy)) { 8564 vm_map_entry_dispose(new_map, new_entry); 8565 goto slow_vm_map_fork_copy; 8566 } 8567 8568 /* 8569 * Handle copy-on-write obligations 8570 */ 8571 8572 if (src_needs_copy && !old_entry->needs_copy) { 8573 vm_prot_t prot; 8574 8575 prot = old_entry->protection & ~VM_PROT_WRITE; 8576 8577 if (override_nx(old_map, old_entry->alias) && prot) 8578 prot |= VM_PROT_EXECUTE; 8579 8580 vm_object_pmap_protect( 8581 old_entry->object.vm_object, 8582 old_entry->offset, 8583 (old_entry->vme_end - 8584 old_entry->vme_start), 8585 ((old_entry->is_shared 8586 || old_map->mapped_in_other_pmaps) 8587 ? PMAP_NULL : 8588 old_map->pmap), 8589 old_entry->vme_start, 8590 prot); 8591 8592 old_entry->needs_copy = TRUE; 8593 } 8594 new_entry->needs_copy = new_entry_needs_copy; 8595 8596 /* 8597 * Insert the entry at the end 8598 * of the map. 8599 */ 8600 8601 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), 8602 new_entry); 8603 new_size += entry_size; 8604 break; 8605 8606 slow_vm_map_fork_copy: 8607 if (vm_map_fork_copy(old_map, &old_entry, new_map)) { 8608 new_size += entry_size; 8609 } 8610 continue; 8611 } 8612 old_entry = old_entry->vme_next; 8613 } 8614 8615 new_map->size = new_size; 8616 vm_map_unlock(old_map); 8617 vm_map_deallocate(old_map); 8618 8619 return(new_map); 8620} 8621 8622/* 8623 * vm_map_exec: 8624 * 8625 * Setup the "new_map" with the proper execution environment according 8626 * to the type of executable (platform, 64bit, chroot environment). 8627 * Map the comm page and shared region, etc... 
8628 */ 8629kern_return_t 8630vm_map_exec( 8631 vm_map_t new_map, 8632 task_t task, 8633 void *fsroot, 8634 cpu_type_t cpu) 8635{ 8636 SHARED_REGION_TRACE_DEBUG( 8637 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n", 8638 current_task(), new_map, task, fsroot, cpu)); 8639 (void) vm_commpage_enter(new_map, task); 8640 (void) vm_shared_region_enter(new_map, task, fsroot, cpu); 8641 SHARED_REGION_TRACE_DEBUG( 8642 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n", 8643 current_task(), new_map, task, fsroot, cpu)); 8644 return KERN_SUCCESS; 8645} 8646 8647/* 8648 * vm_map_lookup_locked: 8649 * 8650 * Finds the VM object, offset, and 8651 * protection for a given virtual address in the 8652 * specified map, assuming a page fault of the 8653 * type specified. 8654 * 8655 * Returns the (object, offset, protection) for 8656 * this address, whether it is wired down, and whether 8657 * this map has the only reference to the data in question. 8658 * In order to later verify this lookup, a "version" 8659 * is returned. 8660 * 8661 * The map MUST be locked by the caller and WILL be 8662 * locked on exit. In order to guarantee the 8663 * existence of the returned object, it is returned 8664 * locked. 8665 * 8666 * If a lookup is requested with "write protection" 8667 * specified, the map may be changed to perform virtual 8668 * copying operations, although the data referenced will 8669 * remain the same. 8670 */ 8671kern_return_t 8672vm_map_lookup_locked( 8673 vm_map_t *var_map, /* IN/OUT */ 8674 vm_map_offset_t vaddr, 8675 vm_prot_t fault_type, 8676 int object_lock_type, 8677 vm_map_version_t *out_version, /* OUT */ 8678 vm_object_t *object, /* OUT */ 8679 vm_object_offset_t *offset, /* OUT */ 8680 vm_prot_t *out_prot, /* OUT */ 8681 boolean_t *wired, /* OUT */ 8682 vm_object_fault_info_t fault_info, /* OUT */ 8683 vm_map_t *real_map) 8684{ 8685 vm_map_entry_t entry; 8686 register vm_map_t map = *var_map; 8687 vm_map_t old_map = *var_map; 8688 vm_map_t cow_sub_map_parent = VM_MAP_NULL; 8689 vm_map_offset_t cow_parent_vaddr = 0; 8690 vm_map_offset_t old_start = 0; 8691 vm_map_offset_t old_end = 0; 8692 register vm_prot_t prot; 8693 boolean_t mask_protections; 8694 vm_prot_t original_fault_type; 8695 8696 /* 8697 * VM_PROT_MASK means that the caller wants us to use "fault_type" 8698 * as a mask against the mapping's actual protections, not as an 8699 * absolute value. 8700 */ 8701 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE; 8702 fault_type &= ~VM_PROT_IS_MASK; 8703 original_fault_type = fault_type; 8704 8705 *real_map = map; 8706 8707RetryLookup: 8708 fault_type = original_fault_type; 8709 8710 /* 8711 * If the map has an interesting hint, try it before calling 8712 * full blown lookup routine. 8713 */ 8714 entry = map->hint; 8715 8716 if ((entry == vm_map_to_entry(map)) || 8717 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) { 8718 vm_map_entry_t tmp_entry; 8719 8720 /* 8721 * Entry was either not a valid hint, or the vaddr 8722 * was not contained in the entry, so do a full lookup. 8723 */ 8724 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) { 8725 if((cow_sub_map_parent) && (cow_sub_map_parent != map)) 8726 vm_map_unlock(cow_sub_map_parent); 8727 if((*real_map != map) 8728 && (*real_map != cow_sub_map_parent)) 8729 vm_map_unlock(*real_map); 8730 return KERN_INVALID_ADDRESS; 8731 } 8732 8733 entry = tmp_entry; 8734 } 8735 if(map == old_map) { 8736 old_start = entry->vme_start; 8737 old_end = entry->vme_end; 8738 } 8739 8740 /* 8741 * Handle submaps. 
Drop lock on upper map, submap is 8742 * returned locked. 8743 */ 8744 8745submap_recurse: 8746 if (entry->is_sub_map) { 8747 vm_map_offset_t local_vaddr; 8748 vm_map_offset_t end_delta; 8749 vm_map_offset_t start_delta; 8750 vm_map_entry_t submap_entry; 8751 boolean_t mapped_needs_copy=FALSE; 8752 8753 local_vaddr = vaddr; 8754 8755 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) { 8756 /* if real_map equals map we unlock below */ 8757 if ((*real_map != map) && 8758 (*real_map != cow_sub_map_parent)) 8759 vm_map_unlock(*real_map); 8760 *real_map = entry->object.sub_map; 8761 } 8762 8763 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) { 8764 if (!mapped_needs_copy) { 8765 if (vm_map_lock_read_to_write(map)) { 8766 vm_map_lock_read(map); 8767 *real_map = map; 8768 goto RetryLookup; 8769 } 8770 vm_map_lock_read(entry->object.sub_map); 8771 *var_map = entry->object.sub_map; 8772 cow_sub_map_parent = map; 8773 /* reset base to map before cow object */ 8774 /* this is the map which will accept */ 8775 /* the new cow object */ 8776 old_start = entry->vme_start; 8777 old_end = entry->vme_end; 8778 cow_parent_vaddr = vaddr; 8779 mapped_needs_copy = TRUE; 8780 } else { 8781 vm_map_lock_read(entry->object.sub_map); 8782 *var_map = entry->object.sub_map; 8783 if((cow_sub_map_parent != map) && 8784 (*real_map != map)) 8785 vm_map_unlock(map); 8786 } 8787 } else { 8788 vm_map_lock_read(entry->object.sub_map); 8789 *var_map = entry->object.sub_map; 8790 /* leave map locked if it is a target */ 8791 /* cow sub_map above otherwise, just */ 8792 /* follow the maps down to the object */ 8793 /* here we unlock knowing we are not */ 8794 /* revisiting the map. */ 8795 if((*real_map != map) && (map != cow_sub_map_parent)) 8796 vm_map_unlock_read(map); 8797 } 8798 8799 map = *var_map; 8800 8801 /* calculate the offset in the submap for vaddr */ 8802 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset; 8803 8804 RetrySubMap: 8805 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) { 8806 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){ 8807 vm_map_unlock(cow_sub_map_parent); 8808 } 8809 if((*real_map != map) 8810 && (*real_map != cow_sub_map_parent)) { 8811 vm_map_unlock(*real_map); 8812 } 8813 *real_map = map; 8814 return KERN_INVALID_ADDRESS; 8815 } 8816 8817 /* find the attenuated shadow of the underlying object */ 8818 /* on our target map */ 8819 8820 /* in english the submap object may extend beyond the */ 8821 /* region mapped by the entry or, may only fill a portion */ 8822 /* of it. For our purposes, we only care if the object */ 8823 /* doesn't fill. In this case the area which will */ 8824 /* ultimately be clipped in the top map will only need */ 8825 /* to be as big as the portion of the underlying entry */ 8826 /* which is mapped */ 8827 start_delta = submap_entry->vme_start > entry->offset ? 8828 submap_entry->vme_start - entry->offset : 0; 8829 8830 end_delta = 8831 (entry->offset + start_delta + (old_end - old_start)) <= 8832 submap_entry->vme_end ? 
8833 0 : (entry->offset + 8834 (old_end - old_start)) 8835 - submap_entry->vme_end; 8836 8837 old_start += start_delta; 8838 old_end -= end_delta; 8839 8840 if(submap_entry->is_sub_map) { 8841 entry = submap_entry; 8842 vaddr = local_vaddr; 8843 goto submap_recurse; 8844 } 8845 8846 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) { 8847 8848 vm_object_t sub_object, copy_object; 8849 vm_object_offset_t copy_offset; 8850 vm_map_offset_t local_start; 8851 vm_map_offset_t local_end; 8852 boolean_t copied_slowly = FALSE; 8853 8854 if (vm_map_lock_read_to_write(map)) { 8855 vm_map_lock_read(map); 8856 old_start -= start_delta; 8857 old_end += end_delta; 8858 goto RetrySubMap; 8859 } 8860 8861 8862 sub_object = submap_entry->object.vm_object; 8863 if (sub_object == VM_OBJECT_NULL) { 8864 sub_object = 8865 vm_object_allocate( 8866 (vm_map_size_t) 8867 (submap_entry->vme_end - 8868 submap_entry->vme_start)); 8869 submap_entry->object.vm_object = sub_object; 8870 submap_entry->offset = 0; 8871 } 8872 local_start = local_vaddr - 8873 (cow_parent_vaddr - old_start); 8874 local_end = local_vaddr + 8875 (old_end - cow_parent_vaddr); 8876 vm_map_clip_start(map, submap_entry, local_start); 8877 vm_map_clip_end(map, submap_entry, local_end); 8878 /* unnesting was done in vm_map_clip_start/end() */ 8879 assert(!submap_entry->use_pmap); 8880 8881 /* This is the COW case, let's connect */ 8882 /* an entry in our space to the underlying */ 8883 /* object in the submap, bypassing the */ 8884 /* submap. */ 8885 8886 8887 if(submap_entry->wired_count != 0 || 8888 (sub_object->copy_strategy == 8889 MEMORY_OBJECT_COPY_NONE)) { 8890 vm_object_lock(sub_object); 8891 vm_object_copy_slowly(sub_object, 8892 submap_entry->offset, 8893 (submap_entry->vme_end - 8894 submap_entry->vme_start), 8895 FALSE, 8896 &copy_object); 8897 copied_slowly = TRUE; 8898 } else { 8899 8900 /* set up shadow object */ 8901 copy_object = sub_object; 8902 vm_object_reference(copy_object); 8903 sub_object->shadowed = TRUE; 8904 submap_entry->needs_copy = TRUE; 8905 8906 prot = submap_entry->protection & ~VM_PROT_WRITE; 8907 8908 if (override_nx(old_map, submap_entry->alias) && prot) 8909 prot |= VM_PROT_EXECUTE; 8910 8911 vm_object_pmap_protect( 8912 sub_object, 8913 submap_entry->offset, 8914 submap_entry->vme_end - 8915 submap_entry->vme_start, 8916 (submap_entry->is_shared 8917 || map->mapped_in_other_pmaps) ? 8918 PMAP_NULL : map->pmap, 8919 submap_entry->vme_start, 8920 prot); 8921 } 8922 8923 /* 8924 * Adjust the fault offset to the submap entry. 8925 */ 8926 copy_offset = (local_vaddr - 8927 submap_entry->vme_start + 8928 submap_entry->offset); 8929 8930 /* This works differently than the */ 8931 /* normal submap case. We go back */ 8932 /* to the parent of the cow map and*/ 8933 /* clip out the target portion of */ 8934 /* the sub_map, substituting the */ 8935 /* new copy object, */ 8936 8937 vm_map_unlock(map); 8938 local_start = old_start; 8939 local_end = old_end; 8940 map = cow_sub_map_parent; 8941 *var_map = cow_sub_map_parent; 8942 vaddr = cow_parent_vaddr; 8943 cow_sub_map_parent = NULL; 8944 8945 if(!vm_map_lookup_entry(map, 8946 vaddr, &entry)) { 8947 vm_object_deallocate( 8948 copy_object); 8949 vm_map_lock_write_to_read(map); 8950 return KERN_INVALID_ADDRESS; 8951 } 8952 8953 /* clip out the portion of space */ 8954 /* mapped by the sub map which */ 8955 /* corresponds to the underlying */ 8956 /* object */ 8957 8958 /* 8959 * Clip (and unnest) the smallest nested chunk 8960 * possible around the faulting address...
8961 */ 8962 local_start = vaddr & ~(pmap_nesting_size_min - 1); 8963 local_end = local_start + pmap_nesting_size_min; 8964 /* 8965 * ... but don't go beyond the "old_start" to "old_end" 8966 * range, to avoid spanning over another VM region 8967 * with a possibly different VM object and/or offset. 8968 */ 8969 if (local_start < old_start) { 8970 local_start = old_start; 8971 } 8972 if (local_end > old_end) { 8973 local_end = old_end; 8974 } 8975 /* 8976 * Adjust copy_offset to the start of the range. 8977 */ 8978 copy_offset -= (vaddr - local_start); 8979 8980 vm_map_clip_start(map, entry, local_start); 8981 vm_map_clip_end(map, entry, local_end); 8982 /* unnesting was done in vm_map_clip_start/end() */ 8983 assert(!entry->use_pmap); 8984 8985 /* substitute copy object for */ 8986 /* shared map entry */ 8987 vm_map_deallocate(entry->object.sub_map); 8988 entry->is_sub_map = FALSE; 8989 entry->object.vm_object = copy_object; 8990 8991 /* propagate the submap entry's protections */ 8992 entry->protection |= submap_entry->protection; 8993 entry->max_protection |= submap_entry->max_protection; 8994 8995 if(copied_slowly) { 8996 entry->offset = local_start - old_start; 8997 entry->needs_copy = FALSE; 8998 entry->is_shared = FALSE; 8999 } else { 9000 entry->offset = copy_offset; 9001 entry->needs_copy = TRUE; 9002 if(entry->inheritance == VM_INHERIT_SHARE) 9003 entry->inheritance = VM_INHERIT_COPY; 9004 if (map != old_map) 9005 entry->is_shared = TRUE; 9006 } 9007 if(entry->inheritance == VM_INHERIT_SHARE) 9008 entry->inheritance = VM_INHERIT_COPY; 9009 9010 vm_map_lock_write_to_read(map); 9011 } else { 9012 if((cow_sub_map_parent) 9013 && (cow_sub_map_parent != *real_map) 9014 && (cow_sub_map_parent != map)) { 9015 vm_map_unlock(cow_sub_map_parent); 9016 } 9017 entry = submap_entry; 9018 vaddr = local_vaddr; 9019 } 9020 } 9021 9022 /* 9023 * Check whether this task is allowed to have 9024 * this page. 9025 */ 9026 9027 prot = entry->protection; 9028 9029 if (override_nx(old_map, entry->alias) && prot) { 9030 /* 9031 * HACK -- if not a stack, then allow execution 9032 */ 9033 prot |= VM_PROT_EXECUTE; 9034 } 9035 9036 if (mask_protections) { 9037 fault_type &= prot; 9038 if (fault_type == VM_PROT_NONE) { 9039 goto protection_failure; 9040 } 9041 } 9042 if ((fault_type & (prot)) != fault_type) { 9043 protection_failure: 9044 if (*real_map != map) { 9045 vm_map_unlock(*real_map); 9046 } 9047 *real_map = map; 9048 9049 if ((fault_type & VM_PROT_EXECUTE) && prot) 9050 log_stack_execution_failure((addr64_t)vaddr, prot); 9051 9052 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL); 9053 return KERN_PROTECTION_FAILURE; 9054 } 9055 9056 /* 9057 * If this page is not pageable, we have to get 9058 * it for all possible accesses. 9059 */ 9060 9061 *wired = (entry->wired_count != 0); 9062 if (*wired) 9063 fault_type = prot; 9064 9065 /* 9066 * If the entry was copy-on-write, we either ... 9067 */ 9068 9069 if (entry->needs_copy) { 9070 /* 9071 * If we want to write the page, we may as well 9072 * handle that now since we've got the map locked. 9073 * 9074 * If we don't need to write the page, we just 9075 * demote the permissions allowed. 9076 */ 9077 9078 if ((fault_type & VM_PROT_WRITE) || *wired) { 9079 /* 9080 * Make a new object, and place it in the 9081 * object chain. Note that no new references 9082 * have appeared -- one just moved from the 9083 * map to the new object. 
9084 */ 9085 9086 if (vm_map_lock_read_to_write(map)) { 9087 vm_map_lock_read(map); 9088 goto RetryLookup; 9089 } 9090 vm_object_shadow(&entry->object.vm_object, 9091 &entry->offset, 9092 (vm_map_size_t) (entry->vme_end - 9093 entry->vme_start)); 9094 9095 entry->object.vm_object->shadowed = TRUE; 9096 entry->needs_copy = FALSE; 9097 vm_map_lock_write_to_read(map); 9098 } 9099 else { 9100 /* 9101 * We're attempting to read a copy-on-write 9102 * page -- don't allow writes. 9103 */ 9104 9105 prot &= (~VM_PROT_WRITE); 9106 } 9107 } 9108 9109 /* 9110 * Create an object if necessary. 9111 */ 9112 if (entry->object.vm_object == VM_OBJECT_NULL) { 9113 9114 if (vm_map_lock_read_to_write(map)) { 9115 vm_map_lock_read(map); 9116 goto RetryLookup; 9117 } 9118 9119 entry->object.vm_object = vm_object_allocate( 9120 (vm_map_size_t)(entry->vme_end - entry->vme_start)); 9121 entry->offset = 0; 9122 vm_map_lock_write_to_read(map); 9123 } 9124 9125 /* 9126 * Return the object/offset from this entry. If the entry 9127 * was copy-on-write or empty, it has been fixed up. Also 9128 * return the protection. 9129 */ 9130 9131 *offset = (vaddr - entry->vme_start) + entry->offset; 9132 *object = entry->object.vm_object; 9133 *out_prot = prot; 9134 9135 if (fault_info) { 9136 fault_info->interruptible = THREAD_UNINT; /* for now... */ 9137 /* ... the caller will change "interruptible" if needed */ 9138 fault_info->cluster_size = 0; 9139 fault_info->user_tag = entry->alias; 9140 fault_info->behavior = entry->behavior; 9141 fault_info->lo_offset = entry->offset; 9142 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; 9143 fault_info->no_cache = entry->no_cache; 9144 fault_info->stealth = FALSE; 9145 fault_info->io_sync = FALSE; 9146 fault_info->cs_bypass = (entry->used_for_jit)? TRUE : FALSE; 9147 fault_info->mark_zf_absent = FALSE; 9148 fault_info->batch_pmap_op = FALSE; 9149 } 9150 9151 /* 9152 * Lock the object to prevent it from disappearing 9153 */ 9154 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) 9155 vm_object_lock(*object); 9156 else 9157 vm_object_lock_shared(*object); 9158 9159 /* 9160 * Save the version number 9161 */ 9162 9163 out_version->main_timestamp = map->timestamp; 9164 9165 return KERN_SUCCESS; 9166} 9167 9168 9169/* 9170 * vm_map_verify: 9171 * 9172 * Verifies that the map in question has not changed 9173 * since the given version. If successful, the map 9174 * will not change until vm_map_verify_done() is called. 9175 */ 9176boolean_t 9177vm_map_verify( 9178 register vm_map_t map, 9179 register vm_map_version_t *version) /* REF */ 9180{ 9181 boolean_t result; 9182 9183 vm_map_lock_read(map); 9184 result = (map->timestamp == version->main_timestamp); 9185 9186 if (!result) 9187 vm_map_unlock_read(map); 9188 9189 return(result); 9190} 9191 9192/* 9193 * vm_map_verify_done: 9194 * 9195 * Releases locks acquired by a vm_map_verify. 9196 * 9197 * This is now a macro in vm/vm_map.h. It does a 9198 * vm_map_unlock_read on the map. 
9199 */ 9200 9201 9202/* 9203 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY 9204 * Goes away after regular vm_region_recurse function migrates to 9205 * 64 bits 9206 * vm_region_recurse: A form of vm_region which follows the 9207 * submaps in a target map 9208 * 9209 */ 9210 9211kern_return_t 9212vm_map_region_recurse_64( 9213 vm_map_t map, 9214 vm_map_offset_t *address, /* IN/OUT */ 9215 vm_map_size_t *size, /* OUT */ 9216 natural_t *nesting_depth, /* IN/OUT */ 9217 vm_region_submap_info_64_t submap_info, /* IN/OUT */ 9218 mach_msg_type_number_t *count) /* IN/OUT */ 9219{ 9220 vm_region_extended_info_data_t extended; 9221 vm_map_entry_t tmp_entry; 9222 vm_map_offset_t user_address; 9223 unsigned int user_max_depth; 9224 9225 /* 9226 * "curr_entry" is the VM map entry preceding or including the 9227 * address we're looking for. 9228 * "curr_map" is the map or sub-map containing "curr_entry". 9229 * "curr_address" is the equivalent of the top map's "user_address" 9230 * in the current map. 9231 * "curr_offset" is the cumulated offset of "curr_map" in the 9232 * target task's address space. 9233 * "curr_depth" is the depth of "curr_map" in the chain of 9234 * sub-maps. 9235 * 9236 * "curr_max_below" and "curr_max_above" limit the range (around 9237 * "curr_address") we should take into account in the current (sub)map. 9238 * They limit the range to what's visible through the map entries 9239 * we've traversed from the top map to the current map. 9240 9241 */ 9242 vm_map_entry_t curr_entry; 9243 vm_map_address_t curr_address; 9244 vm_map_offset_t curr_offset; 9245 vm_map_t curr_map; 9246 unsigned int curr_depth; 9247 vm_map_offset_t curr_max_below, curr_max_above; 9248 vm_map_offset_t curr_skip; 9249 9250 /* 9251 * "next_" is the same as "curr_" but for the VM region immediately 9252 * after the address we're looking for. We need to keep track of this 9253 * too because we want to return info about that region if the 9254 * address we're looking for is not mapped. 
9255 */ 9256 vm_map_entry_t next_entry; 9257 vm_map_offset_t next_offset; 9258 vm_map_offset_t next_address; 9259 vm_map_t next_map; 9260 unsigned int next_depth; 9261 vm_map_offset_t next_max_below, next_max_above; 9262 vm_map_offset_t next_skip; 9263 9264 boolean_t look_for_pages; 9265 vm_region_submap_short_info_64_t short_info; 9266 9267 if (map == VM_MAP_NULL) { 9268 /* no address space to work on */ 9269 return KERN_INVALID_ARGUMENT; 9270 } 9271 9272 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) { 9273 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { 9274 /* 9275 * "info" structure is not big enough and 9276 * would overflow 9277 */ 9278 return KERN_INVALID_ARGUMENT; 9279 } else { 9280 look_for_pages = FALSE; 9281 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; 9282 short_info = (vm_region_submap_short_info_64_t) submap_info; 9283 submap_info = NULL; 9284 } 9285 } else { 9286 look_for_pages = TRUE; 9287 *count = VM_REGION_SUBMAP_INFO_COUNT_64; 9288 short_info = NULL; 9289 } 9290 9291 9292 user_address = *address; 9293 user_max_depth = *nesting_depth; 9294 9295 curr_entry = NULL; 9296 curr_map = map; 9297 curr_address = user_address; 9298 curr_offset = 0; 9299 curr_skip = 0; 9300 curr_depth = 0; 9301 curr_max_above = ((vm_map_offset_t) -1) - curr_address; 9302 curr_max_below = curr_address; 9303 9304 next_entry = NULL; 9305 next_map = NULL; 9306 next_address = 0; 9307 next_offset = 0; 9308 next_skip = 0; 9309 next_depth = 0; 9310 next_max_above = (vm_map_offset_t) -1; 9311 next_max_below = (vm_map_offset_t) -1; 9312 9313 if (not_in_kdp) { 9314 vm_map_lock_read(curr_map); 9315 } 9316 9317 for (;;) { 9318 if (vm_map_lookup_entry(curr_map, 9319 curr_address, 9320 &tmp_entry)) { 9321 /* tmp_entry contains the address we're looking for */ 9322 curr_entry = tmp_entry; 9323 } else { 9324 vm_map_offset_t skip; 9325 /* 9326 * The address is not mapped. "tmp_entry" is the 9327 * map entry preceding the address. We want the next 9328 * one, if it exists. 9329 */ 9330 curr_entry = tmp_entry->vme_next; 9331 9332 if (curr_entry == vm_map_to_entry(curr_map) || 9333 (curr_entry->vme_start >= 9334 curr_address + curr_max_above)) { 9335 /* no next entry at this level: stop looking */ 9336 if (not_in_kdp) { 9337 vm_map_unlock_read(curr_map); 9338 } 9339 curr_entry = NULL; 9340 curr_map = NULL; 9341 curr_offset = 0; 9342 curr_depth = 0; 9343 curr_max_above = 0; 9344 curr_max_below = 0; 9345 break; 9346 } 9347 9348 /* adjust current address and offset */ 9349 skip = curr_entry->vme_start - curr_address; 9350 curr_address = curr_entry->vme_start; 9351 curr_skip = skip; 9352 curr_offset += skip; 9353 curr_max_above -= skip; 9354 curr_max_below = 0; 9355 } 9356 9357 /* 9358 * Is the next entry at this level closer to the address (or 9359 * deeper in the submap chain) than the one we had 9360 * so far ? 9361 */ 9362 tmp_entry = curr_entry->vme_next; 9363 if (tmp_entry == vm_map_to_entry(curr_map)) { 9364 /* no next entry at this level */ 9365 } else if (tmp_entry->vme_start >= 9366 curr_address + curr_max_above) { 9367 /* 9368 * tmp_entry is beyond the scope of what we mapped of 9369 * this submap in the upper level: ignore it. 9370 */ 9371 } else if ((next_entry == NULL) || 9372 (tmp_entry->vme_start + curr_offset <= 9373 next_entry->vme_start + next_offset)) { 9374 /* 9375 * We didn't have a "next_entry" or this one is 9376 * closer to the address we're looking for: 9377 * use this "tmp_entry" as the new "next_entry". 
9378 */ 9379 if (next_entry != NULL) { 9380 /* unlock the last "next_map" */ 9381 if (next_map != curr_map && not_in_kdp) { 9382 vm_map_unlock_read(next_map); 9383 } 9384 } 9385 next_entry = tmp_entry; 9386 next_map = curr_map; 9387 next_depth = curr_depth; 9388 next_address = next_entry->vme_start; 9389 next_skip = curr_skip; 9390 next_offset = curr_offset; 9391 next_offset += (next_address - curr_address); 9392 next_max_above = MIN(next_max_above, curr_max_above); 9393 next_max_above = MIN(next_max_above, 9394 next_entry->vme_end - next_address); 9395 next_max_below = MIN(next_max_below, curr_max_below); 9396 next_max_below = MIN(next_max_below, 9397 next_address - next_entry->vme_start); 9398 } 9399 9400 /* 9401 * "curr_max_{above,below}" allow us to keep track of the 9402 * portion of the submap that is actually mapped at this level: 9403 * the rest of that submap is irrelevant to us, since it's not 9404 * mapped here. 9405 * The relevant portion of the map starts at 9406 * "curr_entry->offset" up to the size of "curr_entry". 9407 */ 9408 curr_max_above = MIN(curr_max_above, 9409 curr_entry->vme_end - curr_address); 9410 curr_max_below = MIN(curr_max_below, 9411 curr_address - curr_entry->vme_start); 9412 9413 if (!curr_entry->is_sub_map || 9414 curr_depth >= user_max_depth) { 9415 /* 9416 * We hit a leaf map or we reached the maximum depth 9417 * we could, so stop looking. Keep the current map 9418 * locked. 9419 */ 9420 break; 9421 } 9422 9423 /* 9424 * Get down to the next submap level. 9425 */ 9426 9427 /* 9428 * Lock the next level and unlock the current level, 9429 * unless we need to keep it locked to access the "next_entry" 9430 * later. 9431 */ 9432 if (not_in_kdp) { 9433 vm_map_lock_read(curr_entry->object.sub_map); 9434 } 9435 if (curr_map == next_map) { 9436 /* keep "next_map" locked in case we need it */ 9437 } else { 9438 /* release this map */ 9439 if (not_in_kdp) 9440 vm_map_unlock_read(curr_map); 9441 } 9442 9443 /* 9444 * Adjust the offset. "curr_entry" maps the submap 9445 * at relative address "curr_entry->vme_start" in the 9446 * curr_map but skips the first "curr_entry->offset" 9447 * bytes of the submap. 9448 * "curr_offset" always represents the offset of a virtual 9449 * address in the curr_map relative to the absolute address 9450 * space (i.e. the top-level VM map). 9451 */ 9452 curr_offset += 9453 (curr_entry->offset - curr_entry->vme_start); 9454 curr_address = user_address + curr_offset; 9455 /* switch to the submap */ 9456 curr_map = curr_entry->object.sub_map; 9457 curr_depth++; 9458 curr_entry = NULL; 9459 } 9460 9461 if (curr_entry == NULL) { 9462 /* no VM region contains the address... */ 9463 if (next_entry == NULL) { 9464 /* ... and no VM region follows it either */ 9465 return KERN_INVALID_ADDRESS; 9466 } 9467 /* ... gather info about the next VM region */ 9468 curr_entry = next_entry; 9469 curr_map = next_map; /* still locked ... 
*/ 9470 curr_address = next_address; 9471 curr_skip = next_skip; 9472 curr_offset = next_offset; 9473 curr_depth = next_depth; 9474 curr_max_above = next_max_above; 9475 curr_max_below = next_max_below; 9476 if (curr_map == map) { 9477 user_address = curr_address; 9478 } 9479 } else { 9480 /* we won't need "next_entry" after all */ 9481 if (next_entry != NULL) { 9482 /* release "next_map" */ 9483 if (next_map != curr_map && not_in_kdp) { 9484 vm_map_unlock_read(next_map); 9485 } 9486 } 9487 } 9488 next_entry = NULL; 9489 next_map = NULL; 9490 next_offset = 0; 9491 next_skip = 0; 9492 next_depth = 0; 9493 next_max_below = -1; 9494 next_max_above = -1; 9495 9496 *nesting_depth = curr_depth; 9497 *size = curr_max_above + curr_max_below; 9498 *address = user_address + curr_skip - curr_max_below; 9499 9500// LP64todo: all the current tools are 32bit, obviously never worked for 64b 9501// so probably should be a real 32b ID vs. ptr. 9502// Current users just check for equality 9503#define INFO_MAKE_OBJECT_ID(p) ((uint32_t)(uintptr_t)p) 9504 9505 if (look_for_pages) { 9506 submap_info->user_tag = curr_entry->alias; 9507 submap_info->offset = curr_entry->offset; 9508 submap_info->protection = curr_entry->protection; 9509 submap_info->inheritance = curr_entry->inheritance; 9510 submap_info->max_protection = curr_entry->max_protection; 9511 submap_info->behavior = curr_entry->behavior; 9512 submap_info->user_wired_count = curr_entry->user_wired_count; 9513 submap_info->is_submap = curr_entry->is_sub_map; 9514 submap_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object); 9515 } else { 9516 short_info->user_tag = curr_entry->alias; 9517 short_info->offset = curr_entry->offset; 9518 short_info->protection = curr_entry->protection; 9519 short_info->inheritance = curr_entry->inheritance; 9520 short_info->max_protection = curr_entry->max_protection; 9521 short_info->behavior = curr_entry->behavior; 9522 short_info->user_wired_count = curr_entry->user_wired_count; 9523 short_info->is_submap = curr_entry->is_sub_map; 9524 short_info->object_id = INFO_MAKE_OBJECT_ID(curr_entry->object.vm_object); 9525 } 9526 9527 extended.pages_resident = 0; 9528 extended.pages_swapped_out = 0; 9529 extended.pages_shared_now_private = 0; 9530 extended.pages_dirtied = 0; 9531 extended.external_pager = 0; 9532 extended.shadow_depth = 0; 9533 9534 if (not_in_kdp) { 9535 if (!curr_entry->is_sub_map) { 9536 vm_map_offset_t range_start, range_end; 9537 range_start = MAX((curr_address - curr_max_below), 9538 curr_entry->vme_start); 9539 range_end = MIN((curr_address + curr_max_above), 9540 curr_entry->vme_end); 9541 vm_map_region_walk(curr_map, 9542 range_start, 9543 curr_entry, 9544 (curr_entry->offset + 9545 (range_start - 9546 curr_entry->vme_start)), 9547 range_end - range_start, 9548 &extended, 9549 look_for_pages); 9550 if (extended.external_pager && 9551 extended.ref_count == 2 && 9552 extended.share_mode == SM_SHARED) { 9553 extended.share_mode = SM_PRIVATE; 9554 } 9555 } else { 9556 if (curr_entry->use_pmap) { 9557 extended.share_mode = SM_TRUESHARED; 9558 } else { 9559 extended.share_mode = SM_PRIVATE; 9560 } 9561 extended.ref_count = 9562 curr_entry->object.sub_map->ref_count; 9563 } 9564 } 9565 9566 if (look_for_pages) { 9567 submap_info->pages_resident = extended.pages_resident; 9568 submap_info->pages_swapped_out = extended.pages_swapped_out; 9569 submap_info->pages_shared_now_private = 9570 extended.pages_shared_now_private; 9571 submap_info->pages_dirtied = extended.pages_dirtied; 9572 
submap_info->external_pager = extended.external_pager; 9573 submap_info->shadow_depth = extended.shadow_depth; 9574 submap_info->share_mode = extended.share_mode; 9575 submap_info->ref_count = extended.ref_count; 9576 } else { 9577 short_info->external_pager = extended.external_pager; 9578 short_info->shadow_depth = extended.shadow_depth; 9579 short_info->share_mode = extended.share_mode; 9580 short_info->ref_count = extended.ref_count; 9581 } 9582 9583 if (not_in_kdp) { 9584 vm_map_unlock_read(curr_map); 9585 } 9586 9587 return KERN_SUCCESS; 9588} 9589 9590/* 9591 * vm_region: 9592 * 9593 * User call to obtain information about a region in 9594 * a task's address map. Currently, only one flavor is 9595 * supported. 9596 * 9597 * XXX The reserved and behavior fields cannot be filled 9598 * in until the vm merge from the IK is completed, and 9599 * vm_reserve is implemented. 9600 */ 9601 9602kern_return_t 9603vm_map_region( 9604 vm_map_t map, 9605 vm_map_offset_t *address, /* IN/OUT */ 9606 vm_map_size_t *size, /* OUT */ 9607 vm_region_flavor_t flavor, /* IN */ 9608 vm_region_info_t info, /* OUT */ 9609 mach_msg_type_number_t *count, /* IN/OUT */ 9610 mach_port_t *object_name) /* OUT */ 9611{ 9612 vm_map_entry_t tmp_entry; 9613 vm_map_entry_t entry; 9614 vm_map_offset_t start; 9615 9616 if (map == VM_MAP_NULL) 9617 return(KERN_INVALID_ARGUMENT); 9618 9619 switch (flavor) { 9620 9621 case VM_REGION_BASIC_INFO: 9622 /* legacy for old 32-bit objects info */ 9623 { 9624 vm_region_basic_info_t basic; 9625 9626 if (*count < VM_REGION_BASIC_INFO_COUNT) 9627 return(KERN_INVALID_ARGUMENT); 9628 9629 basic = (vm_region_basic_info_t) info; 9630 *count = VM_REGION_BASIC_INFO_COUNT; 9631 9632 vm_map_lock_read(map); 9633 9634 start = *address; 9635 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 9636 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 9637 vm_map_unlock_read(map); 9638 return(KERN_INVALID_ADDRESS); 9639 } 9640 } else { 9641 entry = tmp_entry; 9642 } 9643 9644 start = entry->vme_start; 9645 9646 basic->offset = (uint32_t)entry->offset; 9647 basic->protection = entry->protection; 9648 basic->inheritance = entry->inheritance; 9649 basic->max_protection = entry->max_protection; 9650 basic->behavior = entry->behavior; 9651 basic->user_wired_count = entry->user_wired_count; 9652 basic->reserved = entry->is_sub_map; 9653 *address = start; 9654 *size = (entry->vme_end - start); 9655 9656 if (object_name) *object_name = IP_NULL; 9657 if (entry->is_sub_map) { 9658 basic->shared = FALSE; 9659 } else { 9660 basic->shared = entry->is_shared; 9661 } 9662 9663 vm_map_unlock_read(map); 9664 return(KERN_SUCCESS); 9665 } 9666 9667 case VM_REGION_BASIC_INFO_64: 9668 { 9669 vm_region_basic_info_64_t basic; 9670 9671 if (*count < VM_REGION_BASIC_INFO_COUNT_64) 9672 return(KERN_INVALID_ARGUMENT); 9673 9674 basic = (vm_region_basic_info_64_t) info; 9675 *count = VM_REGION_BASIC_INFO_COUNT_64; 9676 9677 vm_map_lock_read(map); 9678 9679 start = *address; 9680 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 9681 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 9682 vm_map_unlock_read(map); 9683 return(KERN_INVALID_ADDRESS); 9684 } 9685 } else { 9686 entry = tmp_entry; 9687 } 9688 9689 start = entry->vme_start; 9690 9691 basic->offset = entry->offset; 9692 basic->protection = entry->protection; 9693 basic->inheritance = entry->inheritance; 9694 basic->max_protection = entry->max_protection; 9695 basic->behavior = entry->behavior; 9696 basic->user_wired_count = entry->user_wired_count; 
9697 basic->reserved = entry->is_sub_map; 9698 *address = start; 9699 *size = (entry->vme_end - start); 9700 9701 if (object_name) *object_name = IP_NULL; 9702 if (entry->is_sub_map) { 9703 basic->shared = FALSE; 9704 } else { 9705 basic->shared = entry->is_shared; 9706 } 9707 9708 vm_map_unlock_read(map); 9709 return(KERN_SUCCESS); 9710 } 9711 case VM_REGION_EXTENDED_INFO: 9712 { 9713 vm_region_extended_info_t extended; 9714 9715 if (*count < VM_REGION_EXTENDED_INFO_COUNT) 9716 return(KERN_INVALID_ARGUMENT); 9717 9718 extended = (vm_region_extended_info_t) info; 9719 *count = VM_REGION_EXTENDED_INFO_COUNT; 9720 9721 vm_map_lock_read(map); 9722 9723 start = *address; 9724 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 9725 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 9726 vm_map_unlock_read(map); 9727 return(KERN_INVALID_ADDRESS); 9728 } 9729 } else { 9730 entry = tmp_entry; 9731 } 9732 start = entry->vme_start; 9733 9734 extended->protection = entry->protection; 9735 extended->user_tag = entry->alias; 9736 extended->pages_resident = 0; 9737 extended->pages_swapped_out = 0; 9738 extended->pages_shared_now_private = 0; 9739 extended->pages_dirtied = 0; 9740 extended->external_pager = 0; 9741 extended->shadow_depth = 0; 9742 9743 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE); 9744 9745 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) 9746 extended->share_mode = SM_PRIVATE; 9747 9748 if (object_name) 9749 *object_name = IP_NULL; 9750 *address = start; 9751 *size = (entry->vme_end - start); 9752 9753 vm_map_unlock_read(map); 9754 return(KERN_SUCCESS); 9755 } 9756 case VM_REGION_TOP_INFO: 9757 { 9758 vm_region_top_info_t top; 9759 9760 if (*count < VM_REGION_TOP_INFO_COUNT) 9761 return(KERN_INVALID_ARGUMENT); 9762 9763 top = (vm_region_top_info_t) info; 9764 *count = VM_REGION_TOP_INFO_COUNT; 9765 9766 vm_map_lock_read(map); 9767 9768 start = *address; 9769 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 9770 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 9771 vm_map_unlock_read(map); 9772 return(KERN_INVALID_ADDRESS); 9773 } 9774 } else { 9775 entry = tmp_entry; 9776 9777 } 9778 start = entry->vme_start; 9779 9780 top->private_pages_resident = 0; 9781 top->shared_pages_resident = 0; 9782 9783 vm_map_region_top_walk(entry, top); 9784 9785 if (object_name) 9786 *object_name = IP_NULL; 9787 *address = start; 9788 *size = (entry->vme_end - start); 9789 9790 vm_map_unlock_read(map); 9791 return(KERN_SUCCESS); 9792 } 9793 default: 9794 return(KERN_INVALID_ARGUMENT); 9795 } 9796} 9797 9798#define OBJ_RESIDENT_COUNT(obj, entry_size) \ 9799 MIN((entry_size), \ 9800 ((obj)->all_reusable ? 
\ 9801 (obj)->wired_page_count : \ 9802 (obj)->resident_page_count - (obj)->reusable_page_count)) 9803 9804void 9805vm_map_region_top_walk( 9806 vm_map_entry_t entry, 9807 vm_region_top_info_t top) 9808{ 9809 9810 if (entry->object.vm_object == 0 || entry->is_sub_map) { 9811 top->share_mode = SM_EMPTY; 9812 top->ref_count = 0; 9813 top->obj_id = 0; 9814 return; 9815 } 9816 9817 { 9818 struct vm_object *obj, *tmp_obj; 9819 int ref_count; 9820 uint32_t entry_size; 9821 9822 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64); 9823 9824 obj = entry->object.vm_object; 9825 9826 vm_object_lock(obj); 9827 9828 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 9829 ref_count--; 9830 9831 assert(obj->reusable_page_count <= obj->resident_page_count); 9832 if (obj->shadow) { 9833 if (ref_count == 1) 9834 top->private_pages_resident = 9835 OBJ_RESIDENT_COUNT(obj, entry_size); 9836 else 9837 top->shared_pages_resident = 9838 OBJ_RESIDENT_COUNT(obj, entry_size); 9839 top->ref_count = ref_count; 9840 top->share_mode = SM_COW; 9841 9842 while ((tmp_obj = obj->shadow)) { 9843 vm_object_lock(tmp_obj); 9844 vm_object_unlock(obj); 9845 obj = tmp_obj; 9846 9847 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 9848 ref_count--; 9849 9850 assert(obj->reusable_page_count <= obj->resident_page_count); 9851 top->shared_pages_resident += 9852 OBJ_RESIDENT_COUNT(obj, entry_size); 9853 top->ref_count += ref_count - 1; 9854 } 9855 } else { 9856 if (entry->superpage_size) { 9857 top->share_mode = SM_LARGE_PAGE; 9858 top->shared_pages_resident = 0; 9859 top->private_pages_resident = entry_size; 9860 } else if (entry->needs_copy) { 9861 top->share_mode = SM_COW; 9862 top->shared_pages_resident = 9863 OBJ_RESIDENT_COUNT(obj, entry_size); 9864 } else { 9865 if (ref_count == 1 || 9866 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { 9867 top->share_mode = SM_PRIVATE; 9868 top->private_pages_resident = 9869 OBJ_RESIDENT_COUNT(obj, 9870 entry_size); 9871 } else { 9872 top->share_mode = SM_SHARED; 9873 top->shared_pages_resident = 9874 OBJ_RESIDENT_COUNT(obj, 9875 entry_size); 9876 } 9877 } 9878 top->ref_count = ref_count; 9879 } 9880 /* XXX K64: obj_id will be truncated */ 9881 top->obj_id = (unsigned int) (uintptr_t)obj; 9882 9883 vm_object_unlock(obj); 9884 } 9885} 9886 9887void 9888vm_map_region_walk( 9889 vm_map_t map, 9890 vm_map_offset_t va, 9891 vm_map_entry_t entry, 9892 vm_object_offset_t offset, 9893 vm_object_size_t range, 9894 vm_region_extended_info_t extended, 9895 boolean_t look_for_pages) 9896{ 9897 register struct vm_object *obj, *tmp_obj; 9898 register vm_map_offset_t last_offset; 9899 register int i; 9900 register int ref_count; 9901 struct vm_object *shadow_object; 9902 int shadow_depth; 9903 9904 if ((entry->object.vm_object == 0) || 9905 (entry->is_sub_map) || 9906 (entry->object.vm_object->phys_contiguous && 9907 !entry->superpage_size)) { 9908 extended->share_mode = SM_EMPTY; 9909 extended->ref_count = 0; 9910 return; 9911 } 9912 9913 if (entry->superpage_size) { 9914 extended->shadow_depth = 0; 9915 extended->share_mode = SM_LARGE_PAGE; 9916 extended->ref_count = 1; 9917 extended->external_pager = 0; 9918 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT); 9919 extended->shadow_depth = 0; 9920 return; 9921 } 9922 9923 { 9924 obj = entry->object.vm_object; 9925 9926 vm_object_lock(obj); 9927 9928 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 9929 ref_count--; 9930 9931 if (look_for_pages) { 9932 for 
(last_offset = offset + range; 9933 offset < last_offset; 9934 offset += PAGE_SIZE_64, va += PAGE_SIZE) 9935 vm_map_region_look_for_page(map, va, obj, 9936 offset, ref_count, 9937 0, extended); 9938 } else { 9939 shadow_object = obj->shadow; 9940 shadow_depth = 0; 9941 9942 if ( !(obj->pager_trusted) && !(obj->internal)) 9943 extended->external_pager = 1; 9944 9945 if (shadow_object != VM_OBJECT_NULL) { 9946 vm_object_lock(shadow_object); 9947 for (; 9948 shadow_object != VM_OBJECT_NULL; 9949 shadow_depth++) { 9950 vm_object_t next_shadow; 9951 9952 if ( !(shadow_object->pager_trusted) && 9953 !(shadow_object->internal)) 9954 extended->external_pager = 1; 9955 9956 next_shadow = shadow_object->shadow; 9957 if (next_shadow) { 9958 vm_object_lock(next_shadow); 9959 } 9960 vm_object_unlock(shadow_object); 9961 shadow_object = next_shadow; 9962 } 9963 } 9964 extended->shadow_depth = shadow_depth; 9965 } 9966 9967 if (extended->shadow_depth || entry->needs_copy) 9968 extended->share_mode = SM_COW; 9969 else { 9970 if (ref_count == 1) 9971 extended->share_mode = SM_PRIVATE; 9972 else { 9973 if (obj->true_share) 9974 extended->share_mode = SM_TRUESHARED; 9975 else 9976 extended->share_mode = SM_SHARED; 9977 } 9978 } 9979 extended->ref_count = ref_count - extended->shadow_depth; 9980 9981 for (i = 0; i < extended->shadow_depth; i++) { 9982 if ((tmp_obj = obj->shadow) == 0) 9983 break; 9984 vm_object_lock(tmp_obj); 9985 vm_object_unlock(obj); 9986 9987 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) 9988 ref_count--; 9989 9990 extended->ref_count += ref_count; 9991 obj = tmp_obj; 9992 } 9993 vm_object_unlock(obj); 9994 9995 if (extended->share_mode == SM_SHARED) { 9996 register vm_map_entry_t cur; 9997 register vm_map_entry_t last; 9998 int my_refs; 9999 10000 obj = entry->object.vm_object; 10001 last = vm_map_to_entry(map); 10002 my_refs = 0; 10003 10004 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 10005 ref_count--; 10006 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) 10007 my_refs += vm_map_region_count_obj_refs(cur, obj); 10008 10009 if (my_refs == ref_count) 10010 extended->share_mode = SM_PRIVATE_ALIASED; 10011 else if (my_refs > 1) 10012 extended->share_mode = SM_SHARED_ALIASED; 10013 } 10014 } 10015} 10016 10017 10018/* object is locked on entry and locked on return */ 10019 10020 10021static void 10022vm_map_region_look_for_page( 10023 __unused vm_map_t map, 10024 __unused vm_map_offset_t va, 10025 vm_object_t object, 10026 vm_object_offset_t offset, 10027 int max_refcnt, 10028 int depth, 10029 vm_region_extended_info_t extended) 10030{ 10031 register vm_page_t p; 10032 register vm_object_t shadow; 10033 register int ref_count; 10034 vm_object_t caller_object; 10035#if MACH_PAGEMAP 10036 kern_return_t kr; 10037#endif 10038 shadow = object->shadow; 10039 caller_object = object; 10040 10041 10042 while (TRUE) { 10043 10044 if ( !(object->pager_trusted) && !(object->internal)) 10045 extended->external_pager = 1; 10046 10047 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { 10048 if (shadow && (max_refcnt == 1)) 10049 extended->pages_shared_now_private++; 10050 10051 if (!p->fictitious && 10052 (p->dirty || pmap_is_modified(p->phys_page))) 10053 extended->pages_dirtied++; 10054 10055 extended->pages_resident++; 10056 10057 if(object != caller_object) 10058 vm_object_unlock(object); 10059 10060 return; 10061 } 10062#if MACH_PAGEMAP 10063 if (object->existence_map) { 10064 if 
(vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) { 10065 10066 extended->pages_swapped_out++; 10067 10068 if(object != caller_object) 10069 vm_object_unlock(object); 10070 10071 return; 10072 } 10073 } else if (object->internal && 10074 object->alive && 10075 !object->terminating && 10076 object->pager_ready) { 10077 10078 memory_object_t pager; 10079 10080 vm_object_paging_begin(object); 10081 pager = object->pager; 10082 vm_object_unlock(object); 10083 10084 kr = memory_object_data_request( 10085 pager, 10086 offset + object->paging_offset, 10087 0, /* just poke the pager */ 10088 VM_PROT_READ, 10089 NULL); 10090 10091 vm_object_lock(object); 10092 vm_object_paging_end(object); 10093 10094 if (kr == KERN_SUCCESS) { 10095 /* the pager has that page */ 10096 extended->pages_swapped_out++; 10097 if (object != caller_object) 10098 vm_object_unlock(object); 10099 return; 10100 } 10101 } 10102#endif /* MACH_PAGEMAP */ 10103 10104 if (shadow) { 10105 vm_object_lock(shadow); 10106 10107 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) 10108 ref_count--; 10109 10110 if (++depth > extended->shadow_depth) 10111 extended->shadow_depth = depth; 10112 10113 if (ref_count > max_refcnt) 10114 max_refcnt = ref_count; 10115 10116 if(object != caller_object) 10117 vm_object_unlock(object); 10118 10119 offset = offset + object->vo_shadow_offset; 10120 object = shadow; 10121 shadow = object->shadow; 10122 continue; 10123 } 10124 if(object != caller_object) 10125 vm_object_unlock(object); 10126 break; 10127 } 10128} 10129 10130static int 10131vm_map_region_count_obj_refs( 10132 vm_map_entry_t entry, 10133 vm_object_t object) 10134{ 10135 register int ref_count; 10136 register vm_object_t chk_obj; 10137 register vm_object_t tmp_obj; 10138 10139 if (entry->object.vm_object == 0) 10140 return(0); 10141 10142 if (entry->is_sub_map) 10143 return(0); 10144 else { 10145 ref_count = 0; 10146 10147 chk_obj = entry->object.vm_object; 10148 vm_object_lock(chk_obj); 10149 10150 while (chk_obj) { 10151 if (chk_obj == object) 10152 ref_count++; 10153 tmp_obj = chk_obj->shadow; 10154 if (tmp_obj) 10155 vm_object_lock(tmp_obj); 10156 vm_object_unlock(chk_obj); 10157 10158 chk_obj = tmp_obj; 10159 } 10160 } 10161 return(ref_count); 10162} 10163 10164 10165/* 10166 * Routine: vm_map_simplify 10167 * 10168 * Description: 10169 * Attempt to simplify the map representation in 10170 * the vicinity of the given starting address. 10171 * Note: 10172 * This routine is intended primarily to keep the 10173 * kernel maps more compact -- they generally don't 10174 * benefit from the "expand a map entry" technology 10175 * at allocation time because the adjacent entry 10176 * is often wired down. 
10177 */ 10178void 10179vm_map_simplify_entry( 10180 vm_map_t map, 10181 vm_map_entry_t this_entry) 10182{ 10183 vm_map_entry_t prev_entry; 10184 10185 counter(c_vm_map_simplify_entry_called++); 10186 10187 prev_entry = this_entry->vme_prev; 10188 10189 if ((this_entry != vm_map_to_entry(map)) && 10190 (prev_entry != vm_map_to_entry(map)) && 10191 10192 (prev_entry->vme_end == this_entry->vme_start) && 10193 10194 (prev_entry->is_sub_map == this_entry->is_sub_map) && 10195 10196 (prev_entry->object.vm_object == this_entry->object.vm_object) && 10197 ((prev_entry->offset + (prev_entry->vme_end - 10198 prev_entry->vme_start)) 10199 == this_entry->offset) && 10200 10201 (prev_entry->inheritance == this_entry->inheritance) && 10202 (prev_entry->protection == this_entry->protection) && 10203 (prev_entry->max_protection == this_entry->max_protection) && 10204 (prev_entry->behavior == this_entry->behavior) && 10205 (prev_entry->alias == this_entry->alias) && 10206 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) && 10207 (prev_entry->no_cache == this_entry->no_cache) && 10208 (prev_entry->wired_count == this_entry->wired_count) && 10209 (prev_entry->user_wired_count == this_entry->user_wired_count) && 10210 10211 (prev_entry->needs_copy == this_entry->needs_copy) && 10212 (prev_entry->permanent == this_entry->permanent) && 10213 10214 (prev_entry->use_pmap == FALSE) && 10215 (this_entry->use_pmap == FALSE) && 10216 (prev_entry->in_transition == FALSE) && 10217 (this_entry->in_transition == FALSE) && 10218 (prev_entry->needs_wakeup == FALSE) && 10219 (this_entry->needs_wakeup == FALSE) && 10220 (prev_entry->is_shared == FALSE) && 10221 (this_entry->is_shared == FALSE) 10222 ) { 10223 vm_map_store_entry_unlink(map, prev_entry); 10224 assert(prev_entry->vme_start < this_entry->vme_end); 10225 this_entry->vme_start = prev_entry->vme_start; 10226 this_entry->offset = prev_entry->offset; 10227 if (prev_entry->is_sub_map) { 10228 vm_map_deallocate(prev_entry->object.sub_map); 10229 } else { 10230 vm_object_deallocate(prev_entry->object.vm_object); 10231 } 10232 vm_map_entry_dispose(map, prev_entry); 10233 SAVE_HINT_MAP_WRITE(map, this_entry); 10234 counter(c_vm_map_simplified++); 10235 } 10236} 10237 10238void 10239vm_map_simplify( 10240 vm_map_t map, 10241 vm_map_offset_t start) 10242{ 10243 vm_map_entry_t this_entry; 10244 10245 vm_map_lock(map); 10246 if (vm_map_lookup_entry(map, start, &this_entry)) { 10247 vm_map_simplify_entry(map, this_entry); 10248 vm_map_simplify_entry(map, this_entry->vme_next); 10249 } 10250 counter(c_vm_map_simplify_called++); 10251 vm_map_unlock(map); 10252} 10253 10254static void 10255vm_map_simplify_range( 10256 vm_map_t map, 10257 vm_map_offset_t start, 10258 vm_map_offset_t end) 10259{ 10260 vm_map_entry_t entry; 10261 10262 /* 10263 * The map should be locked (for "write") by the caller. 
10264 */ 10265 10266 if (start >= end) { 10267 /* invalid address range */ 10268 return; 10269 } 10270 10271 start = vm_map_trunc_page(start); 10272 end = vm_map_round_page(end); 10273 10274 if (!vm_map_lookup_entry(map, start, &entry)) { 10275 /* "start" is not mapped and "entry" ends before "start" */ 10276 if (entry == vm_map_to_entry(map)) { 10277 /* start with first entry in the map */ 10278 entry = vm_map_first_entry(map); 10279 } else { 10280 /* start with next entry */ 10281 entry = entry->vme_next; 10282 } 10283 } 10284 10285 while (entry != vm_map_to_entry(map) && 10286 entry->vme_start <= end) { 10287 /* try and coalesce "entry" with its previous entry */ 10288 vm_map_simplify_entry(map, entry); 10289 entry = entry->vme_next; 10290 } 10291} 10292 10293 10294/* 10295 * Routine: vm_map_machine_attribute 10296 * Purpose: 10297 * Provide machine-specific attributes to mappings, 10298 * such as cachability etc. for machines that provide 10299 * them. NUMA architectures and machines with big/strange 10300 * caches will use this. 10301 * Note: 10302 * Responsibilities for locking and checking are handled here, 10303 * everything else in the pmap module. If any non-volatile 10304 * information must be kept, the pmap module should handle 10305 * it itself. [This assumes that attributes do not 10306 * need to be inherited, which seems ok to me] 10307 */ 10308kern_return_t 10309vm_map_machine_attribute( 10310 vm_map_t map, 10311 vm_map_offset_t start, 10312 vm_map_offset_t end, 10313 vm_machine_attribute_t attribute, 10314 vm_machine_attribute_val_t* value) /* IN/OUT */ 10315{ 10316 kern_return_t ret; 10317 vm_map_size_t sync_size; 10318 vm_map_entry_t entry; 10319 10320 if (start < vm_map_min(map) || end > vm_map_max(map)) 10321 return KERN_INVALID_ADDRESS; 10322 10323 /* Figure how much memory we need to flush (in page increments) */ 10324 sync_size = end - start; 10325 10326 vm_map_lock(map); 10327 10328 if (attribute != MATTR_CACHE) { 10329 /* If we don't have to find physical addresses, we */ 10330 /* don't have to do an explicit traversal here. 
*/ 10331 ret = pmap_attribute(map->pmap, start, end-start, 10332 attribute, value); 10333 vm_map_unlock(map); 10334 return ret; 10335 } 10336 10337 ret = KERN_SUCCESS; /* Assume it all worked */ 10338 10339 while(sync_size) { 10340 if (vm_map_lookup_entry(map, start, &entry)) { 10341 vm_map_size_t sub_size; 10342 if((entry->vme_end - start) > sync_size) { 10343 sub_size = sync_size; 10344 sync_size = 0; 10345 } else { 10346 sub_size = entry->vme_end - start; 10347 sync_size -= sub_size; 10348 } 10349 if(entry->is_sub_map) { 10350 vm_map_offset_t sub_start; 10351 vm_map_offset_t sub_end; 10352 10353 sub_start = (start - entry->vme_start) 10354 + entry->offset; 10355 sub_end = sub_start + sub_size; 10356 vm_map_machine_attribute( 10357 entry->object.sub_map, 10358 sub_start, 10359 sub_end, 10360 attribute, value); 10361 } else { 10362 if(entry->object.vm_object) { 10363 vm_page_t m; 10364 vm_object_t object; 10365 vm_object_t base_object; 10366 vm_object_t last_object; 10367 vm_object_offset_t offset; 10368 vm_object_offset_t base_offset; 10369 vm_map_size_t range; 10370 range = sub_size; 10371 offset = (start - entry->vme_start) 10372 + entry->offset; 10373 base_offset = offset; 10374 object = entry->object.vm_object; 10375 base_object = object; 10376 last_object = NULL; 10377 10378 vm_object_lock(object); 10379 10380 while (range) { 10381 m = vm_page_lookup( 10382 object, offset); 10383 10384 if (m && !m->fictitious) { 10385 ret = 10386 pmap_attribute_cache_sync( 10387 m->phys_page, 10388 PAGE_SIZE, 10389 attribute, value); 10390 10391 } else if (object->shadow) { 10392 offset = offset + object->vo_shadow_offset; 10393 last_object = object; 10394 object = object->shadow; 10395 vm_object_lock(last_object->shadow); 10396 vm_object_unlock(last_object); 10397 continue; 10398 } 10399 range -= PAGE_SIZE; 10400 10401 if (base_object != object) { 10402 vm_object_unlock(object); 10403 vm_object_lock(base_object); 10404 object = base_object; 10405 } 10406 /* Bump to the next page */ 10407 base_offset += PAGE_SIZE; 10408 offset = base_offset; 10409 } 10410 vm_object_unlock(object); 10411 } 10412 } 10413 start += sub_size; 10414 } else { 10415 vm_map_unlock(map); 10416 return KERN_FAILURE; 10417 } 10418 10419 } 10420 10421 vm_map_unlock(map); 10422 10423 return ret; 10424} 10425 10426/* 10427 * vm_map_behavior_set: 10428 * 10429 * Sets the paging reference behavior of the specified address 10430 * range in the target map. Paging reference behavior affects 10431 * how pagein operations resulting from faults on the map will be 10432 * clustered. 10433 */ 10434kern_return_t 10435vm_map_behavior_set( 10436 vm_map_t map, 10437 vm_map_offset_t start, 10438 vm_map_offset_t end, 10439 vm_behavior_t new_behavior) 10440{ 10441 register vm_map_entry_t entry; 10442 vm_map_entry_t temp_entry; 10443 10444 XPR(XPR_VM_MAP, 10445 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", 10446 map, start, end, new_behavior, 0); 10447 10448 if (start > end || 10449 start < vm_map_min(map) || 10450 end > vm_map_max(map)) { 10451 return KERN_NO_SPACE; 10452 } 10453 10454 switch (new_behavior) { 10455 10456 /* 10457 * This first block of behaviors all set a persistent state on the specified 10458 * memory range. All we have to do here is to record the desired behavior 10459 * in the vm_map_entry_t's. 
10460 */ 10461 10462 case VM_BEHAVIOR_DEFAULT: 10463 case VM_BEHAVIOR_RANDOM: 10464 case VM_BEHAVIOR_SEQUENTIAL: 10465 case VM_BEHAVIOR_RSEQNTL: 10466 case VM_BEHAVIOR_ZERO_WIRED_PAGES: 10467 vm_map_lock(map); 10468 10469 /* 10470 * The entire address range must be valid for the map. 10471 * Note that vm_map_range_check() does a 10472 * vm_map_lookup_entry() internally and returns the 10473 * entry containing the start of the address range if 10474 * the entire range is valid. 10475 */ 10476 if (vm_map_range_check(map, start, end, &temp_entry)) { 10477 entry = temp_entry; 10478 vm_map_clip_start(map, entry, start); 10479 } 10480 else { 10481 vm_map_unlock(map); 10482 return(KERN_INVALID_ADDRESS); 10483 } 10484 10485 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 10486 vm_map_clip_end(map, entry, end); 10487 assert(!entry->use_pmap); 10488 10489 if( new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES ) { 10490 entry->zero_wired_pages = TRUE; 10491 } else { 10492 entry->behavior = new_behavior; 10493 } 10494 entry = entry->vme_next; 10495 } 10496 10497 vm_map_unlock(map); 10498 break; 10499 10500 /* 10501 * The rest of these are different from the above in that they cause 10502 * an immediate action to take place as opposed to setting a behavior that 10503 * affects future actions. 10504 */ 10505 10506 case VM_BEHAVIOR_WILLNEED: 10507 return vm_map_willneed(map, start, end); 10508 10509 case VM_BEHAVIOR_DONTNEED: 10510 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS); 10511 10512 case VM_BEHAVIOR_FREE: 10513 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS); 10514 10515 case VM_BEHAVIOR_REUSABLE: 10516 return vm_map_reusable_pages(map, start, end); 10517 10518 case VM_BEHAVIOR_REUSE: 10519 return vm_map_reuse_pages(map, start, end); 10520 10521 case VM_BEHAVIOR_CAN_REUSE: 10522 return vm_map_can_reuse(map, start, end); 10523 10524 default: 10525 return(KERN_INVALID_ARGUMENT); 10526 } 10527 10528 return(KERN_SUCCESS); 10529} 10530 10531 10532/* 10533 * Internals for madvise(MADV_WILLNEED) system call. 10534 * 10535 * The present implementation is to do a read-ahead if the mapping corresponds 10536 * to a mapped regular file. If it's an anonymous mapping, then we do nothing 10537 * and basically ignore the "advice" (which we are always free to do). 10538 */ 10539 10540 10541static kern_return_t 10542vm_map_willneed( 10543 vm_map_t map, 10544 vm_map_offset_t start, 10545 vm_map_offset_t end 10546) 10547{ 10548 vm_map_entry_t entry; 10549 vm_object_t object; 10550 memory_object_t pager; 10551 struct vm_object_fault_info fault_info; 10552 kern_return_t kr; 10553 vm_object_size_t len; 10554 vm_object_offset_t offset; 10555 10556 /* 10557 * Fill in static values in fault_info. Several fields get ignored by the code 10558 * we call, but we'll fill them in anyway since uninitialized fields are bad 10559 * when it comes to future backwards compatibility. 10560 */ 10561 10562 fault_info.interruptible = THREAD_UNINT; /* ignored value */ 10563 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; 10564 fault_info.no_cache = FALSE; /* ignored value */ 10565 fault_info.stealth = TRUE; 10566 fault_info.io_sync = FALSE; 10567 fault_info.cs_bypass = FALSE; 10568 fault_info.mark_zf_absent = FALSE; 10569 fault_info.batch_pmap_op = FALSE; 10570 10571 /* 10572 * The MADV_WILLNEED operation doesn't require any changes to the 10573 * vm_map_entry_t's, so the read lock is sufficient. 
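 * (The read lock is also dropped around each memory_object_data_request()
 * call below and re-taken to look up the next entry, so the entry list
 * may change between iterations.)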
10574 */ 10575 10576 vm_map_lock_read(map); 10577 10578 /* 10579 * The madvise semantics require that the address range be fully 10580 * allocated with no holes. Otherwise, we're required to return 10581 * an error. 10582 */ 10583 10584 if (! vm_map_range_check(map, start, end, &entry)) { 10585 vm_map_unlock_read(map); 10586 return KERN_INVALID_ADDRESS; 10587 } 10588 10589 /* 10590 * Examine each vm_map_entry_t in the range. 10591 */ 10592 for (; entry != vm_map_to_entry(map) && start < end; ) { 10593 10594 /* 10595 * The first time through, the start address could be anywhere 10596 * within the vm_map_entry we found. So adjust the offset to 10597 * correspond. After that, the offset will always be zero to 10598 * correspond to the beginning of the current vm_map_entry. 10599 */ 10600 offset = (start - entry->vme_start) + entry->offset; 10601 10602 /* 10603 * Set the length so we don't go beyond the end of the 10604 * map_entry or beyond the end of the range we were given. 10605 * This range could span also multiple map entries all of which 10606 * map different files, so make sure we only do the right amount 10607 * of I/O for each object. Note that it's possible for there 10608 * to be multiple map entries all referring to the same object 10609 * but with different page permissions, but it's not worth 10610 * trying to optimize that case. 10611 */ 10612 len = MIN(entry->vme_end - start, end - start); 10613 10614 if ((vm_size_t) len != len) { 10615 /* 32-bit overflow */ 10616 len = (vm_size_t) (0 - PAGE_SIZE); 10617 } 10618 fault_info.cluster_size = (vm_size_t) len; 10619 fault_info.lo_offset = offset; 10620 fault_info.hi_offset = offset + len; 10621 fault_info.user_tag = entry->alias; 10622 10623 /* 10624 * If there's no read permission to this mapping, then just 10625 * skip it. 10626 */ 10627 if ((entry->protection & VM_PROT_READ) == 0) { 10628 entry = entry->vme_next; 10629 start = entry->vme_start; 10630 continue; 10631 } 10632 10633 /* 10634 * Find the file object backing this map entry. If there is 10635 * none, then we simply ignore the "will need" advice for this 10636 * entry and go on to the next one. 10637 */ 10638 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) { 10639 entry = entry->vme_next; 10640 start = entry->vme_start; 10641 continue; 10642 } 10643 10644 /* 10645 * The data_request() could take a long time, so let's 10646 * release the map lock to avoid blocking other threads. 10647 */ 10648 vm_map_unlock_read(map); 10649 10650 vm_object_paging_begin(object); 10651 pager = object->pager; 10652 vm_object_unlock(object); 10653 10654 /* 10655 * Get the data from the object asynchronously. 10656 * 10657 * Note that memory_object_data_request() places limits on the 10658 * amount of I/O it will do. Regardless of the len we 10659 * specified, it won't do more than MAX_UPL_TRANSFER and it 10660 * silently truncates the len to that size. This isn't 10661 * necessarily bad since madvise shouldn't really be used to 10662 * page in unlimited amounts of data. Other Unix variants 10663 * limit the willneed case as well. If this turns out to be an 10664 * issue for developers, then we can always adjust the policy 10665 * here and still be backwards compatible since this is all 10666 * just "advice". 
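 * For instance, assuming 4 KB pages and a MAX_UPL_TRANSFER of 256 pages
 * (illustrative values only), a single request would be capped at roughly
 * 1 MB no matter how large a len was computed above.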
10667 */ 10668 kr = memory_object_data_request( 10669 pager, 10670 offset + object->paging_offset, 10671 0, /* ignored */ 10672 VM_PROT_READ, 10673 (memory_object_fault_info_t)&fault_info); 10674 10675 vm_object_lock(object); 10676 vm_object_paging_end(object); 10677 vm_object_unlock(object); 10678 10679 /* 10680 * If we couldn't do the I/O for some reason, just give up on 10681 * the madvise. We still return success to the user since 10682 * madvise isn't supposed to fail when the advice can't be 10683 * taken. 10684 */ 10685 if (kr != KERN_SUCCESS) { 10686 return KERN_SUCCESS; 10687 } 10688 10689 start += len; 10690 if (start >= end) { 10691 /* done */ 10692 return KERN_SUCCESS; 10693 } 10694 10695 /* look up next entry */ 10696 vm_map_lock_read(map); 10697 if (! vm_map_lookup_entry(map, start, &entry)) { 10698 /* 10699 * There's a new hole in the address range. 10700 */ 10701 vm_map_unlock_read(map); 10702 return KERN_INVALID_ADDRESS; 10703 } 10704 } 10705 10706 vm_map_unlock_read(map); 10707 return KERN_SUCCESS; 10708} 10709 10710static boolean_t 10711vm_map_entry_is_reusable( 10712 vm_map_entry_t entry) 10713{ 10714 vm_object_t object; 10715 10716 if (entry->is_shared || 10717 entry->is_sub_map || 10718 entry->in_transition || 10719 entry->protection != VM_PROT_DEFAULT || 10720 entry->max_protection != VM_PROT_ALL || 10721 entry->inheritance != VM_INHERIT_DEFAULT || 10722 entry->no_cache || 10723 entry->permanent || 10724 entry->superpage_size != 0 || 10725 entry->zero_wired_pages || 10726 entry->wired_count != 0 || 10727 entry->user_wired_count != 0) { 10728 return FALSE; 10729 } 10730 10731 object = entry->object.vm_object; 10732 if (object == VM_OBJECT_NULL) { 10733 return TRUE; 10734 } 10735 if ( 10736#if 0 10737 /* 10738 * Let's proceed even if the VM object is potentially 10739 * shared. 10740 * We check for this later when processing the actual 10741 * VM pages, so the contents will be safe if shared. 10742 * 10743 * But we can still mark this memory region as "reusable" to 10744 * acknowledge that the caller did let us know that the memory 10745 * could be re-used and should not be penalized for holding 10746 * on to it. This allows its "resident size" to not include 10747 * the reusable range. 10748 */ 10749 object->ref_count == 1 && 10750#endif 10751 object->wired_page_count == 0 && 10752 object->copy == VM_OBJECT_NULL && 10753 object->shadow == VM_OBJECT_NULL && 10754 object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && 10755 object->internal && 10756 !object->true_share && 10757 object->wimg_bits == VM_WIMG_USE_DEFAULT && 10758 !object->code_signed) { 10759 return TRUE; 10760 } 10761 return FALSE; 10762 10763 10764} 10765 10766static kern_return_t 10767vm_map_reuse_pages( 10768 vm_map_t map, 10769 vm_map_offset_t start, 10770 vm_map_offset_t end) 10771{ 10772 vm_map_entry_t entry; 10773 vm_object_t object; 10774 vm_object_offset_t start_offset, end_offset; 10775 10776 /* 10777 * The MADV_REUSE operation doesn't require any changes to the 10778 * vm_map_entry_t's, so the read lock is sufficient. 10779 */ 10780 10781 vm_map_lock_read(map); 10782 10783 /* 10784 * The madvise semantics require that the address range be fully 10785 * allocated with no holes. Otherwise, we're required to return 10786 * an error. 10787 */ 10788 10789 if (!vm_map_range_check(map, start, end, &entry)) { 10790 vm_map_unlock_read(map); 10791 vm_page_stats_reusable.reuse_pages_failure++; 10792 return KERN_INVALID_ADDRESS; 10793 } 10794 10795 /* 10796 * Examine each vm_map_entry_t in the range. 
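 * Every entry must satisfy vm_map_entry_is_reusable(); a shared, wired,
 * submap or otherwise unsuitable entry makes the whole operation fail
 * with KERN_INVALID_ADDRESS.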
10797 */ 10798 for (; entry != vm_map_to_entry(map) && entry->vme_start < end; 10799 entry = entry->vme_next) { 10800 /* 10801 * Sanity check on the VM map entry. 10802 */ 10803 if (! vm_map_entry_is_reusable(entry)) { 10804 vm_map_unlock_read(map); 10805 vm_page_stats_reusable.reuse_pages_failure++; 10806 return KERN_INVALID_ADDRESS; 10807 } 10808 10809 /* 10810 * The first time through, the start address could be anywhere 10811 * within the vm_map_entry we found. So adjust the offset to 10812 * correspond. 10813 */ 10814 if (entry->vme_start < start) { 10815 start_offset = start - entry->vme_start; 10816 } else { 10817 start_offset = 0; 10818 } 10819 end_offset = MIN(end, entry->vme_end) - entry->vme_start; 10820 start_offset += entry->offset; 10821 end_offset += entry->offset; 10822 10823 object = entry->object.vm_object; 10824 if (object != VM_OBJECT_NULL) { 10825 vm_object_lock(object); 10826 vm_object_reuse_pages(object, start_offset, end_offset, 10827 TRUE); 10828 vm_object_unlock(object); 10829 } 10830 10831 if (entry->alias == VM_MEMORY_MALLOC_LARGE_REUSABLE) { 10832 /* 10833 * XXX 10834 * We do not hold the VM map exclusively here. 10835 * The "alias" field is not that critical, so it's 10836 * safe to update it here, as long as it is the only 10837 * one that can be modified while holding the VM map 10838 * "shared". 10839 */ 10840 entry->alias = VM_MEMORY_MALLOC_LARGE_REUSED; 10841 } 10842 } 10843 10844 vm_map_unlock_read(map); 10845 vm_page_stats_reusable.reuse_pages_success++; 10846 return KERN_SUCCESS; 10847} 10848 10849 10850static kern_return_t 10851vm_map_reusable_pages( 10852 vm_map_t map, 10853 vm_map_offset_t start, 10854 vm_map_offset_t end) 10855{ 10856 vm_map_entry_t entry; 10857 vm_object_t object; 10858 vm_object_offset_t start_offset, end_offset; 10859 10860 /* 10861 * The MADV_REUSABLE operation doesn't require any changes to the 10862 * vm_map_entry_t's, so the read lock is sufficient. 10863 */ 10864 10865 vm_map_lock_read(map); 10866 10867 /* 10868 * The madvise semantics require that the address range be fully 10869 * allocated with no holes. Otherwise, we're required to return 10870 * an error. 10871 */ 10872 10873 if (!vm_map_range_check(map, start, end, &entry)) { 10874 vm_map_unlock_read(map); 10875 vm_page_stats_reusable.reusable_pages_failure++; 10876 return KERN_INVALID_ADDRESS; 10877 } 10878 10879 /* 10880 * Examine each vm_map_entry_t in the range. 10881 */ 10882 for (; entry != vm_map_to_entry(map) && entry->vme_start < end; 10883 entry = entry->vme_next) { 10884 int kill_pages = 0; 10885 10886 /* 10887 * Sanity check on the VM map entry. 10888 */ 10889 if (! vm_map_entry_is_reusable(entry)) { 10890 vm_map_unlock_read(map); 10891 vm_page_stats_reusable.reusable_pages_failure++; 10892 return KERN_INVALID_ADDRESS; 10893 } 10894 10895 /* 10896 * The first time through, the start address could be anywhere 10897 * within the vm_map_entry we found. So adjust the offset to 10898 * correspond. 
 */
		if (entry->vme_start < start) {
			start_offset = start - entry->vme_start;
		} else {
			start_offset = 0;
		}
		end_offset = MIN(end, entry->vme_end) - entry->vme_start;
		start_offset += entry->offset;
		end_offset += entry->offset;

		object = entry->object.vm_object;
		if (object == VM_OBJECT_NULL)
			continue;


		vm_object_lock(object);
		if (object->ref_count == 1 && !object->shadow)
			kill_pages = 1;
		else
			kill_pages = -1;
		if (kill_pages != -1) {
			vm_object_deactivate_pages(object,
						   start_offset,
						   end_offset - start_offset,
						   kill_pages,
						   TRUE /*reusable_pages*/);
		} else {
			vm_page_stats_reusable.reusable_pages_shared++;
		}
		vm_object_unlock(object);

		if (entry->alias == VM_MEMORY_MALLOC_LARGE ||
		    entry->alias == VM_MEMORY_MALLOC_LARGE_REUSED) {
			/*
			 * XXX
			 * We do not hold the VM map exclusively here.
			 * The "alias" field is not that critical, so it's
			 * safe to update it here, as long as it is the only
			 * one that can be modified while holding the VM map
			 * "shared".
			 */
			entry->alias = VM_MEMORY_MALLOC_LARGE_REUSABLE;
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.reusable_pages_success++;
	return KERN_SUCCESS;
}


static kern_return_t
vm_map_can_reuse(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_entry_t	entry;

	/*
	 * The MADV_REUSABLE operation doesn't require any changes to the
	 * vm_map_entry_t's, so the read lock is sufficient.
	 */

	vm_map_lock_read(map);

	/*
	 * The madvise semantics require that the address range be fully
	 * allocated with no holes.  Otherwise, we're required to return
	 * an error.
	 */

	if (!vm_map_range_check(map, start, end, &entry)) {
		vm_map_unlock_read(map);
		vm_page_stats_reusable.can_reuse_failure++;
		return KERN_INVALID_ADDRESS;
	}

	/*
	 * Examine each vm_map_entry_t in the range.
	 */
	for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
	     entry = entry->vme_next) {
		/*
		 * Sanity check on the VM map entry.
		 */
		if (! vm_map_entry_is_reusable(entry)) {
			vm_map_unlock_read(map);
			vm_page_stats_reusable.can_reuse_failure++;
			return KERN_INVALID_ADDRESS;
		}
	}

	vm_map_unlock_read(map);
	vm_page_stats_reusable.can_reuse_success++;
	return KERN_SUCCESS;
}


/*
 * Routine:	vm_map_entry_insert
 *
 * Description:	This routine inserts a new vm_entry in a locked map.
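 *
 *		A minimal usage sketch (hypothetical caller; assumes "map"
 *		is already locked for writing and "where" is the entry that
 *		the new range should follow):
 *
 *			new_entry = vm_map_entry_insert(map, where,
 *						start, end,
 *						VM_OBJECT_NULL, 0,
 *						FALSE, FALSE, FALSE,
 *						VM_PROT_DEFAULT, VM_PROT_ALL,
 *						VM_BEHAVIOR_DEFAULT,
 *						VM_INHERIT_DEFAULT, 0,
 *						FALSE, FALSE, 0);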
 */
vm_map_entry_t
vm_map_entry_insert(
	vm_map_t		map,
	vm_map_entry_t		insp_entry,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	vm_object_t		object,
	vm_object_offset_t	offset,
	boolean_t		needs_copy,
	boolean_t		is_shared,
	boolean_t		in_transition,
	vm_prot_t		cur_protection,
	vm_prot_t		max_protection,
	vm_behavior_t		behavior,
	vm_inherit_t		inheritance,
	unsigned		wired_count,
	boolean_t		no_cache,
	boolean_t		permanent,
	unsigned int		superpage_size)
{
	vm_map_entry_t	new_entry;

	assert(insp_entry != (vm_map_entry_t)0);

	new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);

	new_entry->vme_start = start;
	new_entry->vme_end = end;
	assert(page_aligned(new_entry->vme_start));
	assert(page_aligned(new_entry->vme_end));
	assert(new_entry->vme_start < new_entry->vme_end);

	new_entry->object.vm_object = object;
	new_entry->offset = offset;
	new_entry->is_shared = is_shared;
	new_entry->is_sub_map = FALSE;
	new_entry->needs_copy = needs_copy;
	new_entry->in_transition = in_transition;
	new_entry->needs_wakeup = FALSE;
	new_entry->inheritance = inheritance;
	new_entry->protection = cur_protection;
	new_entry->max_protection = max_protection;
	new_entry->behavior = behavior;
	new_entry->wired_count = wired_count;
	new_entry->user_wired_count = 0;
	new_entry->use_pmap = FALSE;
	new_entry->alias = 0;
	new_entry->zero_wired_pages = FALSE;
	new_entry->no_cache = no_cache;
	new_entry->permanent = permanent;
	new_entry->superpage_size = superpage_size;
	new_entry->used_for_jit = FALSE;

	/*
	 *	Insert the new entry into the list.
	 */

	vm_map_store_entry_link(map, insp_entry, new_entry);
	map->size += end - start;

	/*
	 *	Update the free space hint and the lookup hint.
	 */

	SAVE_HINT_MAP_WRITE(map, new_entry);
	return new_entry;
}

/*
 * Routine:	vm_map_remap_extract
 *
 * Description:	This routine returns a vm_entry list from a map.
 */
static kern_return_t
vm_map_remap_extract(
	vm_map_t		map,
	vm_map_offset_t		addr,
	vm_map_size_t		size,
	boolean_t		copy,
	struct vm_map_header	*map_header,
	vm_prot_t		*cur_protection,
	vm_prot_t		*max_protection,
	/* What, no behavior? */
	vm_inherit_t		inheritance,
	boolean_t		pageable)
{
	kern_return_t		result;
	vm_map_size_t		mapped_size;
	vm_map_size_t		tmp_size;
	vm_map_entry_t		src_entry;	/* result of last map lookup */
	vm_map_entry_t		new_entry;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_address;
	vm_map_offset_t		src_start;	/* start of entry to map */
	vm_map_offset_t		src_end;	/* end of region to be mapped */
	vm_object_t		object;
	vm_map_version_t	version;
	boolean_t		src_needs_copy;
	boolean_t		new_entry_needs_copy;

	assert(map != VM_MAP_NULL);
	assert(size != 0 && size == vm_map_round_page(size));
	assert(inheritance == VM_INHERIT_NONE ||
	       inheritance == VM_INHERIT_COPY ||
	       inheritance == VM_INHERIT_SHARE);

	/*
	 *	Compute start and end of region.
	 */
	src_start = vm_map_trunc_page(addr);
	src_end = vm_map_round_page(src_start + size);

	/*
	 *	Initialize map_header.
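	 *	The header acts as a free-standing list for the extracted
	 *	entries; they are linked onto it here and only later spliced
	 *	into the target map by vm_map_remap().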
11117 */ 11118 map_header->links.next = (struct vm_map_entry *)&map_header->links; 11119 map_header->links.prev = (struct vm_map_entry *)&map_header->links; 11120 map_header->nentries = 0; 11121 map_header->entries_pageable = pageable; 11122 11123 vm_map_store_init( map_header ); 11124 11125 *cur_protection = VM_PROT_ALL; 11126 *max_protection = VM_PROT_ALL; 11127 11128 map_address = 0; 11129 mapped_size = 0; 11130 result = KERN_SUCCESS; 11131 11132 /* 11133 * The specified source virtual space might correspond to 11134 * multiple map entries, need to loop on them. 11135 */ 11136 vm_map_lock(map); 11137 while (mapped_size != size) { 11138 vm_map_size_t entry_size; 11139 11140 /* 11141 * Find the beginning of the region. 11142 */ 11143 if (! vm_map_lookup_entry(map, src_start, &src_entry)) { 11144 result = KERN_INVALID_ADDRESS; 11145 break; 11146 } 11147 11148 if (src_start < src_entry->vme_start || 11149 (mapped_size && src_start != src_entry->vme_start)) { 11150 result = KERN_INVALID_ADDRESS; 11151 break; 11152 } 11153 11154 tmp_size = size - mapped_size; 11155 if (src_end > src_entry->vme_end) 11156 tmp_size -= (src_end - src_entry->vme_end); 11157 11158 entry_size = (vm_map_size_t)(src_entry->vme_end - 11159 src_entry->vme_start); 11160 11161 if(src_entry->is_sub_map) { 11162 vm_map_reference(src_entry->object.sub_map); 11163 object = VM_OBJECT_NULL; 11164 } else { 11165 object = src_entry->object.vm_object; 11166 11167 if (object == VM_OBJECT_NULL) { 11168 object = vm_object_allocate(entry_size); 11169 src_entry->offset = 0; 11170 src_entry->object.vm_object = object; 11171 } else if (object->copy_strategy != 11172 MEMORY_OBJECT_COPY_SYMMETRIC) { 11173 /* 11174 * We are already using an asymmetric 11175 * copy, and therefore we already have 11176 * the right object. 11177 */ 11178 assert(!src_entry->needs_copy); 11179 } else if (src_entry->needs_copy || object->shadowed || 11180 (object->internal && !object->true_share && 11181 !src_entry->is_shared && 11182 object->vo_size > entry_size)) { 11183 11184 vm_object_shadow(&src_entry->object.vm_object, 11185 &src_entry->offset, 11186 entry_size); 11187 11188 if (!src_entry->needs_copy && 11189 (src_entry->protection & VM_PROT_WRITE)) { 11190 vm_prot_t prot; 11191 11192 prot = src_entry->protection & ~VM_PROT_WRITE; 11193 11194 if (override_nx(map, src_entry->alias) && prot) 11195 prot |= VM_PROT_EXECUTE; 11196 11197 if(map->mapped_in_other_pmaps) { 11198 vm_object_pmap_protect( 11199 src_entry->object.vm_object, 11200 src_entry->offset, 11201 entry_size, 11202 PMAP_NULL, 11203 src_entry->vme_start, 11204 prot); 11205 } else { 11206 pmap_protect(vm_map_pmap(map), 11207 src_entry->vme_start, 11208 src_entry->vme_end, 11209 prot); 11210 } 11211 } 11212 11213 object = src_entry->object.vm_object; 11214 src_entry->needs_copy = FALSE; 11215 } 11216 11217 11218 vm_object_lock(object); 11219 vm_object_reference_locked(object); /* object ref. 
for new entry */ 11220 if (object->copy_strategy == 11221 MEMORY_OBJECT_COPY_SYMMETRIC) { 11222 object->copy_strategy = 11223 MEMORY_OBJECT_COPY_DELAY; 11224 } 11225 vm_object_unlock(object); 11226 } 11227 11228 offset = src_entry->offset + (src_start - src_entry->vme_start); 11229 11230 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable); 11231 vm_map_entry_copy(new_entry, src_entry); 11232 new_entry->use_pmap = FALSE; /* clr address space specifics */ 11233 11234 new_entry->vme_start = map_address; 11235 new_entry->vme_end = map_address + tmp_size; 11236 assert(new_entry->vme_start < new_entry->vme_end); 11237 new_entry->inheritance = inheritance; 11238 new_entry->offset = offset; 11239 11240 /* 11241 * The new region has to be copied now if required. 11242 */ 11243 RestartCopy: 11244 if (!copy) { 11245 /* 11246 * Cannot allow an entry describing a JIT 11247 * region to be shared across address spaces. 11248 */ 11249 if (src_entry->used_for_jit == TRUE) { 11250 result = KERN_INVALID_ARGUMENT; 11251 break; 11252 } 11253 src_entry->is_shared = TRUE; 11254 new_entry->is_shared = TRUE; 11255 if (!(new_entry->is_sub_map)) 11256 new_entry->needs_copy = FALSE; 11257 11258 } else if (src_entry->is_sub_map) { 11259 /* make this a COW sub_map if not already */ 11260 new_entry->needs_copy = TRUE; 11261 object = VM_OBJECT_NULL; 11262 } else if (src_entry->wired_count == 0 && 11263 vm_object_copy_quickly(&new_entry->object.vm_object, 11264 new_entry->offset, 11265 (new_entry->vme_end - 11266 new_entry->vme_start), 11267 &src_needs_copy, 11268 &new_entry_needs_copy)) { 11269 11270 new_entry->needs_copy = new_entry_needs_copy; 11271 new_entry->is_shared = FALSE; 11272 11273 /* 11274 * Handle copy_on_write semantics. 11275 */ 11276 if (src_needs_copy && !src_entry->needs_copy) { 11277 vm_prot_t prot; 11278 11279 prot = src_entry->protection & ~VM_PROT_WRITE; 11280 11281 if (override_nx(map, src_entry->alias) && prot) 11282 prot |= VM_PROT_EXECUTE; 11283 11284 vm_object_pmap_protect(object, 11285 offset, 11286 entry_size, 11287 ((src_entry->is_shared 11288 || map->mapped_in_other_pmaps) ? 11289 PMAP_NULL : map->pmap), 11290 src_entry->vme_start, 11291 prot); 11292 11293 src_entry->needs_copy = TRUE; 11294 } 11295 /* 11296 * Throw away the old object reference of the new entry. 11297 */ 11298 vm_object_deallocate(object); 11299 11300 } else { 11301 new_entry->is_shared = FALSE; 11302 11303 /* 11304 * The map can be safely unlocked since we 11305 * already hold a reference on the object. 11306 * 11307 * Record the timestamp of the map for later 11308 * verification, and unlock the map. 11309 */ 11310 version.main_timestamp = map->timestamp; 11311 vm_map_unlock(map); /* Increments timestamp once! */ 11312 11313 /* 11314 * Perform the copy. 11315 */ 11316 if (src_entry->wired_count > 0) { 11317 vm_object_lock(object); 11318 result = vm_object_copy_slowly( 11319 object, 11320 offset, 11321 entry_size, 11322 THREAD_UNINT, 11323 &new_entry->object.vm_object); 11324 11325 new_entry->offset = 0; 11326 new_entry->needs_copy = FALSE; 11327 } else { 11328 result = vm_object_copy_strategically( 11329 object, 11330 offset, 11331 entry_size, 11332 &new_entry->object.vm_object, 11333 &new_entry->offset, 11334 &new_entry_needs_copy); 11335 11336 new_entry->needs_copy = new_entry_needs_copy; 11337 } 11338 11339 /* 11340 * Throw away the old object reference of the new entry. 
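 * (vm_object_copy_slowly() or vm_object_copy_strategically() above
 * produced a new object reference for new_entry, so the reference taken
 * on "object" earlier is no longer needed.)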
11341 */ 11342 vm_object_deallocate(object); 11343 11344 if (result != KERN_SUCCESS && 11345 result != KERN_MEMORY_RESTART_COPY) { 11346 _vm_map_entry_dispose(map_header, new_entry); 11347 break; 11348 } 11349 11350 /* 11351 * Verify that the map has not substantially 11352 * changed while the copy was being made. 11353 */ 11354 11355 vm_map_lock(map); 11356 if (version.main_timestamp + 1 != map->timestamp) { 11357 /* 11358 * Simple version comparison failed. 11359 * 11360 * Retry the lookup and verify that the 11361 * same object/offset are still present. 11362 */ 11363 vm_object_deallocate(new_entry-> 11364 object.vm_object); 11365 _vm_map_entry_dispose(map_header, new_entry); 11366 if (result == KERN_MEMORY_RESTART_COPY) 11367 result = KERN_SUCCESS; 11368 continue; 11369 } 11370 11371 if (result == KERN_MEMORY_RESTART_COPY) { 11372 vm_object_reference(object); 11373 goto RestartCopy; 11374 } 11375 } 11376 11377 _vm_map_store_entry_link(map_header, 11378 map_header->links.prev, new_entry); 11379 11380 /*Protections for submap mapping are irrelevant here*/ 11381 if( !src_entry->is_sub_map ) { 11382 *cur_protection &= src_entry->protection; 11383 *max_protection &= src_entry->max_protection; 11384 } 11385 map_address += tmp_size; 11386 mapped_size += tmp_size; 11387 src_start += tmp_size; 11388 11389 } /* end while */ 11390 11391 vm_map_unlock(map); 11392 if (result != KERN_SUCCESS) { 11393 /* 11394 * Free all allocated elements. 11395 */ 11396 for (src_entry = map_header->links.next; 11397 src_entry != (struct vm_map_entry *)&map_header->links; 11398 src_entry = new_entry) { 11399 new_entry = src_entry->vme_next; 11400 _vm_map_store_entry_unlink(map_header, src_entry); 11401 vm_object_deallocate(src_entry->object.vm_object); 11402 _vm_map_entry_dispose(map_header, src_entry); 11403 } 11404 } 11405 return result; 11406} 11407 11408/* 11409 * Routine: vm_remap 11410 * 11411 * Map portion of a task's address space. 11412 * Mapped region must not overlap more than 11413 * one vm memory object. Protections and 11414 * inheritance attributes remain the same 11415 * as in the original task and are out parameters. 11416 * Source and Target task can be identical 11417 * Other attributes are identical as for vm_map() 11418 */ 11419kern_return_t 11420vm_map_remap( 11421 vm_map_t target_map, 11422 vm_map_address_t *address, 11423 vm_map_size_t size, 11424 vm_map_offset_t mask, 11425 int flags, 11426 vm_map_t src_map, 11427 vm_map_offset_t memory_address, 11428 boolean_t copy, 11429 vm_prot_t *cur_protection, 11430 vm_prot_t *max_protection, 11431 vm_inherit_t inheritance) 11432{ 11433 kern_return_t result; 11434 vm_map_entry_t entry; 11435 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL; 11436 vm_map_entry_t new_entry; 11437 struct vm_map_header map_header; 11438 11439 if (target_map == VM_MAP_NULL) 11440 return KERN_INVALID_ARGUMENT; 11441 11442 switch (inheritance) { 11443 case VM_INHERIT_NONE: 11444 case VM_INHERIT_COPY: 11445 case VM_INHERIT_SHARE: 11446 if (size != 0 && src_map != VM_MAP_NULL) 11447 break; 11448 /*FALL THRU*/ 11449 default: 11450 return KERN_INVALID_ARGUMENT; 11451 } 11452 11453 size = vm_map_round_page(size); 11454 11455 result = vm_map_remap_extract(src_map, memory_address, 11456 size, copy, &map_header, 11457 cur_protection, 11458 max_protection, 11459 inheritance, 11460 target_map->hdr. 
11461 entries_pageable); 11462 11463 if (result != KERN_SUCCESS) { 11464 return result; 11465 } 11466 11467 /* 11468 * Allocate/check a range of free virtual address 11469 * space for the target 11470 */ 11471 *address = vm_map_trunc_page(*address); 11472 vm_map_lock(target_map); 11473 result = vm_map_remap_range_allocate(target_map, address, size, 11474 mask, flags, &insp_entry); 11475 11476 for (entry = map_header.links.next; 11477 entry != (struct vm_map_entry *)&map_header.links; 11478 entry = new_entry) { 11479 new_entry = entry->vme_next; 11480 _vm_map_store_entry_unlink(&map_header, entry); 11481 if (result == KERN_SUCCESS) { 11482 entry->vme_start += *address; 11483 entry->vme_end += *address; 11484 vm_map_store_entry_link(target_map, insp_entry, entry); 11485 insp_entry = entry; 11486 } else { 11487 if (!entry->is_sub_map) { 11488 vm_object_deallocate(entry->object.vm_object); 11489 } else { 11490 vm_map_deallocate(entry->object.sub_map); 11491 } 11492 _vm_map_entry_dispose(&map_header, entry); 11493 } 11494 } 11495 11496 if( target_map->disable_vmentry_reuse == TRUE) { 11497 if( target_map->highest_entry_end < insp_entry->vme_end ){ 11498 target_map->highest_entry_end = insp_entry->vme_end; 11499 } 11500 } 11501 11502 if (result == KERN_SUCCESS) { 11503 target_map->size += size; 11504 SAVE_HINT_MAP_WRITE(target_map, insp_entry); 11505 } 11506 vm_map_unlock(target_map); 11507 11508 if (result == KERN_SUCCESS && target_map->wiring_required) 11509 result = vm_map_wire(target_map, *address, 11510 *address + size, *cur_protection, TRUE); 11511 return result; 11512} 11513 11514/* 11515 * Routine: vm_map_remap_range_allocate 11516 * 11517 * Description: 11518 * Allocate a range in the specified virtual address map. 11519 * returns the address and the map entry just before the allocated 11520 * range 11521 * 11522 * Map must be locked. 11523 */ 11524 11525static kern_return_t 11526vm_map_remap_range_allocate( 11527 vm_map_t map, 11528 vm_map_address_t *address, /* IN/OUT */ 11529 vm_map_size_t size, 11530 vm_map_offset_t mask, 11531 int flags, 11532 vm_map_entry_t *map_entry) /* OUT */ 11533{ 11534 vm_map_entry_t entry; 11535 vm_map_offset_t start; 11536 vm_map_offset_t end; 11537 kern_return_t kr; 11538 11539StartAgain: ; 11540 11541 start = *address; 11542 11543 if (flags & VM_FLAGS_ANYWHERE) 11544 { 11545 /* 11546 * Calculate the first possible address. 11547 */ 11548 11549 if (start < map->min_offset) 11550 start = map->min_offset; 11551 if (start > map->max_offset) 11552 return(KERN_NO_SPACE); 11553 11554 /* 11555 * Look for the first possible address; 11556 * if there's already something at this 11557 * address, we have to start after it. 11558 */ 11559 11560 if( map->disable_vmentry_reuse == TRUE) { 11561 VM_MAP_HIGHEST_ENTRY(map, entry, start); 11562 } else { 11563 assert(first_free_is_valid(map)); 11564 if (start == map->min_offset) { 11565 if ((entry = map->first_free) != vm_map_to_entry(map)) 11566 start = entry->vme_end; 11567 } else { 11568 vm_map_entry_t tmp_entry; 11569 if (vm_map_lookup_entry(map, start, &tmp_entry)) 11570 start = tmp_entry->vme_end; 11571 entry = tmp_entry; 11572 } 11573 } 11574 11575 /* 11576 * In any case, the "entry" always precedes 11577 * the proposed new region throughout the 11578 * loop: 11579 */ 11580 11581 while (TRUE) { 11582 register vm_map_entry_t next; 11583 11584 /* 11585 * Find the end of the proposed new region. 11586 * Be sure we didn't go beyond the end, or 11587 * wrap around the address. 
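 * For example, with a hypothetical alignment mask of 0xFFF and a
 * candidate start of 0x1234, "end" first becomes 0x2000 (the next
 * aligned boundary) and only then has "size" added to it.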
11588 */ 11589 11590 end = ((start + mask) & ~mask); 11591 if (end < start) 11592 return(KERN_NO_SPACE); 11593 start = end; 11594 end += size; 11595 11596 if ((end > map->max_offset) || (end < start)) { 11597 if (map->wait_for_space) { 11598 if (size <= (map->max_offset - 11599 map->min_offset)) { 11600 assert_wait((event_t) map, THREAD_INTERRUPTIBLE); 11601 vm_map_unlock(map); 11602 thread_block(THREAD_CONTINUE_NULL); 11603 vm_map_lock(map); 11604 goto StartAgain; 11605 } 11606 } 11607 11608 return(KERN_NO_SPACE); 11609 } 11610 11611 /* 11612 * If there are no more entries, we must win. 11613 */ 11614 11615 next = entry->vme_next; 11616 if (next == vm_map_to_entry(map)) 11617 break; 11618 11619 /* 11620 * If there is another entry, it must be 11621 * after the end of the potential new region. 11622 */ 11623 11624 if (next->vme_start >= end) 11625 break; 11626 11627 /* 11628 * Didn't fit -- move to the next entry. 11629 */ 11630 11631 entry = next; 11632 start = entry->vme_end; 11633 } 11634 *address = start; 11635 } else { 11636 vm_map_entry_t temp_entry; 11637 11638 /* 11639 * Verify that: 11640 * the address doesn't itself violate 11641 * the mask requirement. 11642 */ 11643 11644 if ((start & mask) != 0) 11645 return(KERN_NO_SPACE); 11646 11647 11648 /* 11649 * ... the address is within bounds 11650 */ 11651 11652 end = start + size; 11653 11654 if ((start < map->min_offset) || 11655 (end > map->max_offset) || 11656 (start >= end)) { 11657 return(KERN_INVALID_ADDRESS); 11658 } 11659 11660 /* 11661 * If we're asked to overwrite whatever was mapped in that 11662 * range, first deallocate that range. 11663 */ 11664 if (flags & VM_FLAGS_OVERWRITE) { 11665 vm_map_t zap_map; 11666 11667 /* 11668 * We use a "zap_map" to avoid having to unlock 11669 * the "map" in vm_map_delete(), which would compromise 11670 * the atomicity of the "deallocate" and then "remap" 11671 * combination. 11672 */ 11673 zap_map = vm_map_create(PMAP_NULL, 11674 start, 11675 end, 11676 map->hdr.entries_pageable); 11677 if (zap_map == VM_MAP_NULL) { 11678 return KERN_RESOURCE_SHORTAGE; 11679 } 11680 11681 kr = vm_map_delete(map, start, end, 11682 VM_MAP_REMOVE_SAVE_ENTRIES, 11683 zap_map); 11684 if (kr == KERN_SUCCESS) { 11685 vm_map_destroy(zap_map, 11686 VM_MAP_REMOVE_NO_PMAP_CLEANUP); 11687 zap_map = VM_MAP_NULL; 11688 } 11689 } 11690 11691 /* 11692 * ... the starting address isn't allocated 11693 */ 11694 11695 if (vm_map_lookup_entry(map, start, &temp_entry)) 11696 return(KERN_NO_SPACE); 11697 11698 entry = temp_entry; 11699 11700 /* 11701 * ... the next region doesn't overlap the 11702 * end point. 11703 */ 11704 11705 if ((entry->vme_next != vm_map_to_entry(map)) && 11706 (entry->vme_next->vme_start < end)) 11707 return(KERN_NO_SPACE); 11708 } 11709 *map_entry = entry; 11710 return(KERN_SUCCESS); 11711} 11712 11713/* 11714 * vm_map_switch: 11715 * 11716 * Set the address map for the current thread to the specified map 11717 */ 11718 11719vm_map_t 11720vm_map_switch( 11721 vm_map_t map) 11722{ 11723 int mycpu; 11724 thread_t thread = current_thread(); 11725 vm_map_t oldmap = thread->map; 11726 11727 mp_disable_preemption(); 11728 mycpu = cpu_number(); 11729 11730 /* 11731 * Deactivate the current map and activate the requested map 11732 */ 11733 PMAP_SWITCH_USER(thread, map, mycpu); 11734 11735 mp_enable_preemption(); 11736 return(oldmap); 11737} 11738 11739 11740/* 11741 * Routine: vm_map_write_user 11742 * 11743 * Description: 11744 * Copy out data from a kernel space into space in the 11745 * destination map. 
The space must already exist in the 11746 * destination map. 11747 * NOTE: This routine should only be called by threads 11748 * which can block on a page fault. i.e. kernel mode user 11749 * threads. 11750 * 11751 */ 11752kern_return_t 11753vm_map_write_user( 11754 vm_map_t map, 11755 void *src_p, 11756 vm_map_address_t dst_addr, 11757 vm_size_t size) 11758{ 11759 kern_return_t kr = KERN_SUCCESS; 11760 11761 if(current_map() == map) { 11762 if (copyout(src_p, dst_addr, size)) { 11763 kr = KERN_INVALID_ADDRESS; 11764 } 11765 } else { 11766 vm_map_t oldmap; 11767 11768 /* take on the identity of the target map while doing */ 11769 /* the transfer */ 11770 11771 vm_map_reference(map); 11772 oldmap = vm_map_switch(map); 11773 if (copyout(src_p, dst_addr, size)) { 11774 kr = KERN_INVALID_ADDRESS; 11775 } 11776 vm_map_switch(oldmap); 11777 vm_map_deallocate(map); 11778 } 11779 return kr; 11780} 11781 11782/* 11783 * Routine: vm_map_read_user 11784 * 11785 * Description: 11786 * Copy in data from a user space source map into the 11787 * kernel map. The space must already exist in the 11788 * kernel map. 11789 * NOTE: This routine should only be called by threads 11790 * which can block on a page fault. i.e. kernel mode user 11791 * threads. 11792 * 11793 */ 11794kern_return_t 11795vm_map_read_user( 11796 vm_map_t map, 11797 vm_map_address_t src_addr, 11798 void *dst_p, 11799 vm_size_t size) 11800{ 11801 kern_return_t kr = KERN_SUCCESS; 11802 11803 if(current_map() == map) { 11804 if (copyin(src_addr, dst_p, size)) { 11805 kr = KERN_INVALID_ADDRESS; 11806 } 11807 } else { 11808 vm_map_t oldmap; 11809 11810 /* take on the identity of the target map while doing */ 11811 /* the transfer */ 11812 11813 vm_map_reference(map); 11814 oldmap = vm_map_switch(map); 11815 if (copyin(src_addr, dst_p, size)) { 11816 kr = KERN_INVALID_ADDRESS; 11817 } 11818 vm_map_switch(oldmap); 11819 vm_map_deallocate(map); 11820 } 11821 return kr; 11822} 11823 11824 11825/* 11826 * vm_map_check_protection: 11827 * 11828 * Assert that the target map allows the specified 11829 * privilege on the entire address region given. 11830 * The entire region must be allocated. 11831 */ 11832boolean_t 11833vm_map_check_protection(vm_map_t map, vm_map_offset_t start, 11834 vm_map_offset_t end, vm_prot_t protection) 11835{ 11836 vm_map_entry_t entry; 11837 vm_map_entry_t tmp_entry; 11838 11839 vm_map_lock(map); 11840 11841 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) 11842 { 11843 vm_map_unlock(map); 11844 return (FALSE); 11845 } 11846 11847 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 11848 vm_map_unlock(map); 11849 return(FALSE); 11850 } 11851 11852 entry = tmp_entry; 11853 11854 while (start < end) { 11855 if (entry == vm_map_to_entry(map)) { 11856 vm_map_unlock(map); 11857 return(FALSE); 11858 } 11859 11860 /* 11861 * No holes allowed! 11862 */ 11863 11864 if (start < entry->vme_start) { 11865 vm_map_unlock(map); 11866 return(FALSE); 11867 } 11868 11869 /* 11870 * Check protection associated with entry. 
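 * Every entry covering [start, end) must grant at least the bits in
 * "protection"; e.g. a hypothetical caller validating a user buffer for
 * read/write would pass VM_PROT_READ | VM_PROT_WRITE and only get TRUE
 * back if every page in the range allows both.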
11871 */ 11872 11873 if ((entry->protection & protection) != protection) { 11874 vm_map_unlock(map); 11875 return(FALSE); 11876 } 11877 11878 /* go to next entry */ 11879 11880 start = entry->vme_end; 11881 entry = entry->vme_next; 11882 } 11883 vm_map_unlock(map); 11884 return(TRUE); 11885} 11886 11887kern_return_t 11888vm_map_purgable_control( 11889 vm_map_t map, 11890 vm_map_offset_t address, 11891 vm_purgable_t control, 11892 int *state) 11893{ 11894 vm_map_entry_t entry; 11895 vm_object_t object; 11896 kern_return_t kr; 11897 11898 /* 11899 * Vet all the input parameters and current type and state of the 11900 * underlaying object. Return with an error if anything is amiss. 11901 */ 11902 if (map == VM_MAP_NULL) 11903 return(KERN_INVALID_ARGUMENT); 11904 11905 if (control != VM_PURGABLE_SET_STATE && 11906 control != VM_PURGABLE_GET_STATE && 11907 control != VM_PURGABLE_PURGE_ALL) 11908 return(KERN_INVALID_ARGUMENT); 11909 11910 if (control == VM_PURGABLE_PURGE_ALL) { 11911 vm_purgeable_object_purge_all(); 11912 return KERN_SUCCESS; 11913 } 11914 11915 if (control == VM_PURGABLE_SET_STATE && 11916 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) || 11917 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) 11918 return(KERN_INVALID_ARGUMENT); 11919 11920 vm_map_lock_read(map); 11921 11922 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) { 11923 11924 /* 11925 * Must pass a valid non-submap address. 11926 */ 11927 vm_map_unlock_read(map); 11928 return(KERN_INVALID_ADDRESS); 11929 } 11930 11931 if ((entry->protection & VM_PROT_WRITE) == 0) { 11932 /* 11933 * Can't apply purgable controls to something you can't write. 11934 */ 11935 vm_map_unlock_read(map); 11936 return(KERN_PROTECTION_FAILURE); 11937 } 11938 11939 object = entry->object.vm_object; 11940 if (object == VM_OBJECT_NULL) { 11941 /* 11942 * Object must already be present or it can't be purgable. 11943 */ 11944 vm_map_unlock_read(map); 11945 return KERN_INVALID_ARGUMENT; 11946 } 11947 11948 vm_object_lock(object); 11949 11950 if (entry->offset != 0 || 11951 entry->vme_end - entry->vme_start != object->vo_size) { 11952 /* 11953 * Can only apply purgable controls to the whole (existing) 11954 * object at once. 
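 * (For example, a region set up with vm_allocate() and the
 * VM_FLAGS_PURGABLE flag maps exactly one purgeable object, so state
 * changes have to target that whole allocation; operating on a
 * sub-range of it is rejected here.)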
11955 */ 11956 vm_map_unlock_read(map); 11957 vm_object_unlock(object); 11958 return KERN_INVALID_ARGUMENT; 11959 } 11960 11961 vm_map_unlock_read(map); 11962 11963 kr = vm_object_purgable_control(object, control, state); 11964 11965 vm_object_unlock(object); 11966 11967 return kr; 11968} 11969 11970kern_return_t 11971vm_map_page_query_internal( 11972 vm_map_t target_map, 11973 vm_map_offset_t offset, 11974 int *disposition, 11975 int *ref_count) 11976{ 11977 kern_return_t kr; 11978 vm_page_info_basic_data_t info; 11979 mach_msg_type_number_t count; 11980 11981 count = VM_PAGE_INFO_BASIC_COUNT; 11982 kr = vm_map_page_info(target_map, 11983 offset, 11984 VM_PAGE_INFO_BASIC, 11985 (vm_page_info_t) &info, 11986 &count); 11987 if (kr == KERN_SUCCESS) { 11988 *disposition = info.disposition; 11989 *ref_count = info.ref_count; 11990 } else { 11991 *disposition = 0; 11992 *ref_count = 0; 11993 } 11994 11995 return kr; 11996} 11997 11998kern_return_t 11999vm_map_page_info( 12000 vm_map_t map, 12001 vm_map_offset_t offset, 12002 vm_page_info_flavor_t flavor, 12003 vm_page_info_t info, 12004 mach_msg_type_number_t *count) 12005{ 12006 vm_map_entry_t map_entry; 12007 vm_object_t object; 12008 vm_page_t m; 12009 kern_return_t kr; 12010 kern_return_t retval = KERN_SUCCESS; 12011 boolean_t top_object; 12012 int disposition; 12013 int ref_count; 12014 vm_object_id_t object_id; 12015 vm_page_info_basic_t basic_info; 12016 int depth; 12017 vm_map_offset_t offset_in_page; 12018 12019 switch (flavor) { 12020 case VM_PAGE_INFO_BASIC: 12021 if (*count != VM_PAGE_INFO_BASIC_COUNT) { 12022 /* 12023 * The "vm_page_info_basic_data" structure was not 12024 * properly padded, so allow the size to be off by 12025 * one to maintain backwards binary compatibility... 12026 */ 12027 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) 12028 return KERN_INVALID_ARGUMENT; 12029 } 12030 break; 12031 default: 12032 return KERN_INVALID_ARGUMENT; 12033 } 12034 12035 disposition = 0; 12036 ref_count = 0; 12037 object_id = 0; 12038 top_object = TRUE; 12039 depth = 0; 12040 12041 retval = KERN_SUCCESS; 12042 offset_in_page = offset & PAGE_MASK; 12043 offset = vm_map_trunc_page(offset); 12044 12045 vm_map_lock_read(map); 12046 12047 /* 12048 * First, find the map entry covering "offset", going down 12049 * submaps if necessary. 12050 */ 12051 for (;;) { 12052 if (!vm_map_lookup_entry(map, offset, &map_entry)) { 12053 vm_map_unlock_read(map); 12054 return KERN_INVALID_ADDRESS; 12055 } 12056 /* compute offset from this map entry's start */ 12057 offset -= map_entry->vme_start; 12058 /* compute offset into this map entry's object (or submap) */ 12059 offset += map_entry->offset; 12060 12061 if (map_entry->is_sub_map) { 12062 vm_map_t sub_map; 12063 12064 sub_map = map_entry->object.sub_map; 12065 vm_map_lock_read(sub_map); 12066 vm_map_unlock_read(map); 12067 12068 map = sub_map; 12069 12070 ref_count = MAX(ref_count, map->ref_count); 12071 continue; 12072 } 12073 break; 12074 } 12075 12076 object = map_entry->object.vm_object; 12077 if (object == VM_OBJECT_NULL) { 12078 /* no object -> no page */ 12079 vm_map_unlock_read(map); 12080 goto done; 12081 } 12082 12083 vm_object_lock(object); 12084 vm_map_unlock_read(map); 12085 12086 /* 12087 * Go down the VM object shadow chain until we find the page 12088 * we're looking for. 
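 * At each level we record the object's ref_count, look for a resident
 * page, and otherwise ask the pager (or the existence map, when
 * MACH_PAGEMAP is configured) whether the page was paged out before
 * following the shadow link; "depth" counts how far down the chain the
 * page was finally found.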
12089 */ 12090 for (;;) { 12091 ref_count = MAX(ref_count, object->ref_count); 12092 12093 m = vm_page_lookup(object, offset); 12094 12095 if (m != VM_PAGE_NULL) { 12096 disposition |= VM_PAGE_QUERY_PAGE_PRESENT; 12097 break; 12098 } else { 12099#if MACH_PAGEMAP 12100 if (object->existence_map) { 12101 if (vm_external_state_get(object->existence_map, 12102 offset) == 12103 VM_EXTERNAL_STATE_EXISTS) { 12104 /* 12105 * this page has been paged out 12106 */ 12107 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; 12108 break; 12109 } 12110 } else 12111#endif 12112 { 12113 if (object->internal && 12114 object->alive && 12115 !object->terminating && 12116 object->pager_ready) { 12117 12118 memory_object_t pager; 12119 12120 vm_object_paging_begin(object); 12121 pager = object->pager; 12122 vm_object_unlock(object); 12123 12124 /* 12125 * Ask the default pager if 12126 * it has this page. 12127 */ 12128 kr = memory_object_data_request( 12129 pager, 12130 offset + object->paging_offset, 12131 0, /* just poke the pager */ 12132 VM_PROT_READ, 12133 NULL); 12134 12135 vm_object_lock(object); 12136 vm_object_paging_end(object); 12137 12138 if (kr == KERN_SUCCESS) { 12139 /* the default pager has it */ 12140 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; 12141 break; 12142 } 12143 } 12144 } 12145 12146 if (object->shadow != VM_OBJECT_NULL) { 12147 vm_object_t shadow; 12148 12149 offset += object->vo_shadow_offset; 12150 shadow = object->shadow; 12151 12152 vm_object_lock(shadow); 12153 vm_object_unlock(object); 12154 12155 object = shadow; 12156 top_object = FALSE; 12157 depth++; 12158 } else { 12159// if (!object->internal) 12160// break; 12161// retval = KERN_FAILURE; 12162// goto done_with_object; 12163 break; 12164 } 12165 } 12166 } 12167 /* The ref_count is not strictly accurate, it measures the number */ 12168 /* of entities holding a ref on the object, they may not be mapping */ 12169 /* the object or may not be mapping the section holding the */ 12170 /* target page but its still a ball park number and though an over- */ 12171 /* count, it picks up the copy-on-write cases */ 12172 12173 /* We could also get a picture of page sharing from pmap_attributes */ 12174 /* but this would under count as only faulted-in mappings would */ 12175 /* show up. */ 12176 12177 if (top_object == TRUE && object->shadow) 12178 disposition |= VM_PAGE_QUERY_PAGE_COPIED; 12179 12180 if (! 
object->internal) 12181 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL; 12182 12183 if (m == VM_PAGE_NULL) 12184 goto done_with_object; 12185 12186 if (m->fictitious) { 12187 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; 12188 goto done_with_object; 12189 } 12190 if (m->dirty || pmap_is_modified(m->phys_page)) 12191 disposition |= VM_PAGE_QUERY_PAGE_DIRTY; 12192 12193 if (m->reference || pmap_is_referenced(m->phys_page)) 12194 disposition |= VM_PAGE_QUERY_PAGE_REF; 12195 12196 if (m->speculative) 12197 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE; 12198 12199 if (m->cs_validated) 12200 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED; 12201 if (m->cs_tainted) 12202 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED; 12203 12204done_with_object: 12205 vm_object_unlock(object); 12206done: 12207 12208 switch (flavor) { 12209 case VM_PAGE_INFO_BASIC: 12210 basic_info = (vm_page_info_basic_t) info; 12211 basic_info->disposition = disposition; 12212 basic_info->ref_count = ref_count; 12213 basic_info->object_id = (vm_object_id_t) (uintptr_t) object; 12214 basic_info->offset = 12215 (memory_object_offset_t) offset + offset_in_page; 12216 basic_info->depth = depth; 12217 break; 12218 } 12219 12220 return retval; 12221} 12222 12223/* 12224 * vm_map_msync 12225 * 12226 * Synchronises the memory range specified with its backing store 12227 * image by either flushing or cleaning the contents to the appropriate 12228 * memory manager engaging in a memory object synchronize dialog with 12229 * the manager. The client doesn't return until the manager issues 12230 * m_o_s_completed message. MIG Magically converts user task parameter 12231 * to the task's address map. 12232 * 12233 * interpretation of sync_flags 12234 * VM_SYNC_INVALIDATE - discard pages, only return precious 12235 * pages to manager. 12236 * 12237 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS) 12238 * - discard pages, write dirty or precious 12239 * pages back to memory manager. 12240 * 12241 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS 12242 * - write dirty or precious pages back to 12243 * the memory manager. 12244 * 12245 * VM_SYNC_CONTIGUOUS - does everything normally, but if there 12246 * is a hole in the region, and we would 12247 * have returned KERN_SUCCESS, return 12248 * KERN_INVALID_ADDRESS instead. 12249 * 12250 * NOTE 12251 * The memory object attributes have not yet been implemented, this 12252 * function will have to deal with the invalidate attribute 12253 * 12254 * RETURNS 12255 * KERN_INVALID_TASK Bad task parameter 12256 * KERN_INVALID_ARGUMENT both sync and async were specified. 12257 * KERN_SUCCESS The usual. 12258 * KERN_INVALID_ADDRESS There was a hole in the region. 
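 *
 *	A minimal usage sketch (hypothetical caller flushing a dirty,
 *	file-backed range synchronously and treating holes as errors):
 *
 *		kr = vm_map_msync(map, addr, len,
 *				  VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);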
12259 */ 12260 12261kern_return_t 12262vm_map_msync( 12263 vm_map_t map, 12264 vm_map_address_t address, 12265 vm_map_size_t size, 12266 vm_sync_t sync_flags) 12267{ 12268 msync_req_t msr; 12269 msync_req_t new_msr; 12270 queue_chain_t req_q; /* queue of requests for this msync */ 12271 vm_map_entry_t entry; 12272 vm_map_size_t amount_left; 12273 vm_object_offset_t offset; 12274 boolean_t do_sync_req; 12275 boolean_t had_hole = FALSE; 12276 memory_object_t pager; 12277 12278 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) && 12279 (sync_flags & VM_SYNC_SYNCHRONOUS)) 12280 return(KERN_INVALID_ARGUMENT); 12281 12282 /* 12283 * align address and size on page boundaries 12284 */ 12285 size = vm_map_round_page(address + size) - vm_map_trunc_page(address); 12286 address = vm_map_trunc_page(address); 12287 12288 if (map == VM_MAP_NULL) 12289 return(KERN_INVALID_TASK); 12290 12291 if (size == 0) 12292 return(KERN_SUCCESS); 12293 12294 queue_init(&req_q); 12295 amount_left = size; 12296 12297 while (amount_left > 0) { 12298 vm_object_size_t flush_size; 12299 vm_object_t object; 12300 12301 vm_map_lock(map); 12302 if (!vm_map_lookup_entry(map, 12303 vm_map_trunc_page(address), &entry)) { 12304 12305 vm_map_size_t skip; 12306 12307 /* 12308 * hole in the address map. 12309 */ 12310 had_hole = TRUE; 12311 12312 /* 12313 * Check for empty map. 12314 */ 12315 if (entry == vm_map_to_entry(map) && 12316 entry->vme_next == entry) { 12317 vm_map_unlock(map); 12318 break; 12319 } 12320 /* 12321 * Check that we don't wrap and that 12322 * we have at least one real map entry. 12323 */ 12324 if ((map->hdr.nentries == 0) || 12325 (entry->vme_next->vme_start < address)) { 12326 vm_map_unlock(map); 12327 break; 12328 } 12329 /* 12330 * Move up to the next entry if needed 12331 */ 12332 skip = (entry->vme_next->vme_start - address); 12333 if (skip >= amount_left) 12334 amount_left = 0; 12335 else 12336 amount_left -= skip; 12337 address = entry->vme_next->vme_start; 12338 vm_map_unlock(map); 12339 continue; 12340 } 12341 12342 offset = address - entry->vme_start; 12343 12344 /* 12345 * do we have more to flush than is contained in this 12346 * entry ? 
12347 */ 12348 if (amount_left + entry->vme_start + offset > entry->vme_end) { 12349 flush_size = entry->vme_end - 12350 (entry->vme_start + offset); 12351 } else { 12352 flush_size = amount_left; 12353 } 12354 amount_left -= flush_size; 12355 address += flush_size; 12356 12357 if (entry->is_sub_map == TRUE) { 12358 vm_map_t local_map; 12359 vm_map_offset_t local_offset; 12360 12361 local_map = entry->object.sub_map; 12362 local_offset = entry->offset; 12363 vm_map_unlock(map); 12364 if (vm_map_msync( 12365 local_map, 12366 local_offset, 12367 flush_size, 12368 sync_flags) == KERN_INVALID_ADDRESS) { 12369 had_hole = TRUE; 12370 } 12371 continue; 12372 } 12373 object = entry->object.vm_object; 12374 12375 /* 12376 * We can't sync this object if the object has not been 12377 * created yet 12378 */ 12379 if (object == VM_OBJECT_NULL) { 12380 vm_map_unlock(map); 12381 continue; 12382 } 12383 offset += entry->offset; 12384 12385 vm_object_lock(object); 12386 12387 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { 12388 int kill_pages = 0; 12389 boolean_t reusable_pages = FALSE; 12390 12391 if (sync_flags & VM_SYNC_KILLPAGES) { 12392 if (object->ref_count == 1 && !object->shadow) 12393 kill_pages = 1; 12394 else 12395 kill_pages = -1; 12396 } 12397 if (kill_pages != -1) 12398 vm_object_deactivate_pages(object, offset, 12399 (vm_object_size_t)flush_size, kill_pages, reusable_pages); 12400 vm_object_unlock(object); 12401 vm_map_unlock(map); 12402 continue; 12403 } 12404 /* 12405 * We can't sync this object if there isn't a pager. 12406 * Don't bother to sync internal objects, since there can't 12407 * be any "permanent" storage for these objects anyway. 12408 */ 12409 if ((object->pager == MEMORY_OBJECT_NULL) || 12410 (object->internal) || (object->private)) { 12411 vm_object_unlock(object); 12412 vm_map_unlock(map); 12413 continue; 12414 } 12415 /* 12416 * keep reference on the object until syncing is done 12417 */ 12418 vm_object_reference_locked(object); 12419 vm_object_unlock(object); 12420 12421 vm_map_unlock(map); 12422 12423 do_sync_req = vm_object_sync(object, 12424 offset, 12425 flush_size, 12426 sync_flags & VM_SYNC_INVALIDATE, 12427 ((sync_flags & VM_SYNC_SYNCHRONOUS) || 12428 (sync_flags & VM_SYNC_ASYNCHRONOUS)), 12429 sync_flags & VM_SYNC_SYNCHRONOUS); 12430 /* 12431 * only send a m_o_s if we returned pages or if the entry 12432 * is writable (ie dirty pages may have already been sent back) 12433 */ 12434 if (!do_sync_req) { 12435 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) { 12436 /* 12437 * clear out the clustering and read-ahead hints 12438 */ 12439 vm_object_lock(object); 12440 12441 object->pages_created = 0; 12442 object->pages_used = 0; 12443 object->sequential = 0; 12444 object->last_alloc = 0; 12445 12446 vm_object_unlock(object); 12447 } 12448 vm_object_deallocate(object); 12449 continue; 12450 } 12451 msync_req_alloc(new_msr); 12452 12453 vm_object_lock(object); 12454 offset += object->paging_offset; 12455 12456 new_msr->offset = offset; 12457 new_msr->length = flush_size; 12458 new_msr->object = object; 12459 new_msr->flag = VM_MSYNC_SYNCHRONIZING; 12460 re_iterate: 12461 12462 /* 12463 * We can't sync this object if there isn't a pager. The 12464 * pager can disappear anytime we're not holding the object 12465 * lock. So this has to be checked anytime we goto re_iterate. 
12466 */ 12467 12468 pager = object->pager; 12469 12470 if (pager == MEMORY_OBJECT_NULL) { 12471 vm_object_unlock(object); 12472 vm_object_deallocate(object); 12473 continue; 12474 } 12475 12476 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) { 12477 /* 12478 * need to check for overlapping entry, if found, wait 12479 * on overlapping msr to be done, then reiterate 12480 */ 12481 msr_lock(msr); 12482 if (msr->flag == VM_MSYNC_SYNCHRONIZING && 12483 ((offset >= msr->offset && 12484 offset < (msr->offset + msr->length)) || 12485 (msr->offset >= offset && 12486 msr->offset < (offset + flush_size)))) 12487 { 12488 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE); 12489 msr_unlock(msr); 12490 vm_object_unlock(object); 12491 thread_block(THREAD_CONTINUE_NULL); 12492 vm_object_lock(object); 12493 goto re_iterate; 12494 } 12495 msr_unlock(msr); 12496 }/* queue_iterate */ 12497 12498 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q); 12499 12500 vm_object_paging_begin(object); 12501 vm_object_unlock(object); 12502 12503 queue_enter(&req_q, new_msr, msync_req_t, req_q); 12504 12505 (void) memory_object_synchronize( 12506 pager, 12507 offset, 12508 flush_size, 12509 sync_flags & ~VM_SYNC_CONTIGUOUS); 12510 12511 vm_object_lock(object); 12512 vm_object_paging_end(object); 12513 vm_object_unlock(object); 12514 }/* while */ 12515 12516 /* 12517 * wait for memory_object_sychronize_completed messages from pager(s) 12518 */ 12519 12520 while (!queue_empty(&req_q)) { 12521 msr = (msync_req_t)queue_first(&req_q); 12522 msr_lock(msr); 12523 while(msr->flag != VM_MSYNC_DONE) { 12524 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE); 12525 msr_unlock(msr); 12526 thread_block(THREAD_CONTINUE_NULL); 12527 msr_lock(msr); 12528 }/* while */ 12529 queue_remove(&req_q, msr, msync_req_t, req_q); 12530 msr_unlock(msr); 12531 vm_object_deallocate(msr->object); 12532 msync_req_free(msr); 12533 }/* queue_iterate */ 12534 12535 /* for proper msync() behaviour */ 12536 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) 12537 return(KERN_INVALID_ADDRESS); 12538 12539 return(KERN_SUCCESS); 12540}/* vm_msync */ 12541 12542/* 12543 * Routine: convert_port_entry_to_map 12544 * Purpose: 12545 * Convert from a port specifying an entry or a task 12546 * to a map. Doesn't consume the port ref; produces a map ref, 12547 * which may be null. Unlike convert_port_to_map, the 12548 * port may be task or a named entry backed. 12549 * Conditions: 12550 * Nothing locked. 
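 *		A named entry port only yields a map here if it wraps a
 *		writable submap; anything else produces VM_MAP_NULL, while
 *		plain task ports fall through to convert_port_to_map().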

/*
 *	Routine:	convert_port_entry_to_map
 *	Purpose:
 *		Convert from a port specifying an entry or a task
 *		to a map.  Doesn't consume the port ref; produces a map ref,
 *		which may be null.  Unlike convert_port_to_map, the port
 *		may be backed by a task or by a named entry.
 *	Conditions:
 *		Nothing locked.
 */


vm_map_t
convert_port_entry_to_map(
	ipc_port_t	port)
{
	vm_map_t		map;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		while (TRUE) {
			ip_lock(port);
			if (ip_active(port) && (ip_kotype(port)
						== IKOT_NAMED_ENTRY)) {
				named_entry =
					(vm_named_entry_t)port->ip_kobject;
				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
					ip_unlock(port);

					try_failed_count++;
					mutex_pause(try_failed_count);
					continue;
				}
				named_entry->ref_count++;
				lck_mtx_unlock(&(named_entry)->Lock);
				ip_unlock(port);
				if ((named_entry->is_sub_map) &&
				    (named_entry->protection
				     & VM_PROT_WRITE)) {
					map = named_entry->backing.map;
				} else {
					mach_destroy_memory_entry(port);
					return VM_MAP_NULL;
				}
				vm_map_reference_swap(map);
				mach_destroy_memory_entry(port);
				break;
			}
			else
				return VM_MAP_NULL;
		}
	}
	else
		map = convert_port_to_map(port);

	return map;
}

/*
 *	Routine:	convert_port_entry_to_object
 *	Purpose:
 *		Convert from a port specifying a named entry to an
 *		object.  Doesn't consume the port ref; produces an object ref,
 *		which may be null.
 *	Conditions:
 *		Nothing locked.
 */


vm_object_t
convert_port_entry_to_object(
	ipc_port_t	port)
{
	vm_object_t		object;
	vm_named_entry_t	named_entry;
	uint32_t		try_failed_count = 0;

	if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
		while (TRUE) {
			ip_lock(port);
			if (ip_active(port) && (ip_kotype(port)
						== IKOT_NAMED_ENTRY)) {
				named_entry =
					(vm_named_entry_t)port->ip_kobject;
				if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
					ip_unlock(port);

					try_failed_count++;
					mutex_pause(try_failed_count);
					continue;
				}
				named_entry->ref_count++;
				lck_mtx_unlock(&(named_entry)->Lock);
				ip_unlock(port);
				if ((!named_entry->is_sub_map) &&
				    (!named_entry->is_pager) &&
				    (named_entry->protection
				     & VM_PROT_WRITE)) {
					object = named_entry->backing.object;
				} else {
					mach_destroy_memory_entry(port);
					return (vm_object_t)NULL;
				}
				vm_object_reference(named_entry->backing.object);
				mach_destroy_memory_entry(port);
				break;
			}
			else
				return (vm_object_t)NULL;
		}
	} else {
		return (vm_object_t)NULL;
	}

	return object;
}
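/*
 * Illustrative sketch (not compiled): the map reference produced by
 * convert_port_entry_to_map() must be dropped by the caller once it is done
 * with the map.  The hypothetical caller below only shows that pairing;
 * vm_map_deallocate() is defined later in this file.
 */
#if 0
static kern_return_t
example_with_map_from_port(ipc_port_t port)
{
	vm_map_t map;

	map = convert_port_entry_to_map(port);	/* the port ref is not consumed */
	if (map == VM_MAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... operate on the map here ... */

	vm_map_deallocate(map);			/* drop the map ref we were given */
	return KERN_SUCCESS;
}
#endif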

/*
 * Export routines to other components for the things we access locally through
 * macros.
 */
#undef current_map
vm_map_t
current_map(void)
{
	return (current_map_fast());
}

/*
 *	vm_map_reference:
 *
 *	Most code internal to osfmk goes through a macro that
 *	defines this.  This is always here for the use of other
 *	kernel components.
 */
#undef vm_map_reference
void
vm_map_reference(
	register vm_map_t	map)
{
	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
#if	TASK_SWAPPER
	assert(map->res_count > 0);
	assert(map->ref_count >= map->res_count);
	map->res_count++;
#endif
	map->ref_count++;
	lck_mtx_unlock(&map->s_lock);
}

/*
 *	vm_map_deallocate:
 *
 *	Removes a reference from the specified map,
 *	destroying it if no references remain.
 *	The map should not be locked.
 */
void
vm_map_deallocate(
	register vm_map_t	map)
{
	unsigned int		ref;

	if (map == VM_MAP_NULL)
		return;

	lck_mtx_lock(&map->s_lock);
	ref = --map->ref_count;
	if (ref > 0) {
		vm_map_res_deallocate(map);
		lck_mtx_unlock(&map->s_lock);
		return;
	}
	assert(map->ref_count == 0);
	lck_mtx_unlock(&map->s_lock);

#if	TASK_SWAPPER
	/*
	 * The map residence count isn't decremented here because
	 * the vm_map_delete below will traverse the entire map,
	 * deleting entries, and the residence counts on objects
	 * and sharing maps will go away then.
	 */
#endif

	vm_map_destroy(map, VM_MAP_NO_FLAGS);
}


void
vm_map_disable_NX(vm_map_t map)
{
	if (map == NULL)
		return;
	if (map->pmap == NULL)
		return;

	pmap_disable_NX(map->pmap);
}

void
vm_map_disallow_data_exec(vm_map_t map)
{
	if (map == NULL)
		return;

	map->map_disallow_data_exec = TRUE;
}

/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
 * more descriptive.
 */
void
vm_map_set_32bit(vm_map_t map)
{
	map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
}


void
vm_map_set_64bit(vm_map_t map)
{
	map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
}

vm_map_offset_t
vm_compute_max_offset(unsigned is64)
{
	return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS);
}

boolean_t
vm_map_is_64bit(
	vm_map_t map)
{
	return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
}

boolean_t
vm_map_has_hard_pagezero(
	vm_map_t	map,
	vm_map_offset_t	pagezero_size)
{
	/*
	 * XXX FBDP
	 * We should lock the VM map (for read) here, but we can get away
	 * without it for now because there can't really be any race condition:
	 * the VM map's min_offset is changed only when the VM map is created
	 * and when the zero page is established (when the binary gets loaded),
	 * and this routine gets called only when the task terminates and the
	 * VM map is being torn down, or when a new map is created via
	 * load_machfile()/execve().
	 */
	return (map->min_offset >= pagezero_size);
}

void
vm_map_set_4GB_pagezero(vm_map_t map)
{
#if defined(__i386__)
	pmap_set_4GB_pagezero(map->pmap);
#else
#pragma unused(map)
#endif

}

void
vm_map_clear_4GB_pagezero(vm_map_t map)
{
#if defined(__i386__)
	pmap_clear_4GB_pagezero(map->pmap);
#else
#pragma unused(map)
#endif
}
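/*
 * Illustrative sketch (not compiled): how the address-space size helpers above
 * relate to each other.  A map set up with vm_map_set_64bit() reports itself
 * as 64-bit because its max_offset exceeds VM_MAX_ADDRESS, and
 * vm_compute_max_offset() yields the same bound the setters install.  The
 * helper below exists only for this illustration.
 */
#if 0
static void
example_address_space_size(vm_map_t map, boolean_t is64)
{
	if (is64)
		vm_map_set_64bit(map);	/* max_offset = MACH_VM_MAX_ADDRESS */
	else
		vm_map_set_32bit(map);	/* max_offset = VM_MAX_ADDRESS */

	assert(vm_map_is_64bit(map) == is64);
	assert(map->max_offset == vm_compute_max_offset(is64));
}
#endif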

/*
 * Raise a VM map's maximum offset.
 */
kern_return_t
vm_map_raise_max_offset(
	vm_map_t	map,
	vm_map_offset_t	new_max_offset)
{
	kern_return_t	ret;

	vm_map_lock(map);
	ret = KERN_INVALID_ADDRESS;

	if (new_max_offset >= map->max_offset) {
		if (!vm_map_is_64bit(map)) {
			if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		} else {
			if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
				map->max_offset = new_max_offset;
				ret = KERN_SUCCESS;
			}
		}
	}

	vm_map_unlock(map);
	return ret;
}


/*
 * Raise a VM map's minimum offset,
 * to strictly enforce the "page zero" reservation.
 */
kern_return_t
vm_map_raise_min_offset(
	vm_map_t	map,
	vm_map_offset_t	new_min_offset)
{
	vm_map_entry_t	first_entry;

	new_min_offset = vm_map_round_page(new_min_offset);

	vm_map_lock(map);

	if (new_min_offset < map->min_offset) {
		/*
		 * Can't move min_offset backwards, as that would expose
		 * a part of the address space that was previously, and for
		 * possibly good reasons, inaccessible.
		 */
		vm_map_unlock(map);
		return KERN_INVALID_ADDRESS;
	}

	first_entry = vm_map_first_entry(map);
	if (first_entry != vm_map_to_entry(map) &&
	    first_entry->vme_start < new_min_offset) {
		/*
		 * Some memory was already allocated below the new
		 * minimum offset.  It's too late to change it now...
		 */
		vm_map_unlock(map);
		return KERN_NO_SPACE;
	}

	map->min_offset = new_min_offset;

	vm_map_unlock(map);

	return KERN_SUCCESS;
}

/*
 * Set the limit on the maximum amount of user wired memory allowed for this map.
 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
 * the kernel.  The limits are checked on the Mach VM side, so we keep a copy here so
 * we don't have to reach over to the BSD data structures.
 */

void
vm_map_set_user_wire_limit(vm_map_t	map,
			   vm_size_t	limit)
{
	map->user_wire_limit = limit;
}


void vm_map_switch_protect(vm_map_t	map,
			   boolean_t	val)
{
	vm_map_lock(map);
	map->switch_protect = val;
	vm_map_unlock(map);
}
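/*
 * Illustrative sketch (not compiled): per the comment on
 * vm_map_set_user_wire_limit(), the cached limit mirrors the BSD MEMLOCK
 * rlimit.  A hypothetical BSD-side hook reacting to a setrlimit(RLIMIT_MEMLOCK)
 * change could simply forward the new current limit, as sketched below; the
 * hook itself and the struct rlimit argument are assumptions for illustration.
 */
#if 0
static void
example_sync_memlock_limit(vm_map_t user_map, const struct rlimit *rlp)
{
	/* keep the Mach-side copy in sync with the BSD rlimit */
	vm_map_set_user_wire_limit(user_map, (vm_size_t)rlp->rlim_cur);
}
#endif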

/* Add (generate) code signature for memory range */
#if CONFIG_DYNAMIC_CODE_SIGNING
kern_return_t vm_map_sign(vm_map_t map,
			  vm_map_offset_t start,
			  vm_map_offset_t end)
{
	vm_map_entry_t entry;
	vm_page_t m;
	vm_object_t object;

	/*
	 * Vet all the input parameters and the current type and state of the
	 * underlying object.  Return with an error if anything is amiss.
	 */
	if (map == VM_MAP_NULL)
		return(KERN_INVALID_ARGUMENT);

	vm_map_lock_read(map);

	if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
		/*
		 * Must pass a valid non-submap address.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ADDRESS);
	}

	if ((entry->vme_start > start) || (entry->vme_end < end)) {
		/*
		 * Map entry doesn't cover the requested range.  Not handling
		 * this situation currently.
		 */
		vm_map_unlock_read(map);
		return(KERN_INVALID_ARGUMENT);
	}

	object = entry->object.vm_object;
	if (object == VM_OBJECT_NULL) {
		/*
		 * Object must already be present or we can't sign.
		 */
		vm_map_unlock_read(map);
		return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);
	vm_map_unlock_read(map);

	while (start < end) {
		uint32_t refmod;

		m = vm_page_lookup(object, start - entry->vme_start + entry->offset);
		if (m == VM_PAGE_NULL) {
			/* should we try to fault a page here? we can probably
			 * demand it exists and is locked for this request */
			vm_object_unlock(object);
			return KERN_FAILURE;
		}
		/* deal with special page status */
		if (m->busy ||
		    (m->unusual && (m->error || m->restart || m->private || m->absent))) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Page is OK... now "validate" it */
		/* This is the place where we'll call out to create a code
		 * directory, later */
		m->cs_validated = TRUE;

		/* The page is now "clean" for codesigning purposes.  That means
		 * we don't consider it as modified (wpmapped) anymore.  But
		 * we'll disconnect the page so we note any future modification
		 * attempts. */
		m->wpmapped = FALSE;
		refmod = pmap_disconnect(m->phys_page);

		/* Pull the dirty status from the pmap, since we cleared the
		 * wpmapped bit */
		if ((refmod & VM_MEM_MODIFIED) && !m->dirty) {
			SET_PAGE_DIRTY(m, FALSE);
		}

		/* On to the next page */
		start += PAGE_SIZE;
	}
	vm_object_unlock(object);

	return KERN_SUCCESS;
}
#endif
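/*
 * Illustrative sketch (not compiled): vm_map_sign() marks every resident page
 * in [start, end) as cs_validated and disconnects it from the pmap so later
 * modifications are noticed.  A hypothetical dynamic-code-signing path might
 * call it right after code has been written into a region, as sketched below;
 * the wrapper and its policy are assumptions, not part of this file.
 */
#if 0
#if CONFIG_DYNAMIC_CODE_SIGNING
static kern_return_t
example_sign_dynamic_code(vm_map_t map, vm_map_offset_t start, vm_map_size_t size)
{
	/* the range must fall within a single, already-populated map entry */
	return vm_map_sign(map, start, start + size);
}
#endif
#endif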

#if CONFIG_FREEZE

kern_return_t vm_map_freeze_walk(
	vm_map_t	map,
	unsigned int	*purgeable_count,
	unsigned int	*wired_count,
	unsigned int	*clean_count,
	unsigned int	*dirty_count,
	unsigned int	dirty_budget,
	boolean_t	*has_shared)
{
	vm_map_entry_t entry;

	vm_map_lock_read(map);

	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
	*has_shared = FALSE;

	for (entry = vm_map_first_entry(map);
	     entry != vm_map_to_entry(map);
	     entry = entry->vme_next) {
		unsigned int purgeable, clean, dirty, wired;
		boolean_t shared;

		if ((entry->object.vm_object == 0) ||
		    (entry->is_sub_map) ||
		    (entry->object.vm_object->phys_contiguous)) {
			continue;
		}

		default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared, entry->object.vm_object, NULL);

		*purgeable_count += purgeable;
		*wired_count += wired;
		*clean_count += clean;
		*dirty_count += dirty;

		if (shared) {
			*has_shared = TRUE;
		}

		/* Adjust the pageout budget and finish up if it is reached */
		if (dirty_budget) {
			dirty_budget -= dirty;
			if (dirty_budget == 0) {
				break;
			}
		}
	}

	vm_map_unlock_read(map);

	return KERN_SUCCESS;
}

kern_return_t vm_map_freeze(
	vm_map_t	map,
	unsigned int	*purgeable_count,
	unsigned int	*wired_count,
	unsigned int	*clean_count,
	unsigned int	*dirty_count,
	unsigned int	dirty_budget,
	boolean_t	*has_shared)
{
	vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
	kern_return_t kr = KERN_SUCCESS;

	*purgeable_count = *wired_count = *clean_count = *dirty_count = 0;
	*has_shared = FALSE;

	/*
	 * We need the exclusive lock here so that we can
	 * block any page faults or lookups while we are
	 * in the middle of freezing this vm map.
	 */
	vm_map_lock(map);

	if (map->default_freezer_handle == NULL) {
		map->default_freezer_handle = default_freezer_handle_allocate();
	}

	if ((kr = default_freezer_handle_init(map->default_freezer_handle)) != KERN_SUCCESS) {
		/*
		 * Can happen if the default_freezer_handle passed in is NULL,
		 * or if a table has already been allocated and associated
		 * with this handle, i.e. the map is already frozen.
		 */
		goto done;
	}

	for (entry2 = vm_map_first_entry(map);
	     entry2 != vm_map_to_entry(map);
	     entry2 = entry2->vme_next) {

		vm_object_t	src_object = entry2->object.vm_object;

		/* If eligible, scan the entry, moving eligible pages over to our parent object */
		if (entry2->object.vm_object && !entry2->is_sub_map && !entry2->object.vm_object->phys_contiguous) {
			unsigned int purgeable, clean, dirty, wired;
			boolean_t shared;

			default_freezer_pack(&purgeable, &wired, &clean, &dirty, dirty_budget, &shared,
					     src_object, map->default_freezer_handle);

			*purgeable_count += purgeable;
			*wired_count += wired;
			*clean_count += clean;
			*dirty_count += dirty;

			/* Adjust the pageout budget and finish up if it is reached */
			if (dirty_budget) {
				dirty_budget -= dirty;
				if (dirty_budget == 0) {
					break;
				}
			}

			if (shared) {
				*has_shared = TRUE;
			}
		}
	}

	/* Finally, throw out the pages to swap */
	default_freezer_pageout(map->default_freezer_handle);

done:
	vm_map_unlock(map);

	return kr;
}

kern_return_t
vm_map_thaw(
	vm_map_t map)
{
	kern_return_t kr = KERN_SUCCESS;

	vm_map_lock(map);

	if (map->default_freezer_handle == NULL) {
		/*
		 * This map is not in a frozen state.
		 */
		kr = KERN_FAILURE;
		goto out;
	}

	default_freezer_unpack(map->default_freezer_handle);
out:
	vm_map_unlock(map);

	return kr;
}
#endif
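/*
 * Illustrative sketch (not compiled): the expected pairing of the freezer
 * entry points above.  A hypothetical caller first sizes the job with
 * vm_map_freeze_walk() (which does not modify the map), then freezes, and
 * later restores the map with vm_map_thaw().  A dirty_budget of 0 means "no
 * budget limit", since both routines only apply the budget when it is nonzero.
 */
#if 0
#if CONFIG_FREEZE
static kern_return_t
example_freeze_then_thaw(vm_map_t map)
{
	unsigned int purgeable, wired, clean, dirty;
	boolean_t has_shared;
	kern_return_t kr;

	/* estimate what freezing would move, without touching the map */
	kr = vm_map_freeze_walk(map, &purgeable, &wired, &clean, &dirty,
				0 /* no dirty budget */, &has_shared);
	if (kr != KERN_SUCCESS)
		return kr;

	/* actually pack the map's pages into the default freezer and swap them out */
	kr = vm_map_freeze(map, &purgeable, &wired, &clean, &dirty,
			   0 /* no dirty budget */, &has_shared);
	if (kr != KERN_SUCCESS)
		return kr;

	/* ... later, bring the pages back ... */
	return vm_map_thaw(map);
}
#endif
#endif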

#if !CONFIG_EMBEDDED
/*
 * vm_map_entry_should_cow_for_true_share:
 *
 * Determines if the map entry should be clipped and set up for copy-on-write
 * to avoid applying "true_share" to a large VM object when only a subset is
 * targeted.
 *
 * For now, we target only the map entries created for the Objective C
 * Garbage Collector, which initially have the following properties:
 *	- alias == VM_MEMORY_MALLOC
 *	- wired_count == 0
 *	- !needs_copy
 * and a VM object with:
 *	- internal
 *	- copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
 *	- !true_share
 *	- vo_size == ANON_CHUNK_SIZE
 */
boolean_t
vm_map_entry_should_cow_for_true_share(
	vm_map_entry_t	entry)
{
	vm_object_t	object;

	if (entry->is_sub_map) {
		/* entry does not point at a VM object */
		return FALSE;
	}

	if (entry->needs_copy) {
		/* already set for copy-on-write: done! */
		return FALSE;
	}

	if (entry->alias != VM_MEMORY_MALLOC) {
		/* not tagged as an Objective C Garbage Collector entry */
		return FALSE;
	}

	if (entry->wired_count) {
		/* wired: can't change the map entry... */
		return FALSE;
	}

	object = entry->object.vm_object;

	if (object == VM_OBJECT_NULL) {
		/* no object yet... */
		return FALSE;
	}

	if (!object->internal) {
		/* not an internal object */
		return FALSE;
	}

	if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
		/* not the default copy strategy */
		return FALSE;
	}

	if (object->true_share) {
		/* already true_share: too late to avoid it */
		return FALSE;
	}

	if (object->vo_size != ANON_CHUNK_SIZE) {
		/* not an object created for the ObjC Garbage Collector */
		return FALSE;
	}

	/*
	 * All the criteria match: we have a large object being targeted for "true_share".
	 * To limit the adverse side-effects linked with "true_share", tell the caller to
	 * try and avoid setting up the entire object for "true_share" by clipping the
	 * targeted range and setting it up for copy-on-write.
	 */
	return TRUE;
}
#endif /* !CONFIG_EMBEDDED */
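/*
 * Illustrative sketch (not compiled): what a caller is expected to do with a
 * TRUE result from vm_map_entry_should_cow_for_true_share(), per the comment
 * above.  The helper, its call site, and the clipping bounds are assumptions
 * for illustration; vm_map_clip_start()/vm_map_clip_end() are the clipping
 * primitives used elsewhere in this file.
 */
#if 0
#if !CONFIG_EMBEDDED
static void
example_avoid_true_share(
	vm_map_t	map,
	vm_map_entry_t	entry,
	vm_map_offset_t	range_start,
	vm_map_offset_t	range_end)
{
	if (vm_map_entry_should_cow_for_true_share(entry)) {
		/*
		 * Restrict the entry to the targeted range and make it
		 * copy-on-write, so "true_share" never has to be applied
		 * to the whole large object.
		 */
		vm_map_clip_start(map, entry, range_start);
		vm_map_clip_end(map, entry, range_end);
		entry->needs_copy = TRUE;
	}
}
#endif
#endif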