1/* 2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58/* 59 * File: vm/vm_map.c 60 * Author: Avadis Tevanian, Jr., Michael Wayne Young 61 * Date: 1985 62 * 63 * Virtual memory mapping module. 64 */ 65 66#include <task_swapper.h> 67#include <mach_assert.h> 68#include <libkern/OSAtomic.h> 69 70#include <mach/kern_return.h> 71#include <mach/port.h> 72#include <mach/vm_attributes.h> 73#include <mach/vm_param.h> 74#include <mach/vm_behavior.h> 75#include <mach/vm_statistics.h> 76#include <mach/memory_object.h> 77#include <mach/mach_vm.h> 78#include <machine/cpu_capabilities.h> 79#include <mach/sdt.h> 80 81#include <kern/assert.h> 82#include <kern/counters.h> 83#include <kern/kalloc.h> 84#include <kern/zalloc.h> 85 86#include <vm/cpm.h> 87#include <vm/vm_init.h> 88#include <vm/vm_fault.h> 89#include <vm/vm_map.h> 90#include <vm/vm_object.h> 91#include <vm/vm_page.h> 92#include <vm/vm_kern.h> 93#include <ipc/ipc_port.h> 94#include <kern/sched_prim.h> 95#include <kern/misc_protos.h> 96#include <ddb/tr.h> 97#include <machine/db_machdep.h> 98#include <kern/xpr.h> 99 100#include <mach/vm_map_server.h> 101#include <mach/mach_host_server.h> 102#include <vm/vm_protos.h> 103 104#ifdef ppc 105#include <ppc/mappings.h> 106#endif /* ppc */ 107 108#include <vm/vm_protos.h> 109#include <vm/vm_shared_region.h> 110 111/* Internal prototypes 112 */ 113 114static void vm_map_simplify_range( 115 vm_map_t map, 116 vm_map_offset_t start, 117 vm_map_offset_t end); /* forward */ 118 119static boolean_t vm_map_range_check( 120 vm_map_t map, 121 vm_map_offset_t start, 122 vm_map_offset_t end, 123 vm_map_entry_t *entry); 124 125static vm_map_entry_t _vm_map_entry_create( 126 struct vm_map_header *map_header); 127 128static void _vm_map_entry_dispose( 129 struct vm_map_header *map_header, 130 vm_map_entry_t entry); 131 132static void vm_map_pmap_enter( 133 vm_map_t map, 134 vm_map_offset_t addr, 135 vm_map_offset_t end_addr, 136 vm_object_t object, 137 vm_object_offset_t offset, 138 vm_prot_t protection); 139 140static void _vm_map_clip_end( 141 struct vm_map_header *map_header, 142 vm_map_entry_t entry, 143 vm_map_offset_t end); 144 145static void _vm_map_clip_start( 146 struct vm_map_header *map_header, 147 vm_map_entry_t entry, 148 vm_map_offset_t start); 149 150static void vm_map_entry_delete( 151 vm_map_t map, 152 vm_map_entry_t entry); 153 154static kern_return_t vm_map_delete( 155 vm_map_t map, 156 vm_map_offset_t start, 157 vm_map_offset_t end, 158 int flags, 159 vm_map_t zap_map); 160 161static kern_return_t vm_map_copy_overwrite_unaligned( 162 vm_map_t dst_map, 163 vm_map_entry_t entry, 164 vm_map_copy_t copy, 165 vm_map_address_t start); 166 167static kern_return_t vm_map_copy_overwrite_aligned( 168 vm_map_t dst_map, 169 vm_map_entry_t tmp_entry, 170 vm_map_copy_t copy, 171 vm_map_offset_t start, 172 pmap_t pmap); 173 174static kern_return_t vm_map_copyin_kernel_buffer( 175 vm_map_t src_map, 176 vm_map_address_t src_addr, 177 vm_map_size_t len, 178 boolean_t src_destroy, 179 vm_map_copy_t *copy_result); /* OUT */ 180 181static kern_return_t vm_map_copyout_kernel_buffer( 182 vm_map_t map, 183 vm_map_address_t *addr, /* IN/OUT */ 184 vm_map_copy_t copy, 185 boolean_t overwrite); 186 187static void vm_map_fork_share( 188 vm_map_t old_map, 189 vm_map_entry_t old_entry, 190 vm_map_t new_map); 191 192static boolean_t vm_map_fork_copy( 193 vm_map_t old_map, 194 vm_map_entry_t *old_entry_p, 195 vm_map_t new_map); 196 197void vm_map_region_top_walk( 198 vm_map_entry_t entry, 199 vm_region_top_info_t top); 200 201void vm_map_region_walk( 202 vm_map_t map, 203 vm_map_offset_t va, 204 vm_map_entry_t entry, 205 vm_object_offset_t offset, 206 vm_object_size_t range, 207 vm_region_extended_info_t extended, 208 boolean_t look_for_pages); 209 210static kern_return_t vm_map_wire_nested( 211 vm_map_t map, 212 vm_map_offset_t start, 213 vm_map_offset_t end, 214 vm_prot_t access_type, 215 boolean_t user_wire, 216 pmap_t map_pmap, 217 vm_map_offset_t pmap_addr); 218 219static kern_return_t vm_map_unwire_nested( 220 vm_map_t map, 221 vm_map_offset_t start, 222 vm_map_offset_t end, 223 boolean_t user_wire, 224 pmap_t map_pmap, 225 vm_map_offset_t pmap_addr); 226 227static kern_return_t vm_map_overwrite_submap_recurse( 228 vm_map_t dst_map, 229 vm_map_offset_t dst_addr, 230 vm_map_size_t dst_size); 231 232static kern_return_t vm_map_copy_overwrite_nested( 233 vm_map_t dst_map, 234 vm_map_offset_t dst_addr, 235 vm_map_copy_t copy, 236 boolean_t interruptible, 237 pmap_t pmap); 238 239static kern_return_t vm_map_remap_extract( 240 vm_map_t map, 241 vm_map_offset_t addr, 242 vm_map_size_t size, 243 boolean_t copy, 244 struct vm_map_header *map_header, 245 vm_prot_t *cur_protection, 246 vm_prot_t *max_protection, 247 vm_inherit_t inheritance, 248 boolean_t pageable); 249 250static kern_return_t vm_map_remap_range_allocate( 251 vm_map_t map, 252 vm_map_address_t *address, 253 vm_map_size_t size, 254 vm_map_offset_t mask, 255 boolean_t anywhere, 256 vm_map_entry_t *map_entry); 257 258static void vm_map_region_look_for_page( 259 vm_map_t map, 260 vm_map_offset_t va, 261 vm_object_t object, 262 vm_object_offset_t offset, 263 int max_refcnt, 264 int depth, 265 vm_region_extended_info_t extended); 266 267static int vm_map_region_count_obj_refs( 268 vm_map_entry_t entry, 269 vm_object_t object); 270 271/* 272 * Macros to copy a vm_map_entry. We must be careful to correctly 273 * manage the wired page count. vm_map_entry_copy() creates a new 274 * map entry to the same memory - the wired count in the new entry 275 * must be set to zero. vm_map_entry_copy_full() creates a new 276 * entry that is identical to the old entry. This preserves the 277 * wire count; it's used for map splitting and zone changing in 278 * vm_map_copyout. 279 */ 280#define vm_map_entry_copy(NEW,OLD) \ 281MACRO_BEGIN \ 282 *(NEW) = *(OLD); \ 283 (NEW)->is_shared = FALSE; \ 284 (NEW)->needs_wakeup = FALSE; \ 285 (NEW)->in_transition = FALSE; \ 286 (NEW)->wired_count = 0; \ 287 (NEW)->user_wired_count = 0; \ 288MACRO_END 289 290#define vm_map_entry_copy_full(NEW,OLD) (*(NEW) = *(OLD)) 291 292/* 293 * Decide if we want to allow processes to execute from their data or stack areas. 294 * override_nx() returns true if we do. Data/stack execution can be enabled independently 295 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec 296 * or allow_stack_exec to enable data execution for that type of data area for that particular 297 * ABI (or both by or'ing the flags together). These are initialized in the architecture 298 * specific pmap files since the default behavior varies according to architecture. The 299 * main reason it varies is because of the need to provide binary compatibility with old 300 * applications that were written before these restrictions came into being. In the old 301 * days, an app could execute anything it could read, but this has slowly been tightened 302 * up over time. The default behavior is: 303 * 304 * 32-bit PPC apps may execute from both stack and data areas 305 * 32-bit Intel apps may exeucte from data areas but not stack 306 * 64-bit PPC/Intel apps may not execute from either data or stack 307 * 308 * An application on any architecture may override these defaults by explicitly 309 * adding PROT_EXEC permission to the page in question with the mprotect(2) 310 * system call. This code here just determines what happens when an app tries to 311 * execute from a page that lacks execute permission. 312 * 313 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the 314 * default behavior for both 32 and 64 bit apps on a system-wide basis. 315 */ 316 317extern int allow_data_exec, allow_stack_exec; 318 319int 320override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */ 321{ 322 int current_abi; 323 324 /* 325 * Determine if the app is running in 32 or 64 bit mode. 326 */ 327 328 if (vm_map_is_64bit(map)) 329 current_abi = VM_ABI_64; 330 else 331 current_abi = VM_ABI_32; 332 333 /* 334 * Determine if we should allow the execution based on whether it's a 335 * stack or data area and the current architecture. 336 */ 337 338 if (user_tag == VM_MEMORY_STACK) 339 return allow_stack_exec & current_abi; 340 341 return allow_data_exec & current_abi; 342} 343 344 345/* 346 * Virtual memory maps provide for the mapping, protection, 347 * and sharing of virtual memory objects. In addition, 348 * this module provides for an efficient virtual copy of 349 * memory from one map to another. 350 * 351 * Synchronization is required prior to most operations. 352 * 353 * Maps consist of an ordered doubly-linked list of simple 354 * entries; a single hint is used to speed up lookups. 355 * 356 * Sharing maps have been deleted from this version of Mach. 357 * All shared objects are now mapped directly into the respective 358 * maps. This requires a change in the copy on write strategy; 359 * the asymmetric (delayed) strategy is used for shared temporary 360 * objects instead of the symmetric (shadow) strategy. All maps 361 * are now "top level" maps (either task map, kernel map or submap 362 * of the kernel map). 363 * 364 * Since portions of maps are specified by start/end addreses, 365 * which may not align with existing map entries, all 366 * routines merely "clip" entries to these start/end values. 367 * [That is, an entry is split into two, bordering at a 368 * start or end value.] Note that these clippings may not 369 * always be necessary (as the two resulting entries are then 370 * not changed); however, the clipping is done for convenience. 371 * No attempt is currently made to "glue back together" two 372 * abutting entries. 373 * 374 * The symmetric (shadow) copy strategy implements virtual copy 375 * by copying VM object references from one map to 376 * another, and then marking both regions as copy-on-write. 377 * It is important to note that only one writeable reference 378 * to a VM object region exists in any map when this strategy 379 * is used -- this means that shadow object creation can be 380 * delayed until a write operation occurs. The symmetric (delayed) 381 * strategy allows multiple maps to have writeable references to 382 * the same region of a vm object, and hence cannot delay creating 383 * its copy objects. See vm_object_copy_quickly() in vm_object.c. 384 * Copying of permanent objects is completely different; see 385 * vm_object_copy_strategically() in vm_object.c. 386 */ 387 388static zone_t vm_map_zone; /* zone for vm_map structures */ 389static zone_t vm_map_entry_zone; /* zone for vm_map_entry structures */ 390static zone_t vm_map_kentry_zone; /* zone for kernel entry structures */ 391static zone_t vm_map_copy_zone; /* zone for vm_map_copy structures */ 392 393 394/* 395 * Placeholder object for submap operations. This object is dropped 396 * into the range by a call to vm_map_find, and removed when 397 * vm_map_submap creates the submap. 398 */ 399 400vm_object_t vm_submap_object; 401 402static void *map_data; 403static vm_map_size_t map_data_size; 404static void *kentry_data; 405static vm_map_size_t kentry_data_size; 406static int kentry_count = 2048; /* to init kentry_data_size */ 407 408#define NO_COALESCE_LIMIT (1024 * 128) 409 410 411/* Skip acquiring locks if we're in the midst of a kernel core dump */ 412extern unsigned int not_in_kdp; 413 414#if CONFIG_CODE_DECRYPTION 415/* 416 * vm_map_apple_protected: 417 * This remaps the requested part of the object with an object backed by 418 * the decrypting pager. 419 * crypt_info contains entry points and session data for the crypt module. 420 * The crypt_info block will be copied by vm_map_apple_protected. The data structures 421 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called. 422 */ 423kern_return_t 424vm_map_apple_protected( 425 vm_map_t map, 426 vm_map_offset_t start, 427 vm_map_offset_t end, 428 struct pager_crypt_info *crypt_info) 429{ 430 boolean_t map_locked; 431 kern_return_t kr; 432 vm_map_entry_t map_entry; 433 memory_object_t protected_mem_obj; 434 vm_object_t protected_object; 435 vm_map_offset_t map_addr; 436 437 vm_map_lock_read(map); 438 map_locked = TRUE; 439 440 /* lookup the protected VM object */ 441 if (!vm_map_lookup_entry(map, 442 start, 443 &map_entry) || 444 map_entry->vme_end < end || 445 map_entry->is_sub_map) { 446 /* that memory is not properly mapped */ 447 kr = KERN_INVALID_ARGUMENT; 448 goto done; 449 } 450 protected_object = map_entry->object.vm_object; 451 if (protected_object == VM_OBJECT_NULL) { 452 /* there should be a VM object here at this point */ 453 kr = KERN_INVALID_ARGUMENT; 454 goto done; 455 } 456 457 /* 458 * Lookup (and create if necessary) the protected memory object 459 * matching that VM object. 460 * If successful, this also grabs a reference on the memory object, 461 * to guarantee that it doesn't go away before we get a chance to map 462 * it. 463 */ 464 465 protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info); 466 if (protected_mem_obj == NULL) { 467 kr = KERN_FAILURE; 468 goto done; 469 } 470 471 vm_map_unlock_read(map); 472 map_locked = FALSE; 473 474 /* map this memory object in place of the current one */ 475 map_addr = start; 476 kr = vm_map_enter_mem_object(map, 477 &map_addr, 478 end - start, 479 (mach_vm_offset_t) 0, 480 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE, 481 (ipc_port_t) protected_mem_obj, 482 (map_entry->offset + 483 (start - map_entry->vme_start)), 484 TRUE, 485 map_entry->protection, 486 map_entry->max_protection, 487 map_entry->inheritance); 488 assert(map_addr == start); 489 /* 490 * Release the reference obtained by apple_protect_pager_setup(). 491 * The mapping (if it succeeded) is now holding a reference on the 492 * memory object. 493 */ 494 memory_object_deallocate(protected_mem_obj); 495 496done: 497 if (map_locked) { 498 vm_map_unlock_read(map); 499 } 500 return kr; 501} 502#endif /* CONFIG_CODE_DECRYPTION */ 503 504 505/* 506 * vm_map_init: 507 * 508 * Initialize the vm_map module. Must be called before 509 * any other vm_map routines. 510 * 511 * Map and entry structures are allocated from zones -- we must 512 * initialize those zones. 513 * 514 * There are three zones of interest: 515 * 516 * vm_map_zone: used to allocate maps. 517 * vm_map_entry_zone: used to allocate map entries. 518 * vm_map_kentry_zone: used to allocate map entries for the kernel. 519 * 520 * The kernel allocates map entries from a special zone that is initially 521 * "crammed" with memory. It would be difficult (perhaps impossible) for 522 * the kernel to allocate more memory to a entry zone when it became 523 * empty since the very act of allocating memory implies the creation 524 * of a new entry. 525 */ 526void 527vm_map_init( 528 void) 529{ 530 vm_map_zone = zinit((vm_map_size_t) sizeof(struct _vm_map), 40*1024, 531 PAGE_SIZE, "maps"); 532 533 vm_map_entry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), 534 1024*1024, PAGE_SIZE*5, 535 "non-kernel map entries"); 536 537 vm_map_kentry_zone = zinit((vm_map_size_t) sizeof(struct vm_map_entry), 538 kentry_data_size, kentry_data_size, 539 "kernel map entries"); 540 541 vm_map_copy_zone = zinit((vm_map_size_t) sizeof(struct vm_map_copy), 542 16*1024, PAGE_SIZE, "map copies"); 543 544 /* 545 * Cram the map and kentry zones with initial data. 546 * Set kentry_zone non-collectible to aid zone_gc(). 547 */ 548 zone_change(vm_map_zone, Z_COLLECT, FALSE); 549 zone_change(vm_map_kentry_zone, Z_COLLECT, FALSE); 550 zone_change(vm_map_kentry_zone, Z_EXPAND, FALSE); 551 zcram(vm_map_zone, map_data, map_data_size); 552 zcram(vm_map_kentry_zone, kentry_data, kentry_data_size); 553} 554 555void 556vm_map_steal_memory( 557 void) 558{ 559 map_data_size = vm_map_round_page(10 * sizeof(struct _vm_map)); 560 map_data = pmap_steal_memory(map_data_size); 561 562#if 0 563 /* 564 * Limiting worst case: vm_map_kentry_zone needs to map each "available" 565 * physical page (i.e. that beyond the kernel image and page tables) 566 * individually; we guess at most one entry per eight pages in the 567 * real world. This works out to roughly .1 of 1% of physical memory, 568 * or roughly 1900 entries (64K) for a 64M machine with 4K pages. 569 */ 570#endif 571 kentry_count = pmap_free_pages() / 8; 572 573 574 kentry_data_size = 575 vm_map_round_page(kentry_count * sizeof(struct vm_map_entry)); 576 kentry_data = pmap_steal_memory(kentry_data_size); 577} 578 579/* 580 * vm_map_create: 581 * 582 * Creates and returns a new empty VM map with 583 * the given physical map structure, and having 584 * the given lower and upper address bounds. 585 */ 586vm_map_t 587vm_map_create( 588 pmap_t pmap, 589 vm_map_offset_t min, 590 vm_map_offset_t max, 591 boolean_t pageable) 592{ 593 static int color_seed = 0; 594 register vm_map_t result; 595 596 result = (vm_map_t) zalloc(vm_map_zone); 597 if (result == VM_MAP_NULL) 598 panic("vm_map_create"); 599 600 vm_map_first_entry(result) = vm_map_to_entry(result); 601 vm_map_last_entry(result) = vm_map_to_entry(result); 602 result->hdr.nentries = 0; 603 result->hdr.entries_pageable = pageable; 604 605 result->size = 0; 606 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */ 607 result->user_wire_size = 0; 608 result->ref_count = 1; 609#if TASK_SWAPPER 610 result->res_count = 1; 611 result->sw_state = MAP_SW_IN; 612#endif /* TASK_SWAPPER */ 613 result->pmap = pmap; 614 result->min_offset = min; 615 result->max_offset = max; 616 result->wiring_required = FALSE; 617 result->no_zero_fill = FALSE; 618 result->mapped = FALSE; 619#if CONFIG_EMBEDDED 620 result->prot_copy_allow = FALSE; 621#else 622 result->prot_copy_allow = TRUE; 623#endif 624 result->wait_for_space = FALSE; 625 result->first_free = vm_map_to_entry(result); 626 result->hint = vm_map_to_entry(result); 627 result->color_rr = (color_seed++) & vm_color_mask; 628 vm_map_lock_init(result); 629 mutex_init(&result->s_lock, 0); 630 631 return(result); 632} 633 634/* 635 * vm_map_entry_create: [ internal use only ] 636 * 637 * Allocates a VM map entry for insertion in the 638 * given map (or map copy). No fields are filled. 639 */ 640#define vm_map_entry_create(map) \ 641 _vm_map_entry_create(&(map)->hdr) 642 643#define vm_map_copy_entry_create(copy) \ 644 _vm_map_entry_create(&(copy)->cpy_hdr) 645 646static vm_map_entry_t 647_vm_map_entry_create( 648 register struct vm_map_header *map_header) 649{ 650 register zone_t zone; 651 register vm_map_entry_t entry; 652 653 if (map_header->entries_pageable) 654 zone = vm_map_entry_zone; 655 else 656 zone = vm_map_kentry_zone; 657 658 entry = (vm_map_entry_t) zalloc(zone); 659 if (entry == VM_MAP_ENTRY_NULL) 660 panic("vm_map_entry_create"); 661 662 return(entry); 663} 664 665/* 666 * vm_map_entry_dispose: [ internal use only ] 667 * 668 * Inverse of vm_map_entry_create. 669 * 670 * write map lock held so no need to 671 * do anything special to insure correctness 672 * of the stores 673 */ 674#define vm_map_entry_dispose(map, entry) \ 675 MACRO_BEGIN \ 676 if((entry) == (map)->first_free) \ 677 (map)->first_free = vm_map_to_entry(map); \ 678 if((entry) == (map)->hint) \ 679 (map)->hint = vm_map_to_entry(map); \ 680 _vm_map_entry_dispose(&(map)->hdr, (entry)); \ 681 MACRO_END 682 683#define vm_map_copy_entry_dispose(map, entry) \ 684 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry)) 685 686static void 687_vm_map_entry_dispose( 688 register struct vm_map_header *map_header, 689 register vm_map_entry_t entry) 690{ 691 register zone_t zone; 692 693 if (map_header->entries_pageable) 694 zone = vm_map_entry_zone; 695 else 696 zone = vm_map_kentry_zone; 697 698 zfree(zone, entry); 699} 700 701#if MACH_ASSERT 702static boolean_t first_free_is_valid(vm_map_t map); /* forward */ 703static boolean_t first_free_check = FALSE; 704static boolean_t 705first_free_is_valid( 706 vm_map_t map) 707{ 708 vm_map_entry_t entry, next; 709 710 if (!first_free_check) 711 return TRUE; 712 713 entry = vm_map_to_entry(map); 714 next = entry->vme_next; 715 while (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_end) || 716 (vm_map_trunc_page(next->vme_start) == vm_map_trunc_page(entry->vme_start) && 717 next != vm_map_to_entry(map))) { 718 entry = next; 719 next = entry->vme_next; 720 if (entry == vm_map_to_entry(map)) 721 break; 722 } 723 if (map->first_free != entry) { 724 printf("Bad first_free for map %p: %p should be %p\n", 725 map, map->first_free, entry); 726 return FALSE; 727 } 728 return TRUE; 729} 730#endif /* MACH_ASSERT */ 731 732/* 733 * UPDATE_FIRST_FREE: 734 * 735 * Updates the map->first_free pointer to the 736 * entry immediately before the first hole in the map. 737 * The map should be locked. 738 */ 739#define UPDATE_FIRST_FREE(map, new_first_free) \ 740 MACRO_BEGIN \ 741 vm_map_t UFF_map; \ 742 vm_map_entry_t UFF_first_free; \ 743 vm_map_entry_t UFF_next_entry; \ 744 UFF_map = (map); \ 745 UFF_first_free = (new_first_free); \ 746 UFF_next_entry = UFF_first_free->vme_next; \ 747 while (vm_map_trunc_page(UFF_next_entry->vme_start) == \ 748 vm_map_trunc_page(UFF_first_free->vme_end) || \ 749 (vm_map_trunc_page(UFF_next_entry->vme_start) == \ 750 vm_map_trunc_page(UFF_first_free->vme_start) && \ 751 UFF_next_entry != vm_map_to_entry(UFF_map))) { \ 752 UFF_first_free = UFF_next_entry; \ 753 UFF_next_entry = UFF_first_free->vme_next; \ 754 if (UFF_first_free == vm_map_to_entry(UFF_map)) \ 755 break; \ 756 } \ 757 UFF_map->first_free = UFF_first_free; \ 758 assert(first_free_is_valid(UFF_map)); \ 759 MACRO_END 760 761/* 762 * vm_map_entry_{un,}link: 763 * 764 * Insert/remove entries from maps (or map copies). 765 */ 766#define vm_map_entry_link(map, after_where, entry) \ 767 MACRO_BEGIN \ 768 vm_map_t VMEL_map; \ 769 vm_map_entry_t VMEL_entry; \ 770 VMEL_map = (map); \ 771 VMEL_entry = (entry); \ 772 _vm_map_entry_link(&VMEL_map->hdr, after_where, VMEL_entry); \ 773 UPDATE_FIRST_FREE(VMEL_map, VMEL_map->first_free); \ 774 MACRO_END 775 776 777#define vm_map_copy_entry_link(copy, after_where, entry) \ 778 _vm_map_entry_link(&(copy)->cpy_hdr, after_where, (entry)) 779 780#define _vm_map_entry_link(hdr, after_where, entry) \ 781 MACRO_BEGIN \ 782 (hdr)->nentries++; \ 783 (entry)->vme_prev = (after_where); \ 784 (entry)->vme_next = (after_where)->vme_next; \ 785 (entry)->vme_prev->vme_next = (entry)->vme_next->vme_prev = (entry); \ 786 MACRO_END 787 788#define vm_map_entry_unlink(map, entry) \ 789 MACRO_BEGIN \ 790 vm_map_t VMEU_map; \ 791 vm_map_entry_t VMEU_entry; \ 792 vm_map_entry_t VMEU_first_free; \ 793 VMEU_map = (map); \ 794 VMEU_entry = (entry); \ 795 if (VMEU_entry->vme_start <= VMEU_map->first_free->vme_start) \ 796 VMEU_first_free = VMEU_entry->vme_prev; \ 797 else \ 798 VMEU_first_free = VMEU_map->first_free; \ 799 _vm_map_entry_unlink(&VMEU_map->hdr, VMEU_entry); \ 800 UPDATE_FIRST_FREE(VMEU_map, VMEU_first_free); \ 801 MACRO_END 802 803#define vm_map_copy_entry_unlink(copy, entry) \ 804 _vm_map_entry_unlink(&(copy)->cpy_hdr, (entry)) 805 806#define _vm_map_entry_unlink(hdr, entry) \ 807 MACRO_BEGIN \ 808 (hdr)->nentries--; \ 809 (entry)->vme_next->vme_prev = (entry)->vme_prev; \ 810 (entry)->vme_prev->vme_next = (entry)->vme_next; \ 811 MACRO_END 812 813#if MACH_ASSERT && TASK_SWAPPER 814/* 815 * vm_map_res_reference: 816 * 817 * Adds another valid residence count to the given map. 818 * 819 * Map is locked so this function can be called from 820 * vm_map_swapin. 821 * 822 */ 823void vm_map_res_reference(register vm_map_t map) 824{ 825 /* assert map is locked */ 826 assert(map->res_count >= 0); 827 assert(map->ref_count >= map->res_count); 828 if (map->res_count == 0) { 829 mutex_unlock(&map->s_lock); 830 vm_map_lock(map); 831 vm_map_swapin(map); 832 mutex_lock(&map->s_lock); 833 ++map->res_count; 834 vm_map_unlock(map); 835 } else 836 ++map->res_count; 837} 838 839/* 840 * vm_map_reference_swap: 841 * 842 * Adds valid reference and residence counts to the given map. 843 * 844 * The map may not be in memory (i.e. zero residence count). 845 * 846 */ 847void vm_map_reference_swap(register vm_map_t map) 848{ 849 assert(map != VM_MAP_NULL); 850 mutex_lock(&map->s_lock); 851 assert(map->res_count >= 0); 852 assert(map->ref_count >= map->res_count); 853 map->ref_count++; 854 vm_map_res_reference(map); 855 mutex_unlock(&map->s_lock); 856} 857 858/* 859 * vm_map_res_deallocate: 860 * 861 * Decrement residence count on a map; possibly causing swapout. 862 * 863 * The map must be in memory (i.e. non-zero residence count). 864 * 865 * The map is locked, so this function is callable from vm_map_deallocate. 866 * 867 */ 868void vm_map_res_deallocate(register vm_map_t map) 869{ 870 assert(map->res_count > 0); 871 if (--map->res_count == 0) { 872 mutex_unlock(&map->s_lock); 873 vm_map_lock(map); 874 vm_map_swapout(map); 875 vm_map_unlock(map); 876 mutex_lock(&map->s_lock); 877 } 878 assert(map->ref_count >= map->res_count); 879} 880#endif /* MACH_ASSERT && TASK_SWAPPER */ 881 882/* 883 * vm_map_destroy: 884 * 885 * Actually destroy a map. 886 */ 887void 888vm_map_destroy( 889 vm_map_t map, 890 int flags) 891{ 892 vm_map_lock(map); 893 894 /* clean up regular map entries */ 895 (void) vm_map_delete(map, map->min_offset, map->max_offset, 896 flags, VM_MAP_NULL); 897 /* clean up leftover special mappings (commpage, etc...) */ 898#ifdef __ppc__ 899 /* 900 * PPC51: ppc64 is limited to 51-bit addresses. 901 * Memory beyond this 51-bit limit is mapped specially at the 902 * pmap level, so do not interfere. 903 * On PPC64, the commpage is mapped beyond the addressable range 904 * via a special pmap hack, so ask pmap to clean it explicitly... 905 */ 906 if (map->pmap) { 907 pmap_unmap_sharedpage(map->pmap); 908 } 909 /* ... and do not let regular pmap cleanup apply here */ 910 flags |= VM_MAP_REMOVE_NO_PMAP_CLEANUP; 911#endif /* __ppc__ */ 912 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL, 913 flags, VM_MAP_NULL); 914 vm_map_unlock(map); 915 916 assert(map->hdr.nentries == 0); 917 918 if(map->pmap) 919 pmap_destroy(map->pmap); 920 921 zfree(vm_map_zone, map); 922} 923 924#if TASK_SWAPPER 925/* 926 * vm_map_swapin/vm_map_swapout 927 * 928 * Swap a map in and out, either referencing or releasing its resources. 929 * These functions are internal use only; however, they must be exported 930 * because they may be called from macros, which are exported. 931 * 932 * In the case of swapout, there could be races on the residence count, 933 * so if the residence count is up, we return, assuming that a 934 * vm_map_deallocate() call in the near future will bring us back. 935 * 936 * Locking: 937 * -- We use the map write lock for synchronization among races. 938 * -- The map write lock, and not the simple s_lock, protects the 939 * swap state of the map. 940 * -- If a map entry is a share map, then we hold both locks, in 941 * hierarchical order. 942 * 943 * Synchronization Notes: 944 * 1) If a vm_map_swapin() call happens while swapout in progress, it 945 * will block on the map lock and proceed when swapout is through. 946 * 2) A vm_map_reference() call at this time is illegal, and will 947 * cause a panic. vm_map_reference() is only allowed on resident 948 * maps, since it refuses to block. 949 * 3) A vm_map_swapin() call during a swapin will block, and 950 * proceeed when the first swapin is done, turning into a nop. 951 * This is the reason the res_count is not incremented until 952 * after the swapin is complete. 953 * 4) There is a timing hole after the checks of the res_count, before 954 * the map lock is taken, during which a swapin may get the lock 955 * before a swapout about to happen. If this happens, the swapin 956 * will detect the state and increment the reference count, causing 957 * the swapout to be a nop, thereby delaying it until a later 958 * vm_map_deallocate. If the swapout gets the lock first, then 959 * the swapin will simply block until the swapout is done, and 960 * then proceed. 961 * 962 * Because vm_map_swapin() is potentially an expensive operation, it 963 * should be used with caution. 964 * 965 * Invariants: 966 * 1) A map with a residence count of zero is either swapped, or 967 * being swapped. 968 * 2) A map with a non-zero residence count is either resident, 969 * or being swapped in. 970 */ 971 972int vm_map_swap_enable = 1; 973 974void vm_map_swapin (vm_map_t map) 975{ 976 register vm_map_entry_t entry; 977 978 if (!vm_map_swap_enable) /* debug */ 979 return; 980 981 /* 982 * Map is locked 983 * First deal with various races. 984 */ 985 if (map->sw_state == MAP_SW_IN) 986 /* 987 * we raced with swapout and won. Returning will incr. 988 * the res_count, turning the swapout into a nop. 989 */ 990 return; 991 992 /* 993 * The residence count must be zero. If we raced with another 994 * swapin, the state would have been IN; if we raced with a 995 * swapout (after another competing swapin), we must have lost 996 * the race to get here (see above comment), in which case 997 * res_count is still 0. 998 */ 999 assert(map->res_count == 0); 1000 1001 /* 1002 * There are no intermediate states of a map going out or 1003 * coming in, since the map is locked during the transition. 1004 */ 1005 assert(map->sw_state == MAP_SW_OUT); 1006 1007 /* 1008 * We now operate upon each map entry. If the entry is a sub- 1009 * or share-map, we call vm_map_res_reference upon it. 1010 * If the entry is an object, we call vm_object_res_reference 1011 * (this may iterate through the shadow chain). 1012 * Note that we hold the map locked the entire time, 1013 * even if we get back here via a recursive call in 1014 * vm_map_res_reference. 1015 */ 1016 entry = vm_map_first_entry(map); 1017 1018 while (entry != vm_map_to_entry(map)) { 1019 if (entry->object.vm_object != VM_OBJECT_NULL) { 1020 if (entry->is_sub_map) { 1021 vm_map_t lmap = entry->object.sub_map; 1022 mutex_lock(&lmap->s_lock); 1023 vm_map_res_reference(lmap); 1024 mutex_unlock(&lmap->s_lock); 1025 } else { 1026 vm_object_t object = entry->object.vm_object; 1027 vm_object_lock(object); 1028 /* 1029 * This call may iterate through the 1030 * shadow chain. 1031 */ 1032 vm_object_res_reference(object); 1033 vm_object_unlock(object); 1034 } 1035 } 1036 entry = entry->vme_next; 1037 } 1038 assert(map->sw_state == MAP_SW_OUT); 1039 map->sw_state = MAP_SW_IN; 1040} 1041 1042void vm_map_swapout(vm_map_t map) 1043{ 1044 register vm_map_entry_t entry; 1045 1046 /* 1047 * Map is locked 1048 * First deal with various races. 1049 * If we raced with a swapin and lost, the residence count 1050 * will have been incremented to 1, and we simply return. 1051 */ 1052 mutex_lock(&map->s_lock); 1053 if (map->res_count != 0) { 1054 mutex_unlock(&map->s_lock); 1055 return; 1056 } 1057 mutex_unlock(&map->s_lock); 1058 1059 /* 1060 * There are no intermediate states of a map going out or 1061 * coming in, since the map is locked during the transition. 1062 */ 1063 assert(map->sw_state == MAP_SW_IN); 1064 1065 if (!vm_map_swap_enable) 1066 return; 1067 1068 /* 1069 * We now operate upon each map entry. If the entry is a sub- 1070 * or share-map, we call vm_map_res_deallocate upon it. 1071 * If the entry is an object, we call vm_object_res_deallocate 1072 * (this may iterate through the shadow chain). 1073 * Note that we hold the map locked the entire time, 1074 * even if we get back here via a recursive call in 1075 * vm_map_res_deallocate. 1076 */ 1077 entry = vm_map_first_entry(map); 1078 1079 while (entry != vm_map_to_entry(map)) { 1080 if (entry->object.vm_object != VM_OBJECT_NULL) { 1081 if (entry->is_sub_map) { 1082 vm_map_t lmap = entry->object.sub_map; 1083 mutex_lock(&lmap->s_lock); 1084 vm_map_res_deallocate(lmap); 1085 mutex_unlock(&lmap->s_lock); 1086 } else { 1087 vm_object_t object = entry->object.vm_object; 1088 vm_object_lock(object); 1089 /* 1090 * This call may take a long time, 1091 * since it could actively push 1092 * out pages (if we implement it 1093 * that way). 1094 */ 1095 vm_object_res_deallocate(object); 1096 vm_object_unlock(object); 1097 } 1098 } 1099 entry = entry->vme_next; 1100 } 1101 assert(map->sw_state == MAP_SW_IN); 1102 map->sw_state = MAP_SW_OUT; 1103} 1104 1105#endif /* TASK_SWAPPER */ 1106 1107 1108/* 1109 * SAVE_HINT_MAP_READ: 1110 * 1111 * Saves the specified entry as the hint for 1112 * future lookups. only a read lock is held on map, 1113 * so make sure the store is atomic... OSCompareAndSwap 1114 * guarantees this... also, we don't care if we collide 1115 * and someone else wins and stores their 'hint' 1116 */ 1117#define SAVE_HINT_MAP_READ(map,value) \ 1118 MACRO_BEGIN \ 1119 OSCompareAndSwap((UInt32)((map)->hint), (UInt32)value, (UInt32 *)(&(map)->hint)); \ 1120 MACRO_END 1121 1122 1123/* 1124 * SAVE_HINT_MAP_WRITE: 1125 * 1126 * Saves the specified entry as the hint for 1127 * future lookups. write lock held on map, 1128 * so no one else can be writing or looking 1129 * until the lock is dropped, so it's safe 1130 * to just do an assignment 1131 */ 1132#define SAVE_HINT_MAP_WRITE(map,value) \ 1133 MACRO_BEGIN \ 1134 (map)->hint = (value); \ 1135 MACRO_END 1136 1137/* 1138 * vm_map_lookup_entry: [ internal use only ] 1139 * 1140 * Finds the map entry containing (or 1141 * immediately preceding) the specified address 1142 * in the given map; the entry is returned 1143 * in the "entry" parameter. The boolean 1144 * result indicates whether the address is 1145 * actually contained in the map. 1146 */ 1147boolean_t 1148vm_map_lookup_entry( 1149 register vm_map_t map, 1150 register vm_map_offset_t address, 1151 vm_map_entry_t *entry) /* OUT */ 1152{ 1153 register vm_map_entry_t cur; 1154 register vm_map_entry_t last; 1155 1156 /* 1157 * Start looking either from the head of the 1158 * list, or from the hint. 1159 */ 1160 cur = map->hint; 1161 1162 if (cur == vm_map_to_entry(map)) 1163 cur = cur->vme_next; 1164 1165 if (address >= cur->vme_start) { 1166 /* 1167 * Go from hint to end of list. 1168 * 1169 * But first, make a quick check to see if 1170 * we are already looking at the entry we 1171 * want (which is usually the case). 1172 * Note also that we don't need to save the hint 1173 * here... it is the same hint (unless we are 1174 * at the header, in which case the hint didn't 1175 * buy us anything anyway). 1176 */ 1177 last = vm_map_to_entry(map); 1178 if ((cur != last) && (cur->vme_end > address)) { 1179 *entry = cur; 1180 return(TRUE); 1181 } 1182 } 1183 else { 1184 /* 1185 * Go from start to hint, *inclusively* 1186 */ 1187 last = cur->vme_next; 1188 cur = vm_map_first_entry(map); 1189 } 1190 1191 /* 1192 * Search linearly 1193 */ 1194 1195 while (cur != last) { 1196 if (cur->vme_end > address) { 1197 if (address >= cur->vme_start) { 1198 /* 1199 * Save this lookup for future 1200 * hints, and return 1201 */ 1202 1203 *entry = cur; 1204 SAVE_HINT_MAP_READ(map, cur); 1205 1206 return(TRUE); 1207 } 1208 break; 1209 } 1210 cur = cur->vme_next; 1211 } 1212 *entry = cur->vme_prev; 1213 SAVE_HINT_MAP_READ(map, *entry); 1214 1215 return(FALSE); 1216} 1217 1218/* 1219 * Routine: vm_map_find_space 1220 * Purpose: 1221 * Allocate a range in the specified virtual address map, 1222 * returning the entry allocated for that range. 1223 * Used by kmem_alloc, etc. 1224 * 1225 * The map must be NOT be locked. It will be returned locked 1226 * on KERN_SUCCESS, unlocked on failure. 1227 * 1228 * If an entry is allocated, the object/offset fields 1229 * are initialized to zero. 1230 */ 1231kern_return_t 1232vm_map_find_space( 1233 register vm_map_t map, 1234 vm_map_offset_t *address, /* OUT */ 1235 vm_map_size_t size, 1236 vm_map_offset_t mask, 1237 int flags, 1238 vm_map_entry_t *o_entry) /* OUT */ 1239{ 1240 register vm_map_entry_t entry, new_entry; 1241 register vm_map_offset_t start; 1242 register vm_map_offset_t end; 1243 1244 if (size == 0) { 1245 *address = 0; 1246 return KERN_INVALID_ARGUMENT; 1247 } 1248 1249 if (flags & VM_FLAGS_GUARD_AFTER) { 1250 /* account for the back guard page in the size */ 1251 size += PAGE_SIZE_64; 1252 } 1253 1254 new_entry = vm_map_entry_create(map); 1255 1256 /* 1257 * Look for the first possible address; if there's already 1258 * something at this address, we have to start after it. 1259 */ 1260 1261 vm_map_lock(map); 1262 1263 assert(first_free_is_valid(map)); 1264 if ((entry = map->first_free) == vm_map_to_entry(map)) 1265 start = map->min_offset; 1266 else 1267 start = entry->vme_end; 1268 1269 /* 1270 * In any case, the "entry" always precedes 1271 * the proposed new region throughout the loop: 1272 */ 1273 1274 while (TRUE) { 1275 register vm_map_entry_t next; 1276 1277 /* 1278 * Find the end of the proposed new region. 1279 * Be sure we didn't go beyond the end, or 1280 * wrap around the address. 1281 */ 1282 1283 if (flags & VM_FLAGS_GUARD_BEFORE) { 1284 /* reserve space for the front guard page */ 1285 start += PAGE_SIZE_64; 1286 } 1287 end = ((start + mask) & ~mask); 1288 1289 if (end < start) { 1290 vm_map_entry_dispose(map, new_entry); 1291 vm_map_unlock(map); 1292 return(KERN_NO_SPACE); 1293 } 1294 start = end; 1295 end += size; 1296 1297 if ((end > map->max_offset) || (end < start)) { 1298 vm_map_entry_dispose(map, new_entry); 1299 vm_map_unlock(map); 1300 return(KERN_NO_SPACE); 1301 } 1302 1303 /* 1304 * If there are no more entries, we must win. 1305 */ 1306 1307 next = entry->vme_next; 1308 if (next == vm_map_to_entry(map)) 1309 break; 1310 1311 /* 1312 * If there is another entry, it must be 1313 * after the end of the potential new region. 1314 */ 1315 1316 if (next->vme_start >= end) 1317 break; 1318 1319 /* 1320 * Didn't fit -- move to the next entry. 1321 */ 1322 1323 entry = next; 1324 start = entry->vme_end; 1325 } 1326 1327 /* 1328 * At this point, 1329 * "start" and "end" should define the endpoints of the 1330 * available new range, and 1331 * "entry" should refer to the region before the new 1332 * range, and 1333 * 1334 * the map should be locked. 1335 */ 1336 1337 if (flags & VM_FLAGS_GUARD_BEFORE) { 1338 /* go back for the front guard page */ 1339 start -= PAGE_SIZE_64; 1340 } 1341 *address = start; 1342 1343 new_entry->vme_start = start; 1344 new_entry->vme_end = end; 1345 assert(page_aligned(new_entry->vme_start)); 1346 assert(page_aligned(new_entry->vme_end)); 1347 1348 new_entry->is_shared = FALSE; 1349 new_entry->is_sub_map = FALSE; 1350 new_entry->use_pmap = FALSE; 1351 new_entry->object.vm_object = VM_OBJECT_NULL; 1352 new_entry->offset = (vm_object_offset_t) 0; 1353 1354 new_entry->needs_copy = FALSE; 1355 1356 new_entry->inheritance = VM_INHERIT_DEFAULT; 1357 new_entry->protection = VM_PROT_DEFAULT; 1358 new_entry->max_protection = VM_PROT_ALL; 1359 new_entry->behavior = VM_BEHAVIOR_DEFAULT; 1360 new_entry->wired_count = 0; 1361 new_entry->user_wired_count = 0; 1362 1363 new_entry->in_transition = FALSE; 1364 new_entry->needs_wakeup = FALSE; 1365 new_entry->no_cache = FALSE; 1366 1367 new_entry->alias = 0; 1368 1369 VM_GET_FLAGS_ALIAS(flags, new_entry->alias); 1370 1371 /* 1372 * Insert the new entry into the list 1373 */ 1374 1375 vm_map_entry_link(map, entry, new_entry); 1376 1377 map->size += size; 1378 1379 /* 1380 * Update the lookup hint 1381 */ 1382 SAVE_HINT_MAP_WRITE(map, new_entry); 1383 1384 *o_entry = new_entry; 1385 return(KERN_SUCCESS); 1386} 1387 1388int vm_map_pmap_enter_print = FALSE; 1389int vm_map_pmap_enter_enable = FALSE; 1390 1391/* 1392 * Routine: vm_map_pmap_enter [internal only] 1393 * 1394 * Description: 1395 * Force pages from the specified object to be entered into 1396 * the pmap at the specified address if they are present. 1397 * As soon as a page not found in the object the scan ends. 1398 * 1399 * Returns: 1400 * Nothing. 1401 * 1402 * In/out conditions: 1403 * The source map should not be locked on entry. 1404 */ 1405static void 1406vm_map_pmap_enter( 1407 vm_map_t map, 1408 register vm_map_offset_t addr, 1409 register vm_map_offset_t end_addr, 1410 register vm_object_t object, 1411 vm_object_offset_t offset, 1412 vm_prot_t protection) 1413{ 1414 int type_of_fault; 1415 kern_return_t kr; 1416 1417 if(map->pmap == 0) 1418 return; 1419 1420 while (addr < end_addr) { 1421 register vm_page_t m; 1422 1423 vm_object_lock(object); 1424 1425 m = vm_page_lookup(object, offset); 1426 /* 1427 * ENCRYPTED SWAP: 1428 * The user should never see encrypted data, so do not 1429 * enter an encrypted page in the page table. 1430 */ 1431 if (m == VM_PAGE_NULL || m->busy || m->encrypted || 1432 m->fictitious || 1433 (m->unusual && ( m->error || m->restart || m->absent))) { 1434 vm_object_unlock(object); 1435 return; 1436 } 1437 1438 if (vm_map_pmap_enter_print) { 1439 printf("vm_map_pmap_enter:"); 1440 printf("map: %p, addr: %llx, object: %p, offset: %llx\n", 1441 map, (unsigned long long)addr, object, (unsigned long long)offset); 1442 } 1443 type_of_fault = DBG_CACHE_HIT_FAULT; 1444 kr = vm_fault_enter(m, map->pmap, addr, protection, 1445 m->wire_count != 0, FALSE, FALSE, 1446 &type_of_fault); 1447 1448 vm_object_unlock(object); 1449 1450 offset += PAGE_SIZE_64; 1451 addr += PAGE_SIZE; 1452 } 1453} 1454 1455boolean_t vm_map_pmap_is_empty( 1456 vm_map_t map, 1457 vm_map_offset_t start, 1458 vm_map_offset_t end); 1459boolean_t vm_map_pmap_is_empty( 1460 vm_map_t map, 1461 vm_map_offset_t start, 1462 vm_map_offset_t end) 1463{ 1464#ifdef MACHINE_PMAP_IS_EMPTY 1465 return pmap_is_empty(map->pmap, start, end); 1466#else /* MACHINE_PMAP_IS_EMPTY */ 1467 vm_map_offset_t offset; 1468 ppnum_t phys_page; 1469 1470 if (map->pmap == NULL) { 1471 return TRUE; 1472 } 1473 1474 for (offset = start; 1475 offset < end; 1476 offset += PAGE_SIZE) { 1477 phys_page = pmap_find_phys(map->pmap, offset); 1478 if (phys_page) { 1479 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): " 1480 "page %d at 0x%llx\n", 1481 map, (long long)start, (long long)end, 1482 phys_page, (long long)offset); 1483 return FALSE; 1484 } 1485 } 1486 return TRUE; 1487#endif /* MACHINE_PMAP_IS_EMPTY */ 1488} 1489 1490/* 1491 * Routine: vm_map_enter 1492 * 1493 * Description: 1494 * Allocate a range in the specified virtual address map. 1495 * The resulting range will refer to memory defined by 1496 * the given memory object and offset into that object. 1497 * 1498 * Arguments are as defined in the vm_map call. 1499 */ 1500int _map_enter_debug = 0; 1501static unsigned int vm_map_enter_restore_successes = 0; 1502static unsigned int vm_map_enter_restore_failures = 0; 1503kern_return_t 1504vm_map_enter( 1505 vm_map_t map, 1506 vm_map_offset_t *address, /* IN/OUT */ 1507 vm_map_size_t size, 1508 vm_map_offset_t mask, 1509 int flags, 1510 vm_object_t object, 1511 vm_object_offset_t offset, 1512 boolean_t needs_copy, 1513 vm_prot_t cur_protection, 1514 vm_prot_t max_protection, 1515 vm_inherit_t inheritance) 1516{ 1517 vm_map_entry_t entry, new_entry; 1518 vm_map_offset_t start, tmp_start, tmp_offset; 1519 vm_map_offset_t end, tmp_end; 1520 kern_return_t result = KERN_SUCCESS; 1521 vm_map_t zap_old_map = VM_MAP_NULL; 1522 vm_map_t zap_new_map = VM_MAP_NULL; 1523 boolean_t map_locked = FALSE; 1524 boolean_t pmap_empty = TRUE; 1525 boolean_t new_mapping_established = FALSE; 1526 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0); 1527 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0); 1528 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0); 1529 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0); 1530 boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0); 1531 char alias; 1532 vm_map_offset_t effective_min_offset, effective_max_offset; 1533 kern_return_t kr; 1534 1535#if CONFIG_EMBEDDED 1536 if (cur_protection & VM_PROT_WRITE) { 1537 if (cur_protection & VM_PROT_EXECUTE) { 1538 printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); 1539 cur_protection &= ~VM_PROT_EXECUTE; 1540 } 1541 } 1542 if (max_protection & VM_PROT_WRITE) { 1543 if (max_protection & VM_PROT_EXECUTE) { 1544 /* Right now all kinds of data segments are RWX. No point in logging that. */ 1545 /* printf("EMBEDDED: %s maxprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); */ 1546 1547 /* Try to take a hint from curprot. If curprot is not writable, 1548 * make maxprot not writable. Otherwise make it not executable. 1549 */ 1550 if((cur_protection & VM_PROT_WRITE) == 0) { 1551 max_protection &= ~VM_PROT_WRITE; 1552 } else { 1553 max_protection &= ~VM_PROT_EXECUTE; 1554 } 1555 } 1556 } 1557 assert ((cur_protection | max_protection) == max_protection); 1558#endif /* CONFIG_EMBEDDED */ 1559 1560 if (is_submap) { 1561 if (purgable) { 1562 /* submaps can not be purgeable */ 1563 return KERN_INVALID_ARGUMENT; 1564 } 1565 if (object == VM_OBJECT_NULL) { 1566 /* submaps can not be created lazily */ 1567 return KERN_INVALID_ARGUMENT; 1568 } 1569 } 1570 if (flags & VM_FLAGS_ALREADY) { 1571 /* 1572 * VM_FLAGS_ALREADY says that it's OK if the same mapping 1573 * is already present. For it to be meaningul, the requested 1574 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and 1575 * we shouldn't try and remove what was mapped there first 1576 * (!VM_FLAGS_OVERWRITE). 1577 */ 1578 if ((flags & VM_FLAGS_ANYWHERE) || 1579 (flags & VM_FLAGS_OVERWRITE)) { 1580 return KERN_INVALID_ARGUMENT; 1581 } 1582 } 1583 1584 effective_min_offset = map->min_offset; 1585 if (flags & VM_FLAGS_BEYOND_MAX) { 1586 /* 1587 * Allow an insertion beyond the map's official top boundary. 1588 */ 1589 if (vm_map_is_64bit(map)) 1590 effective_max_offset = 0xFFFFFFFFFFFFF000ULL; 1591 else 1592 effective_max_offset = 0x00000000FFFFF000ULL; 1593 } else { 1594 effective_max_offset = map->max_offset; 1595 } 1596 1597 if (size == 0 || 1598 (offset & PAGE_MASK_64) != 0) { 1599 *address = 0; 1600 return KERN_INVALID_ARGUMENT; 1601 } 1602 1603 VM_GET_FLAGS_ALIAS(flags, alias); 1604 1605#define RETURN(value) { result = value; goto BailOut; } 1606 1607 assert(page_aligned(*address)); 1608 assert(page_aligned(size)); 1609 1610 /* 1611 * Only zero-fill objects are allowed to be purgable. 1612 * LP64todo - limit purgable objects to 32-bits for now 1613 */ 1614 if (purgable && 1615 (offset != 0 || 1616 (object != VM_OBJECT_NULL && 1617 (object->size != size || 1618 object->purgable == VM_PURGABLE_DENY)) 1619 || size > VM_MAX_ADDRESS)) /* LP64todo: remove when dp capable */ 1620 return KERN_INVALID_ARGUMENT; 1621 1622 if (!anywhere && overwrite) { 1623 /* 1624 * Create a temporary VM map to hold the old mappings in the 1625 * affected area while we create the new one. 1626 * This avoids releasing the VM map lock in 1627 * vm_map_entry_delete() and allows atomicity 1628 * when we want to replace some mappings with a new one. 1629 * It also allows us to restore the old VM mappings if the 1630 * new mapping fails. 1631 */ 1632 zap_old_map = vm_map_create(PMAP_NULL, 1633 *address, 1634 *address + size, 1635 TRUE); 1636 } 1637 1638StartAgain: ; 1639 1640 start = *address; 1641 1642 if (anywhere) { 1643 vm_map_lock(map); 1644 map_locked = TRUE; 1645 1646 /* 1647 * Calculate the first possible address. 1648 */ 1649 1650 if (start < effective_min_offset) 1651 start = effective_min_offset; 1652 if (start > effective_max_offset) 1653 RETURN(KERN_NO_SPACE); 1654 1655 /* 1656 * Look for the first possible address; 1657 * if there's already something at this 1658 * address, we have to start after it. 1659 */ 1660 1661 assert(first_free_is_valid(map)); 1662 if (start == effective_min_offset) { 1663 if ((entry = map->first_free) != vm_map_to_entry(map)) 1664 start = entry->vme_end; 1665 } else { 1666 vm_map_entry_t tmp_entry; 1667 if (vm_map_lookup_entry(map, start, &tmp_entry)) 1668 start = tmp_entry->vme_end; 1669 entry = tmp_entry; 1670 } 1671 1672 /* 1673 * In any case, the "entry" always precedes 1674 * the proposed new region throughout the 1675 * loop: 1676 */ 1677 1678 while (TRUE) { 1679 register vm_map_entry_t next; 1680 1681 /* 1682 * Find the end of the proposed new region. 1683 * Be sure we didn't go beyond the end, or 1684 * wrap around the address. 1685 */ 1686 1687 end = ((start + mask) & ~mask); 1688 if (end < start) 1689 RETURN(KERN_NO_SPACE); 1690 start = end; 1691 end += size; 1692 1693 if ((end > effective_max_offset) || (end < start)) { 1694 if (map->wait_for_space) { 1695 if (size <= (effective_max_offset - 1696 effective_min_offset)) { 1697 assert_wait((event_t)map, 1698 THREAD_ABORTSAFE); 1699 vm_map_unlock(map); 1700 map_locked = FALSE; 1701 thread_block(THREAD_CONTINUE_NULL); 1702 goto StartAgain; 1703 } 1704 } 1705 RETURN(KERN_NO_SPACE); 1706 } 1707 1708 /* 1709 * If there are no more entries, we must win. 1710 */ 1711 1712 next = entry->vme_next; 1713 if (next == vm_map_to_entry(map)) 1714 break; 1715 1716 /* 1717 * If there is another entry, it must be 1718 * after the end of the potential new region. 1719 */ 1720 1721 if (next->vme_start >= end) 1722 break; 1723 1724 /* 1725 * Didn't fit -- move to the next entry. 1726 */ 1727 1728 entry = next; 1729 start = entry->vme_end; 1730 } 1731 *address = start; 1732 } else { 1733 /* 1734 * Verify that: 1735 * the address doesn't itself violate 1736 * the mask requirement. 1737 */ 1738 1739 vm_map_lock(map); 1740 map_locked = TRUE; 1741 if ((start & mask) != 0) 1742 RETURN(KERN_NO_SPACE); 1743 1744 /* 1745 * ... the address is within bounds 1746 */ 1747 1748 end = start + size; 1749 1750 if ((start < effective_min_offset) || 1751 (end > effective_max_offset) || 1752 (start >= end)) { 1753 RETURN(KERN_INVALID_ADDRESS); 1754 } 1755 1756 if (overwrite && zap_old_map != VM_MAP_NULL) { 1757 /* 1758 * Fixed mapping and "overwrite" flag: attempt to 1759 * remove all existing mappings in the specified 1760 * address range, saving them in our "zap_old_map". 1761 */ 1762 (void) vm_map_delete(map, start, end, 1763 VM_MAP_REMOVE_SAVE_ENTRIES, 1764 zap_old_map); 1765 } 1766 1767 /* 1768 * ... the starting address isn't allocated 1769 */ 1770 1771 if (vm_map_lookup_entry(map, start, &entry)) { 1772 if (! (flags & VM_FLAGS_ALREADY)) { 1773 RETURN(KERN_NO_SPACE); 1774 } 1775 /* 1776 * Check if what's already there is what we want. 1777 */ 1778 tmp_start = start; 1779 tmp_offset = offset; 1780 if (entry->vme_start < start) { 1781 tmp_start -= start - entry->vme_start; 1782 tmp_offset -= start - entry->vme_start; 1783 1784 } 1785 for (; entry->vme_start < end; 1786 entry = entry->vme_next) { 1787 /* 1788 * Check if the mapping's attributes 1789 * match the existing map entry. 1790 */ 1791 if (entry == vm_map_to_entry(map) || 1792 entry->vme_start != tmp_start || 1793 entry->is_sub_map != is_submap || 1794 entry->offset != tmp_offset || 1795 entry->needs_copy != needs_copy || 1796 entry->protection != cur_protection || 1797 entry->max_protection != max_protection || 1798 entry->inheritance != inheritance || 1799 entry->alias != alias) { 1800 /* not the same mapping ! */ 1801 RETURN(KERN_NO_SPACE); 1802 } 1803 /* 1804 * Check if the same object is being mapped. 1805 */ 1806 if (is_submap) { 1807 if (entry->object.sub_map != 1808 (vm_map_t) object) { 1809 /* not the same submap */ 1810 RETURN(KERN_NO_SPACE); 1811 } 1812 } else { 1813 if (entry->object.vm_object != object) { 1814 /* not the same VM object... */ 1815 vm_object_t obj2; 1816 1817 obj2 = entry->object.vm_object; 1818 if ((obj2 == VM_OBJECT_NULL || 1819 obj2->internal) && 1820 (object == VM_OBJECT_NULL || 1821 object->internal)) { 1822 /* 1823 * ... but both are 1824 * anonymous memory, 1825 * so equivalent. 1826 */ 1827 } else { 1828 RETURN(KERN_NO_SPACE); 1829 } 1830 } 1831 } 1832 1833 tmp_offset += entry->vme_end - entry->vme_start; 1834 tmp_start += entry->vme_end - entry->vme_start; 1835 if (entry->vme_end >= end) { 1836 /* reached the end of our mapping */ 1837 break; 1838 } 1839 } 1840 /* it all matches: let's use what's already there ! */ 1841 RETURN(KERN_MEMORY_PRESENT); 1842 } 1843 1844 /* 1845 * ... the next region doesn't overlap the 1846 * end point. 1847 */ 1848 1849 if ((entry->vme_next != vm_map_to_entry(map)) && 1850 (entry->vme_next->vme_start < end)) 1851 RETURN(KERN_NO_SPACE); 1852 } 1853 1854 /* 1855 * At this point, 1856 * "start" and "end" should define the endpoints of the 1857 * available new range, and 1858 * "entry" should refer to the region before the new 1859 * range, and 1860 * 1861 * the map should be locked. 1862 */ 1863 1864 /* 1865 * See whether we can avoid creating a new entry (and object) by 1866 * extending one of our neighbors. [So far, we only attempt to 1867 * extend from below.] Note that we can never extend/join 1868 * purgable objects because they need to remain distinct 1869 * entities in order to implement their "volatile object" 1870 * semantics. 1871 */ 1872 1873 if (purgable) { 1874 if (object == VM_OBJECT_NULL) { 1875 object = vm_object_allocate(size); 1876 object->copy_strategy = MEMORY_OBJECT_COPY_NONE; 1877 object->purgable = VM_PURGABLE_NONVOLATILE; 1878 offset = (vm_object_offset_t)0; 1879 } 1880 } else if ((is_submap == FALSE) && 1881 (object == VM_OBJECT_NULL) && 1882 (entry != vm_map_to_entry(map)) && 1883 (entry->vme_end == start) && 1884 (!entry->is_shared) && 1885 (!entry->is_sub_map) && 1886 (entry->alias == alias) && 1887 (entry->inheritance == inheritance) && 1888 (entry->protection == cur_protection) && 1889 (entry->max_protection == max_protection) && 1890 (entry->behavior == VM_BEHAVIOR_DEFAULT) && 1891 (entry->in_transition == 0) && 1892 (entry->no_cache == no_cache) && 1893 ((alias == VM_MEMORY_REALLOC) || 1894 ((entry->vme_end - entry->vme_start) + size < NO_COALESCE_LIMIT)) && 1895 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */ 1896 if (vm_object_coalesce(entry->object.vm_object, 1897 VM_OBJECT_NULL, 1898 entry->offset, 1899 (vm_object_offset_t) 0, 1900 (vm_map_size_t)(entry->vme_end - entry->vme_start), 1901 (vm_map_size_t)(end - entry->vme_end))) { 1902 1903 /* 1904 * Coalesced the two objects - can extend 1905 * the previous map entry to include the 1906 * new range. 1907 */ 1908 map->size += (end - entry->vme_end); 1909 entry->vme_end = end; 1910 UPDATE_FIRST_FREE(map, map->first_free); 1911 RETURN(KERN_SUCCESS); 1912 } 1913 } 1914 1915 /* 1916 * Create a new entry 1917 * LP64todo - for now, we can only allocate 4GB internal objects 1918 * because the default pager can't page bigger ones. Remove this 1919 * when it can. 1920 * 1921 * XXX FBDP 1922 * The reserved "page zero" in each process's address space can 1923 * be arbitrarily large. Splitting it into separate 4GB objects and 1924 * therefore different VM map entries serves no purpose and just 1925 * slows down operations on the VM map, so let's not split the 1926 * allocation into 4GB chunks if the max protection is NONE. That 1927 * memory should never be accessible, so it will never get to the 1928 * default pager. 1929 */ 1930 tmp_start = start; 1931 if (object == VM_OBJECT_NULL && 1932 size > (vm_map_size_t)VM_MAX_ADDRESS && 1933 max_protection != VM_PROT_NONE) 1934 tmp_end = tmp_start + (vm_map_size_t)VM_MAX_ADDRESS; 1935 else 1936 tmp_end = end; 1937 do { 1938 new_entry = vm_map_entry_insert(map, entry, tmp_start, tmp_end, 1939 object, offset, needs_copy, 1940 FALSE, FALSE, 1941 cur_protection, max_protection, 1942 VM_BEHAVIOR_DEFAULT, 1943 inheritance, 0, no_cache); 1944 new_entry->alias = alias; 1945 if (is_submap) { 1946 vm_map_t submap; 1947 boolean_t submap_is_64bit; 1948 boolean_t use_pmap; 1949 1950 new_entry->is_sub_map = TRUE; 1951 submap = (vm_map_t) object; 1952 submap_is_64bit = vm_map_is_64bit(submap); 1953 use_pmap = (alias == VM_MEMORY_SHARED_PMAP); 1954#ifndef NO_NESTED_PMAP 1955 if (use_pmap && submap->pmap == NULL) { 1956 /* we need a sub pmap to nest... */ 1957 submap->pmap = pmap_create(0, submap_is_64bit); 1958 if (submap->pmap == NULL) { 1959 /* let's proceed without nesting... */ 1960 } 1961 } 1962 if (use_pmap && submap->pmap != NULL) { 1963 kr = pmap_nest(map->pmap, 1964 submap->pmap, 1965 tmp_start, 1966 tmp_start, 1967 tmp_end - tmp_start); 1968 if (kr != KERN_SUCCESS) { 1969 printf("vm_map_enter: " 1970 "pmap_nest(0x%llx,0x%llx) " 1971 "error 0x%x\n", 1972 (long long)tmp_start, 1973 (long long)tmp_end, 1974 kr); 1975 } else { 1976 /* we're now nested ! */ 1977 new_entry->use_pmap = TRUE; 1978 pmap_empty = FALSE; 1979 } 1980 } 1981#endif /* NO_NESTED_PMAP */ 1982 } 1983 entry = new_entry; 1984 } while (tmp_end != end && 1985 (tmp_start = tmp_end) && 1986 (tmp_end = (end - tmp_end > (vm_map_size_t)VM_MAX_ADDRESS) ? 1987 tmp_end + (vm_map_size_t)VM_MAX_ADDRESS : end)); 1988 1989 vm_map_unlock(map); 1990 map_locked = FALSE; 1991 1992 new_mapping_established = TRUE; 1993 1994 /* Wire down the new entry if the user 1995 * requested all new map entries be wired. 1996 */ 1997 if (map->wiring_required) { 1998 pmap_empty = FALSE; /* pmap won't be empty */ 1999 result = vm_map_wire(map, start, end, 2000 new_entry->protection, TRUE); 2001 RETURN(result); 2002 } 2003 2004 if ((object != VM_OBJECT_NULL) && 2005 (vm_map_pmap_enter_enable) && 2006 (!anywhere) && 2007 (!needs_copy) && 2008 (size < (128*1024))) { 2009 pmap_empty = FALSE; /* pmap won't be empty */ 2010 2011 if (override_nx(map, alias) && cur_protection) 2012 cur_protection |= VM_PROT_EXECUTE; 2013 2014 vm_map_pmap_enter(map, start, end, 2015 object, offset, cur_protection); 2016 } 2017 2018BailOut: ; 2019 if (result == KERN_SUCCESS) { 2020 vm_prot_t pager_prot; 2021 memory_object_t pager; 2022 2023 if (pmap_empty && 2024 !(flags & VM_FLAGS_NO_PMAP_CHECK)) { 2025 assert(vm_map_pmap_is_empty(map, 2026 *address, 2027 *address+size)); 2028 } 2029 2030 /* 2031 * For "named" VM objects, let the pager know that the 2032 * memory object is being mapped. Some pagers need to keep 2033 * track of this, to know when they can reclaim the memory 2034 * object, for example. 2035 * VM calls memory_object_map() for each mapping (specifying 2036 * the protection of each mapping) and calls 2037 * memory_object_last_unmap() when all the mappings are gone. 2038 */ 2039 pager_prot = max_protection; 2040 if (needs_copy) { 2041 /* 2042 * Copy-On-Write mapping: won't modify 2043 * the memory object. 2044 */ 2045 pager_prot &= ~VM_PROT_WRITE; 2046 } 2047 if (!is_submap && 2048 object != VM_OBJECT_NULL && 2049 object->named && 2050 object->pager != MEMORY_OBJECT_NULL) { 2051 vm_object_lock(object); 2052 pager = object->pager; 2053 if (object->named && 2054 pager != MEMORY_OBJECT_NULL) { 2055 assert(object->pager_ready); 2056 vm_object_mapping_wait(object, THREAD_UNINT); 2057 vm_object_mapping_begin(object); 2058 vm_object_unlock(object); 2059 2060 kr = memory_object_map(pager, pager_prot); 2061 assert(kr == KERN_SUCCESS); 2062 2063 vm_object_lock(object); 2064 vm_object_mapping_end(object); 2065 } 2066 vm_object_unlock(object); 2067 } 2068 } else { 2069 if (new_mapping_established) { 2070 /* 2071 * We have to get rid of the new mappings since we 2072 * won't make them available to the user. 2073 * Try and do that atomically, to minimize the risk 2074 * that someone else create new mappings that range. 2075 */ 2076 zap_new_map = vm_map_create(PMAP_NULL, 2077 *address, 2078 *address + size, 2079 TRUE); 2080 if (!map_locked) { 2081 vm_map_lock(map); 2082 map_locked = TRUE; 2083 } 2084 (void) vm_map_delete(map, *address, *address+size, 2085 VM_MAP_REMOVE_SAVE_ENTRIES, 2086 zap_new_map); 2087 } 2088 if (zap_old_map != VM_MAP_NULL && 2089 zap_old_map->hdr.nentries != 0) { 2090 vm_map_entry_t entry1, entry2; 2091 2092 /* 2093 * The new mapping failed. Attempt to restore 2094 * the old mappings, saved in the "zap_old_map". 2095 */ 2096 if (!map_locked) { 2097 vm_map_lock(map); 2098 map_locked = TRUE; 2099 } 2100 2101 /* first check if the coast is still clear */ 2102 start = vm_map_first_entry(zap_old_map)->vme_start; 2103 end = vm_map_last_entry(zap_old_map)->vme_end; 2104 if (vm_map_lookup_entry(map, start, &entry1) || 2105 vm_map_lookup_entry(map, end, &entry2) || 2106 entry1 != entry2) { 2107 /* 2108 * Part of that range has already been 2109 * re-mapped: we can't restore the old 2110 * mappings... 2111 */ 2112 vm_map_enter_restore_failures++; 2113 } else { 2114 /* 2115 * Transfer the saved map entries from 2116 * "zap_old_map" to the original "map", 2117 * inserting them all after "entry1". 2118 */ 2119 for (entry2 = vm_map_first_entry(zap_old_map); 2120 entry2 != vm_map_to_entry(zap_old_map); 2121 entry2 = vm_map_first_entry(zap_old_map)) { 2122 vm_map_size_t entry_size; 2123 2124 entry_size = (entry2->vme_end - 2125 entry2->vme_start); 2126 vm_map_entry_unlink(zap_old_map, 2127 entry2); 2128 zap_old_map->size -= entry_size; 2129 vm_map_entry_link(map, entry1, entry2); 2130 map->size += entry_size; 2131 entry1 = entry2; 2132 } 2133 if (map->wiring_required) { 2134 /* 2135 * XXX TODO: we should rewire the 2136 * old pages here... 2137 */ 2138 } 2139 vm_map_enter_restore_successes++; 2140 } 2141 } 2142 } 2143 2144 if (map_locked) { 2145 vm_map_unlock(map); 2146 } 2147 2148 /* 2149 * Get rid of the "zap_maps" and all the map entries that 2150 * they may still contain. 2151 */ 2152 if (zap_old_map != VM_MAP_NULL) { 2153 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); 2154 zap_old_map = VM_MAP_NULL; 2155 } 2156 if (zap_new_map != VM_MAP_NULL) { 2157 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP); 2158 zap_new_map = VM_MAP_NULL; 2159 } 2160 2161 return result; 2162 2163#undef RETURN 2164} 2165 2166kern_return_t 2167vm_map_enter_mem_object( 2168 vm_map_t target_map, 2169 vm_map_offset_t *address, 2170 vm_map_size_t initial_size, 2171 vm_map_offset_t mask, 2172 int flags, 2173 ipc_port_t port, 2174 vm_object_offset_t offset, 2175 boolean_t copy, 2176 vm_prot_t cur_protection, 2177 vm_prot_t max_protection, 2178 vm_inherit_t inheritance) 2179{ 2180 vm_map_address_t map_addr; 2181 vm_map_size_t map_size; 2182 vm_object_t object; 2183 vm_object_size_t size; 2184 kern_return_t result; 2185 2186 /* 2187 * Check arguments for validity 2188 */ 2189 if ((target_map == VM_MAP_NULL) || 2190 (cur_protection & ~VM_PROT_ALL) || 2191 (max_protection & ~VM_PROT_ALL) || 2192 (inheritance > VM_INHERIT_LAST_VALID) || 2193 initial_size == 0) 2194 return KERN_INVALID_ARGUMENT; 2195 2196 map_addr = vm_map_trunc_page(*address); 2197 map_size = vm_map_round_page(initial_size); 2198 size = vm_object_round_page(initial_size); 2199 2200 /* 2201 * Find the vm object (if any) corresponding to this port. 2202 */ 2203 if (!IP_VALID(port)) { 2204 object = VM_OBJECT_NULL; 2205 offset = 0; 2206 copy = FALSE; 2207 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) { 2208 vm_named_entry_t named_entry; 2209 2210 named_entry = (vm_named_entry_t) port->ip_kobject; 2211 /* a few checks to make sure user is obeying rules */ 2212 if (size == 0) { 2213 if (offset >= named_entry->size) 2214 return KERN_INVALID_RIGHT; 2215 size = named_entry->size - offset; 2216 } 2217 if ((named_entry->protection & max_protection) != 2218 max_protection) 2219 return KERN_INVALID_RIGHT; 2220 if ((named_entry->protection & cur_protection) != 2221 cur_protection) 2222 return KERN_INVALID_RIGHT; 2223 if (named_entry->size < (offset + size)) 2224 return KERN_INVALID_ARGUMENT; 2225 2226 /* the callers parameter offset is defined to be the */ 2227 /* offset from beginning of named entry offset in object */ 2228 offset = offset + named_entry->offset; 2229 2230 named_entry_lock(named_entry); 2231 if (named_entry->is_sub_map) { 2232 vm_map_t submap; 2233 2234 submap = named_entry->backing.map; 2235 vm_map_lock(submap); 2236 vm_map_reference(submap); 2237 vm_map_unlock(submap); 2238 named_entry_unlock(named_entry); 2239 2240 result = vm_map_enter(target_map, 2241 &map_addr, 2242 map_size, 2243 mask, 2244 flags | VM_FLAGS_SUBMAP, 2245 (vm_object_t) submap, 2246 offset, 2247 copy, 2248 cur_protection, 2249 max_protection, 2250 inheritance); 2251 if (result != KERN_SUCCESS) { 2252 vm_map_deallocate(submap); 2253 } else { 2254 /* 2255 * No need to lock "submap" just to check its 2256 * "mapped" flag: that flag is never reset 2257 * once it's been set and if we race, we'll 2258 * just end up setting it twice, which is OK. 2259 */ 2260 if (submap->mapped == FALSE) { 2261 /* 2262 * This submap has never been mapped. 2263 * Set its "mapped" flag now that it 2264 * has been mapped. 2265 * This happens only for the first ever 2266 * mapping of a "submap". 2267 */ 2268 vm_map_lock(submap); 2269 submap->mapped = TRUE; 2270 vm_map_unlock(submap); 2271 } 2272 *address = map_addr; 2273 } 2274 return result; 2275 2276 } else if (named_entry->is_pager) { 2277 unsigned int access; 2278 vm_prot_t protections; 2279 unsigned int wimg_mode; 2280 boolean_t cache_attr; 2281 2282 protections = named_entry->protection & VM_PROT_ALL; 2283 access = GET_MAP_MEM(named_entry->protection); 2284 2285 object = vm_object_enter(named_entry->backing.pager, 2286 named_entry->size, 2287 named_entry->internal, 2288 FALSE, 2289 FALSE); 2290 if (object == VM_OBJECT_NULL) { 2291 named_entry_unlock(named_entry); 2292 return KERN_INVALID_OBJECT; 2293 } 2294 2295 /* JMM - drop reference on pager here */ 2296 2297 /* create an extra ref for the named entry */ 2298 vm_object_lock(object); 2299 vm_object_reference_locked(object); 2300 named_entry->backing.object = object; 2301 named_entry->is_pager = FALSE; 2302 named_entry_unlock(named_entry); 2303 2304 wimg_mode = object->wimg_bits; 2305 if (access == MAP_MEM_IO) { 2306 wimg_mode = VM_WIMG_IO; 2307 } else if (access == MAP_MEM_COPYBACK) { 2308 wimg_mode = VM_WIMG_USE_DEFAULT; 2309 } else if (access == MAP_MEM_WTHRU) { 2310 wimg_mode = VM_WIMG_WTHRU; 2311 } else if (access == MAP_MEM_WCOMB) { 2312 wimg_mode = VM_WIMG_WCOMB; 2313 } 2314 if (wimg_mode == VM_WIMG_IO || 2315 wimg_mode == VM_WIMG_WCOMB) 2316 cache_attr = TRUE; 2317 else 2318 cache_attr = FALSE; 2319 2320 /* wait for object (if any) to be ready */ 2321 if (!named_entry->internal) { 2322 while (!object->pager_ready) { 2323 vm_object_wait( 2324 object, 2325 VM_OBJECT_EVENT_PAGER_READY, 2326 THREAD_UNINT); 2327 vm_object_lock(object); 2328 } 2329 } 2330 2331 if (object->wimg_bits != wimg_mode) { 2332 vm_page_t p; 2333 2334 vm_object_paging_wait(object, THREAD_UNINT); 2335 2336 object->wimg_bits = wimg_mode; 2337 queue_iterate(&object->memq, p, vm_page_t, listq) { 2338 if (!p->fictitious) { 2339 if (p->pmapped) 2340 pmap_disconnect(p->phys_page); 2341 if (cache_attr) 2342 pmap_sync_page_attributes_phys(p->phys_page); 2343 } 2344 } 2345 } 2346 object->true_share = TRUE; 2347 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) 2348 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 2349 vm_object_unlock(object); 2350 } else { 2351 /* This is the case where we are going to map */ 2352 /* an already mapped object. If the object is */ 2353 /* not ready it is internal. An external */ 2354 /* object cannot be mapped until it is ready */ 2355 /* we can therefore avoid the ready check */ 2356 /* in this case. */ 2357 object = named_entry->backing.object; 2358 assert(object != VM_OBJECT_NULL); 2359 named_entry_unlock(named_entry); 2360 vm_object_reference(object); 2361 } 2362 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) { 2363 /* 2364 * JMM - This is temporary until we unify named entries 2365 * and raw memory objects. 2366 * 2367 * Detected fake ip_kotype for a memory object. In 2368 * this case, the port isn't really a port at all, but 2369 * instead is just a raw memory object. 2370 */ 2371 2372 object = vm_object_enter((memory_object_t)port, 2373 size, FALSE, FALSE, FALSE); 2374 if (object == VM_OBJECT_NULL) 2375 return KERN_INVALID_OBJECT; 2376 2377 /* wait for object (if any) to be ready */ 2378 if (object != VM_OBJECT_NULL) { 2379 if (object == kernel_object) { 2380 printf("Warning: Attempt to map kernel object" 2381 " by a non-private kernel entity\n"); 2382 return KERN_INVALID_OBJECT; 2383 } 2384 vm_object_lock(object); 2385 while (!object->pager_ready) { 2386 vm_object_wait(object, 2387 VM_OBJECT_EVENT_PAGER_READY, 2388 THREAD_UNINT); 2389 vm_object_lock(object); 2390 } 2391 vm_object_unlock(object); 2392 } 2393 } else { 2394 return KERN_INVALID_OBJECT; 2395 } 2396 2397 if (object != VM_OBJECT_NULL && 2398 object->named && 2399 object->pager != MEMORY_OBJECT_NULL && 2400 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { 2401 memory_object_t pager; 2402 vm_prot_t pager_prot; 2403 kern_return_t kr; 2404 2405 /* 2406 * For "named" VM objects, let the pager know that the 2407 * memory object is being mapped. Some pagers need to keep 2408 * track of this, to know when they can reclaim the memory 2409 * object, for example. 2410 * VM calls memory_object_map() for each mapping (specifying 2411 * the protection of each mapping) and calls 2412 * memory_object_last_unmap() when all the mappings are gone. 2413 */ 2414 pager_prot = max_protection; 2415 if (copy) { 2416 /* 2417 * Copy-On-Write mapping: won't modify the 2418 * memory object. 2419 */ 2420 pager_prot &= ~VM_PROT_WRITE; 2421 } 2422 vm_object_lock(object); 2423 pager = object->pager; 2424 if (object->named && 2425 pager != MEMORY_OBJECT_NULL && 2426 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) { 2427 assert(object->pager_ready); 2428 vm_object_mapping_wait(object, THREAD_UNINT); 2429 vm_object_mapping_begin(object); 2430 vm_object_unlock(object); 2431 2432 kr = memory_object_map(pager, pager_prot); 2433 assert(kr == KERN_SUCCESS); 2434 2435 vm_object_lock(object); 2436 vm_object_mapping_end(object); 2437 } 2438 vm_object_unlock(object); 2439 } 2440 2441 /* 2442 * Perform the copy if requested 2443 */ 2444 2445 if (copy) { 2446 vm_object_t new_object; 2447 vm_object_offset_t new_offset; 2448 2449 result = vm_object_copy_strategically(object, offset, size, 2450 &new_object, &new_offset, 2451 ©); 2452 2453 2454 if (result == KERN_MEMORY_RESTART_COPY) { 2455 boolean_t success; 2456 boolean_t src_needs_copy; 2457 2458 /* 2459 * XXX 2460 * We currently ignore src_needs_copy. 2461 * This really is the issue of how to make 2462 * MEMORY_OBJECT_COPY_SYMMETRIC safe for 2463 * non-kernel users to use. Solution forthcoming. 2464 * In the meantime, since we don't allow non-kernel 2465 * memory managers to specify symmetric copy, 2466 * we won't run into problems here. 2467 */ 2468 new_object = object; 2469 new_offset = offset; 2470 success = vm_object_copy_quickly(&new_object, 2471 new_offset, size, 2472 &src_needs_copy, 2473 ©); 2474 assert(success); 2475 result = KERN_SUCCESS; 2476 } 2477 /* 2478 * Throw away the reference to the 2479 * original object, as it won't be mapped. 2480 */ 2481 2482 vm_object_deallocate(object); 2483 2484 if (result != KERN_SUCCESS) 2485 return result; 2486 2487 object = new_object; 2488 offset = new_offset; 2489 } 2490 2491 result = vm_map_enter(target_map, 2492 &map_addr, map_size, 2493 (vm_map_offset_t)mask, 2494 flags, 2495 object, offset, 2496 copy, 2497 cur_protection, max_protection, inheritance); 2498 if (result != KERN_SUCCESS) 2499 vm_object_deallocate(object); 2500 *address = map_addr; 2501 return result; 2502} 2503 2504#if VM_CPM 2505 2506#ifdef MACH_ASSERT 2507extern pmap_paddr_t avail_start, avail_end; 2508#endif 2509 2510/* 2511 * Allocate memory in the specified map, with the caveat that 2512 * the memory is physically contiguous. This call may fail 2513 * if the system can't find sufficient contiguous memory. 2514 * This call may cause or lead to heart-stopping amounts of 2515 * paging activity. 2516 * 2517 * Memory obtained from this call should be freed in the 2518 * normal way, viz., via vm_deallocate. 2519 */ 2520kern_return_t 2521vm_map_enter_cpm( 2522 vm_map_t map, 2523 vm_map_offset_t *addr, 2524 vm_map_size_t size, 2525 int flags) 2526{ 2527 vm_object_t cpm_obj; 2528 pmap_t pmap; 2529 vm_page_t m, pages; 2530 kern_return_t kr; 2531 vm_map_offset_t va, start, end, offset; 2532#if MACH_ASSERT 2533 vm_map_offset_t prev_addr; 2534#endif /* MACH_ASSERT */ 2535 2536 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0); 2537 2538 if (!vm_allocate_cpm_enabled) 2539 return KERN_FAILURE; 2540 2541 if (size == 0) { 2542 *addr = 0; 2543 return KERN_SUCCESS; 2544 } 2545 if (anywhere) 2546 *addr = vm_map_min(map); 2547 else 2548 *addr = vm_map_trunc_page(*addr); 2549 size = vm_map_round_page(size); 2550 2551 /* 2552 * LP64todo - cpm_allocate should probably allow 2553 * allocations of >4GB, but not with the current 2554 * algorithm, so just cast down the size for now. 2555 */ 2556 if (size > VM_MAX_ADDRESS) 2557 return KERN_RESOURCE_SHORTAGE; 2558 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size), 2559 &pages, 0, TRUE)) != KERN_SUCCESS) 2560 return kr; 2561 2562 cpm_obj = vm_object_allocate((vm_object_size_t)size); 2563 assert(cpm_obj != VM_OBJECT_NULL); 2564 assert(cpm_obj->internal); 2565 assert(cpm_obj->size == (vm_object_size_t)size); 2566 assert(cpm_obj->can_persist == FALSE); 2567 assert(cpm_obj->pager_created == FALSE); 2568 assert(cpm_obj->pageout == FALSE); 2569 assert(cpm_obj->shadow == VM_OBJECT_NULL); 2570 2571 /* 2572 * Insert pages into object. 2573 */ 2574 2575 vm_object_lock(cpm_obj); 2576 for (offset = 0; offset < size; offset += PAGE_SIZE) { 2577 m = pages; 2578 pages = NEXT_PAGE(m); 2579 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL; 2580 2581 assert(!m->gobbled); 2582 assert(!m->wanted); 2583 assert(!m->pageout); 2584 assert(!m->tabled); 2585 assert(m->wire_count); 2586 /* 2587 * ENCRYPTED SWAP: 2588 * "m" is not supposed to be pageable, so it 2589 * should not be encrypted. It wouldn't be safe 2590 * to enter it in a new VM object while encrypted. 2591 */ 2592 ASSERT_PAGE_DECRYPTED(m); 2593 assert(m->busy); 2594 assert(m->phys_page>=(avail_start>>PAGE_SHIFT) && m->phys_page<=(avail_end>>PAGE_SHIFT)); 2595 2596 m->busy = FALSE; 2597 vm_page_insert(m, cpm_obj, offset); 2598 } 2599 assert(cpm_obj->resident_page_count == size / PAGE_SIZE); 2600 vm_object_unlock(cpm_obj); 2601 2602 /* 2603 * Hang onto a reference on the object in case a 2604 * multi-threaded application for some reason decides 2605 * to deallocate the portion of the address space into 2606 * which we will insert this object. 2607 * 2608 * Unfortunately, we must insert the object now before 2609 * we can talk to the pmap module about which addresses 2610 * must be wired down. Hence, the race with a multi- 2611 * threaded app. 2612 */ 2613 vm_object_reference(cpm_obj); 2614 2615 /* 2616 * Insert object into map. 2617 */ 2618 2619 kr = vm_map_enter( 2620 map, 2621 addr, 2622 size, 2623 (vm_map_offset_t)0, 2624 flags, 2625 cpm_obj, 2626 (vm_object_offset_t)0, 2627 FALSE, 2628 VM_PROT_ALL, 2629 VM_PROT_ALL, 2630 VM_INHERIT_DEFAULT); 2631 2632 if (kr != KERN_SUCCESS) { 2633 /* 2634 * A CPM object doesn't have can_persist set, 2635 * so all we have to do is deallocate it to 2636 * free up these pages. 2637 */ 2638 assert(cpm_obj->pager_created == FALSE); 2639 assert(cpm_obj->can_persist == FALSE); 2640 assert(cpm_obj->pageout == FALSE); 2641 assert(cpm_obj->shadow == VM_OBJECT_NULL); 2642 vm_object_deallocate(cpm_obj); /* kill acquired ref */ 2643 vm_object_deallocate(cpm_obj); /* kill creation ref */ 2644 } 2645 2646 /* 2647 * Inform the physical mapping system that the 2648 * range of addresses may not fault, so that 2649 * page tables and such can be locked down as well. 2650 */ 2651 start = *addr; 2652 end = start + size; 2653 pmap = vm_map_pmap(map); 2654 pmap_pageable(pmap, start, end, FALSE); 2655 2656 /* 2657 * Enter each page into the pmap, to avoid faults. 2658 * Note that this loop could be coded more efficiently, 2659 * if the need arose, rather than looking up each page 2660 * again. 2661 */ 2662 for (offset = 0, va = start; offset < size; 2663 va += PAGE_SIZE, offset += PAGE_SIZE) { 2664 int type_of_fault; 2665 2666 vm_object_lock(cpm_obj); 2667 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); 2668 assert(m != VM_PAGE_NULL); 2669 2670 vm_page_zero_fill(m); 2671 2672 type_of_fault = DBG_ZERO_FILL_FAULT; 2673 2674 vm_fault_enter(m, pmap, va, VM_PROT_ALL, 2675 m->wire_count != 0, FALSE, FALSE, 2676 &type_of_fault); 2677 2678 vm_object_unlock(cpm_obj); 2679 } 2680 2681#if MACH_ASSERT 2682 /* 2683 * Verify ordering in address space. 2684 */ 2685 for (offset = 0; offset < size; offset += PAGE_SIZE) { 2686 vm_object_lock(cpm_obj); 2687 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset); 2688 vm_object_unlock(cpm_obj); 2689 if (m == VM_PAGE_NULL) 2690 panic("vm_allocate_cpm: obj 0x%x off 0x%x no page", 2691 cpm_obj, offset); 2692 assert(m->tabled); 2693 assert(!m->busy); 2694 assert(!m->wanted); 2695 assert(!m->fictitious); 2696 assert(!m->private); 2697 assert(!m->absent); 2698 assert(!m->error); 2699 assert(!m->cleaning); 2700 assert(!m->precious); 2701 assert(!m->clustered); 2702 if (offset != 0) { 2703 if (m->phys_page != prev_addr + 1) { 2704 printf("start 0x%x end 0x%x va 0x%x\n", 2705 start, end, va); 2706 printf("obj 0x%x off 0x%x\n", cpm_obj, offset); 2707 printf("m 0x%x prev_address 0x%x\n", m, 2708 prev_addr); 2709 panic("vm_allocate_cpm: pages not contig!"); 2710 } 2711 } 2712 prev_addr = m->phys_page; 2713 } 2714#endif /* MACH_ASSERT */ 2715 2716 vm_object_deallocate(cpm_obj); /* kill extra ref */ 2717 2718 return kr; 2719} 2720 2721 2722#else /* VM_CPM */ 2723 2724/* 2725 * Interface is defined in all cases, but unless the kernel 2726 * is built explicitly for this option, the interface does 2727 * nothing. 2728 */ 2729 2730kern_return_t 2731vm_map_enter_cpm( 2732 __unused vm_map_t map, 2733 __unused vm_map_offset_t *addr, 2734 __unused vm_map_size_t size, 2735 __unused int flags) 2736{ 2737 return KERN_FAILURE; 2738} 2739#endif /* VM_CPM */ 2740 2741/* 2742 * Clip and unnest a portion of a nested submap mapping. 2743 */ 2744static void 2745vm_map_clip_unnest( 2746 vm_map_t map, 2747 vm_map_entry_t entry, 2748 vm_map_offset_t start_unnest, 2749 vm_map_offset_t end_unnest) 2750{ 2751 assert(entry->is_sub_map); 2752 assert(entry->object.sub_map != NULL); 2753 2754 if (entry->vme_start > start_unnest || 2755 entry->vme_end < end_unnest) { 2756 panic("vm_map_clip_unnest(0x%llx,0x%llx): " 2757 "bad nested entry: start=0x%llx end=0x%llx\n", 2758 (long long)start_unnest, (long long)end_unnest, 2759 (long long)entry->vme_start, (long long)entry->vme_end); 2760 } 2761 if (start_unnest > entry->vme_start) { 2762 _vm_map_clip_start(&map->hdr, 2763 entry, 2764 start_unnest); 2765 UPDATE_FIRST_FREE(map, map->first_free); 2766 } 2767 if (entry->vme_end > end_unnest) { 2768 _vm_map_clip_end(&map->hdr, 2769 entry, 2770 end_unnest); 2771 UPDATE_FIRST_FREE(map, map->first_free); 2772 } 2773 2774 pmap_unnest(map->pmap, 2775 entry->vme_start, 2776 entry->vme_end - entry->vme_start); 2777 if ((map->mapped) && (map->ref_count)) { 2778 /* clean up parent map/maps */ 2779 vm_map_submap_pmap_clean( 2780 map, entry->vme_start, 2781 entry->vme_end, 2782 entry->object.sub_map, 2783 entry->offset); 2784 } 2785 entry->use_pmap = FALSE; 2786} 2787 2788/* 2789 * vm_map_clip_start: [ internal use only ] 2790 * 2791 * Asserts that the given entry begins at or after 2792 * the specified address; if necessary, 2793 * it splits the entry into two. 2794 */ 2795static void 2796vm_map_clip_start( 2797 vm_map_t map, 2798 vm_map_entry_t entry, 2799 vm_map_offset_t startaddr) 2800{ 2801#ifndef NO_NESTED_PMAP 2802 if (entry->use_pmap && 2803 startaddr >= entry->vme_start) { 2804 vm_map_offset_t start_unnest, end_unnest; 2805 2806 /* 2807 * Make sure "startaddr" is no longer in a nested range 2808 * before we clip. Unnest only the minimum range the platform 2809 * can handle. 2810 */ 2811 start_unnest = startaddr & ~(pmap_nesting_size_min - 1); 2812 end_unnest = start_unnest + pmap_nesting_size_min; 2813 vm_map_clip_unnest(map, entry, start_unnest, end_unnest); 2814 } 2815#endif /* NO_NESTED_PMAP */ 2816 if (startaddr > entry->vme_start) { 2817 if (entry->object.vm_object && 2818 !entry->is_sub_map && 2819 entry->object.vm_object->phys_contiguous) { 2820 pmap_remove(map->pmap, 2821 (addr64_t)(entry->vme_start), 2822 (addr64_t)(entry->vme_end)); 2823 } 2824 _vm_map_clip_start(&map->hdr, entry, startaddr); 2825 UPDATE_FIRST_FREE(map, map->first_free); 2826 } 2827} 2828 2829 2830#define vm_map_copy_clip_start(copy, entry, startaddr) \ 2831 MACRO_BEGIN \ 2832 if ((startaddr) > (entry)->vme_start) \ 2833 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \ 2834 MACRO_END 2835 2836/* 2837 * This routine is called only when it is known that 2838 * the entry must be split. 2839 */ 2840static void 2841_vm_map_clip_start( 2842 register struct vm_map_header *map_header, 2843 register vm_map_entry_t entry, 2844 register vm_map_offset_t start) 2845{ 2846 register vm_map_entry_t new_entry; 2847 2848 /* 2849 * Split off the front portion -- 2850 * note that we must insert the new 2851 * entry BEFORE this one, so that 2852 * this entry has the specified starting 2853 * address. 2854 */ 2855 2856 new_entry = _vm_map_entry_create(map_header); 2857 vm_map_entry_copy_full(new_entry, entry); 2858 2859 new_entry->vme_end = start; 2860 entry->offset += (start - entry->vme_start); 2861 entry->vme_start = start; 2862 2863 _vm_map_entry_link(map_header, entry->vme_prev, new_entry); 2864 2865 if (entry->is_sub_map) 2866 vm_map_reference(new_entry->object.sub_map); 2867 else 2868 vm_object_reference(new_entry->object.vm_object); 2869} 2870 2871 2872/* 2873 * vm_map_clip_end: [ internal use only ] 2874 * 2875 * Asserts that the given entry ends at or before 2876 * the specified address; if necessary, 2877 * it splits the entry into two. 2878 */ 2879static void 2880vm_map_clip_end( 2881 vm_map_t map, 2882 vm_map_entry_t entry, 2883 vm_map_offset_t endaddr) 2884{ 2885 if (endaddr > entry->vme_end) { 2886 /* 2887 * Within the scope of this clipping, limit "endaddr" to 2888 * the end of this map entry... 2889 */ 2890 endaddr = entry->vme_end; 2891 } 2892#ifndef NO_NESTED_PMAP 2893 if (entry->use_pmap) { 2894 vm_map_offset_t start_unnest, end_unnest; 2895 2896 /* 2897 * Make sure the range between the start of this entry and 2898 * the new "endaddr" is no longer nested before we clip. 2899 * Unnest only the minimum range the platform can handle. 2900 */ 2901 start_unnest = entry->vme_start; 2902 end_unnest = 2903 (endaddr + pmap_nesting_size_min - 1) & 2904 ~(pmap_nesting_size_min - 1); 2905 vm_map_clip_unnest(map, entry, start_unnest, end_unnest); 2906 } 2907#endif /* NO_NESTED_PMAP */ 2908 if (endaddr < entry->vme_end) { 2909 if (entry->object.vm_object && 2910 !entry->is_sub_map && 2911 entry->object.vm_object->phys_contiguous) { 2912 pmap_remove(map->pmap, 2913 (addr64_t)(entry->vme_start), 2914 (addr64_t)(entry->vme_end)); 2915 } 2916 _vm_map_clip_end(&map->hdr, entry, endaddr); 2917 UPDATE_FIRST_FREE(map, map->first_free); 2918 } 2919} 2920 2921 2922#define vm_map_copy_clip_end(copy, entry, endaddr) \ 2923 MACRO_BEGIN \ 2924 if ((endaddr) < (entry)->vme_end) \ 2925 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \ 2926 MACRO_END 2927 2928/* 2929 * This routine is called only when it is known that 2930 * the entry must be split. 2931 */ 2932static void 2933_vm_map_clip_end( 2934 register struct vm_map_header *map_header, 2935 register vm_map_entry_t entry, 2936 register vm_map_offset_t end) 2937{ 2938 register vm_map_entry_t new_entry; 2939 2940 /* 2941 * Create a new entry and insert it 2942 * AFTER the specified entry 2943 */ 2944 2945 new_entry = _vm_map_entry_create(map_header); 2946 vm_map_entry_copy_full(new_entry, entry); 2947 2948 new_entry->vme_start = entry->vme_end = end; 2949 new_entry->offset += (end - entry->vme_start); 2950 2951 _vm_map_entry_link(map_header, entry, new_entry); 2952 2953 if (entry->is_sub_map) 2954 vm_map_reference(new_entry->object.sub_map); 2955 else 2956 vm_object_reference(new_entry->object.vm_object); 2957} 2958 2959 2960/* 2961 * VM_MAP_RANGE_CHECK: [ internal use only ] 2962 * 2963 * Asserts that the starting and ending region 2964 * addresses fall within the valid range of the map. 2965 */ 2966#define VM_MAP_RANGE_CHECK(map, start, end) \ 2967 MACRO_BEGIN \ 2968 if (start < vm_map_min(map)) \ 2969 start = vm_map_min(map); \ 2970 if (end > vm_map_max(map)) \ 2971 end = vm_map_max(map); \ 2972 if (start > end) \ 2973 start = end; \ 2974 MACRO_END 2975 2976/* 2977 * vm_map_range_check: [ internal use only ] 2978 * 2979 * Check that the region defined by the specified start and 2980 * end addresses are wholly contained within a single map 2981 * entry or set of adjacent map entries of the spacified map, 2982 * i.e. the specified region contains no unmapped space. 2983 * If any or all of the region is unmapped, FALSE is returned. 2984 * Otherwise, TRUE is returned and if the output argument 'entry' 2985 * is not NULL it points to the map entry containing the start 2986 * of the region. 2987 * 2988 * The map is locked for reading on entry and is left locked. 2989 */ 2990static boolean_t 2991vm_map_range_check( 2992 register vm_map_t map, 2993 register vm_map_offset_t start, 2994 register vm_map_offset_t end, 2995 vm_map_entry_t *entry) 2996{ 2997 vm_map_entry_t cur; 2998 register vm_map_offset_t prev; 2999 3000 /* 3001 * Basic sanity checks first 3002 */ 3003 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) 3004 return (FALSE); 3005 3006 /* 3007 * Check first if the region starts within a valid 3008 * mapping for the map. 3009 */ 3010 if (!vm_map_lookup_entry(map, start, &cur)) 3011 return (FALSE); 3012 3013 /* 3014 * Optimize for the case that the region is contained 3015 * in a single map entry. 3016 */ 3017 if (entry != (vm_map_entry_t *) NULL) 3018 *entry = cur; 3019 if (end <= cur->vme_end) 3020 return (TRUE); 3021 3022 /* 3023 * If the region is not wholly contained within a 3024 * single entry, walk the entries looking for holes. 3025 */ 3026 prev = cur->vme_end; 3027 cur = cur->vme_next; 3028 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) { 3029 if (end <= cur->vme_end) 3030 return (TRUE); 3031 prev = cur->vme_end; 3032 cur = cur->vme_next; 3033 } 3034 return (FALSE); 3035} 3036 3037/* 3038 * vm_map_submap: [ kernel use only ] 3039 * 3040 * Mark the given range as handled by a subordinate map. 3041 * 3042 * This range must have been created with vm_map_find using 3043 * the vm_submap_object, and no other operations may have been 3044 * performed on this range prior to calling vm_map_submap. 3045 * 3046 * Only a limited number of operations can be performed 3047 * within this rage after calling vm_map_submap: 3048 * vm_fault 3049 * [Don't try vm_map_copyin!] 3050 * 3051 * To remove a submapping, one must first remove the 3052 * range from the superior map, and then destroy the 3053 * submap (if desired). [Better yet, don't try it.] 3054 */ 3055kern_return_t 3056vm_map_submap( 3057 vm_map_t map, 3058 vm_map_offset_t start, 3059 vm_map_offset_t end, 3060 vm_map_t submap, 3061 vm_map_offset_t offset, 3062#ifdef NO_NESTED_PMAP 3063 __unused 3064#endif /* NO_NESTED_PMAP */ 3065 boolean_t use_pmap) 3066{ 3067 vm_map_entry_t entry; 3068 register kern_return_t result = KERN_INVALID_ARGUMENT; 3069 register vm_object_t object; 3070 3071 vm_map_lock(map); 3072 3073 if (! vm_map_lookup_entry(map, start, &entry)) { 3074 entry = entry->vme_next; 3075 } 3076 3077 if (entry == vm_map_to_entry(map) || 3078 entry->is_sub_map) { 3079 vm_map_unlock(map); 3080 return KERN_INVALID_ARGUMENT; 3081 } 3082 3083 assert(!entry->use_pmap); /* we don't want to unnest anything here */ 3084 vm_map_clip_start(map, entry, start); 3085 vm_map_clip_end(map, entry, end); 3086 3087 if ((entry->vme_start == start) && (entry->vme_end == end) && 3088 (!entry->is_sub_map) && 3089 ((object = entry->object.vm_object) == vm_submap_object) && 3090 (object->resident_page_count == 0) && 3091 (object->copy == VM_OBJECT_NULL) && 3092 (object->shadow == VM_OBJECT_NULL) && 3093 (!object->pager_created)) { 3094 entry->offset = (vm_object_offset_t)offset; 3095 entry->object.vm_object = VM_OBJECT_NULL; 3096 vm_object_deallocate(object); 3097 entry->is_sub_map = TRUE; 3098 entry->object.sub_map = submap; 3099 vm_map_reference(submap); 3100 submap->mapped = TRUE; 3101 3102#ifndef NO_NESTED_PMAP 3103 if (use_pmap) { 3104 /* nest if platform code will allow */ 3105 if(submap->pmap == NULL) { 3106 submap->pmap = pmap_create((vm_map_size_t) 0, FALSE); 3107 if(submap->pmap == PMAP_NULL) { 3108 vm_map_unlock(map); 3109 return(KERN_NO_SPACE); 3110 } 3111 } 3112 result = pmap_nest(map->pmap, 3113 (entry->object.sub_map)->pmap, 3114 (addr64_t)start, 3115 (addr64_t)start, 3116 (uint64_t)(end - start)); 3117 if(result) 3118 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result); 3119 entry->use_pmap = TRUE; 3120 } 3121#else /* NO_NESTED_PMAP */ 3122 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end); 3123#endif /* NO_NESTED_PMAP */ 3124 result = KERN_SUCCESS; 3125 } 3126 vm_map_unlock(map); 3127 3128 return(result); 3129} 3130 3131/* 3132 * vm_map_protect: 3133 * 3134 * Sets the protection of the specified address 3135 * region in the target map. If "set_max" is 3136 * specified, the maximum protection is to be set; 3137 * otherwise, only the current protection is affected. 3138 */ 3139kern_return_t 3140vm_map_protect( 3141 register vm_map_t map, 3142 register vm_map_offset_t start, 3143 register vm_map_offset_t end, 3144 register vm_prot_t new_prot, 3145 register boolean_t set_max) 3146{ 3147 register vm_map_entry_t current; 3148 register vm_map_offset_t prev; 3149 vm_map_entry_t entry; 3150 vm_prot_t new_max; 3151 3152 XPR(XPR_VM_MAP, 3153 "vm_map_protect, 0x%X start 0x%X end 0x%X, new 0x%X %d", 3154 (integer_t)map, start, end, new_prot, set_max); 3155 3156 vm_map_lock(map); 3157 3158 if ((new_prot & VM_PROT_COPY) && !map->prot_copy_allow) { 3159 vm_map_unlock(map); 3160 return(KERN_PROTECTION_FAILURE); 3161 } 3162 3163 /* LP64todo - remove this check when vm_map_commpage64() 3164 * no longer has to stuff in a map_entry for the commpage 3165 * above the map's max_offset. 3166 */ 3167 if (start >= map->max_offset) { 3168 vm_map_unlock(map); 3169 return(KERN_INVALID_ADDRESS); 3170 } 3171 3172 /* 3173 * Lookup the entry. If it doesn't start in a valid 3174 * entry, return an error. 3175 */ 3176 if (! vm_map_lookup_entry(map, start, &entry)) { 3177 vm_map_unlock(map); 3178 return(KERN_INVALID_ADDRESS); 3179 } 3180 3181 /* 3182 * Make a first pass to check for protection and address 3183 * violations. 3184 */ 3185 3186 current = entry; 3187 prev = current->vme_start; 3188 while ((current != vm_map_to_entry(map)) && 3189 (current->vme_start < end)) { 3190 3191 /* 3192 * If there is a hole, return an error. 3193 */ 3194 if (current->vme_start != prev) { 3195 vm_map_unlock(map); 3196 return(KERN_INVALID_ADDRESS); 3197 } 3198 3199 new_max = current->max_protection; 3200 if(new_prot & VM_PROT_COPY) { 3201 new_max |= VM_PROT_WRITE; 3202 if ((new_prot & (new_max | VM_PROT_COPY)) != new_prot) { 3203 vm_map_unlock(map); 3204 return(KERN_PROTECTION_FAILURE); 3205 } 3206 } else { 3207 if ((new_prot & new_max) != new_prot) { 3208 vm_map_unlock(map); 3209 return(KERN_PROTECTION_FAILURE); 3210 } 3211 } 3212 3213#if CONFIG_EMBEDDED 3214 if (new_prot & VM_PROT_WRITE) { 3215 if (new_prot & VM_PROT_EXECUTE) { 3216 printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__); 3217 new_prot &= ~VM_PROT_EXECUTE; 3218 } 3219 } 3220#endif 3221 3222 prev = current->vme_end; 3223 current = current->vme_next; 3224 } 3225 if (end > prev) { 3226 vm_map_unlock(map); 3227 return(KERN_INVALID_ADDRESS); 3228 } 3229 3230 /* 3231 * Go back and fix up protections. 3232 * Clip to start here if the range starts within 3233 * the entry. 3234 */ 3235 3236 current = entry; 3237 if (current != vm_map_to_entry(map)) { 3238 /* clip and unnest if necessary */ 3239 vm_map_clip_start(map, current, start); 3240 } 3241 3242 while ((current != vm_map_to_entry(map)) && 3243 (current->vme_start < end)) { 3244 3245 vm_prot_t old_prot; 3246 3247 vm_map_clip_end(map, current, end); 3248 3249 assert(!current->use_pmap); /* clipping did unnest if needed */ 3250 3251 old_prot = current->protection; 3252 3253 if(new_prot & VM_PROT_COPY) { 3254 /* caller is asking specifically to copy the */ 3255 /* mapped data, this implies that max protection */ 3256 /* will include write. Caller must be prepared */ 3257 /* for loss of shared memory communication in the */ 3258 /* target area after taking this step */ 3259 current->needs_copy = TRUE; 3260 current->max_protection |= VM_PROT_WRITE; 3261 } 3262 3263 if (set_max) 3264 current->protection = 3265 (current->max_protection = 3266 new_prot & ~VM_PROT_COPY) & 3267 old_prot; 3268 else 3269 current->protection = new_prot & ~VM_PROT_COPY; 3270 3271 /* 3272 * Update physical map if necessary. 3273 * If the request is to turn off write protection, 3274 * we won't do it for real (in pmap). This is because 3275 * it would cause copy-on-write to fail. We've already 3276 * set, the new protection in the map, so if a 3277 * write-protect fault occurred, it will be fixed up 3278 * properly, COW or not. 3279 */ 3280 if (current->protection != old_prot) { 3281 /* Look one level in we support nested pmaps */ 3282 /* from mapped submaps which are direct entries */ 3283 /* in our map */ 3284 3285 vm_prot_t prot; 3286 3287 prot = current->protection & ~VM_PROT_WRITE; 3288 3289 if (override_nx(map, current->alias) && prot) 3290 prot |= VM_PROT_EXECUTE; 3291 3292 if (current->is_sub_map && current->use_pmap) { 3293 pmap_protect(current->object.sub_map->pmap, 3294 current->vme_start, 3295 current->vme_end, 3296 prot); 3297 } else { 3298 pmap_protect(map->pmap, 3299 current->vme_start, 3300 current->vme_end, 3301 prot); 3302 } 3303 } 3304 current = current->vme_next; 3305 } 3306 3307 current = entry; 3308 while ((current != vm_map_to_entry(map)) && 3309 (current->vme_start <= end)) { 3310 vm_map_simplify_entry(map, current); 3311 current = current->vme_next; 3312 } 3313 3314 vm_map_unlock(map); 3315 return(KERN_SUCCESS); 3316} 3317 3318/* 3319 * vm_map_inherit: 3320 * 3321 * Sets the inheritance of the specified address 3322 * range in the target map. Inheritance 3323 * affects how the map will be shared with 3324 * child maps at the time of vm_map_fork. 3325 */ 3326kern_return_t 3327vm_map_inherit( 3328 register vm_map_t map, 3329 register vm_map_offset_t start, 3330 register vm_map_offset_t end, 3331 register vm_inherit_t new_inheritance) 3332{ 3333 register vm_map_entry_t entry; 3334 vm_map_entry_t temp_entry; 3335 3336 vm_map_lock(map); 3337 3338 VM_MAP_RANGE_CHECK(map, start, end); 3339 3340 if (vm_map_lookup_entry(map, start, &temp_entry)) { 3341 entry = temp_entry; 3342 } 3343 else { 3344 temp_entry = temp_entry->vme_next; 3345 entry = temp_entry; 3346 } 3347 3348 /* first check entire range for submaps which can't support the */ 3349 /* given inheritance. */ 3350 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 3351 if(entry->is_sub_map) { 3352 if(new_inheritance == VM_INHERIT_COPY) { 3353 vm_map_unlock(map); 3354 return(KERN_INVALID_ARGUMENT); 3355 } 3356 } 3357 3358 entry = entry->vme_next; 3359 } 3360 3361 entry = temp_entry; 3362 if (entry != vm_map_to_entry(map)) { 3363 /* clip and unnest if necessary */ 3364 vm_map_clip_start(map, entry, start); 3365 } 3366 3367 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 3368 vm_map_clip_end(map, entry, end); 3369 assert(!entry->use_pmap); /* clip did unnest if needed */ 3370 3371 entry->inheritance = new_inheritance; 3372 3373 entry = entry->vme_next; 3374 } 3375 3376 vm_map_unlock(map); 3377 return(KERN_SUCCESS); 3378} 3379 3380/* 3381 * Update the accounting for the amount of wired memory in this map. If the user has 3382 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails. 3383 */ 3384 3385static kern_return_t 3386add_wire_counts( 3387 vm_map_t map, 3388 vm_map_entry_t entry, 3389 boolean_t user_wire) 3390{ 3391 vm_map_size_t size; 3392 3393 if (user_wire) { 3394 3395 /* 3396 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring 3397 * this map entry. 3398 */ 3399 3400 if (entry->user_wired_count == 0) { 3401 size = entry->vme_end - entry->vme_start; 3402 3403 /* 3404 * Since this is the first time the user is wiring this map entry, check to see if we're 3405 * exceeding the user wire limits. There is a per map limit which is the smaller of either 3406 * the process's rlimit or the global vm_user_wire_limit which caps this value. There is also 3407 * a system-wide limit on the amount of memory all users can wire. If the user is over either 3408 * limit, then we fail. 3409 */ 3410 3411 if(size + map->user_wire_size > MIN(map->user_wire_limit, vm_user_wire_limit) || 3412 size + ptoa_64(vm_page_wire_count) > vm_global_user_wire_limit) 3413 return KERN_RESOURCE_SHORTAGE; 3414 3415 /* 3416 * The first time the user wires an entry, we also increment the wired_count and add this to 3417 * the total that has been wired in the map. 3418 */ 3419 3420 if (entry->wired_count >= MAX_WIRE_COUNT) 3421 return KERN_FAILURE; 3422 3423 entry->wired_count++; 3424 map->user_wire_size += size; 3425 } 3426 3427 if (entry->user_wired_count >= MAX_WIRE_COUNT) 3428 return KERN_FAILURE; 3429 3430 entry->user_wired_count++; 3431 3432 } else { 3433 3434 /* 3435 * The kernel's wiring the memory. Just bump the count and continue. 3436 */ 3437 3438 if (entry->wired_count >= MAX_WIRE_COUNT) 3439 panic("vm_map_wire: too many wirings"); 3440 3441 entry->wired_count++; 3442 } 3443 3444 return KERN_SUCCESS; 3445} 3446 3447/* 3448 * Update the memory wiring accounting now that the given map entry is being unwired. 3449 */ 3450 3451static void 3452subtract_wire_counts( 3453 vm_map_t map, 3454 vm_map_entry_t entry, 3455 boolean_t user_wire) 3456{ 3457 3458 if (user_wire) { 3459 3460 /* 3461 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference. 3462 */ 3463 3464 if (entry->user_wired_count == 1) { 3465 3466 /* 3467 * We're removing the last user wire reference. Decrement the wired_count and the total 3468 * user wired memory for this map. 3469 */ 3470 3471 assert(entry->wired_count >= 1); 3472 entry->wired_count--; 3473 map->user_wire_size -= entry->vme_end - entry->vme_start; 3474 } 3475 3476 assert(entry->user_wired_count >= 1); 3477 entry->user_wired_count--; 3478 3479 } else { 3480 3481 /* 3482 * The kernel is unwiring the memory. Just update the count. 3483 */ 3484 3485 assert(entry->wired_count >= 1); 3486 entry->wired_count--; 3487 } 3488} 3489 3490/* 3491 * vm_map_wire: 3492 * 3493 * Sets the pageability of the specified address range in the 3494 * target map as wired. Regions specified as not pageable require 3495 * locked-down physical memory and physical page maps. The 3496 * access_type variable indicates types of accesses that must not 3497 * generate page faults. This is checked against protection of 3498 * memory being locked-down. 3499 * 3500 * The map must not be locked, but a reference must remain to the 3501 * map throughout the call. 3502 */ 3503static kern_return_t 3504vm_map_wire_nested( 3505 register vm_map_t map, 3506 register vm_map_offset_t start, 3507 register vm_map_offset_t end, 3508 register vm_prot_t access_type, 3509 boolean_t user_wire, 3510 pmap_t map_pmap, 3511 vm_map_offset_t pmap_addr) 3512{ 3513 register vm_map_entry_t entry; 3514 struct vm_map_entry *first_entry, tmp_entry; 3515 vm_map_t real_map; 3516 register vm_map_offset_t s,e; 3517 kern_return_t rc; 3518 boolean_t need_wakeup; 3519 boolean_t main_map = FALSE; 3520 wait_interrupt_t interruptible_state; 3521 thread_t cur_thread; 3522 unsigned int last_timestamp; 3523 vm_map_size_t size; 3524 3525 vm_map_lock(map); 3526 if(map_pmap == NULL) 3527 main_map = TRUE; 3528 last_timestamp = map->timestamp; 3529 3530 VM_MAP_RANGE_CHECK(map, start, end); 3531 assert(page_aligned(start)); 3532 assert(page_aligned(end)); 3533 if (start == end) { 3534 /* We wired what the caller asked for, zero pages */ 3535 vm_map_unlock(map); 3536 return KERN_SUCCESS; 3537 } 3538 3539 need_wakeup = FALSE; 3540 cur_thread = current_thread(); 3541 3542 s = start; 3543 rc = KERN_SUCCESS; 3544 3545 if (vm_map_lookup_entry(map, s, &first_entry)) { 3546 entry = first_entry; 3547 /* 3548 * vm_map_clip_start will be done later. 3549 * We don't want to unnest any nested submaps here ! 3550 */ 3551 } else { 3552 /* Start address is not in map */ 3553 rc = KERN_INVALID_ADDRESS; 3554 goto done; 3555 } 3556 3557 while ((entry != vm_map_to_entry(map)) && (s < end)) { 3558 /* 3559 * At this point, we have wired from "start" to "s". 3560 * We still need to wire from "s" to "end". 3561 * 3562 * "entry" hasn't been clipped, so it could start before "s" 3563 * and/or end after "end". 3564 */ 3565 3566 /* "e" is how far we want to wire in this entry */ 3567 e = entry->vme_end; 3568 if (e > end) 3569 e = end; 3570 3571 /* 3572 * If another thread is wiring/unwiring this entry then 3573 * block after informing other thread to wake us up. 3574 */ 3575 if (entry->in_transition) { 3576 wait_result_t wait_result; 3577 3578 /* 3579 * We have not clipped the entry. Make sure that 3580 * the start address is in range so that the lookup 3581 * below will succeed. 3582 * "s" is the current starting point: we've already 3583 * wired from "start" to "s" and we still have 3584 * to wire from "s" to "end". 3585 */ 3586 3587 entry->needs_wakeup = TRUE; 3588 3589 /* 3590 * wake up anybody waiting on entries that we have 3591 * already wired. 3592 */ 3593 if (need_wakeup) { 3594 vm_map_entry_wakeup(map); 3595 need_wakeup = FALSE; 3596 } 3597 /* 3598 * User wiring is interruptible 3599 */ 3600 wait_result = vm_map_entry_wait(map, 3601 (user_wire) ? THREAD_ABORTSAFE : 3602 THREAD_UNINT); 3603 if (user_wire && wait_result == THREAD_INTERRUPTED) { 3604 /* 3605 * undo the wirings we have done so far 3606 * We do not clear the needs_wakeup flag, 3607 * because we cannot tell if we were the 3608 * only one waiting. 3609 */ 3610 rc = KERN_FAILURE; 3611 goto done; 3612 } 3613 3614 /* 3615 * Cannot avoid a lookup here. reset timestamp. 3616 */ 3617 last_timestamp = map->timestamp; 3618 3619 /* 3620 * The entry could have been clipped, look it up again. 3621 * Worse that can happen is, it may not exist anymore. 3622 */ 3623 if (!vm_map_lookup_entry(map, s, &first_entry)) { 3624 if (!user_wire) 3625 panic("vm_map_wire: re-lookup failed"); 3626 3627 /* 3628 * User: undo everything upto the previous 3629 * entry. let vm_map_unwire worry about 3630 * checking the validity of the range. 3631 */ 3632 rc = KERN_FAILURE; 3633 goto done; 3634 } 3635 entry = first_entry; 3636 continue; 3637 } 3638 3639 if (entry->is_sub_map) { 3640 vm_map_offset_t sub_start; 3641 vm_map_offset_t sub_end; 3642 vm_map_offset_t local_start; 3643 vm_map_offset_t local_end; 3644 pmap_t pmap; 3645 3646 vm_map_clip_start(map, entry, s); 3647 vm_map_clip_end(map, entry, end); 3648 3649 sub_start = entry->offset; 3650 sub_end = entry->vme_end; 3651 sub_end += entry->offset - entry->vme_start; 3652 3653 local_end = entry->vme_end; 3654 if(map_pmap == NULL) { 3655 vm_object_t object; 3656 vm_object_offset_t offset; 3657 vm_prot_t prot; 3658 boolean_t wired; 3659 vm_map_entry_t local_entry; 3660 vm_map_version_t version; 3661 vm_map_t lookup_map; 3662 3663 if(entry->use_pmap) { 3664 pmap = entry->object.sub_map->pmap; 3665 /* ppc implementation requires that */ 3666 /* submaps pmap address ranges line */ 3667 /* up with parent map */ 3668#ifdef notdef 3669 pmap_addr = sub_start; 3670#endif 3671 pmap_addr = s; 3672 } else { 3673 pmap = map->pmap; 3674 pmap_addr = s; 3675 } 3676 3677 if (entry->wired_count) { 3678 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 3679 goto done; 3680 3681 /* 3682 * The map was not unlocked: 3683 * no need to goto re-lookup. 3684 * Just go directly to next entry. 3685 */ 3686 entry = entry->vme_next; 3687 s = entry->vme_start; 3688 continue; 3689 3690 } 3691 3692 /* call vm_map_lookup_locked to */ 3693 /* cause any needs copy to be */ 3694 /* evaluated */ 3695 local_start = entry->vme_start; 3696 lookup_map = map; 3697 vm_map_lock_write_to_read(map); 3698 if(vm_map_lookup_locked( 3699 &lookup_map, local_start, 3700 access_type, 3701 OBJECT_LOCK_EXCLUSIVE, 3702 &version, &object, 3703 &offset, &prot, &wired, 3704 NULL, 3705 &real_map)) { 3706 3707 vm_map_unlock_read(lookup_map); 3708 vm_map_unwire(map, start, 3709 s, user_wire); 3710 return(KERN_FAILURE); 3711 } 3712 if(real_map != lookup_map) 3713 vm_map_unlock(real_map); 3714 vm_map_unlock_read(lookup_map); 3715 vm_map_lock(map); 3716 vm_object_unlock(object); 3717 3718 /* we unlocked, so must re-lookup */ 3719 if (!vm_map_lookup_entry(map, 3720 local_start, 3721 &local_entry)) { 3722 rc = KERN_FAILURE; 3723 goto done; 3724 } 3725 3726 /* 3727 * entry could have been "simplified", 3728 * so re-clip 3729 */ 3730 entry = local_entry; 3731 assert(s == local_start); 3732 vm_map_clip_start(map, entry, s); 3733 vm_map_clip_end(map, entry, end); 3734 /* re-compute "e" */ 3735 e = entry->vme_end; 3736 if (e > end) 3737 e = end; 3738 3739 /* did we have a change of type? */ 3740 if (!entry->is_sub_map) { 3741 last_timestamp = map->timestamp; 3742 continue; 3743 } 3744 } else { 3745 local_start = entry->vme_start; 3746 pmap = map_pmap; 3747 } 3748 3749 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 3750 goto done; 3751 3752 entry->in_transition = TRUE; 3753 3754 vm_map_unlock(map); 3755 rc = vm_map_wire_nested(entry->object.sub_map, 3756 sub_start, sub_end, 3757 access_type, 3758 user_wire, pmap, pmap_addr); 3759 vm_map_lock(map); 3760 3761 /* 3762 * Find the entry again. It could have been clipped 3763 * after we unlocked the map. 3764 */ 3765 if (!vm_map_lookup_entry(map, local_start, 3766 &first_entry)) 3767 panic("vm_map_wire: re-lookup failed"); 3768 entry = first_entry; 3769 3770 assert(local_start == s); 3771 /* re-compute "e" */ 3772 e = entry->vme_end; 3773 if (e > end) 3774 e = end; 3775 3776 last_timestamp = map->timestamp; 3777 while ((entry != vm_map_to_entry(map)) && 3778 (entry->vme_start < e)) { 3779 assert(entry->in_transition); 3780 entry->in_transition = FALSE; 3781 if (entry->needs_wakeup) { 3782 entry->needs_wakeup = FALSE; 3783 need_wakeup = TRUE; 3784 } 3785 if (rc != KERN_SUCCESS) {/* from vm_*_wire */ 3786 subtract_wire_counts(map, entry, user_wire); 3787 } 3788 entry = entry->vme_next; 3789 } 3790 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 3791 goto done; 3792 } 3793 3794 /* no need to relookup again */ 3795 s = entry->vme_start; 3796 continue; 3797 } 3798 3799 /* 3800 * If this entry is already wired then increment 3801 * the appropriate wire reference count. 3802 */ 3803 if (entry->wired_count) { 3804 /* 3805 * entry is already wired down, get our reference 3806 * after clipping to our range. 3807 */ 3808 vm_map_clip_start(map, entry, s); 3809 vm_map_clip_end(map, entry, end); 3810 3811 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 3812 goto done; 3813 3814 /* map was not unlocked: no need to relookup */ 3815 entry = entry->vme_next; 3816 s = entry->vme_start; 3817 continue; 3818 } 3819 3820 /* 3821 * Unwired entry or wire request transmitted via submap 3822 */ 3823 3824 3825 /* 3826 * Perform actions of vm_map_lookup that need the write 3827 * lock on the map: create a shadow object for a 3828 * copy-on-write region, or an object for a zero-fill 3829 * region. 3830 */ 3831 size = entry->vme_end - entry->vme_start; 3832 /* 3833 * If wiring a copy-on-write page, we need to copy it now 3834 * even if we're only (currently) requesting read access. 3835 * This is aggressive, but once it's wired we can't move it. 3836 */ 3837 if (entry->needs_copy) { 3838 vm_object_shadow(&entry->object.vm_object, 3839 &entry->offset, size); 3840 entry->needs_copy = FALSE; 3841 } else if (entry->object.vm_object == VM_OBJECT_NULL) { 3842 entry->object.vm_object = vm_object_allocate(size); 3843 entry->offset = (vm_object_offset_t)0; 3844 } 3845 3846 vm_map_clip_start(map, entry, s); 3847 vm_map_clip_end(map, entry, end); 3848 3849 /* re-compute "e" */ 3850 e = entry->vme_end; 3851 if (e > end) 3852 e = end; 3853 3854 /* 3855 * Check for holes and protection mismatch. 3856 * Holes: Next entry should be contiguous unless this 3857 * is the end of the region. 3858 * Protection: Access requested must be allowed, unless 3859 * wiring is by protection class 3860 */ 3861 if ((entry->vme_end < end) && 3862 ((entry->vme_next == vm_map_to_entry(map)) || 3863 (entry->vme_next->vme_start > entry->vme_end))) { 3864 /* found a hole */ 3865 rc = KERN_INVALID_ADDRESS; 3866 goto done; 3867 } 3868 if ((entry->protection & access_type) != access_type) { 3869 /* found a protection problem */ 3870 rc = KERN_PROTECTION_FAILURE; 3871 goto done; 3872 } 3873 3874 assert(entry->wired_count == 0 && entry->user_wired_count == 0); 3875 3876 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) 3877 goto done; 3878 3879 entry->in_transition = TRUE; 3880 3881 /* 3882 * This entry might get split once we unlock the map. 3883 * In vm_fault_wire(), we need the current range as 3884 * defined by this entry. In order for this to work 3885 * along with a simultaneous clip operation, we make a 3886 * temporary copy of this entry and use that for the 3887 * wiring. Note that the underlying objects do not 3888 * change during a clip. 3889 */ 3890 tmp_entry = *entry; 3891 3892 /* 3893 * The in_transition state guarentees that the entry 3894 * (or entries for this range, if split occured) will be 3895 * there when the map lock is acquired for the second time. 3896 */ 3897 vm_map_unlock(map); 3898 3899 if (!user_wire && cur_thread != THREAD_NULL) 3900 interruptible_state = thread_interrupt_level(THREAD_UNINT); 3901 else 3902 interruptible_state = THREAD_UNINT; 3903 3904 if(map_pmap) 3905 rc = vm_fault_wire(map, 3906 &tmp_entry, map_pmap, pmap_addr); 3907 else 3908 rc = vm_fault_wire(map, 3909 &tmp_entry, map->pmap, 3910 tmp_entry.vme_start); 3911 3912 if (!user_wire && cur_thread != THREAD_NULL) 3913 thread_interrupt_level(interruptible_state); 3914 3915 vm_map_lock(map); 3916 3917 if (last_timestamp+1 != map->timestamp) { 3918 /* 3919 * Find the entry again. It could have been clipped 3920 * after we unlocked the map. 3921 */ 3922 if (!vm_map_lookup_entry(map, tmp_entry.vme_start, 3923 &first_entry)) 3924 panic("vm_map_wire: re-lookup failed"); 3925 3926 entry = first_entry; 3927 } 3928 3929 last_timestamp = map->timestamp; 3930 3931 while ((entry != vm_map_to_entry(map)) && 3932 (entry->vme_start < tmp_entry.vme_end)) { 3933 assert(entry->in_transition); 3934 entry->in_transition = FALSE; 3935 if (entry->needs_wakeup) { 3936 entry->needs_wakeup = FALSE; 3937 need_wakeup = TRUE; 3938 } 3939 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 3940 subtract_wire_counts(map, entry, user_wire); 3941 } 3942 entry = entry->vme_next; 3943 } 3944 3945 if (rc != KERN_SUCCESS) { /* from vm_*_wire */ 3946 goto done; 3947 } 3948 3949 s = entry->vme_start; 3950 } /* end while loop through map entries */ 3951 3952done: 3953 if (rc == KERN_SUCCESS) { 3954 /* repair any damage we may have made to the VM map */ 3955 vm_map_simplify_range(map, start, end); 3956 } 3957 3958 vm_map_unlock(map); 3959 3960 /* 3961 * wake up anybody waiting on entries we wired. 3962 */ 3963 if (need_wakeup) 3964 vm_map_entry_wakeup(map); 3965 3966 if (rc != KERN_SUCCESS) { 3967 /* undo what has been wired so far */ 3968 vm_map_unwire(map, start, s, user_wire); 3969 } 3970 3971 return rc; 3972 3973} 3974 3975kern_return_t 3976vm_map_wire( 3977 register vm_map_t map, 3978 register vm_map_offset_t start, 3979 register vm_map_offset_t end, 3980 register vm_prot_t access_type, 3981 boolean_t user_wire) 3982{ 3983 3984 kern_return_t kret; 3985 3986#ifdef ppc 3987 /* 3988 * the calls to mapping_prealloc and mapping_relpre 3989 * (along with the VM_MAP_RANGE_CHECK to insure a 3990 * resonable range was passed in) are 3991 * currently necessary because 3992 * we haven't enabled kernel pre-emption 3993 * and/or the pmap_enter cannot purge and re-use 3994 * existing mappings 3995 */ 3996 VM_MAP_RANGE_CHECK(map, start, end); 3997 mapping_prealloc(end - start); 3998#endif 3999 kret = vm_map_wire_nested(map, start, end, access_type, 4000 user_wire, (pmap_t)NULL, 0); 4001#ifdef ppc 4002 mapping_relpre(); 4003#endif 4004 return kret; 4005} 4006 4007/* 4008 * vm_map_unwire: 4009 * 4010 * Sets the pageability of the specified address range in the target 4011 * as pageable. Regions specified must have been wired previously. 4012 * 4013 * The map must not be locked, but a reference must remain to the map 4014 * throughout the call. 4015 * 4016 * Kernel will panic on failures. User unwire ignores holes and 4017 * unwired and intransition entries to avoid losing memory by leaving 4018 * it unwired. 4019 */ 4020static kern_return_t 4021vm_map_unwire_nested( 4022 register vm_map_t map, 4023 register vm_map_offset_t start, 4024 register vm_map_offset_t end, 4025 boolean_t user_wire, 4026 pmap_t map_pmap, 4027 vm_map_offset_t pmap_addr) 4028{ 4029 register vm_map_entry_t entry; 4030 struct vm_map_entry *first_entry, tmp_entry; 4031 boolean_t need_wakeup; 4032 boolean_t main_map = FALSE; 4033 unsigned int last_timestamp; 4034 4035 vm_map_lock(map); 4036 if(map_pmap == NULL) 4037 main_map = TRUE; 4038 last_timestamp = map->timestamp; 4039 4040 VM_MAP_RANGE_CHECK(map, start, end); 4041 assert(page_aligned(start)); 4042 assert(page_aligned(end)); 4043 4044 if (start == end) { 4045 /* We unwired what the caller asked for: zero pages */ 4046 vm_map_unlock(map); 4047 return KERN_SUCCESS; 4048 } 4049 4050 if (vm_map_lookup_entry(map, start, &first_entry)) { 4051 entry = first_entry; 4052 /* 4053 * vm_map_clip_start will be done later. 4054 * We don't want to unnest any nested sub maps here ! 4055 */ 4056 } 4057 else { 4058 if (!user_wire) { 4059 panic("vm_map_unwire: start not found"); 4060 } 4061 /* Start address is not in map. */ 4062 vm_map_unlock(map); 4063 return(KERN_INVALID_ADDRESS); 4064 } 4065 4066 need_wakeup = FALSE; 4067 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 4068 if (entry->in_transition) { 4069 /* 4070 * 1) 4071 * Another thread is wiring down this entry. Note 4072 * that if it is not for the other thread we would 4073 * be unwiring an unwired entry. This is not 4074 * permitted. If we wait, we will be unwiring memory 4075 * we did not wire. 4076 * 4077 * 2) 4078 * Another thread is unwiring this entry. We did not 4079 * have a reference to it, because if we did, this 4080 * entry will not be getting unwired now. 4081 */ 4082 if (!user_wire) { 4083 /* 4084 * XXX FBDP 4085 * This could happen: there could be some 4086 * overlapping vslock/vsunlock operations 4087 * going on. 4088 * We should probably just wait and retry, 4089 * but then we have to be careful that this 4090 * entry could get "simplified" after 4091 * "in_transition" gets unset and before 4092 * we re-lookup the entry, so we would 4093 * have to re-clip the entry to avoid 4094 * re-unwiring what we have already unwired... 4095 * See vm_map_wire_nested(). 4096 * 4097 * Or we could just ignore "in_transition" 4098 * here and proceed to decement the wired 4099 * count(s) on this entry. That should be fine 4100 * as long as "wired_count" doesn't drop all 4101 * the way to 0 (and we should panic if THAT 4102 * happens). 4103 */ 4104 panic("vm_map_unwire: in_transition entry"); 4105 } 4106 4107 entry = entry->vme_next; 4108 continue; 4109 } 4110 4111 if (entry->is_sub_map) { 4112 vm_map_offset_t sub_start; 4113 vm_map_offset_t sub_end; 4114 vm_map_offset_t local_end; 4115 pmap_t pmap; 4116 4117 vm_map_clip_start(map, entry, start); 4118 vm_map_clip_end(map, entry, end); 4119 4120 sub_start = entry->offset; 4121 sub_end = entry->vme_end - entry->vme_start; 4122 sub_end += entry->offset; 4123 local_end = entry->vme_end; 4124 if(map_pmap == NULL) { 4125 if(entry->use_pmap) { 4126 pmap = entry->object.sub_map->pmap; 4127 pmap_addr = sub_start; 4128 } else { 4129 pmap = map->pmap; 4130 pmap_addr = start; 4131 } 4132 if (entry->wired_count == 0 || 4133 (user_wire && entry->user_wired_count == 0)) { 4134 if (!user_wire) 4135 panic("vm_map_unwire: entry is unwired"); 4136 entry = entry->vme_next; 4137 continue; 4138 } 4139 4140 /* 4141 * Check for holes 4142 * Holes: Next entry should be contiguous unless 4143 * this is the end of the region. 4144 */ 4145 if (((entry->vme_end < end) && 4146 ((entry->vme_next == vm_map_to_entry(map)) || 4147 (entry->vme_next->vme_start 4148 > entry->vme_end)))) { 4149 if (!user_wire) 4150 panic("vm_map_unwire: non-contiguous region"); 4151/* 4152 entry = entry->vme_next; 4153 continue; 4154*/ 4155 } 4156 4157 subtract_wire_counts(map, entry, user_wire); 4158 4159 if (entry->wired_count != 0) { 4160 entry = entry->vme_next; 4161 continue; 4162 } 4163 4164 entry->in_transition = TRUE; 4165 tmp_entry = *entry;/* see comment in vm_map_wire() */ 4166 4167 /* 4168 * We can unlock the map now. The in_transition state 4169 * guarantees existance of the entry. 4170 */ 4171 vm_map_unlock(map); 4172 vm_map_unwire_nested(entry->object.sub_map, 4173 sub_start, sub_end, user_wire, pmap, pmap_addr); 4174 vm_map_lock(map); 4175 4176 if (last_timestamp+1 != map->timestamp) { 4177 /* 4178 * Find the entry again. It could have been 4179 * clipped or deleted after we unlocked the map. 4180 */ 4181 if (!vm_map_lookup_entry(map, 4182 tmp_entry.vme_start, 4183 &first_entry)) { 4184 if (!user_wire) 4185 panic("vm_map_unwire: re-lookup failed"); 4186 entry = first_entry->vme_next; 4187 } else 4188 entry = first_entry; 4189 } 4190 last_timestamp = map->timestamp; 4191 4192 /* 4193 * clear transition bit for all constituent entries 4194 * that were in the original entry (saved in 4195 * tmp_entry). Also check for waiters. 4196 */ 4197 while ((entry != vm_map_to_entry(map)) && 4198 (entry->vme_start < tmp_entry.vme_end)) { 4199 assert(entry->in_transition); 4200 entry->in_transition = FALSE; 4201 if (entry->needs_wakeup) { 4202 entry->needs_wakeup = FALSE; 4203 need_wakeup = TRUE; 4204 } 4205 entry = entry->vme_next; 4206 } 4207 continue; 4208 } else { 4209 vm_map_unlock(map); 4210 vm_map_unwire_nested(entry->object.sub_map, 4211 sub_start, sub_end, user_wire, map_pmap, 4212 pmap_addr); 4213 vm_map_lock(map); 4214 4215 if (last_timestamp+1 != map->timestamp) { 4216 /* 4217 * Find the entry again. It could have been 4218 * clipped or deleted after we unlocked the map. 4219 */ 4220 if (!vm_map_lookup_entry(map, 4221 tmp_entry.vme_start, 4222 &first_entry)) { 4223 if (!user_wire) 4224 panic("vm_map_unwire: re-lookup failed"); 4225 entry = first_entry->vme_next; 4226 } else 4227 entry = first_entry; 4228 } 4229 last_timestamp = map->timestamp; 4230 } 4231 } 4232 4233 4234 if ((entry->wired_count == 0) || 4235 (user_wire && entry->user_wired_count == 0)) { 4236 if (!user_wire) 4237 panic("vm_map_unwire: entry is unwired"); 4238 4239 entry = entry->vme_next; 4240 continue; 4241 } 4242 4243 assert(entry->wired_count > 0 && 4244 (!user_wire || entry->user_wired_count > 0)); 4245 4246 vm_map_clip_start(map, entry, start); 4247 vm_map_clip_end(map, entry, end); 4248 4249 /* 4250 * Check for holes 4251 * Holes: Next entry should be contiguous unless 4252 * this is the end of the region. 4253 */ 4254 if (((entry->vme_end < end) && 4255 ((entry->vme_next == vm_map_to_entry(map)) || 4256 (entry->vme_next->vme_start > entry->vme_end)))) { 4257 4258 if (!user_wire) 4259 panic("vm_map_unwire: non-contiguous region"); 4260 entry = entry->vme_next; 4261 continue; 4262 } 4263 4264 subtract_wire_counts(map, entry, user_wire); 4265 4266 if (entry->wired_count != 0) { 4267 entry = entry->vme_next; 4268 continue; 4269 } 4270 4271 entry->in_transition = TRUE; 4272 tmp_entry = *entry; /* see comment in vm_map_wire() */ 4273 4274 /* 4275 * We can unlock the map now. The in_transition state 4276 * guarantees existance of the entry. 4277 */ 4278 vm_map_unlock(map); 4279 if(map_pmap) { 4280 vm_fault_unwire(map, 4281 &tmp_entry, FALSE, map_pmap, pmap_addr); 4282 } else { 4283 vm_fault_unwire(map, 4284 &tmp_entry, FALSE, map->pmap, 4285 tmp_entry.vme_start); 4286 } 4287 vm_map_lock(map); 4288 4289 if (last_timestamp+1 != map->timestamp) { 4290 /* 4291 * Find the entry again. It could have been clipped 4292 * or deleted after we unlocked the map. 4293 */ 4294 if (!vm_map_lookup_entry(map, tmp_entry.vme_start, 4295 &first_entry)) { 4296 if (!user_wire) 4297 panic("vm_map_unwire: re-lookup failed"); 4298 entry = first_entry->vme_next; 4299 } else 4300 entry = first_entry; 4301 } 4302 last_timestamp = map->timestamp; 4303 4304 /* 4305 * clear transition bit for all constituent entries that 4306 * were in the original entry (saved in tmp_entry). Also 4307 * check for waiters. 4308 */ 4309 while ((entry != vm_map_to_entry(map)) && 4310 (entry->vme_start < tmp_entry.vme_end)) { 4311 assert(entry->in_transition); 4312 entry->in_transition = FALSE; 4313 if (entry->needs_wakeup) { 4314 entry->needs_wakeup = FALSE; 4315 need_wakeup = TRUE; 4316 } 4317 entry = entry->vme_next; 4318 } 4319 } 4320 4321 /* 4322 * We might have fragmented the address space when we wired this 4323 * range of addresses. Attempt to re-coalesce these VM map entries 4324 * with their neighbors now that they're no longer wired. 4325 * Under some circumstances, address space fragmentation can 4326 * prevent VM object shadow chain collapsing, which can cause 4327 * swap space leaks. 4328 */ 4329 vm_map_simplify_range(map, start, end); 4330 4331 vm_map_unlock(map); 4332 /* 4333 * wake up anybody waiting on entries that we have unwired. 4334 */ 4335 if (need_wakeup) 4336 vm_map_entry_wakeup(map); 4337 return(KERN_SUCCESS); 4338 4339} 4340 4341kern_return_t 4342vm_map_unwire( 4343 register vm_map_t map, 4344 register vm_map_offset_t start, 4345 register vm_map_offset_t end, 4346 boolean_t user_wire) 4347{ 4348 return vm_map_unwire_nested(map, start, end, 4349 user_wire, (pmap_t)NULL, 0); 4350} 4351 4352 4353/* 4354 * vm_map_entry_delete: [ internal use only ] 4355 * 4356 * Deallocate the given entry from the target map. 4357 */ 4358static void 4359vm_map_entry_delete( 4360 register vm_map_t map, 4361 register vm_map_entry_t entry) 4362{ 4363 register vm_map_offset_t s, e; 4364 register vm_object_t object; 4365 register vm_map_t submap; 4366 4367 s = entry->vme_start; 4368 e = entry->vme_end; 4369 assert(page_aligned(s)); 4370 assert(page_aligned(e)); 4371 assert(entry->wired_count == 0); 4372 assert(entry->user_wired_count == 0); 4373 4374 if (entry->is_sub_map) { 4375 object = NULL; 4376 submap = entry->object.sub_map; 4377 } else { 4378 submap = NULL; 4379 object = entry->object.vm_object; 4380 } 4381 4382 vm_map_entry_unlink(map, entry); 4383 map->size -= e - s; 4384 4385 vm_map_entry_dispose(map, entry); 4386 4387 vm_map_unlock(map); 4388 /* 4389 * Deallocate the object only after removing all 4390 * pmap entries pointing to its pages. 4391 */ 4392 if (submap) 4393 vm_map_deallocate(submap); 4394 else 4395 vm_object_deallocate(object); 4396 4397} 4398 4399void 4400vm_map_submap_pmap_clean( 4401 vm_map_t map, 4402 vm_map_offset_t start, 4403 vm_map_offset_t end, 4404 vm_map_t sub_map, 4405 vm_map_offset_t offset) 4406{ 4407 vm_map_offset_t submap_start; 4408 vm_map_offset_t submap_end; 4409 vm_map_size_t remove_size; 4410 vm_map_entry_t entry; 4411 4412 submap_end = offset + (end - start); 4413 submap_start = offset; 4414 if(vm_map_lookup_entry(sub_map, offset, &entry)) { 4415 4416 remove_size = (entry->vme_end - entry->vme_start); 4417 if(offset > entry->vme_start) 4418 remove_size -= offset - entry->vme_start; 4419 4420 4421 if(submap_end < entry->vme_end) { 4422 remove_size -= 4423 entry->vme_end - submap_end; 4424 } 4425 if(entry->is_sub_map) { 4426 vm_map_submap_pmap_clean( 4427 sub_map, 4428 start, 4429 start + remove_size, 4430 entry->object.sub_map, 4431 entry->offset); 4432 } else { 4433 4434 if((map->mapped) && (map->ref_count) 4435 && (entry->object.vm_object != NULL)) { 4436 vm_object_pmap_protect( 4437 entry->object.vm_object, 4438 entry->offset, 4439 remove_size, 4440 PMAP_NULL, 4441 entry->vme_start, 4442 VM_PROT_NONE); 4443 } else { 4444 pmap_remove(map->pmap, 4445 (addr64_t)start, 4446 (addr64_t)(start + remove_size)); 4447 } 4448 } 4449 } 4450 4451 entry = entry->vme_next; 4452 4453 while((entry != vm_map_to_entry(sub_map)) 4454 && (entry->vme_start < submap_end)) { 4455 remove_size = (entry->vme_end - entry->vme_start); 4456 if(submap_end < entry->vme_end) { 4457 remove_size -= entry->vme_end - submap_end; 4458 } 4459 if(entry->is_sub_map) { 4460 vm_map_submap_pmap_clean( 4461 sub_map, 4462 (start + entry->vme_start) - offset, 4463 ((start + entry->vme_start) - offset) + remove_size, 4464 entry->object.sub_map, 4465 entry->offset); 4466 } else { 4467 if((map->mapped) && (map->ref_count) 4468 && (entry->object.vm_object != NULL)) { 4469 vm_object_pmap_protect( 4470 entry->object.vm_object, 4471 entry->offset, 4472 remove_size, 4473 PMAP_NULL, 4474 entry->vme_start, 4475 VM_PROT_NONE); 4476 } else { 4477 pmap_remove(map->pmap, 4478 (addr64_t)((start + entry->vme_start) 4479 - offset), 4480 (addr64_t)(((start + entry->vme_start) 4481 - offset) + remove_size)); 4482 } 4483 } 4484 entry = entry->vme_next; 4485 } 4486 return; 4487} 4488 4489/* 4490 * vm_map_delete: [ internal use only ] 4491 * 4492 * Deallocates the given address range from the target map. 4493 * Removes all user wirings. Unwires one kernel wiring if 4494 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go 4495 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps 4496 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set. 4497 * 4498 * This routine is called with map locked and leaves map locked. 4499 */ 4500static kern_return_t 4501vm_map_delete( 4502 vm_map_t map, 4503 vm_map_offset_t start, 4504 vm_map_offset_t end, 4505 int flags, 4506 vm_map_t zap_map) 4507{ 4508 vm_map_entry_t entry, next; 4509 struct vm_map_entry *first_entry, tmp_entry; 4510 register vm_map_offset_t s; 4511 register vm_object_t object; 4512 boolean_t need_wakeup; 4513 unsigned int last_timestamp = ~0; /* unlikely value */ 4514 int interruptible; 4515 4516 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ? 4517 THREAD_ABORTSAFE : THREAD_UNINT; 4518 4519 /* 4520 * All our DMA I/O operations in IOKit are currently done by 4521 * wiring through the map entries of the task requesting the I/O. 4522 * Because of this, we must always wait for kernel wirings 4523 * to go away on the entries before deleting them. 4524 * 4525 * Any caller who wants to actually remove a kernel wiring 4526 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to 4527 * properly remove one wiring instead of blasting through 4528 * them all. 4529 */ 4530 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE; 4531 4532 /* 4533 * Find the start of the region, and clip it 4534 */ 4535 if (vm_map_lookup_entry(map, start, &first_entry)) { 4536 entry = first_entry; 4537 if (start == entry->vme_start) { 4538 /* 4539 * No need to clip. We don't want to cause 4540 * any unnecessary unnesting in this case... 4541 */ 4542 } else { 4543 vm_map_clip_start(map, entry, start); 4544 } 4545 4546 /* 4547 * Fix the lookup hint now, rather than each 4548 * time through the loop. 4549 */ 4550 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 4551 } else { 4552 entry = first_entry->vme_next; 4553 } 4554 4555 need_wakeup = FALSE; 4556 /* 4557 * Step through all entries in this region 4558 */ 4559 s = entry->vme_start; 4560 while ((entry != vm_map_to_entry(map)) && (s < end)) { 4561 /* 4562 * At this point, we have deleted all the memory entries 4563 * between "start" and "s". We still need to delete 4564 * all memory entries between "s" and "end". 4565 * While we were blocked and the map was unlocked, some 4566 * new memory entries could have been re-allocated between 4567 * "start" and "s" and we don't want to mess with those. 4568 * Some of those entries could even have been re-assembled 4569 * with an entry after "s" (in vm_map_simplify_entry()), so 4570 * we may have to vm_map_clip_start() again. 4571 */ 4572 4573 if (entry->vme_start >= s) { 4574 /* 4575 * This entry starts on or after "s" 4576 * so no need to clip its start. 4577 */ 4578 } else { 4579 /* 4580 * This entry has been re-assembled by a 4581 * vm_map_simplify_entry(). We need to 4582 * re-clip its start. 4583 */ 4584 vm_map_clip_start(map, entry, s); 4585 } 4586 if (entry->vme_end <= end) { 4587 /* 4588 * This entry is going away completely, so no need 4589 * to clip and possibly cause an unnecessary unnesting. 4590 */ 4591 } else { 4592 vm_map_clip_end(map, entry, end); 4593 } 4594 if (entry->in_transition) { 4595 wait_result_t wait_result; 4596 4597 /* 4598 * Another thread is wiring/unwiring this entry. 4599 * Let the other thread know we are waiting. 4600 */ 4601 assert(s == entry->vme_start); 4602 entry->needs_wakeup = TRUE; 4603 4604 /* 4605 * wake up anybody waiting on entries that we have 4606 * already unwired/deleted. 4607 */ 4608 if (need_wakeup) { 4609 vm_map_entry_wakeup(map); 4610 need_wakeup = FALSE; 4611 } 4612 4613 wait_result = vm_map_entry_wait(map, interruptible); 4614 4615 if (interruptible && 4616 wait_result == THREAD_INTERRUPTED) { 4617 /* 4618 * We do not clear the needs_wakeup flag, 4619 * since we cannot tell if we were the only one. 4620 */ 4621 vm_map_unlock(map); 4622 return KERN_ABORTED; 4623 } 4624 4625 /* 4626 * The entry could have been clipped or it 4627 * may not exist anymore. Look it up again. 4628 */ 4629 if (!vm_map_lookup_entry(map, s, &first_entry)) { 4630 assert((map != kernel_map) && 4631 (!entry->is_sub_map)); 4632 /* 4633 * User: use the next entry 4634 */ 4635 entry = first_entry->vme_next; 4636 s = entry->vme_start; 4637 } else { 4638 entry = first_entry; 4639 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 4640 } 4641 last_timestamp = map->timestamp; 4642 continue; 4643 } /* end in_transition */ 4644 4645 if (entry->wired_count) { 4646 boolean_t user_wire; 4647 4648 user_wire = entry->user_wired_count > 0; 4649 4650 /* 4651 * Remove a kernel wiring if requested or if 4652 * there are user wirings. 4653 */ 4654 if ((flags & VM_MAP_REMOVE_KUNWIRE) || 4655 (entry->user_wired_count > 0)) 4656 entry->wired_count--; 4657 4658 /* remove all user wire references */ 4659 entry->user_wired_count = 0; 4660 4661 if (entry->wired_count != 0) { 4662 assert(map != kernel_map); 4663 /* 4664 * Cannot continue. Typical case is when 4665 * a user thread has physical io pending on 4666 * on this page. Either wait for the 4667 * kernel wiring to go away or return an 4668 * error. 4669 */ 4670 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) { 4671 wait_result_t wait_result; 4672 4673 assert(s == entry->vme_start); 4674 entry->needs_wakeup = TRUE; 4675 wait_result = vm_map_entry_wait(map, 4676 interruptible); 4677 4678 if (interruptible && 4679 wait_result == THREAD_INTERRUPTED) { 4680 /* 4681 * We do not clear the 4682 * needs_wakeup flag, since we 4683 * cannot tell if we were the 4684 * only one. 4685 */ 4686 vm_map_unlock(map); 4687 return KERN_ABORTED; 4688 } 4689 4690 /* 4691 * The entry could have been clipped or 4692 * it may not exist anymore. Look it 4693 * up again. 4694 */ 4695 if (!vm_map_lookup_entry(map, s, 4696 &first_entry)) { 4697 assert(map != kernel_map); 4698 /* 4699 * User: use the next entry 4700 */ 4701 entry = first_entry->vme_next; 4702 s = entry->vme_start; 4703 } else { 4704 entry = first_entry; 4705 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 4706 } 4707 last_timestamp = map->timestamp; 4708 continue; 4709 } 4710 else { 4711 return KERN_FAILURE; 4712 } 4713 } 4714 4715 entry->in_transition = TRUE; 4716 /* 4717 * copy current entry. see comment in vm_map_wire() 4718 */ 4719 tmp_entry = *entry; 4720 assert(s == entry->vme_start); 4721 4722 /* 4723 * We can unlock the map now. The in_transition 4724 * state guarentees existance of the entry. 4725 */ 4726 vm_map_unlock(map); 4727 4728 if (tmp_entry.is_sub_map) { 4729 vm_map_t sub_map; 4730 vm_map_offset_t sub_start, sub_end; 4731 pmap_t pmap; 4732 vm_map_offset_t pmap_addr; 4733 4734 4735 sub_map = tmp_entry.object.sub_map; 4736 sub_start = tmp_entry.offset; 4737 sub_end = sub_start + (tmp_entry.vme_end - 4738 tmp_entry.vme_start); 4739 if (tmp_entry.use_pmap) { 4740 pmap = sub_map->pmap; 4741 pmap_addr = tmp_entry.vme_start; 4742 } else { 4743 pmap = map->pmap; 4744 pmap_addr = tmp_entry.vme_start; 4745 } 4746 (void) vm_map_unwire_nested(sub_map, 4747 sub_start, sub_end, 4748 user_wire, 4749 pmap, pmap_addr); 4750 } else { 4751 4752 vm_fault_unwire(map, &tmp_entry, 4753 tmp_entry.object.vm_object == kernel_object, 4754 map->pmap, tmp_entry.vme_start); 4755 } 4756 4757 vm_map_lock(map); 4758 4759 if (last_timestamp+1 != map->timestamp) { 4760 /* 4761 * Find the entry again. It could have 4762 * been clipped after we unlocked the map. 4763 */ 4764 if (!vm_map_lookup_entry(map, s, &first_entry)){ 4765 assert((map != kernel_map) && 4766 (!entry->is_sub_map)); 4767 first_entry = first_entry->vme_next; 4768 s = first_entry->vme_start; 4769 } else { 4770 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 4771 } 4772 } else { 4773 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 4774 first_entry = entry; 4775 } 4776 4777 last_timestamp = map->timestamp; 4778 4779 entry = first_entry; 4780 while ((entry != vm_map_to_entry(map)) && 4781 (entry->vme_start < tmp_entry.vme_end)) { 4782 assert(entry->in_transition); 4783 entry->in_transition = FALSE; 4784 if (entry->needs_wakeup) { 4785 entry->needs_wakeup = FALSE; 4786 need_wakeup = TRUE; 4787 } 4788 entry = entry->vme_next; 4789 } 4790 /* 4791 * We have unwired the entry(s). Go back and 4792 * delete them. 4793 */ 4794 entry = first_entry; 4795 continue; 4796 } 4797 4798 /* entry is unwired */ 4799 assert(entry->wired_count == 0); 4800 assert(entry->user_wired_count == 0); 4801 4802 assert(s == entry->vme_start); 4803 4804 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) { 4805 /* 4806 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to 4807 * vm_map_delete(), some map entries might have been 4808 * transferred to a "zap_map", which doesn't have a 4809 * pmap. The original pmap has already been flushed 4810 * in the vm_map_delete() call targeting the original 4811 * map, but when we get to destroying the "zap_map", 4812 * we don't have any pmap to flush, so let's just skip 4813 * all this. 4814 */ 4815 } else if (entry->is_sub_map) { 4816 if (entry->use_pmap) { 4817#ifndef NO_NESTED_PMAP 4818 pmap_unnest(map->pmap, 4819 (addr64_t)entry->vme_start, 4820 entry->vme_end - entry->vme_start); 4821#endif /* NO_NESTED_PMAP */ 4822 if ((map->mapped) && (map->ref_count)) { 4823 /* clean up parent map/maps */ 4824 vm_map_submap_pmap_clean( 4825 map, entry->vme_start, 4826 entry->vme_end, 4827 entry->object.sub_map, 4828 entry->offset); 4829 } 4830 } else { 4831 vm_map_submap_pmap_clean( 4832 map, entry->vme_start, entry->vme_end, 4833 entry->object.sub_map, 4834 entry->offset); 4835 } 4836 } else if (entry->object.vm_object != kernel_object) { 4837 object = entry->object.vm_object; 4838 if((map->mapped) && (map->ref_count)) { 4839 vm_object_pmap_protect( 4840 object, entry->offset, 4841 entry->vme_end - entry->vme_start, 4842 PMAP_NULL, 4843 entry->vme_start, 4844 VM_PROT_NONE); 4845 } else { 4846 pmap_remove(map->pmap, 4847 (addr64_t)entry->vme_start, 4848 (addr64_t)entry->vme_end); 4849 } 4850 } 4851 4852 /* 4853 * All pmap mappings for this map entry must have been 4854 * cleared by now. 4855 */ 4856 assert(vm_map_pmap_is_empty(map, 4857 entry->vme_start, 4858 entry->vme_end)); 4859 4860 next = entry->vme_next; 4861 s = next->vme_start; 4862 last_timestamp = map->timestamp; 4863 4864 if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) && 4865 zap_map != VM_MAP_NULL) { 4866 vm_map_size_t entry_size; 4867 /* 4868 * The caller wants to save the affected VM map entries 4869 * into the "zap_map". The caller will take care of 4870 * these entries. 4871 */ 4872 /* unlink the entry from "map" ... */ 4873 vm_map_entry_unlink(map, entry); 4874 /* ... and add it to the end of the "zap_map" */ 4875 vm_map_entry_link(zap_map, 4876 vm_map_last_entry(zap_map), 4877 entry); 4878 entry_size = entry->vme_end - entry->vme_start; 4879 map->size -= entry_size; 4880 zap_map->size += entry_size; 4881 /* we didn't unlock the map, so no timestamp increase */ 4882 last_timestamp--; 4883 } else { 4884 vm_map_entry_delete(map, entry); 4885 /* vm_map_entry_delete unlocks the map */ 4886 vm_map_lock(map); 4887 } 4888 4889 entry = next; 4890 4891 if(entry == vm_map_to_entry(map)) { 4892 break; 4893 } 4894 if (last_timestamp+1 != map->timestamp) { 4895 /* 4896 * we are responsible for deleting everything 4897 * from the give space, if someone has interfered 4898 * we pick up where we left off, back fills should 4899 * be all right for anyone except map_delete and 4900 * we have to assume that the task has been fully 4901 * disabled before we get here 4902 */ 4903 if (!vm_map_lookup_entry(map, s, &entry)){ 4904 entry = entry->vme_next; 4905 s = entry->vme_start; 4906 } else { 4907 SAVE_HINT_MAP_WRITE(map, entry->vme_prev); 4908 } 4909 /* 4910 * others can not only allocate behind us, we can 4911 * also see coalesce while we don't have the map lock 4912 */ 4913 if(entry == vm_map_to_entry(map)) { 4914 break; 4915 } 4916 } 4917 last_timestamp = map->timestamp; 4918 } 4919 4920 if (map->wait_for_space) 4921 thread_wakeup((event_t) map); 4922 /* 4923 * wake up anybody waiting on entries that we have already deleted. 4924 */ 4925 if (need_wakeup) 4926 vm_map_entry_wakeup(map); 4927 4928 return KERN_SUCCESS; 4929} 4930 4931/* 4932 * vm_map_remove: 4933 * 4934 * Remove the given address range from the target map. 4935 * This is the exported form of vm_map_delete. 4936 */ 4937kern_return_t 4938vm_map_remove( 4939 register vm_map_t map, 4940 register vm_map_offset_t start, 4941 register vm_map_offset_t end, 4942 register boolean_t flags) 4943{ 4944 register kern_return_t result; 4945 4946 vm_map_lock(map); 4947 VM_MAP_RANGE_CHECK(map, start, end); 4948 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL); 4949 vm_map_unlock(map); 4950 4951 return(result); 4952} 4953 4954 4955/* 4956 * Routine: vm_map_copy_discard 4957 * 4958 * Description: 4959 * Dispose of a map copy object (returned by 4960 * vm_map_copyin). 4961 */ 4962void 4963vm_map_copy_discard( 4964 vm_map_copy_t copy) 4965{ 4966 TR_DECL("vm_map_copy_discard"); 4967 4968/* tr3("enter: copy 0x%x type %d", copy, copy->type);*/ 4969 4970 if (copy == VM_MAP_COPY_NULL) 4971 return; 4972 4973 switch (copy->type) { 4974 case VM_MAP_COPY_ENTRY_LIST: 4975 while (vm_map_copy_first_entry(copy) != 4976 vm_map_copy_to_entry(copy)) { 4977 vm_map_entry_t entry = vm_map_copy_first_entry(copy); 4978 4979 vm_map_copy_entry_unlink(copy, entry); 4980 vm_object_deallocate(entry->object.vm_object); 4981 vm_map_copy_entry_dispose(copy, entry); 4982 } 4983 break; 4984 case VM_MAP_COPY_OBJECT: 4985 vm_object_deallocate(copy->cpy_object); 4986 break; 4987 case VM_MAP_COPY_KERNEL_BUFFER: 4988 4989 /* 4990 * The vm_map_copy_t and possibly the data buffer were 4991 * allocated by a single call to kalloc(), i.e. the 4992 * vm_map_copy_t was not allocated out of the zone. 4993 */ 4994 kfree(copy, copy->cpy_kalloc_size); 4995 return; 4996 } 4997 zfree(vm_map_copy_zone, copy); 4998} 4999 5000/* 5001 * Routine: vm_map_copy_copy 5002 * 5003 * Description: 5004 * Move the information in a map copy object to 5005 * a new map copy object, leaving the old one 5006 * empty. 5007 * 5008 * This is used by kernel routines that need 5009 * to look at out-of-line data (in copyin form) 5010 * before deciding whether to return SUCCESS. 5011 * If the routine returns FAILURE, the original 5012 * copy object will be deallocated; therefore, 5013 * these routines must make a copy of the copy 5014 * object and leave the original empty so that 5015 * deallocation will not fail. 5016 */ 5017vm_map_copy_t 5018vm_map_copy_copy( 5019 vm_map_copy_t copy) 5020{ 5021 vm_map_copy_t new_copy; 5022 5023 if (copy == VM_MAP_COPY_NULL) 5024 return VM_MAP_COPY_NULL; 5025 5026 /* 5027 * Allocate a new copy object, and copy the information 5028 * from the old one into it. 5029 */ 5030 5031 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 5032 *new_copy = *copy; 5033 5034 if (copy->type == VM_MAP_COPY_ENTRY_LIST) { 5035 /* 5036 * The links in the entry chain must be 5037 * changed to point to the new copy object. 5038 */ 5039 vm_map_copy_first_entry(copy)->vme_prev 5040 = vm_map_copy_to_entry(new_copy); 5041 vm_map_copy_last_entry(copy)->vme_next 5042 = vm_map_copy_to_entry(new_copy); 5043 } 5044 5045 /* 5046 * Change the old copy object into one that contains 5047 * nothing to be deallocated. 5048 */ 5049 copy->type = VM_MAP_COPY_OBJECT; 5050 copy->cpy_object = VM_OBJECT_NULL; 5051 5052 /* 5053 * Return the new object. 5054 */ 5055 return new_copy; 5056} 5057 5058static kern_return_t 5059vm_map_overwrite_submap_recurse( 5060 vm_map_t dst_map, 5061 vm_map_offset_t dst_addr, 5062 vm_map_size_t dst_size) 5063{ 5064 vm_map_offset_t dst_end; 5065 vm_map_entry_t tmp_entry; 5066 vm_map_entry_t entry; 5067 kern_return_t result; 5068 boolean_t encountered_sub_map = FALSE; 5069 5070 5071 5072 /* 5073 * Verify that the destination is all writeable 5074 * initially. We have to trunc the destination 5075 * address and round the copy size or we'll end up 5076 * splitting entries in strange ways. 5077 */ 5078 5079 dst_end = vm_map_round_page(dst_addr + dst_size); 5080 vm_map_lock(dst_map); 5081 5082start_pass_1: 5083 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) { 5084 vm_map_unlock(dst_map); 5085 return(KERN_INVALID_ADDRESS); 5086 } 5087 5088 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); 5089 assert(!tmp_entry->use_pmap); /* clipping did unnest if needed */ 5090 5091 for (entry = tmp_entry;;) { 5092 vm_map_entry_t next; 5093 5094 next = entry->vme_next; 5095 while(entry->is_sub_map) { 5096 vm_map_offset_t sub_start; 5097 vm_map_offset_t sub_end; 5098 vm_map_offset_t local_end; 5099 5100 if (entry->in_transition) { 5101 /* 5102 * Say that we are waiting, and wait for entry. 5103 */ 5104 entry->needs_wakeup = TRUE; 5105 vm_map_entry_wait(dst_map, THREAD_UNINT); 5106 5107 goto start_pass_1; 5108 } 5109 5110 encountered_sub_map = TRUE; 5111 sub_start = entry->offset; 5112 5113 if(entry->vme_end < dst_end) 5114 sub_end = entry->vme_end; 5115 else 5116 sub_end = dst_end; 5117 sub_end -= entry->vme_start; 5118 sub_end += entry->offset; 5119 local_end = entry->vme_end; 5120 vm_map_unlock(dst_map); 5121 5122 result = vm_map_overwrite_submap_recurse( 5123 entry->object.sub_map, 5124 sub_start, 5125 sub_end - sub_start); 5126 5127 if(result != KERN_SUCCESS) 5128 return result; 5129 if (dst_end <= entry->vme_end) 5130 return KERN_SUCCESS; 5131 vm_map_lock(dst_map); 5132 if(!vm_map_lookup_entry(dst_map, local_end, 5133 &tmp_entry)) { 5134 vm_map_unlock(dst_map); 5135 return(KERN_INVALID_ADDRESS); 5136 } 5137 entry = tmp_entry; 5138 next = entry->vme_next; 5139 } 5140 5141 if ( ! (entry->protection & VM_PROT_WRITE)) { 5142 vm_map_unlock(dst_map); 5143 return(KERN_PROTECTION_FAILURE); 5144 } 5145 5146 /* 5147 * If the entry is in transition, we must wait 5148 * for it to exit that state. Anything could happen 5149 * when we unlock the map, so start over. 5150 */ 5151 if (entry->in_transition) { 5152 5153 /* 5154 * Say that we are waiting, and wait for entry. 5155 */ 5156 entry->needs_wakeup = TRUE; 5157 vm_map_entry_wait(dst_map, THREAD_UNINT); 5158 5159 goto start_pass_1; 5160 } 5161 5162/* 5163 * our range is contained completely within this map entry 5164 */ 5165 if (dst_end <= entry->vme_end) { 5166 vm_map_unlock(dst_map); 5167 return KERN_SUCCESS; 5168 } 5169/* 5170 * check that range specified is contiguous region 5171 */ 5172 if ((next == vm_map_to_entry(dst_map)) || 5173 (next->vme_start != entry->vme_end)) { 5174 vm_map_unlock(dst_map); 5175 return(KERN_INVALID_ADDRESS); 5176 } 5177 5178 /* 5179 * Check for permanent objects in the destination. 5180 */ 5181 if ((entry->object.vm_object != VM_OBJECT_NULL) && 5182 ((!entry->object.vm_object->internal) || 5183 (entry->object.vm_object->true_share))) { 5184 if(encountered_sub_map) { 5185 vm_map_unlock(dst_map); 5186 return(KERN_FAILURE); 5187 } 5188 } 5189 5190 5191 entry = next; 5192 }/* for */ 5193 vm_map_unlock(dst_map); 5194 return(KERN_SUCCESS); 5195} 5196 5197/* 5198 * Routine: vm_map_copy_overwrite 5199 * 5200 * Description: 5201 * Copy the memory described by the map copy 5202 * object (copy; returned by vm_map_copyin) onto 5203 * the specified destination region (dst_map, dst_addr). 5204 * The destination must be writeable. 5205 * 5206 * Unlike vm_map_copyout, this routine actually 5207 * writes over previously-mapped memory. If the 5208 * previous mapping was to a permanent (user-supplied) 5209 * memory object, it is preserved. 5210 * 5211 * The attributes (protection and inheritance) of the 5212 * destination region are preserved. 5213 * 5214 * If successful, consumes the copy object. 5215 * Otherwise, the caller is responsible for it. 5216 * 5217 * Implementation notes: 5218 * To overwrite aligned temporary virtual memory, it is 5219 * sufficient to remove the previous mapping and insert 5220 * the new copy. This replacement is done either on 5221 * the whole region (if no permanent virtual memory 5222 * objects are embedded in the destination region) or 5223 * in individual map entries. 5224 * 5225 * To overwrite permanent virtual memory , it is necessary 5226 * to copy each page, as the external memory management 5227 * interface currently does not provide any optimizations. 5228 * 5229 * Unaligned memory also has to be copied. It is possible 5230 * to use 'vm_trickery' to copy the aligned data. This is 5231 * not done but not hard to implement. 5232 * 5233 * Once a page of permanent memory has been overwritten, 5234 * it is impossible to interrupt this function; otherwise, 5235 * the call would be neither atomic nor location-independent. 5236 * The kernel-state portion of a user thread must be 5237 * interruptible. 5238 * 5239 * It may be expensive to forward all requests that might 5240 * overwrite permanent memory (vm_write, vm_copy) to 5241 * uninterruptible kernel threads. This routine may be 5242 * called by interruptible threads; however, success is 5243 * not guaranteed -- if the request cannot be performed 5244 * atomically and interruptibly, an error indication is 5245 * returned. 5246 */ 5247 5248static kern_return_t 5249vm_map_copy_overwrite_nested( 5250 vm_map_t dst_map, 5251 vm_map_address_t dst_addr, 5252 vm_map_copy_t copy, 5253 boolean_t interruptible, 5254 pmap_t pmap) 5255{ 5256 vm_map_offset_t dst_end; 5257 vm_map_entry_t tmp_entry; 5258 vm_map_entry_t entry; 5259 kern_return_t kr; 5260 boolean_t aligned = TRUE; 5261 boolean_t contains_permanent_objects = FALSE; 5262 boolean_t encountered_sub_map = FALSE; 5263 vm_map_offset_t base_addr; 5264 vm_map_size_t copy_size; 5265 vm_map_size_t total_size; 5266 5267 5268 /* 5269 * Check for null copy object. 5270 */ 5271 5272 if (copy == VM_MAP_COPY_NULL) 5273 return(KERN_SUCCESS); 5274 5275 /* 5276 * Check for special kernel buffer allocated 5277 * by new_ipc_kmsg_copyin. 5278 */ 5279 5280 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { 5281 return(vm_map_copyout_kernel_buffer( 5282 dst_map, &dst_addr, 5283 copy, TRUE)); 5284 } 5285 5286 /* 5287 * Only works for entry lists at the moment. Will 5288 * support page lists later. 5289 */ 5290 5291 assert(copy->type == VM_MAP_COPY_ENTRY_LIST); 5292 5293 if (copy->size == 0) { 5294 vm_map_copy_discard(copy); 5295 return(KERN_SUCCESS); 5296 } 5297 5298 /* 5299 * Verify that the destination is all writeable 5300 * initially. We have to trunc the destination 5301 * address and round the copy size or we'll end up 5302 * splitting entries in strange ways. 5303 */ 5304 5305 if (!page_aligned(copy->size) || 5306 !page_aligned (copy->offset) || 5307 !page_aligned (dst_addr)) 5308 { 5309 aligned = FALSE; 5310 dst_end = vm_map_round_page(dst_addr + copy->size); 5311 } else { 5312 dst_end = dst_addr + copy->size; 5313 } 5314 5315 vm_map_lock(dst_map); 5316 5317 /* LP64todo - remove this check when vm_map_commpage64() 5318 * no longer has to stuff in a map_entry for the commpage 5319 * above the map's max_offset. 5320 */ 5321 if (dst_addr >= dst_map->max_offset) { 5322 vm_map_unlock(dst_map); 5323 return(KERN_INVALID_ADDRESS); 5324 } 5325 5326start_pass_1: 5327 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) { 5328 vm_map_unlock(dst_map); 5329 return(KERN_INVALID_ADDRESS); 5330 } 5331 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(dst_addr)); 5332 for (entry = tmp_entry;;) { 5333 vm_map_entry_t next = entry->vme_next; 5334 5335 while(entry->is_sub_map) { 5336 vm_map_offset_t sub_start; 5337 vm_map_offset_t sub_end; 5338 vm_map_offset_t local_end; 5339 5340 if (entry->in_transition) { 5341 5342 /* 5343 * Say that we are waiting, and wait for entry. 5344 */ 5345 entry->needs_wakeup = TRUE; 5346 vm_map_entry_wait(dst_map, THREAD_UNINT); 5347 5348 goto start_pass_1; 5349 } 5350 5351 local_end = entry->vme_end; 5352 if (!(entry->needs_copy)) { 5353 /* if needs_copy we are a COW submap */ 5354 /* in such a case we just replace so */ 5355 /* there is no need for the follow- */ 5356 /* ing check. */ 5357 encountered_sub_map = TRUE; 5358 sub_start = entry->offset; 5359 5360 if(entry->vme_end < dst_end) 5361 sub_end = entry->vme_end; 5362 else 5363 sub_end = dst_end; 5364 sub_end -= entry->vme_start; 5365 sub_end += entry->offset; 5366 vm_map_unlock(dst_map); 5367 5368 kr = vm_map_overwrite_submap_recurse( 5369 entry->object.sub_map, 5370 sub_start, 5371 sub_end - sub_start); 5372 if(kr != KERN_SUCCESS) 5373 return kr; 5374 vm_map_lock(dst_map); 5375 } 5376 5377 if (dst_end <= entry->vme_end) 5378 goto start_overwrite; 5379 if(!vm_map_lookup_entry(dst_map, local_end, 5380 &entry)) { 5381 vm_map_unlock(dst_map); 5382 return(KERN_INVALID_ADDRESS); 5383 } 5384 next = entry->vme_next; 5385 } 5386 5387 if ( ! (entry->protection & VM_PROT_WRITE)) { 5388 vm_map_unlock(dst_map); 5389 return(KERN_PROTECTION_FAILURE); 5390 } 5391 5392 /* 5393 * If the entry is in transition, we must wait 5394 * for it to exit that state. Anything could happen 5395 * when we unlock the map, so start over. 5396 */ 5397 if (entry->in_transition) { 5398 5399 /* 5400 * Say that we are waiting, and wait for entry. 5401 */ 5402 entry->needs_wakeup = TRUE; 5403 vm_map_entry_wait(dst_map, THREAD_UNINT); 5404 5405 goto start_pass_1; 5406 } 5407 5408/* 5409 * our range is contained completely within this map entry 5410 */ 5411 if (dst_end <= entry->vme_end) 5412 break; 5413/* 5414 * check that range specified is contiguous region 5415 */ 5416 if ((next == vm_map_to_entry(dst_map)) || 5417 (next->vme_start != entry->vme_end)) { 5418 vm_map_unlock(dst_map); 5419 return(KERN_INVALID_ADDRESS); 5420 } 5421 5422 5423 /* 5424 * Check for permanent objects in the destination. 5425 */ 5426 if ((entry->object.vm_object != VM_OBJECT_NULL) && 5427 ((!entry->object.vm_object->internal) || 5428 (entry->object.vm_object->true_share))) { 5429 contains_permanent_objects = TRUE; 5430 } 5431 5432 entry = next; 5433 }/* for */ 5434 5435start_overwrite: 5436 /* 5437 * If there are permanent objects in the destination, then 5438 * the copy cannot be interrupted. 5439 */ 5440 5441 if (interruptible && contains_permanent_objects) { 5442 vm_map_unlock(dst_map); 5443 return(KERN_FAILURE); /* XXX */ 5444 } 5445 5446 /* 5447 * 5448 * Make a second pass, overwriting the data 5449 * At the beginning of each loop iteration, 5450 * the next entry to be overwritten is "tmp_entry" 5451 * (initially, the value returned from the lookup above), 5452 * and the starting address expected in that entry 5453 * is "start". 5454 */ 5455 5456 total_size = copy->size; 5457 if(encountered_sub_map) { 5458 copy_size = 0; 5459 /* re-calculate tmp_entry since we've had the map */ 5460 /* unlocked */ 5461 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) { 5462 vm_map_unlock(dst_map); 5463 return(KERN_INVALID_ADDRESS); 5464 } 5465 } else { 5466 copy_size = copy->size; 5467 } 5468 5469 base_addr = dst_addr; 5470 while(TRUE) { 5471 /* deconstruct the copy object and do in parts */ 5472 /* only in sub_map, interruptable case */ 5473 vm_map_entry_t copy_entry; 5474 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL; 5475 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL; 5476 int nentries; 5477 int remaining_entries = 0; 5478 int new_offset = 0; 5479 5480 for (entry = tmp_entry; copy_size == 0;) { 5481 vm_map_entry_t next; 5482 5483 next = entry->vme_next; 5484 5485 /* tmp_entry and base address are moved along */ 5486 /* each time we encounter a sub-map. Otherwise */ 5487 /* entry can outpase tmp_entry, and the copy_size */ 5488 /* may reflect the distance between them */ 5489 /* if the current entry is found to be in transition */ 5490 /* we will start over at the beginning or the last */ 5491 /* encounter of a submap as dictated by base_addr */ 5492 /* we will zero copy_size accordingly. */ 5493 if (entry->in_transition) { 5494 /* 5495 * Say that we are waiting, and wait for entry. 5496 */ 5497 entry->needs_wakeup = TRUE; 5498 vm_map_entry_wait(dst_map, THREAD_UNINT); 5499 5500 if(!vm_map_lookup_entry(dst_map, base_addr, 5501 &tmp_entry)) { 5502 vm_map_unlock(dst_map); 5503 return(KERN_INVALID_ADDRESS); 5504 } 5505 copy_size = 0; 5506 entry = tmp_entry; 5507 continue; 5508 } 5509 if(entry->is_sub_map) { 5510 vm_map_offset_t sub_start; 5511 vm_map_offset_t sub_end; 5512 vm_map_offset_t local_end; 5513 5514 if (entry->needs_copy) { 5515 /* if this is a COW submap */ 5516 /* just back the range with a */ 5517 /* anonymous entry */ 5518 if(entry->vme_end < dst_end) 5519 sub_end = entry->vme_end; 5520 else 5521 sub_end = dst_end; 5522 if(entry->vme_start < base_addr) 5523 sub_start = base_addr; 5524 else 5525 sub_start = entry->vme_start; 5526 vm_map_clip_end( 5527 dst_map, entry, sub_end); 5528 vm_map_clip_start( 5529 dst_map, entry, sub_start); 5530 assert(!entry->use_pmap); 5531 entry->is_sub_map = FALSE; 5532 vm_map_deallocate( 5533 entry->object.sub_map); 5534 entry->object.sub_map = NULL; 5535 entry->is_shared = FALSE; 5536 entry->needs_copy = FALSE; 5537 entry->offset = 0; 5538 /* 5539 * XXX FBDP 5540 * We should propagate the protections 5541 * of the submap entry here instead 5542 * of forcing them to VM_PROT_ALL... 5543 * Or better yet, we should inherit 5544 * the protection of the copy_entry. 5545 */ 5546 entry->protection = VM_PROT_ALL; 5547 entry->max_protection = VM_PROT_ALL; 5548 entry->wired_count = 0; 5549 entry->user_wired_count = 0; 5550 if(entry->inheritance 5551 == VM_INHERIT_SHARE) 5552 entry->inheritance = VM_INHERIT_COPY; 5553 continue; 5554 } 5555 /* first take care of any non-sub_map */ 5556 /* entries to send */ 5557 if(base_addr < entry->vme_start) { 5558 /* stuff to send */ 5559 copy_size = 5560 entry->vme_start - base_addr; 5561 break; 5562 } 5563 sub_start = entry->offset; 5564 5565 if(entry->vme_end < dst_end) 5566 sub_end = entry->vme_end; 5567 else 5568 sub_end = dst_end; 5569 sub_end -= entry->vme_start; 5570 sub_end += entry->offset; 5571 local_end = entry->vme_end; 5572 vm_map_unlock(dst_map); 5573 copy_size = sub_end - sub_start; 5574 5575 /* adjust the copy object */ 5576 if (total_size > copy_size) { 5577 vm_map_size_t local_size = 0; 5578 vm_map_size_t entry_size; 5579 5580 nentries = 1; 5581 new_offset = copy->offset; 5582 copy_entry = vm_map_copy_first_entry(copy); 5583 while(copy_entry != 5584 vm_map_copy_to_entry(copy)){ 5585 entry_size = copy_entry->vme_end - 5586 copy_entry->vme_start; 5587 if((local_size < copy_size) && 5588 ((local_size + entry_size) 5589 >= copy_size)) { 5590 vm_map_copy_clip_end(copy, 5591 copy_entry, 5592 copy_entry->vme_start + 5593 (copy_size - local_size)); 5594 entry_size = copy_entry->vme_end - 5595 copy_entry->vme_start; 5596 local_size += entry_size; 5597 new_offset += entry_size; 5598 } 5599 if(local_size >= copy_size) { 5600 next_copy = copy_entry->vme_next; 5601 copy_entry->vme_next = 5602 vm_map_copy_to_entry(copy); 5603 previous_prev = 5604 copy->cpy_hdr.links.prev; 5605 copy->cpy_hdr.links.prev = copy_entry; 5606 copy->size = copy_size; 5607 remaining_entries = 5608 copy->cpy_hdr.nentries; 5609 remaining_entries -= nentries; 5610 copy->cpy_hdr.nentries = nentries; 5611 break; 5612 } else { 5613 local_size += entry_size; 5614 new_offset += entry_size; 5615 nentries++; 5616 } 5617 copy_entry = copy_entry->vme_next; 5618 } 5619 } 5620 5621 if((entry->use_pmap) && (pmap == NULL)) { 5622 kr = vm_map_copy_overwrite_nested( 5623 entry->object.sub_map, 5624 sub_start, 5625 copy, 5626 interruptible, 5627 entry->object.sub_map->pmap); 5628 } else if (pmap != NULL) { 5629 kr = vm_map_copy_overwrite_nested( 5630 entry->object.sub_map, 5631 sub_start, 5632 copy, 5633 interruptible, pmap); 5634 } else { 5635 kr = vm_map_copy_overwrite_nested( 5636 entry->object.sub_map, 5637 sub_start, 5638 copy, 5639 interruptible, 5640 dst_map->pmap); 5641 } 5642 if(kr != KERN_SUCCESS) { 5643 if(next_copy != NULL) { 5644 copy->cpy_hdr.nentries += 5645 remaining_entries; 5646 copy->cpy_hdr.links.prev->vme_next = 5647 next_copy; 5648 copy->cpy_hdr.links.prev 5649 = previous_prev; 5650 copy->size = total_size; 5651 } 5652 return kr; 5653 } 5654 if (dst_end <= local_end) { 5655 return(KERN_SUCCESS); 5656 } 5657 /* otherwise copy no longer exists, it was */ 5658 /* destroyed after successful copy_overwrite */ 5659 copy = (vm_map_copy_t) 5660 zalloc(vm_map_copy_zone); 5661 vm_map_copy_first_entry(copy) = 5662 vm_map_copy_last_entry(copy) = 5663 vm_map_copy_to_entry(copy); 5664 copy->type = VM_MAP_COPY_ENTRY_LIST; 5665 copy->offset = new_offset; 5666 5667 total_size -= copy_size; 5668 copy_size = 0; 5669 /* put back remainder of copy in container */ 5670 if(next_copy != NULL) { 5671 copy->cpy_hdr.nentries = remaining_entries; 5672 copy->cpy_hdr.links.next = next_copy; 5673 copy->cpy_hdr.links.prev = previous_prev; 5674 copy->size = total_size; 5675 next_copy->vme_prev = 5676 vm_map_copy_to_entry(copy); 5677 next_copy = NULL; 5678 } 5679 base_addr = local_end; 5680 vm_map_lock(dst_map); 5681 if(!vm_map_lookup_entry(dst_map, 5682 local_end, &tmp_entry)) { 5683 vm_map_unlock(dst_map); 5684 return(KERN_INVALID_ADDRESS); 5685 } 5686 entry = tmp_entry; 5687 continue; 5688 } 5689 if (dst_end <= entry->vme_end) { 5690 copy_size = dst_end - base_addr; 5691 break; 5692 } 5693 5694 if ((next == vm_map_to_entry(dst_map)) || 5695 (next->vme_start != entry->vme_end)) { 5696 vm_map_unlock(dst_map); 5697 return(KERN_INVALID_ADDRESS); 5698 } 5699 5700 entry = next; 5701 }/* for */ 5702 5703 next_copy = NULL; 5704 nentries = 1; 5705 5706 /* adjust the copy object */ 5707 if (total_size > copy_size) { 5708 vm_map_size_t local_size = 0; 5709 vm_map_size_t entry_size; 5710 5711 new_offset = copy->offset; 5712 copy_entry = vm_map_copy_first_entry(copy); 5713 while(copy_entry != vm_map_copy_to_entry(copy)) { 5714 entry_size = copy_entry->vme_end - 5715 copy_entry->vme_start; 5716 if((local_size < copy_size) && 5717 ((local_size + entry_size) 5718 >= copy_size)) { 5719 vm_map_copy_clip_end(copy, copy_entry, 5720 copy_entry->vme_start + 5721 (copy_size - local_size)); 5722 entry_size = copy_entry->vme_end - 5723 copy_entry->vme_start; 5724 local_size += entry_size; 5725 new_offset += entry_size; 5726 } 5727 if(local_size >= copy_size) { 5728 next_copy = copy_entry->vme_next; 5729 copy_entry->vme_next = 5730 vm_map_copy_to_entry(copy); 5731 previous_prev = 5732 copy->cpy_hdr.links.prev; 5733 copy->cpy_hdr.links.prev = copy_entry; 5734 copy->size = copy_size; 5735 remaining_entries = 5736 copy->cpy_hdr.nentries; 5737 remaining_entries -= nentries; 5738 copy->cpy_hdr.nentries = nentries; 5739 break; 5740 } else { 5741 local_size += entry_size; 5742 new_offset += entry_size; 5743 nentries++; 5744 } 5745 copy_entry = copy_entry->vme_next; 5746 } 5747 } 5748 5749 if (aligned) { 5750 pmap_t local_pmap; 5751 5752 if(pmap) 5753 local_pmap = pmap; 5754 else 5755 local_pmap = dst_map->pmap; 5756 5757 if ((kr = vm_map_copy_overwrite_aligned( 5758 dst_map, tmp_entry, copy, 5759 base_addr, local_pmap)) != KERN_SUCCESS) { 5760 if(next_copy != NULL) { 5761 copy->cpy_hdr.nentries += 5762 remaining_entries; 5763 copy->cpy_hdr.links.prev->vme_next = 5764 next_copy; 5765 copy->cpy_hdr.links.prev = 5766 previous_prev; 5767 copy->size += copy_size; 5768 } 5769 return kr; 5770 } 5771 vm_map_unlock(dst_map); 5772 } else { 5773 /* 5774 * Performance gain: 5775 * 5776 * if the copy and dst address are misaligned but the same 5777 * offset within the page we can copy_not_aligned the 5778 * misaligned parts and copy aligned the rest. If they are 5779 * aligned but len is unaligned we simply need to copy 5780 * the end bit unaligned. We'll need to split the misaligned 5781 * bits of the region in this case ! 5782 */ 5783 /* ALWAYS UNLOCKS THE dst_map MAP */ 5784 if ((kr = vm_map_copy_overwrite_unaligned( dst_map, 5785 tmp_entry, copy, base_addr)) != KERN_SUCCESS) { 5786 if(next_copy != NULL) { 5787 copy->cpy_hdr.nentries += 5788 remaining_entries; 5789 copy->cpy_hdr.links.prev->vme_next = 5790 next_copy; 5791 copy->cpy_hdr.links.prev = 5792 previous_prev; 5793 copy->size += copy_size; 5794 } 5795 return kr; 5796 } 5797 } 5798 total_size -= copy_size; 5799 if(total_size == 0) 5800 break; 5801 base_addr += copy_size; 5802 copy_size = 0; 5803 copy->offset = new_offset; 5804 if(next_copy != NULL) { 5805 copy->cpy_hdr.nentries = remaining_entries; 5806 copy->cpy_hdr.links.next = next_copy; 5807 copy->cpy_hdr.links.prev = previous_prev; 5808 next_copy->vme_prev = vm_map_copy_to_entry(copy); 5809 copy->size = total_size; 5810 } 5811 vm_map_lock(dst_map); 5812 while(TRUE) { 5813 if (!vm_map_lookup_entry(dst_map, 5814 base_addr, &tmp_entry)) { 5815 vm_map_unlock(dst_map); 5816 return(KERN_INVALID_ADDRESS); 5817 } 5818 if (tmp_entry->in_transition) { 5819 entry->needs_wakeup = TRUE; 5820 vm_map_entry_wait(dst_map, THREAD_UNINT); 5821 } else { 5822 break; 5823 } 5824 } 5825 vm_map_clip_start(dst_map, tmp_entry, vm_map_trunc_page(base_addr)); 5826 5827 entry = tmp_entry; 5828 } /* while */ 5829 5830 /* 5831 * Throw away the vm_map_copy object 5832 */ 5833 vm_map_copy_discard(copy); 5834 5835 return(KERN_SUCCESS); 5836}/* vm_map_copy_overwrite */ 5837 5838kern_return_t 5839vm_map_copy_overwrite( 5840 vm_map_t dst_map, 5841 vm_map_offset_t dst_addr, 5842 vm_map_copy_t copy, 5843 boolean_t interruptible) 5844{ 5845 return vm_map_copy_overwrite_nested( 5846 dst_map, dst_addr, copy, interruptible, (pmap_t) NULL); 5847} 5848 5849 5850/* 5851 * Routine: vm_map_copy_overwrite_unaligned [internal use only] 5852 * 5853 * Decription: 5854 * Physically copy unaligned data 5855 * 5856 * Implementation: 5857 * Unaligned parts of pages have to be physically copied. We use 5858 * a modified form of vm_fault_copy (which understands none-aligned 5859 * page offsets and sizes) to do the copy. We attempt to copy as 5860 * much memory in one go as possibly, however vm_fault_copy copies 5861 * within 1 memory object so we have to find the smaller of "amount left" 5862 * "source object data size" and "target object data size". With 5863 * unaligned data we don't need to split regions, therefore the source 5864 * (copy) object should be one map entry, the target range may be split 5865 * over multiple map entries however. In any event we are pessimistic 5866 * about these assumptions. 5867 * 5868 * Assumptions: 5869 * dst_map is locked on entry and is return locked on success, 5870 * unlocked on error. 5871 */ 5872 5873static kern_return_t 5874vm_map_copy_overwrite_unaligned( 5875 vm_map_t dst_map, 5876 vm_map_entry_t entry, 5877 vm_map_copy_t copy, 5878 vm_map_offset_t start) 5879{ 5880 vm_map_entry_t copy_entry = vm_map_copy_first_entry(copy); 5881 vm_map_version_t version; 5882 vm_object_t dst_object; 5883 vm_object_offset_t dst_offset; 5884 vm_object_offset_t src_offset; 5885 vm_object_offset_t entry_offset; 5886 vm_map_offset_t entry_end; 5887 vm_map_size_t src_size, 5888 dst_size, 5889 copy_size, 5890 amount_left; 5891 kern_return_t kr = KERN_SUCCESS; 5892 5893 vm_map_lock_write_to_read(dst_map); 5894 5895 src_offset = copy->offset - vm_object_trunc_page(copy->offset); 5896 amount_left = copy->size; 5897/* 5898 * unaligned so we never clipped this entry, we need the offset into 5899 * the vm_object not just the data. 5900 */ 5901 while (amount_left > 0) { 5902 5903 if (entry == vm_map_to_entry(dst_map)) { 5904 vm_map_unlock_read(dst_map); 5905 return KERN_INVALID_ADDRESS; 5906 } 5907 5908 /* "start" must be within the current map entry */ 5909 assert ((start>=entry->vme_start) && (start<entry->vme_end)); 5910 5911 dst_offset = start - entry->vme_start; 5912 5913 dst_size = entry->vme_end - start; 5914 5915 src_size = copy_entry->vme_end - 5916 (copy_entry->vme_start + src_offset); 5917 5918 if (dst_size < src_size) { 5919/* 5920 * we can only copy dst_size bytes before 5921 * we have to get the next destination entry 5922 */ 5923 copy_size = dst_size; 5924 } else { 5925/* 5926 * we can only copy src_size bytes before 5927 * we have to get the next source copy entry 5928 */ 5929 copy_size = src_size; 5930 } 5931 5932 if (copy_size > amount_left) { 5933 copy_size = amount_left; 5934 } 5935/* 5936 * Entry needs copy, create a shadow shadow object for 5937 * Copy on write region. 5938 */ 5939 if (entry->needs_copy && 5940 ((entry->protection & VM_PROT_WRITE) != 0)) 5941 { 5942 if (vm_map_lock_read_to_write(dst_map)) { 5943 vm_map_lock_read(dst_map); 5944 goto RetryLookup; 5945 } 5946 vm_object_shadow(&entry->object.vm_object, 5947 &entry->offset, 5948 (vm_map_size_t)(entry->vme_end 5949 - entry->vme_start)); 5950 entry->needs_copy = FALSE; 5951 vm_map_lock_write_to_read(dst_map); 5952 } 5953 dst_object = entry->object.vm_object; 5954/* 5955 * unlike with the virtual (aligned) copy we're going 5956 * to fault on it therefore we need a target object. 5957 */ 5958 if (dst_object == VM_OBJECT_NULL) { 5959 if (vm_map_lock_read_to_write(dst_map)) { 5960 vm_map_lock_read(dst_map); 5961 goto RetryLookup; 5962 } 5963 dst_object = vm_object_allocate((vm_map_size_t) 5964 entry->vme_end - entry->vme_start); 5965 entry->object.vm_object = dst_object; 5966 entry->offset = 0; 5967 vm_map_lock_write_to_read(dst_map); 5968 } 5969/* 5970 * Take an object reference and unlock map. The "entry" may 5971 * disappear or change when the map is unlocked. 5972 */ 5973 vm_object_reference(dst_object); 5974 version.main_timestamp = dst_map->timestamp; 5975 entry_offset = entry->offset; 5976 entry_end = entry->vme_end; 5977 vm_map_unlock_read(dst_map); 5978/* 5979 * Copy as much as possible in one pass 5980 */ 5981 kr = vm_fault_copy( 5982 copy_entry->object.vm_object, 5983 copy_entry->offset + src_offset, 5984 ©_size, 5985 dst_object, 5986 entry_offset + dst_offset, 5987 dst_map, 5988 &version, 5989 THREAD_UNINT ); 5990 5991 start += copy_size; 5992 src_offset += copy_size; 5993 amount_left -= copy_size; 5994/* 5995 * Release the object reference 5996 */ 5997 vm_object_deallocate(dst_object); 5998/* 5999 * If a hard error occurred, return it now 6000 */ 6001 if (kr != KERN_SUCCESS) 6002 return kr; 6003 6004 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end 6005 || amount_left == 0) 6006 { 6007/* 6008 * all done with this copy entry, dispose. 6009 */ 6010 vm_map_copy_entry_unlink(copy, copy_entry); 6011 vm_object_deallocate(copy_entry->object.vm_object); 6012 vm_map_copy_entry_dispose(copy, copy_entry); 6013 6014 if ((copy_entry = vm_map_copy_first_entry(copy)) 6015 == vm_map_copy_to_entry(copy) && amount_left) { 6016/* 6017 * not finished copying but run out of source 6018 */ 6019 return KERN_INVALID_ADDRESS; 6020 } 6021 src_offset = 0; 6022 } 6023 6024 if (amount_left == 0) 6025 return KERN_SUCCESS; 6026 6027 vm_map_lock_read(dst_map); 6028 if (version.main_timestamp == dst_map->timestamp) { 6029 if (start == entry_end) { 6030/* 6031 * destination region is split. Use the version 6032 * information to avoid a lookup in the normal 6033 * case. 6034 */ 6035 entry = entry->vme_next; 6036/* 6037 * should be contiguous. Fail if we encounter 6038 * a hole in the destination. 6039 */ 6040 if (start != entry->vme_start) { 6041 vm_map_unlock_read(dst_map); 6042 return KERN_INVALID_ADDRESS ; 6043 } 6044 } 6045 } else { 6046/* 6047 * Map version check failed. 6048 * we must lookup the entry because somebody 6049 * might have changed the map behind our backs. 6050 */ 6051 RetryLookup: 6052 if (!vm_map_lookup_entry(dst_map, start, &entry)) 6053 { 6054 vm_map_unlock_read(dst_map); 6055 return KERN_INVALID_ADDRESS ; 6056 } 6057 } 6058 }/* while */ 6059 6060 return KERN_SUCCESS; 6061}/* vm_map_copy_overwrite_unaligned */ 6062 6063/* 6064 * Routine: vm_map_copy_overwrite_aligned [internal use only] 6065 * 6066 * Description: 6067 * Does all the vm_trickery possible for whole pages. 6068 * 6069 * Implementation: 6070 * 6071 * If there are no permanent objects in the destination, 6072 * and the source and destination map entry zones match, 6073 * and the destination map entry is not shared, 6074 * then the map entries can be deleted and replaced 6075 * with those from the copy. The following code is the 6076 * basic idea of what to do, but there are lots of annoying 6077 * little details about getting protection and inheritance 6078 * right. Should add protection, inheritance, and sharing checks 6079 * to the above pass and make sure that no wiring is involved. 6080 */ 6081 6082static kern_return_t 6083vm_map_copy_overwrite_aligned( 6084 vm_map_t dst_map, 6085 vm_map_entry_t tmp_entry, 6086 vm_map_copy_t copy, 6087 vm_map_offset_t start, 6088 __unused pmap_t pmap) 6089{ 6090 vm_object_t object; 6091 vm_map_entry_t copy_entry; 6092 vm_map_size_t copy_size; 6093 vm_map_size_t size; 6094 vm_map_entry_t entry; 6095 6096 while ((copy_entry = vm_map_copy_first_entry(copy)) 6097 != vm_map_copy_to_entry(copy)) 6098 { 6099 copy_size = (copy_entry->vme_end - copy_entry->vme_start); 6100 6101 entry = tmp_entry; 6102 assert(!entry->use_pmap); /* unnested when clipped earlier */ 6103 if (entry == vm_map_to_entry(dst_map)) { 6104 vm_map_unlock(dst_map); 6105 return KERN_INVALID_ADDRESS; 6106 } 6107 size = (entry->vme_end - entry->vme_start); 6108 /* 6109 * Make sure that no holes popped up in the 6110 * address map, and that the protection is 6111 * still valid, in case the map was unlocked 6112 * earlier. 6113 */ 6114 6115 if ((entry->vme_start != start) || ((entry->is_sub_map) 6116 && !entry->needs_copy)) { 6117 vm_map_unlock(dst_map); 6118 return(KERN_INVALID_ADDRESS); 6119 } 6120 assert(entry != vm_map_to_entry(dst_map)); 6121 6122 /* 6123 * Check protection again 6124 */ 6125 6126 if ( ! (entry->protection & VM_PROT_WRITE)) { 6127 vm_map_unlock(dst_map); 6128 return(KERN_PROTECTION_FAILURE); 6129 } 6130 6131 /* 6132 * Adjust to source size first 6133 */ 6134 6135 if (copy_size < size) { 6136 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size); 6137 size = copy_size; 6138 } 6139 6140 /* 6141 * Adjust to destination size 6142 */ 6143 6144 if (size < copy_size) { 6145 vm_map_copy_clip_end(copy, copy_entry, 6146 copy_entry->vme_start + size); 6147 copy_size = size; 6148 } 6149 6150 assert((entry->vme_end - entry->vme_start) == size); 6151 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size); 6152 assert((copy_entry->vme_end - copy_entry->vme_start) == size); 6153 6154 /* 6155 * If the destination contains temporary unshared memory, 6156 * we can perform the copy by throwing it away and 6157 * installing the source data. 6158 */ 6159 6160 object = entry->object.vm_object; 6161 if ((!entry->is_shared && 6162 ((object == VM_OBJECT_NULL) || 6163 (object->internal && !object->true_share))) || 6164 entry->needs_copy) { 6165 vm_object_t old_object = entry->object.vm_object; 6166 vm_object_offset_t old_offset = entry->offset; 6167 vm_object_offset_t offset; 6168 6169 /* 6170 * Ensure that the source and destination aren't 6171 * identical 6172 */ 6173 if (old_object == copy_entry->object.vm_object && 6174 old_offset == copy_entry->offset) { 6175 vm_map_copy_entry_unlink(copy, copy_entry); 6176 vm_map_copy_entry_dispose(copy, copy_entry); 6177 6178 if (old_object != VM_OBJECT_NULL) 6179 vm_object_deallocate(old_object); 6180 6181 start = tmp_entry->vme_end; 6182 tmp_entry = tmp_entry->vme_next; 6183 continue; 6184 } 6185 6186 if (old_object != VM_OBJECT_NULL) { 6187 if(entry->is_sub_map) { 6188 if(entry->use_pmap) { 6189#ifndef NO_NESTED_PMAP 6190 pmap_unnest(dst_map->pmap, 6191 (addr64_t)entry->vme_start, 6192 entry->vme_end - entry->vme_start); 6193#endif /* NO_NESTED_PMAP */ 6194 if(dst_map->mapped) { 6195 /* clean up parent */ 6196 /* map/maps */ 6197 vm_map_submap_pmap_clean( 6198 dst_map, entry->vme_start, 6199 entry->vme_end, 6200 entry->object.sub_map, 6201 entry->offset); 6202 } 6203 } else { 6204 vm_map_submap_pmap_clean( 6205 dst_map, entry->vme_start, 6206 entry->vme_end, 6207 entry->object.sub_map, 6208 entry->offset); 6209 } 6210 vm_map_deallocate( 6211 entry->object.sub_map); 6212 } else { 6213 if(dst_map->mapped) { 6214 vm_object_pmap_protect( 6215 entry->object.vm_object, 6216 entry->offset, 6217 entry->vme_end 6218 - entry->vme_start, 6219 PMAP_NULL, 6220 entry->vme_start, 6221 VM_PROT_NONE); 6222 } else { 6223 pmap_remove(dst_map->pmap, 6224 (addr64_t)(entry->vme_start), 6225 (addr64_t)(entry->vme_end)); 6226 } 6227 vm_object_deallocate(old_object); 6228 } 6229 } 6230 6231 entry->is_sub_map = FALSE; 6232 entry->object = copy_entry->object; 6233 object = entry->object.vm_object; 6234 entry->needs_copy = copy_entry->needs_copy; 6235 entry->wired_count = 0; 6236 entry->user_wired_count = 0; 6237 offset = entry->offset = copy_entry->offset; 6238 6239 vm_map_copy_entry_unlink(copy, copy_entry); 6240 vm_map_copy_entry_dispose(copy, copy_entry); 6241 6242 /* 6243 * we could try to push pages into the pmap at this point, BUT 6244 * this optimization only saved on average 2 us per page if ALL 6245 * the pages in the source were currently mapped 6246 * and ALL the pages in the dest were touched, if there were fewer 6247 * than 2/3 of the pages touched, this optimization actually cost more cycles 6248 * it also puts a lot of pressure on the pmap layer w/r to mapping structures 6249 */ 6250 6251 /* 6252 * Set up for the next iteration. The map 6253 * has not been unlocked, so the next 6254 * address should be at the end of this 6255 * entry, and the next map entry should be 6256 * the one following it. 6257 */ 6258 6259 start = tmp_entry->vme_end; 6260 tmp_entry = tmp_entry->vme_next; 6261 } else { 6262 vm_map_version_t version; 6263 vm_object_t dst_object = entry->object.vm_object; 6264 vm_object_offset_t dst_offset = entry->offset; 6265 kern_return_t r; 6266 6267 /* 6268 * Take an object reference, and record 6269 * the map version information so that the 6270 * map can be safely unlocked. 6271 */ 6272 6273 vm_object_reference(dst_object); 6274 6275 /* account for unlock bumping up timestamp */ 6276 version.main_timestamp = dst_map->timestamp + 1; 6277 6278 vm_map_unlock(dst_map); 6279 6280 /* 6281 * Copy as much as possible in one pass 6282 */ 6283 6284 copy_size = size; 6285 r = vm_fault_copy( 6286 copy_entry->object.vm_object, 6287 copy_entry->offset, 6288 ©_size, 6289 dst_object, 6290 dst_offset, 6291 dst_map, 6292 &version, 6293 THREAD_UNINT ); 6294 6295 /* 6296 * Release the object reference 6297 */ 6298 6299 vm_object_deallocate(dst_object); 6300 6301 /* 6302 * If a hard error occurred, return it now 6303 */ 6304 6305 if (r != KERN_SUCCESS) 6306 return(r); 6307 6308 if (copy_size != 0) { 6309 /* 6310 * Dispose of the copied region 6311 */ 6312 6313 vm_map_copy_clip_end(copy, copy_entry, 6314 copy_entry->vme_start + copy_size); 6315 vm_map_copy_entry_unlink(copy, copy_entry); 6316 vm_object_deallocate(copy_entry->object.vm_object); 6317 vm_map_copy_entry_dispose(copy, copy_entry); 6318 } 6319 6320 /* 6321 * Pick up in the destination map where we left off. 6322 * 6323 * Use the version information to avoid a lookup 6324 * in the normal case. 6325 */ 6326 6327 start += copy_size; 6328 vm_map_lock(dst_map); 6329 if (version.main_timestamp == dst_map->timestamp) { 6330 /* We can safely use saved tmp_entry value */ 6331 6332 vm_map_clip_end(dst_map, tmp_entry, start); 6333 tmp_entry = tmp_entry->vme_next; 6334 } else { 6335 /* Must do lookup of tmp_entry */ 6336 6337 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) { 6338 vm_map_unlock(dst_map); 6339 return(KERN_INVALID_ADDRESS); 6340 } 6341 vm_map_clip_start(dst_map, tmp_entry, start); 6342 } 6343 } 6344 }/* while */ 6345 6346 return(KERN_SUCCESS); 6347}/* vm_map_copy_overwrite_aligned */ 6348 6349/* 6350 * Routine: vm_map_copyin_kernel_buffer [internal use only] 6351 * 6352 * Description: 6353 * Copy in data to a kernel buffer from space in the 6354 * source map. The original space may be optionally 6355 * deallocated. 6356 * 6357 * If successful, returns a new copy object. 6358 */ 6359static kern_return_t 6360vm_map_copyin_kernel_buffer( 6361 vm_map_t src_map, 6362 vm_map_offset_t src_addr, 6363 vm_map_size_t len, 6364 boolean_t src_destroy, 6365 vm_map_copy_t *copy_result) 6366{ 6367 kern_return_t kr; 6368 vm_map_copy_t copy; 6369 vm_map_size_t kalloc_size = sizeof(struct vm_map_copy) + len; 6370 6371 copy = (vm_map_copy_t) kalloc(kalloc_size); 6372 if (copy == VM_MAP_COPY_NULL) { 6373 return KERN_RESOURCE_SHORTAGE; 6374 } 6375 copy->type = VM_MAP_COPY_KERNEL_BUFFER; 6376 copy->size = len; 6377 copy->offset = 0; 6378 copy->cpy_kdata = (void *) (copy + 1); 6379 copy->cpy_kalloc_size = kalloc_size; 6380 6381 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, len); 6382 if (kr != KERN_SUCCESS) { 6383 kfree(copy, kalloc_size); 6384 return kr; 6385 } 6386 if (src_destroy) { 6387 (void) vm_map_remove(src_map, vm_map_trunc_page(src_addr), 6388 vm_map_round_page(src_addr + len), 6389 VM_MAP_REMOVE_INTERRUPTIBLE | 6390 VM_MAP_REMOVE_WAIT_FOR_KWIRE | 6391 (src_map == kernel_map) ? 6392 VM_MAP_REMOVE_KUNWIRE : 0); 6393 } 6394 *copy_result = copy; 6395 return KERN_SUCCESS; 6396} 6397 6398/* 6399 * Routine: vm_map_copyout_kernel_buffer [internal use only] 6400 * 6401 * Description: 6402 * Copy out data from a kernel buffer into space in the 6403 * destination map. The space may be otpionally dynamically 6404 * allocated. 6405 * 6406 * If successful, consumes the copy object. 6407 * Otherwise, the caller is responsible for it. 6408 */ 6409static int vm_map_copyout_kernel_buffer_failures = 0; 6410static kern_return_t 6411vm_map_copyout_kernel_buffer( 6412 vm_map_t map, 6413 vm_map_address_t *addr, /* IN/OUT */ 6414 vm_map_copy_t copy, 6415 boolean_t overwrite) 6416{ 6417 kern_return_t kr = KERN_SUCCESS; 6418 thread_t thread = current_thread(); 6419 6420 if (!overwrite) { 6421 6422 /* 6423 * Allocate space in the target map for the data 6424 */ 6425 *addr = 0; 6426 kr = vm_map_enter(map, 6427 addr, 6428 vm_map_round_page(copy->size), 6429 (vm_map_offset_t) 0, 6430 VM_FLAGS_ANYWHERE, 6431 VM_OBJECT_NULL, 6432 (vm_object_offset_t) 0, 6433 FALSE, 6434 VM_PROT_DEFAULT, 6435 VM_PROT_ALL, 6436 VM_INHERIT_DEFAULT); 6437 if (kr != KERN_SUCCESS) 6438 return kr; 6439 } 6440 6441 /* 6442 * Copyout the data from the kernel buffer to the target map. 6443 */ 6444 if (thread->map == map) { 6445 6446 /* 6447 * If the target map is the current map, just do 6448 * the copy. 6449 */ 6450 if (copyout(copy->cpy_kdata, *addr, copy->size)) { 6451 kr = KERN_INVALID_ADDRESS; 6452 } 6453 } 6454 else { 6455 vm_map_t oldmap; 6456 6457 /* 6458 * If the target map is another map, assume the 6459 * target's address space identity for the duration 6460 * of the copy. 6461 */ 6462 vm_map_reference(map); 6463 oldmap = vm_map_switch(map); 6464 6465 if (copyout(copy->cpy_kdata, *addr, copy->size)) { 6466 vm_map_copyout_kernel_buffer_failures++; 6467 kr = KERN_INVALID_ADDRESS; 6468 } 6469 6470 (void) vm_map_switch(oldmap); 6471 vm_map_deallocate(map); 6472 } 6473 6474 if (kr != KERN_SUCCESS) { 6475 /* the copy failed, clean up */ 6476 if (!overwrite) { 6477 /* 6478 * Deallocate the space we allocated in the target map. 6479 */ 6480 (void) vm_map_remove(map, 6481 vm_map_trunc_page(*addr), 6482 vm_map_round_page(*addr + 6483 vm_map_round_page(copy->size)), 6484 VM_MAP_NO_FLAGS); 6485 *addr = 0; 6486 } 6487 } else { 6488 /* copy was successful, dicard the copy structure */ 6489 kfree(copy, copy->cpy_kalloc_size); 6490 } 6491 6492 return kr; 6493} 6494 6495/* 6496 * Macro: vm_map_copy_insert 6497 * 6498 * Description: 6499 * Link a copy chain ("copy") into a map at the 6500 * specified location (after "where"). 6501 * Side effects: 6502 * The copy chain is destroyed. 6503 * Warning: 6504 * The arguments are evaluated multiple times. 6505 */ 6506#define vm_map_copy_insert(map, where, copy) \ 6507MACRO_BEGIN \ 6508 vm_map_t VMCI_map; \ 6509 vm_map_entry_t VMCI_where; \ 6510 vm_map_copy_t VMCI_copy; \ 6511 VMCI_map = (map); \ 6512 VMCI_where = (where); \ 6513 VMCI_copy = (copy); \ 6514 ((VMCI_where->vme_next)->vme_prev = vm_map_copy_last_entry(VMCI_copy))\ 6515 ->vme_next = (VMCI_where->vme_next); \ 6516 ((VMCI_where)->vme_next = vm_map_copy_first_entry(VMCI_copy)) \ 6517 ->vme_prev = VMCI_where; \ 6518 VMCI_map->hdr.nentries += VMCI_copy->cpy_hdr.nentries; \ 6519 UPDATE_FIRST_FREE(VMCI_map, VMCI_map->first_free); \ 6520 zfree(vm_map_copy_zone, VMCI_copy); \ 6521MACRO_END 6522 6523/* 6524 * Routine: vm_map_copyout 6525 * 6526 * Description: 6527 * Copy out a copy chain ("copy") into newly-allocated 6528 * space in the destination map. 6529 * 6530 * If successful, consumes the copy object. 6531 * Otherwise, the caller is responsible for it. 6532 */ 6533kern_return_t 6534vm_map_copyout( 6535 vm_map_t dst_map, 6536 vm_map_address_t *dst_addr, /* OUT */ 6537 vm_map_copy_t copy) 6538{ 6539 vm_map_size_t size; 6540 vm_map_size_t adjustment; 6541 vm_map_offset_t start; 6542 vm_object_offset_t vm_copy_start; 6543 vm_map_entry_t last; 6544 register 6545 vm_map_entry_t entry; 6546 6547 /* 6548 * Check for null copy object. 6549 */ 6550 6551 if (copy == VM_MAP_COPY_NULL) { 6552 *dst_addr = 0; 6553 return(KERN_SUCCESS); 6554 } 6555 6556 /* 6557 * Check for special copy object, created 6558 * by vm_map_copyin_object. 6559 */ 6560 6561 if (copy->type == VM_MAP_COPY_OBJECT) { 6562 vm_object_t object = copy->cpy_object; 6563 kern_return_t kr; 6564 vm_object_offset_t offset; 6565 6566 offset = vm_object_trunc_page(copy->offset); 6567 size = vm_map_round_page(copy->size + 6568 (vm_map_size_t)(copy->offset - offset)); 6569 *dst_addr = 0; 6570 kr = vm_map_enter(dst_map, dst_addr, size, 6571 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE, 6572 object, offset, FALSE, 6573 VM_PROT_DEFAULT, VM_PROT_ALL, 6574 VM_INHERIT_DEFAULT); 6575 if (kr != KERN_SUCCESS) 6576 return(kr); 6577 /* Account for non-pagealigned copy object */ 6578 *dst_addr += (vm_map_offset_t)(copy->offset - offset); 6579 zfree(vm_map_copy_zone, copy); 6580 return(KERN_SUCCESS); 6581 } 6582 6583 /* 6584 * Check for special kernel buffer allocated 6585 * by new_ipc_kmsg_copyin. 6586 */ 6587 6588 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) { 6589 return(vm_map_copyout_kernel_buffer(dst_map, dst_addr, 6590 copy, FALSE)); 6591 } 6592 6593 /* 6594 * Find space for the data 6595 */ 6596 6597 vm_copy_start = vm_object_trunc_page(copy->offset); 6598 size = vm_map_round_page((vm_map_size_t)copy->offset + copy->size) 6599 - vm_copy_start; 6600 6601StartAgain: ; 6602 6603 vm_map_lock(dst_map); 6604 assert(first_free_is_valid(dst_map)); 6605 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ? 6606 vm_map_min(dst_map) : last->vme_end; 6607 6608 while (TRUE) { 6609 vm_map_entry_t next = last->vme_next; 6610 vm_map_offset_t end = start + size; 6611 6612 if ((end > dst_map->max_offset) || (end < start)) { 6613 if (dst_map->wait_for_space) { 6614 if (size <= (dst_map->max_offset - dst_map->min_offset)) { 6615 assert_wait((event_t) dst_map, 6616 THREAD_INTERRUPTIBLE); 6617 vm_map_unlock(dst_map); 6618 thread_block(THREAD_CONTINUE_NULL); 6619 goto StartAgain; 6620 } 6621 } 6622 vm_map_unlock(dst_map); 6623 return(KERN_NO_SPACE); 6624 } 6625 6626 if ((next == vm_map_to_entry(dst_map)) || 6627 (next->vme_start >= end)) 6628 break; 6629 6630 last = next; 6631 start = last->vme_end; 6632 } 6633 6634 /* 6635 * Since we're going to just drop the map 6636 * entries from the copy into the destination 6637 * map, they must come from the same pool. 6638 */ 6639 6640 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) { 6641 /* 6642 * Mismatches occur when dealing with the default 6643 * pager. 6644 */ 6645 zone_t old_zone; 6646 vm_map_entry_t next, new; 6647 6648 /* 6649 * Find the zone that the copies were allocated from 6650 */ 6651 old_zone = (copy->cpy_hdr.entries_pageable) 6652 ? vm_map_entry_zone 6653 : vm_map_kentry_zone; 6654 entry = vm_map_copy_first_entry(copy); 6655 6656 /* 6657 * Reinitialize the copy so that vm_map_copy_entry_link 6658 * will work. 6659 */ 6660 copy->cpy_hdr.nentries = 0; 6661 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable; 6662 vm_map_copy_first_entry(copy) = 6663 vm_map_copy_last_entry(copy) = 6664 vm_map_copy_to_entry(copy); 6665 6666 /* 6667 * Copy each entry. 6668 */ 6669 while (entry != vm_map_copy_to_entry(copy)) { 6670 new = vm_map_copy_entry_create(copy); 6671 vm_map_entry_copy_full(new, entry); 6672 new->use_pmap = FALSE; /* clr address space specifics */ 6673 vm_map_copy_entry_link(copy, 6674 vm_map_copy_last_entry(copy), 6675 new); 6676 next = entry->vme_next; 6677 zfree(old_zone, entry); 6678 entry = next; 6679 } 6680 } 6681 6682 /* 6683 * Adjust the addresses in the copy chain, and 6684 * reset the region attributes. 6685 */ 6686 6687 adjustment = start - vm_copy_start; 6688 for (entry = vm_map_copy_first_entry(copy); 6689 entry != vm_map_copy_to_entry(copy); 6690 entry = entry->vme_next) { 6691 entry->vme_start += adjustment; 6692 entry->vme_end += adjustment; 6693 6694 entry->inheritance = VM_INHERIT_DEFAULT; 6695 entry->protection = VM_PROT_DEFAULT; 6696 entry->max_protection = VM_PROT_ALL; 6697 entry->behavior = VM_BEHAVIOR_DEFAULT; 6698 6699 /* 6700 * If the entry is now wired, 6701 * map the pages into the destination map. 6702 */ 6703 if (entry->wired_count != 0) { 6704 register vm_map_offset_t va; 6705 vm_object_offset_t offset; 6706 register vm_object_t object; 6707 vm_prot_t prot; 6708 int type_of_fault; 6709 6710 object = entry->object.vm_object; 6711 offset = entry->offset; 6712 va = entry->vme_start; 6713 6714 pmap_pageable(dst_map->pmap, 6715 entry->vme_start, 6716 entry->vme_end, 6717 TRUE); 6718 6719 while (va < entry->vme_end) { 6720 register vm_page_t m; 6721 6722 /* 6723 * Look up the page in the object. 6724 * Assert that the page will be found in the 6725 * top object: 6726 * either 6727 * the object was newly created by 6728 * vm_object_copy_slowly, and has 6729 * copies of all of the pages from 6730 * the source object 6731 * or 6732 * the object was moved from the old 6733 * map entry; because the old map 6734 * entry was wired, all of the pages 6735 * were in the top-level object. 6736 * (XXX not true if we wire pages for 6737 * reading) 6738 */ 6739 vm_object_lock(object); 6740 6741 m = vm_page_lookup(object, offset); 6742 if (m == VM_PAGE_NULL || m->wire_count == 0 || 6743 m->absent) 6744 panic("vm_map_copyout: wiring %p", m); 6745 6746 /* 6747 * ENCRYPTED SWAP: 6748 * The page is assumed to be wired here, so it 6749 * shouldn't be encrypted. Otherwise, we 6750 * couldn't enter it in the page table, since 6751 * we don't want the user to see the encrypted 6752 * data. 6753 */ 6754 ASSERT_PAGE_DECRYPTED(m); 6755 6756 prot = entry->protection; 6757 6758 if (override_nx(dst_map, entry->alias) && prot) 6759 prot |= VM_PROT_EXECUTE; 6760 6761 type_of_fault = DBG_CACHE_HIT_FAULT; 6762 6763 vm_fault_enter(m, dst_map->pmap, va, prot, 6764 m->wire_count != 0, FALSE, FALSE, 6765 &type_of_fault); 6766 6767 vm_object_unlock(object); 6768 6769 offset += PAGE_SIZE_64; 6770 va += PAGE_SIZE; 6771 } 6772 } 6773 } 6774 6775 /* 6776 * Correct the page alignment for the result 6777 */ 6778 6779 *dst_addr = start + (copy->offset - vm_copy_start); 6780 6781 /* 6782 * Update the hints and the map size 6783 */ 6784 6785 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy)); 6786 6787 dst_map->size += size; 6788 6789 /* 6790 * Link in the copy 6791 */ 6792 6793 vm_map_copy_insert(dst_map, last, copy); 6794 6795 vm_map_unlock(dst_map); 6796 6797 /* 6798 * XXX If wiring_required, call vm_map_pageable 6799 */ 6800 6801 return(KERN_SUCCESS); 6802} 6803 6804/* 6805 * Routine: vm_map_copyin 6806 * 6807 * Description: 6808 * see vm_map_copyin_common. Exported via Unsupported.exports. 6809 * 6810 */ 6811 6812#undef vm_map_copyin 6813 6814kern_return_t 6815vm_map_copyin( 6816 vm_map_t src_map, 6817 vm_map_address_t src_addr, 6818 vm_map_size_t len, 6819 boolean_t src_destroy, 6820 vm_map_copy_t *copy_result) /* OUT */ 6821{ 6822 return(vm_map_copyin_common(src_map, src_addr, len, src_destroy, 6823 FALSE, copy_result, FALSE)); 6824} 6825 6826/* 6827 * Routine: vm_map_copyin_common 6828 * 6829 * Description: 6830 * Copy the specified region (src_addr, len) from the 6831 * source address space (src_map), possibly removing 6832 * the region from the source address space (src_destroy). 6833 * 6834 * Returns: 6835 * A vm_map_copy_t object (copy_result), suitable for 6836 * insertion into another address space (using vm_map_copyout), 6837 * copying over another address space region (using 6838 * vm_map_copy_overwrite). If the copy is unused, it 6839 * should be destroyed (using vm_map_copy_discard). 6840 * 6841 * In/out conditions: 6842 * The source map should not be locked on entry. 6843 */ 6844 6845typedef struct submap_map { 6846 vm_map_t parent_map; 6847 vm_map_offset_t base_start; 6848 vm_map_offset_t base_end; 6849 vm_map_size_t base_len; 6850 struct submap_map *next; 6851} submap_map_t; 6852 6853kern_return_t 6854vm_map_copyin_common( 6855 vm_map_t src_map, 6856 vm_map_address_t src_addr, 6857 vm_map_size_t len, 6858 boolean_t src_destroy, 6859 __unused boolean_t src_volatile, 6860 vm_map_copy_t *copy_result, /* OUT */ 6861 boolean_t use_maxprot) 6862{ 6863 vm_map_entry_t tmp_entry; /* Result of last map lookup -- 6864 * in multi-level lookup, this 6865 * entry contains the actual 6866 * vm_object/offset. 6867 */ 6868 register 6869 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */ 6870 6871 vm_map_offset_t src_start; /* Start of current entry -- 6872 * where copy is taking place now 6873 */ 6874 vm_map_offset_t src_end; /* End of entire region to be 6875 * copied */ 6876 vm_map_offset_t src_base; 6877 vm_map_t base_map = src_map; 6878 boolean_t map_share=FALSE; 6879 submap_map_t *parent_maps = NULL; 6880 6881 register 6882 vm_map_copy_t copy; /* Resulting copy */ 6883 vm_map_address_t copy_addr; 6884 6885 /* 6886 * Check for copies of zero bytes. 6887 */ 6888 6889 if (len == 0) { 6890 *copy_result = VM_MAP_COPY_NULL; 6891 return(KERN_SUCCESS); 6892 } 6893 6894 /* 6895 * Check that the end address doesn't overflow 6896 */ 6897 src_end = src_addr + len; 6898 if (src_end < src_addr) 6899 return KERN_INVALID_ADDRESS; 6900 6901 /* 6902 * If the copy is sufficiently small, use a kernel buffer instead 6903 * of making a virtual copy. The theory being that the cost of 6904 * setting up VM (and taking C-O-W faults) dominates the copy costs 6905 * for small regions. 6906 */ 6907 if ((len < msg_ool_size_small) && !use_maxprot) 6908 return vm_map_copyin_kernel_buffer(src_map, src_addr, len, 6909 src_destroy, copy_result); 6910 6911 /* 6912 * Compute (page aligned) start and end of region 6913 */ 6914 src_start = vm_map_trunc_page(src_addr); 6915 src_end = vm_map_round_page(src_end); 6916 6917 XPR(XPR_VM_MAP, "vm_map_copyin_common map 0x%x addr 0x%x len 0x%x dest %d\n", (natural_t)src_map, src_addr, len, src_destroy, 0); 6918 6919 /* 6920 * Allocate a header element for the list. 6921 * 6922 * Use the start and end in the header to 6923 * remember the endpoints prior to rounding. 6924 */ 6925 6926 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 6927 vm_map_copy_first_entry(copy) = 6928 vm_map_copy_last_entry(copy) = vm_map_copy_to_entry(copy); 6929 copy->type = VM_MAP_COPY_ENTRY_LIST; 6930 copy->cpy_hdr.nentries = 0; 6931 copy->cpy_hdr.entries_pageable = TRUE; 6932 6933 copy->offset = src_addr; 6934 copy->size = len; 6935 6936 new_entry = vm_map_copy_entry_create(copy); 6937 6938#define RETURN(x) \ 6939 MACRO_BEGIN \ 6940 vm_map_unlock(src_map); \ 6941 if(src_map != base_map) \ 6942 vm_map_deallocate(src_map); \ 6943 if (new_entry != VM_MAP_ENTRY_NULL) \ 6944 vm_map_copy_entry_dispose(copy,new_entry); \ 6945 vm_map_copy_discard(copy); \ 6946 { \ 6947 submap_map_t *_ptr; \ 6948 \ 6949 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \ 6950 parent_maps=parent_maps->next; \ 6951 if (_ptr->parent_map != base_map) \ 6952 vm_map_deallocate(_ptr->parent_map); \ 6953 kfree(_ptr, sizeof(submap_map_t)); \ 6954 } \ 6955 } \ 6956 MACRO_RETURN(x); \ 6957 MACRO_END 6958 6959 /* 6960 * Find the beginning of the region. 6961 */ 6962 6963 vm_map_lock(src_map); 6964 6965 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) 6966 RETURN(KERN_INVALID_ADDRESS); 6967 if(!tmp_entry->is_sub_map) { 6968 vm_map_clip_start(src_map, tmp_entry, src_start); 6969 } 6970 /* set for later submap fix-up */ 6971 copy_addr = src_start; 6972 6973 /* 6974 * Go through entries until we get to the end. 6975 */ 6976 6977 while (TRUE) { 6978 register 6979 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */ 6980 vm_map_size_t src_size; /* Size of source 6981 * map entry (in both 6982 * maps) 6983 */ 6984 6985 register 6986 vm_object_t src_object; /* Object to copy */ 6987 vm_object_offset_t src_offset; 6988 6989 boolean_t src_needs_copy; /* Should source map 6990 * be made read-only 6991 * for copy-on-write? 6992 */ 6993 6994 boolean_t new_entry_needs_copy; /* Will new entry be COW? */ 6995 6996 boolean_t was_wired; /* Was source wired? */ 6997 vm_map_version_t version; /* Version before locks 6998 * dropped to make copy 6999 */ 7000 kern_return_t result; /* Return value from 7001 * copy_strategically. 7002 */ 7003 while(tmp_entry->is_sub_map) { 7004 vm_map_size_t submap_len; 7005 submap_map_t *ptr; 7006 7007 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t)); 7008 ptr->next = parent_maps; 7009 parent_maps = ptr; 7010 ptr->parent_map = src_map; 7011 ptr->base_start = src_start; 7012 ptr->base_end = src_end; 7013 submap_len = tmp_entry->vme_end - src_start; 7014 if(submap_len > (src_end-src_start)) 7015 submap_len = src_end-src_start; 7016 ptr->base_len = submap_len; 7017 7018 src_start -= tmp_entry->vme_start; 7019 src_start += tmp_entry->offset; 7020 src_end = src_start + submap_len; 7021 src_map = tmp_entry->object.sub_map; 7022 vm_map_lock(src_map); 7023 /* keep an outstanding reference for all maps in */ 7024 /* the parents tree except the base map */ 7025 vm_map_reference(src_map); 7026 vm_map_unlock(ptr->parent_map); 7027 if (!vm_map_lookup_entry( 7028 src_map, src_start, &tmp_entry)) 7029 RETURN(KERN_INVALID_ADDRESS); 7030 map_share = TRUE; 7031 if(!tmp_entry->is_sub_map) 7032 vm_map_clip_start(src_map, tmp_entry, src_start); 7033 src_entry = tmp_entry; 7034 } 7035 /* we are now in the lowest level submap... */ 7036 7037 if ((tmp_entry->object.vm_object != VM_OBJECT_NULL) && 7038 (tmp_entry->object.vm_object->phys_contiguous)) { 7039 /* This is not, supported for now.In future */ 7040 /* we will need to detect the phys_contig */ 7041 /* condition and then upgrade copy_slowly */ 7042 /* to do physical copy from the device mem */ 7043 /* based object. We can piggy-back off of */ 7044 /* the was wired boolean to set-up the */ 7045 /* proper handling */ 7046 RETURN(KERN_PROTECTION_FAILURE); 7047 } 7048 /* 7049 * Create a new address map entry to hold the result. 7050 * Fill in the fields from the appropriate source entries. 7051 * We must unlock the source map to do this if we need 7052 * to allocate a map entry. 7053 */ 7054 if (new_entry == VM_MAP_ENTRY_NULL) { 7055 version.main_timestamp = src_map->timestamp; 7056 vm_map_unlock(src_map); 7057 7058 new_entry = vm_map_copy_entry_create(copy); 7059 7060 vm_map_lock(src_map); 7061 if ((version.main_timestamp + 1) != src_map->timestamp) { 7062 if (!vm_map_lookup_entry(src_map, src_start, 7063 &tmp_entry)) { 7064 RETURN(KERN_INVALID_ADDRESS); 7065 } 7066 if (!tmp_entry->is_sub_map) 7067 vm_map_clip_start(src_map, tmp_entry, src_start); 7068 continue; /* restart w/ new tmp_entry */ 7069 } 7070 } 7071 7072 /* 7073 * Verify that the region can be read. 7074 */ 7075 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE && 7076 !use_maxprot) || 7077 (src_entry->max_protection & VM_PROT_READ) == 0) 7078 RETURN(KERN_PROTECTION_FAILURE); 7079 7080 /* 7081 * Clip against the endpoints of the entire region. 7082 */ 7083 7084 vm_map_clip_end(src_map, src_entry, src_end); 7085 7086 src_size = src_entry->vme_end - src_start; 7087 src_object = src_entry->object.vm_object; 7088 src_offset = src_entry->offset; 7089 was_wired = (src_entry->wired_count != 0); 7090 7091 vm_map_entry_copy(new_entry, src_entry); 7092 new_entry->use_pmap = FALSE; /* clr address space specifics */ 7093 7094 /* 7095 * Attempt non-blocking copy-on-write optimizations. 7096 */ 7097 7098 if (src_destroy && 7099 (src_object == VM_OBJECT_NULL || 7100 (src_object->internal && !src_object->true_share 7101 && !map_share))) { 7102 /* 7103 * If we are destroying the source, and the object 7104 * is internal, we can move the object reference 7105 * from the source to the copy. The copy is 7106 * copy-on-write only if the source is. 7107 * We make another reference to the object, because 7108 * destroying the source entry will deallocate it. 7109 */ 7110 vm_object_reference(src_object); 7111 7112 /* 7113 * Copy is always unwired. vm_map_copy_entry 7114 * set its wired count to zero. 7115 */ 7116 7117 goto CopySuccessful; 7118 } 7119 7120 7121 RestartCopy: 7122 XPR(XPR_VM_MAP, "vm_map_copyin_common src_obj 0x%x ent 0x%x obj 0x%x was_wired %d\n", 7123 src_object, new_entry, new_entry->object.vm_object, 7124 was_wired, 0); 7125 if ((src_object == VM_OBJECT_NULL || 7126 (!was_wired && !map_share && !tmp_entry->is_shared)) && 7127 vm_object_copy_quickly( 7128 &new_entry->object.vm_object, 7129 src_offset, 7130 src_size, 7131 &src_needs_copy, 7132 &new_entry_needs_copy)) { 7133 7134 new_entry->needs_copy = new_entry_needs_copy; 7135 7136 /* 7137 * Handle copy-on-write obligations 7138 */ 7139 7140 if (src_needs_copy && !tmp_entry->needs_copy) { 7141 vm_prot_t prot; 7142 7143 prot = src_entry->protection & ~VM_PROT_WRITE; 7144 7145 if (override_nx(src_map, src_entry->alias) && prot) 7146 prot |= VM_PROT_EXECUTE; 7147 7148 vm_object_pmap_protect( 7149 src_object, 7150 src_offset, 7151 src_size, 7152 (src_entry->is_shared ? 7153 PMAP_NULL 7154 : src_map->pmap), 7155 src_entry->vme_start, 7156 prot); 7157 7158 tmp_entry->needs_copy = TRUE; 7159 } 7160 7161 /* 7162 * The map has never been unlocked, so it's safe 7163 * to move to the next entry rather than doing 7164 * another lookup. 7165 */ 7166 7167 goto CopySuccessful; 7168 } 7169 7170 /* 7171 * Take an object reference, so that we may 7172 * release the map lock(s). 7173 */ 7174 7175 assert(src_object != VM_OBJECT_NULL); 7176 vm_object_reference(src_object); 7177 7178 /* 7179 * Record the timestamp for later verification. 7180 * Unlock the map. 7181 */ 7182 7183 version.main_timestamp = src_map->timestamp; 7184 vm_map_unlock(src_map); /* Increments timestamp once! */ 7185 7186 /* 7187 * Perform the copy 7188 */ 7189 7190 if (was_wired) { 7191 CopySlowly: 7192 vm_object_lock(src_object); 7193 result = vm_object_copy_slowly( 7194 src_object, 7195 src_offset, 7196 src_size, 7197 THREAD_UNINT, 7198 &new_entry->object.vm_object); 7199 new_entry->offset = 0; 7200 new_entry->needs_copy = FALSE; 7201 7202 } 7203 else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC && 7204 (tmp_entry->is_shared || map_share)) { 7205 vm_object_t new_object; 7206 7207 vm_object_lock_shared(src_object); 7208 new_object = vm_object_copy_delayed( 7209 src_object, 7210 src_offset, 7211 src_size, 7212 TRUE); 7213 if (new_object == VM_OBJECT_NULL) 7214 goto CopySlowly; 7215 7216 new_entry->object.vm_object = new_object; 7217 new_entry->needs_copy = TRUE; 7218 result = KERN_SUCCESS; 7219 7220 } else { 7221 result = vm_object_copy_strategically(src_object, 7222 src_offset, 7223 src_size, 7224 &new_entry->object.vm_object, 7225 &new_entry->offset, 7226 &new_entry_needs_copy); 7227 7228 new_entry->needs_copy = new_entry_needs_copy; 7229 } 7230 7231 if (result != KERN_SUCCESS && 7232 result != KERN_MEMORY_RESTART_COPY) { 7233 vm_map_lock(src_map); 7234 RETURN(result); 7235 } 7236 7237 /* 7238 * Throw away the extra reference 7239 */ 7240 7241 vm_object_deallocate(src_object); 7242 7243 /* 7244 * Verify that the map has not substantially 7245 * changed while the copy was being made. 7246 */ 7247 7248 vm_map_lock(src_map); 7249 7250 if ((version.main_timestamp + 1) == src_map->timestamp) 7251 goto VerificationSuccessful; 7252 7253 /* 7254 * Simple version comparison failed. 7255 * 7256 * Retry the lookup and verify that the 7257 * same object/offset are still present. 7258 * 7259 * [Note: a memory manager that colludes with 7260 * the calling task can detect that we have 7261 * cheated. While the map was unlocked, the 7262 * mapping could have been changed and restored.] 7263 */ 7264 7265 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) { 7266 RETURN(KERN_INVALID_ADDRESS); 7267 } 7268 7269 src_entry = tmp_entry; 7270 vm_map_clip_start(src_map, src_entry, src_start); 7271 7272 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) && 7273 !use_maxprot) || 7274 ((src_entry->max_protection & VM_PROT_READ) == 0)) 7275 goto VerificationFailed; 7276 7277 if (src_entry->vme_end < new_entry->vme_end) 7278 src_size = (new_entry->vme_end = src_entry->vme_end) - src_start; 7279 7280 if ((src_entry->object.vm_object != src_object) || 7281 (src_entry->offset != src_offset) ) { 7282 7283 /* 7284 * Verification failed. 7285 * 7286 * Start over with this top-level entry. 7287 */ 7288 7289 VerificationFailed: ; 7290 7291 vm_object_deallocate(new_entry->object.vm_object); 7292 tmp_entry = src_entry; 7293 continue; 7294 } 7295 7296 /* 7297 * Verification succeeded. 7298 */ 7299 7300 VerificationSuccessful: ; 7301 7302 if (result == KERN_MEMORY_RESTART_COPY) 7303 goto RestartCopy; 7304 7305 /* 7306 * Copy succeeded. 7307 */ 7308 7309 CopySuccessful: ; 7310 7311 /* 7312 * Link in the new copy entry. 7313 */ 7314 7315 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), 7316 new_entry); 7317 7318 /* 7319 * Determine whether the entire region 7320 * has been copied. 7321 */ 7322 src_base = src_start; 7323 src_start = new_entry->vme_end; 7324 new_entry = VM_MAP_ENTRY_NULL; 7325 while ((src_start >= src_end) && (src_end != 0)) { 7326 if (src_map != base_map) { 7327 submap_map_t *ptr; 7328 7329 ptr = parent_maps; 7330 assert(ptr != NULL); 7331 parent_maps = parent_maps->next; 7332 7333 /* fix up the damage we did in that submap */ 7334 vm_map_simplify_range(src_map, 7335 src_base, 7336 src_end); 7337 7338 vm_map_unlock(src_map); 7339 vm_map_deallocate(src_map); 7340 vm_map_lock(ptr->parent_map); 7341 src_map = ptr->parent_map; 7342 src_base = ptr->base_start; 7343 src_start = ptr->base_start + ptr->base_len; 7344 src_end = ptr->base_end; 7345 if ((src_end > src_start) && 7346 !vm_map_lookup_entry( 7347 src_map, src_start, &tmp_entry)) 7348 RETURN(KERN_INVALID_ADDRESS); 7349 kfree(ptr, sizeof(submap_map_t)); 7350 if(parent_maps == NULL) 7351 map_share = FALSE; 7352 src_entry = tmp_entry->vme_prev; 7353 } else 7354 break; 7355 } 7356 if ((src_start >= src_end) && (src_end != 0)) 7357 break; 7358 7359 /* 7360 * Verify that there are no gaps in the region 7361 */ 7362 7363 tmp_entry = src_entry->vme_next; 7364 if ((tmp_entry->vme_start != src_start) || 7365 (tmp_entry == vm_map_to_entry(src_map))) 7366 RETURN(KERN_INVALID_ADDRESS); 7367 } 7368 7369 /* 7370 * If the source should be destroyed, do it now, since the 7371 * copy was successful. 7372 */ 7373 if (src_destroy) { 7374 (void) vm_map_delete(src_map, 7375 vm_map_trunc_page(src_addr), 7376 src_end, 7377 (src_map == kernel_map) ? 7378 VM_MAP_REMOVE_KUNWIRE : 7379 VM_MAP_NO_FLAGS, 7380 VM_MAP_NULL); 7381 } else { 7382 /* fix up the damage we did in the base map */ 7383 vm_map_simplify_range(src_map, 7384 vm_map_trunc_page(src_addr), 7385 vm_map_round_page(src_end)); 7386 } 7387 7388 vm_map_unlock(src_map); 7389 7390 /* Fix-up start and end points in copy. This is necessary */ 7391 /* when the various entries in the copy object were picked */ 7392 /* up from different sub-maps */ 7393 7394 tmp_entry = vm_map_copy_first_entry(copy); 7395 while (tmp_entry != vm_map_copy_to_entry(copy)) { 7396 tmp_entry->vme_end = copy_addr + 7397 (tmp_entry->vme_end - tmp_entry->vme_start); 7398 tmp_entry->vme_start = copy_addr; 7399 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start; 7400 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next; 7401 } 7402 7403 *copy_result = copy; 7404 return(KERN_SUCCESS); 7405 7406#undef RETURN 7407} 7408 7409/* 7410 * vm_map_copyin_object: 7411 * 7412 * Create a copy object from an object. 7413 * Our caller donates an object reference. 7414 */ 7415 7416kern_return_t 7417vm_map_copyin_object( 7418 vm_object_t object, 7419 vm_object_offset_t offset, /* offset of region in object */ 7420 vm_object_size_t size, /* size of region in object */ 7421 vm_map_copy_t *copy_result) /* OUT */ 7422{ 7423 vm_map_copy_t copy; /* Resulting copy */ 7424 7425 /* 7426 * We drop the object into a special copy object 7427 * that contains the object directly. 7428 */ 7429 7430 copy = (vm_map_copy_t) zalloc(vm_map_copy_zone); 7431 copy->type = VM_MAP_COPY_OBJECT; 7432 copy->cpy_object = object; 7433 copy->offset = offset; 7434 copy->size = size; 7435 7436 *copy_result = copy; 7437 return(KERN_SUCCESS); 7438} 7439 7440static void 7441vm_map_fork_share( 7442 vm_map_t old_map, 7443 vm_map_entry_t old_entry, 7444 vm_map_t new_map) 7445{ 7446 vm_object_t object; 7447 vm_map_entry_t new_entry; 7448 7449 /* 7450 * New sharing code. New map entry 7451 * references original object. Internal 7452 * objects use asynchronous copy algorithm for 7453 * future copies. First make sure we have 7454 * the right object. If we need a shadow, 7455 * or someone else already has one, then 7456 * make a new shadow and share it. 7457 */ 7458 7459 object = old_entry->object.vm_object; 7460 if (old_entry->is_sub_map) { 7461 assert(old_entry->wired_count == 0); 7462#ifndef NO_NESTED_PMAP 7463 if(old_entry->use_pmap) { 7464 kern_return_t result; 7465 7466 result = pmap_nest(new_map->pmap, 7467 (old_entry->object.sub_map)->pmap, 7468 (addr64_t)old_entry->vme_start, 7469 (addr64_t)old_entry->vme_start, 7470 (uint64_t)(old_entry->vme_end - old_entry->vme_start)); 7471 if(result) 7472 panic("vm_map_fork_share: pmap_nest failed!"); 7473 } 7474#endif /* NO_NESTED_PMAP */ 7475 } else if (object == VM_OBJECT_NULL) { 7476 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end - 7477 old_entry->vme_start)); 7478 old_entry->offset = 0; 7479 old_entry->object.vm_object = object; 7480 assert(!old_entry->needs_copy); 7481 } else if (object->copy_strategy != 7482 MEMORY_OBJECT_COPY_SYMMETRIC) { 7483 7484 /* 7485 * We are already using an asymmetric 7486 * copy, and therefore we already have 7487 * the right object. 7488 */ 7489 7490 assert(! old_entry->needs_copy); 7491 } 7492 else if (old_entry->needs_copy || /* case 1 */ 7493 object->shadowed || /* case 2 */ 7494 (!object->true_share && /* case 3 */ 7495 !old_entry->is_shared && 7496 (object->size > 7497 (vm_map_size_t)(old_entry->vme_end - 7498 old_entry->vme_start)))) { 7499 7500 /* 7501 * We need to create a shadow. 7502 * There are three cases here. 7503 * In the first case, we need to 7504 * complete a deferred symmetrical 7505 * copy that we participated in. 7506 * In the second and third cases, 7507 * we need to create the shadow so 7508 * that changes that we make to the 7509 * object do not interfere with 7510 * any symmetrical copies which 7511 * have occured (case 2) or which 7512 * might occur (case 3). 7513 * 7514 * The first case is when we had 7515 * deferred shadow object creation 7516 * via the entry->needs_copy mechanism. 7517 * This mechanism only works when 7518 * only one entry points to the source 7519 * object, and we are about to create 7520 * a second entry pointing to the 7521 * same object. The problem is that 7522 * there is no way of mapping from 7523 * an object to the entries pointing 7524 * to it. (Deferred shadow creation 7525 * works with one entry because occurs 7526 * at fault time, and we walk from the 7527 * entry to the object when handling 7528 * the fault.) 7529 * 7530 * The second case is when the object 7531 * to be shared has already been copied 7532 * with a symmetric copy, but we point 7533 * directly to the object without 7534 * needs_copy set in our entry. (This 7535 * can happen because different ranges 7536 * of an object can be pointed to by 7537 * different entries. In particular, 7538 * a single entry pointing to an object 7539 * can be split by a call to vm_inherit, 7540 * which, combined with task_create, can 7541 * result in the different entries 7542 * having different needs_copy values.) 7543 * The shadowed flag in the object allows 7544 * us to detect this case. The problem 7545 * with this case is that if this object 7546 * has or will have shadows, then we 7547 * must not perform an asymmetric copy 7548 * of this object, since such a copy 7549 * allows the object to be changed, which 7550 * will break the previous symmetrical 7551 * copies (which rely upon the object 7552 * not changing). In a sense, the shadowed 7553 * flag says "don't change this object". 7554 * We fix this by creating a shadow 7555 * object for this object, and sharing 7556 * that. This works because we are free 7557 * to change the shadow object (and thus 7558 * to use an asymmetric copy strategy); 7559 * this is also semantically correct, 7560 * since this object is temporary, and 7561 * therefore a copy of the object is 7562 * as good as the object itself. (This 7563 * is not true for permanent objects, 7564 * since the pager needs to see changes, 7565 * which won't happen if the changes 7566 * are made to a copy.) 7567 * 7568 * The third case is when the object 7569 * to be shared has parts sticking 7570 * outside of the entry we're working 7571 * with, and thus may in the future 7572 * be subject to a symmetrical copy. 7573 * (This is a preemptive version of 7574 * case 2.) 7575 */ 7576 7577 vm_object_shadow(&old_entry->object.vm_object, 7578 &old_entry->offset, 7579 (vm_map_size_t) (old_entry->vme_end - 7580 old_entry->vme_start)); 7581 7582 /* 7583 * If we're making a shadow for other than 7584 * copy on write reasons, then we have 7585 * to remove write permission. 7586 */ 7587 7588 if (!old_entry->needs_copy && 7589 (old_entry->protection & VM_PROT_WRITE)) { 7590 vm_prot_t prot; 7591 7592 prot = old_entry->protection & ~VM_PROT_WRITE; 7593 7594 if (override_nx(old_map, old_entry->alias) && prot) 7595 prot |= VM_PROT_EXECUTE; 7596 7597 if (old_map->mapped) { 7598 vm_object_pmap_protect( 7599 old_entry->object.vm_object, 7600 old_entry->offset, 7601 (old_entry->vme_end - 7602 old_entry->vme_start), 7603 PMAP_NULL, 7604 old_entry->vme_start, 7605 prot); 7606 } else { 7607 pmap_protect(old_map->pmap, 7608 old_entry->vme_start, 7609 old_entry->vme_end, 7610 prot); 7611 } 7612 } 7613 7614 old_entry->needs_copy = FALSE; 7615 object = old_entry->object.vm_object; 7616 } 7617 7618 /* 7619 * If object was using a symmetric copy strategy, 7620 * change its copy strategy to the default 7621 * asymmetric copy strategy, which is copy_delay 7622 * in the non-norma case and copy_call in the 7623 * norma case. Bump the reference count for the 7624 * new entry. 7625 */ 7626 7627 if(old_entry->is_sub_map) { 7628 vm_map_lock(old_entry->object.sub_map); 7629 vm_map_reference(old_entry->object.sub_map); 7630 vm_map_unlock(old_entry->object.sub_map); 7631 } else { 7632 vm_object_lock(object); 7633 vm_object_reference_locked(object); 7634 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) { 7635 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY; 7636 } 7637 vm_object_unlock(object); 7638 } 7639 7640 /* 7641 * Clone the entry, using object ref from above. 7642 * Mark both entries as shared. 7643 */ 7644 7645 new_entry = vm_map_entry_create(new_map); 7646 vm_map_entry_copy(new_entry, old_entry); 7647 old_entry->is_shared = TRUE; 7648 new_entry->is_shared = TRUE; 7649 7650 /* 7651 * Insert the entry into the new map -- we 7652 * know we're inserting at the end of the new 7653 * map. 7654 */ 7655 7656 vm_map_entry_link(new_map, vm_map_last_entry(new_map), new_entry); 7657 7658 /* 7659 * Update the physical map 7660 */ 7661 7662 if (old_entry->is_sub_map) { 7663 /* Bill Angell pmap support goes here */ 7664 } else { 7665 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start, 7666 old_entry->vme_end - old_entry->vme_start, 7667 old_entry->vme_start); 7668 } 7669} 7670 7671static boolean_t 7672vm_map_fork_copy( 7673 vm_map_t old_map, 7674 vm_map_entry_t *old_entry_p, 7675 vm_map_t new_map) 7676{ 7677 vm_map_entry_t old_entry = *old_entry_p; 7678 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start; 7679 vm_map_offset_t start = old_entry->vme_start; 7680 vm_map_copy_t copy; 7681 vm_map_entry_t last = vm_map_last_entry(new_map); 7682 7683 vm_map_unlock(old_map); 7684 /* 7685 * Use maxprot version of copyin because we 7686 * care about whether this memory can ever 7687 * be accessed, not just whether it's accessible 7688 * right now. 7689 */ 7690 if (vm_map_copyin_maxprot(old_map, start, entry_size, FALSE, ©) 7691 != KERN_SUCCESS) { 7692 /* 7693 * The map might have changed while it 7694 * was unlocked, check it again. Skip 7695 * any blank space or permanently 7696 * unreadable region. 7697 */ 7698 vm_map_lock(old_map); 7699 if (!vm_map_lookup_entry(old_map, start, &last) || 7700 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) { 7701 last = last->vme_next; 7702 } 7703 *old_entry_p = last; 7704 7705 /* 7706 * XXX For some error returns, want to 7707 * XXX skip to the next element. Note 7708 * that INVALID_ADDRESS and 7709 * PROTECTION_FAILURE are handled above. 7710 */ 7711 7712 return FALSE; 7713 } 7714 7715 /* 7716 * Insert the copy into the new map 7717 */ 7718 7719 vm_map_copy_insert(new_map, last, copy); 7720 7721 /* 7722 * Pick up the traversal at the end of 7723 * the copied region. 7724 */ 7725 7726 vm_map_lock(old_map); 7727 start += entry_size; 7728 if (! vm_map_lookup_entry(old_map, start, &last)) { 7729 last = last->vme_next; 7730 } else { 7731 if (last->vme_start == start) { 7732 /* 7733 * No need to clip here and we don't 7734 * want to cause any unnecessary 7735 * unnesting... 7736 */ 7737 } else { 7738 vm_map_clip_start(old_map, last, start); 7739 } 7740 } 7741 *old_entry_p = last; 7742 7743 return TRUE; 7744} 7745 7746/* 7747 * vm_map_fork: 7748 * 7749 * Create and return a new map based on the old 7750 * map, according to the inheritance values on the 7751 * regions in that map. 7752 * 7753 * The source map must not be locked. 7754 */ 7755vm_map_t 7756vm_map_fork( 7757 vm_map_t old_map) 7758{ 7759 pmap_t new_pmap; 7760 vm_map_t new_map; 7761 vm_map_entry_t old_entry; 7762 vm_map_size_t new_size = 0, entry_size; 7763 vm_map_entry_t new_entry; 7764 boolean_t src_needs_copy; 7765 boolean_t new_entry_needs_copy; 7766 7767#ifdef __i386__ 7768 new_pmap = pmap_create((vm_map_size_t) 0, 7769 old_map->pmap->pm_task_map != TASK_MAP_32BIT); 7770 if (old_map->pmap->pm_task_map == TASK_MAP_64BIT_SHARED) 7771 pmap_set_4GB_pagezero(new_pmap); 7772#else 7773 new_pmap = pmap_create((vm_map_size_t) 0, 0); 7774#endif 7775 7776 vm_map_reference_swap(old_map); 7777 vm_map_lock(old_map); 7778 7779 new_map = vm_map_create(new_pmap, 7780 old_map->min_offset, 7781 old_map->max_offset, 7782 old_map->hdr.entries_pageable); 7783 7784 for ( 7785 old_entry = vm_map_first_entry(old_map); 7786 old_entry != vm_map_to_entry(old_map); 7787 ) { 7788 7789 entry_size = old_entry->vme_end - old_entry->vme_start; 7790 7791 switch (old_entry->inheritance) { 7792 case VM_INHERIT_NONE: 7793 break; 7794 7795 case VM_INHERIT_SHARE: 7796 vm_map_fork_share(old_map, old_entry, new_map); 7797 new_size += entry_size; 7798 break; 7799 7800 case VM_INHERIT_COPY: 7801 7802 /* 7803 * Inline the copy_quickly case; 7804 * upon failure, fall back on call 7805 * to vm_map_fork_copy. 7806 */ 7807 7808 if(old_entry->is_sub_map) 7809 break; 7810 if ((old_entry->wired_count != 0) || 7811 ((old_entry->object.vm_object != NULL) && 7812 (old_entry->object.vm_object->true_share))) { 7813 goto slow_vm_map_fork_copy; 7814 } 7815 7816 new_entry = vm_map_entry_create(new_map); 7817 vm_map_entry_copy(new_entry, old_entry); 7818 /* clear address space specifics */ 7819 new_entry->use_pmap = FALSE; 7820 7821 if (! vm_object_copy_quickly( 7822 &new_entry->object.vm_object, 7823 old_entry->offset, 7824 (old_entry->vme_end - 7825 old_entry->vme_start), 7826 &src_needs_copy, 7827 &new_entry_needs_copy)) { 7828 vm_map_entry_dispose(new_map, new_entry); 7829 goto slow_vm_map_fork_copy; 7830 } 7831 7832 /* 7833 * Handle copy-on-write obligations 7834 */ 7835 7836 if (src_needs_copy && !old_entry->needs_copy) { 7837 vm_prot_t prot; 7838 7839 prot = old_entry->protection & ~VM_PROT_WRITE; 7840 7841 if (override_nx(old_map, old_entry->alias) && prot) 7842 prot |= VM_PROT_EXECUTE; 7843 7844 vm_object_pmap_protect( 7845 old_entry->object.vm_object, 7846 old_entry->offset, 7847 (old_entry->vme_end - 7848 old_entry->vme_start), 7849 ((old_entry->is_shared 7850 || old_map->mapped) 7851 ? PMAP_NULL : 7852 old_map->pmap), 7853 old_entry->vme_start, 7854 prot); 7855 7856 old_entry->needs_copy = TRUE; 7857 } 7858 new_entry->needs_copy = new_entry_needs_copy; 7859 7860 /* 7861 * Insert the entry at the end 7862 * of the map. 7863 */ 7864 7865 vm_map_entry_link(new_map, vm_map_last_entry(new_map), 7866 new_entry); 7867 new_size += entry_size; 7868 break; 7869 7870 slow_vm_map_fork_copy: 7871 if (vm_map_fork_copy(old_map, &old_entry, new_map)) { 7872 new_size += entry_size; 7873 } 7874 continue; 7875 } 7876 old_entry = old_entry->vme_next; 7877 } 7878 7879 new_map->size = new_size; 7880 vm_map_unlock(old_map); 7881 vm_map_deallocate(old_map); 7882 7883 return(new_map); 7884} 7885 7886/* 7887 * vm_map_exec: 7888 * 7889 * Setup the "new_map" with the proper execution environment according 7890 * to the type of executable (platform, 64bit, chroot environment). 7891 * Map the comm page and shared region, etc... 7892 */ 7893kern_return_t 7894vm_map_exec( 7895 vm_map_t new_map, 7896 task_t task, 7897 void *fsroot, 7898 cpu_type_t cpu) 7899{ 7900 SHARED_REGION_TRACE_DEBUG( 7901 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): ->\n", 7902 current_task(), new_map, task, fsroot, cpu)); 7903 (void) vm_commpage_enter(new_map, task); 7904 (void) vm_shared_region_enter(new_map, task, fsroot, cpu); 7905 SHARED_REGION_TRACE_DEBUG( 7906 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x): <-\n", 7907 current_task(), new_map, task, fsroot, cpu)); 7908 return KERN_SUCCESS; 7909} 7910 7911/* 7912 * vm_map_lookup_locked: 7913 * 7914 * Finds the VM object, offset, and 7915 * protection for a given virtual address in the 7916 * specified map, assuming a page fault of the 7917 * type specified. 7918 * 7919 * Returns the (object, offset, protection) for 7920 * this address, whether it is wired down, and whether 7921 * this map has the only reference to the data in question. 7922 * In order to later verify this lookup, a "version" 7923 * is returned. 7924 * 7925 * The map MUST be locked by the caller and WILL be 7926 * locked on exit. In order to guarantee the 7927 * existence of the returned object, it is returned 7928 * locked. 7929 * 7930 * If a lookup is requested with "write protection" 7931 * specified, the map may be changed to perform virtual 7932 * copying operations, although the data referenced will 7933 * remain the same. 7934 */ 7935kern_return_t 7936vm_map_lookup_locked( 7937 vm_map_t *var_map, /* IN/OUT */ 7938 vm_map_offset_t vaddr, 7939 vm_prot_t fault_type, 7940 int object_lock_type, 7941 vm_map_version_t *out_version, /* OUT */ 7942 vm_object_t *object, /* OUT */ 7943 vm_object_offset_t *offset, /* OUT */ 7944 vm_prot_t *out_prot, /* OUT */ 7945 boolean_t *wired, /* OUT */ 7946 vm_object_fault_info_t fault_info, /* OUT */ 7947 vm_map_t *real_map) 7948{ 7949 vm_map_entry_t entry; 7950 register vm_map_t map = *var_map; 7951 vm_map_t old_map = *var_map; 7952 vm_map_t cow_sub_map_parent = VM_MAP_NULL; 7953 vm_map_offset_t cow_parent_vaddr = 0; 7954 vm_map_offset_t old_start = 0; 7955 vm_map_offset_t old_end = 0; 7956 register vm_prot_t prot; 7957 7958 *real_map = map; 7959RetryLookup: ; 7960 7961 /* 7962 * If the map has an interesting hint, try it before calling 7963 * full blown lookup routine. 7964 */ 7965 entry = map->hint; 7966 7967 if ((entry == vm_map_to_entry(map)) || 7968 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) { 7969 vm_map_entry_t tmp_entry; 7970 7971 /* 7972 * Entry was either not a valid hint, or the vaddr 7973 * was not contained in the entry, so do a full lookup. 7974 */ 7975 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) { 7976 if((cow_sub_map_parent) && (cow_sub_map_parent != map)) 7977 vm_map_unlock(cow_sub_map_parent); 7978 if((*real_map != map) 7979 && (*real_map != cow_sub_map_parent)) 7980 vm_map_unlock(*real_map); 7981 return KERN_INVALID_ADDRESS; 7982 } 7983 7984 entry = tmp_entry; 7985 } 7986 if(map == old_map) { 7987 old_start = entry->vme_start; 7988 old_end = entry->vme_end; 7989 } 7990 7991 /* 7992 * Handle submaps. Drop lock on upper map, submap is 7993 * returned locked. 7994 */ 7995 7996submap_recurse: 7997 if (entry->is_sub_map) { 7998 vm_map_offset_t local_vaddr; 7999 vm_map_offset_t end_delta; 8000 vm_map_offset_t start_delta; 8001 vm_map_entry_t submap_entry; 8002 boolean_t mapped_needs_copy=FALSE; 8003 8004 local_vaddr = vaddr; 8005 8006 if ((entry->use_pmap && !(fault_type & VM_PROT_WRITE))) { 8007 /* if real_map equals map we unlock below */ 8008 if ((*real_map != map) && 8009 (*real_map != cow_sub_map_parent)) 8010 vm_map_unlock(*real_map); 8011 *real_map = entry->object.sub_map; 8012 } 8013 8014 if(entry->needs_copy && (fault_type & VM_PROT_WRITE)) { 8015 if (!mapped_needs_copy) { 8016 if (vm_map_lock_read_to_write(map)) { 8017 vm_map_lock_read(map); 8018 /* XXX FBDP: entry still valid ? */ 8019 if(*real_map == entry->object.sub_map) 8020 *real_map = map; 8021 goto RetryLookup; 8022 } 8023 vm_map_lock_read(entry->object.sub_map); 8024 cow_sub_map_parent = map; 8025 /* reset base to map before cow object */ 8026 /* this is the map which will accept */ 8027 /* the new cow object */ 8028 old_start = entry->vme_start; 8029 old_end = entry->vme_end; 8030 cow_parent_vaddr = vaddr; 8031 mapped_needs_copy = TRUE; 8032 } else { 8033 vm_map_lock_read(entry->object.sub_map); 8034 if((cow_sub_map_parent != map) && 8035 (*real_map != map)) 8036 vm_map_unlock(map); 8037 } 8038 } else { 8039 vm_map_lock_read(entry->object.sub_map); 8040 /* leave map locked if it is a target */ 8041 /* cow sub_map above otherwise, just */ 8042 /* follow the maps down to the object */ 8043 /* here we unlock knowing we are not */ 8044 /* revisiting the map. */ 8045 if((*real_map != map) && (map != cow_sub_map_parent)) 8046 vm_map_unlock_read(map); 8047 } 8048 8049 /* XXX FBDP: map has been unlocked, what protects "entry" !? */ 8050 *var_map = map = entry->object.sub_map; 8051 8052 /* calculate the offset in the submap for vaddr */ 8053 local_vaddr = (local_vaddr - entry->vme_start) + entry->offset; 8054 8055 RetrySubMap: 8056 if(!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) { 8057 if((cow_sub_map_parent) && (cow_sub_map_parent != map)){ 8058 vm_map_unlock(cow_sub_map_parent); 8059 } 8060 if((*real_map != map) 8061 && (*real_map != cow_sub_map_parent)) { 8062 vm_map_unlock(*real_map); 8063 } 8064 *real_map = map; 8065 return KERN_INVALID_ADDRESS; 8066 } 8067 8068 /* find the attenuated shadow of the underlying object */ 8069 /* on our target map */ 8070 8071 /* in english the submap object may extend beyond the */ 8072 /* region mapped by the entry or, may only fill a portion */ 8073 /* of it. For our purposes, we only care if the object */ 8074 /* doesn't fill. In this case the area which will */ 8075 /* ultimately be clipped in the top map will only need */ 8076 /* to be as big as the portion of the underlying entry */ 8077 /* which is mapped */ 8078 start_delta = submap_entry->vme_start > entry->offset ? 8079 submap_entry->vme_start - entry->offset : 0; 8080 8081 end_delta = 8082 (entry->offset + start_delta + (old_end - old_start)) <= 8083 submap_entry->vme_end ? 8084 0 : (entry->offset + 8085 (old_end - old_start)) 8086 - submap_entry->vme_end; 8087 8088 old_start += start_delta; 8089 old_end -= end_delta; 8090 8091 if(submap_entry->is_sub_map) { 8092 entry = submap_entry; 8093 vaddr = local_vaddr; 8094 goto submap_recurse; 8095 } 8096 8097 if(((fault_type & VM_PROT_WRITE) && cow_sub_map_parent)) { 8098 8099 vm_object_t sub_object, copy_object; 8100 vm_object_offset_t copy_offset; 8101 vm_map_offset_t local_start; 8102 vm_map_offset_t local_end; 8103 boolean_t copied_slowly = FALSE; 8104 8105 if (vm_map_lock_read_to_write(map)) { 8106 vm_map_lock_read(map); 8107 old_start -= start_delta; 8108 old_end += end_delta; 8109 goto RetrySubMap; 8110 } 8111 8112 8113 sub_object = submap_entry->object.vm_object; 8114 if (sub_object == VM_OBJECT_NULL) { 8115 sub_object = 8116 vm_object_allocate( 8117 (vm_map_size_t) 8118 (submap_entry->vme_end - 8119 submap_entry->vme_start)); 8120 submap_entry->object.vm_object = sub_object; 8121 submap_entry->offset = 0; 8122 } 8123 local_start = local_vaddr - 8124 (cow_parent_vaddr - old_start); 8125 local_end = local_vaddr + 8126 (old_end - cow_parent_vaddr); 8127 vm_map_clip_start(map, submap_entry, local_start); 8128 vm_map_clip_end(map, submap_entry, local_end); 8129 /* unnesting was done in vm_map_clip_start/end() */ 8130 assert(!submap_entry->use_pmap); 8131 8132 /* This is the COW case, lets connect */ 8133 /* an entry in our space to the underlying */ 8134 /* object in the submap, bypassing the */ 8135 /* submap. */ 8136 8137 8138 if(submap_entry->wired_count != 0 || 8139 (sub_object->copy_strategy == 8140 MEMORY_OBJECT_COPY_NONE)) { 8141 vm_object_lock(sub_object); 8142 vm_object_copy_slowly(sub_object, 8143 submap_entry->offset, 8144 (submap_entry->vme_end - 8145 submap_entry->vme_start), 8146 FALSE, 8147 ©_object); 8148 copied_slowly = TRUE; 8149 } else { 8150 8151 /* set up shadow object */ 8152 copy_object = sub_object; 8153 vm_object_reference(copy_object); 8154 sub_object->shadowed = TRUE; 8155 submap_entry->needs_copy = TRUE; 8156 8157 prot = submap_entry->protection & ~VM_PROT_WRITE; 8158 8159 if (override_nx(map, submap_entry->alias) && prot) 8160 prot |= VM_PROT_EXECUTE; 8161 8162 vm_object_pmap_protect( 8163 sub_object, 8164 submap_entry->offset, 8165 submap_entry->vme_end - 8166 submap_entry->vme_start, 8167 (submap_entry->is_shared 8168 || map->mapped) ? 8169 PMAP_NULL : map->pmap, 8170 submap_entry->vme_start, 8171 prot); 8172 } 8173 8174 /* 8175 * Adjust the fault offset to the submap entry. 8176 */ 8177 copy_offset = (local_vaddr - 8178 submap_entry->vme_start + 8179 submap_entry->offset); 8180 8181 /* This works diffently than the */ 8182 /* normal submap case. We go back */ 8183 /* to the parent of the cow map and*/ 8184 /* clip out the target portion of */ 8185 /* the sub_map, substituting the */ 8186 /* new copy object, */ 8187 8188 vm_map_unlock(map); 8189 local_start = old_start; 8190 local_end = old_end; 8191 map = cow_sub_map_parent; 8192 *var_map = cow_sub_map_parent; 8193 vaddr = cow_parent_vaddr; 8194 cow_sub_map_parent = NULL; 8195 8196 if(!vm_map_lookup_entry(map, 8197 vaddr, &entry)) { 8198 vm_object_deallocate( 8199 copy_object); 8200 vm_map_lock_write_to_read(map); 8201 return KERN_INVALID_ADDRESS; 8202 } 8203 8204 /* clip out the portion of space */ 8205 /* mapped by the sub map which */ 8206 /* corresponds to the underlying */ 8207 /* object */ 8208 8209 /* 8210 * Clip (and unnest) the smallest nested chunk 8211 * possible around the faulting address... 8212 */ 8213 local_start = vaddr & ~(pmap_nesting_size_min - 1); 8214 local_end = local_start + pmap_nesting_size_min; 8215 /* 8216 * ... but don't go beyond the "old_start" to "old_end" 8217 * range, to avoid spanning over another VM region 8218 * with a possibly different VM object and/or offset. 8219 */ 8220 if (local_start < old_start) { 8221 local_start = old_start; 8222 } 8223 if (local_end > old_end) { 8224 local_end = old_end; 8225 } 8226 /* 8227 * Adjust copy_offset to the start of the range. 8228 */ 8229 copy_offset -= (vaddr - local_start); 8230 8231 vm_map_clip_start(map, entry, local_start); 8232 vm_map_clip_end(map, entry, local_end); 8233 /* unnesting was done in vm_map_clip_start/end() */ 8234 assert(!entry->use_pmap); 8235 8236 /* substitute copy object for */ 8237 /* shared map entry */ 8238 vm_map_deallocate(entry->object.sub_map); 8239 entry->is_sub_map = FALSE; 8240 entry->object.vm_object = copy_object; 8241 8242 /* propagate the submap entry's protections */ 8243 entry->protection |= submap_entry->protection; 8244 entry->max_protection |= submap_entry->max_protection; 8245 8246 if(copied_slowly) { 8247 entry->offset = local_start - old_start; 8248 entry->needs_copy = FALSE; 8249 entry->is_shared = FALSE; 8250 } else { 8251 entry->offset = copy_offset; 8252 entry->needs_copy = TRUE; 8253 if(entry->inheritance == VM_INHERIT_SHARE) 8254 entry->inheritance = VM_INHERIT_COPY; 8255 if (map != old_map) 8256 entry->is_shared = TRUE; 8257 } 8258 if(entry->inheritance == VM_INHERIT_SHARE) 8259 entry->inheritance = VM_INHERIT_COPY; 8260 8261 vm_map_lock_write_to_read(map); 8262 } else { 8263 if((cow_sub_map_parent) 8264 && (cow_sub_map_parent != *real_map) 8265 && (cow_sub_map_parent != map)) { 8266 vm_map_unlock(cow_sub_map_parent); 8267 } 8268 entry = submap_entry; 8269 vaddr = local_vaddr; 8270 } 8271 } 8272 8273 /* 8274 * Check whether this task is allowed to have 8275 * this page. 8276 */ 8277 8278 prot = entry->protection; 8279 8280 if (override_nx(map, entry->alias) && prot) { 8281 /* 8282 * HACK -- if not a stack, then allow execution 8283 */ 8284 prot |= VM_PROT_EXECUTE; 8285 } 8286 8287 if ((fault_type & (prot)) != fault_type) { 8288 if (*real_map != map) { 8289 vm_map_unlock(*real_map); 8290 } 8291 *real_map = map; 8292 8293 if ((fault_type & VM_PROT_EXECUTE) && prot) 8294 log_stack_execution_failure((addr64_t)vaddr, prot); 8295 8296 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL); 8297 return KERN_PROTECTION_FAILURE; 8298 } 8299 8300 /* 8301 * If this page is not pageable, we have to get 8302 * it for all possible accesses. 8303 */ 8304 8305 *wired = (entry->wired_count != 0); 8306 if (*wired) 8307 fault_type = prot; 8308 8309 /* 8310 * If the entry was copy-on-write, we either ... 8311 */ 8312 8313 if (entry->needs_copy) { 8314 /* 8315 * If we want to write the page, we may as well 8316 * handle that now since we've got the map locked. 8317 * 8318 * If we don't need to write the page, we just 8319 * demote the permissions allowed. 8320 */ 8321 8322 if ((fault_type & VM_PROT_WRITE) || *wired) { 8323 /* 8324 * Make a new object, and place it in the 8325 * object chain. Note that no new references 8326 * have appeared -- one just moved from the 8327 * map to the new object. 8328 */ 8329 8330 if (vm_map_lock_read_to_write(map)) { 8331 vm_map_lock_read(map); 8332 goto RetryLookup; 8333 } 8334 vm_object_shadow(&entry->object.vm_object, 8335 &entry->offset, 8336 (vm_map_size_t) (entry->vme_end - 8337 entry->vme_start)); 8338 8339 entry->object.vm_object->shadowed = TRUE; 8340 entry->needs_copy = FALSE; 8341 vm_map_lock_write_to_read(map); 8342 } 8343 else { 8344 /* 8345 * We're attempting to read a copy-on-write 8346 * page -- don't allow writes. 8347 */ 8348 8349 prot &= (~VM_PROT_WRITE); 8350 } 8351 } 8352 8353 /* 8354 * Create an object if necessary. 8355 */ 8356 if (entry->object.vm_object == VM_OBJECT_NULL) { 8357 8358 if (vm_map_lock_read_to_write(map)) { 8359 vm_map_lock_read(map); 8360 goto RetryLookup; 8361 } 8362 8363 entry->object.vm_object = vm_object_allocate( 8364 (vm_map_size_t)(entry->vme_end - entry->vme_start)); 8365 entry->offset = 0; 8366 vm_map_lock_write_to_read(map); 8367 } 8368 8369 /* 8370 * Return the object/offset from this entry. If the entry 8371 * was copy-on-write or empty, it has been fixed up. Also 8372 * return the protection. 8373 */ 8374 8375 *offset = (vaddr - entry->vme_start) + entry->offset; 8376 *object = entry->object.vm_object; 8377 *out_prot = prot; 8378 8379 if (fault_info) { 8380 fault_info->interruptible = THREAD_UNINT; /* for now... */ 8381 /* ... the caller will change "interruptible" if needed */ 8382 fault_info->cluster_size = 0; 8383 fault_info->user_tag = entry->alias; 8384 fault_info->behavior = entry->behavior; 8385 fault_info->lo_offset = entry->offset; 8386 fault_info->hi_offset = (entry->vme_end - entry->vme_start) + entry->offset; 8387 fault_info->no_cache = entry->no_cache; 8388 } 8389 8390 /* 8391 * Lock the object to prevent it from disappearing 8392 */ 8393 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) 8394 vm_object_lock(*object); 8395 else 8396 vm_object_lock_shared(*object); 8397 8398 /* 8399 * Save the version number 8400 */ 8401 8402 out_version->main_timestamp = map->timestamp; 8403 8404 return KERN_SUCCESS; 8405} 8406 8407 8408/* 8409 * vm_map_verify: 8410 * 8411 * Verifies that the map in question has not changed 8412 * since the given version. If successful, the map 8413 * will not change until vm_map_verify_done() is called. 8414 */ 8415boolean_t 8416vm_map_verify( 8417 register vm_map_t map, 8418 register vm_map_version_t *version) /* REF */ 8419{ 8420 boolean_t result; 8421 8422 vm_map_lock_read(map); 8423 result = (map->timestamp == version->main_timestamp); 8424 8425 if (!result) 8426 vm_map_unlock_read(map); 8427 8428 return(result); 8429} 8430 8431/* 8432 * vm_map_verify_done: 8433 * 8434 * Releases locks acquired by a vm_map_verify. 8435 * 8436 * This is now a macro in vm/vm_map.h. It does a 8437 * vm_map_unlock_read on the map. 8438 */ 8439 8440 8441/* 8442 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY 8443 * Goes away after regular vm_region_recurse function migrates to 8444 * 64 bits 8445 * vm_region_recurse: A form of vm_region which follows the 8446 * submaps in a target map 8447 * 8448 */ 8449 8450kern_return_t 8451vm_map_region_recurse_64( 8452 vm_map_t map, 8453 vm_map_offset_t *address, /* IN/OUT */ 8454 vm_map_size_t *size, /* OUT */ 8455 natural_t *nesting_depth, /* IN/OUT */ 8456 vm_region_submap_info_64_t submap_info, /* IN/OUT */ 8457 mach_msg_type_number_t *count) /* IN/OUT */ 8458{ 8459 vm_region_extended_info_data_t extended; 8460 vm_map_entry_t tmp_entry; 8461 vm_map_offset_t user_address; 8462 unsigned int user_max_depth; 8463 8464 /* 8465 * "curr_entry" is the VM map entry preceding or including the 8466 * address we're looking for. 8467 * "curr_map" is the map or sub-map containing "curr_entry". 8468 * "curr_offset" is the cumulated offset of "curr_map" in the 8469 * target task's address space. 8470 * "curr_depth" is the depth of "curr_map" in the chain of 8471 * sub-maps. 8472 * "curr_max_offset" is the maximum offset we should take into 8473 * account in the current map. It may be smaller than the current 8474 * map's "max_offset" because we might not have mapped it all in 8475 * the upper level map. 8476 */ 8477 vm_map_entry_t curr_entry; 8478 vm_map_offset_t curr_offset; 8479 vm_map_t curr_map; 8480 unsigned int curr_depth; 8481 vm_map_offset_t curr_max_offset; 8482 8483 /* 8484 * "next_" is the same as "curr_" but for the VM region immediately 8485 * after the address we're looking for. We need to keep track of this 8486 * too because we want to return info about that region if the 8487 * address we're looking for is not mapped. 8488 */ 8489 vm_map_entry_t next_entry; 8490 vm_map_offset_t next_offset; 8491 vm_map_t next_map; 8492 unsigned int next_depth; 8493 vm_map_offset_t next_max_offset; 8494 8495 boolean_t look_for_pages; 8496 vm_region_submap_short_info_64_t short_info; 8497 8498 if (map == VM_MAP_NULL) { 8499 /* no address space to work on */ 8500 return KERN_INVALID_ARGUMENT; 8501 } 8502 8503 if (*count < VM_REGION_SUBMAP_INFO_COUNT_64) { 8504 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) { 8505 /* 8506 * "info" structure is not big enough and 8507 * would overflow 8508 */ 8509 return KERN_INVALID_ARGUMENT; 8510 } else { 8511 look_for_pages = FALSE; 8512 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64; 8513 short_info = (vm_region_submap_short_info_64_t) submap_info; 8514 submap_info = NULL; 8515 } 8516 } else { 8517 look_for_pages = TRUE; 8518 *count = VM_REGION_SUBMAP_INFO_COUNT_64; 8519 short_info = NULL; 8520 } 8521 8522 8523 user_address = *address; 8524 user_max_depth = *nesting_depth; 8525 8526 curr_entry = NULL; 8527 curr_map = map; 8528 curr_offset = 0; 8529 curr_depth = 0; 8530 curr_max_offset = curr_map->max_offset; 8531 8532 next_entry = NULL; 8533 next_map = NULL; 8534 next_offset = 0; 8535 next_depth = 0; 8536 next_max_offset = curr_max_offset; 8537 8538 if (not_in_kdp) { 8539 vm_map_lock_read(curr_map); 8540 } 8541 8542 for (;;) { 8543 if (vm_map_lookup_entry(curr_map, 8544 user_address - curr_offset, 8545 &tmp_entry)) { 8546 /* tmp_entry contains the address we're looking for */ 8547 curr_entry = tmp_entry; 8548 } else { 8549 /* 8550 * The address is not mapped. "tmp_entry" is the 8551 * map entry preceding the address. We want the next 8552 * one, if it exists. 8553 */ 8554 curr_entry = tmp_entry->vme_next; 8555 if (curr_entry == vm_map_to_entry(curr_map) || 8556 curr_entry->vme_start >= curr_max_offset) { 8557 /* no next entry at this level: stop looking */ 8558 if (not_in_kdp) { 8559 vm_map_unlock_read(curr_map); 8560 } 8561 curr_entry = NULL; 8562 curr_map = NULL; 8563 curr_offset = 0; 8564 curr_depth = 0; 8565 curr_max_offset = 0; 8566 break; 8567 } 8568 } 8569 8570 /* 8571 * Is the next entry at this level closer to the address (or 8572 * deeper in the submap chain) than the one we had 8573 * so far ? 8574 */ 8575 tmp_entry = curr_entry->vme_next; 8576 if (tmp_entry == vm_map_to_entry(curr_map)) { 8577 /* no next entry at this level */ 8578 } else if (tmp_entry->vme_start >= curr_max_offset) { 8579 /* 8580 * tmp_entry is beyond the scope of what we mapped of 8581 * this submap in the upper level: ignore it. 8582 */ 8583 } else if ((next_entry == NULL) || 8584 (tmp_entry->vme_start + curr_offset <= 8585 next_entry->vme_start + next_offset)) { 8586 /* 8587 * We didn't have a "next_entry" or this one is 8588 * closer to the address we're looking for: 8589 * use this "tmp_entry" as the new "next_entry". 8590 */ 8591 if (next_entry != NULL) { 8592 /* unlock the last "next_map" */ 8593 if (next_map != curr_map && not_in_kdp) { 8594 vm_map_unlock_read(next_map); 8595 } 8596 } 8597 next_entry = tmp_entry; 8598 next_map = curr_map; 8599 next_offset = curr_offset; 8600 next_depth = curr_depth; 8601 next_max_offset = curr_max_offset; 8602 } 8603 8604 if (!curr_entry->is_sub_map || 8605 curr_depth >= user_max_depth) { 8606 /* 8607 * We hit a leaf map or we reached the maximum depth 8608 * we could, so stop looking. Keep the current map 8609 * locked. 8610 */ 8611 break; 8612 } 8613 8614 /* 8615 * Get down to the next submap level. 8616 */ 8617 8618 /* 8619 * Lock the next level and unlock the current level, 8620 * unless we need to keep it locked to access the "next_entry" 8621 * later. 8622 */ 8623 if (not_in_kdp) { 8624 vm_map_lock_read(curr_entry->object.sub_map); 8625 } 8626 if (curr_map == next_map) { 8627 /* keep "next_map" locked in case we need it */ 8628 } else { 8629 /* release this map */ 8630 vm_map_unlock_read(curr_map); 8631 } 8632 8633 /* 8634 * Adjust the offset. "curr_entry" maps the submap 8635 * at relative address "curr_entry->vme_start" in the 8636 * curr_map but skips the first "curr_entry->offset" 8637 * bytes of the submap. 8638 * "curr_offset" always represents the offset of a virtual 8639 * address in the curr_map relative to the absolute address 8640 * space (i.e. the top-level VM map). 8641 */ 8642 curr_offset += 8643 (curr_entry->vme_start - curr_entry->offset); 8644 /* switch to the submap */ 8645 curr_map = curr_entry->object.sub_map; 8646 curr_depth++; 8647 /* 8648 * "curr_max_offset" allows us to keep track of the 8649 * portion of the submap that is actually mapped at this level: 8650 * the rest of that submap is irrelevant to us, since it's not 8651 * mapped here. 8652 * The relevant portion of the map starts at 8653 * "curr_entry->offset" up to the size of "curr_entry". 8654 */ 8655 curr_max_offset = 8656 curr_entry->vme_end - curr_entry->vme_start + 8657 curr_entry->offset; 8658 curr_entry = NULL; 8659 } 8660 8661 if (curr_entry == NULL) { 8662 /* no VM region contains the address... */ 8663 if (next_entry == NULL) { 8664 /* ... and no VM region follows it either */ 8665 return KERN_INVALID_ADDRESS; 8666 } 8667 /* ... gather info about the next VM region */ 8668 curr_entry = next_entry; 8669 curr_map = next_map; /* still locked ... */ 8670 curr_offset = next_offset; 8671 curr_depth = next_depth; 8672 curr_max_offset = next_max_offset; 8673 } else { 8674 /* we won't need "next_entry" after all */ 8675 if (next_entry != NULL) { 8676 /* release "next_map" */ 8677 if (next_map != curr_map && not_in_kdp) { 8678 vm_map_unlock_read(next_map); 8679 } 8680 } 8681 } 8682 next_entry = NULL; 8683 next_map = NULL; 8684 next_offset = 0; 8685 next_depth = 0; 8686 next_max_offset = 0; 8687 8688 *nesting_depth = curr_depth; 8689 *size = curr_entry->vme_end - curr_entry->vme_start; 8690 *address = curr_entry->vme_start + curr_offset; 8691 8692 if (look_for_pages) { 8693 submap_info->user_tag = curr_entry->alias; 8694 submap_info->offset = curr_entry->offset; 8695 submap_info->protection = curr_entry->protection; 8696 submap_info->inheritance = curr_entry->inheritance; 8697 submap_info->max_protection = curr_entry->max_protection; 8698 submap_info->behavior = curr_entry->behavior; 8699 submap_info->user_wired_count = curr_entry->user_wired_count; 8700 submap_info->is_submap = curr_entry->is_sub_map; 8701 submap_info->object_id = (uint32_t) curr_entry->object.vm_object; 8702 } else { 8703 short_info->user_tag = curr_entry->alias; 8704 short_info->offset = curr_entry->offset; 8705 short_info->protection = curr_entry->protection; 8706 short_info->inheritance = curr_entry->inheritance; 8707 short_info->max_protection = curr_entry->max_protection; 8708 short_info->behavior = curr_entry->behavior; 8709 short_info->user_wired_count = curr_entry->user_wired_count; 8710 short_info->is_submap = curr_entry->is_sub_map; 8711 short_info->object_id = (uint32_t) curr_entry->object.vm_object; 8712 } 8713 8714 extended.pages_resident = 0; 8715 extended.pages_swapped_out = 0; 8716 extended.pages_shared_now_private = 0; 8717 extended.pages_dirtied = 0; 8718 extended.external_pager = 0; 8719 extended.shadow_depth = 0; 8720 8721 if (not_in_kdp) { 8722 if (!curr_entry->is_sub_map) { 8723 vm_map_region_walk(curr_map, 8724 curr_entry->vme_start, 8725 curr_entry, 8726 curr_entry->offset, 8727 (curr_entry->vme_end - 8728 curr_entry->vme_start), 8729 &extended, 8730 look_for_pages); 8731 if (extended.external_pager && 8732 extended.ref_count == 2 && 8733 extended.share_mode == SM_SHARED) { 8734 extended.share_mode = SM_PRIVATE; 8735 } 8736 } else { 8737 if (curr_entry->use_pmap) { 8738 extended.share_mode = SM_TRUESHARED; 8739 } else { 8740 extended.share_mode = SM_PRIVATE; 8741 } 8742 extended.ref_count = 8743 curr_entry->object.sub_map->ref_count; 8744 } 8745 } 8746 8747 if (look_for_pages) { 8748 submap_info->pages_resident = extended.pages_resident; 8749 submap_info->pages_swapped_out = extended.pages_swapped_out; 8750 submap_info->pages_shared_now_private = 8751 extended.pages_shared_now_private; 8752 submap_info->pages_dirtied = extended.pages_dirtied; 8753 submap_info->external_pager = extended.external_pager; 8754 submap_info->shadow_depth = extended.shadow_depth; 8755 submap_info->share_mode = extended.share_mode; 8756 submap_info->ref_count = extended.ref_count; 8757 } else { 8758 short_info->external_pager = extended.external_pager; 8759 short_info->shadow_depth = extended.shadow_depth; 8760 short_info->share_mode = extended.share_mode; 8761 short_info->ref_count = extended.ref_count; 8762 } 8763 8764 if (not_in_kdp) { 8765 vm_map_unlock_read(curr_map); 8766 } 8767 8768 return KERN_SUCCESS; 8769} 8770 8771/* 8772 * vm_region: 8773 * 8774 * User call to obtain information about a region in 8775 * a task's address map. Currently, only one flavor is 8776 * supported. 8777 * 8778 * XXX The reserved and behavior fields cannot be filled 8779 * in until the vm merge from the IK is completed, and 8780 * vm_reserve is implemented. 8781 */ 8782 8783kern_return_t 8784vm_map_region( 8785 vm_map_t map, 8786 vm_map_offset_t *address, /* IN/OUT */ 8787 vm_map_size_t *size, /* OUT */ 8788 vm_region_flavor_t flavor, /* IN */ 8789 vm_region_info_t info, /* OUT */ 8790 mach_msg_type_number_t *count, /* IN/OUT */ 8791 mach_port_t *object_name) /* OUT */ 8792{ 8793 vm_map_entry_t tmp_entry; 8794 vm_map_entry_t entry; 8795 vm_map_offset_t start; 8796 8797 if (map == VM_MAP_NULL) 8798 return(KERN_INVALID_ARGUMENT); 8799 8800 switch (flavor) { 8801 8802 case VM_REGION_BASIC_INFO: 8803 /* legacy for old 32-bit objects info */ 8804 { 8805 vm_region_basic_info_t basic; 8806 8807 if (*count < VM_REGION_BASIC_INFO_COUNT) 8808 return(KERN_INVALID_ARGUMENT); 8809 8810 basic = (vm_region_basic_info_t) info; 8811 *count = VM_REGION_BASIC_INFO_COUNT; 8812 8813 vm_map_lock_read(map); 8814 8815 start = *address; 8816 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 8817 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 8818 vm_map_unlock_read(map); 8819 return(KERN_INVALID_ADDRESS); 8820 } 8821 } else { 8822 entry = tmp_entry; 8823 } 8824 8825 start = entry->vme_start; 8826 8827 basic->offset = (uint32_t)entry->offset; 8828 basic->protection = entry->protection; 8829 basic->inheritance = entry->inheritance; 8830 basic->max_protection = entry->max_protection; 8831 basic->behavior = entry->behavior; 8832 basic->user_wired_count = entry->user_wired_count; 8833 basic->reserved = entry->is_sub_map; 8834 *address = start; 8835 *size = (entry->vme_end - start); 8836 8837 if (object_name) *object_name = IP_NULL; 8838 if (entry->is_sub_map) { 8839 basic->shared = FALSE; 8840 } else { 8841 basic->shared = entry->is_shared; 8842 } 8843 8844 vm_map_unlock_read(map); 8845 return(KERN_SUCCESS); 8846 } 8847 8848 case VM_REGION_BASIC_INFO_64: 8849 { 8850 vm_region_basic_info_64_t basic; 8851 8852 if (*count < VM_REGION_BASIC_INFO_COUNT_64) 8853 return(KERN_INVALID_ARGUMENT); 8854 8855 basic = (vm_region_basic_info_64_t) info; 8856 *count = VM_REGION_BASIC_INFO_COUNT_64; 8857 8858 vm_map_lock_read(map); 8859 8860 start = *address; 8861 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 8862 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 8863 vm_map_unlock_read(map); 8864 return(KERN_INVALID_ADDRESS); 8865 } 8866 } else { 8867 entry = tmp_entry; 8868 } 8869 8870 start = entry->vme_start; 8871 8872 basic->offset = entry->offset; 8873 basic->protection = entry->protection; 8874 basic->inheritance = entry->inheritance; 8875 basic->max_protection = entry->max_protection; 8876 basic->behavior = entry->behavior; 8877 basic->user_wired_count = entry->user_wired_count; 8878 basic->reserved = entry->is_sub_map; 8879 *address = start; 8880 *size = (entry->vme_end - start); 8881 8882 if (object_name) *object_name = IP_NULL; 8883 if (entry->is_sub_map) { 8884 basic->shared = FALSE; 8885 } else { 8886 basic->shared = entry->is_shared; 8887 } 8888 8889 vm_map_unlock_read(map); 8890 return(KERN_SUCCESS); 8891 } 8892 case VM_REGION_EXTENDED_INFO: 8893 { 8894 vm_region_extended_info_t extended; 8895 8896 if (*count < VM_REGION_EXTENDED_INFO_COUNT) 8897 return(KERN_INVALID_ARGUMENT); 8898 8899 extended = (vm_region_extended_info_t) info; 8900 *count = VM_REGION_EXTENDED_INFO_COUNT; 8901 8902 vm_map_lock_read(map); 8903 8904 start = *address; 8905 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 8906 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 8907 vm_map_unlock_read(map); 8908 return(KERN_INVALID_ADDRESS); 8909 } 8910 } else { 8911 entry = tmp_entry; 8912 } 8913 start = entry->vme_start; 8914 8915 extended->protection = entry->protection; 8916 extended->user_tag = entry->alias; 8917 extended->pages_resident = 0; 8918 extended->pages_swapped_out = 0; 8919 extended->pages_shared_now_private = 0; 8920 extended->pages_dirtied = 0; 8921 extended->external_pager = 0; 8922 extended->shadow_depth = 0; 8923 8924 vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, extended, TRUE); 8925 8926 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) 8927 extended->share_mode = SM_PRIVATE; 8928 8929 if (object_name) 8930 *object_name = IP_NULL; 8931 *address = start; 8932 *size = (entry->vme_end - start); 8933 8934 vm_map_unlock_read(map); 8935 return(KERN_SUCCESS); 8936 } 8937 case VM_REGION_TOP_INFO: 8938 { 8939 vm_region_top_info_t top; 8940 8941 if (*count < VM_REGION_TOP_INFO_COUNT) 8942 return(KERN_INVALID_ARGUMENT); 8943 8944 top = (vm_region_top_info_t) info; 8945 *count = VM_REGION_TOP_INFO_COUNT; 8946 8947 vm_map_lock_read(map); 8948 8949 start = *address; 8950 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 8951 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) { 8952 vm_map_unlock_read(map); 8953 return(KERN_INVALID_ADDRESS); 8954 } 8955 } else { 8956 entry = tmp_entry; 8957 8958 } 8959 start = entry->vme_start; 8960 8961 top->private_pages_resident = 0; 8962 top->shared_pages_resident = 0; 8963 8964 vm_map_region_top_walk(entry, top); 8965 8966 if (object_name) 8967 *object_name = IP_NULL; 8968 *address = start; 8969 *size = (entry->vme_end - start); 8970 8971 vm_map_unlock_read(map); 8972 return(KERN_SUCCESS); 8973 } 8974 default: 8975 return(KERN_INVALID_ARGUMENT); 8976 } 8977} 8978 8979#define min(a, b) (((a) < (b)) ? (a) : (b)) 8980 8981void 8982vm_map_region_top_walk( 8983 vm_map_entry_t entry, 8984 vm_region_top_info_t top) 8985{ 8986 8987 if (entry->object.vm_object == 0 || entry->is_sub_map) { 8988 top->share_mode = SM_EMPTY; 8989 top->ref_count = 0; 8990 top->obj_id = 0; 8991 return; 8992 } 8993 8994 { 8995 struct vm_object *obj, *tmp_obj; 8996 int ref_count; 8997 uint32_t entry_size; 8998 8999 entry_size = (entry->vme_end - entry->vme_start) / PAGE_SIZE; 9000 9001 obj = entry->object.vm_object; 9002 9003 vm_object_lock(obj); 9004 9005 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 9006 ref_count--; 9007 9008 if (obj->shadow) { 9009 if (ref_count == 1) 9010 top->private_pages_resident = min(obj->resident_page_count, entry_size); 9011 else 9012 top->shared_pages_resident = min(obj->resident_page_count, entry_size); 9013 top->ref_count = ref_count; 9014 top->share_mode = SM_COW; 9015 9016 while ((tmp_obj = obj->shadow)) { 9017 vm_object_lock(tmp_obj); 9018 vm_object_unlock(obj); 9019 obj = tmp_obj; 9020 9021 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 9022 ref_count--; 9023 9024 top->shared_pages_resident += min(obj->resident_page_count, entry_size); 9025 top->ref_count += ref_count - 1; 9026 } 9027 } else { 9028 if (entry->needs_copy) { 9029 top->share_mode = SM_COW; 9030 top->shared_pages_resident = min(obj->resident_page_count, entry_size); 9031 } else { 9032 if (ref_count == 1 || 9033 (ref_count == 2 && !(obj->pager_trusted) && !(obj->internal))) { 9034 top->share_mode = SM_PRIVATE; 9035 top->private_pages_resident = min(obj->resident_page_count, entry_size); 9036 } else { 9037 top->share_mode = SM_SHARED; 9038 top->shared_pages_resident = min(obj->resident_page_count, entry_size); 9039 } 9040 } 9041 top->ref_count = ref_count; 9042 } 9043 top->obj_id = (int)obj; 9044 9045 vm_object_unlock(obj); 9046 } 9047} 9048 9049void 9050vm_map_region_walk( 9051 vm_map_t map, 9052 vm_map_offset_t va, 9053 vm_map_entry_t entry, 9054 vm_object_offset_t offset, 9055 vm_object_size_t range, 9056 vm_region_extended_info_t extended, 9057 boolean_t look_for_pages) 9058{ 9059 register struct vm_object *obj, *tmp_obj; 9060 register vm_map_offset_t last_offset; 9061 register int i; 9062 register int ref_count; 9063 struct vm_object *shadow_object; 9064 int shadow_depth; 9065 9066 if ((entry->object.vm_object == 0) || 9067 (entry->is_sub_map) || 9068 (entry->object.vm_object->phys_contiguous)) { 9069 extended->share_mode = SM_EMPTY; 9070 extended->ref_count = 0; 9071 return; 9072 } 9073 { 9074 obj = entry->object.vm_object; 9075 9076 vm_object_lock(obj); 9077 9078 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 9079 ref_count--; 9080 9081 if (look_for_pages) { 9082 for (last_offset = offset + range; 9083 offset < last_offset; 9084 offset += PAGE_SIZE_64, va += PAGE_SIZE) 9085 vm_map_region_look_for_page(map, va, obj, 9086 offset, ref_count, 9087 0, extended); 9088 } 9089 9090 shadow_object = obj->shadow; 9091 shadow_depth = 0; 9092 if (shadow_object != VM_OBJECT_NULL) { 9093 vm_object_lock(shadow_object); 9094 for (; 9095 shadow_object != VM_OBJECT_NULL; 9096 shadow_depth++) { 9097 vm_object_t next_shadow; 9098 9099 next_shadow = shadow_object->shadow; 9100 if (next_shadow) { 9101 vm_object_lock(next_shadow); 9102 } 9103 vm_object_unlock(shadow_object); 9104 shadow_object = next_shadow; 9105 } 9106 } 9107 extended->shadow_depth = shadow_depth; 9108 9109 if (extended->shadow_depth || entry->needs_copy) 9110 extended->share_mode = SM_COW; 9111 else { 9112 if (ref_count == 1) 9113 extended->share_mode = SM_PRIVATE; 9114 else { 9115 if (obj->true_share) 9116 extended->share_mode = SM_TRUESHARED; 9117 else 9118 extended->share_mode = SM_SHARED; 9119 } 9120 } 9121 extended->ref_count = ref_count - extended->shadow_depth; 9122 9123 for (i = 0; i < extended->shadow_depth; i++) { 9124 if ((tmp_obj = obj->shadow) == 0) 9125 break; 9126 vm_object_lock(tmp_obj); 9127 vm_object_unlock(obj); 9128 9129 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) 9130 ref_count--; 9131 9132 extended->ref_count += ref_count; 9133 obj = tmp_obj; 9134 } 9135 vm_object_unlock(obj); 9136 9137 if (extended->share_mode == SM_SHARED) { 9138 register vm_map_entry_t cur; 9139 register vm_map_entry_t last; 9140 int my_refs; 9141 9142 obj = entry->object.vm_object; 9143 last = vm_map_to_entry(map); 9144 my_refs = 0; 9145 9146 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) 9147 ref_count--; 9148 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) 9149 my_refs += vm_map_region_count_obj_refs(cur, obj); 9150 9151 if (my_refs == ref_count) 9152 extended->share_mode = SM_PRIVATE_ALIASED; 9153 else if (my_refs > 1) 9154 extended->share_mode = SM_SHARED_ALIASED; 9155 } 9156 } 9157} 9158 9159 9160/* object is locked on entry and locked on return */ 9161 9162 9163static void 9164vm_map_region_look_for_page( 9165 __unused vm_map_t map, 9166 __unused vm_map_offset_t va, 9167 vm_object_t object, 9168 vm_object_offset_t offset, 9169 int max_refcnt, 9170 int depth, 9171 vm_region_extended_info_t extended) 9172{ 9173 register vm_page_t p; 9174 register vm_object_t shadow; 9175 register int ref_count; 9176 vm_object_t caller_object; 9177#if MACH_PAGEMAP 9178 kern_return_t kr; 9179#endif 9180 shadow = object->shadow; 9181 caller_object = object; 9182 9183 9184 while (TRUE) { 9185 9186 if ( !(object->pager_trusted) && !(object->internal)) 9187 extended->external_pager = 1; 9188 9189 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) { 9190 if (shadow && (max_refcnt == 1)) 9191 extended->pages_shared_now_private++; 9192 9193 if (!p->fictitious && 9194 (p->dirty || pmap_is_modified(p->phys_page))) 9195 extended->pages_dirtied++; 9196 9197 extended->pages_resident++; 9198 9199 if(object != caller_object) 9200 vm_object_unlock(object); 9201 9202 return; 9203 } 9204#if MACH_PAGEMAP 9205 if (object->existence_map) { 9206 if (vm_external_state_get(object->existence_map, offset) == VM_EXTERNAL_STATE_EXISTS) { 9207 9208 extended->pages_swapped_out++; 9209 9210 if(object != caller_object) 9211 vm_object_unlock(object); 9212 9213 return; 9214 } 9215 } else if (object->internal && 9216 object->alive && 9217 !object->terminating && 9218 object->pager_ready) { 9219 9220 memory_object_t pager; 9221 9222 vm_object_paging_begin(object); 9223 pager = object->pager; 9224 vm_object_unlock(object); 9225 9226 kr = memory_object_data_request( 9227 pager, 9228 offset + object->paging_offset, 9229 0, /* just poke the pager */ 9230 VM_PROT_READ, 9231 NULL); 9232 9233 vm_object_lock(object); 9234 vm_object_paging_end(object); 9235 9236 if (kr == KERN_SUCCESS) { 9237 /* the pager has that page */ 9238 extended->pages_swapped_out++; 9239 if (object != caller_object) 9240 vm_object_unlock(object); 9241 return; 9242 } 9243 } 9244#endif /* MACH_PAGEMAP */ 9245 9246 if (shadow) { 9247 vm_object_lock(shadow); 9248 9249 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) 9250 ref_count--; 9251 9252 if (++depth > extended->shadow_depth) 9253 extended->shadow_depth = depth; 9254 9255 if (ref_count > max_refcnt) 9256 max_refcnt = ref_count; 9257 9258 if(object != caller_object) 9259 vm_object_unlock(object); 9260 9261 offset = offset + object->shadow_offset; 9262 object = shadow; 9263 shadow = object->shadow; 9264 continue; 9265 } 9266 if(object != caller_object) 9267 vm_object_unlock(object); 9268 break; 9269 } 9270} 9271 9272static int 9273vm_map_region_count_obj_refs( 9274 vm_map_entry_t entry, 9275 vm_object_t object) 9276{ 9277 register int ref_count; 9278 register vm_object_t chk_obj; 9279 register vm_object_t tmp_obj; 9280 9281 if (entry->object.vm_object == 0) 9282 return(0); 9283 9284 if (entry->is_sub_map) 9285 return(0); 9286 else { 9287 ref_count = 0; 9288 9289 chk_obj = entry->object.vm_object; 9290 vm_object_lock(chk_obj); 9291 9292 while (chk_obj) { 9293 if (chk_obj == object) 9294 ref_count++; 9295 tmp_obj = chk_obj->shadow; 9296 if (tmp_obj) 9297 vm_object_lock(tmp_obj); 9298 vm_object_unlock(chk_obj); 9299 9300 chk_obj = tmp_obj; 9301 } 9302 } 9303 return(ref_count); 9304} 9305 9306 9307/* 9308 * Routine: vm_map_simplify 9309 * 9310 * Description: 9311 * Attempt to simplify the map representation in 9312 * the vicinity of the given starting address. 9313 * Note: 9314 * This routine is intended primarily to keep the 9315 * kernel maps more compact -- they generally don't 9316 * benefit from the "expand a map entry" technology 9317 * at allocation time because the adjacent entry 9318 * is often wired down. 9319 */ 9320void 9321vm_map_simplify_entry( 9322 vm_map_t map, 9323 vm_map_entry_t this_entry) 9324{ 9325 vm_map_entry_t prev_entry; 9326 9327 counter(c_vm_map_simplify_entry_called++); 9328 9329 prev_entry = this_entry->vme_prev; 9330 9331 if ((this_entry != vm_map_to_entry(map)) && 9332 (prev_entry != vm_map_to_entry(map)) && 9333 9334 (prev_entry->vme_end == this_entry->vme_start) && 9335 9336 (prev_entry->is_sub_map == this_entry->is_sub_map) && 9337 9338 (prev_entry->object.vm_object == this_entry->object.vm_object) && 9339 ((prev_entry->offset + (prev_entry->vme_end - 9340 prev_entry->vme_start)) 9341 == this_entry->offset) && 9342 9343 (prev_entry->inheritance == this_entry->inheritance) && 9344 (prev_entry->protection == this_entry->protection) && 9345 (prev_entry->max_protection == this_entry->max_protection) && 9346 (prev_entry->behavior == this_entry->behavior) && 9347 (prev_entry->alias == this_entry->alias) && 9348 (prev_entry->no_cache == this_entry->no_cache) && 9349 (prev_entry->wired_count == this_entry->wired_count) && 9350 (prev_entry->user_wired_count == this_entry->user_wired_count) && 9351 9352 (prev_entry->needs_copy == this_entry->needs_copy) && 9353 9354 (prev_entry->use_pmap == FALSE) && 9355 (this_entry->use_pmap == FALSE) && 9356 (prev_entry->in_transition == FALSE) && 9357 (this_entry->in_transition == FALSE) && 9358 (prev_entry->needs_wakeup == FALSE) && 9359 (this_entry->needs_wakeup == FALSE) && 9360 (prev_entry->is_shared == FALSE) && 9361 (this_entry->is_shared == FALSE) 9362 ) { 9363 _vm_map_entry_unlink(&map->hdr, prev_entry); 9364 this_entry->vme_start = prev_entry->vme_start; 9365 this_entry->offset = prev_entry->offset; 9366 if (prev_entry->is_sub_map) { 9367 vm_map_deallocate(prev_entry->object.sub_map); 9368 } else { 9369 vm_object_deallocate(prev_entry->object.vm_object); 9370 } 9371 vm_map_entry_dispose(map, prev_entry); 9372 SAVE_HINT_MAP_WRITE(map, this_entry); 9373 counter(c_vm_map_simplified++); 9374 } 9375} 9376 9377void 9378vm_map_simplify( 9379 vm_map_t map, 9380 vm_map_offset_t start) 9381{ 9382 vm_map_entry_t this_entry; 9383 9384 vm_map_lock(map); 9385 if (vm_map_lookup_entry(map, start, &this_entry)) { 9386 vm_map_simplify_entry(map, this_entry); 9387 vm_map_simplify_entry(map, this_entry->vme_next); 9388 } 9389 counter(c_vm_map_simplify_called++); 9390 vm_map_unlock(map); 9391} 9392 9393static void 9394vm_map_simplify_range( 9395 vm_map_t map, 9396 vm_map_offset_t start, 9397 vm_map_offset_t end) 9398{ 9399 vm_map_entry_t entry; 9400 9401 /* 9402 * The map should be locked (for "write") by the caller. 9403 */ 9404 9405 if (start >= end) { 9406 /* invalid address range */ 9407 return; 9408 } 9409 9410 start = vm_map_trunc_page(start); 9411 end = vm_map_round_page(end); 9412 9413 if (!vm_map_lookup_entry(map, start, &entry)) { 9414 /* "start" is not mapped and "entry" ends before "start" */ 9415 if (entry == vm_map_to_entry(map)) { 9416 /* start with first entry in the map */ 9417 entry = vm_map_first_entry(map); 9418 } else { 9419 /* start with next entry */ 9420 entry = entry->vme_next; 9421 } 9422 } 9423 9424 while (entry != vm_map_to_entry(map) && 9425 entry->vme_start <= end) { 9426 /* try and coalesce "entry" with its previous entry */ 9427 vm_map_simplify_entry(map, entry); 9428 entry = entry->vme_next; 9429 } 9430} 9431 9432 9433/* 9434 * Routine: vm_map_machine_attribute 9435 * Purpose: 9436 * Provide machine-specific attributes to mappings, 9437 * such as cachability etc. for machines that provide 9438 * them. NUMA architectures and machines with big/strange 9439 * caches will use this. 9440 * Note: 9441 * Responsibilities for locking and checking are handled here, 9442 * everything else in the pmap module. If any non-volatile 9443 * information must be kept, the pmap module should handle 9444 * it itself. [This assumes that attributes do not 9445 * need to be inherited, which seems ok to me] 9446 */ 9447kern_return_t 9448vm_map_machine_attribute( 9449 vm_map_t map, 9450 vm_map_offset_t start, 9451 vm_map_offset_t end, 9452 vm_machine_attribute_t attribute, 9453 vm_machine_attribute_val_t* value) /* IN/OUT */ 9454{ 9455 kern_return_t ret; 9456 vm_map_size_t sync_size; 9457 vm_map_entry_t entry; 9458 9459 if (start < vm_map_min(map) || end > vm_map_max(map)) 9460 return KERN_INVALID_ADDRESS; 9461 9462 /* Figure how much memory we need to flush (in page increments) */ 9463 sync_size = end - start; 9464 9465 vm_map_lock(map); 9466 9467 if (attribute != MATTR_CACHE) { 9468 /* If we don't have to find physical addresses, we */ 9469 /* don't have to do an explicit traversal here. */ 9470 ret = pmap_attribute(map->pmap, start, end-start, 9471 attribute, value); 9472 vm_map_unlock(map); 9473 return ret; 9474 } 9475 9476 ret = KERN_SUCCESS; /* Assume it all worked */ 9477 9478 while(sync_size) { 9479 if (vm_map_lookup_entry(map, start, &entry)) { 9480 vm_map_size_t sub_size; 9481 if((entry->vme_end - start) > sync_size) { 9482 sub_size = sync_size; 9483 sync_size = 0; 9484 } else { 9485 sub_size = entry->vme_end - start; 9486 sync_size -= sub_size; 9487 } 9488 if(entry->is_sub_map) { 9489 vm_map_offset_t sub_start; 9490 vm_map_offset_t sub_end; 9491 9492 sub_start = (start - entry->vme_start) 9493 + entry->offset; 9494 sub_end = sub_start + sub_size; 9495 vm_map_machine_attribute( 9496 entry->object.sub_map, 9497 sub_start, 9498 sub_end, 9499 attribute, value); 9500 } else { 9501 if(entry->object.vm_object) { 9502 vm_page_t m; 9503 vm_object_t object; 9504 vm_object_t base_object; 9505 vm_object_t last_object; 9506 vm_object_offset_t offset; 9507 vm_object_offset_t base_offset; 9508 vm_map_size_t range; 9509 range = sub_size; 9510 offset = (start - entry->vme_start) 9511 + entry->offset; 9512 base_offset = offset; 9513 object = entry->object.vm_object; 9514 base_object = object; 9515 last_object = NULL; 9516 9517 vm_object_lock(object); 9518 9519 while (range) { 9520 m = vm_page_lookup( 9521 object, offset); 9522 9523 if (m && !m->fictitious) { 9524 ret = 9525 pmap_attribute_cache_sync( 9526 m->phys_page, 9527 PAGE_SIZE, 9528 attribute, value); 9529 9530 } else if (object->shadow) { 9531 offset = offset + object->shadow_offset; 9532 last_object = object; 9533 object = object->shadow; 9534 vm_object_lock(last_object->shadow); 9535 vm_object_unlock(last_object); 9536 continue; 9537 } 9538 range -= PAGE_SIZE; 9539 9540 if (base_object != object) { 9541 vm_object_unlock(object); 9542 vm_object_lock(base_object); 9543 object = base_object; 9544 } 9545 /* Bump to the next page */ 9546 base_offset += PAGE_SIZE; 9547 offset = base_offset; 9548 } 9549 vm_object_unlock(object); 9550 } 9551 } 9552 start += sub_size; 9553 } else { 9554 vm_map_unlock(map); 9555 return KERN_FAILURE; 9556 } 9557 9558 } 9559 9560 vm_map_unlock(map); 9561 9562 return ret; 9563} 9564 9565/* 9566 * vm_map_behavior_set: 9567 * 9568 * Sets the paging reference behavior of the specified address 9569 * range in the target map. Paging reference behavior affects 9570 * how pagein operations resulting from faults on the map will be 9571 * clustered. 9572 */ 9573kern_return_t 9574vm_map_behavior_set( 9575 vm_map_t map, 9576 vm_map_offset_t start, 9577 vm_map_offset_t end, 9578 vm_behavior_t new_behavior) 9579{ 9580 register vm_map_entry_t entry; 9581 vm_map_entry_t temp_entry; 9582 9583 XPR(XPR_VM_MAP, 9584 "vm_map_behavior_set, 0x%X start 0x%X end 0x%X behavior %d", 9585 (integer_t)map, start, end, new_behavior, 0); 9586 9587 switch (new_behavior) { 9588 case VM_BEHAVIOR_DEFAULT: 9589 case VM_BEHAVIOR_RANDOM: 9590 case VM_BEHAVIOR_SEQUENTIAL: 9591 case VM_BEHAVIOR_RSEQNTL: 9592 break; 9593 case VM_BEHAVIOR_WILLNEED: 9594 case VM_BEHAVIOR_DONTNEED: 9595 new_behavior = VM_BEHAVIOR_DEFAULT; 9596 break; 9597 default: 9598 return(KERN_INVALID_ARGUMENT); 9599 } 9600 9601 vm_map_lock(map); 9602 9603 /* 9604 * The entire address range must be valid for the map. 9605 * Note that vm_map_range_check() does a 9606 * vm_map_lookup_entry() internally and returns the 9607 * entry containing the start of the address range if 9608 * the entire range is valid. 9609 */ 9610 if (vm_map_range_check(map, start, end, &temp_entry)) { 9611 entry = temp_entry; 9612 vm_map_clip_start(map, entry, start); 9613 } 9614 else { 9615 vm_map_unlock(map); 9616 return(KERN_INVALID_ADDRESS); 9617 } 9618 9619 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) { 9620 vm_map_clip_end(map, entry, end); 9621 assert(!entry->use_pmap); 9622 9623 entry->behavior = new_behavior; 9624 9625 entry = entry->vme_next; 9626 } 9627 9628 vm_map_unlock(map); 9629 return(KERN_SUCCESS); 9630} 9631 9632 9633#include <mach_kdb.h> 9634#if MACH_KDB 9635#include <ddb/db_output.h> 9636#include <vm/vm_print.h> 9637 9638#define printf db_printf 9639 9640/* 9641 * Forward declarations for internal functions. 9642 */ 9643extern void vm_map_links_print( 9644 struct vm_map_links *links); 9645 9646extern void vm_map_header_print( 9647 struct vm_map_header *header); 9648 9649extern void vm_map_entry_print( 9650 vm_map_entry_t entry); 9651 9652extern void vm_follow_entry( 9653 vm_map_entry_t entry); 9654 9655extern void vm_follow_map( 9656 vm_map_t map); 9657 9658/* 9659 * vm_map_links_print: [ debug ] 9660 */ 9661void 9662vm_map_links_print( 9663 struct vm_map_links *links) 9664{ 9665 iprintf("prev = %08X next = %08X start = %016llX end = %016llX\n", 9666 links->prev, 9667 links->next, 9668 (unsigned long long)links->start, 9669 (unsigned long long)links->end); 9670} 9671 9672/* 9673 * vm_map_header_print: [ debug ] 9674 */ 9675void 9676vm_map_header_print( 9677 struct vm_map_header *header) 9678{ 9679 vm_map_links_print(&header->links); 9680 iprintf("nentries = %08X, %sentries_pageable\n", 9681 header->nentries, 9682 (header->entries_pageable ? "" : "!")); 9683} 9684 9685/* 9686 * vm_follow_entry: [ debug ] 9687 */ 9688void 9689vm_follow_entry( 9690 vm_map_entry_t entry) 9691{ 9692 int shadows; 9693 9694 iprintf("map entry %08X\n", entry); 9695 9696 db_indent += 2; 9697 9698 shadows = vm_follow_object(entry->object.vm_object); 9699 iprintf("Total objects : %d\n",shadows); 9700 9701 db_indent -= 2; 9702} 9703 9704/* 9705 * vm_map_entry_print: [ debug ] 9706 */ 9707void 9708vm_map_entry_print( 9709 register vm_map_entry_t entry) 9710{ 9711 static const char *inheritance_name[4] = 9712 { "share", "copy", "none", "?"}; 9713 static const char *behavior_name[4] = 9714 { "dflt", "rand", "seqtl", "rseqntl" }; 9715 9716 iprintf("map entry %08X - prev = %08X next = %08X\n", entry, entry->vme_prev, entry->vme_next); 9717 9718 db_indent += 2; 9719 9720 vm_map_links_print(&entry->links); 9721 9722 iprintf("start = %016llX end = %016llX - prot=%x/%x/%s\n", 9723 (unsigned long long)entry->vme_start, 9724 (unsigned long long)entry->vme_end, 9725 entry->protection, 9726 entry->max_protection, 9727 inheritance_name[(entry->inheritance & 0x3)]); 9728 9729 iprintf("behavior = %s, wired_count = %d, user_wired_count = %d\n", 9730 behavior_name[(entry->behavior & 0x3)], 9731 entry->wired_count, 9732 entry->user_wired_count); 9733 iprintf("%sin_transition, %sneeds_wakeup\n", 9734 (entry->in_transition ? "" : "!"), 9735 (entry->needs_wakeup ? "" : "!")); 9736 9737 if (entry->is_sub_map) { 9738 iprintf("submap = %08X - offset = %016llX\n", 9739 entry->object.sub_map, 9740 (unsigned long long)entry->offset); 9741 } else { 9742 iprintf("object = %08X offset = %016llX - ", 9743 entry->object.vm_object, 9744 (unsigned long long)entry->offset); 9745 printf("%sis_shared, %sneeds_copy\n", 9746 (entry->is_shared ? "" : "!"), 9747 (entry->needs_copy ? "" : "!")); 9748 } 9749 9750 db_indent -= 2; 9751} 9752 9753/* 9754 * vm_follow_map: [ debug ] 9755 */ 9756void 9757vm_follow_map( 9758 vm_map_t map) 9759{ 9760 register vm_map_entry_t entry; 9761 9762 iprintf("task map %08X\n", map); 9763 9764 db_indent += 2; 9765 9766 for (entry = vm_map_first_entry(map); 9767 entry && entry != vm_map_to_entry(map); 9768 entry = entry->vme_next) { 9769 vm_follow_entry(entry); 9770 } 9771 9772 db_indent -= 2; 9773} 9774 9775/* 9776 * vm_map_print: [ debug ] 9777 */ 9778void 9779vm_map_print( 9780 db_addr_t inmap) 9781{ 9782 register vm_map_entry_t entry; 9783 vm_map_t map; 9784#if TASK_SWAPPER 9785 char *swstate; 9786#endif /* TASK_SWAPPER */ 9787 9788 map = (vm_map_t)(long) 9789 inmap; /* Make sure we have the right type */ 9790 9791 iprintf("task map %08X\n", map); 9792 9793 db_indent += 2; 9794 9795 vm_map_header_print(&map->hdr); 9796 9797 iprintf("pmap = %08X size = %08X ref = %d hint = %08X first_free = %08X\n", 9798 map->pmap, 9799 map->size, 9800 map->ref_count, 9801 map->hint, 9802 map->first_free); 9803 9804 iprintf("%swait_for_space, %swiring_required, timestamp = %d\n", 9805 (map->wait_for_space ? "" : "!"), 9806 (map->wiring_required ? "" : "!"), 9807 map->timestamp); 9808 9809#if TASK_SWAPPER 9810 switch (map->sw_state) { 9811 case MAP_SW_IN: 9812 swstate = "SW_IN"; 9813 break; 9814 case MAP_SW_OUT: 9815 swstate = "SW_OUT"; 9816 break; 9817 default: 9818 swstate = "????"; 9819 break; 9820 } 9821 iprintf("res = %d, sw_state = %s\n", map->res_count, swstate); 9822#endif /* TASK_SWAPPER */ 9823 9824 for (entry = vm_map_first_entry(map); 9825 entry && entry != vm_map_to_entry(map); 9826 entry = entry->vme_next) { 9827 vm_map_entry_print(entry); 9828 } 9829 9830 db_indent -= 2; 9831} 9832 9833/* 9834 * Routine: vm_map_copy_print 9835 * Purpose: 9836 * Pretty-print a copy object for ddb. 9837 */ 9838 9839void 9840vm_map_copy_print( 9841 db_addr_t incopy) 9842{ 9843 vm_map_copy_t copy; 9844 vm_map_entry_t entry; 9845 9846 copy = (vm_map_copy_t)(long) 9847 incopy; /* Make sure we have the right type */ 9848 9849 printf("copy object 0x%x\n", copy); 9850 9851 db_indent += 2; 9852 9853 iprintf("type=%d", copy->type); 9854 switch (copy->type) { 9855 case VM_MAP_COPY_ENTRY_LIST: 9856 printf("[entry_list]"); 9857 break; 9858 9859 case VM_MAP_COPY_OBJECT: 9860 printf("[object]"); 9861 break; 9862 9863 case VM_MAP_COPY_KERNEL_BUFFER: 9864 printf("[kernel_buffer]"); 9865 break; 9866 9867 default: 9868 printf("[bad type]"); 9869 break; 9870 } 9871 printf(", offset=0x%llx", (unsigned long long)copy->offset); 9872 printf(", size=0x%x\n", copy->size); 9873 9874 switch (copy->type) { 9875 case VM_MAP_COPY_ENTRY_LIST: 9876 vm_map_header_print(©->cpy_hdr); 9877 for (entry = vm_map_copy_first_entry(copy); 9878 entry && entry != vm_map_copy_to_entry(copy); 9879 entry = entry->vme_next) { 9880 vm_map_entry_print(entry); 9881 } 9882 break; 9883 9884 case VM_MAP_COPY_OBJECT: 9885 iprintf("object=0x%x\n", copy->cpy_object); 9886 break; 9887 9888 case VM_MAP_COPY_KERNEL_BUFFER: 9889 iprintf("kernel buffer=0x%x", copy->cpy_kdata); 9890 printf(", kalloc_size=0x%x\n", copy->cpy_kalloc_size); 9891 break; 9892 9893 } 9894 9895 db_indent -=2; 9896} 9897 9898/* 9899 * db_vm_map_total_size(map) [ debug ] 9900 * 9901 * return the total virtual size (in bytes) of the map 9902 */ 9903vm_map_size_t 9904db_vm_map_total_size( 9905 db_addr_t inmap) 9906{ 9907 vm_map_entry_t entry; 9908 vm_map_size_t total; 9909 vm_map_t map; 9910 9911 map = (vm_map_t)(long) 9912 inmap; /* Make sure we have the right type */ 9913 9914 total = 0; 9915 for (entry = vm_map_first_entry(map); 9916 entry != vm_map_to_entry(map); 9917 entry = entry->vme_next) { 9918 total += entry->vme_end - entry->vme_start; 9919 } 9920 9921 return total; 9922} 9923 9924#endif /* MACH_KDB */ 9925 9926/* 9927 * Routine: vm_map_entry_insert 9928 * 9929 * Descritpion: This routine inserts a new vm_entry in a locked map. 9930 */ 9931vm_map_entry_t 9932vm_map_entry_insert( 9933 vm_map_t map, 9934 vm_map_entry_t insp_entry, 9935 vm_map_offset_t start, 9936 vm_map_offset_t end, 9937 vm_object_t object, 9938 vm_object_offset_t offset, 9939 boolean_t needs_copy, 9940 boolean_t is_shared, 9941 boolean_t in_transition, 9942 vm_prot_t cur_protection, 9943 vm_prot_t max_protection, 9944 vm_behavior_t behavior, 9945 vm_inherit_t inheritance, 9946 unsigned wired_count, 9947 boolean_t no_cache) 9948{ 9949 vm_map_entry_t new_entry; 9950 9951 assert(insp_entry != (vm_map_entry_t)0); 9952 9953 new_entry = vm_map_entry_create(map); 9954 9955 new_entry->vme_start = start; 9956 new_entry->vme_end = end; 9957 assert(page_aligned(new_entry->vme_start)); 9958 assert(page_aligned(new_entry->vme_end)); 9959 9960 new_entry->object.vm_object = object; 9961 new_entry->offset = offset; 9962 new_entry->is_shared = is_shared; 9963 new_entry->is_sub_map = FALSE; 9964 new_entry->needs_copy = needs_copy; 9965 new_entry->in_transition = in_transition; 9966 new_entry->needs_wakeup = FALSE; 9967 new_entry->inheritance = inheritance; 9968 new_entry->protection = cur_protection; 9969 new_entry->max_protection = max_protection; 9970 new_entry->behavior = behavior; 9971 new_entry->wired_count = wired_count; 9972 new_entry->user_wired_count = 0; 9973 new_entry->use_pmap = FALSE; 9974 new_entry->alias = 0; 9975 new_entry->no_cache = no_cache; 9976 9977 /* 9978 * Insert the new entry into the list. 9979 */ 9980 9981 vm_map_entry_link(map, insp_entry, new_entry); 9982 map->size += end - start; 9983 9984 /* 9985 * Update the free space hint and the lookup hint. 9986 */ 9987 9988 SAVE_HINT_MAP_WRITE(map, new_entry); 9989 return new_entry; 9990} 9991 9992/* 9993 * Routine: vm_map_remap_extract 9994 * 9995 * Descritpion: This routine returns a vm_entry list from a map. 9996 */ 9997static kern_return_t 9998vm_map_remap_extract( 9999 vm_map_t map, 10000 vm_map_offset_t addr, 10001 vm_map_size_t size, 10002 boolean_t copy, 10003 struct vm_map_header *map_header, 10004 vm_prot_t *cur_protection, 10005 vm_prot_t *max_protection, 10006 /* What, no behavior? */ 10007 vm_inherit_t inheritance, 10008 boolean_t pageable) 10009{ 10010 kern_return_t result; 10011 vm_map_size_t mapped_size; 10012 vm_map_size_t tmp_size; 10013 vm_map_entry_t src_entry; /* result of last map lookup */ 10014 vm_map_entry_t new_entry; 10015 vm_object_offset_t offset; 10016 vm_map_offset_t map_address; 10017 vm_map_offset_t src_start; /* start of entry to map */ 10018 vm_map_offset_t src_end; /* end of region to be mapped */ 10019 vm_object_t object; 10020 vm_map_version_t version; 10021 boolean_t src_needs_copy; 10022 boolean_t new_entry_needs_copy; 10023 10024 assert(map != VM_MAP_NULL); 10025 assert(size != 0 && size == vm_map_round_page(size)); 10026 assert(inheritance == VM_INHERIT_NONE || 10027 inheritance == VM_INHERIT_COPY || 10028 inheritance == VM_INHERIT_SHARE); 10029 10030 /* 10031 * Compute start and end of region. 10032 */ 10033 src_start = vm_map_trunc_page(addr); 10034 src_end = vm_map_round_page(src_start + size); 10035 10036 /* 10037 * Initialize map_header. 10038 */ 10039 map_header->links.next = (struct vm_map_entry *)&map_header->links; 10040 map_header->links.prev = (struct vm_map_entry *)&map_header->links; 10041 map_header->nentries = 0; 10042 map_header->entries_pageable = pageable; 10043 10044 *cur_protection = VM_PROT_ALL; 10045 *max_protection = VM_PROT_ALL; 10046 10047 map_address = 0; 10048 mapped_size = 0; 10049 result = KERN_SUCCESS; 10050 10051 /* 10052 * The specified source virtual space might correspond to 10053 * multiple map entries, need to loop on them. 10054 */ 10055 vm_map_lock(map); 10056 while (mapped_size != size) { 10057 vm_map_size_t entry_size; 10058 10059 /* 10060 * Find the beginning of the region. 10061 */ 10062 if (! vm_map_lookup_entry(map, src_start, &src_entry)) { 10063 result = KERN_INVALID_ADDRESS; 10064 break; 10065 } 10066 10067 if (src_start < src_entry->vme_start || 10068 (mapped_size && src_start != src_entry->vme_start)) { 10069 result = KERN_INVALID_ADDRESS; 10070 break; 10071 } 10072 10073 if(src_entry->is_sub_map) { 10074 result = KERN_INVALID_ADDRESS; 10075 break; 10076 } 10077 10078 tmp_size = size - mapped_size; 10079 if (src_end > src_entry->vme_end) 10080 tmp_size -= (src_end - src_entry->vme_end); 10081 10082 entry_size = (vm_map_size_t)(src_entry->vme_end - 10083 src_entry->vme_start); 10084 10085 if(src_entry->is_sub_map) { 10086 vm_map_reference(src_entry->object.sub_map); 10087 object = VM_OBJECT_NULL; 10088 } else { 10089 object = src_entry->object.vm_object; 10090 10091 if (object == VM_OBJECT_NULL) { 10092 object = vm_object_allocate(entry_size); 10093 src_entry->offset = 0; 10094 src_entry->object.vm_object = object; 10095 } else if (object->copy_strategy != 10096 MEMORY_OBJECT_COPY_SYMMETRIC) { 10097 /* 10098 * We are already using an asymmetric 10099 * copy, and therefore we already have 10100 * the right object. 10101 */ 10102 assert(!src_entry->needs_copy); 10103 } else if (src_entry->needs_copy || object->shadowed || 10104 (object->internal && !object->true_share && 10105 !src_entry->is_shared && 10106 object->size > entry_size)) { 10107 10108 vm_object_shadow(&src_entry->object.vm_object, 10109 &src_entry->offset, 10110 entry_size); 10111 10112 if (!src_entry->needs_copy && 10113 (src_entry->protection & VM_PROT_WRITE)) { 10114 vm_prot_t prot; 10115 10116 prot = src_entry->protection & ~VM_PROT_WRITE; 10117 10118 if (override_nx(map, src_entry->alias) && prot) 10119 prot |= VM_PROT_EXECUTE; 10120 10121 if(map->mapped) { 10122 vm_object_pmap_protect( 10123 src_entry->object.vm_object, 10124 src_entry->offset, 10125 entry_size, 10126 PMAP_NULL, 10127 src_entry->vme_start, 10128 prot); 10129 } else { 10130 pmap_protect(vm_map_pmap(map), 10131 src_entry->vme_start, 10132 src_entry->vme_end, 10133 prot); 10134 } 10135 } 10136 10137 object = src_entry->object.vm_object; 10138 src_entry->needs_copy = FALSE; 10139 } 10140 10141 10142 vm_object_lock(object); 10143 vm_object_reference_locked(object); /* object ref. for new entry */ 10144 if (object->copy_strategy == 10145 MEMORY_OBJECT_COPY_SYMMETRIC) { 10146 object->copy_strategy = 10147 MEMORY_OBJECT_COPY_DELAY; 10148 } 10149 vm_object_unlock(object); 10150 } 10151 10152 offset = src_entry->offset + (src_start - src_entry->vme_start); 10153 10154 new_entry = _vm_map_entry_create(map_header); 10155 vm_map_entry_copy(new_entry, src_entry); 10156 new_entry->use_pmap = FALSE; /* clr address space specifics */ 10157 10158 new_entry->vme_start = map_address; 10159 new_entry->vme_end = map_address + tmp_size; 10160 new_entry->inheritance = inheritance; 10161 new_entry->offset = offset; 10162 10163 /* 10164 * The new region has to be copied now if required. 10165 */ 10166 RestartCopy: 10167 if (!copy) { 10168 src_entry->is_shared = TRUE; 10169 new_entry->is_shared = TRUE; 10170 if (!(new_entry->is_sub_map)) 10171 new_entry->needs_copy = FALSE; 10172 10173 } else if (src_entry->is_sub_map) { 10174 /* make this a COW sub_map if not already */ 10175 new_entry->needs_copy = TRUE; 10176 object = VM_OBJECT_NULL; 10177 } else if (src_entry->wired_count == 0 && 10178 vm_object_copy_quickly(&new_entry->object.vm_object, 10179 new_entry->offset, 10180 (new_entry->vme_end - 10181 new_entry->vme_start), 10182 &src_needs_copy, 10183 &new_entry_needs_copy)) { 10184 10185 new_entry->needs_copy = new_entry_needs_copy; 10186 new_entry->is_shared = FALSE; 10187 10188 /* 10189 * Handle copy_on_write semantics. 10190 */ 10191 if (src_needs_copy && !src_entry->needs_copy) { 10192 vm_prot_t prot; 10193 10194 prot = src_entry->protection & ~VM_PROT_WRITE; 10195 10196 if (override_nx(map, src_entry->alias) && prot) 10197 prot |= VM_PROT_EXECUTE; 10198 10199 vm_object_pmap_protect(object, 10200 offset, 10201 entry_size, 10202 ((src_entry->is_shared 10203 || map->mapped) ? 10204 PMAP_NULL : map->pmap), 10205 src_entry->vme_start, 10206 prot); 10207 10208 src_entry->needs_copy = TRUE; 10209 } 10210 /* 10211 * Throw away the old object reference of the new entry. 10212 */ 10213 vm_object_deallocate(object); 10214 10215 } else { 10216 new_entry->is_shared = FALSE; 10217 10218 /* 10219 * The map can be safely unlocked since we 10220 * already hold a reference on the object. 10221 * 10222 * Record the timestamp of the map for later 10223 * verification, and unlock the map. 10224 */ 10225 version.main_timestamp = map->timestamp; 10226 vm_map_unlock(map); /* Increments timestamp once! */ 10227 10228 /* 10229 * Perform the copy. 10230 */ 10231 if (src_entry->wired_count > 0) { 10232 vm_object_lock(object); 10233 result = vm_object_copy_slowly( 10234 object, 10235 offset, 10236 entry_size, 10237 THREAD_UNINT, 10238 &new_entry->object.vm_object); 10239 10240 new_entry->offset = 0; 10241 new_entry->needs_copy = FALSE; 10242 } else { 10243 result = vm_object_copy_strategically( 10244 object, 10245 offset, 10246 entry_size, 10247 &new_entry->object.vm_object, 10248 &new_entry->offset, 10249 &new_entry_needs_copy); 10250 10251 new_entry->needs_copy = new_entry_needs_copy; 10252 } 10253 10254 /* 10255 * Throw away the old object reference of the new entry. 10256 */ 10257 vm_object_deallocate(object); 10258 10259 if (result != KERN_SUCCESS && 10260 result != KERN_MEMORY_RESTART_COPY) { 10261 _vm_map_entry_dispose(map_header, new_entry); 10262 break; 10263 } 10264 10265 /* 10266 * Verify that the map has not substantially 10267 * changed while the copy was being made. 10268 */ 10269 10270 vm_map_lock(map); 10271 if (version.main_timestamp + 1 != map->timestamp) { 10272 /* 10273 * Simple version comparison failed. 10274 * 10275 * Retry the lookup and verify that the 10276 * same object/offset are still present. 10277 */ 10278 vm_object_deallocate(new_entry-> 10279 object.vm_object); 10280 _vm_map_entry_dispose(map_header, new_entry); 10281 if (result == KERN_MEMORY_RESTART_COPY) 10282 result = KERN_SUCCESS; 10283 continue; 10284 } 10285 10286 if (result == KERN_MEMORY_RESTART_COPY) { 10287 vm_object_reference(object); 10288 goto RestartCopy; 10289 } 10290 } 10291 10292 _vm_map_entry_link(map_header, 10293 map_header->links.prev, new_entry); 10294 10295 *cur_protection &= src_entry->protection; 10296 *max_protection &= src_entry->max_protection; 10297 10298 map_address += tmp_size; 10299 mapped_size += tmp_size; 10300 src_start += tmp_size; 10301 10302 } /* end while */ 10303 10304 vm_map_unlock(map); 10305 if (result != KERN_SUCCESS) { 10306 /* 10307 * Free all allocated elements. 10308 */ 10309 for (src_entry = map_header->links.next; 10310 src_entry != (struct vm_map_entry *)&map_header->links; 10311 src_entry = new_entry) { 10312 new_entry = src_entry->vme_next; 10313 _vm_map_entry_unlink(map_header, src_entry); 10314 vm_object_deallocate(src_entry->object.vm_object); 10315 _vm_map_entry_dispose(map_header, src_entry); 10316 } 10317 } 10318 return result; 10319} 10320 10321/* 10322 * Routine: vm_remap 10323 * 10324 * Map portion of a task's address space. 10325 * Mapped region must not overlap more than 10326 * one vm memory object. Protections and 10327 * inheritance attributes remain the same 10328 * as in the original task and are out parameters. 10329 * Source and Target task can be identical 10330 * Other attributes are identical as for vm_map() 10331 */ 10332kern_return_t 10333vm_map_remap( 10334 vm_map_t target_map, 10335 vm_map_address_t *address, 10336 vm_map_size_t size, 10337 vm_map_offset_t mask, 10338 boolean_t anywhere, 10339 vm_map_t src_map, 10340 vm_map_offset_t memory_address, 10341 boolean_t copy, 10342 vm_prot_t *cur_protection, 10343 vm_prot_t *max_protection, 10344 vm_inherit_t inheritance) 10345{ 10346 kern_return_t result; 10347 vm_map_entry_t entry; 10348 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL; 10349 vm_map_entry_t new_entry; 10350 struct vm_map_header map_header; 10351 10352 if (target_map == VM_MAP_NULL) 10353 return KERN_INVALID_ARGUMENT; 10354 10355 switch (inheritance) { 10356 case VM_INHERIT_NONE: 10357 case VM_INHERIT_COPY: 10358 case VM_INHERIT_SHARE: 10359 if (size != 0 && src_map != VM_MAP_NULL) 10360 break; 10361 /*FALL THRU*/ 10362 default: 10363 return KERN_INVALID_ARGUMENT; 10364 } 10365 10366 size = vm_map_round_page(size); 10367 10368 result = vm_map_remap_extract(src_map, memory_address, 10369 size, copy, &map_header, 10370 cur_protection, 10371 max_protection, 10372 inheritance, 10373 target_map->hdr. 10374 entries_pageable); 10375 10376 if (result != KERN_SUCCESS) { 10377 return result; 10378 } 10379 10380 /* 10381 * Allocate/check a range of free virtual address 10382 * space for the target 10383 */ 10384 *address = vm_map_trunc_page(*address); 10385 vm_map_lock(target_map); 10386 result = vm_map_remap_range_allocate(target_map, address, size, 10387 mask, anywhere, &insp_entry); 10388 10389 for (entry = map_header.links.next; 10390 entry != (struct vm_map_entry *)&map_header.links; 10391 entry = new_entry) { 10392 new_entry = entry->vme_next; 10393 _vm_map_entry_unlink(&map_header, entry); 10394 if (result == KERN_SUCCESS) { 10395 entry->vme_start += *address; 10396 entry->vme_end += *address; 10397 vm_map_entry_link(target_map, insp_entry, entry); 10398 insp_entry = entry; 10399 } else { 10400 if (!entry->is_sub_map) { 10401 vm_object_deallocate(entry->object.vm_object); 10402 } else { 10403 vm_map_deallocate(entry->object.sub_map); 10404 } 10405 _vm_map_entry_dispose(&map_header, entry); 10406 } 10407 } 10408 10409 if (result == KERN_SUCCESS) { 10410 target_map->size += size; 10411 SAVE_HINT_MAP_WRITE(target_map, insp_entry); 10412 } 10413 vm_map_unlock(target_map); 10414 10415 if (result == KERN_SUCCESS && target_map->wiring_required) 10416 result = vm_map_wire(target_map, *address, 10417 *address + size, *cur_protection, TRUE); 10418 return result; 10419} 10420 10421/* 10422 * Routine: vm_map_remap_range_allocate 10423 * 10424 * Description: 10425 * Allocate a range in the specified virtual address map. 10426 * returns the address and the map entry just before the allocated 10427 * range 10428 * 10429 * Map must be locked. 10430 */ 10431 10432static kern_return_t 10433vm_map_remap_range_allocate( 10434 vm_map_t map, 10435 vm_map_address_t *address, /* IN/OUT */ 10436 vm_map_size_t size, 10437 vm_map_offset_t mask, 10438 boolean_t anywhere, 10439 vm_map_entry_t *map_entry) /* OUT */ 10440{ 10441 register vm_map_entry_t entry; 10442 register vm_map_offset_t start; 10443 register vm_map_offset_t end; 10444 10445StartAgain: ; 10446 10447 start = *address; 10448 10449 if (anywhere) 10450 { 10451 /* 10452 * Calculate the first possible address. 10453 */ 10454 10455 if (start < map->min_offset) 10456 start = map->min_offset; 10457 if (start > map->max_offset) 10458 return(KERN_NO_SPACE); 10459 10460 /* 10461 * Look for the first possible address; 10462 * if there's already something at this 10463 * address, we have to start after it. 10464 */ 10465 10466 assert(first_free_is_valid(map)); 10467 if (start == map->min_offset) { 10468 if ((entry = map->first_free) != vm_map_to_entry(map)) 10469 start = entry->vme_end; 10470 } else { 10471 vm_map_entry_t tmp_entry; 10472 if (vm_map_lookup_entry(map, start, &tmp_entry)) 10473 start = tmp_entry->vme_end; 10474 entry = tmp_entry; 10475 } 10476 10477 /* 10478 * In any case, the "entry" always precedes 10479 * the proposed new region throughout the 10480 * loop: 10481 */ 10482 10483 while (TRUE) { 10484 register vm_map_entry_t next; 10485 10486 /* 10487 * Find the end of the proposed new region. 10488 * Be sure we didn't go beyond the end, or 10489 * wrap around the address. 10490 */ 10491 10492 end = ((start + mask) & ~mask); 10493 if (end < start) 10494 return(KERN_NO_SPACE); 10495 start = end; 10496 end += size; 10497 10498 if ((end > map->max_offset) || (end < start)) { 10499 if (map->wait_for_space) { 10500 if (size <= (map->max_offset - 10501 map->min_offset)) { 10502 assert_wait((event_t) map, THREAD_INTERRUPTIBLE); 10503 vm_map_unlock(map); 10504 thread_block(THREAD_CONTINUE_NULL); 10505 vm_map_lock(map); 10506 goto StartAgain; 10507 } 10508 } 10509 10510 return(KERN_NO_SPACE); 10511 } 10512 10513 /* 10514 * If there are no more entries, we must win. 10515 */ 10516 10517 next = entry->vme_next; 10518 if (next == vm_map_to_entry(map)) 10519 break; 10520 10521 /* 10522 * If there is another entry, it must be 10523 * after the end of the potential new region. 10524 */ 10525 10526 if (next->vme_start >= end) 10527 break; 10528 10529 /* 10530 * Didn't fit -- move to the next entry. 10531 */ 10532 10533 entry = next; 10534 start = entry->vme_end; 10535 } 10536 *address = start; 10537 } else { 10538 vm_map_entry_t temp_entry; 10539 10540 /* 10541 * Verify that: 10542 * the address doesn't itself violate 10543 * the mask requirement. 10544 */ 10545 10546 if ((start & mask) != 0) 10547 return(KERN_NO_SPACE); 10548 10549 10550 /* 10551 * ... the address is within bounds 10552 */ 10553 10554 end = start + size; 10555 10556 if ((start < map->min_offset) || 10557 (end > map->max_offset) || 10558 (start >= end)) { 10559 return(KERN_INVALID_ADDRESS); 10560 } 10561 10562 /* 10563 * ... the starting address isn't allocated 10564 */ 10565 10566 if (vm_map_lookup_entry(map, start, &temp_entry)) 10567 return(KERN_NO_SPACE); 10568 10569 entry = temp_entry; 10570 10571 /* 10572 * ... the next region doesn't overlap the 10573 * end point. 10574 */ 10575 10576 if ((entry->vme_next != vm_map_to_entry(map)) && 10577 (entry->vme_next->vme_start < end)) 10578 return(KERN_NO_SPACE); 10579 } 10580 *map_entry = entry; 10581 return(KERN_SUCCESS); 10582} 10583 10584/* 10585 * vm_map_switch: 10586 * 10587 * Set the address map for the current thread to the specified map 10588 */ 10589 10590vm_map_t 10591vm_map_switch( 10592 vm_map_t map) 10593{ 10594 int mycpu; 10595 thread_t thread = current_thread(); 10596 vm_map_t oldmap = thread->map; 10597 10598 mp_disable_preemption(); 10599 mycpu = cpu_number(); 10600 10601 /* 10602 * Deactivate the current map and activate the requested map 10603 */ 10604 PMAP_SWITCH_USER(thread, map, mycpu); 10605 10606 mp_enable_preemption(); 10607 return(oldmap); 10608} 10609 10610 10611/* 10612 * Routine: vm_map_write_user 10613 * 10614 * Description: 10615 * Copy out data from a kernel space into space in the 10616 * destination map. The space must already exist in the 10617 * destination map. 10618 * NOTE: This routine should only be called by threads 10619 * which can block on a page fault. i.e. kernel mode user 10620 * threads. 10621 * 10622 */ 10623kern_return_t 10624vm_map_write_user( 10625 vm_map_t map, 10626 void *src_p, 10627 vm_map_address_t dst_addr, 10628 vm_size_t size) 10629{ 10630 kern_return_t kr = KERN_SUCCESS; 10631 10632 if(current_map() == map) { 10633 if (copyout(src_p, dst_addr, size)) { 10634 kr = KERN_INVALID_ADDRESS; 10635 } 10636 } else { 10637 vm_map_t oldmap; 10638 10639 /* take on the identity of the target map while doing */ 10640 /* the transfer */ 10641 10642 vm_map_reference(map); 10643 oldmap = vm_map_switch(map); 10644 if (copyout(src_p, dst_addr, size)) { 10645 kr = KERN_INVALID_ADDRESS; 10646 } 10647 vm_map_switch(oldmap); 10648 vm_map_deallocate(map); 10649 } 10650 return kr; 10651} 10652 10653/* 10654 * Routine: vm_map_read_user 10655 * 10656 * Description: 10657 * Copy in data from a user space source map into the 10658 * kernel map. The space must already exist in the 10659 * kernel map. 10660 * NOTE: This routine should only be called by threads 10661 * which can block on a page fault. i.e. kernel mode user 10662 * threads. 10663 * 10664 */ 10665kern_return_t 10666vm_map_read_user( 10667 vm_map_t map, 10668 vm_map_address_t src_addr, 10669 void *dst_p, 10670 vm_size_t size) 10671{ 10672 kern_return_t kr = KERN_SUCCESS; 10673 10674 if(current_map() == map) { 10675 if (copyin(src_addr, dst_p, size)) { 10676 kr = KERN_INVALID_ADDRESS; 10677 } 10678 } else { 10679 vm_map_t oldmap; 10680 10681 /* take on the identity of the target map while doing */ 10682 /* the transfer */ 10683 10684 vm_map_reference(map); 10685 oldmap = vm_map_switch(map); 10686 if (copyin(src_addr, dst_p, size)) { 10687 kr = KERN_INVALID_ADDRESS; 10688 } 10689 vm_map_switch(oldmap); 10690 vm_map_deallocate(map); 10691 } 10692 return kr; 10693} 10694 10695 10696/* 10697 * vm_map_check_protection: 10698 * 10699 * Assert that the target map allows the specified 10700 * privilege on the entire address region given. 10701 * The entire region must be allocated. 10702 */ 10703boolean_t 10704vm_map_check_protection(vm_map_t map, vm_map_offset_t start, 10705 vm_map_offset_t end, vm_prot_t protection) 10706{ 10707 vm_map_entry_t entry; 10708 vm_map_entry_t tmp_entry; 10709 10710 vm_map_lock(map); 10711 10712 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) 10713 { 10714 vm_map_unlock(map); 10715 return (FALSE); 10716 } 10717 10718 if (!vm_map_lookup_entry(map, start, &tmp_entry)) { 10719 vm_map_unlock(map); 10720 return(FALSE); 10721 } 10722 10723 entry = tmp_entry; 10724 10725 while (start < end) { 10726 if (entry == vm_map_to_entry(map)) { 10727 vm_map_unlock(map); 10728 return(FALSE); 10729 } 10730 10731 /* 10732 * No holes allowed! 10733 */ 10734 10735 if (start < entry->vme_start) { 10736 vm_map_unlock(map); 10737 return(FALSE); 10738 } 10739 10740 /* 10741 * Check protection associated with entry. 10742 */ 10743 10744 if ((entry->protection & protection) != protection) { 10745 vm_map_unlock(map); 10746 return(FALSE); 10747 } 10748 10749 /* go to next entry */ 10750 10751 start = entry->vme_end; 10752 entry = entry->vme_next; 10753 } 10754 vm_map_unlock(map); 10755 return(TRUE); 10756} 10757 10758kern_return_t 10759vm_map_purgable_control( 10760 vm_map_t map, 10761 vm_map_offset_t address, 10762 vm_purgable_t control, 10763 int *state) 10764{ 10765 vm_map_entry_t entry; 10766 vm_object_t object; 10767 kern_return_t kr; 10768 10769 /* 10770 * Vet all the input parameters and current type and state of the 10771 * underlaying object. Return with an error if anything is amiss. 10772 */ 10773 if (map == VM_MAP_NULL) 10774 return(KERN_INVALID_ARGUMENT); 10775 10776 if (control != VM_PURGABLE_SET_STATE && 10777 control != VM_PURGABLE_GET_STATE) 10778 return(KERN_INVALID_ARGUMENT); 10779 10780 if (control == VM_PURGABLE_SET_STATE && 10781 (((*state & ~(VM_PURGABLE_STATE_MASK|VM_VOLATILE_ORDER_MASK|VM_PURGABLE_ORDERING_MASK|VM_PURGABLE_BEHAVIOR_MASK|VM_VOLATILE_GROUP_MASK)) != 0) || 10782 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) 10783 return(KERN_INVALID_ARGUMENT); 10784 10785 vm_map_lock(map); 10786 10787 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) { 10788 10789 /* 10790 * Must pass a valid non-submap address. 10791 */ 10792 vm_map_unlock(map); 10793 return(KERN_INVALID_ADDRESS); 10794 } 10795 10796 if ((entry->protection & VM_PROT_WRITE) == 0) { 10797 /* 10798 * Can't apply purgable controls to something you can't write. 10799 */ 10800 vm_map_unlock(map); 10801 return(KERN_PROTECTION_FAILURE); 10802 } 10803 10804 object = entry->object.vm_object; 10805 if (object == VM_OBJECT_NULL) { 10806 /* 10807 * Object must already be present or it can't be purgable. 10808 */ 10809 vm_map_unlock(map); 10810 return KERN_INVALID_ARGUMENT; 10811 } 10812 10813 vm_object_lock(object); 10814 10815 if (entry->offset != 0 || 10816 entry->vme_end - entry->vme_start != object->size) { 10817 /* 10818 * Can only apply purgable controls to the whole (existing) 10819 * object at once. 10820 */ 10821 vm_map_unlock(map); 10822 vm_object_unlock(object); 10823 return KERN_INVALID_ARGUMENT; 10824 } 10825 10826 vm_map_unlock(map); 10827 10828 kr = vm_object_purgable_control(object, control, state); 10829 10830 vm_object_unlock(object); 10831 10832 return kr; 10833} 10834 10835kern_return_t 10836vm_map_page_info( 10837 vm_map_t target_map, 10838 vm_map_offset_t offset, 10839 int *disposition, 10840 int *ref_count) 10841{ 10842 vm_map_entry_t map_entry; 10843 vm_object_t object; 10844 vm_page_t m; 10845 kern_return_t kr; 10846 kern_return_t retval = KERN_SUCCESS; 10847 boolean_t top_object = TRUE; 10848 10849 *disposition = 0; 10850 *ref_count = 0; 10851 10852 vm_map_lock_read(target_map); 10853 10854restart_page_query: 10855 if (!vm_map_lookup_entry(target_map, offset, &map_entry)) { 10856 vm_map_unlock_read(target_map); 10857 return KERN_FAILURE; 10858 } 10859 offset -= map_entry->vme_start; /* adjust to offset within entry */ 10860 offset += map_entry->offset; /* adjust to target object offset */ 10861 10862 if (map_entry->object.vm_object != VM_OBJECT_NULL) { 10863 if (!map_entry->is_sub_map) { 10864 object = map_entry->object.vm_object; 10865 } else { 10866 vm_map_t sub_map; 10867 10868 sub_map = map_entry->object.sub_map; 10869 vm_map_lock_read(sub_map); 10870 vm_map_unlock_read(target_map); 10871 10872 target_map = sub_map; 10873 goto restart_page_query; 10874 } 10875 } else { 10876 vm_map_unlock_read(target_map); 10877 return KERN_SUCCESS; 10878 } 10879 vm_object_lock(object); 10880 vm_map_unlock_read(target_map); 10881 10882 while (TRUE) { 10883 m = vm_page_lookup(object, offset); 10884 10885 if (m != VM_PAGE_NULL) { 10886 *disposition |= VM_PAGE_QUERY_PAGE_PRESENT; 10887 break; 10888 } else { 10889#if MACH_PAGEMAP 10890 if (object->existence_map) { 10891 if (vm_external_state_get(object->existence_map, offset) 10892 == VM_EXTERNAL_STATE_EXISTS) { 10893 /* 10894 * this page has been paged out 10895 */ 10896 *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; 10897 break; 10898 } 10899 } else 10900#endif 10901 if (object->internal && 10902 object->alive && 10903 !object->terminating && 10904 object->pager_ready) { 10905 10906 memory_object_t pager; 10907 10908 vm_object_paging_begin(object); 10909 pager = object->pager; 10910 vm_object_unlock(object); 10911 10912 kr = memory_object_data_request( 10913 pager, 10914 offset + object->paging_offset, 10915 0, /* just poke the pager */ 10916 VM_PROT_READ, 10917 NULL); 10918 10919 vm_object_lock(object); 10920 vm_object_paging_end(object); 10921 10922 if (kr == KERN_SUCCESS) { 10923 /* 10924 * the pager has this page 10925 */ 10926 *disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT; 10927 break; 10928 } 10929 } 10930 if (object->shadow != VM_OBJECT_NULL) { 10931 vm_object_t shadow; 10932 10933 offset += object->shadow_offset; 10934 shadow = object->shadow; 10935 10936 vm_object_lock(shadow); 10937 vm_object_unlock(object); 10938 10939 object = shadow; 10940 top_object = FALSE; 10941 } else { 10942 if (!object->internal) 10943 break; 10944 10945 retval = KERN_FAILURE; 10946 goto page_query_done; 10947 } 10948 } 10949 } 10950 /* The ref_count is not strictly accurate, it measures the number */ 10951 /* of entities holding a ref on the object, they may not be mapping */ 10952 /* the object or may not be mapping the section holding the */ 10953 /* target page but its still a ball park number and though an over- */ 10954 /* count, it picks up the copy-on-write cases */ 10955 10956 /* We could also get a picture of page sharing from pmap_attributes */ 10957 /* but this would under count as only faulted-in mappings would */ 10958 /* show up. */ 10959 10960 *ref_count = object->ref_count; 10961 10962 if (top_object == TRUE && object->shadow) 10963 *disposition |= VM_PAGE_QUERY_PAGE_COPIED; 10964 10965 if (m == VM_PAGE_NULL) 10966 goto page_query_done; 10967 10968 if (m->fictitious) { 10969 *disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS; 10970 goto page_query_done; 10971 } 10972 if (m->dirty || pmap_is_modified(m->phys_page)) 10973 *disposition |= VM_PAGE_QUERY_PAGE_DIRTY; 10974 10975 if (m->reference || pmap_is_referenced(m->phys_page)) 10976 *disposition |= VM_PAGE_QUERY_PAGE_REF; 10977 10978 if (m->speculative) 10979 *disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE; 10980 10981 if (m->cs_validated) 10982 *disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED; 10983 if (m->cs_tainted) 10984 *disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED; 10985 10986page_query_done: 10987 vm_object_unlock(object); 10988 10989 return retval; 10990} 10991 10992/* 10993 * vm_map_msync 10994 * 10995 * Synchronises the memory range specified with its backing store 10996 * image by either flushing or cleaning the contents to the appropriate 10997 * memory manager engaging in a memory object synchronize dialog with 10998 * the manager. The client doesn't return until the manager issues 10999 * m_o_s_completed message. MIG Magically converts user task parameter 11000 * to the task's address map. 11001 * 11002 * interpretation of sync_flags 11003 * VM_SYNC_INVALIDATE - discard pages, only return precious 11004 * pages to manager. 11005 * 11006 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS) 11007 * - discard pages, write dirty or precious 11008 * pages back to memory manager. 11009 * 11010 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS 11011 * - write dirty or precious pages back to 11012 * the memory manager. 11013 * 11014 * VM_SYNC_CONTIGUOUS - does everything normally, but if there 11015 * is a hole in the region, and we would 11016 * have returned KERN_SUCCESS, return 11017 * KERN_INVALID_ADDRESS instead. 11018 * 11019 * NOTE 11020 * The memory object attributes have not yet been implemented, this 11021 * function will have to deal with the invalidate attribute 11022 * 11023 * RETURNS 11024 * KERN_INVALID_TASK Bad task parameter 11025 * KERN_INVALID_ARGUMENT both sync and async were specified. 11026 * KERN_SUCCESS The usual. 11027 * KERN_INVALID_ADDRESS There was a hole in the region. 11028 */ 11029 11030kern_return_t 11031vm_map_msync( 11032 vm_map_t map, 11033 vm_map_address_t address, 11034 vm_map_size_t size, 11035 vm_sync_t sync_flags) 11036{ 11037 msync_req_t msr; 11038 msync_req_t new_msr; 11039 queue_chain_t req_q; /* queue of requests for this msync */ 11040 vm_map_entry_t entry; 11041 vm_map_size_t amount_left; 11042 vm_object_offset_t offset; 11043 boolean_t do_sync_req; 11044 boolean_t modifiable; 11045 boolean_t had_hole = FALSE; 11046 memory_object_t pager; 11047 11048 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) && 11049 (sync_flags & VM_SYNC_SYNCHRONOUS)) 11050 return(KERN_INVALID_ARGUMENT); 11051 11052 /* 11053 * align address and size on page boundaries 11054 */ 11055 size = vm_map_round_page(address + size) - vm_map_trunc_page(address); 11056 address = vm_map_trunc_page(address); 11057 11058 if (map == VM_MAP_NULL) 11059 return(KERN_INVALID_TASK); 11060 11061 if (size == 0) 11062 return(KERN_SUCCESS); 11063 11064 queue_init(&req_q); 11065 amount_left = size; 11066 11067 while (amount_left > 0) { 11068 vm_object_size_t flush_size; 11069 vm_object_t object; 11070 11071 vm_map_lock(map); 11072 if (!vm_map_lookup_entry(map, 11073 vm_map_trunc_page(address), &entry)) { 11074 11075 vm_map_size_t skip; 11076 11077 /* 11078 * hole in the address map. 11079 */ 11080 had_hole = TRUE; 11081 11082 /* 11083 * Check for empty map. 11084 */ 11085 if (entry == vm_map_to_entry(map) && 11086 entry->vme_next == entry) { 11087 vm_map_unlock(map); 11088 break; 11089 } 11090 /* 11091 * Check that we don't wrap and that 11092 * we have at least one real map entry. 11093 */ 11094 if ((map->hdr.nentries == 0) || 11095 (entry->vme_next->vme_start < address)) { 11096 vm_map_unlock(map); 11097 break; 11098 } 11099 /* 11100 * Move up to the next entry if needed 11101 */ 11102 skip = (entry->vme_next->vme_start - address); 11103 if (skip >= amount_left) 11104 amount_left = 0; 11105 else 11106 amount_left -= skip; 11107 address = entry->vme_next->vme_start; 11108 vm_map_unlock(map); 11109 continue; 11110 } 11111 11112 offset = address - entry->vme_start; 11113 11114 /* 11115 * do we have more to flush than is contained in this 11116 * entry ? 11117 */ 11118 if (amount_left + entry->vme_start + offset > entry->vme_end) { 11119 flush_size = entry->vme_end - 11120 (entry->vme_start + offset); 11121 } else { 11122 flush_size = amount_left; 11123 } 11124 amount_left -= flush_size; 11125 address += flush_size; 11126 11127 if (entry->is_sub_map == TRUE) { 11128 vm_map_t local_map; 11129 vm_map_offset_t local_offset; 11130 11131 local_map = entry->object.sub_map; 11132 local_offset = entry->offset; 11133 vm_map_unlock(map); 11134 if (vm_map_msync( 11135 local_map, 11136 local_offset, 11137 flush_size, 11138 sync_flags) == KERN_INVALID_ADDRESS) { 11139 had_hole = TRUE; 11140 } 11141 continue; 11142 } 11143 object = entry->object.vm_object; 11144 11145 /* 11146 * We can't sync this object if the object has not been 11147 * created yet 11148 */ 11149 if (object == VM_OBJECT_NULL) { 11150 vm_map_unlock(map); 11151 continue; 11152 } 11153 offset += entry->offset; 11154 modifiable = (entry->protection & VM_PROT_WRITE) 11155 != VM_PROT_NONE; 11156 11157 vm_object_lock(object); 11158 11159 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) { 11160 boolean_t kill_pages = 0; 11161 11162 if (sync_flags & VM_SYNC_KILLPAGES) { 11163 if (object->ref_count == 1 && !entry->needs_copy && !object->shadow) 11164 kill_pages = 1; 11165 else 11166 kill_pages = -1; 11167 } 11168 if (kill_pages != -1) 11169 vm_object_deactivate_pages(object, offset, 11170 (vm_object_size_t)flush_size, kill_pages); 11171 vm_object_unlock(object); 11172 vm_map_unlock(map); 11173 continue; 11174 } 11175 /* 11176 * We can't sync this object if there isn't a pager. 11177 * Don't bother to sync internal objects, since there can't 11178 * be any "permanent" storage for these objects anyway. 11179 */ 11180 if ((object->pager == MEMORY_OBJECT_NULL) || 11181 (object->internal) || (object->private)) { 11182 vm_object_unlock(object); 11183 vm_map_unlock(map); 11184 continue; 11185 } 11186 /* 11187 * keep reference on the object until syncing is done 11188 */ 11189 vm_object_reference_locked(object); 11190 vm_object_unlock(object); 11191 11192 vm_map_unlock(map); 11193 11194 do_sync_req = vm_object_sync(object, 11195 offset, 11196 flush_size, 11197 sync_flags & VM_SYNC_INVALIDATE, 11198 (modifiable && 11199 (sync_flags & VM_SYNC_SYNCHRONOUS || 11200 sync_flags & VM_SYNC_ASYNCHRONOUS)), 11201 sync_flags & VM_SYNC_SYNCHRONOUS); 11202 /* 11203 * only send a m_o_s if we returned pages or if the entry 11204 * is writable (ie dirty pages may have already been sent back) 11205 */ 11206 if (!do_sync_req && !modifiable) { 11207 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) { 11208 /* 11209 * clear out the clustering and read-ahead hints 11210 */ 11211 vm_object_lock(object); 11212 11213 object->pages_created = 0; 11214 object->pages_used = 0; 11215 object->sequential = 0; 11216 object->last_alloc = 0; 11217 11218 vm_object_unlock(object); 11219 } 11220 vm_object_deallocate(object); 11221 continue; 11222 } 11223 msync_req_alloc(new_msr); 11224 11225 vm_object_lock(object); 11226 offset += object->paging_offset; 11227 11228 new_msr->offset = offset; 11229 new_msr->length = flush_size; 11230 new_msr->object = object; 11231 new_msr->flag = VM_MSYNC_SYNCHRONIZING; 11232 re_iterate: 11233 11234 /* 11235 * We can't sync this object if there isn't a pager. The 11236 * pager can disappear anytime we're not holding the object 11237 * lock. So this has to be checked anytime we goto re_iterate. 11238 */ 11239 11240 pager = object->pager; 11241 11242 if (pager == MEMORY_OBJECT_NULL) { 11243 vm_object_unlock(object); 11244 vm_object_deallocate(object); 11245 continue; 11246 } 11247 11248 queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) { 11249 /* 11250 * need to check for overlapping entry, if found, wait 11251 * on overlapping msr to be done, then reiterate 11252 */ 11253 msr_lock(msr); 11254 if (msr->flag == VM_MSYNC_SYNCHRONIZING && 11255 ((offset >= msr->offset && 11256 offset < (msr->offset + msr->length)) || 11257 (msr->offset >= offset && 11258 msr->offset < (offset + flush_size)))) 11259 { 11260 assert_wait((event_t) msr,THREAD_INTERRUPTIBLE); 11261 msr_unlock(msr); 11262 vm_object_unlock(object); 11263 thread_block(THREAD_CONTINUE_NULL); 11264 vm_object_lock(object); 11265 goto re_iterate; 11266 } 11267 msr_unlock(msr); 11268 }/* queue_iterate */ 11269 11270 queue_enter(&object->msr_q, new_msr, msync_req_t, msr_q); 11271 11272 vm_object_paging_begin(object); 11273 vm_object_unlock(object); 11274 11275 queue_enter(&req_q, new_msr, msync_req_t, req_q); 11276 11277 (void) memory_object_synchronize( 11278 pager, 11279 offset, 11280 flush_size, 11281 sync_flags & ~VM_SYNC_CONTIGUOUS); 11282 11283 vm_object_lock(object); 11284 vm_object_paging_end(object); 11285 vm_object_unlock(object); 11286 }/* while */ 11287 11288 /* 11289 * wait for memory_object_sychronize_completed messages from pager(s) 11290 */ 11291 11292 while (!queue_empty(&req_q)) { 11293 msr = (msync_req_t)queue_first(&req_q); 11294 msr_lock(msr); 11295 while(msr->flag != VM_MSYNC_DONE) { 11296 assert_wait((event_t) msr, THREAD_INTERRUPTIBLE); 11297 msr_unlock(msr); 11298 thread_block(THREAD_CONTINUE_NULL); 11299 msr_lock(msr); 11300 }/* while */ 11301 queue_remove(&req_q, msr, msync_req_t, req_q); 11302 msr_unlock(msr); 11303 vm_object_deallocate(msr->object); 11304 msync_req_free(msr); 11305 }/* queue_iterate */ 11306 11307 /* for proper msync() behaviour */ 11308 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) 11309 return(KERN_INVALID_ADDRESS); 11310 11311 return(KERN_SUCCESS); 11312}/* vm_msync */ 11313 11314/* 11315 * Routine: convert_port_entry_to_map 11316 * Purpose: 11317 * Convert from a port specifying an entry or a task 11318 * to a map. Doesn't consume the port ref; produces a map ref, 11319 * which may be null. Unlike convert_port_to_map, the 11320 * port may be task or a named entry backed. 11321 * Conditions: 11322 * Nothing locked. 11323 */ 11324 11325 11326vm_map_t 11327convert_port_entry_to_map( 11328 ipc_port_t port) 11329{ 11330 vm_map_t map; 11331 vm_named_entry_t named_entry; 11332 uint32_t try_failed_count = 0; 11333 11334 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { 11335 while(TRUE) { 11336 ip_lock(port); 11337 if(ip_active(port) && (ip_kotype(port) 11338 == IKOT_NAMED_ENTRY)) { 11339 named_entry = 11340 (vm_named_entry_t)port->ip_kobject; 11341 if (!(mutex_try(&(named_entry)->Lock))) { 11342 ip_unlock(port); 11343 11344 try_failed_count++; 11345 mutex_pause(try_failed_count); 11346 continue; 11347 } 11348 named_entry->ref_count++; 11349 mutex_unlock(&(named_entry)->Lock); 11350 ip_unlock(port); 11351 if ((named_entry->is_sub_map) && 11352 (named_entry->protection 11353 & VM_PROT_WRITE)) { 11354 map = named_entry->backing.map; 11355 } else { 11356 mach_destroy_memory_entry(port); 11357 return VM_MAP_NULL; 11358 } 11359 vm_map_reference_swap(map); 11360 mach_destroy_memory_entry(port); 11361 break; 11362 } 11363 else 11364 return VM_MAP_NULL; 11365 } 11366 } 11367 else 11368 map = convert_port_to_map(port); 11369 11370 return map; 11371} 11372 11373/* 11374 * Routine: convert_port_entry_to_object 11375 * Purpose: 11376 * Convert from a port specifying a named entry to an 11377 * object. Doesn't consume the port ref; produces a map ref, 11378 * which may be null. 11379 * Conditions: 11380 * Nothing locked. 11381 */ 11382 11383 11384vm_object_t 11385convert_port_entry_to_object( 11386 ipc_port_t port) 11387{ 11388 vm_object_t object; 11389 vm_named_entry_t named_entry; 11390 uint32_t try_failed_count = 0; 11391 11392 if(IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) { 11393 while(TRUE) { 11394 ip_lock(port); 11395 if(ip_active(port) && (ip_kotype(port) 11396 == IKOT_NAMED_ENTRY)) { 11397 named_entry = 11398 (vm_named_entry_t)port->ip_kobject; 11399 if (!(mutex_try(&(named_entry)->Lock))) { 11400 ip_unlock(port); 11401 11402 try_failed_count++; 11403 mutex_pause(try_failed_count); 11404 continue; 11405 } 11406 named_entry->ref_count++; 11407 mutex_unlock(&(named_entry)->Lock); 11408 ip_unlock(port); 11409 if ((!named_entry->is_sub_map) && 11410 (!named_entry->is_pager) && 11411 (named_entry->protection 11412 & VM_PROT_WRITE)) { 11413 object = named_entry->backing.object; 11414 } else { 11415 mach_destroy_memory_entry(port); 11416 return (vm_object_t)NULL; 11417 } 11418 vm_object_reference(named_entry->backing.object); 11419 mach_destroy_memory_entry(port); 11420 break; 11421 } 11422 else 11423 return (vm_object_t)NULL; 11424 } 11425 } else { 11426 return (vm_object_t)NULL; 11427 } 11428 11429 return object; 11430} 11431 11432/* 11433 * Export routines to other components for the things we access locally through 11434 * macros. 11435 */ 11436#undef current_map 11437vm_map_t 11438current_map(void) 11439{ 11440 return (current_map_fast()); 11441} 11442 11443/* 11444 * vm_map_reference: 11445 * 11446 * Most code internal to the osfmk will go through a 11447 * macro defining this. This is always here for the 11448 * use of other kernel components. 11449 */ 11450#undef vm_map_reference 11451void 11452vm_map_reference( 11453 register vm_map_t map) 11454{ 11455 if (map == VM_MAP_NULL) 11456 return; 11457 11458 mutex_lock(&map->s_lock); 11459#if TASK_SWAPPER 11460 assert(map->res_count > 0); 11461 assert(map->ref_count >= map->res_count); 11462 map->res_count++; 11463#endif 11464 map->ref_count++; 11465 mutex_unlock(&map->s_lock); 11466} 11467 11468/* 11469 * vm_map_deallocate: 11470 * 11471 * Removes a reference from the specified map, 11472 * destroying it if no references remain. 11473 * The map should not be locked. 11474 */ 11475void 11476vm_map_deallocate( 11477 register vm_map_t map) 11478{ 11479 unsigned int ref; 11480 11481 if (map == VM_MAP_NULL) 11482 return; 11483 11484 mutex_lock(&map->s_lock); 11485 ref = --map->ref_count; 11486 if (ref > 0) { 11487 vm_map_res_deallocate(map); 11488 mutex_unlock(&map->s_lock); 11489 return; 11490 } 11491 assert(map->ref_count == 0); 11492 mutex_unlock(&map->s_lock); 11493 11494#if TASK_SWAPPER 11495 /* 11496 * The map residence count isn't decremented here because 11497 * the vm_map_delete below will traverse the entire map, 11498 * deleting entries, and the residence counts on objects 11499 * and sharing maps will go away then. 11500 */ 11501#endif 11502 11503 vm_map_destroy(map, VM_MAP_NO_FLAGS); 11504} 11505 11506 11507void 11508vm_map_disable_NX(vm_map_t map) 11509{ 11510 if (map == NULL) 11511 return; 11512 if (map->pmap == NULL) 11513 return; 11514 11515 pmap_disable_NX(map->pmap); 11516} 11517 11518/* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS) 11519 * more descriptive. 11520 */ 11521void 11522vm_map_set_32bit(vm_map_t map) 11523{ 11524 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS; 11525} 11526 11527 11528void 11529vm_map_set_64bit(vm_map_t map) 11530{ 11531 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS; 11532} 11533 11534vm_map_offset_t 11535vm_compute_max_offset(unsigned is64) 11536{ 11537 return (is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS); 11538} 11539 11540boolean_t 11541vm_map_is_64bit( 11542 vm_map_t map) 11543{ 11544 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS); 11545} 11546 11547boolean_t 11548vm_map_has_4GB_pagezero( 11549 vm_map_t map) 11550{ 11551 /* 11552 * XXX FBDP 11553 * We should lock the VM map (for read) here but we can get away 11554 * with it for now because there can't really be any race condition: 11555 * the VM map's min_offset is changed only when the VM map is created 11556 * and when the zero page is established (when the binary gets loaded), 11557 * and this routine gets called only when the task terminates and the 11558 * VM map is being torn down, and when a new map is created via 11559 * load_machfile()/execve(). 11560 */ 11561 return (map->min_offset >= 0x100000000ULL); 11562} 11563 11564void 11565vm_map_set_4GB_pagezero(vm_map_t map) 11566{ 11567 pmap_set_4GB_pagezero(map->pmap); 11568} 11569 11570void 11571vm_map_clear_4GB_pagezero(vm_map_t map) 11572{ 11573 pmap_clear_4GB_pagezero(map->pmap); 11574} 11575 11576/* 11577 * Raise a VM map's minimum offset. 11578 * To strictly enforce "page zero" reservation. 11579 */ 11580kern_return_t 11581vm_map_raise_min_offset( 11582 vm_map_t map, 11583 vm_map_offset_t new_min_offset) 11584{ 11585 vm_map_entry_t first_entry; 11586 11587 new_min_offset = vm_map_round_page(new_min_offset); 11588 11589 vm_map_lock(map); 11590 11591 if (new_min_offset < map->min_offset) { 11592 /* 11593 * Can't move min_offset backwards, as that would expose 11594 * a part of the address space that was previously, and for 11595 * possibly good reasons, inaccessible. 11596 */ 11597 vm_map_unlock(map); 11598 return KERN_INVALID_ADDRESS; 11599 } 11600 11601 first_entry = vm_map_first_entry(map); 11602 if (first_entry != vm_map_to_entry(map) && 11603 first_entry->vme_start < new_min_offset) { 11604 /* 11605 * Some memory was already allocated below the new 11606 * minimun offset. It's too late to change it now... 11607 */ 11608 vm_map_unlock(map); 11609 return KERN_NO_SPACE; 11610 } 11611 11612 map->min_offset = new_min_offset; 11613 11614 vm_map_unlock(map); 11615 11616 return KERN_SUCCESS; 11617} 11618 11619/* 11620 * Set the limit on the maximum amount of user wired memory allowed for this map. 11621 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of 11622 * the kernel. The limits are checked in the mach VM side, so we keep a copy so we 11623 * don't have to reach over to the BSD data structures. 11624 */ 11625 11626void 11627vm_map_set_user_wire_limit(vm_map_t map, 11628 vm_size_t limit) 11629{ 11630 map->user_wire_limit = limit; 11631} 11632 11633void vm_map_set_prot_copy_allow(vm_map_t map, 11634 boolean_t allow) 11635{ 11636 vm_map_lock(map); 11637 map->prot_copy_allow = allow; 11638 vm_map_unlock(map); 11639}; 11640