/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_kern.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
#include <vm/cpm.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <sys/kdebug.h>

/*
 *	Variables exported by this module.
 */

vm_map_t	kernel_map;
vm_map_t	kernel_pageable_map;

extern boolean_t vm_kernel_ready;
/*
 * Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size);

extern void kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection);

kern_return_t
kmem_alloc_contig(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size,
	vm_offset_t	mask,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_addr;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, i;
	vm_map_entry_t		entry;
	vm_page_t		m, pages;
	kern_return_t		kr;

	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT)))
		return KERN_INVALID_ARGUMENT;

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t)mask;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Allocate a new object (if necessary) and the reference we
	 *	will be donating to the map entry.  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		return kr;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ?
		map_addr : 0;

	/* Take an extra object ref in case the map entry gets deleted */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

	if (kr != KERN_SUCCESS) {
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr,
						VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size,
						VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		*addrp = 0;
		return kr;
	}

	vm_object_lock(object);
	for (i = 0; i < map_size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
		m->busy = FALSE;
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	kr = vm_map_wire(map,
			 vm_map_trunc_page(map_addr,
					   VM_MAP_PAGE_MASK(map)),
			 vm_map_round_page(map_addr + map_size,
					   VM_MAP_PAGE_MASK(map)),
			 VM_PROT_DEFAULT,
			 FALSE);
	if (kr != KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + map_size);
			vm_object_unlock(object);
		}
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr,
						VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size,
						VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		return kr;
	}
	vm_object_deallocate(object);

	if (object == kernel_object)
		vm_map_simplify(map, map_addr);

	*addrp = (vm_offset_t) map_addr;
	assert((vm_map_offset_t) *addrp == map_addr);
	return KERN_SUCCESS;
}
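/*
 * Example (an illustrative sketch, not part of this file's interface):
 * a driver that needs a physically contiguous, wired buffer below 4GB
 * might call kmem_alloc_contig() roughly as follows.  The 64KB size and
 * the 0xFFFFF max_pnum value (highest 4KB page number below 4GB) are
 * assumptions made for the example only.
 *
 *	vm_offset_t	buf;
 *	kern_return_t	kr;
 *
 *	kr = kmem_alloc_contig(kernel_map, &buf, 64 * 1024,
 *			       0,			// no extra alignment constraint
 *			       (ppnum_t)0xFFFFF,	// pages must lie below 4GB
 *			       0,			// no page-number alignment
 *			       KMA_NOPAGEWAIT);
 *	if (kr != KERN_SUCCESS)
 *		return kr;	// contiguous memory may simply be unavailable
 *	...
 *	kmem_free(kernel_map, buf, 64 * 1024);
 */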
/*
 * Master entry point for allocating kernel memory.
 * NOTE: this routine is _never_ interrupt safe.
 *
 * map		: map to allocate into
 * addrp	: pointer to start address of new memory
 * size		: size of memory requested
 * flags	: options
 *		  KMA_HERE		*addrp is base address, else "anywhere"
 *		  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *		  KMA_KOBJECT		use kernel_object
 *		  KMA_LOMEM		support for 32 bit devices in a 64 bit world
 *					if set and a lomemory pool is available
 *					grab pages from it... this also implies
 *					KMA_NOPAGEWAIT
 */

kern_return_t
kernel_memory_allocate(
	register vm_map_t	map,
	register vm_offset_t	*addrp,
	register vm_size_t	size,
	register vm_offset_t	mask,
	int			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_object_offset_t	pg_offset;
	vm_map_entry_t		entry = NULL;
	vm_map_offset_t		map_addr, fill_start;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, fill_size;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		guard_page_list = NULL;
	vm_page_t		wired_page_list = NULL;
	int			guard_page_count = 0;
	int			wired_page_count = 0;
	int			i;
	int			vm_alloc_flags;
	vm_prot_t		kma_prot;

	if (! vm_kernel_ready) {
		panic("kernel_memory_allocate: VM is not ready");
	}

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t) mask;
	vm_alloc_flags = 0;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * limit the size of a single extent of wired memory
	 * to try to limit the damage to the system if
	 * too many pages get wired down
	 * limit raised to 2GB with 128GB max physical limit
	 */
	if (map_size > (1ULL << 31)) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as fictitious pages.  By placing guard pages
	 * on either end of a stack, they can help detect cases where a thread walks
	 * off either end of its stack.  They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages.  If those were requested, then back it out of fill_size
	 * since vm_map_find_space() takes just the actual size not including
	 * guard pages.  Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */
	fill_start = 0;
	fill_size = map_size;

	if (flags & KMA_GUARD_FIRST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
		fill_start += PAGE_SIZE_64;
		fill_size -= PAGE_SIZE_64;
		if (map_size < fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	if (flags & KMA_GUARD_LAST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
		fill_size -= PAGE_SIZE_64;
		if (map_size <= fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
	assert(wired_page_count * PAGE_SIZE_64 == fill_size);

	for (i = 0; i < guard_page_count; i++) {
		for (;;) {
			mem = vm_page_grab_guard();

			if (mem != VM_PAGE_NULL)
				break;
			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			vm_page_more_fictitious();
		}
		mem->pageq.next = (queue_entry_t)guard_page_list;
		guard_page_list = mem;
	}

	if (! (flags & KMA_VAONLY)) {
		for (i = 0; i < wired_page_count; i++) {
			uint64_t	unavailable;

			for (;;) {
				if (flags & KMA_LOMEM)
					mem = vm_page_grablo();
				else
					mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				if (flags & KMA_NOPAGEWAIT) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;

				if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				VM_PAGE_WAIT();
			}
			mem->pageq.next = (queue_entry_t)wired_page_list;
			wired_page_list = mem;
		}
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else if ((flags & KMA_COMPRESSOR) != 0) {
		object = compressor_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr,
			       fill_size, map_mask,
			       vm_alloc_flags, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		goto out;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object || object == compressor_object) ?
		map_addr : 0;
	if (object != compressor_object)
		entry->wired_count++;

	if (flags & KMA_PERMANENT)
		entry->permanent = TRUE;

	if (object != kernel_object && object != compressor_object)
		vm_object_reference(object);

	vm_object_lock(object);
	vm_map_unlock(map);

	pg_offset = 0;

	if (fill_start) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		pg_offset += PAGE_SIZE_64;
	}

	kma_prot = VM_PROT_READ | VM_PROT_WRITE;

	if (flags & KMA_VAONLY) {
		pg_offset = fill_start + fill_size;
	} else {
		for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
			if (wired_page_list == NULL)
				panic("kernel_memory_allocate: wired_page_list == NULL");

			mem = wired_page_list;
			wired_page_list = (vm_page_t)mem->pageq.next;
			mem->pageq.next = NULL;
			mem->wire_count++;

			vm_page_insert(mem, object, offset + pg_offset);

			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;

			PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
					   PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {
				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

				vm_object_lock(object);
			}
			if (flags & KMA_NOENCRYPT) {
				bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);

				pmap_set_noencrypt(mem->phys_page);
			}
		}
	}
	if ((fill_start + fill_size) < map_size) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
	}
	if (guard_page_list || wired_page_list)
		panic("kernel_memory_allocate: non empty list\n");

	if (! (flags & KMA_VAONLY)) {
		vm_page_lockspin_queues();
		vm_page_wire_count += wired_page_count;
		vm_page_unlock_queues();
	}

	vm_object_unlock(object);

	/*
	 * now that the pages are wired, we no longer have to fear coalescing
	 */
	if (object == kernel_object || object == compressor_object)
		vm_map_simplify(map, map_addr);
	else
		vm_object_deallocate(object);

	/*
	 *	Return the memory, not zeroed.
	 */
	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;

out:
	if (guard_page_list)
		vm_page_free_list(guard_page_list, FALSE);

	if (wired_page_list)
		vm_page_free_list(wired_page_list, FALSE);

	return kr;
}
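/*
 * Example (an illustrative sketch, not part of this file): allocating a
 * kernel stack with guard pages on both ends.  The size passed in must
 * already include the two guard pages; kernel_memory_allocate() backs
 * them out of fill_size itself.  kernel_stack_size is assumed here to be
 * a page-multiple payload size.
 *
 *	vm_offset_t	stack;
 *	kern_return_t	kr;
 *
 *	kr = kernel_memory_allocate(kernel_map, &stack,
 *				    kernel_stack_size + 2 * PAGE_SIZE,	// payload plus guards
 *				    0,					// no extra alignment
 *				    KMA_KOBJECT | KMA_KSTACK |
 *				    KMA_GUARD_FIRST | KMA_GUARD_LAST);
 *	if (kr == KERN_SUCCESS) {
 *		// usable range is [stack + PAGE_SIZE,
 *		//                  stack + PAGE_SIZE + kernel_stack_size)
 *	}
 */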
kern_return_t
kernel_memory_populate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		page_list = NULL;
	int			page_count = 0;
	int			i;

	page_count = (int) (size / PAGE_SIZE_64);

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {

		for (i = 0; i < page_count; i++) {
			for (;;) {
				mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				VM_PAGE_WAIT();
			}
			mem->pageq.next = (queue_entry_t) page_list;
			page_list = mem;
		}
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);

		for (pg_offset = 0;
		     pg_offset < size;
		     pg_offset += PAGE_SIZE_64) {

			mem = page_list;
			page_list = (vm_page_t) mem->pageq.next;
			mem->pageq.next = NULL;

			vm_page_insert(mem, object, offset + pg_offset);
			assert(mem->busy);

			PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
					   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
					   0, TRUE, PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {

				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
					   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);

				vm_object_lock(object);
			}
			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;
			mem->compressor = TRUE;
		}
		vm_object_unlock(object);

		return KERN_SUCCESS;
	}

	for (i = 0; i < page_count; i++) {
		for (;;) {
			if (flags & KMA_LOMEM)
				mem = vm_page_grablo();
			else
				mem = vm_page_grab();

			if (mem != VM_PAGE_NULL)
				break;

			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			if ((flags & KMA_LOMEM) &&
			    (vm_lopage_needed == TRUE)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			VM_PAGE_WAIT();
		}
		mem->pageq.next = (queue_entry_t) page_list;
		page_list = mem;
	}
	if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		/*
		 * If it's not the kernel object, we need to:
		 *	lock map;
		 *	lookup entry;
		 *	lock object;
		 *	take reference on object;
		 *	unlock map;
		 */
		panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		if (page_list == NULL)
			panic("kernel_memory_populate: page_list == NULL");

		mem = page_list;
		page_list = (vm_page_t) mem->pageq.next;
		mem->pageq.next = NULL;

		mem->wire_count++;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
				   PMAP_OPTIONS_NOWAIT, pe_result);

		if (pe_result == KERN_RESOURCE_SHORTAGE) {

			vm_object_unlock(object);

			PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

			vm_object_lock(object);
		}
		if (flags & KMA_NOENCRYPT) {
			bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
			pmap_set_noencrypt(mem->phys_page);
		}
	}
	vm_page_lock_queues();
	vm_page_wire_count += page_count;
	vm_page_unlock_queues();

	vm_object_unlock(object);

	return KERN_SUCCESS;

out:
	if (page_list)
		vm_page_free_list(page_list, FALSE);

	return kr;
}


void
kernel_memory_depopulate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	vm_page_t		mem;
	vm_page_t		local_freeq = NULL;

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);
	} else if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		offset = 0;
		object = NULL;
		/*
		 * If it's not the kernel object, we need to:
		 *	lock map;
		 *	lookup entry;
		 *	lock object;
		 *	unlock map;
		 */
		panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}
	pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		mem = vm_page_lookup(object, offset + pg_offset);

		assert(mem);

		pmap_disconnect(mem->phys_page);

		mem->busy = TRUE;

		assert(mem->tabled);
		vm_page_remove(mem, TRUE);
		assert(mem->busy);

		assert(mem->pageq.next == NULL &&
		       mem->pageq.prev == NULL);
		mem->pageq.next = (queue_entry_t)local_freeq;
		local_freeq = mem;
	}
	vm_object_unlock(object);

	if (local_freeq)
		vm_page_free_list(local_freeq, TRUE);
}
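/*
 * Example (an illustrative sketch, not part of this file): pairing
 * kernel_memory_populate() with kernel_memory_depopulate() to fill and
 * later drain a VA-only range reserved in the kernel object with
 * KMA_VAONLY.  The 16KB size is an arbitrary assumption.
 *
 *	vm_offset_t	va;
 *
 *	if (kernel_memory_allocate(kernel_map, &va, 16 * 1024, 0,
 *				   KMA_KOBJECT | KMA_VAONLY) == KERN_SUCCESS) {
 *		// back the reserved range with wired pages on demand...
 *		kernel_memory_populate(kernel_map, va, 16 * 1024, KMA_KOBJECT);
 *		...
 *		// ...and release the pages while keeping the VA reservation
 *		kernel_memory_depopulate(kernel_map, va, 16 * 1024, KMA_KOBJECT);
 *	}
 */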
/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
	TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
	return kr;
}
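/*
 * Example (an illustrative sketch, not part of this file): the common
 * allocate/use/free pattern.  Because the memory is not zero-filled,
 * callers that need zeroed memory must bzero() it themselves.  `len'
 * stands in for the caller's byte count.
 *
 *	vm_offset_t	buf;
 *	vm_size_t	size = round_page(len);
 *
 *	if (kmem_alloc(kernel_map, &buf, size) != KERN_SUCCESS)
 *		return KERN_RESOURCE_SHORTAGE;
 *	bzero((void *)buf, size);
 *	...
 *	kmem_free(kernel_map, buf, size);
 */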
/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
	vm_map_t	map,
	vm_offset_t	oldaddr,
	vm_size_t	oldsize,
	vm_offset_t	*newaddrp,
	vm_size_t	newsize)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		oldmapmin;
	vm_map_offset_t		oldmapmax;
	vm_map_offset_t		newmapaddr;
	vm_map_size_t		oldmapsize;
	vm_map_size_t		newmapsize;
	vm_map_entry_t		oldentry;
	vm_map_entry_t		newentry;
	vm_page_t		mem;
	kern_return_t		kr;

	oldmapmin = vm_map_trunc_page(oldaddr,
				      VM_MAP_PAGE_MASK(map));
	oldmapmax = vm_map_round_page(oldaddr + oldsize,
				      VM_MAP_PAGE_MASK(map));
	oldmapsize = oldmapmax - oldmapmin;
	newmapsize = vm_map_round_page(newsize,
				       VM_MAP_PAGE_MASK(map));


	/*
	 *	Find the VM object backing the old region.
	 */

	vm_map_lock(map);

	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	vm_object_reference(object);
	/* by grabbing the object lock before unlocking the map */
	/* we guarantee that we will panic if more than one     */
	/* attempt is made to realloc a kmem_alloc'd area       */
	vm_object_lock(object);
	vm_map_unlock(map);
	if (object->vo_size != oldmapsize)
		panic("kmem_realloc");
	object->vo_size = newmapsize;
	vm_object_unlock(object);

	/* allocate the new pages while expanded portion of the */
	/* object is still not mapped */
	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
			 vm_object_round_page(newmapsize-oldmapsize));

	/*
	 *	Find space for the new region.
	 */

	kr = vm_map_find_space(map, &newmapaddr, newmapsize,
			       (vm_map_offset_t) 0, 0, &newentry);
	if (kr != KERN_SUCCESS) {
		vm_object_lock(object);
		for(offset = oldmapsize;
		    offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return kr;
	}
	newentry->object.vm_object = object;
	newentry->offset = 0;
	assert (newentry->wired_count == 0);


	/* add an extra reference in case we have someone doing an */
	/* unexpected deallocate */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, VM_PROT_DEFAULT, FALSE);
	if (KERN_SUCCESS != kr) {
		vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0);
		vm_object_lock(object);
		/* free only the newly allocated pages, as in the path above */
		for(offset = oldmapsize; offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return (kr);
	}
	vm_object_deallocate(object);

	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
	return KERN_SUCCESS;
}
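/*
 * Example (an illustrative sketch, not part of this file): growing a
 * kmem_alloc'd region.  On success the old pages are mapped at both the
 * old and the new address, so the old range must still be freed
 * explicitly once nothing references it.  oldbuf/oldsize/newsize stand
 * in for the caller's state.
 *
 *	vm_offset_t	newbuf;
 *
 *	if (kmem_realloc(kernel_map, oldbuf, oldsize,
 *			 &newbuf, newsize) == KERN_SUCCESS) {
 *		// newbuf covers newsize bytes; its first oldsize bytes
 *		// alias the same physical pages as oldbuf
 *		kmem_free(kernel_map, oldbuf, oldsize);
 *	}
 */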
/*
 *	kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_kobject(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
}

/*
 *	kmem_alloc_aligned:
 *
 *	Like kmem_alloc_kobject, except that the memory is aligned.
 *	The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	if ((size & (size - 1)) != 0)
		panic("kmem_alloc_aligned: size not aligned");
	return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
}
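/*
 * Example (an illustrative sketch, not part of this file): because
 * kmem_alloc_aligned() uses (size - 1) as the alignment mask, a
 * power-of-2 size yields a block aligned to its own size.  A 64KB
 * request comes back 64KB-aligned; a 48KB request would panic, since
 * 48KB is not a power of 2.
 *
 *	vm_offset_t	table;
 *
 *	if (kmem_alloc_aligned(kernel_map, &table, 64 * 1024) == KERN_SUCCESS)
 *		assert((table & (64 * 1024 - 1)) == 0);
 */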
/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	vm_map_offset_t map_addr;
	vm_map_size_t	map_size;
	kern_return_t kr;

#ifndef normal
	map_addr = (vm_map_min(map)) + PAGE_SIZE;
#else
	map_addr = vm_map_min(map);
#endif
	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));

	kr = vm_map_enter(map, &map_addr, map_size,
			  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS)
		return kr;

	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
 *	and return the physical pages associated with that region.
 */

void
kmem_free(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size)
{
	kern_return_t kr;

	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);

	TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);

	if(size == 0) {
#if MACH_ASSERT
		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n",map,(uint64_t)addr);
#endif
		return;
	}

	kr = vm_map_remove(map,
			   vm_map_trunc_page(addr,
					     VM_MAP_PAGE_MASK(map)),
			   vm_map_round_page(addr + size,
					     VM_MAP_PAGE_MASK(map)),
			   VM_MAP_REMOVE_KUNWIRE);
	if (kr != KERN_SUCCESS)
		panic("kmem_free");
}
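/*
 * Example (an illustrative sketch, not part of this file): pageable
 * allocations are backed lazily, so they suit large buffers that are
 * touched sparsely and may safely be paged out; they must never be used
 * by code that runs with interrupts disabled or while holding spinlocks.
 * The 1MB size is an arbitrary assumption.
 *
 *	vm_offset_t	scratch;
 *
 *	if (kmem_alloc_pageable(kernel_pageable_map, &scratch,
 *				1024 * 1024) == KERN_SUCCESS) {
 *		...	// touch pages on demand; faults fill them
 *		kmem_free(kernel_pageable_map, scratch, 1024 * 1024);
 *	}
 */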
/*
 *	Allocate new pages in an object.
 */

kern_return_t
kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size)
{
	vm_object_size_t		alloc_size;

	alloc_size = vm_object_round_page(size);
	vm_object_lock(object);
	while (alloc_size) {
		register vm_page_t	mem;


		/*
		 *	Allocate a page
		 */
		while (VM_PAGE_NULL ==
		       (mem = vm_page_alloc(object, offset))) {
			vm_object_unlock(object);
			VM_PAGE_WAIT();
			vm_object_lock(object);
		}
		mem->busy = FALSE;

		alloc_size -= PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}

/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map
 *	or a submap.
 */
void
kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection)
{

	vm_map_offset_t			map_start;
	vm_map_offset_t			map_end;

	/*
	 *	Mark the pmap region as not pageable.
	 */
	map_start = vm_map_trunc_page(start,
				      VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
				    VM_MAP_PAGE_MASK(kernel_map));

	pmap_pageable(kernel_pmap, map_start, map_end, FALSE);

	while (map_start < map_end) {
		register vm_page_t	mem;

		vm_object_lock(object);

		/*
		 *	Find a page
		 */
		if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			panic("kmem_remap_pages");

		/*
		 *	Wire it down (again)
		 */
		vm_page_lockspin_queues();
		vm_page_wire(mem);
		vm_page_unlock_queues();
		vm_object_unlock(object);

		/*
		 * ENCRYPTED SWAP:
		 * The page is supposed to be wired now, so it
		 * shouldn't be encrypted at this point.  It can
		 * safely be entered in the page table.
		 */
		ASSERT_PAGE_DECRYPTED(mem);

		/*
		 *	Enter it in the kernel pmap.  The page isn't busy,
		 *	but this shouldn't be a problem because it is wired.
		 */

		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER(kernel_pmap, map_start, mem, protection, VM_PROT_NONE, 0, TRUE);

		map_start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	addr		Address of start of range (IN/OUT)
 *	size		Size of range to find
 *	pageable	Can region be paged
 *	flags		VM_FLAGS_ANYWHERE, etc. (controls placement)
 *	new_map		Pointer to new submap
 */
kern_return_t
kmem_suballoc(
	vm_map_t	parent,
	vm_offset_t	*addr,
	vm_size_t	size,
	boolean_t	pageable,
	int		flags,
	vm_map_t	*new_map)
{
	vm_map_t	map;
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(parent));

	/*
	 *	Need reference on submap object because it is internal
	 *	to the vm_system.  vm_object_enter will never be called
	 *	on it (usual source of reference for vm_map_enter).
	 */
	vm_object_reference(vm_submap_object);

	map_addr = ((flags & VM_FLAGS_ANYWHERE)
		    ? vm_map_min(parent)
		    : vm_map_trunc_page(*addr,
					VM_MAP_PAGE_MASK(parent)));

	kr = vm_map_enter(parent, &map_addr, map_size,
			  (vm_map_offset_t) 0, flags,
			  vm_submap_object, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
	if (map == VM_MAP_NULL)
		panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */
	/* inherit the parent map's page size */
	vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));

	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * See comment preceding vm_map_submap().
		 */
		vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS);
		vm_map_deallocate(map);	/* also removes ref to pmap */
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}
	*addr = CAST_DOWN(vm_offset_t, map_addr);
	*new_map = map;
	return (KERN_SUCCESS);
}
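/*
 * Example (an illustrative sketch, not part of this file): carving a
 * subsystem-private submap out of kernel_map, so that subsystem's
 * allocations are bounded and cannot exhaust the parent map.  The 32MB
 * size and the use of VM_FLAGS_ANYWHERE are assumptions for the example.
 *
 *	vm_map_t	io_submap;
 *	vm_offset_t	base = 0;
 *
 *	if (kmem_suballoc(kernel_map, &base, 32 * 1024 * 1024,
 *			  FALSE,			// not pageable
 *			  VM_FLAGS_ANYWHERE,
 *			  &io_submap) == KERN_SUCCESS) {
 *		// subsequent kmem_alloc(io_submap, ...) calls draw from
 *		// [base, base + 32MB) only
 *	}
 */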
/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
void
kmem_init(
	vm_offset_t	start,
	vm_offset_t	end)
{
	vm_map_offset_t map_start;
	vm_map_offset_t map_end;

	map_start = vm_map_trunc_page(start,
				      VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
				    VM_MAP_PAGE_MASK(kernel_map));

	kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
				   map_end, FALSE);
	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
				  &map_addr,
				  (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
				  (vm_map_offset_t) 0,
				  VM_FLAGS_FIXED | VM_FLAGS_NO_PMAP_CHECK,
				  VM_OBJECT_NULL,
				  (vm_object_offset_t) 0, FALSE,
				  VM_PROT_NONE, VM_PROT_NONE,
				  VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
			      (uint64_t) start, (uint64_t) end,
			      (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			      (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			      kr);
		}
	}

	/*
	 * Set the default global user wire limit which limits the amount of
	 * memory that can be locked via mlock().  We set this to the total
	 * amount of memory that is potentially usable by a user app (max_mem)
	 * minus a certain amount.  This can be overridden via a sysctl.
	 */
	vm_global_no_user_wire_amount = MIN(max_mem*20/100,
					    VM_NOT_USER_WIREABLE);
	vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;

	/* the default per user limit is the same as the global limit */
	vm_user_wire_limit = vm_global_user_wire_limit;
}


/*
 *	Routine:	copyinmap
 *	Purpose:
 *		Like copyin, except that fromaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyinmap(
	vm_map_t		map,
	vm_map_offset_t		fromaddr,
	void			*todata,
	vm_size_t		length)
{
	kern_return_t	kr = KERN_SUCCESS;
	vm_map_t	oldmap;

	if (vm_map_pmap(map) == pmap_kernel())
	{
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	}
	else if (current_map() == map)
	{
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
	}
	else
	{
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}

/*
 *	Routine:	copyoutmap
 *	Purpose:
 *		Like copyout, except that toaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyoutmap(
	vm_map_t		map,
	void			*fromdata,
	vm_map_address_t	toaddr,
	vm_size_t		length)
{
	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
		return KERN_SUCCESS;
	}

	if (current_map() != map)
		return KERN_NOT_SUPPORTED;

	if (copyout(fromdata, toaddr, length) != 0)
		return KERN_INVALID_ADDRESS;

	return KERN_SUCCESS;
}


kern_return_t
vm_conflict_check(
	vm_map_t		map,
	vm_map_offset_t		off,
	vm_map_size_t		len,
	memory_object_t		pager,
	vm_object_offset_t	file_off)
{
	vm_map_entry_t		entry;
	vm_object_t		obj;
	vm_object_offset_t	obj_off;
	vm_map_t		base_map;
	vm_map_offset_t		base_offset;
	vm_map_offset_t		original_offset;
	kern_return_t		kr;
	vm_map_size_t		local_len;

	base_map = map;
	base_offset = off;
	original_offset = off;
	kr = KERN_SUCCESS;
	vm_map_lock(map);
	while(vm_map_lookup_entry(map, off, &entry)) {
		local_len = len;

		if (entry->object.vm_object == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			return KERN_SUCCESS;
		}
		if (entry->is_sub_map) {
			vm_map_t	old_map;

			old_map = map;
			vm_map_lock(entry->object.sub_map);
			map = entry->object.sub_map;
			off = entry->offset + (off - entry->vme_start);
			vm_map_unlock(old_map);
			continue;
		}
		obj = entry->object.vm_object;
		obj_off = (off - entry->vme_start) + entry->offset;
		while(obj->shadow) {
			obj_off += obj->vo_shadow_offset;
			obj = obj->shadow;
		}
		if((obj->pager_created) && (obj->pager == pager)) {
			if(((obj->paging_offset) + obj_off) == file_off) {
				if(off != base_offset) {
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				kr = KERN_ALREADY_WAITING;
			} else {
				vm_object_offset_t	obj_off_aligned;
				vm_object_offset_t	file_off_aligned;

				obj_off_aligned = obj_off & ~PAGE_MASK;
				file_off_aligned = file_off & ~PAGE_MASK;

				if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) {
					/*
					 * the target map and the file offset start in the same page
					 * but are not identical...
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off < (obj->paging_offset + obj_off_aligned)) &&
				    ((file_off + len) > (obj->paging_offset + obj_off_aligned))) {
					/*
					 * some portion of the tail of the I/O will fall
					 * within the range covered by the target map
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off_aligned > (obj->paging_offset + obj_off)) &&
				    (file_off_aligned < (obj->paging_offset + obj_off) + len)) {
					/*
					 * the beginning page of the file offset falls within
					 * the range covered by the target map
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
			}
		} else if(kr != KERN_SUCCESS) {
			vm_map_unlock(map);
			return KERN_FAILURE;
		}

		if(len <= ((entry->vme_end - entry->vme_start) -
			   (off - entry->vme_start))) {
			vm_map_unlock(map);
			return kr;
		} else {
			len -= (entry->vme_end - entry->vme_start) -
			       (off - entry->vme_start);
		}
		base_offset = base_offset + (local_len - len);
		file_off = file_off + (local_len - len);
		off = base_offset;
		if(map != base_map) {
			vm_map_unlock(map);
			vm_map_lock(base_map);
			map = base_map;
		}
	}

	vm_map_unlock(map);
	return kr;
}
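/*
 * Example (an illustrative sketch, not part of this file): a
 * hypothetical file-I/O path might use vm_conflict_check() to detect
 * that a user buffer is itself mmapped from the file range the I/O
 * targets.  `uaddr', `len', `vnode_pager', and `f_offset' stand in for
 * the caller's state and are assumptions of this example.
 *
 *	switch (vm_conflict_check(current_map(), (vm_map_offset_t)uaddr,
 *				  len, vnode_pager, f_offset)) {
 *	case KERN_SUCCESS:
 *		break;			// no overlap with this pager
 *	case KERN_ALREADY_WAITING:
 *		...			// buffer maps exactly the target range
 *		break;
 *	default:
 *		...			// partial overlap: take the slow path
 *		break;
 *	}
 */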