/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */
/*
 *	File:	vm/vm_kern.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	Date:	1985
 *
 *	Kernel memory management.
 */

#include <mach/kern_return.h>
#include <mach/vm_param.h>
#include <kern/assert.h>
#include <kern/lock.h>
#include <kern/thread.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/misc_protos.h>
#include <vm/cpm.h>

#include <string.h>

#include <libkern/OSDebug.h>
#include <sys/kdebug.h>

/*
 *	Variables exported by this module.
 */

vm_map_t	kernel_map;
vm_map_t	kernel_pageable_map;

extern boolean_t vm_kernel_ready;

/*
 *	Forward declarations for internal functions.
 */
extern kern_return_t kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size);

extern void kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection);

kern_return_t
kmem_alloc_contig(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size,
	vm_offset_t	mask,
	ppnum_t		max_pnum,
	ppnum_t		pnum_mask,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		map_addr;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, i;
	vm_map_entry_t		entry;
	vm_page_t		m, pages;
	kern_return_t		kr;

	if (map == VM_MAP_NULL || (flags & ~(KMA_KOBJECT | KMA_LOMEM | KMA_NOPAGEWAIT)))
		return KERN_INVALID_ARGUMENT;

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t)mask;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 *	Allocate a new object (if necessary) and the reference we
	 *	will be donating to the map entry.  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr, map_size, map_mask, 0, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		return kr;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object) ?
		map_addr : 0;

	/* Take an extra object ref in case the map entry gets deleted */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = cpm_allocate(CAST_DOWN(vm_size_t, map_size), &pages, max_pnum, pnum_mask, FALSE, flags);

	if (kr != KERN_SUCCESS) {
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr,
						VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size,
						VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		*addrp = 0;
		return kr;
	}

	vm_object_lock(object);
	for (i = 0; i < map_size; i += PAGE_SIZE) {
		m = pages;
		pages = NEXT_PAGE(m);
		*(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
		m->busy = FALSE;
		vm_page_insert(m, object, offset + i);
	}
	vm_object_unlock(object);

	kr = vm_map_wire(map,
			 vm_map_trunc_page(map_addr,
					   VM_MAP_PAGE_MASK(map)),
			 vm_map_round_page(map_addr + map_size,
					   VM_MAP_PAGE_MASK(map)),
			 VM_PROT_DEFAULT,
			 FALSE);
	if (kr != KERN_SUCCESS) {
		if (object == kernel_object) {
			vm_object_lock(object);
			vm_object_page_remove(object, offset, offset + map_size);
			vm_object_unlock(object);
		}
		vm_map_remove(map,
			      vm_map_trunc_page(map_addr,
						VM_MAP_PAGE_MASK(map)),
			      vm_map_round_page(map_addr + map_size,
						VM_MAP_PAGE_MASK(map)),
			      0);
		vm_object_deallocate(object);
		return kr;
	}
	vm_object_deallocate(object);

	if (object == kernel_object)
		vm_map_simplify(map, map_addr);

	*addrp = (vm_offset_t) map_addr;
	assert((vm_map_offset_t) *addrp == map_addr);
	return KERN_SUCCESS;
}
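
/*
 * Illustrative sketch (not part of the original source): a caller that
 * needs physically contiguous memory addressable by a 32-bit DMA engine
 * might call kmem_alloc_contig() roughly like this.  The buffer size and
 * the 4GB physical ceiling are assumptions made for the example.
 */
#if 0	/* example only */
static kern_return_t
example_grab_dma_buffer(vm_offset_t *addrp)
{
	/* highest acceptable physical page: everything below 4GB */
	ppnum_t max_pnum = (ppnum_t)((1ULL << 32) >> PAGE_SHIFT);

	return kmem_alloc_contig(kernel_map, addrp,
				 64 * 1024,	/* size, rounded up to pages */
				 PAGE_MASK,	/* virtual alignment mask */
				 max_pnum,	/* physical page ceiling */
				 0,		/* no physical alignment mask */
				 0);		/* no KMA_* flags */
}
#endif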

/*
 *	Master entry point for allocating kernel memory.
 *	NOTE: this routine is _never_ interrupt safe.
 *
 *	map		: map to allocate into
 *	addrp		: pointer to start address of new memory
 *	size		: size of memory requested
 *	flags		: options
 *			  KMA_HERE		*addrp is base address, else "anywhere"
 *			  KMA_NOPAGEWAIT	don't wait for pages if unavailable
 *			  KMA_KOBJECT		use kernel_object
 *			  KMA_LOMEM		support for 32 bit devices in a 64 bit world
 *						if set and a lomemory pool is available
 *						grab pages from it... this also implies
 *						KMA_NOPAGEWAIT
 */

kern_return_t
kernel_memory_allocate(
	register vm_map_t	map,
	register vm_offset_t	*addrp,
	register vm_size_t	size,
	register vm_offset_t	mask,
	int			flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_object_offset_t	pg_offset;
	vm_map_entry_t		entry = NULL;
	vm_map_offset_t		map_addr, fill_start;
	vm_map_offset_t		map_mask;
	vm_map_size_t		map_size, fill_size;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		guard_page_list = NULL;
	vm_page_t		wired_page_list = NULL;
	int			guard_page_count = 0;
	int			wired_page_count = 0;
	int			i;
	int			vm_alloc_flags;
	vm_prot_t		kma_prot;

	if (! vm_kernel_ready) {
		panic("kernel_memory_allocate: VM is not ready");
	}

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));
	map_mask = (vm_map_offset_t) mask;
	vm_alloc_flags = 0;

	/* Check for zero allocation size (either directly or via overflow) */
	if (map_size == 0) {
		*addrp = 0;
		return KERN_INVALID_ARGUMENT;
	}

	/*
	 * limit the size of a single extent of wired memory
	 * to try and limit the damage to the system if
	 * too many pages get wired down
	 * limit raised to 2GB with 128GB max physical limit
	 */
	if (map_size > (1ULL << 31)) {
		return KERN_RESOURCE_SHORTAGE;
	}

	/*
	 * Guard pages:
	 *
	 * Guard pages are implemented as fictitious pages.  By placing guard pages
	 * on either end of a stack, they can help detect cases where a thread walks
	 * off either end of its stack.  They are allocated and set up here and attempts
	 * to access those pages are trapped in vm_fault_page().
	 *
	 * The map_size we were passed may include extra space for
	 * guard pages.  If those were requested, then back it out of fill_size
	 * since vm_map_find_space() takes just the actual size not including
	 * guard pages.  Similarly, fill_start indicates where the actual pages
	 * will begin in the range.
	 */

	fill_start = 0;
	fill_size = map_size;

	if (flags & KMA_GUARD_FIRST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_BEFORE;
		fill_start += PAGE_SIZE_64;
		fill_size -= PAGE_SIZE_64;
		if (map_size < fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	if (flags & KMA_GUARD_LAST) {
		vm_alloc_flags |= VM_FLAGS_GUARD_AFTER;
		fill_size -= PAGE_SIZE_64;
		if (map_size <= fill_start + fill_size) {
			/* no space for a guard page */
			*addrp = 0;
			return KERN_INVALID_ARGUMENT;
		}
		guard_page_count++;
	}
	wired_page_count = (int) (fill_size / PAGE_SIZE_64);
	assert(wired_page_count * PAGE_SIZE_64 == fill_size);

	for (i = 0; i < guard_page_count; i++) {
		for (;;) {
			mem = vm_page_grab_guard();

			if (mem != VM_PAGE_NULL)
				break;
			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			vm_page_more_fictitious();
		}
		mem->pageq.next = (queue_entry_t)guard_page_list;
		guard_page_list = mem;
	}

	if (! (flags & KMA_VAONLY)) {
		for (i = 0; i < wired_page_count; i++) {
			uint64_t	unavailable;

			for (;;) {
				if (flags & KMA_LOMEM)
					mem = vm_page_grablo();
				else
					mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				if (flags & KMA_NOPAGEWAIT) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				if ((flags & KMA_LOMEM) && (vm_lopage_needed == TRUE)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				unavailable = (vm_page_wire_count + vm_page_free_target) * PAGE_SIZE;

				if (unavailable > max_mem || map_size > (max_mem - unavailable)) {
					kr = KERN_RESOURCE_SHORTAGE;
					goto out;
				}
				VM_PAGE_WAIT();
			}
			mem->pageq.next = (queue_entry_t)wired_page_list;
			wired_page_list = mem;
		}
	}

	/*
	 *	Allocate a new object (if necessary).  We must do this before
	 *	locking the map, or risk deadlock with the default pager.
	 */
	if ((flags & KMA_KOBJECT) != 0) {
		object = kernel_object;
		vm_object_reference(object);
	} else if ((flags & KMA_COMPRESSOR) != 0) {
		object = compressor_object;
		vm_object_reference(object);
	} else {
		object = vm_object_allocate(map_size);
	}

	kr = vm_map_find_space(map, &map_addr,
			       fill_size, map_mask,
			       vm_alloc_flags, &entry);
	if (KERN_SUCCESS != kr) {
		vm_object_deallocate(object);
		goto out;
	}

	entry->object.vm_object = object;
	entry->offset = offset = (object == kernel_object || object == compressor_object) ?
		map_addr : 0;

	if (object != compressor_object)
		entry->wired_count++;

	if (flags & KMA_PERMANENT)
		entry->permanent = TRUE;

	if (object != kernel_object && object != compressor_object)
		vm_object_reference(object);

	vm_object_lock(object);
	vm_map_unlock(map);

	pg_offset = 0;

	if (fill_start) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		pg_offset += PAGE_SIZE_64;
	}

	kma_prot = VM_PROT_READ | VM_PROT_WRITE;

	if (flags & KMA_VAONLY) {
		pg_offset = fill_start + fill_size;
	} else {
		for (pg_offset = fill_start; pg_offset < fill_start + fill_size; pg_offset += PAGE_SIZE_64) {
			if (wired_page_list == NULL)
				panic("kernel_memory_allocate: wired_page_list == NULL");

			mem = wired_page_list;
			wired_page_list = (vm_page_t)mem->pageq.next;
			mem->pageq.next = NULL;
			mem->wire_count++;

			vm_page_insert(mem, object, offset + pg_offset);

			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;

			PMAP_ENTER_OPTIONS(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
					   PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {
				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, map_addr + pg_offset, mem,
					   kma_prot, VM_PROT_NONE, ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

				vm_object_lock(object);
			}
			if (flags & KMA_NOENCRYPT) {
				bzero(CAST_DOWN(void *, (map_addr + pg_offset)), PAGE_SIZE);

				pmap_set_noencrypt(mem->phys_page);
			}
		}
	}
	if ((fill_start + fill_size) < map_size) {
		if (guard_page_list == NULL)
			panic("kernel_memory_allocate: guard_page_list == NULL");

		mem = guard_page_list;
		guard_page_list = (vm_page_t)mem->pageq.next;
		mem->pageq.next = NULL;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
	}
	if (guard_page_list || wired_page_list)
		panic("kernel_memory_allocate: non empty list\n");

	if (! (flags & KMA_VAONLY)) {
		vm_page_lockspin_queues();
		vm_page_wire_count += wired_page_count;
		vm_page_unlock_queues();
	}

	vm_object_unlock(object);

	/*
	 * now that the pages are wired, we no longer have to fear coalesce
	 */
	if (object == kernel_object || object == compressor_object)
		vm_map_simplify(map, map_addr);
	else
		vm_object_deallocate(object);

	/*
	 *	Return the memory, not zeroed.
	 */
	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;

out:
	if (guard_page_list)
		vm_page_free_list(guard_page_list, FALSE);

	if (wired_page_list)
		vm_page_free_list(wired_page_list, FALSE);

	return kr;
}
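
/*
 * Illustrative sketch (not part of the original source): allocating a
 * wired region bracketed by guard pages, the way the kernel-stack code
 * might.  The payload size is an assumption for the example; the two
 * guard pages must be included in the size that is passed in.
 */
#if 0	/* example only */
static kern_return_t
example_alloc_guarded_region(vm_offset_t *addrp)
{
	vm_size_t size = (4 * PAGE_SIZE) + (2 * PAGE_SIZE);	/* payload + 2 guards */

	return kernel_memory_allocate(kernel_map, addrp, size, 0,
				      KMA_KOBJECT | KMA_KSTACK |
				      KMA_GUARD_FIRST | KMA_GUARD_LAST);
}
#endif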

kern_return_t
kernel_memory_populate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	kern_return_t		kr, pe_result;
	vm_page_t		mem;
	vm_page_t		page_list = NULL;
	int			page_count = 0;
	int			i;

	page_count = (int) (size / PAGE_SIZE_64);

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {

		for (i = 0; i < page_count; i++) {
			for (;;) {
				mem = vm_page_grab();

				if (mem != VM_PAGE_NULL)
					break;

				VM_PAGE_WAIT();
			}
			mem->pageq.next = (queue_entry_t) page_list;
			page_list = mem;
		}
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);

		for (pg_offset = 0;
		     pg_offset < size;
		     pg_offset += PAGE_SIZE_64) {

			mem = page_list;
			page_list = (vm_page_t) mem->pageq.next;
			mem->pageq.next = NULL;

			vm_page_insert(mem, object, offset + pg_offset);
			assert(mem->busy);

			PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
					   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
					   0, TRUE, PMAP_OPTIONS_NOWAIT, pe_result);

			if (pe_result == KERN_RESOURCE_SHORTAGE) {

				vm_object_unlock(object);

				PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
					   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);

				vm_object_lock(object);
			}
			mem->busy = FALSE;
			mem->pmapped = TRUE;
			mem->wpmapped = TRUE;
			mem->compressor = TRUE;
		}
		vm_object_unlock(object);

		return KERN_SUCCESS;
	}

	for (i = 0; i < page_count; i++) {
		for (;;) {
			if (flags & KMA_LOMEM)
				mem = vm_page_grablo();
			else
				mem = vm_page_grab();

			if (mem != VM_PAGE_NULL)
				break;

			if (flags & KMA_NOPAGEWAIT) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			if ((flags & KMA_LOMEM) &&
			    (vm_lopage_needed == TRUE)) {
				kr = KERN_RESOURCE_SHORTAGE;
				goto out;
			}
			VM_PAGE_WAIT();
		}
		mem->pageq.next = (queue_entry_t) page_list;
		page_list = mem;
	}
	if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		/*
		 * If it's not the kernel object, we need to:
		 *	lock map;
		 *	lookup entry;
		 *	lock object;
		 *	take reference on object;
		 *	unlock map;
		 */
		panic("kernel_memory_populate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		if (page_list == NULL)
			panic("kernel_memory_populate: page_list == NULL");

		mem = page_list;
		page_list = (vm_page_t) mem->pageq.next;
		mem->pageq.next = NULL;

		mem->wire_count++;

		vm_page_insert(mem, object, offset + pg_offset);

		mem->busy = FALSE;
		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER_OPTIONS(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE,
				   PMAP_OPTIONS_NOWAIT, pe_result);

		if (pe_result == KERN_RESOURCE_SHORTAGE) {

			vm_object_unlock(object);

			PMAP_ENTER(kernel_pmap, addr + pg_offset, mem,
				   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
				   ((flags & KMA_KSTACK) ? VM_MEM_STACK : 0), TRUE);

			vm_object_lock(object);
		}
		if (flags & KMA_NOENCRYPT) {
			bzero(CAST_DOWN(void *, (addr + pg_offset)), PAGE_SIZE);
			pmap_set_noencrypt(mem->phys_page);
		}
	}
	vm_page_lock_queues();
	vm_page_wire_count += page_count;
	vm_page_unlock_queues();

	vm_object_unlock(object);

	return KERN_SUCCESS;

out:
	if (page_list)
		vm_page_free_list(page_list, FALSE);

	return kr;
}
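
/*
 * Illustrative sketch (not part of the original source): pairing a
 * VA-only allocation with on-demand population.  The sizes are
 * assumptions for the example; only KMA_KOBJECT (or KMA_COMPRESSOR)
 * ranges can be populated this way.
 */
#if 0	/* example only */
static kern_return_t
example_populate_on_demand(vm_offset_t *basep)
{
	kern_return_t kr;

	/* reserve virtual space in the kernel object; no physical pages yet */
	kr = kernel_memory_allocate(kernel_map, basep, 8 * PAGE_SIZE, 0,
				    KMA_KOBJECT | KMA_VAONLY);
	if (kr != KERN_SUCCESS)
		return kr;

	/* back just the first two pages with wired memory */
	return kernel_memory_populate(kernel_map, *basep, 2 * PAGE_SIZE,
				      KMA_KOBJECT);
}
#endif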

void
kernel_memory_depopulate(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size,
	int		flags)
{
	vm_object_t		object;
	vm_object_offset_t	offset, pg_offset;
	vm_page_t		mem;
	vm_page_t		local_freeq = NULL;

	assert((flags & (KMA_COMPRESSOR|KMA_KOBJECT)) != (KMA_COMPRESSOR|KMA_KOBJECT));

	if (flags & KMA_COMPRESSOR) {
		offset = addr;
		object = compressor_object;

		vm_object_lock(object);
	} else if (flags & KMA_KOBJECT) {
		offset = addr;
		object = kernel_object;

		vm_object_lock(object);
	} else {
		offset = 0;
		object = NULL;
		/*
		 * If it's not the kernel object, we need to:
		 *	lock map;
		 *	lookup entry;
		 *	lock object;
		 *	unlock map;
		 */
		panic("kernel_memory_depopulate(%p,0x%llx,0x%llx,0x%x): "
		      "!KMA_KOBJECT",
		      map, (uint64_t) addr, (uint64_t) size, flags);
	}
	pmap_protect(kernel_map->pmap, offset, offset + size, VM_PROT_NONE);

	for (pg_offset = 0;
	     pg_offset < size;
	     pg_offset += PAGE_SIZE_64) {

		mem = vm_page_lookup(object, offset + pg_offset);

		assert(mem);

		pmap_disconnect(mem->phys_page);

		mem->busy = TRUE;

		assert(mem->tabled);
		vm_page_remove(mem, TRUE);
		assert(mem->busy);

		assert(mem->pageq.next == NULL &&
		       mem->pageq.prev == NULL);
		mem->pageq.next = (queue_entry_t)local_freeq;
		local_freeq = mem;
	}
	vm_object_unlock(object);

	if (local_freeq)
		vm_page_free_list(local_freeq, TRUE);
}

/*
 *	kmem_alloc:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 */

kern_return_t
kmem_alloc(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	kern_return_t kr = kernel_memory_allocate(map, addrp, size, 0, 0);
	TRACE_MACHLEAKS(KMEM_ALLOC_CODE, KMEM_ALLOC_CODE_2, size, *addrp);
	return kr;
}
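
/*
 * Illustrative sketch (not part of the original source): the common
 * kmem_alloc()/kmem_free() pairing.  The size is an assumption for the
 * example; note that kmem_free() must be passed the same size that was
 * allocated, and that kmem_alloc() does not zero the memory.
 */
#if 0	/* example only */
static void
example_scratch_buffer(void)
{
	vm_offset_t	buf;
	vm_size_t	size = 16 * PAGE_SIZE;

	if (kmem_alloc(kernel_map, &buf, size) != KERN_SUCCESS)
		return;

	bzero((void *)buf, size);	/* caller is responsible for zeroing */

	/* ... use the buffer ... */

	kmem_free(kernel_map, buf, size);
}
#endif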

/*
 *	kmem_realloc:
 *
 *	Reallocate wired-down memory in the kernel's address map
 *	or a submap.  Newly allocated pages are not zeroed.
 *	This can only be used on regions allocated with kmem_alloc.
 *
 *	If successful, the pages in the old region are mapped twice.
 *	The old region is unchanged.  Use kmem_free to get rid of it.
 */
kern_return_t
kmem_realloc(
	vm_map_t	map,
	vm_offset_t	oldaddr,
	vm_size_t	oldsize,
	vm_offset_t	*newaddrp,
	vm_size_t	newsize)
{
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_map_offset_t		oldmapmin;
	vm_map_offset_t		oldmapmax;
	vm_map_offset_t		newmapaddr;
	vm_map_size_t		oldmapsize;
	vm_map_size_t		newmapsize;
	vm_map_entry_t		oldentry;
	vm_map_entry_t		newentry;
	vm_page_t		mem;
	kern_return_t		kr;

	oldmapmin = vm_map_trunc_page(oldaddr,
				      VM_MAP_PAGE_MASK(map));
	oldmapmax = vm_map_round_page(oldaddr + oldsize,
				      VM_MAP_PAGE_MASK(map));
	oldmapsize = oldmapmax - oldmapmin;
	newmapsize = vm_map_round_page(newsize,
				       VM_MAP_PAGE_MASK(map));


	/*
	 *	Find the VM object backing the old region.
	 */

	vm_map_lock(map);

	if (!vm_map_lookup_entry(map, oldmapmin, &oldentry))
		panic("kmem_realloc");
	object = oldentry->object.vm_object;

	/*
	 *	Increase the size of the object and
	 *	fill in the new region.
	 */

	vm_object_reference(object);
	/* by grabbing the object lock before unlocking the map */
	/* we guarantee that we will panic if more than one     */
	/* attempt is made to realloc a kmem_alloc'd area       */
	vm_object_lock(object);
	vm_map_unlock(map);
	if (object->vo_size != oldmapsize)
		panic("kmem_realloc");
	object->vo_size = newmapsize;
	vm_object_unlock(object);

	/* allocate the new pages while expanded portion of the */
	/* object is still not mapped */
	kmem_alloc_pages(object, vm_object_round_page(oldmapsize),
			 vm_object_round_page(newmapsize - oldmapsize));

	/*
	 *	Find space for the new region.
	 */

	kr = vm_map_find_space(map, &newmapaddr, newmapsize,
			       (vm_map_offset_t) 0, 0, &newentry);
	if (kr != KERN_SUCCESS) {
		vm_object_lock(object);
		for (offset = oldmapsize;
		     offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return kr;
	}
	newentry->object.vm_object = object;
	newentry->offset = 0;
	assert(newentry->wired_count == 0);


	/* add an extra reference in case we have someone doing an */
	/* unexpected deallocate */
	vm_object_reference(object);
	vm_map_unlock(map);

	kr = vm_map_wire(map, newmapaddr, newmapaddr + newmapsize, VM_PROT_DEFAULT, FALSE);
	if (KERN_SUCCESS != kr) {
		vm_map_remove(map, newmapaddr, newmapaddr + newmapsize, 0);
		vm_object_lock(object);
		for (offset = oldmapsize; offset < newmapsize; offset += PAGE_SIZE) {
			if ((mem = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
				VM_PAGE_FREE(mem);
			}
		}
		object->vo_size = oldmapsize;
		vm_object_unlock(object);
		vm_object_deallocate(object);
		return (kr);
	}
	vm_object_deallocate(object);

	*newaddrp = CAST_DOWN(vm_offset_t, newmapaddr);
	return KERN_SUCCESS;
}
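
/*
 * Illustrative sketch (not part of the original source): growing a
 * kmem_alloc'd region.  Per the comment above kmem_realloc(), the old
 * mapping survives a successful call and shares pages with the new one,
 * so the caller discards it with kmem_free().  Sizes are assumptions.
 */
#if 0	/* example only */
static kern_return_t
example_grow_buffer(vm_offset_t *bufp, vm_size_t oldsize, vm_size_t newsize)
{
	vm_offset_t	newbuf;
	kern_return_t	kr;

	kr = kmem_realloc(kernel_map, *bufp, oldsize, &newbuf, newsize);
	if (kr != KERN_SUCCESS)
		return kr;

	/* the old region still maps the shared pages; release it */
	kmem_free(kernel_map, *bufp, oldsize);
	*bufp = newbuf;
	return KERN_SUCCESS;
}
#endif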

/*
 *	kmem_alloc_kobject:
 *
 *	Allocate wired-down memory in the kernel's address map
 *	or a submap.  The memory is not zero-filled.
 *
 *	The memory is allocated in the kernel_object.
 *	It may not be copied with vm_map_copy, and
 *	it may not be reallocated with kmem_realloc.
 */

kern_return_t
kmem_alloc_kobject(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	return kernel_memory_allocate(map, addrp, size, 0, KMA_KOBJECT);
}

/*
 *	kmem_alloc_aligned:
 *
 *	Like kmem_alloc_kobject, except that the memory is aligned.
 *	The size should be a power-of-2.
 */

kern_return_t
kmem_alloc_aligned(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	if ((size & (size - 1)) != 0)
		panic("kmem_alloc_aligned: size not aligned");
	return kernel_memory_allocate(map, addrp, size, size - 1, KMA_KOBJECT);
}

/*
 *	kmem_alloc_pageable:
 *
 *	Allocate pageable memory in the kernel's address map.
 */

kern_return_t
kmem_alloc_pageable(
	vm_map_t	map,
	vm_offset_t	*addrp,
	vm_size_t	size)
{
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

#ifndef normal
	map_addr = (vm_map_min(map)) + 0x1000;
#else
	map_addr = vm_map_min(map);
#endif
	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(map));

	kr = vm_map_enter(map, &map_addr, map_size,
			  (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
			  VM_OBJECT_NULL, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);

	if (kr != KERN_SUCCESS)
		return kr;

	*addrp = CAST_DOWN(vm_offset_t, map_addr);
	return KERN_SUCCESS;
}

/*
 *	kmem_free:
 *
 *	Release a region of kernel virtual memory allocated
 *	with kmem_alloc, kmem_alloc_kobject, or kmem_alloc_pageable,
 *	and return the physical pages associated with that region.
 */

void
kmem_free(
	vm_map_t	map,
	vm_offset_t	addr,
	vm_size_t	size)
{
	kern_return_t kr;

	assert(addr >= VM_MIN_KERNEL_AND_KEXT_ADDRESS);

	TRACE_MACHLEAKS(KMEM_FREE_CODE, KMEM_FREE_CODE_2, size, addr);

	if (size == 0) {
#if MACH_ASSERT
		printf("kmem_free called with size==0 for map: %p with addr: 0x%llx\n", map, (uint64_t)addr);
#endif
		return;
	}

	kr = vm_map_remove(map,
			   vm_map_trunc_page(addr,
					     VM_MAP_PAGE_MASK(map)),
			   vm_map_round_page(addr + size,
					     VM_MAP_PAGE_MASK(map)),
			   VM_MAP_REMOVE_KUNWIRE);
	if (kr != KERN_SUCCESS)
		panic("kmem_free");
}

/*
 *	Allocate new pages in an object.
 */

kern_return_t
kmem_alloc_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size)
{
	vm_object_size_t	alloc_size;

	alloc_size = vm_object_round_page(size);
	vm_object_lock(object);
	while (alloc_size) {
		register vm_page_t	mem;


		/*
		 *	Allocate a page
		 */
		while (VM_PAGE_NULL ==
		       (mem = vm_page_alloc(object, offset))) {
			vm_object_unlock(object);
			VM_PAGE_WAIT();
			vm_object_lock(object);
		}
		mem->busy = FALSE;

		alloc_size -= PAGE_SIZE;
		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);
	return KERN_SUCCESS;
}

/*
 *	Remap wired pages in an object into a new region.
 *	The object is assumed to be mapped into the kernel map or
 *	a submap.
 */
void
kmem_remap_pages(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_offset_t		start,
	register vm_offset_t		end,
	vm_prot_t			protection)
{

	vm_map_offset_t	map_start;
	vm_map_offset_t	map_end;

	/*
	 *	Mark the pmap region as not pageable.
	 */
	map_start = vm_map_trunc_page(start,
				      VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
				    VM_MAP_PAGE_MASK(kernel_map));

	pmap_pageable(kernel_pmap, map_start, map_end, FALSE);

	while (map_start < map_end) {
		register vm_page_t	mem;

		vm_object_lock(object);

		/*
		 *	Find a page
		 */
		if ((mem = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
			panic("kmem_remap_pages");

		/*
		 *	Wire it down (again)
		 */
		vm_page_lockspin_queues();
		vm_page_wire(mem);
		vm_page_unlock_queues();
		vm_object_unlock(object);

		/*
		 * ENCRYPTED SWAP:
		 * The page is supposed to be wired now, so it
		 * shouldn't be encrypted at this point.  It can
		 * safely be entered in the page table.
		 */
		ASSERT_PAGE_DECRYPTED(mem);

		/*
		 *	Enter it in the kernel pmap.  The page isn't busy,
		 *	but this shouldn't be a problem because it is wired.
		 */

		mem->pmapped = TRUE;
		mem->wpmapped = TRUE;

		PMAP_ENTER(kernel_pmap, map_start, mem, protection, VM_PROT_NONE, 0, TRUE);

		map_start += PAGE_SIZE;
		offset += PAGE_SIZE;
	}
}

/*
 *	kmem_suballoc:
 *
 *	Allocates a map to manage a subrange
 *	of the kernel virtual address space.
 *
 *	Arguments are as follows:
 *
 *	parent		Map to take range from
 *	addr		Address of start of range (IN/OUT)
 *	size		Size of range to find
 *	pageable	Can region be paged
 *	flags		VM_FLAGS_* (e.g. VM_FLAGS_ANYWHERE to place
 *			the region anywhere in the map)
 *	new_map		Pointer to new submap
 */
kern_return_t
kmem_suballoc(
	vm_map_t	parent,
	vm_offset_t	*addr,
	vm_size_t	size,
	boolean_t	pageable,
	int		flags,
	vm_map_t	*new_map)
{
	vm_map_t	map;
	vm_map_offset_t	map_addr;
	vm_map_size_t	map_size;
	kern_return_t	kr;

	map_size = vm_map_round_page(size,
				     VM_MAP_PAGE_MASK(parent));

	/*
	 *	Need reference on submap object because it is internal
	 *	to the vm_system.  vm_object_enter will never be called
	 *	on it (usual source of reference for vm_map_enter).
	 */
	vm_object_reference(vm_submap_object);

	map_addr = ((flags & VM_FLAGS_ANYWHERE)
		    ? vm_map_min(parent)
		    : vm_map_trunc_page(*addr,
					VM_MAP_PAGE_MASK(parent)));

	kr = vm_map_enter(parent, &map_addr, map_size,
			  (vm_map_offset_t) 0, flags,
			  vm_submap_object, (vm_object_offset_t) 0, FALSE,
			  VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
	if (kr != KERN_SUCCESS) {
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}

	pmap_reference(vm_map_pmap(parent));
	map = vm_map_create(vm_map_pmap(parent), map_addr, map_addr + map_size, pageable);
	if (map == VM_MAP_NULL)
		panic("kmem_suballoc: vm_map_create failed");	/* "can't happen" */
	/* inherit the parent map's page size */
	vm_map_set_page_shift(map, VM_MAP_PAGE_SHIFT(parent));

	kr = vm_map_submap(parent, map_addr, map_addr + map_size, map, map_addr, FALSE);
	if (kr != KERN_SUCCESS) {
		/*
		 * See comment preceding vm_map_submap().
		 */
		vm_map_remove(parent, map_addr, map_addr + map_size, VM_MAP_NO_FLAGS);
		vm_map_deallocate(map);	/* also removes ref to pmap */
		vm_object_deallocate(vm_submap_object);
		return (kr);
	}
	*addr = CAST_DOWN(vm_offset_t, map_addr);
	*new_map = map;
	return (KERN_SUCCESS);
}
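
/*
 * Illustrative sketch (not part of the original source): carving a
 * non-pageable submap out of the kernel map, in the style of the zone
 * and IPC submaps.  The size and placement policy are assumptions.
 */
#if 0	/* example only */
static vm_map_t
example_create_submap(vm_size_t size)
{
	vm_offset_t	addr = 0;
	vm_map_t	submap;

	if (kmem_suballoc(kernel_map, &addr, size,
			  FALSE,		/* not pageable */
			  VM_FLAGS_ANYWHERE,
			  &submap) != KERN_SUCCESS)
		return VM_MAP_NULL;

	return submap;
}
#endif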

/*
 *	kmem_init:
 *
 *	Initialize the kernel's virtual memory map, taking
 *	into account all memory allocated up to this time.
 */
void
kmem_init(
	vm_offset_t	start,
	vm_offset_t	end)
{
	vm_map_offset_t	map_start;
	vm_map_offset_t	map_end;

	map_start = vm_map_trunc_page(start,
				      VM_MAP_PAGE_MASK(kernel_map));
	map_end = vm_map_round_page(end,
				    VM_MAP_PAGE_MASK(kernel_map));

	kernel_map = vm_map_create(pmap_kernel(), VM_MIN_KERNEL_AND_KEXT_ADDRESS,
				   map_end, FALSE);
	/*
	 *	Reserve virtual memory allocated up to this time.
	 */
	if (start != VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
		vm_map_offset_t map_addr;
		kern_return_t kr;

		map_addr = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
		kr = vm_map_enter(kernel_map,
				  &map_addr,
				  (vm_map_size_t)(map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
				  (vm_map_offset_t) 0,
				  VM_FLAGS_FIXED | VM_FLAGS_NO_PMAP_CHECK,
				  VM_OBJECT_NULL,
				  (vm_object_offset_t) 0, FALSE,
				  VM_PROT_NONE, VM_PROT_NONE,
				  VM_INHERIT_DEFAULT);

		if (kr != KERN_SUCCESS) {
			panic("kmem_init(0x%llx,0x%llx): vm_map_enter(0x%llx,0x%llx) error 0x%x\n",
			      (uint64_t) start, (uint64_t) end,
			      (uint64_t) VM_MIN_KERNEL_AND_KEXT_ADDRESS,
			      (uint64_t) (map_start - VM_MIN_KERNEL_AND_KEXT_ADDRESS),
			      kr);
		}
	}

	/*
	 * Set the default global user wire limit which limits the amount of
	 * memory that can be locked via mlock().  We set this to the total
	 * amount of memory that is potentially usable by a user app (max_mem)
	 * minus a certain amount.  This can be overridden via a sysctl.
	 */
	vm_global_no_user_wire_amount = MIN(max_mem*20/100,
					    VM_NOT_USER_WIREABLE);
	vm_global_user_wire_limit = max_mem - vm_global_no_user_wire_amount;

	/* the default per user limit is the same as the global limit */
	vm_user_wire_limit = vm_global_user_wire_limit;
}


/*
 *	Routine:	copyinmap
 *	Purpose:
 *		Like copyin, except that fromaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyinmap(
	vm_map_t		map,
	vm_map_offset_t		fromaddr,
	void			*todata,
	vm_size_t		length)
{
	kern_return_t	kr = KERN_SUCCESS;
	vm_map_t	oldmap;

	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(todata, CAST_DOWN(void *, fromaddr), length);
	} else if (current_map() == map) {
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
	} else {
		vm_map_reference(map);
		oldmap = vm_map_switch(map);
		if (copyin(fromaddr, todata, length) != 0)
			kr = KERN_INVALID_ADDRESS;
		vm_map_switch(oldmap);
		vm_map_deallocate(map);
	}
	return kr;
}
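
/*
 * Illustrative sketch (not part of the original source): reading a
 * 32-bit word from another task's map with copyinmap().  The word
 * size and error handling are assumptions for the example.
 */
#if 0	/* example only */
static kern_return_t
example_peek_user_word(vm_map_t user_map, vm_map_offset_t uaddr, uint32_t *valp)
{
	/* handles the kernel map, the current map, or a map switch */
	return copyinmap(user_map, uaddr, valp, sizeof (*valp));
}
#endif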

/*
 *	Routine:	copyoutmap
 *	Purpose:
 *		Like copyout, except that toaddr is an address
 *		in the specified VM map.  This implementation
 *		is incomplete; it handles the current user map
 *		and the kernel map/submaps.
 */
kern_return_t
copyoutmap(
	vm_map_t		map,
	void			*fromdata,
	vm_map_address_t	toaddr,
	vm_size_t		length)
{
	if (vm_map_pmap(map) == pmap_kernel()) {
		/* assume a correct copy */
		memcpy(CAST_DOWN(void *, toaddr), fromdata, length);
		return KERN_SUCCESS;
	}

	if (current_map() != map)
		return KERN_NOT_SUPPORTED;

	if (copyout(fromdata, toaddr, length) != 0)
		return KERN_INVALID_ADDRESS;

	return KERN_SUCCESS;
}


kern_return_t
vm_conflict_check(
	vm_map_t		map,
	vm_map_offset_t		off,
	vm_map_size_t		len,
	memory_object_t		pager,
	vm_object_offset_t	file_off)
{
	vm_map_entry_t		entry;
	vm_object_t		obj;
	vm_object_offset_t	obj_off;
	vm_map_t		base_map;
	vm_map_offset_t		base_offset;
	vm_map_offset_t		original_offset;
	kern_return_t		kr;
	vm_map_size_t		local_len;

	base_map = map;
	base_offset = off;
	original_offset = off;
	kr = KERN_SUCCESS;
	vm_map_lock(map);
	while (vm_map_lookup_entry(map, off, &entry)) {
		local_len = len;

		if (entry->object.vm_object == VM_OBJECT_NULL) {
			vm_map_unlock(map);
			return KERN_SUCCESS;
		}
		if (entry->is_sub_map) {
			vm_map_t	old_map;

			old_map = map;
			vm_map_lock(entry->object.sub_map);
			map = entry->object.sub_map;
			off = entry->offset + (off - entry->vme_start);
			vm_map_unlock(old_map);
			continue;
		}
		obj = entry->object.vm_object;
		obj_off = (off - entry->vme_start) + entry->offset;
		while (obj->shadow) {
			obj_off += obj->vo_shadow_offset;
			obj = obj->shadow;
		}
		if ((obj->pager_created) && (obj->pager == pager)) {
			if (((obj->paging_offset) + obj_off) == file_off) {
				if (off != base_offset) {
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				kr = KERN_ALREADY_WAITING;
			} else {
				vm_object_offset_t	obj_off_aligned;
				vm_object_offset_t	file_off_aligned;

				obj_off_aligned = obj_off & ~PAGE_MASK;
				file_off_aligned = file_off & ~PAGE_MASK;

				if (file_off_aligned == (obj->paging_offset + obj_off_aligned)) {
					/*
					 * the target map and the file offset start in the same page
					 * but are not identical...
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off < (obj->paging_offset + obj_off_aligned)) &&
				    ((file_off + len) > (obj->paging_offset + obj_off_aligned))) {
					/*
					 * some portion of the tail of the I/O will fall
					 * within the encompass of the target map
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
				if ((file_off_aligned > (obj->paging_offset + obj_off)) &&
				    (file_off_aligned < (obj->paging_offset + obj_off) + len)) {
					/*
					 * the beginning page of the file offset falls within
					 * the target map's encompass
					 */
					vm_map_unlock(map);
					return KERN_FAILURE;
				}
			}
		} else if (kr != KERN_SUCCESS) {
			vm_map_unlock(map);
			return KERN_FAILURE;
		}

		if (len <= ((entry->vme_end - entry->vme_start) -
			    (off - entry->vme_start))) {
			vm_map_unlock(map);
			return kr;
		} else {
			len -= (entry->vme_end - entry->vme_start) -
			       (off - entry->vme_start);
		}
		base_offset = base_offset + (local_len - len);
		file_off = file_off + (local_len - len);
		off = base_offset;
		if (map != base_map) {
			vm_map_unlock(map);
			vm_map_lock(base_map);
			map = base_map;
		}
	}

	vm_map_unlock(map);
	return kr;
}