/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 *	File:	pmap.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

#include <string.h>
#include <mach_ldebug.h>

#include <libkern/OSAtomic.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/queue.h>
#include <kern/ledger.h>
#include <kern/mach_param.h>

#include <kern/kalloc.h>
#include <kern/spl.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>			/* prototyping */
#include <i386/misc_protos.h>
#include <i386/i386_lowmem.h>
#include <x86_64/lowglobals.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/seg.h>
#include <i386/serial_io.h>
#include <i386/cpu_capabilities.h>
#include <i386/machine_routines.h>
#include <i386/proc_reg.h>
#include <i386/tsc.h>
#include <i386/pmap_internal.h>
#include <i386/pmap_pcid.h>

#include <vm/vm_protos.h>

#include <i386/mp.h>
#include <i386/mp_desc.h>
#include <libkern/kernel_mach_header.h>

#include <pexpert/i386/efi.h>


#ifdef IWANTTODEBUG
#undef	DEBUG
#define DEBUG	1
#define POSTCODE_DELAY	1
#include <i386/postcode.h>
#endif /* IWANTTODEBUG */

#ifdef	PMAP_DEBUG
#define DBG(x...)	kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

/* Compile time assert to ensure adjacency/alignment of per-CPU data fields used
 * in the trampolines for kernel/user boundary TLB coherency.
 */
char pmap_cpu_data_assert[(((offsetof(cpu_data_t, cpu_tlb_invalid) - offsetof(cpu_data_t, cpu_active_cr3)) == 8) && (offsetof(cpu_data_t, cpu_active_cr3) % 64 == 0)) ? 1 : -1];
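
/*
 * An aside on the idiom above: the array-size trick is a C89-compatible
 * static assert.  If the predicate holds, the array has size 1 and the
 * declaration is legal; if it fails, the size is -1 and compilation
 * aborts.  A minimal sketch of the same pattern (the names here are
 * illustrative, not part of this file):
 *
 *	#define STATIC_ASSERT(cond, tag) \
 *		char static_assert_##tag[(cond) ? 1 : -1]
 *
 *	STATIC_ASSERT(sizeof(uint64_t) == 8, uint64_is_8_bytes);
 */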

boolean_t pmap_trace = FALSE;

boolean_t	no_shared_cr3 = DEBUG;		/* TRUE for DEBUG by default */

int nx_enabled = 1;			/* enable no-execute protection */
int allow_data_exec  = VM_ABI_32;	/* 32-bit apps may execute data by default, 64-bit apps may not */
int allow_stack_exec = 0;		/* No apps may execute from the stack by default */

const boolean_t cpu_64bit = TRUE;	/* Mais oui! */

uint64_t max_preemption_latency_tsc = 0;

pv_hashed_entry_t	*pv_hash_table;		/* hash lists */

uint32_t npvhashmask = 0, npvhashbuckets = 0;

pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
decl_simple_lock_data(,pv_hashed_free_list_lock)
decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
decl_simple_lock_data(,pv_hash_table_lock)

decl_simple_lock_data(,phys_backup_lock)

zone_t pv_hashed_list_zone;	/* zone of pv_hashed_entry structures */

/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

static struct vm_object kptobj_object_store;
static struct vm_object kpml4obj_object_store;
static struct vm_object kpdptobj_object_store;

/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char		*pmap_phys_attributes;
ppnum_t		last_managed_page = 0;

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */
uint64_t	pde_mapped_size = PDE_MAPPED_SIZE;

unsigned	pmap_memory_region_count;
unsigned	pmap_memory_region_current;

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];

/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;

struct zone	*pmap_zone;		/* zone of pmap structures */

struct zone	*pmap_anchor_zone;
int		pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;
long long	alloc_ptepages_count __attribute__((aligned(8))) = 0;	/* aligned for atomic access */
unsigned int	bootstrap_wired_pages = 0;
int		pt_fake_zone_index = -1;

extern long	NMIPI_acks;

boolean_t	kernel_text_ps_4K = TRUE;
boolean_t	wpkernel = TRUE;

extern char	end;

static int	nkpt;

pt_entry_t	*DMAP1, *DMAP2;
caddr_t		DADDR1;
caddr_t		DADDR2;

const boolean_t	pmap_disable_kheap_nx = FALSE;
const boolean_t	pmap_disable_kstack_nx = FALSE;
extern boolean_t doconstro_override;

extern long __stack_chk_guard[];

/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	int		ps;

	ps = PAGE_SIZE;
	while (start_addr < end_addr) {
		pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
			   (ppnum_t) i386_btop(start_addr), prot, VM_PROT_NONE, flags, TRUE);
		virt += ps;
		start_addr += ps;
	}
	return(virt);
}
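
/*
 * Illustrative usage of pmap_map() (addresses and the flags value are
 * hypothetical, not from this file): map a 4-page physical range
 * [0x12340000, 0x12344000) into the kernel map starting at `va`; the
 * return value is the first VA past the new mapping:
 *
 *	va = pmap_map(va, 0x12340000ULL, 0x12344000ULL,
 *	              VM_PROT_READ | VM_PROT_WRITE, 0);
 *
 * Each iteration wires one PAGE_SIZE page via pmap_enter(), so both
 * addresses must be page-aligned.
 */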

extern char			*first_avail;
extern vm_offset_t		virtual_avail, virtual_end;
extern pmap_paddr_t		avail_start, avail_end;
extern vm_offset_t		sHIB;
extern vm_offset_t		eHIB;
extern vm_offset_t		stext;
extern vm_offset_t		etext;
extern vm_offset_t		sdata, edata;
extern vm_offset_t		sconstdata, econstdata;

extern void			*KPTphys;

boolean_t pmap_smep_enabled = FALSE;
boolean_t pmap_smap_enabled = FALSE;

void
pmap_cpu_init(void)
{
	cpu_data_t	*cdp = current_cpu_datap();
	/*
	 * Here early in the life of a processor (from cpu_mode_init()).
	 * Ensure global page feature is disabled at this point.
	 */
	set_cr4(get_cr4() & ~CR4_PGE);

	/*
	 * Initialize the per-cpu, TLB-related fields.
	 */
	cdp->cpu_kernel_cr3 = kernel_pmap->pm_cr3;
	cdp->cpu_active_cr3 = kernel_pmap->pm_cr3;
	cdp->cpu_tlb_invalid = FALSE;
	cdp->cpu_task_map = TASK_MAP_64BIT;
	pmap_pcid_configure();
	if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_SMEP) {
		boolean_t nsmep;
		if (!PE_parse_boot_argn("-pmap_smep_disable", &nsmep, sizeof(nsmep))) {
			set_cr4(get_cr4() | CR4_SMEP);
			pmap_smep_enabled = TRUE;
		}
	}

	if (cdp->cpu_fixed_pmcs_enabled) {
		boolean_t enable = TRUE;
		cpu_pmc_control(&enable);
	}
}

static uint32_t
pmap_scale_shift(void)
{
	uint32_t scale = 0;

	if (sane_size <= 8*GB) {
		scale = (uint32_t)(sane_size / (2 * GB));
	} else if (sane_size <= 32*GB) {
		scale = 4 + (uint32_t)((sane_size - (8 * GB)) / (4 * GB));
	} else {
		scale = 10 + (uint32_t)MIN(4, ((sane_size - (32 * GB)) / (8 * GB)));
	}
	return scale;
}
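
/*
 * Worked examples for pmap_scale_shift() (arithmetic only, sizes
 * illustrative): with sane_size = 4GB the first branch yields
 * 4GB / 2GB = 2; with sane_size = 16GB the second yields
 * 4 + (16GB - 8GB) / 4GB = 6; with sane_size = 64GB the third yields
 * 10 + MIN(4, (64GB - 32GB) / 8GB) = 14.  The result is used in
 * pmap_bootstrap() below to left-shift NPVHASHBUCKETS, so each step
 * doubles the default number of pv hash buckets as memory grows.
 */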

/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 */
void
pmap_bootstrap(
	__unused vm_offset_t	load_start,
	__unused boolean_t	IA32e)
{
#if NCOPY_WINDOWS > 0
	vm_offset_t	va;
	int		i;
#endif
	assert(IA32e);

	vm_last_addr = VM_MAX_KERNEL_ADDRESS;	/* Set the highest address
						 * known to VM */
	/*
	 *	The kernel's pmap is statically allocated so we don't
	 *	have to use pmap_create, which is unlikely to work
	 *	correctly at this part of the boot sequence.
	 */
	kernel_pmap = &kernel_pmap_store;
	kernel_pmap->ref_count = 1;
	kernel_pmap->nx_enabled = TRUE;
	kernel_pmap->pm_task_map = TASK_MAP_64BIT;
	kernel_pmap->pm_obj = (vm_object_t) NULL;
	kernel_pmap->dirbase = (pd_entry_t *)((uintptr_t)IdlePTD);
	kernel_pmap->pm_pdpt = (pd_entry_t *) ((uintptr_t)IdlePDPT);
	kernel_pmap->pm_pml4 = IdlePML4;
	kernel_pmap->pm_cr3 = (uintptr_t)ID_MAP_VTOP(IdlePML4);
	pmap_pcid_initialize_kernel(kernel_pmap);

	current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;

	nkpt = NKPT;
	OSAddAtomic(NKPT, &inuse_ptepages_count);
	OSAddAtomic64(NKPT, &alloc_ptepages_count);
	bootstrap_wired_pages = NKPT;

	virtual_avail = (vm_offset_t)(VM_MIN_KERNEL_ADDRESS) + (vm_offset_t)first_avail;
	virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);

#if NCOPY_WINDOWS > 0
	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*INTEL_PGBYTES);

	va = virtual_avail;

	for (i = 0; i < PMAP_NWINDOWS; i++) {
#if 1
		kprintf("trying to do SYSMAP idx %d %p\n", i,
			current_cpu_datap());
		kprintf("cpu_pmap %p\n", current_cpu_datap()->cpu_pmap);
		kprintf("mapwindow %p\n", current_cpu_datap()->cpu_pmap->mapwindow);
		kprintf("two stuff %p %p\n",
			(void *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
			(void *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR));
#endif
		SYSMAP(caddr_t,
		       (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
		       (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
		       1);
		current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP =
			&(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP_store);
		*current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
	}

	/* DMAP used for debugger */
	SYSMAP(caddr_t, DMAP1, DADDR1, 1);
	SYSMAP(caddr_t, DMAP2, DADDR2, 1);	/* XXX temporary - can remove */

	virtual_avail = va;
#endif
	if (!PE_parse_boot_argn("npvhash", &npvhashmask, sizeof (npvhashmask))) {
		npvhashmask = ((NPVHASHBUCKETS) << pmap_scale_shift()) - 1;
	}

	npvhashbuckets = npvhashmask + 1;

	if (0 != ((npvhashbuckets) & npvhashmask)) {
		panic("invalid hash %d, must be ((2^N)-1), "
		      "using default %d\n", npvhashmask, NPVHASHMASK);
	}
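
	/*
	 * Worked example for the power-of-two check above (numbers
	 * illustrative): a boot-arg of npvhash=4095 gives npvhashbuckets =
	 * 4096 and 4096 & 4095 == 0, so the table is accepted.  npvhash=4000
	 * gives 4001 & 4000 != 0 and panics: bucket indices are computed by
	 * masking a hash with npvhashmask, which only reaches every bucket
	 * when the bucket count is a power of two.
	 */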

	simple_lock_init(&kernel_pmap->lock, 0);
	simple_lock_init(&pv_hashed_free_list_lock, 0);
	simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
	simple_lock_init(&pv_hash_table_lock, 0);
	simple_lock_init(&phys_backup_lock, 0);

	pmap_cpu_init();

	if (pmap_pcid_ncpus)
		printf("PMAP: PCID enabled\n");

	if (pmap_smep_enabled)
		printf("PMAP: Supervisor Mode Execute Protection enabled\n");

#if	DEBUG
	printf("Stack canary: 0x%lx\n", __stack_chk_guard[0]);
	printf("early_random(): 0x%qx\n", early_random());
#endif
	boolean_t ptmp;
	/* Check if the user has requested disabling stack or heap no-execute
	 * enforcement. These are "const" variables; that qualifier is cast away
	 * when altering them. The TEXT/DATA const sections are marked
	 * write protected later in the kernel startup sequence, so altering
	 * them is possible at this point, in pmap_bootstrap().
	 */
	if (PE_parse_boot_argn("-pmap_disable_kheap_nx", &ptmp, sizeof(ptmp))) {
		boolean_t *pdknxp = (boolean_t *) &pmap_disable_kheap_nx;
		*pdknxp = TRUE;
	}

	if (PE_parse_boot_argn("-pmap_disable_kstack_nx", &ptmp, sizeof(ptmp))) {
		boolean_t *pdknhp = (boolean_t *) &pmap_disable_kstack_nx;
		*pdknhp = TRUE;
	}

	boot_args *args = (boot_args *)PE_state.bootArgs;
	if (args->efiMode == kBootArgsEfiMode32) {
		printf("EFI32: kernel virtual space limited to 4GB\n");
		virtual_end = VM_MAX_KERNEL_ADDRESS_EFI32;
	}
	kprintf("Kernel virtual space from 0x%lx to 0x%lx.\n",
		(long)KERNEL_BASE, (long)virtual_end);
	kprintf("Available physical space from 0x%llx to 0x%llx\n",
		avail_start, avail_end);

	/*
	 * The -no_shared_cr3 boot-arg is a debugging feature (set by default
	 * in the DEBUG kernel) to force the kernel to switch to its own map
	 * (and cr3) when control is in kernelspace. The kernel's map does not
	 * include (i.e. share) userspace, so wild references will cause
	 * a panic. Only copyin and copyout are exempt from this.
	 */
	(void) PE_parse_boot_argn("-no_shared_cr3",
				  &no_shared_cr3, sizeof (no_shared_cr3));
	if (no_shared_cr3)
		kprintf("Kernel not sharing user map\n");

#ifdef	PMAP_TRACES
	if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
		kprintf("Kernel traces for pmap operations enabled\n");
	}
#endif	/* PMAP_TRACES */
}

void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_avail;
	*endp = virtual_end;
}



#if HIBERNATION

#include <IOKit/IOHibernatePrivate.h>

int32_t		pmap_npages;
int32_t		pmap_teardown_last_valid_compact_indx = -1;


void	hibernate_rebuild_pmap_structs(void);
void	hibernate_teardown_pmap_structs(addr64_t *, addr64_t *);
void	pmap_pack_index(uint32_t);
int32_t	pmap_unpack_index(pv_rooted_entry_t);


int32_t
pmap_unpack_index(pv_rooted_entry_t pv_h)
{
	int32_t indx = 0;

	indx = (int32_t)(*((uint64_t *)(&pv_h->qlink.next)) >> 48);
	indx = indx << 16;
	indx |= (int32_t)(*((uint64_t *)(&pv_h->qlink.prev)) >> 48);

	*((uint64_t *)(&pv_h->qlink.next)) |= ((uint64_t)0xffff << 48);
	*((uint64_t *)(&pv_h->qlink.prev)) |= ((uint64_t)0xffff << 48);

	return (indx);
}


void
pmap_pack_index(uint32_t indx)
{
	pv_rooted_entry_t	pv_h;

	pv_h = &pv_head_table[indx];

	*((uint64_t *)(&pv_h->qlink.next)) &= ~((uint64_t)0xffff << 48);
	*((uint64_t *)(&pv_h->qlink.prev)) &= ~((uint64_t)0xffff << 48);

	*((uint64_t *)(&pv_h->qlink.next)) |= ((uint64_t)(indx >> 16)) << 48;
	*((uint64_t *)(&pv_h->qlink.prev)) |= ((uint64_t)(indx & 0xffff)) << 48;
}
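
/*
 * Illustration of the packing scheme above (index value hypothetical):
 * canonical x86_64 kernel pointers have their upper 16 bits all-ones, so
 * those bits of qlink.next/qlink.prev are redundant and can carry a
 * 32-bit table index split in half.  Packing indx = 0x00012345 stores
 * 0x0001 in the top 16 bits of qlink.next and 0x2345 in the top 16 bits
 * of qlink.prev; pmap_unpack_index() reassembles (0x0001 << 16) | 0x2345
 * and then ORs the all-ones bits back in so the pointers are canonical
 * again.
 */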

void
hibernate_teardown_pmap_structs(addr64_t *unneeded_start, addr64_t *unneeded_end)
{
	int32_t	i;
	int32_t	compact_target_indx;

	compact_target_indx = 0;

	for (i = 0; i < pmap_npages; i++) {
		if (pv_head_table[i].pmap == PMAP_NULL) {

			if (pv_head_table[compact_target_indx].pmap != PMAP_NULL)
				compact_target_indx = i;
		} else {
			pmap_pack_index((uint32_t)i);

			if (pv_head_table[compact_target_indx].pmap == PMAP_NULL) {
				/*
				 * we've got a hole to fill, so
				 * move this pv_rooted_entry_t to its new home
				 */
				pv_head_table[compact_target_indx] = pv_head_table[i];
				pv_head_table[i].pmap = PMAP_NULL;

				pmap_teardown_last_valid_compact_indx = compact_target_indx;
				compact_target_indx++;
			} else
				pmap_teardown_last_valid_compact_indx = i;
		}
	}
	*unneeded_start = (addr64_t)&pv_head_table[pmap_teardown_last_valid_compact_indx+1];
	*unneeded_end = (addr64_t)&pv_head_table[pmap_npages-1];

	HIBLOG("hibernate_teardown_pmap_structs done: last_valid_compact_indx %d\n", pmap_teardown_last_valid_compact_indx);
}


void
hibernate_rebuild_pmap_structs(void)
{
	int32_t			cindx, eindx, rindx;
	pv_rooted_entry_t	pv_h;

	eindx = (int32_t)pmap_npages;

	for (cindx = pmap_teardown_last_valid_compact_indx; cindx >= 0; cindx--) {

		pv_h = &pv_head_table[cindx];

		rindx = pmap_unpack_index(pv_h);
		assert(rindx < pmap_npages);

		if (rindx != cindx) {
			/*
			 * this pv_rooted_entry_t was moved by hibernate_teardown_pmap_structs,
			 * so move it back to its real location
			 */
			pv_head_table[rindx] = pv_head_table[cindx];
		}
		if (rindx+1 != eindx) {
			/*
			 * the 'hole' between this pv_rooted_entry_t and the previous
			 * pv_rooted_entry_t we moved needs to be initialized as
			 * a range of zero'd pv_rooted_entry_t's
			 */
			bzero((char *)&pv_head_table[rindx+1], (eindx - rindx - 1) * sizeof (struct pv_rooted_entry));
		}
		eindx = rindx;
	}
	if (rindx)
		bzero((char *)&pv_head_table[0], rindx * sizeof (struct pv_rooted_entry));

	HIBLOG("hibernate_rebuild_pmap_structs done: last_valid_compact_indx %d\n", pmap_teardown_last_valid_compact_indx);
}

#endif

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	long			npages;
	vm_offset_t		addr;
	vm_size_t		s, vsize;
	vm_map_offset_t		vaddr;
	ppnum_t			ppn;


	kernel_pmap->pm_obj_pml4 = &kpml4obj_object_store;
	_vm_object_allocate((vm_object_size_t)NPML4PGS * PAGE_SIZE, &kpml4obj_object_store);

	kernel_pmap->pm_obj_pdpt = &kpdptobj_object_store;
	_vm_object_allocate((vm_object_size_t)NPDPTPGS * PAGE_SIZE, &kpdptobj_object_store);

	kernel_pmap->pm_obj = &kptobj_object_store;
	_vm_object_allocate((vm_object_size_t)NPDEPGS * PAGE_SIZE, &kptobj_object_store);

	/*
	 *	Allocate memory for the pv_head_table and its lock bits,
	 *	the modify bit array, and the pte_page table.
	 */

	/*
	 * zero bias all these arrays now instead of off avail_start
	 * so we cover all memory
	 */
	npages = i386_btop(avail_end);
#if HIBERNATION
	pmap_npages = (uint32_t)npages;
#endif
	s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages
			 + (sizeof (struct pv_hashed_entry_t *) * (npvhashbuckets))
			 + pv_lock_table_size(npages)
			 + pv_hash_lock_table_size((npvhashbuckets))
			 + npages);

	s = round_page(s);
	if (kernel_memory_allocate(kernel_map, &addr, s, 0,
				   KMA_KOBJECT | KMA_PERMANENT)
	    != KERN_SUCCESS)
		panic("pmap_init");

	memset((char *)addr, 0, s);

	vaddr = addr;
	vsize = s;

#if PV_DEBUG
	if (0 == npvhashmask) panic("npvhashmask not initialized");
#endif

	/*
	 *	Allocate the structures first to preserve word-alignment.
	 */
	pv_head_table = (pv_rooted_entry_t) addr;
	addr = (vm_offset_t) (pv_head_table + npages);

	pv_hash_table = (pv_hashed_entry_t *)addr;
	addr = (vm_offset_t) (pv_hash_table + (npvhashbuckets));

	pv_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));

	pv_hash_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhashbuckets)));

	pmap_phys_attributes = (char *) addr;

	ppnum_t		last_pn = i386_btop(avail_end);
	unsigned int	i;
	pmap_memory_region_t *pmptr = pmap_memory_regions;
	for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
		if (pmptr->type != kEfiConventionalMemory)
			continue;
		ppnum_t pn;
		for (pn = pmptr->base; pn <= pmptr->end; pn++) {
			if (pn < last_pn) {
				pmap_phys_attributes[pn] |= PHYS_MANAGED;

				if (pn > last_managed_page)
					last_managed_page = pn;

				if (pn >= lowest_hi && pn <= highest_hi)
					pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
			}
		}
	}
	while (vsize) {
		ppn = pmap_find_phys(kernel_pmap, vaddr);

		pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT;

		vaddr += PAGE_SIZE;
		vsize -= PAGE_SIZE;
	}
	/*
	 *	Create the zone of physical maps,
	 *	and of the physical-to-virtual entries.
	 */
	s = (vm_size_t) sizeof(struct pmap);
	pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
	zone_change(pmap_zone, Z_NOENCRYPT, TRUE);

	pmap_anchor_zone = zinit(PAGE_SIZE, task_max, PAGE_SIZE, "pagetable anchors");
	zone_change(pmap_anchor_zone, Z_NOENCRYPT, TRUE);

	/* The anchor is required to be page aligned. Zone debugging adds
	 * padding which may violate that requirement. Tell the zone
	 * subsystem that alignment is required.
	 */
	zone_change(pmap_anchor_zone, Z_ALIGNMENT_REQUIRED, TRUE);

	s = (vm_size_t) sizeof(struct pv_hashed_entry);
	pv_hashed_list_zone = zinit(s, 10000*s /* Expandable zone */,
				    4096 * 3 /* LCM x86_64 */, "pv_list");
	zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE);

	/* create pv entries for kernel pages mapped by low level
	   startup code.  these have to exist so we can pmap_remove()
	   e.g. kext pages from the middle of our addr space */
	vaddr = (vm_map_offset_t) VM_MIN_KERNEL_ADDRESS;
	for (ppn = VM_MIN_KERNEL_PAGE; ppn < i386_btop(avail_start); ppn++) {
		pv_rooted_entry_t pv_e;

		pv_e = pai_to_pvh(ppn);
		pv_e->va = vaddr;
		vaddr += PAGE_SIZE;
		pv_e->pmap = kernel_pmap;
		queue_init(&pv_e->qlink);
	}
	pmap_initialized = TRUE;

	max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);

	/*
	 * Ensure the kernel's PML4 entry exists for the basement
	 * before this is shared with any user.
	 */
	pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT, PMAP_EXPAND_OPTIONS_NONE);
}

static void
pmap_mark_range(pmap_t npmap, uint64_t sv, uint64_t nxrosz, boolean_t NX, boolean_t ro)
{
	uint64_t ev = sv + nxrosz, cv = sv;
	pd_entry_t *pdep;
	pt_entry_t *ptep = NULL;

	assert(((sv & 0xFFFULL) | (nxrosz & 0xFFFULL)) == 0);

	for (pdep = pmap_pde(npmap, cv); pdep != NULL && (cv < ev);) {
		uint64_t pdev = (cv & ~((uint64_t)PDEMASK));

		if (*pdep & INTEL_PTE_PS) {
			if (NX)
				*pdep |= INTEL_PTE_NX;
			if (ro)
				*pdep &= ~INTEL_PTE_WRITE;
			cv += NBPD;
			cv &= ~((uint64_t) PDEMASK);
			pdep = pmap_pde(npmap, cv);
			continue;
		}

		for (ptep = pmap_pte(npmap, cv); ptep != NULL && (cv < (pdev + NBPD)) && (cv < ev);) {
			if (NX)
				*ptep |= INTEL_PTE_NX;
			if (ro)
				*ptep &= ~INTEL_PTE_WRITE;
			cv += NBPT;
			ptep = pmap_pte(npmap, cv);
		}
	}
	DPRINTF("%s(0x%llx, 0x%llx, %u, %u): 0x%llx, 0x%llx\n", __FUNCTION__, sv, nxrosz, NX, ro, cv, ptep ? *ptep : 0);
}
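
/*
 * Usage sketch for pmap_mark_range() (a hypothetical call, mirroring the
 * __HIB section handling in pmap_lowmem_finalize() below): strip write
 * access but keep execute on a page-aligned text range, and strip
 * execute on a data range:
 *
 *	pmap_mark_range(kernel_pmap, text_start, text_size, FALSE, TRUE);
 *	pmap_mark_range(kernel_pmap, data_start, data_size, TRUE, FALSE);
 *
 * Both start and size must be page-aligned (the assert enforces this);
 * 2MB superpage PDEs are adjusted in place, 4K ranges via their PTEs.
 */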

/*
 * Called once VM is fully initialized so that we can release unused
 * sections of low memory to the general pool.
 * Also complete the set-up of identity-mapped sections of the kernel:
 *	1) write-protect kernel text
 *	2) map kernel text using large pages if possible
 *	3) read and write-protect page zero (for K32)
 *	4) map the global page at the appropriate virtual address.
 *
 * Use of large pages
 * ------------------
 * To effectively map and write-protect all kernel text pages, the text
 * must be 2M-aligned at the base, and the data section above must also be
 * 2M-aligned. That is, there's padding below and above. This is achieved
 * through linker directives. Large pages are used only if this alignment
 * exists (and is not overridden by the -kernel_text_page_4K boot-arg).
 * The memory layout is:
 *
 *                       :                :
 *                       |     __DATA     |
 *               sdata:  ==================  2Meg
 *                       |                |
 *                       |  zero-padding  |
 *                       |                |
 *               etext:  ------------------
 *                       |                |
 *                       :                :
 *                       |                |
 *                       |     __TEXT     |
 *                       |                |
 *                       :                :
 *                       |                |
 *               stext:  ==================  2Meg
 *                       |                |
 *                       |  zero-padding  |
 *                       |                |
 *               eHIB:   ------------------
 *                       |      __HIB     |
 *                       :                :
 *
 * Prior to changing the mapping from 4K to 2M, the zero-padding pages
 * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
 * 4K pages covering [stext,etext] are coalesced as 2M large pages.
 * The now unused level-1 PTE pages are also freed.
 */
extern ppnum_t vm_kernel_base_page;
void
pmap_lowmem_finalize(void)
{
	spl_t	spl;
	int	i;

	/*
	 * Update wired memory statistics for early boot pages
	 */
	PMAP_ZINFO_PALLOC(kernel_pmap, bootstrap_wired_pages * PAGE_SIZE);

	/*
	 * Free pages in pmap regions below the base:
	 * rdar://6332712
	 *	We can't free all the pages to VM that EFI reports available.
	 *	Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
	 *	There's also a size miscalculation here: pend is one page less
	 *	than it should be but this is not fixed to be backwards
	 *	compatible.
	 * This is important for KASLR because up to 256*2MB = 512MB of space
	 * has to be released to VM.
	 */
	for (i = 0;
	     pmap_memory_regions[i].end < vm_kernel_base_page;
	     i++) {
		vm_offset_t	pbase = i386_ptob(pmap_memory_regions[i].base);
		vm_offset_t	pend  = i386_ptob(pmap_memory_regions[i].end+1);

		DBG("pmap region %d [%p..[%p\n",
		    i, (void *) pbase, (void *) pend);

		if (pmap_memory_regions[i].attribute & EFI_MEMORY_KERN_RESERVED)
			continue;
		/*
		 * rdar://6332712
		 * Adjust limits not to free pages in range 0xc0000-0xff000.
		 */
		if (pbase >= 0xc0000 && pend <= 0x100000)
			continue;
		if (pbase < 0xc0000 && pend > 0x100000) {
			/* page range entirely within region, free lower part */
			DBG("- ml_static_mfree(%p,%p)\n",
			    (void *) ml_static_ptovirt(pbase),
			    (void *) (0xc0000-pbase));
			ml_static_mfree(ml_static_ptovirt(pbase), 0xc0000-pbase);
			pbase = 0x100000;
		}
		if (pbase < 0xc0000)
			pend = MIN(pend, 0xc0000);
		if (pend > 0x100000)
			pbase = MAX(pbase, 0x100000);
		DBG("- ml_static_mfree(%p,%p)\n",
		    (void *) ml_static_ptovirt(pbase),
		    (void *) (pend - pbase));
		ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
	}

	/* A final pass to get rid of all initial identity mappings to
	 * low pages.
	 */
	DPRINTF("%s: Removing mappings from 0->0x%lx\n", __FUNCTION__, vm_kernel_base);

	/*
	 * Remove all mappings past the boot-cpu descriptor aliases and low globals.
	 * Non-boot-cpu GDT aliases will be remapped later as needed.
	 */
	pmap_remove(kernel_pmap, LOWGLOBAL_ALIAS + PAGE_SIZE, vm_kernel_base);

	/*
	 * If text and data are both 2MB-aligned,
	 * we can map text with large-pages,
	 * unless the -kernel_text_ps_4K boot-arg overrides.
	 */
	if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
		kprintf("Kernel text is 2MB aligned");
		kernel_text_ps_4K = FALSE;
		if (PE_parse_boot_argn("-kernel_text_ps_4K",
				       &kernel_text_ps_4K,
				       sizeof (kernel_text_ps_4K)))
			kprintf(" but will be mapped with 4K pages\n");
		else
			kprintf(" and will be mapped with 2M pages\n");
	}

	(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
	if (wpkernel)
		kprintf("Kernel text %p-%p to be write-protected\n",
			(void *) stext, (void *) etext);

	spl = splhigh();

	/*
	 * Scan over text if mappings are to be changed:
	 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0
	 * - Change to large-pages if possible and not overridden.
	 */
	if (kernel_text_ps_4K && wpkernel) {
		vm_offset_t	myva;
		for (myva = stext; myva < etext; myva += PAGE_SIZE) {
			pt_entry_t	*ptep;

			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			if (ptep)
				pmap_store_pte(ptep, *ptep & ~INTEL_PTE_WRITE);
		}
	}

	if (!kernel_text_ps_4K) {
		vm_offset_t	myva;

		/*
		 * Release zero-filled page padding used for 2M-alignment.
		 */
		DBG("ml_static_mfree(%p,%p) for padding below text\n",
		    (void *) eHIB, (void *) (stext - eHIB));
		ml_static_mfree(eHIB, stext - eHIB);
		DBG("ml_static_mfree(%p,%p) for padding above text\n",
		    (void *) etext, (void *) (sdata - etext));
		ml_static_mfree(etext, sdata - etext);

		/*
		 * Coalesce text pages into large pages.
		 */
		for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
			pt_entry_t	*ptep;
			vm_offset_t	pte_phys;
			pt_entry_t	*pdep;
			pt_entry_t	pde;

			pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			DBG("myva: %p pdep: %p ptep: %p\n",
			    (void *) myva, (void *) pdep, (void *) ptep);
			if ((*ptep & INTEL_PTE_VALID) == 0)
				continue;
			pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
			pde = *pdep & PTMASK;	/* page attributes from pde */
			pde |= INTEL_PTE_PS;	/* make it a 2M entry */
			pde |= pte_phys;	/* take page frame from pte */

			if (wpkernel)
				pde &= ~INTEL_PTE_WRITE;
			DBG("pmap_store_pte(%p,0x%llx)\n",
			    (void *)pdep, pde);
			pmap_store_pte(pdep, pde);

			/*
			 * Free the now-unused level-1 pte.
			 * Note: ptep is a virtual address to the pte in the
			 * recursive map.  We can't use this address to free
			 * the page.  Instead we need to compute its address
			 * in the Idle PTEs in "low memory".
			 */
			vm_offset_t vm_ptep = (vm_offset_t) KPTphys
						+ (pte_phys >> PTPGSHIFT);
			DBG("ml_static_mfree(%p,0x%x) for pte\n",
			    (void *) vm_ptep, PAGE_SIZE);
			ml_static_mfree(vm_ptep, PAGE_SIZE);
		}

		/* Change variable read by sysctl machdep.pmap */
		pmap_kernel_text_ps = I386_LPGBYTES;
	}

	boolean_t doconstro = TRUE;

	(void) PE_parse_boot_argn("dataconstro", &doconstro, sizeof(doconstro));

	if ((sconstdata | econstdata) & PAGE_MASK) {
		kprintf("Const DATA misaligned 0x%lx 0x%lx\n", sconstdata, econstdata);
		if ((sconstdata & PAGE_MASK) || (doconstro_override == FALSE))
			doconstro = FALSE;
	}

	if ((sconstdata > edata) || (sconstdata < sdata) || ((econstdata - sconstdata) >= (edata - sdata))) {
		kprintf("Const DATA incorrect size 0x%lx 0x%lx 0x%lx 0x%lx\n", sconstdata, econstdata, sdata, edata);
		doconstro = FALSE;
	}

	if (doconstro)
		kprintf("Marking const DATA read-only\n");

	vm_offset_t dva;

	for (dva = sdata; dva < edata; dva += I386_PGBYTES) {
		assert(((sdata | edata) & PAGE_MASK) == 0);
		if ((sdata | edata) & PAGE_MASK) {
			kprintf("DATA misaligned, 0x%lx, 0x%lx\n", sdata, edata);
			break;
		}

		pt_entry_t dpte, *dptep = pmap_pte(kernel_pmap, dva);

		dpte = *dptep;

		assert((dpte & INTEL_PTE_VALID));
		if ((dpte & INTEL_PTE_VALID) == 0) {
			kprintf("Missing data mapping 0x%lx 0x%lx 0x%lx\n", dva, sdata, edata);
			continue;
		}

		dpte |= INTEL_PTE_NX;
		if (doconstro && (dva >= sconstdata) && (dva < econstdata)) {
			dpte &= ~INTEL_PTE_WRITE;
		}
		pmap_store_pte(dptep, dpte);
	}
	kernel_segment_command_t *seg;
	kernel_section_t         *sec;

	for (seg = firstseg(); seg != NULL; seg = nextsegfromheader(&_mh_execute_header, seg)) {
		if (!strcmp(seg->segname, "__TEXT") ||
		    !strcmp(seg->segname, "__DATA")) {
			continue;
		}
		//XXX
		if (!strcmp(seg->segname, "__KLD")) {
			continue;
		}
		if (!strcmp(seg->segname, "__HIB")) {
			for (sec = firstsect(seg); sec != NULL; sec = nextsect(seg, sec)) {
				if (sec->addr & PAGE_MASK)
					panic("__HIB segment's sections misaligned");
				if (!strcmp(sec->sectname, "__text")) {
					pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), FALSE, TRUE);
				} else {
					pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), TRUE, FALSE);
				}
			}
		} else {
			pmap_mark_range(kernel_pmap, seg->vmaddr, round_page_64(seg->vmsize), TRUE, FALSE);
		}
	}

	/*
	 * If we're debugging, map the low global vector page at the fixed
	 * virtual address.  Otherwise, remove the mapping for this.
	 */
	if (debug_boot_arg) {
		pt_entry_t *pte = NULL;
		if (0 == (pte = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS)))
			panic("lowmem pte");
		/* make sure it is defined on page boundary */
		assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
		pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
					| INTEL_PTE_REF
					| INTEL_PTE_MOD
					| INTEL_PTE_WIRED
					| INTEL_PTE_VALID
					| INTEL_PTE_WRITE
					| INTEL_PTE_NX);
	} else {
		pmap_remove(kernel_pmap,
			    LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE);
	}

	splx(spl);
	if (pmap_pcid_ncpus)
		tlb_flush_global();
	else
		flush_tlb_raw();
}

/*
 * this function is only used for debugging from the vm layer
 */
boolean_t
pmap_verify_free(
	ppnum_t pn)
{
	pv_rooted_entry_t	pv_h;
	int			pai;
	boolean_t		result;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return(TRUE);

	if (pn == vm_page_guard_addr)
		return TRUE;

	pai = ppn_to_pai(pn);
	if (!IS_MANAGED_PAGE(pai))
		return(FALSE);
	pv_h = pai_to_pvh(pn);
	result = (pv_h->pmap == PMAP_NULL);
	return(result);
}

boolean_t
pmap_is_empty(
	pmap_t		pmap,
	vm_map_offset_t	va_start,
	vm_map_offset_t	va_end)
{
	vm_map_offset_t	offset;
	ppnum_t		phys_page;

	if (pmap == PMAP_NULL) {
		return TRUE;
	}

	/*
	 * Check the resident page count
	 * - if it's zero, the pmap is completely empty.
	 * This short-circuit test prevents a virtual address scan which is
	 * painfully slow for 64-bit spaces.
	 * This assumes the count is correct
	 * .. the debug kernel ought to be checking perhaps by page table walk.
	 */
	if (pmap->stats.resident_count == 0)
		return TRUE;

	for (offset = va_start;
	     offset < va_end;
	     offset += PAGE_SIZE_64) {
		phys_page = pmap_find_phys(pmap, offset);
		if (phys_page) {
			kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
				"page %d at 0x%llx\n",
				pmap, va_start, va_end, phys_page, offset);
			return FALSE;
		}
	}

	return TRUE;
}


/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
pmap_t
pmap_create(
	ledger_t	ledger,
	vm_map_size_t	sz,
	boolean_t	is_64bit)
{
	pmap_t		p;
	vm_size_t	size;
	pml4_entry_t	*pml4;
	pml4_entry_t	*kpml4;

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
		   (uint32_t) (sz>>32), (uint32_t) sz, is_64bit, 0, 0);

	size = (vm_size_t) sz;

	/*
	 *	A software use-only map doesn't even need a map.
	 */
	if (size != 0) {
		return(PMAP_NULL);
	}

	p = (pmap_t) zalloc(pmap_zone);
	if (PMAP_NULL == p)
		panic("pmap_create zalloc");
	/* Zero all fields */
	bzero(p, sizeof(*p));
	/* init counts now since we'll be bumping some */
	simple_lock_init(&p->lock, 0);
#if 00
	p->stats.resident_count = 0;
	p->stats.resident_max = 0;
	p->stats.wired_count = 0;
#else
	bzero(&p->stats, sizeof (p->stats));
#endif
	p->ref_count = 1;
	p->nx_enabled = 1;
	p->pm_shared = FALSE;
	ledger_reference(ledger);
	p->ledger = ledger;

	p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;
	if (pmap_pcid_ncpus)
		pmap_pcid_initialize(p);

	p->pm_pml4 = zalloc(pmap_anchor_zone);

	pmap_assert((((uintptr_t)p->pm_pml4) & PAGE_MASK) == 0);

	memset((char *)p->pm_pml4, 0, PAGE_SIZE);

	p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);

	/* allocate the vm_objs to hold the pdpt, pde and pte pages */

	p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS) * PAGE_SIZE);
	if (NULL == p->pm_obj_pml4)
		panic("pmap_create pml4 obj");

	p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS) * PAGE_SIZE);
	if (NULL == p->pm_obj_pdpt)
		panic("pmap_create pdpt obj");

	p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS) * PAGE_SIZE);
	if (NULL == p->pm_obj)
		panic("pmap_create pte obj");

	/* All pmaps share the kernel's pml4 */
	pml4 = pmap64_pml4(p, 0ULL);
	kpml4 = kernel_pmap->pm_pml4;
	pml4[KERNEL_PML4_INDEX]         = kpml4[KERNEL_PML4_INDEX];
	pml4[KERNEL_KEXTS_INDEX]        = kpml4[KERNEL_KEXTS_INDEX];
	pml4[KERNEL_PHYSMAP_PML4_INDEX] = kpml4[KERNEL_PHYSMAP_PML4_INDEX];

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
		   p, is_64bit, 0, 0, 0);

	return(p);
}

/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
void
pmap_destroy(pmap_t p)
{
	int c;

	if (p == PMAP_NULL)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
		   p, 0, 0, 0, 0);

	PMAP_LOCK(p);

	c = --p->ref_count;

	pmap_assert((current_thread() && (current_thread()->map)) ? (current_thread()->map->pmap != p) : TRUE);

	if (c == 0) {
		/*
		 * If some cpu is not using the physical pmap pointer that it
		 * is supposed to be (see set_dirbase), we might be using the
		 * pmap that is being destroyed! Make sure we are
		 * physically on the right pmap:
		 */
		PMAP_UPDATE_TLBS(p, 0x0ULL, 0xFFFFFFFFFFFFF000ULL);
		if (pmap_pcid_ncpus)
			pmap_destroy_pcid_sync(p);
	}

	PMAP_UNLOCK(p);

	if (c != 0) {
		PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
			   p, 1, 0, 0, 0);
		pmap_assert(p == kernel_pmap);
		return;	/* still in use */
	}

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	int inuse_ptepages = 0;

	zfree(pmap_anchor_zone, p->pm_pml4);

	inuse_ptepages += p->pm_obj_pml4->resident_page_count;
	vm_object_deallocate(p->pm_obj_pml4);

	inuse_ptepages += p->pm_obj_pdpt->resident_page_count;
	vm_object_deallocate(p->pm_obj_pdpt);

	inuse_ptepages += p->pm_obj->resident_page_count;
	vm_object_deallocate(p->pm_obj);

	OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count);
	PMAP_ZINFO_PFREE(p, inuse_ptepages * PAGE_SIZE);
	ledger_dereference(p->ledger);
	zfree(pmap_zone, p);

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

/*
 *	Add a reference to the specified pmap.
 */
void
pmap_reference(pmap_t p)
{
	if (p != PMAP_NULL) {
		PMAP_LOCK(p);
		p->ref_count++;
		PMAP_UNLOCK(p);
	}
}

/*
 *	Remove phys addr if mapped in specified map
 */
void
pmap_remove_some_phys(
	__unused pmap_t		map,
	__unused ppnum_t	pn)
{

/* Implement to support working set code */

}


void
pmap_protect(
	pmap_t		map,
	vm_map_offset_t	sva,
	vm_map_offset_t	eva,
	vm_prot_t	prot)
{
	pmap_protect_options(map, sva, eva, prot, 0, NULL);
}


/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	Will not increase permissions.
 */
void
pmap_protect_options(
	pmap_t		map,
	vm_map_offset_t	sva,
	vm_map_offset_t	eva,
	vm_prot_t	prot,
	unsigned int	options,
	void		*arg)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	vm_map_offset_t	lva;
	vm_map_offset_t	orig_sva;
	boolean_t	set_NX;
	int		num_found = 0;

	pmap_intr_assert();

	if (map == PMAP_NULL)
		return;

	if (prot == VM_PROT_NONE) {
		pmap_remove_options(map, sva, eva, options);
		return;
	}
	PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
		   map,
		   (uint32_t) (sva >> 32), (uint32_t) sva,
		   (uint32_t) (eva >> 32), (uint32_t) eva);

	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled)
		set_NX = FALSE;
	else
		set_NX = TRUE;

	PMAP_LOCK(map);

	orig_sva = sva;
	while (sva < eva) {
		lva = (sva + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (lva > eva)
			lva = eva;
		pde = pmap_pde(map, sva);
		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/* superpage */
				spte = pde;
				epte = spte+1;	/* excluded */
			} else {
				spte = pmap_pte(map, (sva & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(sva)];
				epte = &spte[intel_btop(lva - sva)];
			}

			for (; spte < epte; spte++) {
				if (!(*spte & INTEL_PTE_VALID))
					continue;

				if (prot & VM_PROT_WRITE)
					pmap_update_pte(spte, 0, INTEL_PTE_WRITE);
				else
					pmap_update_pte(spte, INTEL_PTE_WRITE, 0);

				if (set_NX)
					pmap_update_pte(spte, 0, INTEL_PTE_NX);
				else
					pmap_update_pte(spte, INTEL_PTE_NX, 0);
				num_found++;
			}
		}
		sva = lva;
	}
	if (num_found) {
		if (options & PMAP_OPTIONS_NOFLUSH)
			PMAP_UPDATE_TLBS_DELAYED(map, orig_sva, eva, (pmap_flush_context *)arg);
		else
			PMAP_UPDATE_TLBS(map, orig_sva, eva);
	}
	PMAP_UNLOCK(map);

	PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
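
/*
 * Worked example for the loop above (the protections are hypothetical):
 *
 *	pmap_protect(map, sva, eva, VM_PROT_READ);
 *
 * clears INTEL_PTE_WRITE on every valid PTE in [sva, eva) and, when both
 * nx_enabled and map->nx_enabled are set, also sets INTEL_PTE_NX,
 * leaving the range read-only and non-executable.  A later call with
 * VM_PROT_READ | VM_PROT_WRITE sets INTEL_PTE_WRITE back.  VM_PROT_NONE
 * never reaches this loop; it is handled up front by
 * pmap_remove_options().
 */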

/* Map a (possibly) autogenned block */
void
pmap_map_block(
	pmap_t		pmap,
	addr64_t	va,
	ppnum_t		pa,
	uint32_t	size,
	vm_prot_t	prot,
	int		attr,
	__unused unsigned int	flags)
{
	uint32_t	page;
	int		cur_page_size;

	if (attr & VM_MEM_SUPERPAGE)
		cur_page_size = SUPERPAGE_SIZE;
	else
		cur_page_size = PAGE_SIZE;

	for (page = 0; page < size; page += cur_page_size/PAGE_SIZE) {
		pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
		va += cur_page_size;
		pa += cur_page_size/PAGE_SIZE;
	}
}

kern_return_t
pmap_expand_pml4(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	unsigned int	options)
{
	vm_page_t	m;
	pmap_paddr_t	pa;
	uint64_t	i;
	ppnum_t		pn;
	pml4_entry_t	*pml4p;

	DBG("pmap_expand_pml4(%p,%p)\n", map, (void *)vaddr);

	/*
	 *	Allocate a VM page for the pml4 page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
			return KERN_RESOURCE_SHORTAGE;
		VM_PAGE_WAIT();
	}
	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pml4idx(map, vaddr);

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);
	OSAddAtomic64(1, &alloc_ptepages_count);
	PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj_pml4);

	PMAP_LOCK(map);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj_pml4);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(map, PAGE_SIZE);
		return KERN_SUCCESS;
	}

#if 0 /* DEBUG */
	if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE)) {
		panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj_pml4, vaddr, i);
	}
#endif
	vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE);
	vm_object_unlock(map->pm_obj_pml4);

	/*
	 *	Set the page directory entry for this page table.
	 */
	pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */

	pmap_store_pte(pml4p, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);

	return KERN_SUCCESS;
}

kern_return_t
pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options)
{
	vm_page_t	m;
	pmap_paddr_t	pa;
	uint64_t	i;
	ppnum_t		pn;
	pdpt_entry_t	*pdptp;

	DBG("pmap_expand_pdpt(%p,%p)\n", map, (void *)vaddr);

	while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
		kern_return_t pep4kr = pmap_expand_pml4(map, vaddr, options);
		if (pep4kr != KERN_SUCCESS)
			return pep4kr;
	}

	/*
	 *	Allocate a VM page for the pdpt page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
			return KERN_RESOURCE_SHORTAGE;
		VM_PAGE_WAIT();
	}

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdptidx(map, vaddr);

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);
	OSAddAtomic64(1, &alloc_ptepages_count);
	PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj_pdpt);

	PMAP_LOCK(map);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj_pdpt);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(map, PAGE_SIZE);
		return KERN_SUCCESS;
	}

#if 0 /* DEBUG */
	if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE)) {
		panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj_pdpt, vaddr, i);
	}
#endif
	vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE);
	vm_object_unlock(map->pm_obj_pdpt);

	/*
	 *	Set the page directory entry for this page table.
	 */
	pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */

	pmap_store_pte(pdptp, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);

	return KERN_SUCCESS;
}



/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
kern_return_t
pmap_expand(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	unsigned int	options)
{
	pt_entry_t		*pdp;
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	ppnum_t			pn;

	/*
	 * For the kernel, the virtual address must be in or above the basement,
	 * which is for kexts and is in the 512GB immediately below the kernel.
	 * XXX - should use VM_MIN_KERNEL_AND_KEXT_ADDRESS not KERNEL_BASEMENT
	 */
	if (map == kernel_pmap &&
	    !(vaddr >= KERNEL_BASEMENT && vaddr <= VM_MAX_KERNEL_ADDRESS))
		panic("pmap_expand: bad vaddr 0x%llx for kernel pmap", vaddr);

	while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
		kern_return_t pepkr = pmap_expand_pdpt(map, vaddr, options);
		if (pepkr != KERN_SUCCESS)
			return pepkr;
	}

	/*
	 *	Allocate a VM page for the pde entries.
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
			return KERN_RESOURCE_SHORTAGE;
		VM_PAGE_WAIT();
	}

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdeidx(map, vaddr);

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);
	OSAddAtomic64(1, &alloc_ptepages_count);
	PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj);

	PMAP_LOCK(map);

	/*
	 *	See if someone else expanded us first
	 */
	if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(map, PAGE_SIZE);
		return KERN_SUCCESS;
	}

#if 0 /* DEBUG */
	if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE)) {
		panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj, vaddr, i);
	}
#endif
	vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE);
	vm_object_unlock(map->pm_obj);

	/*
	 *	Set the page directory entry for this page table.
	 */
	pdp = pmap_pde(map, vaddr);
	pmap_store_pte(pdp, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);

	return KERN_SUCCESS;
}

/* On K64 machines with more than 32GB of memory, pmap_steal_memory
 * will allocate past the 1GB of pre-expanded virtual kernel area. This
 * function allocates all the page tables using memory from the same pool
 * that pmap_steal_memory uses, rather than calling vm_page_grab (which
 * isn't available yet). */
void
pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr)
{
	ppnum_t		pn;
	pt_entry_t	*pte;

	PMAP_LOCK(pmap);

	if (pmap64_pdpt(pmap, vaddr) == PDPT_ENTRY_NULL) {
		if (!pmap_next_page_hi(&pn))
			panic("pmap_pre_expand");

		pmap_zero_page(pn);

		pte = pmap64_pml4(pmap, vaddr);

		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);
	}

	if (pmap64_pde(pmap, vaddr) == PD_ENTRY_NULL) {
		if (!pmap_next_page_hi(&pn))
			panic("pmap_pre_expand");

		pmap_zero_page(pn);

		pte = pmap64_pdpt(pmap, vaddr);

		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);
	}

	if (pmap_pte(pmap, vaddr) == PT_ENTRY_NULL) {
		if (!pmap_next_page_hi(&pn))
			panic("pmap_pre_expand");

		pmap_zero_page(pn);

		pte = pmap64_pde(pmap, vaddr);

		pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);
	}

	PMAP_UNLOCK(pmap);
}
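
/*
 * How the expand routines compose (a sketch of a typical caller, not a
 * new code path): code that needs a PTE for `vaddr` loops on
 * pmap_expand(), which fills in any missing upper levels top-down:
 *
 *	while (pmap_pte(map, vaddr) == PT_ENTRY_NULL) {
 *		kern_return_t kr = pmap_expand(map, vaddr, options);
 *		if (kr != KERN_SUCCESS)
 *			return kr;  // e.g. KERN_RESOURCE_SHORTAGE with NOWAIT
 *	}
 *
 * pmap_expand() loops on pmap_expand_pdpt() until a PDE exists, and
 * pmap_expand_pdpt() loops on pmap_expand_pml4() until a PDPT exists,
 * so one call chain can materialize PML4E -> PDPTE -> PDE as needed.
 * pmap_pre_expand() is the boot-time analogue that takes pages from
 * pmap_next_page_hi() instead of vm_page_grab().
 */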

/*
 *	pmap_sync_page_data_phys(ppnum_t pa)
 *
 *	Invalidates all of the instruction cache on a physical page and
 *	pushes any dirty data from the data cache for the same physical page.
 *	Not required in i386.
 */
void
pmap_sync_page_data_phys(__unused ppnum_t pa)
{
	return;
}

/*
 *	pmap_sync_page_attributes_phys(ppnum_t pa)
 *
 *	Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(ppnum_t pa)
{
	cache_flush_page_phys(pa);
}



#ifdef CURRENTLY_UNUSED_AND_UNTESTED

int	collect_ref;
int	collect_unref;

/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *	Usage:
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(
	pmap_t		p)
{
	register pt_entry_t	*pdp, *ptp;
	pt_entry_t		*eptp;
	int			wired;

	if (p == PMAP_NULL)
		return;

	if (p == kernel_pmap)
		return;

	/*
	 *	Garbage collect map.
	 */
	PMAP_LOCK(p);

	for (pdp = (pt_entry_t *)p->dirbase;
	     pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
	     pdp++)
	{
		if (*pdp & INTEL_PTE_VALID) {
			if (*pdp & INTEL_PTE_REF) {
				pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
				collect_ref++;
			} else {
				collect_unref++;
				ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
				eptp = ptp + NPTEPG;

				/*
				 * If the pte page has any wired mappings, we cannot
				 * free it.
				 */
				wired = 0;
				{
					register pt_entry_t *ptep;
					for (ptep = ptp; ptep < eptp; ptep++) {
						if (iswired(*ptep)) {
							wired = 1;
							break;
						}
					}
				}
				if (!wired) {
					/*
					 * Remove the virtual addresses mapped by this pte page.
					 */
					pmap_remove_range(p,
						pdetova(pdp - (pt_entry_t *)p->dirbase),
						ptp,
						eptp);

					/*
					 * Invalidate the page directory pointer.
					 */
					pmap_store_pte(pdp, 0x0);

					PMAP_UNLOCK(p);

					/*
					 * And free the pte page itself.
					 */
					{
						register vm_page_t m;

						vm_object_lock(p->pm_obj);

						m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]) * PAGE_SIZE);
						if (m == VM_PAGE_NULL)
							panic("pmap_collect: pte page not in object");

						vm_object_unlock(p->pm_obj);

						VM_PAGE_FREE(m);

						OSAddAtomic(-1, &inuse_ptepages_count);
						PMAP_ZINFO_PFREE(p, PAGE_SIZE);
					}

					PMAP_LOCK(p);
				}
			}
		}
	}

	PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
	PMAP_UNLOCK(p);
	return;
}
#endif


void
pmap_copy_page(ppnum_t src, ppnum_t dst)
{
	bcopy_phys((addr64_t)i386_ptob(src),
		   (addr64_t)i386_ptob(dst),
		   PAGE_SIZE);
}


/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
/*
 * Routine:	pmap_pageable
 * Function:
 *	Make the specified pages (by pmap, offset)
 *	pageable (or not) as requested.
 *
 *	A page which is not pageable may not take
 *	a fault; therefore, its page table entry
 *	must remain valid for the duration.
 *
 *	This routine is merely advisory; pmap_enter
 *	will specify that these pages are to be wired
 *	down (or not) as appropriate.
 */
void
pmap_pageable(
	__unused pmap_t			pmap,
	__unused vm_map_offset_t	start_addr,
	__unused vm_map_offset_t	end_addr,
	__unused boolean_t		pageable)
{
#ifdef lint
	pmap++; start_addr++; end_addr++; pageable++;
#endif /* lint */
}

void
invalidate_icache(__unused vm_offset_t addr,
		  __unused unsigned cnt,
		  __unused int phys)
{
	return;
}

void
flush_dcache(__unused vm_offset_t addr,
	     __unused unsigned count,
	     __unused int phys)
{
	return;
}

#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
{
	thread_t thread = current_thread();
	uint64_t ccr3;

	if (current_map() == kernel_map)
		return KERN_FAILURE;
	else if (((ccr3 = get_cr3_base()) != thread->map->pmap->pm_cr3) && (no_shared_cr3 == FALSE))
		return KERN_FAILURE;
	else if (no_shared_cr3 && (ccr3 != kernel_pmap->pm_cr3))
		return KERN_FAILURE;
	else
		return KERN_SUCCESS;
}

kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
{
	return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */

#include <mach_vm_debug.h>
#if MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	__unused pmap_t		pmap,
	__unused vm_offset_t	*listp,
	__unused int		space)
{
	return 0;
}
#endif /* MACH_VM_DEBUG */

/* temporary workaround */
boolean_t
coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
{
#if 0
	pt_entry_t *ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
		return FALSE;
	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
#else
	return TRUE;
#endif
}

boolean_t
phys_page_exists(ppnum_t pn)
{
	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return TRUE;

	if (pn == vm_page_guard_addr)
		return FALSE;

	if (!IS_MANAGED_PAGE(ppn_to_pai(pn)))
		return FALSE;

	return TRUE;
}

void
pmap_switch(pmap_t tpmap)
{
	spl_t s;

	s = splhigh();		/* Make sure interruptions are disabled */
	set_dirbase(tpmap, current_thread(), cpu_number());
	splx(s);
}
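
/*
 * A minimal sketch (assumption: this mirrors, but is not, the actual
 * context-switch path elsewhere in xnu) of how pmap_switch() above is
 * meant to be driven: when a thread belonging to a different task is
 * dispatched, the new task's physical map is installed so that user
 * translations come from its hierarchy.
 */
#if 0	/* illustrative only */
static void
switch_address_space_example(thread_t new_thread)
{
	/* Loads the new CR3 (or revalidates the PCID) at splhigh. */
	pmap_switch(new_thread->map->pmap);
}
#endif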
/*
 * Disable no-execute capability on
 * the specified pmap.
 */
void
pmap_disable_NX(pmap_t pmap)
{
	pmap->nx_enabled = 0;
}

void
pt_fake_zone_init(int zone_index)
{
	pt_fake_zone_index = zone_index;
}

void
pt_fake_zone_info(
	int		*count,
	vm_size_t	*cur_size,
	vm_size_t	*max_size,
	vm_size_t	*elem_size,
	vm_size_t	*alloc_size,
	uint64_t	*sum_size,
	int		*collectable,
	int		*exhaustable,
	int		*caller_acct)
{
	*count      = inuse_ptepages_count;
	*cur_size   = PAGE_SIZE * inuse_ptepages_count;
	*max_size   = PAGE_SIZE * (inuse_ptepages_count +
				   vm_page_inactive_count +
				   vm_page_active_count +
				   vm_page_free_count);
	*elem_size  = PAGE_SIZE;
	*alloc_size = PAGE_SIZE;
	*sum_size   = alloc_ptepages_count * PAGE_SIZE;

	*collectable = 1;
	*exhaustable = 0;
	*caller_acct = 1;
}

void
pmap_flush_context_init(pmap_flush_context *pfc)
{
	pfc->pfc_cpus = 0;
	pfc->pfc_invalid_global = 0;
}

extern unsigned TLBTimeOut;

void
pmap_flush(
	pmap_flush_context *pfc)
{
	unsigned int	my_cpu;
	unsigned int	cpu;
	unsigned int	cpu_bit;
	cpumask_t	cpus_to_respond = 0;
	cpumask_t	cpus_to_signal = 0;
	cpumask_t	cpus_signaled = 0;
	boolean_t	flush_self = FALSE;
	uint64_t	deadline;

	mp_disable_preemption();

	my_cpu = cpu_number();
	cpus_to_signal = pfc->pfc_cpus;

	PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_DELAYED_TLBS) | DBG_FUNC_START,
			    NULL, cpus_to_signal, 0, 0, 0);

	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus && cpus_to_signal; cpu++, cpu_bit <<= 1) {

		if (cpus_to_signal & cpu_bit) {

			cpus_to_signal &= ~cpu_bit;

			if (!cpu_datap(cpu)->cpu_running)
				continue;

			if (pfc->pfc_invalid_global & cpu_bit)
				cpu_datap(cpu)->cpu_tlb_invalid_global = TRUE;
			else
				cpu_datap(cpu)->cpu_tlb_invalid_local = TRUE;
			mfence();

			if (cpu == my_cpu) {
				flush_self = TRUE;
				continue;
			}
			if (CPU_CR3_IS_ACTIVE(cpu)) {
				cpus_to_respond |= cpu_bit;
				i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
			}
		}
	}
	cpus_signaled = cpus_to_respond;

	/*
	 * Flush local tlb if required.
	 * Do this now to overlap with other processors responding.
	 */
	if (flush_self && cpu_datap(my_cpu)->cpu_tlb_invalid != FALSE)
		process_pmap_updates();

	if (cpus_to_respond) {

		deadline = mach_absolute_time() +
		    (TLBTimeOut ? TLBTimeOut : LockTimeOut);
		boolean_t is_timeout_traced = FALSE;

		/*
		 * Wait for those other cpus to acknowledge.
		 */
		while (cpus_to_respond != 0) {
			long orig_acks = 0;

			for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
				/* Consider checking local/global invalidity
				 * as appropriate in the PCID case.
				 */
				if ((cpus_to_respond & cpu_bit) != 0) {
					if (!cpu_datap(cpu)->cpu_running ||
					    cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
					    !CPU_CR3_IS_ACTIVE(cpu)) {
						cpus_to_respond &= ~cpu_bit;
					}
					cpu_pause();
				}
				if (cpus_to_respond == 0)
					break;
			}
			if (cpus_to_respond && (mach_absolute_time() > deadline)) {
				if (machine_timeout_suspended())
					continue;
				if (TLBTimeOut == 0) {
					if (is_timeout_traced)
						continue;
					PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS_TO),
					    NULL, cpus_to_signal, cpus_to_respond, 0, 0);
					is_timeout_traced = TRUE;
					continue;
				}
				pmap_tlb_flush_timeout = TRUE;
				orig_acks = NMIPI_acks;
				mp_cpus_NMIPI(cpus_to_respond);

				panic("TLB invalidation IPI timeout: "
				    "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
				    cpus_to_respond, orig_acks, NMIPI_acks);
			}
		}
	}
	PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_DELAYED_TLBS) | DBG_FUNC_END,
			    NULL, cpus_signaled, flush_self, 0, 0);

	mp_enable_preemption();
}
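
/*
 * A minimal sketch of the deferred-shootdown pattern the two routines
 * above support (assumption: the batching caller shown here is
 * hypothetical; pmap_flush_tlbs() is defined below). Instead of paying
 * for one IPI round per mapping change, a caller accumulates the
 * target cpus in a pmap_flush_context via PMAP_DELAY_TLB_FLUSH and
 * issues a single combined flush at the end.
 */
#if 0	/* illustrative only */
static void
batched_flush_example(pmap_t pmap, vm_map_offset_t *vas, int nvas)
{
	pmap_flush_context	pfc;
	int			i;

	pmap_flush_context_init(&pfc);

	for (i = 0; i < nvas; i++) {
		/* ... modify the mapping at vas[i] under PMAP_LOCK ... */
		pmap_flush_tlbs(pmap, vas[i], vas[i] + PAGE_SIZE,
		    PMAP_DELAY_TLB_FLUSH, &pfc);
	}
	/* One IPI round for everything accumulated above. */
	pmap_flush(&pfc);
}
#endif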
/*
 * Called with pmap locked, we:
 *	- scan through per-cpu data to see which other cpus need to flush
 *	- send an IPI to each non-idle cpu to be flushed
 *	- wait for all to signal back that they are inactive or we see that
 *	  they are at a safe point (idle).
 *	- flush the local tlb if active for this pmap
 *	- return ... the caller will unlock the pmap
 */

void
pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int options, pmap_flush_context *pfc)
{
	unsigned int	cpu;
	unsigned int	cpu_bit;
	cpumask_t	cpus_to_signal;
	unsigned int	my_cpu = cpu_number();
	pmap_paddr_t	pmap_cr3 = pmap->pm_cr3;
	boolean_t	flush_self = FALSE;
	uint64_t	deadline;
	boolean_t	pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
	boolean_t	need_global_flush = FALSE;
	uint32_t	event_code;

	assert((processor_avail_count < 2) ||
	       (ml_get_interrupts_enabled() && get_preemption_level() != 0));

	event_code = (pmap == kernel_pmap) ? PMAP_CODE(PMAP__FLUSH_KERN_TLBS)
					   : PMAP_CODE(PMAP__FLUSH_TLBS);
	PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_START,
			    pmap, options, startv, endv, 0);

	/*
	 * Scan other cpus for matching active or task CR3.
	 * For idle cpus (with no active map) we mark them invalid but
	 * don't signal -- they'll check as they go busy.
	 */
	cpus_to_signal = 0;

	if (pmap_pcid_ncpus) {
		if (pmap_is_shared)
			need_global_flush = TRUE;
		pmap_pcid_invalidate_all_cpus(pmap);
		mfence();
	}
	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
		if (!cpu_datap(cpu)->cpu_running)
			continue;
		uint64_t cpu_active_cr3 = CPU_GET_ACTIVE_CR3(cpu);
		uint64_t cpu_task_cr3 = CPU_GET_TASK_CR3(cpu);

		if ((pmap_cr3 == cpu_task_cr3) ||
		    (pmap_cr3 == cpu_active_cr3) ||
		    (pmap_is_shared)) {

			if (options & PMAP_DELAY_TLB_FLUSH) {
				if (need_global_flush == TRUE)
					pfc->pfc_invalid_global |= cpu_bit;
				pfc->pfc_cpus |= cpu_bit;

				continue;
			}
			if (cpu == my_cpu) {
				flush_self = TRUE;
				continue;
			}
			if (need_global_flush == TRUE)
				cpu_datap(cpu)->cpu_tlb_invalid_global = TRUE;
			else
				cpu_datap(cpu)->cpu_tlb_invalid_local = TRUE;
			mfence();

			/*
			 * We don't need to signal processors which will flush
			 * lazily at the idle state or kernel boundary.
			 * For example, if we're invalidating the kernel pmap,
			 * processors currently in userspace don't need to flush
			 * their TLBs until the next time they enter the kernel.
			 * Alterations to the address space of a task active
			 * on a remote processor result in a signal, to
			 * account for copy operations. (There may be room
			 * for optimization in such cases).
			 * The order of the loads below with respect
			 * to the store to the "cpu_tlb_invalid" field above
			 * is important -- hence the barrier.
			 */
			if (CPU_CR3_IS_ACTIVE(cpu) &&
			    (pmap_cr3 == CPU_GET_ACTIVE_CR3(cpu) ||
			     pmap->pm_shared ||
			     (pmap_cr3 == CPU_GET_TASK_CR3(cpu)))) {
				cpus_to_signal |= cpu_bit;
				i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
			}
		}
	}
	if ((options & PMAP_DELAY_TLB_FLUSH))
		goto out;

	/*
	 * Flush local tlb if required.
	 * Do this now to overlap with other processors responding.
	 */
	if (flush_self) {
		if (pmap_pcid_ncpus) {
			pmap_pcid_validate_cpu(pmap, my_cpu);
			if (pmap_is_shared)
				tlb_flush_global();
			else
				flush_tlb_raw();
		} else
			flush_tlb_raw();
	}

	if (cpus_to_signal) {
		cpumask_t cpus_to_respond = cpus_to_signal;

		deadline = mach_absolute_time() +
		    (TLBTimeOut ? TLBTimeOut : LockTimeOut);
		boolean_t is_timeout_traced = FALSE;

		/*
		 * Wait for those other cpus to acknowledge.
		 */
		while (cpus_to_respond != 0) {
			long orig_acks = 0;

			for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
				/* Consider checking local/global invalidity
				 * as appropriate in the PCID case.
				 */
				if ((cpus_to_respond & cpu_bit) != 0) {
					if (!cpu_datap(cpu)->cpu_running ||
					    cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
					    !CPU_CR3_IS_ACTIVE(cpu)) {
						cpus_to_respond &= ~cpu_bit;
					}
					cpu_pause();
				}
				if (cpus_to_respond == 0)
					break;
			}
			if (cpus_to_respond && (mach_absolute_time() > deadline)) {
				if (machine_timeout_suspended())
					continue;
				if (TLBTimeOut == 0) {
					/* cut tracepoint but don't panic */
					if (is_timeout_traced)
						continue;
					PMAP_TRACE_CONSTANT(
					    PMAP_CODE(PMAP__FLUSH_TLBS_TO),
					    pmap, cpus_to_signal, cpus_to_respond, 0, 0);
					is_timeout_traced = TRUE;
					continue;
				}
				pmap_tlb_flush_timeout = TRUE;
				orig_acks = NMIPI_acks;
				mp_cpus_NMIPI(cpus_to_respond);

				panic("TLB invalidation IPI timeout: "
				    "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
				    cpus_to_respond, orig_acks, NMIPI_acks);
			}
		}
	}

	if (__improbable((pmap == kernel_pmap) && (flush_self != TRUE))) {
		panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, pmap_cr3: 0x%llx, CPU active CR3: 0x%llX, CPU Task Map: %d",
		    kernel_pmap->pm_cr3, pmap_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map);
	}

out:
	PMAP_TRACE_CONSTANT(event_code | DBG_FUNC_END,
			    pmap, cpus_to_signal, startv, endv, 0);
}

void
process_pmap_updates(void)
{
	int ccpu = cpu_number();

	pmap_assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	if (pmap_pcid_ncpus) {
		pmap_pcid_validate_current();
		if (cpu_datap(ccpu)->cpu_tlb_invalid_global) {
			cpu_datap(ccpu)->cpu_tlb_invalid = FALSE;
			tlb_flush_global();
		} else {
			cpu_datap(ccpu)->cpu_tlb_invalid_local = FALSE;
			flush_tlb_raw();
		}
	} else {
		current_cpu_datap()->cpu_tlb_invalid = FALSE;
		flush_tlb_raw();
	}

	mfence();
}

void
pmap_update_interrupt(void)
{
	PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
		   0, 0, 0, 0, 0);

	if (current_cpu_datap()->cpu_tlb_invalid)
		process_pmap_updates();

	PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
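
/*
 * For reference, the acknowledgement test that the wait loops in
 * pmap_flush() and pmap_flush_tlbs() above apply to each signaled cpu,
 * factored out as a predicate (a sketch for exposition only; the real
 * loops open-code it): a cpu needs no further waiting once it has gone
 * offline, has cleared cpu_tlb_invalid in process_pmap_updates(), or is
 * at a safe point with its CR3 inactive (it will flush lazily before
 * going busy again).
 */
#if 0	/* illustrative only */
static inline boolean_t
tlb_flush_acked_example(unsigned int cpu)
{
	return (!cpu_datap(cpu)->cpu_running ||
		cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
		!CPU_CR3_IS_ACTIVE(cpu));
}
#endif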
#include <mach/mach_vm.h>	/* mach_vm_region_recurse() */

/*
 * Scan the kernel pmap for W+X PTEs, scan the kernel VM map for W+X
 * map entries, and identify ranges with mismatched VM permissions and
 * PTE permissions.
 */
kern_return_t
pmap_permissions_verify(pmap_t ipmap, vm_map_t ivmmap, vm_offset_t sv, vm_offset_t ev)
{
	vm_offset_t cv = sv;
	kern_return_t rv = KERN_SUCCESS;
	uint64_t skip4 = 0, skip2 = 0;

	sv &= ~PAGE_MASK_64;
	ev &= ~PAGE_MASK_64;
	while (cv < ev) {
		if (__improbable((cv > 0x00007FFFFFFFFFFFULL) &&
				 (cv < 0xFFFF800000000000ULL))) {
			/* Skip the non-canonical hole between user and kernel space. */
			cv = 0xFFFF800000000000ULL;
		}
		/* Potential inconsistencies from not holding the pmap lock,
		 * but harmless for the moment.
		 */
		if (((cv & PML4MASK) == 0) && (pmap64_pml4(ipmap, cv) == 0)) {
			if ((cv + NBPML4) > cv)
				cv += NBPML4;
			else
				break;
			skip4++;
			continue;
		}
		if (((cv & PDMASK) == 0) && (pmap_pde(ipmap, cv) == 0)) {
			if ((cv + NBPD) > cv)
				cv += NBPD;
			else
				break;
			skip2++;
			continue;
		}

		pt_entry_t *ptep = pmap_pte(ipmap, cv);
		if (ptep && (*ptep & INTEL_PTE_VALID)) {
			if (*ptep & INTEL_PTE_WRITE) {
				if (!(*ptep & INTEL_PTE_NX)) {
					kprintf("W+X PTE at 0x%lx, P4: 0x%llx, P3: 0x%llx, P2: 0x%llx, PT: 0x%llx, VP: %u\n",
					    cv, *pmap64_pml4(ipmap, cv), *pmap64_pdpt(ipmap, cv), *pmap64_pde(ipmap, cv), *ptep,
					    pmap_valid_page((ppnum_t)(i386_btop(pte_to_pa(*ptep)))));
					rv = KERN_FAILURE;
				}
			}
		}
		cv += PAGE_SIZE;
	}
	kprintf("Completed pmap scan\n");
	cv = sv;

	struct vm_region_submap_info_64 vbr;
	mach_msg_type_number_t vbrcount = 0;
	mach_vm_size_t vmsize;
	vm_prot_t prot;
	uint32_t nesting_depth = 0;
	kern_return_t kret;

	while (cv < ev) {

		for (;;) {
			vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
			if ((kret = mach_vm_region_recurse(ivmmap,
			    (mach_vm_address_t *) &cv, &vmsize, &nesting_depth,
			    (vm_region_recurse_info_t)&vbr,
			    &vbrcount)) != KERN_SUCCESS) {
				break;
			}

			if (vbr.is_submap) {
				nesting_depth++;
				continue;
			} else {
				break;
			}
		}

		if (kret != KERN_SUCCESS)
			break;

		prot = vbr.protection;

		if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
			kprintf("W+X map entry at address 0x%lx\n", cv);
			rv = KERN_FAILURE;
		}

		if (prot) {
			vm_offset_t pcv;
			for (pcv = cv; pcv < cv + vmsize; pcv += PAGE_SIZE) {
				pt_entry_t *ptep = pmap_pte(ipmap, pcv);
				vm_prot_t tprot;

				if ((ptep == NULL) || !(*ptep & INTEL_PTE_VALID))
					continue;
				tprot = VM_PROT_READ;
				if (*ptep & INTEL_PTE_WRITE)
					tprot |= VM_PROT_WRITE;
				if ((*ptep & INTEL_PTE_NX) == 0)
					tprot |= VM_PROT_EXECUTE;
				if (tprot != prot) {
					kprintf("PTE/map entry permissions mismatch at address 0x%lx, pte: 0x%llx, protection: 0x%x\n",
					    pcv, *ptep, prot);
					rv = KERN_FAILURE;
				}
			}
		}
		cv += vmsize;
	}
	return rv;
}
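
/*
 * A minimal invocation sketch (assumption: a hypothetical debug hook;
 * nothing in this file calls the verifier itself): audit the kernel's
 * own mappings for W+X pages and VM-map/PTE permission mismatches over
 * the kernel address range.
 */
#if 0	/* illustrative only */
static void
verify_kernel_wx_example(void)
{
	if (pmap_permissions_verify(kernel_pmap, kernel_map,
	    VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS) != KERN_SUCCESS)
		kprintf("pmap_permissions_verify: inconsistencies found\n");
}
#endif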