1/* 2 * Copyright (c) 2000-2010 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * @OSF_COPYRIGHT@ 30 */ 31/* 32 * Mach Operating System 33 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University 34 * All Rights Reserved. 35 * 36 * Permission to use, copy, modify and distribute this software and its 37 * documentation is hereby granted, provided that both the copyright 38 * notice and this permission notice appear in all copies of the 39 * software, derivative works or modified versions, and any portions 40 * thereof, and that both notices appear in supporting documentation. 41 * 42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 45 * 46 * Carnegie Mellon requests users of this software to return to 47 * 48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 49 * School of Computer Science 50 * Carnegie Mellon University 51 * Pittsburgh PA 15213-3890 52 * 53 * any improvements or extensions that they make and grant Carnegie Mellon 54 * the rights to redistribute these changes. 55 */ 56/* 57 */ 58 59/* 60 * File: pmap.c 61 * Author: Avadis Tevanian, Jr., Michael Wayne Young 62 * (These guys wrote the Vax version) 63 * 64 * Physical Map management code for Intel i386, i486, and i860. 65 * 66 * Manages physical address maps. 67 * 68 * In addition to hardware address maps, this 69 * module is called upon to provide software-use-only 70 * maps which may or may not be stored in the same 71 * form as hardware maps. These pseudo-maps are 72 * used to store intermediate results from copy 73 * operations to and from address spaces. 74 * 75 * Since the information managed by this module is 76 * also stored by the logical address mapping module, 77 * this module may throw away valid virtual-to-physical 78 * mappings at almost any time. However, invalidations 79 * of virtual-to-physical mappings must be done as 80 * requested. 
81 * 82 * In order to cope with hardware architectures which 83 * make virtual-to-physical map invalidates expensive, 84 * this module may delay invalidate or reduced protection 85 * operations until such time as they are actually 86 * necessary. This module is given full information as 87 * to which processors are currently using which maps, 88 * and to when physical maps must be made correct. 89 */ 90 91#include <string.h> 92#include <mach_ldebug.h> 93 94#include <libkern/OSAtomic.h> 95 96#include <mach/machine/vm_types.h> 97 98#include <mach/boolean.h> 99#include <kern/thread.h> 100#include <kern/zalloc.h> 101#include <kern/queue.h> 102#include <kern/ledger.h> 103#include <kern/mach_param.h> 104 105#include <kern/lock.h> 106#include <kern/kalloc.h> 107#include <kern/spl.h> 108 109#include <vm/pmap.h> 110#include <vm/vm_map.h> 111#include <vm/vm_kern.h> 112#include <mach/vm_param.h> 113#include <mach/vm_prot.h> 114#include <vm/vm_object.h> 115#include <vm/vm_page.h> 116 117#include <mach/machine/vm_param.h> 118#include <machine/thread.h> 119 120#include <kern/misc_protos.h> /* prototyping */ 121#include <i386/misc_protos.h> 122#include <i386/i386_lowmem.h> 123#include <x86_64/lowglobals.h> 124 125#include <i386/cpuid.h> 126#include <i386/cpu_data.h> 127#include <i386/cpu_number.h> 128#include <i386/machine_cpu.h> 129#include <i386/seg.h> 130#include <i386/serial_io.h> 131#include <i386/cpu_capabilities.h> 132#include <i386/machine_routines.h> 133#include <i386/proc_reg.h> 134#include <i386/tsc.h> 135#include <i386/pmap_internal.h> 136#include <i386/pmap_pcid.h> 137 138#include <vm/vm_protos.h> 139 140#include <i386/mp.h> 141#include <i386/mp_desc.h> 142#include <libkern/kernel_mach_header.h> 143 144#include <pexpert/i386/efi.h> 145 146 147#ifdef IWANTTODEBUG 148#undef DEBUG 149#define DEBUG 1 150#define POSTCODE_DELAY 1 151#include <i386/postcode.h> 152#endif /* IWANTTODEBUG */ 153 154#ifdef PMAP_DEBUG 155#define DBG(x...) kprintf("DBG: " x) 156#else 157#define DBG(x...) 158#endif 159/* Compile time assert to ensure adjacency/alignment of per-CPU data fields used 160 * in the trampolines for kernel/user boundary TLB coherency. 161 */ 162char pmap_cpu_data_assert[(((offsetof(cpu_data_t, cpu_tlb_invalid) - offsetof(cpu_data_t, cpu_active_cr3)) == 8) && (offsetof(cpu_data_t, cpu_active_cr3) % 64 == 0)) ? 1 : -1]; 163boolean_t pmap_trace = FALSE; 164 165boolean_t no_shared_cr3 = DEBUG; /* TRUE for DEBUG by default */ 166 167int nx_enabled = 1; /* enable no-execute protection */ 168int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64-bit apps may not */ 169int allow_stack_exec = 0; /* No apps may execute from the stack by default */ 170 171const boolean_t cpu_64bit = TRUE; /* Mais oui! */ 172 173uint64_t max_preemption_latency_tsc = 0; 174 175pv_hashed_entry_t *pv_hash_table; /* hash lists */ 176 177uint32_t npvhash = 0; 178 179pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL; 180pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL; 181decl_simple_lock_data(,pv_hashed_free_list_lock) 182decl_simple_lock_data(,pv_hashed_kern_free_list_lock) 183decl_simple_lock_data(,pv_hash_table_lock) 184 185zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */ 186 187/* 188 * First and last physical addresses that we maintain any information 189 * for. Initialized to zero so that pmap operations done before 190 * pmap_init won't touch any non-existent structures. 191 */ 192boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? 
 */

static struct vm_object kptobj_object_store;
static struct vm_object kpml4obj_object_store;
static struct vm_object kpdptobj_object_store;

/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char		*pmap_phys_attributes;
ppnum_t		last_managed_page = 0;

/*
 *	Amount of virtual memory mapped by one
 *	page-directory entry.
 */

uint64_t pde_mapped_size = PDE_MAPPED_SIZE;

unsigned pmap_memory_region_count;
unsigned pmap_memory_region_current;

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];

/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;

struct zone	*pmap_zone;		/* zone of pmap structures */

struct zone	*pmap_anchor_zone;
int		pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;
long long	alloc_ptepages_count __attribute__((aligned(8))) = 0;	/* aligned for atomic access */
unsigned int	bootstrap_wired_pages = 0;
int		pt_fake_zone_index = -1;

extern long	NMIPI_acks;

boolean_t	kernel_text_ps_4K = TRUE;
boolean_t	wpkernel = TRUE;

extern char	end;

static int	nkpt;

pt_entry_t	*DMAP1, *DMAP2;
caddr_t		DADDR1;
caddr_t		DADDR2;

const boolean_t	pmap_disable_kheap_nx = FALSE;
const boolean_t	pmap_disable_kstack_nx = FALSE;
extern boolean_t doconstro_override;

extern long __stack_chk_guard[];

/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on; we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
    vm_offset_t		virt,
    vm_map_offset_t	start_addr,
    vm_map_offset_t	end_addr,
    vm_prot_t		prot,
    unsigned int	flags)
{
    int		ps;

    ps = PAGE_SIZE;
    while (start_addr < end_addr) {
        pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
                   (ppnum_t) i386_btop(start_addr), prot, VM_PROT_NONE, flags, TRUE);
        virt += ps;
        start_addr += ps;
    }
    return(virt);
}

extern char		*first_avail;
extern vm_offset_t	virtual_avail, virtual_end;
extern pmap_paddr_t	avail_start, avail_end;
extern vm_offset_t	sHIB;
extern vm_offset_t	eHIB;
extern vm_offset_t	stext;
extern vm_offset_t	etext;
extern vm_offset_t	sdata, edata;
extern vm_offset_t	sconstdata, econstdata;

extern void		*KPTphys;

boolean_t pmap_smep_enabled = FALSE;

void
pmap_cpu_init(void)
{
    cpu_data_t	*cdp = current_cpu_datap();
    /*
     * Here early in the life of a processor (from cpu_mode_init()).
     * Ensure global page feature is disabled at this point.
     */

    set_cr4(get_cr4() &~ CR4_PGE);

    /*
     * Initialize the per-cpu, TLB-related fields.
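     * cpu_active_cr3 and cpu_tlb_invalid are consulted by the kernel/user
     * boundary trampolines; the compile-time assert near the top of this
     * file guards the field adjacency and 64-byte alignment that those
     * trampolines depend on.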
308 */ 309 cdp->cpu_kernel_cr3 = kernel_pmap->pm_cr3; 310 cdp->cpu_active_cr3 = kernel_pmap->pm_cr3; 311 cdp->cpu_tlb_invalid = FALSE; 312 cdp->cpu_task_map = TASK_MAP_64BIT; 313 pmap_pcid_configure(); 314 if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_SMEP) { 315 boolean_t nsmep; 316 if (!PE_parse_boot_argn("-pmap_smep_disable", &nsmep, sizeof(nsmep))) { 317 set_cr4(get_cr4() | CR4_SMEP); 318 pmap_smep_enabled = TRUE; 319 } 320 } 321 322 if (cdp->cpu_fixed_pmcs_enabled) { 323 boolean_t enable = TRUE; 324 cpu_pmc_control(&enable); 325 } 326} 327 328 329 330/* 331 * Bootstrap the system enough to run with virtual memory. 332 * Map the kernel's code and data, and allocate the system page table. 333 * Called with mapping OFF. Page_size must already be set. 334 */ 335 336void 337pmap_bootstrap( 338 __unused vm_offset_t load_start, 339 __unused boolean_t IA32e) 340{ 341#if NCOPY_WINDOWS > 0 342 vm_offset_t va; 343 int i; 344#endif 345 assert(IA32e); 346 347 vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address 348 * known to VM */ 349 /* 350 * The kernel's pmap is statically allocated so we don't 351 * have to use pmap_create, which is unlikely to work 352 * correctly at this part of the boot sequence. 353 */ 354 355 kernel_pmap = &kernel_pmap_store; 356 kernel_pmap->ref_count = 1; 357 kernel_pmap->nx_enabled = TRUE; 358 kernel_pmap->pm_task_map = TASK_MAP_64BIT; 359 kernel_pmap->pm_obj = (vm_object_t) NULL; 360 kernel_pmap->dirbase = (pd_entry_t *)((uintptr_t)IdlePTD); 361 kernel_pmap->pm_pdpt = (pd_entry_t *) ((uintptr_t)IdlePDPT); 362 kernel_pmap->pm_pml4 = IdlePML4; 363 kernel_pmap->pm_cr3 = (uintptr_t)ID_MAP_VTOP(IdlePML4); 364 pmap_pcid_initialize_kernel(kernel_pmap); 365 366 367 368 current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3; 369 370 nkpt = NKPT; 371 OSAddAtomic(NKPT, &inuse_ptepages_count); 372 OSAddAtomic64(NKPT, &alloc_ptepages_count); 373 bootstrap_wired_pages = NKPT; 374 375 virtual_avail = (vm_offset_t)(VM_MIN_KERNEL_ADDRESS) + (vm_offset_t)first_avail; 376 virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS); 377 378#if NCOPY_WINDOWS > 0 379 /* 380 * Reserve some special page table entries/VA space for temporary 381 * mapping of pages. 
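     * Each mapping window is a (prv_CMAP, prv_CADDR) pair: a PTE pointer and
     * the fixed kernel VA it covers; this code is compiled only when
     * NCOPY_WINDOWS > 0.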
382 */ 383#define SYSMAP(c, p, v, n) \ 384 v = (c)va; va += ((n)*INTEL_PGBYTES); 385 386 va = virtual_avail; 387 388 for (i=0; i<PMAP_NWINDOWS; i++) { 389#if 1 390 kprintf("trying to do SYSMAP idx %d %p\n", i, 391 current_cpu_datap()); 392 kprintf("cpu_pmap %p\n", current_cpu_datap()->cpu_pmap); 393 kprintf("mapwindow %p\n", current_cpu_datap()->cpu_pmap->mapwindow); 394 kprintf("two stuff %p %p\n", 395 (void *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP), 396 (void *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR)); 397#endif 398 SYSMAP(caddr_t, 399 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP), 400 (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR), 401 1); 402 current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 403 &(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP_store); 404 *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0; 405 } 406 407 /* DMAP user for debugger */ 408 SYSMAP(caddr_t, DMAP1, DADDR1, 1); 409 SYSMAP(caddr_t, DMAP2, DADDR2, 1); /* XXX temporary - can remove */ 410 411 virtual_avail = va; 412#endif 413 414 if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) { 415 if (0 != ((npvhash + 1) & npvhash)) { 416 kprintf("invalid hash %d, must be ((2^N)-1), " 417 "using default %d\n", npvhash, NPVHASH); 418 npvhash = NPVHASH; 419 } 420 } else { 421 npvhash = NPVHASH; 422 } 423 424 simple_lock_init(&kernel_pmap->lock, 0); 425 simple_lock_init(&pv_hashed_free_list_lock, 0); 426 simple_lock_init(&pv_hashed_kern_free_list_lock, 0); 427 simple_lock_init(&pv_hash_table_lock,0); 428 429 pmap_cpu_init(); 430 431 if (pmap_pcid_ncpus) 432 printf("PMAP: PCID enabled\n"); 433 434 if (pmap_smep_enabled) 435 printf("PMAP: Supervisor Mode Execute Protection enabled\n"); 436 437#if DEBUG 438 printf("Stack canary: 0x%lx\n", __stack_chk_guard[0]); 439 printf("ml_early_random(): 0x%qx\n", ml_early_random()); 440#endif 441 boolean_t ptmp; 442 /* Check if the user has requested disabling stack or heap no-execute 443 * enforcement. These are "const" variables; that qualifier is cast away 444 * when altering them. The TEXT/DATA const sections are marked 445 * write protected later in the kernel startup sequence, so altering 446 * them is possible at this point, in pmap_bootstrap(). 447 */ 448 if (PE_parse_boot_argn("-pmap_disable_kheap_nx", &ptmp, sizeof(ptmp))) { 449 boolean_t *pdknxp = (boolean_t *) &pmap_disable_kheap_nx; 450 *pdknxp = TRUE; 451 } 452 453 if (PE_parse_boot_argn("-pmap_disable_kstack_nx", &ptmp, sizeof(ptmp))) { 454 boolean_t *pdknhp = (boolean_t *) &pmap_disable_kstack_nx; 455 *pdknhp = TRUE; 456 } 457 458 boot_args *args = (boot_args *)PE_state.bootArgs; 459 if (args->efiMode == kBootArgsEfiMode32) { 460 printf("EFI32: kernel virtual space limited to 4GB\n"); 461 virtual_end = VM_MAX_KERNEL_ADDRESS_EFI32; 462 } 463 kprintf("Kernel virtual space from 0x%lx to 0x%lx.\n", 464 (long)KERNEL_BASE, (long)virtual_end); 465 kprintf("Available physical space from 0x%llx to 0x%llx\n", 466 avail_start, avail_end); 467 468 /* 469 * The -no_shared_cr3 boot-arg is a debugging feature (set by default 470 * in the DEBUG kernel) to force the kernel to switch to its own map 471 * (and cr3) when control is in kernelspace. The kernel's map does not 472 * include (i.e. share) userspace so wild references will cause 473 * a panic. Only copyin and copyout are exempt from this. 
474 */ 475 (void) PE_parse_boot_argn("-no_shared_cr3", 476 &no_shared_cr3, sizeof (no_shared_cr3)); 477 if (no_shared_cr3) 478 kprintf("Kernel not sharing user map\n"); 479 480#ifdef PMAP_TRACES 481 if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) { 482 kprintf("Kernel traces for pmap operations enabled\n"); 483 } 484#endif /* PMAP_TRACES */ 485} 486 487void 488pmap_virtual_space( 489 vm_offset_t *startp, 490 vm_offset_t *endp) 491{ 492 *startp = virtual_avail; 493 *endp = virtual_end; 494} 495 496 497 498 499#if HIBERNATION 500 501#include <IOKit/IOHibernatePrivate.h> 502 503int32_t pmap_npages; 504int32_t pmap_teardown_last_valid_compact_indx = -1; 505 506 507void hibernate_rebuild_pmap_structs(void); 508void hibernate_teardown_pmap_structs(addr64_t *, addr64_t *); 509void pmap_pack_index(uint32_t); 510int32_t pmap_unpack_index(pv_rooted_entry_t); 511 512 513int32_t 514pmap_unpack_index(pv_rooted_entry_t pv_h) 515{ 516 int32_t indx = 0; 517 518 indx = (int32_t)(*((uint64_t *)(&pv_h->qlink.next)) >> 48); 519 indx = indx << 16; 520 indx |= (int32_t)(*((uint64_t *)(&pv_h->qlink.prev)) >> 48); 521 522 *((uint64_t *)(&pv_h->qlink.next)) |= ((uint64_t)0xffff << 48); 523 *((uint64_t *)(&pv_h->qlink.prev)) |= ((uint64_t)0xffff << 48); 524 525 return (indx); 526} 527 528 529void 530pmap_pack_index(uint32_t indx) 531{ 532 pv_rooted_entry_t pv_h; 533 534 pv_h = &pv_head_table[indx]; 535 536 *((uint64_t *)(&pv_h->qlink.next)) &= ~((uint64_t)0xffff << 48); 537 *((uint64_t *)(&pv_h->qlink.prev)) &= ~((uint64_t)0xffff << 48); 538 539 *((uint64_t *)(&pv_h->qlink.next)) |= ((uint64_t)(indx >> 16)) << 48; 540 *((uint64_t *)(&pv_h->qlink.prev)) |= ((uint64_t)(indx & 0xffff)) << 48; 541} 542 543 544void 545hibernate_teardown_pmap_structs(addr64_t *unneeded_start, addr64_t *unneeded_end) 546{ 547 int32_t i; 548 int32_t compact_target_indx; 549 550 compact_target_indx = 0; 551 552 for (i = 0; i < pmap_npages; i++) { 553 if (pv_head_table[i].pmap == PMAP_NULL) { 554 555 if (pv_head_table[compact_target_indx].pmap != PMAP_NULL) 556 compact_target_indx = i; 557 } else { 558 pmap_pack_index((uint32_t)i); 559 560 if (pv_head_table[compact_target_indx].pmap == PMAP_NULL) { 561 /* 562 * we've got a hole to fill, so 563 * move this pv_rooted_entry_t to it's new home 564 */ 565 pv_head_table[compact_target_indx] = pv_head_table[i]; 566 pv_head_table[i].pmap = PMAP_NULL; 567 568 pmap_teardown_last_valid_compact_indx = compact_target_indx; 569 compact_target_indx++; 570 } else 571 pmap_teardown_last_valid_compact_indx = i; 572 } 573 } 574 *unneeded_start = (addr64_t)&pv_head_table[pmap_teardown_last_valid_compact_indx+1]; 575 *unneeded_end = (addr64_t)&pv_head_table[pmap_npages-1]; 576 577 HIBLOG("hibernate_teardown_pmap_structs done: last_valid_compact_indx %d\n", pmap_teardown_last_valid_compact_indx); 578} 579 580 581void 582hibernate_rebuild_pmap_structs(void) 583{ 584 int32_t cindx, eindx, rindx; 585 pv_rooted_entry_t pv_h; 586 587 eindx = (int32_t)pmap_npages; 588 589 for (cindx = pmap_teardown_last_valid_compact_indx; cindx >= 0; cindx--) { 590 591 pv_h = &pv_head_table[cindx]; 592 593 rindx = pmap_unpack_index(pv_h); 594 assert(rindx < pmap_npages); 595 596 if (rindx != cindx) { 597 /* 598 * this pv_rooted_entry_t was moved by hibernate_teardown_pmap_structs, 599 * so move it back to its real location 600 */ 601 pv_head_table[rindx] = pv_head_table[cindx]; 602 } 603 if (rindx+1 != eindx) { 604 /* 605 * the 'hole' between this vm_rooted_entry_t and the previous 606 * vm_rooted_entry_t we 
moved needs to be initialized as 607 * a range of zero'd vm_rooted_entry_t's 608 */ 609 bzero((char *)&pv_head_table[rindx+1], (eindx - rindx - 1) * sizeof (struct pv_rooted_entry)); 610 } 611 eindx = rindx; 612 } 613 if (rindx) 614 bzero ((char *)&pv_head_table[0], rindx * sizeof (struct pv_rooted_entry)); 615 616 HIBLOG("hibernate_rebuild_pmap_structs done: last_valid_compact_indx %d\n", pmap_teardown_last_valid_compact_indx); 617} 618 619#endif 620 621/* 622 * Initialize the pmap module. 623 * Called by vm_init, to initialize any structures that the pmap 624 * system needs to map virtual memory. 625 */ 626void 627pmap_init(void) 628{ 629 long npages; 630 vm_offset_t addr; 631 vm_size_t s, vsize; 632 vm_map_offset_t vaddr; 633 ppnum_t ppn; 634 635 636 kernel_pmap->pm_obj_pml4 = &kpml4obj_object_store; 637 _vm_object_allocate((vm_object_size_t)NPML4PGS * PAGE_SIZE, &kpml4obj_object_store); 638 639 kernel_pmap->pm_obj_pdpt = &kpdptobj_object_store; 640 _vm_object_allocate((vm_object_size_t)NPDPTPGS * PAGE_SIZE, &kpdptobj_object_store); 641 642 kernel_pmap->pm_obj = &kptobj_object_store; 643 _vm_object_allocate((vm_object_size_t)NPDEPGS * PAGE_SIZE, &kptobj_object_store); 644 645 /* 646 * Allocate memory for the pv_head_table and its lock bits, 647 * the modify bit array, and the pte_page table. 648 */ 649 650 /* 651 * zero bias all these arrays now instead of off avail_start 652 * so we cover all memory 653 */ 654 655 npages = i386_btop(avail_end); 656#if HIBERNATION 657 pmap_npages = (uint32_t)npages; 658#endif 659 s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages 660 + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1)) 661 + pv_lock_table_size(npages) 662 + pv_hash_lock_table_size((npvhash+1)) 663 + npages); 664 665 s = round_page(s); 666 if (kernel_memory_allocate(kernel_map, &addr, s, 0, 667 KMA_KOBJECT | KMA_PERMANENT) 668 != KERN_SUCCESS) 669 panic("pmap_init"); 670 671 memset((char *)addr, 0, s); 672 673 vaddr = addr; 674 vsize = s; 675 676#if PV_DEBUG 677 if (0 == npvhash) panic("npvhash not initialized"); 678#endif 679 680 /* 681 * Allocate the structures first to preserve word-alignment. 682 */ 683 pv_head_table = (pv_rooted_entry_t) addr; 684 addr = (vm_offset_t) (pv_head_table + npages); 685 686 pv_hash_table = (pv_hashed_entry_t *)addr; 687 addr = (vm_offset_t) (pv_hash_table + (npvhash + 1)); 688 689 pv_lock_table = (char *) addr; 690 addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages)); 691 692 pv_hash_lock_table = (char *) addr; 693 addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1))); 694 695 pmap_phys_attributes = (char *) addr; 696 697 ppnum_t last_pn = i386_btop(avail_end); 698 unsigned int i; 699 pmap_memory_region_t *pmptr = pmap_memory_regions; 700 for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { 701 if (pmptr->type != kEfiConventionalMemory) 702 continue; 703 ppnum_t pn; 704 for (pn = pmptr->base; pn <= pmptr->end; pn++) { 705 if (pn < last_pn) { 706 pmap_phys_attributes[pn] |= PHYS_MANAGED; 707 708 if (pn > last_managed_page) 709 last_managed_page = pn; 710 711 if (pn >= lowest_hi && pn <= highest_hi) 712 pmap_phys_attributes[pn] |= PHYS_NOENCRYPT; 713 } 714 } 715 } 716 while (vsize) { 717 ppn = pmap_find_phys(kernel_pmap, vaddr); 718 719 pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT; 720 721 vaddr += PAGE_SIZE; 722 vsize -= PAGE_SIZE; 723 } 724 /* 725 * Create the zone of physical maps, 726 * and of the physical-to-virtual entries. 
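     * pmap_zone backs struct pmap allocations, pmap_anchor_zone supplies the
     * page-aligned PML4 "anchor" pages, and pv_hashed_list_zone holds
     * pv_hashed_entry structures; all three are marked Z_NOENCRYPT below.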
727 */ 728 s = (vm_size_t) sizeof(struct pmap); 729 pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */ 730 zone_change(pmap_zone, Z_NOENCRYPT, TRUE); 731 732 pmap_anchor_zone = zinit(PAGE_SIZE, task_max, PAGE_SIZE, "pagetable anchors"); 733 zone_change(pmap_anchor_zone, Z_NOENCRYPT, TRUE); 734 735 /* The anchor is required to be page aligned. Zone debugging adds 736 * padding which may violate that requirement. Tell the zone 737 * subsystem that alignment is required. 738 */ 739 740 zone_change(pmap_anchor_zone, Z_ALIGNMENT_REQUIRED, TRUE); 741 742 s = (vm_size_t) sizeof(struct pv_hashed_entry); 743 pv_hashed_list_zone = zinit(s, 10000*s /* Expandable zone */, 744 4096 * 3 /* LCM x86_64*/, "pv_list"); 745 zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE); 746 747 /* create pv entries for kernel pages mapped by low level 748 startup code. these have to exist so we can pmap_remove() 749 e.g. kext pages from the middle of our addr space */ 750 751 vaddr = (vm_map_offset_t) VM_MIN_KERNEL_ADDRESS; 752 for (ppn = VM_MIN_KERNEL_PAGE; ppn < i386_btop(avail_start); ppn++) { 753 pv_rooted_entry_t pv_e; 754 755 pv_e = pai_to_pvh(ppn); 756 pv_e->va = vaddr; 757 vaddr += PAGE_SIZE; 758 pv_e->pmap = kernel_pmap; 759 queue_init(&pv_e->qlink); 760 } 761 pmap_initialized = TRUE; 762 763 max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t); 764 765 /* 766 * Ensure the kernel's PML4 entry exists for the basement 767 * before this is shared with any user. 768 */ 769 pmap_expand_pml4(kernel_pmap, KERNEL_BASEMENT, PMAP_EXPAND_OPTIONS_NONE); 770} 771 772static 773void pmap_mark_range(pmap_t npmap, uint64_t sv, uint64_t nxrosz, boolean_t NX, boolean_t ro) { 774 uint64_t ev = sv + nxrosz, cv = sv; 775 pd_entry_t *pdep; 776 pt_entry_t *ptep = NULL; 777 778 assert(((sv & 0xFFFULL) | (nxrosz & 0xFFFULL)) == 0); 779 780 for (pdep = pmap_pde(npmap, cv); pdep != NULL && (cv < ev);) { 781 uint64_t pdev = (cv & ~((uint64_t)PDEMASK)); 782 783 if (*pdep & INTEL_PTE_PS) { 784 if (NX) 785 *pdep |= INTEL_PTE_NX; 786 if (ro) 787 *pdep &= ~INTEL_PTE_WRITE; 788 cv += NBPD; 789 cv &= ~((uint64_t) PDEMASK); 790 pdep = pmap_pde(npmap, cv); 791 continue; 792 } 793 794 for (ptep = pmap_pte(npmap, cv); ptep != NULL && (cv < (pdev + NBPD)) && (cv < ev);) { 795 if (NX) 796 *ptep |= INTEL_PTE_NX; 797 if (ro) 798 *ptep &= ~INTEL_PTE_WRITE; 799 cv += NBPT; 800 ptep = pmap_pte(npmap, cv); 801 } 802 } 803 DPRINTF("%s(0x%llx, 0x%llx, %u, %u): 0x%llx, 0x%llx\n", __FUNCTION__, sv, nxrosz, NX, ro, cv, ptep ? *ptep: 0); 804} 805 806/* 807 * Called once VM is fully initialized so that we can release unused 808 * sections of low memory to the general pool. 809 * Also complete the set-up of identity-mapped sections of the kernel: 810 * 1) write-protect kernel text 811 * 2) map kernel text using large pages if possible 812 * 3) read and write-protect page zero (for K32) 813 * 4) map the global page at the appropriate virtual address. 814 * 815 * Use of large pages 816 * ------------------ 817 * To effectively map and write-protect all kernel text pages, the text 818 * must be 2M-aligned at the base, and the data section above must also be 819 * 2M-aligned. That is, there's padding below and above. This is achieved 820 * through linker directives. Large pages are used only if this alignment 821 * exists (and not overriden by the -kernel_text_page_4K boot-arg). 
 * The memory layout is:
 *
 *                      :                :
 *                      |    __DATA      |
 *              sdata:  ==================  2Meg
 *                      |                |
 *                      |  zero-padding  |
 *                      |                |
 *              etext:  ------------------
 *                      |                |
 *                      :                :
 *                      |                |
 *                      |    __TEXT      |
 *                      |                |
 *                      :                :
 *                      |                |
 *              stext:  ==================  2Meg
 *                      |                |
 *                      |  zero-padding  |
 *                      |                |
 *               eHIB:  ------------------
 *                      |    __HIB       |
 *                      :                :
 *
 * Prior to changing the mapping from 4K to 2M, the zero-padding pages
 * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d.  Then all the
 * 4K pages covering [stext,etext] are coalesced as 2M large pages.
 * The now unused level-1 PTE pages are also freed.
 */
extern ppnum_t vm_kernel_base_page;
void
pmap_lowmem_finalize(void)
{
    spl_t	spl;
    int		i;

    /*
     * Update wired memory statistics for early boot pages
     */
    PMAP_ZINFO_PALLOC(kernel_pmap, bootstrap_wired_pages * PAGE_SIZE);

    /*
     * Free pages in pmap regions below the base:
     * rdar://6332712
     *	We can't free all the pages to VM that EFI reports available.
     *	Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
     *	There's also a size miscalculation here: pend is one page less
     *	than it should be but this is not fixed to be backwards
     *	compatible.
     *	This is important for KASLR because up to 256*2MB = 512MB of space
     *	has to be released to VM.
     */
    for (i = 0;
         pmap_memory_regions[i].end < vm_kernel_base_page;
         i++) {
        vm_offset_t	pbase = i386_ptob(pmap_memory_regions[i].base);
        vm_offset_t	pend  = i386_ptob(pmap_memory_regions[i].end+1);

        DBG("pmap region %d [%p..[%p\n",
            i, (void *) pbase, (void *) pend);

        if (pmap_memory_regions[i].attribute & EFI_MEMORY_KERN_RESERVED)
            continue;
        /*
         * rdar://6332712
         * Adjust limits not to free pages in range 0xc0000-0xff000.
         */
        if (pbase >= 0xc0000 && pend <= 0x100000)
            continue;
        if (pbase < 0xc0000 && pend > 0x100000) {
            /* unsafe range lies entirely within this region: free the part below it */
            DBG("- ml_static_mfree(%p,%p)\n",
                (void *) ml_static_ptovirt(pbase),
                (void *) (0xc0000-pbase));
            ml_static_mfree(ml_static_ptovirt(pbase), 0xc0000-pbase);
            pbase = 0x100000;
        }
        if (pbase < 0xc0000)
            pend = MIN(pend, 0xc0000);
        if (pend > 0x100000)
            pbase = MAX(pbase, 0x100000);
        DBG("- ml_static_mfree(%p,%p)\n",
            (void *) ml_static_ptovirt(pbase),
            (void *) (pend - pbase));
        ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
    }

    /* A final pass to get rid of all initial identity mappings to
     * low pages.
     */
    DPRINTF("%s: Removing mappings from 0->0x%lx\n", __FUNCTION__, vm_kernel_base);

    /*
     * Remove all mappings past the boot-cpu descriptor aliases and low globals.
     * Non-boot-cpu GDT aliases will be remapped later as needed.
     */
    pmap_remove(kernel_pmap, LOWGLOBAL_ALIAS + PAGE_SIZE, vm_kernel_base);

    /*
     * If text and data are both 2MB-aligned,
     * we can map text with large-pages,
     * unless the -kernel_text_ps_4K boot-arg overrides.
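     * To coalesce, the loop below takes the page frame from the first 4K PTE
     * of each 2M run, merges it with the attributes already in the covering
     * PDE, sets INTEL_PTE_PS (clearing INTEL_PTE_WRITE when wpkernel is set),
     * and then releases the now-unused 4K PTE page with ml_static_mfree().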
924 */ 925 if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) { 926 kprintf("Kernel text is 2MB aligned"); 927 kernel_text_ps_4K = FALSE; 928 if (PE_parse_boot_argn("-kernel_text_ps_4K", 929 &kernel_text_ps_4K, 930 sizeof (kernel_text_ps_4K))) 931 kprintf(" but will be mapped with 4K pages\n"); 932 else 933 kprintf(" and will be mapped with 2M pages\n"); 934 } 935 936 (void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel)); 937 if (wpkernel) 938 kprintf("Kernel text %p-%p to be write-protected\n", 939 (void *) stext, (void *) etext); 940 941 spl = splhigh(); 942 943 /* 944 * Scan over text if mappings are to be changed: 945 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0 946 * - Change to large-pages if possible and not overriden. 947 */ 948 if (kernel_text_ps_4K && wpkernel) { 949 vm_offset_t myva; 950 for (myva = stext; myva < etext; myva += PAGE_SIZE) { 951 pt_entry_t *ptep; 952 953 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); 954 if (ptep) 955 pmap_store_pte(ptep, *ptep & ~INTEL_PTE_WRITE); 956 } 957 } 958 959 if (!kernel_text_ps_4K) { 960 vm_offset_t myva; 961 962 /* 963 * Release zero-filled page padding used for 2M-alignment. 964 */ 965 DBG("ml_static_mfree(%p,%p) for padding below text\n", 966 (void *) eHIB, (void *) (stext - eHIB)); 967 ml_static_mfree(eHIB, stext - eHIB); 968 DBG("ml_static_mfree(%p,%p) for padding above text\n", 969 (void *) etext, (void *) (sdata - etext)); 970 ml_static_mfree(etext, sdata - etext); 971 972 /* 973 * Coalesce text pages into large pages. 974 */ 975 for (myva = stext; myva < sdata; myva += I386_LPGBYTES) { 976 pt_entry_t *ptep; 977 vm_offset_t pte_phys; 978 pt_entry_t *pdep; 979 pt_entry_t pde; 980 981 pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva); 982 ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); 983 DBG("myva: %p pdep: %p ptep: %p\n", 984 (void *) myva, (void *) pdep, (void *) ptep); 985 if ((*ptep & INTEL_PTE_VALID) == 0) 986 continue; 987 pte_phys = (vm_offset_t)(*ptep & PG_FRAME); 988 pde = *pdep & PTMASK; /* page attributes from pde */ 989 pde |= INTEL_PTE_PS; /* make it a 2M entry */ 990 pde |= pte_phys; /* take page frame from pte */ 991 992 if (wpkernel) 993 pde &= ~INTEL_PTE_WRITE; 994 DBG("pmap_store_pte(%p,0x%llx)\n", 995 (void *)pdep, pde); 996 pmap_store_pte(pdep, pde); 997 998 /* 999 * Free the now-unused level-1 pte. 1000 * Note: ptep is a virtual address to the pte in the 1001 * recursive map. We can't use this address to free 1002 * the page. Instead we need to compute its address 1003 * in the Idle PTEs in "low memory". 
1004 */ 1005 vm_offset_t vm_ptep = (vm_offset_t) KPTphys 1006 + (pte_phys >> PTPGSHIFT); 1007 DBG("ml_static_mfree(%p,0x%x) for pte\n", 1008 (void *) vm_ptep, PAGE_SIZE); 1009 ml_static_mfree(vm_ptep, PAGE_SIZE); 1010 } 1011 1012 /* Change variable read by sysctl machdep.pmap */ 1013 pmap_kernel_text_ps = I386_LPGBYTES; 1014 } 1015 1016 boolean_t doconstro = TRUE; 1017 1018 (void) PE_parse_boot_argn("dataconstro", &doconstro, sizeof(doconstro)); 1019 1020 if ((sconstdata | econstdata) & PAGE_MASK) { 1021 kprintf("Const DATA misaligned 0x%lx 0x%lx\n", sconstdata, econstdata); 1022 if ((sconstdata & PAGE_MASK) || (doconstro_override == FALSE)) 1023 doconstro = FALSE; 1024 } 1025 1026 if ((sconstdata > edata) || (sconstdata < sdata) || ((econstdata - sconstdata) >= (edata - sdata))) { 1027 kprintf("Const DATA incorrect size 0x%lx 0x%lx 0x%lx 0x%lx\n", sconstdata, econstdata, sdata, edata); 1028 doconstro = FALSE; 1029 } 1030 1031 if (doconstro) 1032 kprintf("Marking const DATA read-only\n"); 1033 1034 vm_offset_t dva; 1035 1036 for (dva = sdata; dva < edata; dva += I386_PGBYTES) { 1037 assert(((sdata | edata) & PAGE_MASK) == 0); 1038 if ( (sdata | edata) & PAGE_MASK) { 1039 kprintf("DATA misaligned, 0x%lx, 0x%lx\n", sdata, edata); 1040 break; 1041 } 1042 1043 pt_entry_t dpte, *dptep = pmap_pte(kernel_pmap, dva); 1044 1045 dpte = *dptep; 1046 1047 assert((dpte & INTEL_PTE_VALID)); 1048 if ((dpte & INTEL_PTE_VALID) == 0) { 1049 kprintf("Missing data mapping 0x%lx 0x%lx 0x%lx\n", dva, sdata, edata); 1050 continue; 1051 } 1052 1053 dpte |= INTEL_PTE_NX; 1054 if (doconstro && (dva >= sconstdata) && (dva < econstdata)) { 1055 dpte &= ~INTEL_PTE_WRITE; 1056 } 1057 pmap_store_pte(dptep, dpte); 1058 } 1059 kernel_segment_command_t * seg; 1060 kernel_section_t * sec; 1061 1062 for (seg = firstseg(); seg != NULL; seg = nextsegfromheader(&_mh_execute_header, seg)) { 1063 if (!strcmp(seg->segname, "__TEXT") || 1064 !strcmp(seg->segname, "__DATA")) { 1065 continue; 1066 } 1067 //XXX 1068 if (!strcmp(seg->segname, "__KLD")) { 1069 continue; 1070 } 1071 if (!strcmp(seg->segname, "__HIB")) { 1072 for (sec = firstsect(seg); sec != NULL; sec = nextsect(seg, sec)) { 1073 if (sec->addr & PAGE_MASK) 1074 panic("__HIB segment's sections misaligned"); 1075 if (!strcmp(sec->sectname, "__text")) { 1076 pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), FALSE, TRUE); 1077 } else { 1078 pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), TRUE, FALSE); 1079 } 1080 } 1081 } else { 1082 pmap_mark_range(kernel_pmap, seg->vmaddr, round_page_64(seg->vmsize), TRUE, FALSE); 1083 } 1084 } 1085 1086 /* 1087 * If we're debugging, map the low global vector page at the fixed 1088 * virtual address. Otherwise, remove the mapping for this. 
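     * When kept, the mapping is wired, writable and non-executable
     * (INTEL_PTE_WIRED | INTEL_PTE_WRITE | INTEL_PTE_NX).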
     */
    if (debug_boot_arg) {
        pt_entry_t *pte = NULL;
        if (0 == (pte = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS)))
            panic("lowmem pte");
        /* make sure it is defined on page boundary */
        assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
        pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
                            | INTEL_PTE_REF
                            | INTEL_PTE_MOD
                            | INTEL_PTE_WIRED
                            | INTEL_PTE_VALID
                            | INTEL_PTE_WRITE
                            | INTEL_PTE_NX);
    } else {
        pmap_remove(kernel_pmap,
                    LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE);
    }

    splx(spl);
    if (pmap_pcid_ncpus)
        tlb_flush_global();
    else
        flush_tlb_raw();
}

/*
 * This function is only used for debugging from the VM layer.
 */
boolean_t
pmap_verify_free(
    ppnum_t pn)
{
    pv_rooted_entry_t	pv_h;
    int			pai;
    boolean_t		result;

    assert(pn != vm_page_fictitious_addr);

    if (!pmap_initialized)
        return(TRUE);

    if (pn == vm_page_guard_addr)
        return TRUE;

    pai = ppn_to_pai(pn);
    if (!IS_MANAGED_PAGE(pai))
        return(FALSE);
    pv_h = pai_to_pvh(pn);
    result = (pv_h->pmap == PMAP_NULL);
    return(result);
}

boolean_t
pmap_is_empty(
    pmap_t		pmap,
    vm_map_offset_t	va_start,
    vm_map_offset_t	va_end)
{
    vm_map_offset_t	offset;
    ppnum_t		phys_page;

    if (pmap == PMAP_NULL) {
        return TRUE;
    }

    /*
     * Check the resident page count
     * - if it's zero, the pmap is completely empty.
     * This short-circuit test prevents a virtual address scan which is
     * painfully slow for 64-bit spaces.
     * This assumes the count is correct
     * .. the debug kernel ought to be checking perhaps by page table walk.
     */
    if (pmap->stats.resident_count == 0)
        return TRUE;

    for (offset = va_start;
         offset < va_end;
         offset += PAGE_SIZE_64) {
        phys_page = pmap_find_phys(pmap, offset);
        if (phys_page) {
            kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
                    "page %d at 0x%llx\n",
                    pmap, va_start, va_end, phys_page, offset);
            return FALSE;
        }
    }

    return TRUE;
}


/*
 * Create and return a physical map.
 *
 * If the size specified for the map
 * is zero, the map is an actual physical
 * map, and may be referenced by the
 * hardware.
 *
 * If the size specified is non-zero,
 * the map will be used in software only, and
 * is bounded by that size.
 */
pmap_t
pmap_create(
    ledger_t		ledger,
    vm_map_size_t	sz,
    boolean_t		is_64bit)
{
    pmap_t		p;
    vm_size_t		size;
    pml4_entry_t	*pml4;
    pml4_entry_t	*kpml4;

    PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
               (uint32_t) (sz>>32), (uint32_t) sz, is_64bit, 0, 0);

    size = (vm_size_t) sz;

    /*
     * A software use-only map doesn't even need a map.
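     * On x86_64 only hardware pmaps are supported here, so a request for a
     * software-only map (non-zero size) simply returns PMAP_NULL.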
     */

    if (size != 0) {
        return(PMAP_NULL);
    }

    p = (pmap_t) zalloc(pmap_zone);
    if (PMAP_NULL == p)
        panic("pmap_create zalloc");
    /* Zero all fields */
    bzero(p, sizeof(*p));
    /* init counts now since we'll be bumping some */
    simple_lock_init(&p->lock, 0);
#if 00
    p->stats.resident_count = 0;
    p->stats.resident_max = 0;
    p->stats.wired_count = 0;
#else
    bzero(&p->stats, sizeof (p->stats));
#endif
    p->ref_count = 1;
    p->nx_enabled = 1;
    p->pm_shared = FALSE;
    ledger_reference(ledger);
    p->ledger = ledger;

    p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;
    if (pmap_pcid_ncpus)
        pmap_pcid_initialize(p);

    p->pm_pml4 = zalloc(pmap_anchor_zone);

    pmap_assert((((uintptr_t)p->pm_pml4) & PAGE_MASK) == 0);

    memset((char *)p->pm_pml4, 0, PAGE_SIZE);

    p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_pml4);

    /* allocate the vm_objs to hold the pdpt, pde and pte pages */

    p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS) * PAGE_SIZE);
    if (NULL == p->pm_obj_pml4)
        panic("pmap_create pml4 obj");

    p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS) * PAGE_SIZE);
    if (NULL == p->pm_obj_pdpt)
        panic("pmap_create pdpt obj");

    p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS) * PAGE_SIZE);
    if (NULL == p->pm_obj)
        panic("pmap_create pte obj");

    /* All pmaps share the kernel's pml4 */
    pml4 = pmap64_pml4(p, 0ULL);
    kpml4 = kernel_pmap->pm_pml4;
    pml4[KERNEL_PML4_INDEX]         = kpml4[KERNEL_PML4_INDEX];
    pml4[KERNEL_KEXTS_INDEX]        = kpml4[KERNEL_KEXTS_INDEX];
    pml4[KERNEL_PHYSMAP_PML4_INDEX] = kpml4[KERNEL_PHYSMAP_PML4_INDEX];

    PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
               p, is_64bit, 0, 0, 0);

    return(p);
}

/*
 * Retire the given physical map from service.
 * Should only be called if the map contains
 * no valid mappings.
 */

void
pmap_destroy(pmap_t	p)
{
    int		c;

    if (p == PMAP_NULL)
        return;

    PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
               p, 0, 0, 0, 0);

    PMAP_LOCK(p);

    c = --p->ref_count;

    pmap_assert((current_thread() && (current_thread()->map)) ? (current_thread()->map->pmap != p) : TRUE);

    if (c == 0) {
        /*
         * If some cpu is not using the physical pmap pointer that it
         * is supposed to be (see set_dirbase), we might be using the
         * pmap that is being destroyed! Make sure we are
         * physically on the right pmap:
         */
        PMAP_UPDATE_TLBS(p, 0x0ULL, 0xFFFFFFFFFFFFF000ULL);
        if (pmap_pcid_ncpus)
            pmap_destroy_pcid_sync(p);
    }

    PMAP_UNLOCK(p);

    if (c != 0) {
        PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
                   p, 1, 0, 0, 0);
        pmap_assert(p == kernel_pmap);
        return;	/* still in use */
    }

    /*
     * Free the memory maps, then the
     * pmap structure.
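     * The PML4 anchor page is returned to pmap_anchor_zone; the page-table
     * pages are released by deallocating the pm_obj* VM objects, and
     * inuse_ptepages_count is adjusted to match.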
1324 */ 1325 int inuse_ptepages = 0; 1326 1327 zfree(pmap_anchor_zone, p->pm_pml4); 1328 1329 inuse_ptepages += p->pm_obj_pml4->resident_page_count; 1330 vm_object_deallocate(p->pm_obj_pml4); 1331 1332 inuse_ptepages += p->pm_obj_pdpt->resident_page_count; 1333 vm_object_deallocate(p->pm_obj_pdpt); 1334 1335 inuse_ptepages += p->pm_obj->resident_page_count; 1336 vm_object_deallocate(p->pm_obj); 1337 1338 OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count); 1339 PMAP_ZINFO_PFREE(p, inuse_ptepages * PAGE_SIZE); 1340 ledger_dereference(p->ledger); 1341 zfree(pmap_zone, p); 1342 1343 PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END, 1344 0, 0, 0, 0, 0); 1345} 1346 1347/* 1348 * Add a reference to the specified pmap. 1349 */ 1350 1351void 1352pmap_reference(pmap_t p) 1353{ 1354 if (p != PMAP_NULL) { 1355 PMAP_LOCK(p); 1356 p->ref_count++; 1357 PMAP_UNLOCK(p);; 1358 } 1359} 1360 1361/* 1362 * Remove phys addr if mapped in specified map 1363 * 1364 */ 1365void 1366pmap_remove_some_phys( 1367 __unused pmap_t map, 1368 __unused ppnum_t pn) 1369{ 1370 1371/* Implement to support working set code */ 1372 1373} 1374 1375 1376void 1377pmap_protect( 1378 pmap_t map, 1379 vm_map_offset_t sva, 1380 vm_map_offset_t eva, 1381 vm_prot_t prot) 1382{ 1383 pmap_protect_options(map, sva, eva, prot, 0, NULL); 1384} 1385 1386 1387/* 1388 * Set the physical protection on the 1389 * specified range of this map as requested. 1390 * Will not increase permissions. 1391 */ 1392void 1393pmap_protect_options( 1394 pmap_t map, 1395 vm_map_offset_t sva, 1396 vm_map_offset_t eva, 1397 vm_prot_t prot, 1398 unsigned int options, 1399 void *arg) 1400{ 1401 pt_entry_t *pde; 1402 pt_entry_t *spte, *epte; 1403 vm_map_offset_t lva; 1404 vm_map_offset_t orig_sva; 1405 boolean_t set_NX; 1406 int num_found = 0; 1407 1408 pmap_intr_assert(); 1409 1410 if (map == PMAP_NULL) 1411 return; 1412 1413 if (prot == VM_PROT_NONE) { 1414 pmap_remove_options(map, sva, eva, options); 1415 return; 1416 } 1417 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START, 1418 map, 1419 (uint32_t) (sva >> 32), (uint32_t) sva, 1420 (uint32_t) (eva >> 32), (uint32_t) eva); 1421 1422 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled) 1423 set_NX = FALSE; 1424 else 1425 set_NX = TRUE; 1426 1427 PMAP_LOCK(map); 1428 1429 orig_sva = sva; 1430 while (sva < eva) { 1431 lva = (sva + pde_mapped_size) & ~(pde_mapped_size - 1); 1432 if (lva > eva) 1433 lva = eva; 1434 pde = pmap_pde(map, sva); 1435 if (pde && (*pde & INTEL_PTE_VALID)) { 1436 if (*pde & INTEL_PTE_PS) { 1437 /* superpage */ 1438 spte = pde; 1439 epte = spte+1; /* excluded */ 1440 } else { 1441 spte = pmap_pte(map, (sva & ~(pde_mapped_size - 1))); 1442 spte = &spte[ptenum(sva)]; 1443 epte = &spte[intel_btop(lva - sva)]; 1444 } 1445 1446 for (; spte < epte; spte++) { 1447 if (!(*spte & INTEL_PTE_VALID)) 1448 continue; 1449 1450 if (prot & VM_PROT_WRITE) 1451 pmap_update_pte(spte, 0, INTEL_PTE_WRITE); 1452 else 1453 pmap_update_pte(spte, INTEL_PTE_WRITE, 0); 1454 1455 if (set_NX) 1456 pmap_update_pte(spte, 0, INTEL_PTE_NX); 1457 else 1458 pmap_update_pte(spte, INTEL_PTE_NX, 0); 1459 num_found++; 1460 } 1461 } 1462 sva = lva; 1463 } 1464 if (num_found) { 1465 if (options & PMAP_OPTIONS_NOFLUSH) 1466 PMAP_UPDATE_TLBS_DELAYED(map, orig_sva, eva, (pmap_flush_context *)arg); 1467 else 1468 PMAP_UPDATE_TLBS(map, orig_sva, eva); 1469 } 1470 PMAP_UNLOCK(map); 1471 1472 PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END, 1473 0, 0, 0, 0, 0); 1474 1475} 1476 1477/* Map a (possibly) autogenned block */ 
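/*
 * Illustrative usage sketch (not compiled): 'size' is expressed in PAGE_SIZE
 * units and 'pa' is a physical page number; both are advanced per page mapped
 * below.  'buf_va' and 'buf_ppn' are hypothetical values standing in for a
 * caller's 8-page physically contiguous buffer.
 */
#if 0
    pmap_map_block(kernel_pmap,
                   (addr64_t)buf_va,		/* VA to map at */
                   buf_ppn,			/* first physical page number */
                   8,				/* size, in PAGE_SIZE units */
                   VM_PROT_READ | VM_PROT_WRITE,
                   0,				/* attr: no VM_MEM_SUPERPAGE */
                   0);
#endif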
void
pmap_map_block(
    pmap_t		pmap,
    addr64_t		va,
    ppnum_t		pa,
    uint32_t		size,
    vm_prot_t		prot,
    int			attr,
    __unused unsigned int	flags)
{
    uint32_t	page;
    int		cur_page_size;

    if (attr & VM_MEM_SUPERPAGE)
        cur_page_size = SUPERPAGE_SIZE;
    else
        cur_page_size = PAGE_SIZE;

    for (page = 0; page < size; page += cur_page_size/PAGE_SIZE) {
        pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
        va += cur_page_size;
        pa += cur_page_size/PAGE_SIZE;
    }
}

kern_return_t
pmap_expand_pml4(
    pmap_t		map,
    vm_map_offset_t	vaddr,
    unsigned int	options)
{
    vm_page_t		m;
    pmap_paddr_t	pa;
    uint64_t		i;
    ppnum_t		pn;
    pml4_entry_t	*pml4p;

    DBG("pmap_expand_pml4(%p,%p)\n", map, (void *)vaddr);

    /*
     * Allocate a VM page for the pml4 page
     */
    while ((m = vm_page_grab()) == VM_PAGE_NULL) {
        if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
            return KERN_RESOURCE_SHORTAGE;
        VM_PAGE_WAIT();
    }
    /*
     *	put the page into the pmap's obj list so it
     *	can be found later.
     */
    pn = m->phys_page;
    pa = i386_ptob(pn);
    i = pml4idx(map, vaddr);

    /*
     *	Zero the page.
     */
    pmap_zero_page(pn);

    vm_page_lockspin_queues();
    vm_page_wire(m);
    vm_page_unlock_queues();

    OSAddAtomic(1, &inuse_ptepages_count);
    OSAddAtomic64(1, &alloc_ptepages_count);
    PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

    /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
    vm_object_lock(map->pm_obj_pml4);

    PMAP_LOCK(map);
    /*
     *	See if someone else expanded us first
     */
    if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
        PMAP_UNLOCK(map);
        vm_object_unlock(map->pm_obj_pml4);

        VM_PAGE_FREE(m);

        OSAddAtomic(-1, &inuse_ptepages_count);
        PMAP_ZINFO_PFREE(map, PAGE_SIZE);
        return KERN_SUCCESS;
    }

#if 0 /* DEBUG */
    if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE)) {
        panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
              map, map->pm_obj_pml4, vaddr, i);
    }
#endif
    vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i * PAGE_SIZE);
    vm_object_unlock(map->pm_obj_pml4);

    /*
     *	Set the page directory entry for this page table.
     */
    pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */

    pmap_store_pte(pml4p, pa_to_pte(pa)
                        | INTEL_PTE_VALID
                        | INTEL_PTE_USER
                        | INTEL_PTE_WRITE);

    PMAP_UNLOCK(map);

    return KERN_SUCCESS;
}

kern_return_t
pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, unsigned int options)
{
    vm_page_t		m;
    pmap_paddr_t	pa;
    uint64_t		i;
    ppnum_t		pn;
    pdpt_entry_t	*pdptp;

    DBG("pmap_expand_pdpt(%p,%p)\n", map, (void *)vaddr);

    while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
        kern_return_t pep4kr = pmap_expand_pml4(map, vaddr, options);
        if (pep4kr != KERN_SUCCESS)
            return pep4kr;
    }

    /*
     *	Allocate a VM page for the pdpt page
     */
    while ((m = vm_page_grab()) == VM_PAGE_NULL) {
        if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
            return KERN_RESOURCE_SHORTAGE;
        VM_PAGE_WAIT();
    }

    /*
     *	put the page into the pmap's obj list so it
     *	can be found later.
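     *	(The page is entered into pm_obj_pdpt at offset pdptidx * PAGE_SIZE so
     *	it can be found and released when the pmap is destroyed.)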
     */
    pn = m->phys_page;
    pa = i386_ptob(pn);
    i = pdptidx(map, vaddr);

    /*
     *	Zero the page.
     */
    pmap_zero_page(pn);

    vm_page_lockspin_queues();
    vm_page_wire(m);
    vm_page_unlock_queues();

    OSAddAtomic(1, &inuse_ptepages_count);
    OSAddAtomic64(1, &alloc_ptepages_count);
    PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

    /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
    vm_object_lock(map->pm_obj_pdpt);

    PMAP_LOCK(map);
    /*
     *	See if someone else expanded us first
     */
    if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
        PMAP_UNLOCK(map);
        vm_object_unlock(map->pm_obj_pdpt);

        VM_PAGE_FREE(m);

        OSAddAtomic(-1, &inuse_ptepages_count);
        PMAP_ZINFO_PFREE(map, PAGE_SIZE);
        return KERN_SUCCESS;
    }

#if 0 /* DEBUG */
    if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE)) {
        panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
              map, map->pm_obj_pdpt, vaddr, i);
    }
#endif
    vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i * PAGE_SIZE);
    vm_object_unlock(map->pm_obj_pdpt);

    /*
     *	Set the page directory entry for this page table.
     */
    pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */

    pmap_store_pte(pdptp, pa_to_pte(pa)
                        | INTEL_PTE_VALID
                        | INTEL_PTE_USER
                        | INTEL_PTE_WRITE);

    PMAP_UNLOCK(map);

    return KERN_SUCCESS;

}



/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
kern_return_t
pmap_expand(
    pmap_t		map,
    vm_map_offset_t	vaddr,
    unsigned int	options)
{
    pt_entry_t		*pdp;
    register vm_page_t	m;
    register pmap_paddr_t pa;
    uint64_t		i;
    ppnum_t		pn;


    /*
     * For the kernel, the virtual address must be in or above the basement
     * which is for kexts and is in the 512GB immediately below the kernel.
     * XXX - should use VM_MIN_KERNEL_AND_KEXT_ADDRESS not KERNEL_BASEMENT
     */
    if (map == kernel_pmap &&
        !(vaddr >= KERNEL_BASEMENT && vaddr <= VM_MAX_KERNEL_ADDRESS))
        panic("pmap_expand: bad vaddr 0x%llx for kernel pmap", vaddr);


    while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
        kern_return_t pepkr = pmap_expand_pdpt(map, vaddr, options);
        if (pepkr != KERN_SUCCESS)
            return pepkr;
    }

    /*
     *	Allocate a VM page for the pde entries.
     */
    while ((m = vm_page_grab()) == VM_PAGE_NULL) {
        if (options & PMAP_EXPAND_OPTIONS_NOWAIT)
            return KERN_RESOURCE_SHORTAGE;
        VM_PAGE_WAIT();
    }

    /*
     *	put the page into the pmap's obj list so it
     *	can be found later.
     */
    pn = m->phys_page;
    pa = i386_ptob(pn);
    i = pdeidx(map, vaddr);

    /*
     *	Zero the page.
     */
    pmap_zero_page(pn);

    vm_page_lockspin_queues();
    vm_page_wire(m);
    vm_page_unlock_queues();

    OSAddAtomic(1, &inuse_ptepages_count);
    OSAddAtomic64(1, &alloc_ptepages_count);
    PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

    /* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
    vm_object_lock(map->pm_obj);

    PMAP_LOCK(map);

    /*
     *	See if someone else expanded us first
     */
    if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
        PMAP_UNLOCK(map);
        vm_object_unlock(map->pm_obj);

        VM_PAGE_FREE(m);

        OSAddAtomic(-1, &inuse_ptepages_count);
        PMAP_ZINFO_PFREE(map, PAGE_SIZE);
        return KERN_SUCCESS;
    }

#if 0 /* DEBUG */
    if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE)) {
        panic("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n",
              map, map->pm_obj, vaddr, i);
    }
#endif
    vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i * PAGE_SIZE);
    vm_object_unlock(map->pm_obj);

    /*
     *	Set the page directory entry for this page table.
     */
    pdp = pmap_pde(map, vaddr);
    pmap_store_pte(pdp, pa_to_pte(pa)
                        | INTEL_PTE_VALID
                        | INTEL_PTE_USER
                        | INTEL_PTE_WRITE);

    PMAP_UNLOCK(map);

    return KERN_SUCCESS;
}

/* On K64 machines with more than 32GB of memory, pmap_steal_memory
 * will allocate past the 1GB of pre-expanded virtual kernel area. This
 * function allocates all the page tables using memory from the same pool
 * that pmap_steal_memory uses, rather than calling vm_page_grab (which
 * isn't available yet). */
void
pmap_pre_expand(pmap_t pmap, vm_map_offset_t vaddr)
{
    ppnum_t	pn;
    pt_entry_t	*pte;

    PMAP_LOCK(pmap);

    if (pmap64_pdpt(pmap, vaddr) == PDPT_ENTRY_NULL) {
        if (!pmap_next_page_hi(&pn))
            panic("pmap_pre_expand");

        pmap_zero_page(pn);

        pte = pmap64_pml4(pmap, vaddr);

        pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
                            | INTEL_PTE_VALID
                            | INTEL_PTE_USER
                            | INTEL_PTE_WRITE);
    }

    if (pmap64_pde(pmap, vaddr) == PD_ENTRY_NULL) {
        if (!pmap_next_page_hi(&pn))
            panic("pmap_pre_expand");

        pmap_zero_page(pn);

        pte = pmap64_pdpt(pmap, vaddr);

        pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
                            | INTEL_PTE_VALID
                            | INTEL_PTE_USER
                            | INTEL_PTE_WRITE);
    }

    if (pmap_pte(pmap, vaddr) == PT_ENTRY_NULL) {
        if (!pmap_next_page_hi(&pn))
            panic("pmap_pre_expand");

        pmap_zero_page(pn);

        pte = pmap64_pde(pmap, vaddr);

        pmap_store_pte(pte, pa_to_pte(i386_ptob(pn))
                            | INTEL_PTE_VALID
                            | INTEL_PTE_USER
                            | INTEL_PTE_WRITE);
    }

    PMAP_UNLOCK(pmap);
}

/*
 * pmap_sync_page_data_phys(ppnum_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page.
 * Not required on i386.
 */
void
pmap_sync_page_data_phys(__unused ppnum_t pa)
{
    return;
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pa)
 *
 * Write back and invalidate all cachelines on a physical page.
1871 */ 1872void 1873pmap_sync_page_attributes_phys(ppnum_t pa) 1874{ 1875 cache_flush_page_phys(pa); 1876} 1877 1878 1879 1880#ifdef CURRENTLY_UNUSED_AND_UNTESTED 1881 1882int collect_ref; 1883int collect_unref; 1884 1885/* 1886 * Routine: pmap_collect 1887 * Function: 1888 * Garbage collects the physical map system for 1889 * pages which are no longer used. 1890 * Success need not be guaranteed -- that is, there 1891 * may well be pages which are not referenced, but 1892 * others may be collected. 1893 * Usage: 1894 * Called by the pageout daemon when pages are scarce. 1895 */ 1896void 1897pmap_collect( 1898 pmap_t p) 1899{ 1900 register pt_entry_t *pdp, *ptp; 1901 pt_entry_t *eptp; 1902 int wired; 1903 1904 if (p == PMAP_NULL) 1905 return; 1906 1907 if (p == kernel_pmap) 1908 return; 1909 1910 /* 1911 * Garbage collect map. 1912 */ 1913 PMAP_LOCK(p); 1914 1915 for (pdp = (pt_entry_t *)p->dirbase; 1916 pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]; 1917 pdp++) 1918 { 1919 if (*pdp & INTEL_PTE_VALID) { 1920 if(*pdp & INTEL_PTE_REF) { 1921 pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF); 1922 collect_ref++; 1923 } else { 1924 collect_unref++; 1925 ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase)); 1926 eptp = ptp + NPTEPG; 1927 1928 /* 1929 * If the pte page has any wired mappings, we cannot 1930 * free it. 1931 */ 1932 wired = 0; 1933 { 1934 register pt_entry_t *ptep; 1935 for (ptep = ptp; ptep < eptp; ptep++) { 1936 if (iswired(*ptep)) { 1937 wired = 1; 1938 break; 1939 } 1940 } 1941 } 1942 if (!wired) { 1943 /* 1944 * Remove the virtual addresses mapped by this pte page. 1945 */ 1946 pmap_remove_range(p, 1947 pdetova(pdp - (pt_entry_t *)p->dirbase), 1948 ptp, 1949 eptp); 1950 1951 /* 1952 * Invalidate the page directory pointer. 1953 */ 1954 pmap_store_pte(pdp, 0x0); 1955 1956 PMAP_UNLOCK(p); 1957 1958 /* 1959 * And free the pte page itself. 1960 */ 1961 { 1962 register vm_page_t m; 1963 1964 vm_object_lock(p->pm_obj); 1965 1966 m = vm_page_lookup(p->pm_obj,(vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]) * PAGE_SIZE); 1967 if (m == VM_PAGE_NULL) 1968 panic("pmap_collect: pte page not in object"); 1969 1970 vm_object_unlock(p->pm_obj); 1971 1972 VM_PAGE_FREE(m); 1973 1974 OSAddAtomic(-1, &inuse_ptepages_count); 1975 PMAP_ZINFO_PFREE(p, PAGE_SIZE); 1976 } 1977 1978 PMAP_LOCK(p); 1979 } 1980 } 1981 } 1982 } 1983 1984 PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL); 1985 PMAP_UNLOCK(p); 1986 return; 1987 1988} 1989#endif 1990 1991 1992void 1993pmap_copy_page(ppnum_t src, ppnum_t dst) 1994{ 1995 bcopy_phys((addr64_t)i386_ptob(src), 1996 (addr64_t)i386_ptob(dst), 1997 PAGE_SIZE); 1998} 1999 2000 2001/* 2002 * Routine: pmap_pageable 2003 * Function: 2004 * Make the specified pages (by pmap, offset) 2005 * pageable (or not) as requested. 2006 * 2007 * A page which is not pageable may not take 2008 * a fault; therefore, its page table entry 2009 * must remain valid for the duration. 2010 * 2011 * This routine is merely advisory; pmap_enter 2012 * will specify that these pages are to be wired 2013 * down (or not) as appropriate. 
 */
void
pmap_pageable(
    __unused pmap_t          pmap,
    __unused vm_map_offset_t start_addr,
    __unused vm_map_offset_t end_addr,
    __unused boolean_t       pageable)
{
#ifdef lint
    pmap++; start_addr++; end_addr++; pageable++;
#endif /* lint */
}

void
invalidate_icache(__unused vm_offset_t addr,
                  __unused unsigned    cnt,
                  __unused int         phys)
{
    return;
}

void
flush_dcache(__unused vm_offset_t addr,
             __unused unsigned    count,
             __unused int         phys)
{
    return;
}

#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions.
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
{
    thread_t thread = current_thread();
    uint64_t ccr3;

    if (current_map() == kernel_map)
        return KERN_FAILURE;
    else if (((ccr3 = get_cr3_base()) != thread->map->pmap->pm_cr3) && (no_shared_cr3 == FALSE))
        return KERN_FAILURE;
    else if (no_shared_cr3 && (ccr3 != kernel_pmap->pm_cr3))
        return KERN_FAILURE;
    else
        return KERN_SUCCESS;
}

kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
{
    return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */

#include <mach_vm_debug.h>
#if MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
    __unused pmap_t      pmap,
    __unused vm_offset_t *listp,
    __unused int         space)
{
    return 0;
}
#endif /* MACH_VM_DEBUG */


/* temporary workaround */
boolean_t
coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
{
#if 0
    pt_entry_t *ptep;

    ptep = pmap_pte(map->pmap, va);
    if (0 == ptep)
        return FALSE;
    return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
#else
    return TRUE;
#endif
}


boolean_t
phys_page_exists(ppnum_t pn)
{
    assert(pn != vm_page_fictitious_addr);

    if (!pmap_initialized)
        return TRUE;

    if (pn == vm_page_guard_addr)
        return FALSE;

    if (!IS_MANAGED_PAGE(ppn_to_pai(pn)))
        return FALSE;

    return TRUE;
}


void
pmap_switch(pmap_t tpmap)
{
    spl_t s;

    s = splhigh();      /* Make sure interrupts are disabled */
    set_dirbase(tpmap, current_thread());
    splx(s);
}


/*
 * Disable the no-execute capability on the specified pmap.
 */
void
pmap_disable_NX(pmap_t pmap)
{
    pmap->nx_enabled = 0;
}

/*
 * Page-table pages are not carved out of a real zalloc zone; these two
 * routines export their usage through the fake-zone reporting interface.
 */
void
pt_fake_zone_init(int zone_index)
{
    pt_fake_zone_index = zone_index;
}

void
pt_fake_zone_info(
    int        *count,
    vm_size_t  *cur_size,
    vm_size_t  *max_size,
    vm_size_t  *elem_size,
    vm_size_t  *alloc_size,
    uint64_t   *sum_size,
    int        *collectable,
    int        *exhaustable,
    int        *caller_acct)
{
    *count      = inuse_ptepages_count;
    *cur_size   = PAGE_SIZE * inuse_ptepages_count;
    *max_size   = PAGE_SIZE * (inuse_ptepages_count +
                               vm_page_inactive_count +
                               vm_page_active_count +
                               vm_page_free_count);
    *elem_size  = PAGE_SIZE;
    *alloc_size = PAGE_SIZE;
    *sum_size   = alloc_ptepages_count * PAGE_SIZE;

    *collectable = 1;
    *exhaustable = 0;
    *caller_acct = 1;
}

/*
 * Send an NMI IPI to every CPU in cpu_mask, then spin for LockTimeOut to
 * give the targets time to take the NMI.  Used from the TLB-shootdown
 * timeout path immediately before panicking.
 */
static inline void
pmap_cpuset_NMIPI(cpu_set cpu_mask)
{
    unsigned int cpu, cpu_bit;
    uint64_t     deadline;

    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if (cpu_mask & cpu_bit)
            cpu_NMI_interrupt(cpu);
    }
    deadline = mach_absolute_time() + (LockTimeOut);
    while (mach_absolute_time() < deadline)
        cpu_pause();
}


/*
 * Reset a pmap_flush_context prior to accumulating deferred TLB
 * invalidations (see PMAP_DELAY_TLB_FLUSH in pmap_flush_tlbs below).
 */
void
pmap_flush_context_init(pmap_flush_context *pfc)
{
    pfc->pfc_cpus = 0;
    pfc->pfc_invalid_global = 0;
}

/*
 * Issue the TLB shootdowns that were deferred into *pfc and wait for the
 * targeted CPUs to acknowledge, flushing the local TLB as well if this
 * CPU was marked.
 */
void
pmap_flush(
    pmap_flush_context *pfc)
{
    unsigned int my_cpu;
    unsigned int cpu;
    unsigned int cpu_bit;
    cpu_set      cpus_to_respond = 0;
    cpu_set      cpus_to_signal = 0;
    cpu_set      cpus_signaled = 0;
    boolean_t    flush_self = FALSE;
    uint64_t     deadline;

    mp_disable_preemption();

    my_cpu = cpu_number();
    cpus_to_signal = pfc->pfc_cpus;

    PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_DELAYED_TLBS) | DBG_FUNC_START,
                        NULL, cpus_to_signal, 0, 0, 0);

    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus && cpus_to_signal; cpu++, cpu_bit <<= 1) {

        if (cpus_to_signal & cpu_bit) {

            cpus_to_signal &= ~cpu_bit;

            if (!cpu_datap(cpu)->cpu_running)
                continue;

            if (pfc->pfc_invalid_global & cpu_bit)
                cpu_datap(cpu)->cpu_tlb_invalid_global = TRUE;
            else
                cpu_datap(cpu)->cpu_tlb_invalid_local = TRUE;
            mfence();

            if (cpu == my_cpu) {
                flush_self = TRUE;
                continue;
            }
            if (CPU_CR3_IS_ACTIVE(cpu)) {
                cpus_to_respond |= cpu_bit;
                i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
            }
        }
    }
    cpus_signaled = cpus_to_respond;

    /*
     * Flush local tlb if required.
     * Do this now to overlap with other processors responding.
     */
    if (flush_self && cpu_datap(my_cpu)->cpu_tlb_invalid != FALSE)
        process_pmap_updates();

    if (cpus_to_respond) {

        deadline = mach_absolute_time() + LockTimeOut;
        /*
         * Wait for those other cpus to acknowledge
         */
        while (cpus_to_respond != 0) {
            long orig_acks = 0;

            for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
                /* Consider checking local/global invalidity
                 * as appropriate in the PCID case.
                 */
                if ((cpus_to_respond & cpu_bit) != 0) {
                    if (!cpu_datap(cpu)->cpu_running ||
                        cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
                        !CPU_CR3_IS_ACTIVE(cpu)) {
                        cpus_to_respond &= ~cpu_bit;
                    }
                    cpu_pause();
                }
                if (cpus_to_respond == 0)
                    break;
            }
            if (cpus_to_respond && (mach_absolute_time() > deadline)) {
                if (machine_timeout_suspended())
                    continue;
                pmap_tlb_flush_timeout = TRUE;
                orig_acks = NMIPI_acks;
                pmap_cpuset_NMIPI(cpus_to_respond);

                panic("TLB invalidation IPI timeout: "
                      "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
                      cpus_to_respond, orig_acks, NMIPI_acks);
            }
        }
    }
    PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_DELAYED_TLBS) | DBG_FUNC_END,
                        NULL, cpus_signaled, flush_self, 0, 0);

    mp_enable_preemption();
}
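
/*
 * Illustrative usage sketch (not part of the original source): the
 * delayed-flush API above lets a caller batch several pmap updates and pay
 * for the shootdown IPIs once.  Assuming a hypothetical series of pmap
 * updates that each pass PMAP_DELAY_TLB_FLUSH down to pmap_flush_tlbs(),
 * the pattern is roughly:
 *
 *     pmap_flush_context pfc;
 *
 *     pmap_flush_context_init(&pfc);
 *     // ... perform a series of pmap updates, handing PMAP_DELAY_TLB_FLUSH
 *     // and &pfc to pmap_flush_tlbs(), which then only accumulates the
 *     // target CPUs in pfc instead of signalling them immediately ...
 *     pmap_flush(&pfc);    // one round of shootdown IPIs for the whole batch
 */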

/*
 * Called with pmap locked, we:
 *  - scan through per-cpu data to see which other cpus need to flush
 *  - send an IPI to each non-idle cpu to be flushed
 *  - wait for all to signal back that they are inactive or we see that
 *    they are at a safe point (idle).
 *  - flush the local tlb if active for this pmap
 *  - return ... the caller will unlock the pmap
 */

void
pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv, int options, pmap_flush_context *pfc)
{
    unsigned int cpu;
    unsigned int cpu_bit;
    cpu_set      cpus_to_signal;
    unsigned int my_cpu = cpu_number();
    pmap_paddr_t pmap_cr3 = pmap->pm_cr3;
    boolean_t    flush_self = FALSE;
    uint64_t     deadline;
    boolean_t    pmap_is_shared = (pmap->pm_shared || (pmap == kernel_pmap));
    boolean_t    need_global_flush = FALSE;

    assert((processor_avail_count < 2) ||
           (ml_get_interrupts_enabled() && get_preemption_level() != 0));

    /*
     * Scan other cpus for matching active or task CR3.
     * For idle cpus (with no active map) we mark them invalid but
     * don't signal -- they'll check as they go busy.
     */
    cpus_to_signal = 0;

    if (pmap_pcid_ncpus) {
        if (pmap_is_shared)
            need_global_flush = TRUE;
        pmap_pcid_invalidate_all_cpus(pmap);
        mfence();
    }
    for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
        if (!cpu_datap(cpu)->cpu_running)
            continue;
        uint64_t cpu_active_cr3 = CPU_GET_ACTIVE_CR3(cpu);
        uint64_t cpu_task_cr3 = CPU_GET_TASK_CR3(cpu);

        if ((pmap_cr3 == cpu_task_cr3) ||
            (pmap_cr3 == cpu_active_cr3) ||
            (pmap_is_shared)) {

            if (options & PMAP_DELAY_TLB_FLUSH) {
                if (need_global_flush == TRUE)
                    pfc->pfc_invalid_global |= cpu_bit;
                pfc->pfc_cpus |= cpu_bit;

                continue;
            }
            if (cpu == my_cpu) {
                flush_self = TRUE;
                continue;
            }
            if (need_global_flush == TRUE)
                cpu_datap(cpu)->cpu_tlb_invalid_global = TRUE;
            else
                cpu_datap(cpu)->cpu_tlb_invalid_local = TRUE;
            mfence();

            /*
             * We don't need to signal processors which will flush
             * lazily at the idle state or kernel boundary.
             * For example, if we're invalidating the kernel pmap,
             * processors currently in userspace don't need to flush
             * their TLBs until the next time they enter the kernel.
             * Alterations to the address space of a task active
             * on a remote processor result in a signal, to
             * account for copy operations. (There may be room
             * for optimization in such cases).
             * The order of the loads below with respect
             * to the store to the "cpu_tlb_invalid" field above
             * is important--hence the barrier.
             */
            if (CPU_CR3_IS_ACTIVE(cpu) &&
                (pmap_cr3 == CPU_GET_ACTIVE_CR3(cpu) ||
                 pmap->pm_shared ||
                 (pmap_cr3 == CPU_GET_TASK_CR3(cpu)))) {
                cpus_to_signal |= cpu_bit;
                i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
            }
        }
    }
    if ((options & PMAP_DELAY_TLB_FLUSH))
        return;

    if (pmap == kernel_pmap) {
        PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_KERN_TLBS) | DBG_FUNC_START,
                            pmap, cpus_to_signal, flush_self, startv, endv);
    } else {
        PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
                            pmap, cpus_to_signal, flush_self, startv, endv);
    }
    /*
     * Flush local tlb if required.
     * Do this now to overlap with other processors responding.
     */
    if (flush_self) {
        if (pmap_pcid_ncpus) {
            pmap_pcid_validate_cpu(pmap, my_cpu);
            if (pmap_is_shared)
                tlb_flush_global();
            else
                flush_tlb_raw();
        } else
            flush_tlb_raw();
    }

    if (cpus_to_signal) {
        cpu_set cpus_to_respond = cpus_to_signal;

        deadline = mach_absolute_time() + LockTimeOut;
        /*
         * Wait for those other cpus to acknowledge
         */
        while (cpus_to_respond != 0) {
            long orig_acks = 0;

            for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
                /* Consider checking local/global invalidity
                 * as appropriate in the PCID case.
                 */
                if ((cpus_to_respond & cpu_bit) != 0) {
                    if (!cpu_datap(cpu)->cpu_running ||
                        cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
                        !CPU_CR3_IS_ACTIVE(cpu)) {
                        cpus_to_respond &= ~cpu_bit;
                    }
                    cpu_pause();
                }
                if (cpus_to_respond == 0)
                    break;
            }
            if (cpus_to_respond && (mach_absolute_time() > deadline)) {
                if (machine_timeout_suspended())
                    continue;
                pmap_tlb_flush_timeout = TRUE;
                orig_acks = NMIPI_acks;
                pmap_cpuset_NMIPI(cpus_to_respond);

                panic("TLB invalidation IPI timeout: "
                      "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
                      cpus_to_respond, orig_acks, NMIPI_acks);
            }
        }
    }

    if (__improbable((pmap == kernel_pmap) && (flush_self != TRUE))) {
        panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; "
              "kernel CR3: 0x%llX, pmap_cr3: 0x%llx, CPU active CR3: 0x%llX, CPU Task Map: %d",
              kernel_pmap->pm_cr3, pmap_cr3, current_cpu_datap()->cpu_active_cr3,
              current_cpu_datap()->cpu_task_map);
    }

    if (pmap == kernel_pmap) {
        PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_KERN_TLBS) | DBG_FUNC_END,
                            pmap, cpus_to_signal, startv, endv, 0);
    } else {
        PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
                            pmap, cpus_to_signal, startv, endv, 0);
    }
}

/*
 * Perform the TLB invalidation that other CPUs have requested of this one,
 * honouring the local/global distinction when PCIDs are in use.
 */
void
process_pmap_updates(void)
{
    int ccpu = cpu_number();

    pmap_assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
    if (pmap_pcid_ncpus) {
        pmap_pcid_validate_current();
        if (cpu_datap(ccpu)->cpu_tlb_invalid_global) {
            cpu_datap(ccpu)->cpu_tlb_invalid = FALSE;
            tlb_flush_global();
        } else {
            cpu_datap(ccpu)->cpu_tlb_invalid_local = FALSE;
            flush_tlb_raw();
        }
    } else {
        current_cpu_datap()->cpu_tlb_invalid = FALSE;
        flush_tlb_raw();
    }

    mfence();
}

/*
 * Handler for TLB-shootdown interprocessor interrupts: drain any pending
 * invalidation request for the current CPU.
 */
void
pmap_update_interrupt(void)
{
    PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
               0, 0, 0, 0, 0);

    if (current_cpu_datap()->cpu_tlb_invalid)
        process_pmap_updates();

    PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
               0, 0, 0, 0, 0);
}

#include <mach/mach_vm.h>   /* mach_vm_region_recurse() */

/*
 * Scan the kernel pmap for W+X PTEs and the kernel VM map for W+X map
 * entries, and identify ranges with mismatched VM permissions and PTE
 * permissions.
 */
kern_return_t
pmap_permissions_verify(pmap_t ipmap, vm_map_t ivmmap, vm_offset_t sv, vm_offset_t ev) {
    vm_offset_t   cv = sv;
    kern_return_t rv = KERN_SUCCESS;
    uint64_t      skip4 = 0, skip2 = 0;

    sv &= ~PAGE_MASK_64;
    ev &= ~PAGE_MASK_64;
    while (cv < ev) {
        if (__improbable((cv > 0x00007FFFFFFFFFFFULL) &&
            (cv < 0xFFFF800000000000ULL))) {
            cv = 0xFFFF800000000000ULL;
        }
        /* Potential inconsistencies from not holding pmap lock
         * but harmless for the moment.
         */
        if (((cv & PML4MASK) == 0) && (pmap64_pml4(ipmap, cv) == 0)) {
            if ((cv + NBPML4) > cv)
                cv += NBPML4;
            else
                break;
            skip4++;
            continue;
        }
        if (((cv & PDMASK) == 0) && (pmap_pde(ipmap, cv) == 0)) {
            if ((cv + NBPD) > cv)
                cv += NBPD;
            else
                break;
            skip2++;
            continue;
        }

        pt_entry_t *ptep = pmap_pte(ipmap, cv);
        if (ptep && (*ptep & INTEL_PTE_VALID)) {
            if (*ptep & INTEL_PTE_WRITE) {
                if (!(*ptep & INTEL_PTE_NX)) {
                    kprintf("W+X PTE at 0x%lx, P4: 0x%llx, P3: 0x%llx, P2: 0x%llx, PT: 0x%llx, VP: %u\n",
                        cv, *pmap64_pml4(ipmap, cv), *pmap64_pdpt(ipmap, cv), *pmap64_pde(ipmap, cv),
                        *ptep, pmap_valid_page((ppnum_t)(i386_btop(pte_to_pa(*ptep)))));
                    rv = KERN_FAILURE;
                }
            }
        }
        cv += PAGE_SIZE;
    }
    kprintf("Completed pmap scan\n");
    cv = sv;

    struct vm_region_submap_info_64 vbr;
    mach_msg_type_number_t vbrcount = 0;
    mach_vm_size_t vmsize;
    vm_prot_t      prot;
    uint32_t       nesting_depth = 0;
    kern_return_t  kret;

    while (cv < ev) {

        for (;;) {
            vbrcount = VM_REGION_SUBMAP_INFO_COUNT_64;
            if ((kret = mach_vm_region_recurse(ivmmap,
                    (mach_vm_address_t *) &cv, &vmsize, &nesting_depth,
                    (vm_region_recurse_info_t)&vbr,
                    &vbrcount)) != KERN_SUCCESS) {
                break;
            }

            if (vbr.is_submap) {
                nesting_depth++;
                continue;
            } else {
                break;
            }
        }

        if (kret != KERN_SUCCESS)
            break;

        prot = vbr.protection;

        if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == (VM_PROT_WRITE | VM_PROT_EXECUTE)) {
            kprintf("W+X map entry at address 0x%lx\n", cv);
            rv = KERN_FAILURE;
        }

        if (prot) {
            vm_offset_t pcv;
            for (pcv = cv; pcv < cv + vmsize; pcv += PAGE_SIZE) {
                pt_entry_t *ptep = pmap_pte(ipmap, pcv);
                vm_prot_t  tprot;

                if ((ptep == NULL) || !(*ptep & INTEL_PTE_VALID))
                    continue;
                tprot = VM_PROT_READ;
                if (*ptep & INTEL_PTE_WRITE)
                    tprot |= VM_PROT_WRITE;
                if ((*ptep & INTEL_PTE_NX) == 0)
                    tprot |= VM_PROT_EXECUTE;
                if (tprot != prot) {
                    kprintf("PTE/map entry permissions mismatch at address 0x%lx, pte: 0x%llx, protection: 0x%x\n",
                        pcv, *ptep, prot);
                    rv = KERN_FAILURE;
                }
            }
        }
        cv += vmsize;
    }
    return rv;
}
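
/*
 * Illustrative usage sketch (not part of the original source): a debugging
 * caller could scan the kernel's own mappings for W+X pages and VM/PTE
 * permission mismatches.  VM_MIN_KERNEL_ADDRESS and VM_MAX_KERNEL_ADDRESS
 * are assumed here purely as plausible scan bounds; the routine accepts any
 * [sv, ev) range.
 *
 *     kern_return_t kr;
 *
 *     kr = pmap_permissions_verify(kernel_pmap, kernel_map,
 *              VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS);
 *     if (kr != KERN_SUCCESS)
 *         kprintf("W+X mapping or permission mismatch detected\n");
 */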