/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 */

/*
 *	File:	pmap.c
 *	Author:	Avadis Tevanian, Jr., Michael Wayne Young
 *	(These guys wrote the Vax version)
 *
 *	Physical Map management code for Intel i386, i486, and i860.
 *
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

#include <string.h>
#include <mach_ldebug.h>

#include <libkern/OSAtomic.h>

#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/queue.h>
#include <kern/ledger.h>

#include <kern/lock.h>
#include <kern/kalloc.h>
#include <kern/spl.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <mach/machine/vm_param.h>
#include <machine/thread.h>

#include <kern/misc_protos.h>			/* prototyping */
#include <i386/misc_protos.h>

#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_number.h>
#include <i386/machine_cpu.h>
#include <i386/seg.h>
#include <i386/serial_io.h>
#include <i386/cpu_capabilities.h>
#include <i386/machine_routines.h>
#include <i386/proc_reg.h>
#include <i386/tsc.h>
#include <i386/acpi.h>
#include <i386/pmap_internal.h>

#include <vm/vm_protos.h>

#include <i386/mp.h>
#include <i386/mp_desc.h>
#include <i386/i386_lowmem.h>
#include <i386/lowglobals.h>


/* #define DEBUGINTERRUPTS 1  uncomment to ensure pmap callers have interrupts enabled */
#ifdef DEBUGINTERRUPTS
#define pmap_intr_assert() {if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) panic("pmap interrupt assert %s, %d",__FILE__, __LINE__);}
#else
#define pmap_intr_assert()
#endif

#ifdef IWANTTODEBUG
#undef DEBUG
#define DEBUG 1
#define POSTCODE_DELAY 1
#include <i386/postcode.h>
#endif /* IWANTTODEBUG */

#ifdef	PMAP_DEBUG
void dump_pmap(pmap_t);
void dump_4GB_pdpt(pmap_t p);
void dump_4GB_pdpt_thread(thread_t tp);
#endif

int nx_enabled = 1;			/* enable no-execute protection */
#ifdef CONFIG_EMBEDDED
int allow_data_exec  = 0;	/* no exec from data, embedded is hardcore like that */
#else
int allow_data_exec  = VM_ABI_32;	/* 32-bit apps may execute data by default, 64-bit apps may not */
#endif
int allow_stack_exec = 0;		/* No apps may execute from the stack by default */

#if CONFIG_YONAH
boolean_t cpu_64bit = FALSE;
#else
const boolean_t cpu_64bit = TRUE;
#endif
boolean_t pmap_trace = FALSE;

uint64_t max_preemption_latency_tsc = 0;

pv_hashed_entry_t     *pv_hash_table;  /* hash lists */

uint32_t npvhash = 0;

/*
 *	pv_list entries are kept on a list that can only be accessed
 *	with the pmap system locked (at SPLVM, not in the cpus_active set).
 *	The list is refilled from the pv_hashed_list_zone if it becomes empty.
 */
pv_rooted_entry_t	pv_free_list = PV_ROOTED_ENTRY_NULL;	/* free list at SPLVM */
pv_hashed_entry_t	pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t	pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
decl_simple_lock_data(,pv_hashed_free_list_lock)
decl_simple_lock_data(,pv_hashed_kern_free_list_lock)
decl_simple_lock_data(,pv_hash_table_lock)

zone_t		pv_hashed_list_zone;	/* zone of pv_hashed_entry structures */

static zone_t pdpt_zone;

/*
 *	First and last physical addresses that we maintain any information
 *	for.  Initialized to zero so that pmap operations done before
 *	pmap_init won't touch any non-existent structures.
 */
boolean_t	pmap_initialized = FALSE;	/* Has pmap_init completed? */

static struct vm_object kptobj_object_store;
static vm_object_t kptobj;

/*
 *	Index into pv_head table, its lock bits, and the modify/reference and managed bits
 */

/*
 *	Array of physical page attributes for managed pages.
 *	One byte per physical page.
 */
char		*pmap_phys_attributes;
unsigned int	last_managed_page = 0;

uint64_t pde_mapped_size;

const boolean_t	pmap_disable_kheap_nx = TRUE;
const boolean_t	pmap_disable_kstack_nx = TRUE;


#if USLOCK_DEBUG
extern int	max_lock_loops;
#define LOOP_VAR							\
	unsigned int	loop_count;					\
	loop_count = disable_serial_output ? max_lock_loops		\
					: max_lock_loops*100
#define LOOP_CHECK(msg, pmap)						\
	if (--loop_count == 0) {					\
		mp_disable_preemption();				\
		kprintf("%s: cpu %d pmap %x\n",				\
			msg, cpu_number(), pmap);			\
		Debugger("deadlock detection");				\
		mp_enable_preemption();					\
		loop_count = max_lock_loops;				\
	}
#else	/* USLOCK_DEBUG */
#define LOOP_VAR
#define LOOP_CHECK(msg, pmap)
#endif	/* USLOCK_DEBUG */

unsigned pmap_memory_region_count;
unsigned pmap_memory_region_current;

pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE];

/*
 *	Other useful macros.
 */
#define current_pmap()		(vm_map_pmap(current_thread()->map))

struct pmap	kernel_pmap_store;
pmap_t		kernel_pmap;

pd_entry_t	high_shared_pde;
pd_entry_t	commpage64_pde;

struct zone	*pmap_zone;		/* zone of pmap structures */

int pmap_debug = 0;		/* flag for debugging prints */

unsigned int	inuse_ptepages_count = 0;
long long	alloc_ptepages_count __attribute__((aligned(8))) = 0LL; /* aligned for atomic access */
unsigned int	bootstrap_wired_pages = 0;
int		pt_fake_zone_index = -1;

extern	long	NMIPI_acks;

addr64_t	kernel64_cr3;
boolean_t	no_shared_cr3 = FALSE;	/* -no_shared_cr3 boot arg */

boolean_t	kernel_text_ps_4K = TRUE;
boolean_t	wpkernel = TRUE;

extern char	end;
static int	nkpt;

pt_entry_t	*DMAP1, *DMAP2;
caddr_t		DADDR1;
caddr_t		DADDR2;

/*
 * for legacy, returns the address of the pde entry.
 * for 64 bit, causes the pdpt page containing the pde entry to be mapped,
 * then returns the mapped address of the pde entry in that page
 */
pd_entry_t *
pmap_pde(pmap_t m, vm_map_offset_t v)
{
	pd_entry_t *pde;

	if (!cpu_64bit || (m == kernel_pmap)) {
		pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]));
	} else {
		assert(m);
		assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
		pde = pmap64_pde(m, v);
	}
	return pde;
}

/*
 * the single pml4 page per pmap is allocated at pmap create time and exists
 * for the duration of the pmap. we allocate this page in kernel vm (to save us one
 * level of page table dynamic mapping.
 * this returns the address of the requested pml4 entry in the top level page.
 */
static inline
pml4_entry_t *
pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr)
{
	return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1))));
}

/*
 * maps in the pml4 page, if any, containing the pdpt entry requested
 * and returns the address of the pdpt entry in that mapped page
 */
pdpt_entry_t *
pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr)
{
	pml4_entry_t	newpf;
	pml4_entry_t	*pml4;
	int		i;

	assert(pmap);
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
		return(0);
	}

	pml4 = pmap64_pml4(pmap, vaddr);

	if (pml4 && ((*pml4 & INTEL_PTE_VALID))) {

		newpf = *pml4 & PG_FRAME;

		for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
				       ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
			}
		}

		current_cpu_datap()->cpu_pmap->pdpt_window_index++;
		if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR));
		return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) +
			((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1))));
	}

	return (NULL);
}

/*
 * maps in the pdpt page, if any, containing the pde entry requested
 * and returns the address of the pde entry in that mapped page
 */
pd_entry_t *
pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr)
{
	pdpt_entry_t	newpf;
	pdpt_entry_t	*pdpt;
	int		i;

	assert(pmap);
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) {
		return(0);
	}

	/* if (vaddr & (1ULL << 63)) panic("neg addr");*/
	pdpt = pmap64_pdpt(pmap, vaddr);

	if (pdpt && ((*pdpt & INTEL_PTE_VALID))) {

		newpf = *pdpt & PG_FRAME;

		for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
				       ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
			}
		}

		current_cpu_datap()->cpu_pmap->pde_window_index++;
		if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR));
		return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) +
			((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1))));
	}

	return (NULL);
}

/*
 * Because the page tables (top 3 levels) are mapped into per-cpu windows,
 * callers must either disable interrupts or disable preemption before calling
 * one of the pte mapping routines (e.g. pmap_pte()), as the returned vaddr
 * is in one of those mapped windows and that cannot be allowed to change until
 * the caller is done using the returned pte pointer. When done, the caller
 * restores interrupts or preemption to its previous state, after which point the
 * vaddr for the returned pte can no longer be used.
 */


/*
 * return address of mapped pte for vaddr va in pmap pmap.
 * must be called with preemption or interrupts disabled
 * if targeted pmap is not the kernel pmap
 * since we may be passing back a virtual address that is
 * associated with this cpu... preemption or interrupts
 * must remain disabled until the caller is done using
 * the pointer that was passed back.
 *
 * maps in the pde page, if any, containing the pte and returns
 * the address of the pte in that mapped page
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_map_offset_t vaddr)
{
	pd_entry_t	*pde;
	pd_entry_t	newpf;
	int		i;

	assert(pmap);
	pde = pmap_pde(pmap, vaddr);

	if (pde && ((*pde & INTEL_PTE_VALID))) {
		if (*pde & INTEL_PTE_PS)
			return pde;
		if (pmap == kernel_pmap)
			return (vtopte(vaddr)); /* compat kernel still has pte's mapped */
#if TESTING
		if (ml_get_interrupts_enabled() && get_preemption_level() == 0)
			panic("pmap_pte: unsafe call");
#endif
		assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

		newpf = *pde & PG_FRAME;

		for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) {
			if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) {
				return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) +
				       ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
			}
		}

		current_cpu_datap()->cpu_pmap->pte_window_index++;
		if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1))
			current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW;
		pmap_store_pte(
			(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP),
			newpf | INTEL_PTE_RW | INTEL_PTE_VALID);
		invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR));
		return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) +
			((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1)));
	}

	return(NULL);
}
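
/*
 * A minimal sketch (not part of the original source) of the calling
 * discipline described above, assuming only routines already used in
 * this file; the helper name is hypothetical:
 */
#if 0	/* illustrative only */
static pt_entry_t
example_read_pte(pmap_t pmap, vm_map_offset_t va)
{
	pt_entry_t	*ptep;
	pt_entry_t	pte = 0;

	mp_disable_preemption();	/* pin this cpu's mapping windows */
	ptep = pmap_pte(pmap, va);
	if (ptep != PT_ENTRY_NULL)
		pte = *ptep;		/* copy out while the window is mapped */
	mp_enable_preemption();		/* ptep's window may now be recycled */
	return pte;
}
#endif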

/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_offset_t
pmap_map(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	int		ps;

	ps = PAGE_SIZE;
	while (start_addr < end_addr) {
		pmap_enter(kernel_pmap, (vm_map_offset_t)virt,
			   (ppnum_t) i386_btop(start_addr), prot, VM_PROT_NONE, flags, FALSE);
		virt += ps;
		start_addr += ps;
	}
	return(virt);
}

extern	pmap_paddr_t	first_avail;
extern	vm_offset_t	virtual_avail, virtual_end;
extern	pmap_paddr_t	avail_start, avail_end;
extern	vm_offset_t	sHIB;
extern	vm_offset_t	eHIB;
extern	vm_offset_t	stext;
extern	vm_offset_t	etext;
extern	vm_offset_t	sdata;

extern void		*KPTphys;

void
pmap_cpu_init(void)
{
	/*
	 * Here early in the life of a processor (from cpu_mode_init()).
	 */

	/*
	 * Initialize the per-cpu, TLB-related fields.
	 */
	current_cpu_datap()->cpu_active_cr3 = kernel_pmap->pm_cr3;
	current_cpu_datap()->cpu_tlb_invalid = FALSE;
}

vm_offset_t
pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz)
{
	vm_offset_t ve = pmap_index_to_virt(e);
	pt_entry_t *ptep;
	pmap_paddr_t pa;
	int i;
	spl_t s;

	assert(0 == (va & PAGE_MASK));	/* expecting page aligned */
	s = splhigh();
	ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve);

	for (i=0; i< sz; i++) {
		pa = (pmap_paddr_t) kvtophys(va);
		pmap_store_pte(ptep, (pa & PG_FRAME)
				     | INTEL_PTE_VALID
				     | INTEL_PTE_GLOBAL
				     | INTEL_PTE_RW
				     | INTEL_PTE_REF
				     | INTEL_PTE_MOD);
		va += PAGE_SIZE;
		ptep++;
	}
	splx(s);
	return ve;
}

vm_offset_t
pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz)
{
	enum high_fixed_addresses a = e + HIGH_CPU_END * cpu;
	return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz);
}

void pmap_init_high_shared(void);

extern vm_offset_t gdtptr, idtptr;

extern uint32_t low_intstack;

extern struct fake_descriptor ldt_desc_pattern;
extern struct fake_descriptor tss_desc_pattern;

extern char hi_remap_text, hi_remap_etext;
extern char t_zero_div;

pt_entry_t *pte_unique_base;

void
pmap_init_high_shared(void)
{
	vm_offset_t haddr;
	spl_t s;

	cpu_desc_index_t * cdi = &cpu_data_master.cpu_desc_index;

	kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n",
		HIGH_MEM_BASE, pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
	s = splhigh();
	pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN));
	splx(s);

	if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) >
				HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1)
		panic("tramps too large");
	haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS,
					(vm_offset_t) &hi_remap_text, 3);
	kprintf("tramp: 0x%x, ", haddr);
	/* map gdt up high and update ptr for reload */
	haddr = pmap_high_shared_remap(HIGH_FIXED_GDT,
					(vm_offset_t) master_gdt, 1);
	cdi->cdi_gdt.ptr = (void *)haddr;
	kprintf("GDT: 0x%x, ", haddr);
	/* map ldt up high */
	haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN,
					(vm_offset_t) master_ldt,
					HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1);
	cdi->cdi_ldt = (struct fake_descriptor *)haddr;
	kprintf("LDT: 0x%x, ", haddr);
	/* put new ldt addr into gdt */
	struct fake_descriptor temp_fake_desc;
	temp_fake_desc = ldt_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);

	*(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = temp_fake_desc;
	*(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = temp_fake_desc;

	/* map idt up high */
	haddr = pmap_high_shared_remap(HIGH_FIXED_IDT,
					(vm_offset_t) master_idt, 1);
	cdi->cdi_idt.ptr = (void *)haddr;
	kprintf("IDT: 0x%x, ", haddr);
	/* remap ktss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS,
					(vm_offset_t) &master_ktss, 1);

	temp_fake_desc = tss_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);
	*(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = temp_fake_desc;
	kprintf("KTSS: 0x%x, ", haddr);

	/* remap dftss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
					(vm_offset_t) &master_dftss, 1);
	temp_fake_desc = tss_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);
	*(struct fake_descriptor *) &master_gdt[sel_idx(DF_TSS)] = temp_fake_desc;
	kprintf("DFTSS: 0x%x\n", haddr);

	/* remap mctss up high and put new high addr into gdt */
	haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS,
					(vm_offset_t) &master_mctss, 1);
	temp_fake_desc = tss_desc_pattern;
	temp_fake_desc.offset = (vm_offset_t) haddr;
	fix_desc(&temp_fake_desc, 1);
	*(struct fake_descriptor *) &master_gdt[sel_idx(MC_TSS)] = temp_fake_desc;
	kprintf("MCTSS: 0x%x\n", haddr);

	cpu_desc_load(&cpu_data_master);
}


/*
 *	Bootstrap the system enough to run with virtual memory.
 *	Map the kernel's code and data, and allocate the system page table.
 *	Called with mapping OFF.  Page_size must already be set.
 */

void
pmap_bootstrap(
	__unused vm_offset_t	load_start,
	boolean_t		IA32e)
{
	vm_offset_t	va;
	unsigned	i;
	pdpt_entry_t	*pdpt;
	spl_t		s;

	vm_last_addr = VM_MAX_KERNEL_ADDRESS;	/* Set the highest address
						 * known to VM */
	/*
	 *	The kernel's pmap is statically allocated so we don't
	 *	have to use pmap_create, which is unlikely to work
	 *	correctly at this part of the boot sequence.
	 */


	kernel_pmap = &kernel_pmap_store;
	kernel_pmap->ref_count = 1;
	kernel_pmap->nx_enabled = FALSE;
	kernel_pmap->pm_task_map = TASK_MAP_32BIT;
	kernel_pmap->pm_obj = (vm_object_t) NULL;
	kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE);
	kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD);
	pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE );
	kernel_pmap->pm_pdpt = pdpt;
	kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT);


	va = (vm_offset_t)kernel_pmap->dirbase;
	/* setup self referential mapping(s) */
	for (i = 0; i< NPGPTD; i++, pdpt++) {
		pmap_paddr_t pa;
		pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
		pmap_store_pte(
			(pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i),
			(pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF |
				INTEL_PTE_MOD | INTEL_PTE_WIRED) ;
		pmap_store_pte(pdpt, pa | INTEL_PTE_VALID);
	}

#if CONFIG_YONAH
	/* 32-bit and legacy support depends on IA32e mode being disabled */
	cpu_64bit = IA32e;
#endif

	lo_kernel_cr3 = kernel_pmap->pm_cr3;
	current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3;

	/* save the value we stuff into created pmaps to share the gdts etc */
	high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE);
	/* make sure G bit is on for high shared pde entry */
	high_shared_pde |= INTEL_PTE_GLOBAL;
	s = splhigh();
	pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde);
	splx(s);

	nkpt = NKPT;
	OSAddAtomic(NKPT, &inuse_ptepages_count);
	OSAddAtomic64(NKPT, &alloc_ptepages_count);
	bootstrap_wired_pages = NKPT;

	virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail;
	virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS);

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
	va = virtual_avail;
	pt_entry_t	*pte;
	pte = vtopte(va);
#define SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n)

	for (i=0; i<PMAP_NWINDOWS; i++) {
		SYSMAP(caddr_t,
		       (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP),
		       (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR),
		       1);
		*current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0;
	}

	/* DMAP user for debugger */
	SYSMAP(caddr_t, DMAP1, DADDR1, 1);
	SYSMAP(caddr_t, DMAP2, DADDR2, 1);  /* XXX temporary - can remove */

	virtual_avail = va;

	if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) {
		if (0 != ((npvhash+1) & npvhash)) {
			kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n", npvhash, NPVHASH);
			npvhash = NPVHASH;
		}
	} else {
		npvhash = NPVHASH;
	}
	printf("npvhash=%d\n", npvhash);

	simple_lock_init(&kernel_pmap->lock, 0);
	simple_lock_init(&pv_hashed_free_list_lock, 0);
	simple_lock_init(&pv_hashed_kern_free_list_lock, 0);
	simple_lock_init(&pv_hash_table_lock, 0);

	pmap_init_high_shared();

	pde_mapped_size = PDE_MAPPED_SIZE;

	if (cpu_64bit) {
		pdpt_entry_t *ppdpt = IdlePDPT;
		pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64;
		pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4;
		int istate = ml_set_interrupts_enabled(FALSE);

		/*
		 * Clone a new 64-bit 3rd-level page table directory, IdlePML4,
		 * with page bits set for the correct IA-32e operation and so that
		 * the legacy-mode IdlePDPT is retained for slave processor start-up.
		 * This is necessary due to the incompatible use of page bits between
		 * 64-bit and legacy modes.
		 */
		kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */
		kernel_pmap->pm_pml4 = IdlePML4;
		kernel_pmap->pm_pdpt = (pd_entry_t *)
					((unsigned int)IdlePDPT64 | KERNBASE );
#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF
		pmap_store_pte(kernel_pmap->pm_pml4,
			       (uint32_t)IdlePDPT64 | PAGE_BITS);
		pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS);
		pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS);
		pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS);
		pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS);

		/*
		 * The kernel is also mapped in the uber-space at the 4GB starting
		 * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level.
		 */
		pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0));

		kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3;

		/* Re-initialize descriptors and prepare to switch modes */
		cpu_desc_init64(&cpu_data_master);
		current_cpu_datap()->cpu_is64bit = TRUE;
		current_cpu_datap()->cpu_active_cr3 = kernel64_cr3;

		pde_mapped_size = 512*4096;

		ml_set_interrupts_enabled(istate);
	}

	/* Sets 64-bit mode if required. */
	cpu_mode_init(&cpu_data_master);
	/* Update in-kernel CPUID information if we're now in 64-bit mode */
	if (IA32e)
		cpuid_set_info();

	kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4;

	kprintf("Kernel virtual space from 0x%x to 0x%x.\n",
			VADDR(KPTDI,0), virtual_end);
	printf("PAE enabled\n");
	if (cpu_64bit) {
		printf("64 bit mode enabled\n");
		kprintf("64 bit mode enabled\n");
	}

	kprintf("Available physical space from 0x%llx to 0x%llx\n",
			avail_start, avail_end);

	/*
	 * By default for 64-bit users loaded at 4GB, share kernel mapping.
	 * But this may be overridden by the -no_shared_cr3 boot-arg.
	 */
	if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) {
		kprintf("Shared kernel address space disabled\n");
	}

#ifdef	PMAP_TRACES
	if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
		kprintf("Kernel traces for pmap operations enabled\n");
	}
#endif	/* PMAP_TRACES */
}
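
/*
 * For reference, a hypothetical boot-args line exercising the options
 * parsed above (the values are examples only; npvhash must be of the
 * form (2^N)-1 because it is used as a hash mask):
 *
 *	npvhash=4095 -no_shared_cr3 -pmap_trace
 */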

void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_avail;
	*endp = virtual_end;
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	long			npages;
	vm_map_offset_t		vaddr;
	vm_offset_t		addr;
	vm_size_t		s, vsize;
	ppnum_t			ppn;

	/*
	 *	Allocate memory for the pv_head_table and its lock bits,
	 *	the modify bit array, and the pte_page table.
	 */

	/*
	 * zero bias all these arrays now instead of off avail_start
	 * so we cover all memory
	 */

	npages = (long)i386_btop(avail_end);
	s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages
			 + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1))
			 + pv_lock_table_size(npages)
			 + pv_hash_lock_table_size((npvhash+1))
			 + npages);

	s = round_page(s);
	if (kernel_memory_allocate(kernel_map, &addr, s, 0,
				   KMA_KOBJECT | KMA_PERMANENT)
	    != KERN_SUCCESS)
		panic("pmap_init");

	memset((char *)addr, 0, s);

	vaddr = addr;
	vsize = s;

#if PV_DEBUG
	if (0 == npvhash) panic("npvhash not initialized");
#endif

	/*
	 *	Allocate the structures first to preserve word-alignment.
	 */
	pv_head_table = (pv_rooted_entry_t) addr;
	addr = (vm_offset_t) (pv_head_table + npages);

	pv_hash_table = (pv_hashed_entry_t *)addr;
	addr = (vm_offset_t) (pv_hash_table + (npvhash + 1));

	pv_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_lock_table + pv_lock_table_size(npages));

	pv_hash_lock_table = (char *) addr;
	addr = (vm_offset_t) (pv_hash_lock_table + pv_hash_lock_table_size((npvhash+1)));

	pmap_phys_attributes = (char *) addr;
	{
		unsigned int i;
		unsigned int pn;
		ppnum_t last_pn;
		pmap_memory_region_t *pmptr = pmap_memory_regions;

		last_pn = (ppnum_t)i386_btop(avail_end);

		for (i = 0; i < pmap_memory_region_count; i++, pmptr++) {
			if (pmptr->type == kEfiConventionalMemory) {

				for (pn = pmptr->base; pn <= pmptr->end; pn++) {
					if (pn < last_pn) {
						pmap_phys_attributes[pn] |= PHYS_MANAGED;

						if (pn > last_managed_page)
							last_managed_page = pn;

						if (pn >= lowest_hi && pn <= highest_hi)
							pmap_phys_attributes[pn] |= PHYS_NOENCRYPT;
					}
				}
			}
		}
	}
	while (vsize) {
		ppn = pmap_find_phys(kernel_pmap, vaddr);

		pmap_phys_attributes[ppn] |= PHYS_NOENCRYPT;

		vaddr += PAGE_SIZE;
		vsize -= PAGE_SIZE;
	}
	/*
	 *	Create the zone of physical maps,
	 *	and of the physical-to-virtual entries.
	 */
	s = (vm_size_t) sizeof(struct pmap);
	pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */
	zone_change(pmap_zone, Z_NOENCRYPT, TRUE);

	s = (vm_size_t) sizeof(struct pv_hashed_entry);
	pv_hashed_list_zone = zinit(s, 10000*s /* Expandable zone */,
	    4096 * 4 /* LCM i386 */, "pv_list");
	zone_change(pv_hashed_list_zone, Z_NOENCRYPT, TRUE);

	s = 63;
	pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */
	zone_change(pdpt_zone, Z_NOENCRYPT, TRUE);

	kptobj = &kptobj_object_store;
	_vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG), kptobj);
	kernel_pmap->pm_obj = kptobj;

	/* create pv entries for kernel pages mapped by low level
	   startup code.  these have to exist so we can pmap_remove()
	   e.g. kext pages from the middle of our addr space */

	vaddr = (vm_map_offset_t)0;
	for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) {
		pv_rooted_entry_t	pv_e;

		pv_e = pai_to_pvh(ppn);
		pv_e->va = vaddr;
		vaddr += PAGE_SIZE;
		pv_e->pmap = kernel_pmap;
		queue_init(&pv_e->qlink);
	}

	pmap_initialized = TRUE;

	max_preemption_latency_tsc = tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
}

#ifdef PMAP_DEBUG
#define DBG(x...)	kprintf("DBG: " x)
#else
#define DBG(x...)
#endif

/*
 * Called once VM is fully initialized so that we can release unused
 * sections of low memory to the general pool.
 * Also complete the set-up of identity-mapped sections of the kernel:
 *	1) write-protect kernel text
 *	2) map kernel text using large pages if possible
 *	3) read and write-protect page zero (for K32)
 *	4) map the global page at the appropriate virtual address.
 *
 * Use of large pages
 * ------------------
 * To effectively map and write-protect all kernel text pages, the text
 * must be 2M-aligned at the base, and the data section above must also be
 * 2M-aligned. That is, there's padding below and above. This is achieved
 * through linker directives.
 * Large pages are used only if this alignment
 * exists (and is not overridden by the -kernel_text_ps_4K boot-arg). The
 * memory layout is:
 *
 *                       :                :
 *                       |     __DATA     |
 *               sdata:  ==================  2Meg
 *                       |                |
 *                       |  zero-padding  |
 *                       |                |
 *               etext:  ------------------
 *                       |                |
 *                       :                :
 *                       |                |
 *                       |     __TEXT     |
 *                       |                |
 *                       :                :
 *                       |                |
 *               stext:  ==================  2Meg
 *                       |                |
 *                       |  zero-padding  |
 *                       |                |
 *               eHIB:   ------------------
 *                       |     __HIB      |
 *                       :                :
 *
 * Prior to changing the mapping from 4K to 2M, the zero-padding pages
 * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
 * 4K pages covering [stext,etext] are coalesced as 2M large pages.
 * The now unused level-1 PTE pages are also freed.
 */
extern uint32_t pmap_reserved_ranges;
void
pmap_lowmem_finalize(void)
{
	spl_t	spl;
	int	i;

	/* Check the kernel is linked at the expected base address */
	if (i386_btop(kvtophys((vm_offset_t) &IdlePML4)) !=
	    I386_KERNEL_IMAGE_BASE_PAGE)
		panic("pmap_lowmem_finalize() unexpected kernel base address");

	/*
	 * Update wired memory statistics for early boot pages
	 */
	PMAP_ZINFO_PALLOC(kernel_pmap, bootstrap_wired_pages * PAGE_SIZE);

	/*
	 * Free all pages in pmap regions below the base:
	 * rdar://6332712
	 *	We can't free all the pages to VM that EFI reports available.
	 *	Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
	 *	There's also a size miscalculation here: pend is one page less
	 *	than it should be but this is not fixed to be backwards
	 *	compatible.
	 * Due to this current EFI limitation, we take only the first
	 * entry in the memory region table. However, the loop is retained
	 * (with the intended termination criteria commented out) in the
	 * hope that some day we can free all low-memory ranges.
	 */
	for (i = 0;
//	     pmap_memory_regions[i].end <= I386_KERNEL_IMAGE_BASE_PAGE;
	     i < 1 && (pmap_reserved_ranges == 0);
	     i++) {
		vm_offset_t	pbase = (vm_offset_t)i386_ptob(pmap_memory_regions[i].base);
		vm_offset_t	pend  = (vm_offset_t)i386_ptob(pmap_memory_regions[i].end);
//		vm_offset_t	pend  = i386_ptob(pmap_memory_regions[i].end+1);

		DBG("ml_static_mfree(%p,%p) for pmap region %d\n",
		    (void *) ml_static_ptovirt(pbase),
		    (void *) (pend - pbase), i);
		ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
	}

	/*
	 * If text and data are both 2MB-aligned,
	 * we can map text with large-pages,
	 * unless the -kernel_text_ps_4K boot-arg overrides.
	 */
	if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
		kprintf("Kernel text is 2MB aligned");
		kernel_text_ps_4K = FALSE;
		if (PE_parse_boot_argn("-kernel_text_ps_4K",
				       &kernel_text_ps_4K,
				       sizeof (kernel_text_ps_4K)))
			kprintf(" but will be mapped with 4K pages\n");
		else
			kprintf(" and will be mapped with 2M pages\n");
	}

	(void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof (wpkernel));
	if (wpkernel)
		kprintf("Kernel text %p-%p to be write-protected\n",
			(void *) stext, (void *) etext);

	spl = splhigh();

	/*
	 * Scan over text if mappings are to be changed:
	 * - Remap kernel text readonly unless the "wpkernel" boot-arg is 0
	 * - Change to large-pages if possible and not overridden.
	 */
	if (kernel_text_ps_4K && wpkernel) {
		vm_offset_t	myva;

		for (myva = stext; myva < etext; myva += PAGE_SIZE) {
			pt_entry_t	*ptep;

			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			if (ptep)
				pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW);
		}
	}

	if (!kernel_text_ps_4K) {
		vm_offset_t	myva;

		/*
		 * Release zero-filled page padding used for 2M-alignment.
		 */
		DBG("ml_static_mfree(%p,%p) for padding below text\n",
			(void *) eHIB, (void *) (stext - eHIB));
		ml_static_mfree(eHIB, stext - eHIB);
		DBG("ml_static_mfree(%p,%p) for padding above text\n",
			(void *) etext, (void *) (sdata - etext));
		ml_static_mfree(etext, sdata - etext);

		/*
		 * Coalesce text pages into large pages.
		 */
		for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
			pt_entry_t	*ptep;
			vm_offset_t	pte_phys;
			pt_entry_t	*pdep;
			pt_entry_t	pde;

			pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
			ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
			DBG("myva: %p pdep: %p ptep: %p\n",
				(void *) myva, (void *) pdep, (void *) ptep);
			if ((*ptep & INTEL_PTE_VALID) == 0)
				continue;
			pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
			pde = *pdep & PTMASK;	/* page attributes from pde */
			pde |= INTEL_PTE_PS;	/* make it a 2M entry */
			pde |= pte_phys;	/* take page frame from pte */

			if (wpkernel)
				pde &= ~INTEL_PTE_RW;
			DBG("pmap_store_pte(%p,0x%llx)\n",
				(void *)pdep, pde);
			pmap_store_pte(pdep, pde);

			/*
			 * Free the now-unused level-1 pte.
			 * Note: ptep is a virtual address to the pte in the
			 * recursive map.  We can't use this address to free
			 * the page.  Instead we need to compute its address
			 * in the Idle PTEs in "low memory".
			 */
			vm_offset_t vm_ptep = (vm_offset_t) KPTphys
						+ (pte_phys >> PTPGSHIFT);
			DBG("ml_static_mfree(%p,0x%x) for pte\n",
				(void *) vm_ptep, PAGE_SIZE);
			ml_static_mfree(vm_ptep, PAGE_SIZE);
		}

		/* Change variable read by sysctl machdep.pmap */
		pmap_kernel_text_ps = I386_LPGBYTES;
	}

	/* no matter what, kernel page zero is not accessible */
	pmap_store_pte(pmap_pte(kernel_pmap, 0), INTEL_PTE_INVALID);

	/* map lowmem global page into fixed addr */
	pt_entry_t *pte = NULL;
	if (0 == (pte = pmap_pte(kernel_pmap,
				 VM_MIN_KERNEL_LOADED_ADDRESS + 0x2000)))
		panic("lowmem pte");
	/* make sure it is defined on page boundary */
	assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
	pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
				| INTEL_PTE_REF
				| INTEL_PTE_MOD
				| INTEL_PTE_WIRED
				| INTEL_PTE_VALID
				| INTEL_PTE_RW);
	splx(spl);
	flush_tlb();
}

#define managed_page(x)	( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) )

/*
 * this function is only used for debugging from the vm layer
 */
boolean_t
pmap_verify_free(
		 ppnum_t pn)
{
	pv_rooted_entry_t	pv_h;
	int		pai;
	boolean_t	result;

	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return(TRUE);

	if (pn == vm_page_guard_addr)
		return TRUE;

	pai = ppn_to_pai(pn);
	if (!managed_page(pai))
		return(FALSE);
	pv_h = pai_to_pvh(pn);
	result = (pv_h->pmap == PMAP_NULL);
	return(result);
}

boolean_t
pmap_is_empty(
	pmap_t		pmap,
	vm_map_offset_t	va_start,
	vm_map_offset_t	va_end)
{
	vm_map_offset_t	offset;
	ppnum_t		phys_page;

	if (pmap == PMAP_NULL) {
		return TRUE;
	}

	/*
	 * Check the resident page count
	 * - if it's zero, the pmap is completely empty.
	 * This short-circuit test prevents a virtual address scan which is
	 * painfully slow for 64-bit spaces.
	 * This assumes the count is correct
	 * .. the debug kernel ought to be checking perhaps by page table walk.
	 */
	if (pmap->stats.resident_count == 0)
		return TRUE;

	for (offset = va_start;
	     offset < va_end;
	     offset += PAGE_SIZE_64) {
		phys_page = pmap_find_phys(pmap, offset);
		if (phys_page) {
			if (pmap != kernel_pmap &&
			    pmap->pm_task_map == TASK_MAP_32BIT &&
			    offset >= HIGH_MEM_BASE) {
				/*
				 * The "high_shared_pde" is used to share
				 * the entire top-most 2MB of address space
				 * between the kernel and all 32-bit tasks.
				 * So none of this can be removed from 32-bit
				 * tasks.
				 * Let's pretend there's nothing up
				 * there...
				 */
				return TRUE;
			}
			kprintf("pmap_is_empty(%p,0x%llx,0x%llx): "
				"page %d at 0x%llx\n",
				pmap, va_start, va_end, phys_page, offset);
			return FALSE;
		}
	}

	return TRUE;
}

/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
pmap_t
pmap_create(
	    ledger_t		ledger,
	    vm_map_size_t	sz,
	    boolean_t		is_64bit)
{
	pmap_t			p;
	unsigned		i;
	vm_offset_t		va;
	vm_size_t		size;
	pdpt_entry_t		*pdpt;
	pml4_entry_t		*pml4p;
	pd_entry_t		*pdp;
	int			template;
	spl_t			s;

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START,
		   (int) (sz>>32), (int) sz, (int) is_64bit, 0, 0);

	size = (vm_size_t) sz;

	/*
	 *	A software use-only map doesn't even need a map.
	 */

	if (size != 0) {
		return(PMAP_NULL);
	}

	p = (pmap_t) zalloc(pmap_zone);
	if (PMAP_NULL == p)
		panic("pmap_create zalloc");

	/* init counts now since we'll be bumping some */
	simple_lock_init(&p->lock, 0);
	p->stats.resident_count = 0;
	p->stats.resident_max = 0;
	p->stats.wired_count = 0;
	ledger_reference(ledger);
	p->ledger = ledger;
	p->ref_count = 1;
	p->nx_enabled = 1;
	p->pm_shared = FALSE;

	assert(!is_64bit || cpu_64bit);
	p->pm_task_map = is_64bit ? TASK_MAP_64BIT : TASK_MAP_32BIT;

	if (!cpu_64bit) {
		/* legacy 32 bit setup */
		/* in the legacy case the pdpt layer is hardwired to 4 entries and each
		 * entry covers 1GB of addr space */
		if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD))
			panic("pmap_create kmem_alloc_kobject");
		p->pm_hold = (vm_offset_t)zalloc(pdpt_zone);
		if ((vm_offset_t)NULL == p->pm_hold) {
			panic("pdpt zalloc");
		}
		/* round up within the zone element: the PAE PDPT must be 32-byte aligned */
		pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31);
		p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt);
		if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG))))
			panic("pmap_create vm_object_allocate");

		memset((char *)p->dirbase, 0, NBPTD);

		va = (vm_offset_t)p->dirbase;
		p->pdirbase = kvtophys(va);

		PMAP_ZINFO_SALLOC(p, NBPTD);

		template = INTEL_PTE_VALID;
		for (i = 0; i< NPGPTD; i++, pdpt++ ) {
			pmap_paddr_t pa;
			pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i)));
			pmap_store_pte(pdpt, pa | template);
		}

		/* map the high shared pde */
		s = splhigh();
		pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde);
		splx(s);
	} else {
		/* 64 bit setup */

		/* alloc the pml4 page in kernel vm */
		if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE))
			panic("pmap_create kmem_alloc_kobject pml4");

		memset((char *)p->pm_hold, 0, PAGE_SIZE);
		p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold);

		OSAddAtomic(1, &inuse_ptepages_count);
		OSAddAtomic64(1, &alloc_ptepages_count);
		PMAP_ZINFO_SALLOC(p, PAGE_SIZE);

		/* allocate the vm_objs to hold the pdpt, pde and pte pages */

		if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS))))
			panic("pmap_create pdpt obj");

		if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS))))
			panic("pmap_create pdpt obj");

		if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS))))
			panic("pmap_create pte obj");

		/* uber space points to uber mapped kernel */
		s = splhigh();
		pml4p = pmap64_pml4(p, 0ULL);
		pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX), *kernel_pmap->pm_pml4);

		if (!is_64bit) {
			while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) {
				splx(s);
				pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pde entry */
				s = splhigh();
			}
			pmap_store_pte(pdp, high_shared_pde);
		}
		splx(s);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
		   (int) p, is_64bit, 0, 0, 0);

	return(p);
}

/*
 * The following routines implement the shared address optimization for 64-bit
 * users with a 4GB page zero.
 *
 * pmap_set_4GB_pagezero()
 *	is called in the exec and fork paths to mirror the kernel's
 *	mapping in the bottom 4G of the user's pmap. The task mapping changes
 *	from TASK_MAP_64BIT to TASK_MAP_64BIT_SHARED. This routine returns
 *	without doing anything if the -no_shared_cr3 boot-arg is set.
 *
 * pmap_clear_4GB_pagezero()
 *	is called in the exec/exit paths to undo this mirror. The task mapping
 *	reverts to TASK_MAP_64BIT. In addition, we switch to the kernel's
 *	CR3 by calling pmap_load_kernel_cr3().
 *
 * pmap_load_kernel_cr3()
 *	loads cr3 with the kernel's page table. In addition to being called
 *	by pmap_clear_4GB_pagezero(), it is used both prior to teardown and
 *	when we go idle in the context of a shared map.
 *
 * Further notes on per-cpu data used:
 *
 *	cpu_kernel_cr3	is the cr3 for the kernel's pmap.
 *			This is loaded in a trampoline on entering the kernel
 *			from a 32-bit user (or non-shared-cr3 64-bit user).
 *	cpu_task_cr3	is the cr3 for the current thread.
 *			This is loaded in a trampoline as we exit the kernel.
 *	cpu_active_cr3	reflects the cr3 currently loaded.
 *			However, the low order bit is set when the
 *			processor is idle or interrupts are disabled
 *			while the system pmap lock is held. It is used by
 *			tlb shoot-down.
 *	cpu_task_map	indicates whether the task cr3 belongs to
 *			a 32-bit, a 64-bit or a 64-bit shared map.
 *			The latter allows the avoidance of the cr3 load
 *			on kernel entry and exit.
 *	cpu_tlb_invalid	set TRUE when a tlb flush is requested.
 *			If the cr3 is "inactive" (the cpu is idle or the
 *			system-wide pmap lock is held) this is not serviced by
 *			an IPI but at the time when the cr3 becomes "active".
 */
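
/*
 * A minimal sketch of the call pattern described above, assuming an
 * exec-path caller on a 64-bit task; the helper names are hypothetical
 * and the surrounding exec/exit logic is elided:
 */
#if 0	/* illustrative only */
static void
example_exec_mirror(void)
{
	pmap_t user_pmap = current_pmap();

	/* exec/fork: mirror the kernel's low 4GB; no-op if -no_shared_cr3 */
	pmap_set_4GB_pagezero(user_pmap);
}

static void
example_exec_unmirror(void)
{
	pmap_t user_pmap = current_pmap();

	/* exec/exit: undo the mirror and switch back to the kernel's cr3 */
	pmap_clear_4GB_pagezero(user_pmap);
}
#endif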

void
pmap_set_4GB_pagezero(pmap_t p)
{
	pdpt_entry_t	*user_pdptp;
	pdpt_entry_t	*kern_pdptp;

	assert(p->pm_task_map != TASK_MAP_32BIT);

	/* Kernel-shared cr3 may be disabled by boot arg. */
	if (no_shared_cr3)
		return;

	/*
	 * Set the bottom 4 3rd-level pte's to be the kernel's.
	 */
	PMAP_LOCK(p);
	while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
		PMAP_UNLOCK(p);
		pmap_expand_pml4(p, 0x0, PMAP_EXPAND_OPTIONS_NONE);
		PMAP_LOCK(p);
	}
	kern_pdptp = kernel_pmap->pm_pdpt;
	pmap_store_pte(user_pdptp+0, *(kern_pdptp+0));
	pmap_store_pte(user_pdptp+1, *(kern_pdptp+1));
	pmap_store_pte(user_pdptp+2, *(kern_pdptp+2));
	pmap_store_pte(user_pdptp+3, *(kern_pdptp+3));
	p->pm_task_map = TASK_MAP_64BIT_SHARED;
	PMAP_UNLOCK(p);
}

void
pmap_clear_4GB_pagezero(pmap_t p)
{
	pdpt_entry_t	*user_pdptp;
	boolean_t	istate;

	if (p->pm_task_map != TASK_MAP_64BIT_SHARED)
		return;

	PMAP_LOCK(p);

	p->pm_task_map = TASK_MAP_64BIT;

	istate = ml_set_interrupts_enabled(FALSE);

	if (current_cpu_datap()->cpu_task_map == TASK_MAP_64BIT_SHARED)
		current_cpu_datap()->cpu_task_map = TASK_MAP_64BIT;

	pmap_load_kernel_cr3();

	user_pdptp = pmap64_pdpt(p, 0x0);
	pmap_store_pte(user_pdptp+0, 0);
	pmap_store_pte(user_pdptp+1, 0);
	pmap_store_pte(user_pdptp+2, 0);
	pmap_store_pte(user_pdptp+3, 0);

	ml_set_interrupts_enabled(istate);

	PMAP_UNLOCK(p);
}

void
pmap_load_kernel_cr3(void)
{
	uint64_t	kernel_cr3;

	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	/*
	 * Reload cr3 with the true kernel cr3.
	 */
	kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3;
	set64_cr3(kernel_cr3);
	current_cpu_datap()->cpu_active_cr3 = kernel_cr3;
	current_cpu_datap()->cpu_tlb_invalid = FALSE;
	__asm__ volatile("mfence");
}

/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */

void
pmap_destroy(
	register pmap_t	p)
{
	register int	c;

	if (p == PMAP_NULL)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
		   (int) p, 0, 0, 0, 0);

	PMAP_LOCK(p);

	c = --p->ref_count;

	if (c == 0) {
		/*
		 * If some cpu is not using the physical pmap pointer that it
		 * is supposed to be (see set_dirbase), we might be using the
		 * pmap that is being destroyed! Make sure we are
		 * physically on the right pmap:
		 */
		PMAP_UPDATE_TLBS(p,
				 0x0ULL,
				 0xFFFFFFFFFFFFF000ULL);
	}

	PMAP_UNLOCK(p);

	if (c != 0) {
		PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
			   (int) p, 1, 0, 0, 0);
		return;	/* still in use */
	}

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	if (!cpu_64bit) {
		OSAddAtomic(-p->pm_obj->resident_page_count, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(p, p->pm_obj->resident_page_count * PAGE_SIZE);

		kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD);
		PMAP_ZINFO_SFREE(p, NBPTD);

		zfree(pdpt_zone, (void *)p->pm_hold);

		vm_object_deallocate(p->pm_obj);
	} else {
		/* 64 bit */
		int inuse_ptepages = 0;

		/* free 64 bit mode structs */
		kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE);
		PMAP_ZINFO_SFREE(p, PAGE_SIZE);

		inuse_ptepages += p->pm_obj_pml4->resident_page_count;
		vm_object_deallocate(p->pm_obj_pml4);

		inuse_ptepages += p->pm_obj_pdpt->resident_page_count;
		vm_object_deallocate(p->pm_obj_pdpt);

		inuse_ptepages += p->pm_obj->resident_page_count;
		vm_object_deallocate(p->pm_obj);

		OSAddAtomic(-(inuse_ptepages+1), &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(p, inuse_ptepages * PAGE_SIZE);
	}
	ledger_dereference(p->ledger);

	zfree(pmap_zone, p);

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

/*
 *	Add a reference to the specified pmap.
 */

void
pmap_reference(
	register pmap_t	p)
{
	if (p != PMAP_NULL) {
		PMAP_LOCK(p);
		p->ref_count++;
		PMAP_UNLOCK(p);
	}
}

/*
 *	Remove phys addr if mapped in specified map
 */
void
pmap_remove_some_phys(
	__unused pmap_t		map,
	__unused ppnum_t	pn)
{

/* Implement to support working set code */

}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	Will not increase permissions.
 */
void
pmap_protect(
	pmap_t		map,
	vm_map_offset_t	sva,
	vm_map_offset_t	eva,
	vm_prot_t	prot)
{
	register pt_entry_t	*pde;
	register pt_entry_t	*spte, *epte;
	vm_map_offset_t		lva;
	vm_map_offset_t		orig_sva;
	boolean_t		set_NX;
	int			num_found = 0;

	pmap_intr_assert();

	if (map == PMAP_NULL)
		return;

	if (prot == VM_PROT_NONE) {
		pmap_remove(map, sva, eva);
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
		   (int) map,
		   (int) (sva>>32), (int) sva,
		   (int) (eva>>32), (int) eva);

	if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled )
		set_NX = FALSE;
	else
		set_NX = TRUE;

	PMAP_LOCK(map);

	orig_sva = sva;
	while (sva < eva) {
		lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1);
		if (lva > eva)
			lva = eva;
		pde = pmap_pde(map, sva);
		if (pde && (*pde & INTEL_PTE_VALID)) {
			spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1)));
			spte = &spte[ptenum(sva)];
			epte = &spte[intel_btop(lva-sva)];

			while (spte < epte) {
				if (*spte & INTEL_PTE_VALID) {
					if (prot & VM_PROT_WRITE)
						pmap_update_pte(spte, 0, INTEL_PTE_WRITE);
					else
						pmap_update_pte(spte, INTEL_PTE_WRITE, 0);

					if (set_NX == TRUE)
						pmap_update_pte(spte, 0, INTEL_PTE_NX);
					else
						pmap_update_pte(spte, INTEL_PTE_NX, 0);

					num_found++;
				}
				spte++;
			}
		}
		sva = lva;
	}
	if (num_found) {
		PMAP_UPDATE_TLBS(map, orig_sva, eva);
	}

	PMAP_UNLOCK(map);

	PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

/* Map a (possibly) autogenned block */
void
pmap_map_block(
	pmap_t		pmap,
	addr64_t	va,
	ppnum_t 	pa,
	uint32_t	size,
	vm_prot_t	prot,
	int		attr,
	__unused unsigned int	flags)
{
	uint32_t page;

	for (page = 0; page < size; page++) {
		pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
		va += PAGE_SIZE;
		pa++;
	}
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 *	Change to shim for backwards compatibility but will not
 *	work for 64 bit systems.  Some old drivers that we cannot
 *	change need this.
 */

vm_offset_t
pmap_extract(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr)
{
	ppnum_t		ppn;
	vm_offset_t	paddr;

	paddr = (vm_offset_t)0;
	ppn = pmap_find_phys(pmap, vaddr);

	if (ppn) {
		paddr = ((vm_offset_t)i386_ptob(ppn)) | ((vm_offset_t)vaddr & INTEL_OFFMASK);
	}
	return (paddr);
}
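
/*
 * A minimal sketch (not part of the original source) of why new code
 * should prefer pmap_find_phys() to the shim above: ppnum_t carries a
 * full-width page number, whereas pmap_extract() folds its result into a
 * 32-bit vm_offset_t and so cannot represent physical addresses at or
 * above 4GB. The helper name is hypothetical:
 */
#if 0	/* illustrative only */
static addr64_t
example_extract64(pmap_t pmap, vm_map_offset_t vaddr)
{
	ppnum_t pn = pmap_find_phys(pmap, vaddr);

	if (pn == 0)
		return 0;
	return (((addr64_t)pn) << PAGE_SHIFT) | (vaddr & INTEL_OFFMASK);
}
#endif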

kern_return_t
pmap_expand_pml4(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	__unused unsigned int options)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	ppnum_t			pn;
	pml4_entry_t		*pml4p;

	if (kernel_pmap == map) panic("expand kernel pml4");

	spl = splhigh();
	pml4p = pmap64_pml4(map, vaddr);
	splx(spl);
	if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");

	/*
	 *	Allocate a VM page for the pml4 page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pml4idx(map, vaddr);

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);
	OSAddAtomic64(1, &alloc_ptepages_count);
	PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj_pml4);

	PMAP_LOCK(map);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj_pml4);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(map, PAGE_SIZE);
		return KERN_SUCCESS;
	}
	pmap_set_noencrypt(pn);

#if 0 /* DEBUG */
	if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
		panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj_pml4, vaddr, i);
	}
#endif
	vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
	vm_object_unlock(map->pm_obj_pml4);

	/*
	 *	Set the page directory entry for this page table.
	 */
	pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */

	pmap_store_pte(pml4p, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);

	return KERN_SUCCESS;
}

kern_return_t
pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, __unused unsigned int options)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	ppnum_t			pn;
	pdpt_entry_t		*pdptp;

	if (kernel_pmap == map) panic("expand kernel pdpt");

	spl = splhigh();
	while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
		splx(spl);
		pmap_expand_pml4(map, vaddr, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pdpt entry */
		spl = splhigh();
	}
	splx(spl);

	/*
	 *	Allocate a VM page for the pdpt page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdptidx(map, vaddr);

	/*
	 *	Zero the page.
kern_return_t
pmap_expand_pml4(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	__unused unsigned int options)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	ppnum_t			pn;
	pml4_entry_t		*pml4p;

	if (kernel_pmap == map) panic("expand kernel pml4");

	spl = splhigh();
	pml4p = pmap64_pml4(map, vaddr);
	splx(spl);
	if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p");

	/*
	 *	Allocate a VM page for the pml4 page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pml4idx(map, vaddr);

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);
	OSAddAtomic64(1, &alloc_ptepages_count);
	PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj_pml4);

	PMAP_LOCK(map);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj_pml4);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(map, PAGE_SIZE);
		return KERN_SUCCESS;
	}
	pmap_set_noencrypt(pn);

#if 0 /* DEBUG */
	if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) {
		panic("pmap_expand_pml4: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj_pml4, vaddr, i);
	}
#endif
	vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i);
	vm_object_unlock(map->pm_obj_pml4);

	/*
	 *	Set the PML4 entry for this new page-directory-pointer page.
	 */
	pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */

	pmap_store_pte(pml4p, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);

	return KERN_SUCCESS;
}
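/*
 * pmap_expand_pdpt() and pmap_expand() below repeat the idiom used
 * above: grab, zero and wire a page with no locks held, then recheck
 * under the object lock and PMAP_LOCK whether another thread expanded
 * this level first, freeing the now-redundant page if the race was
 * lost.  Only the winning path installs the new entry.
 */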
kern_return_t
pmap_expand_pdpt(pmap_t map, vm_map_offset_t vaddr, __unused unsigned int options)
{
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	ppnum_t			pn;
	pdpt_entry_t		*pdptp;

	if (kernel_pmap == map) panic("expand kernel pdpt");

	spl = splhigh();
	while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) {
		splx(spl);
		pmap_expand_pml4(map, vaddr, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pdpt entry */
		spl = splhigh();
	}
	splx(spl);

	/*
	 *	Allocate a VM page for the pdpt page
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdptidx(map, vaddr);

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);
	OSAddAtomic64(1, &alloc_ptepages_count);
	PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj_pdpt);

	PMAP_LOCK(map);
	/*
	 *	See if someone else expanded us first
	 */
	if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj_pdpt);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(map, PAGE_SIZE);
		return KERN_SUCCESS;
	}
	pmap_set_noencrypt(pn);

#if 0 /* DEBUG */
	if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) {
		panic("pmap_expand_pdpt: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj_pdpt, vaddr, i);
	}
#endif
	vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i);
	vm_object_unlock(map->pm_obj_pdpt);

	/*
	 *	Set the PDPT entry for this new page-directory page.
	 */
	pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */

	pmap_store_pte(pdptp, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);

	return KERN_SUCCESS;
}


/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new virtual memory for the P0 or P1 portion of the
 *	pmap, then re-maps the physical pages that were in the old
 *	pmap to be in the new pmap.
 *
 *	Must be called with the pmap system and the pmap unlocked,
 *	since these must be unlocked to use vm_allocate or vm_deallocate.
 *	Thus it must be called in a loop that checks whether the map
 *	has been expanded enough.
 *	(We won't loop forever, since page tables aren't shrunk.)
 */
kern_return_t
pmap_expand(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	__unused unsigned int options)
{
	pt_entry_t		*pdp;
	register vm_page_t	m;
	register pmap_paddr_t	pa;
	uint64_t		i;
	spl_t			spl;
	ppnum_t			pn;

	/*
	 * if not the kernel map (while we are still compat kernel mode)
	 * and we are 64 bit, propagate expand upwards
	 */

	if (cpu_64bit && (map != kernel_pmap)) {
		spl = splhigh();
		while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) {
			splx(spl);
			pmap_expand_pdpt(map, vaddr, PMAP_EXPAND_OPTIONS_NONE); /* need room for another pde entry */
			spl = splhigh();
		}
		splx(spl);
	}

	/*
	 *	Allocate a VM page for the pde entries.
	 */
	while ((m = vm_page_grab()) == VM_PAGE_NULL)
		VM_PAGE_WAIT();

	/*
	 *	put the page into the pmap's obj list so it
	 *	can be found later.
	 */
	pn = m->phys_page;
	pa = i386_ptob(pn);
	i = pdeidx(map, vaddr);

	/*
	 *	Zero the page.
	 */
	pmap_zero_page(pn);

	vm_page_lockspin_queues();
	vm_page_wire(m);
	vm_page_unlock_queues();

	OSAddAtomic(1, &inuse_ptepages_count);
	OSAddAtomic64(1, &alloc_ptepages_count);
	PMAP_ZINFO_PALLOC(map, PAGE_SIZE);

	/* Take the object lock (mutex) before the PMAP_LOCK (spinlock) */
	vm_object_lock(map->pm_obj);

	PMAP_LOCK(map);
	/*
	 *	See if someone else expanded us first
	 */

	if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) {
		PMAP_UNLOCK(map);
		vm_object_unlock(map->pm_obj);

		VM_PAGE_FREE(m);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(map, PAGE_SIZE);
		return KERN_SUCCESS;
	}
	pmap_set_noencrypt(pn);

#if 0 /* DEBUG */
	if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) {
		panic("pmap_expand: obj not empty, pmap %p pm_obj %p vaddr 0x%llx i 0x%llx\n",
		      map, map->pm_obj, vaddr, i);
	}
#endif
	vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i);
	vm_object_unlock(map->pm_obj);

	/*
	 *	refetch while locked
	 */

	pdp = pmap_pde(map, vaddr);

	/*
	 *	Set the page directory entry for this page table.
	 */
	pmap_store_pte(pdp, pa_to_pte(pa)
				| INTEL_PTE_VALID
				| INTEL_PTE_USER
				| INTEL_PTE_WRITE);

	PMAP_UNLOCK(map);

	return KERN_SUCCESS;
}
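/*
 * Illustrative caller loop (a sketch; compare pmap_cpu_alloc() below):
 * since pmap_expand() runs unlocked, callers retry until the mapping
 * level they need actually exists:
 *
 *	while ((pte = pmap_pte(kernel_pmap, vaddr)) == 0)
 *		pmap_expand(kernel_pmap, vaddr, PMAP_EXPAND_OPTIONS_NONE);
 */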
/*
 *	pmap_sync_page_data_phys(ppnum_t pa)
 *
 *	Invalidates all of the instruction cache on a physical page and
 *	pushes any dirty data from the data cache for the same physical page.
 *	Not required on i386.
 */
void
pmap_sync_page_data_phys(__unused ppnum_t pa)
{
	return;
}

/*
 *	pmap_sync_page_attributes_phys(ppnum_t pa)
 *
 *	Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(ppnum_t pa)
{
	cache_flush_page_phys(pa);
}



#ifdef CURRENTLY_UNUSED_AND_UNTESTED

int	collect_ref;
int	collect_unref;

/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 *	Usage:
 *		Called by the pageout daemon when pages are scarce.
 */
void
pmap_collect(
	pmap_t		p)
{
	register pt_entry_t	*pdp, *ptp;
	pt_entry_t		*eptp;
	int			wired;

	if (p == PMAP_NULL)
		return;

	if (p == kernel_pmap)
		return;

	/*
	 *	Garbage collect map.
	 */
	PMAP_LOCK(p);

	for (pdp = (pt_entry_t *)p->dirbase;
	     pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)];
	     pdp++)
	{
		if (*pdp & INTEL_PTE_VALID) {
			if (*pdp & INTEL_PTE_REF) {
				pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF);
				collect_ref++;
			} else {
				collect_unref++;
				ptp = pmap_pte(p, pdetova(pdp - (pt_entry_t *)p->dirbase));
				eptp = ptp + NPTEPG;

				/*
				 * If the pte page has any wired mappings, we cannot
				 * free it.
				 */
				wired = 0;
				{
					register pt_entry_t *ptep;

					for (ptep = ptp; ptep < eptp; ptep++) {
						if (iswired(*ptep)) {
							wired = 1;
							break;
						}
					}
				}
				if (!wired) {
					/*
					 * Remove the virtual addresses mapped by this pte page.
					 */
					pmap_remove_range(p,
						pdetova(pdp - (pt_entry_t *)p->dirbase),
						ptp,
						eptp);

					/*
					 * Invalidate the page directory pointer.
					 */
					pmap_store_pte(pdp, 0x0);

					PMAP_UNLOCK(p);

					/*
					 * And free the pte page itself.
					 */
					{
						register vm_page_t m;

						vm_object_lock(p->pm_obj);

						m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)(pdp - (pt_entry_t *)&p->dirbase[0]));
						if (m == VM_PAGE_NULL)
							panic("pmap_collect: pte page not in object");

						vm_object_unlock(p->pm_obj);

						VM_PAGE_FREE(m);

						OSAddAtomic(-1, &inuse_ptepages_count);
						PMAP_ZINFO_PFREE(p, PAGE_SIZE);
					}

					PMAP_LOCK(p);
				}
			}
		}
	}

	PMAP_UPDATE_TLBS(p, 0x0, 0xFFFFFFFFFFFFF000ULL);
	PMAP_UNLOCK(p);
	return;

}
#endif


void
pmap_copy_page(ppnum_t src, ppnum_t dst)
{
	bcopy_phys((addr64_t)i386_ptob(src),
		   (addr64_t)i386_ptob(dst),
		   PAGE_SIZE);
}


/*
 *	Routine:	pmap_pageable
 *	Function:
 *		Make the specified pages (by pmap, offset)
 *		pageable (or not) as requested.
 *
 *		A page which is not pageable may not take
 *		a fault; therefore, its page table entry
 *		must remain valid for the duration.
 *
 *		This routine is merely advisory; pmap_enter
 *		will specify that these pages are to be wired
 *		down (or not) as appropriate.
 */
void
pmap_pageable(
	__unused pmap_t			pmap,
	__unused vm_map_offset_t	start_addr,
	__unused vm_map_offset_t	end_addr,
	__unused boolean_t		pageable)
{
#ifdef	lint
	pmap++; start_addr++; end_addr++; pageable++;
#endif	/* lint */
}

void
invalidate_icache(__unused vm_offset_t	addr,
		  __unused unsigned	cnt,
		  __unused int		phys)
{
	return;
}

void
flush_dcache(__unused vm_offset_t	addr,
	     __unused unsigned		count,
	     __unused int		phys)
{
	return;
}

#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t dtrace_copyio_preflight(__unused addr64_t va)
{
	thread_t thread = current_thread();

	if (current_map() == kernel_map)
		return KERN_FAILURE;
	else if (thread->machine.specFlags & CopyIOActive)
		return KERN_FAILURE;
	else
		return KERN_SUCCESS;
}

kern_return_t dtrace_copyio_postflight(__unused addr64_t va)
{
	return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */

#include <mach_vm_debug.h>
#if	MACH_VM_DEBUG
#include <vm/vm_debug.h>

int
pmap_list_resident_pages(
	__unused pmap_t		pmap,
	__unused vm_offset_t	*listp,
	__unused int		space)
{
	return 0;
}
#endif /* MACH_VM_DEBUG */



/* temporary workaround */
boolean_t
coredumpok(__unused vm_map_t map, __unused vm_offset_t va)
{
#if 0
	pt_entry_t *ptep;

	ptep = pmap_pte(map->pmap, va);
	if (0 == ptep)
		return FALSE;
	return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED));
#else
	return TRUE;
#endif
}


boolean_t
phys_page_exists(ppnum_t pn)
{
	assert(pn != vm_page_fictitious_addr);

	if (!pmap_initialized)
		return (TRUE);

	if (pn == vm_page_guard_addr)
		return (FALSE);

	if (!managed_page(ppn_to_pai(pn)))
		return (FALSE);

	return (TRUE);
}
void
pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt)
{
	int		i;
	pt_entry_t	*opte, *npte;
	pt_entry_t	pte;
	spl_t		s;

	for (i = 0; i < cnt; i++) {
		s = splhigh();
		opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage);
		if (0 == opte)
			panic("kernel_commpage");
		pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL;
		pte &= ~INTEL_PTE_WRITE; // ensure read only
		npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage);
		if (0 == npte)
			panic("user_commpage");
		pmap_store_pte(npte, pte);
		splx(s);
		kernel_commpage += INTEL_PGBYTES;
		user_commpage += INTEL_PGBYTES;
	}
}


#define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE)
pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT];

void
pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt)
{
	int		i;
	pt_entry_t	*kptep;

	PMAP_LOCK(kernel_pmap);

	for (i = 0; i < cnt; i++) {
		kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE));
		if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID)))
			panic("pmap_commpage64_init pte");
		pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER);
	}
	PMAP_UNLOCK(kernel_pmap);
}


static cpu_pmap_t	cpu_pmap_master;

struct cpu_pmap *
pmap_cpu_alloc(boolean_t is_boot_cpu)
{
	int			ret;
	int			i;
	cpu_pmap_t		*cp;
	vm_offset_t		address;
	vm_map_address_t	mapaddr;
	vm_map_entry_t		entry;
	pt_entry_t		*pte;

	if (is_boot_cpu) {
		cp = &cpu_pmap_master;
	} else {
		/*
		 * The per-cpu pmap data structure itself.
		 */
		ret = kmem_alloc(kernel_map,
				 (vm_offset_t *) &cp, sizeof(cpu_pmap_t));
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() failed ret=%d\n", ret);
			return NULL;
		}
		bzero((void *)cp, sizeof(cpu_pmap_t));

		/*
		 * The temporary windows used for copy/zero - see loose_ends.c
		 */
		ret = vm_map_find_space(kernel_map,
					&mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry);
		if (ret != KERN_SUCCESS) {
			printf("pmap_cpu_alloc() "
			       "vm_map_find_space ret=%d\n", ret);
			pmap_cpu_free(cp);
			return NULL;
		}
		address = (vm_offset_t)mapaddr;

		for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) {
			spl_t s;

			s = splhigh();
			while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0)
				pmap_expand(kernel_pmap, (vm_map_offset_t)address, PMAP_EXPAND_OPTIONS_NONE);
			*(int *) pte = 0;
			cp->mapwindow[i].prv_CADDR = (caddr_t) address;
			cp->mapwindow[i].prv_CMAP = pte;
			splx(s);
		}
		vm_map_unlock(kernel_map);
	}

	cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW;
	cp->pde_window_index = PMAP_PDE_FIRST_WINDOW;
	cp->pte_window_index = PMAP_PTE_FIRST_WINDOW;

	return cp;
}

void
pmap_cpu_free(struct cpu_pmap *cp)
{
	if (cp != NULL && cp != &cpu_pmap_master) {
		kfree((void *) cp, sizeof(cpu_pmap_t));
	}
}
mapwindow_t *
pmap_get_mapwindow(pt_entry_t pentry)
{
	mapwindow_t	*mp;
	int		i;

	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
	/* fold in cache attributes for this physical page */
	pentry |= pmap_get_cache_attributes(i386_btop(pte_to_pa(pentry)));
	/*
	 * Note: 0th map reserved for pmap_pte()
	 */
	for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) {
		mp = &current_cpu_datap()->cpu_pmap->mapwindow[i];

		if (*mp->prv_CMAP == 0) {
			pmap_store_pte(mp->prv_CMAP, pentry);

			invlpg((uintptr_t)mp->prv_CADDR);

			return (mp);
		}
	}
	panic("pmap_get_mapwindow: no windows available");

	return NULL;
}


void
pmap_put_mapwindow(mapwindow_t *mp)
{
	pmap_store_pte(mp->prv_CMAP, 0);
}
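/*
 * Typical window usage (a sketch modeled on the copy/zero routines in
 * loose_ends.c; the exact PTE bits used there may differ): map a
 * physical page into one of this cpu's windows, operate through the
 * virtual alias, then release the window for reuse:
 *
 *	mapwindow_t *map;
 *
 *	map = pmap_get_mapwindow(pa_to_pte(i386_ptob(pn)) | INTEL_PTE_VALID
 *				 | INTEL_PTE_REF | INTEL_PTE_MOD);
 *	bzero(map->prv_CADDR, PAGE_SIZE);
 *	pmap_put_mapwindow(map);
 *
 * Interrupts must be disabled, or preemption otherwise blocked, for
 * the duration, per the assert in pmap_get_mapwindow().
 */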
void
pmap_switch(pmap_t tpmap)
{
	spl_t	s;

	s = splhigh();		/* Make sure interruptions are disabled */

	set_dirbase(tpmap, current_thread());

	splx(s);
}


/*
 * disable no-execute capability on
 * the specified pmap
 */
void
pmap_disable_NX(pmap_t pmap)
{
	pmap->nx_enabled = 0;
}

void
pt_fake_zone_init(int zone_index)
{
	pt_fake_zone_index = zone_index;
}

void
pt_fake_zone_info(int *count,
		  vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
		  uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	*count = inuse_ptepages_count;
	*cur_size = PAGE_SIZE * inuse_ptepages_count;
	*max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
	*elem_size = PAGE_SIZE;
	*alloc_size = PAGE_SIZE;
	*sum_size = alloc_ptepages_count * PAGE_SIZE;

	*collectable = 1;
	*exhaustable = 0;
	*caller_acct = 1;
}

vm_offset_t
pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e)
{
	enum high_fixed_addresses a;

	a = e + HIGH_CPU_END * cpu;
	return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
}

vm_offset_t
pmap_high_map_vaddr(enum high_cpu_types e)
{
	return pmap_cpu_high_map_vaddr(cpu_number(), e);
}

vm_offset_t
pmap_high_map(pt_entry_t pte, enum high_cpu_types e)
{
	enum high_fixed_addresses	a;
	vm_offset_t			vaddr;

	a = e + HIGH_CPU_END * cpu_number();
	vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a);
	pmap_store_pte(pte_unique_base + a, pte);

	/* TLB flush for this page for this cpu */
	invlpg((uintptr_t)vaddr);

	return vaddr;
}
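/*
 * Worked example (hypothetical enum values): if HIGH_CPU_END were 4,
 * the slot for type e == 1 on cpu 2 would be a = 1 + 4*2 = 9, i.e. the
 * ninth fixed page after HIGH_FIXED_CPUS_BEGIN.  Each cpu therefore
 * owns a contiguous block of HIGH_CPU_END high fixed-address pages,
 * and pmap_high_map() installs the given pte into this cpu's slot
 * before returning the alias address.
 */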
static inline void
pmap_cpuset_NMIPI(cpu_set cpu_mask)
{
	unsigned int	cpu, cpu_bit;
	uint64_t	deadline;

	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
		if (cpu_mask & cpu_bit)
			cpu_NMI_interrupt(cpu);
	}
	deadline = mach_absolute_time() + (((uint64_t)LockTimeOut) * 3);
	while (mach_absolute_time() < deadline)
		cpu_pause();
}

/*
 * Called with pmap locked, we:
 *  - scan through per-cpu data to see which other cpus need to flush
 *  - send an IPI to each non-idle cpu to be flushed
 *  - wait for all to signal back that they are inactive or we see that
 *    they are in an interrupt handler or at a safe point
 *  - flush the local tlb if it is active for this pmap
 *  - return ... the caller will unlock the pmap
 */
void
pmap_flush_tlbs(pmap_t pmap, vm_map_offset_t startv, vm_map_offset_t endv)
{
	unsigned int	cpu;
	unsigned int	cpu_bit;
	cpu_set		cpus_to_signal;
	unsigned int	my_cpu = cpu_number();
	pmap_paddr_t	pmap_cr3 = pmap->pm_cr3;
	boolean_t	flush_self = FALSE;
	uint64_t	deadline;

	assert((processor_avail_count < 2) ||
	       (ml_get_interrupts_enabled() && get_preemption_level() != 0));

	/*
	 * Scan other cpus for matching active or task CR3.
	 * For idle cpus (with no active map) we mark them invalid but
	 * don't signal -- they'll check as they go busy.
	 * Note: for the kernel pmap we look for 64-bit shared address maps.
	 */
	cpus_to_signal = 0;
	for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
		if (!cpu_datap(cpu)->cpu_running)
			continue;
		if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) ||
		    (CPU_GET_ACTIVE_CR3(cpu) == pmap_cr3) ||
		    (pmap->pm_shared) ||
		    ((pmap == kernel_pmap) &&
		     (!CPU_CR3_IS_ACTIVE(cpu) ||
		      cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) {
			if (cpu == my_cpu) {
				flush_self = TRUE;
				continue;
			}
			cpu_datap(cpu)->cpu_tlb_invalid = TRUE;
			__asm__ volatile("mfence");

			if (CPU_CR3_IS_ACTIVE(cpu)) {
				cpus_to_signal |= cpu_bit;
				i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC);
			}
		}
	}

	PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_START,
			    (uintptr_t) pmap, cpus_to_signal, flush_self, startv, 0);

	if (cpus_to_signal) {
		cpu_set	cpus_to_respond = cpus_to_signal;

		deadline = mach_absolute_time() + LockTimeOut;
		/*
		 * Wait for those other cpus to acknowledge
		 */
		while (cpus_to_respond != 0) {
			long orig_acks = 0;

			for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) {
				if ((cpus_to_respond & cpu_bit) != 0) {
					if (!cpu_datap(cpu)->cpu_running ||
					    cpu_datap(cpu)->cpu_tlb_invalid == FALSE ||
					    !CPU_CR3_IS_ACTIVE(cpu)) {
						cpus_to_respond &= ~cpu_bit;
					}
					cpu_pause();
				}
				if (cpus_to_respond == 0)
					break;
			}

			if (cpus_to_respond && (mach_absolute_time() > deadline)) {
				if (machine_timeout_suspended())
					continue;
				pmap_tlb_flush_timeout = TRUE;
				orig_acks = NMIPI_acks;
				pmap_cpuset_NMIPI(cpus_to_respond);

				panic("TLB invalidation IPI timeout: "
				      "CPU(s) failed to respond to interrupts, unresponsive CPU bitmap: 0x%lx, NMIPI acks: orig: 0x%lx, now: 0x%lx",
				      cpus_to_respond, orig_acks, NMIPI_acks);
			}
		}
	}
	/*
	 * Flush local tlb if required.
	 * We need this flush even if the pmap being changed
	 * is the user map... in case we do a copyin/out
	 * before returning to user mode.
	 */
	if (flush_self)
		flush_tlb();

	if ((pmap == kernel_pmap) && (flush_self != TRUE)) {
		panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d",
		      kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map);
	}

	PMAP_TRACE_CONSTANT(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END,
			    (uintptr_t) pmap, cpus_to_signal, startv, endv, 0);
}
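/*
 * process_pmap_updates() below is the responder half of the protocol
 * above: the interrupted cpu flushes its own tlb and then clears
 * cpu_tlb_invalid, which is one of the conditions the initiator in
 * pmap_flush_tlbs() polls before declaring that cpu flushed.
 */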
void
process_pmap_updates(void)
{
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	flush_tlb();

	current_cpu_datap()->cpu_tlb_invalid = FALSE;
	__asm__ volatile("mfence");
}

void
pmap_update_interrupt(void)
{
	PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_START,
		   0, 0, 0, 0, 0);

	process_pmap_updates();

	PMAP_TRACE(PMAP_CODE(PMAP__UPDATE_INTERRUPT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

#ifdef PMAP_DEBUG
void
pmap_dump(pmap_t p)
{
	int i;

	kprintf("pmap %p\n", p);

	kprintf("  pm_cr3 0x%llx\n", p->pm_cr3);
	kprintf("  pm_pml4 0x%x\n", p->pm_pml4);
	kprintf("  pm_pdpt 0x%x\n", p->pm_pdpt);

	kprintf("  pml4[0] 0x%llx\n", *p->pm_pml4);
	for (i = 0; i < 8; i++)
		kprintf("  pdpt[%d] 0x%llx\n", i, p->pm_pdpt[i]);
}

void
pmap_dump_wrap(void)
{
	pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap);
}

void
dump_4GB_pdpt(pmap_t p)
{
	spl_t		spl;
	pdpt_entry_t	*user_pdptp;
	pdpt_entry_t	*kern_pdptp;
	pdpt_entry_t	*pml4p;

	spl = splhigh();
	while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) {
		splx(spl);
		pmap_expand_pml4(p, 0x0, PMAP_EXPAND_OPTIONS_NONE);
		spl = splhigh();
	}
	kern_pdptp = kernel_pmap->pm_pdpt;
	if (kern_pdptp == NULL)
		panic("kern_pdptp == NULL");
	kprintf("dump_4GB_pdpt(%p)\n"
		"kern_pdptp=%p (phys=0x%016llx)\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"user_pdptp=%p (phys=0x%016llx)\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		p, kern_pdptp, kvtophys(kern_pdptp),
		kern_pdptp+0, *(kern_pdptp+0),
		kern_pdptp+1, *(kern_pdptp+1),
		kern_pdptp+2, *(kern_pdptp+2),
		kern_pdptp+3, *(kern_pdptp+3),
		kern_pdptp+4, *(kern_pdptp+4),
		user_pdptp, kvtophys(user_pdptp),
		user_pdptp+0, *(user_pdptp+0),
		user_pdptp+1, *(user_pdptp+1),
		user_pdptp+2, *(user_pdptp+2),
		user_pdptp+3, *(user_pdptp+3),
		user_pdptp+4, *(user_pdptp+4));
	kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
		p->pm_cr3, p->pm_hold, p->pm_pml4);
	pml4p = (pdpt_entry_t *)p->pm_hold;
	if (pml4p == NULL)
		panic("user pml4p == NULL");
	kprintf("\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		pml4p+0, *(pml4p),
		pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX));
	kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n",
		kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4);
	pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold;
	if (pml4p == NULL)
		panic("kern pml4p == NULL");
	kprintf("\t 0x%08x: 0x%016llx\n"
		"\t 0x%08x: 0x%016llx\n",
		pml4p+0, *(pml4p),
		pml4p+511, *(pml4p+511));
	splx(spl);
}
void
dump_4GB_pdpt_thread(thread_t tp)
{
	dump_4GB_pdpt(tp->map->pmap);
}


#endif