/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

void pmap_remove_range(
    pmap_t          pmap,
    vm_map_offset_t va,
    pt_entry_t      *spte,
    pt_entry_t      *epte);

uint32_t pmap_update_clear_pte_count;

/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on an NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;

/*
 * kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 *	grand    = the pmap that we will nest subord into
 *	subord   = the pmap that goes into the grand
 *	va_start = start of range in grand to be nested
 *	nstart   = start of range in the nested (subord) pmap
 *	size     = size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher level VM locks to ensure that things don't change while
 *	we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
 *	or do 2 nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
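/*
 * Illustrative sketch (not compiled): a hypothetical caller nesting a
 * shared-region pmap into a task pmap.  The address and size below are
 * assumptions for illustration only; the real callers live in the Mach VM
 * shared region layer.  Both addresses and the size must be NBPDE (2MiB)
 * aligned, and va_start must equal nstart.
 */
#if 0
static void
example_nest_shared_region(pmap_t task_pmap, pmap_t shared_pmap)
{
    addr64_t      base = 0x7FFF80000000ULL;   /* hypothetical, 2MiB-aligned */
    uint64_t      size = 512 * 1024 * 1024;   /* 512MiB, a multiple of NBPDE */
    kern_return_t kr;

    /* grand = task_pmap, subord = shared_pmap */
    kr = pmap_nest(task_pmap, shared_pmap, base, base, size);
    assert(kr == KERN_SUCCESS);

    /* ... later, tear the nesting down over the same range ... */
    (void) pmap_unnest(task_pmap, base, size);
}
#endif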

kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
    vm_map_offset_t vaddr, nvaddr;
    pd_entry_t      *pde, *npde;
    unsigned int    i;
    uint64_t        num_pde;

    if ((size & (pmap_nesting_size_min - 1)) ||
        (va_start & (pmap_nesting_size_min - 1)) ||
        (nstart & (pmap_nesting_size_min - 1)) ||
        ((size >> 28) > 65536))    /* Max size we can nest is 16TB */
        return KERN_INVALID_VALUE;

    if (size == 0) {
        panic("pmap_nest: size is invalid - %016llX\n", size);
    }

    if (va_start != nstart)
        panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);

    PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
        (uintptr_t) grand, (uintptr_t) subord,
        (uintptr_t) (va_start >> 32), (uintptr_t) va_start, 0);

    nvaddr = (vm_map_offset_t)nstart;
    num_pde = size >> PDESHIFT;

    PMAP_LOCK(subord);

    subord->pm_shared = TRUE;

    for (i = 0; i < num_pde;) {
        if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {

            npde = pmap64_pdpt(subord, nvaddr);

            while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                PMAP_UNLOCK(subord);
                pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(subord);
                npde = pmap64_pdpt(subord, nvaddr);
            }
            *npde |= INTEL_PDPTE_NESTED;
            nvaddr += NBPDPT;
            i += (uint32_t)NPDEPG;
        }
        else {
            npde = pmap_pde(subord, nvaddr);

            while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                PMAP_UNLOCK(subord);
                pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(subord);
                npde = pmap_pde(subord, nvaddr);
            }
            nvaddr += NBPDE;
            i++;
        }
    }

    PMAP_UNLOCK(subord);

    vaddr = (vm_map_offset_t)va_start;

    PMAP_LOCK(grand);

    for (i = 0; i < num_pde;) {
        pd_entry_t tpde;

        if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
            npde = pmap64_pdpt(subord, vaddr);
            if (npde == 0)
                panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
            tpde = *npde;
            pde = pmap64_pdpt(grand, vaddr);
            if (0 == pde) {
                PMAP_UNLOCK(grand);
                pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(grand);
                pde = pmap64_pdpt(grand, vaddr);
            }
            if (pde == 0)
                panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
            pmap_store_pte(pde, tpde);
            vaddr += NBPDPT;
            i += (uint32_t) NPDEPG;
        }
        else {
            npde = pmap_pde(subord, nstart);
            if (npde == 0)
                panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
            tpde = *npde;
            nstart += NBPDE;
            pde = pmap_pde(grand, vaddr);
            if ((0 == pde) && cpu_64bit) {
                PMAP_UNLOCK(grand);
                pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(grand);
                pde = pmap_pde(grand, vaddr);
            }

            if (pde == 0)
                panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
            vaddr += NBPDE;
            pmap_store_pte(pde, tpde);
            i++;
        }
    }

    PMAP_UNLOCK(grand);

    PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

    return KERN_SUCCESS;
}
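/*
 * A note on the limit check above (editorial, not compiled): the guard
 * ((size >> 28) > 65536) rejects any size whose count of 256MiB (2^28)
 * units exceeds 65536, i.e. anything above 65536 * 2^28 = 2^44 bytes,
 * which is the advertised 16TB nesting ceiling.
 */
#if 0
static void
example_nest_size_limit(void)
{
    uint64_t max_nest = 65536ULL << 28;          /* 2^44 bytes == 16TB */
    assert(max_nest == (1ULL << 44));
    assert(((max_nest >> 28) > 65536) == 0);     /* 16TB itself is allowed */
}
#endif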

/*
 * kern_return_t pmap_unnest(grand, vaddr, size)
 *
 *	grand = the pmap that we will un-nest subord from
 *	vaddr = start of range in pmap to be unnested
 *	size  = size of the range to be unnested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 */

kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {

    pd_entry_t *pde;
    unsigned int i;
    uint64_t num_pde;
    addr64_t va_start, va_end;
    uint64_t npdpt = PMAP_INVALID_PDPTNUM;

    PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
        (uintptr_t) grand,
        (uintptr_t) (vaddr >> 32), (uintptr_t) vaddr, 0, 0);

    if ((size & (pmap_nesting_size_min - 1)) ||
        (vaddr & (pmap_nesting_size_min - 1))) {
        panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
            grand, vaddr, size);
    }

    /* align everything to PDE boundaries */
    va_start = vaddr & ~(NBPDE - 1);
    va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE - 1);
    size = va_end - va_start;

    PMAP_LOCK(grand);

    num_pde = size >> PDESHIFT;
    vaddr = va_start;

    for (i = 0; i < num_pde;) {
        if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
            npdpt = pdptnum(grand, vaddr);
            pde = pmap64_pdpt(grand, vaddr);
            if (pde && (*pde & INTEL_PDPTE_NESTED)) {
                pmap_store_pte(pde, (pd_entry_t)0);
                i += (uint32_t) NPDEPG;
                vaddr += NBPDPT;
                continue;
            }
        }
        pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
        if (pde == 0)
            panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
        pmap_store_pte(pde, (pd_entry_t)0);
        i++;
        vaddr += NBPDE;
    }

    PMAP_UPDATE_TLBS(grand, va_start, va_end);

    PMAP_UNLOCK(grand);

    PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

    return KERN_SUCCESS;
}

/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
    pd_entry_t *pdpte;
    boolean_t rval = FALSE;

    if (!cpu_64bit)
        return rval;

    PMAP_LOCK(p);

    pdpte = pmap64_pdpt(p, *s);
    if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
        *s &= ~(NBPDPT - 1);
        rval = TRUE;
    }

    pdpte = pmap64_pdpt(p, *e);
    if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
        *e = ((*e + NBPDPT) & ~(NBPDPT - 1));
        rval = TRUE;
    }

    PMAP_UNLOCK(p);

    return rval;
}

/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
    pt_entry_t  *ptp;
    pd_entry_t  *pdep;
    ppnum_t     ppn = 0;
    pd_entry_t  pde;
    pt_entry_t  pte;

    mp_disable_preemption();

    /* This refcount test is a band-aid--several infrastructural changes
     * are necessary to eliminate invocation of this routine from arbitrary
     * contexts.
     */

    if (!pmap->ref_count)
        goto pfp_exit;

    pdep = pmap_pde(pmap, va);

    if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
        if (pde & INTEL_PTE_PS) {
            ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
            ppn += (ppnum_t) ptenum(va);
        }
        else {
            ptp = pmap_pte(pmap, va);
            if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
                ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
            }
        }
    }
pfp_exit:
    mp_enable_preemption();

    return ppn;
}
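/*
 * Worked example for the large-page path above (not compiled): with a 2MiB
 * PDE mapping, pte_to_pa(pde) yields the 2MiB-aligned physical base, and
 * ptenum(va), bits 20:12 of the virtual address, selects one of the 512
 * constituent 4K pages.  The helper name below is hypothetical.
 */
#if 0
static ppnum_t
example_large_page_ppn(pd_entry_t pde, addr64_t va)
{
    /* base 4K page number of the 2MiB region... */
    ppnum_t ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
    /* ...plus the 4K page index within that region */
    return ppn + (ppnum_t) ptenum(va);
}
#endif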

/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */

void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
    pv_rooted_entry_t   pv_h, pv_e;
    pv_hashed_entry_t   pvh_e, nexth;
    vm_map_offset_t     vaddr;
    pmap_t              pmap;
    pt_entry_t          *ptep;

    assert(IS_MANAGED_PAGE(pn));

    pv_h = pai_to_pvh(pn);
    /* TODO: translate the PHYS_* bits to PTE bits; while they're
     * currently identical, they may not remain so.
     * Potential optimization (here and in page_protect):
     * parallel shootdowns, check for redundant
     * attribute modifications.
     */

    /*
     * Alter attributes on all mappings
     */
    if (pv_h->pmap != PMAP_NULL) {
        pv_e = pv_h;
        pvh_e = (pv_hashed_entry_t)pv_e;

        do {
            pmap = pv_e->pmap;
            vaddr = pv_e->va;
            ptep = pmap_pte(pmap, vaddr);

            if (0 == ptep)
                panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

            nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
            pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
            PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
            pvh_e = nexth;
        } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
    }
}

void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
    assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

    if (dofilter) {
        CPU_CR3_MARK_INACTIVE();
    } else {
        CPU_CR3_MARK_ACTIVE();
        __asm__ volatile("mfence");
        if (current_cpu_datap()->cpu_tlb_invalid)
            process_pmap_updates();
    }
}


/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte cannot be reclaimed.
 *
 * NB: This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */

void
pmap_enter(
    register pmap_t     pmap,
    vm_map_offset_t     vaddr,
    ppnum_t             pn,
    vm_prot_t           prot,
    vm_prot_t           fault_type,
    unsigned int        flags,
    boolean_t           wired)
{
    (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE);
}
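/*
 * Illustrative sketch (not compiled): the expected bracketing pattern for
 * x86_filter_TLB_coherency_interrupts().  The caller shown is hypothetical;
 * the point is that a TRUE/FALSE pair must enclose the window in which this
 * CPU opts out of TLB shootdowns, and that any invalidation that arrived in
 * the meantime is processed on the way out (the FALSE path above).  The
 * assert in the routine requires interrupts off or preemption disabled.
 */
#if 0
static void
example_tlb_filter_window(void)
{
    mp_disable_preemption();
    x86_filter_TLB_coherency_interrupts(TRUE);   /* mark CR3 inactive */

    /* ... work that must not field TLB shootdown interrupts ... */

    x86_filter_TLB_coherency_interrupts(FALSE);  /* re-arm and catch up */
    mp_enable_preemption();
}
#endif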

kern_return_t
pmap_enter_options(
    register pmap_t     pmap,
    vm_map_offset_t     vaddr,
    ppnum_t             pn,
    vm_prot_t           prot,
    __unused vm_prot_t  fault_type,
    unsigned int        flags,
    boolean_t           wired,
    unsigned int        options)
{
    pt_entry_t          *pte;
    pv_rooted_entry_t   pv_h;
    ppnum_t             pai;
    pv_hashed_entry_t   pvh_e;
    pv_hashed_entry_t   pvh_new;
    pt_entry_t          template;
    pmap_paddr_t        old_pa;
    pmap_paddr_t        pa = (pmap_paddr_t) i386_ptob(pn);
    boolean_t           need_tlbflush = FALSE;
    boolean_t           set_NX;
    char                oattr;
    boolean_t           old_pa_locked;
    /* 2MiB mappings are confined to x86_64 by VM */
    boolean_t           superpage = flags & VM_MEM_SUPERPAGE;
    vm_object_t         delpage_pm_obj = NULL;
    int                 delpage_pde_index = 0;
    pt_entry_t          old_pte;
    kern_return_t       kr_expand;

    pmap_intr_assert();

    if (pmap == PMAP_NULL)
        return KERN_INVALID_ARGUMENT;

    /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
     * unused value for that scenario.
     */
    assert(pn != vm_page_fictitious_addr);

    if (pn == vm_page_guard_addr)
        return KERN_INVALID_ARGUMENT;

    PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
        pmap,
        (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
        pn, prot);

    if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
        set_NX = FALSE;
    else
        set_NX = TRUE;

    if (__improbable(set_NX && (pmap == kernel_pmap) &&
        ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) ||
         (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
        set_NX = FALSE;
    }

    /*
     * Must allocate a new pvlist entry while we're unlocked;
     * zalloc may cause pageout (which will lock the pmap system).
     * If we determine we need a pvlist entry, we will unlock
     * and allocate one.  Then we will retry, throwing away
     * the allocated entry later (if we no longer need it).
     */

    pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
    pvh_e = PV_HASHED_ENTRY_NULL;

    PMAP_LOCK(pmap);

    /*
     * Expand pmap to include this pte.  Assume that
     * pmap is always expanded to include enough hardware
     * pages to map one VM page.
     */
    if (superpage) {
        while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
            /* need room for another pde entry */
            PMAP_UNLOCK(pmap);
            kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
            if (kr_expand != KERN_SUCCESS)
                return kr_expand;
            PMAP_LOCK(pmap);
        }
    } else {
        while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
            /*
             * Must unlock to expand the pmap
             * going to grow pde level page(s)
             */
            PMAP_UNLOCK(pmap);
            kr_expand = pmap_expand(pmap, vaddr, options);
            if (kr_expand != KERN_SUCCESS)
                return kr_expand;
            PMAP_LOCK(pmap);
        }
    }
    if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
        PMAP_UNLOCK(pmap);
        return KERN_SUCCESS;
    }

    if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
        /*
         * There is still an empty page table mapped that
         * was used for a previous base page mapping.
         * Remember the PDE and the PDE index, so that we
         * can free the page at the end of this function.
         */
        delpage_pde_index = (int)pdeidx(pmap, vaddr);
        delpage_pm_obj = pmap->pm_obj;
        *pte = 0;
    }

    old_pa = pte_to_pa(*pte);
    pai = pa_index(old_pa);
    old_pa_locked = FALSE;

    /*
     * if we have a previous managed page, lock the pv entry now. after
     * we lock it, check to see if someone beat us to the lock and if so
     * drop the lock
     */
    if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
        LOCK_PVH(pai);
        old_pa_locked = TRUE;
        old_pa = pte_to_pa(*pte);
        if (0 == old_pa) {
            UNLOCK_PVH(pai);    /* another path beat us to it */
            old_pa_locked = FALSE;
        }
    }
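    /*
     * (Editorial note.)  The re-read of *pte above is deliberate
     * double-checked locking: between the unlocked read of old_pa and the
     * acquisition of the PVH lock, a concurrent pmap_remove() may have torn
     * the mapping down, so the PTE must be revalidated under the lock
     * before the old page is treated as still mapped.
     */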

    /*
     * Special case if the incoming physical page is already mapped
     * at this address.
     */
    if (old_pa == pa) {
        pt_entry_t old_attributes =
            *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);

        /*
         * May be changing its wired attribute or protection
         */

        template = pa_to_pte(pa) | INTEL_PTE_VALID;
        template |= pmap_get_cache_attributes(pa_index(pa));

        if (VM_MEM_NOT_CACHEABLE ==
            (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
            if (!(flags & VM_MEM_GUARDED))
                template |= INTEL_PTE_PTA;
            template |= INTEL_PTE_NCACHE;
        }
        if (pmap != kernel_pmap)
            template |= INTEL_PTE_USER;
        if (prot & VM_PROT_WRITE)
            template |= INTEL_PTE_WRITE;

        if (set_NX)
            template |= INTEL_PTE_NX;

        if (wired) {
            template |= INTEL_PTE_WIRED;
            if (!iswired(old_attributes)) {
                OSAddAtomic(+1, &pmap->stats.wired_count);
                pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            }
        } else {
            if (iswired(old_attributes)) {
                assert(pmap->stats.wired_count >= 1);
                OSAddAtomic(-1, &pmap->stats.wired_count);
                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            }
        }
        if (superpage)            /* this path cannot be used */
            template |= INTEL_PTE_PS;    /* to change the page size! */
        /* Determine delta, PV locked */
        need_tlbflush =
            ((old_attributes ^ template) != INTEL_PTE_WIRED);

        /* store modified PTE and preserve RC bits */
        pt_entry_t npte, opte;
        do {
            opte = *pte;
            npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
        } while (!pmap_cmpx_pte(pte, opte, npte));
        if (old_pa_locked) {
            UNLOCK_PVH(pai);
            old_pa_locked = FALSE;
        }
        goto Done;
    }
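    /*
     * (Editorial note on the fast path above.)  The store is done with a
     * compare-and-exchange loop rather than a plain write so that the
     * hardware-maintained reference/modify (RC) bits, which another CPU may
     * set concurrently, are re-sampled and folded into the new PTE on every
     * retry instead of being lost.
     */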

    /*
     * Outline of code from here:
     *   1) If va was mapped, update TLBs, remove the mapping
     *      and remove old pvlist entry.
     *   2) Add pvlist entry for new mapping
     *   3) Enter new mapping.
     *
     * If the old physical page is not managed step 1) is skipped
     * (except for updating the TLBs), and the mapping is
     * overwritten at step 3).  If the new physical page is not
     * managed, step 2) is skipped.
     */

    if (old_pa != (pmap_paddr_t) 0) {

        /*
         * Don't do anything to pages outside valid memory here.
         * Instead convince the code that enters a new mapping
         * to overwrite the old one.
         */

        /* invalidate the PTE */
        pmap_update_pte(pte, INTEL_PTE_VALID, 0);
        /* propagate invalidate everywhere */
        PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
        /* remember reference and change */
        old_pte = *pte;
        oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
        /* completely invalidate the PTE */
        pmap_store_pte(pte, 0);

        if (IS_MANAGED_PAGE(pai)) {
            pmap_assert(old_pa_locked == TRUE);
            pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
            assert(pmap->stats.resident_count >= 1);
            OSAddAtomic(-1, &pmap->stats.resident_count);
            /* test the saved old_pte: *pte was just zeroed above */
            if (iswired(old_pte)) {
                assert(pmap->stats.wired_count >= 1);
                OSAddAtomic(-1, &pmap->stats.wired_count);
                pmap_ledger_debit(pmap, task_ledgers.wired_mem,
                    PAGE_SIZE);
            }
            pmap_phys_attributes[pai] |= oattr;

            /*
             * Remove the mapping from the pvlist for
             * this physical page.
             * We'll end up with either a rooted pv or a
             * hashed pv
             */
            pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);

        } else {

            /*
             * old_pa is not managed.
             * Do removal part of accounting.
             */

            if (iswired(old_pte)) {
                assert(pmap->stats.wired_count >= 1);
                OSAddAtomic(-1, &pmap->stats.wired_count);
                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            }
        }
    }

    /*
     * if we had a previously managed page locked, unlock it now
     */
    if (old_pa_locked) {
        UNLOCK_PVH(pai);
        old_pa_locked = FALSE;
    }

    pai = pa_index(pa);    /* now working with new incoming phys page */
    if (IS_MANAGED_PAGE(pai)) {

        /*
         * Step 2) Enter the mapping in the PV list for this
         * physical page.
         */
        pv_h = pai_to_pvh(pai);

        LOCK_PVH(pai);

        if (pv_h->pmap == PMAP_NULL) {
            /*
             * No mappings yet, use rooted pv
             */
            pv_h->va = vaddr;
            pv_h->pmap = pmap;
            queue_init(&pv_h->qlink);
        } else {
            /*
             * Add new pv_hashed_entry after header.
             */
            if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
                pvh_e = pvh_new;
                pvh_new = PV_HASHED_ENTRY_NULL;
            } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
                PV_HASHED_ALLOC(&pvh_e);
                if (PV_HASHED_ENTRY_NULL == pvh_e) {
                    /*
                     * the pv list is empty. if we are on
                     * the kernel pmap we'll use one of
                     * the special private kernel pv_e's,
                     * else, we need to unlock
                     * everything, zalloc a pv_e, and
                     * restart bringing in the pv_e with
                     * us.
                     */
                    if (kernel_pmap == pmap) {
                        PV_HASHED_KERN_ALLOC(&pvh_e);
                    } else {
                        UNLOCK_PVH(pai);
                        PMAP_UNLOCK(pmap);
                        pmap_pv_throttle(pmap);
                        pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
                        goto Retry;
                    }
                }
            }

            if (PV_HASHED_ENTRY_NULL == pvh_e)
                panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

            pvh_e->va = vaddr;
            pvh_e->pmap = pmap;
            pvh_e->ppn = pn;
            pv_hash_add(pvh_e, pv_h);

            /*
             * Remember that we used the pvlist entry.
             */
            pvh_e = PV_HASHED_ENTRY_NULL;
        }

        /*
         * only count the mapping
         * for 'managed memory'
         */
        pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
        OSAddAtomic(+1, &pmap->stats.resident_count);
        if (pmap->stats.resident_count > pmap->stats.resident_max) {
            pmap->stats.resident_max = pmap->stats.resident_count;
        }
    } else if (last_managed_page == 0) {
        /* Account for early mappings created before "managed pages"
         * are determined.  Consider consulting the available DRAM map.
         */
        pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
        OSAddAtomic(+1, &pmap->stats.resident_count);
    }
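    /*
     * (Editorial note on step 2 above.)  The first mapping of a managed
     * page lives in the statically allocated "rooted" pv entry indexed by
     * pai; only aliases beyond the first need a dynamically allocated
     * hashed entry, so the common single-mapping case allocates nothing.
     * The kernel pmap draws on a private reserve (PV_HASHED_KERN_ALLOC)
     * because it cannot safely drop its locks and block in zalloc the way
     * the user-pmap retry path can.
     */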
    /*
     * Step 3) Enter the mapping.
     *
     * Build a template to speed up entering -
     * only the pfn changes.
     */
    template = pa_to_pte(pa) | INTEL_PTE_VALID;
    /*
     * DRK: It may be worth asserting on cache attribute flags that diverge
     * from the existing physical page attributes.
     */

    template |= pmap_get_cache_attributes(pa_index(pa));

    if (flags & VM_MEM_NOT_CACHEABLE) {
        if (!(flags & VM_MEM_GUARDED))
            template |= INTEL_PTE_PTA;
        template |= INTEL_PTE_NCACHE;
    }
    if (pmap != kernel_pmap)
        template |= INTEL_PTE_USER;
    if (prot & VM_PROT_WRITE)
        template |= INTEL_PTE_WRITE;
    if (set_NX)
        template |= INTEL_PTE_NX;
    if (wired) {
        template |= INTEL_PTE_WIRED;
        OSAddAtomic(+1, &pmap->stats.wired_count);
        pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
    }
    if (superpage)
        template |= INTEL_PTE_PS;
    pmap_store_pte(pte, template);

    /*
     * if this was a managed page we delayed unlocking the pv until here
     * to prevent pmap_page_protect et al from finding it until the pte
     * has been stored
     */
    if (IS_MANAGED_PAGE(pai)) {
        UNLOCK_PVH(pai);
    }
Done:
    if (need_tlbflush == TRUE)
        PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);

    if (pvh_e != PV_HASHED_ENTRY_NULL) {
        PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
    }
    if (pvh_new != PV_HASHED_ENTRY_NULL) {
        PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
    }
    PMAP_UNLOCK(pmap);

    if (delpage_pm_obj) {
        vm_page_t m;

        vm_object_lock(delpage_pm_obj);
        m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
        if (m == VM_PAGE_NULL)
            panic("pmap_enter: pte page not in object");
        vm_object_unlock(delpage_pm_obj);
        VM_PAGE_FREE(m);
        OSAddAtomic(-1, &inuse_ptepages_count);
        PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
    }

    PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
    return KERN_SUCCESS;
}
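/*
 * Illustrative sketch (not compiled): using PMAP_EXPAND_OPTIONS_NOENTER to
 * pre-build the paging hierarchy for an address without installing any
 * mapping.  Per the N.B. near the top of pmap_enter_options(), the page
 * frame argument is unused in this mode, so 0 is passed.  The wrapper and
 * its arguments are hypothetical.
 */
#if 0
static kern_return_t
example_prepopulate_pte_pages(pmap_t pmap, vm_map_offset_t vaddr)
{
    /* expands to the PTE level for vaddr, then returns without entering */
    return pmap_enter_options(pmap, vaddr, 0, VM_PROT_READ,
        VM_PROT_NONE, 0, FALSE, PMAP_EXPAND_OPTIONS_NOENTER);
}
#endif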

/*
 * Remove a range of hardware page-table entries.
 * The entries given are the first (inclusive)
 * and last (exclusive) entries for the VM pages.
 * The virtual address is the va for the first pte.
 *
 * The pmap must be locked.
 * If the pmap is not the kernel pmap, the range must lie
 * entirely within one pte-page.  This is NOT checked.
 * Assumes that the pte-page exists.
 */

void
pmap_remove_range(
    pmap_t              pmap,
    vm_map_offset_t     start_vaddr,
    pt_entry_t          *spte,
    pt_entry_t          *epte)
{
    pt_entry_t          *cpte;
    pv_hashed_entry_t   pvh_et = PV_HASHED_ENTRY_NULL;
    pv_hashed_entry_t   pvh_eh = PV_HASHED_ENTRY_NULL;
    pv_hashed_entry_t   pvh_e;
    int                 pvh_cnt = 0;
    int                 num_removed, num_unwired, num_found, num_invalid;
    ppnum_t             pai;
    pmap_paddr_t        pa;
    vm_map_offset_t     vaddr;

    num_removed = 0;
    num_unwired = 0;
    num_found = 0;
    num_invalid = 0;
#if defined(__i386__)
    if (pmap != kernel_pmap &&
        pmap->pm_task_map == TASK_MAP_32BIT &&
        start_vaddr >= HIGH_MEM_BASE) {
        /*
         * The range is in the "high_shared_pde" which is shared
         * between the kernel and all 32-bit tasks.  It holds
         * the 32-bit commpage but also the trampolines, GDT, etc...
         * so we can't let user tasks remove anything from it.
         */
        return;
    }
#endif
    /* invalidate the PTEs first to "freeze" them */
    for (cpte = spte, vaddr = start_vaddr;
         cpte < epte;
         cpte++, vaddr += PAGE_SIZE_64) {
        pt_entry_t p = *cpte;

        pa = pte_to_pa(p);
        if (pa == 0)
            continue;
        num_found++;

        if (iswired(p))
            num_unwired++;

        pai = pa_index(pa);

        if (!IS_MANAGED_PAGE(pai)) {
            /*
             * Outside range of managed physical memory.
             * Just remove the mappings.
             */
            pmap_store_pte(cpte, 0);
            continue;
        }

        if ((p & INTEL_PTE_VALID) == 0)
            num_invalid++;

        /* invalidate the PTE */
        pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
    }

    if (num_found == 0) {
        /* nothing was changed: we're done */
        goto update_counts;
    }

    /* propagate the invalidates to other CPUs */

    PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

    for (cpte = spte, vaddr = start_vaddr;
         cpte < epte;
         cpte++, vaddr += PAGE_SIZE_64) {

        pa = pte_to_pa(*cpte);
        if (pa == 0)
            continue;

        pai = pa_index(pa);

        LOCK_PVH(pai);

        pa = pte_to_pa(*cpte);
        if (pa == 0) {
            UNLOCK_PVH(pai);
            continue;
        }
        num_removed++;

        /*
         * Get the modify and reference bits, then
         * nuke the entry in the page table
         */
        /* remember reference and change */
        pmap_phys_attributes[pai] |=
            (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));

        /*
         * Remove the mapping from the pvlist for this physical page.
         */
        pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

        /* completely invalidate the PTE */
        pmap_store_pte(cpte, 0);

        UNLOCK_PVH(pai);

        if (pvh_e != PV_HASHED_ENTRY_NULL) {
            pvh_e->qlink.next = (queue_entry_t) pvh_eh;
            pvh_eh = pvh_e;

            if (pvh_et == PV_HASHED_ENTRY_NULL) {
                pvh_et = pvh_e;
            }
            pvh_cnt++;
        }
    } /* for loop */

    if (pvh_eh != PV_HASHED_ENTRY_NULL) {
        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
    }
update_counts:
    /*
     * Update the counts
     */
#if TESTING
    if (pmap->stats.resident_count < num_removed)
        panic("pmap_remove_range: resident_count");
#endif
    pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
    assert(pmap->stats.resident_count >= num_removed);
    OSAddAtomic(-num_removed, &pmap->stats.resident_count);

#if TESTING
    if (pmap->stats.wired_count < num_unwired)
        panic("pmap_remove_range: wired_count");
#endif
    assert(pmap->stats.wired_count >= num_unwired);
    OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
    pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));

    return;
}
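
/*
 * (Editorial note.)  pmap_remove_range() is deliberately two-phase: the
 * first pass clears only INTEL_PTE_VALID to "freeze" the entries, a single
 * batched PMAP_UPDATE_TLBS() then flushes every CPU, and only the second
 * pass harvests the reference/modify bits and tears down the PV entries.
 * Collapsing this into one pass would let another CPU's stale TLB entry
 * set R/M bits in a PTE after those bits had already been sampled.
 */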

/*
 * Remove the given range of addresses
 * from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the hardware page size.
 */
void
pmap_remove(
    pmap_t      map,
    addr64_t    s64,
    addr64_t    e64)
{
    pt_entry_t  *pde;
    pt_entry_t  *spte, *epte;
    addr64_t    l64;
    uint64_t    deadline;

    pmap_intr_assert();

    if (map == PMAP_NULL || s64 == e64)
        return;

    PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
        map,
        (uint32_t) (s64 >> 32), s64,
        (uint32_t) (e64 >> 32), e64);


    PMAP_LOCK(map);

#if 0
    /*
     * Check that address range in the kernel does not overlap the stacks.
     * We initialize local static min/max variables once to avoid making
     * 2 function calls for every remove.  Note also that these functions
     * both return 0 before kernel stacks have been initialized, and hence
     * the panic is not triggered in this case.
     */
    if (map == kernel_pmap) {
        static vm_offset_t kernel_stack_min = 0;
        static vm_offset_t kernel_stack_max = 0;

        if (kernel_stack_min == 0) {
            kernel_stack_min = min_valid_stack_address();
            kernel_stack_max = max_valid_stack_address();
        }
        if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
            (kernel_stack_min < e64 && e64 <= kernel_stack_max))
            panic("pmap_remove() attempted in kernel stack");
    }
#else

    /*
     * The values of kernel_stack_min and kernel_stack_max are no longer
     * relevant now that we allocate kernel stacks in the kernel map,
     * so the old code above no longer applies.  If we wanted to check that
     * we weren't removing a mapping of a page in a kernel stack we'd
     * mark the PTE with an unused bit and check that here.
     */

#endif

    deadline = rdtsc64() + max_preemption_latency_tsc;

    while (s64 < e64) {
        l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
        if (l64 > e64)
            l64 = e64;
        pde = pmap_pde(map, s64);

        if (pde && (*pde & INTEL_PTE_VALID)) {
            if (*pde & INTEL_PTE_PS) {
                /*
                 * If we're removing a superpage, pmap_remove_range()
                 * must work on level 2 instead of level 1; and we're
                 * only passing a single level 2 entry instead of a
                 * level 1 range.
                 */
                spte = pde;
                epte = spte + 1;    /* excluded */
            } else {
                spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
                spte = &spte[ptenum(s64)];
                epte = &spte[intel_btop(l64 - s64)];
            }
            pmap_remove_range(map, s64, spte, epte);
        }
        s64 = l64;

        if (s64 < e64 && rdtsc64() >= deadline) {
            PMAP_UNLOCK(map)
            PMAP_LOCK(map)
            deadline = rdtsc64() + max_preemption_latency_tsc;
        }
    }

    PMAP_UNLOCK(map);

    PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
        map, 0, 0, 0, 0);

}
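/*
 * Worked example for the chunking above (not compiled): with a 2MiB
 * pde_mapped_size, a start address in the middle of a PDE is first
 * advanced to the next PDE boundary, so each pmap_remove_range() call
 * stays within one pte-page as that routine requires.  The values are
 * illustrative only.
 */
#if 0
static void
example_pde_chunking(void)
{
    addr64_t s64 = 0x4001ff000ULL;                        /* mid-PDE start */
    addr64_t l64 = (s64 + NBPDE) & ~((addr64_t)NBPDE - 1);
    assert(l64 == 0x400200000ULL);                        /* next 2MiB boundary */
}
#endif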

/*
 * Routine:	pmap_page_protect
 *
 * Function:
 *	Lower the permission for all mappings to a given
 *	page.
 */
void
pmap_page_protect(
    ppnum_t     pn,
    vm_prot_t   prot)
{
    pv_hashed_entry_t   pvh_eh = PV_HASHED_ENTRY_NULL;
    pv_hashed_entry_t   pvh_et = PV_HASHED_ENTRY_NULL;
    pv_hashed_entry_t   nexth;
    int                 pvh_cnt = 0;
    pv_rooted_entry_t   pv_h;
    pv_rooted_entry_t   pv_e;
    pv_hashed_entry_t   pvh_e;
    pt_entry_t          *pte;
    int                 pai;
    pmap_t              pmap;
    boolean_t           remove;

    pmap_intr_assert();
    assert(pn != vm_page_fictitious_addr);
    if (pn == vm_page_guard_addr)
        return;

    pai = ppn_to_pai(pn);

    if (!IS_MANAGED_PAGE(pai)) {
        /*
         * Not a managed page.
         */
        return;
    }
    PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
        pn, prot, 0, 0, 0);

    /*
     * Determine the new protection.
     */
    switch (prot) {
    case VM_PROT_READ:
    case VM_PROT_READ | VM_PROT_EXECUTE:
        remove = FALSE;
        break;
    case VM_PROT_ALL:
        return;        /* nothing to do */
    default:
        remove = TRUE;
        break;
    }

    pv_h = pai_to_pvh(pai);

    LOCK_PVH(pai);


    /*
     * Walk down PV list, if any, changing or removing all mappings.
     */
    if (pv_h->pmap == PMAP_NULL)
        goto done;

    pv_e = pv_h;
    pvh_e = (pv_hashed_entry_t) pv_e;    /* cheat */

    do {
        vm_map_offset_t vaddr;

        pmap = pv_e->pmap;
        vaddr = pv_e->va;
        pte = pmap_pte(pmap, vaddr);

        if (0 == pte) {
            panic("pmap_page_protect() "
                  "pmap=%p pn=0x%x vaddr=0x%llx\n",
                  pmap, pn, vaddr);
        }

        /* check the PTE contents after the NULL check above, not before */
        pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
            "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

        nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);

        /*
         * Remove the mapping if new protection is NONE
         */
        if (remove) {
            /*
             * Remove the mapping, collecting dirty bits.
             */
            pmap_update_pte(pte, INTEL_PTE_VALID, 0);

            /* Remove per-pmap wired count */
            if (iswired(*pte)) {
                OSAddAtomic(-1, &pmap->stats.wired_count);
                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            }

            PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
            pmap_phys_attributes[pai] |=
                *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
            pmap_store_pte(pte, 0);

#if TESTING
            if (pmap->stats.resident_count < 1)
                panic("pmap_page_protect: resident_count");
#endif
            pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
            assert(pmap->stats.resident_count >= 1);
            OSAddAtomic(-1, &pmap->stats.resident_count);
            /*
             * Deal with the pv_rooted_entry.
             */

            if (pv_e == pv_h) {
                /*
                 * Fix up head later.
                 */
                pv_h->pmap = PMAP_NULL;
            } else {
                /*
                 * Delete this entry.
                 */
                pv_hash_remove(pvh_e);
                pvh_e->qlink.next = (queue_entry_t) pvh_eh;
                pvh_eh = pvh_e;

                if (pvh_et == PV_HASHED_ENTRY_NULL)
                    pvh_et = pvh_e;
                pvh_cnt++;
            }
        } else {
            /*
             * Write-protect, after opportunistic refmod collect
             */
            pmap_phys_attributes[pai] |=
                *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
            pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
            PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
        }
        pvh_e = nexth;
    } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);


    /*
     * If pv_head mapping was removed, fix it up.
     */
    if (pv_h->pmap == PMAP_NULL) {
        pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

        if (pvh_e != (pv_hashed_entry_t) pv_h) {
            pv_hash_remove(pvh_e);
            pv_h->pmap = pvh_e->pmap;
            pv_h->va = pvh_e->va;
            pvh_e->qlink.next = (queue_entry_t) pvh_eh;
            pvh_eh = pvh_e;

            if (pvh_et == PV_HASHED_ENTRY_NULL)
                pvh_et = pvh_e;
            pvh_cnt++;
        }
    }
    if (pvh_eh != PV_HASHED_ENTRY_NULL) {
        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
    }
done:
    UNLOCK_PVH(pai);

    PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
        0, 0, 0, 0, 0);
}
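/*
 * Illustrative sketch (not compiled): the two behaviors of
 * pmap_page_protect(), driven by the switch at its top.  The wrapper below
 * is hypothetical; typical callers are the VM pageout and copy-on-write
 * paths.
 */
#if 0
static void
example_page_protect(ppnum_t pn)
{
    /* downgrade every mapping of pn to read-only (remove == FALSE) */
    pmap_page_protect(pn, VM_PROT_READ);

    /* remove every mapping of pn entirely (remove == TRUE) */
    pmap_page_protect(pn, VM_PROT_NONE);
}
#endif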

/*
 * Clear specified attribute bits.
 */
void
phys_attribute_clear(
    ppnum_t pn,
    int     bits)
{
    pv_rooted_entry_t   pv_h;
    pv_hashed_entry_t   pv_e;
    pt_entry_t          *pte;
    int                 pai;
    pmap_t              pmap;
    char                attributes = 0;

    pmap_intr_assert();
    assert(pn != vm_page_fictitious_addr);
    if (pn == vm_page_guard_addr)
        return;

    pai = ppn_to_pai(pn);

    if (!IS_MANAGED_PAGE(pai)) {
        /*
         * Not a managed page.
         */
        return;
    }

    PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
        pn, bits, 0, 0, 0);

    pv_h = pai_to_pvh(pai);

    LOCK_PVH(pai);

    /*
     * Walk down PV list, clearing all modify or reference bits.
     * We do not have to lock the pv_list itself because we hold
     * the PV lock for this physical page (LOCK_PVH above).
     */
    if (pv_h->pmap != PMAP_NULL) {
        /*
         * There are some mappings.
         */

        pv_e = (pv_hashed_entry_t)pv_h;

        do {
            vm_map_offset_t va;

            pmap = pv_e->pmap;
            va = pv_e->va;

            /*
             * Clear modify and/or reference bits.
             */
            pte = pmap_pte(pmap, va);
            attributes |= *pte & (PHYS_MODIFIED | PHYS_REFERENCED);
            pmap_update_pte(pte, bits, 0);
            /* Ensure all processors using this translation
             * invalidate this TLB entry.  The invalidation *must*
             * follow the PTE update, to ensure that the TLB
             * shadow of the 'D' bit (in particular) is
             * synchronized with the updated PTE.
             */
            PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

            pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

        } while (pv_e != (pv_hashed_entry_t)pv_h);
    }
    /* Opportunistic refmod collection, annulled
     * if both REF and MOD are being cleared.
     */

    pmap_phys_attributes[pai] |= attributes;
    pmap_phys_attributes[pai] &= (~bits);

    UNLOCK_PVH(pai);

    PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
        0, 0, 0, 0, 0);
}

/*
 * Check specified attribute bits.
 */
int
phys_attribute_test(
    ppnum_t pn,
    int     bits)
{
    pv_rooted_entry_t   pv_h;
    pv_hashed_entry_t   pv_e;
    pt_entry_t          *pte;
    int                 pai;
    pmap_t              pmap;
    int                 attributes = 0;

    pmap_intr_assert();
    assert(pn != vm_page_fictitious_addr);
    if (pn == vm_page_guard_addr)
        return 0;

    pai = ppn_to_pai(pn);

    if (!IS_MANAGED_PAGE(pai)) {
        /*
         * Not a managed page.
         */
        return 0;
    }

    /*
     * Fast check... if bits already collected
     * no need to take any locks...
     * if not set, we need to recheck after taking
     * the lock in case they got pulled in while
     * we were waiting for the lock
     */
    if ((pmap_phys_attributes[pai] & bits) == bits)
        return bits;

    pv_h = pai_to_pvh(pai);

    LOCK_PVH(pai);

    attributes = pmap_phys_attributes[pai] & bits;


    /*
     * Walk down PV list, checking the mappings until we
     * reach the end or we've found the desired attributes.
     */
    if (attributes != bits &&
        pv_h->pmap != PMAP_NULL) {
        /*
         * There are some mappings.
         */
        pv_e = (pv_hashed_entry_t)pv_h;
        do {
            vm_map_offset_t va;

            pmap = pv_e->pmap;
            va = pv_e->va;
            /*
             * pick up modify and/or reference bits from mapping
             */

            pte = pmap_pte(pmap, va);
            attributes |= (int)(*pte & bits);

            pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

        } while ((attributes != bits) &&
                 (pv_e != (pv_hashed_entry_t)pv_h));
    }
    pmap_phys_attributes[pai] |= attributes;

    UNLOCK_PVH(pai);
    return (attributes);
}
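/*
 * Illustrative sketch (not compiled): querying the aggregated dirty state
 * of a physical page through the attribute interface above.  The fast path
 * in phys_attribute_test() answers from pmap_phys_attributes[] without
 * taking the PV lock whenever the bits have already been collected.  The
 * wrapper name is hypothetical.
 */
#if 0
static boolean_t
example_page_is_dirty(ppnum_t pn)
{
    return (phys_attribute_test(pn, PHYS_MODIFIED) != 0);
}
#endif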

/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
    pmap_t          map,
    vm_map_offset_t vaddr,
    boolean_t       wired)
{
    pt_entry_t *pte;

    PMAP_LOCK(map);

    if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
        panic("pmap_change_wiring: pte missing");

    if (wired && !iswired(*pte)) {
        /*
         * wiring down mapping
         */
        pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
        OSAddAtomic(+1, &map->stats.wired_count);
        pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
    }
    else if (!wired && iswired(*pte)) {
        /*
         * unwiring mapping
         */
        assert(map->stats.wired_count >= 1);
        OSAddAtomic(-1, &map->stats.wired_count);
        pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
        pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
    }

    PMAP_UNLOCK(map);
}

/*
 * "Backdoor" direct map routine for early mappings.
 * Useful for mapping memory outside the range.
 * Sets A, D and NC if requested.
 */

vm_offset_t
pmap_map_bd(
    vm_offset_t     virt,
    vm_map_offset_t start_addr,
    vm_map_offset_t end_addr,
    vm_prot_t       prot,
    unsigned int    flags)
{
    pt_entry_t  template;
    pt_entry_t  *pte;
    spl_t       spl;
    vm_offset_t base = virt;
    template = pa_to_pte(start_addr)
        | INTEL_PTE_REF
        | INTEL_PTE_MOD
        | INTEL_PTE_WIRED
        | INTEL_PTE_VALID;

    if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
        template |= INTEL_PTE_NCACHE;
        if (!(flags & (VM_MEM_GUARDED)))
            template |= INTEL_PTE_PTA;
    }

#if defined(__x86_64__)
    if ((prot & VM_PROT_EXECUTE) == 0)
        template |= INTEL_PTE_NX;
#endif

    if (prot & VM_PROT_WRITE)
        template |= INTEL_PTE_WRITE;

    while (start_addr < end_addr) {
        spl = splhigh();
        pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
        if (pte == PT_ENTRY_NULL) {
            panic("pmap_map_bd: Invalid kernel address\n");
        }
        pmap_store_pte(pte, template);
        splx(spl);
        pte_increment_pa(template);
        virt += PAGE_SIZE;
        start_addr += PAGE_SIZE;
    }
    flush_tlb_raw();
    /*
     * Flush the full mapped range.  The loop above has advanced start_addr
     * to end_addr, so "base + end_addr - start_addr" would be an empty
     * range here; virt (== base + length) is the correct end address.
     */
    PMAP_UPDATE_TLBS(kernel_pmap, base, virt);
    return (virt);
}
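
/*
 * Illustrative sketch (not compiled): a hypothetical early boot caller
 * using pmap_map_bd() to wire a device register window, uncached, into a
 * reserved kernel virtual range.  All addresses are made up; real callers
 * run before the VM is fully up, which is why the routine writes PTEs
 * directly instead of going through pmap_enter().
 */
#if 0
static vm_offset_t
example_map_device_window(vm_offset_t kva, vm_map_offset_t dev_phys)
{
    /* map one 16K window, uncached, read/write */
    return pmap_map_bd(kva, dev_phys, dev_phys + 4 * PAGE_SIZE,
        VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
}
#endif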