/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		pmap_remove_range_options(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte,
			int		options);

void		pmap_reusable_range(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte,
			boolean_t	reusable);

uint32_t pmap_update_clear_pte_count;

/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;

/*
 *	kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	va_start = start of the range in grand at which subord is inserted
 *	nstart = start of the corresponding range in subord
 *	size   = size of the nested area (up to 16TB)
 *
 *	Inserts a pmap into another. This is used to implement shared segments.
 *
 *	Note that we depend upon higher-level VM locks to ensure that things don't
 *	change while we are doing this. For example, VM must not perform any pmap
 *	enters while it is nesting, nor run two nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde, *npde;
	unsigned int	i;
	uint64_t	num_pde;

	if ((size & (pmap_nesting_size_min-1)) ||
	    (va_start & (pmap_nesting_size_min-1)) ||
	    (nstart & (pmap_nesting_size_min-1)) ||
	    ((size >> 28) > 65536))	/* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;

	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}

	if (va_start != nstart)
		panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
		   (uintptr_t) grand, (uintptr_t) subord,
		   (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);

	nvaddr = (vm_map_offset_t)nstart;
	num_pde = size >> PDESHIFT;

	PMAP_LOCK(subord);

	subord->pm_shared = TRUE;

	for (i = 0; i < num_pde;) {
		if (((nvaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {

			npde = pmap64_pdpt(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK(subord);
				pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(subord);
				npde = pmap64_pdpt(subord, nvaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			nvaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK(subord);
				pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(subord);
				npde = pmap_pde(subord, nvaddr);
			}
			nvaddr += NBPDE;
			i++;
		}
	}

	PMAP_UNLOCK(subord);

	vaddr = (vm_map_offset_t)va_start;

	PMAP_LOCK(grand);

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0)
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK(grand);
				pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(grand);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (pde == 0)
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nstart);
			if (npde == 0)
				panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
			tpde = *npde;
			nstart += NBPDE;
			pde = pmap_pde(grand, vaddr);
			if ((0 == pde) && cpu_64bit) {
				PMAP_UNLOCK(grand);
				pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(grand);
				pde = pmap_pde(grand, vaddr);
			}

			if (pde == 0)
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			vaddr += NBPDE;
			pmap_store_pte(pde, tpde);
			i++;
		}
	}

	PMAP_UNLOCK(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
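
/*
 * Illustrative sketch (guarded out of the build): how a VM-layer caller
 * might nest a shared-region pmap into a task's pmap. The function and the
 * "shared_base"/"shared_size" values are hypothetical; the real VM code
 * supplies these and also serializes nest/unnest operations, per the
 * locking note above.
 */
#if 0
static kern_return_t
example_nest_shared_region(pmap_t task_pmap, pmap_t shared_pmap,
			   addr64_t shared_base, uint64_t shared_size)
{
	/* both address and size must be pmap_nesting_size_min (2MiB)
	 * aligned, and grand/subord offsets must match */
	assert((shared_base & (pmap_nesting_size_min - 1)) == 0);
	assert((shared_size & (pmap_nesting_size_min - 1)) == 0);

	return pmap_nest(task_pmap, shared_pmap,
			 shared_base, shared_base, shared_size);
}
#endif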
/*
 *	kern_return_t pmap_unnest(grand, vaddr, size)
 *
 *	grand = the pmap that we will un-nest subord from
 *	vaddr = start of the range in grand to be unnested
 *	size  = size of the range
 *
 *	Removes a pmap from another. This is used to implement shared segments.
 */

kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {

	pd_entry_t *pde;
	unsigned int i;
	uint64_t num_pde;
	addr64_t va_start, va_end;
	uint64_t npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
		   (uintptr_t) grand,
		   (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);

	if ((size & (pmap_nesting_size_min-1)) ||
	    (vaddr & (pmap_nesting_size_min-1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		      grand, vaddr, size);
	}

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE-1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
	size = va_end - va_start;

	PMAP_LOCK(grand);

	num_pde = size >> PDESHIFT;
	vaddr = va_start;

	for (i = 0; i < num_pde; ) {
		if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0)
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_UNLOCK(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}

/* Invoked by the Mach VM to determine the platform-specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
	pd_entry_t *pdpte;
	boolean_t rval = FALSE;

	if (!cpu_64bit)
		return rval;

	PMAP_LOCK(p);

	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s &= ~(NBPDPT -1);
		rval = TRUE;
	}

	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT -1));
		rval = TRUE;
	}

	PMAP_UNLOCK(p);

	return rval;
}

/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page; this is taken into account in order
 * to return the correct page number in that case.
 */
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	pd_entry_t	*pdep;
	ppnum_t		ppn = 0;
	pd_entry_t	pde;
	pt_entry_t	pte;

	mp_disable_preemption();

	/* This refcount test is a band-aid--several infrastructural changes
	 * are necessary to eliminate invocation of this routine from arbitrary
	 * contexts.
	 */

	if (!pmap->ref_count)
		goto pfp_exit;

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
		if (pde & INTEL_PTE_PS) {
			/* large page: page number of the 2MiB frame's base,
			 * plus the 4K page index within the frame */
			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
			ppn += (ppnum_t) ptenum(va);
		}
		else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
			}
		}
	}
pfp_exit:
	mp_enable_preemption();

	return ppn;
}
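
/*
 * Illustrative sketch (guarded out of the build): recovering a full
 * physical address from pmap_find_phys(). The helper name is hypothetical.
 * pmap_find_phys() already handles 2MiB superpages by adding ptenum(va)
 * to the page number of the large frame's base.
 */
#if 0
static pmap_paddr_t
example_va_to_pa(pmap_t pmap, addr64_t va)
{
	ppnum_t ppn = pmap_find_phys(pmap, va);

	if (ppn == 0)
		return 0;	/* no valid translation */
	/* combine the 4K frame with the byte offset within the page */
	return (pmap_paddr_t) i386_ptob(ppn) | (va & PAGE_MASK);
}
#endif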
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */

void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
	pv_rooted_entry_t	pv_h, pv_e;
	pv_hashed_entry_t	pvh_e, nexth;
	vm_map_offset_t		vaddr;
	pmap_t			pmap;
	pt_entry_t		*ptep;

	assert(IS_MANAGED_PAGE(pn));

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits; while they're
	 * currently identical, they may not remain so.
	 * Potential optimization (here and in page_protect):
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = pv_e->va;
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep)
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}

void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		mfence();
		if (current_cpu_datap()->cpu_tlb_invalid)
			process_pmap_updates();
	}
}


/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */

void
pmap_enter(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	vm_prot_t		fault_type,
	unsigned int		flags,
	boolean_t		wired)
{
	(void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
}


kern_return_t
pmap_enter_options(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	__unused vm_prot_t	fault_type,
	unsigned int		flags,
	boolean_t		wired,
	unsigned int		options,
	void			*arg)
{
	pt_entry_t		*pte;
	pv_rooted_entry_t	pv_h;
	ppnum_t			pai;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_new;
	pt_entry_t		template;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t		need_tlbflush = FALSE;
	boolean_t		set_NX;
	char			oattr;
	boolean_t		old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t		superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t		delpage_pm_obj = NULL;
	uint64_t		delpage_pde_index = 0;
	pt_entry_t		old_pte;
	kern_return_t		kr_expand;

	pmap_intr_assert();

	if (pmap == PMAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* N.B. We can be supplied a zero page frame in the NOENTER case; it is
	 * an unused value for that scenario.
	 */
	assert(pn != vm_page_fictitious_addr);

	if (pn == vm_page_guard_addr)
		return KERN_INVALID_ARGUMENT;

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
		   pn, prot);

	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
		set_NX = FALSE;
	else
		set_NX = TRUE;

	if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
		set_NX = FALSE;
	}

	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */

	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;

	PMAP_LOCK(pmap);

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	if (superpage) {
		while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			PMAP_UNLOCK(pmap);
			kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
			if (kr_expand != KERN_SUCCESS)
				return kr_expand;
			PMAP_LOCK(pmap);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap
			 * going to grow pde level page(s)
			 */
			PMAP_UNLOCK(pmap);
			kr_expand = pmap_expand(pmap, vaddr, options);
			if (kr_expand != KERN_SUCCESS)
				return kr_expand;
			PMAP_LOCK(pmap);
		}
	}
	if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
		PMAP_UNLOCK(pmap);
		return KERN_SUCCESS;
	}

	if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
		*pte = 0;
	}

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;

	if (old_pa == 0 &&
	    (*pte & INTEL_PTE_COMPRESSED)) {
		/* one less "compressed" */
		OSAddAtomic64(-1, &pmap->stats.compressed);
		/* marker will be cleared below */
	}

	/*
	 * if we have a previous managed page, lock the pv entry now. after
	 * we lock it, check to see if someone beat us to the lock and if so
	 * drop the lock
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);	/* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}

	/*
	 * Special case if the incoming physical page is already mapped
	 * at this address.
	 */
	if (old_pa == pa) {
		pt_entry_t old_attributes =
		    *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);

		/*
		 * May be changing its wired attribute or protection
		 */

		template = pa_to_pte(pa) | INTEL_PTE_VALID;
		template |= pmap_get_cache_attributes(pa_index(pa));

		if (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
			if (!(flags & VM_MEM_GUARDED))
				template |= INTEL_PTE_PTA;
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap)
			template |= INTEL_PTE_USER;
		if (prot & VM_PROT_WRITE) {
			template |= INTEL_PTE_WRITE;
		}

		if (set_NX)
			template |= INTEL_PTE_NX;

		if (wired) {
			template |= INTEL_PTE_WIRED;
			if (!iswired(old_attributes)) {
				OSAddAtomic(+1, &pmap->stats.wired_count);
				pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		} else {
			if (iswired(old_attributes)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
		if (superpage)		/* this path cannot be used */
			template |= INTEL_PTE_PS;	/* to change the page size! */

		if (old_attributes == template)
			goto dont_update_pte;

		/* Determine delta, PV locked */
		need_tlbflush =
		    ((old_attributes ^ template) != INTEL_PTE_WIRED);

		if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) {
			if ((old_attributes ^ template) == INTEL_PTE_WRITE)
				need_tlbflush = FALSE;
		}

		/* store modified PTE and preserve RC bits */
		pt_entry_t npte, opte;
		do {
			opte = *pte;
			npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
		} while (!pmap_cmpx_pte(pte, opte, npte));
dont_update_pte:
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		goto Done;
	}

	/*
	 * Outline of code from here:
	 *	1) If va was mapped, update TLBs, remove the mapping
	 *	   and remove old pvlist entry.
	 *	2) Add pvlist entry for new mapping
	 *	3) Enter new mapping.
	 *
	 * If the old physical page is not managed step 1) is skipped
	 * (except for updating the TLBs), and the mapping is
	 * overwritten at step 3).  If the new physical page is not
	 * managed, step 2) is skipped.
	 */

	if (old_pa != (pmap_paddr_t) 0) {

		/*
		 * Don't do anything to pages outside valid memory here.
		 * Instead convince the code that enters a new mapping
		 * to overwrite the old one.
		 */
		/* invalidate the PTE */
		pmap_update_pte(pte, INTEL_PTE_VALID, 0);
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte = *pte;
		oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
		/* completely invalidate the PTE */
		pmap_store_pte(pte, 0);

		if (IS_MANAGED_PAGE(pai)) {
			pmap_assert(old_pa_locked == TRUE);
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);
			if (pmap != kernel_pmap) {
				if (IS_REUSABLE_PAGE(pai)) {
					assert(pmap->stats.reusable > 0);
					OSAddAtomic(-1, &pmap->stats.reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					assert(pmap->stats.internal > 0);
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					assert(pmap->stats.external > 0);
					OSAddAtomic(-1, &pmap->stats.external);
				}
			}
			/* test the saved PTE; the live one was zeroed above */
			if (iswired(old_pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem,
				    PAGE_SIZE);
			}
			pmap_phys_attributes[pai] |= oattr;

			/*
			 *	Remove the mapping from the pvlist for
			 *	this physical page.
			 *	We'll end up with either a rooted pv or a
			 *	hashed pv
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);

		} else {

			/*
			 *	old_pa is not managed.
			 *	Do removal part of accounting.
			 */

			if (pmap != kernel_pmap) {
#if 00
				assert(pmap->stats.device > 0);
				OSAddAtomic(-1, &pmap->stats.device);
#endif
			}
			if (iswired(old_pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
	}

	/*
	 * if we had a previously managed page locked, unlock it now
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);	/* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {

		/*
		 *	Step 2) Enter the mapping in the PV list for this
		 *	physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 *	No mappings yet, use rooted pv
			 */
			pv_h->va = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);

			if (options & PMAP_OPTIONS_INTERNAL) {
				pmap_phys_attributes[pai] |= PHYS_INTERNAL;
			} else {
				pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
			}
			if (options & PMAP_OPTIONS_REUSABLE) {
				pmap_phys_attributes[pai] |= PHYS_REUSABLE;
			} else {
				pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
			}
		} else {
			/*
			 *	Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(&pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * the pv list is empty. if we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's;
					 * otherwise, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart bringing in the pv_e with
					 * us.
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(&pvh_e);
					} else {
						UNLOCK_PVH(pai);
						PMAP_UNLOCK(pmap);
						pmap_pv_throttle(pmap);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e)
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

			pvh_e->va = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			pv_hash_add(pvh_e, pv_h);

			/*
			 *	Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}

		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
		if (pmap != kernel_pmap) {
			if (IS_REUSABLE_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.reusable);
				PMAP_STATS_PEAK(pmap->stats.reusable);
			} else if (IS_INTERNAL_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.internal);
				PMAP_STATS_PEAK(pmap->stats.internal);
			} else {
				OSAddAtomic(+1, &pmap->stats.external);
				PMAP_STATS_PEAK(pmap->stats.external);
			}
		}
	} else if (last_managed_page == 0) {
		/* Account for early mappings created before "managed pages"
		 * are determined. Consider consulting the available DRAM map.
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap != kernel_pmap) {
#if 00
			OSAddAtomic(+1, &pmap->stats.device);
			PMAP_STATS_PEAK(pmap->stats.device);
#endif
		}
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 * Build a template to speed up entering -
	 * only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;
	/*
	 * DRK: It may be worth asserting on cache attribute flags that diverge
	 * from the existing physical page attributes.
	 */
	template |= pmap_get_cache_attributes(pa_index(pa));

	if (flags & VM_MEM_NOT_CACHEABLE) {
		if (!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (set_NX)
		template |= INTEL_PTE_NX;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		OSAddAtomic(+1, &pmap->stats.wired_count);
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}
	if (superpage)
		template |= INTEL_PTE_PS;
	pmap_store_pte(pte, template);

	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been stored
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
Done:
	if (need_tlbflush == TRUE) {
		if (options & PMAP_OPTIONS_NOFLUSH)
			PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
		else
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
	}
	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}
	PMAP_UNLOCK(pmap);

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
		if (m == VM_PAGE_NULL)
			panic("pmap_enter: pte page not in object");
		vm_object_unlock(delpage_pm_obj);
		VM_PAGE_FREE(m);
		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
	return KERN_SUCCESS;
}
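
/*
 * Illustrative sketch (guarded out of the build): a minimal pmap_enter()
 * call establishing a wired, cacheable kernel mapping for one page. The
 * page number would come from the VM layer; flags of 0 select the default
 * WIMG (cacheable) attributes.
 */
#if 0
static void
example_enter_wired_kernel_page(vm_map_offset_t va, ppnum_t pn)
{
	pmap_enter(kernel_pmap, va, pn,
		   VM_PROT_READ | VM_PROT_WRITE,	/* prot */
		   VM_PROT_NONE,			/* fault_type */
		   0,					/* flags: default WIMG */
		   TRUE);				/* wired */
}
#endif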
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */

void
pmap_remove_range(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0);
}

void
pmap_remove_range_options(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte,
	int			options)
{
	pt_entry_t		*cpte;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_e;
	int			pvh_cnt = 0;
	int			num_removed, num_unwired, num_found, num_invalid;
	int			num_device, num_external, num_internal, num_reusable;
	uint64_t		num_compressed;
	ppnum_t			pai;
	pmap_paddr_t		pa;
	vm_map_offset_t		vaddr;

	num_removed = 0;
	num_unwired = 0;
	num_found = 0;
	num_invalid = 0;
	num_device = 0;
	num_external = 0;
	num_internal = 0;
	num_reusable = 0;
	num_compressed = 0;
	/* invalidate the PTEs first to "freeze" them */
	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {
		pt_entry_t p = *cpte;

		pa = pte_to_pa(p);
		if (pa == 0) {
			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_REMOVE) &&
			    (p & INTEL_PTE_COMPRESSED)) {
				/* one less "compressed" */
				num_compressed++;
				/* clear marker */
				/* XXX probably does not need to be atomic! */
				pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0);
			}
			continue;
		}
		num_found++;

		if (iswired(p))
			num_unwired++;

		pai = pa_index(pa);

		if (!IS_MANAGED_PAGE(pai)) {
			/*
			 *	Outside range of managed physical memory.
			 *	Just remove the mappings.
			 */
			pmap_store_pte(cpte, 0);
			num_device++;
			continue;
		}

		if ((p & INTEL_PTE_VALID) == 0)
			num_invalid++;

		/* invalidate the PTE */
		pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
	}

	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			continue;
		}
		num_removed++;
		if (IS_REUSABLE_PAGE(pai)) {
			num_reusable++;
		} else if (IS_INTERNAL_PAGE(pai)) {
			num_internal++;
		} else {
			num_external++;
		}

		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		pmap_phys_attributes[pai] |=
		    (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

		/* completely invalidate the PTE */
		pmap_store_pte(cpte, 0);

		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	} /* for loop */

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 *	Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed)
		panic("pmap_remove_range: resident_count");
#endif
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
	pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed));
	assert(pmap->stats.resident_count >= num_removed);
	OSAddAtomic(-num_removed, &pmap->stats.resident_count);

	if (pmap != kernel_pmap) {
#if 00
		assert(pmap->stats.device >= num_device);
		if (num_device)
			OSAddAtomic(-num_device, &pmap->stats.device);
#endif /* 00 */
		assert(pmap->stats.external >= num_external);
		if (num_external)
			OSAddAtomic(-num_external, &pmap->stats.external);
		assert(pmap->stats.internal >= num_internal);
		if (num_internal)
			OSAddAtomic(-num_internal, &pmap->stats.internal);
		assert(pmap->stats.reusable >= num_reusable);
		if (num_reusable)
			OSAddAtomic(-num_reusable, &pmap->stats.reusable);
		assert(pmap->stats.compressed >= num_compressed);
		if (num_compressed)
			OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
	}

#if TESTING
	if (pmap->stats.wired_count < num_unwired)
		panic("pmap_remove_range: wired_count");
#endif
	assert(pmap->stats.wired_count >= num_unwired);
	OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
	pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));

	return;
}


/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	pmap_remove_options(map, s64, e64, 0);
}
void
pmap_remove_options(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64,
	int		options)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;

	pmap_intr_assert();

	if (map == PMAP_NULL || s64 == e64)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
		   map,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);


	PMAP_LOCK(map);

#if 0
	/*
	 * Check that address range in the kernel does not overlap the stacks.
	 * We initialize local static min/max variables once to avoid making
	 * 2 function calls for every remove. Note also that these functions
	 * both return 0 before kernel stacks have been initialized, and hence
	 * the panic is not triggered in this case.
	 */
	if (map == kernel_pmap) {
		static vm_offset_t kernel_stack_min = 0;
		static vm_offset_t kernel_stack_max = 0;

		if (kernel_stack_min == 0) {
			kernel_stack_min = min_valid_stack_address();
			kernel_stack_max = max_valid_stack_address();
		}
		if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
		    (kernel_stack_min < e64 && e64 <= kernel_stack_max))
			panic("pmap_remove() attempted in kernel stack");
	}
#else

	/*
	 * The values of kernel_stack_min and kernel_stack_max are no longer
	 * relevant now that we allocate kernel stacks in the kernel map,
	 * so the old code above no longer applies. If we wanted to check that
	 * we weren't removing a mapping of a page in a kernel stack we'd
	 * mark the PTE with an unused bit and check that here.
	 */

#endif

	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(map, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte+1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range_options(map, s64, spte, epte,
						  options);
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			/* yield the pmap lock periodically to bound hold time */
			PMAP_UNLOCK(map);
			PMAP_LOCK(map);
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_UNLOCK(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
		   map, 0, 0, 0, 0);

}

void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	pmap_page_protect_options(pn, prot, 0, NULL);
}
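
/*
 * Illustrative sketch (guarded out of the build): the two common uses of
 * pmap_page_protect(). Downgrading to VM_PROT_READ write-protects every
 * mapping of the page (e.g., before cleaning it); any protection outside
 * the read/execute cases, such as VM_PROT_NONE, removes all mappings.
 */
#if 0
static void
example_page_protect(ppnum_t pn)
{
	pmap_page_protect(pn, VM_PROT_READ);	/* write-protect all mappings */
	pmap_page_protect(pn, VM_PROT_NONE);	/* remove all mappings */
}
#endif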
/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
void
pmap_page_protect_options(
	ppnum_t		pn,
	vm_prot_t	prot,
	unsigned int	options,
	void		*arg)
{
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	nexth;
	int			pvh_cnt = 0;
	pv_rooted_entry_t	pv_h;
	pv_rooted_entry_t	pv_e;
	pv_hashed_entry_t	pvh_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	boolean_t		remove;
	pt_entry_t		new_pte_value;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}
	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
		   pn, prot, 0, 0, 0);

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;		/* nothing to do */
	default:
		remove = TRUE;
		break;
	}

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);


	/*
	 * Walk down PV list, if any, changing or removing all mappings.
	 */
	if (pv_h->pmap == PMAP_NULL)
		goto done;

	pv_e = pv_h;
	pvh_e = (pv_hashed_entry_t) pv_e;	/* cheat */

	do {
		vm_map_offset_t vaddr;

		pmap = pv_e->pmap;
		vaddr = pv_e->va;
		pte = pmap_pte(pmap, vaddr);

		/* check for a missing PTE before dereferencing it below */
		if (0 == pte) {
			panic("pmap_page_protect() "
			      "pmap=%p pn=0x%x vaddr=0x%llx\n",
			      pmap, pn, vaddr);
		}

		pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
		    "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

		nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);

		/*
		 * Remove the mapping if new protection is NONE
		 */
		if (remove) {

			/* Remove per-pmap wired count */
			if (iswired(*pte)) {
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}

			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_COMPRESSOR) &&
			    IS_INTERNAL_PAGE(pai)) {
				/* adjust "reclaimed" stats */
				OSAddAtomic64(+1, &pmap->stats.compressed);
				PMAP_STATS_PEAK(pmap->stats.compressed);
				pmap->stats.compressed_lifetime++;
				/* mark this PTE as having been "reclaimed" */
				new_pte_value = INTEL_PTE_COMPRESSED;
			} else {
				new_pte_value = 0;
			}

			if (options & PMAP_OPTIONS_NOREFMOD) {
				pmap_store_pte(pte, new_pte_value);

				if (options & PMAP_OPTIONS_NOFLUSH)
					PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
				else
					PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			} else {
				/*
				 * Remove the mapping, collecting dirty bits.
				 */
				pmap_update_pte(pte, INTEL_PTE_VALID, 0);

				PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
				pmap_phys_attributes[pai] |=
				    *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
				pmap_store_pte(pte, new_pte_value);
			}
#if TESTING
			if (pmap->stats.resident_count < 1)
				panic("pmap_page_protect: resident_count");
#endif
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				/*
				 * This removal is only being done so we can send this page to
				 * the compressor; therefore it mustn't affect total task footprint.
				 */
				pmap_ledger_credit(pmap, task_ledgers.phys_compressed, PAGE_SIZE);
			} else {
				pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
			}

			if (pmap != kernel_pmap) {
				if (IS_REUSABLE_PAGE(pai)) {
					assert(pmap->stats.reusable > 0);
					OSAddAtomic(-1, &pmap->stats.reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					assert(pmap->stats.internal > 0);
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					assert(pmap->stats.external > 0);
					OSAddAtomic(-1, &pmap->stats.external);
				}
			}

			/*
			 * Deal with the pv_rooted_entry.
			 */

			if (pv_e == pv_h) {
				/*
				 * Fix up head later.
				 */
				pv_h->pmap = PMAP_NULL;
			} else {
				/*
				 * Delete this entry.
				 */
				pv_hash_remove(pvh_e);
				pvh_e->qlink.next = (queue_entry_t) pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pvh_cnt++;
			}
		} else {
			/*
			 * Write-protect, after opportunistic refmod collect
			 */
			pmap_phys_attributes[pai] |=
			    *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, INTEL_PTE_WRITE, 0);

			if (options & PMAP_OPTIONS_NOFLUSH)
				PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
			else
				PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
		}
		pvh_e = nexth;
	} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);


	/*
	 * If pv_head mapping was removed, fix it up.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

		if (pvh_e != (pv_hashed_entry_t) pv_h) {
			pv_hash_remove(pvh_e);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va = pvh_e->va;
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pvh_cnt++;
		}
	}
	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
done:
	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}


/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits,
	unsigned int	options,
	void		*arg)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	char			attributes = 0;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
		   pn, bits, 0, 0, 0);

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we hold
	 * the PV lock for this page.
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */

		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;

			/*
			 * Clear modify and/or reference bits.
			 */
			pte = pmap_pte(pmap, va);
			attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, bits, 0);
			/* Ensure all processors using this translation
			 * invalidate this TLB entry. The invalidation *must*
			 * follow the PTE update, to ensure that the TLB
			 * shadow of the 'D' bit (in particular) is
			 * synchronized with the updated PTE.
			 */
			if (options & PMAP_OPTIONS_NOFLUSH) {
				if (arg)
					PMAP_UPDATE_TLBS_DELAYED(pmap, va, va + PAGE_SIZE, (pmap_flush_context *)arg);
			} else
				PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while (pv_e != (pv_hashed_entry_t)pv_h);
	}
	/* Opportunistic refmod collection, annulled
	 * if both REF and MOD are being cleared.
	 */
	pmap_phys_attributes[pai] |= attributes;
	pmap_phys_attributes[pai] &= (~bits);

	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

/*
 *	Check specified attribute bits.
 */
int
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	int			attributes = 0;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return 0;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return 0;
	}

	/*
	 * Fast check: if the bits have already been collected, there is
	 * no need to take any locks. If they are not set, we must recheck
	 * after taking the lock, in case they were collected while we
	 * were waiting for it.
	 */
	if ((pmap_phys_attributes[pai] & bits) == bits)
		return bits;

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	attributes = pmap_phys_attributes[pai] & bits;


	/*
	 * Walk down PV list, checking the mappings until we
	 * reach the end or we've found the desired attributes.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;
		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;
			/*
			 * pick up modify and/or reference bits from mapping
			 */

			pte = pmap_pte(pmap, va);
			attributes |= (int)(*pte & bits);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while ((attributes != bits) &&
			 (pv_e != (pv_hashed_entry_t)pv_h));
	}
	pmap_phys_attributes[pai] |= attributes;

	UNLOCK_PVH(pai);
	return (attributes);
}
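
/*
 * Illustrative sketch (guarded out of the build): modify/reference queries
 * can be phrased directly in terms of phys_attribute_test(); the helper
 * name below is hypothetical.
 */
#if 0
static boolean_t
example_page_is_modified(ppnum_t pn)
{
	return (phys_attribute_test(pn, PHYS_MODIFIED) != 0);
}
#endif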
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	boolean_t	wired)
{
	pt_entry_t	*pte;

	PMAP_LOCK(map);

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
		/*
		 * wiring down mapping
		 */
		pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
		OSAddAtomic(+1, &map->stats.wired_count);
		pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
	}
	else if (!wired && iswired(*pte)) {
		/*
		 * unwiring mapping
		 */
		assert(map->stats.wired_count >= 1);
		OSAddAtomic(-1, &map->stats.wired_count);
		pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
		pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
	}

	PMAP_UNLOCK(map);
}

/*
 *	"Backdoor" direct map routine for early mappings.
 *	Useful for mapping memory outside the range of managed
 *	physical memory (i.e., devices).
 *	Sets A, D and NC if requested.
 */

vm_offset_t
pmap_map_bd(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	pt_entry_t	template;
	pt_entry_t	*pte;
	spl_t		spl;
	vm_offset_t	base = virt;
	template = pa_to_pte(start_addr)
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;

	if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED)))
			template |= INTEL_PTE_PTA;
	}

#if defined(__x86_64__)
	if ((prot & VM_PROT_EXECUTE) == 0)
		template |= INTEL_PTE_NX;
#endif

	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	while (start_addr < end_addr) {
		spl = splhigh();
		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		pmap_store_pte(pte, template);
		splx(spl);
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	flush_tlb_raw();
	/* "virt" has advanced past the mapped range, so flush [base, virt) */
	PMAP_UPDATE_TLBS(kernel_pmap, base, virt);
	return(virt);
}
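
/*
 * Illustrative sketch (guarded out of the build): using pmap_map_bd()
 * during early boot to map one page of device registers uncached. The
 * virtual window "virt" and the physical address "dev_pa" are hypothetical
 * and owned by the caller.
 */
#if 0
static vm_offset_t
example_map_device_page(vm_offset_t virt, vm_map_offset_t dev_pa)
{
	return pmap_map_bd(virt, dev_pa, dev_pa + PAGE_SIZE,
			   VM_PROT_READ | VM_PROT_WRITE,
			   VM_MEM_NOT_CACHEABLE);
}
#endif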
void
pmap_reusable(
	pmap_t		pmap,
	addr64_t	s64,
	addr64_t	e64,
	boolean_t	reusable)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;

	pmap_intr_assert();

	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);

	PMAP_LOCK(pmap);

	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(pmap, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/* superpage: not supported */
			} else {
				spte = pmap_pte(pmap,
						(s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
				pmap_reusable_range(pmap, s64, spte, epte,
						    reusable);
			}
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			PMAP_UNLOCK(pmap);
			PMAP_LOCK(pmap);
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_UNLOCK(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_END,
		   pmap, reusable, 0, 0, 0);
}

void
pmap_reusable_range(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte,
	boolean_t		reusable)
{
	pt_entry_t		*cpte;
	int			num_external, num_internal, num_reusable;
	ppnum_t			pai;
	pmap_paddr_t		pa;
	vm_map_offset_t		vaddr;

	num_external = 0;
	num_internal = 0;
	num_reusable = 0;

	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			continue;
		}
		if (reusable) {
			/* we want to set "reusable" */
			if (IS_REUSABLE_PAGE(pai)) {
				/* already reusable: no change */
			} else {
				pmap_phys_attributes[pai] |= PHYS_REUSABLE;
				/* one more "reusable" */
				num_reusable++;
				if (IS_INTERNAL_PAGE(pai)) {
					/* one less "internal" */
					num_internal--;
				} else {
					/* one less "external" */
					num_external--;
				}
			}
		} else {
			/* we want to clear "reusable" */
			if (IS_REUSABLE_PAGE(pai)) {
				pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
				/* one less "reusable" */
				num_reusable--;
				if (IS_INTERNAL_PAGE(pai)) {
					/* one more "internal" */
					num_internal++;
				} else {
					/* one more "external" */
					num_external++;
				}
			} else {
				/* already not reusable: no change */
			}
		}

		UNLOCK_PVH(pai);

	} /* for loop */

	/*
	 *	Update the counts
	 */
	if (pmap != kernel_pmap) {
		if (num_external) {
			OSAddAtomic(num_external, &pmap->stats.external);
			PMAP_STATS_PEAK(pmap->stats.external);
		}
		assert(pmap->stats.external >= 0);
		if (num_internal) {
			OSAddAtomic(num_internal, &pmap->stats.internal);
			PMAP_STATS_PEAK(pmap->stats.internal);
		}
		assert(pmap->stats.internal >= 0);
		if (num_reusable) {
			OSAddAtomic(num_reusable, &pmap->stats.reusable);
			PMAP_STATS_PEAK(pmap->stats.reusable);
		}
		assert(pmap->stats.reusable >= 0);
	}

	return;
}

unsigned int
pmap_query_resident(
	pmap_t		pmap,
	addr64_t	s64,
	addr64_t	e64)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;
	unsigned int	result;

	pmap_intr_assert();

	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
		return 0;

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);

	result = 0;

	PMAP_LOCK(pmap);

	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(pmap, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/* superpage: not supported */
			} else {
				spte = pmap_pte(pmap,
						(s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];

				for (; spte < epte; spte++) {
					if (pte_to_pa(*spte) != 0) {
						result++;
					}
				}

			}
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			PMAP_UNLOCK(pmap);
			PMAP_LOCK(pmap);
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_UNLOCK(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
		   pmap, 0, 0, 0, 0);

	return result;
}
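
/*
 * Illustrative sketch (guarded out of the build): converting
 * pmap_query_resident()'s page count to bytes for a user pmap over a
 * hypothetical range.
 */
#if 0
static uint64_t
example_resident_bytes(pmap_t pmap, addr64_t start, addr64_t end)
{
	return (uint64_t) pmap_query_resident(pmap, start, end) * PAGE_SIZE;
}
#endif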