/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach_assert.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

void		pmap_remove_range_options(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte,
			int		options);

void		pmap_reusable_range(
			pmap_t		pmap,
			vm_map_offset_t	va,
			pt_entry_t	*spte,
			pt_entry_t	*epte,
			boolean_t	reusable);

uint32_t pmap_update_clear_pte_count;

/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on an NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;

/*
 *	kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 *	grand    = the pmap that we will nest subord into
 *	subord   = the pmap that goes into the grand
 *	va_start = start of range in the grand pmap to be inserted
 *	nstart   = start of range in the nested (subord) pmap
 *	size     = size of the nest area (up to 16TB)
 *
 *	Inserts a pmap into another. This is used to implement shared segments.
 *
 *	Note that we depend upon higher level VM locks to ensure that things don't change while
 *	we are doing this. For example, VM should not be doing any pmap enters while it is nesting
 *	or do 2 nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */

kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde, *npde;
	unsigned int	i;
	uint64_t	num_pde;

	if ((size & (pmap_nesting_size_min-1)) ||
	    (va_start & (pmap_nesting_size_min-1)) ||
	    (nstart & (pmap_nesting_size_min-1)) ||
	    ((size >> 28) > 65536))	/* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;

	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}

	if (va_start != nstart)
		panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
		   (uintptr_t) grand, (uintptr_t) subord,
		   (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);

	nvaddr = (vm_map_offset_t)nstart;
	num_pde = size >> PDESHIFT;

	PMAP_LOCK(subord);

	subord->pm_shared = TRUE;

	for (i = 0; i < num_pde;) {
		if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {

			npde = pmap64_pdpt(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK(subord);
				pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(subord);
				npde = pmap64_pdpt(subord, nvaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			nvaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK(subord);
				pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(subord);
				npde = pmap_pde(subord, nvaddr);
			}
			nvaddr += NBPDE;
			i++;
		}
	}

	PMAP_UNLOCK(subord);

	vaddr = (vm_map_offset_t)va_start;

	PMAP_LOCK(grand);

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0)
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK(grand);
				pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(grand);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (pde == 0)
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nstart);
			if (npde == 0)
				panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
			tpde = *npde;
			nstart += NBPDE;
			pde = pmap_pde(grand, vaddr);
			if ((0 == pde) && cpu_64bit) {
				PMAP_UNLOCK(grand);
				pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK(grand);
				pde = pmap_pde(grand, vaddr);
			}

			if (pde == 0)
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			vaddr += NBPDE;
			pmap_store_pte(pde, tpde);
			i++;
		}
	}

	PMAP_UNLOCK(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
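
/*
 * Illustrative sketch (not compiled): restates the argument checks
 * pmap_nest() performs above, assuming the same NBPDE-based minimum
 * alignment and the 16TB size ceiling. The helper name is hypothetical.
 */
#if 0
static boolean_t
pmap_nest_args_valid(addr64_t va_start, addr64_t nstart, uint64_t size)
{
	if (size == 0)
		return FALSE;
	/* all three quantities must be aligned to the minimum nesting size */
	if ((size | va_start | nstart) & (pmap_nesting_size_min - 1))
		return FALSE;
	/* maximum size we can nest is 16TB */
	if ((size >> 28) > 65536)
		return FALSE;
	/* disparate "grand"/"subord" offsets are currently disallowed */
	return (va_start == nstart);
}
#endif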

/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand = the pmap that we will un-nest subord from
 *	vaddr = start of range in pmap to be unnested
 *
 *	Removes a pmap from another. This is used to implement shared segments.
 */

kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {

	pd_entry_t *pde;
	unsigned int i;
	uint64_t num_pde;
	addr64_t va_start, va_end;
	uint64_t npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
		   (uintptr_t) grand,
		   (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);

	if ((size & (pmap_nesting_size_min-1)) ||
	    (vaddr & (pmap_nesting_size_min-1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		      grand, vaddr, size);
	}

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE-1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
	size = va_end - va_start;

	PMAP_LOCK(grand);

	num_pde = size >> PDESHIFT;
	vaddr = va_start;

	for (i = 0; i < num_pde; ) {
		if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0)
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_UNLOCK(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}

/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
	pd_entry_t *pdpte;
	boolean_t rval = FALSE;

	if (!cpu_64bit)
		return rval;

	PMAP_LOCK(p);

	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s &= ~(NBPDPT -1);
		rval = TRUE;
	}

	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT -1));
		rval = TRUE;
	}

	PMAP_UNLOCK(p);

	return rval;
}

/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	pd_entry_t	*pdep;
	ppnum_t		ppn = 0;
	pd_entry_t	pde;
	pt_entry_t	pte;

	mp_disable_preemption();

	/* This refcount test is a band-aid--several infrastructural changes
	 * are necessary to eliminate invocation of this routine from arbitrary
	 * contexts.
	 */

	if (!pmap->ref_count)
		goto pfp_exit;

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
		if (pde & INTEL_PTE_PS) {
			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
			ppn += (ppnum_t) ptenum(va);
		}
		else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
			}
		}
	}
pfp_exit:
	mp_enable_preemption();

	return ppn;
}
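
/*
 * Worked example of the large-page case handled above (an illustrative
 * sketch, not compiled, assuming 2MiB pages): the PDE supplies the base of
 * a 512-page physical run, and ptenum(va) selects the 4K page within it.
 * The helper name is hypothetical.
 */
#if 0
static ppnum_t
pmap_find_phys_largepage(pd_entry_t pde, addr64_t va)
{
	/* base 4K page number of the 2MiB physical run ... */
	ppnum_t ppn = (ppnum_t) i386_btop(pte_to_pa(pde));

	/* ... plus the index of the 4K page within that run */
	return ppn + (ppnum_t) ptenum(va);
}
#endif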

/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */

void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
	pv_rooted_entry_t	pv_h, pv_e;
	pv_hashed_entry_t	pvh_e, nexth;
	vm_map_offset_t		vaddr;
	pmap_t			pmap;
	pt_entry_t		*ptep;

	assert(IS_MANAGED_PAGE(pn));

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits; while they're
	 * currently identical, they may not remain so.
	 * Potential optimization (here and in page_protect),
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = pv_e->va;
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep)
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}

void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		mfence();
		if (current_cpu_datap()->cpu_tlb_invalid)
			process_pmap_updates();
	}
}


/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */

void
pmap_enter(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	vm_prot_t		fault_type,
	unsigned int		flags,
	boolean_t		wired)
{
	(void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
}
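
/*
 * Illustrative sketch (not compiled) of a typical wired, writable mapping
 * made through the wrapper above; "some_vaddr" and "some_ppn" are
 * placeholders, and the function name is hypothetical.
 */
#if 0
static void
pmap_enter_example(vm_map_offset_t some_vaddr, ppnum_t some_ppn)
{
	pmap_enter(kernel_pmap, some_vaddr, some_ppn,
		   VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
		   0 /* flags */, TRUE /* wired */);
}
#endif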

kern_return_t
pmap_enter_options(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	__unused vm_prot_t	fault_type,
	unsigned int		flags,
	boolean_t		wired,
	unsigned int		options,
	void			*arg)
{
	pt_entry_t		*pte;
	pv_rooted_entry_t	pv_h;
	ppnum_t			pai;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_new;
	pt_entry_t		template;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t		need_tlbflush = FALSE;
	boolean_t		set_NX;
	char			oattr;
	boolean_t		old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t		superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t		delpage_pm_obj = NULL;
	uint64_t		delpage_pde_index = 0;
	pt_entry_t		old_pte;
	kern_return_t		kr_expand;

	pmap_intr_assert();

	if (pmap == PMAP_NULL)
		return KERN_INVALID_ARGUMENT;

	/* N.B. We can be supplied a zero page frame in the NOENTER case; it's an
	 * unused value for that scenario.
	 */
	assert(pn != vm_page_fictitious_addr);

	if (pn == vm_page_guard_addr)
		return KERN_INVALID_ARGUMENT;

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
		   pn, prot);

	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
		set_NX = FALSE;
	else
		set_NX = TRUE;

	if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
		set_NX = FALSE;
	}

	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */

	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;

	PMAP_LOCK(pmap);

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	if (superpage) {
		while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			PMAP_UNLOCK(pmap);
			kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
			if (kr_expand != KERN_SUCCESS)
				return kr_expand;
			PMAP_LOCK(pmap);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap
			 * going to grow pde level page(s)
			 */
			PMAP_UNLOCK(pmap);
			kr_expand = pmap_expand(pmap, vaddr, options);
			if (kr_expand != KERN_SUCCESS)
				return kr_expand;
			PMAP_LOCK(pmap);
		}
	}
	if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
		PMAP_UNLOCK(pmap);
		return KERN_SUCCESS;
	}

	if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
		*pte = 0;
	}

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;

	if (old_pa == 0 &&
	    (*pte & INTEL_PTE_COMPRESSED)) {
		/* one less "compressed" */
		OSAddAtomic64(-1, &pmap->stats.compressed);
		/* marker will be cleared below */
	}

	/*
	 * if we have a previous managed page, lock the pv entry now. after
	 * we lock it, check to see if someone beat us to the lock and if so
	 * drop the lock
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);	/* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}

	/*
	 * Special case if the incoming physical page is already mapped
	 * at this address.
	 */
	if (old_pa == pa) {
		pt_entry_t old_attributes =
		    *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);

		/*
		 * May be changing its wired attribute or protection
		 */

		template = pa_to_pte(pa) | INTEL_PTE_VALID;
		template |= pmap_get_cache_attributes(pa_index(pa));

		if (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
			if (!(flags & VM_MEM_GUARDED))
				template |= INTEL_PTE_PTA;
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap)
			template |= INTEL_PTE_USER;
		if (prot & VM_PROT_WRITE) {
			template |= INTEL_PTE_WRITE;
		}

		if (set_NX)
			template |= INTEL_PTE_NX;

		if (wired) {
			template |= INTEL_PTE_WIRED;
			if (!iswired(old_attributes)) {
				OSAddAtomic(+1, &pmap->stats.wired_count);
				pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		} else {
			if (iswired(old_attributes)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
		if (superpage)			/* this path cannot be used */
			template |= INTEL_PTE_PS;	/* to change the page size! */

		if (old_attributes == template)
			goto dont_update_pte;

		/* Determine delta, PV locked */
		need_tlbflush =
		    ((old_attributes ^ template) != INTEL_PTE_WIRED);

		if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) {
			if ((old_attributes ^ template) == INTEL_PTE_WRITE)
				need_tlbflush = FALSE;
		}

		/* store modified PTE and preserve RC bits */
		pt_entry_t npte, opte;
		do {
			opte = *pte;
			npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
		} while (!pmap_cmpx_pte(pte, opte, npte));
dont_update_pte:
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		goto Done;
	}

	/*
	 * Outline of code from here:
	 * 1) If va was mapped, update TLBs, remove the mapping
	 *    and remove old pvlist entry.
	 * 2) Add pvlist entry for new mapping
	 * 3) Enter new mapping.
	 *
	 * If the old physical page is not managed step 1) is skipped
	 * (except for updating the TLBs), and the mapping is
	 * overwritten at step 3).  If the new physical page is not
	 * managed, step 2) is skipped.
	 */

	if (old_pa != (pmap_paddr_t) 0) {

		/*
		 * Don't do anything to pages outside valid memory here.
		 * Instead convince the code that enters a new mapping
		 * to overwrite the old one.
		 */

		/* invalidate the PTE */
		pmap_update_pte(pte, INTEL_PTE_VALID, 0);
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte = *pte;
		oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
		/* completely invalidate the PTE */
		pmap_store_pte(pte, 0);

		if (IS_MANAGED_PAGE(pai)) {
			pmap_assert(old_pa_locked == TRUE);
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);
			if (pmap != kernel_pmap) {
				if (IS_REUSABLE_PAGE(pai)) {
					assert(pmap->stats.reusable > 0);
					OSAddAtomic(-1, &pmap->stats.reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					assert(pmap->stats.internal > 0);
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					assert(pmap->stats.external > 0);
					OSAddAtomic(-1, &pmap->stats.external);
				}
			}
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem,
				    PAGE_SIZE);
			}
			pmap_phys_attributes[pai] |= oattr;

			/*
			 *	Remove the mapping from the pvlist for
			 *	this physical page.
			 *	We'll end up with either a rooted pv or a
			 *	hashed pv
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);

		} else {

			/*
			 *	old_pa is not managed.
			 *	Do removal part of accounting.
			 */

			if (pmap != kernel_pmap) {
#if 00
				assert(pmap->stats.device > 0);
				OSAddAtomic(-1, &pmap->stats.device);
#endif
			}
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}
		}
	}

	/*
	 * if we had a previously managed page locked, unlock it now
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);	/* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {

		/*
		 *	Step 2) Enter the mapping in the PV list for this
		 *	physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 *	No mappings yet, use rooted pv
			 */
			pv_h->va = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);

			if (options & PMAP_OPTIONS_INTERNAL) {
				pmap_phys_attributes[pai] |= PHYS_INTERNAL;
			} else {
				pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
			}
			if (options & PMAP_OPTIONS_REUSABLE) {
				pmap_phys_attributes[pai] |= PHYS_REUSABLE;
			} else {
				pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
			}
		} else {
			/*
			 *	Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(&pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * the pv list is empty. if we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's,
					 * else, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart bringing in the pv_e with
					 * us.
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(&pvh_e);
					} else {
						UNLOCK_PVH(pai);
						PMAP_UNLOCK(pmap);
						pmap_pv_throttle(pmap);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e)
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

			pvh_e->va = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			pv_hash_add(pvh_e, pv_h);

			/*
			 *	Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}

		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
		if (pmap != kernel_pmap) {
			if (IS_REUSABLE_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.reusable);
				PMAP_STATS_PEAK(pmap->stats.reusable);
			} else if (IS_INTERNAL_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.internal);
				PMAP_STATS_PEAK(pmap->stats.internal);
			} else {
				OSAddAtomic(+1, &pmap->stats.external);
				PMAP_STATS_PEAK(pmap->stats.external);
			}
		}
	} else if (last_managed_page == 0) {
		/* Account for early mappings created before "managed pages"
		 * are determined. Consider consulting the available DRAM map.
		 */
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap != kernel_pmap) {
#if 00
			OSAddAtomic(+1, &pmap->stats.device);
			PMAP_STATS_PEAK(pmap->stats.device);
#endif
		}
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 * Build a template to speed up entering -
	 * only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;
	/*
	 * DRK: It may be worth asserting on cache attribute flags that diverge
	 * from the existing physical page attributes.
	 */

	template |= pmap_get_cache_attributes(pa_index(pa));

	if (flags & VM_MEM_NOT_CACHEABLE) {
		if (!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (set_NX)
		template |= INTEL_PTE_NX;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		OSAddAtomic(+1, &pmap->stats.wired_count);
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}
	if (superpage)
		template |= INTEL_PTE_PS;
	pmap_store_pte(pte, template);

	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been stored
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
Done:
	if (need_tlbflush == TRUE) {
		if (options & PMAP_OPTIONS_NOFLUSH)
			PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
		else
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
	}
	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}
	PMAP_UNLOCK(pmap);

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
		if (m == VM_PAGE_NULL)
			panic("pmap_enter: pte page not in object");
		vm_object_unlock(delpage_pm_obj);
		VM_PAGE_FREE(m);
		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
	return KERN_SUCCESS;
}
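
/*
 * Illustrative sketch (not compiled): pre-expanding the paging hierarchy for
 * a VA without installing a translation, via the PMAP_EXPAND_OPTIONS_NOENTER
 * path handled in pmap_enter_options() above. A zero page frame is acceptable
 * here per the comment near the top of that routine; the helper name is
 * hypothetical.
 */
#if 0
static kern_return_t
pmap_preexpand_example(pmap_t map, vm_map_offset_t vaddr)
{
	return pmap_enter_options(map, vaddr, 0 /* pn unused for NOENTER */,
				  VM_PROT_NONE, VM_PROT_NONE, 0, FALSE,
				  PMAP_EXPAND_OPTIONS_NOENTER, NULL);
}
#endif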

/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */

void
pmap_remove_range(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0);
}

void
pmap_remove_range_options(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte,
	int			options)
{
	pt_entry_t		*cpte;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_e;
	int			pvh_cnt = 0;
	int			num_removed, num_unwired, num_found, num_invalid;
	int			num_device, num_external, num_internal, num_reusable;
	uint64_t		num_compressed;
	ppnum_t			pai;
	pmap_paddr_t		pa;
	vm_map_offset_t		vaddr;

	num_removed = 0;
	num_unwired = 0;
	num_found = 0;
	num_invalid = 0;
	num_device = 0;
	num_external = 0;
	num_internal = 0;
	num_reusable = 0;
	num_compressed = 0;
	/* invalidate the PTEs first to "freeze" them */
	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {
		pt_entry_t p = *cpte;

		pa = pte_to_pa(p);
		if (pa == 0) {
			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_REMOVE) &&
			    (p & INTEL_PTE_COMPRESSED)) {
				/* one less "compressed" */
				num_compressed++;
				/* clear marker */
				/* XXX probably does not need to be atomic! */
				pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0);
			}
			continue;
		}
		num_found++;

		if (iswired(p))
			num_unwired++;

		pai = pa_index(pa);

		if (!IS_MANAGED_PAGE(pai)) {
			/*
			 *	Outside range of managed physical memory.
			 *	Just remove the mappings.
			 */
			pmap_store_pte(cpte, 0);
			num_device++;
			continue;
		}

		if ((p & INTEL_PTE_VALID) == 0)
			num_invalid++;

		/* invalidate the PTE */
		pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
	}

	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			continue;
		}
		num_removed++;
		if (IS_REUSABLE_PAGE(pai)) {
			num_reusable++;
		} else if (IS_INTERNAL_PAGE(pai)) {
			num_internal++;
		} else {
			num_external++;
		}

		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		pmap_phys_attributes[pai] |=
			(char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

		/* completely invalidate the PTE */
		pmap_store_pte(cpte, 0);

		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	} /* for loop */

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 *	Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed)
		panic("pmap_remove_range: resident_count");
#endif
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
	pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed));
	assert(pmap->stats.resident_count >= num_removed);
	OSAddAtomic(-num_removed, &pmap->stats.resident_count);

	if (pmap != kernel_pmap) {
#if 00
		assert(pmap->stats.device >= num_device);
		if (num_device)
			OSAddAtomic(-num_device, &pmap->stats.device);
#endif /* 00 */
		assert(pmap->stats.external >= num_external);
		if (num_external)
			OSAddAtomic(-num_external, &pmap->stats.external);
		assert(pmap->stats.internal >= num_internal);
		if (num_internal)
			OSAddAtomic(-num_internal, &pmap->stats.internal);
		assert(pmap->stats.reusable >= num_reusable);
		if (num_reusable)
			OSAddAtomic(-num_reusable, &pmap->stats.reusable);
		assert(pmap->stats.compressed >= num_compressed);
		if (num_compressed)
			OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
	}

#if TESTING
	if (pmap->stats.wired_count < num_unwired)
		panic("pmap_remove_range: wired_count");
#endif
	assert(pmap->stats.wired_count >= num_unwired);
	OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
	pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));

	return;
}


/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	pmap_remove_options(map, s64, e64, 0);
}
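
/*
 * Illustrative sketch (not compiled): per the contract above, callers pass
 * page-aligned bounds, e.g. to remove the mappings covering a single page.
 * The helper name is hypothetical.
 */
#if 0
static void
pmap_remove_one_page_example(pmap_t map, addr64_t addr)
{
	addr64_t s = addr & ~((addr64_t) PAGE_MASK);	/* round down to a page boundary */

	pmap_remove(map, s, s + PAGE_SIZE);
}
#endif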

void
pmap_remove_options(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64,
	int		options)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;

	pmap_intr_assert();

	if (map == PMAP_NULL || s64 == e64)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
		   map,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);


	PMAP_LOCK(map);

#if 0
	/*
	 * Check that address range in the kernel does not overlap the stacks.
	 * We initialize local static min/max variables once to avoid making
	 * 2 function calls for every remove. Note also that these functions
	 * both return 0 before kernel stacks have been initialized, and hence
	 * the panic is not triggered in this case.
	 */
	if (map == kernel_pmap) {
		static vm_offset_t kernel_stack_min = 0;
		static vm_offset_t kernel_stack_max = 0;

		if (kernel_stack_min == 0) {
			kernel_stack_min = min_valid_stack_address();
			kernel_stack_max = max_valid_stack_address();
		}
		if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
		    (kernel_stack_min < e64 && e64 <= kernel_stack_max))
			panic("pmap_remove() attempted in kernel stack");
	}
#else

	/*
	 * The values of kernel_stack_min and kernel_stack_max are no longer
	 * relevant now that we allocate kernel stacks in the kernel map,
	 * so the old code above no longer applies. If we wanted to check that
	 * we weren't removing a mapping of a page in a kernel stack we'd
	 * mark the PTE with an unused bit and check that here.
	 */

#endif

	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(map, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte+1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range_options(map, s64, spte, epte,
						  options);
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			PMAP_UNLOCK(map)
			/* TODO: Rapid release/reacquisition can defeat
			 * the "backoff" intent here; either consider a
			 * fair spinlock, or a scheme whereby each lock
			 * attempt marks the processor as within a spinlock
			 * acquisition, and scan CPUs here to determine
			 * if a backoff is necessary, to avoid sacrificing
			 * performance in the common case.
			 */
			PMAP_LOCK(map)
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_UNLOCK(map);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
		   map, 0, 0, 0, 0);

}

void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	pmap_page_protect_options(pn, prot, 0, NULL);
}

/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
void
pmap_page_protect_options(
	ppnum_t		pn,
	vm_prot_t	prot,
	unsigned int	options,
	void		*arg)
{
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	nexth;
	int			pvh_cnt = 0;
	pv_rooted_entry_t	pv_h;
	pv_rooted_entry_t	pv_e;
	pv_hashed_entry_t	pvh_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	boolean_t		remove;
	pt_entry_t		new_pte_value;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}
	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
		   pn, prot, 0, 0, 0);

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;		/* nothing to do */
	default:
		remove = TRUE;
		break;
	}

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);


	/*
	 * Walk down PV list, if any, changing or removing all mappings.
	 */
	if (pv_h->pmap == PMAP_NULL)
		goto done;

	pv_e = pv_h;
	pvh_e = (pv_hashed_entry_t) pv_e;	/* cheat */

	do {
		vm_map_offset_t vaddr;

		pmap = pv_e->pmap;
		vaddr = pv_e->va;
		pte = pmap_pte(pmap, vaddr);

		pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
		    "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

		if (0 == pte) {
			panic("pmap_page_protect() "
			      "pmap=%p pn=0x%x vaddr=0x%llx\n",
			      pmap, pn, vaddr);
		}
		nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);

		/*
		 * Remove the mapping if new protection is NONE
		 */
		if (remove) {

			/* Remove per-pmap wired count */
			if (iswired(*pte)) {
				OSAddAtomic(-1, &pmap->stats.wired_count);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			}

			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_COMPRESSOR) &&
			    IS_INTERNAL_PAGE(pai)) {
				/* adjust "reclaimed" stats */
				OSAddAtomic64(+1, &pmap->stats.compressed);
				PMAP_STATS_PEAK(pmap->stats.compressed);
				pmap->stats.compressed_lifetime++;
				/* mark this PTE as having been "reclaimed" */
				new_pte_value = INTEL_PTE_COMPRESSED;
			} else {
				new_pte_value = 0;
			}

			if (options & PMAP_OPTIONS_NOREFMOD) {
				pmap_store_pte(pte, new_pte_value);

				if (options & PMAP_OPTIONS_NOFLUSH)
					PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
				else
					PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			} else {
				/*
				 * Remove the mapping, collecting dirty bits.
				 */
				pmap_update_pte(pte, INTEL_PTE_VALID, 0);

				PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
				pmap_phys_attributes[pai] |=
					*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
				pmap_store_pte(pte, new_pte_value);
			}
#if TESTING
			if (pmap->stats.resident_count < 1)
				panic("pmap_page_protect: resident_count");
#endif
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				/*
				 * This removal is only being done so we can send this page to
				 * the compressor; therefore it mustn't affect total task footprint.
				 */
				pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
			} else {
				pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
			}

			if (pmap != kernel_pmap) {
				if (IS_REUSABLE_PAGE(pai)) {
					assert(pmap->stats.reusable > 0);
					OSAddAtomic(-1, &pmap->stats.reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					assert(pmap->stats.internal > 0);
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					assert(pmap->stats.external > 0);
					OSAddAtomic(-1, &pmap->stats.external);
				}
			}

			/*
			 * Deal with the pv_rooted_entry.
			 */

			if (pv_e == pv_h) {
				/*
				 * Fix up head later.
				 */
				pv_h->pmap = PMAP_NULL;
			} else {
				/*
				 * Delete this entry.
				 */
				pv_hash_remove(pvh_e);
				pvh_e->qlink.next = (queue_entry_t) pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pvh_cnt++;
			}
		} else {
			/*
			 * Write-protect, after opportunistic refmod collect
			 */
			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, INTEL_PTE_WRITE, 0);

			if (options & PMAP_OPTIONS_NOFLUSH)
				PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
			else
				PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
		}
		pvh_e = nexth;
	} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);


	/*
	 * If pv_head mapping was removed, fix it up.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

		if (pvh_e != (pv_hashed_entry_t) pv_h) {
			pv_hash_remove(pvh_e);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va = pvh_e->va;
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pvh_cnt++;
		}
	}
	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
done:
	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}


/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits,
	unsigned int	options,
	void		*arg)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	char			attributes = 0;
	boolean_t		is_internal, is_reusable;

	if ((bits & PHYS_MODIFIED) &&
	    (options & PMAP_OPTIONS_NOFLUSH) &&
	    arg == NULL) {
		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
		      "should not clear 'modified' without flushing TLBs\n",
		      pn, bits, options, arg);
	}

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
		   pn, bits, 0, 0, 0);

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we have
	 * the per-pmap lock
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */

		is_internal = IS_INTERNAL_PAGE(pai);
		is_reusable = IS_REUSABLE_PAGE(pai);

		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t	va;
			char		pte_bits;

			pmap = pv_e->pmap;
			va = pv_e->va;
			pte_bits = 0;

			if (bits) {
				pte = pmap_pte(pmap, va);
				/* grab ref/mod bits from this PTE */
				pte_bits = (*pte & (PHYS_MODIFIED |
						    PHYS_REFERENCED));
				/* propagate to page's global attributes */
				attributes |= pte_bits;
				/* which bits to clear for this PTE? */
				pte_bits &= bits;
			}

			/*
			 * Clear modify and/or reference bits.
			 */
			if (pte_bits) {
				pmap_update_pte(pte, bits, 0);

				/* Ensure all processors using this translation
				 * invalidate this TLB entry. The invalidation
				 * *must* follow the PTE update, to ensure that
				 * the TLB shadow of the 'D' bit (in particular)
				 * is synchronized with the updated PTE.
				 */
				if (! (options & PMAP_OPTIONS_NOFLUSH)) {
					/* flush TLBS now */
					PMAP_UPDATE_TLBS(pmap,
							 va,
							 va + PAGE_SIZE);
				} else if (arg) {
					/* delayed TLB flush: add "pmap" info */
					PMAP_UPDATE_TLBS_DELAYED(
						pmap,
						va,
						va + PAGE_SIZE,
						(pmap_flush_context *)arg);
				} else {
					/* no TLB flushing at all */
				}
			}

			/* update pmap "reusable" stats */
			if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
			    is_reusable &&
			    pmap != kernel_pmap) {
				/* one less "reusable" */
				assert(pmap->stats.reusable > 0);
				OSAddAtomic(-1, &pmap->stats.reusable);
				if (is_internal) {
					/* one more "internal" */
					OSAddAtomic(+1, &pmap->stats.internal);
					PMAP_STATS_PEAK(pmap->stats.internal);
				} else {
					/* one more "external" */
					OSAddAtomic(+1, &pmap->stats.external);
					PMAP_STATS_PEAK(pmap->stats.external);
				}
			} else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
				   !is_reusable &&
				   pmap != kernel_pmap) {
				/* one more "reusable" */
				OSAddAtomic(+1, &pmap->stats.reusable);
				PMAP_STATS_PEAK(pmap->stats.reusable);
				if (is_internal) {
					/* one less "internal" */
					assert(pmap->stats.internal > 0);
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					/* one less "external" */
					assert(pmap->stats.external > 0);
					OSAddAtomic(-1, &pmap->stats.external);
				}
			}

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while (pv_e != (pv_hashed_entry_t)pv_h);
	}
	/* Opportunistic refmod collection, annulled
	 * if both REF and MOD are being cleared.
	 */

	pmap_phys_attributes[pai] |= attributes;
	pmap_phys_attributes[pai] &= (~bits);

	/* update this page's "reusable" status */
	if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
		pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
	} else if (options & PMAP_OPTIONS_SET_REUSABLE) {
		pmap_phys_attributes[pai] |= PHYS_REUSABLE;
	}

	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}

/*
 *	Check specified attribute bits.
 */
int
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	int			attributes = 0;

	pmap_intr_assert();
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return 0;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return 0;
	}

	/*
	 * Fast check... if bits already collected
	 * no need to take any locks...
	 * if not set, we need to recheck after taking
	 * the lock in case they got pulled in while
	 * we were waiting for the lock
	 */
	if ((pmap_phys_attributes[pai] & bits) == bits)
		return bits;

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	attributes = pmap_phys_attributes[pai] & bits;


	/*
	 * Walk down PV list, checking the mappings until we
	 * reach the end or we've found the desired attributes.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;
		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;
			/*
			 * pick up modify and/or reference bits from mapping
			 */

			pte = pmap_pte(pmap, va);
			attributes |= (int)(*pte & bits);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while ((attributes != bits) &&
			 (pv_e != (pv_hashed_entry_t)pv_h));
	}
	pmap_phys_attributes[pai] |= attributes;

	UNLOCK_PVH(pai);
	return (attributes);
}

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	boolean_t	wired)
{
	pt_entry_t	*pte;

	PMAP_LOCK(map);

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
		/*
		 * wiring down mapping
		 */
		pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
		OSAddAtomic(+1, &map->stats.wired_count);
		pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
	}
	else if (!wired && iswired(*pte)) {
		/*
		 * unwiring mapping
		 */
		assert(map->stats.wired_count >= 1);
		OSAddAtomic(-1, &map->stats.wired_count);
		pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
		pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
	}

	PMAP_UNLOCK(map);
}

/*
 *	"Backdoor" direct map routine for early mappings.
 *	Useful for mapping memory outside the range
 *	Sets A, D and NC if requested
 */

vm_offset_t
pmap_map_bd(
	vm_offset_t	virt,
	vm_map_offset_t	start_addr,
	vm_map_offset_t	end_addr,
	vm_prot_t	prot,
	unsigned int	flags)
{
	pt_entry_t	template;
	pt_entry_t	*pte;
	spl_t		spl;
	vm_offset_t	base = virt;
	template = pa_to_pte(start_addr)
		| INTEL_PTE_REF
		| INTEL_PTE_MOD
		| INTEL_PTE_WIRED
		| INTEL_PTE_VALID;

	if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED)))
			template |= INTEL_PTE_PTA;
	}

#if defined(__x86_64__)
	if ((prot & VM_PROT_EXECUTE) == 0)
		template |= INTEL_PTE_NX;
#endif

	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;

	while (start_addr < end_addr) {
		spl = splhigh();
		pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (pte == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address\n");
		}
		pmap_store_pte(pte, template);
		splx(spl);
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
	flush_tlb_raw();
	PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
	return(virt);
}
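
/*
 * Illustrative sketch (not compiled): an early boot-time use of pmap_map_bd()
 * above to wire one page of an uncached device range into the kernel's
 * address space. "dev_virt" and "dev_base" are placeholders and the function
 * name is hypothetical.
 */
#if 0
static vm_offset_t
pmap_map_bd_example(vm_offset_t dev_virt, vm_map_offset_t dev_base)
{
	/* maps one page, read/write, uncached; returns the next free VA */
	return pmap_map_bd(dev_virt, dev_base, dev_base + PAGE_SIZE,
			   VM_PROT_READ | VM_PROT_WRITE,
			   VM_MEM_NOT_CACHEABLE);
}
#endif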

unsigned int
pmap_query_resident(
	pmap_t		pmap,
	addr64_t	s64,
	addr64_t	e64)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;
	unsigned int	result;

	pmap_intr_assert();

	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
		return 0;

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);

	result = 0;

	PMAP_LOCK(pmap);

	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(pmap, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/* superpage: not supported */
			} else {
				spte = pmap_pte(pmap,
						(s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];

				for (; spte < epte; spte++) {
					if (pte_to_pa(*spte) != 0) {
						result++;
					}
				}

			}
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			PMAP_UNLOCK(pmap);
			PMAP_LOCK(pmap);
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_UNLOCK(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
		   pmap, 0, 0, 0, 0);

	return result;
}

#if MACH_ASSERT
void
pmap_set_process(
	__unused pmap_t pmap,
	__unused int pid,
	__unused char *procname)
{
}
#endif /* MACH_ASSERT */