/*	$NetBSD: pmap.c,v 1.151 2024/02/16 21:32:17 andvar Exp $	*/

/*
 * Copyright (c) 2017 Ryo Shimizu
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.151 2024/02/16 21:32:17 andvar Exp $");

#include "opt_arm_debug.h"
#include "opt_cpuoptions.h"
#include "opt_ddb.h"
#include "opt_efi.h"
#include "opt_modular.h"
#include "opt_multiprocessor.h"
#include "opt_pmap.h"
#include "opt_uvmhist.h"

#include <sys/param.h>
#include <sys/types.h>

#include <sys/asan.h>
#include <sys/atomic.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/vmem.h>

#include <uvm/uvm.h>
#include <uvm/pmap/pmap_pvt.h>

#include <arm/cpufunc.h>

#include <aarch64/pmap.h>
#include <aarch64/pte.h>
#include <aarch64/armreg.h>
#include <aarch64/locore.h>
#include <aarch64/machdep.h>
#ifdef DDB
#include <aarch64/db_machdep.h>
#include <ddb/db_access.h>
#endif

//#define PMAP_PV_DEBUG

#ifdef VERBOSE_INIT_ARM
#define VPRINTF(...)	printf(__VA_ARGS__)
#else
#define VPRINTF(...)	__nothing
#endif

#ifdef UVMHIST

#ifndef UVMHIST_PMAPHIST_SIZE
#define UVMHIST_PMAPHIST_SIZE	(1024 * 4)
#endif

struct kern_history_ent pmaphistbuf[UVMHIST_PMAPHIST_SIZE];
UVMHIST_DEFINE(pmaphist) = UVMHIST_INITIALIZER(pmaphist, pmaphistbuf);

static void
pmap_hist_init(void)
{
	static bool inited = false;
	if (inited == false) {
		UVMHIST_LINK_STATIC(pmaphist);
		inited = true;
	}
}
#define PMAP_HIST_INIT()	pmap_hist_init()

#else /* UVMHIST */

#define PMAP_HIST_INIT()	((void)0)

#endif /* UVMHIST */


#ifdef PMAPCOUNTERS
#define PMAP_COUNT(name)	(pmap_evcnt_##name.ev_count++ + 0)
#define PMAP_COUNTER(name, desc)					\
	struct evcnt pmap_evcnt_##name =				\
	    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "pmap", desc);	\
	EVCNT_ATTACH_STATIC(pmap_evcnt_##name)

PMAP_COUNTER(pdp_alloc_boot, "page table page allocate (uvm_pageboot_alloc)");
PMAP_COUNTER(pdp_alloc, "page table page allocate (uvm_pagealloc)");
PMAP_COUNTER(pdp_free, "page table page free (uvm_pagefree)");

PMAP_COUNTER(pv_enter, "pv_entry fill");
PMAP_COUNTER(pv_remove_dyn, "pv_entry free and unlink dynamic");
PMAP_COUNTER(pv_remove_emb, "pv_entry clear embedded");
PMAP_COUNTER(pv_remove_nopv, "no pv_entry found when removing pv");

PMAP_COUNTER(activate, "pmap_activate call");
PMAP_COUNTER(deactivate, "pmap_deactivate call");
PMAP_COUNTER(create, "pmap_create call");
PMAP_COUNTER(destroy, "pmap_destroy call");

PMAP_COUNTER(page_protect, "pmap_page_protect call");
PMAP_COUNTER(protect, "pmap_protect call");
PMAP_COUNTER(protect_remove_fallback, "pmap_protect with no-read");
PMAP_COUNTER(protect_none, "pmap_protect non-existent pages");
PMAP_COUNTER(protect_managed, "pmap_protect managed pages");
PMAP_COUNTER(protect_unmanaged, "pmap_protect unmanaged pages");
PMAP_COUNTER(protect_pvmanaged, "pmap_protect pv-tracked unmanaged pages");

PMAP_COUNTER(clear_modify, "pmap_clear_modify call");
PMAP_COUNTER(clear_modify_pages, "pmap_clear_modify pages");
PMAP_COUNTER(clear_reference, "pmap_clear_reference call");
PMAP_COUNTER(clear_reference_pages, "pmap_clear_reference pages");

PMAP_COUNTER(fixup_referenced, "page reference emulations");
PMAP_COUNTER(fixup_modified, "page modification emulations");

PMAP_COUNTER(kern_mappings_bad, "kernel pages mapped (bad color)");
PMAP_COUNTER(kern_mappings_bad_wired, "kernel pages mapped (wired bad color)");
PMAP_COUNTER(user_mappings_bad, "user pages mapped (bad color, not wired)");
PMAP_COUNTER(user_mappings_bad_wired, "user pages mapped (bad color, wired)");
PMAP_COUNTER(kern_mappings, "kernel pages mapped");
PMAP_COUNTER(user_mappings, "user pages mapped");
PMAP_COUNTER(user_mappings_changed, "user mapping changed");
PMAP_COUNTER(kern_mappings_changed, "kernel mapping changed");
PMAP_COUNTER(uncached_mappings, "uncached pages mapped");
PMAP_COUNTER(unmanaged_mappings, "unmanaged pages mapped");
PMAP_COUNTER(pvmanaged_mappings, "pv-tracked unmanaged pages mapped");
PMAP_COUNTER(managed_mappings, "managed pages mapped");
PMAP_COUNTER(mappings, "pages mapped (including remapped)");
PMAP_COUNTER(remappings, "pages remapped");

PMAP_COUNTER(pv_entry_cannotalloc, "pv_entry allocation failure");

PMAP_COUNTER(unwire, "pmap_unwire call");
PMAP_COUNTER(unwire_failure, "pmap_unwire failure");

#else /* PMAPCOUNTERS */
#define PMAP_COUNT(name)	__nothing
#endif /* PMAPCOUNTERS */

/*
 * invalidate the TLB entry for the given ASID and VA.
 */
#define AARCH64_TLBI_BY_ASID_VA(asid, va)				\
	do {								\
		if ((asid) == 0)					\
			aarch64_tlbi_by_va((va));			\
		else							\
			aarch64_tlbi_by_asid_va((asid), (va));		\
	} while (0/*CONSTCOND*/)

/*
 * invalidating the instruction cache requires access permission in the
 * pte, so temporarily make the page accessible before calling
 * cpu_icache_sync_range().  this macro modifies the PTE (*ptep); the
 * caller must rewrite the PTE afterwards.
 */
#define PTE_ICACHE_SYNC_PAGE(pte, ptep, asid, va)			\
	do {								\
		atomic_swap_64((ptep), (pte) | LX_BLKPAG_AF);		\
		AARCH64_TLBI_BY_ASID_VA((asid), (va));			\
		cpu_icache_sync_range((va), PAGE_SIZE);			\
	} while (0/*CONSTCOND*/)

#define VM_PAGE_TO_PP(pg)	(&(pg)->mdpage.mdpg_pp)

#define L3INDEXMASK		(L3_SIZE * Ln_ENTRIES - 1)
#define PDPSWEEP_TRIGGER	512

static pt_entry_t *_pmap_pte_lookup_l3(struct pmap *, vaddr_t);
static pt_entry_t *_pmap_pte_lookup_bs(struct pmap *, vaddr_t, vsize_t *);
static pt_entry_t _pmap_pte_adjust_prot(pt_entry_t, vm_prot_t, vm_prot_t, bool);
static pt_entry_t _pmap_pte_adjust_cacheflags(pt_entry_t, u_int);
static void _pmap_remove(struct pmap *, vaddr_t, vaddr_t, bool,
    struct pv_entry **);
static int _pmap_enter(struct pmap *, vaddr_t, paddr_t, vm_prot_t, u_int, bool);
static int _pmap_get_pdp(struct pmap *, vaddr_t, bool, int, paddr_t *,
    struct vm_page **);

static struct pmap kernel_pmap __cacheline_aligned;
struct pmap * const kernel_pmap_ptr = &kernel_pmap;

#if defined(EFI_RUNTIME)
static struct pmap efirt_pmap __cacheline_aligned;

pmap_t
pmap_efirt(void)
{
	return &efirt_pmap;
}
#endif

static vaddr_t pmap_maxkvaddr;

vaddr_t virtual_avail, virtual_end;
vaddr_t virtual_devmap_addr;

static struct pool_cache _pmap_cache;
static struct pool_cache _pmap_pv_pool;

/* set to LX_BLKPAG_GP if supported. */
uint64_t pmap_attr_gp = 0;

static inline void
pmap_pv_lock(struct pmap_page *pp)
{

	mutex_enter(&pp->pp_pvlock);
}

static inline void
pmap_pv_unlock(struct pmap_page *pp)
{

	mutex_exit(&pp->pp_pvlock);
}


static inline void
pm_lock(struct pmap *pm)
{
	mutex_enter(&pm->pm_lock);
}

static inline void
pm_unlock(struct pmap *pm)
{
	mutex_exit(&pm->pm_lock);
}

/*
 * acquire the pmap lock while already holding the pv lock (the reverse
 * of the usual order).  returns true if pm_lock was taken without
 * dropping the pv lock; returns false if the pv lock had to be dropped
 * and re-taken, in which case the caller must re-validate and retry.
 */
static bool
pm_reverse_lock(struct pmap *pm, struct pmap_page *pp)
{

	KASSERT(mutex_owned(&pp->pp_pvlock));

	if (__predict_true(mutex_tryenter(&pm->pm_lock)))
		return true;

	if (pm != pmap_kernel())
		pmap_reference(pm);
	mutex_exit(&pp->pp_pvlock);
	mutex_enter(&pm->pm_lock);
	/* nothing, just wait for the lock */
	mutex_exit(&pm->pm_lock);
	if (pm != pmap_kernel())
		pmap_destroy(pm);
	mutex_enter(&pp->pp_pvlock);
	return false;
}

static inline struct pmap_page *
phys_to_pp(paddr_t pa)
{
	struct vm_page *pg;

	pg = PHYS_TO_VM_PAGE(pa);
	if (pg != NULL)
		return VM_PAGE_TO_PP(pg);

#ifdef __HAVE_PMAP_PV_TRACK
	return pmap_pv_tracked(pa);
#else
	return NULL;
#endif /* __HAVE_PMAP_PV_TRACK */
}

#define IN_RANGE(va, sta, end)	(((sta) <= (va)) && ((va) < (end)))

#define IN_DIRECTMAP_ADDR(va)	\
	IN_RANGE((va), AARCH64_DIRECTMAP_START, AARCH64_DIRECTMAP_END)

#define PMAP_EFIVA_P(va)						\
	IN_RANGE((va), EFI_RUNTIME_VA, EFI_RUNTIME_VA + EFI_RUNTIME_SIZE)

#ifdef MODULAR
#define IN_MODULE_VA(va)	IN_RANGE((va), module_start, module_end)
#else
#define IN_MODULE_VA(va)	false
#endif

#ifdef DIAGNOSTIC

#define KERNEL_ADDR_P(va)						\
	(IN_RANGE((va), VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS) || \
	    PMAP_EFIVA_P(va))

#define KASSERT_PM_ADDR(pm, va)						\
	do {								\
		int space = aarch64_addressspace(va);			\
		if ((pm) == pmap_kernel()) {				\
			KASSERTMSG(space == AARCH64_ADDRSPACE_UPPER,	\
			    "%s: kernel pm %p: va=%016lx"		\
			    " is out of upper address space",		\
			    __func__, (pm), (va));			\
			KASSERTMSG(KERNEL_ADDR_P(va),			\
			    "%s: kernel pm %p: va=%016lx"		\
			    " is not a kernel address",			\
			    __func__, (pm), (va));			\
		} else {						\
			KASSERTMSG(space == AARCH64_ADDRSPACE_LOWER,	\
			    "%s: user pm %p: va=%016lx"			\
			    " is out of lower address space",		\
			    __func__, (pm), (va));			\
			KASSERTMSG(IN_RANGE((va),			\
			    VM_MIN_ADDRESS, VM_MAX_ADDRESS),		\
			    "%s: user pm %p: va=%016lx"			\
			    " is not a user address",			\
			    __func__, (pm), (va));			\
		}							\
	} while (0 /* CONSTCOND */)
#else /* DIAGNOSTIC */
#define KASSERT_PM_ADDR(pm, va)
#endif /* DIAGNOSTIC */


vsize_t
pmap_kenter_range(vaddr_t va, paddr_t pa, vsize_t size,
    vm_prot_t prot, u_int flags)
{
	pt_entry_t attr;
	vsize_t resid = round_page(size);

	attr = _pmap_pte_adjust_prot(0, prot, VM_PROT_ALL, false);
	attr = _pmap_pte_adjust_cacheflags(attr, flags);
	pmapboot_enter_range(va, pa, resid, attr, printf);

	return resid;
}
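
/*
 * Illustrative only (not in the original source): a typical
 * bootstrap-time use of pmap_kenter_range() to map a hypothetical
 * device window; the names and addresses below are made up.
 *
 *	vsize_t mapped;
 *	mapped = pmap_kenter_range(devmap_va, 0x40000000UL, 0x10000,
 *	    VM_PROT_READ | VM_PROT_WRITE, PMAP_DEV);
 *
 * the size is rounded up to whole pages and the number of bytes
 * actually mapped is returned.
 */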

void
pmap_bootstrap(vaddr_t vstart, vaddr_t vend)
{
	struct pmap *kpm;
	pd_entry_t *l0;
	paddr_t l0pa;

	PMAP_HIST_INIT();	/* init once */

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "vstart=%#jx vend=%#jx", (uintptr_t)vstart,
	    (uintptr_t)vend, 0, 0);

	uvmexp.ncolors = aarch64_cache_vindexsize / PAGE_SIZE;

	/* does devmap already use the top of the VA range? */
	if (virtual_devmap_addr != 0 && virtual_devmap_addr < vend)
		vend = virtual_devmap_addr;

	virtual_avail = vstart;
	virtual_end = vend;
	pmap_maxkvaddr = vstart;

	l0pa = reg_ttbr1_el1_read();
	l0 = (void *)AARCH64_PA_TO_KVA(l0pa);

	pmap_tlb_info_init(&pmap_tlb0_info);

	memset(&kernel_pmap, 0, sizeof(kernel_pmap));

	kpm = pmap_kernel();
	struct pmap_asid_info * const pai = PMAP_PAI(kpm, cpu_tlb_info(ci));

	pai->pai_asid = KERNEL_PID;
	kpm->pm_refcnt = 1;
	kpm->pm_idlepdp = 0;
	kpm->pm_l0table = l0;
	kpm->pm_l0table_pa = l0pa;
	kpm->pm_onproc = kcpuset_running;
	kpm->pm_active = kcpuset_running;
	kpm->pm_activated = true;
	LIST_INIT(&kpm->pm_vmlist);
	LIST_INIT(&kpm->pm_pvlist);	/* not used for the kernel pmap */
	mutex_init(&kpm->pm_lock, MUTEX_DEFAULT, IPL_NONE);

	CTASSERT(sizeof(kpm->pm_stats.wired_count) == sizeof(long));
	CTASSERT(sizeof(kpm->pm_stats.resident_count) == sizeof(long));

#if defined(EFI_RUNTIME)
	memset(&efirt_pmap, 0, sizeof(efirt_pmap));
	struct pmap * const efipm = &efirt_pmap;
	struct pmap_asid_info * const efipai = PMAP_PAI(efipm,
	    cpu_tlb_info(ci));

	efipai->pai_asid = KERNEL_PID;
	efipm->pm_refcnt = 1;

	vaddr_t efi_l0va = uvm_pageboot_alloc(Ln_TABLE_SIZE);
	KASSERT((efi_l0va & PAGE_MASK) == 0);

	efipm->pm_l0table = (pd_entry_t *)efi_l0va;
	memset(efipm->pm_l0table, 0, Ln_TABLE_SIZE);

	efipm->pm_l0table_pa = AARCH64_KVA_TO_PA(efi_l0va);

	efipm->pm_activated = false;
	LIST_INIT(&efipm->pm_vmlist);
	LIST_INIT(&efipm->pm_pvlist);	/* not used for the efi pmap */
	mutex_init(&efipm->pm_lock, MUTEX_DEFAULT, IPL_NONE);
#endif
}

#ifdef MULTIPROCESSOR
void
pmap_md_tlb_info_attach(struct pmap_tlb_info *ti, struct cpu_info *ci)
{
	/* nothing */
}
#endif /* MULTIPROCESSOR */

static inline void
_pmap_adj_wired_count(struct pmap *pm, int adj)
{

	if (pm == pmap_kernel()) {
		atomic_add_long(&pm->pm_stats.wired_count, adj);
	} else {
		KASSERT(mutex_owned(&pm->pm_lock));
		pm->pm_stats.wired_count += adj;
	}
}

static inline void
_pmap_adj_resident_count(struct pmap *pm, int adj)
{

	if (pm == pmap_kernel()) {
		atomic_add_long(&pm->pm_stats.resident_count, adj);
	} else {
		KASSERT(mutex_owned(&pm->pm_lock));
		pm->pm_stats.resident_count += adj;
	}
}

inline static int
_pmap_color(vaddr_t addr)	/* or paddr_t */
{
	return (addr >> PGSHIFT) & (uvmexp.ncolors - 1);
}

static int
_pmap_pmap_ctor(void *arg, void *v, int flags)
{
	memset(v, 0, sizeof(struct pmap));
	return 0;
}

static int
_pmap_pv_ctor(void *arg, void *v, int flags)
{
	memset(v, 0, sizeof(struct pv_entry));
	return 0;
}

pd_entry_t *
pmap_l0table(struct pmap *pm)
{

	return pm->pm_l0table;
}

void
pmap_init(void)
{

	pool_cache_bootstrap(&_pmap_cache, sizeof(struct pmap),
	    coherency_unit, 0, 0, "pmappl", NULL, IPL_NONE, _pmap_pmap_ctor,
	    NULL, NULL);

	pool_cache_bootstrap(&_pmap_pv_pool, sizeof(struct pv_entry),
	    32, 0, PR_LARGECACHE, "pvpl", NULL, IPL_NONE, _pmap_pv_ctor,
	    NULL, NULL);

	pmap_tlb_info_evcnt_attach(&pmap_tlb0_info);
}

void
pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
{
	*vstartp = virtual_avail;
	*vendp = virtual_end;
}
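
/*
 * steal size bytes of physical memory; called before uvm_page_init().
 * whole pages are taken from the first physical segment large enough to
 * satisfy the request: they are unplugged from the segment, zeroed, and
 * returned by their direct-map virtual address, so no PTEs need to be
 * entered here.
 */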
vaddr_t
pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
{
	int npage;
	paddr_t pa;
	vaddr_t va;
	psize_t bank_npage;
	uvm_physseg_t bank;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "size=%llu, *vstartp=%llx, *vendp=%llx",
	    size, *vstartp, *vendp, 0);

	size = round_page(size);
	npage = atop(size);

	for (bank = uvm_physseg_get_first(); uvm_physseg_valid_p(bank);
	    bank = uvm_physseg_get_next(bank)) {
		bank_npage = uvm_physseg_get_avail_end(bank) -
		    uvm_physseg_get_avail_start(bank);
		if (npage <= bank_npage)
			break;
	}

	if (!uvm_physseg_valid_p(bank)) {
		panic("%s: no memory", __func__);
	}

	/* steal the pages */
	pa = ptoa(uvm_physseg_get_avail_start(bank));
	va = AARCH64_PA_TO_KVA(pa);
	uvm_physseg_unplug(atop(pa), npage);

	for (; npage > 0; npage--, pa += PAGE_SIZE)
		pmap_zero_page(pa);

	return va;
}

void
pmap_reference(struct pmap *pm)
{
	atomic_inc_uint(&pm->pm_refcnt);
}

static paddr_t
pmap_alloc_pdp(struct pmap *pm, struct vm_page **pgp, int flags, bool waitok)
{
	paddr_t pa;
	struct vm_page *pg;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pm=%p, flags=%08x, waitok=%d",
	    pm, flags, waitok, 0);

	if (uvm.page_init_done) {
		int aflags = ((flags & PMAP_CANFAIL) ? 0 : UVM_PGA_USERESERVE) |
		    UVM_PGA_ZERO;
 retry:
		pg = uvm_pagealloc(NULL, 0, NULL, aflags);
		if (pg == NULL) {
			if (waitok) {
				uvm_wait("pmap_alloc_pdp");
				goto retry;
			}
			return POOL_PADDR_INVALID;
		}

		LIST_INSERT_HEAD(&pm->pm_vmlist, pg, pageq.list);
		pg->flags &= ~PG_BUSY;	/* never busy */
		pg->wire_count = 1;	/* max = 1 + Ln_ENTRIES = 513 */
		pa = VM_PAGE_TO_PHYS(pg);
		PMAP_COUNT(pdp_alloc);
		PMAP_PAGE_INIT(VM_PAGE_TO_PP(pg));
	} else {
		/* uvm_pageboot_alloc() returns a direct-map address */
		pg = NULL;
		pa = AARCH64_KVA_TO_PA(
		    uvm_pageboot_alloc(Ln_TABLE_SIZE));
		PMAP_COUNT(pdp_alloc_boot);
	}
	if (pgp != NULL)
		*pgp = pg;

	UVMHIST_LOG(pmaphist, "pa=%llx, pg=%llx",
	    pa, pg, 0, 0);

	return pa;
}

static void
pmap_free_pdp(struct pmap *pm, struct vm_page *pg)
{

	KASSERT(pm != pmap_kernel());
	KASSERT(VM_PAGE_TO_PP(pg)->pp_pv.pv_pmap == NULL);
	KASSERT(VM_PAGE_TO_PP(pg)->pp_pv.pv_next == NULL);

	LIST_REMOVE(pg, pageq.list);
	pg->wire_count = 0;
	uvm_pagefree(pg);
	PMAP_COUNT(pdp_free);
}

/*
 * free page table pages that have become empty.  a PDP page's wire_count
 * is 1 + the number of valid entries it holds, so wire_count == 1
 * identifies an empty table.
 */
static void
_pmap_sweep_pdp(struct pmap *pm)
{
	struct vm_page *pg, *tmp;
	pd_entry_t *ptep_in_parent, opte __diagused;
	paddr_t pa, pdppa;
	uint16_t wirecount __diagused;

	KASSERT(mutex_owned(&pm->pm_lock) || pm->pm_refcnt == 0);

	LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, pageq.list, tmp) {
		if (pg->wire_count != 1)
			continue;

		pa = VM_PAGE_TO_PHYS(pg);
		if (pa == pm->pm_l0table_pa)
			continue;

		ptep_in_parent = VM_PAGE_TO_MD(pg)->mdpg_ptep_parent;
		if (ptep_in_parent == NULL) {
			/* no parent */
			pmap_free_pdp(pm, pg);
			continue;
		}

		/* unlink from the parent */
		opte = atomic_swap_64(ptep_in_parent, 0);
		KASSERT(lxpde_valid(opte));
		wirecount = --pg->wire_count;	/* 1 -> 0 */
		KASSERT(wirecount == 0);
		pmap_free_pdp(pm, pg);

		/* L3->L2->L1.  no need for L0 */
		pdppa = AARCH64_KVA_TO_PA(trunc_page((vaddr_t)ptep_in_parent));
		if (pdppa == pm->pm_l0table_pa)
			continue;

		pg = PHYS_TO_VM_PAGE(pdppa);
		KASSERT(pg != NULL);
		KASSERTMSG(pg->wire_count >= 1,
		    "wire_count=%d", pg->wire_count);
		/* decrement the wire_count of the parent */
		wirecount = --pg->wire_count;
		KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
		    "pm=%p, pg=%p, wire_count=%d",
		    pm, pg, pg->wire_count);
	}
	pm->pm_idlepdp = 0;
}

static void
_pmap_free_pdp_all(struct pmap *pm, bool free_l0)
{
	struct vm_page *pg, *pgtmp, *pg_reserve;

	pg_reserve = free_l0 ? NULL : PHYS_TO_VM_PAGE(pm->pm_l0table_pa);
	LIST_FOREACH_SAFE(pg, &pm->pm_vmlist, pageq.list, pgtmp) {
		if (pg == pg_reserve)
			continue;
		pmap_free_pdp(pm, pg);
	}
}

vaddr_t
pmap_growkernel(vaddr_t maxkvaddr)
{
	struct pmap *pm = pmap_kernel();
	struct vm_page *pg;
	int error;
	vaddr_t va;
	paddr_t pa;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "maxkvaddr=%llx, pmap_maxkvaddr=%llx",
	    maxkvaddr, pmap_maxkvaddr, 0, 0);

	mutex_enter(&pm->pm_lock);
	for (va = pmap_maxkvaddr & L2_FRAME; va <= maxkvaddr; va += L2_SIZE) {
		error = _pmap_get_pdp(pm, va, false, 0, &pa, &pg);
		if (error != 0) {
			panic("%s: cannot allocate L3 table error=%d",
			    __func__, error);
		}
	}
	kasan_shadow_map((void *)pmap_maxkvaddr,
	    (size_t)(va - pmap_maxkvaddr));
	pmap_maxkvaddr = va;
	mutex_exit(&pm->pm_lock);

	return va;
}

bool
pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
{

	return pmap_extract_coherency(pm, va, pap, NULL);
}

bool
pmap_extract_coherency(struct pmap *pm, vaddr_t va, paddr_t *pap,
    bool *coherencyp)
{
	pt_entry_t *ptep, pte;
	paddr_t pa;
	vsize_t blocksize = 0;
	int space;
	bool coherency, valid;
	extern char __kernel_text[];
	extern char _end[];

	coherency = false;

	space = aarch64_addressspace(va);
	if (pm == pmap_kernel()) {
		if (space != AARCH64_ADDRSPACE_UPPER)
			return false;

		if (IN_RANGE(va, (vaddr_t)__kernel_text, (vaddr_t)_end)) {
			/* kernel text/data/bss are definitely linearly mapped */
			pa = KERN_VTOPHYS(va);
			goto mapped;
		} else if (IN_DIRECTMAP_ADDR(va)) {
			/*
			 * the direct map is also linearly mapped, but areas
			 * without physical memory are left unmapped, so do a
			 * fast lookup using the S1E1R/PAR_EL1 registers.
			 */
			register_t s = daif_disable(DAIF_I | DAIF_F);
			reg_s1e1r_write(va);
			isb();
			uint64_t par = reg_par_el1_read();
			reg_daif_write(s);

			if (par & PAR_F)
				return false;
			pa = (__SHIFTOUT(par, PAR_PA) << PAR_PA_SHIFT) +
			    (va & __BITS(PAR_PA_SHIFT - 1, 0));
			goto mapped;
		}
	} else {
		if (space != AARCH64_ADDRSPACE_LOWER)
			return false;
	}

	/*
	 * other areas cannot be examined using the PAR_EL1 register,
	 * because the page may be in an access-fault state due to
	 * reference-bit emulation.
	 */
	if (pm != pmap_kernel())
		mutex_enter(&pm->pm_lock);
	ptep = _pmap_pte_lookup_bs(pm, va, &blocksize);
	valid = (ptep != NULL && lxpde_valid(pte = *ptep));
	if (pm != pmap_kernel())
		mutex_exit(&pm->pm_lock);

	if (!valid) {
		return false;
	}

	pa = lxpde_pa(pte) + (va & (blocksize - 1));

	switch (pte & LX_BLKPAG_ATTR_MASK) {
	case LX_BLKPAG_ATTR_NORMAL_NC:
	case LX_BLKPAG_ATTR_DEVICE_MEM:
	case LX_BLKPAG_ATTR_DEVICE_MEM_NP:
		coherency = true;
		break;
	}

 mapped:
	if (pap != NULL)
		*pap = pa;
	if (coherencyp != NULL)
		*coherencyp = coherency;
	return true;
}

paddr_t
vtophys(vaddr_t va)
{
	struct pmap *pm;
	paddr_t pa;

	/* even if TBI is disabled, AARCH64_ADDRTOP_TAG means KVA */
	if ((uint64_t)va & AARCH64_ADDRTOP_TAG)
		pm = pmap_kernel();
	else
		pm = curlwp->l_proc->p_vmspace->vm_map.pmap;

	if (pmap_extract(pm, va, &pa) == false)
		return VTOPHYS_FAILED;
	return pa;
}

/*
 * return a pointer to the pte, regardless of whether the entry is
 * valid or not.
 */
static pt_entry_t *
_pmap_pte_lookup_bs(struct pmap *pm, vaddr_t va, vsize_t *bs)
{
	pt_entry_t *ptep;
	pd_entry_t *l0, *l1, *l2, *l3;
	pd_entry_t pde;
	vsize_t blocksize;
	unsigned int idx;

	KASSERT(pm == pmap_kernel() || mutex_owned(&pm->pm_lock));

	/*
	 * traverse L0 -> L1 -> L2 -> L3
	 */
	blocksize = L0_SIZE;
	l0 = pm->pm_l0table;
	idx = l0pde_index(va);
	ptep = &l0[idx];
	pde = *ptep;
	if (!l0pde_valid(pde))
		goto done;

	blocksize = L1_SIZE;
	l1 = (pd_entry_t *)AARCH64_PA_TO_KVA(l0pde_pa(pde));
	idx = l1pde_index(va);
	ptep = &l1[idx];
	pde = *ptep;
	if (!l1pde_valid(pde) || l1pde_is_block(pde))
		goto done;

	blocksize = L2_SIZE;
	l2 = (pd_entry_t *)AARCH64_PA_TO_KVA(l1pde_pa(pde));
	idx = l2pde_index(va);
	ptep = &l2[idx];
	pde = *ptep;
	if (!l2pde_valid(pde) || l2pde_is_block(pde))
		goto done;

	blocksize = L3_SIZE;
	l3 = (pd_entry_t *)AARCH64_PA_TO_KVA(l2pde_pa(pde));
	idx = l3pte_index(va);
	ptep = &l3[idx];

 done:
	if (bs != NULL)
		*bs = blocksize;
	return ptep;
}

static pt_entry_t *
_pmap_pte_lookup_l3(struct pmap *pm, vaddr_t va)
{
	pt_entry_t *ptep;
	vsize_t blocksize = 0;

	ptep = _pmap_pte_lookup_bs(pm, va, &blocksize);
	if ((ptep != NULL) && (blocksize == L3_SIZE))
		return ptep;

	return NULL;
}
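
/*
 * note on usage (summarizing the range loops below):
 * _pmap_pte_lookup_bs() also reports the size of the region covered by
 * the returned entry, so callers advance with
 *
 *	va = (va + blocksize) & ~(blocksize - 1);
 *
 * and, while staying within one L3 table ((va & L3INDEXMASK) != 0),
 * simply step the pte pointer instead of re-walking the tree.
 */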

void
pmap_icache_sync_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
{
	pt_entry_t *ptep = NULL, pte;
	vaddr_t va;
	vsize_t blocksize = 0;

	KASSERT_PM_ADDR(pm, sva);

	pm_lock(pm);

	for (va = sva; va < eva; va = (va + blocksize) & ~(blocksize - 1)) {
		/* does va belong to the same L3 table as before? */
		if ((blocksize == L3_SIZE) && ((va & L3INDEXMASK) != 0)) {
			ptep++;
		} else {
			ptep = _pmap_pte_lookup_bs(pm, va, &blocksize);
			if (ptep == NULL)
				break;
		}

		pte = *ptep;
		if (!lxpde_valid(pte))
			continue;

		vaddr_t eob = (va + blocksize) & ~(blocksize - 1);
		vsize_t len = ulmin(eva, eob) - va;

		if (l3pte_readable(pte)) {
			cpu_icache_sync_range(va, len);
		} else {
			/*
			 * change the pte to be accessible temporarily
			 * to do cpu_icache_sync_range()
			 */
			struct pmap_asid_info * const pai = PMAP_PAI(pm,
			    cpu_tlb_info(ci));

			atomic_swap_64(ptep, pte | LX_BLKPAG_AF);
			AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);
			cpu_icache_sync_range(va, len);
			atomic_swap_64(ptep, pte);
			AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);
		}
	}

	pm_unlock(pm);
}

/*
 * Routine:	pmap_procwr
 *
 * Function:
 *	Synchronize caches corresponding to [addr, addr+len) in p.
 */
void
pmap_procwr(struct proc *p, vaddr_t sva, int len)
{

	if (__predict_true(p == curproc))
		cpu_icache_sync_range(sva, len);
	else {
		struct pmap *pm = p->p_vmspace->vm_map.pmap;
		paddr_t pa;
		vaddr_t va, eva;
		int tlen;

		for (va = sva; len > 0; va = eva, len -= tlen) {
			eva = uimin(va + len, trunc_page(va + PAGE_SIZE));
			tlen = eva - va;
			if (!pmap_extract(pm, va, &pa))
				continue;
			va = AARCH64_PA_TO_KVA(pa);
			cpu_icache_sync_range(va, tlen);
		}
	}
}

static pt_entry_t
_pmap_pte_adjust_prot(pt_entry_t pte, vm_prot_t prot, vm_prot_t refmod,
    bool user)
{
	vm_prot_t masked;
	pt_entry_t xn;

	masked = prot & refmod;
	pte &= ~(LX_BLKPAG_OS_RWMASK | LX_BLKPAG_AF | LX_BLKPAG_DBM |
	    LX_BLKPAG_AP);

	/*
	 * keep the actual prot in the pte as OS_{READ|WRITE} for ref/mod
	 * emulation, and set the DBM bit for HAFDBS if write permission
	 * is granted.
	 */
	pte |= LX_BLKPAG_OS_READ;	/* a valid pte can always be readable */
	if (prot & VM_PROT_WRITE)
		pte |= LX_BLKPAG_OS_WRITE | LX_BLKPAG_DBM;

	switch (masked & (VM_PROT_READ | VM_PROT_WRITE)) {
	case 0:
	default:
		/*
		 * the page cannot be accessed since the AF bit is clear,
		 * but the AF bit will be set by fixup() or HAFDBS.
		 */
		pte |= LX_BLKPAG_AP_RO;
		break;
	case VM_PROT_READ:
		/*
		 * being RO, the page cannot be written as is,
		 * but it may be changed to RW by fixup() or HAFDBS.
		 */
		pte |= LX_BLKPAG_AF;
		pte |= LX_BLKPAG_AP_RO;
		break;
	case VM_PROT_WRITE:
	case VM_PROT_READ | VM_PROT_WRITE:
		/* fully readable and writable */
		pte |= LX_BLKPAG_AF;
		pte |= LX_BLKPAG_AP_RW;
		break;
	}

	/* executable for kernel or user? first mark both never-exec */
	pte |= (LX_BLKPAG_UXN | LX_BLKPAG_PXN);
	/* then make the requested side executable */
	xn = user ? LX_BLKPAG_UXN : LX_BLKPAG_PXN;
	if (prot & VM_PROT_EXECUTE)
		pte &= ~xn;

	return pte;
}
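
/*
 * summary of the ref/mod encoding produced above:
 *
 *	prot & refmod	AF	AP	behaviour
 *	none		0	RO	any access faults; fixup() or
 *					HAFDBS sets AF
 *	READ		1	RO	readable; a write faults and may
 *					be upgraded to RW
 *	READ|WRITE	1	RW	fully accessible
 */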

static pt_entry_t
_pmap_pte_adjust_cacheflags(pt_entry_t pte, u_int flags)
{

	pte &= ~LX_BLKPAG_ATTR_MASK;

	switch (flags & (PMAP_CACHE_MASK | PMAP_DEV_MASK)) {
	case PMAP_DEV_NP ... PMAP_DEV_NP | PMAP_CACHE_MASK:
		pte |= LX_BLKPAG_ATTR_DEVICE_MEM_NP;	/* Device-nGnRnE */
		break;
	case PMAP_DEV ... PMAP_DEV | PMAP_CACHE_MASK:
		pte |= LX_BLKPAG_ATTR_DEVICE_MEM;	/* Device-nGnRE */
		break;
	case PMAP_NOCACHE:
	case PMAP_NOCACHE_OVR:
	case PMAP_WRITE_COMBINE:
		pte |= LX_BLKPAG_ATTR_NORMAL_NC;	/* only no-cache */
		break;
	case PMAP_WRITE_BACK:
	case 0:
	default:
		pte |= LX_BLKPAG_ATTR_NORMAL_WB;
		break;
	}

	return pte;
}

#ifdef ARMV81_HAFDBS
static inline void
_pmap_reflect_refmod_in_pp(pt_entry_t pte, struct pmap_page *pp)
{
	if (!lxpde_valid(pte))
		return;

	/*
	 * in order to retain referenced/modified information, reflect
	 * it from the pte into the pmap_page.
	 */
	if (pte & LX_BLKPAG_AF)
		pp->pp_pv.pv_va |= VM_PROT_READ;
	if ((pte & LX_BLKPAG_AP) == LX_BLKPAG_AP_RW)
		pp->pp_pv.pv_va |= VM_PROT_WRITE;
}
#endif

static struct pv_entry *
_pmap_remove_pv(struct pmap_page *pp, struct pmap *pm, vaddr_t va,
    pt_entry_t pte)
{
	struct pv_entry *pv, *ppv;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pp=%p, pm=%p, va=%llx, pte=%llx",
	    pp, pm, va, pte);

	KASSERT(mutex_owned(&pm->pm_lock));	/* for pv_proc */
	KASSERT(mutex_owned(&pp->pp_pvlock));

#ifdef ARMV81_HAFDBS
	if (aarch64_hafdbs_enabled != ID_AA64MMFR1_EL1_HAFDBS_NONE)
		_pmap_reflect_refmod_in_pp(pte, pp);
#endif

	for (ppv = NULL, pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
		if (pv->pv_pmap == pm && trunc_page(pv->pv_va) == va) {
			break;
		}
		ppv = pv;
	}

	if (pm != pmap_kernel() && pv != NULL)
		LIST_REMOVE(pv, pv_proc);

	if (ppv == NULL) {
		/* embedded in the pmap_page */
		pv->pv_pmap = NULL;
		pv = NULL;
		PMAP_COUNT(pv_remove_emb);
	} else if (pv != NULL) {
		/* dynamically allocated */
		ppv->pv_next = pv->pv_next;
		PMAP_COUNT(pv_remove_dyn);
	} else {
		PMAP_COUNT(pv_remove_nopv);
	}

	return pv;
}

#if defined(PMAP_PV_DEBUG) || defined(DDB)

static char *
str_vmflags(uint32_t flags)
{
	static int idx = 0;
	static char buf[4][32];	/* XXX */
	char *p;

	p = buf[idx];
	idx = (idx + 1) & 3;

	p[0] = (flags & VM_PROT_READ) ? 'R' : '-';
	p[1] = (flags & VM_PROT_WRITE) ? 'W' : '-';
	p[2] = (flags & VM_PROT_EXECUTE) ? 'X' : '-';
	if (flags & PMAP_WIRED)
		memcpy(&p[3], ",WIRED\0", 7);
	else
		p[3] = '\0';

	return p;
}

void
pmap_db_mdpg_print(struct vm_page *pg,
    void (*pr)(const char *, ...) __printflike(1, 2))
{
	struct pmap_page *pp = VM_PAGE_TO_PP(pg);
	struct pv_entry *pv;
	int i, flags;

	i = 0;
	flags = pp->pp_pv.pv_va & (PAGE_SIZE - 1);

	pr("pp=%p\n", pp);
	pr(" pp flags=%08x %s\n", flags, str_vmflags(flags));

	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
		if (pv->pv_pmap == NULL) {
			KASSERT(pv == &pp->pp_pv);
			continue;
		}
		struct pmap * const pm = pv->pv_pmap;
		struct pmap_asid_info * const pai = PMAP_PAI(pm,
		    cpu_tlb_info(ci));

		pr("  pv[%d] pv=%p\n", i, pv);
		pr("    pv[%d].pv_pmap = %p (asid=%d)\n", i, pm,
		    pai->pai_asid);
		pr("    pv[%d].pv_va   = %016lx (color=%d)\n", i,
		    trunc_page(pv->pv_va), _pmap_color(pv->pv_va));
		pr("    pv[%d].pv_ptep = %p\n", i, pv->pv_ptep);
		i++;
	}
}
#endif /* PMAP_PV_DEBUG & DDB */

static int
_pmap_enter_pv(struct pmap_page *pp, struct pmap *pm, struct pv_entry **pvp,
    vaddr_t va, pt_entry_t *ptep, paddr_t pa, u_int flags)
{
	struct pv_entry *pv;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pp=%p, pm=%p, va=%llx, pa=%llx", pp, pm, va,
	    pa);
	UVMHIST_LOG(pmaphist, "ptep=%p, flags=%08x", ptep, flags, 0, 0);

	KASSERT(mutex_owned(&pp->pp_pvlock));
	KASSERT(trunc_page(va) == va);

	/*
	 * a mapping must not already be registered at this VA.
	 */
	if (pp->pp_pv.pv_pmap == NULL) {
		/*
		 * claim the pv_entry embedded in the pmap_page.
		 * take care not to wipe out the acc/mod flags.
		 */
		pv = &pp->pp_pv;
		pv->pv_va = (pv->pv_va & (PAGE_SIZE - 1)) | va;
	} else {
		/*
		 * create and link a new pv; it was preallocated at the
		 * beginning of _pmap_enter().
		 */
		pv = *pvp;
		if (pv == NULL)
			return ENOMEM;
		*pvp = NULL;
		pv->pv_next = pp->pp_pv.pv_next;
		pp->pp_pv.pv_next = pv;
		pv->pv_va = va;
	}
	pv->pv_pmap = pm;
	pv->pv_ptep = ptep;
	PMAP_COUNT(pv_enter);

	if (pm != pmap_kernel())
		LIST_INSERT_HEAD(&pm->pm_pvlist, pv, pv_proc);

#ifdef PMAP_PV_DEBUG
	printf("pv %p alias added va=%016lx -> pa=%016lx\n", pv, va, pa);
	pmap_db_mdpg_print(PHYS_TO_VM_PAGE(pa), printf);
#endif

	return 0;
}

void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
{

	_pmap_enter(pmap_kernel(), va, pa, prot, flags | PMAP_WIRED, true);
}

void
pmap_kremove(vaddr_t va, vsize_t size)
{
	struct pmap *kpm = pmap_kernel();

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "va=%llx, size=%llx", va, size, 0, 0);

	KDASSERT((va & PGOFSET) == 0);
	KDASSERT((size & PGOFSET) == 0);

	KDASSERT(!IN_DIRECTMAP_ADDR(va));
	KDASSERT(IN_RANGE(va, VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS));

	_pmap_remove(kpm, va, va + size, true, NULL);
}
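
/*
 * downgrade the protection of a single mapping of a page.  the
 * effective protection becomes (prot & pteprot & mdattr): the
 * permissions already in the pte, further restricted by the requested
 * prot and by the page's recorded referenced/modified state.
 */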
static void
_pmap_protect_pv(struct pmap_page *pp, struct pv_entry *pv, vm_prot_t prot)
{
	pt_entry_t *ptep, pte;
	vm_prot_t pteprot;
	uint32_t mdattr;
	const bool user = (pv->pv_pmap != pmap_kernel());

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pp=%p, pv=%p, prot=%08x", pp, pv, prot, 0);

	KASSERT(mutex_owned(&pv->pv_pmap->pm_lock));

	ptep = pv->pv_ptep;
	pte = *ptep;

	/* get the prot mask from the pte */
	pteprot = VM_PROT_READ;	/* a valid pte can always be readable */
	if ((pte & (LX_BLKPAG_OS_WRITE | LX_BLKPAG_DBM)) != 0)
		pteprot |= VM_PROT_WRITE;
	if (l3pte_executable(pte, user))
		pteprot |= VM_PROT_EXECUTE;

#ifdef ARMV81_HAFDBS
	if (aarch64_hafdbs_enabled != ID_AA64MMFR1_EL1_HAFDBS_NONE)
		_pmap_reflect_refmod_in_pp(pte, pp);
#endif
	/* get the prot mask from the referenced/modified state */
	mdattr = pp->pp_pv.pv_va & (VM_PROT_READ | VM_PROT_WRITE);

	/* new prot = prot & pteprot & mdattr */
	pte = _pmap_pte_adjust_prot(pte, prot & pteprot, mdattr, user);
	atomic_swap_64(ptep, pte);

	struct pmap * const pm = pv->pv_pmap;
	struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(ci));

	AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, trunc_page(pv->pv_va));
}

void
pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
	pt_entry_t *ptep = NULL, pte;
	vaddr_t va;
	vsize_t blocksize = 0;
	const bool user = (pm != pmap_kernel());

	KASSERT((prot & VM_PROT_READ) || !(prot & VM_PROT_WRITE));

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pm=%p, sva=%016lx, eva=%016lx, prot=%08x",
	    pm, sva, eva, prot);

	KASSERT_PM_ADDR(pm, sva);
	KASSERT(!IN_DIRECTMAP_ADDR(sva));

	/* PROT_EXEC requires implicit PROT_READ */
	if (prot & VM_PROT_EXECUTE)
		prot |= VM_PROT_READ;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		PMAP_COUNT(protect_remove_fallback);
		pmap_remove(pm, sva, eva);
		return;
	}
	PMAP_COUNT(protect);

	KDASSERT((sva & PAGE_MASK) == 0);
	KDASSERT((eva & PAGE_MASK) == 0);

	pm_lock(pm);

	for (va = sva; va < eva; va = (va + blocksize) & ~(blocksize - 1)) {
#ifdef UVMHIST
		pt_entry_t opte;
#endif
		struct pmap_page *pp;
		uint32_t mdattr;
		bool executable;

		/* does va belong to the same L3 table as before? */
		if ((blocksize == L3_SIZE) && ((va & L3INDEXMASK) != 0))
			ptep++;
		else
			ptep = _pmap_pte_lookup_bs(pm, va, &blocksize);

		pte = *ptep;
		if (!lxpde_valid(pte)) {
			PMAP_COUNT(protect_none);
			continue;
		}

		if ((pte & LX_BLKPAG_OS_WIRED) == 0) {
			const paddr_t pa = lxpde_pa(pte);
			struct vm_page *const pg = PHYS_TO_VM_PAGE(pa);

			if (pg != NULL) {
				pp = VM_PAGE_TO_PP(pg);
				PMAP_COUNT(protect_managed);
			} else {
#ifdef __HAVE_PMAP_PV_TRACK
				pp = pmap_pv_tracked(pa);
#ifdef PMAPCOUNTERS
				if (pp != NULL)
					PMAP_COUNT(protect_pvmanaged);
				else
					PMAP_COUNT(protect_unmanaged);
#endif
#else
				pp = NULL;
				PMAP_COUNT(protect_unmanaged);
#endif /* __HAVE_PMAP_PV_TRACK */
			}
		} else {	/* kenter */
			pp = NULL;
			PMAP_COUNT(protect_unmanaged);
		}

		if (pp != NULL) {
#ifdef ARMV81_HAFDBS
			if (aarch64_hafdbs_enabled !=
			    ID_AA64MMFR1_EL1_HAFDBS_NONE)
				_pmap_reflect_refmod_in_pp(pte, pp);
#endif
			/* get the prot mask from referenced/modified */
			mdattr = pp->pp_pv.pv_va &
			    (VM_PROT_READ | VM_PROT_WRITE);
		} else {
			/* unmanaged page */
			mdattr = VM_PROT_ALL;
		}

#ifdef UVMHIST
		opte = pte;
#endif
		executable = l3pte_executable(pte, user);
		pte = _pmap_pte_adjust_prot(pte, prot, mdattr, user);

		struct pmap_asid_info * const pai = PMAP_PAI(pm,
		    cpu_tlb_info(ci));
		if (!executable && (prot & VM_PROT_EXECUTE)) {
			/* non-exec -> exec */
			UVMHIST_LOG(pmaphist, "icache_sync: "
			    "pm=%p, va=%016lx, pte: %016lx -> %016lx",
			    pm, va, opte, pte);

			if (!l3pte_readable(pte)) {
				PTE_ICACHE_SYNC_PAGE(pte, ptep, pai->pai_asid,
				    va);
				atomic_swap_64(ptep, pte);
				AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);
			} else {
				atomic_swap_64(ptep, pte);
				AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);
				cpu_icache_sync_range(va, PAGE_SIZE);
			}
		} else {
			atomic_swap_64(ptep, pte);
			AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);
		}
	}

	pm_unlock(pm);
}

#if defined(EFI_RUNTIME)
void
pmap_activate_efirt(void)
{
	struct cpu_info *ci = curcpu();
	struct pmap *pm = &efirt_pmap;
	struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(ci));

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, " (pm=%#jx)", (uintptr_t)pm, 0, 0, 0);

	KASSERT(kpreempt_disabled());

	ci->ci_pmap_asid_cur = pai->pai_asid;
	UVMHIST_LOG(pmaphist, "setting asid to %#jx", pai->pai_asid,
	    0, 0, 0);
	tlb_set_asid(pai->pai_asid, pm);

	/* Re-enable translation table walks using TTBR0 */
	uint64_t tcr = reg_tcr_el1_read();
	reg_tcr_el1_write(tcr & ~TCR_EPD0);
	isb();
	pm->pm_activated = true;

	PMAP_COUNT(activate);
}
#endif
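
/*
 * note: while no user pmap is active, translation table walks via
 * TTBR0 are disabled by setting TCR_EL1.EPD0 (see pmap_deactivate()
 * below); pmap_activate() asserts this and clears the bit again only
 * after the new ASID and TTBR0 have been installed by
 * pmap_tlb_asid_acquire().
 */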

void
pmap_activate(struct lwp *l)
{
	struct pmap *pm = l->l_proc->p_vmspace->vm_map.pmap;
	uint64_t tcr;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "lwp=%p (pid=%d, kernel=%u)", l,
	    l->l_proc->p_pid, pm == pmap_kernel() ? 1 : 0, 0);

	KASSERT(kpreempt_disabled());
	KASSERT((reg_tcr_el1_read() & TCR_EPD0) != 0);

	if (pm == pmap_kernel())
		return;
	if (l != curlwp)
		return;

	KASSERT(pm->pm_l0table != NULL);

	/* this calls tlb_set_asid() which calls cpu_set_ttbr0() */
	pmap_tlb_asid_acquire(pm, l);

	UVMHIST_LOG(pmaphist, "lwp=%p, asid=%d", l,
	    PMAP_PAI(pm, cpu_tlb_info(ci))->pai_asid, 0, 0);

	/* Re-enable translation table walks using TTBR0 */
	tcr = reg_tcr_el1_read();
	reg_tcr_el1_write(tcr & ~TCR_EPD0);
	isb();

	pm->pm_activated = true;

	PMAP_COUNT(activate);
}

#if defined(EFI_RUNTIME)
void
pmap_deactivate_efirt(void)
{
	struct cpu_info * const ci = curcpu();
	struct pmap * const pm = &efirt_pmap;

	UVMHIST_FUNC(__func__); UVMHIST_CALLED(pmaphist);

	KASSERT(kpreempt_disabled());

	/* Disable translation table walks using TTBR0 */
	uint64_t tcr = reg_tcr_el1_read();
	reg_tcr_el1_write(tcr | TCR_EPD0);
	isb();

	UVMHIST_LOG(pmaphist, "setting asid to %#jx", KERNEL_PID,
	    0, 0, 0);

	ci->ci_pmap_asid_cur = KERNEL_PID;
	tlb_set_asid(KERNEL_PID, pmap_kernel());

	pm->pm_activated = false;

	PMAP_COUNT(deactivate);
}
#endif

void
pmap_deactivate(struct lwp *l)
{
	struct pmap *pm = l->l_proc->p_vmspace->vm_map.pmap;
	uint64_t tcr;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "lwp=%p (pid=%d, (kernel=%u))", l,
	    l->l_proc->p_pid, pm == pmap_kernel() ? 1 : 0, 0);

	KASSERT(kpreempt_disabled());

	/* Disable translation table walks using TTBR0 */
	tcr = reg_tcr_el1_read();
	reg_tcr_el1_write(tcr | TCR_EPD0);
	isb();

	UVMHIST_LOG(pmaphist, "lwp=%p, asid=%d", l,
	    PMAP_PAI(pm, cpu_tlb_info(ci))->pai_asid, 0, 0);

	pmap_tlb_asid_deactivate(pm);

	pm->pm_activated = false;

	PMAP_COUNT(deactivate);
}

struct pmap *
pmap_create(void)
{
	struct pmap *pm;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLED(pmaphist);

	pm = pool_cache_get(&_pmap_cache, PR_WAITOK);
	memset(pm, 0, sizeof(*pm));
	pm->pm_refcnt = 1;
	pm->pm_idlepdp = 0;
	LIST_INIT(&pm->pm_vmlist);
	LIST_INIT(&pm->pm_pvlist);
	mutex_init(&pm->pm_lock, MUTEX_DEFAULT, IPL_NONE);

	kcpuset_create(&pm->pm_active, true);
	kcpuset_create(&pm->pm_onproc, true);

	pm->pm_l0table_pa = pmap_alloc_pdp(pm, NULL, 0, true);
	KASSERT(pm->pm_l0table_pa != POOL_PADDR_INVALID);
	pm->pm_l0table = (pd_entry_t *)AARCH64_PA_TO_KVA(pm->pm_l0table_pa);
	KASSERT(((vaddr_t)pm->pm_l0table & (PAGE_SIZE - 1)) == 0);

	UVMHIST_LOG(pmaphist, "pm=%p, pm_l0table=%016lx, pm_l0table_pa=%016lx",
	    pm, pm->pm_l0table, pm->pm_l0table_pa, 0);

	PMAP_COUNT(create);
	return pm;
}

void
pmap_destroy(struct pmap *pm)
{
	unsigned int refcnt;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pm=%p, pm_l0table=%016lx, refcnt=%jd",
	    pm, pm->pm_l0table, pm->pm_refcnt, 0);

	if (pm == NULL)
		return;

	if (pm == pmap_kernel())
		panic("cannot destroy kernel pmap");

	membar_release();
	refcnt = atomic_dec_uint_nv(&pm->pm_refcnt);
	if (refcnt > 0)
		return;
	membar_acquire();

	KASSERT(LIST_EMPTY(&pm->pm_pvlist));
	pmap_tlb_asid_release_all(pm);

	_pmap_free_pdp_all(pm, true);
	mutex_destroy(&pm->pm_lock);

	kcpuset_destroy(pm->pm_active);
	kcpuset_destroy(pm->pm_onproc);

	pool_cache_put(&_pmap_cache, pm);

	PMAP_COUNT(destroy);
}
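
/*
 * each user PDP page records, in mdpg_ptep_parent, the address of the
 * entry in its parent table that maps it.  _pmap_pdp_delref() and
 * _pmap_sweep_pdp() follow this backlink to unlink and free emptied
 * page table pages from the bottom up.
 */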

static inline void
_pmap_pdp_setparent(struct pmap *pm, struct vm_page *pg, pt_entry_t *ptep)
{

	if ((pm != pmap_kernel()) && (pg != NULL)) {
		KASSERT(mutex_owned(&pm->pm_lock));
		VM_PAGE_TO_MD(pg)->mdpg_ptep_parent = ptep;
	}
}

/*
 * increment the reference counter of the page descriptor page.
 * the reference counter should equal 1 + the number of valid entries
 * the page has.
 */
static inline void
_pmap_pdp_addref(struct pmap *pm, paddr_t pdppa, struct vm_page *pdppg_hint)
{
	struct vm_page *pg;

	/* kernel L0-L3 pages will never be freed */
	if (pm == pmap_kernel())
		return;

#if defined(EFI_RUNTIME)
	/* EFI runtime L0-L3 pages will never be freed */
	if (pm == pmap_efirt())
		return;
#endif

	KASSERT(mutex_owned(&pm->pm_lock));

	/* no need for the L0 page */
	if (pm->pm_l0table_pa == pdppa)
		return;

	pg = pdppg_hint;
	if (pg == NULL)
		pg = PHYS_TO_VM_PAGE(pdppa);
	KASSERT(pg != NULL);

	pg->wire_count++;

	KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
	    "pg=%p, wire_count=%d", pg, pg->wire_count);
}

/*
 * decrement the reference counter of the page descriptor page.
 * if the page becomes empty (the reference counter drops to 1), it is
 * freed and true is returned; otherwise false is returned.
 * kernel pages and the L0 page descriptor page are never freed.
 */
static bool
_pmap_pdp_delref(struct pmap *pm, paddr_t pdppa, bool do_free_pdp)
{
	struct vm_page *pg;
	bool removed;
	uint16_t wirecount;

	/* kernel L0-L3 pages will never be freed */
	if (pm == pmap_kernel())
		return false;

#if defined(EFI_RUNTIME)
	/* EFI runtime L0-L3 pages will never be freed */
	if (pm == pmap_efirt())
		return false;
#endif

	KASSERT(mutex_owned(&pm->pm_lock));

	/* no need for the L0 page */
	if (pm->pm_l0table_pa == pdppa)
		return false;

	pg = PHYS_TO_VM_PAGE(pdppa);
	KASSERT(pg != NULL);

	wirecount = --pg->wire_count;

	if (!do_free_pdp) {
		/*
		 * pm_idlepdp is only incremented by pmap_page_protect()
		 * with VM_PROT_NONE.  it is not exact, since pmap_enter()
		 * is not taken into account, but it is a useful hint for
		 * deciding when to sweep.
		 */
		if (wirecount == 1)
			pm->pm_idlepdp++;
		return false;
	}

	/* if there are no more references, free the pdp */
	removed = false;
	while (wirecount == 1) {
		pd_entry_t *ptep_in_parent, opte __diagused;
		ptep_in_parent = VM_PAGE_TO_MD(pg)->mdpg_ptep_parent;
		if (ptep_in_parent == NULL) {
			/* no parent */
			pmap_free_pdp(pm, pg);
			removed = true;
			break;
		}

		/* unlink from the parent */
		opte = atomic_swap_64(ptep_in_parent, 0);
		KASSERT(lxpde_valid(opte));
		wirecount = atomic_add_32_nv(&pg->wire_count, -1); /* 1 -> 0 */
		KASSERT(wirecount == 0);
		pmap_free_pdp(pm, pg);
		removed = true;

		/* L3->L2->L1.  no need for L0 */
		pdppa = AARCH64_KVA_TO_PA(trunc_page((vaddr_t)ptep_in_parent));
		if (pdppa == pm->pm_l0table_pa)
			break;

		pg = PHYS_TO_VM_PAGE(pdppa);
		KASSERT(pg != NULL);
		KASSERTMSG(pg->wire_count >= 1,
		    "wire_count=%d", pg->wire_count);
		/* decrement the wire_count of the parent */
		wirecount = atomic_add_32_nv(&pg->wire_count, -1);
		KASSERTMSG(pg->wire_count <= (Ln_ENTRIES + 1),
		    "pm=%p, pg=%p, wire_count=%d",
		    pm, pg, pg->wire_count);
	}

	return removed;
}
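
/*
 * with the 4KB translation granule used by this pmap, each table level
 * holds Ln_ENTRIES (512) entries: an L0 entry maps 512GB, an L1 entry
 * 1GB, an L2 entry 2MB, and an L3 entry a single 4KB page.
 */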
"kernel" : "user", va); 1766 pdppa0 = pdppa; 1767 pdppg0 = pdppg; 1768 pdppa = pmap_alloc_pdp(pm, &pdppg, flags, false); /* L3 pdp */ 1769 if (pdppa == POOL_PADDR_INVALID) { 1770 return ENOMEM; 1771 } 1772 atomic_swap_64(&l2[idx], pdppa | L2_TABLE); 1773 _pmap_pdp_addref(pm, pdppa0, pdppg0); /* L2 occupancy++ */ 1774 _pmap_pdp_setparent(pm, pdppg, &l2[idx]); 1775 } else { 1776 pdppa = l2pde_pa(pde); 1777 pdppg = NULL; 1778 } 1779 *pap = pdppa; 1780 *pgp = pdppg; 1781 return 0; 1782} 1783 1784static int 1785_pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, 1786 u_int flags, bool kenter) 1787{ 1788 struct vm_page *pdppg; 1789 struct pmap_page *pp, *opp, *pps[2]; 1790 struct pv_entry *spv, *opv = NULL; 1791 pt_entry_t attr, pte, opte, *ptep; 1792 pd_entry_t *l3; 1793 paddr_t pdppa; 1794 uint32_t mdattr; 1795 unsigned int idx; 1796 int error = 0; 1797#if defined(EFI_RUNTIME) 1798 const bool efirt_p = pm == pmap_efirt(); 1799#else 1800 const bool efirt_p = false; 1801#endif 1802 const bool kernel_p = pm == pmap_kernel(); 1803 const bool user = !kernel_p && !efirt_p; 1804 bool need_sync_icache, need_enter_pv; 1805 1806 UVMHIST_FUNC(__func__); 1807 UVMHIST_CALLARGS(pmaphist, "pm=%p, kentermode=%d", pm, kenter, 0, 0); 1808 UVMHIST_LOG(pmaphist, "va=%016lx, pa=%016lx, prot=%08x, flags=%08x", 1809 va, pa, prot, flags); 1810 1811 KASSERT_PM_ADDR(pm, va); 1812 KASSERT(!IN_DIRECTMAP_ADDR(va)); 1813 KASSERT((prot & VM_PROT_ALL) != VM_PROT_NONE); 1814 KASSERT(pa < AARCH64_MAX_PA); 1815 1816#ifdef PMAPCOUNTERS 1817 PMAP_COUNT(mappings); 1818 if (_pmap_color(va) == _pmap_color(pa)) { 1819 if (user) { 1820 PMAP_COUNT(user_mappings); 1821 } else { 1822 PMAP_COUNT(kern_mappings); 1823 } 1824 } else if (flags & PMAP_WIRED) { 1825 if (user) { 1826 PMAP_COUNT(user_mappings_bad_wired); 1827 } else { 1828 PMAP_COUNT(kern_mappings_bad_wired); 1829 } 1830 } else { 1831 if (user) { 1832 PMAP_COUNT(user_mappings_bad); 1833 } else { 1834 PMAP_COUNT(kern_mappings_bad); 1835 } 1836 } 1837#endif 1838 1839 if (kenter) { 1840 pp = NULL; 1841 spv = NULL; 1842 need_enter_pv = false; 1843 } else { 1844 struct vm_page *pg = PHYS_TO_VM_PAGE(pa); 1845 if (pg != NULL) { 1846 pp = VM_PAGE_TO_PP(pg); 1847 PMAP_COUNT(managed_mappings); 1848 } else { 1849#ifdef __HAVE_PMAP_PV_TRACK 1850 pp = pmap_pv_tracked(pa); 1851#ifdef PMAPCOUNTERS 1852 if (pp != NULL) 1853 PMAP_COUNT(pvmanaged_mappings); 1854 else 1855 PMAP_COUNT(unmanaged_mappings); 1856#endif 1857#else 1858 pp = NULL; 1859 PMAP_COUNT(unmanaged_mappings); 1860#endif /* __HAVE_PMAP_PV_TRACK */ 1861 } 1862 1863 if (pp != NULL) { 1864 /* 1865 * allocate pv in advance of pm_lock(). 1866 */ 1867 spv = pool_cache_get(&_pmap_pv_pool, PR_NOWAIT); 1868 need_enter_pv = true; 1869 } else { 1870 spv = NULL; 1871 need_enter_pv = false; 1872 } 1873 1874 pm_lock(pm); 1875 if (pm->pm_idlepdp >= PDPSWEEP_TRIGGER) { 1876 _pmap_sweep_pdp(pm); 1877 } 1878 } 1879 1880 /* 1881 * traverse L0 -> L1 -> L2 -> L3 table with growing pdp if needed. 
	error = _pmap_get_pdp(pm, va, kenter, flags, &pdppa, &pdppg);
	if (error != 0) {
		if (flags & PMAP_CANFAIL) {
			goto fail0;
		}
		panic("%s: cannot allocate L3 table error=%d", __func__,
		    error);
	}

	l3 = (void *)AARCH64_PA_TO_KVA(pdppa);

	idx = l3pte_index(va);
	ptep = &l3[idx];	/* as a PTE */
	opte = *ptep;
	need_sync_icache = (prot & VM_PROT_EXECUTE) && !efirt_p;

	/* lock ordering for the old page and the new page */
	pps[0] = pp;
	pps[1] = NULL;

	/* remap? */
	if (l3pte_valid(opte)) {
		bool need_remove_pv;

		KASSERT(!kenter);	/* pmap_kenter_pa() cannot override */
		if (opte & LX_BLKPAG_OS_WIRED) {
			_pmap_adj_wired_count(pm, -1);
		}
		_pmap_adj_resident_count(pm, -1);
#ifdef PMAPCOUNTERS
		PMAP_COUNT(remappings);
		if (user) {
			PMAP_COUNT(user_mappings_changed);
		} else {
			PMAP_COUNT(kern_mappings_changed);
		}
#endif
		UVMHIST_LOG(pmaphist,
		    "va=%016lx is already mapped."
		    " old-pa=%016lx new-pa=%016lx, old-pte=%016llx",
		    va, l3pte_pa(opte), pa, opte);

		if (pa == l3pte_pa(opte)) {
			/*
			 * the old and new pte have the same pa;
			 * no need to update the pv.
			 */
			need_remove_pv = (pp == NULL);
			need_enter_pv = false;
			if (need_sync_icache && l3pte_executable(opte, user))
				need_sync_icache = false;
		} else {
			need_remove_pv = true;
		}

		if (need_remove_pv &&
		    ((opp = phys_to_pp(l3pte_pa(opte))) != NULL)) {
			/*
			 * need to lock both pp and opp (the old pp) in
			 * address order to avoid deadlock; pp may be NULL.
			 */
			if (pp < opp) {
				pps[0] = pp;
				pps[1] = opp;
			} else {
				pps[0] = opp;
				pps[1] = pp;
			}
			if (pps[0] != NULL)
				pmap_pv_lock(pps[0]);
			if (pps[1] != NULL)
				pmap_pv_lock(pps[1]);
			opv = _pmap_remove_pv(opp, pm, va, opte);
		} else {
			if (pp != NULL)
				pmap_pv_lock(pp);
		}
		opte = atomic_swap_64(ptep, 0);
	} else {
		if (pp != NULL)
			pmap_pv_lock(pp);
	}

	if (!l3pte_valid(opte))
		_pmap_pdp_addref(pm, pdppa, pdppg);	/* L3 occupancy++ */

	/*
	 * read permission is treated as an access permission internally,
	 * so PROT_READ must be added even if only PROT_WRITE or PROT_EXEC
	 * was requested.
	 */
	if (prot & (VM_PROT_WRITE | VM_PROT_EXECUTE))
		prot |= VM_PROT_READ;
	if (flags & (VM_PROT_WRITE | VM_PROT_EXECUTE))
		flags |= VM_PROT_READ;

	mdattr = VM_PROT_READ | VM_PROT_WRITE;
	if (need_enter_pv) {
		KASSERT(!kenter);
		error = _pmap_enter_pv(pp, pm, &spv, va, ptep, pa, flags);
		if (error != 0) {
			/*
			 * if pmap_enter() fails, it must not leave
			 * behind an existing pmap entry.
			 */
			if (lxpde_valid(opte)) {
				KASSERT((vaddr_t)l3 ==
				    trunc_page((vaddr_t)ptep));
				_pmap_pdp_delref(pm,
				    AARCH64_KVA_TO_PA((vaddr_t)l3), true);
				struct pmap_asid_info * const pai =
				    PMAP_PAI(pm, cpu_tlb_info(ci));

				AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);
			}
			PMAP_COUNT(pv_entry_cannotalloc);
			if (flags & PMAP_CANFAIL)
				goto fail1;
			panic("pmap_enter: failed to allocate pv_entry");
		}
	}

	if (pp != NULL) {
		/* update the referenced/modified flags */
		KASSERT(!kenter);
		pp->pp_pv.pv_va |= (flags & (VM_PROT_READ | VM_PROT_WRITE));
		mdattr &= (uint32_t)pp->pp_pv.pv_va;
	}

#ifdef PMAPCOUNTERS
	switch (flags & PMAP_CACHE_MASK) {
	case PMAP_NOCACHE:
	case PMAP_NOCACHE_OVR:
		PMAP_COUNT(uncached_mappings);
		break;
	}
#endif

	attr = L3_PAGE | (kenter ? 0 : LX_BLKPAG_NG);
	attr = _pmap_pte_adjust_prot(attr, prot, mdattr, user);
	attr = _pmap_pte_adjust_cacheflags(attr, flags);
	if (VM_MAXUSER_ADDRESS > va && !efirt_p)
		attr |= LX_BLKPAG_APUSER;
	if (flags & PMAP_WIRED)
		attr |= LX_BLKPAG_OS_WIRED;
#ifdef MULTIPROCESSOR
	attr |= LX_BLKPAG_SH_IS;
#endif

	pte = pa | attr;

	struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(ci));
	const tlb_asid_t asid = pai->pai_asid;

	if (need_sync_icache) {
		/* non-exec -> exec */
		UVMHIST_LOG(pmaphist,
		    "icache_sync: pm=%p, va=%016lx, pte: %016lx -> %016lx",
		    pm, va, opte, pte);

		if (!l3pte_readable(pte)) {
			PTE_ICACHE_SYNC_PAGE(pte, ptep, asid, va);
			atomic_swap_64(ptep, pte);
			AARCH64_TLBI_BY_ASID_VA(asid, va);
		} else {
			atomic_swap_64(ptep, pte);
			AARCH64_TLBI_BY_ASID_VA(asid, va);
			cpu_icache_sync_range(va, PAGE_SIZE);
		}
	} else {
		atomic_swap_64(ptep, pte);
		AARCH64_TLBI_BY_ASID_VA(asid, va);
	}

	if (pte & LX_BLKPAG_OS_WIRED) {
		_pmap_adj_wired_count(pm, 1);
	}
	_pmap_adj_resident_count(pm, 1);

 fail1:
	if (pps[1] != NULL)
		pmap_pv_unlock(pps[1]);
	if (pps[0] != NULL)
		pmap_pv_unlock(pps[0]);
 fail0:
	if (!kenter) {
		pm_unlock(pm);

		/* the spare pv was not used; discard it */
		if (spv != NULL)
			pool_cache_put(&_pmap_pv_pool, spv);

		if (opv != NULL)
			pool_cache_put(&_pmap_pv_pool, opv);
	}

	return error;
}
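
/*
 * pmap_enter() establishes a managed mapping: a pv entry is maintained
 * so the mapping can later be found from the page (e.g. by
 * pmap_page_protect()).  pmap_kenter_pa() above, by contrast, enters a
 * wired, unmanaged kernel mapping with no pv tracking.
 */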
int
pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot,
    u_int flags)
{
	return _pmap_enter(pm, va, pa, prot, flags, false);
}


bool
pmap_remove_all(struct pmap *pm)
{
	struct pmap_page *pp;
	struct pv_entry *pv, *pvtmp, *opv, *pvtofree = NULL;
	pt_entry_t pte, *ptep;
	paddr_t pa;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pm=%p", pm, 0, 0, 0);

	KASSERT(pm != pmap_kernel());

	UVMHIST_LOG(pmaphist, "pm=%p, asid=%d", pm,
	    PMAP_PAI(pm, cpu_tlb_info(ci))->pai_asid, 0, 0);

	pm_lock(pm);

	LIST_FOREACH_SAFE(pv, &pm->pm_pvlist, pv_proc, pvtmp) {
		ptep = pv->pv_ptep;
		pte = *ptep;

		KASSERTMSG(lxpde_valid(pte),
		    "pte is not valid: pmap=%p, va=%016lx",
		    pm, pv->pv_va);

		pa = lxpde_pa(pte);
		pp = phys_to_pp(pa);

		KASSERTMSG(pp != NULL,
		    "no pmap_page of physical address:%016lx, "
		    "pmap=%p, va=%016lx",
		    pa, pm, pv->pv_va);

		pmap_pv_lock(pp);
		opv = _pmap_remove_pv(pp, pm, trunc_page(pv->pv_va), pte);
		pmap_pv_unlock(pp);
		if (opv != NULL) {
			opv->pv_next = pvtofree;
			pvtofree = opv;
		}
	}
	/* all PTEs should now be cleared */
	pm->pm_stats.wired_count = 0;
	pm->pm_stats.resident_count = 0;

	/* clear the L0 page table page */
	pmap_zero_page(pm->pm_l0table_pa);

	aarch64_tlbi_by_asid(PMAP_PAI(pm, cpu_tlb_info(ci))->pai_asid);

	/* free the L1-L3 page table pages, but not L0 */
	_pmap_free_pdp_all(pm, false);

	pm_unlock(pm);

	for (pv = pvtofree; pv != NULL; pv = pvtmp) {
		pvtmp = pv->pv_next;
		pool_cache_put(&_pmap_pv_pool, pv);
	}

	return true;
}

static void
_pmap_remove(struct pmap *pm, vaddr_t sva, vaddr_t eva, bool kremove,
    struct pv_entry **pvtofree)
{
	pt_entry_t pte, *ptep = NULL;
	struct pmap_page *pp;
	struct pv_entry *opv;
	paddr_t pa;
	vaddr_t va;
	vsize_t blocksize = 0;
	bool pdpremoved;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pm=%p, sva=%016lx, eva=%016lx, kremove=%d",
	    pm, sva, eva, kremove);

	KASSERT(kremove || mutex_owned(&pm->pm_lock));

	for (va = sva; (va < eva) && (pm->pm_stats.resident_count != 0);
	    va = (va + blocksize) & ~(blocksize - 1)) {

		/* does va belong to the same L3 table as before? */
		if ((blocksize == L3_SIZE) && ((va & L3INDEXMASK) != 0))
			ptep++;
		else
			ptep = _pmap_pte_lookup_bs(pm, va, &blocksize);

		pte = *ptep;
		if (!lxpde_valid(pte))
			continue;

		if (!kremove) {
			pa = lxpde_pa(pte);
			pp = phys_to_pp(pa);
			if (pp != NULL) {

				pmap_pv_lock(pp);
				opv = _pmap_remove_pv(pp, pm, va, pte);
				pmap_pv_unlock(pp);
				if (opv != NULL) {
					opv->pv_next = *pvtofree;
					*pvtofree = opv;
				}
			}
		}

		pte = atomic_swap_64(ptep, 0);
		if (!lxpde_valid(pte))
			continue;
		struct pmap_asid_info * const pai = PMAP_PAI(pm,
		    cpu_tlb_info(ci));

		pdpremoved = _pmap_pdp_delref(pm,
		    AARCH64_KVA_TO_PA(trunc_page((vaddr_t)ptep)), true);
		AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);

		if (pdpremoved) {
			/*
			 * this Ln page table page has been removed;
			 * skip to the next Ln table.
			 */
void
pmap_remove(struct pmap *pm, vaddr_t sva, vaddr_t eva)
{
	struct pv_entry *pvtofree = NULL;
	struct pv_entry *pv, *pvtmp;

	KASSERT_PM_ADDR(pm, sva);
	KASSERT(!IN_DIRECTMAP_ADDR(sva));

	pm_lock(pm);
	_pmap_remove(pm, sva, eva, false, &pvtofree);
	pm_unlock(pm);

	for (pv = pvtofree; pv != NULL; pv = pvtmp) {
		pvtmp = pv->pv_next;
		pool_cache_put(&_pmap_pv_pool, pv);
	}
}

static void
pmap_page_remove(struct pmap_page *pp, vm_prot_t prot)
{
	struct pv_entry *pv, *pvtmp;
	struct pv_entry *pvtofree = NULL;
	struct pmap *pm;
	pt_entry_t opte;

	/* remove all mappings that reference this physical page */
	pmap_pv_lock(pp);
	for (pv = &pp->pp_pv; pv != NULL;) {
		if ((pm = pv->pv_pmap) == NULL) {
			KASSERT(pv == &pp->pp_pv);
			pv = pp->pp_pv.pv_next;
			continue;
		}
		if (!pm_reverse_lock(pm, pp)) {
			/* now retry */
			pv = &pp->pp_pv;
			continue;
		}
		opte = atomic_swap_64(pv->pv_ptep, 0);
		struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(ci));
		const vaddr_t va = trunc_page(pv->pv_va);

		if (lxpde_valid(opte)) {
			_pmap_pdp_delref(pm,
			    AARCH64_KVA_TO_PA(trunc_page(
			    (vaddr_t)pv->pv_ptep)), false);
			AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);

			if ((opte & LX_BLKPAG_OS_WIRED) != 0) {
				_pmap_adj_wired_count(pm, -1);
			}
			_pmap_adj_resident_count(pm, -1);
		}
		pvtmp = _pmap_remove_pv(pp, pm, va, opte);
		if (pvtmp == NULL) {
			KASSERT(pv == &pp->pp_pv);
		} else {
			KASSERT(pv == pvtmp);
			KASSERT(pp->pp_pv.pv_next == pv->pv_next);
			pv->pv_next = pvtofree;
			pvtofree = pv;
		}
		pm_unlock(pm);
		pv = pp->pp_pv.pv_next;
	}
	pmap_pv_unlock(pp);

	for (pv = pvtofree; pv != NULL; pv = pvtmp) {
		pvtmp = pv->pv_next;
		pool_cache_put(&_pmap_pv_pool, pv);
	}
}

#ifdef __HAVE_PMAP_PV_TRACK
void
pmap_pv_protect(paddr_t pa, vm_prot_t prot)
{
	struct pmap_page *pp;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pa=%016lx, prot=%08x", pa, prot, 0, 0);

	pp = pmap_pv_tracked(pa);
	if (pp == NULL)
		panic("pmap_pv_protect: page not pv-tracked: %#" PRIxPADDR, pa);

	KASSERT(prot == VM_PROT_NONE);
	pmap_page_remove(pp, prot);
}
#endif

void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
	struct pv_entry *pv;
	struct pmap_page *pp;
	struct pmap *pm;

	KASSERT((prot & VM_PROT_READ) || !(prot & VM_PROT_WRITE));

	pp = VM_PAGE_TO_PP(pg);

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pg=%p, pp=%p, pa=%016lx, prot=%08x",
	    pg, pp, VM_PAGE_TO_PHYS(pg), prot);

	/* do an unlocked check first */
	if (atomic_load_relaxed(&pp->pp_pv.pv_pmap) == NULL &&
	    atomic_load_relaxed(&pp->pp_pv.pv_next) == NULL) {
		return;
	}

	if ((prot & (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
	    VM_PROT_NONE) {
		pmap_page_remove(pp, prot);
	} else {
		pmap_pv_lock(pp);
		pv = &pp->pp_pv;
		while (pv != NULL) {
			if ((pm = pv->pv_pmap) == NULL) {
				KASSERT(pv == &pp->pp_pv);
				pv = pv->pv_next;
				continue;
			}
			if (!pm_reverse_lock(pm, pp)) {
				/* retry */
				pv = &pp->pp_pv;
				continue;
			}
			_pmap_protect_pv(pp, pv, prot);
			pm_unlock(pm);
			pv = pv->pv_next;
		}
		pmap_pv_unlock(pp);
	}
}

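/*
 * Usage sketch for pmap_page_protect() above (illustrative; this is
 * the standard pmap(9) contract, not extra code in this file): the VM
 * system uses it to downgrade or revoke every mapping of a physical
 * page at once, e.g.
 *
 *	pmap_page_protect(pg, VM_PROT_READ);	// write-protect all mappings
 *	pmap_page_protect(pg, VM_PROT_NONE);	// remove all mappings
 *
 * The VM_PROT_NONE case takes the pmap_page_remove() path; any other
 * protection keeps the mappings and only tightens them one pv at a
 * time via _pmap_protect_pv().
 */
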
void
pmap_unwire(struct pmap *pm, vaddr_t va)
{
	pt_entry_t pte, *ptep;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pm=%p, va=%016lx", pm, va, 0, 0);

	PMAP_COUNT(unwire);

	KASSERT_PM_ADDR(pm, va);
	KASSERT(!IN_DIRECTMAP_ADDR(va));

	pm_lock(pm);
	ptep = _pmap_pte_lookup_l3(pm, va);
	if (ptep != NULL) {
		pte = *ptep;
		if (!l3pte_valid(pte) ||
		    ((pte & LX_BLKPAG_OS_WIRED) == 0)) {
			/* invalid pte, or pte is not wired */
			PMAP_COUNT(unwire_failure);
			pm_unlock(pm);
			return;
		}

		pte &= ~LX_BLKPAG_OS_WIRED;
		atomic_swap_64(ptep, pte);

		_pmap_adj_wired_count(pm, -1);
	}
	pm_unlock(pm);
}

bool
pmap_fault_fixup(struct pmap *pm, vaddr_t va, vm_prot_t accessprot, bool user)
{
	struct pmap_page *pp;
	pt_entry_t *ptep, pte;
	vm_prot_t pmap_prot;
	paddr_t pa;
	bool fixed = false;

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pm=%p, va=%016lx, accessprot=%08x",
	    pm, va, accessprot, 0);

#if 0
	KASSERT_PM_ADDR(pm, va);
#else
	if (((pm == pmap_kernel()) &&
	    !(IN_RANGE(va, VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS))) ||
	    ((pm != pmap_kernel()) &&
	    !(IN_RANGE(va, VM_MIN_ADDRESS, VM_MAX_ADDRESS)))) {

		UVMHIST_LOG(pmaphist,
		    "pmap space and va mismatch: kernel=%jd, va=%016lx",
		    pm == pmap_kernel(), va, 0, 0);
		return false;
	}
#endif

	pm_lock(pm);

	ptep = _pmap_pte_lookup_l3(pm, va);
	if (ptep == NULL) {
		UVMHIST_LOG(pmaphist, "pte_lookup failure: va=%016lx",
		    va, 0, 0, 0);
		goto done;
	}

	pte = *ptep;
	if (!l3pte_valid(pte)) {
		UVMHIST_LOG(pmaphist, "invalid pte: %016llx: va=%016lx",
		    pte, va, 0, 0);
		goto done;
	}

	pa = l3pte_pa(*ptep);
	pp = phys_to_pp(pa);
	if (pp == NULL) {
		UVMHIST_LOG(pmaphist, "pmap_page not found: va=%016lx",
		    va, 0, 0, 0);
		goto done;
	}

	/*
	 * Get the prot specified by pmap_enter().
	 * A valid pte is considered a readable page.
	 * If DBM is 1, it is considered a writable page.
	 */
	pmap_prot = VM_PROT_READ;
	if ((pte & (LX_BLKPAG_OS_WRITE | LX_BLKPAG_DBM)) != 0)
		pmap_prot |= VM_PROT_WRITE;

	if (l3pte_executable(pte, pm != pmap_kernel()))
		pmap_prot |= VM_PROT_EXECUTE;

	UVMHIST_LOG(pmaphist, "va=%016lx, pmapprot=%08x, accessprot=%08x",
	    va, pmap_prot, accessprot, 0);

	/* ignore everything except read/write/execute */
	accessprot &= (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE);

	/* PROT_EXEC requires implicit PROT_READ */
	if (accessprot & VM_PROT_EXECUTE)
		accessprot |= VM_PROT_READ;

	/* no permission to read/write/execute this page */
	if ((pmap_prot & accessprot) != accessprot) {
		UVMHIST_LOG(pmaphist, "no permission to access", 0, 0, 0, 0);
		goto done;
	}

	/*
	 * the pte is already readable and writable, but a fault was
	 * taken anyway?  probably copy(9)
	 */
	if ((pte & LX_BLKPAG_AF) && ((pte & LX_BLKPAG_AP) == LX_BLKPAG_AP_RW))
		goto done;

	pmap_pv_lock(pp);
	if ((pte & LX_BLKPAG_AF) == 0) {
		/* the pte has no AF bit; set referenced and the AF bit */
		UVMHIST_LOG(pmaphist,
		    "REFERENCED:"
		    " va=%016lx, pa=%016lx, pte_prot=%08x, accessprot=%08x",
		    va, pa, pmap_prot, accessprot);
		pp->pp_pv.pv_va |= VM_PROT_READ;	/* set referenced */
		pte |= LX_BLKPAG_AF;

		PMAP_COUNT(fixup_referenced);
	}
	if ((accessprot & VM_PROT_WRITE) &&
	    ((pte & LX_BLKPAG_AP) == LX_BLKPAG_AP_RO)) {
		/* the pte is not RW; set modified and make it RW */

		UVMHIST_LOG(pmaphist, "MODIFIED:"
		    " va=%016lx, pa=%016lx, pte_prot=%08x, accessprot=%08x",
		    va, pa, pmap_prot, accessprot);
		pp->pp_pv.pv_va |= VM_PROT_WRITE;	/* set modified */
		pte &= ~LX_BLKPAG_AP;
		pte |= LX_BLKPAG_AP_RW;

		PMAP_COUNT(fixup_modified);
	}
	pmap_pv_unlock(pp);

	atomic_swap_64(ptep, pte);
	struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(ci));
	AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);

	fixed = true;

 done:
	pm_unlock(pm);
	return fixed;
}

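/*
 * pmap_fault_fixup() above implements referenced/modified emulation:
 * mappings may be entered with the AF bit clear and/or as read-only,
 * and the resulting (spurious) faults are resolved here without a
 * full uvm_fault() round trip.  Schematically (an illustrative
 * condensation of the code above, not a second implementation):
 *
 *	if ((pte & LX_BLKPAG_AF) == 0) {
 *		pp->pp_pv.pv_va |= VM_PROT_READ;	// referenced
 *		pte |= LX_BLKPAG_AF;
 *	}
 *	if ((accessprot & VM_PROT_WRITE) &&
 *	    (pte & LX_BLKPAG_AP) == LX_BLKPAG_AP_RO) {
 *		pp->pp_pv.pv_va |= VM_PROT_WRITE;	// modified
 *		pte = (pte & ~LX_BLKPAG_AP) | LX_BLKPAG_AP_RW;
 *	}
 */
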
bool
pmap_clear_modify(struct vm_page *pg)
{
	struct pv_entry *pv;
	struct pmap_page * const pp = VM_PAGE_TO_PP(pg);
	pt_entry_t *ptep, pte, opte;
	vaddr_t va;
#ifdef ARMV81_HAFDBS
	bool modified;
#endif

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pg=%p, flags=%08x",
	    pg, (int)(pp->pp_pv.pv_va & (PAGE_SIZE - 1)), 0, 0);

	PMAP_COUNT(clear_modify);

	/*
	 * if this is a new page, assert it has no mappings and simply zap
	 * the stored attributes without taking any locks.
	 */
	if ((pg->flags & PG_FAKE) != 0) {
		KASSERT(atomic_load_relaxed(&pp->pp_pv.pv_pmap) == NULL);
		KASSERT(atomic_load_relaxed(&pp->pp_pv.pv_next) == NULL);
		atomic_store_relaxed(&pp->pp_pv.pv_va, 0);
		return false;
	}

	pmap_pv_lock(pp);

	if (
#ifdef ARMV81_HAFDBS
	    aarch64_hafdbs_enabled != ID_AA64MMFR1_EL1_HAFDBS_AD &&
#endif
	    (pp->pp_pv.pv_va & VM_PROT_WRITE) == 0) {
		pmap_pv_unlock(pp);
		return false;
	}
#ifdef ARMV81_HAFDBS
	modified = ((pp->pp_pv.pv_va & VM_PROT_WRITE) != 0);
#endif
	pp->pp_pv.pv_va &= ~(vaddr_t)VM_PROT_WRITE;

	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
		if (pv->pv_pmap == NULL) {
			KASSERT(pv == &pp->pp_pv);
			continue;
		}

		PMAP_COUNT(clear_modify_pages);

		va = trunc_page(pv->pv_va);

		ptep = pv->pv_ptep;
		opte = pte = *ptep;
 tryagain:
		if (!l3pte_valid(pte))
			continue;
		if ((pte & LX_BLKPAG_AP) == LX_BLKPAG_AP_RO)
			continue;
#ifdef ARMV81_HAFDBS
		modified = true;
#endif
		/* clear write permission */
		pte &= ~LX_BLKPAG_AP;
		pte |= LX_BLKPAG_AP_RO;

		/* XXX: possible deadlock if using PM_LOCK(). this is racy */
		if ((pte = atomic_cas_64(ptep, opte, pte)) != opte) {
			opte = pte;
			goto tryagain;
		}

		struct pmap * const pm = pv->pv_pmap;
		struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(ci));
		AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);

		UVMHIST_LOG(pmaphist,
		    "va=%016llx, ptep=%p, pa=%016lx, RW -> RO",
		    va, ptep, l3pte_pa(pte), 0);
	}

	pmap_pv_unlock(pp);

#ifdef ARMV81_HAFDBS
	return modified;
#else
	return true;
#endif
}

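/*
 * Both pmap_clear_modify() above and pmap_clear_reference() below
 * must downgrade PTEs while holding only the pv lock; taking the
 * owning pmap's lock from here could deadlock (presumably against the
 * usual pmap-then-pv lock order, hence the XXX comments).  They
 * instead rely on a compare-and-swap retry loop of this shape
 * (illustrative sketch of the loop above):
 *
 *	opte = pte = *ptep;
 * tryagain:
 *	// recheck validity, then compute the downgraded pte...
 *	if ((pte = atomic_cas_64(ptep, opte, pte)) != opte) {
 *		opte = pte;	// lost a race: retry against the
 *		goto tryagain;	// value now found in the PTE
 *	}
 */
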
bool
pmap_clear_reference(struct vm_page *pg)
{
	struct pv_entry *pv;
	struct pmap_page * const pp = VM_PAGE_TO_PP(pg);
	pt_entry_t *ptep, pte, opte;
	vaddr_t va;
#ifdef ARMV81_HAFDBS
	bool referenced;
#endif

	UVMHIST_FUNC(__func__);
	UVMHIST_CALLARGS(pmaphist, "pg=%p, pp=%p, flags=%08x",
	    pg, pp, (int)(pp->pp_pv.pv_va & (PAGE_SIZE - 1)), 0);

	pmap_pv_lock(pp);

	if (
#ifdef ARMV81_HAFDBS
	    aarch64_hafdbs_enabled == ID_AA64MMFR1_EL1_HAFDBS_NONE &&
#endif
	    (pp->pp_pv.pv_va & VM_PROT_READ) == 0) {
		pmap_pv_unlock(pp);
		return false;
	}
#ifdef ARMV81_HAFDBS
	referenced = ((pp->pp_pv.pv_va & VM_PROT_READ) != 0);
#endif
	pp->pp_pv.pv_va &= ~(vaddr_t)VM_PROT_READ;

	PMAP_COUNT(clear_reference);
	for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
		if (pv->pv_pmap == NULL) {
			KASSERT(pv == &pp->pp_pv);
			continue;
		}

		PMAP_COUNT(clear_reference_pages);

		va = trunc_page(pv->pv_va);

		ptep = pv->pv_ptep;
		opte = pte = *ptep;
 tryagain:
		if (!l3pte_valid(pte))
			continue;
		if ((pte & LX_BLKPAG_AF) == 0)
			continue;
#ifdef ARMV81_HAFDBS
		referenced = true;
#endif
		/* clear the access flag */
		pte &= ~LX_BLKPAG_AF;

		/* XXX: possible deadlock if using PM_LOCK(). this is racy */
		if ((pte = atomic_cas_64(ptep, opte, pte)) != opte) {
			opte = pte;
			goto tryagain;
		}

		struct pmap * const pm = pv->pv_pmap;
		struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(ci));
		AARCH64_TLBI_BY_ASID_VA(pai->pai_asid, va);

		UVMHIST_LOG(pmaphist, "va=%016llx, ptep=%p, pa=%016lx, unset AF",
		    va, ptep, l3pte_pa(pte), 0);
	}

	pmap_pv_unlock(pp);

#ifdef ARMV81_HAFDBS
	return referenced;
#else
	return true;
#endif
}

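/*
 * With ARMv8.1 HAFDBS (hardware management of the access flag and,
 * optionally, of the dirty state) enabled, the CPU can set AF or make
 * a page writable-dirty without taking a fault, so the software
 * attribute bits cached in pp_pv.pv_va are not authoritative on their
 * own.  pmap_is_modified()/pmap_is_referenced() below therefore fall
 * back to scanning each mapping's PTE, along the lines of this
 * illustrative sketch:
 *
 *	if (aarch64_hafdbs_enabled == ID_AA64MMFR1_EL1_HAFDBS_AD)
 *		for each pv of the page:
 *			if ((*pv->pv_ptep & LX_BLKPAG_AP) == LX_BLKPAG_AP_RW)
 *				return true;	// hardware marked it dirty
 */
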
bool
pmap_is_modified(struct vm_page *pg)
{
	struct pmap_page * const pp = VM_PAGE_TO_PP(pg);

	if (pp->pp_pv.pv_va & VM_PROT_WRITE)
		return true;

#ifdef ARMV81_HAFDBS
	/* check hardware dirty flag on each pte */
	if (aarch64_hafdbs_enabled == ID_AA64MMFR1_EL1_HAFDBS_AD) {
		struct pv_entry *pv;
		pt_entry_t *ptep, pte;

		pmap_pv_lock(pp);
		for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
			if (pv->pv_pmap == NULL) {
				KASSERT(pv == &pp->pp_pv);
				continue;
			}

			ptep = pv->pv_ptep;
			pte = *ptep;
			if (!l3pte_valid(pte))
				continue;

			if ((pte & LX_BLKPAG_AP) == LX_BLKPAG_AP_RW) {
				pp->pp_pv.pv_va |= VM_PROT_WRITE;
				pmap_pv_unlock(pp);
				return true;
			}
		}
		pmap_pv_unlock(pp);
	}
#endif

	return false;
}

bool
pmap_is_referenced(struct vm_page *pg)
{
	struct pmap_page * const pp = VM_PAGE_TO_PP(pg);

	if (pp->pp_pv.pv_va & VM_PROT_READ)
		return true;

#ifdef ARMV81_HAFDBS
	/* check hardware access flag on each pte */
	if (aarch64_hafdbs_enabled != ID_AA64MMFR1_EL1_HAFDBS_NONE) {
		struct pv_entry *pv;
		pt_entry_t *ptep, pte;

		pmap_pv_lock(pp);
		for (pv = &pp->pp_pv; pv != NULL; pv = pv->pv_next) {
			if (pv->pv_pmap == NULL) {
				KASSERT(pv == &pp->pp_pv);
				continue;
			}

			ptep = pv->pv_ptep;
			pte = *ptep;
			if (!l3pte_valid(pte))
				continue;

			if (pte & LX_BLKPAG_AF) {
				pp->pp_pv.pv_va |= VM_PROT_READ;
				pmap_pv_unlock(pp);
				return true;
			}
		}
		pmap_pv_unlock(pp);
	}
#endif

	return false;
}

/* get pointer to kernel segment L2 or L3 table entry */
pt_entry_t *
kvtopte(vaddr_t va)
{
	KASSERT(IN_RANGE(va, VM_MIN_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS));

	return _pmap_pte_lookup_bs(pmap_kernel(), va, NULL);
}

#ifdef DDB
void
pmap_db_pmap_print(struct pmap *pm,
    void (*pr)(const char *, ...) __printflike(1, 2))
{
	struct pmap_asid_info * const pai = PMAP_PAI(pm, cpu_tlb_info(ci));

	pr(" pm_asid = %d\n", pai->pai_asid);
	pr(" pm_l0table = %p\n", pm->pm_l0table);
	pr(" pm_l0table_pa = %lx\n", pm->pm_l0table_pa);
	pr(" pm_activated = %d\n\n", pm->pm_activated);
}
#endif /* DDB */