pmap.c revision 295425
194575Sdes/*- 294575Sdes * Copyright (c) 1991 Regents of the University of California. 394575Sdes * All rights reserved. 494575Sdes * Copyright (c) 1994 John S. Dyson 594575Sdes * All rights reserved. 694575Sdes * Copyright (c) 1994 David Greenman 794575Sdes * All rights reserved. 894575Sdes * Copyright (c) 2003 Peter Wemm 994575Sdes * All rights reserved. 1094575Sdes * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 1194575Sdes * All rights reserved. 1294575Sdes * Copyright (c) 2014 Andrew Turner 1394575Sdes * All rights reserved. 1494575Sdes * Copyright (c) 2014 The FreeBSD Foundation 1594575Sdes * All rights reserved. 1694575Sdes * 1794575Sdes * This code is derived from software contributed to Berkeley by 1894575Sdes * the Systems Programming Group of the University of Utah Computer 1994575Sdes * Science Department and William Jolitz of UUNET Technologies Inc. 2094575Sdes * 2194575Sdes * This software was developed by Andrew Turner under sponsorship from 2294575Sdes * the FreeBSD Foundation. 2394575Sdes * 2494575Sdes * Redistribution and use in source and binary forms, with or without 2594575Sdes * modification, are permitted provided that the following conditions 2694575Sdes * are met: 2794575Sdes * 1. Redistributions of source code must retain the above copyright 2894575Sdes * notice, this list of conditions and the following disclaimer. 2994575Sdes * 2. Redistributions in binary form must reproduce the above copyright 3094575Sdes * notice, this list of conditions and the following disclaimer in the 3194575Sdes * documentation and/or other materials provided with the distribution. 3294575Sdes * 3. All advertising materials mentioning features or use of this software 3394575Sdes * must display the following acknowledgement: 3494575Sdes * This product includes software developed by the University of 3594575Sdes * California, Berkeley and its contributors. 3694575Sdes * 4. Neither the name of the University nor the names of its contributors 3794575Sdes * may be used to endorse or promote products derived from this software 3894575Sdes * without specific prior written permission. 3994575Sdes * 4094575Sdes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 4194575Sdes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 4294575Sdes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 4394575Sdes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 4494575Sdes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 4594575Sdes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 4694575Sdes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 4794575Sdes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 4894575Sdes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 4994575Sdes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 5094575Sdes * SUCH DAMAGE. 5194575Sdes * 52 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 53 */ 54/*- 55 * Copyright (c) 2003 Networks Associates Technology, Inc. 56 * All rights reserved. 57 * 58 * This software was developed for the FreeBSD Project by Jake Burkholder, 59 * Safeport Network Services, and Network Associates Laboratories, the 60 * Security Research Division of Network Associates, Inc. under 61 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 62 * CHATS research program. 
63 * 64 * Redistribution and use in source and binary forms, with or without 65 * modification, are permitted provided that the following conditions 66 * are met: 67 * 1. Redistributions of source code must retain the above copyright 68 * notice, this list of conditions and the following disclaimer. 69 * 2. Redistributions in binary form must reproduce the above copyright 70 * notice, this list of conditions and the following disclaimer in the 71 * documentation and/or other materials provided with the distribution. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 */ 85 86#include <sys/cdefs.h> 87__FBSDID("$FreeBSD: head/sys/arm64/arm64/pmap.c 295425 2016-02-09 06:26:27Z wma $"); 88 89/* 90 * Manages physical address maps. 91 * 92 * Since the information managed by this module is 93 * also stored by the logical address mapping module, 94 * this module may throw away valid virtual-to-physical 95 * mappings at almost any time. However, invalidations 96 * of virtual-to-physical mappings must be done as 97 * requested. 98 * 99 * In order to cope with hardware architectures which 100 * make virtual-to-physical map invalidates expensive, 101 * this module may delay invalidate or reduced protection 102 * operations until such time as they are actually 103 * necessary. This module is given full information as 104 * to which processors are currently using which maps, 105 * and to when physical maps must be made correct. 106 */ 107 108#include <sys/param.h> 109#include <sys/bus.h> 110#include <sys/systm.h> 111#include <sys/kernel.h> 112#include <sys/ktr.h> 113#include <sys/lock.h> 114#include <sys/malloc.h> 115#include <sys/mman.h> 116#include <sys/msgbuf.h> 117#include <sys/mutex.h> 118#include <sys/proc.h> 119#include <sys/rwlock.h> 120#include <sys/sx.h> 121#include <sys/vmem.h> 122#include <sys/vmmeter.h> 123#include <sys/sched.h> 124#include <sys/sysctl.h> 125#include <sys/_unrhdr.h> 126#include <sys/smp.h> 127 128#include <vm/vm.h> 129#include <vm/vm_param.h> 130#include <vm/vm_kern.h> 131#include <vm/vm_page.h> 132#include <vm/vm_map.h> 133#include <vm/vm_object.h> 134#include <vm/vm_extern.h> 135#include <vm/vm_pageout.h> 136#include <vm/vm_pager.h> 137#include <vm/vm_radix.h> 138#include <vm/vm_reserv.h> 139#include <vm/uma.h> 140 141#include <machine/machdep.h> 142#include <machine/md_var.h> 143#include <machine/pcb.h> 144 145#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) 146#define NUPDE (NPDEPG * NPDEPG) 147#define NUSERPGTBLS (NUPDE + NPDEPG) 148 149#if !defined(DIAGNOSTIC) 150#ifdef __GNUC_GNU_INLINE__ 151#define PMAP_INLINE __attribute__((__gnu_inline__)) inline 152#else 153#define PMAP_INLINE extern inline 154#endif 155#else 156#define PMAP_INLINE 157#endif 158 159/* 160 * These are configured by the mair_el1 register. 
This is set up in locore.S 161 */ 162#define DEVICE_MEMORY 0 163#define UNCACHED_MEMORY 1 164#define CACHED_MEMORY 2 165 166 167#ifdef PV_STATS 168#define PV_STAT(x) do { x ; } while (0) 169#else 170#define PV_STAT(x) do { } while (0) 171#endif 172 173#define pmap_l2_pindex(v) ((v) >> L2_SHIFT) 174 175#define NPV_LIST_LOCKS MAXCPU 176 177#define PHYS_TO_PV_LIST_LOCK(pa) \ 178 (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) 179 180#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ 181 struct rwlock **_lockp = (lockp); \ 182 struct rwlock *_new_lock; \ 183 \ 184 _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ 185 if (_new_lock != *_lockp) { \ 186 if (*_lockp != NULL) \ 187 rw_wunlock(*_lockp); \ 188 *_lockp = _new_lock; \ 189 rw_wlock(*_lockp); \ 190 } \ 191} while (0) 192 193#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ 194 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) 195 196#define RELEASE_PV_LIST_LOCK(lockp) do { \ 197 struct rwlock **_lockp = (lockp); \ 198 \ 199 if (*_lockp != NULL) { \ 200 rw_wunlock(*_lockp); \ 201 *_lockp = NULL; \ 202 } \ 203} while (0) 204 205#define VM_PAGE_TO_PV_LIST_LOCK(m) \ 206 PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) 207 208struct pmap kernel_pmap_store; 209 210vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 211vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 212vm_offset_t kernel_vm_end = 0; 213 214struct msgbuf *msgbufp = NULL; 215 216static struct rwlock_padalign pvh_global_lock; 217 218vm_paddr_t dmap_phys_base; /* The start of the dmap region */ 219 220/* 221 * Data for the pv entry allocation mechanism 222 */ 223static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 224static struct mtx pv_chunks_mutex; 225static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; 226 227static void free_pv_chunk(struct pv_chunk *pc); 228static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 229static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); 230static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); 231static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 232static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 233 vm_offset_t va); 234static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 235 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); 236static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, 237 pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); 238static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 239 vm_page_t m, struct rwlock **lockp); 240 241static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, 242 struct rwlock **lockp); 243 244static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, 245 struct spglist *free); 246static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); 247 248/* 249 * These load the old table data and store the new value. 250 * They need to be atomic as the System MMU may write to the table at 251 * the same time as the CPU. 
252 */ 253#define pmap_load_store(table, entry) atomic_swap_64(table, entry) 254#define pmap_set(table, mask) atomic_set_64(table, mask) 255#define pmap_load_clear(table) atomic_swap_64(table, 0) 256#define pmap_load(table) (*table) 257 258/********************/ 259/* Inline functions */ 260/********************/ 261 262static __inline void 263pagecopy(void *s, void *d) 264{ 265 266 memcpy(d, s, PAGE_SIZE); 267} 268 269static __inline void 270pagezero(void *p) 271{ 272 273 bzero(p, PAGE_SIZE); 274} 275 276#define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) 277#define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) 278#define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) 279 280static __inline pd_entry_t * 281pmap_l1(pmap_t pmap, vm_offset_t va) 282{ 283 284 return (&pmap->pm_l1[pmap_l1_index(va)]); 285} 286 287static __inline pd_entry_t * 288pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) 289{ 290 pd_entry_t *l2; 291 292 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 293 return (&l2[pmap_l2_index(va)]); 294} 295 296static __inline pd_entry_t * 297pmap_l2(pmap_t pmap, vm_offset_t va) 298{ 299 pd_entry_t *l1; 300 301 l1 = pmap_l1(pmap, va); 302 if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) 303 return (NULL); 304 305 return (pmap_l1_to_l2(l1, va)); 306} 307 308static __inline pt_entry_t * 309pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) 310{ 311 pt_entry_t *l3; 312 313 l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); 314 return (&l3[pmap_l3_index(va)]); 315} 316 317static __inline pt_entry_t * 318pmap_l3(pmap_t pmap, vm_offset_t va) 319{ 320 pd_entry_t *l2; 321 322 l2 = pmap_l2(pmap, va); 323 if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) 324 return (NULL); 325 326 return (pmap_l2_to_l3(l2, va)); 327} 328 329bool 330pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2, 331 pt_entry_t **l3) 332{ 333 pd_entry_t *l1p, *l2p; 334 335 if (pmap->pm_l1 == NULL) 336 return (false); 337 338 l1p = pmap_l1(pmap, va); 339 *l1 = l1p; 340 341 if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { 342 *l2 = NULL; 343 *l3 = NULL; 344 return (true); 345 } 346 347 if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE) 348 return (false); 349 350 l2p = pmap_l1_to_l2(l1p, va); 351 *l2 = l2p; 352 353 if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) { 354 *l3 = NULL; 355 return (true); 356 } 357 358 *l3 = pmap_l2_to_l3(l2p, va); 359 360 return (true); 361} 362 363static __inline int 364pmap_is_current(pmap_t pmap) 365{ 366 367 return ((pmap == pmap_kernel()) || 368 (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); 369} 370 371static __inline int 372pmap_l3_valid(pt_entry_t l3) 373{ 374 375 return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); 376} 377 378static __inline int 379pmap_l3_valid_cacheable(pt_entry_t l3) 380{ 381 382 return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) && 383 ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); 384} 385 386#define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) 387 388/* 389 * Checks if the page is dirty. We currently lack proper tracking of this on 390 * arm64 so for now assume is a page mapped as rw was accessed it is. 
391 */ 392static inline int 393pmap_page_dirty(pt_entry_t pte) 394{ 395 396 return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == 397 (ATTR_AF | ATTR_AP(ATTR_AP_RW))); 398} 399 400static __inline void 401pmap_resident_count_inc(pmap_t pmap, int count) 402{ 403 404 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 405 pmap->pm_stats.resident_count += count; 406} 407 408static __inline void 409pmap_resident_count_dec(pmap_t pmap, int count) 410{ 411 412 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 413 KASSERT(pmap->pm_stats.resident_count >= count, 414 ("pmap %p resident count underflow %ld %d", pmap, 415 pmap->pm_stats.resident_count, count)); 416 pmap->pm_stats.resident_count -= count; 417} 418 419static pt_entry_t * 420pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, 421 u_int *l2_slot) 422{ 423 pt_entry_t *l2; 424 pd_entry_t *l1; 425 426 l1 = (pd_entry_t *)l1pt; 427 *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; 428 429 /* Check locore has used a table L1 map */ 430 KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, 431 ("Invalid bootstrap L1 table")); 432 /* Find the address of the L2 table */ 433 l2 = (pt_entry_t *)init_pt_va; 434 *l2_slot = pmap_l2_index(va); 435 436 return (l2); 437} 438 439static vm_paddr_t 440pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) 441{ 442 u_int l1_slot, l2_slot; 443 pt_entry_t *l2; 444 445 l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); 446 447 return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); 448} 449 450static void 451pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) 452{ 453 vm_offset_t va; 454 vm_paddr_t pa; 455 pd_entry_t *l1; 456 u_int l1_slot; 457 458 pa = dmap_phys_base = kernstart & ~L1_OFFSET; 459 va = DMAP_MIN_ADDRESS; 460 l1 = (pd_entry_t *)l1pt; 461 l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); 462 463 for (; va < DMAP_MAX_ADDRESS; 464 pa += L1_SIZE, va += L1_SIZE, l1_slot++) { 465 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); 466 467 pmap_load_store(&l1[l1_slot], 468 (pa & ~L1_OFFSET) | ATTR_DEFAULT | 469 ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); 470 } 471 472 cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); 473 cpu_tlb_flushID(); 474} 475 476static vm_offset_t 477pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) 478{ 479 vm_offset_t l2pt; 480 vm_paddr_t pa; 481 pd_entry_t *l1; 482 u_int l1_slot; 483 484 KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); 485 486 l1 = (pd_entry_t *)l1pt; 487 l1_slot = pmap_l1_index(va); 488 l2pt = l2_start; 489 490 for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { 491 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); 492 493 pa = pmap_early_vtophys(l1pt, l2pt); 494 pmap_load_store(&l1[l1_slot], 495 (pa & ~Ln_TABLE_MASK) | L1_TABLE); 496 l2pt += PAGE_SIZE; 497 } 498 499 /* Clean the L2 page table */ 500 memset((void *)l2_start, 0, l2pt - l2_start); 501 cpu_dcache_wb_range(l2_start, l2pt - l2_start); 502 503 /* Flush the l1 table to ram */ 504 cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); 505 506 return l2pt; 507} 508 509static vm_offset_t 510pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) 511{ 512 vm_offset_t l2pt, l3pt; 513 vm_paddr_t pa; 514 pd_entry_t *l2; 515 u_int l2_slot; 516 517 KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); 518 519 l2 = pmap_l2(kernel_pmap, va); 520 l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1)); 521 l2pt = (vm_offset_t)l2; 522 l2_slot = pmap_l2_index(va); 523 l3pt = l3_start; 524 525 for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { 526 KASSERT(l2_slot < 
Ln_ENTRIES, ("Invalid L2 index")); 527 528 pa = pmap_early_vtophys(l1pt, l3pt); 529 pmap_load_store(&l2[l2_slot], 530 (pa & ~Ln_TABLE_MASK) | L2_TABLE); 531 l3pt += PAGE_SIZE; 532 } 533 534 /* Clean the L2 page table */ 535 memset((void *)l3_start, 0, l3pt - l3_start); 536 cpu_dcache_wb_range(l3_start, l3pt - l3_start); 537 538 cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); 539 540 return l3pt; 541} 542 543/* 544 * Bootstrap the system enough to run with virtual memory. 545 */ 546void 547pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) 548{ 549 u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; 550 uint64_t kern_delta; 551 pt_entry_t *l2; 552 vm_offset_t va, freemempos; 553 vm_offset_t dpcpu, msgbufpv; 554 vm_paddr_t pa, min_pa; 555 int i; 556 557 kern_delta = KERNBASE - kernstart; 558 physmem = 0; 559 560 printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); 561 printf("%lx\n", l1pt); 562 printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); 563 564 /* Set this early so we can use the pagetable walking functions */ 565 kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; 566 PMAP_LOCK_INIT(kernel_pmap); 567 568 /* 569 * Initialize the global pv list lock. 570 */ 571 rw_init(&pvh_global_lock, "pmap pv global"); 572 573 /* Assume the address we were loaded to is a valid physical address */ 574 min_pa = KERNBASE - kern_delta; 575 576 /* 577 * Find the minimum physical address. physmap is sorted, 578 * but may contain empty ranges. 579 */ 580 for (i = 0; i < (physmap_idx * 2); i += 2) { 581 if (physmap[i] == physmap[i + 1]) 582 continue; 583 if (physmap[i] <= min_pa) 584 min_pa = physmap[i]; 585 break; 586 } 587 588 /* Create a direct map region early so we can use it for pa -> va */ 589 pmap_bootstrap_dmap(l1pt, min_pa); 590 591 va = KERNBASE; 592 pa = KERNBASE - kern_delta; 593 594 /* 595 * Start to initialise phys_avail by copying from physmap 596 * up to the physical address KERNBASE points at. 597 */ 598 map_slot = avail_slot = 0; 599 for (; map_slot < (physmap_idx * 2) && 600 avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) { 601 if (physmap[map_slot] == physmap[map_slot + 1]) 602 continue; 603 604 if (physmap[map_slot] <= pa && 605 physmap[map_slot + 1] > pa) 606 break; 607 608 phys_avail[avail_slot] = physmap[map_slot]; 609 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 610 physmem += (phys_avail[avail_slot + 1] - 611 phys_avail[avail_slot]) >> PAGE_SHIFT; 612 avail_slot += 2; 613 } 614 615 /* Add the memory before the kernel */ 616 if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) { 617 phys_avail[avail_slot] = physmap[map_slot]; 618 phys_avail[avail_slot + 1] = pa; 619 physmem += (phys_avail[avail_slot + 1] - 620 phys_avail[avail_slot]) >> PAGE_SHIFT; 621 avail_slot += 2; 622 } 623 used_map_slot = map_slot; 624 625 /* 626 * Read the page table to find out what is already mapped. 627 * This assumes we have mapped a block of memory from KERNBASE 628 * using a single L1 entry. 
629 */ 630 l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); 631 632 /* Sanity check the index, KERNBASE should be the first VA */ 633 KASSERT(l2_slot == 0, ("The L2 index is non-zero")); 634 635 /* Find how many pages we have mapped */ 636 for (; l2_slot < Ln_ENTRIES; l2_slot++) { 637 if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) 638 break; 639 640 /* Check locore used L2 blocks */ 641 KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, 642 ("Invalid bootstrap L2 table")); 643 KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, 644 ("Incorrect PA in L2 table")); 645 646 va += L2_SIZE; 647 pa += L2_SIZE; 648 } 649 650 va = roundup2(va, L1_SIZE); 651 652 freemempos = KERNBASE + kernlen; 653 freemempos = roundup2(freemempos, PAGE_SIZE); 654 /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ 655 freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); 656 /* And the l3 tables for the early devmap */ 657 freemempos = pmap_bootstrap_l3(l1pt, 658 VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); 659 660 cpu_tlb_flushID(); 661 662#define alloc_pages(var, np) \ 663 (var) = freemempos; \ 664 freemempos += (np * PAGE_SIZE); \ 665 memset((char *)(var), 0, ((np) * PAGE_SIZE)); 666 667 /* Allocate dynamic per-cpu area. */ 668 alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); 669 dpcpu_init((void *)dpcpu, 0); 670 671 /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ 672 alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); 673 msgbufp = (void *)msgbufpv; 674 675 virtual_avail = roundup2(freemempos, L1_SIZE); 676 virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; 677 kernel_vm_end = virtual_avail; 678 679 pa = pmap_early_vtophys(l1pt, freemempos); 680 681 /* Finish initialising physmap */ 682 map_slot = used_map_slot; 683 for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && 684 map_slot < (physmap_idx * 2); map_slot += 2) { 685 if (physmap[map_slot] == physmap[map_slot + 1]) 686 continue; 687 688 /* Have we used the current range? */ 689 if (physmap[map_slot + 1] <= pa) 690 continue; 691 692 /* Do we need to split the entry? */ 693 if (physmap[map_slot] < pa) { 694 phys_avail[avail_slot] = pa; 695 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 696 } else { 697 phys_avail[avail_slot] = physmap[map_slot]; 698 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 699 } 700 physmem += (phys_avail[avail_slot + 1] - 701 phys_avail[avail_slot]) >> PAGE_SHIFT; 702 703 avail_slot += 2; 704 } 705 phys_avail[avail_slot] = 0; 706 phys_avail[avail_slot + 1] = 0; 707 708 /* 709 * Maxmem isn't the "maximum memory", it's one larger than the 710 * highest page of the physical address space. It should be 711 * called something like "Maxphyspage". 712 */ 713 Maxmem = atop(phys_avail[avail_slot - 1]); 714 715 cpu_tlb_flushID(); 716} 717 718/* 719 * Initialize a vm_page's machine-dependent fields. 720 */ 721void 722pmap_page_init(vm_page_t m) 723{ 724 725 TAILQ_INIT(&m->md.pv_list); 726 m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; 727} 728 729/* 730 * Initialize the pmap module. 731 * Called by vm_init, to initialize any structures that the pmap 732 * system needs to map virtual memory. 733 */ 734void 735pmap_init(void) 736{ 737 int i; 738 739 /* 740 * Initialize the pv chunk list mutex. 741 */ 742 mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); 743 744 /* 745 * Initialize the pool of pv list locks. 746 */ 747 for (i = 0; i < NPV_LIST_LOCKS; i++) 748 rw_init(&pv_list_locks[i], "pmap pv list"); 749} 750 751/* 752 * Normal, non-SMP, invalidation functions. 753 * We inline these within pmap.c for speed. 
754 */ 755PMAP_INLINE void 756pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 757{ 758 759 sched_pin(); 760 __asm __volatile( 761 "dsb sy \n" 762 "tlbi vaae1is, %0 \n" 763 "dsb sy \n" 764 "isb \n" 765 : : "r"(va >> PAGE_SHIFT)); 766 sched_unpin(); 767} 768 769PMAP_INLINE void 770pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 771{ 772 vm_offset_t addr; 773 774 sched_pin(); 775 sva >>= PAGE_SHIFT; 776 eva >>= PAGE_SHIFT; 777 __asm __volatile("dsb sy"); 778 for (addr = sva; addr < eva; addr++) { 779 __asm __volatile( 780 "tlbi vaae1is, %0" : : "r"(addr)); 781 } 782 __asm __volatile( 783 "dsb sy \n" 784 "isb \n"); 785 sched_unpin(); 786} 787 788PMAP_INLINE void 789pmap_invalidate_all(pmap_t pmap) 790{ 791 792 sched_pin(); 793 __asm __volatile( 794 "dsb sy \n" 795 "tlbi vmalle1is \n" 796 "dsb sy \n" 797 "isb \n"); 798 sched_unpin(); 799} 800 801/* 802 * Routine: pmap_extract 803 * Function: 804 * Extract the physical page address associated 805 * with the given map/virtual_address pair. 806 */ 807vm_paddr_t 808pmap_extract(pmap_t pmap, vm_offset_t va) 809{ 810 pd_entry_t *l2p, l2; 811 pt_entry_t *l3p, l3; 812 vm_paddr_t pa; 813 814 pa = 0; 815 PMAP_LOCK(pmap); 816 /* 817 * Start with the l2 tabel. We are unable to allocate 818 * pages in the l1 table. 819 */ 820 l2p = pmap_l2(pmap, va); 821 if (l2p != NULL) { 822 l2 = pmap_load(l2p); 823 if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) { 824 l3p = pmap_l2_to_l3(l2p, va); 825 if (l3p != NULL) { 826 l3 = pmap_load(l3p); 827 828 if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) 829 pa = (l3 & ~ATTR_MASK) | 830 (va & L3_OFFSET); 831 } 832 } else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) 833 pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET); 834 } 835 PMAP_UNLOCK(pmap); 836 return (pa); 837} 838 839/* 840 * Routine: pmap_extract_and_hold 841 * Function: 842 * Atomically extract and hold the physical page 843 * with the given pmap and virtual address pair 844 * if that mapping permits the given protection. 845 */ 846vm_page_t 847pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 848{ 849 pt_entry_t *l3p, l3; 850 vm_paddr_t pa; 851 vm_page_t m; 852 853 pa = 0; 854 m = NULL; 855 PMAP_LOCK(pmap); 856retry: 857 l3p = pmap_l3(pmap, va); 858 if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { 859 if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || 860 ((prot & VM_PROT_WRITE) == 0)) { 861 if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa)) 862 goto retry; 863 m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK); 864 vm_page_hold(m); 865 } 866 } 867 PA_UNLOCK_COND(pa); 868 PMAP_UNLOCK(pmap); 869 return (m); 870} 871 872vm_paddr_t 873pmap_kextract(vm_offset_t va) 874{ 875 pd_entry_t *l2p, l2; 876 pt_entry_t *l3; 877 vm_paddr_t pa; 878 879 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 880 pa = DMAP_TO_PHYS(va); 881 } else { 882 l2p = pmap_l2(kernel_pmap, va); 883 if (l2p == NULL) 884 panic("pmap_kextract: No l2"); 885 l2 = pmap_load(l2p); 886 if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) 887 return ((l2 & ~ATTR_MASK) | 888 (va & L2_OFFSET)); 889 890 l3 = pmap_l2_to_l3(l2p, va); 891 if (l3 == NULL) 892 panic("pmap_kextract: No l3..."); 893 pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK); 894 } 895 return (pa); 896} 897 898/*************************************************** 899 * Low level mapping routines..... 
900 ***************************************************/ 901 902void 903pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) 904{ 905 pt_entry_t *l3; 906 vm_offset_t va; 907 908 KASSERT((pa & L3_OFFSET) == 0, 909 ("pmap_kenter_device: Invalid physical address")); 910 KASSERT((sva & L3_OFFSET) == 0, 911 ("pmap_kenter_device: Invalid virtual address")); 912 KASSERT((size & PAGE_MASK) == 0, 913 ("pmap_kenter_device: Mapping is not page-sized")); 914 915 va = sva; 916 while (size != 0) { 917 l3 = pmap_l3(kernel_pmap, va); 918 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); 919 pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT | 920 ATTR_IDX(DEVICE_MEMORY) | L3_PAGE); 921 PTE_SYNC(l3); 922 923 va += PAGE_SIZE; 924 pa += PAGE_SIZE; 925 size -= PAGE_SIZE; 926 } 927 pmap_invalidate_range(kernel_pmap, sva, va); 928} 929 930/* 931 * Remove a page from the kernel pagetables. 932 * Note: not SMP coherent. 933 */ 934PMAP_INLINE void 935pmap_kremove(vm_offset_t va) 936{ 937 pt_entry_t *l3; 938 939 l3 = pmap_l3(kernel_pmap, va); 940 KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); 941 942 if (pmap_l3_valid_cacheable(pmap_load(l3))) 943 cpu_dcache_wb_range(va, L3_SIZE); 944 pmap_load_clear(l3); 945 PTE_SYNC(l3); 946 pmap_invalidate_page(kernel_pmap, va); 947} 948 949void 950pmap_kremove_device(vm_offset_t sva, vm_size_t size) 951{ 952 pt_entry_t *l3; 953 vm_offset_t va; 954 955 KASSERT((sva & L3_OFFSET) == 0, 956 ("pmap_kremove_device: Invalid virtual address")); 957 KASSERT((size & PAGE_MASK) == 0, 958 ("pmap_kremove_device: Mapping is not page-sized")); 959 960 va = sva; 961 while (size != 0) { 962 l3 = pmap_l3(kernel_pmap, va); 963 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); 964 pmap_load_clear(l3); 965 PTE_SYNC(l3); 966 967 va += PAGE_SIZE; 968 size -= PAGE_SIZE; 969 } 970 pmap_invalidate_range(kernel_pmap, sva, va); 971} 972 973/* 974 * Used to map a range of physical addresses into kernel 975 * virtual address space. 976 * 977 * The value passed in '*virt' is a suggested virtual address for 978 * the mapping. Architectures which can support a direct-mapped 979 * physical to virtual region can return the appropriate address 980 * within that region, leaving '*virt' unchanged. Other 981 * architectures should map the pages starting at '*virt' and 982 * update '*virt' with the first usable address after the mapped 983 * region. 984 */ 985vm_offset_t 986pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 987{ 988 return PHYS_TO_DMAP(start); 989} 990 991 992/* 993 * Add a list of wired pages to the kva 994 * this routine is only used for temporary 995 * kernel mappings that do not need to have 996 * page modification or references recorded. 997 * Note that old mappings are simply written 998 * over. The page *must* be wired. 999 * Note: SMP coherent. Uses a ranged shootdown IPI. 1000 */ 1001void 1002pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1003{ 1004 pt_entry_t *l3, pa; 1005 vm_offset_t va; 1006 vm_page_t m; 1007 int i; 1008 1009 va = sva; 1010 for (i = 0; i < count; i++) { 1011 m = ma[i]; 1012 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | 1013 ATTR_IDX(m->md.pv_memattr) | L3_PAGE; 1014 l3 = pmap_l3(kernel_pmap, va); 1015 pmap_load_store(l3, pa); 1016 PTE_SYNC(l3); 1017 1018 va += L3_SIZE; 1019 } 1020 pmap_invalidate_range(kernel_pmap, sva, va); 1021} 1022 1023/* 1024 * This routine tears out page mappings from the 1025 * kernel -- it is meant only for temporary mappings. 1026 * Note: SMP coherent. 
Uses a ranged shootdown IPI. 1027 */ 1028void 1029pmap_qremove(vm_offset_t sva, int count) 1030{ 1031 pt_entry_t *l3; 1032 vm_offset_t va; 1033 1034 KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); 1035 1036 va = sva; 1037 while (count-- > 0) { 1038 l3 = pmap_l3(kernel_pmap, va); 1039 KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); 1040 1041 if (pmap_l3_valid_cacheable(pmap_load(l3))) 1042 cpu_dcache_wb_range(va, L3_SIZE); 1043 pmap_load_clear(l3); 1044 PTE_SYNC(l3); 1045 1046 va += PAGE_SIZE; 1047 } 1048 pmap_invalidate_range(kernel_pmap, sva, va); 1049} 1050 1051/*************************************************** 1052 * Page table page management routines..... 1053 ***************************************************/ 1054static __inline void 1055pmap_free_zero_pages(struct spglist *free) 1056{ 1057 vm_page_t m; 1058 1059 while ((m = SLIST_FIRST(free)) != NULL) { 1060 SLIST_REMOVE_HEAD(free, plinks.s.ss); 1061 /* Preserve the page's PG_ZERO setting. */ 1062 vm_page_free_toq(m); 1063 } 1064} 1065 1066/* 1067 * Schedule the specified unused page table page to be freed. Specifically, 1068 * add the page to the specified list of pages that will be released to the 1069 * physical memory manager after the TLB has been updated. 1070 */ 1071static __inline void 1072pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, 1073 boolean_t set_PG_ZERO) 1074{ 1075 1076 if (set_PG_ZERO) 1077 m->flags |= PG_ZERO; 1078 else 1079 m->flags &= ~PG_ZERO; 1080 SLIST_INSERT_HEAD(free, m, plinks.s.ss); 1081} 1082 1083/* 1084 * Decrements a page table page's wire count, which is used to record the 1085 * number of valid page table entries within the page. If the wire count 1086 * drops to zero, then the page table page is unmapped. Returns TRUE if the 1087 * page table page was unmapped and FALSE otherwise. 1088 */ 1089static inline boolean_t 1090pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1091{ 1092 1093 --m->wire_count; 1094 if (m->wire_count == 0) { 1095 _pmap_unwire_l3(pmap, va, m, free); 1096 return (TRUE); 1097 } else 1098 return (FALSE); 1099} 1100 1101static void 1102_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1103{ 1104 1105 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1106 /* 1107 * unmap the page table page 1108 */ 1109 if (m->pindex >= NUPDE) { 1110 /* PD page */ 1111 pd_entry_t *l1; 1112 l1 = pmap_l1(pmap, va); 1113 pmap_load_clear(l1); 1114 PTE_SYNC(l1); 1115 } else { 1116 /* PTE page */ 1117 pd_entry_t *l2; 1118 l2 = pmap_l2(pmap, va); 1119 pmap_load_clear(l2); 1120 PTE_SYNC(l2); 1121 } 1122 pmap_resident_count_dec(pmap, 1); 1123 if (m->pindex < NUPDE) { 1124 /* We just released a PT, unhold the matching PD */ 1125 vm_page_t pdpg; 1126 1127 pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK); 1128 pmap_unwire_l3(pmap, va, pdpg, free); 1129 } 1130 pmap_invalidate_page(pmap, va); 1131 1132 /* 1133 * This is a release store so that the ordinary store unmapping 1134 * the page table page is globally performed before TLB shoot- 1135 * down is begun. 1136 */ 1137 atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); 1138 1139 /* 1140 * Put page on a list so that it is released after 1141 * *ALL* TLB shootdown is done 1142 */ 1143 pmap_add_delayed_free_list(m, free, TRUE); 1144} 1145 1146/* 1147 * After removing an l3 entry, this routine is used to 1148 * conditionally free the page, and manage the hold/wire counts. 
1149 */ 1150static int 1151pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, 1152 struct spglist *free) 1153{ 1154 vm_page_t mpte; 1155 1156 if (va >= VM_MAXUSER_ADDRESS) 1157 return (0); 1158 KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1159 mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); 1160 return (pmap_unwire_l3(pmap, va, mpte, free)); 1161} 1162 1163void 1164pmap_pinit0(pmap_t pmap) 1165{ 1166 1167 PMAP_LOCK_INIT(pmap); 1168 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1169 pmap->pm_l1 = kernel_pmap->pm_l1; 1170} 1171 1172int 1173pmap_pinit(pmap_t pmap) 1174{ 1175 vm_paddr_t l1phys; 1176 vm_page_t l1pt; 1177 1178 /* 1179 * allocate the l1 page 1180 */ 1181 while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | 1182 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1183 VM_WAIT; 1184 1185 l1phys = VM_PAGE_TO_PHYS(l1pt); 1186 pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); 1187 1188 if ((l1pt->flags & PG_ZERO) == 0) 1189 pagezero(pmap->pm_l1); 1190 1191 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1192 1193 return (1); 1194} 1195 1196/* 1197 * This routine is called if the desired page table page does not exist. 1198 * 1199 * If page table page allocation fails, this routine may sleep before 1200 * returning NULL. It sleeps only if a lock pointer was given. 1201 * 1202 * Note: If a page allocation fails at page table level two or three, 1203 * one or two pages may be held during the wait, only to be released 1204 * afterwards. This conservative approach is easily argued to avoid 1205 * race conditions. 1206 */ 1207static vm_page_t 1208_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) 1209{ 1210 vm_page_t m, /*pdppg, */pdpg; 1211 1212 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1213 1214 /* 1215 * Allocate a page table page. 1216 */ 1217 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1218 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1219 if (lockp != NULL) { 1220 RELEASE_PV_LIST_LOCK(lockp); 1221 PMAP_UNLOCK(pmap); 1222 rw_runlock(&pvh_global_lock); 1223 VM_WAIT; 1224 rw_rlock(&pvh_global_lock); 1225 PMAP_LOCK(pmap); 1226 } 1227 1228 /* 1229 * Indicate the need to retry. While waiting, the page table 1230 * page may have been allocated. 1231 */ 1232 return (NULL); 1233 } 1234 if ((m->flags & PG_ZERO) == 0) 1235 pmap_zero_page(m); 1236 1237 /* 1238 * Map the pagetable page into the process address space, if 1239 * it isn't already there. 
1240 */ 1241 1242 if (ptepindex >= NUPDE) { 1243 pd_entry_t *l1; 1244 vm_pindex_t l1index; 1245 1246 l1index = ptepindex - NUPDE; 1247 l1 = &pmap->pm_l1[l1index]; 1248 pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); 1249 PTE_SYNC(l1); 1250 1251 } else { 1252 vm_pindex_t l1index; 1253 pd_entry_t *l1, *l2; 1254 1255 l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); 1256 l1 = &pmap->pm_l1[l1index]; 1257 if (pmap_load(l1) == 0) { 1258 /* recurse for allocating page dir */ 1259 if (_pmap_alloc_l3(pmap, NUPDE + l1index, 1260 lockp) == NULL) { 1261 --m->wire_count; 1262 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1263 vm_page_free_zero(m); 1264 return (NULL); 1265 } 1266 } else { 1267 pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK); 1268 pdpg->wire_count++; 1269 } 1270 1271 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 1272 l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1273 pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); 1274 PTE_SYNC(l2); 1275 } 1276 1277 pmap_resident_count_inc(pmap, 1); 1278 1279 return (m); 1280} 1281 1282static vm_page_t 1283pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1284{ 1285 vm_pindex_t ptepindex; 1286 pd_entry_t *l2; 1287 vm_page_t m; 1288 1289 /* 1290 * Calculate pagetable page index 1291 */ 1292 ptepindex = pmap_l2_pindex(va); 1293retry: 1294 /* 1295 * Get the page directory entry 1296 */ 1297 l2 = pmap_l2(pmap, va); 1298 1299 /* 1300 * If the page table page is mapped, we just increment the 1301 * hold count, and activate it. 1302 */ 1303 if (l2 != NULL && pmap_load(l2) != 0) { 1304 m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); 1305 m->wire_count++; 1306 } else { 1307 /* 1308 * Here if the pte page isn't mapped, or if it has been 1309 * deallocated. 1310 */ 1311 m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1312 if (m == NULL && lockp != NULL) 1313 goto retry; 1314 } 1315 return (m); 1316} 1317 1318 1319/*************************************************** 1320 * Pmap allocation/deallocation routines. 1321 ***************************************************/ 1322 1323/* 1324 * Release any resources held by the given physical map. 1325 * Called when a pmap initialized by pmap_pinit is being released. 1326 * Should only be called if the map contains no valid mappings. 
1327 */ 1328void 1329pmap_release(pmap_t pmap) 1330{ 1331 vm_page_t m; 1332 1333 KASSERT(pmap->pm_stats.resident_count == 0, 1334 ("pmap_release: pmap resident count %ld != 0", 1335 pmap->pm_stats.resident_count)); 1336 1337 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); 1338 1339 m->wire_count--; 1340 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1341 vm_page_free_zero(m); 1342} 1343 1344#if 0 1345static int 1346kvm_size(SYSCTL_HANDLER_ARGS) 1347{ 1348 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1349 1350 return sysctl_handle_long(oidp, &ksize, 0, req); 1351} 1352SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1353 0, 0, kvm_size, "LU", "Size of KVM"); 1354 1355static int 1356kvm_free(SYSCTL_HANDLER_ARGS) 1357{ 1358 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1359 1360 return sysctl_handle_long(oidp, &kfree, 0, req); 1361} 1362SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1363 0, 0, kvm_free, "LU", "Amount of KVM free"); 1364#endif /* 0 */ 1365 1366/* 1367 * grow the number of kernel page table entries, if needed 1368 */ 1369void 1370pmap_growkernel(vm_offset_t addr) 1371{ 1372 vm_paddr_t paddr; 1373 vm_page_t nkpg; 1374 pd_entry_t *l1, *l2; 1375 1376 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1377 1378 addr = roundup2(addr, L2_SIZE); 1379 if (addr - 1 >= kernel_map->max_offset) 1380 addr = kernel_map->max_offset; 1381 while (kernel_vm_end < addr) { 1382 l1 = pmap_l1(kernel_pmap, kernel_vm_end); 1383 if (pmap_load(l1) == 0) { 1384 /* We need a new PDP entry */ 1385 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1386 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1387 VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1388 if (nkpg == NULL) 1389 panic("pmap_growkernel: no memory to grow kernel"); 1390 if ((nkpg->flags & PG_ZERO) == 0) 1391 pmap_zero_page(nkpg); 1392 paddr = VM_PAGE_TO_PHYS(nkpg); 1393 pmap_load_store(l1, paddr | L1_TABLE); 1394 PTE_SYNC(l1); 1395 continue; /* try again */ 1396 } 1397 l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1398 if ((pmap_load(l2) & ATTR_AF) != 0) { 1399 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1400 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1401 kernel_vm_end = kernel_map->max_offset; 1402 break; 1403 } 1404 continue; 1405 } 1406 1407 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1408 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1409 VM_ALLOC_ZERO); 1410 if (nkpg == NULL) 1411 panic("pmap_growkernel: no memory to grow kernel"); 1412 if ((nkpg->flags & PG_ZERO) == 0) 1413 pmap_zero_page(nkpg); 1414 paddr = VM_PAGE_TO_PHYS(nkpg); 1415 pmap_load_store(l2, paddr | L2_TABLE); 1416 PTE_SYNC(l2); 1417 pmap_invalidate_page(kernel_pmap, kernel_vm_end); 1418 1419 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1420 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1421 kernel_vm_end = kernel_map->max_offset; 1422 break; 1423 } 1424 } 1425} 1426 1427 1428/*************************************************** 1429 * page management routines. 
1430 ***************************************************/ 1431 1432CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1433CTASSERT(_NPCM == 3); 1434CTASSERT(_NPCPV == 168); 1435 1436static __inline struct pv_chunk * 1437pv_to_chunk(pv_entry_t pv) 1438{ 1439 1440 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1441} 1442 1443#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1444 1445#define PC_FREE0 0xfffffffffffffffful 1446#define PC_FREE1 0xfffffffffffffffful 1447#define PC_FREE2 0x000000fffffffffful 1448 1449static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1450 1451#if 0 1452#ifdef PV_STATS 1453static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1454 1455SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1456 "Current number of pv entry chunks"); 1457SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1458 "Current number of pv entry chunks allocated"); 1459SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1460 "Current number of pv entry chunks frees"); 1461SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1462 "Number of times tried to get a chunk page but failed."); 1463 1464static long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1465static int pv_entry_spare; 1466 1467SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1468 "Current number of pv entry frees"); 1469SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1470 "Current number of pv entry allocs"); 1471SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1472 "Current number of pv entries"); 1473SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1474 "Current number of spare pv entries"); 1475#endif 1476#endif /* 0 */ 1477 1478/* 1479 * We are in a serious low memory condition. Resort to 1480 * drastic measures to free some pages so we can allocate 1481 * another pv entry chunk. 1482 * 1483 * Returns NULL if PV entries were reclaimed from the specified pmap. 1484 * 1485 * We do not, however, unmap 2mpages because subsequent accesses will 1486 * allocate per-page pv entries until repromotion occurs, thereby 1487 * exacerbating the shortage of free pv entries. 1488 */ 1489static vm_page_t 1490reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1491{ 1492 1493 panic("ARM64TODO: reclaim_pv_chunk"); 1494} 1495 1496/* 1497 * free the pv_entry back to the free list 1498 */ 1499static void 1500free_pv_entry(pmap_t pmap, pv_entry_t pv) 1501{ 1502 struct pv_chunk *pc; 1503 int idx, field, bit; 1504 1505 rw_assert(&pvh_global_lock, RA_LOCKED); 1506 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1507 PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1508 PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1509 PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1510 pc = pv_to_chunk(pv); 1511 idx = pv - &pc->pc_pventry[0]; 1512 field = idx / 64; 1513 bit = idx % 64; 1514 pc->pc_map[field] |= 1ul << bit; 1515 if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1516 pc->pc_map[2] != PC_FREE2) { 1517 /* 98% of the time, pc is already at the head of the list. 
*/ 1518 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 1519 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1520 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1521 } 1522 return; 1523 } 1524 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1525 free_pv_chunk(pc); 1526} 1527 1528static void 1529free_pv_chunk(struct pv_chunk *pc) 1530{ 1531 vm_page_t m; 1532 1533 mtx_lock(&pv_chunks_mutex); 1534 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1535 mtx_unlock(&pv_chunks_mutex); 1536 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1537 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1538 PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1539 /* entire chunk is free, return it */ 1540 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1541 dump_drop_page(m->phys_addr); 1542 vm_page_unwire(m, PQ_NONE); 1543 vm_page_free(m); 1544} 1545 1546/* 1547 * Returns a new PV entry, allocating a new PV chunk from the system when 1548 * needed. If this PV chunk allocation fails and a PV list lock pointer was 1549 * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 1550 * returned. 1551 * 1552 * The given PV list lock may be released. 1553 */ 1554static pv_entry_t 1555get_pv_entry(pmap_t pmap, struct rwlock **lockp) 1556{ 1557 int bit, field; 1558 pv_entry_t pv; 1559 struct pv_chunk *pc; 1560 vm_page_t m; 1561 1562 rw_assert(&pvh_global_lock, RA_LOCKED); 1563 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1564 PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 1565retry: 1566 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1567 if (pc != NULL) { 1568 for (field = 0; field < _NPCM; field++) { 1569 if (pc->pc_map[field]) { 1570 bit = ffsl(pc->pc_map[field]) - 1; 1571 break; 1572 } 1573 } 1574 if (field < _NPCM) { 1575 pv = &pc->pc_pventry[field * 64 + bit]; 1576 pc->pc_map[field] &= ~(1ul << bit); 1577 /* If this was the last item, move it to tail */ 1578 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 1579 pc->pc_map[2] == 0) { 1580 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1581 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 1582 pc_list); 1583 } 1584 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1585 PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 1586 return (pv); 1587 } 1588 } 1589 /* No free items, allocate another chunk */ 1590 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 1591 VM_ALLOC_WIRED); 1592 if (m == NULL) { 1593 if (lockp == NULL) { 1594 PV_STAT(pc_chunk_tryfail++); 1595 return (NULL); 1596 } 1597 m = reclaim_pv_chunk(pmap, lockp); 1598 if (m == NULL) 1599 goto retry; 1600 } 1601 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 1602 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 1603 dump_add_page(m->phys_addr); 1604 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 1605 pc->pc_pmap = pmap; 1606 pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 1607 pc->pc_map[1] = PC_FREE1; 1608 pc->pc_map[2] = PC_FREE2; 1609 mtx_lock(&pv_chunks_mutex); 1610 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1611 mtx_unlock(&pv_chunks_mutex); 1612 pv = &pc->pc_pventry[0]; 1613 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1614 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1615 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 1616 return (pv); 1617} 1618 1619/* 1620 * First find and then remove the pv entry for the specified pmap and virtual 1621 * address from the specified pv list. Returns the pv entry if found and NULL 1622 * otherwise. This operation can be performed on pv lists for either 4KB or 1623 * 2MB page mappings. 
1624 */ 1625static __inline pv_entry_t 1626pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1627{ 1628 pv_entry_t pv; 1629 1630 rw_assert(&pvh_global_lock, RA_LOCKED); 1631 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 1632 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1633 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 1634 pvh->pv_gen++; 1635 break; 1636 } 1637 } 1638 return (pv); 1639} 1640 1641/* 1642 * First find and then destroy the pv entry for the specified pmap and virtual 1643 * address. This operation can be performed on pv lists for either 4KB or 2MB 1644 * page mappings. 1645 */ 1646static void 1647pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1648{ 1649 pv_entry_t pv; 1650 1651 pv = pmap_pvh_remove(pvh, pmap, va); 1652 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 1653 free_pv_entry(pmap, pv); 1654} 1655 1656/* 1657 * Conditionally create the PV entry for a 4KB page mapping if the required 1658 * memory can be allocated without resorting to reclamation. 1659 */ 1660static boolean_t 1661pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 1662 struct rwlock **lockp) 1663{ 1664 pv_entry_t pv; 1665 1666 rw_assert(&pvh_global_lock, RA_LOCKED); 1667 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1668 /* Pass NULL instead of the lock pointer to disable reclamation. */ 1669 if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 1670 pv->pv_va = va; 1671 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1672 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 1673 m->md.pv_gen++; 1674 return (TRUE); 1675 } else 1676 return (FALSE); 1677} 1678 1679/* 1680 * pmap_remove_l3: do the things to unmap a page in a process 1681 */ 1682static int 1683pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 1684 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 1685{ 1686 pt_entry_t old_l3; 1687 vm_page_t m; 1688 1689 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1690 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) 1691 cpu_dcache_wb_range(va, L3_SIZE); 1692 old_l3 = pmap_load_clear(l3); 1693 PTE_SYNC(l3); 1694 pmap_invalidate_page(pmap, va); 1695 if (old_l3 & ATTR_SW_WIRED) 1696 pmap->pm_stats.wired_count -= 1; 1697 pmap_resident_count_dec(pmap, 1); 1698 if (old_l3 & ATTR_SW_MANAGED) { 1699 m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); 1700 if (pmap_page_dirty(old_l3)) 1701 vm_page_dirty(m); 1702 if (old_l3 & ATTR_AF) 1703 vm_page_aflag_set(m, PGA_REFERENCED); 1704 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1705 pmap_pvh_free(&m->md, pmap, va); 1706 } 1707 return (pmap_unuse_l3(pmap, va, l2e, free)); 1708} 1709 1710/* 1711 * Remove the given range of addresses from the specified map. 1712 * 1713 * It is assumed that the start and end are properly 1714 * rounded to the page size. 1715 */ 1716void 1717pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1718{ 1719 struct rwlock *lock; 1720 vm_offset_t va, va_next; 1721 pd_entry_t *l1, *l2; 1722 pt_entry_t l3_paddr, *l3; 1723 struct spglist free; 1724 int anyvalid; 1725 1726 /* 1727 * Perform an unsynchronized read. This is, however, safe. 
1728 */ 1729 if (pmap->pm_stats.resident_count == 0) 1730 return; 1731 1732 anyvalid = 0; 1733 SLIST_INIT(&free); 1734 1735 rw_rlock(&pvh_global_lock); 1736 PMAP_LOCK(pmap); 1737 1738 lock = NULL; 1739 for (; sva < eva; sva = va_next) { 1740 1741 if (pmap->pm_stats.resident_count == 0) 1742 break; 1743 1744 l1 = pmap_l1(pmap, sva); 1745 if (pmap_load(l1) == 0) { 1746 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1747 if (va_next < sva) 1748 va_next = eva; 1749 continue; 1750 } 1751 1752 /* 1753 * Calculate index for next page table. 1754 */ 1755 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1756 if (va_next < sva) 1757 va_next = eva; 1758 1759 l2 = pmap_l1_to_l2(l1, sva); 1760 if (l2 == NULL) 1761 continue; 1762 1763 l3_paddr = pmap_load(l2); 1764 1765 /* 1766 * Weed out invalid mappings. 1767 */ 1768 if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) 1769 continue; 1770 1771 /* 1772 * Limit our scan to either the end of the va represented 1773 * by the current page table page, or to the end of the 1774 * range being removed. 1775 */ 1776 if (va_next > eva) 1777 va_next = eva; 1778 1779 va = va_next; 1780 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 1781 sva += L3_SIZE) { 1782 if (l3 == NULL) 1783 panic("l3 == NULL"); 1784 if (pmap_load(l3) == 0) { 1785 if (va != va_next) { 1786 pmap_invalidate_range(pmap, va, sva); 1787 va = va_next; 1788 } 1789 continue; 1790 } 1791 if (va == va_next) 1792 va = sva; 1793 if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, 1794 &lock)) { 1795 sva += L3_SIZE; 1796 break; 1797 } 1798 } 1799 if (va != va_next) 1800 pmap_invalidate_range(pmap, va, sva); 1801 } 1802 if (lock != NULL) 1803 rw_wunlock(lock); 1804 if (anyvalid) 1805 pmap_invalidate_all(pmap); 1806 rw_runlock(&pvh_global_lock); 1807 PMAP_UNLOCK(pmap); 1808 pmap_free_zero_pages(&free); 1809} 1810 1811/* 1812 * Routine: pmap_remove_all 1813 * Function: 1814 * Removes this physical page from 1815 * all physical maps in which it resides. 1816 * Reflects back modify bits to the pager. 1817 * 1818 * Notes: 1819 * Original versions of this routine were very 1820 * inefficient because they iteratively called 1821 * pmap_remove (slow...) 1822 */ 1823 1824void 1825pmap_remove_all(vm_page_t m) 1826{ 1827 pv_entry_t pv; 1828 pmap_t pmap; 1829 pt_entry_t *l3, tl3; 1830 pd_entry_t *l2, tl2; 1831 struct spglist free; 1832 1833 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1834 ("pmap_remove_all: page %p is not managed", m)); 1835 SLIST_INIT(&free); 1836 rw_wlock(&pvh_global_lock); 1837 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1838 pmap = PV_PMAP(pv); 1839 PMAP_LOCK(pmap); 1840 pmap_resident_count_dec(pmap, 1); 1841 l2 = pmap_l2(pmap, pv->pv_va); 1842 KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); 1843 tl2 = pmap_load(l2); 1844 KASSERT((tl2 & ATTR_DESCR_MASK) == L2_TABLE, 1845 ("pmap_remove_all: found a table when expecting " 1846 "a block in %p's pv list", m)); 1847 l3 = pmap_l2_to_l3(l2, pv->pv_va); 1848 if (pmap_is_current(pmap) && 1849 pmap_l3_valid_cacheable(pmap_load(l3))) 1850 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 1851 tl3 = pmap_load_clear(l3); 1852 PTE_SYNC(l3); 1853 pmap_invalidate_page(pmap, pv->pv_va); 1854 if (tl3 & ATTR_SW_WIRED) 1855 pmap->pm_stats.wired_count--; 1856 if ((tl3 & ATTR_AF) != 0) 1857 vm_page_aflag_set(m, PGA_REFERENCED); 1858 1859 /* 1860 * Update the vm_page_t clean and reference bits. 
1861 */ 1862 if (pmap_page_dirty(tl3)) 1863 vm_page_dirty(m); 1864 pmap_unuse_l3(pmap, pv->pv_va, tl2, &free); 1865 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 1866 m->md.pv_gen++; 1867 free_pv_entry(pmap, pv); 1868 PMAP_UNLOCK(pmap); 1869 } 1870 vm_page_aflag_clear(m, PGA_WRITEABLE); 1871 rw_wunlock(&pvh_global_lock); 1872 pmap_free_zero_pages(&free); 1873} 1874 1875/* 1876 * Set the physical protection on the 1877 * specified range of this map as requested. 1878 */ 1879void 1880pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1881{ 1882 vm_offset_t va, va_next; 1883 pd_entry_t *l1, *l2; 1884 pt_entry_t *l3p, l3; 1885 1886 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1887 pmap_remove(pmap, sva, eva); 1888 return; 1889 } 1890 1891 if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) 1892 return; 1893 1894 PMAP_LOCK(pmap); 1895 for (; sva < eva; sva = va_next) { 1896 1897 l1 = pmap_l1(pmap, sva); 1898 if (pmap_load(l1) == 0) { 1899 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1900 if (va_next < sva) 1901 va_next = eva; 1902 continue; 1903 } 1904 1905 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1906 if (va_next < sva) 1907 va_next = eva; 1908 1909 l2 = pmap_l1_to_l2(l1, sva); 1910 if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) 1911 continue; 1912 1913 if (va_next > eva) 1914 va_next = eva; 1915 1916 va = va_next; 1917 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 1918 sva += L3_SIZE) { 1919 l3 = pmap_load(l3p); 1920 if (pmap_l3_valid(l3)) { 1921 pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); 1922 PTE_SYNC(l3p); 1923 /* XXX: Use pmap_invalidate_range */ 1924 pmap_invalidate_page(pmap, va); 1925 } 1926 } 1927 } 1928 PMAP_UNLOCK(pmap); 1929 1930 /* TODO: Only invalidate entries we are touching */ 1931 pmap_invalidate_all(pmap); 1932} 1933 1934/* 1935 * Insert the given physical page (p) at 1936 * the specified virtual address (v) in the 1937 * target physical map with the protection requested. 1938 * 1939 * If specified, the page will be wired down, meaning 1940 * that the related pte can not be reclaimed. 1941 * 1942 * NB: This is the only routine which MAY NOT lazy-evaluate 1943 * or lose information. That is, this routine must actually 1944 * insert this page into the given map NOW. 1945 */ 1946int 1947pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1948 u_int flags, int8_t psind __unused) 1949{ 1950 struct rwlock *lock; 1951 pd_entry_t *l1, *l2; 1952 pt_entry_t new_l3, orig_l3; 1953 pt_entry_t *l3; 1954 pv_entry_t pv; 1955 vm_paddr_t opa, pa, l2_pa, l3_pa; 1956 vm_page_t mpte, om, l2_m, l3_m; 1957 boolean_t nosleep; 1958 1959 va = trunc_page(va); 1960 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 1961 VM_OBJECT_ASSERT_LOCKED(m->object); 1962 pa = VM_PAGE_TO_PHYS(m); 1963 new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 1964 L3_PAGE); 1965 if ((prot & VM_PROT_WRITE) == 0) 1966 new_l3 |= ATTR_AP(ATTR_AP_RO); 1967 if ((flags & PMAP_ENTER_WIRED) != 0) 1968 new_l3 |= ATTR_SW_WIRED; 1969 if ((va >> 63) == 0) 1970 new_l3 |= ATTR_AP(ATTR_AP_USER); 1971 1972 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 1973 1974 mpte = NULL; 1975 1976 lock = NULL; 1977 rw_rlock(&pvh_global_lock); 1978 PMAP_LOCK(pmap); 1979 1980 if (va < VM_MAXUSER_ADDRESS) { 1981 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 1982 mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); 1983 if (mpte == NULL && nosleep) { 1984 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 1985 if (lock != NULL) 1986 rw_wunlock(lock); 1987 rw_runlock(&pvh_global_lock); 1988 PMAP_UNLOCK(pmap); 1989 return (KERN_RESOURCE_SHORTAGE); 1990 } 1991 l3 = pmap_l3(pmap, va); 1992 } else { 1993 l3 = pmap_l3(pmap, va); 1994 /* TODO: This is not optimal, but should mostly work */ 1995 if (l3 == NULL) { 1996 l2 = pmap_l2(pmap, va); 1997 1998 if (l2 == NULL) { 1999 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2000 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2001 VM_ALLOC_ZERO); 2002 if (l2_m == NULL) 2003 panic("pmap_enter: l2 pte_m == NULL"); 2004 if ((l2_m->flags & PG_ZERO) == 0) 2005 pmap_zero_page(l2_m); 2006 2007 l2_pa = VM_PAGE_TO_PHYS(l2_m); 2008 l1 = pmap_l1(pmap, va); 2009 pmap_load_store(l1, l2_pa | L1_TABLE); 2010 PTE_SYNC(l1); 2011 l2 = pmap_l1_to_l2(l1, va); 2012 } 2013 2014 KASSERT(l2 != NULL, 2015 ("No l2 table after allocating one")); 2016 2017 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2018 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); 2019 if (l3_m == NULL) 2020 panic("pmap_enter: l3 pte_m == NULL"); 2021 if ((l3_m->flags & PG_ZERO) == 0) 2022 pmap_zero_page(l3_m); 2023 2024 l3_pa = VM_PAGE_TO_PHYS(l3_m); 2025 pmap_load_store(l2, l3_pa | L2_TABLE); 2026 PTE_SYNC(l2); 2027 l3 = pmap_l2_to_l3(l2, va); 2028 } 2029 pmap_invalidate_page(pmap, va); 2030 } 2031 2032 om = NULL; 2033 orig_l3 = pmap_load(l3); 2034 opa = orig_l3 & ~ATTR_MASK; 2035 2036 /* 2037 * Is the specified virtual address already mapped? 2038 */ 2039 if (pmap_l3_valid(orig_l3)) { 2040 /* 2041 * Wiring change, just update stats. We don't worry about 2042 * wiring PT pages as they remain resident as long as there 2043 * are valid mappings in them. Hence, if a user page is wired, 2044 * the PT page will be also. 2045 */ 2046 if ((flags & PMAP_ENTER_WIRED) != 0 && 2047 (orig_l3 & ATTR_SW_WIRED) == 0) 2048 pmap->pm_stats.wired_count++; 2049 else if ((flags & PMAP_ENTER_WIRED) == 0 && 2050 (orig_l3 & ATTR_SW_WIRED) != 0) 2051 pmap->pm_stats.wired_count--; 2052 2053 /* 2054 * Remove the extra PT page reference. 2055 */ 2056 if (mpte != NULL) { 2057 mpte->wire_count--; 2058 KASSERT(mpte->wire_count > 0, 2059 ("pmap_enter: missing reference to page table page," 2060 " va: 0x%lx", va)); 2061 } 2062 2063 /* 2064 * Has the physical page changed? 2065 */ 2066 if (opa == pa) { 2067 /* 2068 * No, might be a protection or wiring change. 2069 */ 2070 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2071 new_l3 |= ATTR_SW_MANAGED; 2072 if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == 2073 ATTR_AP(ATTR_AP_RW)) { 2074 vm_page_aflag_set(m, PGA_WRITEABLE); 2075 } 2076 } 2077 goto validate; 2078 } 2079 2080 /* Flush the cache, there might be uncommitted data in it */ 2081 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 2082 cpu_dcache_wb_range(va, L3_SIZE); 2083 } else { 2084 /* 2085 * Increment the counters. 2086 */ 2087 if ((new_l3 & ATTR_SW_WIRED) != 0) 2088 pmap->pm_stats.wired_count++; 2089 pmap_resident_count_inc(pmap, 1); 2090 } 2091 /* 2092 * Enter on the PV list if part of our managed memory. 2093 */ 2094 if ((m->oflags & VPO_UNMANAGED) == 0) { 2095 new_l3 |= ATTR_SW_MANAGED; 2096 pv = get_pv_entry(pmap, &lock); 2097 pv->pv_va = va; 2098 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 2099 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2100 m->md.pv_gen++; 2101 if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 2102 vm_page_aflag_set(m, PGA_WRITEABLE); 2103 } 2104 2105 /* 2106 * Update the L3 entry. 
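 * If a valid mapping already existed (orig_l3 != 0) the entry is
 * replaced in place and the old page's dirty and PV state is
 * reconciled below; otherwise the new entry is simply stored.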
2107 */ 2108 if (orig_l3 != 0) { 2109validate: 2110 orig_l3 = pmap_load_store(l3, new_l3); 2111 PTE_SYNC(l3); 2112 opa = orig_l3 & ~ATTR_MASK; 2113 2114 if (opa != pa) { 2115 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2116 om = PHYS_TO_VM_PAGE(opa); 2117 if (pmap_page_dirty(orig_l3)) 2118 vm_page_dirty(om); 2119 if ((orig_l3 & ATTR_AF) != 0) 2120 vm_page_aflag_set(om, PGA_REFERENCED); 2121 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 2122 pmap_pvh_free(&om->md, pmap, va); 2123 } 2124 } else if (pmap_page_dirty(orig_l3)) { 2125 if ((orig_l3 & ATTR_SW_MANAGED) != 0) 2126 vm_page_dirty(m); 2127 } 2128 } else { 2129 pmap_load_store(l3, new_l3); 2130 PTE_SYNC(l3); 2131 } 2132 pmap_invalidate_page(pmap, va); 2133 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 2134 cpu_icache_sync_range(va, PAGE_SIZE); 2135 2136 if (lock != NULL) 2137 rw_wunlock(lock); 2138 rw_runlock(&pvh_global_lock); 2139 PMAP_UNLOCK(pmap); 2140 return (KERN_SUCCESS); 2141} 2142 2143/* 2144 * Maps a sequence of resident pages belonging to the same object. 2145 * The sequence begins with the given page m_start. This page is 2146 * mapped at the given virtual address start. Each subsequent page is 2147 * mapped at a virtual address that is offset from start by the same 2148 * amount as the page is offset from m_start within the object. The 2149 * last page in the sequence is the page with the largest offset from 2150 * m_start that can be mapped at a virtual address less than the given 2151 * virtual address end. Not every virtual page between start and end 2152 * is mapped; only those for which a resident page exists with the 2153 * corresponding offset from m_start are mapped. 2154 */ 2155void 2156pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2157 vm_page_t m_start, vm_prot_t prot) 2158{ 2159 struct rwlock *lock; 2160 vm_offset_t va; 2161 vm_page_t m, mpte; 2162 vm_pindex_t diff, psize; 2163 2164 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2165 2166 psize = atop(end - start); 2167 mpte = NULL; 2168 m = m_start; 2169 lock = NULL; 2170 rw_rlock(&pvh_global_lock); 2171 PMAP_LOCK(pmap); 2172 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2173 va = start + ptoa(diff); 2174 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); 2175 m = TAILQ_NEXT(m, listq); 2176 } 2177 if (lock != NULL) 2178 rw_wunlock(lock); 2179 rw_runlock(&pvh_global_lock); 2180 PMAP_UNLOCK(pmap); 2181} 2182 2183/* 2184 * this code makes some *MAJOR* assumptions: 2185 * 1. Current pmap & pmap exists. 2186 * 2. Not wired. 2187 * 3. Read access. 2188 * 4. No page table pages. 2189 * but is *MUCH* faster than pmap_enter... 
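 * Callers use it for speculative (prefault) mappings, so failing to
 * enter the mapping here is not an error; the page is simply left
 * unmapped.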
2190 */ 2191 2192void 2193pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2194{ 2195 struct rwlock *lock; 2196 2197 lock = NULL; 2198 rw_rlock(&pvh_global_lock); 2199 PMAP_LOCK(pmap); 2200 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 2201 if (lock != NULL) 2202 rw_wunlock(lock); 2203 rw_runlock(&pvh_global_lock); 2204 PMAP_UNLOCK(pmap); 2205} 2206 2207static vm_page_t 2208pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2209 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 2210{ 2211 struct spglist free; 2212 pd_entry_t *l2; 2213 pt_entry_t *l3; 2214 vm_paddr_t pa; 2215 2216 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2217 (m->oflags & VPO_UNMANAGED) != 0, 2218 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2219 rw_assert(&pvh_global_lock, RA_LOCKED); 2220 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2221 2222 CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); 2223 /* 2224 * In the case that a page table page is not 2225 * resident, we are creating it here. 2226 */ 2227 if (va < VM_MAXUSER_ADDRESS) { 2228 vm_pindex_t l2pindex; 2229 2230 /* 2231 * Calculate pagetable page index 2232 */ 2233 l2pindex = pmap_l2_pindex(va); 2234 if (mpte && (mpte->pindex == l2pindex)) { 2235 mpte->wire_count++; 2236 } else { 2237 /* 2238 * Get the l2 entry 2239 */ 2240 l2 = pmap_l2(pmap, va); 2241 2242 /* 2243 * If the page table page is mapped, we just increment 2244 * the hold count, and activate it. Otherwise, we 2245 * attempt to allocate a page table page. If this 2246 * attempt fails, we don't retry. Instead, we give up. 2247 */ 2248 if (l2 != NULL && pmap_load(l2) != 0) { 2249 mpte = 2250 PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); 2251 mpte->wire_count++; 2252 } else { 2253 /* 2254 * Pass NULL instead of the PV list lock 2255 * pointer, because we don't intend to sleep. 2256 */ 2257 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 2258 if (mpte == NULL) 2259 return (mpte); 2260 } 2261 } 2262 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 2263 l3 = &l3[pmap_l3_index(va)]; 2264 } else { 2265 mpte = NULL; 2266 l3 = pmap_l3(kernel_pmap, va); 2267 } 2268 if (l3 == NULL) 2269 panic("pmap_enter_quick_locked: No l3"); 2270 if (pmap_load(l3) != 0) { 2271 if (mpte != NULL) { 2272 mpte->wire_count--; 2273 mpte = NULL; 2274 } 2275 return (mpte); 2276 } 2277 2278 /* 2279 * Enter on the PV list if part of our managed memory. 2280 */ 2281 if ((m->oflags & VPO_UNMANAGED) == 0 && 2282 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 2283 if (mpte != NULL) { 2284 SLIST_INIT(&free); 2285 if (pmap_unwire_l3(pmap, va, mpte, &free)) { 2286 pmap_invalidate_page(pmap, va); 2287 pmap_free_zero_pages(&free); 2288 } 2289 mpte = NULL; 2290 } 2291 return (mpte); 2292 } 2293 2294 /* 2295 * Increment counters 2296 */ 2297 pmap_resident_count_inc(pmap, 1); 2298 2299 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 2300 ATTR_AP(ATTR_AP_RW) | L3_PAGE; 2301 2302 /* 2303 * Now validate mapping with RO protection 2304 */ 2305 if ((m->oflags & VPO_UNMANAGED) == 0) 2306 pa |= ATTR_SW_MANAGED; 2307 pmap_load_store(l3, pa); 2308 PTE_SYNC(l3); 2309 pmap_invalidate_page(pmap, va); 2310 return (mpte); 2311} 2312 2313/* 2314 * This code maps large physical mmap regions into the 2315 * processor address space. Note that some shortcuts 2316 * are taken, but the code works. 
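 * On arm64 this is currently a no-op beyond the assertions below; no
 * superpage mappings are pre-created here yet.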
2317 */ 2318void 2319pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2320 vm_pindex_t pindex, vm_size_t size) 2321{ 2322 2323 VM_OBJECT_ASSERT_WLOCKED(object); 2324 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2325 ("pmap_object_init_pt: non-device object")); 2326} 2327 2328/* 2329 * Clear the wired attribute from the mappings for the specified range of 2330 * addresses in the given pmap. Every valid mapping within that range 2331 * must have the wired attribute set. In contrast, invalid mappings 2332 * cannot have the wired attribute set, so they are ignored. 2333 * 2334 * The wired attribute of the page table entry is not a hardware feature, 2335 * so there is no need to invalidate any TLB entries. 2336 */ 2337void 2338pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2339{ 2340 vm_offset_t va_next; 2341 pd_entry_t *l1, *l2; 2342 pt_entry_t *l3; 2343 boolean_t pv_lists_locked; 2344 2345 pv_lists_locked = FALSE; 2346 PMAP_LOCK(pmap); 2347 for (; sva < eva; sva = va_next) { 2348 l1 = pmap_l1(pmap, sva); 2349 if (pmap_load(l1) == 0) { 2350 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2351 if (va_next < sva) 2352 va_next = eva; 2353 continue; 2354 } 2355 2356 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2357 if (va_next < sva) 2358 va_next = eva; 2359 2360 l2 = pmap_l1_to_l2(l1, sva); 2361 if (pmap_load(l2) == 0) 2362 continue; 2363 2364 if (va_next > eva) 2365 va_next = eva; 2366 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2367 sva += L3_SIZE) { 2368 if (pmap_load(l3) == 0) 2369 continue; 2370 if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) 2371 panic("pmap_unwire: l3 %#jx is missing " 2372 "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); 2373 2374 /* 2375 * PG_W must be cleared atomically. Although the pmap 2376 * lock synchronizes access to PG_W, another processor 2377 * could be setting PG_M and/or PG_A concurrently. 2378 */ 2379 atomic_clear_long(l3, ATTR_SW_WIRED); 2380 pmap->pm_stats.wired_count--; 2381 } 2382 } 2383 if (pv_lists_locked) 2384 rw_runlock(&pvh_global_lock); 2385 PMAP_UNLOCK(pmap); 2386} 2387 2388/* 2389 * Copy the range specified by src_addr/len 2390 * from the source map to the range dst_addr/len 2391 * in the destination map. 2392 * 2393 * This routine is only advisory and need not do anything. 2394 */ 2395 2396void 2397pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2398 vm_offset_t src_addr) 2399{ 2400} 2401 2402/* 2403 * pmap_zero_page zeros the specified hardware page by mapping 2404 * the page into KVM and using bzero to clear its contents. 2405 */ 2406void 2407pmap_zero_page(vm_page_t m) 2408{ 2409 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2410 2411 pagezero((void *)va); 2412} 2413 2414/* 2415 * pmap_zero_page_area zeros the specified hardware page by mapping 2416 * the page into KVM and using bzero to clear its contents. 2417 * 2418 * off and size may not cover an area beyond a single hardware page. 2419 */ 2420void 2421pmap_zero_page_area(vm_page_t m, int off, int size) 2422{ 2423 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2424 2425 if (off == 0 && size == PAGE_SIZE) 2426 pagezero((void *)va); 2427 else 2428 bzero((char *)va + off, size); 2429} 2430 2431/* 2432 * pmap_zero_page_idle zeros the specified hardware page by mapping 2433 * the page into KVM and using bzero to clear its contents. This 2434 * is intended to be called from the vm_pagezero process only and 2435 * outside of Giant. 
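 * On arm64 the page is addressed through the direct map, so no
 * temporary kernel mapping is actually created.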
2436 */ 2437void 2438pmap_zero_page_idle(vm_page_t m) 2439{ 2440 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2441 2442 pagezero((void *)va); 2443} 2444 2445/* 2446 * pmap_copy_page copies the specified (machine independent) 2447 * page by mapping the page into virtual memory and using 2448 * bcopy to copy the page, one machine dependent page at a 2449 * time. 2450 */ 2451void 2452pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2453{ 2454 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2455 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2456 2457 pagecopy((void *)src, (void *)dst); 2458} 2459 2460int unmapped_buf_allowed = 1; 2461 2462void 2463pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2464 vm_offset_t b_offset, int xfersize) 2465{ 2466 void *a_cp, *b_cp; 2467 vm_page_t m_a, m_b; 2468 vm_paddr_t p_a, p_b; 2469 vm_offset_t a_pg_offset, b_pg_offset; 2470 int cnt; 2471 2472 while (xfersize > 0) { 2473 a_pg_offset = a_offset & PAGE_MASK; 2474 m_a = ma[a_offset >> PAGE_SHIFT]; 2475 p_a = m_a->phys_addr; 2476 b_pg_offset = b_offset & PAGE_MASK; 2477 m_b = mb[b_offset >> PAGE_SHIFT]; 2478 p_b = m_b->phys_addr; 2479 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2480 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2481 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 2482 panic("!DMAP a %lx", p_a); 2483 } else { 2484 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 2485 } 2486 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 2487 panic("!DMAP b %lx", p_b); 2488 } else { 2489 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 2490 } 2491 bcopy(a_cp, b_cp, cnt); 2492 a_offset += cnt; 2493 b_offset += cnt; 2494 xfersize -= cnt; 2495 } 2496} 2497 2498vm_offset_t 2499pmap_quick_enter_page(vm_page_t m) 2500{ 2501 2502 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2503} 2504 2505void 2506pmap_quick_remove_page(vm_offset_t addr) 2507{ 2508} 2509 2510/* 2511 * Returns true if the pmap's pv is one of the first 2512 * 16 pvs linked to from this page. This count may 2513 * be changed upwards or downwards in the future; it 2514 * is only necessary that true be returned for a small 2515 * subset of pmaps for proper page aging. 2516 */ 2517boolean_t 2518pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2519{ 2520 struct rwlock *lock; 2521 pv_entry_t pv; 2522 int loops = 0; 2523 boolean_t rv; 2524 2525 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2526 ("pmap_page_exists_quick: page %p is not managed", m)); 2527 rv = FALSE; 2528 rw_rlock(&pvh_global_lock); 2529 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2530 rw_rlock(lock); 2531 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2532 if (PV_PMAP(pv) == pmap) { 2533 rv = TRUE; 2534 break; 2535 } 2536 loops++; 2537 if (loops >= 16) 2538 break; 2539 } 2540 rw_runlock(lock); 2541 rw_runlock(&pvh_global_lock); 2542 return (rv); 2543} 2544 2545/* 2546 * pmap_page_wired_mappings: 2547 * 2548 * Return the number of managed mappings to the given physical page 2549 * that are wired. 
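 * The PV list lock may have to be dropped to respect the lock order
 * with the pmap lock; if the list changes meanwhile (md.pv_gen) the
 * scan restarts.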
2550 */ 2551int 2552pmap_page_wired_mappings(vm_page_t m) 2553{ 2554 struct rwlock *lock; 2555 pmap_t pmap; 2556 pt_entry_t *l3; 2557 pv_entry_t pv; 2558 int count, md_gen; 2559 2560 if ((m->oflags & VPO_UNMANAGED) != 0) 2561 return (0); 2562 rw_rlock(&pvh_global_lock); 2563 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2564 rw_rlock(lock); 2565restart: 2566 count = 0; 2567 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2568 pmap = PV_PMAP(pv); 2569 if (!PMAP_TRYLOCK(pmap)) { 2570 md_gen = m->md.pv_gen; 2571 rw_runlock(lock); 2572 PMAP_LOCK(pmap); 2573 rw_rlock(lock); 2574 if (md_gen != m->md.pv_gen) { 2575 PMAP_UNLOCK(pmap); 2576 goto restart; 2577 } 2578 } 2579 l3 = pmap_l3(pmap, pv->pv_va); 2580 if (l3 != NULL && (pmap_load(l3) & ATTR_SW_WIRED) != 0) 2581 count++; 2582 PMAP_UNLOCK(pmap); 2583 } 2584 rw_runlock(lock); 2585 rw_runlock(&pvh_global_lock); 2586 return (count); 2587} 2588 2589/* 2590 * Destroy all managed, non-wired mappings in the given user-space 2591 * pmap. This pmap cannot be active on any processor besides the 2592 * caller. 2593 * 2594 * This function cannot be applied to the kernel pmap. Moreover, it 2595 * is not intended for general use. It is only to be used during 2596 * process termination. Consequently, it can be implemented in ways 2597 * that make it faster than pmap_remove(). First, it can more quickly 2598 * destroy mappings by iterating over the pmap's collection of PV 2599 * entries, rather than searching the page table. Second, it doesn't 2600 * have to test and clear the page table entries atomically, because 2601 * no processor is currently accessing the user address space. In 2602 * particular, a page table entry's dirty bit won't change state once 2603 * this function starts. 2604 */ 2605void 2606pmap_remove_pages(pmap_t pmap) 2607{ 2608 pd_entry_t ptepde, *l2; 2609 pt_entry_t *l3, tl3; 2610 struct spglist free; 2611 vm_page_t m; 2612 pv_entry_t pv; 2613 struct pv_chunk *pc, *npc; 2614 struct rwlock *lock; 2615 int64_t bit; 2616 uint64_t inuse, bitmask; 2617 int allfree, field, freed, idx; 2618 vm_paddr_t pa; 2619 2620 lock = NULL; 2621 2622 SLIST_INIT(&free); 2623 rw_rlock(&pvh_global_lock); 2624 PMAP_LOCK(pmap); 2625 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2626 allfree = 1; 2627 freed = 0; 2628 for (field = 0; field < _NPCM; field++) { 2629 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2630 while (inuse != 0) { 2631 bit = ffsl(inuse) - 1; 2632 bitmask = 1UL << bit; 2633 idx = field * 64 + bit; 2634 pv = &pc->pc_pventry[idx]; 2635 inuse &= ~bitmask; 2636 2637 l2 = pmap_l2(pmap, pv->pv_va); 2638 ptepde = pmap_load(l2); 2639 l3 = pmap_l2_to_l3(l2, pv->pv_va); 2640 tl3 = pmap_load(l3); 2641 2642/* 2643 * We cannot remove wired pages from a process' mapping at this time 2644 */ 2645 if (tl3 & ATTR_SW_WIRED) { 2646 allfree = 0; 2647 continue; 2648 } 2649 2650 pa = tl3 & ~ATTR_MASK; 2651 2652 m = PHYS_TO_VM_PAGE(pa); 2653 KASSERT(m->phys_addr == pa, 2654 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2655 m, (uintmax_t)m->phys_addr, 2656 (uintmax_t)tl3)); 2657 2658 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 2659 m < &vm_page_array[vm_page_array_size], 2660 ("pmap_remove_pages: bad l3 %#jx", 2661 (uintmax_t)tl3)); 2662 2663 if (pmap_is_current(pmap) && 2664 pmap_l3_valid_cacheable(pmap_load(l3))) 2665 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2666 pmap_load_clear(l3); 2667 PTE_SYNC(l3); 2668 pmap_invalidate_page(pmap, pv->pv_va); 2669 2670 /* 2671 * Update the vm_page_t clean/reference bits. 
2672 */ 2673 if ((tl3 & ATTR_AP_RW_BIT) == 2674 ATTR_AP(ATTR_AP_RW)) 2675 vm_page_dirty(m); 2676 2677 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 2678 2679 /* Mark free */ 2680 pc->pc_map[field] |= bitmask; 2681 2682 pmap_resident_count_dec(pmap, 1); 2683 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2684 m->md.pv_gen++; 2685 2686 pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); 2687 freed++; 2688 } 2689 } 2690 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 2691 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 2692 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 2693 if (allfree) { 2694 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2695 free_pv_chunk(pc); 2696 } 2697 } 2698 pmap_invalidate_all(pmap); 2699 if (lock != NULL) 2700 rw_wunlock(lock); 2701 rw_runlock(&pvh_global_lock); 2702 PMAP_UNLOCK(pmap); 2703 pmap_free_zero_pages(&free); 2704} 2705 2706/* 2707 * This is used to check if a page has been accessed or modified. As we 2708 * don't have a bit to see if it has been modified we have to assume it 2709 * has been if the page is read/write. 2710 */ 2711static boolean_t 2712pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 2713{ 2714 struct rwlock *lock; 2715 pv_entry_t pv; 2716 pt_entry_t *l3, mask, value; 2717 pmap_t pmap; 2718 int md_gen; 2719 boolean_t rv; 2720 2721 rv = FALSE; 2722 rw_rlock(&pvh_global_lock); 2723 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2724 rw_rlock(lock); 2725restart: 2726 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2727 pmap = PV_PMAP(pv); 2728 if (!PMAP_TRYLOCK(pmap)) { 2729 md_gen = m->md.pv_gen; 2730 rw_runlock(lock); 2731 PMAP_LOCK(pmap); 2732 rw_rlock(lock); 2733 if (md_gen != m->md.pv_gen) { 2734 PMAP_UNLOCK(pmap); 2735 goto restart; 2736 } 2737 } 2738 l3 = pmap_l3(pmap, pv->pv_va); 2739 mask = 0; 2740 value = 0; 2741 if (modified) { 2742 mask |= ATTR_AP_RW_BIT; 2743 value |= ATTR_AP(ATTR_AP_RW); 2744 } 2745 if (accessed) { 2746 mask |= ATTR_AF | ATTR_DESCR_MASK; 2747 value |= ATTR_AF | L3_PAGE; 2748 } 2749 rv = (pmap_load(l3) & mask) == value; 2750 PMAP_UNLOCK(pmap); 2751 if (rv) 2752 goto out; 2753 } 2754out: 2755 rw_runlock(lock); 2756 rw_runlock(&pvh_global_lock); 2757 return (rv); 2758} 2759 2760/* 2761 * pmap_is_modified: 2762 * 2763 * Return whether or not the specified physical page was modified 2764 * in any physical maps. 2765 */ 2766boolean_t 2767pmap_is_modified(vm_page_t m) 2768{ 2769 2770 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2771 ("pmap_is_modified: page %p is not managed", m)); 2772 2773 /* 2774 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2775 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2776 * is clear, no PTEs can have PG_M set. 2777 */ 2778 VM_OBJECT_ASSERT_WLOCKED(m->object); 2779 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2780 return (FALSE); 2781 return (pmap_page_test_mappings(m, FALSE, TRUE)); 2782} 2783 2784/* 2785 * pmap_is_prefaultable: 2786 * 2787 * Return whether or not the specified virtual address is eligible 2788 * for prefault. 2789 */ 2790boolean_t 2791pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2792{ 2793 pt_entry_t *l3; 2794 boolean_t rv; 2795 2796 rv = FALSE; 2797 PMAP_LOCK(pmap); 2798 l3 = pmap_l3(pmap, addr); 2799 if (l3 != NULL && pmap_load(l3) != 0) { 2800 rv = TRUE; 2801 } 2802 PMAP_UNLOCK(pmap); 2803 return (rv); 2804} 2805 2806/* 2807 * pmap_is_referenced: 2808 * 2809 * Return whether or not the specified physical page was referenced 2810 * in any physical maps. 
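 * The check is made by pmap_page_test_mappings(), which looks for a
 * valid L3 mapping with ATTR_AF set.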
2811 */ 2812boolean_t 2813pmap_is_referenced(vm_page_t m) 2814{ 2815 2816 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2817 ("pmap_is_referenced: page %p is not managed", m)); 2818 return (pmap_page_test_mappings(m, TRUE, FALSE)); 2819} 2820 2821/* 2822 * Clear the write and modified bits in each of the given page's mappings. 2823 */ 2824void 2825pmap_remove_write(vm_page_t m) 2826{ 2827 pmap_t pmap; 2828 struct rwlock *lock; 2829 pv_entry_t pv; 2830 pt_entry_t *l3, oldl3; 2831 int md_gen; 2832 2833 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2834 ("pmap_remove_write: page %p is not managed", m)); 2835 2836 /* 2837 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2838 * set by another thread while the object is locked. Thus, 2839 * if PGA_WRITEABLE is clear, no page table entries need updating. 2840 */ 2841 VM_OBJECT_ASSERT_WLOCKED(m->object); 2842 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2843 return; 2844 rw_rlock(&pvh_global_lock); 2845 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2846retry_pv_loop: 2847 rw_wlock(lock); 2848 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2849 pmap = PV_PMAP(pv); 2850 if (!PMAP_TRYLOCK(pmap)) { 2851 md_gen = m->md.pv_gen; 2852 rw_wunlock(lock); 2853 PMAP_LOCK(pmap); 2854 rw_wlock(lock); 2855 if (md_gen != m->md.pv_gen) { 2856 PMAP_UNLOCK(pmap); 2857 rw_wunlock(lock); 2858 goto retry_pv_loop; 2859 } 2860 } 2861 l3 = pmap_l3(pmap, pv->pv_va); 2862retry: 2863 oldl3 = pmap_load(l3); 2864 if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { 2865 if (!atomic_cmpset_long(l3, oldl3, 2866 oldl3 | ATTR_AP(ATTR_AP_RO))) 2867 goto retry; 2868 if ((oldl3 & ATTR_AF) != 0) 2869 vm_page_dirty(m); 2870 pmap_invalidate_page(pmap, pv->pv_va); 2871 } 2872 PMAP_UNLOCK(pmap); 2873 } 2874 rw_wunlock(lock); 2875 vm_page_aflag_clear(m, PGA_WRITEABLE); 2876 rw_runlock(&pvh_global_lock); 2877} 2878 2879static __inline boolean_t 2880safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 2881{ 2882 2883 return (FALSE); 2884} 2885 2886#define PMAP_TS_REFERENCED_MAX 5 2887 2888/* 2889 * pmap_ts_referenced: 2890 * 2891 * Return a count of reference bits for a page, clearing those bits. 2892 * It is not necessary for every reference bit to be cleared, but it 2893 * is necessary that 0 only be returned when there are truly no 2894 * reference bits set. 2895 * 2896 * XXX: The exact number of bits to check and clear is a matter that 2897 * should be tested and standardized at some point in the future for 2898 * optimal aging of shared pages. 
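 * On arm64 the access flag cannot yet be cleared safely (there is no
 * handler for access flag faults), so referenced, unwired mappings are
 * removed instead, and at most PMAP_TS_REFERENCED_MAX referenced
 * mappings are processed per call.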
2899 */ 2900int 2901pmap_ts_referenced(vm_page_t m) 2902{ 2903 pv_entry_t pv, pvf; 2904 pmap_t pmap; 2905 struct rwlock *lock; 2906 pd_entry_t *l2p, l2; 2907 pt_entry_t *l3; 2908 vm_paddr_t pa; 2909 int cleared, md_gen, not_cleared; 2910 struct spglist free; 2911 2912 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2913 ("pmap_ts_referenced: page %p is not managed", m)); 2914 SLIST_INIT(&free); 2915 cleared = 0; 2916 pa = VM_PAGE_TO_PHYS(m); 2917 lock = PHYS_TO_PV_LIST_LOCK(pa); 2918 rw_rlock(&pvh_global_lock); 2919 rw_wlock(lock); 2920retry: 2921 not_cleared = 0; 2922 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 2923 goto out; 2924 pv = pvf; 2925 do { 2926 if (pvf == NULL) 2927 pvf = pv; 2928 pmap = PV_PMAP(pv); 2929 if (!PMAP_TRYLOCK(pmap)) { 2930 md_gen = m->md.pv_gen; 2931 rw_wunlock(lock); 2932 PMAP_LOCK(pmap); 2933 rw_wlock(lock); 2934 if (md_gen != m->md.pv_gen) { 2935 PMAP_UNLOCK(pmap); 2936 goto retry; 2937 } 2938 } 2939 l2p = pmap_l2(pmap, pv->pv_va); 2940 KASSERT(l2p != NULL, ("pmap_ts_referenced: no l2 table found")); 2941 l2 = pmap_load(l2p); 2942 KASSERT((l2 & ATTR_DESCR_MASK) == L2_TABLE, 2943 ("pmap_ts_referenced: found an invalid l2 table")); 2944 l3 = pmap_l2_to_l3(l2p, pv->pv_va); 2945 if ((pmap_load(l3) & ATTR_AF) != 0) { 2946 if (safe_to_clear_referenced(pmap, pmap_load(l3))) { 2947 /* 2948 * TODO: We don't handle the access flag 2949 * at all. We need to be able to set it in 2950 * the exception handler. 2951 */ 2952 panic("ARM64TODO: safe_to_clear_referenced\n"); 2953 } else if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) { 2954 /* 2955 * Wired pages cannot be paged out so 2956 * doing accessed bit emulation for 2957 * them is wasted effort. We do the 2958 * hard work for unwired pages only. 2959 */ 2960 pmap_remove_l3(pmap, l3, pv->pv_va, l2, 2961 &free, &lock); 2962 pmap_invalidate_page(pmap, pv->pv_va); 2963 cleared++; 2964 if (pvf == pv) 2965 pvf = NULL; 2966 pv = NULL; 2967 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 2968 ("inconsistent pv lock %p %p for page %p", 2969 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 2970 } else 2971 not_cleared++; 2972 } 2973 PMAP_UNLOCK(pmap); 2974 /* Rotate the PV list if it has more than one entry. */ 2975 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 2976 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2977 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2978 m->md.pv_gen++; 2979 } 2980 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 2981 not_cleared < PMAP_TS_REFERENCED_MAX); 2982out: 2983 rw_wunlock(lock); 2984 rw_runlock(&pvh_global_lock); 2985 pmap_free_zero_pages(&free); 2986 return (cleared + not_cleared); 2987} 2988 2989/* 2990 * Apply the given advice to the specified range of addresses within the 2991 * given pmap. Depending on the advice, clear the referenced and/or 2992 * modified flags in each mapping and set the mapped page's dirty field. 2993 */ 2994void 2995pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 2996{ 2997} 2998 2999/* 3000 * Clear the modify bits on the specified physical page. 3001 */ 3002void 3003pmap_clear_modify(vm_page_t m) 3004{ 3005 3006 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3007 ("pmap_clear_modify: page %p is not managed", m)); 3008 VM_OBJECT_ASSERT_WLOCKED(m->object); 3009 KASSERT(!vm_page_xbusied(m), 3010 ("pmap_clear_modify: page %p is exclusive busied", m)); 3011 3012 /* 3013 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 
 * If the object containing the page is locked and the page is not
 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
 */
        if ((m->aflags & PGA_WRITEABLE) == 0)
                return;

        /* ARM64TODO: We lack support for tracking if a page is modified */
}

void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{

        return ((void *)PHYS_TO_DMAP(pa));
}

void
pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
{
}

/*
 * Sets the memory attribute for the specified page.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{

        m->md.pv_memattr = ma;

        /*
         * ARM64TODO: Implement the below (from the amd64 pmap)
         * If "m" is a normal page, update its direct mapping.  This update
         * can be relied upon to perform any cache operations that are
         * required for data coherence.
         */
        if ((m->flags & PG_FICTITIOUS) == 0 &&
            PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m)))
                panic("ARM64TODO: pmap_page_set_memattr");
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
        pd_entry_t *l1p, l1;
        pd_entry_t *l2p, l2;
        pt_entry_t *l3p, l3;
        vm_paddr_t pa;
        bool managed;
        int val;

        PMAP_LOCK(pmap);
retry:
        pa = 0;
        val = 0;
        managed = false;

        l1p = pmap_l1(pmap, addr);
        if (l1p == NULL) /* No l1 */
                goto done;

        l1 = pmap_load(l1p);
        if ((l1 & ATTR_DESCR_MASK) == L1_INVAL)
                goto done;

        if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
                pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET);
                managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
                val = MINCORE_SUPER | MINCORE_INCORE;
                if (pmap_page_dirty(l1))
                        val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
                if ((l1 & ATTR_AF) == ATTR_AF)
                        val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
                goto done;
        }

        l2p = pmap_l1_to_l2(l1p, addr);
        if (l2p == NULL) /* No l2 */
                goto done;

        l2 = pmap_load(l2p);
        if ((l2 & ATTR_DESCR_MASK) == L2_INVAL)
                goto done;

        if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
                pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET);
                managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
                val = MINCORE_SUPER | MINCORE_INCORE;
                if (pmap_page_dirty(l2))
                        val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
                if ((l2 & ATTR_AF) == ATTR_AF)
                        val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
                goto done;
        }

        l3p = pmap_l2_to_l3(l2p, addr);
        if (l3p == NULL) /* No l3 */
                goto done;

        l3 = pmap_load(l3p);
        if ((l3 & ATTR_DESCR_MASK) == L3_INVAL)
                goto done;

        if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
                pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
                managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
                val = MINCORE_INCORE;
                if (pmap_page_dirty(l3))
                        val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
                if ((l3 & ATTR_AF) == ATTR_AF)
                        val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
        }

done:
        if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
            (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
                /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change.
*/ 3134 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 3135 goto retry; 3136 } else 3137 PA_UNLOCK_COND(*locked_pa); 3138 PMAP_UNLOCK(pmap); 3139 3140 return (val); 3141} 3142 3143void 3144pmap_activate(struct thread *td) 3145{ 3146 pmap_t pmap; 3147 3148 critical_enter(); 3149 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3150 td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); 3151 __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr)); 3152 pmap_invalidate_all(pmap); 3153 critical_exit(); 3154} 3155 3156void 3157pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 3158{ 3159 3160 if (va >= VM_MIN_KERNEL_ADDRESS) { 3161 cpu_icache_sync_range(va, sz); 3162 } else { 3163 u_int len, offset; 3164 vm_paddr_t pa; 3165 3166 /* Find the length of data in this page to flush */ 3167 offset = va & PAGE_MASK; 3168 len = imin(PAGE_SIZE - offset, sz); 3169 3170 while (sz != 0) { 3171 /* Extract the physical address & find it in the DMAP */ 3172 pa = pmap_extract(pmap, va); 3173 if (pa != 0) 3174 cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); 3175 3176 /* Move to the next page */ 3177 sz -= len; 3178 va += len; 3179 /* Set the length for the next iteration */ 3180 len = imin(PAGE_SIZE, sz); 3181 } 3182 } 3183} 3184 3185/* 3186 * Increase the starting virtual address of the given mapping if a 3187 * different alignment might result in more superpage mappings. 3188 */ 3189void 3190pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3191 vm_offset_t *addr, vm_size_t size) 3192{ 3193} 3194 3195/** 3196 * Get the kernel virtual address of a set of physical pages. If there are 3197 * physical addresses not covered by the DMAP perform a transient mapping 3198 * that will be removed when calling pmap_unmap_io_transient. 3199 * 3200 * \param page The pages the caller wishes to obtain the virtual 3201 * address on the kernel memory map. 3202 * \param vaddr On return contains the kernel virtual memory address 3203 * of the pages passed in the page parameter. 3204 * \param count Number of pages passed in. 3205 * \param can_fault TRUE if the thread using the mapped pages can take 3206 * page faults, FALSE otherwise. 3207 * 3208 * \returns TRUE if the caller must call pmap_unmap_io_transient when 3209 * finished or FALSE otherwise. 3210 * 3211 */ 3212boolean_t 3213pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3214 boolean_t can_fault) 3215{ 3216 vm_paddr_t paddr; 3217 boolean_t needs_mapping; 3218 int error, i; 3219 3220 /* 3221 * Allocate any KVA space that we need, this is done in a separate 3222 * loop to prevent calling vmem_alloc while pinned. 
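 * Mapping pages that lie outside the DMAP is not yet implemented on
 * arm64; such pages currently trigger the panic in the second loop
 * below.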
3223 */ 3224 needs_mapping = FALSE; 3225 for (i = 0; i < count; i++) { 3226 paddr = VM_PAGE_TO_PHYS(page[i]); 3227 if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { 3228 error = vmem_alloc(kernel_arena, PAGE_SIZE, 3229 M_BESTFIT | M_WAITOK, &vaddr[i]); 3230 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 3231 needs_mapping = TRUE; 3232 } else { 3233 vaddr[i] = PHYS_TO_DMAP(paddr); 3234 } 3235 } 3236 3237 /* Exit early if everything is covered by the DMAP */ 3238 if (!needs_mapping) 3239 return (FALSE); 3240 3241 if (!can_fault) 3242 sched_pin(); 3243 for (i = 0; i < count; i++) { 3244 paddr = VM_PAGE_TO_PHYS(page[i]); 3245 if (paddr >= DMAP_MAX_PHYSADDR) { 3246 panic( 3247 "pmap_map_io_transient: TODO: Map out of DMAP data"); 3248 } 3249 } 3250 3251 return (needs_mapping); 3252} 3253 3254void 3255pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3256 boolean_t can_fault) 3257{ 3258 vm_paddr_t paddr; 3259 int i; 3260 3261 if (!can_fault) 3262 sched_unpin(); 3263 for (i = 0; i < count; i++) { 3264 paddr = VM_PAGE_TO_PHYS(page[i]); 3265 if (paddr >= DMAP_MAX_PHYSADDR) { 3266 panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); 3267 } 3268 } 3269} 3270