/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * This software was developed by Andrew Turner under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#define	AMD64_NPT_AWARE

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/arm64/arm64/pmap.c 282221 2015-04-29 15:00:43Z andrew $");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/_unrhdr.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

#define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NUPDE		(NPDEPG * NPDEPG)
#define	NUSERPGTBLS	(NUPDE + NPDEPG)

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

/*
 * These are configured by the mair_el1 register. This is set up in locore.S
 */
#define	DEVICE_MEMORY	0
#define	UNCACHED_MEMORY	1
#define	CACHED_MEMORY	2


#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))

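/*
 * Typical (illustrative) use of the pv list lock macros above: a caller
 * keeps at most one pv list lock write-held at a time and lets the CHANGE_*
 * macros hand off between hash buckets, e.g.:
 *
 *	struct rwlock *lock = NULL;
 *
 *	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 *	...manipulate m's pv list...
 *	RELEASE_PV_LIST_LOCK(&lock);
 *
 * The bucket is chosen by hashing the physical address, so unrelated pages
 * may share a lock.
 */
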
struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

struct msgbuf *msgbufp = NULL;

static struct rwlock_padalign pvh_global_lock;

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

static __inline void
pagezero(void *p)
{

	bzero(p, PAGE_SIZE);
}

#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = (pd_entry_t *)PHYS_TO_DMAP(*l1 & ~ATTR_MASK);
	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if ((*l1 & ATTR_DESCR_MASK) != L1_TABLE)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = (pd_entry_t *)PHYS_TO_DMAP(*l2 & ~ATTR_MASK);
	return (&l3[pmap_l3_index(va)]);
}

static __inline pt_entry_t *
pmap_l3(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = pmap_l2(pmap, va);
	if (l2 == NULL || (*l2 & ATTR_DESCR_MASK) != L2_TABLE)
		return (NULL);

	return (pmap_l2_to_l3(l2, va));
}

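/*
 * Worked example for the table-walk helpers above, assuming the 4KB granule
 * this pmap uses (L1_SHIFT = 30, L2_SHIFT = 21, L3_SHIFT = 12, 512-entry
 * tables so Ln_ADDR_MASK = 0x1ff): for va = 0xffff000040201000,
 *
 *	pmap_l1_index(va) = (va >> 30) & 0x1ff = 1
 *	pmap_l2_index(va) = (va >> 21) & 0x1ff = 1
 *	pmap_l3_index(va) = (va >> 12) & 0x1ff = 1
 *
 * so pmap_l3() walks pm_l1[1] -> l2[1] -> l3[1], returning NULL if an
 * intermediate level is not a table descriptor.
 */
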
/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define	pmap_set(table, mask) atomic_set_64(table, mask)
#define	pmap_load_clear(table) atomic_swap_64(table, 0)
#define	pmap_load(table) (*table)

static __inline int
pmap_is_current(pmap_t pmap)
{

	return ((pmap == pmap_kernel()) ||
	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

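/*
 * PTE_SYNC exists because the table walker is not assumed to snoop the data
 * cache on the hardware this port was brought up on: the convention in this
 * file is to write any modified page table entry back to the point of
 * coherency with PTE_SYNC before a TLB invalidation is relied upon.
 */
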
/*
 * Checks if the page is dirty. We currently lack proper tracking of this on
 * arm64, so for now assume that a page is dirty if it is mapped read/write
 * and has been accessed.
 */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
	    ("Invalid bootstrap L1 table"));
	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
}

static void
pmap_bootstrap_dmap(vm_offset_t l1pt)
{
	vm_offset_t va;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;

	va = DMAP_MIN_ADDRESS;
	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);

	for (pa = 0; va < DMAP_MAX_ADDRESS;
	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		/*
		 * TODO: Turn the cache on here when we have cache
		 * flushing code.
		 */
		pmap_load_store(&l1[l1_slot],
		    (pa & ~L1_OFFSET) | ATTR_AF | L1_BLOCK |
		    ATTR_IDX(CACHED_MEMORY));
	}

	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
	cpu_tlb_flushID();
}

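/*
 * After pmap_bootstrap_dmap() the direct map covers physical memory with
 * 1GB (L1_SIZE) blocks starting at physical address 0, so the translation
 * behind PHYS_TO_DMAP()/DMAP_TO_PHYS() is, as set up here, a fixed offset:
 *
 *	PHYS_TO_DMAP(pa) == DMAP_MIN_ADDRESS + pa
 *
 * This is what lets the table-walk helpers above turn a physical table
 * address into a usable pointer without creating a temporary mapping.
 */
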
static vm_offset_t
pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
{
	vm_offset_t l2pt;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;

	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));

	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(va);
	l2pt = l2_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		pa = pmap_early_vtophys(l1pt, l2pt);
		pmap_load_store(&l1[l1_slot],
		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
		l2pt += PAGE_SIZE;
	}

	/* Clean the L2 page table */
	memset((void *)l2_start, 0, l2pt - l2_start);
	cpu_dcache_wb_range(l2_start, l2pt - l2_start);

	/* Flush the l1 table to ram */
	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);

	return l2pt;
}

static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l2pt, l3pt;
	vm_paddr_t pa;
	pd_entry_t *l2;
	u_int l2_slot;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
	l2pt = (vm_offset_t)l2;
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pmap_load_store(&l2[l2_slot],
		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L3 page table */
	memset((void *)l3_start, 0, l3pt - l3_start);
	cpu_dcache_wb_range(l3_start, l3pt - l3_start);

	/* Flush the l2 table to ram */
	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

	return l3pt;
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
{
	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t pa;

	kern_delta = KERNBASE - kernstart;
	physmem = 0;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init(&pvh_global_lock, "pmap pv global");

	/* Create a direct map region early so we can use it for pa -> va */
	pmap_bootstrap_dmap(l1pt);

	va = KERNBASE;
	pa = KERNBASE - kern_delta;

	/*
	 * Start to initialise phys_avail by copying from physmap
	 * up to the physical address KERNBASE points at.
	 */
	map_slot = avail_slot = 0;
	for (; map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		if (physmap[map_slot] <= pa &&
		    physmap[map_slot + 1] > pa)
			break;

		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}

	/* Add the memory before the kernel */
	if (physmap[avail_slot] < pa) {
		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = pa;
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}
	used_map_slot = map_slot;

	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
			break;

		/* Check locore used L2 blocks */
		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
		    ("Invalid bootstrap L2 table"));
		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
		    ("Incorrect PA in L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L1_SIZE);

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);
	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
	/* And the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	virtual_avail = roundup2(freemempos, L1_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
	    map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		/* Have we used the current range? */
		if (physmap[map_slot + 1] <= pa)
			continue;

		/* Do we need to split the entry? */
		if (physmap[map_slot] < pa) {
			phys_avail[avail_slot] = pa;
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		} else {
			phys_avail[avail_slot] = physmap[map_slot];
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		}
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;

		avail_slot += 2;
	}
	phys_avail[avail_slot] = 0;
	phys_avail[avail_slot + 1] = 0;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = atop(phys_avail[avail_slot - 1]);

	cpu_tlb_flushID();
}

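/*
 * For reference: phys_avail[] is consumed by the VM system as a list of
 * (start, end) physical address pairs terminated by a pair of zeroes.
 * With one 1GB bank at 0x40000000 and the kernel plus bootstrap tables
 * ending at 0x41000000, the loop above would leave (illustrative values
 * only; the real contents come from physmap[]):
 *
 *	phys_avail[0] = 0x41000000;	first free byte after the kernel
 *	phys_avail[1] = 0x80000000;	end of the bank
 *	phys_avail[2] = phys_avail[3] = 0;
 */
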
/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	int i;

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");
}

/*
 * Normal, non-SMP, invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	sched_pin();
	__asm __volatile(
	    "dsb  sy		\n"
	    "tlbi vaae1is, %0	\n"
	    "dsb  sy		\n"
	    "isb		\n"
	    : : "r"(va >> PAGE_SHIFT));
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	sched_pin();
	sva >>= PAGE_SHIFT;
	eva >>= PAGE_SHIFT;
	__asm __volatile("dsb	sy");
	for (addr = sva; addr < eva; addr++) {
		__asm __volatile(
		    "tlbi vaae1is, %0" : : "r"(addr));
	}
	__asm __volatile(
	    "dsb  sy	\n"
	    "isb	\n");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	sched_pin();
	__asm __volatile(
	    "dsb  sy		\n"
	    "tlbi vmalle1is	\n"
	    "dsb  sy		\n"
	    "isb		\n");
	sched_unpin();
}

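/*
 * A note on the instruction sequences above: "tlbi vaae1is" invalidates any
 * EL1 entry for the given virtual page across all ASIDs in the Inner
 * Shareable domain, and "tlbi vmalle1is" drops every stage-1 EL1 entry.
 * The leading "dsb sy" makes the preceding page table stores visible to the
 * walker before the invalidate, and the trailing "dsb sy; isb" orders
 * completion of the invalidate before any subsequent translation.
 */
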
/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l2p, l2;
	pt_entry_t *l3p, l3;
	vm_paddr_t pa;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Start with the l2 table. We are unable to allocate
	 * pages in the l1 table.
	 */
	l2p = pmap_l2(pmap, va);
	if (l2p != NULL) {
		l2 = *l2p;
		if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
			l3p = pmap_l2_to_l3(l2p, va);
			if (l3p != NULL) {
				l3 = *l3p;

				if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
					pa = (l3 & ~ATTR_MASK) |
					    (va & L3_OFFSET);
			}
		} else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
			pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *l3p, l3;
	vm_paddr_t pa;
	vm_page_t m;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	l3p = pmap_l3(pmap, va);
	if (l3p != NULL && (l3 = *l3p) != 0) {
		if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
		    ((prot & VM_PROT_WRITE) == 0)) {
			if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pd_entry_t *l2;
	pt_entry_t *l3;
	vm_paddr_t pa;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		l2 = pmap_l2(kernel_pmap, va);
		if (l2 == NULL)
			panic("pmap_kextract: No l2");
		if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK)
			return ((*l2 & ~ATTR_MASK) | (va & L2_OFFSET));

		l3 = pmap_l2_to_l3(l2, va);
		if (l3 == NULL)
			panic("pmap_kextract: No l3...");
		pa = (*l3 & ~ATTR_MASK) | (va & PAGE_MASK);
	}
	return (pa);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

void
pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa)
{
	pt_entry_t *l3;

	KASSERT((pa & L3_OFFSET) == 0,
	    ("pmap_kenter_device: Invalid physical address"));
	KASSERT((va & L3_OFFSET) == 0,
	    ("pmap_kenter_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter_device: Mapping is not page-sized"));

	while (size != 0) {
		l3 = pmap_l3(kernel_pmap, va);
		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
		pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_AF | L3_PAGE |
		    ATTR_IDX(DEVICE_MEMORY));
		PTE_SYNC(l3);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
}

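/*
 * Sketch of a typical pmap_kenter_device() caller (hypothetical addresses,
 * kept out of the build): carve KVA with kva_alloc() and map a page-aligned
 * device region into it, undoing the work with pmap_kremove_device() below.
 */
#if 0
	vm_offset_t va;

	va = kva_alloc(2 * PAGE_SIZE);
	pmap_kenter_device(va, 2 * PAGE_SIZE, 0x09000000);
	/* ... access the device registers through va ... */
	pmap_kremove_device(va, 2 * PAGE_SIZE);
	kva_free(va, 2 * PAGE_SIZE);
#endif
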
/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = pmap_l3(kernel_pmap, va);
	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));

	if (pmap_l3_valid_cacheable(pmap_load(l3)))
		cpu_dcache_wb_range(va, L3_SIZE);
	pmap_load_clear(l3);
	PTE_SYNC(l3);
}

void
pmap_kremove_device(vm_offset_t va, vm_size_t size)
{
	pt_entry_t *l3;

	KASSERT((va & L3_OFFSET) == 0,
	    ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	while (size != 0) {
		l3 = pmap_l3(kernel_pmap, va);
		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
		pmap_load_clear(l3);
		PTE_SYNC(l3);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return PHYS_TO_DMAP(start);
}


/*
 * Add a list of wired pages to the kva.  This routine is only used
 * for temporary kernel mappings that do not need to have page
 * modification or references recorded.  Note that old mappings are
 * simply written over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pt_entry_t *l3, pa;
	vm_offset_t va;
	vm_page_t m;
	int i;

	va = sva;
	for (i = 0; i < count; i++) {
		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m) | ATTR_AF |
		    ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RW) | L3_PAGE;
		l3 = pmap_l3(kernel_pmap, va);
		pmap_load_store(l3, pa);
		PTE_SYNC(l3);

		va += L3_SIZE;
	}
}

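/*
 * pmap_qenter() and pmap_qremove() below are expected to be used as a pair
 * over the same range, e.g. a caller with 'n' wired pages in 'ma' does:
 *
 *	pmap_qenter(sva, ma, n);
 *	...use the mapping...
 *	pmap_qremove(sva, n);
 *
 * with sva coming from a KVA arena such as kva_alloc().
 */
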
/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va));
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}

/*
 * Schedule the specified unused page table page to be freed.  Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= NUPDE) {
		/* PD page */
		pd_entry_t *l1;
		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
		PTE_SYNC(l1);
	} else {
		/* PTE page */
		pd_entry_t *l2;
		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
		PTE_SYNC(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUPDE) {
		/* We just released a PT, unhold the matching PD */
		vm_page_t pdpg;

		pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, pdpg, free);
	}

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}

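/*
 * The wire_count of a page table page, as used above, tracks the number of
 * valid entries the page holds (at most Ln_ENTRIES, i.e. 512 with 4KB
 * pages): VM_ALLOC_WIRED in _pmap_alloc_l3() accounts for the first entry
 * and pmap_alloc_l3() increments it for each further one, so the final
 * pmap_unwire_l3() on an L3 page cascades into releasing its L2 entry.
 */
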
/*
 * After removing an l3 entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
    struct spglist *free)
{
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
	return (pmap_unwire_l3(pmap, va, mpte, free));
}

void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
	pmap->pm_l1 = kernel_pmap->pm_l1;
}

int
pmap_pinit(pmap_t pmap)
{
	vm_paddr_t l1phys;
	vm_page_t l1pt;

	/*
	 * allocate the l1 page
	 */
	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
		VM_WAIT;

	l1phys = VM_PAGE_TO_PHYS(l1pt);
	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);

	if ((l1pt->flags & PG_ZERO) == 0)
		pagezero(pmap->pm_l1);

	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));

	return (1);
}

/*
 * This routine is called if the desired page table page does not exist.
 *
 * If page table page allocation fails, this routine may sleep before
 * returning NULL.  It sleeps only if a lock pointer was given.
 *
 * Note: If a page allocation fails at page table level two or three,
 * one or two pages may be held during the wait, only to be released
 * afterwards.  This conservative approach is easily argued to avoid
 * race conditions.
 */
static vm_page_t
_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
	vm_page_t m, /*pdppg, */pdpg;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if (lockp != NULL) {
			RELEASE_PV_LIST_LOCK(lockp);
			PMAP_UNLOCK(pmap);
			rw_runlock(&pvh_global_lock);
			VM_WAIT;
			rw_rlock(&pvh_global_lock);
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	if (ptepindex >= NUPDE) {
		pd_entry_t *l1;
		vm_pindex_t l1index;

		l1index = ptepindex - NUPDE;
		l1 = &pmap->pm_l1[l1index];
		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
		PTE_SYNC(l1);

	} else {
		vm_pindex_t l1index;
		pd_entry_t *l1, *l2;

		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
		l1 = &pmap->pm_l1[l1index];
		if (*l1 == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUPDE + l1index,
			    lockp) == NULL) {
				--m->wire_count;
				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
				vm_page_free_zero(m);
				return (NULL);
			}
		} else {
			pdpg = PHYS_TO_VM_PAGE(*l1 & ~ATTR_MASK);
			pdpg->wire_count++;
		}

		l2 = (pd_entry_t *)PHYS_TO_DMAP(*l1 & ~ATTR_MASK);
		l2 = &l2[ptepindex & Ln_ADDR_MASK];
		pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | ATTR_AF |
		    ATTR_IDX(CACHED_MEMORY) | L2_TABLE);
		PTE_SYNC(l2);
	}

	pmap_resident_count_inc(pmap, 1);

	return (m);
}

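/*
 * The pindex space used above encodes which level a page table page backs:
 * indices 0 .. NUPDE-1 (NUPDE == 512 * 512 == 262144) are L3 pages, one per
 * 2MB (L2_SIZE) region, and indices NUPDE .. NUPDE+NPDEPG-1 are L2 pages,
 * one per 1GB (L1_SIZE) region.  For example, va = 0x80400000 gives
 * pmap_l2_pindex(va) = va >> 21 = 1026, an L3 page, whose backing L2 page
 * has pindex NUPDE + (1026 >> 9) = NUPDE + 2.
 */
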
static vm_page_t
pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
	vm_pindex_t ptepindex;
	pd_entry_t *l2;
	vm_page_t m;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = pmap_l2_pindex(va);
retry:
	/*
	 * Get the page directory entry
	 */
	l2 = pmap_l2(pmap, va);

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (l2 != NULL && *l2 != 0) {
		m = PHYS_TO_VM_PAGE(*l2 & ~ATTR_MASK);
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has been
		 * deallocated.
		 */
		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
		if (m == NULL && lockp != NULL)
			goto retry;
	}
	/*
	 * XXXARM64: I'm not sure why we need this but it fixes a crash
	 * when running things from a shell script.
	 */
	pmap_invalidate_all(pmap);
	return (m);
}


/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));

	m->wire_count--;
	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
	vm_page_free_zero(m);
}

#if 0
static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "LU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "LU", "Amount of KVM free");
#endif /* 0 */

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_paddr_t paddr;
	vm_page_t nkpg;
	pd_entry_t *l1, *l2;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);

	addr = roundup2(addr, L2_SIZE);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
		if (*l1 == 0) {
			/* We need a new PDP entry */
			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
			if ((nkpg->flags & PG_ZERO) == 0)
				pmap_zero_page(nkpg);
			paddr = VM_PAGE_TO_PHYS(nkpg);
			pmap_load_store(l1, paddr | L1_TABLE);
			PTE_SYNC(l1);
			continue; /* try again */
		}
		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
		if ((*l2 & ATTR_AF) != 0) {
			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");
		if ((nkpg->flags & PG_ZERO) == 0)
			pmap_zero_page(nkpg);
		paddr = VM_PAGE_TO_PHYS(nkpg);
		pmap_load_store(l2, paddr | L2_TABLE);
		PTE_SYNC(l2);

		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}


/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 3);
CTASSERT(_NPCPV == 168);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0	0xfffffffffffffffful
#define	PC_FREE1	0xfffffffffffffffful
#define	PC_FREE2	0x000000fffffffffful

static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };

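/*
 * The constants above follow from the chunk layout: a chunk is one 4KB page
 * holding a small header plus _NPCPV == 168 pv entries, and its free bitmap
 * spans _NPCM == 3 64-bit words.  Only 64 + 64 + 40 == 168 bits are usable,
 * hence PC_FREE2 masking off the top 24 bits of the last word:
 * 0x000000ffffffffff has exactly 40 bits set.
 */
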

#if 0
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
    "Total number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
    "Total number of pv entry chunks freed");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
    "Number of failed attempts to get a pv entry chunk page");

static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
    "Total number of pv entries freed");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
    "Total number of pv entries allocated");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
    "Current number of pv entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
    "Current number of spare pv entries");
#endif
#endif /* 0 */

/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 *
 * Returns NULL if PV entries were reclaimed from the specified pmap.
 *
 * We do not, however, unmap 2MB pages because subsequent accesses will
 * allocate per-page pv entries until repromotion occurs, thereby
 * exacerbating the shortage of free pv entries.
 */
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{

	panic("reclaim_pv_chunk");
}

/*
 * Free the pv_entry back to the free list.
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 64;
	bit = idx % 64;
	pc->pc_map[field] |= 1ul << bit;
	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
	    pc->pc_map[2] != PC_FREE2) {
		/* 98% of the time, pc is already at the head of the list. */
		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
		}
		return;
	}
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

static void
free_pv_chunk(struct pv_chunk *pc)
{
	vm_page_t m;

	mtx_lock(&pv_chunks_mutex);
	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	mtx_unlock(&pv_chunks_mutex);
	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
	/* entire chunk is free, return it */
	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
#if 0 /* TODO: For minidump */
	dump_drop_page(m->phys_addr);
#endif
	vm_page_unwire(m, PQ_INACTIVE);
	vm_page_free(m);
}

/*
 * Returns a new PV entry, allocating a new PV chunk from the system when
 * needed.  If this PV chunk allocation fails and a PV list lock pointer was
 * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
 * returned.
 *
 * The given PV list lock may be released.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
	int bit, field;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = ffsl(pc->pc_map[field]) - 1;
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 64 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
			    pc->pc_map[2] == 0) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			PV_STAT(atomic_add_long(&pv_entry_count, 1));
			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
			return (pv);
		}
	}
	/* No free items, allocate another chunk */
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED);
	if (m == NULL) {
		if (lockp == NULL) {
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		m = reclaim_pv_chunk(pmap, lockp);
		if (m == NULL)
			goto retry;
	}
	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
#if 0 /* TODO: This is for minidump */
	dump_add_page(m->phys_addr);
#endif
	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
	pc->pc_pmap = pmap;
	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
	pc->pc_map[1] = PC_FREE1;
	pc->pc_map[2] = PC_FREE2;
	mtx_lock(&pv_chunks_mutex);
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	mtx_unlock(&pv_chunks_mutex);
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(atomic_add_long(&pv_entry_count, 1));
	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
	return (pv);
}
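
/*
 * Illustrative sketch (not compiled): a typical caller of the pair of
 * routines above.  Both require the pmap lock and pvh_global_lock to
 * be held.  Passing a NULL lock pointer makes a failed chunk
 * allocation return NULL instead of reclaiming from another pmap.
 */
#if 0
	pv = get_pv_entry(pmap, NULL);		/* may fail, never reclaims */
	if (pv != NULL) {
		pv->pv_va = va;
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		/* ... later, when the mapping is destroyed ... */
		free_pv_entry(pmap, pv);
	}
#endif /* 0 */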

/*
 * First find and then remove the pv entry for the specified pmap and virtual
 * address from the specified pv list.  Returns the pv entry if found and NULL
 * otherwise.  This operation can be performed on pv lists for either 4KB or
 * 2MB page mappings.
 */
static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			pvh->pv_gen++;
			break;
		}
	}
	return (pv);
}

/*
 * First find and then destroy the pv entry for the specified pmap and virtual
 * address.  This operation can be performed on pv lists for either 4KB or 2MB
 * page mappings.
 */
static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
	free_pv_entry(pmap, pv);
}

/*
 * Conditionally create the PV entry for a 4KB page mapping if the required
 * memory can be allocated without resorting to reclamation.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct rwlock **lockp)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/* Pass NULL instead of the lock pointer to disable reclamation. */
	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
		pv->pv_va = va;
		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_l3: unmap a single 4KB page from a process' address space.
 */
static int
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
{
	pt_entry_t old_l3;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
		cpu_dcache_wb_range(va, L3_SIZE);
	old_l3 = pmap_load_clear(l3);
	PTE_SYNC(l3);
	if (old_l3 & ATTR_SW_WIRED)
		pmap->pm_stats.wired_count -= 1;
	pmap_resident_count_dec(pmap, 1);
	if (old_l3 & ATTR_SW_MANAGED) {
		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
		if (pmap_page_dirty(old_l3))
			vm_page_dirty(m);
		if (old_l3 & ATTR_AF)
			vm_page_aflag_set(m, PGA_REFERENCED);
		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
		pmap_pvh_free(&m->md, pmap, va);
	}
	return (pmap_unuse_l3(pmap, va, l2e, free));
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	struct rwlock *lock;
	vm_offset_t va, va_next;
	pd_entry_t *l1, *l2;
	pt_entry_t l3_paddr, *l3;
	struct spglist free;
	int anyvalid;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	anyvalid = 0;
	SLIST_INIT(&free);

	rw_rlock(&pvh_global_lock);
	PMAP_LOCK(pmap);

	lock = NULL;
	for (; sva < eva; sva = va_next) {

		if (pmap->pm_stats.resident_count == 0)
			break;

		l1 = pmap_l1(pmap, sva);
		if (*l1 == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		/*
		 * Calculate index for next page table.
		 */
		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (l2 == NULL)
			continue;

		l3_paddr = *l2;

		/*
		 * Weed out invalid mappings.
		 */
		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
			continue;

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			if (l3 == NULL)
				panic("l3 == NULL");
			if (*l3 == 0) {
				if (va != va_next) {
					pmap_invalidate_range(pmap, va, sva);
					va = va_next;
				}
				continue;
			}
			if (va == va_next)
				va = sva;
			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
			    &lock)) {
				sva += L3_SIZE;
				break;
			}
		}
		if (va != va_next)
			pmap_invalidate_range(pmap, va, sva);
	}
	if (lock != NULL)
		rw_wunlock(lock);
	if (anyvalid)
		pmap_invalidate_all(pmap);
	rw_runlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}
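
/*
 * Illustrative sketch (not compiled): pmap_remove() expects
 * page-aligned bounds, so removing the single mapping at va looks
 * like this hypothetical call:
 */
#if 0
	pmap_remove(pmap, trunc_page(va), trunc_page(va) + PAGE_SIZE);
#endif /* 0 */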

/*
 *	Routine:	pmap_remove_all
 *	Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *	Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	pv_entry_t pv;
	pmap_t pmap;
	pt_entry_t *l3, tl3;
	pd_entry_t *l2;
	struct spglist free;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_all: page %p is not managed", m));
	SLIST_INIT(&free);
	rw_wlock(&pvh_global_lock);
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pmap_resident_count_dec(pmap, 1);
		l2 = pmap_l2(pmap, pv->pv_va);
		KASSERT((*l2 & ATTR_DESCR_MASK) == L2_TABLE,
		    ("pmap_remove_all: found a table when expecting "
		    "a block in %p's pv list", m));
		l3 = pmap_l2_to_l3(l2, pv->pv_va);
		if (pmap_is_current(pmap) &&
		    pmap_l3_valid_cacheable(pmap_load(l3)))
			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
		tl3 = pmap_load_clear(l3);
		PTE_SYNC(l3);
		if (tl3 & ATTR_SW_WIRED)
			pmap->pm_stats.wired_count--;
		if ((tl3 & ATTR_AF) != 0)
			vm_page_aflag_set(m, PGA_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (pmap_page_dirty(tl3))
			vm_page_dirty(m);
		pmap_unuse_l3(pmap, pv->pv_va, *l2, &free);
		pmap_invalidate_page(pmap, pv->pv_va);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(&pvh_global_lock);
	pmap_free_zero_pages(&free);
}

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t va, va_next;
	pd_entry_t *l1, *l2;
	pt_entry_t *l3p, l3;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
		return;

	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {

		l1 = pmap_l1(pmap, sva);
		if (*l1 == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (l2 == NULL || (*l2 & ATTR_DESCR_MASK) != L2_TABLE)
			continue;

		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
		    sva += L3_SIZE) {
			l3 = pmap_load(l3p);
			if (pmap_l3_valid(l3)) {
				pmap_set(l3p, ATTR_AP(ATTR_AP_RO));
				PTE_SYNC(l3p);
			}
		}
	}
	PMAP_UNLOCK(pmap);

	/* TODO: Only invalidate entries we are touching */
	pmap_invalidate_all(pmap);
}
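
/*
 * Illustrative sketch (not compiled): because pmap_protect() only ever
 * downgrades, making a range read-only is a matter of passing a
 * protection without VM_PROT_WRITE, while removing read access removes
 * the mappings entirely:
 */
#if 0
	pmap_protect(pmap, sva, eva, VM_PROT_READ);	/* downgrade to RO */
	pmap_protect(pmap, sva, eva, VM_PROT_NONE);	/* same as pmap_remove() */
#endif /* 0 */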

/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    u_int flags, int8_t psind __unused)
{
	struct rwlock *lock;
	pd_entry_t *l1, *l2;
	pt_entry_t new_l3, orig_l3;
	pt_entry_t *l3;
	pv_entry_t pv;
	vm_paddr_t opa, pa, l2_pa, l3_pa;
	vm_page_t mpte, om, l2_m, l3_m;
	boolean_t nosleep;

	va = trunc_page(va);
	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
		VM_OBJECT_ASSERT_LOCKED(m->object);
	pa = VM_PAGE_TO_PHYS(m);
	new_l3 = (pt_entry_t)(pa | ATTR_AF | L3_PAGE);
	if ((prot & VM_PROT_WRITE) == 0)
		new_l3 |= ATTR_AP(ATTR_AP_RO);
	if ((flags & PMAP_ENTER_WIRED) != 0)
		new_l3 |= ATTR_SW_WIRED;
	if ((va >> 63) == 0)
		new_l3 |= ATTR_AP(ATTR_AP_USER);
	new_l3 |= ATTR_IDX(m->md.pv_memattr);

	mpte = NULL;

	lock = NULL;
	rw_rlock(&pvh_global_lock);
	PMAP_LOCK(pmap);

	if (va < VM_MAXUSER_ADDRESS) {
		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
		if (mpte == NULL && nosleep) {
			if (lock != NULL)
				rw_wunlock(lock);
			rw_runlock(&pvh_global_lock);
			PMAP_UNLOCK(pmap);
			return (KERN_RESOURCE_SHORTAGE);
		}
		l3 = pmap_l3(pmap, va);
	} else {
		l3 = pmap_l3(pmap, va);
		/* TODO: This is not optimal, but should mostly work */
		if (l3 == NULL) {
			l2 = pmap_l2(pmap, va);

			if (l2 == NULL) {
				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
				    VM_ALLOC_ZERO);
				if (l2_m == NULL)
					panic("pmap_enter: l2 pte_m == NULL");
				if ((l2_m->flags & PG_ZERO) == 0)
					pmap_zero_page(l2_m);

				l2_pa = VM_PAGE_TO_PHYS(l2_m);
				l1 = pmap_l1(pmap, va);
				pmap_load_store(l1, l2_pa | L1_TABLE);
				PTE_SYNC(l1);
				l2 = pmap_l1_to_l2(l1, va);
			}

			KASSERT(l2 != NULL,
			    ("No l2 table after allocating one"));

			l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
			    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
			if (l3_m == NULL)
				panic("pmap_enter: l3 pte_m == NULL");
			if ((l3_m->flags & PG_ZERO) == 0)
				pmap_zero_page(l3_m);

			l3_pa = VM_PAGE_TO_PHYS(l3_m);
			pmap_load_store(l2, l3_pa | L2_TABLE);
			PTE_SYNC(l2);
			l3 = pmap_l2_to_l3(l2, va);
		}
	}

	om = NULL;
	orig_l3 = pmap_load(l3);
	opa = orig_l3 & ~ATTR_MASK;

	/*
	 * Is the specified virtual address already mapped?
	 */
	if (pmap_l3_valid(orig_l3)) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if ((flags & PMAP_ENTER_WIRED) != 0 &&
		    (orig_l3 & ATTR_SW_WIRED) == 0)
			pmap->pm_stats.wired_count++;
		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
		    (orig_l3 & ATTR_SW_WIRED) != 0)
			pmap->pm_stats.wired_count--;

		/*
		 * Remove the extra PT page reference.
		 */
		if (mpte != NULL) {
			mpte->wire_count--;
			KASSERT(mpte->wire_count > 0,
			    ("pmap_enter: missing reference to page table page,"
			    " va: 0x%lx", va));
		}

		/*
		 * Has the physical page changed?
		 */
		if (opa == pa) {
			/*
			 * No, might be a protection or wiring change.
			 */
			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
				new_l3 |= ATTR_SW_MANAGED;
				if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
				    ATTR_AP(ATTR_AP_RW)) {
					vm_page_aflag_set(m, PGA_WRITEABLE);
				}
			}
			goto validate;
		}

		/* Flush the cache, there might be uncommitted data in it */
		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
			cpu_dcache_wb_range(va, L3_SIZE);
	} else {
		/*
		 * Increment the counters.
		 */
		if ((new_l3 & ATTR_SW_WIRED) != 0)
			pmap->pm_stats.wired_count++;
		pmap_resident_count_inc(pmap, 1);
	}
	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		new_l3 |= ATTR_SW_MANAGED;
		pv = get_pv_entry(pmap, &lock);
		pv->pv_va = va;
		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
			vm_page_aflag_set(m, PGA_WRITEABLE);
	}

	/*
	 * Update the L3 entry.
	 */
	if (orig_l3 != 0) {
validate:
		orig_l3 = pmap_load_store(l3, new_l3);
		PTE_SYNC(l3);
		opa = orig_l3 & ~ATTR_MASK;

		if (opa != pa) {
			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
				om = PHYS_TO_VM_PAGE(opa);
				if (pmap_page_dirty(orig_l3))
					vm_page_dirty(om);
				if ((orig_l3 & ATTR_AF) != 0)
					vm_page_aflag_set(om, PGA_REFERENCED);
				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
				pmap_pvh_free(&om->md, pmap, va);
			}
		} else if (pmap_page_dirty(orig_l3)) {
			if ((orig_l3 & ATTR_SW_MANAGED) != 0)
				vm_page_dirty(m);
		}
		if ((orig_l3 & ATTR_AF) != 0)
			pmap_invalidate_page(pmap, va);
	} else {
		pmap_load_store(l3, new_l3);
		PTE_SYNC(l3);
	}
	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
		cpu_icache_sync_range(va, PAGE_SIZE);

	if (lock != NULL)
		rw_wunlock(lock);
	rw_runlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	return (KERN_SUCCESS);
}
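
/*
 * Illustrative sketch (not compiled): a typical pmap_enter() call as
 * the fault code might make it.  PMAP_ENTER_NOSLEEP turns a failed
 * page table allocation into KERN_RESOURCE_SHORTAGE rather than
 * sleeping (hypothetical example, not from this file):
 */
#if 0
	rv = pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE,
	    PMAP_ENTER_WIRED | PMAP_ENTER_NOSLEEP, 0);
	if (rv == KERN_RESOURCE_SHORTAGE) {
		/* Free some memory and retry. */
	}
#endif /* 0 */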

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	struct rwlock *lock;
	vm_offset_t va;
	vm_page_t m, mpte;
	vm_pindex_t diff, psize;

	VM_OBJECT_ASSERT_LOCKED(m_start->object);

	psize = atop(end - start);
	mpte = NULL;
	m = m_start;
	lock = NULL;
	rw_rlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		va = start + ptoa(diff);
		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
		m = TAILQ_NEXT(m, listq);
	}
	if (lock != NULL)
		rw_wunlock(lock);
	rw_runlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}
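
/*
 * Illustrative arithmetic (not compiled): the loop above derives each
 * mapping address from the page's offset within the object, so a page
 * at pindex m_start->pindex + 3 is mapped at start + ptoa(3), three
 * pages past start.
 */
#if 0
	va = start + ptoa(m->pindex - m_start->pindex);
#endif /* 0 */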

/*
 * This code makes some *MAJOR* assumptions:
 * 1. The current pmap and the given pmap exist.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */

void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	struct rwlock *lock;

	lock = NULL;
	rw_rlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
	if (lock != NULL)
		rw_wunlock(lock);
	rw_runlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}

static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
{
	struct spglist free;
	pd_entry_t *l2;
	pt_entry_t *l3;
	vm_paddr_t pa;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->oflags & VPO_UNMANAGED) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	rw_assert(&pvh_global_lock, RA_LOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		vm_pindex_t l2pindex;

		/*
		 * Calculate pagetable page index
		 */
		l2pindex = pmap_l2_pindex(va);
		if (mpte && (mpte->pindex == l2pindex)) {
			mpte->wire_count++;
		} else {
			/*
			 * Get the l2 entry
			 */
			l2 = pmap_l2(pmap, va);

			/*
			 * If the page table page is mapped, we just increment
			 * the hold count, and activate it.  Otherwise, we
			 * attempt to allocate a page table page.  If this
			 * attempt fails, we don't retry.  Instead, we give up.
			 */
			if (l2 != NULL && *l2 != 0) {
				mpte = PHYS_TO_VM_PAGE(*l2 & ~ATTR_MASK);
				mpte->wire_count++;
			} else {
				/*
				 * Pass NULL instead of the PV list lock
				 * pointer, because we don't intend to sleep.
				 */
				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
				if (mpte == NULL)
					return (mpte);
			}
		}
		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
		l3 = &l3[pmap_l3_index(va)];
	} else {
		mpte = NULL;
		l3 = pmap_l3(kernel_pmap, va);
	}
	if (l3 == NULL)
		panic("pmap_enter_quick_locked: No l3");
	if (*l3) {
		if (mpte != NULL) {
			mpte->wire_count--;
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0 &&
	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
		if (mpte != NULL) {
			SLIST_INIT(&free);
			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
				pmap_invalidate_page(pmap, va);
				pmap_free_zero_pages(&free);
			}
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Increment counters
	 */
	pmap_resident_count_inc(pmap, 1);

	pa = VM_PAGE_TO_PHYS(m) | ATTR_AF | ATTR_IDX(m->md.pv_memattr) |
	    ATTR_AP(ATTR_AP_RW) | L3_PAGE;

	/*
	 * Now validate the mapping.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0)
		pa |= ATTR_SW_MANAGED;
	pmap_load_store(l3, pa);
	PTE_SYNC(l3);
	pmap_invalidate_page(pmap, va);
	return (mpte);
}

/*
 * This code maps large physical mmap regions into the
 * processor address space.  Note that some shortcuts
 * are taken, but the code works.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
    vm_pindex_t pindex, vm_size_t size)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("pmap_object_init_pt: non-device object"));
}

/*
 * Clear the wired attribute from the mappings for the specified range of
 * addresses in the given pmap.  Every valid mapping within that range
 * must have the wired attribute set.  In contrast, invalid mappings
 * cannot have the wired attribute set, so they are ignored.
 *
 * The wired attribute of the page table entry is not a hardware feature,
 * so there is no need to invalidate any TLB entries.
 */
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va_next;
	pd_entry_t *l1, *l2;
	pt_entry_t *l3;
	boolean_t pv_lists_locked;

	pv_lists_locked = FALSE;
	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {
		l1 = pmap_l1(pmap, sva);
		if (*l1 == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (*l2 == 0)
			continue;

		if (va_next > eva)
			va_next = eva;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			if (*l3 == 0)
				continue;
			if ((*l3 & ATTR_SW_WIRED) == 0)
				panic("pmap_unwire: l3 %#jx is missing "
				    "ATTR_SW_WIRED", (uintmax_t)*l3);

			/*
			 * ATTR_SW_WIRED must be cleared atomically.  Although
			 * the pmap lock synchronizes access to it, another
			 * processor could be updating the access flag and/or
			 * dirty state of the entry concurrently.
			 */
			atomic_clear_long(l3, ATTR_SW_WIRED);
			pmap->pm_stats.wired_count--;
		}
	}
	if (pv_lists_locked)
		rw_runlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */

void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
}

/*
 * pmap_zero_page zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	pagezero((void *)va);
}

/*
 * pmap_zero_page_area zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	if (off == 0 && size == PAGE_SIZE)
		pagezero((void *)va);
	else
		bzero((char *)va + off, size);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.  This
 * is intended to be called from the vm_pagezero process only and
 * outside of Giant.
 */
void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	pagezero((void *)va);
}

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));

	pagecopy((void *)src, (void *)dst);
}

int unmapped_buf_allowed = 1;

void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
    vm_offset_t b_offset, int xfersize)
{
	void *a_cp, *b_cp;
	vm_page_t m_a, m_b;
	vm_paddr_t p_a, p_b;
	vm_offset_t a_pg_offset, b_pg_offset;
	int cnt;

	while (xfersize > 0) {
		a_pg_offset = a_offset & PAGE_MASK;
		m_a = ma[a_offset >> PAGE_SHIFT];
		p_a = m_a->phys_addr;
		b_pg_offset = b_offset & PAGE_MASK;
		m_b = mb[b_offset >> PAGE_SHIFT];
		p_b = m_b->phys_addr;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
			panic("!DMAP a %lx", p_a);
		} else {
			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
		}
		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
			panic("!DMAP b %lx", p_b);
		} else {
			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
		}
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
}
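
/*
 * Illustrative arithmetic (not compiled): each iteration above copies
 * up to the nearer of the two page boundaries.  For example, with
 * a_offset = 0xf00, b_offset = 0x080 and xfersize = 0x400 on 4KB
 * pages, the first chunk is min(0x400, 0x100, 0xf80) = 0x100 bytes,
 * after which the source offset is page aligned.
 */
#if 0
	cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
	cnt = min(cnt, PAGE_SIZE - b_pg_offset);
#endif /* 0 */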

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	struct rwlock *lock;
	pv_entry_t pv;
	int loops = 0;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_page_exists_quick: page %p is not managed", m));
	rv = FALSE;
	rw_rlock(&pvh_global_lock);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		if (PV_PMAP(pv) == pmap) {
			rv = TRUE;
			break;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	rw_runlock(lock);
	rw_runlock(&pvh_global_lock);
	return (rv);
}

/*
 * pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct rwlock *lock;
	pmap_t pmap;
	pt_entry_t *l3;
	pv_entry_t pv;
	int count, md_gen;

	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (0);
	rw_rlock(&pvh_global_lock);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
restart:
	count = 0;
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_runlock(lock);
			PMAP_LOCK(pmap);
			rw_rlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto restart;
			}
		}
		l3 = pmap_l3(pmap, pv->pv_va);
		if (l3 != NULL && (*l3 & ATTR_SW_WIRED) != 0)
			count++;
		PMAP_UNLOCK(pmap);
	}
	rw_runlock(lock);
	rw_runlock(&pvh_global_lock);
	return (count);
}

/*
 * Destroy all managed, non-wired mappings in the given user-space
 * pmap.  This pmap cannot be active on any processor besides the
 * caller.
 *
 * This function cannot be applied to the kernel pmap.  Moreover, it
 * is not intended for general use.  It is only to be used during
 * process termination.  Consequently, it can be implemented in ways
 * that make it faster than pmap_remove().  First, it can more quickly
 * destroy mappings by iterating over the pmap's collection of PV
 * entries, rather than searching the page table.  Second, it doesn't
 * have to test and clear the page table entries atomically, because
 * no processor is currently accessing the user address space.  In
 * particular, a page table entry's dirty bit won't change state once
 * this function starts.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pd_entry_t ptepde, *l2;
	pt_entry_t *l3, tl3;
	struct spglist free;
	vm_page_t m;
	pv_entry_t pv;
	struct pv_chunk *pc, *npc;
	struct rwlock *lock;
	int64_t bit;
	uint64_t inuse, bitmask;
	int allfree, field, freed, idx;
	vm_paddr_t pa;

	lock = NULL;

	SLIST_INIT(&free);
	rw_rlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
		allfree = 1;
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			inuse = ~pc->pc_map[field] & pc_freemask[field];
			while (inuse != 0) {
				bit = ffsl(inuse) - 1;
				bitmask = 1UL << bit;
				idx = field * 64 + bit;
				pv = &pc->pc_pventry[idx];
				inuse &= ~bitmask;

				l2 = pmap_l2(pmap, pv->pv_va);
				ptepde = pmap_load(l2);
				l3 = pmap_l2_to_l3(l2, pv->pv_va);
				tl3 = pmap_load(l3);

				/*
				 * We cannot remove wired pages from a
				 * process' mapping at this time.
				 */
				if (tl3 & ATTR_SW_WIRED) {
					allfree = 0;
					continue;
				}

				pa = tl3 & ~ATTR_MASK;

				m = PHYS_TO_VM_PAGE(pa);
				KASSERT(m->phys_addr == pa,
				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
				    m, (uintmax_t)m->phys_addr,
				    (uintmax_t)tl3));

				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
				    m < &vm_page_array[vm_page_array_size],
				    ("pmap_remove_pages: bad l3 %#jx",
				    (uintmax_t)tl3));

				if (pmap_is_current(pmap) &&
				    pmap_l3_valid_cacheable(pmap_load(l3)))
					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
				pmap_load_clear(l3);
				PTE_SYNC(l3);

				/*
				 * Update the vm_page_t clean/reference bits.
				 */
				if ((tl3 & ATTR_AP_RW_BIT) ==
				    ATTR_AP(ATTR_AP_RW))
					vm_page_dirty(m);

				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);

				/* Mark free */
				pc->pc_map[field] |= bitmask;

				pmap_resident_count_dec(pmap, 1);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				m->md.pv_gen++;

				pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free);
				freed++;
			}
		}
		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
		if (allfree) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			free_pv_chunk(pc);
		}
	}
	pmap_invalidate_all(pmap);
	if (lock != NULL)
		rw_wunlock(lock);
	rw_runlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}

/*
 * This is used to check if a page has been accessed or modified.  As we
 * don't have a bit to see if it has been modified we have to assume it
 * has been if the page is read/write.
 */
static boolean_t
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
{
	struct rwlock *lock;
	pv_entry_t pv;
	pt_entry_t *l3, mask, value;
	pmap_t pmap;
	int md_gen;
	boolean_t rv;

	rv = FALSE;
	rw_rlock(&pvh_global_lock);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
restart:
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_runlock(lock);
			PMAP_LOCK(pmap);
			rw_rlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto restart;
			}
		}
		l3 = pmap_l3(pmap, pv->pv_va);
		mask = 0;
		value = 0;
		if (modified) {
			mask |= ATTR_AP_RW_BIT;
			value |= ATTR_AP(ATTR_AP_RW);
		}
		if (accessed) {
			mask |= ATTR_AF | ATTR_DESCR_MASK;
			value |= ATTR_AF | L3_PAGE;
		}
		rv = (pmap_load(l3) & mask) == value;
		PMAP_UNLOCK(pmap);
		if (rv)
			goto out;
	}
out:
	rw_runlock(lock);
	rw_runlock(&pvh_global_lock);
	return (rv);
}
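
/*
 * Illustrative sketch (not compiled): pmap_page_test_mappings() above
 * reduces both tests to a single masked compare.  Lacking a hardware
 * dirty bit, "modified" is approximated by "writable"; this
 * hypothetical predicate spells out the modified case:
 */
#if 0
static boolean_t
l3_maybe_dirty(pt_entry_t pte)
{

	/* Writable implies possibly dirty on this pmap. */
	return ((pte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW));
}
#endif /* 0 */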

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no PTEs can have PG_M set.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (FALSE);
	return (pmap_page_test_mappings(m, FALSE, TRUE));
}

/*
 *	pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *l3;
	boolean_t rv;

	rv = FALSE;
	PMAP_LOCK(pmap);
	l3 = pmap_l3(pmap, addr);
	if (l3 != NULL && *l3 != 0) {
		rv = TRUE;
	}
	PMAP_UNLOCK(pmap);
	return (rv);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	return (pmap_page_test_mappings(m, TRUE, FALSE));
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	pmap_t pmap;
	struct rwlock *lock;
	pv_entry_t pv;
	pt_entry_t *l3, oldl3;
	int md_gen;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_rlock(&pvh_global_lock);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
retry_pv_loop:
	rw_wlock(lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				rw_wunlock(lock);
				goto retry_pv_loop;
			}
		}
		l3 = pmap_l3(pmap, pv->pv_va);
retry:
		oldl3 = *l3;
		if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
			if (!atomic_cmpset_long(l3, oldl3,
			    oldl3 | ATTR_AP(ATTR_AP_RO)))
				goto retry;
			if ((oldl3 & ATTR_AF) != 0)
				vm_page_dirty(m);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(lock);
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_runlock(&pvh_global_lock);
}
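
/*
 * Illustrative sketch (not compiled): the retry label above implements
 * the usual compare-and-set idiom; the read-only downgrade is only
 * stored if the entry still holds the value that was just read, so a
 * concurrent update by another CPU forces another pass.
 */
#if 0
	do {
		oldl3 = *l3;
	} while (!atomic_cmpset_long(l3, oldl3, oldl3 | ATTR_AP(ATTR_AP_RO)));
#endif /* 0 */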

static __inline boolean_t
safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
{

	return (FALSE);
}

#define	PMAP_TS_REFERENCED_MAX	5

/*
 * pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	pv_entry_t pv, pvf;
	pmap_t pmap;
	struct rwlock *lock;
	pd_entry_t *l2;
	pt_entry_t *l3;
	vm_paddr_t pa;
	int cleared, md_gen, not_cleared;
	struct spglist free;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	SLIST_INIT(&free);
	cleared = 0;
	pa = VM_PAGE_TO_PHYS(m);
	lock = PHYS_TO_PV_LIST_LOCK(pa);
	rw_rlock(&pvh_global_lock);
	rw_wlock(lock);
retry:
	not_cleared = 0;
	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
		goto out;
	pv = pvf;
	do {
		if (pvf == NULL)
			pvf = pv;
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto retry;
			}
		}
		l2 = pmap_l2(pmap, pv->pv_va);
		KASSERT((*l2 & ATTR_DESCR_MASK) == L2_TABLE,
		    ("pmap_ts_referenced: found an invalid l2 table"));
		l3 = pmap_l2_to_l3(l2, pv->pv_va);
		if ((*l3 & ATTR_AF) != 0) {
			if (safe_to_clear_referenced(pmap, *l3)) {
				/*
				 * TODO: We don't handle the access flag
				 * at all. We need to be able to set it in
				 * the exception handler.
				 */
				panic("TODO: safe_to_clear_referenced\n");
			} else if ((*l3 & ATTR_SW_WIRED) == 0) {
				/*
				 * Wired pages cannot be paged out so
				 * doing accessed bit emulation for
				 * them is wasted effort. We do the
				 * hard work for unwired pages only.
				 */
				pmap_remove_l3(pmap, l3, pv->pv_va,
				    *l2, &free, &lock);
				pmap_invalidate_page(pmap, pv->pv_va);
				cleared++;
				if (pvf == pv)
					pvf = NULL;
				pv = NULL;
				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
				    ("inconsistent pv lock %p %p for page %p",
				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
			} else
				not_cleared++;
		}
		PMAP_UNLOCK(pmap);
		/* Rotate the PV list if it has more than one entry. */
		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
			m->md.pv_gen++;
		}
	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
	    not_cleared < PMAP_TS_REFERENCED_MAX);
out:
	rw_wunlock(lock);
	rw_runlock(&pvh_global_lock);
	pmap_free_zero_pages(&free);
	return (cleared + not_cleared);
}
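
/*
 * Illustrative sketch only: the tail-rotation above makes repeated calls
 * scan the mappings of a widely shared page round-robin.  For example,
 * with mappings (A, B, C) and PMAP_TS_REFERENCED_MAX = 5, a call that
 * visits A leaves the list as (B, C, A), so the next call starts at B
 * instead of rescanning A.  The rotation step by itself, as a
 * hypothetical stand-alone helper, is:
 */
#if 0
static void
example_rotate_pv_list(vm_page_t m, pv_entry_t pv)
{

	TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
	m->md.pv_gen++;		/* Invalidate concurrent list walkers. */
}
#endif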

/*
 * Apply the given advice to the specified range of addresses within the
 * given pmap.  Depending on the advice, clear the referenced and/or
 * modified flags in each mapping and set the mapped page's dirty field.
 */
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_modify: page %p is not managed", m));
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	KASSERT(!vm_page_xbusied(m),
	    ("pmap_clear_modify: page %p is exclusive busied", m));

	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
	 * If the object containing the page is locked and the page is not
	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
		return;

	/* TODO: We lack support for tracking if a page is modified */
}

void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{

	return ((void *)PHYS_TO_DMAP(pa));
}

void
pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
{
}
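
/*
 * Illustrative sketch only: pmap_mapbios() can simply return a direct-map
 * address because the DMAP is a linear window over physical memory, so no
 * page-table update or TLB invalidation is needed.  A hypothetical
 * bounds-checked variant (not used by this file) might read:
 */
#if 0
static void *
example_phys_to_kva(vm_paddr_t pa)
{

	KASSERT(pa < DMAP_MAX_PHYSADDR,
	    ("example_phys_to_kva: PA %#lx outside the DMAP", pa));
	return ((void *)PHYS_TO_DMAP(pa));
}
#endif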

/*
 * Sets the memory attribute for the specified page.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{

	panic("pmap_page_set_memattr");
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{

	panic("pmap_mincore");
}

void
pmap_activate(struct thread *td)
{
	pmap_t pmap;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
	__asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr));
	critical_exit();
}

void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{

	panic("pmap_sync_icache");
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
}
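
/*
 * Illustrative sketch only: pmap_align_superpage() is currently a stub.
 * Other ports (e.g. amd64) adjust *addr so that its offset within a
 * superpage matches the offset of the backing object, which lets later
 * promotions create superpage mappings.  An arm64 version built on the
 * 2MB L2 block size might look like the following; L2_SIZE and L2_OFFSET
 * are the existing page-table constants, everything else is hypothetical.
 */
#if 0
void
example_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
	vm_offset_t superpage_offset;

	if (size < L2_SIZE)
		return;
	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
		offset += ptoa(object->pg_color);
	superpage_offset = offset & L2_OFFSET;
	if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
	    (*addr & L2_OFFSET) == superpage_offset)
		return;
	if ((*addr & L2_OFFSET) < superpage_offset)
		*addr = (*addr & ~L2_OFFSET) + superpage_offset;
	else
		*addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
}
#endif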

/**
 * Get the kernel virtual address of a set of physical pages.  If there
 * are physical addresses not covered by the DMAP, perform a transient
 * mapping that will be removed when calling pmap_unmap_io_transient.
 *
 * \param page        The pages for which the caller wishes to obtain
 *                    kernel virtual addresses.
 * \param vaddr       On return contains the kernel virtual memory address
 *                    of the pages passed in the page parameter.
 * \param count       Number of pages passed in.
 * \param can_fault   TRUE if the thread using the mapped pages can take
 *                    page faults, FALSE otherwise.
 *
 * \returns TRUE if the caller must call pmap_unmap_io_transient when
 *          finished or FALSE otherwise.
 *
 */
boolean_t
pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
    boolean_t can_fault)
{
	vm_paddr_t paddr;
	boolean_t needs_mapping;
	int error, i;

	/*
	 * Allocate any KVA space that we need, this is done in a separate
	 * loop to prevent calling vmem_alloc while pinned.
	 */
	needs_mapping = FALSE;
	for (i = 0; i < count; i++) {
		paddr = VM_PAGE_TO_PHYS(page[i]);
		if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) {
			error = vmem_alloc(kernel_arena, PAGE_SIZE,
			    M_BESTFIT | M_WAITOK, &vaddr[i]);
			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
			needs_mapping = TRUE;
		} else {
			vaddr[i] = PHYS_TO_DMAP(paddr);
		}
	}

	/* Exit early if everything is covered by the DMAP */
	if (!needs_mapping)
		return (FALSE);

	/*
	 * NB:  The sequence of updating a page table followed by accesses
	 * to the corresponding pages used in the !DMAP case is subject to
	 * the situation described in the "AMD64 Architecture Programmer's
	 * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
	 * Coherency Considerations".  Therefore, issuing the INVLPG right
	 * after modifying the PTE bits is crucial.
	 */
	if (!can_fault)
		sched_pin();
	for (i = 0; i < count; i++) {
		paddr = VM_PAGE_TO_PHYS(page[i]);
		if (paddr >= DMAP_MAX_PHYSADDR) {
			panic(
			   "pmap_map_io_transient: TODO: Map out of DMAP data");
		}
	}

	return (needs_mapping);
}

void
pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
    boolean_t can_fault)
{
	vm_paddr_t paddr;
	int i;

	if (!can_fault)
		sched_unpin();
	for (i = 0; i < count; i++) {
		paddr = VM_PAGE_TO_PHYS(page[i]);
		if (paddr >= DMAP_MAX_PHYSADDR) {
			panic("pmap_unmap_io_transient: TODO: Unmap data");
		}
	}
}
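
/*
 * Illustrative sketch only: a hypothetical caller pairing the two
 * functions above, mapping one page for a short kernel access and
 * undoing the transient mapping only when one was created.
 */
#if 0
static uint64_t
example_read_first_word(vm_page_t m)
{
	vm_offset_t va;
	boolean_t mapped;
	uint64_t val;

	mapped = pmap_map_io_transient(&m, &va, 1, TRUE);
	val = *(uint64_t *)va;
	if (mapped)
		pmap_unmap_io_transient(&m, &va, 1, TRUE);
	return (val);
}
#endif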