/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014-2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * This software was developed by Andrew Turner under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/arm64/arm64/pmap.c 325238 2017-10-31 15:06:26Z markj $");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduce-protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/bitstring.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/_unrhdr.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

#define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL2PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL3PG		(PAGE_SIZE/(sizeof (pt_entry_t)))

#define	NUL0E		L0_ENTRIES
#define	NUL1E		(NUL0E * NL1PG)
#define	NUL2E		(NUL1E * NL2PG)
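
/*
 * A worked example of the constants above, assuming the 4 KiB translation
 * granule this pmap is built for: a table page holds PAGE_SIZE /
 * sizeof(pd_entry_t) == 512 eight-byte entries, so NL1PG, NL2PG and NL3PG
 * are all 512.  The number of addressable entries then multiplies by 512
 * per level: NUL1E == NUL0E * 512 and NUL2E == NUL0E * 512 * 512, i.e.
 * one L2 entry per potential 2 MiB mapping.
 */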

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define	PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define	PMAP_INLINE	extern inline
#endif
#else
#define	PMAP_INLINE
#endif

/*
 * These are configured by the mair_el1 register. This is set up in locore.S
 */
#define	DEVICE_MEMORY	0
#define	UNCACHED_MEMORY	1
#define	CACHED_MEMORY	2

#ifdef PV_STATS
#define	PV_STAT(x)	do { x ; } while (0)
#else
#define	PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
#define	pa_to_pvh(pa)		(&pv_table[pmap_l2_pindex(pa)])

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
	(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
	PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
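
/*
 * A sketch of how the locking helpers above are used by callers later in
 * this file: start with no lock held, switch locks as pages with different
 * pv lists are visited, and drop whatever is still held on the way out.
 *
 *	struct rwlock *lock = NULL;
 *
 *	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
 *	...manipulate m's pv list...
 *	if (lock != NULL)
 *		rw_wunlock(lock);
 */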

struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

struct msgbuf *msgbufp = NULL;

/*
 * Data for the pv entry allocation mechanism.
 * Updates to pv_invl_gen are protected by the pv_list_locks[]
 * elements, but reads are not.
 */
static struct md_page *pv_table;
static struct md_page pv_dummy;

vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */

/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS);

#define	DMAP_TABLES	((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
extern pt_entry_t pagetable_dmap[];

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

static int superpages_enabled = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
    "Are large page mappings enabled?");

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);

static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode);
static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
    vm_offset_t va, struct rwlock **lockp);
static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define	pmap_set(table, mask) atomic_set_64(table, mask)
#define	pmap_load_clear(table) atomic_swap_64(table, 0)
#define	pmap_load(table) (*table)
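
/*
 * A sketch of the update pattern these helpers support, as used throughout
 * this file: atomically swap in the new entry, write the entry back for a
 * non-coherent table walker (PTE_SYNC, defined below), then shoot down the
 * stale TLB entry.
 *
 *	old = pmap_load_store(pte, new);
 *	PTE_SYNC(pte);
 *	pmap_invalidate_page(pmap, va);
 */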

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

#define	pmap_l0_index(va)	(((va) >> L0_SHIFT) & L0_ADDR_MASK)
#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l0[pmap_l0_index(va)]);
}

static __inline pd_entry_t *
pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
	return (&l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l0;

	l0 = pmap_l0(pmap, va);
	if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
		return (NULL);

	return (pmap_l0_to_l1(l0, va));
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
	return (&l3[pmap_l3_index(va)]);
}
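
/*
 * Example (a sketch): the helpers above chain together to walk from the
 * top level down to a 4 KiB page entry:
 *
 *	l0 = pmap_l0(pmap, va);
 *	l1 = pmap_l0_to_l1(l0, va);
 *	l2 = pmap_l1_to_l2(l1, va);
 *	l3 = pmap_l2_to_l3(l2, va);
 *
 * Each *_to_* step dereferences the previous entry through the direct map,
 * so it is only safe when that entry is known to be a valid table entry;
 * pmap_pde() and pmap_pte() below supply the validity checks.
 */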

/*
 * Returns the lowest valid pde for a given virtual address.
 * The next level may or may not point to a valid page or block.
 */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l0, *l1, *l2, desc;

	l0 = pmap_l0(pmap, va);
	desc = pmap_load(l0) & ATTR_DESCR_MASK;
	if (desc != L0_TABLE) {
		*level = -1;
		return (NULL);
	}

	l1 = pmap_l0_to_l1(l0, va);
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc != L1_TABLE) {
		*level = 0;
		return (l0);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc != L2_TABLE) {
		*level = 1;
		return (l1);
	}

	*level = 2;
	return (l2);
}

/*
 * Returns the lowest valid pte block or table entry for a given virtual
 * address. If there are no valid entries return NULL and set the level to
 * the first invalid level.
 */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l1, *l2, desc;
	pt_entry_t *l3;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL) {
		*level = 0;
		return (NULL);
	}
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc == L1_BLOCK) {
		*level = 1;
		return (l1);
	}

	if (desc != L1_TABLE) {
		*level = 1;
		return (NULL);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc == L2_BLOCK) {
		*level = 2;
		return (l2);
	}

	if (desc != L2_TABLE) {
		*level = 2;
		return (NULL);
	}

	*level = 3;
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
		return (NULL);

	return (l3);
}
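
/*
 * Example of the two lookups above (a sketch): for an address backed by an
 * ordinary 4 KiB mapping,
 *
 *	pde = pmap_pde(pmap, va, &lvl);		returns the L2 entry, lvl == 2
 *	pte = pmap_pte(pmap, va, &lvl);		returns the L3 entry, lvl == 3
 *
 * while for a 2 MiB block mapping pmap_pte() itself returns the L2 entry
 * with lvl == 2, so callers must check the level rather than assume an L3
 * entry was found.
 */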

static inline bool
pmap_superpages_enabled(void)
{

	return (superpages_enabled != 0);
}

bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
    pd_entry_t **l2, pt_entry_t **l3)
{
	pd_entry_t *l0p, *l1p, *l2p;

	if (pmap->pm_l0 == NULL)
		return (false);

	l0p = pmap_l0(pmap, va);
	*l0 = l0p;

	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
		return (false);

	l1p = pmap_l0_to_l1(l0p, va);
	*l1 = l1p;

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
		*l2 = NULL;
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
		return (false);

	l2p = pmap_l1_to_l2(l1p, va);
	*l2 = l2p;

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
		*l3 = NULL;
		return (true);
	}

	*l3 = pmap_l2_to_l3(l2p, va);

	return (true);
}

static __inline int
pmap_is_current(pmap_t pmap)
{

	return ((pmap == pmap_kernel()) ||
	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

/* Is a level 1 or 2 entry a valid block and cacheable */
CTASSERT(L1_BLOCK == L2_BLOCK);
static __inline int
pmap_pte_valid_cacheable(pt_entry_t pte)
{

	return (((pte & ATTR_DESCR_MASK) == L1_BLOCK) &&
	    ((pte & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
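
/*
 * A sketch of how the predicates and PTE_SYNC above work together when a
 * mapping is torn down, as in pmap_kremove() below: write back the data
 * cache for cacheable mappings before the translation disappears, then
 * clear the entry and push the update to memory for the table walker.
 *
 *	if (pmap_l3_valid_cacheable(pmap_load(pte)))
 *		cpu_dcache_wb_range(va, L3_SIZE);
 *	pmap_load_clear(pte);
 *	PTE_SYNC(pte);
 */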

/*
 * Checks if the page is dirty. We currently lack proper tracking of this on
 * arm64, so for now assume that if a page is mapped read/write and has been
 * accessed, it is dirty.
 */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
	    ("Invalid bootstrap L1 table"));
	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
}

static void
pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
{
	vm_offset_t va;
	vm_paddr_t pa;
	u_int l1_slot;

	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
	va = DMAP_MIN_ADDRESS;
	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
		l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);

		pmap_load_store(&pagetable_dmap[l1_slot],
		    (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN |
		    ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
	}

	/* Set the upper limit of the DMAP region */
	dmap_phys_max = pa;
	dmap_max_addr = va;

	cpu_dcache_wb_range((vm_offset_t)pagetable_dmap,
	    PAGE_SIZE * DMAP_TABLES);
	cpu_tlb_flushID();
}
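
/*
 * A worked example of the mapping just built (a sketch): each loop
 * iteration installs one L1_SIZE (1 GiB with the 4 KiB granule) block, so
 * any physical address pa in [dmap_phys_base, dmap_phys_max) becomes
 * addressable at
 *
 *	va = DMAP_MIN_ADDRESS + (pa - dmap_phys_base)
 *
 * which is the arithmetic PHYS_TO_DMAP() performs.
 */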

static vm_offset_t
pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
{
	vm_offset_t l2pt;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;

	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));

	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(va);
	l2pt = l2_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		pa = pmap_early_vtophys(l1pt, l2pt);
		pmap_load_store(&l1[l1_slot],
		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
		l2pt += PAGE_SIZE;
	}

	/* Clean the L2 page table */
	memset((void *)l2_start, 0, l2pt - l2_start);
	cpu_dcache_wb_range(l2_start, l2pt - l2_start);

	/* Flush the l1 table to ram */
	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);

	return (l2pt);
}

static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l2pt, l3pt;
	vm_paddr_t pa;
	pd_entry_t *l2;
	u_int l2_slot;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
	l2pt = (vm_offset_t)l2;
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pmap_load_store(&l2[l2_slot],
		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L3 page table */
	memset((void *)l3_start, 0, l3pt - l3_start);
	cpu_dcache_wb_range(l3_start, l3pt - l3_start);

	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

	return (l3pt);
}
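
/*
 * Both bootstrap helpers above consume page-table pages from a simple bump
 * allocator and hand back the advanced free pointer; a sketch of the
 * calling pattern used by pmap_bootstrap() below:
 *
 *	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
 *	freemempos = pmap_bootstrap_l3(l1pt,
 *	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
 */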

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
    vm_size_t kernlen)
{
	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t pa, max_pa, min_pa;
	int i;

	kern_delta = KERNBASE - kernstart;
	physmem = 0;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/* Assume the address we were loaded to is a valid physical address */
	min_pa = max_pa = KERNBASE - kern_delta;

	/*
	 * Find the minimum physical address. physmap is sorted,
	 * but may contain empty ranges.
	 */
	for (i = 0; i < (physmap_idx * 2); i += 2) {
		if (physmap[i] == physmap[i + 1])
			continue;
		if (physmap[i] <= min_pa)
			min_pa = physmap[i];
		if (physmap[i + 1] > max_pa)
			max_pa = physmap[i + 1];
	}

	/* Create a direct map region early so we can use it for pa -> va */
	pmap_bootstrap_dmap(l1pt, min_pa, max_pa);

	va = KERNBASE;
	pa = KERNBASE - kern_delta;

	/*
	 * Start to initialise phys_avail by copying from physmap
	 * up to the physical address KERNBASE points at.
	 */
	map_slot = avail_slot = 0;
	for (; map_slot < (physmap_idx * 2) &&
	    avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		if (physmap[map_slot] <= pa &&
		    physmap[map_slot + 1] > pa)
			break;

		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}

	/* Add the memory before the kernel */
	if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) {
		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = pa;
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}
	used_map_slot = map_slot;
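
	/*
	 * A worked example of the loops above (hypothetical numbers): with
	 * physmap = { 0x80000000, 0x100000000 } and the kernel loaded at
	 * pa 0x80100000, the copy loop breaks immediately on the range
	 * containing the kernel and the fixup just above records the memory
	 * below it, leaving phys_avail = { 0x80000000, 0x80100000 } until
	 * the ranges past the kernel are added further down.
	 */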

	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
			break;

		/* Check locore used L2 blocks */
		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
		    ("Invalid bootstrap L2 table"));
		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
		    ("Incorrect PA in L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L1_SIZE);

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);
	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
	/* And the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	virtual_avail = roundup2(freemempos, L1_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
	    map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		/* Have we used the current range? */
		if (physmap[map_slot + 1] <= pa)
			continue;

		/* Do we need to split the entry? */
		if (physmap[map_slot] < pa) {
			phys_avail[avail_slot] = pa;
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		} else {
			phys_avail[avail_slot] = physmap[map_slot];
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		}
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;

		avail_slot += 2;
	}
	phys_avail[avail_slot] = 0;
	phys_avail[avail_slot + 1] = 0;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = atop(phys_avail[avail_slot - 1]);

	cpu_tlb_flushID();
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_size_t s;
	int i, pv_npg;

	/*
	 * Are large page mappings enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");

	/*
	 * Calculate the size of the pv head table for superpages.
	 */
	pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);

	/*
	 * Allocate memory for the pv head table for superpages.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);
	TAILQ_INIT(&pv_dummy.pv_list);
}
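
/*
 * A sizing example for the table just allocated (hypothetical numbers):
 * one struct md_page is needed per potential 2 MiB superpage frame, so a
 * machine whose highest physical segment ends at 4 GiB gets
 * howmany(0x100000000, L2_SIZE) == 2048 entries.
 */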

static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
    "2MB page mapping counters");

static u_long pmap_l2_demotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
    &pmap_l2_demotions, 0, "2MB page demotions");

static u_long pmap_l2_p_failures;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
    &pmap_l2_p_failures, 0, "2MB page promotion failures");

static u_long pmap_l2_promotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
    &pmap_l2_promotions, 0, "2MB page promotions");

/*
 * Invalidate a single TLB entry.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vaae1is, %0	\n"
	    "dsb  ish		\n"
	    "isb		\n"
	    : : "r"(va >> PAGE_SHIFT));
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	sched_pin();
	dsb(ishst);
	for (addr = sva; addr < eva; addr += PAGE_SIZE) {
		__asm __volatile(
		    "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
	}
	__asm __volatile(
	    "dsb  ish	\n"
	    "isb	\n");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vmalle1is	\n"
	    "dsb  ish		\n"
	    "isb		\n");
	sched_unpin();
}
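
/*
 * A note on the sequences above (a summary of the ARMv8 ordering rules as
 * relied on here): the leading "dsb ishst" makes the page table store
 * visible before the TLBI is issued, "tlbi vaae1is" invalidates the page
 * (the VA shifted right by PAGE_SHIFT) for all ASIDs across the
 * inner-shareable domain, and the trailing "dsb ish; isb" wait for the
 * invalidation to complete and resynchronize the instruction stream.
 */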

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Find the block or page map for this virtual address. pmap_pte
	 * will return either a valid block/page entry, or NULL.
	 */
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);
		pa = tpte & ~ATTR_MASK;
		switch (lvl) {
		case 1:
			KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
			    ("pmap_extract: Invalid L1 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L1_OFFSET);
			break;
		case 2:
			KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
			    ("pmap_extract: Invalid L2 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L2_OFFSET);
			break;
		case 3:
			KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
			    ("pmap_extract: Invalid L3 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L3_OFFSET);
			break;
		}
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *pte, tpte;
	vm_offset_t off;
	vm_paddr_t pa;
	vm_page_t m;
	int lvl;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);

		KASSERT(lvl > 0 && lvl <= 3,
		    ("pmap_extract_and_hold: Invalid level %d", lvl));
		CTASSERT(L1_BLOCK == L2_BLOCK);
		KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
		    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
		    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
		    tpte & ATTR_DESCR_MASK));
		if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
		    ((prot & VM_PROT_WRITE) == 0)) {
			switch (lvl) {
			case 1:
				off = va & L1_OFFSET;
				break;
			case 2:
				off = va & L2_OFFSET;
				break;
			case 3:
			default:
				off = 0;
			}
			if (vm_page_pa_tryrelock(pmap,
			    (tpte & ~ATTR_MASK) | off, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}
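
/*
 * The address arithmetic in both routines above follows one pattern (a
 * sketch for the 2 MiB case): the entry supplies the high bits and the
 * virtual address supplies the offset within the block,
 *
 *	pa = (tpte & ~ATTR_MASK) | (va & L2_OFFSET);
 *
 * with L1_OFFSET or L3_OFFSET taking L2_OFFSET's place at the other levels.
 */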

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		pa = 0;
		pte = pmap_pte(kernel_pmap, va, &lvl);
		if (pte != NULL) {
			tpte = pmap_load(pte);
			pa = tpte & ~ATTR_MASK;
			switch (lvl) {
			case 1:
				KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
				    ("pmap_kextract: Invalid L1 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L1_OFFSET);
				break;
			case 2:
				KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
				    ("pmap_kextract: Invalid L2 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L2_OFFSET);
				break;
			case 3:
				KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
				    ("pmap_kextract: Invalid L3 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L3_OFFSET);
				break;
			}
		}
	}
	return (pa);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

static void
pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
{
	pd_entry_t *pde;
	pt_entry_t *pte, attr;
	vm_offset_t va;
	int lvl;

	KASSERT((pa & L3_OFFSET) == 0,
	   ("pmap_kenter: Invalid physical address"));
	KASSERT((sva & L3_OFFSET) == 0,
	   ("pmap_kenter: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter: Mapping is not page-sized"));

	attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE;
	if (mode == DEVICE_MEMORY)
		attr |= ATTR_XN;

	va = sva;
	while (size != 0) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));

		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, (pa & ~L3_OFFSET) | attr);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{

	pmap_kenter(sva, size, pa, DEVICE_MEMORY);
}
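
/*
 * Example (a sketch, with a hypothetical regs_pa): mapping one page of a
 * device's registers and, with pmap_kremove_device() below, tearing the
 * mapping down again, the way a bus space implementation would:
 *
 *	pmap_kenter_device(va, PAGE_SIZE, regs_pa);
 *	...access the registers through va...
 *	pmap_kremove_device(va, PAGE_SIZE);
 */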

/*
 * Remove a page from the kernel pagetables.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;
	int lvl;

	pte = pmap_pte(kernel_pmap, va, &lvl);
	KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
	KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));

	if (pmap_l3_valid_cacheable(pmap_load(pte)))
		cpu_dcache_wb_range(va, L3_SIZE);
	pmap_load_clear(pte);
	PTE_SYNC(pte);
	pmap_invalidate_page(kernel_pmap, va);
}

void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT((sva & L3_OFFSET) == 0,
	   ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		pmap_load_clear(pte);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return (PHYS_TO_DMAP(start));
}
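
/*
 * Example (a sketch): because this pmap has a full direct map, the routine
 * above ignores the suggested '*virt' and returns the DMAP alias, so
 *
 *	va = pmap_map(&virt, pa, pa + size, VM_PROT_READ | VM_PROT_WRITE);
 *
 * yields PHYS_TO_DMAP(pa) and leaves 'virt' untouched.
 */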

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pd_entry_t *pde;
	pt_entry_t *pte, pa;
	vm_offset_t va;
	vm_page_t m;
	int i, lvl;

	va = sva;
	for (i = 0; i < count; i++) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2,
		    ("pmap_qenter: Invalid level %d", lvl));

		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
		if (m->md.pv_memattr == DEVICE_MEMORY)
			pa |= ATTR_XN;
		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, pa);
		PTE_SYNC(pte);

		va += L3_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));

	va = sva;
	while (count-- > 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		if (pte != NULL) {
			if (pmap_l3_valid_cacheable(pmap_load(pte)))
				cpu_dcache_wb_range(va, L3_SIZE);
			pmap_load_clear(pte);
			PTE_SYNC(pte);
		}

		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}
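
/*
 * A typical transient use of the two routines above (a sketch): map a
 * batch of pages, access them through the kva window, then unmap them.
 *
 *	pmap_qenter(va, ma, npages);
 *	...access the pages through va...
 *	pmap_qremove(va, npages);
 */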

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}

/*
 * Schedule the specified unused page table page to be freed.  Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= (NUL2E + NUL1E)) {
		/* l1 page */
		pd_entry_t *l0;

		l0 = pmap_l0(pmap, va);
		pmap_load_clear(l0);
		PTE_SYNC(l0);
	} else if (m->pindex >= NUL2E) {
		/* l2 page */
		pd_entry_t *l1;

		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
		PTE_SYNC(l1);
	} else {
		/* l3 page */
		pd_entry_t *l2;

		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
		PTE_SYNC(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUL2E) {
		/* We just released an l3, unhold the matching l2 */
		pd_entry_t *l1, tl1;
		vm_page_t l2pg;

		l1 = pmap_l1(pmap, va);
		tl1 = pmap_load(l1);
		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l2pg, free);
	} else if (m->pindex < (NUL2E + NUL1E)) {
		/* We just released an l2, unhold the matching l1 */
		pd_entry_t *l0, tl0;
		vm_page_t l1pg;

		l0 = pmap_l0(pmap, va);
		tl0 = pmap_load(l0);
		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l1pg, free);
	}
	pmap_invalidate_page(pmap, va);

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}
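
/*
 * A sketch of the teardown path that drives the two routines above: after
 * pmap_remove_l3() clears an L3 entry it calls
 *
 *	pmap_unuse_l3(pmap, va, pmap_load(l2), &free);
 *
 * (pmap_unuse_l3() follows below), which unwires the L3 table page and,
 * via the recursion in _pmap_unwire_l3(), pushes any table page whose wire
 * count drops to zero onto 'free'.
 */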
1414281494Sandrew */ 1415281494Sandrew atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); 1416281494Sandrew 1417305531Sandrew /* 1418281494Sandrew * Put page on a list so that it is released after 1419281494Sandrew * *ALL* TLB shootdown is done 1420281494Sandrew */ 1421281494Sandrew pmap_add_delayed_free_list(m, free, TRUE); 1422281494Sandrew} 1423281494Sandrew 1424281494Sandrew/* 1425281494Sandrew * After removing an l3 entry, this routine is used to 1426281494Sandrew * conditionally free the page, and manage the hold/wire counts. 1427281494Sandrew */ 1428281494Sandrewstatic int 1429281494Sandrewpmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, 1430281494Sandrew struct spglist *free) 1431281494Sandrew{ 1432281494Sandrew vm_page_t mpte; 1433281494Sandrew 1434281494Sandrew if (va >= VM_MAXUSER_ADDRESS) 1435281494Sandrew return (0); 1436281494Sandrew KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1437281494Sandrew mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); 1438281494Sandrew return (pmap_unwire_l3(pmap, va, mpte, free)); 1439281494Sandrew} 1440281494Sandrew 1441281494Sandrewvoid 1442281494Sandrewpmap_pinit0(pmap_t pmap) 1443281494Sandrew{ 1444281494Sandrew 1445281494Sandrew PMAP_LOCK_INIT(pmap); 1446281494Sandrew bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1447297446Sandrew pmap->pm_l0 = kernel_pmap->pm_l0; 1448305882Sandrew pmap->pm_root.rt_root = 0; 1449281494Sandrew} 1450281494Sandrew 1451281494Sandrewint 1452281494Sandrewpmap_pinit(pmap_t pmap) 1453281494Sandrew{ 1454297446Sandrew vm_paddr_t l0phys; 1455297446Sandrew vm_page_t l0pt; 1456281494Sandrew 1457281494Sandrew /* 1458297446Sandrew * allocate the l0 page 1459281494Sandrew */ 1460297446Sandrew while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 1461281494Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1462281494Sandrew VM_WAIT; 1463281494Sandrew 1464297446Sandrew l0phys = VM_PAGE_TO_PHYS(l0pt); 1465297446Sandrew pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); 1466281494Sandrew 1467297446Sandrew if ((l0pt->flags & PG_ZERO) == 0) 1468297446Sandrew pagezero(pmap->pm_l0); 1469281494Sandrew 1470305882Sandrew pmap->pm_root.rt_root = 0; 1471281494Sandrew bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1472281494Sandrew 1473281494Sandrew return (1); 1474281494Sandrew} 1475281494Sandrew 1476281494Sandrew/* 1477281494Sandrew * This routine is called if the desired page table page does not exist. 1478281494Sandrew * 1479281494Sandrew * If page table page allocation fails, this routine may sleep before 1480281494Sandrew * returning NULL. It sleeps only if a lock pointer was given. 1481281494Sandrew * 1482281494Sandrew * Note: If a page allocation fails at page table level two or three, 1483281494Sandrew * one or two pages may be held during the wait, only to be released 1484281494Sandrew * afterwards. This conservative approach is easily argued to avoid 1485281494Sandrew * race conditions. 1486281494Sandrew */ 1487281494Sandrewstatic vm_page_t 1488281494Sandrew_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) 1489281494Sandrew{ 1490297446Sandrew vm_page_t m, l1pg, l2pg; 1491281494Sandrew 1492281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1493281494Sandrew 1494281494Sandrew /* 1495281494Sandrew * Allocate a page table page. 
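 *
 * The ptepindex argument selects which kind of page is wanted; a
 * sketch of the index ranges that the classification below assumes:
 *
 *	[0, NUL2E)		an L3 table page (each maps 2MB of VA)
 *	[NUL2E, NUL2E + NUL1E)	an L2 table page
 *	[NUL2E + NUL1E, ...)	an L1 table page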
1496281494Sandrew */ 1497281494Sandrew if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1498281494Sandrew VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1499281494Sandrew if (lockp != NULL) { 1500281494Sandrew RELEASE_PV_LIST_LOCK(lockp); 1501281494Sandrew PMAP_UNLOCK(pmap); 1502281494Sandrew VM_WAIT; 1503281494Sandrew PMAP_LOCK(pmap); 1504281494Sandrew } 1505281494Sandrew 1506281494Sandrew /* 1507281494Sandrew * Indicate the need to retry. While waiting, the page table 1508281494Sandrew * page may have been allocated. 1509281494Sandrew */ 1510281494Sandrew return (NULL); 1511281494Sandrew } 1512281494Sandrew if ((m->flags & PG_ZERO) == 0) 1513281494Sandrew pmap_zero_page(m); 1514281494Sandrew 1515281494Sandrew /* 1516281494Sandrew * Map the pagetable page into the process address space, if 1517281494Sandrew * it isn't already there. 1518281494Sandrew */ 1519281494Sandrew 1520297446Sandrew if (ptepindex >= (NUL2E + NUL1E)) { 1521297446Sandrew pd_entry_t *l0; 1522297446Sandrew vm_pindex_t l0index; 1523281494Sandrew 1524297446Sandrew l0index = ptepindex - (NUL2E + NUL1E); 1525297446Sandrew l0 = &pmap->pm_l0[l0index]; 1526297446Sandrew pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); 1527297446Sandrew PTE_SYNC(l0); 1528297446Sandrew } else if (ptepindex >= NUL2E) { 1529297446Sandrew vm_pindex_t l0index, l1index; 1530297446Sandrew pd_entry_t *l0, *l1; 1531297446Sandrew pd_entry_t tl0; 1532297446Sandrew 1533297446Sandrew l1index = ptepindex - NUL2E; 1534297446Sandrew l0index = l1index >> L0_ENTRIES_SHIFT; 1535297446Sandrew 1536297446Sandrew l0 = &pmap->pm_l0[l0index]; 1537297446Sandrew tl0 = pmap_load(l0); 1538297446Sandrew if (tl0 == 0) { 1539297446Sandrew /* recurse for allocating page dir */ 1540297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, 1541297446Sandrew lockp) == NULL) { 1542297446Sandrew --m->wire_count; 1543297446Sandrew /* XXX: release mem barrier? 
*/ 1544297446Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1545297446Sandrew vm_page_free_zero(m); 1546297446Sandrew return (NULL); 1547297446Sandrew } 1548297446Sandrew } else { 1549297446Sandrew l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); 1550297446Sandrew l1pg->wire_count++; 1551297446Sandrew } 1552297446Sandrew 1553297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); 1554297446Sandrew l1 = &l1[ptepindex & Ln_ADDR_MASK]; 1555281494Sandrew pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); 1556281494Sandrew PTE_SYNC(l1); 1557281494Sandrew } else { 1558297446Sandrew vm_pindex_t l0index, l1index; 1559297446Sandrew pd_entry_t *l0, *l1, *l2; 1560297446Sandrew pd_entry_t tl0, tl1; 1561281494Sandrew 1562297446Sandrew l1index = ptepindex >> Ln_ENTRIES_SHIFT; 1563297446Sandrew l0index = l1index >> L0_ENTRIES_SHIFT; 1564297446Sandrew 1565297446Sandrew l0 = &pmap->pm_l0[l0index]; 1566297446Sandrew tl0 = pmap_load(l0); 1567297446Sandrew if (tl0 == 0) { 1568281494Sandrew /* recurse for allocating page dir */ 1569297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1570281494Sandrew lockp) == NULL) { 1571281494Sandrew --m->wire_count; 1572281494Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1573281494Sandrew vm_page_free_zero(m); 1574281494Sandrew return (NULL); 1575281494Sandrew } 1576297446Sandrew tl0 = pmap_load(l0); 1577297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1578297446Sandrew l1 = &l1[l1index & Ln_ADDR_MASK]; 1579281494Sandrew } else { 1580297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1581297446Sandrew l1 = &l1[l1index & Ln_ADDR_MASK]; 1582297446Sandrew tl1 = pmap_load(l1); 1583297446Sandrew if (tl1 == 0) { 1584297446Sandrew /* recurse for allocating page dir */ 1585297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1586297446Sandrew lockp) == NULL) { 1587297446Sandrew --m->wire_count; 1588297446Sandrew /* XXX: release mem barrier? 
*/ 1589297446Sandrew atomic_subtract_int( 1590297446Sandrew &vm_cnt.v_wire_count, 1); 1591297446Sandrew vm_page_free_zero(m); 1592297446Sandrew return (NULL); 1593297446Sandrew } 1594297446Sandrew } else { 1595297446Sandrew l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); 1596297446Sandrew l2pg->wire_count++; 1597297446Sandrew } 1598281494Sandrew } 1599281494Sandrew 1600288445Sandrew l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 1601281494Sandrew l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1602285537Sandrew pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); 1603281494Sandrew PTE_SYNC(l2); 1604281494Sandrew } 1605281494Sandrew 1606281494Sandrew pmap_resident_count_inc(pmap, 1); 1607281494Sandrew 1608281494Sandrew return (m); 1609281494Sandrew} 1610281494Sandrew 1611281494Sandrewstatic vm_page_t 1612281494Sandrewpmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1613281494Sandrew{ 1614281494Sandrew vm_pindex_t ptepindex; 1615297446Sandrew pd_entry_t *pde, tpde; 1616305882Sandrew#ifdef INVARIANTS 1617305882Sandrew pt_entry_t *pte; 1618305882Sandrew#endif 1619281494Sandrew vm_page_t m; 1620297446Sandrew int lvl; 1621281494Sandrew 1622281494Sandrew /* 1623281494Sandrew * Calculate pagetable page index 1624281494Sandrew */ 1625281494Sandrew ptepindex = pmap_l2_pindex(va); 1626281494Sandrewretry: 1627281494Sandrew /* 1628281494Sandrew * Get the page directory entry 1629281494Sandrew */ 1630297446Sandrew pde = pmap_pde(pmap, va, &lvl); 1631281494Sandrew 1632281494Sandrew /* 1633297446Sandrew * If the page table page is mapped, we just increment the hold count, 1634297446Sandrew * and activate it. If we get a level 2 pde it will point to a level 3 1635297446Sandrew * table. 1636281494Sandrew */ 1637305882Sandrew switch (lvl) { 1638305882Sandrew case -1: 1639305882Sandrew break; 1640305882Sandrew case 0: 1641305882Sandrew#ifdef INVARIANTS 1642305882Sandrew pte = pmap_l0_to_l1(pde, va); 1643305882Sandrew KASSERT(pmap_load(pte) == 0, 1644305882Sandrew ("pmap_alloc_l3: TODO: l0 superpages")); 1645305882Sandrew#endif 1646305882Sandrew break; 1647305882Sandrew case 1: 1648305882Sandrew#ifdef INVARIANTS 1649305882Sandrew pte = pmap_l1_to_l2(pde, va); 1650305882Sandrew KASSERT(pmap_load(pte) == 0, 1651305882Sandrew ("pmap_alloc_l3: TODO: l1 superpages")); 1652305882Sandrew#endif 1653305882Sandrew break; 1654305882Sandrew case 2: 1655297446Sandrew tpde = pmap_load(pde); 1656297446Sandrew if (tpde != 0) { 1657297446Sandrew m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); 1658297446Sandrew m->wire_count++; 1659297446Sandrew return (m); 1660297446Sandrew } 1661305882Sandrew break; 1662305882Sandrew default: 1663305882Sandrew panic("pmap_alloc_l3: Invalid level %d", lvl); 1664281494Sandrew } 1665297446Sandrew 1666297446Sandrew /* 1667297446Sandrew * Here if the pte page isn't mapped, or if it has been deallocated. 1668297446Sandrew */ 1669297446Sandrew m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1670297446Sandrew if (m == NULL && lockp != NULL) 1671297446Sandrew goto retry; 1672297446Sandrew 1673281494Sandrew return (m); 1674281494Sandrew} 1675281494Sandrew 1676281494Sandrew 1677281494Sandrew/*************************************************** 1678281494Sandrew * Pmap allocation/deallocation routines. 1679281494Sandrew ***************************************************/ 1680281494Sandrew 1681281494Sandrew/* 1682281494Sandrew * Release any resources held by the given physical map. 1683281494Sandrew * Called when a pmap initialized by pmap_pinit is being released. 
1684281494Sandrew * Should only be called if the map contains no valid mappings. 1685281494Sandrew */ 1686281494Sandrewvoid 1687281494Sandrewpmap_release(pmap_t pmap) 1688281494Sandrew{ 1689281494Sandrew vm_page_t m; 1690281494Sandrew 1691281494Sandrew KASSERT(pmap->pm_stats.resident_count == 0, 1692281494Sandrew ("pmap_release: pmap resident count %ld != 0", 1693281494Sandrew pmap->pm_stats.resident_count)); 1694305882Sandrew KASSERT(vm_radix_is_empty(&pmap->pm_root), 1695305882Sandrew ("pmap_release: pmap has reserved page table page(s)")); 1696281494Sandrew 1697297446Sandrew m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); 1698281494Sandrew 1699281494Sandrew m->wire_count--; 1700281494Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1701281494Sandrew vm_page_free_zero(m); 1702281494Sandrew} 1703281494Sandrew 1704281494Sandrewstatic int 1705281494Sandrewkvm_size(SYSCTL_HANDLER_ARGS) 1706281494Sandrew{ 1707281494Sandrew unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1708281494Sandrew 1709281494Sandrew return sysctl_handle_long(oidp, &ksize, 0, req); 1710281494Sandrew} 1711305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1712281494Sandrew 0, 0, kvm_size, "LU", "Size of KVM"); 1713281494Sandrew 1714281494Sandrewstatic int 1715281494Sandrewkvm_free(SYSCTL_HANDLER_ARGS) 1716281494Sandrew{ 1717281494Sandrew unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1718281494Sandrew 1719281494Sandrew return sysctl_handle_long(oidp, &kfree, 0, req); 1720281494Sandrew} 1721305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1722281494Sandrew 0, 0, kvm_free, "LU", "Amount of KVM free"); 1723281494Sandrew 1724281494Sandrew/* 1725281494Sandrew * grow the number of kernel page table entries, if needed 1726281494Sandrew */ 1727281494Sandrewvoid 1728281494Sandrewpmap_growkernel(vm_offset_t addr) 1729281494Sandrew{ 1730281494Sandrew vm_paddr_t paddr; 1731281494Sandrew vm_page_t nkpg; 1732297446Sandrew pd_entry_t *l0, *l1, *l2; 1733281494Sandrew 1734281494Sandrew mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1735281494Sandrew 1736281494Sandrew addr = roundup2(addr, L2_SIZE); 1737281494Sandrew if (addr - 1 >= kernel_map->max_offset) 1738281494Sandrew addr = kernel_map->max_offset; 1739281494Sandrew while (kernel_vm_end < addr) { 1740297446Sandrew l0 = pmap_l0(kernel_pmap, kernel_vm_end); 1741297446Sandrew KASSERT(pmap_load(l0) != 0, 1742297446Sandrew ("pmap_growkernel: No level 0 kernel entry")); 1743297446Sandrew 1744297446Sandrew l1 = pmap_l0_to_l1(l0, kernel_vm_end); 1745285045Sandrew if (pmap_load(l1) == 0) { 1746281494Sandrew /* We need a new L1 table entry */ 1747281494Sandrew nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1748281494Sandrew VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1749281494Sandrew VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1750281494Sandrew if (nkpg == NULL) 1751281494Sandrew panic("pmap_growkernel: no memory to grow kernel"); 1752281494Sandrew if ((nkpg->flags & PG_ZERO) == 0) 1753281494Sandrew pmap_zero_page(nkpg); 1754281494Sandrew paddr = VM_PAGE_TO_PHYS(nkpg); 1755281494Sandrew pmap_load_store(l1, paddr | L1_TABLE); 1756281494Sandrew PTE_SYNC(l1); 1757281494Sandrew continue; /* try again */ 1758281494Sandrew } 1759281494Sandrew l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1760285045Sandrew if ((pmap_load(l2) & ATTR_AF) != 0) { 1761281494Sandrew kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1762281494Sandrew if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1763281494Sandrew kernel_vm_end =
kernel_map->max_offset; 1764305531Sandrew break; 1765281494Sandrew } 1766281494Sandrew continue; 1767281494Sandrew } 1768281494Sandrew 1769281494Sandrew nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1770281494Sandrew VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1771281494Sandrew VM_ALLOC_ZERO); 1772281494Sandrew if (nkpg == NULL) 1773281494Sandrew panic("pmap_growkernel: no memory to grow kernel"); 1774281494Sandrew if ((nkpg->flags & PG_ZERO) == 0) 1775281494Sandrew pmap_zero_page(nkpg); 1776281494Sandrew paddr = VM_PAGE_TO_PHYS(nkpg); 1777281494Sandrew pmap_load_store(l2, paddr | L2_TABLE); 1778281494Sandrew PTE_SYNC(l2); 1779285212Sandrew pmap_invalidate_page(kernel_pmap, kernel_vm_end); 1780281494Sandrew 1781281494Sandrew kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1782281494Sandrew if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1783281494Sandrew kernel_vm_end = kernel_map->max_offset; 1784305531Sandrew break; 1785281494Sandrew } 1786281494Sandrew } 1787281494Sandrew} 1788281494Sandrew 1789281494Sandrew 1790281494Sandrew/*************************************************** 1791281494Sandrew * page management routines. 1792281494Sandrew ***************************************************/ 1793281494Sandrew 1794281494SandrewCTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1795281494SandrewCTASSERT(_NPCM == 3); 1796281494SandrewCTASSERT(_NPCPV == 168); 1797281494Sandrew 1798281494Sandrewstatic __inline struct pv_chunk * 1799281494Sandrewpv_to_chunk(pv_entry_t pv) 1800281494Sandrew{ 1801281494Sandrew 1802281494Sandrew return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1803281494Sandrew} 1804281494Sandrew 1805281494Sandrew#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1806281494Sandrew 1807281494Sandrew#define PC_FREE0 0xfffffffffffffffful 1808281494Sandrew#define PC_FREE1 0xfffffffffffffffful 1809281494Sandrew#define PC_FREE2 0x000000fffffffffful 1810281494Sandrew 1811281494Sandrewstatic const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1812281494Sandrew 1813281494Sandrew#if 0 1814281494Sandrew#ifdef PV_STATS 1815281494Sandrewstatic int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1816281494Sandrew 1817281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1818281494Sandrew "Current number of pv entry chunks"); 1819281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1820281494Sandrew "Current number of pv entry chunks allocated"); 1821281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1822281494Sandrew "Current number of pv entry chunks frees"); 1823281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1824281494Sandrew "Number of times tried to get a chunk page but failed."); 1825281494Sandrew 1826281494Sandrewstatic long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1827281494Sandrewstatic int pv_entry_spare; 1828281494Sandrew 1829281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1830281494Sandrew "Current number of pv entry frees"); 1831281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1832281494Sandrew "Current number of pv entry allocs"); 1833281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1834281494Sandrew "Current number of pv entries"); 1835281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, 
CTLFLAG_RD, &pv_entry_spare, 0, 1836281494Sandrew "Current number of spare pv entries"); 1837281494Sandrew#endif 1838281494Sandrew#endif /* 0 */ 1839281494Sandrew 1840281494Sandrew/* 1841281494Sandrew * We are in a serious low memory condition. Resort to 1842281494Sandrew * drastic measures to free some pages so we can allocate 1843281494Sandrew * another pv entry chunk. 1844281494Sandrew * 1845281494Sandrew * Returns NULL if PV entries were reclaimed from the specified pmap. 1846281494Sandrew * 1847281494Sandrew * We do not, however, unmap 2mpages because subsequent accesses will 1848281494Sandrew * allocate per-page pv entries until repromotion occurs, thereby 1849281494Sandrew * exacerbating the shortage of free pv entries. 1850281494Sandrew */ 1851281494Sandrewstatic vm_page_t 1852281494Sandrewreclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1853281494Sandrew{ 1854319210Sandrew struct pch new_tail; 1855319210Sandrew struct pv_chunk *pc; 1856319210Sandrew struct md_page *pvh; 1857319210Sandrew pd_entry_t *pde; 1858319210Sandrew pmap_t pmap; 1859319210Sandrew pt_entry_t *pte, tpte; 1860319210Sandrew pv_entry_t pv; 1861319210Sandrew vm_offset_t va; 1862319210Sandrew vm_page_t m, m_pc; 1863319210Sandrew struct spglist free; 1864319210Sandrew uint64_t inuse; 1865319210Sandrew int bit, field, freed, lvl; 1866281494Sandrew 1867319210Sandrew PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1868319210Sandrew KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL")); 1869319210Sandrew pmap = NULL; 1870319210Sandrew m_pc = NULL; 1871319210Sandrew SLIST_INIT(&free); 1872319210Sandrew TAILQ_INIT(&new_tail); 1873319210Sandrew mtx_lock(&pv_chunks_mutex); 1874319210Sandrew while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) { 1875319210Sandrew TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1876319210Sandrew mtx_unlock(&pv_chunks_mutex); 1877319210Sandrew if (pmap != pc->pc_pmap) { 1878319210Sandrew if (pmap != NULL && pmap != locked_pmap) 1879319210Sandrew PMAP_UNLOCK(pmap); 1880319210Sandrew pmap = pc->pc_pmap; 1881319210Sandrew /* Avoid deadlock and lock recursion. */ 1882319210Sandrew if (pmap > locked_pmap) { 1883319210Sandrew RELEASE_PV_LIST_LOCK(lockp); 1884319210Sandrew PMAP_LOCK(pmap); 1885319210Sandrew } else if (pmap != locked_pmap && 1886319210Sandrew !PMAP_TRYLOCK(pmap)) { 1887319210Sandrew pmap = NULL; 1888319210Sandrew TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 1889319210Sandrew mtx_lock(&pv_chunks_mutex); 1890319210Sandrew continue; 1891319210Sandrew } 1892319210Sandrew } 1893319210Sandrew 1894319210Sandrew /* 1895319210Sandrew * Destroy every non-wired, 4 KB page mapping in the chunk. 
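 *
 * Chunk bookkeeping sketch: each pv_chunk carries _NPCPV (168)
 * entries tracked by three 64-bit words, so a fully free chunk has
 *
 *	pc_map[0] == pc_map[1] == ~0ul		(2 x 64 entries)
 *	pc_map[2] == 0x000000fffffffffful	(the remaining 40)
 *
 * where a set bit means "entry free".  The loop below therefore
 * scans the inverted mask with ffsl() to visit every entry still in
 * use.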
1896319210Sandrew */ 1897319210Sandrew freed = 0; 1898319210Sandrew for (field = 0; field < _NPCM; field++) { 1899319210Sandrew for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 1900319210Sandrew inuse != 0; inuse &= ~(1UL << bit)) { 1901319210Sandrew bit = ffsl(inuse) - 1; 1902319210Sandrew pv = &pc->pc_pventry[field * 64 + bit]; 1903319210Sandrew va = pv->pv_va; 1904319210Sandrew pde = pmap_pde(pmap, va, &lvl); 1905319210Sandrew if (lvl != 2) 1906319210Sandrew continue; 1907319210Sandrew pte = pmap_l2_to_l3(pde, va); 1908319210Sandrew tpte = pmap_load(pte); 1909319210Sandrew if ((tpte & ATTR_SW_WIRED) != 0) 1910319210Sandrew continue; 1911319210Sandrew tpte = pmap_load_clear(pte); 1912319210Sandrew PTE_SYNC(pte); 1913319210Sandrew pmap_invalidate_page(pmap, va); 1914319210Sandrew m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); 1915319210Sandrew if (pmap_page_dirty(tpte)) 1916319210Sandrew vm_page_dirty(m); 1917319210Sandrew if ((tpte & ATTR_AF) != 0) 1918319210Sandrew vm_page_aflag_set(m, PGA_REFERENCED); 1919319210Sandrew CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1920319210Sandrew TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 1921319210Sandrew m->md.pv_gen++; 1922319210Sandrew if (TAILQ_EMPTY(&m->md.pv_list) && 1923319210Sandrew (m->flags & PG_FICTITIOUS) == 0) { 1924319210Sandrew pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 1925319210Sandrew if (TAILQ_EMPTY(&pvh->pv_list)) { 1926319210Sandrew vm_page_aflag_clear(m, 1927319210Sandrew PGA_WRITEABLE); 1928319210Sandrew } 1929319210Sandrew } 1930319210Sandrew pc->pc_map[field] |= 1UL << bit; 1931319210Sandrew pmap_unuse_l3(pmap, va, pmap_load(pde), &free); 1932319210Sandrew freed++; 1933319210Sandrew } 1934319210Sandrew } 1935319210Sandrew if (freed == 0) { 1936319210Sandrew TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 1937319210Sandrew mtx_lock(&pv_chunks_mutex); 1938319210Sandrew continue; 1939319210Sandrew } 1940319210Sandrew /* Every freed mapping is for a 4 KB page. */ 1941319210Sandrew pmap_resident_count_dec(pmap, freed); 1942319210Sandrew PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 1943319210Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 1944319210Sandrew PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 1945319210Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1946319210Sandrew if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && 1947319210Sandrew pc->pc_map[2] == PC_FREE2) { 1948319210Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1949319210Sandrew PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1950319210Sandrew PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1951319210Sandrew /* Entire chunk is free; return it. */ 1952319210Sandrew m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1953319210Sandrew dump_drop_page(m_pc->phys_addr); 1954319210Sandrew mtx_lock(&pv_chunks_mutex); 1955319210Sandrew break; 1956319210Sandrew } 1957319210Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1958319210Sandrew TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 1959319210Sandrew mtx_lock(&pv_chunks_mutex); 1960319210Sandrew /* One freed pv entry in locked_pmap is sufficient. 
*/ 1961319210Sandrew if (pmap == locked_pmap) 1962319210Sandrew break; 1963319210Sandrew } 1964319210Sandrew TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); 1965319210Sandrew mtx_unlock(&pv_chunks_mutex); 1966319210Sandrew if (pmap != NULL && pmap != locked_pmap) 1967319210Sandrew PMAP_UNLOCK(pmap); 1968319210Sandrew if (m_pc == NULL && !SLIST_EMPTY(&free)) { 1969319210Sandrew m_pc = SLIST_FIRST(&free); 1970319210Sandrew SLIST_REMOVE_HEAD(&free, plinks.s.ss); 1971319210Sandrew /* Recycle a freed page table page. */ 1972319210Sandrew m_pc->wire_count = 1; 1973319210Sandrew atomic_add_int(&vm_cnt.v_wire_count, 1); 1974319210Sandrew } 1975319210Sandrew pmap_free_zero_pages(&free); 1976319210Sandrew return (m_pc); 1977281494Sandrew} 1978281494Sandrew 1979281494Sandrew/* 1980281494Sandrew * free the pv_entry back to the free list 1981281494Sandrew */ 1982281494Sandrewstatic void 1983281494Sandrewfree_pv_entry(pmap_t pmap, pv_entry_t pv) 1984281494Sandrew{ 1985281494Sandrew struct pv_chunk *pc; 1986281494Sandrew int idx, field, bit; 1987281494Sandrew 1988281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1989281494Sandrew PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1990281494Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1991281494Sandrew PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1992281494Sandrew pc = pv_to_chunk(pv); 1993281494Sandrew idx = pv - &pc->pc_pventry[0]; 1994281494Sandrew field = idx / 64; 1995281494Sandrew bit = idx % 64; 1996281494Sandrew pc->pc_map[field] |= 1ul << bit; 1997281494Sandrew if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1998281494Sandrew pc->pc_map[2] != PC_FREE2) { 1999281494Sandrew /* 98% of the time, pc is already at the head of the list. */ 2000281494Sandrew if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 2001281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2002281494Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2003281494Sandrew } 2004281494Sandrew return; 2005281494Sandrew } 2006281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2007281494Sandrew free_pv_chunk(pc); 2008281494Sandrew} 2009281494Sandrew 2010281494Sandrewstatic void 2011281494Sandrewfree_pv_chunk(struct pv_chunk *pc) 2012281494Sandrew{ 2013281494Sandrew vm_page_t m; 2014281494Sandrew 2015281494Sandrew mtx_lock(&pv_chunks_mutex); 2016281494Sandrew TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2017281494Sandrew mtx_unlock(&pv_chunks_mutex); 2018281494Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 2019281494Sandrew PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 2020281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 2021281494Sandrew /* entire chunk is free, return it */ 2022281494Sandrew m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 2023281494Sandrew dump_drop_page(m->phys_addr); 2024288256Salc vm_page_unwire(m, PQ_NONE); 2025281494Sandrew vm_page_free(m); 2026281494Sandrew} 2027281494Sandrew 2028281494Sandrew/* 2029281494Sandrew * Returns a new PV entry, allocating a new PV chunk from the system when 2030281494Sandrew * needed. If this PV chunk allocation fails and a PV list lock pointer was 2031281494Sandrew * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 2032281494Sandrew * returned. 2033281494Sandrew * 2034281494Sandrew * The given PV list lock may be released. 
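 *
 * A sketch of the typical usage, following pmap_try_insert_pv_entry()
 * below (which passes NULL for the lock pointer to forbid
 * reclamation) and pmap_enter():
 *
 *	pv = get_pv_entry(pmap, &lock);		// lock may be dropped
 *	pv->pv_va = va;
 *	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
 *	m->md.pv_gen++;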
2035281494Sandrew */ 2036281494Sandrewstatic pv_entry_t 2037281494Sandrewget_pv_entry(pmap_t pmap, struct rwlock **lockp) 2038281494Sandrew{ 2039281494Sandrew int bit, field; 2040281494Sandrew pv_entry_t pv; 2041281494Sandrew struct pv_chunk *pc; 2042281494Sandrew vm_page_t m; 2043281494Sandrew 2044281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2045281494Sandrew PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 2046281494Sandrewretry: 2047281494Sandrew pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2048281494Sandrew if (pc != NULL) { 2049281494Sandrew for (field = 0; field < _NPCM; field++) { 2050281494Sandrew if (pc->pc_map[field]) { 2051281494Sandrew bit = ffsl(pc->pc_map[field]) - 1; 2052281494Sandrew break; 2053281494Sandrew } 2054281494Sandrew } 2055281494Sandrew if (field < _NPCM) { 2056281494Sandrew pv = &pc->pc_pventry[field * 64 + bit]; 2057281494Sandrew pc->pc_map[field] &= ~(1ul << bit); 2058281494Sandrew /* If this was the last item, move it to tail */ 2059281494Sandrew if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 2060281494Sandrew pc->pc_map[2] == 0) { 2061281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2062281494Sandrew TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 2063281494Sandrew pc_list); 2064281494Sandrew } 2065281494Sandrew PV_STAT(atomic_add_long(&pv_entry_count, 1)); 2066281494Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 2067281494Sandrew return (pv); 2068281494Sandrew } 2069281494Sandrew } 2070281494Sandrew /* No free items, allocate another chunk */ 2071281494Sandrew m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 2072281494Sandrew VM_ALLOC_WIRED); 2073281494Sandrew if (m == NULL) { 2074281494Sandrew if (lockp == NULL) { 2075281494Sandrew PV_STAT(pc_chunk_tryfail++); 2076281494Sandrew return (NULL); 2077281494Sandrew } 2078281494Sandrew m = reclaim_pv_chunk(pmap, lockp); 2079281494Sandrew if (m == NULL) 2080281494Sandrew goto retry; 2081281494Sandrew } 2082281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 2083281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 2084281494Sandrew dump_add_page(m->phys_addr); 2085281494Sandrew pc = (void *)PHYS_TO_DMAP(m->phys_addr); 2086281494Sandrew pc->pc_pmap = pmap; 2087281494Sandrew pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 2088281494Sandrew pc->pc_map[1] = PC_FREE1; 2089281494Sandrew pc->pc_map[2] = PC_FREE2; 2090281494Sandrew mtx_lock(&pv_chunks_mutex); 2091281494Sandrew TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 2092281494Sandrew mtx_unlock(&pv_chunks_mutex); 2093281494Sandrew pv = &pc->pc_pventry[0]; 2094281494Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2095281494Sandrew PV_STAT(atomic_add_long(&pv_entry_count, 1)); 2096281494Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 2097281494Sandrew return (pv); 2098281494Sandrew} 2099281494Sandrew 2100281494Sandrew/* 2101305882Sandrew * Ensure that the number of spare PV entries in the specified pmap meets or 2102305882Sandrew * exceeds the given count, "needed". 2103305882Sandrew * 2104305882Sandrew * The given PV list lock may be released. 
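 *
 * For example, demoting one 2MB mapping creates Ln_ENTRIES - 1 (511)
 * new 4KB pv entries, so a caller would reserve them first (a
 * sketch; see pmap_pv_demote_l2() below):
 *
 *	reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
 *	pmap_pv_demote_l2(pmap, va, pa, lockp);	// cannot run out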
2105305882Sandrew */ 2106305882Sandrewstatic void 2107305882Sandrewreserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) 2108305882Sandrew{ 2109305882Sandrew struct pch new_tail; 2110305882Sandrew struct pv_chunk *pc; 2111305882Sandrew int avail, free; 2112305882Sandrew vm_page_t m; 2113305882Sandrew 2114305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2115305882Sandrew KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); 2116305882Sandrew 2117305882Sandrew /* 2118305882Sandrew * Newly allocated PV chunks must be stored in a private list until 2119305882Sandrew * the required number of PV chunks have been allocated. Otherwise, 2120305882Sandrew * reclaim_pv_chunk() could recycle one of these chunks. In 2121305882Sandrew * contrast, these chunks must be added to the pmap upon allocation. 2122305882Sandrew */ 2123305882Sandrew TAILQ_INIT(&new_tail); 2124305882Sandrewretry: 2125305882Sandrew avail = 0; 2126305882Sandrew TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { 2127305882Sandrew bit_count((bitstr_t *)pc->pc_map, 0, 2128305882Sandrew sizeof(pc->pc_map) * NBBY, &free); 2129305882Sandrew if (free == 0) 2130305882Sandrew break; 2131305882Sandrew avail += free; 2132305882Sandrew if (avail >= needed) 2133305882Sandrew break; 2134305882Sandrew } 2135305882Sandrew for (; avail < needed; avail += _NPCPV) { 2136305882Sandrew m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 2137305882Sandrew VM_ALLOC_WIRED); 2138305882Sandrew if (m == NULL) { 2139305882Sandrew m = reclaim_pv_chunk(pmap, lockp); 2140305882Sandrew if (m == NULL) 2141305882Sandrew goto retry; 2142305882Sandrew } 2143305882Sandrew PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 2144305882Sandrew PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 2145305882Sandrew dump_add_page(m->phys_addr); 2146305882Sandrew pc = (void *)PHYS_TO_DMAP(m->phys_addr); 2147305882Sandrew pc->pc_pmap = pmap; 2148305882Sandrew pc->pc_map[0] = PC_FREE0; 2149305882Sandrew pc->pc_map[1] = PC_FREE1; 2150305882Sandrew pc->pc_map[2] = PC_FREE2; 2151305882Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2152305882Sandrew TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 2153305882Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); 2154305882Sandrew } 2155305882Sandrew if (!TAILQ_EMPTY(&new_tail)) { 2156305882Sandrew mtx_lock(&pv_chunks_mutex); 2157305882Sandrew TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); 2158305882Sandrew mtx_unlock(&pv_chunks_mutex); 2159305882Sandrew } 2160305882Sandrew} 2161305882Sandrew 2162305882Sandrew/* 2163281494Sandrew * First find and then remove the pv entry for the specified pmap and virtual 2164281494Sandrew * address from the specified pv list. Returns the pv entry if found and NULL 2165281494Sandrew * otherwise. This operation can be performed on pv lists for either 4KB or 2166281494Sandrew * 2MB page mappings. 
2167281494Sandrew */ 2168281494Sandrewstatic __inline pv_entry_t 2169281494Sandrewpmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2170281494Sandrew{ 2171281494Sandrew pv_entry_t pv; 2172281494Sandrew 2173281494Sandrew TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 2174281494Sandrew if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2175281494Sandrew TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 2176281494Sandrew pvh->pv_gen++; 2177281494Sandrew break; 2178281494Sandrew } 2179281494Sandrew } 2180281494Sandrew return (pv); 2181281494Sandrew} 2182281494Sandrew 2183281494Sandrew/* 2184305882Sandrew * After demotion from a 2MB page mapping to 512 4KB page mappings, 2185305882Sandrew * destroy the pv entry for the 2MB page mapping and reinstantiate the pv 2186305882Sandrew * entries for each of the 4KB page mappings. 2187305882Sandrew */ 2188305882Sandrewstatic void 2189305882Sandrewpmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, 2190305882Sandrew struct rwlock **lockp) 2191305882Sandrew{ 2192305882Sandrew struct md_page *pvh; 2193305882Sandrew struct pv_chunk *pc; 2194305882Sandrew pv_entry_t pv; 2195305882Sandrew vm_offset_t va_last; 2196305882Sandrew vm_page_t m; 2197305882Sandrew int bit, field; 2198305882Sandrew 2199305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2200305882Sandrew KASSERT((pa & L2_OFFSET) == 0, 2201305882Sandrew ("pmap_pv_demote_l2: pa is not 2mpage aligned")); 2202305882Sandrew CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); 2203305882Sandrew 2204305882Sandrew /* 2205305882Sandrew * Transfer the 2mpage's pv entry for this mapping to the first 2206305882Sandrew * page's pv list. Once this transfer begins, the pv list lock 2207305882Sandrew * must not be released until the last pv entry is reinstantiated. 2208305882Sandrew */ 2209305882Sandrew pvh = pa_to_pvh(pa); 2210305882Sandrew va = va & ~L2_OFFSET; 2211305882Sandrew pv = pmap_pvh_remove(pvh, pmap, va); 2212305882Sandrew KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); 2213305882Sandrew m = PHYS_TO_VM_PAGE(pa); 2214305882Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2215305882Sandrew m->md.pv_gen++; 2216305882Sandrew /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. 
*/ 2217305882Sandrew PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1)); 2218305882Sandrew va_last = va + L2_SIZE - PAGE_SIZE; 2219305882Sandrew for (;;) { 2220305882Sandrew pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2221305882Sandrew KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || 2222305882Sandrew pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); 2223305882Sandrew for (field = 0; field < _NPCM; field++) { 2224305882Sandrew while (pc->pc_map[field]) { 2225305882Sandrew bit = ffsl(pc->pc_map[field]) - 1; 2226305882Sandrew pc->pc_map[field] &= ~(1ul << bit); 2227305882Sandrew pv = &pc->pc_pventry[field * 64 + bit]; 2228305882Sandrew va += PAGE_SIZE; 2229305882Sandrew pv->pv_va = va; 2230305882Sandrew m++; 2231305882Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2232305882Sandrew ("pmap_pv_demote_l2: page %p is not managed", m)); 2233305882Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2234305882Sandrew m->md.pv_gen++; 2235305882Sandrew if (va == va_last) 2236305882Sandrew goto out; 2237305882Sandrew } 2238305882Sandrew } 2239305882Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2240305882Sandrew TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2241305882Sandrew } 2242305882Sandrewout: 2243305882Sandrew if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { 2244305882Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2245305882Sandrew TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2246305882Sandrew } 2247305882Sandrew PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1)); 2248305882Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1)); 2249305882Sandrew} 2250305882Sandrew 2251305882Sandrew/* 2252281494Sandrew * First find and then destroy the pv entry for the specified pmap and virtual 2253281494Sandrew * address. This operation can be performed on pv lists for either 4KB or 2MB 2254281494Sandrew * page mappings. 2255281494Sandrew */ 2256281494Sandrewstatic void 2257281494Sandrewpmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2258281494Sandrew{ 2259281494Sandrew pv_entry_t pv; 2260281494Sandrew 2261281494Sandrew pv = pmap_pvh_remove(pvh, pmap, va); 2262281494Sandrew KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2263281494Sandrew free_pv_entry(pmap, pv); 2264281494Sandrew} 2265281494Sandrew 2266281494Sandrew/* 2267281494Sandrew * Conditionally create the PV entry for a 4KB page mapping if the required 2268281494Sandrew * memory can be allocated without resorting to reclamation. 2269281494Sandrew */ 2270281494Sandrewstatic boolean_t 2271281494Sandrewpmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 2272281494Sandrew struct rwlock **lockp) 2273281494Sandrew{ 2274281494Sandrew pv_entry_t pv; 2275281494Sandrew 2276281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2277281494Sandrew /* Pass NULL instead of the lock pointer to disable reclamation. 
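 * With a NULL lock pointer, get_pv_entry() simply returns NULL when
 * no pv chunk can be allocated, instead of tearing mappings out of
 * an arbitrary pmap via reclaim_pv_chunk(); this path can fail, but
 * it never reclaims.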
*/ 2278281494Sandrew if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 2279281494Sandrew pv->pv_va = va; 2280281494Sandrew CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 2281281494Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2282281494Sandrew m->md.pv_gen++; 2283281494Sandrew return (TRUE); 2284281494Sandrew } else 2285281494Sandrew return (FALSE); 2286281494Sandrew} 2287281494Sandrew 2288281494Sandrew/* 2289281494Sandrew * pmap_remove_l3: do the work to unmap a single page in a process 2290281494Sandrew */ 2291281494Sandrewstatic int 2292305531Sandrewpmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 2293281494Sandrew pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 2294281494Sandrew{ 2295305882Sandrew struct md_page *pvh; 2296281494Sandrew pt_entry_t old_l3; 2297281494Sandrew vm_page_t m; 2298281494Sandrew 2299281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2300281494Sandrew if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) 2301281494Sandrew cpu_dcache_wb_range(va, L3_SIZE); 2302281494Sandrew old_l3 = pmap_load_clear(l3); 2303281494Sandrew PTE_SYNC(l3); 2304285212Sandrew pmap_invalidate_page(pmap, va); 2305281494Sandrew if (old_l3 & ATTR_SW_WIRED) 2306281494Sandrew pmap->pm_stats.wired_count -= 1; 2307281494Sandrew pmap_resident_count_dec(pmap, 1); 2308281494Sandrew if (old_l3 & ATTR_SW_MANAGED) { 2309281494Sandrew m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); 2310281494Sandrew if (pmap_page_dirty(old_l3)) 2311281494Sandrew vm_page_dirty(m); 2312281494Sandrew if (old_l3 & ATTR_AF) 2313281494Sandrew vm_page_aflag_set(m, PGA_REFERENCED); 2314281494Sandrew CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 2315281494Sandrew pmap_pvh_free(&m->md, pmap, va); 2316305882Sandrew if (TAILQ_EMPTY(&m->md.pv_list) && 2317305882Sandrew (m->flags & PG_FICTITIOUS) == 0) { 2318305882Sandrew pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2319305882Sandrew if (TAILQ_EMPTY(&pvh->pv_list)) 2320305882Sandrew vm_page_aflag_clear(m, PGA_WRITEABLE); 2321305882Sandrew } 2322281494Sandrew } 2323281494Sandrew return (pmap_unuse_l3(pmap, va, l2e, free)); 2324281494Sandrew} 2325281494Sandrew 2326281494Sandrew/* 2327281494Sandrew * Remove the given range of addresses from the specified map. 2328281494Sandrew * 2329281494Sandrew * It is assumed that the start and end are properly 2330281494Sandrew * rounded to the page size. 2331281494Sandrew */ 2332281494Sandrewvoid 2333281494Sandrewpmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2334281494Sandrew{ 2335281494Sandrew struct rwlock *lock; 2336281494Sandrew vm_offset_t va, va_next; 2337297446Sandrew pd_entry_t *l0, *l1, *l2; 2338281494Sandrew pt_entry_t l3_paddr, *l3; 2339281494Sandrew struct spglist free; 2340281494Sandrew 2341281494Sandrew /* 2342281494Sandrew * Perform an unsynchronized read. This is, however, safe.
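 * It is safe because the count is only advisory at this point: a
 * zero read can only happen when there is nothing to remove, while
 * a nonzero read merely sends us into the locked scan below.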
2343281494Sandrew */ 2344281494Sandrew if (pmap->pm_stats.resident_count == 0) 2345281494Sandrew return; 2346281494Sandrew 2347281494Sandrew SLIST_INIT(&free); 2348281494Sandrew 2349281494Sandrew PMAP_LOCK(pmap); 2350281494Sandrew 2351281494Sandrew lock = NULL; 2352281494Sandrew for (; sva < eva; sva = va_next) { 2353281494Sandrew 2354281494Sandrew if (pmap->pm_stats.resident_count == 0) 2355281494Sandrew break; 2356281494Sandrew 2357297446Sandrew l0 = pmap_l0(pmap, sva); 2358297446Sandrew if (pmap_load(l0) == 0) { 2359297446Sandrew va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2360297446Sandrew if (va_next < sva) 2361297446Sandrew va_next = eva; 2362297446Sandrew continue; 2363297446Sandrew } 2364297446Sandrew 2365297446Sandrew l1 = pmap_l0_to_l1(l0, sva); 2366285045Sandrew if (pmap_load(l1) == 0) { 2367281494Sandrew va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2368281494Sandrew if (va_next < sva) 2369281494Sandrew va_next = eva; 2370281494Sandrew continue; 2371281494Sandrew } 2372281494Sandrew 2373281494Sandrew /* 2374281494Sandrew * Calculate index for next page table. 2375281494Sandrew */ 2376281494Sandrew va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2377281494Sandrew if (va_next < sva) 2378281494Sandrew va_next = eva; 2379281494Sandrew 2380281494Sandrew l2 = pmap_l1_to_l2(l1, sva); 2381281494Sandrew if (l2 == NULL) 2382281494Sandrew continue; 2383281494Sandrew 2384288445Sandrew l3_paddr = pmap_load(l2); 2385281494Sandrew 2386305882Sandrew if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { 2387305882Sandrew /* TODO: Add pmap_remove_l2 */ 2388305882Sandrew if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET, 2389305882Sandrew &lock) == NULL) 2390305882Sandrew continue; 2391305882Sandrew l3_paddr = pmap_load(l2); 2392305882Sandrew } 2393305882Sandrew 2394281494Sandrew /* 2395281494Sandrew * Weed out invalid mappings. 2396281494Sandrew */ 2397281494Sandrew if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) 2398281494Sandrew continue; 2399281494Sandrew 2400281494Sandrew /* 2401281494Sandrew * Limit our scan to either the end of the va represented 2402281494Sandrew * by the current page table page, or to the end of the 2403281494Sandrew * range being removed. 2404281494Sandrew */ 2405281494Sandrew if (va_next > eva) 2406281494Sandrew va_next = eva; 2407281494Sandrew 2408281494Sandrew va = va_next; 2409281494Sandrew for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2410281494Sandrew sva += L3_SIZE) { 2411281494Sandrew if (l3 == NULL) 2412281494Sandrew panic("l3 == NULL"); 2413285045Sandrew if (pmap_load(l3) == 0) { 2414281494Sandrew if (va != va_next) { 2415281494Sandrew pmap_invalidate_range(pmap, va, sva); 2416281494Sandrew va = va_next; 2417281494Sandrew } 2418281494Sandrew continue; 2419281494Sandrew } 2420281494Sandrew if (va == va_next) 2421281494Sandrew va = sva; 2422281494Sandrew if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, 2423281494Sandrew &lock)) { 2424281494Sandrew sva += L3_SIZE; 2425281494Sandrew break; 2426281494Sandrew } 2427281494Sandrew } 2428281494Sandrew if (va != va_next) 2429281494Sandrew pmap_invalidate_range(pmap, va, sva); 2430281494Sandrew } 2431281494Sandrew if (lock != NULL) 2432281494Sandrew rw_wunlock(lock); 2433281494Sandrew PMAP_UNLOCK(pmap); 2434281494Sandrew pmap_free_zero_pages(&free); 2435281494Sandrew} 2436281494Sandrew 2437281494Sandrew/* 2438281494Sandrew * Routine: pmap_remove_all 2439281494Sandrew * Function: 2440281494Sandrew * Removes this physical page from 2441281494Sandrew * all physical maps in which it resides. 
2442281494Sandrew * Reflects back modify bits to the pager. 2443281494Sandrew * 2444281494Sandrew * Notes: 2445281494Sandrew * Original versions of this routine were very 2446281494Sandrew * inefficient because they iteratively called 2447281494Sandrew * pmap_remove (slow...) 2448281494Sandrew */ 2449281494Sandrew 2450281494Sandrewvoid 2451281494Sandrewpmap_remove_all(vm_page_t m) 2452281494Sandrew{ 2453305882Sandrew struct md_page *pvh; 2454281494Sandrew pv_entry_t pv; 2455281494Sandrew pmap_t pmap; 2456305879Sandrew struct rwlock *lock; 2457297446Sandrew pd_entry_t *pde, tpde; 2458297446Sandrew pt_entry_t *pte, tpte; 2459305882Sandrew vm_offset_t va; 2460281494Sandrew struct spglist free; 2461305882Sandrew int lvl, pvh_gen, md_gen; 2462281494Sandrew 2463281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2464281494Sandrew ("pmap_remove_all: page %p is not managed", m)); 2465281494Sandrew SLIST_INIT(&free); 2466305879Sandrew lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2467305882Sandrew pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : 2468305882Sandrew pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2469305879Sandrewretry: 2470305879Sandrew rw_wlock(lock); 2471305882Sandrew while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 2472305882Sandrew pmap = PV_PMAP(pv); 2473305882Sandrew if (!PMAP_TRYLOCK(pmap)) { 2474305882Sandrew pvh_gen = pvh->pv_gen; 2475305882Sandrew rw_wunlock(lock); 2476305882Sandrew PMAP_LOCK(pmap); 2477305882Sandrew rw_wlock(lock); 2478305882Sandrew if (pvh_gen != pvh->pv_gen) { 2479305882Sandrew rw_wunlock(lock); 2480305882Sandrew PMAP_UNLOCK(pmap); 2481305882Sandrew goto retry; 2482305882Sandrew } 2483305882Sandrew } 2484305882Sandrew va = pv->pv_va; 2485305882Sandrew pte = pmap_pte(pmap, va, &lvl); 2486305882Sandrew KASSERT(pte != NULL, 2487305882Sandrew ("pmap_remove_all: no page table entry found")); 2488305882Sandrew KASSERT(lvl == 2, 2489305882Sandrew ("pmap_remove_all: invalid pte level %d", lvl)); 2490305882Sandrew 2491305882Sandrew pmap_demote_l2_locked(pmap, pte, va, &lock); 2492305882Sandrew PMAP_UNLOCK(pmap); 2493305882Sandrew } 2494281494Sandrew while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2495281494Sandrew pmap = PV_PMAP(pv); 2496305879Sandrew if (!PMAP_TRYLOCK(pmap)) { 2497305882Sandrew pvh_gen = pvh->pv_gen; 2498305879Sandrew md_gen = m->md.pv_gen; 2499305879Sandrew rw_wunlock(lock); 2500305879Sandrew PMAP_LOCK(pmap); 2501305879Sandrew rw_wlock(lock); 2502305882Sandrew if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { 2503305879Sandrew rw_wunlock(lock); 2504305879Sandrew PMAP_UNLOCK(pmap); 2505305879Sandrew goto retry; 2506305879Sandrew } 2507305879Sandrew } 2508281494Sandrew pmap_resident_count_dec(pmap, 1); 2509297446Sandrew 2510297446Sandrew pde = pmap_pde(pmap, pv->pv_va, &lvl); 2511297446Sandrew KASSERT(pde != NULL, 2512297446Sandrew ("pmap_remove_all: no page directory entry found")); 2513297446Sandrew KASSERT(lvl == 2, 2514297446Sandrew ("pmap_remove_all: invalid pde level %d", lvl)); 2515297446Sandrew tpde = pmap_load(pde); 2516297446Sandrew 2517297446Sandrew pte = pmap_l2_to_l3(pde, pv->pv_va); 2518297446Sandrew tpte = pmap_load(pte); 2519281494Sandrew if (pmap_is_current(pmap) && 2520297446Sandrew pmap_l3_valid_cacheable(tpte)) 2521281494Sandrew cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2522297446Sandrew pmap_load_clear(pte); 2523297446Sandrew PTE_SYNC(pte); 2524285212Sandrew pmap_invalidate_page(pmap, pv->pv_va); 2525297446Sandrew if (tpte & ATTR_SW_WIRED) 2526281494Sandrew pmap->pm_stats.wired_count--; 2527297446Sandrew if ((tpte & ATTR_AF) != 0) 
2528281494Sandrew vm_page_aflag_set(m, PGA_REFERENCED); 2529281494Sandrew 2530281494Sandrew /* 2531281494Sandrew * Update the vm_page_t clean and reference bits. 2532281494Sandrew */ 2533297446Sandrew if (pmap_page_dirty(tpte)) 2534281494Sandrew vm_page_dirty(m); 2535297446Sandrew pmap_unuse_l3(pmap, pv->pv_va, tpde, &free); 2536281494Sandrew TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2537281494Sandrew m->md.pv_gen++; 2538281494Sandrew free_pv_entry(pmap, pv); 2539281494Sandrew PMAP_UNLOCK(pmap); 2540281494Sandrew } 2541281494Sandrew vm_page_aflag_clear(m, PGA_WRITEABLE); 2542305879Sandrew rw_wunlock(lock); 2543281494Sandrew pmap_free_zero_pages(&free); 2544281494Sandrew} 2545281494Sandrew 2546281494Sandrew/* 2547281494Sandrew * Set the physical protection on the 2548281494Sandrew * specified range of this map as requested. 2549281494Sandrew */ 2550281494Sandrewvoid 2551281494Sandrewpmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2552281494Sandrew{ 2553281494Sandrew vm_offset_t va, va_next; 2554297446Sandrew pd_entry_t *l0, *l1, *l2; 2555319203Sandrew pt_entry_t *l3p, l3, nbits; 2556281494Sandrew 2557319203Sandrew KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); 2558319203Sandrew if (prot == VM_PROT_NONE) { 2559281494Sandrew pmap_remove(pmap, sva, eva); 2560281494Sandrew return; 2561281494Sandrew } 2562281494Sandrew 2563319203Sandrew if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == 2564319203Sandrew (VM_PROT_WRITE | VM_PROT_EXECUTE)) 2565281494Sandrew return; 2566281494Sandrew 2567281494Sandrew PMAP_LOCK(pmap); 2568281494Sandrew for (; sva < eva; sva = va_next) { 2569281494Sandrew 2570297446Sandrew l0 = pmap_l0(pmap, sva); 2571297446Sandrew if (pmap_load(l0) == 0) { 2572297446Sandrew va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2573297446Sandrew if (va_next < sva) 2574297446Sandrew va_next = eva; 2575297446Sandrew continue; 2576297446Sandrew } 2577297446Sandrew 2578297446Sandrew l1 = pmap_l0_to_l1(l0, sva); 2579285045Sandrew if (pmap_load(l1) == 0) { 2580281494Sandrew va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2581281494Sandrew if (va_next < sva) 2582281494Sandrew va_next = eva; 2583281494Sandrew continue; 2584281494Sandrew } 2585281494Sandrew 2586281494Sandrew va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2587281494Sandrew if (va_next < sva) 2588281494Sandrew va_next = eva; 2589281494Sandrew 2590281494Sandrew l2 = pmap_l1_to_l2(l1, sva); 2591305882Sandrew if (pmap_load(l2) == 0) 2592281494Sandrew continue; 2593281494Sandrew 2594305882Sandrew if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { 2595305882Sandrew l3p = pmap_demote_l2(pmap, l2, sva); 2596305882Sandrew if (l3p == NULL) 2597305882Sandrew continue; 2598305882Sandrew } 2599305882Sandrew KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, 2600305882Sandrew ("pmap_protect: Invalid L2 entry after demotion")); 2601305882Sandrew 2602281494Sandrew if (va_next > eva) 2603281494Sandrew va_next = eva; 2604281494Sandrew 2605281494Sandrew va = va_next; 2606281494Sandrew for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 2607281494Sandrew sva += L3_SIZE) { 2608281494Sandrew l3 = pmap_load(l3p); 2609319203Sandrew if (!pmap_l3_valid(l3)) 2610319203Sandrew continue; 2611319203Sandrew 2612319203Sandrew nbits = 0; 2613319203Sandrew if ((prot & VM_PROT_WRITE) == 0) { 2614317354Skib if ((l3 & ATTR_SW_MANAGED) && 2615317354Skib pmap_page_dirty(l3)) { 2616317354Skib vm_page_dirty(PHYS_TO_VM_PAGE(l3 & 2617317354Skib ~ATTR_MASK)); 2618317354Skib } 2619319203Sandrew nbits |= ATTR_AP(ATTR_AP_RO); 2620281494Sandrew 
} 2621319203Sandrew if ((prot & VM_PROT_EXECUTE) == 0) 2622319203Sandrew nbits |= ATTR_XN; 2623319203Sandrew 2624319203Sandrew pmap_set(l3p, nbits); 2625319203Sandrew PTE_SYNC(l3p); 2626319203Sandrew /* XXX: Use pmap_invalidate_range */ 2627323845Sandrew pmap_invalidate_page(pmap, sva); 2628281494Sandrew } 2629281494Sandrew } 2630281494Sandrew PMAP_UNLOCK(pmap); 2631281494Sandrew} 2632281494Sandrew 2633281494Sandrew/* 2634305882Sandrew * Inserts the specified page table page into the specified pmap's collection 2635305882Sandrew * of idle page table pages. Each of a pmap's page table pages is responsible 2636305882Sandrew * for mapping a distinct range of virtual addresses. The pmap's collection is 2637305882Sandrew * ordered by this virtual address range. 2638305882Sandrew */ 2639305882Sandrewstatic __inline int 2640305882Sandrewpmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) 2641305882Sandrew{ 2642305882Sandrew 2643305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2644305882Sandrew return (vm_radix_insert(&pmap->pm_root, mpte)); 2645305882Sandrew} 2646305882Sandrew 2647305882Sandrew/* 2648318716Smarkj * Removes the page table page mapping the specified virtual address from the 2649318716Smarkj * specified pmap's collection of idle page table pages, and returns it. 2650318716Smarkj * Otherwise, returns NULL if there is no page table page corresponding to the 2651318716Smarkj * specified virtual address. 2652305882Sandrew */ 2653305882Sandrewstatic __inline vm_page_t 2654318716Smarkjpmap_remove_pt_page(pmap_t pmap, vm_offset_t va) 2655305882Sandrew{ 2656305882Sandrew 2657305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2658318716Smarkj return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va))); 2659305882Sandrew} 2660305882Sandrew 2661305882Sandrew/* 2662305882Sandrew * Performs a break-before-make update of a pmap entry. This is needed when 2663305882Sandrew * either promoting or demoting pages to ensure the TLB doesn't get into an 2664305882Sandrew * inconsistent state. 2665305882Sandrew */ 2666305882Sandrewstatic void 2667305882Sandrewpmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, 2668305882Sandrew vm_offset_t va, vm_size_t size) 2669305882Sandrew{ 2670305882Sandrew register_t intr; 2671305882Sandrew 2672305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2673305882Sandrew 2674305882Sandrew /* 2675305882Sandrew * Ensure we don't get switched out with the page table in an 2676305882Sandrew * inconsistent state. We also need to ensure no interrupts fire 2677305882Sandrew * as they may make use of an address we are about to invalidate. 2678305882Sandrew */ 2679305882Sandrew intr = intr_disable(); 2680305882Sandrew critical_enter(); 2681305882Sandrew 2682305882Sandrew /* Clear the old mapping */ 2683305882Sandrew pmap_load_clear(pte); 2684305882Sandrew PTE_SYNC(pte); 2685305882Sandrew pmap_invalidate_range(pmap, va, va + size); 2686305882Sandrew 2687305882Sandrew /* Create the new mapping */ 2688305882Sandrew pmap_load_store(pte, newpte); 2689305882Sandrew PTE_SYNC(pte); 2690305882Sandrew 2691305882Sandrew critical_exit(); 2692305882Sandrew intr_restore(intr); 2693305882Sandrew} 2694305882Sandrew 2695325238Smarkj#if VM_NRESERVLEVEL > 0 2696305882Sandrew/* 2697305882Sandrew * After promotion from 512 4KB page mappings to a single 2MB page mapping, 2698305882Sandrew * replace the many pv entries for the 4KB page mappings by a single pv entry 2699305882Sandrew * for the 2MB page mapping. 
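 *
 * Net effect on the pv accounting (a sketch): of the 512 4KB pv
 * entries, one is moved to the 2mpage's pv list and the remaining
 * 511 are freed; this is the mirror image of pmap_pv_demote_l2()
 * above.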
2700305882Sandrew */ 2701305882Sandrewstatic void 2702305882Sandrewpmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, 2703305882Sandrew struct rwlock **lockp) 2704305882Sandrew{ 2705305882Sandrew struct md_page *pvh; 2706305882Sandrew pv_entry_t pv; 2707305882Sandrew vm_offset_t va_last; 2708305882Sandrew vm_page_t m; 2709305882Sandrew 2710305882Sandrew KASSERT((pa & L2_OFFSET) == 0, 2711305882Sandrew ("pmap_pv_promote_l2: pa is not 2mpage aligned")); 2712305882Sandrew CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); 2713305882Sandrew 2714305882Sandrew /* 2715305882Sandrew * Transfer the first page's pv entry for this mapping to the 2mpage's 2716305882Sandrew * pv list. Aside from avoiding the cost of a call to get_pv_entry(), 2717305882Sandrew * a transfer avoids the possibility that get_pv_entry() calls 2718305882Sandrew * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the 2719305882Sandrew * mappings that is being promoted. 2720305882Sandrew */ 2721305882Sandrew m = PHYS_TO_VM_PAGE(pa); 2722305882Sandrew va = va & ~L2_OFFSET; 2723305882Sandrew pv = pmap_pvh_remove(&m->md, pmap, va); 2724305882Sandrew KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found")); 2725305882Sandrew pvh = pa_to_pvh(pa); 2726305882Sandrew TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 2727305882Sandrew pvh->pv_gen++; 2728305882Sandrew /* Free the remaining NPTEPG - 1 pv entries. */ 2729305882Sandrew va_last = va + L2_SIZE - PAGE_SIZE; 2730305882Sandrew do { 2731305882Sandrew m++; 2732305882Sandrew va += PAGE_SIZE; 2733305882Sandrew pmap_pvh_free(&m->md, pmap, va); 2734305882Sandrew } while (va < va_last); 2735305882Sandrew} 2736305882Sandrew 2737305882Sandrew/* 2738305882Sandrew * Tries to promote the 512, contiguous 4KB page mappings that are within a 2739305882Sandrew * single level 2 table entry to a single 2MB page mapping. For promotion 2740305882Sandrew * to occur, two conditions must be met: (1) the 4KB page mappings must map 2741305882Sandrew * aligned, contiguous physical memory and (2) the 4KB page mappings must have 2742305882Sandrew * identical characteristics. 
2743305882Sandrew */
2744305882Sandrewstatic void
2745305882Sandrewpmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
2746305882Sandrew    struct rwlock **lockp)
2747305882Sandrew{
2748305882Sandrew	pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
2749305882Sandrew	vm_page_t mpte;
2750305882Sandrew	vm_offset_t sva;
2751305882Sandrew
2752305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2753305882Sandrew
2754305882Sandrew	sva = va & ~L2_OFFSET;
2755305882Sandrew	firstl3 = pmap_l2_to_l3(l2, sva);
2756305882Sandrew	newl2 = pmap_load(firstl3);
2757305882Sandrew
2758305882Sandrew	/* Check that the alignment is valid */
2759305882Sandrew	if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) {
2760305882Sandrew		atomic_add_long(&pmap_l2_p_failures, 1);
2761305882Sandrew		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
2762305882Sandrew		    " in pmap %p", va, pmap);
2763305882Sandrew		return;
2764305882Sandrew	}
2765305882Sandrew
2766305882Sandrew	pa = newl2 + L2_SIZE - PAGE_SIZE;
2767305882Sandrew	for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
2768305882Sandrew		oldl3 = pmap_load(l3);
2769305882Sandrew		if (oldl3 != pa) {
2770305882Sandrew			atomic_add_long(&pmap_l2_p_failures, 1);
2771305882Sandrew			CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
2772305882Sandrew			    " in pmap %p", va, pmap);
2773305882Sandrew			return;
2774305882Sandrew		}
2775305882Sandrew		pa -= PAGE_SIZE;
2776305882Sandrew	}
2777305882Sandrew
2778305882Sandrew	/*
2779305882Sandrew	 * Save the page table page in its current state until the L2
2780305882Sandrew	 * mapping the superpage is demoted by pmap_demote_l2() or
2781305882Sandrew	 * destroyed by pmap_remove_l3().
2782305882Sandrew	 */
2783305882Sandrew	mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
2784305882Sandrew	KASSERT(mpte >= vm_page_array &&
2785305882Sandrew	    mpte < &vm_page_array[vm_page_array_size],
2786305882Sandrew	    ("pmap_promote_l2: page table page is out of range"));
2787305882Sandrew	KASSERT(mpte->pindex == pmap_l2_pindex(va),
2788305882Sandrew	    ("pmap_promote_l2: page table page's pindex is wrong"));
2789305882Sandrew	if (pmap_insert_pt_page(pmap, mpte)) {
2790305882Sandrew		atomic_add_long(&pmap_l2_p_failures, 1);
2791305882Sandrew		CTR2(KTR_PMAP,
2792305882Sandrew		    "pmap_promote_l2: failure for va %#lx in pmap %p", va,
2793305882Sandrew		    pmap);
2794305882Sandrew		return;
2795305882Sandrew	}
2796305882Sandrew
2797305882Sandrew	if ((newl2 & ATTR_SW_MANAGED) != 0)
2798305882Sandrew		pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp);
2799305882Sandrew
2800305882Sandrew	newl2 &= ~ATTR_DESCR_MASK;
2801305882Sandrew	newl2 |= L2_BLOCK;
2802305882Sandrew
2803305882Sandrew	pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE);
2804305882Sandrew
2805305882Sandrew	atomic_add_long(&pmap_l2_promotions, 1);
2806305882Sandrew	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
2807305882Sandrew	    pmap);
2808305882Sandrew}
2809325238Smarkj#endif /* VM_NRESERVLEVEL > 0 */
2810305882Sandrew
2811305882Sandrew/*
2812281494Sandrew * Insert the given physical page (p) at
2813281494Sandrew * the specified virtual address (v) in the
2814281494Sandrew * target physical map with the protection requested.
2815281494Sandrew *
2816281494Sandrew * If specified, the page will be wired down, meaning
2817281494Sandrew * that the related pte can not be reclaimed.
2818281494Sandrew *
2819281494Sandrew * NB: This is the only routine which MAY NOT lazy-evaluate
2820281494Sandrew * or lose information.
That is, this routine must actually 2821281494Sandrew * insert this page into the given map NOW. 2822281494Sandrew */ 2823281494Sandrewint 2824281494Sandrewpmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2825281494Sandrew u_int flags, int8_t psind __unused) 2826281494Sandrew{ 2827281494Sandrew struct rwlock *lock; 2828297446Sandrew pd_entry_t *pde; 2829281494Sandrew pt_entry_t new_l3, orig_l3; 2830305882Sandrew pt_entry_t *l2, *l3; 2831281494Sandrew pv_entry_t pv; 2832297446Sandrew vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; 2833297446Sandrew vm_page_t mpte, om, l1_m, l2_m, l3_m; 2834281494Sandrew boolean_t nosleep; 2835297446Sandrew int lvl; 2836281494Sandrew 2837281494Sandrew va = trunc_page(va); 2838281494Sandrew if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2839281494Sandrew VM_OBJECT_ASSERT_LOCKED(m->object); 2840281494Sandrew pa = VM_PAGE_TO_PHYS(m); 2841285537Sandrew new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 2842285537Sandrew L3_PAGE); 2843281494Sandrew if ((prot & VM_PROT_WRITE) == 0) 2844281494Sandrew new_l3 |= ATTR_AP(ATTR_AP_RO); 2845319203Sandrew if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY) 2846319203Sandrew new_l3 |= ATTR_XN; 2847281494Sandrew if ((flags & PMAP_ENTER_WIRED) != 0) 2848281494Sandrew new_l3 |= ATTR_SW_WIRED; 2849281494Sandrew if ((va >> 63) == 0) 2850319203Sandrew new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN; 2851281494Sandrew 2852285212Sandrew CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 2853285212Sandrew 2854281494Sandrew mpte = NULL; 2855281494Sandrew 2856281494Sandrew lock = NULL; 2857281494Sandrew PMAP_LOCK(pmap); 2858281494Sandrew 2859305882Sandrew pde = pmap_pde(pmap, va, &lvl); 2860305882Sandrew if (pde != NULL && lvl == 1) { 2861305882Sandrew l2 = pmap_l1_to_l2(pde, va); 2862305882Sandrew if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && 2863305882Sandrew (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET, 2864305882Sandrew &lock)) != NULL) { 2865305882Sandrew l3 = &l3[pmap_l3_index(va)]; 2866305882Sandrew if (va < VM_MAXUSER_ADDRESS) { 2867305882Sandrew mpte = PHYS_TO_VM_PAGE( 2868305882Sandrew pmap_load(l2) & ~ATTR_MASK); 2869305882Sandrew mpte->wire_count++; 2870305882Sandrew } 2871305882Sandrew goto havel3; 2872305882Sandrew } 2873305882Sandrew } 2874305882Sandrew 2875281494Sandrew if (va < VM_MAXUSER_ADDRESS) { 2876281494Sandrew nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 2877281494Sandrew mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); 2878281494Sandrew if (mpte == NULL && nosleep) { 2879285212Sandrew CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 2880281494Sandrew if (lock != NULL) 2881281494Sandrew rw_wunlock(lock); 2882281494Sandrew PMAP_UNLOCK(pmap); 2883281494Sandrew return (KERN_RESOURCE_SHORTAGE); 2884281494Sandrew } 2885297446Sandrew pde = pmap_pde(pmap, va, &lvl); 2886297446Sandrew KASSERT(pde != NULL, 2887297446Sandrew ("pmap_enter: Invalid page entry, va: 0x%lx", va)); 2888297446Sandrew KASSERT(lvl == 2, 2889297446Sandrew ("pmap_enter: Invalid level %d", lvl)); 2890297446Sandrew 2891297446Sandrew l3 = pmap_l2_to_l3(pde, va); 2892281494Sandrew } else { 2893297446Sandrew /* 2894297446Sandrew * If we get a level 2 pde it must point to a level 3 entry 2895297446Sandrew * otherwise we will need to create the intermediate tables 2896297446Sandrew */ 2897297446Sandrew if (lvl < 2) { 2898297446Sandrew switch(lvl) { 2899297446Sandrew default: 2900297446Sandrew case -1: 2901297446Sandrew /* Get the l0 pde to update */ 2902297446Sandrew pde = pmap_l0(pmap, va); 2903297446Sandrew KASSERT(pde != NULL, ("...")); 2904281494Sandrew 2905297446Sandrew l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2906297446Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2907297446Sandrew VM_ALLOC_ZERO); 2908297446Sandrew if (l1_m == NULL) 2909297446Sandrew panic("pmap_enter: l1 pte_m == NULL"); 2910297446Sandrew if ((l1_m->flags & PG_ZERO) == 0) 2911297446Sandrew pmap_zero_page(l1_m); 2912297446Sandrew 2913297446Sandrew l1_pa = VM_PAGE_TO_PHYS(l1_m); 2914297446Sandrew pmap_load_store(pde, l1_pa | L0_TABLE); 2915297446Sandrew PTE_SYNC(pde); 2916297446Sandrew /* FALLTHROUGH */ 2917297446Sandrew case 0: 2918297446Sandrew /* Get the l1 pde to update */ 2919297446Sandrew pde = pmap_l1_to_l2(pde, va); 2920297446Sandrew KASSERT(pde != NULL, ("...")); 2921297446Sandrew 2922281494Sandrew l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2923281494Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2924281494Sandrew VM_ALLOC_ZERO); 2925281494Sandrew if (l2_m == NULL) 2926281494Sandrew panic("pmap_enter: l2 pte_m == NULL"); 2927281494Sandrew if ((l2_m->flags & PG_ZERO) == 0) 2928281494Sandrew pmap_zero_page(l2_m); 2929281494Sandrew 2930281494Sandrew l2_pa = VM_PAGE_TO_PHYS(l2_m); 2931297446Sandrew pmap_load_store(pde, l2_pa | L1_TABLE); 2932297446Sandrew PTE_SYNC(pde); 2933297446Sandrew /* FALLTHROUGH */ 2934297446Sandrew case 1: 2935297446Sandrew /* Get the l2 pde to update */ 2936297446Sandrew pde = pmap_l1_to_l2(pde, va); 2937281494Sandrew 2938297446Sandrew l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2939297446Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2940297446Sandrew VM_ALLOC_ZERO); 2941297446Sandrew if (l3_m == NULL) 2942297446Sandrew panic("pmap_enter: l3 pte_m == NULL"); 2943297446Sandrew if ((l3_m->flags & PG_ZERO) == 0) 2944297446Sandrew pmap_zero_page(l3_m); 2945281494Sandrew 2946297446Sandrew l3_pa = VM_PAGE_TO_PHYS(l3_m); 2947297446Sandrew pmap_load_store(pde, l3_pa | L2_TABLE); 2948297446Sandrew PTE_SYNC(pde); 2949297446Sandrew break; 2950297446Sandrew } 2951281494Sandrew } 2952297446Sandrew l3 = pmap_l2_to_l3(pde, va); 2953285212Sandrew pmap_invalidate_page(pmap, va); 2954281494Sandrew } 2955305882Sandrewhavel3: 2956281494Sandrew 2957281494Sandrew om = NULL; 2958281494Sandrew orig_l3 = pmap_load(l3); 2959281494Sandrew opa = orig_l3 & ~ATTR_MASK; 2960281494Sandrew 2961281494Sandrew /* 2962281494Sandrew * Is the specified virtual address already mapped? 
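 * If so, there are three possibilities: only the wiring changed, the
 * protection changed but the physical page (opa == pa) did not, or the
 * mapping now refers to a different physical page, in which case the old
 * page's pv entry and dirty/referenced state must be updated below.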
2963281494Sandrew */ 2964281494Sandrew if (pmap_l3_valid(orig_l3)) { 2965281494Sandrew /* 2966281494Sandrew * Wiring change, just update stats. We don't worry about 2967281494Sandrew * wiring PT pages as they remain resident as long as there 2968281494Sandrew * are valid mappings in them. Hence, if a user page is wired, 2969281494Sandrew * the PT page will be also. 2970281494Sandrew */ 2971281494Sandrew if ((flags & PMAP_ENTER_WIRED) != 0 && 2972281494Sandrew (orig_l3 & ATTR_SW_WIRED) == 0) 2973281494Sandrew pmap->pm_stats.wired_count++; 2974281494Sandrew else if ((flags & PMAP_ENTER_WIRED) == 0 && 2975281494Sandrew (orig_l3 & ATTR_SW_WIRED) != 0) 2976281494Sandrew pmap->pm_stats.wired_count--; 2977281494Sandrew 2978281494Sandrew /* 2979281494Sandrew * Remove the extra PT page reference. 2980281494Sandrew */ 2981281494Sandrew if (mpte != NULL) { 2982281494Sandrew mpte->wire_count--; 2983281494Sandrew KASSERT(mpte->wire_count > 0, 2984281494Sandrew ("pmap_enter: missing reference to page table page," 2985281494Sandrew " va: 0x%lx", va)); 2986281494Sandrew } 2987281494Sandrew 2988281494Sandrew /* 2989281494Sandrew * Has the physical page changed? 2990281494Sandrew */ 2991281494Sandrew if (opa == pa) { 2992281494Sandrew /* 2993281494Sandrew * No, might be a protection or wiring change. 2994281494Sandrew */ 2995281494Sandrew if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2996281494Sandrew new_l3 |= ATTR_SW_MANAGED; 2997281494Sandrew if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == 2998281494Sandrew ATTR_AP(ATTR_AP_RW)) { 2999281494Sandrew vm_page_aflag_set(m, PGA_WRITEABLE); 3000281494Sandrew } 3001281494Sandrew } 3002281494Sandrew goto validate; 3003281494Sandrew } 3004281494Sandrew 3005281494Sandrew /* Flush the cache, there might be uncommitted data in it */ 3006281494Sandrew if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 3007281494Sandrew cpu_dcache_wb_range(va, L3_SIZE); 3008281494Sandrew } else { 3009281494Sandrew /* 3010281494Sandrew * Increment the counters. 3011281494Sandrew */ 3012281494Sandrew if ((new_l3 & ATTR_SW_WIRED) != 0) 3013281494Sandrew pmap->pm_stats.wired_count++; 3014281494Sandrew pmap_resident_count_inc(pmap, 1); 3015281494Sandrew } 3016281494Sandrew /* 3017281494Sandrew * Enter on the PV list if part of our managed memory. 3018281494Sandrew */ 3019281494Sandrew if ((m->oflags & VPO_UNMANAGED) == 0) { 3020281494Sandrew new_l3 |= ATTR_SW_MANAGED; 3021281494Sandrew pv = get_pv_entry(pmap, &lock); 3022281494Sandrew pv->pv_va = va; 3023281494Sandrew CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 3024281494Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3025281494Sandrew m->md.pv_gen++; 3026281494Sandrew if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 3027281494Sandrew vm_page_aflag_set(m, PGA_WRITEABLE); 3028281494Sandrew } 3029281494Sandrew 3030281494Sandrew /* 3031281494Sandrew * Update the L3 entry. 
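 * If the physical address is changing, pmap_update_entry() performs a
 * break-before-make sequence so that the TLB never holds two conflicting
 * translations; if only the attributes change, a direct store followed by
 * a single-page invalidation is sufficient.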
3032281494Sandrew */ 3033281494Sandrew if (orig_l3 != 0) { 3034281494Sandrewvalidate: 3035305882Sandrew orig_l3 = pmap_load(l3); 3036281494Sandrew opa = orig_l3 & ~ATTR_MASK; 3037281494Sandrew 3038281494Sandrew if (opa != pa) { 3039305882Sandrew pmap_update_entry(pmap, l3, new_l3, va, PAGE_SIZE); 3040281494Sandrew if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 3041281494Sandrew om = PHYS_TO_VM_PAGE(opa); 3042281494Sandrew if (pmap_page_dirty(orig_l3)) 3043281494Sandrew vm_page_dirty(om); 3044281494Sandrew if ((orig_l3 & ATTR_AF) != 0) 3045281494Sandrew vm_page_aflag_set(om, PGA_REFERENCED); 3046281494Sandrew CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 3047281494Sandrew pmap_pvh_free(&om->md, pmap, va); 3048305882Sandrew if ((om->aflags & PGA_WRITEABLE) != 0 && 3049305882Sandrew TAILQ_EMPTY(&om->md.pv_list) && 3050305882Sandrew ((om->flags & PG_FICTITIOUS) != 0 || 3051305882Sandrew TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) 3052305882Sandrew vm_page_aflag_clear(om, PGA_WRITEABLE); 3053281494Sandrew } 3054305882Sandrew } else { 3055305882Sandrew pmap_load_store(l3, new_l3); 3056305882Sandrew PTE_SYNC(l3); 3057305882Sandrew pmap_invalidate_page(pmap, va); 3058305882Sandrew if (pmap_page_dirty(orig_l3) && 3059305882Sandrew (orig_l3 & ATTR_SW_MANAGED) != 0) 3060281494Sandrew vm_page_dirty(m); 3061281494Sandrew } 3062281494Sandrew } else { 3063281494Sandrew pmap_load_store(l3, new_l3); 3064281494Sandrew } 3065305882Sandrew 3066305882Sandrew PTE_SYNC(l3); 3067285212Sandrew pmap_invalidate_page(pmap, va); 3068281494Sandrew 3069305882Sandrew if (pmap != pmap_kernel()) { 3070305883Sandrew if (pmap == &curproc->p_vmspace->vm_pmap && 3071305883Sandrew (prot & VM_PROT_EXECUTE) != 0) 3072305883Sandrew cpu_icache_sync_range(va, PAGE_SIZE); 3073305882Sandrew 3074325238Smarkj#if VM_NRESERVLEVEL > 0 3075305882Sandrew if ((mpte == NULL || mpte->wire_count == NL3PG) && 3076305882Sandrew pmap_superpages_enabled() && 3077305882Sandrew (m->flags & PG_FICTITIOUS) == 0 && 3078305882Sandrew vm_reserv_level_iffullpop(m) == 0) { 3079305882Sandrew pmap_promote_l2(pmap, pde, va, &lock); 3080305882Sandrew } 3081325238Smarkj#endif 3082305882Sandrew } 3083305882Sandrew 3084281494Sandrew if (lock != NULL) 3085281494Sandrew rw_wunlock(lock); 3086281494Sandrew PMAP_UNLOCK(pmap); 3087281494Sandrew return (KERN_SUCCESS); 3088281494Sandrew} 3089281494Sandrew 3090281494Sandrew/* 3091281494Sandrew * Maps a sequence of resident pages belonging to the same object. 3092281494Sandrew * The sequence begins with the given page m_start. This page is 3093281494Sandrew * mapped at the given virtual address start. Each subsequent page is 3094281494Sandrew * mapped at a virtual address that is offset from start by the same 3095281494Sandrew * amount as the page is offset from m_start within the object. The 3096281494Sandrew * last page in the sequence is the page with the largest offset from 3097281494Sandrew * m_start that can be mapped at a virtual address less than the given 3098281494Sandrew * virtual address end. Not every virtual page between start and end 3099281494Sandrew * is mapped; only those for which a resident page exists with the 3100281494Sandrew * corresponding offset from m_start are mapped. 
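 * For example, if m_start has pindex 10 and a resident page in the run has
 * pindex 13, that page is mapped at start + 3 * PAGE_SIZE, provided this
 * address is below end.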
3101281494Sandrew */ 3102281494Sandrewvoid 3103281494Sandrewpmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3104281494Sandrew vm_page_t m_start, vm_prot_t prot) 3105281494Sandrew{ 3106281494Sandrew struct rwlock *lock; 3107281494Sandrew vm_offset_t va; 3108281494Sandrew vm_page_t m, mpte; 3109281494Sandrew vm_pindex_t diff, psize; 3110281494Sandrew 3111281494Sandrew VM_OBJECT_ASSERT_LOCKED(m_start->object); 3112281494Sandrew 3113281494Sandrew psize = atop(end - start); 3114281494Sandrew mpte = NULL; 3115281494Sandrew m = m_start; 3116281494Sandrew lock = NULL; 3117281494Sandrew PMAP_LOCK(pmap); 3118281494Sandrew while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3119281494Sandrew va = start + ptoa(diff); 3120281494Sandrew mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); 3121281494Sandrew m = TAILQ_NEXT(m, listq); 3122281494Sandrew } 3123281494Sandrew if (lock != NULL) 3124281494Sandrew rw_wunlock(lock); 3125281494Sandrew PMAP_UNLOCK(pmap); 3126281494Sandrew} 3127281494Sandrew 3128281494Sandrew/* 3129281494Sandrew * this code makes some *MAJOR* assumptions: 3130281494Sandrew * 1. Current pmap & pmap exists. 3131281494Sandrew * 2. Not wired. 3132281494Sandrew * 3. Read access. 3133281494Sandrew * 4. No page table pages. 3134281494Sandrew * but is *MUCH* faster than pmap_enter... 3135281494Sandrew */ 3136281494Sandrew 3137281494Sandrewvoid 3138281494Sandrewpmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3139281494Sandrew{ 3140281494Sandrew struct rwlock *lock; 3141281494Sandrew 3142281494Sandrew lock = NULL; 3143281494Sandrew PMAP_LOCK(pmap); 3144281494Sandrew (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 3145281494Sandrew if (lock != NULL) 3146281494Sandrew rw_wunlock(lock); 3147281494Sandrew PMAP_UNLOCK(pmap); 3148281494Sandrew} 3149281494Sandrew 3150281494Sandrewstatic vm_page_t 3151281494Sandrewpmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 3152281494Sandrew vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 3153281494Sandrew{ 3154281494Sandrew struct spglist free; 3155297446Sandrew pd_entry_t *pde; 3156305882Sandrew pt_entry_t *l2, *l3; 3157281494Sandrew vm_paddr_t pa; 3158297446Sandrew int lvl; 3159281494Sandrew 3160281494Sandrew KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 3161281494Sandrew (m->oflags & VPO_UNMANAGED) != 0, 3162281494Sandrew ("pmap_enter_quick_locked: managed mapping within the clean submap")); 3163281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3164281494Sandrew 3165285212Sandrew CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); 3166281494Sandrew /* 3167281494Sandrew * In the case that a page table page is not 3168281494Sandrew * resident, we are creating it here. 3169281494Sandrew */ 3170281494Sandrew if (va < VM_MAXUSER_ADDRESS) { 3171281494Sandrew vm_pindex_t l2pindex; 3172281494Sandrew 3173281494Sandrew /* 3174281494Sandrew * Calculate pagetable page index 3175281494Sandrew */ 3176281494Sandrew l2pindex = pmap_l2_pindex(va); 3177281494Sandrew if (mpte && (mpte->pindex == l2pindex)) { 3178281494Sandrew mpte->wire_count++; 3179281494Sandrew } else { 3180281494Sandrew /* 3181281494Sandrew * Get the l2 entry 3182281494Sandrew */ 3183297446Sandrew pde = pmap_pde(pmap, va, &lvl); 3184281494Sandrew 3185281494Sandrew /* 3186281494Sandrew * If the page table page is mapped, we just increment 3187281494Sandrew * the hold count, and activate it. Otherwise, we 3188281494Sandrew * attempt to allocate a page table page. 
If this 3189281494Sandrew * attempt fails, we don't retry. Instead, we give up. 3190281494Sandrew */ 3191305882Sandrew if (lvl == 1) { 3192305882Sandrew l2 = pmap_l1_to_l2(pde, va); 3193305882Sandrew if ((pmap_load(l2) & ATTR_DESCR_MASK) == 3194305882Sandrew L2_BLOCK) 3195305882Sandrew return (NULL); 3196305882Sandrew } 3197297446Sandrew if (lvl == 2 && pmap_load(pde) != 0) { 3198285045Sandrew mpte = 3199297446Sandrew PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); 3200281494Sandrew mpte->wire_count++; 3201281494Sandrew } else { 3202281494Sandrew /* 3203281494Sandrew * Pass NULL instead of the PV list lock 3204281494Sandrew * pointer, because we don't intend to sleep. 3205281494Sandrew */ 3206281494Sandrew mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 3207281494Sandrew if (mpte == NULL) 3208281494Sandrew return (mpte); 3209281494Sandrew } 3210281494Sandrew } 3211281494Sandrew l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 3212281494Sandrew l3 = &l3[pmap_l3_index(va)]; 3213281494Sandrew } else { 3214281494Sandrew mpte = NULL; 3215297446Sandrew pde = pmap_pde(kernel_pmap, va, &lvl); 3216297446Sandrew KASSERT(pde != NULL, 3217297446Sandrew ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", 3218297446Sandrew va)); 3219297446Sandrew KASSERT(lvl == 2, 3220297446Sandrew ("pmap_enter_quick_locked: Invalid level %d", lvl)); 3221297446Sandrew l3 = pmap_l2_to_l3(pde, va); 3222281494Sandrew } 3223297446Sandrew 3224285212Sandrew if (pmap_load(l3) != 0) { 3225281494Sandrew if (mpte != NULL) { 3226281494Sandrew mpte->wire_count--; 3227281494Sandrew mpte = NULL; 3228281494Sandrew } 3229281494Sandrew return (mpte); 3230281494Sandrew } 3231281494Sandrew 3232281494Sandrew /* 3233281494Sandrew * Enter on the PV list if part of our managed memory. 3234281494Sandrew */ 3235281494Sandrew if ((m->oflags & VPO_UNMANAGED) == 0 && 3236281494Sandrew !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 3237281494Sandrew if (mpte != NULL) { 3238281494Sandrew SLIST_INIT(&free); 3239281494Sandrew if (pmap_unwire_l3(pmap, va, mpte, &free)) { 3240281494Sandrew pmap_invalidate_page(pmap, va); 3241281494Sandrew pmap_free_zero_pages(&free); 3242281494Sandrew } 3243281494Sandrew mpte = NULL; 3244281494Sandrew } 3245281494Sandrew return (mpte); 3246281494Sandrew } 3247281494Sandrew 3248281494Sandrew /* 3249281494Sandrew * Increment counters 3250281494Sandrew */ 3251281494Sandrew pmap_resident_count_inc(pmap, 1); 3252281494Sandrew 3253285537Sandrew pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 3254305882Sandrew ATTR_AP(ATTR_AP_RO) | L3_PAGE; 3255319203Sandrew if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY) 3256319203Sandrew pa |= ATTR_XN; 3257319203Sandrew else if (va < VM_MAXUSER_ADDRESS) 3258319203Sandrew pa |= ATTR_PXN; 3259281494Sandrew 3260281494Sandrew /* 3261281494Sandrew * Now validate mapping with RO protection 3262281494Sandrew */ 3263281494Sandrew if ((m->oflags & VPO_UNMANAGED) == 0) 3264281494Sandrew pa |= ATTR_SW_MANAGED; 3265281494Sandrew pmap_load_store(l3, pa); 3266281494Sandrew PTE_SYNC(l3); 3267281494Sandrew pmap_invalidate_page(pmap, va); 3268281494Sandrew return (mpte); 3269281494Sandrew} 3270281494Sandrew 3271281494Sandrew/* 3272281494Sandrew * This code maps large physical mmap regions into the 3273281494Sandrew * processor address space. Note that some shortcuts 3274281494Sandrew * are taken, but the code works. 
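 * On arm64 this is currently a no-op beyond the assertions below; no
 * mappings are pre-created here.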
3275281494Sandrew */ 3276281494Sandrewvoid 3277281494Sandrewpmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 3278281494Sandrew vm_pindex_t pindex, vm_size_t size) 3279281494Sandrew{ 3280281494Sandrew 3281281846Sandrew VM_OBJECT_ASSERT_WLOCKED(object); 3282281846Sandrew KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3283281846Sandrew ("pmap_object_init_pt: non-device object")); 3284281494Sandrew} 3285281494Sandrew 3286281494Sandrew/* 3287281494Sandrew * Clear the wired attribute from the mappings for the specified range of 3288281494Sandrew * addresses in the given pmap. Every valid mapping within that range 3289281494Sandrew * must have the wired attribute set. In contrast, invalid mappings 3290281494Sandrew * cannot have the wired attribute set, so they are ignored. 3291281494Sandrew * 3292281494Sandrew * The wired attribute of the page table entry is not a hardware feature, 3293281494Sandrew * so there is no need to invalidate any TLB entries. 3294281494Sandrew */ 3295281494Sandrewvoid 3296281494Sandrewpmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3297281494Sandrew{ 3298281494Sandrew vm_offset_t va_next; 3299297446Sandrew pd_entry_t *l0, *l1, *l2; 3300281494Sandrew pt_entry_t *l3; 3301281494Sandrew 3302281494Sandrew PMAP_LOCK(pmap); 3303281494Sandrew for (; sva < eva; sva = va_next) { 3304297446Sandrew l0 = pmap_l0(pmap, sva); 3305297446Sandrew if (pmap_load(l0) == 0) { 3306297446Sandrew va_next = (sva + L0_SIZE) & ~L0_OFFSET; 3307297446Sandrew if (va_next < sva) 3308297446Sandrew va_next = eva; 3309297446Sandrew continue; 3310297446Sandrew } 3311297446Sandrew 3312297446Sandrew l1 = pmap_l0_to_l1(l0, sva); 3313285045Sandrew if (pmap_load(l1) == 0) { 3314281494Sandrew va_next = (sva + L1_SIZE) & ~L1_OFFSET; 3315281494Sandrew if (va_next < sva) 3316281494Sandrew va_next = eva; 3317281494Sandrew continue; 3318281494Sandrew } 3319281494Sandrew 3320281494Sandrew va_next = (sva + L2_SIZE) & ~L2_OFFSET; 3321281494Sandrew if (va_next < sva) 3322281494Sandrew va_next = eva; 3323281494Sandrew 3324281494Sandrew l2 = pmap_l1_to_l2(l1, sva); 3325285045Sandrew if (pmap_load(l2) == 0) 3326281494Sandrew continue; 3327281494Sandrew 3328305882Sandrew if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { 3329305882Sandrew l3 = pmap_demote_l2(pmap, l2, sva); 3330305882Sandrew if (l3 == NULL) 3331305882Sandrew continue; 3332305882Sandrew } 3333305882Sandrew KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, 3334305882Sandrew ("pmap_unwire: Invalid l2 entry after demotion")); 3335305882Sandrew 3336281494Sandrew if (va_next > eva) 3337281494Sandrew va_next = eva; 3338281494Sandrew for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 3339281494Sandrew sva += L3_SIZE) { 3340285045Sandrew if (pmap_load(l3) == 0) 3341281494Sandrew continue; 3342285045Sandrew if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) 3343281494Sandrew panic("pmap_unwire: l3 %#jx is missing " 3344288445Sandrew "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); 3345281494Sandrew 3346281494Sandrew /* 3347281494Sandrew * PG_W must be cleared atomically. Although the pmap 3348281494Sandrew * lock synchronizes access to PG_W, another processor 3349281494Sandrew * could be setting PG_M and/or PG_A concurrently. 
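 * (PG_W, PG_M, and PG_A are the traditional x86 names; on arm64 the wired
 * attribute is the software bit ATTR_SW_WIRED, while the modified and
 * accessed roles are played by the AP write-permission bits and ATTR_AF.)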
3350281494Sandrew */ 3351281494Sandrew atomic_clear_long(l3, ATTR_SW_WIRED); 3352281494Sandrew pmap->pm_stats.wired_count--; 3353281494Sandrew } 3354281494Sandrew } 3355281494Sandrew PMAP_UNLOCK(pmap); 3356281494Sandrew} 3357281494Sandrew 3358281494Sandrew/* 3359281494Sandrew * Copy the range specified by src_addr/len 3360281494Sandrew * from the source map to the range dst_addr/len 3361281494Sandrew * in the destination map. 3362281494Sandrew * 3363281494Sandrew * This routine is only advisory and need not do anything. 3364281494Sandrew */ 3365281494Sandrew 3366281494Sandrewvoid 3367281494Sandrewpmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3368281494Sandrew vm_offset_t src_addr) 3369281494Sandrew{ 3370281494Sandrew} 3371281494Sandrew 3372281494Sandrew/* 3373281494Sandrew * pmap_zero_page zeros the specified hardware page by mapping 3374281494Sandrew * the page into KVM and using bzero to clear its contents. 3375281494Sandrew */ 3376281494Sandrewvoid 3377281494Sandrewpmap_zero_page(vm_page_t m) 3378281494Sandrew{ 3379281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3380281494Sandrew 3381281494Sandrew pagezero((void *)va); 3382281494Sandrew} 3383281494Sandrew 3384281494Sandrew/* 3385305531Sandrew * pmap_zero_page_area zeros the specified hardware page by mapping 3386281494Sandrew * the page into KVM and using bzero to clear its contents. 3387281494Sandrew * 3388281494Sandrew * off and size may not cover an area beyond a single hardware page. 3389281494Sandrew */ 3390281494Sandrewvoid 3391281494Sandrewpmap_zero_page_area(vm_page_t m, int off, int size) 3392281494Sandrew{ 3393281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3394281494Sandrew 3395281494Sandrew if (off == 0 && size == PAGE_SIZE) 3396281494Sandrew pagezero((void *)va); 3397281494Sandrew else 3398281494Sandrew bzero((char *)va + off, size); 3399281494Sandrew} 3400281494Sandrew 3401281494Sandrew/* 3402305531Sandrew * pmap_zero_page_idle zeros the specified hardware page by mapping 3403281494Sandrew * the page into KVM and using bzero to clear its contents. This 3404281494Sandrew * is intended to be called from the vm_pagezero process only and 3405281494Sandrew * outside of Giant. 3406281494Sandrew */ 3407281494Sandrewvoid 3408281494Sandrewpmap_zero_page_idle(vm_page_t m) 3409281494Sandrew{ 3410281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3411281494Sandrew 3412281494Sandrew pagezero((void *)va); 3413281494Sandrew} 3414281494Sandrew 3415281494Sandrew/* 3416281494Sandrew * pmap_copy_page copies the specified (machine independent) 3417281494Sandrew * page by mapping the page into virtual memory and using 3418281494Sandrew * bcopy to copy the page, one machine dependent page at a 3419281494Sandrew * time. 
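 * On arm64 both pages are addressed through the direct map, so no
 * transient kernel mappings need to be created.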
3420281494Sandrew */ 3421281494Sandrewvoid 3422281494Sandrewpmap_copy_page(vm_page_t msrc, vm_page_t mdst) 3423281494Sandrew{ 3424281494Sandrew vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 3425281494Sandrew vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 3426281494Sandrew 3427281494Sandrew pagecopy((void *)src, (void *)dst); 3428281494Sandrew} 3429281494Sandrew 3430281494Sandrewint unmapped_buf_allowed = 1; 3431281494Sandrew 3432281494Sandrewvoid 3433281494Sandrewpmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 3434281494Sandrew vm_offset_t b_offset, int xfersize) 3435281494Sandrew{ 3436281494Sandrew void *a_cp, *b_cp; 3437281494Sandrew vm_page_t m_a, m_b; 3438281494Sandrew vm_paddr_t p_a, p_b; 3439281494Sandrew vm_offset_t a_pg_offset, b_pg_offset; 3440281494Sandrew int cnt; 3441281494Sandrew 3442281494Sandrew while (xfersize > 0) { 3443281494Sandrew a_pg_offset = a_offset & PAGE_MASK; 3444281494Sandrew m_a = ma[a_offset >> PAGE_SHIFT]; 3445281494Sandrew p_a = m_a->phys_addr; 3446281494Sandrew b_pg_offset = b_offset & PAGE_MASK; 3447281494Sandrew m_b = mb[b_offset >> PAGE_SHIFT]; 3448281494Sandrew p_b = m_b->phys_addr; 3449281494Sandrew cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 3450281494Sandrew cnt = min(cnt, PAGE_SIZE - b_pg_offset); 3451281494Sandrew if (__predict_false(!PHYS_IN_DMAP(p_a))) { 3452281494Sandrew panic("!DMAP a %lx", p_a); 3453281494Sandrew } else { 3454281494Sandrew a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 3455281494Sandrew } 3456281494Sandrew if (__predict_false(!PHYS_IN_DMAP(p_b))) { 3457281494Sandrew panic("!DMAP b %lx", p_b); 3458281494Sandrew } else { 3459281494Sandrew b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 3460281494Sandrew } 3461281494Sandrew bcopy(a_cp, b_cp, cnt); 3462281494Sandrew a_offset += cnt; 3463281494Sandrew b_offset += cnt; 3464281494Sandrew xfersize -= cnt; 3465281494Sandrew } 3466281494Sandrew} 3467281494Sandrew 3468286296Sjahvm_offset_t 3469286296Sjahpmap_quick_enter_page(vm_page_t m) 3470286296Sjah{ 3471286296Sjah 3472286296Sjah return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 3473286296Sjah} 3474286296Sjah 3475286296Sjahvoid 3476286296Sjahpmap_quick_remove_page(vm_offset_t addr) 3477286296Sjah{ 3478286296Sjah} 3479286296Sjah 3480281494Sandrew/* 3481281494Sandrew * Returns true if the pmap's pv is one of the first 3482281494Sandrew * 16 pvs linked to from this page. This count may 3483281494Sandrew * be changed upwards or downwards in the future; it 3484281494Sandrew * is only necessary that true be returned for a small 3485281494Sandrew * subset of pmaps for proper page aging. 
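 * Both the page's own pv list and, for a non-fictitious page, the pv list
 * of its 2MB superpage are scanned, with the 16-entry budget shared
 * between the two lists.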
3486281494Sandrew */ 3487281494Sandrewboolean_t 3488281494Sandrewpmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3489281494Sandrew{ 3490305882Sandrew struct md_page *pvh; 3491281494Sandrew struct rwlock *lock; 3492281494Sandrew pv_entry_t pv; 3493281494Sandrew int loops = 0; 3494281494Sandrew boolean_t rv; 3495281494Sandrew 3496281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3497281494Sandrew ("pmap_page_exists_quick: page %p is not managed", m)); 3498281494Sandrew rv = FALSE; 3499281494Sandrew lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3500281494Sandrew rw_rlock(lock); 3501281494Sandrew TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3502281494Sandrew if (PV_PMAP(pv) == pmap) { 3503281494Sandrew rv = TRUE; 3504281494Sandrew break; 3505281494Sandrew } 3506281494Sandrew loops++; 3507281494Sandrew if (loops >= 16) 3508281494Sandrew break; 3509281494Sandrew } 3510305882Sandrew if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 3511305882Sandrew pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3512305882Sandrew TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 3513305882Sandrew if (PV_PMAP(pv) == pmap) { 3514305882Sandrew rv = TRUE; 3515305882Sandrew break; 3516305882Sandrew } 3517305882Sandrew loops++; 3518305882Sandrew if (loops >= 16) 3519305882Sandrew break; 3520305882Sandrew } 3521305882Sandrew } 3522281494Sandrew rw_runlock(lock); 3523281494Sandrew return (rv); 3524281494Sandrew} 3525281494Sandrew 3526281494Sandrew/* 3527281494Sandrew * pmap_page_wired_mappings: 3528281494Sandrew * 3529281494Sandrew * Return the number of managed mappings to the given physical page 3530281494Sandrew * that are wired. 3531281494Sandrew */ 3532281494Sandrewint 3533281494Sandrewpmap_page_wired_mappings(vm_page_t m) 3534281494Sandrew{ 3535281494Sandrew struct rwlock *lock; 3536305882Sandrew struct md_page *pvh; 3537281494Sandrew pmap_t pmap; 3538297446Sandrew pt_entry_t *pte; 3539281494Sandrew pv_entry_t pv; 3540305882Sandrew int count, lvl, md_gen, pvh_gen; 3541281494Sandrew 3542281494Sandrew if ((m->oflags & VPO_UNMANAGED) != 0) 3543281494Sandrew return (0); 3544281494Sandrew lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3545281494Sandrew rw_rlock(lock); 3546281494Sandrewrestart: 3547281494Sandrew count = 0; 3548281494Sandrew TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3549281494Sandrew pmap = PV_PMAP(pv); 3550281494Sandrew if (!PMAP_TRYLOCK(pmap)) { 3551281494Sandrew md_gen = m->md.pv_gen; 3552281494Sandrew rw_runlock(lock); 3553281494Sandrew PMAP_LOCK(pmap); 3554281494Sandrew rw_rlock(lock); 3555281494Sandrew if (md_gen != m->md.pv_gen) { 3556281494Sandrew PMAP_UNLOCK(pmap); 3557281494Sandrew goto restart; 3558281494Sandrew } 3559281494Sandrew } 3560297446Sandrew pte = pmap_pte(pmap, pv->pv_va, &lvl); 3561297446Sandrew if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) 3562281494Sandrew count++; 3563281494Sandrew PMAP_UNLOCK(pmap); 3564281494Sandrew } 3565305882Sandrew if ((m->flags & PG_FICTITIOUS) == 0) { 3566305882Sandrew pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3567305882Sandrew TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 3568305882Sandrew pmap = PV_PMAP(pv); 3569305882Sandrew if (!PMAP_TRYLOCK(pmap)) { 3570305882Sandrew md_gen = m->md.pv_gen; 3571305882Sandrew pvh_gen = pvh->pv_gen; 3572305882Sandrew rw_runlock(lock); 3573305882Sandrew PMAP_LOCK(pmap); 3574305882Sandrew rw_rlock(lock); 3575305882Sandrew if (md_gen != m->md.pv_gen || 3576305882Sandrew pvh_gen != pvh->pv_gen) { 3577305882Sandrew PMAP_UNLOCK(pmap); 3578305882Sandrew goto restart; 3579305882Sandrew } 3580305882Sandrew } 3581305882Sandrew pte = 
pmap_pte(pmap, pv->pv_va, &lvl);
3582305882Sandrew			if (pte != NULL &&
3583305882Sandrew			    (pmap_load(pte) & ATTR_SW_WIRED) != 0)
3584305882Sandrew				count++;
3585305882Sandrew			PMAP_UNLOCK(pmap);
3586305882Sandrew		}
3587305882Sandrew	}
3588281494Sandrew	rw_runlock(lock);
3589281494Sandrew	return (count);
3590281494Sandrew}
3591281494Sandrew
3592281494Sandrew/*
3593281494Sandrew * Destroy all managed, non-wired mappings in the given user-space
3594281494Sandrew * pmap.  This pmap cannot be active on any processor besides the
3595281494Sandrew * caller.
3596305531Sandrew *
3597281494Sandrew * This function cannot be applied to the kernel pmap.  Moreover, it
3598281494Sandrew * is not intended for general use.  It is only to be used during
3599281494Sandrew * process termination.  Consequently, it can be implemented in ways
3600281494Sandrew * that make it faster than pmap_remove().  First, it can more quickly
3601281494Sandrew * destroy mappings by iterating over the pmap's collection of PV
3602281494Sandrew * entries, rather than searching the page table.  Second, it doesn't
3603281494Sandrew * have to test and clear the page table entries atomically, because
3604281494Sandrew * no processor is currently accessing the user address space.  In
3605281494Sandrew * particular, a page table entry's dirty bit won't change state once
3606281494Sandrew * this function starts.
3607281494Sandrew */
3608281494Sandrewvoid
3609281494Sandrewpmap_remove_pages(pmap_t pmap)
3610281494Sandrew{
3611297446Sandrew	pd_entry_t *pde;
3612297446Sandrew	pt_entry_t *pte, tpte;
3613281494Sandrew	struct spglist free;
3614305882Sandrew	vm_page_t m, ml3, mt;
3615281494Sandrew	pv_entry_t pv;
3616305882Sandrew	struct md_page *pvh;
3617281494Sandrew	struct pv_chunk *pc, *npc;
3618281494Sandrew	struct rwlock *lock;
3619281494Sandrew	int64_t bit;
3620281494Sandrew	uint64_t inuse, bitmask;
3621297446Sandrew	int allfree, field, freed, idx, lvl;
3622281494Sandrew	vm_paddr_t pa;
3623281494Sandrew
3624281494Sandrew	lock = NULL;
3625281494Sandrew
3626281494Sandrew	SLIST_INIT(&free);
3627281494Sandrew	PMAP_LOCK(pmap);
3628281494Sandrew	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
3629281494Sandrew		allfree = 1;
3630281494Sandrew		freed = 0;
3631281494Sandrew		for (field = 0; field < _NPCM; field++) {
3632281494Sandrew			inuse = ~pc->pc_map[field] & pc_freemask[field];
3633281494Sandrew			while (inuse != 0) {
3634281494Sandrew				bit = ffsl(inuse) - 1;
3635281494Sandrew				bitmask = 1UL << bit;
3636281494Sandrew				idx = field * 64 + bit;
3637281494Sandrew				pv = &pc->pc_pventry[idx];
3638281494Sandrew				inuse &= ~bitmask;
3639281494Sandrew
3640297446Sandrew				pde = pmap_pde(pmap, pv->pv_va, &lvl);
3641297446Sandrew				KASSERT(pde != NULL,
3642297446Sandrew				    ("Attempting to remove an unmapped page"));
3643281494Sandrew
3644305882Sandrew				switch(lvl) {
3645305882Sandrew				case 1:
3646305882Sandrew					pte = pmap_l1_to_l2(pde, pv->pv_va);
3647305882Sandrew					tpte = pmap_load(pte);
3648305882Sandrew					KASSERT((tpte & ATTR_DESCR_MASK) ==
3649305882Sandrew					    L2_BLOCK,
3650305882Sandrew					    ("Attempting to remove an invalid "
3651305882Sandrew					    "block: %lx", tpte));
3653305882Sandrew					break;
3654305882Sandrew				case 2:
3655305882Sandrew					pte = pmap_l2_to_l3(pde, pv->pv_va);
3656305882Sandrew					tpte = pmap_load(pte);
3657305882Sandrew					KASSERT((tpte & ATTR_DESCR_MASK) ==
3658305882Sandrew					    L3_PAGE,
3659305882Sandrew					    ("Attempting to remove an invalid "
3660305882Sandrew					    "page: %lx", tpte));
3661305882Sandrew					break;
3662305882Sandrew				default:
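					/* Only L2 blocks and L3 pages carry pv entries. */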
3663305882Sandrew					panic(
3664305882Sandrew					    "Invalid page directory level: %d",
3665305882Sandrew					    lvl);
3666305882Sandrew				}
3667297446Sandrew
3668281494Sandrew/*
3669281494Sandrew * We cannot remove wired pages from a process' mapping at this time
3670281494Sandrew */
3671297446Sandrew				if (tpte & ATTR_SW_WIRED) {
3672281494Sandrew					allfree = 0;
3673281494Sandrew					continue;
3674281494Sandrew				}
3675281494Sandrew
3676297446Sandrew				pa = tpte & ~ATTR_MASK;
3677281494Sandrew
3678281494Sandrew				m = PHYS_TO_VM_PAGE(pa);
3679281494Sandrew				KASSERT(m->phys_addr == pa,
3680281494Sandrew				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
3681281494Sandrew				    m, (uintmax_t)m->phys_addr,
3682297446Sandrew				    (uintmax_t)tpte));
3683281494Sandrew
3684281494Sandrew				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
3685281494Sandrew				    m < &vm_page_array[vm_page_array_size],
3686297446Sandrew				    ("pmap_remove_pages: bad pte %#jx",
3687297446Sandrew				    (uintmax_t)tpte));
3688281494Sandrew
3689305882Sandrew				if (pmap_is_current(pmap)) {
3690305882Sandrew					if (lvl == 2 &&
3691305882Sandrew					    pmap_l3_valid_cacheable(tpte)) {
3692305882Sandrew						cpu_dcache_wb_range(pv->pv_va,
3693305882Sandrew						    L3_SIZE);
3694305882Sandrew					} else if (lvl == 1 &&
3695305882Sandrew					    pmap_pte_valid_cacheable(tpte)) {
3696305882Sandrew						cpu_dcache_wb_range(pv->pv_va,
3697305882Sandrew						    L2_SIZE);
3698305882Sandrew					}
3699305882Sandrew				}
3700297446Sandrew				pmap_load_clear(pte);
3701297446Sandrew				PTE_SYNC(pte);
3702285212Sandrew				pmap_invalidate_page(pmap, pv->pv_va);
3703281494Sandrew
3704281494Sandrew				/*
3705281494Sandrew				 * Update the vm_page_t clean/reference bits.
3706281494Sandrew				 */
3707305882Sandrew				if ((tpte & ATTR_AP_RW_BIT) ==
3708305882Sandrew				    ATTR_AP(ATTR_AP_RW)) {
3709305882Sandrew					switch (lvl) {
3710305882Sandrew					case 1:
3711305882Sandrew						for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
3712305882Sandrew							vm_page_dirty(mt);
3713305882Sandrew						break;
3714305882Sandrew					case 2:
3715305882Sandrew						vm_page_dirty(m);
3716305882Sandrew						break;
3717305882Sandrew					}
3718305882Sandrew				}
3719281494Sandrew
3720281494Sandrew				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
3721281494Sandrew
3722281494Sandrew				/* Mark free */
3723281494Sandrew				pc->pc_map[field] |= bitmask;
3724305882Sandrew				switch (lvl) {
3725305882Sandrew				case 1:
3726305882Sandrew					pmap_resident_count_dec(pmap,
3727305882Sandrew					    L2_SIZE / PAGE_SIZE);
3728305882Sandrew					pvh = pa_to_pvh(tpte & ~ATTR_MASK);
3729305882Sandrew					TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
3730305882Sandrew					pvh->pv_gen++;
3731305882Sandrew					if (TAILQ_EMPTY(&pvh->pv_list)) {
3732305882Sandrew						for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++)
3733305882Sandrew							if ((mt->aflags & PGA_WRITEABLE) != 0 &&
3734305882Sandrew							    TAILQ_EMPTY(&mt->md.pv_list))
3735305882Sandrew								vm_page_aflag_clear(mt, PGA_WRITEABLE);
3736305882Sandrew					}
3737318716Smarkj					ml3 = pmap_remove_pt_page(pmap,
3738305882Sandrew					    pv->pv_va);
3739305882Sandrew					if (ml3 != NULL) {
3740305882Sandrew						pmap_resident_count_dec(pmap, 1);
3741305882Sandrew						KASSERT(ml3->wire_count == NL3PG,
3742305882Sandrew						    ("pmap_remove_pages: l3 page wire count error"));
3743305882Sandrew						ml3->wire_count = 0;
3744305882Sandrew						pmap_add_delayed_free_list(ml3,
3745305882Sandrew						    &free, FALSE);
3746305882Sandrew						atomic_subtract_int(
3747305882Sandrew						    &vm_cnt.v_wire_count, 1);
3748305882Sandrew					}
3749305882Sandrew					break;
3750305882Sandrew				case 2:
3751305882Sandrew					pmap_resident_count_dec(pmap, 1);
3752305882Sandrew					TAILQ_REMOVE(&m->md.pv_list, pv,
3753305882Sandrew					    pv_next);
3754305882Sandrew					m->md.pv_gen++;
3755305882Sandrew
if ((m->aflags & PGA_WRITEABLE) != 0 && 3756305882Sandrew TAILQ_EMPTY(&m->md.pv_list) && 3757305882Sandrew (m->flags & PG_FICTITIOUS) == 0) { 3758305882Sandrew pvh = pa_to_pvh( 3759305882Sandrew VM_PAGE_TO_PHYS(m)); 3760305882Sandrew if (TAILQ_EMPTY(&pvh->pv_list)) 3761305882Sandrew vm_page_aflag_clear(m, 3762305882Sandrew PGA_WRITEABLE); 3763305882Sandrew } 3764305882Sandrew break; 3765305882Sandrew } 3766297446Sandrew pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), 3767297446Sandrew &free); 3768281494Sandrew freed++; 3769281494Sandrew } 3770281494Sandrew } 3771281494Sandrew PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 3772281494Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 3773281494Sandrew PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 3774281494Sandrew if (allfree) { 3775281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 3776281494Sandrew free_pv_chunk(pc); 3777281494Sandrew } 3778281494Sandrew } 3779281494Sandrew pmap_invalidate_all(pmap); 3780281494Sandrew if (lock != NULL) 3781281494Sandrew rw_wunlock(lock); 3782281494Sandrew PMAP_UNLOCK(pmap); 3783281494Sandrew pmap_free_zero_pages(&free); 3784281494Sandrew} 3785281494Sandrew 3786281494Sandrew/* 3787281494Sandrew * This is used to check if a page has been accessed or modified. As we 3788281494Sandrew * don't have a bit to see if it has been modified we have to assume it 3789281494Sandrew * has been if the page is read/write. 3790281494Sandrew */ 3791281494Sandrewstatic boolean_t 3792281494Sandrewpmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 3793281494Sandrew{ 3794281494Sandrew struct rwlock *lock; 3795281494Sandrew pv_entry_t pv; 3796305882Sandrew struct md_page *pvh; 3797297446Sandrew pt_entry_t *pte, mask, value; 3798281494Sandrew pmap_t pmap; 3799305882Sandrew int lvl, md_gen, pvh_gen; 3800281494Sandrew boolean_t rv; 3801281494Sandrew 3802281494Sandrew rv = FALSE; 3803281494Sandrew lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3804281494Sandrew rw_rlock(lock); 3805281494Sandrewrestart: 3806281494Sandrew TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3807281494Sandrew pmap = PV_PMAP(pv); 3808281494Sandrew if (!PMAP_TRYLOCK(pmap)) { 3809281494Sandrew md_gen = m->md.pv_gen; 3810281494Sandrew rw_runlock(lock); 3811281494Sandrew PMAP_LOCK(pmap); 3812281494Sandrew rw_rlock(lock); 3813281494Sandrew if (md_gen != m->md.pv_gen) { 3814281494Sandrew PMAP_UNLOCK(pmap); 3815281494Sandrew goto restart; 3816281494Sandrew } 3817281494Sandrew } 3818297446Sandrew pte = pmap_pte(pmap, pv->pv_va, &lvl); 3819297446Sandrew KASSERT(lvl == 3, 3820297446Sandrew ("pmap_page_test_mappings: Invalid level %d", lvl)); 3821281494Sandrew mask = 0; 3822281494Sandrew value = 0; 3823281494Sandrew if (modified) { 3824281494Sandrew mask |= ATTR_AP_RW_BIT; 3825281494Sandrew value |= ATTR_AP(ATTR_AP_RW); 3826281494Sandrew } 3827281494Sandrew if (accessed) { 3828281494Sandrew mask |= ATTR_AF | ATTR_DESCR_MASK; 3829281494Sandrew value |= ATTR_AF | L3_PAGE; 3830281494Sandrew } 3831297446Sandrew rv = (pmap_load(pte) & mask) == value; 3832281494Sandrew PMAP_UNLOCK(pmap); 3833281494Sandrew if (rv) 3834281494Sandrew goto out; 3835281494Sandrew } 3836305882Sandrew if ((m->flags & PG_FICTITIOUS) == 0) { 3837305882Sandrew pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3838305882Sandrew TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 3839305882Sandrew pmap = PV_PMAP(pv); 3840305882Sandrew if (!PMAP_TRYLOCK(pmap)) { 3841305882Sandrew md_gen = m->md.pv_gen; 3842305882Sandrew pvh_gen = pvh->pv_gen; 3843305882Sandrew rw_runlock(lock); 
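				/*
				 * Take the pmap lock, then reacquire the pv
				 * list lock and recheck the generation
				 * counts; if either changed, the lists were
				 * modified while unlocked and the scan must
				 * restart.
				 */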
3844305882Sandrew PMAP_LOCK(pmap); 3845305882Sandrew rw_rlock(lock); 3846305882Sandrew if (md_gen != m->md.pv_gen || 3847305882Sandrew pvh_gen != pvh->pv_gen) { 3848305882Sandrew PMAP_UNLOCK(pmap); 3849305882Sandrew goto restart; 3850305882Sandrew } 3851305882Sandrew } 3852305882Sandrew pte = pmap_pte(pmap, pv->pv_va, &lvl); 3853305882Sandrew KASSERT(lvl == 2, 3854305882Sandrew ("pmap_page_test_mappings: Invalid level %d", lvl)); 3855305882Sandrew mask = 0; 3856305882Sandrew value = 0; 3857305882Sandrew if (modified) { 3858305882Sandrew mask |= ATTR_AP_RW_BIT; 3859305882Sandrew value |= ATTR_AP(ATTR_AP_RW); 3860305882Sandrew } 3861305882Sandrew if (accessed) { 3862305882Sandrew mask |= ATTR_AF | ATTR_DESCR_MASK; 3863305882Sandrew value |= ATTR_AF | L2_BLOCK; 3864305882Sandrew } 3865305882Sandrew rv = (pmap_load(pte) & mask) == value; 3866305882Sandrew PMAP_UNLOCK(pmap); 3867305882Sandrew if (rv) 3868305882Sandrew goto out; 3869305882Sandrew } 3870305882Sandrew } 3871281494Sandrewout: 3872281494Sandrew rw_runlock(lock); 3873281494Sandrew return (rv); 3874281494Sandrew} 3875281494Sandrew 3876281494Sandrew/* 3877281494Sandrew * pmap_is_modified: 3878281494Sandrew * 3879281494Sandrew * Return whether or not the specified physical page was modified 3880281494Sandrew * in any physical maps. 3881281494Sandrew */ 3882281494Sandrewboolean_t 3883281494Sandrewpmap_is_modified(vm_page_t m) 3884281494Sandrew{ 3885281494Sandrew 3886281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3887281494Sandrew ("pmap_is_modified: page %p is not managed", m)); 3888281494Sandrew 3889281494Sandrew /* 3890281494Sandrew * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3891281494Sandrew * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3892281494Sandrew * is clear, no PTEs can have PG_M set. 3893281494Sandrew */ 3894281494Sandrew VM_OBJECT_ASSERT_WLOCKED(m->object); 3895281494Sandrew if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3896281494Sandrew return (FALSE); 3897281494Sandrew return (pmap_page_test_mappings(m, FALSE, TRUE)); 3898281494Sandrew} 3899281494Sandrew 3900281494Sandrew/* 3901281494Sandrew * pmap_is_prefaultable: 3902281494Sandrew * 3903281494Sandrew * Return whether or not the specified virtual address is eligible 3904281494Sandrew * for prefault. 3905281494Sandrew */ 3906281494Sandrewboolean_t 3907281494Sandrewpmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 3908281494Sandrew{ 3909297446Sandrew pt_entry_t *pte; 3910281494Sandrew boolean_t rv; 3911297446Sandrew int lvl; 3912281494Sandrew 3913281494Sandrew rv = FALSE; 3914281494Sandrew PMAP_LOCK(pmap); 3915297446Sandrew pte = pmap_pte(pmap, addr, &lvl); 3916297446Sandrew if (pte != NULL && pmap_load(pte) != 0) { 3917281494Sandrew rv = TRUE; 3918281494Sandrew } 3919281494Sandrew PMAP_UNLOCK(pmap); 3920281494Sandrew return (rv); 3921281494Sandrew} 3922281494Sandrew 3923281494Sandrew/* 3924281494Sandrew * pmap_is_referenced: 3925281494Sandrew * 3926281494Sandrew * Return whether or not the specified physical page was referenced 3927281494Sandrew * in any physical maps. 
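 * On arm64 this reduces to testing ATTR_AF together with a valid L3 page
 * (or L2 block) descriptor in pmap_page_test_mappings().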
3928281494Sandrew */ 3929281494Sandrewboolean_t 3930281494Sandrewpmap_is_referenced(vm_page_t m) 3931281494Sandrew{ 3932281494Sandrew 3933281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3934281494Sandrew ("pmap_is_referenced: page %p is not managed", m)); 3935281494Sandrew return (pmap_page_test_mappings(m, TRUE, FALSE)); 3936281494Sandrew} 3937281494Sandrew 3938281494Sandrew/* 3939281494Sandrew * Clear the write and modified bits in each of the given page's mappings. 3940281494Sandrew */ 3941281494Sandrewvoid 3942281494Sandrewpmap_remove_write(vm_page_t m) 3943281494Sandrew{ 3944305882Sandrew struct md_page *pvh; 3945281494Sandrew pmap_t pmap; 3946281494Sandrew struct rwlock *lock; 3947305882Sandrew pv_entry_t next_pv, pv; 3948297446Sandrew pt_entry_t oldpte, *pte; 3949305882Sandrew vm_offset_t va; 3950305882Sandrew int lvl, md_gen, pvh_gen; 3951281494Sandrew 3952281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3953281494Sandrew ("pmap_remove_write: page %p is not managed", m)); 3954281494Sandrew 3955281494Sandrew /* 3956281494Sandrew * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3957281494Sandrew * set by another thread while the object is locked. Thus, 3958281494Sandrew * if PGA_WRITEABLE is clear, no page table entries need updating. 3959281494Sandrew */ 3960281494Sandrew VM_OBJECT_ASSERT_WLOCKED(m->object); 3961281494Sandrew if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3962281494Sandrew return; 3963281494Sandrew lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3964305882Sandrew pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : 3965305882Sandrew pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3966281494Sandrewretry_pv_loop: 3967281494Sandrew rw_wlock(lock); 3968305882Sandrew TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { 3969305882Sandrew pmap = PV_PMAP(pv); 3970305882Sandrew if (!PMAP_TRYLOCK(pmap)) { 3971305882Sandrew pvh_gen = pvh->pv_gen; 3972305882Sandrew rw_wunlock(lock); 3973305882Sandrew PMAP_LOCK(pmap); 3974305882Sandrew rw_wlock(lock); 3975305882Sandrew if (pvh_gen != pvh->pv_gen) { 3976305882Sandrew PMAP_UNLOCK(pmap); 3977305882Sandrew rw_wunlock(lock); 3978305882Sandrew goto retry_pv_loop; 3979305882Sandrew } 3980305882Sandrew } 3981305882Sandrew va = pv->pv_va; 3982305882Sandrew pte = pmap_pte(pmap, pv->pv_va, &lvl); 3983305882Sandrew if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 3984305882Sandrew pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET, 3985305882Sandrew &lock); 3986305882Sandrew KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 3987305882Sandrew ("inconsistent pv lock %p %p for page %p", 3988305882Sandrew lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 3989305882Sandrew PMAP_UNLOCK(pmap); 3990305882Sandrew } 3991281494Sandrew TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3992281494Sandrew pmap = PV_PMAP(pv); 3993281494Sandrew if (!PMAP_TRYLOCK(pmap)) { 3994305882Sandrew pvh_gen = pvh->pv_gen; 3995281494Sandrew md_gen = m->md.pv_gen; 3996281494Sandrew rw_wunlock(lock); 3997281494Sandrew PMAP_LOCK(pmap); 3998281494Sandrew rw_wlock(lock); 3999305882Sandrew if (pvh_gen != pvh->pv_gen || 4000305882Sandrew md_gen != m->md.pv_gen) { 4001281494Sandrew PMAP_UNLOCK(pmap); 4002281494Sandrew rw_wunlock(lock); 4003281494Sandrew goto retry_pv_loop; 4004281494Sandrew } 4005281494Sandrew } 4006297446Sandrew pte = pmap_pte(pmap, pv->pv_va, &lvl); 4007281494Sandrewretry: 4008297446Sandrew oldpte = pmap_load(pte); 4009297446Sandrew if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { 4010297446Sandrew if (!atomic_cmpset_long(pte, oldpte, 
4011297446Sandrew oldpte | ATTR_AP(ATTR_AP_RO))) 4012281494Sandrew goto retry; 4013297446Sandrew if ((oldpte & ATTR_AF) != 0) 4014281494Sandrew vm_page_dirty(m); 4015281494Sandrew pmap_invalidate_page(pmap, pv->pv_va); 4016281494Sandrew } 4017281494Sandrew PMAP_UNLOCK(pmap); 4018281494Sandrew } 4019281494Sandrew rw_wunlock(lock); 4020281494Sandrew vm_page_aflag_clear(m, PGA_WRITEABLE); 4021281494Sandrew} 4022281494Sandrew 4023281494Sandrewstatic __inline boolean_t 4024281494Sandrewsafe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 4025281494Sandrew{ 4026281494Sandrew 4027281494Sandrew return (FALSE); 4028281494Sandrew} 4029281494Sandrew 4030281494Sandrew/* 4031281494Sandrew * pmap_ts_referenced: 4032281494Sandrew * 4033281494Sandrew * Return a count of reference bits for a page, clearing those bits. 4034281494Sandrew * It is not necessary for every reference bit to be cleared, but it 4035281494Sandrew * is necessary that 0 only be returned when there are truly no 4036281494Sandrew * reference bits set. 4037281494Sandrew * 4038324400Salc * As an optimization, update the page's dirty field if a modified bit is 4039324400Salc * found while counting reference bits. This opportunistic update can be 4040324400Salc * performed at low cost and can eliminate the need for some future calls 4041324400Salc * to pmap_is_modified(). However, since this function stops after 4042324400Salc * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 4043324400Salc * dirty pages. Those dirty pages will only be detected by a future call 4044324400Salc * to pmap_is_modified(). 4045281494Sandrew */ 4046281494Sandrewint 4047281494Sandrewpmap_ts_referenced(vm_page_t m) 4048281494Sandrew{ 4049305882Sandrew struct md_page *pvh; 4050281494Sandrew pv_entry_t pv, pvf; 4051281494Sandrew pmap_t pmap; 4052281494Sandrew struct rwlock *lock; 4053297446Sandrew pd_entry_t *pde, tpde; 4054297446Sandrew pt_entry_t *pte, tpte; 4055305882Sandrew pt_entry_t *l3; 4056305882Sandrew vm_offset_t va; 4057281494Sandrew vm_paddr_t pa; 4058305882Sandrew int cleared, md_gen, not_cleared, lvl, pvh_gen; 4059281494Sandrew struct spglist free; 4060305882Sandrew bool demoted; 4061281494Sandrew 4062281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4063281494Sandrew ("pmap_ts_referenced: page %p is not managed", m)); 4064281494Sandrew SLIST_INIT(&free); 4065281494Sandrew cleared = 0; 4066281494Sandrew pa = VM_PAGE_TO_PHYS(m); 4067281494Sandrew lock = PHYS_TO_PV_LIST_LOCK(pa); 4068305882Sandrew pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : pa_to_pvh(pa); 4069281494Sandrew rw_wlock(lock); 4070281494Sandrewretry: 4071281494Sandrew not_cleared = 0; 4072305882Sandrew if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) 4073305882Sandrew goto small_mappings; 4074305882Sandrew pv = pvf; 4075305882Sandrew do { 4076305882Sandrew if (pvf == NULL) 4077305882Sandrew pvf = pv; 4078305882Sandrew pmap = PV_PMAP(pv); 4079305882Sandrew if (!PMAP_TRYLOCK(pmap)) { 4080305882Sandrew pvh_gen = pvh->pv_gen; 4081305882Sandrew rw_wunlock(lock); 4082305882Sandrew PMAP_LOCK(pmap); 4083305882Sandrew rw_wlock(lock); 4084305882Sandrew if (pvh_gen != pvh->pv_gen) { 4085305882Sandrew PMAP_UNLOCK(pmap); 4086305882Sandrew goto retry; 4087305882Sandrew } 4088305882Sandrew } 4089305882Sandrew va = pv->pv_va; 4090305882Sandrew pde = pmap_pde(pmap, pv->pv_va, &lvl); 4091305882Sandrew KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found")); 4092305882Sandrew KASSERT(lvl == 1, 4093305882Sandrew ("pmap_ts_referenced: invalid pde level %d", lvl)); 4094305882Sandrew tpde = pmap_load(pde); 4095305882Sandrew KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE, 4096305882Sandrew ("pmap_ts_referenced: found an invalid l1 table")); 4097305882Sandrew pte = pmap_l1_to_l2(pde, pv->pv_va); 4098305882Sandrew tpte = pmap_load(pte); 4099324400Salc if (pmap_page_dirty(tpte)) { 4100324400Salc /* 4101324400Salc * Although "tpte" is mapping a 2MB page, because 4102324400Salc * this function is called at a 4KB page granularity, 4103324400Salc * we only update the 4KB page under test. 4104324400Salc */ 4105324400Salc vm_page_dirty(m); 4106324400Salc } 4107305882Sandrew if ((tpte & ATTR_AF) != 0) { 4108305882Sandrew /* 4109305882Sandrew * Since this reference bit is shared by 512 4KB 4110305882Sandrew * pages, it should not be cleared every time it is 4111305882Sandrew * tested. Apply a simple "hash" function on the 4112305882Sandrew * physical page number, the virtual superpage number, 4113305882Sandrew * and the pmap address to select one 4KB page out of 4114305882Sandrew * the 512 on which testing the reference bit will 4115305882Sandrew * result in clearing that reference bit. This 4116305882Sandrew * function is designed to avoid the selection of the 4117305882Sandrew * same 4KB page for every 2MB page mapping. 4118305882Sandrew * 4119305882Sandrew * On demotion, a mapping that hasn't been referenced 4120305882Sandrew * is simply destroyed. To avoid the possibility of a 4121305882Sandrew * subsequent page fault on a demoted wired mapping, 4122305882Sandrew * always leave its reference bit set. Moreover, 4123305882Sandrew * since the superpage is wired, the current state of 4124305882Sandrew * its reference bit won't affect page replacement. 4125305882Sandrew */ 4126305882Sandrew if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ 4127305882Sandrew (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && 4128305882Sandrew (tpte & ATTR_SW_WIRED) == 0) { 4129305882Sandrew if (safe_to_clear_referenced(pmap, tpte)) { 4130305882Sandrew /* 4131305882Sandrew * TODO: We don't handle the access 4132305882Sandrew * flag at all. We need to be able 4133305882Sandrew * to set it in the exception handler. 
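 * Until then, the 2MB mapping is demoted below and the selected 4KB
 * mapping is destroyed, so that a later access faults and re-enters it.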
4134305882Sandrew */ 4135305882Sandrew panic("ARM64TODO: " 4136305882Sandrew "safe_to_clear_referenced\n"); 4137305882Sandrew } else if (pmap_demote_l2_locked(pmap, pte, 4138305882Sandrew pv->pv_va, &lock) != NULL) { 4139305882Sandrew demoted = true; 4140305882Sandrew va += VM_PAGE_TO_PHYS(m) - 4141305882Sandrew (tpte & ~ATTR_MASK); 4142305882Sandrew l3 = pmap_l2_to_l3(pte, va); 4143305882Sandrew pmap_remove_l3(pmap, l3, va, 4144305882Sandrew pmap_load(pte), NULL, &lock); 4145305882Sandrew } else 4146305882Sandrew demoted = true; 4147305882Sandrew 4148305882Sandrew if (demoted) { 4149305882Sandrew /* 4150305882Sandrew * The superpage mapping was removed 4151305882Sandrew * entirely and therefore 'pv' is no 4152305882Sandrew * longer valid. 4153305882Sandrew */ 4154305882Sandrew if (pvf == pv) 4155305882Sandrew pvf = NULL; 4156305882Sandrew pv = NULL; 4157305882Sandrew } 4158305882Sandrew cleared++; 4159305882Sandrew KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4160305882Sandrew ("inconsistent pv lock %p %p for page %p", 4161305882Sandrew lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4162305882Sandrew } else 4163305882Sandrew not_cleared++; 4164305882Sandrew } 4165305882Sandrew PMAP_UNLOCK(pmap); 4166305882Sandrew /* Rotate the PV list if it has more than one entry. */ 4167305882Sandrew if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 4168305882Sandrew TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 4169305882Sandrew TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 4170305882Sandrew pvh->pv_gen++; 4171305882Sandrew } 4172305882Sandrew if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) 4173305882Sandrew goto out; 4174305882Sandrew } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); 4175305882Sandrewsmall_mappings: 4176281494Sandrew if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 4177281494Sandrew goto out; 4178281494Sandrew pv = pvf; 4179281494Sandrew do { 4180281494Sandrew if (pvf == NULL) 4181281494Sandrew pvf = pv; 4182281494Sandrew pmap = PV_PMAP(pv); 4183281494Sandrew if (!PMAP_TRYLOCK(pmap)) { 4184305882Sandrew pvh_gen = pvh->pv_gen; 4185281494Sandrew md_gen = m->md.pv_gen; 4186281494Sandrew rw_wunlock(lock); 4187281494Sandrew PMAP_LOCK(pmap); 4188281494Sandrew rw_wlock(lock); 4189305882Sandrew if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { 4190281494Sandrew PMAP_UNLOCK(pmap); 4191281494Sandrew goto retry; 4192281494Sandrew } 4193281494Sandrew } 4194297446Sandrew pde = pmap_pde(pmap, pv->pv_va, &lvl); 4195297446Sandrew KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); 4196297446Sandrew KASSERT(lvl == 2, 4197297446Sandrew ("pmap_ts_referenced: invalid pde level %d", lvl)); 4198297446Sandrew tpde = pmap_load(pde); 4199297446Sandrew KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, 4200281494Sandrew ("pmap_ts_referenced: found an invalid l2 table")); 4201297446Sandrew pte = pmap_l2_to_l3(pde, pv->pv_va); 4202297446Sandrew tpte = pmap_load(pte); 4203324400Salc if (pmap_page_dirty(tpte)) 4204324400Salc vm_page_dirty(m); 4205297446Sandrew if ((tpte & ATTR_AF) != 0) { 4206297446Sandrew if (safe_to_clear_referenced(pmap, tpte)) { 4207281494Sandrew /* 4208281494Sandrew * TODO: We don't handle the access flag 4209281494Sandrew * at all. We need to be able to set it in 4210281494Sandrew * the exception handler. 
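 * Until then, an unwired mapping is instead
 * destroyed below, so that the next access
 * faults and the fault handler recreates the
 * mapping with ATTR_AF set.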
4211281494Sandrew */ 4212286073Semaste panic("ARM64TODO: safe_to_clear_referenced\n"); 4213297446Sandrew } else if ((tpte & ATTR_SW_WIRED) == 0) { 4214281494Sandrew /* 4215281494Sandrew * Wired pages cannot be paged out so 4216281494Sandrew * doing accessed bit emulation for 4217281494Sandrew * them is wasted effort. We do the 4218281494Sandrew * hard work for unwired pages only. 4219281494Sandrew */ 4220297446Sandrew pmap_remove_l3(pmap, pte, pv->pv_va, tpde, 4221288445Sandrew &free, &lock); 4222281494Sandrew pmap_invalidate_page(pmap, pv->pv_va); 4223281494Sandrew cleared++; 4224281494Sandrew if (pvf == pv) 4225281494Sandrew pvf = NULL; 4226281494Sandrew pv = NULL; 4227281494Sandrew KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4228281494Sandrew ("inconsistent pv lock %p %p for page %p", 4229281494Sandrew lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4230281494Sandrew } else 4231281494Sandrew not_cleared++; 4232281494Sandrew } 4233281494Sandrew PMAP_UNLOCK(pmap); 4234281494Sandrew /* Rotate the PV list if it has more than one entry. */ 4235281494Sandrew if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 4236281494Sandrew TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 4237281494Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 4238281494Sandrew m->md.pv_gen++; 4239281494Sandrew } 4240281494Sandrew } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 4241281494Sandrew not_cleared < PMAP_TS_REFERENCED_MAX); 4242281494Sandrewout: 4243281494Sandrew rw_wunlock(lock); 4244281494Sandrew pmap_free_zero_pages(&free); 4245281494Sandrew return (cleared + not_cleared); 4246281494Sandrew} 4247281494Sandrew 4248281494Sandrew/* 4249281494Sandrew * Apply the given advice to the specified range of addresses within the 4250281494Sandrew * given pmap. Depending on the advice, clear the referenced and/or 4251281494Sandrew * modified flags in each mapping and set the mapped page's dirty field. 4252281494Sandrew */ 4253281494Sandrewvoid 4254281494Sandrewpmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 4255281494Sandrew{ 4256281494Sandrew} 4257281494Sandrew 4258281494Sandrew/* 4259281494Sandrew * Clear the modify bits on the specified physical page. 4260281494Sandrew */ 4261281494Sandrewvoid 4262281494Sandrewpmap_clear_modify(vm_page_t m) 4263281494Sandrew{ 4264281494Sandrew 4265281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4266281494Sandrew ("pmap_clear_modify: page %p is not managed", m)); 4267281494Sandrew VM_OBJECT_ASSERT_WLOCKED(m->object); 4268281494Sandrew KASSERT(!vm_page_xbusied(m), 4269281494Sandrew ("pmap_clear_modify: page %p is exclusive busied", m)); 4270281494Sandrew 4271281494Sandrew /* 4272281494Sandrew * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 4273281494Sandrew * If the object containing the page is locked and the page is not 4274281494Sandrew * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 
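 * Since dirty tracking is not yet implemented (see the ARM64TODO
 * below), that check is the only useful work this function can
 * currently perform.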
4275281494Sandrew */ 4276281494Sandrew if ((m->aflags & PGA_WRITEABLE) == 0) 4277281494Sandrew return; 4278281846Sandrew 4279286073Semaste /* ARM64TODO: We lack support for tracking if a page is modified */ 4280281494Sandrew} 4281281494Sandrew 4282282221Sandrewvoid * 4283282221Sandrewpmap_mapbios(vm_paddr_t pa, vm_size_t size) 4284282221Sandrew{ 4285282221Sandrew 4286282221Sandrew return ((void *)PHYS_TO_DMAP(pa)); 4287282221Sandrew} 4288282221Sandrew 4289282221Sandrewvoid 4290282221Sandrewpmap_unmapbios(vm_paddr_t pa, vm_size_t size) 4291282221Sandrew{ 4292282221Sandrew} 4293282221Sandrew 4294281494Sandrew/* 4295281494Sandrew * Sets the memory attribute for the specified page. 4296281494Sandrew */ 4297281494Sandrewvoid 4298281494Sandrewpmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 4299281494Sandrew{ 4300281494Sandrew 4301286080Sandrew m->md.pv_memattr = ma; 4302286080Sandrew 4303286080Sandrew /* 4304286080Sandrew * If "m" is a normal page, update its direct mapping. This update 4305286080Sandrew * can be relied upon to perform any cache operations that are 4306286080Sandrew * required for data coherence. 4307286080Sandrew */ 4308286080Sandrew if ((m->flags & PG_FICTITIOUS) == 0 && 4309305882Sandrew pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, 4310305882Sandrew m->md.pv_memattr) != 0) 4311305882Sandrew panic("memory attribute change on the direct map failed"); 4312281494Sandrew} 4313281494Sandrew 4314281494Sandrew/* 4315305882Sandrew * Changes the specified virtual address range's memory type to that given by 4316305882Sandrew * the parameter "mode". The specified virtual address range must be 4317305882Sandrew * completely contained within either the direct map or the kernel map. If 4318305882Sandrew * the virtual address range is contained within the kernel map, then the 4319305882Sandrew * memory type for each of the corresponding ranges of the direct map is also 4320305882Sandrew * changed. (The corresponding ranges of the direct map are those ranges that 4321305882Sandrew * map the same physical pages as the specified virtual address range.) These 4322305882Sandrew * changes to the direct map are necessary because Intel describes the 4323305882Sandrew * behavior of their processors as "undefined" if two or more mappings to the 4324305882Sandrew * same physical page have different memory types. 4325305882Sandrew * 4326305882Sandrew * Returns zero if the change completed successfully, and either EINVAL or 4327305882Sandrew * ENOMEM if the change failed. Specifically, EINVAL is returned if some part 4328305882Sandrew * of the virtual address range was not mapped, and ENOMEM is returned if 4329305882Sandrew * there was insufficient memory available to complete the change. In the 4330305882Sandrew * latter case, the memory type may have been changed on some part of the 4331305882Sandrew * virtual address range or the direct map. 
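 *
 * The same aliasing rule applies here: the Arm architecture makes the
 * behavior of mappings of one physical page with mismatched memory
 * attributes unpredictable.  pmap_page_set_memattr() above relies on
 * this routine to keep a page's direct map alias coherent.  Note that
 * the current implementation only accepts direct map addresses; any
 * other range fails the VIRT_IN_DMAP() check below with EINVAL.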
4332305882Sandrew */
4333305882Sandrewstatic int
4334305882Sandrewpmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
4335305882Sandrew{
4336305882Sandrew int error;
4337305882Sandrew
4338305882Sandrew PMAP_LOCK(kernel_pmap);
4339305882Sandrew error = pmap_change_attr_locked(va, size, mode);
4340305882Sandrew PMAP_UNLOCK(kernel_pmap);
4341305882Sandrew return (error);
4342305882Sandrew}
4343305882Sandrew
4344305882Sandrewstatic int
4345305882Sandrewpmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
4346305882Sandrew{
4347305882Sandrew vm_offset_t base, offset, tmpva;
4348305882Sandrew pt_entry_t l3, *pte, *newpte;
4349305882Sandrew int lvl;
4350305882Sandrew
4351305882Sandrew PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
4352305882Sandrew base = trunc_page(va);
4353305882Sandrew offset = va & PAGE_MASK;
4354305882Sandrew size = round_page(offset + size);
4355305882Sandrew
4356305882Sandrew if (!VIRT_IN_DMAP(base))
4357305882Sandrew return (EINVAL);
4358305882Sandrew
4359305882Sandrew for (tmpva = base; tmpva < base + size; ) {
4360305882Sandrew pte = pmap_pte(kernel_pmap, tmpva, &lvl);
4361305882Sandrew if (pte == NULL)
4362305882Sandrew return (EINVAL);
4363305882Sandrew
4364305882Sandrew if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) {
4365305882Sandrew /*
4366305882Sandrew * We already have the correct attribute,
4367305882Sandrew * ignore this entry.
4368305882Sandrew */
4369305882Sandrew switch (lvl) {
4370305882Sandrew default:
4371305882Sandrew panic("Invalid DMAP table level: %d\n", lvl);
4372305882Sandrew case 1:
4373305882Sandrew tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
4374305882Sandrew break;
4375305882Sandrew case 2:
4376305882Sandrew tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
4377305882Sandrew break;
4378305882Sandrew case 3:
4379305882Sandrew tmpva += PAGE_SIZE;
4380305882Sandrew break;
4381305882Sandrew }
4382305882Sandrew } else {
4383305882Sandrew /*
4384305882Sandrew * Split the entry to a level 3 table, then
4385305882Sandrew * set the new attribute.
4386305882Sandrew */
4387305882Sandrew switch (lvl) {
4388305882Sandrew default:
4389305882Sandrew panic("Invalid DMAP table level: %d\n", lvl);
4390305882Sandrew case 1:
4391305882Sandrew newpte = pmap_demote_l1(kernel_pmap, pte,
4392305882Sandrew tmpva & ~L1_OFFSET);
4393305882Sandrew if (newpte == NULL)
4394305882Sandrew return (EINVAL);
4395305882Sandrew pte = pmap_l1_to_l2(pte, tmpva);
4396305882Sandrew case 2:
4397305882Sandrew newpte = pmap_demote_l2(kernel_pmap, pte,
4398305882Sandrew tmpva & ~L2_OFFSET);
4399305882Sandrew if (newpte == NULL)
4400305882Sandrew return (EINVAL);
4401305882Sandrew pte = pmap_l2_to_l3(pte, tmpva);
4402305882Sandrew case 3:
4403305882Sandrew /* Update the entry */
4404305882Sandrew l3 = pmap_load(pte);
4405305882Sandrew l3 &= ~ATTR_IDX_MASK;
4406305882Sandrew l3 |= ATTR_IDX(mode);
4407319203Sandrew if (mode == DEVICE_MEMORY)
4408319203Sandrew l3 |= ATTR_XN;
4409305882Sandrew
4410305882Sandrew pmap_update_entry(kernel_pmap, pte, l3, tmpva,
4411305882Sandrew PAGE_SIZE);
4412305882Sandrew
4413305882Sandrew /*
4414305882Sandrew * If moving to a non-cacheable entry flush
4415305882Sandrew * the cache.
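 * Otherwise, dirty lines covering the range
 * could be written back later and clobber
 * data stored through the new non-cacheable
 * mapping.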
4416305882Sandrew */
4417305882Sandrew if (mode == VM_MEMATTR_UNCACHEABLE)
4418305882Sandrew cpu_dcache_wbinv_range(tmpva, L3_SIZE);
4419305882Sandrew
4420305882Sandrew break;
4421305882Sandrew }
4422305882Sandrew tmpva += PAGE_SIZE;
4423305882Sandrew }
4424305882Sandrew }
4425305882Sandrew
4426305882Sandrew return (0);
4427305882Sandrew}
4428305882Sandrew
4429305882Sandrew/*
4430305882Sandrew * Create an L2 table to map all addresses within an L1 mapping.
4431305882Sandrew */
4432305882Sandrewstatic pt_entry_t *
4433305882Sandrewpmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
4434305882Sandrew{
4435305882Sandrew pt_entry_t *l2, newl2, oldl1;
4436305882Sandrew vm_offset_t tmpl1;
4437305882Sandrew vm_paddr_t l2phys, phys;
4438305882Sandrew vm_page_t ml2;
4439305882Sandrew int i;
4440305882Sandrew
4441305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4442305882Sandrew oldl1 = pmap_load(l1);
4443305882Sandrew KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
4444305882Sandrew ("pmap_demote_l1: Demoting a non-block entry"));
4445305882Sandrew KASSERT((va & L1_OFFSET) == 0,
4446305882Sandrew ("pmap_demote_l1: Invalid virtual address %#lx", va));
4447305882Sandrew KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
4448305882Sandrew ("pmap_demote_l1: Level 1 table shouldn't be managed"));
4449305882Sandrew
4450305882Sandrew tmpl1 = 0;
4451305882Sandrew if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
4452305882Sandrew tmpl1 = kva_alloc(PAGE_SIZE);
4453305882Sandrew if (tmpl1 == 0)
4454305882Sandrew return (NULL);
4455305882Sandrew }
4456305882Sandrew
4457305882Sandrew if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
4458305882Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
4459305882Sandrew CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
4460305882Sandrew " in pmap %p", va, pmap);
4461305882Sandrew return (NULL);
4462305882Sandrew }
4463305882Sandrew
4464305882Sandrew l2phys = VM_PAGE_TO_PHYS(ml2);
4465305882Sandrew l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
4466305882Sandrew
4467305882Sandrew /* The address the range points at */
4468305882Sandrew phys = oldl1 & ~ATTR_MASK;
4469305882Sandrew /* The attributes from the old l1 table to be copied */
4470305882Sandrew newl2 = oldl1 & ATTR_MASK;
4471305882Sandrew
4472305882Sandrew /* Create the new entries */
4473305882Sandrew for (i = 0; i < Ln_ENTRIES; i++) {
4474305882Sandrew l2[i] = newl2 | phys;
4475305882Sandrew phys += L2_SIZE;
4476305882Sandrew }
4477305882Sandrew cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
4478305882Sandrew KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
4479305882Sandrew ("Invalid l2 page (%lx != %lx)", l2[0],
4480305882Sandrew (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
4481305882Sandrew
4482305882Sandrew if (tmpl1 != 0) {
4483305882Sandrew pmap_kenter(tmpl1, PAGE_SIZE,
4484305882Sandrew DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY);
4485305882Sandrew l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
4486305882Sandrew }
4487305882Sandrew
4488305882Sandrew pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
4489305882Sandrew
4490305882Sandrew if (tmpl1 != 0) {
4491305882Sandrew pmap_kremove(tmpl1);
4492305882Sandrew kva_free(tmpl1, PAGE_SIZE);
4493305882Sandrew }
4494305882Sandrew
4495305882Sandrew return (l2);
4496305882Sandrew}
4497305882Sandrew
4498305882Sandrew/*
4499305882Sandrew * Create an L3 table to map all addresses within an L2 mapping.
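 * On success, the 2MB block entry is replaced, using a break-before-make
 * sequence in pmap_update_entry(), with an L2_TABLE entry that points to
 * a fully populated L3 page; pmap_fault() below resolves any spurious
 * faults taken during that window.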
4500305882Sandrew */
4501305882Sandrewstatic pt_entry_t *
4502305882Sandrewpmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
4503305882Sandrew struct rwlock **lockp)
4504305882Sandrew{
4505305882Sandrew pt_entry_t *l3, newl3, oldl2;
4506305882Sandrew vm_offset_t tmpl2;
4507305882Sandrew vm_paddr_t l3phys, phys;
4508305882Sandrew vm_page_t ml3;
4509305882Sandrew int i;
4510305882Sandrew
4511305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4512305882Sandrew l3 = NULL;
4513305882Sandrew oldl2 = pmap_load(l2);
4514305882Sandrew KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
4515305882Sandrew ("pmap_demote_l2: Demoting a non-block entry"));
4516305882Sandrew KASSERT((va & L2_OFFSET) == 0,
4517305882Sandrew ("pmap_demote_l2: Invalid virtual address %#lx", va));
4518305882Sandrew
4519305882Sandrew tmpl2 = 0;
4520305882Sandrew if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
4521305882Sandrew tmpl2 = kva_alloc(PAGE_SIZE);
4522305882Sandrew if (tmpl2 == 0)
4523305882Sandrew return (NULL);
4524305882Sandrew }
4525305882Sandrew
4526318716Smarkj if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
4527305882Sandrew ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
4528305882Sandrew (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
4529305882Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
4530305882Sandrew if (ml3 == NULL) {
4531305882Sandrew CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
4532305882Sandrew " in pmap %p", va, pmap);
4533305882Sandrew goto fail;
4534305882Sandrew }
4535305882Sandrew if (va < VM_MAXUSER_ADDRESS)
4536305882Sandrew pmap_resident_count_inc(pmap, 1);
4537305882Sandrew }
4538305882Sandrew
4539305882Sandrew l3phys = VM_PAGE_TO_PHYS(ml3);
4540305882Sandrew l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
4541305882Sandrew
4542305882Sandrew /* The address the range points at */
4543305882Sandrew phys = oldl2 & ~ATTR_MASK;
4544305882Sandrew /* The attributes from the old l2 table to be copied */
4545305882Sandrew newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE;
4546305882Sandrew
4547305882Sandrew /*
4548305882Sandrew * If the page table page is new, initialize it.
4549305882Sandrew */
4550305882Sandrew if (ml3->wire_count == 1) {
4551305882Sandrew for (i = 0; i < Ln_ENTRIES; i++) {
4552305882Sandrew l3[i] = newl3 | phys;
4553305882Sandrew phys += L3_SIZE;
4554305882Sandrew }
4555305882Sandrew cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE);
4556305882Sandrew }
4557305882Sandrew KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE),
4558305882Sandrew ("Invalid l3 page (%lx != %lx)", l3[0],
4559305882Sandrew (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE));
4560305882Sandrew
4561305882Sandrew /*
4562305882Sandrew * Map the temporary page so we don't lose access to the l2 table.
4563305882Sandrew */
4564305882Sandrew if (tmpl2 != 0) {
4565305882Sandrew pmap_kenter(tmpl2, PAGE_SIZE,
4566305882Sandrew DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY);
4567305882Sandrew l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
4568305882Sandrew }
4569305882Sandrew
4570305882Sandrew /*
4571305882Sandrew * The spare PV entries must be reserved prior to demoting the
4572305882Sandrew * mapping, that is, prior to changing the PDE.
Otherwise, the state 4573305882Sandrew * of the L2 and the PV lists will be inconsistent, which can result 4574305882Sandrew * in reclaim_pv_chunk() attempting to remove a PV entry from the 4575305882Sandrew * wrong PV list and pmap_pv_demote_l2() failing to find the expected 4576305882Sandrew * PV entry for the 2MB page mapping that is being demoted. 4577305882Sandrew */ 4578305882Sandrew if ((oldl2 & ATTR_SW_MANAGED) != 0) 4579305882Sandrew reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); 4580305882Sandrew 4581305882Sandrew pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE); 4582305882Sandrew 4583305882Sandrew /* 4584305882Sandrew * Demote the PV entry. 4585305882Sandrew */ 4586305882Sandrew if ((oldl2 & ATTR_SW_MANAGED) != 0) 4587305882Sandrew pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp); 4588305882Sandrew 4589305882Sandrew atomic_add_long(&pmap_l2_demotions, 1); 4590305882Sandrew CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx" 4591305882Sandrew " in pmap %p %lx", va, pmap, l3[0]); 4592305882Sandrew 4593305882Sandrewfail: 4594305882Sandrew if (tmpl2 != 0) { 4595305882Sandrew pmap_kremove(tmpl2); 4596305882Sandrew kva_free(tmpl2, PAGE_SIZE); 4597305882Sandrew } 4598305882Sandrew 4599305882Sandrew return (l3); 4600305882Sandrew 4601305882Sandrew} 4602305882Sandrew 4603305882Sandrewstatic pt_entry_t * 4604305882Sandrewpmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) 4605305882Sandrew{ 4606305882Sandrew struct rwlock *lock; 4607305882Sandrew pt_entry_t *l3; 4608305882Sandrew 4609305882Sandrew lock = NULL; 4610305882Sandrew l3 = pmap_demote_l2_locked(pmap, l2, va, &lock); 4611305882Sandrew if (lock != NULL) 4612305882Sandrew rw_wunlock(lock); 4613305882Sandrew return (l3); 4614305882Sandrew} 4615305882Sandrew 4616305882Sandrew/* 4617281494Sandrew * perform the pmap work for mincore 4618281494Sandrew */ 4619281494Sandrewint 4620281494Sandrewpmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 4621281494Sandrew{ 4622287570Sandrew pd_entry_t *l1p, l1; 4623287570Sandrew pd_entry_t *l2p, l2; 4624287570Sandrew pt_entry_t *l3p, l3; 4625287570Sandrew vm_paddr_t pa; 4626287570Sandrew bool managed; 4627287570Sandrew int val; 4628281494Sandrew 4629287570Sandrew PMAP_LOCK(pmap); 4630287570Sandrewretry: 4631287570Sandrew pa = 0; 4632287570Sandrew val = 0; 4633287570Sandrew managed = false; 4634287570Sandrew 4635287570Sandrew l1p = pmap_l1(pmap, addr); 4636287570Sandrew if (l1p == NULL) /* No l1 */ 4637287570Sandrew goto done; 4638295425Swma 4639287570Sandrew l1 = pmap_load(l1p); 4640295425Swma if ((l1 & ATTR_DESCR_MASK) == L1_INVAL) 4641295425Swma goto done; 4642295425Swma 4643287570Sandrew if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { 4644287570Sandrew pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); 4645287570Sandrew managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 4646287570Sandrew val = MINCORE_SUPER | MINCORE_INCORE; 4647287570Sandrew if (pmap_page_dirty(l1)) 4648287570Sandrew val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 4649287570Sandrew if ((l1 & ATTR_AF) == ATTR_AF) 4650287570Sandrew val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 4651287570Sandrew goto done; 4652287570Sandrew } 4653287570Sandrew 4654287570Sandrew l2p = pmap_l1_to_l2(l1p, addr); 4655287570Sandrew if (l2p == NULL) /* No l2 */ 4656287570Sandrew goto done; 4657295425Swma 4658287570Sandrew l2 = pmap_load(l2p); 4659295425Swma if ((l2 & ATTR_DESCR_MASK) == L2_INVAL) 4660295425Swma goto done; 4661295425Swma 4662287570Sandrew if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { 
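 /*
 * This is a 2MB block mapping: report superpage residency and derive
 * the physical address from the block base plus the offset within
 * the block.
 */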
4663287570Sandrew pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET);
4664287570Sandrew managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
4665287570Sandrew val = MINCORE_SUPER | MINCORE_INCORE;
4666287570Sandrew if (pmap_page_dirty(l2))
4667287570Sandrew val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4668287570Sandrew if ((l2 & ATTR_AF) == ATTR_AF)
4669287570Sandrew val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
4670287570Sandrew goto done;
4671287570Sandrew }
4672287570Sandrew
4673287570Sandrew l3p = pmap_l2_to_l3(l2p, addr);
4674287570Sandrew if (l3p == NULL) /* No l3 */
4675287570Sandrew goto done;
4676295425Swma
4677287570Sandrew l3 = pmap_load(l3p);
4678295425Swma if ((l3 & ATTR_DESCR_MASK) == L3_INVAL)
4679295425Swma goto done;
4680295425Swma
4681287570Sandrew if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
4682287570Sandrew pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
4683287570Sandrew managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
4684287570Sandrew val = MINCORE_INCORE;
4685287570Sandrew if (pmap_page_dirty(l3))
4686287570Sandrew val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4687287570Sandrew if ((l3 & ATTR_AF) == ATTR_AF)
4688287570Sandrew val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
4689287570Sandrew }
4690287570Sandrew
4691287570Sandrewdone:
4692287570Sandrew if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
4693287570Sandrew (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
4694287570Sandrew /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
4695287570Sandrew if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
4696287570Sandrew goto retry;
4697287570Sandrew } else
4698287570Sandrew PA_UNLOCK_COND(*locked_pa);
4699287570Sandrew PMAP_UNLOCK(pmap);
4700287570Sandrew
4701287570Sandrew return (val);
4702281494Sandrew}
4703281494Sandrew
4704281494Sandrewvoid
4705281494Sandrewpmap_activate(struct thread *td)
4706281494Sandrew{
4707281494Sandrew pmap_t pmap;
4708281494Sandrew
4709281494Sandrew critical_enter();
4710281494Sandrew pmap = vmspace_pmap(td->td_proc->p_vmspace);
4711297446Sandrew td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0);
4712297446Sandrew __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr));
4713285212Sandrew pmap_invalidate_all(pmap);
4714281494Sandrew critical_exit();
4715281494Sandrew}
4716281494Sandrew
4717281494Sandrewvoid
4718287105Sandrewpmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
4719281494Sandrew{
4720281494Sandrew
4721287105Sandrew if (va >= VM_MIN_KERNEL_ADDRESS) {
4722287105Sandrew cpu_icache_sync_range(va, sz);
4723287105Sandrew } else {
4724287105Sandrew u_int len, offset;
4725287105Sandrew vm_paddr_t pa;
4726287105Sandrew
4727287105Sandrew /* Find the length of data in this page to flush */
4728287105Sandrew offset = va & PAGE_MASK;
4729287105Sandrew len = imin(PAGE_SIZE - offset, sz);
4730287105Sandrew
4731287105Sandrew while (sz != 0) {
4732287105Sandrew /* Extract the physical address & find it in the DMAP */
4733287105Sandrew pa = pmap_extract(pmap, va);
4734287105Sandrew if (pa != 0)
4735287105Sandrew cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
4736287105Sandrew
4737287105Sandrew /* Move to the next page */
4738287105Sandrew sz -= len;
4739287105Sandrew va += len;
4740287105Sandrew /* Set the length for the next iteration */
4741287105Sandrew len = imin(PAGE_SIZE, sz);
4742287105Sandrew }
4743287105Sandrew }
4744281494Sandrew}
4745281494Sandrew
4746305882Sandrewint
4747305882Sandrewpmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
4748305882Sandrew{
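 /*
 * Only data aborts are handled here. A translation fault can be
 * spurious if it raced with a break-before-make update of the same
 * mapping; the address translation check below detects that case.
 */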
4749305882Sandrew#ifdef SMP 4750305882Sandrew uint64_t par; 4751305882Sandrew#endif 4752305882Sandrew 4753305882Sandrew switch (ESR_ELx_EXCEPTION(esr)) { 4754305882Sandrew case EXCP_DATA_ABORT_L: 4755305882Sandrew case EXCP_DATA_ABORT: 4756305882Sandrew break; 4757305882Sandrew default: 4758305882Sandrew return (KERN_FAILURE); 4759305882Sandrew } 4760305882Sandrew 4761305882Sandrew#ifdef SMP 4762305882Sandrew PMAP_LOCK(pmap); 4763305882Sandrew switch (esr & ISS_DATA_DFSC_MASK) { 4764305882Sandrew case ISS_DATA_DFSC_TF_L0: 4765305882Sandrew case ISS_DATA_DFSC_TF_L1: 4766305882Sandrew case ISS_DATA_DFSC_TF_L2: 4767305882Sandrew case ISS_DATA_DFSC_TF_L3: 4768305882Sandrew /* Ask the MMU to check the address */ 4769305882Sandrew if (pmap == kernel_pmap) 4770305882Sandrew par = arm64_address_translate_s1e1r(far); 4771305882Sandrew else 4772305882Sandrew par = arm64_address_translate_s1e0r(far); 4773305882Sandrew 4774305882Sandrew /* 4775305882Sandrew * If the translation was successful the address was invalid 4776305882Sandrew * due to a break-before-make sequence. We can unlock and 4777305882Sandrew * return success to the trap handler. 4778305882Sandrew */ 4779305882Sandrew if (PAR_SUCCESS(par)) { 4780305882Sandrew PMAP_UNLOCK(pmap); 4781305882Sandrew return (KERN_SUCCESS); 4782305882Sandrew } 4783305882Sandrew break; 4784305882Sandrew default: 4785305882Sandrew break; 4786305882Sandrew } 4787305882Sandrew PMAP_UNLOCK(pmap); 4788305882Sandrew#endif 4789305882Sandrew 4790305882Sandrew return (KERN_FAILURE); 4791305882Sandrew} 4792305882Sandrew 4793281494Sandrew/* 4794281494Sandrew * Increase the starting virtual address of the given mapping if a 4795281494Sandrew * different alignment might result in more superpage mappings. 4796281494Sandrew */ 4797281494Sandrewvoid 4798281494Sandrewpmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 4799281494Sandrew vm_offset_t *addr, vm_size_t size) 4800281494Sandrew{ 4801305880Sandrew vm_offset_t superpage_offset; 4802305880Sandrew 4803305880Sandrew if (size < L2_SIZE) 4804305880Sandrew return; 4805305880Sandrew if (object != NULL && (object->flags & OBJ_COLORED) != 0) 4806305880Sandrew offset += ptoa(object->pg_color); 4807305880Sandrew superpage_offset = offset & L2_OFFSET; 4808305880Sandrew if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || 4809305880Sandrew (*addr & L2_OFFSET) == superpage_offset) 4810305880Sandrew return; 4811305880Sandrew if ((*addr & L2_OFFSET) < superpage_offset) 4812305880Sandrew *addr = (*addr & ~L2_OFFSET) + superpage_offset; 4813305880Sandrew else 4814305880Sandrew *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; 4815281494Sandrew} 4816281494Sandrew 4817281494Sandrew/** 4818281494Sandrew * Get the kernel virtual address of a set of physical pages. If there are 4819281494Sandrew * physical addresses not covered by the DMAP perform a transient mapping 4820281494Sandrew * that will be removed when calling pmap_unmap_io_transient. 4821281494Sandrew * 4822281494Sandrew * \param page The pages the caller wishes to obtain the virtual 4823281494Sandrew * address on the kernel memory map. 4824281494Sandrew * \param vaddr On return contains the kernel virtual memory address 4825281494Sandrew * of the pages passed in the page parameter. 4826281494Sandrew * \param count Number of pages passed in. 4827281494Sandrew * \param can_fault TRUE if the thread using the mapped pages can take 4828281494Sandrew * page faults, FALSE otherwise. 
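 *                      When FALSE, the thread is additionally pinned
 *                      via sched_pin() for as long as the transient
 *                      mappings are in use.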
4829281494Sandrew * 4830281494Sandrew * \returns TRUE if the caller must call pmap_unmap_io_transient when 4831281494Sandrew * finished or FALSE otherwise. 4832281494Sandrew * 4833281494Sandrew */ 4834281494Sandrewboolean_t 4835281494Sandrewpmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 4836281494Sandrew boolean_t can_fault) 4837281494Sandrew{ 4838281494Sandrew vm_paddr_t paddr; 4839281494Sandrew boolean_t needs_mapping; 4840281494Sandrew int error, i; 4841281494Sandrew 4842281494Sandrew /* 4843281494Sandrew * Allocate any KVA space that we need, this is done in a separate 4844281494Sandrew * loop to prevent calling vmem_alloc while pinned. 4845281494Sandrew */ 4846281494Sandrew needs_mapping = FALSE; 4847281494Sandrew for (i = 0; i < count; i++) { 4848281494Sandrew paddr = VM_PAGE_TO_PHYS(page[i]); 4849297617Sandrew if (__predict_false(!PHYS_IN_DMAP(paddr))) { 4850281494Sandrew error = vmem_alloc(kernel_arena, PAGE_SIZE, 4851281494Sandrew M_BESTFIT | M_WAITOK, &vaddr[i]); 4852281494Sandrew KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 4853281494Sandrew needs_mapping = TRUE; 4854281494Sandrew } else { 4855281494Sandrew vaddr[i] = PHYS_TO_DMAP(paddr); 4856281494Sandrew } 4857281494Sandrew } 4858281494Sandrew 4859281494Sandrew /* Exit early if everything is covered by the DMAP */ 4860281494Sandrew if (!needs_mapping) 4861281494Sandrew return (FALSE); 4862281494Sandrew 4863281494Sandrew if (!can_fault) 4864281494Sandrew sched_pin(); 4865281494Sandrew for (i = 0; i < count; i++) { 4866281494Sandrew paddr = VM_PAGE_TO_PHYS(page[i]); 4867297617Sandrew if (!PHYS_IN_DMAP(paddr)) { 4868281494Sandrew panic( 4869281494Sandrew "pmap_map_io_transient: TODO: Map out of DMAP data"); 4870281494Sandrew } 4871281494Sandrew } 4872281494Sandrew 4873281494Sandrew return (needs_mapping); 4874281494Sandrew} 4875281494Sandrew 4876281494Sandrewvoid 4877281494Sandrewpmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 4878281494Sandrew boolean_t can_fault) 4879281494Sandrew{ 4880281494Sandrew vm_paddr_t paddr; 4881281494Sandrew int i; 4882281494Sandrew 4883281494Sandrew if (!can_fault) 4884281494Sandrew sched_unpin(); 4885281494Sandrew for (i = 0; i < count; i++) { 4886281494Sandrew paddr = VM_PAGE_TO_PHYS(page[i]); 4887297617Sandrew if (!PHYS_IN_DMAP(paddr)) { 4888286073Semaste panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); 4889281494Sandrew } 4890281494Sandrew } 4891281494Sandrew} 4892
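/*
 * Example usage of the transient mapping pair above.  This is an
 * illustrative sketch rather than kernel code: the helper name and the
 * caller-provided "vaddr" scratch array are hypothetical, and the block
 * is compiled out by default.  It byte-sums a set of pages whether or
 * not they are covered by the DMAP.
 */
#ifdef PMAP_EXAMPLE
static uint32_t
pmap_example_sum_pages(vm_page_t page[], vm_offset_t vaddr[], int count)
{
	boolean_t needs_unmap;
	uint32_t sum;
	int i, j;

	/* Map any pages that the DMAP does not already cover. */
	needs_unmap = pmap_map_io_transient(page, vaddr, count, TRUE);
	sum = 0;
	for (i = 0; i < count; i++)
		for (j = 0; j < PAGE_SIZE; j++)
			sum += *(uint8_t *)(vaddr[i] + j);
	/* Undo only the transient mappings created above. */
	if (needs_unmap)
		pmap_unmap_io_transient(page, vaddr, count, TRUE);
	return (sum);
}
#endif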