/* pmap.c — FreeBSD stable/11 sys/arm64/arm64/pmap.c, revision 323845 */
1281494Sandrew/*- 2281494Sandrew * Copyright (c) 1991 Regents of the University of California. 3281494Sandrew * All rights reserved. 4281494Sandrew * Copyright (c) 1994 John S. Dyson 5281494Sandrew * All rights reserved. 6281494Sandrew * Copyright (c) 1994 David Greenman 7281494Sandrew * All rights reserved. 8281494Sandrew * Copyright (c) 2003 Peter Wemm 9281494Sandrew * All rights reserved. 10281494Sandrew * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 11281494Sandrew * All rights reserved. 12281494Sandrew * Copyright (c) 2014 Andrew Turner 13281494Sandrew * All rights reserved. 14297446Sandrew * Copyright (c) 2014-2016 The FreeBSD Foundation 15281494Sandrew * All rights reserved. 16281494Sandrew * 17281494Sandrew * This code is derived from software contributed to Berkeley by 18281494Sandrew * the Systems Programming Group of the University of Utah Computer 19281494Sandrew * Science Department and William Jolitz of UUNET Technologies Inc. 20281494Sandrew * 21281494Sandrew * This software was developed by Andrew Turner under sponsorship from 22281494Sandrew * the FreeBSD Foundation. 23281494Sandrew * 24281494Sandrew * Redistribution and use in source and binary forms, with or without 25281494Sandrew * modification, are permitted provided that the following conditions 26281494Sandrew * are met: 27281494Sandrew * 1. Redistributions of source code must retain the above copyright 28281494Sandrew * notice, this list of conditions and the following disclaimer. 29281494Sandrew * 2. Redistributions in binary form must reproduce the above copyright 30281494Sandrew * notice, this list of conditions and the following disclaimer in the 31281494Sandrew * documentation and/or other materials provided with the distribution. 32281494Sandrew * 3. 
All advertising materials mentioning features or use of this software 33281494Sandrew * must display the following acknowledgement: 34281494Sandrew * This product includes software developed by the University of 35281494Sandrew * California, Berkeley and its contributors. 36281494Sandrew * 4. Neither the name of the University nor the names of its contributors 37281494Sandrew * may be used to endorse or promote products derived from this software 38281494Sandrew * without specific prior written permission. 39281494Sandrew * 40281494Sandrew * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41281494Sandrew * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42281494Sandrew * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43281494Sandrew * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44281494Sandrew * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45281494Sandrew * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46281494Sandrew * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47281494Sandrew * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48281494Sandrew * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49281494Sandrew * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50281494Sandrew * SUCH DAMAGE. 51281494Sandrew * 52281494Sandrew * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 53281494Sandrew */ 54281494Sandrew/*- 55281494Sandrew * Copyright (c) 2003 Networks Associates Technology, Inc. 56281494Sandrew * All rights reserved. 57281494Sandrew * 58281494Sandrew * This software was developed for the FreeBSD Project by Jake Burkholder, 59281494Sandrew * Safeport Network Services, and Network Associates Laboratories, the 60281494Sandrew * Security Research Division of Network Associates, Inc. 
under 61281494Sandrew * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 62281494Sandrew * CHATS research program. 63281494Sandrew * 64281494Sandrew * Redistribution and use in source and binary forms, with or without 65281494Sandrew * modification, are permitted provided that the following conditions 66281494Sandrew * are met: 67281494Sandrew * 1. Redistributions of source code must retain the above copyright 68281494Sandrew * notice, this list of conditions and the following disclaimer. 69281494Sandrew * 2. Redistributions in binary form must reproduce the above copyright 70281494Sandrew * notice, this list of conditions and the following disclaimer in the 71281494Sandrew * documentation and/or other materials provided with the distribution. 72281494Sandrew * 73281494Sandrew * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 74281494Sandrew * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75281494Sandrew * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76281494Sandrew * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 77281494Sandrew * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78281494Sandrew * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79281494Sandrew * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80281494Sandrew * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81281494Sandrew * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82281494Sandrew * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83281494Sandrew * SUCH DAMAGE. 84281494Sandrew */ 85281494Sandrew 86281494Sandrew#include <sys/cdefs.h> 87281494Sandrew__FBSDID("$FreeBSD: stable/11/sys/arm64/arm64/pmap.c 323845 2017-09-21 08:16:21Z andrew $"); 88281494Sandrew 89281494Sandrew/* 90281494Sandrew * Manages physical address maps. 
91281494Sandrew * 92281494Sandrew * Since the information managed by this module is 93281494Sandrew * also stored by the logical address mapping module, 94281494Sandrew * this module may throw away valid virtual-to-physical 95281494Sandrew * mappings at almost any time. However, invalidations 96281494Sandrew * of virtual-to-physical mappings must be done as 97281494Sandrew * requested. 98281494Sandrew * 99281494Sandrew * In order to cope with hardware architectures which 100281494Sandrew * make virtual-to-physical map invalidates expensive, 101281494Sandrew * this module may delay invalidate or reduced protection 102281494Sandrew * operations until such time as they are actually 103281494Sandrew * necessary. This module is given full information as 104281494Sandrew * to which processors are currently using which maps, 105281494Sandrew * and to when physical maps must be made correct. 106281494Sandrew */ 107281494Sandrew 108281494Sandrew#include <sys/param.h> 109305882Sandrew#include <sys/bitstring.h> 110281494Sandrew#include <sys/bus.h> 111281494Sandrew#include <sys/systm.h> 112281494Sandrew#include <sys/kernel.h> 113281494Sandrew#include <sys/ktr.h> 114281494Sandrew#include <sys/lock.h> 115281494Sandrew#include <sys/malloc.h> 116281494Sandrew#include <sys/mman.h> 117281494Sandrew#include <sys/msgbuf.h> 118281494Sandrew#include <sys/mutex.h> 119281494Sandrew#include <sys/proc.h> 120281494Sandrew#include <sys/rwlock.h> 121281494Sandrew#include <sys/sx.h> 122281494Sandrew#include <sys/vmem.h> 123281494Sandrew#include <sys/vmmeter.h> 124281494Sandrew#include <sys/sched.h> 125281494Sandrew#include <sys/sysctl.h> 126281494Sandrew#include <sys/_unrhdr.h> 127281494Sandrew#include <sys/smp.h> 128281494Sandrew 129281494Sandrew#include <vm/vm.h> 130281494Sandrew#include <vm/vm_param.h> 131281494Sandrew#include <vm/vm_kern.h> 132281494Sandrew#include <vm/vm_page.h> 133281494Sandrew#include <vm/vm_map.h> 134281494Sandrew#include <vm/vm_object.h> 135281494Sandrew#include 
<vm/vm_extern.h> 136281494Sandrew#include <vm/vm_pageout.h> 137281494Sandrew#include <vm/vm_pager.h> 138305882Sandrew#include <vm/vm_phys.h> 139281494Sandrew#include <vm/vm_radix.h> 140281494Sandrew#include <vm/vm_reserv.h> 141281494Sandrew#include <vm/uma.h> 142281494Sandrew 143281494Sandrew#include <machine/machdep.h> 144281494Sandrew#include <machine/md_var.h> 145281494Sandrew#include <machine/pcb.h> 146281494Sandrew 147297446Sandrew#define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) 148297446Sandrew#define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) 149297446Sandrew#define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) 150297446Sandrew#define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) 151281494Sandrew 152297446Sandrew#define NUL0E L0_ENTRIES 153297446Sandrew#define NUL1E (NUL0E * NL1PG) 154297446Sandrew#define NUL2E (NUL1E * NL2PG) 155297446Sandrew 156281494Sandrew#if !defined(DIAGNOSTIC) 157281494Sandrew#ifdef __GNUC_GNU_INLINE__ 158281494Sandrew#define PMAP_INLINE __attribute__((__gnu_inline__)) inline 159281494Sandrew#else 160281494Sandrew#define PMAP_INLINE extern inline 161281494Sandrew#endif 162281494Sandrew#else 163281494Sandrew#define PMAP_INLINE 164281494Sandrew#endif 165281494Sandrew 166281494Sandrew/* 167281494Sandrew * These are configured by the mair_el1 register. 
This is set up in locore.S 168281494Sandrew */ 169281494Sandrew#define DEVICE_MEMORY 0 170281494Sandrew#define UNCACHED_MEMORY 1 171281494Sandrew#define CACHED_MEMORY 2 172281494Sandrew 173281494Sandrew 174281494Sandrew#ifdef PV_STATS 175281494Sandrew#define PV_STAT(x) do { x ; } while (0) 176281494Sandrew#else 177281494Sandrew#define PV_STAT(x) do { } while (0) 178281494Sandrew#endif 179281494Sandrew 180281494Sandrew#define pmap_l2_pindex(v) ((v) >> L2_SHIFT) 181305882Sandrew#define pa_to_pvh(pa) (&pv_table[pmap_l2_pindex(pa)]) 182281494Sandrew 183281494Sandrew#define NPV_LIST_LOCKS MAXCPU 184281494Sandrew 185281494Sandrew#define PHYS_TO_PV_LIST_LOCK(pa) \ 186281494Sandrew (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) 187281494Sandrew 188281494Sandrew#define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ 189281494Sandrew struct rwlock **_lockp = (lockp); \ 190281494Sandrew struct rwlock *_new_lock; \ 191281494Sandrew \ 192281494Sandrew _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ 193281494Sandrew if (_new_lock != *_lockp) { \ 194281494Sandrew if (*_lockp != NULL) \ 195281494Sandrew rw_wunlock(*_lockp); \ 196281494Sandrew *_lockp = _new_lock; \ 197281494Sandrew rw_wlock(*_lockp); \ 198281494Sandrew } \ 199281494Sandrew} while (0) 200281494Sandrew 201281494Sandrew#define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ 202281494Sandrew CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) 203281494Sandrew 204281494Sandrew#define RELEASE_PV_LIST_LOCK(lockp) do { \ 205281494Sandrew struct rwlock **_lockp = (lockp); \ 206281494Sandrew \ 207281494Sandrew if (*_lockp != NULL) { \ 208281494Sandrew rw_wunlock(*_lockp); \ 209281494Sandrew *_lockp = NULL; \ 210281494Sandrew } \ 211281494Sandrew} while (0) 212281494Sandrew 213281494Sandrew#define VM_PAGE_TO_PV_LIST_LOCK(m) \ 214281494Sandrew PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) 215281494Sandrew 216281494Sandrewstruct pmap kernel_pmap_store; 217281494Sandrew 218281494Sandrewvm_offset_t virtual_avail; /* VA of first avail page 
(after kernel bss) */ 219281494Sandrewvm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 220281494Sandrewvm_offset_t kernel_vm_end = 0; 221281494Sandrew 222281494Sandrewstruct msgbuf *msgbufp = NULL; 223281494Sandrew 224305882Sandrew/* 225305882Sandrew * Data for the pv entry allocation mechanism. 226305882Sandrew * Updates to pv_invl_gen are protected by the pv_list_locks[] 227305882Sandrew * elements, but reads are not. 228305882Sandrew */ 229305882Sandrewstatic struct md_page *pv_table; 230305882Sandrewstatic struct md_page pv_dummy; 231305882Sandrew 232291246Sandrewvm_paddr_t dmap_phys_base; /* The start of the dmap region */ 233297958Sandrewvm_paddr_t dmap_phys_max; /* The limit of the dmap region */ 234297958Sandrewvm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ 235291246Sandrew 236297914Sandrew/* This code assumes all L1 DMAP entries will be used */ 237297914SandrewCTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS); 238297914SandrewCTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); 239297914Sandrew 240297914Sandrew#define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) 241297914Sandrewextern pt_entry_t pagetable_dmap[]; 242297914Sandrew 243305882Sandrewstatic SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 244305882Sandrew 245305882Sandrewstatic int superpages_enabled = 0; 246305882SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, 247305882Sandrew CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, 248305882Sandrew "Are large page mappings enabled?"); 249305882Sandrew 250281494Sandrew/* 251281494Sandrew * Data for the pv entry allocation mechanism 252281494Sandrew */ 253281494Sandrewstatic TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 254281494Sandrewstatic struct mtx pv_chunks_mutex; 255281494Sandrewstatic struct rwlock pv_list_locks[NPV_LIST_LOCKS]; 256281494Sandrew 257281494Sandrewstatic void 
free_pv_chunk(struct pv_chunk *pc); 258281494Sandrewstatic void free_pv_entry(pmap_t pmap, pv_entry_t pv); 259281494Sandrewstatic pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); 260281494Sandrewstatic vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); 261281494Sandrewstatic void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 262281494Sandrewstatic pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 263281494Sandrew vm_offset_t va); 264305882Sandrew 265305882Sandrewstatic int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode); 266305882Sandrewstatic int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); 267305882Sandrewstatic pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); 268305882Sandrewstatic pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, 269305882Sandrew vm_offset_t va, struct rwlock **lockp); 270305882Sandrewstatic pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); 271281494Sandrewstatic vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 272281494Sandrew vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); 273281494Sandrewstatic int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, 274281494Sandrew pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); 275281494Sandrewstatic boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 276281494Sandrew vm_page_t m, struct rwlock **lockp); 277281494Sandrew 278281494Sandrewstatic vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, 279281494Sandrew struct rwlock **lockp); 280281494Sandrew 281281494Sandrewstatic void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, 282281494Sandrew struct spglist *free); 283281494Sandrewstatic int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); 284281494Sandrew 285288445Sandrew/* 286288445Sandrew * These load the old table data and store the new value. 
287288445Sandrew * They need to be atomic as the System MMU may write to the table at 288288445Sandrew * the same time as the CPU. 289288445Sandrew */ 290288445Sandrew#define pmap_load_store(table, entry) atomic_swap_64(table, entry) 291288445Sandrew#define pmap_set(table, mask) atomic_set_64(table, mask) 292288445Sandrew#define pmap_load_clear(table) atomic_swap_64(table, 0) 293288445Sandrew#define pmap_load(table) (*table) 294288445Sandrew 295281494Sandrew/********************/ 296281494Sandrew/* Inline functions */ 297281494Sandrew/********************/ 298281494Sandrew 299281494Sandrewstatic __inline void 300281494Sandrewpagecopy(void *s, void *d) 301281494Sandrew{ 302281494Sandrew 303281494Sandrew memcpy(d, s, PAGE_SIZE); 304281494Sandrew} 305281494Sandrew 306297446Sandrew#define pmap_l0_index(va) (((va) >> L0_SHIFT) & L0_ADDR_MASK) 307281494Sandrew#define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) 308281494Sandrew#define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) 309281494Sandrew#define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) 310281494Sandrew 311281494Sandrewstatic __inline pd_entry_t * 312297446Sandrewpmap_l0(pmap_t pmap, vm_offset_t va) 313297446Sandrew{ 314297446Sandrew 315297446Sandrew return (&pmap->pm_l0[pmap_l0_index(va)]); 316297446Sandrew} 317297446Sandrew 318297446Sandrewstatic __inline pd_entry_t * 319297446Sandrewpmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) 320297446Sandrew{ 321297446Sandrew pd_entry_t *l1; 322297446Sandrew 323297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); 324297446Sandrew return (&l1[pmap_l1_index(va)]); 325297446Sandrew} 326297446Sandrew 327297446Sandrewstatic __inline pd_entry_t * 328281494Sandrewpmap_l1(pmap_t pmap, vm_offset_t va) 329281494Sandrew{ 330297446Sandrew pd_entry_t *l0; 331281494Sandrew 332297446Sandrew l0 = pmap_l0(pmap, va); 333297446Sandrew if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) 334297446Sandrew return (NULL); 335297446Sandrew 
336297446Sandrew return (pmap_l0_to_l1(l0, va)); 337281494Sandrew} 338281494Sandrew 339281494Sandrewstatic __inline pd_entry_t * 340281494Sandrewpmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) 341281494Sandrew{ 342281494Sandrew pd_entry_t *l2; 343281494Sandrew 344288445Sandrew l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 345281494Sandrew return (&l2[pmap_l2_index(va)]); 346281494Sandrew} 347281494Sandrew 348281494Sandrewstatic __inline pd_entry_t * 349281494Sandrewpmap_l2(pmap_t pmap, vm_offset_t va) 350281494Sandrew{ 351281494Sandrew pd_entry_t *l1; 352281494Sandrew 353281494Sandrew l1 = pmap_l1(pmap, va); 354288445Sandrew if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) 355281494Sandrew return (NULL); 356281494Sandrew 357281494Sandrew return (pmap_l1_to_l2(l1, va)); 358281494Sandrew} 359281494Sandrew 360281494Sandrewstatic __inline pt_entry_t * 361281494Sandrewpmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) 362281494Sandrew{ 363281494Sandrew pt_entry_t *l3; 364281494Sandrew 365288445Sandrew l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); 366281494Sandrew return (&l3[pmap_l3_index(va)]); 367281494Sandrew} 368281494Sandrew 369297446Sandrew/* 370297446Sandrew * Returns the lowest valid pde for a given virtual address. 371297446Sandrew * The next level may or may not point to a valid page or block. 
372297446Sandrew */ 373297446Sandrewstatic __inline pd_entry_t * 374297446Sandrewpmap_pde(pmap_t pmap, vm_offset_t va, int *level) 375297446Sandrew{ 376297446Sandrew pd_entry_t *l0, *l1, *l2, desc; 377297446Sandrew 378297446Sandrew l0 = pmap_l0(pmap, va); 379297446Sandrew desc = pmap_load(l0) & ATTR_DESCR_MASK; 380297446Sandrew if (desc != L0_TABLE) { 381297446Sandrew *level = -1; 382297446Sandrew return (NULL); 383297446Sandrew } 384297446Sandrew 385297446Sandrew l1 = pmap_l0_to_l1(l0, va); 386297446Sandrew desc = pmap_load(l1) & ATTR_DESCR_MASK; 387297446Sandrew if (desc != L1_TABLE) { 388297446Sandrew *level = 0; 389297446Sandrew return (l0); 390297446Sandrew } 391297446Sandrew 392297446Sandrew l2 = pmap_l1_to_l2(l1, va); 393297446Sandrew desc = pmap_load(l2) & ATTR_DESCR_MASK; 394297446Sandrew if (desc != L2_TABLE) { 395297446Sandrew *level = 1; 396297446Sandrew return (l1); 397297446Sandrew } 398297446Sandrew 399297446Sandrew *level = 2; 400297446Sandrew return (l2); 401297446Sandrew} 402297446Sandrew 403297446Sandrew/* 404297446Sandrew * Returns the lowest valid pte block or table entry for a given virtual 405297446Sandrew * address. If there are no valid entries return NULL and set the level to 406297446Sandrew * the first invalid level. 
407297446Sandrew */ 408281494Sandrewstatic __inline pt_entry_t * 409297446Sandrewpmap_pte(pmap_t pmap, vm_offset_t va, int *level) 410281494Sandrew{ 411297446Sandrew pd_entry_t *l1, *l2, desc; 412297446Sandrew pt_entry_t *l3; 413281494Sandrew 414297446Sandrew l1 = pmap_l1(pmap, va); 415297446Sandrew if (l1 == NULL) { 416297446Sandrew *level = 0; 417281494Sandrew return (NULL); 418297446Sandrew } 419297446Sandrew desc = pmap_load(l1) & ATTR_DESCR_MASK; 420297446Sandrew if (desc == L1_BLOCK) { 421297446Sandrew *level = 1; 422297446Sandrew return (l1); 423297446Sandrew } 424281494Sandrew 425297446Sandrew if (desc != L1_TABLE) { 426297446Sandrew *level = 1; 427297446Sandrew return (NULL); 428297446Sandrew } 429297446Sandrew 430297446Sandrew l2 = pmap_l1_to_l2(l1, va); 431297446Sandrew desc = pmap_load(l2) & ATTR_DESCR_MASK; 432297446Sandrew if (desc == L2_BLOCK) { 433297446Sandrew *level = 2; 434297446Sandrew return (l2); 435297446Sandrew } 436297446Sandrew 437297446Sandrew if (desc != L2_TABLE) { 438297446Sandrew *level = 2; 439297446Sandrew return (NULL); 440297446Sandrew } 441297446Sandrew 442297446Sandrew *level = 3; 443297446Sandrew l3 = pmap_l2_to_l3(l2, va); 444297446Sandrew if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE) 445297446Sandrew return (NULL); 446297446Sandrew 447297446Sandrew return (l3); 448281494Sandrew} 449281494Sandrew 450305882Sandrewstatic inline bool 451305882Sandrewpmap_superpages_enabled(void) 452305882Sandrew{ 453305882Sandrew 454305882Sandrew return (superpages_enabled != 0); 455305882Sandrew} 456305882Sandrew 457286956Sandrewbool 458297446Sandrewpmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1, 459297446Sandrew pd_entry_t **l2, pt_entry_t **l3) 460286956Sandrew{ 461297446Sandrew pd_entry_t *l0p, *l1p, *l2p; 462286956Sandrew 463297446Sandrew if (pmap->pm_l0 == NULL) 464286956Sandrew return (false); 465286956Sandrew 466297446Sandrew l0p = pmap_l0(pmap, va); 467297446Sandrew *l0 = l0p; 468297446Sandrew 
469297446Sandrew if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE) 470297446Sandrew return (false); 471297446Sandrew 472297446Sandrew l1p = pmap_l0_to_l1(l0p, va); 473286956Sandrew *l1 = l1p; 474286956Sandrew 475288445Sandrew if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) { 476286956Sandrew *l2 = NULL; 477286956Sandrew *l3 = NULL; 478286956Sandrew return (true); 479286956Sandrew } 480286956Sandrew 481288445Sandrew if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE) 482286956Sandrew return (false); 483286956Sandrew 484286956Sandrew l2p = pmap_l1_to_l2(l1p, va); 485286956Sandrew *l2 = l2p; 486286956Sandrew 487288445Sandrew if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) { 488286956Sandrew *l3 = NULL; 489286956Sandrew return (true); 490286956Sandrew } 491286956Sandrew 492286956Sandrew *l3 = pmap_l2_to_l3(l2p, va); 493286956Sandrew 494286956Sandrew return (true); 495286956Sandrew} 496286956Sandrew 497281494Sandrewstatic __inline int 498281494Sandrewpmap_is_current(pmap_t pmap) 499281494Sandrew{ 500281494Sandrew 501281494Sandrew return ((pmap == pmap_kernel()) || 502281494Sandrew (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); 503281494Sandrew} 504281494Sandrew 505281494Sandrewstatic __inline int 506281494Sandrewpmap_l3_valid(pt_entry_t l3) 507281494Sandrew{ 508281494Sandrew 509281494Sandrew return ((l3 & ATTR_DESCR_MASK) == L3_PAGE); 510281494Sandrew} 511281494Sandrew 512305882Sandrew 513305882Sandrew/* Is a level 1 or 2entry a valid block and cacheable */ 514305882SandrewCTASSERT(L1_BLOCK == L2_BLOCK); 515281494Sandrewstatic __inline int 516305882Sandrewpmap_pte_valid_cacheable(pt_entry_t pte) 517305882Sandrew{ 518305882Sandrew 519305882Sandrew return (((pte & ATTR_DESCR_MASK) == L1_BLOCK) && 520305882Sandrew ((pte & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); 521305882Sandrew} 522305882Sandrew 523305882Sandrewstatic __inline int 524281494Sandrewpmap_l3_valid_cacheable(pt_entry_t l3) 525281494Sandrew{ 526281494Sandrew 527281494Sandrew return (((l3 & 
ATTR_DESCR_MASK) == L3_PAGE) && 528281494Sandrew ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY))); 529281494Sandrew} 530281494Sandrew 531281494Sandrew#define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) 532281494Sandrew 533281494Sandrew/* 534281494Sandrew * Checks if the page is dirty. We currently lack proper tracking of this on 535281494Sandrew * arm64 so for now assume is a page mapped as rw was accessed it is. 536281494Sandrew */ 537281494Sandrewstatic inline int 538281494Sandrewpmap_page_dirty(pt_entry_t pte) 539281494Sandrew{ 540281494Sandrew 541281494Sandrew return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == 542281494Sandrew (ATTR_AF | ATTR_AP(ATTR_AP_RW))); 543281494Sandrew} 544281494Sandrew 545281494Sandrewstatic __inline void 546281494Sandrewpmap_resident_count_inc(pmap_t pmap, int count) 547281494Sandrew{ 548281494Sandrew 549281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 550281494Sandrew pmap->pm_stats.resident_count += count; 551281494Sandrew} 552281494Sandrew 553281494Sandrewstatic __inline void 554281494Sandrewpmap_resident_count_dec(pmap_t pmap, int count) 555281494Sandrew{ 556281494Sandrew 557281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 558281494Sandrew KASSERT(pmap->pm_stats.resident_count >= count, 559281494Sandrew ("pmap %p resident count underflow %ld %d", pmap, 560281494Sandrew pmap->pm_stats.resident_count, count)); 561281494Sandrew pmap->pm_stats.resident_count -= count; 562281494Sandrew} 563281494Sandrew 564281494Sandrewstatic pt_entry_t * 565281494Sandrewpmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, 566281494Sandrew u_int *l2_slot) 567281494Sandrew{ 568281494Sandrew pt_entry_t *l2; 569281494Sandrew pd_entry_t *l1; 570281494Sandrew 571281494Sandrew l1 = (pd_entry_t *)l1pt; 572281494Sandrew *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; 573281494Sandrew 574281494Sandrew /* Check locore has used a table L1 map */ 575281494Sandrew KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, 576281494Sandrew 
("Invalid bootstrap L1 table")); 577281494Sandrew /* Find the address of the L2 table */ 578281494Sandrew l2 = (pt_entry_t *)init_pt_va; 579281494Sandrew *l2_slot = pmap_l2_index(va); 580281494Sandrew 581281494Sandrew return (l2); 582281494Sandrew} 583281494Sandrew 584281494Sandrewstatic vm_paddr_t 585281494Sandrewpmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) 586281494Sandrew{ 587281494Sandrew u_int l1_slot, l2_slot; 588281494Sandrew pt_entry_t *l2; 589281494Sandrew 590281494Sandrew l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); 591281494Sandrew 592281494Sandrew return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); 593281494Sandrew} 594281494Sandrew 595281494Sandrewstatic void 596297958Sandrewpmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa) 597281494Sandrew{ 598281494Sandrew vm_offset_t va; 599281494Sandrew vm_paddr_t pa; 600281494Sandrew u_int l1_slot; 601281494Sandrew 602297958Sandrew pa = dmap_phys_base = min_pa & ~L1_OFFSET; 603281494Sandrew va = DMAP_MIN_ADDRESS; 604297958Sandrew for (; va < DMAP_MAX_ADDRESS && pa < max_pa; 605281494Sandrew pa += L1_SIZE, va += L1_SIZE, l1_slot++) { 606297914Sandrew l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); 607281494Sandrew 608297914Sandrew pmap_load_store(&pagetable_dmap[l1_slot], 609319203Sandrew (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN | 610285537Sandrew ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); 611281494Sandrew } 612281494Sandrew 613297958Sandrew /* Set the upper limit of the DMAP region */ 614297958Sandrew dmap_phys_max = pa; 615297958Sandrew dmap_max_addr = va; 616297958Sandrew 617297914Sandrew cpu_dcache_wb_range((vm_offset_t)pagetable_dmap, 618297914Sandrew PAGE_SIZE * DMAP_TABLES); 619281494Sandrew cpu_tlb_flushID(); 620281494Sandrew} 621281494Sandrew 622281494Sandrewstatic vm_offset_t 623281494Sandrewpmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) 624281494Sandrew{ 625281494Sandrew vm_offset_t l2pt; 626281494Sandrew vm_paddr_t pa; 
627281494Sandrew pd_entry_t *l1; 628281494Sandrew u_int l1_slot; 629281494Sandrew 630281494Sandrew KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); 631281494Sandrew 632281494Sandrew l1 = (pd_entry_t *)l1pt; 633281494Sandrew l1_slot = pmap_l1_index(va); 634281494Sandrew l2pt = l2_start; 635281494Sandrew 636281494Sandrew for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { 637281494Sandrew KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); 638281494Sandrew 639281494Sandrew pa = pmap_early_vtophys(l1pt, l2pt); 640281494Sandrew pmap_load_store(&l1[l1_slot], 641281494Sandrew (pa & ~Ln_TABLE_MASK) | L1_TABLE); 642281494Sandrew l2pt += PAGE_SIZE; 643281494Sandrew } 644281494Sandrew 645281494Sandrew /* Clean the L2 page table */ 646281494Sandrew memset((void *)l2_start, 0, l2pt - l2_start); 647281494Sandrew cpu_dcache_wb_range(l2_start, l2pt - l2_start); 648281494Sandrew 649281494Sandrew /* Flush the l1 table to ram */ 650281494Sandrew cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); 651281494Sandrew 652281494Sandrew return l2pt; 653281494Sandrew} 654281494Sandrew 655281494Sandrewstatic vm_offset_t 656281494Sandrewpmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) 657281494Sandrew{ 658281494Sandrew vm_offset_t l2pt, l3pt; 659281494Sandrew vm_paddr_t pa; 660281494Sandrew pd_entry_t *l2; 661281494Sandrew u_int l2_slot; 662281494Sandrew 663281494Sandrew KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); 664281494Sandrew 665281494Sandrew l2 = pmap_l2(kernel_pmap, va); 666298433Spfg l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE); 667281494Sandrew l2pt = (vm_offset_t)l2; 668281494Sandrew l2_slot = pmap_l2_index(va); 669281494Sandrew l3pt = l3_start; 670281494Sandrew 671281494Sandrew for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { 672281494Sandrew KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); 673281494Sandrew 674281494Sandrew pa = pmap_early_vtophys(l1pt, l3pt); 675281494Sandrew 
pmap_load_store(&l2[l2_slot], 676281494Sandrew (pa & ~Ln_TABLE_MASK) | L2_TABLE); 677281494Sandrew l3pt += PAGE_SIZE; 678281494Sandrew } 679281494Sandrew 680281494Sandrew /* Clean the L2 page table */ 681281494Sandrew memset((void *)l3_start, 0, l3pt - l3_start); 682281494Sandrew cpu_dcache_wb_range(l3_start, l3pt - l3_start); 683281494Sandrew 684281494Sandrew cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); 685281494Sandrew 686281494Sandrew return l3pt; 687281494Sandrew} 688281494Sandrew 689281494Sandrew/* 690281494Sandrew * Bootstrap the system enough to run with virtual memory. 691281494Sandrew */ 692281494Sandrewvoid 693297446Sandrewpmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, 694297446Sandrew vm_size_t kernlen) 695281494Sandrew{ 696281494Sandrew u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; 697281494Sandrew uint64_t kern_delta; 698281494Sandrew pt_entry_t *l2; 699281494Sandrew vm_offset_t va, freemempos; 700281494Sandrew vm_offset_t dpcpu, msgbufpv; 701297958Sandrew vm_paddr_t pa, max_pa, min_pa; 702291246Sandrew int i; 703281494Sandrew 704281494Sandrew kern_delta = KERNBASE - kernstart; 705281494Sandrew physmem = 0; 706281494Sandrew 707281494Sandrew printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); 708281494Sandrew printf("%lx\n", l1pt); 709281494Sandrew printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); 710281494Sandrew 711281494Sandrew /* Set this early so we can use the pagetable walking functions */ 712297446Sandrew kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; 713281494Sandrew PMAP_LOCK_INIT(kernel_pmap); 714281494Sandrew 715291246Sandrew /* Assume the address we were loaded to is a valid physical address */ 716297958Sandrew min_pa = max_pa = KERNBASE - kern_delta; 717291246Sandrew 718291246Sandrew /* 719291246Sandrew * Find the minimum physical address. physmap is sorted, 720291246Sandrew * but may contain empty ranges. 
721291246Sandrew */ 722291246Sandrew for (i = 0; i < (physmap_idx * 2); i += 2) { 723291246Sandrew if (physmap[i] == physmap[i + 1]) 724291246Sandrew continue; 725291246Sandrew if (physmap[i] <= min_pa) 726291246Sandrew min_pa = physmap[i]; 727297958Sandrew if (physmap[i + 1] > max_pa) 728297958Sandrew max_pa = physmap[i + 1]; 729291246Sandrew } 730291246Sandrew 731281494Sandrew /* Create a direct map region early so we can use it for pa -> va */ 732297958Sandrew pmap_bootstrap_dmap(l1pt, min_pa, max_pa); 733281494Sandrew 734281494Sandrew va = KERNBASE; 735281494Sandrew pa = KERNBASE - kern_delta; 736281494Sandrew 737281494Sandrew /* 738281494Sandrew * Start to initialise phys_avail by copying from physmap 739281494Sandrew * up to the physical address KERNBASE points at. 740281494Sandrew */ 741281494Sandrew map_slot = avail_slot = 0; 742295157Sandrew for (; map_slot < (physmap_idx * 2) && 743295157Sandrew avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) { 744281494Sandrew if (physmap[map_slot] == physmap[map_slot + 1]) 745281494Sandrew continue; 746281494Sandrew 747281494Sandrew if (physmap[map_slot] <= pa && 748281494Sandrew physmap[map_slot + 1] > pa) 749281494Sandrew break; 750281494Sandrew 751281494Sandrew phys_avail[avail_slot] = physmap[map_slot]; 752281494Sandrew phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 753281494Sandrew physmem += (phys_avail[avail_slot + 1] - 754281494Sandrew phys_avail[avail_slot]) >> PAGE_SHIFT; 755281494Sandrew avail_slot += 2; 756281494Sandrew } 757281494Sandrew 758281494Sandrew /* Add the memory before the kernel */ 759295157Sandrew if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) { 760281494Sandrew phys_avail[avail_slot] = physmap[map_slot]; 761281494Sandrew phys_avail[avail_slot + 1] = pa; 762281494Sandrew physmem += (phys_avail[avail_slot + 1] - 763281494Sandrew phys_avail[avail_slot]) >> PAGE_SHIFT; 764281494Sandrew avail_slot += 2; 765281494Sandrew } 766281494Sandrew used_map_slot = map_slot; 
767281494Sandrew 768281494Sandrew /* 769281494Sandrew * Read the page table to find out what is already mapped. 770281494Sandrew * This assumes we have mapped a block of memory from KERNBASE 771281494Sandrew * using a single L1 entry. 772281494Sandrew */ 773281494Sandrew l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); 774281494Sandrew 775281494Sandrew /* Sanity check the index, KERNBASE should be the first VA */ 776281494Sandrew KASSERT(l2_slot == 0, ("The L2 index is non-zero")); 777281494Sandrew 778281494Sandrew /* Find how many pages we have mapped */ 779281494Sandrew for (; l2_slot < Ln_ENTRIES; l2_slot++) { 780281494Sandrew if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) 781281494Sandrew break; 782281494Sandrew 783281494Sandrew /* Check locore used L2 blocks */ 784281494Sandrew KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, 785281494Sandrew ("Invalid bootstrap L2 table")); 786281494Sandrew KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, 787281494Sandrew ("Incorrect PA in L2 table")); 788281494Sandrew 789281494Sandrew va += L2_SIZE; 790281494Sandrew pa += L2_SIZE; 791281494Sandrew } 792281494Sandrew 793281494Sandrew va = roundup2(va, L1_SIZE); 794281494Sandrew 795281494Sandrew freemempos = KERNBASE + kernlen; 796281494Sandrew freemempos = roundup2(freemempos, PAGE_SIZE); 797281494Sandrew /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ 798281494Sandrew freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); 799281494Sandrew /* And the l3 tables for the early devmap */ 800281494Sandrew freemempos = pmap_bootstrap_l3(l1pt, 801281494Sandrew VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); 802281494Sandrew 803281494Sandrew cpu_tlb_flushID(); 804281494Sandrew 805281494Sandrew#define alloc_pages(var, np) \ 806281494Sandrew (var) = freemempos; \ 807281494Sandrew freemempos += (np * PAGE_SIZE); \ 808281494Sandrew memset((char *)(var), 0, ((np) * PAGE_SIZE)); 809281494Sandrew 810281494Sandrew /* Allocate dynamic per-cpu area. 
*/ 811281494Sandrew alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); 812281494Sandrew dpcpu_init((void *)dpcpu, 0); 813281494Sandrew 814281494Sandrew /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ 815281494Sandrew alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); 816281494Sandrew msgbufp = (void *)msgbufpv; 817281494Sandrew 818281494Sandrew virtual_avail = roundup2(freemempos, L1_SIZE); 819281494Sandrew virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; 820281494Sandrew kernel_vm_end = virtual_avail; 821305531Sandrew 822281494Sandrew pa = pmap_early_vtophys(l1pt, freemempos); 823281494Sandrew 824281494Sandrew /* Finish initialising physmap */ 825281494Sandrew map_slot = used_map_slot; 826281494Sandrew for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && 827281494Sandrew map_slot < (physmap_idx * 2); map_slot += 2) { 828281494Sandrew if (physmap[map_slot] == physmap[map_slot + 1]) 829281494Sandrew continue; 830281494Sandrew 831281494Sandrew /* Have we used the current range? */ 832281494Sandrew if (physmap[map_slot + 1] <= pa) 833281494Sandrew continue; 834281494Sandrew 835281494Sandrew /* Do we need to split the entry? */ 836281494Sandrew if (physmap[map_slot] < pa) { 837281494Sandrew phys_avail[avail_slot] = pa; 838281494Sandrew phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 839281494Sandrew } else { 840281494Sandrew phys_avail[avail_slot] = physmap[map_slot]; 841281494Sandrew phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 842281494Sandrew } 843281494Sandrew physmem += (phys_avail[avail_slot + 1] - 844281494Sandrew phys_avail[avail_slot]) >> PAGE_SHIFT; 845281494Sandrew 846281494Sandrew avail_slot += 2; 847281494Sandrew } 848281494Sandrew phys_avail[avail_slot] = 0; 849281494Sandrew phys_avail[avail_slot + 1] = 0; 850281494Sandrew 851281494Sandrew /* 852281494Sandrew * Maxmem isn't the "maximum memory", it's one larger than the 853281494Sandrew * highest page of the physical address space. 
It should be 854281494Sandrew * called something like "Maxphyspage". 855281494Sandrew */ 856281494Sandrew Maxmem = atop(phys_avail[avail_slot - 1]); 857281494Sandrew 858281494Sandrew cpu_tlb_flushID(); 859281494Sandrew} 860281494Sandrew 861281494Sandrew/* 862281494Sandrew * Initialize a vm_page's machine-dependent fields. 863281494Sandrew */ 864281494Sandrewvoid 865281494Sandrewpmap_page_init(vm_page_t m) 866281494Sandrew{ 867281494Sandrew 868281494Sandrew TAILQ_INIT(&m->md.pv_list); 869281494Sandrew m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; 870281494Sandrew} 871281494Sandrew 872281494Sandrew/* 873281494Sandrew * Initialize the pmap module. 874281494Sandrew * Called by vm_init, to initialize any structures that the pmap 875281494Sandrew * system needs to map virtual memory. 876281494Sandrew */ 877281494Sandrewvoid 878281494Sandrewpmap_init(void) 879281494Sandrew{ 880305882Sandrew vm_size_t s; 881305882Sandrew int i, pv_npg; 882281494Sandrew 883281494Sandrew /* 884305882Sandrew * Are large page mappings enabled? 885305882Sandrew */ 886305882Sandrew TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled); 887305882Sandrew 888305882Sandrew /* 889281494Sandrew * Initialize the pv chunk list mutex. 890281494Sandrew */ 891281494Sandrew mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); 892281494Sandrew 893281494Sandrew /* 894281494Sandrew * Initialize the pool of pv list locks. 895281494Sandrew */ 896281494Sandrew for (i = 0; i < NPV_LIST_LOCKS; i++) 897281494Sandrew rw_init(&pv_list_locks[i], "pmap pv list"); 898305882Sandrew 899305882Sandrew /* 900305882Sandrew * Calculate the size of the pv head table for superpages. 901305882Sandrew */ 902305882Sandrew pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); 903305882Sandrew 904305882Sandrew /* 905305882Sandrew * Allocate memory for the pv head table for superpages. 
906305882Sandrew */ 907305882Sandrew s = (vm_size_t)(pv_npg * sizeof(struct md_page)); 908305882Sandrew s = round_page(s); 909305882Sandrew pv_table = (struct md_page *)kmem_malloc(kernel_arena, s, 910305882Sandrew M_WAITOK | M_ZERO); 911305882Sandrew for (i = 0; i < pv_npg; i++) 912305882Sandrew TAILQ_INIT(&pv_table[i].pv_list); 913305882Sandrew TAILQ_INIT(&pv_dummy.pv_list); 914281494Sandrew} 915281494Sandrew 916305882Sandrewstatic SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0, 917305882Sandrew "2MB page mapping counters"); 918305882Sandrew 919305882Sandrewstatic u_long pmap_l2_demotions; 920305882SandrewSYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, 921305882Sandrew &pmap_l2_demotions, 0, "2MB page demotions"); 922305882Sandrew 923305882Sandrewstatic u_long pmap_l2_p_failures; 924305882SandrewSYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, 925305882Sandrew &pmap_l2_p_failures, 0, "2MB page promotion failures"); 926305882Sandrew 927305882Sandrewstatic u_long pmap_l2_promotions; 928305882SandrewSYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, 929305882Sandrew &pmap_l2_promotions, 0, "2MB page promotions"); 930305882Sandrew 931281494Sandrew/* 932305540Sandrew * Invalidate a single TLB entry. 
933281494Sandrew */ 934281494SandrewPMAP_INLINE void 935281494Sandrewpmap_invalidate_page(pmap_t pmap, vm_offset_t va) 936281494Sandrew{ 937281494Sandrew 938281494Sandrew sched_pin(); 939281494Sandrew __asm __volatile( 940305540Sandrew "dsb ishst \n" 941281494Sandrew "tlbi vaae1is, %0 \n" 942305540Sandrew "dsb ish \n" 943281494Sandrew "isb \n" 944281494Sandrew : : "r"(va >> PAGE_SHIFT)); 945281494Sandrew sched_unpin(); 946281494Sandrew} 947281494Sandrew 948281494SandrewPMAP_INLINE void 949281494Sandrewpmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 950281494Sandrew{ 951281494Sandrew vm_offset_t addr; 952281494Sandrew 953281494Sandrew sched_pin(); 954305540Sandrew dsb(ishst); 955296828Swma for (addr = sva; addr < eva; addr += PAGE_SIZE) { 956281494Sandrew __asm __volatile( 957296828Swma "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT)); 958281494Sandrew } 959281494Sandrew __asm __volatile( 960305540Sandrew "dsb ish \n" 961281494Sandrew "isb \n"); 962281494Sandrew sched_unpin(); 963281494Sandrew} 964281494Sandrew 965281494SandrewPMAP_INLINE void 966281494Sandrewpmap_invalidate_all(pmap_t pmap) 967281494Sandrew{ 968281494Sandrew 969281494Sandrew sched_pin(); 970281494Sandrew __asm __volatile( 971305540Sandrew "dsb ishst \n" 972281494Sandrew "tlbi vmalle1is \n" 973305540Sandrew "dsb ish \n" 974281494Sandrew "isb \n"); 975281494Sandrew sched_unpin(); 976281494Sandrew} 977281494Sandrew 978281494Sandrew/* 979281494Sandrew * Routine: pmap_extract 980281494Sandrew * Function: 981281494Sandrew * Extract the physical page address associated 982281494Sandrew * with the given map/virtual_address pair. 
983281494Sandrew */ 984305531Sandrewvm_paddr_t 985281494Sandrewpmap_extract(pmap_t pmap, vm_offset_t va) 986281494Sandrew{ 987297446Sandrew pt_entry_t *pte, tpte; 988281494Sandrew vm_paddr_t pa; 989297446Sandrew int lvl; 990281494Sandrew 991281494Sandrew pa = 0; 992281494Sandrew PMAP_LOCK(pmap); 993281494Sandrew /* 994297446Sandrew * Find the block or page map for this virtual address. pmap_pte 995297446Sandrew * will return either a valid block/page entry, or NULL. 996281494Sandrew */ 997297446Sandrew pte = pmap_pte(pmap, va, &lvl); 998297446Sandrew if (pte != NULL) { 999297446Sandrew tpte = pmap_load(pte); 1000297446Sandrew pa = tpte & ~ATTR_MASK; 1001297446Sandrew switch(lvl) { 1002297446Sandrew case 1: 1003297446Sandrew KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, 1004297446Sandrew ("pmap_extract: Invalid L1 pte found: %lx", 1005297446Sandrew tpte & ATTR_DESCR_MASK)); 1006297446Sandrew pa |= (va & L1_OFFSET); 1007297446Sandrew break; 1008297446Sandrew case 2: 1009297446Sandrew KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, 1010297446Sandrew ("pmap_extract: Invalid L2 pte found: %lx", 1011297446Sandrew tpte & ATTR_DESCR_MASK)); 1012297446Sandrew pa |= (va & L2_OFFSET); 1013297446Sandrew break; 1014297446Sandrew case 3: 1015297446Sandrew KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, 1016297446Sandrew ("pmap_extract: Invalid L3 pte found: %lx", 1017297446Sandrew tpte & ATTR_DESCR_MASK)); 1018297446Sandrew pa |= (va & L3_OFFSET); 1019297446Sandrew break; 1020297446Sandrew } 1021281494Sandrew } 1022281494Sandrew PMAP_UNLOCK(pmap); 1023281494Sandrew return (pa); 1024281494Sandrew} 1025281494Sandrew 1026281494Sandrew/* 1027281494Sandrew * Routine: pmap_extract_and_hold 1028281494Sandrew * Function: 1029281494Sandrew * Atomically extract and hold the physical page 1030281494Sandrew * with the given pmap and virtual address pair 1031281494Sandrew * if that mapping permits the given protection. 
1032281494Sandrew */ 1033281494Sandrewvm_page_t 1034281494Sandrewpmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1035281494Sandrew{ 1036297446Sandrew pt_entry_t *pte, tpte; 1037305882Sandrew vm_offset_t off; 1038281494Sandrew vm_paddr_t pa; 1039281494Sandrew vm_page_t m; 1040297446Sandrew int lvl; 1041281494Sandrew 1042281494Sandrew pa = 0; 1043281494Sandrew m = NULL; 1044281494Sandrew PMAP_LOCK(pmap); 1045281494Sandrewretry: 1046297446Sandrew pte = pmap_pte(pmap, va, &lvl); 1047297446Sandrew if (pte != NULL) { 1048297446Sandrew tpte = pmap_load(pte); 1049297446Sandrew 1050297446Sandrew KASSERT(lvl > 0 && lvl <= 3, 1051297446Sandrew ("pmap_extract_and_hold: Invalid level %d", lvl)); 1052297446Sandrew CTASSERT(L1_BLOCK == L2_BLOCK); 1053297446Sandrew KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || 1054297446Sandrew (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), 1055297446Sandrew ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, 1056297446Sandrew tpte & ATTR_DESCR_MASK)); 1057297446Sandrew if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || 1058281494Sandrew ((prot & VM_PROT_WRITE) == 0)) { 1059305882Sandrew switch(lvl) { 1060305882Sandrew case 1: 1061305882Sandrew off = va & L1_OFFSET; 1062305882Sandrew break; 1063305882Sandrew case 2: 1064305882Sandrew off = va & L2_OFFSET; 1065305882Sandrew break; 1066305882Sandrew case 3: 1067305882Sandrew default: 1068305882Sandrew off = 0; 1069305882Sandrew } 1070305882Sandrew if (vm_page_pa_tryrelock(pmap, 1071305882Sandrew (tpte & ~ATTR_MASK) | off, &pa)) 1072281494Sandrew goto retry; 1073305882Sandrew m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off); 1074281494Sandrew vm_page_hold(m); 1075281494Sandrew } 1076281494Sandrew } 1077281494Sandrew PA_UNLOCK_COND(pa); 1078281494Sandrew PMAP_UNLOCK(pmap); 1079281494Sandrew return (m); 1080281494Sandrew} 1081281494Sandrew 1082281494Sandrewvm_paddr_t 1083281494Sandrewpmap_kextract(vm_offset_t va) 1084281494Sandrew{ 1085297446Sandrew 
pt_entry_t *pte, tpte; 1086281494Sandrew vm_paddr_t pa; 1087297446Sandrew int lvl; 1088281494Sandrew 1089281494Sandrew if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 1090281494Sandrew pa = DMAP_TO_PHYS(va); 1091281494Sandrew } else { 1092297446Sandrew pa = 0; 1093297446Sandrew pte = pmap_pte(kernel_pmap, va, &lvl); 1094297446Sandrew if (pte != NULL) { 1095297446Sandrew tpte = pmap_load(pte); 1096297446Sandrew pa = tpte & ~ATTR_MASK; 1097297446Sandrew switch(lvl) { 1098297446Sandrew case 1: 1099297446Sandrew KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, 1100297446Sandrew ("pmap_kextract: Invalid L1 pte found: %lx", 1101297446Sandrew tpte & ATTR_DESCR_MASK)); 1102297446Sandrew pa |= (va & L1_OFFSET); 1103297446Sandrew break; 1104297446Sandrew case 2: 1105297446Sandrew KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, 1106297446Sandrew ("pmap_kextract: Invalid L2 pte found: %lx", 1107297446Sandrew tpte & ATTR_DESCR_MASK)); 1108297446Sandrew pa |= (va & L2_OFFSET); 1109297446Sandrew break; 1110297446Sandrew case 3: 1111297446Sandrew KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, 1112297446Sandrew ("pmap_kextract: Invalid L3 pte found: %lx", 1113297446Sandrew tpte & ATTR_DESCR_MASK)); 1114297446Sandrew pa |= (va & L3_OFFSET); 1115297446Sandrew break; 1116297446Sandrew } 1117297446Sandrew } 1118281494Sandrew } 1119281494Sandrew return (pa); 1120281494Sandrew} 1121281494Sandrew 1122281494Sandrew/*************************************************** 1123281494Sandrew * Low level mapping routines..... 
1124281494Sandrew ***************************************************/ 1125281494Sandrew 1126305542Sandrewstatic void 1127305542Sandrewpmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) 1128281494Sandrew{ 1129297446Sandrew pd_entry_t *pde; 1130319203Sandrew pt_entry_t *pte, attr; 1131285212Sandrew vm_offset_t va; 1132297446Sandrew int lvl; 1133281494Sandrew 1134281494Sandrew KASSERT((pa & L3_OFFSET) == 0, 1135305542Sandrew ("pmap_kenter: Invalid physical address")); 1136285212Sandrew KASSERT((sva & L3_OFFSET) == 0, 1137305542Sandrew ("pmap_kenter: Invalid virtual address")); 1138281494Sandrew KASSERT((size & PAGE_MASK) == 0, 1139305542Sandrew ("pmap_kenter: Mapping is not page-sized")); 1140281494Sandrew 1141319203Sandrew attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE; 1142319203Sandrew if (mode == DEVICE_MEMORY) 1143319203Sandrew attr |= ATTR_XN; 1144319203Sandrew 1145285212Sandrew va = sva; 1146281494Sandrew while (size != 0) { 1147297446Sandrew pde = pmap_pde(kernel_pmap, va, &lvl); 1148297446Sandrew KASSERT(pde != NULL, 1149305542Sandrew ("pmap_kenter: Invalid page entry, va: 0x%lx", va)); 1150305542Sandrew KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl)); 1151297446Sandrew 1152297446Sandrew pte = pmap_l2_to_l3(pde, va); 1153319203Sandrew pmap_load_store(pte, (pa & ~L3_OFFSET) | attr); 1154297446Sandrew PTE_SYNC(pte); 1155281494Sandrew 1156281494Sandrew va += PAGE_SIZE; 1157281494Sandrew pa += PAGE_SIZE; 1158281494Sandrew size -= PAGE_SIZE; 1159281494Sandrew } 1160285212Sandrew pmap_invalidate_range(kernel_pmap, sva, va); 1161281494Sandrew} 1162281494Sandrew 1163305542Sandrewvoid 1164305542Sandrewpmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) 1165305542Sandrew{ 1166305542Sandrew 1167305542Sandrew pmap_kenter(sva, size, pa, DEVICE_MEMORY); 1168305542Sandrew} 1169305542Sandrew 1170281494Sandrew/* 1171281494Sandrew * Remove a page from the kernel pagetables. 
1172281494Sandrew */ 1173281494SandrewPMAP_INLINE void 1174281494Sandrewpmap_kremove(vm_offset_t va) 1175281494Sandrew{ 1176297446Sandrew pt_entry_t *pte; 1177297446Sandrew int lvl; 1178281494Sandrew 1179297446Sandrew pte = pmap_pte(kernel_pmap, va, &lvl); 1180297446Sandrew KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); 1181297446Sandrew KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); 1182281494Sandrew 1183297446Sandrew if (pmap_l3_valid_cacheable(pmap_load(pte))) 1184281494Sandrew cpu_dcache_wb_range(va, L3_SIZE); 1185297446Sandrew pmap_load_clear(pte); 1186297446Sandrew PTE_SYNC(pte); 1187285212Sandrew pmap_invalidate_page(kernel_pmap, va); 1188281494Sandrew} 1189281494Sandrew 1190281494Sandrewvoid 1191285212Sandrewpmap_kremove_device(vm_offset_t sva, vm_size_t size) 1192281494Sandrew{ 1193297446Sandrew pt_entry_t *pte; 1194285212Sandrew vm_offset_t va; 1195297446Sandrew int lvl; 1196281494Sandrew 1197285212Sandrew KASSERT((sva & L3_OFFSET) == 0, 1198281494Sandrew ("pmap_kremove_device: Invalid virtual address")); 1199281494Sandrew KASSERT((size & PAGE_MASK) == 0, 1200281494Sandrew ("pmap_kremove_device: Mapping is not page-sized")); 1201281494Sandrew 1202285212Sandrew va = sva; 1203281494Sandrew while (size != 0) { 1204297446Sandrew pte = pmap_pte(kernel_pmap, va, &lvl); 1205297446Sandrew KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); 1206297446Sandrew KASSERT(lvl == 3, 1207297446Sandrew ("Invalid device pagetable level: %d != 3", lvl)); 1208297446Sandrew pmap_load_clear(pte); 1209297446Sandrew PTE_SYNC(pte); 1210281494Sandrew 1211281494Sandrew va += PAGE_SIZE; 1212281494Sandrew size -= PAGE_SIZE; 1213281494Sandrew } 1214285212Sandrew pmap_invalidate_range(kernel_pmap, sva, va); 1215281494Sandrew} 1216281494Sandrew 1217281494Sandrew/* 1218281494Sandrew * Used to map a range of physical addresses into kernel 1219281494Sandrew * virtual address space. 
1220281494Sandrew * 1221281494Sandrew * The value passed in '*virt' is a suggested virtual address for 1222281494Sandrew * the mapping. Architectures which can support a direct-mapped 1223281494Sandrew * physical to virtual region can return the appropriate address 1224281494Sandrew * within that region, leaving '*virt' unchanged. Other 1225281494Sandrew * architectures should map the pages starting at '*virt' and 1226281494Sandrew * update '*virt' with the first usable address after the mapped 1227281494Sandrew * region. 1228281494Sandrew */ 1229281494Sandrewvm_offset_t 1230281494Sandrewpmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 1231281494Sandrew{ 1232281494Sandrew return PHYS_TO_DMAP(start); 1233281494Sandrew} 1234281494Sandrew 1235281494Sandrew 1236281494Sandrew/* 1237281494Sandrew * Add a list of wired pages to the kva 1238281494Sandrew * this routine is only used for temporary 1239281494Sandrew * kernel mappings that do not need to have 1240281494Sandrew * page modification or references recorded. 1241281494Sandrew * Note that old mappings are simply written 1242281494Sandrew * over. The page *must* be wired. 1243281494Sandrew * Note: SMP coherent. Uses a ranged shootdown IPI. 
1244281494Sandrew */ 1245281494Sandrewvoid 1246281494Sandrewpmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1247281494Sandrew{ 1248297446Sandrew pd_entry_t *pde; 1249297446Sandrew pt_entry_t *pte, pa; 1250281494Sandrew vm_offset_t va; 1251281494Sandrew vm_page_t m; 1252297446Sandrew int i, lvl; 1253281494Sandrew 1254281494Sandrew va = sva; 1255281494Sandrew for (i = 0; i < count; i++) { 1256297446Sandrew pde = pmap_pde(kernel_pmap, va, &lvl); 1257297446Sandrew KASSERT(pde != NULL, 1258297446Sandrew ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); 1259297446Sandrew KASSERT(lvl == 2, 1260297446Sandrew ("pmap_qenter: Invalid level %d", lvl)); 1261297446Sandrew 1262281494Sandrew m = ma[i]; 1263285537Sandrew pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | 1264285537Sandrew ATTR_IDX(m->md.pv_memattr) | L3_PAGE; 1265319203Sandrew if (m->md.pv_memattr == DEVICE_MEMORY) 1266319203Sandrew pa |= ATTR_XN; 1267297446Sandrew pte = pmap_l2_to_l3(pde, va); 1268297446Sandrew pmap_load_store(pte, pa); 1269297446Sandrew PTE_SYNC(pte); 1270281494Sandrew 1271281494Sandrew va += L3_SIZE; 1272281494Sandrew } 1273285212Sandrew pmap_invalidate_range(kernel_pmap, sva, va); 1274281494Sandrew} 1275281494Sandrew 1276281494Sandrew/* 1277281494Sandrew * This routine tears out page mappings from the 1278281494Sandrew * kernel -- it is meant only for temporary mappings. 
1279281494Sandrew */ 1280281494Sandrewvoid 1281281494Sandrewpmap_qremove(vm_offset_t sva, int count) 1282281494Sandrew{ 1283297446Sandrew pt_entry_t *pte; 1284281494Sandrew vm_offset_t va; 1285297446Sandrew int lvl; 1286281494Sandrew 1287285212Sandrew KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); 1288285212Sandrew 1289281494Sandrew va = sva; 1290281494Sandrew while (count-- > 0) { 1291297446Sandrew pte = pmap_pte(kernel_pmap, va, &lvl); 1292297446Sandrew KASSERT(lvl == 3, 1293297446Sandrew ("Invalid device pagetable level: %d != 3", lvl)); 1294297446Sandrew if (pte != NULL) { 1295297446Sandrew if (pmap_l3_valid_cacheable(pmap_load(pte))) 1296297446Sandrew cpu_dcache_wb_range(va, L3_SIZE); 1297297446Sandrew pmap_load_clear(pte); 1298297446Sandrew PTE_SYNC(pte); 1299297446Sandrew } 1300285212Sandrew 1301281494Sandrew va += PAGE_SIZE; 1302281494Sandrew } 1303281494Sandrew pmap_invalidate_range(kernel_pmap, sva, va); 1304281494Sandrew} 1305281494Sandrew 1306281494Sandrew/*************************************************** 1307281494Sandrew * Page table page management routines..... 1308281494Sandrew ***************************************************/ 1309281494Sandrewstatic __inline void 1310281494Sandrewpmap_free_zero_pages(struct spglist *free) 1311281494Sandrew{ 1312281494Sandrew vm_page_t m; 1313281494Sandrew 1314281494Sandrew while ((m = SLIST_FIRST(free)) != NULL) { 1315281494Sandrew SLIST_REMOVE_HEAD(free, plinks.s.ss); 1316281494Sandrew /* Preserve the page's PG_ZERO setting. */ 1317281494Sandrew vm_page_free_toq(m); 1318281494Sandrew } 1319281494Sandrew} 1320281494Sandrew 1321281494Sandrew/* 1322281494Sandrew * Schedule the specified unused page table page to be freed. Specifically, 1323281494Sandrew * add the page to the specified list of pages that will be released to the 1324281494Sandrew * physical memory manager after the TLB has been updated. 
1325281494Sandrew */ 1326281494Sandrewstatic __inline void 1327281494Sandrewpmap_add_delayed_free_list(vm_page_t m, struct spglist *free, 1328281494Sandrew boolean_t set_PG_ZERO) 1329281494Sandrew{ 1330281494Sandrew 1331281494Sandrew if (set_PG_ZERO) 1332281494Sandrew m->flags |= PG_ZERO; 1333281494Sandrew else 1334281494Sandrew m->flags &= ~PG_ZERO; 1335281494Sandrew SLIST_INSERT_HEAD(free, m, plinks.s.ss); 1336281494Sandrew} 1337305531Sandrew 1338281494Sandrew/* 1339281494Sandrew * Decrements a page table page's wire count, which is used to record the 1340281494Sandrew * number of valid page table entries within the page. If the wire count 1341281494Sandrew * drops to zero, then the page table page is unmapped. Returns TRUE if the 1342281494Sandrew * page table page was unmapped and FALSE otherwise. 1343281494Sandrew */ 1344281494Sandrewstatic inline boolean_t 1345281494Sandrewpmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1346281494Sandrew{ 1347281494Sandrew 1348281494Sandrew --m->wire_count; 1349281494Sandrew if (m->wire_count == 0) { 1350281494Sandrew _pmap_unwire_l3(pmap, va, m, free); 1351281494Sandrew return (TRUE); 1352281494Sandrew } else 1353281494Sandrew return (FALSE); 1354281494Sandrew} 1355281494Sandrew 1356281494Sandrewstatic void 1357281494Sandrew_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1358281494Sandrew{ 1359281494Sandrew 1360281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1361281494Sandrew /* 1362281494Sandrew * unmap the page table page 1363281494Sandrew */ 1364297446Sandrew if (m->pindex >= (NUL2E + NUL1E)) { 1365297446Sandrew /* l1 page */ 1366297446Sandrew pd_entry_t *l0; 1367297446Sandrew 1368297446Sandrew l0 = pmap_l0(pmap, va); 1369297446Sandrew pmap_load_clear(l0); 1370297446Sandrew PTE_SYNC(l0); 1371297446Sandrew } else if (m->pindex >= NUL2E) { 1372297446Sandrew /* l2 page */ 1373281494Sandrew pd_entry_t *l1; 1374297446Sandrew 1375281494Sandrew l1 = pmap_l1(pmap, 
va); 1376281494Sandrew pmap_load_clear(l1); 1377281494Sandrew PTE_SYNC(l1); 1378281494Sandrew } else { 1379297446Sandrew /* l3 page */ 1380281494Sandrew pd_entry_t *l2; 1381297446Sandrew 1382281494Sandrew l2 = pmap_l2(pmap, va); 1383281494Sandrew pmap_load_clear(l2); 1384281494Sandrew PTE_SYNC(l2); 1385281494Sandrew } 1386281494Sandrew pmap_resident_count_dec(pmap, 1); 1387297446Sandrew if (m->pindex < NUL2E) { 1388297446Sandrew /* We just released an l3, unhold the matching l2 */ 1389297446Sandrew pd_entry_t *l1, tl1; 1390297446Sandrew vm_page_t l2pg; 1391281494Sandrew 1392297446Sandrew l1 = pmap_l1(pmap, va); 1393297446Sandrew tl1 = pmap_load(l1); 1394297446Sandrew l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); 1395297446Sandrew pmap_unwire_l3(pmap, va, l2pg, free); 1396297446Sandrew } else if (m->pindex < (NUL2E + NUL1E)) { 1397297446Sandrew /* We just released an l2, unhold the matching l1 */ 1398297446Sandrew pd_entry_t *l0, tl0; 1399297446Sandrew vm_page_t l1pg; 1400297446Sandrew 1401297446Sandrew l0 = pmap_l0(pmap, va); 1402297446Sandrew tl0 = pmap_load(l0); 1403297446Sandrew l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); 1404297446Sandrew pmap_unwire_l3(pmap, va, l1pg, free); 1405281494Sandrew } 1406285212Sandrew pmap_invalidate_page(pmap, va); 1407281494Sandrew 1408281494Sandrew /* 1409281494Sandrew * This is a release store so that the ordinary store unmapping 1410281494Sandrew * the page table page is globally performed before TLB shoot- 1411281494Sandrew * down is begun. 
1412281494Sandrew */ 1413281494Sandrew atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); 1414281494Sandrew 1415305531Sandrew /* 1416281494Sandrew * Put page on a list so that it is released after 1417281494Sandrew * *ALL* TLB shootdown is done 1418281494Sandrew */ 1419281494Sandrew pmap_add_delayed_free_list(m, free, TRUE); 1420281494Sandrew} 1421281494Sandrew 1422281494Sandrew/* 1423281494Sandrew * After removing an l3 entry, this routine is used to 1424281494Sandrew * conditionally free the page, and manage the hold/wire counts. 1425281494Sandrew */ 1426281494Sandrewstatic int 1427281494Sandrewpmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, 1428281494Sandrew struct spglist *free) 1429281494Sandrew{ 1430281494Sandrew vm_page_t mpte; 1431281494Sandrew 1432281494Sandrew if (va >= VM_MAXUSER_ADDRESS) 1433281494Sandrew return (0); 1434281494Sandrew KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1435281494Sandrew mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); 1436281494Sandrew return (pmap_unwire_l3(pmap, va, mpte, free)); 1437281494Sandrew} 1438281494Sandrew 1439281494Sandrewvoid 1440281494Sandrewpmap_pinit0(pmap_t pmap) 1441281494Sandrew{ 1442281494Sandrew 1443281494Sandrew PMAP_LOCK_INIT(pmap); 1444281494Sandrew bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1445297446Sandrew pmap->pm_l0 = kernel_pmap->pm_l0; 1446305882Sandrew pmap->pm_root.rt_root = 0; 1447281494Sandrew} 1448281494Sandrew 1449281494Sandrewint 1450281494Sandrewpmap_pinit(pmap_t pmap) 1451281494Sandrew{ 1452297446Sandrew vm_paddr_t l0phys; 1453297446Sandrew vm_page_t l0pt; 1454281494Sandrew 1455281494Sandrew /* 1456297446Sandrew * allocate the l0 page 1457281494Sandrew */ 1458297446Sandrew while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 1459281494Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1460281494Sandrew VM_WAIT; 1461281494Sandrew 1462297446Sandrew l0phys = VM_PAGE_TO_PHYS(l0pt); 1463297446Sandrew pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); 
1464281494Sandrew 1465297446Sandrew if ((l0pt->flags & PG_ZERO) == 0) 1466297446Sandrew pagezero(pmap->pm_l0); 1467281494Sandrew 1468305882Sandrew pmap->pm_root.rt_root = 0; 1469281494Sandrew bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1470281494Sandrew 1471281494Sandrew return (1); 1472281494Sandrew} 1473281494Sandrew 1474281494Sandrew/* 1475281494Sandrew * This routine is called if the desired page table page does not exist. 1476281494Sandrew * 1477281494Sandrew * If page table page allocation fails, this routine may sleep before 1478281494Sandrew * returning NULL. It sleeps only if a lock pointer was given. 1479281494Sandrew * 1480281494Sandrew * Note: If a page allocation fails at page table level two or three, 1481281494Sandrew * one or two pages may be held during the wait, only to be released 1482281494Sandrew * afterwards. This conservative approach is easily argued to avoid 1483281494Sandrew * race conditions. 1484281494Sandrew */ 1485281494Sandrewstatic vm_page_t 1486281494Sandrew_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) 1487281494Sandrew{ 1488297446Sandrew vm_page_t m, l1pg, l2pg; 1489281494Sandrew 1490281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1491281494Sandrew 1492281494Sandrew /* 1493281494Sandrew * Allocate a page table page. 1494281494Sandrew */ 1495281494Sandrew if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1496281494Sandrew VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1497281494Sandrew if (lockp != NULL) { 1498281494Sandrew RELEASE_PV_LIST_LOCK(lockp); 1499281494Sandrew PMAP_UNLOCK(pmap); 1500281494Sandrew VM_WAIT; 1501281494Sandrew PMAP_LOCK(pmap); 1502281494Sandrew } 1503281494Sandrew 1504281494Sandrew /* 1505281494Sandrew * Indicate the need to retry. While waiting, the page table 1506281494Sandrew * page may have been allocated. 
1507281494Sandrew */ 1508281494Sandrew return (NULL); 1509281494Sandrew } 1510281494Sandrew if ((m->flags & PG_ZERO) == 0) 1511281494Sandrew pmap_zero_page(m); 1512281494Sandrew 1513281494Sandrew /* 1514281494Sandrew * Map the pagetable page into the process address space, if 1515281494Sandrew * it isn't already there. 1516281494Sandrew */ 1517281494Sandrew 1518297446Sandrew if (ptepindex >= (NUL2E + NUL1E)) { 1519297446Sandrew pd_entry_t *l0; 1520297446Sandrew vm_pindex_t l0index; 1521281494Sandrew 1522297446Sandrew l0index = ptepindex - (NUL2E + NUL1E); 1523297446Sandrew l0 = &pmap->pm_l0[l0index]; 1524297446Sandrew pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); 1525297446Sandrew PTE_SYNC(l0); 1526297446Sandrew } else if (ptepindex >= NUL2E) { 1527297446Sandrew vm_pindex_t l0index, l1index; 1528297446Sandrew pd_entry_t *l0, *l1; 1529297446Sandrew pd_entry_t tl0; 1530297446Sandrew 1531297446Sandrew l1index = ptepindex - NUL2E; 1532297446Sandrew l0index = l1index >> L0_ENTRIES_SHIFT; 1533297446Sandrew 1534297446Sandrew l0 = &pmap->pm_l0[l0index]; 1535297446Sandrew tl0 = pmap_load(l0); 1536297446Sandrew if (tl0 == 0) { 1537297446Sandrew /* recurse for allocating page dir */ 1538297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, 1539297446Sandrew lockp) == NULL) { 1540297446Sandrew --m->wire_count; 1541297446Sandrew /* XXX: release mem barrier? 
*/ 1542297446Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1543297446Sandrew vm_page_free_zero(m); 1544297446Sandrew return (NULL); 1545297446Sandrew } 1546297446Sandrew } else { 1547297446Sandrew l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); 1548297446Sandrew l1pg->wire_count++; 1549297446Sandrew } 1550297446Sandrew 1551297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); 1552297446Sandrew l1 = &l1[ptepindex & Ln_ADDR_MASK]; 1553281494Sandrew pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); 1554281494Sandrew PTE_SYNC(l1); 1555281494Sandrew } else { 1556297446Sandrew vm_pindex_t l0index, l1index; 1557297446Sandrew pd_entry_t *l0, *l1, *l2; 1558297446Sandrew pd_entry_t tl0, tl1; 1559281494Sandrew 1560297446Sandrew l1index = ptepindex >> Ln_ENTRIES_SHIFT; 1561297446Sandrew l0index = l1index >> L0_ENTRIES_SHIFT; 1562297446Sandrew 1563297446Sandrew l0 = &pmap->pm_l0[l0index]; 1564297446Sandrew tl0 = pmap_load(l0); 1565297446Sandrew if (tl0 == 0) { 1566281494Sandrew /* recurse for allocating page dir */ 1567297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1568281494Sandrew lockp) == NULL) { 1569281494Sandrew --m->wire_count; 1570281494Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1571281494Sandrew vm_page_free_zero(m); 1572281494Sandrew return (NULL); 1573281494Sandrew } 1574297446Sandrew tl0 = pmap_load(l0); 1575297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1576297446Sandrew l1 = &l1[l1index & Ln_ADDR_MASK]; 1577281494Sandrew } else { 1578297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1579297446Sandrew l1 = &l1[l1index & Ln_ADDR_MASK]; 1580297446Sandrew tl1 = pmap_load(l1); 1581297446Sandrew if (tl1 == 0) { 1582297446Sandrew /* recurse for allocating page dir */ 1583297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1584297446Sandrew lockp) == NULL) { 1585297446Sandrew --m->wire_count; 1586297446Sandrew /* XXX: release mem barrier? 
*/ 1587297446Sandrew atomic_subtract_int( 1588297446Sandrew &vm_cnt.v_wire_count, 1); 1589297446Sandrew vm_page_free_zero(m); 1590297446Sandrew return (NULL); 1591297446Sandrew } 1592297446Sandrew } else { 1593297446Sandrew l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); 1594297446Sandrew l2pg->wire_count++; 1595297446Sandrew } 1596281494Sandrew } 1597281494Sandrew 1598288445Sandrew l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 1599281494Sandrew l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1600285537Sandrew pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); 1601281494Sandrew PTE_SYNC(l2); 1602281494Sandrew } 1603281494Sandrew 1604281494Sandrew pmap_resident_count_inc(pmap, 1); 1605281494Sandrew 1606281494Sandrew return (m); 1607281494Sandrew} 1608281494Sandrew 1609281494Sandrewstatic vm_page_t 1610281494Sandrewpmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1611281494Sandrew{ 1612281494Sandrew vm_pindex_t ptepindex; 1613297446Sandrew pd_entry_t *pde, tpde; 1614305882Sandrew#ifdef INVARIANTS 1615305882Sandrew pt_entry_t *pte; 1616305882Sandrew#endif 1617281494Sandrew vm_page_t m; 1618297446Sandrew int lvl; 1619281494Sandrew 1620281494Sandrew /* 1621281494Sandrew * Calculate pagetable page index 1622281494Sandrew */ 1623281494Sandrew ptepindex = pmap_l2_pindex(va); 1624281494Sandrewretry: 1625281494Sandrew /* 1626281494Sandrew * Get the page directory entry 1627281494Sandrew */ 1628297446Sandrew pde = pmap_pde(pmap, va, &lvl); 1629281494Sandrew 1630281494Sandrew /* 1631297446Sandrew * If the page table page is mapped, we just increment the hold count, 1632297446Sandrew * and activate it. If we get a level 2 pde it will point to a level 3 1633297446Sandrew * table. 
1634281494Sandrew */ 1635305882Sandrew switch (lvl) { 1636305882Sandrew case -1: 1637305882Sandrew break; 1638305882Sandrew case 0: 1639305882Sandrew#ifdef INVARIANTS 1640305882Sandrew pte = pmap_l0_to_l1(pde, va); 1641305882Sandrew KASSERT(pmap_load(pte) == 0, 1642305882Sandrew ("pmap_alloc_l3: TODO: l0 superpages")); 1643305882Sandrew#endif 1644305882Sandrew break; 1645305882Sandrew case 1: 1646305882Sandrew#ifdef INVARIANTS 1647305882Sandrew pte = pmap_l1_to_l2(pde, va); 1648305882Sandrew KASSERT(pmap_load(pte) == 0, 1649305882Sandrew ("pmap_alloc_l3: TODO: l1 superpages")); 1650305882Sandrew#endif 1651305882Sandrew break; 1652305882Sandrew case 2: 1653297446Sandrew tpde = pmap_load(pde); 1654297446Sandrew if (tpde != 0) { 1655297446Sandrew m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); 1656297446Sandrew m->wire_count++; 1657297446Sandrew return (m); 1658297446Sandrew } 1659305882Sandrew break; 1660305882Sandrew default: 1661305882Sandrew panic("pmap_alloc_l3: Invalid level %d", lvl); 1662281494Sandrew } 1663297446Sandrew 1664297446Sandrew /* 1665297446Sandrew * Here if the pte page isn't mapped, or if it has been deallocated. 1666297446Sandrew */ 1667297446Sandrew m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1668297446Sandrew if (m == NULL && lockp != NULL) 1669297446Sandrew goto retry; 1670297446Sandrew 1671281494Sandrew return (m); 1672281494Sandrew} 1673281494Sandrew 1674281494Sandrew 1675281494Sandrew/*************************************************** 1676281494Sandrew * Pmap allocation/deallocation routines. 1677281494Sandrew ***************************************************/ 1678281494Sandrew 1679281494Sandrew/* 1680281494Sandrew * Release any resources held by the given physical map. 1681281494Sandrew * Called when a pmap initialized by pmap_pinit is being released. 1682281494Sandrew * Should only be called if the map contains no valid mappings. 
1683281494Sandrew */ 1684281494Sandrewvoid 1685281494Sandrewpmap_release(pmap_t pmap) 1686281494Sandrew{ 1687281494Sandrew vm_page_t m; 1688281494Sandrew 1689281494Sandrew KASSERT(pmap->pm_stats.resident_count == 0, 1690281494Sandrew ("pmap_release: pmap resident count %ld != 0", 1691281494Sandrew pmap->pm_stats.resident_count)); 1692305882Sandrew KASSERT(vm_radix_is_empty(&pmap->pm_root), 1693305882Sandrew ("pmap_release: pmap has reserved page table page(s)")); 1694281494Sandrew 1695297446Sandrew m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); 1696281494Sandrew 1697281494Sandrew m->wire_count--; 1698281494Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1699281494Sandrew vm_page_free_zero(m); 1700281494Sandrew} 1701281494Sandrew 1702281494Sandrewstatic int 1703281494Sandrewkvm_size(SYSCTL_HANDLER_ARGS) 1704281494Sandrew{ 1705281494Sandrew unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1706281494Sandrew 1707281494Sandrew return sysctl_handle_long(oidp, &ksize, 0, req); 1708281494Sandrew} 1709305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1710281494Sandrew 0, 0, kvm_size, "LU", "Size of KVM"); 1711281494Sandrew 1712281494Sandrewstatic int 1713281494Sandrewkvm_free(SYSCTL_HANDLER_ARGS) 1714281494Sandrew{ 1715281494Sandrew unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1716281494Sandrew 1717281494Sandrew return sysctl_handle_long(oidp, &kfree, 0, req); 1718281494Sandrew} 1719305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1720281494Sandrew 0, 0, kvm_free, "LU", "Amount of KVM free"); 1721281494Sandrew 1722281494Sandrew/* 1723281494Sandrew * grow the number of kernel page table entries, if needed 1724281494Sandrew */ 1725281494Sandrewvoid 1726281494Sandrewpmap_growkernel(vm_offset_t addr) 1727281494Sandrew{ 1728281494Sandrew vm_paddr_t paddr; 1729281494Sandrew vm_page_t nkpg; 1730297446Sandrew pd_entry_t *l0, *l1, *l2; 1731281494Sandrew 1732281494Sandrew 
mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1733281494Sandrew 1734281494Sandrew addr = roundup2(addr, L2_SIZE); 1735281494Sandrew if (addr - 1 >= kernel_map->max_offset) 1736281494Sandrew addr = kernel_map->max_offset; 1737281494Sandrew while (kernel_vm_end < addr) { 1738297446Sandrew l0 = pmap_l0(kernel_pmap, kernel_vm_end); 1739297446Sandrew KASSERT(pmap_load(l0) != 0, 1740297446Sandrew ("pmap_growkernel: No level 0 kernel entry")); 1741297446Sandrew 1742297446Sandrew l1 = pmap_l0_to_l1(l0, kernel_vm_end); 1743285045Sandrew if (pmap_load(l1) == 0) { 1744281494Sandrew /* We need a new PDP entry */ 1745281494Sandrew nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1746281494Sandrew VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1747281494Sandrew VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1748281494Sandrew if (nkpg == NULL) 1749281494Sandrew panic("pmap_growkernel: no memory to grow kernel"); 1750281494Sandrew if ((nkpg->flags & PG_ZERO) == 0) 1751281494Sandrew pmap_zero_page(nkpg); 1752281494Sandrew paddr = VM_PAGE_TO_PHYS(nkpg); 1753281494Sandrew pmap_load_store(l1, paddr | L1_TABLE); 1754281494Sandrew PTE_SYNC(l1); 1755281494Sandrew continue; /* try again */ 1756281494Sandrew } 1757281494Sandrew l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1758285045Sandrew if ((pmap_load(l2) & ATTR_AF) != 0) { 1759281494Sandrew kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1760281494Sandrew if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1761281494Sandrew kernel_vm_end = kernel_map->max_offset; 1762305531Sandrew break; 1763281494Sandrew } 1764281494Sandrew continue; 1765281494Sandrew } 1766281494Sandrew 1767281494Sandrew nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1768281494Sandrew VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1769281494Sandrew VM_ALLOC_ZERO); 1770281494Sandrew if (nkpg == NULL) 1771281494Sandrew panic("pmap_growkernel: no memory to grow kernel"); 1772281494Sandrew if ((nkpg->flags & PG_ZERO) == 0) 1773281494Sandrew pmap_zero_page(nkpg); 
1774281494Sandrew paddr = VM_PAGE_TO_PHYS(nkpg); 1775281494Sandrew pmap_load_store(l2, paddr | L2_TABLE); 1776281494Sandrew PTE_SYNC(l2); 1777285212Sandrew pmap_invalidate_page(kernel_pmap, kernel_vm_end); 1778281494Sandrew 1779281494Sandrew kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1780281494Sandrew if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1781281494Sandrew kernel_vm_end = kernel_map->max_offset; 1782305531Sandrew break; 1783281494Sandrew } 1784281494Sandrew } 1785281494Sandrew} 1786281494Sandrew 1787281494Sandrew 1788281494Sandrew/*************************************************** 1789281494Sandrew * page management routines. 1790281494Sandrew ***************************************************/ 1791281494Sandrew 1792281494SandrewCTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1793281494SandrewCTASSERT(_NPCM == 3); 1794281494SandrewCTASSERT(_NPCPV == 168); 1795281494Sandrew 1796281494Sandrewstatic __inline struct pv_chunk * 1797281494Sandrewpv_to_chunk(pv_entry_t pv) 1798281494Sandrew{ 1799281494Sandrew 1800281494Sandrew return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1801281494Sandrew} 1802281494Sandrew 1803281494Sandrew#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1804281494Sandrew 1805281494Sandrew#define PC_FREE0 0xfffffffffffffffful 1806281494Sandrew#define PC_FREE1 0xfffffffffffffffful 1807281494Sandrew#define PC_FREE2 0x000000fffffffffful 1808281494Sandrew 1809281494Sandrewstatic const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1810281494Sandrew 1811281494Sandrew#if 0 1812281494Sandrew#ifdef PV_STATS 1813281494Sandrewstatic int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1814281494Sandrew 1815281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1816281494Sandrew "Current number of pv entry chunks"); 1817281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1818281494Sandrew "Current number 
of pv entry chunks allocated"); 1819281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1820281494Sandrew "Current number of pv entry chunks frees"); 1821281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1822281494Sandrew "Number of times tried to get a chunk page but failed."); 1823281494Sandrew 1824281494Sandrewstatic long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1825281494Sandrewstatic int pv_entry_spare; 1826281494Sandrew 1827281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1828281494Sandrew "Current number of pv entry frees"); 1829281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1830281494Sandrew "Current number of pv entry allocs"); 1831281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1832281494Sandrew "Current number of pv entries"); 1833281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1834281494Sandrew "Current number of spare pv entries"); 1835281494Sandrew#endif 1836281494Sandrew#endif /* 0 */ 1837281494Sandrew 1838281494Sandrew/* 1839281494Sandrew * We are in a serious low memory condition. Resort to 1840281494Sandrew * drastic measures to free some pages so we can allocate 1841281494Sandrew * another pv entry chunk. 1842281494Sandrew * 1843281494Sandrew * Returns NULL if PV entries were reclaimed from the specified pmap. 1844281494Sandrew * 1845281494Sandrew * We do not, however, unmap 2mpages because subsequent accesses will 1846281494Sandrew * allocate per-page pv entries until repromotion occurs, thereby 1847281494Sandrew * exacerbating the shortage of free pv entries. 
1848281494Sandrew */ 1849281494Sandrewstatic vm_page_t 1850281494Sandrewreclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1851281494Sandrew{ 1852319210Sandrew struct pch new_tail; 1853319210Sandrew struct pv_chunk *pc; 1854319210Sandrew struct md_page *pvh; 1855319210Sandrew pd_entry_t *pde; 1856319210Sandrew pmap_t pmap; 1857319210Sandrew pt_entry_t *pte, tpte; 1858319210Sandrew pv_entry_t pv; 1859319210Sandrew vm_offset_t va; 1860319210Sandrew vm_page_t m, m_pc; 1861319210Sandrew struct spglist free; 1862319210Sandrew uint64_t inuse; 1863319210Sandrew int bit, field, freed, lvl; 1864281494Sandrew 1865319210Sandrew PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1866319210Sandrew KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL")); 1867319210Sandrew pmap = NULL; 1868319210Sandrew m_pc = NULL; 1869319210Sandrew SLIST_INIT(&free); 1870319210Sandrew TAILQ_INIT(&new_tail); 1871319210Sandrew mtx_lock(&pv_chunks_mutex); 1872319210Sandrew while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) { 1873319210Sandrew TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1874319210Sandrew mtx_unlock(&pv_chunks_mutex); 1875319210Sandrew if (pmap != pc->pc_pmap) { 1876319210Sandrew if (pmap != NULL && pmap != locked_pmap) 1877319210Sandrew PMAP_UNLOCK(pmap); 1878319210Sandrew pmap = pc->pc_pmap; 1879319210Sandrew /* Avoid deadlock and lock recursion. */ 1880319210Sandrew if (pmap > locked_pmap) { 1881319210Sandrew RELEASE_PV_LIST_LOCK(lockp); 1882319210Sandrew PMAP_LOCK(pmap); 1883319210Sandrew } else if (pmap != locked_pmap && 1884319210Sandrew !PMAP_TRYLOCK(pmap)) { 1885319210Sandrew pmap = NULL; 1886319210Sandrew TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 1887319210Sandrew mtx_lock(&pv_chunks_mutex); 1888319210Sandrew continue; 1889319210Sandrew } 1890319210Sandrew } 1891319210Sandrew 1892319210Sandrew /* 1893319210Sandrew * Destroy every non-wired, 4 KB page mapping in the chunk. 
1894319210Sandrew */ 1895319210Sandrew freed = 0; 1896319210Sandrew for (field = 0; field < _NPCM; field++) { 1897319210Sandrew for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 1898319210Sandrew inuse != 0; inuse &= ~(1UL << bit)) { 1899319210Sandrew bit = ffsl(inuse) - 1; 1900319210Sandrew pv = &pc->pc_pventry[field * 64 + bit]; 1901319210Sandrew va = pv->pv_va; 1902319210Sandrew pde = pmap_pde(pmap, va, &lvl); 1903319210Sandrew if (lvl != 2) 1904319210Sandrew continue; 1905319210Sandrew pte = pmap_l2_to_l3(pde, va); 1906319210Sandrew tpte = pmap_load(pte); 1907319210Sandrew if ((tpte & ATTR_SW_WIRED) != 0) 1908319210Sandrew continue; 1909319210Sandrew tpte = pmap_load_clear(pte); 1910319210Sandrew PTE_SYNC(pte); 1911319210Sandrew pmap_invalidate_page(pmap, va); 1912319210Sandrew m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); 1913319210Sandrew if (pmap_page_dirty(tpte)) 1914319210Sandrew vm_page_dirty(m); 1915319210Sandrew if ((tpte & ATTR_AF) != 0) 1916319210Sandrew vm_page_aflag_set(m, PGA_REFERENCED); 1917319210Sandrew CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1918319210Sandrew TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 1919319210Sandrew m->md.pv_gen++; 1920319210Sandrew if (TAILQ_EMPTY(&m->md.pv_list) && 1921319210Sandrew (m->flags & PG_FICTITIOUS) == 0) { 1922319210Sandrew pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 1923319210Sandrew if (TAILQ_EMPTY(&pvh->pv_list)) { 1924319210Sandrew vm_page_aflag_clear(m, 1925319210Sandrew PGA_WRITEABLE); 1926319210Sandrew } 1927319210Sandrew } 1928319210Sandrew pc->pc_map[field] |= 1UL << bit; 1929319210Sandrew pmap_unuse_l3(pmap, va, pmap_load(pde), &free); 1930319210Sandrew freed++; 1931319210Sandrew } 1932319210Sandrew } 1933319210Sandrew if (freed == 0) { 1934319210Sandrew TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 1935319210Sandrew mtx_lock(&pv_chunks_mutex); 1936319210Sandrew continue; 1937319210Sandrew } 1938319210Sandrew /* Every freed mapping is for a 4 KB page. 
*/ 1939319210Sandrew pmap_resident_count_dec(pmap, freed); 1940319210Sandrew PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 1941319210Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 1942319210Sandrew PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 1943319210Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1944319210Sandrew if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && 1945319210Sandrew pc->pc_map[2] == PC_FREE2) { 1946319210Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1947319210Sandrew PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1948319210Sandrew PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1949319210Sandrew /* Entire chunk is free; return it. */ 1950319210Sandrew m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1951319210Sandrew dump_drop_page(m_pc->phys_addr); 1952319210Sandrew mtx_lock(&pv_chunks_mutex); 1953319210Sandrew break; 1954319210Sandrew } 1955319210Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1956319210Sandrew TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 1957319210Sandrew mtx_lock(&pv_chunks_mutex); 1958319210Sandrew /* One freed pv entry in locked_pmap is sufficient. */ 1959319210Sandrew if (pmap == locked_pmap) 1960319210Sandrew break; 1961319210Sandrew } 1962319210Sandrew TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); 1963319210Sandrew mtx_unlock(&pv_chunks_mutex); 1964319210Sandrew if (pmap != NULL && pmap != locked_pmap) 1965319210Sandrew PMAP_UNLOCK(pmap); 1966319210Sandrew if (m_pc == NULL && !SLIST_EMPTY(&free)) { 1967319210Sandrew m_pc = SLIST_FIRST(&free); 1968319210Sandrew SLIST_REMOVE_HEAD(&free, plinks.s.ss); 1969319210Sandrew /* Recycle a freed page table page. 
*/ 1970319210Sandrew m_pc->wire_count = 1; 1971319210Sandrew atomic_add_int(&vm_cnt.v_wire_count, 1); 1972319210Sandrew } 1973319210Sandrew pmap_free_zero_pages(&free); 1974319210Sandrew return (m_pc); 1975281494Sandrew} 1976281494Sandrew 1977281494Sandrew/* 1978281494Sandrew * free the pv_entry back to the free list 1979281494Sandrew */ 1980281494Sandrewstatic void 1981281494Sandrewfree_pv_entry(pmap_t pmap, pv_entry_t pv) 1982281494Sandrew{ 1983281494Sandrew struct pv_chunk *pc; 1984281494Sandrew int idx, field, bit; 1985281494Sandrew 1986281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1987281494Sandrew PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1988281494Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1989281494Sandrew PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1990281494Sandrew pc = pv_to_chunk(pv); 1991281494Sandrew idx = pv - &pc->pc_pventry[0]; 1992281494Sandrew field = idx / 64; 1993281494Sandrew bit = idx % 64; 1994281494Sandrew pc->pc_map[field] |= 1ul << bit; 1995281494Sandrew if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1996281494Sandrew pc->pc_map[2] != PC_FREE2) { 1997281494Sandrew /* 98% of the time, pc is already at the head of the list. 
*/ 1998281494Sandrew if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 1999281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2000281494Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2001281494Sandrew } 2002281494Sandrew return; 2003281494Sandrew } 2004281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2005281494Sandrew free_pv_chunk(pc); 2006281494Sandrew} 2007281494Sandrew 2008281494Sandrewstatic void 2009281494Sandrewfree_pv_chunk(struct pv_chunk *pc) 2010281494Sandrew{ 2011281494Sandrew vm_page_t m; 2012281494Sandrew 2013281494Sandrew mtx_lock(&pv_chunks_mutex); 2014281494Sandrew TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2015281494Sandrew mtx_unlock(&pv_chunks_mutex); 2016281494Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 2017281494Sandrew PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 2018281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 2019281494Sandrew /* entire chunk is free, return it */ 2020281494Sandrew m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 2021281494Sandrew dump_drop_page(m->phys_addr); 2022288256Salc vm_page_unwire(m, PQ_NONE); 2023281494Sandrew vm_page_free(m); 2024281494Sandrew} 2025281494Sandrew 2026281494Sandrew/* 2027281494Sandrew * Returns a new PV entry, allocating a new PV chunk from the system when 2028281494Sandrew * needed. If this PV chunk allocation fails and a PV list lock pointer was 2029281494Sandrew * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 2030281494Sandrew * returned. 2031281494Sandrew * 2032281494Sandrew * The given PV list lock may be released. 
2033281494Sandrew */ 2034281494Sandrewstatic pv_entry_t 2035281494Sandrewget_pv_entry(pmap_t pmap, struct rwlock **lockp) 2036281494Sandrew{ 2037281494Sandrew int bit, field; 2038281494Sandrew pv_entry_t pv; 2039281494Sandrew struct pv_chunk *pc; 2040281494Sandrew vm_page_t m; 2041281494Sandrew 2042281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2043281494Sandrew PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 2044281494Sandrewretry: 2045281494Sandrew pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2046281494Sandrew if (pc != NULL) { 2047281494Sandrew for (field = 0; field < _NPCM; field++) { 2048281494Sandrew if (pc->pc_map[field]) { 2049281494Sandrew bit = ffsl(pc->pc_map[field]) - 1; 2050281494Sandrew break; 2051281494Sandrew } 2052281494Sandrew } 2053281494Sandrew if (field < _NPCM) { 2054281494Sandrew pv = &pc->pc_pventry[field * 64 + bit]; 2055281494Sandrew pc->pc_map[field] &= ~(1ul << bit); 2056281494Sandrew /* If this was the last item, move it to tail */ 2057281494Sandrew if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 2058281494Sandrew pc->pc_map[2] == 0) { 2059281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2060281494Sandrew TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 2061281494Sandrew pc_list); 2062281494Sandrew } 2063281494Sandrew PV_STAT(atomic_add_long(&pv_entry_count, 1)); 2064281494Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 2065281494Sandrew return (pv); 2066281494Sandrew } 2067281494Sandrew } 2068281494Sandrew /* No free items, allocate another chunk */ 2069281494Sandrew m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 2070281494Sandrew VM_ALLOC_WIRED); 2071281494Sandrew if (m == NULL) { 2072281494Sandrew if (lockp == NULL) { 2073281494Sandrew PV_STAT(pc_chunk_tryfail++); 2074281494Sandrew return (NULL); 2075281494Sandrew } 2076281494Sandrew m = reclaim_pv_chunk(pmap, lockp); 2077281494Sandrew if (m == NULL) 2078281494Sandrew goto retry; 2079281494Sandrew } 2080281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 
2081281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 2082281494Sandrew dump_add_page(m->phys_addr); 2083281494Sandrew pc = (void *)PHYS_TO_DMAP(m->phys_addr); 2084281494Sandrew pc->pc_pmap = pmap; 2085281494Sandrew pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 2086281494Sandrew pc->pc_map[1] = PC_FREE1; 2087281494Sandrew pc->pc_map[2] = PC_FREE2; 2088281494Sandrew mtx_lock(&pv_chunks_mutex); 2089281494Sandrew TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 2090281494Sandrew mtx_unlock(&pv_chunks_mutex); 2091281494Sandrew pv = &pc->pc_pventry[0]; 2092281494Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2093281494Sandrew PV_STAT(atomic_add_long(&pv_entry_count, 1)); 2094281494Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 2095281494Sandrew return (pv); 2096281494Sandrew} 2097281494Sandrew 2098281494Sandrew/* 2099305882Sandrew * Ensure that the number of spare PV entries in the specified pmap meets or 2100305882Sandrew * exceeds the given count, "needed". 2101305882Sandrew * 2102305882Sandrew * The given PV list lock may be released. 2103305882Sandrew */ 2104305882Sandrewstatic void 2105305882Sandrewreserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) 2106305882Sandrew{ 2107305882Sandrew struct pch new_tail; 2108305882Sandrew struct pv_chunk *pc; 2109305882Sandrew int avail, free; 2110305882Sandrew vm_page_t m; 2111305882Sandrew 2112305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2113305882Sandrew KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); 2114305882Sandrew 2115305882Sandrew /* 2116305882Sandrew * Newly allocated PV chunks must be stored in a private list until 2117305882Sandrew * the required number of PV chunks have been allocated. Otherwise, 2118305882Sandrew * reclaim_pv_chunk() could recycle one of these chunks. In 2119305882Sandrew * contrast, these chunks must be added to the pmap upon allocation. 
2120305882Sandrew */ 2121305882Sandrew TAILQ_INIT(&new_tail); 2122305882Sandrewretry: 2123305882Sandrew avail = 0; 2124305882Sandrew TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { 2125305882Sandrew bit_count((bitstr_t *)pc->pc_map, 0, 2126305882Sandrew sizeof(pc->pc_map) * NBBY, &free); 2127305882Sandrew if (free == 0) 2128305882Sandrew break; 2129305882Sandrew avail += free; 2130305882Sandrew if (avail >= needed) 2131305882Sandrew break; 2132305882Sandrew } 2133305882Sandrew for (; avail < needed; avail += _NPCPV) { 2134305882Sandrew m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 2135305882Sandrew VM_ALLOC_WIRED); 2136305882Sandrew if (m == NULL) { 2137305882Sandrew m = reclaim_pv_chunk(pmap, lockp); 2138305882Sandrew if (m == NULL) 2139305882Sandrew goto retry; 2140305882Sandrew } 2141305882Sandrew PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 2142305882Sandrew PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 2143305882Sandrew dump_add_page(m->phys_addr); 2144305882Sandrew pc = (void *)PHYS_TO_DMAP(m->phys_addr); 2145305882Sandrew pc->pc_pmap = pmap; 2146305882Sandrew pc->pc_map[0] = PC_FREE0; 2147305882Sandrew pc->pc_map[1] = PC_FREE1; 2148305882Sandrew pc->pc_map[2] = PC_FREE2; 2149305882Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2150305882Sandrew TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 2151305882Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); 2152305882Sandrew } 2153305882Sandrew if (!TAILQ_EMPTY(&new_tail)) { 2154305882Sandrew mtx_lock(&pv_chunks_mutex); 2155305882Sandrew TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); 2156305882Sandrew mtx_unlock(&pv_chunks_mutex); 2157305882Sandrew } 2158305882Sandrew} 2159305882Sandrew 2160305882Sandrew/* 2161281494Sandrew * First find and then remove the pv entry for the specified pmap and virtual 2162281494Sandrew * address from the specified pv list. Returns the pv entry if found and NULL 2163281494Sandrew * otherwise. 
This operation can be performed on pv lists for either 4KB or 2164281494Sandrew * 2MB page mappings. 2165281494Sandrew */ 2166281494Sandrewstatic __inline pv_entry_t 2167281494Sandrewpmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2168281494Sandrew{ 2169281494Sandrew pv_entry_t pv; 2170281494Sandrew 2171281494Sandrew TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 2172281494Sandrew if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2173281494Sandrew TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 2174281494Sandrew pvh->pv_gen++; 2175281494Sandrew break; 2176281494Sandrew } 2177281494Sandrew } 2178281494Sandrew return (pv); 2179281494Sandrew} 2180281494Sandrew 2181281494Sandrew/* 2182305882Sandrew * After demotion from a 2MB page mapping to 512 4KB page mappings, 2183305882Sandrew * destroy the pv entry for the 2MB page mapping and reinstantiate the pv 2184305882Sandrew * entries for each of the 4KB page mappings. 2185305882Sandrew */ 2186305882Sandrewstatic void 2187305882Sandrewpmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, 2188305882Sandrew struct rwlock **lockp) 2189305882Sandrew{ 2190305882Sandrew struct md_page *pvh; 2191305882Sandrew struct pv_chunk *pc; 2192305882Sandrew pv_entry_t pv; 2193305882Sandrew vm_offset_t va_last; 2194305882Sandrew vm_page_t m; 2195305882Sandrew int bit, field; 2196305882Sandrew 2197305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2198305882Sandrew KASSERT((pa & L2_OFFSET) == 0, 2199305882Sandrew ("pmap_pv_demote_l2: pa is not 2mpage aligned")); 2200305882Sandrew CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); 2201305882Sandrew 2202305882Sandrew /* 2203305882Sandrew * Transfer the 2mpage's pv entry for this mapping to the first 2204305882Sandrew * page's pv list. Once this transfer begins, the pv list lock 2205305882Sandrew * must not be released until the last pv entry is reinstantiated. 
2206305882Sandrew */ 2207305882Sandrew pvh = pa_to_pvh(pa); 2208305882Sandrew va = va & ~L2_OFFSET; 2209305882Sandrew pv = pmap_pvh_remove(pvh, pmap, va); 2210305882Sandrew KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); 2211305882Sandrew m = PHYS_TO_VM_PAGE(pa); 2212305882Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2213305882Sandrew m->md.pv_gen++; 2214305882Sandrew /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */ 2215305882Sandrew PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1)); 2216305882Sandrew va_last = va + L2_SIZE - PAGE_SIZE; 2217305882Sandrew for (;;) { 2218305882Sandrew pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2219305882Sandrew KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || 2220305882Sandrew pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); 2221305882Sandrew for (field = 0; field < _NPCM; field++) { 2222305882Sandrew while (pc->pc_map[field]) { 2223305882Sandrew bit = ffsl(pc->pc_map[field]) - 1; 2224305882Sandrew pc->pc_map[field] &= ~(1ul << bit); 2225305882Sandrew pv = &pc->pc_pventry[field * 64 + bit]; 2226305882Sandrew va += PAGE_SIZE; 2227305882Sandrew pv->pv_va = va; 2228305882Sandrew m++; 2229305882Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2230305882Sandrew ("pmap_pv_demote_l2: page %p is not managed", m)); 2231305882Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2232305882Sandrew m->md.pv_gen++; 2233305882Sandrew if (va == va_last) 2234305882Sandrew goto out; 2235305882Sandrew } 2236305882Sandrew } 2237305882Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2238305882Sandrew TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2239305882Sandrew } 2240305882Sandrewout: 2241305882Sandrew if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { 2242305882Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2243305882Sandrew TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2244305882Sandrew } 2245305882Sandrew PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 
1)); 2246305882Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1)); 2247305882Sandrew} 2248305882Sandrew 2249305882Sandrew/* 2250281494Sandrew * First find and then destroy the pv entry for the specified pmap and virtual 2251281494Sandrew * address. This operation can be performed on pv lists for either 4KB or 2MB 2252281494Sandrew * page mappings. 2253281494Sandrew */ 2254281494Sandrewstatic void 2255281494Sandrewpmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2256281494Sandrew{ 2257281494Sandrew pv_entry_t pv; 2258281494Sandrew 2259281494Sandrew pv = pmap_pvh_remove(pvh, pmap, va); 2260281494Sandrew KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2261281494Sandrew free_pv_entry(pmap, pv); 2262281494Sandrew} 2263281494Sandrew 2264281494Sandrew/* 2265281494Sandrew * Conditionally create the PV entry for a 4KB page mapping if the required 2266281494Sandrew * memory can be allocated without resorting to reclamation. 2267281494Sandrew */ 2268281494Sandrewstatic boolean_t 2269281494Sandrewpmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 2270281494Sandrew struct rwlock **lockp) 2271281494Sandrew{ 2272281494Sandrew pv_entry_t pv; 2273281494Sandrew 2274281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2275281494Sandrew /* Pass NULL instead of the lock pointer to disable reclamation. 
*/ 2276281494Sandrew if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 2277281494Sandrew pv->pv_va = va; 2278281494Sandrew CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 2279281494Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2280281494Sandrew m->md.pv_gen++; 2281281494Sandrew return (TRUE); 2282281494Sandrew } else 2283281494Sandrew return (FALSE); 2284281494Sandrew} 2285281494Sandrew 2286281494Sandrew/* 2287281494Sandrew * pmap_remove_l3: do the things to unmap a page in a process 2288281494Sandrew */ 2289281494Sandrewstatic int 2290305531Sandrewpmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 2291281494Sandrew pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 2292281494Sandrew{ 2293305882Sandrew struct md_page *pvh; 2294281494Sandrew pt_entry_t old_l3; 2295281494Sandrew vm_page_t m; 2296281494Sandrew 2297281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2298281494Sandrew if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) 2299281494Sandrew cpu_dcache_wb_range(va, L3_SIZE); 2300281494Sandrew old_l3 = pmap_load_clear(l3); 2301281494Sandrew PTE_SYNC(l3); 2302285212Sandrew pmap_invalidate_page(pmap, va); 2303281494Sandrew if (old_l3 & ATTR_SW_WIRED) 2304281494Sandrew pmap->pm_stats.wired_count -= 1; 2305281494Sandrew pmap_resident_count_dec(pmap, 1); 2306281494Sandrew if (old_l3 & ATTR_SW_MANAGED) { 2307281494Sandrew m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); 2308281494Sandrew if (pmap_page_dirty(old_l3)) 2309281494Sandrew vm_page_dirty(m); 2310281494Sandrew if (old_l3 & ATTR_AF) 2311281494Sandrew vm_page_aflag_set(m, PGA_REFERENCED); 2312281494Sandrew CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 2313281494Sandrew pmap_pvh_free(&m->md, pmap, va); 2314305882Sandrew if (TAILQ_EMPTY(&m->md.pv_list) && 2315305882Sandrew (m->flags & PG_FICTITIOUS) == 0) { 2316305882Sandrew pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2317305882Sandrew if (TAILQ_EMPTY(&pvh->pv_list)) 2318305882Sandrew vm_page_aflag_clear(m, PGA_WRITEABLE); 2319305882Sandrew } 
	}
	return (pmap_unuse_l3(pmap, va, l2e, free));
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	struct rwlock *lock;
	vm_offset_t va, va_next;
	pd_entry_t *l0, *l1, *l2;
	pt_entry_t l3_paddr, *l3;
	struct spglist free;
	int anyvalid;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	/*
	 * NOTE(review): anyvalid is initialized here but never set anywhere
	 * in this function, so the pmap_invalidate_all() call at the bottom
	 * is dead code; invalidation is handled by the per-run
	 * pmap_invalidate_range() calls below.
	 */
	anyvalid = 0;
	SLIST_INIT(&free);

	PMAP_LOCK(pmap);

	lock = NULL;
	for (; sva < eva; sva = va_next) {

		if (pmap->pm_stats.resident_count == 0)
			break;

		/* Skip a whole L0 region when its entry is empty. */
		l0 = pmap_l0(pmap, sva);
		if (pmap_load(l0) == 0) {
			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		l1 = pmap_l0_to_l1(l0, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		/*
		 * Calculate index for next page table.
		 */
		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (l2 == NULL)
			continue;

		l3_paddr = pmap_load(l2);

		if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) {
			/* TODO: Add pmap_remove_l2 */
			/*
			 * A 2MB block mapping is demoted to 4KB pages first;
			 * the pages are then removed individually below.
			 */
			if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET,
			    &lock) == NULL)
				continue;
			l3_paddr = pmap_load(l2);
		}

		/*
		 * Weed out invalid mappings.
		 */
		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
			continue;

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (va_next > eva)
			va_next = eva;

		/*
		 * "va" tracks the start of the current run of removed
		 * mappings so a single ranged TLB invalidation can cover it.
		 */
		va = va_next;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			if (l3 == NULL)
				panic("l3 == NULL");
			if (pmap_load(l3) == 0) {
				if (va != va_next) {
					pmap_invalidate_range(pmap, va, sva);
					va = va_next;
				}
				continue;
			}
			if (va == va_next)
				va = sva;
			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
			    &lock)) {
				/* The L3 table itself was freed; move on. */
				sva += L3_SIZE;
				break;
			}
		}
		if (va != va_next)
			pmap_invalidate_range(pmap, va, sva);
	}
	if (lock != NULL)
		rw_wunlock(lock);
	if (anyvalid)
		pmap_invalidate_all(pmap);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t pv;
	pmap_t pmap;
	struct rwlock *lock;
	pd_entry_t *pde, tpde;
	pt_entry_t *pte, tpte;
	vm_offset_t va;
	struct spglist free;
	int lvl, pvh_gen, md_gen;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_all: page %p is not managed", m));
	SLIST_INIT(&free);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	/* Fictitious pages are never part of a 2MB pv list. */
	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry:
	rw_wlock(lock);
	/*
	 * Phase 1: demote every 2MB mapping containing this page so that
	 * only 4KB mappings remain to be removed in phase 2.
	 *
	 * Lock order is pmap lock before pv-list lock; when the trylock
	 * fails we drop the pv-list lock, take both in order, and use the
	 * pv generation count to detect a concurrent list change,
	 * restarting from scratch if one occurred.
	 */
	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			pvh_gen = pvh->pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen) {
				rw_wunlock(lock);
				PMAP_UNLOCK(pmap);
				goto retry;
			}
		}
		va = pv->pv_va;
		pte = pmap_pte(pmap, va, &lvl);
		KASSERT(pte != NULL,
		    ("pmap_remove_all: no page table entry found"));
		KASSERT(lvl == 2,
		    ("pmap_remove_all: invalid pte level %d", lvl));

		pmap_demote_l2_locked(pmap, pte, va, &lock);
		PMAP_UNLOCK(pmap);
	}
	/* Phase 2: remove each remaining 4KB mapping of the page. */
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			/* Same lock-order dance as above, also watching
			 * the 4KB list's generation count. */
			pvh_gen = pvh->pv_gen;
 */
		if (pmap_page_dirty(tpte))
			vm_page_dirty(m);
		pmap_unuse_l3(pmap, pv->pv_va, tpde, &free);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}
	/* No mappings remain, so the page can no longer be written. */
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(lock);
	pmap_free_zero_pages(&free);
}

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 *
 * Protections may only be reduced here: bits are OR-ed into the PTEs
 * (read-only and/or execute-never); granting access is pmap_enter()'s job.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t va, va_next;
	pd_entry_t *l0, *l1, *l2;
	pt_entry_t *l3p, l3, nbits;

	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
	if (prot == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	/* Nothing to restrict if both write and execute remain allowed. */
	if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE | VM_PROT_EXECUTE))
		return;

	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {

		l0 = pmap_l0(pmap, sva);
		if (pmap_load(l0) == 0) {
			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		l1 = pmap_l0_to_l1(l0, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (pmap_load(l2) == 0)
			continue;

		/* Demote a 2MB block so protection can be applied per page. */
		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
			l3p = pmap_demote_l2(pmap, l2, sva);
			if (l3p == NULL)
				continue;
		}
		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
		    ("pmap_protect: Invalid L2 entry after demotion"));

		if (va_next > eva)
			va_next = eva;

		/* NOTE(review): va is assigned but never used below. */
		va = va_next;
		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
		    sva += L3_SIZE) {
			l3 = pmap_load(l3p);
			if (!pmap_l3_valid(l3))
				continue;

			nbits = 0;
			if ((prot & VM_PROT_WRITE) == 0) {
				/*
				 * Capture the dirty state before the mapping
				 * becomes read-only.
				 */
				if ((l3 & ATTR_SW_MANAGED) &&
				    pmap_page_dirty(l3)) {
					vm_page_dirty(PHYS_TO_VM_PAGE(l3 &
					    ~ATTR_MASK));
				}
				nbits |= ATTR_AP(ATTR_AP_RO);
			}
			if ((prot & VM_PROT_EXECUTE) == 0)
				nbits |= ATTR_XN;

			pmap_set(l3p, nbits);
			PTE_SYNC(l3p);
			/* XXX: Use pmap_invalidate_range */
			pmap_invalidate_page(pmap, sva);
		}
	}
	PMAP_UNLOCK(pmap);
}

/*
 * Inserts the specified page table page into the specified pmap's collection
 * of idle page table pages.
 * Each of a pmap's page table pages is responsible
 * for mapping a distinct range of virtual addresses.  The pmap's collection is
 * ordered by this virtual address range.
 *
 * Returns the vm_radix_insert() error code (0 on success).
 */
static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	return (vm_radix_insert(&pmap->pm_root, mpte));
}

/*
 * Removes the page table page mapping the specified virtual address from the
 * specified pmap's collection of idle page table pages, and returns it.
 * Otherwise, returns NULL if there is no page table page corresponding to the
 * specified virtual address.
 */
static __inline vm_page_t
pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
}

/*
 * Performs a break-before-make update of a pmap entry. This is needed when
 * either promoting or demoting pages to ensure the TLB doesn't get into an
 * inconsistent state.
 *
 * The entry is first invalidated (and the TLB flushed for the affected
 * range) before the replacement entry is written, as required by the
 * architecture when changing the block/page size of a live mapping.
 */
static void
pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
    vm_offset_t va, vm_size_t size)
{
	register_t intr;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Ensure we don't get switched out with the page table in an
	 * inconsistent state. We also need to ensure no interrupts fire
	 * as they may make use of an address we are about to invalidate.
	 */
	intr = intr_disable();
	critical_enter();

	/* Clear the old mapping */
	pmap_load_clear(pte);
	PTE_SYNC(pte);
	pmap_invalidate_range(pmap, va, va + size);

	/* Create the new mapping */
	pmap_load_store(pte, newpte);
	PTE_SYNC(pte);

	critical_exit();
	intr_restore(intr);
}

/*
 * After promotion from 512 4KB page mappings to a single 2MB page mapping,
 * replace the many pv entries for the 4KB page mappings by a single pv entry
 * for the 2MB page mapping.
 */
static void
pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
    struct rwlock **lockp)
{
	struct md_page *pvh;
	pv_entry_t pv;
	vm_offset_t va_last;
	vm_page_t m;

	KASSERT((pa & L2_OFFSET) == 0,
	    ("pmap_pv_promote_l2: pa is not 2mpage aligned"));
	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);

	/*
	 * Transfer the first page's pv entry for this mapping to the 2mpage's
	 * pv list.  Aside from avoiding the cost of a call to get_pv_entry(),
	 * a transfer avoids the possibility that get_pv_entry() calls
	 * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
	 * mappings that is being promoted.
	 */
	m = PHYS_TO_VM_PAGE(pa);
	va = va & ~L2_OFFSET;
	pv = pmap_pvh_remove(&m->md, pmap, va);
	KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found"));
	pvh = pa_to_pvh(pa);
	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
	pvh->pv_gen++;
	/* Free the remaining NPTEPG - 1 pv entries. */
	va_last = va + L2_SIZE - PAGE_SIZE;
	do {
		m++;
		va += PAGE_SIZE;
		pmap_pvh_free(&m->md, pmap, va);
	} while (va < va_last);
}

/*
 * Tries to promote the 512, contiguous 4KB page mappings that are within a
 * single level 2 table entry to a single 2MB page mapping.  For promotion
 * to occur, two conditions must be met: (1) the 4KB page mappings must map
 * aligned, contiguous physical memory and (2) the 4KB page mappings must have
 * identical characteristics.
 */
static void
pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
    struct rwlock **lockp)
{
	pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
	vm_page_t mpte;
	vm_offset_t sva;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	sva = va & ~L2_OFFSET;
	firstl3 = pmap_l2_to_l3(l2, sva);
	newl2 = pmap_load(firstl3);

	/* Check that the alignment is valid: the first page's physical
	 * address must be 2MB aligned. */
	if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) {
		atomic_add_long(&pmap_l2_p_failures, 1);
		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
		    " in pmap %p", va, pmap);
		return;
	}

	/*
	 * Walk the 512 PTEs backwards, comparing each against the full
	 * expected PTE value ("pa" keeps newl2's attribute bits), which
	 * simultaneously checks physical contiguity and identical
	 * attributes.
	 */
	pa = newl2 + L2_SIZE - PAGE_SIZE;
	for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
		oldl3 = pmap_load(l3);
		if (oldl3 != pa) {
			atomic_add_long(&pmap_l2_p_failures, 1);
			CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
			    " in pmap %p", va, pmap);
			return;
		}
		pa -= PAGE_SIZE;
	}

	/*
	 * Save the page table page in its current state until the L2
	 * mapping the superpage is demoted by pmap_demote_l2() or
	 * destroyed by pmap_remove_l3().
	 */
	mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
	KASSERT(mpte >= vm_page_array &&
	    mpte < &vm_page_array[vm_page_array_size],
	    ("pmap_promote_l2: page table page is out of range"));
	KASSERT(mpte->pindex == pmap_l2_pindex(va),
	    ("pmap_promote_l2: page table page's pindex is wrong"));
	if (pmap_insert_pt_page(pmap, mpte)) {
		atomic_add_long(&pmap_l2_p_failures, 1);
		CTR2(KTR_PMAP,
		    "pmap_promote_l2: failure for va %#lx in pmap %p", va,
		    pmap);
		return;
	}

	if ((newl2 & ATTR_SW_MANAGED) != 0)
		pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp);

	newl2 &= ~ATTR_DESCR_MASK;
	newl2 |= L2_BLOCK;

	/* Break-before-make: see pmap_update_entry(). */
	pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE);

	atomic_add_long(&pmap_l2_promotions, 1);
	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
	    pmap);
}

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB:  This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    u_int flags, int8_t psind __unused)
{
	struct rwlock *lock;
	pd_entry_t *pde;
	pt_entry_t new_l3, orig_l3;
	pt_entry_t *l2, *l3;
	pv_entry_t pv;
	vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa;
	vm_page_t mpte, om, l1_m, l2_m, l3_m;
	boolean_t nosleep;
	int lvl;

	va = trunc_page(va);
	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
		VM_OBJECT_ASSERT_LOCKED(m->object);
	pa = VM_PAGE_TO_PHYS(m);
	/* Build the new PTE from the page's attributes and "prot". */
	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
	    L3_PAGE);
	if ((prot & VM_PROT_WRITE) == 0)
		new_l3 |= ATTR_AP(ATTR_AP_RO);
	if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
		new_l3 |= ATTR_XN;
	if ((flags & PMAP_ENTER_WIRED) != 0)
		new_l3 |= ATTR_SW_WIRED;
	/* Bit 63 clear means a low (user) address: user-accessible, but
	 * never executable from the kernel (PXN). */
	if ((va >> 63) == 0)
		new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;

	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);

	mpte = NULL;

	lock = NULL;
	PMAP_LOCK(pmap);

	/*
	 * If va currently lies within a 2MB block mapping, demote it so a
	 * 4KB entry can be installed, and take an extra reference on the
	 * new page table page for user addresses.
	 */
	pde = pmap_pde(pmap, va, &lvl);
	if (pde != NULL && lvl == 1) {
		l2 = pmap_l1_to_l2(pde, va);
		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
		    (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET,
		    &lock)) != NULL) {
			l3 = &l3[pmap_l3_index(va)];
			if (va < VM_MAXUSER_ADDRESS) {
				mpte = PHYS_TO_VM_PAGE(
				    pmap_load(l2) & ~ATTR_MASK);
				mpte->wire_count++;
			}
			goto havel3;
		}
	}

	if (va < VM_MAXUSER_ADDRESS) {
		/* User addresses: allocate the L3 table via pmap_alloc_l3. */
		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
		if (mpte == NULL && nosleep) {
			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
			if (lock != NULL)
				rw_wunlock(lock);
			PMAP_UNLOCK(pmap);
			return (KERN_RESOURCE_SHORTAGE);
		}
		pde = pmap_pde(pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_enter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2,
		    ("pmap_enter: Invalid level %d", lvl));

		l3 = pmap_l2_to_l3(pde, va);
	} else {
		/*
		 * If we get a level 2 pde it must point to a level 3 entry
		 * otherwise we will need to create the intermediate tables
		 */
		if (lvl < 2) {
			/*
			 * Fall through the cases, creating each missing
			 * table level in turn.  NOTE(review): the KASSERT
			 * messages here are "..." placeholders.
			 */
			switch(lvl) {
			default:
			case -1:
				/* Get the l0 pde to update */
				pde = pmap_l0(pmap, va);
				KASSERT(pde != NULL, ("..."));

				l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
				    VM_ALLOC_ZERO);
				if (l1_m == NULL)
					panic("pmap_enter: l1 pte_m == NULL");
				if ((l1_m->flags & PG_ZERO) == 0)
					pmap_zero_page(l1_m);

				l1_pa = VM_PAGE_TO_PHYS(l1_m);
				pmap_load_store(pde, l1_pa | L0_TABLE);
				PTE_SYNC(pde);
				/* FALLTHROUGH */
			case 0:
				/* Get the l1 pde to update */
				pde = pmap_l1_to_l2(pde, va);
				KASSERT(pde != NULL, ("..."));

				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
				    VM_ALLOC_ZERO);
				if (l2_m == NULL)
					panic("pmap_enter: l2 pte_m == NULL");
				if ((l2_m->flags & PG_ZERO) == 0)
					pmap_zero_page(l2_m);

				l2_pa = VM_PAGE_TO_PHYS(l2_m);
				pmap_load_store(pde, l2_pa | L1_TABLE);
				PTE_SYNC(pde);
				/* FALLTHROUGH */
			case 1:
				/* Get the l2 pde to update */
				pde = pmap_l1_to_l2(pde, va);

				l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
				    VM_ALLOC_ZERO);
				if (l3_m == NULL)
					panic("pmap_enter: l3 pte_m == NULL");
				if ((l3_m->flags & PG_ZERO) == 0)
					pmap_zero_page(l3_m);

				l3_pa = VM_PAGE_TO_PHYS(l3_m);
				pmap_load_store(pde, l3_pa | L2_TABLE);
				PTE_SYNC(pde);
				break;
			}
		}
		l3 = pmap_l2_to_l3(pde, va);
		pmap_invalidate_page(pmap, va);
	}
havel3:

	om = NULL;
	orig_l3 = pmap_load(l3);
	opa = orig_l3 & ~ATTR_MASK;

	/*
	 * Is the specified virtual address already mapped?
	 */
	if (pmap_l3_valid(orig_l3)) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them. Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if ((flags & PMAP_ENTER_WIRED) != 0 &&
		    (orig_l3 & ATTR_SW_WIRED) == 0)
			pmap->pm_stats.wired_count++;
		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
		    (orig_l3 & ATTR_SW_WIRED) != 0)
			pmap->pm_stats.wired_count--;

		/*
		 * Remove the extra PT page reference.
		 */
		if (mpte != NULL) {
			mpte->wire_count--;
			KASSERT(mpte->wire_count > 0,
			    ("pmap_enter: missing reference to page table page,"
			    " va: 0x%lx", va));
		}

		/*
		 * Has the physical page changed?
		 */
		if (opa == pa) {
			/*
			 * No, might be a protection or wiring change.
			 */
			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
				new_l3 |= ATTR_SW_MANAGED;
				if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
				    ATTR_AP(ATTR_AP_RW)) {
					vm_page_aflag_set(m, PGA_WRITEABLE);
				}
			}
			goto validate;
		}

		/* Flush the cache, there might be uncommitted data in it */
		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
			cpu_dcache_wb_range(va, L3_SIZE);
	} else {
		/*
		 * Increment the counters.
		 */
		if ((new_l3 & ATTR_SW_WIRED) != 0)
			pmap->pm_stats.wired_count++;
		pmap_resident_count_inc(pmap, 1);
	}
	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		new_l3 |= ATTR_SW_MANAGED;
		pv = get_pv_entry(pmap, &lock);
		pv->pv_va = va;
		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
			vm_page_aflag_set(m, PGA_WRITEABLE);
	}

	/*
	 * Update the L3 entry.
	 */
	if (orig_l3 != 0) {
validate:
		orig_l3 = pmap_load(l3);
		opa = orig_l3 & ~ATTR_MASK;

		if (opa != pa) {
			/*
			 * The physical page is changing: replace the mapping
			 * with break-before-make and tear down the old
			 * page's pv state.
			 */
			pmap_update_entry(pmap, l3, new_l3, va, PAGE_SIZE);
			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
				om = PHYS_TO_VM_PAGE(opa);
				if (pmap_page_dirty(orig_l3))
					vm_page_dirty(om);
				if ((orig_l3 & ATTR_AF) != 0)
					vm_page_aflag_set(om, PGA_REFERENCED);
				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
				pmap_pvh_free(&om->md, pmap, va);
				if ((om->aflags & PGA_WRITEABLE) != 0 &&
				    TAILQ_EMPTY(&om->md.pv_list) &&
				    ((om->flags & PG_FICTITIOUS) != 0 ||
				    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
					vm_page_aflag_clear(om, PGA_WRITEABLE);
			}
		} else {
			pmap_load_store(l3, new_l3);
			PTE_SYNC(l3);
			pmap_invalidate_page(pmap, va);
			if (pmap_page_dirty(orig_l3) &&
			    (orig_l3 & ATTR_SW_MANAGED) != 0)
				vm_page_dirty(m);
		}
	} else {
		pmap_load_store(l3, new_l3);
	}

	PTE_SYNC(l3);
	pmap_invalidate_page(pmap, va);

	if (pmap != pmap_kernel()) {
		/* Keep the instruction cache coherent for executable user
		 * mappings of the current process. */
		if (pmap == &curproc->p_vmspace->vm_pmap &&
		    (prot & VM_PROT_EXECUTE) != 0)
			cpu_icache_sync_range(va, PAGE_SIZE);

		/* Attempt 2MB promotion when the L3 table is fully populated
		 * and the page belongs to a fully populated reservation. */
		if ((mpte == NULL || mpte->wire_count == NL3PG) &&
		    pmap_superpages_enabled() &&
		    (m->flags & PG_FICTITIOUS) == 0 &&
		    vm_reserv_level_iffullpop(m) == 0) {
			pmap_promote_l2(pmap, pde, va, &lock);
		}
	}

	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
	return (KERN_SUCCESS);
}

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.
Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	struct rwlock *lock;
	vm_offset_t va;
	vm_page_t m, mpte;
	vm_pindex_t diff, psize;

	VM_OBJECT_ASSERT_LOCKED(m_start->object);

	/* Number of pages in the range [start, end). */
	psize = atop(end - start);
	/*
	 * mpte caches the page table page returned by the previous call to
	 * pmap_enter_quick_locked(); when consecutive mappings share the
	 * same L2 index it is reused instead of repeating the walk.
	 */
	mpte = NULL;
	m = m_start;
	/*
	 * The PV list lock is acquired lazily inside
	 * pmap_enter_quick_locked() and handed back through 'lock', so it
	 * is taken at most once across the whole loop and released below.
	 */
	lock = NULL;
	PMAP_LOCK(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		va = start + ptoa(diff);
		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
		m = TAILQ_NEXT(m, listq);
	}
	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
}

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */

void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	struct rwlock *lock;

	/* Take the pmap lock and let the locked variant do the work. */
	lock = NULL;
	PMAP_LOCK(pmap);
	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
}

/*
 * Create a read-only (see the ATTR_AP_RO below) mapping of page 'm' at
 * 'va' without sleeping.  Returns the page table page holding the new L3
 * entry (for reuse by the caller on the next call), or NULL if no page
 * table page was allocated.  *lockp is the lazily-acquired PV list lock,
 * owned by the caller afterwards.
 */
static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
{
	struct spglist free;
	pd_entry_t *pde;
	pt_entry_t *l2, *l3;
	vm_paddr_t pa;
	int lvl;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->oflags & VPO_UNMANAGED) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		vm_pindex_t l2pindex;

		/*
		 * Calculate pagetable page index
		 */
		l2pindex = pmap_l2_pindex(va);
		if (mpte && (mpte->pindex == l2pindex)) {
			/* Caller-supplied page table page covers 'va'. */
			mpte->wire_count++;
		} else {
			/*
			 * Get the l2 entry
			 */
			pde = pmap_pde(pmap, va, &lvl);

			/*
			 * If the page table page is mapped, we just increment
			 * the hold count, and activate it.  Otherwise, we
			 * attempt to allocate a page table page.  If this
			 * attempt fails, we don't retry.  Instead, we give up.
			 */
			if (lvl == 1) {
				l2 = pmap_l1_to_l2(pde, va);
				/*
				 * A 2MB block mapping already covers this
				 * address; there is nothing to do here.
				 */
				if ((pmap_load(l2) & ATTR_DESCR_MASK) ==
				    L2_BLOCK)
					return (NULL);
			}
			if (lvl == 2 && pmap_load(pde) != 0) {
				mpte =
				    PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
				mpte->wire_count++;
			} else {
				/*
				 * Pass NULL instead of the PV list lock
				 * pointer, because we don't intend to sleep.
				 */
				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
				if (mpte == NULL)
					return (mpte);
			}
		}
		/* Locate the L3 entry within the page table page. */
		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
		l3 = &l3[pmap_l3_index(va)];
	} else {
		/* Kernel addresses: the page table pages always exist. */
		mpte = NULL;
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
		    va));
		KASSERT(lvl == 2,
		    ("pmap_enter_quick_locked: Invalid level %d", lvl));
		l3 = pmap_l2_to_l3(pde, va);
	}

	/* A mapping already exists; undo the wiring and bail out. */
	if (pmap_load(l3) != 0) {
		if (mpte != NULL) {
			mpte->wire_count--;
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0 &&
	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
		/*
		 * PV entry allocation failed without sleeping; release the
		 * page table page if this call wired it.
		 */
		if (mpte != NULL) {
			SLIST_INIT(&free);
			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
				pmap_invalidate_page(pmap, va);
				pmap_free_zero_pages(&free);
			}
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Increment counters
	 */
	pmap_resident_count_inc(pmap, 1);

	pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
	    ATTR_AP(ATTR_AP_RO) | L3_PAGE;
	/*
	 * Execute-never for non-executable or device memory; user mappings
	 * that are executable still get privileged-execute-never.
	 */
	if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
		pa |= ATTR_XN;
	else if (va < VM_MAXUSER_ADDRESS)
		pa |= ATTR_PXN;

	/*
	 * Now validate mapping with RO protection
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0)
		pa |= ATTR_SW_MANAGED;
	pmap_load_store(l3, pa);
	PTE_SYNC(l3);
	pmap_invalidate_page(pmap, va);
	return (mpte);
}

/*
 * This code maps large physical mmap regions into the
 * processor address space.  Note that some shortcuts
 * are taken, but the code works.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
    vm_pindex_t pindex, vm_size_t size)
{

	/* No preloading of page tables is performed on this architecture. */
	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("pmap_object_init_pt: non-device object"));
}

/*
 * Clear the wired attribute from the mappings for the specified range of
 * addresses in the given pmap.  Every valid mapping within that range
 * must have the wired attribute set.  In contrast, invalid mappings
 * cannot have the wired attribute set, so they are ignored.
 *
 * The wired attribute of the page table entry is not a hardware feature,
 * so there is no need to invalidate any TLB entries.
 */
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va_next;
	pd_entry_t *l0, *l1, *l2;
	pt_entry_t *l3;

	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {
		/* Skip whole L0 ranges with no page table beneath them. */
		l0 = pmap_l0(pmap, sva);
		if (pmap_load(l0) == 0) {
			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		/* Likewise for empty L1 ranges. */
		l1 = pmap_l0_to_l1(l0, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (pmap_load(l2) == 0)
			continue;

		/*
		 * A 2MB block mapping is demoted to 4KB pages first so the
		 * wired attribute can be cleared per page below.  If the
		 * demotion fails the range is skipped.
		 */
		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
			l3 = pmap_demote_l2(pmap, l2, sva);
			if (l3 == NULL)
				continue;
		}
		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
		    ("pmap_unwire: Invalid l2 entry after demotion"));

		if (va_next > eva)
			va_next = eva;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			if (pmap_load(l3) == 0)
				continue;
			if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
				panic("pmap_unwire: l3 %#jx is missing "
				    "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));

			/*
			 * PG_W must be cleared atomically.  Although the pmap
			 * lock synchronizes access to PG_W, another processor
			 * could be setting PG_M and/or PG_A concurrently.
			 */
			atomic_clear_long(l3, ATTR_SW_WIRED);
			pmap->pm_stats.wired_count--;
		}
	}
	PMAP_UNLOCK(pmap);
}

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
3362281494Sandrew */ 3363281494Sandrew 3364281494Sandrewvoid 3365281494Sandrewpmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3366281494Sandrew vm_offset_t src_addr) 3367281494Sandrew{ 3368281494Sandrew} 3369281494Sandrew 3370281494Sandrew/* 3371281494Sandrew * pmap_zero_page zeros the specified hardware page by mapping 3372281494Sandrew * the page into KVM and using bzero to clear its contents. 3373281494Sandrew */ 3374281494Sandrewvoid 3375281494Sandrewpmap_zero_page(vm_page_t m) 3376281494Sandrew{ 3377281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3378281494Sandrew 3379281494Sandrew pagezero((void *)va); 3380281494Sandrew} 3381281494Sandrew 3382281494Sandrew/* 3383305531Sandrew * pmap_zero_page_area zeros the specified hardware page by mapping 3384281494Sandrew * the page into KVM and using bzero to clear its contents. 3385281494Sandrew * 3386281494Sandrew * off and size may not cover an area beyond a single hardware page. 3387281494Sandrew */ 3388281494Sandrewvoid 3389281494Sandrewpmap_zero_page_area(vm_page_t m, int off, int size) 3390281494Sandrew{ 3391281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3392281494Sandrew 3393281494Sandrew if (off == 0 && size == PAGE_SIZE) 3394281494Sandrew pagezero((void *)va); 3395281494Sandrew else 3396281494Sandrew bzero((char *)va + off, size); 3397281494Sandrew} 3398281494Sandrew 3399281494Sandrew/* 3400305531Sandrew * pmap_zero_page_idle zeros the specified hardware page by mapping 3401281494Sandrew * the page into KVM and using bzero to clear its contents. This 3402281494Sandrew * is intended to be called from the vm_pagezero process only and 3403281494Sandrew * outside of Giant. 
3404281494Sandrew */ 3405281494Sandrewvoid 3406281494Sandrewpmap_zero_page_idle(vm_page_t m) 3407281494Sandrew{ 3408281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3409281494Sandrew 3410281494Sandrew pagezero((void *)va); 3411281494Sandrew} 3412281494Sandrew 3413281494Sandrew/* 3414281494Sandrew * pmap_copy_page copies the specified (machine independent) 3415281494Sandrew * page by mapping the page into virtual memory and using 3416281494Sandrew * bcopy to copy the page, one machine dependent page at a 3417281494Sandrew * time. 3418281494Sandrew */ 3419281494Sandrewvoid 3420281494Sandrewpmap_copy_page(vm_page_t msrc, vm_page_t mdst) 3421281494Sandrew{ 3422281494Sandrew vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 3423281494Sandrew vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 3424281494Sandrew 3425281494Sandrew pagecopy((void *)src, (void *)dst); 3426281494Sandrew} 3427281494Sandrew 3428281494Sandrewint unmapped_buf_allowed = 1; 3429281494Sandrew 3430281494Sandrewvoid 3431281494Sandrewpmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 3432281494Sandrew vm_offset_t b_offset, int xfersize) 3433281494Sandrew{ 3434281494Sandrew void *a_cp, *b_cp; 3435281494Sandrew vm_page_t m_a, m_b; 3436281494Sandrew vm_paddr_t p_a, p_b; 3437281494Sandrew vm_offset_t a_pg_offset, b_pg_offset; 3438281494Sandrew int cnt; 3439281494Sandrew 3440281494Sandrew while (xfersize > 0) { 3441281494Sandrew a_pg_offset = a_offset & PAGE_MASK; 3442281494Sandrew m_a = ma[a_offset >> PAGE_SHIFT]; 3443281494Sandrew p_a = m_a->phys_addr; 3444281494Sandrew b_pg_offset = b_offset & PAGE_MASK; 3445281494Sandrew m_b = mb[b_offset >> PAGE_SHIFT]; 3446281494Sandrew p_b = m_b->phys_addr; 3447281494Sandrew cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 3448281494Sandrew cnt = min(cnt, PAGE_SIZE - b_pg_offset); 3449281494Sandrew if (__predict_false(!PHYS_IN_DMAP(p_a))) { 3450281494Sandrew panic("!DMAP a %lx", p_a); 3451281494Sandrew } else { 
3452281494Sandrew a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 3453281494Sandrew } 3454281494Sandrew if (__predict_false(!PHYS_IN_DMAP(p_b))) { 3455281494Sandrew panic("!DMAP b %lx", p_b); 3456281494Sandrew } else { 3457281494Sandrew b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 3458281494Sandrew } 3459281494Sandrew bcopy(a_cp, b_cp, cnt); 3460281494Sandrew a_offset += cnt; 3461281494Sandrew b_offset += cnt; 3462281494Sandrew xfersize -= cnt; 3463281494Sandrew } 3464281494Sandrew} 3465281494Sandrew 3466286296Sjahvm_offset_t 3467286296Sjahpmap_quick_enter_page(vm_page_t m) 3468286296Sjah{ 3469286296Sjah 3470286296Sjah return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 3471286296Sjah} 3472286296Sjah 3473286296Sjahvoid 3474286296Sjahpmap_quick_remove_page(vm_offset_t addr) 3475286296Sjah{ 3476286296Sjah} 3477286296Sjah 3478281494Sandrew/* 3479281494Sandrew * Returns true if the pmap's pv is one of the first 3480281494Sandrew * 16 pvs linked to from this page. This count may 3481281494Sandrew * be changed upwards or downwards in the future; it 3482281494Sandrew * is only necessary that true be returned for a small 3483281494Sandrew * subset of pmaps for proper page aging. 
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	struct md_page *pvh;
	struct rwlock *lock;
	pv_entry_t pv;
	int loops = 0;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_page_exists_quick: page %p is not managed", m));
	rv = FALSE;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
	/* First scan the page's own 4KB PV list, up to 16 entries. */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		if (PV_PMAP(pv) == pmap) {
			rv = TRUE;
			break;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	/*
	 * If not found, continue into the PV list of the containing 2MB
	 * page (superpage mappings), keeping the same 16-entry budget.
	 * Fictitious pages have no pa_to_pvh() entry.
	 */
	if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
			if (PV_PMAP(pv) == pmap) {
				rv = TRUE;
				break;
			}
			loops++;
			if (loops >= 16)
				break;
		}
	}
	rw_runlock(lock);
	return (rv);
}

/*
 * pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct rwlock *lock;
	struct md_page *pvh;
	pmap_t pmap;
	pt_entry_t *pte;
	pv_entry_t pv;
	int count, lvl, md_gen, pvh_gen;

	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (0);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
restart:
	count = 0;
	/* Count wired 4KB mappings on the page's own PV list. */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			/*
			 * Drop the PV list lock to take the pmap lock in the
			 * proper order; if the list changed meanwhile
			 * (pv_gen moved), start the count over.
			 */
			md_gen = m->md.pv_gen;
			rw_runlock(lock);
			PMAP_LOCK(pmap);
			rw_rlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto restart;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
		if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
			count++;
		PMAP_UNLOCK(pmap);
	}
	/*
	 * Also count wired 2MB mappings from the PV list of the containing
	 * superpage.  Fictitious pages have no pa_to_pvh() entry.
	 */
	if ((m->flags & PG_FICTITIOUS) == 0) {
		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
			pmap = PV_PMAP(pv);
			if (!PMAP_TRYLOCK(pmap)) {
				md_gen = m->md.pv_gen;
				pvh_gen = pvh->pv_gen;
				rw_runlock(lock);
				PMAP_LOCK(pmap);
				rw_rlock(lock);
				if (md_gen != m->md.pv_gen ||
				    pvh_gen != pvh->pv_gen) {
					PMAP_UNLOCK(pmap);
					goto restart;
				}
			}
			pte = pmap_pte(pmap, pv->pv_va, &lvl);
			if (pte != NULL &&
			    (pmap_load(pte) & ATTR_SW_WIRED) != 0)
				count++;
			PMAP_UNLOCK(pmap);
		}
	}
	rw_runlock(lock);
	return (count);
}

/*
 * Destroy all managed, non-wired mappings in the given user-space
 * pmap.  This pmap cannot be active on any processor besides the
 * caller.
 *
 * This function cannot be applied to the kernel pmap.  Moreover, it
 * is not intended for general use.  It is only to be used during
 * process termination.  Consequently, it can be implemented in ways
 * that make it faster than pmap_remove().  First, it can more quickly
 * destroy mappings by iterating over the pmap's collection of PV
 * entries, rather than searching the page table.  Second, it doesn't
 * have to test and clear the page table entries atomically, because
 * no processor is currently accessing the user address space.  In
 * particular, a page table entry's dirty bit won't change state once
 * this function starts.
3605281494Sandrew */ 3606281494Sandrewvoid 3607281494Sandrewpmap_remove_pages(pmap_t pmap) 3608281494Sandrew{ 3609297446Sandrew pd_entry_t *pde; 3610297446Sandrew pt_entry_t *pte, tpte; 3611281494Sandrew struct spglist free; 3612305882Sandrew vm_page_t m, ml3, mt; 3613281494Sandrew pv_entry_t pv; 3614305882Sandrew struct md_page *pvh; 3615281494Sandrew struct pv_chunk *pc, *npc; 3616281494Sandrew struct rwlock *lock; 3617281494Sandrew int64_t bit; 3618281494Sandrew uint64_t inuse, bitmask; 3619297446Sandrew int allfree, field, freed, idx, lvl; 3620281494Sandrew vm_paddr_t pa; 3621281494Sandrew 3622281494Sandrew lock = NULL; 3623281494Sandrew 3624281494Sandrew SLIST_INIT(&free); 3625281494Sandrew PMAP_LOCK(pmap); 3626281494Sandrew TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 3627281494Sandrew allfree = 1; 3628281494Sandrew freed = 0; 3629281494Sandrew for (field = 0; field < _NPCM; field++) { 3630281494Sandrew inuse = ~pc->pc_map[field] & pc_freemask[field]; 3631281494Sandrew while (inuse != 0) { 3632281494Sandrew bit = ffsl(inuse) - 1; 3633281494Sandrew bitmask = 1UL << bit; 3634281494Sandrew idx = field * 64 + bit; 3635281494Sandrew pv = &pc->pc_pventry[idx]; 3636281494Sandrew inuse &= ~bitmask; 3637281494Sandrew 3638297446Sandrew pde = pmap_pde(pmap, pv->pv_va, &lvl); 3639297446Sandrew KASSERT(pde != NULL, 3640297446Sandrew ("Attempting to remove an unmapped page")); 3641281494Sandrew 3642305882Sandrew switch(lvl) { 3643305882Sandrew case 1: 3644305882Sandrew pte = pmap_l1_to_l2(pde, pv->pv_va); 3645305882Sandrew tpte = pmap_load(pte); 3646305882Sandrew KASSERT((tpte & ATTR_DESCR_MASK) == 3647305882Sandrew L2_BLOCK, 3648305882Sandrew ("Attempting to remove an invalid " 3649305882Sandrew "block: %lx", tpte)); 3650305882Sandrew tpte = pmap_load(pte); 3651305882Sandrew break; 3652305882Sandrew case 2: 3653305882Sandrew pte = pmap_l2_to_l3(pde, pv->pv_va); 3654305882Sandrew tpte = pmap_load(pte); 3655305882Sandrew KASSERT((tpte & ATTR_DESCR_MASK) == 
3656305882Sandrew L3_PAGE, 3657305882Sandrew ("Attempting to remove an invalid " 3658305882Sandrew "page: %lx", tpte)); 3659305882Sandrew break; 3660305882Sandrew default: 3661305882Sandrew panic( 3662305882Sandrew "Invalid page directory level: %d", 3663305882Sandrew lvl); 3664305882Sandrew } 3665297446Sandrew 3666281494Sandrew/* 3667281494Sandrew * We cannot remove wired pages from a process' mapping at this time 3668281494Sandrew */ 3669297446Sandrew if (tpte & ATTR_SW_WIRED) { 3670281494Sandrew allfree = 0; 3671281494Sandrew continue; 3672281494Sandrew } 3673281494Sandrew 3674297446Sandrew pa = tpte & ~ATTR_MASK; 3675281494Sandrew 3676281494Sandrew m = PHYS_TO_VM_PAGE(pa); 3677281494Sandrew KASSERT(m->phys_addr == pa, 3678281494Sandrew ("vm_page_t %p phys_addr mismatch %016jx %016jx", 3679281494Sandrew m, (uintmax_t)m->phys_addr, 3680297446Sandrew (uintmax_t)tpte)); 3681281494Sandrew 3682281494Sandrew KASSERT((m->flags & PG_FICTITIOUS) != 0 || 3683281494Sandrew m < &vm_page_array[vm_page_array_size], 3684297446Sandrew ("pmap_remove_pages: bad pte %#jx", 3685297446Sandrew (uintmax_t)tpte)); 3686281494Sandrew 3687305882Sandrew if (pmap_is_current(pmap)) { 3688305882Sandrew if (lvl == 2 && 3689305882Sandrew pmap_l3_valid_cacheable(tpte)) { 3690305882Sandrew cpu_dcache_wb_range(pv->pv_va, 3691305882Sandrew L3_SIZE); 3692305882Sandrew } else if (lvl == 1 && 3693305882Sandrew pmap_pte_valid_cacheable(tpte)) { 3694305882Sandrew cpu_dcache_wb_range(pv->pv_va, 3695305882Sandrew L2_SIZE); 3696305882Sandrew } 3697305882Sandrew } 3698297446Sandrew pmap_load_clear(pte); 3699297446Sandrew PTE_SYNC(pte); 3700285212Sandrew pmap_invalidate_page(pmap, pv->pv_va); 3701281494Sandrew 3702281494Sandrew /* 3703281494Sandrew * Update the vm_page_t clean/reference bits. 
3704281494Sandrew */ 3705305882Sandrew if ((tpte & ATTR_AP_RW_BIT) == 3706305882Sandrew ATTR_AP(ATTR_AP_RW)) { 3707305882Sandrew switch (lvl) { 3708305882Sandrew case 1: 3709305882Sandrew for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) 3710305882Sandrew vm_page_dirty(m); 3711305882Sandrew break; 3712305882Sandrew case 2: 3713305882Sandrew vm_page_dirty(m); 3714305882Sandrew break; 3715305882Sandrew } 3716305882Sandrew } 3717281494Sandrew 3718281494Sandrew CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 3719281494Sandrew 3720281494Sandrew /* Mark free */ 3721281494Sandrew pc->pc_map[field] |= bitmask; 3722305882Sandrew switch (lvl) { 3723305882Sandrew case 1: 3724305882Sandrew pmap_resident_count_dec(pmap, 3725305882Sandrew L2_SIZE / PAGE_SIZE); 3726305882Sandrew pvh = pa_to_pvh(tpte & ~ATTR_MASK); 3727305882Sandrew TAILQ_REMOVE(&pvh->pv_list, pv,pv_next); 3728305882Sandrew pvh->pv_gen++; 3729305882Sandrew if (TAILQ_EMPTY(&pvh->pv_list)) { 3730305882Sandrew for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) 3731305882Sandrew if ((mt->aflags & PGA_WRITEABLE) != 0 && 3732305882Sandrew TAILQ_EMPTY(&mt->md.pv_list)) 3733305882Sandrew vm_page_aflag_clear(mt, PGA_WRITEABLE); 3734305882Sandrew } 3735318716Smarkj ml3 = pmap_remove_pt_page(pmap, 3736305882Sandrew pv->pv_va); 3737305882Sandrew if (ml3 != NULL) { 3738305882Sandrew pmap_resident_count_dec(pmap,1); 3739305882Sandrew KASSERT(ml3->wire_count == NL3PG, 3740305882Sandrew ("pmap_remove_pages: l3 page wire count error")); 3741305882Sandrew ml3->wire_count = 0; 3742305882Sandrew pmap_add_delayed_free_list(ml3, 3743305882Sandrew &free, FALSE); 3744305882Sandrew atomic_subtract_int( 3745305882Sandrew &vm_cnt.v_wire_count, 1); 3746305882Sandrew } 3747305882Sandrew break; 3748305882Sandrew case 2: 3749305882Sandrew pmap_resident_count_dec(pmap, 1); 3750305882Sandrew TAILQ_REMOVE(&m->md.pv_list, pv, 3751305882Sandrew pv_next); 3752305882Sandrew m->md.pv_gen++; 3753305882Sandrew if ((m->aflags & PGA_WRITEABLE) != 0 && 
3754305882Sandrew TAILQ_EMPTY(&m->md.pv_list) && 3755305882Sandrew (m->flags & PG_FICTITIOUS) == 0) { 3756305882Sandrew pvh = pa_to_pvh( 3757305882Sandrew VM_PAGE_TO_PHYS(m)); 3758305882Sandrew if (TAILQ_EMPTY(&pvh->pv_list)) 3759305882Sandrew vm_page_aflag_clear(m, 3760305882Sandrew PGA_WRITEABLE); 3761305882Sandrew } 3762305882Sandrew break; 3763305882Sandrew } 3764297446Sandrew pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde), 3765297446Sandrew &free); 3766281494Sandrew freed++; 3767281494Sandrew } 3768281494Sandrew } 3769281494Sandrew PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 3770281494Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 3771281494Sandrew PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 3772281494Sandrew if (allfree) { 3773281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 3774281494Sandrew free_pv_chunk(pc); 3775281494Sandrew } 3776281494Sandrew } 3777281494Sandrew pmap_invalidate_all(pmap); 3778281494Sandrew if (lock != NULL) 3779281494Sandrew rw_wunlock(lock); 3780281494Sandrew PMAP_UNLOCK(pmap); 3781281494Sandrew pmap_free_zero_pages(&free); 3782281494Sandrew} 3783281494Sandrew 3784281494Sandrew/* 3785281494Sandrew * This is used to check if a page has been accessed or modified. As we 3786281494Sandrew * don't have a bit to see if it has been modified we have to assume it 3787281494Sandrew * has been if the page is read/write. 
 */
static boolean_t
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
{
	struct rwlock *lock;
	pv_entry_t pv;
	struct md_page *pvh;
	pt_entry_t *pte, mask, value;
	pmap_t pmap;
	int lvl, md_gen, pvh_gen;
	boolean_t rv;

	rv = FALSE;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
restart:
	/* Check all 4KB mappings of the page first. */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			/*
			 * Drop the PV list lock to take the pmap lock in
			 * the proper order; restart if the list changed.
			 */
			md_gen = m->md.pv_gen;
			rw_runlock(lock);
			PMAP_LOCK(pmap);
			rw_rlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto restart;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
		KASSERT(lvl == 3,
		    ("pmap_page_test_mappings: Invalid level %d", lvl));
		mask = 0;
		value = 0;
		if (modified) {
			/* No hardware dirty bit: treat writable as dirty. */
			mask |= ATTR_AP_RW_BIT;
			value |= ATTR_AP(ATTR_AP_RW);
		}
		if (accessed) {
			/* Valid L3 page with the Access Flag set. */
			mask |= ATTR_AF | ATTR_DESCR_MASK;
			value |= ATTR_AF | L3_PAGE;
		}
		rv = (pmap_load(pte) & mask) == value;
		PMAP_UNLOCK(pmap);
		if (rv)
			goto out;
	}
	/* Then check 2MB block mappings of the containing superpage. */
	if ((m->flags & PG_FICTITIOUS) == 0) {
		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
			pmap = PV_PMAP(pv);
			if (!PMAP_TRYLOCK(pmap)) {
				md_gen = m->md.pv_gen;
				pvh_gen = pvh->pv_gen;
				rw_runlock(lock);
				PMAP_LOCK(pmap);
				rw_rlock(lock);
				if (md_gen != m->md.pv_gen ||
				    pvh_gen != pvh->pv_gen) {
					PMAP_UNLOCK(pmap);
					goto restart;
				}
			}
			pte = pmap_pte(pmap, pv->pv_va, &lvl);
			KASSERT(lvl == 2,
			    ("pmap_page_test_mappings: Invalid level %d", lvl));
			mask = 0;
			value = 0;
			if (modified) {
				mask |= ATTR_AP_RW_BIT;
				value |= ATTR_AP(ATTR_AP_RW);
			}
			if (accessed) {
				mask |= ATTR_AF | ATTR_DESCR_MASK;
				value |= ATTR_AF | L2_BLOCK;
			}
			rv = (pmap_load(pte) & mask) == value;
			PMAP_UNLOCK(pmap);
			if (rv)
				goto out;
		}
	}
out:
	rw_runlock(lock);
	return (rv);
}

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no PTEs can have PG_M set.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (FALSE);
	return (pmap_page_test_mappings(m, FALSE, TRUE));
}

/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *pte;
	boolean_t rv;
	int lvl;

	/*
	 * NOTE(review): this returns TRUE when a valid mapping already
	 * exists at 'addr', which looks inverted relative to the stated
	 * contract ("eligible for prefault" usually means the address is
	 * not yet mapped) — verify against other pmap implementations.
	 */
	rv = FALSE;
	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, addr, &lvl);
	if (pte != NULL && pmap_load(pte) != 0) {
		rv = TRUE;
	}
	PMAP_UNLOCK(pmap);
	return (rv);
}

/*
 * pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	return (pmap_page_test_mappings(m, TRUE, FALSE));
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct md_page *pvh;
	pmap_t pmap;
	struct rwlock *lock;
	pv_entry_t next_pv, pv;
	pt_entry_t oldpte, *pte;
	vm_offset_t va;
	int lvl, md_gen, pvh_gen;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
	rw_wlock(lock);
	/*
	 * First pass: demote any writable 2MB (L2 block) mappings of this
	 * page so that the 4KB loop below sees every mapping individually.
	 */
	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			/*
			 * Lock order is pmap lock before pv-list lock; on
			 * trylock failure drop the list lock, take the pmap
			 * lock, and revalidate via the generation count.
			 */
			pvh_gen = pvh->pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen) {
				PMAP_UNLOCK(pmap);
				rw_wunlock(lock);
				goto retry_pv_loop;
			}
		}
		va = pv->pv_va;
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
		if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
			pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET,
			    &lock);
		KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
		    ("inconsistent pv lock %p %p for page %p",
		    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
		PMAP_UNLOCK(pmap);
	}
	/*
	 * Second pass: downgrade each remaining 4KB mapping to read-only,
	 * transferring any accumulated dirty state to the vm_page.
	 */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			pvh_gen = pvh->pv_gen;
			md_gen = m->md.pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen ||
			    md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				rw_wunlock(lock);
				goto retry_pv_loop;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
retry:
		oldpte = pmap_load(pte);
		if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
			/* CAS loop: another CPU may update the PTE. */
			if (!atomic_cmpset_long(pte, oldpte,
			    oldpte | ATTR_AP(ATTR_AP_RO)))
				goto retry;
			/*
			 * A writable mapping with AF set may have been
			 * written through; record the page as dirty.
			 */
			if ((oldpte & ATTR_AF) != 0)
				vm_page_dirty(m);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(lock);
	vm_page_aflag_clear(m, PGA_WRITEABLE);
}

/*
 * Whether the reference (AF) bit in "pte" may be cleared in place.  Always
 * false for now: clearing AF would require taking and handling access
 * faults, which is not implemented (see the ARM64TODO panic in
 * pmap_ts_referenced()).
 */
static __inline boolean_t
safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
{

	return (FALSE);
}

/* Upper bound on the number of mappings processed per pmap_ts_referenced(). */
#define	PMAP_TS_REFERENCED_MAX	5

/*
 * pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t pv, pvf;
	pmap_t pmap;
	struct rwlock *lock;
	pd_entry_t *pde, tpde;
	pt_entry_t *pte, tpte;
	pt_entry_t *l3;
	vm_offset_t va;
	vm_paddr_t pa;
	int cleared, md_gen, not_cleared, lvl, pvh_gen;
	struct spglist free;
	bool demoted;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	SLIST_INIT(&free);
	cleared = 0;
	pa = VM_PAGE_TO_PHYS(m);
	lock = PHYS_TO_PV_LIST_LOCK(pa);
	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
	rw_wlock(lock);
retry:
	not_cleared = 0;
	/* Process 2MB (L2 block) mappings first, then 4KB mappings. */
	if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
		goto small_mappings;
	pv = pvf;
	do {
		if (pvf == NULL)
			pvf = pv;
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			/*
			 * Lock order: pmap lock before pv-list lock.  Drop
			 * the list lock, lock the pmap, and revalidate the
			 * generation count; restart if the list changed.
			 */
			pvh_gen = pvh->pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen) {
				PMAP_UNLOCK(pmap);
				goto retry;
			}
		}
		va = pv->pv_va;
		pde = pmap_pde(pmap, pv->pv_va, &lvl);
		KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found"));
		KASSERT(lvl == 1,
		    ("pmap_ts_referenced: invalid pde level %d", lvl));
		tpde = pmap_load(pde);
		KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE,
		    ("pmap_ts_referenced: found an invalid l1 table"));
		pte = pmap_l1_to_l2(pde, pv->pv_va);
		tpte = pmap_load(pte);
		if ((tpte & ATTR_AF) != 0) {
			/*
			 * Since this reference bit is shared by 512 4KB
			 * pages, it should not be cleared every time it is
			 * tested.  Apply a simple "hash" function on the
			 * physical page number, the virtual superpage number,
			 * and the pmap address to select one 4KB page out of
			 * the 512 on which testing the reference bit will
			 * result in clearing that reference bit.  This
			 * function is designed to avoid the selection of the
			 * same 4KB page for every 2MB page mapping.
			 *
			 * On demotion, a mapping that hasn't been referenced
			 * is simply destroyed.  To avoid the possibility of a
			 * subsequent page fault on a demoted wired mapping,
			 * always leave its reference bit set.  Moreover,
			 * since the superpage is wired, the current state of
			 * its reference bit won't affect page replacement.
			 */
			if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^
			    (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 &&
			    (tpte & ATTR_SW_WIRED) == 0) {
				if (safe_to_clear_referenced(pmap, tpte)) {
					/*
					 * TODO: We don't handle the access
					 * flag at all. We need to be able
					 * to set it in the exception handler.
					 */
					panic("ARM64TODO: "
					    "safe_to_clear_referenced\n");
				} else if (pmap_demote_l2_locked(pmap, pte,
				    pv->pv_va, &lock) != NULL) {
					demoted = true;
					va += VM_PAGE_TO_PHYS(m) -
					    (tpte & ~ATTR_MASK);
					l3 = pmap_l2_to_l3(pte, va);
					pmap_remove_l3(pmap, l3, va,
					    pmap_load(pte), NULL, &lock);
				} else
					demoted = true;

				/*
				 * NOTE(review): on arm64 a failed
				 * pmap_demote_l2_locked() leaves the L2
				 * mapping intact (unlike amd64), yet
				 * "demoted" is set in that branch too —
				 * confirm invalidating "pv" is correct there.
				 */
				if (demoted) {
					/*
					 * The superpage mapping was removed
					 * entirely and therefore 'pv' is no
					 * longer valid.
					 */
					if (pvf == pv)
						pvf = NULL;
					pv = NULL;
				}
				cleared++;
				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
				    ("inconsistent pv lock %p %p for page %p",
				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
			} else
				not_cleared++;
		}
		PMAP_UNLOCK(pmap);
		/* Rotate the PV list if it has more than one entry. */
		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
			pvh->pv_gen++;
		}
		if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX)
			goto out;
	} while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf);
small_mappings:
	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
		goto out;
	pv = pvf;
	do {
		if (pvf == NULL)
			pvf = pv;
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			pvh_gen = pvh->pv_gen;
			md_gen = m->md.pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto retry;
			}
		}
		pde = pmap_pde(pmap, pv->pv_va, &lvl);
		KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
		KASSERT(lvl == 2,
		    ("pmap_ts_referenced: invalid pde level %d", lvl));
		tpde = pmap_load(pde);
		KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
		    ("pmap_ts_referenced: found an invalid l2 table"));
		pte = pmap_l2_to_l3(pde, pv->pv_va);
		tpte = pmap_load(pte);
		if ((tpte & ATTR_AF) != 0) {
			if (safe_to_clear_referenced(pmap, tpte)) {
				/*
				 * TODO: We don't handle the access flag
				 * at all. We need to be able to set it in
				 * the exception handler.
				 */
				panic("ARM64TODO: safe_to_clear_referenced\n");
			} else if ((tpte & ATTR_SW_WIRED) == 0) {
				/*
				 * Wired pages cannot be paged out so
				 * doing accessed bit emulation for
				 * them is wasted effort. We do the
				 * hard work for unwired pages only.
				 */
				pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
				    &free, &lock);
				pmap_invalidate_page(pmap, pv->pv_va);
				cleared++;
				if (pvf == pv)
					pvf = NULL;
				pv = NULL;
				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
				    ("inconsistent pv lock %p %p for page %p",
				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
			} else
				not_cleared++;
		}
		PMAP_UNLOCK(pmap);
		/* Rotate the PV list if it has more than one entry. */
		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
			m->md.pv_gen++;
		}
	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
	    not_cleared < PMAP_TS_REFERENCED_MAX);
out:
	rw_wunlock(lock);
	pmap_free_zero_pages(&free);
	return (cleared + not_cleared);
}

/*
 * Apply the given advice to the specified range of addresses within the
 * given pmap.  Depending on the advice, clear the referenced and/or
 * modified flags in each mapping and set the mapped page's dirty field.
4238281494Sandrew */ 4239281494Sandrewvoid 4240281494Sandrewpmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 4241281494Sandrew{ 4242281494Sandrew} 4243281494Sandrew 4244281494Sandrew/* 4245281494Sandrew * Clear the modify bits on the specified physical page. 4246281494Sandrew */ 4247281494Sandrewvoid 4248281494Sandrewpmap_clear_modify(vm_page_t m) 4249281494Sandrew{ 4250281494Sandrew 4251281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4252281494Sandrew ("pmap_clear_modify: page %p is not managed", m)); 4253281494Sandrew VM_OBJECT_ASSERT_WLOCKED(m->object); 4254281494Sandrew KASSERT(!vm_page_xbusied(m), 4255281494Sandrew ("pmap_clear_modify: page %p is exclusive busied", m)); 4256281494Sandrew 4257281494Sandrew /* 4258281494Sandrew * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 4259281494Sandrew * If the object containing the page is locked and the page is not 4260281494Sandrew * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 4261281494Sandrew */ 4262281494Sandrew if ((m->aflags & PGA_WRITEABLE) == 0) 4263281494Sandrew return; 4264281846Sandrew 4265286073Semaste /* ARM64TODO: We lack support for tracking if a page is modified */ 4266281494Sandrew} 4267281494Sandrew 4268282221Sandrewvoid * 4269282221Sandrewpmap_mapbios(vm_paddr_t pa, vm_size_t size) 4270282221Sandrew{ 4271282221Sandrew 4272282221Sandrew return ((void *)PHYS_TO_DMAP(pa)); 4273282221Sandrew} 4274282221Sandrew 4275282221Sandrewvoid 4276282221Sandrewpmap_unmapbios(vm_paddr_t pa, vm_size_t size) 4277282221Sandrew{ 4278282221Sandrew} 4279282221Sandrew 4280281494Sandrew/* 4281281494Sandrew * Sets the memory attribute for the specified page. 4282281494Sandrew */ 4283281494Sandrewvoid 4284281494Sandrewpmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 4285281494Sandrew{ 4286281494Sandrew 4287286080Sandrew m->md.pv_memattr = ma; 4288286080Sandrew 4289286080Sandrew /* 4290286080Sandrew * If "m" is a normal page, update its direct mapping. 
This update 4291286080Sandrew * can be relied upon to perform any cache operations that are 4292286080Sandrew * required for data coherence. 4293286080Sandrew */ 4294286080Sandrew if ((m->flags & PG_FICTITIOUS) == 0 && 4295305882Sandrew pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, 4296305882Sandrew m->md.pv_memattr) != 0) 4297305882Sandrew panic("memory attribute change on the direct map failed"); 4298281494Sandrew} 4299281494Sandrew 4300281494Sandrew/* 4301305882Sandrew * Changes the specified virtual address range's memory type to that given by 4302305882Sandrew * the parameter "mode". The specified virtual address range must be 4303305882Sandrew * completely contained within either the direct map or the kernel map. If 4304305882Sandrew * the virtual address range is contained within the kernel map, then the 4305305882Sandrew * memory type for each of the corresponding ranges of the direct map is also 4306305882Sandrew * changed. (The corresponding ranges of the direct map are those ranges that 4307305882Sandrew * map the same physical pages as the specified virtual address range.) These 4308305882Sandrew * changes to the direct map are necessary because Intel describes the 4309305882Sandrew * behavior of their processors as "undefined" if two or more mappings to the 4310305882Sandrew * same physical page have different memory types. 4311305882Sandrew * 4312305882Sandrew * Returns zero if the change completed successfully, and either EINVAL or 4313305882Sandrew * ENOMEM if the change failed. Specifically, EINVAL is returned if some part 4314305882Sandrew * of the virtual address range was not mapped, and ENOMEM is returned if 4315305882Sandrew * there was insufficient memory available to complete the change. In the 4316305882Sandrew * latter case, the memory type may have been changed on some part of the 4317305882Sandrew * virtual address range or the direct map. 
4318305882Sandrew */ 4319305882Sandrewstatic int 4320305882Sandrewpmap_change_attr(vm_offset_t va, vm_size_t size, int mode) 4321305882Sandrew{ 4322305882Sandrew int error; 4323305882Sandrew 4324305882Sandrew PMAP_LOCK(kernel_pmap); 4325305882Sandrew error = pmap_change_attr_locked(va, size, mode); 4326305882Sandrew PMAP_UNLOCK(kernel_pmap); 4327305882Sandrew return (error); 4328305882Sandrew} 4329305882Sandrew 4330305882Sandrewstatic int 4331305882Sandrewpmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) 4332305882Sandrew{ 4333305882Sandrew vm_offset_t base, offset, tmpva; 4334305882Sandrew pt_entry_t l3, *pte, *newpte; 4335305882Sandrew int lvl; 4336305882Sandrew 4337305882Sandrew PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); 4338305882Sandrew base = trunc_page(va); 4339305882Sandrew offset = va & PAGE_MASK; 4340305882Sandrew size = round_page(offset + size); 4341305882Sandrew 4342305882Sandrew if (!VIRT_IN_DMAP(base)) 4343305882Sandrew return (EINVAL); 4344305882Sandrew 4345305882Sandrew for (tmpva = base; tmpva < base + size; ) { 4346305882Sandrew pte = pmap_pte(kernel_pmap, va, &lvl); 4347305882Sandrew if (pte == NULL) 4348305882Sandrew return (EINVAL); 4349305882Sandrew 4350305882Sandrew if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) { 4351305882Sandrew /* 4352305882Sandrew * We already have the correct attribute, 4353305882Sandrew * ignore this entry. 
4354305882Sandrew */ 4355305882Sandrew switch (lvl) { 4356305882Sandrew default: 4357305882Sandrew panic("Invalid DMAP table level: %d\n", lvl); 4358305882Sandrew case 1: 4359305882Sandrew tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE; 4360305882Sandrew break; 4361305882Sandrew case 2: 4362305882Sandrew tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE; 4363305882Sandrew break; 4364305882Sandrew case 3: 4365305882Sandrew tmpva += PAGE_SIZE; 4366305882Sandrew break; 4367305882Sandrew } 4368305882Sandrew } else { 4369305882Sandrew /* 4370305882Sandrew * Split the entry to an level 3 table, then 4371305882Sandrew * set the new attribute. 4372305882Sandrew */ 4373305882Sandrew switch (lvl) { 4374305882Sandrew default: 4375305882Sandrew panic("Invalid DMAP table level: %d\n", lvl); 4376305882Sandrew case 1: 4377305882Sandrew newpte = pmap_demote_l1(kernel_pmap, pte, 4378305882Sandrew tmpva & ~L1_OFFSET); 4379305882Sandrew if (newpte == NULL) 4380305882Sandrew return (EINVAL); 4381305882Sandrew pte = pmap_l1_to_l2(pte, tmpva); 4382305882Sandrew case 2: 4383305882Sandrew newpte = pmap_demote_l2(kernel_pmap, pte, 4384305882Sandrew tmpva & ~L2_OFFSET); 4385305882Sandrew if (newpte == NULL) 4386305882Sandrew return (EINVAL); 4387305882Sandrew pte = pmap_l2_to_l3(pte, tmpva); 4388305882Sandrew case 3: 4389305882Sandrew /* Update the entry */ 4390305882Sandrew l3 = pmap_load(pte); 4391305882Sandrew l3 &= ~ATTR_IDX_MASK; 4392305882Sandrew l3 |= ATTR_IDX(mode); 4393319203Sandrew if (mode == DEVICE_MEMORY) 4394319203Sandrew l3 |= ATTR_XN; 4395305882Sandrew 4396305882Sandrew pmap_update_entry(kernel_pmap, pte, l3, tmpva, 4397305882Sandrew PAGE_SIZE); 4398305882Sandrew 4399305882Sandrew /* 4400305882Sandrew * If moving to a non-cacheable entry flush 4401305882Sandrew * the cache. 
4402305882Sandrew */ 4403305882Sandrew if (mode == VM_MEMATTR_UNCACHEABLE) 4404305882Sandrew cpu_dcache_wbinv_range(tmpva, L3_SIZE); 4405305882Sandrew 4406305882Sandrew break; 4407305882Sandrew } 4408305882Sandrew tmpva += PAGE_SIZE; 4409305882Sandrew } 4410305882Sandrew } 4411305882Sandrew 4412305882Sandrew return (0); 4413305882Sandrew} 4414305882Sandrew 4415305882Sandrew/* 4416305882Sandrew * Create an L2 table to map all addresses within an L1 mapping. 4417305882Sandrew */ 4418305882Sandrewstatic pt_entry_t * 4419305882Sandrewpmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) 4420305882Sandrew{ 4421305882Sandrew pt_entry_t *l2, newl2, oldl1; 4422305882Sandrew vm_offset_t tmpl1; 4423305882Sandrew vm_paddr_t l2phys, phys; 4424305882Sandrew vm_page_t ml2; 4425305882Sandrew int i; 4426305882Sandrew 4427305882Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 4428305882Sandrew oldl1 = pmap_load(l1); 4429305882Sandrew KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK, 4430305882Sandrew ("pmap_demote_l1: Demoting a non-block entry")); 4431305882Sandrew KASSERT((va & L1_OFFSET) == 0, 4432305882Sandrew ("pmap_demote_l1: Invalid virtual address %#lx", va)); 4433305882Sandrew KASSERT((oldl1 & ATTR_SW_MANAGED) == 0, 4434305882Sandrew ("pmap_demote_l1: Level 1 table shouldn't be managed")); 4435305882Sandrew 4436305882Sandrew tmpl1 = 0; 4437305882Sandrew if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) { 4438305882Sandrew tmpl1 = kva_alloc(PAGE_SIZE); 4439305882Sandrew if (tmpl1 == 0) 4440305882Sandrew return (NULL); 4441305882Sandrew } 4442305882Sandrew 4443305882Sandrew if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | 4444305882Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 4445305882Sandrew CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx" 4446305882Sandrew " in pmap %p", va, pmap); 4447305882Sandrew return (NULL); 4448305882Sandrew } 4449305882Sandrew 4450305882Sandrew l2phys = VM_PAGE_TO_PHYS(ml2); 4451305882Sandrew l2 = (pt_entry_t 
*)PHYS_TO_DMAP(l2phys); 4452305882Sandrew 4453305882Sandrew /* Address the range points at */ 4454305882Sandrew phys = oldl1 & ~ATTR_MASK; 4455305882Sandrew /* The attributed from the old l1 table to be copied */ 4456305882Sandrew newl2 = oldl1 & ATTR_MASK; 4457305882Sandrew 4458305882Sandrew /* Create the new entries */ 4459305882Sandrew for (i = 0; i < Ln_ENTRIES; i++) { 4460305882Sandrew l2[i] = newl2 | phys; 4461305882Sandrew phys += L2_SIZE; 4462305882Sandrew } 4463305882Sandrew cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); 4464305882Sandrew KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK), 4465305882Sandrew ("Invalid l2 page (%lx != %lx)", l2[0], 4466305882Sandrew (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK)); 4467305882Sandrew 4468305882Sandrew if (tmpl1 != 0) { 4469305882Sandrew pmap_kenter(tmpl1, PAGE_SIZE, 4470305882Sandrew DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY); 4471305882Sandrew l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK)); 4472305882Sandrew } 4473305882Sandrew 4474305882Sandrew pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE); 4475305882Sandrew 4476305882Sandrew if (tmpl1 != 0) { 4477305882Sandrew pmap_kremove(tmpl1); 4478305882Sandrew kva_free(tmpl1, PAGE_SIZE); 4479305882Sandrew } 4480305882Sandrew 4481305882Sandrew return (l2); 4482305882Sandrew} 4483305882Sandrew 4484305882Sandrew/* 4485305882Sandrew * Create an L3 table to map all addresses within an L2 mapping. 
 */
static pt_entry_t *
pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
    struct rwlock **lockp)
{
	pt_entry_t *l3, newl3, oldl2;
	vm_offset_t tmpl2;
	vm_paddr_t l3phys, phys;
	vm_page_t ml3;
	int i;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	l3 = NULL;
	oldl2 = pmap_load(l2);
	KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
	    ("pmap_demote_l2: Demoting a non-block entry"));
	KASSERT((va & L2_OFFSET) == 0,
	    ("pmap_demote_l2: Invalid virtual address %#lx", va));

	/*
	 * If the L2 entry lives inside the 2MB region it maps, reserve a
	 * temporary KVA page so we keep access to the entry across the
	 * break-before-make update performed by pmap_update_entry().
	 */
	tmpl2 = 0;
	if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
		tmpl2 = kva_alloc(PAGE_SIZE);
		if (tmpl2 == 0)
			return (NULL);
	}

	/* Reuse a previously-removed page table page, or allocate one. */
	if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
		ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
		    (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
		if (ml3 == NULL) {
			CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
			    " in pmap %p", va, pmap);
			goto fail;
		}
		if (va < VM_MAXUSER_ADDRESS)
			pmap_resident_count_inc(pmap, 1);
	}

	l3phys = VM_PAGE_TO_PHYS(ml3);
	l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);

	/* Address the range points at */
	phys = oldl2 & ~ATTR_MASK;
	/* The attributes from the old l2 table to be copied */
	newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE;

	/*
	 * If the page table page is new, initialize it.
	 */
	if (ml3->wire_count == 1) {
		for (i = 0; i < Ln_ENTRIES; i++) {
			l3[i] = newl3 | phys;
			phys += L3_SIZE;
		}
		cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE);
	}
	KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE),
	    ("Invalid l3 page (%lx != %lx)", l3[0],
	    (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE));

	/*
	 * Map the temporary page so we don't lose access to the l2 table.
	 */
	if (tmpl2 != 0) {
		pmap_kenter(tmpl2, PAGE_SIZE,
		    DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY);
		l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
	}

	/*
	 * The spare PV entries must be reserved prior to demoting the
	 * mapping, that is, prior to changing the PDE.  Otherwise, the state
	 * of the L2 and the PV lists will be inconsistent, which can result
	 * in reclaim_pv_chunk() attempting to remove a PV entry from the
	 * wrong PV list and pmap_pv_demote_l2() failing to find the expected
	 * PV entry for the 2MB page mapping that is being demoted.
	 */
	if ((oldl2 & ATTR_SW_MANAGED) != 0)
		reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);

	/* Break-before-make: replace the L2 block with the L3 table. */
	pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);

	/*
	 * Demote the PV entry.
	 */
	if ((oldl2 & ATTR_SW_MANAGED) != 0)
		pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp);

	atomic_add_long(&pmap_l2_demotions, 1);
	CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx"
	    " in pmap %p %lx", va, pmap, l3[0]);

fail:
	if (tmpl2 != 0) {
		pmap_kremove(tmpl2);
		kva_free(tmpl2, PAGE_SIZE);
	}

	/* NULL on failure; the original L2 block mapping is untouched then. */
	return (l3);

}

/*
 * Convenience wrapper around pmap_demote_l2_locked() for callers that do
 * not already hold a pv-list lock.
 */
static pt_entry_t *
pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
{
	struct rwlock *lock;
	pt_entry_t *l3;

	lock = NULL;
	l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
	if (lock != NULL)
		rw_wunlock(lock);
	return (l3);
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
4608287570Sandrew pd_entry_t *l1p, l1; 4609287570Sandrew pd_entry_t *l2p, l2; 4610287570Sandrew pt_entry_t *l3p, l3; 4611287570Sandrew vm_paddr_t pa; 4612287570Sandrew bool managed; 4613287570Sandrew int val; 4614281494Sandrew 4615287570Sandrew PMAP_LOCK(pmap); 4616287570Sandrewretry: 4617287570Sandrew pa = 0; 4618287570Sandrew val = 0; 4619287570Sandrew managed = false; 4620287570Sandrew 4621287570Sandrew l1p = pmap_l1(pmap, addr); 4622287570Sandrew if (l1p == NULL) /* No l1 */ 4623287570Sandrew goto done; 4624295425Swma 4625287570Sandrew l1 = pmap_load(l1p); 4626295425Swma if ((l1 & ATTR_DESCR_MASK) == L1_INVAL) 4627295425Swma goto done; 4628295425Swma 4629287570Sandrew if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { 4630287570Sandrew pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); 4631287570Sandrew managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 4632287570Sandrew val = MINCORE_SUPER | MINCORE_INCORE; 4633287570Sandrew if (pmap_page_dirty(l1)) 4634287570Sandrew val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 4635287570Sandrew if ((l1 & ATTR_AF) == ATTR_AF) 4636287570Sandrew val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 4637287570Sandrew goto done; 4638287570Sandrew } 4639287570Sandrew 4640287570Sandrew l2p = pmap_l1_to_l2(l1p, addr); 4641287570Sandrew if (l2p == NULL) /* No l2 */ 4642287570Sandrew goto done; 4643295425Swma 4644287570Sandrew l2 = pmap_load(l2p); 4645295425Swma if ((l2 & ATTR_DESCR_MASK) == L2_INVAL) 4646295425Swma goto done; 4647295425Swma 4648287570Sandrew if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { 4649287570Sandrew pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET); 4650287570Sandrew managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 4651287570Sandrew val = MINCORE_SUPER | MINCORE_INCORE; 4652287570Sandrew if (pmap_page_dirty(l2)) 4653287570Sandrew val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 4654287570Sandrew if ((l2 & ATTR_AF) == ATTR_AF) 4655287570Sandrew val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 4656287570Sandrew goto done; 
4657287570Sandrew } 4658287570Sandrew 4659287570Sandrew l3p = pmap_l2_to_l3(l2p, addr); 4660287570Sandrew if (l3p == NULL) /* No l3 */ 4661287570Sandrew goto done; 4662295425Swma 4663287570Sandrew l3 = pmap_load(l2p); 4664295425Swma if ((l3 & ATTR_DESCR_MASK) == L3_INVAL) 4665295425Swma goto done; 4666295425Swma 4667287570Sandrew if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) { 4668287570Sandrew pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET); 4669287570Sandrew managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 4670287570Sandrew val = MINCORE_INCORE; 4671287570Sandrew if (pmap_page_dirty(l3)) 4672287570Sandrew val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 4673287570Sandrew if ((l3 & ATTR_AF) == ATTR_AF) 4674287570Sandrew val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 4675287570Sandrew } 4676287570Sandrew 4677287570Sandrewdone: 4678287570Sandrew if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 4679287570Sandrew (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 4680287570Sandrew /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. 
*/ 4681287570Sandrew if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 4682287570Sandrew goto retry; 4683287570Sandrew } else 4684287570Sandrew PA_UNLOCK_COND(*locked_pa); 4685287570Sandrew PMAP_UNLOCK(pmap); 4686287570Sandrew 4687287570Sandrew return (val); 4688281494Sandrew} 4689281494Sandrew 4690281494Sandrewvoid 4691281494Sandrewpmap_activate(struct thread *td) 4692281494Sandrew{ 4693281494Sandrew pmap_t pmap; 4694281494Sandrew 4695281494Sandrew critical_enter(); 4696281494Sandrew pmap = vmspace_pmap(td->td_proc->p_vmspace); 4697297446Sandrew td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0); 4698297446Sandrew __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr)); 4699285212Sandrew pmap_invalidate_all(pmap); 4700281494Sandrew critical_exit(); 4701281494Sandrew} 4702281494Sandrew 4703281494Sandrewvoid 4704287105Sandrewpmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 4705281494Sandrew{ 4706281494Sandrew 4707287105Sandrew if (va >= VM_MIN_KERNEL_ADDRESS) { 4708287105Sandrew cpu_icache_sync_range(va, sz); 4709287105Sandrew } else { 4710287105Sandrew u_int len, offset; 4711287105Sandrew vm_paddr_t pa; 4712287105Sandrew 4713287105Sandrew /* Find the length of data in this page to flush */ 4714287105Sandrew offset = va & PAGE_MASK; 4715287105Sandrew len = imin(PAGE_SIZE - offset, sz); 4716287105Sandrew 4717287105Sandrew while (sz != 0) { 4718287105Sandrew /* Extract the physical address & find it in the DMAP */ 4719287105Sandrew pa = pmap_extract(pmap, va); 4720287105Sandrew if (pa != 0) 4721287105Sandrew cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); 4722287105Sandrew 4723287105Sandrew /* Move to the next page */ 4724287105Sandrew sz -= len; 4725287105Sandrew va += len; 4726287105Sandrew /* Set the length for the next iteration */ 4727287105Sandrew len = imin(PAGE_SIZE, sz); 4728287105Sandrew } 4729287105Sandrew } 4730281494Sandrew} 4731281494Sandrew 4732305882Sandrewint 4733305882Sandrewpmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) 
4734305882Sandrew{ 4735305882Sandrew#ifdef SMP 4736305882Sandrew uint64_t par; 4737305882Sandrew#endif 4738305882Sandrew 4739305882Sandrew switch (ESR_ELx_EXCEPTION(esr)) { 4740305882Sandrew case EXCP_DATA_ABORT_L: 4741305882Sandrew case EXCP_DATA_ABORT: 4742305882Sandrew break; 4743305882Sandrew default: 4744305882Sandrew return (KERN_FAILURE); 4745305882Sandrew } 4746305882Sandrew 4747305882Sandrew#ifdef SMP 4748305882Sandrew PMAP_LOCK(pmap); 4749305882Sandrew switch (esr & ISS_DATA_DFSC_MASK) { 4750305882Sandrew case ISS_DATA_DFSC_TF_L0: 4751305882Sandrew case ISS_DATA_DFSC_TF_L1: 4752305882Sandrew case ISS_DATA_DFSC_TF_L2: 4753305882Sandrew case ISS_DATA_DFSC_TF_L3: 4754305882Sandrew /* Ask the MMU to check the address */ 4755305882Sandrew if (pmap == kernel_pmap) 4756305882Sandrew par = arm64_address_translate_s1e1r(far); 4757305882Sandrew else 4758305882Sandrew par = arm64_address_translate_s1e0r(far); 4759305882Sandrew 4760305882Sandrew /* 4761305882Sandrew * If the translation was successful the address was invalid 4762305882Sandrew * due to a break-before-make sequence. We can unlock and 4763305882Sandrew * return success to the trap handler. 4764305882Sandrew */ 4765305882Sandrew if (PAR_SUCCESS(par)) { 4766305882Sandrew PMAP_UNLOCK(pmap); 4767305882Sandrew return (KERN_SUCCESS); 4768305882Sandrew } 4769305882Sandrew break; 4770305882Sandrew default: 4771305882Sandrew break; 4772305882Sandrew } 4773305882Sandrew PMAP_UNLOCK(pmap); 4774305882Sandrew#endif 4775305882Sandrew 4776305882Sandrew return (KERN_FAILURE); 4777305882Sandrew} 4778305882Sandrew 4779281494Sandrew/* 4780281494Sandrew * Increase the starting virtual address of the given mapping if a 4781281494Sandrew * different alignment might result in more superpage mappings. 
4782281494Sandrew */ 4783281494Sandrewvoid 4784281494Sandrewpmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 4785281494Sandrew vm_offset_t *addr, vm_size_t size) 4786281494Sandrew{ 4787305880Sandrew vm_offset_t superpage_offset; 4788305880Sandrew 4789305880Sandrew if (size < L2_SIZE) 4790305880Sandrew return; 4791305880Sandrew if (object != NULL && (object->flags & OBJ_COLORED) != 0) 4792305880Sandrew offset += ptoa(object->pg_color); 4793305880Sandrew superpage_offset = offset & L2_OFFSET; 4794305880Sandrew if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || 4795305880Sandrew (*addr & L2_OFFSET) == superpage_offset) 4796305880Sandrew return; 4797305880Sandrew if ((*addr & L2_OFFSET) < superpage_offset) 4798305880Sandrew *addr = (*addr & ~L2_OFFSET) + superpage_offset; 4799305880Sandrew else 4800305880Sandrew *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; 4801281494Sandrew} 4802281494Sandrew 4803281494Sandrew/** 4804281494Sandrew * Get the kernel virtual address of a set of physical pages. If there are 4805281494Sandrew * physical addresses not covered by the DMAP perform a transient mapping 4806281494Sandrew * that will be removed when calling pmap_unmap_io_transient. 4807281494Sandrew * 4808281494Sandrew * \param page The pages the caller wishes to obtain the virtual 4809281494Sandrew * address on the kernel memory map. 4810281494Sandrew * \param vaddr On return contains the kernel virtual memory address 4811281494Sandrew * of the pages passed in the page parameter. 4812281494Sandrew * \param count Number of pages passed in. 4813281494Sandrew * \param can_fault TRUE if the thread using the mapped pages can take 4814281494Sandrew * page faults, FALSE otherwise. 4815281494Sandrew * 4816281494Sandrew * \returns TRUE if the caller must call pmap_unmap_io_transient when 4817281494Sandrew * finished or FALSE otherwise. 
4818281494Sandrew * 4819281494Sandrew */ 4820281494Sandrewboolean_t 4821281494Sandrewpmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 4822281494Sandrew boolean_t can_fault) 4823281494Sandrew{ 4824281494Sandrew vm_paddr_t paddr; 4825281494Sandrew boolean_t needs_mapping; 4826281494Sandrew int error, i; 4827281494Sandrew 4828281494Sandrew /* 4829281494Sandrew * Allocate any KVA space that we need, this is done in a separate 4830281494Sandrew * loop to prevent calling vmem_alloc while pinned. 4831281494Sandrew */ 4832281494Sandrew needs_mapping = FALSE; 4833281494Sandrew for (i = 0; i < count; i++) { 4834281494Sandrew paddr = VM_PAGE_TO_PHYS(page[i]); 4835297617Sandrew if (__predict_false(!PHYS_IN_DMAP(paddr))) { 4836281494Sandrew error = vmem_alloc(kernel_arena, PAGE_SIZE, 4837281494Sandrew M_BESTFIT | M_WAITOK, &vaddr[i]); 4838281494Sandrew KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 4839281494Sandrew needs_mapping = TRUE; 4840281494Sandrew } else { 4841281494Sandrew vaddr[i] = PHYS_TO_DMAP(paddr); 4842281494Sandrew } 4843281494Sandrew } 4844281494Sandrew 4845281494Sandrew /* Exit early if everything is covered by the DMAP */ 4846281494Sandrew if (!needs_mapping) 4847281494Sandrew return (FALSE); 4848281494Sandrew 4849281494Sandrew if (!can_fault) 4850281494Sandrew sched_pin(); 4851281494Sandrew for (i = 0; i < count; i++) { 4852281494Sandrew paddr = VM_PAGE_TO_PHYS(page[i]); 4853297617Sandrew if (!PHYS_IN_DMAP(paddr)) { 4854281494Sandrew panic( 4855281494Sandrew "pmap_map_io_transient: TODO: Map out of DMAP data"); 4856281494Sandrew } 4857281494Sandrew } 4858281494Sandrew 4859281494Sandrew return (needs_mapping); 4860281494Sandrew} 4861281494Sandrew 4862281494Sandrewvoid 4863281494Sandrewpmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 4864281494Sandrew boolean_t can_fault) 4865281494Sandrew{ 4866281494Sandrew vm_paddr_t paddr; 4867281494Sandrew int i; 4868281494Sandrew 4869281494Sandrew if (!can_fault) 
4870281494Sandrew sched_unpin(); 4871281494Sandrew for (i = 0; i < count; i++) { 4872281494Sandrew paddr = VM_PAGE_TO_PHYS(page[i]); 4873297617Sandrew if (!PHYS_IN_DMAP(paddr)) { 4874286073Semaste panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); 4875281494Sandrew } 4876281494Sandrew } 4877281494Sandrew} 4878