/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014-2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * This software was developed by Andrew Turner under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/arm64/arm64/pmap.c 338484 2018-09-05 21:28:33Z kib $");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidations expensive,
 * this module may delay invalidation or reduced-protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and of when physical maps must be made correct.
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/bitstring.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/_unrhdr.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

/* Number of page table entries held by one page at each level. */
#define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL2PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL3PG		(PAGE_SIZE/(sizeof (pt_entry_t)))

/* Total number of entries reachable at each level from the L0 root. */
#define	NUL0E		L0_ENTRIES
#define	NUL1E		(NUL0E * NL1PG)
#define	NUL2E		(NUL1E * NL2PG)

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define	PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define	PMAP_INLINE	extern inline
#endif
#else
#define	PMAP_INLINE
#endif

/*
 * These are configured by the mair_el1 register.
 * This is set up in locore.S.
 */
#define	DEVICE_MEMORY	0
#define	UNCACHED_MEMORY	1
#define	CACHED_MEMORY	2

#ifdef PV_STATS
#define	PV_STAT(x)	do { x ; } while (0)
#else
#define	PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
#define	pa_to_pvh(pa)		(&pv_table[pmap_l2_pindex(pa)])

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))

struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

struct msgbuf *msgbufp = NULL;

/*
 * Data for the pv entry allocation mechanism.
 * Updates to pv_invl_gen are protected by the pv_list_locks[]
 * elements, but reads are not.
 */
static struct md_page *pv_table;
static struct md_page pv_dummy;

vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */

/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS);

#define	DMAP_TABLES	((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
extern pt_entry_t pagetable_dmap[];

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

static int superpages_enabled = 0;
SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
    "Are large page mappings enabled?");

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);

static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode);
static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
    vm_offset_t va, struct rwlock **lockp);
static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define	pmap_set(table, mask) atomic_set_64(table, mask)
#define	pmap_load_clear(table) atomic_swap_64(table, 0)
#define	pmap_load(table) (*table)

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

#define	pmap_l0_index(va)	(((va) >> L0_SHIFT) & L0_ADDR_MASK)
#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l0[pmap_l0_index(va)]);
}

static __inline pd_entry_t *
pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
	return (&l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l0;

	l0 = pmap_l0(pmap, va);
	if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
		return (NULL);

	return (pmap_l0_to_l1(l0, va));
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	/* pmap_l1() returns NULL when the L0 entry is not a table. */
	if (l1 == NULL || (pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
	return (&l3[pmap_l3_index(va)]);
}
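
/*
 * An illustrative (not authoritative) sketch of how the walk helpers above
 * combine: to resolve a VA to a PA, call pmap_pte() and apply the offset
 * mask of the level it reports, as pmap_extract() does later in this file:
 *
 *	pte = pmap_pte(pmap, va, &lvl);
 *	if (pte != NULL)
 *		pa = (pmap_load(pte) & ~ATTR_MASK) | (va & L3_OFFSET);
 *
 * with L1_OFFSET or L2_OFFSET in place of L3_OFFSET when lvl reports a
 * 1GB or 2MB block mapping.
 */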

/*
 * Returns the lowest valid pde for a given virtual address.
 * The next level may or may not point to a valid page or block.
 */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l0, *l1, *l2, desc;

	l0 = pmap_l0(pmap, va);
	desc = pmap_load(l0) & ATTR_DESCR_MASK;
	if (desc != L0_TABLE) {
		*level = -1;
		return (NULL);
	}

	l1 = pmap_l0_to_l1(l0, va);
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc != L1_TABLE) {
		*level = 0;
		return (l0);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc != L2_TABLE) {
		*level = 1;
		return (l1);
	}

	*level = 2;
	return (l2);
}

/*
 * Returns the lowest valid pte block or table entry for a given virtual
 * address. If there are no valid entries return NULL and set the level to
 * the first invalid level.
 */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l1, *l2, desc;
	pt_entry_t *l3;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL) {
		*level = 0;
		return (NULL);
	}
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc == L1_BLOCK) {
		*level = 1;
		return (l1);
	}

	if (desc != L1_TABLE) {
		*level = 1;
		return (NULL);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc == L2_BLOCK) {
		*level = 2;
		return (l2);
	}

	if (desc != L2_TABLE) {
		*level = 2;
		return (NULL);
	}

	*level = 3;
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
		return (NULL);

	return (l3);
}

static inline bool
pmap_superpages_enabled(void)
{

	return (superpages_enabled != 0);
}

bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
    pd_entry_t **l2, pt_entry_t **l3)
{
	pd_entry_t *l0p, *l1p, *l2p;

	if (pmap->pm_l0 == NULL)
		return (false);

	l0p = pmap_l0(pmap, va);
	*l0 = l0p;

	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
		return (false);

	l1p = pmap_l0_to_l1(l0p, va);
	*l1 = l1p;

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
		*l2 = NULL;
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
		return (false);

	l2p = pmap_l1_to_l2(l1p, va);
	*l2 = l2p;

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
		*l3 = NULL;
		return (true);
	}

	*l3 = pmap_l2_to_l3(l2p, va);

	return (true);
}

static __inline int
pmap_is_current(pmap_t pmap)
{

	return ((pmap == pmap_kernel()) ||
	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

/* Is a level 1 or 2 entry a valid block and cacheable? */
CTASSERT(L1_BLOCK == L2_BLOCK);
static __inline int
pmap_pte_valid_cacheable(pt_entry_t pte)
{

	return (((pte & ATTR_DESCR_MASK) == L1_BLOCK) &&
	    ((pte & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

/*
 * Checks if the page is dirty. We currently lack proper tracking of this on
 * arm64, so for now assume that a page mapped read/write and accessed is
 * dirty.
 */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
	    ("Invalid bootstrap L1 table"));
	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
}

static void
pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
{
	vm_offset_t va;
	vm_paddr_t pa;
	u_int l1_slot;

	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
	va = DMAP_MIN_ADDRESS;
	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
		l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);

		pmap_load_store(&pagetable_dmap[l1_slot],
		    (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN |
		    ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
	}

	/* Set the upper limit of the DMAP region */
	dmap_phys_max = pa;
	dmap_max_addr = va;

	cpu_dcache_wb_range((vm_offset_t)pagetable_dmap,
	    PAGE_SIZE * DMAP_TABLES);
	cpu_tlb_flushID();
}
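
/*
 * A note on the mapping just built: the DMAP is a linear window, so
 * PHYS_TO_DMAP(pa) is simply DMAP_MIN_ADDRESS + (pa - dmap_phys_base).
 * The table walk helpers above rely on this to turn the physical address
 * stored in a table entry into a usable pointer, e.g.
 * (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK).
 */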

static vm_offset_t
pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
{
	vm_offset_t l2pt;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;

	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));

	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(va);
	l2pt = l2_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		pa = pmap_early_vtophys(l1pt, l2pt);
		pmap_load_store(&l1[l1_slot],
		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
		l2pt += PAGE_SIZE;
	}

	/* Clean the L2 page tables */
	memset((void *)l2_start, 0, l2pt - l2_start);
	cpu_dcache_wb_range(l2_start, l2pt - l2_start);

	/* Flush the l1 table to ram */
	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);

	return (l2pt);
}

static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l2pt, l3pt;
	vm_paddr_t pa;
	pd_entry_t *l2;
	u_int l2_slot;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
	l2pt = (vm_offset_t)l2;
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pmap_load_store(&l2[l2_slot],
		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L3 page tables */
	memset((void *)l3_start, 0, l3pt - l3_start);
	cpu_dcache_wb_range(l3_start, l3pt - l3_start);

	/* Flush the l2 table to ram */
	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

	return (l3pt);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
    vm_size_t kernlen)
{
	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t pa, max_pa, min_pa;
	int i;

	kern_delta = KERNBASE - kernstart;
	physmem = 0;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/* Assume the address we were loaded to is a valid physical address */
	min_pa = max_pa = KERNBASE - kern_delta;

	/*
	 * Find the minimum and maximum physical addresses. physmap is
	 * sorted, but may contain empty ranges.
	 */
	for (i = 0; i < (physmap_idx * 2); i += 2) {
		if (physmap[i] == physmap[i + 1])
			continue;
		if (physmap[i] <= min_pa)
			min_pa = physmap[i];
		if (physmap[i + 1] > max_pa)
			max_pa = physmap[i + 1];
	}

	/* Create a direct map region early so we can use it for pa -> va */
	pmap_bootstrap_dmap(l1pt, min_pa, max_pa);

	va = KERNBASE;
	pa = KERNBASE - kern_delta;

	/*
	 * Start to initialise phys_avail by copying from physmap
	 * up to the physical address KERNBASE points at.
	 */
	map_slot = avail_slot = 0;
	for (; map_slot < (physmap_idx * 2) &&
	    avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		if (physmap[map_slot] <= pa &&
		    physmap[map_slot + 1] > pa)
			break;

		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}

	/* Add the memory before the kernel */
	if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) {
		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = pa;
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}
	used_map_slot = map_slot;
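
	/*
	 * phys_avail[] holds (start, end) pairs of free physical memory;
	 * the pair of zero entries stored after the loop further below
	 * terminates the list.
	 */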
	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
			break;

		/* Check locore used L2 blocks */
		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
		    ("Invalid bootstrap L2 table"));
		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
		    ("Incorrect PA in L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L1_SIZE);

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);
	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
	/* And the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	virtual_avail = roundup2(freemempos, L1_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
	    map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		/* Have we used the current range? */
		if (physmap[map_slot + 1] <= pa)
			continue;

		/* Do we need to split the entry? */
		if (physmap[map_slot] < pa) {
			phys_avail[avail_slot] = pa;
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		} else {
			phys_avail[avail_slot] = physmap[map_slot];
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		}
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;

		avail_slot += 2;
	}
	phys_avail[avail_slot] = 0;
	phys_avail[avail_slot + 1] = 0;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space.  It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = atop(phys_avail[avail_slot - 1]);

	cpu_tlb_flushID();
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_size_t s;
	int i, pv_npg;

	/*
	 * Are large page mappings enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");

	/*
	 * Calculate the size of the pv head table for superpages.
	 */
	pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);
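
	/*
	 * One pv head is kept per potential 2MB superpage, so pv_npg is
	 * the highest physical address divided by L2_SIZE, rounded up.
	 */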
	/*
	 * Allocate memory for the pv head table for superpages.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);
	TAILQ_INIT(&pv_dummy.pv_list);
}

static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
    "2MB page mapping counters");

static u_long pmap_l2_demotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
    &pmap_l2_demotions, 0, "2MB page demotions");

static u_long pmap_l2_p_failures;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
    &pmap_l2_p_failures, 0, "2MB page promotion failures");

static u_long pmap_l2_promotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
    &pmap_l2_promotions, 0, "2MB page promotions");

/*
 * Invalidate a single TLB entry.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vaae1is, %0	\n"
	    "dsb  ish		\n"
	    "isb		\n"
	    : : "r"(va >> PAGE_SHIFT));
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	sched_pin();
	dsb(ishst);
	for (addr = sva; addr < eva; addr += PAGE_SIZE) {
		__asm __volatile(
		    "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
	}
	__asm __volatile(
	    "dsb  ish	\n"
	    "isb	\n");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vmalle1is	\n"
	    "dsb  ish		\n"
	    "isb		\n");
	sched_unpin();
}
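
/*
 * In the sequences above, "dsb ishst" orders the page table store before
 * the invalidate, "tlbi ...is" broadcasts the invalidate to all CPUs in
 * the inner shareable domain, and "dsb ish" followed by "isb" waits for
 * its completion before execution continues.
 */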

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Find the block or page map for this virtual address. pmap_pte
	 * will return either a valid block/page entry, or NULL.
	 */
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);
		pa = tpte & ~ATTR_MASK;
		switch(lvl) {
		case 1:
			KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
			    ("pmap_extract: Invalid L1 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L1_OFFSET);
			break;
		case 2:
			KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
			    ("pmap_extract: Invalid L2 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L2_OFFSET);
			break;
		case 3:
			KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
			    ("pmap_extract: Invalid L3 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L3_OFFSET);
			break;
		}
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *pte, tpte;
	vm_offset_t off;
	vm_paddr_t pa;
	vm_page_t m;
	int lvl;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);

		KASSERT(lvl > 0 && lvl <= 3,
		    ("pmap_extract_and_hold: Invalid level %d", lvl));
		CTASSERT(L1_BLOCK == L2_BLOCK);
		KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
		    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
		    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
		    tpte & ATTR_DESCR_MASK));
		if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
		    ((prot & VM_PROT_WRITE) == 0)) {
			switch(lvl) {
			case 1:
				off = va & L1_OFFSET;
				break;
			case 2:
				off = va & L2_OFFSET;
				break;
			case 3:
			default:
				off = 0;
			}
			if (vm_page_pa_tryrelock(pmap,
			    (tpte & ~ATTR_MASK) | off, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;
	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		pa = 0;
		pte = pmap_pte(kernel_pmap, va, &lvl);
		if (pte != NULL) {
			tpte = pmap_load(pte);
			pa = tpte & ~ATTR_MASK;
			switch(lvl) {
			case 1:
				KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
				    ("pmap_kextract: Invalid L1 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L1_OFFSET);
				break;
			case 2:
				KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
				    ("pmap_kextract: Invalid L2 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L2_OFFSET);
				break;
			case 3:
				KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
				    ("pmap_kextract: Invalid L3 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L3_OFFSET);
				break;
			}
		}
	}
	return (pa);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

static void
pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
{
	pd_entry_t *pde;
	pt_entry_t *pte, attr;
	vm_offset_t va;
	int lvl;

	KASSERT((pa & L3_OFFSET) == 0,
	    ("pmap_kenter: Invalid physical address"));
	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kenter: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter: Mapping is not page-sized"));

	attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE;
	if (mode == DEVICE_MEMORY)
		attr |= ATTR_XN;

	va = sva;
	while (size != 0) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));

		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, (pa & ~L3_OFFSET) | attr);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{

	pmap_kenter(sva, size, pa, DEVICE_MEMORY);
}
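
/*
 * Device mappings made above carry ATTR_XN, so the kernel can never fetch
 * instructions from them.  As a hypothetical usage sketch, a driver
 * mapping a page-aligned register window at physical address pa would
 * call pmap_kenter_device(va, PAGE_SIZE, pa) with a page-aligned va.
 */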

/*
 * Remove a page from the kernel pagetables.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;
	int lvl;

	pte = pmap_pte(kernel_pmap, va, &lvl);
	KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
	KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));

	if (pmap_l3_valid_cacheable(pmap_load(pte)))
		cpu_dcache_wb_range(va, L3_SIZE);
	pmap_load_clear(pte);
	PTE_SYNC(pte);
	pmap_invalidate_page(kernel_pmap, va);
}

void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		pmap_load_clear(pte);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return (PHYS_TO_DMAP(start));
}

/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pd_entry_t *pde;
	pt_entry_t *pte, pa;
	vm_offset_t va;
	vm_page_t m;
	int i, lvl;

	va = sva;
	for (i = 0; i < count; i++) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2,
		    ("pmap_qenter: Invalid level %d", lvl));

		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
		if (m->md.pv_memattr == DEVICE_MEMORY)
			pa |= ATTR_XN;
		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, pa);
		PTE_SYNC(pte);

		va += L3_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));

	va = sva;
	while (count-- > 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		if (pte != NULL) {
			if (pmap_l3_valid_cacheable(pmap_load(pte)))
				cpu_dcache_wb_range(va, L3_SIZE);
			pmap_load_clear(pte);
			PTE_SYNC(pte);
		}

		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}
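
/*
 * Page table pages are identified by a pindex that encodes their level:
 * L3 page table pages use pindexes [0, NUL2E), L2 pages use
 * [NUL2E, NUL2E + NUL1E), and L1 pages lie above NUL2E + NUL1E.
 * _pmap_unwire_l3() and _pmap_alloc_l3() below rely on this encoding to
 * locate the parent table entry for a given page.
 */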

/*
 * Schedule the specified unused page table page to be freed.  Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= (NUL2E + NUL1E)) {
		/* l1 page */
		pd_entry_t *l0;

		l0 = pmap_l0(pmap, va);
		pmap_load_clear(l0);
		PTE_SYNC(l0);
	} else if (m->pindex >= NUL2E) {
		/* l2 page */
		pd_entry_t *l1;

		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
		PTE_SYNC(l1);
	} else {
		/* l3 page */
		pd_entry_t *l2;

		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
		PTE_SYNC(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUL2E) {
		/* We just released an l3, unhold the matching l2 */
		pd_entry_t *l1, tl1;
		vm_page_t l2pg;

		l1 = pmap_l1(pmap, va);
		tl1 = pmap_load(l1);
		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l2pg, free);
	} else if (m->pindex < (NUL2E + NUL1E)) {
		/* We just released an l2, unhold the matching l1 */
		pd_entry_t *l0, tl0;
		vm_page_t l1pg;

		l0 = pmap_l0(pmap, va);
		tl0 = pmap_load(l0);
		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l1pg, free);
	}
	pmap_invalidate_page(pmap, va);

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= (NUL2E + NUL1E)) {
		/* l1 page */
		pd_entry_t *l0;

		l0 = pmap_l0(pmap, va);
		pmap_load_clear(l0);
		PTE_SYNC(l0);
	} else if (m->pindex >= NUL2E) {
		/* l2 page */
		pd_entry_t *l1;

		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
		PTE_SYNC(l1);
	} else {
		/* l3 page */
		pd_entry_t *l2;

		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
		PTE_SYNC(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUL2E) {
		/* We just released an l3, unhold the matching l2 */
		pd_entry_t *l1, tl1;
		vm_page_t l2pg;

		l1 = pmap_l1(pmap, va);
		tl1 = pmap_load(l1);
		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l2pg, free);
	} else if (m->pindex < (NUL2E + NUL1E)) {
		/* We just released an l2, unhold the matching l1 */
		pd_entry_t *l0, tl0;
		vm_page_t l1pg;

		l0 = pmap_l0(pmap, va);
		tl0 = pmap_load(l0);
		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l1pg, free);
	}
	pmap_invalidate_page(pmap, va);

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}

/*
 * After removing an l3 entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
    struct spglist *free)
{
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
	return (pmap_unwire_l3(pmap, va, mpte, free));
}

void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
	pmap->pm_l0 = kernel_pmap->pm_l0;
	pmap->pm_root.rt_root = 0;
}

int
pmap_pinit(pmap_t pmap)
{
	vm_paddr_t l0phys;
	vm_page_t l0pt;

	/*
	 * allocate the l0 page
	 */
	while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
		VM_WAIT;

	l0phys = VM_PAGE_TO_PHYS(l0pt);
	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);

	if ((l0pt->flags & PG_ZERO) == 0)
		pagezero(pmap->pm_l0);

	pmap->pm_root.rt_root = 0;
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));

	return (1);
}
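/*
 * The page table page index space used below encodes which level a page
 * serves: indices in [0, NUL2E) name L3 table pages, [NUL2E, NUL2E + NUL1E)
 * name L2 table pages, and indices of NUL2E + NUL1E and above name L1 table
 * pages.  The range checks in _pmap_alloc_l3() and _pmap_unwire_l3() rely
 * on this layout.
 */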
/*
 * This routine is called if the desired page table page does not exist.
 *
 * If page table page allocation fails, this routine may sleep before
 * returning NULL.  It sleeps only if a lock pointer was given.
 *
 * Note: If a page allocation fails at page table level two or three,
 * one or two pages may be held during the wait, only to be released
 * afterwards.  This conservative approach is easily argued to avoid
 * race conditions.
 */
static vm_page_t
_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
	vm_page_t m, l1pg, l2pg;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if (lockp != NULL) {
			RELEASE_PV_LIST_LOCK(lockp);
			PMAP_UNLOCK(pmap);
			VM_WAIT;
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	if (ptepindex >= (NUL2E + NUL1E)) {
		pd_entry_t *l0;
		vm_pindex_t l0index;

		l0index = ptepindex - (NUL2E + NUL1E);
		l0 = &pmap->pm_l0[l0index];
		pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
		PTE_SYNC(l0);
	} else if (ptepindex >= NUL2E) {
		vm_pindex_t l0index, l1index;
		pd_entry_t *l0, *l1;
		pd_entry_t tl0;

		l1index = ptepindex - NUL2E;
		l0index = l1index >> L0_ENTRIES_SHIFT;

		l0 = &pmap->pm_l0[l0index];
		tl0 = pmap_load(l0);
		if (tl0 == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
			    lockp) == NULL) {
				--m->wire_count;
				/* XXX: release mem barrier? */
				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
				vm_page_free_zero(m);
				return (NULL);
			}
		} else {
			l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
			l1pg->wire_count++;
		}

		l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
		l1 = &l1[ptepindex & Ln_ADDR_MASK];
		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
		PTE_SYNC(l1);
	} else {
		vm_pindex_t l0index, l1index;
		pd_entry_t *l0, *l1, *l2;
		pd_entry_t tl0, tl1;

		l1index = ptepindex >> Ln_ENTRIES_SHIFT;
		l0index = l1index >> L0_ENTRIES_SHIFT;

		l0 = &pmap->pm_l0[l0index];
		tl0 = pmap_load(l0);
		if (tl0 == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUL2E + l1index,
			    lockp) == NULL) {
				--m->wire_count;
				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
				vm_page_free_zero(m);
				return (NULL);
			}
			tl0 = pmap_load(l0);
			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
			l1 = &l1[l1index & Ln_ADDR_MASK];
		} else {
			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
			l1 = &l1[l1index & Ln_ADDR_MASK];
			tl1 = pmap_load(l1);
			if (tl1 == 0) {
				/* recurse for allocating page dir */
				if (_pmap_alloc_l3(pmap, NUL2E + l1index,
				    lockp) == NULL) {
					--m->wire_count;
					/* XXX: release mem barrier? */
					atomic_subtract_int(
					    &vm_cnt.v_wire_count, 1);
					vm_page_free_zero(m);
					return (NULL);
				}
			} else {
				l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
				l2pg->wire_count++;
			}
		}

		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
		l2 = &l2[ptepindex & Ln_ADDR_MASK];
		pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
		PTE_SYNC(l2);
	}

	pmap_resident_count_inc(pmap, 1);

	return (m);
}

static vm_page_t
pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
	vm_pindex_t ptepindex;
	pd_entry_t *pde, tpde;
#ifdef INVARIANTS
	pt_entry_t *pte;
#endif
	vm_page_t m;
	int lvl;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = pmap_l2_pindex(va);
retry:
	/*
	 * Get the page directory entry
	 */
	pde = pmap_pde(pmap, va, &lvl);

	/*
	 * If the page table page is mapped, we just increment the hold count,
	 * and activate it.  If we get a level 2 pde it will point to a level 3
	 * table.
	 */
	switch (lvl) {
	case -1:
		break;
	case 0:
#ifdef INVARIANTS
		pte = pmap_l0_to_l1(pde, va);
		KASSERT(pmap_load(pte) == 0,
		    ("pmap_alloc_l3: TODO: l0 superpages"));
#endif
		break;
	case 1:
#ifdef INVARIANTS
		pte = pmap_l1_to_l2(pde, va);
		KASSERT(pmap_load(pte) == 0,
		    ("pmap_alloc_l3: TODO: l1 superpages"));
#endif
		break;
	case 2:
		tpde = pmap_load(pde);
		if (tpde != 0) {
			m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
			m->wire_count++;
			return (m);
		}
		break;
	default:
		panic("pmap_alloc_l3: Invalid level %d", lvl);
	}

	/*
	 * Here if the pte page isn't mapped, or if it has been deallocated.
	 */
	m = _pmap_alloc_l3(pmap, ptepindex, lockp);
	if (m == NULL && lockp != NULL)
		goto retry;

	return (m);
}
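/*
 * Note that pmap_alloc_l3() can return NULL only when the caller passed a
 * NULL lock pointer, which disables sleeping in _pmap_alloc_l3(); with a
 * lock pointer the retry loop above keeps trying after each VM_WAIT, so
 * such callers may assume a non-NULL result.
 */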
/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));
	KASSERT(vm_radix_is_empty(&pmap->pm_root),
	    ("pmap_release: pmap has reserved page table page(s)"));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));

	m->wire_count--;
	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
	vm_page_free_zero(m);
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "LU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "LU", "Amount of KVM free");

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_paddr_t paddr;
	vm_page_t nkpg;
	pd_entry_t *l0, *l1, *l2;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);

	addr = roundup2(addr, L2_SIZE);
	if (addr - 1 >= vm_map_max(kernel_map))
		addr = vm_map_max(kernel_map);
	while (kernel_vm_end < addr) {
		l0 = pmap_l0(kernel_pmap, kernel_vm_end);
		KASSERT(pmap_load(l0) != 0,
		    ("pmap_growkernel: No level 0 kernel entry"));

		l1 = pmap_l0_to_l1(l0, kernel_vm_end);
		if (pmap_load(l1) == 0) {
			/* We need a new PDP entry */
			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
			if ((nkpg->flags & PG_ZERO) == 0)
				pmap_zero_page(nkpg);
			paddr = VM_PAGE_TO_PHYS(nkpg);
			pmap_load_store(l1, paddr | L1_TABLE);
			PTE_SYNC(l1);
			continue; /* try again */
		}
		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
		if ((pmap_load(l2) & ATTR_AF) != 0) {
			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
			if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
				kernel_vm_end = vm_map_max(kernel_map);
				break;
			}
			continue;
		}

		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");
		if ((nkpg->flags & PG_ZERO) == 0)
			pmap_zero_page(nkpg);
		paddr = VM_PAGE_TO_PHYS(nkpg);
		pmap_load_store(l2, paddr | L2_TABLE);
		PTE_SYNC(l2);
		pmap_invalidate_page(kernel_pmap, kernel_vm_end);

		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
		if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) {
			kernel_vm_end = vm_map_max(kernel_map);
			break;
		}
	}
}


/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 3);
CTASSERT(_NPCPV == 168);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define	PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0	0xfffffffffffffffful
#define	PC_FREE1	0xfffffffffffffffful
#define	PC_FREE2	0x000000fffffffffful

static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
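/*
 * The three freemask words cover _NPCPV == 168 pv entries per chunk:
 * 64 + 64 + 40 bits, with the unused high 24 bits of the third word held
 * at zero.  A pv entry's index within its chunk therefore maps to
 * (field, bit) as field = idx / 64 and bit = idx % 64, which is the
 * arithmetic used by free_pv_entry() and get_pv_entry() below.
 */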
#if 0
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
    "Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
    "Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
    "Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
    "Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
    "Current number of pv entry allocs");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
    "Current number of pv entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
    "Current number of spare pv entries");
#endif
#endif /* 0 */

/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 *
 * Returns NULL if PV entries were reclaimed from the specified pmap.
 *
 * We do not, however, unmap 2mpages because subsequent accesses will
 * allocate per-page pv entries until repromotion occurs, thereby
 * exacerbating the shortage of free pv entries.
 */
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{
	struct pv_chunk *pc, *pc_marker, *pc_marker_end;
	struct pv_chunk_header pc_marker_b, pc_marker_end_b;
	struct md_page *pvh;
	pd_entry_t *pde;
	pmap_t next_pmap, pmap;
	pt_entry_t *pte, tpte;
	pv_entry_t pv;
	vm_offset_t va;
	vm_page_t m, m_pc;
	struct spglist free;
	uint64_t inuse;
	int bit, field, freed, lvl;
	static int active_reclaims = 0;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));

	pmap = NULL;
	m_pc = NULL;
	SLIST_INIT(&free);
	bzero(&pc_marker_b, sizeof(pc_marker_b));
	bzero(&pc_marker_end_b, sizeof(pc_marker_end_b));
	pc_marker = (struct pv_chunk *)&pc_marker_b;
	pc_marker_end = (struct pv_chunk *)&pc_marker_end_b;

	mtx_lock(&pv_chunks_mutex);
	active_reclaims++;
	TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru);
	TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru);
	while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end &&
	    SLIST_EMPTY(&free)) {
		next_pmap = pc->pc_pmap;
		if (next_pmap == NULL) {
			/*
			 * The next chunk is a marker.  However, it is
			 * not our marker, so active_reclaims must be
			 * > 1.  Consequently, the next_chunk code
			 * will not rotate the pv_chunks list.
			 */
			goto next_chunk;
		}
		mtx_unlock(&pv_chunks_mutex);

		/*
		 * A pv_chunk can only be removed from the pc_lru list
		 * when both pv_chunks_mutex is owned and the
		 * corresponding pmap is locked.
		 */
		if (pmap != next_pmap) {
			if (pmap != NULL && pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
			pmap = next_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap) {
				RELEASE_PV_LIST_LOCK(lockp);
				PMAP_LOCK(pmap);
				mtx_lock(&pv_chunks_mutex);
				continue;
			} else if (pmap != locked_pmap) {
				if (PMAP_TRYLOCK(pmap)) {
					mtx_lock(&pv_chunks_mutex);
					continue;
				} else {
					pmap = NULL; /* pmap is not locked */
					mtx_lock(&pv_chunks_mutex);
					pc = TAILQ_NEXT(pc_marker, pc_lru);
					if (pc == NULL ||
					    pc->pc_pmap != next_pmap)
						continue;
					goto next_chunk;
				}
			}
		}

		/*
		 * Destroy every non-wired, 4 KB page mapping in the chunk.
		 */
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
			    inuse != 0; inuse &= ~(1UL << bit)) {
				bit = ffsl(inuse) - 1;
				pv = &pc->pc_pventry[field * 64 + bit];
				va = pv->pv_va;
				pde = pmap_pde(pmap, va, &lvl);
				if (lvl != 2)
					continue;
				pte = pmap_l2_to_l3(pde, va);
				tpte = pmap_load(pte);
				if ((tpte & ATTR_SW_WIRED) != 0)
					continue;
				tpte = pmap_load_clear(pte);
				PTE_SYNC(pte);
				pmap_invalidate_page(pmap, va);
				m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
				if (pmap_page_dirty(tpte))
					vm_page_dirty(m);
				if ((tpte & ATTR_AF) != 0)
					vm_page_aflag_set(m, PGA_REFERENCED);
				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				m->md.pv_gen++;
				if (TAILQ_EMPTY(&m->md.pv_list) &&
				    (m->flags & PG_FICTITIOUS) == 0) {
					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
					if (TAILQ_EMPTY(&pvh->pv_list)) {
						vm_page_aflag_clear(m,
						    PGA_WRITEABLE);
					}
				}
				pc->pc_map[field] |= 1UL << bit;
				pmap_unuse_l3(pmap, va, pmap_load(pde), &free);
				freed++;
			}
		}
		if (freed == 0) {
			mtx_lock(&pv_chunks_mutex);
			goto next_chunk;
		}
		/* Every freed mapping is for a 4 KB page. */
		pmap_resident_count_dec(pmap, freed);
		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
		    pc->pc_map[2] == PC_FREE2) {
			PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
			PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
			PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
			/* Entire chunk is free; return it. */
			m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
			dump_drop_page(m_pc->phys_addr);
			mtx_lock(&pv_chunks_mutex);
			TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
			break;
		}
		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
		mtx_lock(&pv_chunks_mutex);
		/* One freed pv entry in locked_pmap is sufficient. */
		if (pmap == locked_pmap)
			break;

next_chunk:
		TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
		TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru);
		if (active_reclaims == 1 && pmap != NULL) {
			/*
			 * Rotate the pv chunks list so that we do not
			 * scan the same pv chunks that could not be
			 * freed (because they contained a wired
			 * and/or superpage mapping) on every
			 * invocation of reclaim_pv_chunk().
			 */
			while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) {
				MPASS(pc->pc_pmap != NULL);
				TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
				TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
			}
		}
	}
	TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru);
	TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru);
	active_reclaims--;
	mtx_unlock(&pv_chunks_mutex);
	if (pmap != NULL && pmap != locked_pmap)
		PMAP_UNLOCK(pmap);
	if (m_pc == NULL && !SLIST_EMPTY(&free)) {
		m_pc = SLIST_FIRST(&free);
		SLIST_REMOVE_HEAD(&free, plinks.s.ss);
		/* Recycle a freed page table page. */
		m_pc->wire_count = 1;
		atomic_add_int(&vm_cnt.v_wire_count, 1);
	}
	pmap_free_zero_pages(&free);
	return (m_pc);
}
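/*
 * reclaim_pv_chunk() scans the global pv_chunks list between two dummy
 * "marker" chunks (pc_pmap == NULL) that it inserts around its working
 * position.  Because the scan drops pv_chunks_mutex while it works on a
 * chunk, the markers are what keep the iteration stable when concurrent
 * reclaims or frees reshuffle the list.
 */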
/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 64;
	bit = idx % 64;
	pc->pc_map[field] |= 1ul << bit;
	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
	    pc->pc_map[2] != PC_FREE2) {
		/* 98% of the time, pc is already at the head of the list. */
		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
		}
		return;
	}
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

static void
free_pv_chunk(struct pv_chunk *pc)
{
	vm_page_t m;

	mtx_lock(&pv_chunks_mutex);
	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	mtx_unlock(&pv_chunks_mutex);
	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
	/* entire chunk is free, return it */
	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
	dump_drop_page(m->phys_addr);
	vm_page_unwire(m, PQ_NONE);
	vm_page_free(m);
}

/*
 * Returns a new PV entry, allocating a new PV chunk from the system when
 * needed.  If this PV chunk allocation fails and a PV list lock pointer was
 * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
 * returned.
 *
 * The given PV list lock may be released.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, struct rwlock **lockp)
{
	int bit, field;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
retry:
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = ffsl(pc->pc_map[field]) - 1;
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 64 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
			    pc->pc_map[2] == 0) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			PV_STAT(atomic_add_long(&pv_entry_count, 1));
			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
			return (pv);
		}
	}
	/* No free items, allocate another chunk */
	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED);
	if (m == NULL) {
		if (lockp == NULL) {
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		m = reclaim_pv_chunk(pmap, lockp);
		if (m == NULL)
			goto retry;
	}
	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
	dump_add_page(m->phys_addr);
	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
	pc->pc_pmap = pmap;
	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
	pc->pc_map[1] = PC_FREE1;
	pc->pc_map[2] = PC_FREE2;
	mtx_lock(&pv_chunks_mutex);
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	mtx_unlock(&pv_chunks_mutex);
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(atomic_add_long(&pv_entry_count, 1));
	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
	return (pv);
}
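/*
 * Illustrative caller pattern for get_pv_entry() (a sketch only; it
 * mirrors pmap_try_insert_pv_entry() below): allocate an entry, record
 * the virtual address, and link it onto the page's pv list while the
 * appropriate pv list lock is held.
 */
#if 0
	pv = get_pv_entry(pmap, &lock);	/* may drop and retake 'lock' */
	pv->pv_va = va;
	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
	m->md.pv_gen++;
#endif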
/*
 * Ensure that the number of spare PV entries in the specified pmap meets or
 * exceeds the given count, "needed".
 *
 * The given PV list lock may be released.
 */
static void
reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp)
{
	struct pch new_tail;
	struct pv_chunk *pc;
	vm_page_t m;
	int avail, free;
	bool reclaimed;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL"));

	/*
	 * Newly allocated PV chunks must be stored in a private list until
	 * the required number of PV chunks have been allocated.  Otherwise,
	 * reclaim_pv_chunk() could recycle one of these chunks.  In
	 * contrast, these chunks must be added to the pmap upon allocation.
	 */
	TAILQ_INIT(&new_tail);
retry:
	avail = 0;
	TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) {
		bit_count((bitstr_t *)pc->pc_map, 0,
		    sizeof(pc->pc_map) * NBBY, &free);
		if (free == 0)
			break;
		avail += free;
		if (avail >= needed)
			break;
	}
	for (reclaimed = false; avail < needed; avail += _NPCPV) {
		m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
		    VM_ALLOC_WIRED);
		if (m == NULL) {
			m = reclaim_pv_chunk(pmap, lockp);
			if (m == NULL)
				goto retry;
			reclaimed = true;
		}
		PV_STAT(atomic_add_int(&pc_chunk_count, 1));
		PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
		dump_add_page(m->phys_addr);
		pc = (void *)PHYS_TO_DMAP(m->phys_addr);
		pc->pc_pmap = pmap;
		pc->pc_map[0] = PC_FREE0;
		pc->pc_map[1] = PC_FREE1;
		pc->pc_map[2] = PC_FREE2;
		TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
		TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
		PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV));

		/*
		 * The reclaim might have freed a chunk from the current pmap.
		 * If that chunk contained available entries, we need to
		 * re-count the number of available entries.
		 */
		if (reclaimed)
			goto retry;
	}
	if (!TAILQ_EMPTY(&new_tail)) {
		mtx_lock(&pv_chunks_mutex);
		TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru);
		mtx_unlock(&pv_chunks_mutex);
	}
}

/*
 * First find and then remove the pv entry for the specified pmap and virtual
 * address from the specified pv list.  Returns the pv entry if found and NULL
 * otherwise.  This operation can be performed on pv lists for either 4KB or
 * 2MB page mappings.
 */
static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			pvh->pv_gen++;
			break;
		}
	}
	return (pv);
}

/*
 * After demotion from a 2MB page mapping to 512 4KB page mappings,
 * destroy the pv entry for the 2MB page mapping and reinstantiate the pv
 * entries for each of the 4KB page mappings.
 */
static void
pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
    struct rwlock **lockp)
{
	struct md_page *pvh;
	struct pv_chunk *pc;
	pv_entry_t pv;
	vm_offset_t va_last;
	vm_page_t m;
	int bit, field;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT((pa & L2_OFFSET) == 0,
	    ("pmap_pv_demote_l2: pa is not 2mpage aligned"));
	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);

	/*
	 * Transfer the 2mpage's pv entry for this mapping to the first
	 * page's pv list.  Once this transfer begins, the pv list lock
	 * must not be released until the last pv entry is reinstantiated.
	 */
	pvh = pa_to_pvh(pa);
	va = va & ~L2_OFFSET;
	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found"));
	m = PHYS_TO_VM_PAGE(pa);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
	m->md.pv_gen++;
	/* Instantiate the remaining Ln_ENTRIES - 1 pv entries. */
	PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1));
	va_last = va + L2_SIZE - PAGE_SIZE;
	for (;;) {
		pc = TAILQ_FIRST(&pmap->pm_pvchunk);
		KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 ||
		    pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare"));
		for (field = 0; field < _NPCM; field++) {
			while (pc->pc_map[field]) {
				bit = ffsl(pc->pc_map[field]) - 1;
				pc->pc_map[field] &= ~(1ul << bit);
				pv = &pc->pc_pventry[field * 64 + bit];
				va += PAGE_SIZE;
				pv->pv_va = va;
				m++;
				KASSERT((m->oflags & VPO_UNMANAGED) == 0,
				    ("pmap_pv_demote_l2: page %p is not managed", m));
				TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
				m->md.pv_gen++;
				if (va == va_last)
					goto out;
			}
		}
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
	}
out:
	if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) {
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
	}
	PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1));
	PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1));
}
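/*
 * A demotion consumes Ln_ENTRIES - 1 == 511 spare pv entries (the 2MB
 * mapping's own entry is recycled for the first 4KB page), which is why
 * the loop above asserts that a spare is always available; callers are
 * expected to have reserved the spares beforehand via
 * reserve_pv_entries().
 */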
/*
 * First find and then destroy the pv entry for the specified pmap and virtual
 * address.  This operation can be performed on pv lists for either 4KB or 2MB
 * page mappings.
 */
static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
	free_pv_entry(pmap, pv);
}

/*
 * Conditionally create the PV entry for a 4KB page mapping if the required
 * memory can be allocated without resorting to reclamation.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct rwlock **lockp)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/* Pass NULL instead of the lock pointer to disable reclamation. */
	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
		pv->pv_va = va;
		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_l3: unmap a single 4KB page from a process
 */
static int
pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
{
	struct md_page *pvh;
	pt_entry_t old_l3;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
		cpu_dcache_wb_range(va, L3_SIZE);
	old_l3 = pmap_load_clear(l3);
	PTE_SYNC(l3);
	pmap_invalidate_page(pmap, va);
	if (old_l3 & ATTR_SW_WIRED)
		pmap->pm_stats.wired_count -= 1;
	pmap_resident_count_dec(pmap, 1);
	if (old_l3 & ATTR_SW_MANAGED) {
		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
		if (pmap_page_dirty(old_l3))
			vm_page_dirty(m);
		if (old_l3 & ATTR_AF)
			vm_page_aflag_set(m, PGA_REFERENCED);
		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
		pmap_pvh_free(&m->md, pmap, va);
		if (TAILQ_EMPTY(&m->md.pv_list) &&
		    (m->flags & PG_FICTITIOUS) == 0) {
			pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
			if (TAILQ_EMPTY(&pvh->pv_list))
				vm_page_aflag_clear(m, PGA_WRITEABLE);
		}
	}
	return (pmap_unuse_l3(pmap, va, l2e, free));
}
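/*
 * pmap_remove_l3() returns the result of pmap_unuse_l3(), which is
 * non-zero when dropping this mapping freed the L3 page table page
 * itself.  pmap_remove() below uses that return value to stop walking a
 * level 3 table that no longer exists.
 */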
/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	struct rwlock *lock;
	vm_offset_t va, va_next;
	pd_entry_t *l0, *l1, *l2;
	pt_entry_t l3_paddr, *l3;
	struct spglist free;

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	SLIST_INIT(&free);

	PMAP_LOCK(pmap);

	lock = NULL;
	for (; sva < eva; sva = va_next) {

		if (pmap->pm_stats.resident_count == 0)
			break;

		l0 = pmap_l0(pmap, sva);
		if (pmap_load(l0) == 0) {
			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		l1 = pmap_l0_to_l1(l0, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		/*
		 * Calculate index for next page table.
		 */
		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (l2 == NULL)
			continue;

		l3_paddr = pmap_load(l2);

		if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) {
			/* TODO: Add pmap_remove_l2 */
			if (pmap_demote_l2_locked(pmap, l2, sva & ~L2_OFFSET,
			    &lock) == NULL)
				continue;
			l3_paddr = pmap_load(l2);
		}

		/*
		 * Weed out invalid mappings.
		 */
		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
			continue;

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (va_next > eva)
			va_next = eva;

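		/*
		 * Note: 'va' tracks the start of the current run of
		 * removed mappings so that TLB invalidations can be
		 * batched into pmap_invalidate_range() calls, one per
		 * contiguous run, instead of one per page.
		 */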
		va = va_next;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			if (l3 == NULL)
				panic("l3 == NULL");
			if (pmap_load(l3) == 0) {
				if (va != va_next) {
					pmap_invalidate_range(pmap, va, sva);
					va = va_next;
				}
				continue;
			}
			if (va == va_next)
				va = sva;
			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
			    &lock)) {
				sva += L3_SIZE;
				break;
			}
		}
		if (va != va_next)
			pmap_invalidate_range(pmap, va, sva);
	}
	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t pv;
	pmap_t pmap;
	struct rwlock *lock;
	pd_entry_t *pde, tpde;
	pt_entry_t *pte, tpte;
	vm_offset_t va;
	struct spglist free;
	int lvl, pvh_gen, md_gen;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_all: page %p is not managed", m));
	SLIST_INIT(&free);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
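	/*
	 * The retry protocol below avoids a lock-order problem: the pv
	 * list lock is held, but each entry's pmap must also be locked.
	 * If PMAP_TRYLOCK() fails, the pv list lock is dropped so the
	 * pmap lock can be taken safely, and the saved generation counts
	 * detect whether the pv lists changed in the meantime, forcing
	 * another pass.
	 */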
retry:
	rw_wlock(lock);
	while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			pvh_gen = pvh->pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen) {
				rw_wunlock(lock);
				PMAP_UNLOCK(pmap);
				goto retry;
			}
		}
		va = pv->pv_va;
		pte = pmap_pte(pmap, va, &lvl);
		KASSERT(pte != NULL,
		    ("pmap_remove_all: no page table entry found"));
		KASSERT(lvl == 2,
		    ("pmap_remove_all: invalid pte level %d", lvl));

		pmap_demote_l2_locked(pmap, pte, va, &lock);
		PMAP_UNLOCK(pmap);
	}
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			pvh_gen = pvh->pv_gen;
			md_gen = m->md.pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) {
				rw_wunlock(lock);
				PMAP_UNLOCK(pmap);
				goto retry;
			}
		}
		pmap_resident_count_dec(pmap, 1);

		pde = pmap_pde(pmap, pv->pv_va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_remove_all: no page directory entry found"));
		KASSERT(lvl == 2,
		    ("pmap_remove_all: invalid pde level %d", lvl));
		tpde = pmap_load(pde);

		pte = pmap_l2_to_l3(pde, pv->pv_va);
		tpte = pmap_load(pte);
		if (pmap_is_current(pmap) &&
		    pmap_l3_valid_cacheable(tpte))
			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
		pmap_load_clear(pte);
		PTE_SYNC(pte);
		pmap_invalidate_page(pmap, pv->pv_va);
		if (tpte & ATTR_SW_WIRED)
			pmap->pm_stats.wired_count--;
		if ((tpte & ATTR_AF) != 0)
			vm_page_aflag_set(m, PGA_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (pmap_page_dirty(tpte))
			vm_page_dirty(m);
		pmap_unuse_l3(pmap, pv->pv_va, tpde, &free);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	rw_wunlock(lock);
	pmap_free_zero_pages(&free);
}
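/*
 * Note that pmap_protect() below only ever strips access rights (write
 * and/or execute): requests that retain both of those permissions are a
 * no-op, and revoking read access entirely is handled by pmap_remove()
 * instead.
 */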
/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t va, va_next;
	pd_entry_t *l0, *l1, *l2;
	pt_entry_t *l3p, l3, nbits;

	KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot));
	if (prot == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE | VM_PROT_EXECUTE))
		return;

	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {

		l0 = pmap_l0(pmap, sva);
		if (pmap_load(l0) == 0) {
			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		l1 = pmap_l0_to_l1(l0, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (pmap_load(l2) == 0)
			continue;

		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
			l3p = pmap_demote_l2(pmap, l2, sva);
			if (l3p == NULL)
				continue;
		}
		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
		    ("pmap_protect: Invalid L2 entry after demotion"));

		if (va_next > eva)
			va_next = eva;

		va = va_next;
		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
		    sva += L3_SIZE) {
			l3 = pmap_load(l3p);
			if (!pmap_l3_valid(l3))
				continue;

			nbits = 0;
			if ((prot & VM_PROT_WRITE) == 0) {
				if ((l3 & ATTR_SW_MANAGED) &&
				    pmap_page_dirty(l3)) {
					vm_page_dirty(PHYS_TO_VM_PAGE(l3 &
					    ~ATTR_MASK));
				}
				nbits |= ATTR_AP(ATTR_AP_RO);
			}
			if ((prot & VM_PROT_EXECUTE) == 0)
				nbits |= ATTR_XN;

			pmap_set(l3p, nbits);
			PTE_SYNC(l3p);
			/* XXX: Use pmap_invalidate_range */
			pmap_invalidate_page(pmap, sva);
		}
	}
	PMAP_UNLOCK(pmap);
}
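/*
 * Illustrative use of pmap_protect() (a sketch; 'pmap', 'va', and 'size'
 * are assumptions of the example): revoking write access to a mapped
 * region while leaving it readable and executable.
 */
#if 0
	pmap_protect(pmap, va, va + size, VM_PROT_READ | VM_PROT_EXECUTE);
#endif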
/*
 * Inserts the specified page table page into the specified pmap's collection
 * of idle page table pages.  Each of a pmap's page table pages is responsible
 * for mapping a distinct range of virtual addresses.  The pmap's collection is
 * ordered by this virtual address range.
 */
static __inline int
pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	return (vm_radix_insert(&pmap->pm_root, mpte));
}

/*
 * Removes the page table page mapping the specified virtual address from the
 * specified pmap's collection of idle page table pages, and returns it.
 * Otherwise, returns NULL if there is no page table page corresponding to the
 * specified virtual address.
 */
static __inline vm_page_t
pmap_remove_pt_page(pmap_t pmap, vm_offset_t va)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va)));
}

/*
 * Performs a break-before-make update of a pmap entry.  This is needed when
 * either promoting or demoting pages to ensure the TLB doesn't get into an
 * inconsistent state.
 */
static void
pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte,
    vm_offset_t va, vm_size_t size)
{
	register_t intr;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Ensure we don't get switched out with the page table in an
	 * inconsistent state.  We also need to ensure no interrupts fire
	 * as they may make use of an address we are about to invalidate.
	 */
	intr = intr_disable();
	critical_enter();

	/* Clear the old mapping */
	pmap_load_clear(pte);
	PTE_SYNC(pte);
	pmap_invalidate_range(pmap, va, va + size);

	/* Create the new mapping */
	pmap_load_store(pte, newpte);
	PTE_SYNC(pte);

	critical_exit();
	intr_restore(intr);
}
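/*
 * The sequence above (invalidate the entry, flush the TLB, then write
 * the replacement) follows the ARMv8 "break-before-make" rule for
 * changing the size or output address of a live translation; skipping
 * the intermediate invalid state could let old and new TLB entries for
 * the same address coexist.
 */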

#if VM_NRESERVLEVEL > 0
/*
 * After promotion from 512 4KB page mappings to a single 2MB page mapping,
 * replace the many pv entries for the 4KB page mappings by a single pv entry
 * for the 2MB page mapping.
 */
static void
pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa,
    struct rwlock **lockp)
{
	struct md_page *pvh;
	pv_entry_t pv;
	vm_offset_t va_last;
	vm_page_t m;

	KASSERT((pa & L2_OFFSET) == 0,
	    ("pmap_pv_promote_l2: pa is not 2mpage aligned"));
	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa);

	/*
	 * Transfer the first page's pv entry for this mapping to the 2mpage's
	 * pv list.  Aside from avoiding the cost of a call to get_pv_entry(),
	 * a transfer avoids the possibility that get_pv_entry() calls
	 * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the
	 * mappings that is being promoted.
	 */
	m = PHYS_TO_VM_PAGE(pa);
	va = va & ~L2_OFFSET;
	pv = pmap_pvh_remove(&m->md, pmap, va);
	KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found"));
	pvh = pa_to_pvh(pa);
	TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next);
	pvh->pv_gen++;
	/* Free the remaining NPTEPG - 1 pv entries. */
	va_last = va + L2_SIZE - PAGE_SIZE;
	do {
		m++;
		va += PAGE_SIZE;
		pmap_pvh_free(&m->md, pmap, va);
	} while (va < va_last);
}

/*
 * Tries to promote the 512, contiguous 4KB page mappings that are within a
 * single level 2 table entry to a single 2MB page mapping.  For promotion
 * to occur, two conditions must be met: (1) the 4KB page mappings must map
 * aligned, contiguous physical memory and (2) the 4KB page mappings must have
 * identical characteristics.
 */
static void
pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va,
    struct rwlock **lockp)
{
	pt_entry_t *firstl3, *l3, newl2, oldl3, pa;
	vm_page_t mpte;
	vm_offset_t sva;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	sva = va & ~L2_OFFSET;
	firstl3 = pmap_l2_to_l3(l2, sva);
	newl2 = pmap_load(firstl3);

	/* Check that the alignment is valid */
	if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) {
		atomic_add_long(&pmap_l2_p_failures, 1);
		CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
		    " in pmap %p", va, pmap);
		return;
	}

	pa = newl2 + L2_SIZE - PAGE_SIZE;
	for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) {
		oldl3 = pmap_load(l3);
		if (oldl3 != pa) {
			atomic_add_long(&pmap_l2_p_failures, 1);
			CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx"
			    " in pmap %p", va, pmap);
			return;
		}
		pa -= PAGE_SIZE;
	}

	/*
	 * Save the page table page in its current state until the L2
	 * mapping the superpage is demoted by pmap_demote_l2() or
	 * destroyed by pmap_remove_l3().
	 */
	mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
	KASSERT(mpte >= vm_page_array &&
	    mpte < &vm_page_array[vm_page_array_size],
	    ("pmap_promote_l2: page table page is out of range"));
	KASSERT(mpte->pindex == pmap_l2_pindex(va),
	    ("pmap_promote_l2: page table page's pindex is wrong"));
	if (pmap_insert_pt_page(pmap, mpte)) {
		atomic_add_long(&pmap_l2_p_failures, 1);
		CTR2(KTR_PMAP,
		    "pmap_promote_l2: failure for va %#lx in pmap %p", va,
		    pmap);
		return;
	}

	if ((newl2 & ATTR_SW_MANAGED) != 0)
		pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp);

	newl2 &= ~ATTR_DESCR_MASK;
	newl2 |= L2_BLOCK;

	pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE);

	atomic_add_long(&pmap_l2_promotions, 1);
	CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va,
	    pmap);
}
#endif /* VM_NRESERVLEVEL > 0 */
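
/*
 * Illustrative walk (hypothetical values): with sva = 0x400000 and the
 * first L3 entry mapping pa 0x80200000, promotion requires that entry i
 * map pa 0x80200000 + i * PAGE_SIZE with bit-identical attributes for
 * all 512 entries; the backwards scan in pmap_promote_l2() compares raw
 * PTE values, so a single differing attribute or hole aborts the attempt.
 */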

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB:  This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    u_int flags, int8_t psind __unused)
{
	struct rwlock *lock;
	pd_entry_t *pde;
	pt_entry_t new_l3, orig_l3;
	pt_entry_t *l2, *l3;
	pv_entry_t pv;
	vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa;
	vm_page_t mpte, om, l1_m, l2_m, l3_m;
	boolean_t nosleep;
	int lvl;

	va = trunc_page(va);
	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
		VM_OBJECT_ASSERT_LOCKED(m->object);
	pa = VM_PAGE_TO_PHYS(m);
	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
	    L3_PAGE);
	if ((prot & VM_PROT_WRITE) == 0)
		new_l3 |= ATTR_AP(ATTR_AP_RO);
	if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
		new_l3 |= ATTR_XN;
	if ((flags & PMAP_ENTER_WIRED) != 0)
		new_l3 |= ATTR_SW_WIRED;
	if ((va >> 63) == 0)
		new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN;

	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);

	mpte = NULL;

	lock = NULL;
	PMAP_LOCK(pmap);

	pde = pmap_pde(pmap, va, &lvl);
	if (pde != NULL && lvl == 1) {
		l2 = pmap_l1_to_l2(pde, va);
		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK &&
		    (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET,
		    &lock)) != NULL) {
			l3 = &l3[pmap_l3_index(va)];
			if (va < VM_MAXUSER_ADDRESS) {
				mpte = PHYS_TO_VM_PAGE(
				    pmap_load(l2) & ~ATTR_MASK);
				mpte->wire_count++;
			}
			goto havel3;
		}
	}

	if (va < VM_MAXUSER_ADDRESS) {
		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
		if (mpte == NULL && nosleep) {
			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
			if (lock != NULL)
				rw_wunlock(lock);
			PMAP_UNLOCK(pmap);
			return (KERN_RESOURCE_SHORTAGE);
		}
		pde = pmap_pde(pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_enter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2,
		    ("pmap_enter: Invalid level %d", lvl));

		l3 = pmap_l2_to_l3(pde, va);
	} else {
		/*
		 * If we get a level 2 pde it must point to a level 3 entry
		 * otherwise we will need to create the intermediate tables
		 */
		if (lvl < 2) {
			switch (lvl) {
			default:
			case -1:
				/* Get the l0 pde to update */
				pde = pmap_l0(pmap, va);
				KASSERT(pde != NULL,
				    ("pmap_enter: Invalid l0 entry, va: 0x%lx",
				    va));

				l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
				    VM_ALLOC_ZERO);
				if (l1_m == NULL)
					panic("pmap_enter: l1 pte_m == NULL");
				if ((l1_m->flags & PG_ZERO) == 0)
					pmap_zero_page(l1_m);

				l1_pa = VM_PAGE_TO_PHYS(l1_m);
				pmap_load_store(pde, l1_pa | L0_TABLE);
				PTE_SYNC(pde);
				/* FALLTHROUGH */
			case 0:
				/* Get the l1 pde to update */
				pde = pmap_l0_to_l1(pde, va);
				KASSERT(pde != NULL,
				    ("pmap_enter: Invalid l1 entry, va: 0x%lx",
				    va));

				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
				    VM_ALLOC_ZERO);
				if (l2_m == NULL)
					panic("pmap_enter: l2 pte_m == NULL");
				if ((l2_m->flags & PG_ZERO) == 0)
					pmap_zero_page(l2_m);

				l2_pa = VM_PAGE_TO_PHYS(l2_m);
				pmap_load_store(pde, l2_pa | L1_TABLE);
				PTE_SYNC(pde);
				/* FALLTHROUGH */
			case 1:
				/* Get the l2 pde to update */
				pde = pmap_l1_to_l2(pde, va);

				l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
				    VM_ALLOC_ZERO);
				if (l3_m == NULL)
					panic("pmap_enter: l3 pte_m == NULL");
				if ((l3_m->flags & PG_ZERO) == 0)
					pmap_zero_page(l3_m);

				l3_pa = VM_PAGE_TO_PHYS(l3_m);
				pmap_load_store(pde, l3_pa | L2_TABLE);
				PTE_SYNC(pde);
				break;
			}
		}
		l3 = pmap_l2_to_l3(pde, va);
		pmap_invalidate_page(pmap, va);
	}
havel3:

	om = NULL;
	orig_l3 = pmap_load(l3);
	opa = orig_l3 & ~ATTR_MASK;

	/*
	 * Is the specified virtual address already mapped?
	 */
	if (pmap_l3_valid(orig_l3)) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is
		 * wired, the PT page will be also.
		 */
		if ((flags & PMAP_ENTER_WIRED) != 0 &&
		    (orig_l3 & ATTR_SW_WIRED) == 0)
			pmap->pm_stats.wired_count++;
		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
		    (orig_l3 & ATTR_SW_WIRED) != 0)
			pmap->pm_stats.wired_count--;

		/*
		 * Remove the extra PT page reference.
		 */
		if (mpte != NULL) {
			mpte->wire_count--;
			KASSERT(mpte->wire_count > 0,
			    ("pmap_enter: missing reference to page table page,"
			    " va: 0x%lx", va));
		}

		/*
		 * Has the physical page changed?
		 */
		if (opa == pa) {
			/*
			 * No, might be a protection or wiring change.
			 */
			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
				new_l3 |= ATTR_SW_MANAGED;
				if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
				    ATTR_AP(ATTR_AP_RW)) {
					vm_page_aflag_set(m, PGA_WRITEABLE);
				}
			}
			goto validate;
		}

		/* Flush the cache, there might be uncommitted data in it */
		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
			cpu_dcache_wb_range(va, L3_SIZE);
	} else {
		/*
		 * Increment the counters.
		 */
		if ((new_l3 & ATTR_SW_WIRED) != 0)
			pmap->pm_stats.wired_count++;
		pmap_resident_count_inc(pmap, 1);
	}
	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		new_l3 |= ATTR_SW_MANAGED;
		pv = get_pv_entry(pmap, &lock);
		pv->pv_va = va;
		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		m->md.pv_gen++;
		if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
			vm_page_aflag_set(m, PGA_WRITEABLE);
	}

	/*
	 * Update the L3 entry.
	 */
	if (orig_l3 != 0) {
validate:
		orig_l3 = pmap_load(l3);
		opa = orig_l3 & ~ATTR_MASK;

		if (opa != pa) {
			pmap_update_entry(pmap, l3, new_l3, va, PAGE_SIZE);
			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
				om = PHYS_TO_VM_PAGE(opa);
				if (pmap_page_dirty(orig_l3))
					vm_page_dirty(om);
				if ((orig_l3 & ATTR_AF) != 0)
					vm_page_aflag_set(om, PGA_REFERENCED);
				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
				pmap_pvh_free(&om->md, pmap, va);
				if ((om->aflags & PGA_WRITEABLE) != 0 &&
				    TAILQ_EMPTY(&om->md.pv_list) &&
				    ((om->flags & PG_FICTITIOUS) != 0 ||
				    TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list)))
					vm_page_aflag_clear(om, PGA_WRITEABLE);
			}
		} else {
			pmap_load_store(l3, new_l3);
			PTE_SYNC(l3);
			pmap_invalidate_page(pmap, va);
			if (pmap_page_dirty(orig_l3) &&
			    (orig_l3 & ATTR_SW_MANAGED) != 0)
				vm_page_dirty(m);
		}
	} else {
		pmap_load_store(l3, new_l3);
	}

	PTE_SYNC(l3);
	pmap_invalidate_page(pmap, va);

	if (pmap != pmap_kernel()) {
		if (pmap == &curproc->p_vmspace->vm_pmap &&
		    (prot & VM_PROT_EXECUTE) != 0)
			cpu_icache_sync_range(va, PAGE_SIZE);

#if VM_NRESERVLEVEL > 0
		if ((mpte == NULL || mpte->wire_count == NL3PG) &&
		    pmap_superpages_enabled() &&
		    (m->flags & PG_FICTITIOUS) == 0 &&
		    vm_reserv_level_iffullpop(m) == 0) {
			pmap_promote_l2(pmap, pde, va, &lock);
		}
#endif
	}

	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
	return (KERN_SUCCESS);
}
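
/*
 * Typical call (sketch; the flags shown are illustrative): wiring a
 * managed page read/write at va creates the mapping immediately, never
 * lazily:
 *
 *	error = pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE,
 *	    PMAP_ENTER_WIRED, 0);
 *
 * KERN_RESOURCE_SHORTAGE is returned only when PMAP_ENTER_NOSLEEP is
 * given and a page table page cannot be allocated.
 */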

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
 */
void
pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
    vm_page_t m_start, vm_prot_t prot)
{
	struct rwlock *lock;
	vm_offset_t va;
	vm_page_t m, mpte;
	vm_pindex_t diff, psize;

	VM_OBJECT_ASSERT_LOCKED(m_start->object);

	psize = atop(end - start);
	mpte = NULL;
	m = m_start;
	lock = NULL;
	PMAP_LOCK(pmap);
	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
		va = start + ptoa(diff);
		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
		m = TAILQ_NEXT(m, listq);
	}
	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
}
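
/*
 * Worked example (hypothetical numbers): with start = 0x10000 and
 * resident pages at pindex offsets 0, 1 and 3 from m_start, the loop
 * maps va 0x10000, 0x11000 and 0x13000; va 0x12000 is skipped because
 * no resident page carries that offset.
 */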

/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * but is *MUCH* faster than pmap_enter...
 */
void
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
{
	struct rwlock *lock;

	lock = NULL;
	PMAP_LOCK(pmap);
	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
}

static vm_page_t
pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
{
	struct spglist free;
	pd_entry_t *pde;
	pt_entry_t *l2, *l3;
	vm_paddr_t pa;
	int lvl;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->oflags & VPO_UNMANAGED) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		vm_pindex_t l2pindex;

		/*
		 * Calculate pagetable page index
		 */
		l2pindex = pmap_l2_pindex(va);
		if (mpte && (mpte->pindex == l2pindex)) {
			mpte->wire_count++;
		} else {
			/*
			 * Get the l2 entry
			 */
			pde = pmap_pde(pmap, va, &lvl);

			/*
			 * If the page table page is mapped, we just increment
			 * the hold count, and activate it.  Otherwise, we
			 * attempt to allocate a page table page.  If this
			 * attempt fails, we don't retry.  Instead, we give up.
			 */
			if (lvl == 1) {
				l2 = pmap_l1_to_l2(pde, va);
				if ((pmap_load(l2) & ATTR_DESCR_MASK) ==
				    L2_BLOCK)
					return (NULL);
			}
			if (lvl == 2 && pmap_load(pde) != 0) {
				mpte =
				    PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK);
				mpte->wire_count++;
			} else {
				/*
				 * Pass NULL instead of the PV list lock
				 * pointer, because we don't intend to sleep.
				 */
				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
				if (mpte == NULL)
					return (mpte);
			}
		}
		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
		l3 = &l3[pmap_l3_index(va)];
	} else {
		mpte = NULL;
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx",
		    va));
		KASSERT(lvl == 2,
		    ("pmap_enter_quick_locked: Invalid level %d", lvl));
		l3 = pmap_l2_to_l3(pde, va);
	}

	if (pmap_load(l3) != 0) {
		if (mpte != NULL) {
			mpte->wire_count--;
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0 &&
	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
		if (mpte != NULL) {
			SLIST_INIT(&free);
			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
				pmap_invalidate_page(pmap, va);
				pmap_free_zero_pages(&free);
			}
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Increment counters
	 */
	pmap_resident_count_inc(pmap, 1);

	pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
	    ATTR_AP(ATTR_AP_RO) | L3_PAGE;
	if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY)
		pa |= ATTR_XN;
	else if (va < VM_MAXUSER_ADDRESS)
		pa |= ATTR_PXN;

	/*
	 * Now validate mapping with RO protection
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0)
		pa |= ATTR_SW_MANAGED;
	pmap_load_store(l3, pa);
	PTE_SYNC(l3);
	pmap_invalidate_page(pmap, va);
	return (mpte);
}
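
/*
 * Illustrative call (arguments hypothetical): prefaulting a resident
 * page enters it read-only, so a later write faults and takes the full
 * pmap_enter() path:
 *
 *	(void)pmap_enter_quick_locked(pmap, va, m, VM_PROT_READ, NULL,
 *	    &lock);
 *
 * The entry is built with ATTR_AP(ATTR_AP_RO) regardless of prot, so no
 * dirty tracking is required here.
 */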

/*
 * This code maps large physical mmap regions into the
 * processor address space.  Note that some shortcuts
 * are taken, but the code works.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
    vm_pindex_t pindex, vm_size_t size)
{

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("pmap_object_init_pt: non-device object"));
}

/*
 * Clear the wired attribute from the mappings for the specified range of
 * addresses in the given pmap.  Every valid mapping within that range
 * must have the wired attribute set.  In contrast, invalid mappings
 * cannot have the wired attribute set, so they are ignored.
 *
 * The wired attribute of the page table entry is not a hardware feature,
 * so there is no need to invalidate any TLB entries.
 */
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va_next;
	pd_entry_t *l0, *l1, *l2;
	pt_entry_t *l3;

	PMAP_LOCK(pmap);
	for (; sva < eva; sva = va_next) {
		l0 = pmap_l0(pmap, sva);
		if (pmap_load(l0) == 0) {
			va_next = (sva + L0_SIZE) & ~L0_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		l1 = pmap_l0_to_l1(l0, sva);
		if (pmap_load(l1) == 0) {
			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
			if (va_next < sva)
				va_next = eva;
			continue;
		}

		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
		if (va_next < sva)
			va_next = eva;

		l2 = pmap_l1_to_l2(l1, sva);
		if (pmap_load(l2) == 0)
			continue;

		if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) {
			l3 = pmap_demote_l2(pmap, l2, sva);
			if (l3 == NULL)
				continue;
		}
		KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE,
		    ("pmap_unwire: Invalid l2 entry after demotion"));

		if (va_next > eva)
			va_next = eva;
		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
		    sva += L3_SIZE) {
			if (pmap_load(l3) == 0)
				continue;
			if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
				panic("pmap_unwire: l3 %#jx is missing "
				    "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));

			/*
			 * ATTR_SW_WIRED must be cleared atomically.  Although
			 * the pmap lock synchronizes access to it, another
			 * processor could be updating the access and dirty
			 * attributes concurrently.
			 */
			atomic_clear_long(l3, ATTR_SW_WIRED);
			pmap->pm_stats.wired_count--;
		}
	}
	PMAP_UNLOCK(pmap);
}
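
/*
 * Usage sketch (addresses hypothetical): after unwiring a 2MB region,
 *
 *	pmap_unwire(pmap, 0x200000, 0x400000);
 *
 * demotes any L2 block in the range and clears ATTR_SW_WIRED from each
 * L3 entry; wired_count drops by one per page and, because the bit is
 * software-only, no TLB invalidation is issued.
 */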

/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
}

/*
 * pmap_zero_page zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	pagezero((void *)va);
}

/*
 * pmap_zero_page_area zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	if (off == 0 && size == PAGE_SIZE)
		pagezero((void *)va);
	else
		bzero((char *)va + off, size);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.  This
 * is intended to be called from the vm_pagezero process only and
 * outside of Giant.
 */
void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	pagezero((void *)va);
}
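
/*
 * Example (offsets hypothetical): zeroing the second half of a page goes
 * through the direct map, so no temporary KVA mapping is created:
 *
 *	pmap_zero_page_area(m, PAGE_SIZE / 2, PAGE_SIZE / 2);
 *
 * Only a full-page request takes the pagezero() fast path.
 */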

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));

	pagecopy((void *)src, (void *)dst);
}

int unmapped_buf_allowed = 1;

void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
    vm_offset_t b_offset, int xfersize)
{
	void *a_cp, *b_cp;
	vm_page_t m_a, m_b;
	vm_paddr_t p_a, p_b;
	vm_offset_t a_pg_offset, b_pg_offset;
	int cnt;

	while (xfersize > 0) {
		a_pg_offset = a_offset & PAGE_MASK;
		m_a = ma[a_offset >> PAGE_SHIFT];
		p_a = m_a->phys_addr;
		b_pg_offset = b_offset & PAGE_MASK;
		m_b = mb[b_offset >> PAGE_SHIFT];
		p_b = m_b->phys_addr;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
			panic("!DMAP a %lx", p_a);
		} else {
			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
		}
		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
			panic("!DMAP b %lx", p_b);
		} else {
			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
		}
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
}

vm_offset_t
pmap_quick_enter_page(vm_page_t m)
{

	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
}

void
pmap_quick_remove_page(vm_offset_t addr)
{
}
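
/*
 * Worked example (hypothetical offsets): with a_offset = 0xf00,
 * b_offset = 0x800 and xfersize = 0x400, the first iteration copies
 * min(0x400, 0x100, 0x800) = 0x100 bytes, finishing the first page of
 * ma[]; the second iteration copies the remaining 0x300 bytes starting
 * at the next page's offset 0.
 */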

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	struct md_page *pvh;
	struct rwlock *lock;
	pv_entry_t pv;
	int loops = 0;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_page_exists_quick: page %p is not managed", m));
	rv = FALSE;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		if (PV_PMAP(pv) == pmap) {
			rv = TRUE;
			break;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) {
		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
			if (PV_PMAP(pv) == pmap) {
				rv = TRUE;
				break;
			}
			loops++;
			if (loops >= 16)
				break;
		}
	}
	rw_runlock(lock);
	return (rv);
}

/*
 * pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct rwlock *lock;
	struct md_page *pvh;
	pmap_t pmap;
	pt_entry_t *pte;
	pv_entry_t pv;
	int count, lvl, md_gen, pvh_gen;

	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (0);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
restart:
	count = 0;
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_runlock(lock);
			PMAP_LOCK(pmap);
			rw_rlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto restart;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
		if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
			count++;
		PMAP_UNLOCK(pmap);
	}
	if ((m->flags & PG_FICTITIOUS) == 0) {
		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
			pmap = PV_PMAP(pv);
			if (!PMAP_TRYLOCK(pmap)) {
				md_gen = m->md.pv_gen;
				pvh_gen = pvh->pv_gen;
				rw_runlock(lock);
				PMAP_LOCK(pmap);
				rw_rlock(lock);
				if (md_gen != m->md.pv_gen ||
				    pvh_gen != pvh->pv_gen) {
					PMAP_UNLOCK(pmap);
					goto restart;
				}
			}
			pte = pmap_pte(pmap, pv->pv_va, &lvl);
			if (pte != NULL &&
			    (pmap_load(pte) & ATTR_SW_WIRED) != 0)
				count++;
			PMAP_UNLOCK(pmap);
		}
	}
	rw_runlock(lock);
	return (count);
}

/*
 * Destroy all managed, non-wired mappings in the given user-space
 * pmap.  This pmap cannot be active on any processor besides the
 * caller.
 *
 * This function cannot be applied to the kernel pmap.  Moreover, it
 * is not intended for general use.  It is only to be used during
 * process termination.  Consequently, it can be implemented in ways
 * that make it faster than pmap_remove().  First, it can more quickly
 * destroy mappings by iterating over the pmap's collection of PV
 * entries, rather than searching the page table.  Second, it doesn't
 * have to test and clear the page table entries atomically, because
 * no processor is currently accessing the user address space.  In
 * particular, a page table entry's dirty bit won't change state once
 * this function starts.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pd_entry_t *pde;
	pt_entry_t *pte, tpte;
	struct spglist free;
	vm_page_t m, ml3, mt;
	pv_entry_t pv;
	struct md_page *pvh;
	struct pv_chunk *pc, *npc;
	struct rwlock *lock;
	int64_t bit;
	uint64_t inuse, bitmask;
	int allfree, field, freed, idx, lvl;
	vm_paddr_t pa;

	lock = NULL;

	SLIST_INIT(&free);
	PMAP_LOCK(pmap);
	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
		allfree = 1;
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			inuse = ~pc->pc_map[field] & pc_freemask[field];
			while (inuse != 0) {
				bit = ffsl(inuse) - 1;
				bitmask = 1UL << bit;
				idx = field * 64 + bit;
				pv = &pc->pc_pventry[idx];
				inuse &= ~bitmask;

				pde = pmap_pde(pmap, pv->pv_va, &lvl);
				KASSERT(pde != NULL,
				    ("Attempting to remove an unmapped page"));

				switch (lvl) {
				case 1:
					pte = pmap_l1_to_l2(pde, pv->pv_va);
					tpte = pmap_load(pte);
					KASSERT((tpte & ATTR_DESCR_MASK) ==
					    L2_BLOCK,
					    ("Attempting to remove an invalid "
					    "block: %lx", tpte));
					break;
				case 2:
					pte = pmap_l2_to_l3(pde, pv->pv_va);
					tpte = pmap_load(pte);
					KASSERT((tpte & ATTR_DESCR_MASK) ==
					    L3_PAGE,
					    ("Attempting to remove an invalid "
					    "page: %lx", tpte));
					break;
				default:
					panic(
					    "Invalid page directory level: %d",
					    lvl);
				}

				/*
				 * We cannot remove wired pages from a
				 * process' mapping at this time.
				 */
				if (tpte & ATTR_SW_WIRED) {
					allfree = 0;
					continue;
				}

				pa = tpte & ~ATTR_MASK;

				m = PHYS_TO_VM_PAGE(pa);
				KASSERT(m->phys_addr == pa,
				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
				    m, (uintmax_t)m->phys_addr,
				    (uintmax_t)tpte));

				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
				    m < &vm_page_array[vm_page_array_size],
				    ("pmap_remove_pages: bad pte %#jx",
				    (uintmax_t)tpte));

				if (pmap_is_current(pmap)) {
					if (lvl == 2 &&
					    pmap_l3_valid_cacheable(tpte)) {
						cpu_dcache_wb_range(pv->pv_va,
						    L3_SIZE);
					} else if (lvl == 1 &&
					    pmap_pte_valid_cacheable(tpte)) {
						cpu_dcache_wb_range(pv->pv_va,
						    L2_SIZE);
					}
				}
				pmap_load_clear(pte);
				PTE_SYNC(pte);
				pmap_invalidate_page(pmap, pv->pv_va);

				/*
				 * Update the vm_page_t clean/reference bits.
				 */
				if ((tpte & ATTR_AP_RW_BIT) ==
				    ATTR_AP(ATTR_AP_RW)) {
					switch (lvl) {
					case 1:
						for (mt = m; mt < &m[L2_SIZE /
						    PAGE_SIZE]; mt++)
							vm_page_dirty(mt);
						break;
					case 2:
						vm_page_dirty(m);
						break;
					}
				}

				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);

				/* Mark free */
				pc->pc_map[field] |= bitmask;
				switch (lvl) {
				case 1:
					pmap_resident_count_dec(pmap,
					    L2_SIZE / PAGE_SIZE);
					pvh = pa_to_pvh(tpte & ~ATTR_MASK);
					TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
					pvh->pv_gen++;
					if (TAILQ_EMPTY(&pvh->pv_list)) {
						for (mt = m; mt < &m[L2_SIZE /
						    PAGE_SIZE]; mt++)
							if ((mt->aflags &
							    PGA_WRITEABLE) != 0 &&
							    TAILQ_EMPTY(
							    &mt->md.pv_list))
								vm_page_aflag_clear(
								    mt, PGA_WRITEABLE);
					}
					ml3 = pmap_remove_pt_page(pmap,
					    pv->pv_va);
					if (ml3 != NULL) {
						pmap_resident_count_dec(pmap, 1);
						KASSERT(ml3->wire_count == NL3PG,
						    ("pmap_remove_pages: l3 page "
						    "wire count error"));
						ml3->wire_count = 0;
						pmap_add_delayed_free_list(ml3,
						    &free, FALSE);
						atomic_subtract_int(
						    &vm_cnt.v_wire_count, 1);
					}
					break;
				case 2:
					pmap_resident_count_dec(pmap, 1);
					TAILQ_REMOVE(&m->md.pv_list, pv,
					    pv_next);
					m->md.pv_gen++;
					if ((m->aflags & PGA_WRITEABLE) != 0 &&
					    TAILQ_EMPTY(&m->md.pv_list) &&
					    (m->flags & PG_FICTITIOUS) == 0) {
						pvh = pa_to_pvh(
						    VM_PAGE_TO_PHYS(m));
						if (TAILQ_EMPTY(&pvh->pv_list))
							vm_page_aflag_clear(m,
							    PGA_WRITEABLE);
					}
					break;
				}
				pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde),
				    &free);
				freed++;
			}
		}
		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
		if (allfree) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			free_pv_chunk(pc);
		}
	}
	pmap_invalidate_all(pmap);
	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}

/*
 * This is used to check if a page has been accessed or modified.  As we
 * don't have a bit to see if it has been modified we have to assume it
 * has been if the page is read/write.
 */
static boolean_t
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
{
	struct rwlock *lock;
	pv_entry_t pv;
	struct md_page *pvh;
	pt_entry_t *pte, mask, value;
	pmap_t pmap;
	int lvl, md_gen, pvh_gen;
	boolean_t rv;

	rv = FALSE;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
restart:
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_runlock(lock);
			PMAP_LOCK(pmap);
			rw_rlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto restart;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
		KASSERT(lvl == 3,
		    ("pmap_page_test_mappings: Invalid level %d", lvl));
		mask = 0;
		value = 0;
		if (modified) {
			mask |= ATTR_AP_RW_BIT;
			value |= ATTR_AP(ATTR_AP_RW);
		}
		if (accessed) {
			mask |= ATTR_AF | ATTR_DESCR_MASK;
			value |= ATTR_AF | L3_PAGE;
		}
		rv = (pmap_load(pte) & mask) == value;
		PMAP_UNLOCK(pmap);
		if (rv)
			goto out;
	}
	if ((m->flags & PG_FICTITIOUS) == 0) {
		pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
		TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
			pmap = PV_PMAP(pv);
			if (!PMAP_TRYLOCK(pmap)) {
				md_gen = m->md.pv_gen;
				pvh_gen = pvh->pv_gen;
				rw_runlock(lock);
				PMAP_LOCK(pmap);
				rw_rlock(lock);
				if (md_gen != m->md.pv_gen ||
				    pvh_gen != pvh->pv_gen) {
					PMAP_UNLOCK(pmap);
					goto restart;
				}
			}
			pte = pmap_pte(pmap, pv->pv_va, &lvl);
			KASSERT(lvl == 2,
			    ("pmap_page_test_mappings: Invalid level %d", lvl));
			mask = 0;
			value = 0;
			if (modified) {
				mask |= ATTR_AP_RW_BIT;
				value |= ATTR_AP(ATTR_AP_RW);
			}
			if (accessed) {
				mask |= ATTR_AF | ATTR_DESCR_MASK;
				value |= ATTR_AF | L2_BLOCK;
			}
			rv = (pmap_load(pte) & mask) == value;
			PMAP_UNLOCK(pmap);
			if (rv)
				goto out;
		}
	}
out:
	rw_runlock(lock);
	return (rv);
}
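
/*
 * Worked check (restating the logic above): testing "modified" on an L3
 * mapping uses mask = ATTR_AP_RW_BIT and value = ATTR_AP(ATTR_AP_RW), so
 * a read/write PTE compares equal and the page is reported dirty; there
 * is no separate hardware dirty bit to consult here.
 */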

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no PTEs can be dirty.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (FALSE);
	return (pmap_page_test_mappings(m, FALSE, TRUE));
}

/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *pte;
	boolean_t rv;
	int lvl;

	rv = FALSE;
	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, addr, &lvl);
	if (pte != NULL && pmap_load(pte) != 0) {
		rv = TRUE;
	}
	PMAP_UNLOCK(pmap);
	return (rv);
}

/*
 * pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	return (pmap_page_test_mappings(m, TRUE, FALSE));
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct md_page *pvh;
	pmap_t pmap;
	struct rwlock *lock;
	pv_entry_t next_pv, pv;
	pt_entry_t oldpte, *pte;
	vm_offset_t va;
	int lvl, md_gen, pvh_gen;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy :
	    pa_to_pvh(VM_PAGE_TO_PHYS(m));
retry_pv_loop:
	rw_wlock(lock);
	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			pvh_gen = pvh->pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen) {
				PMAP_UNLOCK(pmap);
				rw_wunlock(lock);
				goto retry_pv_loop;
			}
		}
		va = pv->pv_va;
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
		if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
			pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET,
			    &lock);
		KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
		    ("inconsistent pv lock %p %p for page %p",
		    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
		PMAP_UNLOCK(pmap);
	}
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			pvh_gen = pvh->pv_gen;
			md_gen = m->md.pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen ||
			    md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				rw_wunlock(lock);
				goto retry_pv_loop;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
retry:
		oldpte = pmap_load(pte);
		if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
			if (!atomic_cmpset_long(pte, oldpte,
			    oldpte | ATTR_AP(ATTR_AP_RO)))
				goto retry;
			if ((oldpte & ATTR_AF) != 0)
				vm_page_dirty(m);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(lock);
	vm_page_aflag_clear(m, PGA_WRITEABLE);
}
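
/*
 * The retry loop above is the usual lock-free downgrade pattern: load
 * the PTE, then publish the read-only copy only if it is still
 * unchanged.  Sketch (values hypothetical): if oldpte = pa |
 * ATTR_AP(ATTR_AP_RW) and another CPU updates the entry between the
 * load and the atomic_cmpset_long(), the compare-and-set fails and the
 * loop re-reads the entry instead of clobbering the concurrent update.
 */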

static __inline boolean_t
safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
{

	return (FALSE);
}

/*
 * pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	As an optimization, update the page's dirty field if a modified bit is
 *	found while counting reference bits.  This opportunistic update can be
 *	performed at low cost and can eliminate the need for some future calls
 *	to pmap_is_modified().  However, since this function stops after
 *	finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
 *	dirty pages.  Those dirty pages will only be detected by a future call
 *	to pmap_is_modified().
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t pv, pvf;
	pmap_t pmap;
	struct rwlock *lock;
	pd_entry_t *pde, tpde;
	pt_entry_t *pte, tpte;
	pt_entry_t *l3;
	vm_offset_t va;
	vm_paddr_t pa;
	int cleared, md_gen, not_cleared, lvl, pvh_gen;
	struct spglist free;
	bool demoted;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	SLIST_INIT(&free);
	cleared = 0;
	pa = VM_PAGE_TO_PHYS(m);
	lock = PHYS_TO_PV_LIST_LOCK(pa);
	pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa);
	rw_wlock(lock);
retry:
	not_cleared = 0;
	if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL)
		goto small_mappings;
	pv = pvf;
	do {
		if (pvf == NULL)
			pvf = pv;
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			pvh_gen = pvh->pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (pvh_gen != pvh->pv_gen) {
				PMAP_UNLOCK(pmap);
				goto retry;
			}
		}
		va = pv->pv_va;
		pde = pmap_pde(pmap, pv->pv_va, &lvl);
		KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found"));
		KASSERT(lvl == 1,
		    ("pmap_ts_referenced: invalid pde level %d", lvl));
		tpde = pmap_load(pde);
		KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE,
		    ("pmap_ts_referenced: found an invalid l1 table"));
		pte = pmap_l1_to_l2(pde, pv->pv_va);
		tpte = pmap_load(pte);
		if (pmap_page_dirty(tpte)) {
			/*
			 * Although "tpte" is mapping a 2MB page, because
			 * this function is called at a 4KB page granularity,
			 * we only update the 4KB page under test.
			 */
			vm_page_dirty(m);
		}
		if ((tpte & ATTR_AF) != 0) {
			/*
			 * Since this reference bit is shared by 512 4KB
			 * pages, it should not be cleared every time it is
			 * tested.  Apply a simple "hash" function on the
			 * physical page number, the virtual superpage number,
			 * and the pmap address to select one 4KB page out of
			 * the 512 on which testing the reference bit will
			 * result in clearing that reference bit.  This
			 * function is designed to avoid the selection of the
			 * same 4KB page for every 2MB page mapping.
			 *
			 * On demotion, a mapping that hasn't been referenced
			 * is simply destroyed.  To avoid the possibility of a
			 * subsequent page fault on a demoted wired mapping,
			 * always leave its reference bit set.  Moreover,
			 * since the superpage is wired, the current state of
			 * its reference bit won't affect page replacement.
			 */
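			/*
			 * Worked instance (inputs hypothetical): with
			 * pa = 0x80200000, pv_va = 0x40000000, and
			 * pmap = 0xffff000040123000, the hash is
			 * (0x80200 ^ 0x200 ^ 0xffff000040123000) &
			 * (Ln_ENTRIES - 1) == 0, so this mapping is the
			 * roughly one-in-512 whose reference bit is
			 * cleared on this pass.
			 */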
4197305882Sandrew */ 4198305882Sandrew panic("ARM64TODO: " 4199305882Sandrew "safe_to_clear_referenced\n"); 4200305882Sandrew } else if (pmap_demote_l2_locked(pmap, pte, 4201305882Sandrew pv->pv_va, &lock) != NULL) { 4202305882Sandrew demoted = true; 4203305882Sandrew va += VM_PAGE_TO_PHYS(m) - 4204305882Sandrew (tpte & ~ATTR_MASK); 4205305882Sandrew l3 = pmap_l2_to_l3(pte, va); 4206305882Sandrew pmap_remove_l3(pmap, l3, va, 4207305882Sandrew pmap_load(pte), NULL, &lock); 4208305882Sandrew } else 4209305882Sandrew demoted = true; 4210305882Sandrew 4211305882Sandrew if (demoted) { 4212305882Sandrew /* 4213305882Sandrew * The superpage mapping was removed 4214305882Sandrew * entirely and therefore 'pv' is no 4215305882Sandrew * longer valid. 4216305882Sandrew */ 4217305882Sandrew if (pvf == pv) 4218305882Sandrew pvf = NULL; 4219305882Sandrew pv = NULL; 4220305882Sandrew } 4221305882Sandrew cleared++; 4222305882Sandrew KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4223305882Sandrew ("inconsistent pv lock %p %p for page %p", 4224305882Sandrew lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4225305882Sandrew } else 4226305882Sandrew not_cleared++; 4227305882Sandrew } 4228305882Sandrew PMAP_UNLOCK(pmap); 4229305882Sandrew /* Rotate the PV list if it has more than one entry. */ 4230305882Sandrew if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 4231305882Sandrew TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 4232305882Sandrew TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 4233305882Sandrew pvh->pv_gen++; 4234305882Sandrew } 4235305882Sandrew if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) 4236305882Sandrew goto out; 4237305882Sandrew } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); 4238305882Sandrewsmall_mappings: 4239281494Sandrew if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 4240281494Sandrew goto out; 4241281494Sandrew pv = pvf; 4242281494Sandrew do { 4243281494Sandrew if (pvf == NULL) 4244281494Sandrew pvf = pv; 4245281494Sandrew pmap = PV_PMAP(pv); 4246281494Sandrew if (!PMAP_TRYLOCK(pmap)) { 4247305882Sandrew pvh_gen = pvh->pv_gen; 4248281494Sandrew md_gen = m->md.pv_gen; 4249281494Sandrew rw_wunlock(lock); 4250281494Sandrew PMAP_LOCK(pmap); 4251281494Sandrew rw_wlock(lock); 4252305882Sandrew if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { 4253281494Sandrew PMAP_UNLOCK(pmap); 4254281494Sandrew goto retry; 4255281494Sandrew } 4256281494Sandrew } 4257297446Sandrew pde = pmap_pde(pmap, pv->pv_va, &lvl); 4258297446Sandrew KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); 4259297446Sandrew KASSERT(lvl == 2, 4260297446Sandrew ("pmap_ts_referenced: invalid pde level %d", lvl)); 4261297446Sandrew tpde = pmap_load(pde); 4262297446Sandrew KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, 4263281494Sandrew ("pmap_ts_referenced: found an invalid l2 table")); 4264297446Sandrew pte = pmap_l2_to_l3(pde, pv->pv_va); 4265297446Sandrew tpte = pmap_load(pte); 4266324400Salc if (pmap_page_dirty(tpte)) 4267324400Salc vm_page_dirty(m); 4268297446Sandrew if ((tpte & ATTR_AF) != 0) { 4269297446Sandrew if (safe_to_clear_referenced(pmap, tpte)) { 4270281494Sandrew /* 4271281494Sandrew * TODO: We don't handle the access flag 4272281494Sandrew * at all. We need to be able to set it in 4273281494Sandrew * the exception handler. 
4274281494Sandrew				 */
4275286073Semaste				panic("ARM64TODO: safe_to_clear_referenced\n");
4276297446Sandrew			} else if ((tpte & ATTR_SW_WIRED) == 0) {
4277281494Sandrew				/*
4278281494Sandrew				 * Wired pages cannot be paged out so
4279281494Sandrew				 * doing accessed bit emulation for
4280281494Sandrew				 * them is wasted effort. We do the
4281281494Sandrew				 * hard work for unwired pages only.
4282281494Sandrew				 */
4283297446Sandrew				pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
4284288445Sandrew				    &free, &lock);
4285281494Sandrew				pmap_invalidate_page(pmap, pv->pv_va);
4286281494Sandrew				cleared++;
4287281494Sandrew				if (pvf == pv)
4288281494Sandrew					pvf = NULL;
4289281494Sandrew				pv = NULL;
4290281494Sandrew				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
4291281494Sandrew				    ("inconsistent pv lock %p %p for page %p",
4292281494Sandrew				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
4293281494Sandrew			} else
4294281494Sandrew				not_cleared++;
4295281494Sandrew		}
4296281494Sandrew		PMAP_UNLOCK(pmap);
4297281494Sandrew		/* Rotate the PV list if it has more than one entry. */
4298281494Sandrew		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
4299281494Sandrew			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
4300281494Sandrew			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
4301281494Sandrew			m->md.pv_gen++;
4302281494Sandrew		}
4303281494Sandrew	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
4304281494Sandrew	    not_cleared < PMAP_TS_REFERENCED_MAX);
4305281494Sandrewout:
4306281494Sandrew	rw_wunlock(lock);
4307281494Sandrew	pmap_free_zero_pages(&free);
4308281494Sandrew	return (cleared + not_cleared);
4309281494Sandrew}
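/*
 * Editor's illustrative sketch (not part of the original source): the
 * "hash" used by pmap_ts_referenced() above selects a single 4KB page
 * out of the 512 backed by one 2MB mapping, so the shared reference bit
 * is not cleared on every test.  A stand-alone model of that selection
 * predicate; all names here are hypothetical and assume 4KB base pages
 * with 2MB superpages.
 */
#include <stdbool.h>
#include <stdint.h>

#define	EX_PAGE_SHIFT	12		/* 4KB base pages */
#define	EX_L2_SHIFT	21		/* 2MB superpages */
#define	EX_Ln_ENTRIES	512		/* 4KB pages per 2MB block */

static bool
ex_should_clear_ref(uint64_t pa, uint64_t va, uintptr_t pmap_addr)
{

	/*
	 * Mix the physical page number, the virtual superpage number, and
	 * the pmap address so that different superpages tend to select
	 * different 4KB pages for the test-and-clear.
	 */
	return ((((pa >> EX_PAGE_SHIFT) ^ (va >> EX_L2_SHIFT) ^
	    (uint64_t)pmap_addr) & (EX_Ln_ENTRIES - 1)) == 0);
}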
4310281494Sandrew
4311281494Sandrew/*
4312281494Sandrew * Apply the given advice to the specified range of addresses within the
4313281494Sandrew * given pmap. Depending on the advice, clear the referenced and/or modified
4314281494Sandrew * flags in each mapping and set the mapped page's dirty field.
4315281494Sandrew * (ARM64TODO: not yet implemented; currently a no-op.) */
4316281494Sandrewvoid
4317281494Sandrewpmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
4318281494Sandrew{
4319281494Sandrew}
4320281494Sandrew
4321281494Sandrew/*
4322281494Sandrew * Clear the modify bits on the specified physical page.
4323281494Sandrew */
4324281494Sandrewvoid
4325281494Sandrewpmap_clear_modify(vm_page_t m)
4326281494Sandrew{
4327281494Sandrew
4328281494Sandrew	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
4329281494Sandrew	    ("pmap_clear_modify: page %p is not managed", m));
4330281494Sandrew	VM_OBJECT_ASSERT_WLOCKED(m->object);
4331281494Sandrew	KASSERT(!vm_page_xbusied(m),
4332281494Sandrew	    ("pmap_clear_modify: page %p is exclusive busied", m));
4333281494Sandrew
4334281494Sandrew	/*
4335281494Sandrew	 * If the page is not PGA_WRITEABLE, then no PTEs can be dirty.
4336281494Sandrew	 * If the object containing the page is locked and the page is not
4337281494Sandrew	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
4338281494Sandrew	 */
4339281494Sandrew	if ((m->aflags & PGA_WRITEABLE) == 0)
4340281494Sandrew		return;
4341281846Sandrew
4342286073Semaste	/* ARM64TODO: We lack support for tracking if a page is modified */
4343281494Sandrew}
4344281494Sandrew
4345282221Sandrewvoid *
4346282221Sandrewpmap_mapbios(vm_paddr_t pa, vm_size_t size)
4347282221Sandrew{
4348282221Sandrew
4349282221Sandrew	return ((void *)PHYS_TO_DMAP(pa));
4350282221Sandrew}
4351282221Sandrew
4352282221Sandrewvoid
4353282221Sandrewpmap_unmapbios(vm_paddr_t pa, vm_size_t size)
4354282221Sandrew{
4355282221Sandrew}
4356282221Sandrew
4357281494Sandrew/*
4358281494Sandrew * Sets the memory attribute for the specified page.
4359281494Sandrew */
4360281494Sandrewvoid
4361281494Sandrewpmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
4362281494Sandrew{
4363281494Sandrew
4364286080Sandrew	m->md.pv_memattr = ma;
4365286080Sandrew
4366286080Sandrew	/*
4367286080Sandrew	 * If "m" is a normal page, update its direct mapping. This update
4368286080Sandrew	 * can be relied upon to perform any cache operations that are
4369286080Sandrew	 * required for data coherence.
4370286080Sandrew	 */
4371286080Sandrew	if ((m->flags & PG_FICTITIOUS) == 0 &&
4372305882Sandrew	    pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE,
4373305882Sandrew	    m->md.pv_memattr) != 0)
4374305882Sandrew		panic("memory attribute change on the direct map failed");
4375281494Sandrew}
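/*
 * Editor's illustrative sketch (not part of the original source): both
 * pmap_mapbios() and the direct-map update in pmap_page_set_memattr()
 * rely on the DMAP being a simple linear window over physical memory.
 * A stand-alone model of that translation; EX_DMAP_BASE is a
 * hypothetical base address, not the real arm64 layout.
 */
#include <stdint.h>

#define	EX_DMAP_BASE	0xffff000000000000ULL	/* hypothetical DMAP base VA */

static uint64_t
ex_phys_to_dmap(uint64_t pa)
{

	/* A linear direct map translates by a constant offset. */
	return (EX_DMAP_BASE + pa);
}

static uint64_t
ex_dmap_to_phys(uint64_t va)
{

	return (va - EX_DMAP_BASE);
}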
4376281494Sandrew
4377281494Sandrew/*
4378305882Sandrew * Changes the specified virtual address range's memory type to that given by
4379305882Sandrew * the parameter "mode". The specified virtual address range must be
4380305882Sandrew * completely contained within either the direct map or the kernel map. If
4381305882Sandrew * the virtual address range is contained within the kernel map, then the
4382305882Sandrew * memory type for each of the corresponding ranges of the direct map is also
4383305882Sandrew * changed. (The corresponding ranges of the direct map are those ranges that
4384305882Sandrew * map the same physical pages as the specified virtual address range.) These
4385305882Sandrew * changes to the direct map are necessary because the architecture does not
4386305882Sandrew * define the behavior when two or more mappings to the same physical page
4387305882Sandrew * have different memory types.
4388305882Sandrew *
4389305882Sandrew * Returns zero if the change completed successfully, and either EINVAL or
4390305882Sandrew * ENOMEM if the change failed. Specifically, EINVAL is returned if some part
4391305882Sandrew * of the virtual address range was not mapped, and ENOMEM is returned if
4392305882Sandrew * there was insufficient memory available to complete the change. In the
4393305882Sandrew * latter case, the memory type may have been changed on some part of the
4394305882Sandrew * virtual address range or the direct map.
4395305882Sandrew */
4396305882Sandrewstatic int
4397305882Sandrewpmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
4398305882Sandrew{
4399305882Sandrew	int error;
4400305882Sandrew
4401305882Sandrew	PMAP_LOCK(kernel_pmap);
4402305882Sandrew	error = pmap_change_attr_locked(va, size, mode);
4403305882Sandrew	PMAP_UNLOCK(kernel_pmap);
4404305882Sandrew	return (error);
4405305882Sandrew}
4406305882Sandrew
4407305882Sandrewstatic int
4408305882Sandrewpmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
4409305882Sandrew{
4410305882Sandrew	vm_offset_t base, offset, tmpva;
4411305882Sandrew	pt_entry_t l3, *pte, *newpte;
4412305882Sandrew	int lvl;
4413305882Sandrew
4414305882Sandrew	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
4415305882Sandrew	base = trunc_page(va);
4416305882Sandrew	offset = va & PAGE_MASK;
4417305882Sandrew	size = round_page(offset + size);
4418305882Sandrew
4419305882Sandrew	if (!VIRT_IN_DMAP(base))
4420305882Sandrew		return (EINVAL);
4421305882Sandrew
4422305882Sandrew	for (tmpva = base; tmpva < base + size; ) {
4423305882Sandrew		pte = pmap_pte(kernel_pmap, tmpva, &lvl);
4424305882Sandrew		if (pte == NULL)
4425305882Sandrew			return (EINVAL);
4426305882Sandrew
4427305882Sandrew		if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) {
4428305882Sandrew			/*
4429305882Sandrew			 * We already have the correct attribute,
4430305882Sandrew			 * ignore this entry.
4431305882Sandrew			 */
4432305882Sandrew			switch (lvl) {
4433305882Sandrew			default:
4434305882Sandrew				panic("Invalid DMAP table level: %d\n", lvl);
4435305882Sandrew			case 1:
4436305882Sandrew				tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
4437305882Sandrew				break;
4438305882Sandrew			case 2:
4439305882Sandrew				tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
4440305882Sandrew				break;
4441305882Sandrew			case 3:
4442305882Sandrew				tmpva += PAGE_SIZE;
4443305882Sandrew				break;
4444305882Sandrew			}
4445305882Sandrew		} else {
4446305882Sandrew			/*
4447305882Sandrew			 * Split the entry to a level 3 table, then
4448305882Sandrew			 * set the new attribute.
4449305882Sandrew			 */
4450305882Sandrew			switch (lvl) {
4451305882Sandrew			default:
4452305882Sandrew				panic("Invalid DMAP table level: %d\n", lvl);
4453305882Sandrew			case 1:
4454305882Sandrew				newpte = pmap_demote_l1(kernel_pmap, pte,
4455305882Sandrew				    tmpva & ~L1_OFFSET);
4456305882Sandrew				if (newpte == NULL)
4457305882Sandrew					return (EINVAL);
4458305882Sandrew				pte = pmap_l1_to_l2(pte, tmpva);	/* FALLTHROUGH */
4459305882Sandrew			case 2:
4460305882Sandrew				newpte = pmap_demote_l2(kernel_pmap, pte,
4461305882Sandrew				    tmpva & ~L2_OFFSET);
4462305882Sandrew				if (newpte == NULL)
4463305882Sandrew					return (EINVAL);
4464305882Sandrew				pte = pmap_l2_to_l3(pte, tmpva);	/* FALLTHROUGH */
4465305882Sandrew			case 3:
4466305882Sandrew				/* Update the entry */
4467305882Sandrew				l3 = pmap_load(pte);
4468305882Sandrew				l3 &= ~ATTR_IDX_MASK;
4469305882Sandrew				l3 |= ATTR_IDX(mode);
4470319203Sandrew				if (mode == DEVICE_MEMORY)
4471319203Sandrew					l3 |= ATTR_XN;
4472305882Sandrew
4473305882Sandrew				pmap_update_entry(kernel_pmap, pte, l3, tmpva,
4474305882Sandrew				    PAGE_SIZE);
4475305882Sandrew
4476305882Sandrew				/*
4477305882Sandrew				 * If moving to a non-cacheable entry, flush
4478305882Sandrew				 * the cache.
4479305882Sandrew				 */
4480305882Sandrew				if (mode == VM_MEMATTR_UNCACHEABLE)
4481305882Sandrew					cpu_dcache_wbinv_range(tmpva, L3_SIZE);
4482305882Sandrew
4483305882Sandrew				break;
4484305882Sandrew			}
4485305882Sandrew			tmpva += PAGE_SIZE;
4486305882Sandrew		}
4487305882Sandrew	}
4488305882Sandrew
4489305882Sandrew	return (0);
4490305882Sandrew}
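/*
 * Editor's illustrative sketch (not part of the original source): the
 * walk in pmap_change_attr_locked() advances by the size of the mapping
 * found at each level.  A stand-alone model of that stride computation,
 * assuming a 4KB granule (1GB L1 blocks, 2MB L2 blocks, 4KB L3 pages);
 * the names are hypothetical.
 */
#include <stdint.h>

#define	EX_L1_SIZE	(1UL << 30)	/* 1GB */
#define	EX_L2_SIZE	(1UL << 21)	/* 2MB */
#define	EX_L3_SIZE	(1UL << 12)	/* 4KB */

static uint64_t
ex_next_va(uint64_t tmpva, int lvl)
{

	/* Round down to the start of the current block, then step over it. */
	switch (lvl) {
	case 1:
		return ((tmpva & ~(EX_L1_SIZE - 1)) + EX_L1_SIZE);
	case 2:
		return ((tmpva & ~(EX_L2_SIZE - 1)) + EX_L2_SIZE);
	default:
		return (tmpva + EX_L3_SIZE);
	}
}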
4491305882Sandrew
4492305882Sandrew/*
4493305882Sandrew * Create an L2 table to map all addresses within an L1 mapping.
4494305882Sandrew */
4495305882Sandrewstatic pt_entry_t *
4496305882Sandrewpmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
4497305882Sandrew{
4498305882Sandrew	pt_entry_t *l2, newl2, oldl1;
4499305882Sandrew	vm_offset_t tmpl1;
4500305882Sandrew	vm_paddr_t l2phys, phys;
4501305882Sandrew	vm_page_t ml2;
4502305882Sandrew	int i;
4503305882Sandrew
4504305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4505305882Sandrew	oldl1 = pmap_load(l1);
4506305882Sandrew	KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
4507305882Sandrew	    ("pmap_demote_l1: Demoting a non-block entry"));
4508305882Sandrew	KASSERT((va & L1_OFFSET) == 0,
4509305882Sandrew	    ("pmap_demote_l1: Invalid virtual address %#lx", va));
4510305882Sandrew	KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
4511305882Sandrew	    ("pmap_demote_l1: Level 1 table shouldn't be managed"));
4512305882Sandrew
4513305882Sandrew	tmpl1 = 0;
4514305882Sandrew	if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
4515305882Sandrew		tmpl1 = kva_alloc(PAGE_SIZE);
4516305882Sandrew		if (tmpl1 == 0)
4517305882Sandrew			return (NULL);
4518305882Sandrew	}
4519305882Sandrew
4520305882Sandrew	if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
4521305882Sandrew	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
4522305882Sandrew		CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx in pmap %p", va, pmap);
4523305882Sandrew		if (tmpl1 != 0)	/* don't leak the temporary KVA */
4524305882Sandrew			kva_free(tmpl1, PAGE_SIZE);
4525305882Sandrew		return (NULL);
4526305882Sandrew	}
4527305882Sandrew	l2phys = VM_PAGE_TO_PHYS(ml2);
4528305882Sandrew	l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
4529305882Sandrew
4530305882Sandrew	/* The physical address the range points at */
4531305882Sandrew	phys = oldl1 & ~ATTR_MASK;
4532305882Sandrew	/* The attributes from the old l1 entry to be copied */
4533305882Sandrew	newl2 = oldl1 & ATTR_MASK;
4534305882Sandrew
4535305882Sandrew	/* Create the new entries */
4536305882Sandrew	for (i = 0; i < Ln_ENTRIES; i++) {
4537305882Sandrew		l2[i] = newl2 | phys;
4538305882Sandrew		phys += L2_SIZE;
4539305882Sandrew	}
4540305882Sandrew	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
4541305882Sandrew	KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
4542305882Sandrew	    ("Invalid l2 page (%lx != %lx)", l2[0],
4543305882Sandrew	    (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
4544305882Sandrew
4545305882Sandrew	if (tmpl1 != 0) {
4546305882Sandrew		pmap_kenter(tmpl1, PAGE_SIZE,
4547305882Sandrew		    DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY);
4548305882Sandrew		l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
4549305882Sandrew	}
4550305882Sandrew
4551305882Sandrew	pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
4552305882Sandrew
4553305882Sandrew	if (tmpl1 != 0) {
4554305882Sandrew		pmap_kremove(tmpl1);
4555305882Sandrew		kva_free(tmpl1, PAGE_SIZE);
4556305882Sandrew	}
4557305882Sandrew
4558305882Sandrew	return (l2);
4559305882Sandrew}
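/*
 * Editor's illustrative sketch (not part of the original source): the
 * demotion above replaces one 1GB L1 block with 512 2MB L2 blocks that
 * carry the same attributes and cover the same physical range.  A
 * stand-alone model of the table fill; EX_ATTR_MASK is a hypothetical
 * attribute mask (which, as in the code above, is assumed to include
 * the block descriptor bits).
 */
#include <stdint.h>

#define	EX_NL2		512			/* L2 entries per table */
#define	EX_L2_BLK	(1UL << 21)		/* 2MB per L2 block */
#define	EX_ATTR_MASK	0xfff0000000000fffUL	/* hypothetical attr bits */

static void
ex_fill_l2(uint64_t *l2, uint64_t oldl1)
{
	uint64_t attrs, phys;
	int i;

	attrs = oldl1 & EX_ATTR_MASK;	/* copy the old entry's attributes */
	phys = oldl1 & ~EX_ATTR_MASK;	/* start of the old 1GB range */
	for (i = 0; i < EX_NL2; i++) {
		l2[i] = attrs | phys;
		phys += EX_L2_BLK;
	}
}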
4560305882Sandrew
4561305882Sandrew/*
4562305882Sandrew * Create an L3 table to map all addresses within an L2 mapping.
4563305882Sandrew */
4564305882Sandrewstatic pt_entry_t *
4565305882Sandrewpmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
4566305882Sandrew    struct rwlock **lockp)
4567305882Sandrew{
4568305882Sandrew	pt_entry_t *l3, newl3, oldl2;
4569305882Sandrew	vm_offset_t tmpl2;
4570305882Sandrew	vm_paddr_t l3phys, phys;
4571305882Sandrew	vm_page_t ml3;
4572305882Sandrew	int i;
4573305882Sandrew
4574305882Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4575305882Sandrew	l3 = NULL;
4576305882Sandrew	oldl2 = pmap_load(l2);
4577305882Sandrew	KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
4578305882Sandrew	    ("pmap_demote_l2: Demoting a non-block entry"));
4579305882Sandrew	KASSERT((va & L2_OFFSET) == 0,
4580305882Sandrew	    ("pmap_demote_l2: Invalid virtual address %#lx", va));
4581305882Sandrew
4582305882Sandrew	tmpl2 = 0;
4583305882Sandrew	if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
4584305882Sandrew		tmpl2 = kva_alloc(PAGE_SIZE);
4585305882Sandrew		if (tmpl2 == 0)
4586305882Sandrew			return (NULL);
4587305882Sandrew	}
4588305882Sandrew
4589318716Smarkj	if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
4590305882Sandrew		ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
4591305882Sandrew		    (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
4592305882Sandrew		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
4593305882Sandrew		if (ml3 == NULL) {
4594305882Sandrew			CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
4595305882Sandrew			    " in pmap %p", va, pmap);
4596305882Sandrew			goto fail;
4597305882Sandrew		}
4598305882Sandrew		if (va < VM_MAXUSER_ADDRESS)
4599305882Sandrew			pmap_resident_count_inc(pmap, 1);
4600305882Sandrew	}
4601305882Sandrew
4602305882Sandrew	l3phys = VM_PAGE_TO_PHYS(ml3);
4603305882Sandrew	l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
4604305882Sandrew
4605305882Sandrew	/* The physical address the range points at */
4606305882Sandrew	phys = oldl2 & ~ATTR_MASK;
4607305882Sandrew	/* The attributes from the old l2 entry to be copied */
4608305882Sandrew	newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE;
4609305882Sandrew
4610305882Sandrew	/*
4611305882Sandrew	 * If the page table page is new, initialize it.
4612305882Sandrew	 */
4613305882Sandrew	if (ml3->wire_count == 1) {
4614336765Smarkj		ml3->wire_count = NL3PG;
4615305882Sandrew		for (i = 0; i < Ln_ENTRIES; i++) {
4616305882Sandrew			l3[i] = newl3 | phys;
4617305882Sandrew			phys += L3_SIZE;
4618305882Sandrew		}
4619305882Sandrew		cpu_dcache_wb_range((vm_offset_t)l3, PAGE_SIZE);
4620305882Sandrew	}
4621305882Sandrew	KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE),
4622305882Sandrew	    ("Invalid l3 page (%lx != %lx)", l3[0],
4623305882Sandrew	    (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE));
4624305882Sandrew
4625305882Sandrew	/*
4626305882Sandrew	 * Map the temporary page so we don't lose access to the l2 table.
4627305882Sandrew	 */
4628305882Sandrew	if (tmpl2 != 0) {
4629305882Sandrew		pmap_kenter(tmpl2, PAGE_SIZE,
4630305882Sandrew		    DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY);
4631305882Sandrew		l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK));
4632305882Sandrew	}
4633305882Sandrew
4634305882Sandrew	/*
4635305882Sandrew	 * The spare PV entries must be reserved prior to demoting the
4636305882Sandrew	 * mapping, that is, prior to changing the L2 entry. Otherwise, the state
4637305882Sandrew	 * of the L2 and the PV lists will be inconsistent, which can result
4638305882Sandrew	 * in reclaim_pv_chunk() attempting to remove a PV entry from the
4639305882Sandrew	 * wrong PV list and pmap_pv_demote_l2() failing to find the expected
4640305882Sandrew	 * PV entry for the 2MB page mapping that is being demoted.
4641305882Sandrew	 */
4642305882Sandrew	if ((oldl2 & ATTR_SW_MANAGED) != 0)
4643305882Sandrew		reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp);
4644305882Sandrew
4645305882Sandrew	pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE);
4646305882Sandrew
4647305882Sandrew	/*
4648305882Sandrew	 * Demote the PV entry.
4649305882Sandrew	 */
4650305882Sandrew	if ((oldl2 & ATTR_SW_MANAGED) != 0)
4651305882Sandrew		pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp);
4652305882Sandrew
4653305882Sandrew	atomic_add_long(&pmap_l2_demotions, 1);
4654305882Sandrew	CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx"
4655305882Sandrew	    " in pmap %p %lx", va, pmap, l3[0]);
4656305882Sandrew
4657305882Sandrewfail:
4658305882Sandrew	if (tmpl2 != 0) {
4659305882Sandrew		pmap_kremove(tmpl2);
4660305882Sandrew		kva_free(tmpl2, PAGE_SIZE);
4661305882Sandrew	}
4662305882Sandrew
4663305882Sandrew	return (l3);
4664305882Sandrew
4665305882Sandrew}
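/*
 * Editor's illustrative sketch (not part of the original source): both
 * demotion routines above check whether the page-table page being
 * rewritten is itself mapped by the entry under demotion; if so, a
 * temporary kernel mapping keeps it addressable across the update.  A
 * stand-alone model of that overlap test, with hypothetical names.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
ex_table_in_range(uintptr_t table, uintptr_t va, uint64_t blksize)
{

	/*
	 * True when "table" lies inside [va, va + blksize), i.e. the
	 * mapping being replaced covers the table itself.
	 */
	return (va <= table && va + blksize > table);
}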
4666305882Sandrew
4667305882Sandrewstatic pt_entry_t *
4668305882Sandrewpmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va)
4669305882Sandrew{
4670305882Sandrew	struct rwlock *lock;
4671305882Sandrew	pt_entry_t *l3;
4672305882Sandrew
4673305882Sandrew	lock = NULL;
4674305882Sandrew	l3 = pmap_demote_l2_locked(pmap, l2, va, &lock);
4675305882Sandrew	if (lock != NULL)
4676305882Sandrew		rw_wunlock(lock);
4677305882Sandrew	return (l3);
4678305882Sandrew}
4679305882Sandrew
4680305882Sandrew/*
4681281494Sandrew * perform the pmap work for mincore
4682281494Sandrew */
4683281494Sandrewint
4684281494Sandrewpmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
4685281494Sandrew{
4686337466Smarkj	pt_entry_t *pte, tpte;
4687337466Smarkj	vm_paddr_t mask, pa;
4688337466Smarkj	int lvl, val;
4689287570Sandrew	bool managed;
4690281494Sandrew
4691287570Sandrew	PMAP_LOCK(pmap);
4692287570Sandrewretry:
4693287570Sandrew	val = 0;
4694337466Smarkj	pte = pmap_pte(pmap, addr, &lvl);
4695337466Smarkj	if (pte != NULL) {
4696337466Smarkj		tpte = pmap_load(pte);
4697287570Sandrew
4698337466Smarkj		switch (lvl) {
4699337466Smarkj		case 3:
4700337466Smarkj			mask = L3_OFFSET;
4701337466Smarkj			break;
4702337466Smarkj		case 2:
4703337466Smarkj			mask = L2_OFFSET;
4704337466Smarkj			break;
4705337466Smarkj		case 1:
4706337466Smarkj			mask = L1_OFFSET;
4707337466Smarkj			break;
4708337466Smarkj		default:
4709337466Smarkj			panic("pmap_mincore: invalid level %d", lvl);
4710337466Smarkj		}
4711295425Swma
4712337466Smarkj		val = MINCORE_INCORE;
4713337466Smarkj		if (lvl != 3)
4714337466Smarkj			val |= MINCORE_SUPER;
4715337466Smarkj		if (pmap_page_dirty(tpte))
4716287570Sandrew			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4717337466Smarkj		if ((tpte & ATTR_AF) == ATTR_AF)
4718287570Sandrew			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
4719287570Sandrew
4720337466Smarkj		managed = (tpte & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
4721337466Smarkj		pa = (tpte & ~ATTR_MASK) | (addr & mask);
4722337466Smarkj	} else
4723337466Smarkj		managed = false;
4724295425Swma
4725287570Sandrew	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
4726287570Sandrew	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
4727287570Sandrew		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
4728287570Sandrew		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
4729287570Sandrew			goto retry;
4730287570Sandrew	} else
4731287570Sandrew		PA_UNLOCK_COND(*locked_pa);
4732287570Sandrew	PMAP_UNLOCK(pmap);
4733287570Sandrew
4734287570Sandrew	return (val);
4735281494Sandrew}
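/*
 * Editor's illustrative sketch (not part of the original source): a
 * stand-alone model of how pmap_mincore() composes its result from the
 * level of the mapping it found and that mapping's dirty/accessed
 * state.  The flag values here are hypothetical placeholders, not the
 * real MINCORE_* values.
 */
#include <stdbool.h>

#define	EX_MINCORE_INCORE	0x01
#define	EX_MINCORE_MODIFIED	0x02
#define	EX_MINCORE_REFERENCED	0x04
#define	EX_MINCORE_SUPER	0x20

static int
ex_mincore_flags(int lvl, bool dirty, bool accessed)
{
	int val;

	val = EX_MINCORE_INCORE;
	if (lvl != 3)			/* block mapping => superpage */
		val |= EX_MINCORE_SUPER;
	if (dirty)
		val |= EX_MINCORE_MODIFIED;
	if (accessed)
		val |= EX_MINCORE_REFERENCED;
	return (val);
}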
that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ 4728287570Sandrew if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 4729287570Sandrew goto retry; 4730287570Sandrew } else 4731287570Sandrew PA_UNLOCK_COND(*locked_pa); 4732287570Sandrew PMAP_UNLOCK(pmap); 4733287570Sandrew 4734287570Sandrew return (val); 4735281494Sandrew} 4736281494Sandrew 4737281494Sandrewvoid 4738281494Sandrewpmap_activate(struct thread *td) 4739281494Sandrew{ 4740281494Sandrew pmap_t pmap; 4741281494Sandrew 4742281494Sandrew critical_enter(); 4743281494Sandrew pmap = vmspace_pmap(td->td_proc->p_vmspace); 4744297446Sandrew td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0); 4745297446Sandrew __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr)); 4746285212Sandrew pmap_invalidate_all(pmap); 4747281494Sandrew critical_exit(); 4748281494Sandrew} 4749281494Sandrew 4750281494Sandrewvoid 4751287105Sandrewpmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 4752281494Sandrew{ 4753281494Sandrew 4754287105Sandrew if (va >= VM_MIN_KERNEL_ADDRESS) { 4755287105Sandrew cpu_icache_sync_range(va, sz); 4756287105Sandrew } else { 4757287105Sandrew u_int len, offset; 4758287105Sandrew vm_paddr_t pa; 4759287105Sandrew 4760287105Sandrew /* Find the length of data in this page to flush */ 4761287105Sandrew offset = va & PAGE_MASK; 4762287105Sandrew len = imin(PAGE_SIZE - offset, sz); 4763287105Sandrew 4764287105Sandrew while (sz != 0) { 4765287105Sandrew /* Extract the physical address & find it in the DMAP */ 4766287105Sandrew pa = pmap_extract(pmap, va); 4767287105Sandrew if (pa != 0) 4768287105Sandrew cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); 4769287105Sandrew 4770287105Sandrew /* Move to the next page */ 4771287105Sandrew sz -= len; 4772287105Sandrew va += len; 4773287105Sandrew /* Set the length for the next iteration */ 4774287105Sandrew len = imin(PAGE_SIZE, sz); 4775287105Sandrew } 4776287105Sandrew } 4777281494Sandrew} 4778281494Sandrew 4779305882Sandrewint 4780305882Sandrewpmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) 4781305882Sandrew{ 4782305882Sandrew#ifdef SMP 4783305882Sandrew uint64_t par; 4784305882Sandrew#endif 4785305882Sandrew 4786305882Sandrew switch (ESR_ELx_EXCEPTION(esr)) { 4787305882Sandrew case EXCP_DATA_ABORT_L: 4788305882Sandrew case EXCP_DATA_ABORT: 4789305882Sandrew break; 4790305882Sandrew default: 4791305882Sandrew return (KERN_FAILURE); 4792305882Sandrew } 4793305882Sandrew 4794305882Sandrew#ifdef SMP 4795305882Sandrew PMAP_LOCK(pmap); 4796305882Sandrew switch (esr & ISS_DATA_DFSC_MASK) { 4797305882Sandrew case ISS_DATA_DFSC_TF_L0: 4798305882Sandrew case ISS_DATA_DFSC_TF_L1: 4799305882Sandrew case ISS_DATA_DFSC_TF_L2: 4800305882Sandrew case ISS_DATA_DFSC_TF_L3: 4801305882Sandrew /* Ask the MMU to check the address */ 4802305882Sandrew if (pmap == kernel_pmap) 4803305882Sandrew par = arm64_address_translate_s1e1r(far); 4804305882Sandrew else 4805305882Sandrew par = arm64_address_translate_s1e0r(far); 4806305882Sandrew 4807305882Sandrew /* 4808305882Sandrew * If the translation was successful the address was invalid 4809305882Sandrew * due to a break-before-make sequence. We can unlock and 4810305882Sandrew * return success to the trap handler. 
4811305882Sandrew */ 4812305882Sandrew if (PAR_SUCCESS(par)) { 4813305882Sandrew PMAP_UNLOCK(pmap); 4814305882Sandrew return (KERN_SUCCESS); 4815305882Sandrew } 4816305882Sandrew break; 4817305882Sandrew default: 4818305882Sandrew break; 4819305882Sandrew } 4820305882Sandrew PMAP_UNLOCK(pmap); 4821305882Sandrew#endif 4822305882Sandrew 4823305882Sandrew return (KERN_FAILURE); 4824305882Sandrew} 4825305882Sandrew 4826281494Sandrew/* 4827281494Sandrew * Increase the starting virtual address of the given mapping if a 4828281494Sandrew * different alignment might result in more superpage mappings. 4829281494Sandrew */ 4830281494Sandrewvoid 4831281494Sandrewpmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 4832281494Sandrew vm_offset_t *addr, vm_size_t size) 4833281494Sandrew{ 4834305880Sandrew vm_offset_t superpage_offset; 4835305880Sandrew 4836305880Sandrew if (size < L2_SIZE) 4837305880Sandrew return; 4838305880Sandrew if (object != NULL && (object->flags & OBJ_COLORED) != 0) 4839305880Sandrew offset += ptoa(object->pg_color); 4840305880Sandrew superpage_offset = offset & L2_OFFSET; 4841305880Sandrew if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || 4842305880Sandrew (*addr & L2_OFFSET) == superpage_offset) 4843305880Sandrew return; 4844305880Sandrew if ((*addr & L2_OFFSET) < superpage_offset) 4845305880Sandrew *addr = (*addr & ~L2_OFFSET) + superpage_offset; 4846305880Sandrew else 4847305880Sandrew *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; 4848281494Sandrew} 4849281494Sandrew 4850281494Sandrew/** 4851281494Sandrew * Get the kernel virtual address of a set of physical pages. If there are 4852281494Sandrew * physical addresses not covered by the DMAP perform a transient mapping 4853281494Sandrew * that will be removed when calling pmap_unmap_io_transient. 4854281494Sandrew * 4855281494Sandrew * \param page The pages the caller wishes to obtain the virtual 4856281494Sandrew * address on the kernel memory map. 4857281494Sandrew * \param vaddr On return contains the kernel virtual memory address 4858281494Sandrew * of the pages passed in the page parameter. 4859281494Sandrew * \param count Number of pages passed in. 4860281494Sandrew * \param can_fault TRUE if the thread using the mapped pages can take 4861281494Sandrew * page faults, FALSE otherwise. 4862281494Sandrew * 4863281494Sandrew * \returns TRUE if the caller must call pmap_unmap_io_transient when 4864281494Sandrew * finished or FALSE otherwise. 4865281494Sandrew * 4866281494Sandrew */ 4867281494Sandrewboolean_t 4868281494Sandrewpmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 4869281494Sandrew boolean_t can_fault) 4870281494Sandrew{ 4871281494Sandrew vm_paddr_t paddr; 4872281494Sandrew boolean_t needs_mapping; 4873281494Sandrew int error, i; 4874281494Sandrew 4875281494Sandrew /* 4876281494Sandrew * Allocate any KVA space that we need, this is done in a separate 4877281494Sandrew * loop to prevent calling vmem_alloc while pinned. 
4878281494Sandrew */
4879281494Sandrew	needs_mapping = FALSE;
4880281494Sandrew	for (i = 0; i < count; i++) {
4881281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
4882297617Sandrew		if (__predict_false(!PHYS_IN_DMAP(paddr))) {
4883281494Sandrew			error = vmem_alloc(kernel_arena, PAGE_SIZE,
4884281494Sandrew			    M_BESTFIT | M_WAITOK, &vaddr[i]);
4885281494Sandrew			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
4886281494Sandrew			needs_mapping = TRUE;
4887281494Sandrew		} else {
4888281494Sandrew			vaddr[i] = PHYS_TO_DMAP(paddr);
4889281494Sandrew		}
4890281494Sandrew	}
4891281494Sandrew
4892281494Sandrew	/* Exit early if everything is covered by the DMAP */
4893281494Sandrew	if (!needs_mapping)
4894281494Sandrew		return (FALSE);
4895281494Sandrew
4896281494Sandrew	if (!can_fault)
4897281494Sandrew		sched_pin();
4898281494Sandrew	for (i = 0; i < count; i++) {
4899281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
4900297617Sandrew		if (!PHYS_IN_DMAP(paddr)) {
4901281494Sandrew			panic(
4902281494Sandrew			    "pmap_map_io_transient: TODO: Map out of DMAP data");
4903281494Sandrew		}
4904281494Sandrew	}
4905281494Sandrew
4906281494Sandrew	return (needs_mapping);
4907281494Sandrew}
4908281494Sandrew
4909281494Sandrewvoid
4910281494Sandrewpmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
4911281494Sandrew    boolean_t can_fault)
4912281494Sandrew{
4913281494Sandrew	vm_paddr_t paddr;
4914281494Sandrew	int i;
4915281494Sandrew
4916281494Sandrew	if (!can_fault)
4917281494Sandrew		sched_unpin();
4918281494Sandrew	for (i = 0; i < count; i++) {
4919281494Sandrew		paddr = VM_PAGE_TO_PHYS(page[i]);
4920297617Sandrew		if (!PHYS_IN_DMAP(paddr)) {
4921286073Semaste			panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
4922281494Sandrew		}
4923281494Sandrew	}
4924281494Sandrew}
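/*
 * Editor's illustrative sketch (not part of the original source): the
 * map/unmap pair above only builds transient mappings for pages whose
 * physical addresses fall outside the DMAP.  A stand-alone model of
 * that decision, assuming a single contiguous DMAP-covered physical
 * range; the names are hypothetical.
 */
#include <stdbool.h>
#include <stdint.h>

static bool
ex_needs_transient_mapping(const uint64_t *pa, int count,
    uint64_t dmap_min_pa, uint64_t dmap_max_pa)
{
	int i;

	/* One page outside the direct map forces the slow path. */
	for (i = 0; i < count; i++)
		if (pa[i] < dmap_min_pa || pa[i] >= dmap_max_pa)
			return (true);
	return (false);
}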