/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014-2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * This software was developed by Andrew Turner under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/11/sys/arm64/arm64/pmap.c 305879 2016-09-16 12:17:01Z andrew $");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduced protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/_unrhdr.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

#define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL2PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL3PG		(PAGE_SIZE/(sizeof (pt_entry_t)))

#define	NUL0E		L0_ENTRIES
#define	NUL1E		(NUL0E * NL1PG)
#define	NUL2E		(NUL1E * NL2PG)
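
/*
 * A worked example of the table geometry, assuming the 4 KB granule and
 * 48-bit virtual addresses used by this port: each table page holds
 * PAGE_SIZE / sizeof(pd_entry_t) == 4096 / 8 == 512 entries, and
 * L0_ENTRIES is likewise 512, so
 *
 *	NUL1E == 512 * 512    == 262144    L1 entries in total
 *	NUL2E == 262144 * 512 == 134217728 L2 entries in total
 *
 * These totals are used below to carve up the page-table page pindex
 * namespace; see _pmap_unwire_l3() and _pmap_alloc_l3().
 */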

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

/*
 * These are configured by the mair_el1 register. This is set up in locore.S.
 */
#define	DEVICE_MEMORY	0
#define	UNCACHED_MEMORY	1
#define	CACHED_MEMORY	2


#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
	PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
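
/*
 * The PV list locks above are a fixed pool hashed by physical address:
 * pa_index(pa) % NPV_LIST_LOCKS picks the lock covering every page whose
 * index hashes to that slot.  CHANGE_PV_LIST_LOCK_TO_PHYS() implements
 * hand-over-hand locking: a caller walking the mappings of several pages
 * drops the old lock and takes the new one only when the hash actually
 * changes, which keeps lock traffic low on sequential scans.
 */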

struct pmap kernel_pmap_store;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

struct msgbuf *msgbufp = NULL;

vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */

/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS);

#define	DMAP_TABLES	((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
extern pt_entry_t pagetable_dmap[];

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define	pmap_set(table, mask) atomic_set_64(table, mask)
#define	pmap_load_clear(table) atomic_swap_64(table, 0)
#define	pmap_load(table) (*table)

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

#define	pmap_l0_index(va)	(((va) >> L0_SHIFT) & L0_ADDR_MASK)
#define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l0[pmap_l0_index(va)]);
}

static __inline pd_entry_t *
pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
	return (&l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l0;

	l0 = pmap_l0(pmap, va);
	if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
		return (NULL);

	return (pmap_l0_to_l1(l0, va));
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
	return (&l2[pmap_l2_index(va)]);
}
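
/*
 * The helpers above walk one level at a time: each *_to_* function masks
 * the attribute bits out of a table entry to recover the physical address
 * of the next-level table, converts it to a kernel pointer through the
 * direct map, and indexes it with the VA bits for that level.  A full
 * four-level lookup is just a composition, e.g.
 *
 *	l3 = pmap_l2_to_l3(pmap_l1_to_l2(pmap_l0_to_l1(
 *	    pmap_l0(pmap, va), va), va), va);
 *
 * provided every intermediate entry is a valid table descriptor.
 */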

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
	return (&l3[pmap_l3_index(va)]);
}

/*
 * Returns the lowest valid pde for a given virtual address.
 * The next level may or may not point to a valid page or block.
 */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l0, *l1, *l2, desc;

	l0 = pmap_l0(pmap, va);
	desc = pmap_load(l0) & ATTR_DESCR_MASK;
	if (desc != L0_TABLE) {
		*level = -1;
		return (NULL);
	}

	l1 = pmap_l0_to_l1(l0, va);
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc != L1_TABLE) {
		*level = 0;
		return (l0);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc != L2_TABLE) {
		*level = 1;
		return (l1);
	}

	*level = 2;
	return (l2);
}
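
/*
 * Note the two level conventions used by the walkers here: pmap_pde()
 * returns the lowest valid page directory entry and reports in *level
 * the level of the entry it returns (-1 when even the L0 entry is
 * invalid), while pmap_pte() below returns a block or page entry and
 * reports the level at which the mapping (or the first invalid entry)
 * was found, so its levels run from 1 to 3.
 */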

/*
 * Returns the lowest valid pte block or table entry for a given virtual
 * address. If there are no valid entries return NULL and set the level to
 * the first invalid level.
 */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l1, *l2, desc;
	pt_entry_t *l3;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL) {
		*level = 0;
		return (NULL);
	}
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc == L1_BLOCK) {
		*level = 1;
		return (l1);
	}

	if (desc != L1_TABLE) {
		*level = 1;
		return (NULL);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc == L2_BLOCK) {
		*level = 2;
		return (l2);
	}

	if (desc != L2_TABLE) {
		*level = 2;
		return (NULL);
	}

	*level = 3;
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
		return (NULL);

	return (l3);
}

bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
    pd_entry_t **l2, pt_entry_t **l3)
{
	pd_entry_t *l0p, *l1p, *l2p;

	if (pmap->pm_l0 == NULL)
		return (false);

	l0p = pmap_l0(pmap, va);
	*l0 = l0p;

	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
		return (false);

	l1p = pmap_l0_to_l1(l0p, va);
	*l1 = l1p;

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
		*l2 = NULL;
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
		return (false);

	l2p = pmap_l1_to_l2(l1p, va);
	*l2 = l2p;

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
		*l3 = NULL;
		return (true);
	}

	*l3 = pmap_l2_to_l3(l2p, va);

	return (true);
}

static __inline int
pmap_is_current(pmap_t pmap)
{

	return ((pmap == pmap_kernel()) ||
	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}
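
/*
 * PTE_SYNC cleans a just-written table entry from the data cache to the
 * point of coherency.  On implementations whose table walker does not
 * snoop the data caches this is required before the MMU (or the System
 * MMU) can observe the update; on fully coherent implementations it is
 * harmless.
 */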
#define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

/*
 * Checks if the page is dirty. We currently lack proper tracking of this on
 * arm64, so for now assume that a page mapped read/write that has been
 * accessed is dirty.
 */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
	    ("Invalid bootstrap L1 table"));
	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
}
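
/*
 * pmap_bootstrap_dmap() below builds the direct map out of L1 block
 * entries, each of which maps a whole L1_SIZE region (1 GB with the
 * 4 KB granule), so the entire physical range [min_pa, max_pa) becomes
 * addressable at DMAP_MIN_ADDRESS + pa - dmap_phys_base with no L2 or
 * L3 tables at all.
 */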
static void
pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, vm_paddr_t max_pa)
{
	vm_offset_t va;
	vm_paddr_t pa;
	u_int l1_slot;

	pa = dmap_phys_base = min_pa & ~L1_OFFSET;
	va = DMAP_MIN_ADDRESS;
	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
		l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);

		pmap_load_store(&pagetable_dmap[l1_slot],
		    (pa & ~L1_OFFSET) | ATTR_DEFAULT |
		    ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
	}

	/* Set the upper limit of the DMAP region */
	dmap_phys_max = pa;
	dmap_max_addr = va;

	cpu_dcache_wb_range((vm_offset_t)pagetable_dmap,
	    PAGE_SIZE * DMAP_TABLES);
	cpu_tlb_flushID();
}

static vm_offset_t
pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
{
	vm_offset_t l2pt;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;

	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));

	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(va);
	l2pt = l2_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		pa = pmap_early_vtophys(l1pt, l2pt);
		pmap_load_store(&l1[l1_slot],
		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
		l2pt += PAGE_SIZE;
	}

	/* Clean the L2 page table */
	memset((void *)l2_start, 0, l2pt - l2_start);
	cpu_dcache_wb_range(l2_start, l2pt - l2_start);

	/* Flush the l1 table to ram */
	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);

	return l2pt;
}

static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l2pt, l3pt;
	vm_paddr_t pa;
	pd_entry_t *l2;
	u_int l2_slot;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
	l2pt = (vm_offset_t)l2;
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pmap_load_store(&l2[l2_slot],
		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L3 page table */
	memset((void *)l3_start, 0, l3pt - l3_start);
	cpu_dcache_wb_range(l3_start, l3pt - l3_start);

	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

	return l3pt;
}
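
/*
 * pmap_bootstrap() below stitches the early helpers together: it adopts
 * the L0 table locore handed us, builds the DMAP, scans the bootstrap L2
 * block mappings to find the end of the kernel image, allocates L2 tables
 * covering the kernel map (and L3 tables for the early devmap) from the
 * first free physical memory, and finally publishes the remaining ranges
 * in phys_avail[].
 */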
/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
    vm_size_t kernlen)
{
	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t pa, max_pa, min_pa;
	int i;

	kern_delta = KERNBASE - kernstart;
	physmem = 0;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/* Assume the address we were loaded to is a valid physical address */
	min_pa = max_pa = KERNBASE - kern_delta;

	/*
	 * Find the minimum and maximum physical addresses. physmap is
	 * sorted, but may contain empty ranges.
	 */
	for (i = 0; i < (physmap_idx * 2); i += 2) {
		if (physmap[i] == physmap[i + 1])
			continue;
		if (physmap[i] <= min_pa)
			min_pa = physmap[i];
		if (physmap[i + 1] > max_pa)
			max_pa = physmap[i + 1];
	}

	/* Create a direct map region early so we can use it for pa -> va */
	pmap_bootstrap_dmap(l1pt, min_pa, max_pa);

	va = KERNBASE;
	pa = KERNBASE - kern_delta;

	/*
	 * Start to initialise phys_avail by copying from physmap
	 * up to the physical address KERNBASE points at.
	 */
	map_slot = avail_slot = 0;
	for (; map_slot < (physmap_idx * 2) &&
	    avail_slot < (PHYS_AVAIL_SIZE - 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		if (physmap[map_slot] <= pa &&
		    physmap[map_slot + 1] > pa)
			break;

		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}

	/* Add the memory before the kernel */
	if (physmap[avail_slot] < pa && avail_slot < (PHYS_AVAIL_SIZE - 2)) {
		phys_avail[avail_slot] = physmap[map_slot];
		phys_avail[avail_slot + 1] = pa;
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;
		avail_slot += 2;
	}
	used_map_slot = map_slot;

	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
			break;

		/* Check locore used L2 blocks */
		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
		    ("Invalid bootstrap L2 table"));
		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
		    ("Incorrect PA in L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L1_SIZE);

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);
	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
	/* And the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	virtual_avail = roundup2(freemempos, L1_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	/* Finish initialising physmap */
	map_slot = used_map_slot;
	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
	    map_slot < (physmap_idx * 2); map_slot += 2) {
		if (physmap[map_slot] == physmap[map_slot + 1])
			continue;

		/* Have we used the current range? */
		if (physmap[map_slot + 1] <= pa)
			continue;

		/* Do we need to split the entry? */
		if (physmap[map_slot] < pa) {
			phys_avail[avail_slot] = pa;
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		} else {
			phys_avail[avail_slot] = physmap[map_slot];
			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
		}
		physmem += (phys_avail[avail_slot + 1] -
		    phys_avail[avail_slot]) >> PAGE_SHIFT;

		avail_slot += 2;
	}
	phys_avail[avail_slot] = 0;
	phys_avail[avail_slot + 1] = 0;

	/*
	 * Maxmem isn't the "maximum memory", it's one larger than the
	 * highest page of the physical address space. It should be
	 * called something like "Maxphyspage".
	 */
	Maxmem = atop(phys_avail[avail_slot - 1]);

	cpu_tlb_flushID();
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	int i;

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");
}
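
/*
 * The invalidation routines below use broadcast TLB maintenance:
 * "tlbi vaae1is" invalidates a single VA (all ASIDs, EL1&0, Inner
 * Shareable) and "tlbi vmalle1is" invalidates everything.  Each sequence
 * is bracketed by barriers: a "dsb ishst" so prior page-table stores are
 * visible before the invalidate, a "dsb ish" to wait for completion on
 * all CPUs in the Inner Shareable domain, and an "isb" to resynchronize
 * the local instruction stream.
 */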
/*
 * Invalidate a single TLB entry.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vaae1is, %0	\n"
	    "dsb  ish		\n"
	    "isb		\n"
	    : : "r"(va >> PAGE_SHIFT));
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	sched_pin();
	dsb(ishst);
	for (addr = sva; addr < eva; addr += PAGE_SIZE) {
		__asm __volatile(
		    "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
	}
	__asm __volatile(
	    "dsb  ish	\n"
	    "isb	\n");
	sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vmalle1is	\n"
	    "dsb  ish		\n"
	    "isb		\n");
	sched_unpin();
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Find the block or page map for this virtual address. pmap_pte
	 * will return either a valid block/page entry, or NULL.
	 */
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);
		pa = tpte & ~ATTR_MASK;
		switch(lvl) {
		case 1:
			KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
			    ("pmap_extract: Invalid L1 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L1_OFFSET);
			break;
		case 2:
			KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
			    ("pmap_extract: Invalid L2 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L2_OFFSET);
			break;
		case 3:
			KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
			    ("pmap_extract: Invalid L3 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L3_OFFSET);
			break;
		}
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}
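
/*
 * Note how pmap_extract() composes its result: the entry supplies the
 * aligned physical base (tpte & ~ATTR_MASK) and the VA supplies the
 * offset within the mapping, whose width depends on the level.  With the
 * 4 KB granule, for example, an L2 block covers 2 MB, so
 * pa == (tpte & ~ATTR_MASK) | (va & L2_OFFSET).  pmap_kextract() below
 * uses the same scheme for kernel addresses.
 */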
/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	vm_page_t m;
	int lvl;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);

		KASSERT(lvl > 0 && lvl <= 3,
		    ("pmap_extract_and_hold: Invalid level %d", lvl));
		CTASSERT(L1_BLOCK == L2_BLOCK);
		KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
		    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
		    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
		    tpte & ATTR_DESCR_MASK));
		if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
		    ((prot & VM_PROT_WRITE) == 0)) {
			if (vm_page_pa_tryrelock(pmap, tpte & ~ATTR_MASK, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		pa = 0;
		pte = pmap_pte(kernel_pmap, va, &lvl);
		if (pte != NULL) {
			tpte = pmap_load(pte);
			pa = tpte & ~ATTR_MASK;
			switch(lvl) {
			case 1:
				KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
				    ("pmap_kextract: Invalid L1 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L1_OFFSET);
				break;
			case 2:
				KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
				    ("pmap_kextract: Invalid L2 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L2_OFFSET);
				break;
			case 3:
				KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
				    ("pmap_kextract: Invalid L3 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L3_OFFSET);
				break;
			}
		}
	}
	return (pa);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

static void
pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
{
	pd_entry_t *pde;
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT((pa & L3_OFFSET) == 0,
	    ("pmap_kenter: Invalid physical address"));
	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kenter: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));

		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
		    ATTR_IDX(mode) | L3_PAGE);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{

	pmap_kenter(sva, size, pa, DEVICE_MEMORY);
}
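
/*
 * pmap_kenter_device() is the entry point consumers actually see; the
 * mode argument of the static pmap_kenter() above just selects the MAIR
 * index (DEVICE_MEMORY here).  A typical caller, sketched under the
 * assumption that it has already carved va out of kernel KVA, looks like
 *
 *	pmap_kenter_device(va, round_page(size), trunc_page(pa));
 *
 * i.e. both the range and the physical address must be page-aligned.
 */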
/*
 * Remove a page from the kernel pagetables.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;
	int lvl;

	pte = pmap_pte(kernel_pmap, va, &lvl);
	KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
	KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));

	if (pmap_l3_valid_cacheable(pmap_load(pte)))
		cpu_dcache_wb_range(va, L3_SIZE);
	pmap_load_clear(pte);
	PTE_SYNC(pte);
	pmap_invalidate_page(kernel_pmap, va);
}

void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		pmap_load_clear(pte);
		PTE_SYNC(pte);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return PHYS_TO_DMAP(start);
}

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pd_entry_t *pde;
	pt_entry_t *pte, pa;
	vm_offset_t va;
	vm_page_t m;
	int i, lvl;

	va = sva;
	for (i = 0; i < count; i++) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2,
		    ("pmap_qenter: Invalid level %d", lvl));

		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, pa);
		PTE_SYNC(pte);

		va += L3_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));

	va = sva;
	while (count-- > 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		if (pte != NULL) {
			if (pmap_l3_valid_cacheable(pmap_load(pte)))
				cpu_dcache_wb_range(va, L3_SIZE);
			pmap_load_clear(pte);
			PTE_SYNC(pte);
		}

		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(struct spglist *free)
{
	vm_page_t m;

	while ((m = SLIST_FIRST(free)) != NULL) {
		SLIST_REMOVE_HEAD(free, plinks.s.ss);
		/* Preserve the page's PG_ZERO setting. */
		vm_page_free_toq(m);
	}
}

/*
 * Schedule the specified unused page table page to be freed.  Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else
		return (FALSE);
}
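
/*
 * Page table pages live in a single pindex namespace, carved up with the
 * NUL*E totals defined above: pindices [0, NUL2E) name L3 table pages,
 * [NUL2E, NUL2E + NUL1E) name L2 table pages, and pindices of
 * NUL2E + NUL1E and above name L1 table pages.  _pmap_unwire_l3() below
 * uses these ranges to decide which level's directory entry to clear,
 * and _pmap_alloc_l3() uses them when allocating.
 */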
static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= (NUL2E + NUL1E)) {
		/* l1 page */
		pd_entry_t *l0;

		l0 = pmap_l0(pmap, va);
		pmap_load_clear(l0);
		PTE_SYNC(l0);
	} else if (m->pindex >= NUL2E) {
		/* l2 page */
		pd_entry_t *l1;

		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
		PTE_SYNC(l1);
	} else {
		/* l3 page */
		pd_entry_t *l2;

		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
		PTE_SYNC(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUL2E) {
		/* We just released an l3, unhold the matching l2 */
		pd_entry_t *l1, tl1;
		vm_page_t l2pg;

		l1 = pmap_l1(pmap, va);
		tl1 = pmap_load(l1);
		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l2pg, free);
	} else if (m->pindex < (NUL2E + NUL1E)) {
		/* We just released an l2, unhold the matching l1 */
		pd_entry_t *l0, tl0;
		vm_page_t l1pg;

		l0 = pmap_l0(pmap, va);
		tl0 = pmap_load(l0);
		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l1pg, free);
	}
	pmap_invalidate_page(pmap, va);

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}

/*
 * After removing an l3 entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
    struct spglist *free)
{
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
	return (pmap_unwire_l3(pmap, va, mpte, free));
}

void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
	pmap->pm_l0 = kernel_pmap->pm_l0;
}

int
pmap_pinit(pmap_t pmap)
{
	vm_paddr_t l0phys;
	vm_page_t l0pt;

	/*
	 * allocate the l0 page
	 */
	while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
		VM_WAIT;

	l0phys = VM_PAGE_TO_PHYS(l0pt);
	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);

	if ((l0pt->flags & PG_ZERO) == 0)
		pagezero(pmap->pm_l0);

	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));

	return (1);
}
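
/*
 * _pmap_alloc_l3() below resolves a missing page table page top-down:
 * the pindex alone tells it which level the wanted page belongs to, and
 * when the directory entry one level up is empty it recurses with the
 * pindex of that upper-level page (e.g. NUL2E + l1index for a missing
 * L2 page) before it can install the new page's address.
 */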
1394281494Sandrew */ 1395281494Sandrew if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1396281494Sandrew VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1397281494Sandrew if (lockp != NULL) { 1398281494Sandrew RELEASE_PV_LIST_LOCK(lockp); 1399281494Sandrew PMAP_UNLOCK(pmap); 1400281494Sandrew VM_WAIT; 1401281494Sandrew PMAP_LOCK(pmap); 1402281494Sandrew } 1403281494Sandrew 1404281494Sandrew /* 1405281494Sandrew * Indicate the need to retry. While waiting, the page table 1406281494Sandrew * page may have been allocated. 1407281494Sandrew */ 1408281494Sandrew return (NULL); 1409281494Sandrew } 1410281494Sandrew if ((m->flags & PG_ZERO) == 0) 1411281494Sandrew pmap_zero_page(m); 1412281494Sandrew 1413281494Sandrew /* 1414281494Sandrew * Map the pagetable page into the process address space, if 1415281494Sandrew * it isn't already there. 1416281494Sandrew */ 1417281494Sandrew 1418297446Sandrew if (ptepindex >= (NUL2E + NUL1E)) { 1419297446Sandrew pd_entry_t *l0; 1420297446Sandrew vm_pindex_t l0index; 1421281494Sandrew 1422297446Sandrew l0index = ptepindex - (NUL2E + NUL1E); 1423297446Sandrew l0 = &pmap->pm_l0[l0index]; 1424297446Sandrew pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); 1425297446Sandrew PTE_SYNC(l0); 1426297446Sandrew } else if (ptepindex >= NUL2E) { 1427297446Sandrew vm_pindex_t l0index, l1index; 1428297446Sandrew pd_entry_t *l0, *l1; 1429297446Sandrew pd_entry_t tl0; 1430297446Sandrew 1431297446Sandrew l1index = ptepindex - NUL2E; 1432297446Sandrew l0index = l1index >> L0_ENTRIES_SHIFT; 1433297446Sandrew 1434297446Sandrew l0 = &pmap->pm_l0[l0index]; 1435297446Sandrew tl0 = pmap_load(l0); 1436297446Sandrew if (tl0 == 0) { 1437297446Sandrew /* recurse for allocating page dir */ 1438297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, 1439297446Sandrew lockp) == NULL) { 1440297446Sandrew --m->wire_count; 1441297446Sandrew /* XXX: release mem barrier? 
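			 * On this failure path the new page was never
			 * visible through the page tables, so a plain
			 * atomic decrement is presumably sufficient;
			 * compare _pmap_unwire_l3() above, where the
			 * release variant orders the PTE-clearing
			 * store before the TLB shootdown.  If release
			 * semantics were wanted, the change would be
			 * (sketch):
			 *
			 *	atomic_subtract_rel_int(
			 *	    &vm_cnt.v_wire_count, 1);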
*/ 1442297446Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1443297446Sandrew vm_page_free_zero(m); 1444297446Sandrew return (NULL); 1445297446Sandrew } 1446297446Sandrew } else { 1447297446Sandrew l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); 1448297446Sandrew l1pg->wire_count++; 1449297446Sandrew } 1450297446Sandrew 1451297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); 1452297446Sandrew l1 = &l1[ptepindex & Ln_ADDR_MASK]; 1453281494Sandrew pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); 1454281494Sandrew PTE_SYNC(l1); 1455281494Sandrew } else { 1456297446Sandrew vm_pindex_t l0index, l1index; 1457297446Sandrew pd_entry_t *l0, *l1, *l2; 1458297446Sandrew pd_entry_t tl0, tl1; 1459281494Sandrew 1460297446Sandrew l1index = ptepindex >> Ln_ENTRIES_SHIFT; 1461297446Sandrew l0index = l1index >> L0_ENTRIES_SHIFT; 1462297446Sandrew 1463297446Sandrew l0 = &pmap->pm_l0[l0index]; 1464297446Sandrew tl0 = pmap_load(l0); 1465297446Sandrew if (tl0 == 0) { 1466281494Sandrew /* recurse for allocating page dir */ 1467297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1468281494Sandrew lockp) == NULL) { 1469281494Sandrew --m->wire_count; 1470281494Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1471281494Sandrew vm_page_free_zero(m); 1472281494Sandrew return (NULL); 1473281494Sandrew } 1474297446Sandrew tl0 = pmap_load(l0); 1475297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1476297446Sandrew l1 = &l1[l1index & Ln_ADDR_MASK]; 1477281494Sandrew } else { 1478297446Sandrew l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1479297446Sandrew l1 = &l1[l1index & Ln_ADDR_MASK]; 1480297446Sandrew tl1 = pmap_load(l1); 1481297446Sandrew if (tl1 == 0) { 1482297446Sandrew /* recurse for allocating page dir */ 1483297446Sandrew if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1484297446Sandrew lockp) == NULL) { 1485297446Sandrew --m->wire_count; 1486297446Sandrew /* XXX: release mem barrier? */ 1487297446Sandrew atomic_subtract_int( 1488297446Sandrew &vm_cnt.v_wire_count, 1); 1489297446Sandrew vm_page_free_zero(m); 1490297446Sandrew return (NULL); 1491297446Sandrew } 1492297446Sandrew } else { 1493297446Sandrew l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); 1494297446Sandrew l2pg->wire_count++; 1495297446Sandrew } 1496281494Sandrew } 1497281494Sandrew 1498288445Sandrew l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 1499281494Sandrew l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1500285537Sandrew pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); 1501281494Sandrew PTE_SYNC(l2); 1502281494Sandrew } 1503281494Sandrew 1504281494Sandrew pmap_resident_count_inc(pmap, 1); 1505281494Sandrew 1506281494Sandrew return (m); 1507281494Sandrew} 1508281494Sandrew 1509281494Sandrewstatic vm_page_t 1510281494Sandrewpmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1511281494Sandrew{ 1512281494Sandrew vm_pindex_t ptepindex; 1513297446Sandrew pd_entry_t *pde, tpde; 1514281494Sandrew vm_page_t m; 1515297446Sandrew int lvl; 1516281494Sandrew 1517281494Sandrew /* 1518281494Sandrew * Calculate pagetable page index 1519281494Sandrew */ 1520281494Sandrew ptepindex = pmap_l2_pindex(va); 1521281494Sandrewretry: 1522281494Sandrew /* 1523281494Sandrew * Get the page directory entry 1524281494Sandrew */ 1525297446Sandrew pde = pmap_pde(pmap, va, &lvl); 1526281494Sandrew 1527281494Sandrew /* 1528297446Sandrew * If the page table page is mapped, we just increment the hold count, 1529297446Sandrew * and activate it. 
If we get a level 2 pde it will point to a level 3 1530297446Sandrew * table. 1531281494Sandrew */ 1532297446Sandrew if (lvl == 2) { 1533297446Sandrew tpde = pmap_load(pde); 1534297446Sandrew if (tpde != 0) { 1535297446Sandrew m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); 1536297446Sandrew m->wire_count++; 1537297446Sandrew return (m); 1538297446Sandrew } 1539281494Sandrew } 1540297446Sandrew 1541297446Sandrew /* 1542297446Sandrew * Here if the pte page isn't mapped, or if it has been deallocated. 1543297446Sandrew */ 1544297446Sandrew m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1545297446Sandrew if (m == NULL && lockp != NULL) 1546297446Sandrew goto retry; 1547297446Sandrew 1548281494Sandrew return (m); 1549281494Sandrew} 1550281494Sandrew 1551281494Sandrew 1552281494Sandrew/*************************************************** 1553281494Sandrew * Pmap allocation/deallocation routines. 1554281494Sandrew ***************************************************/ 1555281494Sandrew 1556281494Sandrew/* 1557281494Sandrew * Release any resources held by the given physical map. 1558281494Sandrew * Called when a pmap initialized by pmap_pinit is being released. 1559281494Sandrew * Should only be called if the map contains no valid mappings. 1560281494Sandrew */ 1561281494Sandrewvoid 1562281494Sandrewpmap_release(pmap_t pmap) 1563281494Sandrew{ 1564281494Sandrew vm_page_t m; 1565281494Sandrew 1566281494Sandrew KASSERT(pmap->pm_stats.resident_count == 0, 1567281494Sandrew ("pmap_release: pmap resident count %ld != 0", 1568281494Sandrew pmap->pm_stats.resident_count)); 1569281494Sandrew 1570297446Sandrew m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); 1571281494Sandrew 1572281494Sandrew m->wire_count--; 1573281494Sandrew atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1574281494Sandrew vm_page_free_zero(m); 1575281494Sandrew} 1576281494Sandrew 1577281494Sandrew#if 0 1578281494Sandrewstatic int 1579281494Sandrewkvm_size(SYSCTL_HANDLER_ARGS) 1580281494Sandrew{ 1581281494Sandrew unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1582281494Sandrew 1583281494Sandrew return sysctl_handle_long(oidp, &ksize, 0, req); 1584281494Sandrew} 1585305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1586281494Sandrew 0, 0, kvm_size, "LU", "Size of KVM"); 1587281494Sandrew 1588281494Sandrewstatic int 1589281494Sandrewkvm_free(SYSCTL_HANDLER_ARGS) 1590281494Sandrew{ 1591281494Sandrew unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1592281494Sandrew 1593281494Sandrew return sysctl_handle_long(oidp, &kfree, 0, req); 1594281494Sandrew} 1595305531SandrewSYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1596281494Sandrew 0, 0, kvm_free, "LU", "Amount of KVM free"); 1597281494Sandrew#endif /* 0 */ 1598281494Sandrew 1599281494Sandrew/* 1600281494Sandrew * grow the number of kernel page table entries, if needed 1601281494Sandrew */ 1602281494Sandrewvoid 1603281494Sandrewpmap_growkernel(vm_offset_t addr) 1604281494Sandrew{ 1605281494Sandrew vm_paddr_t paddr; 1606281494Sandrew vm_page_t nkpg; 1607297446Sandrew pd_entry_t *l0, *l1, *l2; 1608281494Sandrew 1609281494Sandrew mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1610281494Sandrew 1611281494Sandrew addr = roundup2(addr, L2_SIZE); 1612281494Sandrew if (addr - 1 >= kernel_map->max_offset) 1613281494Sandrew addr = kernel_map->max_offset; 1614281494Sandrew while (kernel_vm_end < addr) { 1615297446Sandrew l0 = pmap_l0(kernel_pmap, kernel_vm_end); 1616297446Sandrew KASSERT(pmap_load(l0) != 0, 1617297446Sandrew 
("pmap_growkernel: No level 0 kernel entry")); 1618297446Sandrew 1619297446Sandrew l1 = pmap_l0_to_l1(l0, kernel_vm_end); 1620285045Sandrew if (pmap_load(l1) == 0) { 1621281494Sandrew /* We need a new PDP entry */ 1622281494Sandrew nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1623281494Sandrew VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1624281494Sandrew VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1625281494Sandrew if (nkpg == NULL) 1626281494Sandrew panic("pmap_growkernel: no memory to grow kernel"); 1627281494Sandrew if ((nkpg->flags & PG_ZERO) == 0) 1628281494Sandrew pmap_zero_page(nkpg); 1629281494Sandrew paddr = VM_PAGE_TO_PHYS(nkpg); 1630281494Sandrew pmap_load_store(l1, paddr | L1_TABLE); 1631281494Sandrew PTE_SYNC(l1); 1632281494Sandrew continue; /* try again */ 1633281494Sandrew } 1634281494Sandrew l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1635285045Sandrew if ((pmap_load(l2) & ATTR_AF) != 0) { 1636281494Sandrew kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1637281494Sandrew if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1638281494Sandrew kernel_vm_end = kernel_map->max_offset; 1639305531Sandrew break; 1640281494Sandrew } 1641281494Sandrew continue; 1642281494Sandrew } 1643281494Sandrew 1644281494Sandrew nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1645281494Sandrew VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1646281494Sandrew VM_ALLOC_ZERO); 1647281494Sandrew if (nkpg == NULL) 1648281494Sandrew panic("pmap_growkernel: no memory to grow kernel"); 1649281494Sandrew if ((nkpg->flags & PG_ZERO) == 0) 1650281494Sandrew pmap_zero_page(nkpg); 1651281494Sandrew paddr = VM_PAGE_TO_PHYS(nkpg); 1652281494Sandrew pmap_load_store(l2, paddr | L2_TABLE); 1653281494Sandrew PTE_SYNC(l2); 1654285212Sandrew pmap_invalidate_page(kernel_pmap, kernel_vm_end); 1655281494Sandrew 1656281494Sandrew kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1657281494Sandrew if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1658281494Sandrew kernel_vm_end = kernel_map->max_offset; 1659305531Sandrew break; 1660281494Sandrew } 1661281494Sandrew } 1662281494Sandrew} 1663281494Sandrew 1664281494Sandrew 1665281494Sandrew/*************************************************** 1666281494Sandrew * page management routines. 
1667281494Sandrew ***************************************************/ 1668281494Sandrew 1669281494SandrewCTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1670281494SandrewCTASSERT(_NPCM == 3); 1671281494SandrewCTASSERT(_NPCPV == 168); 1672281494Sandrew 1673281494Sandrewstatic __inline struct pv_chunk * 1674281494Sandrewpv_to_chunk(pv_entry_t pv) 1675281494Sandrew{ 1676281494Sandrew 1677281494Sandrew return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1678281494Sandrew} 1679281494Sandrew 1680281494Sandrew#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1681281494Sandrew 1682281494Sandrew#define PC_FREE0 0xfffffffffffffffful 1683281494Sandrew#define PC_FREE1 0xfffffffffffffffful 1684281494Sandrew#define PC_FREE2 0x000000fffffffffful 1685281494Sandrew 1686281494Sandrewstatic const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1687281494Sandrew 1688281494Sandrew#if 0 1689281494Sandrew#ifdef PV_STATS 1690281494Sandrewstatic int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1691281494Sandrew 1692281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1693281494Sandrew "Current number of pv entry chunks"); 1694281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1695281494Sandrew "Current number of pv entry chunks allocated"); 1696281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1697281494Sandrew "Current number of pv entry chunks frees"); 1698281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1699281494Sandrew "Number of times tried to get a chunk page but failed."); 1700281494Sandrew 1701281494Sandrewstatic long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1702281494Sandrewstatic int pv_entry_spare; 1703281494Sandrew 1704281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1705281494Sandrew "Current number of pv entry frees"); 1706281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1707281494Sandrew "Current number of pv entry allocs"); 1708281494SandrewSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1709281494Sandrew "Current number of pv entries"); 1710281494SandrewSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1711281494Sandrew "Current number of spare pv entries"); 1712281494Sandrew#endif 1713281494Sandrew#endif /* 0 */ 1714281494Sandrew 1715281494Sandrew/* 1716281494Sandrew * We are in a serious low memory condition. Resort to 1717281494Sandrew * drastic measures to free some pages so we can allocate 1718281494Sandrew * another pv entry chunk. 1719281494Sandrew * 1720281494Sandrew * Returns NULL if PV entries were reclaimed from the specified pmap. 1721281494Sandrew * 1722281494Sandrew * We do not, however, unmap 2mpages because subsequent accesses will 1723281494Sandrew * allocate per-page pv entries until repromotion occurs, thereby 1724281494Sandrew * exacerbating the shortage of free pv entries. 
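 *
 * For reference, a pv_chunk occupies exactly one page and tracks
 * _NPCPV (168) entries in _NPCM (3) 64-bit bitmap words (see the
 * CTASSERTs above); an entry's slot maps to a bitmap position as
 * computed in free_pv_entry() below:
 *
 *	idx = pv - &pc->pc_pventry[0];
 *	field = idx / 64;
 *	bit = idx % 64;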
1725281494Sandrew */ 1726281494Sandrewstatic vm_page_t 1727281494Sandrewreclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1728281494Sandrew{ 1729281494Sandrew 1730286073Semaste panic("ARM64TODO: reclaim_pv_chunk"); 1731281494Sandrew} 1732281494Sandrew 1733281494Sandrew/* 1734281494Sandrew * free the pv_entry back to the free list 1735281494Sandrew */ 1736281494Sandrewstatic void 1737281494Sandrewfree_pv_entry(pmap_t pmap, pv_entry_t pv) 1738281494Sandrew{ 1739281494Sandrew struct pv_chunk *pc; 1740281494Sandrew int idx, field, bit; 1741281494Sandrew 1742281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1743281494Sandrew PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1744281494Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1745281494Sandrew PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1746281494Sandrew pc = pv_to_chunk(pv); 1747281494Sandrew idx = pv - &pc->pc_pventry[0]; 1748281494Sandrew field = idx / 64; 1749281494Sandrew bit = idx % 64; 1750281494Sandrew pc->pc_map[field] |= 1ul << bit; 1751281494Sandrew if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1752281494Sandrew pc->pc_map[2] != PC_FREE2) { 1753281494Sandrew /* 98% of the time, pc is already at the head of the list. */ 1754281494Sandrew if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 1755281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1756281494Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1757281494Sandrew } 1758281494Sandrew return; 1759281494Sandrew } 1760281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1761281494Sandrew free_pv_chunk(pc); 1762281494Sandrew} 1763281494Sandrew 1764281494Sandrewstatic void 1765281494Sandrewfree_pv_chunk(struct pv_chunk *pc) 1766281494Sandrew{ 1767281494Sandrew vm_page_t m; 1768281494Sandrew 1769281494Sandrew mtx_lock(&pv_chunks_mutex); 1770281494Sandrew TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1771281494Sandrew mtx_unlock(&pv_chunks_mutex); 1772281494Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1773281494Sandrew PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1774281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1775281494Sandrew /* entire chunk is free, return it */ 1776281494Sandrew m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1777281494Sandrew dump_drop_page(m->phys_addr); 1778288256Salc vm_page_unwire(m, PQ_NONE); 1779281494Sandrew vm_page_free(m); 1780281494Sandrew} 1781281494Sandrew 1782281494Sandrew/* 1783281494Sandrew * Returns a new PV entry, allocating a new PV chunk from the system when 1784281494Sandrew * needed. If this PV chunk allocation fails and a PV list lock pointer was 1785281494Sandrew * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 1786281494Sandrew * returned. 1787281494Sandrew * 1788281494Sandrew * The given PV list lock may be released. 
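 *
 * Callers that pass a lock pointer therefore must not assume that the
 * lock was held continuously across the call; any state read under
 * that lock beforehand (for example, a pv_gen generation count) may
 * be stale if reclamation ran and should be re-validated afterwards.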
1789281494Sandrew */ 1790281494Sandrewstatic pv_entry_t 1791281494Sandrewget_pv_entry(pmap_t pmap, struct rwlock **lockp) 1792281494Sandrew{ 1793281494Sandrew int bit, field; 1794281494Sandrew pv_entry_t pv; 1795281494Sandrew struct pv_chunk *pc; 1796281494Sandrew vm_page_t m; 1797281494Sandrew 1798281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1799281494Sandrew PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 1800281494Sandrewretry: 1801281494Sandrew pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1802281494Sandrew if (pc != NULL) { 1803281494Sandrew for (field = 0; field < _NPCM; field++) { 1804281494Sandrew if (pc->pc_map[field]) { 1805281494Sandrew bit = ffsl(pc->pc_map[field]) - 1; 1806281494Sandrew break; 1807281494Sandrew } 1808281494Sandrew } 1809281494Sandrew if (field < _NPCM) { 1810281494Sandrew pv = &pc->pc_pventry[field * 64 + bit]; 1811281494Sandrew pc->pc_map[field] &= ~(1ul << bit); 1812281494Sandrew /* If this was the last item, move it to tail */ 1813281494Sandrew if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 1814281494Sandrew pc->pc_map[2] == 0) { 1815281494Sandrew TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1816281494Sandrew TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 1817281494Sandrew pc_list); 1818281494Sandrew } 1819281494Sandrew PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1820281494Sandrew PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 1821281494Sandrew return (pv); 1822281494Sandrew } 1823281494Sandrew } 1824281494Sandrew /* No free items, allocate another chunk */ 1825281494Sandrew m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 1826281494Sandrew VM_ALLOC_WIRED); 1827281494Sandrew if (m == NULL) { 1828281494Sandrew if (lockp == NULL) { 1829281494Sandrew PV_STAT(pc_chunk_tryfail++); 1830281494Sandrew return (NULL); 1831281494Sandrew } 1832281494Sandrew m = reclaim_pv_chunk(pmap, lockp); 1833281494Sandrew if (m == NULL) 1834281494Sandrew goto retry; 1835281494Sandrew } 1836281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 1837281494Sandrew PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 1838281494Sandrew dump_add_page(m->phys_addr); 1839281494Sandrew pc = (void *)PHYS_TO_DMAP(m->phys_addr); 1840281494Sandrew pc->pc_pmap = pmap; 1841281494Sandrew pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 1842281494Sandrew pc->pc_map[1] = PC_FREE1; 1843281494Sandrew pc->pc_map[2] = PC_FREE2; 1844281494Sandrew mtx_lock(&pv_chunks_mutex); 1845281494Sandrew TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1846281494Sandrew mtx_unlock(&pv_chunks_mutex); 1847281494Sandrew pv = &pc->pc_pventry[0]; 1848281494Sandrew TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1849281494Sandrew PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1850281494Sandrew PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 1851281494Sandrew return (pv); 1852281494Sandrew} 1853281494Sandrew 1854281494Sandrew/* 1855281494Sandrew * First find and then remove the pv entry for the specified pmap and virtual 1856281494Sandrew * address from the specified pv list. Returns the pv entry if found and NULL 1857281494Sandrew * otherwise. This operation can be performed on pv lists for either 4KB or 1858281494Sandrew * 2MB page mappings. 
1859281494Sandrew */ 1860281494Sandrewstatic __inline pv_entry_t 1861281494Sandrewpmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1862281494Sandrew{ 1863281494Sandrew pv_entry_t pv; 1864281494Sandrew 1865281494Sandrew TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 1866281494Sandrew if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1867281494Sandrew TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 1868281494Sandrew pvh->pv_gen++; 1869281494Sandrew break; 1870281494Sandrew } 1871281494Sandrew } 1872281494Sandrew return (pv); 1873281494Sandrew} 1874281494Sandrew 1875281494Sandrew/* 1876281494Sandrew * First find and then destroy the pv entry for the specified pmap and virtual 1877281494Sandrew * address. This operation can be performed on pv lists for either 4KB or 2MB 1878281494Sandrew * page mappings. 1879281494Sandrew */ 1880281494Sandrewstatic void 1881281494Sandrewpmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1882281494Sandrew{ 1883281494Sandrew pv_entry_t pv; 1884281494Sandrew 1885281494Sandrew pv = pmap_pvh_remove(pvh, pmap, va); 1886281494Sandrew KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 1887281494Sandrew free_pv_entry(pmap, pv); 1888281494Sandrew} 1889281494Sandrew 1890281494Sandrew/* 1891281494Sandrew * Conditionally create the PV entry for a 4KB page mapping if the required 1892281494Sandrew * memory can be allocated without resorting to reclamation. 1893281494Sandrew */ 1894281494Sandrewstatic boolean_t 1895281494Sandrewpmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 1896281494Sandrew struct rwlock **lockp) 1897281494Sandrew{ 1898281494Sandrew pv_entry_t pv; 1899281494Sandrew 1900281494Sandrew PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1901281494Sandrew /* Pass NULL instead of the lock pointer to disable reclamation. 
*/
1902281494Sandrew	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
1903281494Sandrew		pv->pv_va = va;
1904281494Sandrew		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1905281494Sandrew		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
1906281494Sandrew		m->md.pv_gen++;
1907281494Sandrew		return (TRUE);
1908281494Sandrew	} else
1909281494Sandrew		return (FALSE);
1910281494Sandrew}
1911281494Sandrew
1912281494Sandrew/*
1913281494Sandrew * pmap_remove_l3: unmap a single 4KB page from a process' address space
1914281494Sandrew */
1915281494Sandrewstatic int
1916305531Sandrewpmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
1917281494Sandrew    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
1918281494Sandrew{
1919281494Sandrew	pt_entry_t old_l3;
1920281494Sandrew	vm_page_t m;
1921281494Sandrew
1922281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1923281494Sandrew	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
1924281494Sandrew		cpu_dcache_wb_range(va, L3_SIZE);
1925281494Sandrew	old_l3 = pmap_load_clear(l3);
1926281494Sandrew	PTE_SYNC(l3);
1927285212Sandrew	pmap_invalidate_page(pmap, va);
1928281494Sandrew	if (old_l3 & ATTR_SW_WIRED)
1929281494Sandrew		pmap->pm_stats.wired_count -= 1;
1930281494Sandrew	pmap_resident_count_dec(pmap, 1);
1931281494Sandrew	if (old_l3 & ATTR_SW_MANAGED) {
1932281494Sandrew		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
1933281494Sandrew		if (pmap_page_dirty(old_l3))
1934281494Sandrew			vm_page_dirty(m);
1935281494Sandrew		if (old_l3 & ATTR_AF)
1936281494Sandrew			vm_page_aflag_set(m, PGA_REFERENCED);
1937281494Sandrew		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1938281494Sandrew		pmap_pvh_free(&m->md, pmap, va);
1939281494Sandrew	}
1940281494Sandrew	return (pmap_unuse_l3(pmap, va, l2e, free));
1941281494Sandrew}
1942281494Sandrew
1943281494Sandrew/*
1944281494Sandrew * Remove the given range of addresses from the specified map.
1945281494Sandrew *
1946281494Sandrew * It is assumed that the start and end are properly
1947281494Sandrew * rounded to the page size.
1948281494Sandrew */
1949281494Sandrewvoid
1950281494Sandrewpmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1951281494Sandrew{
1952281494Sandrew	struct rwlock *lock;
1953281494Sandrew	vm_offset_t va, va_next;
1954297446Sandrew	pd_entry_t *l0, *l1, *l2;
1955281494Sandrew	pt_entry_t l3_paddr, *l3;
1956281494Sandrew	struct spglist free;
1957281494Sandrew	int anyvalid;
1958281494Sandrew
1959281494Sandrew	/*
1960281494Sandrew	 * Perform an unsynchronized read. This is, however, safe.
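	 * (Presumably because the read is only an optimization: callers
	 * serialize updates to this address range, so a zero resident
	 * count really means there is nothing to remove, while a stale
	 * nonzero value merely causes a harmless page table walk.)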
1961281494Sandrew */ 1962281494Sandrew if (pmap->pm_stats.resident_count == 0) 1963281494Sandrew return; 1964281494Sandrew 1965281494Sandrew anyvalid = 0; 1966281494Sandrew SLIST_INIT(&free); 1967281494Sandrew 1968281494Sandrew PMAP_LOCK(pmap); 1969281494Sandrew 1970281494Sandrew lock = NULL; 1971281494Sandrew for (; sva < eva; sva = va_next) { 1972281494Sandrew 1973281494Sandrew if (pmap->pm_stats.resident_count == 0) 1974281494Sandrew break; 1975281494Sandrew 1976297446Sandrew l0 = pmap_l0(pmap, sva); 1977297446Sandrew if (pmap_load(l0) == 0) { 1978297446Sandrew va_next = (sva + L0_SIZE) & ~L0_OFFSET; 1979297446Sandrew if (va_next < sva) 1980297446Sandrew va_next = eva; 1981297446Sandrew continue; 1982297446Sandrew } 1983297446Sandrew 1984297446Sandrew l1 = pmap_l0_to_l1(l0, sva); 1985285045Sandrew if (pmap_load(l1) == 0) { 1986281494Sandrew va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1987281494Sandrew if (va_next < sva) 1988281494Sandrew va_next = eva; 1989281494Sandrew continue; 1990281494Sandrew } 1991281494Sandrew 1992281494Sandrew /* 1993281494Sandrew * Calculate index for next page table. 1994281494Sandrew */ 1995281494Sandrew va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1996281494Sandrew if (va_next < sva) 1997281494Sandrew va_next = eva; 1998281494Sandrew 1999281494Sandrew l2 = pmap_l1_to_l2(l1, sva); 2000281494Sandrew if (l2 == NULL) 2001281494Sandrew continue; 2002281494Sandrew 2003288445Sandrew l3_paddr = pmap_load(l2); 2004281494Sandrew 2005281494Sandrew /* 2006281494Sandrew * Weed out invalid mappings. 2007281494Sandrew */ 2008281494Sandrew if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) 2009281494Sandrew continue; 2010281494Sandrew 2011281494Sandrew /* 2012281494Sandrew * Limit our scan to either the end of the va represented 2013281494Sandrew * by the current page table page, or to the end of the 2014281494Sandrew * range being removed. 2015281494Sandrew */ 2016281494Sandrew if (va_next > eva) 2017281494Sandrew va_next = eva; 2018281494Sandrew 2019281494Sandrew va = va_next; 2020281494Sandrew for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2021281494Sandrew sva += L3_SIZE) { 2022281494Sandrew if (l3 == NULL) 2023281494Sandrew panic("l3 == NULL"); 2024285045Sandrew if (pmap_load(l3) == 0) { 2025281494Sandrew if (va != va_next) { 2026281494Sandrew pmap_invalidate_range(pmap, va, sva); 2027281494Sandrew va = va_next; 2028281494Sandrew } 2029281494Sandrew continue; 2030281494Sandrew } 2031281494Sandrew if (va == va_next) 2032281494Sandrew va = sva; 2033281494Sandrew if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, 2034281494Sandrew &lock)) { 2035281494Sandrew sva += L3_SIZE; 2036281494Sandrew break; 2037281494Sandrew } 2038281494Sandrew } 2039281494Sandrew if (va != va_next) 2040281494Sandrew pmap_invalidate_range(pmap, va, sva); 2041281494Sandrew } 2042281494Sandrew if (lock != NULL) 2043281494Sandrew rw_wunlock(lock); 2044281494Sandrew if (anyvalid) 2045281494Sandrew pmap_invalidate_all(pmap); 2046281494Sandrew PMAP_UNLOCK(pmap); 2047281494Sandrew pmap_free_zero_pages(&free); 2048281494Sandrew} 2049281494Sandrew 2050281494Sandrew/* 2051281494Sandrew * Routine: pmap_remove_all 2052281494Sandrew * Function: 2053281494Sandrew * Removes this physical page from 2054281494Sandrew * all physical maps in which it resides. 2055281494Sandrew * Reflects back modify bits to the pager. 
2056281494Sandrew * 2057281494Sandrew * Notes: 2058281494Sandrew * Original versions of this routine were very 2059281494Sandrew * inefficient because they iteratively called 2060281494Sandrew * pmap_remove (slow...) 2061281494Sandrew */ 2062281494Sandrew 2063281494Sandrewvoid 2064281494Sandrewpmap_remove_all(vm_page_t m) 2065281494Sandrew{ 2066281494Sandrew pv_entry_t pv; 2067281494Sandrew pmap_t pmap; 2068305879Sandrew struct rwlock *lock; 2069297446Sandrew pd_entry_t *pde, tpde; 2070297446Sandrew pt_entry_t *pte, tpte; 2071281494Sandrew struct spglist free; 2072305879Sandrew int lvl, md_gen; 2073281494Sandrew 2074281494Sandrew KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2075281494Sandrew ("pmap_remove_all: page %p is not managed", m)); 2076281494Sandrew SLIST_INIT(&free); 2077305879Sandrew lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2078305879Sandrewretry: 2079305879Sandrew rw_wlock(lock); 2080281494Sandrew while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2081281494Sandrew pmap = PV_PMAP(pv); 2082305879Sandrew if (!PMAP_TRYLOCK(pmap)) { 2083305879Sandrew md_gen = m->md.pv_gen; 2084305879Sandrew rw_wunlock(lock); 2085305879Sandrew PMAP_LOCK(pmap); 2086305879Sandrew rw_wlock(lock); 2087305879Sandrew if (md_gen != m->md.pv_gen) { 2088305879Sandrew rw_wunlock(lock); 2089305879Sandrew PMAP_UNLOCK(pmap); 2090305879Sandrew goto retry; 2091305879Sandrew } 2092305879Sandrew } 2093281494Sandrew pmap_resident_count_dec(pmap, 1); 2094297446Sandrew 2095297446Sandrew pde = pmap_pde(pmap, pv->pv_va, &lvl); 2096297446Sandrew KASSERT(pde != NULL, 2097297446Sandrew ("pmap_remove_all: no page directory entry found")); 2098297446Sandrew KASSERT(lvl == 2, 2099297446Sandrew ("pmap_remove_all: invalid pde level %d", lvl)); 2100297446Sandrew tpde = pmap_load(pde); 2101297446Sandrew 2102297446Sandrew pte = pmap_l2_to_l3(pde, pv->pv_va); 2103297446Sandrew tpte = pmap_load(pte); 2104281494Sandrew if (pmap_is_current(pmap) && 2105297446Sandrew pmap_l3_valid_cacheable(tpte)) 2106281494Sandrew cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2107297446Sandrew pmap_load_clear(pte); 2108297446Sandrew PTE_SYNC(pte); 2109285212Sandrew pmap_invalidate_page(pmap, pv->pv_va); 2110297446Sandrew if (tpte & ATTR_SW_WIRED) 2111281494Sandrew pmap->pm_stats.wired_count--; 2112297446Sandrew if ((tpte & ATTR_AF) != 0) 2113281494Sandrew vm_page_aflag_set(m, PGA_REFERENCED); 2114281494Sandrew 2115281494Sandrew /* 2116281494Sandrew * Update the vm_page_t clean and reference bits. 2117281494Sandrew */ 2118297446Sandrew if (pmap_page_dirty(tpte)) 2119281494Sandrew vm_page_dirty(m); 2120297446Sandrew pmap_unuse_l3(pmap, pv->pv_va, tpde, &free); 2121281494Sandrew TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2122281494Sandrew m->md.pv_gen++; 2123281494Sandrew free_pv_entry(pmap, pv); 2124281494Sandrew PMAP_UNLOCK(pmap); 2125281494Sandrew } 2126281494Sandrew vm_page_aflag_clear(m, PGA_WRITEABLE); 2127305879Sandrew rw_wunlock(lock); 2128281494Sandrew pmap_free_zero_pages(&free); 2129281494Sandrew} 2130281494Sandrew 2131281494Sandrew/* 2132281494Sandrew * Set the physical protection on the 2133281494Sandrew * specified range of this map as requested. 
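 *
 * For example (sketch), making a single page read-only while leaving
 * it mapped reduces to OR-ing the read-only AP attribute into its L3
 * entry, which is what the loop below does for every valid entry in
 * the range:
 *
 *	pmap_protect(pmap, va, va + PAGE_SIZE, VM_PROT_READ);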
2134281494Sandrew */ 2135281494Sandrewvoid 2136281494Sandrewpmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2137281494Sandrew{ 2138281494Sandrew vm_offset_t va, va_next; 2139297446Sandrew pd_entry_t *l0, *l1, *l2; 2140281494Sandrew pt_entry_t *l3p, l3; 2141281494Sandrew 2142281494Sandrew if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2143281494Sandrew pmap_remove(pmap, sva, eva); 2144281494Sandrew return; 2145281494Sandrew } 2146281494Sandrew 2147281494Sandrew if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) 2148281494Sandrew return; 2149281494Sandrew 2150281494Sandrew PMAP_LOCK(pmap); 2151281494Sandrew for (; sva < eva; sva = va_next) { 2152281494Sandrew 2153297446Sandrew l0 = pmap_l0(pmap, sva); 2154297446Sandrew if (pmap_load(l0) == 0) { 2155297446Sandrew va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2156297446Sandrew if (va_next < sva) 2157297446Sandrew va_next = eva; 2158297446Sandrew continue; 2159297446Sandrew } 2160297446Sandrew 2161297446Sandrew l1 = pmap_l0_to_l1(l0, sva); 2162285045Sandrew if (pmap_load(l1) == 0) { 2163281494Sandrew va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2164281494Sandrew if (va_next < sva) 2165281494Sandrew va_next = eva; 2166281494Sandrew continue; 2167281494Sandrew } 2168281494Sandrew 2169281494Sandrew va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2170281494Sandrew if (va_next < sva) 2171281494Sandrew va_next = eva; 2172281494Sandrew 2173281494Sandrew l2 = pmap_l1_to_l2(l1, sva); 2174288445Sandrew if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) 2175281494Sandrew continue; 2176281494Sandrew 2177281494Sandrew if (va_next > eva) 2178281494Sandrew va_next = eva; 2179281494Sandrew 2180281494Sandrew va = va_next; 2181281494Sandrew for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 2182281494Sandrew sva += L3_SIZE) { 2183281494Sandrew l3 = pmap_load(l3p); 2184281494Sandrew if (pmap_l3_valid(l3)) { 2185281494Sandrew pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); 2186281494Sandrew PTE_SYNC(l3p); 2187285212Sandrew /* XXX: Use pmap_invalidate_range */ 2188285212Sandrew pmap_invalidate_page(pmap, va); 2189281494Sandrew } 2190281494Sandrew } 2191281494Sandrew } 2192281494Sandrew PMAP_UNLOCK(pmap); 2193281494Sandrew 2194281494Sandrew /* TODO: Only invalidate entries we are touching */ 2195281494Sandrew pmap_invalidate_all(pmap); 2196281494Sandrew} 2197281494Sandrew 2198281494Sandrew/* 2199281494Sandrew * Insert the given physical page (p) at 2200281494Sandrew * the specified virtual address (v) in the 2201281494Sandrew * target physical map with the protection requested. 2202281494Sandrew * 2203281494Sandrew * If specified, the page will be wired down, meaning 2204281494Sandrew * that the related pte can not be reclaimed. 2205281494Sandrew * 2206281494Sandrew * NB: This is the only routine which MAY NOT lazy-evaluate 2207281494Sandrew * or lose information. That is, this routine must actually 2208281494Sandrew * insert this page into the given map NOW. 
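 *
 * A sketch of the failure contract for user addresses: when
 * PMAP_ENTER_NOSLEEP is passed and a page table page cannot be
 * allocated, the call fails instead of sleeping and the caller is
 * expected to recover:
 *
 *	if (pmap_enter(pmap, va, m, prot, PMAP_ENTER_NOSLEEP, 0) ==
 *	    KERN_RESOURCE_SHORTAGE)
 *		... back off and retry later ...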
2209281494Sandrew */ 2210281494Sandrewint 2211281494Sandrewpmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2212281494Sandrew u_int flags, int8_t psind __unused) 2213281494Sandrew{ 2214281494Sandrew struct rwlock *lock; 2215297446Sandrew pd_entry_t *pde; 2216281494Sandrew pt_entry_t new_l3, orig_l3; 2217281494Sandrew pt_entry_t *l3; 2218281494Sandrew pv_entry_t pv; 2219297446Sandrew vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; 2220297446Sandrew vm_page_t mpte, om, l1_m, l2_m, l3_m; 2221281494Sandrew boolean_t nosleep; 2222297446Sandrew int lvl; 2223281494Sandrew 2224281494Sandrew va = trunc_page(va); 2225281494Sandrew if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2226281494Sandrew VM_OBJECT_ASSERT_LOCKED(m->object); 2227281494Sandrew pa = VM_PAGE_TO_PHYS(m); 2228285537Sandrew new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 2229285537Sandrew L3_PAGE); 2230281494Sandrew if ((prot & VM_PROT_WRITE) == 0) 2231281494Sandrew new_l3 |= ATTR_AP(ATTR_AP_RO); 2232281494Sandrew if ((flags & PMAP_ENTER_WIRED) != 0) 2233281494Sandrew new_l3 |= ATTR_SW_WIRED; 2234281494Sandrew if ((va >> 63) == 0) 2235281494Sandrew new_l3 |= ATTR_AP(ATTR_AP_USER); 2236281494Sandrew 2237285212Sandrew CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 2238285212Sandrew 2239281494Sandrew mpte = NULL; 2240281494Sandrew 2241281494Sandrew lock = NULL; 2242281494Sandrew PMAP_LOCK(pmap); 2243281494Sandrew 2244281494Sandrew if (va < VM_MAXUSER_ADDRESS) { 2245281494Sandrew nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 2246281494Sandrew mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); 2247281494Sandrew if (mpte == NULL && nosleep) { 2248285212Sandrew CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 2249281494Sandrew if (lock != NULL) 2250281494Sandrew rw_wunlock(lock); 2251281494Sandrew PMAP_UNLOCK(pmap); 2252281494Sandrew return (KERN_RESOURCE_SHORTAGE); 2253281494Sandrew } 2254297446Sandrew pde = pmap_pde(pmap, va, &lvl); 2255297446Sandrew KASSERT(pde != NULL, 2256297446Sandrew ("pmap_enter: Invalid page entry, va: 0x%lx", va)); 2257297446Sandrew KASSERT(lvl == 2, 2258297446Sandrew ("pmap_enter: Invalid level %d", lvl)); 2259297446Sandrew 2260297446Sandrew l3 = pmap_l2_to_l3(pde, va); 2261281494Sandrew } else { 2262297446Sandrew pde = pmap_pde(pmap, va, &lvl); 2263297446Sandrew /* 2264297446Sandrew * If we get a level 2 pde it must point to a level 3 entry 2265297446Sandrew * otherwise we will need to create the intermediate tables 2266297446Sandrew */ 2267297446Sandrew if (lvl < 2) { 2268297446Sandrew switch(lvl) { 2269297446Sandrew default: 2270297446Sandrew case -1: 2271297446Sandrew /* Get the l0 pde to update */ 2272297446Sandrew pde = pmap_l0(pmap, va); 2273297446Sandrew KASSERT(pde != NULL, ("...")); 2274281494Sandrew 2275297446Sandrew l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2276297446Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2277297446Sandrew VM_ALLOC_ZERO); 2278297446Sandrew if (l1_m == NULL) 2279297446Sandrew panic("pmap_enter: l1 pte_m == NULL"); 2280297446Sandrew if ((l1_m->flags & PG_ZERO) == 0) 2281297446Sandrew pmap_zero_page(l1_m); 2282297446Sandrew 2283297446Sandrew l1_pa = VM_PAGE_TO_PHYS(l1_m); 2284297446Sandrew pmap_load_store(pde, l1_pa | L0_TABLE); 2285297446Sandrew PTE_SYNC(pde); 2286297446Sandrew /* FALLTHROUGH */ 2287297446Sandrew case 0: 2288297446Sandrew /* Get the l1 pde to update */ 2289297446Sandrew pde = pmap_l1_to_l2(pde, va); 2290297446Sandrew KASSERT(pde != NULL, ("...")); 2291297446Sandrew 2292281494Sandrew l2_m = 
vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2293281494Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2294281494Sandrew VM_ALLOC_ZERO); 2295281494Sandrew if (l2_m == NULL) 2296281494Sandrew panic("pmap_enter: l2 pte_m == NULL"); 2297281494Sandrew if ((l2_m->flags & PG_ZERO) == 0) 2298281494Sandrew pmap_zero_page(l2_m); 2299281494Sandrew 2300281494Sandrew l2_pa = VM_PAGE_TO_PHYS(l2_m); 2301297446Sandrew pmap_load_store(pde, l2_pa | L1_TABLE); 2302297446Sandrew PTE_SYNC(pde); 2303297446Sandrew /* FALLTHROUGH */ 2304297446Sandrew case 1: 2305297446Sandrew /* Get the l2 pde to update */ 2306297446Sandrew pde = pmap_l1_to_l2(pde, va); 2307281494Sandrew 2308297446Sandrew l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2309297446Sandrew VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2310297446Sandrew VM_ALLOC_ZERO); 2311297446Sandrew if (l3_m == NULL) 2312297446Sandrew panic("pmap_enter: l3 pte_m == NULL"); 2313297446Sandrew if ((l3_m->flags & PG_ZERO) == 0) 2314297446Sandrew pmap_zero_page(l3_m); 2315281494Sandrew 2316297446Sandrew l3_pa = VM_PAGE_TO_PHYS(l3_m); 2317297446Sandrew pmap_load_store(pde, l3_pa | L2_TABLE); 2318297446Sandrew PTE_SYNC(pde); 2319297446Sandrew break; 2320297446Sandrew } 2321281494Sandrew } 2322297446Sandrew l3 = pmap_l2_to_l3(pde, va); 2323285212Sandrew pmap_invalidate_page(pmap, va); 2324281494Sandrew } 2325281494Sandrew 2326281494Sandrew om = NULL; 2327281494Sandrew orig_l3 = pmap_load(l3); 2328281494Sandrew opa = orig_l3 & ~ATTR_MASK; 2329281494Sandrew 2330281494Sandrew /* 2331281494Sandrew * Is the specified virtual address already mapped? 2332281494Sandrew */ 2333281494Sandrew if (pmap_l3_valid(orig_l3)) { 2334281494Sandrew /* 2335281494Sandrew * Wiring change, just update stats. We don't worry about 2336281494Sandrew * wiring PT pages as they remain resident as long as there 2337281494Sandrew * are valid mappings in them. Hence, if a user page is wired, 2338281494Sandrew * the PT page will be also. 2339281494Sandrew */ 2340281494Sandrew if ((flags & PMAP_ENTER_WIRED) != 0 && 2341281494Sandrew (orig_l3 & ATTR_SW_WIRED) == 0) 2342281494Sandrew pmap->pm_stats.wired_count++; 2343281494Sandrew else if ((flags & PMAP_ENTER_WIRED) == 0 && 2344281494Sandrew (orig_l3 & ATTR_SW_WIRED) != 0) 2345281494Sandrew pmap->pm_stats.wired_count--; 2346281494Sandrew 2347281494Sandrew /* 2348281494Sandrew * Remove the extra PT page reference. 2349281494Sandrew */ 2350281494Sandrew if (mpte != NULL) { 2351281494Sandrew mpte->wire_count--; 2352281494Sandrew KASSERT(mpte->wire_count > 0, 2353281494Sandrew ("pmap_enter: missing reference to page table page," 2354281494Sandrew " va: 0x%lx", va)); 2355281494Sandrew } 2356281494Sandrew 2357281494Sandrew /* 2358281494Sandrew * Has the physical page changed? 2359281494Sandrew */ 2360281494Sandrew if (opa == pa) { 2361281494Sandrew /* 2362281494Sandrew * No, might be a protection or wiring change. 
2363281494Sandrew */ 2364281494Sandrew if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2365281494Sandrew new_l3 |= ATTR_SW_MANAGED; 2366281494Sandrew if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == 2367281494Sandrew ATTR_AP(ATTR_AP_RW)) { 2368281494Sandrew vm_page_aflag_set(m, PGA_WRITEABLE); 2369281494Sandrew } 2370281494Sandrew } 2371281494Sandrew goto validate; 2372281494Sandrew } 2373281494Sandrew 2374281494Sandrew /* Flush the cache, there might be uncommitted data in it */ 2375281494Sandrew if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 2376281494Sandrew cpu_dcache_wb_range(va, L3_SIZE); 2377281494Sandrew } else { 2378281494Sandrew /* 2379281494Sandrew * Increment the counters. 2380281494Sandrew */ 2381281494Sandrew if ((new_l3 & ATTR_SW_WIRED) != 0) 2382281494Sandrew pmap->pm_stats.wired_count++; 2383281494Sandrew pmap_resident_count_inc(pmap, 1); 2384281494Sandrew } 2385281494Sandrew /* 2386281494Sandrew * Enter on the PV list if part of our managed memory. 2387281494Sandrew */ 2388281494Sandrew if ((m->oflags & VPO_UNMANAGED) == 0) { 2389281494Sandrew new_l3 |= ATTR_SW_MANAGED; 2390281494Sandrew pv = get_pv_entry(pmap, &lock); 2391281494Sandrew pv->pv_va = va; 2392281494Sandrew CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 2393281494Sandrew TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2394281494Sandrew m->md.pv_gen++; 2395281494Sandrew if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 2396281494Sandrew vm_page_aflag_set(m, PGA_WRITEABLE); 2397281494Sandrew } 2398281494Sandrew 2399281494Sandrew /* 2400281494Sandrew * Update the L3 entry. 2401281494Sandrew */ 2402281494Sandrew if (orig_l3 != 0) { 2403281494Sandrewvalidate: 2404281494Sandrew orig_l3 = pmap_load_store(l3, new_l3); 2405281494Sandrew PTE_SYNC(l3); 2406281494Sandrew opa = orig_l3 & ~ATTR_MASK; 2407281494Sandrew 2408281494Sandrew if (opa != pa) { 2409281494Sandrew if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2410281494Sandrew om = PHYS_TO_VM_PAGE(opa); 2411281494Sandrew if (pmap_page_dirty(orig_l3)) 2412281494Sandrew vm_page_dirty(om); 2413281494Sandrew if ((orig_l3 & ATTR_AF) != 0) 2414281494Sandrew vm_page_aflag_set(om, PGA_REFERENCED); 2415281494Sandrew CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 2416281494Sandrew pmap_pvh_free(&om->md, pmap, va); 2417281494Sandrew } 2418281494Sandrew } else if (pmap_page_dirty(orig_l3)) { 2419281494Sandrew if ((orig_l3 & ATTR_SW_MANAGED) != 0) 2420281494Sandrew vm_page_dirty(m); 2421281494Sandrew } 2422281494Sandrew } else { 2423281494Sandrew pmap_load_store(l3, new_l3); 2424281494Sandrew PTE_SYNC(l3); 2425281494Sandrew } 2426285212Sandrew pmap_invalidate_page(pmap, va); 2427281494Sandrew if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 2428281494Sandrew cpu_icache_sync_range(va, PAGE_SIZE); 2429281494Sandrew 2430281494Sandrew if (lock != NULL) 2431281494Sandrew rw_wunlock(lock); 2432281494Sandrew PMAP_UNLOCK(pmap); 2433281494Sandrew return (KERN_SUCCESS); 2434281494Sandrew} 2435281494Sandrew 2436281494Sandrew/* 2437281494Sandrew * Maps a sequence of resident pages belonging to the same object. 2438281494Sandrew * The sequence begins with the given page m_start. This page is 2439281494Sandrew * mapped at the given virtual address start. Each subsequent page is 2440281494Sandrew * mapped at a virtual address that is offset from start by the same 2441281494Sandrew * amount as the page is offset from m_start within the object. 
The
2442281494Sandrew * last page in the sequence is the page with the largest offset from
2443281494Sandrew * m_start that can be mapped at a virtual address less than the given
2444281494Sandrew * virtual address end. Not every virtual page between start and end
2445281494Sandrew * is mapped; only those for which a resident page exists with the
2446281494Sandrew * corresponding offset from m_start are mapped.
2447281494Sandrew */
2448281494Sandrewvoid
2449281494Sandrewpmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2450281494Sandrew    vm_page_t m_start, vm_prot_t prot)
2451281494Sandrew{
2452281494Sandrew	struct rwlock *lock;
2453281494Sandrew	vm_offset_t va;
2454281494Sandrew	vm_page_t m, mpte;
2455281494Sandrew	vm_pindex_t diff, psize;
2456281494Sandrew
2457281494Sandrew	VM_OBJECT_ASSERT_LOCKED(m_start->object);
2458281494Sandrew
2459281494Sandrew	psize = atop(end - start);
2460281494Sandrew	mpte = NULL;
2461281494Sandrew	m = m_start;
2462281494Sandrew	lock = NULL;
2463281494Sandrew	PMAP_LOCK(pmap);
2464281494Sandrew	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2465281494Sandrew		va = start + ptoa(diff);
2466281494Sandrew		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
2467281494Sandrew		m = TAILQ_NEXT(m, listq);
2468281494Sandrew	}
2469281494Sandrew	if (lock != NULL)
2470281494Sandrew		rw_wunlock(lock);
2471281494Sandrew	PMAP_UNLOCK(pmap);
2472281494Sandrew}
2473281494Sandrew
2474281494Sandrew/*
2475281494Sandrew * This code makes some *MAJOR* assumptions:
2476281494Sandrew * 1. The current pmap and the given pmap both exist.
2477281494Sandrew * 2. Not wired.
2478281494Sandrew * 3. Read access.
2479281494Sandrew * 4. No page table pages.
2480281494Sandrew * In exchange, it is *MUCH* faster than pmap_enter().
2481281494Sandrew */
2482281494Sandrew
2483281494Sandrewvoid
2484281494Sandrewpmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2485281494Sandrew{
2486281494Sandrew	struct rwlock *lock;
2487281494Sandrew
2488281494Sandrew	lock = NULL;
2489281494Sandrew	PMAP_LOCK(pmap);
2490281494Sandrew	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
2491281494Sandrew	if (lock != NULL)
2492281494Sandrew		rw_wunlock(lock);
2493281494Sandrew	PMAP_UNLOCK(pmap);
2494281494Sandrew}
2495281494Sandrew
2496281494Sandrewstatic vm_page_t
2497281494Sandrewpmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2498281494Sandrew    vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
2499281494Sandrew{
2500281494Sandrew	struct spglist free;
2501297446Sandrew	pd_entry_t *pde;
2502281494Sandrew	pt_entry_t *l3;
2503281494Sandrew	vm_paddr_t pa;
2504297446Sandrew	int lvl;
2505281494Sandrew
2506281494Sandrew	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2507281494Sandrew	    (m->oflags & VPO_UNMANAGED) != 0,
2508281494Sandrew	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2509281494Sandrew	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2510281494Sandrew
2511285212Sandrew	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
2512281494Sandrew	/*
2513281494Sandrew	 * In the case that a page table page is not
2514281494Sandrew	 * resident, we are creating it here.
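	 * The mpte argument lets pmap_enter_object() above carry the
	 * page table page found for one page across to the next, so
	 * runs of mappings under the same L2 entry only bump its wire
	 * count (the l2pindex check below) instead of re-walking the
	 * page tables.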
2515281494Sandrew */ 2516281494Sandrew if (va < VM_MAXUSER_ADDRESS) { 2517281494Sandrew vm_pindex_t l2pindex; 2518281494Sandrew 2519281494Sandrew /* 2520281494Sandrew * Calculate pagetable page index 2521281494Sandrew */ 2522281494Sandrew l2pindex = pmap_l2_pindex(va); 2523281494Sandrew if (mpte && (mpte->pindex == l2pindex)) { 2524281494Sandrew mpte->wire_count++; 2525281494Sandrew } else { 2526281494Sandrew /* 2527281494Sandrew * Get the l2 entry 2528281494Sandrew */ 2529297446Sandrew pde = pmap_pde(pmap, va, &lvl); 2530281494Sandrew 2531281494Sandrew /* 2532281494Sandrew * If the page table page is mapped, we just increment 2533281494Sandrew * the hold count, and activate it. Otherwise, we 2534281494Sandrew * attempt to allocate a page table page. If this 2535281494Sandrew * attempt fails, we don't retry. Instead, we give up. 2536281494Sandrew */ 2537297446Sandrew if (lvl == 2 && pmap_load(pde) != 0) { 2538285045Sandrew mpte = 2539297446Sandrew PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); 2540281494Sandrew mpte->wire_count++; 2541281494Sandrew } else { 2542281494Sandrew /* 2543281494Sandrew * Pass NULL instead of the PV list lock 2544281494Sandrew * pointer, because we don't intend to sleep. 2545281494Sandrew */ 2546281494Sandrew mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 2547281494Sandrew if (mpte == NULL) 2548281494Sandrew return (mpte); 2549281494Sandrew } 2550281494Sandrew } 2551281494Sandrew l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 2552281494Sandrew l3 = &l3[pmap_l3_index(va)]; 2553281494Sandrew } else { 2554281494Sandrew mpte = NULL; 2555297446Sandrew pde = pmap_pde(kernel_pmap, va, &lvl); 2556297446Sandrew KASSERT(pde != NULL, 2557297446Sandrew ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", 2558297446Sandrew va)); 2559297446Sandrew KASSERT(lvl == 2, 2560297446Sandrew ("pmap_enter_quick_locked: Invalid level %d", lvl)); 2561297446Sandrew l3 = pmap_l2_to_l3(pde, va); 2562281494Sandrew } 2563297446Sandrew 2564285212Sandrew if (pmap_load(l3) != 0) { 2565281494Sandrew if (mpte != NULL) { 2566281494Sandrew mpte->wire_count--; 2567281494Sandrew mpte = NULL; 2568281494Sandrew } 2569281494Sandrew return (mpte); 2570281494Sandrew } 2571281494Sandrew 2572281494Sandrew /* 2573281494Sandrew * Enter on the PV list if part of our managed memory. 
2574281494Sandrew */ 2575281494Sandrew if ((m->oflags & VPO_UNMANAGED) == 0 && 2576281494Sandrew !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 2577281494Sandrew if (mpte != NULL) { 2578281494Sandrew SLIST_INIT(&free); 2579281494Sandrew if (pmap_unwire_l3(pmap, va, mpte, &free)) { 2580281494Sandrew pmap_invalidate_page(pmap, va); 2581281494Sandrew pmap_free_zero_pages(&free); 2582281494Sandrew } 2583281494Sandrew mpte = NULL; 2584281494Sandrew } 2585281494Sandrew return (mpte); 2586281494Sandrew } 2587281494Sandrew 2588281494Sandrew /* 2589281494Sandrew * Increment counters 2590281494Sandrew */ 2591281494Sandrew pmap_resident_count_inc(pmap, 1); 2592281494Sandrew 2593285537Sandrew pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 2594281494Sandrew ATTR_AP(ATTR_AP_RW) | L3_PAGE; 2595281494Sandrew 2596281494Sandrew /* 2597281494Sandrew * Now validate mapping with RO protection 2598281494Sandrew */ 2599281494Sandrew if ((m->oflags & VPO_UNMANAGED) == 0) 2600281494Sandrew pa |= ATTR_SW_MANAGED; 2601281494Sandrew pmap_load_store(l3, pa); 2602281494Sandrew PTE_SYNC(l3); 2603281494Sandrew pmap_invalidate_page(pmap, va); 2604281494Sandrew return (mpte); 2605281494Sandrew} 2606281494Sandrew 2607281494Sandrew/* 2608281494Sandrew * This code maps large physical mmap regions into the 2609281494Sandrew * processor address space. Note that some shortcuts 2610281494Sandrew * are taken, but the code works. 2611281494Sandrew */ 2612281494Sandrewvoid 2613281494Sandrewpmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2614281494Sandrew vm_pindex_t pindex, vm_size_t size) 2615281494Sandrew{ 2616281494Sandrew 2617281846Sandrew VM_OBJECT_ASSERT_WLOCKED(object); 2618281846Sandrew KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2619281846Sandrew ("pmap_object_init_pt: non-device object")); 2620281494Sandrew} 2621281494Sandrew 2622281494Sandrew/* 2623281494Sandrew * Clear the wired attribute from the mappings for the specified range of 2624281494Sandrew * addresses in the given pmap. Every valid mapping within that range 2625281494Sandrew * must have the wired attribute set. In contrast, invalid mappings 2626281494Sandrew * cannot have the wired attribute set, so they are ignored. 2627281494Sandrew * 2628281494Sandrew * The wired attribute of the page table entry is not a hardware feature, 2629281494Sandrew * so there is no need to invalidate any TLB entries. 
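 *
 * ATTR_SW_WIRED lives in the software-defined bits of the page table
 * entry, which the hardware table walker ignores, so clearing it (the
 * atomic_clear_long() below) changes no translation and requires no
 * TLB maintenance.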
2630281494Sandrew */ 2631281494Sandrewvoid 2632281494Sandrewpmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2633281494Sandrew{ 2634281494Sandrew vm_offset_t va_next; 2635297446Sandrew pd_entry_t *l0, *l1, *l2; 2636281494Sandrew pt_entry_t *l3; 2637281494Sandrew 2638281494Sandrew PMAP_LOCK(pmap); 2639281494Sandrew for (; sva < eva; sva = va_next) { 2640297446Sandrew l0 = pmap_l0(pmap, sva); 2641297446Sandrew if (pmap_load(l0) == 0) { 2642297446Sandrew va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2643297446Sandrew if (va_next < sva) 2644297446Sandrew va_next = eva; 2645297446Sandrew continue; 2646297446Sandrew } 2647297446Sandrew 2648297446Sandrew l1 = pmap_l0_to_l1(l0, sva); 2649285045Sandrew if (pmap_load(l1) == 0) { 2650281494Sandrew va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2651281494Sandrew if (va_next < sva) 2652281494Sandrew va_next = eva; 2653281494Sandrew continue; 2654281494Sandrew } 2655281494Sandrew 2656281494Sandrew va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2657281494Sandrew if (va_next < sva) 2658281494Sandrew va_next = eva; 2659281494Sandrew 2660281494Sandrew l2 = pmap_l1_to_l2(l1, sva); 2661285045Sandrew if (pmap_load(l2) == 0) 2662281494Sandrew continue; 2663281494Sandrew 2664281494Sandrew if (va_next > eva) 2665281494Sandrew va_next = eva; 2666281494Sandrew for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2667281494Sandrew sva += L3_SIZE) { 2668285045Sandrew if (pmap_load(l3) == 0) 2669281494Sandrew continue; 2670285045Sandrew if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) 2671281494Sandrew panic("pmap_unwire: l3 %#jx is missing " 2672288445Sandrew "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); 2673281494Sandrew 2674281494Sandrew /* 2675281494Sandrew * PG_W must be cleared atomically. Although the pmap 2676281494Sandrew * lock synchronizes access to PG_W, another processor 2677281494Sandrew * could be setting PG_M and/or PG_A concurrently. 2678281494Sandrew */ 2679281494Sandrew atomic_clear_long(l3, ATTR_SW_WIRED); 2680281494Sandrew pmap->pm_stats.wired_count--; 2681281494Sandrew } 2682281494Sandrew } 2683281494Sandrew PMAP_UNLOCK(pmap); 2684281494Sandrew} 2685281494Sandrew 2686281494Sandrew/* 2687281494Sandrew * Copy the range specified by src_addr/len 2688281494Sandrew * from the source map to the range dst_addr/len 2689281494Sandrew * in the destination map. 2690281494Sandrew * 2691281494Sandrew * This routine is only advisory and need not do anything. 2692281494Sandrew */ 2693281494Sandrew 2694281494Sandrewvoid 2695281494Sandrewpmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2696281494Sandrew vm_offset_t src_addr) 2697281494Sandrew{ 2698281494Sandrew} 2699281494Sandrew 2700281494Sandrew/* 2701281494Sandrew * pmap_zero_page zeros the specified hardware page by mapping 2702281494Sandrew * the page into KVM and using bzero to clear its contents. 2703281494Sandrew */ 2704281494Sandrewvoid 2705281494Sandrewpmap_zero_page(vm_page_t m) 2706281494Sandrew{ 2707281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2708281494Sandrew 2709281494Sandrew pagezero((void *)va); 2710281494Sandrew} 2711281494Sandrew 2712281494Sandrew/* 2713305531Sandrew * pmap_zero_page_area zeros the specified hardware page by mapping 2714281494Sandrew * the page into KVM and using bzero to clear its contents. 2715281494Sandrew * 2716281494Sandrew * off and size may not cover an area beyond a single hardware page. 
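 *
 * The function does not itself assert this contract; a sanity check
 * for it would look like (sketch, not present in the code):
 *
 *	KASSERT(off >= 0 && size >= 0 && off + size <= PAGE_SIZE,
 *	    ("pmap_zero_page_area: range crosses a page boundary"));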
2717281494Sandrew */ 2718281494Sandrewvoid 2719281494Sandrewpmap_zero_page_area(vm_page_t m, int off, int size) 2720281494Sandrew{ 2721281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2722281494Sandrew 2723281494Sandrew if (off == 0 && size == PAGE_SIZE) 2724281494Sandrew pagezero((void *)va); 2725281494Sandrew else 2726281494Sandrew bzero((char *)va + off, size); 2727281494Sandrew} 2728281494Sandrew 2729281494Sandrew/* 2730305531Sandrew * pmap_zero_page_idle zeros the specified hardware page by mapping 2731281494Sandrew * the page into KVM and using bzero to clear its contents. This 2732281494Sandrew * is intended to be called from the vm_pagezero process only and 2733281494Sandrew * outside of Giant. 2734281494Sandrew */ 2735281494Sandrewvoid 2736281494Sandrewpmap_zero_page_idle(vm_page_t m) 2737281494Sandrew{ 2738281494Sandrew vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2739281494Sandrew 2740281494Sandrew pagezero((void *)va); 2741281494Sandrew} 2742281494Sandrew 2743281494Sandrew/* 2744281494Sandrew * pmap_copy_page copies the specified (machine independent) 2745281494Sandrew * page by mapping the page into virtual memory and using 2746281494Sandrew * bcopy to copy the page, one machine dependent page at a 2747281494Sandrew * time. 2748281494Sandrew */ 2749281494Sandrewvoid 2750281494Sandrewpmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2751281494Sandrew{ 2752281494Sandrew vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2753281494Sandrew vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2754281494Sandrew 2755281494Sandrew pagecopy((void *)src, (void *)dst); 2756281494Sandrew} 2757281494Sandrew 2758281494Sandrewint unmapped_buf_allowed = 1; 2759281494Sandrew 2760281494Sandrewvoid 2761281494Sandrewpmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2762281494Sandrew vm_offset_t b_offset, int xfersize) 2763281494Sandrew{ 2764281494Sandrew void *a_cp, *b_cp; 2765281494Sandrew vm_page_t m_a, m_b; 2766281494Sandrew vm_paddr_t p_a, p_b; 2767281494Sandrew vm_offset_t a_pg_offset, b_pg_offset; 2768281494Sandrew int cnt; 2769281494Sandrew 2770281494Sandrew while (xfersize > 0) { 2771281494Sandrew a_pg_offset = a_offset & PAGE_MASK; 2772281494Sandrew m_a = ma[a_offset >> PAGE_SHIFT]; 2773281494Sandrew p_a = m_a->phys_addr; 2774281494Sandrew b_pg_offset = b_offset & PAGE_MASK; 2775281494Sandrew m_b = mb[b_offset >> PAGE_SHIFT]; 2776281494Sandrew p_b = m_b->phys_addr; 2777281494Sandrew cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2778281494Sandrew cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2779281494Sandrew if (__predict_false(!PHYS_IN_DMAP(p_a))) { 2780281494Sandrew panic("!DMAP a %lx", p_a); 2781281494Sandrew } else { 2782281494Sandrew a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 2783281494Sandrew } 2784281494Sandrew if (__predict_false(!PHYS_IN_DMAP(p_b))) { 2785281494Sandrew panic("!DMAP b %lx", p_b); 2786281494Sandrew } else { 2787281494Sandrew b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 2788281494Sandrew } 2789281494Sandrew bcopy(a_cp, b_cp, cnt); 2790281494Sandrew a_offset += cnt; 2791281494Sandrew b_offset += cnt; 2792281494Sandrew xfersize -= cnt; 2793281494Sandrew } 2794281494Sandrew} 2795281494Sandrew 2796286296Sjahvm_offset_t 2797286296Sjahpmap_quick_enter_page(vm_page_t m) 2798286296Sjah{ 2799286296Sjah 2800286296Sjah return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2801286296Sjah} 2802286296Sjah 2803286296Sjahvoid 2804286296Sjahpmap_quick_remove_page(vm_offset_t addr) 2805286296Sjah{ 2806286296Sjah} 2807286296Sjah 
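/*
 * The quick-enter/remove pair above is trivial here because all of
 * physical memory is expected to be covered by the direct map (compare
 * the PHYS_IN_DMAP() checks in pmap_copy_pages() above), so
 * pmap_quick_enter_page() can return a permanent DMAP address and
 * nothing needs to be torn down afterwards.  A sketch of the usual
 * calling pattern:
 *
 *	vm_offset_t qva = pmap_quick_enter_page(m);
 *	bzero((void *)qva, PAGE_SIZE);	(or any other temporary use)
 *	pmap_quick_remove_page(qva);
 */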
/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	struct rwlock *lock;
	pv_entry_t pv;
	int loops = 0;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_page_exists_quick: page %p is not managed", m));
	rv = FALSE;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		if (PV_PMAP(pv) == pmap) {
			rv = TRUE;
			break;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	rw_runlock(lock);
	return (rv);
}

/*
 *	pmap_page_wired_mappings:
 *
 *	Return the number of managed mappings to the given physical page
 *	that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	struct rwlock *lock;
	pmap_t pmap;
	pt_entry_t *pte;
	pv_entry_t pv;
	int count, lvl, md_gen;

	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (0);
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
restart:
	count = 0;
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_runlock(lock);
			PMAP_LOCK(pmap);
			rw_rlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto restart;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
		if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0)
			count++;
		PMAP_UNLOCK(pmap);
	}
	rw_runlock(lock);
	return (count);
}
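
/*
 * Illustrative note on the locking pattern above: the pmap lock is
 * ordered before a page's pv list lock (writers take it first), so the
 * loop may not block on PMAP_LOCK() while holding the pv list lock.
 * Instead it uses PMAP_TRYLOCK() and, on failure, drops the pv list
 * lock, blocks on the pmap lock, reacquires the pv list lock, and then
 * compares the saved generation count (md_gen) against m->md.pv_gen to
 * detect whether the pv list changed while neither lock was held; if it
 * did, the whole scan restarts.  The same pattern recurs throughout the
 * functions below.
 */
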
/*
 * Destroy all managed, non-wired mappings in the given user-space
 * pmap.  This pmap cannot be active on any processor besides the
 * caller.
 *
 * This function cannot be applied to the kernel pmap.  Moreover, it
 * is not intended for general use.  It is only to be used during
 * process termination.  Consequently, it can be implemented in ways
 * that make it faster than pmap_remove().  First, it can more quickly
 * destroy mappings by iterating over the pmap's collection of PV
 * entries, rather than searching the page table.  Second, it doesn't
 * have to test and clear the page table entries atomically, because
 * no processor is currently accessing the user address space.  In
 * particular, a page table entry's dirty bit won't change state once
 * this function starts.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pd_entry_t *pde;
	pt_entry_t *pte, tpte;
	struct spglist free;
	vm_page_t m;
	pv_entry_t pv;
	struct pv_chunk *pc, *npc;
	struct rwlock *lock;
	int64_t bit;
	uint64_t inuse, bitmask;
	int allfree, field, freed, idx, lvl;
	vm_paddr_t pa;

	lock = NULL;

	SLIST_INIT(&free);
	PMAP_LOCK(pmap);
	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
		allfree = 1;
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			inuse = ~pc->pc_map[field] & pc_freemask[field];
			while (inuse != 0) {
				bit = ffsl(inuse) - 1;
				bitmask = 1UL << bit;
				idx = field * 64 + bit;
				pv = &pc->pc_pventry[idx];
				inuse &= ~bitmask;

				pde = pmap_pde(pmap, pv->pv_va, &lvl);
				KASSERT(pde != NULL,
				    ("Attempting to remove an unmapped page"));
				KASSERT(lvl == 2,
				    ("Invalid page directory level: %d", lvl));

				pte = pmap_l2_to_l3(pde, pv->pv_va);
				KASSERT(pte != NULL,
				    ("Attempting to remove an unmapped page"));

				tpte = pmap_load(pte);

				/*
				 * We cannot remove wired pages from a
				 * process' mapping at this time.
				 */
				if (tpte & ATTR_SW_WIRED) {
					allfree = 0;
					continue;
				}

				pa = tpte & ~ATTR_MASK;

				m = PHYS_TO_VM_PAGE(pa);
				KASSERT(m->phys_addr == pa,
				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
				    m, (uintmax_t)m->phys_addr,
				    (uintmax_t)tpte));

				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
				    m < &vm_page_array[vm_page_array_size],
				    ("pmap_remove_pages: bad pte %#jx",
				    (uintmax_t)tpte));

				/* XXX: assumes tpte is a level 3 entry. */
				if (pmap_is_current(pmap) &&
				    pmap_l3_valid_cacheable(tpte))
					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
				pmap_load_clear(pte);
				PTE_SYNC(pte);
				pmap_invalidate_page(pmap, pv->pv_va);
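
				/*
				 * Note: there is no hardware-managed dirty
				 * bit here (see pmap_page_test_mappings()),
				 * so a mapping that is writable is
				 * conservatively treated as modified below.
				 */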
				/*
				 * Update the vm_page_t clean/reference bits.
				 */
				if ((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
					vm_page_dirty(m);

				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);

				/* Mark free */
				pc->pc_map[field] |= bitmask;

				pmap_resident_count_dec(pmap, 1);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				m->md.pv_gen++;

				pmap_unuse_l3(pmap, pv->pv_va, pmap_load(pde),
				    &free);
				freed++;
			}
		}
		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
		if (allfree) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			free_pv_chunk(pc);
		}
	}
	pmap_invalidate_all(pmap);
	if (lock != NULL)
		rw_wunlock(lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(&free);
}

/*
 * This is used to check if a page has been accessed or modified.  As we
 * don't have a bit to see if it has been modified, we have to assume it
 * has been if the page is read/write.
 */
static boolean_t
pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
{
	struct rwlock *lock;
	pv_entry_t pv;
	pt_entry_t *pte, mask, value;
	pmap_t pmap;
	int lvl, md_gen;
	boolean_t rv;

	rv = FALSE;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
	rw_rlock(lock);
restart:
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_runlock(lock);
			PMAP_LOCK(pmap);
			rw_rlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto restart;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
		KASSERT(lvl == 3,
		    ("pmap_page_test_mappings: Invalid level %d", lvl));
		mask = 0;
		value = 0;
		if (modified) {
			mask |= ATTR_AP_RW_BIT;
			value |= ATTR_AP(ATTR_AP_RW);
		}
		if (accessed) {
			mask |= ATTR_AF | ATTR_DESCR_MASK;
			value |= ATTR_AF | L3_PAGE;
		}
		rv = (pmap_load(pte) & mask) == value;
		PMAP_UNLOCK(pmap);
		if (rv)
			goto out;
	}
out:
	rw_runlock(lock);
	return (rv);
}
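
/*
 * Illustrative summary of the tests composed above:
 *
 *	modified:  (pte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)
 *	accessed:  (pte & (ATTR_AF | ATTR_DESCR_MASK)) == (ATTR_AF | L3_PAGE)
 *
 * A mapping counts as accessed only if it is a valid L3 page with the
 * access flag set, and as modified only if it is mapped read/write.
 */
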
/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no PTEs can map the page read/write, and hence none can
	 * be dirty.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (FALSE);
	return (pmap_page_test_mappings(m, FALSE, TRUE));
}

/*
 *	pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *pte;
	boolean_t rv;
	int lvl;

	rv = FALSE;
	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, addr, &lvl);
	if (pte != NULL && pmap_load(pte) != 0)
		rv = TRUE;
	PMAP_UNLOCK(pmap);
	return (rv);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	return (pmap_page_test_mappings(m, TRUE, FALSE));
}
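
/*
 * Illustrative usage (hypothetical caller, not part of this file):
 * pmap_is_modified() requires the page's object to be write-locked,
 * e.g.:
 *
 *	VM_OBJECT_WLOCK(m->object);
 *	if (pmap_is_modified(m))
 *		vm_page_dirty(m);
 *	VM_OBJECT_WUNLOCK(m->object);
 */
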
/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	pmap_t pmap;
	struct rwlock *lock;
	pv_entry_t pv;
	pt_entry_t oldpte, *pte;
	int lvl, md_gen;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
retry_pv_loop:
	rw_wlock(lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				rw_wunlock(lock);
				goto retry_pv_loop;
			}
		}
		pte = pmap_pte(pmap, pv->pv_va, &lvl);
retry:
		oldpte = pmap_load(pte);
		if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
			if (!atomic_cmpset_long(pte, oldpte,
			    oldpte | ATTR_AP(ATTR_AP_RO)))
				goto retry;
			if ((oldpte & ATTR_AF) != 0)
				vm_page_dirty(m);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	rw_wunlock(lock);
	vm_page_aflag_clear(m, PGA_WRITEABLE);
}

static __inline boolean_t
safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
{

	return (FALSE);
}

#define	PMAP_TS_REFERENCED_MAX	5
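
/*
 * PMAP_TS_REFERENCED_MAX bounds the number of mappings examined (and
 * potentially destroyed) per call to pmap_ts_referenced() below: once
 * cleared + not_cleared reaches this bound, the scan stops early rather
 * than visiting every pv entry for the page.
 */
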
/*
 *	pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	pv_entry_t pv, pvf;
	pmap_t pmap;
	struct rwlock *lock;
	pd_entry_t *pde, tpde;
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int cleared, md_gen, not_cleared, lvl;
	struct spglist free;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	SLIST_INIT(&free);
	cleared = 0;
	pa = VM_PAGE_TO_PHYS(m);
	lock = PHYS_TO_PV_LIST_LOCK(pa);
	rw_wlock(lock);
retry:
	not_cleared = 0;
	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
		goto out;
	pv = pvf;
	do {
		if (pvf == NULL)
			pvf = pv;
		pmap = PV_PMAP(pv);
		if (!PMAP_TRYLOCK(pmap)) {
			md_gen = m->md.pv_gen;
			rw_wunlock(lock);
			PMAP_LOCK(pmap);
			rw_wlock(lock);
			if (md_gen != m->md.pv_gen) {
				PMAP_UNLOCK(pmap);
				goto retry;
			}
		}
		pde = pmap_pde(pmap, pv->pv_va, &lvl);
		KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found"));
		KASSERT(lvl == 2,
		    ("pmap_ts_referenced: invalid pde level %d", lvl));
		tpde = pmap_load(pde);
		KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE,
		    ("pmap_ts_referenced: found an invalid l2 table"));
		pte = pmap_l2_to_l3(pde, pv->pv_va);
		tpte = pmap_load(pte);
		if ((tpte & ATTR_AF) != 0) {
			if (safe_to_clear_referenced(pmap, tpte)) {
				/*
				 * TODO: We don't handle the access flag
				 * at all.  We need to be able to set it in
				 * the exception handler.
				 */
				panic("ARM64TODO: safe_to_clear_referenced");
			} else if ((tpte & ATTR_SW_WIRED) == 0) {
				/*
				 * Wired pages cannot be paged out, so
				 * doing accessed bit emulation for
				 * them is wasted effort.  We do the
				 * hard work for unwired pages only.
				 */
				pmap_remove_l3(pmap, pte, pv->pv_va, tpde,
				    &free, &lock);
				pmap_invalidate_page(pmap, pv->pv_va);
				cleared++;
				if (pvf == pv)
					pvf = NULL;
				pv = NULL;
				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
				    ("inconsistent pv lock %p %p for page %p",
				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
			} else
				not_cleared++;
		}
		PMAP_UNLOCK(pmap);
		/* Rotate the PV list if it has more than one entry. */
		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
			m->md.pv_gen++;
		}
	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
	    not_cleared < PMAP_TS_REFERENCED_MAX);
out:
	rw_wunlock(lock);
	pmap_free_zero_pages(&free);
	return (cleared + not_cleared);
}
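
/*
 * Illustrative note: because safe_to_clear_referenced() above always
 * returns FALSE at present, pmap_ts_referenced() emulates reference
 * tracking by destroying the (unwired) mapping outright.  A subsequent
 * access then shows up as a soft fault that re-establishes the mapping,
 * which is how the page daemon observes further references.
 */
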
/*
 * Apply the given advice to the specified range of addresses within the
 * given pmap.  Depending on the advice, clear the referenced and/or
 * modified flags in each mapping and set the mapped page's dirty field.
 */
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_modify: page %p is not managed", m));
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	KASSERT(!vm_page_xbusied(m),
	    ("pmap_clear_modify: page %p is exclusive busied", m));

	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can be dirty.
	 * If the object containing the page is locked and the page is not
	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
		return;

	/* ARM64TODO: We lack support for tracking if a page is modified */
}

void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{

	return ((void *)PHYS_TO_DMAP(pa));
}

void
pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
{
}
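
/*
 * Illustrative usage (hypothetical caller; pa and size are assumed):
 * pmap_mapbios() relies on the requested physical range being covered
 * by the direct map, so "mapping" is just an address translation and
 * pmap_unmapbios() has nothing to undo:
 *
 *	void *p = pmap_mapbios(pa, size);
 *	... read the firmware table at p ...
 *	pmap_unmapbios(pa, size);
 */
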
/*
 * Sets the memory attribute for the specified page.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{

	m->md.pv_memattr = ma;

	/*
	 * ARM64TODO: Implement the below (from the amd64 pmap)
	 * If "m" is a normal page, update its direct mapping.  This update
	 * can be relied upon to perform any cache operations that are
	 * required for data coherence.
	 */
	if ((m->flags & PG_FICTITIOUS) == 0 &&
	    PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m)))
		panic("ARM64TODO: pmap_page_set_memattr");
}

/*
 * Perform the pmap work for mincore(2).
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pd_entry_t *l1p, l1;
	pd_entry_t *l2p, l2;
	pt_entry_t *l3p, l3;
	vm_paddr_t pa;
	bool managed;
	int val;

	PMAP_LOCK(pmap);
retry:
	pa = 0;
	val = 0;
	managed = false;

	l1p = pmap_l1(pmap, addr);
	if (l1p == NULL) /* No l1 */
		goto done;

	l1 = pmap_load(l1p);
	if ((l1 & ATTR_DESCR_MASK) == L1_INVAL)
		goto done;

	if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
		pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET);
		managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
		val = MINCORE_SUPER | MINCORE_INCORE;
		if (pmap_page_dirty(l1))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if ((l1 & ATTR_AF) == ATTR_AF)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
		goto done;
	}

	l2p = pmap_l1_to_l2(l1p, addr);
	if (l2p == NULL) /* No l2 */
		goto done;

	l2 = pmap_load(l2p);
	if ((l2 & ATTR_DESCR_MASK) == L2_INVAL)
		goto done;

	if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
		pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET);
		managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
		val = MINCORE_SUPER | MINCORE_INCORE;
		if (pmap_page_dirty(l2))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if ((l2 & ATTR_AF) == ATTR_AF)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
		goto done;
	}

	l3p = pmap_l2_to_l3(l2p, addr);
	if (l3p == NULL) /* No l3 */
		goto done;

	l3 = pmap_load(l3p);
	if ((l3 & ATTR_DESCR_MASK) == L3_INVAL)
		goto done;

	if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
		pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
		managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
		val = MINCORE_INCORE;
		if (pmap_page_dirty(l3))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if ((l3 & ATTR_AF) == ATTR_AF)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	}

done:
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);

	return (val);
}
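
/*
 * Illustrative summary of the value composed above: for a resident 4KB
 * (L3) page,
 *
 *	val = MINCORE_INCORE
 *	    | (dirty    ? MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER : 0)
 *	    | (accessed ? MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER : 0)
 *
 * while 1GB (L1) and 2MB (L2) block mappings additionally set
 * MINCORE_SUPER.
 */
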
void
pmap_activate(struct thread *td)
{
	pmap_t pmap;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	td->td_pcb->pcb_l0addr = vtophys(pmap->pm_l0);
	__asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l0addr));
	pmap_invalidate_all(pmap);
	critical_exit();
}

void
pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
{

	if (va >= VM_MIN_KERNEL_ADDRESS) {
		cpu_icache_sync_range(va, sz);
	} else {
		u_int len, offset;
		vm_paddr_t pa;

		/* Find the length of data in this page to flush */
		offset = va & PAGE_MASK;
		len = imin(PAGE_SIZE - offset, sz);

		while (sz != 0) {
			/* Extract the physical address & find it in the DMAP */
			pa = pmap_extract(pmap, va);
			if (pa != 0)
				cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);

			/* Move to the next page */
			sz -= len;
			va += len;
			/* Set the length for the next iteration */
			len = imin(PAGE_SIZE, sz);
		}
	}
}
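
/*
 * Illustrative usage (hypothetical caller; p, va and len are assumed):
 * after writing instructions into another process's page, e.g. when a
 * debugger plants a breakpoint, the I-cache must be made coherent with
 * the D-cache before that code runs:
 *
 *	pmap_sync_icache(vmspace_pmap(p->p_vmspace), va, len);
 */
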
/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
}

/**
 * Get the kernel virtual address of a set of physical pages.  If there are
 * physical addresses not covered by the DMAP, perform a transient mapping
 * that will be removed when calling pmap_unmap_io_transient.
 *
 * \param page        The pages the caller wishes to obtain the virtual
 *                    address on the kernel memory map.
 * \param vaddr       On return contains the kernel virtual memory address
 *                    of the pages passed in the page parameter.
 * \param count       Number of pages passed in.
 * \param can_fault   TRUE if the thread using the mapped pages can take
 *                    page faults, FALSE otherwise.
 *
 * \returns TRUE if the caller must call pmap_unmap_io_transient when
 *          finished or FALSE otherwise.
 */
boolean_t
pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
    boolean_t can_fault)
{
	vm_paddr_t paddr;
	boolean_t needs_mapping;
	int error, i;

	/*
	 * Allocate any KVA space that we need; this is done in a separate
	 * loop to prevent calling vmem_alloc while pinned.
	 */
	needs_mapping = FALSE;
	for (i = 0; i < count; i++) {
		paddr = VM_PAGE_TO_PHYS(page[i]);
		if (__predict_false(!PHYS_IN_DMAP(paddr))) {
			error = vmem_alloc(kernel_arena, PAGE_SIZE,
			    M_BESTFIT | M_WAITOK, &vaddr[i]);
			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
			needs_mapping = TRUE;
		} else {
			vaddr[i] = PHYS_TO_DMAP(paddr);
		}
	}

	/* Exit early if everything is covered by the DMAP */
	if (!needs_mapping)
		return (FALSE);

	if (!can_fault)
		sched_pin();
	for (i = 0; i < count; i++) {
		paddr = VM_PAGE_TO_PHYS(page[i]);
		if (!PHYS_IN_DMAP(paddr)) {
			panic(
			    "pmap_map_io_transient: TODO: Map out of DMAP data");
		}
	}

	return (needs_mapping);
}

void
pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
    boolean_t can_fault)
{
	vm_paddr_t paddr;
	int i;

	if (!can_fault)
		sched_unpin();
	for (i = 0; i < count; i++) {
		paddr = VM_PAGE_TO_PHYS(page[i]);
		if (!PHYS_IN_DMAP(paddr)) {
			panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
		}
	}
}
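
/*
 * Illustrative usage (hypothetical caller; m is an assumed vm_page_t):
 * the transient-mapping pair brackets I/O to pages that may fall
 * outside the DMAP:
 *
 *	vm_offset_t va[1];
 *	boolean_t mapped;
 *
 *	mapped = pmap_map_io_transient(&m, va, 1, FALSE);
 *	... access the page at va[0] ...
 *	if (mapped)
 *		pmap_unmap_io_transient(&m, va, 1, FALSE);
 *
 * The unmap call is required only when the map call returned TRUE,
 * matching the contract described in the comment above
 * pmap_map_io_transient().
 */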