pmap.c revision 204420
1130561Sobrien/*- 2218822Sdim * Copyright (c) 1991 Regents of the University of California. 3130561Sobrien * All rights reserved. 468765Sobrien * Copyright (c) 1994 John S. Dyson 5130561Sobrien * All rights reserved. 677298Sobrien * Copyright (c) 1994 David Greenman 7130561Sobrien * All rights reserved. 868765Sobrien * Copyright (c) 2005-2008 Alan L. Cox <alc@cs.rice.edu> 968765Sobrien * All rights reserved. 1068765Sobrien * 1168765Sobrien * This code is derived from software contributed to Berkeley by 12104834Sobrien * the Systems Programming Group of the University of Utah Computer 13104834Sobrien * Science Department and William Jolitz of UUNET Technologies Inc. 14104834Sobrien * 15104834Sobrien * Redistribution and use in source and binary forms, with or without 16104834Sobrien * modification, are permitted provided that the following conditions 17104834Sobrien * are met: 18104834Sobrien * 1. Redistributions of source code must retain the above copyright 19104834Sobrien * notice, this list of conditions and the following disclaimer. 20104834Sobrien * 2. Redistributions in binary form must reproduce the above copyright 2168765Sobrien * notice, this list of conditions and the following disclaimer in the 2268765Sobrien * documentation and/or other materials provided with the distribution. 2368765Sobrien * 3. All advertising materials mentioning features or use of this software 2468765Sobrien * must display the following acknowledgement: 2568765Sobrien * This product includes software developed by the University of 2668765Sobrien * California, Berkeley and its contributors. 2768765Sobrien * 4. Neither the name of the University nor the names of its contributors 28218822Sdim * may be used to endorse or promote products derived from this software 2968765Sobrien * without specific prior written permission. 
3068765Sobrien * 31130561Sobrien * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32130561Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33130561Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3468765Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35130561Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36130561Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37130561Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38130561Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39130561Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40130561Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41130561Sobrien * SUCH DAMAGE. 42130561Sobrien * 43130561Sobrien * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44130561Sobrien */ 45218822Sdim/*- 46218822Sdim * Copyright (c) 2003 Networks Associates Technology, Inc. 47218822Sdim * All rights reserved. 48218822Sdim * 49130561Sobrien * This software was developed for the FreeBSD Project by Jake Burkholder, 50130561Sobrien * Safeport Network Services, and Network Associates Laboratories, the 51130561Sobrien * Security Research Division of Network Associates, Inc. under 52130561Sobrien * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 53130561Sobrien * CHATS research program. 54130561Sobrien * 55130561Sobrien * Redistribution and use in source and binary forms, with or without 56130561Sobrien * modification, are permitted provided that the following conditions 57218822Sdim * are met: 58130561Sobrien * 1. Redistributions of source code must retain the above copyright 59130561Sobrien * notice, this list of conditions and the following disclaimer. 60130561Sobrien * 2. 
Redistributions in binary form must reproduce the above copyright 61130561Sobrien * notice, this list of conditions and the following disclaimer in the 62130561Sobrien * documentation and/or other materials provided with the distribution. 63130561Sobrien * 64218822Sdim * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 65130561Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66130561Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67130561Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 68130561Sobrien * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 69130561Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 70130561Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 71130561Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 72130561Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 73218822Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 74130561Sobrien * SUCH DAMAGE. 75130561Sobrien */ 76218822Sdim 77218822Sdim#include <sys/cdefs.h> 78218822Sdim__FBSDID("$FreeBSD: head/sys/i386/i386/pmap.c 204420 2010-02-27 18:00:57Z alc $"); 79218822Sdim 80218822Sdim/* 81218822Sdim * Manages physical address maps. 82130561Sobrien * 83130561Sobrien * In addition to hardware address maps, this 84218822Sdim * module is called upon to provide software-use-only 85218822Sdim * maps which may or may not be stored in the same 86218822Sdim * form as hardware maps. These pseudo-maps are 87130561Sobrien * used to store intermediate results from copy 88130561Sobrien * operations to and from address spaces. 
89130561Sobrien * 90130561Sobrien * Since the information managed by this module is 91130561Sobrien * also stored by the logical address mapping module, 92130561Sobrien * this module may throw away valid virtual-to-physical 93130561Sobrien * mappings at almost any time. However, invalidations 94130561Sobrien * of virtual-to-physical mappings must be done as 95130561Sobrien * requested. 96130561Sobrien * 97218822Sdim * In order to cope with hardware architectures which 98218822Sdim * make virtual-to-physical map invalidates expensive, 99218822Sdim * this module may delay invalidate or reduced protection 100218822Sdim * operations until such time as they are actually 10168765Sobrien * necessary. This module is given full information as 10268765Sobrien * to which processors are currently using which maps, 10368765Sobrien * and to when physical maps must be made correct. 10468765Sobrien */ 105130561Sobrien 10668765Sobrien#include "opt_cpu.h" 10768765Sobrien#include "opt_pmap.h" 10868765Sobrien#include "opt_msgbuf.h" 10968765Sobrien#include "opt_smp.h" 11068765Sobrien#include "opt_xbox.h" 11168765Sobrien 11268765Sobrien#include <sys/param.h> 11368765Sobrien#include <sys/systm.h> 114218822Sdim#include <sys/kernel.h> 115218822Sdim#include <sys/ktr.h> 116218822Sdim#include <sys/lock.h> 117218822Sdim#include <sys/malloc.h> 118218822Sdim#include <sys/mman.h> 119218822Sdim#include <sys/msgbuf.h> 120218822Sdim#include <sys/mutex.h> 121218822Sdim#include <sys/proc.h> 122218822Sdim#include <sys/sf_buf.h> 123218822Sdim#include <sys/sx.h> 124218822Sdim#include <sys/vmmeter.h> 125218822Sdim#include <sys/sched.h> 12668765Sobrien#include <sys/sysctl.h> 12768765Sobrien#ifdef SMP 12868765Sobrien#include <sys/smp.h> 129130561Sobrien#endif 13068765Sobrien 131130561Sobrien#include <vm/vm.h> 132130561Sobrien#include <vm/vm_param.h> 133130561Sobrien#include <vm/vm_kern.h> 134130561Sobrien#include <vm/vm_page.h> 135130561Sobrien#include <vm/vm_map.h> 136130561Sobrien#include <vm/vm_object.h> 
137130561Sobrien#include <vm/vm_extern.h> 138130561Sobrien#include <vm/vm_pageout.h> 139130561Sobrien#include <vm/vm_pager.h> 140218822Sdim#include <vm/vm_reserv.h> 141130561Sobrien#include <vm/uma.h> 142130561Sobrien 143130561Sobrien#include <machine/cpu.h> 144218822Sdim#include <machine/cputypes.h> 145218822Sdim#include <machine/md_var.h> 146130561Sobrien#include <machine/pcb.h> 147130561Sobrien#include <machine/specialreg.h> 148130561Sobrien#ifdef SMP 149218822Sdim#include <machine/smp.h> 150218822Sdim#endif 151130561Sobrien 152130561Sobrien#ifdef XBOX 153130561Sobrien#include <machine/xbox.h> 154218822Sdim#endif 155218822Sdim 156130561Sobrien#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) 157130561Sobrien#define CPU_ENABLE_SSE 158218822Sdim#endif 159130561Sobrien 160130561Sobrien#ifndef PMAP_SHPGPERPROC 161218822Sdim#define PMAP_SHPGPERPROC 200 162130561Sobrien#endif 163130561Sobrien 164218822Sdim#if !defined(DIAGNOSTIC) 165130561Sobrien#ifdef __GNUC_GNU_INLINE__ 166218822Sdim#define PMAP_INLINE inline 167218822Sdim#else 168218822Sdim#define PMAP_INLINE extern inline 169218822Sdim#endif 170130561Sobrien#else 171218822Sdim#define PMAP_INLINE 172130561Sobrien#endif 173130561Sobrien 174130561Sobrien#define PV_STATS 175130561Sobrien#ifdef PV_STATS 176130561Sobrien#define PV_STAT(x) do { x ; } while (0) 177130561Sobrien#else 178130561Sobrien#define PV_STAT(x) do { } while (0) 179130561Sobrien#endif 180130561Sobrien 18168765Sobrien#define pa_index(pa) ((pa) >> PDRSHIFT) 182130561Sobrien#define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) 183130561Sobrien 184130561Sobrien/* 185130561Sobrien * Get PDEs and PTEs for user/kernel address space 186130561Sobrien */ 187130561Sobrien#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT])) 188130561Sobrien#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT]) 189130561Sobrien 19068765Sobrien#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0) 191130561Sobrien#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0) 
192130561Sobrien#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0) 193130561Sobrien#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0) 194130561Sobrien#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0) 19568765Sobrien 196130561Sobrien#define pmap_pte_set_w(pte, v) ((v) ? atomic_set_int((u_int *)(pte), PG_W) : \ 197218822Sdim atomic_clear_int((u_int *)(pte), PG_W)) 198218822Sdim#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v))) 199130561Sobrien 200130561Sobrienstruct pmap kernel_pmap_store; 201130561SobrienLIST_HEAD(pmaplist, pmap); 202130561Sobrienstatic struct pmaplist allpmaps; 203130561Sobrienstatic struct mtx allpmaps_lock; 20477298Sobrien 20577298Sobrienvm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 20677298Sobrienvm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 207130561Sobrienint pgeflag = 0; /* PG_G or-in */ 208130561Sobrienint pseflag = 0; /* PG_PS or-in */ 20977298Sobrien 210130561Sobrienstatic int nkpt; 21189857Sobrienvm_offset_t kernel_vm_end; 212130561Sobrienextern u_int32_t KERNend; 213130561Sobrienextern u_int32_t KPTphys; 214130561Sobrien 215130561Sobrien#ifdef PAE 216130561Sobrienpt_entry_t pg_nx; 217130561Sobrienstatic uma_zone_t pdptzone; 218130561Sobrien#endif 219130561Sobrien 220130561Sobrienstatic int pat_works = 0; /* Is page attribute table sane? 
*/ 221130561Sobrien 222130561SobrienSYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 223130561Sobrien 224130561Sobrienstatic int pg_ps_enabled = 1; 225130561SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0, 226130561Sobrien "Are large page mappings enabled?"); 227130561Sobrien 228130561Sobrien/* 229130561Sobrien * Data for the pv entry allocation mechanism 230130561Sobrien */ 231130561Sobrienstatic int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 23268765Sobrienstatic struct md_page *pv_table; 233130561Sobrienstatic int shpgperproc = PMAP_SHPGPERPROC; 23468765Sobrien 235130561Sobrienstruct pv_chunk *pv_chunkbase; /* KVA block for pv_chunks */ 236130561Sobrienint pv_maxchunks; /* How many chunks we have KVA for */ 23768765Sobrienvm_offset_t pv_vafree; /* freelist stored in the PTE */ 238130561Sobrien 239130561Sobrien/* 240130561Sobrien * All those kernel PT submaps that BSD is so fond of 24168765Sobrien */ 242130561Sobrienstruct sysmaps { 243130561Sobrien struct mtx lock; 244130561Sobrien pt_entry_t *CMAP1; 245218822Sdim pt_entry_t *CMAP2; 246130561Sobrien caddr_t CADDR1; 24777298Sobrien caddr_t CADDR2; 248130561Sobrien}; 24977298Sobrienstatic struct sysmaps sysmaps_pcpu[MAXCPU]; 250130561Sobrienpt_entry_t *CMAP1 = 0, *KPTmap; 251130561Sobrienstatic pt_entry_t *CMAP3; 252130561Sobrienstatic pd_entry_t *KPTD; 253130561Sobriencaddr_t CADDR1 = 0, ptvmmap = 0; 254130561Sobrienstatic caddr_t CADDR3; 255130561Sobrienstruct msgbuf *msgbufp = 0; 256130561Sobrien 257130561Sobrien/* 258130561Sobrien * Crashdump maps. 
259130561Sobrien */ 260130561Sobrienstatic caddr_t crashdumpmap; 26168765Sobrien 26268765Sobrienstatic pt_entry_t *PMAP1 = 0, *PMAP2; 263218822Sdimstatic pt_entry_t *PADDR1 = 0, *PADDR2; 26468765Sobrien#ifdef SMP 265218822Sdimstatic int PMAP1cpu; 26668765Sobrienstatic int PMAP1changedcpu; 267130561SobrienSYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD, 268130561Sobrien &PMAP1changedcpu, 0, 269130561Sobrien "Number of times pmap_pte_quick changed CPU with same PMAP1"); 270130561Sobrien#endif 271130561Sobrienstatic int PMAP1changed; 272130561SobrienSYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD, 273218822Sdim &PMAP1changed, 0, 274218822Sdim "Number of times pmap_pte_quick changed PMAP1"); 275218822Sdimstatic int PMAP1unchanged; 276218822SdimSYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD, 277218822Sdim &PMAP1unchanged, 0, 278218822Sdim "Number of times pmap_pte_quick didn't change PMAP1"); 279218822Sdimstatic struct mtx PMAP2mutex; 280218822Sdim 281218822Sdimstatic void free_pv_entry(pmap_t pmap, pv_entry_t pv); 282218822Sdimstatic pv_entry_t get_pv_entry(pmap_t locked_pmap, int try); 283218822Sdimstatic void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); 284218822Sdimstatic boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); 285218822Sdimstatic void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa); 286218822Sdimstatic void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 287218822Sdimstatic pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 288218822Sdim vm_offset_t va); 289218822Sdimstatic int pmap_pvh_wired_mappings(struct md_page *pvh, int count); 290218822Sdim 291218822Sdimstatic boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); 292218822Sdimstatic boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, 293218822Sdim vm_prot_t prot); 294130561Sobrienstatic vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 
295130561Sobrien vm_page_t m, vm_prot_t prot, vm_page_t mpte); 296130561Sobrienstatic void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); 297130561Sobrienstatic void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); 298130561Sobrienstatic boolean_t pmap_is_modified_pvh(struct md_page *pvh); 299218822Sdimstatic void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); 300218822Sdimstatic vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va); 30168765Sobrienstatic void pmap_pde_attr(pd_entry_t *pde, int cache_bits); 30268765Sobrienstatic void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); 303130561Sobrienstatic boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, 304218822Sdim vm_prot_t prot); 305130561Sobrienstatic void pmap_pte_attr(pt_entry_t *pte, int cache_bits); 30668765Sobrienstatic void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, 307130561Sobrien vm_page_t *free); 308218822Sdimstatic int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, 30968765Sobrien vm_page_t *free); 310130561Sobrienstatic void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte); 311218822Sdimstatic void pmap_remove_page(struct pmap *pmap, vm_offset_t va, 312218822Sdim vm_page_t *free); 313218822Sdimstatic void pmap_remove_entry(struct pmap *pmap, vm_page_t m, 31468765Sobrien vm_offset_t va); 315130561Sobrienstatic void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); 316218822Sdimstatic boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 31768765Sobrien vm_page_t m); 318130561Sobrien 319218822Sdimstatic vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); 320218822Sdim 32168765Sobrienstatic vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); 322130561Sobrienstatic int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free); 323218822Sdimstatic pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va); 324218822Sdimstatic void 
pmap_pte_release(pt_entry_t *pte); 32568765Sobrienstatic int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *); 326130561Sobrien#ifdef PAE 327218822Sdimstatic void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait); 328218822Sdim#endif 32968765Sobrienstatic void pmap_set_pg(void); 330130561Sobrien 331218822SdimCTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); 332218822SdimCTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); 33368765Sobrien 334130561Sobrien/* 335218822Sdim * If you get an error here, then you set KVA_PAGES wrong! See the 336218822Sdim * description of KVA_PAGES in sys/i386/include/pmap.h. It must be 33768765Sobrien * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE. 338130561Sobrien */ 339218822SdimCTASSERT(KERNBASE % (1 << 24) == 0); 34068765Sobrien 341130561Sobrien/* 342218822Sdim * Bootstrap the system enough to run with virtual memory. 34368765Sobrien * 344130561Sobrien * On the i386 this is called after mapping has already been enabled 345218822Sdim * and just syncs the pmap module with what has already been done. 34689857Sobrien * [We can't call it easily with mapping off since the kernel is not 347130561Sobrien * mapped with PA == VA, hence we would have to relocate every address 348218822Sdim * from the linked base (virtual) address "KERNBASE" to the actual 34989857Sobrien * (physical) address starting relative to 0] 350218822Sdim */ 35189857Sobrienvoid 352218822Sdimpmap_bootstrap(vm_paddr_t firstaddr) 35368765Sobrien{ 354218822Sdim vm_offset_t va; 35568765Sobrien pt_entry_t *pte, *unused; 356218822Sdim struct sysmaps *sysmaps; 35768765Sobrien int i; 358218822Sdim 35968765Sobrien /* 360218822Sdim * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too 36168765Sobrien * large. It should instead be correctly calculated in locore.s and 362218822Sdim * not based on 'first' (which is a physical address, not a virtual 36368765Sobrien * address, for the start of unused physical memory). 
The kernel 364218822Sdim * page tables are NOT double mapped and thus should not be included 36568765Sobrien * in this calculation. 366218822Sdim */ 36768765Sobrien virtual_avail = (vm_offset_t) KERNBASE + firstaddr; 368218822Sdim 36968765Sobrien virtual_end = VM_MAX_KERNEL_ADDRESS; 370218822Sdim 37168765Sobrien /* 372218822Sdim * Initialize the kernel pmap (which is statically allocated). 37368765Sobrien */ 374130561Sobrien PMAP_LOCK_INIT(kernel_pmap); 375218822Sdim kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD); 37668765Sobrien#ifdef PAE 377130561Sobrien kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); 378218822Sdim#endif 37968765Sobrien kernel_pmap->pm_root = NULL; 380130561Sobrien kernel_pmap->pm_active = -1; /* don't allow deactivation */ 381218822Sdim TAILQ_INIT(&kernel_pmap->pm_pvchunk); 38268765Sobrien LIST_INIT(&allpmaps); 383130561Sobrien mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); 384218822Sdim mtx_lock_spin(&allpmaps_lock); 38568765Sobrien LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list); 386218822Sdim mtx_unlock_spin(&allpmaps_lock); 38768765Sobrien nkpt = NKPT; 388130561Sobrien 389218822Sdim /* 39068765Sobrien * Reserve some special page table entries/VA space for temporary 391130561Sobrien * mapping of pages. 392218822Sdim */ 39377298Sobrien#define SYSMAP(c, p, v, n) \ 394218822Sdim v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 39568765Sobrien 396130561Sobrien va = virtual_avail; 397218822Sdim pte = vtopte(va); 39868765Sobrien 399218822Sdim /* 40068765Sobrien * CMAP1/CMAP2 are used for zeroing and copying pages. 401218822Sdim * CMAP3 is used for the idle process page zeroing. 
40277298Sobrien */ 403218822Sdim for (i = 0; i < MAXCPU; i++) { 40477298Sobrien sysmaps = &sysmaps_pcpu[i]; 405218822Sdim mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF); 40677298Sobrien SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1) 407130561Sobrien SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1) 408218822Sdim } 40968765Sobrien SYSMAP(caddr_t, CMAP1, CADDR1, 1) 410218822Sdim SYSMAP(caddr_t, CMAP3, CADDR3, 1) 41168765Sobrien 412218822Sdim /* 41368765Sobrien * Crashdump maps. 414218822Sdim */ 415218822Sdim SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) 41668765Sobrien 417218822Sdim /* 418218822Sdim * ptvmmap is used for reading arbitrary physical pages via /dev/mem. 419218822Sdim */ 420130561Sobrien SYSMAP(caddr_t, unused, ptvmmap, 1) 421218822Sdim 42268765Sobrien /* 423130561Sobrien * msgbufp is used to map the system message buffer. 424218822Sdim */ 42568765Sobrien SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) 426218822Sdim 427218822Sdim /* 428218822Sdim * KPTmap is used by pmap_kextract(). 429218822Sdim */ 430218822Sdim SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES) 431218822Sdim 432218822Sdim for (i = 0; i < NKPT; i++) 433218822Sdim KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V; 434218822Sdim 435218822Sdim /* 436218822Sdim * Adjust the start of the KPTD and KPTmap so that the implementation 437218822Sdim * of pmap_kextract() and pmap_growkernel() can be made simpler. 
438218822Sdim */ 439218822Sdim KPTD -= KPTDI; 440218822Sdim KPTmap -= i386_btop(KPTDI << PDRSHIFT); 441130561Sobrien 442218822Sdim /* 44368765Sobrien * ptemap is used for pmap_pte_quick 444130561Sobrien */ 445218822Sdim SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1) 44668765Sobrien SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1) 447130561Sobrien 448218822Sdim mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF); 44968765Sobrien 450130561Sobrien virtual_avail = va; 451218822Sdim 45268765Sobrien /* 453130561Sobrien * Leave in place an identity mapping (virt == phys) for the low 1 MB 454218822Sdim * physical memory region that is used by the ACPI wakeup code. This 455218822Sdim * mapping must not have PG_G set. 456218822Sdim */ 45768765Sobrien#ifdef XBOX 458130561Sobrien /* FIXME: This is gross, but needed for the XBOX. Since we are in such 459218822Sdim * an early stadium, we cannot yet neatly map video memory ... :-( 460218822Sdim * Better fixes are very welcome! */ 461218822Sdim if (!arch_i386_is_xbox) 46268765Sobrien#endif 463130561Sobrien for (i = 1; i < NKPT; i++) 464218822Sdim PTD[i] = 0; 46568765Sobrien 46668765Sobrien /* Initialize the PAT MSR if present. */ 467218822Sdim pmap_init_pat(); 46868765Sobrien 469218822Sdim /* Turn on PG_G on kernel page(s) */ 470218822Sdim pmap_set_pg(); 471218822Sdim} 47268765Sobrien 473130561Sobrien/* 474130561Sobrien * Setup the PAT MSR. 475130561Sobrien */ 476218822Sdimvoid 47768765Sobrienpmap_init_pat(void) 478130561Sobrien{ 47968765Sobrien uint64_t pat_msr; 480130561Sobrien char *sysenv; 481218822Sdim static int pat_tested = 0; 482218822Sdim 483218822Sdim /* Bail if this CPU doesn't implement PAT. */ 484218822Sdim if (!(cpu_feature & CPUID_PAT)) 485218822Sdim return; 48668765Sobrien 487130561Sobrien /* 488130561Sobrien * Due to some Intel errata, we can only safely use the lower 4 48968765Sobrien * PAT entries. 
490130561Sobrien * 491130561Sobrien * Intel Pentium III Processor Specification Update 492130561Sobrien * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B 493130561Sobrien * or Mode C Paging) 494130561Sobrien * 495130561Sobrien * Intel Pentium IV Processor Specification Update 496130561Sobrien * Errata N46 (PAT Index MSB May Be Calculated Incorrectly) 497130561Sobrien * 498130561Sobrien * Some Apple Macs based on nVidia chipsets cannot enter ACPI mode 499130561Sobrien * via SMI# when we use upper 4 PAT entries for unknown reason. 500130561Sobrien */ 501130561Sobrien if (!pat_tested) { 502130561Sobrien if (cpu_vendor_id != CPU_VENDOR_INTEL || 503130561Sobrien (CPUID_TO_FAMILY(cpu_id) == 6 && 504130561Sobrien CPUID_TO_MODEL(cpu_id) >= 0xe)) { 505130561Sobrien pat_works = 1; 506130561Sobrien sysenv = getenv("smbios.system.product"); 507130561Sobrien if (sysenv != NULL) { 508130561Sobrien if (strncmp(sysenv, "MacBook5,1", 10) == 0 || 509130561Sobrien strncmp(sysenv, "MacBookPro5,5", 13) == 0 || 510130561Sobrien strncmp(sysenv, "Macmini3,1", 10) == 0) 511130561Sobrien pat_works = 0; 512130561Sobrien freeenv(sysenv); 513130561Sobrien } 514130561Sobrien } 515130561Sobrien pat_tested = 1; 516130561Sobrien } 517130561Sobrien 518130561Sobrien /* Initialize default PAT entries. */ 519130561Sobrien pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | 52068765Sobrien PAT_VALUE(1, PAT_WRITE_THROUGH) | 521130561Sobrien PAT_VALUE(2, PAT_UNCACHED) | 522130561Sobrien PAT_VALUE(3, PAT_UNCACHEABLE) | 523130561Sobrien PAT_VALUE(4, PAT_WRITE_BACK) | 524130561Sobrien PAT_VALUE(5, PAT_WRITE_THROUGH) | 525130561Sobrien PAT_VALUE(6, PAT_UNCACHED) | 526130561Sobrien PAT_VALUE(7, PAT_UNCACHEABLE); 527130561Sobrien 528130561Sobrien if (pat_works) { 529130561Sobrien /* 530130561Sobrien * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. 531130561Sobrien * Program 4 and 5 as WP and WC. 532130561Sobrien * Leave 6 and 7 as UC- and UC. 
533130561Sobrien */ 534130561Sobrien pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5)); 535130561Sobrien pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) | 536130561Sobrien PAT_VALUE(5, PAT_WRITE_COMBINING); 537130561Sobrien } else { 538130561Sobrien /* 539130561Sobrien * Just replace PAT Index 2 with WC instead of UC-. 540130561Sobrien */ 541130561Sobrien pat_msr &= ~PAT_MASK(2); 542130561Sobrien pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); 543130561Sobrien } 544130561Sobrien wrmsr(MSR_PAT, pat_msr); 545130561Sobrien} 546130561Sobrien 547130561Sobrien/* 548130561Sobrien * Set PG_G on kernel pages. Only the BSP calls this when SMP is turned on. 549130561Sobrien */ 550130561Sobrienstatic void 551130561Sobrienpmap_set_pg(void) 552130561Sobrien{ 553130561Sobrien pt_entry_t *pte; 554130561Sobrien vm_offset_t va, endva; 555130561Sobrien 556130561Sobrien if (pgeflag == 0) 557130561Sobrien return; 558130561Sobrien 559130561Sobrien endva = KERNBASE + KERNend; 560130561Sobrien 561130561Sobrien if (pseflag) { 562130561Sobrien va = KERNBASE + KERNLOAD; 563130561Sobrien while (va < endva) { 564130561Sobrien pdir_pde(PTD, va) |= pgeflag; 565130561Sobrien invltlb(); /* Play it safe, invltlb() every time */ 566130561Sobrien va += NBPDR; 567130561Sobrien } 568130561Sobrien } else { 569218822Sdim va = (vm_offset_t)btext; 570218822Sdim while (va < endva) { 571218822Sdim pte = vtopte(va); 572130561Sobrien if (*pte) 573130561Sobrien *pte |= pgeflag; 574130561Sobrien invltlb(); /* Play it safe, invltlb() every time */ 575130561Sobrien va += PAGE_SIZE; 576130561Sobrien } 577130561Sobrien } 578130561Sobrien} 579130561Sobrien 580130561Sobrien/* 581130561Sobrien * Initialize a vm_page's machine-dependent fields. 
582130561Sobrien */ 583130561Sobrienvoid 584130561Sobrienpmap_page_init(vm_page_t m) 585130561Sobrien{ 586130561Sobrien 587130561Sobrien TAILQ_INIT(&m->md.pv_list); 588130561Sobrien m->md.pat_mode = PAT_WRITE_BACK; 589130561Sobrien} 590130561Sobrien 591130561Sobrien#ifdef PAE 592130561Sobrienstatic void * 593130561Sobrienpmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) 594130561Sobrien{ 595130561Sobrien 596130561Sobrien /* Inform UMA that this allocator uses kernel_map/object. */ 597130561Sobrien *flags = UMA_SLAB_KERNEL; 598130561Sobrien return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL, 599130561Sobrien 0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT)); 600130561Sobrien} 601130561Sobrien#endif 602130561Sobrien 603130561Sobrien/* 604130561Sobrien * ABuse the pte nodes for unmapped kva to thread a kva freelist through. 605130561Sobrien * Requirements: 606130561Sobrien * - Must deal with pages in order to ensure that none of the PG_* bits 607130561Sobrien * are ever set, PG_V in particular. 608130561Sobrien * - Assumes we can write to ptes without pte_store() atomic ops, even 609130561Sobrien * on PAE systems. This should be ok. 610130561Sobrien * - Assumes nothing will ever test these addresses for 0 to indicate 611130561Sobrien * no mapping instead of correctly checking PG_V. 612130561Sobrien * - Assumes a vm_offset_t will fit in a pte (true for i386). 613130561Sobrien * Because PG_V is never set, there can be no mappings to invalidate. 
614130561Sobrien */ 615130561Sobrienstatic vm_offset_t 616130561Sobrienpmap_ptelist_alloc(vm_offset_t *head) 617130561Sobrien{ 618130561Sobrien pt_entry_t *pte; 619130561Sobrien vm_offset_t va; 620130561Sobrien 621130561Sobrien va = *head; 622130561Sobrien if (va == 0) 623130561Sobrien return (va); /* Out of memory */ 624130561Sobrien pte = vtopte(va); 625130561Sobrien *head = *pte; 626130561Sobrien if (*head & PG_V) 627130561Sobrien panic("pmap_ptelist_alloc: va with PG_V set!"); 628130561Sobrien *pte = 0; 629130561Sobrien return (va); 630130561Sobrien} 631130561Sobrien 632130561Sobrienstatic void 633130561Sobrienpmap_ptelist_free(vm_offset_t *head, vm_offset_t va) 634130561Sobrien{ 635130561Sobrien pt_entry_t *pte; 636130561Sobrien 637130561Sobrien if (va & PG_V) 638130561Sobrien panic("pmap_ptelist_free: freeing va with PG_V set!"); 639130561Sobrien pte = vtopte(va); 640130561Sobrien *pte = *head; /* virtual! PG_V is 0 though */ 641130561Sobrien *head = va; 642130561Sobrien} 643130561Sobrien 644130561Sobrienstatic void 645130561Sobrienpmap_ptelist_init(vm_offset_t *head, void *base, int npages) 646130561Sobrien{ 647130561Sobrien int i; 648130561Sobrien vm_offset_t va; 649130561Sobrien 650130561Sobrien *head = 0; 65168765Sobrien for (i = npages - 1; i >= 0; i--) { 652130561Sobrien va = (vm_offset_t)base + i * PAGE_SIZE; 653130561Sobrien pmap_ptelist_free(head, va); 65468765Sobrien } 65568765Sobrien} 656130561Sobrien 65768765Sobrien 658130561Sobrien/* 659130561Sobrien * Initialize the pmap module. 660130561Sobrien * Called by vm_init, to initialize any structures that the pmap 661130561Sobrien * system needs to map virtual memory. 662218822Sdim */ 66368765Sobrienvoid 664130561Sobrienpmap_init(void) 665130561Sobrien{ 666130561Sobrien vm_page_t mpte; 667130561Sobrien vm_size_t s; 668130561Sobrien int i, pv_npg; 669130561Sobrien 67068765Sobrien /* 67168765Sobrien * Initialize the vm page array entries for the kernel pmap's 672130561Sobrien * page table pages. 
67377298Sobrien */ 674130561Sobrien for (i = 0; i < NKPT; i++) { 675130561Sobrien mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); 676218822Sdim KASSERT(mpte >= vm_page_array && 677218822Sdim mpte < &vm_page_array[vm_page_array_size], 67877298Sobrien ("pmap_init: page table page is out of range")); 679130561Sobrien mpte->pindex = i + KPTDI; 680130561Sobrien mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); 681130561Sobrien } 682130561Sobrien 683130561Sobrien /* 684130561Sobrien * Initialize the address space (zone) for the pv entries. Set a 68577298Sobrien * high water mark so that the system can recover from excessive 68677298Sobrien * numbers of pv entries. 687130561Sobrien */ 68877298Sobrien TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 689130561Sobrien pv_entry_max = shpgperproc * maxproc + cnt.v_page_count; 690130561Sobrien TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 691218822Sdim pv_entry_max = roundup(pv_entry_max, _NPCPV); 692218822Sdim pv_entry_high_water = 9 * (pv_entry_max / 10); 693218822Sdim 69477298Sobrien /* 695130561Sobrien * Disable large page mappings by default if the kernel is running in 696130561Sobrien * a virtual machine on an AMD Family 10h processor. This is a work- 697130561Sobrien * around for Erratum 383. 698130561Sobrien */ 699130561Sobrien if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD && 700130561Sobrien CPUID_TO_FAMILY(cpu_id) == 0x10) 701130561Sobrien pg_ps_enabled = 0; 702130561Sobrien 703130561Sobrien /* 70477298Sobrien * Are large page mappings enabled? 70577298Sobrien */ 706130561Sobrien TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); 70777298Sobrien if (pg_ps_enabled) { 708130561Sobrien KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, 709130561Sobrien ("pmap_init: can't assign to pagesizes[1]")); 710218822Sdim pagesizes[1] = NBPDR; 711218822Sdim } 712218822Sdim 71377298Sobrien /* 714130561Sobrien * Calculate the size of the pv head table for superpages. 
715130561Sobrien */ 716130561Sobrien for (i = 0; phys_avail[i + 1]; i += 2); 717130561Sobrien pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR; 718130561Sobrien 719130561Sobrien /* 720130561Sobrien * Allocate memory for the pv head table for superpages. 721130561Sobrien */ 722130561Sobrien s = (vm_size_t)(pv_npg * sizeof(struct md_page)); 72377298Sobrien s = round_page(s); 72477298Sobrien pv_table = (struct md_page *)kmem_alloc(kernel_map, s); 725130561Sobrien for (i = 0; i < pv_npg; i++) 72677298Sobrien TAILQ_INIT(&pv_table[i].pv_list); 727130561Sobrien 728218822Sdim pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc); 72977298Sobrien pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map, 730130561Sobrien PAGE_SIZE * pv_maxchunks); 731130561Sobrien if (pv_chunkbase == NULL) 732130561Sobrien panic("pmap_init: not enough kvm for pv chunks"); 733130561Sobrien pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks); 734130561Sobrien#ifdef PAE 735130561Sobrien pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL, 736130561Sobrien NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 73777298Sobrien UMA_ZONE_VM | UMA_ZONE_NOFREE); 73877298Sobrien uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf); 739130561Sobrien#endif 74068765Sobrien} 741130561Sobrien 742218822Sdim 743218822SdimSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0, 744218822Sdim "Max number of PV entries"); 74568765SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0, 746130561Sobrien "Page share factor per proc"); 74768765Sobrien 748130561SobrienSYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, 749130561Sobrien "2/4MB page mapping counters"); 750130561Sobrien 751130561Sobrienstatic u_long pmap_pde_demotions; 752130561SobrienSYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, 753130561Sobrien &pmap_pde_demotions, 0, "2/4MB page demotions"); 754130561Sobrien 755130561Sobrienstatic u_long pmap_pde_mappings; 
756130561SobrienSYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, 757130561Sobrien &pmap_pde_mappings, 0, "2/4MB page mappings"); 758130561Sobrien 759130561Sobrienstatic u_long pmap_pde_p_failures; 760130561SobrienSYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, 761130561Sobrien &pmap_pde_p_failures, 0, "2/4MB page promotion failures"); 762130561Sobrien 763130561Sobrienstatic u_long pmap_pde_promotions; 764130561SobrienSYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, 765130561Sobrien &pmap_pde_promotions, 0, "2/4MB page promotions"); 766130561Sobrien 767130561Sobrien/*************************************************** 768130561Sobrien * Low level helper routines..... 769130561Sobrien ***************************************************/ 770130561Sobrien 771130561Sobrien/* 77268765Sobrien * Determine the appropriate bits to set in a PTE or PDE for a specified 773130561Sobrien * caching mode. 774130561Sobrien */ 775130561Sobrienint 776130561Sobrienpmap_cache_bits(int mode, boolean_t is_pde) 777130561Sobrien{ 778130561Sobrien int pat_flag, pat_index, cache_bits; 779130561Sobrien 780130561Sobrien /* The PAT bit is different for PTE's and PDE's. */ 781130561Sobrien pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT; 782130561Sobrien 783130561Sobrien /* If we don't support PAT, map extended modes to older ones. */ 784130561Sobrien if (!(cpu_feature & CPUID_PAT)) { 785218822Sdim switch (mode) { 786130561Sobrien case PAT_UNCACHEABLE: 787130561Sobrien case PAT_WRITE_THROUGH: 788130561Sobrien case PAT_WRITE_BACK: 789130561Sobrien break; 790130561Sobrien case PAT_UNCACHED: 791130561Sobrien case PAT_WRITE_COMBINING: 792130561Sobrien case PAT_WRITE_PROTECTED: 793130561Sobrien mode = PAT_UNCACHEABLE; 794130561Sobrien break; 795130561Sobrien } 796130561Sobrien } 79768765Sobrien 798130561Sobrien /* Map the caching mode to a PAT index. 
*/ 799130561Sobrien if (pat_works) { 800130561Sobrien switch (mode) { 801130561Sobrien case PAT_UNCACHEABLE: 802130561Sobrien pat_index = 3; 803130561Sobrien break; 80468765Sobrien case PAT_WRITE_THROUGH: 805130561Sobrien pat_index = 1; 806130561Sobrien break; 807130561Sobrien case PAT_WRITE_BACK: 808130561Sobrien pat_index = 0; 809130561Sobrien break; 810130561Sobrien case PAT_UNCACHED: 811130561Sobrien pat_index = 2; 812130561Sobrien break; 813130561Sobrien case PAT_WRITE_COMBINING: 814130561Sobrien pat_index = 5; 815130561Sobrien break; 816130561Sobrien case PAT_WRITE_PROTECTED: 817130561Sobrien pat_index = 4; 818130561Sobrien break; 81968765Sobrien default: 82068765Sobrien panic("Unknown caching mode %d\n", mode); 821130561Sobrien } 822130561Sobrien } else { 82368765Sobrien switch (mode) { 824130561Sobrien case PAT_UNCACHED: 825130561Sobrien case PAT_UNCACHEABLE: 826130561Sobrien case PAT_WRITE_PROTECTED: 82768765Sobrien pat_index = 3; 828130561Sobrien break; 829130561Sobrien case PAT_WRITE_THROUGH: 83068765Sobrien pat_index = 1; 831130561Sobrien break; 83268765Sobrien case PAT_WRITE_BACK: 833130561Sobrien pat_index = 0; 834218822Sdim break; 835130561Sobrien case PAT_WRITE_COMBINING: 836130561Sobrien pat_index = 2; 83768765Sobrien break; 838130561Sobrien default: 839130561Sobrien panic("Unknown caching mode %d\n", mode); 840130561Sobrien } 841130561Sobrien } 84268765Sobrien 84368765Sobrien /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ 844130561Sobrien cache_bits = 0; 84568765Sobrien if (pat_index & 0x4) 846130561Sobrien cache_bits |= pat_flag; 847218822Sdim if (pat_index & 0x2) 848218822Sdim cache_bits |= PG_NC_PCD; 84968765Sobrien if (pat_index & 0x1) 850130561Sobrien cache_bits |= PG_NC_PWT; 85168765Sobrien return (cache_bits); 852130561Sobrien} 85368765Sobrien#ifdef SMP 854130561Sobrien/* 855130561Sobrien * For SMP, these functions have to use the IPI mechanism for coherence. 
 *
 * N.B.: Before calling any of the following TLB invalidation functions,
 * the calling processor must ensure that all stores updating a non-
 * kernel page table are globally performed.  Otherwise, another
 * processor could cache an old, pre-update entry without being
 * invalidated.  This can happen one of two ways: (1) The pmap becomes
 * active on another processor after its pm_active field is checked by
 * one of the following functions but before a store updating the page
 * table is globally performed. (2) The pmap becomes active on another
 * processor before its pm_active field is checked but due to
 * speculative loads one of the following functions still reads the
 * pmap as inactive on the other processor.
 *
 * The kernel page table is exempt because its pm_active field is
 * immutable.  The kernel page table is always active on every
 * processor.
87268765Sobrien */ 873130561Sobrienvoid 874130561Sobrienpmap_invalidate_page(pmap_t pmap, vm_offset_t va) 87568765Sobrien{ 876130561Sobrien u_int cpumask; 87768765Sobrien u_int other_cpus; 87868765Sobrien 879130561Sobrien sched_pin(); 88068765Sobrien if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 881130561Sobrien invlpg(va); 882218822Sdim smp_invlpg(va); 883218822Sdim } else { 884130561Sobrien cpumask = PCPU_GET(cpumask); 885130561Sobrien other_cpus = PCPU_GET(other_cpus); 88668765Sobrien if (pmap->pm_active & cpumask) 887130561Sobrien invlpg(va); 888130561Sobrien if (pmap->pm_active & other_cpus) 88968765Sobrien smp_masked_invlpg(pmap->pm_active & other_cpus, va); 890130561Sobrien } 89168765Sobrien sched_unpin(); 89268765Sobrien} 893130561Sobrien 89468765Sobrienvoid 895130561Sobrienpmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 896218822Sdim{ 897218822Sdim u_int cpumask; 89868765Sobrien u_int other_cpus; 899130561Sobrien vm_offset_t addr; 900130561Sobrien 901130561Sobrien sched_pin(); 902130561Sobrien if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 903130561Sobrien for (addr = sva; addr < eva; addr += PAGE_SIZE) 904130561Sobrien invlpg(addr); 90568765Sobrien smp_invlpg_range(sva, eva); 90668765Sobrien } else { 907130561Sobrien cpumask = PCPU_GET(cpumask); 90868765Sobrien other_cpus = PCPU_GET(other_cpus); 909130561Sobrien if (pmap->pm_active & cpumask) 910218822Sdim for (addr = sva; addr < eva; addr += PAGE_SIZE) 911218822Sdim invlpg(addr); 91268765Sobrien if (pmap->pm_active & other_cpus) 913130561Sobrien smp_masked_invlpg_range(pmap->pm_active & other_cpus, 91468765Sobrien sva, eva); 915130561Sobrien } 916130561Sobrien sched_unpin(); 917130561Sobrien} 918130561Sobrien 91968765Sobrienvoid 92068765Sobrienpmap_invalidate_all(pmap_t pmap) 921130561Sobrien{ 92268765Sobrien u_int cpumask; 923130561Sobrien u_int other_cpus; 924218822Sdim 92568765Sobrien sched_pin(); 926130561Sobrien if (pmap == kernel_pmap || pmap->pm_active 
== all_cpus) { 92768765Sobrien invltlb(); 928130561Sobrien smp_invltlb(); 929130561Sobrien } else { 93068765Sobrien cpumask = PCPU_GET(cpumask); 931130561Sobrien other_cpus = PCPU_GET(other_cpus); 932130561Sobrien if (pmap->pm_active & cpumask) 93368765Sobrien invltlb(); 934130561Sobrien if (pmap->pm_active & other_cpus) 93568765Sobrien smp_masked_invltlb(pmap->pm_active & other_cpus); 93668765Sobrien } 937130561Sobrien sched_unpin(); 93868765Sobrien} 939130561Sobrien 940218822Sdimvoid 94168765Sobrienpmap_invalidate_cache(void) 942130561Sobrien{ 94368765Sobrien 944130561Sobrien sched_pin(); 945130561Sobrien wbinvd(); 94668765Sobrien smp_cache_flush(); 947130561Sobrien sched_unpin(); 948130561Sobrien} 949130561Sobrien#else /* !SMP */ 95068765Sobrien/* 951130561Sobrien * Normal, non-SMP, 486+ invalidation functions. 95268765Sobrien * We inline these within pmap.c for speed. 95368765Sobrien */ 954130561SobrienPMAP_INLINE void 95568765Sobrienpmap_invalidate_page(pmap_t pmap, vm_offset_t va) 956130561Sobrien{ 95768765Sobrien 958130561Sobrien if (pmap == kernel_pmap || pmap->pm_active) 959130561Sobrien invlpg(va); 960218822Sdim} 96168765Sobrien 962218822SdimPMAP_INLINE void 963130561Sobrienpmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 964218822Sdim{ 965130561Sobrien vm_offset_t addr; 966130561Sobrien 96768765Sobrien if (pmap == kernel_pmap || pmap->pm_active) 96868765Sobrien for (addr = sva; addr < eva; addr += PAGE_SIZE) 969130561Sobrien invlpg(addr); 970130561Sobrien} 971130561Sobrien 972130561SobrienPMAP_INLINE void 973130561Sobrienpmap_invalidate_all(pmap_t pmap) 974130561Sobrien{ 975130561Sobrien 976130561Sobrien if (pmap == kernel_pmap || pmap->pm_active) 97768765Sobrien invltlb(); 978130561Sobrien} 979218822Sdim 98068765SobrienPMAP_INLINE void 981130561Sobrienpmap_invalidate_cache(void) 982130561Sobrien{ 983130561Sobrien 98468765Sobrien wbinvd(); 985130561Sobrien} 986130561Sobrien#endif /* !SMP */ 987130561Sobrien 988130561Sobrienvoid 
989130561Sobrienpmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) 990130561Sobrien{ 991130561Sobrien 992130561Sobrien KASSERT((sva & PAGE_MASK) == 0, 99368765Sobrien ("pmap_invalidate_cache_range: sva not page-aligned")); 99468765Sobrien KASSERT((eva & PAGE_MASK) == 0, 99568765Sobrien ("pmap_invalidate_cache_range: eva not page-aligned")); 996130561Sobrien 997130561Sobrien if (cpu_feature & CPUID_SS) 99868765Sobrien ; /* If "Self Snoop" is supported, do nothing. */ 999130561Sobrien else if (cpu_feature & CPUID_CLFSH) { 1000218822Sdim 100168765Sobrien /* 1002130561Sobrien * Otherwise, do per-cache line flush. Use the mfence 1003130561Sobrien * instruction to insure that previous stores are 1004130561Sobrien * included in the write-back. The processor 100568765Sobrien * propagates flush to other processors in the cache 1006130561Sobrien * coherence domain. 1007130561Sobrien */ 1008130561Sobrien mfence(); 1009130561Sobrien for (; sva < eva; sva += cpu_clflush_line_size) 1010130561Sobrien clflush(sva); 1011130561Sobrien mfence(); 1012130561Sobrien } else { 1013130561Sobrien 1014130561Sobrien /* 101568765Sobrien * No targeted cache flush methods are supported by CPU, 101668765Sobrien * globally invalidate cache as a last resort. 101768765Sobrien */ 1018130561Sobrien pmap_invalidate_cache(); 1019130561Sobrien } 1020130561Sobrien} 102168765Sobrien 1022130561Sobrien/* 1023130561Sobrien * Are we current address space or kernel? N.B. We return FALSE when 1024130561Sobrien * a pmap's page table is in use because a kernel thread is borrowing 1025130561Sobrien * it. The borrowed page table can change spontaneously, making any 102668765Sobrien * dependence on its continued use subject to a race condition. 
1027130561Sobrien */ 1028218822Sdimstatic __inline int 102968765Sobrienpmap_is_current(pmap_t pmap) 1030130561Sobrien{ 103168765Sobrien 1032130561Sobrien return (pmap == kernel_pmap || 1033130561Sobrien (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) && 103468765Sobrien (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME))); 103568765Sobrien} 1036130561Sobrien 103768765Sobrien/* 1038130561Sobrien * If the given pmap is not the current or kernel pmap, the returned pte must 103968765Sobrien * be released by passing it to pmap_pte_release(). 1040130561Sobrien */ 1041130561Sobrienpt_entry_t * 1042130561Sobrienpmap_pte(pmap_t pmap, vm_offset_t va) 1043130561Sobrien{ 1044130561Sobrien pd_entry_t newpf; 1045130561Sobrien pd_entry_t *pde; 1046130561Sobrien 1047130561Sobrien pde = pmap_pde(pmap, va); 1048130561Sobrien if (*pde & PG_PS) 104968765Sobrien return (pde); 1050130561Sobrien if (*pde != 0) { 1051130561Sobrien /* are we current address space or kernel? */ 1052130561Sobrien if (pmap_is_current(pmap)) 1053130561Sobrien return (vtopte(va)); 1054130561Sobrien mtx_lock(&PMAP2mutex); 1055130561Sobrien newpf = *pde & PG_FRAME; 1056130561Sobrien if ((*PMAP2 & PG_FRAME) != newpf) { 105768765Sobrien *PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M; 1058130561Sobrien pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); 1059130561Sobrien } 1060130561Sobrien return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); 1061130561Sobrien } 1062130561Sobrien return (0); 1063130561Sobrien} 1064130561Sobrien 106568765Sobrien/* 1066130561Sobrien * Releases a pte that was obtained from pmap_pte(). Be prepared for the pte 1067130561Sobrien * being NULL. 
106868765Sobrien */ 1069130561Sobrienstatic __inline void 1070218822Sdimpmap_pte_release(pt_entry_t *pte) 1071130561Sobrien{ 1072130561Sobrien 1073130561Sobrien if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) 107468765Sobrien mtx_unlock(&PMAP2mutex); 107568765Sobrien} 107668765Sobrien 1077130561Sobrienstatic __inline void 1078130561Sobrieninvlcaddr(void *caddr) 1079130561Sobrien{ 1080130561Sobrien 108168765Sobrien invlpg((u_int)caddr); 1082130561Sobrien} 1083130561Sobrien 108468765Sobrien/* 1085130561Sobrien * Super fast pmap_pte routine best used when scanning 1086130561Sobrien * the pv lists. This eliminates many coarse-grained 1087130561Sobrien * invltlb calls. Note that many of the pv list 108868765Sobrien * scans are across different pmaps. It is very wasteful 1089130561Sobrien * to do an entire invltlb for checking a single mapping. 1090218822Sdim * 109168765Sobrien * If the given pmap is not the current pmap, vm_page_queue_mtx 1092130561Sobrien * must be held and curthread pinned to a CPU. 1093130561Sobrien */ 109468765Sobrienstatic pt_entry_t * 109577298Sobrienpmap_pte_quick(pmap_t pmap, vm_offset_t va) 109668765Sobrien{ 109768765Sobrien pd_entry_t newpf; 1098130561Sobrien pd_entry_t *pde; 109968765Sobrien 110068765Sobrien pde = pmap_pde(pmap, va); 1101130561Sobrien if (*pde & PG_PS) 110268765Sobrien return (pde); 1103218822Sdim if (*pde != 0) { 1104218822Sdim /* are we current address space or kernel? 
*/ 1105218822Sdim if (pmap_is_current(pmap)) 110668765Sobrien return (vtopte(va)); 1107130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1108130561Sobrien KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 110977298Sobrien newpf = *pde & PG_FRAME; 1110130561Sobrien if ((*PMAP1 & PG_FRAME) != newpf) { 1111130561Sobrien *PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M; 1112130561Sobrien#ifdef SMP 1113130561Sobrien PMAP1cpu = PCPU_GET(cpuid); 1114130561Sobrien#endif 1115130561Sobrien invlcaddr(PADDR1); 1116130561Sobrien PMAP1changed++; 1117130561Sobrien } else 1118130561Sobrien#ifdef SMP 1119130561Sobrien if (PMAP1cpu != PCPU_GET(cpuid)) { 1120130561Sobrien PMAP1cpu = PCPU_GET(cpuid); 1121130561Sobrien invlcaddr(PADDR1); 1122130561Sobrien PMAP1changedcpu++; 1123130561Sobrien } else 112468765Sobrien#endif 1125130561Sobrien PMAP1unchanged++; 1126130561Sobrien return (PADDR1 + (i386_btop(va) & (NPTEPG - 1))); 1127130561Sobrien } 1128130561Sobrien return (0); 1129130561Sobrien} 1130130561Sobrien 1131130561Sobrien/* 1132130561Sobrien * Routine: pmap_extract 1133130561Sobrien * Function: 1134130561Sobrien * Extract the physical page address associated 1135130561Sobrien * with the given map/virtual_address pair. 
1136130561Sobrien */ 1137130561Sobrienvm_paddr_t 1138130561Sobrienpmap_extract(pmap_t pmap, vm_offset_t va) 1139130561Sobrien{ 1140130561Sobrien vm_paddr_t rtval; 1141130561Sobrien pt_entry_t *pte; 1142130561Sobrien pd_entry_t pde; 1143130561Sobrien 1144130561Sobrien rtval = 0; 1145130561Sobrien PMAP_LOCK(pmap); 1146130561Sobrien pde = pmap->pm_pdir[va >> PDRSHIFT]; 1147130561Sobrien if (pde != 0) { 1148130561Sobrien if ((pde & PG_PS) != 0) 114968765Sobrien rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); 1150130561Sobrien else { 1151130561Sobrien pte = pmap_pte(pmap, va); 115268765Sobrien rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); 1153130561Sobrien pmap_pte_release(pte); 1154130561Sobrien } 1155130561Sobrien } 1156130561Sobrien PMAP_UNLOCK(pmap); 1157130561Sobrien return (rtval); 1158130561Sobrien} 1159130561Sobrien 116068765Sobrien/* 1161130561Sobrien * Routine: pmap_extract_and_hold 116268765Sobrien * Function: 116368765Sobrien * Atomically extract and hold the physical page 116468765Sobrien * with the given pmap and virtual address pair 1165130561Sobrien * if that mapping permits the given protection. 
1166130561Sobrien */ 1167130561Sobrienvm_page_t 116868765Sobrienpmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1169130561Sobrien{ 1170218822Sdim pd_entry_t pde; 117168765Sobrien pt_entry_t pte; 1172130561Sobrien vm_page_t m; 1173130561Sobrien 117468765Sobrien m = NULL; 1175218822Sdim vm_page_lock_queues(); 1176130561Sobrien PMAP_LOCK(pmap); 117768765Sobrien pde = *pmap_pde(pmap, va); 1178130561Sobrien if (pde != 0) { 1179130561Sobrien if (pde & PG_PS) { 1180130561Sobrien if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { 118168765Sobrien m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | 1182130561Sobrien (va & PDRMASK)); 1183130561Sobrien vm_page_hold(m); 1184130561Sobrien } 118568765Sobrien } else { 1186218822Sdim sched_pin(); 1187130561Sobrien pte = *pmap_pte_quick(pmap, va); 118868765Sobrien if (pte != 0 && 1189130561Sobrien ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { 119068765Sobrien m = PHYS_TO_VM_PAGE(pte & PG_FRAME); 119168765Sobrien vm_page_hold(m); 1192130561Sobrien } 1193130561Sobrien sched_unpin(); 1194130561Sobrien } 1195130561Sobrien } 1196130561Sobrien vm_page_unlock_queues(); 119768765Sobrien PMAP_UNLOCK(pmap); 1198130561Sobrien return (m); 1199130561Sobrien} 1200130561Sobrien 1201130561Sobrien/*************************************************** 120268765Sobrien * Low level mapping routines..... 1203130561Sobrien ***************************************************/ 1204218822Sdim 120568765Sobrien/* 1206130561Sobrien * Add a wired page to the kva. 120768765Sobrien * Note: not SMP coherent. 
120868765Sobrien */ 120968765SobrienPMAP_INLINE void 121068765Sobrienpmap_kenter(vm_offset_t va, vm_paddr_t pa) 1211130561Sobrien{ 1212130561Sobrien pt_entry_t *pte; 121368765Sobrien 1214130561Sobrien pte = vtopte(va); 1215130561Sobrien pte_store(pte, pa | PG_RW | PG_V | pgeflag); 1216130561Sobrien} 121768765Sobrien 1218130561Sobrienstatic __inline void 1219130561Sobrienpmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) 1220130561Sobrien{ 122177298Sobrien pt_entry_t *pte; 1222130561Sobrien 1223130561Sobrien pte = vtopte(va); 1224130561Sobrien pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0)); 1225130561Sobrien} 1226218822Sdim 1227218822Sdim/* 1228130561Sobrien * Remove a page from the kernel pagetables. 1229130561Sobrien * Note: not SMP coherent. 1230130561Sobrien */ 123168765SobrienPMAP_INLINE void 123268765Sobrienpmap_kremove(vm_offset_t va) 1233130561Sobrien{ 1234130561Sobrien pt_entry_t *pte; 1235130561Sobrien 1236130561Sobrien pte = vtopte(va); 123768765Sobrien pte_clear(pte); 1238130561Sobrien} 1239130561Sobrien 124068765Sobrien/* 1241130561Sobrien * Used to map a range of physical addresses into kernel 124268765Sobrien * virtual address space. 1243130561Sobrien * 124468765Sobrien * The value passed in '*virt' is a suggested virtual address for 1245130561Sobrien * the mapping. Architectures which can support a direct-mapped 1246130561Sobrien * physical to virtual region can return the appropriate address 1247130561Sobrien * within that region, leaving '*virt' unchanged. Other 1248130561Sobrien * architectures should map the pages starting at '*virt' and 1249130561Sobrien * update '*virt' with the first usable address after the mapped 1250130561Sobrien * region. 
1251130561Sobrien */ 1252130561Sobrienvm_offset_t 1253130561Sobrienpmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 1254130561Sobrien{ 125568765Sobrien vm_offset_t va, sva; 125668765Sobrien 125768765Sobrien va = sva = *virt; 1258130561Sobrien while (start < end) { 1259130561Sobrien pmap_kenter(va, start); 1260130561Sobrien va += PAGE_SIZE; 1261218822Sdim start += PAGE_SIZE; 1262218822Sdim } 1263218822Sdim pmap_invalidate_range(kernel_pmap, sva, va); 1264130561Sobrien *virt = va; 126568765Sobrien return (sva); 1266130561Sobrien} 1267218822Sdim 126868765Sobrien 1269130561Sobrien/* 127068765Sobrien * Add a list of wired pages to the kva 1271130561Sobrien * this routine is only used for temporary 1272130561Sobrien * kernel mappings that do not need to have 1273130561Sobrien * page modification or references recorded. 1274130561Sobrien * Note that old mappings are simply written 127568765Sobrien * over. The page *must* be wired. 1276130561Sobrien * Note: SMP coherent. Uses a ranged shootdown IPI. 127777298Sobrien */ 1278130561Sobrienvoid 1279130561Sobrienpmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1280130561Sobrien{ 1281130561Sobrien pt_entry_t *endpte, oldpte, *pte; 128268765Sobrien 128368765Sobrien oldpte = 0; 1284130561Sobrien pte = vtopte(sva); 1285218822Sdim endpte = pte + count; 1286218822Sdim while (pte < endpte) { 1287218822Sdim oldpte |= *pte; 1288218822Sdim pte_store(pte, VM_PAGE_TO_PHYS(*ma) | pgeflag | 1289218822Sdim pmap_cache_bits((*ma)->md.pat_mode, 0) | PG_RW | PG_V); 1290218822Sdim pte++; 1291218822Sdim ma++; 1292218822Sdim } 1293218822Sdim if ((oldpte & PG_V) != 0) 1294218822Sdim pmap_invalidate_range(kernel_pmap, sva, sva + count * 1295218822Sdim PAGE_SIZE); 1296218822Sdim} 1297218822Sdim 129868765Sobrien/* 1299130561Sobrien * This routine tears out page mappings from the 130068765Sobrien * kernel -- it is meant only for temporary mappings. 130168765Sobrien * Note: SMP coherent. Uses a ranged shootdown IPI. 
1302130561Sobrien */ 130368765Sobrienvoid 1304130561Sobrienpmap_qremove(vm_offset_t sva, int count) 1305218822Sdim{ 130668765Sobrien vm_offset_t va; 1307130561Sobrien 1308130561Sobrien va = sva; 130968765Sobrien while (count-- > 0) { 1310130561Sobrien pmap_kremove(va); 1311130561Sobrien va += PAGE_SIZE; 1312130561Sobrien } 1313130561Sobrien pmap_invalidate_range(kernel_pmap, sva, va); 1314130561Sobrien} 1315130561Sobrien 1316130561Sobrien/*************************************************** 131768765Sobrien * Page table page management routines..... 1318130561Sobrien ***************************************************/ 131968765Sobrienstatic __inline void 1320130561Sobrienpmap_free_zero_pages(vm_page_t free) 1321218822Sdim{ 1322130561Sobrien vm_page_t m; 1323130561Sobrien 1324130561Sobrien while (free != NULL) { 1325130561Sobrien m = free; 132668765Sobrien free = m->right; 1327130561Sobrien /* Preserve the page's PG_ZERO setting. */ 1328130561Sobrien vm_page_free_toq(m); 1329130561Sobrien } 1330130561Sobrien} 1331130561Sobrien 1332130561Sobrien/* 1333130561Sobrien * Schedule the specified unused page table page to be freed. Specifically, 1334130561Sobrien * add the page to the specified list of pages that will be released to the 133568765Sobrien * physical memory manager after the TLB has been updated. 1336130561Sobrien */ 1337130561Sobrienstatic __inline void 1338130561Sobrienpmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO) 1339130561Sobrien{ 1340130561Sobrien 1341130561Sobrien if (set_PG_ZERO) 1342130561Sobrien m->flags |= PG_ZERO; 1343130561Sobrien else 1344130561Sobrien m->flags &= ~PG_ZERO; 1345130561Sobrien m->right = *free; 1346130561Sobrien *free = m; 1347130561Sobrien} 1348130561Sobrien 134968765Sobrien/* 135068765Sobrien * Inserts the specified page table page into the specified pmap's collection 1351130561Sobrien * of idle page table pages. 
Each of a pmap's page table pages is responsible 135268765Sobrien * for mapping a distinct range of virtual addresses. The pmap's collection is 1353130561Sobrien * ordered by this virtual address range. 1354218822Sdim */ 1355130561Sobrienstatic void 1356130561Sobrienpmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) 135768765Sobrien{ 1358130561Sobrien vm_page_t root; 135968765Sobrien 1360130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1361130561Sobrien root = pmap->pm_root; 136268765Sobrien if (root == NULL) { 1363130561Sobrien mpte->left = NULL; 136468765Sobrien mpte->right = NULL; 1365130561Sobrien } else { 1366130561Sobrien root = vm_page_splay(mpte->pindex, root); 1367130561Sobrien if (mpte->pindex < root->pindex) { 1368130561Sobrien mpte->left = root->left; 1369130561Sobrien mpte->right = root; 1370130561Sobrien root->left = NULL; 137168765Sobrien } else if (mpte->pindex == root->pindex) 1372130561Sobrien panic("pmap_insert_pt_page: pindex already inserted"); 1373130561Sobrien else { 1374130561Sobrien mpte->right = root->right; 1375130561Sobrien mpte->left = root; 1376130561Sobrien root->right = NULL; 1377130561Sobrien } 1378130561Sobrien } 1379130561Sobrien pmap->pm_root = mpte; 138068765Sobrien} 1381130561Sobrien 1382130561Sobrien/* 1383130561Sobrien * Looks for a page table page mapping the specified virtual address in the 1384130561Sobrien * specified pmap's collection of idle page table pages. Returns NULL if there 1385130561Sobrien * is no page table page corresponding to the specified virtual address. 
1386130561Sobrien */ 1387130561Sobrienstatic vm_page_t 1388130561Sobrienpmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) 1389130561Sobrien{ 1390130561Sobrien vm_page_t mpte; 1391130561Sobrien vm_pindex_t pindex = va >> PDRSHIFT; 139268765Sobrien 139368765Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1394130561Sobrien if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { 1395130561Sobrien mpte = vm_page_splay(pindex, mpte); 1396130561Sobrien if ((pmap->pm_root = mpte)->pindex != pindex) 1397130561Sobrien mpte = NULL; 139868765Sobrien } 1399130561Sobrien return (mpte); 1400130561Sobrien} 1401130561Sobrien 1402130561Sobrien/* 140368765Sobrien * Removes the specified page table page from the specified pmap's collection 1404130561Sobrien * of idle page table pages. The specified page table page must be a member of 1405130561Sobrien * the pmap's collection. 1406130561Sobrien */ 1407130561Sobrienstatic void 1408130561Sobrienpmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) 1409130561Sobrien{ 1410130561Sobrien vm_page_t root; 1411130561Sobrien 1412130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1413130561Sobrien if (mpte != pmap->pm_root) 1414130561Sobrien vm_page_splay(mpte->pindex, pmap->pm_root); 1415130561Sobrien if (mpte->left == NULL) 1416130561Sobrien root = mpte->right; 1417130561Sobrien else { 1418130561Sobrien root = vm_page_splay(mpte->pindex, mpte->left); 1419130561Sobrien root->right = mpte->right; 1420130561Sobrien } 1421130561Sobrien pmap->pm_root = root; 1422130561Sobrien} 1423130561Sobrien 1424130561Sobrien/* 1425130561Sobrien * This routine unholds page table pages, and if the hold count 1426130561Sobrien * drops to zero, then it decrements the wire count. 
1427130561Sobrien */ 1428130561Sobrienstatic __inline int 1429130561Sobrienpmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) 1430130561Sobrien{ 1431130561Sobrien 1432130561Sobrien --m->wire_count; 1433130561Sobrien if (m->wire_count == 0) 1434130561Sobrien return _pmap_unwire_pte_hold(pmap, m, free); 1435130561Sobrien else 1436130561Sobrien return 0; 1437130561Sobrien} 1438130561Sobrien 1439130561Sobrienstatic int 1440130561Sobrien_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) 1441130561Sobrien{ 1442130561Sobrien vm_offset_t pteva; 1443130561Sobrien 1444130561Sobrien /* 1445130561Sobrien * unmap the page table page 1446130561Sobrien */ 1447130561Sobrien pmap->pm_pdir[m->pindex] = 0; 1448130561Sobrien --pmap->pm_stats.resident_count; 1449130561Sobrien 1450130561Sobrien /* 1451130561Sobrien * This is a release store so that the ordinary store unmapping 1452130561Sobrien * the page table page is globally performed before TLB shoot- 1453130561Sobrien * down is begun. 1454130561Sobrien */ 145568765Sobrien atomic_subtract_rel_int(&cnt.v_wire_count, 1); 1456130561Sobrien 1457218822Sdim /* 1458130561Sobrien * Do an invltlb to make the invalidated mapping 1459130561Sobrien * take effect immediately. 1460130561Sobrien */ 146168765Sobrien pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); 1462130561Sobrien pmap_invalidate_page(pmap, pteva); 1463130561Sobrien 1464130561Sobrien /* 1465130561Sobrien * Put page on a list so that it is released after 1466130561Sobrien * *ALL* TLB shootdown is done 1467130561Sobrien */ 1468130561Sobrien pmap_add_delayed_free_list(m, free, TRUE); 1469130561Sobrien 147068765Sobrien return 1; 1471130561Sobrien} 1472130561Sobrien 1473130561Sobrien/* 1474130561Sobrien * After removing a page table entry, this routine is used to 1475130561Sobrien * conditionally free the page, and manage the hold/wire counts. 
1476130561Sobrien */ 1477130561Sobrienstatic int 147868765Sobrienpmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) 1479130561Sobrien{ 148068765Sobrien pd_entry_t ptepde; 1481130561Sobrien vm_page_t mpte; 1482130561Sobrien 1483130561Sobrien if (va >= VM_MAXUSER_ADDRESS) 1484130561Sobrien return 0; 1485130561Sobrien ptepde = *pmap_pde(pmap, va); 1486130561Sobrien mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); 1487130561Sobrien return pmap_unwire_pte_hold(pmap, mpte, free); 1488130561Sobrien} 1489130561Sobrien 1490130561Sobrienvoid 1491130561Sobrienpmap_pinit0(pmap_t pmap) 1492130561Sobrien{ 1493130561Sobrien 1494130561Sobrien PMAP_LOCK_INIT(pmap); 1495130561Sobrien pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); 149668765Sobrien#ifdef PAE 149768765Sobrien pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); 149868765Sobrien#endif 149968765Sobrien pmap->pm_root = NULL; 1500130561Sobrien pmap->pm_active = 0; 1501130561Sobrien PCPU_SET(curpmap, pmap); 1502130561Sobrien TAILQ_INIT(&pmap->pm_pvchunk); 1503130561Sobrien bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1504130561Sobrien mtx_lock_spin(&allpmaps_lock); 1505130561Sobrien LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1506130561Sobrien mtx_unlock_spin(&allpmaps_lock); 1507130561Sobrien} 1508130561Sobrien 1509130561Sobrien/* 1510130561Sobrien * Initialize a preallocated and zeroed pmap structure, 1511130561Sobrien * such as one in a vmspace structure. 1512218822Sdim */ 1513130561Sobrienint 151468765Sobrienpmap_pinit(pmap_t pmap) 1515130561Sobrien{ 1516218822Sdim vm_page_t m, ptdpg[NPGPTD]; 151768765Sobrien vm_paddr_t pa; 1518130561Sobrien static int color; 1519218822Sdim int i; 152068765Sobrien 1521130561Sobrien PMAP_LOCK_INIT(pmap); 1522130561Sobrien 1523130561Sobrien /* 1524130561Sobrien * No need to allocate page table space yet but we do need a valid 1525130561Sobrien * page directory table. 
1526130561Sobrien */ 1527130561Sobrien if (pmap->pm_pdir == NULL) { 1528130561Sobrien pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1529130561Sobrien NBPTD); 1530130561Sobrien 1531130561Sobrien if (pmap->pm_pdir == NULL) { 1532130561Sobrien PMAP_LOCK_DESTROY(pmap); 1533130561Sobrien return (0); 1534130561Sobrien } 1535130561Sobrien#ifdef PAE 1536130561Sobrien pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); 1537104834Sobrien KASSERT(((vm_offset_t)pmap->pm_pdpt & 1538130561Sobrien ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, 1539130561Sobrien ("pmap_pinit: pdpt misaligned")); 1540130561Sobrien KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), 1541130561Sobrien ("pmap_pinit: pdpt above 4g")); 1542130561Sobrien#endif 1543130561Sobrien pmap->pm_root = NULL; 1544130561Sobrien } 1545130561Sobrien KASSERT(pmap->pm_root == NULL, 1546130561Sobrien ("pmap_pinit: pmap has reserved page table page(s)")); 1547130561Sobrien 1548130561Sobrien /* 1549130561Sobrien * allocate the page directory page(s) 1550130561Sobrien */ 1551130561Sobrien for (i = 0; i < NPGPTD;) { 1552130561Sobrien m = vm_page_alloc(NULL, color++, 1553130561Sobrien VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1554130561Sobrien VM_ALLOC_ZERO); 1555130561Sobrien if (m == NULL) 1556130561Sobrien VM_WAIT; 1557130561Sobrien else { 1558130561Sobrien ptdpg[i++] = m; 1559130561Sobrien } 1560130561Sobrien } 1561130561Sobrien 1562130561Sobrien pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); 1563130561Sobrien 1564130561Sobrien for (i = 0; i < NPGPTD; i++) { 1565130561Sobrien if ((ptdpg[i]->flags & PG_ZERO) == 0) 1566130561Sobrien bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE); 1567130561Sobrien } 1568218822Sdim 1569130561Sobrien mtx_lock_spin(&allpmaps_lock); 1570130561Sobrien LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1571130561Sobrien mtx_unlock_spin(&allpmaps_lock); 1572130561Sobrien /* Wire in kernel global address entries. 
*/ 1573130561Sobrien bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); 1574130561Sobrien 1575130561Sobrien /* install self-referential address mapping entry(s) */ 1576130561Sobrien for (i = 0; i < NPGPTD; i++) { 1577130561Sobrien pa = VM_PAGE_TO_PHYS(ptdpg[i]); 1578130561Sobrien pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; 1579130561Sobrien#ifdef PAE 1580130561Sobrien pmap->pm_pdpt[i] = pa | PG_V; 1581130561Sobrien#endif 1582130561Sobrien } 1583130561Sobrien 1584130561Sobrien pmap->pm_active = 0; 1585130561Sobrien TAILQ_INIT(&pmap->pm_pvchunk); 1586130561Sobrien bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1587104834Sobrien 158868765Sobrien return (1); 1589218822Sdim} 159077298Sobrien 1591130561Sobrien/* 1592130561Sobrien * this routine is called if the page table page is not 1593130561Sobrien * mapped correctly. 1594130561Sobrien */ 159577298Sobrienstatic vm_page_t 1596130561Sobrien_pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) 1597130561Sobrien{ 1598130561Sobrien vm_paddr_t ptepa; 1599130561Sobrien vm_page_t m; 1600218822Sdim 1601218822Sdim KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1602218822Sdim (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1603218822Sdim ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1604130561Sobrien 1605130561Sobrien /* 160677298Sobrien * Allocate a page table page. 160777298Sobrien */ 1608130561Sobrien if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1609130561Sobrien VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 161068765Sobrien if (flags & M_WAITOK) { 161168765Sobrien PMAP_UNLOCK(pmap); 1612130561Sobrien vm_page_unlock_queues(); 1613130561Sobrien VM_WAIT; 161468765Sobrien vm_page_lock_queues(); 1615130561Sobrien PMAP_LOCK(pmap); 161668765Sobrien } 1617130561Sobrien 161868765Sobrien /* 1619130561Sobrien * Indicate the need to retry. While waiting, the page table 1620130561Sobrien * page may have been allocated. 
162168765Sobrien */ 1622130561Sobrien return (NULL); 1623130561Sobrien } 162468765Sobrien if ((m->flags & PG_ZERO) == 0) 1625130561Sobrien pmap_zero_page(m); 1626218822Sdim 1627130561Sobrien /* 1628130561Sobrien * Map the pagetable page into the process address space, if 1629130561Sobrien * it isn't already there. 163068765Sobrien */ 1631130561Sobrien 1632218822Sdim pmap->pm_stats.resident_count++; 1633130561Sobrien 163468765Sobrien ptepa = VM_PAGE_TO_PHYS(m); 1635218822Sdim pmap->pm_pdir[ptepindex] = 1636218822Sdim (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); 1637130561Sobrien 1638218822Sdim return m; 163968765Sobrien} 1640130561Sobrien 1641130561Sobrienstatic vm_page_t 164268765Sobrienpmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1643218822Sdim{ 1644130561Sobrien unsigned ptepindex; 164568765Sobrien pd_entry_t ptepa; 1646130561Sobrien vm_page_t m; 1647130561Sobrien 1648130561Sobrien KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1649130561Sobrien (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1650130561Sobrien ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1651130561Sobrien 1652130561Sobrien /* 1653130561Sobrien * Calculate pagetable page index 1654130561Sobrien */ 1655130561Sobrien ptepindex = va >> PDRSHIFT; 1656130561Sobrienretry: 1657130561Sobrien /* 1658218822Sdim * Get the page directory entry 1659130561Sobrien */ 1660130561Sobrien ptepa = pmap->pm_pdir[ptepindex]; 1661104834Sobrien 1662130561Sobrien /* 1663130561Sobrien * This supports switching from a 4MB page to a 1664130561Sobrien * normal 4K page. 1665130561Sobrien */ 1666104834Sobrien if (ptepa & PG_PS) { 1667218822Sdim (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va); 166868765Sobrien ptepa = pmap->pm_pdir[ptepindex]; 1669130561Sobrien } 1670130561Sobrien 1671130561Sobrien /* 167268765Sobrien * If the page table page is mapped, we just increment the 1673218822Sdim * hold count, and activate it. 
1674130561Sobrien */ 1675130561Sobrien if (ptepa) { 1676130561Sobrien m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); 1677130561Sobrien m->wire_count++; 1678130561Sobrien } else { 1679130561Sobrien /* 1680130561Sobrien * Here if the pte page isn't mapped, or if it has 1681130561Sobrien * been deallocated. 1682130561Sobrien */ 1683130561Sobrien m = _pmap_allocpte(pmap, ptepindex, flags); 1684130561Sobrien if (m == NULL && (flags & M_WAITOK)) 1685130561Sobrien goto retry; 1686130561Sobrien } 1687218822Sdim return (m); 1688130561Sobrien} 1689130561Sobrien 169068765Sobrien 1691130561Sobrien/*************************************************** 1692130561Sobrien* Pmap allocation/deallocation routines. 1693130561Sobrien ***************************************************/ 169468765Sobrien 1695218822Sdim#ifdef SMP 1696130561Sobrien/* 1697130561Sobrien * Deal with a SMP shootdown of other users of the pmap that we are 1698130561Sobrien * trying to dispose of. This can be a bit hairy. 1699130561Sobrien */ 1700130561Sobrienstatic cpumask_t *lazymask; 1701130561Sobrienstatic u_int lazyptd; 1702130561Sobrienstatic volatile u_int lazywait; 1703130561Sobrien 1704130561Sobrienvoid pmap_lazyfix_action(void); 1705130561Sobrien 1706130561Sobrienvoid 1707130561Sobrienpmap_lazyfix_action(void) 1708130561Sobrien{ 1709218822Sdim cpumask_t mymask = PCPU_GET(cpumask); 1710130561Sobrien 1711130561Sobrien#ifdef COUNT_IPIS 171268765Sobrien (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; 1713130561Sobrien#endif 1714130561Sobrien if (rcr3() == lazyptd) 171568765Sobrien load_cr3(PCPU_GET(curpcb)->pcb_cr3); 171668765Sobrien atomic_clear_int(lazymask, mymask); 171768765Sobrien atomic_store_rel_int(&lazywait, 1); 1718130561Sobrien} 1719130561Sobrien 1720130561Sobrienstatic void 1721130561Sobrienpmap_lazyfix_self(cpumask_t mymask) 1722130561Sobrien{ 1723130561Sobrien 1724130561Sobrien if (rcr3() == lazyptd) 1725130561Sobrien load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1726130561Sobrien atomic_clear_int(lazymask, mymask); 
1727130561Sobrien} 1728130561Sobrien 1729130561Sobrien 1730130561Sobrienstatic void 1731130561Sobrienpmap_lazyfix(pmap_t pmap) 173277298Sobrien{ 1733130561Sobrien cpumask_t mymask, mask; 1734130561Sobrien u_int spins; 1735130561Sobrien 173677298Sobrien while ((mask = pmap->pm_active) != 0) { 1737130561Sobrien spins = 50000000; 1738130561Sobrien mask = mask & -mask; /* Find least significant set bit */ 1739130561Sobrien mtx_lock_spin(&smp_ipi_mtx); 174077298Sobrien#ifdef PAE 1741218822Sdim lazyptd = vtophys(pmap->pm_pdpt); 1742130561Sobrien#else 1743218822Sdim lazyptd = vtophys(pmap->pm_pdir); 1744218822Sdim#endif 1745218822Sdim mymask = PCPU_GET(cpumask); 1746218822Sdim if (mask == mymask) { 1747218822Sdim lazymask = &pmap->pm_active; 1748218822Sdim pmap_lazyfix_self(mymask); 1749130561Sobrien } else { 1750218822Sdim atomic_store_rel_int((u_int *)&lazymask, 1751130561Sobrien (u_int)&pmap->pm_active); 1752130561Sobrien atomic_store_rel_int(&lazywait, 0); 1753218822Sdim ipi_selected(mask, IPI_LAZYPMAP); 1754130561Sobrien while (lazywait == 0) { 1755218822Sdim ia32_pause(); 1756218822Sdim if (--spins == 0) 1757130561Sobrien break; 1758130561Sobrien } 1759130561Sobrien } 1760218822Sdim mtx_unlock_spin(&smp_ipi_mtx); 1761218822Sdim if (spins == 0) 1762130561Sobrien printf("pmap_lazyfix: spun for 50000000\n"); 1763130561Sobrien } 1764218822Sdim} 1765218822Sdim 1766218822Sdim#else /* SMP */ 1767218822Sdim 1768130561Sobrien/* 1769130561Sobrien * Cleaning up on uniprocessor is easy. For various reasons, we're 177077298Sobrien * unlikely to have to even execute this code, including the fact 1771130561Sobrien * that the cleanup is deferred until the parent does a wait(2), which 1772130561Sobrien * means that another userland process has run. 
1773218822Sdim */ 1774130561Sobrienstatic void 1775130561Sobrienpmap_lazyfix(pmap_t pmap) 1776130561Sobrien{ 1777130561Sobrien u_int cr3; 177877298Sobrien 1779130561Sobrien cr3 = vtophys(pmap->pm_pdir); 1780130561Sobrien if (cr3 == rcr3()) { 1781130561Sobrien load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1782130561Sobrien pmap->pm_active &= ~(PCPU_GET(cpumask)); 1783130561Sobrien } 1784130561Sobrien} 178577298Sobrien#endif /* SMP */ 1786130561Sobrien 1787130561Sobrien/* 1788130561Sobrien * Release any resources held by the given physical map. 1789130561Sobrien * Called when a pmap initialized by pmap_pinit is being released. 1790130561Sobrien * Should only be called if the map contains no valid mappings. 1791130561Sobrien */ 1792130561Sobrienvoid 1793130561Sobrienpmap_release(pmap_t pmap) 1794130561Sobrien{ 1795130561Sobrien vm_page_t m, ptdpg[NPGPTD]; 179677298Sobrien int i; 1797130561Sobrien 1798130561Sobrien KASSERT(pmap->pm_stats.resident_count == 0, 1799130561Sobrien ("pmap_release: pmap resident count %ld != 0", 1800130561Sobrien pmap->pm_stats.resident_count)); 1801130561Sobrien KASSERT(pmap->pm_root == NULL, 1802130561Sobrien ("pmap_release: pmap has reserved page table page(s)")); 1803218822Sdim 1804130561Sobrien pmap_lazyfix(pmap); 1805130561Sobrien mtx_lock_spin(&allpmaps_lock); 180677298Sobrien LIST_REMOVE(pmap, pm_list); 180777298Sobrien mtx_unlock_spin(&allpmaps_lock); 1808130561Sobrien 180977298Sobrien for (i = 0; i < NPGPTD; i++) 1810130561Sobrien ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] & 1811130561Sobrien PG_FRAME); 1812130561Sobrien 1813130561Sobrien bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * 1814130561Sobrien sizeof(*pmap->pm_pdir)); 1815130561Sobrien 1816130561Sobrien pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); 1817130561Sobrien 1818130561Sobrien for (i = 0; i < NPGPTD; i++) { 1819130561Sobrien m = ptdpg[i]; 1820130561Sobrien#ifdef PAE 1821130561Sobrien KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), 
182277298Sobrien ("pmap_release: got wrong ptd page")); 1823130561Sobrien#endif 1824130561Sobrien m->wire_count--; 1825130561Sobrien atomic_subtract_int(&cnt.v_wire_count, 1); 1826130561Sobrien vm_page_free_zero(m); 1827130561Sobrien } 182877298Sobrien PMAP_LOCK_DESTROY(pmap); 1829130561Sobrien} 1830130561Sobrien 1831130561Sobrienstatic int 183277298Sobrienkvm_size(SYSCTL_HANDLER_ARGS) 1833130561Sobrien{ 1834130561Sobrien unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1835130561Sobrien 1836130561Sobrien return sysctl_handle_long(oidp, &ksize, 0, req); 1837130561Sobrien} 1838130561SobrienSYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 183977298Sobrien 0, 0, kvm_size, "IU", "Size of KVM"); 1840130561Sobrien 1841130561Sobrienstatic int 1842130561Sobrienkvm_free(SYSCTL_HANDLER_ARGS) 184377298Sobrien{ 1844130561Sobrien unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1845130561Sobrien 1846130561Sobrien return sysctl_handle_long(oidp, &kfree, 0, req); 184777298Sobrien} 1848130561SobrienSYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1849130561Sobrien 0, 0, kvm_free, "IU", "Amount of KVM free"); 1850130561Sobrien 1851130561Sobrien/* 1852130561Sobrien * grow the number of kernel page table entries, if needed 1853130561Sobrien */ 1854130561Sobrienvoid 1855130561Sobrienpmap_growkernel(vm_offset_t addr) 1856130561Sobrien{ 1857130561Sobrien struct pmap *pmap; 1858130561Sobrien vm_paddr_t ptppaddr; 1859130561Sobrien vm_page_t nkpg; 1860130561Sobrien pd_entry_t newpdir; 186177298Sobrien pt_entry_t *pde; 1862130561Sobrien boolean_t updated_PTD; 1863130561Sobrien 1864130561Sobrien mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1865130561Sobrien if (kernel_vm_end == 0) { 1866130561Sobrien kernel_vm_end = KERNBASE; 186777298Sobrien nkpt = 0; 1868130561Sobrien while (pdir_pde(PTD, kernel_vm_end)) { 1869130561Sobrien kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1870130561Sobrien nkpt++; 
1871130561Sobrien if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1872130561Sobrien kernel_vm_end = kernel_map->max_offset; 1873130561Sobrien break; 1874130561Sobrien } 1875130561Sobrien } 1876130561Sobrien } 1877130561Sobrien addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1878130561Sobrien if (addr - 1 >= kernel_map->max_offset) 1879130561Sobrien addr = kernel_map->max_offset; 1880130561Sobrien while (kernel_vm_end < addr) { 1881130561Sobrien if (pdir_pde(PTD, kernel_vm_end)) { 1882130561Sobrien kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1883130561Sobrien if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1884130561Sobrien kernel_vm_end = kernel_map->max_offset; 1885130561Sobrien break; 1886130561Sobrien } 1887130561Sobrien continue; 1888130561Sobrien } 1889130561Sobrien 1890130561Sobrien nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, 1891130561Sobrien VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1892130561Sobrien VM_ALLOC_ZERO); 1893130561Sobrien if (nkpg == NULL) 1894130561Sobrien panic("pmap_growkernel: no memory to grow kernel"); 1895130561Sobrien 1896130561Sobrien nkpt++; 1897130561Sobrien 1898130561Sobrien if ((nkpg->flags & PG_ZERO) == 0) 1899130561Sobrien pmap_zero_page(nkpg); 1900130561Sobrien ptppaddr = VM_PAGE_TO_PHYS(nkpg); 1901130561Sobrien newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 1902130561Sobrien pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir; 1903130561Sobrien 1904130561Sobrien updated_PTD = FALSE; 1905130561Sobrien mtx_lock_spin(&allpmaps_lock); 1906130561Sobrien LIST_FOREACH(pmap, &allpmaps, pm_list) { 1907130561Sobrien if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & 1908130561Sobrien PG_FRAME)) 1909130561Sobrien updated_PTD = TRUE; 1910130561Sobrien pde = pmap_pde(pmap, kernel_vm_end); 1911130561Sobrien pde_store(pde, newpdir); 1912130561Sobrien } 1913130561Sobrien mtx_unlock_spin(&allpmaps_lock); 1914130561Sobrien KASSERT(updated_PTD, 1915130561Sobrien 
("pmap_growkernel: current page table is not in allpmaps")); 1916130561Sobrien kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1917130561Sobrien if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1918130561Sobrien kernel_vm_end = kernel_map->max_offset; 1919130561Sobrien break; 1920130561Sobrien } 1921130561Sobrien } 1922130561Sobrien} 1923130561Sobrien 1924130561Sobrien 1925130561Sobrien/*************************************************** 1926130561Sobrien * page management routines. 1927130561Sobrien ***************************************************/ 1928130561Sobrien 1929130561SobrienCTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1930130561SobrienCTASSERT(_NPCM == 11); 193177298Sobrien 1932130561Sobrienstatic __inline struct pv_chunk * 1933130561Sobrienpv_to_chunk(pv_entry_t pv) 193477298Sobrien{ 193577298Sobrien 1936130561Sobrien return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK); 193777298Sobrien} 193877298Sobrien 1939130561Sobrien#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 194077298Sobrien 1941130561Sobrien#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ 1942218822Sdim#define PC_FREE10 0x0000fffful /* Free values for index 10 */ 1943218822Sdim 194477298Sobrienstatic uint32_t pc_freemask[11] = { 1945130561Sobrien PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 194677298Sobrien PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1947130561Sobrien PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 1948130561Sobrien PC_FREE0_9, PC_FREE10 194977298Sobrien}; 1950130561Sobrien 195177298SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1952130561Sobrien "Current number of pv entries"); 1953130561Sobrien 1954130561Sobrien#ifdef PV_STATS 1955130561Sobrienstatic int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1956130561Sobrien 1957130561SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1958130561Sobrien "Current number of pv entry chunks"); 
1959130561SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1960130561Sobrien "Current number of pv entry chunks allocated"); 1961130561SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1962130561Sobrien "Current number of pv entry chunks frees"); 1963130561SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1964130561Sobrien "Number of times tried to get a chunk page but failed."); 1965130561Sobrien 1966130561Sobrienstatic long pv_entry_frees, pv_entry_allocs; 1967130561Sobrienstatic int pv_entry_spare; 1968130561Sobrien 1969130561SobrienSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1970130561Sobrien "Current number of pv entry frees"); 1971130561SobrienSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1972130561Sobrien "Current number of pv entry allocs"); 1973130561SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 197477298Sobrien "Current number of spare pv entries"); 1975130561Sobrien 1976130561Sobrienstatic int pmap_collect_inactive, pmap_collect_active; 1977130561Sobrien 1978130561SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0, 1979130561Sobrien "Current number times pmap_collect called on inactive queue"); 1980130561SobrienSYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0, 198177298Sobrien "Current number times pmap_collect called on active queue"); 198277298Sobrien#endif 1983130561Sobrien 198477298Sobrien/* 198577298Sobrien * We are in a serious low memory condition. Resort to 1986130561Sobrien * drastic measures to free some pages so we can allocate 198768765Sobrien * another pv entry chunk. This is normally called to 1988130561Sobrien * unmap inactive pages, and if necessary, active pages. 
1989218822Sdim */ 1990130561Sobrienstatic void 1991130561Sobrienpmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq) 199268765Sobrien{ 1993218822Sdim struct md_page *pvh; 1994130561Sobrien pd_entry_t *pde; 1995130561Sobrien pmap_t pmap; 1996130561Sobrien pt_entry_t *pte, tpte; 1997130561Sobrien pv_entry_t next_pv, pv; 1998130561Sobrien vm_offset_t va; 1999130561Sobrien vm_page_t m, free; 2000130561Sobrien 2001130561Sobrien sched_pin(); 2002218822Sdim TAILQ_FOREACH(m, &vpq->pl, pageq) { 2003130561Sobrien if (m->hold_count || m->busy) 2004130561Sobrien continue; 2005130561Sobrien TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) { 200677298Sobrien va = pv->pv_va; 2007218822Sdim pmap = PV_PMAP(pv); 200877298Sobrien /* Avoid deadlock and lock recursion. */ 2009130561Sobrien if (pmap > locked_pmap) 2010218822Sdim PMAP_LOCK(pmap); 2011130561Sobrien else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) 2012130561Sobrien continue; 2013130561Sobrien pmap->pm_stats.resident_count--; 2014130561Sobrien pde = pmap_pde(pmap, va); 2015218822Sdim KASSERT((*pde & PG_PS) == 0, ("pmap_collect: found" 201677298Sobrien " a 4mpage in page %p's pv list", m)); 2017218822Sdim pte = pmap_pte_quick(pmap, va); 2018218822Sdim tpte = pte_load_clear(pte); 2019218822Sdim KASSERT((tpte & PG_W) == 0, 2020218822Sdim ("pmap_collect: wired pte %#jx", (uintmax_t)tpte)); 2021218822Sdim if (tpte & PG_A) 2022218822Sdim vm_page_flag_set(m, PG_REFERENCED); 2023218822Sdim if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2024218822Sdim vm_page_dirty(m); 2025218822Sdim free = NULL; 2026130561Sobrien pmap_unuse_pt(pmap, va, &free); 2027130561Sobrien pmap_invalidate_page(pmap, va); 2028130561Sobrien pmap_free_zero_pages(free); 2029130561Sobrien TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2030130561Sobrien if (TAILQ_EMPTY(&m->md.pv_list)) { 2031130561Sobrien pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 203277298Sobrien if (TAILQ_EMPTY(&pvh->pv_list)) 2033130561Sobrien vm_page_flag_clear(m, PG_WRITEABLE); 
2034130561Sobrien } 2035130561Sobrien free_pv_entry(pmap, pv); 2036130561Sobrien if (pmap != locked_pmap) 2037130561Sobrien PMAP_UNLOCK(pmap); 2038130561Sobrien } 2039130561Sobrien } 2040130561Sobrien sched_unpin(); 2041130561Sobrien} 2042130561Sobrien 2043130561Sobrien 2044130561Sobrien/* 2045130561Sobrien * free the pv_entry back to the free list 2046130561Sobrien */ 2047130561Sobrienstatic void 2048130561Sobrienfree_pv_entry(pmap_t pmap, pv_entry_t pv) 2049130561Sobrien{ 2050130561Sobrien vm_page_t m; 2051130561Sobrien struct pv_chunk *pc; 205277298Sobrien int idx, field, bit; 2053130561Sobrien 2054130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2055130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2056130561Sobrien PV_STAT(pv_entry_frees++); 2057130561Sobrien PV_STAT(pv_entry_spare++); 205868765Sobrien pv_entry_count--; 2059130561Sobrien pc = pv_to_chunk(pv); 2060130561Sobrien idx = pv - &pc->pc_pventry[0]; 2061130561Sobrien field = idx / 32; 2062130561Sobrien bit = idx % 32; 2063130561Sobrien pc->pc_map[field] |= 1ul << bit; 2064130561Sobrien /* move to head of list */ 2065130561Sobrien TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2066130561Sobrien for (idx = 0; idx < _NPCM; idx++) 206768765Sobrien if (pc->pc_map[idx] != pc_freemask[idx]) { 2068130561Sobrien TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2069130561Sobrien return; 2070130561Sobrien } 2071130561Sobrien PV_STAT(pv_entry_spare -= _NPCPV); 2072130561Sobrien PV_STAT(pc_chunk_count--); 2073130561Sobrien PV_STAT(pc_chunk_frees++); 2074218822Sdim /* entire chunk is free, return it */ 207568765Sobrien m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2076130561Sobrien pmap_qremove((vm_offset_t)pc, 1); 2077130561Sobrien vm_page_unwire(m, 0); 207868765Sobrien vm_page_free(m); 2079130561Sobrien pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 2080130561Sobrien} 2081130561Sobrien 208268765Sobrien/* 2083130561Sobrien * get a new pv_entry, allocating a block from the system 2084218822Sdim * when 
needed. 2085130561Sobrien */ 2086130561Sobrienstatic pv_entry_t 2087130561Sobrienget_pv_entry(pmap_t pmap, int try) 2088130561Sobrien{ 2089218822Sdim static const struct timeval printinterval = { 60, 0 }; 2090130561Sobrien static struct timeval lastprint; 2091130561Sobrien static vm_pindex_t colour; 2092130561Sobrien struct vpgqueues *pq; 2093130561Sobrien int bit, field; 2094130561Sobrien pv_entry_t pv; 2095130561Sobrien struct pv_chunk *pc; 209668765Sobrien vm_page_t m; 2097130561Sobrien 2098130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2099130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2100130561Sobrien PV_STAT(pv_entry_allocs++); 210189857Sobrien pv_entry_count++; 2102130561Sobrien if (pv_entry_count > pv_entry_high_water) 2103130561Sobrien if (ratecheck(&lastprint, &printinterval)) 210489857Sobrien printf("Approaching the limit on PV entries, consider " 2105130561Sobrien "increasing either the vm.pmap.shpgperproc or the " 2106130561Sobrien "vm.pmap.pv_entry_max tunable.\n"); 2107130561Sobrien pq = NULL; 2108130561Sobrienretry: 210968765Sobrien pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2110130561Sobrien if (pc != NULL) { 211168765Sobrien for (field = 0; field < _NPCM; field++) { 2112130561Sobrien if (pc->pc_map[field]) { 2113130561Sobrien bit = bsfl(pc->pc_map[field]); 2114130561Sobrien break; 2115130561Sobrien } 211668765Sobrien } 2117218822Sdim if (field < _NPCM) { 2118130561Sobrien pv = &pc->pc_pventry[field * 32 + bit]; 211968765Sobrien pc->pc_map[field] &= ~(1ul << bit); 2120130561Sobrien /* If this was the last item, move it to tail */ 2121130561Sobrien for (field = 0; field < _NPCM; field++) 2122130561Sobrien if (pc->pc_map[field] != 0) { 212368765Sobrien PV_STAT(pv_entry_spare--); 2124130561Sobrien return (pv); /* not full, return */ 212568765Sobrien } 2126130561Sobrien TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2127218822Sdim TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2128130561Sobrien PV_STAT(pv_entry_spare--); 2129130561Sobrien 
return (pv); 2130130561Sobrien } 2131130561Sobrien } 213268765Sobrien /* 2133218822Sdim * Access to the ptelist "pv_vafree" is synchronized by the page 2134130561Sobrien * queues lock. If "pv_vafree" is currently non-empty, it will 213568765Sobrien * remain non-empty until pmap_ptelist_alloc() completes. 2136130561Sobrien */ 213768765Sobrien if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq == 2138130561Sobrien &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) | 2139130561Sobrien VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 2140130561Sobrien if (try) { 2141130561Sobrien pv_entry_count--; 2142130561Sobrien PV_STAT(pc_chunk_tryfail++); 2143130561Sobrien return (NULL); 2144130561Sobrien } 2145130561Sobrien /* 2146130561Sobrien * Reclaim pv entries: At first, destroy mappings to 2147130561Sobrien * inactive pages. After that, if a pv chunk entry 2148130561Sobrien * is still needed, destroy mappings to active pages. 2149218822Sdim */ 2150218822Sdim if (pq == NULL) { 2151218822Sdim PV_STAT(pmap_collect_inactive++); 2152218822Sdim pq = &vm_page_queues[PQ_INACTIVE]; 215368765Sobrien } else if (pq == &vm_page_queues[PQ_INACTIVE]) { 2154130561Sobrien PV_STAT(pmap_collect_active++); 2155130561Sobrien pq = &vm_page_queues[PQ_ACTIVE]; 2156130561Sobrien } else 2157130561Sobrien panic("get_pv_entry: increase vm.pmap.shpgperproc"); 2158218822Sdim pmap_collect(pmap, pq); 2159218822Sdim goto retry; 216068765Sobrien } 2161218822Sdim PV_STAT(pc_chunk_count++); 2162218822Sdim PV_STAT(pc_chunk_allocs++); 2163218822Sdim colour++; 2164218822Sdim pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 2165218822Sdim pmap_qenter((vm_offset_t)pc, &m, 1); 2166218822Sdim pc->pc_pmap = pmap; 2167130561Sobrien pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 2168130561Sobrien for (field = 1; field < _NPCM; field++) 216968765Sobrien pc->pc_map[field] = pc_freemask[field]; 2170130561Sobrien pv = &pc->pc_pventry[0]; 2171130561Sobrien 
TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2172130561Sobrien PV_STAT(pv_entry_spare += _NPCPV - 1); 217377298Sobrien return (pv); 2174130561Sobrien} 2175218822Sdim 2176130561Sobrienstatic __inline pv_entry_t 2177130561Sobrienpmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 217868765Sobrien{ 2179218822Sdim pv_entry_t pv; 2180130561Sobrien 218168765Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2182130561Sobrien TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 2183130561Sobrien if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2184130561Sobrien TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 2185130561Sobrien break; 2186130561Sobrien } 2187130561Sobrien } 2188130561Sobrien return (pv); 2189130561Sobrien} 2190130561Sobrien 219168765Sobrienstatic void 2192218822Sdimpmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2193130561Sobrien{ 219468765Sobrien struct md_page *pvh; 2195130561Sobrien pv_entry_t pv; 219668765Sobrien vm_offset_t va_last; 2197130561Sobrien vm_page_t m; 2198130561Sobrien 219968765Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2200130561Sobrien KASSERT((pa & PDRMASK) == 0, 220168765Sobrien ("pmap_pv_demote_pde: pa is not 4mpage aligned")); 2202130561Sobrien 2203218822Sdim /* 2204130561Sobrien * Transfer the 4mpage's pv entry for this mapping to the first 2205130561Sobrien * page's pv list. 2206130561Sobrien */ 2207130561Sobrien pvh = pa_to_pvh(pa); 2208130561Sobrien va = trunc_4mpage(va); 2209130561Sobrien pv = pmap_pvh_remove(pvh, pmap, va); 2210130561Sobrien KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); 2211130561Sobrien m = PHYS_TO_VM_PAGE(pa); 2212130561Sobrien TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2213130561Sobrien /* Instantiate the remaining NPTEPG - 1 pv entries. 
*/ 2214218822Sdim va_last = va + NBPDR - PAGE_SIZE; 2215130561Sobrien do { 2216130561Sobrien m++; 2217130561Sobrien KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 2218130561Sobrien ("pmap_pv_demote_pde: page %p is not managed", m)); 2219130561Sobrien va += PAGE_SIZE; 2220130561Sobrien pmap_insert_entry(pmap, va, m); 2221130561Sobrien } while (va < va_last); 2222130561Sobrien} 2223130561Sobrien 2224130561Sobrienstatic void 2225130561Sobrienpmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2226130561Sobrien{ 2227130561Sobrien struct md_page *pvh; 2228130561Sobrien pv_entry_t pv; 2229130561Sobrien vm_offset_t va_last; 2230130561Sobrien vm_page_t m; 2231130561Sobrien 2232130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2233130561Sobrien KASSERT((pa & PDRMASK) == 0, 2234130561Sobrien ("pmap_pv_promote_pde: pa is not 4mpage aligned")); 2235130561Sobrien 223668765Sobrien /* 223768765Sobrien * Transfer the first page's pv entry for this mapping to the 223868765Sobrien * 4mpage's pv list. Aside from avoiding the cost of a call 2239130561Sobrien * to get_pv_entry(), a transfer avoids the possibility that 2240130561Sobrien * get_pv_entry() calls pmap_collect() and that pmap_collect() 2241130561Sobrien * removes one of the mappings that is being promoted. 224268765Sobrien */ 224368765Sobrien m = PHYS_TO_VM_PAGE(pa); 2244130561Sobrien va = trunc_4mpage(va); 2245130561Sobrien pv = pmap_pvh_remove(&m->md, pmap, va); 2246130561Sobrien KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); 2247130561Sobrien pvh = pa_to_pvh(pa); 224868765Sobrien TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 2249130561Sobrien /* Free the remaining NPTEPG - 1 pv entries. 
*/ 2250218822Sdim va_last = va + NBPDR - PAGE_SIZE; 225168765Sobrien do { 2252130561Sobrien m++; 225368765Sobrien va += PAGE_SIZE; 2254130561Sobrien pmap_pvh_free(&m->md, pmap, va); 2255130561Sobrien } while (va < va_last); 2256130561Sobrien} 2257130561Sobrien 2258130561Sobrienstatic void 2259218822Sdimpmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2260130561Sobrien{ 2261130561Sobrien pv_entry_t pv; 226268765Sobrien 2263130561Sobrien pv = pmap_pvh_remove(pvh, pmap, va); 2264130561Sobrien KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2265130561Sobrien free_pv_entry(pmap, pv); 2266130561Sobrien} 2267130561Sobrien 2268130561Sobrienstatic void 2269130561Sobrienpmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 2270130561Sobrien{ 2271130561Sobrien struct md_page *pvh; 2272130561Sobrien 2273130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2274130561Sobrien pmap_pvh_free(&m->md, pmap, va); 2275130561Sobrien if (TAILQ_EMPTY(&m->md.pv_list)) { 2276130561Sobrien pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2277130561Sobrien if (TAILQ_EMPTY(&pvh->pv_list)) 2278130561Sobrien vm_page_flag_clear(m, PG_WRITEABLE); 2279130561Sobrien } 2280130561Sobrien} 2281130561Sobrien 2282218822Sdim/* 2283130561Sobrien * Create a pv entry for page at pa for 2284130561Sobrien * (pmap, va). 2285130561Sobrien */ 2286130561Sobrienstatic void 2287130561Sobrienpmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 2288130561Sobrien{ 2289130561Sobrien pv_entry_t pv; 2290130561Sobrien 2291130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 229268765Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2293130561Sobrien pv = get_pv_entry(pmap, FALSE); 2294130561Sobrien pv->pv_va = va; 2295130561Sobrien TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2296130561Sobrien} 2297130561Sobrien 2298130561Sobrien/* 2299130561Sobrien * Conditionally create a pv entry. 
2300130561Sobrien */ 2301130561Sobrienstatic boolean_t 2302130561Sobrienpmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 2303130561Sobrien{ 2304130561Sobrien pv_entry_t pv; 230568765Sobrien 2306130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 230768765Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2308130561Sobrien if (pv_entry_count < pv_entry_high_water && 2309130561Sobrien (pv = get_pv_entry(pmap, TRUE)) != NULL) { 2310130561Sobrien pv->pv_va = va; 2311130561Sobrien TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2312130561Sobrien return (TRUE); 2313130561Sobrien } else 2314130561Sobrien return (FALSE); 2315130561Sobrien} 2316130561Sobrien 2317130561Sobrien/* 2318130561Sobrien * Create the pv entries for each of the pages within a superpage. 2319130561Sobrien */ 2320130561Sobrienstatic boolean_t 2321130561Sobrienpmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2322130561Sobrien{ 2323130561Sobrien struct md_page *pvh; 232489857Sobrien pv_entry_t pv; 2325130561Sobrien 2326130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2327130561Sobrien if (pv_entry_count < pv_entry_high_water && 2328130561Sobrien (pv = get_pv_entry(pmap, TRUE)) != NULL) { 232989857Sobrien pv->pv_va = va; 2330130561Sobrien pvh = pa_to_pvh(pa); 2331130561Sobrien TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 233289857Sobrien return (TRUE); 2333130561Sobrien } else 2334130561Sobrien return (FALSE); 233589857Sobrien} 233689857Sobrien 2337130561Sobrien/* 2338130561Sobrien * Fills a page table page with mappings to consecutive physical pages. 
233968765Sobrien */ 2340130561Sobrienstatic void 2341130561Sobrienpmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) 2342130561Sobrien{ 2343130561Sobrien pt_entry_t *pte; 2344130561Sobrien 2345130561Sobrien for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { 2346130561Sobrien *pte = newpte; 2347130561Sobrien newpte += PAGE_SIZE; 2348130561Sobrien } 2349130561Sobrien} 2350130561Sobrien 2351130561Sobrien/* 2352130561Sobrien * Tries to demote a 2- or 4MB page mapping. If demotion fails, the 2353130561Sobrien * 2- or 4MB page mapping is invalidated. 2354130561Sobrien */ 2355130561Sobrienstatic boolean_t 2356130561Sobrienpmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) 2357130561Sobrien{ 2358130561Sobrien pd_entry_t newpde, oldpde; 2359130561Sobrien pmap_t allpmaps_entry; 2360130561Sobrien pt_entry_t *firstpte, newpte; 2361130561Sobrien vm_paddr_t mptepa; 2362130561Sobrien vm_page_t free, mpte; 2363130561Sobrien 2364130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2365130561Sobrien oldpde = *pde; 2366130561Sobrien KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), 2367130561Sobrien ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); 2368130561Sobrien mpte = pmap_lookup_pt_page(pmap, va); 2369130561Sobrien if (mpte != NULL) 2370130561Sobrien pmap_remove_pt_page(pmap, mpte); 2371130561Sobrien else { 237268765Sobrien KASSERT((oldpde & PG_W) == 0, 237368765Sobrien ("pmap_demote_pde: page table page for a wired mapping" 2374130561Sobrien " is missing")); 2375130561Sobrien 2376130561Sobrien /* 2377130561Sobrien * Invalidate the 2- or 4MB page mapping and return 2378130561Sobrien * "failure" if the mapping was never accessed or the 2379130561Sobrien * allocation of the new page table page fails. 
2380130561Sobrien */ 2381130561Sobrien if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, 2382218822Sdim va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | 2383130561Sobrien VM_ALLOC_WIRED)) == NULL) { 2384130561Sobrien free = NULL; 2385130561Sobrien pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); 2386218822Sdim pmap_invalidate_page(pmap, trunc_4mpage(va)); 2387130561Sobrien pmap_free_zero_pages(free); 2388130561Sobrien CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" 2389130561Sobrien " in pmap %p", va, pmap); 239068765Sobrien return (FALSE); 2391130561Sobrien } 2392130561Sobrien if (va < VM_MAXUSER_ADDRESS) 2393130561Sobrien pmap->pm_stats.resident_count++; 2394130561Sobrien } 239568765Sobrien mptepa = VM_PAGE_TO_PHYS(mpte); 2396130561Sobrien 2397175790Sobrien /* 2398175790Sobrien * If the page mapping is in the kernel's address space, then the 239968765Sobrien * KPTmap can provide access to the page table page. Otherwise, 2400130561Sobrien * temporarily map the page table page (mpte) into the kernel's 2401130561Sobrien * address space at either PADDR1 or PADDR2. 
2402130561Sobrien */ 2403130561Sobrien if (va >= KERNBASE) 2404130561Sobrien firstpte = &KPTmap[i386_btop(trunc_4mpage(va))]; 240568765Sobrien else if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) { 2406130561Sobrien if ((*PMAP1 & PG_FRAME) != mptepa) { 2407130561Sobrien *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M; 2408130561Sobrien#ifdef SMP 2409130561Sobrien PMAP1cpu = PCPU_GET(cpuid); 2410130561Sobrien#endif 2411130561Sobrien invlcaddr(PADDR1); 2412130561Sobrien PMAP1changed++; 2413130561Sobrien } else 2414130561Sobrien#ifdef SMP 2415130561Sobrien if (PMAP1cpu != PCPU_GET(cpuid)) { 241677298Sobrien PMAP1cpu = PCPU_GET(cpuid); 2417130561Sobrien invlcaddr(PADDR1); 2418130561Sobrien PMAP1changedcpu++; 2419130561Sobrien } else 2420130561Sobrien#endif 2421130561Sobrien PMAP1unchanged++; 2422130561Sobrien firstpte = PADDR1; 2423130561Sobrien } else { 2424130561Sobrien mtx_lock(&PMAP2mutex); 2425218822Sdim if ((*PMAP2 & PG_FRAME) != mptepa) { 2426218822Sdim *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M; 2427218822Sdim pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); 2428218822Sdim } 2429218822Sdim firstpte = PADDR2; 2430130561Sobrien } 2431130561Sobrien newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; 2432218822Sdim KASSERT((oldpde & PG_A) != 0, 2433130561Sobrien ("pmap_demote_pde: oldpde is missing PG_A")); 2434130561Sobrien KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, 2435130561Sobrien ("pmap_demote_pde: oldpde is missing PG_M")); 243677298Sobrien newpte = oldpde & ~PG_PS; 2437130561Sobrien if ((newpte & PG_PDE_PAT) != 0) 2438130561Sobrien newpte ^= PG_PDE_PAT | PG_PTE_PAT; 2439130561Sobrien 244068765Sobrien /* 2441130561Sobrien * If the page table page is new, initialize it. 
2442218822Sdim */ 244368765Sobrien if (mpte->wire_count == 1) { 2444130561Sobrien mpte->wire_count = NPTEPG; 244568765Sobrien pmap_fill_ptp(firstpte, newpte); 2446218822Sdim } 2447130561Sobrien KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), 244868765Sobrien ("pmap_demote_pde: firstpte and newpte map different physical" 2449130561Sobrien " addresses")); 245068765Sobrien 2451218822Sdim /* 2452130561Sobrien * If the mapping has changed attributes, update the page table 245368765Sobrien * entries. 2454130561Sobrien */ 245577298Sobrien if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) 2456130561Sobrien pmap_fill_ptp(firstpte, newpte); 2457130561Sobrien 2458130561Sobrien /* 2459130561Sobrien * Demote the mapping. This pmap is locked. The old PDE has 2460130561Sobrien * PG_A set. If the old PDE has PG_RW set, it also has PG_M 2461130561Sobrien * set. Thus, there is no danger of a race with another 2462130561Sobrien * processor changing the setting of PG_A and/or PG_M between 2463130561Sobrien * the read above and the store below. 2464130561Sobrien */ 2465130561Sobrien if (pmap == kernel_pmap) { 246668765Sobrien /* 2467130561Sobrien * A harmless race exists between this loop and the bcopy() 2468130561Sobrien * in pmap_pinit() that initializes the kernel segment of 2469130561Sobrien * the new page table directory. Specifically, that bcopy() 2470130561Sobrien * may copy the new PDE from the PTD to the new page table 2471130561Sobrien * before this loop updates that new page table. 
2472130561Sobrien */ 247368765Sobrien mtx_lock_spin(&allpmaps_lock); 2474130561Sobrien LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) { 247568765Sobrien pde = pmap_pde(allpmaps_entry, va); 2476130561Sobrien KASSERT(*pde == newpde || (*pde & PG_PTE_PROMOTE) == 2477130561Sobrien (oldpde & PG_PTE_PROMOTE), 247877298Sobrien ("pmap_demote_pde: pde was %#jx, expected %#jx", 2479130561Sobrien (uintmax_t)*pde, (uintmax_t)oldpde)); 2480218822Sdim pde_store(pde, newpde); 2481130561Sobrien } 2482130561Sobrien mtx_unlock_spin(&allpmaps_lock); 248377298Sobrien } else 2484130561Sobrien pde_store(pde, newpde); 2485130561Sobrien if (firstpte == PADDR2) 2486130561Sobrien mtx_unlock(&PMAP2mutex); 2487130561Sobrien 2488130561Sobrien /* 2489130561Sobrien * Invalidate the recursive mapping of the page table page. 2490130561Sobrien */ 2491130561Sobrien pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); 249277298Sobrien 2493130561Sobrien /* 2494130561Sobrien * Demote the pv entry. This depends on the earlier demotion 2495130561Sobrien * of the mapping. Specifically, the (re)creation of a per- 2496218822Sdim * page pv entry might trigger the execution of pmap_collect(), 2497130561Sobrien * which might reclaim a newly (re)created per-page pv entry 2498130561Sobrien * and destroy the associated mapping. In order to destroy 2499130561Sobrien * the mapping, the PDE must have already changed from mapping 2500130561Sobrien * the 2mpage to referencing the page table page. 
2501130561Sobrien */ 2502130561Sobrien if ((oldpde & PG_MANAGED) != 0) 2503130561Sobrien pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); 2504130561Sobrien 250568765Sobrien pmap_pde_demotions++; 250668765Sobrien CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x" 2507130561Sobrien " in pmap %p", va, pmap); 2508130561Sobrien return (TRUE); 2509130561Sobrien} 2510130561Sobrien 2511130561Sobrien/* 2512130561Sobrien * pmap_remove_pde: do the things to unmap a superpage in a process 2513130561Sobrien */ 2514130561Sobrienstatic void 2515130561Sobrienpmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, 251668765Sobrien vm_page_t *free) 2517130561Sobrien{ 2518130561Sobrien struct md_page *pvh; 2519130561Sobrien pd_entry_t oldpde; 2520130561Sobrien vm_offset_t eva, va; 2521130561Sobrien vm_page_t m, mpte; 2522130561Sobrien 2523130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2524130561Sobrien KASSERT((sva & PDRMASK) == 0, 252568765Sobrien ("pmap_remove_pde: sva is not 4mpage aligned")); 2526130561Sobrien oldpde = pte_load_clear(pdq); 252768765Sobrien if (oldpde & PG_W) 2528130561Sobrien pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; 2529130561Sobrien 2530130561Sobrien /* 2531130561Sobrien * Machines that don't support invlpg, also don't support 2532130561Sobrien * PG_G. 
2533130561Sobrien */ 2534130561Sobrien if (oldpde & PG_G) 2535130561Sobrien pmap_invalidate_page(kernel_pmap, sva); 2536130561Sobrien pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2537130561Sobrien if (oldpde & PG_MANAGED) { 2538130561Sobrien pvh = pa_to_pvh(oldpde & PG_PS_FRAME); 2539130561Sobrien pmap_pvh_free(pvh, pmap, sva); 2540130561Sobrien eva = sva + NBPDR; 2541130561Sobrien for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); 2542130561Sobrien va < eva; va += PAGE_SIZE, m++) { 2543130561Sobrien if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2544130561Sobrien vm_page_dirty(m); 2545130561Sobrien if (oldpde & PG_A) 2546130561Sobrien vm_page_flag_set(m, PG_REFERENCED); 2547130561Sobrien if (TAILQ_EMPTY(&m->md.pv_list) && 2548130561Sobrien TAILQ_EMPTY(&pvh->pv_list)) 2549130561Sobrien vm_page_flag_clear(m, PG_WRITEABLE); 255068765Sobrien } 2551130561Sobrien } 2552218822Sdim if (pmap == kernel_pmap) { 2553130561Sobrien if (!pmap_demote_pde(pmap, pdq, sva)) 2554130561Sobrien panic("pmap_remove_pde: failed demotion"); 255577298Sobrien } else { 2556218822Sdim mpte = pmap_lookup_pt_page(pmap, sva); 2557130561Sobrien if (mpte != NULL) { 255868765Sobrien pmap_remove_pt_page(pmap, mpte); 2559130561Sobrien pmap->pm_stats.resident_count--; 2560130561Sobrien KASSERT(mpte->wire_count == NPTEPG, 256168765Sobrien ("pmap_remove_pde: pte page wire count error")); 2562130561Sobrien mpte->wire_count = 0; 2563130561Sobrien pmap_add_delayed_free_list(mpte, free, FALSE); 2564130561Sobrien atomic_subtract_int(&cnt.v_wire_count, 1); 2565130561Sobrien } 256668765Sobrien } 2567130561Sobrien} 2568130561Sobrien 2569130561Sobrien/* 2570130561Sobrien * pmap_remove_pte: do the things to unmap a page in a process 2571130561Sobrien */ 2572130561Sobrienstatic int 2573130561Sobrienpmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) 2574130561Sobrien{ 2575218822Sdim pt_entry_t oldpte; 2576218822Sdim vm_page_t m; 2577130561Sobrien 2578130561Sobrien 
mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2579130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 258068765Sobrien oldpte = pte_load_clear(ptq); 2581130561Sobrien if (oldpte & PG_W) 2582130561Sobrien pmap->pm_stats.wired_count -= 1; 258368765Sobrien /* 2584130561Sobrien * Machines that don't support invlpg, also don't support 2585130561Sobrien * PG_G. 258668765Sobrien */ 2587130561Sobrien if (oldpte & PG_G) 258868765Sobrien pmap_invalidate_page(kernel_pmap, va); 2589130561Sobrien pmap->pm_stats.resident_count -= 1; 2590130561Sobrien if (oldpte & PG_MANAGED) { 2591130561Sobrien m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); 2592130561Sobrien if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2593130561Sobrien vm_page_dirty(m); 2594130561Sobrien if (oldpte & PG_A) 2595130561Sobrien vm_page_flag_set(m, PG_REFERENCED); 259668765Sobrien pmap_remove_entry(pmap, m, va); 2597130561Sobrien } 2598130561Sobrien return (pmap_unuse_pt(pmap, va, free)); 2599130561Sobrien} 2600130561Sobrien 260168765Sobrien/* 2602130561Sobrien * Remove a single page from a process address space 2603130561Sobrien */ 2604130561Sobrienstatic void 2605130561Sobrienpmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free) 260668765Sobrien{ 2607130561Sobrien pt_entry_t *pte; 2608130561Sobrien 2609130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2610130561Sobrien KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 2611130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2612130561Sobrien if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) 2613130561Sobrien return; 2614130561Sobrien pmap_remove_pte(pmap, pte, va, free); 261577298Sobrien pmap_invalidate_page(pmap, va); 2616130561Sobrien} 2617130561Sobrien 2618130561Sobrien/* 2619130561Sobrien * Remove the given range of addresses from the specified map. 2620130561Sobrien * 2621130561Sobrien * It is assumed that the start and end are properly 2622130561Sobrien * rounded to the page size. 
2623130561Sobrien */ 2624130561Sobrienvoid 2625130561Sobrienpmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2626130561Sobrien{ 2627130561Sobrien vm_offset_t pdnxt; 2628130561Sobrien pd_entry_t ptpaddr; 2629130561Sobrien pt_entry_t *pte; 2630130561Sobrien vm_page_t free = NULL; 2631130561Sobrien int anyvalid; 2632130561Sobrien 263377298Sobrien /* 2634130561Sobrien * Perform an unsynchronized read. This is, however, safe. 2635130561Sobrien */ 2636130561Sobrien if (pmap->pm_stats.resident_count == 0) 263777298Sobrien return; 2638218822Sdim 263968765Sobrien anyvalid = 0; 2640130561Sobrien 2641218822Sdim vm_page_lock_queues(); 2642130561Sobrien sched_pin(); 2643218822Sdim PMAP_LOCK(pmap); 2644218822Sdim 2645218822Sdim /* 2646218822Sdim * special handling of removing one page. a very 264777298Sobrien * common operation and easy to short circuit some 2648218822Sdim * code. 2649218822Sdim */ 2650218822Sdim if ((sva + PAGE_SIZE == eva) && 2651218822Sdim ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 2652218822Sdim pmap_remove_page(pmap, sva, &free); 2653218822Sdim goto out; 2654218822Sdim } 2655218822Sdim 2656218822Sdim for (; sva < eva; sva = pdnxt) { 2657218822Sdim unsigned pdirindex; 2658218822Sdim 2659218822Sdim /* 2660218822Sdim * Calculate index for next page table. 2661130561Sobrien */ 2662218822Sdim pdnxt = (sva + NBPDR) & ~PDRMASK; 2663218822Sdim if (pdnxt < sva) 2664218822Sdim pdnxt = eva; 2665218822Sdim if (pmap->pm_stats.resident_count == 0) 2666218822Sdim break; 2667218822Sdim 2668218822Sdim pdirindex = sva >> PDRSHIFT; 2669218822Sdim ptpaddr = pmap->pm_pdir[pdirindex]; 2670218822Sdim 2671218822Sdim /* 2672130561Sobrien * Weed out invalid mappings. Note: we assume that the page 2673218822Sdim * directory table is always allocated, and in kernel virtual. 2674218822Sdim */ 2675218822Sdim if (ptpaddr == 0) 2676218822Sdim continue; 2677218822Sdim 2678218822Sdim /* 2679130561Sobrien * Check for large page. 
2680218822Sdim */ 2681218822Sdim if ((ptpaddr & PG_PS) != 0) { 2682130561Sobrien /* 268368765Sobrien * Are we removing the entire large page? If not, 2684218822Sdim * demote the mapping and fall through. 268568765Sobrien */ 2686218822Sdim if (sva + NBPDR == pdnxt && eva >= pdnxt) { 2687218822Sdim /* 2688218822Sdim * The TLB entry for a PG_G mapping is 2689130561Sobrien * invalidated by pmap_remove_pde(). 2690218822Sdim */ 269168765Sobrien if ((ptpaddr & PG_G) == 0) 2692218822Sdim anyvalid = 1; 2693218822Sdim pmap_remove_pde(pmap, 2694218822Sdim &pmap->pm_pdir[pdirindex], sva, &free); 2695218822Sdim continue; 2696218822Sdim } else if (!pmap_demote_pde(pmap, 2697218822Sdim &pmap->pm_pdir[pdirindex], sva)) { 2698218822Sdim /* The large page mapping was destroyed. */ 2699218822Sdim continue; 2700218822Sdim } 2701218822Sdim } 2702130561Sobrien 270368765Sobrien /* 2704218822Sdim * Limit our scan to either the end of the va represented 270568765Sobrien * by the current page table page, or to the end of the 2706130561Sobrien * range being removed. 2707218822Sdim */ 2708130561Sobrien if (pdnxt > eva) 2709218822Sdim pdnxt = eva; 271068765Sobrien 2711218822Sdim for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2712130561Sobrien sva += PAGE_SIZE) { 271368765Sobrien if (*pte == 0) 2714218822Sdim continue; 271568765Sobrien 2716130561Sobrien /* 2717218822Sdim * The TLB entry for a PG_G mapping is invalidated 2718218822Sdim * by pmap_remove_pte(). 
2719130561Sobrien */ 2720218822Sdim if ((*pte & PG_G) == 0) 2721218822Sdim anyvalid = 1; 2722218822Sdim if (pmap_remove_pte(pmap, pte, sva, &free)) 2723218822Sdim break; 2724218822Sdim } 2725218822Sdim } 2726218822Sdimout: 2727218822Sdim sched_unpin(); 2728218822Sdim if (anyvalid) 2729218822Sdim pmap_invalidate_all(pmap); 273068765Sobrien vm_page_unlock_queues(); 273168765Sobrien PMAP_UNLOCK(pmap); 2732218822Sdim pmap_free_zero_pages(free); 2733218822Sdim} 2734218822Sdim 2735218822Sdim/* 2736218822Sdim * Routine: pmap_remove_all 2737218822Sdim * Function: 2738218822Sdim * Removes this physical page from 2739218822Sdim * all physical maps in which it resides. 2740218822Sdim * Reflects back modify bits to the pager. 2741218822Sdim * 2742218822Sdim * Notes: 2743218822Sdim * Original versions of this routine were very 2744218822Sdim * inefficient because they iteratively called 2745218822Sdim * pmap_remove (slow...) 2746218822Sdim */ 2747218822Sdim 2748218822Sdimvoid 2749218822Sdimpmap_remove_all(vm_page_t m) 2750218822Sdim{ 2751218822Sdim struct md_page *pvh; 2752218822Sdim pv_entry_t pv; 2753218822Sdim pmap_t pmap; 2754218822Sdim pt_entry_t *pte, tpte; 2755218822Sdim pd_entry_t *pde; 2756218822Sdim vm_offset_t va; 2757218822Sdim vm_page_t free; 2758218822Sdim 2759218822Sdim KASSERT((m->flags & PG_FICTITIOUS) == 0, 2760218822Sdim ("pmap_remove_all: page %p is fictitious", m)); 2761218822Sdim mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2762218822Sdim sched_pin(); 2763218822Sdim pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2764218822Sdim while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 2765218822Sdim va = pv->pv_va; 2766218822Sdim pmap = PV_PMAP(pv); 2767218822Sdim PMAP_LOCK(pmap); 2768218822Sdim pde = pmap_pde(pmap, va); 2769218822Sdim (void)pmap_demote_pde(pmap, pde, va); 2770218822Sdim PMAP_UNLOCK(pmap); 2771218822Sdim } 2772218822Sdim while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2773218822Sdim pmap = PV_PMAP(pv); 2774218822Sdim PMAP_LOCK(pmap); 2775218822Sdim 
pmap->pm_stats.resident_count--; 2776218822Sdim pde = pmap_pde(pmap, pv->pv_va); 2777218822Sdim KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" 2778218822Sdim " a 4mpage in page %p's pv list", m)); 2779218822Sdim pte = pmap_pte_quick(pmap, pv->pv_va); 2780218822Sdim tpte = pte_load_clear(pte); 2781218822Sdim if (tpte & PG_W) 2782218822Sdim pmap->pm_stats.wired_count--; 2783218822Sdim if (tpte & PG_A) 2784218822Sdim vm_page_flag_set(m, PG_REFERENCED); 2785218822Sdim 2786218822Sdim /* 2787218822Sdim * Update the vm_page_t clean and reference bits. 2788218822Sdim */ 2789130561Sobrien if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2790130561Sobrien vm_page_dirty(m); 2791218822Sdim free = NULL; 2792218822Sdim pmap_unuse_pt(pmap, pv->pv_va, &free); 2793218822Sdim pmap_invalidate_page(pmap, pv->pv_va); 2794218822Sdim pmap_free_zero_pages(free); 2795218822Sdim TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2796218822Sdim free_pv_entry(pmap, pv); 2797218822Sdim PMAP_UNLOCK(pmap); 2798218822Sdim } 2799218822Sdim vm_page_flag_clear(m, PG_WRITEABLE); 2800218822Sdim sched_unpin(); 2801218822Sdim} 2802218822Sdim 2803218822Sdim/* 2804218822Sdim * pmap_protect_pde: do the things to protect a 4mpage in a process 2805218822Sdim */ 2806218822Sdimstatic boolean_t 2807218822Sdimpmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) 2808218822Sdim{ 2809218822Sdim pd_entry_t newpde, oldpde; 2810218822Sdim vm_offset_t eva, va; 2811218822Sdim vm_page_t m; 2812218822Sdim boolean_t anychanged; 2813218822Sdim 2814218822Sdim PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2815218822Sdim KASSERT((sva & PDRMASK) == 0, 2816218822Sdim ("pmap_protect_pde: sva is not 4mpage aligned")); 2817130561Sobrien anychanged = FALSE; 2818130561Sobrienretry: 2819130561Sobrien oldpde = newpde = *pde; 2820130561Sobrien if (oldpde & PG_MANAGED) { 2821130561Sobrien eva = sva + NBPDR; 282268765Sobrien for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); 2823130561Sobrien va < eva; va += PAGE_SIZE, 
m++) { 2824130561Sobrien /* 2825218822Sdim * In contrast to the analogous operation on a 4KB page 2826218822Sdim * mapping, the mapping's PG_A flag is not cleared and 282789857Sobrien * the page's PG_REFERENCED flag is not set. The 2828218822Sdim * reason is that pmap_demote_pde() expects that a 2/4MB 282989857Sobrien * page mapping with a stored page table page has PG_A 2830218822Sdim * set. 283168765Sobrien */ 2832218822Sdim if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2833218822Sdim vm_page_dirty(m); 2834218822Sdim } 2835130561Sobrien } 2836218822Sdim if ((prot & VM_PROT_WRITE) == 0) 2837218822Sdim newpde &= ~(PG_RW | PG_M); 2838130561Sobrien#ifdef PAE 2839130561Sobrien if ((prot & VM_PROT_EXECUTE) == 0) 284068765Sobrien newpde |= pg_nx; 2841218822Sdim#endif 2842218822Sdim if (newpde != oldpde) { 284368765Sobrien if (!pde_cmpset(pde, oldpde, newpde)) 284468765Sobrien goto retry; 2845130561Sobrien if (oldpde & PG_G) 284668765Sobrien pmap_invalidate_page(pmap, sva); 2847130561Sobrien else 2848218822Sdim anychanged = TRUE; 2849218822Sdim } 285068765Sobrien return (anychanged); 2851130561Sobrien} 2852130561Sobrien 2853130561Sobrien/* 2854130561Sobrien * Set the physical protection on the 2855130561Sobrien * specified range of this map as requested. 
2856130561Sobrien */ 2857130561Sobrienvoid 285868765Sobrienpmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2859130561Sobrien{ 286068765Sobrien vm_offset_t pdnxt; 2861130561Sobrien pd_entry_t ptpaddr; 2862130561Sobrien pt_entry_t *pte; 2863130561Sobrien int anychanged; 2864130561Sobrien 2865130561Sobrien if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 2866130561Sobrien pmap_remove(pmap, sva, eva); 286768765Sobrien return; 2868130561Sobrien } 2869130561Sobrien 2870130561Sobrien#ifdef PAE 2871130561Sobrien if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == 2872218822Sdim (VM_PROT_WRITE|VM_PROT_EXECUTE)) 2873130561Sobrien return; 2874130561Sobrien#else 2875130561Sobrien if (prot & VM_PROT_WRITE) 2876130561Sobrien return; 287768765Sobrien#endif 2878130561Sobrien 2879130561Sobrien anychanged = 0; 2880130561Sobrien 2881130561Sobrien vm_page_lock_queues(); 2882130561Sobrien sched_pin(); 2883130561Sobrien PMAP_LOCK(pmap); 2884130561Sobrien for (; sva < eva; sva = pdnxt) { 288568765Sobrien pt_entry_t obits, pbits; 2886130561Sobrien unsigned pdirindex; 2887130561Sobrien 2888130561Sobrien pdnxt = (sva + NBPDR) & ~PDRMASK; 2889130561Sobrien if (pdnxt < sva) 2890130561Sobrien pdnxt = eva; 2891130561Sobrien 2892130561Sobrien pdirindex = sva >> PDRSHIFT; 2893130561Sobrien ptpaddr = pmap->pm_pdir[pdirindex]; 2894130561Sobrien 2895130561Sobrien /* 2896130561Sobrien * Weed out invalid mappings. Note: we assume that the page 2897130561Sobrien * directory table is always allocated, and in kernel virtual. 2898130561Sobrien */ 289968765Sobrien if (ptpaddr == 0) 2900130561Sobrien continue; 2901130561Sobrien 2902130561Sobrien /* 2903130561Sobrien * Check for large page. 2904130561Sobrien */ 2905130561Sobrien if ((ptpaddr & PG_PS) != 0) { 290668765Sobrien /* 2907130561Sobrien * Are we protecting the entire large page? If not, 2908130561Sobrien * demote the mapping and fall through. 
2909130561Sobrien */ 2910130561Sobrien if (sva + NBPDR == pdnxt && eva >= pdnxt) { 291168765Sobrien /* 2912130561Sobrien * The TLB entry for a PG_G mapping is 2913130561Sobrien * invalidated by pmap_protect_pde(). 291468765Sobrien */ 2915130561Sobrien if (pmap_protect_pde(pmap, 2916130561Sobrien &pmap->pm_pdir[pdirindex], sva, prot)) 2917130561Sobrien anychanged = 1; 2918130561Sobrien continue; 2919130561Sobrien } else if (!pmap_demote_pde(pmap, 2920130561Sobrien &pmap->pm_pdir[pdirindex], sva)) { 2921218822Sdim /* The large page mapping was destroyed. */ 2922130561Sobrien continue; 292368765Sobrien } 2924130561Sobrien } 2925130561Sobrien 2926130561Sobrien if (pdnxt > eva) 2927130561Sobrien pdnxt = eva; 2928130561Sobrien 2929130561Sobrien for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2930130561Sobrien sva += PAGE_SIZE) { 293168765Sobrien vm_page_t m; 2932130561Sobrien 2933130561Sobrienretry: 2934130561Sobrien /* 2935130561Sobrien * Regardless of whether a pte is 32 or 64 bits in 2936130561Sobrien * size, PG_RW, PG_A, and PG_M are among the least 2937130561Sobrien * significant 32 bits. 
2938130561Sobrien */ 2939130561Sobrien obits = pbits = *pte; 2940130561Sobrien if ((pbits & PG_V) == 0) 2941130561Sobrien continue; 294268765Sobrien if (pbits & PG_MANAGED) { 2943130561Sobrien m = NULL; 2944130561Sobrien if (pbits & PG_A) { 2945130561Sobrien m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 294668765Sobrien vm_page_flag_set(m, PG_REFERENCED); 2947130561Sobrien pbits &= ~PG_A; 2948130561Sobrien } 2949130561Sobrien if ((pbits & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 2950130561Sobrien if (m == NULL) 295168765Sobrien m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 2952130561Sobrien vm_page_dirty(m); 2953130561Sobrien } 2954130561Sobrien } 295568765Sobrien 2956130561Sobrien if ((prot & VM_PROT_WRITE) == 0) 295768765Sobrien pbits &= ~(PG_RW | PG_M); 2958130561Sobrien#ifdef PAE 2959130561Sobrien if ((prot & VM_PROT_EXECUTE) == 0) 296068765Sobrien pbits |= pg_nx; 2961130561Sobrien#endif 2962130561Sobrien 2963130561Sobrien if (pbits != obits) { 2964130561Sobrien#ifdef PAE 2965130561Sobrien if (!atomic_cmpset_64(pte, obits, pbits)) 2966130561Sobrien goto retry; 2967130561Sobrien#else 2968130561Sobrien if (!atomic_cmpset_int((u_int *)pte, obits, 2969130561Sobrien pbits)) 2970130561Sobrien goto retry; 2971130561Sobrien#endif 297268765Sobrien if (obits & PG_G) 2973130561Sobrien pmap_invalidate_page(pmap, sva); 297468765Sobrien else 2975130561Sobrien anychanged = 1; 2976130561Sobrien } 297777298Sobrien } 2978130561Sobrien } 2979130561Sobrien sched_unpin(); 2980130561Sobrien if (anychanged) 2981218822Sdim pmap_invalidate_all(pmap); 298268765Sobrien vm_page_unlock_queues(); 2983130561Sobrien PMAP_UNLOCK(pmap); 2984130561Sobrien} 2985130561Sobrien 298668765Sobrien/* 2987130561Sobrien * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are 2988130561Sobrien * within a single page table page (PTP) to a single 2- or 4MB page mapping. 
298968765Sobrien * For promotion to occur, two conditions must be met: (1) the 4KB page 2990218822Sdim * mappings must map aligned, contiguous physical memory and (2) the 4KB page 299168765Sobrien * mappings must have identical characteristics. 2992218822Sdim * 2993218822Sdim * Managed (PG_MANAGED) mappings within the kernel address space are not 2994218822Sdim * promoted. The reason is that kernel PDEs are replicated in each pmap but 2995218822Sdim * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel 2996218822Sdim * pmap. 2997218822Sdim */ 2998218822Sdimstatic void 2999218822Sdimpmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) 3000218822Sdim{ 3001218822Sdim pd_entry_t newpde; 3002218822Sdim pmap_t allpmaps_entry; 3003218822Sdim pt_entry_t *firstpte, oldpte, pa, *pte; 3004218822Sdim vm_offset_t oldpteva; 3005218822Sdim vm_page_t mpte; 3006218822Sdim 3007218822Sdim PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3008218822Sdim 3009218822Sdim /* 3010218822Sdim * Examine the first PTE in the specified PTP. Abort if this PTE is 3011218822Sdim * either invalid, unused, or does not map the first 4KB physical page 3012218822Sdim * within a 2- or 4MB page. 
3013218822Sdim */ 3014218822Sdim firstpte = vtopte(trunc_4mpage(va)); 3015218822Sdimsetpde: 3016218822Sdim newpde = *firstpte; 3017130561Sobrien if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { 301868765Sobrien pmap_pde_p_failures++; 3019130561Sobrien CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3020130561Sobrien " in pmap %p", va, pmap); 302168765Sobrien return; 3022130561Sobrien } 3023130561Sobrien if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) { 3024130561Sobrien pmap_pde_p_failures++; 3025130561Sobrien CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3026130561Sobrien " in pmap %p", va, pmap); 302768765Sobrien return; 3028130561Sobrien } 3029130561Sobrien if ((newpde & (PG_M | PG_RW)) == PG_RW) { 3030130561Sobrien /* 3031130561Sobrien * When PG_M is already clear, PG_RW can be cleared without 3032130561Sobrien * a TLB invalidation. 3033130561Sobrien */ 3034218822Sdim if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde & 3035130561Sobrien ~PG_RW)) 3036130561Sobrien goto setpde; 3037130561Sobrien newpde &= ~PG_RW; 3038130561Sobrien } 3039130561Sobrien 3040130561Sobrien /* 3041130561Sobrien * Examine each of the other PTEs in the specified PTP. Abort if this 3042130561Sobrien * PTE maps an unexpected 4KB physical page or does not have identical 3043130561Sobrien * characteristics to the first PTE. 
3044130561Sobrien */ 3045130561Sobrien pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; 3046130561Sobrien for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { 3047130561Sobriensetpte: 3048130561Sobrien oldpte = *pte; 3049130561Sobrien if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { 3050130561Sobrien pmap_pde_p_failures++; 3051130561Sobrien CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 305277298Sobrien " in pmap %p", va, pmap); 3053130561Sobrien return; 3054130561Sobrien } 3055130561Sobrien if ((oldpte & (PG_M | PG_RW)) == PG_RW) { 3056130561Sobrien /* 305777298Sobrien * When PG_M is already clear, PG_RW can be cleared 3058130561Sobrien * without a TLB invalidation. 3059130561Sobrien */ 306068765Sobrien if (!atomic_cmpset_int((u_int *)pte, oldpte, 3061130561Sobrien oldpte & ~PG_RW)) 306268765Sobrien goto setpte; 3063130561Sobrien oldpte &= ~PG_RW; 306468765Sobrien oldpteva = (oldpte & PG_FRAME & PDRMASK) | 3065130561Sobrien (va & ~PDRMASK); 3066130561Sobrien CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x" 306768765Sobrien " in pmap %p", oldpteva, pmap); 3068130561Sobrien } 3069130561Sobrien if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { 3070130561Sobrien pmap_pde_p_failures++; 307168765Sobrien CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3072130561Sobrien " in pmap %p", va, pmap); 3073130561Sobrien return; 3074130561Sobrien } 3075130561Sobrien pa -= PAGE_SIZE; 307677298Sobrien } 3077130561Sobrien 3078218822Sdim /* 3079130561Sobrien * Save the page table page in its current state until the PDE 3080130561Sobrien * mapping the superpage is demoted by pmap_demote_pde() or 308168765Sobrien * destroyed by pmap_remove_pde(). 
3082130561Sobrien */ 3083218822Sdim mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); 3084130561Sobrien KASSERT(mpte >= vm_page_array && 3085130561Sobrien mpte < &vm_page_array[vm_page_array_size], 308668765Sobrien ("pmap_promote_pde: page table page is out of range")); 3087130561Sobrien KASSERT(mpte->pindex == va >> PDRSHIFT, 3088218822Sdim ("pmap_promote_pde: page table page's pindex is wrong")); 3089130561Sobrien pmap_insert_pt_page(pmap, mpte); 3090218822Sdim 3091130561Sobrien /* 3092130561Sobrien * Promote the pv entries. 309368765Sobrien */ 3094130561Sobrien if ((newpde & PG_MANAGED) != 0) 3095218822Sdim pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); 3096130561Sobrien 3097130561Sobrien /* 309868765Sobrien * Propagate the PAT index to its proper position. 3099130561Sobrien */ 3100218822Sdim if ((newpde & PG_PTE_PAT) != 0) 3101130561Sobrien newpde ^= PG_PDE_PAT | PG_PTE_PAT; 3102130561Sobrien 310368765Sobrien /* 3104130561Sobrien * Map the superpage. 3105218822Sdim */ 3106130561Sobrien if (pmap == kernel_pmap) { 3107130561Sobrien mtx_lock_spin(&allpmaps_lock); 310868765Sobrien LIST_FOREACH(allpmaps_entry, &allpmaps, pm_list) { 3109130561Sobrien pde = pmap_pde(allpmaps_entry, va); 3110218822Sdim pde_store(pde, PG_PS | newpde); 3111130561Sobrien } 3112130561Sobrien mtx_unlock_spin(&allpmaps_lock); 311368765Sobrien } else 3114130561Sobrien pde_store(pde, PG_PS | newpde); 3115218822Sdim 3116130561Sobrien pmap_pde_promotions++; 3117130561Sobrien CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" 311868765Sobrien " in pmap %p", va, pmap); 3119130561Sobrien} 3120218822Sdim 3121130561Sobrien/* 3122130561Sobrien * Insert the given physical page (p) at 312368765Sobrien * the specified virtual address (v) in the 3124130561Sobrien * target physical map with the protection requested. 3125218822Sdim * 3126130561Sobrien * If specified, the page will be wired down, meaning 3127130561Sobrien * that the related pte can not be reclaimed. 
312868765Sobrien * 3129130561Sobrien * NB: This is the only routine which MAY NOT lazy-evaluate 3130218822Sdim * or lose information. That is, this routine must actually 3131130561Sobrien * insert this page into the given map NOW. 3132130561Sobrien */ 313368765Sobrienvoid 3134130561Sobrienpmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 3135218822Sdim vm_prot_t prot, boolean_t wired) 3136130561Sobrien{ 3137130561Sobrien vm_paddr_t pa; 313868765Sobrien pd_entry_t *pde; 3139218822Sdim pt_entry_t *pte; 3140218822Sdim vm_paddr_t opa; 3141218822Sdim pt_entry_t origpte, newpte; 3142218822Sdim vm_page_t mpte, om; 3143218822Sdim boolean_t invlva; 3144130561Sobrien 3145130561Sobrien va = trunc_page(va); 3146130561Sobrien KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 314768765Sobrien KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, 3148130561Sobrien ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va)); 3149130561Sobrien 3150130561Sobrien mpte = NULL; 3151218822Sdim 3152218822Sdim vm_page_lock_queues(); 3153218822Sdim PMAP_LOCK(pmap); 3154218822Sdim sched_pin(); 3155218822Sdim 3156218822Sdim /* 3157218822Sdim * In the case that a page table page is not 3158218822Sdim * resident, we are creating it here. 
3159218822Sdim */ 3160218822Sdim if (va < VM_MAXUSER_ADDRESS) { 3161218822Sdim mpte = pmap_allocpte(pmap, va, M_WAITOK); 3162218822Sdim } 3163218822Sdim 3164218822Sdim pde = pmap_pde(pmap, va); 3165218822Sdim if ((*pde & PG_PS) != 0) 3166218822Sdim panic("pmap_enter: attempted pmap_enter on 4MB page"); 3167218822Sdim pte = pmap_pte_quick(pmap, va); 3168218822Sdim 3169218822Sdim /* 3170218822Sdim * Page Directory table entry not valid, we need a new PT page 3171218822Sdim */ 3172218822Sdim if (pte == NULL) { 3173218822Sdim panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", 3174218822Sdim (uintmax_t)pmap->pm_pdir[PTDPTDI], va); 3175130561Sobrien } 3176130561Sobrien 3177130561Sobrien pa = VM_PAGE_TO_PHYS(m); 3178130561Sobrien om = NULL; 3179130561Sobrien origpte = *pte; 3180130561Sobrien opa = origpte & PG_FRAME; 3181130561Sobrien 3182130561Sobrien /* 3183130561Sobrien * Mapping has not changed, must be protection or wiring change. 3184130561Sobrien */ 3185130561Sobrien if (origpte && (opa == pa)) { 318668765Sobrien /* 3187130561Sobrien * Wiring change, just update stats. We don't worry about 3188130561Sobrien * wiring PT pages as they remain resident as long as there 3189130561Sobrien * are valid mappings in them. Hence, if a user page is wired, 3190130561Sobrien * the PT page will be also. 3191130561Sobrien */ 319268765Sobrien if (wired && ((origpte & PG_W) == 0)) 3193130561Sobrien pmap->pm_stats.wired_count++; 319468765Sobrien else if (!wired && (origpte & PG_W)) 3195130561Sobrien pmap->pm_stats.wired_count--; 3196130561Sobrien 3197130561Sobrien /* 3198130561Sobrien * Remove extra pte reference 319968765Sobrien */ 3200130561Sobrien if (mpte) 320168765Sobrien mpte->wire_count--; 3202130561Sobrien 3203130561Sobrien /* 3204130561Sobrien * We might be turning off write access to the page, 3205130561Sobrien * so we go ahead and sense modify status. 
3206130561Sobrien */ 3207130561Sobrien if (origpte & PG_MANAGED) { 3208130561Sobrien om = m; 320968765Sobrien pa |= PG_MANAGED; 3210130561Sobrien } 3211130561Sobrien goto validate; 3212130561Sobrien } 321368765Sobrien /* 3214130561Sobrien * Mapping has changed, invalidate old range and fall through to 3215130561Sobrien * handle validating new mapping. 3216130561Sobrien */ 321768765Sobrien if (opa) { 3218130561Sobrien if (origpte & PG_W) 3219130561Sobrien pmap->pm_stats.wired_count--; 3220218822Sdim if (origpte & PG_MANAGED) { 3221218822Sdim om = PHYS_TO_VM_PAGE(opa); 3222218822Sdim pmap_remove_entry(pmap, om, va); 3223218822Sdim } 3224130561Sobrien if (mpte != NULL) { 3225130561Sobrien mpte->wire_count--; 3226130561Sobrien KASSERT(mpte->wire_count > 0, 322768765Sobrien ("pmap_enter: missing reference to page table page," 3228130561Sobrien " va: 0x%x", va)); 3229130561Sobrien } 3230130561Sobrien } else 3231130561Sobrien pmap->pm_stats.resident_count++; 3232130561Sobrien 3233130561Sobrien /* 3234130561Sobrien * Enter on the PV list if part of our managed memory. 323568765Sobrien */ 3236130561Sobrien if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 323768765Sobrien KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 3238130561Sobrien ("pmap_enter: managed mapping within the clean submap")); 323968765Sobrien pmap_insert_entry(pmap, va, m); 3240130561Sobrien pa |= PG_MANAGED; 3241130561Sobrien } 324268765Sobrien 3243218822Sdim /* 3244218822Sdim * Increment counters 3245218822Sdim */ 3246218822Sdim if (wired) 3247130561Sobrien pmap->pm_stats.wired_count++; 324868765Sobrien 3249218822Sdimvalidate: 3250218822Sdim /* 325168765Sobrien * Now validate mapping with desired protection/wiring. 
3252130561Sobrien */ 3253130561Sobrien newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); 325468765Sobrien if ((prot & VM_PROT_WRITE) != 0) { 3255130561Sobrien newpte |= PG_RW; 3256130561Sobrien vm_page_flag_set(m, PG_WRITEABLE); 3257218822Sdim } 3258218822Sdim#ifdef PAE 3259218822Sdim if ((prot & VM_PROT_EXECUTE) == 0) 3260218822Sdim newpte |= pg_nx; 326168765Sobrien#endif 3262130561Sobrien if (wired) 3263218822Sdim newpte |= PG_W; 3264218822Sdim if (va < VM_MAXUSER_ADDRESS) 3265218822Sdim newpte |= PG_U; 3266218822Sdim if (pmap == kernel_pmap) 3267218822Sdim newpte |= pgeflag; 3268218822Sdim 326968765Sobrien /* 3270218822Sdim * if the mapping or permission bits are different, we need 327168765Sobrien * to update the pte. 3272218822Sdim */ 3273218822Sdim if ((origpte & ~(PG_M|PG_A)) != newpte) { 3274218822Sdim newpte |= PG_A; 3275218822Sdim if ((access & VM_PROT_WRITE) != 0) 3276218822Sdim newpte |= PG_M; 3277218822Sdim if (origpte & PG_V) { 3278218822Sdim invlva = FALSE; 3279218822Sdim origpte = pte_load_store(pte, newpte); 3280218822Sdim if (origpte & PG_A) { 3281218822Sdim if (origpte & PG_MANAGED) 3282218822Sdim vm_page_flag_set(om, PG_REFERENCED); 3283218822Sdim if (opa != VM_PAGE_TO_PHYS(m)) 3284218822Sdim invlva = TRUE; 3285218822Sdim#ifdef PAE 3286218822Sdim if ((origpte & PG_NX) == 0 && 3287218822Sdim (newpte & PG_NX) != 0) 3288218822Sdim invlva = TRUE; 3289218822Sdim#endif 3290218822Sdim } 3291218822Sdim if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 3292218822Sdim if ((origpte & PG_MANAGED) != 0) 3293218822Sdim vm_page_dirty(om); 3294218822Sdim if ((prot & VM_PROT_WRITE) == 0) 3295218822Sdim invlva = TRUE; 3296218822Sdim } 3297218822Sdim if (invlva) 3298218822Sdim pmap_invalidate_page(pmap, va); 3299218822Sdim } else 3300218822Sdim pte_store(pte, newpte); 3301218822Sdim } 3302218822Sdim 3303130561Sobrien /* 330468765Sobrien * If both the page table page and the reservation are fully 3305218822Sdim * populated, then attempt 
promotion. 3306130561Sobrien */ 3307218822Sdim if ((mpte == NULL || mpte->wire_count == NPTEPG) && 3308130561Sobrien pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0) 3309130561Sobrien pmap_promote_pde(pmap, pde, va); 3310218822Sdim 3311218822Sdim sched_unpin(); 3312218822Sdim vm_page_unlock_queues(); 3313218822Sdim PMAP_UNLOCK(pmap); 3314218822Sdim} 3315218822Sdim 3316130561Sobrien/* 3317130561Sobrien * Tries to create a 2- or 4MB page mapping. Returns TRUE if successful and 3318130561Sobrien * FALSE otherwise. Fails if (1) a page table page cannot be allocated without 3319130561Sobrien * blocking, (2) a mapping already exists at the specified virtual address, or 3320130561Sobrien * (3) a pv entry cannot be allocated without reclaiming another pv entry. 3321130561Sobrien */ 3322130561Sobrienstatic boolean_t 3323130561Sobrienpmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3324130561Sobrien{ 3325130561Sobrien pd_entry_t *pde, newpde; 3326130561Sobrien 3327130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3328130561Sobrien PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3329130561Sobrien pde = pmap_pde(pmap, va); 3330130561Sobrien if (*pde != 0) { 3331130561Sobrien CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" 3332130561Sobrien " in pmap %p", va, pmap); 3333130561Sobrien return (FALSE); 3334130561Sobrien } 3335130561Sobrien newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) | 3336130561Sobrien PG_PS | PG_V; 3337130561Sobrien if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 3338130561Sobrien newpde |= PG_MANAGED; 3339218822Sdim 3340130561Sobrien /* 3341130561Sobrien * Abort this mapping if its PV entry could not be created. 
3342130561Sobrien */ 3343130561Sobrien if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { 3344130561Sobrien CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" 3345130561Sobrien " in pmap %p", va, pmap); 3346130561Sobrien return (FALSE); 3347130561Sobrien } 3348130561Sobrien } 3349130561Sobrien#ifdef PAE 3350130561Sobrien if ((prot & VM_PROT_EXECUTE) == 0) 335168765Sobrien newpde |= pg_nx; 3352218822Sdim#endif 3353130561Sobrien if (va < VM_MAXUSER_ADDRESS) 335468765Sobrien newpde |= PG_U; 3355130561Sobrien 335668765Sobrien /* 3357130561Sobrien * Increment counters. 3358130561Sobrien */ 3359130561Sobrien pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; 336068765Sobrien 3361218822Sdim /* 3362130561Sobrien * Map the superpage. 3363130561Sobrien */ 3364130561Sobrien pde_store(pde, newpde); 3365130561Sobrien 3366130561Sobrien pmap_pde_mappings++; 3367130561Sobrien CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" 3368130561Sobrien " in pmap %p", va, pmap); 336968765Sobrien return (TRUE); 3370130561Sobrien} 3371218822Sdim 3372130561Sobrien/* 3373130561Sobrien * Maps a sequence of resident pages belonging to the same object. 3374130561Sobrien * The sequence begins with the given page m_start. This page is 3375130561Sobrien * mapped at the given virtual address start. Each subsequent page is 3376218822Sdim * mapped at a virtual address that is offset from start by the same 3377130561Sobrien * amount as the page is offset from m_start within the object. The 3378130561Sobrien * last page in the sequence is the page with the largest offset from 3379130561Sobrien * m_start that can be mapped at a virtual address less than the given 3380130561Sobrien * virtual address end. Not every virtual page between start and end 3381130561Sobrien * is mapped; only those for which a resident page exists with the 3382130561Sobrien * corresponding offset from m_start are mapped. 
3383130561Sobrien */ 3384130561Sobrienvoid 3385218822Sdimpmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3386130561Sobrien vm_page_t m_start, vm_prot_t prot) 3387130561Sobrien{ 338868765Sobrien vm_offset_t va; 3389130561Sobrien vm_page_t m, mpte; 3390130561Sobrien vm_pindex_t diff, psize; 3391130561Sobrien 3392130561Sobrien VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 339368765Sobrien psize = atop(end - start); 3394130561Sobrien mpte = NULL; 3395130561Sobrien m = m_start; 3396130561Sobrien PMAP_LOCK(pmap); 3397130561Sobrien while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3398130561Sobrien va = start + ptoa(diff); 3399130561Sobrien if ((va & PDRMASK) == 0 && va + NBPDR <= end && 340068765Sobrien (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 && 3401130561Sobrien pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 && 3402130561Sobrien pmap_enter_pde(pmap, va, m, prot)) 3403130561Sobrien m = &m[NBPDR / PAGE_SIZE - 1]; 3404130561Sobrien else 3405130561Sobrien mpte = pmap_enter_quick_locked(pmap, va, m, prot, 3406130561Sobrien mpte); 3407130561Sobrien m = TAILQ_NEXT(m, listq); 340868765Sobrien } 3409130561Sobrien PMAP_UNLOCK(pmap); 3410130561Sobrien} 3411218822Sdim 3412130561Sobrien/* 3413218822Sdim * this code makes some *MAJOR* assumptions: 3414130561Sobrien * 1. Current pmap & pmap exists. 3415130561Sobrien * 2. Not wired. 341668765Sobrien * 3. Read access. 3417130561Sobrien * 4. No page table pages. 3418130561Sobrien * but is *MUCH* faster than pmap_enter... 
3419130561Sobrien */ 3420130561Sobrien 342168765Sobrienvoid 3422130561Sobrienpmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 342368765Sobrien{ 3424130561Sobrien 3425130561Sobrien PMAP_LOCK(pmap); 3426130561Sobrien (void) pmap_enter_quick_locked(pmap, va, m, prot, NULL); 3427130561Sobrien PMAP_UNLOCK(pmap); 342868765Sobrien} 3429130561Sobrien 3430130561Sobrienstatic vm_page_t 3431130561Sobrienpmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 3432130561Sobrien vm_prot_t prot, vm_page_t mpte) 3433130561Sobrien{ 3434130561Sobrien pt_entry_t *pte; 3435130561Sobrien vm_paddr_t pa; 3436130561Sobrien vm_page_t free; 3437130561Sobrien 3438218822Sdim KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 3439130561Sobrien (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 3440218822Sdim ("pmap_enter_quick_locked: managed mapping within the clean submap")); 3441130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3442218822Sdim PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3443130561Sobrien 3444130561Sobrien /* 3445130561Sobrien * In the case that a page table page is not 344668765Sobrien * resident, we are creating it here. 3447130561Sobrien */ 3448130561Sobrien if (va < VM_MAXUSER_ADDRESS) { 3449130561Sobrien unsigned ptepindex; 3450130561Sobrien pd_entry_t ptepa; 3451130561Sobrien 345268765Sobrien /* 3453130561Sobrien * Calculate pagetable page index 3454130561Sobrien */ 3455218822Sdim ptepindex = va >> PDRSHIFT; 3456218822Sdim if (mpte && (mpte->pindex == ptepindex)) { 3457130561Sobrien mpte->wire_count++; 3458218822Sdim } else { 3459218822Sdim /* 3460218822Sdim * Get the page directory entry 3461218822Sdim */ 3462218822Sdim ptepa = pmap->pm_pdir[ptepindex]; 3463218822Sdim 3464218822Sdim /* 3465218822Sdim * If the page table page is mapped, we just increment 3466218822Sdim * the hold count, and activate it. 
3467218822Sdim */ 3468218822Sdim if (ptepa) { 3469218822Sdim if (ptepa & PG_PS) 3470218822Sdim return (NULL); 3471218822Sdim mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); 3472218822Sdim mpte->wire_count++; 3473218822Sdim } else { 3474218822Sdim mpte = _pmap_allocpte(pmap, ptepindex, 3475218822Sdim M_NOWAIT); 3476218822Sdim if (mpte == NULL) 3477218822Sdim return (mpte); 3478218822Sdim } 3479218822Sdim } 3480218822Sdim } else { 3481218822Sdim mpte = NULL; 3482218822Sdim } 3483218822Sdim 3484218822Sdim /* 3485218822Sdim * This call to vtopte makes the assumption that we are 3486218822Sdim * entering the page into the current pmap. In order to support 3487218822Sdim * quick entry into any pmap, one would likely use pmap_pte_quick. 3488218822Sdim * But that isn't as quick as vtopte. 3489218822Sdim */ 3490218822Sdim pte = vtopte(va); 3491218822Sdim if (*pte) { 3492218822Sdim if (mpte != NULL) { 3493218822Sdim mpte->wire_count--; 3494218822Sdim mpte = NULL; 3495218822Sdim } 3496218822Sdim return (mpte); 3497218822Sdim } 3498218822Sdim 3499130561Sobrien /* 3500218822Sdim * Enter on the PV list if part of our managed memory. 
3501218822Sdim */ 3502218822Sdim if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && 3503218822Sdim !pmap_try_insert_pv_entry(pmap, va, m)) { 3504218822Sdim if (mpte != NULL) { 3505218822Sdim free = NULL; 3506218822Sdim if (pmap_unwire_pte_hold(pmap, mpte, &free)) { 3507218822Sdim pmap_invalidate_page(pmap, va); 3508218822Sdim pmap_free_zero_pages(free); 3509218822Sdim } 3510218822Sdim 3511218822Sdim mpte = NULL; 3512218822Sdim } 3513218822Sdim return (mpte); 3514218822Sdim } 3515218822Sdim 3516218822Sdim /* 3517218822Sdim * Increment counters 3518130561Sobrien */ 3519218822Sdim pmap->pm_stats.resident_count++; 3520218822Sdim 3521218822Sdim pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); 3522218822Sdim#ifdef PAE 352368765Sobrien if ((prot & VM_PROT_EXECUTE) == 0) 3524218822Sdim pa |= pg_nx; 3525218822Sdim#endif 3526218822Sdim 3527218822Sdim /* 3528218822Sdim * Now validate mapping with RO protection 3529218822Sdim */ 3530218822Sdim if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 3531218822Sdim pte_store(pte, pa | PG_V | PG_U); 3532218822Sdim else 3533218822Sdim pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 3534218822Sdim return mpte; 3535130561Sobrien} 353668765Sobrien 353768765Sobrien/* 3538130561Sobrien * Make a temporary mapping for a physical address. This is only intended 3539130561Sobrien * to be used for panic dumps. 354068765Sobrien */ 354168765Sobrienvoid * 354268765Sobrienpmap_kenter_temporary(vm_paddr_t pa, int i) 3543130561Sobrien{ 3544130561Sobrien vm_offset_t va; 3545130561Sobrien 3546130561Sobrien va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 354768765Sobrien pmap_kenter(va, pa); 3548130561Sobrien invlpg(va); 3549218822Sdim return ((void *)crashdumpmap); 355068765Sobrien} 3551130561Sobrien 3552130561Sobrien/* 355368765Sobrien * This code maps large physical mmap regions into the 3554130561Sobrien * processor address space. Note that some shortcuts 3555130561Sobrien * are taken, but the code works. 
355668765Sobrien */ 3557130561Sobrienvoid 3558130561Sobrienpmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 3559130561Sobrien vm_pindex_t pindex, vm_size_t size) 3560130561Sobrien{ 3561130561Sobrien pd_entry_t *pde; 3562130561Sobrien vm_paddr_t pa, ptepa; 3563130561Sobrien vm_page_t p; 356468765Sobrien int pat_mode; 3565130561Sobrien 3566130561Sobrien VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 3567130561Sobrien KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3568130561Sobrien ("pmap_object_init_pt: non-device object")); 356968765Sobrien if (pseflag && 3570130561Sobrien (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { 3571130561Sobrien if (!vm_object_populate(object, pindex, pindex + atop(size))) 3572130561Sobrien return; 3573130561Sobrien p = vm_page_lookup(object, pindex); 3574130561Sobrien KASSERT(p->valid == VM_PAGE_BITS_ALL, 3575130561Sobrien ("pmap_object_init_pt: invalid page %p", p)); 3576130561Sobrien pat_mode = p->md.pat_mode; 3577130561Sobrien 3578130561Sobrien /* 3579130561Sobrien * Abort the mapping if the first page is not physically 3580130561Sobrien * aligned to a 2/4MB page boundary. 3581130561Sobrien */ 3582130561Sobrien ptepa = VM_PAGE_TO_PHYS(p); 3583130561Sobrien if (ptepa & (NBPDR - 1)) 3584130561Sobrien return; 3585130561Sobrien 3586130561Sobrien /* 3587130561Sobrien * Skip the first page. Abort the mapping if the rest of 3588130561Sobrien * the pages are not physically contiguous or have differing 358968765Sobrien * memory attributes. 
359068765Sobrien */ 3591130561Sobrien p = TAILQ_NEXT(p, listq); 359268765Sobrien for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; 359368765Sobrien pa += PAGE_SIZE) { 359468765Sobrien KASSERT(p->valid == VM_PAGE_BITS_ALL, 3595130561Sobrien ("pmap_object_init_pt: invalid page %p", p)); 3596130561Sobrien if (pa != VM_PAGE_TO_PHYS(p) || 359768765Sobrien pat_mode != p->md.pat_mode) 3598130561Sobrien return; 3599218822Sdim p = TAILQ_NEXT(p, listq); 3600218822Sdim } 360168765Sobrien 3602130561Sobrien /* 360368765Sobrien * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and 3604130561Sobrien * "size" is a multiple of 2/4M, adding the PAT setting to 3605130561Sobrien * "pa" will not affect the termination of this loop. 360668765Sobrien */ 3607130561Sobrien PMAP_LOCK(pmap); 3608130561Sobrien for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + 3609130561Sobrien size; pa += NBPDR) { 3610130561Sobrien pde = pmap_pde(pmap, addr); 3611130561Sobrien if (*pde == 0) { 3612130561Sobrien pde_store(pde, pa | PG_PS | PG_M | PG_A | 3613130561Sobrien PG_U | PG_RW | PG_V); 3614130561Sobrien pmap->pm_stats.resident_count += NBPDR / 3615130561Sobrien PAGE_SIZE; 361668765Sobrien pmap_pde_mappings++; 3617130561Sobrien } 361868765Sobrien /* Else continue on if the PDE is already valid. */ 3619130561Sobrien addr += NBPDR; 3620130561Sobrien } 362168765Sobrien PMAP_UNLOCK(pmap); 3622130561Sobrien } 362368765Sobrien} 3624130561Sobrien 3625130561Sobrien/* 3626130561Sobrien * Routine: pmap_change_wiring 3627130561Sobrien * Function: Change the wiring attribute for a map/virtual-address 3628130561Sobrien * pair. 3629130561Sobrien * In/out conditions: 3630130561Sobrien * The mapping must already exist in the pmap. 
3631130561Sobrien */ 3632130561Sobrienvoid 3633130561Sobrienpmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 3634130561Sobrien{ 3635130561Sobrien pd_entry_t *pde; 3636130561Sobrien pt_entry_t *pte; 3637130561Sobrien boolean_t are_queues_locked; 363868765Sobrien 3639130561Sobrien are_queues_locked = FALSE; 3640130561Sobrienretry: 3641130561Sobrien PMAP_LOCK(pmap); 3642130561Sobrien pde = pmap_pde(pmap, va); 3643130561Sobrien if ((*pde & PG_PS) != 0) { 3644130561Sobrien if (!wired != ((*pde & PG_W) == 0)) { 3645130561Sobrien if (!are_queues_locked) { 3646130561Sobrien are_queues_locked = TRUE; 3647130561Sobrien if (!mtx_trylock(&vm_page_queue_mtx)) { 3648130561Sobrien PMAP_UNLOCK(pmap); 3649130561Sobrien vm_page_lock_queues(); 3650218822Sdim goto retry; 365168765Sobrien } 3652130561Sobrien } 365368765Sobrien if (!pmap_demote_pde(pmap, pde, va)) 3654130561Sobrien panic("pmap_change_wiring: demotion failed"); 3655130561Sobrien } else 3656130561Sobrien goto out; 3657130561Sobrien } 3658130561Sobrien pte = pmap_pte(pmap, va); 3659130561Sobrien 3660130561Sobrien if (wired && !pmap_pte_w(pte)) 3661130561Sobrien pmap->pm_stats.wired_count++; 3662130561Sobrien else if (!wired && pmap_pte_w(pte)) 3663130561Sobrien pmap->pm_stats.wired_count--; 366468765Sobrien 366568765Sobrien /* 3666130561Sobrien * Wiring is not a hardware characteristic so there is no need to 366768765Sobrien * invalidate TLB. 3668130561Sobrien */ 366968765Sobrien pmap_pte_set_w(pte, wired); 3670130561Sobrien pmap_pte_release(pte); 3671130561Sobrienout: 367268765Sobrien if (are_queues_locked) 3673130561Sobrien vm_page_unlock_queues(); 367468765Sobrien PMAP_UNLOCK(pmap); 3675130561Sobrien} 3676218822Sdim 3677218822Sdim 367868765Sobrien 3679130561Sobrien/* 3680130561Sobrien * Copy the range specified by src_addr/len 3681130561Sobrien * from the source map to the range dst_addr/len 3682130561Sobrien * in the destination map. 
3683218822Sdim * 3684130561Sobrien * This routine is only advisory and need not do anything. 3685130561Sobrien */ 3686130561Sobrien 3687218822Sdimvoid 3688130561Sobrienpmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3689130561Sobrien vm_offset_t src_addr) 3690130561Sobrien{ 3691218822Sdim vm_page_t free; 3692130561Sobrien vm_offset_t addr; 3693130561Sobrien vm_offset_t end_addr = src_addr + len; 3694130561Sobrien vm_offset_t pdnxt; 3695130561Sobrien 3696130561Sobrien if (dst_addr != src_addr) 3697130561Sobrien return; 3698130561Sobrien 3699130561Sobrien if (!pmap_is_current(src_pmap)) 3700130561Sobrien return; 3701130561Sobrien 3702130561Sobrien vm_page_lock_queues(); 3703130561Sobrien if (dst_pmap < src_pmap) { 3704130561Sobrien PMAP_LOCK(dst_pmap); 3705130561Sobrien PMAP_LOCK(src_pmap); 3706218822Sdim } else { 3707130561Sobrien PMAP_LOCK(src_pmap); 3708130561Sobrien PMAP_LOCK(dst_pmap); 3709218822Sdim } 3710130561Sobrien sched_pin(); 3711130561Sobrien for (addr = src_addr; addr < end_addr; addr = pdnxt) { 3712130561Sobrien pt_entry_t *src_pte, *dst_pte; 3713130561Sobrien vm_page_t dstmpte, srcmpte; 3714130561Sobrien pd_entry_t srcptepaddr; 3715218822Sdim unsigned ptepindex; 3716130561Sobrien 3717130561Sobrien KASSERT(addr < UPT_MIN_ADDRESS, 3718130561Sobrien ("pmap_copy: invalid to pmap_copy page tables")); 3719130561Sobrien 3720130561Sobrien pdnxt = (addr + NBPDR) & ~PDRMASK; 3721130561Sobrien if (pdnxt < addr) 3722130561Sobrien pdnxt = end_addr; 3723130561Sobrien ptepindex = addr >> PDRSHIFT; 3724130561Sobrien 3725130561Sobrien srcptepaddr = src_pmap->pm_pdir[ptepindex]; 3726130561Sobrien if (srcptepaddr == 0) 372768765Sobrien continue; 3728130561Sobrien 372968765Sobrien if (srcptepaddr & PG_PS) { 3730130561Sobrien if (dst_pmap->pm_pdir[ptepindex] == 0 && 3731218822Sdim ((srcptepaddr & PG_MANAGED) == 0 || 3732218822Sdim pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & 3733218822Sdim PG_PS_FRAME))) { 3734130561Sobrien 
dst_pmap->pm_pdir[ptepindex] = srcptepaddr & 3735130561Sobrien ~PG_W; 3736130561Sobrien dst_pmap->pm_stats.resident_count += 3737218822Sdim NBPDR / PAGE_SIZE; 3738130561Sobrien } 3739130561Sobrien continue; 374068765Sobrien } 3741130561Sobrien 3742130561Sobrien srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); 3743218822Sdim KASSERT(srcmpte->wire_count > 0, 3744130561Sobrien ("pmap_copy: source page table page is unused")); 374568765Sobrien 3746130561Sobrien if (pdnxt > end_addr) 3747130561Sobrien pdnxt = end_addr; 374868765Sobrien 3749130561Sobrien src_pte = vtopte(addr); 3750130561Sobrien while (addr < pdnxt) { 375168765Sobrien pt_entry_t ptetemp; 3752218822Sdim ptetemp = *src_pte; 3753218822Sdim /* 3754218822Sdim * we only virtual copy managed pages 3755218822Sdim */ 3756130561Sobrien if ((ptetemp & PG_MANAGED) != 0) { 3757130561Sobrien dstmpte = pmap_allocpte(dst_pmap, addr, 3758130561Sobrien M_NOWAIT); 3759130561Sobrien if (dstmpte == NULL) 3760130561Sobrien goto out; 3761130561Sobrien dst_pte = pmap_pte_quick(dst_pmap, addr); 3762130561Sobrien if (*dst_pte == 0 && 3763218822Sdim pmap_try_insert_pv_entry(dst_pmap, addr, 3764130561Sobrien PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { 376568765Sobrien /* 3766130561Sobrien * Clear the wired, modified, and 3767130561Sobrien * accessed (referenced) bits 3768130561Sobrien * during the copy. 
376968765Sobrien */ 3770130561Sobrien *dst_pte = ptetemp & ~(PG_W | PG_M | 377168765Sobrien PG_A); 3772130561Sobrien dst_pmap->pm_stats.resident_count++; 3773130561Sobrien } else { 3774130561Sobrien free = NULL; 3775130561Sobrien if (pmap_unwire_pte_hold(dst_pmap, 377668765Sobrien dstmpte, &free)) { 3777130561Sobrien pmap_invalidate_page(dst_pmap, 3778130561Sobrien addr); 3779130561Sobrien pmap_free_zero_pages(free); 3780130561Sobrien } 3781130561Sobrien goto out; 3782218822Sdim } 3783130561Sobrien if (dstmpte->wire_count >= srcmpte->wire_count) 3784218822Sdim break; 3785218822Sdim } 3786218822Sdim addr += PAGE_SIZE; 3787130561Sobrien src_pte++; 3788218822Sdim } 3789218822Sdim } 3790130561Sobrienout: 3791130561Sobrien sched_unpin(); 3792130561Sobrien vm_page_unlock_queues(); 3793130561Sobrien PMAP_UNLOCK(src_pmap); 3794130561Sobrien PMAP_UNLOCK(dst_pmap); 3795130561Sobrien} 3796130561Sobrien 3797130561Sobrienstatic __inline void 3798130561Sobrienpagezero(void *page) 3799130561Sobrien{ 3800130561Sobrien#if defined(I686_CPU) 3801130561Sobrien if (cpu_class == CPUCLASS_686) { 3802130561Sobrien#if defined(CPU_ENABLE_SSE) 3803130561Sobrien if (cpu_feature & CPUID_SSE2) 3804130561Sobrien sse2_pagezero(page); 3805130561Sobrien else 3806130561Sobrien#endif 3807130561Sobrien i686_pagezero(page); 3808130561Sobrien } else 3809130561Sobrien#endif 3810130561Sobrien bzero(page, PAGE_SIZE); 3811130561Sobrien} 3812130561Sobrien 3813130561Sobrien/* 3814130561Sobrien * pmap_zero_page zeros the specified hardware page by mapping 3815130561Sobrien * the page into KVM and using bzero to clear its contents. 
3816218822Sdim */ 3817218822Sdimvoid 3818218822Sdimpmap_zero_page(vm_page_t m) 3819130561Sobrien{ 3820130561Sobrien struct sysmaps *sysmaps; 3821130561Sobrien 3822130561Sobrien sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3823130561Sobrien mtx_lock(&sysmaps->lock); 3824130561Sobrien if (*sysmaps->CMAP2) 3825130561Sobrien panic("pmap_zero_page: CMAP2 busy"); 3826130561Sobrien sched_pin(); 3827130561Sobrien *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | 3828130561Sobrien pmap_cache_bits(m->md.pat_mode, 0); 3829130561Sobrien invlcaddr(sysmaps->CADDR2); 3830130561Sobrien pagezero(sysmaps->CADDR2); 3831218822Sdim *sysmaps->CMAP2 = 0; 383268765Sobrien sched_unpin(); 3833218822Sdim mtx_unlock(&sysmaps->lock); 3834218822Sdim} 3835218822Sdim 3836218822Sdim/* 3837218822Sdim * pmap_zero_page_area zeros the specified hardware page by mapping 3838218822Sdim * the page into KVM and using bzero to clear its contents. 3839218822Sdim * 3840218822Sdim * off and size may not cover an area beyond a single hardware page. 
3841218822Sdim */ 3842218822Sdimvoid 3843218822Sdimpmap_zero_page_area(vm_page_t m, int off, int size) 384468765Sobrien{ 384568765Sobrien struct sysmaps *sysmaps; 384668765Sobrien 3847130561Sobrien sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3848218822Sdim mtx_lock(&sysmaps->lock); 384968765Sobrien if (*sysmaps->CMAP2) 3850130561Sobrien panic("pmap_zero_page_area: CMAP2 busy"); 3851130561Sobrien sched_pin(); 3852130561Sobrien *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | 3853130561Sobrien pmap_cache_bits(m->md.pat_mode, 0); 385468765Sobrien invlcaddr(sysmaps->CADDR2); 385568765Sobrien if (off == 0 && size == PAGE_SIZE) 3856130561Sobrien pagezero(sysmaps->CADDR2); 3857130561Sobrien else 385868765Sobrien bzero((char *)sysmaps->CADDR2 + off, size); 3859130561Sobrien *sysmaps->CMAP2 = 0; 386068765Sobrien sched_unpin(); 3861130561Sobrien mtx_unlock(&sysmaps->lock); 3862130561Sobrien} 3863130561Sobrien 3864130561Sobrien/* 386568765Sobrien * pmap_zero_page_idle zeros the specified hardware page by mapping 386668765Sobrien * the page into KVM and using bzero to clear its contents. This 3867130561Sobrien * is intended to be called from the vm_pagezero process only and 386868765Sobrien * outside of Giant. 
3869130561Sobrien */ 3870218822Sdimvoid 3871218822Sdimpmap_zero_page_idle(vm_page_t m) 387268765Sobrien{ 3873130561Sobrien 3874130561Sobrien if (*CMAP3) 3875130561Sobrien panic("pmap_zero_page_idle: CMAP3 busy"); 3876130561Sobrien sched_pin(); 3877130561Sobrien *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | 3878130561Sobrien pmap_cache_bits(m->md.pat_mode, 0); 387968765Sobrien invlcaddr(CADDR3); 3880130561Sobrien pagezero(CADDR3); 388168765Sobrien *CMAP3 = 0; 3882130561Sobrien sched_unpin(); 3883218822Sdim} 3884218822Sdim 3885130561Sobrien/* 3886130561Sobrien * pmap_copy_page copies the specified (machine independent) 3887130561Sobrien * page by mapping the page into virtual memory and using 388868765Sobrien * bcopy to copy the page, one machine dependent page at a 388968765Sobrien * time. 3890130561Sobrien */ 3891130561Sobrienvoid 389268765Sobrienpmap_copy_page(vm_page_t src, vm_page_t dst) 3893130561Sobrien{ 3894130561Sobrien struct sysmaps *sysmaps; 3895130561Sobrien 3896130561Sobrien sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 389768765Sobrien mtx_lock(&sysmaps->lock); 3898130561Sobrien if (*sysmaps->CMAP1) 3899130561Sobrien panic("pmap_copy_page: CMAP1 busy"); 390068765Sobrien if (*sysmaps->CMAP2) 3901130561Sobrien panic("pmap_copy_page: CMAP2 busy"); 3902130561Sobrien sched_pin(); 3903218822Sdim invlpg((u_int)sysmaps->CADDR1); 390468765Sobrien invlpg((u_int)sysmaps->CADDR2); 3905130561Sobrien *sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A | 390668765Sobrien pmap_cache_bits(src->md.pat_mode, 0); 3907130561Sobrien *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M | 3908130561Sobrien pmap_cache_bits(dst->md.pat_mode, 0); 3909130561Sobrien bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); 3910130561Sobrien *sysmaps->CMAP1 = 0; 3911130561Sobrien *sysmaps->CMAP2 = 0; 3912130561Sobrien sched_unpin(); 3913130561Sobrien mtx_unlock(&sysmaps->lock); 3914130561Sobrien} 3915130561Sobrien 3916130561Sobrien/* 3917130561Sobrien * Returns 
true if the pmap's pv is one of the first 3918130561Sobrien * 16 pvs linked to from this page. This count may 3919130561Sobrien * be changed upwards or downwards in the future; it 392068765Sobrien * is only necessary that true be returned for a small 3921130561Sobrien * subset of pmaps for proper page aging. 392268765Sobrien */ 3923130561Sobrienboolean_t 3924130561Sobrienpmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3925130561Sobrien{ 3926130561Sobrien struct md_page *pvh; 3927130561Sobrien pv_entry_t pv; 3928218822Sdim int loops = 0; 3929218822Sdim 3930130561Sobrien if (m->flags & PG_FICTITIOUS) 3931130561Sobrien return FALSE; 3932130561Sobrien 3933130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3934130561Sobrien TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3935130561Sobrien if (PV_PMAP(pv) == pmap) { 3936130561Sobrien return TRUE; 3937130561Sobrien } 3938130561Sobrien loops++; 3939130561Sobrien if (loops >= 16) 3940130561Sobrien break; 3941130561Sobrien } 3942130561Sobrien if (loops < 16) { 3943130561Sobrien pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3944130561Sobrien TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 3945130561Sobrien if (PV_PMAP(pv) == pmap) 3946130561Sobrien return (TRUE); 3947130561Sobrien loops++; 3948130561Sobrien if (loops >= 16) 3949130561Sobrien break; 3950130561Sobrien } 3951130561Sobrien } 395268765Sobrien return (FALSE); 395368765Sobrien} 3954218822Sdim 3955218822Sdim/* 3956218822Sdim * pmap_page_wired_mappings: 3957218822Sdim * 395868765Sobrien * Return the number of managed mappings to the given physical page 3959218822Sdim * that are wired. 
3960218822Sdim */ 3961218822Sdimint 396268765Sobrienpmap_page_wired_mappings(vm_page_t m) 3963130561Sobrien{ 3964130561Sobrien int count; 3965130561Sobrien 3966218822Sdim count = 0; 396768765Sobrien if ((m->flags & PG_FICTITIOUS) != 0) 3968130561Sobrien return (count); 3969130561Sobrien count = pmap_pvh_wired_mappings(&m->md, count); 3970130561Sobrien return (pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), count)); 3971130561Sobrien} 3972130561Sobrien 3973130561Sobrien/* 397468765Sobrien * pmap_pvh_wired_mappings: 3975218822Sdim * 397668765Sobrien * Return the updated number "count" of managed mappings that are wired. 3977218822Sdim */ 3978218822Sdimstatic int 3979218822Sdimpmap_pvh_wired_mappings(struct md_page *pvh, int count) 3980218822Sdim{ 3981218822Sdim pmap_t pmap; 3982218822Sdim pt_entry_t *pte; 3983218822Sdim pv_entry_t pv; 398468765Sobrien 398568765Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 398668765Sobrien sched_pin(); 3987130561Sobrien TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 3988218822Sdim pmap = PV_PMAP(pv); 3989130561Sobrien PMAP_LOCK(pmap); 399068765Sobrien pte = pmap_pte_quick(pmap, pv->pv_va); 399168765Sobrien if ((*pte & PG_W) != 0) 3992218822Sdim count++; 399368765Sobrien PMAP_UNLOCK(pmap); 3994130561Sobrien } 3995130561Sobrien sched_unpin(); 3996130561Sobrien return (count); 3997130561Sobrien} 399868765Sobrien 3999218822Sdim/* 4000218822Sdim * Returns TRUE if the given page is mapped individually or as part of 4001130561Sobrien * a 4mpage. Otherwise, returns FALSE. 
4002218822Sdim */ 4003218822Sdimboolean_t 4004130561Sobrienpmap_page_is_mapped(vm_page_t m) 400568765Sobrien{ 4006218822Sdim struct md_page *pvh; 4007218822Sdim 4008218822Sdim if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 4009130561Sobrien return (FALSE); 401068765Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4011130561Sobrien if (TAILQ_EMPTY(&m->md.pv_list)) { 4012130561Sobrien pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4013130561Sobrien return (!TAILQ_EMPTY(&pvh->pv_list)); 4014130561Sobrien } else 4015130561Sobrien return (TRUE); 4016130561Sobrien} 401768765Sobrien 4018130561Sobrien/* 4019218822Sdim * Remove all pages from specified address space 4020130561Sobrien * this aids process exit speeds. Also, this code 402168765Sobrien * is special cased for current process only, but 4022218822Sdim * can have the more generic (and slightly slower) 4023218822Sdim * mode enabled. This is much faster than pmap_remove 4024218822Sdim * in the case of running down an entire address space. 4025218822Sdim */ 402668765Sobrienvoid 4027218822Sdimpmap_remove_pages(pmap_t pmap) 4028218822Sdim{ 4029130561Sobrien pt_entry_t *pte, tpte; 4030218822Sdim vm_page_t free = NULL; 4031218822Sdim vm_page_t m, mpte, mt; 4032218822Sdim pv_entry_t pv; 4033218822Sdim struct md_page *pvh; 4034218822Sdim struct pv_chunk *pc, *npc; 4035218822Sdim int field, idx; 4036130561Sobrien int32_t bit; 4037218822Sdim uint32_t inuse, bitmask; 4038218822Sdim int allfree; 4039218822Sdim 4040218822Sdim if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 4041218822Sdim printf("warning: pmap_remove_pages called with non-current pmap\n"); 4042130561Sobrien return; 4043218822Sdim } 4044130561Sobrien vm_page_lock_queues(); 4045218822Sdim PMAP_LOCK(pmap); 4046218822Sdim sched_pin(); 4047218822Sdim TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 4048218822Sdim allfree = 1; 4049218822Sdim for (field = 0; field < _NPCM; field++) { 4050218822Sdim inuse = (~(pc->pc_map[field])) & pc_freemask[field]; 
4051218822Sdim while (inuse != 0) { 4052218822Sdim bit = bsfl(inuse); 4053218822Sdim bitmask = 1UL << bit; 4054218822Sdim idx = field * 32 + bit; 4055218822Sdim pv = &pc->pc_pventry[idx]; 405668765Sobrien inuse &= ~bitmask; 405768765Sobrien 4058104834Sobrien pte = pmap_pde(pmap, pv->pv_va); 4059130561Sobrien tpte = *pte; 4060218822Sdim if ((tpte & PG_PS) == 0) { 406168765Sobrien pte = vtopte(pv->pv_va); 4062130561Sobrien tpte = *pte & ~PG_PTE_PAT; 4063130561Sobrien } 4064130561Sobrien 406568765Sobrien if (tpte == 0) { 406668765Sobrien printf( 406768765Sobrien "TPTE at %p IS ZERO @ VA %08x\n", 406868765Sobrien pte, pv->pv_va); 406968765Sobrien panic("bad pte"); 4070130561Sobrien } 407168765Sobrien 4072218822Sdim/* 4073130561Sobrien * We cannot remove wired pages from a process' mapping at this time 407468765Sobrien */ 407568765Sobrien if (tpte & PG_W) { 407668765Sobrien allfree = 0; 4077130561Sobrien continue; 407868765Sobrien } 407968765Sobrien 408068765Sobrien m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 4081130561Sobrien KASSERT(m->phys_addr == (tpte & PG_FRAME), 408268765Sobrien ("vm_page_t %p phys_addr mismatch %016jx %016jx", 408368765Sobrien m, (uintmax_t)m->phys_addr, 408468765Sobrien (uintmax_t)tpte)); 4085130561Sobrien 408668765Sobrien KASSERT(m < &vm_page_array[vm_page_array_size], 408768765Sobrien ("pmap_remove_pages: bad tpte %#jx", 408868765Sobrien (uintmax_t)tpte)); 4089218822Sdim 4090218822Sdim pte_clear(pte); 409168765Sobrien 4092130561Sobrien /* 4093130561Sobrien * Update the vm_page_t clean/reference bits. 
409468765Sobrien */ 4095130561Sobrien if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 4096130561Sobrien if ((tpte & PG_PS) != 0) { 4097218822Sdim for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) 4098218822Sdim vm_page_dirty(mt); 4099130561Sobrien } else 4100130561Sobrien vm_page_dirty(m); 410168765Sobrien } 4102130561Sobrien 4103130561Sobrien /* Mark free */ 4104218822Sdim PV_STAT(pv_entry_frees++); 4105218822Sdim PV_STAT(pv_entry_spare++); 410668765Sobrien pv_entry_count--; 410768765Sobrien pc->pc_map[field] |= bitmask; 410868765Sobrien if ((tpte & PG_PS) != 0) { 4109218822Sdim pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 411068765Sobrien pvh = pa_to_pvh(tpte & PG_PS_FRAME); 4111130561Sobrien TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 411268765Sobrien if (TAILQ_EMPTY(&pvh->pv_list)) { 4113218822Sdim for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) 4114218822Sdim if (TAILQ_EMPTY(&mt->md.pv_list)) 4115218822Sdim vm_page_flag_clear(mt, PG_WRITEABLE); 4116218822Sdim } 4117218822Sdim mpte = pmap_lookup_pt_page(pmap, pv->pv_va); 4118218822Sdim if (mpte != NULL) { 4119218822Sdim pmap_remove_pt_page(pmap, mpte); 4120130561Sobrien pmap->pm_stats.resident_count--; 412168765Sobrien KASSERT(mpte->wire_count == NPTEPG, 4122130561Sobrien ("pmap_remove_pages: pte page wire count error")); 4123130561Sobrien mpte->wire_count = 0; 412468765Sobrien pmap_add_delayed_free_list(mpte, &free, FALSE); 4125130561Sobrien atomic_subtract_int(&cnt.v_wire_count, 1); 4126130561Sobrien } 412768765Sobrien } else { 412868765Sobrien pmap->pm_stats.resident_count--; 412968765Sobrien TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 4130130561Sobrien if (TAILQ_EMPTY(&m->md.pv_list)) { 4131130561Sobrien pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4132130561Sobrien if (TAILQ_EMPTY(&pvh->pv_list)) 4133130561Sobrien vm_page_flag_clear(m, PG_WRITEABLE); 4134130561Sobrien } 4135130561Sobrien pmap_unuse_pt(pmap, pv->pv_va, &free); 4136130561Sobrien } 4137130561Sobrien } 4138130561Sobrien } 4139130561Sobrien if 
(allfree) { 4140130561Sobrien PV_STAT(pv_entry_spare -= _NPCPV); 414168765Sobrien PV_STAT(pc_chunk_count--); 4142130561Sobrien PV_STAT(pc_chunk_frees++); 4143218822Sdim TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4144218822Sdim m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 4145130561Sobrien pmap_qremove((vm_offset_t)pc, 1); 4146130561Sobrien vm_page_unwire(m, 0); 414768765Sobrien vm_page_free(m); 414868765Sobrien pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 4149218822Sdim } 4150218822Sdim } 4151218822Sdim sched_unpin(); 4152218822Sdim pmap_invalidate_all(pmap); 4153218822Sdim vm_page_unlock_queues(); 4154218822Sdim PMAP_UNLOCK(pmap); 4155218822Sdim pmap_free_zero_pages(free); 4156218822Sdim} 4157218822Sdim 4158218822Sdim/* 4159218822Sdim * pmap_is_modified: 4160218822Sdim * 4161218822Sdim * Return whether or not the specified physical page was modified 4162218822Sdim * in any physical maps. 4163218822Sdim */ 4164218822Sdimboolean_t 4165218822Sdimpmap_is_modified(vm_page_t m) 4166218822Sdim{ 4167218822Sdim 4168218822Sdim if (m->flags & PG_FICTITIOUS) 4169218822Sdim return (FALSE); 4170218822Sdim if (pmap_is_modified_pvh(&m->md)) 4171218822Sdim return (TRUE); 4172218822Sdim return (pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m)))); 4173218822Sdim} 4174218822Sdim 4175218822Sdim/* 4176218822Sdim * Returns TRUE if any of the given mappings were used to modify 4177218822Sdim * physical memory. Otherwise, returns FALSE. Both page and 2mpage 4178218822Sdim * mappings are supported. 
4179218822Sdim */ 4180218822Sdimstatic boolean_t 4181218822Sdimpmap_is_modified_pvh(struct md_page *pvh) 4182218822Sdim{ 4183218822Sdim pv_entry_t pv; 4184218822Sdim pt_entry_t *pte; 4185218822Sdim pmap_t pmap; 4186218822Sdim boolean_t rv; 4187218822Sdim 4188218822Sdim mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4189218822Sdim rv = FALSE; 4190218822Sdim sched_pin(); 4191104834Sobrien TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 419268765Sobrien pmap = PV_PMAP(pv); 4193130561Sobrien PMAP_LOCK(pmap); 4194130561Sobrien pte = pmap_pte_quick(pmap, pv->pv_va); 4195130561Sobrien rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW); 419668765Sobrien PMAP_UNLOCK(pmap); 419768765Sobrien if (rv) 4198218822Sdim break; 419968765Sobrien } 4200130561Sobrien sched_unpin(); 420177298Sobrien return (rv); 4202130561Sobrien} 420368765Sobrien 420468765Sobrien/* 4205218822Sdim * pmap_is_prefaultable: 4206218822Sdim * 4207218822Sdim * Return whether or not the specified virtual address is elgible 4208218822Sdim * for prefault. 4209218822Sdim */ 4210218822Sdimboolean_t 4211218822Sdimpmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 421289857Sobrien{ 421389857Sobrien pd_entry_t *pde; 4214218822Sdim pt_entry_t *pte; 4215218822Sdim boolean_t rv; 4216218822Sdim 421789857Sobrien rv = FALSE; 421889857Sobrien PMAP_LOCK(pmap); 4219218822Sdim pde = pmap_pde(pmap, addr); 422089857Sobrien if (*pde != 0 && (*pde & PG_PS) == 0) { 4221130561Sobrien pte = vtopte(addr); 422289857Sobrien rv = *pte == 0; 4223218822Sdim } 4224218822Sdim PMAP_UNLOCK(pmap); 422589857Sobrien return (rv); 4226218822Sdim} 4227218822Sdim 4228218822Sdim/* 4229218822Sdim * Clear the write and modified bits in each of the given page's mappings. 
4230218822Sdim */ 4231218822Sdimvoid 4232218822Sdimpmap_remove_write(vm_page_t m) 423389857Sobrien{ 423489857Sobrien struct md_page *pvh; 4235104834Sobrien pv_entry_t next_pv, pv; 423668765Sobrien pmap_t pmap; 4237130561Sobrien pd_entry_t *pde; 423889857Sobrien pt_entry_t oldpte, *pte; 4239130561Sobrien vm_offset_t va; 4240130561Sobrien 4241130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4242130561Sobrien if ((m->flags & PG_FICTITIOUS) != 0 || 4243130561Sobrien (m->flags & PG_WRITEABLE) == 0) 4244218822Sdim return; 4245218822Sdim sched_pin(); 4246218822Sdim pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 424789857Sobrien TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 4248130561Sobrien va = pv->pv_va; 4249130561Sobrien pmap = PV_PMAP(pv); 4250130561Sobrien PMAP_LOCK(pmap); 425189857Sobrien pde = pmap_pde(pmap, va); 4252130561Sobrien if ((*pde & PG_RW) != 0) 4253130561Sobrien (void)pmap_demote_pde(pmap, pde, va); 425489857Sobrien PMAP_UNLOCK(pmap); 4255130561Sobrien } 425689857Sobrien TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4257130561Sobrien pmap = PV_PMAP(pv); 4258130561Sobrien PMAP_LOCK(pmap); 4259130561Sobrien pde = pmap_pde(pmap, pv->pv_va); 4260130561Sobrien KASSERT((*pde & PG_PS) == 0, ("pmap_clear_write: found" 426189857Sobrien " a 4mpage in page %p's pv list", m)); 4262218822Sdim pte = pmap_pte_quick(pmap, pv->pv_va); 4263218822Sdimretry: 4264130561Sobrien oldpte = *pte; 4265218822Sdim if ((oldpte & PG_RW) != 0) { 4266218822Sdim /* 4267130561Sobrien * Regardless of whether a pte is 32 or 64 bits 426889857Sobrien * in size, PG_RW and PG_M are among the least 4269130561Sobrien * significant 32 bits. 
4270130561Sobrien */ 4271130561Sobrien if (!atomic_cmpset_int((u_int *)pte, oldpte, 4272130561Sobrien oldpte & ~(PG_RW | PG_M))) 4273130561Sobrien goto retry; 4274130561Sobrien if ((oldpte & PG_M) != 0) 4275130561Sobrien vm_page_dirty(m); 4276130561Sobrien pmap_invalidate_page(pmap, pv->pv_va); 4277130561Sobrien } 4278130561Sobrien PMAP_UNLOCK(pmap); 4279130561Sobrien } 4280130561Sobrien vm_page_flag_clear(m, PG_WRITEABLE); 4281130561Sobrien sched_unpin(); 4282130561Sobrien} 4283130561Sobrien 4284130561Sobrien/* 4285130561Sobrien * pmap_ts_referenced: 4286130561Sobrien * 4287130561Sobrien * Return a count of reference bits for a page, clearing those bits. 4288130561Sobrien * It is not necessary for every reference bit to be cleared, but it 4289130561Sobrien * is necessary that 0 only be returned when there are truly no 4290130561Sobrien * reference bits set. 4291130561Sobrien * 4292130561Sobrien * XXX: The exact number of bits to check and clear is a matter that 4293130561Sobrien * should be tested and standardized at some point in the future for 4294130561Sobrien * optimal aging of shared pages. 
4295130561Sobrien */ 4296130561Sobrienint 4297130561Sobrienpmap_ts_referenced(vm_page_t m) 4298130561Sobrien{ 4299130561Sobrien struct md_page *pvh; 4300130561Sobrien pv_entry_t pv, pvf, pvn; 4301130561Sobrien pmap_t pmap; 4302130561Sobrien pd_entry_t oldpde, *pde; 4303130561Sobrien pt_entry_t *pte; 4304130561Sobrien vm_offset_t va; 4305130561Sobrien int rtval = 0; 4306130561Sobrien 4307130561Sobrien if (m->flags & PG_FICTITIOUS) 430889857Sobrien return (rtval); 430989857Sobrien sched_pin(); 4310130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4311130561Sobrien pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 431289857Sobrien TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) { 431389857Sobrien va = pv->pv_va; 4314218822Sdim pmap = PV_PMAP(pv); 431589857Sobrien PMAP_LOCK(pmap); 4316130561Sobrien pde = pmap_pde(pmap, va); 4317130561Sobrien oldpde = *pde; 431889857Sobrien if ((oldpde & PG_A) != 0) { 4319130561Sobrien if (pmap_demote_pde(pmap, pde, va)) { 4320130561Sobrien if ((oldpde & PG_W) == 0) { 4321130561Sobrien /* 432289857Sobrien * Remove the mapping to a single page 432389857Sobrien * so that a subsequent access may 432489857Sobrien * repromote. Since the underlying 4325130561Sobrien * page table page is fully populated, 4326130561Sobrien * this removal never frees a page 4327130561Sobrien * table page. 
432889857Sobrien */ 4329218822Sdim va += VM_PAGE_TO_PHYS(m) - (oldpde & 433089857Sobrien PG_PS_FRAME); 4331130561Sobrien pmap_remove_page(pmap, va, NULL); 4332130561Sobrien rtval++; 433389857Sobrien if (rtval > 4) { 4334130561Sobrien PMAP_UNLOCK(pmap); 4335130561Sobrien return (rtval); 4336130561Sobrien } 433789857Sobrien } 4338130561Sobrien } 4339104834Sobrien } 434089857Sobrien PMAP_UNLOCK(pmap); 434168765Sobrien } 434268765Sobrien if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 434368765Sobrien pvf = pv; 4344130561Sobrien do { 434568765Sobrien pvn = TAILQ_NEXT(pv, pv_list); 4346218822Sdim TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 434768765Sobrien TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 4348130561Sobrien pmap = PV_PMAP(pv); 4349130561Sobrien PMAP_LOCK(pmap); 4350130561Sobrien pde = pmap_pde(pmap, pv->pv_va); 4351130561Sobrien KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:" 435268765Sobrien " found a 4mpage in page %p's pv list", m)); 435368765Sobrien pte = pmap_pte_quick(pmap, pv->pv_va); 435477298Sobrien if ((*pte & PG_A) != 0) { 435577298Sobrien atomic_clear_int((u_int *)pte, PG_A); 435668765Sobrien pmap_invalidate_page(pmap, pv->pv_va); 435768765Sobrien rtval++; 435868765Sobrien if (rtval > 4) 435968765Sobrien pvn = NULL; 436068765Sobrien } 436168765Sobrien PMAP_UNLOCK(pmap); 436268765Sobrien } while ((pv = pvn) != NULL && pv != pvf); 4363218822Sdim } 436468765Sobrien sched_unpin(); 436568765Sobrien return (rtval); 436677298Sobrien} 436768765Sobrien 4368130561Sobrien/* 436968765Sobrien * Clear the modify bits on the specified physical page. 
437068765Sobrien */ 437168765Sobrienvoid 437268765Sobrienpmap_clear_modify(vm_page_t m) 437368765Sobrien{ 437468765Sobrien struct md_page *pvh; 437568765Sobrien pv_entry_t next_pv, pv; 437689857Sobrien pmap_t pmap; 437768765Sobrien pd_entry_t oldpde, *pde; 4378130561Sobrien pt_entry_t oldpte, *pte; 4379130561Sobrien vm_offset_t va; 4380130561Sobrien 4381130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 438268765Sobrien if ((m->flags & PG_FICTITIOUS) != 0) 438368765Sobrien return; 438468765Sobrien sched_pin(); 438568765Sobrien pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 438668765Sobrien TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 438768765Sobrien va = pv->pv_va; 438868765Sobrien pmap = PV_PMAP(pv); 438968765Sobrien PMAP_LOCK(pmap); 4390218822Sdim pde = pmap_pde(pmap, va); 439168765Sobrien oldpde = *pde; 439268765Sobrien if ((oldpde & PG_RW) != 0) { 439368765Sobrien if (pmap_demote_pde(pmap, pde, va)) { 4394130561Sobrien if ((oldpde & PG_W) == 0) { 439568765Sobrien /* 439668765Sobrien * Write protect the mapping to a 439768765Sobrien * single page so that a subsequent 439868765Sobrien * write access may repromote. 439968765Sobrien */ 440068765Sobrien va += VM_PAGE_TO_PHYS(m) - (oldpde & 440168765Sobrien PG_PS_FRAME); 4402130561Sobrien pte = pmap_pte_quick(pmap, va); 440368765Sobrien oldpte = *pte; 440468765Sobrien if ((oldpte & PG_V) != 0) { 440568765Sobrien /* 440668765Sobrien * Regardless of whether a pte is 32 or 64 bits 440768765Sobrien * in size, PG_RW and PG_M are among the least 440868765Sobrien * significant 32 bits. 
440968765Sobrien */ 441068765Sobrien while (!atomic_cmpset_int((u_int *)pte, 441168765Sobrien oldpte, 441268765Sobrien oldpte & ~(PG_M | PG_RW))) 4413130561Sobrien oldpte = *pte; 4414130561Sobrien vm_page_dirty(m); 441568765Sobrien pmap_invalidate_page(pmap, va); 441668765Sobrien } 441768765Sobrien } 4418130561Sobrien } 441968765Sobrien } 442068765Sobrien PMAP_UNLOCK(pmap); 442168765Sobrien } 442268765Sobrien TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 442368765Sobrien pmap = PV_PMAP(pv); 442468765Sobrien PMAP_LOCK(pmap); 442568765Sobrien pde = pmap_pde(pmap, pv->pv_va); 442668765Sobrien KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" 442768765Sobrien " a 4mpage in page %p's pv list", m)); 4428130561Sobrien pte = pmap_pte_quick(pmap, pv->pv_va); 442968765Sobrien if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 443068765Sobrien /* 443168765Sobrien * Regardless of whether a pte is 32 or 64 bits 443268765Sobrien * in size, PG_M is among the least significant 4433130561Sobrien * 32 bits. 443468765Sobrien */ 443568765Sobrien atomic_clear_int((u_int *)pte, PG_M); 443668765Sobrien pmap_invalidate_page(pmap, pv->pv_va); 443768765Sobrien } 443868765Sobrien PMAP_UNLOCK(pmap); 443968765Sobrien } 444068765Sobrien sched_unpin(); 444168765Sobrien} 444268765Sobrien 444368765Sobrien/* 444468765Sobrien * pmap_clear_reference: 444568765Sobrien * 4446130561Sobrien * Clear the reference bit on the specified physical page. 
4447130561Sobrien */ 4448218822Sdimvoid 4449218822Sdimpmap_clear_reference(vm_page_t m) 4450218822Sdim{ 4451130561Sobrien struct md_page *pvh; 4452218822Sdim pv_entry_t next_pv, pv; 445368765Sobrien pmap_t pmap; 4454130561Sobrien pd_entry_t oldpde, *pde; 4455130561Sobrien pt_entry_t *pte; 4456130561Sobrien vm_offset_t va; 4457130561Sobrien 4458130561Sobrien mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4459130561Sobrien if ((m->flags & PG_FICTITIOUS) != 0) 4460130561Sobrien return; 4461130561Sobrien sched_pin(); 4462130561Sobrien pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4463130561Sobrien TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) { 4464130561Sobrien va = pv->pv_va; 4465130561Sobrien pmap = PV_PMAP(pv); 4466130561Sobrien PMAP_LOCK(pmap); 446768765Sobrien pde = pmap_pde(pmap, va); 446868765Sobrien oldpde = *pde; 446968765Sobrien if ((oldpde & PG_A) != 0) { 447068765Sobrien if (pmap_demote_pde(pmap, pde, va)) { 447168765Sobrien /* 447268765Sobrien * Remove the mapping to a single page so 447368765Sobrien * that a subsequent access may repromote. 447468765Sobrien * Since the underlying page table page is 447568765Sobrien * fully populated, this removal never frees 447668765Sobrien * a page table page. 
447768765Sobrien */ 447868765Sobrien va += VM_PAGE_TO_PHYS(m) - (oldpde & 447968765Sobrien PG_PS_FRAME); 448068765Sobrien pmap_remove_page(pmap, va, NULL); 448168765Sobrien } 448268765Sobrien } 4483130561Sobrien PMAP_UNLOCK(pmap); 4484218822Sdim } 4485218822Sdim TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4486218822Sdim pmap = PV_PMAP(pv); 4487130561Sobrien PMAP_LOCK(pmap); 448868765Sobrien pde = pmap_pde(pmap, pv->pv_va); 4489218822Sdim KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found" 4490218822Sdim " a 4mpage in page %p's pv list", m)); 4491218822Sdim pte = pmap_pte_quick(pmap, pv->pv_va); 4492130561Sobrien if ((*pte & PG_A) != 0) { 4493218822Sdim /* 449468765Sobrien * Regardless of whether a pte is 32 or 64 bits 449568765Sobrien * in size, PG_A is among the least significant 4496130561Sobrien * 32 bits. 449768765Sobrien */ 4498130561Sobrien atomic_clear_int((u_int *)pte, PG_A); 4499130561Sobrien pmap_invalidate_page(pmap, pv->pv_va); 450068765Sobrien } 4501130561Sobrien PMAP_UNLOCK(pmap); 4502218822Sdim } 4503218822Sdim sched_unpin(); 4504218822Sdim} 4505218822Sdim 4506218822Sdim/* 4507218822Sdim * Miscellaneous support routines follow 4508218822Sdim */ 450968765Sobrien 451068765Sobrien/* Adjust the cache mode for a 4KB page mapped via a PTE. */ 451168765Sobrienstatic __inline void 451268765Sobrienpmap_pte_attr(pt_entry_t *pte, int cache_bits) 451368765Sobrien{ 451468765Sobrien u_int opte, npte; 451568765Sobrien 4516 /* 4517 * The cache mode bits are all in the low 32-bits of the 4518 * PTE, so we can just spin on updating the low 32-bits. 4519 */ 4520 do { 4521 opte = *(u_int *)pte; 4522 npte = opte & ~PG_PTE_CACHE; 4523 npte |= cache_bits; 4524 } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); 4525} 4526 4527/* Adjust the cache mode for a 2/4MB page mapped via a PDE. 
*/ 4528static __inline void 4529pmap_pde_attr(pd_entry_t *pde, int cache_bits) 4530{ 4531 u_int opde, npde; 4532 4533 /* 4534 * The cache mode bits are all in the low 32-bits of the 4535 * PDE, so we can just spin on updating the low 32-bits. 4536 */ 4537 do { 4538 opde = *(u_int *)pde; 4539 npde = opde & ~PG_PDE_CACHE; 4540 npde |= cache_bits; 4541 } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); 4542} 4543 4544/* 4545 * Map a set of physical memory pages into the kernel virtual 4546 * address space. Return a pointer to where it is mapped. This 4547 * routine is intended to be used for mapping device memory, 4548 * NOT real memory. 4549 */ 4550void * 4551pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) 4552{ 4553 vm_offset_t va, offset; 4554 vm_size_t tmpsize; 4555 4556 offset = pa & PAGE_MASK; 4557 size = roundup(offset + size, PAGE_SIZE); 4558 pa = pa & PG_FRAME; 4559 4560 if (pa < KERNLOAD && pa + size <= KERNLOAD) 4561 va = KERNBASE + pa; 4562 else 4563 va = kmem_alloc_nofault(kernel_map, size); 4564 if (!va) 4565 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 4566 4567 for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) 4568 pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); 4569 pmap_invalidate_range(kernel_pmap, va, va + tmpsize); 4570 pmap_invalidate_cache_range(va, va + size); 4571 return ((void *)(va + offset)); 4572} 4573 4574void * 4575pmap_mapdev(vm_paddr_t pa, vm_size_t size) 4576{ 4577 4578 return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); 4579} 4580 4581void * 4582pmap_mapbios(vm_paddr_t pa, vm_size_t size) 4583{ 4584 4585 return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); 4586} 4587 4588void 4589pmap_unmapdev(vm_offset_t va, vm_size_t size) 4590{ 4591 vm_offset_t base, offset, tmpva; 4592 4593 if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) 4594 return; 4595 base = trunc_page(va); 4596 offset = va & PAGE_MASK; 4597 size = roundup(offset + size, PAGE_SIZE); 4598 for (tmpva = base; 
tmpva < (base + size); tmpva += PAGE_SIZE) 4599 pmap_kremove(tmpva); 4600 pmap_invalidate_range(kernel_pmap, va, tmpva); 4601 kmem_free(kernel_map, base, size); 4602} 4603 4604/* 4605 * Sets the memory attribute for the specified page. 4606 */ 4607void 4608pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 4609{ 4610 struct sysmaps *sysmaps; 4611 vm_offset_t sva, eva; 4612 4613 m->md.pat_mode = ma; 4614 if ((m->flags & PG_FICTITIOUS) != 0) 4615 return; 4616 4617 /* 4618 * If "m" is a normal page, flush it from the cache. 4619 * See pmap_invalidate_cache_range(). 4620 * 4621 * First, try to find an existing mapping of the page by sf 4622 * buffer. sf_buf_invalidate_cache() modifies mapping and 4623 * flushes the cache. 4624 */ 4625 if (sf_buf_invalidate_cache(m)) 4626 return; 4627 4628 /* 4629 * If page is not mapped by sf buffer, but CPU does not 4630 * support self snoop, map the page transient and do 4631 * invalidation. In the worst case, whole cache is flushed by 4632 * pmap_invalidate_cache_range(). 4633 */ 4634 if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) { 4635 sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 4636 mtx_lock(&sysmaps->lock); 4637 if (*sysmaps->CMAP2) 4638 panic("pmap_page_set_memattr: CMAP2 busy"); 4639 sched_pin(); 4640 *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | 4641 PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0); 4642 invlcaddr(sysmaps->CADDR2); 4643 sva = (vm_offset_t)sysmaps->CADDR2; 4644 eva = sva + PAGE_SIZE; 4645 } else 4646 sva = eva = 0; /* gcc */ 4647 pmap_invalidate_cache_range(sva, eva); 4648 if (sva != 0) { 4649 *sysmaps->CMAP2 = 0; 4650 sched_unpin(); 4651 mtx_unlock(&sysmaps->lock); 4652 } 4653} 4654 4655/* 4656 * Changes the specified virtual address range's memory type to that given by 4657 * the parameter "mode". The specified virtual address range must be 4658 * completely contained within either the kernel map. 
4659 * 4660 * Returns zero if the change completed successfully, and either EINVAL or 4661 * ENOMEM if the change failed. Specifically, EINVAL is returned if some part 4662 * of the virtual address range was not mapped, and ENOMEM is returned if 4663 * there was insufficient memory available to complete the change. 4664 */ 4665int 4666pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) 4667{ 4668 vm_offset_t base, offset, tmpva; 4669 pd_entry_t *pde; 4670 pt_entry_t *pte; 4671 int cache_bits_pte, cache_bits_pde; 4672 boolean_t changed; 4673 4674 base = trunc_page(va); 4675 offset = va & PAGE_MASK; 4676 size = roundup(offset + size, PAGE_SIZE); 4677 4678 /* 4679 * Only supported on kernel virtual addresses above the recursive map. 4680 */ 4681 if (base < VM_MIN_KERNEL_ADDRESS) 4682 return (EINVAL); 4683 4684 cache_bits_pde = pmap_cache_bits(mode, 1); 4685 cache_bits_pte = pmap_cache_bits(mode, 0); 4686 changed = FALSE; 4687 4688 /* 4689 * Pages that aren't mapped aren't supported. Also break down 4690 * 2/4MB pages into 4KB pages if required. 4691 */ 4692 PMAP_LOCK(kernel_pmap); 4693 for (tmpva = base; tmpva < base + size; ) { 4694 pde = pmap_pde(kernel_pmap, tmpva); 4695 if (*pde == 0) { 4696 PMAP_UNLOCK(kernel_pmap); 4697 return (EINVAL); 4698 } 4699 if (*pde & PG_PS) { 4700 /* 4701 * If the current 2/4MB page already has 4702 * the required memory type, then we need not 4703 * demote this page. Just increment tmpva to 4704 * the next 2/4MB page frame. 4705 */ 4706 if ((*pde & PG_PDE_CACHE) == cache_bits_pde) { 4707 tmpva = trunc_4mpage(tmpva) + NBPDR; 4708 continue; 4709 } 4710 4711 /* 4712 * If the current offset aligns with a 2/4MB 4713 * page frame and there is at least 2/4MB left 4714 * within the range, then we need not break 4715 * down this page into 4KB pages. 
4716 */ 4717 if ((tmpva & PDRMASK) == 0 && 4718 tmpva + PDRMASK < base + size) { 4719 tmpva += NBPDR; 4720 continue; 4721 } 4722 if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) { 4723 PMAP_UNLOCK(kernel_pmap); 4724 return (ENOMEM); 4725 } 4726 } 4727 pte = vtopte(tmpva); 4728 if (*pte == 0) { 4729 PMAP_UNLOCK(kernel_pmap); 4730 return (EINVAL); 4731 } 4732 tmpva += PAGE_SIZE; 4733 } 4734 PMAP_UNLOCK(kernel_pmap); 4735 4736 /* 4737 * Ok, all the pages exist, so run through them updating their 4738 * cache mode if required. 4739 */ 4740 for (tmpva = base; tmpva < base + size; ) { 4741 pde = pmap_pde(kernel_pmap, tmpva); 4742 if (*pde & PG_PS) { 4743 if ((*pde & PG_PDE_CACHE) != cache_bits_pde) { 4744 pmap_pde_attr(pde, cache_bits_pde); 4745 changed = TRUE; 4746 } 4747 tmpva = trunc_4mpage(tmpva) + NBPDR; 4748 } else { 4749 pte = vtopte(tmpva); 4750 if ((*pte & PG_PTE_CACHE) != cache_bits_pte) { 4751 pmap_pte_attr(pte, cache_bits_pte); 4752 changed = TRUE; 4753 } 4754 tmpva += PAGE_SIZE; 4755 } 4756 } 4757 4758 /* 4759 * Flush CPU caches to make sure any data isn't cached that 4760 * shouldn't be, etc. 4761 */ 4762 if (changed) { 4763 pmap_invalidate_range(kernel_pmap, base, tmpva); 4764 pmap_invalidate_cache_range(base, tmpva); 4765 } 4766 return (0); 4767} 4768 4769/* 4770 * perform the pmap work for mincore 4771 */ 4772int 4773pmap_mincore(pmap_t pmap, vm_offset_t addr) 4774{ 4775 pd_entry_t *pdep; 4776 pt_entry_t *ptep, pte; 4777 vm_paddr_t pa; 4778 vm_page_t m; 4779 int val = 0; 4780 4781 PMAP_LOCK(pmap); 4782 pdep = pmap_pde(pmap, addr); 4783 if (*pdep != 0) { 4784 if (*pdep & PG_PS) { 4785 pte = *pdep; 4786 val = MINCORE_SUPER; 4787 /* Compute the physical address of the 4KB page. 
*/ 4788 pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & 4789 PG_FRAME; 4790 } else { 4791 ptep = pmap_pte(pmap, addr); 4792 pte = *ptep; 4793 pmap_pte_release(ptep); 4794 pa = pte & PG_FRAME; 4795 } 4796 } else { 4797 pte = 0; 4798 pa = 0; 4799 } 4800 PMAP_UNLOCK(pmap); 4801 4802 if (pte != 0) { 4803 val |= MINCORE_INCORE; 4804 if ((pte & PG_MANAGED) == 0) 4805 return val; 4806 4807 m = PHYS_TO_VM_PAGE(pa); 4808 4809 /* 4810 * Modified by us 4811 */ 4812 if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 4813 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 4814 else { 4815 /* 4816 * Modified by someone else 4817 */ 4818 vm_page_lock_queues(); 4819 if (m->dirty || pmap_is_modified(m)) 4820 val |= MINCORE_MODIFIED_OTHER; 4821 vm_page_unlock_queues(); 4822 } 4823 /* 4824 * Referenced by us 4825 */ 4826 if (pte & PG_A) 4827 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 4828 else { 4829 /* 4830 * Referenced by someone else 4831 */ 4832 vm_page_lock_queues(); 4833 if ((m->flags & PG_REFERENCED) || 4834 pmap_ts_referenced(m)) { 4835 val |= MINCORE_REFERENCED_OTHER; 4836 vm_page_flag_set(m, PG_REFERENCED); 4837 } 4838 vm_page_unlock_queues(); 4839 } 4840 } 4841 return val; 4842} 4843 4844void 4845pmap_activate(struct thread *td) 4846{ 4847 pmap_t pmap, oldpmap; 4848 u_int32_t cr3; 4849 4850 critical_enter(); 4851 pmap = vmspace_pmap(td->td_proc->p_vmspace); 4852 oldpmap = PCPU_GET(curpmap); 4853#if defined(SMP) 4854 atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); 4855 atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); 4856#else 4857 oldpmap->pm_active &= ~1; 4858 pmap->pm_active |= 1; 4859#endif 4860#ifdef PAE 4861 cr3 = vtophys(pmap->pm_pdpt); 4862#else 4863 cr3 = vtophys(pmap->pm_pdir); 4864#endif 4865 /* 4866 * pmap_activate is for the current thread on the current cpu 4867 */ 4868 td->td_pcb->pcb_cr3 = cr3; 4869 load_cr3(cr3); 4870 PCPU_SET(curpmap, pmap); 4871 critical_exit(); 4872} 4873 4874void 4875pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t 
sz) 4876{ 4877} 4878 4879/* 4880 * Increase the starting virtual address of the given mapping if a 4881 * different alignment might result in more superpage mappings. 4882 */ 4883void 4884pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 4885 vm_offset_t *addr, vm_size_t size) 4886{ 4887 vm_offset_t superpage_offset; 4888 4889 if (size < NBPDR) 4890 return; 4891 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 4892 offset += ptoa(object->pg_color); 4893 superpage_offset = offset & PDRMASK; 4894 if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || 4895 (*addr & PDRMASK) == superpage_offset) 4896 return; 4897 if ((*addr & PDRMASK) < superpage_offset) 4898 *addr = (*addr & ~PDRMASK) + superpage_offset; 4899 else 4900 *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; 4901} 4902 4903 4904#if defined(PMAP_DEBUG) 4905pmap_pid_dump(int pid) 4906{ 4907 pmap_t pmap; 4908 struct proc *p; 4909 int npte = 0; 4910 int index; 4911 4912 sx_slock(&allproc_lock); 4913 FOREACH_PROC_IN_SYSTEM(p) { 4914 if (p->p_pid != pid) 4915 continue; 4916 4917 if (p->p_vmspace) { 4918 int i,j; 4919 index = 0; 4920 pmap = vmspace_pmap(p->p_vmspace); 4921 for (i = 0; i < NPDEPTD; i++) { 4922 pd_entry_t *pde; 4923 pt_entry_t *pte; 4924 vm_offset_t base = i << PDRSHIFT; 4925 4926 pde = &pmap->pm_pdir[i]; 4927 if (pde && pmap_pde_v(pde)) { 4928 for (j = 0; j < NPTEPG; j++) { 4929 vm_offset_t va = base + (j << PAGE_SHIFT); 4930 if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 4931 if (index) { 4932 index = 0; 4933 printf("\n"); 4934 } 4935 sx_sunlock(&allproc_lock); 4936 return npte; 4937 } 4938 pte = pmap_pte(pmap, va); 4939 if (pte && pmap_pte_v(pte)) { 4940 pt_entry_t pa; 4941 vm_page_t m; 4942 pa = *pte; 4943 m = PHYS_TO_VM_PAGE(pa & PG_FRAME); 4944 printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 4945 va, pa, m->hold_count, m->wire_count, m->flags); 4946 npte++; 4947 index++; 4948 if (index >= 2) { 4949 index = 0; 4950 printf("\n"); 4951 } else { 4952 
printf(" "); 4953 } 4954 } 4955 } 4956 } 4957 } 4958 } 4959 } 4960 sx_sunlock(&allproc_lock); 4961 return npte; 4962} 4963#endif 4964 4965#if defined(DEBUG) 4966 4967static void pads(pmap_t pm); 4968void pmap_pvdump(vm_offset_t pa); 4969 4970/* print address space of pmap*/ 4971static void 4972pads(pmap_t pm) 4973{ 4974 int i, j; 4975 vm_paddr_t va; 4976 pt_entry_t *ptep; 4977 4978 if (pm == kernel_pmap) 4979 return; 4980 for (i = 0; i < NPDEPTD; i++) 4981 if (pm->pm_pdir[i]) 4982 for (j = 0; j < NPTEPG; j++) { 4983 va = (i << PDRSHIFT) + (j << PAGE_SHIFT); 4984 if (pm == kernel_pmap && va < KERNBASE) 4985 continue; 4986 if (pm != kernel_pmap && va > UPT_MAX_ADDRESS) 4987 continue; 4988 ptep = pmap_pte(pm, va); 4989 if (pmap_pte_v(ptep)) 4990 printf("%x:%x ", va, *ptep); 4991 }; 4992 4993} 4994 4995void 4996pmap_pvdump(vm_paddr_t pa) 4997{ 4998 pv_entry_t pv; 4999 pmap_t pmap; 5000 vm_page_t m; 5001 5002 printf("pa %x", pa); 5003 m = PHYS_TO_VM_PAGE(pa); 5004 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 5005 pmap = PV_PMAP(pv); 5006 printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va); 5007 pads(pmap); 5008 } 5009 printf(" "); 5010} 5011#endif 5012