pmap.c revision 237952
/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: stable/9/sys/i386/i386/pmap.c 237952 2012-07-02 05:57:44Z alc $");

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */
#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_smp.h"
#include "opt_xbox.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sf_buf.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#else
#include <sys/cpuset.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#ifdef XBOX
#include <machine/xbox.h>
#endif

#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pa_index(pa)	((pa) >> PDRSHIFT)
#define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)
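/*
 * Illustrative example (not part of the original file): on a non-PAE kernel
 * PDRSHIFT is 22, so pmap_pde() selects one of the 1024 page directory
 * entries by the top 10 bits of the address.  For a hypothetical kernel
 * address such as 0xc0401234:
 *
 *	0xc0401234 >> 22 == 0x301, i.e. &pm_pdir[0x301]
 *
 * Under PAE, PDRSHIFT is 21 and pm_pdir spans the contiguously mapped page
 * directory pages, so the same macros still apply.
 */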
#define pmap_pte_set_w(pte, v)	((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
    atomic_clear_int((u_int *)(pte), PG_W))
#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
int pgeflag = 0;		/* PG_G or-in */
int pseflag = 0;		/* PG_PS or-in */

static int nkpt = NKPT;
vm_offset_t kernel_vm_end = KERNBASE + NKPT * NBPDR;
extern u_int32_t KERNend;
extern u_int32_t KPTphys;

#ifdef PAE
pt_entry_t pg_nx;
static uma_zone_t pdptzone;
#endif

SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

static int pat_works = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1,
    "Is page attribute table fully functional?");

static int pg_ps_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
    "Are large page mappings enabled?");

#define	PAT_INDEX_SIZE	8
static int pat_index[PAT_INDEX_SIZE];	/* cache mode to PAT index conversion */

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static struct md_page *pv_table;
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;		/* KVA block for pv_chunks */
int pv_maxchunks;			/* How many chunks we have KVA for */
vm_offset_t pv_vafree;			/* freelist stored in the PTE */

/*
 * All those kernel PT submaps that BSD is so fond of
 */
struct sysmaps {
	struct	mtx lock;
	pt_entry_t *CMAP1;
	pt_entry_t *CMAP2;
	caddr_t	CADDR1;
	caddr_t	CADDR2;
};
static struct sysmaps sysmaps_pcpu[MAXCPU];
pt_entry_t *CMAP1 = 0;
static pt_entry_t *CMAP3;
static pd_entry_t *KPTD;
caddr_t CADDR1 = 0, ptvmmap = 0;
static caddr_t CADDR3;
struct msgbuf *msgbufp = 0;
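/*
 * Usage sketch (illustrative only): the per-CPU sysmaps slots above are the
 * scratch mappings used by consumers such as the page zeroing and copying
 * helpers elsewhere in this file.  Borrowing one looks roughly like:
 *
 *	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 *	mtx_lock(&sysmaps->lock);
 *	sched_pin();
 *	*sysmaps->CMAP2 = PG_V | PG_RW | pa | PG_A | PG_M;
 *	invlcaddr(sysmaps->CADDR2);
 *	... access the physical page through sysmaps->CADDR2 ...
 *	*sysmaps->CMAP2 = 0;
 *	sched_unpin();
 *	mtx_unlock(&sysmaps->lock);
 */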
/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;

static pt_entry_t *PMAP1 = 0, *PMAP2;
static pt_entry_t *PADDR1 = 0, *PADDR2;
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
	   &PMAP1changedcpu, 0,
	   "Number of times pmap_pte_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
	   &PMAP1changed, 0,
	   "Number of times pmap_pte_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
	   &PMAP1unchanged, 0,
	   "Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
static void	pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
static boolean_t pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
static void	pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);
static int	pmap_pvh_wired_mappings(struct md_page *pvh, int count);

static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static boolean_t pmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m,
    vm_prot_t prot);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static void pmap_flush_page(vm_page_t m);
static void pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte);
static boolean_t pmap_is_modified_pvh(struct md_page *pvh);
static boolean_t pmap_is_referenced_pvh(struct md_page *pvh);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static void pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde);
static vm_page_t pmap_lookup_pt_page(pmap_t pmap, vm_offset_t va);
static void pmap_pde_attr(pd_entry_t *pde, int cache_bits);
static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va);
static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva,
    vm_prot_t prot);
static void pmap_pte_attr(pt_entry_t *pte, int cache_bits);
static void pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva,
    vm_page_t *free);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
    vm_page_t *free);
static void pmap_remove_pt_page(pmap_t pmap, vm_page_t mpte);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
    vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
    vm_offset_t va);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m);
static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde,
    pd_entry_t newpde);
static void pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);

static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
#ifdef PAE
static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
#endif
static void pmap_set_pg(void);

static __inline void pagezero(void *page);

CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));

/*
 * If you get an error here, then you set KVA_PAGES wrong! See the
 * description of KVA_PAGES in sys/i386/include/pmap.h. It must be a
 * multiple of 4 for a normal kernel, or a multiple of 8 for a PAE kernel.
 */
CTASSERT(KERNBASE % (1 << 24) == 0);

/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	On the i386 this is called after mapping has already been enabled
 *	and just syncs the pmap module with what has already been done.
 *	[We can't call it easily with mapping off since the kernel is not
 *	mapped with PA == VA, hence we would have to relocate every address
 *	from the linked base (virtual) address "KERNBASE" to the actual
 *	(physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t firstaddr)
{
	vm_offset_t va;
	pt_entry_t *pte, *unused;
	struct sysmaps *sysmaps;
	int i;

	/*
	 * Initialize the first available kernel virtual address.  However,
	 * using "firstaddr" may waste a few pages of the kernel virtual
	 * address space, because locore may not have mapped every physical
	 * page that it allocated.  Preferably, locore would provide a first
	 * unused virtual address in addition to "firstaddr".
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
#ifdef PAE
	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
	kernel_pmap->pm_root = NULL;
	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
	LIST_INIT(&allpmaps);

	/*
	 * Request a spin mutex so that changes to allpmaps cannot be
	 * preempted by smp_rendezvous_cpus().  Otherwise,
	 * pmap_update_pde_kernel() could access allpmaps while it is
	 * being changed.
	 */
	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_avail;
	pte = vtopte(va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 * CMAP3 is used for the idle process page zeroing.
	 */
	for (i = 0; i < MAXCPU; i++) {
		sysmaps = &sysmaps_pcpu[i];
		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
	}
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
	SYSMAP(caddr_t, CMAP3, CADDR3, 1)

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)

	/*
	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
	 */
	SYSMAP(caddr_t, unused, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 */
	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize)))

	/*
	 * KPTmap is used by pmap_kextract().
	 *
	 * KPTmap is first initialized by locore.  However, that initial
	 * KPTmap can only support NKPT page table pages.  Here, a larger
	 * KPTmap is created that can support KVA_PAGES page table pages.
	 */
	SYSMAP(pt_entry_t *, KPTD, KPTmap, KVA_PAGES)

	for (i = 0; i < NKPT; i++)
		KPTD[i] = (KPTphys + (i << PAGE_SHIFT)) | pgeflag | PG_RW | PG_V;

	/*
	 * Adjust the start of the KPTD and KPTmap so that the implementation
	 * of pmap_kextract() and pmap_growkernel() can be made simpler.
	 */
	KPTD -= KPTDI;
	KPTmap -= i386_btop(KPTDI << PDRSHIFT);
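	/*
	 * Illustrative note: with the origins shifted as above, both arrays
	 * can be indexed directly by kernel virtual address, which is what
	 * makes pmap_kextract() and pmap_growkernel() simpler:
	 * KPTD[va >> PDRSHIFT] is the slot through which the page table page
	 * covering "va" is mapped into the KPTmap window, and
	 * KPTmap[i386_btop(va)] is the PTE for "va" itself, with no need to
	 * subtract KERNBASE first.
	 */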
	/*
	 * ptemap is used for pmap_pte_quick
	 */
	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)

	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);

	virtual_avail = va;

	/*
	 * Leave in place an identity mapping (virt == phys) for the low 1 MB
	 * physical memory region that is used by the ACPI wakeup code.  This
	 * mapping must not have PG_G set.
	 */
#ifdef XBOX
	/* FIXME: This is gross, but needed for the XBOX. Since we are in such
	 * an early stadium, we cannot yet neatly map video memory ... :-(
	 * Better fixes are very welcome! */
	if (!arch_i386_is_xbox)
#endif
	for (i = 1; i < NKPT; i++)
		PTD[i] = 0;

	/* Initialize the PAT MSR if present. */
	pmap_init_pat();

	/* Turn on PG_G on kernel page(s) */
	pmap_set_pg();
}

/*
 * Setup the PAT MSR.
 */
void
pmap_init_pat(void)
{
	int pat_table[PAT_INDEX_SIZE];
	uint64_t pat_msr;
	u_long cr0, cr4;
	int i;

	/* Set default PAT index table. */
	for (i = 0; i < PAT_INDEX_SIZE; i++)
		pat_table[i] = -1;
	pat_table[PAT_WRITE_BACK] = 0;
	pat_table[PAT_WRITE_THROUGH] = 1;
	pat_table[PAT_UNCACHEABLE] = 3;
	pat_table[PAT_WRITE_COMBINING] = 3;
	pat_table[PAT_WRITE_PROTECTED] = 3;
	pat_table[PAT_UNCACHED] = 3;

	/* Bail if this CPU doesn't implement PAT. */
	if ((cpu_feature & CPUID_PAT) == 0) {
		for (i = 0; i < PAT_INDEX_SIZE; i++)
			pat_index[i] = pat_table[i];
		pat_works = 0;
		return;
	}

	/*
	 * Due to some Intel errata, we can only safely use the lower 4
	 * PAT entries.
	 *
	 *   Intel Pentium III Processor Specification Update
	 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
	 * or Mode C Paging)
	 *
	 *   Intel Pentium IV Processor Specification Update
	 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
	 */
	if (cpu_vendor_id == CPU_VENDOR_INTEL &&
	    !(CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe))
		pat_works = 0;

	/* Initialize default PAT entries. */
	pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) |
	    PAT_VALUE(1, PAT_WRITE_THROUGH) |
	    PAT_VALUE(2, PAT_UNCACHED) |
	    PAT_VALUE(3, PAT_UNCACHEABLE) |
	    PAT_VALUE(4, PAT_WRITE_BACK) |
	    PAT_VALUE(5, PAT_WRITE_THROUGH) |
	    PAT_VALUE(6, PAT_UNCACHED) |
	    PAT_VALUE(7, PAT_UNCACHEABLE);

	if (pat_works) {
		/*
		 * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
		 * Program 5 and 6 as WP and WC.
		 * Leave 4 and 7 as WB and UC.
		 */
		pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6));
		pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) |
		    PAT_VALUE(6, PAT_WRITE_COMBINING);
		pat_table[PAT_UNCACHED] = 2;
		pat_table[PAT_WRITE_PROTECTED] = 5;
		pat_table[PAT_WRITE_COMBINING] = 6;
	} else {
		/*
		 * Just replace PAT Index 2 with WC instead of UC-.
		 */
		pat_msr &= ~PAT_MASK(2);
		pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
		pat_table[PAT_WRITE_COMBINING] = 2;
	}
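	/*
	 * Summary of the assignments above: when the errata do not apply
	 * (pat_works != 0) the PAT ends up programmed as
	 *
	 *	0 WB, 1 WT, 2 UC-, 3 UC, 4 WB, 5 WP, 6 WC, 7 UC
	 *
	 * otherwise only the architecturally safe lower four entries are
	 * used, with entry 2 redefined as WC:
	 *
	 *	0 WB, 1 WT, 2 WC, 3 UC
	 */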
	/* Disable PGE. */
	cr4 = rcr4();
	load_cr4(cr4 & ~CR4_PGE);

	/* Disable caches (CD = 1, NW = 0). */
	cr0 = rcr0();
	load_cr0((cr0 & ~CR0_NW) | CR0_CD);

	/* Flushes caches and TLBs. */
	wbinvd();
	invltlb();

	/* Update PAT and index table. */
	wrmsr(MSR_PAT, pat_msr);
	for (i = 0; i < PAT_INDEX_SIZE; i++)
		pat_index[i] = pat_table[i];

	/* Flush caches and TLBs again. */
	wbinvd();
	invltlb();

	/* Restore caches and PGE. */
	load_cr0(cr0);
	load_cr4(cr4);
}

/*
 * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
 */
static void
pmap_set_pg(void)
{
	pt_entry_t *pte;
	vm_offset_t va, endva;

	if (pgeflag == 0)
		return;

	endva = KERNBASE + KERNend;

	if (pseflag) {
		va = KERNBASE + KERNLOAD;
		while (va < endva) {
			pdir_pde(PTD, va) |= pgeflag;
			invltlb();	/* Play it safe, invltlb() every time */
			va += NBPDR;
		}
	} else {
		va = (vm_offset_t)btext;
		while (va < endva) {
			pte = vtopte(va);
			if (*pte)
				*pte |= pgeflag;
			invltlb();	/* Play it safe, invltlb() every time */
			va += PAGE_SIZE;
		}
	}
}

/*
 *	Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pat_mode = PAT_WRITE_BACK;
}

#ifdef PAE
static void *
pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
	    0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
}
#endif

/*
 * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
 * Requirements:
 *  - Must deal with pages in order to ensure that none of the PG_* bits
 *    are ever set, PG_V in particular.
 *  - Assumes we can write to ptes without pte_store() atomic ops, even
 *    on PAE systems.  This should be ok.
 *  - Assumes nothing will ever test these addresses for 0 to indicate
 *    no mapping instead of correctly checking PG_V.
 *  - Assumes a vm_offset_t will fit in a pte (true for i386).
 * Because PG_V is never set, there can be no mappings to invalidate.
 */
static vm_offset_t
pmap_ptelist_alloc(vm_offset_t *head)
{
	pt_entry_t *pte;
	vm_offset_t va;

	va = *head;
	if (va == 0)
		return (va);	/* Out of memory */
	pte = vtopte(va);
	*head = *pte;
	if (*head & PG_V)
		panic("pmap_ptelist_alloc: va with PG_V set!");
	*pte = 0;
	return (va);
}

static void
pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
{
	pt_entry_t *pte;

	if (va & PG_V)
		panic("pmap_ptelist_free: freeing va with PG_V set!");
	pte = vtopte(va);
	*pte = *head;		/* virtual! PG_V is 0 though */
	*head = va;
}

static void
pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
{
	int i;
	vm_offset_t va;

	*head = 0;
	for (i = npages - 1; i >= 0; i--) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		pmap_ptelist_free(head, va);
	}
}


/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_page_t mpte;
	vm_size_t s;
	int i, pv_npg;

	/*
	 * Initialize the vm page array entries for the kernel pmap's
	 * page table pages.
	 */
	for (i = 0; i < NKPT; i++) {
		mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT));
		KASSERT(mpte >= vm_page_array &&
		    mpte < &vm_page_array[vm_page_array_size],
		    ("pmap_init: page table page is out of range"));
		mpte->pindex = i + KPTDI;
		mpte->phys_addr = KPTphys + (i << PAGE_SHIFT);
	}

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_max = roundup(pv_entry_max, _NPCPV);
	pv_entry_high_water = 9 * (pv_entry_max / 10);
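	/*
	 * Worked example with hypothetical numbers (for illustration only):
	 * with the default shpgperproc of 200, a maxproc of, say, 1600 and
	 * roughly 1 GB of RAM (about 262144 pages), the computation above
	 * gives
	 *
	 *	pv_entry_max ~= 200 * 1600 + 262144 = 582144 entries
	 *
	 * rounded up to a multiple of _NPCPV, with pv_entry_high_water set
	 * to 90% of that.  Both values can be overridden through the
	 * vm.pmap.shpgperproc and vm.pmap.pv_entries tunables fetched above.
	 */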
	/*
	 * If the kernel is running in a virtual machine on an AMD Family 10h
	 * processor, then it must assume that MCA is enabled by the virtual
	 * machine monitor.
	 */
	if (vm_guest == VM_GUEST_VM && cpu_vendor_id == CPU_VENDOR_AMD &&
	    CPUID_TO_FAMILY(cpu_id) == 0x10)
		workaround_erratum383 = 1;

	/*
	 * Are large page mappings supported and enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);
	if (pseflag == 0)
		pg_ps_enabled = 0;
	else if (pg_ps_enabled) {
		KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0,
		    ("pmap_init: can't assign to pagesizes[1]"));
		pagesizes[1] = NBPDR;
	}

	/*
	 * Calculate the size of the pv head table for superpages.
	 */
	for (i = 0; phys_avail[i + 1]; i += 2);
	pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;

	/*
	 * Allocate memory for the pv head table for superpages.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);

	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
	    PAGE_SIZE * pv_maxchunks);
	if (pv_chunkbase == NULL)
		panic("pmap_init: not enough kvm for pv chunks");
	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
#ifdef PAE
	pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
	    NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
	uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
#endif
}


SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
	"Max number of PV entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
	"Page share factor per proc");

SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
    "2/4MB page mapping counters");

static u_long pmap_pde_demotions;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD,
    &pmap_pde_demotions, 0, "2/4MB page demotions");

static u_long pmap_pde_mappings;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
    &pmap_pde_mappings, 0, "2/4MB page mappings");

static u_long pmap_pde_p_failures;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD,
    &pmap_pde_p_failures, 0, "2/4MB page promotion failures");

static u_long pmap_pde_promotions;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD,
    &pmap_pde_promotions, 0, "2/4MB page promotions");

/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * Determine the appropriate bits to set in a PTE or PDE for a specified
 * caching mode.
 */
int
pmap_cache_bits(int mode, boolean_t is_pde)
{
	int cache_bits, pat_flag, pat_idx;

	if (mode < 0 || mode >= PAT_INDEX_SIZE || pat_index[mode] < 0)
		panic("Unknown caching mode %d\n", mode);

	/* The PAT bit is different for PTE's and PDE's. */
	pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;

	/* Map the caching mode to a PAT index. */
	pat_idx = pat_index[mode];

	/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
	cache_bits = 0;
	if (pat_idx & 0x4)
		cache_bits |= pat_flag;
	if (pat_idx & 0x2)
		cache_bits |= PG_NC_PCD;
	if (pat_idx & 0x1)
		cache_bits |= PG_NC_PWT;
	return (cache_bits);
}
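/*
 * Example (derived from the table logic above, illustrative only): when
 * pat_works is 0, PAT_WRITE_COMBINING maps to PAT index 2, so
 * pmap_cache_bits(PAT_WRITE_COMBINING, FALSE) returns PG_NC_PCD.  With
 * pat_works set it maps to index 6 and the result is pat_flag | PG_NC_PCD,
 * where pat_flag is PG_PTE_PAT for a PTE or PG_PDE_PAT for a PDE.
 */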
/*
 * The caller is responsible for maintaining TLB consistency.
 */
static void
pmap_kenter_pde(vm_offset_t va, pd_entry_t newpde)
{
	pd_entry_t *pde;
	pmap_t pmap;
	boolean_t PTD_updated;

	PTD_updated = FALSE;
	mtx_lock_spin(&allpmaps_lock);
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] &
		    PG_FRAME))
			PTD_updated = TRUE;
		pde = pmap_pde(pmap, va);
		pde_store(pde, newpde);
	}
	mtx_unlock_spin(&allpmaps_lock);
	KASSERT(PTD_updated,
	    ("pmap_kenter_pde: current page table is not in allpmaps"));
}

/*
 * After changing the page size for the specified virtual address in the page
 * table, flush the corresponding entries from the processor's TLB.  Only the
 * calling processor's TLB is affected.
 *
 * The calling thread must be pinned to a processor.
 */
static void
pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
{
	u_long cr4;

	if ((newpde & PG_PS) == 0)
		/* Demotion: flush a specific 2MB page mapping. */
		invlpg(va);
	else if ((newpde & PG_G) == 0)
		/*
		 * Promotion: flush every 4KB page mapping from the TLB
		 * because there are too many to flush individually.
		 */
		invltlb();
	else {
		/*
		 * Promotion: flush every 4KB page mapping from the TLB,
		 * including any global (PG_G) mappings.
		 */
		cr4 = rcr4();
		load_cr4(cr4 & ~CR4_PGE);
		/*
		 * Although preemption at this point could be detrimental to
		 * performance, it would not lead to an error.  PG_G is simply
		 * ignored if CR4.PGE is clear.  Moreover, in case this block
		 * is re-entered, the load_cr4() either above or below will
		 * modify CR4.PGE flushing the TLB.
		 */
		load_cr4(cr4 | CR4_PGE);
	}
}
#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 *
 * N.B.: Before calling any of the following TLB invalidation functions,
 * the calling processor must ensure that all stores updating a non-
 * kernel page table are globally performed.  Otherwise, another
 * processor could cache an old, pre-update entry without being
 * invalidated.  This can happen one of two ways: (1) The pmap becomes
 * active on another processor after its pm_active field is checked by
 * one of the following functions but before a store updating the page
 * table is globally performed. (2) The pmap becomes active on another
 * processor before its pm_active field is checked but due to
 * speculative loads one of the following functions still reads the
 * pmap as inactive on the other processor.
 *
 * The kernel page table is exempt because its pm_active field is
 * immutable.  The kernel page table is always active on every
 * processor.
 */
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	cpuset_t other_cpus;
	u_int cpuid;

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		invlpg(va);
		smp_invlpg(va);
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			invlpg(va);
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invlpg(other_cpus, va);
	}
	sched_unpin();
}

void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	cpuset_t other_cpus;
	vm_offset_t addr;
	u_int cpuid;

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		smp_invlpg_range(sva, eva);
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			for (addr = sva; addr < eva; addr += PAGE_SIZE)
				invlpg(addr);
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invlpg_range(other_cpus, sva, eva);
	}
	sched_unpin();
}

void
pmap_invalidate_all(pmap_t pmap)
{
	cpuset_t other_cpus;
	u_int cpuid;

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		invltlb();
		smp_invltlb();
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			invltlb();
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invltlb(other_cpus);
	}
	sched_unpin();
}

void
pmap_invalidate_cache(void)
{

	sched_pin();
	wbinvd();
	smp_cache_flush();
	sched_unpin();
}

struct pde_action {
	cpuset_t invalidate;	/* processors that invalidate their TLB */
	vm_offset_t va;
	pd_entry_t *pde;
	pd_entry_t newpde;
	u_int store;		/* processor that updates the PDE */
};

static void
pmap_update_pde_kernel(void *arg)
{
	struct pde_action *act = arg;
	pd_entry_t *pde;
	pmap_t pmap;

	if (act->store == PCPU_GET(cpuid)) {

		/*
		 * Elsewhere, this operation requires allpmaps_lock for
		 * synchronization.  Here, it does not because it is being
		 * performed in the context of an all_cpus rendezvous.
		 */
		LIST_FOREACH(pmap, &allpmaps, pm_list) {
			pde = pmap_pde(pmap, act->va);
			pde_store(pde, act->newpde);
		}
	}
}

static void
pmap_update_pde_user(void *arg)
{
	struct pde_action *act = arg;

	if (act->store == PCPU_GET(cpuid))
		pde_store(act->pde, act->newpde);
}

static void
pmap_update_pde_teardown(void *arg)
{
	struct pde_action *act = arg;

	if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate))
		pmap_update_pde_invalidate(act->va, act->newpde);
}

/*
 * Change the page size for the specified virtual address in a way that
 * prevents any possibility of the TLB ever having two entries that map the
 * same virtual address using different page sizes.  This is the recommended
 * workaround for Erratum 383 on AMD Family 10h processors.  It prevents a
 * machine check exception for a TLB state that is improperly diagnosed as a
 * hardware error.
 */
static void
pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
{
	struct pde_action act;
	cpuset_t active, other_cpus;
	u_int cpuid;

	sched_pin();
	cpuid = PCPU_GET(cpuid);
	other_cpus = all_cpus;
	CPU_CLR(cpuid, &other_cpus);
	if (pmap == kernel_pmap)
		active = all_cpus;
	else
		active = pmap->pm_active;
	if (CPU_OVERLAP(&active, &other_cpus)) {
		act.store = cpuid;
		act.invalidate = active;
		act.va = va;
		act.pde = pde;
		act.newpde = newpde;
		CPU_SET(cpuid, &active);
		smp_rendezvous_cpus(active,
		    smp_no_rendevous_barrier, pmap == kernel_pmap ?
		    pmap_update_pde_kernel : pmap_update_pde_user,
		    pmap_update_pde_teardown, &act);
	} else {
		if (pmap == kernel_pmap)
			pmap_kenter_pde(va, newpde);
		else
			pde_store(pde, newpde);
		if (CPU_ISSET(cpuid, &active))
			pmap_update_pde_invalidate(va, newpde);
	}
	sched_unpin();
}
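/*
 * Ordering note (illustrative, based on the generic smp_rendezvous_cpus()
 * contract): the participating CPUs are synchronized between the action and
 * teardown phases, so the single "store" CPU has globally performed the PDE
 * update before any CPU in act.invalidate runs pmap_update_pde_invalidate()
 * from the teardown hook.  Holding every CPU captive in the rendezvous while
 * the PDE changes is what keeps the old and new page sizes from being used
 * concurrently, per the block comment above.
 */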
#else /* !SMP */
/*
 * Normal, non-SMP, 486+ invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		invlpg(va);
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		invltlb();
}

PMAP_INLINE void
pmap_invalidate_cache(void)
{

	wbinvd();
}

static void
pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
{

	if (pmap == kernel_pmap)
		pmap_kenter_pde(va, newpde);
	else
		pde_store(pde, newpde);
	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		pmap_update_pde_invalidate(va, newpde);
}
#endif /* !SMP */

#define	PMAP_CLFLUSH_THRESHOLD	(2 * 1024 * 1024)

void
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{

	KASSERT((sva & PAGE_MASK) == 0,
	    ("pmap_invalidate_cache_range: sva not page-aligned"));
	KASSERT((eva & PAGE_MASK) == 0,
	    ("pmap_invalidate_cache_range: eva not page-aligned"));

	if (cpu_feature & CPUID_SS)
		; /* If "Self Snoop" is supported, do nothing. */
	else if ((cpu_feature & CPUID_CLFSH) != 0 &&
	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {

		/*
		 * Otherwise, do per-cache line flush.  Use the mfence
		 * instruction to ensure that previous stores are
		 * included in the write-back.  The processor
		 * propagates flush to other processors in the cache
		 * coherence domain.
		 */
		mfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflush(sva);
		mfence();
	} else {

		/*
		 * No targeted cache flush methods are supported by CPU,
		 * or the supplied range is bigger than 2MB.
		 * Globally invalidate cache.
		 */
		pmap_invalidate_cache();
	}
}

void
pmap_invalidate_cache_pages(vm_page_t *pages, int count)
{
	int i;

	if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
	    (cpu_feature & CPUID_CLFSH) == 0) {
		pmap_invalidate_cache();
	} else {
		for (i = 0; i < count; i++)
			pmap_flush_page(pages[i]);
	}
}

/*
 * Are we current address space or kernel?  N.B.  We return FALSE when
 * a pmap's page table is in use because a kernel thread is borrowing
 * it.  The borrowed page table can change spontaneously, making any
 * dependence on its continued use subject to a race condition.
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}

/*
 * If the given pmap is not the current or kernel pmap, the returned pte must
 * be released by passing it to pmap_pte_release().
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_lock(&PMAP2mutex);
		newpf = *pde & PG_FRAME;
		if ((*PMAP2 & PG_FRAME) != newpf) {
			*PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
			pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
		}
		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (NULL);
}

/*
 * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
 * being NULL.
 */
static __inline void
pmap_pte_release(pt_entry_t *pte)
{

	if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2)
		mtx_unlock(&PMAP2mutex);
}
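/*
 * Usage sketch (illustrative only): pmap_pte() hands back either a direct
 * pointer (current or kernel pmap) or the shared PMAP2/PADDR2 window, which
 * is why callers bracket the access with pmap_pte_release():
 *
 *	pte = pmap_pte(pmap, va);
 *	if (pte != NULL) {
 *		... examine or modify *pte ...
 *		pmap_pte_release(pte);
 *	}
 */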
static __inline void
invlcaddr(void *caddr)
{

	invlpg((u_int)caddr);
}

/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 *
 * If the given pmap is not the current pmap, vm_page_queue_mtx
 * must be held and curthread pinned to a CPU.
 */
static pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
		newpf = *pde & PG_FRAME;
		if ((*PMAP1 & PG_FRAME) != newpf) {
			*PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			invlcaddr(PADDR1);
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			invlcaddr(PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0)
			rtval = (pde & PG_PS_FRAME) | (va & PDRMASK);
		else {
			pte = pmap_pte(pmap, va);
			rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
			pmap_pte_release(pte);
		}
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pd_entry_t pde;
	pt_entry_t pte, *ptep;
	vm_page_t m;
	vm_paddr_t pa;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pde = *pmap_pde(pmap, va);
	if (pde != 0) {
		if (pde & PG_PS) {
			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
				if (vm_page_pa_tryrelock(pmap, (pde &
				    PG_PS_FRAME) | (va & PDRMASK), &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
				    (va & PDRMASK));
				vm_page_hold(m);
			}
		} else {
			ptep = pmap_pte(pmap, va);
			pte = *ptep;
			pmap_pte_release(ptep);
			if (pte != 0 &&
			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
				    &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
				vm_page_hold(m);
			}
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 *
 * This function may be used before pmap_bootstrap() is called.
 */
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_store(pte, pa | PG_RW | PG_V | pgeflag);
}

static __inline void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_store(pte, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 *
 * This function may be used before pmap_bootstrap() is called.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_clear(pte);
}
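/*
 * Usage sketch (illustrative only): because pmap_kenter()/pmap_kremove()
 * deliberately skip TLB shootdowns, a caller that needs an SMP-coherent
 * mapping follows them with an explicit invalidation, e.g.:
 *
 *	pmap_kenter(va, pa);
 *	pmap_invalidate_page(kernel_pmap, va);
 *
 * pmap_qenter() and pmap_qremove() below package this pattern for ranges
 * of pages.
 */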
1452151497Sru */ 1453151497Sruvm_offset_t 1454151497Srupmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 1455151497Sru{ 1456151497Sru vm_offset_t va, sva; 1457151497Sru vm_paddr_t superpage_offset; 1458151497Sru pd_entry_t newpde; 1459151497Sru 1460151497Sru va = *virt; 1461151497Sru /* 1462151497Sru * Does the physical address range's size and alignment permit at 1463151497Sru * least one superpage mapping to be created? 1464151497Sru */ 1465151497Sru superpage_offset = start & PDRMASK; 1466151497Sru if ((end - start) - ((NBPDR - superpage_offset) & PDRMASK) >= NBPDR) { 1467151497Sru /* 1468151497Sru * Increase the starting virtual address so that its alignment 1469151497Sru * does not preclude the use of superpage mappings. 1470151497Sru */ 1471151497Sru if ((va & PDRMASK) < superpage_offset) 1472151497Sru va = (va & ~PDRMASK) + superpage_offset; 1473151497Sru else if ((va & PDRMASK) > superpage_offset) 1474151497Sru va = ((va + PDRMASK) & ~PDRMASK) + superpage_offset; 1475151497Sru } 1476151497Sru sva = va; 1477151497Sru while (start < end) { 1478151497Sru if ((start & PDRMASK) == 0 && end - start >= NBPDR && 1479151497Sru pseflag) { 1480151497Sru KASSERT((va & PDRMASK) == 0, 1481151497Sru ("pmap_map: misaligned va %#x", va)); 1482151497Sru newpde = start | PG_PS | pgeflag | PG_RW | PG_V; 1483151497Sru pmap_kenter_pde(va, newpde); 1484151497Sru va += NBPDR; 1485151497Sru start += NBPDR; 1486151497Sru } else { 1487151497Sru pmap_kenter(va, start); 1488151497Sru va += PAGE_SIZE; 1489151497Sru start += PAGE_SIZE; 1490151497Sru } 1491151497Sru } 1492151497Sru pmap_invalidate_range(kernel_pmap, sva, va); 1493151497Sru *virt = va; 1494151497Sru return (sva); 1495151497Sru} 1496151497Sru 1497151497Sru 1498151497Sru/* 1499151497Sru * Add a list of wired pages to the kva 1500151497Sru * this routine is only used for temporary 1501151497Sru * kernel mappings that do not need to have 1502151497Sru * page modification or references recorded. 1503151497Sru * Note that old mappings are simply written 1504151497Sru * over. The page *must* be wired. 1505151497Sru * Note: SMP coherent. Uses a ranged shootdown IPI. 1506151497Sru */ 1507151497Sruvoid 1508151497Srupmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1509151497Sru{ 1510151497Sru pt_entry_t *endpte, oldpte, pa, *pte; 1511151497Sru vm_page_t m; 1512151497Sru 1513151497Sru oldpte = 0; 1514151497Sru pte = vtopte(sva); 1515151497Sru endpte = pte + count; 1516151497Sru while (pte < endpte) { 1517151497Sru m = *ma++; 1518151497Sru pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); 1519151497Sru if ((*pte & (PG_FRAME | PG_PTE_CACHE)) != pa) { 1520151497Sru oldpte |= *pte; 1521151497Sru pte_store(pte, pa | pgeflag | PG_RW | PG_V); 1522151497Sru } 1523151497Sru pte++; 1524151497Sru } 1525151497Sru if (__predict_false((oldpte & PG_V) != 0)) 1526151497Sru pmap_invalidate_range(kernel_pmap, sva, sva + count * 1527151497Sru PAGE_SIZE); 1528151497Sru} 1529151497Sru 1530151497Sru/* 1531151497Sru * This routine tears out page mappings from the 1532151497Sru * kernel -- it is meant only for temporary mappings. 1533151497Sru * Note: SMP coherent. Uses a ranged shootdown IPI. 
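 *
 * Typical pairing with pmap_qenter(), shown only as a hedged reading aid
 * (not code from this file): "ma" is an array of "count" wired pages and
 * "sva" is previously reserved kernel virtual address space.
 *
 *	pmap_qenter(sva, ma, count);
 *	... access the pages through the KVA window at sva ...
 *	pmap_qremove(sva, count);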
1534151497Sru */ 1535151497Sruvoid 1536151497Srupmap_qremove(vm_offset_t sva, int count) 1537151497Sru{ 1538151497Sru vm_offset_t va; 1539151497Sru 1540151497Sru va = sva; 1541151497Sru while (count-- > 0) { 1542151497Sru pmap_kremove(va); 1543151497Sru va += PAGE_SIZE; 1544151497Sru } 1545151497Sru pmap_invalidate_range(kernel_pmap, sva, va); 1546151497Sru} 1547151497Sru 1548151497Sru/*************************************************** 1549151497Sru * Page table page management routines..... 1550151497Sru ***************************************************/ 1551151497Srustatic __inline void 1552151497Srupmap_free_zero_pages(vm_page_t free) 1553151497Sru{ 1554151497Sru vm_page_t m; 1555151497Sru 1556151497Sru while (free != NULL) { 1557151497Sru m = free; 1558151497Sru free = m->right; 1559151497Sru /* Preserve the page's PG_ZERO setting. */ 1560151497Sru vm_page_free_toq(m); 1561151497Sru } 1562151497Sru} 1563151497Sru 1564151497Sru/* 1565151497Sru * Schedule the specified unused page table page to be freed. Specifically, 1566151497Sru * add the page to the specified list of pages that will be released to the 1567151497Sru * physical memory manager after the TLB has been updated. 1568151497Sru */ 1569151497Srustatic __inline void 1570151497Srupmap_add_delayed_free_list(vm_page_t m, vm_page_t *free, boolean_t set_PG_ZERO) 1571151497Sru{ 1572151497Sru 1573151497Sru if (set_PG_ZERO) 1574151497Sru m->flags |= PG_ZERO; 1575151497Sru else 1576151497Sru m->flags &= ~PG_ZERO; 1577151497Sru m->right = *free; 1578151497Sru *free = m; 1579151497Sru} 1580151497Sru 1581151497Sru/* 1582151497Sru * Inserts the specified page table page into the specified pmap's collection 1583151497Sru * of idle page table pages. Each of a pmap's page table pages is responsible 1584151497Sru * for mapping a distinct range of virtual addresses. The pmap's collection is 1585151497Sru * ordered by this virtual address range. 1586151497Sru */ 1587151497Srustatic void 1588151497Srupmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) 1589151497Sru{ 1590151497Sru vm_page_t root; 1591151497Sru 1592151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1593151497Sru root = pmap->pm_root; 1594151497Sru if (root == NULL) { 1595151497Sru mpte->left = NULL; 1596151497Sru mpte->right = NULL; 1597151497Sru } else { 1598151497Sru root = vm_page_splay(mpte->pindex, root); 1599151497Sru if (mpte->pindex < root->pindex) { 1600151497Sru mpte->left = root->left; 1601151497Sru mpte->right = root; 1602151497Sru root->left = NULL; 1603151497Sru } else if (mpte->pindex == root->pindex) 1604151497Sru panic("pmap_insert_pt_page: pindex already inserted"); 1605151497Sru else { 1606151497Sru mpte->right = root->right; 1607151497Sru mpte->left = root; 1608151497Sru root->right = NULL; 1609151497Sru } 1610151497Sru } 1611151497Sru pmap->pm_root = mpte; 1612151497Sru} 1613151497Sru 1614151497Sru/* 1615151497Sru * Looks for a page table page mapping the specified virtual address in the 1616151497Sru * specified pmap's collection of idle page table pages. Returns NULL if there 1617151497Sru * is no page table page corresponding to the specified virtual address. 
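 *
 * The collection is keyed by pindex, which is simply va >> PDRSHIFT.  As
 * a worked example (assuming a non-PAE configuration, where PDRSHIFT is
 * 22 and each page table page maps 4MB):
 *
 *	va = 0x08048000  ==>  pindex = 0x08048000 >> 22 = 32,
 *
 * so the lookup returns the idle page table page, if any, covering
 * [0x08000000, 0x08400000).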
1618151497Sru */ 1619151497Srustatic vm_page_t 1620151497Srupmap_lookup_pt_page(pmap_t pmap, vm_offset_t va) 1621151497Sru{ 1622151497Sru vm_page_t mpte; 1623151497Sru vm_pindex_t pindex = va >> PDRSHIFT; 1624151497Sru 1625151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1626151497Sru if ((mpte = pmap->pm_root) != NULL && mpte->pindex != pindex) { 1627151497Sru mpte = vm_page_splay(pindex, mpte); 1628151497Sru if ((pmap->pm_root = mpte)->pindex != pindex) 1629151497Sru mpte = NULL; 1630151497Sru } 1631151497Sru return (mpte); 1632151497Sru} 1633151497Sru 1634151497Sru/* 1635151497Sru * Removes the specified page table page from the specified pmap's collection 1636151497Sru * of idle page table pages. The specified page table page must be a member of 1637151497Sru * the pmap's collection. 1638151497Sru */ 1639151497Srustatic void 1640151497Srupmap_remove_pt_page(pmap_t pmap, vm_page_t mpte) 1641151497Sru{ 1642151497Sru vm_page_t root; 1643151497Sru 1644151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1645151497Sru if (mpte != pmap->pm_root) 1646151497Sru vm_page_splay(mpte->pindex, pmap->pm_root); 1647151497Sru if (mpte->left == NULL) 1648151497Sru root = mpte->right; 1649151497Sru else { 1650151497Sru root = vm_page_splay(mpte->pindex, mpte->left); 1651151497Sru root->right = mpte->right; 1652151497Sru } 1653151497Sru pmap->pm_root = root; 1654151497Sru} 1655151497Sru 1656151497Sru/* 1657151497Sru * This routine unholds page table pages, and if the hold count 1658151497Sru * drops to zero, then it decrements the wire count. 1659151497Sru */ 1660151497Srustatic __inline int 1661151497Srupmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) 1662151497Sru{ 1663151497Sru 1664151497Sru --m->wire_count; 1665151497Sru if (m->wire_count == 0) 1666151497Sru return (_pmap_unwire_pte_hold(pmap, m, free)); 1667151497Sru else 1668151497Sru return (0); 1669151497Sru} 1670151497Sru 1671151497Srustatic int 1672151497Sru_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free) 1673151497Sru{ 1674151497Sru vm_offset_t pteva; 1675151497Sru 1676151497Sru /* 1677151497Sru * unmap the page table page 1678151497Sru */ 1679151497Sru pmap->pm_pdir[m->pindex] = 0; 1680151497Sru --pmap->pm_stats.resident_count; 1681151497Sru 1682151497Sru /* 1683151497Sru * This is a release store so that the ordinary store unmapping 1684151497Sru * the page table page is globally performed before TLB shoot- 1685151497Sru * down is begun. 1686151497Sru */ 1687151497Sru atomic_subtract_rel_int(&cnt.v_wire_count, 1); 1688151497Sru 1689151497Sru /* 1690151497Sru * Do an invltlb to make the invalidated mapping 1691151497Sru * take effect immediately. 1692151497Sru */ 1693151497Sru pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex); 1694151497Sru pmap_invalidate_page(pmap, pteva); 1695151497Sru 1696151497Sru /* 1697151497Sru * Put page on a list so that it is released after 1698151497Sru * *ALL* TLB shootdown is done 1699151497Sru */ 1700151497Sru pmap_add_delayed_free_list(m, free, TRUE); 1701151497Sru 1702151497Sru return (1); 1703151497Sru} 1704151497Sru 1705151497Sru/* 1706151497Sru * After removing a page table entry, this routine is used to 1707151497Sru * conditionally free the page, and manage the hold/wire counts. 
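 *
 * As a reading aid (summarizing the surrounding code, not adding to it):
 * a user page table page's wire_count tracks how many valid PTEs it
 * currently holds, so a fully populated page table page has
 * wire_count == NPTEPG (1024 4KB mappings without PAE, 512 with PAE),
 * and dropping the last one is what lets _pmap_unwire_pte_hold() free
 * the page.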
1708151497Sru */ 1709151497Srustatic int 1710151497Srupmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free) 1711151497Sru{ 1712151497Sru pd_entry_t ptepde; 1713151497Sru vm_page_t mpte; 1714151497Sru 1715151497Sru if (va >= VM_MAXUSER_ADDRESS) 1716151497Sru return (0); 1717151497Sru ptepde = *pmap_pde(pmap, va); 1718151497Sru mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); 1719151497Sru return (pmap_unwire_pte_hold(pmap, mpte, free)); 1720151497Sru} 1721151497Sru 1722151497Sru/* 1723151497Sru * Initialize the pmap for the swapper process. 1724151497Sru */ 1725151497Sruvoid 1726151497Srupmap_pinit0(pmap_t pmap) 1727151497Sru{ 1728151497Sru 1729151497Sru PMAP_LOCK_INIT(pmap); 1730151497Sru /* 1731151497Sru * Since the page table directory is shared with the kernel pmap, 1732151497Sru * which is already included in the list "allpmaps", this pmap does 1733151497Sru * not need to be inserted into that list. 1734151497Sru */ 1735151497Sru pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD); 1736151497Sru#ifdef PAE 1737151497Sru pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); 1738151497Sru#endif 1739151497Sru pmap->pm_root = NULL; 1740151497Sru CPU_ZERO(&pmap->pm_active); 1741151497Sru PCPU_SET(curpmap, pmap); 1742151497Sru TAILQ_INIT(&pmap->pm_pvchunk); 1743151497Sru bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1744151497Sru} 1745151497Sru 1746151497Sru/* 1747151497Sru * Initialize a preallocated and zeroed pmap structure, 1748151497Sru * such as one in a vmspace structure. 1749151497Sru */ 1750151497Sruint 1751151497Srupmap_pinit(pmap_t pmap) 1752151497Sru{ 1753151497Sru vm_page_t m, ptdpg[NPGPTD]; 1754151497Sru vm_paddr_t pa; 1755151497Sru int i; 1756151497Sru 1757151497Sru PMAP_LOCK_INIT(pmap); 1758151497Sru 1759151497Sru /* 1760151497Sru * No need to allocate page table space yet but we do need a valid 1761151497Sru * page directory table. 
1762151497Sru */ 1763151497Sru if (pmap->pm_pdir == NULL) { 1764151497Sru pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1765151497Sru NBPTD); 1766151497Sru if (pmap->pm_pdir == NULL) { 1767151497Sru PMAP_LOCK_DESTROY(pmap); 1768151497Sru return (0); 1769151497Sru } 1770151497Sru#ifdef PAE 1771151497Sru pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO); 1772151497Sru KASSERT(((vm_offset_t)pmap->pm_pdpt & 1773151497Sru ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0, 1774151497Sru ("pmap_pinit: pdpt misaligned")); 1775151497Sru KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30), 1776151497Sru ("pmap_pinit: pdpt above 4g")); 1777151497Sru#endif 1778151497Sru pmap->pm_root = NULL; 1779151497Sru } 1780151497Sru KASSERT(pmap->pm_root == NULL, 1781151497Sru ("pmap_pinit: pmap has reserved page table page(s)")); 1782151497Sru 1783151497Sru /* 1784151497Sru * allocate the page directory page(s) 1785151497Sru */ 1786151497Sru for (i = 0; i < NPGPTD;) { 1787151497Sru m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 1788151497Sru VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1789151497Sru if (m == NULL) 1790151497Sru VM_WAIT; 1791151497Sru else { 1792151497Sru ptdpg[i++] = m; 1793151497Sru } 1794151497Sru } 1795151497Sru 1796151497Sru pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD); 1797151497Sru 1798151497Sru for (i = 0; i < NPGPTD; i++) 1799151497Sru if ((ptdpg[i]->flags & PG_ZERO) == 0) 1800151497Sru pagezero(pmap->pm_pdir + (i * NPDEPG)); 1801151497Sru 1802151497Sru mtx_lock_spin(&allpmaps_lock); 1803151497Sru LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); 1804151497Sru /* Copy the kernel page table directory entries. */ 1805151497Sru bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t)); 1806151497Sru mtx_unlock_spin(&allpmaps_lock); 1807151497Sru 1808151497Sru /* install self-referential address mapping entry(s) */ 1809151497Sru for (i = 0; i < NPGPTD; i++) { 1810151497Sru pa = VM_PAGE_TO_PHYS(ptdpg[i]); 1811151497Sru pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M; 1812151497Sru#ifdef PAE 1813151497Sru pmap->pm_pdpt[i] = pa | PG_V; 1814151497Sru#endif 1815151497Sru } 1816151497Sru 1817151497Sru CPU_ZERO(&pmap->pm_active); 1818151497Sru TAILQ_INIT(&pmap->pm_pvchunk); 1819151497Sru bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1820151497Sru 1821151497Sru return (1); 1822151497Sru} 1823151497Sru 1824151497Sru/* 1825151497Sru * this routine is called if the page table page is not 1826151497Sru * mapped correctly. 1827151497Sru */ 1828151497Srustatic vm_page_t 1829151497Sru_pmap_allocpte(pmap_t pmap, u_int ptepindex, int flags) 1830151497Sru{ 1831151497Sru vm_paddr_t ptepa; 1832151497Sru vm_page_t m; 1833151497Sru 1834151497Sru KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1835151497Sru (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1836151497Sru ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1837151497Sru 1838151497Sru /* 1839151497Sru * Allocate a page table page. 1840151497Sru */ 1841151497Sru if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1842151497Sru VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1843151497Sru if (flags & M_WAITOK) { 1844151497Sru PMAP_UNLOCK(pmap); 1845151497Sru vm_page_unlock_queues(); 1846151497Sru VM_WAIT; 1847151497Sru vm_page_lock_queues(); 1848151497Sru PMAP_LOCK(pmap); 1849151497Sru } 1850151497Sru 1851151497Sru /* 1852151497Sru * Indicate the need to retry. While waiting, the page table 1853151497Sru * page may have been allocated. 
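		 *
		 * (Reading aid: callers that passed M_WAITOK treat this NULL
		 * as "look again" rather than failure -- pmap_allocpte()
		 * below re-reads the page directory entry and retries --
		 * while M_NOWAIT callers see it as an allocation failure.)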
1854151497Sru */ 1855151497Sru return (NULL); 1856151497Sru } 1857151497Sru if ((m->flags & PG_ZERO) == 0) 1858151497Sru pmap_zero_page(m); 1859151497Sru 1860151497Sru /* 1861151497Sru * Map the pagetable page into the process address space, if 1862151497Sru * it isn't already there. 1863151497Sru */ 1864151497Sru 1865151497Sru pmap->pm_stats.resident_count++; 1866151497Sru 1867151497Sru ptepa = VM_PAGE_TO_PHYS(m); 1868151497Sru pmap->pm_pdir[ptepindex] = 1869151497Sru (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M); 1870151497Sru 1871151497Sru return (m); 1872151497Sru} 1873151497Sru 1874151497Srustatic vm_page_t 1875151497Srupmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1876151497Sru{ 1877151497Sru u_int ptepindex; 1878151497Sru pd_entry_t ptepa; 1879151497Sru vm_page_t m; 1880151497Sru 1881151497Sru KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1882151497Sru (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1883151497Sru ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1884151497Sru 1885151497Sru /* 1886151497Sru * Calculate pagetable page index 1887151497Sru */ 1888151497Sru ptepindex = va >> PDRSHIFT; 1889151497Sruretry: 1890151497Sru /* 1891151497Sru * Get the page directory entry 1892151497Sru */ 1893151497Sru ptepa = pmap->pm_pdir[ptepindex]; 1894151497Sru 1895151497Sru /* 1896151497Sru * This supports switching from a 4MB page to a 1897151497Sru * normal 4K page. 1898151497Sru */ 1899151497Sru if (ptepa & PG_PS) { 1900151497Sru (void)pmap_demote_pde(pmap, &pmap->pm_pdir[ptepindex], va); 1901151497Sru ptepa = pmap->pm_pdir[ptepindex]; 1902151497Sru } 1903151497Sru 1904151497Sru /* 1905151497Sru * If the page table page is mapped, we just increment the 1906151497Sru * hold count, and activate it. 1907151497Sru */ 1908151497Sru if (ptepa) { 1909151497Sru m = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); 1910151497Sru m->wire_count++; 1911151497Sru } else { 1912151497Sru /* 1913151497Sru * Here if the pte page isn't mapped, or if it has 1914151497Sru * been deallocated. 1915151497Sru */ 1916151497Sru m = _pmap_allocpte(pmap, ptepindex, flags); 1917151497Sru if (m == NULL && (flags & M_WAITOK)) 1918151497Sru goto retry; 1919151497Sru } 1920151497Sru return (m); 1921151497Sru} 1922151497Sru 1923151497Sru 1924151497Sru/*************************************************** 1925151497Sru* Pmap allocation/deallocation routines. 1926151497Sru ***************************************************/ 1927151497Sru 1928151497Sru#ifdef SMP 1929151497Sru/* 1930151497Sru * Deal with a SMP shootdown of other users of the pmap that we are 1931151497Sru * trying to dispose of. This can be a bit hairy. 
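 *
 * In outline (a summary of the code below, offered only as a reading
 * aid): pmap_lazyfix() walks pm_active one CPU at a time, publishing the
 * dying page directory in "lazyptd" and a pointer to pm_active in
 * "lazymask".  If the chosen CPU is the caller itself, it simply reloads
 * its own pcb_cr3; otherwise it sends IPI_LAZYPMAP and spins on
 * "lazywait" until the target has switched off that page directory and
 * cleared its bit in the mask.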
1932151497Sru */ 1933151497Srustatic cpuset_t *lazymask; 1934151497Srustatic u_int lazyptd; 1935151497Srustatic volatile u_int lazywait; 1936151497Sru 1937151497Sruvoid pmap_lazyfix_action(void); 1938151497Sru 1939151497Sruvoid 1940151497Srupmap_lazyfix_action(void) 1941151497Sru{ 1942151497Sru 1943151497Sru#ifdef COUNT_IPIS 1944151497Sru (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; 1945151497Sru#endif 1946151497Sru if (rcr3() == lazyptd) 1947151497Sru load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1948151497Sru CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); 1949151497Sru atomic_store_rel_int(&lazywait, 1); 1950151497Sru} 1951151497Sru 1952151497Srustatic void 1953151497Srupmap_lazyfix_self(u_int cpuid) 1954151497Sru{ 1955151497Sru 1956151497Sru if (rcr3() == lazyptd) 1957151497Sru load_cr3(PCPU_GET(curpcb)->pcb_cr3); 1958151497Sru CPU_CLR_ATOMIC(cpuid, lazymask); 1959151497Sru} 1960151497Sru 1961151497Sru 1962151497Srustatic void 1963151497Srupmap_lazyfix(pmap_t pmap) 1964151497Sru{ 1965151497Sru cpuset_t mymask, mask; 1966151497Sru u_int cpuid, spins; 1967151497Sru int lsb; 1968151497Sru 1969151497Sru mask = pmap->pm_active; 1970151497Sru while (!CPU_EMPTY(&mask)) { 1971151497Sru spins = 50000000; 1972151497Sru 1973151497Sru /* Find least significant set bit. */ 1974151497Sru lsb = cpusetobj_ffs(&mask); 1975151497Sru MPASS(lsb != 0); 1976151497Sru lsb--; 1977151497Sru CPU_SETOF(lsb, &mask); 1978151497Sru mtx_lock_spin(&smp_ipi_mtx); 1979151497Sru#ifdef PAE 1980151497Sru lazyptd = vtophys(pmap->pm_pdpt); 1981151497Sru#else 1982151497Sru lazyptd = vtophys(pmap->pm_pdir); 1983151497Sru#endif 1984151497Sru cpuid = PCPU_GET(cpuid); 1985151497Sru 1986151497Sru /* Use a cpuset just for having an easy check. */ 1987151497Sru CPU_SETOF(cpuid, &mymask); 1988151497Sru if (!CPU_CMP(&mask, &mymask)) { 1989151497Sru lazymask = &pmap->pm_active; 1990151497Sru pmap_lazyfix_self(cpuid); 1991151497Sru } else { 1992151497Sru atomic_store_rel_int((u_int *)&lazymask, 1993151497Sru (u_int)&pmap->pm_active); 1994151497Sru atomic_store_rel_int(&lazywait, 0); 1995151497Sru ipi_selected(mask, IPI_LAZYPMAP); 1996151497Sru while (lazywait == 0) { 1997151497Sru ia32_pause(); 1998151497Sru if (--spins == 0) 1999151497Sru break; 2000151497Sru } 2001151497Sru } 2002151497Sru mtx_unlock_spin(&smp_ipi_mtx); 2003151497Sru if (spins == 0) 2004151497Sru printf("pmap_lazyfix: spun for 50000000\n"); 2005151497Sru mask = pmap->pm_active; 2006151497Sru } 2007151497Sru} 2008151497Sru 2009151497Sru#else /* SMP */ 2010151497Sru 2011151497Sru/* 2012151497Sru * Cleaning up on uniprocessor is easy. For various reasons, we're 2013151497Sru * unlikely to have to even execute this code, including the fact 2014151497Sru * that the cleanup is deferred until the parent does a wait(2), which 2015151497Sru * means that another userland process has run. 2016151497Sru */ 2017151497Srustatic void 2018151497Srupmap_lazyfix(pmap_t pmap) 2019151497Sru{ 2020151497Sru u_int cr3; 2021151497Sru 2022151497Sru cr3 = vtophys(pmap->pm_pdir); 2023151497Sru if (cr3 == rcr3()) { 2024151497Sru load_cr3(PCPU_GET(curpcb)->pcb_cr3); 2025151497Sru CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); 2026151497Sru } 2027151497Sru} 2028151497Sru#endif /* SMP */ 2029151497Sru 2030151497Sru/* 2031151497Sru * Release any resources held by the given physical map. 2032151497Sru * Called when a pmap initialized by pmap_pinit is being released. 2033151497Sru * Should only be called if the map contains no valid mappings. 
2034151497Sru */ 2035151497Sruvoid 2036151497Srupmap_release(pmap_t pmap) 2037151497Sru{ 2038151497Sru vm_page_t m, ptdpg[NPGPTD]; 2039151497Sru int i; 2040151497Sru 2041151497Sru KASSERT(pmap->pm_stats.resident_count == 0, 2042151497Sru ("pmap_release: pmap resident count %ld != 0", 2043151497Sru pmap->pm_stats.resident_count)); 2044151497Sru KASSERT(pmap->pm_root == NULL, 2045151497Sru ("pmap_release: pmap has reserved page table page(s)")); 2046151497Sru 2047151497Sru pmap_lazyfix(pmap); 2048151497Sru mtx_lock_spin(&allpmaps_lock); 2049151497Sru LIST_REMOVE(pmap, pm_list); 2050151497Sru mtx_unlock_spin(&allpmaps_lock); 2051151497Sru 2052151497Sru for (i = 0; i < NPGPTD; i++) 2053151497Sru ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i] & 2054151497Sru PG_FRAME); 2055151497Sru 2056151497Sru bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) * 2057151497Sru sizeof(*pmap->pm_pdir)); 2058151497Sru 2059151497Sru pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD); 2060151497Sru 2061151497Sru for (i = 0; i < NPGPTD; i++) { 2062151497Sru m = ptdpg[i]; 2063151497Sru#ifdef PAE 2064151497Sru KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME), 2065151497Sru ("pmap_release: got wrong ptd page")); 2066151497Sru#endif 2067151497Sru m->wire_count--; 2068151497Sru atomic_subtract_int(&cnt.v_wire_count, 1); 2069151497Sru vm_page_free_zero(m); 2070151497Sru } 2071151497Sru PMAP_LOCK_DESTROY(pmap); 2072151497Sru} 2073151497Sru 2074151497Srustatic int 2075151497Srukvm_size(SYSCTL_HANDLER_ARGS) 2076151497Sru{ 2077151497Sru unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 2078151497Sru 2079151497Sru return (sysctl_handle_long(oidp, &ksize, 0, req)); 2080151497Sru} 2081151497SruSYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 2082151497Sru 0, 0, kvm_size, "IU", "Size of KVM"); 2083151497Sru 2084151497Srustatic int 2085151497Srukvm_free(SYSCTL_HANDLER_ARGS) 2086151497Sru{ 2087151497Sru unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 2088151497Sru 2089151497Sru return (sysctl_handle_long(oidp, &kfree, 0, req)); 2090151497Sru} 2091151497SruSYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 2092151497Sru 0, 0, kvm_free, "IU", "Amount of KVM free"); 2093151497Sru 2094151497Sru/* 2095151497Sru * grow the number of kernel page table entries, if needed 2096151497Sru */ 2097151497Sruvoid 2098151497Srupmap_growkernel(vm_offset_t addr) 2099151497Sru{ 2100151497Sru vm_paddr_t ptppaddr; 2101151497Sru vm_page_t nkpg; 2102151497Sru pd_entry_t newpdir; 2103151497Sru 2104151497Sru mtx_assert(&kernel_map->system_mtx, MA_OWNED); 2105151497Sru addr = roundup2(addr, NBPDR); 2106151497Sru if (addr - 1 >= kernel_map->max_offset) 2107151497Sru addr = kernel_map->max_offset; 2108151497Sru while (kernel_vm_end < addr) { 2109151497Sru if (pdir_pde(PTD, kernel_vm_end)) { 2110151497Sru kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; 2111151497Sru if (kernel_vm_end - 1 >= kernel_map->max_offset) { 2112151497Sru kernel_vm_end = kernel_map->max_offset; 2113151497Sru break; 2114151497Sru } 2115151497Sru continue; 2116151497Sru } 2117151497Sru 2118151497Sru nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT, 2119151497Sru VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2120151497Sru VM_ALLOC_ZERO); 2121151497Sru if (nkpg == NULL) 2122151497Sru panic("pmap_growkernel: no memory to grow kernel"); 2123151497Sru 2124151497Sru nkpt++; 2125151497Sru 2126151497Sru if ((nkpg->flags & PG_ZERO) == 0) 2127151497Sru pmap_zero_page(nkpg); 2128151497Sru ptppaddr = VM_PAGE_TO_PHYS(nkpg); 
2129151497Sru newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M); 2130151497Sru pdir_pde(KPTD, kernel_vm_end) = pgeflag | newpdir; 2131151497Sru 2132151497Sru pmap_kenter_pde(kernel_vm_end, newpdir); 2133151497Sru kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; 2134151497Sru if (kernel_vm_end - 1 >= kernel_map->max_offset) { 2135151497Sru kernel_vm_end = kernel_map->max_offset; 2136151497Sru break; 2137151497Sru } 2138151497Sru } 2139151497Sru} 2140151497Sru 2141151497Sru 2142151497Sru/*************************************************** 2143151497Sru * page management routines. 2144151497Sru ***************************************************/ 2145151497Sru 2146151497SruCTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 2147151497SruCTASSERT(_NPCM == 11); 2148151497SruCTASSERT(_NPCPV == 336); 2149151497Sru 2150151497Srustatic __inline struct pv_chunk * 2151151497Srupv_to_chunk(pv_entry_t pv) 2152151497Sru{ 2153151497Sru 2154151497Sru return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 2155151497Sru} 2156151497Sru 2157151497Sru#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 2158151497Sru 2159151497Sru#define PC_FREE0_9 0xfffffffful /* Free values for index 0 through 9 */ 2160151497Sru#define PC_FREE10 0x0000fffful /* Free values for index 10 */ 2161151497Sru 2162151497Srustatic uint32_t pc_freemask[_NPCM] = { 2163151497Sru PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 2164151497Sru PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 2165151497Sru PC_FREE0_9, PC_FREE0_9, PC_FREE0_9, 2166151497Sru PC_FREE0_9, PC_FREE10 2167151497Sru}; 2168151497Sru 2169151497SruSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 2170151497Sru "Current number of pv entries"); 2171151497Sru 2172151497Sru#ifdef PV_STATS 2173151497Srustatic int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 2174151497Sru 2175151497SruSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 2176151497Sru "Current number of pv entry chunks"); 2177151497SruSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 2178151497Sru "Current number of pv entry chunks allocated"); 2179151497SruSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 2180151497Sru "Current number of pv entry chunks frees"); 2181151497SruSYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 2182151497Sru "Number of times tried to get a chunk page but failed."); 2183151497Sru 2184151497Srustatic long pv_entry_frees, pv_entry_allocs; 2185151497Srustatic int pv_entry_spare; 2186151497Sru 2187151497SruSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 2188151497Sru "Current number of pv entry frees"); 2189151497SruSYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 2190151497Sru "Current number of pv entry allocs"); 2191151497SruSYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 2192151497Sru "Current number of spare pv entries"); 2193151497Sru#endif 2194151497Sru 2195151497Sru/* 2196151497Sru * We are in a serious low memory condition. Resort to 2197151497Sru * drastic measures to free some pages so we can allocate 2198151497Sru * another pv entry chunk. 
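 *
 * In outline (a summary of the function below, offered as a reading
 * aid): walk the global pv_chunks list in LRU order and, for each chunk,
 * tear down every unwired 4KB managed mapping whose pv entry lives in
 * that chunk, returning those pv entries to the chunk's free bitmap.  A
 * chunk that becomes entirely free is unmapped and its page returned to
 * satisfy the pending allocation; failing that, a page table page freed
 * as a side effect may be recycled instead.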
2199151497Sru */ 2200151497Srustatic vm_page_t 2201151497Srupmap_pv_reclaim(pmap_t locked_pmap) 2202151497Sru{ 2203151497Sru struct pch newtail; 2204151497Sru struct pv_chunk *pc; 2205151497Sru struct md_page *pvh; 2206151497Sru pd_entry_t *pde; 2207151497Sru pmap_t pmap; 2208151497Sru pt_entry_t *pte, tpte; 2209151497Sru pv_entry_t pv; 2210151497Sru vm_offset_t va; 2211151497Sru vm_page_t free, m, m_pc; 2212151497Sru uint32_t inuse, freemask; 2213151497Sru int bit, field, freed; 2214151497Sru 2215151497Sru PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 2216151497Sru pmap = NULL; 2217151497Sru free = m_pc = NULL; 2218151497Sru TAILQ_INIT(&newtail); 2219151497Sru sched_pin(); 2220151497Sru while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 || 2221151497Sru free == NULL)) { 2222151497Sru TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2223151497Sru if (pmap != pc->pc_pmap) { 2224151497Sru if (pmap != NULL) { 2225151497Sru pmap_invalidate_all(pmap); 2226151497Sru if (pmap != locked_pmap) 2227151497Sru PMAP_UNLOCK(pmap); 2228151497Sru } 2229151497Sru pmap = pc->pc_pmap; 2230151497Sru /* Avoid deadlock and lock recursion. */ 2231151497Sru if (pmap > locked_pmap) 2232151497Sru PMAP_LOCK(pmap); 2233151497Sru else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) { 2234151497Sru pmap = NULL; 2235151497Sru TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 2236151497Sru continue; 2237151497Sru } 2238151497Sru } 2239151497Sru 2240151497Sru /* 2241151497Sru * Destroy every non-wired, 4 KB page mapping in the chunk. 2242151497Sru */ 2243151497Sru freed = 0; 2244151497Sru for (field = 0; field < _NPCM; field++) { 2245151497Sru freemask = 0; 2246151497Sru for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 2247151497Sru inuse != 0; inuse &= ~(1UL << bit)) { 2248151497Sru bit = bsfl(inuse); 2249151497Sru pv = &pc->pc_pventry[field * 32 + bit]; 2250151497Sru va = pv->pv_va; 2251151497Sru pde = pmap_pde(pmap, va); 2252151497Sru if ((*pde & PG_PS) != 0) 2253151497Sru continue; 2254151497Sru pte = pmap_pte_quick(pmap, va); 2255151497Sru if ((*pte & PG_W) != 0) 2256151497Sru continue; 2257151497Sru tpte = pte_load_clear(pte); 2258151497Sru if ((tpte & PG_G) != 0) 2259151497Sru pmap_invalidate_page(pmap, va); 2260151497Sru m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 2261151497Sru if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2262151497Sru vm_page_dirty(m); 2263151497Sru if ((tpte & PG_A) != 0) 2264151497Sru vm_page_aflag_set(m, PGA_REFERENCED); 2265151497Sru TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2266151497Sru if (TAILQ_EMPTY(&m->md.pv_list) && 2267151497Sru (m->flags & PG_FICTITIOUS) == 0) { 2268151497Sru pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2269151497Sru if (TAILQ_EMPTY(&pvh->pv_list)) { 2270151497Sru vm_page_aflag_clear(m, 2271151497Sru PGA_WRITEABLE); 2272151497Sru } 2273151497Sru } 2274151497Sru pmap_unuse_pt(pmap, va, &free); 2275151497Sru freemask |= 1UL << bit; 2276151497Sru freed++; 2277151497Sru } 2278151497Sru pc->pc_map[field] |= freemask; 2279151497Sru } 2280151497Sru if (freed == 0) { 2281151497Sru TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 2282151497Sru continue; 2283151497Sru } 2284151497Sru pmap->pm_stats.resident_count -= freed; 2285151497Sru PV_STAT(pv_entry_frees += freed); 2286151497Sru PV_STAT(pv_entry_spare += freed); 2287151497Sru pv_entry_count -= freed; 2288151497Sru TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2289151497Sru for (field = 0; field < _NPCM; field++) 2290151497Sru if (pc->pc_map[field] != pc_freemask[field]) { 2291151497Sru TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, 2292151497Sru 
pc_list); 2293151497Sru TAILQ_INSERT_TAIL(&newtail, pc, pc_lru); 2294151497Sru 2295151497Sru /* 2296151497Sru * One freed pv entry in locked_pmap is 2297151497Sru * sufficient. 2298151497Sru */ 2299151497Sru if (pmap == locked_pmap) 2300151497Sru goto out; 2301151497Sru break; 2302151497Sru } 2303151497Sru if (field == _NPCM) { 2304151497Sru PV_STAT(pv_entry_spare -= _NPCPV); 2305151497Sru PV_STAT(pc_chunk_count--); 2306151497Sru PV_STAT(pc_chunk_frees++); 2307151497Sru /* Entire chunk is free; return it. */ 2308151497Sru m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2309151497Sru pmap_qremove((vm_offset_t)pc, 1); 2310151497Sru pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 2311151497Sru break; 2312151497Sru } 2313151497Sru } 2314151497Sruout: 2315151497Sru sched_unpin(); 2316151497Sru TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru); 2317151497Sru if (pmap != NULL) { 2318151497Sru pmap_invalidate_all(pmap); 2319151497Sru if (pmap != locked_pmap) 2320151497Sru PMAP_UNLOCK(pmap); 2321151497Sru } 2322151497Sru if (m_pc == NULL && pv_vafree != 0 && free != NULL) { 2323151497Sru m_pc = free; 2324151497Sru free = m_pc->right; 2325151497Sru /* Recycle a freed page table page. */ 2326151497Sru m_pc->wire_count = 1; 2327151497Sru atomic_add_int(&cnt.v_wire_count, 1); 2328151497Sru } 2329151497Sru pmap_free_zero_pages(free); 2330151497Sru return (m_pc); 2331151497Sru} 2332151497Sru 2333151497Sru/* 2334151497Sru * free the pv_entry back to the free list 2335151497Sru */ 2336151497Srustatic void 2337151497Srufree_pv_entry(pmap_t pmap, pv_entry_t pv) 2338151497Sru{ 2339151497Sru struct pv_chunk *pc; 2340151497Sru int idx, field, bit; 2341151497Sru 2342151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2343151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2344151497Sru PV_STAT(pv_entry_frees++); 2345151497Sru PV_STAT(pv_entry_spare++); 2346151497Sru pv_entry_count--; 2347151497Sru pc = pv_to_chunk(pv); 2348151497Sru idx = pv - &pc->pc_pventry[0]; 2349151497Sru field = idx / 32; 2350151497Sru bit = idx % 32; 2351151497Sru pc->pc_map[field] |= 1ul << bit; 2352151497Sru /* move to head of list */ 2353151497Sru TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2354151497Sru for (idx = 0; idx < _NPCM; idx++) 2355151497Sru if (pc->pc_map[idx] != pc_freemask[idx]) { 2356151497Sru TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2357151497Sru return; 2358151497Sru } 2359151497Sru free_pv_chunk(pc); 2360151497Sru} 2361151497Sru 2362151497Srustatic void 2363151497Srufree_pv_chunk(struct pv_chunk *pc) 2364151497Sru{ 2365151497Sru vm_page_t m; 2366151497Sru 2367151497Sru TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2368151497Sru PV_STAT(pv_entry_spare -= _NPCPV); 2369151497Sru PV_STAT(pc_chunk_count--); 2370151497Sru PV_STAT(pc_chunk_frees++); 2371151497Sru /* entire chunk is free, return it */ 2372151497Sru m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc)); 2373151497Sru pmap_qremove((vm_offset_t)pc, 1); 2374151497Sru vm_page_unwire(m, 0); 2375151497Sru vm_page_free(m); 2376151497Sru pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc); 2377151497Sru} 2378151497Sru 2379151497Sru/* 2380151497Sru * get a new pv_entry, allocating a block from the system 2381151497Sru * when needed. 
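 *
 * The chunk arithmetic, spelled out as a reading aid: each struct
 * pv_chunk occupies exactly one page (see the CTASSERTs above) and
 * carries _NPCPV == 336 pv entries tracked by _NPCM == 11 32-bit bitmap
 * words.  11 * 32 = 352 bits, 16 more than there are entries, which is
 * why the last word's free mask is PC_FREE10 == 0x0000ffff while the
 * first ten words use PC_FREE0_9 == 0xffffffff.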
2382151497Sru */ 2383151497Srustatic pv_entry_t 2384151497Sruget_pv_entry(pmap_t pmap, boolean_t try) 2385151497Sru{ 2386151497Sru static const struct timeval printinterval = { 60, 0 }; 2387151497Sru static struct timeval lastprint; 2388151497Sru int bit, field; 2389151497Sru pv_entry_t pv; 2390151497Sru struct pv_chunk *pc; 2391151497Sru vm_page_t m; 2392151497Sru 2393151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2394151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2395151497Sru PV_STAT(pv_entry_allocs++); 2396151497Sru pv_entry_count++; 2397151497Sru if (pv_entry_count > pv_entry_high_water) 2398151497Sru if (ratecheck(&lastprint, &printinterval)) 2399151497Sru printf("Approaching the limit on PV entries, consider " 2400151497Sru "increasing either the vm.pmap.shpgperproc or the " 2401151497Sru "vm.pmap.pv_entry_max tunable.\n"); 2402151497Sruretry: 2403151497Sru pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2404151497Sru if (pc != NULL) { 2405151497Sru for (field = 0; field < _NPCM; field++) { 2406151497Sru if (pc->pc_map[field]) { 2407151497Sru bit = bsfl(pc->pc_map[field]); 2408151497Sru break; 2409151497Sru } 2410151497Sru } 2411151497Sru if (field < _NPCM) { 2412151497Sru pv = &pc->pc_pventry[field * 32 + bit]; 2413151497Sru pc->pc_map[field] &= ~(1ul << bit); 2414151497Sru /* If this was the last item, move it to tail */ 2415151497Sru for (field = 0; field < _NPCM; field++) 2416151497Sru if (pc->pc_map[field] != 0) { 2417151497Sru PV_STAT(pv_entry_spare--); 2418151497Sru return (pv); /* not full, return */ 2419151497Sru } 2420151497Sru TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2421151497Sru TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2422151497Sru if (pc != TAILQ_LAST(&pv_chunks, pch)) { 2423151497Sru TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2424151497Sru TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 2425151497Sru } 2426151497Sru PV_STAT(pv_entry_spare--); 2427151497Sru return (pv); 2428151497Sru } 2429151497Sru } 2430151497Sru /* 2431151497Sru * Access to the ptelist "pv_vafree" is synchronized by the page 2432151497Sru * queues lock. If "pv_vafree" is currently non-empty, it will 2433151497Sru * remain non-empty until pmap_ptelist_alloc() completes. 
2434151497Sru */ 2435151497Sru if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2436151497Sru VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 2437151497Sru if (try) { 2438151497Sru pv_entry_count--; 2439151497Sru PV_STAT(pc_chunk_tryfail++); 2440151497Sru return (NULL); 2441151497Sru } 2442151497Sru m = pmap_pv_reclaim(pmap); 2443151497Sru if (m == NULL) 2444151497Sru goto retry; 2445151497Sru } 2446151497Sru PV_STAT(pc_chunk_count++); 2447151497Sru PV_STAT(pc_chunk_allocs++); 2448151497Sru pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree); 2449151497Sru pmap_qenter((vm_offset_t)pc, &m, 1); 2450151497Sru pc->pc_pmap = pmap; 2451151497Sru pc->pc_map[0] = pc_freemask[0] & ~1ul; /* preallocated bit 0 */ 2452151497Sru for (field = 1; field < _NPCM; field++) 2453151497Sru pc->pc_map[field] = pc_freemask[field]; 2454151497Sru TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 2455151497Sru pv = &pc->pc_pventry[0]; 2456151497Sru TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2457151497Sru PV_STAT(pv_entry_spare += _NPCPV - 1); 2458151497Sru return (pv); 2459151497Sru} 2460151497Sru 2461151497Srustatic __inline pv_entry_t 2462151497Srupmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2463151497Sru{ 2464151497Sru pv_entry_t pv; 2465151497Sru 2466151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2467151497Sru TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 2468151497Sru if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2469151497Sru TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 2470151497Sru break; 2471151497Sru } 2472151497Sru } 2473151497Sru return (pv); 2474151497Sru} 2475151497Sru 2476151497Srustatic void 2477151497Srupmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2478151497Sru{ 2479151497Sru struct md_page *pvh; 2480151497Sru pv_entry_t pv; 2481151497Sru vm_offset_t va_last; 2482151497Sru vm_page_t m; 2483151497Sru 2484151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2485151497Sru KASSERT((pa & PDRMASK) == 0, 2486151497Sru ("pmap_pv_demote_pde: pa is not 4mpage aligned")); 2487151497Sru 2488151497Sru /* 2489151497Sru * Transfer the 4mpage's pv entry for this mapping to the first 2490151497Sru * page's pv list. 2491151497Sru */ 2492151497Sru pvh = pa_to_pvh(pa); 2493151497Sru va = trunc_4mpage(va); 2494151497Sru pv = pmap_pvh_remove(pvh, pmap, va); 2495151497Sru KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); 2496151497Sru m = PHYS_TO_VM_PAGE(pa); 2497151497Sru TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2498151497Sru /* Instantiate the remaining NPTEPG - 1 pv entries. */ 2499151497Sru va_last = va + NBPDR - PAGE_SIZE; 2500151497Sru do { 2501151497Sru m++; 2502151497Sru KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2503151497Sru ("pmap_pv_demote_pde: page %p is not managed", m)); 2504151497Sru va += PAGE_SIZE; 2505151497Sru pmap_insert_entry(pmap, va, m); 2506151497Sru } while (va < va_last); 2507151497Sru} 2508151497Sru 2509151497Srustatic void 2510151497Srupmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2511151497Sru{ 2512151497Sru struct md_page *pvh; 2513151497Sru pv_entry_t pv; 2514151497Sru vm_offset_t va_last; 2515151497Sru vm_page_t m; 2516151497Sru 2517151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2518151497Sru KASSERT((pa & PDRMASK) == 0, 2519151497Sru ("pmap_pv_promote_pde: pa is not 4mpage aligned")); 2520151497Sru 2521151497Sru /* 2522151497Sru * Transfer the first page's pv entry for this mapping to the 2523151497Sru * 4mpage's pv list. 
Aside from avoiding the cost of a call 2524151497Sru * to get_pv_entry(), a transfer avoids the possibility that 2525151497Sru * get_pv_entry() calls pmap_collect() and that pmap_collect() 2526151497Sru * removes one of the mappings that is being promoted. 2527151497Sru */ 2528151497Sru m = PHYS_TO_VM_PAGE(pa); 2529151497Sru va = trunc_4mpage(va); 2530151497Sru pv = pmap_pvh_remove(&m->md, pmap, va); 2531151497Sru KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); 2532151497Sru pvh = pa_to_pvh(pa); 2533151497Sru TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 2534151497Sru /* Free the remaining NPTEPG - 1 pv entries. */ 2535151497Sru va_last = va + NBPDR - PAGE_SIZE; 2536151497Sru do { 2537151497Sru m++; 2538151497Sru va += PAGE_SIZE; 2539151497Sru pmap_pvh_free(&m->md, pmap, va); 2540151497Sru } while (va < va_last); 2541151497Sru} 2542151497Sru 2543151497Srustatic void 2544151497Srupmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2545151497Sru{ 2546151497Sru pv_entry_t pv; 2547151497Sru 2548151497Sru pv = pmap_pvh_remove(pvh, pmap, va); 2549151497Sru KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2550151497Sru free_pv_entry(pmap, pv); 2551151497Sru} 2552151497Sru 2553151497Srustatic void 2554151497Srupmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 2555151497Sru{ 2556151497Sru struct md_page *pvh; 2557151497Sru 2558151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2559151497Sru pmap_pvh_free(&m->md, pmap, va); 2560151497Sru if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { 2561151497Sru pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2562151497Sru if (TAILQ_EMPTY(&pvh->pv_list)) 2563151497Sru vm_page_aflag_clear(m, PGA_WRITEABLE); 2564151497Sru } 2565151497Sru} 2566151497Sru 2567151497Sru/* 2568151497Sru * Create a pv entry for page at pa for 2569151497Sru * (pmap, va). 2570151497Sru */ 2571151497Srustatic void 2572151497Srupmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 2573151497Sru{ 2574151497Sru pv_entry_t pv; 2575151497Sru 2576151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2577151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2578151497Sru pv = get_pv_entry(pmap, FALSE); 2579151497Sru pv->pv_va = va; 2580151497Sru TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2581151497Sru} 2582151497Sru 2583151497Sru/* 2584151497Sru * Conditionally create a pv entry. 2585151497Sru */ 2586151497Srustatic boolean_t 2587151497Srupmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 2588151497Sru{ 2589151497Sru pv_entry_t pv; 2590151497Sru 2591151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2592151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2593151497Sru if (pv_entry_count < pv_entry_high_water && 2594151497Sru (pv = get_pv_entry(pmap, TRUE)) != NULL) { 2595151497Sru pv->pv_va = va; 2596151497Sru TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2597151497Sru return (TRUE); 2598151497Sru } else 2599151497Sru return (FALSE); 2600151497Sru} 2601151497Sru 2602151497Sru/* 2603151497Sru * Create the pv entries for each of the pages within a superpage. 
2604151497Sru */ 2605151497Srustatic boolean_t 2606151497Srupmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa) 2607151497Sru{ 2608151497Sru struct md_page *pvh; 2609151497Sru pv_entry_t pv; 2610151497Sru 2611151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2612151497Sru if (pv_entry_count < pv_entry_high_water && 2613151497Sru (pv = get_pv_entry(pmap, TRUE)) != NULL) { 2614151497Sru pv->pv_va = va; 2615151497Sru pvh = pa_to_pvh(pa); 2616151497Sru TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_list); 2617151497Sru return (TRUE); 2618151497Sru } else 2619151497Sru return (FALSE); 2620151497Sru} 2621151497Sru 2622151497Sru/* 2623151497Sru * Fills a page table page with mappings to consecutive physical pages. 2624151497Sru */ 2625151497Srustatic void 2626151497Srupmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) 2627151497Sru{ 2628151497Sru pt_entry_t *pte; 2629151497Sru 2630151497Sru for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { 2631151497Sru *pte = newpte; 2632151497Sru newpte += PAGE_SIZE; 2633151497Sru } 2634151497Sru} 2635151497Sru 2636151497Sru/* 2637151497Sru * Tries to demote a 2- or 4MB page mapping. If demotion fails, the 2638151497Sru * 2- or 4MB page mapping is invalidated. 2639151497Sru */ 2640151497Srustatic boolean_t 2641151497Srupmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) 2642151497Sru{ 2643151497Sru pd_entry_t newpde, oldpde; 2644151497Sru pt_entry_t *firstpte, newpte; 2645151497Sru vm_paddr_t mptepa; 2646151497Sru vm_page_t free, mpte; 2647151497Sru 2648151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2649151497Sru oldpde = *pde; 2650151497Sru KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), 2651151497Sru ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); 2652151497Sru mpte = pmap_lookup_pt_page(pmap, va); 2653151497Sru if (mpte != NULL) 2654151497Sru pmap_remove_pt_page(pmap, mpte); 2655151497Sru else { 2656151497Sru KASSERT((oldpde & PG_W) == 0, 2657151497Sru ("pmap_demote_pde: page table page for a wired mapping" 2658151497Sru " is missing")); 2659151497Sru 2660151497Sru /* 2661151497Sru * Invalidate the 2- or 4MB page mapping and return 2662151497Sru * "failure" if the mapping was never accessed or the 2663151497Sru * allocation of the new page table page fails. 2664151497Sru */ 2665151497Sru if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, 2666151497Sru va >> PDRSHIFT, VM_ALLOC_NOOBJ | VM_ALLOC_NORMAL | 2667151497Sru VM_ALLOC_WIRED)) == NULL) { 2668151497Sru free = NULL; 2669151497Sru pmap_remove_pde(pmap, pde, trunc_4mpage(va), &free); 2670151497Sru pmap_invalidate_page(pmap, trunc_4mpage(va)); 2671151497Sru pmap_free_zero_pages(free); 2672151497Sru CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#x" 2673151497Sru " in pmap %p", va, pmap); 2674151497Sru return (FALSE); 2675151497Sru } 2676151497Sru if (va < VM_MAXUSER_ADDRESS) 2677151497Sru pmap->pm_stats.resident_count++; 2678151497Sru } 2679151497Sru mptepa = VM_PAGE_TO_PHYS(mpte); 2680151497Sru 2681151497Sru /* 2682151497Sru * If the page mapping is in the kernel's address space, then the 2683151497Sru * KPTmap can provide access to the page table page. Otherwise, 2684151497Sru * temporarily map the page table page (mpte) into the kernel's 2685151497Sru * address space at either PADDR1 or PADDR2. 
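	 *
	 * (Reading aid for the three cases below: kernel addresses can go
	 * through KPTmap directly; a thread that is pinned and holds the
	 * page queues lock may reuse the PMAP1/PADDR1 window that
	 * pmap_pte_quick() also uses; every other caller takes PMAP2mutex
	 * and uses the PMAP2/PADDR2 window.)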
2686151497Sru */ 2687151497Sru if (va >= KERNBASE) 2688151497Sru firstpte = &KPTmap[i386_btop(trunc_4mpage(va))]; 2689151497Sru else if (curthread->td_pinned > 0 && mtx_owned(&vm_page_queue_mtx)) { 2690151497Sru if ((*PMAP1 & PG_FRAME) != mptepa) { 2691151497Sru *PMAP1 = mptepa | PG_RW | PG_V | PG_A | PG_M; 2692151497Sru#ifdef SMP 2693151497Sru PMAP1cpu = PCPU_GET(cpuid); 2694151497Sru#endif 2695151497Sru invlcaddr(PADDR1); 2696151497Sru PMAP1changed++; 2697151497Sru } else 2698151497Sru#ifdef SMP 2699151497Sru if (PMAP1cpu != PCPU_GET(cpuid)) { 2700151497Sru PMAP1cpu = PCPU_GET(cpuid); 2701151497Sru invlcaddr(PADDR1); 2702151497Sru PMAP1changedcpu++; 2703151497Sru } else 2704151497Sru#endif 2705151497Sru PMAP1unchanged++; 2706151497Sru firstpte = PADDR1; 2707151497Sru } else { 2708151497Sru mtx_lock(&PMAP2mutex); 2709151497Sru if ((*PMAP2 & PG_FRAME) != mptepa) { 2710151497Sru *PMAP2 = mptepa | PG_RW | PG_V | PG_A | PG_M; 2711151497Sru pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2); 2712151497Sru } 2713151497Sru firstpte = PADDR2; 2714151497Sru } 2715151497Sru newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; 2716151497Sru KASSERT((oldpde & PG_A) != 0, 2717151497Sru ("pmap_demote_pde: oldpde is missing PG_A")); 2718151497Sru KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, 2719151497Sru ("pmap_demote_pde: oldpde is missing PG_M")); 2720151497Sru newpte = oldpde & ~PG_PS; 2721151497Sru if ((newpte & PG_PDE_PAT) != 0) 2722151497Sru newpte ^= PG_PDE_PAT | PG_PTE_PAT; 2723151497Sru 2724151497Sru /* 2725151497Sru * If the page table page is new, initialize it. 2726151497Sru */ 2727151497Sru if (mpte->wire_count == 1) { 2728151497Sru mpte->wire_count = NPTEPG; 2729151497Sru pmap_fill_ptp(firstpte, newpte); 2730151497Sru } 2731151497Sru KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), 2732151497Sru ("pmap_demote_pde: firstpte and newpte map different physical" 2733151497Sru " addresses")); 2734151497Sru 2735151497Sru /* 2736151497Sru * If the mapping has changed attributes, update the page table 2737151497Sru * entries. 2738151497Sru */ 2739151497Sru if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) 2740151497Sru pmap_fill_ptp(firstpte, newpte); 2741151497Sru 2742151497Sru /* 2743151497Sru * Demote the mapping. This pmap is locked. The old PDE has 2744151497Sru * PG_A set. If the old PDE has PG_RW set, it also has PG_M 2745151497Sru * set. Thus, there is no danger of a race with another 2746151497Sru * processor changing the setting of PG_A and/or PG_M between 2747151497Sru * the read above and the store below. 2748151497Sru */ 2749151497Sru if (workaround_erratum383) 2750151497Sru pmap_update_pde(pmap, va, pde, newpde); 2751151497Sru else if (pmap == kernel_pmap) 2752151497Sru pmap_kenter_pde(va, newpde); 2753151497Sru else 2754151497Sru pde_store(pde, newpde); 2755151497Sru if (firstpte == PADDR2) 2756151497Sru mtx_unlock(&PMAP2mutex); 2757151497Sru 2758151497Sru /* 2759151497Sru * Invalidate the recursive mapping of the page table page. 2760151497Sru */ 2761151497Sru pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); 2762151497Sru 2763151497Sru /* 2764151497Sru * Demote the pv entry. This depends on the earlier demotion 2765151497Sru * of the mapping. Specifically, the (re)creation of a per- 2766151497Sru * page pv entry might trigger the execution of pmap_collect(), 2767151497Sru * which might reclaim a newly (re)created per-page pv entry 2768151497Sru * and destroy the associated mapping. 
In order to destroy 2769151497Sru * the mapping, the PDE must have already changed from mapping 2770151497Sru * the 2mpage to referencing the page table page. 2771151497Sru */ 2772151497Sru if ((oldpde & PG_MANAGED) != 0) 2773151497Sru pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME); 2774151497Sru 2775151497Sru pmap_pde_demotions++; 2776151497Sru CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#x" 2777151497Sru " in pmap %p", va, pmap); 2778151497Sru return (TRUE); 2779151497Sru} 2780151497Sru 2781151497Sru/* 2782151497Sru * pmap_remove_pde: do the things to unmap a superpage in a process 2783151497Sru */ 2784151497Srustatic void 2785151497Srupmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, 2786151497Sru vm_page_t *free) 2787151497Sru{ 2788151497Sru struct md_page *pvh; 2789151497Sru pd_entry_t oldpde; 2790151497Sru vm_offset_t eva, va; 2791151497Sru vm_page_t m, mpte; 2792151497Sru 2793151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2794151497Sru KASSERT((sva & PDRMASK) == 0, 2795151497Sru ("pmap_remove_pde: sva is not 4mpage aligned")); 2796151497Sru oldpde = pte_load_clear(pdq); 2797151497Sru if (oldpde & PG_W) 2798151497Sru pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; 2799151497Sru 2800151497Sru /* 2801151497Sru * Machines that don't support invlpg, also don't support 2802151497Sru * PG_G. 2803151497Sru */ 2804151497Sru if (oldpde & PG_G) 2805151497Sru pmap_invalidate_page(kernel_pmap, sva); 2806151497Sru pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 2807151497Sru if (oldpde & PG_MANAGED) { 2808151497Sru pvh = pa_to_pvh(oldpde & PG_PS_FRAME); 2809151497Sru pmap_pvh_free(pvh, pmap, sva); 2810151497Sru eva = sva + NBPDR; 2811151497Sru for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); 2812151497Sru va < eva; va += PAGE_SIZE, m++) { 2813151497Sru if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2814151497Sru vm_page_dirty(m); 2815151497Sru if (oldpde & PG_A) 2816151497Sru vm_page_aflag_set(m, PGA_REFERENCED); 2817151497Sru if (TAILQ_EMPTY(&m->md.pv_list) && 2818151497Sru TAILQ_EMPTY(&pvh->pv_list)) 2819151497Sru vm_page_aflag_clear(m, PGA_WRITEABLE); 2820151497Sru } 2821151497Sru } 2822151497Sru if (pmap == kernel_pmap) { 2823151497Sru if (!pmap_demote_pde(pmap, pdq, sva)) 2824151497Sru panic("pmap_remove_pde: failed demotion"); 2825151497Sru } else { 2826151497Sru mpte = pmap_lookup_pt_page(pmap, sva); 2827151497Sru if (mpte != NULL) { 2828151497Sru pmap_remove_pt_page(pmap, mpte); 2829151497Sru pmap->pm_stats.resident_count--; 2830151497Sru KASSERT(mpte->wire_count == NPTEPG, 2831151497Sru ("pmap_remove_pde: pte page wire count error")); 2832151497Sru mpte->wire_count = 0; 2833151497Sru pmap_add_delayed_free_list(mpte, free, FALSE); 2834151497Sru atomic_subtract_int(&cnt.v_wire_count, 1); 2835151497Sru } 2836151497Sru } 2837151497Sru} 2838151497Sru 2839151497Sru/* 2840151497Sru * pmap_remove_pte: do the things to unmap a page in a process 2841151497Sru */ 2842151497Srustatic int 2843151497Srupmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free) 2844151497Sru{ 2845151497Sru pt_entry_t oldpte; 2846151497Sru vm_page_t m; 2847151497Sru 2848151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2849151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2850151497Sru oldpte = pte_load_clear(ptq); 2851151497Sru if (oldpte & PG_W) 2852151497Sru pmap->pm_stats.wired_count -= 1; 2853151497Sru /* 2854151497Sru * Machines that don't support invlpg, also don't support 2855151497Sru * PG_G. 
2856151497Sru */ 2857151497Sru if (oldpte & PG_G) 2858151497Sru pmap_invalidate_page(kernel_pmap, va); 2859151497Sru pmap->pm_stats.resident_count -= 1; 2860151497Sru if (oldpte & PG_MANAGED) { 2861151497Sru m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); 2862151497Sru if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 2863151497Sru vm_page_dirty(m); 2864151497Sru if (oldpte & PG_A) 2865151497Sru vm_page_aflag_set(m, PGA_REFERENCED); 2866151497Sru pmap_remove_entry(pmap, m, va); 2867151497Sru } 2868151497Sru return (pmap_unuse_pt(pmap, va, free)); 2869151497Sru} 2870151497Sru 2871151497Sru/* 2872151497Sru * Remove a single page from a process address space 2873151497Sru */ 2874151497Srustatic void 2875151497Srupmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free) 2876151497Sru{ 2877151497Sru pt_entry_t *pte; 2878151497Sru 2879151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2880151497Sru KASSERT(curthread->td_pinned > 0, ("curthread not pinned")); 2881151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2882151497Sru if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0) 2883151497Sru return; 2884151497Sru pmap_remove_pte(pmap, pte, va, free); 2885151497Sru pmap_invalidate_page(pmap, va); 2886151497Sru} 2887151497Sru 2888151497Sru/* 2889151497Sru * Remove the given range of addresses from the specified map. 2890151497Sru * 2891151497Sru * It is assumed that the start and end are properly 2892151497Sru * rounded to the page size. 2893151497Sru */ 2894151497Sruvoid 2895151497Srupmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2896151497Sru{ 2897151497Sru vm_offset_t pdnxt; 2898151497Sru pd_entry_t ptpaddr; 2899151497Sru pt_entry_t *pte; 2900151497Sru vm_page_t free = NULL; 2901151497Sru int anyvalid; 2902151497Sru 2903151497Sru /* 2904151497Sru * Perform an unsynchronized read. This is, however, safe. 2905151497Sru */ 2906151497Sru if (pmap->pm_stats.resident_count == 0) 2907151497Sru return; 2908151497Sru 2909151497Sru anyvalid = 0; 2910151497Sru 2911151497Sru vm_page_lock_queues(); 2912151497Sru sched_pin(); 2913151497Sru PMAP_LOCK(pmap); 2914151497Sru 2915151497Sru /* 2916151497Sru * special handling of removing one page. a very 2917151497Sru * common operation and easy to short circuit some 2918151497Sru * code. 2919151497Sru */ 2920151497Sru if ((sva + PAGE_SIZE == eva) && 2921151497Sru ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) { 2922151497Sru pmap_remove_page(pmap, sva, &free); 2923151497Sru goto out; 2924151497Sru } 2925151497Sru 2926151497Sru for (; sva < eva; sva = pdnxt) { 2927151497Sru u_int pdirindex; 2928151497Sru 2929151497Sru /* 2930151497Sru * Calculate index for next page table. 2931151497Sru */ 2932151497Sru pdnxt = (sva + NBPDR) & ~PDRMASK; 2933151497Sru if (pdnxt < sva) 2934151497Sru pdnxt = eva; 2935151497Sru if (pmap->pm_stats.resident_count == 0) 2936151497Sru break; 2937151497Sru 2938151497Sru pdirindex = sva >> PDRSHIFT; 2939151497Sru ptpaddr = pmap->pm_pdir[pdirindex]; 2940151497Sru 2941151497Sru /* 2942151497Sru * Weed out invalid mappings. Note: we assume that the page 2943151497Sru * directory table is always allocated, and in kernel virtual. 2944151497Sru */ 2945151497Sru if (ptpaddr == 0) 2946151497Sru continue; 2947151497Sru 2948151497Sru /* 2949151497Sru * Check for large page. 2950151497Sru */ 2951151497Sru if ((ptpaddr & PG_PS) != 0) { 2952151497Sru /* 2953151497Sru * Are we removing the entire large page? If not, 2954151497Sru * demote the mapping and fall through. 
2955151497Sru */ 2956151497Sru if (sva + NBPDR == pdnxt && eva >= pdnxt) { 2957151497Sru /* 2958151497Sru * The TLB entry for a PG_G mapping is 2959151497Sru * invalidated by pmap_remove_pde(). 2960151497Sru */ 2961151497Sru if ((ptpaddr & PG_G) == 0) 2962151497Sru anyvalid = 1; 2963151497Sru pmap_remove_pde(pmap, 2964151497Sru &pmap->pm_pdir[pdirindex], sva, &free); 2965151497Sru continue; 2966151497Sru } else if (!pmap_demote_pde(pmap, 2967151497Sru &pmap->pm_pdir[pdirindex], sva)) { 2968151497Sru /* The large page mapping was destroyed. */ 2969151497Sru continue; 2970151497Sru } 2971151497Sru } 2972151497Sru 2973151497Sru /* 2974151497Sru * Limit our scan to either the end of the va represented 2975151497Sru * by the current page table page, or to the end of the 2976151497Sru * range being removed. 2977151497Sru */ 2978151497Sru if (pdnxt > eva) 2979151497Sru pdnxt = eva; 2980151497Sru 2981151497Sru for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 2982151497Sru sva += PAGE_SIZE) { 2983151497Sru if (*pte == 0) 2984151497Sru continue; 2985151497Sru 2986151497Sru /* 2987151497Sru * The TLB entry for a PG_G mapping is invalidated 2988151497Sru * by pmap_remove_pte(). 2989151497Sru */ 2990151497Sru if ((*pte & PG_G) == 0) 2991151497Sru anyvalid = 1; 2992151497Sru if (pmap_remove_pte(pmap, pte, sva, &free)) 2993151497Sru break; 2994151497Sru } 2995151497Sru } 2996151497Sruout: 2997151497Sru sched_unpin(); 2998151497Sru if (anyvalid) 2999151497Sru pmap_invalidate_all(pmap); 3000151497Sru vm_page_unlock_queues(); 3001151497Sru PMAP_UNLOCK(pmap); 3002151497Sru pmap_free_zero_pages(free); 3003151497Sru} 3004151497Sru 3005151497Sru/* 3006151497Sru * Routine: pmap_remove_all 3007151497Sru * Function: 3008151497Sru * Removes this physical page from 3009151497Sru * all physical maps in which it resides. 3010151497Sru * Reflects back modify bits to the pager. 3011151497Sru * 3012151497Sru * Notes: 3013151497Sru * Original versions of this routine were very 3014151497Sru * inefficient because they iteratively called 3015151497Sru * pmap_remove (slow...) 
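 *
 * The present version instead walks the page's pv lists directly:
 * any 2/4MB mapping that includes the page is first demoted, and
 * each remaining 4KB mapping is then removed, with its modified and
 * referenced bits reflected back to the vm_page before the pv entry
 * is freed.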
3016151497Sru */ 3017151497Sru 3018151497Sruvoid 3019151497Srupmap_remove_all(vm_page_t m) 3020151497Sru{ 3021151497Sru struct md_page *pvh; 3022151497Sru pv_entry_t pv; 3023151497Sru pmap_t pmap; 3024151497Sru pt_entry_t *pte, tpte; 3025151497Sru pd_entry_t *pde; 3026151497Sru vm_offset_t va; 3027151497Sru vm_page_t free; 3028151497Sru 3029151497Sru KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3030151497Sru ("pmap_remove_all: page %p is not managed", m)); 3031151497Sru free = NULL; 3032151497Sru vm_page_lock_queues(); 3033151497Sru sched_pin(); 3034151497Sru if ((m->flags & PG_FICTITIOUS) != 0) 3035151497Sru goto small_mappings; 3036151497Sru pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3037151497Sru while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 3038151497Sru va = pv->pv_va; 3039151497Sru pmap = PV_PMAP(pv); 3040151497Sru PMAP_LOCK(pmap); 3041151497Sru pde = pmap_pde(pmap, va); 3042151497Sru (void)pmap_demote_pde(pmap, pde, va); 3043151497Sru PMAP_UNLOCK(pmap); 3044151497Sru } 3045151497Srusmall_mappings: 3046151497Sru while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3047151497Sru pmap = PV_PMAP(pv); 3048151497Sru PMAP_LOCK(pmap); 3049151497Sru pmap->pm_stats.resident_count--; 3050151497Sru pde = pmap_pde(pmap, pv->pv_va); 3051151497Sru KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" 3052151497Sru " a 4mpage in page %p's pv list", m)); 3053151497Sru pte = pmap_pte_quick(pmap, pv->pv_va); 3054151497Sru tpte = pte_load_clear(pte); 3055151497Sru if (tpte & PG_W) 3056151497Sru pmap->pm_stats.wired_count--; 3057151497Sru if (tpte & PG_A) 3058151497Sru vm_page_aflag_set(m, PGA_REFERENCED); 3059151497Sru 3060151497Sru /* 3061151497Sru * Update the vm_page_t clean and reference bits. 3062151497Sru */ 3063151497Sru if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) 3064151497Sru vm_page_dirty(m); 3065151497Sru pmap_unuse_pt(pmap, pv->pv_va, &free); 3066151497Sru pmap_invalidate_page(pmap, pv->pv_va); 3067151497Sru TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3068151497Sru free_pv_entry(pmap, pv); 3069151497Sru PMAP_UNLOCK(pmap); 3070151497Sru } 3071151497Sru vm_page_aflag_clear(m, PGA_WRITEABLE); 3072151497Sru sched_unpin(); 3073151497Sru vm_page_unlock_queues(); 3074151497Sru pmap_free_zero_pages(free); 3075151497Sru} 3076151497Sru 3077151497Sru/* 3078151497Sru * pmap_protect_pde: do the things to protect a 4mpage in a process 3079151497Sru */ 3080151497Srustatic boolean_t 3081151497Srupmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) 3082151497Sru{ 3083151497Sru pd_entry_t newpde, oldpde; 3084151497Sru vm_offset_t eva, va; 3085151497Sru vm_page_t m; 3086151497Sru boolean_t anychanged; 3087151497Sru 3088151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3089151497Sru KASSERT((sva & PDRMASK) == 0, 3090151497Sru ("pmap_protect_pde: sva is not 4mpage aligned")); 3091151497Sru anychanged = FALSE; 3092151497Sruretry: 3093151497Sru oldpde = newpde = *pde; 3094151497Sru if (oldpde & PG_MANAGED) { 3095151497Sru eva = sva + NBPDR; 3096151497Sru for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); 3097151497Sru va < eva; va += PAGE_SIZE, m++) 3098151497Sru if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) 3099151497Sru vm_page_dirty(m); 3100151497Sru } 3101151497Sru if ((prot & VM_PROT_WRITE) == 0) 3102151497Sru newpde &= ~(PG_RW | PG_M); 3103151497Sru#ifdef PAE 3104151497Sru if ((prot & VM_PROT_EXECUTE) == 0) 3105151497Sru newpde |= pg_nx; 3106151497Sru#endif 3107151497Sru if (newpde != oldpde) { 3108151497Sru if (!pde_cmpset(pde, oldpde, newpde)) 3109151497Sru goto retry; 
3110151497Sru if (oldpde & PG_G) 3111151497Sru pmap_invalidate_page(pmap, sva); 3112151497Sru else 3113151497Sru anychanged = TRUE; 3114151497Sru } 3115151497Sru return (anychanged); 3116151497Sru} 3117151497Sru 3118151497Sru/* 3119151497Sru * Set the physical protection on the 3120151497Sru * specified range of this map as requested. 3121151497Sru */ 3122151497Sruvoid 3123151497Srupmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 3124151497Sru{ 3125151497Sru vm_offset_t pdnxt; 3126151497Sru pd_entry_t ptpaddr; 3127151497Sru pt_entry_t *pte; 3128151497Sru boolean_t anychanged, pv_lists_locked; 3129151497Sru 3130151497Sru if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 3131151497Sru pmap_remove(pmap, sva, eva); 3132151497Sru return; 3133151497Sru } 3134151497Sru 3135151497Sru#ifdef PAE 3136151497Sru if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == 3137151497Sru (VM_PROT_WRITE|VM_PROT_EXECUTE)) 3138151497Sru return; 3139151497Sru#else 3140151497Sru if (prot & VM_PROT_WRITE) 3141151497Sru return; 3142151497Sru#endif 3143151497Sru 3144151497Sru if (pmap_is_current(pmap)) 3145151497Sru pv_lists_locked = FALSE; 3146151497Sru else { 3147151497Sru pv_lists_locked = TRUE; 3148151497Sruresume: 3149151497Sru vm_page_lock_queues(); 3150151497Sru sched_pin(); 3151151497Sru } 3152151497Sru anychanged = FALSE; 3153151497Sru 3154151497Sru PMAP_LOCK(pmap); 3155151497Sru for (; sva < eva; sva = pdnxt) { 3156151497Sru pt_entry_t obits, pbits; 3157151497Sru u_int pdirindex; 3158151497Sru 3159151497Sru pdnxt = (sva + NBPDR) & ~PDRMASK; 3160151497Sru if (pdnxt < sva) 3161151497Sru pdnxt = eva; 3162151497Sru 3163151497Sru pdirindex = sva >> PDRSHIFT; 3164151497Sru ptpaddr = pmap->pm_pdir[pdirindex]; 3165151497Sru 3166151497Sru /* 3167151497Sru * Weed out invalid mappings. Note: we assume that the page 3168151497Sru * directory table is always allocated, and in kernel virtual. 3169151497Sru */ 3170151497Sru if (ptpaddr == 0) 3171151497Sru continue; 3172151497Sru 3173151497Sru /* 3174151497Sru * Check for large page. 3175151497Sru */ 3176151497Sru if ((ptpaddr & PG_PS) != 0) { 3177151497Sru /* 3178151497Sru * Are we protecting the entire large page? If not, 3179151497Sru * demote the mapping and fall through. 3180151497Sru */ 3181151497Sru if (sva + NBPDR == pdnxt && eva >= pdnxt) { 3182151497Sru /* 3183151497Sru * The TLB entry for a PG_G mapping is 3184151497Sru * invalidated by pmap_protect_pde(). 3185151497Sru */ 3186151497Sru if (pmap_protect_pde(pmap, 3187151497Sru &pmap->pm_pdir[pdirindex], sva, prot)) 3188151497Sru anychanged = TRUE; 3189151497Sru continue; 3190151497Sru } else { 3191151497Sru if (!pv_lists_locked) { 3192151497Sru pv_lists_locked = TRUE; 3193151497Sru if (!mtx_trylock(&vm_page_queue_mtx)) { 3194151497Sru if (anychanged) 3195151497Sru pmap_invalidate_all( 3196151497Sru pmap); 3197151497Sru PMAP_UNLOCK(pmap); 3198151497Sru goto resume; 3199151497Sru } 3200151497Sru } 3201151497Sru if (!pmap_demote_pde(pmap, 3202151497Sru &pmap->pm_pdir[pdirindex], sva)) { 3203151497Sru /* 3204151497Sru * The large page mapping was 3205151497Sru * destroyed. 
3206151497Sru */ 3207151497Sru continue; 3208151497Sru } 3209151497Sru } 3210151497Sru } 3211151497Sru 3212151497Sru if (pdnxt > eva) 3213151497Sru pdnxt = eva; 3214151497Sru 3215151497Sru for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++, 3216151497Sru sva += PAGE_SIZE) { 3217151497Sru vm_page_t m; 3218151497Sru 3219151497Sruretry: 3220151497Sru /* 3221151497Sru * Regardless of whether a pte is 32 or 64 bits in 3222151497Sru * size, PG_RW, PG_A, and PG_M are among the least 3223151497Sru * significant 32 bits. 3224151497Sru */ 3225151497Sru obits = pbits = *pte; 3226151497Sru if ((pbits & PG_V) == 0) 3227151497Sru continue; 3228151497Sru 3229151497Sru if ((prot & VM_PROT_WRITE) == 0) { 3230151497Sru if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == 3231151497Sru (PG_MANAGED | PG_M | PG_RW)) { 3232151497Sru m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 3233151497Sru vm_page_dirty(m); 3234151497Sru } 3235151497Sru pbits &= ~(PG_RW | PG_M); 3236151497Sru } 3237151497Sru#ifdef PAE 3238151497Sru if ((prot & VM_PROT_EXECUTE) == 0) 3239151497Sru pbits |= pg_nx; 3240151497Sru#endif 3241151497Sru 3242151497Sru if (pbits != obits) { 3243151497Sru#ifdef PAE 3244151497Sru if (!atomic_cmpset_64(pte, obits, pbits)) 3245151497Sru goto retry; 3246151497Sru#else 3247151497Sru if (!atomic_cmpset_int((u_int *)pte, obits, 3248151497Sru pbits)) 3249151497Sru goto retry; 3250151497Sru#endif 3251151497Sru if (obits & PG_G) 3252151497Sru pmap_invalidate_page(pmap, sva); 3253151497Sru else 3254151497Sru anychanged = TRUE; 3255151497Sru } 3256151497Sru } 3257151497Sru } 3258151497Sru if (anychanged) 3259151497Sru pmap_invalidate_all(pmap); 3260151497Sru if (pv_lists_locked) { 3261151497Sru sched_unpin(); 3262151497Sru vm_page_unlock_queues(); 3263151497Sru } 3264151497Sru PMAP_UNLOCK(pmap); 3265151497Sru} 3266151497Sru 3267151497Sru/* 3268151497Sru * Tries to promote the 512 or 1024, contiguous 4KB page mappings that are 3269151497Sru * within a single page table page (PTP) to a single 2- or 4MB page mapping. 3270151497Sru * For promotion to occur, two conditions must be met: (1) the 4KB page 3271151497Sru * mappings must map aligned, contiguous physical memory and (2) the 4KB page 3272151497Sru * mappings must have identical characteristics. 3273151497Sru * 3274151497Sru * Managed (PG_MANAGED) mappings within the kernel address space are not 3275151497Sru * promoted. The reason is that kernel PDEs are replicated in each pmap but 3276151497Sru * pmap_clear_ptes() and pmap_ts_referenced() only read the PDE from the kernel 3277151497Sru * pmap. 3278151497Sru */ 3279151497Srustatic void 3280151497Srupmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) 3281151497Sru{ 3282151497Sru pd_entry_t newpde; 3283151497Sru pt_entry_t *firstpte, oldpte, pa, *pte; 3284151497Sru vm_offset_t oldpteva; 3285151497Sru vm_page_t mpte; 3286151497Sru 3287151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3288151497Sru 3289151497Sru /* 3290151497Sru * Examine the first PTE in the specified PTP. Abort if this PTE is 3291151497Sru * either invalid, unused, or does not map the first 4KB physical page 3292151497Sru * within a 2- or 4MB page. 
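 *
 * In the test below, (PG_FRAME & PDRMASK) selects the physical
 * address bits of the PTE that lie below the superpage boundary;
 * those bits must all be zero, and PG_A and PG_V must both be set,
 * for the first PTE to describe the aligned, valid, and accessed
 * start of a 2/4MB run.  (Without PAE, PDRMASK covers 4MB and NPTEPG
 * is 1024; with PAE, PDRMASK covers 2MB and NPTEPG is 512.)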
3293151497Sru */ 3294151497Sru firstpte = pmap_pte_quick(pmap, trunc_4mpage(va)); 3295151497Srusetpde: 3296151497Sru newpde = *firstpte; 3297151497Sru if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { 3298151497Sru pmap_pde_p_failures++; 3299151497Sru CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3300151497Sru " in pmap %p", va, pmap); 3301151497Sru return; 3302151497Sru } 3303151497Sru if ((*firstpte & PG_MANAGED) != 0 && pmap == kernel_pmap) { 3304151497Sru pmap_pde_p_failures++; 3305151497Sru CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3306151497Sru " in pmap %p", va, pmap); 3307151497Sru return; 3308151497Sru } 3309151497Sru if ((newpde & (PG_M | PG_RW)) == PG_RW) { 3310151497Sru /* 3311151497Sru * When PG_M is already clear, PG_RW can be cleared without 3312151497Sru * a TLB invalidation. 3313151497Sru */ 3314151497Sru if (!atomic_cmpset_int((u_int *)firstpte, newpde, newpde & 3315151497Sru ~PG_RW)) 3316151497Sru goto setpde; 3317151497Sru newpde &= ~PG_RW; 3318151497Sru } 3319151497Sru 3320151497Sru /* 3321151497Sru * Examine each of the other PTEs in the specified PTP. Abort if this 3322151497Sru * PTE maps an unexpected 4KB physical page or does not have identical 3323151497Sru * characteristics to the first PTE. 3324151497Sru */ 3325151497Sru pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; 3326151497Sru for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { 3327151497Srusetpte: 3328151497Sru oldpte = *pte; 3329151497Sru if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { 3330151497Sru pmap_pde_p_failures++; 3331151497Sru CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3332151497Sru " in pmap %p", va, pmap); 3333151497Sru return; 3334151497Sru } 3335151497Sru if ((oldpte & (PG_M | PG_RW)) == PG_RW) { 3336151497Sru /* 3337151497Sru * When PG_M is already clear, PG_RW can be cleared 3338151497Sru * without a TLB invalidation. 3339151497Sru */ 3340151497Sru if (!atomic_cmpset_int((u_int *)pte, oldpte, 3341151497Sru oldpte & ~PG_RW)) 3342151497Sru goto setpte; 3343151497Sru oldpte &= ~PG_RW; 3344151497Sru oldpteva = (oldpte & PG_FRAME & PDRMASK) | 3345151497Sru (va & ~PDRMASK); 3346151497Sru CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#x" 3347151497Sru " in pmap %p", oldpteva, pmap); 3348151497Sru } 3349151497Sru if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { 3350151497Sru pmap_pde_p_failures++; 3351151497Sru CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#x" 3352151497Sru " in pmap %p", va, pmap); 3353151497Sru return; 3354151497Sru } 3355151497Sru pa -= PAGE_SIZE; 3356151497Sru } 3357151497Sru 3358151497Sru /* 3359151497Sru * Save the page table page in its current state until the PDE 3360151497Sru * mapping the superpage is demoted by pmap_demote_pde() or 3361151497Sru * destroyed by pmap_remove_pde(). 3362151497Sru */ 3363151497Sru mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); 3364151497Sru KASSERT(mpte >= vm_page_array && 3365151497Sru mpte < &vm_page_array[vm_page_array_size], 3366151497Sru ("pmap_promote_pde: page table page is out of range")); 3367151497Sru KASSERT(mpte->pindex == va >> PDRSHIFT, 3368151497Sru ("pmap_promote_pde: page table page's pindex is wrong")); 3369151497Sru pmap_insert_pt_page(pmap, mpte); 3370151497Sru 3371151497Sru /* 3372151497Sru * Promote the pv entries. 
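 * The 4KB pv entries covering the range are consolidated by
 * pmap_pv_promote_pde() into a single pv entry on the superpage's
 * pv list.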
3373151497Sru */ 3374151497Sru if ((newpde & PG_MANAGED) != 0) 3375151497Sru pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME); 3376151497Sru 3377151497Sru /* 3378151497Sru * Propagate the PAT index to its proper position. 3379151497Sru */ 3380151497Sru if ((newpde & PG_PTE_PAT) != 0) 3381151497Sru newpde ^= PG_PDE_PAT | PG_PTE_PAT; 3382151497Sru 3383151497Sru /* 3384151497Sru * Map the superpage. 3385151497Sru */ 3386151497Sru if (workaround_erratum383) 3387151497Sru pmap_update_pde(pmap, va, pde, PG_PS | newpde); 3388151497Sru else if (pmap == kernel_pmap) 3389151497Sru pmap_kenter_pde(va, PG_PS | newpde); 3390151497Sru else 3391151497Sru pde_store(pde, PG_PS | newpde); 3392151497Sru 3393151497Sru pmap_pde_promotions++; 3394151497Sru CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#x" 3395151497Sru " in pmap %p", va, pmap); 3396151497Sru} 3397151497Sru 3398151497Sru/* 3399151497Sru * Insert the given physical page (p) at 3400151497Sru * the specified virtual address (v) in the 3401151497Sru * target physical map with the protection requested. 3402151497Sru * 3403151497Sru * If specified, the page will be wired down, meaning 3404151497Sru * that the related pte can not be reclaimed. 3405151497Sru * 3406151497Sru * NB: This is the only routine which MAY NOT lazy-evaluate 3407151497Sru * or lose information. That is, this routine must actually 3408151497Sru * insert this page into the given map NOW. 3409151497Sru */ 3410151497Sruvoid 3411151497Srupmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, 3412151497Sru vm_prot_t prot, boolean_t wired) 3413151497Sru{ 3414151497Sru pd_entry_t *pde; 3415151497Sru pt_entry_t *pte; 3416151497Sru pt_entry_t newpte, origpte; 3417151497Sru pv_entry_t pv; 3418151497Sru vm_paddr_t opa, pa; 3419151497Sru vm_page_t mpte, om; 3420151497Sru boolean_t invlva; 3421151497Sru 3422151497Sru va = trunc_page(va); 3423151497Sru KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); 3424151497Sru KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, 3425151497Sru ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", 3426151497Sru va)); 3427151497Sru KASSERT((m->oflags & (VPO_UNMANAGED | VPO_BUSY)) != 0 || 3428151497Sru VM_OBJECT_LOCKED(m->object), 3429151497Sru ("pmap_enter: page %p is not busy", m)); 3430151497Sru 3431151497Sru mpte = NULL; 3432151497Sru 3433151497Sru vm_page_lock_queues(); 3434151497Sru PMAP_LOCK(pmap); 3435151497Sru sched_pin(); 3436151497Sru 3437151497Sru /* 3438151497Sru * In the case that a page table page is not 3439151497Sru * resident, we are creating it here. 3440151497Sru */ 3441151497Sru if (va < VM_MAXUSER_ADDRESS) { 3442151497Sru mpte = pmap_allocpte(pmap, va, M_WAITOK); 3443151497Sru } 3444151497Sru 3445151497Sru pde = pmap_pde(pmap, va); 3446151497Sru if ((*pde & PG_PS) != 0) 3447151497Sru panic("pmap_enter: attempted pmap_enter on 4MB page"); 3448151497Sru pte = pmap_pte_quick(pmap, va); 3449151497Sru 3450151497Sru /* 3451151497Sru * Page Directory table entry not valid, we need a new PT page 3452151497Sru */ 3453151497Sru if (pte == NULL) { 3454151497Sru panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x", 3455151497Sru (uintmax_t)pmap->pm_pdir[PTDPTDI], va); 3456151497Sru } 3457151497Sru 3458151497Sru pa = VM_PAGE_TO_PHYS(m); 3459151497Sru om = NULL; 3460151497Sru origpte = *pte; 3461151497Sru opa = origpte & PG_FRAME; 3462151497Sru 3463151497Sru /* 3464151497Sru * Mapping has not changed, must be protection or wiring change. 
3465151497Sru */ 3466151497Sru if (origpte && (opa == pa)) { 3467151497Sru /* 3468151497Sru * Wiring change, just update stats. We don't worry about 3469151497Sru * wiring PT pages as they remain resident as long as there 3470151497Sru * are valid mappings in them. Hence, if a user page is wired, 3471151497Sru * the PT page will be also. 3472151497Sru */ 3473151497Sru if (wired && ((origpte & PG_W) == 0)) 3474151497Sru pmap->pm_stats.wired_count++; 3475151497Sru else if (!wired && (origpte & PG_W)) 3476151497Sru pmap->pm_stats.wired_count--; 3477151497Sru 3478151497Sru /* 3479151497Sru * Remove extra pte reference 3480151497Sru */ 3481151497Sru if (mpte) 3482151497Sru mpte->wire_count--; 3483151497Sru 3484151497Sru if (origpte & PG_MANAGED) { 3485151497Sru om = m; 3486151497Sru pa |= PG_MANAGED; 3487151497Sru } 3488151497Sru goto validate; 3489151497Sru } 3490151497Sru 3491151497Sru pv = NULL; 3492151497Sru 3493151497Sru /* 3494151497Sru * Mapping has changed, invalidate old range and fall through to 3495151497Sru * handle validating new mapping. 3496151497Sru */ 3497151497Sru if (opa) { 3498151497Sru if (origpte & PG_W) 3499151497Sru pmap->pm_stats.wired_count--; 3500151497Sru if (origpte & PG_MANAGED) { 3501151497Sru om = PHYS_TO_VM_PAGE(opa); 3502151497Sru pv = pmap_pvh_remove(&om->md, pmap, va); 3503151497Sru } 3504151497Sru if (mpte != NULL) { 3505151497Sru mpte->wire_count--; 3506151497Sru KASSERT(mpte->wire_count > 0, 3507151497Sru ("pmap_enter: missing reference to page table page," 3508151497Sru " va: 0x%x", va)); 3509151497Sru } 3510151497Sru } else 3511151497Sru pmap->pm_stats.resident_count++; 3512151497Sru 3513151497Sru /* 3514151497Sru * Enter on the PV list if part of our managed memory. 3515151497Sru */ 3516151497Sru if ((m->oflags & VPO_UNMANAGED) == 0) { 3517151497Sru KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva, 3518151497Sru ("pmap_enter: managed mapping within the clean submap")); 3519151497Sru if (pv == NULL) 3520151497Sru pv = get_pv_entry(pmap, FALSE); 3521151497Sru pv->pv_va = va; 3522151497Sru TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3523151497Sru pa |= PG_MANAGED; 3524151497Sru } else if (pv != NULL) 3525151497Sru free_pv_entry(pmap, pv); 3526151497Sru 3527151497Sru /* 3528151497Sru * Increment counters 3529151497Sru */ 3530151497Sru if (wired) 3531151497Sru pmap->pm_stats.wired_count++; 3532151497Sru 3533151497Sruvalidate: 3534151497Sru /* 3535151497Sru * Now validate mapping with desired protection/wiring. 3536151497Sru */ 3537151497Sru newpte = (pt_entry_t)(pa | pmap_cache_bits(m->md.pat_mode, 0) | PG_V); 3538151497Sru if ((prot & VM_PROT_WRITE) != 0) { 3539151497Sru newpte |= PG_RW; 3540151497Sru if ((newpte & PG_MANAGED) != 0) 3541151497Sru vm_page_aflag_set(m, PGA_WRITEABLE); 3542151497Sru } 3543151497Sru#ifdef PAE 3544151497Sru if ((prot & VM_PROT_EXECUTE) == 0) 3545151497Sru newpte |= pg_nx; 3546151497Sru#endif 3547151497Sru if (wired) 3548151497Sru newpte |= PG_W; 3549151497Sru if (va < VM_MAXUSER_ADDRESS) 3550151497Sru newpte |= PG_U; 3551151497Sru if (pmap == kernel_pmap) 3552151497Sru newpte |= pgeflag; 3553151497Sru 3554151497Sru /* 3555151497Sru * if the mapping or permission bits are different, we need 3556151497Sru * to update the pte. 
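 * PG_M and PG_A are excluded from the comparison because they are
 * maintained by the hardware; a difference in those bits alone does
 * not require replacing the mapping.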
3557151497Sru */ 3558151497Sru if ((origpte & ~(PG_M|PG_A)) != newpte) { 3559151497Sru newpte |= PG_A; 3560151497Sru if ((access & VM_PROT_WRITE) != 0) 3561151497Sru newpte |= PG_M; 3562151497Sru if (origpte & PG_V) { 3563151497Sru invlva = FALSE; 3564151497Sru origpte = pte_load_store(pte, newpte); 3565151497Sru if (origpte & PG_A) { 3566151497Sru if (origpte & PG_MANAGED) 3567151497Sru vm_page_aflag_set(om, PGA_REFERENCED); 3568151497Sru if (opa != VM_PAGE_TO_PHYS(m)) 3569151497Sru invlva = TRUE; 3570151497Sru#ifdef PAE 3571151497Sru if ((origpte & PG_NX) == 0 && 3572151497Sru (newpte & PG_NX) != 0) 3573151497Sru invlva = TRUE; 3574151497Sru#endif 3575151497Sru } 3576151497Sru if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 3577151497Sru if ((origpte & PG_MANAGED) != 0) 3578151497Sru vm_page_dirty(om); 3579151497Sru if ((prot & VM_PROT_WRITE) == 0) 3580151497Sru invlva = TRUE; 3581151497Sru } 3582151497Sru if ((origpte & PG_MANAGED) != 0 && 3583151497Sru TAILQ_EMPTY(&om->md.pv_list) && 3584151497Sru ((om->flags & PG_FICTITIOUS) != 0 || 3585151497Sru TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) 3586151497Sru vm_page_aflag_clear(om, PGA_WRITEABLE); 3587151497Sru if (invlva) 3588151497Sru pmap_invalidate_page(pmap, va); 3589151497Sru } else 3590151497Sru pte_store(pte, newpte); 3591151497Sru } 3592151497Sru 3593151497Sru /* 3594151497Sru * If both the page table page and the reservation are fully 3595151497Sru * populated, then attempt promotion. 3596151497Sru */ 3597151497Sru if ((mpte == NULL || mpte->wire_count == NPTEPG) && 3598151497Sru pg_ps_enabled && (m->flags & PG_FICTITIOUS) == 0 && 3599151497Sru vm_reserv_level_iffullpop(m) == 0) 3600151497Sru pmap_promote_pde(pmap, pde, va); 3601151497Sru 3602151497Sru sched_unpin(); 3603151497Sru vm_page_unlock_queues(); 3604151497Sru PMAP_UNLOCK(pmap); 3605151497Sru} 3606151497Sru 3607151497Sru/* 3608151497Sru * Tries to create a 2- or 4MB page mapping. Returns TRUE if successful and 3609151497Sru * FALSE otherwise. Fails if (1) a page table page cannot be allocated without 3610151497Sru * blocking, (2) a mapping already exists at the specified virtual address, or 3611151497Sru * (3) a pv entry cannot be allocated without reclaiming another pv entry. 3612151497Sru */ 3613151497Srustatic boolean_t 3614151497Srupmap_enter_pde(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3615151497Sru{ 3616151497Sru pd_entry_t *pde, newpde; 3617151497Sru 3618151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3619151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3620151497Sru pde = pmap_pde(pmap, va); 3621151497Sru if (*pde != 0) { 3622151497Sru CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" 3623151497Sru " in pmap %p", va, pmap); 3624151497Sru return (FALSE); 3625151497Sru } 3626151497Sru newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 1) | 3627151497Sru PG_PS | PG_V; 3628151497Sru if ((m->oflags & VPO_UNMANAGED) == 0) { 3629151497Sru newpde |= PG_MANAGED; 3630151497Sru 3631151497Sru /* 3632151497Sru * Abort this mapping if its PV entry could not be created. 
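 * As with condition (3) above, the pv entry is allocated only if that
 * can be done without reclaiming another pv entry; otherwise this
 * 2/4MB mapping is simply not created and the caller falls back to
 * 4KB mappings.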
3633151497Sru */ 3634151497Sru if (!pmap_pv_insert_pde(pmap, va, VM_PAGE_TO_PHYS(m))) { 3635151497Sru CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" 3636151497Sru " in pmap %p", va, pmap); 3637151497Sru return (FALSE); 3638151497Sru } 3639151497Sru } 3640151497Sru#ifdef PAE 3641151497Sru if ((prot & VM_PROT_EXECUTE) == 0) 3642151497Sru newpde |= pg_nx; 3643151497Sru#endif 3644151497Sru if (va < VM_MAXUSER_ADDRESS) 3645151497Sru newpde |= PG_U; 3646151497Sru 3647151497Sru /* 3648151497Sru * Increment counters. 3649151497Sru */ 3650151497Sru pmap->pm_stats.resident_count += NBPDR / PAGE_SIZE; 3651151497Sru 3652151497Sru /* 3653151497Sru * Map the superpage. 3654151497Sru */ 3655151497Sru pde_store(pde, newpde); 3656151497Sru 3657151497Sru pmap_pde_mappings++; 3658151497Sru CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" 3659151497Sru " in pmap %p", va, pmap); 3660151497Sru return (TRUE); 3661151497Sru} 3662151497Sru 3663151497Sru/* 3664151497Sru * Maps a sequence of resident pages belonging to the same object. 3665151497Sru * The sequence begins with the given page m_start. This page is 3666151497Sru * mapped at the given virtual address start. Each subsequent page is 3667151497Sru * mapped at a virtual address that is offset from start by the same 3668151497Sru * amount as the page is offset from m_start within the object. The 3669151497Sru * last page in the sequence is the page with the largest offset from 3670151497Sru * m_start that can be mapped at a virtual address less than the given 3671151497Sru * virtual address end. Not every virtual page between start and end 3672151497Sru * is mapped; only those for which a resident page exists with the 3673151497Sru * corresponding offset from m_start are mapped. 3674151497Sru */ 3675151497Sruvoid 3676151497Srupmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3677151497Sru vm_page_t m_start, vm_prot_t prot) 3678151497Sru{ 3679151497Sru vm_offset_t va; 3680151497Sru vm_page_t m, mpte; 3681151497Sru vm_pindex_t diff, psize; 3682151497Sru 3683151497Sru VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 3684151497Sru psize = atop(end - start); 3685151497Sru mpte = NULL; 3686151497Sru m = m_start; 3687151497Sru vm_page_lock_queues(); 3688151497Sru PMAP_LOCK(pmap); 3689151497Sru while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3690151497Sru va = start + ptoa(diff); 3691151497Sru if ((va & PDRMASK) == 0 && va + NBPDR <= end && 3692151497Sru (VM_PAGE_TO_PHYS(m) & PDRMASK) == 0 && 3693151497Sru pg_ps_enabled && vm_reserv_level_iffullpop(m) == 0 && 3694151497Sru pmap_enter_pde(pmap, va, m, prot)) 3695151497Sru m = &m[NBPDR / PAGE_SIZE - 1]; 3696151497Sru else 3697151497Sru mpte = pmap_enter_quick_locked(pmap, va, m, prot, 3698151497Sru mpte); 3699151497Sru m = TAILQ_NEXT(m, listq); 3700151497Sru } 3701151497Sru vm_page_unlock_queues(); 3702151497Sru PMAP_UNLOCK(pmap); 3703151497Sru} 3704151497Sru 3705151497Sru/* 3706151497Sru * this code makes some *MAJOR* assumptions: 3707151497Sru * 1. Current pmap & pmap exists. 3708151497Sru * 2. Not wired. 3709151497Sru * 3. Read access. 3710151497Sru * 4. No page table pages. 3711151497Sru * but is *MUCH* faster than pmap_enter... 
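 *
 * It is typically used for speculative prefaulting, where it is
 * acceptable for the attempt to create a mapping to fail silently.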
3712151497Sru */ 3713151497Sru 3714151497Sruvoid 3715151497Srupmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3716151497Sru{ 3717151497Sru 3718151497Sru vm_page_lock_queues(); 3719151497Sru PMAP_LOCK(pmap); 3720151497Sru (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL); 3721151497Sru vm_page_unlock_queues(); 3722151497Sru PMAP_UNLOCK(pmap); 3723151497Sru} 3724151497Sru 3725151497Srustatic vm_page_t 3726151497Srupmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 3727151497Sru vm_prot_t prot, vm_page_t mpte) 3728151497Sru{ 3729151497Sru pt_entry_t *pte; 3730151497Sru vm_paddr_t pa; 3731151497Sru vm_page_t free; 3732151497Sru 3733151497Sru KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 3734151497Sru (m->oflags & VPO_UNMANAGED) != 0, 3735151497Sru ("pmap_enter_quick_locked: managed mapping within the clean submap")); 3736151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3737151497Sru PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3738151497Sru 3739151497Sru /* 3740151497Sru * In the case that a page table page is not 3741151497Sru * resident, we are creating it here. 3742151497Sru */ 3743151497Sru if (va < VM_MAXUSER_ADDRESS) { 3744151497Sru u_int ptepindex; 3745151497Sru pd_entry_t ptepa; 3746151497Sru 3747151497Sru /* 3748151497Sru * Calculate pagetable page index 3749151497Sru */ 3750151497Sru ptepindex = va >> PDRSHIFT; 3751151497Sru if (mpte && (mpte->pindex == ptepindex)) { 3752151497Sru mpte->wire_count++; 3753151497Sru } else { 3754151497Sru /* 3755151497Sru * Get the page directory entry 3756151497Sru */ 3757151497Sru ptepa = pmap->pm_pdir[ptepindex]; 3758151497Sru 3759151497Sru /* 3760151497Sru * If the page table page is mapped, we just increment 3761151497Sru * the hold count, and activate it. 3762151497Sru */ 3763151497Sru if (ptepa) { 3764151497Sru if (ptepa & PG_PS) 3765151497Sru return (NULL); 3766151497Sru mpte = PHYS_TO_VM_PAGE(ptepa & PG_FRAME); 3767151497Sru mpte->wire_count++; 3768151497Sru } else { 3769151497Sru mpte = _pmap_allocpte(pmap, ptepindex, 3770151497Sru M_NOWAIT); 3771151497Sru if (mpte == NULL) 3772151497Sru return (mpte); 3773151497Sru } 3774151497Sru } 3775151497Sru } else { 3776151497Sru mpte = NULL; 3777151497Sru } 3778151497Sru 3779151497Sru /* 3780151497Sru * This call to vtopte makes the assumption that we are 3781151497Sru * entering the page into the current pmap. In order to support 3782151497Sru * quick entry into any pmap, one would likely use pmap_pte_quick. 3783151497Sru * But that isn't as quick as vtopte. 3784151497Sru */ 3785151497Sru pte = vtopte(va); 3786151497Sru if (*pte) { 3787151497Sru if (mpte != NULL) { 3788151497Sru mpte->wire_count--; 3789151497Sru mpte = NULL; 3790151497Sru } 3791151497Sru return (mpte); 3792151497Sru } 3793151497Sru 3794151497Sru /* 3795151497Sru * Enter on the PV list if part of our managed memory. 
3796151497Sru */ 3797151497Sru if ((m->oflags & VPO_UNMANAGED) == 0 && 3798151497Sru !pmap_try_insert_pv_entry(pmap, va, m)) { 3799151497Sru if (mpte != NULL) { 3800151497Sru free = NULL; 3801151497Sru if (pmap_unwire_pte_hold(pmap, mpte, &free)) { 3802151497Sru pmap_invalidate_page(pmap, va); 3803151497Sru pmap_free_zero_pages(free); 3804151497Sru } 3805151497Sru 3806151497Sru mpte = NULL; 3807151497Sru } 3808151497Sru return (mpte); 3809151497Sru } 3810151497Sru 3811151497Sru /* 3812151497Sru * Increment counters 3813151497Sru */ 3814151497Sru pmap->pm_stats.resident_count++; 3815151497Sru 3816151497Sru pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(m->md.pat_mode, 0); 3817151497Sru#ifdef PAE 3818151497Sru if ((prot & VM_PROT_EXECUTE) == 0) 3819151497Sru pa |= pg_nx; 3820151497Sru#endif 3821151497Sru 3822151497Sru /* 3823151497Sru * Now validate mapping with RO protection 3824151497Sru */ 3825151497Sru if ((m->oflags & VPO_UNMANAGED) != 0) 3826151497Sru pte_store(pte, pa | PG_V | PG_U); 3827151497Sru else 3828151497Sru pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 3829151497Sru return (mpte); 3830151497Sru} 3831151497Sru 3832151497Sru/* 3833151497Sru * Make a temporary mapping for a physical address. This is only intended 3834151497Sru * to be used for panic dumps. 3835151497Sru */ 3836151497Sruvoid * 3837151497Srupmap_kenter_temporary(vm_paddr_t pa, int i) 3838151497Sru{ 3839151497Sru vm_offset_t va; 3840151497Sru 3841151497Sru va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 3842151497Sru pmap_kenter(va, pa); 3843151497Sru invlpg(va); 3844151497Sru return ((void *)crashdumpmap); 3845151497Sru} 3846151497Sru 3847151497Sru/* 3848151497Sru * This code maps large physical mmap regions into the 3849151497Sru * processor address space. Note that some shortcuts 3850151497Sru * are taken, but the code works. 3851151497Sru */ 3852151497Sruvoid 3853151497Srupmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 3854151497Sru vm_pindex_t pindex, vm_size_t size) 3855151497Sru{ 3856151497Sru pd_entry_t *pde; 3857151497Sru vm_paddr_t pa, ptepa; 3858151497Sru vm_page_t p; 3859151497Sru int pat_mode; 3860151497Sru 3861151497Sru VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 3862151497Sru KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3863151497Sru ("pmap_object_init_pt: non-device object")); 3864151497Sru if (pseflag && 3865151497Sru (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { 3866151497Sru if (!vm_object_populate(object, pindex, pindex + atop(size))) 3867151497Sru return; 3868151497Sru p = vm_page_lookup(object, pindex); 3869151497Sru KASSERT(p->valid == VM_PAGE_BITS_ALL, 3870151497Sru ("pmap_object_init_pt: invalid page %p", p)); 3871151497Sru pat_mode = p->md.pat_mode; 3872151497Sru 3873151497Sru /* 3874151497Sru * Abort the mapping if the first page is not physically 3875151497Sru * aligned to a 2/4MB page boundary. 3876151497Sru */ 3877151497Sru ptepa = VM_PAGE_TO_PHYS(p); 3878151497Sru if (ptepa & (NBPDR - 1)) 3879151497Sru return; 3880151497Sru 3881151497Sru /* 3882151497Sru * Skip the first page. Abort the mapping if the rest of 3883151497Sru * the pages are not physically contiguous or have differing 3884151497Sru * memory attributes. 
3885151497Sru */ 3886151497Sru p = TAILQ_NEXT(p, listq); 3887151497Sru for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; 3888151497Sru pa += PAGE_SIZE) { 3889151497Sru KASSERT(p->valid == VM_PAGE_BITS_ALL, 3890151497Sru ("pmap_object_init_pt: invalid page %p", p)); 3891151497Sru if (pa != VM_PAGE_TO_PHYS(p) || 3892151497Sru pat_mode != p->md.pat_mode) 3893151497Sru return; 3894151497Sru p = TAILQ_NEXT(p, listq); 3895151497Sru } 3896151497Sru 3897151497Sru /* 3898151497Sru * Map using 2/4MB pages. Since "ptepa" is 2/4M aligned and 3899151497Sru * "size" is a multiple of 2/4M, adding the PAT setting to 3900151497Sru * "pa" will not affect the termination of this loop. 3901151497Sru */ 3902151497Sru PMAP_LOCK(pmap); 3903151497Sru for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + 3904151497Sru size; pa += NBPDR) { 3905151497Sru pde = pmap_pde(pmap, addr); 3906151497Sru if (*pde == 0) { 3907151497Sru pde_store(pde, pa | PG_PS | PG_M | PG_A | 3908151497Sru PG_U | PG_RW | PG_V); 3909151497Sru pmap->pm_stats.resident_count += NBPDR / 3910151497Sru PAGE_SIZE; 3911151497Sru pmap_pde_mappings++; 3912151497Sru } 3913151497Sru /* Else continue on if the PDE is already valid. */ 3914151497Sru addr += NBPDR; 3915151497Sru } 3916151497Sru PMAP_UNLOCK(pmap); 3917151497Sru } 3918151497Sru} 3919151497Sru 3920151497Sru/* 3921151497Sru * Routine: pmap_change_wiring 3922151497Sru * Function: Change the wiring attribute for a map/virtual-address 3923151497Sru * pair. 3924151497Sru * In/out conditions: 3925151497Sru * The mapping must already exist in the pmap. 3926151497Sru */ 3927151497Sruvoid 3928151497Srupmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 3929151497Sru{ 3930151497Sru pd_entry_t *pde; 3931151497Sru pt_entry_t *pte; 3932151497Sru boolean_t are_queues_locked; 3933151497Sru 3934151497Sru are_queues_locked = FALSE; 3935151497Sruretry: 3936151497Sru PMAP_LOCK(pmap); 3937151497Sru pde = pmap_pde(pmap, va); 3938151497Sru if ((*pde & PG_PS) != 0) { 3939151497Sru if (!wired != ((*pde & PG_W) == 0)) { 3940151497Sru if (!are_queues_locked) { 3941151497Sru are_queues_locked = TRUE; 3942151497Sru if (!mtx_trylock(&vm_page_queue_mtx)) { 3943151497Sru PMAP_UNLOCK(pmap); 3944151497Sru vm_page_lock_queues(); 3945151497Sru goto retry; 3946151497Sru } 3947151497Sru } 3948151497Sru if (!pmap_demote_pde(pmap, pde, va)) 3949151497Sru panic("pmap_change_wiring: demotion failed"); 3950151497Sru } else 3951151497Sru goto out; 3952151497Sru } 3953151497Sru pte = pmap_pte(pmap, va); 3954151497Sru 3955151497Sru if (wired && !pmap_pte_w(pte)) 3956151497Sru pmap->pm_stats.wired_count++; 3957151497Sru else if (!wired && pmap_pte_w(pte)) 3958151497Sru pmap->pm_stats.wired_count--; 3959151497Sru 3960151497Sru /* 3961151497Sru * Wiring is not a hardware characteristic so there is no need to 3962151497Sru * invalidate TLB. 3963151497Sru */ 3964151497Sru pmap_pte_set_w(pte, wired); 3965151497Sru pmap_pte_release(pte); 3966151497Sruout: 3967151497Sru if (are_queues_locked) 3968151497Sru vm_page_unlock_queues(); 3969151497Sru PMAP_UNLOCK(pmap); 3970151497Sru} 3971151497Sru 3972151497Sru 3973151497Sru 3974151497Sru/* 3975151497Sru * Copy the range specified by src_addr/len 3976151497Sru * from the source map to the range dst_addr/len 3977151497Sru * in the destination map. 3978151497Sru * 3979151497Sru * This routine is only advisory and need not do anything. 
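 *
 * This implementation copies only when dst_addr equals src_addr and
 * the source pmap is the current pmap.  Superpage mappings are copied
 * into empty destination PDEs with the wired bit cleared, and only
 * managed 4KB mappings are copied, with their wired, modified, and
 * accessed bits cleared.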
3980151497Sru */ 3981151497Sru 3982151497Sruvoid 3983151497Srupmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3984151497Sru vm_offset_t src_addr) 3985151497Sru{ 3986151497Sru vm_page_t free; 3987151497Sru vm_offset_t addr; 3988151497Sru vm_offset_t end_addr = src_addr + len; 3989151497Sru vm_offset_t pdnxt; 3990151497Sru 3991151497Sru if (dst_addr != src_addr) 3992151497Sru return; 3993151497Sru 3994151497Sru if (!pmap_is_current(src_pmap)) 3995151497Sru return; 3996151497Sru 3997151497Sru vm_page_lock_queues(); 3998151497Sru if (dst_pmap < src_pmap) { 3999151497Sru PMAP_LOCK(dst_pmap); 4000151497Sru PMAP_LOCK(src_pmap); 4001151497Sru } else { 4002151497Sru PMAP_LOCK(src_pmap); 4003151497Sru PMAP_LOCK(dst_pmap); 4004151497Sru } 4005151497Sru sched_pin(); 4006151497Sru for (addr = src_addr; addr < end_addr; addr = pdnxt) { 4007151497Sru pt_entry_t *src_pte, *dst_pte; 4008151497Sru vm_page_t dstmpte, srcmpte; 4009151497Sru pd_entry_t srcptepaddr; 4010151497Sru u_int ptepindex; 4011151497Sru 4012151497Sru KASSERT(addr < UPT_MIN_ADDRESS, 4013151497Sru ("pmap_copy: invalid to pmap_copy page tables")); 4014151497Sru 4015151497Sru pdnxt = (addr + NBPDR) & ~PDRMASK; 4016151497Sru if (pdnxt < addr) 4017151497Sru pdnxt = end_addr; 4018151497Sru ptepindex = addr >> PDRSHIFT; 4019151497Sru 4020151497Sru srcptepaddr = src_pmap->pm_pdir[ptepindex]; 4021151497Sru if (srcptepaddr == 0) 4022151497Sru continue; 4023151497Sru 4024151497Sru if (srcptepaddr & PG_PS) { 4025151497Sru if (dst_pmap->pm_pdir[ptepindex] == 0 && 4026151497Sru ((srcptepaddr & PG_MANAGED) == 0 || 4027151497Sru pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr & 4028151497Sru PG_PS_FRAME))) { 4029151497Sru dst_pmap->pm_pdir[ptepindex] = srcptepaddr & 4030151497Sru ~PG_W; 4031151497Sru dst_pmap->pm_stats.resident_count += 4032151497Sru NBPDR / PAGE_SIZE; 4033151497Sru } 4034151497Sru continue; 4035151497Sru } 4036151497Sru 4037151497Sru srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); 4038151497Sru KASSERT(srcmpte->wire_count > 0, 4039151497Sru ("pmap_copy: source page table page is unused")); 4040151497Sru 4041151497Sru if (pdnxt > end_addr) 4042151497Sru pdnxt = end_addr; 4043151497Sru 4044151497Sru src_pte = vtopte(addr); 4045151497Sru while (addr < pdnxt) { 4046151497Sru pt_entry_t ptetemp; 4047151497Sru ptetemp = *src_pte; 4048151497Sru /* 4049151497Sru * we only virtual copy managed pages 4050151497Sru */ 4051151497Sru if ((ptetemp & PG_MANAGED) != 0) { 4052151497Sru dstmpte = pmap_allocpte(dst_pmap, addr, 4053151497Sru M_NOWAIT); 4054151497Sru if (dstmpte == NULL) 4055151497Sru goto out; 4056151497Sru dst_pte = pmap_pte_quick(dst_pmap, addr); 4057151497Sru if (*dst_pte == 0 && 4058151497Sru pmap_try_insert_pv_entry(dst_pmap, addr, 4059151497Sru PHYS_TO_VM_PAGE(ptetemp & PG_FRAME))) { 4060151497Sru /* 4061151497Sru * Clear the wired, modified, and 4062151497Sru * accessed (referenced) bits 4063151497Sru * during the copy. 
4064151497Sru */ 4065151497Sru *dst_pte = ptetemp & ~(PG_W | PG_M | 4066151497Sru PG_A); 4067151497Sru dst_pmap->pm_stats.resident_count++; 4068151497Sru } else { 4069151497Sru free = NULL; 4070151497Sru if (pmap_unwire_pte_hold(dst_pmap, 4071151497Sru dstmpte, &free)) { 4072151497Sru pmap_invalidate_page(dst_pmap, 4073151497Sru addr); 4074151497Sru pmap_free_zero_pages(free); 4075151497Sru } 4076151497Sru goto out; 4077151497Sru } 4078151497Sru if (dstmpte->wire_count >= srcmpte->wire_count) 4079151497Sru break; 4080151497Sru } 4081151497Sru addr += PAGE_SIZE; 4082151497Sru src_pte++; 4083151497Sru } 4084151497Sru } 4085151497Sruout: 4086151497Sru sched_unpin(); 4087151497Sru vm_page_unlock_queues(); 4088151497Sru PMAP_UNLOCK(src_pmap); 4089151497Sru PMAP_UNLOCK(dst_pmap); 4090151497Sru} 4091151497Sru 4092151497Srustatic __inline void 4093151497Srupagezero(void *page) 4094151497Sru{ 4095151497Sru#if defined(I686_CPU) 4096151497Sru if (cpu_class == CPUCLASS_686) { 4097151497Sru#if defined(CPU_ENABLE_SSE) 4098151497Sru if (cpu_feature & CPUID_SSE2) 4099151497Sru sse2_pagezero(page); 4100151497Sru else 4101151497Sru#endif 4102151497Sru i686_pagezero(page); 4103151497Sru } else 4104151497Sru#endif 4105151497Sru bzero(page, PAGE_SIZE); 4106151497Sru} 4107151497Sru 4108151497Sru/* 4109151497Sru * pmap_zero_page zeros the specified hardware page by mapping 4110151497Sru * the page into KVM and using bzero to clear its contents. 4111151497Sru */ 4112151497Sruvoid 4113151497Srupmap_zero_page(vm_page_t m) 4114151497Sru{ 4115151497Sru struct sysmaps *sysmaps; 4116151497Sru 4117151497Sru sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 4118151497Sru mtx_lock(&sysmaps->lock); 4119151497Sru if (*sysmaps->CMAP2) 4120151497Sru panic("pmap_zero_page: CMAP2 busy"); 4121151497Sru sched_pin(); 4122151497Sru *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | 4123151497Sru pmap_cache_bits(m->md.pat_mode, 0); 4124151497Sru invlcaddr(sysmaps->CADDR2); 4125151497Sru pagezero(sysmaps->CADDR2); 4126151497Sru *sysmaps->CMAP2 = 0; 4127151497Sru sched_unpin(); 4128151497Sru mtx_unlock(&sysmaps->lock); 4129151497Sru} 4130151497Sru 4131151497Sru/* 4132151497Sru * pmap_zero_page_area zeros the specified hardware page by mapping 4133151497Sru * the page into KVM and using bzero to clear its contents. 4134151497Sru * 4135151497Sru * off and size may not cover an area beyond a single hardware page. 4136151497Sru */ 4137151497Sruvoid 4138151497Srupmap_zero_page_area(vm_page_t m, int off, int size) 4139151497Sru{ 4140151497Sru struct sysmaps *sysmaps; 4141151497Sru 4142151497Sru sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 4143151497Sru mtx_lock(&sysmaps->lock); 4144151497Sru if (*sysmaps->CMAP2) 4145151497Sru panic("pmap_zero_page_area: CMAP2 busy"); 4146151497Sru sched_pin(); 4147151497Sru *sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M | 4148151497Sru pmap_cache_bits(m->md.pat_mode, 0); 4149151497Sru invlcaddr(sysmaps->CADDR2); 4150151497Sru if (off == 0 && size == PAGE_SIZE) 4151151497Sru pagezero(sysmaps->CADDR2); 4152151497Sru else 4153151497Sru bzero((char *)sysmaps->CADDR2 + off, size); 4154151497Sru *sysmaps->CMAP2 = 0; 4155151497Sru sched_unpin(); 4156151497Sru mtx_unlock(&sysmaps->lock); 4157151497Sru} 4158151497Sru 4159151497Sru/* 4160151497Sru * pmap_zero_page_idle zeros the specified hardware page by mapping 4161151497Sru * the page into KVM and using bzero to clear its contents. 
This
 * is intended to be called from the vm_pagezero process only and
 * outside of Giant.
 */
void
pmap_zero_page_idle(vm_page_t m)
{

	if (*CMAP3)
		panic("pmap_zero_page_idle: CMAP3 busy");
	sched_pin();
	*CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M |
	    pmap_cache_bits(m->md.pat_mode, 0);
	invlcaddr(CADDR3);
	pagezero(CADDR3);
	*CMAP3 = 0;
	sched_unpin();
}

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t src, vm_page_t dst)
{
	struct sysmaps *sysmaps;

	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
	mtx_lock(&sysmaps->lock);
	if (*sysmaps->CMAP1)
		panic("pmap_copy_page: CMAP1 busy");
	if (*sysmaps->CMAP2)
		panic("pmap_copy_page: CMAP2 busy");
	sched_pin();
	invlpg((u_int)sysmaps->CADDR1);
	invlpg((u_int)sysmaps->CADDR2);
	*sysmaps->CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A |
	    pmap_cache_bits(src->md.pat_mode, 0);
	*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M |
	    pmap_cache_bits(dst->md.pat_mode, 0);
	bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
	*sysmaps->CMAP1 = 0;
	*sysmaps->CMAP2 = 0;
	sched_unpin();
	mtx_unlock(&sysmaps->lock);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
4217151497Sru */ 4218151497Sruboolean_t 4219151497Srupmap_page_exists_quick(pmap_t pmap, vm_page_t m) 4220151497Sru{ 4221151497Sru struct md_page *pvh; 4222151497Sru pv_entry_t pv; 4223151497Sru int loops = 0; 4224151497Sru boolean_t rv; 4225151497Sru 4226151497Sru KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4227151497Sru ("pmap_page_exists_quick: page %p is not managed", m)); 4228151497Sru rv = FALSE; 4229151497Sru vm_page_lock_queues(); 4230151497Sru TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 4231151497Sru if (PV_PMAP(pv) == pmap) { 4232151497Sru rv = TRUE; 4233151497Sru break; 4234151497Sru } 4235151497Sru loops++; 4236151497Sru if (loops >= 16) 4237151497Sru break; 4238151497Sru } 4239151497Sru if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 4240151497Sru pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4241151497Sru TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4242151497Sru if (PV_PMAP(pv) == pmap) { 4243151497Sru rv = TRUE; 4244151497Sru break; 4245151497Sru } 4246151497Sru loops++; 4247151497Sru if (loops >= 16) 4248151497Sru break; 4249151497Sru } 4250151497Sru } 4251151497Sru vm_page_unlock_queues(); 4252151497Sru return (rv); 4253151497Sru} 4254151497Sru 4255151497Sru/* 4256151497Sru * pmap_page_wired_mappings: 4257151497Sru * 4258151497Sru * Return the number of managed mappings to the given physical page 4259151497Sru * that are wired. 4260151497Sru */ 4261151497Sruint 4262151497Srupmap_page_wired_mappings(vm_page_t m) 4263151497Sru{ 4264151497Sru int count; 4265151497Sru 4266151497Sru count = 0; 4267151497Sru if ((m->oflags & VPO_UNMANAGED) != 0) 4268151497Sru return (count); 4269151497Sru vm_page_lock_queues(); 4270151497Sru count = pmap_pvh_wired_mappings(&m->md, count); 4271151497Sru if ((m->flags & PG_FICTITIOUS) == 0) { 4272151497Sru count = pmap_pvh_wired_mappings(pa_to_pvh(VM_PAGE_TO_PHYS(m)), 4273151497Sru count); 4274151497Sru } 4275151497Sru vm_page_unlock_queues(); 4276151497Sru return (count); 4277151497Sru} 4278151497Sru 4279151497Sru/* 4280151497Sru * pmap_pvh_wired_mappings: 4281151497Sru * 4282151497Sru * Return the updated number "count" of managed mappings that are wired. 4283151497Sru */ 4284151497Srustatic int 4285151497Srupmap_pvh_wired_mappings(struct md_page *pvh, int count) 4286151497Sru{ 4287151497Sru pmap_t pmap; 4288151497Sru pt_entry_t *pte; 4289151497Sru pv_entry_t pv; 4290151497Sru 4291151497Sru mtx_assert(&vm_page_queue_mtx, MA_OWNED); 4292151497Sru sched_pin(); 4293151497Sru TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) { 4294151497Sru pmap = PV_PMAP(pv); 4295151497Sru PMAP_LOCK(pmap); 4296151497Sru pte = pmap_pte_quick(pmap, pv->pv_va); 4297151497Sru if ((*pte & PG_W) != 0) 4298151497Sru count++; 4299151497Sru PMAP_UNLOCK(pmap); 4300151497Sru } 4301151497Sru sched_unpin(); 4302151497Sru return (count); 4303151497Sru} 4304151497Sru 4305151497Sru/* 4306151497Sru * Returns TRUE if the given page is mapped individually or as part of 4307151497Sru * a 4mpage. Otherwise, returns FALSE. 
4308151497Sru */ 4309151497Sruboolean_t 4310151497Srupmap_page_is_mapped(vm_page_t m) 4311151497Sru{ 4312151497Sru boolean_t rv; 4313151497Sru 4314151497Sru if ((m->oflags & VPO_UNMANAGED) != 0) 4315151497Sru return (FALSE); 4316151497Sru vm_page_lock_queues(); 4317151497Sru rv = !TAILQ_EMPTY(&m->md.pv_list) || 4318151497Sru ((m->flags & PG_FICTITIOUS) == 0 && 4319151497Sru !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); 4320151497Sru vm_page_unlock_queues(); 4321151497Sru return (rv); 4322151497Sru} 4323151497Sru 4324151497Sru/* 4325151497Sru * Remove all pages from specified address space 4326151497Sru * this aids process exit speeds. Also, this code 4327151497Sru * is special cased for current process only, but 4328151497Sru * can have the more generic (and slightly slower) 4329151497Sru * mode enabled. This is much faster than pmap_remove 4330151497Sru * in the case of running down an entire address space. 4331151497Sru */ 4332151497Sruvoid 4333151497Srupmap_remove_pages(pmap_t pmap) 4334151497Sru{ 4335151497Sru pt_entry_t *pte, tpte; 4336151497Sru vm_page_t free = NULL; 4337151497Sru vm_page_t m, mpte, mt; 4338151497Sru pv_entry_t pv; 4339151497Sru struct md_page *pvh; 4340151497Sru struct pv_chunk *pc, *npc; 4341151497Sru int field, idx; 4342151497Sru int32_t bit; 4343151497Sru uint32_t inuse, bitmask; 4344151497Sru int allfree; 4345151497Sru 4346151497Sru if (pmap != PCPU_GET(curpmap)) { 4347151497Sru printf("warning: pmap_remove_pages called with non-current pmap\n"); 4348151497Sru return; 4349151497Sru } 4350151497Sru vm_page_lock_queues(); 4351151497Sru PMAP_LOCK(pmap); 4352151497Sru sched_pin(); 4353151497Sru TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 4354151497Sru allfree = 1; 4355151497Sru for (field = 0; field < _NPCM; field++) { 4356151497Sru inuse = (~(pc->pc_map[field])) & pc_freemask[field]; 4357151497Sru while (inuse != 0) { 4358151497Sru bit = bsfl(inuse); 4359151497Sru bitmask = 1UL << bit; 4360151497Sru idx = field * 32 + bit; 4361151497Sru pv = &pc->pc_pventry[idx]; 4362151497Sru inuse &= ~bitmask; 4363151497Sru 4364151497Sru pte = pmap_pde(pmap, pv->pv_va); 4365151497Sru tpte = *pte; 4366151497Sru if ((tpte & PG_PS) == 0) { 4367151497Sru pte = vtopte(pv->pv_va); 4368151497Sru tpte = *pte & ~PG_PTE_PAT; 4369151497Sru } 4370151497Sru 4371151497Sru if (tpte == 0) { 4372151497Sru printf( 4373151497Sru "TPTE at %p IS ZERO @ VA %08x\n", 4374151497Sru pte, pv->pv_va); 4375151497Sru panic("bad pte"); 4376151497Sru } 4377151497Sru 4378151497Sru/* 4379151497Sru * We cannot remove wired pages from a process' mapping at this time 4380151497Sru */ 4381151497Sru if (tpte & PG_W) { 4382151497Sru allfree = 0; 4383151497Sru continue; 4384151497Sru } 4385151497Sru 4386151497Sru m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 4387151497Sru KASSERT(m->phys_addr == (tpte & PG_FRAME), 4388151497Sru ("vm_page_t %p phys_addr mismatch %016jx %016jx", 4389151497Sru m, (uintmax_t)m->phys_addr, 4390151497Sru (uintmax_t)tpte)); 4391151497Sru 4392151497Sru KASSERT((m->flags & PG_FICTITIOUS) != 0 || 4393151497Sru m < &vm_page_array[vm_page_array_size], 4394151497Sru ("pmap_remove_pages: bad tpte %#jx", 4395151497Sru (uintmax_t)tpte)); 4396151497Sru 4397151497Sru pte_clear(pte); 4398151497Sru 4399151497Sru /* 4400151497Sru * Update the vm_page_t clean/reference bits. 
4401151497Sru */ 4402151497Sru if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { 4403151497Sru if ((tpte & PG_PS) != 0) { 4404151497Sru for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) 4405151497Sru vm_page_dirty(mt); 4406151497Sru } else 4407151497Sru vm_page_dirty(m); 4408151497Sru } 4409151497Sru 4410151497Sru /* Mark free */ 4411151497Sru PV_STAT(pv_entry_frees++); 4412151497Sru PV_STAT(pv_entry_spare++); 4413151497Sru pv_entry_count--; 4414151497Sru pc->pc_map[field] |= bitmask; 4415151497Sru if ((tpte & PG_PS) != 0) { 4416151497Sru pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 4417151497Sru pvh = pa_to_pvh(tpte & PG_PS_FRAME); 4418151497Sru TAILQ_REMOVE(&pvh->pv_list, pv, pv_list); 4419151497Sru if (TAILQ_EMPTY(&pvh->pv_list)) { 4420151497Sru for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) 4421151497Sru if (TAILQ_EMPTY(&mt->md.pv_list)) 4422151497Sru vm_page_aflag_clear(mt, PGA_WRITEABLE); 4423151497Sru } 4424151497Sru mpte = pmap_lookup_pt_page(pmap, pv->pv_va); 4425151497Sru if (mpte != NULL) { 4426151497Sru pmap_remove_pt_page(pmap, mpte); 4427151497Sru pmap->pm_stats.resident_count--; 4428151497Sru KASSERT(mpte->wire_count == NPTEPG, 4429151497Sru ("pmap_remove_pages: pte page wire count error")); 4430151497Sru mpte->wire_count = 0; 4431151497Sru pmap_add_delayed_free_list(mpte, &free, FALSE); 4432151497Sru atomic_subtract_int(&cnt.v_wire_count, 1); 4433151497Sru } 4434151497Sru } else { 4435151497Sru pmap->pm_stats.resident_count--; 4436151497Sru TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 4437151497Sru if (TAILQ_EMPTY(&m->md.pv_list) && 4438151497Sru (m->flags & PG_FICTITIOUS) == 0) { 4439151497Sru pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4440151497Sru if (TAILQ_EMPTY(&pvh->pv_list)) 4441151497Sru vm_page_aflag_clear(m, PGA_WRITEABLE); 4442151497Sru } 4443151497Sru pmap_unuse_pt(pmap, pv->pv_va, &free); 4444151497Sru } 4445151497Sru } 4446151497Sru } 4447151497Sru if (allfree) { 4448151497Sru TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4449151497Sru free_pv_chunk(pc); 4450151497Sru } 4451151497Sru } 4452151497Sru sched_unpin(); 4453151497Sru pmap_invalidate_all(pmap); 4454151497Sru vm_page_unlock_queues(); 4455151497Sru PMAP_UNLOCK(pmap); 4456151497Sru pmap_free_zero_pages(free); 4457151497Sru} 4458151497Sru 4459151497Sru/* 4460151497Sru * pmap_is_modified: 4461151497Sru * 4462151497Sru * Return whether or not the specified physical page was modified 4463151497Sru * in any physical maps. 4464151497Sru */ 4465151497Sruboolean_t 4466151497Srupmap_is_modified(vm_page_t m) 4467151497Sru{ 4468151497Sru boolean_t rv; 4469151497Sru 4470151497Sru KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4471151497Sru ("pmap_is_modified: page %p is not managed", m)); 4472151497Sru 4473151497Sru /* 4474151497Sru * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be 4475151497Sru * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 4476151497Sru * is clear, no PTEs can have PG_M set. 
 */
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((m->oflags & VPO_BUSY) == 0 &&
	    (m->aflags & PGA_WRITEABLE) == 0)
		return (FALSE);
	vm_page_lock_queues();
	rv = pmap_is_modified_pvh(&m->md) ||
	    ((m->flags & PG_FICTITIOUS) == 0 &&
	    pmap_is_modified_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
	vm_page_unlock_queues();
	return (rv);
}

/*
 * Returns TRUE if any of the given mappings were used to modify
 * physical memory.  Otherwise, returns FALSE.  Both page and 2mpage
 * mappings are supported.
 */
static boolean_t
pmap_is_modified_pvh(struct md_page *pvh)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	pmap_t pmap;
	boolean_t rv;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	rv = FALSE;
	sched_pin();
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte = pmap_pte_quick(pmap, pv->pv_va);
		rv = (*pte & (PG_M | PG_RW)) == (PG_M | PG_RW);
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	sched_unpin();
	return (rv);
}

/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pd_entry_t *pde;
	pt_entry_t *pte;
	boolean_t rv;

	rv = FALSE;
	PMAP_LOCK(pmap);
	pde = pmap_pde(pmap, addr);
	if (*pde != 0 && (*pde & PG_PS) == 0) {
		pte = vtopte(addr);
		rv = *pte == 0;
	}
	PMAP_UNLOCK(pmap);
	return (rv);
}

/*
 * pmap_is_referenced:
 *
 *	Return whether or not the specified physical page was referenced
 *	in any physical maps.
 */
boolean_t
pmap_is_referenced(vm_page_t m)
{
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	vm_page_lock_queues();
	rv = pmap_is_referenced_pvh(&m->md) ||
	    ((m->flags & PG_FICTITIOUS) == 0 &&
	    pmap_is_referenced_pvh(pa_to_pvh(VM_PAGE_TO_PHYS(m))));
	vm_page_unlock_queues();
	return (rv);
}

/*
 * Returns TRUE if any of the given mappings were referenced and FALSE
 * otherwise.  Both page and 4mpage mappings are supported.

/*
 * Returns TRUE if any of the given mappings were referenced and FALSE
 * otherwise.  Both page and 4mpage mappings are supported.
 */
static boolean_t
pmap_is_referenced_pvh(struct md_page *pvh)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	pmap_t pmap;
	boolean_t rv;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	rv = FALSE;
	sched_pin();
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte = pmap_pte_quick(pmap, pv->pv_va);
		rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V);
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	sched_unpin();
	return (rv);
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t next_pv, pv;
	pmap_t pmap;
	pd_entry_t *pde;
	pt_entry_t oldpte, *pte;
	vm_offset_t va;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not VPO_BUSY, then PGA_WRITEABLE cannot be set by
	 * another thread while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no page table entries need updating.
	 */
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	if ((m->oflags & VPO_BUSY) == 0 &&
	    (m->aflags & PGA_WRITEABLE) == 0)
		return;
	vm_page_lock_queues();
	sched_pin();
	if ((m->flags & PG_FICTITIOUS) != 0)
		goto small_mappings;
	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
		va = pv->pv_va;
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pde = pmap_pde(pmap, va);
		if ((*pde & PG_RW) != 0)
			(void)pmap_demote_pde(pmap, pde, va);
		PMAP_UNLOCK(pmap);
	}
small_mappings:
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pde = pmap_pde(pmap, pv->pv_va);
		KASSERT((*pde & PG_PS) == 0, ("pmap_remove_write: found"
		    " a 4mpage in page %p's pv list", m));
		pte = pmap_pte_quick(pmap, pv->pv_va);
retry:
		oldpte = *pte;
		if ((oldpte & PG_RW) != 0) {
			/*
			 * Regardless of whether a pte is 32 or 64 bits
			 * in size, PG_RW and PG_M are among the least
			 * significant 32 bits.
			 */
			if (!atomic_cmpset_int((u_int *)pte, oldpte,
			    oldpte & ~(PG_RW | PG_M)))
				goto retry;
			if ((oldpte & PG_M) != 0)
				vm_page_dirty(m);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	sched_unpin();
	vm_page_unlock_queues();
}

/*
 *	pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t pv, pvf, pvn;
	pmap_t pmap;
	pd_entry_t oldpde, *pde;
	pt_entry_t *pte;
	vm_offset_t va;
	int rtval = 0;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
	vm_page_lock_queues();
	sched_pin();
	if ((m->flags & PG_FICTITIOUS) != 0)
		goto small_mappings;
	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, pvn) {
		va = pv->pv_va;
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pde = pmap_pde(pmap, va);
		oldpde = *pde;
		if ((oldpde & PG_A) != 0) {
			if (pmap_demote_pde(pmap, pde, va)) {
				if ((oldpde & PG_W) == 0) {
					/*
					 * Remove the mapping to a single page
					 * so that a subsequent access may
					 * repromote.  Since the underlying
					 * page table page is fully populated,
					 * this removal never frees a page
					 * table page.
					 */
					va += VM_PAGE_TO_PHYS(m) - (oldpde &
					    PG_PS_FRAME);
					pmap_remove_page(pmap, va, NULL);
					rtval++;
					if (rtval > 4) {
						PMAP_UNLOCK(pmap);
						goto out;
					}
				}
			}
		}
		PMAP_UNLOCK(pmap);
	}
small_mappings:
	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pvf = pv;
		do {
			pvn = TAILQ_NEXT(pv, pv_list);
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
			pmap = PV_PMAP(pv);
			PMAP_LOCK(pmap);
			pde = pmap_pde(pmap, pv->pv_va);
			KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced:"
			    " found a 4mpage in page %p's pv list", m));
			pte = pmap_pte_quick(pmap, pv->pv_va);
			if ((*pte & PG_A) != 0) {
				atomic_clear_int((u_int *)pte, PG_A);
				pmap_invalidate_page(pmap, pv->pv_va);
				rtval++;
				if (rtval > 4)
					pvn = NULL;
			}
			PMAP_UNLOCK(pmap);
		} while ((pv = pvn) != NULL && pv != pvf);
	}
out:
	sched_unpin();
	vm_page_unlock_queues();
	return (rtval);
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t next_pv, pv;
	pmap_t pmap;
	pd_entry_t oldpde, *pde;
	pt_entry_t oldpte, *pte;
	vm_offset_t va;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_modify: page %p is not managed", m));
	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
	KASSERT((m->oflags & VPO_BUSY) == 0,
	    ("pmap_clear_modify: page %p is busy", m));

	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
	 * If the object containing the page is locked and the page is not
	 * VPO_BUSY, then PGA_WRITEABLE cannot be concurrently set.
	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
		return;
	vm_page_lock_queues();
	sched_pin();
	if ((m->flags & PG_FICTITIOUS) != 0)
		goto small_mappings;
	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
		va = pv->pv_va;
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pde = pmap_pde(pmap, va);
		oldpde = *pde;
		if ((oldpde & PG_RW) != 0) {
			if (pmap_demote_pde(pmap, pde, va)) {
				if ((oldpde & PG_W) == 0) {
					/*
					 * Write protect the mapping to a
					 * single page so that a subsequent
					 * write access may repromote.
					 */
					va += VM_PAGE_TO_PHYS(m) - (oldpde &
					    PG_PS_FRAME);
					pte = pmap_pte_quick(pmap, va);
					oldpte = *pte;
					if ((oldpte & PG_V) != 0) {
						/*
						 * Regardless of whether a pte is 32 or 64 bits
						 * in size, PG_RW and PG_M are among the least
						 * significant 32 bits.
						 */
						while (!atomic_cmpset_int((u_int *)pte,
						    oldpte,
						    oldpte & ~(PG_M | PG_RW)))
							oldpte = *pte;
						vm_page_dirty(m);
						pmap_invalidate_page(pmap, va);
					}
				}
			}
		}
		PMAP_UNLOCK(pmap);
	}
small_mappings:
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pde = pmap_pde(pmap, pv->pv_va);
		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found"
		    " a 4mpage in page %p's pv list", m));
		pte = pmap_pte_quick(pmap, pv->pv_va);
		if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
			/*
			 * Regardless of whether a pte is 32 or 64 bits
			 * in size, PG_M is among the least significant
			 * 32 bits.
			 */
			atomic_clear_int((u_int *)pte, PG_M);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	sched_unpin();
	vm_page_unlock_queues();
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	struct md_page *pvh;
	pv_entry_t next_pv, pv;
	pmap_t pmap;
	pd_entry_t oldpde, *pde;
	pt_entry_t *pte;
	vm_offset_t va;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_reference: page %p is not managed", m));
	vm_page_lock_queues();
	sched_pin();
	if ((m->flags & PG_FICTITIOUS) != 0)
		goto small_mappings;
	pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
	TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_list, next_pv) {
		va = pv->pv_va;
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pde = pmap_pde(pmap, va);
		oldpde = *pde;
		if ((oldpde & PG_A) != 0) {
			if (pmap_demote_pde(pmap, pde, va)) {
				/*
				 * Remove the mapping to a single page so
				 * that a subsequent access may repromote.
				 * Since the underlying page table page is
				 * fully populated, this removal never frees
				 * a page table page.
				 */
				va += VM_PAGE_TO_PHYS(m) - (oldpde &
				    PG_PS_FRAME);
				pmap_remove_page(pmap, va, NULL);
			}
		}
		PMAP_UNLOCK(pmap);
	}
small_mappings:
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pde = pmap_pde(pmap, pv->pv_va);
		KASSERT((*pde & PG_PS) == 0, ("pmap_clear_reference: found"
		    " a 4mpage in page %p's pv list", m));
		pte = pmap_pte_quick(pmap, pv->pv_va);
		if ((*pte & PG_A) != 0) {
			/*
			 * Regardless of whether a pte is 32 or 64 bits
			 * in size, PG_A is among the least significant
			 * 32 bits.
			 */
			atomic_clear_int((u_int *)pte, PG_A);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	sched_unpin();
	vm_page_unlock_queues();
}

/*
 * Miscellaneous support routines follow
 */

/* Adjust the cache mode for a 4KB page mapped via a PTE. */
static __inline void
pmap_pte_attr(pt_entry_t *pte, int cache_bits)
{
	u_int opte, npte;

	/*
	 * The cache mode bits are all in the low 32-bits of the
	 * PTE, so we can just spin on updating the low 32-bits.
	 */
	do {
		opte = *(u_int *)pte;
		npte = opte & ~PG_PTE_CACHE;
		npte |= cache_bits;
	} while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte));
}

/* Adjust the cache mode for a 2/4MB page mapped via a PDE. */
static __inline void
pmap_pde_attr(pd_entry_t *pde, int cache_bits)
{
	u_int opde, npde;

	/*
	 * The cache mode bits are all in the low 32-bits of the
	 * PDE, so we can just spin on updating the low 32-bits.
	 */
	do {
		opde = *(u_int *)pde;
		npde = opde & ~PG_PDE_CACHE;
		npde |= cache_bits;
	} while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde));
}
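
/*
 * Illustrative sketch (not part of the original code): pmap_pte_attr() and
 * pmap_pde_attr() above share one lock-free update pattern: reread the low
 * 32 bits, compute the new value, and retry the 32-bit compare-and-swap
 * until it succeeds or the entry already holds the desired bits.  A
 * hypothetical generic form of that pattern:
 */
#if 0	/* example only, never compiled */
static __inline void
example_update_low_bits(u_int *word, u_int clear_mask, u_int set_bits)
{
	u_int nval, oval;

	do {
		oval = *word;
		nval = (oval & ~clear_mask) | set_bits;
	} while (nval != oval && !atomic_cmpset_int(word, oval, nval));
}
#endif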

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
{
	vm_offset_t va, offset;
	vm_size_t tmpsize;

	offset = pa & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	pa = pa & PG_FRAME;

	if (pa < KERNLOAD && pa + size <= KERNLOAD)
		va = KERNBASE + pa;
	else
		va = kmem_alloc_nofault(kernel_map, size);
	if (!va)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");

	for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
		pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
	pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
	pmap_invalidate_cache_range(va, va + size);
	return ((void *)(va + offset));
}

void *
pmap_mapdev(vm_paddr_t pa, vm_size_t size)
{

	return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
}

void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{

	return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
}

void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
	vm_offset_t base, offset, tmpva;

	if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
		return;
	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
		pmap_kremove(tmpva);
	pmap_invalidate_range(kernel_pmap, va, tmpva);
	kmem_free(kernel_map, base, size);
}
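
/*
 * Illustrative sketch (not part of the original code): a typical consumer
 * maps a device register window, accesses it through the returned kernel
 * virtual address, and later unmaps the same address and size.  The BAR
 * address and the helper name below are hypothetical; pmap_mapdev() would
 * give the same uncacheable mapping.
 */
#if 0	/* example only, never compiled */
static void
example_map_device_registers(void)
{
	void *regs;

	regs = pmap_mapdev_attr(0xfebf0000, PAGE_SIZE, PAT_UNCACHEABLE);
	/* ... read and write the registers through "regs" ... */
	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
}
#endif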

/*
 * Sets the memory attribute for the specified page.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{

	m->md.pat_mode = ma;
	if ((m->flags & PG_FICTITIOUS) != 0)
		return;

	/*
	 * If "m" is a normal page, flush it from the cache.
	 * See pmap_invalidate_cache_range().
	 *
	 * First, try to find an existing mapping of the page by sf
	 * buffer.  sf_buf_invalidate_cache() modifies mapping and
	 * flushes the cache.
	 */
	if (sf_buf_invalidate_cache(m))
		return;

	/*
	 * If page is not mapped by sf buffer, but CPU does not
	 * support self snoop, map the page transient and do
	 * invalidation.  In the worst case, whole cache is flushed by
	 * pmap_invalidate_cache_range().
	 */
	if ((cpu_feature & CPUID_SS) == 0)
		pmap_flush_page(m);
}

static void
pmap_flush_page(vm_page_t m)
{
	struct sysmaps *sysmaps;
	vm_offset_t sva, eva;

	if ((cpu_feature & CPUID_CLFSH) != 0) {
		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
		mtx_lock(&sysmaps->lock);
		if (*sysmaps->CMAP2)
			panic("pmap_flush_page: CMAP2 busy");
		sched_pin();
		*sysmaps->CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) |
		    PG_A | PG_M | pmap_cache_bits(m->md.pat_mode, 0);
		invlcaddr(sysmaps->CADDR2);
		sva = (vm_offset_t)sysmaps->CADDR2;
		eva = sva + PAGE_SIZE;

		/*
		 * Use mfence despite the ordering implied by
		 * mtx_{un,}lock() because clflush is not guaranteed
		 * to be ordered by any other instruction.
		 */
		mfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflush(sva);
		mfence();
		*sysmaps->CMAP2 = 0;
		sched_unpin();
		mtx_unlock(&sysmaps->lock);
	} else
		pmap_invalidate_cache();
}

/*
 * Changes the specified virtual address range's memory type to that given by
 * the parameter "mode".  The specified virtual address range must be
 * completely contained within the kernel map.
 *
 * Returns zero if the change completed successfully, and either EINVAL or
 * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
 * of the virtual address range was not mapped, and ENOMEM is returned if
 * there was insufficient memory available to complete the change.
 */
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
	vm_offset_t base, offset, tmpva;
	pd_entry_t *pde;
	pt_entry_t *pte;
	int cache_bits_pte, cache_bits_pde;
	boolean_t changed;

	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);

	/*
	 * Only supported on kernel virtual addresses above the recursive map.
	 */
	if (base < VM_MIN_KERNEL_ADDRESS)
		return (EINVAL);

	cache_bits_pde = pmap_cache_bits(mode, 1);
	cache_bits_pte = pmap_cache_bits(mode, 0);
	changed = FALSE;

	/*
	 * Pages that aren't mapped aren't supported.  Also break down
	 * 2/4MB pages into 4KB pages if required.
	 */
	PMAP_LOCK(kernel_pmap);
	for (tmpva = base; tmpva < base + size; ) {
		pde = pmap_pde(kernel_pmap, tmpva);
		if (*pde == 0) {
			PMAP_UNLOCK(kernel_pmap);
			return (EINVAL);
		}
		if (*pde & PG_PS) {
			/*
			 * If the current 2/4MB page already has
			 * the required memory type, then we need not
			 * demote this page.  Just increment tmpva to
			 * the next 2/4MB page frame.
			 */
			if ((*pde & PG_PDE_CACHE) == cache_bits_pde) {
				tmpva = trunc_4mpage(tmpva) + NBPDR;
				continue;
			}

			/*
			 * If the current offset aligns with a 2/4MB
			 * page frame and there is at least 2/4MB left
			 * within the range, then we need not break
			 * down this page into 4KB pages.
			 */
			if ((tmpva & PDRMASK) == 0 &&
			    tmpva + PDRMASK < base + size) {
				tmpva += NBPDR;
				continue;
			}
			if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) {
				PMAP_UNLOCK(kernel_pmap);
				return (ENOMEM);
			}
		}
		pte = vtopte(tmpva);
		if (*pte == 0) {
			PMAP_UNLOCK(kernel_pmap);
			return (EINVAL);
		}
		tmpva += PAGE_SIZE;
	}
	PMAP_UNLOCK(kernel_pmap);

	/*
	 * Ok, all the pages exist, so run through them updating their
	 * cache mode if required.
	 */
	for (tmpva = base; tmpva < base + size; ) {
		pde = pmap_pde(kernel_pmap, tmpva);
		if (*pde & PG_PS) {
			if ((*pde & PG_PDE_CACHE) != cache_bits_pde) {
				pmap_pde_attr(pde, cache_bits_pde);
				changed = TRUE;
			}
			tmpva = trunc_4mpage(tmpva) + NBPDR;
		} else {
			pte = vtopte(tmpva);
			if ((*pte & PG_PTE_CACHE) != cache_bits_pte) {
				pmap_pte_attr(pte, cache_bits_pte);
				changed = TRUE;
			}
			tmpva += PAGE_SIZE;
		}
	}

	/*
	 * Flush CPU caches to make sure any data isn't cached that
	 * shouldn't be, etc.
	 */
	if (changed) {
		pmap_invalidate_range(kernel_pmap, base, tmpva);
		pmap_invalidate_cache_range(base, tmpva);
	}
	return (0);
}
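
/*
 * Illustrative sketch (not part of the original code): a caller such as a
 * framebuffer driver might switch an already mapped kernel range to
 * write-combining.  The return value must be checked: EINVAL if part of the
 * range is unmapped, ENOMEM if a required 2/4MB page demotion fails.  The
 * helper name is hypothetical.
 */
#if 0	/* example only, never compiled */
static int
example_set_write_combining(vm_offset_t kva, vm_size_t len)
{

	return (pmap_change_attr(kva, len, PAT_WRITE_COMBINING));
}
#endif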

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pd_entry_t *pdep;
	pt_entry_t *ptep, pte;
	vm_paddr_t pa;
	int val;

	PMAP_LOCK(pmap);
retry:
	pdep = pmap_pde(pmap, addr);
	if (*pdep != 0) {
		if (*pdep & PG_PS) {
			pte = *pdep;
			/* Compute the physical address of the 4KB page. */
			pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) &
			    PG_FRAME;
			val = MINCORE_SUPER;
		} else {
			ptep = pmap_pte(pmap, addr);
			pte = *ptep;
			pmap_pte_release(ptep);
			pa = pte & PG_FRAME;
			val = 0;
		}
	} else {
		pte = 0;
		pa = 0;
		val = 0;
	}
	if ((pte & PG_V) != 0) {
		val |= MINCORE_INCORE;
		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if ((pte & PG_A) != 0)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	}
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

void
pmap_activate(struct thread *td)
{
	pmap_t pmap, oldpmap;
	u_int cpuid;
	u_int32_t cr3;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	oldpmap = PCPU_GET(curpmap);
	cpuid = PCPU_GET(cpuid);
#if defined(SMP)
	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
#else
	CPU_CLR(cpuid, &oldpmap->pm_active);
	CPU_SET(cpuid, &pmap->pm_active);
#endif
#ifdef PAE
	cr3 = vtophys(pmap->pm_pdpt);
#else
	cr3 = vtophys(pmap->pm_pdir);
#endif
	/*
	 * pmap_activate is for the current thread on the current cpu
	 */
	td->td_pcb->pcb_cr3 = cr3;
	load_cr3(cr3);
	PCPU_SET(curpmap, pmap);
	critical_exit();
}

void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
}
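
/*
 * Illustrative sketch (not part of the original code): the value built by
 * pmap_mincore() above is a direct translation of PTE bits into mincore(2)
 * flags: PG_V yields MINCORE_INCORE, PG_M together with PG_RW yields the
 * modified flags, PG_A yields the referenced flags, and MINCORE_SUPER marks
 * a 2/4MB mapping.  Hypothetical stand-alone form of that translation:
 */
#if 0	/* example only, never compiled */
static int
example_mincore_bits(pt_entry_t pte, boolean_t superpage)
{
	int val;

	val = superpage ? MINCORE_SUPER : 0;
	if ((pte & PG_V) != 0) {
		val |= MINCORE_INCORE;
		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if ((pte & PG_A) != 0)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	}
	return (val);
}
#endif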

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
	vm_offset_t superpage_offset;

	if (size < NBPDR)
		return;
	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
		offset += ptoa(object->pg_color);
	superpage_offset = offset & PDRMASK;
	if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
	    (*addr & PDRMASK) == superpage_offset)
		return;
	if ((*addr & PDRMASK) < superpage_offset)
		*addr = (*addr & ~PDRMASK) + superpage_offset;
	else
		*addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
}
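
/*
 * Illustrative sketch (not part of the original code): with 4MB superpages
 * (NBPDR = 4MB, PDRMASK = 0x3fffff), a hypothetical request for
 * *addr = 0x20000000 and size = 16MB backed by object offset 0x500000 has
 * superpage_offset = 0x100000.  Since (*addr & PDRMASK) == 0 is below that
 * offset, pmap_align_superpage() above moves *addr to 0x20100000, so the
 * virtual address and the backing offset share the same alignment within a
 * 4MB page and a superpage mapping of the range becomes possible.
 */
#if 0	/* example only, never compiled */
static void
example_align(void)
{
	vm_offset_t addr = 0x20000000;

	pmap_align_superpage(NULL, 0x500000, &addr, 16 * 1024 * 1024);
	/* addr is now 0x20100000. */
}
#endif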

#if defined(PMAP_DEBUG)
int
pmap_pid_dump(int pid)
{
	pmap_t pmap;
	struct proc *p;
	int npte = 0;
	int index;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_pid != pid)
			continue;

		if (p->p_vmspace) {
			int i, j;
			index = 0;
			pmap = vmspace_pmap(p->p_vmspace);
			for (i = 0; i < NPDEPTD; i++) {
				pd_entry_t *pde;
				pt_entry_t *pte;
				vm_offset_t base = i << PDRSHIFT;

				pde = &pmap->pm_pdir[i];
				if (pde && pmap_pde_v(pde)) {
					for (j = 0; j < NPTEPG; j++) {
						vm_offset_t va = base + (j << PAGE_SHIFT);
						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
							if (index) {
								index = 0;
								printf("\n");
							}
							sx_sunlock(&allproc_lock);
							return (npte);
						}
						pte = pmap_pte(pmap, va);
						if (pte && pmap_pte_v(pte)) {
							pt_entry_t pa;
							vm_page_t m;
							pa = *pte;
							m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
							    va, pa, m->hold_count, m->wire_count, m->flags);
							npte++;
							index++;
							if (index >= 2) {
								index = 0;
								printf("\n");
							} else {
								printf(" ");
							}
						}
					}
				}
			}
		}
	}
	sx_sunlock(&allproc_lock);
	return (npte);
}
#endif

#if defined(DEBUG)

static void	pads(pmap_t pm);
void		pmap_pvdump(vm_paddr_t pa);

/* print address space of pmap */
static void
pads(pmap_t pm)
{
	int i, j;
	vm_paddr_t va;
	pt_entry_t *ptep;

	if (pm == kernel_pmap)
		return;
	for (i = 0; i < NPDEPTD; i++)
		if (pm->pm_pdir[i])
			for (j = 0; j < NPTEPG; j++) {
				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
				if (pm == kernel_pmap && va < KERNBASE)
					continue;
				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
					continue;
				ptep = pmap_pte(pm, va);
				if (pmap_pte_v(ptep))
					printf("%x:%x ", va, *ptep);
			}

}

void
pmap_pvdump(vm_paddr_t pa)
{
	pv_entry_t pv;
	pmap_t pmap;
	vm_page_t m;

	printf("pa %x", pa);
	m = PHYS_TO_VM_PAGE(pa);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		pmap = PV_PMAP(pv);
		printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
		pads(pmap);
	}
	printf(" ");
}
#endif