/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidations expensive,
 * this module may delay invalidation or reduced-protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and as to when physical maps must be made correct.
 */

#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_smp.h"
#include "opt_xbox.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sf_buf.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#else
#include <sys/cpuset.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#ifdef XBOX
#include <machine/xbox.h>
#endif

#include <xen/interface/xen.h>
#include <xen/hypervisor.h>
#include <machine/xen/hypercall.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>

#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#define DIAGNOSTIC

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define pdir_pde(m, v)	(m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
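
/*
 * Illustrative sketch (not compiled): how a virtual address selects a page
 * directory entry through the accessor macros above.  Assumes the usual
 * non-PAE i386 layout, where PDRSHIFT is 22 and each PDE covers 4 MB.
 */
#if 0
static pd_entry_t *
pmap_pde_example(pmap_t pmap, vm_offset_t va)
{
	/* Index of the 4 MB slot covering "va": the top 10 bits of the VA. */
	unsigned pd_idx = va >> PDRSHIFT;

	/* pmap_pde() is shorthand for exactly this indexing. */
	return (&pmap->pm_pdir[pd_idx]);
}
#endif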

#define HAMFISTED_LOCKING
#ifdef HAMFISTED_LOCKING
static struct mtx createdelete_lock;
#endif

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
int pgeflag = 0;		/* PG_G or-in */
int pseflag = 0;		/* PG_PS or-in */

int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;

#ifdef PAE
pt_entry_t pg_nx;
#endif

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

static int pat_works;		/* Is page attribute table sane? */

/*
 * This lock is defined as static in other pmap implementations.  It cannot,
 * however, be defined as static here, because it is (ab)used to serialize
 * queued page table changes in other source files.
 */
struct rwlock pvh_global_lock;

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;	/* KVA block for pv_chunks */
int pv_maxchunks;		/* How many chunks we have KVA for */
vm_offset_t pv_vafree;		/* freelist stored in the PTE */

/*
 * All those kernel PT submaps that BSD is so fond of
 */
struct sysmaps {
	struct mtx lock;
	pt_entry_t *CMAP1;
	pt_entry_t *CMAP2;
	caddr_t	CADDR1;
	caddr_t	CADDR2;
};
static struct sysmaps sysmaps_pcpu[MAXCPU];
pt_entry_t *CMAP3;
caddr_t ptvmmap = 0;
caddr_t CADDR3;
struct msgbuf *msgbufp = 0;

/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;

static pt_entry_t *PMAP1 = 0, *PMAP2;
static pt_entry_t *PADDR1 = 0, *PADDR2;
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
	   &PMAP1changedcpu, 0,
	   "Number of times pmap_pte_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
	   &PMAP1changed, 0,
	   "Number of times pmap_pte_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
	   &PMAP1unchanged, 0,
	   "Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, boolean_t try);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);

static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count,
		    pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
		    vm_page_t mpte);
static void pmap_flush_page(vm_page_t m);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
		    vm_page_t *free);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
		    vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
		    vm_offset_t va);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
		    vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags);

static vm_page_t _pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags);
static void _pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);

static __inline void pagezero(void *page);

CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));

/*
 * If you get an error here, then you set KVA_PAGES wrong!  See the
 * description of KVA_PAGES in sys/i386/include/pmap.h.  It must be a
 * multiple of 4 for a normal kernel, or a multiple of 8 for PAE.
 */
CTASSERT(KERNBASE % (1 << 24) == 0);

void
pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
{
	vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);

	switch (type) {
	case SH_PD_SET_VA:
#if 0
		xen_queue_pt_update(shadow_pdir_ma,
				    xpmap_ptom(val & ~(PG_RW)));
#endif
		xen_queue_pt_update(pdir_ma,
				    xpmap_ptom(val));
		break;
	case SH_PD_SET_VA_MA:
#if 0
		xen_queue_pt_update(shadow_pdir_ma,
				    val & ~(PG_RW));
#endif
		xen_queue_pt_update(pdir_ma, val);
		break;
	case SH_PD_SET_VA_CLEAR:
#if 0
		xen_queue_pt_update(shadow_pdir_ma, 0);
#endif
		xen_queue_pt_update(pdir_ma, 0);
		break;
	}
}
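
/*
 * Illustrative sketch (not compiled): page directory updates issued through
 * pd_set() are only queued for the hypervisor; they do not take effect
 * until the queue is flushed.  A hypothetical caller installing a PDE from
 * a machine address "ma" might look like this.
 */
#if 0
static void
pd_set_example(pmap_t pmap, int ptepindex, vm_paddr_t ma)
{

	/* Queue the update; Xen validates it when the queue drains. */
	pd_set(pmap, ptepindex, ma | PG_V | PG_A, SH_PD_SET_VA_MA);

	/* Force the queued hypercalls out now. */
	xen_flush_queue();
}
#endif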
/*
 * Bootstrap the system enough to run with virtual memory.
 *
 * On the i386 this is called after mapping has already been enabled
 * and just syncs the pmap module with what has already been done.
 * [We can't call it easily with mapping off since the kernel is not
 * mapped with PA == VA, hence we would have to relocate every address
 * from the linked base (virtual) address "KERNBASE" to the actual
 * (physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t firstaddr)
{
	vm_offset_t va;
	pt_entry_t *pte, *unused;
	struct sysmaps *sysmaps;
	int i;

	/*
	 * Initialize the first available kernel virtual address.  However,
	 * using "firstaddr" may waste a few pages of the kernel virtual
	 * address space, because locore may not have mapped every physical
	 * page that it allocated.  Preferably, locore would provide a first
	 * unused virtual address in addition to "firstaddr".
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
#ifdef PAE
	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);

	/*
	 * Initialize the global pv list lock.
	 */
	rw_init_flags(&pvh_global_lock, "pmap pv global", RW_RECURSE);

	LIST_INIT(&allpmaps);
	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
	if (nkpt == 0)
		nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_avail;
	pte = vtopte(va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 * CMAP3 is used for the idle process page zeroing.
	 */
	for (i = 0; i < MAXCPU; i++) {
		sysmaps = &sysmaps_pcpu[i];
		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
		PT_SET_MA(sysmaps->CADDR1, 0);
		PT_SET_MA(sysmaps->CADDR2, 0);
	}
	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
	PT_SET_MA(CADDR3, 0);

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)

	/*
	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
	 */
	SYSMAP(caddr_t, unused, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 */
	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(msgbufsize)))

	/*
	 * PADDR1 and PADDR2 are used by pmap_pte_quick() and pmap_pte(),
	 * respectively.
	 */
	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1)
	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1)

	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);

	virtual_avail = va;

	/*
	 * Leave in place an identity mapping (virt == phys) for the low 1 MB
	 * physical memory region that is used by the ACPI wakeup code.  This
	 * mapping must not have PG_G set.
	 */
#ifndef XEN
	/*
	 * leave here deliberately to show that this is not supported
	 */
#ifdef XBOX
	/* FIXME: This is gross, but needed for the XBOX.  Since we are at
	 * such an early stage, we cannot yet neatly map video memory ... :-(
	 * Better fixes are very welcome! */
	if (!arch_i386_is_xbox)
#endif
	for (i = 1; i < NKPT; i++)
		PTD[i] = 0;

	/* Initialize the PAT MSR if present. */
	pmap_init_pat();

	/* Turn on PG_G on kernel page(s) */
	pmap_set_pg();
#endif

#ifdef HAMFISTED_LOCKING
	mtx_init(&createdelete_lock, "pmap create/delete", NULL, MTX_DEF);
#endif
}
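
/*
 * Illustrative sketch (not compiled): what one SYSMAP() use above expands
 * to.  Each invocation carves "n" pages out of the bootstrap VA cursor
 * "va" and hands back both the VA and its first PTE, so
 * SYSMAP(caddr_t, CMAP3, CADDR3, 1) is equivalent to:
 */
#if 0
	CADDR3 = (caddr_t)va;
	va += 1 * PAGE_SIZE;
	CMAP3 = pte;
	pte += 1;
#endif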
/*
 * Set up the PAT MSR.
 */
void
pmap_init_pat(void)
{
	uint64_t pat_msr;

	/* Bail if this CPU doesn't implement PAT. */
	if (!(cpu_feature & CPUID_PAT))
		return;

	if (cpu_vendor_id != CPU_VENDOR_INTEL ||
	    (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
		/*
		 * Leave the indices 0-3 at the default of WB, WT, UC, and UC-.
		 * Program 4 and 5 as WP and WC.
		 * Leave 6 and 7 as UC and UC-.
		 */
		pat_msr = rdmsr(MSR_PAT);
		pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
		pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
		    PAT_VALUE(5, PAT_WRITE_COMBINING);
		pat_works = 1;
	} else {
		/*
		 * Due to some Intel errata, we can only safely use the lower 4
		 * PAT entries.  Thus, just replace PAT Index 2 with WC instead
		 * of UC-.
		 *
		 *   Intel Pentium III Processor Specification Update
		 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
		 * or Mode C Paging)
		 *
		 *   Intel Pentium IV Processor Specification Update
		 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
		 */
		pat_msr = rdmsr(MSR_PAT);
		pat_msr &= ~PAT_MASK(2);
		pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
		pat_works = 0;
	}
	wrmsr(MSR_PAT, pat_msr);
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pat_mode = PAT_WRITE_BACK;
}

/*
 * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
 * Requirements:
 *  - Must deal with pages in order to ensure that none of the PG_* bits
 *    are ever set, PG_V in particular.
 *  - Assumes we can write to ptes without pte_store() atomic ops, even
 *    on PAE systems.  This should be ok.
 *  - Assumes nothing will ever test these addresses for 0 to indicate
 *    no mapping instead of correctly checking PG_V.
 *  - Assumes a vm_offset_t will fit in a pte (true for i386).
 * Because PG_V is never set, there can be no mappings to invalidate.
 */
static int ptelist_count = 0;
static vm_offset_t
pmap_ptelist_alloc(vm_offset_t *head)
{
	vm_offset_t va;
	vm_offset_t *phead = (vm_offset_t *)*head;

	if (ptelist_count == 0) {
		printf("out of memory!!!!!!\n");
		return (0);	/* Out of memory */
	}
	ptelist_count--;
	va = phead[ptelist_count];
	return (va);
}

static void
pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
{
	vm_offset_t *phead = (vm_offset_t *)*head;

	phead[ptelist_count++] = va;
}

static void
pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
{
	int i, nstackpages;
	vm_offset_t va;
	vm_page_t m;

	nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1) /
	    (PAGE_SIZE/sizeof(vm_offset_t));
	for (i = 0; i < nstackpages; i++) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		m = vm_page_alloc(NULL, i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		pmap_qenter(va, &m, 1);
	}

	*head = (vm_offset_t)base;
	for (i = npages - 1; i >= nstackpages; i--) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		pmap_ptelist_free(head, va);
	}
}
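
/*
 * Illustrative sketch (not compiled): the ptelist routines above manage a
 * stack of free KVA pages, with the stack itself stored in the first few
 * pages of the region.  A hypothetical round trip looks like this.
 */
#if 0
static void
ptelist_example(void)
{
	vm_offset_t va;

	/* Pop one free KVA page; returns 0 when the list is empty. */
	va = pmap_ptelist_alloc(&pv_vafree);
	if (va == 0)
		return;

	/* ... temporarily use "va" for a pv chunk mapping ... */

	/* Push it back for reuse. */
	pmap_ptelist_free(&pv_vafree, va);
}
#endif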

/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_max = roundup(pv_entry_max, _NPCPV);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
	pv_chunkbase = (struct pv_chunk *)kva_alloc(PAGE_SIZE * pv_maxchunks);
	if (pv_chunkbase == NULL)
		panic("pmap_init: not enough kvm for pv chunks");
	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
}
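
/*
 * Worked example of the sizing above, under assumed values: with the
 * default shpgperproc of 200, a maxproc of 1000, and 500000 physical
 * pages, pv_entry_max starts at 200 * 1000 + 500000 = 700000 entries,
 * is rounded up to a multiple of _NPCPV, and the high-water mark is set
 * at 90% of that, so pv entry reclamation begins around 630000 entries.
 */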

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
	"Max number of PV entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
	"Page share factor per proc");

static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
    "2/4MB page mapping counters");

static u_long pmap_pde_mappings;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
    &pmap_pde_mappings, 0, "2/4MB page mappings");

/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * Determine the appropriate bits to set in a PTE or PDE for a specified
 * caching mode.
 */
int
pmap_cache_bits(int mode, boolean_t is_pde)
{
	int pat_flag, pat_index, cache_bits;

	/* The PAT bit is different for PTE's and PDE's. */
	pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;

	/* If we don't support PAT, map extended modes to older ones. */
	if (!(cpu_feature & CPUID_PAT)) {
		switch (mode) {
		case PAT_UNCACHEABLE:
		case PAT_WRITE_THROUGH:
		case PAT_WRITE_BACK:
			break;
		case PAT_UNCACHED:
		case PAT_WRITE_COMBINING:
		case PAT_WRITE_PROTECTED:
			mode = PAT_UNCACHEABLE;
			break;
		}
	}

	/* Map the caching mode to a PAT index. */
	if (pat_works) {
		switch (mode) {
		case PAT_UNCACHEABLE:
			pat_index = 3;
			break;
		case PAT_WRITE_THROUGH:
			pat_index = 1;
			break;
		case PAT_WRITE_BACK:
			pat_index = 0;
			break;
		case PAT_UNCACHED:
			pat_index = 2;
			break;
		case PAT_WRITE_COMBINING:
			pat_index = 5;
			break;
		case PAT_WRITE_PROTECTED:
			pat_index = 4;
			break;
		default:
			panic("Unknown caching mode %d\n", mode);
		}
	} else {
		switch (mode) {
		case PAT_UNCACHED:
		case PAT_UNCACHEABLE:
		case PAT_WRITE_PROTECTED:
			pat_index = 3;
			break;
		case PAT_WRITE_THROUGH:
			pat_index = 1;
			break;
		case PAT_WRITE_BACK:
			pat_index = 0;
			break;
		case PAT_WRITE_COMBINING:
			pat_index = 2;
			break;
		default:
			panic("Unknown caching mode %d\n", mode);
		}
	}

	/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
	cache_bits = 0;
	if (pat_index & 0x4)
		cache_bits |= pat_flag;
	if (pat_index & 0x2)
		cache_bits |= PG_NC_PCD;
	if (pat_index & 0x1)
		cache_bits |= PG_NC_PWT;
	return (cache_bits);
}
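
/*
 * Worked example: with a fully usable PAT (pat_works != 0), a
 * write-combining PTE maps to PAT index 5 (binary 101), so
 * pmap_cache_bits(PAT_WRITE_COMBINING, 0) returns PG_PTE_PAT | PG_NC_PWT.
 * On the errata-limited path (pat_works == 0), the same mode maps to
 * index 2 (binary 010) and yields only PG_NC_PCD.
 */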
#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 *
 * N.B.: Before calling any of the following TLB invalidation functions,
 * the calling processor must ensure that all stores updating a non-
 * kernel page table are globally performed.  Otherwise, another
 * processor could cache an old, pre-update entry without being
 * invalidated.  This can happen one of two ways: (1) The pmap becomes
 * active on another processor after its pm_active field is checked by
 * one of the following functions but before a store updating the page
 * table is globally performed. (2) The pmap becomes active on another
 * processor before its pm_active field is checked but due to
 * speculative loads one of the following functions still reads the
 * pmap as inactive on the other processor.
 *
 * The kernel page table is exempt because its pm_active field is
 * immutable.  The kernel page table is always active on every
 * processor.
 */
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	cpuset_t other_cpus;
	u_int cpuid;

	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
	    pmap, va);

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		invlpg(va);
		smp_invlpg(va);
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			invlpg(va);
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invlpg(other_cpus, va);
	}
	sched_unpin();
	PT_UPDATES_FLUSH();
}

void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	cpuset_t other_cpus;
	vm_offset_t addr;
	u_int cpuid;

	CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
	    pmap, sva, eva);

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		smp_invlpg_range(sva, eva);
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			for (addr = sva; addr < eva; addr += PAGE_SIZE)
				invlpg(addr);
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invlpg_range(other_cpus, sva, eva);
	}
	sched_unpin();
	PT_UPDATES_FLUSH();
}

void
pmap_invalidate_all(pmap_t pmap)
{
	cpuset_t other_cpus;
	u_int cpuid;

	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);

	sched_pin();
	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
		invltlb();
		smp_invltlb();
	} else {
		cpuid = PCPU_GET(cpuid);
		other_cpus = all_cpus;
		CPU_CLR(cpuid, &other_cpus);
		if (CPU_ISSET(cpuid, &pmap->pm_active))
			invltlb();
		CPU_AND(&other_cpus, &pmap->pm_active);
		if (!CPU_EMPTY(&other_cpus))
			smp_masked_invltlb(other_cpus);
	}
	sched_unpin();
}

void
pmap_invalidate_cache(void)
{

	sched_pin();
	wbinvd();
	smp_cache_flush();
	sched_unpin();
}
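
/*
 * Illustrative sketch (not compiled): the usual calling pattern for the
 * invalidation routines above.  After a PTE is changed, the queued Xen
 * update must become globally visible and the affected VA must be shot
 * down on every processor that may have it cached.
 */
#if 0
static void
invalidate_example(pmap_t pmap, pt_entry_t *pte, vm_offset_t va)
{

	/* Clear the mapping; the update is queued for the hypervisor. */
	PT_CLEAR_VA(pte, FALSE);

	/* Purge stale TLB entries everywhere; flushes queued updates too. */
	pmap_invalidate_page(pmap, va);
}
#endif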
#else /* !SMP */
/*
 * Normal, non-SMP, 486+ invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
	    pmap, va);

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		invlpg(va);
	PT_UPDATES_FLUSH();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (eva - sva > PAGE_SIZE)
		CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
		    pmap, sva, eva);

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
	PT_UPDATES_FLUSH();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);

	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
		invltlb();
}

PMAP_INLINE void
pmap_invalidate_cache(void)
{

	wbinvd();
}
#endif /* !SMP */

#define	PMAP_CLFLUSH_THRESHOLD	(2 * 1024 * 1024)

void
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva, boolean_t force)
{

	if (force) {
		sva &= ~(vm_offset_t)cpu_clflush_line_size;
	} else {
		KASSERT((sva & PAGE_MASK) == 0,
		    ("pmap_invalidate_cache_range: sva not page-aligned"));
		KASSERT((eva & PAGE_MASK) == 0,
		    ("pmap_invalidate_cache_range: eva not page-aligned"));
	}

	if ((cpu_feature & CPUID_SS) != 0 && !force)
		; /* If "Self Snoop" is supported, do nothing. */
	else if ((cpu_feature & CPUID_CLFSH) != 0 &&
	    eva - sva < PMAP_CLFLUSH_THRESHOLD) {

		/*
		 * Otherwise, do per-cache line flush.  Use the mfence
		 * instruction to ensure that previous stores are
		 * included in the write-back.  The processor
		 * propagates flush to other processors in the cache
		 * coherence domain.
		 */
		mfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflush(sva);
		mfence();
	} else {

		/*
		 * No targeted cache flush methods are supported by the CPU,
		 * or the supplied range is bigger than 2MB.
		 * Globally invalidate cache.
		 */
		pmap_invalidate_cache();
	}
}

void
pmap_invalidate_cache_pages(vm_page_t *pages, int count)
{
	int i;

	if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE ||
	    (cpu_feature & CPUID_CLFSH) == 0) {
		pmap_invalidate_cache();
	} else {
		for (i = 0; i < count; i++)
			pmap_flush_page(pages[i]);
	}
}

/*
 * Are we current address space or kernel?  N.B. We return FALSE when
 * a pmap's page table is in use because a kernel thread is borrowing
 * it.  The borrowed page table can change spontaneously, making any
 * dependence on its continued use subject to a race condition.
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}

/*
 * If the given pmap is not the current or kernel pmap, the returned pte must
 * be released by passing it to pmap_pte_release().
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_lock(&PMAP2mutex);
		newpf = *pde & PG_FRAME;
		if ((*PMAP2 & PG_FRAME) != newpf) {
			PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
			CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
			    pmap, va, (*PMAP2 & 0xffffffff));
		}
		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (NULL);
}

/*
 * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
 * being NULL.
 */
static __inline void
pmap_pte_release(pt_entry_t *pte)
{

	if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
		CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
		    *PMAP2);
		rw_wlock(&pvh_global_lock);
		PT_SET_VA(PMAP2, 0, TRUE);
		rw_wunlock(&pvh_global_lock);
		mtx_unlock(&PMAP2mutex);
	}
}

static __inline void
invlcaddr(void *caddr)
{

	invlpg((u_int)caddr);
	PT_UPDATES_FLUSH();
}
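
/*
 * Illustrative sketch (not compiled): pmap_pte() hands out a mapping
 * through the shared PMAP2/PADDR2 window when the pmap is not current,
 * so every lookup must be paired with pmap_pte_release().
 */
#if 0
static pt_entry_t
pmap_pte_lookup_example(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *ptep, pte;

	PMAP_LOCK(pmap);
	ptep = pmap_pte(pmap, va);
	pte = (ptep != NULL) ? *ptep : 0;
	pmap_pte_release(ptep);		/* safe even if ptep is NULL */
	PMAP_UNLOCK(pmap);
	return (pte);
}
#endif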
/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 *
 * If the given pmap is not the current pmap, pvh_global_lock
 * must be held and curthread pinned to a CPU.
 */
static pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		rw_assert(&pvh_global_lock, RA_WLOCKED);
		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
		newpf = *pde & PG_FRAME;
		if ((*PMAP1 & PG_FRAME) != newpf) {
			PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
			CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
			    pmap, va, (u_long)*PMAP1);

#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			invlcaddr(PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (NULL);
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;
	pt_entry_t pteval;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
			PMAP_UNLOCK(pmap);
			return (rtval);
		}
		pte = pmap_pte(pmap, va);
		pteval = *pte ? xpmap_mtop(*pte) : 0;
		rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
		pmap_pte_release(pte);
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}
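
/*
 * Illustrative sketch (not compiled): under Xen, a pmap entry holds a
 * machine frame, so pmap_extract() translates it back to a pseudo-physical
 * address with xpmap_mtop(), while pmap_extract_ma() below returns the raw
 * machine address.  For a hypothetical "pmap"/"va" pair:
 */
#if 0
static void
extract_example(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t pa, ma;

	pa = pmap_extract(pmap, va);	/* pseudo-physical, for the VM layer */
	ma = pmap_extract_ma(pmap, va);	/* machine address, for the hypervisor */
}
#endif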

/*
 * Routine:	pmap_extract_ma
 * Function:
 *	Like pmap_extract, but returns machine address
 */
vm_paddr_t
pmap_extract_ma(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = (pde & ~PDRMASK) | (va & PDRMASK);
			PMAP_UNLOCK(pmap);
			return (rtval);
		}
		pte = pmap_pte(pmap, va);
		rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
		pmap_pte_release(pte);
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pd_entry_t pde;
	pt_entry_t pte, *ptep;
	vm_page_t m;
	vm_paddr_t pa;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pde = PT_GET(pmap_pde(pmap, va));
	if (pde != 0) {
		if (pde & PG_PS) {
			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
				if (vm_page_pa_tryrelock(pmap, (pde &
				    PG_PS_FRAME) | (va & PDRMASK), &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
				    (va & PDRMASK));
				vm_page_hold(m);
			}
		} else {
			ptep = pmap_pte(pmap, va);
			pte = PT_GET(ptep);
			pmap_pte_release(ptep);
			if (pte != 0 &&
			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
				    &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
				vm_page_hold(m);
			}
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 *
 * This function may be used before pmap_bootstrap() is called.
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{

	PT_SET_MA(va, xpmap_ptom(pa) | PG_RW | PG_V | pgeflag);
}

void
pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
}

static __inline void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
{

	PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 *
 * This function may be used before pmap_bootstrap() is called.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	PT_CLEAR_VA(pte, FALSE);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping.  Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged.  Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va, sva;

	va = sva = *virt;
	CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
	    va, start, end, prot);
	while (start < end) {
		pmap_kenter(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
	*virt = va;
	return (sva);
}

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pt_entry_t *endpte, *pte;
	vm_paddr_t pa;
	vm_offset_t va = sva;
	int mclcount = 0;
	multicall_entry_t mcl[16];
	multicall_entry_t *mclp = mcl;
	int error;

	CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
	pte = vtopte(sva);
	endpte = pte + count;
	while (pte < endpte) {
		pa = VM_PAGE_TO_MACH(*ma) | pgeflag | PG_RW | PG_V | PG_M | PG_A;

		mclp->op = __HYPERVISOR_update_va_mapping;
		mclp->args[0] = va;
		mclp->args[1] = (uint32_t)(pa & 0xffffffff);
		mclp->args[2] = (uint32_t)(pa >> 32);
		mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;

		va += PAGE_SIZE;
		pte++;
		ma++;
		mclp++;
		mclcount++;
		if (mclcount == 16) {
			error = HYPERVISOR_multicall(mcl, mclcount);
			mclp = mcl;
			mclcount = 0;
			KASSERT(error == 0, ("bad multicall %d", error));
		}
	}
	if (mclcount) {
		error = HYPERVISOR_multicall(mcl, mclcount);
		KASSERT(error == 0, ("bad multicall %d", error));
	}

#ifdef INVARIANTS
	for (pte = vtopte(sva), mclcount = 0; mclcount < count;
	    mclcount++, pte++)
		KASSERT(*pte, ("pte not set for va=0x%x",
		    sva + mclcount * PAGE_SIZE));
#endif
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
	va = sva;
	rw_wlock(&pvh_global_lock);
	critical_enter();
	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	PT_UPDATES_FLUSH();
	pmap_invalidate_range(kernel_pmap, sva, va);
	critical_exit();
	rw_wunlock(&pvh_global_lock);
}
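
/*
 * Illustrative sketch (not compiled): the usual temporary-mapping round
 * trip built on the two routines above.  "pages" and "npages" are
 * hypothetical; the KVA would come from a kernel VA allocator.
 */
#if 0
static void
qenter_example(vm_offset_t kva, vm_page_t *pages, int npages)
{

	/* Batch-map the pages; the updates go out as Xen multicalls. */
	pmap_qenter(kva, pages, npages);

	/* ... access the pages through "kva" ... */

	/* Tear the mappings down and shoot down stale TLB entries. */
	pmap_qremove(kva, npages);
}
#endif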

/***************************************************
 * Page table page management routines.....
 ***************************************************/
static __inline void
pmap_free_zero_pages(vm_page_t free)
{
	vm_page_t m;

	while (free != NULL) {
		m = free;
		free = (void *)m->object;
		m->object = NULL;
		vm_page_free_zero(m);
	}
}

/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_ptp(pmap, m, free);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_ptp(pmap_t pmap, vm_page_t m, vm_page_t *free)
{
	vm_offset_t pteva;

	PT_UPDATES_FLUSH();
	/*
	 * unmap the page table page
	 */
	xen_pt_unpin(pmap->pm_pdir[m->pindex]);
	/*
	 * page *might* contain residual mapping :-/
	 */
	PD_CLEAR_VA(pmap, m->pindex, TRUE);
	pmap_zero_page(m);
	--pmap->pm_stats.resident_count;

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&cnt.v_wire_count, 1);

	/*
	 * Do an invltlb to make the invalidated mapping
	 * take effect immediately.
	 */
	pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
	pmap_invalidate_page(pmap, pteva);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	m->object = (void *)*free;
	*free = m;
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
{
	pd_entry_t ptepde;
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	ptepde = PT_GET(pmap_pde(pmap, va));
	mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
	return (pmap_unwire_ptp(pmap, mpte, free));
}
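
/*
 * Illustrative sketch (not compiled): the "free" list used above is a
 * singly-linked list of page table pages threaded through the
 * otherwise-unused m->object field, not a real VM object relationship.
 * A caller accumulates pages via pmap_unuse_pt()/pmap_unwire_ptp() and,
 * once every TLB shootdown has completed, hands the whole chain to
 * pmap_free_zero_pages().
 */
#if 0
static void
ptp_free_example(pmap_t pmap, vm_offset_t va)
{
	vm_page_t free = NULL;

	PMAP_LOCK(pmap);
	/* ... remove mappings; each call may append a PT page ... */
	pmap_unuse_pt(pmap, va, &free);
	PMAP_UNLOCK(pmap);

	/* Safe to release the pages only after the shootdowns. */
	pmap_free_zero_pages(free);
}
#endif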
/*
 * Initialize the pmap for the swapper process.
 */
void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	/*
	 * Since the page table directory is shared with the kernel pmap,
	 * which is already included in the list "allpmaps", this pmap does
	 * not need to be inserted into that list.
	 */
	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
#ifdef PAE
	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
	CPU_ZERO(&pmap->pm_active);
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}
/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(pmap_t pmap)
{
	vm_page_t m, ptdpg[NPGPTD + 1];
	int npgptd = NPGPTD + 1;
	int i;

#ifdef HAMFISTED_LOCKING
	mtx_lock(&createdelete_lock);
#endif

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pdir == NULL) {
		pmap->pm_pdir = (pd_entry_t *)kva_alloc(NBPTD);
		if (pmap->pm_pdir == NULL) {
#ifdef HAMFISTED_LOCKING
			mtx_unlock(&createdelete_lock);
#endif
			return (0);
		}
#ifdef PAE
		pmap->pm_pdpt = (pdpt_entry_t *)kva_alloc(1);
#endif
	}

	/*
	 * allocate the page directory page(s)
	 */
	for (i = 0; i < npgptd;) {
		m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
		    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
		if (m == NULL)
			VM_WAIT;
		else
			ptdpg[i++] = m;
	}

	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);

	for (i = 0; i < NPGPTD; i++)
		if ((ptdpg[i]->flags & PG_ZERO) == 0)
			pagezero(pmap->pm_pdir + (i * NPDEPG));

	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	/* Copy the kernel page table directory entries. */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
	mtx_unlock_spin(&allpmaps_lock);

#ifdef PAE
	pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
	if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
		bzero(pmap->pm_pdpt, PAGE_SIZE);
	for (i = 0; i < NPGPTD; i++) {
		vm_paddr_t ma;

		ma = VM_PAGE_TO_MACH(ptdpg[i]);
		pmap->pm_pdpt[i] = ma | PG_V;
	}
#endif
	for (i = 0; i < NPGPTD; i++) {
		pt_entry_t *pd;
		vm_paddr_t ma;

		ma = VM_PAGE_TO_MACH(ptdpg[i]);
		pd = pmap->pm_pdir + (i * NPDEPG);
		PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
#if 0
		xen_pgd_pin(ma);
#endif
	}

#ifdef PAE
	PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
#endif
	rw_wlock(&pvh_global_lock);
	xen_flush_queue();
	xen_pgdpt_pin(VM_PAGE_TO_MACH(ptdpg[NPGPTD]));
	for (i = 0; i < NPGPTD; i++) {
		vm_paddr_t ma = VM_PAGE_TO_MACH(ptdpg[i]);

		PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A,
		    FALSE);
	}
	xen_flush_queue();
	rw_wunlock(&pvh_global_lock);
	CPU_ZERO(&pmap->pm_active);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

#ifdef HAMFISTED_LOCKING
	mtx_unlock(&createdelete_lock);
#endif
	return (1);
}
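/*
 * Note: unlike the native i386 pmap, the Xen pmap must present each new
 * page directory (and, with PAE, the PDPT) to the hypervisor read-only
 * before it may be pinned and installed; that is why pmap_pinit() strips
 * PG_RW from the kernel mappings of those pages via PT_SET_MA() and pins
 * through xen_pgdpt_pin() before publishing the recursive PTDPTDI slots.
 */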
/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, u_int ptepindex, u_int flags)
{
	vm_paddr_t ptema;
	vm_page_t m;

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if ((flags & PMAP_ENTER_NOSLEEP) == 0) {
			PMAP_UNLOCK(pmap);
			rw_wunlock(&pvh_global_lock);
			VM_WAIT;
			rw_wlock(&pvh_global_lock);
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */
	pmap->pm_stats.resident_count++;

	ptema = VM_PAGE_TO_MACH(m);
	xen_pt_pin(ptema);
	PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
	    (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);

	KASSERT(pmap->pm_pdir[ptepindex],
	    ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
	return (m);
}

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, u_int flags)
{
	u_int ptepindex;
	pd_entry_t ptema;
	vm_page_t m;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = va >> PDRSHIFT;
retry:
	/*
	 * Get the page directory entry
	 */
	ptema = pmap->pm_pdir[ptepindex];

	/*
	 * This supports switching from a 4MB page to a
	 * normal 4K page.
	 */
	if (ptema & PG_PS) {
		/*
		 * XXX
		 */
		pmap->pm_pdir[ptepindex] = 0;
		ptema = 0;
		pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
		pmap_invalidate_all(kernel_pmap);
	}

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (ptema & PG_V) {
		m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has
		 * been deallocated.
		 */
		CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
		    pmap, va, flags);
		m = _pmap_allocpte(pmap, ptepindex, flags);
		if (m == NULL && (flags & PMAP_ENTER_NOSLEEP) == 0)
			goto retry;

		KASSERT(pmap->pm_pdir[ptepindex],
		    ("ptepindex=%d did not get mapped", ptepindex));
	}
	return (m);
}
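/*
 * Example (editor's sketch, compiled out): the contract of
 * pmap_allocpte() is that a NULL return is possible only when
 * PMAP_ENTER_NOSLEEP was passed; with sleeping permitted the locks are
 * dropped, VM_WAIT sleeps, and the lookup is retried internally.  A
 * hypothetical non-sleeping caller therefore looks like:
 */
#if 0
	vm_page_t mpte;

	mpte = pmap_allocpte(pmap, va, PMAP_ENTER_NOSLEEP);
	if (mpte == NULL) {
		/* No page table page; back out and let the caller retry. */
	}
#endif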
/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

#ifdef SMP
/*
 * Deal with an SMP shootdown of other users of the pmap that we are
 * trying to dispose of.  This can be a bit hairy.
 */
static cpuset_t *lazymask;
static u_int lazyptd;
static volatile u_int lazywait;

void pmap_lazyfix_action(void);

void
pmap_lazyfix_action(void)
{

#ifdef COUNT_IPIS
	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
#endif
	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
	atomic_store_rel_int(&lazywait, 1);
}

static void
pmap_lazyfix_self(u_int cpuid)
{

	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	CPU_CLR_ATOMIC(cpuid, lazymask);
}

static void
pmap_lazyfix(pmap_t pmap)
{
	cpuset_t mymask, mask;
	u_int cpuid, spins;
	int lsb;

	mask = pmap->pm_active;
	while (!CPU_EMPTY(&mask)) {
		spins = 50000000;

		/* Find least significant set bit. */
		lsb = CPU_FFS(&mask);
		MPASS(lsb != 0);
		lsb--;
		CPU_SETOF(lsb, &mask);
		mtx_lock_spin(&smp_ipi_mtx);
#ifdef PAE
		lazyptd = vtophys(pmap->pm_pdpt);
#else
		lazyptd = vtophys(pmap->pm_pdir);
#endif
		cpuid = PCPU_GET(cpuid);

		/* Use a cpuset just for having an easy check. */
		CPU_SETOF(cpuid, &mymask);
		if (!CPU_CMP(&mask, &mymask)) {
			lazymask = &pmap->pm_active;
			pmap_lazyfix_self(cpuid);
		} else {
			atomic_store_rel_int((u_int *)&lazymask,
			    (u_int)&pmap->pm_active);
			atomic_store_rel_int(&lazywait, 0);
			ipi_selected(mask, IPI_LAZYPMAP);
			while (lazywait == 0) {
				ia32_pause();
				if (--spins == 0)
					break;
			}
		}
		mtx_unlock_spin(&smp_ipi_mtx);
		if (spins == 0)
			printf("pmap_lazyfix: spun for 50000000\n");
		mask = pmap->pm_active;
	}
}
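/*
 * The IPI handshake above: the initiator publishes the dying page
 * directory in "lazyptd" and a pointer to the pmap's active set in
 * "lazymask", clears "lazywait", and sends IPI_LAZYPMAP.  Each target
 * CPU switches off that page directory if it is still loaded, clears
 * its bit in the mask, and sets "lazywait" as the acknowledgement.  The
 * 50000000-iteration spin is a backstop against a lost IPI, not a
 * correctness requirement.
 */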
#else /* SMP */

/*
 * Cleaning up on uniprocessor is easy.  For various reasons, we're
 * unlikely to have to even execute this code, including the fact
 * that the cleanup is deferred until the parent does a wait(2), which
 * means that another userland process has run.
 */
static void
pmap_lazyfix(pmap_t pmap)
{
	u_int cr3;

	cr3 = vtophys(pmap->pm_pdir);
	if (cr3 == rcr3()) {
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
		CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
	}
}
#endif /* SMP */

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m, ptdpg[2*NPGPTD+1];
	vm_paddr_t ma;
	int i;
#ifdef PAE
	int npgptd = NPGPTD + 1;
#else
	int npgptd = NPGPTD;
#endif

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));
	PT_UPDATES_FLUSH();

#ifdef HAMFISTED_LOCKING
	mtx_lock(&createdelete_lock);
#endif

	pmap_lazyfix(pmap);
	mtx_lock_spin(&allpmaps_lock);
	LIST_REMOVE(pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	for (i = 0; i < NPGPTD; i++)
		ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir +
		    (i * NPDEPG)) & PG_FRAME);
	pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
#ifdef PAE
	ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
#endif

	for (i = 0; i < npgptd; i++) {
		m = ptdpg[i];
		ma = VM_PAGE_TO_MACH(m);
		/* unpinning L1 and L2 treated the same */
#if 0
		xen_pgd_unpin(ma);
#else
		if (i == NPGPTD)
			xen_pgd_unpin(ma);
#endif
#ifdef PAE
		if (i < NPGPTD)
			KASSERT(VM_PAGE_TO_MACH(m) == (pmap->pm_pdpt[i] & PG_FRAME),
			    ("pmap_release: got wrong ptd page"));
#endif
		m->wire_count--;
		atomic_subtract_int(&cnt.v_wire_count, 1);
		vm_page_free(m);
	}
#ifdef PAE
	pmap_qremove((vm_offset_t)pmap->pm_pdpt, 1);
#endif

#ifdef HAMFISTED_LOCKING
	mtx_unlock(&createdelete_lock);
#endif
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;

	return (sysctl_handle_long(oidp, &ksize, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");
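/*
 * Both this sysctl and vm.kvm_free below are read-only; from userland,
 * e.g. "sysctl vm.kvm_size" reports the total kernel virtual address
 * space and "sysctl vm.kvm_free" how much of it is still unused.
 */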
static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return (sysctl_handle_long(oidp, &kfree, 0, req));
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct pmap *pmap;
	vm_paddr_t ptppaddr;
	vm_page_t nkpg;
	pd_entry_t newpdir;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;
		while (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) &
			    ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
		}
	}
	addr = roundup2(addr, NBPDR);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		if (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDRSHIFT,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");

		nkpt++;

		if ((nkpg->flags & PG_ZERO) == 0)
			pmap_zero_page(nkpg);
		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
		rw_wlock(&pvh_global_lock);
		PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
		mtx_lock_spin(&allpmaps_lock);
		LIST_FOREACH(pmap, &allpmaps, pm_list)
			PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
		mtx_unlock_spin(&allpmaps_lock);
		rw_wunlock(&pvh_global_lock);

		kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}
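/*
 * Each loop iteration above adds one page table page, extending the
 * kernel address space by one page-directory entry's worth of KVA:
 * NBPDR = NPTEPG * PAGE_SIZE, i.e. 1024 * 4096 = 4 MB without PAE, or
 * 512 * 4096 = 2 MB with PAE.  The new entry is copied into every pmap
 * on "allpmaps" so all address spaces see the grown kernel range.
 */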
/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 11);
CTASSERT(_NPCPV == 336);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */

static const uint32_t pc_freemask[_NPCM] = {
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE10
};

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
	"Current number of pv entries");

#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
	"Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
	"Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
	"Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
	"Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
	"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
	"Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
	"Current number of spare pv entries");
#endif
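/*
 * The arithmetic behind the constants above: a pv chunk is one page
 * holding _NPCPV = 336 pv entries, whose free/allocated state is
 * tracked by _NPCM = 11 32-bit mask words.  Words 0-9 cover
 * 10 * 32 = 320 entries, so every bit is valid (PC_FREE0_9 =
 * 0xffffffff); word 10 covers the remaining 336 - 320 = 16 entries,
 * hence PC_FREE10 = 0x0000ffff.
 */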
/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 */
static vm_page_t
pmap_pv_reclaim(pmap_t locked_pmap)
{
	struct pch newtail;
	struct pv_chunk *pc;
	pmap_t pmap;
	pt_entry_t *pte, tpte;
	pv_entry_t pv;
	vm_offset_t va;
	vm_page_t free, m, m_pc;
	uint32_t inuse;
	int bit, field, freed;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	pmap = NULL;
	free = m_pc = NULL;
	TAILQ_INIT(&newtail);
	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && (pv_vafree == 0 ||
	    free == NULL)) {
		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
		if (pmap != pc->pc_pmap) {
			if (pmap != NULL) {
				pmap_invalidate_all(pmap);
				if (pmap != locked_pmap)
					PMAP_UNLOCK(pmap);
			}
			pmap = pc->pc_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap)) {
				pmap = NULL;
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
				continue;
			}
		}

		/*
		 * Destroy every non-wired, 4 KB page mapping in the chunk.
		 */
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
			    inuse != 0; inuse &= ~(1UL << bit)) {
				bit = bsfl(inuse);
				pv = &pc->pc_pventry[field * 32 + bit];
				va = pv->pv_va;
				pte = pmap_pte(pmap, va);
				tpte = *pte;
				if ((tpte & PG_W) == 0)
					tpte = pte_load_clear(pte);
				pmap_pte_release(pte);
				if ((tpte & PG_W) != 0)
					continue;
				KASSERT(tpte != 0,
				    ("pmap_pv_reclaim: pmap %p va %x zero pte",
				    pmap, va));
				if ((tpte & PG_G) != 0)
					pmap_invalidate_page(pmap, va);
				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
				if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
					vm_page_dirty(m);
				if ((tpte & PG_A) != 0)
					vm_page_aflag_set(m, PGA_REFERENCED);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				if (TAILQ_EMPTY(&m->md.pv_list))
					vm_page_aflag_clear(m, PGA_WRITEABLE);
				pc->pc_map[field] |= 1UL << bit;
				pmap_unuse_pt(pmap, va, &free);
				freed++;
			}
		}
		if (freed == 0) {
			TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);
			continue;
		}
		/* Every freed mapping is for a 4 KB page. */
		pmap->pm_stats.resident_count -= freed;
		PV_STAT(pv_entry_frees += freed);
		PV_STAT(pv_entry_spare += freed);
		pv_entry_count -= freed;
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		for (field = 0; field < _NPCM; field++)
			if (pc->pc_map[field] != pc_freemask[field]) {
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
				TAILQ_INSERT_TAIL(&newtail, pc, pc_lru);

				/*
				 * One freed pv entry in locked_pmap is
				 * sufficient.
				 */
				if (pmap == locked_pmap)
					goto out;
				break;
			}
		if (field == _NPCM) {
			PV_STAT(pv_entry_spare -= _NPCPV);
			PV_STAT(pc_chunk_count--);
			PV_STAT(pc_chunk_frees++);
			/* Entire chunk is free; return it. */
			m_pc = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
			pmap_qremove((vm_offset_t)pc, 1);
			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
			break;
		}
	}
out:
	TAILQ_CONCAT(&pv_chunks, &newtail, pc_lru);
	if (pmap != NULL) {
		pmap_invalidate_all(pmap);
		if (pmap != locked_pmap)
			PMAP_UNLOCK(pmap);
	}
	if (m_pc == NULL && pv_vafree != 0 && free != NULL) {
		m_pc = free;
		free = (void *)m_pc->object;
		/* Recycle a freed page table page. */
		m_pc->wire_count = 1;
		atomic_add_int(&cnt.v_wire_count, 1);
	}
	pmap_free_zero_pages(free);
	return (m_pc);
}
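/*
 * Summary of the policy above: chunks are visited in LRU order via the
 * global "pv_chunks" list, and only non-wired 4 KB mappings are torn
 * down.  The scan stops as soon as either the locked pmap regains a
 * free pv entry or an entire chunk page can be handed back to the
 * caller, get_pv_entry(), for immediate reuse.
 */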
/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	struct pv_chunk *pc;
	int idx, field, bit;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 32;
	bit = idx % 32;
	pc->pc_map[field] |= 1ul << bit;
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx]) {
			/*
			 * 98% of the time, pc is already at the head of the
			 * list.  If it isn't already, move it to the head.
			 */
			if (__predict_false(TAILQ_FIRST(&pmap->pm_pvchunk) !=
			    pc)) {
				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
				TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc,
				    pc_list);
			}
			return;
		}
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	free_pv_chunk(pc);
}

static void
free_pv_chunk(struct pv_chunk *pc)
{
	vm_page_t m;

	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
	PV_STAT(pv_entry_spare -= _NPCPV);
	PV_STAT(pc_chunk_count--);
	PV_STAT(pc_chunk_frees++);
	/* entire chunk is free, return it */
	m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
	pmap_qremove((vm_offset_t)pc, 1);
	vm_page_unwire(m, 0);
	vm_page_free(m);
	pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
}
/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, boolean_t try)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	int bit, field;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PV_STAT(pv_entry_allocs++);
	pv_entry_count++;
	if (pv_entry_count > pv_entry_high_water)
		if (ratecheck(&lastprint, &printinterval))
			printf("Approaching the limit on PV entries, consider "
			    "increasing either the vm.pmap.shpgperproc or the "
			    "vm.pmap.pv_entry_max tunable.\n");
retry:
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = bsfl(pc->pc_map[field]);
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 32 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			for (field = 0; field < _NPCM; field++)
				if (pc->pc_map[field] != 0) {
					PV_STAT(pv_entry_spare--);
					return (pv);	/* not full, return */
				}
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
			PV_STAT(pv_entry_spare--);
			return (pv);
		}
	}
	/*
	 * Access to the ptelist "pv_vafree" is synchronized by the page
	 * queues lock.  If "pv_vafree" is currently non-empty, it will
	 * remain non-empty until pmap_ptelist_alloc() completes.
	 */
	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
		if (try) {
			pv_entry_count--;
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		m = pmap_pv_reclaim(pmap);
		if (m == NULL)
			goto retry;
	}
	PV_STAT(pc_chunk_count++);
	PV_STAT(pc_chunk_allocs++);
	pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
	pmap_qenter((vm_offset_t)pc, &m, 1);
	if ((m->flags & PG_ZERO) == 0)
		pagezero(pc);
	pc->pc_pmap = pmap;
	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
	for (field = 1; field < _NPCM; field++)
		pc->pc_map[field] = pc_freemask[field];
	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(pv_entry_spare += _NPCPV - 1);
	return (pv);
}

static __inline pv_entry_t
pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
			break;
		}
	}
	return (pv);
}

static void
pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
{
	pv_entry_t pv;

	pv = pmap_pvh_remove(pvh, pmap, va);
	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
	free_pv_entry(pmap, pv);
}

static void
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	pmap_pvh_free(&m->md, pmap, va);
	if (TAILQ_EMPTY(&m->md.pv_list))
		vm_page_aflag_clear(m, PGA_WRITEABLE);
}
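/*
 * A pv entry never stores its owning pmap; because pv chunks are
 * page-sized and page-aligned, the owner is recovered by masking the
 * entry's address back to the start of its chunk.  Illustrative sketch
 * (compiled out):
 */
#if 0
	pv_entry_t pv;
	pmap_t pmap;

	pmap = PV_PMAP(pv);	/* == pv_to_chunk(pv)->pc_pmap */
#endif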
/*
 * Conditionally create a pv entry.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	rw_assert(&pvh_global_lock, RA_WLOCKED);
	if (pv_entry_count < pv_entry_high_water &&
	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
		pv->pv_va = va;
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_pte: do the things to unmap a page in a process
 */
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
{
	pt_entry_t oldpte;
	vm_page_t m;

	CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
	    pmap, (u_long)*ptq, va);

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	oldpte = *ptq;
	PT_SET_VA_MA(ptq, 0, TRUE);
	KASSERT(oldpte != 0,
	    ("pmap_remove_pte: pmap %p va %x zero pte", pmap, va));
	if (oldpte & PG_W)
		pmap->pm_stats.wired_count -= 1;
	/*
	 * Machines that don't support invlpg, also don't support
	 * PG_G.
	 */
	if (oldpte & PG_G)
		pmap_invalidate_page(kernel_pmap, va);
	pmap->pm_stats.resident_count -= 1;
	if (oldpte & PG_MANAGED) {
		m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME);
		if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
			vm_page_dirty(m);
		if (oldpte & PG_A)
			vm_page_aflag_set(m, PGA_REFERENCED);
		pmap_remove_entry(pmap, m, va);
	}
	return (pmap_unuse_pt(pmap, va, free));
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
{
	pt_entry_t *pte;

	CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x",
	    pmap, va);

	rw_assert(&pvh_global_lock, RA_WLOCKED);
	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0)
		return;
	pmap_remove_pte(pmap, pte, va, free);
	pmap_invalidate_page(pmap, va);
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);
}
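/*
 * PMAP1/PADDR1 above are the transient mapping window that
 * pmap_pte_quick() uses to reach another pmap's page table pages; after
 * a removal, any window left valid is torn down (PT_SET_MA(PADDR1, 0))
 * so a stale translation cannot be reused.
 */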
/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t pdnxt;
	pd_entry_t ptpaddr;
	pt_entry_t *pte;
	vm_page_t free = NULL;
	int anyvalid;

	CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
	    pmap, sva, eva);

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	anyvalid = 0;

	rw_wlock(&pvh_global_lock);
	sched_pin();
	PMAP_LOCK(pmap);

	/*
	 * special handling of removing one page.  a very
	 * common operation and easy to short circuit some
	 * code.
	 */
	if ((sva + PAGE_SIZE == eva) &&
	    ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
		pmap_remove_page(pmap, sva, &free);
		goto out;
	}

	for (; sva < eva; sva = pdnxt) {
		u_int pdirindex;

		/*
		 * Calculate index for next page table.
		 */
		pdnxt = (sva + NBPDR) & ~PDRMASK;
		if (pdnxt < sva)
			pdnxt = eva;
		if (pmap->pm_stats.resident_count == 0)
			break;

		pdirindex = sva >> PDRSHIFT;
		ptpaddr = pmap->pm_pdir[pdirindex];

		/*
		 * Weed out invalid mappings.  Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		/*
		 * Check for large page.
		 */
		if ((ptpaddr & PG_PS) != 0) {
			PD_CLEAR_VA(pmap, pdirindex, TRUE);
			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
			anyvalid = 1;
			continue;
		}

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (pdnxt > eva)
			pdnxt = eva;

		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
		    sva += PAGE_SIZE) {
			if ((*pte & PG_V) == 0)
				continue;

			/*
			 * The TLB entry for a PG_G mapping is invalidated
			 * by pmap_remove_pte().
			 */
			if ((*pte & PG_G) == 0)
				anyvalid = 1;
			if (pmap_remove_pte(pmap, pte, sva, &free))
				break;
		}
	}
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_VA_MA(PMAP1, 0, TRUE);
out:
	if (anyvalid)
		pmap_invalidate_all(pmap);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(free);
}

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	pv_entry_t pv;
	pmap_t pmap;
	pt_entry_t *pte, tpte;
	vm_page_t free;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_all: page %p is not managed", m));
	free = NULL;
	rw_wlock(&pvh_global_lock);
	sched_pin();
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pmap->pm_stats.resident_count--;
		pte = pmap_pte_quick(pmap, pv->pv_va);
		tpte = *pte;
		PT_SET_VA_MA(pte, 0, TRUE);
		KASSERT(tpte != 0, ("pmap_remove_all: pmap %p va %x zero pte",
		    pmap, pv->pv_va));
		if (tpte & PG_W)
			pmap->pm_stats.wired_count--;
		if (tpte & PG_A)
			vm_page_aflag_set(m, PGA_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW))
			vm_page_dirty(m);
		pmap_unuse_pt(pmap, pv->pv_va, &free);
		pmap_invalidate_page(pmap, pv->pv_va);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	pmap_free_zero_pages(free);
}
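/*
 * Example (editor's illustration, compiled out): pmap_remove_all() is
 * the page-centric complement of the range-based pmap_remove(); the VM
 * system uses it when a page changes identity, e.g. before freeing:
 */
#if 0
	vm_page_t m;

	pmap_remove_all(m);	/* strip every mapping of m */
	KASSERT(TAILQ_EMPTY(&m->md.pv_list), ("mappings remain"));
#endif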
/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t pdnxt;
	pd_entry_t ptpaddr;
	pt_entry_t *pte;
	int anychanged;

	CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
	    pmap, sva, eva, prot);

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

#ifdef PAE
	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
		return;
#else
	if (prot & VM_PROT_WRITE)
		return;
#endif

	anychanged = 0;

	rw_wlock(&pvh_global_lock);
	sched_pin();
	PMAP_LOCK(pmap);
	for (; sva < eva; sva = pdnxt) {
		pt_entry_t obits, pbits;
		u_int pdirindex;

		pdnxt = (sva + NBPDR) & ~PDRMASK;
		if (pdnxt < sva)
			pdnxt = eva;

		pdirindex = sva >> PDRSHIFT;
		ptpaddr = pmap->pm_pdir[pdirindex];

		/*
		 * Weed out invalid mappings.  Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		/*
		 * Check for large page.
		 */
		if ((ptpaddr & PG_PS) != 0) {
			if ((prot & VM_PROT_WRITE) == 0)
				pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
#ifdef PAE
			if ((prot & VM_PROT_EXECUTE) == 0)
				pmap->pm_pdir[pdirindex] |= pg_nx;
#endif
			anychanged = 1;
			continue;
		}

		if (pdnxt > eva)
			pdnxt = eva;

		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
		    sva += PAGE_SIZE) {
			vm_page_t m;

retry:
			/*
			 * Regardless of whether a pte is 32 or 64 bits in
			 * size, PG_RW, PG_A, and PG_M are among the least
			 * significant 32 bits.
			 */
			obits = pbits = *pte;
			if ((pbits & PG_V) == 0)
				continue;

			if ((prot & VM_PROT_WRITE) == 0) {
				if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
				    (PG_MANAGED | PG_M | PG_RW)) {
					m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) &
					    PG_FRAME);
					vm_page_dirty(m);
				}
				pbits &= ~(PG_RW | PG_M);
			}
#ifdef PAE
			if ((prot & VM_PROT_EXECUTE) == 0)
				pbits |= pg_nx;
#endif

			if (pbits != obits) {
				obits = *pte;
				PT_SET_VA_MA(pte, pbits, TRUE);
				if (*pte != pbits)
					goto retry;
				if (obits & PG_G)
					pmap_invalidate_page(pmap, sva);
				else
					anychanged = 1;
			}
		}
	}
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_VA_MA(PMAP1, 0, TRUE);
	if (anychanged)
		pmap_invalidate_all(pmap);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}
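/*
 * Example (editor's sketch, compiled out): revoking write access over a
 * range, as copy-on-write setup does, reduces to a single call; the
 * mappings stay resident while PG_RW and PG_M are cleared (and, on PAE
 * kernels, pg_nx is set when execute permission is also revoked):
 */
#if 0
	pmap_protect(pmap, sva, eva, VM_PROT_READ);
#endif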
/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB:  This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
int
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    u_int flags, int8_t psind __unused)
{
	pd_entry_t *pde;
	pt_entry_t *pte;
	pt_entry_t newpte, origpte;
	pv_entry_t pv;
	vm_paddr_t opa, pa;
	vm_page_t mpte, om;
	boolean_t invlva, wired;

	CTR5(KTR_PMAP,
	    "pmap_enter: pmap=%08p va=0x%08x ma=0x%08x prot=0x%x flags=0x%x",
	    pmap, va, VM_PAGE_TO_MACH(m), prot, flags);
	va = trunc_page(va);
	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
	    va));
	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
		VM_OBJECT_ASSERT_LOCKED(m->object);

	mpte = NULL;
	wired = (flags & PMAP_ENTER_WIRED) != 0;

	rw_wlock(&pvh_global_lock);
	PMAP_LOCK(pmap);
	sched_pin();

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		mpte = pmap_allocpte(pmap, va, flags);
		if (mpte == NULL) {
			KASSERT((flags & PMAP_ENTER_NOSLEEP) != 0,
			    ("pmap_allocpte failed with sleep allowed"));
			sched_unpin();
			rw_wunlock(&pvh_global_lock);
			PMAP_UNLOCK(pmap);
			return (KERN_RESOURCE_SHORTAGE);
		}
	}

	pde = pmap_pde(pmap, va);
	if ((*pde & PG_PS) != 0)
		panic("pmap_enter: attempted pmap_enter on 4MB page");
	pte = pmap_pte_quick(pmap, va);

	/*
	 * Page Directory table entry not valid, we need a new PT page
	 */
	if (pte == NULL) {
		panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x",
		    (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va);
	}

	pa = VM_PAGE_TO_PHYS(m);
	om = NULL;
	opa = origpte = 0;

#if 0
	KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx",
	    pte, *pte));
#endif
	origpte = *pte;
	if (origpte)
		origpte = xpmap_mtop(origpte);
	opa = origpte & PG_FRAME;

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (origpte && (opa == pa)) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && ((origpte & PG_W) == 0))
			pmap->pm_stats.wired_count++;
		else if (!wired && (origpte & PG_W))
			pmap->pm_stats.wired_count--;

		/*
		 * Remove extra pte reference
		 */
		if (mpte)
			mpte->wire_count--;

		if (origpte & PG_MANAGED) {
			om = m;
			pa |= PG_MANAGED;
		}
		goto validate;
	}

	pv = NULL;

	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		if (origpte & PG_W)
			pmap->pm_stats.wired_count--;
		if (origpte & PG_MANAGED) {
			om = PHYS_TO_VM_PAGE(opa);
			pv = pmap_pvh_remove(&om->md, pmap, va);
		} else if (va < VM_MAXUSER_ADDRESS)
			printf("va=0x%x is unmanaged :-( \n", va);

		if (mpte != NULL) {
			mpte->wire_count--;
			KASSERT(mpte->wire_count > 0,
			    ("pmap_enter: missing reference to page table page,"
			    " va: 0x%x", va));
		}
	} else
		pmap->pm_stats.resident_count++;

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		if (pv == NULL)
			pv = get_pv_entry(pmap, FALSE);
		pv->pv_va = va;
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
		pa |= PG_MANAGED;
	} else if (pv != NULL)
		free_pv_entry(pmap, pv);

	/*
	 * Increment counters
	 */
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	newpte = (pt_entry_t)(pa | PG_V);
	if ((prot & VM_PROT_WRITE) != 0) {
		newpte |= PG_RW;
		if ((newpte & PG_MANAGED) != 0)
			vm_page_aflag_set(m, PGA_WRITEABLE);
	}
#ifdef PAE
	if ((prot & VM_PROT_EXECUTE) == 0)
		newpte |= pg_nx;
#endif
	if (wired)
		newpte |= PG_W;
	if (va < VM_MAXUSER_ADDRESS)
		newpte |= PG_U;
	if (pmap == kernel_pmap)
		newpte |= pgeflag;

	critical_enter();
	/*
	 * if the mapping or permission bits are different, we need
	 * to update the pte.
	 */
	if ((origpte & ~(PG_M|PG_A)) != newpte) {
		if (origpte) {
			invlva = FALSE;
			origpte = *pte;
			PT_SET_VA(pte, newpte | PG_A, FALSE);
			if (origpte & PG_A) {
				if (origpte & PG_MANAGED)
					vm_page_aflag_set(om, PGA_REFERENCED);
				if (opa != VM_PAGE_TO_PHYS(m))
					invlva = TRUE;
#ifdef PAE
				if ((origpte & PG_NX) == 0 &&
				    (newpte & PG_NX) != 0)
					invlva = TRUE;
#endif
			}
			if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
				if ((origpte & PG_MANAGED) != 0)
					vm_page_dirty(om);
				if ((prot & VM_PROT_WRITE) == 0)
					invlva = TRUE;
			}
			if ((origpte & PG_MANAGED) != 0 &&
			    TAILQ_EMPTY(&om->md.pv_list))
				vm_page_aflag_clear(om, PGA_WRITEABLE);
			if (invlva)
				pmap_invalidate_page(pmap, va);
		} else {
			PT_SET_VA(pte, newpte | PG_A, FALSE);
		}
	}
	PT_UPDATES_FLUSH();
	critical_exit();
	if (*PMAP1)
		PT_SET_VA_MA(PMAP1, 0, TRUE);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	return (KERN_SUCCESS);
}
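/*
 * Example (editor's sketch, compiled out): a wired, non-sleeping
 * insertion under the flags interface above, with the required handling
 * of resource shortage:
 */
#if 0
	int rv;

	rv = pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE,
	    PMAP_ENTER_WIRED | PMAP_ENTER_NOSLEEP, 0);
	if (rv == KERN_RESOURCE_SHORTAGE) {
		/* No page table page could be allocated; retry later. */
	}
#endif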
2864181641Skmacy */ 2865181641Skmacyvoid 2866181641Skmacypmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2867181641Skmacy vm_page_t m_start, vm_prot_t prot) 2868181641Skmacy{ 2869181641Skmacy vm_page_t m, mpte; 2870181641Skmacy vm_pindex_t diff, psize; 2871181641Skmacy multicall_entry_t mcl[16]; 2872181641Skmacy multicall_entry_t *mclp = mcl; 2873181641Skmacy int error, count = 0; 2874228923Salc 2875250884Sattilio VM_OBJECT_ASSERT_LOCKED(m_start->object); 2876250884Sattilio 2877181641Skmacy psize = atop(end - start); 2878181641Skmacy mpte = NULL; 2879181641Skmacy m = m_start; 2880241498Salc rw_wlock(&pvh_global_lock); 2881181641Skmacy PMAP_LOCK(pmap); 2882181641Skmacy while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2883181641Skmacy mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m, 2884181641Skmacy prot, mpte); 2885181641Skmacy m = TAILQ_NEXT(m, listq); 2886181641Skmacy if (count == 16) { 2887181641Skmacy error = HYPERVISOR_multicall(mcl, count); 2888181641Skmacy KASSERT(error == 0, ("bad multicall %d", error)); 2889181641Skmacy mclp = mcl; 2890181641Skmacy count = 0; 2891181641Skmacy } 2892181641Skmacy } 2893181641Skmacy if (count) { 2894181641Skmacy error = HYPERVISOR_multicall(mcl, count); 2895181641Skmacy KASSERT(error == 0, ("bad multicall %d", error)); 2896181641Skmacy } 2897241498Salc rw_wunlock(&pvh_global_lock); 2898181641Skmacy PMAP_UNLOCK(pmap); 2899181641Skmacy} 2900181641Skmacy 2901181641Skmacy/* 2902181641Skmacy * this code makes some *MAJOR* assumptions: 2903181641Skmacy * 1. Current pmap & pmap exists. 2904181641Skmacy * 2. Not wired. 2905181641Skmacy * 3. Read access. 2906181641Skmacy * 4. No page table pages. 2907181641Skmacy * but is *MUCH* faster than pmap_enter... 2908181641Skmacy */ 2909181641Skmacy 2910181641Skmacyvoid 2911181641Skmacypmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2912181641Skmacy{ 2913181641Skmacy multicall_entry_t mcl, *mclp; 2914181641Skmacy int count = 0; 2915181641Skmacy mclp = &mcl; 2916228923Salc 2917181641Skmacy CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x", 2918181641Skmacy pmap, va, m, prot); 2919181641Skmacy 2920241498Salc rw_wlock(&pvh_global_lock); 2921181641Skmacy PMAP_LOCK(pmap); 2922207796Salc (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL); 2923181641Skmacy if (count) 2924181641Skmacy HYPERVISOR_multicall(&mcl, count); 2925241498Salc rw_wunlock(&pvh_global_lock); 2926181641Skmacy PMAP_UNLOCK(pmap); 2927181641Skmacy} 2928181641Skmacy 2929181747Skmacy#ifdef notyet 2930181641Skmacyvoid 2931181641Skmacypmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count) 2932181641Skmacy{ 2933181641Skmacy int i, error, index = 0; 2934181641Skmacy multicall_entry_t mcl[16]; 2935181641Skmacy multicall_entry_t *mclp = mcl; 2936181641Skmacy 2937181641Skmacy PMAP_LOCK(pmap); 2938181641Skmacy for (i = 0; i < count; i++, addrs++, pages++, prots++) { 2939181641Skmacy if (!pmap_is_prefaultable_locked(pmap, *addrs)) 2940181641Skmacy continue; 2941181641Skmacy 2942181641Skmacy (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL); 2943181641Skmacy if (index == 16) { 2944181641Skmacy error = HYPERVISOR_multicall(mcl, index); 2945181641Skmacy mclp = mcl; 2946181641Skmacy index = 0; 2947181641Skmacy KASSERT(error == 0, ("bad multicall %d", error)); 2948181641Skmacy } 2949181641Skmacy } 2950181641Skmacy if (index) { 2951181641Skmacy error = 
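
/*
 * The two entry points above batch their Xen page-table updates through
 * HYPERVISOR_multicall().  The following is a minimal sketch of that
 * batching pattern in isolation; it is illustrative only (kept out of
 * the build by "#if 0"), and the function name and parameters are
 * invented for the example.
 */
#if 0
static void
example_batched_va_updates(vm_offset_t *vas, vm_paddr_t *mas, int n)
{
	multicall_entry_t mcl[16], *mclp = mcl;
	int error, i, count = 0;

	for (i = 0; i < n; i++) {
		/* Queue one update_va_mapping operation. */
		mclp->op = __HYPERVISOR_update_va_mapping;
		mclp->args[0] = vas[i];
		mclp->args[1] = (uint32_t)(mas[i] & 0xffffffff);
		mclp->args[2] = (uint32_t)(mas[i] >> 32);
		mclp->args[3] = 0;
		mclp++;
		/* Flush whenever the fixed-size buffer fills. */
		if (++count == 16) {
			error = HYPERVISOR_multicall(mcl, count);
			KASSERT(error == 0, ("bad multicall %d", error));
			mclp = mcl;
			count = 0;
		}
	}
	/* Flush any remainder. */
	if (count != 0) {
		error = HYPERVISOR_multicall(mcl, count);
		KASSERT(error == 0, ("bad multicall %d", error));
	}
}
#endif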
#ifdef notyet
void
pmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages,
    vm_prot_t *prots, int count)
{
	int i, error, index = 0;
	multicall_entry_t mcl[16];
	multicall_entry_t *mclp = mcl;

	PMAP_LOCK(pmap);
	for (i = 0; i < count; i++, addrs++, pages++, prots++) {
		if (!pmap_is_prefaultable_locked(pmap, *addrs))
			continue;

		(void)pmap_enter_quick_locked(&mclp, &index, pmap, *addrs,
		    *pages, *prots, NULL);
		if (index == 16) {
			error = HYPERVISOR_multicall(mcl, index);
			mclp = mcl;
			index = 0;
			KASSERT(error == 0, ("bad multicall %d", error));
		}
	}
	if (index) {
		error = HYPERVISOR_multicall(mcl, index);
		KASSERT(error == 0, ("bad multicall %d", error));
	}

	PMAP_UNLOCK(pmap);
}
#endif

static vm_page_t
pmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap,
    vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte)
{
	pt_entry_t *pte;
	vm_paddr_t pa;
	vm_page_t free;
	multicall_entry_t *mcl = *mclpp;

	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
	    (m->oflags & VPO_UNMANAGED) != 0,
	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
	rw_assert(&pvh_global_lock, RA_WLOCKED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		u_int ptepindex;
		pd_entry_t ptema;

		/*
		 * Calculate the page table page index.
		 */
		ptepindex = va >> PDRSHIFT;
		if (mpte && (mpte->pindex == ptepindex)) {
			mpte->wire_count++;
		} else {
			/*
			 * Get the page directory entry.
			 */
			ptema = pmap->pm_pdir[ptepindex];

			/*
			 * If the page table page is mapped, we just increment
			 * the hold count, and activate it.
			 */
			if (ptema & PG_V) {
				if (ptema & PG_PS)
					panic("pmap_enter_quick: unexpected mapping into 4MB page");
				mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
				mpte->wire_count++;
			} else {
				mpte = _pmap_allocpte(pmap, ptepindex,
				    PMAP_ENTER_NOSLEEP);
				if (mpte == NULL)
					return (mpte);
			}
		}
	} else {
		mpte = NULL;
	}

	/*
	 * This call to vtopte makes the assumption that we are
	 * entering the page into the current pmap.  In order to support
	 * quick entry into any pmap, one would likely use pmap_pte_quick.
	 * But that isn't as quick as vtopte.
	 */
	KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap"));
	pte = vtopte(va);
	if (*pte & PG_V) {
		if (mpte != NULL) {
			mpte->wire_count--;
			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->oflags & VPO_UNMANAGED) == 0 &&
	    !pmap_try_insert_pv_entry(pmap, va, m)) {
		if (mpte != NULL) {
			free = NULL;
			if (pmap_unwire_ptp(pmap, mpte, &free)) {
				pmap_invalidate_page(pmap, va);
				pmap_free_zero_pages(free);
			}

			mpte = NULL;
		}
		return (mpte);
	}

	/*
	 * Increment counters.
	 */
	pmap->pm_stats.resident_count++;

	pa = VM_PAGE_TO_PHYS(m);
#ifdef PAE
	if ((prot & VM_PROT_EXECUTE) == 0)
		pa |= pg_nx;
#endif

#if 0
	/*
	 * Now validate mapping with RO protection.
	 */
	if ((m->oflags & VPO_UNMANAGED) != 0)
		pte_store(pte, pa | PG_V | PG_U);
	else
		pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);
#else
	/*
	 * Now validate mapping with RO protection.
	 */
	if ((m->oflags & VPO_UNMANAGED) != 0)
		pa = xpmap_ptom(pa | PG_V | PG_U);
	else
		pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED);

	mcl->op = __HYPERVISOR_update_va_mapping;
	mcl->args[0] = va;
	mcl->args[1] = (uint32_t)(pa & 0xffffffff);
	mcl->args[2] = (uint32_t)(pa >> 32);
	mcl->args[3] = 0;
	*mclpp = mcl + 1;
	*count = *count + 1;
#endif
	return (mpte);
}

/*
 * Make a temporary mapping for a physical address.  This is only intended
 * to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
	vm_offset_t va;
	vm_paddr_t ma = xpmap_ptom(pa);

	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
	PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag);
	invlpg(va);
	return ((void *)crashdumpmap);
}
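
/*
 * A sketch of how a dump routine might consume pmap_kenter_temporary()
 * (illustrative only, excluded from the build; the function name and
 * the dump consumer are invented).  Note that the returned pointer is
 * always the base of crashdumpmap; slot "i" lives at base + ptoa(i).
 */
#if 0
static void
example_dump_window(vm_paddr_t pa, int npages)
{
	char *base;
	int i;

	for (i = 0; i < npages; i++) {
		/* Map the i-th physical page at slot i of crashdumpmap. */
		base = pmap_kenter_temporary(pa + ptoa(i), i);
		/* ...emit PAGE_SIZE bytes starting at base + ptoa(i)... */
	}
}
#endif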
/*
 * This code maps large physical mmap regions into the
 * processor address space.  Some shortcuts are taken: only regions
 * that are superpage-aligned and physically contiguous are mapped,
 * and anything else is silently skipped, which is acceptable because
 * this routine is purely an optimization.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
    vm_pindex_t pindex, vm_size_t size)
{
	pd_entry_t *pde;
	vm_paddr_t pa, ptepa;
	vm_page_t p;
	int pat_mode;

	VM_OBJECT_ASSERT_WLOCKED(object);
	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
	    ("pmap_object_init_pt: non-device object"));
	if (pseflag &&
	    (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) {
		if (!vm_object_populate(object, pindex, pindex + atop(size)))
			return;
		p = vm_page_lookup(object, pindex);
		KASSERT(p->valid == VM_PAGE_BITS_ALL,
		    ("pmap_object_init_pt: invalid page %p", p));
		pat_mode = p->md.pat_mode;

		/*
		 * Abort the mapping if the first page is not physically
		 * aligned to a 2/4MB page boundary.
		 */
		ptepa = VM_PAGE_TO_PHYS(p);
		if (ptepa & (NBPDR - 1))
			return;

		/*
		 * Skip the first page.  Abort the mapping if the rest of
		 * the pages are not physically contiguous or have differing
		 * memory attributes.
		 */
		p = TAILQ_NEXT(p, listq);
		for (pa = ptepa + PAGE_SIZE; pa < ptepa + size;
		    pa += PAGE_SIZE) {
			KASSERT(p->valid == VM_PAGE_BITS_ALL,
			    ("pmap_object_init_pt: invalid page %p", p));
			if (pa != VM_PAGE_TO_PHYS(p) ||
			    pat_mode != p->md.pat_mode)
				return;
			p = TAILQ_NEXT(p, listq);
		}

		/*
		 * Map using 2/4MB pages.  Since "ptepa" is 2/4M aligned and
		 * "size" is a multiple of 2/4M, adding the PAT setting to
		 * "pa" will not affect the termination of this loop.
		 */
		PMAP_LOCK(pmap);
		for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa +
		    size; pa += NBPDR) {
			pde = pmap_pde(pmap, addr);
			if (*pde == 0) {
				pde_store(pde, pa | PG_PS | PG_M | PG_A |
				    PG_U | PG_RW | PG_V);
				pmap->pm_stats.resident_count += NBPDR /
				    PAGE_SIZE;
				pmap_pde_mappings++;
			}
			/* Else continue on if the PDE is already valid. */
			addr += NBPDR;
		}
		PMAP_UNLOCK(pmap);
	}
}
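
/*
 * Worked example of the alignment requirements enforced above,
 * assuming 4 MB superpages (NBPDR == 0x400000, PAE disabled):
 *
 *   addr  = 0x00800000 -> (addr & (NBPDR - 1)) == 0, eligible;
 *   ptepa = 0x10400000 -> (ptepa & (NBPDR - 1)) == 0, eligible;
 *   ptepa = 0x10401000 -> low bits 0x1000 != 0, mapping declined.
 *
 * When all checks pass, each loop iteration installs one PG_PS page
 * directory entry and advances both "pa" and "addr" by NBPDR.
 */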
/*
 * Clear the wired attribute from the mappings for the specified range of
 * addresses in the given pmap.  Every valid mapping within that range
 * must have the wired attribute set.  In contrast, invalid mappings
 * cannot have the wired attribute set, so they are ignored.
 *
 * The wired attribute of the page table entry is not a hardware feature,
 * so there is no need to invalidate any TLB entries.
 */
void
pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t pdnxt;
	pd_entry_t *pde;
	pt_entry_t *pte;

	CTR3(KTR_PMAP, "pmap_unwire: pmap=%p sva=0x%x eva=0x%x", pmap, sva,
	    eva);
	rw_wlock(&pvh_global_lock);
	sched_pin();
	PMAP_LOCK(pmap);
	for (; sva < eva; sva = pdnxt) {
		pdnxt = (sva + NBPDR) & ~PDRMASK;
		if (pdnxt < sva)
			pdnxt = eva;
		pde = pmap_pde(pmap, sva);
		if ((*pde & PG_V) == 0)
			continue;
		if ((*pde & PG_PS) != 0)
			panic("pmap_unwire: unexpected PG_PS in pde %#jx",
			    (uintmax_t)*pde);
		if (pdnxt > eva)
			pdnxt = eva;
		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
		    sva += PAGE_SIZE) {
			if ((*pte & PG_V) == 0)
				continue;
			if ((*pte & PG_W) == 0)
				panic("pmap_unwire: pte %#jx is missing PG_W",
				    (uintmax_t)*pte);
			PT_SET_VA_MA(pte, *pte & ~PG_W, FALSE);
			pmap->pm_stats.wired_count--;
		}
	}
	if (*PMAP1)
		PT_CLEAR_VA(PMAP1, FALSE);
	PT_UPDATES_FLUSH();
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}


/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */

void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
	vm_page_t free;
	vm_offset_t addr;
	vm_offset_t end_addr = src_addr + len;
	vm_offset_t pdnxt;

	if (dst_addr != src_addr)
		return;

	if (!pmap_is_current(src_pmap)) {
		CTR2(KTR_PMAP,
		    "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx",
		    (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME));

		return;
	}
	CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x",
	    dst_pmap, src_pmap, dst_addr, len, src_addr);

#ifdef HAMFISTED_LOCKING
	mtx_lock(&createdelete_lock);
#endif

	rw_wlock(&pvh_global_lock);
	if (dst_pmap < src_pmap) {
		PMAP_LOCK(dst_pmap);
		PMAP_LOCK(src_pmap);
	} else {
		PMAP_LOCK(src_pmap);
		PMAP_LOCK(dst_pmap);
	}
	sched_pin();
	for (addr = src_addr; addr < end_addr; addr = pdnxt) {
		pt_entry_t *src_pte, *dst_pte;
		vm_page_t dstmpte, srcmpte;
		pd_entry_t srcptepaddr;
		u_int ptepindex;

		KASSERT(addr < UPT_MIN_ADDRESS,
		    ("pmap_copy: invalid to pmap_copy page tables"));

		pdnxt = (addr + NBPDR) & ~PDRMASK;
		if (pdnxt < addr)
			pdnxt = end_addr;
		ptepindex = addr >> PDRSHIFT;

		srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]);
		if (srcptepaddr == 0)
			continue;

		if (srcptepaddr & PG_PS) {
			if (dst_pmap->pm_pdir[ptepindex] == 0) {
				PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE);
				dst_pmap->pm_stats.resident_count +=
				    NBPDR / PAGE_SIZE;
			}
			continue;
		}

		srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
		KASSERT(srcmpte->wire_count > 0,
		    ("pmap_copy: source page table page is unused"));

		if (pdnxt > end_addr)
			pdnxt = end_addr;

		src_pte = vtopte(addr);
		while (addr < pdnxt) {
			pt_entry_t ptetemp;
			ptetemp = *src_pte;
			/*
			 * We only virtual-copy managed pages.
			 */
			if ((ptetemp & PG_MANAGED) != 0) {
				dstmpte = pmap_allocpte(dst_pmap, addr,
				    PMAP_ENTER_NOSLEEP);
				if (dstmpte == NULL)
					goto out;
				dst_pte = pmap_pte_quick(dst_pmap, addr);
				if (*dst_pte == 0 &&
				    pmap_try_insert_pv_entry(dst_pmap, addr,
				    PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) {
					/*
					 * Clear the wired, modified, and
					 * accessed (referenced) bits
					 * during the copy.
					 */
					KASSERT(ptetemp != 0, ("src_pte not set"));
					PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */);
					KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)),
					    ("no pmap copy expected: 0x%jx saw: 0x%jx",
					    ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte));
					dst_pmap->pm_stats.resident_count++;
				} else {
					free = NULL;
					if (pmap_unwire_ptp(dst_pmap, dstmpte,
					    &free)) {
						pmap_invalidate_page(dst_pmap,
						    addr);
						pmap_free_zero_pages(free);
					}
					goto out;
				}
				if (dstmpte->wire_count >= srcmpte->wire_count)
					break;
			}
			addr += PAGE_SIZE;
			src_pte++;
		}
	}
out:
	PT_UPDATES_FLUSH();
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(src_pmap);
	PMAP_UNLOCK(dst_pmap);

#ifdef HAMFISTED_LOCKING
	mtx_unlock(&createdelete_lock);
#endif
}
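
/*
 * Worked example of the "pdnxt" stepping used by pmap_unwire() and
 * pmap_copy() above (and by pmap_advise() below), assuming 4 MB page
 * directory entries (PDRMASK == 0x3fffff):
 *
 *   sva   = 0x003ff000
 *   pdnxt = (sva + NBPDR) & ~PDRMASK = 0x00400000
 *
 * i.e. pdnxt is the first address belonging to the next page directory
 * entry, so the inner PTE loop never crosses a page table page.  The
 * "pdnxt < sva" test catches unsigned wraparound at the top of the
 * address space.
 */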
static __inline void
pagezero(void *page)
{
#if defined(I686_CPU)
	if (cpu_class == CPUCLASS_686) {
#if defined(CPU_ENABLE_SSE)
		if (cpu_feature & CPUID_SSE2)
			sse2_pagezero(page);
		else
#endif
			i686_pagezero(page);
	} else
#endif
		bzero(page, PAGE_SIZE);
}

/*
 * pmap_zero_page zeros the specified hardware page by mapping
 * the page into KVM and clearing its contents.
 */
void
pmap_zero_page(vm_page_t m)
{
	struct sysmaps *sysmaps;

	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
	mtx_lock(&sysmaps->lock);
	if (*sysmaps->CMAP2)
		panic("pmap_zero_page: CMAP2 busy");
	sched_pin();
	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
	pagezero(sysmaps->CADDR2);
	PT_SET_MA(sysmaps->CADDR2, 0);
	sched_unpin();
	mtx_unlock(&sysmaps->lock);
}
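
/*
 * The zero/copy routines above and below share a per-CPU temporary
 * mapping pattern.  This sketch shows the pattern by itself; it is
 * illustrative only (excluded from the build) and the function name
 * is invented.
 */
#if 0
static void
example_with_borrowed_mapping(vm_page_t m)
{
	struct sysmaps *sysmaps;

	/* Borrow this CPU's reserved PTE/VA pair. */
	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
	mtx_lock(&sysmaps->lock);
	if (*sysmaps->CMAP2)
		panic("example: CMAP2 busy");
	/* Stay on this CPU while its slot is borrowed. */
	sched_pin();
	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) |
	    PG_A | PG_M);
	/* ...operate on the page through sysmaps->CADDR2... */
	PT_SET_MA(sysmaps->CADDR2, 0);
	sched_unpin();
	mtx_unlock(&sysmaps->lock);
}
#endif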
/*
 * pmap_zero_page_area zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	struct sysmaps *sysmaps;

	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
	mtx_lock(&sysmaps->lock);
	if (*sysmaps->CMAP2)
		panic("pmap_zero_page_area: CMAP2 busy");
	sched_pin();
	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);

	if (off == 0 && size == PAGE_SIZE)
		pagezero(sysmaps->CADDR2);
	else
		bzero((char *)sysmaps->CADDR2 + off, size);
	PT_SET_MA(sysmaps->CADDR2, 0);
	sched_unpin();
	mtx_unlock(&sysmaps->lock);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page by mapping
 * the page into KVM and clearing its contents.  This
 * is intended to be called from the vm_pagezero process only and
 * outside of Giant.
 */
void
pmap_zero_page_idle(vm_page_t m)
{

	if (*CMAP3)
		panic("pmap_zero_page_idle: CMAP3 busy");
	sched_pin();
	PT_SET_MA(CADDR3, PG_V | PG_RW | VM_PAGE_TO_MACH(m) | PG_A | PG_M);
	pagezero(CADDR3);
	PT_SET_MA(CADDR3, 0);
	sched_unpin();
}

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t src, vm_page_t dst)
{
	struct sysmaps *sysmaps;

	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
	mtx_lock(&sysmaps->lock);
	if (*sysmaps->CMAP1)
		panic("pmap_copy_page: CMAP1 busy");
	if (*sysmaps->CMAP2)
		panic("pmap_copy_page: CMAP2 busy");
	sched_pin();
	PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(src) | PG_A);
	PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | VM_PAGE_TO_MACH(dst) | PG_A | PG_M);
	bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE);
	PT_SET_MA(sysmaps->CADDR1, 0);
	PT_SET_MA(sysmaps->CADDR2, 0);
	sched_unpin();
	mtx_unlock(&sysmaps->lock);
}

int unmapped_buf_allowed = 1;

void
pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
    vm_offset_t b_offset, int xfersize)
{
	struct sysmaps *sysmaps;
	vm_page_t a_pg, b_pg;
	char *a_cp, *b_cp;
	vm_offset_t a_pg_offset, b_pg_offset;
	int cnt;

	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
	mtx_lock(&sysmaps->lock);
	if (*sysmaps->CMAP1 != 0)
		panic("pmap_copy_pages: CMAP1 busy");
	if (*sysmaps->CMAP2 != 0)
		panic("pmap_copy_pages: CMAP2 busy");
	sched_pin();
	while (xfersize > 0) {
		a_pg = ma[a_offset >> PAGE_SHIFT];
		a_pg_offset = a_offset & PAGE_MASK;
		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
		b_pg = mb[b_offset >> PAGE_SHIFT];
		b_pg_offset = b_offset & PAGE_MASK;
		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
		PT_SET_MA(sysmaps->CADDR1, PG_V | VM_PAGE_TO_MACH(a_pg) | PG_A);
		PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
		    VM_PAGE_TO_MACH(b_pg) | PG_A | PG_M);
		a_cp = sysmaps->CADDR1 + a_pg_offset;
		b_cp = sysmaps->CADDR2 + b_pg_offset;
		bcopy(a_cp, b_cp, cnt);
		a_offset += cnt;
		b_offset += cnt;
		xfersize -= cnt;
	}
	PT_SET_MA(sysmaps->CADDR1, 0);
	PT_SET_MA(sysmaps->CADDR2, 0);
	sched_unpin();
	mtx_unlock(&sysmaps->lock);
}
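
/*
 * Worked example of the transfer clamping in pmap_copy_pages(), with
 * PAGE_SIZE == 0x1000:
 *
 *   a_offset = 0x1f00, b_offset = 0x0080, xfersize = 0x300
 *   a_pg_offset = 0x1f00 & PAGE_MASK = 0xf00
 *   cnt = min(0x300, 0x1000 - 0xf00) = 0x100
 *   b_pg_offset = 0x80, cnt = min(0x100, 0x1000 - 0x80) = 0x100
 *
 * so the first iteration copies 0x100 bytes, exactly up to the end of
 * the source page, and the loop re-clamps with both offsets advanced.
 */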
/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_page_exists_quick: page %p is not managed", m));
	rv = FALSE;
	rw_wlock(&pvh_global_lock);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		if (PV_PMAP(pv) == pmap) {
			rv = TRUE;
			break;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 * pmap_page_wired_mappings:
 *
 * Return the number of managed mappings to the given physical page
 * that are wired.
 */
int
pmap_page_wired_mappings(vm_page_t m)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	pmap_t pmap;
	int count;

	count = 0;
	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (count);
	rw_wlock(&pvh_global_lock);
	sched_pin();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte = pmap_pte_quick(pmap, pv->pv_va);
		if ((*pte & PG_W) != 0)
			count++;
		PMAP_UNLOCK(pmap);
	}
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	return (count);
}

/*
 * Returns TRUE if the given page is mapped.  Otherwise, returns FALSE.
 */
boolean_t
pmap_page_is_mapped(vm_page_t m)
{

	if ((m->oflags & VPO_UNMANAGED) != 0)
		return (FALSE);
	return (!TAILQ_EMPTY(&m->md.pv_list));
}

/*
 * Remove all pages from the specified address space; this aids process
 * exit speed.  Also, this code is special cased for the current process
 * only, but can have the more generic (and slightly slower) mode enabled.
 * This is much faster than pmap_remove in the case of running down
 * an entire address space.
 */
void
pmap_remove_pages(pmap_t pmap)
{
	pt_entry_t *pte, tpte;
	vm_page_t m, free = NULL;
	pv_entry_t pv;
	struct pv_chunk *pc, *npc;
	int field, idx;
	int32_t bit;
	uint32_t inuse, bitmask;
	int allfree;

	CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap);

	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}
	rw_wlock(&pvh_global_lock);
	KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap"));
	PMAP_LOCK(pmap);
	sched_pin();
	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
		KASSERT(pc->pc_pmap == pmap, ("Wrong pmap %p %p", pmap,
		    pc->pc_pmap));
		allfree = 1;
		for (field = 0; field < _NPCM; field++) {
			inuse = ~pc->pc_map[field] & pc_freemask[field];
			while (inuse != 0) {
				bit = bsfl(inuse);
				bitmask = 1UL << bit;
				idx = field * 32 + bit;
				pv = &pc->pc_pventry[idx];
				inuse &= ~bitmask;

				pte = vtopte(pv->pv_va);
				tpte = *pte ? xpmap_mtop(*pte) : 0;

				if (tpte == 0) {
					printf(
					    "TPTE at %p IS ZERO @ VA %08x\n",
					    pte, pv->pv_va);
					panic("bad pte");
				}

/*
 * We cannot remove wired pages from a process' mapping at this time.
 */
				if (tpte & PG_W) {
					allfree = 0;
					continue;
				}

				m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
				KASSERT(m->phys_addr == (tpte & PG_FRAME),
				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
				    m, (uintmax_t)m->phys_addr,
				    (uintmax_t)tpte));

				KASSERT(m < &vm_page_array[vm_page_array_size],
				    ("pmap_remove_pages: bad tpte %#jx",
				    (uintmax_t)tpte));

				PT_CLEAR_VA(pte, FALSE);

				/*
				 * Update the vm_page_t clean/reference bits.
				 */
				if (tpte & PG_M)
					vm_page_dirty(m);

				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				if (TAILQ_EMPTY(&m->md.pv_list))
					vm_page_aflag_clear(m, PGA_WRITEABLE);

				pmap_unuse_pt(pmap, pv->pv_va, &free);

				/* Mark free */
				PV_STAT(pv_entry_frees++);
				PV_STAT(pv_entry_spare++);
				pv_entry_count--;
				pc->pc_map[field] |= bitmask;
				pmap->pm_stats.resident_count--;
			}
		}
		PT_UPDATES_FLUSH();
		if (allfree) {
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			free_pv_chunk(pc);
		}
	}
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);

	sched_unpin();
	pmap_invalidate_all(pmap);
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(free);
}
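
/*
 * Illustrative sketch of the pv_chunk bitmap scan at the heart of
 * pmap_remove_pages() (excluded from the build; the function name is
 * invented).  Inverting pc_map[] against pc_freemask yields a mask of
 * the in-use entries, and bsfl() peels them off lowest bit first.
 */
#if 0
static void
example_scan_pv_chunk(struct pv_chunk *pc)
{
	uint32_t inuse, bitmask;
	int bit, field, idx;

	for (field = 0; field < _NPCM; field++) {
		inuse = ~pc->pc_map[field] & pc_freemask[field];
		while (inuse != 0) {
			bit = bsfl(inuse);	/* lowest set bit */
			bitmask = 1UL << bit;
			idx = field * 32 + bit;
			/* ...visit &pc->pc_pventry[idx]... */
			inuse &= ~bitmask;
		}
	}
}
#endif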
/*
 * pmap_is_modified:
 *
 * Return whether or not the specified physical page was modified
 * in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	pmap_t pmap;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_modified: page %p is not managed", m));
	rv = FALSE;

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
	 * is clear, no PTEs can have PG_M set.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return (rv);
	rw_wlock(&pvh_global_lock);
	sched_pin();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte = pmap_pte_quick(pmap, pv->pv_va);
		rv = (*pte & PG_M) != 0;
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

/*
 * pmap_is_prefaultable:
 *
 * Return whether or not the specified virtual address is eligible
 * for prefault.
 */
static boolean_t
pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *pte;
	boolean_t rv = FALSE;

	/*
	 * Prefaulting is currently disabled on Xen: the early return
	 * below makes this function always report FALSE, leaving the
	 * check that follows it unreachable.
	 */
	return (rv);

	if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) {
		pte = vtopte(addr);
		rv = (*pte == 0);
	}
	return (rv);
}

boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	boolean_t rv;

	PMAP_LOCK(pmap);
	rv = pmap_is_prefaultable_locked(pmap, addr);
	PMAP_UNLOCK(pmap);
	return (rv);
}

boolean_t
pmap_is_referenced(vm_page_t m)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	pmap_t pmap;
	boolean_t rv;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_is_referenced: page %p is not managed", m));
	rv = FALSE;
	rw_wlock(&pvh_global_lock);
	sched_pin();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte = pmap_pte_quick(pmap, pv->pv_va);
		rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V);
		PMAP_UNLOCK(pmap);
		if (rv)
			break;
	}
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	return (rv);
}

void
pmap_map_readonly(pmap_t pmap, vm_offset_t va, int len)
{
	int i, npages = round_page(len) >> PAGE_SHIFT;

	for (i = 0; i < npages; i++) {
		pt_entry_t *pte;
		pte = pmap_pte(pmap, (vm_offset_t)(va + i * PAGE_SIZE));
		rw_wlock(&pvh_global_lock);
		pte_store(pte, xpmap_mtop(*pte & ~(PG_RW | PG_M)));
		rw_wunlock(&pvh_global_lock);
		PMAP_MARK_PRIV(xpmap_mtop(*pte));
		pmap_pte_release(pte);
	}
}

void
pmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len)
{
	int i, npages = round_page(len) >> PAGE_SHIFT;

	for (i = 0; i < npages; i++) {
		pt_entry_t *pte;
		pte = pmap_pte(pmap, (vm_offset_t)(va + i * PAGE_SIZE));
		PMAP_MARK_UNPRIV(xpmap_mtop(*pte));
		rw_wlock(&pvh_global_lock);
		pte_store(pte, xpmap_mtop(*pte) | (PG_RW | PG_M));
		rw_wunlock(&pvh_global_lock);
		pmap_pte_release(pte);
	}
}

/*
 * Clear the write and modified bits in each of the given page's mappings.
 */
void
pmap_remove_write(vm_page_t m)
{
	pv_entry_t pv;
	pmap_t pmap;
	pt_entry_t oldpte, *pte;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_remove_write: page %p is not managed", m));

	/*
	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
	 * set by another thread while the object is locked.  Thus,
	 * if PGA_WRITEABLE is clear, no page table entries need updating.
	 */
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&pvh_global_lock);
	sched_pin();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte = pmap_pte_quick(pmap, pv->pv_va);
retry:
		oldpte = *pte;
		if ((oldpte & PG_RW) != 0) {
			vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M);

			/*
			 * Regardless of whether a pte is 32 or 64 bits
			 * in size, PG_RW and PG_M are among the least
			 * significant 32 bits.
			 */
			PT_SET_VA_MA(pte, newpte, TRUE);
			if (*pte != newpte)
				goto retry;

			if ((oldpte & PG_M) != 0)
				vm_page_dirty(m);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	vm_page_aflag_clear(m, PGA_WRITEABLE);
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
}
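
/*
 * The retry loop in pmap_remove_write() is a compare-and-retry idiom:
 * after PT_SET_VA_MA() issues the update, re-reading the PTE detects a
 * racing hardware update of PG_A/PG_M and repeats until the store
 * sticks.  In outline (illustrative only, excluded from the build):
 */
#if 0
static void
example_clear_rw(pt_entry_t *pte)
{
	pt_entry_t newpte, oldpte;

	do {
		oldpte = *pte;
		newpte = oldpte & ~(PG_RW | PG_M);
		PT_SET_VA_MA(pte, newpte, TRUE);
	} while (*pte != newpte);
}
#endif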
/*
 * pmap_ts_referenced:
 *
 * Return a count of reference bits for a page, clearing those bits.
 * It is not necessary for every reference bit to be cleared, but it
 * is necessary that 0 only be returned when there are truly no
 * reference bits set.
 *
 * XXX: The exact number of bits to check and clear is a matter that
 * should be tested and standardized at some point in the future for
 * optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	pv_entry_t pv, pvf, pvn;
	pmap_t pmap;
	pt_entry_t *pte;
	int rtval = 0;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_ts_referenced: page %p is not managed", m));
	rw_wlock(&pvh_global_lock);
	sched_pin();
	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pvf = pv;
		do {
			pvn = TAILQ_NEXT(pv, pv_next);
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
			pmap = PV_PMAP(pv);
			PMAP_LOCK(pmap);
			pte = pmap_pte_quick(pmap, pv->pv_va);
			if ((*pte & PG_A) != 0) {
				PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE);
				pmap_invalidate_page(pmap, pv->pv_va);
				rtval++;
				if (rtval > 4)
					pvn = NULL;
			}
			PMAP_UNLOCK(pmap);
		} while ((pv = pvn) != NULL && pv != pvf);
	}
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	return (rtval);
}
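
/*
 * Example of the bounded aging above: a page mapped by six pmaps, all
 * with PG_A set, yields at most five cleared bits per call (rtval
 * reaches 5 and pvn is forced to NULL).  Rotating each visited pv
 * entry to the tail means a later call resumes with the mappings that
 * were not yet examined.  Callers such as the page daemon only need an
 * approximate count, e.g. (illustrative, excluded from the build):
 */
#if 0
static boolean_t
example_page_was_referenced(vm_page_t m)
{

	return (pmap_ts_referenced(m) > 0);
}
#endif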
/*
 * Apply the given advice to the specified range of addresses within the
 * given pmap.  Depending on the advice, clear the referenced and/or
 * modified flags in each mapping and set the mapped page's dirty field.
 */
void
pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
{
	pd_entry_t oldpde;
	pt_entry_t *pte;
	vm_offset_t pdnxt;
	vm_page_t m;
	boolean_t anychanged;

	if (advice != MADV_DONTNEED && advice != MADV_FREE)
		return;
	anychanged = FALSE;
	rw_wlock(&pvh_global_lock);
	sched_pin();
	PMAP_LOCK(pmap);
	for (; sva < eva; sva = pdnxt) {
		pdnxt = (sva + NBPDR) & ~PDRMASK;
		if (pdnxt < sva)
			pdnxt = eva;
		oldpde = pmap->pm_pdir[sva >> PDRSHIFT];
		if ((oldpde & (PG_PS | PG_V)) != PG_V)
			continue;
		if (pdnxt > eva)
			pdnxt = eva;
		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
		    sva += PAGE_SIZE) {
			if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED |
			    PG_V))
				continue;
			else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
				if (advice == MADV_DONTNEED) {
					/*
					 * Future calls to pmap_is_modified()
					 * can be avoided by making the page
					 * dirty now.
					 */
					m = PHYS_TO_VM_PAGE(xpmap_mtop(*pte) &
					    PG_FRAME);
					vm_page_dirty(m);
				}
				PT_SET_VA_MA(pte, *pte & ~(PG_M | PG_A), TRUE);
			} else if ((*pte & PG_A) != 0)
				PT_SET_VA_MA(pte, *pte & ~PG_A, TRUE);
			else
				continue;
			if ((*pte & PG_G) != 0)
				pmap_invalidate_page(pmap, sva);
			else
				anychanged = TRUE;
		}
	}
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_VA_MA(PMAP1, 0, TRUE);
	if (anychanged)
		pmap_invalidate_all(pmap);
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
	PMAP_UNLOCK(pmap);
}
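
/*
 * Sketch of the intended use of the two advice values (illustrative
 * only, excluded from the build; the real callers live in the VM
 * system): for MADV_DONTNEED, PG_M is folded into the page's dirty
 * field before being cleared, so the contents survive; for MADV_FREE
 * the caller has already declared the contents disposable, so PG_M is
 * simply dropped.
 */
#if 0
static void
example_dontneed_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	pmap_advise(pmap, sva, eva, MADV_DONTNEED);
}
#endif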
/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	pv_entry_t pv;
	pmap_t pmap;
	pt_entry_t *pte;

	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
	    ("pmap_clear_modify: page %p is not managed", m));
	VM_OBJECT_ASSERT_WLOCKED(m->object);
	KASSERT(!vm_page_xbusied(m),
	    ("pmap_clear_modify: page %p is exclusive busied", m));

	/*
	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
	 * If the object containing the page is locked and the page is not
	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
	 */
	if ((m->aflags & PGA_WRITEABLE) == 0)
		return;
	rw_wlock(&pvh_global_lock);
	sched_pin();
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pte = pmap_pte_quick(pmap, pv->pv_va);
		if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) {
			/*
			 * Regardless of whether a pte is 32 or 64 bits
			 * in size, PG_M is among the least significant
			 * 32 bits.
			 */
			PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE);
			pmap_invalidate_page(pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pmap);
	}
	sched_unpin();
	rw_wunlock(&pvh_global_lock);
}

/*
 * Miscellaneous support routines follow
 */

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode)
{
	vm_offset_t va, offset;
	vm_size_t tmpsize;

	offset = pa & PAGE_MASK;
	size = round_page(offset + size);
	pa = pa & PG_FRAME;

	if (pa < KERNLOAD && pa + size <= KERNLOAD)
		va = KERNBASE + pa;
	else
		va = kva_alloc(size);
	if (!va)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");

	for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE)
		pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode);
	pmap_invalidate_range(kernel_pmap, va, va + tmpsize);
	pmap_invalidate_cache_range(va, va + size, FALSE);
	return ((void *)(va + offset));
}

void *
pmap_mapdev(vm_paddr_t pa, vm_size_t size)
{

	return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE));
}

void *
pmap_mapbios(vm_paddr_t pa, vm_size_t size)
{

	return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK));
}

void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
	vm_offset_t base, offset;

	if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
		return;
	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = round_page(offset + size);
	kva_free(base, size);
}
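
/*
 * Typical use of the device-mapping routines above (illustrative only,
 * excluded from the build; the physical address is made up and stands
 * in for a real device register window):
 */
#if 0
static void
example_map_device_registers(void)
{
	void *regs;

	regs = pmap_mapdev(0xd0000000, PAGE_SIZE);	/* uncacheable */
	/* ...access the device through "regs"... */
	pmap_unmapdev((vm_offset_t)regs, PAGE_SIZE);
}
#endif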
/*
 * Sets the memory attribute for the specified page.
 */
void
pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
{

	m->md.pat_mode = ma;
	if ((m->flags & PG_FICTITIOUS) != 0)
		return;

	/*
	 * If "m" is a normal page, flush it from the cache.
	 * See pmap_invalidate_cache_range().
	 *
	 * First, try to find an existing mapping of the page by sf
	 * buffer.  sf_buf_invalidate_cache() modifies mapping and
	 * flushes the cache.
	 */
	if (sf_buf_invalidate_cache(m))
		return;

	/*
	 * If page is not mapped by sf buffer, but CPU does not
	 * support self snoop, map the page transient and do
	 * invalidation.  In the worst case, whole cache is flushed by
	 * pmap_invalidate_cache_range().
	 */
	if ((cpu_feature & CPUID_SS) == 0)
		pmap_flush_page(m);
}

static void
pmap_flush_page(vm_page_t m)
{
	struct sysmaps *sysmaps;
	vm_offset_t sva, eva;

	if ((cpu_feature & CPUID_CLFSH) != 0) {
		sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
		mtx_lock(&sysmaps->lock);
		if (*sysmaps->CMAP2)
			panic("pmap_flush_page: CMAP2 busy");
		sched_pin();
		PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW |
		    VM_PAGE_TO_MACH(m) | PG_A | PG_M |
		    pmap_cache_bits(m->md.pat_mode, 0));
		invlcaddr(sysmaps->CADDR2);
		sva = (vm_offset_t)sysmaps->CADDR2;
		eva = sva + PAGE_SIZE;

		/*
		 * Use mfence despite the ordering implied by
		 * mtx_{un,}lock() because clflush is not guaranteed
		 * to be ordered by any other instruction.
		 */
		mfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflush(sva);
		mfence();
		PT_SET_MA(sysmaps->CADDR2, 0);
		sched_unpin();
		mtx_unlock(&sysmaps->lock);
	} else
		pmap_invalidate_cache();
}

/*
 * Changes the specified virtual address range's memory type to that given by
 * the parameter "mode".  The specified virtual address range must be
 * completely contained within the kernel map.
 *
 * Returns zero if the change completed successfully, and either EINVAL or
 * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
 * of the virtual address range was not mapped, and ENOMEM is returned if
 * there was insufficient memory available to complete the change.
 */
int
pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
{
	vm_offset_t base, offset, tmpva;
	pt_entry_t *pte;
	u_int opte, npte;
	pd_entry_t *pde;
	boolean_t changed;

	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = round_page(offset + size);

	/* Only supported on kernel virtual addresses. */
	if (base <= VM_MAXUSER_ADDRESS)
		return (EINVAL);

	/* 4MB pages and pages that aren't mapped aren't supported. */
	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
		pde = pmap_pde(kernel_pmap, tmpva);
		if (*pde & PG_PS)
			return (EINVAL);
		if ((*pde & PG_V) == 0)
			return (EINVAL);
		pte = vtopte(va);
		if ((*pte & PG_V) == 0)
			return (EINVAL);
	}

	changed = FALSE;

	/*
	 * Ok, all the pages exist and are 4k, so run through them updating
	 * their cache mode.
	 */
	for (tmpva = base; size > 0; ) {
		pte = vtopte(tmpva);

		/*
		 * The cache mode bits are all in the low 32-bits of the
		 * PTE, so we can just spin on updating the low 32-bits.
		 */
		do {
			opte = *(u_int *)pte;
			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
			npte |= pmap_cache_bits(mode, 0);
			PT_SET_VA_MA(pte, npte, TRUE);
		} while (npte != opte && (*pte != npte));
		if (npte != opte)
			changed = TRUE;
		tmpva += PAGE_SIZE;
		size -= PAGE_SIZE;
	}

	/*
	 * Flush CPU caches to make sure any data isn't cached that
	 * shouldn't be, etc.
	 */
	if (changed) {
		pmap_invalidate_range(kernel_pmap, base, tmpva);
		pmap_invalidate_cache_range(base, tmpva, FALSE);
	}
	return (0);
}
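
/*
 * Example of pmap_change_attr() (illustrative only, excluded from the
 * build): remap an already-mapped kernel range, e.g. a frame buffer,
 * as write-combining.  Note that this implementation only ever returns
 * 0 or EINVAL, since PTEs are modified in place and no memory is
 * allocated.
 */
#if 0
static int
example_make_write_combining(vm_offset_t va, vm_size_t size)
{

	return (pmap_change_attr(va, size, PAT_WRITE_COMBINING));
}
#endif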
/*
 * Perform the pmap work for mincore.
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
{
	pt_entry_t *ptep, pte;
	vm_paddr_t pa;
	int val;

	PMAP_LOCK(pmap);
retry:
	ptep = pmap_pte(pmap, addr);
	pte = (ptep != NULL) ? PT_GET(ptep) : 0;
	pmap_pte_release(ptep);
	val = 0;
	if ((pte & PG_V) != 0) {
		val |= MINCORE_INCORE;
		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
		if ((pte & PG_A) != 0)
			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
	}
	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
		pa = pte & PG_FRAME;
		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
			goto retry;
	} else
		PA_UNLOCK_COND(*locked_pa);
	PMAP_UNLOCK(pmap);
	return (val);
}

void
pmap_activate(struct thread *td)
{
	pmap_t pmap, oldpmap;
	u_int cpuid;
	u_int32_t cr3;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	oldpmap = PCPU_GET(curpmap);
	cpuid = PCPU_GET(cpuid);
#if defined(SMP)
	CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active);
	CPU_SET_ATOMIC(cpuid, &pmap->pm_active);
#else
	CPU_CLR(cpuid, &oldpmap->pm_active);
	CPU_SET(cpuid, &pmap->pm_active);
#endif
#ifdef PAE
	cr3 = vtophys(pmap->pm_pdpt);
#else
	cr3 = vtophys(pmap->pm_pdir);
#endif
	/*
	 * pmap_activate is called for the current thread on the current CPU.
	 */
	td->td_pcb->pcb_cr3 = cr3;
	PT_UPDATES_FLUSH();
	load_cr3(cr3);
	PCPU_SET(curpmap, pmap);
	critical_exit();
}

void
pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
{
}

/*
 * Increase the starting virtual address of the given mapping if a
 * different alignment might result in more superpage mappings.
 */
void
pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
    vm_offset_t *addr, vm_size_t size)
{
	vm_offset_t superpage_offset;

	if (size < NBPDR)
		return;
	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
		offset += ptoa(object->pg_color);
	superpage_offset = offset & PDRMASK;
	if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR ||
	    (*addr & PDRMASK) == superpage_offset)
		return;
	if ((*addr & PDRMASK) < superpage_offset)
		*addr = (*addr & ~PDRMASK) + superpage_offset;
	else
		*addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset;
}
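
/*
 * Worked example of the computation above, assuming 4 MB superpages
 * (PDRMASK == 0x3fffff) and a mapping of at least 8 MB:
 *
 *   offset           = 0x00512000 (object offset of the mapping)
 *   superpage_offset = offset & PDRMASK = 0x00112000
 *   *addr (in)       = 0x20000000, so (*addr & PDRMASK) == 0
 *
 * Since 0 < superpage_offset, the address is advanced to
 * 0x20000000 + 0x00112000 = 0x20112000, giving the virtual address and
 * the object offset the same position relative to a superpage boundary
 * and enabling later promotion to superpage mappings.
 */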

void
pmap_suspend(void)
{
	pmap_t pmap;
	int i, pdir, offset;
	vm_paddr_t pdirma;
	mmu_update_t mu[4];

	/*
	 * We need to remove the recursive mapping structure from all
	 * our pmaps so that Xen doesn't get confused when it restores
	 * the page tables.  The recursive map lives at page directory
	 * index PTDPTDI.  We assume that the suspend code has stopped
	 * the other vcpus (if any).
	 */
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		for (i = 0; i < 4; i++) {
			/*
			 * Figure out which page directory (L2) page
			 * contains this bit of the recursive map and
			 * the offset within that page of the map
			 * entry.
			 */
			pdir = (PTDPTDI + i) / NPDEPG;
			offset = (PTDPTDI + i) % NPDEPG;
			pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
			mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
			mu[i].val = 0;
		}
		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
	}
}

void
pmap_resume(void)
{
	pmap_t pmap;
	int i, pdir, offset;
	vm_paddr_t pdirma;
	mmu_update_t mu[4];

	/*
	 * Restore the recursive map that we removed on suspend.
	 */
	LIST_FOREACH(pmap, &allpmaps, pm_list) {
		for (i = 0; i < 4; i++) {
			/*
			 * Figure out which page directory (L2) page
			 * contains this bit of the recursive map and
			 * the offset within that page of the map
			 * entry.
			 */
			pdir = (PTDPTDI + i) / NPDEPG;
			offset = (PTDPTDI + i) % NPDEPG;
			pdirma = pmap->pm_pdpt[pdir] & PG_FRAME;
			mu[i].ptr = pdirma + offset * sizeof(pd_entry_t);
			mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V;
		}
		HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF);
	}
}
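
/*
 * Example (illustrative sketch, not compiled): the index arithmetic used
 * by pmap_suspend() and pmap_resume() above.  With PAE there are four L2
 * page-directory pages of NPDEPG entries each, so a global directory
 * index selects an L2 page with index / NPDEPG and a slot within it with
 * index % NPDEPG; the machine address of the entry then follows from the
 * L2 page's frame address.  The helper name example_pde_ma() is
 * hypothetical.
 */
#if 0
static vm_paddr_t
example_pde_ma(pmap_t pmap, int index)
{
	int pdir, offset;

	pdir = index / NPDEPG;		/* which L2 page */
	offset = index % NPDEPG;	/* slot within that page */
	return ((pmap->pm_pdpt[pdir] & PG_FRAME) +
	    offset * sizeof(pd_entry_t));
}
#endif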

#if defined(PMAP_DEBUG)
int
pmap_pid_dump(int pid)
{
	pmap_t pmap;
	struct proc *p;
	int npte = 0;
	int index;

	sx_slock(&allproc_lock);
	FOREACH_PROC_IN_SYSTEM(p) {
		if (p->p_pid != pid)
			continue;

		if (p->p_vmspace) {
			int i, j;
			index = 0;
			pmap = vmspace_pmap(p->p_vmspace);
			for (i = 0; i < NPDEPTD; i++) {
				pd_entry_t *pde;
				pt_entry_t *pte;
				vm_offset_t base = i << PDRSHIFT;

				pde = &pmap->pm_pdir[i];
				if (pde && pmap_pde_v(pde)) {
					for (j = 0; j < NPTEPG; j++) {
						vm_offset_t va = base + (j << PAGE_SHIFT);
						if (va >= (vm_offset_t)VM_MIN_KERNEL_ADDRESS) {
							if (index) {
								index = 0;
								printf("\n");
							}
							sx_sunlock(&allproc_lock);
							return (npte);
						}
						pte = pmap_pte(pmap, va);
						if (pte && pmap_pte_v(pte)) {
							pt_entry_t pa;
							vm_page_t m;
							pa = PT_GET(pte);
							m = PHYS_TO_VM_PAGE(pa & PG_FRAME);
							printf("va: 0x%x, pt: 0x%jx, h: %d, w: %d, f: 0x%x",
							    va, (uintmax_t)pa, m->hold_count,
							    m->wire_count, m->flags);
							npte++;
							index++;
							if (index >= 2) {
								index = 0;
								printf("\n");
							} else {
								printf(" ");
							}
						}
					}
				}
			}
		}
	}
	sx_sunlock(&allproc_lock);
	return (npte);
}
#endif

#if defined(DEBUG)

static void	pads(pmap_t pm);
void	pmap_pvdump(vm_paddr_t pa);

/* Print the address space of a pmap. */
static void
pads(pmap_t pm)
{
	int i, j;
	vm_offset_t va;
	pt_entry_t *ptep;

	if (pm == kernel_pmap)
		return;
	for (i = 0; i < NPDEPTD; i++)
		if (pm->pm_pdir[i])
			for (j = 0; j < NPTEPG; j++) {
				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
				if (pm == kernel_pmap && va < KERNBASE)
					continue;
				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
					continue;
				ptep = pmap_pte(pm, va);
				if (pmap_pte_v(ptep))
					printf("%x:%jx ", va, (uintmax_t)*ptep);
			}
}

void
pmap_pvdump(vm_paddr_t pa)
{
	pv_entry_t pv;
	pmap_t pmap;
	vm_page_t m;

	printf("pa %jx", (uintmax_t)pa);
	m = PHYS_TO_VM_PAGE(pa);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
		pmap = PV_PMAP(pv);
		printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
		pads(pmap);
	}
	printf(" ");
}
#endif
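
/*
 * Example (illustrative sketch, not compiled): the debug helpers above
 * are meant to be invoked by hand, e.g. from a kernel debugger, to dump
 * the valid user PTEs of a process or the pv entries attached to a
 * physical page.  The wrapper name example_debug_dump() is hypothetical,
 * and the calls require kernels built with PMAP_DEBUG and DEBUG,
 * respectively.
 */
#if 0
static void
example_debug_dump(int pid, vm_paddr_t pa)
{
	(void)pmap_pid_dump(pid);	/* requires PMAP_DEBUG */
	pmap_pvdump(pa);		/* requires DEBUG */
}
#endif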