/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2005 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/i386/xen/pmap.c 208504 2010-05-24 14:26:57Z alc $");

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidation or reduced-protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and as to when physical maps must be made correct.
 */

#define PMAP_DIAGNOSTIC

#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"
#include "opt_smp.h"
#include "opt_xbox.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sf_buf.h>
#include <sys/sx.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#ifdef XBOX
#include <machine/xbox.h>
#endif

#include <xen/interface/xen.h>
#include <xen/hypervisor.h>
#include <machine/xen/hypercall.h>
#include <machine/xen/xenvar.h>
#include <machine/xen/xenfunc.h>

#if !defined(CPU_DISABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#if !defined(PMAP_DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define PMAP_INLINE	inline
#else
#define PMAP_INLINE	extern inline
#endif
#else
#define PMAP_INLINE
#endif

#define PV_STATS
#ifdef PV_STATS
#define PV_STAT(x)	do { x ; } while (0)
#else
#define PV_STAT(x)	do { } while (0)
#endif

#define	pa_index(pa)	((pa) >> PDRSHIFT)
#define	pa_to_pvh(pa)	(&pv_table[pa_index(pa)])

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define pdir_pde(m, v)	(m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
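
/*
 * Worked example for the macros above (values assume a non-PAE i386
 * kernel, where PDRSHIFT == 22 and NPTEPG == 1024; the address is
 * illustrative only): for va == 0xc0401234, pmap_pde() selects
 * pm_pdir[0xc0401234 >> 22] == pm_pdir[769], and the PTE within that
 * page table page is (va >> PAGE_SHIFT) & (NPTEPG - 1) == 1.
 */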

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;

vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
int pgeflag = 0;		/* PG_G or-in */
int pseflag = 0;		/* PG_PS or-in */

int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;

#ifdef PAE
pt_entry_t pg_nx;
#if !defined(XEN)
static uma_zone_t pdptzone;
#endif
#endif

static int pat_works;		/* Is page attribute table sane? */

/*
 * Data for the pv entry allocation mechanism
 */
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
static struct md_page *pv_table;
static int shpgperproc = PMAP_SHPGPERPROC;

struct pv_chunk *pv_chunkbase;	/* KVA block for pv_chunks */
int pv_maxchunks;		/* How many chunks we have KVA for */
vm_offset_t pv_vafree;		/* freelist stored in the PTE */

/*
 * All those kernel PT submaps that BSD is so fond of
 */
struct sysmaps {
	struct mtx lock;
	pt_entry_t *CMAP1;
	pt_entry_t *CMAP2;
	caddr_t CADDR1;
	caddr_t CADDR2;
};
static struct sysmaps sysmaps_pcpu[MAXCPU];
static pt_entry_t *CMAP3;
caddr_t ptvmmap = 0;
static caddr_t CADDR3;
struct msgbuf *msgbufp = 0;

/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;

static pt_entry_t *PMAP1 = 0, *PMAP2;
static pt_entry_t *PADDR1 = 0, *PADDR2;
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
    &PMAP1changedcpu, 0,
    "Number of times pmap_pte_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
    &PMAP1changed, 0,
    "Number of times pmap_pte_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
    &PMAP1unchanged, 0,
    "Number of times pmap_pte_quick didn't change PMAP1");
static struct mtx PMAP2mutex;

SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");
static int pg_ps_enabled;
SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN, &pg_ps_enabled, 0,
    "Are large page mappings enabled?");

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_max, CTLFLAG_RD, &pv_entry_max, 0,
    "Max number of PV entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, shpgperproc, CTLFLAG_RD, &shpgperproc, 0,
    "Page share factor per proc");
SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0,
    "2/4MB page mapping counters");

static u_long pmap_pde_mappings;
SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD,
    &pmap_pde_mappings, 0, "2/4MB page mappings");

static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t locked_pmap, int try);

static vm_page_t pmap_enter_quick_locked(multicall_entry_t **mcl, int *count,
    pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte);
static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva,
    vm_page_t *free);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va,
    vm_page_t *free);
static void pmap_remove_entry(struct pmap *pmap, vm_page_t m,
    vm_offset_t va);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags);

static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static void pmap_pte_release(pt_entry_t *pte);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t *);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
static boolean_t pmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr);
static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode);

static __inline void pagezero(void *page);

#if defined(PAE) && !defined(XEN)
static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
#endif
#ifndef XEN
static void pmap_set_pg(void);
#endif

CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));

/*
 * If you get an error here, then you set KVA_PAGES wrong! See the
 * description of KVA_PAGES in sys/i386/include/pmap.h. It must be a
 * multiple of 4 for a normal kernel, or a multiple of 8 for PAE.
 */
CTASSERT(KERNBASE % (1 << 24) == 0);

void
pd_set(struct pmap *pmap, int ptepindex, vm_paddr_t val, int type)
{
	vm_paddr_t pdir_ma = vtomach(&pmap->pm_pdir[ptepindex]);

	switch (type) {
	case SH_PD_SET_VA:
#if 0
		xen_queue_pt_update(shadow_pdir_ma,
		    xpmap_ptom(val & ~(PG_RW)));
#endif
		xen_queue_pt_update(pdir_ma,
		    xpmap_ptom(val));
		break;
	case SH_PD_SET_VA_MA:
#if 0
		xen_queue_pt_update(shadow_pdir_ma,
		    val & ~(PG_RW));
#endif
		xen_queue_pt_update(pdir_ma, val);
		break;
	case SH_PD_SET_VA_CLEAR:
#if 0
		xen_queue_pt_update(shadow_pdir_ma, 0);
#endif
		xen_queue_pt_update(pdir_ma, 0);
		break;
	}
}

/*
 * Move the kernel virtual free pointer to the next
 * 4MB.  This is used to help improve performance
 * by using a large (4MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
	vm_offset_t newaddr = addr;

#ifndef DISABLE_PSE
	if (cpu_feature & CPUID_PSE)
		newaddr = (addr + PDRMASK) & ~PDRMASK;
#endif
	return newaddr;
}
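
/*
 * Worked example (illustrative, non-PAE values): PDRMASK == 0x3fffff,
 * so pmap_kmem_choose(0xc0633000) returns
 * (0xc0633000 + 0x3fffff) & ~0x3fffff == 0xc0800000, the next 4MB
 * superpage boundary; addresses already on a boundary are unchanged.
 */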

/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	On the i386 this is called after mapping has already been enabled
 *	and just syncs the pmap module with what has already been done.
 *	[We can't call it easily with mapping off since the kernel is not
 *	mapped with PA == VA, hence we would have to relocate every address
 *	from the linked base (virtual) address "KERNBASE" to the actual
 *	(physical) address starting relative to 0]
 */
void
pmap_bootstrap(vm_paddr_t firstaddr)
{
	vm_offset_t va;
	pt_entry_t *pte, *unused;
	struct sysmaps *sysmaps;
	int i;

	/*
	 * XXX The calculation of virtual_avail is wrong.  It's NKPT*PAGE_SIZE
	 * too large.  It should instead be correctly calculated in locore.s
	 * and not based on 'first' (which is a physical address, not a
	 * virtual address, for the start of unused physical memory).  The
	 * kernel page tables are NOT double mapped and thus should not be
	 * included in this calculation.
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
	virtual_avail = pmap_kmem_choose(virtual_avail);

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
#ifdef PAE
	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
	LIST_INIT(&allpmaps);
	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
	if (nkpt == 0)
		nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_avail;
	pte = vtopte(va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 * CMAP3 is used for the idle process page zeroing.
	 */
	for (i = 0; i < MAXCPU; i++) {
		sysmaps = &sysmaps_pcpu[i];
		mtx_init(&sysmaps->lock, "SYSMAPS", NULL, MTX_DEF);
		SYSMAP(caddr_t, sysmaps->CMAP1, sysmaps->CADDR1, 1)
		SYSMAP(caddr_t, sysmaps->CMAP2, sysmaps->CADDR2, 1)
		PT_SET_MA(sysmaps->CADDR1, 0);
		PT_SET_MA(sysmaps->CADDR2, 0);
	}
	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
	PT_SET_MA(CADDR3, 0);

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)

	/*
	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
	 */
	SYSMAP(caddr_t, unused, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 */
	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))

	/*
	 * ptemap is used for pmap_pte_quick
	 */
	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);

	mtx_init(&PMAP2mutex, "PMAP2", NULL, MTX_DEF);

	virtual_avail = va;

	/*
	 * Leave in place an identity mapping (virt == phys) for the low 1 MB
	 * physical memory region that is used by the ACPI wakeup code.  This
	 * mapping must not have PG_G set.
	 */
#ifndef XEN
	/*
	 * leave here deliberately to show that this is not supported
	 */
#ifdef XBOX
	/* FIXME: This is gross, but needed for the XBOX.  Since we are at
	 * such an early stage, we cannot yet neatly map video memory ... :-(
	 * Better fixes are very welcome! */
	if (!arch_i386_is_xbox)
#endif
	for (i = 1; i < NKPT; i++)
		PTD[i] = 0;

	/* Initialize the PAT MSR if present. */
	pmap_init_pat();

	/* Turn on PG_G on kernel page(s) */
	pmap_set_pg();
#endif
}

/*
 * Setup the PAT MSR.
 */
void
pmap_init_pat(void)
{
	uint64_t pat_msr;

	/* Bail if this CPU doesn't implement PAT. */
	if (!(cpu_feature & CPUID_PAT))
		return;

	if (cpu_vendor_id != CPU_VENDOR_INTEL ||
	    (CPUID_TO_FAMILY(cpu_id) == 6 && CPUID_TO_MODEL(cpu_id) >= 0xe)) {
		/*
		 * Leave the indices 0-3 at the default of WB, WT, UC-, and UC.
		 * Program 4 and 5 as WP and WC.
		 * Leave 6 and 7 as UC- and UC.
		 */
		pat_msr = rdmsr(MSR_PAT);
		pat_msr &= ~(PAT_MASK(4) | PAT_MASK(5));
		pat_msr |= PAT_VALUE(4, PAT_WRITE_PROTECTED) |
		    PAT_VALUE(5, PAT_WRITE_COMBINING);
		pat_works = 1;
	} else {
		/*
		 * Due to some Intel errata, we can only safely use the lower 4
		 * PAT entries.  Thus, just replace PAT Index 2 with WC instead
		 * of UC-.
		 *
		 *   Intel Pentium III Processor Specification Update
		 * Errata E.27 (Upper Four PAT Entries Not Usable With Mode B
		 * or Mode C Paging)
		 *
		 *   Intel Pentium IV Processor Specification Update
		 * Errata N46 (PAT Index MSB May Be Calculated Incorrectly)
		 */
		pat_msr = rdmsr(MSR_PAT);
		pat_msr &= ~PAT_MASK(2);
		pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING);
		pat_works = 0;
	}
	wrmsr(MSR_PAT, pat_msr);
}
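
/*
 * Sketch of the PAT layout programmed above when pat_works is set
 * (entries 0-3 and 6-7 keep their power-on defaults):
 *
 *	index:	0    1    2    3    4    5    6    7
 *	type:	WB   WT   UC-  UC   WP   WC   UC-  UC
 *
 * On the errata-affected CPUs only index 2 is reprogrammed, giving
 * WB, WT, WC, UC in the usable lower half.
 */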

#ifndef XEN
/*
 * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
 */
static void
pmap_set_pg(void)
{
	pd_entry_t pdir;
	pt_entry_t *pte;
	vm_offset_t va, endva;
	int i;

	if (pgeflag == 0)
		return;

	i = KERNLOAD/NBPDR;
	endva = KERNBASE + KERNend;

	if (pseflag) {
		va = KERNBASE + KERNLOAD;
		while (va < endva) {
			pdir = kernel_pmap->pm_pdir[KPTDI+i];
			pdir |= pgeflag;
			kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
			invltlb();	/* Play it safe, invltlb() every time */
			i++;
			va += NBPDR;
		}
	} else {
		va = (vm_offset_t)btext;
		while (va < endva) {
			pte = vtopte(va);
			if (*pte & PG_V)
				*pte |= pgeflag;
			invltlb();	/* Play it safe, invltlb() every time */
			va += PAGE_SIZE;
		}
	}
}
#endif

/*
 *	Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pat_mode = PAT_WRITE_BACK;
}

#if defined(PAE) && !defined(XEN)
static void *
pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	return ((void *)kmem_alloc_contig(kernel_map, bytes, wait, 0x0ULL,
	    0xffffffffULL, 1, 0, VM_MEMATTR_DEFAULT));
}
#endif

/*
 * Abuse the pte nodes for unmapped kva to thread a kva freelist through.
 * Requirements:
 *  - Must deal with pages in order to ensure that none of the PG_* bits
 *    are ever set, PG_V in particular.
 *  - Assumes we can write to ptes without pte_store() atomic ops, even
 *    on PAE systems.  This should be ok.
 *  - Assumes nothing will ever test these addresses for 0 to indicate
 *    no mapping instead of correctly checking PG_V.
 *  - Assumes a vm_offset_t will fit in a pte (true for i386).
 * Because PG_V is never set, there can be no mappings to invalidate.
 */
static int ptelist_count = 0;
static vm_offset_t
pmap_ptelist_alloc(vm_offset_t *head)
{
	vm_offset_t va;
	vm_offset_t *phead = (vm_offset_t *)*head;

	if (ptelist_count == 0) {
		printf("out of memory!!!!!!\n");
		return (0);	/* Out of memory */
	}
	ptelist_count--;
	va = phead[ptelist_count];
	return (va);
}

static void
pmap_ptelist_free(vm_offset_t *head, vm_offset_t va)
{
	vm_offset_t *phead = (vm_offset_t *)*head;

	phead[ptelist_count++] = va;
}

static void
pmap_ptelist_init(vm_offset_t *head, void *base, int npages)
{
	int i, nstackpages;
	vm_offset_t va;
	vm_page_t m;

	nstackpages = (npages + PAGE_SIZE/sizeof(vm_offset_t) - 1)/
	    (PAGE_SIZE/sizeof(vm_offset_t));
	for (i = 0; i < nstackpages; i++) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		m = vm_page_alloc(NULL, i,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		pmap_qenter(va, &m, 1);
	}

	*head = (vm_offset_t)base;
	for (i = npages - 1; i >= nstackpages; i--) {
		va = (vm_offset_t)base + i * PAGE_SIZE;
		pmap_ptelist_free(head, va);
	}
}


/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_page_t mpte;
	vm_size_t s;
	int i, pv_npg;

	/*
	 * Initialize the vm page array entries for the kernel pmap's
	 * page table pages.
	 */
	for (i = 0; i < nkpt; i++) {
		mpte = PHYS_TO_VM_PAGE(xpmap_mtop(PTD[i + KPTDI] & PG_FRAME));
		KASSERT(mpte >= vm_page_array &&
		    mpte < &vm_page_array[vm_page_array_size],
		    ("pmap_init: page table page is out of range"));
		mpte->pindex = i + KPTDI;
		mpte->phys_addr = xpmap_mtop(PTD[i + KPTDI] & PG_FRAME);
	}

	/*
	 * Initialize the address space (zone) for the pv entries.  Set a
	 * high water mark so that the system can recover from excessive
	 * numbers of pv entries.
	 */
	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + cnt.v_page_count;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_max = roundup(pv_entry_max, _NPCPV);
	pv_entry_high_water = 9 * (pv_entry_max / 10);

	/*
	 * Are large page mappings enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled);

	/*
	 * Calculate the size of the pv head table for superpages.
	 */
	for (i = 0; phys_avail[i + 1]; i += 2);
	pv_npg = round_4mpage(phys_avail[(i - 2) + 1]) / NBPDR;

	/*
	 * Allocate memory for the pv head table for superpages.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_alloc(kernel_map, s);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);

	pv_maxchunks = MAX(pv_entry_max / _NPCPV, maxproc);
	pv_chunkbase = (struct pv_chunk *)kmem_alloc_nofault(kernel_map,
	    PAGE_SIZE * pv_maxchunks);
	if (pv_chunkbase == NULL)
		panic("pmap_init: not enough kvm for pv chunks");
	pmap_ptelist_init(&pv_vafree, pv_chunkbase, pv_maxchunks);
#if defined(PAE) && !defined(XEN)
	pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
	    NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
	uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
#endif
}
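
/*
 * Sizing example for pmap_init() above (illustrative, non-PAE values):
 * if the highest usable physical address is just below 512MB,
 * round_4mpage() yields 0x20000000, so pv_npg == 0x20000000 / NBPDR ==
 * 128 superpage pv heads, and s == round_page(128 *
 * sizeof(struct md_page)).
 */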


/***************************************************
 * Low level helper routines.....
 ***************************************************/

/*
 * Determine the appropriate bits to set in a PTE or PDE for a specified
 * caching mode.
 */
int
pmap_cache_bits(int mode, boolean_t is_pde)
{
	int pat_flag, pat_index, cache_bits;

	/* The PAT bit is different for PTE's and PDE's. */
	pat_flag = is_pde ? PG_PDE_PAT : PG_PTE_PAT;

	/* If we don't support PAT, map extended modes to older ones. */
	if (!(cpu_feature & CPUID_PAT)) {
		switch (mode) {
		case PAT_UNCACHEABLE:
		case PAT_WRITE_THROUGH:
		case PAT_WRITE_BACK:
			break;
		case PAT_UNCACHED:
		case PAT_WRITE_COMBINING:
		case PAT_WRITE_PROTECTED:
			mode = PAT_UNCACHEABLE;
			break;
		}
	}

	/* Map the caching mode to a PAT index. */
	if (pat_works) {
		switch (mode) {
		case PAT_UNCACHEABLE:
			pat_index = 3;
			break;
		case PAT_WRITE_THROUGH:
			pat_index = 1;
			break;
		case PAT_WRITE_BACK:
			pat_index = 0;
			break;
		case PAT_UNCACHED:
			pat_index = 2;
			break;
		case PAT_WRITE_COMBINING:
			pat_index = 5;
			break;
		case PAT_WRITE_PROTECTED:
			pat_index = 4;
			break;
		default:
			panic("Unknown caching mode %d\n", mode);
		}
	} else {
		switch (mode) {
		case PAT_UNCACHED:
		case PAT_UNCACHEABLE:
		case PAT_WRITE_PROTECTED:
			pat_index = 3;
			break;
		case PAT_WRITE_THROUGH:
			pat_index = 1;
			break;
		case PAT_WRITE_BACK:
			pat_index = 0;
			break;
		case PAT_WRITE_COMBINING:
			pat_index = 2;
			break;
		default:
			panic("Unknown caching mode %d\n", mode);
		}
	}

	/* Map the 3-bit index value into the PAT, PCD, and PWT bits. */
	cache_bits = 0;
	if (pat_index & 0x4)
		cache_bits |= pat_flag;
	if (pat_index & 0x2)
		cache_bits |= PG_NC_PCD;
	if (pat_index & 0x1)
		cache_bits |= PG_NC_PWT;
	return (cache_bits);
}
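
/*
 * Example for pmap_cache_bits() above: PAT_WRITE_COMBINING with
 * pat_works maps to pat_index 5 (binary 101), so for a PTE the result
 * is PG_PTE_PAT | PG_NC_PWT (PAT set, PCD clear, PWT set); for a PDE
 * the same index yields PG_PDE_PAT | PG_NC_PWT.
 */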

#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 *
 * N.B.: Before calling any of the following TLB invalidation functions,
 * the calling processor must ensure that all stores updating a non-
 * kernel page table are globally performed.  Otherwise, another
 * processor could cache an old, pre-update entry without being
 * invalidated.  This can happen one of two ways: (1) The pmap becomes
 * active on another processor after its pm_active field is checked by
 * one of the following functions but before a store updating the page
 * table is globally performed. (2) The pmap becomes active on another
 * processor before its pm_active field is checked but due to
 * speculative loads one of the following functions still reads the
 * pmap as inactive on the other processor.
 *
 * The kernel page table is exempt because its pm_active field is
 * immutable.  The kernel page table is always active on every
 * processor.
 */
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	u_int cpumask;
	u_int other_cpus;

	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
	    pmap, va);

	sched_pin();
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invlpg(va);
		smp_invlpg(va);
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invlpg(va);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
	}
	sched_unpin();
	PT_UPDATES_FLUSH();
}

void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	u_int cpumask;
	u_int other_cpus;
	vm_offset_t addr;

	CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
	    pmap, sva, eva);

	sched_pin();
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		smp_invlpg_range(sva, eva);
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			for (addr = sva; addr < eva; addr += PAGE_SIZE)
				invlpg(addr);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
			    sva, eva);
	}
	sched_unpin();
	PT_UPDATES_FLUSH();
}

void
pmap_invalidate_all(pmap_t pmap)
{
	u_int cpumask;
	u_int other_cpus;

	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);

	sched_pin();
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invltlb();
		smp_invltlb();
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invltlb();
		if (pmap->pm_active & other_cpus)
			smp_masked_invltlb(pmap->pm_active & other_cpus);
	}
	sched_unpin();
}

void
pmap_invalidate_cache(void)
{

	sched_pin();
	wbinvd();
	smp_cache_flush();
	sched_unpin();
}
#else /* !SMP */
/*
 * Normal, non-SMP, 486+ invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
	    pmap, va);

	if (pmap == kernel_pmap || pmap->pm_active)
		invlpg(va);
	PT_UPDATES_FLUSH();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (eva - sva > PAGE_SIZE)
		CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
		    pmap, sva, eva);

	if (pmap == kernel_pmap || pmap->pm_active)
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
	PT_UPDATES_FLUSH();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);

	if (pmap == kernel_pmap || pmap->pm_active)
		invltlb();
}

PMAP_INLINE void
pmap_invalidate_cache(void)
{

	wbinvd();
}
#endif /* !SMP */

void
pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva)
{

	KASSERT((sva & PAGE_MASK) == 0,
	    ("pmap_invalidate_cache_range: sva not page-aligned"));
	KASSERT((eva & PAGE_MASK) == 0,
	    ("pmap_invalidate_cache_range: eva not page-aligned"));

	if (cpu_feature & CPUID_SS)
		; /* If "Self Snoop" is supported, do nothing. */
	else if (cpu_feature & CPUID_CLFSH) {

		/*
		 * Otherwise, do per-cache line flush.  Use the mfence
		 * instruction to ensure that previous stores are
		 * included in the write-back.  The processor
		 * propagates flush to other processors in the cache
		 * coherence domain.
		 */
		mfence();
		for (; sva < eva; sva += cpu_clflush_line_size)
			clflush(sva);
		mfence();
	} else {

		/*
		 * No targeted cache flush methods are supported by CPU,
		 * globally invalidate cache as a last resort.
		 */
		pmap_invalidate_cache();
	}
}
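
/*
 * Note on the CLFLUSH path above: sva is page-aligned (asserted) and
 * cpu_clflush_line_size is a power of two, so the loop touches every
 * cache line in [sva, eva) exactly once, and the bracketing mfence
 * pairs order the flushes against surrounding stores.
 */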

/*
 * Are we current address space or kernel?  N.B. We return FALSE when
 * a pmap's page table is in use because a kernel thread is borrowing
 * it.  The borrowed page table can change spontaneously, making any
 * dependence on its continued use subject to a race condition.
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}

/*
 * If the given pmap is not the current or kernel pmap, the returned pte must
 * be released by passing it to pmap_pte_release().
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_lock(&PMAP2mutex);
		newpf = *pde & PG_FRAME;
		if ((*PMAP2 & PG_FRAME) != newpf) {
			vm_page_lock_queues();
			PT_SET_MA(PADDR2, newpf | PG_V | PG_A | PG_M);
			vm_page_unlock_queues();
			CTR3(KTR_PMAP, "pmap_pte: pmap=%p va=0x%x newpte=0x%08x",
			    pmap, va, (*PMAP2 & 0xffffffff));
		}

		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

/*
 * Releases a pte that was obtained from pmap_pte().  Be prepared for the pte
 * being NULL.
 */
static __inline void
pmap_pte_release(pt_entry_t *pte)
{

	if ((pt_entry_t *)((vm_offset_t)pte & ~PAGE_MASK) == PADDR2) {
		CTR1(KTR_PMAP, "pmap_pte_release: pte=0x%jx",
		    *PMAP2);
		PT_SET_VA(PMAP2, 0, TRUE);
		mtx_unlock(&PMAP2mutex);
	}
}

static __inline void
invlcaddr(void *caddr)
{

	invlpg((u_int)caddr);
	PT_UPDATES_FLUSH();
}
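
/*
 * Typical pmap_pte() usage (sketch only; 'val' is a hypothetical
 * local):
 *
 *	pte = pmap_pte(pmap, va);
 *	if (pte != NULL) {
 *		val = *pte;
 *		pmap_pte_release(pte);
 *	}
 *
 * The release is required because, for a non-current pmap, the
 * returned pointer is the shared PADDR2 window guarded by PMAP2mutex
 * rather than a recursive-mapping address.
 */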

/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 *
 * If the given pmap is not the current pmap, vm_page_queue_mtx
 * must be held and curthread pinned to a CPU.
 */
static pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
		newpf = *pde & PG_FRAME;
		if ((*PMAP1 & PG_FRAME) != newpf) {
			PT_SET_MA(PADDR1, newpf | PG_V | PG_A | PG_M);
			CTR3(KTR_PMAP, "pmap_pte_quick: pmap=%p va=0x%x newpte=0x%08x",
			    pmap, va, (u_long)*PMAP1);

#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			invlcaddr(PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;
	pt_entry_t pteval;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = xpmap_mtop(pde & PG_PS_FRAME) | (va & PDRMASK);
			PMAP_UNLOCK(pmap);
			return rtval;
		}
		pte = pmap_pte(pmap, va);
		pteval = *pte ? xpmap_mtop(*pte) : 0;
		rtval = (pteval & PG_FRAME) | (va & PAGE_MASK);
		pmap_pte_release(pte);
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}

/*
 *	Routine:	pmap_extract_ma
 *	Function:
 *		Like pmap_extract, but returns machine address
 */
vm_paddr_t
pmap_extract_ma(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;

	rtval = 0;
	PMAP_LOCK(pmap);
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = (pde & ~PDRMASK) | (va & PDRMASK);
			PMAP_UNLOCK(pmap);
			return rtval;
		}
		pte = pmap_pte(pmap, va);
		rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
		pmap_pte_release(pte);
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pd_entry_t pde;
	pt_entry_t pte;
	vm_page_t m;
	vm_paddr_t pa;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pde = PT_GET(pmap_pde(pmap, va));
	if (pde != 0) {
		if (pde & PG_PS) {
			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
				if (vm_page_pa_tryrelock(pmap, (pde &
				    PG_PS_FRAME) | (va & PDRMASK), &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) |
				    (va & PDRMASK));
				vm_page_hold(m);
			}
		} else {
			sched_pin();
			pte = PT_GET(pmap_pte_quick(pmap, va));
			if (*PMAP1)
				PT_SET_MA(PADDR1, 0);
			if ((pte & PG_V) &&
			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
				if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME,
				    &pa))
					goto retry;
				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
				vm_page_hold(m);
			}
			sched_unpin();
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}
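
/*
 * The retry loop in pmap_extract_and_hold() above follows the
 * vm_page_pa_tryrelock() protocol: if taking the page lock for the
 * candidate physical address required dropping the pmap lock, the
 * lookup restarts, since the mapping may have changed in the window.
 */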

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 */
void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{

	PT_SET_MA(va, xpmap_ptom(pa)| PG_RW | PG_V | pgeflag);
}

void
pmap_kenter_ma(vm_offset_t va, vm_paddr_t ma)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_store_ma(pte, ma | PG_RW | PG_V | pgeflag);
}


static __inline void
pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode)
{

	PT_SET_MA(va, pa | PG_RW | PG_V | pgeflag | pmap_cache_bits(mode, 0));
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	PT_CLEAR_VA(pte, FALSE);
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping. Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged. Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va, sva;

	va = sva = *virt;
	CTR4(KTR_PMAP, "pmap_map: va=0x%x start=0x%jx end=0x%jx prot=0x%x",
	    va, start, end, prot);
	while (start < end) {
		pmap_kenter(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
	*virt = va;
	return (sva);
}
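
/*
 * pmap_map() usage sketch (illustrative; 'pa' is a hypothetical
 * physical address):
 *
 *	vm_offset_t va = virtual_avail;
 *	vm_offset_t sva = pmap_map(&va, pa, pa + PAGE_SIZE, VM_PROT_ALL);
 *	virtual_avail = va;
 *
 * i386 has no direct map, so the pages are entered at *virt and the
 * pointer is advanced past the mapped region.
 */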

/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pt_entry_t *endpte, *pte;
	vm_paddr_t pa;
	vm_offset_t va = sva;
	int mclcount = 0;
	multicall_entry_t mcl[16];
	multicall_entry_t *mclp = mcl;
	int error;

	CTR2(KTR_PMAP, "pmap_qenter:sva=0x%x count=%d", va, count);
	pte = vtopte(sva);
	endpte = pte + count;
	while (pte < endpte) {
		pa = xpmap_ptom(VM_PAGE_TO_PHYS(*ma)) | pgeflag | PG_RW |
		    PG_V | PG_M | PG_A;

		mclp->op = __HYPERVISOR_update_va_mapping;
		mclp->args[0] = va;
		mclp->args[1] = (uint32_t)(pa & 0xffffffff);
		mclp->args[2] = (uint32_t)(pa >> 32);
		mclp->args[3] = (*pte & PG_V) ? UVMF_INVLPG|UVMF_ALL : 0;

		va += PAGE_SIZE;
		pte++;
		ma++;
		mclp++;
		mclcount++;
		if (mclcount == 16) {
			error = HYPERVISOR_multicall(mcl, mclcount);
			mclp = mcl;
			mclcount = 0;
			KASSERT(error == 0, ("bad multicall %d", error));
		}
	}
	if (mclcount) {
		error = HYPERVISOR_multicall(mcl, mclcount);
		KASSERT(error == 0, ("bad multicall %d", error));
	}

#ifdef INVARIANTS
	for (pte = vtopte(sva), mclcount = 0; mclcount < count;
	    mclcount++, pte++)
		KASSERT(*pte, ("pte not set for va=0x%x",
		    sva + mclcount*PAGE_SIZE));
#endif
}


/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	CTR2(KTR_PMAP, "pmap_qremove: sva=0x%x count=%d", sva, count);
	va = sva;
	vm_page_lock_queues();
	critical_enter();
	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
	critical_exit();
	vm_page_unlock_queues();
}
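
/*
 * Note on pmap_qenter() above: PTE writes are batched into mcl[] and
 * issued to the hypervisor 16 at a time through HYPERVISOR_multicall(),
 * trading one hypercall for up to 16 update_va_mapping operations.
 * UVMF_INVLPG|UVMF_ALL is requested only when a valid PTE is being
 * replaced, so fresh mappings avoid needless TLB flushes.
 */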

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 */
static __inline int
pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
{

	--m->wire_count;
	if (m->wire_count == 0)
		return _pmap_unwire_pte_hold(pmap, m, free);
	else
		return 0;
}

static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m, vm_page_t *free)
{
	vm_offset_t pteva;

	PT_UPDATES_FLUSH();
	/*
	 * unmap the page table page
	 */
	xen_pt_unpin(pmap->pm_pdir[m->pindex]);
	/*
	 * page *might* contain residual mapping :-/
	 */
	PD_CLEAR_VA(pmap, m->pindex, TRUE);
	pmap_zero_page(m);
	--pmap->pm_stats.resident_count;

	/*
	 * This is a release store so that the ordinary store unmapping
	 * the page table page is globally performed before TLB shoot-
	 * down is begun.
	 */
	atomic_subtract_rel_int(&cnt.v_wire_count, 1);

	/*
	 * Do an invltlb to make the invalidated mapping
	 * take effect immediately.
	 */
	pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
	pmap_invalidate_page(pmap, pteva);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	m->right = *free;
	*free = m;

	return 1;
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t *free)
{
	pd_entry_t ptepde;
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return 0;
	ptepde = PT_GET(pmap_pde(pmap, va));
	mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
	return pmap_unwire_pte_hold(pmap, mpte, free);
}

void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
#ifdef PAE
	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
	pmap->pm_active = 0;
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
int
pmap_pinit(pmap_t pmap)
{
	vm_page_t m, ptdpg[NPGPTD + 1];
	int npgptd = NPGPTD + 1;
	static int color;
	int i;

	PMAP_LOCK_INIT(pmap);

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pdir == NULL) {
		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
		    NBPTD);
		if (pmap->pm_pdir == NULL) {
			PMAP_LOCK_DESTROY(pmap);
			return (0);
		}
#if defined(XEN) && defined(PAE)
		pmap->pm_pdpt = (pd_entry_t *)kmem_alloc_nofault(kernel_map, 1);
#endif

#if defined(PAE) && !defined(XEN)
		pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
		KASSERT(((vm_offset_t)pmap->pm_pdpt &
		    ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
		    ("pmap_pinit: pdpt misaligned"));
		KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
		    ("pmap_pinit: pdpt above 4g"));
#endif
	}

	/*
	 * allocate the page directory page(s)
	 */
	for (i = 0; i < npgptd;) {
		m = vm_page_alloc(NULL, color++,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (m == NULL)
			VM_WAIT;
		else {
			ptdpg[i++] = m;
		}
	}
	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
	for (i = 0; i < NPGPTD; i++) {
		if ((ptdpg[i]->flags & PG_ZERO) == 0)
			pagezero(&pmap->pm_pdir[i*NPTEPG]);
	}

	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
	/* Wire in kernel global address entries. */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
#ifdef PAE
#ifdef XEN
	pmap_qenter((vm_offset_t)pmap->pm_pdpt, &ptdpg[NPGPTD], 1);
	if ((ptdpg[NPGPTD]->flags & PG_ZERO) == 0)
		bzero(pmap->pm_pdpt, PAGE_SIZE);
#endif
	for (i = 0; i < NPGPTD; i++) {
		vm_paddr_t ma;

		ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
		pmap->pm_pdpt[i] = ma | PG_V;

	}
#endif
#ifdef XEN
	for (i = 0; i < NPGPTD; i++) {
		pt_entry_t *pd;
		vm_paddr_t ma;

		ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
		pd = pmap->pm_pdir + (i * NPDEPG);
		PT_SET_MA(pd, *vtopte((vm_offset_t)pd) & ~(PG_M|PG_A|PG_U|PG_RW));
#if 0
		xen_pgd_pin(ma);
#endif
	}

#ifdef PAE
	PT_SET_MA(pmap->pm_pdpt, *vtopte((vm_offset_t)pmap->pm_pdpt) & ~PG_RW);
#endif
	vm_page_lock_queues();
	xen_flush_queue();
	xen_pgdpt_pin(xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[NPGPTD])));
	for (i = 0; i < NPGPTD; i++) {
		vm_paddr_t ma = xpmap_ptom(VM_PAGE_TO_PHYS(ptdpg[i]));
		PT_SET_VA_MA(&pmap->pm_pdir[PTDPTDI + i], ma | PG_V | PG_A, FALSE);
	}
	xen_flush_queue();
	vm_page_unlock_queues();
#endif
	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvchunk);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);

	return (1);
}

/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap_t pmap, unsigned int ptepindex, int flags)
{
	vm_paddr_t ptema;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if (flags & M_WAITOK) {
			PMAP_UNLOCK(pmap);
			vm_page_unlock_queues();
			VM_WAIT;
			vm_page_lock_queues();
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */
	pmap->pm_stats.resident_count++;

	ptema = xpmap_ptom(VM_PAGE_TO_PHYS(m));
	xen_pt_pin(ptema);
	PT_SET_VA_MA(&pmap->pm_pdir[ptepindex],
	    (ptema | PG_U | PG_RW | PG_V | PG_A | PG_M), TRUE);

	KASSERT(pmap->pm_pdir[ptepindex],
	    ("_pmap_allocpte: ptepindex=%d did not get mapped", ptepindex));
	return (m);
}

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
{
	unsigned ptepindex;
	pd_entry_t ptema;
	vm_page_t m;

	KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT ||
	    (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK,
	    ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK"));

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = va >> PDRSHIFT;
retry:
	/*
	 * Get the page directory entry
	 */
	ptema = pmap->pm_pdir[ptepindex];

	/*
	 * This supports switching from a 4MB page to a
	 * normal 4K page.
	 */
	if (ptema & PG_PS) {
		/*
		 * XXX
		 */
		pmap->pm_pdir[ptepindex] = 0;
		ptema = 0;
		pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
		pmap_invalidate_all(kernel_pmap);
	}

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (ptema & PG_V) {
		m = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME);
		m->wire_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has
		 * been deallocated.
		 */
		CTR3(KTR_PMAP, "pmap_allocpte: pmap=%p va=0x%08x flags=0x%x",
		    pmap, va, flags);
		m = _pmap_allocpte(pmap, ptepindex, flags);
		if (m == NULL && (flags & M_WAITOK))
			goto retry;

		KASSERT(pmap->pm_pdir[ptepindex], ("ptepindex=%d did not get mapped", ptepindex));
	}
	return (m);
}
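
/*
 * Note on the allocation contract above: with M_WAITOK, pmap_allocpte()
 * loops internally (the "retry" label) and never returns NULL, because
 * _pmap_allocpte() sleeps in VM_WAIT and returns NULL only to request a
 * retry.  With M_NOWAIT a NULL return is final and the caller must cope,
 * as pmap_enter_quick_locked() does.
 */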


/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

#ifdef SMP
/*
 * Deal with an SMP shootdown of other users of the pmap that we are
 * trying to dispose of.  This can be a bit hairy.
 */
static cpumask_t *lazymask;
static u_int lazyptd;
static volatile u_int lazywait;

void pmap_lazyfix_action(void);

void
pmap_lazyfix_action(void)
{
	cpumask_t mymask = PCPU_GET(cpumask);

#ifdef COUNT_IPIS
	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
#endif
	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	atomic_clear_int(lazymask, mymask);
	atomic_store_rel_int(&lazywait, 1);
}

static void
pmap_lazyfix_self(cpumask_t mymask)
{

	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	atomic_clear_int(lazymask, mymask);
}


static void
pmap_lazyfix(pmap_t pmap)
{
	cpumask_t mymask, mask;
	u_int spins;

	while ((mask = pmap->pm_active) != 0) {
		spins = 50000000;
		mask = mask & -mask;	/* Find least significant set bit */
		mtx_lock_spin(&smp_ipi_mtx);
#ifdef PAE
		lazyptd = vtophys(pmap->pm_pdpt);
#else
		lazyptd = vtophys(pmap->pm_pdir);
#endif
		mymask = PCPU_GET(cpumask);
		if (mask == mymask) {
			lazymask = &pmap->pm_active;
			pmap_lazyfix_self(mymask);
		} else {
			atomic_store_rel_int((u_int *)&lazymask,
			    (u_int)&pmap->pm_active);
			atomic_store_rel_int(&lazywait, 0);
			ipi_selected(mask, IPI_LAZYPMAP);
			while (lazywait == 0) {
				ia32_pause();
				if (--spins == 0)
					break;
			}
		}
		mtx_unlock_spin(&smp_ipi_mtx);
		if (spins == 0)
			printf("pmap_lazyfix: spun for 50000000\n");
	}
}

#else	/* SMP */

/*
 * Cleaning up on uniprocessor is easy.  For various reasons, we're
 * unlikely to have to even execute this code, including the fact
 * that the cleanup is deferred until the parent does a wait(2), which
 * means that another userland process has run.
 */
static void
pmap_lazyfix(pmap_t pmap)
{
	u_int cr3;

	cr3 = vtophys(pmap->pm_pdir);
	if (cr3 == rcr3()) {
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
		pmap->pm_active &= ~(PCPU_GET(cpumask));
	}
}
#endif	/* SMP */
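
/*
 * Worked example of the "mask & -mask" step in pmap_lazyfix() above: in
 * two's complement, mask & -mask isolates the least significant set bit.
 * For pm_active = 0x6 (CPUs 1 and 2), -0x6 is 0xfffffffa, and
 * 0x6 & 0xfffffffa == 0x2, so CPU 1 is shot down first; the outer while
 * loop then comes back around for CPU 2.
 */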

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m, ptdpg[2*NPGPTD+1];
	vm_paddr_t ma;
	int i;
#ifdef XEN
#ifdef PAE
	int npgptd = NPGPTD + 1;
#else
	int npgptd = NPGPTD;
#endif
#else
	int npgptd = NPGPTD;
#endif
	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));
	PT_UPDATES_FLUSH();

	pmap_lazyfix(pmap);
	mtx_lock_spin(&allpmaps_lock);
	LIST_REMOVE(pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	for (i = 0; i < NPGPTD; i++)
		ptdpg[i] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdir + (i*NPDEPG)) & PG_FRAME);
	pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
#if defined(PAE) && defined(XEN)
	ptdpg[NPGPTD] = PHYS_TO_VM_PAGE(vtophys(pmap->pm_pdpt));
#endif

	for (i = 0; i < npgptd; i++) {
		m = ptdpg[i];
		ma = xpmap_ptom(VM_PAGE_TO_PHYS(m));
		/* unpinning L1 and L2 treated the same */
		xen_pgd_unpin(ma);
#ifdef PAE
		KASSERT(xpmap_ptom(VM_PAGE_TO_PHYS(m)) == (pmap->pm_pdpt[i] & PG_FRAME),
		    ("pmap_release: got wrong ptd page"));
#endif
		m->wire_count--;
		atomic_subtract_int(&cnt.v_wire_count, 1);
		vm_page_free(m);
	}
	PMAP_LOCK_DESTROY(pmap);
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");
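
/*
 * Both OIDs are readable from userland; for example (illustrative):
 *
 *	$ sysctl vm.kvm_size vm.kvm_free
 *
 * kvm_size is the fixed span from KERNBASE to VM_MAX_KERNEL_ADDRESS,
 * while kvm_free is the portion above kernel_vm_end that
 * pmap_growkernel() has not yet backed with page tables.
 */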

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct pmap *pmap;
	vm_paddr_t ptppaddr;
	vm_page_t nkpg;
	pd_entry_t newpdir;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;
		while (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
		}
	}
	addr = roundup2(addr, PAGE_SIZE * NPTEPG);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		if (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = vm_page_alloc(NULL, nkpt,
		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");

		nkpt++;

		pmap_zero_page(nkpg);
		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
		vm_page_lock_queues();
		PD_SET_VA(kernel_pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);
		mtx_lock_spin(&allpmaps_lock);
		LIST_FOREACH(pmap, &allpmaps, pm_list)
			PD_SET_VA(pmap, (kernel_vm_end >> PDRSHIFT), newpdir, TRUE);

		mtx_unlock_spin(&allpmaps_lock);
		vm_page_unlock_queues();

		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}


/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 11);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return (struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK);
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0_9	0xfffffffful	/* Free values for index 0 through 9 */
#define	PC_FREE10	0x0000fffful	/* Free values for index 10 */

static uint32_t pc_freemask[11] = {
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE0_9, PC_FREE0_9,
	PC_FREE0_9, PC_FREE10
};

SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
    "Current number of pv entries");

#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
    "Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
    "Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
    "Current number of pv entry chunk frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
    "Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
    "Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
    "Current number of pv entry allocs");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
    "Current number of spare pv entries");

static int pmap_collect_inactive, pmap_collect_active;

SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_inactive, CTLFLAG_RD, &pmap_collect_inactive, 0,
    "Number of times pmap_collect was called on the inactive queue");
SYSCTL_INT(_vm_pmap, OID_AUTO, pmap_collect_active, CTLFLAG_RD, &pmap_collect_active, 0,
    "Number of times pmap_collect was called on the active queue");
#endif
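
/*
 * Worked example of the chunk geometry asserted above: a pv_chunk is
 * exactly one page, and its pc_map[] has _NPCM == 11 32-bit words.
 * Words 0-9 are fully usable (PC_FREE0_9 == 0xffffffff) and word 10
 * contributes 16 more entries (PC_FREE10 == 0x0000ffff), so each chunk
 * carries 10 * 32 + 16 == 336 pv entries (_NPCPV).  A pv entry maps to
 * its free bit as:
 *
 *	idx = pv - &pc->pc_pventry[0];
 *	field = idx / 32;
 *	bit = idx % 32;
 */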

/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.  This is normally called to
 * unmap inactive pages, and if necessary, active pages.
 */
static void
pmap_collect(pmap_t locked_pmap, struct vpgqueues *vpq)
{
	pmap_t pmap;
	pt_entry_t *pte, tpte;
	pv_entry_t next_pv, pv;
	vm_offset_t va;
	vm_page_t m, free;

	sched_pin();
	TAILQ_FOREACH(m, &vpq->pl, pageq) {
		if (m->hold_count || m->busy)
			continue;
		TAILQ_FOREACH_SAFE(pv, &m->md.pv_list, pv_list, next_pv) {
			va = pv->pv_va;
			pmap = PV_PMAP(pv);
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap)
				PMAP_LOCK(pmap);
			else if (pmap != locked_pmap && !PMAP_TRYLOCK(pmap))
				continue;
			pmap->pm_stats.resident_count--;
			pte = pmap_pte_quick(pmap, va);
			tpte = pte_load_clear(pte);
			KASSERT((tpte & PG_W) == 0,
			    ("pmap_collect: wired pte %#jx", (uintmax_t)tpte));
			if (tpte & PG_A)
				vm_page_flag_set(m, PG_REFERENCED);
			if (tpte & PG_M) {
				KASSERT((tpte & PG_RW),
				    ("pmap_collect: modified page not writable: va: %#x, pte: %#jx",
				    va, (uintmax_t)tpte));
				vm_page_dirty(m);
			}
			free = NULL;
			pmap_unuse_pt(pmap, va, &free);
			pmap_invalidate_page(pmap, va);
			pmap_free_zero_pages(free);
			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
			if (TAILQ_EMPTY(&m->md.pv_list))
				vm_page_flag_clear(m, PG_WRITEABLE);
			free_pv_entry(pmap, pv);
			if (pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
		}
	}
	sched_unpin();
}


/*
 * free the pv_entry back to the free list
 */
static void
free_pv_entry(pmap_t pmap, pv_entry_t pv)
{
	vm_page_t m;
	struct pv_chunk *pc;
	int idx, field, bit;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	PV_STAT(pv_entry_frees++);
	PV_STAT(pv_entry_spare++);
	pv_entry_count--;
	pc = pv_to_chunk(pv);
	idx = pv - &pc->pc_pventry[0];
	field = idx / 32;
	bit = idx % 32;
	pc->pc_map[field] |= 1ul << bit;
	/* move to head of list */
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	for (idx = 0; idx < _NPCM; idx++)
		if (pc->pc_map[idx] != pc_freemask[idx])
			return;
	PV_STAT(pv_entry_spare -= _NPCPV);
	PV_STAT(pc_chunk_count--);
	PV_STAT(pc_chunk_frees++);
	/* entire chunk is free, return it */
	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
	m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
	pmap_qremove((vm_offset_t)pc, 1);
	vm_page_unwire(m, 0);
	vm_page_free(m);
	pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
}
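
/*
 * Note on the tail of free_pv_entry() (as read from the code): once every
 * bit in pc_map matches pc_freemask, the chunk is empty, so its backing
 * page is looked up with pmap_kextract(), unmapped with pmap_qremove(),
 * unwired and freed, and the chunk's KVA is recycled through pv_vafree
 * for the next get_pv_entry() allocation.
 */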

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 */
static pv_entry_t
get_pv_entry(pmap_t pmap, int try)
{
	static const struct timeval printinterval = { 60, 0 };
	static struct timeval lastprint;
	static vm_pindex_t colour;
	struct vpgqueues *pq;
	int bit, field;
	pv_entry_t pv;
	struct pv_chunk *pc;
	vm_page_t m;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PV_STAT(pv_entry_allocs++);
	pv_entry_count++;
	if (pv_entry_count > pv_entry_high_water)
		if (ratecheck(&lastprint, &printinterval))
			printf("Approaching the limit on PV entries, consider "
			    "increasing either the vm.pmap.shpgperproc or the "
			    "vm.pmap.pv_entry_max tunable.\n");
	pq = NULL;
retry:
	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
	if (pc != NULL) {
		for (field = 0; field < _NPCM; field++) {
			if (pc->pc_map[field]) {
				bit = bsfl(pc->pc_map[field]);
				break;
			}
		}
		if (field < _NPCM) {
			pv = &pc->pc_pventry[field * 32 + bit];
			pc->pc_map[field] &= ~(1ul << bit);
			/* If this was the last item, move it to tail */
			for (field = 0; field < _NPCM; field++)
				if (pc->pc_map[field] != 0) {
					PV_STAT(pv_entry_spare--);
					return (pv);	/* not full, return */
				}
			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
			TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list);
			PV_STAT(pv_entry_spare--);
			return (pv);
		}
	}
	/*
	 * Access to the ptelist "pv_vafree" is synchronized by the page
	 * queues lock.  If "pv_vafree" is currently non-empty, it will
	 * remain non-empty until pmap_ptelist_alloc() completes.
	 */
	if (pv_vafree == 0 || (m = vm_page_alloc(NULL, colour, (pq ==
	    &vm_page_queues[PQ_ACTIVE] ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL) |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
		if (try) {
			pv_entry_count--;
			PV_STAT(pc_chunk_tryfail++);
			return (NULL);
		}
		/*
		 * Reclaim pv entries: At first, destroy mappings to
		 * inactive pages.  After that, if a pv chunk entry
		 * is still needed, destroy mappings to active pages.
		 */
		if (pq == NULL) {
			PV_STAT(pmap_collect_inactive++);
			pq = &vm_page_queues[PQ_INACTIVE];
		} else if (pq == &vm_page_queues[PQ_INACTIVE]) {
			PV_STAT(pmap_collect_active++);
			pq = &vm_page_queues[PQ_ACTIVE];
		} else
			panic("get_pv_entry: increase vm.pmap.shpgperproc");
		pmap_collect(pmap, pq);
		goto retry;
	}
	PV_STAT(pc_chunk_count++);
	PV_STAT(pc_chunk_allocs++);
	colour++;
	pc = (struct pv_chunk *)pmap_ptelist_alloc(&pv_vafree);
	pmap_qenter((vm_offset_t)pc, &m, 1);
	if ((m->flags & PG_ZERO) == 0)
		pagezero(pc);
	pc->pc_pmap = pmap;
	pc->pc_map[0] = pc_freemask[0] & ~1ul;	/* preallocated bit 0 */
	for (field = 1; field < _NPCM; field++)
		pc->pc_map[field] = pc_freemask[field];
	pv = &pc->pc_pventry[0];
	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
	PV_STAT(pv_entry_spare += _NPCPV - 1);
	return (pv);
}

static void
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pmap == PV_PMAP(pv) && va == pv->pv_va)
			break;
	}
	KASSERT(pv != NULL, ("pmap_remove_entry: pv not found"));
	TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
	if (TAILQ_EMPTY(&m->md.pv_list))
		vm_page_flag_clear(m, PG_WRITEABLE);
	free_pv_entry(pmap, pv);
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	pv = get_pv_entry(pmap, FALSE);
	pv->pv_va = va;
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
}
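
/*
 * Note on the two entry points (as read from the code): pmap_insert_entry()
 * calls get_pv_entry() with try == FALSE and therefore cannot fail, at the
 * cost of possibly reclaiming other mappings under memory pressure, while
 * pmap_try_insert_pv_entry() below passes TRUE and simply reports failure,
 * which suits speculative callers such as pmap_enter_quick_locked().
 */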

/*
 * Conditionally create a pv entry.
 */
static boolean_t
pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	pv_entry_t pv;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (pv_entry_count < pv_entry_high_water &&
	    (pv = get_pv_entry(pmap, TRUE)) != NULL) {
		pv->pv_va = va;
		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
		return (TRUE);
	} else
		return (FALSE);
}

/*
 * pmap_remove_pte: do the things to unmap a page in a process
 */
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, vm_page_t *free)
{
	pt_entry_t oldpte;
	vm_page_t m;

	CTR3(KTR_PMAP, "pmap_remove_pte: pmap=%p *ptq=0x%x va=0x%x",
	    pmap, (u_long)*ptq, va);

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	oldpte = *ptq;
	PT_SET_VA_MA(ptq, 0, TRUE);
	if (oldpte & PG_W)
		pmap->pm_stats.wired_count -= 1;
	/*
	 * Machines that don't support invlpg, also don't support
	 * PG_G.
	 */
	if (oldpte & PG_G)
		pmap_invalidate_page(kernel_pmap, va);
	pmap->pm_stats.resident_count -= 1;
	/*
	 * XXX This is not strictly correct, but somewhere along the line
	 * we are losing the managed bit on some pages.  It is unclear to me
	 * why, but I think the most likely explanation is that xen's writable
	 * page table implementation doesn't respect the unused bits.
	 */
	if ((oldpte & PG_MANAGED) ||
	    ((oldpte & PG_V) && (va < VM_MAXUSER_ADDRESS))) {
		m = PHYS_TO_VM_PAGE(xpmap_mtop(oldpte) & PG_FRAME);

		if (!(oldpte & PG_MANAGED))
			printf("va=0x%x is unmanaged :-( pte=0x%llx\n", va, oldpte);

		if (oldpte & PG_M) {
			KASSERT((oldpte & PG_RW),
			    ("pmap_remove_pte: modified page not writable: va: %#x, pte: %#jx",
			    va, (uintmax_t)oldpte));
			vm_page_dirty(m);
		}
		if (oldpte & PG_A)
			vm_page_flag_set(m, PG_REFERENCED);
		pmap_remove_entry(pmap, m, va);
	} else if ((va < VM_MAXUSER_ADDRESS) && (oldpte & PG_V))
		printf("va=0x%x is unmanaged :-( pte=0x%llx\n", va, oldpte);

	return (pmap_unuse_pt(pmap, va, free));
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va, vm_page_t *free)
{
	pt_entry_t *pte;

	CTR2(KTR_PMAP, "pmap_remove_page: pmap=%p va=0x%x",
	    pmap, va);

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	if ((pte = pmap_pte_quick(pmap, va)) == NULL || (*pte & PG_V) == 0)
		return;
	pmap_remove_pte(pmap, pte, va, free);
	pmap_invalidate_page(pmap, va);
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);
}

/*
 * Remove the given range of addresses from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t pdnxt;
	pd_entry_t ptpaddr;
	pt_entry_t *pte;
	vm_page_t free = NULL;
	int anyvalid;

	CTR3(KTR_PMAP, "pmap_remove: pmap=%p sva=0x%x eva=0x%x",
	    pmap, sva, eva);

	/*
	 * Perform an unsynchronized read.  This is, however, safe.
	 */
	if (pmap->pm_stats.resident_count == 0)
		return;

	anyvalid = 0;

	vm_page_lock_queues();
	sched_pin();
	PMAP_LOCK(pmap);

	/*
	 * Special handling of removing one page.  A very
	 * common operation and easy to short circuit some
	 * code.
	 */
	if ((sva + PAGE_SIZE == eva) &&
	    ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
		pmap_remove_page(pmap, sva, &free);
		goto out;
	}

	for (; sva < eva; sva = pdnxt) {
		unsigned pdirindex;

		/*
		 * Calculate index for next page table.
		 */
		pdnxt = (sva + NBPDR) & ~PDRMASK;
		if (pmap->pm_stats.resident_count == 0)
			break;

		pdirindex = sva >> PDRSHIFT;
		ptpaddr = pmap->pm_pdir[pdirindex];

		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		/*
		 * Check for large page.
		 */
		if ((ptpaddr & PG_PS) != 0) {
			PD_CLEAR_VA(pmap, pdirindex, TRUE);
			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
			anyvalid = 1;
			continue;
		}

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (pdnxt > eva)
			pdnxt = eva;

		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
		    sva += PAGE_SIZE) {
			if ((*pte & PG_V) == 0)
				continue;

			/*
			 * The TLB entry for a PG_G mapping is invalidated
			 * by pmap_remove_pte().
			 */
			if ((*pte & PG_G) == 0)
				anyvalid = 1;
			if (pmap_remove_pte(pmap, pte, sva, &free))
				break;
		}
	}
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_VA_MA(PMAP1, 0, TRUE);
out:
	if (anyvalid)
		pmap_invalidate_all(pmap);
	sched_unpin();
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
	pmap_free_zero_pages(free);
}
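
/*
 * Illustrative call (hypothetical range): removing exactly one page,
 *
 *	pmap_remove(pmap, va, va + PAGE_SIZE);
 *
 * takes the single-page short circuit above, while larger ranges walk
 * the page directory one 4MB slot at a time, skipping slots whose PDE
 * is zero.
 */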

/*
 * Routine:	pmap_remove_all
 * Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 * Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
 */

void
pmap_remove_all(vm_page_t m)
{
	pv_entry_t pv;
	pmap_t pmap;
	pt_entry_t *pte, tpte;
	vm_page_t free;

	KASSERT((m->flags & PG_FICTITIOUS) == 0,
	    ("pmap_remove_all: page %p is fictitious", m));
	vm_page_lock_queues();
	sched_pin();
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pmap = PV_PMAP(pv);
		PMAP_LOCK(pmap);
		pmap->pm_stats.resident_count--;
		pte = pmap_pte_quick(pmap, pv->pv_va);
		tpte = *pte;
		PT_SET_VA_MA(pte, 0, TRUE);
		if (tpte & PG_W)
			pmap->pm_stats.wired_count--;
		if (tpte & PG_A)
			vm_page_flag_set(m, PG_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (tpte & PG_M) {
			KASSERT((tpte & PG_RW),
			    ("pmap_remove_all: modified page not writable: va: %#x, pte: %#jx",
			    pv->pv_va, (uintmax_t)tpte));
			vm_page_dirty(m);
		}
		free = NULL;
		pmap_unuse_pt(pmap, pv->pv_va, &free);
		pmap_invalidate_page(pmap, pv->pv_va);
		pmap_free_zero_pages(free);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		free_pv_entry(pmap, pv);
		PMAP_UNLOCK(pmap);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_MA(PADDR1, 0);
	sched_unpin();
	vm_page_unlock_queues();
}

/*
 * Set the physical protection on the
 * specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t pdnxt;
	pd_entry_t ptpaddr;
	pt_entry_t *pte;
	int anychanged;

	CTR4(KTR_PMAP, "pmap_protect: pmap=%p sva=0x%x eva=0x%x prot=0x%x",
	    pmap, sva, eva, prot);

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

#ifdef PAE
	if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) ==
	    (VM_PROT_WRITE|VM_PROT_EXECUTE))
		return;
#else
	if (prot & VM_PROT_WRITE)
		return;
#endif

	anychanged = 0;

	vm_page_lock_queues();
	sched_pin();
	PMAP_LOCK(pmap);
	for (; sva < eva; sva = pdnxt) {
		pt_entry_t obits, pbits;
		unsigned pdirindex;

		pdnxt = (sva + NBPDR) & ~PDRMASK;

		pdirindex = sva >> PDRSHIFT;
		ptpaddr = pmap->pm_pdir[pdirindex];

		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		/*
		 * Check for large page.
		 */
		if ((ptpaddr & PG_PS) != 0) {
			if ((prot & VM_PROT_WRITE) == 0)
				pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
#ifdef PAE
			if ((prot & VM_PROT_EXECUTE) == 0)
				pmap->pm_pdir[pdirindex] |= pg_nx;
#endif
			anychanged = 1;
			continue;
		}

		if (pdnxt > eva)
			pdnxt = eva;

		for (pte = pmap_pte_quick(pmap, sva); sva != pdnxt; pte++,
		    sva += PAGE_SIZE) {
			vm_page_t m;

retry:
			/*
			 * Regardless of whether a pte is 32 or 64 bits in
			 * size, PG_RW, PG_A, and PG_M are among the least
			 * significant 32 bits.
			 */
			obits = pbits = *pte;
			if ((pbits & PG_V) == 0)
				continue;

			if ((prot & VM_PROT_WRITE) == 0) {
				if ((pbits & (PG_MANAGED | PG_M | PG_RW)) ==
				    (PG_MANAGED | PG_M | PG_RW)) {
					m = PHYS_TO_VM_PAGE(xpmap_mtop(pbits) &
					    PG_FRAME);
					vm_page_dirty(m);
				}
				pbits &= ~(PG_RW | PG_M);
			}
#ifdef PAE
			if ((prot & VM_PROT_EXECUTE) == 0)
				pbits |= pg_nx;
#endif

			if (pbits != obits) {
#ifdef XEN
				obits = *pte;
				PT_SET_VA_MA(pte, pbits, TRUE);
				if (*pte != pbits)
					goto retry;
#else
#ifdef PAE
				if (!atomic_cmpset_64(pte, obits, pbits))
					goto retry;
#else
				if (!atomic_cmpset_int((u_int *)pte, obits,
				    pbits))
					goto retry;
#endif
#endif
				if (obits & PG_G)
					pmap_invalidate_page(pmap, sva);
				else
					anychanged = 1;
			}
		}
	}
	PT_UPDATES_FLUSH();
	if (*PMAP1)
		PT_SET_VA_MA(PMAP1, 0, TRUE);
	if (anychanged)
		pmap_invalidate_all(pmap);
	sched_unpin();
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}
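
/*
 * Illustrative call (hypothetical range): write-protecting a region,
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ);
 *
 * clears PG_RW and PG_M on each valid pte; any PG_M state is first
 * pushed to the vm_page with vm_page_dirty() so no modifications are
 * lost, and under Xen the re-read and retry above stands in for the
 * native cmpset loop.
 */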

/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte can not be reclaimed.
 *
 * NB: This is the only routine which MAY NOT lazy-evaluate
 * or lose information.  That is, this routine must actually
 * insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m,
    vm_prot_t prot, boolean_t wired)
{
	vm_paddr_t pa;
	pd_entry_t *pde;
	pt_entry_t *pte;
	vm_paddr_t opa;
	pt_entry_t origpte, newpte;
	vm_page_t mpte, om;
	boolean_t invlva;

	CTR6(KTR_PMAP, "pmap_enter: pmap=%08p va=0x%08x access=0x%x ma=0x%08x prot=0x%x wired=%d",
	    pmap, va, access, xpmap_ptom(VM_PAGE_TO_PHYS(m)), prot, wired);
	va = trunc_page(va);
	KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig"));
	KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS,
	    ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)",
	    va));
	KASSERT((m->oflags & VPO_BUSY) != 0,
	    ("pmap_enter: page %p is not busy", m));

	mpte = NULL;

	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	sched_pin();

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		mpte = pmap_allocpte(pmap, va, M_WAITOK);
	}
#if 0 && defined(PMAP_DIAGNOSTIC)
	else {
		pd_entry_t *pdeaddr = pmap_pde(pmap, va);
		origpte = *pdeaddr;
		if ((origpte & PG_V) == 0) {
			panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
			    pmap->pm_pdir[PTDPTDI], origpte, va);
		}
	}
#endif

	pde = pmap_pde(pmap, va);
	if ((*pde & PG_PS) != 0)
		panic("pmap_enter: attempted pmap_enter on 4MB page");
	pte = pmap_pte_quick(pmap, va);

	/*
	 * The page directory table entry is not valid, so we need a new
	 * PT page.
	 */
	if (pte == NULL) {
		panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n",
		    (uintmax_t)pmap->pm_pdir[va >> PDRSHIFT], va);
	}

	pa = VM_PAGE_TO_PHYS(m);
	om = NULL;
	opa = origpte = 0;

#if 0
	KASSERT((*pte & PG_V) || (*pte == 0), ("address set but not valid pte=%p *pte=0x%016jx",
	    pte, *pte));
#endif

	origpte = *pte;
	if (origpte)
		origpte = xpmap_mtop(origpte);
	opa = origpte & PG_FRAME;

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (origpte && (opa == pa)) {
		/*
		 * Wiring change, just update stats.  We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is wired,
		 * the PT page will be also.
		 */
		if (wired && ((origpte & PG_W) == 0))
			pmap->pm_stats.wired_count++;
		else if (!wired && (origpte & PG_W))
			pmap->pm_stats.wired_count--;

		/*
		 * Remove extra pte reference
		 */
		if (mpte)
			mpte->wire_count--;

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status.
		 */
		if (origpte & PG_MANAGED) {
			om = m;
			pa |= PG_MANAGED;
		}
		goto validate;
	}
	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		if (origpte & PG_W)
			pmap->pm_stats.wired_count--;
		if (origpte & PG_MANAGED) {
			om = PHYS_TO_VM_PAGE(opa);
			pmap_remove_entry(pmap, om, va);
		} else if (va < VM_MAXUSER_ADDRESS)
			printf("va=0x%x is unmanaged :-( \n", va);

		if (mpte != NULL) {
			mpte->wire_count--;
			KASSERT(mpte->wire_count > 0,
			    ("pmap_enter: missing reference to page table page,"
			    " va: 0x%x", va));
		}
	} else
		pmap->pm_stats.resident_count++;

	/*
	 * Enter on the PV list if part of our managed memory.
	 */
	if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) {
		KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva,
		    ("pmap_enter: managed mapping within the clean submap"));
		pmap_insert_entry(pmap, va, m);
		pa |= PG_MANAGED;
	}

	/*
	 * Increment counters
	 */
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	newpte = (pt_entry_t)(pa | PG_V);
	if ((prot & VM_PROT_WRITE) != 0) {
		newpte |= PG_RW;
		vm_page_flag_set(m, PG_WRITEABLE);
	}
#ifdef PAE
	if ((prot & VM_PROT_EXECUTE) == 0)
		newpte |= pg_nx;
#endif
	if (wired)
		newpte |= PG_W;
	if (va < VM_MAXUSER_ADDRESS)
		newpte |= PG_U;
	if (pmap == kernel_pmap)
		newpte |= pgeflag;

	critical_enter();
	/*
	 * if the mapping or permission bits are different, we need
	 * to update the pte.
	 */
	if ((origpte & ~(PG_M|PG_A)) != newpte) {
		if (origpte) {
			invlva = FALSE;
			origpte = *pte;
			PT_SET_VA(pte, newpte | PG_A, FALSE);
			if (origpte & PG_A) {
				if (origpte & PG_MANAGED)
					vm_page_flag_set(om, PG_REFERENCED);
				if (opa != VM_PAGE_TO_PHYS(m))
					invlva = TRUE;
#ifdef PAE
				if ((origpte & PG_NX) == 0 &&
				    (newpte & PG_NX) != 0)
					invlva = TRUE;
#endif
			}
			if (origpte & PG_M) {
				KASSERT((origpte & PG_RW),
				    ("pmap_enter: modified page not writable: va: %#x, pte: %#jx",
				    va, (uintmax_t)origpte));
				if ((origpte & PG_MANAGED) != 0)
					vm_page_dirty(om);
				if ((prot & VM_PROT_WRITE) == 0)
					invlva = TRUE;
			}
			if (invlva)
				pmap_invalidate_page(pmap, va);
		} else {
			PT_SET_VA(pte, newpte | PG_A, FALSE);
		}
	}
	PT_UPDATES_FLUSH();
	critical_exit();
	if (*PMAP1)
		PT_SET_VA_MA(PMAP1, 0, TRUE);
	sched_unpin();
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
}

/*
 * Maps a sequence of resident pages belonging to the same object.
 * The sequence begins with the given page m_start.  This page is
 * mapped at the given virtual address start.  Each subsequent page is
 * mapped at a virtual address that is offset from start by the same
 * amount as the page is offset from m_start within the object.  The
 * last page in the sequence is the page with the largest offset from
 * m_start that can be mapped at a virtual address less than the given
 * virtual address end.  Not every virtual page between start and end
 * is mapped; only those for which a resident page exists with the
 * corresponding offset from m_start are mapped.
2888181641Skmacy */ 2889181641Skmacyvoid 2890181641Skmacypmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2891181641Skmacy vm_page_t m_start, vm_prot_t prot) 2892181641Skmacy{ 2893181641Skmacy vm_page_t m, mpte; 2894181641Skmacy vm_pindex_t diff, psize; 2895181641Skmacy multicall_entry_t mcl[16]; 2896181641Skmacy multicall_entry_t *mclp = mcl; 2897181641Skmacy int error, count = 0; 2898181641Skmacy 2899181641Skmacy VM_OBJECT_LOCK_ASSERT(m_start->object, MA_OWNED); 2900181641Skmacy psize = atop(end - start); 2901181641Skmacy 2902181641Skmacy mpte = NULL; 2903181641Skmacy m = m_start; 2904181641Skmacy PMAP_LOCK(pmap); 2905181641Skmacy while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2906181641Skmacy mpte = pmap_enter_quick_locked(&mclp, &count, pmap, start + ptoa(diff), m, 2907181641Skmacy prot, mpte); 2908181641Skmacy m = TAILQ_NEXT(m, listq); 2909181641Skmacy if (count == 16) { 2910181641Skmacy error = HYPERVISOR_multicall(mcl, count); 2911181641Skmacy KASSERT(error == 0, ("bad multicall %d", error)); 2912181641Skmacy mclp = mcl; 2913181641Skmacy count = 0; 2914181641Skmacy } 2915181641Skmacy } 2916181641Skmacy if (count) { 2917181641Skmacy error = HYPERVISOR_multicall(mcl, count); 2918181641Skmacy KASSERT(error == 0, ("bad multicall %d", error)); 2919181641Skmacy } 2920181641Skmacy 2921181641Skmacy PMAP_UNLOCK(pmap); 2922181641Skmacy} 2923181641Skmacy 2924181641Skmacy/* 2925181641Skmacy * this code makes some *MAJOR* assumptions: 2926181641Skmacy * 1. Current pmap & pmap exists. 2927181641Skmacy * 2. Not wired. 2928181641Skmacy * 3. Read access. 2929181641Skmacy * 4. No page table pages. 2930181641Skmacy * but is *MUCH* faster than pmap_enter... 2931181641Skmacy */ 2932181641Skmacy 2933181641Skmacyvoid 2934181641Skmacypmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2935181641Skmacy{ 2936181641Skmacy multicall_entry_t mcl, *mclp; 2937181641Skmacy int count = 0; 2938181641Skmacy mclp = &mcl; 2939181641Skmacy 2940181641Skmacy CTR4(KTR_PMAP, "pmap_enter_quick: pmap=%p va=0x%x m=%p prot=0x%x", 2941181641Skmacy pmap, va, m, prot); 2942181641Skmacy 2943207796Salc vm_page_lock_queues(); 2944181641Skmacy PMAP_LOCK(pmap); 2945207796Salc (void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m, prot, NULL); 2946181641Skmacy if (count) 2947181641Skmacy HYPERVISOR_multicall(&mcl, count); 2948207796Salc vm_page_unlock_queues(); 2949181641Skmacy PMAP_UNLOCK(pmap); 2950181641Skmacy} 2951181641Skmacy 2952181747Skmacy#ifdef notyet 2953181641Skmacyvoid 2954181641Skmacypmap_enter_quick_range(pmap_t pmap, vm_offset_t *addrs, vm_page_t *pages, vm_prot_t *prots, int count) 2955181641Skmacy{ 2956181641Skmacy int i, error, index = 0; 2957181641Skmacy multicall_entry_t mcl[16]; 2958181641Skmacy multicall_entry_t *mclp = mcl; 2959181641Skmacy 2960181641Skmacy PMAP_LOCK(pmap); 2961181641Skmacy for (i = 0; i < count; i++, addrs++, pages++, prots++) { 2962181641Skmacy if (!pmap_is_prefaultable_locked(pmap, *addrs)) 2963181641Skmacy continue; 2964181641Skmacy 2965181641Skmacy (void) pmap_enter_quick_locked(&mclp, &index, pmap, *addrs, *pages, *prots, NULL); 2966181641Skmacy if (index == 16) { 2967181641Skmacy error = HYPERVISOR_multicall(mcl, index); 2968181641Skmacy mclp = mcl; 2969181641Skmacy index = 0; 2970181641Skmacy KASSERT(error == 0, ("bad multicall %d", error)); 2971181641Skmacy } 2972181641Skmacy } 2973181641Skmacy if (index) { 2974181641Skmacy error = HYPERVISOR_multicall(mcl, index); 2975181641Skmacy KASSERT(error == 0, ("bad multicall %d", 
error)); 2976181641Skmacy } 2977181641Skmacy 2978181641Skmacy PMAP_UNLOCK(pmap); 2979181641Skmacy} 2980181747Skmacy#endif 2981181641Skmacy 2982181641Skmacystatic vm_page_t 2983181641Skmacypmap_enter_quick_locked(multicall_entry_t **mclpp, int *count, pmap_t pmap, vm_offset_t va, vm_page_t m, 2984181641Skmacy vm_prot_t prot, vm_page_t mpte) 2985181641Skmacy{ 2986181641Skmacy pt_entry_t *pte; 2987181641Skmacy vm_paddr_t pa; 2988181641Skmacy vm_page_t free; 2989181641Skmacy multicall_entry_t *mcl = *mclpp; 2990181641Skmacy 2991181641Skmacy KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2992181641Skmacy (m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0, 2993181641Skmacy ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2994181641Skmacy mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2995181641Skmacy PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2996181641Skmacy 2997181641Skmacy /* 2998181641Skmacy * In the case that a page table page is not 2999181641Skmacy * resident, we are creating it here. 3000181641Skmacy */ 3001181641Skmacy if (va < VM_MAXUSER_ADDRESS) { 3002181641Skmacy unsigned ptepindex; 3003181641Skmacy pd_entry_t ptema; 3004181641Skmacy 3005181641Skmacy /* 3006181641Skmacy * Calculate pagetable page index 3007181641Skmacy */ 3008181641Skmacy ptepindex = va >> PDRSHIFT; 3009181641Skmacy if (mpte && (mpte->pindex == ptepindex)) { 3010181641Skmacy mpte->wire_count++; 3011181641Skmacy } else { 3012181641Skmacy /* 3013181641Skmacy * Get the page directory entry 3014181641Skmacy */ 3015181641Skmacy ptema = pmap->pm_pdir[ptepindex]; 3016181641Skmacy 3017181641Skmacy /* 3018181641Skmacy * If the page table page is mapped, we just increment 3019181641Skmacy * the hold count, and activate it. 3020181641Skmacy */ 3021181641Skmacy if (ptema & PG_V) { 3022181641Skmacy if (ptema & PG_PS) 3023181641Skmacy panic("pmap_enter_quick: unexpected mapping into 4MB page"); 3024181641Skmacy mpte = PHYS_TO_VM_PAGE(xpmap_mtop(ptema) & PG_FRAME); 3025181641Skmacy mpte->wire_count++; 3026181641Skmacy } else { 3027181641Skmacy mpte = _pmap_allocpte(pmap, ptepindex, 3028181641Skmacy M_NOWAIT); 3029181641Skmacy if (mpte == NULL) 3030181641Skmacy return (mpte); 3031181641Skmacy } 3032181641Skmacy } 3033181641Skmacy } else { 3034181641Skmacy mpte = NULL; 3035181641Skmacy } 3036181641Skmacy 3037181641Skmacy /* 3038181641Skmacy * This call to vtopte makes the assumption that we are 3039181641Skmacy * entering the page into the current pmap. In order to support 3040181641Skmacy * quick entry into any pmap, one would likely use pmap_pte_quick. 3041181641Skmacy * But that isn't as quick as vtopte. 3042181641Skmacy */ 3043181641Skmacy KASSERT(pmap_is_current(pmap), ("entering pages in non-current pmap")); 3044181641Skmacy pte = vtopte(va); 3045181641Skmacy if (*pte & PG_V) { 3046181641Skmacy if (mpte != NULL) { 3047181641Skmacy mpte->wire_count--; 3048181641Skmacy mpte = NULL; 3049181641Skmacy } 3050181641Skmacy return (mpte); 3051181641Skmacy } 3052181641Skmacy 3053181641Skmacy /* 3054181641Skmacy * Enter on the PV list if part of our managed memory. 
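 *
 * (Editor's aside, not part of the original comment: note that this
 * function does not issue a hypercall itself; it only appends a
 * __HYPERVISOR_update_va_mapping request to the caller's multicall
 * array.  Callers such as pmap_enter_object() drain that queue in
 * batches of 16, along these lines:
 *
 *	multicall_entry_t mcl[16], *mclp = mcl;
 *	int count = 0;
 *	...
 *	(void)pmap_enter_quick_locked(&mclp, &count, pmap, va, m,
 *	    prot, NULL);
 *	if (count == 16) {
 *		HYPERVISOR_multicall(mcl, count);	// flush full batch
 *		mclp = mcl;
 *		count = 0;
 *	}
 * )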
3055181641Skmacy */ 3056181641Skmacy if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0 && 3057181641Skmacy !pmap_try_insert_pv_entry(pmap, va, m)) { 3058181641Skmacy if (mpte != NULL) { 3059181641Skmacy free = NULL; 3060181641Skmacy if (pmap_unwire_pte_hold(pmap, mpte, &free)) { 3061181641Skmacy pmap_invalidate_page(pmap, va); 3062181641Skmacy pmap_free_zero_pages(free); 3063181641Skmacy } 3064181641Skmacy 3065181641Skmacy mpte = NULL; 3066181641Skmacy } 3067181641Skmacy return (mpte); 3068181641Skmacy } 3069181641Skmacy 3070181641Skmacy /* 3071181641Skmacy * Increment counters 3072181641Skmacy */ 3073181641Skmacy pmap->pm_stats.resident_count++; 3074181641Skmacy 3075181641Skmacy pa = VM_PAGE_TO_PHYS(m); 3076181641Skmacy#ifdef PAE 3077181641Skmacy if ((prot & VM_PROT_EXECUTE) == 0) 3078181641Skmacy pa |= pg_nx; 3079181641Skmacy#endif 3080181641Skmacy 3081181641Skmacy#if 0 3082181641Skmacy /* 3083181641Skmacy * Now validate mapping with RO protection 3084181641Skmacy */ 3085181641Skmacy if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 3086181641Skmacy pte_store(pte, pa | PG_V | PG_U); 3087181641Skmacy else 3088181641Skmacy pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 3089181641Skmacy#else 3090181641Skmacy /* 3091181641Skmacy * Now validate mapping with RO protection 3092181641Skmacy */ 3093181641Skmacy if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 3094181641Skmacy pa = xpmap_ptom(pa | PG_V | PG_U); 3095181641Skmacy else 3096181641Skmacy pa = xpmap_ptom(pa | PG_V | PG_U | PG_MANAGED); 3097181641Skmacy 3098181641Skmacy mcl->op = __HYPERVISOR_update_va_mapping; 3099181641Skmacy mcl->args[0] = va; 3100181641Skmacy mcl->args[1] = (uint32_t)(pa & 0xffffffff); 3101181641Skmacy mcl->args[2] = (uint32_t)(pa >> 32); 3102181641Skmacy mcl->args[3] = 0; 3103181641Skmacy *mclpp = mcl + 1; 3104181641Skmacy *count = *count + 1; 3105181641Skmacy#endif 3106181641Skmacy return mpte; 3107181641Skmacy} 3108181641Skmacy 3109181641Skmacy/* 3110181641Skmacy * Make a temporary mapping for a physical address. This is only intended 3111181641Skmacy * to be used for panic dumps. 3112181641Skmacy */ 3113181641Skmacyvoid * 3114181641Skmacypmap_kenter_temporary(vm_paddr_t pa, int i) 3115181641Skmacy{ 3116181641Skmacy vm_offset_t va; 3117200346Skmacy vm_paddr_t ma = xpmap_ptom(pa); 3118181641Skmacy 3119181641Skmacy va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 3120200346Skmacy PT_SET_MA(va, (ma & ~PAGE_MASK) | PG_V | pgeflag); 3121181641Skmacy invlpg(va); 3122181641Skmacy return ((void *)crashdumpmap); 3123181641Skmacy} 3124181641Skmacy 3125181641Skmacy/* 3126181641Skmacy * This code maps large physical mmap regions into the 3127181641Skmacy * processor address space. Note that some shortcuts 3128181641Skmacy * are taken, but the code works. 
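 *
 * (Editor's note, added for clarity: the superpage path below is only
 * attempted when PSE is available and the request is 2/4MB-aligned at
 * both ends, roughly:
 *
 *	pseflag != 0 &&
 *	(addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0
 *
 * and it then gives up silently unless the backing pages are
 * physically contiguous and share a single PAT mode.)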
3129181641Skmacy */ 3130181641Skmacyvoid 3131181641Skmacypmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 3132181641Skmacy vm_object_t object, vm_pindex_t pindex, 3133181641Skmacy vm_size_t size) 3134181641Skmacy{ 3135207419Skmacy pd_entry_t *pde; 3136207419Skmacy vm_paddr_t pa, ptepa; 3137181641Skmacy vm_page_t p; 3138207419Skmacy int pat_mode; 3139181641Skmacy 3140181641Skmacy VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 3141195840Sjhb KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3142181641Skmacy ("pmap_object_init_pt: non-device object")); 3143181641Skmacy if (pseflag && 3144207419Skmacy (addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { 3145207419Skmacy if (!vm_object_populate(object, pindex, pindex + atop(size))) 3146207419Skmacy return; 3147181641Skmacy p = vm_page_lookup(object, pindex); 3148207419Skmacy KASSERT(p->valid == VM_PAGE_BITS_ALL, 3149207419Skmacy ("pmap_object_init_pt: invalid page %p", p)); 3150207419Skmacy pat_mode = p->md.pat_mode; 3151207419Skmacy /* 3152207419Skmacy * Abort the mapping if the first page is not physically 3153207419Skmacy * aligned to a 2/4MB page boundary. 3154207419Skmacy */ 3155181641Skmacy ptepa = VM_PAGE_TO_PHYS(p); 3156181641Skmacy if (ptepa & (NBPDR - 1)) 3157181641Skmacy return; 3158207419Skmacy /* 3159207419Skmacy * Skip the first page. Abort the mapping if the rest of 3160207419Skmacy * the pages are not physically contiguous or have differing 3161207419Skmacy * memory attributes. 3162207419Skmacy */ 3163207419Skmacy p = TAILQ_NEXT(p, listq); 3164207419Skmacy for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; 3165207419Skmacy pa += PAGE_SIZE) { 3166207419Skmacy KASSERT(p->valid == VM_PAGE_BITS_ALL, 3167207419Skmacy ("pmap_object_init_pt: invalid page %p", p)); 3168207419Skmacy if (pa != VM_PAGE_TO_PHYS(p) || 3169207419Skmacy pat_mode != p->md.pat_mode) 3170207419Skmacy return; 3171207419Skmacy p = TAILQ_NEXT(p, listq); 3172207419Skmacy } 3173207419Skmacy /* Map using 2/4MB pages. */ 3174181641Skmacy PMAP_LOCK(pmap); 3175207419Skmacy for (pa = ptepa | pmap_cache_bits(pat_mode, 1); pa < ptepa + 3176207419Skmacy size; pa += NBPDR) { 3177207419Skmacy pde = pmap_pde(pmap, addr); 3178207419Skmacy if (*pde == 0) { 3179207419Skmacy pde_store(pde, pa | PG_PS | PG_M | PG_A | 3180207419Skmacy PG_U | PG_RW | PG_V); 3181207419Skmacy pmap->pm_stats.resident_count += NBPDR / 3182207419Skmacy PAGE_SIZE; 3183207419Skmacy pmap_pde_mappings++; 3184207419Skmacy } 3185207419Skmacy /* Else continue on if the PDE is already valid. */ 3186207419Skmacy addr += NBPDR; 3187181641Skmacy } 3188181641Skmacy PMAP_UNLOCK(pmap); 3189181641Skmacy } 3190181641Skmacy} 3191181641Skmacy 3192181641Skmacy/* 3193181641Skmacy * Routine: pmap_change_wiring 3194181641Skmacy * Function: Change the wiring attribute for a map/virtual-address 3195181641Skmacy * pair. 3196181641Skmacy * In/out conditions: 3197181641Skmacy * The mapping must already exist in the pmap. 
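 *
 * (Editor's illustration, not part of the original comment, assuming
 * the mapping at "va" already exists:
 *
 *	pmap_change_wiring(pmap, va, TRUE);	// sets PG_W, wired_count++
 *	pmap_change_wiring(pmap, va, FALSE);	// clears PG_W, wired_count--
 *
 * No TLB shootdown is required because PG_W is a software-only bit
 * that the hardware never consults.)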
3198181641Skmacy */ 3199181641Skmacyvoid 3200181641Skmacypmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired) 3201181641Skmacy{ 3202181641Skmacy pt_entry_t *pte; 3203181641Skmacy 3204181641Skmacy vm_page_lock_queues(); 3205181641Skmacy PMAP_LOCK(pmap); 3206181641Skmacy pte = pmap_pte(pmap, va); 3207181641Skmacy 3208181641Skmacy if (wired && !pmap_pte_w(pte)) { 3209181641Skmacy PT_SET_VA_MA((pte), *(pte) | PG_W, TRUE); 3210181641Skmacy pmap->pm_stats.wired_count++; 3211181641Skmacy } else if (!wired && pmap_pte_w(pte)) { 3212181641Skmacy PT_SET_VA_MA((pte), *(pte) & ~PG_W, TRUE); 3213181641Skmacy pmap->pm_stats.wired_count--; 3214181641Skmacy } 3215181641Skmacy 3216181641Skmacy /* 3217181641Skmacy * Wiring is not a hardware characteristic so there is no need to 3218181641Skmacy * invalidate TLB. 3219181641Skmacy */ 3220181641Skmacy pmap_pte_release(pte); 3221181641Skmacy PMAP_UNLOCK(pmap); 3222181641Skmacy vm_page_unlock_queues(); 3223181641Skmacy} 3224181641Skmacy 3225181641Skmacy 3226181641Skmacy 3227181641Skmacy/* 3228181641Skmacy * Copy the range specified by src_addr/len 3229181641Skmacy * from the source map to the range dst_addr/len 3230181641Skmacy * in the destination map. 3231181641Skmacy * 3232181641Skmacy * This routine is only advisory and need not do anything. 3233181641Skmacy */ 3234181641Skmacy 3235181641Skmacyvoid 3236181641Skmacypmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3237181641Skmacy vm_offset_t src_addr) 3238181641Skmacy{ 3239181641Skmacy vm_page_t free; 3240181641Skmacy vm_offset_t addr; 3241181641Skmacy vm_offset_t end_addr = src_addr + len; 3242181641Skmacy vm_offset_t pdnxt; 3243181641Skmacy 3244181641Skmacy if (dst_addr != src_addr) 3245181641Skmacy return; 3246181641Skmacy 3247181641Skmacy if (!pmap_is_current(src_pmap)) { 3248181641Skmacy CTR2(KTR_PMAP, 3249181641Skmacy "pmap_copy, skipping: pdir[PTDPTDI]=0x%jx PTDpde[0]=0x%jx", 3250181641Skmacy (src_pmap->pm_pdir[PTDPTDI] & PG_FRAME), (PTDpde[0] & PG_FRAME)); 3251181641Skmacy 3252181641Skmacy return; 3253181641Skmacy } 3254181641Skmacy CTR5(KTR_PMAP, "pmap_copy: dst_pmap=%p src_pmap=%p dst_addr=0x%x len=%d src_addr=0x%x", 3255181641Skmacy dst_pmap, src_pmap, dst_addr, len, src_addr); 3256181641Skmacy 3257181641Skmacy vm_page_lock_queues(); 3258181641Skmacy if (dst_pmap < src_pmap) { 3259181641Skmacy PMAP_LOCK(dst_pmap); 3260181641Skmacy PMAP_LOCK(src_pmap); 3261181641Skmacy } else { 3262181641Skmacy PMAP_LOCK(src_pmap); 3263181641Skmacy PMAP_LOCK(dst_pmap); 3264181641Skmacy } 3265181641Skmacy sched_pin(); 3266181641Skmacy for (addr = src_addr; addr < end_addr; addr = pdnxt) { 3267181641Skmacy pt_entry_t *src_pte, *dst_pte; 3268181641Skmacy vm_page_t dstmpte, srcmpte; 3269181641Skmacy pd_entry_t srcptepaddr; 3270181641Skmacy unsigned ptepindex; 3271181641Skmacy 3272181641Skmacy if (addr >= UPT_MIN_ADDRESS) 3273181641Skmacy panic("pmap_copy: invalid to pmap_copy page tables"); 3274181641Skmacy 3275181641Skmacy pdnxt = (addr + NBPDR) & ~PDRMASK; 3276181641Skmacy ptepindex = addr >> PDRSHIFT; 3277181641Skmacy 3278181641Skmacy srcptepaddr = PT_GET(&src_pmap->pm_pdir[ptepindex]); 3279181641Skmacy if (srcptepaddr == 0) 3280181641Skmacy continue; 3281181641Skmacy 3282181641Skmacy if (srcptepaddr & PG_PS) { 3283181641Skmacy if (dst_pmap->pm_pdir[ptepindex] == 0) { 3284181641Skmacy PD_SET_VA(dst_pmap, ptepindex, srcptepaddr & ~PG_W, TRUE); 3285181641Skmacy dst_pmap->pm_stats.resident_count += 3286181641Skmacy NBPDR / PAGE_SIZE; 3287181641Skmacy } 3288181641Skmacy 
continue; 3289181641Skmacy } 3290181641Skmacy 3291181641Skmacy srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); 3292181641Skmacy if (srcmpte->wire_count == 0) 3293181641Skmacy panic("pmap_copy: source page table page is unused"); 3294181641Skmacy 3295181641Skmacy if (pdnxt > end_addr) 3296181641Skmacy pdnxt = end_addr; 3297181641Skmacy 3298181641Skmacy src_pte = vtopte(addr); 3299181641Skmacy while (addr < pdnxt) { 3300181641Skmacy pt_entry_t ptetemp; 3301181641Skmacy ptetemp = *src_pte; 3302181641Skmacy /* 3303181641Skmacy * we only virtual copy managed pages 3304181641Skmacy */ 3305181641Skmacy if ((ptetemp & PG_MANAGED) != 0) { 3306181641Skmacy dstmpte = pmap_allocpte(dst_pmap, addr, 3307181641Skmacy M_NOWAIT); 3308181641Skmacy if (dstmpte == NULL) 3309181641Skmacy break; 3310181641Skmacy dst_pte = pmap_pte_quick(dst_pmap, addr); 3311181641Skmacy if (*dst_pte == 0 && 3312181641Skmacy pmap_try_insert_pv_entry(dst_pmap, addr, 3313181641Skmacy PHYS_TO_VM_PAGE(xpmap_mtop(ptetemp) & PG_FRAME))) { 3314181641Skmacy /* 3315181641Skmacy * Clear the wired, modified, and 3316181641Skmacy * accessed (referenced) bits 3317181641Skmacy * during the copy. 3318181641Skmacy */ 3319181641Skmacy KASSERT(ptetemp != 0, ("src_pte not set")); 3320181641Skmacy PT_SET_VA_MA(dst_pte, ptetemp & ~(PG_W | PG_M | PG_A), TRUE /* XXX debug */); 3321181641Skmacy KASSERT(*dst_pte == (ptetemp & ~(PG_W | PG_M | PG_A)), 3322181641Skmacy ("no pmap copy expected: 0x%jx saw: 0x%jx", 3323181641Skmacy ptetemp & ~(PG_W | PG_M | PG_A), *dst_pte)); 3324181641Skmacy dst_pmap->pm_stats.resident_count++; 3325181641Skmacy } else { 3326181641Skmacy free = NULL; 3327181641Skmacy if (pmap_unwire_pte_hold(dst_pmap, 3328181641Skmacy dstmpte, &free)) { 3329181641Skmacy pmap_invalidate_page(dst_pmap, 3330181641Skmacy addr); 3331181641Skmacy pmap_free_zero_pages(free); 3332181641Skmacy } 3333181641Skmacy } 3334181641Skmacy if (dstmpte->wire_count >= srcmpte->wire_count) 3335181641Skmacy break; 3336181641Skmacy } 3337181641Skmacy addr += PAGE_SIZE; 3338181641Skmacy src_pte++; 3339181641Skmacy } 3340181641Skmacy } 3341181641Skmacy PT_UPDATES_FLUSH(); 3342181641Skmacy sched_unpin(); 3343181641Skmacy vm_page_unlock_queues(); 3344181641Skmacy PMAP_UNLOCK(src_pmap); 3345181641Skmacy PMAP_UNLOCK(dst_pmap); 3346181641Skmacy} 3347181641Skmacy 3348196723Sadrianstatic __inline void 3349196723Sadrianpagezero(void *page) 3350196723Sadrian{ 3351196723Sadrian#if defined(I686_CPU) 3352196723Sadrian if (cpu_class == CPUCLASS_686) { 3353196723Sadrian#if defined(CPU_ENABLE_SSE) 3354196723Sadrian if (cpu_feature & CPUID_SSE2) 3355196723Sadrian sse2_pagezero(page); 3356196723Sadrian else 3357196723Sadrian#endif 3358196723Sadrian i686_pagezero(page); 3359196723Sadrian } else 3360196723Sadrian#endif 3361196723Sadrian bzero(page, PAGE_SIZE); 3362196723Sadrian} 3363196723Sadrian 3364181641Skmacy/* 3365181641Skmacy * pmap_zero_page zeros the specified hardware page by mapping 3366181641Skmacy * the page into KVM and using bzero to clear its contents. 
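 *
 * (Editor's aside, not part of the original comment: "mapping the page
 * into KVM" uses a per-CPU transient window; the same pattern appears
 * in pmap_zero_page_area() and pmap_copy_page():
 *
 *	sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)];
 *	mtx_lock(&sysmaps->lock);
 *	sched_pin();				// stay on this CPU
 *	PT_SET_MA(sysmaps->CADDR2, ...);	// map target page at CADDR2
 *	pagezero(sysmaps->CADDR2);
 *	PT_SET_MA(sysmaps->CADDR2, 0);		// tear the window down
 *	sched_unpin();
 *	mtx_unlock(&sysmaps->lock);
 * )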
3367181641Skmacy */ 3368181641Skmacyvoid 3369181641Skmacypmap_zero_page(vm_page_t m) 3370181641Skmacy{ 3371181641Skmacy struct sysmaps *sysmaps; 3372181641Skmacy 3373181641Skmacy sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3374181641Skmacy mtx_lock(&sysmaps->lock); 3375181641Skmacy if (*sysmaps->CMAP2) 3376181641Skmacy panic("pmap_zero_page: CMAP2 busy"); 3377181641Skmacy sched_pin(); 3378181641Skmacy PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); 3379181641Skmacy pagezero(sysmaps->CADDR2); 3380181641Skmacy PT_SET_MA(sysmaps->CADDR2, 0); 3381181641Skmacy sched_unpin(); 3382181641Skmacy mtx_unlock(&sysmaps->lock); 3383181641Skmacy} 3384181641Skmacy 3385181641Skmacy/* 3386181641Skmacy * pmap_zero_page_area zeros the specified hardware page by mapping 3387181641Skmacy * the page into KVM and using bzero to clear its contents. 3388181641Skmacy * 3389181641Skmacy * off and size may not cover an area beyond a single hardware page. 3390181641Skmacy */ 3391181641Skmacyvoid 3392181641Skmacypmap_zero_page_area(vm_page_t m, int off, int size) 3393181641Skmacy{ 3394181641Skmacy struct sysmaps *sysmaps; 3395181641Skmacy 3396181641Skmacy sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3397181641Skmacy mtx_lock(&sysmaps->lock); 3398181641Skmacy if (*sysmaps->CMAP2) 3399181641Skmacy panic("pmap_zero_page: CMAP2 busy"); 3400181641Skmacy sched_pin(); 3401181641Skmacy PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); 3402181641Skmacy 3403181641Skmacy if (off == 0 && size == PAGE_SIZE) 3404181641Skmacy pagezero(sysmaps->CADDR2); 3405181641Skmacy else 3406181641Skmacy bzero((char *)sysmaps->CADDR2 + off, size); 3407181641Skmacy PT_SET_MA(sysmaps->CADDR2, 0); 3408181641Skmacy sched_unpin(); 3409181641Skmacy mtx_unlock(&sysmaps->lock); 3410181641Skmacy} 3411181641Skmacy 3412181641Skmacy/* 3413181641Skmacy * pmap_zero_page_idle zeros the specified hardware page by mapping 3414181641Skmacy * the page into KVM and using bzero to clear its contents. This 3415181641Skmacy * is intended to be called from the vm_pagezero process only and 3416181641Skmacy * outside of Giant. 3417181641Skmacy */ 3418181641Skmacyvoid 3419181641Skmacypmap_zero_page_idle(vm_page_t m) 3420181641Skmacy{ 3421181641Skmacy 3422181641Skmacy if (*CMAP3) 3423181641Skmacy panic("pmap_zero_page: CMAP3 busy"); 3424181641Skmacy sched_pin(); 3425181641Skmacy PT_SET_MA(CADDR3, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M); 3426181641Skmacy pagezero(CADDR3); 3427181641Skmacy PT_SET_MA(CADDR3, 0); 3428181641Skmacy sched_unpin(); 3429181641Skmacy} 3430181641Skmacy 3431181641Skmacy/* 3432181641Skmacy * pmap_copy_page copies the specified (machine independent) 3433181641Skmacy * page by mapping the page into virtual memory and using 3434181641Skmacy * bcopy to copy the page, one machine dependent page at a 3435181641Skmacy * time. 
3436181641Skmacy */ 3437181641Skmacyvoid 3438181641Skmacypmap_copy_page(vm_page_t src, vm_page_t dst) 3439181641Skmacy{ 3440181641Skmacy struct sysmaps *sysmaps; 3441181641Skmacy 3442181641Skmacy sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 3443181641Skmacy mtx_lock(&sysmaps->lock); 3444181641Skmacy if (*sysmaps->CMAP1) 3445181641Skmacy panic("pmap_copy_page: CMAP1 busy"); 3446181641Skmacy if (*sysmaps->CMAP2) 3447181641Skmacy panic("pmap_copy_page: CMAP2 busy"); 3448181641Skmacy sched_pin(); 3449181641Skmacy PT_SET_MA(sysmaps->CADDR1, PG_V | xpmap_ptom(VM_PAGE_TO_PHYS(src)) | PG_A); 3450181641Skmacy PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | xpmap_ptom(VM_PAGE_TO_PHYS(dst)) | PG_A | PG_M); 3451181641Skmacy bcopy(sysmaps->CADDR1, sysmaps->CADDR2, PAGE_SIZE); 3452181641Skmacy PT_SET_MA(sysmaps->CADDR1, 0); 3453181641Skmacy PT_SET_MA(sysmaps->CADDR2, 0); 3454181641Skmacy sched_unpin(); 3455181641Skmacy mtx_unlock(&sysmaps->lock); 3456181641Skmacy} 3457181641Skmacy 3458181641Skmacy/* 3459181641Skmacy * Returns true if the pmap's pv is one of the first 3460181641Skmacy * 16 pvs linked to from this page. This count may 3461181641Skmacy * be changed upwards or downwards in the future; it 3462181641Skmacy * is only necessary that true be returned for a small 3463181641Skmacy * subset of pmaps for proper page aging. 3464181641Skmacy */ 3465181641Skmacyboolean_t 3466181641Skmacypmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3467181641Skmacy{ 3468181641Skmacy pv_entry_t pv; 3469181641Skmacy int loops = 0; 3470181641Skmacy 3471181641Skmacy if (m->flags & PG_FICTITIOUS) 3472181641Skmacy return (FALSE); 3473181641Skmacy 3474181641Skmacy mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3475181641Skmacy TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3476181641Skmacy if (PV_PMAP(pv) == pmap) { 3477181641Skmacy return TRUE; 3478181641Skmacy } 3479181641Skmacy loops++; 3480181641Skmacy if (loops >= 16) 3481181641Skmacy break; 3482181641Skmacy } 3483181641Skmacy return (FALSE); 3484181641Skmacy} 3485181641Skmacy 3486181641Skmacy/* 3487181641Skmacy * pmap_page_wired_mappings: 3488181641Skmacy * 3489181641Skmacy * Return the number of managed mappings to the given physical page 3490181641Skmacy * that are wired. 3491181641Skmacy */ 3492181641Skmacyint 3493181641Skmacypmap_page_wired_mappings(vm_page_t m) 3494181641Skmacy{ 3495181641Skmacy pv_entry_t pv; 3496181641Skmacy pt_entry_t *pte; 3497181641Skmacy pmap_t pmap; 3498181641Skmacy int count; 3499181641Skmacy 3500181641Skmacy count = 0; 3501181641Skmacy if ((m->flags & PG_FICTITIOUS) != 0) 3502181641Skmacy return (count); 3503207796Salc vm_page_lock_queues(); 3504181641Skmacy sched_pin(); 3505181641Skmacy TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3506181641Skmacy pmap = PV_PMAP(pv); 3507181641Skmacy PMAP_LOCK(pmap); 3508181641Skmacy pte = pmap_pte_quick(pmap, pv->pv_va); 3509181641Skmacy if ((*pte & PG_W) != 0) 3510181641Skmacy count++; 3511181641Skmacy PMAP_UNLOCK(pmap); 3512181641Skmacy } 3513181641Skmacy sched_unpin(); 3514207796Salc vm_page_unlock_queues(); 3515181641Skmacy return (count); 3516181641Skmacy} 3517181641Skmacy 3518181641Skmacy/* 3519181747Skmacy * Returns TRUE if the given page is mapped individually or as part of 3520181747Skmacy * a 4mpage. Otherwise, returns FALSE. 
3521181747Skmacy */ 3522181747Skmacyboolean_t 3523181747Skmacypmap_page_is_mapped(vm_page_t m) 3524181747Skmacy{ 3525207796Salc boolean_t rv; 3526181747Skmacy 3527181747Skmacy if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) != 0) 3528181747Skmacy return (FALSE); 3529207796Salc vm_page_lock_queues(); 3530207796Salc rv = !TAILQ_EMPTY(&m->md.pv_list) || 3531207796Salc !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list); 3532207796Salc vm_page_unlock_queues(); 3533207796Salc return (rv); 3534181747Skmacy} 3535181747Skmacy 3536181747Skmacy/* 3537181641Skmacy * Remove all pages from specified address space 3538181641Skmacy * this aids process exit speeds. Also, this code 3539181641Skmacy * is special cased for current process only, but 3540181641Skmacy * can have the more generic (and slightly slower) 3541181641Skmacy * mode enabled. This is much faster than pmap_remove 3542181641Skmacy * in the case of running down an entire address space. 3543181641Skmacy */ 3544181641Skmacyvoid 3545181641Skmacypmap_remove_pages(pmap_t pmap) 3546181641Skmacy{ 3547181641Skmacy pt_entry_t *pte, tpte; 3548181641Skmacy vm_page_t m, free = NULL; 3549181641Skmacy pv_entry_t pv; 3550181641Skmacy struct pv_chunk *pc, *npc; 3551181641Skmacy int field, idx; 3552181641Skmacy int32_t bit; 3553181641Skmacy uint32_t inuse, bitmask; 3554181641Skmacy int allfree; 3555181641Skmacy 3556181641Skmacy CTR1(KTR_PMAP, "pmap_remove_pages: pmap=%p", pmap); 3557181641Skmacy 3558181641Skmacy if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) { 3559181641Skmacy printf("warning: pmap_remove_pages called with non-current pmap\n"); 3560181641Skmacy return; 3561181641Skmacy } 3562181641Skmacy vm_page_lock_queues(); 3563181641Skmacy KASSERT(pmap_is_current(pmap), ("removing pages from non-current pmap")); 3564181641Skmacy PMAP_LOCK(pmap); 3565181641Skmacy sched_pin(); 3566181641Skmacy TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 3567181641Skmacy allfree = 1; 3568181641Skmacy for (field = 0; field < _NPCM; field++) { 3569181641Skmacy inuse = (~(pc->pc_map[field])) & pc_freemask[field]; 3570181641Skmacy while (inuse != 0) { 3571181641Skmacy bit = bsfl(inuse); 3572181641Skmacy bitmask = 1UL << bit; 3573181641Skmacy idx = field * 32 + bit; 3574181641Skmacy pv = &pc->pc_pventry[idx]; 3575181641Skmacy inuse &= ~bitmask; 3576181641Skmacy 3577181641Skmacy pte = vtopte(pv->pv_va); 3578181641Skmacy tpte = *pte ? xpmap_mtop(*pte) : 0; 3579181641Skmacy 3580181641Skmacy if (tpte == 0) { 3581181641Skmacy printf( 3582181641Skmacy "TPTE at %p IS ZERO @ VA %08x\n", 3583181641Skmacy pte, pv->pv_va); 3584181641Skmacy panic("bad pte"); 3585181641Skmacy } 3586181641Skmacy 3587181641Skmacy/* 3588181641Skmacy * We cannot remove wired pages from a process' mapping at this time 3589181641Skmacy */ 3590181641Skmacy if (tpte & PG_W) { 3591181641Skmacy allfree = 0; 3592181641Skmacy continue; 3593181641Skmacy } 3594181641Skmacy 3595181641Skmacy m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 3596181641Skmacy KASSERT(m->phys_addr == (tpte & PG_FRAME), 3597181641Skmacy ("vm_page_t %p phys_addr mismatch %016jx %016jx", 3598181641Skmacy m, (uintmax_t)m->phys_addr, 3599181641Skmacy (uintmax_t)tpte)); 3600181641Skmacy 3601181641Skmacy KASSERT(m < &vm_page_array[vm_page_array_size], 3602181641Skmacy ("pmap_remove_pages: bad tpte %#jx", 3603181641Skmacy (uintmax_t)tpte)); 3604181641Skmacy 3605181641Skmacy 3606181641Skmacy PT_CLEAR_VA(pte, FALSE); 3607181641Skmacy 3608181641Skmacy /* 3609181641Skmacy * Update the vm_page_t clean/reference bits. 
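 *
 * (Editor's aside on the chunk scan above, not part of the original
 * comment: each pv chunk records free slots in pc_map[], so allocated
 * entries are recovered by scanning the inverted bitmap:
 *
 *	inuse = (~(pc->pc_map[field])) & pc_freemask[field];
 *	bit = bsfl(inuse);		// lowest set bit
 *	idx = field * 32 + bit;		// e.g. field 1, bit 3 -> entry 35
 * )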
3610181641Skmacy */
3611181641Skmacy			if (tpte & PG_M)
3612181641Skmacy				vm_page_dirty(m);
3613181641Skmacy
3614181641Skmacy			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
3615181641Skmacy			if (TAILQ_EMPTY(&m->md.pv_list))
3616181641Skmacy				vm_page_flag_clear(m, PG_WRITEABLE);
3617181641Skmacy
3618181641Skmacy			pmap_unuse_pt(pmap, pv->pv_va, &free);
3619181641Skmacy
3620181641Skmacy			/* Mark free */
3621181641Skmacy			PV_STAT(pv_entry_frees++);
3622181641Skmacy			PV_STAT(pv_entry_spare++);
3623181641Skmacy			pv_entry_count--;
3624181641Skmacy			pc->pc_map[field] |= bitmask;
3625181641Skmacy			pmap->pm_stats.resident_count--;
3626181641Skmacy			}
3627181641Skmacy		}
3628181641Skmacy		PT_UPDATES_FLUSH();
3629181641Skmacy		if (allfree) {
3630181641Skmacy			PV_STAT(pv_entry_spare -= _NPCPV);
3631181641Skmacy			PV_STAT(pc_chunk_count--);
3632181641Skmacy			PV_STAT(pc_chunk_frees++);
3633181641Skmacy			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
3634181641Skmacy			m = PHYS_TO_VM_PAGE(pmap_kextract((vm_offset_t)pc));
3635181641Skmacy			pmap_qremove((vm_offset_t)pc, 1);
3636181641Skmacy			vm_page_unwire(m, 0);
3637181641Skmacy			vm_page_free(m);
3638181641Skmacy			pmap_ptelist_free(&pv_vafree, (vm_offset_t)pc);
3639181641Skmacy		}
3640181641Skmacy	}
3641181641Skmacy	PT_UPDATES_FLUSH();
3642181641Skmacy	if (*PMAP1)
3643181641Skmacy		PT_SET_MA(PADDR1, 0);
3644181641Skmacy
3645181641Skmacy	sched_unpin();
3646181641Skmacy	pmap_invalidate_all(pmap);
3647181641Skmacy	vm_page_unlock_queues();
3648181641Skmacy	PMAP_UNLOCK(pmap);
3649181641Skmacy	pmap_free_zero_pages(free);
3650181641Skmacy}
3651181641Skmacy
3652181641Skmacy/*
3653181641Skmacy * pmap_is_modified:
3654181641Skmacy *
3655181641Skmacy *	Return whether or not the specified physical page was modified
3656181641Skmacy *	in any physical maps.
3657181641Skmacy */
3658181641Skmacyboolean_t
3659181641Skmacypmap_is_modified(vm_page_t m)
3660181641Skmacy{
3661181641Skmacy	pv_entry_t pv;
3662181641Skmacy	pt_entry_t *pte;
3663181641Skmacy	pmap_t pmap;
3664181641Skmacy	boolean_t rv;
3665181641Skmacy
3666208504Salc	KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0,
3667208504Salc	    ("pmap_is_modified: page %p is not managed", m));
3668181641Skmacy	rv = FALSE;
3669208504Salc
3670208504Salc	/*
3671208504Salc	 * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be
3672208504Salc	 * concurrently set while the object is locked.  Thus, if PG_WRITEABLE
3673208504Salc	 * is clear, no PTEs can have PG_M set.
3674208504Salc	 */
3675208504Salc	VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
3676208504Salc	if ((m->oflags & VPO_BUSY) == 0 &&
3677208504Salc	    (m->flags & PG_WRITEABLE) == 0)
3678181641Skmacy		return (rv);
3679208504Salc	vm_page_lock_queues();
3680181641Skmacy	sched_pin();
3681181641Skmacy	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
3682181641Skmacy		pmap = PV_PMAP(pv);
3683181641Skmacy		PMAP_LOCK(pmap);
3684181641Skmacy		pte = pmap_pte_quick(pmap, pv->pv_va);
3685181641Skmacy		rv = (*pte & PG_M) != 0;
3686181641Skmacy		PMAP_UNLOCK(pmap);
3687181641Skmacy		if (rv)
3688181641Skmacy			break;
3689181641Skmacy	}
3690181641Skmacy	if (*PMAP1)
3691181641Skmacy		PT_SET_MA(PADDR1, 0);
3692181641Skmacy	sched_unpin();
3693208504Salc	vm_page_unlock_queues();
3694181641Skmacy	return (rv);
3695181641Skmacy}
3696181641Skmacy
3697181641Skmacy/*
3698181641Skmacy * pmap_is_prefaultable:
3699181641Skmacy *
3700181641Skmacy *	Return whether or not the specified virtual address is eligible
3701181641Skmacy *	for prefault.
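 *
 * (Editor's note: as committed, pmap_is_prefaultable_locked() below
 * returns FALSE unconditionally -- the early "return (rv)" precedes
 * the real check -- so prefaulting is effectively disabled for the
 * Xen pmap and the vtopte() probe after it is currently dead code.)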
3702181641Skmacy */ 3703181641Skmacystatic boolean_t 3704181641Skmacypmap_is_prefaultable_locked(pmap_t pmap, vm_offset_t addr) 3705181641Skmacy{ 3706181641Skmacy pt_entry_t *pte; 3707181641Skmacy boolean_t rv = FALSE; 3708181641Skmacy 3709181641Skmacy return (rv); 3710181641Skmacy 3711181641Skmacy if (pmap_is_current(pmap) && *pmap_pde(pmap, addr)) { 3712181641Skmacy pte = vtopte(addr); 3713181641Skmacy rv = (*pte == 0); 3714181641Skmacy } 3715181641Skmacy return (rv); 3716181641Skmacy} 3717181641Skmacy 3718181641Skmacyboolean_t 3719181641Skmacypmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 3720181641Skmacy{ 3721181641Skmacy boolean_t rv; 3722181641Skmacy 3723181641Skmacy PMAP_LOCK(pmap); 3724181641Skmacy rv = pmap_is_prefaultable_locked(pmap, addr); 3725181641Skmacy PMAP_UNLOCK(pmap); 3726181641Skmacy return (rv); 3727181641Skmacy} 3728181641Skmacy 3729207155Salcboolean_t 3730207155Salcpmap_is_referenced(vm_page_t m) 3731207155Salc{ 3732207155Salc pv_entry_t pv; 3733207155Salc pt_entry_t *pte; 3734207155Salc pmap_t pmap; 3735207155Salc boolean_t rv; 3736207155Salc 3737207155Salc rv = FALSE; 3738207155Salc if (m->flags & PG_FICTITIOUS) 3739207155Salc return (rv); 3740207155Salc sched_pin(); 3741207155Salc mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3742207155Salc TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3743207155Salc pmap = PV_PMAP(pv); 3744207155Salc PMAP_LOCK(pmap); 3745207155Salc pte = pmap_pte_quick(pmap, pv->pv_va); 3746207155Salc rv = (*pte & (PG_A | PG_V)) == (PG_A | PG_V); 3747207155Salc PMAP_UNLOCK(pmap); 3748207155Salc if (rv) 3749207155Salc break; 3750207155Salc } 3751207155Salc if (*PMAP1) 3752207155Salc PT_SET_MA(PADDR1, 0); 3753207155Salc sched_unpin(); 3754207155Salc return (rv); 3755207155Salc} 3756207155Salc 3757181641Skmacyvoid 3758181641Skmacypmap_map_readonly(pmap_t pmap, vm_offset_t va, int len) 3759181641Skmacy{ 3760181641Skmacy int i, npages = round_page(len) >> PAGE_SHIFT; 3761181641Skmacy for (i = 0; i < npages; i++) { 3762181641Skmacy pt_entry_t *pte; 3763181641Skmacy pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); 3764181641Skmacy pte_store(pte, xpmap_mtop(*pte & ~(PG_RW|PG_M))); 3765181641Skmacy PMAP_MARK_PRIV(xpmap_mtop(*pte)); 3766181641Skmacy pmap_pte_release(pte); 3767181641Skmacy } 3768181641Skmacy} 3769181641Skmacy 3770181641Skmacyvoid 3771181641Skmacypmap_map_readwrite(pmap_t pmap, vm_offset_t va, int len) 3772181641Skmacy{ 3773181641Skmacy int i, npages = round_page(len) >> PAGE_SHIFT; 3774181641Skmacy for (i = 0; i < npages; i++) { 3775181641Skmacy pt_entry_t *pte; 3776181641Skmacy pte = pmap_pte(pmap, (vm_offset_t)(va + i*PAGE_SIZE)); 3777181641Skmacy PMAP_MARK_UNPRIV(xpmap_mtop(*pte)); 3778181641Skmacy pte_store(pte, xpmap_mtop(*pte) | (PG_RW|PG_M)); 3779181641Skmacy pmap_pte_release(pte); 3780181641Skmacy } 3781181641Skmacy} 3782181641Skmacy 3783181641Skmacy/* 3784181641Skmacy * Clear the write and modified bits in each of the given page's mappings. 3785181641Skmacy */ 3786181641Skmacyvoid 3787181641Skmacypmap_remove_write(vm_page_t m) 3788181641Skmacy{ 3789181641Skmacy pv_entry_t pv; 3790181641Skmacy pmap_t pmap; 3791181641Skmacy pt_entry_t oldpte, *pte; 3792181641Skmacy 3793208175Salc KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3794208175Salc ("pmap_remove_write: page %p is not managed", m)); 3795208175Salc 3796208175Salc /* 3797208175Salc * If the page is not VPO_BUSY, then PG_WRITEABLE cannot be set by 3798208175Salc * another thread while the object is locked. 
Thus, if PG_WRITEABLE 3799208175Salc * is clear, no page table entries need updating. 3800208175Salc */ 3801208175Salc VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 3802208175Salc if ((m->oflags & VPO_BUSY) == 0 && 3803181641Skmacy (m->flags & PG_WRITEABLE) == 0) 3804181641Skmacy return; 3805207796Salc vm_page_lock_queues(); 3806181641Skmacy sched_pin(); 3807181641Skmacy TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3808181641Skmacy pmap = PV_PMAP(pv); 3809181641Skmacy PMAP_LOCK(pmap); 3810181641Skmacy pte = pmap_pte_quick(pmap, pv->pv_va); 3811181641Skmacyretry: 3812181641Skmacy oldpte = *pte; 3813181641Skmacy if ((oldpte & PG_RW) != 0) { 3814188341Skmacy vm_paddr_t newpte = oldpte & ~(PG_RW | PG_M); 3815188341Skmacy 3816181641Skmacy /* 3817181641Skmacy * Regardless of whether a pte is 32 or 64 bits 3818181641Skmacy * in size, PG_RW and PG_M are among the least 3819181641Skmacy * significant 32 bits. 3820181641Skmacy */ 3821188341Skmacy PT_SET_VA_MA(pte, newpte, TRUE); 3822188341Skmacy if (*pte != newpte) 3823181641Skmacy goto retry; 3824188341Skmacy 3825181641Skmacy if ((oldpte & PG_M) != 0) 3826181641Skmacy vm_page_dirty(m); 3827181641Skmacy pmap_invalidate_page(pmap, pv->pv_va); 3828181641Skmacy } 3829181641Skmacy PMAP_UNLOCK(pmap); 3830181641Skmacy } 3831181641Skmacy vm_page_flag_clear(m, PG_WRITEABLE); 3832181641Skmacy PT_UPDATES_FLUSH(); 3833181641Skmacy if (*PMAP1) 3834181641Skmacy PT_SET_MA(PADDR1, 0); 3835181641Skmacy sched_unpin(); 3836207796Salc vm_page_unlock_queues(); 3837181641Skmacy} 3838181641Skmacy 3839181641Skmacy/* 3840181641Skmacy * pmap_ts_referenced: 3841181641Skmacy * 3842181641Skmacy * Return a count of reference bits for a page, clearing those bits. 3843181641Skmacy * It is not necessary for every reference bit to be cleared, but it 3844181641Skmacy * is necessary that 0 only be returned when there are truly no 3845181641Skmacy * reference bits set. 3846181641Skmacy * 3847181641Skmacy * XXX: The exact number of bits to check and clear is a matter that 3848181641Skmacy * should be tested and standardized at some point in the future for 3849181641Skmacy * optimal aging of shared pages. 
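 *
 * (Editor's aside, not part of the original comment: the walk below
 * bounds its work by rotating each visited pv entry to the tail of
 * the list and giving up once more than four referenced mappings have
 * been counted:
 *
 *	rtval++;
 *	if (rtval > 4)
 *		pvn = NULL;		// terminate after this entry
 *
 * so repeated calls sample different mappings of a busy page.)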
3850181641Skmacy */ 3851181641Skmacyint 3852181641Skmacypmap_ts_referenced(vm_page_t m) 3853181641Skmacy{ 3854181641Skmacy pv_entry_t pv, pvf, pvn; 3855181641Skmacy pmap_t pmap; 3856181641Skmacy pt_entry_t *pte; 3857181641Skmacy int rtval = 0; 3858181641Skmacy 3859181641Skmacy if (m->flags & PG_FICTITIOUS) 3860181641Skmacy return (rtval); 3861181641Skmacy sched_pin(); 3862181641Skmacy mtx_assert(&vm_page_queue_mtx, MA_OWNED); 3863181641Skmacy if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 3864181641Skmacy pvf = pv; 3865181641Skmacy do { 3866181641Skmacy pvn = TAILQ_NEXT(pv, pv_list); 3867181641Skmacy TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 3868181641Skmacy TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 3869181641Skmacy pmap = PV_PMAP(pv); 3870181641Skmacy PMAP_LOCK(pmap); 3871181641Skmacy pte = pmap_pte_quick(pmap, pv->pv_va); 3872181641Skmacy if ((*pte & PG_A) != 0) { 3873181641Skmacy PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); 3874181641Skmacy pmap_invalidate_page(pmap, pv->pv_va); 3875181641Skmacy rtval++; 3876181641Skmacy if (rtval > 4) 3877181641Skmacy pvn = NULL; 3878181641Skmacy } 3879181641Skmacy PMAP_UNLOCK(pmap); 3880181641Skmacy } while ((pv = pvn) != NULL && pv != pvf); 3881181641Skmacy } 3882181641Skmacy PT_UPDATES_FLUSH(); 3883181641Skmacy if (*PMAP1) 3884181641Skmacy PT_SET_MA(PADDR1, 0); 3885181641Skmacy 3886181641Skmacy sched_unpin(); 3887181641Skmacy return (rtval); 3888181641Skmacy} 3889181641Skmacy 3890181641Skmacy/* 3891181641Skmacy * Clear the modify bits on the specified physical page. 3892181641Skmacy */ 3893181641Skmacyvoid 3894181641Skmacypmap_clear_modify(vm_page_t m) 3895181641Skmacy{ 3896181641Skmacy pv_entry_t pv; 3897181641Skmacy pmap_t pmap; 3898181641Skmacy pt_entry_t *pte; 3899181641Skmacy 3900208504Salc KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3901208504Salc ("pmap_clear_modify: page %p is not managed", m)); 3902208504Salc VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 3903208504Salc KASSERT((m->oflags & VPO_BUSY) == 0, 3904208504Salc ("pmap_clear_modify: page %p is busy", m)); 3905208504Salc 3906208504Salc /* 3907208504Salc * If the page is not PG_WRITEABLE, then no PTEs can have PG_M set. 3908208504Salc * If the object containing the page is locked and the page is not 3909208504Salc * VPO_BUSY, then PG_WRITEABLE cannot be concurrently set. 3910208504Salc */ 3911208504Salc if ((m->flags & PG_WRITEABLE) == 0) 3912181641Skmacy return; 3913208504Salc vm_page_lock_queues(); 3914181641Skmacy sched_pin(); 3915181641Skmacy TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3916181641Skmacy pmap = PV_PMAP(pv); 3917181641Skmacy PMAP_LOCK(pmap); 3918181641Skmacy pte = pmap_pte_quick(pmap, pv->pv_va); 3919181641Skmacy if ((*pte & PG_M) != 0) { 3920181641Skmacy /* 3921181641Skmacy * Regardless of whether a pte is 32 or 64 bits 3922181641Skmacy * in size, PG_M is among the least significant 3923181641Skmacy * 32 bits. 3924181641Skmacy */ 3925181641Skmacy PT_SET_VA_MA(pte, *pte & ~PG_M, FALSE); 3926181641Skmacy pmap_invalidate_page(pmap, pv->pv_va); 3927181641Skmacy } 3928181641Skmacy PMAP_UNLOCK(pmap); 3929181641Skmacy } 3930181641Skmacy sched_unpin(); 3931208504Salc vm_page_unlock_queues(); 3932181641Skmacy} 3933181641Skmacy 3934181641Skmacy/* 3935181641Skmacy * pmap_clear_reference: 3936181641Skmacy * 3937181641Skmacy * Clear the reference bit on the specified physical page. 
3938181641Skmacy */ 3939181641Skmacyvoid 3940181641Skmacypmap_clear_reference(vm_page_t m) 3941181641Skmacy{ 3942181641Skmacy pv_entry_t pv; 3943181641Skmacy pmap_t pmap; 3944181641Skmacy pt_entry_t *pte; 3945181641Skmacy 3946208504Salc KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, 3947208504Salc ("pmap_clear_reference: page %p is not managed", m)); 3948208504Salc vm_page_lock_queues(); 3949181641Skmacy sched_pin(); 3950181641Skmacy TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 3951181641Skmacy pmap = PV_PMAP(pv); 3952181641Skmacy PMAP_LOCK(pmap); 3953181641Skmacy pte = pmap_pte_quick(pmap, pv->pv_va); 3954181641Skmacy if ((*pte & PG_A) != 0) { 3955181641Skmacy /* 3956181641Skmacy * Regardless of whether a pte is 32 or 64 bits 3957181641Skmacy * in size, PG_A is among the least significant 3958181641Skmacy * 32 bits. 3959181641Skmacy */ 3960181641Skmacy PT_SET_VA_MA(pte, *pte & ~PG_A, FALSE); 3961181641Skmacy pmap_invalidate_page(pmap, pv->pv_va); 3962181641Skmacy } 3963181641Skmacy PMAP_UNLOCK(pmap); 3964181641Skmacy } 3965181641Skmacy sched_unpin(); 3966208504Salc vm_page_unlock_queues(); 3967181641Skmacy} 3968181641Skmacy 3969181641Skmacy/* 3970181641Skmacy * Miscellaneous support routines follow 3971181641Skmacy */ 3972181641Skmacy 3973181641Skmacy/* 3974181641Skmacy * Map a set of physical memory pages into the kernel virtual 3975181641Skmacy * address space. Return a pointer to where it is mapped. This 3976181641Skmacy * routine is intended to be used for mapping device memory, 3977181641Skmacy * NOT real memory. 3978181641Skmacy */ 3979181641Skmacyvoid * 3980181641Skmacypmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) 3981181641Skmacy{ 3982195949Skib vm_offset_t va, offset; 3983195949Skib vm_size_t tmpsize; 3984181641Skmacy 3985181641Skmacy offset = pa & PAGE_MASK; 3986181641Skmacy size = roundup(offset + size, PAGE_SIZE); 3987181641Skmacy pa = pa & PG_FRAME; 3988181641Skmacy 3989181641Skmacy if (pa < KERNLOAD && pa + size <= KERNLOAD) 3990181641Skmacy va = KERNBASE + pa; 3991181641Skmacy else 3992181641Skmacy va = kmem_alloc_nofault(kernel_map, size); 3993181641Skmacy if (!va) 3994181641Skmacy panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 3995181641Skmacy 3996195949Skib for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) 3997195949Skib pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); 3998195949Skib pmap_invalidate_range(kernel_pmap, va, va + tmpsize); 3999195949Skib pmap_invalidate_cache_range(va, va + size); 4000181641Skmacy return ((void *)(va + offset)); 4001181641Skmacy} 4002181641Skmacy 4003181641Skmacyvoid * 4004181641Skmacypmap_mapdev(vm_paddr_t pa, vm_size_t size) 4005181641Skmacy{ 4006181641Skmacy 4007181641Skmacy return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); 4008181641Skmacy} 4009181641Skmacy 4010181641Skmacyvoid * 4011181641Skmacypmap_mapbios(vm_paddr_t pa, vm_size_t size) 4012181641Skmacy{ 4013181641Skmacy 4014181641Skmacy return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); 4015181641Skmacy} 4016181641Skmacy 4017181641Skmacyvoid 4018181641Skmacypmap_unmapdev(vm_offset_t va, vm_size_t size) 4019181641Skmacy{ 4020181641Skmacy vm_offset_t base, offset, tmpva; 4021181641Skmacy 4022181641Skmacy if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD) 4023181641Skmacy return; 4024181641Skmacy base = trunc_page(va); 4025181641Skmacy offset = va & PAGE_MASK; 4026181641Skmacy size = roundup(offset + size, PAGE_SIZE); 4027181641Skmacy critical_enter(); 4028181641Skmacy for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) 
4029181641Skmacy pmap_kremove(tmpva); 4030181641Skmacy pmap_invalidate_range(kernel_pmap, va, tmpva); 4031181641Skmacy critical_exit(); 4032181641Skmacy kmem_free(kernel_map, base, size); 4033181641Skmacy} 4034181641Skmacy 4035195774Salc/* 4036195774Salc * Sets the memory attribute for the specified page. 4037195774Salc */ 4038195774Salcvoid 4039195774Salcpmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 4040195774Salc{ 4041195949Skib struct sysmaps *sysmaps; 4042195949Skib vm_offset_t sva, eva; 4043195774Salc 4044195774Salc m->md.pat_mode = ma; 4045195949Skib if ((m->flags & PG_FICTITIOUS) != 0) 4046195949Skib return; 4047195774Salc 4048195774Salc /* 4049195774Salc * If "m" is a normal page, flush it from the cache. 4050195949Skib * See pmap_invalidate_cache_range(). 4051195949Skib * 4052195949Skib * First, try to find an existing mapping of the page by sf 4053195949Skib * buffer. sf_buf_invalidate_cache() modifies mapping and 4054195949Skib * flushes the cache. 4055195774Salc */ 4056195949Skib if (sf_buf_invalidate_cache(m)) 4057195949Skib return; 4058195949Skib 4059195949Skib /* 4060195949Skib * If page is not mapped by sf buffer, but CPU does not 4061195949Skib * support self snoop, map the page transient and do 4062195949Skib * invalidation. In the worst case, whole cache is flushed by 4063195949Skib * pmap_invalidate_cache_range(). 4064195949Skib */ 4065195949Skib if ((cpu_feature & (CPUID_SS|CPUID_CLFSH)) == CPUID_CLFSH) { 4066195949Skib sysmaps = &sysmaps_pcpu[PCPU_GET(cpuid)]; 4067195949Skib mtx_lock(&sysmaps->lock); 4068195949Skib if (*sysmaps->CMAP2) 4069195949Skib panic("pmap_page_set_memattr: CMAP2 busy"); 4070195949Skib sched_pin(); 4071195949Skib PT_SET_MA(sysmaps->CADDR2, PG_V | PG_RW | 4072195949Skib xpmap_ptom(VM_PAGE_TO_PHYS(m)) | PG_A | PG_M | 4073195949Skib pmap_cache_bits(m->md.pat_mode, 0)); 4074195949Skib invlcaddr(sysmaps->CADDR2); 4075195949Skib sva = (vm_offset_t)sysmaps->CADDR2; 4076195949Skib eva = sva + PAGE_SIZE; 4077195949Skib } else 4078195949Skib sva = eva = 0; /* gcc */ 4079195949Skib pmap_invalidate_cache_range(sva, eva); 4080195949Skib if (sva != 0) { 4081195949Skib PT_SET_MA(sysmaps->CADDR2, 0); 4082195949Skib sched_unpin(); 4083195949Skib mtx_unlock(&sysmaps->lock); 4084195774Salc } 4085195774Salc} 4086195774Salc 4087181641Skmacyint 4088181641Skmacypmap_change_attr(va, size, mode) 4089181641Skmacy vm_offset_t va; 4090181641Skmacy vm_size_t size; 4091181641Skmacy int mode; 4092181641Skmacy{ 4093181641Skmacy vm_offset_t base, offset, tmpva; 4094181641Skmacy pt_entry_t *pte; 4095181641Skmacy u_int opte, npte; 4096181641Skmacy pd_entry_t *pde; 4097195949Skib boolean_t changed; 4098181641Skmacy 4099181641Skmacy base = trunc_page(va); 4100181641Skmacy offset = va & PAGE_MASK; 4101181641Skmacy size = roundup(offset + size, PAGE_SIZE); 4102181641Skmacy 4103181641Skmacy /* Only supported on kernel virtual addresses. */ 4104181641Skmacy if (base <= VM_MAXUSER_ADDRESS) 4105181641Skmacy return (EINVAL); 4106181641Skmacy 4107181641Skmacy /* 4MB pages and pages that aren't mapped aren't supported. 
*/
4108181641Skmacy	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
4109181641Skmacy		pde = pmap_pde(kernel_pmap, tmpva);
4110181641Skmacy		if (*pde & PG_PS)
4111181641Skmacy			return (EINVAL);
4112181641Skmacy		if ((*pde & PG_V) == 0)
4113181641Skmacy			return (EINVAL);
4114181641Skmacy		pte = vtopte(tmpva);
4115181641Skmacy		if ((*pte & PG_V) == 0)
4116181641Skmacy			return (EINVAL);
4117181641Skmacy	}
4118181641Skmacy
4119195949Skib	changed = FALSE;
4120195949Skib
4121181641Skmacy	/*
4122181641Skmacy	 * Ok, all the pages exist and are 4k, so run through them updating
4123181641Skmacy	 * their cache mode.
4124181641Skmacy	 */
4125181641Skmacy	for (tmpva = base; size > 0; ) {
4126181641Skmacy		pte = vtopte(tmpva);
4127181641Skmacy
4128181641Skmacy		/*
4129181641Skmacy		 * The cache mode bits are all in the low 32-bits of the
4130181641Skmacy		 * PTE, so we can just spin on updating the low 32-bits.
4131181641Skmacy		 */
4132181641Skmacy		do {
4133181641Skmacy			opte = *(u_int *)pte;
4134181641Skmacy			npte = opte & ~(PG_PTE_PAT | PG_NC_PCD | PG_NC_PWT);
4135181641Skmacy			npte |= pmap_cache_bits(mode, 0);
4136181641Skmacy			PT_SET_VA_MA(pte, npte, TRUE);
4137181641Skmacy		} while (npte != opte && (*pte != npte));
4138195949Skib		if (npte != opte)
4139195949Skib			changed = TRUE;
4140181641Skmacy		tmpva += PAGE_SIZE;
4141181641Skmacy		size -= PAGE_SIZE;
4142181641Skmacy	}
4143181641Skmacy
4144181641Skmacy	/*
4145181641Skmacy	 * Flush CPU caches to make sure any data isn't cached that shouldn't
4146181641Skmacy	 * be, etc.
4147181641Skmacy	 */
4148195949Skib	if (changed) {
4149195949Skib		pmap_invalidate_range(kernel_pmap, base, tmpva);
4150195949Skib		pmap_invalidate_cache_range(base, tmpva);
4151195949Skib	}
4152181641Skmacy	return (0);
4153181641Skmacy}
4154181641Skmacy
4155181641Skmacy/*
4156181641Skmacy * Perform the pmap work for mincore.
4157181641Skmacy */
4158181641Skmacyint
4159208504Salcpmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
4160181641Skmacy{
4161181641Skmacy	pt_entry_t *ptep, pte;
4162208504Salc	vm_paddr_t pa;
4163208504Salc	int val;
4164181641Skmacy
4165181641Skmacy	PMAP_LOCK(pmap);
4166208504Salcretry:
4167181641Skmacy	ptep = pmap_pte(pmap, addr);
4168181641Skmacy	pte = (ptep != NULL) ? PT_GET(ptep) : 0;
4169181641Skmacy	pmap_pte_release(ptep);
4170208504Salc	val = 0;
4171208504Salc	if ((pte & PG_V) != 0) {
4172208504Salc		val |= MINCORE_INCORE;
4173208504Salc		if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW))
4174208504Salc			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4175208504Salc		if ((pte & PG_A) != 0)
4176208504Salc			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
4177208504Salc	}
4178208504Salc	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
4179208504Salc	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) &&
4180208504Salc	    (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) {
4181208504Salc		pa = pte & PG_FRAME;
4182208504Salc		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change.
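 *
 * (Editor's aside, not part of the original comment:
 * vm_page_pa_tryrelock() may drop and re-acquire locks; if it reports
 * that the physical address changed underneath us, control restarts
 * at "retry:" and re-reads the PTE, so "val" always describes one
 * consistent snapshot.  For a valid, writable, dirty, referenced
 * mapping the result is
 *
 *	MINCORE_INCORE | MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER |
 *	    MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER
 * )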
*/ 4183208504Salc if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 4184208504Salc goto retry; 4185208504Salc } else 4186208504Salc PA_UNLOCK_COND(*locked_pa); 4187181641Skmacy PMAP_UNLOCK(pmap); 4188208504Salc return (val); 4189181641Skmacy} 4190181641Skmacy 4191181641Skmacyvoid 4192181641Skmacypmap_activate(struct thread *td) 4193181641Skmacy{ 4194181641Skmacy pmap_t pmap, oldpmap; 4195181641Skmacy u_int32_t cr3; 4196181641Skmacy 4197181641Skmacy critical_enter(); 4198181641Skmacy pmap = vmspace_pmap(td->td_proc->p_vmspace); 4199181641Skmacy oldpmap = PCPU_GET(curpmap); 4200181641Skmacy#if defined(SMP) 4201181641Skmacy atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); 4202181641Skmacy atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); 4203181641Skmacy#else 4204181641Skmacy oldpmap->pm_active &= ~1; 4205181641Skmacy pmap->pm_active |= 1; 4206181641Skmacy#endif 4207181641Skmacy#ifdef PAE 4208181641Skmacy cr3 = vtophys(pmap->pm_pdpt); 4209181641Skmacy#else 4210181641Skmacy cr3 = vtophys(pmap->pm_pdir); 4211181641Skmacy#endif 4212181641Skmacy /* 4213181641Skmacy * pmap_activate is for the current thread on the current cpu 4214181641Skmacy */ 4215181641Skmacy td->td_pcb->pcb_cr3 = cr3; 4216181641Skmacy PT_UPDATES_FLUSH(); 4217181641Skmacy load_cr3(cr3); 4218181641Skmacy PCPU_SET(curpmap, pmap); 4219181641Skmacy critical_exit(); 4220181641Skmacy} 4221181641Skmacy 4222198341Smarcelvoid 4223198341Smarcelpmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) 4224198341Smarcel{ 4225198341Smarcel} 4226198341Smarcel 4227181747Skmacy/* 4228181747Skmacy * Increase the starting virtual address of the given mapping if a 4229181747Skmacy * different alignment might result in more superpage mappings. 4230181747Skmacy */ 4231181747Skmacyvoid 4232181747Skmacypmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 4233181747Skmacy vm_offset_t *addr, vm_size_t size) 4234181641Skmacy{ 4235181747Skmacy vm_offset_t superpage_offset; 4236181641Skmacy 4237181747Skmacy if (size < NBPDR) 4238181747Skmacy return; 4239181747Skmacy if (object != NULL && (object->flags & OBJ_COLORED) != 0) 4240181747Skmacy offset += ptoa(object->pg_color); 4241181747Skmacy superpage_offset = offset & PDRMASK; 4242181747Skmacy if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || 4243181747Skmacy (*addr & PDRMASK) == superpage_offset) 4244181747Skmacy return; 4245181747Skmacy if ((*addr & PDRMASK) < superpage_offset) 4246181747Skmacy *addr = (*addr & ~PDRMASK) + superpage_offset; 4247181747Skmacy else 4248181747Skmacy *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; 4249181641Skmacy} 4250181641Skmacy 4251190627Sdfr#ifdef XEN 4252190627Sdfr 4253190627Sdfrvoid 4254190627Sdfrpmap_suspend() 4255190627Sdfr{ 4256190627Sdfr pmap_t pmap; 4257190627Sdfr int i, pdir, offset; 4258190627Sdfr vm_paddr_t pdirma; 4259190627Sdfr mmu_update_t mu[4]; 4260190627Sdfr 4261190627Sdfr /* 4262190627Sdfr * We need to remove the recursive mapping structure from all 4263190627Sdfr * our pmaps so that Xen doesn't get confused when it restores 4264190627Sdfr * the page tables. The recursive map lives at page directory 4265190627Sdfr * index PTDPTDI. We assume that the suspend code has stopped 4266190627Sdfr * the other vcpus (if any). 
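 *
 * (Editor's worked example, not part of the original comment: with
 * NPDEPG == 512 under PAE, the recursive-map slot PTDPTDI + i is
 * located by
 *
 *	pdir   = (PTDPTDI + i) / NPDEPG;	// which L2 page
 *	offset = (PTDPTDI + i) % NPDEPG;	// slot within that page
 *
 * and because the four consecutive slots can straddle an L2 page
 * boundary, the division is redone for every i.)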
4267190627Sdfr */ 4268190627Sdfr LIST_FOREACH(pmap, &allpmaps, pm_list) { 4269190627Sdfr for (i = 0; i < 4; i++) { 4270190627Sdfr /* 4271190627Sdfr * Figure out which page directory (L2) page 4272190627Sdfr * contains this bit of the recursive map and 4273190627Sdfr * the offset within that page of the map 4274190627Sdfr * entry 4275190627Sdfr */ 4276190627Sdfr pdir = (PTDPTDI + i) / NPDEPG; 4277190627Sdfr offset = (PTDPTDI + i) % NPDEPG; 4278190627Sdfr pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; 4279190627Sdfr mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); 4280190627Sdfr mu[i].val = 0; 4281190627Sdfr } 4282190627Sdfr HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); 4283190627Sdfr } 4284190627Sdfr} 4285190627Sdfr 4286190627Sdfrvoid 4287190627Sdfrpmap_resume() 4288190627Sdfr{ 4289190627Sdfr pmap_t pmap; 4290190627Sdfr int i, pdir, offset; 4291190627Sdfr vm_paddr_t pdirma; 4292190627Sdfr mmu_update_t mu[4]; 4293190627Sdfr 4294190627Sdfr /* 4295190627Sdfr * Restore the recursive map that we removed on suspend. 4296190627Sdfr */ 4297190627Sdfr LIST_FOREACH(pmap, &allpmaps, pm_list) { 4298190627Sdfr for (i = 0; i < 4; i++) { 4299190627Sdfr /* 4300190627Sdfr * Figure out which page directory (L2) page 4301190627Sdfr * contains this bit of the recursive map and 4302190627Sdfr * the offset within that page of the map 4303190627Sdfr * entry 4304190627Sdfr */ 4305190627Sdfr pdir = (PTDPTDI + i) / NPDEPG; 4306190627Sdfr offset = (PTDPTDI + i) % NPDEPG; 4307190627Sdfr pdirma = pmap->pm_pdpt[pdir] & PG_FRAME; 4308190627Sdfr mu[i].ptr = pdirma + offset * sizeof(pd_entry_t); 4309190627Sdfr mu[i].val = (pmap->pm_pdpt[i] & PG_FRAME) | PG_V; 4310190627Sdfr } 4311190627Sdfr HYPERVISOR_mmu_update(mu, 4, NULL, DOMID_SELF); 4312190627Sdfr } 4313190627Sdfr} 4314190627Sdfr 4315190627Sdfr#endif 4316190627Sdfr 4317181641Skmacy#if defined(PMAP_DEBUG) 4318181641Skmacypmap_pid_dump(int pid) 4319181641Skmacy{ 4320181641Skmacy pmap_t pmap; 4321181641Skmacy struct proc *p; 4322181641Skmacy int npte = 0; 4323181641Skmacy int index; 4324181641Skmacy 4325181641Skmacy sx_slock(&allproc_lock); 4326181641Skmacy FOREACH_PROC_IN_SYSTEM(p) { 4327181641Skmacy if (p->p_pid != pid) 4328181641Skmacy continue; 4329181641Skmacy 4330181641Skmacy if (p->p_vmspace) { 4331181641Skmacy int i,j; 4332181641Skmacy index = 0; 4333181641Skmacy pmap = vmspace_pmap(p->p_vmspace); 4334181641Skmacy for (i = 0; i < NPDEPTD; i++) { 4335181641Skmacy pd_entry_t *pde; 4336181641Skmacy pt_entry_t *pte; 4337181641Skmacy vm_offset_t base = i << PDRSHIFT; 4338181641Skmacy 4339181641Skmacy pde = &pmap->pm_pdir[i]; 4340181641Skmacy if (pde && pmap_pde_v(pde)) { 4341181641Skmacy for (j = 0; j < NPTEPG; j++) { 4342181641Skmacy vm_offset_t va = base + (j << PAGE_SHIFT); 4343181641Skmacy if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) { 4344181641Skmacy if (index) { 4345181641Skmacy index = 0; 4346181641Skmacy printf("\n"); 4347181641Skmacy } 4348181641Skmacy sx_sunlock(&allproc_lock); 4349181641Skmacy return npte; 4350181641Skmacy } 4351181641Skmacy pte = pmap_pte(pmap, va); 4352181641Skmacy if (pte && pmap_pte_v(pte)) { 4353181641Skmacy pt_entry_t pa; 4354181641Skmacy vm_page_t m; 4355181641Skmacy pa = PT_GET(pte); 4356181641Skmacy m = PHYS_TO_VM_PAGE(pa & PG_FRAME); 4357181641Skmacy printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x", 4358181641Skmacy va, pa, m->hold_count, m->wire_count, m->flags); 4359181641Skmacy npte++; 4360181641Skmacy index++; 4361181641Skmacy if (index >= 2) { 4362181641Skmacy index = 0; 4363181641Skmacy printf("\n"); 4364181641Skmacy } 
else {
4365181641Skmacy						printf(" ");
4366181641Skmacy					}
4367181641Skmacy				}
4368181641Skmacy			}
4369181641Skmacy		}
4370181641Skmacy		}
4371181641Skmacy	}
4372181641Skmacy	}
4373181641Skmacy	sx_sunlock(&allproc_lock);
4374181641Skmacy	return (npte);
4375181641Skmacy}
4376181641Skmacy#endif
4377181641Skmacy
4378181641Skmacy#if defined(DEBUG)
4379181641Skmacy
4380181641Skmacystatic void pads(pmap_t pm);
4381181641Skmacyvoid pmap_pvdump(vm_paddr_t pa);
4382181641Skmacy
4383181641Skmacy/* Print the address space of a pmap. */
4384181641Skmacystatic void
4385181641Skmacypads(pmap_t pm)
4386181641Skmacy{
4387181641Skmacy	int i, j;
4388181641Skmacy	vm_paddr_t va;
4389181641Skmacy	pt_entry_t *ptep;
4390181641Skmacy
4391181641Skmacy	if (pm == kernel_pmap)
4392181641Skmacy		return;
4393181641Skmacy	for (i = 0; i < NPDEPTD; i++)
4394181641Skmacy		if (pm->pm_pdir[i])
4395181641Skmacy			for (j = 0; j < NPTEPG; j++) {
4396181641Skmacy				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
4397181641Skmacy				if (pm == kernel_pmap && va < KERNBASE)
4398181641Skmacy					continue;
4399181641Skmacy				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
4400181641Skmacy					continue;
4401181641Skmacy				ptep = pmap_pte(pm, va);
4402181641Skmacy				if (pmap_pte_v(ptep))
4403181641Skmacy					printf("%x:%x ", va, *ptep);
4404181641Skmacy			}
4405181641Skmacy
4406181641Skmacy}
4407181641Skmacy
4408181641Skmacyvoid
4409181641Skmacypmap_pvdump(vm_paddr_t pa)
4410181641Skmacy{
4411181641Skmacy	pv_entry_t pv;
4412181641Skmacy	pmap_t pmap;
4413181641Skmacy	vm_page_t m;
4414181641Skmacy
4415181641Skmacy	printf("pa %x", pa);
4416181641Skmacy	m = PHYS_TO_VM_PAGE(pa);
4417181641Skmacy	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
4418181641Skmacy		pmap = PV_PMAP(pv);
4419181641Skmacy		printf(" -> pmap %p, va %x", (void *)pmap, pv->pv_va);
4420181641Skmacy		pads(pmap);
4421181641Skmacy	}
4422181641Skmacy	printf(" ");
4423181641Skmacy}
4424181641Skmacy#endif