pmap.c revision 130539
/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/i386/i386/pmap.c 130539 2004-06-15 19:28:40Z alc $");

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */

#include "opt_cpu.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#if !defined(CPU_ENABLE_SSE) && defined(I686_CPU)
#define CPU_ENABLE_SSE
#endif
#if defined(CPU_DISABLE_SSE)
#undef CPU_ENABLE_SSE
#endif

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Get PDEs and PTEs for user/kernel address space
 */
#define	pmap_pde(m, v)	(&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])

#define pmap_pde_v(pte)		((*(int *)pte & PG_V) != 0)
#define pmap_pte_w(pte)		((*(int *)pte & PG_W) != 0)
#define pmap_pte_m(pte)		((*(int *)pte & PG_M) != 0)
#define pmap_pte_u(pte)		((*(int *)pte & PG_A) != 0)
#define pmap_pte_v(pte)		((*(int *)pte & PG_V) != 0)

#define pmap_pte_set_w(pte, v)	((v) ? atomic_set_int((u_int *)(pte), PG_W) : \
    atomic_clear_int((u_int *)(pte), PG_W))
#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
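
/*
 * Illustrative note (editorial, not from the original source): on
 * non-PAE i386, PDRSHIFT is 22, so pmap_pde() selects one of the 1024
 * page directory entries by simple index; e.g. va 0xbfbfe000 >> 22 ==
 * 0x2fe (entry 766), assuming the usual 4KB-page, non-PAE layout.
 * pdir_pde() performs the same lookup against a bare pd_entry_t array
 * rather than a struct pmap.
 */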

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;
#ifdef SMP
static struct mtx lazypmap_lock;
#endif

vm_paddr_t avail_end;	/* PA of last available physical page */
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
int pgeflag = 0;		/* PG_G or-in */
int pseflag = 0;		/* PG_PS or-in */

static int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;

#ifdef PAE
static uma_zone_t pdptzone;
#endif

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_pagedaemon_waken;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = 0;
static pt_entry_t *CMAP2, *CMAP3, *ptmmap;
caddr_t CADDR1 = 0, ptvmmap = 0;
static caddr_t CADDR2, CADDR3;
static struct mtx CMAPCADDR12_lock;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp = 0;
/*
 * Crashdump maps.
 */
static pt_entry_t *pt_crashdumpmap;
static caddr_t crashdumpmap;

#ifdef SMP
extern pt_entry_t *SMPpt;
#endif
static pt_entry_t *PMAP1 = 0, *PMAP2;
static pt_entry_t *PADDR1 = 0, *PADDR2;
#ifdef SMP
static int PMAP1cpu;
static int PMAP1changedcpu;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changedcpu, CTLFLAG_RD,
    &PMAP1changedcpu, 0,
    "Number of times pmap_pte_quick changed CPU with same PMAP1");
#endif
static int PMAP1changed;
SYSCTL_INT(_debug, OID_AUTO, PMAP1changed, CTLFLAG_RD,
    &PMAP1changed, 0,
    "Number of times pmap_pte_quick changed PMAP1");
static int PMAP1unchanged;
SYSCTL_INT(_debug, OID_AUTO, PMAP1unchanged, CTLFLAG_RD,
    &PMAP1unchanged, 0,
    "Number of times pmap_pte_quick didn't change PMAP1");

static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
static void	pmap_clear_ptes(vm_page_t m, int bit)
    __always_inline;

static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
    vm_offset_t va);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t mpte, vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va);

static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex);
static pt_entry_t *pmap_pte_quick(pmap_t pmap, vm_offset_t va);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
#ifdef PAE
static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
#endif

CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));

/*
 * Move the kernel virtual free pointer to the next
 * 4MB.  This is used to help improve performance
 * by using a large (4MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
	vm_offset_t newaddr = addr;

#ifndef DISABLE_PSE
	if (cpu_feature & CPUID_PSE)
		newaddr = (addr + PDRMASK) & ~PDRMASK;
#endif
	return newaddr;
}
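
/*
 * Illustrative note (editorial): PDRMASK is NBPDR - 1 (0x3fffff for
 * 4MB superpages), so the expression above rounds up to the next 4MB
 * boundary; e.g. an addr of 0xc0401234 would yield 0xc0800000.  The
 * exact constants assume the non-PAE 4MB-PDE layout.
 */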
/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	On the i386 this is called after mapping has already been enabled
 *	and just syncs the pmap module with what has already been done.
 *	[We can't call it easily with mapping off since the kernel is not
 *	mapped with PA == VA, hence we would have to relocate every address
 *	from the linked base (virtual) address "KERNBASE" to the actual
 *	(physical) address starting relative to 0]
 */
void
pmap_bootstrap(firstaddr, loadaddr)
	vm_paddr_t firstaddr;
	vm_paddr_t loadaddr;
{
	vm_offset_t va;
	pt_entry_t *pte;
	int i;

	/*
	 * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
	 * large. It should instead be correctly calculated in locore.s and
	 * not based on 'first' (which is a physical address, not a virtual
	 * address, for the start of unused physical memory). The kernel
	 * page tables are NOT double mapped and thus should not be included
	 * in this calculation.
	 */
	virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
	virtual_avail = pmap_kmem_choose(virtual_avail);

	virtual_end = VM_MAX_KERNEL_ADDRESS;

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
#ifdef PAE
	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
#endif
	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	LIST_INIT(&allpmaps);
#ifdef SMP
	mtx_init(&lazypmap_lock, "lazypmap", NULL, MTX_SPIN);
#endif
	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
	nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_avail;
	pte = vtopte(va);

	/*
	 * CMAP1/CMAP2 are used for zeroing and copying pages.
	 * CMAP3 is used for the idle process page zeroing.
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)
	SYSMAP(caddr_t, CMAP2, CADDR2, 1)
	SYSMAP(caddr_t, CMAP3, CADDR3, 1)
	*CMAP3 = 0;

	mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF);

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);

	/*
	 * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
	 * XXX ptmmap is not used.
	 */
	SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

	/*
	 * msgbufp is used to map the system message buffer.
	 * XXX msgbufmap is not used.
	 */
	SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
	    atop(round_page(MSGBUF_SIZE)))

	/*
	 * ptemap is used for pmap_pte_quick
	 */
	SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
	SYSMAP(pt_entry_t *, PMAP2, PADDR2, 1);

	virtual_avail = va;

	*CMAP1 = *CMAP2 = 0;
	for (i = 0; i < NKPT; i++)
		PTD[i] = 0;

	/* Turn on PG_G on kernel page(s) */
	pmap_set_pg();
}

/*
 * Set PG_G on kernel pages.  Only the BSP calls this when SMP is turned on.
 */
void
pmap_set_pg(void)
{
	pd_entry_t pdir;
	pt_entry_t *pte;
	vm_offset_t va, endva;
	int i;

	if (pgeflag == 0)
		return;

	i = KERNLOAD/NBPDR;
	endva = KERNBASE + KERNend;

	if (pseflag) {
		va = KERNBASE + KERNLOAD;
		while (va < endva) {
			pdir = kernel_pmap->pm_pdir[KPTDI+i];
			pdir |= pgeflag;
			kernel_pmap->pm_pdir[KPTDI+i] = PTD[KPTDI+i] = pdir;
			invltlb();	/* Play it safe, invltlb() every time */
			i++;
			va += NBPDR;
		}
	} else {
		va = (vm_offset_t)btext;
		while (va < endva) {
			pte = vtopte(va);
			if (*pte)
				*pte |= pgeflag;
			invltlb();	/* Play it safe, invltlb() every time */
			va += PAGE_SIZE;
		}
	}
}

#ifdef PAE
static void *
pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
	*flags = UMA_SLAB_PRIV;
	return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0));
}
#endif
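
/*
 * Note (editorial, hedged): in PAE mode %cr3 holds a 32-bit physical
 * address, so the PDPT must live below 4GB, which is presumably why the
 * allocator above bounds contigmalloc() at 0xffffffff.  The 32-byte
 * alignment the PDPT also requires is enforced by the pdptzone
 * alignment mask ((NPGPTD * sizeof(pdpt_entry_t)) - 1) set up in
 * pmap_init().
 */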
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support in a fairly consistent
 *	way, discontiguous physical memory.
 */
void
pmap_init(void)
{
	int i;

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */
	for (i = 0; i < vm_page_array_size; i++) {
		vm_page_t m;

		m = &vm_page_array[i];
		TAILQ_INIT(&m->md.pv_list);
		m->md.pv_list_count = 0;
	}

	/*
	 * init the pv free list
	 */
	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
	uma_prealloc(pvzone, MINPV);

#ifdef PAE
	pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
	    NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1,
	    UMA_ZONE_VM | UMA_ZONE_NOFREE);
	uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
#endif

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2()
{
	int shpgperproc = PMAP_SHPGPERPROC;

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
}


/***************************************************
 * Low level helper routines.....
 ***************************************************/

#if defined(PMAP_DIAGNOSTIC)

/*
 * This code checks for non-writeable/modified pages.
 * This should be an invalid condition.
 */
static int
pmap_nw_modified(pt_entry_t ptea)
{
	int pte;

	pte = (int) ptea;

	if ((pte & (PG_M|PG_RW)) == PG_M)
		return 1;
	else
		return 0;
}
#endif


/*
 * this routine defines the region(s) of memory that should
 * not be tested for the modified bit.
 */
static PMAP_INLINE int
pmap_track_modified(vm_offset_t va)
{
	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
		return 1;
	else
		return 0;
}
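
/*
 * Note (editorial, hedged): the [clean_sva, clean_eva) window excluded
 * above appears to cover the buffer cache's kernel VA range, where
 * dirtiness is tracked by the buffer layer itself rather than via the
 * page-table PG_M bit, so reflecting PG_M into vm_page_dirty() there
 * would be redundant.
 */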
#ifdef I386_CPU
/*
 * i386 only has "invalidate everything" and no SMP to worry about.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	if (pmap == kernel_pmap || pmap->pm_active)
		invltlb();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	if (pmap == kernel_pmap || pmap->pm_active)
		invltlb();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	if (pmap == kernel_pmap || pmap->pm_active)
		invltlb();
}
#else /* !I386_CPU */
#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 */
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	u_int cpumask;
	u_int other_cpus;

	if (smp_started) {
		if (!(read_eflags() & PSL_I))
			panic("%s: interrupts disabled", __func__);
		mtx_lock_spin(&smp_tlb_mtx);
	} else
		critical_enter();
	/*
	 * We need to disable interrupt preemption but MUST NOT have
	 * interrupts disabled here.
	 * XXX we may need to hold schedlock to get a coherent pm_active
	 * XXX critical sections disable interrupts again
	 */
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invlpg(va);
		smp_invlpg(va);
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invlpg(va);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
	}
	if (smp_started)
		mtx_unlock_spin(&smp_tlb_mtx);
	else
		critical_exit();
}

void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	u_int cpumask;
	u_int other_cpus;
	vm_offset_t addr;

	if (smp_started) {
		if (!(read_eflags() & PSL_I))
			panic("%s: interrupts disabled", __func__);
		mtx_lock_spin(&smp_tlb_mtx);
	} else
		critical_enter();
	/*
	 * We need to disable interrupt preemption but MUST NOT have
	 * interrupts disabled here.
	 * XXX we may need to hold schedlock to get a coherent pm_active
	 * XXX critical sections disable interrupts again
	 */
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		smp_invlpg_range(sva, eva);
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			for (addr = sva; addr < eva; addr += PAGE_SIZE)
				invlpg(addr);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
			    sva, eva);
	}
	if (smp_started)
		mtx_unlock_spin(&smp_tlb_mtx);
	else
		critical_exit();
}

void
pmap_invalidate_all(pmap_t pmap)
{
	u_int cpumask;
	u_int other_cpus;

	if (smp_started) {
		if (!(read_eflags() & PSL_I))
			panic("%s: interrupts disabled", __func__);
		mtx_lock_spin(&smp_tlb_mtx);
	} else
		critical_enter();
	/*
	 * We need to disable interrupt preemption but MUST NOT have
	 * interrupts disabled here.
	 * XXX we may need to hold schedlock to get a coherent pm_active
	 * XXX critical sections disable interrupts again
	 */
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invltlb();
		smp_invltlb();
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invltlb();
		if (pmap->pm_active & other_cpus)
			smp_masked_invltlb(pmap->pm_active & other_cpus);
	}
	if (smp_started)
		mtx_unlock_spin(&smp_tlb_mtx);
	else
		critical_exit();
}
#else /* !SMP */
/*
 * Normal, non-SMP, 486+ invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	if (pmap == kernel_pmap || pmap->pm_active)
		invlpg(va);
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (pmap == kernel_pmap || pmap->pm_active)
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	if (pmap == kernel_pmap || pmap->pm_active)
		invltlb();
}
#endif /* !SMP */
#endif /* !I386_CPU */

/*
 * Are we current address space or kernel?  N.B. We return FALSE when
 * a pmap's page table is in use because a kernel thread is borrowing
 * it.  The borrowed page table can change spontaneously, making any
 * dependence on its continued use subject to a race condition.
 */
static __inline int
pmap_is_current(pmap_t pmap)
{

	return (pmap == kernel_pmap ||
	    (pmap == vmspace_pmap(curthread->td_proc->p_vmspace) &&
	    (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)));
}

/*
 * If the given pmap is not the current pmap, Giant must be held.
 */
pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		GIANT_REQUIRED;
		newpf = *pde & PG_FRAME;
		if ((*PMAP2 & PG_FRAME) != newpf) {
			*PMAP2 = newpf | PG_RW | PG_V | PG_A | PG_M;
			pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR2);
		}
		return (PADDR2 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

static __inline void
invlcaddr(void *caddr)
{
#ifdef I386_CPU
	invltlb();
#else
	invlpg((u_int)caddr);
#endif
}
/*
 * Super fast pmap_pte routine best used when scanning
 * the pv lists.  This eliminates many coarse-grained
 * invltlb calls.  Note that many of the pv list
 * scans are across different pmaps.  It is very wasteful
 * to do an entire invltlb for checking a single mapping.
 *
 * If the given pmap is not the current pmap, vm_page_queue_mtx
 * must be held and curthread pinned to a CPU.
 */
static pt_entry_t *
pmap_pte_quick(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t newpf;
	pd_entry_t *pde;

	pde = pmap_pde(pmap, va);
	if (*pde & PG_PS)
		return (pde);
	if (*pde != 0) {
		/* are we current address space or kernel? */
		if (pmap_is_current(pmap))
			return (vtopte(va));
		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
		KASSERT(curthread->td_pinned > 0, ("curthread not pinned"));
		newpf = *pde & PG_FRAME;
		if ((*PMAP1 & PG_FRAME) != newpf) {
			*PMAP1 = newpf | PG_RW | PG_V | PG_A | PG_M;
#ifdef SMP
			PMAP1cpu = PCPU_GET(cpuid);
#endif
			invlcaddr(PADDR1);
			PMAP1changed++;
		} else
#ifdef SMP
		if (PMAP1cpu != PCPU_GET(cpuid)) {
			PMAP1cpu = PCPU_GET(cpuid);
			invlcaddr(PADDR1);
			PMAP1changedcpu++;
		} else
#endif
			PMAP1unchanged++;
		return (PADDR1 + (i386_btop(va) & (NPTEPG - 1)));
	}
	return (0);
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap, va)
	register pmap_t pmap;
	vm_offset_t va;
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde;

	if (pmap == 0)
		return 0;
	pde = pmap->pm_pdir[va >> PDRSHIFT];
	if (pde != 0) {
		if ((pde & PG_PS) != 0) {
			rtval = (pde & ~PDRMASK) | (va & PDRMASK);
			return rtval;
		}
		pte = pmap_pte(pmap, va);
		rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
		return rtval;
	}
	return 0;

}
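
/*
 * Illustrative note (editorial): in the PG_PS case above the PDE maps
 * a 4MB superpage, so the physical address is assembled from the PDE's
 * 4MB frame plus the low 22 bits of the VA:
 *	rtval = (pde & ~PDRMASK) | (va & PDRMASK)
 * For a 4KB mapping the split is instead PG_FRAME / PAGE_MASK (top 20
 * bits from the PTE, low 12 from the VA).  Constants assume the
 * non-PAE layout.
 */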
/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	vm_paddr_t pa;
	vm_page_t m;

	m = NULL;
	mtx_lock(&Giant);
	if ((pa = pmap_extract(pmap, va)) != 0) {
		m = PHYS_TO_VM_PAGE(pa);
		vm_page_lock_queues();
		vm_page_hold(m);
		vm_page_unlock_queues();
	}
	mtx_unlock(&Giant);
	return (m);
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_store(pte, pa | PG_RW | PG_V | pgeflag);
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_clear(pte);
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping. Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged. Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	vm_offset_t va, sva;

	va = sva = *virt;
	while (start < end) {
		pmap_kenter(va, start);
		va += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
	*virt = va;
	return (sva);
}
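
/*
 * Usage sketch (editorial, not from the original source): early boot
 * code would typically call pmap_map() roughly as follows, assuming
 * 'va' tracks the first free kernel VA:
 *
 *	vm_offset_t va = virtual_avail;
 *	caddr_t p = (caddr_t)pmap_map(&va, phys_start, phys_end,
 *	    VM_PROT_READ | VM_PROT_WRITE);
 *	virtual_avail = va;
 *
 * On i386 there is no direct map, so '*virt' is always advanced.
 */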
/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
		va += PAGE_SIZE;
		m++;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 */
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
{

	while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt"))
		vm_page_lock_queues();

	if (m->hold_count == 0) {
		vm_offset_t pteva;
		/*
		 * unmap the page table page
		 */
		pmap->pm_pdir[m->pindex] = 0;
		--pmap->pm_stats.resident_count;
		/*
		 * We never unwire a kernel page table page, making a
		 * check for the kernel_pmap unnecessary.
		 */
		if ((pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)) {
			/*
			 * Do an invltlb to make the invalidated mapping
			 * take effect immediately.
			 */
			pteva = VM_MAXUSER_ADDRESS + i386_ptob(m->pindex);
			pmap_invalidate_page(pmap, pteva);
		}

		/*
		 * If the page is finally unwired, simply free it.
		 */
		--m->wire_count;
		if (m->wire_count == 0) {
			vm_page_busy(m);
			vm_page_free_zero(m);
			atomic_subtract_int(&cnt.v_wire_count, 1);
		}
		return 1;
	}
	return 0;
}

static PMAP_INLINE int
pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
{
	vm_page_unhold(m);
	if (m->hold_count == 0)
		return _pmap_unwire_pte_hold(pmap, m);
	else
		return 0;
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{

	if (va >= VM_MAXUSER_ADDRESS)
		return 0;

	return pmap_unwire_pte_hold(pmap, mpte);
}

void
pmap_pinit0(pmap)
	struct pmap *pmap;
{

	pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
#ifdef PAE
	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
#endif
	pmap->pm_active = 0;
	PCPU_SET(curpmap, pmap);
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
}
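
/*
 * Note (editorial, hedged): the self-referential PTDPTDI entries
 * installed by pmap_pinit() below are what make vtopte() work: with
 * the page directory mapped as if it were a page table, every page
 * table page of the current address space appears at a fixed VA
 * window, so a PTE can be read without an explicit temporary mapping.
 */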
/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(pmap)
	register struct pmap *pmap;
{
	vm_page_t m, ptdpg[NPGPTD];
	vm_paddr_t pa;
	static int color;
	int i;

	/*
	 * No need to allocate page table space yet but we do need a valid
	 * page directory table.
	 */
	if (pmap->pm_pdir == NULL) {
		pmap->pm_pdir = (pd_entry_t *)kmem_alloc_nofault(kernel_map,
		    NBPTD);
#ifdef PAE
		pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
		KASSERT(((vm_offset_t)pmap->pm_pdpt &
		    ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
		    ("pmap_pinit: pdpt misaligned"));
		KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
		    ("pmap_pinit: pdpt above 4g"));
#endif
	}

	/*
	 * allocate the page directory page(s)
	 */
	for (i = 0; i < NPGPTD;) {
		m = vm_page_alloc(NULL, color++,
		    VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (m == NULL)
			VM_WAIT;
		else {
			ptdpg[i++] = m;
		}
	}

	pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);

	for (i = 0; i < NPGPTD; i++) {
		if ((ptdpg[i]->flags & PG_ZERO) == 0)
			bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
	}

	mtx_lock_spin(&allpmaps_lock);
	LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);
	/* Wire in kernel global address entries. */
	/* XXX copies current process, does not fill in MPPTDI */
	bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
#ifdef SMP
	pmap->pm_pdir[MPPTDI] = PTD[MPPTDI];
#endif

	/* install self-referential address mapping entry(s) */
	for (i = 0; i < NPGPTD; i++) {
		pa = VM_PAGE_TO_PHYS(ptdpg[i]);
		pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
#ifdef PAE
		pmap->pm_pdpt[i] = pa | PG_V;
#endif
	}

	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap, ptepindex)
	pmap_t pmap;
	unsigned ptepindex;
{
	vm_paddr_t ptepa;
	vm_page_t m;

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		VM_WAIT;
		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	KASSERT(m->queue == PQ_NONE,
	    ("_pmap_allocpte: %p->queue != PQ_NONE", m));

	/*
	 * Increment the hold count for the page table page
	 * (denoting a new mapping.)
	 */
	m->hold_count++;

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	pmap->pm_stats.resident_count++;

	ptepa = VM_PAGE_TO_PHYS(m);
	pmap->pm_pdir[ptepindex] =
	    (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);

	vm_page_lock_queues();
	vm_page_wakeup(m);
	vm_page_unlock_queues();

	return m;
}

static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va)
{
	unsigned ptepindex;
	pd_entry_t ptepa;
	vm_page_t m;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = va >> PDRSHIFT;
retry:
	/*
	 * Get the page directory entry
	 */
	ptepa = pmap->pm_pdir[ptepindex];

	/*
	 * This supports switching from a 4MB page to a
	 * normal 4K page.
	 */
	if (ptepa & PG_PS) {
		pmap->pm_pdir[ptepindex] = 0;
		ptepa = 0;
		pmap_invalidate_all(kernel_pmap);
	}

	/*
	 * If the page table page is mapped, we just increment the
	 * hold count, and activate it.
	 */
	if (ptepa) {
		m = PHYS_TO_VM_PAGE(ptepa);
		m->hold_count++;
	} else {
		/*
		 * Here if the pte page isn't mapped, or if it has
		 * been deallocated.
		 */
		m = _pmap_allocpte(pmap, ptepindex);
		if (m == NULL)
			goto retry;
	}
	return (m);
}
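
/*
 * Note (editorial): the retry loop above relies on _pmap_allocpte()
 * returning NULL after VM_WAIT; by the time we retry, another thread
 * may already have instantiated the page table page, in which case the
 * nonzero-PDE test simply takes the hold-count path instead of
 * allocating again.
 */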

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

#ifdef SMP
/*
 * Deal with a SMP shootdown of other users of the pmap that we are
 * trying to dispose of.  This can be a bit hairy.
 */
static u_int *lazymask;
static u_int lazyptd;
static volatile u_int lazywait;

void pmap_lazyfix_action(void);

void
pmap_lazyfix_action(void)
{
	u_int mymask = PCPU_GET(cpumask);

	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	atomic_clear_int(lazymask, mymask);
	atomic_store_rel_int(&lazywait, 1);
}

static void
pmap_lazyfix_self(u_int mymask)
{

	if (rcr3() == lazyptd)
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
	atomic_clear_int(lazymask, mymask);
}


static void
pmap_lazyfix(pmap_t pmap)
{
	u_int mymask = PCPU_GET(cpumask);
	u_int mask;
	register u_int spins;

	while ((mask = pmap->pm_active) != 0) {
		spins = 50000000;
		mask = mask & -mask;	/* Find least significant set bit */
		mtx_lock_spin(&lazypmap_lock);
#ifdef PAE
		lazyptd = vtophys(pmap->pm_pdpt);
#else
		lazyptd = vtophys(pmap->pm_pdir);
#endif
		if (mask == mymask) {
			lazymask = &pmap->pm_active;
			pmap_lazyfix_self(mymask);
		} else {
			atomic_store_rel_int((u_int *)&lazymask,
			    (u_int)&pmap->pm_active);
			atomic_store_rel_int(&lazywait, 0);
			ipi_selected(mask, IPI_LAZYPMAP);
			while (lazywait == 0) {
				ia32_pause();
				if (--spins == 0)
					break;
			}
		}
		mtx_unlock_spin(&lazypmap_lock);
		if (spins == 0)
			printf("pmap_lazyfix: spun for 50000000\n");
	}
}

#else	/* SMP */

/*
 * Cleaning up on uniprocessor is easy.  For various reasons, we're
 * unlikely to have to even execute this code, including the fact
 * that the cleanup is deferred until the parent does a wait(2), which
 * means that another userland process has run.
 */
static void
pmap_lazyfix(pmap_t pmap)
{
	u_int cr3;

	cr3 = vtophys(pmap->pm_pdir);
	if (cr3 == rcr3()) {
		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
		pmap->pm_active &= ~(PCPU_GET(cpumask));
	}
}
#endif	/* SMP */

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m, ptdpg[NPGPTD];
	int i;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));

	pmap_lazyfix(pmap);
	mtx_lock_spin(&allpmaps_lock);
	LIST_REMOVE(pmap, pm_list);
	mtx_unlock_spin(&allpmaps_lock);

	for (i = 0; i < NPGPTD; i++)
		ptdpg[i] = PHYS_TO_VM_PAGE(pmap->pm_pdir[PTDPTDI + i]);

	bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
	    sizeof(*pmap->pm_pdir));
#ifdef SMP
	pmap->pm_pdir[MPPTDI] = 0;
#endif

	pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);

	vm_page_lock_queues();
	for (i = 0; i < NPGPTD; i++) {
		m = ptdpg[i];
#ifdef PAE
		KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
		    ("pmap_release: got wrong ptd page"));
#endif
		m->wire_count--;
		atomic_subtract_int(&cnt.v_wire_count, 1);
		vm_page_free_zero(m);
	}
	vm_page_unlock_queues();
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");

/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	struct pmap *pmap;
	int s;
	vm_paddr_t ptppaddr;
	vm_page_t nkpg;
	pd_entry_t newpdir;
	pt_entry_t *pde;

	s = splhigh();
	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
	if (kernel_vm_end == 0) {
		kernel_vm_end = KERNBASE;
		nkpt = 0;
		while (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			nkpt++;
		}
	}
	addr = roundup2(addr, PAGE_SIZE * NPTEPG);
	while (kernel_vm_end < addr) {
		if (pdir_pde(PTD, kernel_vm_end)) {
			kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
			continue;
		}

		/*
		 * This index is bogus, but out of the way
		 */
		nkpg = vm_page_alloc(NULL, nkpt,
		    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
		if (!nkpg)
			panic("pmap_growkernel: no memory to grow kernel");

		nkpt++;

		pmap_zero_page(nkpg);
		ptppaddr = VM_PAGE_TO_PHYS(nkpg);
		newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
		pdir_pde(PTD, kernel_vm_end) = newpdir;

		mtx_lock_spin(&allpmaps_lock);
		LIST_FOREACH(pmap, &allpmaps, pm_list) {
			pde = pmap_pde(pmap, kernel_vm_end);
			pde_store(pde, newpdir);
		}
		mtx_unlock_spin(&allpmaps_lock);
		kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
	}
	splx(s);
}
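
/*
 * Note (editorial, hedged): kernel VA is grown in page-table-page
 * units of PAGE_SIZE * NPTEPG (4MB with 4KB pages and 1024 PTEs per
 * page table), which is why both 'addr' and 'kernel_vm_end' are
 * rounded to that granularity above; each new page table page is
 * entered into every pmap on the allpmaps list so all address spaces
 * see the enlarged kernel map.
 */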
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{
	pv_entry_count--;
	uma_zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 * the memory allocation is performed bypassing the malloc code
 * because of the possibility of allocations at interrupt time.
 */
static pv_entry_t
get_pv_entry(void)
{
	pv_entry_count++;
	if (pv_entry_high_water &&
	    (pv_entry_count > pv_entry_high_water) &&
	    (pmap_pagedaemon_waken == 0)) {
		pmap_pagedaemon_waken = 1;
		wakeup(&vm_pages_needed);
	}
	return uma_zalloc(pvzone, M_NOWAIT);
}

static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
	pv_entry_t pv;
	int rtval;
	int s;

	s = splvm();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
		TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
			if (pmap == pv->pv_pmap && va == pv->pv_va)
				break;
		}
	} else {
		TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
			if (va == pv->pv_va)
				break;
		}
	}

	rtval = 0;
	if (pv) {
		rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		if (TAILQ_FIRST(&m->md.pv_list) == NULL)
			vm_page_flag_clear(m, PG_WRITEABLE);

		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
		free_pv_entry(pv);
	}

	splx(s);
	return rtval;
}

/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
{

	int s;
	pv_entry_t pv;

	s = splvm();
	pv = get_pv_entry();
	pv->pv_va = va;
	pv->pv_pmap = pmap;
	pv->pv_ptem = mpte;

	vm_page_lock_queues();
	TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
	TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
	m->md.pv_list_count++;

	vm_page_unlock_queues();
	splx(s);
}

/*
 * pmap_remove_pte: do the things to unmap a page in a process
 */
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
{
	pt_entry_t oldpte;
	vm_page_t m, mpte;

	oldpte = pte_load_clear(ptq);
	if (oldpte & PG_W)
		pmap->pm_stats.wired_count -= 1;
	/*
	 * Machines that don't support invlpg also don't support
	 * PG_G.
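	 * Global (PG_G) kernel mappings survive a CR3 reload, so they
	 * must be flushed explicitly here via pmap_invalidate_page().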
	 */
	if (oldpte & PG_G)
		pmap_invalidate_page(kernel_pmap, va);
	pmap->pm_stats.resident_count -= 1;
	if (oldpte & PG_MANAGED) {
		m = PHYS_TO_VM_PAGE(oldpte);
		if (oldpte & PG_M) {
#if defined(PMAP_DIAGNOSTIC)
			if (pmap_nw_modified((pt_entry_t) oldpte)) {
				printf(
	"pmap_remove: modified page not writable: va: 0x%x, pte: 0x%x\n",
				    va, oldpte);
			}
#endif
			if (pmap_track_modified(va))
				vm_page_dirty(m);
		}
		if (oldpte & PG_A)
			vm_page_flag_set(m, PG_REFERENCED);
		return pmap_remove_entry(pmap, m, va);
	} else {
		mpte = PHYS_TO_VM_PAGE(*pmap_pde(pmap, va));
		return pmap_unuse_pt(pmap, va, mpte);
	}
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte;

	if ((pte = pmap_pte(pmap, va)) == NULL || *pte == 0)
		return;
	pmap_remove_pte(pmap, pte, va);
	pmap_invalidate_page(pmap, va);
}

/*
 *	Remove the given range of addresses from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t pdnxt;
	pd_entry_t ptpaddr;
	pt_entry_t *pte;
	int anyvalid;

	if (pmap == NULL)
		return;

	if (pmap->pm_stats.resident_count == 0)
		return;

	/*
	 * Special handling for removing a single page: it is a very
	 * common operation and easy to short-circuit.
	 */
	if ((sva + PAGE_SIZE == eva) &&
	    ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
		pmap_remove_page(pmap, sva);
		return;
	}

	anyvalid = 0;

	for (; sva < eva; sva = pdnxt) {
		unsigned pdirindex;

		/*
		 * Calculate index for next page table.
		 */
		pdnxt = (sva + NBPDR) & ~PDRMASK;
		if (pmap->pm_stats.resident_count == 0)
			break;

		pdirindex = sva >> PDRSHIFT;
		ptpaddr = pmap->pm_pdir[pdirindex];

		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		/*
		 * Check for large page.
		 */
		if ((ptpaddr & PG_PS) != 0) {
			pmap->pm_pdir[pdirindex] = 0;
			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
			anyvalid = 1;
			continue;
		}

		/*
		 * Limit our scan to either the end of the va represented
		 * by the current page table page, or to the end of the
		 * range being removed.
		 */
		if (pdnxt > eva)
			pdnxt = eva;

		for (; sva != pdnxt; sva += PAGE_SIZE) {
			if ((pte = pmap_pte(pmap, sva)) == NULL ||
			    *pte == 0)
				continue;
			anyvalid = 1;
			if (pmap_remove_pte(pmap, pte, sva))
				break;
		}
	}

	if (anyvalid)
		pmap_invalidate_all(pmap);
}

/*
 *	Routine:	pmap_remove_all
 *	Function:
 *		Removes this physical page from
 *		all physical maps in which it resides.
 *		Reflects back modify bits to the pager.
 *
 *	Notes:
 *		Original versions of this routine were very
 *		inefficient because they iteratively called
 *		pmap_remove (slow...)
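 *		This version walks the page's pv list directly,
 *		unmapping each entry in turn.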
 */

void
pmap_remove_all(vm_page_t m)
{
	register pv_entry_t pv;
	pt_entry_t *pte, tpte;
	int s;

#if defined(PMAP_DIAGNOSTIC)
	/*
	 * XXX This makes pmap_remove_all() illegal for non-managed pages!
	 */
	if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
		panic("pmap_remove_all: illegal for unmanaged page, va: 0x%x",
		    VM_PAGE_TO_PHYS(m));
	}
#endif
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	s = splvm();
	sched_pin();
	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
		pv->pv_pmap->pm_stats.resident_count--;
		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
		tpte = pte_load_clear(pte);
		if (tpte & PG_W)
			pv->pv_pmap->pm_stats.wired_count--;
		if (tpte & PG_A)
			vm_page_flag_set(m, PG_REFERENCED);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (tpte & PG_M) {
#if defined(PMAP_DIAGNOSTIC)
			if (pmap_nw_modified((pt_entry_t) tpte)) {
				printf(
	"pmap_remove_all: modified page not writable: va: 0x%x, pte: 0x%x\n",
				    pv->pv_va, tpte);
			}
#endif
			if (pmap_track_modified(pv->pv_va))
				vm_page_dirty(m);
		}
		pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		m->md.pv_list_count--;
		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
		free_pv_entry(pv);
	}
	vm_page_flag_clear(m, PG_WRITEABLE);
	sched_unpin();
	splx(s);
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
	vm_offset_t pdnxt;
	pd_entry_t ptpaddr;
	int anychanged;

	if (pmap == NULL)
		return;

	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
		pmap_remove(pmap, sva, eva);
		return;
	}

	if (prot & VM_PROT_WRITE)
		return;

	anychanged = 0;

	for (; sva < eva; sva = pdnxt) {
		unsigned pdirindex;

		pdnxt = (sva + NBPDR) & ~PDRMASK;

		pdirindex = sva >> PDRSHIFT;
		ptpaddr = pmap->pm_pdir[pdirindex];

		/*
		 * Weed out invalid mappings. Note: we assume that the page
		 * directory table is always allocated, and in kernel virtual.
		 */
		if (ptpaddr == 0)
			continue;

		/*
		 * Check for large page.
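		 * A 4MB page is write protected in place by clearing
		 * PG_RW (and PG_M) in its page directory entry.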
		 */
		if ((ptpaddr & PG_PS) != 0) {
			pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
			pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
			anychanged = 1;
			continue;
		}

		if (pdnxt > eva)
			pdnxt = eva;

		for (; sva != pdnxt; sva += PAGE_SIZE) {
			pt_entry_t pbits;
			pt_entry_t *pte;
			vm_page_t m;

			if ((pte = pmap_pte(pmap, sva)) == NULL)
				continue;
			pbits = *pte;
			if (pbits & PG_MANAGED) {
				m = NULL;
				if (pbits & PG_A) {
					m = PHYS_TO_VM_PAGE(pbits);
					vm_page_flag_set(m, PG_REFERENCED);
					pbits &= ~PG_A;
				}
				if ((pbits & PG_M) != 0 &&
				    pmap_track_modified(sva)) {
					if (m == NULL)
						m = PHYS_TO_VM_PAGE(pbits);
					vm_page_dirty(m);
					pbits &= ~PG_M;
				}
			}

			pbits &= ~PG_RW;

			if (pbits != *pte) {
				pte_store(pte, pbits);
				anychanged = 1;
			}
		}
	}
	if (anychanged)
		pmap_invalidate_all(pmap);
}

/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    boolean_t wired)
{
	vm_paddr_t pa;
	register pt_entry_t *pte;
	vm_paddr_t opa;
	pt_entry_t origpte, newpte;
	vm_page_t mpte;

	if (pmap == NULL)
		return;

	va &= PG_FRAME;
#ifdef PMAP_DIAGNOSTIC
	if (va > VM_MAX_KERNEL_ADDRESS)
		panic("pmap_enter: toobig");
	if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
		panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%x)", va);
#endif

	mpte = NULL;
	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		mpte = pmap_allocpte(pmap, va);
	}
#if 0 && defined(PMAP_DIAGNOSTIC)
	else {
		pd_entry_t *pdeaddr = pmap_pde(pmap, va);
		origpte = *pdeaddr;
		if ((origpte & PG_V) == 0) {
			panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
				pmap->pm_pdir[PTDPTDI], origpte, va);
		}
	}
#endif

	pte = pmap_pte(pmap, va);

	/*
	 * Page Directory table entry not valid, we need a new PT page
	 */
	if (pte == NULL) {
		panic("pmap_enter: invalid page directory pdir=%#jx, va=%#x\n",
			(uintmax_t)pmap->pm_pdir[PTDPTDI], va);
	}

	pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
	origpte = *pte;
	opa = origpte & PG_FRAME;

	if (origpte & PG_PS) {
		/*
		 * Yes, I know this will truncate upper address bits for PAE,
		 * but I'm actually more interested in the lower bits
		 */
		printf("pmap_enter: va %p, pte %p, origpte %p\n",
		    (void *)va, (void *)pte, (void *)(uintptr_t)origpte);
		panic("pmap_enter: attempted pmap_enter on 4MB page");
	}

	/*
	 * Mapping has not changed, must be protection or wiring change.
	 */
	if (origpte && (opa == pa)) {
		/*
		 * Wiring change, just update stats. We don't worry about
		 * wiring PT pages as they remain resident as long as there
		 * are valid mappings in them.  Hence, if a user page is
		 * wired, the PT page will be also.
		 */
		if (wired && ((origpte & PG_W) == 0))
			pmap->pm_stats.wired_count++;
		else if (!wired && (origpte & PG_W))
			pmap->pm_stats.wired_count--;

#if defined(PMAP_DIAGNOSTIC)
		if (pmap_nw_modified((pt_entry_t) origpte)) {
			printf(
	"pmap_enter: modified page not writable: va: 0x%x, pte: 0x%x\n",
			    va, origpte);
		}
#endif

		/*
		 * Remove extra pte reference
		 */
		if (mpte)
			mpte->hold_count--;

		/*
		 * We might be turning off write access to the page,
		 * so we go ahead and sense modify status.
		 */
		if (origpte & PG_MANAGED) {
			if ((origpte & PG_M) && pmap_track_modified(va)) {
				vm_page_t om;
				om = PHYS_TO_VM_PAGE(opa);
				vm_page_dirty(om);
			}
			pa |= PG_MANAGED;
		}
		goto validate;
	}
	/*
	 * Mapping has changed, invalidate old range and fall through to
	 * handle validating new mapping.
	 */
	if (opa) {
		int err;
		vm_page_lock_queues();
		err = pmap_remove_pte(pmap, pte, va);
		vm_page_unlock_queues();
		if (err)
			panic("pmap_enter: pte vanished, va: 0x%x", va);
	}

	/*
	 * Enter on the PV list if part of our managed memory. Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if (pmap_initialized &&
	    (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
		pmap_insert_entry(pmap, va, mpte, m);
		pa |= PG_MANAGED;
	}

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;
	if (wired)
		pmap->pm_stats.wired_count++;

validate:
	/*
	 * Now validate mapping with desired protection/wiring.
	 */
	newpte = (pt_entry_t)(pa | PG_V);
	if ((prot & VM_PROT_WRITE) != 0)
		newpte |= PG_RW;
	if (wired)
		newpte |= PG_W;
	if (va < VM_MAXUSER_ADDRESS)
		newpte |= PG_U;
	if (pmap == kernel_pmap)
		newpte |= pgeflag;

	/*
	 * if the mapping or permission bits are different, we need
	 * to update the pte.
	 */
	if ((origpte & ~(PG_M|PG_A)) != newpte) {
		pte_store(pte, newpte | PG_A);
		/*if (origpte)*/ {
			pmap_invalidate_page(pmap, va);
		}
	}
}

/*
 * This code makes some *MAJOR* assumptions:
 * 1. The current pmap and the given pmap exist.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * 5. The TLB flush is deferred to the calling procedure.
 * 6. The page IS managed.
 * It is, however, *MUCH* faster than pmap_enter...
 */

vm_page_t
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
{
	pt_entry_t *pte;
	vm_paddr_t pa;

	/*
	 * In the case that a page table page is not
	 * resident, we are creating it here.
	 */
	if (va < VM_MAXUSER_ADDRESS) {
		unsigned ptepindex;
		pd_entry_t ptepa;

		/*
		 * Calculate pagetable page index
		 */
		ptepindex = va >> PDRSHIFT;
		if (mpte && (mpte->pindex == ptepindex)) {
			mpte->hold_count++;
		} else {
retry:
			/*
			 * Get the page directory entry
			 */
			ptepa = pmap->pm_pdir[ptepindex];

			/*
			 * If the page table page is mapped, we just increment
			 * the hold count, and activate it.
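			 * The hold count keeps the page table page
			 * allocated for as long as any of its
			 * mappings remain.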
			 */
			if (ptepa) {
				if (ptepa & PG_PS)
					panic("pmap_enter_quick: unexpected mapping into 4MB page");
				mpte = PHYS_TO_VM_PAGE(ptepa);
				mpte->hold_count++;
			} else {
				mpte = _pmap_allocpte(pmap, ptepindex);
				if (mpte == NULL)
					goto retry;
			}
		}
	} else {
		mpte = NULL;
	}

	/*
	 * This call to vtopte makes the assumption that we are
	 * entering the page into the current pmap.  In order to support
	 * quick entry into any pmap, one would likely use pmap_pte_quick.
	 * But that isn't as quick as vtopte.
	 */
	pte = vtopte(va);
	if (*pte) {
		if (mpte != NULL) {
			vm_page_lock_queues();
			pmap_unwire_pte_hold(pmap, mpte);
			vm_page_unlock_queues();
		}
		return 0;
	}

	/*
	 * Enter on the PV list if part of our managed memory. Note that we
	 * raise IPL while manipulating pv_table since pmap_enter can be
	 * called at interrupt time.
	 */
	if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
		pmap_insert_entry(pmap, va, mpte, m);

	/*
	 * Increment counters
	 */
	pmap->pm_stats.resident_count++;

	pa = VM_PAGE_TO_PHYS(m);

	/*
	 * Now validate mapping with RO protection
	 */
	if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
		pte_store(pte, pa | PG_V | PG_U);
	else
		pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);

	return mpte;
}

/*
 * Make a temporary mapping for a physical address.  This is only intended
 * to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
	vm_offset_t va;

	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
	pmap_kenter(va, pa);
#ifndef I386_CPU
	invlpg(va);
#else
	invltlb();
#endif
	return ((void *)crashdumpmap);
}

/*
 * This code maps large physical mmap regions into the
 * processor address space.  Note that some shortcuts
 * are taken, but the code works.
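 * Only device-backed objects, at 4MB-aligned addresses and sizes,
 * are handled, and only when the processor supports 4MB pages
 * (pseflag).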
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
    vm_object_t object, vm_pindex_t pindex,
    vm_size_t size)
{
	vm_page_t p;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT(object->type == OBJT_DEVICE,
	    ("pmap_object_init_pt: non-device object"));
	if (pseflag &&
	    ((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
		int i;
		vm_page_t m[1];
		unsigned int ptepindex;
		int npdes;
		pd_entry_t ptepa;

		if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
			return;
retry:
		p = vm_page_lookup(object, pindex);
		if (p != NULL) {
			vm_page_lock_queues();
			if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
				goto retry;
		} else {
			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
			if (p == NULL)
				return;
			m[0] = p;

			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
				vm_page_lock_queues();
				vm_page_free(p);
				vm_page_unlock_queues();
				return;
			}

			p = vm_page_lookup(object, pindex);
			vm_page_lock_queues();
			vm_page_wakeup(p);
		}
		vm_page_unlock_queues();

		ptepa = VM_PAGE_TO_PHYS(p);
		if (ptepa & (NBPDR - 1))
			return;

		p->valid = VM_PAGE_BITS_ALL;

		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
		npdes = size >> PDRSHIFT;
		for (i = 0; i < npdes; i++) {
			pde_store(&pmap->pm_pdir[ptepindex],
			    ptepa | PG_U | PG_RW | PG_V | PG_PS);
			ptepa += NBPDR;
			ptepindex += 1;
		}
		pmap_invalidate_all(pmap);
	}
}

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap_t pmap, vm_offset_t va, boolean_t wired)
{
	register pt_entry_t *pte;

	if (pmap == NULL)
		return;

	pte = pmap_pte(pmap, va);

	if (wired && !pmap_pte_w(pte))
		pmap->pm_stats.wired_count++;
	else if (!wired && pmap_pte_w(pte))
		pmap->pm_stats.wired_count--;

	/*
	 * Wiring is not a hardware characteristic so there is no need to
	 * invalidate TLB.
	 */
	pmap_pte_set_w(pte, wired);
}

/*
 *	Copy the range specified by src_addr/len
 *	from the source map to the range dst_addr/len
 *	in the destination map.
 *
 *	This routine is only advisory and need not do anything.
 */

void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
	vm_offset_t addr;
	vm_offset_t end_addr = src_addr + len;
	vm_offset_t pdnxt;
	vm_page_t m;

	if (dst_addr != src_addr)
		return;

	if (!pmap_is_current(src_pmap))
		return;

	for (addr = src_addr; addr < end_addr; addr = pdnxt) {
		pt_entry_t *src_pte, *dst_pte;
		vm_page_t dstmpte, srcmpte;
		pd_entry_t srcptepaddr;
		unsigned ptepindex;

		if (addr >= UPT_MIN_ADDRESS)
			panic("pmap_copy: invalid to pmap_copy page tables\n");

		/*
		 * Don't let optional prefaulting of pages make us go
		 * way below the low water mark of free pages or way
		 * above high water mark of used pv entries.
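		 * Since this routine is only advisory, stopping early
		 * simply leaves the remaining pages to be faulted in
		 * on demand.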
		 */
		if (cnt.v_free_count < cnt.v_free_reserved ||
		    pv_entry_count > pv_entry_high_water)
			break;

		pdnxt = (addr + NBPDR) & ~PDRMASK;
		ptepindex = addr >> PDRSHIFT;

		srcptepaddr = src_pmap->pm_pdir[ptepindex];
		if (srcptepaddr == 0)
			continue;

		if (srcptepaddr & PG_PS) {
			if (dst_pmap->pm_pdir[ptepindex] == 0) {
				dst_pmap->pm_pdir[ptepindex] = srcptepaddr;
				dst_pmap->pm_stats.resident_count +=
				    NBPDR / PAGE_SIZE;
			}
			continue;
		}

		srcmpte = PHYS_TO_VM_PAGE(srcptepaddr);
		if (srcmpte->hold_count == 0 || (srcmpte->flags & PG_BUSY))
			continue;

		if (pdnxt > end_addr)
			pdnxt = end_addr;

		src_pte = vtopte(addr);
		while (addr < pdnxt) {
			pt_entry_t ptetemp;
			ptetemp = *src_pte;
			/*
			 * we only virtual copy managed pages
			 */
			if ((ptetemp & PG_MANAGED) != 0) {
				/*
				 * We have to check after allocpte for the
				 * pte still being around...  allocpte can
				 * block.
				 */
				dstmpte = pmap_allocpte(dst_pmap, addr);
				dst_pte = pmap_pte(dst_pmap, addr);
				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
					/*
					 * Clear the modified and
					 * accessed (referenced) bits
					 * during the copy.
					 */
					m = PHYS_TO_VM_PAGE(ptetemp);
					*dst_pte = ptetemp & ~(PG_M | PG_A);
					dst_pmap->pm_stats.resident_count++;
					pmap_insert_entry(dst_pmap, addr,
					    dstmpte, m);
				} else {
					vm_page_lock_queues();
					pmap_unwire_pte_hold(dst_pmap, dstmpte);
					vm_page_unlock_queues();
				}
				if (dstmpte->hold_count >= srcmpte->hold_count)
					break;
			}
			addr += PAGE_SIZE;
			src_pte++;
		}
	}
}

static __inline void
pagezero(void *page)
{
#if defined(I686_CPU)
	if (cpu_class == CPUCLASS_686) {
#if defined(CPU_ENABLE_SSE)
		if (cpu_feature & CPUID_SSE2)
			sse2_pagezero(page);
		else
#endif
			i686_pagezero(page);
	} else
#endif
		bzero(page, PAGE_SIZE);
}

/*
 * pmap_zero_page zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 */
void
pmap_zero_page(vm_page_t m)
{

	mtx_lock(&CMAPCADDR12_lock);
	if (*CMAP2)
		panic("pmap_zero_page: CMAP2 busy");
	sched_pin();
	*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
	invlcaddr(CADDR2);
	pagezero(CADDR2);
	*CMAP2 = 0;
	sched_unpin();
	mtx_unlock(&CMAPCADDR12_lock);
}

/*
 * pmap_zero_page_area zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{

	mtx_lock(&CMAPCADDR12_lock);
	if (*CMAP2)
		panic("pmap_zero_page_area: CMAP2 busy");
	sched_pin();
	*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
	invlcaddr(CADDR2);
	if (off == 0 && size == PAGE_SIZE)
		pagezero(CADDR2);
	else
		bzero((char *)CADDR2 + off, size);
	*CMAP2 = 0;
	sched_unpin();
	mtx_unlock(&CMAPCADDR12_lock);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page by mapping
 * the page into KVM and using bzero to clear its contents.  This
 * is intended to be called from the vm_pagezero process only and
 * outside of Giant.
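 * Because vm_pagezero is the sole caller, CMAP3/CADDR3 are used
 * here without acquiring CMAPCADDR12_lock.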
 */
void
pmap_zero_page_idle(vm_page_t m)
{

	if (*CMAP3)
		panic("pmap_zero_page_idle: CMAP3 busy");
	sched_pin();
	*CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
	invlcaddr(CADDR3);
	pagezero(CADDR3);
	*CMAP3 = 0;
	sched_unpin();
}

/*
 * pmap_copy_page copies the specified (machine independent)
 * page by mapping the page into virtual memory and using
 * bcopy to copy the page, one machine dependent page at a
 * time.
 */
void
pmap_copy_page(vm_page_t src, vm_page_t dst)
{

	mtx_lock(&CMAPCADDR12_lock);
	if (*CMAP1)
		panic("pmap_copy_page: CMAP1 busy");
	if (*CMAP2)
		panic("pmap_copy_page: CMAP2 busy");
	sched_pin();
#ifdef I386_CPU
	invltlb();
#else
	invlpg((u_int)CADDR1);
	invlpg((u_int)CADDR2);
#endif
	*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
	*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
	bcopy(CADDR1, CADDR2, PAGE_SIZE);
	*CMAP1 = 0;
	*CMAP2 = 0;
	sched_unpin();
	mtx_unlock(&CMAPCADDR12_lock);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
{
	pv_entry_t pv;
	int loops = 0;
	int s;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return FALSE;

	s = splvm();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			splx(s);
			return TRUE;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	splx(s);
	return (FALSE);
}

#define PMAP_REMOVE_PAGES_CURPROC_ONLY
/*
 * Remove all pages from the specified address space; this aids
 * process exit speeds.  Also, this code is special cased for the
 * current process only, but can have the more generic (and slightly
 * slower) mode enabled.  This is much faster than pmap_remove in the
 * case of running down an entire address space.
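 * With PMAP_REMOVE_PAGES_CURPROC_ONLY defined (as it is below), ptes
 * are looked up with vtopte(), which is valid only for the current
 * pmap; hence the warning and early return for any other pmap.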
 */
void
pmap_remove_pages(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pt_entry_t *pte, tpte;
	vm_page_t m;
	pv_entry_t pv, npv;
	int s;

#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}
#endif
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	s = splvm();
	sched_pin();
	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {

		if (pv->pv_va >= eva || pv->pv_va < sva) {
			npv = TAILQ_NEXT(pv, pv_plist);
			continue;
		}

#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
		pte = vtopte(pv->pv_va);
#else
		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
#endif
		tpte = *pte;

		if (tpte == 0) {
			printf("TPTE at %p IS ZERO @ VA %08x\n",
			    pte, pv->pv_va);
			panic("bad pte");
		}

		/*
		 * We cannot remove wired pages from a process' mapping
		 * at this time.
		 */
		if (tpte & PG_W) {
			npv = TAILQ_NEXT(pv, pv_plist);
			continue;
		}

		m = PHYS_TO_VM_PAGE(tpte);
		KASSERT(m->phys_addr == (tpte & PG_FRAME),
		    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
		    m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));

		KASSERT(m < &vm_page_array[vm_page_array_size],
		    ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));

		pv->pv_pmap->pm_stats.resident_count--;

		pte_clear(pte);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (tpte & PG_M) {
			vm_page_dirty(m);
		}

		npv = TAILQ_NEXT(pv, pv_plist);
		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);

		m->md.pv_list_count--;
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
			vm_page_flag_clear(m, PG_WRITEABLE);
		}

		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
		free_pv_entry(pv);
	}
	sched_unpin();
	splx(s);
	pmap_invalidate_all(pmap);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	int s;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return FALSE;

	s = splvm();
	sched_pin();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		/*
		 * if the bit being tested is the modified bit, then
		 * mark clean_map and ptes as never
		 * modified.
		 */
		if (!pmap_track_modified(pv->pv_va))
			continue;
#if defined(PMAP_DIAGNOSTIC)
		if (!pv->pv_pmap) {
			printf("Null pmap (tb) at va: 0x%x\n", pv->pv_va);
			continue;
		}
#endif
		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
		if (*pte & PG_M) {
			sched_unpin();
			splx(s);
			return TRUE;
		}
	}
	sched_unpin();
	splx(s);
	return (FALSE);
}

/*
 *	pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
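 *
 *	An address is eligible only if its page table page is present
 *	and the pte itself is empty, so that prefaulting can never
 *	overwrite an existing mapping.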
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *pte;

	if ((*pmap_pde(pmap, addr)) == 0)
		return (FALSE);
	pte = vtopte(addr);
	if (*pte)
		return (FALSE);
	return (TRUE);
}

/*
 *	Clear the given bit in each of the given page's ptes.
 */
static __inline void
pmap_clear_ptes(vm_page_t m, int bit)
{
	register pv_entry_t pv;
	pt_entry_t pbits, *pte;
	int s;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS) ||
	    (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
		return;

	s = splvm();
	sched_pin();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	/*
	 * Loop over all current mappings, setting/clearing as apropos.
	 * If setting RO, do we need to clear the VAC?
	 */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		/*
		 * don't write protect pager mappings
		 */
		if (bit == PG_RW) {
			if (!pmap_track_modified(pv->pv_va))
				continue;
		}

#if defined(PMAP_DIAGNOSTIC)
		if (!pv->pv_pmap) {
			printf("Null pmap (cb) at va: 0x%x\n", pv->pv_va);
			continue;
		}
#endif

		pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
		pbits = *pte;
		if (pbits & bit) {
			if (bit == PG_RW) {
				if (pbits & PG_M) {
					vm_page_dirty(m);
				}
				pte_store(pte, pbits & ~(PG_M|PG_RW));
			} else {
				pte_store(pte, pbits & ~bit);
			}
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
	}
	if (bit == PG_RW)
		vm_page_flag_clear(m, PG_WRITEABLE);
	sched_unpin();
	splx(s);
}

/*
 *	pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
 */
void
pmap_page_protect(vm_page_t m, vm_prot_t prot)
{
	if ((prot & VM_PROT_WRITE) == 0) {
		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
			pmap_clear_ptes(m, PG_RW);
		} else {
			pmap_remove_all(m);
		}
	}
}

/*
 *	pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
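 *	At present, the scan of a page's pv list stops after five
 *	reference bits have been found and cleared.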
 */
int
pmap_ts_referenced(vm_page_t m)
{
	register pv_entry_t pv, pvf, pvn;
	pt_entry_t *pte;
	pt_entry_t v;
	int s;
	int rtval = 0;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return (rtval);

	s = splvm();
	sched_pin();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {

		pvf = pv;

		do {
			pvn = TAILQ_NEXT(pv, pv_list);

			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);

			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);

			if (!pmap_track_modified(pv->pv_va))
				continue;

			pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);

			if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
				atomic_clear_int((u_int *)pte, PG_A);
				pmap_invalidate_page(pv->pv_pmap, pv->pv_va);

				rtval++;
				if (rtval > 4) {
					break;
				}
			}
		} while ((pv = pvn) != NULL && pv != pvf);
	}
	sched_unpin();
	splx(s);

	return (rtval);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	pmap_clear_ptes(m, PG_M);
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	pmap_clear_ptes(m, PG_A);
}

/*
 * Miscellaneous support routines follow
 */

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(vm_paddr_t pa, vm_size_t size)
{
	vm_offset_t va, tmpva, offset;

	offset = pa & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	pa = pa & PG_FRAME;

	if (pa < KERNLOAD && pa + size <= KERNLOAD)
		va = KERNBASE + pa;
	else
		va = kmem_alloc_nofault(kernel_map, size);
	if (!va)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");

	for (tmpva = va; size > 0; ) {
		pmap_kenter(tmpva, pa);
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, va, tmpva);
	return ((void *)(va + offset));
}

void
pmap_unmapdev(vm_offset_t va, vm_size_t size)
{
	vm_offset_t base, offset, tmpva;

	if (va >= KERNBASE && va + size <= KERNBASE + KERNLOAD)
		return;
	base = va & PG_FRAME;
	offset = va & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
		pmap_kremove(tmpva);
	pmap_invalidate_range(kernel_pmap, va, tmpva);
	kmem_free(kernel_map, base, size);
}

/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap_t pmap, vm_offset_t addr)
{
	pt_entry_t *ptep, pte;
	vm_page_t m;
	int val = 0;

	ptep = pmap_pte(pmap, addr);
	if (ptep == 0) {
		return 0;
	}

	if ((pte = *ptep) != 0) {
		vm_paddr_t pa;

		val = MINCORE_INCORE;
		if ((pte & PG_MANAGED) == 0)
			return val;

		pa = pte & PG_FRAME;

		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pte & PG_M)
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		else {
			/*
			 * Modified by someone else
			 */
			vm_page_lock_queues();
			if (m->dirty || pmap_is_modified(m))
				val |= MINCORE_MODIFIED_OTHER;
			vm_page_unlock_queues();
		}
		/*
		 * Referenced by us
		 */
		if (pte & PG_A)
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
		else {
			/*
			 * Referenced by someone else
			 */
			vm_page_lock_queues();
			if ((m->flags & PG_REFERENCED) ||
			    pmap_ts_referenced(m)) {
				val |= MINCORE_REFERENCED_OTHER;
				vm_page_flag_set(m, PG_REFERENCED);
			}
			vm_page_unlock_queues();
		}
	}
	return val;
}

void
pmap_activate(struct thread *td)
{
	struct proc *p = td->td_proc;
	pmap_t	pmap, oldpmap;
	u_int32_t cr3;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	oldpmap = PCPU_GET(curpmap);
#if defined(SMP)
	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
#else
	oldpmap->pm_active &= ~1;
	pmap->pm_active |= 1;
#endif
#ifdef PAE
	cr3 = vtophys(pmap->pm_pdpt);
#else
	cr3 = vtophys(pmap->pm_pdir);
#endif
	/*
	 * XXXKSE this is wrong.
	 * pmap_activate is for the current thread on the current cpu.
	 */
	if (p->p_flag & P_SA) {
		/* Make sure all other cr3 entries are updated. */
		/* what if they are running?  XXXKSE (maybe abort them) */
		FOREACH_THREAD_IN_PROC(p, td) {
			td->td_pcb->pcb_cr3 = cr3;
		}
	} else {
		td->td_pcb->pcb_cr3 = cr3;
	}
	load_cr3(cr3);
	PCPU_SET(curpmap, pmap);
	critical_exit();
}

vm_offset_t
pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
{

	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
		return addr;
	}

	addr = (addr + PDRMASK) & ~PDRMASK;
	return addr;
}


#if defined(PMAP_DEBUG)
int
pmap_pid_dump(int pid)
{
	pmap_t pmap;
	struct proc *p;
	int npte = 0;
	int index;

	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		if (p->p_pid != pid)
			continue;

		if (p->p_vmspace) {
			int i, j;
			index = 0;
			pmap = vmspace_pmap(p->p_vmspace);
			for (i = 0; i < NPDEPTD; i++) {
				pd_entry_t *pde;
				pt_entry_t *pte;
				vm_offset_t base = i << PDRSHIFT;

				pde = &pmap->pm_pdir[i];
				if (pde && pmap_pde_v(pde)) {
					for (j = 0; j < NPTEPG; j++) {
						vm_offset_t va = base + (j << PAGE_SHIFT);
						if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
							if (index) {
								index = 0;
								printf("\n");
							}
							sx_sunlock(&allproc_lock);
							return npte;
						}
						pte = pmap_pte(pmap, va);
						if (pte && pmap_pte_v(pte)) {
							pt_entry_t pa;
							vm_page_t m;
							pa = *pte;
							m = PHYS_TO_VM_PAGE(pa);
							printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
								va, pa, m->hold_count, m->wire_count, m->flags);
							npte++;
							index++;
							if (index >= 2) {
								index = 0;
								printf("\n");
							} else {
								printf(" ");
							}
						}
					}
				}
			}
		}
	}
	sx_sunlock(&allproc_lock);
	return npte;
}
#endif

#if defined(DEBUG)

static void	pads(pmap_t pm);
void		pmap_pvdump(vm_paddr_t pa);

/* print address space of pmap*/
static void
pads(pmap_t pm)
{
	int i, j;
	vm_offset_t va;
	pt_entry_t *ptep;

	if (pm == kernel_pmap)
		return;
	for (i = 0; i < NPDEPTD; i++)
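		/* For each present page table, print its valid mappings. */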
		if (pm->pm_pdir[i])
			for (j = 0; j < NPTEPG; j++) {
				va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
				if (pm == kernel_pmap && va < KERNBASE)
					continue;
				if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
					continue;
				ptep = pmap_pte(pm, va);
				if (pmap_pte_v(ptep))
					printf("%x:%x ", va, *ptep);
			}
}

void
pmap_pvdump(vm_paddr_t pa)
{
	pv_entry_t pv;
	vm_page_t m;

	printf("pa %x", pa);
	m = PHYS_TO_VM_PAGE(pa);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
		pads(pv->pv_pmap);
	}
	printf(" ");
}
#endif