/* pmap.c revision 134233 */
/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/amd64/amd64/pmap.c 134233 2004-08-24 00:17:52Z peter $");

/*
 *	Manages physical address maps.
 *
 *	In addition to hardware address maps, this
 *	module is called upon to provide software-use-only
 *	maps which may or may not be stored in the same
 *	form as hardware maps.  These pseudo-maps are
 *	used to store intermediate results from copy
 *	operations to and from address spaces.
 *
 *	Since the information managed by this module is
 *	also stored by the logical address mapping module,
 *	this module may throw away valid virtual-to-physical
 *	mappings at almost any time.  However, invalidations
 *	of virtual-to-physical mappings must be done as
 *	requested.
 *
 *	In order to cope with hardware architectures which
 *	make virtual-to-physical map invalidates expensive,
 *	this module may delay invalidate or reduced protection
 *	operations until such time as they are actually
 *	necessary.  This module is given full information as
 *	to which processors are currently using which maps,
 *	and to when physical maps must be made correct.
 */
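/*
 * Overview of the amd64 page table structure handled in this file (for
 * reference; the shift and mask constants used below come from
 * <machine/pmap.h>):
 *
 *	PML4 (level 4) -> PDP (level 3) -> PD (level 2) -> PT (level 1)
 *
 * A canonical 48-bit virtual address decomposes into four 9-bit table
 * indices plus a 12-bit page offset:
 *
 *	bits 47-39	PML4 index	(pmap_pml4e_index())
 *	bits 38-30	PDP index	(pmap_pdpe_index())
 *	bits 29-21	PD index	(pmap_pde_index())
 *	bits 20-12	PT index	(pmap_pte_index())
 *	bits 11-0	offset within the 4KB page
 */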

#include "opt_msgbuf.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#endif

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#ifdef SMP
#include <machine/smp.h>
#endif

#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);

vm_paddr_t avail_start;		/* PA of first available physical page */
vm_paddr_t avail_end;		/* PA of last available physical page */
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE;	/* Has pmap_init completed? */
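/*
 * The page table pages for the kernel map and for the direct map are
 * allocated statically at boot by create_pagetables(); the KPTphys/KPDphys/
 * KPDPphys/KPML4phys and DMPDphys/DMPDPphys variables declared below record
 * the physical addresses of those pages.  The "level N" comments follow the
 * numbering above: level 1 = PT, level 2 = PD, level 3 = PDP, level 4 = PML4.
 */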

static int nkpt;
static int ndmpdp;
static vm_paddr_t dmaplimit;
vm_offset_t kernel_vm_end;
pt_entry_t pg_nx;

static u_int64_t	KPTphys;	/* phys addr of kernel level 1 */
static u_int64_t	KPDphys;	/* phys addr of kernel level 2 */
static u_int64_t	KPDPphys;	/* phys addr of kernel level 3 */
u_int64_t		KPML4phys;	/* phys addr of kernel level 4 */

static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_pagedaemon_waken;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = 0;
caddr_t CADDR1 = 0;
struct msgbuf *msgbufp = 0;

/*
 * Crashdump maps.
 */
static caddr_t crashdumpmap;

static PMAP_INLINE void	free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
static void	pmap_clear_ptes(vm_page_t m, int bit);

static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq,
		vm_offset_t sva, pd_entry_t ptepde);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
		vm_offset_t va, pd_entry_t ptepde);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va);

static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex);
static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m);
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);

CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));

/*
 * Move the kernel virtual free pointer to the next
 * 2MB.  This is used to help improve performance
 * by using a large (2MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
	vm_offset_t newaddr = addr;

	newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
	return newaddr;
}

/********************/
/* Inline functions */
/********************/

/* Return a non-clipped PD index for a given VA */
static __inline vm_pindex_t
pmap_pde_pindex(vm_offset_t va)
{
	return va >> PDRSHIFT;
}


/* Return various clipped indexes for a given VA */
static __inline vm_pindex_t
pmap_pte_index(vm_offset_t va)
{

	return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pde_index(vm_offset_t va)
{

	return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pdpe_index(vm_offset_t va)
{

	return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pml4e_index(vm_offset_t va)
{

	return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
}

/* Return a pointer to the PML4 slot that corresponds to a VA */
static __inline pml4_entry_t *
pmap_pml4e(pmap_t pmap, vm_offset_t va)
{

	if (!pmap)
		return NULL;
	return (&pmap->pm_pml4[pmap_pml4e_index(va)]);
}

/* Return a pointer to the PDP slot that corresponds to a VA */
static __inline pdp_entry_t *
pmap_pdpe(pmap_t pmap, vm_offset_t va)
{
	pml4_entry_t *pml4e;
	pdp_entry_t *pdpe;

	pml4e = pmap_pml4e(pmap, va);
	if (pml4e == NULL || (*pml4e & PG_V) == 0)
		return NULL;
	pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME);
	return (&pdpe[pmap_pdpe_index(va)]);
}

/* Return a pointer to the PD slot that corresponds to a VA */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va)
{
	pdp_entry_t *pdpe;
	pd_entry_t *pde;

	pdpe = pmap_pdpe(pmap, va);
	if (pdpe == NULL || (*pdpe & PG_V) == 0)
		return NULL;
	pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME);
	return (&pde[pmap_pde_index(va)]);
}
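/*
 * The walkers above and pmap_pte()/pmap_pte_pde() below descend the tree one
 * level at a time: each lower level is reached by reading the current entry
 * and translating its physical address through the direct map
 * (PHYS_TO_DMAP).  NULL is returned as soon as an upper-level entry is not
 * valid (PG_V clear), so callers check both the returned pointer and the
 * entry itself.  Typical caller pattern (illustrative sketch, not code from
 * this file):
 *
 *	pt_entry_t *pte = pmap_pte(pmap, va);
 *	if (pte != NULL && (*pte & PG_V) != 0)
 *		pa = (*pte & PG_FRAME) | (va & PAGE_MASK);
 */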
/* Return a pointer to the PT slot that corresponds to a VA */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *pde;
	pt_entry_t *pte;

	pde = pmap_pde(pmap, va);
	if (pde == NULL || (*pde & PG_V) == 0)
		return NULL;
	if ((*pde & PG_PS) != 0)	/* compat with i386 pmap_pte() */
		return ((pt_entry_t *)pde);
	pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
	return (&pte[pmap_pte_index(va)]);
}


static __inline pt_entry_t *
pmap_pte_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *ptepde)
{
	pd_entry_t *pde;
	pt_entry_t *pte;

	pde = pmap_pde(pmap, va);
	if (pde == NULL || (*pde & PG_V) == 0)
		return NULL;
	*ptepde = *pde;
	if ((*pde & PG_PS) != 0)	/* compat with i386 pmap_pte() */
		return ((pt_entry_t *)pde);
	pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
	return (&pte[pmap_pte_index(va)]);
}


PMAP_INLINE pt_entry_t *
vtopte(vm_offset_t va)
{
	u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);

	return (PTmap + ((va >> PAGE_SHIFT) & mask));
}

static __inline pd_entry_t *
vtopde(vm_offset_t va)
{
	u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);

	return (PDmap + ((va >> PDRSHIFT) & mask));
}

static u_int64_t
allocpages(int n)
{
	u_int64_t ret;

	ret = avail_start;
	bzero((void *)ret, n * PAGE_SIZE);
	avail_start += n * PAGE_SIZE;
	return (ret);
}

static void
create_pagetables(void)
{
	int i;

	/* Allocate pages */
	KPTphys = allocpages(NKPT);
	KPML4phys = allocpages(1);
	KPDPphys = allocpages(NKPML4E);
	KPDphys = allocpages(NKPDPE);

	ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
	if (ndmpdp < 4)		/* Minimum 4GB of dirmap */
		ndmpdp = 4;
	DMPDPphys = allocpages(NDMPML4E);
	DMPDphys = allocpages(ndmpdp);
	dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;

	/* Fill in the underlying page table pages */
	/* Read-only from zero to physfree */
	/* XXX not fully used, underneath 2M pages */
	for (i = 0; (i << PAGE_SHIFT) < avail_start; i++) {
		((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
		((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G;
	}

	/* Now map the page tables at their location within PTmap */
	for (i = 0; i < NKPT; i++) {
		((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
	}

	/* Map from zero to end of allocations under 2M pages */
	/* This replaces some of the KPTphys entries above */
	for (i = 0; (i << PDRSHIFT) < avail_start; i++) {
		((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
		((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
	}

	/* And connect up the PD to the PDP */
	for (i = 0; i < NKPDPE; i++) {
		((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT);
		((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
	}


	/* Now set up the direct map space using 2MB pages */
	for (i = 0; i < NPDEPG * ndmpdp; i++) {
		((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
		((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G;
	}
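	/*
	 * At this point DMPDphys holds enough page directory pages to map
	 * all of physical memory (at least 4GB) with 2MB pages.  The loops
	 * below wire those PD pages into the direct-map PDP pages and,
	 * further down, hang the direct map and the kernel PDP off the PML4,
	 * so that every physical page becomes reachable via PHYS_TO_DMAP().
	 */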

	/* And the direct map space's PDP */
	for (i = 0; i < ndmpdp; i++) {
		((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT);
		((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
	}

	/* And recursively map PML4 to itself in order to get PTmap */
	((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
	((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;

	/* Connect the Direct Map slot up to the PML4 */
	((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys;
	((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U;

	/* Connect the KVA slot up to the PML4 */
	((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
	((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
}

/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	On amd64 this is called after mapping has already been enabled
 *	and just syncs the pmap module with what has already been done.
 *	[We can't call it easily with mapping off since the kernel is not
 *	mapped with PA == VA, hence we would have to relocate every address
 *	from the linked base (virtual) address "KERNBASE" to the actual
 *	(physical) address starting relative to 0]
 */
void
pmap_bootstrap(firstaddr)
	vm_paddr_t *firstaddr;
{
	vm_offset_t va;
	pt_entry_t *pte, *unused;

	avail_start = *firstaddr;

	/*
	 * Create an initial set of page tables to run the kernel in.
	 */
	create_pagetables();
	*firstaddr = avail_start;

	virtual_avail = (vm_offset_t) KERNBASE + avail_start;
	virtual_avail = pmap_kmem_choose(virtual_avail);

	virtual_end = VM_MAX_KERNEL_ADDRESS;


	/* XXX do %cr0 as well */
	load_cr4(rcr4() | CR4_PGE | CR4_PSE);
	load_cr3(KPML4phys);

	/*
	 * Initialize the kernel pmap (which is statically allocated).
	 */
	PMAP_LOCK_INIT(kernel_pmap);
	kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);
	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
	TAILQ_INIT(&kernel_pmap->pm_pvlist);
	nkpt = NKPT;

	/*
	 * Reserve some special page table entries/VA space for temporary
	 * mapping of pages.
	 */
#define	SYSMAP(c, p, v, n)	\
	v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

	va = virtual_avail;
	pte = vtopte(va);

	/*
	 * CMAP1 is only used for the memory test.
	 */
	SYSMAP(caddr_t, CMAP1, CADDR1, 1)

	/*
	 * Crashdump maps.
	 */
	SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS)

	/*
	 * msgbufp is used to map the system message buffer.
	 */
	SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE)))

	virtual_avail = va;

	*CMAP1 = 0;

	invltlb();
}

/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 *	pmap_init has been enhanced to support in a fairly consistent
 *	way, discontiguous physical memory.
 */
void
pmap_init(void)
{
	int i;

	/*
	 * Allocate memory for random pmap data structures.  Includes the
	 * pv_head_table.
	 */

	for(i = 0; i < vm_page_array_size; i++) {
		vm_page_t m;

		m = &vm_page_array[i];
		TAILQ_INIT(&m->md.pv_list);
		m->md.pv_list_count = 0;
	}

	/*
	 * init the pv free list
	 */
	pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
	uma_prealloc(pvzone, MINPV);

	/*
	 * Now it is safe to enable pv_table recording.
	 */
	pmap_initialized = TRUE;
}
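/*
 * Each managed physical page carries a list of pv entries, one for every
 * mapping of that page; pmap_remove_all() and related routines walk these
 * lists.  pmap_init() above preallocates MINPV (2048) entries up front, and
 * pmap_init2() below sizes the zone: with the default PMAP_SHPGPERPROC of
 * 200, pv_entry_max is 200 * maxproc + vm_page_array_size, and the
 * high-water mark is 90% of that.
 */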

/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2()
{
	int shpgperproc = PMAP_SHPGPERPROC;

	TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
	pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
	TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
	pv_entry_high_water = 9 * (pv_entry_max / 10);
	uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
}


/***************************************************
 * Low level helper routines.....
 ***************************************************/

#if defined(PMAP_DIAGNOSTIC)

/*
 * This code checks for non-writeable/modified pages.
 * This should be an invalid condition.
 */
static int
pmap_nw_modified(pt_entry_t ptea)
{
	int pte;

	pte = (int) ptea;

	if ((pte & (PG_M|PG_RW)) == PG_M)
		return 1;
	else
		return 0;
}
#endif


/*
 * this routine defines the region(s) of memory that should
 * not be tested for the modified bit.
 */
static PMAP_INLINE int
pmap_track_modified(vm_offset_t va)
{
	if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
		return 1;
	else
		return 0;
}

#ifdef SMP
/*
 * For SMP, these functions have to use the IPI mechanism for coherence.
 */
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	u_int cpumask;
	u_int other_cpus;

	if (smp_started) {
		if (!(read_rflags() & PSL_I))
			panic("%s: interrupts disabled", __func__);
		mtx_lock_spin(&smp_rv_mtx);
	} else
		critical_enter();
	/*
	 * We need to disable interrupt preemption but MUST NOT have
	 * interrupts disabled here.
	 * XXX we may need to hold schedlock to get a coherent pm_active
	 * XXX critical sections disable interrupts again
	 */
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invlpg(va);
		smp_invlpg(va);
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invlpg(va);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
	}
	if (smp_started)
		mtx_unlock_spin(&smp_rv_mtx);
	else
		critical_exit();
}

void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	u_int cpumask;
	u_int other_cpus;
	vm_offset_t addr;

	if (smp_started) {
		if (!(read_rflags() & PSL_I))
			panic("%s: interrupts disabled", __func__);
		mtx_lock_spin(&smp_rv_mtx);
	} else
		critical_enter();
	/*
	 * We need to disable interrupt preemption but MUST NOT have
	 * interrupts disabled here.
	 * XXX we may need to hold schedlock to get a coherent pm_active
	 * XXX critical sections disable interrupts again
	 */
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		smp_invlpg_range(sva, eva);
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			for (addr = sva; addr < eva; addr += PAGE_SIZE)
				invlpg(addr);
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
			    sva, eva);
	}
	if (smp_started)
		mtx_unlock_spin(&smp_rv_mtx);
	else
		critical_exit();
}

void
pmap_invalidate_all(pmap_t pmap)
{
	u_int cpumask;
	u_int other_cpus;

	if (smp_started) {
		if (!(read_rflags() & PSL_I))
			panic("%s: interrupts disabled", __func__);
		mtx_lock_spin(&smp_rv_mtx);
	} else
		critical_enter();
	/*
	 * We need to disable interrupt preemption but MUST NOT have
	 * interrupts disabled here.
	 * XXX we may need to hold schedlock to get a coherent pm_active
	 * XXX critical sections disable interrupts again
	 */
	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invltlb();
		smp_invltlb();
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invltlb();
		if (pmap->pm_active & other_cpus)
			smp_masked_invltlb(pmap->pm_active & other_cpus);
	}
	if (smp_started)
		mtx_unlock_spin(&smp_rv_mtx);
	else
		critical_exit();
}
#else /* !SMP */
/*
 * Normal, non-SMP, invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	if (pmap == kernel_pmap || pmap->pm_active)
		invlpg(va);
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	if (pmap == kernel_pmap || pmap->pm_active)
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

	if (pmap == kernel_pmap || pmap->pm_active)
		invltlb();
}
#endif /* !SMP */

/*
 * Are we current address space or kernel?
 */
static __inline int
pmap_is_current(pmap_t pmap)
{
	return (pmap == kernel_pmap ||
	    (pmap->pm_pml4[PML4PML4I] & PG_FRAME) == (PML4pml4e[0] & PG_FRAME));
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	vm_paddr_t rtval;
	pt_entry_t *pte;
	pd_entry_t pde, *pdep;

	rtval = 0;
	if (pmap == NULL)
		return (rtval);
	PMAP_LOCK(pmap);
	pdep = pmap_pde(pmap, va);
	if (pdep != NULL) {
		pde = *pdep;
		if (pde) {
			if ((pde & PG_PS) != 0) {
				rtval = (pde & ~PDRMASK) | (va & PDRMASK);
				PMAP_UNLOCK(pmap);
				return rtval;
			}
			pte = pmap_pte(pmap, va);
			rtval = (*pte & PG_FRAME) | (va & PAGE_MASK);
		}
	}
	PMAP_UNLOCK(pmap);
	return (rtval);
}

/*
 *	Routine:	pmap_extract_and_hold
 *	Function:
 *		Atomically extract and hold the physical page
 *		with the given pmap and virtual address pair
 *		if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pd_entry_t pde, *pdep;
	pt_entry_t pte;
	vm_page_t m;

	m = NULL;
	if (pmap == NULL)
		return (m);
	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	pdep = pmap_pde(pmap, va);
	if (pdep != NULL && (pde = *pdep)) {
		if (pde & PG_PS) {
			if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) {
				m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) |
				    (va & PDRMASK));
				vm_page_hold(m);
			}
		} else {
			pte = *pmap_pte(pmap, va);
			if ((pte & PG_V) &&
			    ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) {
				m = PHYS_TO_VM_PAGE(pte & PG_FRAME);
				vm_page_hold(m);
			}
		}
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(pmap);
	return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pd_entry_t *pde;
	vm_paddr_t pa;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		pde = pmap_pde(kernel_pmap, va);
		if (*pde & PG_PS) {
			pa = (*pde & ~(NBPDR - 1)) | (va & (NBPDR - 1));
		} else {
			pa = *vtopte(va);
			pa = (pa & PG_FRAME) | (va & PAGE_MASK);
		}
	}
	return pa;
}

/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_store(pte, pa | PG_RW | PG_V | PG_G);
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;

	pte = vtopte(va);
	pte_clear(pte);
}

/*
 *	Used to map a range of physical addresses into kernel
 *	virtual address space.
 *
 *	The value passed in '*virt' is a suggested virtual address for
 *	the mapping. Architectures which can support a direct-mapped
 *	physical to virtual region can return the appropriate address
 *	within that region, leaving '*virt' unchanged. Other
 *	architectures should map the pages starting at '*virt' and
 *	update '*virt' with the first usable address after the mapped
 *	region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return PHYS_TO_DMAP(start);
}


/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
		va += PAGE_SIZE;
		m++;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}
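/*
 * Illustrative usage sketch (not a routine in this file; the names kva,
 * pages and npages are hypothetical): a caller that has reserved a stretch
 * of kernel VA can wire a set of pages into it with pmap_qenter() above and
 * later tear the mappings down again with pmap_qremove() below:
 *
 *	pmap_qenter(kva, pages, npages);
 *	... access the pages through (void *)kva ...
 *	pmap_qremove(kva, npages);
 */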
/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	vm_offset_t va;

	va = sva;
	while (count-- > 0) {
		pmap_kremove(va);
		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 */
static PMAP_INLINE int
pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{

	vm_page_unhold(m);
	if (m->hold_count == 0)
		return _pmap_unwire_pte_hold(pmap, va, m);
	else
		return 0;
}

static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	vm_offset_t pteva;

	/*
	 * unmap the page table page
	 */
	if (m->pindex >= (NUPDE + NUPDPE)) {
		/* PDP page */
		pml4_entry_t *pml4;
		pml4 = pmap_pml4e(pmap, va);
		pteva = (vm_offset_t) PDPmap + amd64_ptob(m->pindex - (NUPDE + NUPDPE));
		*pml4 = 0;
	} else if (m->pindex >= NUPDE) {
		/* PD page */
		pdp_entry_t *pdp;
		pdp = pmap_pdpe(pmap, va);
		pteva = (vm_offset_t) PDmap + amd64_ptob(m->pindex - NUPDE);
		*pdp = 0;
	} else {
		/* PTE page */
		pd_entry_t *pd;
		pd = pmap_pde(pmap, va);
		pteva = (vm_offset_t) PTmap + amd64_ptob(m->pindex);
		*pd = 0;
	}
	--pmap->pm_stats.resident_count;
	if (m->pindex < NUPDE) {
		/* We just released a PT, unhold the matching PD */
		vm_page_t pdpg;

		pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME);
		pmap_unwire_pte_hold(pmap, va, pdpg);
	}
	if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
		/* We just released a PD, unhold the matching PDP */
		vm_page_t pdppg;

		pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME);
		pmap_unwire_pte_hold(pmap, va, pdppg);
	}
	if (pmap_is_current(pmap)) {
		/*
		 * Do an invltlb to make the invalidated mapping
		 * take effect immediately.
		 */
		pmap_invalidate_page(pmap, pteva);
	}

	/*
	 * If the page is finally unwired, simply free it.
	 */
	--m->wire_count;
	if (m->wire_count == 0) {
		vm_page_free_zero(m);
		atomic_subtract_int(&cnt.v_wire_count, 1);
	}
	return 1;
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde)
{
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return 0;
	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
	mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME);
	return pmap_unwire_pte_hold(pmap, va, mpte);
}

void
pmap_pinit0(pmap)
	struct pmap *pmap;
{

	PMAP_LOCK_INIT(pmap);
	pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys);
	pmap->pm_active = 0;
	TAILQ_INIT(&pmap->pm_pvlist);
	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
1071 */ 1072void 1073pmap_pinit(pmap) 1074 register struct pmap *pmap; 1075{ 1076 vm_page_t pml4pg; 1077 static vm_pindex_t color; 1078 1079 PMAP_LOCK_INIT(pmap); 1080 1081 /* 1082 * allocate the page directory page 1083 */ 1084 while ((pml4pg = vm_page_alloc(NULL, color++, VM_ALLOC_NOOBJ | 1085 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1086 VM_WAIT; 1087 1088 pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); 1089 1090 if ((pml4pg->flags & PG_ZERO) == 0) 1091 pagezero(pmap->pm_pml4); 1092 1093 /* Wire in kernel global address entries. */ 1094 pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U; 1095 pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U; 1096 1097 /* install self-referential address mapping entry(s) */ 1098 pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; 1099 1100 pmap->pm_active = 0; 1101 TAILQ_INIT(&pmap->pm_pvlist); 1102 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1103} 1104 1105/* 1106 * this routine is called if the page table page is not 1107 * mapped correctly. 1108 * 1109 * Note: If a page allocation fails at page table level two or three, 1110 * one or two pages may be held during the wait, only to be released 1111 * afterwards. This conservative approach is easily argued to avoid 1112 * race conditions. 1113 */ 1114static vm_page_t 1115_pmap_allocpte(pmap, ptepindex) 1116 pmap_t pmap; 1117 vm_pindex_t ptepindex; 1118{ 1119 vm_page_t m, pdppg, pdpg; 1120 1121 /* 1122 * Allocate a page table page. 1123 */ 1124 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1125 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1126 PMAP_UNLOCK(pmap); 1127 vm_page_unlock_queues(); 1128 VM_WAIT; 1129 vm_page_lock_queues(); 1130 PMAP_LOCK(pmap); 1131 1132 /* 1133 * Indicate the need to retry. While waiting, the page table 1134 * page may have been allocated. 1135 */ 1136 return (NULL); 1137 } 1138 if ((m->flags & PG_ZERO) == 0) 1139 pmap_zero_page(m); 1140 1141 KASSERT(m->queue == PQ_NONE, 1142 ("_pmap_allocpte: %p->queue != PQ_NONE", m)); 1143 1144 /* 1145 * Increment the hold count for the page table page 1146 * (denoting a new mapping.) 1147 */ 1148 m->hold_count++; 1149 1150 /* 1151 * Map the pagetable page into the process address space, if 1152 * it isn't already there. 
1153 */ 1154 1155 pmap->pm_stats.resident_count++; 1156 1157 if (ptepindex >= (NUPDE + NUPDPE)) { 1158 pml4_entry_t *pml4; 1159 vm_pindex_t pml4index; 1160 1161 /* Wire up a new PDPE page */ 1162 pml4index = ptepindex - (NUPDE + NUPDPE); 1163 pml4 = &pmap->pm_pml4[pml4index]; 1164 *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1165 1166 } else if (ptepindex >= NUPDE) { 1167 vm_pindex_t pml4index; 1168 vm_pindex_t pdpindex; 1169 pml4_entry_t *pml4; 1170 pdp_entry_t *pdp; 1171 1172 /* Wire up a new PDE page */ 1173 pdpindex = ptepindex - NUPDE; 1174 pml4index = pdpindex >> NPML4EPGSHIFT; 1175 1176 pml4 = &pmap->pm_pml4[pml4index]; 1177 if ((*pml4 & PG_V) == 0) { 1178 /* Have to allocate a new pdp, recurse */ 1179 if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index) == NULL) { 1180 vm_page_unhold(m); 1181 vm_page_free(m); 1182 return (NULL); 1183 } 1184 } else { 1185 /* Add reference to pdp page */ 1186 pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME); 1187 pdppg->hold_count++; 1188 } 1189 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1190 1191 /* Now find the pdp page */ 1192 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1193 *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1194 1195 } else { 1196 vm_pindex_t pml4index; 1197 vm_pindex_t pdpindex; 1198 pml4_entry_t *pml4; 1199 pdp_entry_t *pdp; 1200 pd_entry_t *pd; 1201 1202 /* Wire up a new PTE page */ 1203 pdpindex = ptepindex >> NPDPEPGSHIFT; 1204 pml4index = pdpindex >> NPML4EPGSHIFT; 1205 1206 /* First, find the pdp and check that its valid. */ 1207 pml4 = &pmap->pm_pml4[pml4index]; 1208 if ((*pml4 & PG_V) == 0) { 1209 /* Have to allocate a new pd, recurse */ 1210 if (_pmap_allocpte(pmap, NUPDE + pdpindex) == NULL) { 1211 vm_page_unhold(m); 1212 vm_page_free(m); 1213 return (NULL); 1214 } 1215 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1216 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1217 } else { 1218 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1219 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1220 if ((*pdp & PG_V) == 0) { 1221 /* Have to allocate a new pd, recurse */ 1222 if (_pmap_allocpte(pmap, NUPDE + pdpindex) == NULL) { 1223 vm_page_unhold(m); 1224 vm_page_free(m); 1225 return (NULL); 1226 } 1227 } else { 1228 /* Add reference to the pd page */ 1229 pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); 1230 pdpg->hold_count++; 1231 } 1232 } 1233 pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME); 1234 1235 /* Now we know where the page directory page is */ 1236 pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; 1237 *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1238 } 1239 1240 return m; 1241} 1242 1243static vm_page_t 1244pmap_allocpte(pmap_t pmap, vm_offset_t va) 1245{ 1246 vm_pindex_t ptepindex; 1247 pd_entry_t *pd; 1248 vm_page_t m; 1249 1250 /* 1251 * Calculate pagetable page index 1252 */ 1253 ptepindex = pmap_pde_pindex(va); 1254retry: 1255 /* 1256 * Get the page directory entry 1257 */ 1258 pd = pmap_pde(pmap, va); 1259 1260 /* 1261 * This supports switching from a 2MB page to a 1262 * normal 4K page. 1263 */ 1264 if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 1265 *pd = 0; 1266 pd = 0; 1267 pmap_invalidate_all(kernel_pmap); 1268 } 1269 1270 /* 1271 * If the page table page is mapped, we just increment the 1272 * hold count, and activate it. 
1273 */ 1274 if (pd != 0 && (*pd & PG_V) != 0) { 1275 m = PHYS_TO_VM_PAGE(*pd & PG_FRAME); 1276 m->hold_count++; 1277 } else { 1278 /* 1279 * Here if the pte page isn't mapped, or if it has been 1280 * deallocated. 1281 */ 1282 m = _pmap_allocpte(pmap, ptepindex); 1283 if (m == NULL) 1284 goto retry; 1285 } 1286 return (m); 1287} 1288 1289 1290/*************************************************** 1291 * Pmap allocation/deallocation routines. 1292 ***************************************************/ 1293 1294/* 1295 * Release any resources held by the given physical map. 1296 * Called when a pmap initialized by pmap_pinit is being released. 1297 * Should only be called if the map contains no valid mappings. 1298 */ 1299void 1300pmap_release(pmap_t pmap) 1301{ 1302 vm_page_t m; 1303 1304 KASSERT(pmap->pm_stats.resident_count == 0, 1305 ("pmap_release: pmap resident count %ld != 0", 1306 pmap->pm_stats.resident_count)); 1307 1308 m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME); 1309 1310 pmap->pm_pml4[KPML4I] = 0; /* KVA */ 1311 pmap->pm_pml4[DMPML4I] = 0; /* Direct Map */ 1312 pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */ 1313 1314 vm_page_lock_queues(); 1315 m->wire_count--; 1316 atomic_subtract_int(&cnt.v_wire_count, 1); 1317 vm_page_free_zero(m); 1318 vm_page_unlock_queues(); 1319 PMAP_LOCK_DESTROY(pmap); 1320} 1321 1322static int 1323kvm_size(SYSCTL_HANDLER_ARGS) 1324{ 1325 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1326 1327 return sysctl_handle_long(oidp, &ksize, 0, req); 1328} 1329SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1330 0, 0, kvm_size, "IU", "Size of KVM"); 1331 1332static int 1333kvm_free(SYSCTL_HANDLER_ARGS) 1334{ 1335 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1336 1337 return sysctl_handle_long(oidp, &kfree, 0, req); 1338} 1339SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1340 0, 0, kvm_free, "IU", "Amount of KVM free"); 1341 1342/* 1343 * grow the number of kernel page table entries, if needed 1344 */ 1345void 1346pmap_growkernel(vm_offset_t addr) 1347{ 1348 vm_paddr_t paddr; 1349 vm_page_t nkpg; 1350 pd_entry_t *pde, newpdir; 1351 pdp_entry_t newpdp; 1352 1353 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1354 if (kernel_vm_end == 0) { 1355 kernel_vm_end = KERNBASE; 1356 nkpt = 0; 1357 while ((*pmap_pde(kernel_pmap, kernel_vm_end) & PG_V) != 0) { 1358 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1359 nkpt++; 1360 } 1361 } 1362 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1363 while (kernel_vm_end < addr) { 1364 pde = pmap_pde(kernel_pmap, kernel_vm_end); 1365 if (pde == NULL) { 1366 /* We need a new PDP entry */ 1367 nkpg = vm_page_alloc(NULL, nkpt, 1368 VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); 1369 if (!nkpg) 1370 panic("pmap_growkernel: no memory to grow kernel"); 1371 pmap_zero_page(nkpg); 1372 paddr = VM_PAGE_TO_PHYS(nkpg); 1373 newpdp = (pdp_entry_t) 1374 (paddr | PG_V | PG_RW | PG_A | PG_M); 1375 *pmap_pdpe(kernel_pmap, kernel_vm_end) = newpdp; 1376 continue; /* try again */ 1377 } 1378 if ((*pde & PG_V) != 0) { 1379 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1380 continue; 1381 } 1382 1383 /* 1384 * This index is bogus, but out of the way 1385 */ 1386 nkpg = vm_page_alloc(NULL, nkpt, 1387 VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); 1388 if (!nkpg) 1389 panic("pmap_growkernel: no memory to grow kernel"); 1390 1391 nkpt++; 1392 1393 pmap_zero_page(nkpg); 1394 paddr = VM_PAGE_TO_PHYS(nkpg); 
1395 newpdir = (pd_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M); 1396 *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir; 1397 1398 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1399 } 1400} 1401 1402 1403/*************************************************** 1404 * page management routines. 1405 ***************************************************/ 1406 1407/* 1408 * free the pv_entry back to the free list 1409 */ 1410static PMAP_INLINE void 1411free_pv_entry(pv_entry_t pv) 1412{ 1413 pv_entry_count--; 1414 uma_zfree(pvzone, pv); 1415} 1416 1417/* 1418 * get a new pv_entry, allocating a block from the system 1419 * when needed. 1420 * the memory allocation is performed bypassing the malloc code 1421 * because of the possibility of allocations at interrupt time. 1422 */ 1423static pv_entry_t 1424get_pv_entry(void) 1425{ 1426 pv_entry_count++; 1427 if (pv_entry_high_water && 1428 (pv_entry_count > pv_entry_high_water) && 1429 (pmap_pagedaemon_waken == 0)) { 1430 pmap_pagedaemon_waken = 1; 1431 wakeup (&vm_pages_needed); 1432 } 1433 return uma_zalloc(pvzone, M_NOWAIT); 1434} 1435 1436 1437static int 1438pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va, pd_entry_t ptepde) 1439{ 1440 pv_entry_t pv; 1441 int rtval; 1442 1443 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1444 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1445 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1446 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1447 if (pmap == pv->pv_pmap && va == pv->pv_va) 1448 break; 1449 } 1450 } else { 1451 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1452 if (va == pv->pv_va) 1453 break; 1454 } 1455 } 1456 1457 rtval = 0; 1458 if (pv) { 1459 rtval = pmap_unuse_pt(pmap, va, ptepde); 1460 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1461 m->md.pv_list_count--; 1462 if (TAILQ_FIRST(&m->md.pv_list) == NULL) 1463 vm_page_flag_clear(m, PG_WRITEABLE); 1464 1465 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1466 free_pv_entry(pv); 1467 } 1468 1469 return rtval; 1470} 1471 1472/* 1473 * Create a pv entry for page at pa for 1474 * (pmap, va). 1475 */ 1476static void 1477pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 1478{ 1479 pv_entry_t pv; 1480 1481 pv = get_pv_entry(); 1482 pv->pv_va = va; 1483 pv->pv_pmap = pmap; 1484 1485 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1486 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1487 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1488 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1489 m->md.pv_list_count++; 1490} 1491 1492/* 1493 * pmap_remove_pte: do the things to unmap a page in a process 1494 */ 1495static int 1496pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, pd_entry_t ptepde) 1497{ 1498 pt_entry_t oldpte; 1499 vm_page_t m; 1500 1501 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1502 oldpte = pte_load_clear(ptq); 1503 if (oldpte & PG_W) 1504 pmap->pm_stats.wired_count -= 1; 1505 /* 1506 * Machines that don't support invlpg, also don't support 1507 * PG_G. 
1508 */ 1509 if (oldpte & PG_G) 1510 pmap_invalidate_page(kernel_pmap, va); 1511 pmap->pm_stats.resident_count -= 1; 1512 if (oldpte & PG_MANAGED) { 1513 m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); 1514 if (oldpte & PG_M) { 1515#if defined(PMAP_DIAGNOSTIC) 1516 if (pmap_nw_modified((pt_entry_t) oldpte)) { 1517 printf( 1518 "pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 1519 va, oldpte); 1520 } 1521#endif 1522 if (pmap_track_modified(va)) 1523 vm_page_dirty(m); 1524 } 1525 if (oldpte & PG_A) 1526 vm_page_flag_set(m, PG_REFERENCED); 1527 return pmap_remove_entry(pmap, m, va, ptepde); 1528 } else { 1529 return pmap_unuse_pt(pmap, va, ptepde); 1530 } 1531} 1532 1533/* 1534 * Remove a single page from a process address space 1535 */ 1536static void 1537pmap_remove_page(pmap_t pmap, vm_offset_t va) 1538{ 1539 pd_entry_t ptepde; 1540 pt_entry_t *pte; 1541 1542 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1543 pte = pmap_pte_pde(pmap, va, &ptepde); 1544 if (pte == NULL || (*pte & PG_V) == 0) 1545 return; 1546 pmap_remove_pte(pmap, pte, va, ptepde); 1547 pmap_invalidate_page(pmap, va); 1548} 1549 1550/* 1551 * Remove the given range of addresses from the specified map. 1552 * 1553 * It is assumed that the start and end are properly 1554 * rounded to the page size. 1555 */ 1556void 1557pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1558{ 1559 vm_offset_t va_next; 1560 pml4_entry_t *pml4e; 1561 pdp_entry_t *pdpe; 1562 pd_entry_t ptpaddr, *pde; 1563 pt_entry_t *pte; 1564 int anyvalid; 1565 1566 if (pmap == NULL) 1567 return; 1568 1569 /* 1570 * Perform an unsynchronized read. This is, however, safe. 1571 */ 1572 if (pmap->pm_stats.resident_count == 0) 1573 return; 1574 1575 vm_page_lock_queues(); 1576 PMAP_LOCK(pmap); 1577 1578 /* 1579 * special handling of removing one page. a very 1580 * common operation and easy to short circuit some 1581 * code. 1582 */ 1583 if (sva + PAGE_SIZE == eva) { 1584 pde = pmap_pde(pmap, sva); 1585 if (pde && (*pde & PG_PS) == 0) { 1586 pmap_remove_page(pmap, sva); 1587 goto out; 1588 } 1589 } 1590 1591 anyvalid = 0; 1592 1593 for (; sva < eva; sva = va_next) { 1594 1595 if (pmap->pm_stats.resident_count == 0) 1596 break; 1597 1598 pml4e = pmap_pml4e(pmap, sva); 1599 if (pml4e == 0) { 1600 va_next = (sva + NBPML4) & ~PML4MASK; 1601 continue; 1602 } 1603 1604 pdpe = pmap_pdpe(pmap, sva); 1605 if (pdpe == 0) { 1606 va_next = (sva + NBPDP) & ~PDPMASK; 1607 continue; 1608 } 1609 1610 /* 1611 * Calculate index for next page table. 1612 */ 1613 va_next = (sva + NBPDR) & ~PDRMASK; 1614 1615 pde = pmap_pde(pmap, sva); 1616 if (pde == 0) 1617 continue; 1618 ptpaddr = *pde; 1619 1620 /* 1621 * Weed out invalid mappings. Note: we assume that the page 1622 * directory table is always allocated, and in kernel virtual. 1623 */ 1624 if (ptpaddr == 0) 1625 continue; 1626 1627 /* 1628 * Check for large page. 1629 */ 1630 if ((ptpaddr & PG_PS) != 0) { 1631 *pde = 0; 1632 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1633 anyvalid = 1; 1634 continue; 1635 } 1636 1637 /* 1638 * Limit our scan to either the end of the va represented 1639 * by the current page table page, or to the end of the 1640 * range being removed. 
1641 */ 1642 if (va_next > eva) 1643 va_next = eva; 1644 1645 for (; sva != va_next; sva += PAGE_SIZE) { 1646 pte = pmap_pte(pmap, sva); 1647 if (pte == NULL || *pte == 0) 1648 continue; 1649 anyvalid = 1; 1650 if (pmap_remove_pte(pmap, pte, sva, ptpaddr)) 1651 break; 1652 } 1653 } 1654 1655 if (anyvalid) 1656 pmap_invalidate_all(pmap); 1657out: 1658 vm_page_unlock_queues(); 1659 PMAP_UNLOCK(pmap); 1660} 1661 1662/* 1663 * Routine: pmap_remove_all 1664 * Function: 1665 * Removes this physical page from 1666 * all physical maps in which it resides. 1667 * Reflects back modify bits to the pager. 1668 * 1669 * Notes: 1670 * Original versions of this routine were very 1671 * inefficient because they iteratively called 1672 * pmap_remove (slow...) 1673 */ 1674 1675void 1676pmap_remove_all(vm_page_t m) 1677{ 1678 register pv_entry_t pv; 1679 pt_entry_t *pte, tpte; 1680 pd_entry_t ptepde; 1681 1682#if defined(PMAP_DIAGNOSTIC) 1683 /* 1684 * XXX This makes pmap_remove_all() illegal for non-managed pages! 1685 */ 1686 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) { 1687 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx", 1688 VM_PAGE_TO_PHYS(m)); 1689 } 1690#endif 1691 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1692 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1693 PMAP_LOCK(pv->pv_pmap); 1694 pv->pv_pmap->pm_stats.resident_count--; 1695 pte = pmap_pte_pde(pv->pv_pmap, pv->pv_va, &ptepde); 1696 tpte = pte_load_clear(pte); 1697 if (tpte & PG_W) 1698 pv->pv_pmap->pm_stats.wired_count--; 1699 if (tpte & PG_A) 1700 vm_page_flag_set(m, PG_REFERENCED); 1701 1702 /* 1703 * Update the vm_page_t clean and reference bits. 1704 */ 1705 if (tpte & PG_M) { 1706#if defined(PMAP_DIAGNOSTIC) 1707 if (pmap_nw_modified((pt_entry_t) tpte)) { 1708 printf( 1709 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 1710 pv->pv_va, tpte); 1711 } 1712#endif 1713 if (pmap_track_modified(pv->pv_va)) 1714 vm_page_dirty(m); 1715 } 1716 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 1717 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 1718 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1719 m->md.pv_list_count--; 1720 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, ptepde); 1721 PMAP_UNLOCK(pv->pv_pmap); 1722 free_pv_entry(pv); 1723 } 1724 vm_page_flag_clear(m, PG_WRITEABLE); 1725} 1726 1727/* 1728 * Set the physical protection on the 1729 * specified range of this map as requested. 1730 */ 1731void 1732pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1733{ 1734 vm_offset_t va_next; 1735 pml4_entry_t *pml4e; 1736 pdp_entry_t *pdpe; 1737 pd_entry_t ptpaddr, *pde; 1738 int anychanged; 1739 1740 if (pmap == NULL) 1741 return; 1742 1743 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1744 pmap_remove(pmap, sva, eva); 1745 return; 1746 } 1747 1748 if (prot & VM_PROT_WRITE) 1749 return; 1750 1751 anychanged = 0; 1752 1753 vm_page_lock_queues(); 1754 PMAP_LOCK(pmap); 1755 for (; sva < eva; sva = va_next) { 1756 1757 pml4e = pmap_pml4e(pmap, sva); 1758 if (pml4e == 0) { 1759 va_next = (sva + NBPML4) & ~PML4MASK; 1760 continue; 1761 } 1762 1763 pdpe = pmap_pdpe(pmap, sva); 1764 if (pdpe == 0) { 1765 va_next = (sva + NBPDP) & ~PDPMASK; 1766 continue; 1767 } 1768 1769 va_next = (sva + NBPDR) & ~PDRMASK; 1770 1771 pde = pmap_pde(pmap, sva); 1772 if (pde == NULL) 1773 continue; 1774 ptpaddr = *pde; 1775 1776 /* 1777 * Weed out invalid mappings. Note: we assume that the page 1778 * directory table is always allocated, and in kernel virtual. 
1779 */ 1780 if (ptpaddr == 0) 1781 continue; 1782 1783 /* 1784 * Check for large page. 1785 */ 1786 if ((ptpaddr & PG_PS) != 0) { 1787 *pde &= ~(PG_M|PG_RW); 1788 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1789 anychanged = 1; 1790 continue; 1791 } 1792 1793 if (va_next > eva) 1794 va_next = eva; 1795 1796 for (; sva != va_next; sva += PAGE_SIZE) { 1797 pt_entry_t pbits; 1798 pt_entry_t *pte; 1799 vm_page_t m; 1800 1801 pte = pmap_pte(pmap, sva); 1802 if (pte == NULL) 1803 continue; 1804 pbits = *pte; 1805 if (pbits & PG_MANAGED) { 1806 m = NULL; 1807 if (pbits & PG_A) { 1808 m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 1809 vm_page_flag_set(m, PG_REFERENCED); 1810 pbits &= ~PG_A; 1811 } 1812 if ((pbits & PG_M) != 0 && 1813 pmap_track_modified(sva)) { 1814 if (m == NULL) 1815 m = PHYS_TO_VM_PAGE(pbits & 1816 PG_FRAME); 1817 vm_page_dirty(m); 1818 pbits &= ~PG_M; 1819 } 1820 } 1821 1822 pbits &= ~PG_RW; 1823 1824 if (pbits != *pte) { 1825 pte_store(pte, pbits); 1826 anychanged = 1; 1827 } 1828 } 1829 } 1830 if (anychanged) 1831 pmap_invalidate_all(pmap); 1832 vm_page_unlock_queues(); 1833 PMAP_UNLOCK(pmap); 1834} 1835 1836/* 1837 * Insert the given physical page (p) at 1838 * the specified virtual address (v) in the 1839 * target physical map with the protection requested. 1840 * 1841 * If specified, the page will be wired down, meaning 1842 * that the related pte can not be reclaimed. 1843 * 1844 * NB: This is the only routine which MAY NOT lazy-evaluate 1845 * or lose information. That is, this routine must actually 1846 * insert this page into the given map NOW. 1847 */ 1848void 1849pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1850 boolean_t wired) 1851{ 1852 vm_paddr_t pa; 1853 register pt_entry_t *pte; 1854 vm_paddr_t opa; 1855 pd_entry_t ptepde; 1856 pt_entry_t origpte, newpte; 1857 vm_page_t mpte; 1858 1859 if (pmap == NULL) 1860 return; 1861 1862 va = trunc_page(va); 1863#ifdef PMAP_DIAGNOSTIC 1864 if (va > VM_MAX_KERNEL_ADDRESS) 1865 panic("pmap_enter: toobig"); 1866 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 1867 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va); 1868#endif 1869 1870 mpte = NULL; 1871 1872 vm_page_lock_queues(); 1873 PMAP_LOCK(pmap); 1874 1875 /* 1876 * In the case that a page table page is not 1877 * resident, we are creating it here. 1878 */ 1879 if (va < VM_MAXUSER_ADDRESS) { 1880 mpte = pmap_allocpte(pmap, va); 1881 } 1882#if 0 && defined(PMAP_DIAGNOSTIC) 1883 else { 1884 pd_entry_t *pdeaddr = pmap_pde(pmap, va); 1885 origpte = *pdeaddr; 1886 if ((origpte & PG_V) == 0) { 1887 panic("pmap_enter: invalid kernel page table page, pde=%p, va=%p\n", 1888 origpte, va); 1889 } 1890 } 1891#endif 1892 1893 pte = pmap_pte_pde(pmap, va, &ptepde); 1894 1895 /* 1896 * Page Directory table entry not valid, we need a new PT page 1897 */ 1898 if (pte == NULL) 1899 panic("pmap_enter: invalid page directory va=%#lx\n", va); 1900 1901 pa = VM_PAGE_TO_PHYS(m) & PG_FRAME; 1902 origpte = *pte; 1903 opa = origpte & PG_FRAME; 1904 1905 if (origpte & PG_PS) 1906 panic("pmap_enter: attempted pmap_enter on 2MB page"); 1907 1908 /* 1909 * Mapping has not changed, must be protection or wiring change. 1910 */ 1911 if (origpte && (opa == pa)) { 1912 /* 1913 * Wiring change, just update stats. We don't worry about 1914 * wiring PT pages as they remain resident as long as there 1915 * are valid mappings in them. Hence, if a user page is wired, 1916 * the PT page will be also. 
1917 */ 1918 if (wired && ((origpte & PG_W) == 0)) 1919 pmap->pm_stats.wired_count++; 1920 else if (!wired && (origpte & PG_W)) 1921 pmap->pm_stats.wired_count--; 1922 1923#if defined(PMAP_DIAGNOSTIC) 1924 if (pmap_nw_modified((pt_entry_t) origpte)) { 1925 printf( 1926 "pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 1927 va, origpte); 1928 } 1929#endif 1930 1931 /* 1932 * Remove extra pte reference 1933 */ 1934 if (mpte) 1935 mpte->hold_count--; 1936 1937 /* 1938 * We might be turning off write access to the page, 1939 * so we go ahead and sense modify status. 1940 */ 1941 if (origpte & PG_MANAGED) { 1942 if ((origpte & PG_M) && pmap_track_modified(va)) { 1943 vm_page_t om; 1944 om = PHYS_TO_VM_PAGE(opa); 1945 vm_page_dirty(om); 1946 } 1947 pa |= PG_MANAGED; 1948 } 1949 goto validate; 1950 } 1951 /* 1952 * Mapping has changed, invalidate old range and fall through to 1953 * handle validating new mapping. 1954 */ 1955 if (opa) { 1956 int err; 1957 err = pmap_remove_pte(pmap, pte, va, ptepde); 1958 if (err) 1959 panic("pmap_enter: pte vanished, va: 0x%lx", va); 1960 } 1961 1962 /* 1963 * Enter on the PV list if part of our managed memory. Note that we 1964 * raise IPL while manipulating pv_table since pmap_enter can be 1965 * called at interrupt time. 1966 */ 1967 if (pmap_initialized && 1968 (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) { 1969 pmap_insert_entry(pmap, va, m); 1970 pa |= PG_MANAGED; 1971 } 1972 1973 /* 1974 * Increment counters 1975 */ 1976 pmap->pm_stats.resident_count++; 1977 if (wired) 1978 pmap->pm_stats.wired_count++; 1979 1980validate: 1981 /* 1982 * Now validate mapping with desired protection/wiring. 1983 */ 1984 newpte = (pt_entry_t)(pa | PG_V); 1985 if ((prot & VM_PROT_WRITE) != 0) 1986 newpte |= PG_RW; 1987 if ((prot & VM_PROT_EXECUTE) == 0) 1988 newpte |= pg_nx; 1989 if (wired) 1990 newpte |= PG_W; 1991 if (va < VM_MAXUSER_ADDRESS) 1992 newpte |= PG_U; 1993 if (pmap == kernel_pmap) 1994 newpte |= PG_G; 1995 1996 /* 1997 * if the mapping or permission bits are different, we need 1998 * to update the pte. 1999 */ 2000 if ((origpte & ~(PG_M|PG_A)) != newpte) { 2001 pte_store(pte, newpte | PG_A); 2002 /*if (origpte)*/ { 2003 pmap_invalidate_page(pmap, va); 2004 } 2005 } 2006 vm_page_unlock_queues(); 2007 PMAP_UNLOCK(pmap); 2008} 2009 2010/* 2011 * this code makes some *MAJOR* assumptions: 2012 * 1. Current pmap & pmap exists. 2013 * 2. Not wired. 2014 * 3. Read access. 2015 * 4. No page table pages. 2016 * 5. Tlbflush is deferred to calling procedure. 2017 * 6. Page IS managed. 2018 * but is *MUCH* faster than pmap_enter... 2019 */ 2020 2021vm_page_t 2022pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) 2023{ 2024 pt_entry_t *pte; 2025 vm_paddr_t pa; 2026 2027 vm_page_lock_queues(); 2028 PMAP_LOCK(pmap); 2029 2030 /* 2031 * In the case that a page table page is not 2032 * resident, we are creating it here. 2033 */ 2034 if (va < VM_MAXUSER_ADDRESS) { 2035 vm_pindex_t ptepindex; 2036 pd_entry_t *ptepa; 2037 2038 /* 2039 * Calculate pagetable page index 2040 */ 2041 ptepindex = pmap_pde_pindex(va); 2042 if (mpte && (mpte->pindex == ptepindex)) { 2043 mpte->hold_count++; 2044 } else { 2045 retry: 2046 /* 2047 * Get the page directory entry 2048 */ 2049 ptepa = pmap_pde(pmap, va); 2050 2051 /* 2052 * If the page table page is mapped, we just increment 2053 * the hold count, and activate it. 
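 * Otherwise _pmap_allocpte() is called; if it had to sleep it returns
 * NULL and we "goto retry" to re-read the pde, since the page table
 * page may have been created while we slept.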
2054 */ 2055 if (ptepa && (*ptepa & PG_V) != 0) { 2056 if (*ptepa & PG_PS) 2057 panic("pmap_enter_quick: unexpected mapping into 2MB page"); 2058 mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME); 2059 mpte->hold_count++; 2060 } else { 2061 mpte = _pmap_allocpte(pmap, ptepindex); 2062 if (mpte == NULL) 2063 goto retry; 2064 } 2065 } 2066 } else { 2067 mpte = NULL; 2068 } 2069 2070 /* 2071 * This call to vtopte makes the assumption that we are 2072 * entering the page into the current pmap. In order to support 2073 * quick entry into any pmap, one would likely use pmap_pte. 2074 * But that isn't as quick as vtopte. 2075 */ 2076 pte = vtopte(va); 2077 if (*pte) { 2078 if (mpte != NULL) { 2079 pmap_unwire_pte_hold(pmap, va, mpte); 2080 mpte = NULL; 2081 } 2082 goto out; 2083 } 2084 2085 /* 2086 * Enter on the PV list if part of our managed memory. Note that we 2087 * raise IPL while manipulating pv_table since pmap_enter can be 2088 * called at interrupt time. 2089 */ 2090 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) 2091 pmap_insert_entry(pmap, va, m); 2092 2093 /* 2094 * Increment counters 2095 */ 2096 pmap->pm_stats.resident_count++; 2097 2098 pa = VM_PAGE_TO_PHYS(m); 2099 2100 /* 2101 * Now validate mapping with RO protection 2102 */ 2103 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2104 pte_store(pte, pa | PG_V | PG_U); 2105 else 2106 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 2107out: 2108 vm_page_unlock_queues(); 2109 PMAP_UNLOCK(pmap); 2110 return mpte; 2111} 2112 2113/* 2114 * Make a temporary mapping for a physical address. This is only intended 2115 * to be used for panic dumps. 2116 */ 2117void * 2118pmap_kenter_temporary(vm_paddr_t pa, int i) 2119{ 2120 vm_offset_t va; 2121 2122 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 2123 pmap_kenter(va, pa); 2124 invlpg(va); 2125 return ((void *)crashdumpmap); 2126} 2127 2128/* 2129 * This code maps large physical mmap regions into the 2130 * processor address space. Note that some shortcuts 2131 * are taken, but the code works. 
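 * Only OBJT_DEVICE objects are handled, and only when addr and size
 * are both multiples of NBPDR (2MB); the region is then mapped
 * directly with PG_PS page directory entries.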
2132 */ 2133void 2134pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, 2135 vm_object_t object, vm_pindex_t pindex, 2136 vm_size_t size) 2137{ 2138 vm_page_t p; 2139 2140 VM_OBJECT_LOCK_ASSERT(object, MA_OWNED); 2141 KASSERT(object->type == OBJT_DEVICE, 2142 ("pmap_object_init_pt: non-device object")); 2143 if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) { 2144 int i; 2145 vm_page_t m[1]; 2146 int npdes; 2147 pd_entry_t ptepa, *pde; 2148 2149 PMAP_LOCK(pmap); 2150 pde = pmap_pde(pmap, addr); 2151 if (pde != 0 && (*pde & PG_V) != 0) 2152 goto out; 2153 PMAP_UNLOCK(pmap); 2154retry: 2155 p = vm_page_lookup(object, pindex); 2156 if (p != NULL) { 2157 vm_page_lock_queues(); 2158 if (vm_page_sleep_if_busy(p, FALSE, "init4p")) 2159 goto retry; 2160 } else { 2161 p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL); 2162 if (p == NULL) 2163 return; 2164 m[0] = p; 2165 2166 if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) { 2167 vm_page_lock_queues(); 2168 vm_page_free(p); 2169 vm_page_unlock_queues(); 2170 return; 2171 } 2172 2173 p = vm_page_lookup(object, pindex); 2174 vm_page_lock_queues(); 2175 vm_page_wakeup(p); 2176 } 2177 vm_page_unlock_queues(); 2178 2179 ptepa = VM_PAGE_TO_PHYS(p); 2180 if (ptepa & (NBPDR - 1)) 2181 return; 2182 2183 p->valid = VM_PAGE_BITS_ALL; 2184 2185 PMAP_LOCK(pmap); 2186 pmap->pm_stats.resident_count += size >> PAGE_SHIFT; 2187 npdes = size >> PDRSHIFT; 2188 for(i = 0; i < npdes; i++) { 2189 pde_store(pde, ptepa | PG_U | PG_RW | PG_V | PG_PS); 2190 ptepa += NBPDR; 2191 pde++; 2192 } 2193 pmap_invalidate_all(pmap); 2194out: 2195 PMAP_UNLOCK(pmap); 2196 } 2197} 2198 2199/* 2200 * Routine: pmap_change_wiring 2201 * Function: Change the wiring attribute for a map/virtual-address 2202 * pair. 2203 * In/out conditions: 2204 * The mapping must already exist in the pmap. 2205 */ 2206void 2207pmap_change_wiring(pmap, va, wired) 2208 register pmap_t pmap; 2209 vm_offset_t va; 2210 boolean_t wired; 2211{ 2212 register pt_entry_t *pte; 2213 2214 if (pmap == NULL) 2215 return; 2216 2217 /* 2218 * Wiring is not a hardware characteristic so there is no need to 2219 * invalidate TLB. 2220 */ 2221 PMAP_LOCK(pmap); 2222 pte = pmap_pte(pmap, va); 2223 if (wired && (*pte & PG_W) == 0) { 2224 pmap->pm_stats.wired_count++; 2225 atomic_set_long(pte, PG_W); 2226 } else if (!wired && (*pte & PG_W) != 0) { 2227 pmap->pm_stats.wired_count--; 2228 atomic_clear_long(pte, PG_W); 2229 } 2230 PMAP_UNLOCK(pmap); 2231} 2232 2233 2234 2235/* 2236 * Copy the range specified by src_addr/len 2237 * from the source map to the range dst_addr/len 2238 * in the destination map. 2239 * 2240 * This routine is only advisory and need not do anything. 
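 * In this implementation it gives up immediately unless dst_addr is
 * equal to src_addr and src_pmap is the current pmap, because the
 * source ptes are looked up with vtopte().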
2241 */ 2242 2243void 2244pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2245 vm_offset_t src_addr) 2246{ 2247 vm_offset_t addr; 2248 vm_offset_t end_addr = src_addr + len; 2249 vm_offset_t va_next; 2250 vm_page_t m; 2251 2252 if (dst_addr != src_addr) 2253 return; 2254 2255 if (!pmap_is_current(src_pmap)) 2256 return; 2257 2258 vm_page_lock_queues(); 2259 PMAP_LOCK(dst_pmap); 2260 for (addr = src_addr; addr < end_addr; addr = va_next) { 2261 pt_entry_t *src_pte, *dst_pte; 2262 vm_page_t dstmpte, srcmpte; 2263 pml4_entry_t *pml4e; 2264 pdp_entry_t *pdpe; 2265 pd_entry_t srcptepaddr, *pde; 2266 2267 if (addr >= UPT_MIN_ADDRESS) 2268 panic("pmap_copy: invalid to pmap_copy page tables"); 2269 2270 /* 2271 * Don't let optional prefaulting of pages make us go 2272 * way below the low water mark of free pages or way 2273 * above high water mark of used pv entries. 2274 */ 2275 if (cnt.v_free_count < cnt.v_free_reserved || 2276 pv_entry_count > pv_entry_high_water) 2277 break; 2278 2279 pml4e = pmap_pml4e(src_pmap, addr); 2280 if (pml4e == 0) { 2281 va_next = (addr + NBPML4) & ~PML4MASK; 2282 continue; 2283 } 2284 2285 pdpe = pmap_pdpe(src_pmap, addr); 2286 if (pdpe == 0) { 2287 va_next = (addr + NBPDP) & ~PDPMASK; 2288 continue; 2289 } 2290 2291 va_next = (addr + NBPDR) & ~PDRMASK; 2292 2293 pde = pmap_pde(src_pmap, addr); 2294 if (pde) 2295 srcptepaddr = *pde; 2296 else 2297 continue; 2298 if (srcptepaddr == 0) 2299 continue; 2300 2301 if (srcptepaddr & PG_PS) { 2302 pde = pmap_pde(dst_pmap, addr); 2303 if (pde == 0) { 2304 /* 2305 * XXX should do an allocpte here to 2306 * instantiate the pde 2307 */ 2308 continue; 2309 } 2310 if (*pde == 0) { 2311 *pde = srcptepaddr; 2312 dst_pmap->pm_stats.resident_count += 2313 NBPDR / PAGE_SIZE; 2314 } 2315 continue; 2316 } 2317 2318 srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME); 2319 if (srcmpte->hold_count == 0) 2320 panic("pmap_copy: source page table page is unused"); 2321 2322 if (va_next > end_addr) 2323 va_next = end_addr; 2324 2325 src_pte = vtopte(addr); 2326 while (addr < va_next) { 2327 pt_entry_t ptetemp; 2328 ptetemp = *src_pte; 2329 /* 2330 * we only virtual copy managed pages 2331 */ 2332 if ((ptetemp & PG_MANAGED) != 0) { 2333 /* 2334 * We have to check after allocpte for the 2335 * pte still being around... allocpte can 2336 * block. 2337 */ 2338 dstmpte = pmap_allocpte(dst_pmap, addr); 2339 dst_pte = pmap_pte(dst_pmap, addr); 2340 if ((*dst_pte == 0) && (ptetemp = *src_pte)) { 2341 /* 2342 * Clear the modified and 2343 * accessed (referenced) bits 2344 * during the copy. 2345 */ 2346 m = PHYS_TO_VM_PAGE(ptetemp & PG_FRAME); 2347 *dst_pte = ptetemp & ~(PG_M | PG_A); 2348 dst_pmap->pm_stats.resident_count++; 2349 pmap_insert_entry(dst_pmap, addr, m); 2350 } else 2351 pmap_unwire_pte_hold(dst_pmap, addr, dstmpte); 2352 if (dstmpte->hold_count >= srcmpte->hold_count) 2353 break; 2354 } 2355 addr += PAGE_SIZE; 2356 src_pte++; 2357 } 2358 } 2359 vm_page_unlock_queues(); 2360 PMAP_UNLOCK(dst_pmap); 2361} 2362 2363/* 2364 * pmap_zero_page zeros the specified hardware page by mapping 2365 * the page into KVM and using bzero to clear its contents. 2366 */ 2367void 2368pmap_zero_page(vm_page_t m) 2369{ 2370 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2371 2372 pagezero((void *)va); 2373} 2374 2375/* 2376 * pmap_zero_page_area zeros the specified hardware page by mapping 2377 * the page into KVM and using bzero to clear its contents. 
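 * (On amd64 no transient mapping is actually created; the page is
 * addressed through the permanent direct map via PHYS_TO_DMAP.)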
2378 * 2379 * off and size may not cover an area beyond a single hardware page. 2380 */ 2381void 2382pmap_zero_page_area(vm_page_t m, int off, int size) 2383{ 2384 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2385 2386 if (off == 0 && size == PAGE_SIZE) 2387 pagezero((void *)va); 2388 else 2389 bzero((char *)va + off, size); 2390} 2391 2392/* 2393 * pmap_zero_page_idle zeros the specified hardware page by mapping 2394 * the page into KVM and using bzero to clear its contents. This 2395 * is intended to be called from the vm_pagezero process only and 2396 * outside of Giant. 2397 */ 2398void 2399pmap_zero_page_idle(vm_page_t m) 2400{ 2401 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2402 2403 pagezero((void *)va); 2404} 2405 2406/* 2407 * pmap_copy_page copies the specified (machine independent) 2408 * page by mapping the page into virtual memory and using 2409 * bcopy to copy the page, one machine dependent page at a 2410 * time. 2411 */ 2412void 2413pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2414{ 2415 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2416 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2417 2418 pagecopy((void *)src, (void *)dst); 2419} 2420 2421/* 2422 * Returns true if the pmap's pv is one of the first 2423 * 16 pvs linked to from this page. This count may 2424 * be changed upwards or downwards in the future; it 2425 * is only necessary that true be returned for a small 2426 * subset of pmaps for proper page aging. 2427 */ 2428boolean_t 2429pmap_page_exists_quick(pmap, m) 2430 pmap_t pmap; 2431 vm_page_t m; 2432{ 2433 pv_entry_t pv; 2434 int loops = 0; 2435 2436 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2437 return FALSE; 2438 2439 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2440 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2441 if (pv->pv_pmap == pmap) { 2442 return TRUE; 2443 } 2444 loops++; 2445 if (loops >= 16) 2446 break; 2447 } 2448 return (FALSE); 2449} 2450 2451#define PMAP_REMOVE_PAGES_CURPROC_ONLY 2452/* 2453 * Remove all pages from specified address space 2454 * this aids process exit speeds. Also, this code 2455 * is special cased for current process only, but 2456 * can have the more generic (and slightly slower) 2457 * mode enabled. This is much faster than pmap_remove 2458 * in the case of running down an entire address space. 
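 * With PMAP_REMOVE_PAGES_CURPROC_ONLY defined (as it is,
 * unconditionally, above), the ptes are looked up with vtopte(), so
 * any pmap other than the current process's is rejected with a
 * warning.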
2459 */ 2460void 2461pmap_remove_pages(pmap, sva, eva) 2462 pmap_t pmap; 2463 vm_offset_t sva, eva; 2464{ 2465 pt_entry_t *pte, tpte; 2466 vm_page_t m; 2467 pv_entry_t pv, npv; 2468 2469#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2470 if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) { 2471 printf("warning: pmap_remove_pages called with non-current pmap\n"); 2472 return; 2473 } 2474#endif 2475 vm_page_lock_queues(); 2476 PMAP_LOCK(pmap); 2477 for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) { 2478 2479 if (pv->pv_va >= eva || pv->pv_va < sva) { 2480 npv = TAILQ_NEXT(pv, pv_plist); 2481 continue; 2482 } 2483 2484#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY 2485 pte = vtopte(pv->pv_va); 2486#else 2487 pte = pmap_pte(pmap, pv->pv_va); 2488#endif 2489 tpte = *pte; 2490 2491 if (tpte == 0) { 2492 printf("TPTE at %p IS ZERO @ VA %08lx\n", 2493 pte, pv->pv_va); 2494 panic("bad pte"); 2495 } 2496 2497/* 2498 * We cannot remove wired pages from a process' mapping at this time 2499 */ 2500 if (tpte & PG_W) { 2501 npv = TAILQ_NEXT(pv, pv_plist); 2502 continue; 2503 } 2504 2505 m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); 2506 KASSERT(m->phys_addr == (tpte & PG_FRAME), 2507 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2508 m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); 2509 2510 KASSERT(m < &vm_page_array[vm_page_array_size], 2511 ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); 2512 2513 pmap->pm_stats.resident_count--; 2514 2515 pte_clear(pte); 2516 2517 /* 2518 * Update the vm_page_t clean and reference bits. 2519 */ 2520 if (tpte & PG_M) { 2521 vm_page_dirty(m); 2522 } 2523 2524 npv = TAILQ_NEXT(pv, pv_plist); 2525 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 2526 2527 m->md.pv_list_count--; 2528 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2529 if (TAILQ_EMPTY(&m->md.pv_list)) 2530 vm_page_flag_clear(m, PG_WRITEABLE); 2531 2532 pmap_unuse_pt(pmap, pv->pv_va, *vtopde(pv->pv_va)); 2533 free_pv_entry(pv); 2534 } 2535 pmap_invalidate_all(pmap); 2536 PMAP_UNLOCK(pmap); 2537 vm_page_unlock_queues(); 2538} 2539 2540/* 2541 * pmap_is_modified: 2542 * 2543 * Return whether or not the specified physical page was modified 2544 * in any physical maps. 2545 */ 2546boolean_t 2547pmap_is_modified(vm_page_t m) 2548{ 2549 pv_entry_t pv; 2550 pt_entry_t *pte; 2551 boolean_t rv; 2552 2553 rv = FALSE; 2554 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2555 return (rv); 2556 2557 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2558 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2559 /* 2560 * if the bit being tested is the modified bit, then 2561 * mark clean_map and ptes as never 2562 * modified. 2563 */ 2564 if (!pmap_track_modified(pv->pv_va)) 2565 continue; 2566#if defined(PMAP_DIAGNOSTIC) 2567 if (!pv->pv_pmap) { 2568 printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va); 2569 continue; 2570 } 2571#endif 2572 PMAP_LOCK(pv->pv_pmap); 2573 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2574 rv = (*pte & PG_M) != 0; 2575 PMAP_UNLOCK(pv->pv_pmap); 2576 if (rv) 2577 break; 2578 } 2579 return (rv); 2580} 2581 2582/* 2583 * pmap_is_prefaultable: 2584 * 2585 * Return whether or not the specified virtual address is elgible 2586 * for prefault. 
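 * In practice TRUE is returned only when the pde for addr is valid
 * but the 4KB pte is not, i.e. when a prefault would not have to
 * allocate a new page table page.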
2587 */ 2588boolean_t 2589pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2590{ 2591 pd_entry_t *pde; 2592 pt_entry_t *pte; 2593 boolean_t rv; 2594 2595 rv = FALSE; 2596 PMAP_LOCK(pmap); 2597 pde = pmap_pde(pmap, addr); 2598 if (pde != NULL && (*pde & PG_V)) { 2599 pte = vtopte(addr); 2600 rv = (*pte & PG_V) == 0; 2601 } 2602 PMAP_UNLOCK(pmap); 2603 return (rv); 2604} 2605 2606/* 2607 * Clear the given bit in each of the given page's ptes. 2608 */ 2609static __inline void 2610pmap_clear_ptes(vm_page_t m, int bit) 2611{ 2612 register pv_entry_t pv; 2613 pt_entry_t pbits, *pte; 2614 2615 if (!pmap_initialized || (m->flags & PG_FICTITIOUS) || 2616 (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0)) 2617 return; 2618 2619 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2620 /* 2621 * Loop over all current mappings setting/clearing as appropos If 2622 * setting RO do we need to clear the VAC? 2623 */ 2624 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 2625 /* 2626 * don't write protect pager mappings 2627 */ 2628 if (bit == PG_RW) { 2629 if (!pmap_track_modified(pv->pv_va)) 2630 continue; 2631 } 2632 2633#if defined(PMAP_DIAGNOSTIC) 2634 if (!pv->pv_pmap) { 2635 printf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va); 2636 continue; 2637 } 2638#endif 2639 2640 PMAP_LOCK(pv->pv_pmap); 2641 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2642 pbits = *pte; 2643 if (pbits & bit) { 2644 if (bit == PG_RW) { 2645 if (pbits & PG_M) { 2646 vm_page_dirty(m); 2647 } 2648 pte_store(pte, pbits & ~(PG_M|PG_RW)); 2649 } else { 2650 pte_store(pte, pbits & ~bit); 2651 } 2652 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 2653 } 2654 PMAP_UNLOCK(pv->pv_pmap); 2655 } 2656 if (bit == PG_RW) 2657 vm_page_flag_clear(m, PG_WRITEABLE); 2658} 2659 2660/* 2661 * pmap_page_protect: 2662 * 2663 * Lower the permission for all mappings to a given page. 2664 */ 2665void 2666pmap_page_protect(vm_page_t m, vm_prot_t prot) 2667{ 2668 if ((prot & VM_PROT_WRITE) == 0) { 2669 if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) { 2670 pmap_clear_ptes(m, PG_RW); 2671 } else { 2672 pmap_remove_all(m); 2673 } 2674 } 2675} 2676 2677/* 2678 * pmap_ts_referenced: 2679 * 2680 * Return a count of reference bits for a page, clearing those bits. 2681 * It is not necessary for every reference bit to be cleared, but it 2682 * is necessary that 0 only be returned when there are truly no 2683 * reference bits set. 2684 * 2685 * XXX: The exact number of bits to check and clear is a matter that 2686 * should be tested and standardized at some point in the future for 2687 * optimal aging of shared pages. 
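 * The current implementation stops scanning a page's pv list once
 * more than four referenced ptes have been found and cleared.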
2688 */ 2689int 2690pmap_ts_referenced(vm_page_t m) 2691{ 2692 register pv_entry_t pv, pvf, pvn; 2693 pt_entry_t *pte; 2694 pt_entry_t v; 2695 int rtval = 0; 2696 2697 if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) 2698 return (rtval); 2699 2700 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2701 if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2702 2703 pvf = pv; 2704 2705 do { 2706 pvn = TAILQ_NEXT(pv, pv_list); 2707 2708 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 2709 2710 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 2711 2712 if (!pmap_track_modified(pv->pv_va)) 2713 continue; 2714 2715 PMAP_LOCK(pv->pv_pmap); 2716 pte = pmap_pte(pv->pv_pmap, pv->pv_va); 2717 2718 if (pte && ((v = pte_load(pte)) & PG_A) != 0) { 2719 atomic_clear_long(pte, PG_A); 2720 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 2721 2722 rtval++; 2723 if (rtval > 4) { 2724 PMAP_UNLOCK(pv->pv_pmap); 2725 break; 2726 } 2727 } 2728 PMAP_UNLOCK(pv->pv_pmap); 2729 } while ((pv = pvn) != NULL && pv != pvf); 2730 } 2731 2732 return (rtval); 2733} 2734 2735/* 2736 * Clear the modify bits on the specified physical page. 2737 */ 2738void 2739pmap_clear_modify(vm_page_t m) 2740{ 2741 pmap_clear_ptes(m, PG_M); 2742} 2743 2744/* 2745 * pmap_clear_reference: 2746 * 2747 * Clear the reference bit on the specified physical page. 2748 */ 2749void 2750pmap_clear_reference(vm_page_t m) 2751{ 2752 pmap_clear_ptes(m, PG_A); 2753} 2754 2755/* 2756 * Miscellaneous support routines follow 2757 */ 2758 2759/* 2760 * Map a set of physical memory pages into the kernel virtual 2761 * address space. Return a pointer to where it is mapped. This 2762 * routine is intended to be used for mapping device memory, 2763 * NOT real memory. 2764 */ 2765void * 2766pmap_mapdev(pa, size) 2767 vm_paddr_t pa; 2768 vm_size_t size; 2769{ 2770 vm_offset_t va, tmpva, offset; 2771 2772 /* If this fits within the direct map window, use it */ 2773 if (pa < dmaplimit && (pa + size) < dmaplimit) 2774 return ((void *)PHYS_TO_DMAP(pa)); 2775 offset = pa & PAGE_MASK; 2776 size = roundup(offset + size, PAGE_SIZE); 2777 va = kmem_alloc_nofault(kernel_map, size); 2778 if (!va) 2779 panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); 2780 pa = trunc_page(pa); 2781 for (tmpva = va; size > 0; ) { 2782 pmap_kenter(tmpva, pa); 2783 size -= PAGE_SIZE; 2784 tmpva += PAGE_SIZE; 2785 pa += PAGE_SIZE; 2786 } 2787 pmap_invalidate_range(kernel_pmap, va, tmpva); 2788 return ((void *)(va + offset)); 2789} 2790 2791void 2792pmap_unmapdev(va, size) 2793 vm_offset_t va; 2794 vm_size_t size; 2795{ 2796 vm_offset_t base, offset, tmpva; 2797 2798 /* If we gave a direct map region in pmap_mapdev, do nothing */ 2799 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) 2800 return; 2801 base = trunc_page(va); 2802 offset = va & PAGE_MASK; 2803 size = roundup(offset + size, PAGE_SIZE); 2804 for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) 2805 pmap_kremove(tmpva); 2806 pmap_invalidate_range(kernel_pmap, va, tmpva); 2807 kmem_free(kernel_map, base, size); 2808} 2809 2810/* 2811 * perform the pmap work for mincore 2812 */ 2813int 2814pmap_mincore(pmap, addr) 2815 pmap_t pmap; 2816 vm_offset_t addr; 2817{ 2818 pt_entry_t *ptep, pte; 2819 vm_page_t m; 2820 int val = 0; 2821 2822 PMAP_LOCK(pmap); 2823 ptep = pmap_pte(pmap, addr); 2824 pte = (ptep != NULL) ? 
*ptep : 0; 2825 PMAP_UNLOCK(pmap); 2826 2827 if (pte != 0) { 2828 vm_paddr_t pa; 2829 2830 val = MINCORE_INCORE; 2831 if ((pte & PG_MANAGED) == 0) 2832 return val; 2833 2834 pa = pte & PG_FRAME; 2835 2836 m = PHYS_TO_VM_PAGE(pa); 2837 2838 /* 2839 * Modified by us 2840 */ 2841 if (pte & PG_M) 2842 val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER; 2843 else { 2844 /* 2845 * Modified by someone else 2846 */ 2847 vm_page_lock_queues(); 2848 if (m->dirty || pmap_is_modified(m)) 2849 val |= MINCORE_MODIFIED_OTHER; 2850 vm_page_unlock_queues(); 2851 } 2852 /* 2853 * Referenced by us 2854 */ 2855 if (pte & PG_A) 2856 val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER; 2857 else { 2858 /* 2859 * Referenced by someone else 2860 */ 2861 vm_page_lock_queues(); 2862 if ((m->flags & PG_REFERENCED) || 2863 pmap_ts_referenced(m)) { 2864 val |= MINCORE_REFERENCED_OTHER; 2865 vm_page_flag_set(m, PG_REFERENCED); 2866 } 2867 vm_page_unlock_queues(); 2868 } 2869 } 2870 return val; 2871} 2872 2873void 2874pmap_activate(struct thread *td) 2875{ 2876 struct proc *p = td->td_proc; 2877 pmap_t pmap, oldpmap; 2878 u_int64_t cr3; 2879 2880 critical_enter(); 2881 pmap = vmspace_pmap(td->td_proc->p_vmspace); 2882 oldpmap = PCPU_GET(curpmap); 2883#ifdef SMP 2884if (oldpmap) /* XXX FIXME */ 2885 atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); 2886 atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); 2887#else 2888if (oldpmap) /* XXX FIXME */ 2889 oldpmap->pm_active &= ~PCPU_GET(cpumask); 2890 pmap->pm_active |= PCPU_GET(cpumask); 2891#endif 2892 cr3 = vtophys(pmap->pm_pml4); 2893 /* XXXKSE this is wrong. 2894 * pmap_activate is for the current thread on the current cpu 2895 */ 2896 if (p->p_flag & P_SA) { 2897 /* Make sure all other cr3 entries are updated. */ 2898 /* what if they are running? XXXKSE (maybe abort them) */ 2899 FOREACH_THREAD_IN_PROC(p, td) { 2900 td->td_pcb->pcb_cr3 = cr3; 2901 } 2902 } else { 2903 td->td_pcb->pcb_cr3 = cr3; 2904 } 2905 load_cr3(cr3); 2906 critical_exit(); 2907} 2908 2909vm_offset_t 2910pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size) 2911{ 2912 2913 if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) { 2914 return addr; 2915 } 2916 2917 addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 2918 return addr; 2919} 2920
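/*
 * Illustrative usage sketch (added commentary, not part of this revision):
 * a driver mapping a memory-mapped device region could use the routines
 * above roughly as follows; bar_pa and bar_size are hypothetical names.
 *
 *	void *regs;
 *
 *	regs = pmap_mapdev(bar_pa, bar_size);
 *	... access the device through regs ...
 *	pmap_unmapdev((vm_offset_t)regs, bar_size);
 *
 * When the region lies entirely below dmaplimit, pmap_mapdev() simply
 * returns an address in the direct map and the matching pmap_unmapdev()
 * call is a no-op.
 */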