pmap.c revision 149058
1/*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 2003 Peter Wemm 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to Berkeley by 12 * the Systems Programming Group of the University of Utah Computer 13 * Science Department and William Jolitz of UUNET Technologies Inc. 14 * 15 * Redistribution and use in source and binary forms, with or without 16 * modification, are permitted provided that the following conditions 17 * are met: 18 * 1. Redistributions of source code must retain the above copyright 19 * notice, this list of conditions and the following disclaimer. 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in the 22 * documentation and/or other materials provided with the distribution. 23 * 3. All advertising materials mentioning features or use of this software 24 * must display the following acknowledgement: 25 * This product includes software developed by the University of 26 * California, Berkeley and its contributors. 27 * 4. Neither the name of the University nor the names of its contributors 28 * may be used to endorse or promote products derived from this software 29 * without specific prior written permission. 30 * 31 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 32 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 33 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 34 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 35 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 36 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 37 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 38 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 39 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 40 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 41 * SUCH DAMAGE. 42 * 43 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 44 */ 45/*- 46 * Copyright (c) 2003 Networks Associates Technology, Inc. 47 * All rights reserved. 48 * 49 * This software was developed for the FreeBSD Project by Jake Burkholder, 50 * Safeport Network Services, and Network Associates Laboratories, the 51 * Security Research Division of Network Associates, Inc. under 52 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 53 * CHATS research program. 54 * 55 * Redistribution and use in source and binary forms, with or without 56 * modification, are permitted provided that the following conditions 57 * are met: 58 * 1. Redistributions of source code must retain the above copyright 59 * notice, this list of conditions and the following disclaimer. 60 * 2. Redistributions in binary form must reproduce the above copyright 61 * notice, this list of conditions and the following disclaimer in the 62 * documentation and/or other materials provided with the distribution. 63 * 64 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 74 * SUCH DAMAGE. 75 */ 76 77#include <sys/cdefs.h> 78__FBSDID("$FreeBSD: head/sys/amd64/amd64/pmap.c 149058 2005-08-14 20:02:50Z alc $"); 79 80/* 81 * Manages physical address maps. 82 * 83 * In addition to hardware address maps, this 84 * module is called upon to provide software-use-only 85 * maps which may or may not be stored in the same 86 * form as hardware maps. These pseudo-maps are 87 * used to store intermediate results from copy 88 * operations to and from address spaces. 89 * 90 * Since the information managed by this module is 91 * also stored by the logical address mapping module, 92 * this module may throw away valid virtual-to-physical 93 * mappings at almost any time. However, invalidations 94 * of virtual-to-physical mappings must be done as 95 * requested. 96 * 97 * In order to cope with hardware architectures which 98 * make virtual-to-physical map invalidates expensive, 99 * this module may delay invalidate or reduced protection 100 * operations until such time as they are actually 101 * necessary. This module is given full information as 102 * to which processors are currently using which maps, 103 * and to when physical maps must be made correct. 104 */ 105 106#include "opt_msgbuf.h" 107 108#include <sys/param.h> 109#include <sys/systm.h> 110#include <sys/kernel.h> 111#include <sys/lock.h> 112#include <sys/malloc.h> 113#include <sys/mman.h> 114#include <sys/msgbuf.h> 115#include <sys/mutex.h> 116#include <sys/proc.h> 117#include <sys/sx.h> 118#include <sys/vmmeter.h> 119#include <sys/sched.h> 120#include <sys/sysctl.h> 121#ifdef SMP 122#include <sys/smp.h> 123#endif 124 125#include <vm/vm.h> 126#include <vm/vm_param.h> 127#include <vm/vm_kern.h> 128#include <vm/vm_page.h> 129#include <vm/vm_map.h> 130#include <vm/vm_object.h> 131#include <vm/vm_extern.h> 132#include <vm/vm_pageout.h> 133#include <vm/vm_pager.h> 134#include <vm/uma.h> 135 136#include <machine/cpu.h> 137#include <machine/cputypes.h> 138#include <machine/md_var.h> 139#include <machine/pcb.h> 140#include <machine/specialreg.h> 141#ifdef SMP 142#include <machine/smp.h> 143#endif 144 145#ifndef PMAP_SHPGPERPROC 146#define PMAP_SHPGPERPROC 200 147#endif 148 149#if defined(DIAGNOSTIC) 150#define PMAP_DIAGNOSTIC 151#endif 152 153#define MINPV 2048 154 155#if !defined(PMAP_DIAGNOSTIC) 156#define PMAP_INLINE __inline 157#else 158#define PMAP_INLINE 159#endif 160 161struct pmap kernel_pmap_store; 162 163vm_paddr_t avail_start; /* PA of first available physical page */ 164vm_paddr_t avail_end; /* PA of last available physical page */ 165vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 166vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 167 168static int nkpt; 169static int ndmpdp; 170static vm_paddr_t dmaplimit; 171vm_offset_t kernel_vm_end; 172pt_entry_t pg_nx; 173 174static u_int64_t KPTphys; /* phys addr of kernel level 1 */ 175static u_int64_t KPDphys; /* phys addr of kernel level 2 */ 176static u_int64_t KPDPphys; /* phys addr of kernel level 3 */ 
177u_int64_t KPML4phys; /* phys addr of kernel level 4 */ 178 179static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ 180static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ 181 182/* 183 * Data for the pv entry allocation mechanism 184 */ 185static uma_zone_t pvzone; 186static struct vm_object pvzone_obj; 187static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0; 188int pmap_pagedaemon_waken; 189 190/* 191 * All those kernel PT submaps that BSD is so fond of 192 */ 193pt_entry_t *CMAP1 = 0; 194caddr_t CADDR1 = 0; 195struct msgbuf *msgbufp = 0; 196 197/* 198 * Crashdump maps. 199 */ 200static caddr_t crashdumpmap; 201 202static PMAP_INLINE void free_pv_entry(pv_entry_t pv); 203static pv_entry_t get_pv_entry(void); 204static void pmap_clear_ptes(vm_page_t m, long bit); 205 206static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, 207 vm_offset_t sva, pd_entry_t ptepde); 208static void pmap_remove_page(struct pmap *pmap, vm_offset_t va); 209static void pmap_remove_entry(struct pmap *pmap, vm_page_t m, 210 vm_offset_t va); 211static void pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m); 212 213static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); 214 215static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags); 216static int _pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m); 217static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t); 218static vm_offset_t pmap_kmem_choose(vm_offset_t addr); 219 220CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t)); 221CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t)); 222 223/* 224 * Move the kernel virtual free pointer to the next 225 * 2MB. This is used to help improve performance 226 * by using a large (2MB) page for much of the kernel 227 * (.text, .data, .bss) 228 */ 229static vm_offset_t 230pmap_kmem_choose(vm_offset_t addr) 231{ 232 vm_offset_t newaddr = addr; 233 234 newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); 235 return newaddr; 236} 237 238/********************/ 239/* Inline functions */ 240/********************/ 241 242/* Return a non-clipped PD index for a given VA */ 243static __inline vm_pindex_t 244pmap_pde_pindex(vm_offset_t va) 245{ 246 return va >> PDRSHIFT; 247} 248 249 250/* Return various clipped indexes for a given VA */ 251static __inline vm_pindex_t 252pmap_pte_index(vm_offset_t va) 253{ 254 255 return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1)); 256} 257 258static __inline vm_pindex_t 259pmap_pde_index(vm_offset_t va) 260{ 261 262 return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1)); 263} 264 265static __inline vm_pindex_t 266pmap_pdpe_index(vm_offset_t va) 267{ 268 269 return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1)); 270} 271 272static __inline vm_pindex_t 273pmap_pml4e_index(vm_offset_t va) 274{ 275 276 return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1)); 277} 278 279/* Return a pointer to the PML4 slot that corresponds to a VA */ 280static __inline pml4_entry_t * 281pmap_pml4e(pmap_t pmap, vm_offset_t va) 282{ 283 284 if (!pmap) 285 return NULL; 286 return (&pmap->pm_pml4[pmap_pml4e_index(va)]); 287} 288 289/* Return a pointer to the PDP slot that corresponds to a VA */ 290static __inline pdp_entry_t * 291pmap_pdpe(pmap_t pmap, vm_offset_t va) 292{ 293 pml4_entry_t *pml4e; 294 pdp_entry_t *pdpe; 295 296 pml4e = pmap_pml4e(pmap, va); 297 if (pml4e == NULL || (*pml4e & PG_V) == 0) 298 return NULL; 299 pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME); 300 return (&pdpe[pmap_pdpe_index(va)]); 301} 
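/*
 * Editor's note -- illustrative sketch, not part of the original file.
 * The inline helpers above split a canonical amd64 virtual address into
 * four 9-bit page-table indexes (PML4, PDP, PD, PT) plus a 12-bit page
 * offset.  The example below mirrors pmap_pml4e_index(), pmap_pdpe_index(),
 * pmap_pde_index() and pmap_pte_index() using locally defined constants
 * (assumed standard amd64 values: 4K pages, 512 entries per level) so it
 * stands alone; the kernel's real values come from <machine/pmap.h>.  For
 * instance, KERNBASE (0xffffffff80000000) lands in the last PML4 slot,
 * index 511.
 */
#if 0	/* example only, never compiled into the kernel */
#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_SHIFT	12			/* 4K pages */
#define EX_IDXMASK	((1UL << 9) - 1)	/* 9 index bits per level */

static void
ex_decompose_va(uint64_t va)
{
	uint64_t off, pti, pdi, pdpi, pml4i;

	off   = va & ((1UL << EX_PAGE_SHIFT) - 1);
	pti   = (va >> EX_PAGE_SHIFT) & EX_IDXMASK;	    /* pmap_pte_index() */
	pdi   = (va >> (EX_PAGE_SHIFT + 9)) & EX_IDXMASK;  /* pmap_pde_index() */
	pdpi  = (va >> (EX_PAGE_SHIFT + 18)) & EX_IDXMASK; /* pmap_pdpe_index() */
	pml4i = (va >> (EX_PAGE_SHIFT + 27)) & EX_IDXMASK; /* pmap_pml4e_index() */

	printf("va %#jx -> pml4 %ju, pdp %ju, pd %ju, pt %ju, offset %#jx\n",
	    (uintmax_t)va, (uintmax_t)pml4i, (uintmax_t)pdpi, (uintmax_t)pdi,
	    (uintmax_t)pti, (uintmax_t)off);
}
#endif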
302 303/* Return a pointer to the PD slot that corresponds to a VA */ 304static __inline pd_entry_t * 305pmap_pde(pmap_t pmap, vm_offset_t va) 306{ 307 pdp_entry_t *pdpe; 308 pd_entry_t *pde; 309 310 pdpe = pmap_pdpe(pmap, va); 311 if (pdpe == NULL || (*pdpe & PG_V) == 0) 312 return NULL; 313 pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME); 314 return (&pde[pmap_pde_index(va)]); 315} 316 317/* Return a pointer to the PT slot that corresponds to a VA */ 318static __inline pt_entry_t * 319pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) 320{ 321 pt_entry_t *pte; 322 323 pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); 324 return (&pte[pmap_pte_index(va)]); 325} 326 327/* Return a pointer to the PT slot that corresponds to a VA */ 328static __inline pt_entry_t * 329pmap_pte(pmap_t pmap, vm_offset_t va) 330{ 331 pd_entry_t *pde; 332 333 pde = pmap_pde(pmap, va); 334 if (pde == NULL || (*pde & PG_V) == 0) 335 return NULL; 336 if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ 337 return ((pt_entry_t *)pde); 338 return (pmap_pde_to_pte(pde, va)); 339} 340 341 342static __inline pt_entry_t * 343pmap_pte_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *ptepde) 344{ 345 pd_entry_t *pde; 346 347 pde = pmap_pde(pmap, va); 348 if (pde == NULL || (*pde & PG_V) == 0) 349 return NULL; 350 *ptepde = *pde; 351 if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ 352 return ((pt_entry_t *)pde); 353 return (pmap_pde_to_pte(pde, va)); 354} 355 356 357PMAP_INLINE pt_entry_t * 358vtopte(vm_offset_t va) 359{ 360 u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); 361 362 return (PTmap + ((va >> PAGE_SHIFT) & mask)); 363} 364 365static __inline pd_entry_t * 366vtopde(vm_offset_t va) 367{ 368 u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); 369 370 return (PDmap + ((va >> PDRSHIFT) & mask)); 371} 372 373static u_int64_t 374allocpages(int n) 375{ 376 u_int64_t ret; 377 378 ret = avail_start; 379 bzero((void *)ret, n * PAGE_SIZE); 380 avail_start += n * PAGE_SIZE; 381 return (ret); 382} 383 384static void 385create_pagetables(void) 386{ 387 int i; 388 389 /* Allocate pages */ 390 KPTphys = allocpages(NKPT); 391 KPML4phys = allocpages(1); 392 KPDPphys = allocpages(NKPML4E); 393 KPDphys = allocpages(NKPDPE); 394 395 ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT; 396 if (ndmpdp < 4) /* Minimum 4GB of dirmap */ 397 ndmpdp = 4; 398 DMPDPphys = allocpages(NDMPML4E); 399 DMPDphys = allocpages(ndmpdp); 400 dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; 401 402 /* Fill in the underlying page table pages */ 403 /* Read-only from zero to physfree */ 404 /* XXX not fully used, underneath 2M pages */ 405 for (i = 0; (i << PAGE_SHIFT) < avail_start; i++) { 406 ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT; 407 ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V | PG_G; 408 } 409 410 /* Now map the page tables at their location within PTmap */ 411 for (i = 0; i < NKPT; i++) { 412 ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT); 413 ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V; 414 } 415 416 /* Map from zero to end of allocations under 2M pages */ 417 /* This replaces some of the KPTphys entries above */ 418 for (i = 0; (i << PDRSHIFT) < avail_start; i++) { 419 ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT; 420 ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G; 421 } 422 423 /* And connect up the PD to the PDP */ 424 for (i = 0; i < NKPDPE; i++) { 425 ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT); 426 ((pdp_entry_t 
*)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U; 427 } 428 429 430 /* Now set up the direct map space using 2MB pages */ 431 for (i = 0; i < NPDEPG * ndmpdp; i++) { 432 ((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT; 433 ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS | PG_G; 434 } 435 436 /* And the direct map space's PDP */ 437 for (i = 0; i < ndmpdp; i++) { 438 ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT); 439 ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U; 440 } 441 442 /* And recursively map PML4 to itself in order to get PTmap */ 443 ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys; 444 ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U; 445 446 /* Connect the Direct Map slot up to the PML4 */ 447 ((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys; 448 ((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U; 449 450 /* Connect the KVA slot up to the PML4 */ 451 ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys; 452 ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U; 453} 454 455/* 456 * Bootstrap the system enough to run with virtual memory. 457 * 458 * On amd64 this is called after mapping has already been enabled 459 * and just syncs the pmap module with what has already been done. 460 * [We can't call it easily with mapping off since the kernel is not 461 * mapped with PA == VA, hence we would have to relocate every address 462 * from the linked base (virtual) address "KERNBASE" to the actual 463 * (physical) address starting relative to 0] 464 */ 465void 466pmap_bootstrap(firstaddr) 467 vm_paddr_t *firstaddr; 468{ 469 vm_offset_t va; 470 pt_entry_t *pte, *unused; 471 472 avail_start = *firstaddr; 473 474 /* 475 * Create an initial set of page tables to run the kernel in. 476 */ 477 create_pagetables(); 478 *firstaddr = avail_start; 479 480 virtual_avail = (vm_offset_t) KERNBASE + avail_start; 481 virtual_avail = pmap_kmem_choose(virtual_avail); 482 483 virtual_end = VM_MAX_KERNEL_ADDRESS; 484 485 486 /* XXX do %cr0 as well */ 487 load_cr4(rcr4() | CR4_PGE | CR4_PSE); 488 load_cr3(KPML4phys); 489 490 /* 491 * Initialize the kernel pmap (which is statically allocated). 492 */ 493 PMAP_LOCK_INIT(kernel_pmap); 494 kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys); 495 kernel_pmap->pm_active = -1; /* don't allow deactivation */ 496 TAILQ_INIT(&kernel_pmap->pm_pvlist); 497 nkpt = NKPT; 498 499 /* 500 * Reserve some special page table entries/VA space for temporary 501 * mapping of pages. 502 */ 503#define SYSMAP(c, p, v, n) \ 504 v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); 505 506 va = virtual_avail; 507 pte = vtopte(va); 508 509 /* 510 * CMAP1 is only used for the memory test. 511 */ 512 SYSMAP(caddr_t, CMAP1, CADDR1, 1) 513 514 /* 515 * Crashdump maps. 516 */ 517 SYSMAP(caddr_t, unused, crashdumpmap, MAXDUMPPGS) 518 519 /* 520 * msgbufp is used to map the system message buffer. 521 */ 522 SYSMAP(struct msgbuf *, unused, msgbufp, atop(round_page(MSGBUF_SIZE))) 523 524 virtual_avail = va; 525 526 *CMAP1 = 0; 527 528 invltlb(); 529} 530 531/* 532 * Initialize a vm_page's machine-dependent fields. 533 */ 534void 535pmap_page_init(vm_page_t m) 536{ 537 538 TAILQ_INIT(&m->md.pv_list); 539 m->md.pv_list_count = 0; 540} 541 542/* 543 * Initialize the pmap module. 544 * Called by vm_init, to initialize any structures that the pmap 545 * system needs to map virtual memory. 
546 */ 547void 548pmap_init(void) 549{ 550 551 /* 552 * init the pv free list 553 */ 554 pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL, 555 NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE); 556 uma_prealloc(pvzone, MINPV); 557} 558 559/* 560 * Initialize the address space (zone) for the pv_entries. Set a 561 * high water mark so that the system can recover from excessive 562 * numbers of pv entries. 563 */ 564void 565pmap_init2() 566{ 567 int shpgperproc = PMAP_SHPGPERPROC; 568 569 TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc); 570 pv_entry_max = shpgperproc * maxproc + vm_page_array_size; 571 TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max); 572 pv_entry_high_water = 9 * (pv_entry_max / 10); 573 uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max); 574} 575 576 577/*************************************************** 578 * Low level helper routines..... 579 ***************************************************/ 580 581#if defined(PMAP_DIAGNOSTIC) 582 583/* 584 * This code checks for non-writeable/modified pages. 585 * This should be an invalid condition. 586 */ 587static int 588pmap_nw_modified(pt_entry_t ptea) 589{ 590 int pte; 591 592 pte = (int) ptea; 593 594 if ((pte & (PG_M|PG_RW)) == PG_M) 595 return 1; 596 else 597 return 0; 598} 599#endif 600 601 602/* 603 * this routine defines the region(s) of memory that should 604 * not be tested for the modified bit. 605 */ 606static PMAP_INLINE int 607pmap_track_modified(vm_offset_t va) 608{ 609 if ((va < kmi.clean_sva) || (va >= kmi.clean_eva)) 610 return 1; 611 else 612 return 0; 613} 614 615#ifdef SMP 616/* 617 * For SMP, these functions have to use the IPI mechanism for coherence. 618 */ 619void 620pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 621{ 622 u_int cpumask; 623 u_int other_cpus; 624 625 if (smp_started) { 626 if (!(read_rflags() & PSL_I)) 627 panic("%s: interrupts disabled", __func__); 628 mtx_lock_spin(&smp_ipi_mtx); 629 } else 630 critical_enter(); 631 /* 632 * We need to disable interrupt preemption but MUST NOT have 633 * interrupts disabled here. 634 * XXX we may need to hold schedlock to get a coherent pm_active 635 * XXX critical sections disable interrupts again 636 */ 637 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 638 invlpg(va); 639 smp_invlpg(va); 640 } else { 641 cpumask = PCPU_GET(cpumask); 642 other_cpus = PCPU_GET(other_cpus); 643 if (pmap->pm_active & cpumask) 644 invlpg(va); 645 if (pmap->pm_active & other_cpus) 646 smp_masked_invlpg(pmap->pm_active & other_cpus, va); 647 } 648 if (smp_started) 649 mtx_unlock_spin(&smp_ipi_mtx); 650 else 651 critical_exit(); 652} 653 654void 655pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 656{ 657 u_int cpumask; 658 u_int other_cpus; 659 vm_offset_t addr; 660 661 if (smp_started) { 662 if (!(read_rflags() & PSL_I)) 663 panic("%s: interrupts disabled", __func__); 664 mtx_lock_spin(&smp_ipi_mtx); 665 } else 666 critical_enter(); 667 /* 668 * We need to disable interrupt preemption but MUST NOT have 669 * interrupts disabled here. 
670 * XXX we may need to hold schedlock to get a coherent pm_active 671 * XXX critical sections disable interrupts again 672 */ 673 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 674 for (addr = sva; addr < eva; addr += PAGE_SIZE) 675 invlpg(addr); 676 smp_invlpg_range(sva, eva); 677 } else { 678 cpumask = PCPU_GET(cpumask); 679 other_cpus = PCPU_GET(other_cpus); 680 if (pmap->pm_active & cpumask) 681 for (addr = sva; addr < eva; addr += PAGE_SIZE) 682 invlpg(addr); 683 if (pmap->pm_active & other_cpus) 684 smp_masked_invlpg_range(pmap->pm_active & other_cpus, 685 sva, eva); 686 } 687 if (smp_started) 688 mtx_unlock_spin(&smp_ipi_mtx); 689 else 690 critical_exit(); 691} 692 693void 694pmap_invalidate_all(pmap_t pmap) 695{ 696 u_int cpumask; 697 u_int other_cpus; 698 699 if (smp_started) { 700 if (!(read_rflags() & PSL_I)) 701 panic("%s: interrupts disabled", __func__); 702 mtx_lock_spin(&smp_ipi_mtx); 703 } else 704 critical_enter(); 705 /* 706 * We need to disable interrupt preemption but MUST NOT have 707 * interrupts disabled here. 708 * XXX we may need to hold schedlock to get a coherent pm_active 709 * XXX critical sections disable interrupts again 710 */ 711 if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { 712 invltlb(); 713 smp_invltlb(); 714 } else { 715 cpumask = PCPU_GET(cpumask); 716 other_cpus = PCPU_GET(other_cpus); 717 if (pmap->pm_active & cpumask) 718 invltlb(); 719 if (pmap->pm_active & other_cpus) 720 smp_masked_invltlb(pmap->pm_active & other_cpus); 721 } 722 if (smp_started) 723 mtx_unlock_spin(&smp_ipi_mtx); 724 else 725 critical_exit(); 726} 727#else /* !SMP */ 728/* 729 * Normal, non-SMP, invalidation functions. 730 * We inline these within pmap.c for speed. 731 */ 732PMAP_INLINE void 733pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 734{ 735 736 if (pmap == kernel_pmap || pmap->pm_active) 737 invlpg(va); 738} 739 740PMAP_INLINE void 741pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 742{ 743 vm_offset_t addr; 744 745 if (pmap == kernel_pmap || pmap->pm_active) 746 for (addr = sva; addr < eva; addr += PAGE_SIZE) 747 invlpg(addr); 748} 749 750PMAP_INLINE void 751pmap_invalidate_all(pmap_t pmap) 752{ 753 754 if (pmap == kernel_pmap || pmap->pm_active) 755 invltlb(); 756} 757#endif /* !SMP */ 758 759/* 760 * Are we current address space or kernel? 761 */ 762static __inline int 763pmap_is_current(pmap_t pmap) 764{ 765 return (pmap == kernel_pmap || 766 (pmap->pm_pml4[PML4PML4I] & PG_FRAME) == (PML4pml4e[0] & PG_FRAME)); 767} 768 769/* 770 * Routine: pmap_extract 771 * Function: 772 * Extract the physical page address associated 773 * with the given map/virtual_address pair. 774 */ 775vm_paddr_t 776pmap_extract(pmap_t pmap, vm_offset_t va) 777{ 778 vm_paddr_t rtval; 779 pt_entry_t *pte; 780 pd_entry_t pde, *pdep; 781 782 rtval = 0; 783 PMAP_LOCK(pmap); 784 pdep = pmap_pde(pmap, va); 785 if (pdep != NULL) { 786 pde = *pdep; 787 if (pde) { 788 if ((pde & PG_PS) != 0) { 789 rtval = (pde & ~PDRMASK) | (va & PDRMASK); 790 PMAP_UNLOCK(pmap); 791 return rtval; 792 } 793 pte = pmap_pde_to_pte(pdep, va); 794 rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); 795 } 796 } 797 PMAP_UNLOCK(pmap); 798 return (rtval); 799} 800 801/* 802 * Routine: pmap_extract_and_hold 803 * Function: 804 * Atomically extract and hold the physical page 805 * with the given pmap and virtual address pair 806 * if that mapping permits the given protection. 
807 */ 808vm_page_t 809pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 810{ 811 pd_entry_t pde, *pdep; 812 pt_entry_t pte; 813 vm_page_t m; 814 815 m = NULL; 816 vm_page_lock_queues(); 817 PMAP_LOCK(pmap); 818 pdep = pmap_pde(pmap, va); 819 if (pdep != NULL && (pde = *pdep)) { 820 if (pde & PG_PS) { 821 if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { 822 m = PHYS_TO_VM_PAGE((pde & ~PDRMASK) | 823 (va & PDRMASK)); 824 vm_page_hold(m); 825 } 826 } else { 827 pte = *pmap_pde_to_pte(pdep, va); 828 if ((pte & PG_V) && 829 ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { 830 m = PHYS_TO_VM_PAGE(pte & PG_FRAME); 831 vm_page_hold(m); 832 } 833 } 834 } 835 vm_page_unlock_queues(); 836 PMAP_UNLOCK(pmap); 837 return (m); 838} 839 840vm_paddr_t 841pmap_kextract(vm_offset_t va) 842{ 843 pd_entry_t *pde; 844 vm_paddr_t pa; 845 846 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 847 pa = DMAP_TO_PHYS(va); 848 } else { 849 pde = vtopde(va); 850 if (*pde & PG_PS) { 851 pa = (*pde & ~(NBPDR - 1)) | (va & (NBPDR - 1)); 852 } else { 853 pa = *vtopte(va); 854 pa = (pa & PG_FRAME) | (va & PAGE_MASK); 855 } 856 } 857 return pa; 858} 859 860/*************************************************** 861 * Low level mapping routines..... 862 ***************************************************/ 863 864/* 865 * Add a wired page to the kva. 866 * Note: not SMP coherent. 867 */ 868PMAP_INLINE void 869pmap_kenter(vm_offset_t va, vm_paddr_t pa) 870{ 871 pt_entry_t *pte; 872 873 pte = vtopte(va); 874 pte_store(pte, pa | PG_RW | PG_V | PG_G); 875} 876 877/* 878 * Remove a page from the kernel pagetables. 879 * Note: not SMP coherent. 880 */ 881PMAP_INLINE void 882pmap_kremove(vm_offset_t va) 883{ 884 pt_entry_t *pte; 885 886 pte = vtopte(va); 887 pte_clear(pte); 888} 889 890/* 891 * Used to map a range of physical addresses into kernel 892 * virtual address space. 893 * 894 * The value passed in '*virt' is a suggested virtual address for 895 * the mapping. Architectures which can support a direct-mapped 896 * physical to virtual region can return the appropriate address 897 * within that region, leaving '*virt' unchanged. Other 898 * architectures should map the pages starting at '*virt' and 899 * update '*virt' with the first usable address after the mapped 900 * region. 901 */ 902vm_offset_t 903pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 904{ 905 return PHYS_TO_DMAP(start); 906} 907 908 909/* 910 * Add a list of wired pages to the kva 911 * this routine is only used for temporary 912 * kernel mappings that do not need to have 913 * page modification or references recorded. 914 * Note that old mappings are simply written 915 * over. The page *must* be wired. 916 * Note: SMP coherent. Uses a ranged shootdown IPI. 917 */ 918void 919pmap_qenter(vm_offset_t sva, vm_page_t *m, int count) 920{ 921 vm_offset_t va; 922 923 va = sva; 924 while (count-- > 0) { 925 pmap_kenter(va, VM_PAGE_TO_PHYS(*m)); 926 va += PAGE_SIZE; 927 m++; 928 } 929 pmap_invalidate_range(kernel_pmap, sva, va); 930} 931 932/* 933 * This routine tears out page mappings from the 934 * kernel -- it is meant only for temporary mappings. 935 * Note: SMP coherent. Uses a ranged shootdown IPI. 
936 */ 937void 938pmap_qremove(vm_offset_t sva, int count) 939{ 940 vm_offset_t va; 941 942 va = sva; 943 while (count-- > 0) { 944 pmap_kremove(va); 945 va += PAGE_SIZE; 946 } 947 pmap_invalidate_range(kernel_pmap, sva, va); 948} 949 950/*************************************************** 951 * Page table page management routines..... 952 ***************************************************/ 953 954/* 955 * This routine unholds page table pages, and if the hold count 956 * drops to zero, then it decrements the wire count. 957 */ 958static PMAP_INLINE int 959pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 960{ 961 962 --m->wire_count; 963 if (m->wire_count == 0) 964 return _pmap_unwire_pte_hold(pmap, va, m); 965 else 966 return 0; 967} 968 969static int 970_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m) 971{ 972 vm_offset_t pteva; 973 974 /* 975 * unmap the page table page 976 */ 977 if (m->pindex >= (NUPDE + NUPDPE)) { 978 /* PDP page */ 979 pml4_entry_t *pml4; 980 pml4 = pmap_pml4e(pmap, va); 981 pteva = (vm_offset_t) PDPmap + amd64_ptob(m->pindex - (NUPDE + NUPDPE)); 982 *pml4 = 0; 983 } else if (m->pindex >= NUPDE) { 984 /* PD page */ 985 pdp_entry_t *pdp; 986 pdp = pmap_pdpe(pmap, va); 987 pteva = (vm_offset_t) PDmap + amd64_ptob(m->pindex - NUPDE); 988 *pdp = 0; 989 } else { 990 /* PTE page */ 991 pd_entry_t *pd; 992 pd = pmap_pde(pmap, va); 993 pteva = (vm_offset_t) PTmap + amd64_ptob(m->pindex); 994 *pd = 0; 995 } 996 --pmap->pm_stats.resident_count; 997 if (m->pindex < NUPDE) { 998 /* We just released a PT, unhold the matching PD */ 999 vm_page_t pdpg; 1000 1001 pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME); 1002 pmap_unwire_pte_hold(pmap, va, pdpg); 1003 } 1004 if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { 1005 /* We just released a PD, unhold the matching PDP */ 1006 vm_page_t pdppg; 1007 1008 pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME); 1009 pmap_unwire_pte_hold(pmap, va, pdppg); 1010 } 1011 1012 /* 1013 * Do an invltlb to make the invalidated mapping 1014 * take effect immediately. 1015 */ 1016 pmap_invalidate_page(pmap, pteva); 1017 1018 vm_page_free_zero(m); 1019 atomic_subtract_int(&cnt.v_wire_count, 1); 1020 return 1; 1021} 1022 1023/* 1024 * After removing a page table entry, this routine is used to 1025 * conditionally free the page, and manage the hold/wire counts. 1026 */ 1027static int 1028pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde) 1029{ 1030 vm_page_t mpte; 1031 1032 if (va >= VM_MAXUSER_ADDRESS) 1033 return 0; 1034 KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1035 mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); 1036 return pmap_unwire_pte_hold(pmap, va, mpte); 1037} 1038 1039void 1040pmap_pinit0(pmap) 1041 struct pmap *pmap; 1042{ 1043 1044 PMAP_LOCK_INIT(pmap); 1045 pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys); 1046 pmap->pm_active = 0; 1047 TAILQ_INIT(&pmap->pm_pvlist); 1048 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1049} 1050 1051/* 1052 * Initialize a preallocated and zeroed pmap structure, 1053 * such as one in a vmspace structure. 
1054 */ 1055void 1056pmap_pinit(pmap) 1057 register struct pmap *pmap; 1058{ 1059 vm_page_t pml4pg; 1060 static vm_pindex_t color; 1061 1062 PMAP_LOCK_INIT(pmap); 1063 1064 /* 1065 * allocate the page directory page 1066 */ 1067 while ((pml4pg = vm_page_alloc(NULL, color++, VM_ALLOC_NOOBJ | 1068 VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1069 VM_WAIT; 1070 1071 pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); 1072 1073 if ((pml4pg->flags & PG_ZERO) == 0) 1074 pagezero(pmap->pm_pml4); 1075 1076 /* Wire in kernel global address entries. */ 1077 pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U; 1078 pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U; 1079 1080 /* install self-referential address mapping entry(s) */ 1081 pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; 1082 1083 pmap->pm_active = 0; 1084 TAILQ_INIT(&pmap->pm_pvlist); 1085 bzero(&pmap->pm_stats, sizeof pmap->pm_stats); 1086} 1087 1088/* 1089 * this routine is called if the page table page is not 1090 * mapped correctly. 1091 * 1092 * Note: If a page allocation fails at page table level two or three, 1093 * one or two pages may be held during the wait, only to be released 1094 * afterwards. This conservative approach is easily argued to avoid 1095 * race conditions. 1096 */ 1097static vm_page_t 1098_pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, int flags) 1099{ 1100 vm_page_t m, pdppg, pdpg; 1101 1102 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1103 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1104 ("_pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1105 1106 /* 1107 * Allocate a page table page. 1108 */ 1109 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1110 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1111 if (flags & M_WAITOK) { 1112 PMAP_UNLOCK(pmap); 1113 vm_page_unlock_queues(); 1114 VM_WAIT; 1115 vm_page_lock_queues(); 1116 PMAP_LOCK(pmap); 1117 } 1118 1119 /* 1120 * Indicate the need to retry. While waiting, the page table 1121 * page may have been allocated. 1122 */ 1123 return (NULL); 1124 } 1125 if ((m->flags & PG_ZERO) == 0) 1126 pmap_zero_page(m); 1127 1128 /* 1129 * Map the pagetable page into the process address space, if 1130 * it isn't already there. 
1131 */ 1132 1133 pmap->pm_stats.resident_count++; 1134 1135 if (ptepindex >= (NUPDE + NUPDPE)) { 1136 pml4_entry_t *pml4; 1137 vm_pindex_t pml4index; 1138 1139 /* Wire up a new PDPE page */ 1140 pml4index = ptepindex - (NUPDE + NUPDPE); 1141 pml4 = &pmap->pm_pml4[pml4index]; 1142 *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1143 1144 } else if (ptepindex >= NUPDE) { 1145 vm_pindex_t pml4index; 1146 vm_pindex_t pdpindex; 1147 pml4_entry_t *pml4; 1148 pdp_entry_t *pdp; 1149 1150 /* Wire up a new PDE page */ 1151 pdpindex = ptepindex - NUPDE; 1152 pml4index = pdpindex >> NPML4EPGSHIFT; 1153 1154 pml4 = &pmap->pm_pml4[pml4index]; 1155 if ((*pml4 & PG_V) == 0) { 1156 /* Have to allocate a new pdp, recurse */ 1157 if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index, 1158 flags) == NULL) { 1159 --m->wire_count; 1160 vm_page_free(m); 1161 return (NULL); 1162 } 1163 } else { 1164 /* Add reference to pdp page */ 1165 pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME); 1166 pdppg->wire_count++; 1167 } 1168 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1169 1170 /* Now find the pdp page */ 1171 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1172 *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1173 1174 } else { 1175 vm_pindex_t pml4index; 1176 vm_pindex_t pdpindex; 1177 pml4_entry_t *pml4; 1178 pdp_entry_t *pdp; 1179 pd_entry_t *pd; 1180 1181 /* Wire up a new PTE page */ 1182 pdpindex = ptepindex >> NPDPEPGSHIFT; 1183 pml4index = pdpindex >> NPML4EPGSHIFT; 1184 1185 /* First, find the pdp and check that its valid. */ 1186 pml4 = &pmap->pm_pml4[pml4index]; 1187 if ((*pml4 & PG_V) == 0) { 1188 /* Have to allocate a new pd, recurse */ 1189 if (_pmap_allocpte(pmap, NUPDE + pdpindex, 1190 flags) == NULL) { 1191 --m->wire_count; 1192 vm_page_free(m); 1193 return (NULL); 1194 } 1195 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1196 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1197 } else { 1198 pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); 1199 pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; 1200 if ((*pdp & PG_V) == 0) { 1201 /* Have to allocate a new pd, recurse */ 1202 if (_pmap_allocpte(pmap, NUPDE + pdpindex, 1203 flags) == NULL) { 1204 --m->wire_count; 1205 vm_page_free(m); 1206 return (NULL); 1207 } 1208 } else { 1209 /* Add reference to the pd page */ 1210 pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); 1211 pdpg->wire_count++; 1212 } 1213 } 1214 pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME); 1215 1216 /* Now we know where the page directory page is */ 1217 pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; 1218 *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; 1219 } 1220 1221 return m; 1222} 1223 1224static vm_page_t 1225pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) 1226{ 1227 vm_pindex_t ptepindex; 1228 pd_entry_t *pd; 1229 vm_page_t m; 1230 1231 KASSERT((flags & (M_NOWAIT | M_WAITOK)) == M_NOWAIT || 1232 (flags & (M_NOWAIT | M_WAITOK)) == M_WAITOK, 1233 ("pmap_allocpte: flags is neither M_NOWAIT nor M_WAITOK")); 1234 1235 /* 1236 * Calculate pagetable page index 1237 */ 1238 ptepindex = pmap_pde_pindex(va); 1239retry: 1240 /* 1241 * Get the page directory entry 1242 */ 1243 pd = pmap_pde(pmap, va); 1244 1245 /* 1246 * This supports switching from a 2MB page to a 1247 * normal 4K page. 
1248 */ 1249 if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { 1250 *pd = 0; 1251 pd = 0; 1252 pmap_invalidate_all(kernel_pmap); 1253 } 1254 1255 /* 1256 * If the page table page is mapped, we just increment the 1257 * hold count, and activate it. 1258 */ 1259 if (pd != 0 && (*pd & PG_V) != 0) { 1260 m = PHYS_TO_VM_PAGE(*pd & PG_FRAME); 1261 m->wire_count++; 1262 } else { 1263 /* 1264 * Here if the pte page isn't mapped, or if it has been 1265 * deallocated. 1266 */ 1267 m = _pmap_allocpte(pmap, ptepindex, flags); 1268 if (m == NULL && (flags & M_WAITOK)) 1269 goto retry; 1270 } 1271 return (m); 1272} 1273 1274 1275/*************************************************** 1276 * Pmap allocation/deallocation routines. 1277 ***************************************************/ 1278 1279/* 1280 * Release any resources held by the given physical map. 1281 * Called when a pmap initialized by pmap_pinit is being released. 1282 * Should only be called if the map contains no valid mappings. 1283 */ 1284void 1285pmap_release(pmap_t pmap) 1286{ 1287 vm_page_t m; 1288 1289 KASSERT(pmap->pm_stats.resident_count == 0, 1290 ("pmap_release: pmap resident count %ld != 0", 1291 pmap->pm_stats.resident_count)); 1292 1293 m = PHYS_TO_VM_PAGE(pmap->pm_pml4[PML4PML4I] & PG_FRAME); 1294 1295 pmap->pm_pml4[KPML4I] = 0; /* KVA */ 1296 pmap->pm_pml4[DMPML4I] = 0; /* Direct Map */ 1297 pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */ 1298 1299 vm_page_lock_queues(); 1300 m->wire_count--; 1301 atomic_subtract_int(&cnt.v_wire_count, 1); 1302 vm_page_free_zero(m); 1303 vm_page_unlock_queues(); 1304 PMAP_LOCK_DESTROY(pmap); 1305} 1306 1307static int 1308kvm_size(SYSCTL_HANDLER_ARGS) 1309{ 1310 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE; 1311 1312 return sysctl_handle_long(oidp, &ksize, 0, req); 1313} 1314SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1315 0, 0, kvm_size, "IU", "Size of KVM"); 1316 1317static int 1318kvm_free(SYSCTL_HANDLER_ARGS) 1319{ 1320 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1321 1322 return sysctl_handle_long(oidp, &kfree, 0, req); 1323} 1324SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1325 0, 0, kvm_free, "IU", "Amount of KVM free"); 1326 1327/* 1328 * grow the number of kernel page table entries, if needed 1329 */ 1330void 1331pmap_growkernel(vm_offset_t addr) 1332{ 1333 vm_paddr_t paddr; 1334 vm_page_t nkpg; 1335 pd_entry_t *pde, newpdir; 1336 pdp_entry_t newpdp; 1337 1338 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1339 if (kernel_vm_end == 0) { 1340 kernel_vm_end = KERNBASE; 1341 nkpt = 0; 1342 while ((*pmap_pde(kernel_pmap, kernel_vm_end) & PG_V) != 0) { 1343 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1344 nkpt++; 1345 } 1346 } 1347 addr = roundup2(addr, PAGE_SIZE * NPTEPG); 1348 while (kernel_vm_end < addr) { 1349 pde = pmap_pde(kernel_pmap, kernel_vm_end); 1350 if (pde == NULL) { 1351 /* We need a new PDP entry */ 1352 nkpg = vm_page_alloc(NULL, nkpt, 1353 VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); 1354 if (!nkpg) 1355 panic("pmap_growkernel: no memory to grow kernel"); 1356 pmap_zero_page(nkpg); 1357 paddr = VM_PAGE_TO_PHYS(nkpg); 1358 newpdp = (pdp_entry_t) 1359 (paddr | PG_V | PG_RW | PG_A | PG_M); 1360 *pmap_pdpe(kernel_pmap, kernel_vm_end) = newpdp; 1361 continue; /* try again */ 1362 } 1363 if ((*pde & PG_V) != 0) { 1364 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1365 continue; 1366 } 1367 1368 /* 1369 * This index is 
bogus, but out of the way 1370 */ 1371 nkpg = vm_page_alloc(NULL, nkpt, 1372 VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED); 1373 if (!nkpg) 1374 panic("pmap_growkernel: no memory to grow kernel"); 1375 1376 nkpt++; 1377 1378 pmap_zero_page(nkpg); 1379 paddr = VM_PAGE_TO_PHYS(nkpg); 1380 newpdir = (pd_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M); 1381 *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir; 1382 1383 kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); 1384 } 1385} 1386 1387 1388/*************************************************** 1389 * page management routines. 1390 ***************************************************/ 1391 1392/* 1393 * free the pv_entry back to the free list 1394 */ 1395static PMAP_INLINE void 1396free_pv_entry(pv_entry_t pv) 1397{ 1398 pv_entry_count--; 1399 uma_zfree(pvzone, pv); 1400} 1401 1402/* 1403 * get a new pv_entry, allocating a block from the system 1404 * when needed. 1405 * the memory allocation is performed bypassing the malloc code 1406 * because of the possibility of allocations at interrupt time. 1407 */ 1408static pv_entry_t 1409get_pv_entry(void) 1410{ 1411 pv_entry_count++; 1412 if (pv_entry_high_water && 1413 (pv_entry_count > pv_entry_high_water) && 1414 (pmap_pagedaemon_waken == 0)) { 1415 pmap_pagedaemon_waken = 1; 1416 wakeup (&vm_pages_needed); 1417 } 1418 return uma_zalloc(pvzone, M_NOWAIT); 1419} 1420 1421 1422static void 1423pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va) 1424{ 1425 pv_entry_t pv; 1426 1427 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1428 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1429 if (m->md.pv_list_count < pmap->pm_stats.resident_count) { 1430 TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) { 1431 if (pmap == pv->pv_pmap && va == pv->pv_va) 1432 break; 1433 } 1434 } else { 1435 TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) { 1436 if (va == pv->pv_va) 1437 break; 1438 } 1439 } 1440 KASSERT(pv != NULL, ("pmap_remove_entry: pv not found")); 1441 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1442 m->md.pv_list_count--; 1443 if (TAILQ_EMPTY(&m->md.pv_list)) 1444 vm_page_flag_clear(m, PG_WRITEABLE); 1445 TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist); 1446 free_pv_entry(pv); 1447} 1448 1449/* 1450 * Create a pv entry for page at pa for 1451 * (pmap, va). 1452 */ 1453static void 1454pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t m) 1455{ 1456 pv_entry_t pv; 1457 1458 pv = get_pv_entry(); 1459 pv->pv_va = va; 1460 pv->pv_pmap = pmap; 1461 1462 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1463 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1464 TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist); 1465 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list); 1466 m->md.pv_list_count++; 1467} 1468 1469/* 1470 * pmap_remove_pte: do the things to unmap a page in a process 1471 */ 1472static int 1473pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, pd_entry_t ptepde) 1474{ 1475 pt_entry_t oldpte; 1476 vm_page_t m; 1477 1478 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1479 oldpte = pte_load_clear(ptq); 1480 if (oldpte & PG_W) 1481 pmap->pm_stats.wired_count -= 1; 1482 /* 1483 * Machines that don't support invlpg, also don't support 1484 * PG_G. 
1485 */ 1486 if (oldpte & PG_G) 1487 pmap_invalidate_page(kernel_pmap, va); 1488 pmap->pm_stats.resident_count -= 1; 1489 if (oldpte & PG_MANAGED) { 1490 m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); 1491 if (oldpte & PG_M) { 1492#if defined(PMAP_DIAGNOSTIC) 1493 if (pmap_nw_modified((pt_entry_t) oldpte)) { 1494 printf( 1495 "pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 1496 va, oldpte); 1497 } 1498#endif 1499 if (pmap_track_modified(va)) 1500 vm_page_dirty(m); 1501 } 1502 if (oldpte & PG_A) 1503 vm_page_flag_set(m, PG_REFERENCED); 1504 pmap_remove_entry(pmap, m, va); 1505 } 1506 return (pmap_unuse_pt(pmap, va, ptepde)); 1507} 1508 1509/* 1510 * Remove a single page from a process address space 1511 */ 1512static void 1513pmap_remove_page(pmap_t pmap, vm_offset_t va) 1514{ 1515 pd_entry_t ptepde; 1516 pt_entry_t *pte; 1517 1518 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1519 pte = pmap_pte_pde(pmap, va, &ptepde); 1520 if (pte == NULL || (*pte & PG_V) == 0) 1521 return; 1522 pmap_remove_pte(pmap, pte, va, ptepde); 1523 pmap_invalidate_page(pmap, va); 1524} 1525 1526/* 1527 * Remove the given range of addresses from the specified map. 1528 * 1529 * It is assumed that the start and end are properly 1530 * rounded to the page size. 1531 */ 1532void 1533pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1534{ 1535 vm_offset_t va_next; 1536 pml4_entry_t *pml4e; 1537 pdp_entry_t *pdpe; 1538 pd_entry_t ptpaddr, *pde; 1539 pt_entry_t *pte; 1540 int anyvalid; 1541 1542 /* 1543 * Perform an unsynchronized read. This is, however, safe. 1544 */ 1545 if (pmap->pm_stats.resident_count == 0) 1546 return; 1547 1548 anyvalid = 0; 1549 1550 vm_page_lock_queues(); 1551 PMAP_LOCK(pmap); 1552 1553 /* 1554 * special handling of removing one page. a very 1555 * common operation and easy to short circuit some 1556 * code. 1557 */ 1558 if (sva + PAGE_SIZE == eva) { 1559 pde = pmap_pde(pmap, sva); 1560 if (pde && (*pde & PG_PS) == 0) { 1561 pmap_remove_page(pmap, sva); 1562 goto out; 1563 } 1564 } 1565 1566 for (; sva < eva; sva = va_next) { 1567 1568 if (pmap->pm_stats.resident_count == 0) 1569 break; 1570 1571 pml4e = pmap_pml4e(pmap, sva); 1572 if (pml4e == 0) { 1573 va_next = (sva + NBPML4) & ~PML4MASK; 1574 continue; 1575 } 1576 1577 pdpe = pmap_pdpe(pmap, sva); 1578 if (pdpe == 0) { 1579 va_next = (sva + NBPDP) & ~PDPMASK; 1580 continue; 1581 } 1582 1583 /* 1584 * Calculate index for next page table. 1585 */ 1586 va_next = (sva + NBPDR) & ~PDRMASK; 1587 1588 pde = pmap_pde(pmap, sva); 1589 if (pde == 0) 1590 continue; 1591 ptpaddr = *pde; 1592 1593 /* 1594 * Weed out invalid mappings. 1595 */ 1596 if (ptpaddr == 0) 1597 continue; 1598 1599 /* 1600 * Check for large page. 1601 */ 1602 if ((ptpaddr & PG_PS) != 0) { 1603 *pde = 0; 1604 pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE; 1605 anyvalid = 1; 1606 continue; 1607 } 1608 1609 /* 1610 * Limit our scan to either the end of the va represented 1611 * by the current page table page, or to the end of the 1612 * range being removed. 
1613 */ 1614 if (va_next > eva) 1615 va_next = eva; 1616 1617 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1618 sva += PAGE_SIZE) { 1619 if (*pte == 0) 1620 continue; 1621 anyvalid = 1; 1622 if (pmap_remove_pte(pmap, pte, sva, ptpaddr)) 1623 break; 1624 } 1625 } 1626out: 1627 vm_page_unlock_queues(); 1628 if (anyvalid) 1629 pmap_invalidate_all(pmap); 1630 PMAP_UNLOCK(pmap); 1631} 1632 1633/* 1634 * Routine: pmap_remove_all 1635 * Function: 1636 * Removes this physical page from 1637 * all physical maps in which it resides. 1638 * Reflects back modify bits to the pager. 1639 * 1640 * Notes: 1641 * Original versions of this routine were very 1642 * inefficient because they iteratively called 1643 * pmap_remove (slow...) 1644 */ 1645 1646void 1647pmap_remove_all(vm_page_t m) 1648{ 1649 register pv_entry_t pv; 1650 pt_entry_t *pte, tpte; 1651 pd_entry_t ptepde; 1652 1653#if defined(PMAP_DIAGNOSTIC) 1654 /* 1655 * XXX This makes pmap_remove_all() illegal for non-managed pages! 1656 */ 1657 if (m->flags & PG_FICTITIOUS) { 1658 panic("pmap_remove_all: illegal for unmanaged page, va: 0x%lx", 1659 VM_PAGE_TO_PHYS(m)); 1660 } 1661#endif 1662 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 1663 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1664 PMAP_LOCK(pv->pv_pmap); 1665 pv->pv_pmap->pm_stats.resident_count--; 1666 pte = pmap_pte_pde(pv->pv_pmap, pv->pv_va, &ptepde); 1667 tpte = pte_load_clear(pte); 1668 if (tpte & PG_W) 1669 pv->pv_pmap->pm_stats.wired_count--; 1670 if (tpte & PG_A) 1671 vm_page_flag_set(m, PG_REFERENCED); 1672 1673 /* 1674 * Update the vm_page_t clean and reference bits. 1675 */ 1676 if (tpte & PG_M) { 1677#if defined(PMAP_DIAGNOSTIC) 1678 if (pmap_nw_modified((pt_entry_t) tpte)) { 1679 printf( 1680 "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 1681 pv->pv_va, tpte); 1682 } 1683#endif 1684 if (pmap_track_modified(pv->pv_va)) 1685 vm_page_dirty(m); 1686 } 1687 pmap_invalidate_page(pv->pv_pmap, pv->pv_va); 1688 TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist); 1689 TAILQ_REMOVE(&m->md.pv_list, pv, pv_list); 1690 m->md.pv_list_count--; 1691 pmap_unuse_pt(pv->pv_pmap, pv->pv_va, ptepde); 1692 PMAP_UNLOCK(pv->pv_pmap); 1693 free_pv_entry(pv); 1694 } 1695 vm_page_flag_clear(m, PG_WRITEABLE); 1696} 1697 1698/* 1699 * Set the physical protection on the 1700 * specified range of this map as requested. 1701 */ 1702void 1703pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1704{ 1705 vm_offset_t va_next; 1706 pml4_entry_t *pml4e; 1707 pdp_entry_t *pdpe; 1708 pd_entry_t ptpaddr, *pde; 1709 pt_entry_t *pte; 1710 int anychanged; 1711 1712 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1713 pmap_remove(pmap, sva, eva); 1714 return; 1715 } 1716 1717 if (prot & VM_PROT_WRITE) 1718 return; 1719 1720 anychanged = 0; 1721 1722 vm_page_lock_queues(); 1723 PMAP_LOCK(pmap); 1724 for (; sva < eva; sva = va_next) { 1725 1726 pml4e = pmap_pml4e(pmap, sva); 1727 if (pml4e == 0) { 1728 va_next = (sva + NBPML4) & ~PML4MASK; 1729 continue; 1730 } 1731 1732 pdpe = pmap_pdpe(pmap, sva); 1733 if (pdpe == 0) { 1734 va_next = (sva + NBPDP) & ~PDPMASK; 1735 continue; 1736 } 1737 1738 va_next = (sva + NBPDR) & ~PDRMASK; 1739 1740 pde = pmap_pde(pmap, sva); 1741 if (pde == NULL) 1742 continue; 1743 ptpaddr = *pde; 1744 1745 /* 1746 * Weed out invalid mappings. 1747 */ 1748 if (ptpaddr == 0) 1749 continue; 1750 1751 /* 1752 * Check for large page. 
1753 */ 1754 if ((ptpaddr & PG_PS) != 0) { 1755 *pde &= ~(PG_M|PG_RW); 1756 anychanged = 1; 1757 continue; 1758 } 1759 1760 if (va_next > eva) 1761 va_next = eva; 1762 1763 for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, 1764 sva += PAGE_SIZE) { 1765 pt_entry_t obits, pbits; 1766 vm_page_t m; 1767 1768retry: 1769 obits = pbits = *pte; 1770 if (pbits & PG_MANAGED) { 1771 m = NULL; 1772 if (pbits & PG_A) { 1773 m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); 1774 vm_page_flag_set(m, PG_REFERENCED); 1775 pbits &= ~PG_A; 1776 } 1777 if ((pbits & PG_M) != 0 && 1778 pmap_track_modified(sva)) { 1779 if (m == NULL) 1780 m = PHYS_TO_VM_PAGE(pbits & 1781 PG_FRAME); 1782 vm_page_dirty(m); 1783 } 1784 } 1785 1786 pbits &= ~(PG_RW | PG_M); 1787 1788 if (pbits != obits) { 1789 if (!atomic_cmpset_long(pte, obits, pbits)) 1790 goto retry; 1791 if (obits & PG_G) 1792 pmap_invalidate_page(pmap, sva); 1793 else 1794 anychanged = 1; 1795 } 1796 } 1797 } 1798 vm_page_unlock_queues(); 1799 if (anychanged) 1800 pmap_invalidate_all(pmap); 1801 PMAP_UNLOCK(pmap); 1802} 1803 1804/* 1805 * Insert the given physical page (p) at 1806 * the specified virtual address (v) in the 1807 * target physical map with the protection requested. 1808 * 1809 * If specified, the page will be wired down, meaning 1810 * that the related pte can not be reclaimed. 1811 * 1812 * NB: This is the only routine which MAY NOT lazy-evaluate 1813 * or lose information. That is, this routine must actually 1814 * insert this page into the given map NOW. 1815 */ 1816void 1817pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1818 boolean_t wired) 1819{ 1820 vm_paddr_t pa; 1821 register pt_entry_t *pte; 1822 vm_paddr_t opa; 1823 pt_entry_t origpte, newpte; 1824 vm_page_t mpte, om; 1825 1826 va = trunc_page(va); 1827#ifdef PMAP_DIAGNOSTIC 1828 if (va > VM_MAX_KERNEL_ADDRESS) 1829 panic("pmap_enter: toobig"); 1830 if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS)) 1831 panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va); 1832#endif 1833 1834 mpte = NULL; 1835 1836 vm_page_lock_queues(); 1837 PMAP_LOCK(pmap); 1838 1839 /* 1840 * In the case that a page table page is not 1841 * resident, we are creating it here. 1842 */ 1843 if (va < VM_MAXUSER_ADDRESS) { 1844 mpte = pmap_allocpte(pmap, va, M_WAITOK); 1845 } 1846#if 0 && defined(PMAP_DIAGNOSTIC) 1847 else { 1848 pd_entry_t *pdeaddr = pmap_pde(pmap, va); 1849 origpte = *pdeaddr; 1850 if ((origpte & PG_V) == 0) { 1851 panic("pmap_enter: invalid kernel page table page, pde=%p, va=%p\n", 1852 origpte, va); 1853 } 1854 } 1855#endif 1856 1857 pte = pmap_pte(pmap, va); 1858 1859 /* 1860 * Page Directory table entry not valid, we need a new PT page 1861 */ 1862 if (pte == NULL) 1863 panic("pmap_enter: invalid page directory va=%#lx\n", va); 1864 1865 pa = VM_PAGE_TO_PHYS(m); 1866 om = NULL; 1867 origpte = *pte; 1868 opa = origpte & PG_FRAME; 1869 1870 if (origpte & PG_PS) 1871 panic("pmap_enter: attempted pmap_enter on 2MB page"); 1872 1873 /* 1874 * Mapping has not changed, must be protection or wiring change. 1875 */ 1876 if (origpte && (opa == pa)) { 1877 /* 1878 * Wiring change, just update stats. We don't worry about 1879 * wiring PT pages as they remain resident as long as there 1880 * are valid mappings in them. Hence, if a user page is wired, 1881 * the PT page will be also. 
1882 */ 1883 if (wired && ((origpte & PG_W) == 0)) 1884 pmap->pm_stats.wired_count++; 1885 else if (!wired && (origpte & PG_W)) 1886 pmap->pm_stats.wired_count--; 1887 1888#if defined(PMAP_DIAGNOSTIC) 1889 if (pmap_nw_modified((pt_entry_t) origpte)) { 1890 printf( 1891 "pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n", 1892 va, origpte); 1893 } 1894#endif 1895 1896 /* 1897 * Remove extra pte reference 1898 */ 1899 if (mpte) 1900 mpte->wire_count--; 1901 1902 /* 1903 * We might be turning off write access to the page, 1904 * so we go ahead and sense modify status. 1905 */ 1906 if (origpte & PG_MANAGED) { 1907 om = m; 1908 pa |= PG_MANAGED; 1909 } 1910 goto validate; 1911 } 1912 /* 1913 * Mapping has changed, invalidate old range and fall through to 1914 * handle validating new mapping. 1915 */ 1916 if (opa) { 1917 if (origpte & PG_W) 1918 pmap->pm_stats.wired_count--; 1919 if (origpte & PG_MANAGED) { 1920 om = PHYS_TO_VM_PAGE(opa); 1921 pmap_remove_entry(pmap, om, va); 1922 } 1923 if (mpte != NULL) { 1924 mpte->wire_count--; 1925 KASSERT(mpte->wire_count > 0, 1926 ("pmap_enter: missing reference to page table page," 1927 " va: 0x%lx", va)); 1928 } 1929 } else 1930 pmap->pm_stats.resident_count++; 1931 1932 /* 1933 * Enter on the PV list if part of our managed memory. 1934 */ 1935 if ((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0) { 1936 pmap_insert_entry(pmap, va, m); 1937 pa |= PG_MANAGED; 1938 } 1939 1940 /* 1941 * Increment counters 1942 */ 1943 if (wired) 1944 pmap->pm_stats.wired_count++; 1945 1946validate: 1947 /* 1948 * Now validate mapping with desired protection/wiring. 1949 */ 1950 newpte = (pt_entry_t)(pa | PG_V); 1951 if ((prot & VM_PROT_WRITE) != 0) 1952 newpte |= PG_RW; 1953 if ((prot & VM_PROT_EXECUTE) == 0) 1954 newpte |= pg_nx; 1955 if (wired) 1956 newpte |= PG_W; 1957 if (va < VM_MAXUSER_ADDRESS) 1958 newpte |= PG_U; 1959 if (pmap == kernel_pmap) 1960 newpte |= PG_G; 1961 1962 /* 1963 * if the mapping or permission bits are different, we need 1964 * to update the pte. 1965 */ 1966 if ((origpte & ~(PG_M|PG_A)) != newpte) { 1967 if (origpte & PG_MANAGED) { 1968 origpte = pte_load_store(pte, newpte | PG_A); 1969 if ((origpte & PG_M) && pmap_track_modified(va)) 1970 vm_page_dirty(om); 1971 if (origpte & PG_A) 1972 vm_page_flag_set(om, PG_REFERENCED); 1973 } else 1974 pte_store(pte, newpte | PG_A); 1975 if (origpte) { 1976 pmap_invalidate_page(pmap, va); 1977 } 1978 } 1979 vm_page_unlock_queues(); 1980 PMAP_UNLOCK(pmap); 1981} 1982 1983/* 1984 * this code makes some *MAJOR* assumptions: 1985 * 1. Current pmap & pmap exists. 1986 * 2. Not wired. 1987 * 3. Read access. 1988 * 4. No page table pages. 1989 * 6. Page IS managed. 1990 * but is *MUCH* faster than pmap_enter... 1991 */ 1992 1993vm_page_t 1994pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte) 1995{ 1996 pt_entry_t *pte; 1997 vm_paddr_t pa; 1998 1999 mtx_assert(&vm_page_queue_mtx, MA_OWNED); 2000 VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED); 2001 PMAP_LOCK(pmap); 2002 2003 /* 2004 * In the case that a page table page is not 2005 * resident, we are creating it here. 
2006 */ 2007 if (va < VM_MAXUSER_ADDRESS) { 2008 vm_pindex_t ptepindex; 2009 pd_entry_t *ptepa; 2010 2011 /* 2012 * Calculate pagetable page index 2013 */ 2014 ptepindex = pmap_pde_pindex(va); 2015 if (mpte && (mpte->pindex == ptepindex)) { 2016 mpte->wire_count++; 2017 } else { 2018 retry: 2019 /* 2020 * Get the page directory entry 2021 */ 2022 ptepa = pmap_pde(pmap, va); 2023 2024 /* 2025 * If the page table page is mapped, we just increment 2026 * the hold count, and activate it. 2027 */ 2028 if (ptepa && (*ptepa & PG_V) != 0) { 2029 if (*ptepa & PG_PS) 2030 panic("pmap_enter_quick: unexpected mapping into 2MB page"); 2031 mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME); 2032 mpte->wire_count++; 2033 } else { 2034 mpte = _pmap_allocpte(pmap, ptepindex, 2035 M_NOWAIT); 2036 if (mpte == NULL) { 2037 PMAP_UNLOCK(pmap); 2038 vm_page_busy(m); 2039 vm_page_unlock_queues(); 2040 VM_OBJECT_UNLOCK(m->object); 2041 VM_WAIT; 2042 VM_OBJECT_LOCK(m->object); 2043 vm_page_lock_queues(); 2044 vm_page_wakeup(m); 2045 PMAP_LOCK(pmap); 2046 goto retry; 2047 } 2048 } 2049 } 2050 } else { 2051 mpte = NULL; 2052 } 2053 2054 /* 2055 * This call to vtopte makes the assumption that we are 2056 * entering the page into the current pmap. In order to support 2057 * quick entry into any pmap, one would likely use pmap_pte. 2058 * But that isn't as quick as vtopte. 2059 */ 2060 pte = vtopte(va); 2061 if (*pte) { 2062 if (mpte != NULL) { 2063 pmap_unwire_pte_hold(pmap, va, mpte); 2064 mpte = NULL; 2065 } 2066 goto out; 2067 } 2068 2069 /* 2070 * Enter on the PV list if part of our managed memory. Note that we 2071 * raise IPL while manipulating pv_table since pmap_enter can be 2072 * called at interrupt time. 2073 */ 2074 if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) 2075 pmap_insert_entry(pmap, va, m); 2076 2077 /* 2078 * Increment counters 2079 */ 2080 pmap->pm_stats.resident_count++; 2081 2082 pa = VM_PAGE_TO_PHYS(m); 2083 2084 /* 2085 * Now validate mapping with RO protection 2086 */ 2087 if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) 2088 pte_store(pte, pa | PG_V | PG_U); 2089 else 2090 pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); 2091out: 2092 PMAP_UNLOCK(pmap); 2093 return mpte; 2094} 2095 2096/* 2097 * Make a temporary mapping for a physical address. This is only intended 2098 * to be used for panic dumps. 2099 */ 2100void * 2101pmap_kenter_temporary(vm_paddr_t pa, int i) 2102{ 2103 vm_offset_t va; 2104 2105 va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); 2106 pmap_kenter(va, pa); 2107 invlpg(va); 2108 return ((void *)crashdumpmap); 2109} 2110 2111/* 2112 * This code maps large physical mmap regions into the 2113 * processor address space. Note that some shortcuts 2114 * are taken, but the code works. 
/*
 * Make a temporary mapping for a physical address.  This is only intended
 * to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_paddr_t pa, int i)
{
	vm_offset_t va;

	va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
	pmap_kenter(va, pa);
	invlpg(va);
	return ((void *)crashdumpmap);
}

/*
 * This code maps large physical mmap regions into the
 * processor address space.  Note that some shortcuts
 * are taken, but the code works.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
		    vm_object_t object, vm_pindex_t pindex,
		    vm_size_t size)
{
	vm_page_t p;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT(object->type == OBJT_DEVICE,
	    ("pmap_object_init_pt: non-device object"));
	if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
		int i;
		vm_page_t m[1];
		int npdes;
		pd_entry_t ptepa, *pde;

		PMAP_LOCK(pmap);
		pde = pmap_pde(pmap, addr);
		if (pde != 0 && (*pde & PG_V) != 0)
			goto out;
		PMAP_UNLOCK(pmap);
retry:
		p = vm_page_lookup(object, pindex);
		if (p != NULL) {
			vm_page_lock_queues();
			if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
				goto retry;
		} else {
			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
			if (p == NULL)
				return;
			m[0] = p;

			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
				vm_page_lock_queues();
				vm_page_free(p);
				vm_page_unlock_queues();
				return;
			}

			p = vm_page_lookup(object, pindex);
			vm_page_lock_queues();
			vm_page_wakeup(p);
		}
		vm_page_unlock_queues();

		ptepa = VM_PAGE_TO_PHYS(p);
		if (ptepa & (NBPDR - 1))
			return;

		p->valid = VM_PAGE_BITS_ALL;

		PMAP_LOCK(pmap);
		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
		npdes = size >> PDRSHIFT;
		for (i = 0; i < npdes; i++) {
			pde_store(pde, ptepa | PG_U | PG_RW | PG_V | PG_PS);
			ptepa += NBPDR;
			pde++;
		}
		pmap_invalidate_all(pmap);
out:
		PMAP_UNLOCK(pmap);
	}
}

/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap, va, wired)
	register pmap_t pmap;
	vm_offset_t va;
	boolean_t wired;
{
	register pt_entry_t *pte;

	/*
	 * Wiring is not a hardware characteristic so there is no need to
	 * invalidate TLB.
	 */
	PMAP_LOCK(pmap);
	pte = pmap_pte(pmap, va);
	if (wired && (*pte & PG_W) == 0) {
		pmap->pm_stats.wired_count++;
		atomic_set_long(pte, PG_W);
	} else if (!wired && (*pte & PG_W) != 0) {
		pmap->pm_stats.wired_count--;
		atomic_clear_long(pte, PG_W);
	}
	PMAP_UNLOCK(pmap);
}
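
/*
 * Example (illustrative only): unwiring an already-mapped range is done
 * one page at a time; a caller such as the fault-unwire path might do,
 * roughly:
 *
 *	for (va = start; va < end; va += PAGE_SIZE)
 *		pmap_change_wiring(pmap, va, FALSE);
 *
 * The mappings must already exist, as noted above.
 */
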
/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */

void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
	vm_offset_t addr;
	vm_offset_t end_addr = src_addr + len;
	vm_offset_t va_next;
	vm_page_t m;

	if (dst_addr != src_addr)
		return;

	if (!pmap_is_current(src_pmap))
		return;

	vm_page_lock_queues();
	if (dst_pmap < src_pmap) {
		PMAP_LOCK(dst_pmap);
		PMAP_LOCK(src_pmap);
	} else {
		PMAP_LOCK(src_pmap);
		PMAP_LOCK(dst_pmap);
	}
	for (addr = src_addr; addr < end_addr; addr = va_next) {
		pt_entry_t *src_pte, *dst_pte;
		vm_page_t dstmpte, srcmpte;
		pml4_entry_t *pml4e;
		pdp_entry_t *pdpe;
		pd_entry_t srcptepaddr, *pde;

		if (addr >= UPT_MIN_ADDRESS)
			panic("pmap_copy: invalid to pmap_copy page tables");

		/*
		 * Don't let optional prefaulting of pages make us go
		 * way below the low water mark of free pages or way
		 * above high water mark of used pv entries.
		 */
		if (cnt.v_free_count < cnt.v_free_reserved ||
		    pv_entry_count > pv_entry_high_water)
			break;

		pml4e = pmap_pml4e(src_pmap, addr);
		if (pml4e == 0) {
			va_next = (addr + NBPML4) & ~PML4MASK;
			continue;
		}

		pdpe = pmap_pdpe(src_pmap, addr);
		if (pdpe == 0) {
			va_next = (addr + NBPDP) & ~PDPMASK;
			continue;
		}

		va_next = (addr + NBPDR) & ~PDRMASK;

		pde = pmap_pde(src_pmap, addr);
		if (pde)
			srcptepaddr = *pde;
		else
			continue;
		if (srcptepaddr == 0)
			continue;

		if (srcptepaddr & PG_PS) {
			pde = pmap_pde(dst_pmap, addr);
			if (pde == 0) {
				/*
				 * XXX should do an allocpte here to
				 * instantiate the pde
				 */
				continue;
			}
			if (*pde == 0) {
				*pde = srcptepaddr;
				dst_pmap->pm_stats.resident_count +=
				    NBPDR / PAGE_SIZE;
			}
			continue;
		}

		srcmpte = PHYS_TO_VM_PAGE(srcptepaddr & PG_FRAME);
		if (srcmpte->wire_count == 0)
			panic("pmap_copy: source page table page is unused");

		if (va_next > end_addr)
			va_next = end_addr;

		src_pte = vtopte(addr);
		while (addr < va_next) {
			pt_entry_t ptetemp;
			ptetemp = *src_pte;
			/*
			 * We only virtual-copy managed pages.
			 */
			if ((ptetemp & PG_MANAGED) != 0) {
				/*
				 * We have to check after allocpte for the
				 * pte still being around...  allocpte can
				 * block.
				 */
				dstmpte = pmap_allocpte(dst_pmap, addr,
				    M_NOWAIT);
				if (dstmpte == NULL)
					break;
				dst_pte = (pt_entry_t *)
				    PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte));
				dst_pte = &dst_pte[pmap_pte_index(addr)];
				if (*dst_pte == 0) {
					/*
					 * Clear the modified and
					 * accessed (referenced) bits
					 * during the copy.
					 */
					m = PHYS_TO_VM_PAGE(ptetemp & PG_FRAME);
					*dst_pte = ptetemp & ~(PG_M | PG_A);
					dst_pmap->pm_stats.resident_count++;
					pmap_insert_entry(dst_pmap, addr, m);
				} else
					pmap_unwire_pte_hold(dst_pmap, addr, dstmpte);
				if (dstmpte->wire_count >= srcmpte->wire_count)
					break;
			}
			addr += PAGE_SIZE;
			src_pte++;
		}
	}
	vm_page_unlock_queues();
	PMAP_UNLOCK(src_pmap);
	PMAP_UNLOCK(dst_pmap);
}
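
/*
 * Example (illustrative only): during a fork, the new address space can
 * be given a head start by copying the parent's mappings for a map
 * entry.  Assuming hypothetical "dst_vm" and "src_vm" vmspaces and a map
 * entry "entry", the call might look like:
 *
 *	pmap_copy(vmspace_pmap(dst_vm), vmspace_pmap(src_vm),
 *	    entry->start, entry->end - entry->start, entry->start);
 *
 * Because the routine is advisory, it is always safe for it to do nothing.
 */
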
/*
 * pmap_zero_page zeros the specified hardware page.  On amd64 the page
 * is addressed through the direct map and cleared with pagezero().
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	pagezero((void *)va);
}

/*
 * pmap_zero_page_area zeros the specified portion of a hardware page
 * through the direct map, using pagezero() for a full page and bzero()
 * otherwise.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	if (off == 0 && size == PAGE_SIZE)
		pagezero((void *)va);
	else
		bzero((char *)va + off, size);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page through the
 * direct map.  This is intended to be called from the vm_pagezero
 * process only and outside of Giant.
 */
void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	pagezero((void *)va);
}

/*
 * pmap_copy_page copies the specified (machine independent) page
 * through the direct map using pagecopy().
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));

	pagecopy((void *)src, (void *)dst);
}
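
/*
 * Example (illustrative only): because these helpers go through the
 * direct map, partial-page zeroing is cheap.  Clearing only the second
 * half of a page "m" is simply:
 *
 *	pmap_zero_page_area(m, PAGE_SIZE / 2, PAGE_SIZE / 2);
 */
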
/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page.  This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap, m)
	pmap_t pmap;
	vm_page_t m;
{
	pv_entry_t pv;
	int loops = 0;

	if (m->flags & PG_FICTITIOUS)
		return FALSE;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			return TRUE;
		}
		loops++;
		if (loops >= 16)
			break;
	}
	return (FALSE);
}

#define PMAP_REMOVE_PAGES_CURPROC_ONLY
/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds.  Also, this code is special cased for the current process
 * only, but can have the more generic (and slightly slower) mode enabled.
 * This is much faster than pmap_remove in the case of running down an
 * entire address space.
 */
void
pmap_remove_pages(pmap, sva, eva)
	pmap_t pmap;
	vm_offset_t sva, eva;
{
	pt_entry_t *pte, tpte;
	vm_page_t m;
	pv_entry_t pv, npv;

#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
	if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}
#endif
	vm_page_lock_queues();
	PMAP_LOCK(pmap);
	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {

		if (pv->pv_va >= eva || pv->pv_va < sva) {
			npv = TAILQ_NEXT(pv, pv_plist);
			continue;
		}

#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
		pte = vtopte(pv->pv_va);
#else
		pte = pmap_pte(pmap, pv->pv_va);
#endif
		tpte = *pte;

		if (tpte == 0) {
			printf("TPTE at %p IS ZERO @ VA %08lx\n",
			    pte, pv->pv_va);
			panic("bad pte");
		}

/*
 * We cannot remove wired pages from a process' mapping at this time
 */
		if (tpte & PG_W) {
			npv = TAILQ_NEXT(pv, pv_plist);
			continue;
		}

		m = PHYS_TO_VM_PAGE(tpte & PG_FRAME);
		KASSERT(m->phys_addr == (tpte & PG_FRAME),
		    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
		    m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));

		KASSERT(m < &vm_page_array[vm_page_array_size],
		    ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));

		pmap->pm_stats.resident_count--;

		pte_clear(pte);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (tpte & PG_M) {
			vm_page_dirty(m);
		}

		npv = TAILQ_NEXT(pv, pv_plist);
		TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);

		m->md.pv_list_count--;
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		if (TAILQ_EMPTY(&m->md.pv_list))
			vm_page_flag_clear(m, PG_WRITEABLE);

		pmap_unuse_pt(pmap, pv->pv_va, *vtopde(pv->pv_va));
		free_pv_entry(pv);
	}
	pmap_invalidate_all(pmap);
	PMAP_UNLOCK(pmap);
	vm_page_unlock_queues();
}

/*
 * pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	boolean_t rv;

	rv = FALSE;
	if (m->flags & PG_FICTITIOUS)
		return (rv);

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		/*
		 * Skip addresses for which modified-bit tracking is
		 * disabled (see pmap_track_modified()).
		 */
		if (!pmap_track_modified(pv->pv_va))
			continue;
		PMAP_LOCK(pv->pv_pmap);
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
		rv = (*pte & PG_M) != 0;
		PMAP_UNLOCK(pv->pv_pmap);
		if (rv)
			break;
	}
	return (rv);
}

/*
 * pmap_is_prefaultable:
 *
 *	Return whether or not the specified virtual address is eligible
 *	for prefault.
 */
boolean_t
pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
{
	pd_entry_t *pde;
	pt_entry_t *pte;
	boolean_t rv;

	rv = FALSE;
	PMAP_LOCK(pmap);
	pde = pmap_pde(pmap, addr);
	if (pde != NULL && (*pde & PG_V)) {
		pte = vtopte(addr);
		rv = (*pte & PG_V) == 0;
	}
	PMAP_UNLOCK(pmap);
	return (rv);
}
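
/*
 * Example (illustrative only): a prefaulting loop can use
 * pmap_is_prefaultable() to skip addresses that already have a mapping
 * or that lack a page table page, and only then call pmap_enter_quick():
 *
 *	if (pmap_is_prefaultable(pmap, va))
 *		mpte = pmap_enter_quick(pmap, va, m, mpte);
 */
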
/*
 * Clear the given bit in each of the given page's ptes.
 */
static __inline void
pmap_clear_ptes(vm_page_t m, long bit)
{
	register pv_entry_t pv;
	pt_entry_t pbits, *pte;

	if ((m->flags & PG_FICTITIOUS) ||
	    (bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
		return;

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	/*
	 * Loop over all current mappings, setting/clearing as appropriate.
	 * If setting RO, do we need to clear the VAC?
	 */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		/*
		 * don't write protect pager mappings
		 */
		if (bit == PG_RW) {
			if (!pmap_track_modified(pv->pv_va))
				continue;
		}

		PMAP_LOCK(pv->pv_pmap);
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
retry:
		pbits = *pte;
		if (pbits & bit) {
			if (bit == PG_RW) {
				if (!atomic_cmpset_long(pte, pbits,
				    pbits & ~(PG_RW | PG_M)))
					goto retry;
				if (pbits & PG_M) {
					vm_page_dirty(m);
				}
			} else {
				atomic_clear_long(pte, bit);
			}
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		}
		PMAP_UNLOCK(pv->pv_pmap);
	}
	if (bit == PG_RW)
		vm_page_flag_clear(m, PG_WRITEABLE);
}

/*
 * pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
 */
void
pmap_page_protect(vm_page_t m, vm_prot_t prot)
{
	if ((prot & VM_PROT_WRITE) == 0) {
		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
			pmap_clear_ptes(m, PG_RW);
		} else {
			pmap_remove_all(m);
		}
	}
}

/*
 * pmap_ts_referenced:
 *
 *	Return a count of reference bits for a page, clearing those bits.
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	register pv_entry_t pv, pvf, pvn;
	pt_entry_t *pte;
	pt_entry_t v;
	int rtval = 0;

	if (m->flags & PG_FICTITIOUS)
		return (rtval);

	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {

		pvf = pv;

		do {
			pvn = TAILQ_NEXT(pv, pv_list);

			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);

			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);

			if (!pmap_track_modified(pv->pv_va))
				continue;

			PMAP_LOCK(pv->pv_pmap);
			pte = pmap_pte(pv->pv_pmap, pv->pv_va);

			if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
				atomic_clear_long(pte, PG_A);
				pmap_invalidate_page(pv->pv_pmap, pv->pv_va);

				rtval++;
				if (rtval > 4) {
					PMAP_UNLOCK(pv->pv_pmap);
					break;
				}
			}
			PMAP_UNLOCK(pv->pv_pmap);
		} while ((pv = pvn) != NULL && pv != pvf);
	}

	return (rtval);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	pmap_clear_ptes(m, PG_M);
}

/*
 * pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	pmap_clear_ptes(m, PG_A);
}
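
/*
 * Example (illustrative only): the pageout code typically uses
 * pmap_page_protect() in two ways; with VM_PROT_READ it write-protects
 * every mapping of the page (via pmap_clear_ptes(m, PG_RW) above), and
 * with VM_PROT_NONE it removes all mappings entirely:
 *
 *	pmap_page_protect(m, VM_PROT_READ);
 *	pmap_page_protect(m, VM_PROT_NONE);
 */
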
/*
 * Miscellaneous support routines follow
 */

/*
 * Map a set of physical memory pages into the kernel virtual
 * address space.  Return a pointer to where it is mapped.  This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(pa, size)
	vm_paddr_t pa;
	vm_size_t size;
{
	vm_offset_t va, tmpva, offset;

	/* If this fits within the direct map window, use it */
	if (pa < dmaplimit && (pa + size) < dmaplimit)
		return ((void *)PHYS_TO_DMAP(pa));
	offset = pa & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	va = kmem_alloc_nofault(kernel_map, size);
	if (!va)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
	pa = trunc_page(pa);
	for (tmpva = va; size > 0; ) {
		pmap_kenter(tmpva, pa);
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, va, tmpva);
	return ((void *)(va + offset));
}

void
pmap_unmapdev(va, size)
	vm_offset_t va;
	vm_size_t size;
{
	vm_offset_t base, offset, tmpva;

	/* If we gave a direct map region in pmap_mapdev, do nothing */
	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
		return;
	base = trunc_page(va);
	offset = va & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE)
		pmap_kremove(tmpva);
	pmap_invalidate_range(kernel_pmap, va, tmpva);
	kmem_free(kernel_map, base, size);
}
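
/*
 * Example (illustrative only): a device driver maps its registers for
 * the lifetime of the attachment and unmaps them on detach.  Assuming a
 * hypothetical register window at physical address "pa" of "size" bytes:
 *
 *	void *regs = pmap_mapdev(pa, size);
 *	...
 *	pmap_unmapdev((vm_offset_t)regs, size);
 *
 * When the region falls inside the direct map, pmap_unmapdev() is a no-op.
 */
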
/*
 * perform the pmap work for mincore
 */
int
pmap_mincore(pmap, addr)
	pmap_t pmap;
	vm_offset_t addr;
{
	pt_entry_t *ptep, pte;
	vm_page_t m;
	int val = 0;

	PMAP_LOCK(pmap);
	ptep = pmap_pte(pmap, addr);
	pte = (ptep != NULL) ? *ptep : 0;
	PMAP_UNLOCK(pmap);

	if (pte != 0) {
		vm_paddr_t pa;

		val = MINCORE_INCORE;
		if ((pte & PG_MANAGED) == 0)
			return val;

		pa = pte & PG_FRAME;

		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pte & PG_M)
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		else {
			/*
			 * Modified by someone else
			 */
			vm_page_lock_queues();
			if (m->dirty || pmap_is_modified(m))
				val |= MINCORE_MODIFIED_OTHER;
			vm_page_unlock_queues();
		}
		/*
		 * Referenced by us
		 */
		if (pte & PG_A)
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
		else {
			/*
			 * Referenced by someone else
			 */
			vm_page_lock_queues();
			if ((m->flags & PG_REFERENCED) ||
			    pmap_ts_referenced(m)) {
				val |= MINCORE_REFERENCED_OTHER;
				vm_page_flag_set(m, PG_REFERENCED);
			}
			vm_page_unlock_queues();
		}
	}
	return val;
}

void
pmap_activate(struct thread *td)
{
	struct proc *p = td->td_proc;
	pmap_t pmap, oldpmap;
	u_int64_t cr3;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	oldpmap = PCPU_GET(curpmap);
#ifdef SMP
	if (oldpmap)	/* XXX FIXME */
		atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
#else
	if (oldpmap)	/* XXX FIXME */
		oldpmap->pm_active &= ~PCPU_GET(cpumask);
	pmap->pm_active |= PCPU_GET(cpumask);
#endif
	cr3 = vtophys(pmap->pm_pml4);
	/* XXXKSE this is wrong.
	 * pmap_activate is for the current thread on the current cpu
	 */
	if (p->p_flag & P_SA) {
		/* Make sure all other cr3 entries are updated. */
		/* what if they are running?  XXXKSE (maybe abort them) */
		FOREACH_THREAD_IN_PROC(p, td) {
			td->td_pcb->pcb_cr3 = cr3;
		}
	} else {
		td->td_pcb->pcb_cr3 = cr3;
	}
	load_cr3(cr3);
	critical_exit();
}

vm_offset_t
pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
{

	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
		return addr;
	}

	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
	return addr;
}
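
/*
 * Example (illustrative only): for a large OBJT_DEVICE mapping the hint
 * rounds the proposed address up to a 2MB boundary so that superpage
 * mappings remain possible.  For instance, with NBPDR == 2MB:
 *
 *	pmap_addr_hint(obj, 0x200001000UL, 4 * 1024 * 1024)
 *
 * returns 0x200200000.
 */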