pmap.c revision 118641
/*
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)pmap.c      7.7 (Berkeley)  5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/sys/amd64/amd64/pmap.c 118641 2003-08-08 01:52:03Z alc $");

/*
 *      Manages physical address maps.
 *
 *      In addition to hardware address maps, this
 *      module is called upon to provide software-use-only
 *      maps which may or may not be stored in the same
 *      form as hardware maps.  These pseudo-maps are
 *      used to store intermediate results from copy
 *      operations to and from address spaces.
 *
 *      Since the information managed by this module is
 *      also stored by the logical address mapping module,
 *      this module may throw away valid virtual-to-physical
 *      mappings at almost any time.  However, invalidations
 *      of virtual-to-physical mappings must be done as
 *      requested.
 *
 *      In order to cope with hardware architectures which
 *      make virtual-to-physical map invalidates expensive,
 *      this module may delay invalidate or reduced protection
 *      operations until such time as they are actually
 *      necessary.  This module is given full information as
 *      to which processors are currently using which maps,
 *      and to when physical maps must be made correct.
 */

#include "opt_msgbuf.h"
#include "opt_kstack_pages.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sx.h>
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <vm/uma_int.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>

#define PMAP_KEEP_PDIRS
#ifndef PMAP_SHPGPERPROC
#define PMAP_SHPGPERPROC 200
#endif

#if defined(DIAGNOSTIC)
#define PMAP_DIAGNOSTIC
#endif

#define MINPV 2048

#if !defined(PMAP_DIAGNOSTIC)
#define PMAP_INLINE __inline
#else
#define PMAP_INLINE
#endif

/*
 * Given a map and a machine independent protection code,
 * convert to a vax protection code.
 */
#define pte_prot(m, p)  (protection_codes[p])
static pt_entry_t protection_codes[8];

struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;

vm_paddr_t avail_start;         /* PA of first available physical page */
vm_paddr_t avail_end;           /* PA of last available physical page */
vm_offset_t virtual_avail;      /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;        /* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE;      /* Has pmap_init completed? */
static int nkpt;
static int ndmpdp;
static vm_paddr_t dmaplimit;
vm_offset_t kernel_vm_end;

static u_int64_t        KPTphys;        /* phys addr of kernel level 1 */
static u_int64_t        KPDphys;        /* phys addr of kernel level 2 */
static u_int64_t        KPDPphys;       /* phys addr of kernel level 3 */
u_int64_t               KPML4phys;      /* phys addr of kernel level 4 */

static u_int64_t        DMPDphys;       /* phys addr of direct mapped level 2 */
static u_int64_t        DMPDPphys;      /* phys addr of direct mapped level 3 */

/*
 * Data for the pv entry allocation mechanism
 */
static uma_zone_t pvzone;
static struct vm_object pvzone_obj;
static int pv_entry_count = 0, pv_entry_max = 0, pv_entry_high_water = 0;
int pmap_pagedaemon_waken;

/*
 * All those kernel PT submaps that BSD is so fond of
 */
pt_entry_t *CMAP1 = 0;
static pt_entry_t *ptmmap;
caddr_t CADDR1 = 0, ptvmmap = 0;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp = 0;

/*
 * Crashdump maps.
 */
static pt_entry_t *pt_crashdumpmap;
static caddr_t crashdumpmap;

static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
static void     amd64_protection_init(void);
static void     pmap_changebit(vm_page_t m, int bit, boolean_t setem)
                    __always_inline;

static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva);
static void pmap_remove_page(struct pmap *pmap, vm_offset_t va);
static int pmap_remove_entry(struct pmap *pmap, vm_page_t m,
                    vm_offset_t va);
static void pmap_insert_entry(pmap_t pmap, vm_offset_t va,
                    vm_page_t mpte, vm_page_t m);

static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va);

static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex);
static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);

CTASSERT(1 << PDESHIFT == sizeof(pd_entry_t));
CTASSERT(1 << PTESHIFT == sizeof(pt_entry_t));
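/*
 * For reference (standard amd64 paging, not specific to this file):
 * the hardware walks a 4-level table.  Each level is one 4KB page of
 * 512 (2^9) 64-bit entries, so a canonical virtual address breaks
 * down as 9 bits of PML4 index, 9 bits of PDP index, 9 bits of PD
 * index, 9 bits of PT index, and a 12-bit page offset; only the low
 * 48 bits are translated.  The index helpers further below extract
 * exactly these fields.
 */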
/*
 * Move the kernel virtual free pointer to the next
 * 2MB.  This is used to help improve performance
 * by using a large (2MB) page for much of the kernel
 * (.text, .data, .bss)
 */
static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
        vm_offset_t newaddr = addr;

        newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
        return newaddr;
}

/********************/
/* Inline functions */
/********************/

/* Return a non-clipped PD index for a given VA */
static __inline vm_pindex_t
pmap_pde_pindex(vm_offset_t va)
{
        return va >> PDRSHIFT;
}


/* Return various clipped indexes for a given VA */
static __inline vm_pindex_t
pmap_pte_index(vm_offset_t va)
{

        return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pde_index(vm_offset_t va)
{

        return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pdpe_index(vm_offset_t va)
{

        return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
}

static __inline vm_pindex_t
pmap_pml4e_index(vm_offset_t va)
{

        return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
}

/* Return a pointer to the PML4 slot that corresponds to a VA */
static __inline pml4_entry_t *
pmap_pml4e(pmap_t pmap, vm_offset_t va)
{

        if (!pmap)
                return NULL;
        return (&pmap->pm_pml4[pmap_pml4e_index(va)]);
}

/* Return a pointer to the PDP slot that corresponds to a VA */
static __inline pdp_entry_t *
pmap_pdpe(pmap_t pmap, vm_offset_t va)
{
        pml4_entry_t *pml4e;
        pdp_entry_t *pdpe;

        pml4e = pmap_pml4e(pmap, va);
        if (pml4e == NULL || (*pml4e & PG_V) == 0)
                return NULL;
        pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME);
        return (&pdpe[pmap_pdpe_index(va)]);
}

/* Return a pointer to the PD slot that corresponds to a VA */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va)
{
        pdp_entry_t *pdpe;
        pd_entry_t *pde;

        pdpe = pmap_pdpe(pmap, va);
        if (pdpe == NULL || (*pdpe & PG_V) == 0)
                return NULL;
        pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME);
        return (&pde[pmap_pde_index(va)]);
}

/* Return a pointer to the PT slot that corresponds to a VA */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va)
{
        pd_entry_t *pde;
        pt_entry_t *pte;

        pde = pmap_pde(pmap, va);
        if (pde == NULL || (*pde & PG_V) == 0)
                return NULL;
        if ((*pde & PG_PS) != 0)        /* compat with i386 pmap_pte() */
                return ((pt_entry_t *)pde);
        pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
        return (&pte[pmap_pte_index(va)]);
}


PMAP_INLINE pt_entry_t *
vtopte(vm_offset_t va)
{
        u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);

        return (PTmap + (amd64_btop(va) & mask));
}
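/*
 * Illustrative example of the decomposition performed above (values
 * worked out by hand, not taken from the original sources): for
 * va = 0x0000000080200123,
 *
 *      pmap_pml4e_index(va) = (va >> 39) & 511 = 0
 *      pmap_pdpe_index(va)  = (va >> 30) & 511 = 2
 *      pmap_pde_index(va)   = (va >> 21) & 511 = 1
 *      pmap_pte_index(va)   = (va >> 12) & 511 = 0
 *
 * and the byte offset within the page is va & PAGE_MASK = 0x123.
 * vtopte() instead keeps all four indexes concatenated as a single
 * 36-bit index into the recursive PTmap window.
 */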
static u_int64_t
allocpages(int n)
{
        u_int64_t ret;

        ret = avail_start;
        bzero((void *)ret, n * PAGE_SIZE);
        avail_start += n * PAGE_SIZE;
        return (ret);
}

static void
create_pagetables(void)
{
        int i;

        /* Allocate pages */
        KPTphys = allocpages(NKPT);
        KPML4phys = allocpages(1);
        KPDPphys = allocpages(NKPML4E);
        KPDphys = allocpages(NKPDPE);

        ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
        if (ndmpdp < 4)         /* Minimum 4GB of dirmap */
                ndmpdp = 4;
        DMPDPphys = allocpages(NDMPML4E);
        DMPDphys = allocpages(ndmpdp);
        dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT;

        /* Fill in the underlying page table pages */
        /* Read-only from zero to physfree */
        /* XXX not fully used, underneath 2M pages */
        for (i = 0; (i << PAGE_SHIFT) < avail_start; i++) {
                ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
                ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V;
        }

        /* Now map the page tables at their location within PTmap */
        for (i = 0; i < NKPT; i++) {
                ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
                ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
        }

        /* Map from zero to end of allocations under 2M pages */
        /* This replaces some of the KPTphys entries above */
        for (i = 0; (i << PDRSHIFT) < avail_start; i++) {
                ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
                ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS;
        }

        /* And connect up the PD to the PDP */
        for (i = 0; i < NKPDPE; i++) {
                ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT);
                ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
        }


        /* Now set up the direct map space using 2MB pages */
        for (i = 0; i < NPDEPG * ndmpdp; i++) {
                ((pd_entry_t *)DMPDphys)[i] = (vm_paddr_t)i << PDRSHIFT;
                ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS;
        }

        /* And the direct map space's PDP */
        for (i = 0; i < ndmpdp; i++) {
                ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT);
                ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
        }

        /* And recursively map PML4 to itself in order to get PTmap */
        ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
        ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;

        /* Connect the Direct Map slot up to the PML4 */
        ((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys;
        ((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U;

        /* Connect the KVA slot up to the PML4 */
        ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
        ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
}
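/*
 * A note on the recursive mapping established above: pointing PML4
 * slot PML4PML4I back at the PML4 page itself makes every page table
 * page of the current pmap visible through a fixed virtual window.
 * One trip through that slot during a walk yields the PTEs (PTmap),
 * two trips the PDEs (PDmap), and so on; this is what vtopte() and
 * the PDmap/PDPmap arithmetic later in the file rely on.  The direct
 * map (DMPML4I) independently aliases all physical memory into KVA,
 * which is how PHYS_TO_DMAP() reaches page table pages without any
 * temporary mappings.
 */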
/*
 *      Bootstrap the system enough to run with virtual memory.
 *
 *      On amd64 this is called after mapping has already been enabled
 *      and just syncs the pmap module with what has already been done.
 *      [We can't call it easily with mapping off since the kernel is not
 *      mapped with PA == VA, hence we would have to relocate every address
 *      from the linked base (virtual) address "KERNBASE" to the actual
 *      (physical) address starting relative to 0]
 */
void
pmap_bootstrap(firstaddr)
        vm_paddr_t *firstaddr;
{
        vm_offset_t va;
        pt_entry_t *pte;

        avail_start = *firstaddr;

        /*
         * Create an initial set of page tables to run the kernel in.
         */
        create_pagetables();
        *firstaddr = avail_start;

        virtual_avail = (vm_offset_t) KERNBASE + avail_start;
        virtual_avail = pmap_kmem_choose(virtual_avail);

        virtual_end = VM_MAX_KERNEL_ADDRESS;

        /* XXX do %cr0 as well */
        load_cr4(rcr4() | CR4_PGE | CR4_PSE);
        load_cr3(KPML4phys);

        /*
         * Initialize protection array.
         */
        amd64_protection_init();

        /*
         * Initialize the kernel pmap (which is statically allocated).
         */
        kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);
        kernel_pmap->pm_active = -1;    /* don't allow deactivation */
        TAILQ_INIT(&kernel_pmap->pm_pvlist);
        LIST_INIT(&allpmaps);
        mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
        mtx_lock_spin(&allpmaps_lock);
        LIST_INSERT_HEAD(&allpmaps, kernel_pmap, pm_list);
        mtx_unlock_spin(&allpmaps_lock);
        nkpt = NKPT;

        /*
         * Reserve some special page table entries/VA space for temporary
         * mapping of pages.
         */
#define SYSMAP(c, p, v, n)      \
        v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n);

        va = virtual_avail;
        pte = vtopte(va);

        /*
         * CMAP1 is only used for the memory test.
         */
        SYSMAP(caddr_t, CMAP1, CADDR1, 1)

        /*
         * Crashdump maps.
         */
        SYSMAP(caddr_t, pt_crashdumpmap, crashdumpmap, MAXDUMPPGS);

        /*
         * ptvmmap is used for reading arbitrary physical pages via /dev/mem.
         * XXX ptmmap is not used.
         */
        SYSMAP(caddr_t, ptmmap, ptvmmap, 1)

        /*
         * msgbufp is used to map the system message buffer.
         * XXX msgbufmap is not used.
         */
        SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
            atop(round_page(MSGBUF_SIZE)))

        virtual_avail = va;

        *CMAP1 = 0;

        invltlb();
}

static void *
pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
        *flags = UMA_SLAB_PRIV;
        return (void *)kmem_alloc(kernel_map, bytes);
}

void *
uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
        static vm_pindex_t colour;
        vm_page_t m;
        int pflags;
        void *va;

        *flags = UMA_SLAB_PRIV;

        if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
                pflags = VM_ALLOC_INTERRUPT;
        else
                pflags = VM_ALLOC_SYSTEM;

        if (wait & M_ZERO)
                pflags |= VM_ALLOC_ZERO;

        for (;;) {
                m = vm_page_alloc(NULL, colour++, pflags | VM_ALLOC_NOOBJ);
                if (m == NULL) {
                        if (wait & M_NOWAIT)
                                return (NULL);
                        else
                                VM_WAIT;
                } else
                        break;
        }

        va = (void *)PHYS_TO_DMAP(m->phys_addr);
        if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0)
                pagezero(va);
        return (va);
}

void
uma_small_free(void *mem, int size, u_int8_t flags)
{
        vm_page_t m;

        m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)mem));
        vm_page_lock_queues();
        vm_page_free(m);
        vm_page_unlock_queues();
}

/*
 *      Initialize the pmap module.
 *      Called by vm_init, to initialize any structures that the pmap
 *      system needs to map virtual memory.
 *      pmap_init has been enhanced to support discontiguous physical
 *      memory in a fairly consistent way.
 */
void
pmap_init(phys_start, phys_end)
        vm_paddr_t phys_start, phys_end;
{
        int i;
        int initial_pvs;

        /*
         * Allocate memory for random pmap data structures.  Includes the
         * pv_head_table.
         */

        for(i = 0; i < vm_page_array_size; i++) {
                vm_page_t m;

                m = &vm_page_array[i];
                TAILQ_INIT(&m->md.pv_list);
                m->md.pv_list_count = 0;
        }

        /*
         * init the pv free list
         */
        initial_pvs = vm_page_array_size;
        if (initial_pvs < MINPV)
                initial_pvs = MINPV;
        pvzone = uma_zcreate("PV ENTRY", sizeof(struct pv_entry), NULL, NULL,
            NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM | UMA_ZONE_NOFREE);
        uma_zone_set_allocf(pvzone, pmap_pv_allocf);
        uma_prealloc(pvzone, initial_pvs);

        /*
         * Now it is safe to enable pv_table recording.
         */
        pmap_initialized = TRUE;
}
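/*
 * For a sense of scale (illustrative numbers, not measured):
 * pmap_init2() below sizes the PV entry zone as
 *
 *      pv_entry_max = shpgperproc * maxproc + vm_page_array_size
 *
 * so with the default PMAP_SHPGPERPROC of 200 and, say, a maxproc
 * of 1000, roughly 200000 entries are provided for shared pages
 * plus one per managed physical page, with the high-water mark set
 * at 90% of that total.
 */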
/*
 * Initialize the address space (zone) for the pv_entries.  Set a
 * high water mark so that the system can recover from excessive
 * numbers of pv entries.
 */
void
pmap_init2()
{
        int shpgperproc = PMAP_SHPGPERPROC;

        TUNABLE_INT_FETCH("vm.pmap.shpgperproc", &shpgperproc);
        pv_entry_max = shpgperproc * maxproc + vm_page_array_size;
        TUNABLE_INT_FETCH("vm.pmap.pv_entries", &pv_entry_max);
        pv_entry_high_water = 9 * (pv_entry_max / 10);
        uma_zone_set_obj(pvzone, &pvzone_obj, pv_entry_max);
}


/***************************************************
 * Low level helper routines.....
 ***************************************************/

#if defined(PMAP_DIAGNOSTIC)

/*
 * This code checks for non-writeable/modified pages.
 * This should be an invalid condition.
 */
static int
pmap_nw_modified(pt_entry_t ptea)
{
        int pte;

        pte = (int) ptea;

        if ((pte & (PG_M|PG_RW)) == PG_M)
                return 1;
        else
                return 0;
}
#endif


/*
 * this routine defines the region(s) of memory that should
 * not be tested for the modified bit.
 */
static PMAP_INLINE int
pmap_track_modified(vm_offset_t va)
{
        if ((va < kmi.clean_sva) || (va >= kmi.clean_eva))
                return 1;
        else
                return 0;
}

/*
 * Normal invalidation functions.
 * We inline these within pmap.c for speed.
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

        if (pmap == kernel_pmap || pmap->pm_active)
                invlpg(va);
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
        vm_offset_t addr;

        if (pmap == kernel_pmap || pmap->pm_active)
                for (addr = sva; addr < eva; addr += PAGE_SIZE)
                        invlpg(addr);
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

        if (pmap == kernel_pmap || pmap->pm_active)
                invltlb();
}

/*
 * Are we current address space or kernel?
 */
static __inline int
pmap_is_current(pmap_t pmap)
{
        return (pmap == kernel_pmap ||
            (pmap->pm_pml4[PML4PML4I] & PG_FRAME) == (PML4pml4e[0] & PG_FRAME));
}

/*
 *      Routine:        pmap_extract
 *      Function:
 *              Extract the physical page address associated
 *              with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap, va)
        register pmap_t pmap;
        vm_offset_t va;
{
        vm_paddr_t rtval;
        pt_entry_t *pte;
        pd_entry_t pde, *pdep;

        if (pmap == 0)
                return 0;
        pdep = pmap_pde(pmap, va);
        if (pdep) {
                pde = *pdep;
                if (pde) {
                        if ((pde & PG_PS) != 0) {
                                rtval = (pde & ~PDRMASK) | (va & PDRMASK);
                                return rtval;
                        }
                        pte = pmap_pte(pmap, va);
                        rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
                        return rtval;
                }
        }
        return 0;

}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
        pd_entry_t *pde;
        vm_paddr_t pa;

        if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
                pa = DMAP_TO_PHYS(va);
        } else {
                pde = pmap_pde(kernel_pmap, va);
                if (*pde & PG_PS) {
                        pa = (*pde & ~(NBPDR - 1)) | (va & (NBPDR - 1));
                } else {
                        pa = *vtopte(va);
                        pa = (pa & PG_FRAME) | (va & PAGE_MASK);
                }
        }
        return pa;
}
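/*
 * Example of the 2MB-page case above (hand-computed, assuming the
 * usual NBPDR of 2MB): for a kernel va whose PDE maps a 2MB page at
 * physical 0x40000000, the low 21 bits of va (va & 0x1fffff) are
 * simply glued onto the 2MB frame, so an offset of 0x1234 yields
 * pa = 0x40001234 without ever consulting a PTE.
 */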
/***************************************************
 * Low level mapping routines.....
 ***************************************************/

/*
 * Add a wired page to the kva.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_paddr_t pa)
{
        pt_entry_t *pte;

        pte = vtopte(va);
        pte_store(pte, pa | PG_RW | PG_V | PG_G);
}

/*
 * Remove a page from the kernel pagetables.
 * Note: not SMP coherent.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
        pt_entry_t *pte;

        pte = vtopte(va);
        pte_clear(pte);
}

/*
 *      Used to map a range of physical addresses into kernel
 *      virtual address space.
 *
 *      The value passed in '*virt' is a suggested virtual address for
 *      the mapping. Architectures which can support a direct-mapped
 *      physical to virtual region can return the appropriate address
 *      within that region, leaving '*virt' unchanged. Other
 *      architectures should map the pages starting at '*virt' and
 *      update '*virt' with the first usable address after the mapped
 *      region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
        return PHYS_TO_DMAP(start);
}


/*
 * Add a list of wired pages to the kva
 * this routine is only used for temporary
 * kernel mappings that do not need to have
 * page modification or references recorded.
 * Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
        vm_offset_t va;

        va = sva;
        while (count-- > 0) {
                pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
                va += PAGE_SIZE;
                m++;
        }
        pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
        vm_offset_t va;

        va = sva;
        while (count-- > 0) {
                pmap_kremove(va);
                va += PAGE_SIZE;
        }
        pmap_invalidate_range(kernel_pmap, sva, va);
}

static vm_page_t
pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
        vm_page_t m;

retry:
        m = vm_page_lookup(object, pindex);
        if (m != NULL) {
                vm_page_lock_queues();
                if (vm_page_sleep_if_busy(m, FALSE, "pplookp"))
                        goto retry;
                vm_page_unlock_queues();
        }
        return m;
}
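/*
 * Typical use of the pmap_qenter()/pmap_qremove() pair above (a
 * minimal sketch only; "n" and "pages" are hypothetical locals, and
 * a real caller would obtain the KVA from its own submap):
 *
 *      vm_offset_t kva = kmem_alloc_nofault(kernel_map, n * PAGE_SIZE);
 *      pmap_qenter(kva, pages, n);     map n vm_page_t's contiguously
 *      ... access the pages through kva ...
 *      pmap_qremove(kva, n);           tear the window back down
 */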
/***************************************************
 * Page table page management routines.....
 ***************************************************/

/*
 * This routine unholds page table pages, and if the hold count
 * drops to zero, then it decrements the wire count.
 */
static int
_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{

        while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt"))
                vm_page_lock_queues();

        if (m->hold_count == 0) {
                vm_offset_t pteva;

                /*
                 * unmap the page table page
                 */
                if (m->pindex >= (NUPDE + NUPDPE)) {
                        /* PDP page */
                        pml4_entry_t *pml4;
                        pml4 = pmap_pml4e(pmap, va);
                        pteva = (vm_offset_t) PDPmap + amd64_ptob(m->pindex - (NUPDE + NUPDPE));
                        *pml4 = 0;
                } else if (m->pindex >= NUPDE) {
                        /* PD page */
                        pdp_entry_t *pdp;
                        pdp = pmap_pdpe(pmap, va);
                        pteva = (vm_offset_t) PDmap + amd64_ptob(m->pindex - NUPDE);
                        *pdp = 0;
                } else {
                        /* PTE page */
                        pd_entry_t *pd;
                        pd = pmap_pde(pmap, va);
                        pteva = (vm_offset_t) PTmap + amd64_ptob(m->pindex);
                        *pd = 0;
                }
                --pmap->pm_stats.resident_count;
                if (m->pindex < NUPDE) {
                        /* We just released a PT, unhold the matching PD */
                        vm_page_t pdpg;

                        pdpg = vm_page_lookup(pmap->pm_pteobj, NUPDE +
                            ((va >> PDPSHIFT) & (NUPDPE - 1)));
                        while (vm_page_sleep_if_busy(pdpg, FALSE, "pulook"))
                                vm_page_lock_queues();
                        vm_page_unhold(pdpg);
                        if (pdpg->hold_count == 0)
                                _pmap_unwire_pte_hold(pmap, va, pdpg);
                }
                if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
                        /* We just released a PD, unhold the matching PDP */
                        vm_page_t pdppg;

                        pdppg = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE +
                            ((va >> PML4SHIFT) & (NUPML4E - 1)));
                        while (vm_page_sleep_if_busy(pdppg, FALSE, "pulooK"))
                                vm_page_lock_queues();
                        vm_page_unhold(pdppg);
                        if (pdppg->hold_count == 0)
                                _pmap_unwire_pte_hold(pmap, va, pdppg);
                }
                if (pmap_is_current(pmap)) {
                        /*
                         * Do an invltlb to make the invalidated mapping
                         * take effect immediately.
                         */
                        pmap_invalidate_page(pmap, pteva);
                }

                /*
                 * If the page is finally unwired, simply free it.
                 */
                --m->wire_count;
                if (m->wire_count == 0) {
                        vm_page_busy(m);
                        vm_page_free_zero(m);
                        atomic_subtract_int(&cnt.v_wire_count, 1);
                }
                return 1;
        }
        return 0;
}

static PMAP_INLINE int
pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
        vm_page_unhold(m);
        if (m->hold_count == 0)
                return _pmap_unwire_pte_hold(pmap, va, m);
        else
                return 0;
}
/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
        vm_pindex_t ptepindex;

        if (va >= VM_MAXUSER_ADDRESS)
                return 0;

        if (mpte == NULL) {
                ptepindex = pmap_pde_pindex(va);
                if (pmap->pm_pteobj->root &&
                    pmap->pm_pteobj->root->pindex == ptepindex) {
                        mpte = pmap->pm_pteobj->root;
                } else {
                        while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL &&
                            vm_page_sleep_if_busy(mpte, FALSE, "pulook"))
                                vm_page_lock_queues();
                }
        }

        return pmap_unwire_pte_hold(pmap, va, mpte);
}

void
pmap_pinit0(pmap)
        struct pmap *pmap;
{

        pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys);
        pmap->pm_active = 0;
        TAILQ_INIT(&pmap->pm_pvlist);
        bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
        mtx_lock_spin(&allpmaps_lock);
        LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
        mtx_unlock_spin(&allpmaps_lock);
}

/*
 * Initialize a preallocated and zeroed pmap structure,
 * such as one in a vmspace structure.
 */
void
pmap_pinit(pmap)
        register struct pmap *pmap;
{
        vm_page_t pml4pg;

        /*
         * allocate object for the ptes
         */
        if (pmap->pm_pteobj == NULL)
                pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPDE + NUPDPE + NUPML4E + 1);

        /*
         * allocate the page directory page
         */
        pml4pg = vm_page_grab(pmap->pm_pteobj, NUPDE + NUPDPE + NUPML4E,
            VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
        vm_page_lock_queues();
        vm_page_flag_clear(pml4pg, PG_BUSY);
        pml4pg->valid = VM_PAGE_BITS_ALL;
        vm_page_unlock_queues();

        pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg));

        if ((pml4pg->flags & PG_ZERO) == 0)
                bzero(pmap->pm_pml4, PAGE_SIZE);

        mtx_lock_spin(&allpmaps_lock);
        LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
        mtx_unlock_spin(&allpmaps_lock);

        /* Wire in kernel global address entries. */
        pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
        pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U;

        /* install self-referential address mapping entry(s) */
        pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;

        pmap->pm_active = 0;
        TAILQ_INIT(&pmap->pm_pvlist);
        bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
}

/*
 * Wire in kernel global address entries.  To avoid a race condition
 * between pmap initialization and pmap_growkernel, this procedure
 * should be called after the vmspace is attached to the process
 * but before this pmap is activated.
 */
void
pmap_pinit2(pmap)
        struct pmap *pmap;
{
        /* XXX: Remove this stub when no longer called */
}
/*
 * this routine is called if the page table page is not
 * mapped correctly.
 */
static vm_page_t
_pmap_allocpte(pmap, ptepindex)
        pmap_t pmap;
        vm_pindex_t ptepindex;
{
        vm_page_t m, pdppg, pdpg;

        /*
         * Find or fabricate a new pagetable page
         */
        m = vm_page_grab(pmap->pm_pteobj, ptepindex,
            VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_RETRY);

        KASSERT(m->queue == PQ_NONE,
            ("_pmap_allocpte: %p->queue != PQ_NONE", m));

        /*
         * Increment the hold count for the page table page
         * (denoting a new mapping.)
         */
        m->hold_count++;

        /*
         * Map the pagetable page into the process address space, if
         * it isn't already there.
         */

        pmap->pm_stats.resident_count++;

        if (ptepindex >= (NUPDE + NUPDPE)) {
                pml4_entry_t *pml4;
                vm_pindex_t pml4index;

                /* Wire up a new PDPE page */
                pml4index = ptepindex - (NUPDE + NUPDPE);
                pml4 = &pmap->pm_pml4[pml4index];
                *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;

        } else if (ptepindex >= NUPDE) {
                vm_pindex_t pml4index;
                vm_pindex_t pdpindex;
                pml4_entry_t *pml4;
                pdp_entry_t *pdp;

                /* Wire up a new PDE page */
                pdpindex = ptepindex - NUPDE;
                pml4index = pdpindex >> NPML4EPGSHIFT;

                pml4 = &pmap->pm_pml4[pml4index];
                if ((*pml4 & PG_V) == 0) {
                        /* Have to allocate a new pdp, recurse */
                        _pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index);
                } else {
                        /* Add reference to pdp page */
                        pdppg = pmap_page_lookup(pmap->pm_pteobj,
                            NUPDE + NUPDPE + pml4index);
                        pdppg->hold_count++;
                }
                pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);

                /* Now find the pdp page */
                pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
                *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;

        } else {
                vm_pindex_t pml4index;
                vm_pindex_t pdpindex;
                pml4_entry_t *pml4;
                pdp_entry_t *pdp;
                pd_entry_t *pd;

                /* Wire up a new PTE page */
                pdpindex = ptepindex >> NPDPEPGSHIFT;
                pml4index = pdpindex >> NPML4EPGSHIFT;

                /* First, find the pdp and check that it's valid. */
                pml4 = &pmap->pm_pml4[pml4index];
                if ((*pml4 & PG_V) == 0) {
                        /* Have to allocate a new pd, recurse */
                        _pmap_allocpte(pmap, NUPDE + pdpindex);
                        pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
                        pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
                } else {
                        pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
                        pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
                        if ((*pdp & PG_V) == 0) {
                                /* Have to allocate a new pd, recurse */
                                _pmap_allocpte(pmap, NUPDE + pdpindex);
                        } else {
                                /* Add reference to the pd page */
                                pdpg = pmap_page_lookup(pmap->pm_pteobj,
                                    NUPDE + pdpindex);
                                pdpg->hold_count++;
                        }
                }
                pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME);

                /* Now we know where the page directory page is */
                pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)];
                *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
        }

        /*
         * Try to use the new mapping, but if we cannot, then
         * do it with the routine that maps the page explicitly.
         */
        if ((m->flags & PG_ZERO) == 0)
                pmap_zero_page(m);
        vm_page_lock_queues();
        m->valid = VM_PAGE_BITS_ALL;
        vm_page_flag_clear(m, PG_ZERO);
        vm_page_wakeup(m);
        vm_page_unlock_queues();

        return m;
}
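/*
 * To summarize the pindex encoding used by _pmap_allocpte() above:
 * the pte object holds every page table page of the pmap in one
 * flat index space,
 *
 *      [0, NUPDE)                        PT pages (one per 2MB of VA)
 *      [NUPDE, NUPDE + NUPDPE)           PD pages
 *      [NUPDE + NUPDPE, ... + NUPML4E)   PDP pages
 *      NUPDE + NUPDPE + NUPML4E          the PML4 page itself
 *
 * which is why the recursion simply rebases the index into the next
 * range when an intermediate level is found to be missing.
 */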
static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va)
{
        vm_pindex_t ptepindex;
        pd_entry_t *pd;
        vm_page_t m;

        /*
         * Calculate pagetable page index
         */
        ptepindex = pmap_pde_pindex(va);

        /*
         * Get the page directory entry
         */
        pd = pmap_pde(pmap, va);

        /*
         * This supports switching from a 2MB page to a
         * normal 4K page.
         */
        if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
                *pd = 0;
                pd = 0;
                pmap_invalidate_all(kernel_pmap);
        }

        /*
         * If the page table page is mapped, we just increment the
         * hold count, and activate it.
         */
        if (pd != 0 && (*pd & PG_V) != 0) {
                /*
                 * In order to get the page table page, try the
                 * hint first.
                 */
                if (pmap->pm_pteobj->root &&
                    (pmap->pm_pteobj->root->pindex == ptepindex)) {
                        m = pmap->pm_pteobj->root;
                } else {
                        m = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
                }
                m->hold_count++;
                return m;
        }
        /*
         * Here if the pte page isn't mapped, or if it has been deallocated.
         */
        m = _pmap_allocpte(pmap, ptepindex);
        return m;
}


/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
        vm_object_t object;
        vm_page_t m;

        object = pmap->pm_pteobj;

        KASSERT(object->ref_count == 1,
            ("pmap_release: pteobj reference count %d != 1",
            object->ref_count));
        KASSERT(pmap->pm_stats.resident_count == 0,
            ("pmap_release: pmap resident count %ld != 0",
            pmap->pm_stats.resident_count));

        mtx_lock_spin(&allpmaps_lock);
        LIST_REMOVE(pmap, pm_list);
        mtx_unlock_spin(&allpmaps_lock);

        vm_page_lock_queues();
        while ((m = TAILQ_FIRST(&object->memq)) != NULL) {
                m->wire_count--;
                atomic_subtract_int(&cnt.v_wire_count, 1);
                vm_page_busy(m);
                vm_page_free(m);
        }
        KASSERT(TAILQ_EMPTY(&object->memq),
            ("pmap_release: leaking page table pages"));
        vm_page_unlock_queues();
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
        unsigned long ksize = VM_MAX_KERNEL_ADDRESS - KERNBASE;

        return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_size, "IU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
        unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

        return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
    0, 0, kvm_free, "IU", "Amount of KVM free");
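/*
 * Both oids are read-only longs, so from userland something like
 * "sysctl vm.kvm_size" would report the kernel VA span in bytes
 * (illustrative usage; command output format depends on sysctl(8)).
 */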
/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
        int s;
        vm_paddr_t paddr;
        vm_page_t nkpg;
        pd_entry_t *pde, newpdir;
        pdp_entry_t newpdp;

        s = splhigh();
        mtx_assert(&kernel_map->system_mtx, MA_OWNED);
        if (kernel_vm_end == 0) {
                kernel_vm_end = KERNBASE;
                nkpt = 0;
                while ((*pmap_pde(kernel_pmap, kernel_vm_end) & PG_V) != 0) {
                        kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
                        nkpt++;
                }
        }
        addr = roundup2(addr, PAGE_SIZE * NPTEPG);
        while (kernel_vm_end < addr) {
                pde = pmap_pde(kernel_pmap, kernel_vm_end);
                if (pde == NULL) {
                        /* We need a new PDP entry */
                        nkpg = vm_page_alloc(NULL, nkpt,
                            VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
                        if (!nkpg)
                                panic("pmap_growkernel: no memory to grow kernel");
                        pmap_zero_page(nkpg);
                        paddr = VM_PAGE_TO_PHYS(nkpg);
                        newpdp = (pdp_entry_t)
                            (paddr | PG_V | PG_RW | PG_A | PG_M);
                        *pmap_pdpe(kernel_pmap, kernel_vm_end) = newpdp;
                        continue; /* try again */
                }
                if ((*pde & PG_V) != 0) {
                        kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
                        continue;
                }

                /*
                 * This index is bogus, but out of the way
                 */
                nkpg = vm_page_alloc(NULL, nkpt,
                    VM_ALLOC_NOOBJ | VM_ALLOC_SYSTEM | VM_ALLOC_WIRED);
                if (!nkpg)
                        panic("pmap_growkernel: no memory to grow kernel");

                nkpt++;

                pmap_zero_page(nkpg);
                paddr = VM_PAGE_TO_PHYS(nkpg);
                newpdir = (pd_entry_t) (paddr | PG_V | PG_RW | PG_A | PG_M);
                *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;

                kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
        }
        splx(s);
}


/***************************************************
 * page management routines.
 ***************************************************/

/*
 * free the pv_entry back to the free list
 */
static PMAP_INLINE void
free_pv_entry(pv_entry_t pv)
{
        pv_entry_count--;
        uma_zfree(pvzone, pv);
}

/*
 * get a new pv_entry, allocating a block from the system
 * when needed.
 * the memory allocation is performed bypassing the malloc code
 * because of the possibility of allocations at interrupt time.
 */
static pv_entry_t
get_pv_entry(void)
{
        pv_entry_count++;
        if (pv_entry_high_water &&
            (pv_entry_count > pv_entry_high_water) &&
            (pmap_pagedaemon_waken == 0)) {
                pmap_pagedaemon_waken = 1;
                wakeup(&vm_pages_needed);
        }
        return uma_zalloc(pvzone, M_NOWAIT);
}

/*
 * If it is the first entry on the list, it is actually
 * in the header and we must copy the following entry up
 * to the header.  Otherwise we must search the list for
 * the entry.  In either case we free the now unused entry.
 */
static int
pmap_remove_entry(pmap_t pmap, vm_page_t m, vm_offset_t va)
{
        pv_entry_t pv;
        int rtval;
        int s;

        s = splvm();
        mtx_assert(&vm_page_queue_mtx, MA_OWNED);
        if (m->md.pv_list_count < pmap->pm_stats.resident_count) {
                TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
                        if (pmap == pv->pv_pmap && va == pv->pv_va)
                                break;
                }
        } else {
                TAILQ_FOREACH(pv, &pmap->pm_pvlist, pv_plist) {
                        if (va == pv->pv_va)
                                break;
                }
        }

        rtval = 0;
        if (pv) {
                rtval = pmap_unuse_pt(pmap, va, pv->pv_ptem);
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                m->md.pv_list_count--;
                if (TAILQ_FIRST(&m->md.pv_list) == NULL)
                        vm_page_flag_clear(m, PG_WRITEABLE);

                TAILQ_REMOVE(&pmap->pm_pvlist, pv, pv_plist);
                free_pv_entry(pv);
        }

        splx(s);
        return rtval;
}
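/*
 * Recap of the structures maintained here: each managed mapping is
 * recorded in a pv_entry that is linked onto two lists at once --
 * the per-page m->md.pv_list (all mappings of one physical page,
 * walked by pmap_remove_all() and friends) and the per-pmap
 * pm_pvlist (all mappings in one address space).  The search above
 * simply starts from whichever list is likely to be shorter.
 */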
/*
 * Create a pv entry for page at pa for
 * (pmap, va).
 */
static void
pmap_insert_entry(pmap_t pmap, vm_offset_t va, vm_page_t mpte, vm_page_t m)
{

        int s;
        pv_entry_t pv;

        s = splvm();
        pv = get_pv_entry();
        pv->pv_va = va;
        pv->pv_pmap = pmap;
        pv->pv_ptem = mpte;

        vm_page_lock_queues();
        TAILQ_INSERT_TAIL(&pmap->pm_pvlist, pv, pv_plist);
        TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);
        m->md.pv_list_count++;

        vm_page_unlock_queues();
        splx(s);
}

/*
 * pmap_remove_pte: do the things to unmap a page in a process
 */
static int
pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
{
        pt_entry_t oldpte;
        vm_page_t m;

        oldpte = pte_load_clear(ptq);
        if (oldpte & PG_W)
                pmap->pm_stats.wired_count -= 1;
        /*
         * Machines that don't support invlpg, also don't support
         * PG_G.
         */
        if (oldpte & PG_G)
                pmap_invalidate_page(kernel_pmap, va);
        pmap->pm_stats.resident_count -= 1;
        if (oldpte & PG_MANAGED) {
                m = PHYS_TO_VM_PAGE(oldpte);
                if (oldpte & PG_M) {
#if defined(PMAP_DIAGNOSTIC)
                        if (pmap_nw_modified((pt_entry_t) oldpte)) {
                                printf(
        "pmap_remove: modified page not writable: va: 0x%lx, pte: 0x%lx\n",
                                    va, oldpte);
                        }
#endif
                        if (pmap_track_modified(va))
                                vm_page_dirty(m);
                }
                if (oldpte & PG_A)
                        vm_page_flag_set(m, PG_REFERENCED);
                return pmap_remove_entry(pmap, m, va);
        } else {
                return pmap_unuse_pt(pmap, va, NULL);
        }

        return 0;
}

/*
 * Remove a single page from a process address space
 */
static void
pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
        pt_entry_t *pte;

        pte = pmap_pte(pmap, va);
        if (pte == NULL || (*pte & PG_V) == 0)
                return;
        pmap_remove_pte(pmap, pte, va);
        pmap_invalidate_page(pmap, va);
}

/*
 *      Remove the given range of addresses from the specified map.
 *
 *      It is assumed that the start and end are properly
 *      rounded to the page size.
 */
void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
        vm_offset_t pdnxt;
        pd_entry_t ptpaddr, *pde;
        pt_entry_t *pte;
        int anyvalid;

        if (pmap == NULL)
                return;

        if (pmap->pm_stats.resident_count == 0)
                return;

        /*
         * special handling of removing one page.  a very
         * common operation and easy to short circuit some
         * code.
         */
        if (sva + PAGE_SIZE == eva) {
                pde = pmap_pde(pmap, sva);
                if (pde && (*pde & PG_PS) == 0) {
                        pmap_remove_page(pmap, sva);
                        return;
                }
        }

        anyvalid = 0;

        for (; sva < eva; sva = pdnxt) {

                if (pmap->pm_stats.resident_count == 0)
                        break;

                /*
                 * Calculate index for next page table.
                 */
                pdnxt = (sva + NBPDR) & ~PDRMASK;

                pde = pmap_pde(pmap, sva);
                if (pde == 0)
                        continue;
                ptpaddr = *pde;

                /*
                 * Weed out invalid mappings. Note: we assume that the page
                 * directory table is always allocated, and in kernel virtual.
                 */
                if (ptpaddr == 0)
                        continue;

                /*
                 * Check for large page.
                 */
                if ((ptpaddr & PG_PS) != 0) {
                        *pde = 0;
                        pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
                        anyvalid = 1;
                        continue;
                }

                /*
                 * Limit our scan to either the end of the va represented
                 * by the current page table page, or to the end of the
                 * range being removed.
                 */
                if (pdnxt > eva)
                        pdnxt = eva;

                for (; sva != pdnxt; sva += PAGE_SIZE) {
                        pte = pmap_pte(pmap, sva);
                        if (pte == NULL || *pte == 0)
                                continue;
                        anyvalid = 1;
                        if (pmap_remove_pte(pmap, pte, sva))
                                break;
                }
        }

        if (anyvalid)
                pmap_invalidate_all(pmap);
}

/*
 *      Routine:        pmap_remove_all
 *      Function:
 *              Removes this physical page from
 *              all physical maps in which it resides.
 *              Reflects back modify bits to the pager.
 *
 *      Notes:
 *              Original versions of this routine were very
 *              inefficient because they iteratively called
 *              pmap_remove (slow...)
 */
void
pmap_remove_all(vm_page_t m)
{
        register pv_entry_t pv;
        pt_entry_t *pte, tpte;
        int s;

#if defined(PMAP_DIAGNOSTIC)
        /*
         * XXX This makes pmap_remove_all() illegal for non-managed pages!
         */
        if (!pmap_initialized || (m->flags & PG_FICTITIOUS)) {
                panic("pmap_remove_all: illegal for unmanaged page, pa: 0x%lx",
                    VM_PAGE_TO_PHYS(m));
        }
#endif
        mtx_assert(&vm_page_queue_mtx, MA_OWNED);
        s = splvm();
        while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
                pv->pv_pmap->pm_stats.resident_count--;
                pte = pmap_pte(pv->pv_pmap, pv->pv_va);
                tpte = pte_load_clear(pte);
                if (tpte & PG_W)
                        pv->pv_pmap->pm_stats.wired_count--;
                if (tpte & PG_A)
                        vm_page_flag_set(m, PG_REFERENCED);

                /*
                 * Update the vm_page_t clean and reference bits.
                 */
                if (tpte & PG_M) {
#if defined(PMAP_DIAGNOSTIC)
                        if (pmap_nw_modified((pt_entry_t) tpte)) {
                                printf(
        "pmap_remove_all: modified page not writable: va: 0x%lx, pte: 0x%lx\n",
                                    pv->pv_va, tpte);
                        }
#endif
                        if (pmap_track_modified(pv->pv_va))
                                vm_page_dirty(m);
                }
                pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
                TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);
                TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
                m->md.pv_list_count--;
                pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
                free_pv_entry(pv);
        }
        vm_page_flag_clear(m, PG_WRITEABLE);
        splx(s);
}

/*
 *      Set the physical protection on the
 *      specified range of this map as requested.
 */
void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
        vm_offset_t pdnxt;
        pd_entry_t ptpaddr, *pde;
        int anychanged;

        if (pmap == NULL)
                return;

        if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
                pmap_remove(pmap, sva, eva);
                return;
        }

        if (prot & VM_PROT_WRITE)
                return;

        anychanged = 0;

        for (; sva < eva; sva = pdnxt) {

                pdnxt = (sva + NBPDR) & ~PDRMASK;

                pde = pmap_pde(pmap, sva);
                if (pde == NULL)
                        continue;
                ptpaddr = *pde;

                /*
                 * Weed out invalid mappings. Note: we assume that the page
                 * directory table is always allocated, and in kernel virtual.
                 */
                if (ptpaddr == 0)
                        continue;

                /*
                 * Check for large page.
                 */
                if ((ptpaddr & PG_PS) != 0) {
                        *pde &= ~(PG_M|PG_RW);
                        pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
                        anychanged = 1;
                        continue;
                }

                if (pdnxt > eva)
                        pdnxt = eva;

                for (; sva != pdnxt; sva += PAGE_SIZE) {
                        pt_entry_t pbits;
                        pt_entry_t *pte;
                        vm_page_t m;

                        pte = pmap_pte(pmap, sva);
                        if (pte == NULL)
                                continue;
                        pbits = *pte;
                        if (pbits & PG_MANAGED) {
                                m = NULL;
                                if (pbits & PG_A) {
                                        m = PHYS_TO_VM_PAGE(pbits);
                                        vm_page_flag_set(m, PG_REFERENCED);
                                        pbits &= ~PG_A;
                                }
                                if ((pbits & PG_M) != 0 &&
                                    pmap_track_modified(sva)) {
                                        if (m == NULL)
                                                m = PHYS_TO_VM_PAGE(pbits);
                                        vm_page_dirty(m);
                                        pbits &= ~PG_M;
                                }
                        }

                        pbits &= ~PG_RW;

                        if (pbits != *pte) {
                                pte_store(pte, pbits);
                                anychanged = 1;
                        }
                }
        }
        if (anychanged)
                pmap_invalidate_all(pmap);
}

/*
 *      Insert the given physical page (p) at
 *      the specified virtual address (v) in the
 *      target physical map with the protection requested.
 *
 *      If specified, the page will be wired down, meaning
 *      that the related pte can not be reclaimed.
 *
 *      NB:  This is the only routine which MAY NOT lazy-evaluate
 *      or lose information.  That is, this routine must actually
 *      insert this page into the given map NOW.
 */
void
pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
    boolean_t wired)
{
        vm_paddr_t pa;
        register pt_entry_t *pte;
        vm_paddr_t opa;
        pt_entry_t origpte, newpte;
        vm_page_t mpte;

        if (pmap == NULL)
                return;

        va &= PG_FRAME;
#ifdef PMAP_DIAGNOSTIC
        if (va > VM_MAX_KERNEL_ADDRESS)
                panic("pmap_enter: toobig");
        if ((va >= UPT_MIN_ADDRESS) && (va < UPT_MAX_ADDRESS))
                panic("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va);
#endif

        mpte = NULL;
        /*
         * In the case that a page table page is not
         * resident, we are creating it here.
         */
        if (va < VM_MAXUSER_ADDRESS) {
                mpte = pmap_allocpte(pmap, va);
        }
#if 0 && defined(PMAP_DIAGNOSTIC)
        else {
                pd_entry_t *pdeaddr = pmap_pde(pmap, va);
                origpte = *pdeaddr;
                if ((origpte & PG_V) == 0) {
                        panic("pmap_enter: invalid kernel page table page, pde=%p, va=%p\n",
                            origpte, va);
                }
        }
#endif

        pte = pmap_pte(pmap, va);

        /*
         * Page Directory table entry not valid, we need a new PT page
         */
        if (pte == NULL)
                panic("pmap_enter: invalid page directory va=%#lx\n", va);

        pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
        origpte = *pte;
        opa = origpte & PG_FRAME;

        if (origpte & PG_PS)
                panic("pmap_enter: attempted pmap_enter on 2MB page");

        /*
         * Mapping has not changed, must be protection or wiring change.
         */
        if (origpte && (opa == pa)) {
                /*
                 * Wiring change, just update stats. We don't worry about
                 * wiring PT pages as they remain resident as long as there
                 * are valid mappings in them. Hence, if a user page is wired,
                 * the PT page will be also.
                 */
                if (wired && ((origpte & PG_W) == 0))
                        pmap->pm_stats.wired_count++;
                else if (!wired && (origpte & PG_W))
                        pmap->pm_stats.wired_count--;

#if defined(PMAP_DIAGNOSTIC)
                if (pmap_nw_modified((pt_entry_t) origpte)) {
                        printf(
        "pmap_enter: modified page not writable: va: 0x%lx, pte: 0x%lx\n",
                            va, origpte);
                }
#endif

                /*
                 * Remove extra pte reference
                 */
                if (mpte)
                        mpte->hold_count--;

                if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
                        if ((origpte & PG_RW) == 0) {
                                pte_store(pte, origpte | PG_RW);
                                pmap_invalidate_page(pmap, va);
                        }
                        return;
                }

                /*
                 * We might be turning off write access to the page,
                 * so we go ahead and sense modify status.
                 */
                if (origpte & PG_MANAGED) {
                        if ((origpte & PG_M) && pmap_track_modified(va)) {
                                vm_page_t om;
                                om = PHYS_TO_VM_PAGE(opa);
                                vm_page_dirty(om);
                        }
                        pa |= PG_MANAGED;
                }
                goto validate;
        }
        /*
         * Mapping has changed, invalidate old range and fall through to
         * handle validating new mapping.
         */
        if (opa) {
                int err;
                vm_page_lock_queues();
                err = pmap_remove_pte(pmap, pte, va);
                vm_page_unlock_queues();
                if (err)
                        panic("pmap_enter: pte vanished, va: 0x%lx", va);
        }

        /*
         * Enter on the PV list if part of our managed memory. Note that we
         * raise IPL while manipulating pv_table since pmap_enter can be
         * called at interrupt time.
         */
        if (pmap_initialized &&
            (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
                pmap_insert_entry(pmap, va, mpte, m);
                pa |= PG_MANAGED;
        }

        /*
         * Increment counters
         */
        pmap->pm_stats.resident_count++;
        if (wired)
                pmap->pm_stats.wired_count++;

validate:
        /*
         * Now validate mapping with desired protection/wiring.
         */
        newpte = (pt_entry_t)(pa | pte_prot(pmap, prot) | PG_V);

        if (wired)
                newpte |= PG_W;
        if (va < VM_MAXUSER_ADDRESS)
                newpte |= PG_U;
        if (pmap == kernel_pmap)
                newpte |= PG_G;

        /*
         * if the mapping or permission bits are different, we need
         * to update the pte.
         */
        if ((origpte & ~(PG_M|PG_A)) != newpte) {
                pte_store(pte, newpte | PG_A);
                /*if (origpte)*/ {
                        pmap_invalidate_page(pmap, va);
                }
        }
}
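/*
 * Worked example of the PTE composed above (illustrative only): for
 * a wired, writable user mapping of physical page 0x1234000, where
 * pte_prot() contributes PG_RW, the stored entry is
 *
 *      0x1234000 | PG_V | PG_RW | PG_U | PG_W | PG_A
 *
 * PG_A is preset by the pte_store() above; PG_M is left for the
 * hardware to set on the first write to the page.
 */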
/*
 * this code makes some *MAJOR* assumptions:
 * 1. Current pmap & pmap exists.
 * 2. Not wired.
 * 3. Read access.
 * 4. No page table pages.
 * 5. Tlbflush is deferred to calling procedure.
 * 6. Page IS managed.
 * but is *MUCH* faster than pmap_enter...
 */
vm_page_t
pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
{
        pt_entry_t *pte;
        vm_paddr_t pa;

        /*
         * In the case that a page table page is not
         * resident, we are creating it here.
         */
        if (va < VM_MAXUSER_ADDRESS) {
                vm_pindex_t ptepindex;
                pd_entry_t *ptepa;

                /*
                 * Calculate pagetable page index
                 */
                ptepindex = pmap_pde_pindex(va);
                if (mpte && (mpte->pindex == ptepindex)) {
                        mpte->hold_count++;
                } else {
retry:
                        /*
                         * Get the page directory entry
                         */
                        ptepa = pmap_pde(pmap, va);

                        /*
                         * If the page table page is mapped, we just increment
                         * the hold count, and activate it.
                         */
                        if (ptepa && (*ptepa & PG_V) != 0) {
                                if (*ptepa & PG_PS)
                                        panic("pmap_enter_quick: unexpected mapping into 2MB page");
                                if (pmap->pm_pteobj->root &&
                                    (pmap->pm_pteobj->root->pindex == ptepindex)) {
                                        mpte = pmap->pm_pteobj->root;
                                } else {
                                        mpte = pmap_page_lookup(pmap->pm_pteobj, ptepindex);
                                }
                                if (mpte == NULL)
                                        goto retry;
                                mpte->hold_count++;
                        } else {
                                mpte = _pmap_allocpte(pmap, ptepindex);
                        }
                }
        } else {
                mpte = NULL;
        }

        /*
         * This call to vtopte makes the assumption that we are
         * entering the page into the current pmap.  In order to support
         * quick entry into any pmap, one would likely use pmap_pte.
         * But that isn't as quick as vtopte.
         */
        pte = vtopte(va);
        if (*pte) {
                if (mpte != NULL) {
                        vm_page_lock_queues();
                        pmap_unwire_pte_hold(pmap, va, mpte);
                        vm_page_unlock_queues();
                }
                return 0;
        }

        /*
         * Enter on the PV list if part of our managed memory. Note that we
         * raise IPL while manipulating pv_table since pmap_enter can be
         * called at interrupt time.
         */
        if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
                pmap_insert_entry(pmap, va, mpte, m);

        /*
         * Increment counters
         */
        pmap->pm_stats.resident_count++;

        pa = VM_PAGE_TO_PHYS(m);

        /*
         * Now validate mapping with RO protection
         */
        if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
                pte_store(pte, pa | PG_V | PG_U);
        else
                pte_store(pte, pa | PG_V | PG_U | PG_MANAGED);

        return mpte;
}

/*
 * Make a temporary mapping for a physical address.  This is only intended
 * to be used for panic dumps.
 */
void *
pmap_kenter_temporary(vm_offset_t pa, int i)
{
        vm_offset_t va;

        va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
        pmap_kenter(va, pa);
        invlpg(va);
        return ((void *)crashdumpmap);
}
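/*
 * A dump routine would use pmap_kenter_temporary() roughly as
 * follows (a sketch only; "di", "pa" and "dumplo" are hypothetical
 * locals of the caller):
 *
 *      va = pmap_kenter_temporary(trunc_page(pa), 0);
 *      error = dump_write(di, va, 0, dumplo, PAGE_SIZE);
 *
 * Successive chunks reuse the same crashdumpmap window, which is
 * why the mapping is neither tracked nor SMP coherent.
 */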
/*
 * This code maps large physical mmap regions into the
 * processor address space. Note that some shortcuts
 * are taken, but the code works.
 */
void
pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
    vm_object_t object, vm_pindex_t pindex,
    vm_size_t size)
{
	vm_page_t p;

	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
	KASSERT(object->type == OBJT_DEVICE,
	    ("pmap_object_init_pt: non-device object"));
	if (((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
		int i;
		vm_page_t m[1];
		int npdes;
		pd_entry_t ptepa, *pde;

		pde = pmap_pde(pmap, addr);
		if (pde != NULL && (*pde & PG_V) != 0)
			return;
retry:
		p = vm_page_lookup(object, pindex);
		if (p != NULL) {
			vm_page_lock_queues();
			if (vm_page_sleep_if_busy(p, FALSE, "init4p"))
				goto retry;
		} else {
			p = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL);
			if (p == NULL)
				return;
			m[0] = p;

			if (vm_pager_get_pages(object, m, 1, 0) != VM_PAGER_OK) {
				vm_page_lock_queues();
				vm_page_free(p);
				vm_page_unlock_queues();
				return;
			}

			p = vm_page_lookup(object, pindex);
			vm_page_lock_queues();
			vm_page_wakeup(p);
		}
		vm_page_unlock_queues();

		ptepa = VM_PAGE_TO_PHYS(p);
		if (ptepa & (NBPDR - 1))
			return;

		p->valid = VM_PAGE_BITS_ALL;

		pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
		npdes = size >> PDRSHIFT;
		for (i = 0; i < npdes; i++) {
			pde_store(pde, ptepa | PG_U | PG_RW | PG_V | PG_PS);
			ptepa += NBPDR;
			pde++;
		}
		pmap_invalidate_all(pmap);
	}
}
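/*
 * Note on the shortcut above: the region is mapped with 2MB (PG_PS) page
 * directory entries only when addr and size are both NBPDR-aligned and
 * the first backing page is physically NBPDR-aligned as well; being
 * advisory, the routine simply declines otherwise. Writing npdes
 * consecutive PDEs from a single looked-up page relies on the device
 * object's memory being physically contiguous starting at ptepa.
 */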
/*
 * pmap_prefault provides a quick way of clustering
 * page faults into a process's address space. It is a "cousin"
 * of pmap_object_init_pt, except it runs at page fault time instead
 * of mmap time.
 */
#define PFBAK 4
#define PFFOR 4
#define PAGEORDER_SIZE (PFBAK+PFFOR)

static int pmap_prefault_pageorder[] = {
	-1 * PAGE_SIZE, 1 * PAGE_SIZE,
	-2 * PAGE_SIZE, 2 * PAGE_SIZE,
	-3 * PAGE_SIZE, 3 * PAGE_SIZE,
	-4 * PAGE_SIZE, 4 * PAGE_SIZE
};

void
pmap_prefault(pmap, addra, entry)
	pmap_t pmap;
	vm_offset_t addra;
	vm_map_entry_t entry;
{
	int i;
	vm_offset_t starta;
	vm_offset_t addr;
	vm_pindex_t pindex;
	vm_page_t m, mpte;
	vm_object_t object;
	pd_entry_t *pde;

	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
		return;

	object = entry->object.vm_object;

	starta = addra - PFBAK * PAGE_SIZE;
	if (starta < entry->start) {
		starta = entry->start;
	} else if (starta > addra) {
		starta = 0;
	}

	mpte = NULL;
	for (i = 0; i < PAGEORDER_SIZE; i++) {
		vm_object_t backing_object, lobject;
		pt_entry_t *pte;

		addr = addra + pmap_prefault_pageorder[i];
		if (addr > addra + (PFFOR * PAGE_SIZE))
			addr = 0;

		if (addr < starta || addr >= entry->end)
			continue;

		pde = pmap_pde(pmap, addr);
		if (pde == NULL || (*pde & PG_V) == 0)
			continue;

		pte = vtopte(addr);
		if ((*pte & PG_V) == 0)
			continue;

		pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
		lobject = object;
		VM_OBJECT_LOCK(lobject);
		while ((m = vm_page_lookup(lobject, pindex)) == NULL &&
		    lobject->type == OBJT_DEFAULT &&
		    (backing_object = lobject->backing_object) != NULL) {
			if (lobject->backing_object_offset & PAGE_MASK)
				break;
			pindex += lobject->backing_object_offset >> PAGE_SHIFT;
			VM_OBJECT_LOCK(backing_object);
			VM_OBJECT_UNLOCK(lobject);
			lobject = backing_object;
		}
		VM_OBJECT_UNLOCK(lobject);
		/*
		 * Give up when a page is not in memory.
		 */
		if (m == NULL)
			break;
		vm_page_lock_queues();
		if (((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) &&
		    (m->busy == 0) &&
		    (m->flags & (PG_BUSY | PG_FICTITIOUS)) == 0) {

			if ((m->queue - m->pc) == PQ_CACHE) {
				vm_page_deactivate(m);
			}
			vm_page_busy(m);
			vm_page_unlock_queues();
			mpte = pmap_enter_quick(pmap, addr, m, mpte);
			vm_page_lock_queues();
			vm_page_wakeup(m);
		}
		vm_page_unlock_queues();
	}
}

/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(pmap, va, wired)
	register pmap_t pmap;
	vm_offset_t va;
	boolean_t wired;
{
	register pt_entry_t *pte;

	if (pmap == NULL)
		return;

	/*
	 * Wiring is not a hardware characteristic, so there is no need
	 * to invalidate the TLB.
	 */
	pte = pmap_pte(pmap, va);
	if (wired && (*pte & PG_W) == 0) {
		pmap->pm_stats.wired_count++;
		*pte |= PG_W;
	} else if (!wired && (*pte & PG_W) != 0) {
		pmap->pm_stats.wired_count--;
		*pte &= ~PG_W;
	}
}
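/*
 * PG_W above is one of the PTE bits the hardware ignores, which is why
 * pmap_change_wiring() can flip it without any TLB invalidation: the
 * bit is pure software bookkeeping backing pm_stats.wired_count, not a
 * translation attribute.
 */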
/*
 * Copy the range specified by src_addr/len
 * from the source map to the range dst_addr/len
 * in the destination map.
 *
 * This routine is only advisory and need not do anything.
 */
void
pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
    vm_offset_t src_addr)
{
	vm_offset_t addr;
	vm_offset_t end_addr = src_addr + len;
	vm_offset_t pdnxt;
	vm_page_t m;

	if (dst_addr != src_addr)
		return;

	if (!pmap_is_current(src_pmap))
		return;

	for (addr = src_addr; addr < end_addr; addr = pdnxt) {
		pt_entry_t *src_pte, *dst_pte;
		vm_page_t dstmpte, srcmpte;
		pd_entry_t srcptepaddr, *pde;
		vm_pindex_t ptepindex;

		if (addr >= UPT_MIN_ADDRESS)
			panic("pmap_copy: invalid to pmap_copy page tables");

		/*
		 * Don't let optional prefaulting of pages make us go
		 * way below the low water mark of free pages or way
		 * above the high water mark of used pv entries.
		 */
		if (cnt.v_free_count < cnt.v_free_reserved ||
		    pv_entry_count > pv_entry_high_water)
			break;

		pdnxt = (addr + NBPDR) & ~PDRMASK;
		ptepindex = pmap_pde_pindex(addr);

		pde = pmap_pde(src_pmap, addr);
		if (pde)
			srcptepaddr = *pde;
		else
			continue;
		if (srcptepaddr == 0)
			continue;

		if (srcptepaddr & PG_PS) {
			pde = pmap_pde(dst_pmap, addr);
			if (pde == NULL) {
				/*
				 * XXX should do an allocpte here to
				 * instantiate the pde
				 */
				continue;
			}
			if (*pde == 0) {
				*pde = srcptepaddr;
				dst_pmap->pm_stats.resident_count +=
				    NBPDR / PAGE_SIZE;
			}
			continue;
		}

		srcmpte = vm_page_lookup(src_pmap->pm_pteobj, ptepindex);
		if ((srcmpte == NULL) ||
		    (srcmpte->hold_count == 0) || (srcmpte->flags & PG_BUSY))
			continue;

		if (pdnxt > end_addr)
			pdnxt = end_addr;

		src_pte = vtopte(addr);
		while (addr < pdnxt) {
			pt_entry_t ptetemp;
			ptetemp = *src_pte;
			/*
			 * We only virtual-copy managed pages.
			 */
			if ((ptetemp & PG_MANAGED) != 0) {
				/*
				 * We have to check after allocpte for the
				 * pte still being around... allocpte can
				 * block.
				 */
				dstmpte = pmap_allocpte(dst_pmap, addr);
				dst_pte = pmap_pte(dst_pmap, addr);
				if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
					/*
					 * Clear the modified and
					 * accessed (referenced) bits
					 * during the copy.
					 */
					m = PHYS_TO_VM_PAGE(ptetemp);
					*dst_pte = ptetemp & ~(PG_M | PG_A);
					dst_pmap->pm_stats.resident_count++;
					pmap_insert_entry(dst_pmap, addr,
					    dstmpte, m);
				} else {
					vm_page_lock_queues();
					pmap_unwire_pte_hold(dst_pmap, addr, dstmpte);
					vm_page_unlock_queues();
				}
				if (dstmpte->hold_count >= srcmpte->hold_count)
					break;
			}
			addr += PAGE_SIZE;
			src_pte++;
		}
	}
}

/*
 * pmap_zero_page zeros the specified hardware page. On amd64 the page
 * is addressed through the direct map, so no transient kernel mapping
 * needs to be set up.
 */
void
pmap_zero_page(vm_page_t m)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	pagezero((void *)va);
}

/*
 * pmap_zero_page_area zeros the specified portion of a hardware page
 * via the direct map.
 *
 * off and size may not cover an area beyond a single hardware page.
 */
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	if (off == 0 && size == PAGE_SIZE)
		pagezero((void *)va);
	else
		bzero((char *)va + off, size);
}

/*
 * pmap_zero_page_idle zeros the specified hardware page via the direct
 * map. It is intended to be called from the vm_pagezero process only
 * and outside of Giant.
 */
void
pmap_zero_page_idle(vm_page_t m)
{
	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));

	pagezero((void *)va);
}
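/*
 * All three zeroing routines above lean on the amd64 direct map: every
 * physical page is permanently visible at PHYS_TO_DMAP(pa), so, unlike
 * pmap implementations without such a window, no temporary mapping (or
 * the locking it would require) is involved, e.g.
 *
 *	pagezero((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
 */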
/*
 * pmap_copy_page copies the specified (machine independent) page.
 * On amd64 both the source and the destination are addressed through
 * the direct map and copied with bcopy.
 */
void
pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
{
	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));

	bcopy((void *)src, (void *)dst, PAGE_SIZE);
}

/*
 * Returns true if the pmap's pv is one of the first
 * 16 pvs linked to from this page. This count may
 * be changed upwards or downwards in the future; it
 * is only necessary that true be returned for a small
 * subset of pmaps for proper page aging.
 */
boolean_t
pmap_page_exists_quick(pmap, m)
	pmap_t pmap;
	vm_page_t m;
{
	pv_entry_t pv;
	int loops = 0;
	int s;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return (FALSE);

	s = splvm();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		if (pv->pv_pmap == pmap) {
			splx(s);
			return (TRUE);
		}
		loops++;
		if (loops >= 16)
			break;
	}
	splx(s);
	return (FALSE);
}
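/*
 * Note that a FALSE return from pmap_page_exists_quick() only means
 * "not among the first 16 pv entries", not "unmapped"; as the comment
 * above says, a cheap, partial answer is all the page-aging code needs.
 */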
#define PMAP_REMOVE_PAGES_CURPROC_ONLY
/*
 * Remove all pages from the specified address space; this aids process
 * exit speeds. Also, this code is special-cased for the current process
 * only, but can have the more generic (and slightly slower) mode
 * enabled. This is much faster than pmap_remove in the case of running
 * down an entire address space.
 */
void
pmap_remove_pages(pmap, sva, eva)
	pmap_t pmap;
	vm_offset_t sva, eva;
{
	pt_entry_t *pte, tpte;
	vm_page_t m;
	pv_entry_t pv, npv;
	int s;

#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
	if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace))) {
		printf("warning: pmap_remove_pages called with non-current pmap\n");
		return;
	}
#endif
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	s = splvm();
	for (pv = TAILQ_FIRST(&pmap->pm_pvlist); pv; pv = npv) {

		if (pv->pv_va >= eva || pv->pv_va < sva) {
			npv = TAILQ_NEXT(pv, pv_plist);
			continue;
		}

#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
		pte = vtopte(pv->pv_va);
#else
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
#endif
		tpte = *pte;

		if (tpte == 0) {
			printf("TPTE at %p IS ZERO @ VA %08lx\n",
			    pte, pv->pv_va);
			panic("bad pte");
		}

		/*
		 * We cannot remove wired pages from a process' mapping
		 * at this time.
		 */
		if (tpte & PG_W) {
			npv = TAILQ_NEXT(pv, pv_plist);
			continue;
		}

		m = PHYS_TO_VM_PAGE(tpte);
		KASSERT(m->phys_addr == (tpte & PG_FRAME),
		    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
		    m, (uintmax_t)m->phys_addr, (uintmax_t)tpte));

		KASSERT(m < &vm_page_array[vm_page_array_size],
		    ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte));

		pv->pv_pmap->pm_stats.resident_count--;

		pte_clear(pte);

		/*
		 * Update the vm_page_t clean and reference bits.
		 */
		if (tpte & PG_M) {
			vm_page_dirty(m);
		}

		npv = TAILQ_NEXT(pv, pv_plist);
		TAILQ_REMOVE(&pv->pv_pmap->pm_pvlist, pv, pv_plist);

		m->md.pv_list_count--;
		TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);
		if (TAILQ_FIRST(&m->md.pv_list) == NULL) {
			vm_page_flag_clear(m, PG_WRITEABLE);
		}

		pmap_unuse_pt(pv->pv_pmap, pv->pv_va, pv->pv_ptem);
		free_pv_entry(pv);
	}
	splx(s);
	pmap_invalidate_all(pmap);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page was modified
 *	in any physical maps.
 */
boolean_t
pmap_is_modified(vm_page_t m)
{
	pv_entry_t pv;
	pt_entry_t *pte;
	int s;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return (FALSE);

	s = splvm();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		/*
		 * Only consult mappings whose modified state we track;
		 * others are skipped.
		 */
		if (!pmap_track_modified(pv->pv_va))
			continue;
#if defined(PMAP_DIAGNOSTIC)
		if (!pv->pv_pmap) {
			printf("Null pmap (tb) at va: 0x%lx\n", pv->pv_va);
			continue;
		}
#endif
		pte = pmap_pte(pv->pv_pmap, pv->pv_va);
		if (*pte & PG_M) {
			splx(s);
			return (TRUE);
		}
	}
	splx(s);
	return (FALSE);
}

/*
 * This routine is used to modify bits in ptes.
 */
static __inline void
pmap_changebit(vm_page_t m, int bit, boolean_t setem)
{
	register pv_entry_t pv;
	register pt_entry_t *pte;
	int s;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS) ||
	    (!setem && bit == PG_RW && (m->flags & PG_WRITEABLE) == 0))
		return;

	s = splvm();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	/*
	 * Loop over all current mappings, setting/clearing as appropriate.
	 * If setting RO, do we need to clear the VAC?
	 */
	TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
		/*
		 * Don't write protect pager mappings.
		 */
		if (!setem && (bit == PG_RW)) {
			if (!pmap_track_modified(pv->pv_va))
				continue;
		}

#if defined(PMAP_DIAGNOSTIC)
		if (!pv->pv_pmap) {
			printf("Null pmap (cb) at va: 0x%lx\n", pv->pv_va);
			continue;
		}
#endif

		pte = pmap_pte(pv->pv_pmap, pv->pv_va);

		if (setem) {
			*pte |= bit;
			pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
		} else {
			pt_entry_t pbits = *pte;
			if (pbits & bit) {
				if (bit == PG_RW) {
					if (pbits & PG_M) {
						vm_page_dirty(m);
					}
					pte_store(pte, pbits & ~(PG_M|PG_RW));
				} else {
					pte_store(pte, pbits & ~bit);
				}
				pmap_invalidate_page(pv->pv_pmap, pv->pv_va);
			}
		}
	}
	if (!setem && bit == PG_RW)
		vm_page_flag_clear(m, PG_WRITEABLE);
	splx(s);
}

/*
 *	pmap_page_protect:
 *
 *	Lower the permission for all mappings to a given page.
 */
void
pmap_page_protect(vm_page_t m, vm_prot_t prot)
{
	if ((prot & VM_PROT_WRITE) == 0) {
		if (prot & (VM_PROT_READ | VM_PROT_EXECUTE)) {
			pmap_changebit(m, PG_RW, FALSE);
		} else {
			pmap_remove_all(m);
		}
	}
}
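/*
 * A compact way to read pmap_page_protect() above:
 *
 *	prot includes VM_PROT_WRITE	-> nothing to do
 *	read and/or execute only	-> pmap_changebit(m, PG_RW, FALSE)
 *	VM_PROT_NONE			-> pmap_remove_all(m)
 *
 * Write access is revoked through pmap_changebit() so that any pending
 * PG_M state is folded into the vm_page (via vm_page_dirty) before
 * PG_RW is cleared.
 */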
 *	It is not necessary for every reference bit to be cleared, but it
 *	is necessary that 0 only be returned when there are truly no
 *	reference bits set.
 *
 *	XXX: The exact number of bits to check and clear is a matter that
 *	should be tested and standardized at some point in the future for
 *	optimal aging of shared pages.
 */
int
pmap_ts_referenced(vm_page_t m)
{
	register pv_entry_t pv, pvf, pvn;
	pt_entry_t *pte;
	pt_entry_t v;
	int s;
	int rtval = 0;

	if (!pmap_initialized || (m->flags & PG_FICTITIOUS))
		return (rtval);

	s = splvm();
	mtx_assert(&vm_page_queue_mtx, MA_OWNED);
	if ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {

		pvf = pv;

		do {
			pvn = TAILQ_NEXT(pv, pv_list);

			TAILQ_REMOVE(&m->md.pv_list, pv, pv_list);

			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_list);

			if (!pmap_track_modified(pv->pv_va))
				continue;

			pte = pmap_pte(pv->pv_pmap, pv->pv_va);

			if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
				pte_store(pte, v & ~PG_A);
				pmap_invalidate_page(pv->pv_pmap, pv->pv_va);

				rtval++;
				if (rtval > 4) {
					break;
				}
			}
		} while ((pv = pvn) != NULL && pv != pvf);
	}
	splx(s);

	return (rtval);
}

/*
 * Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(vm_page_t m)
{
	pmap_changebit(m, PG_M, FALSE);
}

/*
 *	pmap_clear_reference:
 *
 *	Clear the reference bit on the specified physical page.
 */
void
pmap_clear_reference(vm_page_t m)
{
	pmap_changebit(m, PG_A, FALSE);
}

/*
 * Miscellaneous support routines follow
 */

static void
amd64_protection_init()
{
	register pt_entry_t *kp;
	long prot;

#if 0
#define PG_NX (1ul << 63)
#else
#define PG_NX 0
#endif

	kp = protection_codes;
	for (prot = 0; prot < 8; prot++) {
		switch (prot) {
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
			*kp++ = PG_NX;
			break;
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
			*kp++ = 0;
			break;
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
			*kp++ = PG_RW | PG_NX;
			break;
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
			*kp++ = PG_RW;
			break;
		}
	}
}
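/*
 * With PG_NX compiled out (the "#if 0" above), protection_codes[]
 * collapses to just two distinct values: 0 for every read/execute
 * combination and PG_RW for anything writable. Once the hardware
 * no-execute bit is enabled, the non-executable entries would
 * additionally carry PG_NX.
 */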
/*
 * Map a set of physical memory pages into the kernel virtual
 * address space. Return a pointer to where it is mapped. This
 * routine is intended to be used for mapping device memory,
 * NOT real memory.
 */
void *
pmap_mapdev(pa, size)
	vm_paddr_t pa;
	vm_size_t size;
{
	vm_offset_t va, tmpva, offset;

	/* If this fits within the direct map window, use it */
	if (pa < dmaplimit && (pa + size) < dmaplimit)
		return ((void *)PHYS_TO_DMAP(pa));
	offset = pa & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	va = kmem_alloc_nofault(kernel_map, size);
	if (!va)
		panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
	pa = pa & PG_FRAME;
	for (tmpva = va; size > 0; ) {
		pmap_kenter(tmpva, pa);
		size -= PAGE_SIZE;
		tmpva += PAGE_SIZE;
		pa += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, va, tmpva);
	return ((void *)(va + offset));
}

void
pmap_unmapdev(va, size)
	vm_offset_t va;
	vm_size_t size;
{
	vm_offset_t base, offset, tmpva;
	pt_entry_t *pte;

	/* If we gave out a direct map region in pmap_mapdev, do nothing */
	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS)
		return;
	base = trunc_page(va);	/* PG_FRAME would strip the high VA bits */
	offset = va & PAGE_MASK;
	size = roundup(offset + size, PAGE_SIZE);
	for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
		pte = vtopte(tmpva);
		pte_clear(pte);
	}
	pmap_invalidate_range(kernel_pmap, va, tmpva);
	kmem_free(kernel_map, base, size);
}

/*
 * Perform the pmap work for mincore.
 */
int
pmap_mincore(pmap, addr)
	pmap_t pmap;
	vm_offset_t addr;
{
	pt_entry_t *ptep, pte;
	vm_page_t m;
	int val = 0;

	ptep = pmap_pte(pmap, addr);
	if (ptep == NULL) {
		return (0);
	}

	if ((pte = *ptep) != 0) {
		vm_paddr_t pa;

		val = MINCORE_INCORE;
		if ((pte & PG_MANAGED) == 0)
			return (val);

		pa = pte & PG_FRAME;

		m = PHYS_TO_VM_PAGE(pa);

		/*
		 * Modified by us
		 */
		if (pte & PG_M)
			val |= MINCORE_MODIFIED|MINCORE_MODIFIED_OTHER;
		else {
			/*
			 * Modified by someone else
			 */
			vm_page_lock_queues();
			if (m->dirty || pmap_is_modified(m))
				val |= MINCORE_MODIFIED_OTHER;
			vm_page_unlock_queues();
		}
		/*
		 * Referenced by us
		 */
		if (pte & PG_A)
			val |= MINCORE_REFERENCED|MINCORE_REFERENCED_OTHER;
		else {
			/*
			 * Referenced by someone else
			 */
			vm_page_lock_queues();
			if ((m->flags & PG_REFERENCED) ||
			    pmap_ts_referenced(m)) {
				val |= MINCORE_REFERENCED_OTHER;
				vm_page_flag_set(m, PG_REFERENCED);
			}
			vm_page_unlock_queues();
		}
	}
	return (val);
}
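/*
 * Example of the value assembled above: for a resident, managed page
 * that this pmap has both written (PG_M) and referenced (PG_A), the
 * return value is
 *
 *	MINCORE_INCORE | MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER |
 *	    MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER
 *
 * The *_OTHER bits are set on the "by us" paths as well, since a page
 * this pmap dirtied or referenced has necessarily been dirtied or
 * referenced by somebody.
 */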
void
pmap_activate(struct thread *td)
{
	struct proc *p = td->td_proc;
	pmap_t pmap;
	u_int64_t cr3;

	critical_enter();
	pmap = vmspace_pmap(td->td_proc->p_vmspace);
	pmap->pm_active |= PCPU_GET(cpumask);
	cr3 = vtophys(pmap->pm_pml4);
	/*
	 * XXXKSE this is wrong.
	 * pmap_activate is for the current thread on the current cpu.
	 */
	if (p->p_flag & P_SA) {
		/* Make sure all other cr3 entries are updated. */
		/* What if they are running?  XXXKSE (maybe abort them) */
		FOREACH_THREAD_IN_PROC(p, td) {
			td->td_pcb->pcb_cr3 = cr3;
		}
	} else {
		td->td_pcb->pcb_cr3 = cr3;
	}
	load_cr3(cr3);
	critical_exit();
}

vm_offset_t
pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
{

	if ((obj == NULL) || (size < NBPDR) || (obj->type != OBJT_DEVICE)) {
		return (addr);
	}

	addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
	return (addr);
}
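/*
 * pmap_addr_hint() rounds mapping hints for large device objects up to
 * the next NBPDR (2MB) boundary, e.g. 0x10001000 -> 0x10200000, giving
 * a later pmap_object_init_pt() call the alignment it needs to install
 * PG_PS superpage mappings.
 */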